Esempio n. 1
0
def _init(args):
    """Launch a sweep over a single configuration built from ``args``.

    Reads ``env_name``, ``rundir``, ``trpo_ent``, ``trpo_step``,
    ``hid_size``, ``hid_layers`` and ``repeat`` from ``args``. When
    ``args.repeat`` is greater than 1 the configuration is handed to
    ``run_sweep_parallel`` (after emitting a warning); otherwise the
    parallel sampler is initialised and ``run_sweep_serial`` runs it once.
    """
    env_name = args.env_name
    print('Using environment %s' % env_name)

    # Every entry is a one-element list, so the sweep covers exactly one
    # combination of settings.
    sweep_params = dict(
        env_name=[env_name],
        rundir=[args.rundir],
        ent_wt=[args.trpo_ent],
        trpo_step=[args.trpo_step],
        hid_size=[args.hid_size],
        hid_layers=[args.hid_layers],
        many_runs=[args.repeat > 1],
    )

    if not (args.repeat > 1):
        # Single run: set up the parallel sampler with a fixed worker count
        # and seed, then execute the one configuration serially.
        parallel_sampler.initialize(n_parallel=8)
        parallel_sampler.set_seed(1)
        run_sweep_serial(main, sweep_params, repeat=1)
        return

    # Repeated runs: the parallel sampler is deliberately left
    # uninitialised here (stacking it with the parallel sweep doesn't work).
    slow_notice = (
        "You're trying to use --repeat N for N > 1, but that "
        "disables parallel sampling. This is probably going to be "
        "heinously slow or something, use at own risk."
    )
    warnings.warn(slow_notice)
    run_sweep_parallel(main, sweep_params, repeat=args.repeat)
Esempio n. 2
0
    default=None,
    type=int,
    help='size of each hidden layer in discriminator (or policy)')
# Optional choice of IRL algorithm. The default of None lets the
# `__main__` block fill it in via infer_method(args.pkl_path).
parser.add_argument(
    '--method',
    default=None,
    choices=('airl', 'vairl', 'gail', 'vail'),
    help='IRL method to  use',
)

if __name__ == "__main__":
    # Script entry point: parse the CLI, fill in/validate options, then run
    # a one-configuration sweep of `main` serially.
    args = parser.parse_args()
    if args.method is None:
        # No --method given: let infer_method() choose one from the pickle
        # path and tell the user what was picked.
        args.method = infer_method(args.pkl_path)
        print('Assuming method %s' % args.method)
    # The two annealing options must be given together or not at all.
    # Report a mismatch through argparse instead of `assert`: assertions are
    # stripped under `python -O` (silently skipping the check) and otherwise
    # produce a raw AssertionError traceback rather than a usage error.
    anneal_ent_missing = args.trpo_anneal_init_ent is None
    anneal_steps_missing = args.trpo_anneal_steps is None
    if anneal_ent_missing != anneal_steps_missing:
        parser.error(
            "must supply both of --trpo-anneal-{init-ent,steps} or neither")
    # Each value is wrapped in a one-element list, so exactly one parameter
    # combination is swept.
    params_dict = {
        'irl_pkl': [args.pkl_path],
        'ent_wt': [args.trpo_ent],
        'trpo_step': [args.trpo_step],
        'trpo_anneal_steps': [args.trpo_anneal_steps],
        'trpo_anneal_init_ent': [args.trpo_anneal_init_ent],
        'method': [args.method],
        'rundir': [args.rundir],
        'hid_size': [args.hid_size],
        'hid_layers': [args.hid_layers],
        'switch_env': [args.switch_env],
    }
    # NOTE: a parallel variant, run_sweep_parallel(main, params_dict,
    # repeat=3), was previously used here and is currently disabled.
    run_sweep_serial(main, params_dict, repeat=1)