def experiment(variant):
    seed = variant['seed']
    n_parallel = variant['n_parallel']
    log_dir = variant['log_dir']
    setup(seed, n_parallel, log_dir)

    fast_learning_rate = variant['flr']
    fast_batch_size = variant['fbs']  # 10 works for [0.1, 0.2]; 20 doesn't improve much for [0, 0.2]
    meta_batch_size = 20  # 10 also works but is much less stable; 20 is fairly stable; 40 is more stable
    max_path_length = 100
    num_grad_updates = 1
    meta_step_size = variant['mlr']

    env = TfEnv(normalize(PointEnvRandGoal()))
    policy = MAMLGaussianMLPPolicy(
        name="policy",
        env_spec=env.spec,
        grad_step_size=fast_learning_rate,
        hidden_nonlinearity=tf.nn.relu,
        hidden_sizes=variant['hidden_sizes'],
    )
    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = MAMLTRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=fast_batch_size,  # number of trajectories per gradient update
        max_path_length=max_path_length,
        meta_batch_size=meta_batch_size,
        num_grad_updates=num_grad_updates,
        n_itr=1000,
        use_maml=True,
        step_size=meta_step_size,
        plot=False,
    )
    algo.train()
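# A minimal usage sketch (not from the original file): the variant dictionary
# below only shows the keys that this experiment() reads. The specific values
# are illustrative assumptions, not the settings used in the original runs.
if __name__ == '__main__':
    example_variant = {
        'seed': 1,
        'n_parallel': 1,
        'log_dir': '/tmp/maml_point/',   # placeholder log directory
        'flr': 0.1,                      # fast (inner-loop) learning rate
        'fbs': 20,                       # fast batch size: trajectories per inner update
        'mlr': 0.01,                     # meta (outer-loop) step size
        'hidden_sizes': (100, 100),
    }
    experiment(example_variant)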
def experiment(variant):
    fast_learning_rate = variant['fast_learning_rate']
    fast_batch_size = variant['fast_batch_size']
    meta_batch_size = variant['meta_batch_size']
    max_path_length = variant['max_path_length']
    num_grad_updates = variant['num_grad_updates']
    meta_step_size = variant['meta_step_size']

    env = TfEnv(BallEnv())
    policy = MAMLGaussianMLPPolicy(
        name="policy",
        env_spec=env.spec,
        grad_step_size=fast_learning_rate,
        hidden_nonlinearity=tf.nn.relu,
        hidden_sizes=(100, 100),
    )
    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = MAMLTRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=fast_batch_size,  # number of trajectories per gradient update
        max_path_length=max_path_length,
        meta_batch_size=meta_batch_size,
        num_grad_updates=num_grad_updates,
        n_itr=1000,
        use_maml=True,
        step_size=meta_step_size,
        plot=False,
    )
    algo.train()
def main():
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # pylint: disable=E1101
    with tf.Session(config=config):
        # env = TfEnv(normalize(GridWorldEnvRand('four-state')))
        env = DummyVecEnv([make_env])
        policy = MAMLCategoricalMLPPolicy(
            name="policy",
            env_spec=env.spec,
            grad_step_size=fast_learning_rate,
            prob_network=nature_cnn,
            hidden_nonlinearity=tf.nn.relu,
            hidden_sizes=(100, 100),
        )
        baseline = LinearFeatureBaseline(env_spec=env.spec)
        algo = MAMLTRPO(
            env=env,
            policy=policy,
            baseline=baseline,
            batch_size=fast_batch_size,  # number of trajectories per gradient update
            max_path_length=max_path_length,
            meta_batch_size=meta_batch_size,
            num_grad_updates=num_grad_updates,
            n_itr=800,
            use_maml=use_maml,
            step_size=meta_step_size,
            plot=False,
        )
        run_experiment_lite(
            algo.train(),
            n_parallel=4,
            snapshot_mode="last",
            seed=1,
            exp_prefix='trpo_maml_4state',
            exp_name='trpo_maml' + str(int(use_maml))
                     + '_fbs' + str(fast_batch_size)
                     + '_mbs' + str(meta_batch_size)
                     + '_flr_' + str(fast_learning_rate)
                     + 'metalr_' + str(meta_step_size)
                     + '_step1' + str(num_grad_updates),
            plot=False,
        )
def experiment(variant):
    seed = variant['seed']
    n_parallel = variant['n_parallel']
    log_dir = variant['log_dir']
    setup(seed, n_parallel, log_dir)

    expertDataLoc = variant['expertDataLoc']
    expertDataItr = variant['expertDataItr']

    fast_learning_rate = variant['flr']
    fast_batch_size = variant['fbs']  # 10 works for [0.1, 0.2]; 20 doesn't improve much for [0, 0.2]
    meta_batch_size = 20  # 10 also works but is much less stable; 20 is fairly stable; 40 is more stable
    max_path_length = 150
    num_grad_updates = 1
    meta_step_size = variant['mlr']

    regionSize = variant['regionSize']
    if regionSize == '20X20':
        tasksFile = '/root/code/multiworld/multiworld/envs/goals/pickPlace_20X20_v1.pkl'
    else:
        assert regionSize == '60X30'
        tasksFile = '/root/code/multiworld/multiworld/envs/goals/PickPlace_60X30.pkl'
    tasks = pickle.load(open(tasksFile, 'rb'))

    envType = variant['envType']
    if envType == 'Push':
        baseEnv = SawyerPushEnv(tasks=tasks)
    else:
        assert envType == 'PickPlace'
        baseEnv = SawyerPickPlaceEnv(tasks=tasks)
    env = FinnMamlEnv(FlatGoalEnv(baseEnv, obs_keys=['state_observation']))
    env = TfEnv(NormalizedBoxEnv(env))

    policy = MAMLGaussianMLPPolicy(
        name="policy",
        env_spec=env.spec,
        grad_step_size=fast_learning_rate,
        hidden_nonlinearity=tf.nn.relu,
        hidden_sizes=variant['hidden_sizes'],
    )
    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = MAMLTRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=fast_batch_size,  # number of trajectories per gradient update
        max_path_length=max_path_length,
        meta_batch_size=meta_batch_size,
        num_grad_updates=num_grad_updates,
        n_itr=1000,
        use_maml=True,
        step_size=meta_step_size,
        plot=False,
        numExpertPolicies=20,
        expertDataInfo={'expert_loc': expertDataLoc, 'expert_itr': expertDataItr},
    )
    algo.train()
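# A hedged example of the variant dictionary this Sawyer experiment() expects.
# The keys are taken from the reads above; all values are hypothetical
# placeholders, including the expert-data location and iteration index.
example_variant = {
    'seed': 1,
    'n_parallel': 1,
    'log_dir': '/tmp/maml_sawyer/',            # placeholder log directory
    'expertDataLoc': '/tmp/expert_policies/',  # placeholder path to expert data
    'expertDataItr': 500,                      # placeholder expert snapshot iteration
    'flr': 0.1,
    'fbs': 20,
    'mlr': 0.01,
    'regionSize': '20X20',   # or '60X30'
    'envType': 'PickPlace',  # or 'Push'
    'hidden_sizes': (100, 100),
}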
    )
    if bas == 'zero':
        baseline = ZeroBaseline(env_spec=env.spec)
    elif 'linear' in bas:
        baseline = LinearFeatureBaseline(env_spec=env.spec)
    else:
        baseline = GaussianMLPBaseline(env_spec=env.spec)
    algo = MAMLTRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=fast_batch_size,  # number of trajectories per gradient update
        max_path_length=max_path_length,
        meta_batch_size=meta_batch_size,
        num_grad_updates=num_grad_updates,
        n_itr=100,
        use_maml=use_maml,
        step_size=meta_step_size,
        plot=False,
    )
    run_experiment_lite(
        algo.train(),
        n_parallel=1,
        snapshot_mode="last",
        python_command='python3',
        seed=1,
        exp_prefix='vpg_maml_point100',
        exp_name='trpomaml' + str(int(use_maml))
                 + '_fbs' + str(fast_batch_size)
                 + '_mbs' + str(meta_batch_size)
                 + '_flr_' + str(fast_learning_rate)
                 + 'metalr_' + str(meta_step_size)
                 + '_step1' + str(num_grad_updates),
        plot=False,
    )
    algo = MAMLTRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=v['fast_batch_size'],  # number of trajectories per gradient update
        max_path_length=max_path_length,
        meta_batch_size=v['meta_batch_size'],
        num_grad_updates=num_grad_updates,
        n_itr=800,
        use_maml=use_maml,
        step_size=v['meta_step_size'],
        plot=False,
    )
    run_experiment_lite(
        algo.train(frac=v.gpu_frac),
        exp_prefix='maml_mdp',
        exp_name='N%d_mbs%d' % (v['n_episodes'], v['meta_batch_size']),
        # Number of parallel workers for sampling
        n_parallel=1,
        # Keep snapshot parameters every snapshot_gap iterations
        snapshot_mode="gap",
        env={'CUDA_VISIBLE_DEVICES': str(FLAGS.devices)},
        snapshot_gap=25,
        sync_s3_pkl=True,
        # Specifies the seed for the experiment. If this is not provided, a random seed
        # will be used
        seed=v["seed"],
        mode="local",
        # mode="ec2",
        variant=v,
def experiment(variant):
    seed = variant['seed']
    n_parallel = variant['n_parallel']
    log_dir = variant['log_dir']
    setup(seed, n_parallel, log_dir)

    fast_learning_rate = variant['flr']
    fast_batch_size = variant['fbs']  # 10 works for [0.1, 0.2]; 20 doesn't improve much for [0, 0.2]
    meta_batch_size = 20  # 10 also works but is much less stable; 20 is fairly stable; 40 is more stable
    max_path_length = 150
    num_grad_updates = 1
    meta_step_size = variant['mlr']

    tasksFile = '/root/code/multiworld/multiworld/envs/goals/Door_60X20X20.pkl'
    tasks = pickle.load(open(tasksFile, 'rb'))
    baseEnv = SawyerDoorOpenEnv(tasks=tasks)
    env = FinnMamlEnv(FlatGoalEnv(baseEnv, obs_keys=['state_observation']))
    env = TfEnv(NormalizedBoxEnv(env))

    policy = MAMLGaussianMLPPolicy(
        name="policy",
        env_spec=env.spec,
        grad_step_size=fast_learning_rate,
        hidden_nonlinearity=tf.nn.relu,
        hidden_sizes=variant['hidden_sizes'],
    )
    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = MAMLTRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=fast_batch_size,  # number of trajectories per gradient update
        max_path_length=max_path_length,
        meta_batch_size=meta_batch_size,
        num_grad_updates=num_grad_updates,
        n_itr=1000,
        use_maml=True,
        step_size=meta_step_size,
        plot=False,
    )
    # import os
    # saveDir = variant['saveDir']
    # if not os.path.isdir(saveDir):
    #     os.mkdir(saveDir)
    # logger.set_snapshot_dir(saveDir)
    # # logger.set_snapshot_gap(20)
    # logger.add_tabular_output(saveDir + 'progress.csv')
    algo.train()
def experiment(variant):
    seed = variant['seed']
    tf.set_random_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    fast_learning_rate = variant['flr']
    fast_batch_size = variant['fbs']  # 10 works for [0.1, 0.2]; 20 doesn't improve much for [0, 0.2]
    meta_batch_size = 20  # 10 also works but is much less stable; 20 is fairly stable; 40 is more stable
    max_path_length = 150
    num_grad_updates = 1
    meta_step_size = variant['mlr']

    regionSize = variant['regionSize']
    if regionSize == '20X20':
        tasksFile = '/root/code/multiworld/multiworld/envs/goals/pickPlace_20X20_6_8.pkl'
    else:
        assert regionSize == '60X30'
        tasksFile = '/root/code/multiworld/multiworld/envs/goals/pickPlace_60X30.pkl'
    tasks = pickle.load(open(tasksFile, 'rb'))

    envType = variant['envType']
    if envType == 'Push':
        baseEnv = SawyerPushEnv(tasks=tasks)
    else:
        assert envType == 'PickPlace'
        baseEnv = SawyerPickPlaceEnv(tasks=tasks)
    env = FinnMamlEnv(
        FlatGoalEnv(baseEnv, obs_keys=['state_observation', 'state_desired_goal']))
    env = TfEnv(NormalizedBoxEnv(env))

    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = MAMLTRPO(
        env=env,
        policy=None,
        load_policy=variant['init_param_file'],
        baseline=baseline,
        batch_size=fast_batch_size,  # number of trajectories per gradient update
        max_path_length=max_path_length,
        meta_batch_size=meta_batch_size,
        num_grad_updates=num_grad_updates,
        n_itr=1000,
        use_maml=True,
        step_size=meta_step_size,
        plot=False,
    )

    import os
    saveDir = variant['saveDir']
    if not os.path.isdir(saveDir):
        os.mkdir(saveDir)
    logger.set_snapshot_dir(saveDir)
    logger.add_tabular_output(saveDir + 'progress.csv')

    algo.train()
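# A hedged example variant for this fine-tuning-from-a-checkpoint script. The
# keys mirror the reads above; the 'init_param_file' and 'saveDir' values are
# hypothetical placeholders, not paths from the original experiments.
example_variant = {
    'seed': 1,
    'flr': 0.1,
    'fbs': 20,
    'mlr': 0.01,
    'regionSize': '20X20',    # or '60X30'
    'envType': 'Push',        # or 'PickPlace'
    'init_param_file': '/tmp/maml_init/itr_999.pkl',  # placeholder checkpoint
    'saveDir': '/tmp/maml_finetune/',                 # placeholder output directory
}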