hidden_nonlinearity=tf.nn.relu, hidden_sizes=(100, 100), ) if bas == 'zero': baseline = ZeroBaseline(env_spec=env.spec) elif bas == 'linear': baseline = LinearFeatureBaseline(env_spec=env.spec) else: baseline = GaussianMLPBaseline(env_spec=env.spec) algo = SensitiveTRPO( #algo = SensitiveVPG( env=env, policy=policy, baseline=baseline, batch_size=fast_batch_size, # number of trajs for grad update max_path_length=max_path_length, meta_batch_size=meta_batch_size, num_grad_updates=num_grad_updates, n_itr=400, use_sensitive=use_sensitive, #optimizer_args={'tf_optimizer_args':{'learning_rate': learning_rate}}, plot=False, ) run_experiment_lite( algo.train(), n_parallel=0, snapshot_mode="last", seed=1, #exp_prefix='deleteme', #exp_name='deleteme' #exp_prefix='sensitive1dT5_2017_01_19', #exp_prefix='bugfix_sensitive0d_8tasks_T'+str(max_path_length)+'_2017_02_05',
hidden_nonlinearity=tf.nn.relu, hidden_sizes=(100, 100), ) if bas == 'zero': baseline = ZeroBaseline(env_spec=env.spec) elif 'linear' in bas: baseline = LinearFeatureBaseline(env_spec=env.spec) else: baseline = GaussianMLPBaseline(env_spec=env.spec) algo = SensitiveTRPO( env=env, policy=policy, baseline=baseline, batch_size=fast_batch_size, # number of trajs for grad update max_path_length=max_path_length, meta_batch_size=meta_batch_size, num_grad_updates=num_grad_updates, n_itr=100, use_sensitive=use_sensitive, step_size=meta_step_size, plot=False, ) run_experiment_lite( algo.train(), n_parallel=40, snapshot_mode="last", python_command='python3', seed=1, exp_prefix='vpg_sensitive_point100', exp_name='trposens' + str(int(use_sensitive)) + '_fbs' + str(fast_batch_size) + '_mbs' + str(meta_batch_size) +