def run_ars(exp_prefix, seed):
    """Train an ARS agent on the mass-spring OptL hardware-as-action task.

    Args:
        exp_prefix: Experiment prefix passed to ``DowelManager`` for logging.
        seed: Random seed forwarded to the ARS optimizer.
    """
    env = TfEnv(MassSpringEnv_OptL_HwAsAction(params))
    # The session only needs to be open while the policy graph is built and
    # trained; the session handle itself is never used, so don't bind it.
    with tf.compat.v1.Session():
        comp_policy_model = MLPModel(
            output_dim=1,
            hidden_sizes=params.comp_policy_network_size,
            hidden_nonlinearity=tf.nn.tanh,
            output_nonlinearity=tf.nn.tanh,
        )
        mech_policy_model = MechPolicyModel_OptL_HwAsAction(params)
        policy = CompMechPolicy_OptL_HwAsAction(
            name='comp_mech_policy',
            env_spec=env.spec,
            comp_policy_model=comp_policy_model,
            mech_policy_model=mech_policy_model)
        ars = ARS(
            env_name=None,
            env=env,
            policy_params=None,
            policy=policy,
            seed=seed,
            **params.ars_kwargs)
        # DowelManager sets up the logger for the duration of training; the
        # manager object is never referenced directly.
        with DowelManager(exp_prefix=exp_prefix):
            ars.train(params.ars_n_iter, dump=True)
def run_task(snapshot_config, *_):
    """Run one PPO training task for the OptK fixed-hardware variant.

    Reads the module-level ``k_pre_init`` into ``params`` so the same task
    function can be re-invoked with different pre-init spring constants.

    Args:
        snapshot_config: Snapshot configuration forwarded to ``LocalTFRunner``.
        *_: Unused positional arguments (launcher-signature compatibility).
    """
    # A plain read of module-level state needs no ``global`` declaration.
    params.k_pre_init = k_pre_init
    with LocalTFRunner(snapshot_config=snapshot_config) as runner:
        env = TfEnv(MassSpringEnv_OptK_HwAsAction(params))
        comp_policy_model = MLPModel(
            output_dim=1,
            hidden_sizes=params.comp_policy_network_size,
            hidden_nonlinearity=tf.nn.tanh,
            output_nonlinearity=tf.nn.tanh,
        )
        mech_policy_model = MechPolicyModel_OptK_FixedHW(params)
        # Reuse the HwAsAction policy class even though the hardware part is
        # fixed here — the mech model supplies the fixed-HW behavior.
        policy = CompMechPolicy_OptK_HwAsAction(
            name='comp_mech_policy',
            env_spec=env.spec,
            comp_policy_model=comp_policy_model,
            mech_policy_model=mech_policy_model)
        baseline = LinearFeatureBaseline(env_spec=env.spec)
        algo = PPO(
            env_spec=env.spec,
            policy=policy,
            baseline=baseline,
            **params.ppo_algo_kwargs)
        runner.setup(algo, env)
        runner.train(**params.ppo_inner_train_kwargs)
    # NOTE(review): resetting the default graph allows this task to be run
    # repeatedly in one process; placed after the runner/session context has
    # closed, since resetting while a session is live is unsafe — confirm
    # against the launcher's call pattern.
    tf.compat.v1.reset_default_graph()
def run_task(snapshot_config, *_):
    """Run one CMA-ES training task for the OptK hardware-as-policy variant.

    Args:
        snapshot_config: Snapshot configuration forwarded to ``LocalTFRunner``.
        *_: Unused positional arguments (launcher-signature compatibility).
    """
    with LocalTFRunner(snapshot_config=snapshot_config) as runner:
        # Archive the project source next to the snapshots for reproducibility.
        zip_project(log_dir=runner._snapshotter._snapshot_dir)
        env = TfEnv(MassSpringEnv_OptK_HwAsPolicy(params))
        comp_policy_model = MLPModel(
            output_dim=1,
            hidden_sizes=params.comp_policy_network_size,
            hidden_nonlinearity=tf.nn.tanh,
            output_nonlinearity=tf.nn.tanh)
        mech_policy_model = MechPolicyModel_OptK_HwAsPolicy(params)
        policy = CompMechPolicy_OptK_HwAsPolicy(
            name='comp_mech_policy',
            env_spec=env.spec,
            comp_policy_model=comp_policy_model,
            mech_policy_model=mech_policy_model)
        baseline = LinearFeatureBaseline(env_spec=env.spec)
        algo = CMAES(
            env_spec=env.spec,
            policy=policy,
            baseline=baseline,
            **params.cmaes_algo_kwargs)
        runner.setup(algo, env)
        runner.train(**params.cmaes_train_kwargs)