Example #1
def run_ars(exp_prefix, seed):
    # Mass-spring environment in which the hardware design variable is
    # exposed as part of the action space (per the HwAsAction class name).
    env = TfEnv(MassSpringEnv_OptL_HwAsAction(params))

    # Only the default-session context is needed; the session handle itself is unused.
    with tf.compat.v1.Session():
        # Computational half of the policy: a small tanh MLP.
        comp_policy_model = MLPModel(
            output_dim=1,
            hidden_sizes=params.comp_policy_network_size,
            hidden_nonlinearity=tf.nn.tanh,
            output_nonlinearity=tf.nn.tanh,
        )
        # Mechanical half: trainable hardware parameters.
        mech_policy_model = MechPolicyModel_OptL_HwAsAction(params)
        # Joint policy combining the computational and mechanical halves.
        policy = CompMechPolicy_OptL_HwAsAction(
            name='comp_mech_policy',
            env_spec=env.spec,
            comp_policy_model=comp_policy_model,
            mech_policy_model=mech_policy_model)

        # Augmented Random Search over the joint policy's parameters;
        # the env instance is passed directly instead of by name.
        ars = ARS(env_name=None,
                  env=env,
                  policy_params=None,
                  policy=policy,
                  seed=seed,
                  **params.ars_kwargs)

        # DowelManager sets up dowel logging under the experiment prefix.
        with DowelManager(exp_prefix=exp_prefix):
            ars.train(params.ars_n_iter, dump=True)
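A minimal launcher for this entry point might look as follows. This is a sketch only: the experiment prefix and the seed values are placeholders, not values from the original project.

if __name__ == '__main__':
    # Hypothetical sweep over a few seeds; prefix and seeds are assumptions.
    for seed in (0, 1, 2):
        run_ars(exp_prefix='mass_spring_opt_l_ars', seed=seed)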
Example #2
def run_task(snapshot_config, *_):
    """Run task."""

    # k_pre_init is set at module level (e.g. by a sweep script) before each call.
    global k_pre_init
    params.k_pre_init = k_pre_init

    with LocalTFRunner(snapshot_config=snapshot_config) as runner:
        # env = TfEnv(normalize(MassSpringEnv_OptK_HwAsAction(params), normalize_action=False, normalize_obs=False, normalize_reward=True, reward_alpha=0.1))
        env = TfEnv(MassSpringEnv_OptK_HwAsAction(params))

        # zip_project(log_dir=runner._snapshotter._snapshot_dir)

        # Computational half of the policy: a small tanh MLP.
        comp_policy_model = MLPModel(
            output_dim=1,
            hidden_sizes=params.comp_policy_network_size,
            hidden_nonlinearity=tf.nn.tanh,
            output_nonlinearity=tf.nn.tanh,
        )

        # Mechanical half with the hardware parameter k held fixed.
        mech_policy_model = MechPolicyModel_OptK_FixedHW(params)

        # Reuse the HwAsAction policy class to combine both halves.
        policy = CompMechPolicy_OptK_HwAsAction(
            name='comp_mech_policy',
            env_spec=env.spec,
            comp_policy_model=comp_policy_model,
            mech_policy_model=mech_policy_model)

        # baseline = GaussianMLPBaseline(
        #     env_spec=env.spec,
        #     regressor_args=dict(
        #         hidden_sizes=params.baseline_network_size,
        #         hidden_nonlinearity=tf.nn.tanh,
        #         use_trust_region=True,
        #     ),
        # )

        # Linear baseline is cheaper than the GaussianMLPBaseline alternative above.
        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = PPO(env_spec=env.spec,
                   policy=policy,
                   baseline=baseline,
                   **params.ppo_algo_kwargs)

        runner.setup(algo, env)

        runner.train(**params.ppo_inner_train_kwargs)

    # Clear the default graph so repeated calls (e.g. across a sweep) start fresh.
    tf.compat.v1.reset_default_graph()
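In the garage releases that use the run_task(snapshot_config, *_) signature, such tasks are typically launched through garage.experiment.run_experiment, which builds the snapshot configuration and invokes the task. A minimal sketch under that assumption; the k_pre_init value is a placeholder:

from garage.experiment import run_experiment

# Placeholder: set the module-level sweep variable the task reads.
k_pre_init = 1.0

run_experiment(run_task, snapshot_mode='last', seed=1)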
Example #3
def run_task(snapshot_config, *_):
    """Run task."""
    with LocalTFRunner(snapshot_config=snapshot_config) as runner:

        # Archive the project source alongside the experiment snapshots.
        zip_project(log_dir=runner._snapshotter._snapshot_dir)

        # Environment in which the hardware parameter is part of the policy.
        env = TfEnv(MassSpringEnv_OptK_HwAsPolicy(params))

        # Computational half of the policy: a small tanh MLP.
        comp_policy_model = MLPModel(
            output_dim=1,
            hidden_sizes=params.comp_policy_network_size,
            hidden_nonlinearity=tf.nn.tanh,
            output_nonlinearity=tf.nn.tanh)

        # Mechanical half: the hardware parameters live in the policy itself.
        mech_policy_model = MechPolicyModel_OptK_HwAsPolicy(params)

        policy = CompMechPolicy_OptK_HwAsPolicy(
            name='comp_mech_policy',
            env_spec=env.spec,
            comp_policy_model=comp_policy_model,
            mech_policy_model=mech_policy_model)

        # baseline = GaussianMLPBaseline(
        #     env_spec=env.spec,
        #     regressor_args=dict(
        #         hidden_sizes=params.baseline_network_size,
        #         hidden_nonlinearity=tf.nn.tanh,
        #         use_trust_region=True,
        #     ),
        # )
        baseline = LinearFeatureBaseline(env_spec=env.spec)

        # CMA-ES searches directly over the flattened policy parameters.
        algo = CMAES(env_spec=env.spec,
                     policy=policy,
                     baseline=baseline,
                     **params.cmaes_algo_kwargs)

        runner.setup(algo, env)

        runner.train(**params.cmaes_train_kwargs)
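All three snippets read their settings from a shared params object. A hedged sketch of what such an object might contain follows; the attribute names are the ones referenced in the code above, but every value here is an illustrative assumption:

from types import SimpleNamespace

# Placeholder configuration; attribute names come from the snippets above,
# values are assumptions only.
params = SimpleNamespace(
    comp_policy_network_size=(32, 32),  # hidden sizes of the MLP policy
    baseline_network_size=(32, 32),     # for the commented-out GaussianMLPBaseline
    k_pre_init=1.0,                     # overwritten by the module-level sweep variable
    ars_n_iter=1000,                    # ARS training iterations
    ars_kwargs={},                      # extra keyword args forwarded to ARS(...)
    ppo_algo_kwargs={},                 # forwarded to PPO(...)
    ppo_inner_train_kwargs={},          # forwarded to runner.train(...)
    cmaes_algo_kwargs={},               # forwarded to CMAES(...)
    cmaes_train_kwargs={},              # forwarded to runner.train(...)
)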