Esempio n. 1
0
        output_nonlinearity=None,
    )

    # Construct the policy on top of the `policy_cnn` feature network.
    # The first positional argument is presumably the policy's name / variable
    # scope -- confirm against the GaussianConvFeaturePolicy constructor.
    # hidden_sizes=(128, 64) presumably describes the layers stacked after the
    # feature network's output -- TODO confirm.
    # clip_action is explicitly turned off for this experiment.
    policy = GaussianConvFeaturePolicy(
        "conv_feature_policy",
        env_spec=env_spec,
        feature_network=policy_cnn,
        hidden_sizes=(128, 64),
        clip_action=False,
    )

    # Construct the baseline on its own feature network variable
    # (`baseline_cnn`), distinct from the `policy_cnn` handed to the policy.
    # train_feature_network=True is passed explicitly; presumably this lets
    # baseline fitting update `baseline_cnn` as well -- confirm in NNBaseline.
    # init_lr / n_itr are presumably the baseline optimizer's initial learning
    # rate and number of fitting iterations -- TODO confirm.
    baseline = NNBaseline(
        env_spec=env_spec,
        feature_network=baseline_cnn,
        hidden_sizes=(128, 64),
        hidden_nonlinearity=tf.nn.relu,
        init_lr=1e-4,
        n_itr=5,
        train_feature_network=True,
    )

    # Forwarded to TRPO as `batch_size` in the call that follows; the unit
    # (presumably environment samples per iteration) is not visible here --
    # confirm.
    batch_size = 9600
    algo = TRPO(env=env,
                policy=policy,
                baseline=baseline,
                batch_size=batch_size,
                whole_paths=True,
                max_path_length=1000,
                n_itr=4000,
                step_size=0.01,
                subsample_factor=0.2,
Esempio n. 2
0
        hidden_nonlinearity=tf.nn.relu,
        output_nonlinearity=None,
    )

    # Construct the policy on top of the `cnn` feature network.
    # The first positional argument is presumably the policy's name / variable
    # scope -- confirm against the GaussianConvFeaturePolicy constructor.
    # output_nonlinearity=tf.nn.tanh is passed for the policy's output layer;
    # which output(s) it is applied to is not visible here -- TODO confirm.
    policy = GaussianConvFeaturePolicy(
        "conv_feature_policy",
        env_spec=env_spec,
        feature_network=cnn,
        hidden_sizes=(128,64),
        output_nonlinearity=tf.nn.tanh,
    )

    # Construct the baseline on the same `cnn` object that is passed to the
    # policy as its feature network.
    # NOTE(review): no train_feature_network argument is given here, so
    # whether fitting the baseline also updates the shared `cnn` depends on
    # NNBaseline's default -- confirm this sharing is intended.
    # init_lr / n_itr are presumably the baseline optimizer's initial learning
    # rate and number of fitting iterations -- TODO confirm.
    baseline = NNBaseline(
        env_spec=env_spec,
        feature_network=cnn,
        hidden_sizes=(128,64),
        hidden_nonlinearity=tf.nn.relu,
        init_lr=0.001,
        n_itr=5,
    )

    # Forwarded to TRPO as `batch_size` in the call that follows; the unit
    # (presumably environment samples per iteration) is not visible here --
    # confirm.
    batch_size = 2400
    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=batch_size,
        whole_paths=True,
        max_path_length=200,
        n_itr=2000,
        step_size=0.01,
        subsample_factor=1.0,