# NOTE: this excerpt starts inside a call that is opened before the visible
# code; only its final keyword argument and closing parenthesis appear here.
output_nonlinearity=None,
)
# Policy named "conv_feature_policy". It is given `policy_cnn` as its feature
# network, hidden sizes (128, 64), and `clip_action=False`.
policy = GaussianConvFeaturePolicy(
    "conv_feature_policy",
    env_spec=env_spec,
    feature_network=policy_cnn,
    hidden_sizes=(128, 64),
    clip_action=False,
)
# Baseline built for the same `env_spec`, but with its own feature network
# (`baseline_cnn`, a different name from the policy's `policy_cnn`).
# `train_feature_network=True` is passed explicitly; presumably this lets the
# baseline update its CNN weights -- TODO confirm against NNBaseline.
baseline = NNBaseline(
    env_spec=env_spec,
    feature_network=baseline_cnn,
    hidden_sizes=(128, 64),
    hidden_nonlinearity=tf.nn.relu,
    init_lr=1e-4,
    n_itr=5,
    train_feature_network=True,
)
# Forwarded unchanged to TRPO below as `batch_size`.
# NOTE(review): presumably the number of environment samples collected per
# iteration -- confirm against the TRPO implementation in use.
batch_size = 9600
# TRPO is configured with the policy and baseline defined above.
# NOTE: this call is not closed within the visible excerpt; further arguments
# and the closing parenthesis follow outside this view.
algo = TRPO(env=env,
            policy=policy,
            baseline=baseline,
            batch_size=batch_size,
            whole_paths=True,
            max_path_length=1000,
            n_itr=4000,
            step_size=0.01,
            subsample_factor=0.2,
# NOTE: this excerpt starts inside a call that is opened before the visible
# code; only its last two keyword arguments and closing parenthesis appear
# here.
hidden_nonlinearity=tf.nn.relu,
output_nonlinearity=None,
)
# Policy named "conv_feature_policy". It is given `cnn` as its feature
# network, hidden sizes (128, 64), and `tf.nn.tanh` as `output_nonlinearity`.
policy = GaussianConvFeaturePolicy(
    "conv_feature_policy",
    env_spec=env_spec,
    feature_network=cnn,
    hidden_sizes=(128,64),
    output_nonlinearity=tf.nn.tanh,
)
# NOTE(review): the baseline receives the very same `cnn` object that was
# passed to the policy above, so both hold a reference to one feature network.
# Confirm this sharing is intended rather than each needing its own network.
baseline = NNBaseline(
    env_spec=env_spec,
    feature_network=cnn,
    hidden_sizes=(128,64),
    hidden_nonlinearity=tf.nn.relu,
    init_lr=0.001,
    n_itr=5,
)
# Forwarded unchanged to TRPO below as `batch_size`.
# NOTE(review): presumably the number of environment samples collected per
# iteration -- confirm against the TRPO implementation in use.
batch_size = 2400
# TRPO is configured with the policy and baseline defined above.
# NOTE: this call is not closed within the visible excerpt; further arguments
# and the closing parenthesis follow outside this view.
algo = TRPO(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=batch_size,
    whole_paths=True,
    max_path_length=200,
    n_itr=2000,
    step_size=0.01,
    subsample_factor=1.0,