Example #1
from torch_rl import config
# NOTE: the wrapper import paths below are assumed from torch_rl's layout;
# adjust them to match the installed version of the library.
from torch_rl.envs import (EnvLogger, NormalisedActionsWrapper,
                           RunningMeanStdNormalize)

import sys
import os

import gym
from gym.wrappers import Monitor
import numpy as np
import torch as tor
import roboschool  # registers the Roboschool environments with gym

env_name = 'RoboschoolAnt-v1'
# Interpolation parameter v * ppo_gradient + (1-v) * off_policy_gradient
v = 0.5
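# A minimal sketch of the interpolation itself (illustrative only, not
# torch_rl's actual update code): each parameter's gradient is a convex
# combination of the on-policy PPO estimate and the off-policy estimate.
def interpolated_gradient(ppo_grad, off_policy_grad, v):
    """Return v * ppo_grad + (1 - v) * off_policy_grad."""
    return v * ppo_grad + (1 - v) * off_policy_grad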
np.random.seed(456)
tor.manual_seed(456)

config.set_root('torch_rl_ipgppo_' + env_name.lower().split("-")[0] +
                "_v={}".format(v),
                force=True)
config.configure_logging(clear=False, output_formats=['tensorboard', 'stdout'])
# config.start_tensorboard()

monitor = Monitor(EnvLogger(NormalisedActionsWrapper(gym.make(env_name))),
                  directory=os.path.join(config.root_path(), 'stats'),
                  force=True,
                  video_callable=False,
                  write_upon_reset=True)
env = RunningMeanStdNormalize(monitor)
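# Hedged sketch of what a running mean/std normaliser does (illustrative,
# not RunningMeanStdNormalize's actual implementation): it maintains
# streaming estimates of the observation mean and variance and whitens
# each incoming observation with them.
class _RunningNormSketch:
    def __init__(self, shape, eps=1e-8):
        self.mean, self.var, self.count = np.zeros(shape), np.ones(shape), eps

    def normalise(self, obs):
        self.count += 1
        delta = obs - self.mean
        self.mean += delta / self.count                      # update running mean
        self.var += (delta * (obs - self.mean) - self.var) / self.count  # update running variance
        return (obs - self.mean) / np.sqrt(self.var + 1e-8)  # whiten the observation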

num_observations = env.observation_space.shape[0]
num_actions = env.action_space.shape[0]

print('Action dim:', num_actions, 'Observation dim:', num_observations)

tanh, relu = tor.nn.Tanh(), tor.nn.ReLU()
Example #2
import gym
import numpy as np
import torch as tor
from torch_rl import config
# NOTE: import paths below are assumed from torch_rl's layout; adjust them
# to match the installed version of the library.
from torch_rl.envs import EnvLogger, NormalisedActionsWrapper
from torch_rl.memory import SequentialMemory
from torch_rl.models import SimpleNetwork
from torch_rl.utils import cuda_if_available, timestamp

tau = 0.001                    # soft target-update coefficient
epsilon = 1.0                  # initial exploration-noise scale
depsilon = 1. / 3000           # linear per-step decay of epsilon
gamma = 0.99                   # discount factor
replay_capacity = 1000000      # replay buffer size
warmup = 2000                  # steps collected before learning starts
max_episode_length = 500
actor_learning_rate = 1e-4
critic_learning_rate = 1e-3
middle_layer_size = [64, 64]   # hidden-layer widths
weight_init_sigma = 0.003      # std for the output-layer weight init
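
# Hedged illustration of how tau is conventionally used in DDPG (not
# necessarily torch_rl's own code): the target networks track the online
# networks with a slow exponential moving average of their weights.
def soft_update(target_net, online_net, tau):
    for t_param, param in zip(target_net.parameters(), online_net.parameters()):
        t_param.data.copy_(tau * param.data + (1.0 - tau) * t_param.data)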

# Replay buffer of single-step transitions (window_length=1), sized by the
# capacity defined above.
replay_memory = SequentialMemory(limit=replay_capacity, window_length=1)

config.configure_logging(clear=False,
                         output_formats=['tensorboard', 'stdout'],
                         root_dir='ddpg_' + timestamp(),
                         force=True)

env = EnvLogger(NormalisedActionsWrapper(gym.make('Pendulum-v0')))
env.reset()
num_actions = env.action_space.shape[0]
num_observations = env.observation_space.shape[0]
relu, tanh = tor.nn.ReLU(), tor.nn.Tanh()

actor = cuda_if_available(
    SimpleNetwork(
        [num_observations, middle_layer_size[0], middle_layer_size[1], num_actions],
        activation_functions=[relu, relu, tanh]))
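
# Hedged usage sketch, assuming SimpleNetwork is a callable module mapping
# observations to actions squashed into [-1, 1] by the final tanh; the
# noise scale would be epsilon, decayed by depsilon after each step.
def select_action(actor, obs, noise_scale):
    obs_t = tor.from_numpy(obs).float().unsqueeze(0)
    action = actor(obs_t).detach().cpu().numpy()[0]
    # Additive Gaussian exploration noise, clipped to the valid action range.
    return np.clip(action + noise_scale * np.random.randn(*action.shape), -1., 1.)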