Example #1
0
    ],
                  activation_functions=[relu, relu]))

# Apply a freshly built gauss_init(0, weight_init_sigma) initialiser to each
# network in turn (actor first, then critic).
# NOTE(review): presumably mean 0 / std weight_init_sigma -- confirm in gauss_init.
for _net in (actor, critic):
    _net.apply(gauss_init(0, weight_init_sigma))

# Training setup: the environment, both networks, the replay memory and all
# hyper-parameters are passed to the DDPG trainer by keyword.
trainer = DDPGTrainer(
    env=env,
    actor=actor,
    critic=critic,
    tau=tau,
    epsilon=epsilon,
    batch_size=batch_size,
    depsilon=depsilon,
    gamma=gamma,
    lr_actor=actor_learning_rate,
    lr_critic=critic_learning_rate,
    warmup=warmup,
    replay_memory=replay_memory,
)

# The callback receives the configured root path as its save location and the
# two networks keyed by name.
checkpoint_callback = CheckpointCallback(
    save_path=config.root_path(),
    models=dict(actor=actor, critic=critic),
)

# Start training with the checkpoint callback attached.
# NOTE(review): 2000 is presumably the number of episodes -- confirm against
# DDPGTrainer.train.
trainer.train(
    2000,
    max_episode_len=500,
    verbose=True,
    callbacks=[checkpoint_callback],
)
Example #2
0
import roboschool

env_name = 'RoboschoolAnt-v1'
# Interpolation parameter: v * ppo_gradient + (1 - v) * off_policy_gradient
v = 0.5

# Single source of truth for the random seed, so the NumPy and torch
# generators cannot end up seeded differently when the value is edited.
SEED = 456
np.random.seed(SEED)
tor.manual_seed(SEED)

# Root directory name: fixed prefix, the lower-cased environment name up to
# its first "-", and the interpolation value v.
config.set_root(
    'torch_rl_ipgppo_' + env_name.lower().split("-")[0] + "_v={}".format(v),
    force=True,
)
config.configure_logging(
    clear=False,
    output_formats=['tensorboard', 'stdout'],
)
# Left disabled in the original script:
# config.start_tensorboard()

# Wrapper stack, innermost first: gym environment -> NormalisedActionsWrapper
# -> EnvLogger -> Monitor (writes under <root>/stats, video recording off)
# -> RunningMeanStdNormalize.
monitor = Monitor(
    EnvLogger(
        NormalisedActionsWrapper(
            gym.make(env_name)
        )
    ),
    directory=os.path.join(config.root_path(), 'stats'),
    force=True,
    video_callable=False,
    write_upon_reset=True,
)
env = RunningMeanStdNormalize(monitor)

# Size of the first axis of the observation and action spaces.
num_observations, num_actions = (
    env.observation_space.shape[0],
    env.action_space.shape[0],
)

print('Action shape: ', num_actions, 'Observation shape: ', num_observations)

# Activation modules, created once here.
tanh = tor.nn.Tanh()
relu = tor.nn.ReLU()

# NOTE(review): 1000000 is presumably the memory capacity -- confirm against
# GeneralisedMemory.
replay_memory = GeneralisedMemory(1000000)

# Short alias for to_tensor.
tt = to_tensor
Example #3
0
def save_params(**kwargs):
    """Write the keyword arguments as JSON to ``params.json`` in the root path.

    The parameters are encoded (4-space indent) *before* the file is opened,
    so a value that cannot be serialised raises without truncating or
    partially overwriting an existing ``params.json``.

    Args:
        **kwargs: Parameter names mapped to JSON-serialisable values.

    Raises:
        TypeError: If a value is not JSON-serialisable.
        ValueError: If the values contain a circular reference.
        OSError: If the file cannot be opened or written.
    """
    import json

    # Encode first: opening with 'w' truncates the file immediately, so an
    # encoding failure inside json.dump would leave a half-written file.
    payload = json.dumps(kwargs, indent=4)
    params_file = os.path.join(root_path(), 'params.json')
    with open(params_file, 'w') as f:
        f.write(payload)