                         ], activation_functions=[relu, relu]))
actor.apply(gauss_init(0, weight_init_sigma))
critic.apply(gauss_init(0, weight_init_sigma))

# Training
trainer = DDPGTrainer(env=env, actor=actor, critic=critic, tau=tau,
                      epsilon=epsilon, batch_size=batch_size, depsilon=depsilon,
                      gamma=gamma, lr_actor=actor_learning_rate,
                      lr_critic=critic_learning_rate, warmup=warmup,
                      replay_memory=replay_memory)

checkpoint_callback = CheckpointCallback(save_path=config.root_path(),
                                         models={"actor": actor, "critic": critic})

trainer.train(2000, max_episode_len=500, verbose=True,
              callbacks=[checkpoint_callback])
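# For reference, a minimal sketch of a Gaussian weight-initialisation hook
# like the gauss_init used above (the actual torch_rl implementation may
# differ); the returned closure is the per-module function Module.apply expects.
def gauss_init_sketch(mu, sigma):
    def init(module):
        if isinstance(module, tor.nn.Linear):
            tor.nn.init.normal_(module.weight, mean=mu, std=sigma)
            tor.nn.init.zeros_(module.bias)
    return init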
import os

import gym
import numpy as np
import torch as tor
import roboschool  # noqa: F401 -- importing registers the Roboschool* envs with gym
from gym.wrappers import Monitor

# EnvLogger, NormalisedActionsWrapper, RunningMeanStdNormalize,
# GeneralisedMemory, to_tensor and config are torch_rl helpers; the exact
# import paths depend on the torch_rl version in use.

env_name = 'RoboschoolAnt-v1'

# Interpolation parameter: v * ppo_gradient + (1 - v) * off_policy_gradient
v = 0.5

np.random.seed(456)
tor.manual_seed(456)

config.set_root('torch_rl_ipgppo_' + env_name.lower().split("-")[0] + "_v={}".format(v),
                force=True)
config.configure_logging(clear=False, output_formats=['tensorboard', 'stdout'])
# config.start_tensorboard()

monitor = Monitor(EnvLogger(NormalisedActionsWrapper(gym.make(env_name))),
                  directory=os.path.join(config.root_path(), 'stats'),
                  force=True, video_callable=False, write_upon_reset=True)
env = RunningMeanStdNormalize(monitor)

num_observations = env.observation_space.shape[0]
num_actions = env.action_space.shape[0]
print('Action shape:', num_actions, 'Observation shape:', num_observations)

tanh, relu = tor.nn.Tanh(), tor.nn.ReLU()
replay_memory = GeneralisedMemory(1000000)
tt = to_tensor  # shorthand for to_tensor
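# Sketch of what an action-normalising wrapper like NormalisedActionsWrapper
# typically does (assumed behaviour, not torch_rl's exact code): the agent
# emits actions in [-1, 1], which get rescaled to the env's [low, high] bounds.
class NormalisedActionsSketch(gym.ActionWrapper):
    def action(self, action):
        low, high = self.action_space.low, self.action_space.high
        return low + (action + 1.0) * 0.5 * (high - low)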
def save_params(**kwargs):
    """Dump the run's hyperparameters to params.json in the run directory."""
    import json
    path = config.root_path()
    with open(os.path.join(path, 'params.json'), 'w') as f:
        json.dump(kwargs, f, indent=4)
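# Example usage: record the run's settings next to the logs. The keyword
# names here are illustrative; pass whatever hyperparameters are in scope.
save_params(env=env_name, v=v, seed=456, replay_capacity=1000000)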