def train_eval(log_dir="DDPG",
               prev_log="",
               google_colab=False,
               seed=123,
               gpu_id=0,
               env_name="HalfCheetah-v2",
               num_frames=10000,
               tau=1e-2,
               memory_size=5000,
               hot_start=100,
               batch_size=200,
               interval_MAR=10,
               gamma=0.99,
               L2_reg=0.5,
               random_process="ou",
               mu=0.3,
               sigma=0.2,
               num_eval_episodes=1,
               eval_interval=1000):
    """Build a DDPG agent with its environment and buffers, then run `train`.

    Args:
        log_dir: Forwarded to `set_up_for_training`; the value it returns is
            then indexed with "video_path", "traj_path", "summary_path" and
            "model_path".
        prev_log: Forwarded to `set_up_for_training`.
        google_colab: Forwarded to `set_up_for_training` and `train`.
        seed: Seed for the TensorFlow and NumPy global random generators; also
            forwarded to `set_up_for_training`.
        gpu_id: Forwarded to `set_up_for_training`.
        env_name: Gym environment id handed to `gym.make`.
        num_frames, tau, eval_interval, hot_start, batch_size,
        num_eval_episodes: Forwarded unchanged to `train`.
        memory_size: First argument of `ReplayBuffer`.
        interval_MAR: `maxlen` of the reward deque; also forwarded to `train`.
        gamma, L2_reg: Forwarded to `DDPG`.
        random_process: "ou" selects `OrnsteinUhlenbeckProcess` (theta is fixed
            at 0.15), "gaussian" selects `GaussianNoise`.
        mu, sigma: Parameters of the selected noise process.

    Raises:
        AssertionError: If `random_process` is neither "ou" nor "gaussian".
    """
    tf.compat.v1.set_random_seed(seed)
    np.random.seed(seed=seed)
    # NOTE(review): the gym environment is not seeded in this function --
    # confirm whether set_up_for_training takes care of that.

    # prep for training
    log_dir = set_up_for_training(env_name=env_name,
                                  seed=seed,
                                  gpu_id=gpu_id,
                                  log_dir=log_dir,
                                  prev_log=prev_log,
                                  google_colab=google_colab)

    env = gym.make(env_name)
    env = Monitor(env=env, directory=log_dir["video_path"], force=True)

    replay_buffer = ReplayBuffer(memory_size, traj_dir=log_dir["traj_path"])
    reward_buffer = deque(maxlen=interval_MAR)
    summary_writer = tf.compat.v2.summary.create_file_writer(
        log_dir["summary_path"])

    if random_process == "ou":
        random_process = OrnsteinUhlenbeckProcess(
            size=env.action_space.shape[0], theta=0.15, mu=mu, sigma=sigma)
    elif random_process == "gaussian":
        random_process = GaussianNoise(mu=mu, sigma=sigma)
    else:
        # Raised explicitly rather than via `assert False`: assert statements
        # are removed under `python -O`, which would let an invalid choice
        # slip through to the agent. The exception type and message are the
        # ones the assert produced.
        raise AssertionError(
            "choose the random process from either gaussian or ou")

    agent = DDPG(actor=Actor,
                 critic=Critic,
                 num_action=env.action_space.shape[0],
                 random_process=random_process,
                 gamma=gamma,
                 L2_reg=L2_reg,
                 actor_model_dir=log_dir["model_path"] + "/actor",
                 critic_model_dir=log_dir["model_path"] + "/critic")

    train(agent,
          env,
          replay_buffer,
          reward_buffer,
          summary_writer,
          num_eval_episodes,
          num_frames,
          tau,
          eval_interval,
          hot_start,
          batch_size,
          interval_MAR,
          log_dir,
          google_colab)
# Setup script for the original DDPG training loop: derive the output
# directories from `params`, build the environment, buffers, summary writer,
# exploration noise and agent, then start training.

# Tag used in directory names: mu with its decimal point dropped (0.3 -> "03").
# str.replace also handles values whose string form has no "." (e.g. an int),
# where indexing the result of split(".") would raise IndexError.
mu = str(params.mu).replace(".", "")

# Every output path is parameterised by the same four values.
env_tag = params.env_name.split("-")[0]
path_args = (params.train_flg, params.seed, env_tag, mu)

params.log_dir = "../../logs/logs/DDPG_batchnorm-{}-seed{}/{}-mu{}".format(
    *path_args)
params.actor_model_dir = "../../logs/models/DDPG_batchnorm-{}-seed{}/{}/actor-mu{}/".format(
    *path_args)
params.critic_model_dir = "../../logs/models/DDPG_batchnorm-{}-seed{}/{}/critic-mu{}/".format(
    *path_args)
params.video_dir = "../../logs/video/DDPG_batchnorm-{}-seed{}/{}-mu{}/".format(
    *path_args)
params.plot_path = "../../logs/plots/DDPG_batchnorm-{}-seed{}/{}-mu{}/".format(
    *path_args)

env = gym.make(params.env_name)
env = Monitor(env, params.video_dir)

# set seed
env.seed(params.seed)
tf.random.set_random_seed(params.seed)

replay_buffer = ReplayBuffer(params.memory_size)
reward_buffer = deque(maxlen=params.reward_buffer_ep)
summary_writer = tf.contrib.summary.create_file_writer(params.log_dir)

# Exploration noise added to the agent's actions; GaussianNoise(mu=params.mu,
# sigma=params.sigma) is the alternative process.
random_process = OrnsteinUhlenbeckProcess(size=env.action_space.shape[0],
                                          theta=0.15,
                                          mu=params.mu,
                                          sigma=params.sigma)

agent = DDPG(Actor, Critic, env.action_space.shape[0], random_process, params)
train_DDPG_original(agent, env, replay_buffer, reward_buffer, summary_writer)
import numpy as np

from tf_rl.common.random_process import OrnsteinUhlenbeckProcess

# Draw 1000 consecutive samples from a one-dimensional OU process and print
# their mean.
random_process = OrnsteinUhlenbeckProcess(size=1, theta=0.15, mu=1.0, sigma=0.2)
res = [random_process.sample() for _ in range(1000)]
print(np.mean(res))
# Settings swept per box-plot group: the OU process varies mu (sigma fixed at
# 0.2), the Gaussian noise varies sigma (mu fixed at 0.3).
mu = [0.0, 0.3, 0.6, 0.9]
sigma = [0.1, 0.3, 0.6, 0.9]
ticks = ["0.0", "0.3", "0.6", "0.9"]


def set_box_color(bp, color):
    """Apply `color` to the boxes, whiskers, caps and medians of boxplot `bp`."""
    for part in ("boxes", "whiskers", "caps", "medians"):
        plt.setp(bp[part], color=color)


temp_ou, temp_ga = [], []
for ou_mu, ga_sigma in zip(mu, sigma):
    # Sample the OU process before the Gaussian one within each group so the
    # sequence of random draws stays the same.
    ou_process = OrnsteinUhlenbeckProcess(size=1, theta=0.15, mu=ou_mu, sigma=0.2)
    temp_ou.append([ou_process.sample()[0] for _ in range(num_sample)])

    gaussian_noise = GaussianNoise(mu=0.3, sigma=ga_sigma)
    temp_ga.append([gaussian_noise.sample() for _ in range(num_sample)])

# Each group is centred on an even x position, with the OU box shifted left
# and the Gaussian box shifted right.
group_centers = np.arange(len(mu)) * 2.0
ou = plt.boxplot(temp_ou, positions=group_centers - 0.4, sym='')
ga = plt.boxplot(temp_ga, positions=group_centers + 0.4, sym='')
set_box_color(ou, "#D7191C")  # colors are from http://colorbrewer2.org/
set_box_color(ga, "#2C7BB6")

# draw temporary red and blue lines and use them to create a legend
plt.plot([], c="#D7191C", label="OU Process")
plt.plot([], c="#2C7BB6", label="Gaussian")