Example #1
0
def train_eval(log_dir="DDPG",
               prev_log="",
               google_colab=False,
               seed=123,
               gpu_id=0,
               env_name="HalfCheetah-v2",
               num_frames=10000,
               tau=1e-2,
               memory_size=5000,
               hot_start=100,
               batch_size=200,
               interval_MAR=10,
               gamma=0.99,
               L2_reg=0.5,
               random_process="ou",
               mu=0.3,
               sigma=0.2,
               num_eval_episodes=1,
               eval_interval=1000):
    """Set up and run DDPG training with periodic evaluation on a Gym env.

    Args:
        log_dir: base name for the experiment's log directories.
        prev_log: previous log directory to resume from ("" = fresh run).
        google_colab: whether paths are set up for Google Colab.
        seed: random seed for TF, NumPy and the environment.
        gpu_id: GPU device id forwarded to the training setup.
        env_name: Gym environment id.
        num_frames: total number of environment frames to train for.
        tau: soft-update coefficient for the target networks.
        memory_size: replay buffer capacity.
        hot_start: frames collected before learning starts.
        batch_size: minibatch size for gradient updates.
        interval_MAR: window (in episodes) of the moving-average-reward buffer.
        gamma: discount factor.
        L2_reg: L2 regularisation weight passed to the agent.
        random_process: exploration noise type, "ou" or "gaussian".
        mu: mean of the exploration noise.
        sigma: scale of the exploration noise.
        num_eval_episodes: episodes per evaluation round.
        eval_interval: frames between evaluation rounds.

    Raises:
        ValueError: if ``random_process`` is neither "ou" nor "gaussian".
    """
    # Validate up front so we fail before creating any env/log resources.
    # (Previously `assert False`, which is stripped under `python -O` and
    # would have let a bogus noise type fall through as `False`.)
    if random_process not in ("ou", "gaussian"):
        raise ValueError(
            "choose the random process from either gaussian or ou, got: "
            "{!r}".format(random_process))

    tf.compat.v1.set_random_seed(seed)
    np.random.seed(seed=seed)

    # prep for training
    log_dir = set_up_for_training(env_name=env_name,
                                  seed=seed,
                                  gpu_id=gpu_id,
                                  log_dir=log_dir,
                                  prev_log=prev_log,
                                  google_colab=google_colab)

    env = gym.make(env_name)
    env = Monitor(env=env, directory=log_dir["video_path"], force=True)
    # Seed the env as well so rollouts are reproducible (matches the
    # script-style setup elsewhere in this file, which calls env.seed).
    env.seed(seed)

    replay_buffer = ReplayBuffer(memory_size, traj_dir=log_dir["traj_path"])
    reward_buffer = deque(maxlen=interval_MAR)
    summary_writer = tf.compat.v2.summary.create_file_writer(
        log_dir["summary_path"])

    if random_process == "ou":
        random_process = OrnsteinUhlenbeckProcess(
            size=env.action_space.shape[0], theta=0.15, mu=mu, sigma=sigma)
    else:  # "gaussian" — the only other value accepted by the check above
        random_process = GaussianNoise(mu=mu, sigma=sigma)

    agent = DDPG(actor=Actor,
                 critic=Critic,
                 num_action=env.action_space.shape[0],
                 random_process=random_process,
                 gamma=gamma,
                 L2_reg=L2_reg,
                 actor_model_dir=log_dir["model_path"] + "/actor",
                 critic_model_dir=log_dir["model_path"] + "/critic")

    train(agent, env, replay_buffer, reward_buffer, summary_writer,
          num_eval_episodes, num_frames, tau, eval_interval, hot_start,
          batch_size, interval_MAR, log_dir, google_colab)
Example #2
0
# Build a filesystem-safe tag from the noise mean, e.g. 0.3 -> "03",
# so it can be embedded in directory names below.
# NOTE(review): assumes str(params.mu) contains exactly one "." — an
# integer-valued mu (no dot) would raise IndexError here; confirm upstream.
mu = str(params.mu).split(".")
mu = str(mu[0] + mu[1])
# Derive every output location (TF summaries, saved models, videos, plots)
# from the run configuration so each (train_flg, seed, env, mu) combination
# gets its own folders and runs never overwrite each other.
params.log_dir = "../../logs/logs/DDPG_batchnorm-{}-seed{}/{}-mu{}".format(
    params.train_flg, params.seed, str(params.env_name.split("-")[0]), mu)
params.actor_model_dir = "../../logs/models/DDPG_batchnorm-{}-seed{}/{}/actor-mu{}/".format(
    params.train_flg, params.seed, str(params.env_name.split("-")[0]), mu)
params.critic_model_dir = "../../logs/models/DDPG_batchnorm-{}-seed{}/{}/critic-mu{}/".format(
    params.train_flg, params.seed, str(params.env_name.split("-")[0]), mu)
params.video_dir = "../../logs/video/DDPG_batchnorm-{}-seed{}/{}-mu{}/".format(
    params.train_flg, params.seed, str(params.env_name.split("-")[0]), mu)
params.plot_path = "../../logs/plots/DDPG_batchnorm-{}-seed{}/{}-mu{}/".format(
    params.train_flg, params.seed, str(params.env_name.split("-")[0]), mu)

# Create the environment and wrap it to record videos into video_dir.
env = gym.make(params.env_name)
env = Monitor(env, params.video_dir)

# set seed
env.seed(params.seed)
tf.random.set_random_seed(params.seed)

# Experience replay, moving-average reward window, and TF summary writer.
replay_buffer = ReplayBuffer(params.memory_size)
reward_buffer = deque(maxlen=params.reward_buffer_ep)
summary_writer = tf.contrib.summary.create_file_writer(params.log_dir)
# Ornstein-Uhlenbeck exploration noise, one dimension per action component.
random_process = OrnsteinUhlenbeckProcess(size=env.action_space.shape[0],
                                          theta=0.15,
                                          mu=params.mu,
                                          sigma=params.sigma)
# random_process = GaussianNoise(mu=params.mu, sigma=params.sigma)
# Assemble the agent and hand everything to the training loop.
agent = DDPG(Actor, Critic, env.action_space.shape[0], random_process, params)
train_DDPG_original(agent, env, replay_buffer, reward_buffer, summary_writer)
import numpy as np
from tf_rl.common.random_process import OrnsteinUhlenbeckProcess

# Smoke test: draw 1000 samples from a 1-D OU process and print the running
# mean after each draw — it should drift toward mu (1.0) as samples accumulate.
random_process = OrnsteinUhlenbeckProcess(size=1, theta=0.15, mu=1.0, sigma=0.2)

samples = []
for _ in range(1000):
    samples.append(random_process.sample())
    print(np.mean(samples))
# Parameter sweeps for the noise comparison below: OU mean values, Gaussian
# standard deviations, and the shared x-axis tick labels for the plot.
mu = [0.0, 0.3, 0.6, 0.9]
sigma = [0.1, 0.3, 0.6, 0.9]
ticks = ["0.0", "0.3", "0.6", "0.9"]


def set_box_color(bp, color):
    """Recolor all artists (boxes, whiskers, caps, medians) of a boxplot.

    ``bp`` is the dict returned by ``plt.boxplot``; ``color`` is any
    matplotlib color spec.
    """
    for part in ("boxes", "whiskers", "caps", "medians"):
        plt.setp(bp[part], color=color)


# Per-configuration sample collections: one list of draws per sweep value.
temp_ou, temp_ga = list(), list()

# Collect num_sample draws from each noise configuration. The OU process
# varies its mean mu (sigma fixed at 0.2) while the Gaussian varies its
# sigma (mu fixed at 0.3), so the plot contrasts the two knobs side by side.
# NOTE(review): num_sample is not defined in this snippet — presumably set
# earlier in the file; confirm.
for i in range(len(mu)):
    OU = OrnsteinUhlenbeckProcess(size=1, theta=0.15, mu=mu[i], sigma=0.2)
    ou = [OU.sample()[0] for _ in range(num_sample)]
    Gaussian = GaussianNoise(mu=0.3, sigma=sigma[i])
    gaussian = [Gaussian.sample() for _ in range(num_sample)]
    temp_ou.append(ou)
    temp_ga.append(gaussian)

# Draw the two distributions as interleaved, paired boxplots: OU shifted
# left of each tick, Gaussian shifted right (sym='' suppresses outlier dots).
ou = plt.boxplot(temp_ou, positions=np.arange(len(mu)) * 2.0 - 0.4, sym='')
ga = plt.boxplot(temp_ga, positions=np.arange(len(mu)) * 2.0 + 0.4, sym='')

set_box_color(ou, "#D7191C")  # colors are from http://colorbrewer2.org/
set_box_color(ga, "#2C7BB6")

# draw temporary red and blue lines and use them to create a legend
plt.plot([], c="#D7191C", label="OU Process")
plt.plot([], c="#2C7BB6", label="Gaussian")