Example #1
def invoke_agent_env(params, alg):
    """Return the wrapped env and the string name of the agent.

    Call `eval(agent)` from the main script to resolve the returned name.
    """
    if params.mode == "Atari":
        # skip_frame_k is an argument of the wrappers, not of str.format
        env = wrap_deepmind(
            make_atari("{}NoFrameskip-v4".format(params.env_name)),
            skip_frame_k=params.skip_frame_k)
        agent = "{}_debug".format(alg) if params.debug_flg else alg
    else:
        agent = alg
        if params.mode == "CartPole":
            env = MyWrapper(gym.make("CartPole-v0"))
        elif params.mode == "CartPole-p":
            env = CartPole_Pixel(gym.make("CartPole-v0"))
        else:
            raise ValueError("Unknown mode: {}".format(params.mode))
    return agent, env
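
A minimal usage sketch (hypothetical; it assumes a `DQN` agent class is importable in the calling script and that `Parameters` fills in `env_name` and `skip_frame_k` for the Atari mode):

params = Parameters(algo="DQN", mode="Atari")
agent_name, env = invoke_agent_env(params, "DQN")
Agent = eval(agent_name)  # resolve the class named by the string, as the docstring suggests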
Example #2
    # remove stale logs from a previous run; a missing dir is ignored (needs `import shutil`)
    shutil.rmtree(logdirs.log_DQfD, ignore_errors=True)

    parser = argparse.ArgumentParser()
    parser.add_argument("--mode", default="CartPole", help="game env type")
    parser.add_argument("--num_episodes",
                        default=100,
                        type=int,
                        help="number of episodes to run")
    args = parser.parse_args()

    if args.mode == "CartPole":
        env = MyWrapper(gym.make("CartPole-v0"))
    elif args.mode == "Atari":
        env = wrap_deepmind(make_atari("PongNoFrameskip-v4"))

    params = Parameters(algo="DQfD", mode=args.mode)
    params.num_episodes = args.num_episodes
    replay_buffer = PrioritizedReplayBuffer(
        params.memory_size, alpha=params.prioritized_replay_alpha)
    Beta = AnnealingSchedule(start=params.prioritized_replay_beta_start,
                             end=params.prioritized_replay_beta_end,
                             decay_steps=params.decay_steps)
    agent = DQfD(args.mode, Model, Model, env.action_space.n, params,
                 logdirs.model_DQN)
    if params.policy_fn == "Eps":
        Epsilon = AnnealingSchedule(start=params.epsilon_start,
                                    end=params.epsilon_end,
                                    decay_type="linear")
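
`AnnealingSchedule` itself is not shown in these examples; below is a minimal linear-annealing sketch that is consistent with the constructor calls above and with the argument-free `get_value()` call in the next example (the internal step counter is an assumption):

class AnnealingSchedule:
    def __init__(self, start, end, decay_steps=1, decay_type="linear"):
        self.start, self.end, self.decay_steps = start, end, decay_steps
        self.step = 0

    def get_value(self):
        # linearly interpolate from `start` to `end`, then hold at `end`
        fraction = min(float(self.step) / self.decay_steps, 1.0)
        self.step += 1
        return self.start + fraction * (self.end - self.start)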
Example #3

optimizer = tf.train.RMSPropOptimizer(anneal_lr.get_value(),
                                      decay=0.99, momentum=0.0, epsilon=1e-6)

if params.loss_fn == "huber":
    loss_fn = tf.losses.huber_loss
elif params.loss_fn == "mse":
    loss_fn = tf.losses.mean_squared_error
else:
    raise ValueError("loss_fn must be 'huber' or 'mse', got '{}'".format(params.loss_fn))

grad_clip_fn = gradient_clip_fn(flag=params.grad_clip_flg)
summary_writer = tf.contrib.summary.create_file_writer(params.log_dir)
now = datetime.now()

if params.mode == "CartPole":
    env = MyWrapper(gym.make("CartPole-v0"))
    params.log_dir = "../../logs/logs/" + now.strftime(
        "%Y%m%d-%H%M%S") + "-DDDP/"
    params.model_dir = "../../logs/models/" + now.strftime(
        "%Y%m%d-%H%M%S") + "-DDDP/"
    agent = Double_DQN_cartpole(Model, optimizer, loss_fn, grad_clip_fn,
                                env.action_space.n, params)
elif params.mode == "CartPole-p":
    env = CartPole_Pixel(gym.make("CartPole-v0"))
    params.log_dir = "../../logs/logs/" + now.strftime(
        "%Y%m%d-%H%M%S") + "-DDDP-p/"
    params.model_dir = "../../logs/models/" + now.strftime(
        "%Y%m%d-%H%M%S") + "-DDDP-p/"
    agent = Double_DQN(Model_p, optimizer, loss_fn, grad_clip_fn,
                       env.action_space.n, params)
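
`gradient_clip_fn` is another project helper that does not appear in these snippets; a plausible sketch, assuming it returns a callable applied to `(gradient, variable)` pairs (the flag values "by_value" and "norm" and the clip thresholds are guesses):

def gradient_clip_fn(flag=None):
    def clip(grads_and_vars):
        if flag == "by_value":
            return [(tf.clip_by_value(g, -1.0, 1.0), v) for g, v in grads_and_vars]
        elif flag == "norm":
            return [(tf.clip_by_norm(g, 10.0), v) for g, v in grads_and_vars]
        return list(grads_and_vars)  # no clipping
    return clip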
Example #4
                        default="../../logs/models/DQN/",
                        help="directory for trained model")
    parser.add_argument("--debug_flg",
                        default=False,
                        type=bool,
                        help="debug mode or not")
    parser.add_argument("--google_colab",
                        default=False,
                        type=bool,
                        help="if you are executing this on GoogleColab")
    params = parser.parse_args()
    params.goal = 195
    params.test_episodes = 10

    if params.mode == "CartPole":
        env = MyWrapper(gym.make("CartPole-v0"))
    elif params.mode == "CartPole-p":
        env = CartPole_Pixel(gym.make("CartPole-v0"))

    if params.google_colab:
        # mount your drive on google colab
        from google.colab import drive

        drive.mount("/content/gdrive")
        params.log_dir = "/content/gdrive/My Drive/logs/logs/DQN/{}".format(
            params.env_name)
        params.model_dir = "/content/gdrive/My Drive/logs/models/DQN/{}".format(
            params.env_name)
        # exist_ok avoids a crash when the dirs survive from a previous run
        os.makedirs(params.log_dir, exist_ok=True)
        os.makedirs(params.model_dir, exist_ok=True)
        assert os.path.isdir(
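
A note on the boolean flags above: `argparse` with `type=bool` converts any non-empty string to True, so `--debug_flg False` would still have enabled debug mode; `action="store_true"` avoids the trap. A self-contained demonstration:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--broken", default=False, type=bool)  # bool("False") is True
parser.add_argument("--fixed", action="store_true")        # absent -> False

args = parser.parse_args(["--broken", "False"])
print(args.broken)  # True, even though the user passed "False"
print(args.fixed)   # False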