Example #1
import distdeepq


def main():
    env, _ = distdeepq.make_env("Pong")

    model = distdeepq.models.cnn_to_dist_mlp(convs=[(32, 8, 4), (64, 4, 2),
                                                    (64, 3, 1)],
                                             hiddens=[256],
                                             dueling=False)
    act = distdeepq.learn(env,
                          p_dist_func=model,
                          lr=1e-4,
                          max_timesteps=2000000,
                          buffer_size=10000,
                          exploration_fraction=0.1,
                          exploration_final_eps=0.01,
                          train_freq=4,
                          learning_starts=10000,
                          target_network_update_freq=1000,
                          gamma=0.99,
                          prioritized_replay=False,
                          dist_params={
                              'Vmin': -10,
                              'Vmax': 10,
                              'nb_atoms': 51
                          })
    act.save("pong_model.pkl")
    env.close()
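For reference, dist_params above parameterizes the C51 categorical return
distribution of Bellemare et al. (2017): nb_atoms evenly spaced atoms on
[Vmin, Vmax]. A minimal sketch of the support these settings imply:

import numpy as np

def c51_support(vmin=-10.0, vmax=10.0, nb_atoms=51):
    # atoms z_i = Vmin + i * dz with dz = (Vmax - Vmin) / (nb_atoms - 1)
    return np.linspace(vmin, vmax, nb_atoms)

z = c51_support()  # 51 atoms from -10.0 to 10.0, spaced dz = 0.4 apart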
Example #2
import gym
import numpy as np
import tensorflow as tf

import distdeepq


def callback(lcl, _glb):
    # Early-stopping check mirroring the baselines deepq CartPole example:
    # stop once the mean reward over the last 100 episodes reaches 199.
    is_solved = lcl['t'] > 100 and sum(lcl['episode_rewards'][-101:-1]) / 100 >= 199
    return is_solved


def main():
    env = gym.make("CartPole-v0")
    env.seed(1337)
    np.random.seed(1337)
    tf.set_random_seed(1337)

    model = distdeepq.models.dist_mlp([64])
    act = distdeepq.learn(env,
                          p_dist_func=model,
                          lr=3e-4,
                          max_timesteps=100000,
                          buffer_size=50000,
                          exploration_fraction=0.1,
                          exploration_final_eps=0.02,
                          print_freq=10,
                          callback=callback,
                          target_network_update_freq=500,
                          batch_size=32,
                          gamma=0.95,
                          dist_params={
                              'Vmin': 0,
                              'Vmax': 25,
                              'nb_atoms': 11
                          })
    print("Saving model to cartpole_model.pkl")
    act.save("cartpole_model.pkl")
    env.close()
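A minimal sketch of replaying the saved CartPole policy, assuming distdeepq
mirrors the baselines deepq API (a distdeepq.load that returns a callable
act, as deepq.load does):

import gym
import distdeepq


def enjoy():
    env = gym.make("CartPole-v0")
    act = distdeepq.load("cartpole_model.pkl")  # assumed analogue of deepq.load
    obs, done = env.reset(), False
    episode_rew = 0
    while not done:
        env.render()
        # act takes a batch of observations and returns a batch of actions
        obs, rew, done, _ = env.step(act(obs[None])[0])
        episode_rew += rew
    print("Episode reward:", episode_rew)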
Example #3
import gym

import distdeepq
from baselines import logger
# wrapper_car_racing (frame stacking) and get_action_information (action
# discretization) are assumed to be project-local helpers.


def exp(env_name='CarRacing-v0',
        lr=1e-4,
        eps=0.0003125,
        max_timesteps=25e6,
        buffer_size=1e6,
        batch_size=32,
        exp_t1=1e6,
        exp_p1=0.1,
        exp_t2=25e6,
        exp_p2=0.01,
        train_freq=4,
        learning_starts=5e4,
        target_network_update_freq=1e4,
        gamma=0.99,
        num_cpu=50,
        nb_atoms=5,
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[512],
        action_res=None):

    env = gym.make(env_name)
    env = wrapper_car_racing(env)  # frame stack
    # logger.configure(dir=os.path.join('.', datetime.datetime.now().strftime("openai-%Y-%m-%d-%H-%M-%S-%f")))
    logger.configure()

    n_action, action_map = get_action_information(env,
                                                  env_name,
                                                  action_res=action_res)

    model = distdeepq.models.cnn_to_dist_mlp(
        convs=convs,  # [(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=hiddens,  # [512],
        # n_action=n_action,
        dueling=False)
    act = distdeepq.learn(
        env,
        p_dist_func=model,
        lr=lr,  # 1e-4
        eps=eps,
        max_timesteps=int(max_timesteps),  # 25M
        buffer_size=int(buffer_size),  # 1M
        batch_size=int(batch_size),
        exp_t1=exp_t1,
        exp_p1=exp_p1,
        exp_t2=exp_t2,
        exp_p2=exp_p2,
        train_freq=train_freq,
        learning_starts=learning_starts,  # 50000
        target_network_update_freq=target_network_update_freq,  # 10000
        gamma=gamma,
        num_cpu=num_cpu,
        prioritized_replay=False,
        dist_params={'nb_atoms': nb_atoms},
        n_action=int(n_action),
        action_map=action_map)
    act.save("car_racing_model.pkl")
    env.close()
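get_action_information presumably builds a discrete action set for
CarRacing's continuous Box(3) controls (steering, gas, brake) and returns
its size together with an index-to-action map. A hypothetical sketch of
such a discretization, with the bin counts purely illustrative:

import itertools
import numpy as np


def discretize_car_racing(steer_bins=5, gas_bins=3, brake_bins=2):
    # Cartesian product of evenly spaced values for each control dimension
    steer = np.linspace(-1.0, 1.0, steer_bins)
    gas = np.linspace(0.0, 1.0, gas_bins)
    brake = np.linspace(0.0, 1.0, brake_bins)
    action_map = [np.array(a) for a in itertools.product(steer, gas, brake)]
    return len(action_map), action_map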
Example #4
import distdeepq
from baselines import logger


def exp(env_name='Assault',  # default setting on the training server
        lr=2.5e-4,
        eps=0.0003125,
        max_timesteps=25e6,
        buffer_size=1e6,
        batch_size=32,
        exp_t1=1e6,
        exp_p1=0.1,
        exp_t2=25e6,
        exp_p2=0.01,
        train_freq=4,
        learning_starts=5e4,
        target_network_update_freq=1e4,
        gamma=0.99,
        num_cpu=50,
        nb_atoms=5,
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[512],
        ):

    env, _ = distdeepq.make_env(env_name)

    # logging directory setting:
    # logger.configure(dir=os.path.join('.', datetime.datetime.now().strftime("openai-%Y-%m-%d-%H-%M-%S-%f"))) 
    # logging configuration: baselines/baselines/logger.py
    logger.configure()

    model = distdeepq.models.cnn_to_dist_mlp(
        convs=convs,  # [(32, 8, 4), (64, 4, 2), (64, 3, 1)]
        hiddens=hiddens,  # [512]
        dueling=False)
    act = distdeepq.learn(
        env,
        p_dist_func=model,
        lr=lr,  # 2.5e-4
        eps=eps,
        max_timesteps=int(max_timesteps),  # 25M
        buffer_size=int(buffer_size),  # 1M
        batch_size=int(batch_size),
        exp_t1=exp_t1,
        exp_p1=exp_p1,
        exp_t2=exp_t2,
        exp_p2=exp_p2,
        train_freq=train_freq,
        learning_starts=learning_starts,  # 50000
        target_network_update_freq=target_network_update_freq,  # 10000
        gamma=gamma,
        num_cpu=num_cpu,
        prioritized_replay=False,
        dist_params={'nb_atoms': nb_atoms}
    )
    act.save("assault_model.pkl")
    env.close()
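The paired exploration arguments suggest a two-segment linear epsilon
schedule: anneal from 1.0 to exp_p1 over the first exp_t1 steps, then from
exp_p1 to exp_p2 by step exp_t2. A minimal sketch under that assumption
(distdeepq.learn's handling of these arguments is not shown here):

def epsilon(t, exp_t1=1e6, exp_p1=0.1, exp_t2=25e6, exp_p2=0.01):
    # piecewise-linear schedule implied by exp_t1/exp_p1/exp_t2/exp_p2
    if t < exp_t1:
        return 1.0 + (exp_p1 - 1.0) * (t / exp_t1)
    if t < exp_t2:
        return exp_p1 + (exp_p2 - exp_p1) * ((t - exp_t1) / (exp_t2 - exp_t1))
    return exp_p2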