Example #1
c = Config()
# c.restart_from_trial = "2020_05_09_15_00_31"
c.max_episodes = 50000
c.max_steps = 1000
c.replay_size = 50000

c.agent_num = 1
c.device = "cuda:0"
c.root_dir = "/data/AI/tmp/multi_agent/mcarrier/naive_ppo_parallel/"

# train configs
# lr: learning rate, int: interval
c.workers = 5
c.discount = 0.99
c.learning_rate = 3e-4
c.entropy_weight = None
c.ppo_update_batch_size = 100
c.ppo_update_times = 50
c.ppo_update_int = 5  # in episodes; equals the number of episodes stored in the PPO replay buffer
c.model_save_int = c.ppo_update_int * 20  # in episodes
c.profile_int = 50  # in episodes

if __name__ == "__main__":
    # prepare the trial save directory, optionally resuming from a previous trial
    save_env = SaveEnv(c.root_dir, restart_use_trial=c.restart_from_trial)
    prep_args(c, save_env)

    # save_env.remove_trials_older_than(diff_hour=1)
    # initialize the global summary board in the trial's train log directory
    global_board.init(save_env.get_trial_train_log_dir())
    writer = global_board.writer
    logger.info("Directories prepared.")
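Both snippets assume that Config, SaveEnv, prep_args, global_board, and logger are imported from the surrounding project; the import lines are not part of these examples. The pattern of assigning arbitrary attributes on a fresh Config(), while the commented-out restart_from_trial line is still read later, suggests an attribute container whose unset fields read as None. A minimal sketch of such a container is shown below; it is an assumption for illustration, not the project's actual Config class.

class Config:
    """Attribute-style option container (illustrative assumption).

    Any field that was never assigned reads as None, which is why
    c.restart_from_trial can stay commented out above and still be
    passed to SaveEnv without raising AttributeError.
    """

    def __getattr__(self, name):
        # __getattr__ only runs when normal attribute lookup fails,
        # so every unset option falls back to None.
        return None

cfg = Config()
cfg.max_episodes = 50000
print(cfg.restart_from_trial)  # None: never assigned, so it falls back to the default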
Example #2
# configs
c = Config()
# c.restart_from_trial = "2020_05_09_15_00_31"
c.max_episodes = 50000
c.max_steps = 300
c.replay_size = 10000

c.device = "cuda:0"
c.root_dir = "/data/AI/tmp/multi_agent/lunar_lander/naive_ppo/"

# train configs
# lr: learning rate, int: interval
c.discount = 0.99
c.learning_rate = 1e-3
c.entropy_weight = 1e-2
c.ppo_update_batch_size = 100
c.ppo_update_times = 4
c.ppo_update_int = 6  # in episodes; equals the number of episodes stored in the PPO replay buffer
c.model_save_int = 100  # in episodes
c.profile_int = 50  # in episodes

if __name__ == "__main__":
    # prepare the trial save directory, optionally resuming from a previous trial
    save_env = SaveEnv(c.root_dir, restart_use_trial=c.restart_from_trial)
    prep_args(c, save_env)

    # save_env.remove_trials_older_than(diff_hour=1)
    # initialize the global summary board in the trial's train log directory
    global_board.init(save_env.get_trial_train_log_dir())
    writer = global_board.writer
    logger.info("Directories prepared.")
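All of the *_int options above are measured in episodes. A hypothetical sketch of how such episode-based intervals are typically consumed by a training loop follows; run_episode, update_ppo, save_model, and profile are illustrative placeholders, not functions from this project.

# Placeholders standing in for the real rollout, update, checkpoint and profiling steps.
def run_episode(): pass      # collect one episode of at most c.max_steps steps
def update_ppo(): pass       # c.ppo_update_times passes with batches of c.ppo_update_batch_size
def save_model(): pass       # write a model checkpoint
def profile(): pass          # emit profiling statistics

for episode in range(1, c.max_episodes + 1):
    run_episode()
    if episode % c.ppo_update_int == 0:    # every 6 episodes in this example
        update_ppo()
    if episode % c.model_save_int == 0:    # every 100 episodes
        save_model()
    if episode % c.profile_int == 0:       # every 50 episodes
        profile()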