Example #1
0
# Warm-up should be shorter than one epoch.
# NOTE(review): the length of an epoch is not defined in this part of the
# file; confirm 200 warm-up steps satisfies the constraint above.
c.ddpg_update_batch_size = 100
c.ddpg_warmup_steps = 200
c.model_save_int = 100  # in episodes
c.profile_int = 50  # in episodes


def policy_noise(action):
    """Return ``action`` with clipped normal noise added, limited to [-1, 1].

    The noise is applied by ``add_clipped_normal_noise_to_action`` using
    ``c.policy_noise_params``; the noisy result is then clamped with
    ``t.clamp`` so that every value lies between -1 and 1.
    """
    noisy_action = add_clipped_normal_noise_to_action(
        action, c.policy_noise_params)
    return t.clamp(noisy_action, -1, 1)


if __name__ == "__main__":
    # Set up the save environment rooted at c.root_dir (optionally restarting
    # from the trial named by c.restart_from_trial), then let prep_args
    # process the config together with that environment.
    save_env = SaveEnv(c.root_dir, restart_use_trial=c.restart_from_trial)
    prep_args(c, save_env)

    # save_env.remove_trials_older_than(diff_hour=1)
    # Initialise the global board with this trial's training log directory
    # and keep a handle to its writer.
    global_board.init(save_env.get_trial_train_log_dir())
    writer = global_board.writer
    logger.info("Directories prepared.")

    # Build the networks. Each model is moved to c.device and wrapped in MW,
    # which is given c.device for both of its device arguments.
    # NOTE(review): the "_t" suffix presumably marks target networks --
    # confirm against the framework that consumes these models.
    actor = MW(
        Actor(observe_dim, action_dim, 1).to(c.device), c.device, c.device)
    actor_t = MW(
        Actor(observe_dim, action_dim, 1).to(c.device), c.device, c.device)
    critic = MW(
        Critic(observe_dim, action_dim).to(c.device), c.device, c.device)
    critic_t = MW(
        Critic(observe_dim, action_dim).to(c.device), c.device, c.device)
    critic2 = MW(
Example #2
0
# Name of the second framework; it is recorded next to framework2 in the
# `operators` list built in the main script.
load_framework2 = "naive_ppo_parallel"
# Trial identifiers passed to SaveEnv(restart_use_trial=...) and written to
# each config's restart_from_trial.
# NOTE(review): both trials currently hold the same value -- confirm that
# this is intended.
load_trial1 = "2020_05_06_21_50_57"
load_trial2 = "2020_05_06_21_50_57"
# NOTE(review): not referenced in the visible part of the file.
test_root_dir = ""


def load_framework(name):
    """Import the ``.magent_<name>`` module and return its entry points.

    Args:
        name: Framework name; it is appended to ``".magent_"`` to form the
            module name handed to ``imp.import_module``.

    Returns:
        A 3-tuple ``(c, create_models, run_agents)`` read from the imported
        module's attributes of the same names.
    """
    # NOTE(review): the leading dot makes this a relative module name. If
    # `imp` is an alias of importlib, import_module needs a `package`
    # argument to resolve relative names -- confirm how `imp` is imported.
    module_name = ".magent_" + name
    framework = imp.import_module(module_name)
    entry_points = (
        framework.c,
        framework.create_models,
        framework.run_agents,
    )
    return entry_points


if __name__ == "__main__":
    # Load the two frameworks to be compared. Each call returns the
    # framework module's config object, model factory and agent runner.
    c1, create_models1, run_agents1 = load_framework(load_framework1)
    # Fix: the second framework used to be loaded with load_framework1 as
    # well, so "framework 2" was just a second handle on framework 1 and
    # load_framework2 was only ever used as a label in `operators`.
    c2, create_models2, run_agents2 = load_framework(load_framework2)

    # Prepare a save environment for each framework, restarting from the
    # configured trial, and let prep_args process each config with it.
    save_env1 = SaveEnv(c1.root_dir, restart_use_trial=load_trial1)
    prep_args(c1, save_env1)
    save_env2 = SaveEnv(c2.root_dir, restart_use_trial=load_trial2)
    prep_args(c2, save_env2)

    # Record the trial to restart from on each config before building the
    # models from that framework's factory.
    c1.restart_from_trial = load_trial1
    framework1 = create_models1()
    logger.info("Framework 1 initialized")

    c2.restart_from_trial = load_trial2
    framework2 = create_models2()
    logger.info("Framework 2 initialized")

    # (framework, runner, framework name) triples, one per side.
    operators = [(framework1, run_agents1, load_framework1),
                 (framework2, run_agents2, load_framework2)]

    # testing