예제 #1
0
def train_one_iteration(
        iter_id,
        exp_name,
        init_yaml_path,
        config,
        stop_criterion,
        num_seeds=1,
        num_gpus=0,
        test_mode=False
):
    """Train one PPO run per seed and chain their checkpoints through a YAML file.

    For every seed ``i`` in ``range(num_seeds)`` a deep copy of ``config`` is
    trained with ``tune.run("PPO", ...)`` under the experiment name
    ``"<exp_name>_seed<i>_iter<iter_id>"``. After each run the checkpoint
    found by ``_search_ckpt`` is recorded in the name -> checkpoint mapping
    read from the current YAML file, and the mapping is saved to
    ``<save_path>/post_agent_ppo.yaml``. That file becomes the YAML path
    handed to the next seed, so later seeds see the earlier seeds' entries.

    Args:
        iter_id (int): Iteration index, embedded in each run name.
        exp_name (str): Base experiment name. Must not contain the
            substrings ``"seed"`` or ``"iter"``, which are appended here.
        init_yaml_path (str): Path to an existing YAML file; used as the
            ``yaml_path`` of the first seed.
        config (dict): Trainer config. It is deep-copied per seed, so the
            caller's dict is not modified. Must contain an ``"env_config"``
            mapping, whose ``"yaml_path"`` entry is overwritten in the copy.
        stop_criterion (dict): Passed to ``tune.run`` as ``stop``.
        num_seeds (int): Number of sequential runs; seed values are
            ``0 .. num_seeds - 1``.
        num_gpus (int): Forwarded to ``initialize_ray``.
        test_mode (bool): Forwarded to ``initialize_ray`` as both
            ``test_mode`` and ``local_mode``; also raises tune verbosity.

    Raises:
        AssertionError: If an argument has the wrong type, the YAML file is
            missing, ``exp_name`` contains a reserved substring, the last
            mapping entry is not a dict with a ``"path"`` key, or
            ``save_yaml`` does not return the path it was given.
    """
    assert isinstance(iter_id, int)
    assert isinstance(exp_name, str)
    assert isinstance(stop_criterion, dict)
    assert isinstance(init_yaml_path, str)
    assert osp.exists(init_yaml_path)

    # Query the configured directory once and fall back to Ray's default
    # results location when it is unset/empty.
    local_dir = os.path.expanduser(get_local_dir() or "~/ray_results")
    save_path = os.path.join(local_dir, exp_name)
    current_yaml_path = init_yaml_path

    # These substrings are appended below to build the per-run name.
    assert 'seed' not in exp_name, exp_name
    assert 'iter' not in exp_name, exp_name

    for i in range(num_seeds):
        input_exp_name = exp_name + "_seed{}_iter{}".format(i, iter_id)

        # Work on a private copy so the caller's config stays untouched.
        tmp_config = copy.deepcopy(config)
        tmp_config.update(seed=i)
        tmp_config['env_config']['yaml_path'] = current_yaml_path
        initialize_ray(
            num_gpus=num_gpus, test_mode=test_mode, local_mode=test_mode
        )
        tune.run(
            "PPO",
            name=input_exp_name,
            verbose=2 if test_mode else 1,
            local_dir=save_path,
            checkpoint_freq=10,
            checkpoint_at_end=True,
            stop=stop_criterion,
            config=tmp_config
        )

        name_ckpt_mapping = read_yaml(current_yaml_path)
        ckpt_path = _search_ckpt(save_path, input_exp_name)

        # Clone the most recent entry as a template and point it at the
        # checkpoint produced by the run that just finished.
        last_ckpt_dict = copy.deepcopy(list(name_ckpt_mapping.values())[-1])
        assert isinstance(last_ckpt_dict, dict), last_ckpt_dict
        assert 'path' in last_ckpt_dict, last_ckpt_dict
        last_ckpt_dict.update(path=ckpt_path)

        print("Finish the current last_ckpt_dict: ", last_ckpt_dict)
        name_ckpt_mapping[input_exp_name] = last_ckpt_dict

        # From now on every seed reads and rewrites this same output file.
        current_yaml_path = osp.join(save_path, "post_agent_ppo.yaml")
        out = save_yaml(name_ckpt_mapping, current_yaml_path)
        assert out == current_yaml_path
예제 #2
0
def test_marl_individual_ppo(extra_config, local_mode=True, test_mode=True):
    """Run a short multi-agent PPO experiment with one policy per agent.

    Eight agents named ``ppo_agent0`` .. ``ppo_agent7`` are configured on
    ``BipedalWalker-v2`` through ``MultiAgentEnvWrapper``. Each agent id maps
    to the policy with the same name, and every policy uses the same spec.
    Training stops after 50 iterations, checkpointing every 10 iterations
    and once more at the end.

    Args:
        extra_config: If a dict, its top-level keys are merged over the
            base config (replacing whole entries); any other value is
            ignored.
        local_mode: Forwarded to ``initialize_ray``.
        test_mode: Forwarded to ``initialize_ray``.
    """
    gpu_count = 0
    agent_count = 8
    max_iterations = 50
    env_id = "BipedalWalker-v2"
    experiment = "test_marl_individual_ppo"

    initialize_ray(
        test_mode=test_mode, num_gpus=gpu_count, local_mode=local_mode
    )

    # An env instance is created only to read its observation/action spaces.
    probe_env = get_env_maker(env_id)()
    shared_spec = (
        None, probe_env.observation_space, probe_env.action_space, {}
    )

    agent_names = ["ppo_agent{}".format(idx) for idx in range(agent_count)]

    def policy_mapping_fn(aid):
        # Agent ids double as policy names, so the mapping is the identity.
        return aid

    env_settings = {
        "env_name": env_id,
        "agent_ids": agent_names
    }
    multiagent_settings = {
        # Every policy name points at the same spec tuple.
        "policies": dict.fromkeys(agent_names, shared_spec),
        "policy_mapping_fn": policy_mapping_fn,
    }
    config = {
        "env": MultiAgentEnvWrapper,
        "env_config": env_settings,
        "log_level": "DEBUG",
        "num_gpus": gpu_count,
        "multiagent": multiagent_settings,
    }

    if isinstance(extra_config, dict):
        config.update(extra_config)

    tune.run(
        "PPO",
        name=experiment,
        local_dir=get_local_dir(),
        stop={"training_iteration": max_iterations},
        checkpoint_freq=10,
        checkpoint_at_end=True,
        config=config,
    )
예제 #3
0
        "log_level": "DEBUG" if args.test_mode else "ERROR",
        # "num_gpus": 0.45,
        "num_gpus": 1,
        "num_cpus_per_worker": 2,
        "num_cpus_for_driver": 1,
        "num_envs_per_worker": 16,
        "sample_batch_size": 256,
        "multiagent": {
            "policies": {i: default_policy
                         for i in policy_names},
            "policy_mapping_fn": lambda aid: aid,
        },
        "callbacks": {
            "on_train_result": on_train_result
        },
        "num_sgd_iter": 10,
        "seed": tune.grid_search(list(range(args.num_seeds)))
        if args.num_seeds != 0 else 0
    }
    config.update(run_specify_config[run_name])

    tune.run(
        run_dict[run_name],
        local_dir=get_local_dir(),
        name=exp_name,
        checkpoint_at_end=True,
        checkpoint_freq=10,
        stop=run_specify_stop[run_name],
        config=config,
    )