Example #1
0
def generate_json(yaml_path, name_path_dict, output_dir):
    """Write a JSON index mapping each agent to its info and gif paths.

    name_path_dict = {
        agent_name: {
            mode: data/vis/exp/agent/gif
        }
    }

    Returns the path of the JSON file written into *output_dir*.
    """
    def strip_prefix(mode_path_dict):
        # Drop the first three components of each path
        # (e.g. "data/vis/exp/..." -> "agent/gif").
        stripped = {}
        for mode, full_path in mode_path_dict.items():
            stripped[mode] = osp.join(*full_path.split("/")[3:])
        return stripped

    json_dict = {
        agent_name: {
            "info": info,
            "gif_path": strip_prefix(name_path_dict[agent_name]),
            "name": agent_name
        }
        for agent_name, info in read_yaml(yaml_path).items()
    }

    json_path = osp.join(output_dir, JSON_FILE_NAME)
    with open(json_path, 'w') as f:
        json.dump(json_dict, f)
    return json_path
Example #2
0
def train_one_iteration(
        iter_id,
        exp_name,
        init_yaml_path,
        config,
        stop_criterion,
        num_seeds=1,
        num_gpus=0,
        test_mode=False
):
    """Train one PPO iteration per seed and chain checkpoints via yaml.

    For each seed, runs a tune experiment, locates the produced checkpoint,
    and appends it to the agent yaml file; every subsequent seed trains
    against the yaml written by the previous one.

    Args:
        iter_id (int): index of this training iteration, embedded in the
            per-seed experiment name.
        exp_name (str): base experiment name; must not already contain
            'seed' or 'iter' (those suffixes are appended here).
        init_yaml_path (str): existing yaml of previously trained agents;
            the chain of updated yamls starts from it.
        config (dict): trainer config; deep-copied and re-seeded per seed.
        stop_criterion (dict): tune stopping condition.
        num_seeds (int): number of independent seeds to train.
        num_gpus (int): GPUs requested when initializing ray.
        test_mode (bool): if True, run ray in local mode with more verbosity.
    """
    assert isinstance(iter_id, int)
    assert isinstance(exp_name, str)
    assert isinstance(stop_criterion, dict)
    assert isinstance(init_yaml_path, str)
    assert osp.exists(init_yaml_path)

    local_dir = get_local_dir() if get_local_dir() else "~/ray_results"
    local_dir = os.path.expanduser(local_dir)
    save_path = os.path.join(local_dir, exp_name)
    current_yaml_path = init_yaml_path

    # Guard against double-suffixing: these tokens are appended below.
    assert 'seed' not in exp_name, exp_name
    assert 'iter' not in exp_name, exp_name

    for i in range(num_seeds):
        input_exp_name = exp_name + "_seed{}_iter{}".format(i, iter_id)

        # Deep copy so the caller's config is never mutated across seeds.
        tmp_config = copy.deepcopy(config)
        tmp_config.update(seed=i)
        # Point the env at the yaml produced by the previous seed (or the
        # initial yaml on the first pass).
        tmp_config['env_config']['yaml_path'] = current_yaml_path
        initialize_ray(
            num_gpus=num_gpus, test_mode=test_mode, local_mode=test_mode
        )
        tune.run(
            "PPO",
            name=input_exp_name,
            verbose=2 if test_mode else 1,
            local_dir=save_path,
            checkpoint_freq=10,
            checkpoint_at_end=True,
            stop=stop_criterion,
            config=tmp_config
        )

        name_ckpt_mapping = read_yaml(current_yaml_path)
        ckpt_path = _search_ckpt(save_path, input_exp_name)

        # Clone the most recently added agent's entry as a template and
        # swap in the checkpoint path from the run that just finished.
        last_ckpt_dict = copy.deepcopy(list(name_ckpt_mapping.values())[-1])
        assert isinstance(last_ckpt_dict, dict), last_ckpt_dict
        assert 'path' in last_ckpt_dict, last_ckpt_dict
        last_ckpt_dict.update(path=ckpt_path)

        print("Finish the current last_ckpt_dict: ", last_ckpt_dict)
        name_ckpt_mapping[input_exp_name] = last_ckpt_dict

        # Persist the extended mapping; the next seed trains against it.
        current_yaml_path = osp.join(save_path, "post_agent_ppo.yaml")
        out = save_yaml(name_ckpt_mapping, current_yaml_path)
        assert out == current_yaml_path
Example #3
0
def generate_single_video(yaml_path, output_path):
    """Render a full-frame video for the first agent listed in *yaml_path*.

    Returns the path of the generated video.
    """
    assert yaml_path.endswith(".yaml")
    # Load exactly one agent from the yaml file.
    mapping = read_yaml(yaml_path, 1)
    video_path = generate_grid_of_videos(
        mapping,
        output_path,
        name_callback=lambda x, y=None: x,
        require_full_frame=True,
        require_text=False
    )
    print("Successfully generated video at: ", video_path)
    return video_path
Example #4
0
def test_generate_two_videos2():
    """Smoke-test rendering a grid video from a two-agent yaml fixture."""
    # Load two agents from the test fixture.
    mapping = read_yaml("../../data/yaml/test-2-agents.yaml", 2)
    result = generate_grid_of_videos(
        mapping,
        "/tmp/test_double_agent",
        name_callback=lambda x, y=None: x,
        require_full_frame=True,
        require_text=False,
    )
    print("test finish: ", result)
Example #5
0
def symbolic_agent_rollout(
        yaml_path,
        num_agents,
        num_rollouts,
        num_workers,
        num_children,
        normal_std,
        normal_mean,
        dir_name,
        clear_at_end=True,
        store=True,
        mask_mode="multiply"
):
    """Spawn perturbed children of each master agent and roll them out.

    For every agent in *yaml_path*, creates one unperturbed copy
    ("child=0") plus *num_children* mask-perturbed variants drawn from a
    normal distribution, then collects rollouts for all of them. Results
    are cached as a pickle in *dir_name*; if the cache file already
    exists it is loaded and returned immediately.

    Args:
        yaml_path: yaml listing agent checkpoints (read via read_yaml).
        num_agents: number of agents to load uniformly from the yaml.
        num_rollouts: rollouts to collect per spawned agent.
        num_workers: parallel rollout workers.
        num_children: perturbed children spawned per master agent.
        normal_std / normal_mean: parameters of the perturbation noise.
        dir_name: directory for the pickle cache.
        clear_at_end: if True, release agent resources after rollout.
        store: if True, pickle the rollout results to the cache file.
        mask_mode: mask application mode passed to MaskSymbolicAgent.

    Returns:
        (rollout_ret, file_name) — the rollout results and the cache path.
    """
    assert ray.is_initialized()

    file_name = osp.join(
        dir_name, "{}agents_{}rollouts_{}children_{}mean_{}std.pkl".format(
            num_agents, num_rollouts, num_children, normal_mean, normal_std
        )
    )

    # Cache hit: reuse previously stored rollout results.
    if os.path.exists(file_name):
        logger.warning(
            "File Detected! We will load rollout results from <{}>".
            format(file_name)
        )
        # NOTE(review): pickle.load is only safe if the cache directory is
        # trusted — do not point dir_name at untrusted data.
        with open(file_name, 'rb') as f:
            rollout_ret = pickle.load(f)
        return rollout_ret, file_name

    name_ckpt_mapping = read_yaml(yaml_path, number=num_agents, mode="uniform")
    master_agents = OrderedDict()
    for name, ckpt in name_ckpt_mapping.items():
        master_agents[name] = MaskSymbolicAgent(ckpt, mask_mode=mask_mode)

    spawned_agents = OrderedDict()
    for i, (name, master_agent) in enumerate(master_agents.items()):
        # child=0 is an unperturbed deep copy of the master agent.
        spawned_agents[name + " child=0"] = copy.deepcopy(master_agent)

        master_agent_ckpt = master_agent.agent_info

        for index in range(1, 1 + num_children):
            child_name = name + " child={}".format(index)
            callback_info = {
                "method": 'normal',
                'mean': normal_mean,
                "std": normal_std,
                # Unique, reproducible seed per (agent, child) pair.
                "seed": index + i * 100
            }
            spawned_agents[child_name] = MaskSymbolicAgent(
                master_agent_ckpt, callback_info,
                name=child_name, mask_mode=mask_mode
            )

    rollout_ret = quick_rollout_from_symbolic_agents(
        spawned_agents, num_rollouts, num_workers
    )

    if clear_at_end:
        # Only the agents are needed here, so iterate values directly
        # (the original iterated .items() and discarded the keys).
        for agent in spawned_agents.values():
            agent.clear()

    os.makedirs(dir_name, exist_ok=True)

    if store:
        with open(file_name, 'wb') as f:
            pickle.dump(rollout_ret, f)

    return rollout_ret, file_name
Example #6
0
def several_agent_replay(
        yaml_path,
        obs,
        # num_rollouts,
        seed=0,
        num_workers=10,
        _num_agents=None
    # force_rewrite=False,
    # return_data=False
):
    """Replay a batch of observations through every agent in *yaml_path*.

    Agents are processed in batches of *num_workers*: each batch first
    launches the remote replay calls, then blocks on their results, so at
    most *num_workers* replays are in flight at a time.

    Args:
        yaml_path: yaml listing agent checkpoints (read via read_yaml).
        obs: observation batch fed to every agent; only obs.shape is
            inspected here (for logging), the rest is passed through.
        seed (int): currently unused by this function's body.
        num_workers (int): max number of concurrent remote replays.
        _num_agents: optional cap on how many agents to load from the yaml.

    Returns:
        dict: agent_name -> {"act": actions, "infos": infos}.
    """
    name_ckpt_mapping = read_yaml(yaml_path, number=_num_agents)
    # Separate timers for the launch phase (now_t) and the collect phase
    # (now_t_get); start_t anchors total elapsed time for both.
    now_t_get = now_t = start_t = time.time()
    num_agents = len(name_ckpt_mapping)
    num_iteration = int(ceil(num_agents / num_workers))
    agent_ckpt_dict_range = list(name_ckpt_mapping.items())
    agent_count = 1
    agent_count_get = 1

    have_gpu = has_gpu()
    return_dict = {}

    for iteration in range(num_iteration):
        # Slice out this batch of at most num_workers agents.
        start = iteration * num_workers
        end = min((iteration + 1) * num_workers, num_agents)
        # obj_ids = []
        # workers = []
        obj_ids_dict = {}
        for i, (name, ckpt_dict) in \
                enumerate(agent_ckpt_dict_range[start:end]):
            ckpt = ckpt_dict["path"]
            env_name = ckpt_dict["env_name"]
            # if "env_name" in ckpt_dict else "BipedalWalker-v2"
            # env_maker = ENV_MAKER_LOOKUP[env_name]
            run_name = ckpt_dict["run_name"]
            # if "run_name" in ckpt_dict else "PPO"
            # Only PPO checkpoints are supported by the replay remotes.
            assert run_name == "PPO"

            # Dispatch to the GPU or CPU remote depending on availability;
            # both return a ray object id resolved in the collect loop.
            if have_gpu:
                obj_id = remote_replay_gpu.remote(obs, run_name, ckpt,
                                                  env_name)
            else:
                obj_id = remote_replay_cpu.remote(obs, run_name, ckpt,
                                                  env_name)
            obj_ids_dict[name] = obj_id

            print("[{}/{}] (+{:.1f}s/{:.1f}s) Start collect replay result"
                  " of {} samples from agent <{}>".format(
                      agent_count, num_agents,
                      time.time() - now_t,
                      time.time() - start_t, obs.shape, name))

            agent_count += 1
            now_t = time.time()

        # Collect phase: block on each remote result for this batch.
        for agent_name, obj_id in obj_ids_dict.items():
            # deepcopy detaches the result from ray's object store.
            act, infos = copy.deepcopy(ray.get(obj_id))
            return_dict[agent_name] = {"act": act, "infos": infos}

            # trajectory_list = []
            # for obj_id in obj_ids:
            #     trajectory_list.append(ray.get(obj_id))
            # return_dict[name] = trajectory_list
            # worker.close.remote()
            print("[{}/{}] (+{:.1f}s/{:.1f}s) Collected output of {} samples "
                  "from agent <{}>".format(agent_count_get, num_agents,
                                           time.time() - now_t_get,
                                           time.time() - start_t, obs.shape,
                                           agent_name))
            agent_count_get += 1
            now_t_get = time.time()
    return return_dict
Example #7
0
def several_agent_rollout(yaml_path,
                          num_rollouts,
                          seed=0,
                          num_workers=10,
                          force_rewrite=False,
                          return_data=False,
                          require_activation=True,
                          _num_agents=None):
    """Collect rollouts for every agent in *yaml_path* using a worker pool.

    A fixed pool of *num_workers* remote RolloutWorkerWrapper actors is
    created once and reused across batches: each batch resets worker i to
    agent i of the batch, launches sampling, then blocks on the results.

    Args:
        yaml_path: yaml listing agent checkpoints (read via read_yaml).
        num_rollouts (int): rollouts to collect per agent.
        seed (int): seed forwarded to each worker's reset.
        num_workers (int): size of the persistent remote worker pool.
        force_rewrite (bool): forwarded to the worker constructor.
        return_data (bool): if True, return the collected rollouts;
            otherwise return None (rollouts are only collected for their
            side effects inside the workers).
        require_activation (bool): forwarded to each worker's reset.
        _num_agents: optional cap on how many agents to load from the yaml.

    Returns:
        dict or None: agent_name -> trajectory list if return_data,
        else None.
    """
    name_ckpt_mapping = read_yaml(yaml_path, number=_num_agents)
    # Separate timers for the launch phase (now_t) and the collect phase
    # (now_t_get); start_t anchors total elapsed time for both.
    now_t_get = now_t = start_t = time.time()
    num_agents = len(name_ckpt_mapping)
    num_iteration = int(ceil(num_agents / num_workers))
    agent_ckpt_dict_range = list(name_ckpt_mapping.items())
    agent_count = 1
    agent_count_get = 1

    have_gpu = has_gpu()
    # Persistent worker pool, fractionally sharing GPUs when available.
    workers = [
        RolloutWorkerWrapper.as_remote(
            num_gpus=0.2 if have_gpu else 0).remote(force_rewrite)
        for _ in range(num_workers)
    ]

    return_dict = {}

    for iteration in range(num_iteration):
        # Slice out this batch of at most num_workers agents.
        start = iteration * num_workers
        end = min((iteration + 1) * num_workers, num_agents)
        # obj_ids = []
        # workers = []
        obj_ids_dict = {}
        for i, (name, ckpt_dict) in \
                enumerate(agent_ckpt_dict_range[start:end]):
            ckpt = ckpt_dict["path"]
            env_name = ckpt_dict["env_name"]
            env_maker = get_env_maker(env_name)
            run_name = ckpt_dict["run_name"]
            assert run_name == "PPO"

            # TODO Only support PPO now.
            # Re-point worker i at this batch's i-th agent, then launch
            # sampling; the object id is resolved in the collect loop.
            workers[i].reset.remote(ckpt=ckpt,
                                    num_rollouts=num_rollouts,
                                    seed=seed,
                                    env_creater=env_maker,
                                    run_name=run_name,
                                    env_name=env_name,
                                    require_activation=require_activation)
            obj_id = workers[i].wrap_sample.remote()
            obj_ids_dict[name] = obj_id
            print("[{}/{}] (+{:.1f}s/{:.1f}s) Start collect {} rollouts from "
                  "agent"
                  " <{}>".format(agent_count, num_agents,
                                 time.time() - now_t,
                                 time.time() - start_t, num_rollouts, name))

            agent_count += 1
            now_t = time.time()

        # Collect phase: obj_ids_dict preserves insertion order, so the
        # zip pairs each result with the worker that produced it.
        for (name, obj_id), worker in zip(obj_ids_dict.items(), workers):
            # deepcopy detaches the result from ray's object store.
            trajectory_list = copy.deepcopy(ray.get(obj_id))
            # for obj_id in obj_ids:
            #     trajectory_list.append(ray.get(obj_id))
            return_dict[name] = trajectory_list
            # worker.close.remote()
            print("[{}/{}] (+{:.1f}s/{:.1f}s) Collected {} rollouts from agent"
                  " <{}>".format(agent_count_get, num_agents,
                                 time.time() - now_t_get,
                                 time.time() - start_t, num_rollouts, name))
            agent_count_get += 1
            now_t_get = time.time()
    return return_dict if return_data else None