Example #1
0
    def run(self,
            agent,
            num_rollouts,
            env_wrapper,
            env_name,
            num_steps=None,
            require_frame=False,
            require_trajectory=False,
            require_extra_info=False,
            require_full_frame=False,
            require_env_state=False,
            render_mode="rgb_array"):
        """Restore a symbolic agent and collect ``num_rollouts`` rollouts.

        The restored "real" agent is cached on ``self.existing_agent`` so
        later calls can reuse it. Returns a deep copy of the rollout
        results list and a deep copy of the (cleared) symbolic agent.
        """
        assert isinstance(agent, SymbolicAgentBase)

        # Reuse the previously restored agent when one is cached;
        # otherwise restore from scratch and cache it.
        if self.existing_agent is None:
            real_agent = agent.get()['agent']
            self.existing_agent = real_agent
        else:
            real_agent = agent.get(self.existing_agent)['agent']

        logger.debug("SymbolicAgent <{}> is restored.".format(agent.name))

        env = get_env_maker(env_name)(seed=0)
        if env_wrapper is not None:
            env = env_wrapper(env)

        # Collect all rollouts with identical settings.
        ret_list = [
            rollout(real_agent, env, env_name, num_steps, require_frame,
                    require_trajectory, require_extra_info,
                    require_full_frame, require_env_state, render_mode)
            for _ in range(num_rollouts)
        ]

        agent.clear()
        return copy.deepcopy(ret_list), copy.deepcopy(agent)
Example #2
0
    def collect_frames(self,
                       num_steps,
                       run_name,
                       env_name,
                       config,
                       ckpt,
                       require_full_frame=False,
                       render_mode="rgb_array",
                       ideal_steps=None,
                       random_seed=False):
        """Restore an agent from a checkpoint and collect rendered frames.

        Retries the rollout with different env seeds (up to 10 attempts)
        until the episode is longer than ``ideal_steps``; when
        ``ideal_steps`` is None the first rollout is accepted.  The
        longest episode seen so far is kept in ``tmp_frames`` as a
        fallback.  Returns a ``(frames, extra_info)`` pair.
        """
        agent = restore_agent(run_name, ckpt, env_name, config)
        # if ideal_steps is not None:
        # Best (longest) rollout seen so far across attempts.
        tmp_frames = []
        tmp_extra_info = []

        # We allow 10 attempts.
        for i in range(10):
            if random_seed:
                seed = np.random.randint(0, 10000)
            else:
                seed = i
            env_maker = get_env_maker(env_name, require_render=True)
            env = env_maker(seed=seed)
            result = rollout(agent,
                             env,
                             env_name,
                             num_steps,
                             require_frame=True,
                             require_full_frame=require_full_frame,
                             render_mode=render_mode)
            frames, extra_info = result['frames'], result['frame_extra_info']

            # Remember this rollout if it is the longest so far.
            if len(frames) > len(tmp_frames):
                tmp_frames = copy.deepcopy(frames)
                tmp_extra_info = copy.deepcopy(extra_info)

            if (ideal_steps is None) or (len(frames) > ideal_steps):
                frames = tmp_frames
                extra_info = tmp_extra_info
                break
            else:
                print("In collect_frames, current frame length is {} and "
                      "we expect length {}. So we rerun the rollout "
                      "with different seed {}."
                      " Current length of potential 'frames' is {}".format(
                          len(frames), ideal_steps, i + 1, len(tmp_frames)))
        # NOTE(review): if all 10 attempts fall short of ideal_steps, the
        # LAST rollout (not the longest, tmp_frames) is returned — confirm
        # this is intended.
        env.close()
        # agent.stop()
        return frames, extra_info
Example #3
0
def restore_policy_with_mask(run_name, ckpt, env_name, extra_config=None):
    """Build a masked PPO policy and restore its state from a checkpoint.

    Args:
        run_name: must be "PPO" — the only supported algorithm.
        ckpt: path to an RLlib checkpoint, or None to skip restoring.
        env_name: gym environment id used to build obs/action spaces.
        extra_config: unused here — kept for interface compatibility.

    Returns:
        The absolute checkpoint path that was loaded, or None when no
        checkpoint was given.
    """
    tf = try_import_tf()
    Graph = tf.Graph

    assert run_name == "PPO"
    register_fc_with_mask()
    env = get_env_maker(env_name)()
    # Bug fix: `path` was previously only bound inside the `if ckpt`
    # branch, so returning it raised NameError when ckpt is None.
    path = None
    with Graph().as_default():
        # This is a workaround to avoid variable multiple init.
        p = PPOTFPolicyWithMask(env.observation_space, env.action_space,
                                ppo_agent_default_config_with_mask)
        if ckpt is not None:
            path = os.path.abspath(os.path.expanduser(ckpt))
            # Context manager so the checkpoint file handle is closed.
            with open(path, 'rb') as f:
                wkload = pickle.load(f)['worker']
            state = pickle.loads(wkload)['state']['default_policy']
            p.set_state(state)
    return path
Example #4
0
    def generate_frames_from_agent(self,
                                   agent,
                                   agent_name,
                                   num_steps=None,
                                   seed=0,
                                   render_mode="rgb_array",
                                   require_trajectory=False,
                                   ideal_steps=None):
        """Roll out ``agent`` in its own env and package rendered frames.

        Retries up to 10 rollouts until the episode exceeds
        ``ideal_steps`` (or accepts the first when ``ideal_steps`` is
        None).  Returns ``({agent_name: frames_info}, extra_info_dict)``
        where ``frames_info`` carries the frames plus layout placeholders
        and an estimated period.
        """
        config = agent.config
        env_name = config["env"]
        env = get_env_maker(env_name, require_render=True)()
        if seed is not None:
            assert isinstance(seed, int)
            env.seed(seed)

        for iteration in range(10):

            result = copy.deepcopy(
                rollout(agent,
                        env,
                        env_name,
                        num_steps,
                        require_frame=True,
                        require_trajectory=require_trajectory,
                        require_full_frame=self.require_full_frame,
                        render_mode=render_mode))
            frames, extra_info = result['frames'], result['frame_extra_info']
            if require_trajectory:
                extra_info['trajectory'] = result['trajectory']

            # Accept the rollout immediately, or once it is long enough.
            if ideal_steps is None:
                break
            elif len(frames) > ideal_steps:
                break

        env.close()
        # agent.stop()
        # Estimate the motion period from per-step info; fall back to a
        # fixed 100 when no period info was recorded.
        period_info = extra_info['period_info']
        if period_info:
            period_source = np.stack(period_info)
            period = get_period(period_source, self.fps)
            print("period for agent <{}> is {}, its len is {}".format(
                agent_name, period, len(frames)))
        else:
            period = 100
        frames_info = {
            "frames": frames,
            "column": None,
            "row": None,
            "loc": None,
            "period": period
        }
        return_dict = {agent_name: frames_info}
        # NOTE(review): .copy() is shallow — the nested per-key dicts are
        # shared with PRESET_INFORMATION_DICT (and with new_extra_info_dict
        # below), so the updates here mutate the preset's inner dicts.
        # Confirm this aliasing is intended.
        extra_info_dict = PRESET_INFORMATION_DICT.copy()
        for key, val in extra_info.items():
            if key in extra_info_dict:
                extra_info_dict[key][agent_name] = val
            elif key == "vf_preds":
                extra_info_dict["value_function"][agent_name] = val
            elif key == "trajectory" and require_trajectory:
                if "trajectory" in extra_info_dict:
                    extra_info_dict["trajectory"][agent_name] = val
                else:
                    extra_info_dict["trajectory"] = {agent_name: val}
        extra_info_dict['title'][agent_name] = agent_name

        # Rebuild a dict restricted to the preset keys (plus trajectory).
        new_extra_info_dict = PRESET_INFORMATION_DICT.copy()
        for key in PRESET_INFORMATION_DICT.keys():
            new_extra_info_dict[key].update(extra_info_dict[key])

        if require_trajectory:
            new_extra_info_dict["trajectory"] = extra_info_dict["trajectory"]

        # Frame shape is (height, width, ...) — report width/height.
        new_extra_info_dict['frame_info'] = {
            "width": frames[0].shape[1],
            "height": frames[0].shape[0]
        }

        return return_dict, new_extra_info_dict
Example #5
0
def get_fft_representation(
        name_ckpt_mapping,
        num_seeds,
        num_rollouts,
        padding="fix",
        padding_length=500,
        padding_value=0,
        stack=False,
        normalize="range",
        num_workers=10
):
    """Compute FFT representations for each agent with remote FFT workers.

    Agents are processed in batches of ``num_workers``; for each agent the
    worker is reset with its checkpoint and asked for an FFT.  Returns two
    dicts keyed by agent name: the per-agent data frame and the
    representation.
    """
    initialize_ray()

    data_frame_dict = {}
    representation_dict = {}

    agent_items = list(name_ckpt_mapping.items())
    total = len(agent_items)
    num_batches = int(ceil(total / num_workers))

    pool = [FFTWorker.remote() for _ in range(num_workers)]

    # Timing bookkeeping for the progress prints.
    start_t = time.time()
    now_t = start_t
    now_t_get = start_t
    submitted = 1
    fetched = 1

    for batch_idx in range(num_batches):
        lo = batch_idx * num_workers
        hi = min(lo + num_workers, total)
        batch = agent_items[lo:hi]

        pending = []
        for worker_idx, (name, ckpt_dict) in enumerate(batch):
            env_name = ckpt_dict["env_name"]
            worker = pool[worker_idx]
            worker.reset.remote(
                run_name=ckpt_dict["run_name"],
                ckpt=ckpt_dict["path"],
                num_rollouts=num_rollouts,
                env_name=env_name,
                env_maker=get_env_maker(env_name),
                agent_name=name,
                padding=padding,
                padding_length=padding_length,
                padding_value=padding_value,
                worker_name="Worker{}".format(worker_idx)
            )

            pending.append(worker.fft.remote(
                normalize=normalize,
                _extra_name="[{}/{}] ".format(submitted, total)
            ))

            print(
                "[{}/{}] (+{:.1f}s/{:.1f}s) Start collecting data from agent "
                "<{}>".format(
                    submitted, total,
                    time.time() - now_t,
                    time.time() - start_t, name
                )
            )
            submitted += 1
            now_t = time.time()

        # Retrieve the batch results in submission order.
        for df_obj_id, (name, _) in zip(pending, batch):
            df, rep = copy.deepcopy(ray.get(df_obj_id))
            data_frame_dict[name] = df
            representation_dict[name] = rep
            print(
                "[{}/{}] (+{:.1f}s/{:.1f}s) Got data from agent <{}>".format(
                    fetched, total,
                    time.time() - now_t_get,
                    time.time() - start_t, name
                )
            )
            fetched += 1
            now_t_get = time.time()
    return data_frame_dict, representation_dict
Example #6
0
def several_agent_rollout(yaml_path,
                          num_rollouts,
                          seed=0,
                          num_workers=10,
                          force_rewrite=False,
                          return_data=False,
                          require_activation=True,
                          _num_agents=None):
    """Collect rollouts from every agent listed in a YAML checkpoint file.

    Agents are processed in batches of ``num_workers`` remote
    ``RolloutWorkerWrapper`` actors.  Only PPO agents are supported.

    Args:
        yaml_path: YAML file mapping agent names to checkpoint dicts.
        num_rollouts: rollouts to collect per agent.
        seed: environment seed passed to each worker.
        num_workers: size of the remote worker pool.
        force_rewrite: forwarded to the worker constructor.
        return_data: when True, return the collected trajectories.
        require_activation: forwarded to the worker reset.
        _num_agents: optional cap on the number of agents read.

    Returns:
        ``{agent_name: trajectory_list}`` when ``return_data`` is True,
        otherwise None.
    """
    name_ckpt_mapping = read_yaml(yaml_path, number=_num_agents)
    now_t_get = now_t = start_t = time.time()
    num_agents = len(name_ckpt_mapping)
    num_iteration = int(ceil(num_agents / num_workers))
    agent_ckpt_dict_range = list(name_ckpt_mapping.items())
    agent_count = 1
    agent_count_get = 1

    have_gpu = has_gpu()
    # Reserve a GPU slice per worker only when a GPU is actually present.
    workers = [
        RolloutWorkerWrapper.as_remote(
            num_gpus=0.2 if have_gpu else 0).remote(force_rewrite)
        for _ in range(num_workers)
    ]

    return_dict = {}

    for iteration in range(num_iteration):
        start = iteration * num_workers
        end = min((iteration + 1) * num_workers, num_agents)
        obj_ids_dict = {}
        for i, (name, ckpt_dict) in \
                enumerate(agent_ckpt_dict_range[start:end]):
            ckpt = ckpt_dict["path"]
            env_name = ckpt_dict["env_name"]
            env_maker = get_env_maker(env_name)
            run_name = ckpt_dict["run_name"]
            assert run_name == "PPO"

            # TODO Only support PPO now.
            workers[i].reset.remote(ckpt=ckpt,
                                    num_rollouts=num_rollouts,
                                    seed=seed,
                                    env_creater=env_maker,
                                    run_name=run_name,
                                    env_name=env_name,
                                    require_activation=require_activation)
            obj_id = workers[i].wrap_sample.remote()
            obj_ids_dict[name] = obj_id
            print("[{}/{}] (+{:.1f}s/{:.1f}s) Start collect {} rollouts from "
                  "agent"
                  " <{}>".format(agent_count, num_agents,
                                 time.time() - now_t,
                                 time.time() - start_t, num_rollouts, name))

            agent_count += 1
            now_t = time.time()

        # Iterate the pending object ids directly; the previous
        # zip(obj_ids_dict.items(), workers) bound a `worker` variable
        # that was never used.
        for name, obj_id in obj_ids_dict.items():
            trajectory_list = copy.deepcopy(ray.get(obj_id))
            return_dict[name] = trajectory_list
            print("[{}/{}] (+{:.1f}s/{:.1f}s) Collected {} rollouts from agent"
                  " <{}>".format(agent_count_get, num_agents,
                                 time.time() - now_t_get,
                                 time.time() - start_t, num_rollouts, name))
            agent_count_get += 1
            now_t_get = time.time()
    return return_dict if return_data else None