Code example #1
File: evaluator.py Project: zeta1999/xingtian
    def start(self):
        """ run evaluator """
        _ag = AgentGroup(self.env_para, self.alg_para, self.agent_para)
        while True:
            recv_data = self.recv_broker.get()
            cmd = get_msg_info(recv_data, "cmd")
            logging.debug("evaluator get meg: {}".format(recv_data))
            if cmd not in ["eval"]:
                continue

            model_name = get_msg_data(recv_data)

            _ag.restore(model_name)  # fixme: load weight 'file' from the disk
            eval_data = _ag.evaluate(self.bm_eval.get("episodes_per_eval", 1))

            # collect the evaluation rewards for each agent with the model name
            record_item = (eval_data, model_name)
            print_immediately("collect eval results: {}".format(record_item))
            record_item = message(
                record_item,
                cmd="eval_result",
                broker_id=self.broker_id,
                test_id=self.test_id,
            )
            self.send_broker.send(record_item)
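All of these snippets lean on a small message-envelope helper family (`message`, `get_msg_info`, `get_msg_data`, `set_msg_info`). A minimal sketch of what they might look like, assuming the envelope is a plain dict carrying a `ctr_info` routing dict plus the payload (the field names here are assumptions, not xingtian's confirmed layout):

# Minimal sketch of the assumed message envelope; the field names
# ("ctr_info", "data") are assumptions, not xingtian's confirmed layout.
def message(data, **ctr_info):
    """Wrap a payload with routing metadata such as cmd/broker_id/test_id."""
    return {"ctr_info": ctr_info, "data": data}


def get_msg_info(msg, key):
    """Read one routing field, e.g. get_msg_info(msg, "cmd")."""
    return msg["ctr_info"].get(key)


def get_msg_data(msg):
    """Return the wrapped payload."""
    return msg["data"]


def set_msg_info(msg, **kwargs):
    """Attach extra routing fields after construction, e.g. agent_id."""
    msg["ctr_info"].update(kwargs)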
Code example #2
    def infer_action(self, state, use_explore):
        """
        Infer an action with the `state`
        :param state:
        :param use_explore: Used True, in train, False in evaluate
        :return: action value
        """

        # epsilon-greedy: with probability epsilon take a random action
        if use_explore and random.random() < self.epsilon:
            action = np.random.randint(0, self.alg.action_dim)
        elif use_explore:  # explore with remote predict
            # delegate prediction to the remote explorer and wait for the action
            send_data = message(state, cmd="predict")
            self.send_explorer.send(send_data)
            action = self.recv_explorer.recv()
        else:  # no exploration: used in evaluation
            action = self.alg.predict(state)

        # linearly decay epsilon, clamped at a floor of 0.01
        if use_explore:
            self.epsilon -= 1.0 / self.episode_count
            self.epsilon = max(0.01, self.epsilon)

        # update transition data
        self.transition_data.update(
            {"cur_state": state, "action": action}
        )

        return action
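For intuition, the decay step above walks epsilon linearly down to the 0.01 floor over roughly `episode_count` exploration steps. A standalone sketch with hypothetical values (the agent's actual initial epsilon and episode count may differ):

# Standalone sketch of the linear epsilon decay (hypothetical values).
epsilon, episode_count = 1.0, 100
for _ in range(150):
    epsilon = max(0.01, epsilon - 1.0 / episode_count)
print(round(epsilon, 2))  # 0.01 -- reached after ~99 steps, then held at the floor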
Code example #3
    def _dist_model(self, dist_model_name=("none", "none"), save_index=-1):
        """dist model tool"""
        ctr_info = self.alg.dist_model_policy.get_dist_info(save_index)

        # ctr_info may be a list of routing dicts; an empty list distributes nothing
        if isinstance(ctr_info, list):
            for _ctr in ctr_info:
                to_send_data = message(dist_model_name,
                                       cmd="dist_model",
                                       **_ctr)
                self.model_q.send(to_send_data)
        else:
            to_send_data = message(dist_model_name,
                                   cmd="dist_model",
                                   **ctr_info)
            self.model_q.send(to_send_data)
Code example #4
    def put_test_model(self, model_name):
        """ send test model """
        key = self.get_avail_node()
        ctr_info = {"cmd": "eval", "broker_id": key[0], "test_id": key[1]}
        eval_cmd = message(model_name, **ctr_info)
        self.send_broker.send(eval_cmd)
        logging.debug("put evaluate model: {}".format(model_name))
        self.used_node[key] += 1
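`get_avail_node` is not shown in these excerpts. Given the `used_node` counter keyed by `(broker_id, test_id)`, a plausible least-loaded selector could look like this (an assumption, not the project's actual code):

def get_avail_node(self):
    """Pick the (broker_id, test_id) key with the fewest models assigned.
    Hypothetical sketch; xingtian's real selection policy is not shown here."""
    return min(self.used_node, key=self.used_node.get)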
Code example #5
    def create_evaluator(self, broker_id, test_id):
        """ create evaluator """
        config = deepcopy(self.config_info)
        config.update({"test_id": test_id})

        create_cmd = message(config,
                             cmd="create_evaluator",
                             broker_id=broker_id)
        self.send_broker.send(create_cmd)
Code example #6
    def get_trajectory(self):
        for env_id in range(self.vector_env_size):
            for _data_key in ("cur_state", "logit", "action", "reward", "done",
                              "info"):
                self.trajectory[_data_key].extend(
                    self.sample_vector[env_id][_data_key])

        # stack each field into one array with leading dim env_num * seq_len
        for _data_key in self.trajectory:
            self.trajectory[_data_key] = np.stack(self.trajectory[_data_key])

        self.trajectory["action"].astype(np.int32)

        trajectory = message(self.trajectory.copy())
        set_msg_info(trajectory, agent_id=self.id)
        return trajectory
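After the `extend` loop each trajectory field is a flat list of `vector_env_size * seq_len` per-step entries, which `np.stack` turns into a single array with that leading dimension. A toy illustration with hypothetical shapes:

import numpy as np

# Toy illustration: 2 envs x 3 steps of 4-dim states -> one (6, 4) array.
per_env = [[np.zeros(4) for _ in range(3)] for _ in range(2)]
flat = []
for env_steps in per_env:
    flat.extend(env_steps)
print(np.stack(flat).shape)  # (6, 4)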
Code example #7
    def handle_env_feedback(self, next_raw_state, reward, done, info, use_explore):

        self.transition_data.update({
            "next_state": next_raw_state,
            "reward": np.sign(reward) if use_explore else reward,
            "done": done,
            "info": info
        })

        # deliver this transition to the learner to trigger the training process
        if use_explore:
            train_data = {k: [v] for k, v in self.transition_data.items()}
            train_data = message(train_data, agent_id=self.id)
            self.send_explorer.send(train_data)

        return self.transition_data
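Note the `np.sign(reward)` call: during training it clips rewards to {-1, 0, +1}, the reward-clipping trick familiar from DQN on Atari, while evaluation keeps the raw reward. A quick illustration:

import numpy as np

# Reward clipping as used above: only the sign survives during training.
for raw in (-3.5, 0.0, 12.0):
    print(raw, "->", np.sign(raw))  # -3.5 -> -1.0, 0.0 -> 0.0, 12.0 -> 1.0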
Code example #8
File: agent_group.py Project: zeta1999/xingtian
    def explore(self, episode_count):
        """
        agent_num impact on the api about run interaction with environment.
            == 1: use standalone api, `run_one_episode`
            >= 2 and env.api_type == "standalone": agent.run_one_episode
            >= 2 and env.api_type == "unified": agent.do_one_interaction.

        :param episode_count:
        :return:
        """
        _start0 = time()
        model_name = self.agents[0].sync_model()  # fixme: async alg dummy
        self.ag_stats.wait_model_time = time() - _start0

        logging.debug("get sync model: {}".format(model_name))

        if isinstance(model_name, dict) or \
                (isinstance(model_name, list) and "none" not in model_name):
            _start1 = time()
            self.restore(model_name)
            self.ag_stats.restore_model_time = time() - _start1

        # single agent, always use the `run_one_episode` api.
        # multi agent with `standalone` api_type, use the `run_one_episode` api.
        if self.env_info["api_type"] == "standalone":
            # (use_explore, collect)
            _paras = [
                (True, not _ag.alg.async_flag) for _ag in self.agents
            ]
            job_funcs = [agent.run_one_episode for agent in self.agents]
            for _epi_index in range(episode_count):
                _start2 = time()
                self.env.reset()
                for agent in self.agents:
                    agent.reset()

                trajectory_list = self.bot.do_multi_job(job_funcs, _paras)
                for _ag, trajectory in zip(self.agents, trajectory_list):
                    if not _ag.alg.async_flag:
                        self.trajectories.append(trajectory)
                        self.send_explorer.send(trajectory)

                self._post_processes()
                self.ag_stats.explore_time_in_epi = time() - _start2

                if _epi_index == episode_count - 1:
                    self.ag_stats.update_with_agent_stats(
                        [_a.get_perf_stats() for _a in self.agents]
                    )

        elif self.env_info["api_type"] == "unified":
            for _ in range(episode_count):
                _start2 = time()
                trajectories = self._run_one_unified_episode(
                    use_explore=True, collect=True
                )

                for _ag, trajectory in zip(self.agents, trajectories):
                    if not _ag.alg.async_flag:
                        self.trajectories.append(trajectory)
                        self.send_explorer.send(trajectory)

                self._post_processes()
                self.ag_stats.explore_time_in_epi = time() - _start2

        else:
            raise ValueError(
                "invalid 'api_type':{} from environment".format(self.env_info)
            )

        stats_info = self.ag_stats.get()
        stats_msg = message(stats_info, cmd="stats_msg")
        self.send_explorer.send(stats_msg)
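`self.bot.do_multi_job` is assumed to run each agent's episode function with its own `(use_explore, collect)` tuple and return the trajectories in agent order. A minimal serial sketch of that contract (an assumption; the real implementation is likely concurrent):

def do_multi_job(job_funcs, paras):
    """Run each job with its parameter tuple and return results in order.
    Minimal serial sketch; xingtian's version may use threads or processes."""
    return [func(*para) for func, para in zip(job_funcs, paras)]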
Code example #9
File: explorer.py Project: zeta1999/xingtian
def setup_explorer(broker_master, config_info, env_id):
    config = deepcopy(config_info)
    config["env_para"].update({"env_id": env_id})

    msg = message(config, cmd="create_explorer")
    broker_master.recv_local_q.send(msg)
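A typical call site would create one explorer per environment id, assuming `broker_master` and `config_info` were built earlier (hypothetical usage):

# Hypothetical usage: spawn one explorer per vectorized environment id.
for env_id in range(4):
    setup_explorer(broker_master, config_info, env_id)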
Code example #10
    def stop(self):
        """ stop the whole system """
        close_cmd = message(None, cmd="close")
        self.recv_local_q.send(close_cmd)
Code example #11
File: agent.py Project: zeta1999/xingtian
    def get_trajectory(self):
        trajectory = message(self.trajectory.copy())
        set_msg_info(trajectory, agent_id=self.id)
        return trajectory