Ejemplo n.º 1
0
    def start(self):
        """Serve evaluation requests from the receive broker.

        One ``AgentGroup`` is built up front from ``self.env_para``,
        ``self.alg_para`` and ``self.agent_para``. The method then blocks on
        ``self.recv_broker.get()`` in an endless loop:

        * messages whose ``cmd`` is not ``"eval"`` are ignored;
        * for an ``"eval"`` message, the message payload is handed to the
          agent group's ``restore``, ``evaluate`` is called with the
          ``"episodes_per_eval"`` entry of ``self.bm_eval`` (1 when absent),
          and the pair ``(eval_data, payload)`` is wrapped in an
          ``"eval_result"`` message and pushed to ``self.send_broker``.

        The loop has no exit branch, so this method does not return normally.
        """
        agent_group = AgentGroup(self.env_para, self.alg_para, self.agent_para)

        while True:
            request = self.recv_broker.get()
            command = get_msg_info(request, "cmd")
            logging.debug("evaluator get meg: {}".format(request))

            # Only evaluation requests are handled; anything else is dropped.
            if command != "eval":
                continue

            model_name = get_msg_data(request)

            # FIXME: load weight 'file' from the disk
            agent_group.restore(model_name)
            episodes = self.bm_eval.get("episodes_per_eval", 1)
            eval_data = agent_group.evaluate(episodes)

            # Report the evaluation output together with the model it
            # was produced from.
            result = (eval_data, model_name)
            print_immediately("collect eval results: {}".format(result))

            reply = message(
                result,
                cmd="eval_result",
                broker_id=self.broker_id,
                test_id=self.test_id,
            )
            self.send_broker.send(reply)
Ejemplo n.º 2
0
    def start(self):
        """Run the evaluator loop until a ``"close"`` command is received.

        Sets the process title to ``"xt_evaluator"``, builds one
        ``AgentGroup`` with ``scene="evaluate"``, then repeatedly reads
        messages from ``self.recv_broker``:

        * ``cmd == "close"`` ends the loop and the method returns;
        * any other command except ``"eval"`` is printed and skipped;
        * for ``"eval"``, every ``(train_count, weights)`` item of the
          message's ``"data"`` mapping is restored into the agent group
          (``is_id=False``), evaluated with the ``"episodes_per_eval"``
          entry of ``self.bm_eval`` (1 when absent), and the result is sent
          through ``self.send_broker`` as an ``"eval_return"`` message.
        """
        setproctitle.setproctitle("xt_evaluator")

        agent_group = AgentGroup(
            self.env_para,
            self.alg_para,
            self.agent_para,
            scene="evaluate",
        )

        while True:
            request = self.recv_broker.get()
            command = get_msg_info(request, "cmd")
            # Log only the payload type, not the payload itself.
            logging.debug("evaluator get meg: {}".format(type(request)))

            if command == "close":
                break

            if command != "eval":
                print_immediately("eval get un-used data:{}".format(request))
                continue

            # One evaluation round, and one reply, per entry in "data".
            for train_count, weights in request["data"].items():
                agent_group.restore(weights, is_id=False)
                episodes = self.bm_eval.get("episodes_per_eval", 1)
                eval_data = agent_group.evaluate(episodes)

                # Tag the evaluation output with where it came from.
                origin = {
                    "train_count": train_count,
                    "broker_id": self.broker_id,
                    "test_id": self.test_id,
                }
                result = (eval_data, origin)
                print_immediately("collect eval results: {}".format(result))

                reply = message(
                    result,
                    cmd="eval_return",
                    broker_id=self.broker_id,
                    test_id=self.test_id,
                )
                self.send_broker.send(reply)