Ejemplo n.º 1
0
    def start(self):
        """Run the evaluator loop.

        An ``AgentGroup`` is built from the stored env/alg/agent parameters,
        then the method blocks on ``self.recv_broker`` forever. Messages
        whose ``cmd`` is not ``"eval"`` are silently dropped. For every
        ``"eval"`` message the payload is handed to ``AgentGroup.restore``,
        an evaluation is run, and the ``(eval_data, model_name)`` pair is sent
        through ``self.send_broker`` wrapped in an ``"eval_result"`` message.

        The loop has no exit condition; this method does not return.
        """
        agent_group = AgentGroup(self.env_para, self.alg_para, self.agent_para)
        while True:
            request = self.recv_broker.get()
            command = get_msg_info(request, "cmd")
            logging.debug("evaluator get meg: {}".format(request))
            if command != "eval":
                # Only evaluation requests are served here.
                continue

            model_name = get_msg_data(request)

            # fixme: load weight 'file' from the disk
            agent_group.restore(model_name)
            episodes = self.bm_eval.get("episodes_per_eval", 1)
            eval_data = agent_group.evaluate(episodes)

            # Pair the evaluation output with the model it belongs to.
            result = (eval_data, model_name)
            print_immediately("collect eval results: {}".format(result))
            reply = message(
                result,
                cmd="eval_result",
                broker_id=self.broker_id,
                test_id=self.test_id,
            )
            self.send_broker.send(reply)
Ejemplo n.º 2
0
    def start_explore(self):
        """Run the exploration loop; never returns normally.

        SIGINT is ignored in this process and ``CUDA_VISIBLE_DEVICES`` is set
        to ``"-1"`` before the ``AgentGroup`` is created. Each iteration
        explores for ``sync_model_interval`` steps (read from
        ``agent_para["agent_config"]``, default 1). Whenever more than 20
        steps have accumulated since the last report, the returned stats are
        wrapped in a ``"stats_msg"`` message and sent on ``self.recv_agent``.

        Any exception, including ``BaseException`` subclasses, is logged and
        the process is terminated with exit code 4.
        """
        signal.signal(signal.SIGINT, signal.SIG_IGN)
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

        report_stats_interval = 20
        total_explored = 0
        # Far enough in the past that the first iteration always reports.
        reported_at = -999
        try:
            self.rl_agent = AgentGroup(
                self.env_para,
                self.alg_para,
                self.agent_para,
                self.send_agent,
                self.recv_agent,
                self._buf,
            )
            agent_config = self.agent_para.get("agent_config", {})
            explore_time = agent_config.get("sync_model_interval", 1)
            logging.info(
                "AgentGroup start to explore with sync interval-{}".format(
                    explore_time))

            while True:
                stats = self.rl_agent.explore(explore_time)
                total_explored += explore_time

                # Only the explorer with id below 1 writes progress logs.
                if self.explorer_id < 1:
                    logging.debug("explore-{} ran {} times".format(
                        self.explorer_id, total_explored))

                if total_explored - reported_at > report_stats_interval:
                    self.recv_agent.send(message(stats, cmd="stats_msg"))
                    reported_at = total_explored

        except BaseException as ex:
            logging.exception(ex)
            # Hard exit: skips interpreter cleanup.
            os._exit(4)
Ejemplo n.º 3
0
    def start(self):
        """Run the evaluator loop until a ``"close"`` command arrives.

        The process title is set to ``xt_evaluator`` and an ``AgentGroup`` is
        created with ``scene="evaluate"``. Messages are then read from
        ``self.recv_broker``:

        * ``cmd == "close"`` ends the loop and the method returns.
        * any ``cmd`` other than ``"eval"`` is printed and skipped.
        * for ``"eval"``, ``recv_data["data"]`` is iterated as
          ``train_count -> weights``; each set of weights is restored,
          evaluated, and reported through ``self.send_broker`` as an
          ``"eval_return"`` message.
        """
        setproctitle.setproctitle("xt_evaluator")

        agent_group = AgentGroup(
            self.env_para,
            self.alg_para,
            self.agent_para,
            scene="evaluate",
        )
        while True:
            request = self.recv_broker.get()
            command = get_msg_info(request, "cmd")
            logging.debug("evaluator get meg: {}".format(type(request)))
            if command == "close":
                break

            if command != "eval":
                print_immediately("eval get un-used data:{}".format(request))
                continue

            for train_count, weights in request["data"].items():
                agent_group.restore(weights, is_id=False)
                episodes = self.bm_eval.get("episodes_per_eval", 1)
                eval_data = agent_group.evaluate(episodes)

                # Tag the result with where and when it was produced.
                origin = {
                    "train_count": train_count,
                    "broker_id": self.broker_id,
                    "test_id": self.test_id,
                }
                result = (eval_data, origin)
                print_immediately(
                    "collect eval results: {}".format(result))
                reply = message(
                    result,
                    cmd="eval_return",
                    broker_id=self.broker_id,
                    test_id=self.test_id,
                )
                self.send_broker.send(reply)
Ejemplo n.º 4
0
    def start_explore(self):
        """Run the exploration loop; never returns normally.

        SIGINT is ignored in this process and ``CUDA_VISIBLE_DEVICES`` is set
        to ``"-1"`` before the ``AgentGroup`` is created. Each iteration
        explores for ``sync_model_interval`` steps (read from
        ``agent_para["agent_config"]``, default 1) and logs the running total.

        Any exception, including ``BaseException`` subclasses, is logged and
        the process is terminated with exit code 4.
        """
        signal.signal(signal.SIGINT, signal.SIG_IGN)
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
        total_explored = 0
        try:
            self.rl_agent = AgentGroup(self.env_para, self.alg_para,
                                       self.agent_para, self.send_agent,
                                       self.recv_agent)
            agent_config = self.agent_para.get("agent_config", {})
            explore_time = agent_config.get("sync_model_interval", 1)
            logging.info(
                "AgentGroup start to explore with sync interval-{}".format(
                    explore_time))

            while True:
                self.rl_agent.explore(explore_time)
                total_explored += explore_time
                logging.debug("end explore-{}".format(total_explored))
        except BaseException as ex:
            logging.exception(ex)
            # Hard exit: skips interpreter cleanup.
            os._exit(4)
Ejemplo n.º 5
0
    def start_explore(self):
        """Run the exploration loop; never returns normally.

        SIGINT is ignored in this process and ``CUDA_VISIBLE_DEVICES`` is set
        to ``"-1"`` before the ``AgentGroup`` is created. Each iteration calls
        ``update_model`` and then explores for ``sync_model_interval`` steps
        (read from ``agent_para["agent_config"]``, default 1).

        Stats are sent on ``self.recv_agent`` as a ``"stats_msg"`` message
        after the very first round, and afterwards whenever the running step
        count modulo ``self.report_stats_interval`` equals
        ``self.explorer_id``.

        Any exception, including ``BaseException`` subclasses, is logged and
        the process is terminated with exit code 4.
        """
        signal.signal(signal.SIGINT, signal.SIG_IGN)
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
        total_explored = 0

        try:
            self.rl_agent = AgentGroup(
                self.env_para,
                self.alg_para,
                self.agent_para,
                self.send_agent,
                self.recv_agent,
                self._buf,
            )
            agent_config = self.agent_para.get("agent_config", {})
            explore_time = agent_config.get("sync_model_interval", 1)
            logging.info("explorer-{} start with sync interval-{}".format(
                self.explorer_id, explore_time))

            while True:
                model_type = self.rl_agent.update_model()
                stats = self.rl_agent.explore(explore_time)

                total_explored += explore_time
                report_due = (
                    total_explored % self.report_stats_interval
                    == self.explorer_id
                    or total_explored == explore_time
                )
                if not report_due:
                    continue

                stats_msg = message(stats,
                                    cmd="stats_msg",
                                    broker_id=self.broker_id,
                                    explorer_id=self.explorer_id)
                self.recv_agent.send(stats_msg)
                # Only the explorer with id below 1 writes progress logs.
                if self.explorer_id < 1:
                    logging.debug(
                        "EXP{} ran {} ts, restore {} ts, last type:{}".
                        format(self.explorer_id, total_explored,
                               self.rl_agent.restore_count, model_type))

        except BaseException as ex:
            logging.exception(ex)
            # Hard exit: skips interpreter cleanup.
            os._exit(4)
Ejemplo n.º 6
0
class Explorer(object):
    """Create an explorer to explore environment to generate train data.

    The explorer owns an ``AgentGroup`` (created lazily in
    ``start_explore``) and two forwarding threads that shuttle messages
    between the broker-facing channels (``recv_broker`` / ``send_broker``)
    and the agent-facing local channels (``send_agent`` / ``recv_agent``).
    """

    def __init__(self, config_info, broker_id, recv_broker, send_broker):
        """Store configuration and create the local channels.

        Args:
            config_info: mapping providing ``env_para``, ``alg_para``,
                ``agent_para``, ``learner_postfix``, ``env_num`` and
                ``share_path``.
            broker_id: id stamped on every message sent to the broker.
            recv_broker: channel whose ``get()`` yields broker messages.
            send_broker: channel whose ``send()`` delivers to the broker.
        """
        self.env_para = deepcopy(config_info.get("env_para"))
        self.alg_para = deepcopy(config_info.get("alg_para"))
        self.agent_para = deepcopy(config_info.get("agent_para"))
        self.recv_broker = recv_broker
        self.send_broker = send_broker
        self.recv_agent = UniComm("LocalMsg")
        self.send_agent = UniComm("LocalMsg")
        self.explorer_id = self.env_para.get("env_id")
        self.broker_id = broker_id
        self.learner_postfix = config_info.get("learner_postfix")
        # Stays None until start_explore() builds the AgentGroup.
        self.rl_agent = None
        # A missing ``env_num`` falls back to the floor of 7 instead of
        # raising TypeError from max(None, 7).
        self.report_stats_interval = max(config_info.get("env_num") or 0, 7)

        self._buf_path = config_info["share_path"]
        self._buf = ShareBuf(live=10,
                             path=self._buf_path)  # live para is dummy

        logging.info("init explorer with id: {}, buf_path: {}".format(
            self.explorer_id, self._buf_path))

    def start_explore(self):
        """Run the exploration loop; never returns normally.

        SIGINT is ignored and ``CUDA_VISIBLE_DEVICES`` is set to ``"-1"``
        before the ``AgentGroup`` is created. Each round calls
        ``update_model`` and explores for ``sync_model_interval`` steps.
        Stats are posted on ``recv_agent`` after the first round and then
        whenever the step count modulo ``report_stats_interval`` equals
        ``explorer_id``; ``transfer_to_broker`` reads that same channel.

        Any exception is logged and the process exits with code 4.
        """
        signal.signal(signal.SIGINT, signal.SIG_IGN)
        os.environ["CUDA_VISIBLE_DEVICES"] = str(-1)
        explored_times = 0

        try:
            self.rl_agent = AgentGroup(self.env_para, self.alg_para,
                                       self.agent_para, self.send_agent,
                                       self.recv_agent, self._buf)
            explore_time = self.agent_para.get("agent_config",
                                               {}).get("sync_model_interval",
                                                       1)
            logging.info("explorer-{} start with sync interval-{}".format(
                self.explorer_id, explore_time))

            while True:
                model_type = self.rl_agent.update_model()
                stats = self.rl_agent.explore(explore_time)

                explored_times += explore_time
                if explored_times % self.report_stats_interval == self.explorer_id \
                        or explored_times == explore_time:
                    stats_msg = message(stats,
                                        cmd="stats_msg",
                                        broker_id=self.broker_id,
                                        explorer_id=self.explorer_id)
                    self.recv_agent.send(stats_msg)
                    if self.explorer_id < 1:
                        logging.debug(
                            "EXP{} ran {} ts, restore {} ts, last type:{}".
                            format(self.explorer_id, explored_times,
                                   self.rl_agent.restore_count, model_type))

        except BaseException as ex:
            logging.exception(ex)
            # Hard exit: skips interpreter cleanup.
            os._exit(4)

    def start_data_transfer(self):
        """Start the two message-forwarding threads."""
        to_broker_thread = threading.Thread(target=self.transfer_to_broker)
        to_broker_thread.start()

        to_agent_thread = threading.Thread(target=self.transfer_to_agent)
        to_agent_thread.start()

    def transfer_to_agent(self):
        """Forward broker messages to the agent side.

        A ``"close"`` command triggers ``close()`` and is not forwarded;
        for every other message only its data payload is sent on
        ``send_agent``. The loop never exits.
        """
        while True:
            data = self.recv_broker.get()
            cmd = get_msg_info(data, "cmd")
            if cmd == "close":
                logging.debug("enter explore close")
                self.close()
                continue

            data = get_msg_data(data)
            self.send_agent.send(data)

    def transfer_to_broker(self):
        """Forward agent messages to the broker.

        Each message is stamped with ``broker_id`` and ``explorer_id``, and
        its ``cmd`` gets ``learner_postfix`` appended. The loop never exits.
        """
        while True:
            data = self.recv_agent.recv()
            info_cmd = get_msg_info(data, "cmd")

            new_cmd = info_cmd + self.learner_postfix
            set_msg_info(data,
                         broker_id=self.broker_id,
                         explorer_id=self.explorer_id,
                         cmd=new_cmd)

            self.send_broker.send(data)

    def start(self):
        """Start actor's thread and process."""
        setproctitle.setproctitle("xt_explorer")

        self.start_data_transfer()
        self.start_explore()

    def close(self):
        """Close the agent group if it has been created.

        The transfer threads start before ``start_explore`` builds the
        ``AgentGroup``, so a ``"close"`` command can arrive while
        ``rl_agent`` is still ``None``. That case is logged and ignored so
        the forwarding thread is not killed by an ``AttributeError``.
        """
        agent_group = self.rl_agent
        if agent_group is None:
            logging.warning(
                "explorer-%s received close before AgentGroup was created",
                self.explorer_id)
            return
        agent_group.close()
Ejemplo n.º 7
0
class Explorer(object):
    """Explore the environment to generate train data.

    The explorer owns an ``AgentGroup`` (created lazily in
    ``start_explore``) and two forwarding threads that shuttle messages
    between the broker-facing channels (``recv_broker`` / ``send_broker``)
    and the agent-facing local channels (``send_agent`` / ``recv_agent``).
    """

    def __init__(self, config_info, broker_id, recv_broker, send_broker):
        """Store configuration and create the local channels.

        Args:
            config_info: mapping providing ``env_para``, ``alg_para`` and
                ``agent_para``.
            broker_id: id stamped on every message sent to the broker.
            recv_broker: channel whose ``get()`` yields broker messages.
            send_broker: channel whose ``send()`` delivers to the broker.
        """
        self.env_para = deepcopy(config_info.get("env_para"))
        self.alg_para = deepcopy(config_info.get("alg_para"))
        self.agent_para = deepcopy(config_info.get("agent_para"))
        self.recv_broker = recv_broker
        self.send_broker = send_broker
        self.recv_agent = UniComm("LocalMsg")
        self.send_agent = UniComm("LocalMsg")
        self.explorer_id = self.env_para.get("env_id")
        self.broker_id = broker_id
        # Stays None until start_explore() builds the AgentGroup.
        self.rl_agent = None

        logging.debug("init explorer with id: {}".format(self.explorer_id))

    def start_explore(self):
        """Run the exploration loop; never returns normally.

        SIGINT is ignored and ``CUDA_VISIBLE_DEVICES`` is set to ``"-1"``
        before the ``AgentGroup`` is created. Each round explores for
        ``sync_model_interval`` steps (default 1) and logs the running total.

        Any exception is logged and the process exits with code 4.
        """
        signal.signal(signal.SIGINT, signal.SIG_IGN)
        os.environ["CUDA_VISIBLE_DEVICES"] = str(-1)
        explored_times = 0
        try:
            self.rl_agent = AgentGroup(
                self.env_para,
                self.alg_para,
                self.agent_para,
                self.send_agent,
                self.recv_agent,
            )
            explore_time = self.agent_para.get("agent_config", {}).get("sync_model_interval", 1)
            logging.info("AgentGroup start to explore with sync interval-{}".format(explore_time))

            while True:
                self.rl_agent.explore(explore_time)
                explored_times += explore_time
                logging.debug("end explore-{}".format(explored_times))
        except BaseException as ex:
            logging.exception(ex)
            # Hard exit: skips interpreter cleanup.
            os._exit(4)

    def start_data_transfer(self):
        """Start the two message-forwarding threads."""
        to_broker_thread = threading.Thread(target=self.transfer_to_broker)
        to_broker_thread.start()

        to_agent_thread = threading.Thread(target=self.transfer_to_agent)
        to_agent_thread.start()

    def transfer_to_agent(self):
        """Forward broker messages to the agent side.

        A ``"close"`` command triggers ``close()`` and is not forwarded;
        for every other message only its data payload is sent on
        ``send_agent``. The loop never exits.
        """
        while True:
            data = self.recv_broker.get()
            cmd = get_msg_info(data, "cmd")
            if cmd == "close":
                print("enter explore close")
                self.close()
                continue

            data = get_msg_data(data)
            self.send_agent.send(data)

    def transfer_to_broker(self):
        """Forward agent messages to the broker.

        Each message is stamped with ``broker_id`` and ``explorer_id``
        before being sent on ``send_broker``. The loop never exits.
        """
        while True:
            data = self.recv_agent.recv()

            set_msg_info(data, broker_id=self.broker_id,
                         explorer_id=self.explorer_id)

            self.send_broker.send(data)

    def start(self):
        """Start the forwarding threads, then run the exploration loop."""
        self.start_data_transfer()
        self.start_explore()

    def close(self):
        """Close the agent group if it has been created.

        The transfer threads start before ``start_explore`` builds the
        ``AgentGroup``, so a ``"close"`` command can arrive while
        ``rl_agent`` is still ``None``. That case is logged and ignored so
        the forwarding thread is not killed by an ``AttributeError``.
        """
        agent_group = self.rl_agent
        if agent_group is None:
            logging.warning(
                "explorer-%s received close before AgentGroup was created",
                self.explorer_id)
            return
        agent_group.close()
Ejemplo n.º 8
0
class Explorer(object):
    """Create an explorer to explore environment to generate train data.

    The explorer owns an ``AgentGroup`` (created lazily in
    ``start_explore``) and two forwarding threads that shuttle messages
    between the broker-facing channels (``recv_broker`` / ``send_broker``)
    and the agent-facing local channels (``send_agent`` / ``recv_agent``).
    """

    def __init__(self, config_info, broker_id, recv_broker, send_broker):
        """Store configuration and create the local channels.

        Args:
            config_info: mapping providing ``env_para``, ``alg_para``,
                ``agent_para`` and ``share_path``.
            broker_id: id stamped on every message sent to the broker.
            recv_broker: channel whose ``get()`` yields broker messages.
            send_broker: channel whose ``send()`` delivers to the broker.
        """
        self.env_para = deepcopy(config_info.get("env_para"))
        self.alg_para = deepcopy(config_info.get("alg_para"))
        self.agent_para = deepcopy(config_info.get("agent_para"))
        self.recv_broker = recv_broker
        self.send_broker = send_broker
        self.recv_agent = UniComm("LocalMsg")
        self.send_agent = UniComm("LocalMsg")
        self.explorer_id = self.env_para.get("env_id")
        self.broker_id = broker_id
        # Stays None until start_explore() builds the AgentGroup.
        self.rl_agent = None

        self._buf_path = config_info["share_path"]
        self._buf = ShareBuf(live=10,
                             path=self._buf_path)  # live para is dummy

        logging.debug("init explorer with id: {}, buf_path: {}".format(
            self.explorer_id, self._buf_path))

    def start_explore(self):
        """Run the exploration loop; never returns normally.

        SIGINT is ignored and ``CUDA_VISIBLE_DEVICES`` is set to ``"-1"``
        before the ``AgentGroup`` is created. Each round explores for
        ``sync_model_interval`` steps (default 1). Whenever more than 20
        steps have accumulated since the last report, the stats are posted
        on ``recv_agent`` as a ``"stats_msg"`` message;
        ``transfer_to_broker`` reads that same channel.

        Any exception is logged and the process exits with code 4.
        """
        signal.signal(signal.SIGINT, signal.SIG_IGN)
        os.environ["CUDA_VISIBLE_DEVICES"] = str(-1)
        explored_times = 0

        report_stats_interval = 20
        # Far enough in the past that the first round always reports.
        last_report_index = -999
        try:
            self.rl_agent = AgentGroup(self.env_para, self.alg_para,
                                       self.agent_para, self.send_agent,
                                       self.recv_agent, self._buf)
            explore_time = self.agent_para.get("agent_config",
                                               {}).get("sync_model_interval",
                                                       1)
            logging.info(
                "AgentGroup start to explore with sync interval-{}".format(
                    explore_time))

            while True:
                stats = self.rl_agent.explore(explore_time)
                explored_times += explore_time

                if self.explorer_id < 1:
                    logging.debug("explore-{} ran {} times".format(
                        self.explorer_id, explored_times))

                if explored_times - last_report_index > report_stats_interval:
                    stats_msg = message(stats, cmd="stats_msg")
                    self.recv_agent.send(stats_msg)
                    last_report_index = explored_times

        except BaseException as ex:
            logging.exception(ex)
            # Hard exit: skips interpreter cleanup.
            os._exit(4)

    def start_data_transfer(self):
        """Start the two message-forwarding threads."""
        to_broker_thread = threading.Thread(target=self.transfer_to_broker)
        to_broker_thread.start()

        to_agent_thread = threading.Thread(target=self.transfer_to_agent)
        to_agent_thread.start()

    def transfer_to_agent(self):
        """Forward broker messages to the agent side.

        A ``"close"`` command triggers ``close()`` and is not forwarded;
        for every other message only its data payload is sent on
        ``send_agent``. The loop never exits.
        """
        while True:
            data = self.recv_broker.get()
            cmd = get_msg_info(data, "cmd")
            if cmd == "close":
                logging.debug("enter explore close")
                self.close()
                continue

            data = get_msg_data(data)
            self.send_agent.send(data)

    def transfer_to_broker(self):
        """Forward agent messages to the broker.

        Each message is stamped with ``broker_id`` and ``explorer_id``.
        Messages whose ``cmd`` is ``"buf_reduce"`` are sent with
        ``data_type="buf_reduce"``; everything else uses ``"data"``.
        The loop never exits.
        """
        while True:
            data = self.recv_agent.recv()

            info_cmd = get_msg_info(data, "cmd")
            data_type = "buf_reduce" if info_cmd == "buf_reduce" else "data"
            set_msg_info(data,
                         broker_id=self.broker_id,
                         explorer_id=self.explorer_id)

            self.send_broker.send(data, data_type=data_type)

    def start(self):
        """Start the forwarding threads, then run the exploration loop."""
        self.start_data_transfer()
        self.start_explore()

    def close(self):
        """Close the agent group if it has been created.

        The transfer threads start before ``start_explore`` builds the
        ``AgentGroup``, so a ``"close"`` command can arrive while
        ``rl_agent`` is still ``None``. That case is logged and ignored so
        the forwarding thread is not killed by an ``AttributeError``.
        """
        agent_group = self.rl_agent
        if agent_group is None:
            logging.warning(
                "explorer-%s received close before AgentGroup was created",
                self.explorer_id)
            return
        agent_group.close()