Example #1
0
class IMPALAOpt(Algorithm):
    """Build IMPALA algorithm.

    Data handed to :meth:`prepare_data` is buffered per field;
    :meth:`train` concatenates the buffers, trains the actor in
    ``BATCH_SIZE`` slices and then empties the buffers again.
    """

    def __init__(self, model_info, alg_config, **kwargs):
        """Set up the sample buffers and the model distribution policy.

        :param model_info: model description; only its ``"actor"`` entry is
            forwarded to the base class.
        :param alg_config: algorithm configuration; ``"instance_num"`` is
            read here.
        :param kwargs: accepted for interface compatibility, unused here.
        """
        # NOTE(review): presumably publishes alg_config entries (such as
        # BATCH_SIZE used in train()) as module globals -- confirm against
        # import_config.
        import_config(globals(), alg_config)
        super().__init__(alg_name="impala",
                         model_info=model_info["actor"],
                         alg_config=alg_config)

        # One buffer per field; each prepare_data() call appends one entry.
        self.states = []
        self.behavior_logits = []
        self.actions = []
        self.dones = []
        self.rewards = []
        self.async_flag = False

        # update to divide model policy
        self.dist_model_policy = EqualDistPolicy(
            alg_config["instance_num"],
            prepare_times=self._prepare_times_per_train)

        # Background training is switched off; the branch below only runs
        # if this flag is flipped to True.
        self.use_train_thread = False
        if self.use_train_thread:
            self.send_train = UniComm("LocalMsg")
            train_thread = threading.Thread(target=self._train_thread,
                                            daemon=True)
            train_thread.start()

    def _train_thread(self):
        """Train the actor forever on batches received from ``send_train``."""
        while True:
            data = self.send_train.recv()
            batch_state, batch_logit, batch_action, batch_done, batch_reward = data
            self.actor.train(
                batch_state,
                [batch_logit, batch_action, batch_done, batch_reward],
            )

    def train(self, **kwargs):
        """Train impala agent by calling tf.sess.

        Concatenates everything buffered by :meth:`prepare_data`, trains the
        actor on consecutive ``BATCH_SIZE`` slices and clears the buffers.

        :return: mean of the per-batch loss values.
        :raises ValueError: from ``np.concatenate`` when nothing has been
            buffered since the last call.
        """
        states = np.concatenate(self.states)
        behavior_logits = np.concatenate(self.behavior_logits)
        actions = np.concatenate(self.actions)
        dones = np.concatenate(self.dones)
        rewards = np.concatenate(self.rewards)

        loss_list = []
        # The last slice may be shorter than BATCH_SIZE.
        for start_index in range(0, len(states), BATCH_SIZE):
            end_index = start_index + BATCH_SIZE
            batch = slice(start_index, end_index)

            actor_loss = self.actor.train(
                states[batch],
                [
                    behavior_logits[batch],
                    actions[batch],
                    dones[batch],
                    rewards[batch],
                ],
            )
            loss_list.append(loss_to_val(actor_loss))

        # clear states for next iter
        self.states.clear()
        self.behavior_logits.clear()
        self.actions.clear()
        self.dones.clear()
        self.rewards.clear()
        return np.mean(loss_list)

    def save(self, model_path, model_index):
        """Save model.

        :param model_path: directory the model is written into.
        :param model_index: index used to build the zero-padded file name.
        :return: single-element list with the saved actor's base file name.
        """
        actor_name = "actor" + str(model_index).zfill(5)
        saved_path = self.actor.save_model(os.path.join(model_path, actor_name))

        return [os.path.basename(saved_path)]

    def prepare_data(self, train_data, **kwargs):
        """Prepare the data for impala algorithm.

        Splits ``train_data`` into its fields and appends each one to the
        matching buffer consumed by :meth:`train`.
        """
        state, logit, action, done, reward = self._data_proc(train_data)
        self.states.append(state)
        self.behavior_logits.append(logit)
        self.actions.append(action)
        self.dones.append(done)
        self.rewards.append(reward)

    def predict(self, state):
        """Predict with actor inference operation."""
        return self.actor.predict(state)

    @staticmethod
    def _data_proc(episode_data):
        """
        Process data for impala.

        Agent will record the follows:
            states, behavior_logits, actions, dones, rewards

        ``done`` is converted to a boolean array and ``reward`` to an array;
        the other fields are returned as stored.
        """
        states = episode_data["cur_state"]

        behavior_logits = episode_data["logit"]
        actions = episode_data["action"]
        # The ``np.bool`` alias was removed from NumPy; builtin bool is the
        # same dtype.
        dones = np.asarray(episode_data["done"], dtype=bool)

        rewards = np.asarray(episode_data["reward"])

        return states, behavior_logits, actions, dones, rewards
Example #2
0
class BrokerMaster(object):
    """BrokerMaster Manage Broker within Learner.

    Messages from remote broker slaves arrive on one PULL channel and are
    dispatched to local queues registered per command; messages put on the
    local receive queue are either handed to such a registered local queue
    or pushed to one or all broker slaves.
    """

    def __init__(self, node_config_list, start_port=None):
        """Create the communication channels.

        :param node_config_list: one entry per broker slave node; its length
            fixes the number of PUSH channels.
        :param start_port: base port; when falsy a port is requested from
            ``CommConf`` and released again in :meth:`close`.
        """
        self.node_config_list = node_config_list
        self.node_num = len(node_config_list)
        comm_conf = None
        if not start_port:
            comm_conf = CommConf()
            start_port = comm_conf.get_start_port()
        self.start_port = start_port
        logging.info("master broker init on port: {}".format(start_port))
        # Stays None when the caller supplied the port itself.
        self.comm_conf = comm_conf

        recv_port, send_port = get_port(start_port)
        self.recv_slave = UniComm("CommByZmq", type="PULL", port=recv_port)
        # One PUSH channel per node, on consecutive ports.
        self.send_slave = [
            UniComm("CommByZmq", type="PUSH", port=send_port + i)
            for i in range(self.node_num)
        ]

        self.recv_local_q = UniComm("LocalMsg")
        # cmd -> local queue, filled by register().
        self.send_local_q = dict()

        self.main_task = None
        self.metric = TimerRecorder("master", maxlen=50, fields=("send", "recv"))

    def start_data_transfer(self):
        """Start transfer data and other thread."""
        # alloc_actor() is intentionally not started here.
        for target in (self.recv_broker_slave, self.recv_local):
            threading.Thread(target=target, daemon=True).start()

    def recv_broker_slave(self):
        """Receive remote train data in sync mode.

        Loops forever: decompresses and deserializes each message, then
        forwards it to the local queue registered for its cmd, if any.
        """
        while True:
            recv_data = self.recv_slave.recv_bytes()
            _t0 = time.time()
            recv_data = deserialize(lz4.frame.decompress(recv_data))
            self.metric.append(recv=time.time() - _t0)

            cmd = get_msg_info(recv_data, "cmd")
            send_cmd = self.send_local_q.get(cmd)
            if send_cmd:
                send_cmd.send(recv_data)

            # report log
            self.metric.report_if_need()

    def recv_local(self):
        """Receive local cmd.

        Loops forever over the local receive queue. A "close" message shuts
        the process down. A message whose cmd has a registered local queue is
        delivered to that queue; anything else is sent to the broker slave
        named by ``broker_id`` (``-1`` means every slave).
        """
        while True:
            recv_data = self.recv_local_q.recv()
            cmd = get_msg_info(recv_data, "cmd")
            if cmd == "close":
                self.close(recv_data)

            # Test membership against the registry itself; wrapping keys()
            # in a list compared cmd with the view object and never matched.
            if cmd in self.send_local_q:
                self.send_local_q[cmd].send(recv_data)
                logging.debug("recv: {} with cmd-{}".format(type(recv_data["data"]), cmd))
                continue

            _t1 = time.time()
            broker_id = get_msg_info(recv_data, "broker_id")
            logging.debug("master recv:{} with cmd:'{}' to broker_id: <{}>".format(
                type(recv_data["data"]), cmd, broker_id))

            if broker_id == -1:
                for slave in self.send_slave:
                    slave.send(recv_data)
            else:
                self.send_slave[broker_id].send(recv_data)
            self.metric.append(send=time.time() - _t1)

    def register(self, cmd):
        """Create a local queue for ``cmd`` and return it.

        Registering the same cmd again replaces the previous queue.
        """
        self.send_local_q.update({cmd: UniComm("LocalMsg")})
        return self.send_local_q[cmd]

    def alloc_actor(self):
        """Periodically ask slaves to scale actors by "train" queue length.

        Every 10 seconds: more than 200 pending entries sends "decrease",
        fewer than 10 sends "increase". Does nothing until a "train" queue
        has been registered.
        """
        while True:
            time.sleep(10)
            if not self.send_local_q.get("train"):
                continue

            pending = len(self.send_local_q["train"].comm.data_list)
            if pending > 200:
                self.send_alloc_msg("decrease")
            elif pending < 10:
                self.send_alloc_msg("increase")

    def send_alloc_msg(self, actor_status):
        """Send an allocation command to every broker slave.

        :param actor_status: value placed in the message's ``cmd`` field.
        """
        alloc_cmd = {
            "ctr_info": {"cmd": actor_status, "actor_id": -1, "explorer_id": -1}
        }
        for q in self.send_slave:
            q.send(alloc_cmd)

    def close(self, close_cmd):
        """Forward the close command to all slaves and exit the process.

        The start port is released only when it was obtained from
        ``CommConf``; a failure to release is logged and never prevents the
        exit.
        """
        for slave in self.send_slave:
            slave.send(close_cmd)

        # Pause before tearing the process down, as before.
        time.sleep(1)
        try:
            if self.comm_conf is not None:
                self.comm_conf.release_start_port(self.start_port)
        except Exception:
            logging.exception(
                "failed to release start port: {}".format(self.start_port))
        finally:
            # Hard exit regardless of what happened above.
            os._exit(0)

    def start(self):
        """Start all system."""
        self.start_data_transfer()

    def main_loop(self):
        """
        Create the main_loop after ready the messy setup works.

        The foreground task of broker master.
        :return:
        """
        if not self.main_task:
            logging.fatal("learning process isn't ready!")
        # NOTE(review): with main_task unset this still falls through and
        # fails with AttributeError; kept so callers see the same exception.
        self.main_task.main_loop()

    def stop(self):
        """Stop all system."""
        close_cmd = message(None, cmd="close")
        self.recv_local_q.send(close_cmd)
Example #3
0
class Explorer(object):
    """Create an explorer to explore environment to generate train data.

    Runs an ``AgentGroup`` in the foreground and two relay threads: one
    moves messages from the broker to the agent group, the other moves
    messages from the agent group to the broker.
    """

    def __init__(self, config_info, broker_id, recv_broker, send_broker):
        """Store configuration and create the local queues.

        :param config_info: mapping providing ``env_para``, ``alg_para``,
            ``agent_para``, ``learner_postfix``, ``env_num`` and
            ``share_path``.
        :param broker_id: id stamped on every message sent to the broker.
        :param recv_broker: source of broker messages; read with ``get()``.
        :param send_broker: sink for messages going to the broker.
        """
        # Deep copies keep later changes here from leaking into config_info.
        self.env_para = deepcopy(config_info.get("env_para"))
        self.alg_para = deepcopy(config_info.get("alg_para"))
        self.agent_para = deepcopy(config_info.get("agent_para"))
        self.recv_broker = recv_broker
        self.send_broker = send_broker
        self.recv_agent = UniComm("LocalMsg")
        self.send_agent = UniComm("LocalMsg")
        self.explorer_id = self.env_para.get("env_id")
        self.broker_id = broker_id
        self.learner_postfix = config_info.get("learner_postfix")
        # Created in start_explore(); None until then.
        self.rl_agent = None
        self.report_stats_interval = max(config_info.get('env_num'), 7)

        self._buf_path = config_info["share_path"]
        self._buf = ShareBuf(live=10,
                             path=self._buf_path)  # live para is dummy

        logging.info("init explorer with id: {}, buf_path: {}".format(
            self.explorer_id, self._buf_path))

    def start_explore(self):
        """Start explore process.

        Builds the agent group and then loops forever: update the model,
        explore for ``sync_model_interval`` steps and periodically queue a
        statistics message. Any exception is logged and ends the process
        with exit status 4.
        """
        # Ignore Ctrl-C in this process.
        signal.signal(signal.SIGINT, signal.SIG_IGN)
        os.environ["CUDA_VISIBLE_DEVICES"] = str(-1)
        explored_times = 0

        try:
            self.rl_agent = AgentGroup(self.env_para, self.alg_para,
                                       self.agent_para, self.send_agent,
                                       self.recv_agent, self._buf)
            agent_config = self.agent_para.get("agent_config", {})
            explore_time = agent_config.get("sync_model_interval", 1)
            logging.info("explorer-{} start with sync interval-{}".format(
                self.explorer_id, explore_time))

            while True:
                model_type = self.rl_agent.update_model()
                stats = self.rl_agent.explore(explore_time)

                explored_times += explore_time
                # Report after the very first round, and afterwards whenever
                # the counter modulo the interval equals this explorer's id.
                first_round = explored_times == explore_time
                own_slot = (explored_times % self.report_stats_interval
                            == self.explorer_id)
                if not (own_slot or first_round):
                    continue

                stats_msg = message(stats,
                                    cmd="stats_msg",
                                    broker_id=self.broker_id,
                                    explorer_id=self.explorer_id)
                self.recv_agent.send(stats_msg)
                if self.explorer_id < 1:
                    logging.debug(
                        "EXP{} ran {} ts, restore {} ts, last type:{}".
                        format(self.explorer_id, explored_times,
                               self.rl_agent.restore_count, model_type))

        except BaseException as ex:
            logging.exception(ex)
            os._exit(4)

    def start_data_transfer(self):
        """Start transfer data and other thread."""
        for target in (self.transfer_to_broker, self.transfer_to_agent):
            threading.Thread(target=target).start()

    def transfer_to_agent(self):
        """Relay messages from the broker to the agent group.

        A "close" command closes the agent group and is not forwarded; for
        every other message only its data part is forwarded.
        """
        while True:
            data = self.recv_broker.get()
            cmd = get_msg_info(data, "cmd")
            if cmd == "close":
                logging.debug("enter explore close")
                self.close()
                continue

            data = get_msg_data(data)
            self.send_agent.send(data)

    def transfer_to_broker(self):
        """Send train data to learner.

        Appends ``learner_postfix`` to the message's cmd and stamps the
        broker and explorer ids before passing the message to the broker.
        """
        while True:
            data = self.recv_agent.recv()
            info_cmd = get_msg_info(data, "cmd")

            new_cmd = info_cmd + self.learner_postfix
            set_msg_info(data,
                         broker_id=self.broker_id,
                         explorer_id=self.explorer_id,
                         cmd=new_cmd)

            self.send_broker.send(data)

    def start(self):
        """Start actor's thread and process."""
        setproctitle.setproctitle("xt_explorer")

        self.start_data_transfer()
        self.start_explore()

    def close(self):
        """Close the agent group if it has been created.

        A close request can arrive before start_explore() has built the
        agent group; that case is logged and skipped so the relay thread
        calling this keeps running.
        """
        if self.rl_agent is None:
            logging.warning(
                "explorer-{} got close before agent group was created".format(
                    self.explorer_id))
            return
        self.rl_agent.close()
Example #4
0
class Explorer(object):
    """Create an explorer to explore environment to generate train data.

    Runs an ``AgentGroup`` in the foreground and two relay threads: one
    moves messages from the broker to the agent group, the other moves
    messages from the agent group to the broker.
    """

    def __init__(self, config_info, broker_id, recv_broker, send_broker):
        """Store configuration and create the local queues.

        :param config_info: mapping providing ``env_para``, ``alg_para``,
            ``agent_para`` and ``share_path``.
        :param broker_id: id stamped on every message sent to the broker.
        :param recv_broker: source of broker messages; read with ``get()``.
        :param send_broker: sink for messages going to the broker.
        """
        # Deep copies keep later changes here from leaking into config_info.
        self.env_para = deepcopy(config_info.get("env_para"))
        self.alg_para = deepcopy(config_info.get("alg_para"))
        self.agent_para = deepcopy(config_info.get("agent_para"))
        self.recv_broker = recv_broker
        self.send_broker = send_broker
        self.recv_agent = UniComm("LocalMsg")
        self.send_agent = UniComm("LocalMsg")
        self.explorer_id = self.env_para.get("env_id")
        self.broker_id = broker_id
        # Created in start_explore(); None until then.
        self.rl_agent = None

        self._buf_path = config_info["share_path"]
        self._buf = ShareBuf(live=10,
                             path=self._buf_path)  # live para is dummy

        logging.debug("init explorer with id: {}, buf_path: {}".format(
            self.explorer_id, self._buf_path))

    def start_explore(self):
        """Start explore process.

        Builds the agent group and then loops forever: explore for
        ``sync_model_interval`` steps and queue a statistics message once
        more than 20 steps have passed since the last report. Any exception
        is logged and ends the process with exit status 4.
        """
        # Ignore Ctrl-C in this process.
        signal.signal(signal.SIGINT, signal.SIG_IGN)
        os.environ["CUDA_VISIBLE_DEVICES"] = str(-1)
        explored_times = 0

        report_stats_interval = 20
        # Far enough in the past that the first round always reports.
        last_report_index = -999
        try:
            self.rl_agent = AgentGroup(self.env_para, self.alg_para,
                                       self.agent_para, self.send_agent,
                                       self.recv_agent, self._buf)
            agent_config = self.agent_para.get("agent_config", {})
            explore_time = agent_config.get("sync_model_interval", 1)
            logging.info(
                "AgentGroup start to explore with sync interval-{}".format(
                    explore_time))

            while True:
                stats = self.rl_agent.explore(explore_time)
                explored_times += explore_time

                if self.explorer_id < 1:
                    logging.debug("explore-{} ran {} times".format(
                        self.explorer_id, explored_times))

                if explored_times - last_report_index <= report_stats_interval:
                    continue

                stats_msg = message(stats, cmd="stats_msg")
                self.recv_agent.send(stats_msg)
                last_report_index = explored_times

        except BaseException as ex:
            logging.exception(ex)
            os._exit(4)

    def start_data_transfer(self):
        """Start transfer data and other thread."""
        for target in (self.transfer_to_broker, self.transfer_to_agent):
            threading.Thread(target=target).start()

    def transfer_to_agent(self):
        """Relay messages from the broker to the agent group.

        A "close" command closes the agent group and is not forwarded; for
        every other message only its data part is forwarded.
        """
        while True:
            data = self.recv_broker.get()
            cmd = get_msg_info(data, "cmd")
            if cmd == "close":
                logging.debug("enter explore close")
                self.close()
                continue

            data = get_msg_data(data)
            self.send_agent.send(data)

    def transfer_to_broker(self):
        """Send train data to learner.

        Stamps the broker and explorer ids on each message and sends it with
        ``data_type`` "buf_reduce" when its cmd is "buf_reduce", otherwise
        "data".
        """
        while True:
            data = self.recv_agent.recv()

            info_cmd = get_msg_info(data, "cmd")
            data_type = "buf_reduce" if info_cmd == "buf_reduce" else "data"
            set_msg_info(data,
                         broker_id=self.broker_id,
                         explorer_id=self.explorer_id)

            self.send_broker.send(data, data_type=data_type)

    def start(self):
        """Start actor's thread and process."""
        self.start_data_transfer()
        self.start_explore()

    def close(self):
        """Close the agent group if it has been created.

        A close request can arrive before start_explore() has built the
        agent group; that case is logged and skipped so the relay thread
        calling this keeps running.
        """
        if self.rl_agent is None:
            logging.warning(
                "explorer-{} got close before agent group was created".format(
                    self.explorer_id))
            return
        self.rl_agent.close()