class IMPALAOpt(Algorithm):
    """Build IMPALA algorithm."""

    def __init__(self, model_info, alg_config, **kwargs):
        """Initialize the IMPALA optimizer: actor model, buffers and policy."""
        import_config(globals(), alg_config)
        super().__init__(alg_name="impala",
                         model_info=model_info["actor"],
                         alg_config=alg_config)
        # Trajectory fragments accumulated by prepare_data() until train().
        self.states = list()
        self.behavior_logits = list()
        self.actions = list()
        self.dones = list()
        self.rewards = list()
        self.async_flag = False

        # update to divide model policy
        self.dist_model_policy = EqualDistPolicy(
            alg_config["instance_num"],
            prepare_times=self._prepare_times_per_train)

        # Optional background-training path; disabled by default.
        self.use_train_thread = False
        if self.use_train_thread:
            self.send_train = UniComm("LocalMsg")
            train_thread = threading.Thread(target=self._train_thread)
            # Thread.setDaemon() is deprecated since Python 3.10;
            # assigning the attribute is the supported spelling.
            train_thread.daemon = True
            train_thread.start()

    def _train_thread(self):
        """Consume mini-batches from the local queue and train forever."""
        while True:
            batch_state, batch_logit, batch_action, batch_done, batch_reward = \
                self.send_train.recv()
            self.actor.train(
                batch_state,
                [batch_logit, batch_action, batch_done, batch_reward],
            )

    def train(self, **kwargs):
        """Train impala agent by calling tf.sess.

        Concatenates all buffered trajectory fragments, trains in
        BATCH_SIZE-sized slices, then clears the buffers.

        Returns:
            The mean actor loss over all mini-batches of this call.
        """
        states = np.concatenate(self.states)
        behavior_logits = np.concatenate(self.behavior_logits)
        actions = np.concatenate(self.actions)
        dones = np.concatenate(self.dones)
        rewards = np.concatenate(self.rewards)

        nbatch = len(states)
        # Ceiling division: the last slice may be shorter than BATCH_SIZE.
        count = (nbatch + BATCH_SIZE - 1) // BATCH_SIZE

        loss_list = []
        for batch_index in range(count):
            start_index = batch_index * BATCH_SIZE
            end_index = start_index + BATCH_SIZE
            batch_state = states[start_index:end_index]
            batch_logit = behavior_logits[start_index:end_index]
            batch_action = actions[start_index:end_index]
            batch_done = dones[start_index:end_index]
            batch_reward = rewards[start_index:end_index]

            actor_loss = self.actor.train(
                batch_state,
                [batch_logit, batch_action, batch_done, batch_reward],
            )
            loss_list.append(loss_to_val(actor_loss))

        # clear states for next iter
        self.states.clear()
        self.behavior_logits.clear()
        self.actions.clear()
        self.dones.clear()
        self.rewards.clear()

        return np.mean(loss_list)

    def save(self, model_path, model_index):
        """Save model and return its bare file name in a list."""
        actor_name = "actor" + str(model_index).zfill(5)
        actor_name = self.actor.save_model(
            os.path.join(model_path, actor_name))
        # os.path.basename is portable; splitting on "/" breaks on Windows.
        actor_name = os.path.basename(actor_name)
        return [actor_name]

    def prepare_data(self, train_data, **kwargs):
        """Prepare the data for impala algorithm.

        Buffers one processed trajectory fragment for the next train() call.
        """
        state, logit, action, done, reward = self._data_proc(train_data)
        self.states.append(state)
        self.behavior_logits.append(logit)
        self.actions.append(action)
        self.dones.append(done)
        self.rewards.append(reward)

    def predict(self, state):
        """Predict with actor inference operation."""
        return self.actor.predict(state)

    @staticmethod
    def _data_proc(episode_data):
        """
        Process data for impala.

        Agent will record the follows: states, behavior_logits,
        actions, dones, rewards.
        """
        states = episode_data["cur_state"]
        behavior_logits = episode_data["logit"]
        actions = episode_data["action"]
        # BUGFIX: `np.bool` was a deprecated alias removed in NumPy 1.24;
        # the builtin `bool` is the documented replacement.
        dones = np.asarray(episode_data["done"], dtype=bool)
        rewards = np.asarray(episode_data["reward"])
        return states, behavior_logits, actions, dones, rewards
class BrokerMaster(object):
    """BrokerMaster Manage Broker within Learner."""

    def __init__(self, node_config_list, start_port=None):
        """Set up the ZMQ channels to each broker slave and the local queues.

        Args:
            node_config_list: one config entry per broker-slave node.
            start_port: base port; when falsy, a fresh one is allocated
                from CommConf (and released again on close()).
        """
        self.node_config_list = node_config_list
        self.node_num = len(node_config_list)

        comm_conf = None
        if not start_port:
            comm_conf = CommConf()
            start_port = comm_conf.get_start_port()
        self.start_port = start_port
        logging.info("master broker init on port: {}".format(start_port))
        self.comm_conf = comm_conf

        recv_port, send_port = get_port(start_port)
        # One shared PULL socket for all slaves; one PUSH socket per slave.
        self.recv_slave = UniComm("CommByZmq", type="PULL", port=recv_port)
        self.send_slave = [
            UniComm("CommByZmq", type="PUSH", port=send_port + i)
            for i in range(self.node_num)
        ]

        self.recv_local_q = UniComm("LocalMsg")
        self.send_local_q = dict()
        self.main_task = None
        self.metric = TimerRecorder("master", maxlen=50,
                                    fields=("send", "recv"))

    def start_data_transfer(self):
        """Start transfer data and other thread."""
        for target in (self.recv_broker_slave, self.recv_local):
            data_transfer_thread = threading.Thread(target=target)
            # Thread.setDaemon() is deprecated since Python 3.10.
            data_transfer_thread.daemon = True
            data_transfer_thread.start()

        # alloc_thread = threading.Thread(target=self.alloc_actor)
        # alloc_thread.setDaemon(True)
        # alloc_thread.start()

    def recv_broker_slave(self):
        """Receive remote train data in sync mode."""
        while True:
            recv_data = self.recv_slave.recv_bytes()
            _t0 = time.time()
            recv_data = deserialize(lz4.frame.decompress(recv_data))
            self.metric.append(recv=time.time() - _t0)
            cmd = get_msg_info(recv_data, "cmd")
            # Dispatch to the local queue registered for this cmd, if any.
            # (A dead `if cmd in []: pass` guard used to precede this.)
            send_cmd = self.send_local_q.get(cmd)
            if send_cmd:
                send_cmd.send(recv_data)

            # report log
            self.metric.report_if_need()

    def recv_local(self):
        """Receive local cmd and route it to a local queue or the slaves."""
        while True:
            recv_data = self.recv_local_q.recv()
            cmd = get_msg_info(recv_data, "cmd")

            if cmd in ["close"]:
                # close() never returns; it exits the whole process.
                self.close(recv_data)

            if cmd in self.send_local_q:
                # BUGFIX: this condition used to read
                # `cmd in [self.send_local_q.keys()]`, which compares cmd
                # against a dict_keys object inside a one-element list and
                # is never true — registered local queues were silently
                # bypassed and every message fell through to the slaves.
                self.send_local_q[cmd].send(recv_data)
                logging.debug("recv: {} with cmd-{}".format(
                    type(recv_data["data"]), cmd))
            else:
                _t1 = time.time()
                broker_id = get_msg_info(recv_data, "broker_id")
                _cmd = get_msg_info(recv_data, "cmd")
                logging.debug(
                    "master recv:{} with cmd:'{}' to broker_id: <{}>".format(
                        type(recv_data["data"]), _cmd, broker_id))
                # self.metric.append(debug=time.time() - _t1)

                if broker_id == -1:
                    # -1 means broadcast to every slave.
                    for slave in self.send_slave:
                        slave.send(recv_data)
                else:
                    self.send_slave[broker_id].send(recv_data)
                self.metric.append(send=time.time() - _t1)

    def register(self, cmd):
        """Register and return a local queue that receives messages for cmd."""
        self.send_local_q.update({cmd: UniComm("LocalMsg")})
        return self.send_local_q[cmd]

    def alloc_actor(self):
        """Periodically scale actors by the backlog of the train queue."""
        while True:
            time.sleep(10)
            if not self.send_local_q.get("train"):
                continue
            train_list = self.send_local_q["train"].comm.data_list
            if len(train_list) > 200:
                self.send_alloc_msg("decrease")
            elif len(train_list) < 10:
                self.send_alloc_msg("increase")

    def send_alloc_msg(self, actor_status):
        """Broadcast an actor increase/decrease command to all slaves."""
        alloc_cmd = {
            "ctr_info": {"cmd": actor_status, "actor_id": -1,
                         "explorer_id": -1}
        }
        for q in self.send_slave:
            q.send(alloc_cmd)

    def close(self, close_cmd):
        """Forward the close command to all slaves and exit the process."""
        for slave in self.send_slave:
            slave.send(close_cmd)
        time.sleep(1)
        try:
            # comm_conf is None when the caller supplied start_port.
            self.comm_conf.release_start_port(self.start_port)
        except BaseException:  # best-effort cleanup right before exit
            pass
        os._exit(0)

    def start(self):
        """Start all system."""
        self.start_data_transfer()

    def main_loop(self):
        """
        Create the main_loop after ready the messy setup works.

        The foreground task of broker master.
        :return:
        """
        if not self.main_task:
            logging.fatal("learning process isn't ready!")
        self.main_task.main_loop()

    def stop(self):
        """Stop all system."""
        close_cmd = message(None, cmd="close")
        self.recv_local_q.send(close_cmd)
class Explorer(object):
    """Create an explorer to explore environment to generate train data."""

    def __init__(self, config_info, broker_id, recv_broker, send_broker):
        """Wire this explorer into its broker and open the local channels."""
        self.env_para = deepcopy(config_info.get("env_para"))
        self.alg_para = deepcopy(config_info.get("alg_para"))
        self.agent_para = deepcopy(config_info.get("agent_para"))

        self.recv_broker = recv_broker
        self.send_broker = send_broker
        self.recv_agent = UniComm("LocalMsg")
        self.send_agent = UniComm("LocalMsg")

        self.explorer_id = self.env_para.get("env_id")
        self.broker_id = broker_id
        self.learner_postfix = config_info.get("learner_postfix")

        self.rl_agent = None
        self.report_stats_interval = max(config_info.get('env_num'), 7)

        self._buf_path = config_info["share_path"]
        self._buf = ShareBuf(live=10, path=self._buf_path)  # live para is dummy
        logging.info("init explorer with id: {}, buf_path: {}".format(
            self.explorer_id, self._buf_path))

    def start_explore(self):
        """Start explore process."""
        signal.signal(signal.SIGINT, signal.SIG_IGN)
        os.environ["CUDA_VISIBLE_DEVICES"] = str(-1)

        total_explored = 0
        try:
            self.rl_agent = AgentGroup(self.env_para, self.alg_para,
                                       self.agent_para, self.send_agent,
                                       self.recv_agent, self._buf)
            sync_interval = self.agent_para.get(
                "agent_config", {}).get("sync_model_interval", 1)
            logging.info("explorer-{} start with sync interval-{}".format(
                self.explorer_id, sync_interval))

            while True:
                model_type = self.rl_agent.update_model()
                stats = self.rl_agent.explore(sync_interval)
                total_explored += sync_interval

                # Reports are staggered across explorers by id, plus one
                # report right after the first explore round.
                report_now = (
                    total_explored % self.report_stats_interval == self.explorer_id
                    or total_explored == sync_interval)
                if report_now:
                    stats_msg = message(stats, cmd="stats_msg",
                                        broker_id=self.broker_id,
                                        explorer_id=self.explorer_id)
                    self.recv_agent.send(stats_msg)

                if self.explorer_id < 1:
                    logging.debug(
                        "EXP{} ran {} ts, restore {} ts, last type:{}".format(
                            self.explorer_id, total_explored,
                            self.rl_agent.restore_count, model_type))
        except BaseException as ex:
            logging.exception(ex)
            os._exit(4)

    def start_data_transfer(self):
        """Start transfer data and other thread."""
        for worker in (self.transfer_to_broker, self.transfer_to_agent):
            threading.Thread(target=worker).start()

    def transfer_to_agent(self):
        """Send train data to learner."""
        while True:
            msg = self.recv_broker.get()
            if get_msg_info(msg, "cmd") == "close":
                logging.debug("enter explore close")
                self.close()
                continue
            self.send_agent.send(get_msg_data(msg))

    def transfer_to_broker(self):
        """Send train data to learner."""
        while True:
            msg = self.recv_agent.recv()
            routed_cmd = get_msg_info(msg, "cmd") + self.learner_postfix
            set_msg_info(msg, broker_id=self.broker_id,
                         explorer_id=self.explorer_id, cmd=routed_cmd)
            self.send_broker.send(msg)

    def start(self):
        """Start actor's thread and process."""
        setproctitle.setproctitle("xt_explorer")
        self.start_data_transfer()
        self.start_explore()

    def close(self):
        """Shut down the agent group."""
        self.rl_agent.close()
class Explorer(object):
    """Create an explorer to explore environment to generate train data."""

    def __init__(self, config_info, broker_id, recv_broker, send_broker):
        """Hold the env/alg/agent parameters and open the local channels."""
        self.env_para = deepcopy(config_info.get("env_para"))
        self.alg_para = deepcopy(config_info.get("alg_para"))
        self.agent_para = deepcopy(config_info.get("agent_para"))

        self.recv_broker = recv_broker
        self.send_broker = send_broker
        self.recv_agent = UniComm("LocalMsg")
        self.send_agent = UniComm("LocalMsg")

        self.explorer_id = self.env_para.get("env_id")
        self.broker_id = broker_id
        self.rl_agent = None

        self._buf_path = config_info["share_path"]
        self._buf = ShareBuf(live=10, path=self._buf_path)  # live para is dummy
        logging.debug("init explorer with id: {}, buf_path: {}".format(
            self.explorer_id, self._buf_path))

    def start_explore(self):
        """Start explore process."""
        signal.signal(signal.SIGINT, signal.SIG_IGN)
        os.environ["CUDA_VISIBLE_DEVICES"] = str(-1)

        steps_done = 0
        stats_interval = 20
        last_reported_at = -999  # forces an early first report
        try:
            self.rl_agent = AgentGroup(self.env_para, self.alg_para,
                                       self.agent_para, self.send_agent,
                                       self.recv_agent, self._buf)
            sync_interval = self.agent_para.get(
                "agent_config", {}).get("sync_model_interval", 1)
            logging.info(
                "AgentGroup start to explore with sync interval-{}".format(
                    sync_interval))

            while True:
                stats = self.rl_agent.explore(sync_interval)
                steps_done += sync_interval

                if self.explorer_id < 1:
                    logging.debug("explore-{} ran {} times".format(
                        self.explorer_id, steps_done))

                # Throttle stats reporting to roughly every stats_interval steps.
                if steps_done - last_reported_at > stats_interval:
                    self.recv_agent.send(message(stats, cmd="stats_msg"))
                    last_reported_at = steps_done
        except BaseException as ex:
            logging.exception(ex)
            os._exit(4)

    def start_data_transfer(self):
        """Start transfer data and other thread."""
        for worker in (self.transfer_to_broker, self.transfer_to_agent):
            threading.Thread(target=worker).start()

    def transfer_to_agent(self):
        """Send train data to learner."""
        while True:
            msg = self.recv_broker.get()
            if get_msg_info(msg, "cmd") == "close":
                logging.debug("enter explore close")
                self.close()
                continue
            self.send_agent.send(get_msg_data(msg))

    def transfer_to_broker(self):
        """Send train data to learner."""
        while True:
            msg = self.recv_agent.recv()
            cmd = get_msg_info(msg, "cmd")
            # buf_reduce messages travel on their own channel; the rest
            # go out as plain train data.
            channel = "buf_reduce" if cmd == "buf_reduce" else "data"
            set_msg_info(msg, broker_id=self.broker_id,
                         explorer_id=self.explorer_id)
            self.send_broker.send(msg, data_type=channel)

    def start(self):
        """Start actor's thread and process."""
        self.start_data_transfer()
        self.start_explore()

    def close(self):
        """Shut down the agent group."""
        self.rl_agent.close()