# Imports assumed by the methods below (project-internal names such as logger,
# now, ModelPoolErroMsg, ActorTask, LeagueMgrErroMsg, MatchResult,
# ReplayExtractor, TensorZipper and transform_tuple come from the surrounding
# codebase and are not re-declared here).
import os
import pickle
import time
from collections import Counter

import numpy as np
import zmq


def _save_model_checkpoint(self, checkpoint_root, checkpoint_name):
  checkpoint_dir = os.path.join(checkpoint_root, checkpoint_name)
  if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir)
  logger.log(now() + 'Pulling updatetime')
  updatetime_dict = self._model_pool_apis.pull_all_attr('updatetime')
  logger.log(now() +
             'Done pulling updatetime, no.={}'.format(len(updatetime_dict)))
  filenames = []
  for model_key, updatetime in updatetime_dict.items():
    filename = "%s_%s.model" % (model_key, updatetime)
    filepath = os.path.join(checkpoint_root, filename)
    filenames.append(filename + '\n')
    if not os.path.isfile(filepath):
      logger.log(now() + 'Pulling model {}'.format(model_key))
      model = self._model_pool_apis.pull_model(model_key)
      logger.log(now() + 'Done pulling model {}'.format(model_key))
      assert model_key == model.key
      with open(filepath, 'wb') as f:
        pickle.dump(model, f)
        if self._save_learner_meta:
          learner_meta = self._model_pool_apis.pull_learner_meta(model_key)
          pickle.dump(learner_meta, f)
        logger.log(now() + 'Saved model to {}'.format(f.name))
  filelistpath = os.path.join(checkpoint_dir, 'filename.list')
  with open(filelistpath, 'w') as f:
    f.writelines(filenames)
  with open(os.path.join(checkpoint_dir, '.ready'), 'w') as f:
    f.write('ready')
    f.flush()

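# A minimal sketch (not part of the original code) of how a consumer could read
# back the layout written by _save_model_checkpoint above: model pickles live
# directly under checkpoint_root, while 'filename.list' and the '.ready' marker
# live under the per-checkpoint directory. The helper name `load_model_checkpoint`
# and its return value are assumptions for illustration only.
def load_model_checkpoint(checkpoint_root, checkpoint_name):
  checkpoint_dir = os.path.join(checkpoint_root, checkpoint_name)
  if not os.path.isfile(os.path.join(checkpoint_dir, '.ready')):
    raise IOError('checkpoint {} is not ready'.format(checkpoint_dir))
  models = []
  with open(os.path.join(checkpoint_dir, 'filename.list')) as f:
    for filename in f.read().splitlines():
      if not filename:
        continue
      with open(os.path.join(checkpoint_root, filename), 'rb') as mf:
        # learner meta, if it was saved, follows the model in the same file
        models.append(pickle.load(mf))
  return models
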
def get_pseudo_id(self, learner_id):
  if learner_id not in self.id_map:
    self.cur_pseudo_id = (self.cur_pseudo_id + 1) % self.pseudo_learner_num
    logger.log("learner {} begins training with pseudo learner_id"
               " {}".format(learner_id, self.cur_pseudo_id))
    self.id_map[learner_id] = self.cur_pseudo_id
  return self.id_map[learner_id]

def _push_data(self, data_queue):
  """Push trajectory for the learning agent (id 0). Invoked in a thread."""
  logger.log('entering _push_data_to_learner',
             'steps: {}'.format(self._steps),
             level=logger.DEBUG + 5)
  me_id = self._learning_agent_id  # short name
  last_obs, actions, reward, info, done, other_vars = data_queue.get()
  value, state, neglogpac = other_vars
  while True:
    data_model_id = self.task.model_key1
    mb_obs, mb_rewards, mb_actions, mb_values, mb_dones, mb_neglogpacs = (
        [], [], [], [], [], [])
    mb_states = []
    for _ in range(self._unroll_length):
      mb_obs.append(transform_tuple(last_obs[me_id], lambda x: x.copy()))
      mb_actions.append(actions[me_id])
      mb_rewards.append(reward)
      mb_dones.append(done)
      mb_values.append(value)
      mb_neglogpacs.append(neglogpac)
      mb_states.append(state)
      last_obs, actions, reward, info, done, other_vars = data_queue.get()
      value, state, neglogpac = other_vars
    if (isinstance(last_obs[me_id], tuple)
        or isinstance(last_obs[me_id], list)):
      mb_obs = tuple(
          np.asarray(obs, dtype=obs[me_id].dtype) for obs in zip(*mb_obs))
    else:
      mb_obs = np.asarray(mb_obs, dtype=last_obs[me_id].dtype)
    mb_rewards = np.asarray(mb_rewards, dtype=np.float32)
    if isinstance(actions[me_id], list) or isinstance(actions[me_id], tuple):
      # actions can be a list (e.g., from a transformer network)
      mb_actions = tuple(
          np.squeeze(np.asarray(a, dtype=np.float32))
          for a in zip(*mb_actions))
    else:
      mb_actions = np.asarray(mb_actions)
    mb_values = np.asarray(mb_values, dtype=np.float32)
    mb_neglogpacs = np.asarray(mb_neglogpacs, dtype=np.float32)
    mb_dones = np.asarray(mb_dones, dtype=bool)  # np.bool is removed in recent numpy
    mb_states = np.asarray(mb_states)
    # GAE(lambda): bootstrap with the value of the state after the unroll
    mb_returns = np.zeros_like(mb_rewards)
    mb_advs = np.zeros_like(mb_rewards)
    last_gae_lam = 0
    for t in reversed(range(self._unroll_length)):
      next_values = (value if t == self._unroll_length - 1
                     else mb_values[t + 1])
      delta = (mb_rewards[t] + self._gamma * next_values * (1 - mb_dones[t])
               - mb_values[t])
      mb_advs[t] = last_gae_lam = (delta + self._gamma * self._lam
                                   * (1 - mb_dones[t]) * last_gae_lam)
    mb_returns = mb_advs + mb_values
    # All done, send them to remote
    self._remote.push_data((data_model_id, mb_obs, mb_returns, mb_dones,
                            mb_actions, mb_values, mb_neglogpacs, mb_states))

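# The advantage computation in _push_data above is standard GAE(lambda). The
# standalone sketch below (illustrative only, assuming 1-D numpy inputs of the
# same length) mirrors that loop; `last_value` plays the role of `value`, i.e.
# the bootstrap value of the state right after the unroll.
def gae_advantages_and_returns(rewards, values, dones, last_value, gamma, lam):
  """Return (advantages, returns) for one unroll, as in _push_data."""
  unroll_length = len(rewards)
  advs = np.zeros_like(rewards)
  last_gae_lam = 0.0
  for t in reversed(range(unroll_length)):
    next_values = last_value if t == unroll_length - 1 else values[t + 1]
    delta = rewards[t] + gamma * next_values * (1 - dones[t]) - values[t]
    advs[t] = last_gae_lam = delta + gamma * lam * (1 - dones[t]) * last_gae_lam
  return advs, advs + values
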
def ready_for_val(self):
  if not self._val_rm.ready_for_sample():
    logger.log('val data queue not full ({}/{} unrolls, wait...)'.format(
        len(self._val_rm), self._val_rm._minimal_unroll))
    return False
  else:
    return True

def ready_for_train(self):
  if not self._replay_mem.ready_for_sample():
    logger.log('train data queue not full ({}/{} unrolls, wait...)'.format(
        len(self._replay_mem), self._replay_mem._minimal_unroll))
    return False
  else:
    return True

def log_outcome(self, info):
  if 'outcome' not in info:
    me_outcome = -95678
    logger.log("info['outcome'] not available",
               'returning an arbitrary value', me_outcome,
               level=logger.WARN)
  else:
    me_outcome = info['outcome'][self._learning_agent_id]
  return me_outcome

def _on_request_eval_actor_task(self, actor_id):
  logger.log("get_eval_actor_task: actor_id:{}".format(str(actor_id)),
             level=logger.DEBUG)
  if len(self.game_mgr.players) > 1:
    rp, cp = self.game_mgr.get_eval_match()
    return ActorTask(rp, cp, None)
  else:
    return LeagueMgrErroMsg("Actor task not ready.")

def _on_notify_learner_task_end(self, learner_id):
  pseudo_id = self.get_pseudo_id(learner_id)
  self.cur_pseudo_id = (self.cur_pseudo_id + 1) % self.pseudo_learner_num
  self.id_map[learner_id] = self.cur_pseudo_id
  logger.log("learner {} switches from pseudo learner_id {} to "
             "pseudo learner_id {}".format(learner_id, pseudo_id,
                                           self.id_map[learner_id]))
  return super(PARLeagueMgr, self)._on_notify_learner_task_end(pseudo_id)

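# A self-contained sketch of the round-robin pseudo learner id scheme
# implemented by get_pseudo_id and _on_notify_learner_task_end above: a new
# learner gets the next pseudo id, and a learner that finishes a task advances
# to the next pseudo id so it picks up a different pseudo learner's task. The
# class layout and the initial cur_pseudo_id of -1 are assumptions for
# illustration only.
class PseudoIdAllocator(object):
  def __init__(self, pseudo_learner_num):
    self.pseudo_learner_num = pseudo_learner_num
    self.cur_pseudo_id = -1  # assumption: so the first assignment yields 0
    self.id_map = {}

  def assign(self, learner_id):
    # mirrors get_pseudo_id: unseen learners take the next id round-robin
    if learner_id not in self.id_map:
      self.cur_pseudo_id = (self.cur_pseudo_id + 1) % self.pseudo_learner_num
      self.id_map[learner_id] = self.cur_pseudo_id
    return self.id_map[learner_id]

  def advance(self, learner_id):
    # mirrors the id rotation in _on_notify_learner_task_end
    self.cur_pseudo_id = (self.cur_pseudo_id + 1) % self.pseudo_learner_num
    self.id_map[learner_id] = self.cur_pseudo_id
    return self.id_map[learner_id]
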
def _request_task(self):
  """Request the task for this actor."""
  logger.log('entering _request_task', 'steps: {}'.format(self._steps),
             level=logger.DEBUG + 5)
  task = self._league_mgr_apis.request_actor_task(self._actor_id,
                                                  self._learner_id)
  logger.log('leaving _request_task', level=logger.DEBUG + 5)
  return task

def extract(self):
  try:
    for frame in self._extract():
      yield frame
  except Exception as e:
    logger.log("Extract replay[%s] player[%d] failed: %s" %
               (self._replay_filepath, self._player_id, e),
               level=logger.WARN)
    raise e

def run(self):
  self.replay_task = self._data_pool_apis.request_replay_task()
  while self.replay_task != "":
    game_version = self.replay_task.game_version or self._game_version
    self._adapt_system(game_version)
    if game_version != self._game_version:
      # need to re-init the replay converter
      self._game_version = game_version
      self.converter_config['game_version'] = game_version
      self._replay_converter = self.replay_converter_type(
          **self.converter_config)
    game_core_config = ({} if 'game_core_config' not in self.converter_config
                        else self.converter_config['game_core_config'])
    extractor = ReplayExtractor(
        replay_dir=self._replay_dir,
        replay_filename=self.replay_task.replay_name,
        player_id=self.replay_task.player_id,
        replay_converter=self._replay_converter,
        step_mul=self._step_mul,
        version=game_version,
        game_core_config=game_core_config,
        da_rate=self._da_rate,
        unk_mmr_dft_to=self._unk_mmr_dft_to)
    self._steps = 0
    first_frame = True
    if self._use_policy:
      self.agent.reset()
      self._update_agent_model()
    for frame in extractor.extract():
      if self._post_process_data:
        obs, act = self._post_process_data(*frame[0])
      else:
        obs, act = frame[0]
      if self._use_policy:
        data = (obs, act, self.agent.state,
                np.array(first_frame, bool))  # np.bool is removed in recent numpy
        self.agent.update_state(obs)
        first_frame = False
      else:
        data = (obs, act)
      data = self.ds.flatten(self.ds.structure(data))
      if self._data_queue.full():
        logger.log("Actor's queue is full.", level=logger.WARN)
      self._data_queue.put((TensorZipper.compress(data), frame[1]))
      logger.log('successfully put one tuple.', level=logger.DEBUG)
      self._steps += 1
      if self._steps % self._log_interval == 0:
        logger.log("%d frames of replay task [%s] sent to learner." %
                   (self._steps, self.replay_task))
      if self._use_policy and self._steps % self._update_model_freq == 0:
        self._update_agent_model()
    logger.log("Replay task [%s] done. %d frames sent to learner." %
               (self.replay_task, self._steps))
    self.replay_task = self._data_pool_apis.request_replay_task()
  logger.log("All tasks done.")

def _update_agent_model(self):
  if self.infserver_addr is not None:
    return
  logger.log('entering _update_agent_model',
             'steps: {}'.format(self._steps),
             level=logger.DEBUG + 5)
  if self._should_update_model(self.model, self.model_key):
    model = self._model_pool_apis.pull_model(self.model_key)
    self.agent.load_model(model.model)
    self.model = model

def _update_distill_agent_model(self):
  if self.distill_infserver_addr is not None:
    return
  logger.log('entering _update_distill_agent_model',
             f'steps: {self._steps}',
             level=logger.DEBUG + 5)
  model_key = self.task.hyperparam.distill_model_key
  if self._should_update_model(self.distill_model, model_key):
    model3 = self._model_pool_apis.pull_model(model_key)
    self.distill_agent.load_model(model3.model)
    self.distill_model = model3
  logger.log('leaving _update_distill_agent_model', level=logger.DEBUG + 5)

def _finish_task(self, task, outcome, info=None):
  """Do stuff (e.g., send the match result) when a task finishes."""
  info = info or {}
  logger.log('entering _finish_task', 'steps: {}'.format(self._steps),
             level=logger.DEBUG + 5)
  match_result = MatchResult(task.model_key1, task.model_key2, outcome, info)
  self._league_mgr_apis.notify_actor_task_end(self._actor_id, match_result)
  logger.log('leaving _finish_task', level=logger.DEBUG + 5)

def _on_request_actor_task(self, actor_id, learner_id):
  actor_task = super(LeagueMgr, self)._on_request_actor_task(actor_id,
                                                             learner_id)
  if not isinstance(actor_task, LeagueMgrErroMsg):
    assert isinstance(actor_task, ActorTask)
    self.game_mgr.start_match(actor_task.model_key1, actor_task.model_key2,
                              actor_id)
  logger.log("_on_request_actor_task: %s" % str(actor_task),
             level=logger.DEBUG)
  return actor_task

def _restore_checkpoint(self, checkpoint_dir):
  super(LeagueMgr, self)._restore_checkpoint(checkpoint_dir)
  logger.log('{}loading league-mgr from {}'.format(now(), checkpoint_dir))
  # restore in reverse order of _save_checkpoint
  # 3. game manager
  self.game_mgr.load(checkpoint_dir)
  # 2. hyperparameter manager
  self._hyper_mgr.load(checkpoint_dir)
  # 1. learner task table
  filepath = os.path.join(checkpoint_dir, 'learner_task_table')
  with open(filepath, 'rb') as f:
    self._learner_task_table = pickle.load(f)
  logger.log('{}done loading league-mgr'.format(now()))

def _print_infos(self):
  # filter out the zstat infos etc., which cannot be averaged
  filter_numeric = lambda d: dict([(_k, _v) for _k, _v in d.items()
                                   if _v is not None
                                   and not isinstance(_v, str)])
  stat = Counter({})
  for info in self._infos:
    stat += Counter(filter_numeric(info))
  num = float(len(self._infos))
  for k, v in stat.items():
    stat[k] = float(int(v / num * 100)) / 100.0  # keep two decimal places
  logger.log(stat)

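# Worked example (illustrative only) of the averaging in _print_infos above,
# with two episode infos:
#   self._infos = [{'outcome': 1, 'zstat': 'abc'}, {'outcome': 0, 'zstat': 'xyz'}]
# The string-valued 'zstat' entries are filtered out, the Counter sum gives
# {'outcome': 1}, num = 2.0, and the logged stat is {'outcome': 0.5}.
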
def _update_hyperparam(self, task):
  logger.log('entering _update_hyperparam', f'steps: {self._steps}',
             level=logger.DEBUG + 5)
  if self._enable_push:
    if hasattr(task.hyperparam, 'gamma'):
      self._gamma = task.hyperparam.gamma
    if hasattr(task.hyperparam, 'lam'):
      self._lam = task.hyperparam.lam
    if hasattr(task.hyperparam, 'reward_weights'):
      self._reward_weights = np.array(task.hyperparam.reward_weights,
                                      dtype=np.float32)
  logger.log('leaving _update_hyperparam', level=logger.DEBUG + 5)

def log_kvs(self, reward_sum, info):
  time_end = time.time()
  logger.logkvs({
      'producing_fps': self._steps / (time_end - self.time_beg),
      'reward_sum': reward_sum,
      'episode_steps': self._steps,
  })
  if self.should_log_info:  # log additional info fields
    if isinstance(info, dict):
      logger.logkvs(info)
    else:
      logger.log(info)
  logger.dumpkvs()

def _save_checkpoint(self, checkpoint_root, checkpoint_name):
  checkpoint_dir = os.path.join(checkpoint_root, checkpoint_name)
  logger.log('{}saving league-mgr to {}'.format(now(), checkpoint_dir))
  super(LeagueMgr, self)._save_checkpoint(checkpoint_root, checkpoint_name)
  # 1. learner task table
  filepath = os.path.join(checkpoint_dir, 'learner_task_table')
  with open(filepath, 'wb') as f:
    pickle.dump(self._learner_task_table, f)
  # 2. hyperparameter manager
  self._hyper_mgr.save(checkpoint_dir)
  # 3. game manager
  self.game_mgr.save(checkpoint_dir)
  logger.log('{}done saving league-mgr'.format(now()))

def _on_request_train_actor_task(self, actor_id, learner_id):
  logger.log(
      "_on_request_train_actor_task: actor_id:{}, learner_id:{}".format(
          actor_id, learner_id),
      level=logger.DEBUG)
  if (learner_id in self._learner_task_table
      and self._learner_task_table[learner_id].model_key is not None):
    hyperparam = self._learner_task_table[learner_id].hyperparam
    model_key = self._learner_task_table[learner_id].model_key
    oppo_model_key = self.game_mgr.get_opponent(model_key, hyperparam)
    if isinstance(oppo_model_key, LeagueMgrErroMsg):
      logger.log(f'get_opponent not ready: {oppo_model_key}',
                 level=logger.WARN)
      return oppo_model_key
    return ActorTask(model_key, oppo_model_key, hyperparam)
  else:
    if learner_id not in self._learner_task_table:
      logger.log("learner_id({}) hasn't called request_learner_task.".format(
          learner_id), level=logger.WARN)
    elif self._learner_task_table[learner_id].model_key is None:
      logger.log(
          "learner_id({}) hasn't called notify_learner_task_begin.".format(
              learner_id), level=logger.WARN)
    return LeagueMgrErroMsg("Actor task not ready.")

def _push_data(self, data_queue):
  """Push replay frames (with weights) to the data pool. Invoked in a thread."""
  while data_queue.empty():
    time.sleep(5)
  logger.log('entering _push_data_to_learner',
             'steps: {}'.format(self._steps),
             level=logger.DEBUG + 5)
  while True:
    task = self.replay_task
    frames = []
    weights = []
    for _ in range(self._unroll_length):
      frame, weight = data_queue.get()
      frames.append(frame)
      weights.append(weight)
    self._data_pool_apis.push_data((task, frames, weights))

def pull_keys(self):
  self._req_lock.acquire()
  while True:
    try:
      self._req_socket.send_string("keys")
      keys = self._req_socket.recv_pyobj()
      if not isinstance(keys, ModelPoolErroMsg):
        break
      else:
        logger.log(keys.msg)  # keys is a ModelPoolErroMsg
        time.sleep(2)
    except Exception as e:
      print("ModelPoolAPIs crashed on pull_keys,"
            " the exception:\n{}".format(e))
  self._req_lock.release()
  return keys

def pull_all_attr(self, attr):
  self._req_lock.acquire()
  while True:
    try:
      self._req_socket.send_string('all_attr', zmq.SNDMORE)
      self._req_socket.send_string(attr)
      attrs = self._req_socket.recv_pyobj()
      if not isinstance(attrs, ModelPoolErroMsg):
        break
      else:
        logger.log(attrs.msg)  # attrs is a ModelPoolErroMsg
        time.sleep(2)
    except Exception as e:
      print("ModelPoolAPIs crashed on pull_all_attr,"
            " the exception:\n{}".format(e))
  self._req_lock.release()
  return attrs

def pull_model(self, key):
  self._req_lock.acquire()
  while True:
    try:
      self._req_socket.send_string("model", zmq.SNDMORE)
      self._req_socket.send_string(key)
      model = self._req_socket.recv_pyobj()
      if not isinstance(model, ModelPoolErroMsg):
        break
      else:
        logger.log(model.msg)  # model is a ModelPoolErroMsg
        time.sleep(2)
    except Exception as e:
      print("ModelPoolAPIs crashed on pull_model {},"
            " the exception:\n{}".format(key, e))
  self._req_lock.release()
  return model

def pull_attr(self, attr, key):
  self._req_lock.acquire()
  while True:
    try:
      self._req_socket.send_string(attr, zmq.SNDMORE)
      self._req_socket.send_string(key)
      ret_attr = self._req_socket.recv_pyobj()
      if not isinstance(ret_attr, ModelPoolErroMsg):
        break
      else:
        logger.log(ret_attr.msg)  # ret_attr is a ModelPoolErroMsg
        time.sleep(2)
    except Exception as e:
      print("ModelPoolAPIs crashed on pull_attr {} of model {},"
            " the exception:\n{}".format(attr, key, e))
  self._req_lock.release()
  return ret_attr

def _message_worker(self):
  train_idx = 0
  n_train = len(self._train_replays)
  while True:
    msg = self._rep_socket.recv_string()
    if msg == 'replay_task':
      if len(self._replay_tasks) > 0:
        self._rep_socket.send_pyobj(self._replay_tasks.pop(0))
      elif self._repeat_training_task:
        if train_idx == 0:
          logger.log('Replay task queue empty, repeating training tasks.')
        self._rep_socket.send_pyobj(self._train_replays[train_idx])
        train_idx = (train_idx + 1) % n_train
      else:
        self._rep_socket.send_pyobj("")
    else:
      raise RuntimeError("message not recognized")

def pull_learner_meta(self, key):
  self._req_lock.acquire()
  while True:
    try:
      self._req_socket.send_string('learner_meta', zmq.SNDMORE)
      self._req_socket.send_string(key)
      learner_meta = self._req_socket.recv_pyobj()
      if not isinstance(learner_meta, ModelPoolErroMsg):
        break
      else:
        logger.log(learner_meta.msg)  # learner_meta is a ModelPoolErroMsg
        time.sleep(2)
    except Exception as e:
      print("ModelPoolAPIs crashed on pull_learner_meta {},"
            " the exception:\n{}".format(key, e))
  self._req_lock.release()
  return learner_meta

def request(self, req):
  self._req_lock.acquire()
  while True:
    try:
      for msg in req[0:-1]:
        self._req_socket.send_string(msg, zmq.SNDMORE)
      self._req_socket.send_string(req[-1])
      ret = self._req_socket.recv_pyobj()
      if not isinstance(ret, ModelPoolErroMsg):
        break
      else:
        logger.log(ret.msg)  # ret is a ModelPoolErroMsg
        time.sleep(2)
    except BaseException as e:
      logger.error("ModelPoolAPIs may have crashed on request {},"
                   " the exception:\n{}".format(req, e))
      raise e
  self._req_lock.release()
  return ret

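# Note: pull_keys, pull_all_attr, pull_model, pull_attr and pull_learner_meta
# above all follow the same request protocol as the generic `request` helper:
# send the message parts with zmq.SNDMORE, receive a pyobj reply, and retry
# after 2 seconds whenever the reply is a ModelPoolErroMsg. For instance, with
# `apis` being a ModelPoolAPIs instance (illustrative usage only), the call
# below sends the same frames as pull_attr('updatetime', key), except that
# `request` re-raises on transport exceptions instead of silently retrying:
#   apis.request(['updatetime', key])
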
def _update_agents_model(self, task):
  """Update the model (i.e., Neural Net parameters) for each agent.

  The learning agent uses model1, all the other opponent(s) use model2.
  """
  logger.log('entering _update_agents_model',
             'steps: {}'.format(self._steps),
             level=logger.DEBUG + 5)
  if (self.self_infserver_addr is None
      and self._should_update_model(self.self_model, task.model_key1)):
    model1 = self._model_pool_apis.pull_model(task.model_key1)
    me_id = self._learning_agent_id  # short name
    self.agents[me_id].load_model(model1.model)
    self.self_model = model1
  if self._should_update_model(self.oppo_model, task.model_key2):
    model2 = self._model_pool_apis.pull_model(task.model_key2)
    oppo_id = self._oppo_agent_id  # short name
    for agt in self.agents[oppo_id:]:
      agt.load_model(model2.model)
    self.oppo_model = model2
  logger.log('leaving _update_agents_model', level=logger.DEBUG + 5)