def normalize_vector_obs(self, func):
    '''
    Apply `func` to the vector part of every stored observation, i.e. to the
    `vector` field of each entry in `data_buffer['obs']` and `data_buffer['obs_']`.
    '''
    assert 'obs' in self.data_buffer.keys(), "data buffer must contain 'obs'"
    assert 'obs_' in self.data_buffer.keys(), "data buffer must contain 'obs_'"
    self.data_buffer['obs'] = [
        NamedTupleStaticClass.data_convert(func, obs, keys=['vector'])
        for obs in self.data_buffer['obs']
    ]
    self.data_buffer['obs_'] = [
        NamedTupleStaticClass.data_convert(func, obs_, keys=['vector'])
        for obs_ in self.data_buffer['obs_']
    ]
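# The implementation of NamedTupleStaticClass.data_convert is not shown in this section.
# The sketch below is a hypothetical, minimal stand-in illustrating the behaviour assumed
# above: apply a function to the selected fields of a (possibly nested) namedtuple and
# return a new namedtuple with all other fields untouched.
import collections

def _data_convert_sketch(func, nt, keys=None):
    # Apply `func` to every field named in `keys` (all fields when keys is None),
    # recursing into nested namedtuples.
    keys = nt._fields if keys is None else keys
    updates = {}
    for k in keys:
        v = getattr(nt, k)
        if hasattr(v, '_fields'):                       # nested namedtuple: recurse
            updates[k] = _data_convert_sketch(func, v)
        else:
            updates[k] = func(v)
    return nt._replace(**updates)

# Usage sketch (hypothetical Obs type):
Obs = collections.namedtuple('Obs', ['vector', 'visual'])
o = Obs(vector=[1.0, 2.0], visual=None)
print(_data_convert_sketch(lambda x: [e * 0.5 for e in x], o, keys=['vector']))
# Obs(vector=[0.5, 1.0], visual=None)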
def _data_process2dict(self, exps: BatchExperiences) -> BatchExperiences:
    # TODO: optimize
    if not self.is_continuous:
        assert 'action' in exps._fields, "experiences must contain an 'action' field"
        exps = exps._replace(action=int2one_hot(exps.action.astype(np.int32), self.a_dim))
    assert 'obs' in exps._fields and 'obs_' in exps._fields, "experiences must contain 'obs' and 'obs_' fields"
    # exps = exps._replace(
    #     obs=exps.obs._replace(vector=self.normalize_vector_obs()),
    #     obs_=exps.obs_._replace(vector=self.normalize_vector_obs()))
    return NamedTupleStaticClass.data_convert(self.data_convert, exps)
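# `int2one_hot` is a helper from the surrounding codebase whose implementation is not shown
# here. A minimal numpy sketch of the assumed semantics (map an integer index array to
# one-hot rows of width `dim`) looks like this:
import numpy as np

def _int2one_hot_sketch(index, dim):
    # index: integer array of any shape; returns a float array of shape index.shape + (dim,)
    index = np.asarray(index, dtype=np.int32)
    one_hot = np.zeros(index.shape + (dim,), dtype=np.float32)
    np.put_along_axis(one_hot, index[..., None], 1.0, axis=-1)
    return one_hot

# Usage sketch: three discrete actions out of a 4-dimensional action space.
print(_int2one_hot_sketch(np.array([0, 2, 3]), 4))
# [[1. 0. 0. 0.]
#  [0. 0. 1. 0.]
#  [0. 0. 0. 1.]]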
def _learn(self, function_dict: Dict) -> NoReturn:
    '''
    Run one learning phase: optionally add curiosity rewards, compute statistics,
    iterate over sampled batches with the provided training function, then write
    the collected summaries to TensorBoard.
    '''
    _cal_stics = function_dict.get('calculate_statistics', lambda *args: None)
    _train = function_dict.get('train_function', lambda *args: None)    # training procedure
    _summary = function_dict.get('summary_dict', {})    # extra scalars to record to TensorBoard

    self.intermediate_variable_reset()

    # self.data.normalize_vector_obs(self.normalize_vector_obs)

    if not self.is_continuous:
        self.data.convert_action2one_hot(self.a_dim)

    if self.use_curiosity and not self.use_rnn:
        curiosity_data = self.data.get_curiosity_data()
        curiosity_data = NamedTupleStaticClass.data_convert(self.data_convert, curiosity_data)
        cell_state = self.initial_cell_state(batch=self.n_agents)
        crsty_r, crsty_summaries = self.curiosity_model(curiosity_data, cell_state)
        self.data.update_reward(crsty_r.numpy())
        # self.data.r += crsty_r.numpy().reshape([self.data.eps_len, -1])
        self.summaries.update(crsty_summaries)

    _cal_stics()

    if self.use_rnn:
        all_data = self.data.sample_generater_rnn()
    else:
        all_data = self.data.sample_generater()

    for data, cell_state in all_data:
        data = NamedTupleStaticClass.data_convert(self.data_convert, data)
        cell_state = self.data_convert(cell_state)
        summaries = _train(data, cell_state)

    self.summaries.update(summaries)
    self.summaries.update(_summary)
    self.write_training_summaries(self.train_step, self.summaries)
    self.clear()
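# A hypothetical caller of `_learn`, showing the shape of `function_dict` that the method
# expects. The keys ('calculate_statistics', 'train_function', 'summary_dict') come from
# the .get() calls above; the bodies and values below are placeholders, not the real
# algorithm or its hyperparameters.
def _example_train(data, cell_state):
    # ... run one gradient step on `data`, return a dict of scalars for TensorBoard ...
    return {'LOSS/dummy_loss': 0.0}

# self._learn(function_dict={
#     'calculate_statistics': lambda: None,         # optional pre-training bookkeeping
#     'train_function': _example_train,             # called once per sampled batch
#     'summary_dict': {'LEARNING_RATE/lr': 1e-3},   # extra scalars merged into the summaries
# })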
def observation(self, observation: List[SingleModelInformation]):
    '''
    Scale visual observations by 255 and cast them to uint8 (assuming float pixels
    in [0, 1]), for both `obs` and `obs_` of every behavior.
    '''
    def func(x):
        return np.asarray(x * 255).astype(np.uint8)

    for bn in self.behavior_names:
        visual = observation[bn].obs.visual
        if isinstance(visual, np.ndarray):
            visual = func(visual)
        else:
            visual = NamedTupleStaticClass.data_convert(func, visual)
        observation[bn] = observation[bn]._replace(
            obs=observation[bn].obs._replace(visual=visual))

        visual = observation[bn].obs_.visual
        if isinstance(visual, np.ndarray):
            visual = func(visual)
        else:
            visual = NamedTupleStaticClass.data_convert(func, visual)
        observation[bn] = observation[bn]._replace(
            obs_=observation[bn].obs_._replace(visual=visual))
    return observation
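# Quick numpy illustration of the conversion applied above, on a made-up 84x84x3 frame
# (assuming float pixels in [0, 1]): scaling by 255 and casting to uint8 reduces each
# visual observation to a quarter of its float32 size.
import numpy as np

frame = np.random.rand(84, 84, 3).astype(np.float32)   # hypothetical visual observation
compressed = np.asarray(frame * 255).astype(np.uint8)

print(frame.dtype, frame.nbytes)            # float32 84672
print(compressed.dtype, compressed.nbytes)  # uint8 21168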
def get_transitions(self, databuffer, data_name_list=['s', 'a', 'r', 's_', 'done']):
    '''
    Sample a batch of experiences from the replay buffer and, for discrete action
    spaces, convert integer actions to one-hot vectors while keeping the leading
    batch/time dimensions intact.
    '''
    exps = databuffer.sample()    # sample a batch from the replay buffer
    if not self.is_continuous:
        assert 'action' in exps._fields, "experiences must contain an 'action' field"
        a = exps.action.astype(np.int32)
        pre_shape = a.shape
        a = a.reshape(-1)                    # flatten so int2one_hot sees a 1-D index array
        a = int2one_hot(a, self.a_dim)
        a = a.reshape(pre_shape + (-1,))     # restore leading dims, append the one-hot axis
        exps = exps._replace(action=a)
    return NamedTupleStaticClass.data_convert(self.data_convert, exps)
def sample(self) -> BatchExperiences:
    '''
    Sample trajectories from the episode buffer, cut/pad each one to a fixed number
    of timesteps, and split the time axis into a burn-in segment and a training segment.
    '''
    n_sample = self.batch_size if self.is_lg_batch_size else self._size
    trajs = np.random.choice(self._buffer[:self._size], size=n_sample, replace=False)    # pick n_sample trajectories

    def f(v, l):    # [B, T, N]
        return lambda x: tf.keras.preprocessing.sequence.pad_sequences(
            x, padding='pre', dtype='float32', value=v, maxlen=l, truncating='pre')

    def truncate(traj):
        idx = np.random.randint(max(1, len(traj) - self.timestep + 1))    # [min, max)
        return traj[idx:idx + self.timestep]

    datas = []    # [B, variable number of timesteps, N]
    for traj in trajs:
        data = NamedTupleStaticClass.pack(truncate(traj))
        datas.append(data)

    sample_data = NamedTupleStaticClass.pack(datas)
    # pad the `done` field with 1. and every other field with 0.
    sample_data = NamedTupleStaticClass.data_convert(
        f(v=1., l=self.timestep), sample_data, ['done'])    # [B, T, N]
    sample_data = NamedTupleStaticClass.data_convert(
        f(v=0., l=self.timestep), sample_data)    # [B, T, N]

    # split the time axis into a burn-in segment (to warm up the RNN state) and a training segment
    burn_in_data = NamedTupleStaticClass.data_convert(
        lambda x: x[:, :self.burn_in_time_step], sample_data)
    train_data = NamedTupleStaticClass.data_convert(
        lambda x: x[:, self.burn_in_time_step:], sample_data)

    # flatten [B, T, N] -> [B*T, N]
    self.burn_in_data = NamedTupleStaticClass.data_convert(
        lambda x: tf.reshape(x, [-1, *x.shape[2:]]), burn_in_data)
    train_data = NamedTupleStaticClass.data_convert(
        lambda x: tf.reshape(x, [-1, *x.shape[2:]]), train_data)
    return train_data
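# Illustration of the padding/truncation and burn-in split used in `sample`, on plain
# per-step scalars (the real code applies the same transforms field-by-field through
# NamedTupleStaticClass.data_convert). `timestep=4` and `burn_in=2` are made-up values.
import tensorflow as tf

timestep, burn_in = 4, 2
trajs = [[1., 2., 3.], [4., 5., 6., 7., 8.]]    # two trajectories of different lengths

padded = tf.keras.preprocessing.sequence.pad_sequences(
    trajs, padding='pre', dtype='float32', value=0., maxlen=timestep, truncating='pre')
print(padded)
# [[0. 1. 2. 3.]     <- short trajectory is pre-padded with the fill value
#  [5. 6. 7. 8.]]    <- long trajectory is pre-truncated to the last `timestep` steps

burn_in_part, train_part = padded[:, :burn_in], padded[:, burn_in:]
print(burn_in_part.shape, train_part.shape)    # (2, 2) (2, 2)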