Ejemplo n.º 1
0
 def normalize_vector_obs(self, func):
     '''
     Run `func` over the stored observations.

     The 'obs' and 'obs_' entries of self.data_buffer are each replaced by a
     new list whose elements are
     NamedTupleStaticClass.data_convert(func, element, keys=['vector']).
     Presumably this limits `func` to the 'vector' field of every
     observation -- confirm against data_convert.

     Raises:
         AssertionError: if 'obs' or 'obs_' is missing from self.data_buffer.
     '''
     buffer = self.data_buffer
     # Check both keys before touching anything, so a missing key leaves the
     # buffer unmodified.
     assert 'obs' in buffer.keys(), "assert 'obs' in self.data_buffer.keys()"
     assert 'obs_' in buffer.keys(), "assert 'obs_' in self.data_buffer.keys()"

     def convert(item):
         return NamedTupleStaticClass.data_convert(func, item, keys=['vector'])

     for key in ('obs', 'obs_'):
         buffer[key] = list(map(convert, buffer[key]))
Ejemplo n.º 2
0
 def _data_process2dict(self, exps: BatchExperiences) -> BatchExperiences:
     '''
     Prepare a batch of experiences for training.

     When self.is_continuous is falsy, the 'action' field is cast to int32
     and replaced by int2one_hot(action, self.a_dim). The batch must also
     have 'obs' and 'obs_' fields. The return value is
     NamedTupleStaticClass.data_convert(self.data_convert, exps).

     Raises:
         AssertionError: if a required field is missing from exps._fields.
     '''
     # TODO: optimize
     if not self.is_continuous:
         assert 'action' in exps._fields, "assert 'action' in exps._fields"
         action_ids = exps.action.astype(np.int32)
         one_hot_actions = int2one_hot(action_ids, self.a_dim)
         exps = exps._replace(action=one_hot_actions)

     has_observations = all(name in exps._fields for name in ('obs', 'obs_'))
     assert has_observations, "'obs' in exps._fields and 'obs_' in exps._fields"

     # NOTE: normalization of the vector observations is currently disabled
     # in this method.
     return NamedTupleStaticClass.data_convert(self.data_convert, exps)
Ejemplo n.º 3
0
    def _learn(self, function_dict: Dict) -> None:
        '''
        Run one training pass over the data currently held in self.data.

        Steps, in order: reset intermediate variables, convert discrete
        actions to one-hot, optionally add curiosity rewards (only when
        self.use_curiosity is set and self.use_rnn is not), call the
        statistics hook, feed every sampled batch to the train hook, merge
        the resulting summaries, write them out and clear the buffer.

        Params:
            function_dict: optional hooks, looked up by key
                'calculate_statistics': callable invoked once with no
                    arguments before sampling (default: no-op).
                'train_function': callable(data, cell_state) invoked for
                    every sampled batch; expected to return a mapping of
                    summaries, or None (default: no-op).
                'summary_dict': mapping merged into self.summaries right
                    before they are written (default: empty dict).
        '''
        def _no_op(*args):
            return None

        _cal_stics = function_dict.get('calculate_statistics', _no_op)
        _train = function_dict.get('train_function', _no_op)    # training procedure
        _summary = function_dict.get('summary_dict', {})    # dict of values to log to tensorboard

        self.intermediate_variable_reset()

        # NOTE: vector-observation normalization
        # (self.data.normalize_vector_obs) is currently disabled here.

        if not self.is_continuous:
            self.data.convert_action2one_hot(self.a_dim)

        if self.use_curiosity and not self.use_rnn:
            curiosity_data = self.data.get_curiosity_data()
            curiosity_data = NamedTupleStaticClass.data_convert(self.data_convert, curiosity_data)
            cell_state = self.initial_cell_state(batch=self.n_agents)
            crsty_r, crsty_summaries = self.curiosity_model(curiosity_data, cell_state)
            self.data.update_reward(crsty_r.numpy())
            self.summaries.update(crsty_summaries)

        _cal_stics()

        if self.use_rnn:
            all_data = self.data.sample_generater_rnn()
        else:
            all_data = self.data.sample_generater()

        # Bound before the loop so an empty generator cannot leave the name
        # undefined further down.
        summaries = None
        for data, cell_state in all_data:
            data = NamedTupleStaticClass.data_convert(self.data_convert, data)
            cell_state = self.data_convert(cell_state)
            summaries = _train(data, cell_state)

        # Only the summaries of the last batch are kept. Skip the merge when
        # nothing was sampled or the train hook returned None (as the default
        # no-op hook does).
        if summaries is not None:
            self.summaries.update(summaries)
        self.summaries.update(_summary)

        self.write_training_summaries(self.train_step, self.summaries)

        self.clear()
Ejemplo n.º 4
0
    def observation(self, observation: List[SingleModelInformation]):
        '''
        Rescale the visual observations of every behavior to uint8.

        For each name in self.behavior_names, the `visual` field of both
        `obs` and `obs_` is multiplied by 255 and cast to np.uint8. A plain
        ndarray is converted directly; anything else is handed to
        NamedTupleStaticClass.data_convert with the same conversion function.
        Presumably the incoming values are floats in [0, 1] -- confirm with
        the producer of these observations.

        The entries of `observation` are replaced in place and the same
        container is returned.
        '''
        def to_uint8(frame):
            return np.asarray(frame * 255).astype(np.uint8)

        def rescale(visual):
            if isinstance(visual, np.ndarray):
                return to_uint8(visual)
            return NamedTupleStaticClass.data_convert(to_uint8, visual)

        for name in self.behavior_names:
            # Current observation first, then the next observation.
            for field in ('obs', 'obs_'):
                info = observation[name]
                frames = getattr(info, field)
                rescaled = frames._replace(visual=rescale(frames.visual))
                observation[name] = info._replace(**{field: rescaled})

        return observation
Ejemplo n.º 5
0
 def get_transitions(self,
                     databuffer,
                     data_name_list=['s', 'a', 'r', 's_', 'done']):
     '''
     Sample a batch from `databuffer` and convert it for training.

     When self.is_continuous is falsy, the 'action' field is cast to int32,
     flattened, one-hot encoded with int2one_hot(..., self.a_dim) and then
     reshaped back to the original leading shape plus one trailing axis.
     The batch is finally passed through
     NamedTupleStaticClass.data_convert(self.data_convert, batch).

     Params:
         databuffer: object whose sample() returns a namedtuple-like batch.
         data_name_list: not used by this method.

     Raises:
         AssertionError: if actions are discrete and the batch has no
             'action' field.
     '''
     batch = databuffer.sample()  # fetch data from the experience pool
     if self.is_continuous:
         return NamedTupleStaticClass.data_convert(self.data_convert, batch)

     assert 'action' in batch._fields, "assert 'action' in exps._fields"
     action_ids = batch.action.astype(np.int32)
     original_shape = action_ids.shape
     one_hot = int2one_hot(action_ids.reshape(-1), self.a_dim)
     # Restore the leading axes; the trailing axis holds the one-hot vector.
     one_hot = one_hot.reshape(original_shape + (-1, ))
     batch = batch._replace(action=one_hot)
     return NamedTupleStaticClass.data_convert(self.data_convert, batch)
Ejemplo n.º 6
0
    def sample(self) -> BatchExperiences:
        '''
        Sample trajectories and split them into burn-in and training parts.

        Trajectories are drawn without replacement from the first self._size
        entries of self._buffer; each one is cut to a random window of at most
        self.timestep steps, packed, and pre-padded to exactly self.timestep
        steps. The padding value is 1. for the keys listed as ['done']
        (presumably the 'done' field -- confirm against data_convert) and 0.
        for everything else.

        The first self.burn_in_time_step steps are stored, flattened over the
        first two axes, in self.burn_in_data; the remaining steps are
        flattened the same way and returned.
        '''
        if self.is_lg_batch_size:
            n_sample = self.batch_size
        else:
            n_sample = self._size
        candidates = self._buffer[:self._size]
        # Select n_sample trajectories.
        trajs = np.random.choice(candidates, size=n_sample, replace=False)

        def make_padder(pad_value, max_len):  # [B, T, N]
            def pad(x):
                return tf.keras.preprocessing.sequence.pad_sequences(
                    x,
                    maxlen=max_len,
                    dtype='float32',
                    padding='pre',
                    truncating='pre',
                    value=pad_value)
            return pad

        def random_window(traj):
            n_starts = max(1, len(traj) - self.timestep + 1)
            start = np.random.randint(n_starts)  # [min, max)
            return traj[start:start + self.timestep]

        def burn_in_part(x):
            return x[:, :self.burn_in_time_step]

        def train_part(x):
            return x[:, self.burn_in_time_step:]

        def merge_batch_and_time(x):
            return tf.reshape(x, [-1, *x.shape[2:]])

        convert = NamedTupleStaticClass.data_convert

        # [B, variable number of time steps, N]
        datas = [NamedTupleStaticClass.pack(random_window(traj))
                 for traj in trajs]

        sample_data = NamedTupleStaticClass.pack(datas)
        sample_data = convert(
            make_padder(1., self.timestep), sample_data, ['done'])  # [B, T, N]
        sample_data = convert(
            make_padder(0., self.timestep), sample_data)  # [B, T, N]

        burn_in_data = convert(burn_in_part, sample_data)
        train_data = convert(train_part, sample_data)

        self.burn_in_data = convert(merge_batch_and_time, burn_in_data)
        return convert(merge_batch_and_time, train_data)