Ejemplo n.º 1
0
    def sample_generater(self, batch_size: int = None):
        '''
        lazily yield shuffled mini-batches drawn from the whole data buffer
        (the sampling path that does not use rnn).

        params:
            batch_size: the batch size of training data; when it is falsy
                (None or 0) self.batch_size is used instead. Each yielded
                batch holds at most batch_size * self.n_agents rows.
        return:
            a generator of (batch, (None, )) pairs, where batch is a
            self.sample_data_type instance built from the selected rows.
        '''
        if not batch_size:
            batch_size = self.batch_size

        total = self.n_agents * self.eps_len
        fields = self.sample_data_type._fields

        # Flatten every sampled field: T * [B, N] => [T*B, N]
        flat = {}
        for name in fields:
            assert name in self.data_buffer.keys(), \
                f"assert {name} in self.data_buffer.keys()"
            steps = self.data_buffer[name]
            if isinstance(steps[0], tuple):
                # Tuple-valued steps are flattened by the named-tuple helper.
                merged = NamedTupleStaticClass.pack(steps,
                                                    func=np.concatenate)
                size_ok = NamedTupleStaticClass.check_len(merged, l=total)
            else:
                merged = np.concatenate(steps)
                size_ok = merged.shape[0] == total
            flat[name] = merged
            assert size_ok, f"shape of {name} not equal to {total}"

        # One random permutation of the row indices, consumed chunk by chunk.
        order = np.arange(total)
        np.random.shuffle(order)

        stride = batch_size * self.n_agents
        for start in range(0, total, stride):
            picked = order[start:start + stride]
            columns = [
                NamedTupleStaticClass.getbatchitems(flat[name], picked)
                if isinstance(flat[name], tuple) else flat[name][picked]
                for name in fields
            ]
            yield self.sample_data_type._make(columns), (None, )
Ejemplo n.º 2
0
    def get_curiosity_data(self):
        '''
        Return the data used by the curiosity mechanism.

        Every field of BatchExperiences is read from self.data_buffer,
        stacked along a new axis 1, flattened to
        self.n_agents * self.eps_len rows and packed into a
        BatchExperiences instance.
        '''
        n_rows = self.n_agents * self.eps_len

        def flatten(steps):
            # T * [B, N] => [B, T, N] => [B*T, N]
            stacked = np.stack(steps, axis=1)
            return stacked.reshape(n_rows, -1)

        packed = {}
        for name in BatchExperiences._fields:
            assert name in self.data_buffer.keys(), \
                f"assert {name} in self.data_buffer.keys()"
            steps = self.data_buffer[name]
            if isinstance(steps[0], tuple):
                # Tuple-valued steps are flattened by the named-tuple helper.
                merged = NamedTupleStaticClass.pack(steps, func=flatten)
                size_ok = NamedTupleStaticClass.check_len(merged, l=n_rows)
            else:
                merged = flatten(steps)
                size_ok = merged.shape[0] == n_rows
            packed[name] = merged
            assert size_ok, f"shape of {name} not equal to {n_rows}"
        return BatchExperiences(**packed)