Exemple #1
0
    def sample(self, batch_size):
        """Sample a data batch from the internal replay buffer. It will call
        :meth:`~tianshou.policy.BasePolicy.process_fn` before returning
        the final batch data.

        When several buffers are held, each requested sample is first
        assigned to one buffer, with probability proportional to that
        buffer's length; every selected buffer is then sampled and processed
        separately and the results are appended into a single batch.

        :param int batch_size: ``0`` means it will extract all the data from
            the buffer, otherwise it will extract the data with the given
            batch_size.

        :return: the sampled data after it went through ``process_fn``.
        """
        if not self._multi_buf:
            batch_data, indice = self.buffer.sample(batch_size)
            return self.process_fn(batch_data, self.buffer, indice)

        if batch_size > 0:
            lens = [len(b) for b in self.buffer]
            total = sum(lens)
            # Draw a *buffer index* per requested sample. The population
            # must be the number of buffers (not the number of stored
            # transitions) so that it matches the length of ``p``;
            # otherwise numpy rejects the call.
            batch_index = np.random.choice(
                len(lens), batch_size, p=np.array(lens) / total)
        else:
            # No per-buffer quota: every buffer is sampled with size 0.
            batch_index = np.array([])
        batch_data = Batch()
        for i, b in enumerate(self.buffer):
            # Number of requested samples that were assigned to buffer ``i``.
            cur_batch = (batch_index == i).sum()
            # Skip buffers that received no sample, unless all data was
            # requested (non-positive batch_size).
            if batch_size <= 0 or cur_batch:
                batch, indice = b.sample(cur_batch)
                batch = self.process_fn(batch, b, indice)
                batch_data.append(batch)
        return batch_data
Exemple #2
0
def test_batch():
    """Check attribute assignment, ``append``, indexing and ``split`` of
    ``Batch``.
    """
    data = Batch(obs=[0], np=np.zeros([3, 4]))

    # Assigning to an attribute replaces the stored value.
    data.obs = [1]
    assert data.obs == [1]

    # Appending the batch to itself is expected to double every field.
    data.append(data)
    assert data.obs == [1, 1]
    assert data.np.shape == (6, 4)

    # Both entries originate from the same element, so they compare equal.
    head, tail = data[0], data[1]
    assert head.obs == tail.obs

    # Index 2 is expected to be out of range.
    with pytest.raises(IndexError):
        data[2]

    # Splitting with size 1 and no permutation must agree with indexing.
    data.obs = np.arange(5)
    pieces = data.split(1, permute=False)
    for position, piece in enumerate(pieces):
        assert piece.obs == data[position].obs
Exemple #3
0
def test_batch():
    """Check key/attribute access, ``append``, indexing and ``split`` of
    ``Batch``, then print the resulting batch.
    """
    data = Batch(obs=[0], np=np.zeros([3, 4]))

    # Attribute access and item access by key must return the same value.
    assert data.obs == data["obs"]

    data.obs = [1]
    assert data.obs == [1]

    # Appending the batch to itself is expected to double every field.
    data.append(data)
    assert data.obs == [1, 1]
    assert data.np.shape == (6, 4)

    head, tail = data[0], data[1]
    assert head.obs == tail.obs

    data.obs = np.arange(5)
    for position, piece in enumerate(data.split(1, shuffle=False)):
        if position == 5:
            # ``obs`` was given only 5 entries, so position 5 is expected
            # to carry no ``obs`` attribute at all.
            with pytest.raises(AttributeError):
                data[position].obs
            with pytest.raises(AttributeError):
                piece.obs
            continue
        # Without shuffling, the chunk must match plain indexing.
        assert piece.obs == data[position].obs
    print(data)
Exemple #4
0
 def sample(self, batch_size):
     """Sample a data batch from the internal buffer(s) and pass it
     through ``self.process_fn`` before returning it.

     When several buffers are held, each requested sample is first
     assigned to one buffer, with probability proportional to that
     buffer's length; every selected buffer is then sampled and processed
     separately and the results are appended into a single batch.

     :param int batch_size: number of samples to draw; a non-positive
         value samples every buffer with size ``0`` (presumably "all
         data" -- confirm against the buffer implementation).

     :return: the sampled data after it went through ``process_fn``.
     """
     if not self._multi_buf:
         batch_data, indice = self.buffer.sample(batch_size)
         return self.process_fn(batch_data, self.buffer, indice)

     if batch_size > 0:
         lens = [len(b) for b in self.buffer]
         total = sum(lens)
         # Draw a *buffer index* per requested sample. The population
         # must be the number of buffers (not the number of stored
         # transitions) so that it matches the length of ``p``;
         # otherwise numpy rejects the call.
         batch_index = np.random.choice(
             len(lens), batch_size, p=np.array(lens) / total)
     else:
         # No per-buffer quota: every buffer is sampled with size 0.
         batch_index = np.array([])
     batch_data = Batch()
     for i, b in enumerate(self.buffer):
         # Number of requested samples that were assigned to buffer ``i``.
         cur_batch = (batch_index == i).sum()
         # Skip buffers that received no sample, unless all data was
         # requested (non-positive batch_size).
         if batch_size <= 0 or cur_batch:
             batch, indice = b.sample(cur_batch)
             batch = self.process_fn(batch, b, indice)
             batch_data.append(batch)
     return batch_data
def test_Batch():
    """Demonstrate indexing, ``len()``, ``append()`` and ``split()`` on a
    ``Batch``.

    The function only prints intermediate results; it makes no assertions
    and returns ``None``.
    """
    # data is a batch holding 4 transitions
    data = Batch(obs=np.array([[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 1]]),
                 rew=np.array([0, 0, 0, 1]))
    print(data[0])
    print(len(data))
    print("--------------------")
    # NOTE(review): the appended batch has 3 ``obs`` rows but 4 ``rew``
    # entries -- this looks like a typo; confirm before relying on the
    # printed lengths.
    data.append(
        Batch(obs=np.array([[1, 0, 0], [1, 0, 1], [1, 1, 0]]),
              rew=np.array([-1, -1, -1, -1])))
    print(data)
    print(len(data))
    print("--------------------")
    # the last mini-batch may hold fewer than 3 elements
    for mini_batch in data.split(size=3, permute=False):
        print(mini_batch)