Beispiel #1
0
    def _gen_rnn_batches_by_wheel(self, batch_size, num_steps, round_len, L,
                                  **_):
        """Each sequence in batch is a sub-sequence of length L of a randomly
        selected sequence. First introduced in sampling LOB data.
        The sub-sequence length L must be specified.

        :param batch_size: number of sub-sequences to sample; None or a
            negative value means use self.size
        :param num_steps: RNN unroll length; None or a negative value means
            use L
        :param round_len: expected number of batches yielded per epoch
        :param L: length of each sampled sub-sequence
        :raises AssertionError: if the number of yielded batches differs
            from round_len
        """
        # Sanity check. Test `is None` FIRST: in Python 3, `None < 0`
        # raises TypeError, so the reversed order would crash on None
        # instead of applying the documented default.
        if batch_size is None or batch_size < 0:
            batch_size = self.size
        if num_steps is None or num_steps < 0:
            num_steps = L
        # Generate feature list and target list
        features, targets = [], []
        # Bias sequence selection by self.structure when th.use_wheel is
        # set; otherwise select uniformly.
        wheel = Wheel(self.structure if th.
                      use_wheel else list(np.ones([self.size]) / self.size))
        for _ in range(batch_size):
            # Choose a sequence to sample from, then a random window of
            # length L inside it (randint's upper bound is exclusive, so
            # t + L never runs past the end of the sequence)
            index = wheel.spin()
            t = np.random.randint(0, self.structure[index] - L + 1)
            x = self.features[index][t:t + L]
            y = self.targets[index][t:t + L]
            assert len(x) == len(y) == L
            features.append(x)
            targets.append(y)
        # Stack features and targets
        features, targets = np.stack(features), np.stack(targets)
        data_set = DataSet(features, targets, is_rnn_input=True)
        assert data_set.size == batch_size
        # Generate RNN batches using DataSet.gen_rnn_batches
        counter = 0
        for batch in data_set.gen_rnn_batches(batch_size,
                                              num_steps,
                                              is_training=True):
            yield batch
            counter += 1

        # Check round_len
        if counter != round_len:
            raise AssertionError(
                "!! counter = {} while round_len = {}. (batch_size = {}, num_steps={})"
                "".format(counter, round_len, batch_size, num_steps))
Beispiel #2
0
    def rnn_batch_generator(data_set, batch_size, num_steps, is_training,
                            round_len):
        """Yield one epoch of RNN training batches such that every sequence
        in `data_set` contributes at least one sub-sequence.

        :param data_set: a SequenceSet to sample sub-sequences from
        :param batch_size: number of sub-sequences stacked per batch
        :param num_steps: RNN unroll length
        :param is_training: must be True; this generator is training-only
        :param round_len: expected number of batches yielded per epoch
        :raises AssertionError: if the yielded batch count != round_len
        """
        assert isinstance(data_set, SequenceSet) and is_training
        # Sub-sequence length: average sequence length per batch slot;
        # must match the configured th.sub_seq_len
        sub_len = int(sum(data_set.structure) / batch_size)
        assert sub_len < min(data_set.structure) and sub_len == th.sub_seq_len
        shift_rad = int(th.random_shift_pct * sub_len)
        # Decide how many sub-sequences each sequence contributes
        # [23336, 44874, 38549, 54675, 93316]
        quotas = wise_man.apportion(data_set.structure, batch_size)
        # Collect the sampled sub-sequences
        x_list, y_list = [], []
        for quota, seq_x, seq_y in zip(quotas, data_set.features,
                                       data_set.targets):
            # Spread `quota` window starts (with random shift) over seq_x
            anchors = wise_man.spread(len(seq_x), quota, sub_len, shift_rad)
            assert len(anchors) == quota
            x_list.extend(seq_x[a:a + sub_len] for a in anchors)
            y_list.extend(seq_y[a:a + sub_len] for a in anchors)
        # Stack into a regular RNN-input DataSet and delegate batching
        batch_set = DataSet(np.stack(x_list), np.stack(y_list),
                            is_rnn_input=True)
        assert batch_set.size == batch_size
        produced = 0
        for batch in batch_set.gen_rnn_batches(batch_size, num_steps,
                                               is_training=True):
            yield batch
            produced += 1

        # Verify the epoch length matches the caller's expectation
        if produced != round_len:
            raise AssertionError(
                '!! counter = {} while round_len = {}. (batch_size = {}, num_steps={})'
                ''.format(produced, round_len, batch_size, num_steps))