def _gen_rnn_batches_by_wheel(self, batch_size, num_steps, round_len, L, **_):
  """Yield RNN batches in which each row of a batch is a random
  sub-sequence of length `L` taken from one of this set's sequences.
  First introduced in sampling LOB data.

  A source sequence is picked by spinning a `Wheel` weighted by
  `self.structure` when `th.use_wheel` is True, otherwise uniformly
  (all weights equal to 1 / self.size). A start offset is then sampled
  so that the window of length `L` fits inside the chosen sequence.

  :param batch_size: number of sub-sequences per batch; None or a
    negative value falls back to `self.size`
  :param num_steps: steps per yielded RNN batch; None or a negative
    value falls back to `L`
  :param round_len: expected number of batches per epoch
  :param L: sub-sequence length, must be specified
  :raises AssertionError: if the number of yielded batches differs
    from `round_len`
  """
  # Sanity check. Test `is None` BEFORE the comparison: in Python 3
  # `None < 0` raises TypeError, so the original ordering
  # (`batch_size < 0 or batch_size is None`) crashed for None inputs
  # and the None fallback was unreachable.
  if batch_size is None or batch_size < 0: batch_size = self.size
  if num_steps is None or num_steps < 0: num_steps = L

  # Generate feature list and target list
  features, targets = [], []
  wheel = Wheel(self.structure if th.use_wheel
                else list(np.ones([self.size]) / self.size))
  for _ in range(batch_size):
    # Choose a sequence to sample from, then a legal start offset
    # (high bound is exclusive, hence the `+ 1`)
    index = wheel.spin()
    t = np.random.randint(0, self.structure[index] - L + 1)
    x = self.features[index][t:t + L]
    y = self.targets[index][t:t + L]
    assert len(x) == len(y) == L
    features.append(x)
    targets.append(y)

  # Stack features and targets into a regular RNN-input DataSet
  features, targets = np.stack(features), np.stack(targets)
  data_set = DataSet(features, targets, is_rnn_input=True)
  assert data_set.size == batch_size

  # Generate RNN batches using DataSet.gen_rnn_batches
  counter = 0
  for batch in data_set.gen_rnn_batches(
      batch_size, num_steps, is_training=True):
    yield batch
    counter += 1

  # Check round_len
  if counter != round_len:
    raise AssertionError(
      '!! counter = {} while round_len = {}. (batch_size = {}, num_steps={})'
      ''.format(counter, round_len, batch_size, num_steps))
def rnn_batch_generator(data_set, batch_size, num_steps, is_training, round_len):
  """Yield one epoch of RNN batches guaranteed to cover all sequences.

  The total length of `data_set` is split evenly: each of the
  `batch_size` batch rows holds a sub-sequence of length
  L = sum(structure) // batch_size. `wise_man.apportion` distributes
  the rows among the source sequences, and `wise_man.spread` chooses a
  start offset for each row inside its sequence, randomly shifted by up
  to `th.random_shift_pct * L`.

  :param data_set: a SequenceSet to sample from
  :param batch_size: number of sub-sequences per batch (must divide the
    data so that L < min(structure) and L == th.sub_seq_len)
  :param num_steps: steps per yielded RNN batch
  :param is_training: must be True (this generator is training-only)
  :param round_len: expected number of batches per epoch
  :raises AssertionError: if the number of yielded batches differs
    from `round_len`
  """
  assert isinstance(data_set, SequenceSet) and is_training
  L = int(sum(data_set.structure) / batch_size)
  assert L < min(data_set.structure) and L == th.sub_seq_len
  rad = int(th.random_shift_pct * L)

  # Distribute batch_size to stocks, e.g. for a structure like
  # [23336, 44874, 38549, 54675, 93316]
  num_sequences = wise_man.apportion(data_set.structure, batch_size)

  # Generate feature list and target list
  features, targets = [], []
  for num, x, y in zip(num_sequences, data_set.features, data_set.targets):
    # Find starts for each sequence to sample
    starts = wise_man.spread(len(x), num, L, rad)
    # Sanity check
    assert len(starts) == num
    # Put the sub-sequences into corresponding lists
    for s in starts:
      features.append(x[s:s + L])
      targets.append(y[s:s + L])

  # Stack features and targets. Bind the batch to a NEW name instead of
  # rebinding the `data_set` parameter (the original shadowed it with a
  # DataSet, obscuring which object later lines refer to).
  sub_set = DataSet(np.stack(features), np.stack(targets), is_rnn_input=True)
  assert sub_set.size == batch_size

  # Generate RNN batches using DataSet.gen_rnn_batches
  counter = 0
  for batch in sub_set.gen_rnn_batches(
      batch_size, num_steps, is_training=True):
    yield batch
    counter += 1

  # Check round_len
  if counter != round_len:
    raise AssertionError(
      '!! counter = {} while round_len = {}. (batch_size = {}, num_steps={})'
      ''.format(counter, round_len, batch_size, num_steps))