class TestDatasetSame(TestCase):
    """Sampling checks for a dataset whose x index has no offset from the labels."""

    def setUp(self) -> None:
        """Build a dataset with ``x_window`` of 1 and every x marked as existing."""
        self.plus_index = 0
        # Builtin ``bool`` replaces the ``np.bool`` alias, which NumPy
        # deprecated in 1.20 and removed in 1.24 (AttributeError there).
        x_exist_new = np.ones(len_mock, dtype=bool)
        x_window = 1
        self.x_generator = XGenerator(
            raw_index=mock_index + self.plus_index,
            raw_sequence=mock_prices,
            x_window=x_window,
            x_exist=x_exist_new,
        )
        self.labels = Labels(labels=mock_labels, index=mock_index,
                             with_index=False)
        self.dataset = Dataset(x=self.x_generator, y=self.labels, name="test")

    def test_returns(self):
        """Assert the last x value equals both label entries for each sampler.

        Runs 100 draws of ``get_shuffled_sample`` followed by 100 draws of
        ``get_oversampled_sample``.
        """
        samplers = (
            self.dataset.get_shuffled_sample,
            self.dataset.get_oversampled_sample,
        )
        for draw in samplers:
            for _ in range(100):
                sample = draw()
                x_sample = sample[0]
                y_sample = sample[1]
                assert x_sample[-1] == y_sample[0] == y_sample[1]
def setUp(self) -> None:
    """Create the fixture dataset with the x index offset by ``plus_index`` (1).

    Labels keep the unshifted ``mock_index``; only the index handed to
    ``XGenerator`` has the offset added.
    """
    self.plus_index = 1
    offset_index = mock_index + self.plus_index

    self.x_generator = XGenerator(
        raw_index=offset_index,
        raw_sequence=mock_prices,
        x_window=x_window,
        x_exist=x_exist,
    )
    self.labels = Labels(
        labels=mock_labels,
        index=mock_index,
        with_index=False,
    )
    self.dataset = Dataset(
        x=self.x_generator,
        y=self.labels,
        name="test",
    )
def setUp(self) -> None:
    """Create a dataset whose labels are all ``[0, 0]`` except row 50 (``[1, 1]``)."""
    self.class_high = [0, 0]
    self.class_low = [1, 1]

    # One row of the frequent class per mock entry, then a single row of
    # the rare class written at position 50.
    label_rows = [self.class_high for _ in range(len_mock)]
    imbalanced_labels = np.asarray(label_rows)
    imbalanced_labels[50] = self.class_low

    self.x_generator = XGenerator(
        raw_index=mock_index,
        raw_sequence=mock_prices,
        x_window=x_window,
        x_exist=x_exist,
    )
    self.labels = Labels(
        labels=imbalanced_labels,
        index=mock_index,
        with_index=False,
    )
    self.dataset = Dataset(x=self.x_generator, y=self.labels, name="test")
class TestDatasetoversampling(TestCase):
    """Checks that oversampled draws return both label classes about equally often."""

    def setUp(self) -> None:
        """Create labels that are all ``[0, 0]`` except row 50, which is ``[1, 1]``."""
        self.class_high = [0, 0]
        self.class_low = [1, 1]

        label_rows = [self.class_high for _ in range(len_mock)]
        imbalanced_labels = np.asarray(label_rows)
        imbalanced_labels[50] = self.class_low

        self.x_generator = XGenerator(
            raw_index=mock_index,
            raw_sequence=mock_prices,
            x_window=x_window,
            x_exist=x_exist,
        )
        self.labels = Labels(
            labels=imbalanced_labels,
            index=mock_index,
            with_index=False,
        )
        self.dataset = Dataset(x=self.x_generator, y=self.labels, name="test")

    def test_oversampling(self):
        """Draw 2000 oversampled samples and require a high/low ratio in (0.8, 1.2)."""
        n_high = 0
        n_low = 0
        for _ in range(2000):
            _x, label = self.dataset.get_oversampled_sample()
            n_high += int(np.array_equal(label, self.class_high))
            n_low += int(np.array_equal(label, self.class_low))

        ratio = n_high / n_low
        assert 0.8 < ratio < 1.2
class TestDatasetPlus1(TestCase):
    """Dataset tests where the x index is ``mock_index`` plus an offset of 1."""

    def setUp(self) -> None:
        """Build the generator on the offset index and the labels on the plain one."""
        self.plus_index = 1
        offset_index = mock_index + self.plus_index

        self.x_generator = XGenerator(
            raw_index=offset_index,
            raw_sequence=mock_prices,
            x_window=x_window,
            x_exist=x_exist,
        )
        self.labels = Labels(
            labels=mock_labels,
            index=mock_index,
            with_index=False,
        )
        self.dataset = Dataset(x=self.x_generator, y=self.labels, name="test")

    def test_len(self):
        """Length equals mock length minus the offset minus the non-existing x count."""
        n_missing = np.sum(~x_exist)
        expected_len = len_mock - self.plus_index - n_missing
        assert len(self.dataset) == expected_len

    def test_returns(self):
        """First and last dataset items line up with the generator and labels."""
        first_label_pos = self.plus_index + np.sum(~x_exist)
        last_x_pos = -self.plus_index - 1

        assert np.array_equal(self.dataset[0][0], self.x_generator[0])
        assert np.array_equal(self.dataset[0][1], self.labels[first_label_pos])
        assert np.array_equal(self.dataset[-1][0], self.x_generator[last_x_pos])
        assert np.array_equal(self.dataset[-1][1], self.labels[-1])

    def test_n_samples(self):
        """The per-class counts add up to the dataset length."""
        _classes, class_counts = self.dataset.get_count_classes()
        assert np.sum(class_counts) == len(self.dataset)

    def test_get(self):
        """Exercise shuffled sampling, both unrestricted and by class.

        Asserts that the last x value plus one equals the first label entry,
        that sampling by the last counted class returns that class, and that
        sampling by the first counted class raises ``ValueError``.
        """
        drawn = self.dataset.get_shuffled_sample()
        assert np.array_equal(drawn[0][-1] + 1, drawn[1][0])

        classes, _counts = self.dataset.get_count_classes()
        last_class = classes[-1]
        picked = self.dataset.get_shuffled_sample_from_specified_class(
            last_class)
        assert np.array_equal(picked[1], last_class)

        classes, _counts = self.dataset.get_count_classes()
        first_class = classes[0]
        with self.assertRaises(ValueError):
            self.dataset.get_shuffled_sample_from_specified_class(first_class)
class TestDatasetPlusAndPrice(TestCase):
    """Sampling checks when both the x index and the prices are offset by 1."""

    def setUp(self) -> None:
        """Build the dataset with offset index and prices and a window 10 wider."""
        self.plus_index = 1
        offset_index = mock_index + self.plus_index
        offset_prices = mock_prices + self.plus_index
        wider_window = x_window + 10

        self.x_generator = XGenerator(
            raw_index=offset_index,
            raw_sequence=offset_prices,
            x_window=wider_window,
            x_exist=x_exist,
        )
        self.labels = Labels(
            labels=mock_labels,
            index=mock_index,
            with_index=False,
        )
        self.dataset = Dataset(x=self.x_generator, y=self.labels, name="test")

    def test_returns(self):
        """Over 100 shuffled draws the last x value equals both label entries."""
        for _ in range(100):
            drawn = self.dataset.get_shuffled_sample()
            features = drawn[0]
            targets = drawn[1]
            assert features[-1] == targets[0] == targets[1]
def setUp(self) -> None:
    """Build mock prices, index and labels, then a dataset and batch generator.

    The batch generator is created with ``shuffle=False``,
    ``oversampling=False`` and a batch size of 2.
    """
    len_mock = 1000
    x_window = 10

    # Builtin ``int``/``bool`` replace the ``np.int``/``np.bool`` aliases,
    # which NumPy deprecated in 1.20 and removed in 1.24.
    self.mock_prices = np.arange(len_mock, dtype=int)

    # Consecutive nanosecond timestamps starting at epoch 0.
    start_index = np.datetime64(0, "ns")
    self.mock_index = np.asarray(
        [start_index + step for step in range(len_mock)])

    self.x_non_exist = 30
    x_exist = np.ones(self.mock_index.shape, dtype=bool)
    # Use the attribute assigned just above. The bare name ``x_non_exist``
    # is never assigned in this method, so it could only resolve through
    # an outer scope.
    x_exist[:self.x_non_exist] = False

    self.batch_size = 2
    multi_class_n = 2
    # Each row repeats its own position ``multi_class_n`` times.
    self.mock_labels = np.repeat(
        np.arange(len_mock, dtype=int), multi_class_n
    ).reshape((len_mock, multi_class_n))

    self.x_generator = XGenerator(
        raw_index=self.mock_index,
        raw_sequence=self.mock_prices,
        x_window=x_window,
        x_exist=x_exist,
    )
    self.labels = Labels(labels=self.mock_labels, index=self.mock_index,
                         with_index=False)
    self.dataset = Dataset(x=self.x_generator, y=self.labels, name="test")
    self.batch_generator = BatchGenerator(
        dataset=self.dataset,
        batch_size=self.batch_size,
        shuffle=False,
        oversampling=False,
        iterations=None,
        stride=None,
        x_only=False,
    )
# Two-column labels: row ``i`` holds ``[i, i]``.
# Builtin ``int``/``bool`` replace the ``np.int``/``np.bool`` aliases, which
# NumPy deprecated in 1.20 and removed in 1.24.
mock_labels = np.repeat(np.arange(len_mock, dtype=int), 2).reshape(
    (len_mock, 2))

# Mark the first ``x_non_exist`` entries as having no x available.
x_exist = np.ones(mock_index.shape, dtype=bool)
x_non_exist = 30
x_exist[:x_non_exist] = False

if __name__ == "__main__":
    # Small timing demo: build a shuffled batch generator and time one batch.
    labels = Labels(labels=mock_labels, index=mock_index, with_index=False)
    # NOTE(review): ``mock_sequence`` is not defined in the visible part of
    # the file (other fixtures use ``mock_prices``) - confirm it exists.
    x_generator = XGenerator(raw_sequence=mock_sequence,
                             raw_index=mock_index,
                             x_window=x_window,
                             x_exist=x_exist,
                             with_index=False)
    dataset = Dataset(x=x_generator, y=labels, name="tutorial")

    batch_size = 10
    batch_generator = BatchGenerator(dataset=dataset,
                                     batch_size=batch_size,
                                     shuffle=True,
                                     stride=None)
    samples = len(batch_generator)

    # ``time.clock`` was removed in Python 3.8; ``perf_counter`` is the
    # documented replacement for measuring short durations.
    start = time.perf_counter()
    test_batch = batch_generator[0]
    duration = time.perf_counter() - start

    # NOTE(review): only one batch is fetched inside the timed region, yet
    # the rates below scale by ``len(batch_generator)`` - confirm intent.
    print(f"Samples per second:{samples * batch_size / duration}")
    print(test_batch[0].shape, test_batch[1].shape)
    print(f"Batches per second:{samples / duration}")