def test_feature_sampling(self):
    """Two features built from the same data and seed sample identically.

    Both ``Feature`` objects wrap the same array and use ``random_seed=0``;
    their ``input_sample`` attributes must compare equal element-wise.
    """
    size = 1024
    values = generate_array_floats(n=size)

    first = Feature(values, random_seed=0)
    second = Feature(values, random_seed=0)

    samples_match = numpy.array_equal(
        first.input_sample, second.input_sample)
    self.assertTrue(samples_match)
def test_dataset_feature_names(self):
    """Named features are reachable through the dataset by their names.

    Builds a ``DataSet`` from two features named ``"a"`` and ``"b"`` and
    checks that:

    * ``dataset.count`` equals the number of generated rows,
    * ``dataset.features[name].values`` equals the source feature values,
    * ``dataset[name, :num]`` equals the same slice of the source feature.
    """
    num = SAMPLE_SIZE_SMALL
    feat1 = Feature(generate_array_floats(n=num), name="a")
    feat2 = Feature(generate_array_floats(n=num), name="b")
    dataset = DataSet([feat1, feat2])
    named = (("a", feat1), ("b", feat2))

    # The original asserted this twice in a row; once is sufficient.
    self.assertEqual(num, dataset.count)

    # Lookup through the ``features`` mapping.
    for name, feat in named:
        self.assertTrue(
            numpy.array_equal(feat.values, dataset.features[name].values),
            "values differ for feature %r" % name)

    # Lookup through ``dataset[name, slice]`` indexing.
    for name, feat in named:
        self.assertTrue(
            numpy.array_equal(dataset[name, :num], feat[:num]),
            "indexed slice differs for feature %r" % name)
def test_dataset_custom_loader(self):
    """A dataset fed by a custom loader matches one fed by the raw array.

    The loader only implements ``__len__`` and ``__getitem__`` and forwards
    both to the generated array. With ``random_seed=0`` on both datasets,
    the results of ``input_fn()`` must compare equal.
    """
    size = SAMPLE_SIZE_SMALL
    arr = generate_array_floats(n=size)

    class MyCustomDataLoader(object):
        # Thin wrapper: delegates length and indexing to ``arr``.

        def __getitem__(self, idx):
            return arr[idx]

        def __len__(self):
            return len(arr)

    from_array = DataSet([Feature(arr)], random_seed=0)
    from_loader = DataSet([Feature(MyCustomDataLoader())], random_seed=0)

    expected = from_array.input_fn()
    actual = from_loader.input_fn()
    self.assertTrue(numpy.array_equal(expected, actual))
def test_feature_indexing(self):
    """Slicing a feature yields the expected number of elements.

    Covers an open-start slice, an explicit start/stop slice, a slice with
    step 1 and a slice with step 2 over the first ten positions.
    """
    size = 1024
    feat = Feature(generate_array_floats(n=size))

    # (expected length, slice) pairs; each slice object is exactly what the
    # corresponding ``feat[a:b:c]`` subscript would pass to ``__getitem__``.
    cases = (
        (10, slice(None, 10)),   # feat[:10]
        (10, slice(0, 10)),      # feat[0:10]
        (10, slice(0, 10, 1)),   # feat[0:10:1]
        (5, slice(0, 10, 2)),    # feat[0:10:2]
    )
    for expected_len, window in cases:
        self.assertEqual(expected_len, len(feat[window]))
def test_feature_init(self):
    """A freshly built feature mirrors the array it was built from.

    Checks the length (directly and through a full slice), the ``values``
    attribute, a ``[:num]`` slice, and that ``sample_var`` is not ``-1``.
    """
    size = 1024
    raw = generate_array_floats(n=size)
    feat = Feature(raw)

    # Length, both of the feature itself and of a full slice.
    self.assertEqual(size, len(feat))
    whole = feat[:]
    self.assertEqual(size, len(whole))

    # Content, through the attribute and through slicing.
    values_match = numpy.array_equal(raw, feat.values)
    self.assertTrue(values_match)
    head_matches = numpy.array_equal(raw[:size], feat[:size])
    self.assertTrue(head_matches)

    self.assertNotEqual(feat.sample_var, -1)
def test_dataset_different_shapes(self):
    """A dataset can hold features generated by different helpers.

    One feature comes from ``generate_array_floats`` and the other from
    ``generate_onehot_matrix``. The test checks ``dataset.count``, lookup
    through ``dataset.features``, lookup through ``dataset[name, :num]``,
    and the all-features form ``dataset[:, :num]``.
    """
    num = SAMPLE_SIZE_SMALL
    feat1 = Feature(generate_array_floats(n=num), name="feat1")
    feat2 = Feature(generate_onehot_matrix(n=num), name="feat2")
    dataset = DataSet([feat1, feat2])
    named = (("feat1", feat1), ("feat2", feat2))

    # The original asserted this twice in a row; once is sufficient.
    self.assertEqual(num, dataset.count)

    # Lookup through the ``features`` mapping.
    for name, feat in named:
        self.assertTrue(
            numpy.array_equal(feat.values, dataset.features[name].values),
            "values differ for feature %r" % name)

    # Lookup through ``dataset[name, slice]`` indexing.
    for name, feat in named:
        self.assertTrue(
            numpy.array_equal(dataset[name, :num], feat[:num]),
            "indexed slice differs for feature %r" % name)

    # Selecting every feature at once should give one column per feature.
    # NOTE(review): zip() stops at the shorter input, so a result with
    # fewer columns than features would go unnoticed here; consider also
    # asserting the lengths once the return type of dataset[:, :num] is
    # confirmed to support len().
    columns = dataset[:, :num]
    expected_columns = [feat.values[:num] for feat in (feat1, feat2)]
    for actual, expected in zip(columns, expected_columns):
        self.assertTrue(numpy.array_equal(actual, expected))
def test_feature_custom_loader(self):
    """A feature fed by a custom loader matches one fed by the raw array.

    The loader only implements ``__len__`` and ``__getitem__`` and forwards
    both to the generated array. With ``random_seed=0`` on both features,
    ``input_sample`` and the result of ``sampler()`` must compare equal.
    """
    size = 1024
    data = generate_array_floats(n=size)

    class MyCustomDataLoader(object):
        # Thin wrapper: delegates length and indexing to ``data``.

        def __getitem__(self, idx):
            return data[idx]

        def __len__(self):
            return len(data)

    from_array = Feature(data, random_seed=0)
    from_loader = Feature(MyCustomDataLoader(), random_seed=0)

    samples_match = numpy.array_equal(
        from_array.input_sample, from_loader.input_sample)
    self.assertTrue(samples_match)

    drawn_from_array = from_array.sampler()
    drawn_from_loader = from_loader.sampler()
    self.assertTrue(numpy.array_equal(drawn_from_array, drawn_from_loader))
def test_dataset_mismatch_len(self):
    """Building a dataset from features of unequal length must fail.

    One feature has ``num`` rows and the other ``num * 2``; constructing a
    ``DataSet`` from both is expected to raise ``AssertionError``.
    """
    base = SAMPLE_SIZE_SMALL
    short_feat = Feature(generate_array_floats(n=base))
    long_feat = Feature(generate_array_floats(n=base * 2))

    with self.assertRaises(AssertionError):
        DataSet([short_feat, long_feat])