def test_csv_csv_writeall_tensorlist(self):
    """Round-trip a labeled CSV dataset through ``write_all``.

    ``data_int1.csv`` is read for two epochs, everything is written out
    through a ``CSVDataWriter``, and the written file is read back for a
    single epoch. Both arrays returned by ``read_all`` must agree between
    the first and the second read.
    """
    def open_dataset(files, defaults, num_epochs):
        # The two reads differ only in the file, the per-column defaults
        # and the number of epochs; every other option is shared.
        return spn.CSVFileDataset(files,
                                  num_vals=[None] * 3,
                                  defaults=defaults,
                                  num_epochs=num_epochs,
                                  batch_size=4,
                                  shuffle=False,
                                  num_labels=2,
                                  min_after_dequeue=1000,
                                  num_threads=1,
                                  allow_smaller_final_batch=True)

    # First pass: read the reference file and write it back out
    source = open_dataset(self.data_path("data_int1.csv"),
                          [[101], [102], [103.0], [104.0], [105.0]],
                          num_epochs=2)
    out_file = self.out_path(self.cid() + ".csv")
    csv_writer = spn.CSVDataWriter(out_file)
    original = source.read_all()
    source.write_all(csv_writer)

    # Second pass: read the file that was just written
    # (note the defaults here are 2xx rather than 1xx)
    reread_dataset = open_dataset(out_file,
                                  [[201], [202], [203.0], [204.0], [205.0]],
                                  num_epochs=1)
    reread = reread_dataset.read_all()

    # First array is compared approximately, second one exactly
    np.testing.assert_array_almost_equal(original[0], reread[0])
    np.testing.assert_array_equal(original[1], reread[1])
def test_read_all_labeled_csv_file_dataset(self):
    """Test read_all for CSV file with 2 labels."""
    epochs = 2
    dataset = spn.CSVFileDataset(
        self.data_path(["data_int1.csv", "data_int2.csv"]),
        num_vals=[255] * 3,
        defaults=[[101], [102], [103], [104], [105]],
        num_epochs=epochs,
        batch_size=3,
        shuffle=False,
        num_labels=2,
        min_after_dequeue=1000,
        num_threads=1,
        allow_smaller_final_batch=True)
    result = dataset.read_all()

    # read_all must return exactly two arrays
    self.assertEqual(len(result), 2)

    # Expected content of a single epoch; with shuffling disabled the same
    # rows are expected once per epoch, in the same order.
    epoch_values = [[3, 4, 5], [8, 9, 10], [103, 14, 15],
                    [18, 19, 20], [103, 24, 25], [28, 104, 30],
                    [33, 104, 35], [38, 104, 40], [43, 104, 45],
                    [48, 104, 50]]
    epoch_labels = [[1, 2], [6, 102], [11, 12], [16, 102], [21, 22],
                    [26, 27], [31, 32], [36, 37], [41, 42], [46, 47]]

    np.testing.assert_array_equal(
        result[0], np.array(epoch_values * epochs, dtype=np.int32))
    np.testing.assert_array_equal(
        result[1], np.array(epoch_labels * epochs, dtype=np.int32))
def test_image_gray_float_csv_writeall(self):
    """Write an image dataset to CSV and check that it reads back unchanged.

    Images matching ``img_dir1/*-{*}.png`` are loaded in FLOAT format,
    written with a ``CSVDataWriter`` and re-read through a
    ``CSVFileDataset`` with one label column and 25 value columns.
    """
    num_value_columns = 25

    # Read and write
    images = spn.ImageDataset(
        image_files=self.data_path("img_dir1/*-{*}.png"),
        format=spn.ImageFormat.FLOAT,
        num_epochs=1,
        batch_size=2,
        shuffle=False,
        ratio=1,
        crop=0,
        accurate=True,
        allow_smaller_final_batch=True)
    csv_path = self.out_path(self.cid() + ".csv")
    csv_writer = spn.CSVDataWriter(path=csv_path)
    from_images = images.read_all()
    images.write_all(csv_writer)

    # Re-read; the first column defaults to an empty byte string, the
    # remaining ones to 1.0
    column_defaults = [[b'']] + [[1.0] for _ in range(num_value_columns)]
    csv_dataset = spn.CSVFileDataset(
        files=csv_path,
        num_vals=[None] * num_value_columns,
        defaults=column_defaults,
        num_epochs=1,
        batch_size=2,
        shuffle=False,
        num_labels=1,
        allow_smaller_final_batch=True)
    from_csv = csv_dataset.read_all()

    # Compare
    np.testing.assert_allclose(from_images[0], from_csv[0])
    np.testing.assert_array_equal(from_images[1], from_csv[1])
def test_csv_data_writer(self):
    """Write two identical batches with CSVDataWriter and read them back.

    The file is read with a ``CSVFileDataset`` using one label column and
    two value columns; the result must equal the written batch repeated
    twice.
    """
    # Write
    csv_path = self.out_path(self.cid() + ".csv")
    csv_writer = spn.CSVDataWriter(csv_path)
    labels = np.array([1, 2, 3, 4])
    # Rows are (1/1, 1/2), (1/3, 1/4), (1/5, 1/6), (1/7, 1/8)
    values = np.array([[1 / d, 1 / (d + 1)] for d in (1, 3, 5, 7)])
    for _ in range(2):
        csv_writer.write(values, labels)

    # Read
    dataset = spn.CSVFileDataset(csv_path,
                                 num_vals=[None] * 2,
                                 defaults=[[1], [1.0], [1.0]],
                                 num_epochs=1,
                                 batch_size=10,
                                 shuffle=False,
                                 num_labels=1,
                                 min_after_dequeue=1000,
                                 num_threads=1,
                                 allow_smaller_final_batch=True)
    result = dataset.read_all()

    # Compare: values approximately, labels exactly (after flattening)
    expected_values = np.concatenate((values, values))
    expected_labels = np.concatenate((labels, labels))
    np.testing.assert_array_almost_equal(expected_values, result[0])
    np.testing.assert_array_equal(expected_labels, result[1].flatten())
def test_labeled_csv_file_dataset_int(self):
    """Batch generation for CSV file with integer data and 2 labels"""
    # Note: shuffling is NOT tested
    batch_size = 3
    epochs = 2
    num_labels = 2
    dataset = spn.CSVFileDataset(
        self.data_path(["data_int1.csv", "data_int2.csv"]),
        num_vals=[255] * 3,
        defaults=[[101], [102], [103], [104], [105]],
        num_epochs=epochs,
        batch_size=batch_size,
        shuffle=False,
        num_labels=num_labels,
        min_after_dequeue=1000,
        num_threads=1,
        allow_smaller_final_batch=True)

    # Expected rows of one epoch (labels in the leading columns); the
    # 101..105 entries are the per-column defaults showing up in the data.
    epoch_rows = [[1, 2, 3, 4, 5],
                  [6, 102, 8, 9, 10],
                  [11, 12, 103, 14, 15],
                  [16, 102, 18, 19, 20],
                  [21, 22, 103, 24, 25],
                  [26, 27, 28, 104, 30],
                  [31, 32, 33, 104, 35],
                  [36, 37, 38, 104, 40],
                  [41, 42, 43, 104, 45],
                  [46, 47, 48, 104, 50]]
    stream = epoch_rows * epochs

    # Batches run across the epoch boundary, so the 20 rows are cut into
    # groups of 3 with a final group of 2. Each expected batch is
    # [values, labels], the order in which CSVFileDataset provides them.
    batches = []
    for start in range(0, len(stream), batch_size):
        chunk = stream[start:start + batch_size]
        values = np.array([row[num_labels:] for row in chunk],
                          dtype=np.int32)
        labels = np.array([row[:num_labels] for row in chunk],
                          dtype=np.int32)
        batches.append([values, labels])

    self.generic_dataset_test(dataset, batches)
def test_labeled_csv_file_dataset_float(self):
    """Batch generation for CSV file with float data and 2 labels"""
    # Note: shuffling is NOT tested
    batch_size = 3
    epochs = 2
    num_labels = 2
    dataset = spn.CSVFileDataset(self.data_path("data_mix.csv"),
                                 num_vals=[None] * 3,
                                 defaults=[[101.0], [102.0], [103.0],
                                           [104.0], [105.0]],
                                 num_epochs=epochs,
                                 batch_size=batch_size,
                                 shuffle=False,
                                 num_labels=num_labels,
                                 min_after_dequeue=1000,
                                 num_threads=1,
                                 allow_smaller_final_batch=True)

    # Expected rows of one epoch (labels in the leading columns); the
    # 101..105 entries are the per-column defaults showing up in the data.
    epoch_rows = [[1., 2., 3., 4., 5.],
                  [6., 102., 8., 104., 10.],
                  [11., 12., 103., 104., 15.],
                  [16., 102., 18., 19., 20.],
                  [21., 22., 103., 24., 25.]]
    stream = epoch_rows * epochs

    # Batches run across the epoch boundary: 10 rows become three groups
    # of 3 and a final group of 1. Each expected batch is
    # [values, labels], the order in which CSVFileDataset provides them.
    batches = []
    for start in range(0, len(stream), batch_size):
        chunk = stream[start:start + batch_size]
        values = np.array([row[num_labels:] for row in chunk],
                          dtype=np.float32)
        labels = np.array([row[:num_labels] for row in chunk],
                          dtype=np.float32)
        batches.append([values, labels])

    self.generic_dataset_test(dataset, batches)
def test_unlabeled_csv_file_dataset_with_final_batch(self):
    """Batch generation (with smaller final batch) for CSV file without labels"""
    # Note: shuffling is NOT tested
    batch_size = 3
    epochs = 2
    dataset = spn.CSVFileDataset(
        self.data_path(["data_int1.csv", "data_int2.csv"]),
        num_vals=[255] * 5,
        defaults=[[101], [102], [103], [104], [105]],
        num_epochs=epochs,
        batch_size=batch_size,
        shuffle=False,
        num_labels=0,
        min_after_dequeue=1000,
        num_threads=1,
        allow_smaller_final_batch=True)

    # Expected rows of one epoch; the 102/103/104 entries are the
    # per-column defaults showing up in the data.
    epoch_rows = [[1, 2, 3, 4, 5],
                  [6, 102, 8, 9, 10],
                  [11, 12, 103, 14, 15],
                  [16, 102, 18, 19, 20],
                  [21, 22, 103, 24, 25],
                  [26, 27, 28, 104, 30],
                  [31, 32, 33, 104, 35],
                  [36, 37, 38, 104, 40],
                  [41, 42, 43, 104, 45],
                  [46, 47, 48, 104, 50]]
    stream = epoch_rows * epochs

    # With num_labels=0 every batch is a single array holding all five
    # columns. Batches run across the epoch boundary, so the 20 rows give
    # six batches of 3 and a smaller final batch of 2.
    batches = [
        np.array(stream[start:start + batch_size], dtype=np.int32)
        for start in range(0, len(stream), batch_size)
    ]

    self.generic_dataset_test(dataset, batches)
def test_labeled_csv_file_dataset_int_onelabel(self):
    """Batch generation for CSV file with integer data and 1 label"""
    # Note: shuffling is NOT tested
    batch_size = 3
    epochs = 2
    num_labels = 1
    dataset = spn.CSVFileDataset(
        self.data_path(["data_int1.csv", "data_int2.csv"]),
        num_vals=[255] * 4,
        defaults=[[101], [102], [103], [104], [105]],
        num_epochs=epochs,
        batch_size=batch_size,
        shuffle=False,
        num_labels=num_labels,
        min_after_dequeue=1000,
        num_threads=1,
        allow_smaller_final_batch=True)

    # Expected rows of one epoch (label in the leading column); the
    # 102/103/104 entries are the per-column defaults showing up in the
    # data.
    epoch_rows = [[1, 2, 3, 4, 5],
                  [6, 102, 8, 9, 10],
                  [11, 12, 103, 14, 15],
                  [16, 102, 18, 19, 20],
                  [21, 22, 103, 24, 25],
                  [26, 27, 28, 104, 30],
                  [31, 32, 33, 104, 35],
                  [36, 37, 38, 104, 40],
                  [41, 42, 43, 104, 45],
                  [46, 47, 48, 104, 50]]
    stream = epoch_rows * epochs

    # Batches run across the epoch boundary, so the 20 rows are cut into
    # groups of 3 with a final group of 2. Each expected batch is
    # [values, labels], the order in which CSVFileDataset provides them.
    batches = []
    for start in range(0, len(stream), batch_size):
        chunk = stream[start:start + batch_size]
        values = np.array([row[num_labels:] for row in chunk],
                          dtype=np.int32)
        labels = np.array([row[:num_labels] for row in chunk],
                          dtype=np.int32)
        batches.append([values, labels])

    self.generic_dataset_test(dataset, batches)