def test_create_float_list_tensor(self): tensorizer = FloatListTensorizer( column="dense", dim=2, error_check=True, normalize=False ) tests = [ ("[0.1,0.2]", [0.1, 0.2]), # comma ("[0.1, 0.2]", [0.1, 0.2]), # comma with single space ("[0.1, 0.2]", [0.1, 0.2]), # comma with multiple spaces ("[0.1 0.2]", [0.1, 0.2]), # space ("[0.1 0.2]", [0.1, 0.2]), # multiple spaces ("[ 0.1 0.2]", [0.1, 0.2]), # space after [ ("[0.1 0.2 ]", [0.1, 0.2]), # space before ] ("[0. 1.]", [0.0, 1.0]), # 0., 1. ] for raw, expected in tests: row = {"dense": load_float_list(raw)} numberized = tensorizer.numberize(row) self.assertEqual(expected, numberized) precision.FP16_ENABLED = True batch = [] for raw, _ in tests: row = {"dense": load_float_list(raw)} batch.append(tensorizer.numberize(row)) tensor = tensorizer.tensorize(batch) self.assertEqual(list(tensor.size()), [8, 2]) self.assertEqual(tensor.dtype, torch.float16) precision.FP16_ENABLED = False
def test_create_float_list_seq_tensor(self): tensorizer = FloatListSeqTensorizer(column="dense", dim=2, error_check=True) tests = [ ( ["[0.1,0.2]", "[0.1, 0.2]", "[0.1, 0.2]", "[0.1 0.2]"], [[0.1, 0.2], [0.1, 0.2], [0.1, 0.2], [0.1, 0.2]], 4, ), ( [ "[0.1 0.2]", "[ 0.1 0.2]", "[0.1 0.2 ]", "[ 0.1 0.2 ]", "[0. 1.]" ], [[0.1, 0.2], [0.1, 0.2], [0.1, 0.2], [0.1, 0.2], [0.0, 1.0]], 5, ), ] for raw_list, expected, expected_length in tests: row = {"dense": [load_float_list(raw) for raw in raw_list]} numberized, numberized_len = tensorizer.numberize(row) self.assertEqual(expected, numberized) self.assertEqual(expected_length, numberized_len) batch = [] for raw_list, _, _ in tests: row = {"dense": [load_float_list(raw) for raw in raw_list]} tensor, tensor_len = tensorizer.numberize(row) batch.append((tensor, tensor_len)) tensor, tensor_lens = tensorizer.tensorize(batch) self.assertEqual(list(tensor.size()), [2, 5, 2]) self.assertEqual(tensor.dtype, torch.float) self.assertEqual(tensor_lens.tolist(), [4, 5])
def test_float_list_tensor_prepare_input(self): tensorizer = FloatListTensorizer( column="dense", dim=2, error_check=True, normalize=False ) tests = [("[0.1,0.2]", [0.1, 0.2])] for raw, expected in tests: row = {"dense": load_float_list(raw)} numberized = tensorizer.prepare_input(row) self.assertEqual(expected, numberized)
def test_float_list_seq_tensor_prepare_input(self): tensorizer = FloatListSeqTensorizer(column="dense", dim=2, error_check=True) tests = [( ["[0.1,0.2]", "[0.1, 0.2]", "[0.1, 0.2]", "[0.1 0.2]"], [[0.1, 0.2], [0.1, 0.2], [0.1, 0.2], [0.1, 0.2]], 4, )] for raw_list, expected, expect_length in tests: row = {"dense": [load_float_list(raw) for raw in raw_list]} numberized, numberized_len = tensorizer.prepare_input(row) self.assertEqual(expected, numberized) self.assertEqual(expect_length, numberized_len)
def test_create_float_list_tensor(self): tensorizer = FloatListTensorizer(column="dense", dim=2, error_check=True) tests = [ ("[0.1,0.2]", [0.1, 0.2]), # comma ("[0.1, 0.2]", [0.1, 0.2]), # comma with single space ("[0.1, 0.2]", [0.1, 0.2]), # comma with multiple spaces ("[0.1 0.2]", [0.1, 0.2]), # space ("[0.1 0.2]", [0.1, 0.2]), # multiple spaces ("[ 0.1 0.2]", [0.1, 0.2]), # space after [ ("[0.1 0.2 ]", [0.1, 0.2]), # space before ] ("[0. 1.]", [0.0, 1.0]), # 0., 1. ] for raw, expected in tests: row = {"dense": load_float_list(raw)} numberized = tensorizer.numberize(row) self.assertEqual(expected, numberized)