예제 #1
0
    def test_create_float_list_tensor(self):
        """Exercise FloatListTensorizer.numberize and FP16 tensorize.

        First, each raw string (different separators and spacing) is parsed
        with ``load_float_list`` and numberized; the result must equal the
        expected list of floats.

        Then the same rows are numberized again and tensorized as one batch
        while ``precision.FP16_ENABLED`` is True; the resulting tensor must
        have one row per test case, two columns, and dtype ``torch.float16``.
        """
        tensorizer = FloatListTensorizer(
            column="dense", dim=2, error_check=True, normalize=False
        )
        tests = [
            ("[0.1,0.2]", [0.1, 0.2]),  # comma
            ("[0.1, 0.2]", [0.1, 0.2]),  # comma with single space
            ("[0.1,  0.2]", [0.1, 0.2]),  # comma with multiple spaces
            ("[0.1 0.2]", [0.1, 0.2]),  # space
            ("[0.1  0.2]", [0.1, 0.2]),  # multiple spaces
            ("[ 0.1  0.2]", [0.1, 0.2]),  # space after [
            ("[0.1  0.2 ]", [0.1, 0.2]),  # space before ]
            ("[0.  1.]", [0.0, 1.0]),  # 0., 1.
        ]
        for raw, expected in tests:
            row = {"dense": load_float_list(raw)}
            numberized = tensorizer.numberize(row)
            self.assertEqual(expected, numberized)

        # FP16_ENABLED is shared state on the ``precision`` module. Reset it in
        # ``finally`` so a failing assertion (or an exception in tensorize)
        # cannot leave it switched on for tests that run afterwards.
        precision.FP16_ENABLED = True
        try:
            batch = [
                tensorizer.numberize({"dense": load_float_list(raw)})
                for raw, _ in tests
            ]
            tensor = tensorizer.tensorize(batch)
            # One row per test case, ``dim`` (2) values per row.
            self.assertEqual(list(tensor.size()), [len(tests), 2])
            self.assertEqual(tensor.dtype, torch.float16)
        finally:
            precision.FP16_ENABLED = False
예제 #2
0
    def test_create_float_list_seq_tensor(self):
        """Exercise FloatListSeqTensorizer.numberize and tensorize.

        Each test case is a sequence of raw float-list strings. numberize is
        expected to return the parsed lists together with the sequence length.
        Tensorizing both cases as one batch is expected to give a float tensor
        of size [2, 5, 2] (presumably padded to the longest sequence; confirm
        against the tensorizer) and lengths [4, 5].
        """

        def make_row(raw_items):
            # Parse every raw string of the sequence into the "dense" column.
            return {"dense": [load_float_list(item) for item in raw_items]}

        tensorizer = FloatListSeqTensorizer(
            column="dense", dim=2, error_check=True
        )

        four_item_case = (
            ["[0.1,0.2]", "[0.1, 0.2]", "[0.1,  0.2]", "[0.1 0.2]"],
            [[0.1, 0.2], [0.1, 0.2], [0.1, 0.2], [0.1, 0.2]],
            4,
        )
        five_item_case = (
            ["[0.1  0.2]", "[ 0.1  0.2]", "[0.1  0.2 ]", "[ 0.1 0.2 ]", "[0.  1.]"],
            [[0.1, 0.2], [0.1, 0.2], [0.1, 0.2], [0.1, 0.2], [0.0, 1.0]],
            5,
        )
        tests = [four_item_case, five_item_case]

        # Per-row check: values and reported length.
        for raw_items, want_values, want_length in tests:
            got_values, got_length = tensorizer.numberize(make_row(raw_items))
            self.assertEqual(want_values, got_values)
            self.assertEqual(want_length, got_length)

        # Batch check: numberize every row again, then tensorize together.
        batch = []
        for raw_items, _, _ in tests:
            values, length = tensorizer.numberize(make_row(raw_items))
            batch.append((values, length))

        batch_tensor, batch_lengths = tensorizer.tensorize(batch)
        self.assertEqual(list(batch_tensor.size()), [2, 5, 2])
        self.assertEqual(batch_tensor.dtype, torch.float)
        self.assertEqual(batch_lengths.tolist(), [4, 5])
예제 #3
0
 def test_float_list_tensor_prepare_input(self):
     """Check that FloatListTensorizer.prepare_input returns the parsed floats.

     The raw string is parsed with ``load_float_list`` and placed in the
     "dense" column; prepare_input must return the expected list.
     """
     tensorizer = FloatListTensorizer(
         column="dense", dim=2, error_check=True, normalize=False
     )
     cases = [("[0.1,0.2]", [0.1, 0.2])]
     for text, want in cases:
         parsed = load_float_list(text)
         got = tensorizer.prepare_input({"dense": parsed})
         self.assertEqual(want, got)
예제 #4
0
 def test_float_list_seq_tensor_prepare_input(self):
     """Check FloatListSeqTensorizer.prepare_input on a sequence of lists.

     prepare_input is expected to return a pair: the parsed float lists and
     the number of lists in the sequence.
     """
     tensorizer = FloatListSeqTensorizer(
         column="dense", dim=2, error_check=True
     )
     raw_sequence = ["[0.1,0.2]", "[0.1, 0.2]", "[0.1,  0.2]", "[0.1 0.2]"]
     parsed_sequence = [[0.1, 0.2], [0.1, 0.2], [0.1, 0.2], [0.1, 0.2]]
     tests = [(raw_sequence, parsed_sequence, 4)]
     for raw_items, want_values, want_length in tests:
         dense = [load_float_list(item) for item in raw_items]
         got_values, got_length = tensorizer.prepare_input({"dense": dense})
         self.assertEqual(want_values, got_values)
         self.assertEqual(want_length, got_length)
예제 #5
0
 def test_create_float_list_tensor(self):
     """Check FloatListTensorizer.numberize across separator/spacing variants.

     Every raw string is parsed with ``load_float_list`` and numberized; the
     result must equal the expected list of floats.
     """
     tensorizer = FloatListTensorizer(column="dense", dim=2, error_check=True)
     # Different spellings that must all numberize to [0.1, 0.2].
     spellings_of_01_02 = (
         "[0.1,0.2]",  # comma
         "[0.1, 0.2]",  # comma with single space
         "[0.1,  0.2]",  # comma with multiple spaces
         "[0.1 0.2]",  # space
         "[0.1  0.2]",  # multiple spaces
         "[ 0.1  0.2]",  # space after [
         "[0.1  0.2 ]",  # space before ]
     )
     cases = [(text, [0.1, 0.2]) for text in spellings_of_01_02]
     cases.append(("[0.  1.]", [0.0, 1.0]))  # 0., 1.
     for text, want in cases:
         got = tensorizer.numberize({"dense": load_float_list(text)})
         self.assertEqual(want, got)