Ejemplo n.º 1
0
    def test_invalid_input_experiment(self):
        r"""Reject invalid `experiment` arguments given to `load`.

        Every example must make `BaseListTokenizer.load` raise either
        `TypeError` or `ValueError`. The first argument of the raised
        exception is then compared with the message expected for that
        exception type.
        """
        raise_msg = (
            'Must raise `TypeError` or `ValueError` when input `experiment` '
            'is invalid.'
        )
        text_msg = 'Inconsistent error message.'
        accepted_errors = (TypeError, ValueError)
        type_error_text = '`experiment` must be an instance of `str`.'
        value_error_text = '`experiment` must not be empty.'
        bad_experiments = (
            False, True, 0, 1, -1, 0.0, 1.0, math.nan, -math.nan, math.inf,
            -math.inf, 0j, 1j, '', b'', (), [], {}, set(), object(),
            lambda x: x, type, None, NotImplemented, ...,
        )

        for bad_experiment in bad_experiments:
            with self.assertRaises(accepted_errors, msg=raise_msg) as caught:
                BaseListTokenizer.load(experiment=bad_experiment)

            # Only two exception types can reach this point, so anything
            # that is not a `TypeError` is checked as a `ValueError`.
            expected_text = (
                type_error_text
                if isinstance(caught.exception, TypeError)
                else value_error_text
            )
            self.assertEqual(
                caught.exception.args[0],
                expected_text,
                msg=text_msg
            )
Ejemplo n.º 2
0
    def test_experiment_does_not_exist(self):
        r"""Expect `FileNotFoundError` when the tokenizer file is missing.

        For each experiment name tried, `BaseListTokenizer.load` must raise
        `FileNotFoundError`, and the first argument of the exception must
        name the `tokenizer.json` path built from `DATA_PATH` and that
        experiment name.
        """
        raise_msg = (
            'Must raise `FileNotFoundError` when `experiment` does not exist.'
        )
        text_msg = 'Inconsistent error message.'
        experiment_names = (
            self.__class__.experiment,
            'I-AM-A-TEST-AND-I-DONT-EXIST',
        )

        for name in experiment_names:
            with self.assertRaises(FileNotFoundError, msg=raise_msg) as caught:
                BaseListTokenizer.load(experiment=name)

            # Rebuild the path the error message is expected to mention.
            missing_file = os.path.join(DATA_PATH, name, 'tokenizer.json')
            expected_text = f'File {missing_file} does not exist.'
            self.assertEqual(caught.exception.args[0], expected_text,
                             msg=text_msg)
Ejemplo n.º 3
0
    def test_load_result(self):
        r"""Load `tokenizer.json` and compare the result with what was saved.

        Each example dictionary is dumped as JSON to `tokenizer.json` inside
        the class-level `test_dir`, then loaded with
        `BaseListTokenizer.load` using the class-level `experiment`. The
        loaded object must be a `BaseListTokenizer` and, for every key of
        the example, must expose an attribute with that name whose type and
        value match the example.

        The test file is removed after each example, whether or not the
        checks passed.
        """
        msg = 'Inconsistent `tokenizer.json` format.'
        examples = (
            {
                'is_uncased': False,
                'token_to_id': {
                    'A': 0,
                    'B': 1,
                    'C': 2,
                },
            },
            {
                'is_uncased': True,
                'token_to_id': {
                    'a': 0,
                    'b': 1,
                    'c': 2,
                },
            },
        )

        test_path = os.path.join(self.__class__.test_dir, 'tokenizer.json')

        for obj in examples:
            try:
                # Create test file.
                with open(test_path, 'w', encoding='utf-8') as output_file:
                    json.dump(obj, output_file)

                tokenizer = BaseListTokenizer.load(
                    experiment=self.__class__.experiment
                )

                self.assertIsInstance(tokenizer, BaseListTokenizer, msg=msg)

                for attr_key, attr_value in obj.items():
                    self.assertTrue(hasattr(tokenizer, attr_key), msg=msg)
                    self.assertIsInstance(
                        getattr(tokenizer, attr_key),
                        type(attr_value),
                        msg=msg
                    )
                    self.assertEqual(
                        getattr(tokenizer, attr_key),
                        attr_value,
                        msg=msg
                    )
            finally:
                # Clean up test file. The removal is guarded because the
                # file may never have been created (e.g. `open` failed); an
                # unconditional `os.remove` would then raise
                # `FileNotFoundError` and hide the original failure.
                if os.path.exists(test_path):
                    os.remove(test_path)