Code example #1
    def test_from_pretrained_dynamic_tokenizer_legacy_format(self):
        tokenizer = AutoTokenizer.from_pretrained(
            "hf-internal-testing/test_dynamic_tokenizer_legacy",
            trust_remote_code=True)
        self.assertTrue(tokenizer.special_attribute_present)
        if is_tokenizers_available():
            # With the `tokenizers` library installed, the fast custom tokenizer is picked by default.
            self.assertEqual(tokenizer.__class__.__name__, "NewTokenizerFast")

            # Test we can also load the slow version
            tokenizer = AutoTokenizer.from_pretrained(
                "hf-internal-testing/test_dynamic_tokenizer_legacy",
                trust_remote_code=True,
                use_fast=False)
            self.assertTrue(tokenizer.special_attribute_present)
            self.assertEqual(tokenizer.__class__.__name__, "NewTokenizer")
        else:
            # Without `tokenizers`, only the slow custom tokenizer can be loaded.
            self.assertEqual(tokenizer.__class__.__name__, "NewTokenizer")
Code example #2
import sys
import unittest
from pathlib import Path

from transformers import (
    BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
    AutoTokenizer,
    BertTokenizer,
    BertTokenizerFast,
    is_tokenizers_available,
)
from transformers.models.roberta.configuration_roberta import RobertaConfig
from transformers.testing_utils import (
    DUMMY_DIFF_TOKENIZER_IDENTIFIER,
    DUMMY_UNKNOWN_IDENTIFIER,
    SMALL_MODEL_IDENTIFIER,
    require_tokenizers,
    slow,
)

sys.path.append(str(Path(__file__).parent.parent.parent.parent / "utils"))

from test_module.custom_configuration import CustomConfig  # noqa: E402
from test_module.custom_tokenization import CustomTokenizer  # noqa: E402

if is_tokenizers_available():
    from test_module.custom_tokenization_fast import CustomTokenizerFast


class AutoTokenizerTest(unittest.TestCase):
    @slow
    def test_tokenizer_from_pretrained(self):
        # Skip the Japanese checkpoints, which load BertJapaneseTokenizer rather than BertTokenizer.
        for model_name in (x for x in BERT_PRETRAINED_CONFIG_ARCHIVE_MAP if "japanese" not in x):
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            self.assertIsNotNone(tokenizer)
            self.assertIsInstance(tokenizer,
                                  (BertTokenizer, BertTokenizerFast))
            self.assertGreater(len(tokenizer), 0)
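
Code example #2 imports CustomConfig and CustomTokenizer from the local test_module package without using them in the lines shown; presumably later tests in the same file register them with the auto classes. The snippet below is a minimal, hypothetical sketch of that registration pattern using the public AutoConfig.register / AutoTokenizer.register API; the "custom" model-type string and the exact calls are assumptions, not taken from the excerpt.

# Hypothetical sketch: wiring the local custom classes into the auto classes.
# The "custom" model_type and these exact register() calls are assumptions.
from transformers import AutoConfig, AutoTokenizer

from test_module.custom_configuration import CustomConfig
from test_module.custom_tokenization import CustomTokenizer

# Map the model_type string to the config class, then map that config class
# to the slow custom tokenizer class.
AutoConfig.register("custom", CustomConfig)
AutoTokenizer.register(CustomConfig, slow_tokenizer_class=CustomTokenizer)

# From here on, AutoTokenizer.from_pretrained on a checkpoint whose config is
# a CustomConfig resolves to CustomTokenizer.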