    def test_from_pretrained_dynamic_tokenizer_legacy_format(self):
        tokenizer = AutoTokenizer.from_pretrained(
            "hf-internal-testing/test_dynamic_tokenizer_legacy", trust_remote_code=True
        )
        self.assertTrue(tokenizer.special_attribute_present)
        if is_tokenizers_available():
            self.assertEqual(tokenizer.__class__.__name__, "NewTokenizerFast")

            # Test we can also load the slow version
            tokenizer = AutoTokenizer.from_pretrained(
                "hf-internal-testing/test_dynamic_tokenizer_legacy", trust_remote_code=True, use_fast=False
            )
            self.assertTrue(tokenizer.special_attribute_present)
            self.assertEqual(tokenizer.__class__.__name__, "NewTokenizer")
        else:
            self.assertEqual(tokenizer.__class__.__name__, "NewTokenizer")
)
from transformers.models.roberta.configuration_roberta import RobertaConfig
from transformers.testing_utils import (
    DUMMY_DIFF_TOKENIZER_IDENTIFIER,
    DUMMY_UNKNOWN_IDENTIFIER,
    SMALL_MODEL_IDENTIFIER,
    require_tokenizers,
    slow,
)


sys.path.append(str(Path(__file__).parent.parent.parent.parent / "utils"))

from test_module.custom_configuration import CustomConfig  # noqa E402
from test_module.custom_tokenization import CustomTokenizer  # noqa E402


if is_tokenizers_available():
    from test_module.custom_tokenization_fast import CustomTokenizerFast


class AutoTokenizerTest(unittest.TestCase):
    @slow
    def test_tokenizer_from_pretrained(self):
        # Skip the Japanese checkpoints, which use BertJapaneseTokenizer rather than BertTokenizer
        for model_name in (x for x in BERT_PRETRAINED_CONFIG_ARCHIVE_MAP.keys() if "japanese" not in x):
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            self.assertIsNotNone(tokenizer)
            self.assertIsInstance(tokenizer, (BertTokenizer, BertTokenizerFast))
            self.assertGreater(len(tokenizer), 0)