def transformerXLTokenizer(*args, **kwargs):
    """
    Instantiate a Transformer-XL tokenizer adapted from the Vocab class in
    https://github.com/kimiyoung/transformer-xl

    All positional and keyword arguments are forwarded unchanged to
    ``TransfoXLTokenizer.from_pretrained``.

    Args:
        pretrained_model_name_or_path: Path to a pretrained model archive,
            or one of the pre-trained vocab configs below.
                * transfo-xl-wt103

    Returns:
        The tokenizer object returned by ``TransfoXLTokenizer.from_pretrained``.

    Example:
        >>> import torch
        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')

        >>> text = "Who was Jim Henson ?"
        >>> tokenized_text = tokenizer.tokenize(text)
        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
    """
    return TransfoXLTokenizer.from_pretrained(*args, **kwargs)
def get_tokenizer(self, **kwargs):
    """
    Load a ``TransfoXLTokenizer`` from ``self.tmpdirname``.

    Extra keyword arguments are forwarded to
    ``TransfoXLTokenizer.from_pretrained``; ``lower_case`` is always set to
    ``True``, replacing any value supplied by the caller.
    """
    # Merge last so the forced lower_case=True wins over a caller-provided value.
    tokenizer_kwargs = {**kwargs, 'lower_case': True}
    return TransfoXLTokenizer.from_pretrained(self.tmpdirname, **tokenizer_kwargs)