Ejemplo n.º 1
0
def transformerXLTokenizer(*args, **kwargs):
    """
    Instantiate a Transformer-XL tokenizer adapted from the Vocab class in
    https://github.com/kimiyoung/transformer-xl

    All positional and keyword arguments are forwarded unchanged to
    ``TransfoXLTokenizer.from_pretrained``.

    Args:
        pretrained_model_name_or_path: Path to pretrained model archive
                                       or one of pre-trained vocab configs below.
                                           * transfo-xl-wt103

    Returns:
        The object returned by ``TransfoXLTokenizer.from_pretrained``.

    Example:
        >>> import torch
        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'transformerXLTokenizer', 'transfo-xl-wt103')

        >>> text = "Who was Jim Henson ?"
        >>> tokenized_text = tokenizer.tokenize(text)
        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
    """
    # Thin entry point: no argument processing happens here, so whatever
    # from_pretrained accepts (or raises) is what the caller gets.
    return TransfoXLTokenizer.from_pretrained(*args, **kwargs)
Ejemplo n.º 2
0
 def get_tokenizer(self, **kwargs):
     """Load a ``TransfoXLTokenizer`` from ``self.tmpdirname`` with lower-casing forced on.

     Extra keyword arguments are forwarded to
     ``TransfoXLTokenizer.from_pretrained``; any caller-supplied
     ``lower_case`` value is overridden with ``True``.
     """
     # Later keys win in a dict display, so 'lower_case' is always True here.
     options = {**kwargs, 'lower_case': True}
     return TransfoXLTokenizer.from_pretrained(self.tmpdirname, **options)
Ejemplo n.º 3
0
 def get_tokenizer(self):
     """Load a ``TransfoXLTokenizer`` from ``self.tmpdirname`` with ``lower_case=True``."""
     source_dir = self.tmpdirname
     return TransfoXLTokenizer.from_pretrained(source_dir, lower_case=True)