def create_transfo_xl_model(self, config, input_ids_1, input_ids_2, lm_labels):
    model = TransfoXLModel(config)
    model.eval()

    # First forward pass returns hidden states plus memory cells (mems);
    # the second pass re-uses those mems to attend over the longer context.
    hidden_states_1, mems_1 = model(input_ids_1)
    hidden_states_2, mems_2 = model(input_ids_2, mems_1)
    outputs = {
        "hidden_states_1": hidden_states_1,
        "mems_1": mems_1,
        "hidden_states_2": hidden_states_2,
        "mems_2": mems_2,
    }
    return outputs
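# A minimal usage sketch for the helper above (assumed, not from the source):
# `tester` stands in for the enclosing test-helper instance, and the keyword
# names follow pytorch_transformers' TransfoXLConfig.
import torch
from pytorch_transformers import TransfoXLConfig

config = TransfoXLConfig(vocab_size_or_config_json_file=99,
                         cutoffs=[10, 50, 80], d_model=32, d_embed=32,
                         n_head=4, d_head=8, d_inner=128, div_val=2,
                         n_layer=2, mem_len=30)
input_ids_1 = torch.randint(0, 99, (2, 10))  # (batch_size, seq_len)
input_ids_2 = torch.randint(0, 99, (2, 10))
outputs = tester.create_transfo_xl_model(config, input_ids_1, input_ids_2,
                                         lm_labels=None)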
def __init__(self, config):
    super(TXLClassificationModel, self).__init__(config)
    self.transformer = TransfoXLModel(config)
    # Classification head on top of the Transformer-XL encoder.
    self.classifier1 = torch.nn.Linear(config.d_embed, config.num_labels)
    self.dropout = torch.nn.Dropout(config.dropout)
    self.loss_fct = torch.nn.CrossEntropyLoss()
    self.init_weights()
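# A forward() sketch to match the constructor above (assumed, not from the
# source): pool the hidden states, apply dropout, classify, and compute the
# loss when labels are given. Last-token pooling is one common choice here.
def forward(self, input_ids, mems=None, labels=None):
    hidden_states, new_mems = self.transformer(input_ids, mems)
    pooled = self.dropout(hidden_states[:, -1])  # last-token pooling
    logits = self.classifier1(pooled)
    if labels is not None:
        loss = self.loss_fct(logits.view(-1, logits.size(-1)), labels.view(-1))
        return loss, logits, new_mems
    return logits, new_mems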
def test_model_from_pretrained(self):
    cache_dir = "/tmp/pytorch_transformers_test/"
    for model_name in list(TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
        model = TransfoXLModel.from_pretrained(model_name, cache_dir=cache_dir)
        shutil.rmtree(cache_dir)
        self.assertIsNotNone(model)
def __init__(self, chunk_size=64, max_length=35, device=torch.device('cuda:0')):
    super(XLClient, self).__init__()
    self.chunk_size = chunk_size
    self.tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103')
    self.max_length = max_length
    # Load the model and put it in evaluation mode.
    self.model = TransfoXLModel.from_pretrained('transfo-xl-wt103')
    self.model.eval()
    self.device = device
    # Move the model to the target device.
    self.model.to(self.device)
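# A hypothetical embedding helper for XLClient (not from the source): tokenize
# the text, truncate to max_length, and return the model's hidden states on
# the configured device.
def encode(self, text):
    tokens = self.tokenizer.tokenize(text)[:self.max_length]
    ids = self.tokenizer.convert_tokens_to_ids(tokens)
    tokens_tensor = torch.tensor([ids], device=self.device)
    with torch.no_grad():
        hidden_states, mems = self.model(tokens_tensor)
    return hidden_states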
import torch
from pytorch_transformers import TransfoXLTokenizer, TransfoXLModel

# Load pre-trained tokenizer (vocabulary from WikiText-103)
tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103')

# Tokenized input
text_1 = "Who was Jim Henson ?"
text_2 = "Jim Henson was a puppeteer"
tokenized_text_1 = tokenizer.tokenize(text_1)
tokenized_text_2 = tokenizer.tokenize(text_2)

# Convert tokens to vocabulary indices
indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)

# Convert inputs to PyTorch tensors
tokens_tensor_1 = torch.tensor([indexed_tokens_1])
tokens_tensor_2 = torch.tensor([indexed_tokens_2])

# Load pre-trained model (weights)
model = TransfoXLModel.from_pretrained('transfo-xl-wt103')
model.eval()

# If you have a GPU, put everything on cuda
tokens_tensor_1 = tokens_tensor_1.to('cuda')
tokens_tensor_2 = tokens_tensor_2.to('cuda')
model.to('cuda')

with torch.no_grad():
    # Predict hidden states features for each layer
    hidden_states_1, mems_1 = model(tokens_tensor_1)
    print(len(hidden_states_1))
    print(hidden_states_1[-1].size())
    print(len(mems_1))
    print(mems_1[-1].size())
# We can re-use the memory cells in a subsequent call to attend a longer context
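# Continuation of the snippet above (mirrors the create_transfo_xl_model
# helper earlier in this section): pass mems_1 back into the model so the
# second sentence can attend over the first one's context.
with torch.no_grad():
    hidden_states_2, mems_2 = model(tokens_tensor_2, mems=mems_1)
    print(hidden_states_2[-1].size())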
def test_transformer_xl_embeddings():
    transfo_model: str = "transfo-xl-wt103"

    tokenizer = TransfoXLTokenizer.from_pretrained(transfo_model)
    model = TransfoXLModel.from_pretrained(
        pretrained_model_name_or_path=transfo_model, output_hidden_states=True)
    model.to(flair.device)
    model.eval()

    s: str = "Berlin and Munich have a lot of puppeteer to see ."

    with torch.no_grad():
        tokens = tokenizer.tokenize(s + "<eos>")
        print(tokens)

        indexed_tokens = tokenizer.convert_tokens_to_ids(tokens)
        tokens_tensor = torch.tensor([indexed_tokens])
        tokens_tensor = tokens_tensor.to(flair.device)

        hidden_states = model(tokens_tensor)[-1]

        first_layer = hidden_states[1][0]

    assert len(first_layer) == len(tokens)

    #    0        1       2        3      4    5      6       7          8     9     10    11
    #
    # 'Berlin', 'and', 'Munich', 'have', 'a', 'lot', 'of', 'puppeteer', 'to', 'see', '.', '<eos>'
    #    |        |       |        |      |    |      |       |          |     |      |
    #  Berlin    and    Munich    have    a   lot    of   puppeteer     to    see     .
    #
    #    0        1       2        3      4    5      6       7          8     9     10

    def embed_sentence(sentence: str,
                       layers: str = "1",
                       use_scalar_mix: bool = False) -> Sentence:
        embeddings = TransformerXLEmbeddings(model=transfo_model,
                                             layers=layers,
                                             use_scalar_mix=use_scalar_mix)
        flair_sentence = Sentence(sentence)
        embeddings.embed(flair_sentence)

        return flair_sentence

    sentence = embed_sentence(sentence=s)

    first_token_embedding_ref = first_layer[0].tolist()
    first_token_embedding_actual = sentence.tokens[0].embedding.tolist()

    puppeteer_embedding_ref = first_layer[7].tolist()
    puppeteer_embedding_actual = sentence.tokens[7].embedding.tolist()

    assert first_token_embedding_ref == first_token_embedding_actual
    assert puppeteer_embedding_ref == puppeteer_embedding_actual

    # Check embedding dimension when using multiple layers
    sentence_mult_layers = embed_sentence(sentence="Munich", layers="1,2,3,4")

    ref_embedding_size = 4 * model.d_embed
    actual_embedding_size = len(sentence_mult_layers.tokens[0].embedding)

    assert ref_embedding_size == actual_embedding_size

    # Check embedding dimension when using multiple layers and scalar mix
    sentence_mult_layers_scalar_mix = embed_sentence(sentence="Berlin",
                                                     layers="1,2,3,4",
                                                     use_scalar_mix=True)

    ref_embedding_size = 1 * model.d_embed
    actual_embedding_size = len(
        sentence_mult_layers_scalar_mix.tokens[0].embedding)

    assert ref_embedding_size == actual_embedding_size
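# A standalone usage sketch for TransformerXLEmbeddings outside a test
# (assumed, not from the source; the constructor arguments follow the test
# above and may differ across flair versions).
from flair.data import Sentence
from flair.embeddings import TransformerXLEmbeddings

embeddings = TransformerXLEmbeddings(model="transfo-xl-wt103", layers="1")
sentence = Sentence("Berlin and Munich are cities .")
embeddings.embed(sentence)
for token in sentence:
    print(token.text, token.embedding.size())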