def create_transfo_xl_model(self, config, input_ids_1, input_ids_2, lm_labels):
    model = TransfoXLModel(config)
    model.eval()

    hidden_states_1, mems_1 = model(input_ids_1)
    hidden_states_2, mems_2 = model(input_ids_2, mems_1)

    outputs = {
        "hidden_states_1": hidden_states_1,
        "mems_1": mems_1,
        "hidden_states_2": hidden_states_2,
        "mems_2": mems_2,
    }
    return outputs
def __init__(self):
    super(Model, self).__init__()
    self.config = TransfoXLConfig(
        vocab_size_or_config_json_file='../model_configs/classification_XL_configuration.json')
    self.config.vocab_size = 204098
    self.config.output_attentions = True
    self.model = TransfoXLModel(self.config)
    self.out_layer = torch.nn.Linear(self.model.d_model, 2)
def test_model_from_pretrained(self):
    cache_dir = "/tmp/transformers_test/"
    for model_name in list(TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
        model = TransfoXLModel.from_pretrained(model_name, cache_dir=cache_dir)
        shutil.rmtree(cache_dir)
        self.assertIsNotNone(model)
def __init__(self):
    super(Model, self).__init__()
    # TransfoXLConfig uses the singular argument names n_head / n_layer
    self.config = TransfoXLConfig(
        vocab_size_or_config_json_file=len(vocab) + 267735,
        n_head=8,
        n_layer=9)
    self.model = TransfoXLModel(self.config)
    self.tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103')
    self.out_layer = torch.nn.Linear(self.model.d_model, 2)
def __init__(self, config):
    super().__init__(config)
    self.num_labels = config.num_labels
    self.transfoxl = TransfoXLModel(config)
    self.dropout = nn.Dropout(config.dropout)
    self.classifier = nn.Linear(config.d_model, self.config.num_labels)
    self.init_weights()
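# A minimal forward() sketch to pair with the classification head above; the
# attribute names (self.transfoxl, self.dropout, self.classifier, self.num_labels)
# come from that __init__, but the mean pooling and the loss handling are
# assumptions, not the original author's code.
def forward(self, input_ids, labels=None):
    outputs = self.transfoxl(input_ids)
    pooled = outputs[0].mean(dim=1)              # mean-pool over the sequence dimension
    logits = self.classifier(self.dropout(pooled))
    if labels is not None:
        loss = nn.CrossEntropyLoss()(logits.view(-1, self.num_labels), labels.view(-1))
        return loss, logits
    return (logits,)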
def __init__(self, ntoken, noutputs, d_model, nhead, d_ffn, nlayers,
             dropout=0.5, use_embedding=False):
    super(TransformerXLModel, self).__init__()
    self.config = TransfoXLConfig(
        vocab_size=ntoken,
        cutoffs=[],
        d_model=d_model,
        d_embed=d_model,
        n_head=nhead,
        d_head=d_model // nhead,
        d_inner=d_ffn,
        n_layer=nlayers,
        tie_weights=False,
        adaptive=False,
        dropout=dropout)
    self.transformer_encoder = TransfoXLModel(self.config)
    self.decoder = nn.Linear(d_model, noutputs)
    self.sigmoid = nn.Sigmoid()
def __init__(self, config: TransfoXLConfig):
    super().__init__(config)
    self.transformer = TransfoXLModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.lstm = nn.LSTM(
        input_size=config.hidden_size,
        hidden_size=config.hidden_size,
        num_layers=1,
        dropout=0,
        batch_first=True,
        bidirectional=False)
    self.fc = nn.Linear(config.hidden_size * 3, config.num_labels)
    self.fc_bn = nn.BatchNorm1d(config.num_labels)
    self.init_weights()
def transformersxl_embeddings(document_collection):
    """Compute a Transformer-XL embedding for each document.

    input: list of documents (document_collection)
    output: list of Transformer-XL embeddings, one per document
    """
    tokenizer = TransfoXLTokenizerFast.from_pretrained(
        'transfo-xl-wt103', do_lower_case=True, add_space_before_punct_symbol=True)
    tokenizer.pad_token = tokenizer.eos_token
    model = TransfoXLModel.from_pretrained('transfo-xl-wt103')

    embeddings = []
    for doc in document_collection:
        inputs = tokenizer.encode(doc, add_special_tokens=True, return_tensors="pt")
        outputs = model(inputs)
        last_hidden_states = outputs[0]
        # mean-pool the token vectors to get one embedding per document
        sentence_embedding = torch.mean(last_hidden_states[0], dim=0)
        embeddings.append(sentence_embedding.detach().numpy())
    return embeddings
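# Hypothetical usage of the helper above (not part of the original snippet):
# embed two short documents and compare them with cosine similarity.
from scipy.spatial.distance import cosine

docs = ["Transformer-XL reuses hidden states as memory across segments.",
        "The memory mechanism lets it model longer context than a fixed window."]
doc_embeddings = transformersxl_embeddings(docs)
print(doc_embeddings[0].shape)                            # (1024,) for transfo-xl-wt103
print(1 - cosine(doc_embeddings[0], doc_embeddings[1]))   # cosine similarity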
def create_transfo_xl_model(self, config, input_ids_1, input_ids_2, lm_labels):
    model = TransfoXLModel(config)
    model.to(torch_device)
    model.eval()

    outputs1 = model(input_ids_1)
    outputs2 = model(input_ids_2, outputs1["mems"])
    outputs = {
        "hidden_states_1": outputs1["last_hidden_state"],
        "mems_1": outputs1["mems"],
        "hidden_states_2": outputs2["last_hidden_state"],
        "mems_2": outputs2["mems"],
    }
    return outputs
def __init__(self, args, num_class):
    super().__init__()
    self.args = args

    self.dropout = nn.Dropout(args.dropout)
    self.transfo_xl = TransfoXLModel.from_pretrained(args.home_dir + args.bert_model_dir)
    in_dim = args.bert_dim

    # pooling layer
    pool_layers = [nn.Linear(in_dim, args.hidden_dim), nn.ReLU()]
    self.pool_fc = nn.Sequential(*pool_layers)

    # output mlp layers
    layers = []
    for _ in range(args.mlp_layers):
        layers += [nn.Linear(args.hidden_dim, args.hidden_dim), nn.ReLU()]
    layers += [nn.Linear(args.hidden_dim, num_class)]
    self.out_mlp = nn.Sequential(*layers)
def run_TFXL_RSA(stim_file, layer, header=False, filter_file=None):
    EXP = data.Stim(stim_file, header, filter_file, VOCAB_FILE)

    # Get tokenizer
    tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103')
    # Load model
    model = TransfoXLModel.from_pretrained(
        'transfo-xl-wt103', output_hidden_states=True)  # , force_download=True
    # turn off learning
    model.zero_grad()

    for x in range(len(EXP.SENTS)):
        sentences = list(EXP.SENTS[x])
        target = sentences[0]
        sentence = sentences[1]

        # GET BASELINE
        target_encoded = tokenizer.encode(target, add_special_tokens=True)
        target_input_ids = torch.tensor(target_encoded).unsqueeze(0)

        # Get model outputs
        output = model(target_input_ids)
        predictions, mems, hidden_states = output
        hidden_states = hidden_states[1:]
        baseline = hidden_states[layer][0][-1].data.cpu().squeeze()

        # GET SIMs
        sims = get_TFXL_sims(sentence, layer, baseline, tokenizer, model)
        values = get_dummy_values(sentence)
        EXP.load_IT('tfxl', x, values, False, sims)

    return EXP
def test_model_from_pretrained(self):
    for model_name in TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
        model = TransfoXLModel.from_pretrained(model_name)
        self.assertIsNotNone(model)
def test_model_from_pretrained(self):
    for model_name in list(TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
        model = TransfoXLModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
        self.assertIsNotNone(model)
def test_transformer_xl_embeddings():
    transfo_model: str = "transfo-xl-wt103"

    tokenizer = TransfoXLTokenizer.from_pretrained(transfo_model)
    model = TransfoXLModel.from_pretrained(
        pretrained_model_name_or_path=transfo_model, output_hidden_states=True)
    model.to(flair.device)
    model.eval()

    s: str = "Berlin and Munich have a lot of puppeteer to see ."

    with torch.no_grad():
        tokens = tokenizer.tokenize(s + "<eos>")
        print(tokens)

        indexed_tokens = tokenizer.convert_tokens_to_ids(tokens)
        tokens_tensor = torch.tensor([indexed_tokens])
        tokens_tensor = tokens_tensor.to(flair.device)

        hidden_states = model(tokens_tensor)[-1]

        first_layer = hidden_states[1][0]

    assert len(first_layer) == len(tokens)

    #     0        1      2         3      4    5      6     7            8     9     10    11
    #
    # 'Berlin', 'and', 'Munich', 'have', 'a', 'lot', 'of', 'puppeteer', 'to', 'see', '.', '<eos>'
    #     |        |      |         |      |    |      |     |            |     |     |
    #   Berlin    and   Munich    have    a    lot    of   puppeteer     to    see    .
    #
    #     0        1      2         3      4    5      6     7            8     9     10

    def embed_sentence(sentence: str,
                       layers: str = "1",
                       use_scalar_mix: bool = False) -> Sentence:
        embeddings = TransformerXLEmbeddings(
            pretrained_model_name_or_path=transfo_model,
            layers=layers,
            use_scalar_mix=use_scalar_mix,
        )
        flair_sentence = Sentence(sentence)
        embeddings.embed(flair_sentence)
        return flair_sentence

    sentence = embed_sentence(sentence=s)

    first_token_embedding_ref = first_layer[0].tolist()
    first_token_embedding_actual = sentence.tokens[0].embedding.tolist()

    puppeteer_embedding_ref = first_layer[7].tolist()
    puppeteer_embedding_actual = sentence.tokens[7].embedding.tolist()

    assert first_token_embedding_ref == first_token_embedding_actual
    assert puppeteer_embedding_ref == puppeteer_embedding_actual

    # Check embedding dimension when using multiple layers
    sentence_mult_layers = embed_sentence(sentence="Munich", layers="1,2,3,4")

    ref_embedding_size = 4 * model.d_embed
    actual_embedding_size = len(sentence_mult_layers.tokens[0].embedding)

    assert ref_embedding_size == actual_embedding_size

    # Check embedding dimension when using multiple layers and scalar mix
    sentence_mult_layers_scalar_mix = embed_sentence(
        sentence="Berlin", layers="1,2,3,4", use_scalar_mix=True)

    ref_embedding_size = 1 * model.d_embed
    actual_embedding_size = len(sentence_mult_layers_scalar_mix.tokens[0].embedding)

    assert ref_embedding_size == actual_embedding_size
import logging

import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import (TransfoXLConfig, TransfoXLModel,
                          TransfoXLPreTrainedModel, TransfoXLTokenizer)

logger = logging.getLogger(__name__)

tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103')

text = "Hello, my dog is cute"
# tokenizer.encode => tokenizer.convert_tokens_to_ids(tokenizer.tokenize(text))
tokens = tokenizer.tokenize(text, add_special_tokens=True,
                            add_space_before_punct_symbol=True)
token_ids = tokenizer.convert_tokens_to_ids(tokens)

model = TransfoXLModel.from_pretrained('transfo-xl-wt103')
outputs = model(torch.tensor(token_ids).unsqueeze(0))
last_hidden_states, mems = outputs[:2]

#%%
class TransfoXLLSTM(TransfoXLPreTrainedModel):
    def __init__(self, config: TransfoXLConfig):
        super().__init__(config)
        self.transformer = TransfoXLModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
from transformers import TransfoXLConfig, TransfoXLModel, TransfoXLTokenizer
import pandas as pd
import torch
import numpy as np
import scipy.spatial.distance as distance

# 18-layer, 1024-hidden, 16-heads, 257M parameters.
# English model trained on wikitext-103

# Initializing a Transformer-XL configuration
configuration = TransfoXLConfig.from_pretrained("transfo-xl-wt103")
tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103')
# Initializing a model from the configuration
model = TransfoXLModel.from_pretrained("transfo-xl-wt103", config=configuration)

## extract the features
dataset = pd.read_csv('./data/data.csv')[:10]
print(dataset.shape)
pages = dataset['desp'].values.tolist()
print("the dataset is:\n", pages)

saved_features = []
for val in pages:
    input_ids = torch.tensor(
        tokenizer.encode(val, add_special_tokens=True)).unsqueeze(0)
    outputs = model(input_ids)
    last_hidden_states = outputs[0]  # dimension is (1, sequence length, hidden states)
    # average the hidden states to get the semantic content of the input
    extracted_features = torch.mean(last_hidden_states[0], dim=0)
    saved_features.append(extracted_features.detach().numpy())
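# A possible continuation (not in the original snippet): compare the saved
# per-document feature vectors pairwise with cosine distance, reusing the
# numpy and scipy.spatial.distance imports from the script above.
features = np.stack(saved_features)                                # (n_docs, hidden_size)
pairwise_cosine = distance.squareform(distance.pdist(features, metric="cosine"))
print(pairwise_cosine.round(3))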