Example #1
    def test_model_from_pretrained(self):
        cache_dir = "/tmp/pytorch_pretrained_bert_test/"
        for model_name in list(PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
            model = TransfoXLModel.from_pretrained(model_name,
                                                   cache_dir=cache_dir)
            shutil.rmtree(cache_dir)
            self.assertIsNotNone(model)
Example #2
    def construct_encoder(self):
        model = TransfoXLModel.from_pretrained(self.model_name)
        model.cuda()
        model = torch.nn.DataParallel(model)

        model.eval()
        tokenizer = TransfoXLTokenizer.from_pretrained(self.model_name)
        print("Model and tokenzier are constructed!")
        return model, tokenizer
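
# Illustrative usage sketch, not part of the original class: `encoder` stands in
# for an instance of the class above, with model_name assumed to be a checkpoint
# such as 'transfo-xl-wt103'.
model, tokenizer = encoder.construct_encoder()
ids = tokenizer.convert_tokens_to_ids(tokenizer.tokenize("Who was Jim Henson ?"))
with torch.no_grad():
    hidden, mems = model(torch.tensor([ids]).cuda())  # hidden: (1, seq_len, 1024)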
Example #3
    def __init__(self, name, **kwargs):
        super(TXLEmbeddings, self).__init__(name=name, **kwargs)
        global TXL_TOKENIZER
        self.dsz = kwargs.get('dsz')
        if TXL_TOKENIZER is None:
            TXL_TOKENIZER = TransfoXLTokenizer.from_pretrained(kwargs.get('embed_file'))
        self.model = TransfoXLModel.from_pretrained(kwargs.get('embed_file'))
        self.vocab = TXL_TOKENIZER.sym2idx
        self.vsz = len(TXL_TOKENIZER.sym2idx)
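
# Illustrative only, not part of the original class: the vocab stored above
# (sym2idx) maps surface tokens to indices; 'embed_file' is assumed to name a
# Transformer-XL checkpoint such as 'transfo-xl-wt103'.
tokens = TXL_TOKENIZER.tokenize("Jim Henson was a puppeteer")
ids = [TXL_TOKENIZER.sym2idx.get(tok, 0) for tok in tokens]  # 0 as a fallback index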
Example #4
def get_xl_layer_representations(seq_len, text_array, remove_chars,
                                 word_ind_to_extract):

    model = TransfoXLModel.from_pretrained('transfo-xl-wt103')
    tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103')
    model.eval()

    # get the token embeddings
    token_embeddings = []
    for word in text_array:
        current_token_embedding = get_xl_token_embeddings([word], tokenizer,
                                                          model, remove_chars)
        token_embeddings.append(
            np.mean(current_token_embedding.detach().numpy(), 1))

    # where to store layer-wise xl embeddings of particular length
    XL = {}
    for layer in range(19):
        XL[layer] = []
    XL[-1] = token_embeddings

    if word_ind_to_extract < 0:  # the index is specified from the end of the array, so invert the index
        from_start_word_ind_to_extract = seq_len + word_ind_to_extract
    else:
        from_start_word_ind_to_extract = word_ind_to_extract

    start_time = tm.time()

    # before we've seen enough words to make up the sequence length, add the representation for the last word 'seq_len' times
    word_seq = text_array[:seq_len]
    for _ in range(seq_len):
        XL = add_avrg_token_embedding_for_specific_word(
            word_seq, tokenizer, model, remove_chars,
            from_start_word_ind_to_extract, XL)

    # then add the embedding of the last word in a sequence as the embedding for the sequence
    for end_curr_seq in range(seq_len, len(text_array)):
        word_seq = text_array[end_curr_seq - seq_len + 1:end_curr_seq + 1]
        XL = add_avrg_token_embedding_for_specific_word(
            word_seq, tokenizer, model, remove_chars,
            from_start_word_ind_to_extract, XL)

        if end_curr_seq % 100 == 0:
            print('Completed {} out of {}: {}'.format(end_curr_seq,
                                                      len(text_array),
                                                      tm.time() - start_time))
            start_time = tm.time()

    print('Done extracting sequences of length {}'.format(seq_len))

    return XL
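
# A tiny, self-contained illustration (not from the original source) of the
# windowing above: the first window is stored seq_len times, then the window
# slides one word at a time until the end of the text.
words = ['w0', 'w1', 'w2', 'w3', 'w4']
win = 3
windows = [words[:win]] * win
windows += [words[e - win + 1:e + 1] for e in range(win, len(words))]
print(windows)  # [['w0','w1','w2']] x3, then ['w1','w2','w3'], ['w2','w3','w4']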
def get_hidden(tokens_tensor):
    # Load pre-trained model (weights)
    model = TransfoXLModel.from_pretrained('transfo-xl-wt103')
    model.eval()

    # If you have a GPU, put everything on cuda
    if torch.cuda.is_available():
        tokens_tensor = tokens_tensor.to('cuda')
        model.to('cuda')

    with torch.no_grad():
        # Predict hidden states features for each layer
        hidden_states, mems = model(tokens_tensor)
        # We can re-use the memory cells in a subsequent call to attend a longer context
        #hidden_states_2, mems_2 = model(tokens_tensor_2, mems=mems_1)
    return hidden_states
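
# Illustrative call of get_hidden, assumed rather than taken from the original
# snippet; the tokenizer mirrors the 'transfo-xl-wt103' checkpoint loaded above.
tokenizer = TransfoXLTokenizer.from_pretrained('transfo-xl-wt103')
ids = tokenizer.convert_tokens_to_ids(tokenizer.tokenize("Who was Jim Henson ?"))
print(get_hidden(torch.tensor([ids])).shape)  # (1, seq_len, 1024) for transfo-xl-wt103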
text_1 = "Who was Jim Henson ?"
text_2 = "Jim Henson was a puppeteer"
tokenized_text_1 = tokenizer.tokenize(text_1); print(tokenized_text_1)  # ['Who', 'was', 'Jim', 'Henson', '?']
print(tokenizer.tokenize("who was jim henson ?"))
tokenized_text_2 = tokenizer.tokenize(text_2)

indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1); print(indexed_tokens_1)  # [2517, 11, 1666, 12034, 788]
print(tokenizer.convert_tokens_to_ids(tokenizer.tokenize("who was jim henson ?")))  # [52, 11, 24, 24, 788]; also case-sensitive
indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)
tokens_tensor_1 = torch.tensor([indexed_tokens_1])
tokens_tensor_2 = torch.tensor([indexed_tokens_2])

##################################################################
## TransfoXLModel
model = TransfoXLModel.from_pretrained('/Users/coder352/datasets/WordVec/pytorch_pretrained_bert/transfo-xl-wt103')
model.eval()

with torch.no_grad():
    hidden_states_1, mems_1 = model(tokens_tensor_1)  # Predict hidden states features for each layer
    hidden_states_2, mems_2 = model(tokens_tensor_2, mems=mems_1)  # We can re-use the memory cells in a subsequent call to attend a longer context

##################################################################
## TransfoXLLMHeadModel
model = TransfoXLLMHeadModel.from_pretrained('/Users/coder352/datasets/WordVec/pytorch_pretrained_bert/transfo-xl-wt103/')
model.eval()

with torch.no_grad():
    predictions_1, mems_1 = model(tokens_tensor_1)  # Predict all tokens
    predictions_2, mems_2 = model(tokens_tensor_2, mems=mems_1)  # We can re-use the memory cells in a subsequent call to attend a longer context
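
# A short follow-up sketch, not in the original snippet: pick the most likely
# next token from the language-model logits at the last position.
predicted_index = torch.argmax(predictions_2[0, -1, :]).item()
predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
print(predicted_token)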
text_1 = "Who was Jim Henson ?"
text_2 = "Jim Henson was a puppeteer"
tokenized_text_1 = tokenizer.tokenize(text_1)
tokenized_text_2 = tokenizer.tokenize(text_2)

# Convert token to vocabulary indices
indexed_tokens_1 = tokenizer.convert_tokens_to_ids(tokenized_text_1)
indexed_tokens_2 = tokenizer.convert_tokens_to_ids(tokenized_text_2)

# Convert inputs to PyTorch tensors
tokens_tensor_1 = torch.tensor([indexed_tokens_1])
tokens_tensor_2 = torch.tensor([indexed_tokens_2])
print('tokenized_text_1', tokenized_text_1)
print('tokens_tensor_1', tokens_tensor_1)
# Load pre-trained model (weights)
model = TransfoXLModel.from_pretrained('transfo-xl-wt103')
model.eval()

# If you have a GPU, put everything on cuda
if torch.cuda.is_available():
    tokens_tensor_1 = tokens_tensor_1.to('cuda')
    tokens_tensor_2 = tokens_tensor_2.to('cuda')
    model.to('cuda')

with torch.no_grad():
    # Predict hidden states features for each layer
    hidden_states_1, mems_1 = model(tokens_tensor_1)
    print(hidden_states_1, mems_1)
    # We can re-use the memory cells in a subsequent call to attend a longer context
    hidden_states_2, mems_2 = model(tokens_tensor_2, mems=mems_1)
    print(hidden_states_2, mems_2)
Example #8
    def __init__(self,
                 n_layers,
                 in_size,
                 out_size,
                 embed_size,
                 dropout=0.5,
                 initialEmbW=None,
                 rnn_type='lstm',
                 attention=None,
                 q_size=-1,
                 embedding_init=None,
                 weights_init=None,
                 elmo_init=False,
                 elmo_num_outputs=1,
                 finetune_elmo=False,
                 bert_init=False,
                 bert_model=None,
                 finetune_bert=False,
                 add_word_emb=True):
        """Initialize encoder with structure parameters
        Args:
            n_layers (int): Number of layers.
            in_size (int): Dimensionality of input vectors.
            out_size (int): Dimensionality of hidden vectors to be output.
            embed_size (int): Dimensionality of word embedding.
            dropout (float): Dropout ratio.
        """
        # TODO
        conv_out_size = 512

        super(LSTMEncoder, self).__init__()
        self.embed = nn.Embedding(in_size, embed_size)
        if embedding_init is not None:
            self.embed.weight.data.copy_(torch.from_numpy(embedding_init))
        elif weights_init is not None:
            self.embed.weight.data.copy_(
                torch.from_numpy(weights_init['embed']))
        self.elmo_init = elmo_init
        self.bert_init = bert_init
        self.bert_model = bert_model
        self.add_word_emb = add_word_emb
        if elmo_init:
            options_file = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
            weight_file = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
            self.elmo = Elmo(options_file,
                             weight_file,
                             elmo_num_outputs,
                             requires_grad=finetune_elmo)
            elmo_layer = [
                nn.Linear(elmo_num_outputs * 1024, out_size),
                nn.ReLU()
            ]
            self.elmo_layer = nn.Sequential(*elmo_layer)
        elif bert_init:
            if 'bert' in bert_model:
                self.bert = BertModel.from_pretrained(bert_model)
            elif 'openai-gpt' in bert_model:
                self.bert = OpenAIGPTModel.from_pretrained(bert_model)
            elif 'gpt2' in bert_model:
                self.bert = GPT2Model.from_pretrained(bert_model)
            elif 'transfo-xl' in bert_model:
                self.bert = TransfoXLModel.from_pretrained(bert_model)
            self.finetune_bert = finetune_bert
            if not finetune_bert:
                for param in self.bert.parameters():
                    param.requires_grad = False
            if bert_model in ['bert-base-uncased', 'openai-gpt', 'gpt2']:
                bert_in = 768
            elif bert_model in [
                    'bert-large-uncased', 'gpt2-medium', 'transfo-xl-wt103'
            ]:
                bert_in = 1024
            bert_layer = [nn.Linear(bert_in, out_size), nn.ReLU()]
            self.bert_layer = nn.Sequential(*bert_layer)
        if rnn_type == 'lstm':
            self.lstm = nn.LSTM(embed_size,
                                out_size,
                                n_layers,
                                batch_first=True,
                                dropout=dropout)
        elif rnn_type == 'gru':
            self.lstm = nn.GRU(embed_size,
                               out_size,
                               n_layers,
                               batch_first=True,
                               dropout=dropout)
        self.attention = attention
        if attention == 'conv' or attention == 'conv_sum':
            conv_in_size = out_size
            self.conv1 = nn.Conv1d(in_channels=conv_in_size,
                                   out_channels=conv_out_size,
                                   kernel_size=1,
                                   padding=0)
            self.conv2 = nn.Conv1d(in_channels=conv_out_size,
                                   out_channels=2,
                                   kernel_size=1,
                                   padding=0)
            if weights_init is not None:
                self.conv1.weight.data.copy_(
                    torch.from_numpy(weights_init['conv1']))
                self.conv2.weight.data.copy_(
                    torch.from_numpy(weights_init['conv2']))
        elif attention == 'c_conv_sum':
            hidden_size = 512
            conv_hidden_size = 256
            layers = [
                weight_norm(nn.Linear(out_size, hidden_size), dim=None),
                nn.ReLU()
            ]
            self.c_fa = nn.Sequential(*layers)
            layers = [
                weight_norm(nn.Linear(q_size, hidden_size), dim=None),
                nn.ReLU()
            ]
            self.q_fa = nn.Sequential(*layers)
            layers = [
                nn.Conv2d(in_channels=hidden_size,
                          out_channels=conv_hidden_size,
                          kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=conv_hidden_size,
                          out_channels=1,
                          kernel_size=1)
            ]
            self.cq_att = nn.Sequential(*layers)
            if weights_init is not None:
                self.c_fa[0].weight.data.copy_(
                    torch.from_numpy(weights_init['c_fa']))
                self.q_fa[0].weight.data.copy_(
                    torch.from_numpy(weights_init['q_fa']))
                self.cq_att[0].weight.data.copy_(
                    torch.from_numpy(weights_init['cq_att_conv1']))
                self.cq_att[2].weight.data.copy_(
                    torch.from_numpy(weights_init['cq_att_conv2']))
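
    # A rough, assumed sketch, not part of the original class: how the frozen
    # Transformer-XL branch configured above could be applied in a forward pass
    # (method name and argument are hypothetical; only the 'transfo-xl' case is
    # covered, since the other branch models return different outputs).
    def _encode_with_transfo_xl(self, token_ids):
        # token_ids: LongTensor of shape (batch, seq_len)
        hidden, _ = self.bert(token_ids)  # (batch, seq_len, 1024) for transfo-xl-wt103
        return self.bert_layer(hidden)    # project to (batch, seq_len, out_size)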
Example #9
    def __init__(self, args):
        super(QAxl, self).__init__()

        hidden_size = args['hidden_size']
        dropout = args['dropout']
        attention_size = args['attention_size']
        word_emb = np.array(read_json(args['data_dir'] + 'word_emb.json'),
                            dtype=np.float32)
        word_size = word_emb.shape[0]
        word_dim = word_emb.shape[1]
        char_dim = args['char_dim']
        char_len = len(read_json(args['data_dir'] + 'char2id.json'))
        pos_dim = args['pos_dim']
        ner_dim = args['ner_dim']

        self.args = args
        self.train_loss = AverageMeter()
        self.use_cuda = args['use_cuda']
        self.use_xl = args['use_xl']

        if self.use_xl:
            self.xl = TransfoXLModel.from_pretrained('transfo-xl-wt103')
            xl_dim = 1024

        ## Embedding Layer
        print('Building embedding...')
        self.word_embeddings = nn.Embedding(word_emb.shape[0],
                                            word_dim,
                                            padding_idx=0)
        self.word_embeddings.weight.data = torch.from_numpy(word_emb)
        self.char_embeddings = nn.Embedding(char_len, char_dim, padding_idx=0)
        self.pos_embeddings = nn.Embedding(args['pos_size'],
                                           args['pos_dim'],
                                           padding_idx=0)
        self.ner_embeddings = nn.Embedding(args['ner_size'],
                                           args['ner_dim'],
                                           padding_idx=0)
        with open(args['data_dir'] + 'tune_word_idx.pkl', 'rb') as f:
            tune_idx = pkl.load(f)
        self.fixed_idx = list(
            set([i for i in range(word_size)]) - set(tune_idx))
        fixed_embedding = torch.from_numpy(word_emb)[self.fixed_idx]
        self.register_buffer('fixed_embedding', fixed_embedding)
        self.fixed_embedding = fixed_embedding

        low_p_dim = word_dim + word_dim + args['pos_dim'] + args['ner_dim'] + 4
        low_q_dim = word_dim + args['pos_dim'] + args['ner_dim']
        if self.use_xl:
            low_p_dim += xl_dim
            low_q_dim += xl_dim

        self.emb_char = Embedding(word_dim, char_dim, hidden_size)

        ## Forward Layers Declaration
        high_p_dim = 2 * hidden_size
        full_q_dim = 2 * high_p_dim
        attention_dim = word_dim + full_q_dim
        if self.use_xl:
            attention_dim += xl_dim

        self.word_attention_layer = WordAttention(word_dim, attention_size,
                                                  dropout)

        self.low_rnn = StackedPaddedRNN(low_p_dim,
                                        hidden_size,
                                        1,
                                        dropout=dropout)
        self.high_rnn = StackedPaddedRNN(high_p_dim,
                                         hidden_size,
                                         1,
                                         dropout=dropout)
        self.full_rnn = StackedPaddedRNN(full_q_dim,
                                         hidden_size,
                                         1,
                                         dropout=dropout)

        self.low_attention_layer = MultiAttention(attention_dim,
                                                  attention_size, dropout)
        self.high_attention_layer = MultiAttention(attention_dim,
                                                   attention_size, dropout)
        self.full_attention_layer = MultiAttention(attention_dim,
                                                   attention_size, dropout)

        ## Fusion Layer and Final Attention + Final RNN
        fuse_dim = 10 * hidden_size
        self_attention_dim = 12 * hidden_size + word_dim + ner_dim + pos_dim + 1
        if self.use_xl:
            self_attention_dim += xl_dim

        self.fuse_rnn = StackedPaddedRNN(fuse_dim,
                                         hidden_size,
                                         1,
                                         dropout=dropout)
        self.self_attention_layer = MultiAttention(self_attention_dim,
                                                   attention_size, dropout)
        self.self_rnn = StackedPaddedRNN(4 * hidden_size,
                                         hidden_size,
                                         1,
                                         dropout=dropout)

        ## Verifier and output
        self.summ_layer = PointerS(2 * hidden_size,
                                   dropout=dropout,
                                   use_cuda=self.use_cuda)
        self.summ_layer2 = PointerS(2 * hidden_size,
                                    dropout=dropout,
                                    use_cuda=self.use_cuda)
        self.pointer_layer = PointerNet(2 * hidden_size,
                                        use_cuda=self.use_cuda)
        self.has_ans = nn.Sequential(nn.Dropout(p=dropout),
                                     nn.Linear(6 * hidden_size, 2))
Example #10
def tokens2ids(text):
    # tokenize every sentence of the context
    tokenized_text = [tokenizer.tokenize(sent) for sent in text]
    # pad all sentences to a common length (see the paddingText sketch below)
    tokenized_text = paddingText(tokenized_text)
    # Convert tokens to vocabulary indices
    indexed_tokens = [
        tokenizer.convert_tokens_to_ids(sent) for sent in tokenized_text
    ]

    # Convert inputs to PyTorch tensors
    tokens_tensor = torch.tensor(indexed_tokens)
    return tokens_tensor
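
# The helper paddingText used above is not defined in this snippet; a minimal,
# assumed sketch that pads every tokenized sentence to the longest length with
# the '<eos>' symbol (the choice of padding token is an assumption).
def paddingText(tokenized_text, pad_token='<eos>'):
    max_len = max(len(sent) for sent in tokenized_text)
    return [sent + [pad_token] * (max_len - len(sent)) for sent in tokenized_text]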


model = TransfoXLModel.from_pretrained('./pretrained_model')
model.eval()
if torch.cuda.is_available():
    model = model.cuda()


def ids2embeddings(ids):
    if torch.cuda.is_available():
        ids = ids.cuda()
    with torch.no_grad():
        hidden_state, mems = model(ids)
    return hidden_state


def genBatch(mode='train', bsz=2, ismasked=True):
    if mode == 'train':
Example #11
    def __init__(self,
                 n_layers,
                 in_size,
                 out_size,
                 embed_size,
                 in_size_hier,
                 hidden_size,
                 proj_size,
                 dropout=0.5,
                 initialEmbW=None,
                 independent=False,
                 rnn_type='lstm',
                 classifier='baseline',
                 states_att=False,
                 state_size=-1,
                 embedding_init=None,
                 weights_init=None,
                 elmo_init=False,
                 elmo_num_outputs=1,
                 finetune_elmo=False,
                 bert_init=False,
                 bert_model=None,
                 finetune_bert=False,
                 add_word_emb=True,
                 pretrained_all=True):
        """Initialize encoder with structure parameters

        Args:
            n_layers (int): Number of layers.
            in_size (int): Dimensionality of input vectors.
            out_size (int): Dimensionality of output vectors.
            embed_size (int): Dimensionality of word embedding.
            hidden_size (int): Dimensionality of hidden vectors.
            proj_size (int): Dimensionality of projection before softmax.
            dropout (float): Dropout ratio.
        """
        #TODO
        att_size = 128
        self.rnn_type = rnn_type
        self.classifier = classifier
        super(HLSTMDecoder, self).__init__()
        self.embed = nn.Embedding(in_size, embed_size)
        if embedding_init is not None:
            self.embed.weight.data.copy_(torch.from_numpy(embedding_init))
        elif weights_init is not None:
            self.embed.weight.data.copy_(
                torch.from_numpy(weights_init['embed']))
        if rnn_type == 'lstm':
            self.lstm = nn.LSTM(embed_size + in_size_hier,
                                hidden_size,
                                n_layers,
                                batch_first=True,
                                dropout=dropout)
        elif rnn_type == 'gru':
            self.lstm = nn.GRU(embed_size + in_size_hier,
                               hidden_size,
                               n_layers,
                               batch_first=True,
                               dropout=dropout)
        if weights_init is not None:
            lstm_wt = weights_init['lstm']
            for k, v in lstm_wt.items():
                self.lstm.__getattr__(k).data.copy_(torch.from_numpy(v))

        self.elmo_init = elmo_init
        self.bert_init = bert_init
        self.pretrained_all = pretrained_all
        self.bert_model = bert_model
        self.add_word_emb = add_word_emb
        if False:
            #if pretrained_all and elmo_init:
            options_file = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
            weight_file = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
            self.elmo = Elmo(options_file,
                             weight_file,
                             elmo_num_outputs,
                             requires_grad=finetune_elmo)
            elmo_layer = [
                nn.Linear(elmo_num_outputs * 1024, out_size),
                nn.ReLU()
            ]
            self.elmo_layer = nn.Sequential(*elmo_layer)
        elif False:
            #elif pretrained_all and bert_init:
            if 'bert' in bert_model:
                self.bert = BertModel.from_pretrained(bert_model)
            elif 'openai-gpt' in bert_model:
                self.bert = OpenAIGPTModel.from_pretrained(bert_model)
            elif 'gpt2' in bert_model:
                self.bert = GPT2Model.from_pretrained(bert_model)
            elif 'transfo-xl' in bert_model:
                self.bert = TransfoXLModel.from_pretrained(bert_model)
            self.finetune_bert = finetune_bert
            if not finetune_bert:
                for param in self.bert.parameters():
                    param.requires_grad = False
            if bert_model in ['bert-base-uncased', 'openai-gpt', 'gpt2']:
                bert_in = 768
            elif bert_model in [
                    'bert-large-uncased', 'gpt2-medium', 'transfo-xl-wt103'
            ]:
                bert_in = 1024
            bert_layer = [nn.Linear(bert_in, out_size), nn.ReLU()]
            self.bert_layer = nn.Sequential(*bert_layer)

        self.n_layers = n_layers
        self.dropout = dropout
        self.independent = independent
        self.states_att = states_att
        if states_att:
            self.ecW = nn.Linear(state_size, att_size)
            self.ysW = nn.Linear(hidden_size, att_size)
            hidden_size += state_size

        if classifier == 'baseline':
            layers = [
                nn.Linear(hidden_size, proj_size),
                nn.Linear(proj_size, out_size)
            ]
            self.y_classifier = nn.Sequential(*layers)
        elif classifier == 'weighted_norm':
            layers = [
                weight_norm(nn.Linear(hidden_size, proj_size), dim=None),
                nn.ReLU(),
                weight_norm(nn.Linear(proj_size, out_size), dim=None)
            ]
            self.y_classifier = nn.Sequential(*layers)
        elif classifier == 'logit':
            layers = [
                weight_norm(nn.Linear(hidden_size, proj_size), dim=None),
                nn.ReLU(),
                nn.Linear(proj_size, out_size)
            ]
            self.classifier_txt = nn.Sequential(*layers)
            layers = [
                weight_norm(nn.Linear(hidden_size, 2048), dim=None),
                nn.ReLU(),
                nn.Linear(2048, out_size)
            ]
            self.classifier_ft = nn.Sequential(*layers)
            if weights_init is not None:
                self.classifier_txt[0].weight.data.copy_(
                    torch.from_numpy(weights_init['classifier_txt']))
                self.classifier_ft[0].weight.data.copy_(
                    torch.from_numpy(weights_init['classifier_ft']))
Example #12
    def Get_Transformer_Representation(self, examples_train, examples_test):

        train_rep_file = "./data/" + pb.dataset + "_train_" + "transformerXL"
        test_rep_file = "./data/" + pb.dataset + "_test_" + "transformerXL"

        if (os.path.exists(train_rep_file) == True
                and os.path.exists(test_rep_file) == True):
            with open(train_rep_file, 'rb') as file:
                examples_train_rep = pickle.load(file)
                for i, example in enumerate(examples_train):
                    example.transformerXL_mat = examples_train_rep[i]
            with open(test_rep_file, 'rb') as file:
                examples_test_rep = pickle.load(file)
                for i, example in enumerate(examples_test):
                    example.transformerXL_mat = examples_test_rep[i]
        else:
            examples = []
            for example in examples_train:
                examples.append(example)
            for example in examples_test:
                examples.append(example)

            for i, example in enumerate(examples):

                # example.transformerXL_mat = np.zeros((pb.fgt_maxlength,20))
                # continue

                if (self.transformer_tokenizer == None):
                    self.transformer_tokenizer = TransfoXLTokenizer.from_pretrained(
                        'transfo-xl-wt103')

                text = example.fgt_channels[0]
                tokenized_text = self.transformer_tokenizer.tokenize(text)

                indexed_tokens = self.transformer_tokenizer.convert_tokens_to_ids(
                    tokenized_text)

                tokens_tensor = torch.tensor([indexed_tokens])

                if (self.transformer == None):
                    self.transformer = TransfoXLModel.from_pretrained(
                        'transfo-xl-wt103')
                    self.transformer.eval()

                with torch.no_grad():
                    hidden_states, _ = self.transformer(
                        tokens_tensor)  # (1, 3, 1024)
                    shape = np.array(hidden_states).shape
                    # print(shape)

                    representation, sum = [], 0

                    a, b = shape[1], shape[2]
                    representation = np.zeros((a, b))

                    # hidden_states has shape (1, seq_len, 1024); broadcast-add
                    # each (1024,) token vector onto every row, then average
                    for layer in hidden_states:
                        for words in layer.numpy():
                            representation += words
                            sum += 1
                    if (sum > 0):
                        representation = representation * 1.0 / sum

                    representation = list(representation)
                    while (len(representation) < pb.fgt_maxlength):
                        representation.append(np.zeros(b))

                    example.transformerXL_mat = representation[0:pb.fgt_maxlength]

                print("{:.2%}".format(i * 1.0 / len(examples)))
Example #13
    def __init__(self,
                 n_wlayers,
                 n_slayers,
                 in_size,
                 out_size,
                 embed_size,
                 hidden_size,
                 dropout=0.5,
                 ignore_label=None,
                 initialEmbW=None,
                 independent=False,
                 rnn_type='lstm',
                 embedding_init=None,
                 weights_init=None,
                 elmo_init=False,
                 elmo_num_outputs=1,
                 finetune_elmo=False,
                 bert_init=False,
                 bert_model=None,
                 finetune_bert=False,
                 add_word_emb=True,
                 pretrained_all=True,
                 concat_his=False):
        """Initialize encoder with structure parameters
        Args:
            n_wlayers (int): Number of word-level RNN layers.
            n_slayers (int): Number of sentence-level RNN layers.
            in_size (int): Dimensionality of input vectors.
            out_size (int): Dimensionality of hidden vectors to be output.
            embed_size (int): Dimensionality of word embedding.
            dropout (float): Dropout ratio.
        """

        super(HLSTMEncoder, self).__init__()
        self.embed = nn.Embedding(in_size, embed_size)
        if embedding_init is not None:
            self.embed.weight.data.copy_(torch.from_numpy(embedding_init))
        elif weights_init is not None:
            self.embed.weight.data.copy_(
                torch.from_numpy(weights_init['embed']))
        if rnn_type == 'lstm':
            self.wlstm = nn.LSTM(embed_size,
                                 hidden_size,
                                 n_wlayers,
                                 batch_first=True,
                                 dropout=dropout)
            self.slstm = nn.LSTM(hidden_size,
                                 out_size,
                                 n_slayers,
                                 batch_first=True,
                                 dropout=dropout)
        elif rnn_type == 'gru':
            self.wlstm = nn.GRU(embed_size,
                                hidden_size,
                                n_wlayers,
                                batch_first=True,
                                dropout=dropout)
            self.slstm = nn.GRU(hidden_size,
                                out_size,
                                n_slayers,
                                batch_first=True,
                                dropout=dropout)
        self.elmo_init = elmo_init
        self.bert_init = bert_init
        self.pretrained_all = pretrained_all
        self.concat_his = concat_his
        self.bert_model = bert_model
        self.add_word_emb = add_word_emb
        if pretrained_all and elmo_init:
            options_file = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
            weight_file = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
            self.elmo = Elmo(options_file,
                             weight_file,
                             elmo_num_outputs,
                             requires_grad=finetune_elmo)
            elmo_layer = [
                nn.Linear(elmo_num_outputs * 1024, out_size),
                nn.ReLU()
            ]
            self.elmo_layer = nn.Sequential(*elmo_layer)
        elif pretrained_all and bert_init:
            if 'bert' in bert_model:
                self.bert = BertModel.from_pretrained(bert_model)
            elif 'openai-gpt' in bert_model:
                self.bert = OpenAIGPTModel.from_pretrained(bert_model)
            elif 'gpt2' in bert_model:
                self.bert = GPT2Model.from_pretrained(bert_model)
            elif 'transfo-xl' in bert_model:
                self.bert = TransfoXLModel.from_pretrained(bert_model)
            self.finetune_bert = finetune_bert
            if not finetune_bert:
                for param in self.bert.parameters():
                    param.requires_grad = False
            if bert_model in ['bert-base-uncased', 'openai-gpt', 'gpt2']:
                bert_in = 768
            elif bert_model in [
                    'bert-large-uncased', 'gpt2-medium', 'transfo-xl-wt103'
            ]:
                bert_in = 1024
            bert_layer = [nn.Linear(bert_in, out_size), nn.ReLU()]
            self.bert_layer = nn.Sequential(*bert_layer)

        self.independent = independent
        self.rnn_type = rnn_type