def __init__(self, rnn_type, ntoken, ninp, nhid, chunk_size, nlayers,
             wds='no', dropout=0.5, dropouth=0.5, dropouti=0.5,
             dropoute=0.1, wdrop=0, tie_weights=False, l4d=0):
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    assert rnn_type in ['LSTM'], 'RNN type is not supported'
    self.rnn = ONLSTMStack(
        [ninp] + [nhid] * (nlayers - 1) + [ninp],
        l4d=l4d,
        chunk_size=chunk_size,
        wds=wds,
        dropconnect=wdrop,
        dropout=dropouth
    )
    self.decoder = nn.Linear(ninp, ntoken)

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        # if nhid != ninp:
        #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight

    self.init_weights()

    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.tie_weights = tie_weights
class ModelEncoder(nn.Module):
    def __init__(self, word2vec, embedding_size, hidden_state_size, layer_size,
                 sentence_embedding_size, chunk_size, batch_size,
                 gcn_hidden_size, gcn_output_size, dropout=0.5):
        super(ModelEncoder, self).__init__()
        self.word2vec = word2vec
        self.batch_size = batch_size
        self.gcn_input_size = embedding_size
        self.gcn_hidden_size = gcn_hidden_size
        self.gcn_output_size = gcn_output_size
        self.dropout = dropout
        self.encoder = ONLSTMStack(
            [embedding_size] + [hidden_state_size] * (layer_size - 1) + [sentence_embedding_size],
            chunk_size
        )

    def forward(self, sentences, hidden):
        sentences = get_word_embedding(self.word2vec, sentences)
        # sentences size: length * batch_size * word_embedding_size
        raw_output, hidden_cell, raw_outputs, outputs, distance = self.encoder(sentences, hidden)
        # hidden: layer_size * batch_size * hidden_state_size
        distances = distance[0]
        layer_size, length, batch_size = distances.size()
        distances = distances[-1]  # we use the gates of the last layer as the weights of the tree
        distances = distances.transpose(1, 0)
        word_hidden_state = []
        for i in range(batch_size):
            gcn = ONLSTMGraph(distances[i], self.gcn_input_size,
                              self.gcn_hidden_size, self.gcn_output_size)
            word_hidden_state.append(gcn(sentences[i]).tolist())
        return raw_output[-1], word_hidden_state

    def init_hidden(self, batch_size):
        return self.encoder.init_hidden(batch_size)
class sentence_encoder(nn.Module):
    ### takes in a sentence, returns its encoded embedding and hidden states (for attention)
    def __init__(self, ntoken, h_dim, emb_dim, nlayers, chunk_size, wdrop=0, dropouth=0.5):
        super(sentence_encoder, self).__init__()
        self.lockdrop = LockedDropout()
        self.hdrop = nn.Dropout(dropouth)
        self.encoder = nn.Embedding(ntoken, emb_dim)
        self.rnn = ONLSTMStack([emb_dim] + [h_dim] * nlayers,
                               chunk_size=chunk_size,
                               dropconnect=wdrop,
                               dropout=dropouth)
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        self.h_dim = h_dim
        self.emb_dim = emb_dim
        self.nlayers = nlayers
        self.ntoken = ntoken
        self.chunk_size = chunk_size
        self.wdrop = wdrop
        self.dropouth = dropouth

    def forward(self, inp_sentence, hidden):
        emb = self.encoder(inp_sentence)
        print('inp sen: ', inp_sentence)
        print('emb: ', emb)
        output, hidden, raw_outputs, outputs, distances = self.rnn(emb, hidden)
        self.distance = distances
        result = output.view(output.size(0) * output.size(1), output.size(2))
        '''
        It seems that 'hidden' holds the encoding output and the final cell states of the layers,
        'result' is the (2-d) flattened hidden output of the last layer,
        and 'outputs' is the per-layer stack of these outputs.
        '''
        return result.permute(0, 1), hidden, raw_outputs, outputs

    def init_hidden(self, bsz):
        return self.rnn.init_hidden(bsz)
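# --- Usage sketch (added for illustration; not part of the original code) ---
# Assumes ONLSTMStack and LockedDropout from the ON-LSTM codebase are importable.
# The vocabulary size, dimensions, and batch of random token indices below are
# illustrative choices, not values taken from any original training script.
import torch

enc = sentence_encoder(ntoken=1000, h_dim=650, emb_dim=400, nlayers=3, chunk_size=10)
tokens = torch.randint(0, 1000, (35, 20))        # (seq_len, batch) of token indices
hidden = enc.init_hidden(20)                     # per-layer (h, c) states from ONLSTMStack
result, hidden, raw_outputs, outputs = enc(tokens, hidden)
# result is the flattened last-layer output, shape (seq_len * batch, h_dim)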
class GPTRNNModel(nn.Module):
    """Container module with an encoder, a recurrent module, and a decoder."""

    def __init__(self, rnn_type, ntoken, ninp, nhid, chunk_size, nlayers,
                 dropout=0.5, dropouth=0.5, dropouti=0.5, dropoute=0.1,
                 wdrop=0, tie_weights=False, args=None):
        super(GPTRNNModel, self).__init__()
        self.transformer = OpenAIGPTModel.from_pretrained('openai-gpt')
        config = OpenAIGPTConfig()
        self.lm_head = OpenAIGPTLMHead(self.transformer.tokens_embed.weight, config)
        self.lockdrop = LockedDropout()
        self.idrop = nn.Dropout(dropouti)
        self.hdrop = nn.Dropout(dropouth)
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Linear(768, ninp)
        self.args = args
        assert rnn_type in ['LSTM'], 'RNN type is not supported'
        self.rnn = ONLSTMStack([ninp] + [nhid] * (nlayers - 1) + [ninp],
                               chunk_size=chunk_size,
                               dropconnect=wdrop,
                               dropout=dropouth)
        self.decoder = nn.Linear(ninp, ntoken)

        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        # if tie_weights:
        #     # if nhid != ninp:
        #     #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
        #     self.decoder.weight = self.encoder.weight

        self.rnn_type = rnn_type
        self.ninp = ninp
        self.nhid = nhid
        self.nlayers = nlayers
        self.dropout = dropout
        self.dropouti = dropouti
        self.dropouth = dropouth
        self.dropoute = dropoute
        self.distance = None
        self.tie_weights = tie_weights

    def reset(self):
        if self.rnn_type == 'QRNN':
            [r.reset() for r in self.rnns]

    def init_weights(self, pre_emb):
        initrange = 0.1
        # self.encoder.weight.data.uniform_(-initrange, initrange)
        # if pre_emb is not None:
        #     self.encoder.weight.data[:pre_emb.size(0), :pre_emb.size(1)] = torch.FloatTensor(pre_emb)
        self.decoder.bias.data.fill_(0)
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, input, hidden, gpt_ids, fl_ids, return_h=False):
        if self.args.feature is not None and 'fixGPT' in self.args.feature.split('_'):
            with torch.no_grad():
                emb = self.transformer(gpt_ids)
        else:
            emb = self.transformer(gpt_ids)  # BS * GPT_SL * GPT_EMS
        lm_logits = self.lm_head(emb)
        # shift_logits = lm_logits[..., :-1, :].contiguous()
        emb = torch.cat([emb[r:r + 1, fl_ids[r], :] for r in range(len(fl_ids))],
                        dim=0)                                          # BS * (2*SL) * GPT_ES
        emb = torch.nn.functional.avg_pool1d(emb.permute(0, 2, 1), 2) * 2  # BS * GPT_EMS * SL
        emb = emb.permute(2, 0, 1)                                      # BS * SL * GPT_EMS -> SL * BS * ES
        self.encoder = embedded_dropout_gpt(
            self.encoder, dropout=self.dropoute if self.training else 0)
        emb = nn.functional.relu(self.encoder(emb))
        emb = self.lockdrop(emb, self.dropouti)
        raw_output, hidden, raw_outputs, outputs, self.distance = self.rnn(emb, hidden)
        output = self.lockdrop(raw_output, self.dropout)
        result = output.view(output.size(0) * output.size(1), output.size(2))
        if return_h:
            return result, hidden, raw_outputs, outputs, lm_logits.view(-1, lm_logits.size(-1))
        else:
            return result, hidden

    def init_hidden(self, bsz):
        return self.rnn.init_hidden(bsz)
class RNNModel(nn.Module):
    """Container module with an encoder, a recurrent module, and a decoder."""

    def __init__(self, rnn_type, ntoken, ninp, nhid, chunk_size, nlayers,
                 dropout=0.5, dropouth=0.5, dropouti=0.5, dropoute=0.1,
                 wdrop=0, tie_weights=False):
        super(RNNModel, self).__init__()
        self.lockdrop = LockedDropout()
        self.idrop = nn.Dropout(dropouti)
        self.hdrop = nn.Dropout(dropouth)
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)
        assert rnn_type in ['LSTM'], 'RNN type is not supported'
        self.rnn = ONLSTMStack(
            [ninp] + [nhid] * (nlayers - 1) + [ninp],
            chunk_size=chunk_size,
            dropconnect=wdrop,
            dropout=dropouth
        )
        # self.decoder = nn.Linear(ninp, ntoken)
        self.prob = nn.Linear(1, 15)

        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        # if tie_weights:
        #     # if nhid != ninp:
        #     #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
        #     self.decoder.weight = self.encoder.weight

        self.init_weights()

        self.rnn_type = rnn_type
        self.ninp = ninp
        self.nhid = nhid
        self.nlayers = nlayers
        self.dropout = dropout
        self.dropouti = dropouti
        self.dropouth = dropouth
        self.dropoute = dropoute
        self.tie_weights = tie_weights

        self.embedding = nn.Linear(768, 400)
        self.drop_out = nn.Dropout(p=dropoute)
        # self.linear = nn.Linear(400, ntoken)
        self.sen_out = nn.Sequential(
            nn.Conv1d(10, 5, 3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv1d(5, 1, 3, stride=1, padding=1),
        )
        self.result = nn.Sequential(
            nn.Conv1d(19, 8, 3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv1d(8, 1, 3, stride=1, padding=1),
        )
        self.word_rnn = ONLSTMStack(
            [ninp] + [nhid] * (nlayers - 1) + [ninp],
            chunk_size=chunk_size,
            dropconnect=wdrop,
            dropout=dropouth
        )

    def reset(self):
        if self.rnn_type == 'QRNN':
            [r.reset() for r in self.rnns]

    def init_weights(self):
        initrange = 0.1
        # self.encoder.weight.data.uniform_(-initrange, initrange)
        # self.decoder.bias.data.fill_(0)
        # self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, inputs, cand_ids, hidden, hidd, hidd_cand):
        ## suppose batch_size = 80
        inputs_ = inputs.view(inputs.size(0) * inputs.size(1), inputs.size(2)).transpose(0, 1)            # [80, 15, 10] -> [10, 1200]
        cand_ids_ = cand_ids.view(cand_ids.size(0) * cand_ids.size(1), cand_ids.size(2)).transpose(0, 1)  # [80, 4, 10] -> [10, 320]
        emb = embedded_dropout(
            self.encoder, inputs_,
            dropout=self.dropoute if self.training else 0
        )
        emb = self.lockdrop(emb, self.dropouti)                        # [10, 1200, 400]
        emb_out, _, _, _, _ = self.word_rnn(emb, hidd)                 # [10, 1200, 400]
        sen_emb = self.sen_out(emb_out.permute(1, 0, 2))               # [1200, 1, 400]
        sen_emb = sen_emb.view(inputs.size(0), inputs.size(1), -1).transpose(0, 1)  # [80, 15, 400] -> [15, 80, 400]
        cand_emb = embedded_dropout(
            self.encoder, cand_ids_,
            dropout=self.dropoute if self.training else 0
        )
        cand_emb = self.lockdrop(cand_emb, self.dropouti)              # [10, 320, 400]
        cand_emb_out, _, _, _, _ = self.word_rnn(cand_emb, hidd_cand)  # [10, 320, 400]
        cand_sen_emb = self.sen_out(cand_emb_out.permute(1, 0, 2))     # [320, 1, 400]
        cand_sen_emb = cand_sen_emb.view(cand_ids.size(0), cand_ids.size(1), -1).transpose(0, 1)  # [4, 80, 400]

        ## 1. language modeling
        # raw_output, hidden, raw_outputs, outputs, distances = self.rnn(emb, hidden)
        # self.distance = distances
        # output = self.lockdrop(raw_output, self.dropout)
        # result = output.view(output.size(0) * output.size(1), output.size(2))
        # result_prob = self.decoder(result)

        ## 2. classification
        raw_output, hidden, raw_outputs, outputs, distances = self.rnn(sen_emb, hidden)
        self.distance = distances
        output = self.lockdrop(raw_output, self.dropout)
        output = output.permute(1, 0, 2)
        result = self.result(output)
        cand_scores = torch.matmul(result, cand_sen_emb.permute(1, 2, 0)).squeeze(1)
        return result, cand_scores, hidden, raw_outputs, outputs, cand_emb

    def init_hidden(self, bsz):
        return self.rnn.init_hidden(bsz)
class RNNModel(nn.Module):
    """Container module with an encoder, a recurrent module, and a decoder."""

    def __init__(self, rnn_type, ntoken, ninp, nhid, chunk_size, nlayers,
                 dropout=0.5, dropouth=0.5, dropouti=0.5, dropoute=0.1,
                 wdrop=0, tie_weights=False):
        super(RNNModel, self).__init__()
        self.lockdrop = LockedDropout()
        self.idrop = nn.Dropout(dropouti)
        self.hdrop = nn.Dropout(dropouth)
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)
        assert rnn_type in ['LSTM'], 'RNN type is not supported'
        self.rnn = ONLSTMStack(
            [ninp] + [nhid] * (nlayers - 1) + [ninp],
            chunk_size=chunk_size,
            dropconnect=wdrop,
            dropout=dropouth
        )
        self.decoder = nn.Linear(ninp, ntoken)

        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        if tie_weights:
            # if nhid != ninp:
            #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
            self.decoder.weight = self.encoder.weight

        self.init_weights()

        self.rnn_type = rnn_type
        self.ninp = ninp
        self.nhid = nhid
        self.nlayers = nlayers
        self.dropout = dropout
        self.dropouti = dropouti
        self.dropouth = dropouth
        self.dropoute = dropoute
        self.tie_weights = tie_weights

    def reset(self):
        if self.rnn_type == 'QRNN':
            [r.reset() for r in self.rnns]

    def init_weights(self):
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.fill_(0)
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, input, hidden, return_h=False):
        emb = embedded_dropout(
            self.encoder, input,
            dropout=self.dropoute if self.training else 0
        )
        emb = self.lockdrop(emb, self.dropouti)
        raw_output, hidden, raw_outputs, outputs, distances = self.rnn(emb, hidden)
        self.distance = distances
        output = self.lockdrop(raw_output, self.dropout)
        result = output.view(output.size(0) * output.size(1), output.size(2))
        if return_h:
            return result, hidden, raw_outputs, outputs
        else:
            return result, hidden

    def init_hidden(self, bsz):
        return self.rnn.init_hidden(bsz)
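# --- Usage sketch (added for illustration; not part of the original code) ---
# Assumes ONLSTMStack, LockedDropout, and embedded_dropout from the ON-LSTM
# codebase are importable. The sizes below (vocab 10000, emb 400, hidden 1150,
# 3 layers, chunk 10) are illustrative; any hidden size divisible by chunk_size works.
import torch

model = RNNModel('LSTM', ntoken=10000, ninp=400, nhid=1150, chunk_size=10, nlayers=3)
data = torch.randint(0, 10000, (70, 20))         # (seq_len, batch) of token indices
hidden = model.init_hidden(20)
result, hidden, raw_outputs, outputs = model(data, hidden, return_h=True)
logits = model.decoder(result)                   # (seq_len * batch, ntoken) for LM training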
class Bert_withLSTM(BertPreTrainedModel):
    def __init__(self, config):
        super(Bert_withLSTM, self).__init__(config)
        self.num_labels = config.num_labels
        self.bert = BertModel(config)
        self.lstm = ONLSTMStack([config.hidden_size, config.hidden_size], chunk_size=8)
        self.qa_outputs = torch.nn.Linear(config.hidden_size, config.num_labels)
        self.init_weights()

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        start_positions=None,
        end_positions=None,
    ):
        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
        )
        sequence_output = outputs[0]

        lstm_in = sequence_output.permute(1, 0, 2)
        _, bsz, _ = lstm_in.size()
        lstm_in = self.lstm(lstm_in, self.lstm.init_hidden(bsz))
        lstm_in = lstm_in[0].permute(1, 0, 2)

        logits = self.qa_outputs(lstm_in)
        start_logits, end_logits = logits.split(1, dim=-1)
        start_logits = start_logits.squeeze(-1)
        end_logits = end_logits.squeeze(-1)

        outputs = (start_logits, end_logits,) + outputs[2:]
        if start_positions is not None and end_positions is not None:
            # If we are on multi-GPU, split adds a dimension
            if len(start_positions.size()) > 1:
                start_positions = start_positions.squeeze(-1)
            if len(end_positions.size()) > 1:
                end_positions = end_positions.squeeze(-1)
            # sometimes the start/end positions are outside our model inputs; we ignore these terms
            ignored_index = start_logits.size(1)
            start_positions.clamp_(0, ignored_index)
            end_positions.clamp_(0, ignored_index)

            loss_fct = CrossEntropyLoss(ignore_index=ignored_index)
            start_loss = loss_fct(start_logits, start_positions)
            end_loss = loss_fct(end_logits, end_positions)
            total_loss = (start_loss + end_loss) / 2
            outputs = (total_loss,) + outputs

        return outputs  # (loss), start_logits, end_logits, (hidden_states), (attentions)
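# --- Usage sketch (added for illustration; not part of the original code) ---
# Assumes the Hugging Face transformers BertConfig/BertModel/BertPreTrainedModel
# and the ON-LSTM ONLSTMStack are importable (the original may use an older
# pytorch_transformers import). The tiny config and random token ids below are
# illustrative only, and the weights are randomly initialized, not pretrained.
import torch
from transformers import BertConfig

config = BertConfig(hidden_size=128, num_hidden_layers=2, num_attention_heads=2,
                    intermediate_size=256, num_labels=2)  # 128 is divisible by chunk_size=8
model = Bert_withLSTM(config)
input_ids = torch.randint(0, config.vocab_size, (4, 32))  # (batch, seq_len)
attention_mask = torch.ones_like(input_ids)
start_logits, end_logits = model(input_ids, attention_mask=attention_mask)[:2]
# start_logits / end_logits each have shape (batch, seq_len)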