# Imports assumed by the snippets in this file (PyTorch / NumPy). The model-specific
# modules referenced below (CharCNN, CharBiLSTM, CharBiGRU, LatticeLSTM, TransBiLSTM,
# model_urls, model_cache) are provided elsewhere in the surrounding repositories.
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.model_zoo as model_zoo
from torch.autograd import Variable
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence


# BiLSTM.__init__ (constructor fragment)
def __init__(self, data):
    super(BiLSTM, self).__init__()
    print("build batched bilstm...")
    self.gpu = data.HP_gpu
    self.use_char = data.HP_use_char
    self.batch_size = data.HP_batch_size
    self.char_hidden_dim = 0
    self.average_batch = data.HP_average_batch_loss
    if self.use_char:
        self.char_hidden_dim = data.HP_char_hidden_dim
        self.char_embedding_dim = data.char_emb_dim
        if data.char_features == "CNN":
            self.char_feature = CharCNN(data.char_alphabet.size(), self.char_embedding_dim,
                                        self.char_hidden_dim, data.HP_dropout, self.gpu)
        elif data.char_features == "LSTM":
            self.char_feature = CharBiLSTM(data.char_alphabet.size(), self.char_embedding_dim,
                                           self.char_hidden_dim, data.HP_dropout, self.gpu)
        else:
            print("Error char feature selection, please check parameter data.char_features (either CNN or LSTM).")
            exit(0)
    self.embedding_dim = data.word_emb_dim
    self.hidden_dim = data.HP_hidden_dim
    self.drop = nn.Dropout(data.HP_dropout)
    self.droplstm = nn.Dropout(data.HP_dropout)
    self.word_embeddings = nn.Embedding(data.word_alphabet.size(), self.embedding_dim)
    self.bilstm_flag = data.HP_bilstm
    self.lstm_layer = data.HP_lstm_layer
    if data.pretrain_word_embedding is not None:
        self.word_embeddings.weight.data.copy_(torch.from_numpy(data.pretrain_word_embedding))
    else:
        self.word_embeddings.weight.data.copy_(
            torch.from_numpy(self.random_embedding(data.word_alphabet.size(), self.embedding_dim)))
    # The LSTM takes word embeddings as inputs and outputs hidden states
    # with dimensionality hidden_dim.
    if self.bilstm_flag:
        lstm_hidden = data.HP_hidden_dim // 2
    else:
        lstm_hidden = data.HP_hidden_dim
    self.lstm = nn.LSTM(self.embedding_dim + self.char_hidden_dim, lstm_hidden,
                        num_layers=self.lstm_layer, batch_first=True, bidirectional=self.bilstm_flag)
    # The linear layer that maps from hidden state space to tag space
    self.hidden2tag = nn.Linear(self.hidden_dim, data.label_alphabet_size)
    if self.gpu:
        self.drop = self.drop.cuda()
        self.droplstm = self.droplstm.cuda()
        self.word_embeddings = self.word_embeddings.cuda()
        self.lstm = self.lstm.cuda()
        self.hidden2tag = self.hidden2tag.cuda()
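
# Illustration (not part of the original code, all sizes hypothetical): with a
# bidirectional LSTM, the per-direction hidden size is halved (HP_hidden_dim // 2)
# so the concatenated forward/backward output is exactly HP_hidden_dim wide and
# hidden2tag can consume it directly.
def _sketch_bilstm_output_width():
    lstm = nn.LSTM(input_size=150, hidden_size=200 // 2, num_layers=1,
                   batch_first=True, bidirectional=True)
    hidden2tag = nn.Linear(200, 17)      # 17 = hypothetical label_alphabet_size
    x = torch.randn(4, 10, 150)          # (batch, seq_len, word_dim + char_hidden_dim)
    out, _ = lstm(x)                     # (4, 10, 200): two directions of 100 each
    return hidden2tag(out)               # (4, 10, 17) tag scores
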
class WordRep(nn.Module):
    def __init__(self, data, use_position, use_cap, use_postag, use_char):
        super(WordRep, self).__init__()
        print("build word representation...")
        self.gpu = data.HP_gpu
        self.use_char = use_char
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        self.char_all_feature = False
        if self.use_char:
            self.char_hidden_dim = data.HP_char_hidden_dim
            self.char_embedding_dim = data.char_emb_dim
            if data.char_feature_extractor == "CNN":
                self.char_feature = CharCNN(data.char_alphabet.size(), data.pretrain_char_embedding,
                                            self.char_embedding_dim, self.char_hidden_dim,
                                            data.HP_dropout, self.gpu)
            elif data.char_feature_extractor == "LSTM":
                self.char_feature = CharBiLSTM(data.char_alphabet.size(), data.pretrain_char_embedding,
                                               self.char_embedding_dim, self.char_hidden_dim,
                                               data.HP_dropout, self.gpu)
            elif data.char_feature_extractor == "GRU":
                self.char_feature = CharBiGRU(data.char_alphabet.size(), data.pretrain_char_embedding,
                                              self.char_embedding_dim, self.char_hidden_dim,
                                              data.HP_dropout, self.gpu)
            elif data.char_feature_extractor == "ALL":
                self.char_all_feature = True
                self.char_feature = CharCNN(data.char_alphabet.size(), data.pretrain_char_embedding,
                                            self.char_embedding_dim, self.char_hidden_dim,
                                            data.HP_dropout, self.gpu)
                self.char_feature_extra = CharBiLSTM(data.char_alphabet.size(), data.pretrain_char_embedding,
                                                     self.char_embedding_dim, self.char_hidden_dim,
                                                     data.HP_dropout, self.gpu)
            else:
                print("Error char feature selection, please check parameter data.char_feature_extractor (CNN/LSTM/GRU/ALL).")
                exit(0)
        self.embedding_dim = data.word_emb_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.word_embedding = nn.Embedding(data.word_alphabet.size(), self.embedding_dim)
        if data.pretrain_word_embedding is not None:
            self.word_embedding.weight.data.copy_(torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(self.random_embedding(data.word_alphabet.size(), self.embedding_dim)))
        self.feature_num = 0
        self.feature_embedding_dims = data.feature_emb_dims
        self.feature_embeddings = nn.ModuleList()
        if use_cap:
            self.feature_num += 1
            alphabet_id = data.feature_name2id['[Cap]']
            emb = nn.Embedding(data.feature_alphabets[alphabet_id].size(),
                               self.feature_embedding_dims[alphabet_id])
            emb.weight.data.copy_(
                torch.from_numpy(self.random_embedding(data.feature_alphabets[alphabet_id].size(),
                                                       self.feature_embedding_dims[alphabet_id])))
            self.feature_embeddings.append(emb)
        if use_postag:
            self.feature_num += 1
            alphabet_id = data.feature_name2id['[POS]']
            emb = nn.Embedding(data.feature_alphabets[alphabet_id].size(),
                               self.feature_embedding_dims[alphabet_id])
            emb.weight.data.copy_(
                torch.from_numpy(self.random_embedding(data.feature_alphabets[alphabet_id].size(),
                                                       self.feature_embedding_dims[alphabet_id])))
            self.feature_embeddings.append(emb)
        self.use_position = use_position
        if self.use_position:
            position_alphabet_id = data.re_feature_name2id['[POSITION]']
            self.position_embedding_dim = data.re_feature_emb_dims[position_alphabet_id]
            self.position1_emb = nn.Embedding(data.re_feature_alphabet_sizes[position_alphabet_id],
                                              self.position_embedding_dim, data.pad_idx)
            self.position1_emb.weight.data.copy_(
                torch.from_numpy(self.random_embedding(data.re_feature_alphabet_sizes[position_alphabet_id],
                                                       self.position_embedding_dim)))
            self.position2_emb = nn.Embedding(data.re_feature_alphabet_sizes[position_alphabet_id],
                                              self.position_embedding_dim, data.pad_idx)
            self.position2_emb.weight.data.copy_(
                torch.from_numpy(self.random_embedding(data.re_feature_alphabet_sizes[position_alphabet_id],
                                                       self.position_embedding_dim)))
        if torch.cuda.is_available():
            self.drop = self.drop.cuda(self.gpu)
            self.word_embedding = self.word_embedding.cuda(self.gpu)
            for idx in range(self.feature_num):
                self.feature_embeddings[idx] = self.feature_embeddings[idx].cuda(self.gpu)
            if self.use_position:
                self.position1_emb = self.position1_emb.cuda(self.gpu)
                self.position2_emb = self.position2_emb.cuda(self.gpu)

    def random_embedding(self, vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale, [1, embedding_dim])
        return pretrain_emb

    def forward(self, word_inputs, feature_inputs, word_seq_lengths, char_inputs,
                char_seq_lengths, char_seq_recover, position1_inputs, position2_inputs):
        """
        input:
            word_inputs: (batch_size, sent_len)
            feature_inputs: list [(batch_size, sent_len), (batch_size, sent_len), ...]
            word_seq_lengths: list of batch_size, (batch_size, 1)
            char_inputs: (batch_size*sent_len, word_length)
            char_seq_lengths: list of whole batch_size for char, (batch_size*sent_len, 1)
            char_seq_recover: variable which records the char order information, used to recover char order
        output:
            Variable(batch_size, sent_len, hidden_dim)
        """
        batch_size = word_inputs.size(0)
        sent_len = word_inputs.size(1)
        word_embs = self.word_embedding(word_inputs)
        word_list = [word_embs]
        for idx in range(self.feature_num):
            word_list.append(self.feature_embeddings[idx](feature_inputs[idx]))
        if self.use_char:
            # calculate char lstm last hidden
            char_features = self.char_feature.get_last_hiddens(char_inputs, char_seq_lengths.cpu().numpy())
            char_features = char_features[char_seq_recover]
            char_features = char_features.view(batch_size, sent_len, -1)
            # concat word and char together
            word_list.append(char_features)
            word_embs = torch.cat([word_embs, char_features], 2)
            if self.char_all_feature:
                char_features_extra = self.char_feature_extra.get_last_hiddens(char_inputs, char_seq_lengths.cpu().numpy())
                char_features_extra = char_features_extra[char_seq_recover]
                char_features_extra = char_features_extra.view(batch_size, sent_len, -1)
                # concat word and char together
                word_list.append(char_features_extra)
        if self.use_position:
            position1_feature = self.position1_emb(position1_inputs)
            position2_feature = self.position2_emb(position2_inputs)
            word_list.append(position1_feature)
            word_list.append(position2_feature)
        word_embs = torch.cat(word_list, 2)
        word_represent = self.drop(word_embs)
        return word_represent
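
# Illustration (assumed toy values, not from the original repo) of the
# char_seq_recover pattern used in WordRep.forward: char features are computed on a
# length-sorted batch of words, recover[i] holds the sorted-batch row that belongs at
# original position i, and the result is reshaped back to one row per sentence.
def _sketch_char_seq_recover():
    char_hidden = torch.randn(6, 50)                     # 2 sentences * 3 words, sorted by word length
    char_seq_recover = torch.tensor([2, 0, 5, 1, 4, 3])  # hypothetical recover indices
    restored = char_hidden[char_seq_recover]             # rows back in original word order
    return restored.view(2, 3, 50)                       # (batch_size, sent_len, char_hidden_dim)
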
# WordRep.__init__ (constructor fragment, variant with generic per-feature embeddings)
def __init__(self, data):
    super(WordRep, self).__init__()
    print("build word representation...")
    self.gpu = data.HP_gpu
    self.use_char = data.use_char
    self.batch_size = data.HP_batch_size
    self.char_hidden_dim = 0
    self.char_all_feature = False
    if self.use_char:
        self.char_hidden_dim = data.HP_char_hidden_dim
        self.char_embedding_dim = data.char_emb_dim
        if data.char_seq_feature == "CNN":
            self.char_feature = CharCNN(data.char_alphabet.size(), self.char_embedding_dim,
                                        self.char_hidden_dim, data.HP_dropout, self.gpu)
        elif data.char_seq_feature == "LSTM":
            self.char_feature = CharBiLSTM(data.char_alphabet.size(), self.char_embedding_dim,
                                           self.char_hidden_dim, data.HP_dropout, self.gpu)
        elif data.char_seq_feature == "GRU":
            self.char_feature = CharBiGRU(data.char_alphabet.size(), self.char_embedding_dim,
                                          self.char_hidden_dim, data.HP_dropout, self.gpu)
        elif data.char_seq_feature == "ALL":
            self.char_all_feature = True
            self.char_feature = CharCNN(data.char_alphabet.size(), self.char_embedding_dim,
                                        self.char_hidden_dim, data.HP_dropout, self.gpu)
            self.char_feature_extra = CharBiLSTM(data.char_alphabet.size(), self.char_embedding_dim,
                                                 self.char_hidden_dim, data.HP_dropout, self.gpu)
        else:
            print("Error char feature selection, please check parameter data.char_seq_feature (CNN/LSTM/GRU/ALL).")
            exit(0)
    self.embedding_dim = data.word_emb_dim
    self.drop = nn.Dropout(data.HP_dropout)
    self.word_embedding = nn.Embedding(data.word_alphabet.size(), self.embedding_dim)
    if data.pretrain_word_embedding is not None:
        self.word_embedding.weight.data.copy_(torch.from_numpy(data.pretrain_word_embedding))
    else:
        self.word_embedding.weight.data.copy_(
            torch.from_numpy(self.random_embedding(data.word_alphabet.size(), self.embedding_dim)))
    self.feature_num = data.feature_num
    self.feature_embedding_dims = data.feature_emb_dims
    self.feature_embeddings = nn.ModuleList()
    for idx in range(self.feature_num):
        self.feature_embeddings.append(
            nn.Embedding(data.feature_alphabets[idx].size(), self.feature_embedding_dims[idx]))
    for idx in range(self.feature_num):
        if data.pretrain_feature_embeddings[idx] is not None:
            self.feature_embeddings[idx].weight.data.copy_(
                torch.from_numpy(data.pretrain_feature_embeddings[idx]))
        else:
            self.feature_embeddings[idx].weight.data.copy_(
                torch.from_numpy(self.random_embedding(data.feature_alphabets[idx].size(),
                                                       self.feature_embedding_dims[idx])))
    if self.gpu:
        self.drop = self.drop.cuda()
        self.word_embedding = self.word_embedding.cuda()
        for idx in range(self.feature_num):
            self.feature_embeddings[idx] = self.feature_embeddings[idx].cuda()
class BiLSTM(nn.Module):
    def __init__(self, data):
        super(BiLSTM, self).__init__()
        print("build batched bilstm...")
        self.use_bigram = data.use_bigram
        self.gpu = data.HP_gpu
        self.use_char = data.HP_use_char
        self.use_gaz = data.HP_use_gaz
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        if self.use_char:
            self.char_hidden_dim = data.HP_char_hidden_dim
            self.char_embedding_dim = data.char_emb_dim
            if data.char_features == "CNN":
                self.char_feature = CharCNN(data.char_alphabet.size(), self.char_embedding_dim,
                                            self.char_hidden_dim, data.HP_dropout, self.gpu)
            elif data.char_features == "LSTM":
                self.char_feature = CharBiLSTM(data.char_alphabet.size(), self.char_embedding_dim,
                                               self.char_hidden_dim, data.HP_dropout, self.gpu)
            else:
                print("Error char feature selection, please check parameter data.char_features (either CNN or LSTM).")
                exit(0)
        self.embedding_dim = data.word_emb_dim
        self.hidden_dim = data.HP_hidden_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.droplstm = nn.Dropout(data.HP_dropout)
        self.word_embeddings = nn.Embedding(data.word_alphabet.size(), self.embedding_dim)
        self.biword_embeddings = nn.Embedding(data.biword_alphabet.size(), data.biword_emb_dim)
        self.bilstm_flag = data.HP_bilstm
        self.lstm_layer = data.HP_lstm_layer
        # add radical embedding
        if data.pretrain_word_embedding is not None:
            self.word_embeddings.weight.data.copy_(torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embeddings.weight.data.copy_(
                torch.from_numpy(self.random_embedding(data.word_alphabet.size(), self.embedding_dim)))
        if data.pretrain_biword_embedding is not None:
            self.biword_embeddings.weight.data.copy_(torch.from_numpy(data.pretrain_biword_embedding))
        else:
            self.biword_embeddings.weight.data.copy_(
                torch.from_numpy(self.random_embedding(data.biword_alphabet.size(), data.biword_emb_dim)))
        # The LSTM takes word embeddings as inputs, and outputs hidden states with dimensionality hidden_dim.
        if self.bilstm_flag:
            lstm_hidden = data.HP_hidden_dim // 2
        else:
            lstm_hidden = data.HP_hidden_dim
        lstm_input = self.embedding_dim + self.char_hidden_dim
        if self.use_bigram:
            lstm_input += data.biword_emb_dim
        # add gaz (lexicon word) embedding
        self.forward_lstm = LatticeLSTM(lstm_input, lstm_hidden, data.gaz_dropout,
                                        data.gaz_alphabet.size(), data.gaz_emb_dim,
                                        data.pretrain_gaz_embedding, True, data.HP_fix_gaz_emb, self.gpu)
        if self.bilstm_flag:
            self.backward_lstm = LatticeLSTM(lstm_input, lstm_hidden, data.gaz_dropout,
                                             data.gaz_alphabet.size(), data.gaz_emb_dim,
                                             data.pretrain_gaz_embedding, False, data.HP_fix_gaz_emb, self.gpu)
        # self.lstm = nn.LSTM(lstm_input, lstm_hidden, num_layers=self.lstm_layer, batch_first=True, bidirectional=self.bilstm_flag)
        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(data.HP_hidden_dim, data.label_alphabet_size)
        if self.gpu:
            self.drop = self.drop.cuda()
            self.droplstm = self.droplstm.cuda()
            self.word_embeddings = self.word_embeddings.cuda()
            self.biword_embeddings = self.biword_embeddings.cuda()
            self.forward_lstm = self.forward_lstm.cuda()
            if self.bilstm_flag:
                self.backward_lstm = self.backward_lstm.cuda()
            self.hidden2tag = self.hidden2tag.cuda()

    def random_embedding(self, vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale, [1, embedding_dim])
        return pretrain_emb

    def get_lstm_features(self, gaz_list, word_inputs, biword_inputs, word_seq_lengths,
                          char_inputs, char_seq_lengths, char_seq_recover):
        """
        input:
            word_inputs: (batch_size, sent_len)
            gaz_list:
            word_seq_lengths: list of batch_size, (batch_size, 1)
            char_inputs: (batch_size*sent_len, word_length)
            char_seq_lengths: list of whole batch_size for char, (batch_size*sent_len, 1)
            char_seq_recover: variable which records the char order information, used to recover char order
        output:
            Variable(sent_len, batch_size, hidden_dim)
        """
        batch_size = word_inputs.size(0)
        sent_len = word_inputs.size(1)
        word_embs = self.word_embeddings(word_inputs)
        if self.use_bigram:
            biword_embs = self.biword_embeddings(biword_inputs)
            word_embs = torch.cat([word_embs, biword_embs], 2)
        if self.use_char:
            # calculate char lstm last hidden
            char_features = self.char_feature.get_last_hiddens(char_inputs, char_seq_lengths.cpu().numpy())
            char_features = char_features[char_seq_recover]
            char_features = char_features.view(batch_size, sent_len, -1)
            # concat word and char together
            word_embs = torch.cat([word_embs, char_features], 2)
        word_embs = self.drop(word_embs)
        # packed_words = pack_padded_sequence(word_embs, word_seq_lengths.cpu().numpy(), True)
        hidden = None
        lstm_out, hidden = self.forward_lstm(word_embs, gaz_list, hidden)
        if self.bilstm_flag:
            backward_hidden = None
            backward_lstm_out, backward_hidden = self.backward_lstm(word_embs, gaz_list, backward_hidden)
            lstm_out = torch.cat([lstm_out, backward_lstm_out], 2)
        # lstm_out, _ = pad_packed_sequence(lstm_out)
        lstm_out = self.droplstm(lstm_out)
        return lstm_out

    def get_output_score(self, gaz_list, word_inputs, biword_inputs, word_seq_lengths,
                         char_inputs, char_seq_lengths, char_seq_recover):
        lstm_out = self.get_lstm_features(gaz_list, word_inputs, biword_inputs, word_seq_lengths,
                                          char_inputs, char_seq_lengths, char_seq_recover)
        # lstm_out (batch_size, sent_len, hidden_dim)
        outputs = self.hidden2tag(lstm_out)
        return outputs

    def neg_log_likelihood_loss(self, gaz_list, word_inputs, biword_inputs, word_seq_lengths,
                                char_inputs, char_seq_lengths, char_seq_recover, batch_label, mask):
        # mask is not used
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        total_word = batch_size * seq_len
        loss_function = nn.NLLLoss(ignore_index=0, size_average=False)
        outs = self.get_output_score(gaz_list, word_inputs, biword_inputs, word_seq_lengths,
                                     char_inputs, char_seq_lengths, char_seq_recover)
        # outs (batch_size, seq_len, label_vocab)
        outs = outs.view(total_word, -1)
        score = F.log_softmax(outs, 1)
        loss = loss_function(score, batch_label.view(total_word))
        _, tag_seq = torch.max(score, 1)
        tag_seq = tag_seq.view(batch_size, seq_len)
        return loss, tag_seq

    def forward(self, gaz_list, word_inputs, biword_inputs, word_seq_lengths,
                char_inputs, char_seq_lengths, char_seq_recover, mask):
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        total_word = batch_size * seq_len
        outs = self.get_output_score(gaz_list, word_inputs, biword_inputs, word_seq_lengths,
                                     char_inputs, char_seq_lengths, char_seq_recover)
        outs = outs.view(total_word, -1)
        _, tag_seq = torch.max(outs, 1)
        tag_seq = tag_seq.view(batch_size, seq_len)
        # filter padded positions with zero
        decode_seq = mask.long() * tag_seq
        return decode_seq
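
# Illustration (toy tensors, not from the original repo) of the masked decoding at
# the end of forward: multiplying the argmax tags by the padding mask zeroes out
# predictions on padded positions.
def _sketch_masked_decode():
    tag_seq = torch.tensor([[3, 1, 4, 2], [2, 5, 1, 1]])  # argmax tags, (batch=2, seq_len=4)
    mask = torch.tensor([[1, 1, 1, 0], [1, 1, 0, 0]])     # 1 = real token, 0 = padding
    return mask.long() * tag_seq                          # [[3, 1, 4, 0], [2, 5, 0, 0]]
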
class WordEmbedding(nn.Module):
    def __init__(self, args, model_params):
        super(WordEmbedding, self).__init__()
        self.gpu = args.ifgpu
        self.use_char = args.useChar
        self.char_hidden_dim = args.char_hidden_dim
        self.char_embedding_dim = args.char_embedding_dim
        self.embedding_dim = model_params.embedding_dim
        self.drop = nn.Dropout(args.dropout)
        # char embedding
        if self.use_char:
            if args.charExtractor == "CNN":
                self.char_feature = CharCNN(model_params.char_alphabet.size(),
                                            model_params.pretrain_char_embedding,
                                            self.char_embedding_dim, self.char_hidden_dim,
                                            args.dropout, self.gpu)
            elif args.charExtractor == "LSTM":
                self.char_feature = CharBiLSTM(model_params.char_alphabet.size(),
                                               model_params.pretrain_char_embedding,
                                               self.char_embedding_dim, self.char_hidden_dim,
                                               args.dropout, self.gpu)
            else:
                print("Error char feature selection, please check parameter data.char_feature_extractor (CNN/LSTM).")
                exit(0)
        # word embedding
        self.word_embedding = nn.Embedding(model_params.word_alphabet.size(), self.embedding_dim)
        if model_params.pretrain_word_embedding is not None:
            self.word_embedding.weight.data.copy_(torch.from_numpy(model_params.pretrain_word_embedding))
        else:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(self.random_embedding(model_params.word_alphabet.size(), self.embedding_dim)))

    def random_embedding(self, vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale, [1, embedding_dim])
        return pretrain_emb

    def forward(self, word_inputs, char_inputs, char_seq_lengths, char_seq_recover):
        """
        input:
            word_inputs: (batch_size, sent_len)
            char_inputs: (batch_size*sent_len, word_length)
            char_seq_lengths: list of whole batch_size for char, (batch_size*sent_len, 1)
            char_seq_recover: variable which records the char order information, used to recover char order
        output:
            Variable(batch_size, sent_len, hidden_dim)
        """
        batch_size = word_inputs.size(0)
        sent_len = word_inputs.size(1)
        word_embs = self.word_embedding(word_inputs)
        word_list = [word_embs]
        if self.use_char:
            # calculate char lstm last hidden
            char_features = self.char_feature.get_last_hiddens(char_inputs, char_seq_lengths.cpu().numpy())
            char_features = char_features[char_seq_recover]
            char_features = char_features.view(batch_size, sent_len, -1)
            # concat word and char together
            word_list.append(char_features)
        word_embs = torch.cat(word_list, 2)
        word_represent = self.drop(word_embs)
        return word_represent
class WordRep(nn.Module):
    def __init__(self, data):
        super(WordRep, self).__init__()
        print("Build word representation...")
        self.gpu = data.HP_gpu
        self.use_char = data.use_char
        self.use_trans = data.use_trans
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        self.char_all_feature = False
        self.use_mapping = data.use_mapping
        self.mapping_func = data.mapping_func
        # translation features
        if self.use_trans:
            if self.use_mapping:
                # linear mapping
                self.w = nn.Linear(data.word_emb_dim, data.HP_trans_hidden_dim)
                # non-linear mapping: w + tanh or w + sigmoid
                if self.mapping_func:
                    if self.mapping_func == "tanh":
                        self.non_linear = nn.Tanh()
                    elif self.mapping_func == "sigmoid":
                        self.non_linear = nn.Sigmoid()
            self.trans_hidden_dim = data.HP_trans_hidden_dim
            self.trans_embedding_dim = data.trans_emb_dim
            self.trans_feature = TransBiLSTM(data.translation_alphabet.size(), self.trans_embedding_dim,
                                             self.trans_hidden_dim, data.HP_dropout,
                                             data.pretrain_trans_embedding, self.gpu)
        # character-level features
        if self.use_char:
            self.char_hidden_dim = data.HP_char_hidden_dim
            self.char_embedding_dim = data.char_emb_dim
            if data.char_seq_feature == "CNN":
                self.char_feature = CharCNN(data.char_alphabet.size(), self.char_embedding_dim,
                                            self.char_hidden_dim, data.HP_dropout,
                                            data.pretrain_char_embedding, self.gpu)
            elif data.char_seq_feature == "LSTM":
                self.char_feature = CharBiLSTM(data.char_alphabet.size(), self.char_embedding_dim,
                                               self.char_hidden_dim, data.HP_dropout,
                                               data.pretrain_char_embedding, self.gpu)
            elif data.char_seq_feature == "GRU":
                self.char_feature = CharBiGRU(data.char_alphabet.size(), self.char_embedding_dim,
                                              self.char_hidden_dim, data.HP_dropout, self.gpu)
            elif data.char_seq_feature == "ALL":
                self.char_all_feature = True
                self.char_feature = CharCNN(data.char_alphabet.size(), self.char_embedding_dim,
                                            self.char_hidden_dim, data.HP_dropout,
                                            data.pretrain_char_embedding, self.gpu)
                self.char_feature_extra = CharBiLSTM(data.char_alphabet.size(), self.char_embedding_dim,
                                                     self.char_hidden_dim, data.HP_dropout, self.gpu)
            else:
                print("Error char feature selection, please check parameter data.char_seq_feature (CNN/LSTM/GRU/ALL).")
                exit(0)
        # word embedding
        self.embedding_dim = data.word_emb_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.word_embedding = nn.Embedding(data.word_alphabet.size(), self.embedding_dim)
        if data.pretrain_word_embedding is not None:
            self.word_embedding.weight.data.copy_(torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(self.random_embedding(data.word_alphabet.size(), self.embedding_dim)))
        # not used
        self.feature_num = data.feature_num
        self.feature_embedding_dims = data.feature_emb_dims
        self.feature_embeddings = nn.ModuleList()
        for idx in range(self.feature_num):
            self.feature_embeddings.append(
                nn.Embedding(data.feature_alphabets[idx].size(), self.feature_embedding_dims[idx]))
        for idx in range(self.feature_num):
            if data.pretrain_feature_embeddings[idx] is not None:
                self.feature_embeddings[idx].weight.data.copy_(
                    torch.from_numpy(data.pretrain_feature_embeddings[idx]))
            else:
                self.feature_embeddings[idx].weight.data.copy_(
                    torch.from_numpy(self.random_embedding(data.feature_alphabets[idx].size(),
                                                           self.feature_embedding_dims[idx])))
        if self.gpu:
            self.drop = self.drop.cuda()
            self.word_embedding = self.word_embedding.cuda()
            for idx in range(self.feature_num):
                self.feature_embeddings[idx] = self.feature_embeddings[idx].cuda()

    def random_embedding(self, vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale, [1, embedding_dim])
        return pretrain_emb

    def forward(self, word_inputs, feature_inputs, word_seq_lengths, char_inputs, char_seq_lengths,
                char_seq_recover, trans_inputs, trans_seq_length, trans_seq_recover):
        """
        input:
            word_inputs: (batch_size, sent_len)
            feature_inputs: list [(batch_size, sent_len), (batch_size, sent_len), ...]
            word_seq_lengths: list of batch_size, (batch_size, 1)
            char_inputs: (batch_size*sent_len, word_length)
            char_seq_lengths: list of whole batch_size for char, (batch_size*sent_len, 1)
            char_seq_recover: variable which records the char order information, used to recover char order
        output:
            Variable(batch_size, sent_len, hidden_dim)
        """
        batch_size = word_inputs.size(0)
        sent_len = word_inputs.size(1)
        word_embs = self.word_embedding(word_inputs)
        word_list = [word_embs]
        for idx in range(self.feature_num):
            word_list.append(self.feature_embeddings[idx](feature_inputs[idx]))
        if self.use_char:
            # calculate char lstm last hidden
            char_features, _ = self.char_feature.get_last_hiddens(char_inputs, char_seq_lengths.cpu().numpy())
            char_features = char_features[char_seq_recover]
            char_features = char_features.view(batch_size, sent_len, -1)
            word_list.append(char_features)
            if self.char_all_feature:
                char_features_extra, _ = self.char_feature_extra.get_last_hiddens(char_inputs, char_seq_lengths.cpu().numpy())
                char_features_extra = char_features_extra[char_seq_recover]
                char_features_extra = char_features_extra.view(batch_size, sent_len, -1)
                # concat word and char together
                word_list.append(char_features_extra)
        trans_features_wc_temp = None
        word_embed_mapping = None
        word_embs_temp = word_embs.view(batch_size * sent_len, -1)
        if self.use_trans:
            trans_features, trans_rnn_length = self.trans_feature.get_last_hiddens(trans_inputs, trans_seq_length.cpu().numpy())
            if self.use_mapping:
                trans_features_wc = trans_features
                if self.gpu:
                    trans_features_wc = trans_features_wc.cuda()
                trans_features_wc = trans_features_wc[trans_seq_recover]
                trans_inputs = trans_inputs[trans_seq_recover]
                for index, line in enumerate(trans_inputs):
                    # words with no translation entry (index 0) fall back to the mapped word embedding
                    if line[0].data.cpu().numpy()[0] == 0:
                        if self.mapping_func:
                            trans_features_wc[index] = self.non_linear(self.w(word_embs_temp[index]))
                        else:
                            trans_features_wc[index] = self.w(word_embs_temp[index])
                trans_features_wc_temp = trans_features_wc
                trans_features_wc = trans_features_wc.view(batch_size, sent_len, -1)
                word_list.append(trans_features_wc)
                if self.mapping_func:
                    word_embed_mapping = self.non_linear(self.w(word_embs_temp))
                else:
                    word_embed_mapping = self.w(word_embs_temp)
            else:
                trans_features = trans_features[trans_seq_recover]
                trans_features = trans_features.view(batch_size, sent_len, -1)
                word_list.append(trans_features)
        word_embs = torch.cat(word_list, 2)
        word_represent = self.drop(word_embs)
        return word_represent, word_embed_mapping, trans_features_wc_temp
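
# Illustration (hypothetical dimensions, not from the original repo) of the mapping
# branch above: when a word has no translation (trans index 0), its word embedding is
# projected by self.w into the translation-feature space, optionally through tanh or
# sigmoid, and stands in for the missing translation feature.
def _sketch_translation_mapping():
    word_emb_dim, trans_hidden_dim = 100, 60   # assumed sizes
    w = nn.Linear(word_emb_dim, trans_hidden_dim)
    non_linear = nn.Tanh()
    word_emb = torch.randn(word_emb_dim)       # embedding of a word with no translation entry
    return non_linear(w(word_emb))             # 60-d surrogate translation feature
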
class Examiner(nn.Module):
    def __init__(self, data):
        super(Examiner, self).__init__()
        print("build batched bilstm...")
        self.gpu = data.HP_gpu
        self.use_char = data.HP_use_char
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        self.average_batch = data.HP_average_batch_loss
        if self.use_char:
            self.char_hidden_dim = data.HP_char_hidden_dim
            self.char_embedding_dim = data.char_emb_dim
            if data.char_features == "CNN":
                self.char_feature = CharCNN(data.char_alphabet.size(), self.char_embedding_dim,
                                            self.char_hidden_dim, data.HP_dropout, self.gpu)
            elif data.char_features == "LSTM":
                self.char_feature = CharBiLSTM(data.char_alphabet.size(), self.char_embedding_dim,
                                               self.char_hidden_dim, data.HP_dropout, self.gpu)
            else:
                print("Error char feature selection, please check parameter data.char_features (either CNN or LSTM).")
                exit(0)
        self.embedding_dim = data.word_emb_dim
        self.hidden_dim = data.HP_hidden_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.droplstm = nn.Dropout(data.HP_dropout)
        self.word_embeddings = nn.Embedding(data.word_alphabet.size(), self.embedding_dim)
        self.bilstm_flag = data.HP_bilstm
        self.lstm_layer = data.HP_lstm_layer
        self.tag_size = data.label_alphabet_size
        if data.pretrain_word_embedding is not None:
            self.word_embeddings.weight.data.copy_(torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embeddings.weight.data.copy_(
                torch.from_numpy(self.random_embedding(data.word_alphabet.size(), self.embedding_dim)))
        # The LSTM takes word embeddings as inputs and outputs hidden states
        # with dimensionality hidden_dim.
        if self.bilstm_flag:
            lstm_hidden = data.HP_hidden_dim // 2
        else:
            lstm_hidden = data.HP_hidden_dim
        self.lstm = nn.LSTM(self.embedding_dim + self.char_hidden_dim + data.label_alphabet_size,
                            lstm_hidden, num_layers=self.lstm_layer, batch_first=True,
                            bidirectional=self.bilstm_flag)
        # The linear layer that maps from hidden state space to a 2-dimensional score
        self.hidden2tag = nn.Linear(self.hidden_dim, 2)
        self.topk = 50
        if self.gpu:
            self.drop = self.drop.cuda()
            self.droplstm = self.droplstm.cuda()
            self.word_embeddings = self.word_embeddings.cuda()
            self.lstm = self.lstm.cuda()
            self.hidden2tag = self.hidden2tag.cuda()

    def random_embedding(self, vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale, [1, embedding_dim])
        return pretrain_emb

    def get_lstm_features(self, word_inputs, word_seq_lengths, char_inputs, char_seq_lengths,
                          char_seq_recover, tag_prob, tag_size):
        """
        input:
            word_inputs: (batch_size, sent_len)
            word_seq_lengths: list of batch_size, (batch_size, 1)
            char_inputs: (batch_size*sent_len, word_length)
            char_seq_lengths: list of whole batch_size for char, (batch_size*sent_len, 1)
            char_seq_recover: variable which records the char order information, used to recover char order
        output:
            Variable(batch_size, sent_len, hidden_dim)
        """
        batch_size = word_inputs.size(0)
        sent_len = word_inputs.size(1)
        word_embs = self.word_embeddings(word_inputs)
        if self.use_char:
            # calculate char lstm last hidden
            char_features = self.char_feature.get_last_hiddens(char_inputs, char_seq_lengths.cpu().numpy())
            char_features = char_features[char_seq_recover]
            char_features = char_features.view(batch_size, sent_len, -1)
            word_embs = torch.cat([word_embs, char_features], 2)
        tag_feature = tag_prob
        # tag_feature = torch.zeros(batch_size, sent_len, tag_size).cuda().scatter_(2, tag_seq.unsqueeze(2).cuda(), 1.0)
        # concat word, char and tag features together
        word_embs = self.drop(word_embs)
        word_embs = torch.cat([word_embs, tag_feature], 2)
        packed_words = pack_padded_sequence(word_embs, word_seq_lengths.cpu().numpy(), True)
        hidden = None
        packed_words.requires_grad = False
        lstm_out, hidden = self.lstm(packed_words, hidden)
        lstm_out, _ = pad_packed_sequence(lstm_out)
        lstm_out = self.droplstm(lstm_out.transpose(1, 0))
        # lstm_out (batch_size, seq_len, hidden_size)
        return lstm_out

    def get_output_score(self, word_inputs, word_seq_lengths, char_inputs, char_seq_lengths,
                         char_seq_recover, tag_seq):
        lstm_out = self.get_lstm_features(word_inputs, word_seq_lengths, char_inputs,
                                          char_seq_lengths, char_seq_recover, tag_seq, self.tag_size)
        outputs = self.hidden2tag(lstm_out)
        return outputs

    def neg_log_likelihood_loss(self, word_inputs, word_seq_lengths, char_inputs, char_seq_lengths,
                                char_seq_recover, batch_label, tag_seq, tag_prob, mask):
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        total_word = batch_size * seq_len
        outs = self.get_output_score(word_inputs, word_seq_lengths, char_inputs,
                                     char_seq_lengths, char_seq_recover, tag_prob)
        # outs (batch_size, seq_len, 2)
        outs = outs.view(total_word, -1)
        score = F.softmax(outs, 1)
        # score: the score for choosing each position
        score = score[:, 0]
        score = score.contiguous().view(batch_size, seq_len)
        score = mask.float() * score  # the score is always positive
        score = F.softmax(score, 1)
        if seq_len >= self.topk:
            topk, indices = score.topk(self.topk, dim=1)
        else:
            topk, indices = score.topk(seq_len, dim=1)
        tag_mask = Variable(torch.ones(batch_size, seq_len).cuda())
        tag_mask = tag_mask.scatter(1, indices, 0).long()
        # tag_mask: selected positions as a mask vector
        topk = torch.log(topk)  # topk: the top-k scores
        # info_tensor is 1 where the predicted tag differs from the gold label
        info_tensor = (1 - (-torch.abs(Variable(tag_seq).cuda() - batch_label)).ge(0).float())
        _sum = info_tensor.sum().long()[0]
        full_loss = -torch.log(score) * (1 - (-torch.abs(Variable(tag_seq).cuda() - batch_label)).ge(0).float())
        partial_reward = score * (1 - (-torch.abs(Variable(tag_seq).cuda() - batch_label)).ge(0).float())
        # full_loss: the supervised loss
        # partial_reward: the partially labeled supervised reward
        return indices, tag_mask, topk.mean(1), full_loss, partial_reward
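
# Illustration (toy scores, not from the original repo) of the top-k selection in
# neg_log_likelihood_loss: the k highest-scoring positions are gathered, and scatter
# writes zeros at exactly those positions to build tag_mask.
def _sketch_topk_mask():
    score = torch.tensor([[0.10, 0.40, 0.05, 0.30, 0.15]])     # (batch=1, seq_len=5)
    topk, indices = score.topk(2, dim=1)                       # values [[0.40, 0.30]], indices [[1, 3]]
    tag_mask = torch.ones(1, 5).scatter(1, indices, 0).long()  # tensor([[1, 0, 1, 0, 1]])
    return topk, indices, tag_mask
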
class Reformulator(nn.Module):
    def __init__(self, data):
        super(Reformulator, self).__init__()
        print("build batched bilstm...")
        self.gpu = data.HP_gpu
        self.use_char = data.HP_use_char
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        self.average_batch = data.HP_average_batch_loss
        if self.use_char:
            self.char_hidden_dim = data.HP_char_hidden_dim
            self.char_embedding_dim = data.char_emb_dim
            if data.char_features == "CNN":
                self.char_feature = CharCNN(data.char_alphabet.size(), self.char_embedding_dim,
                                            self.char_hidden_dim, data.HP_dropout, self.gpu)
            elif data.char_features == "LSTM":
                self.char_feature = CharBiLSTM(data.char_alphabet.size(), self.char_embedding_dim,
                                               self.char_hidden_dim, data.HP_dropout, self.gpu)
            else:
                print("Error char feature selection, please check parameter data.char_features (either CNN or LSTM).")
                exit(0)
        self.embedding_dim = data.word_emb_dim
        self.hidden_dim = data.HP_hidden_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.droplstm = nn.Dropout(data.HP_dropout)
        self.word_embeddings = nn.Embedding(data.word_alphabet.size(), self.embedding_dim)
        self.bilstm_flag = data.HP_bilstm
        self.lstm_layer = data.HP_lstm_layer
        self.tag_size = data.label_alphabet_size
        if data.pretrain_word_embedding is not None:
            self.word_embeddings.weight.data.copy_(torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embeddings.weight.data.copy_(
                torch.from_numpy(self.random_embedding(data.word_alphabet.size(), self.embedding_dim)))
        # The LSTM takes word embeddings as inputs and outputs hidden states
        # with dimensionality hidden_dim.
        if self.bilstm_flag:
            lstm_hidden = data.HP_hidden_dim // 2
        else:
            lstm_hidden = data.HP_hidden_dim
        self.lstm = nn.LSTM(self.embedding_dim + self.char_hidden_dim + data.label_alphabet_size,
                            lstm_hidden, num_layers=self.lstm_layer, batch_first=True,
                            bidirectional=self.bilstm_flag)
        # The linear layer that maps from hidden state space to a 2-dimensional score
        self.hidden2tag = nn.Linear(self.hidden_dim, 2)
        self.topk = 50
        if self.gpu:
            self.drop = self.drop.cuda()
            self.droplstm = self.droplstm.cuda()
            self.word_embeddings = self.word_embeddings.cuda()
            self.lstm = self.lstm.cuda()
            self.hidden2tag = self.hidden2tag.cuda()

    def random_embedding(self, vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale, [1, embedding_dim])
        return pretrain_emb

    def get_lstm_features(self, word_inputs, word_seq_lengths, char_inputs, char_seq_lengths,
                          char_seq_recover, tag_seq, tag_size):
        """
        input:
            word_inputs: (batch_size, sent_len)
            word_seq_lengths: list of batch_size, (batch_size, 1)
            char_inputs: (batch_size*sent_len, word_length)
            char_seq_lengths: list of whole batch_size for char, (batch_size*sent_len, 1)
            char_seq_recover: variable which records the char order information, used to recover char order
        output:
            Variable(batch_size, sent_len, hidden_dim)
        """
        batch_size = word_inputs.size(0)
        sent_len = word_inputs.size(1)
        word_embs = self.word_embeddings(word_inputs)
        if self.use_char:
            # calculate char lstm last hidden
            char_features = self.char_feature.get_last_hiddens(char_inputs, char_seq_lengths.cpu().numpy())
            char_features = char_features[char_seq_recover]
            char_features = char_features.view(batch_size, sent_len, -1)
            word_embs = torch.cat([word_embs, char_features], 2)
        # one-hot encode the predicted tag sequence
        tag_feature = torch.zeros(batch_size, sent_len, tag_size).cuda().scatter_(2, tag_seq.unsqueeze(2).cuda(), 1.0)
        # concat word, char and tag features together
        word_embs = self.drop(word_embs)
        word_embs = torch.cat([word_embs, Variable(tag_feature)], 2)
        packed_words = pack_padded_sequence(word_embs, word_seq_lengths.cpu().numpy(), True)
        hidden = None
        packed_words.requires_grad = False
        lstm_out, hidden = self.lstm(packed_words, hidden)
        lstm_out, _ = pad_packed_sequence(lstm_out)
        lstm_out = self.droplstm(lstm_out.transpose(1, 0))
        # lstm_out (batch_size, seq_len, hidden_size)
        return lstm_out

    def get_output_score(self, word_inputs, word_seq_lengths, char_inputs, char_seq_lengths,
                         char_seq_recover, tag_seq):
        lstm_out = self.get_lstm_features(word_inputs, word_seq_lengths, char_inputs,
                                          char_seq_lengths, char_seq_recover, tag_seq, self.tag_size)
        outputs = self.hidden2tag(lstm_out)
        return outputs

    def neg_log_likelihood_loss(self, word_inputs, word_seq_lengths, char_inputs, char_seq_lengths,
                                char_seq_recover, batch_label, tag_seq, mask):
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        total_word = batch_size * seq_len
        # loss_function = nn.CrossEntropyLoss()
        outs = self.get_output_score(word_inputs, word_seq_lengths, char_inputs,
                                     char_seq_lengths, char_seq_recover, tag_seq)
        # outs (batch_size, seq_len, 2)
        outs = outs.view(total_word, -1)
        score = F.softmax(outs, 1)
        # loss = loss_function(score, batch_label.view(total_word))
        # if self.average_batch:
        #     loss = loss / batch_size
        _ = score[:, 0]
        _ = _.contiguous().view(batch_size, seq_len)
        _ = mask.float() * _  # the score is always positive
        _ = F.softmax(_, 1)
        if seq_len >= self.topk:
            topk, indices = _.topk(self.topk, dim=1)
        else:
            topk, indices = _.topk(seq_len, dim=1)
        tag_mask = Variable(torch.ones(batch_size, seq_len).cuda())
        tag_mask = tag_mask.scatter(1, indices, 0).long()
        # topk = F.softmax(topk, 1)
        topk = torch.log(topk)
        # info_tensor is 1 where the predicted tag differs from the gold label
        info_tensor = (1 - (-torch.abs(Variable(tag_seq).cuda() - batch_label)).ge(0).float())
        _sum = info_tensor.sum().long()[0]
        ans = -torch.log(_) * (1 - (-torch.abs(Variable(tag_seq).cuda() - batch_label)).ge(0).float())
        correct = _ * (1 - (-torch.abs(Variable(tag_seq).cuda() - batch_label)).ge(0).float())
        # ans = 0.0
        # for i in range(_sum):
        #     ans += torch.index_select(info_tensor[0], 0, indices[0][:i + 1]).sum() / float(i + 1)
        return indices, tag_mask, topk.mean(1), ans, correct

    def forward(self, word_inputs, word_seq_lengths, char_inputs, char_seq_lengths,
                char_seq_recover, mask, tag_seq):
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        total_word = batch_size * seq_len
        outs = self.get_output_score(word_inputs, word_seq_lengths, char_inputs,
                                     char_seq_lengths, char_seq_recover, tag_seq)
        outs = outs.view(total_word, -1)
        _, tag_seq = torch.max(outs, 1)
        tag_seq = tag_seq.view(batch_size, seq_len)
        # filter padded positions with zero
        decode_seq = mask.long() * tag_seq
        return decode_seq
class CoveBiLSTM(nn.Module):
    def __init__(self, data):
        super(CoveBiLSTM, self).__init__()
        print("build batched bilstm...")
        self.gpu = data.HP_gpu
        self.use_char = data.HP_use_char
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        self.average_batch = data.HP_average_batch_loss
        if self.use_char:
            self.char_hidden_dim = data.HP_char_hidden_dim
            self.char_embedding_dim = data.char_emb_dim
            if data.char_features == "CNN":
                self.char_feature = CharCNN(data.char_alphabet.size(), self.char_embedding_dim,
                                            self.char_hidden_dim, data.HP_dropout, self.gpu)
            elif data.char_features == "LSTM":
                self.char_feature = CharBiLSTM(data.char_alphabet.size(), self.char_embedding_dim,
                                               self.char_hidden_dim, data.HP_dropout, self.gpu)
            else:
                print("Error char feature selection, please check parameter data.char_features (either CNN or LSTM).")
                exit(0)
        self.embedding_dim = data.word_emb_dim
        self.hidden_dim = data.HP_hidden_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.droplstm = nn.Dropout(data.HP_dropout)
        self.word_embeddings = nn.Embedding(data.word_alphabet.size(), self.embedding_dim)
        self.bilstm_flag = data.HP_bilstm
        self.lstm_layer = data.HP_lstm_layer
        if data.pretrain_word_embedding is not None:
            self.word_embeddings.weight.data.copy_(torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embeddings.weight.data.copy_(
                torch.from_numpy(self.random_embedding(data.word_alphabet.size(), self.embedding_dim)))
        # The LSTM takes word embeddings as inputs and outputs hidden states
        # with dimensionality hidden_dim.
        if self.bilstm_flag:
            lstm_hidden = data.HP_hidden_dim // 2
        else:
            lstm_hidden = data.HP_hidden_dim
        # Pretrained CoVe encoder: a 2-layer BiLSTM over 300-d word embeddings.
        self.cove = nn.LSTM(300, 300, num_layers=2, bidirectional=True, batch_first=True)
        self.cove.load_state_dict(model_zoo.load_url(model_urls['wmt-lstm'], model_dir=model_cache))
        self.lstm = nn.LSTM(self.embedding_dim + self.char_hidden_dim + 600, lstm_hidden,
                            num_layers=self.lstm_layer, batch_first=True, bidirectional=self.bilstm_flag)
        # catner
        # self.lstm = nn.LSTM(self.char_hidden_dim + 300, lstm_hidden, num_layers=self.lstm_layer,
        #                     batch_first=True, bidirectional=self.bilstm_flag)
        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(self.hidden_dim, data.label_alphabet_size)
        if self.gpu:
            self.drop = self.drop.cuda()
            self.droplstm = self.droplstm.cuda()
            self.word_embeddings = self.word_embeddings.cuda()
            self.cove = self.cove.cuda()
            self.lstm = self.lstm.cuda()
            self.hidden2tag = self.hidden2tag.cuda()

    def random_embedding(self, vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale, [1, embedding_dim])
        return pretrain_emb

    def get_lstm_features(self, word_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover):
        """
        input:
            word_inputs: (batch_size, sent_len)
            word_seq_lengths: list of batch_size, (batch_size, 1)
            char_inputs: (batch_size*sent_len, word_length)
            char_seq_lengths: list of whole batch_size for char, (batch_size*sent_len, 1)
            char_seq_recover: variable which records the char order information, used to recover char order
        output:
            Variable(batch_size, sent_len, hidden_dim)
        """
        batch_size = word_inputs.size(0)
        sent_len = word_inputs.size(1)
        word_embs = self.word_embeddings(word_inputs)
        # CoVe context vectors
        cove_hidden = None
        packed_words = pack_padded_sequence(word_embs, word_seq_lengths.cpu().numpy(), True)
        outputs, hidden_t = self.cove(packed_words, cove_hidden)
        outputs = pad_packed_sequence(outputs, batch_first=True)[0]
        # _, _indices = torch.sort(indices, 0)
        # outputs = outputs[_indices]
        outputs.contiguous()
        outputs = outputs.view(batch_size, sent_len, -1)
        # concatenate word embeddings and CoVe representations
        word_embs = torch.cat([word_embs, outputs], 2)
        # word_embs = outputs
        if self.use_char:
            # calculate char lstm last hidden
            char_features = self.char_feature.get_last_hiddens(char_inputs, char_seq_lengths.cpu().numpy())
            char_features = char_features[char_seq_recover]
            char_features = char_features.view(batch_size, sent_len, -1)
            # concat word and char together
            word_embs = torch.cat([word_embs, char_features], 2)
        word_embs = self.drop(word_embs)
        # word_embs (batch_size, seq_len, embed_size)
        packed_words = pack_padded_sequence(word_embs, word_seq_lengths.cpu().numpy(), True)
        hidden = None
        lstm_out, hidden = self.lstm(packed_words, hidden)
        lstm_out, _ = pad_packed_sequence(lstm_out)
        # lstm_out (seq_len, batch_size, hidden_size)
        lstm_out = self.droplstm(lstm_out.transpose(1, 0))
        # lstm_out (batch_size, seq_len, hidden_size)
        return lstm_out

    def get_output_score(self, word_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover):
        lstm_out = self.get_lstm_features(word_inputs, word_seq_lengths, char_inputs,
                                          char_seq_lengths, char_seq_recover)
        outputs = self.hidden2tag(lstm_out)
        return outputs

    def neg_log_likelihood_loss(self, word_inputs, word_seq_lengths, char_inputs, char_seq_lengths,
                                char_seq_recover, batch_label, mask):
        # mask is not used
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        total_word = batch_size * seq_len
        loss_function = nn.NLLLoss(ignore_index=0, size_average=False)
        outs = self.get_output_score(word_inputs, word_seq_lengths, char_inputs,
                                     char_seq_lengths, char_seq_recover)
        # outs (batch_size, seq_len, label_vocab)
        outs = outs.view(total_word, -1)
        score = F.log_softmax(outs, 1)
        loss = loss_function(score, batch_label.view(total_word))
        if self.average_batch:
            loss = loss / batch_size
        _, tag_seq = torch.max(score, 1)
        tag_seq = tag_seq.view(batch_size, seq_len)
        return loss, tag_seq

    def forward(self, word_inputs, word_seq_lengths, char_inputs, char_seq_lengths,
                char_seq_recover, mask):
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        total_word = batch_size * seq_len
        outs = self.get_output_score(word_inputs, word_seq_lengths, char_inputs,
                                     char_seq_lengths, char_seq_recover)
        outs = outs.view(total_word, -1)
        _, tag_seq = torch.max(outs, 1)
        tag_seq = tag_seq.view(batch_size, seq_len)
        # filter padded positions with zero
        decode_seq = mask.long() * tag_seq
        return decode_seq
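
# Illustration (assumed sizes, not from the original repo): the pretrained 2-layer
# BiLSTM used as the CoVe encoder has hidden size 300 per direction, so its context
# vectors are 600-d, which is why the main LSTM input size above adds 600.
def _sketch_cove_output_width():
    cove = nn.LSTM(300, 300, num_layers=2, bidirectional=True, batch_first=True)
    x = torch.randn(2, 7, 300)   # (batch, seq_len, 300-d word embeddings)
    ctx, _ = cove(x)             # (2, 7, 600)
    return ctx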