Code Example #1
	def __init__(self, vocab, num_classes, char_alphabet):
		super(CNNCNN_SentLSTM,self).__init__()
		self.embedding = vocab.init_embed_layer()
		self.hidden_size = opt.hidden_size

		# charcnn
		self.char_hidden_dim = 10
		self.char_embedding_dim = 20
		self.char_feature = CharCNN(len(char_alphabet), None, self.char_embedding_dim, self.char_hidden_dim,
									opt.dropout, opt.gpu)

		self.embedding_size = self.embedding.weight.size(1)
		self.hidden_size = opt.hidden_size

		Ci = 1
		Co = opt.kernel_num
		Ks = [int(k) for k in list(opt.kernel_sizes) if k != ","]
		# mention char_cnn
		D = self.embedding_size + self.char_hidden_dim
		self.convs1 = nn.ModuleList([nn.Conv2d(in_channels=Ci, out_channels=Co, kernel_size=(K, D), stride=(1, 1),
											   padding=(K // 2, 0), dilation=1, bias=False) for K in Ks])
		self.mention_hidden = nn.Linear(len(Ks) * Co, self.hidden_size)

		#sentence lstm
		self.lstm_hidden = opt.hidden_size
		self.lstm = nn.GRU(self.embedding_size, self.lstm_hidden, num_layers=1, batch_first=True,
							bidirectional=True)
		self.sent_hidden_size = opt.sent_hidden_size
		self.sent_hidden = nn.Linear(self.lstm_hidden*2, self.sent_hidden_size)
		self.hidden = nn.Linear(self.hidden_size + self.sent_hidden_size, self.hidden_size)  # mention_hidden_size + sentence_hidden_size
		self.out = nn.Linear(self.hidden_size, num_classes)
		self.dropout = nn.Dropout(opt.dropout)
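
Every example on this page constructs a CharCNN and calls its get_last_hiddens method, but the CharCNN module itself is not listed. The sketch below is only a minimal, assumed interface consistent with the calls in Code Examples #1-#4 (alphabet size, optional pretrained embedding matrix, embedding dim, hidden dim, dropout, GPU id; get_last_hiddens taking a (batch_size*sent_len, word_length) tensor of character ids). The kernel size and the internals are assumptions, not the original implementation, and several other examples on this page pass a slightly different argument list or an extra length argument.

import torch
import torch.nn as nn
import torch.nn.functional as F


class CharCNN(nn.Module):
    # Sketch only: signature inferred from the calls above, internals assumed.
    def __init__(self, alphabet_size, pretrain_char_embedding, embedding_dim,
                 hidden_dim, dropout, gpu=-1):
        super(CharCNN, self).__init__()
        self.char_drop = nn.Dropout(dropout)
        self.char_embeddings = nn.Embedding(alphabet_size, embedding_dim)
        if pretrain_char_embedding is not None:
            self.char_embeddings.weight.data.copy_(
                torch.from_numpy(pretrain_char_embedding))
        # 1-D convolution over character positions (kernel size 3 is an assumption)
        self.char_cnn = nn.Conv1d(embedding_dim, hidden_dim, kernel_size=3, padding=1)

    def get_last_hiddens(self, char_inputs):
        # char_inputs: (batch_size*sent_len, word_length) character ids
        char_embeds = self.char_drop(self.char_embeddings(char_inputs))
        char_embeds = char_embeds.transpose(1, 2)      # (N, emb_dim, word_length)
        char_cnn_out = self.char_cnn(char_embeds)      # (N, hidden_dim, word_length)
        # max-pool over the character dimension: one fixed-size vector per word
        return F.max_pool1d(char_cnn_out, char_cnn_out.size(2)).squeeze(2)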
Code Example #2
    def __init__(self, vocab, num_classes, char_alphabet):

        super(CNNCNN, self).__init__()
        self.embedding = vocab.init_embed_layer()
        self.hidden_size = opt.hidden_size

        # charcnn
        self.char_hidden_dim = 10
        self.char_embedding_dim = 20
        self.char_feature = CharCNN(len(char_alphabet), None,
                                    self.char_embedding_dim,
                                    self.char_hidden_dim, opt.dropout, opt.gpu)

        D = self.embedding.weight.size(1)
        self.hidden_size = opt.hidden_size
        D = D + self.char_hidden_dim

        #mention cnn
        Ci = 1
        Co = opt.kernel_num
        Ks = [int(k) for k in list(opt.kernel_sizes) if k != ","]
        self.convs1 = nn.ModuleList([
            nn.Conv2d(in_channels=Ci,
                      out_channels=Co,
                      kernel_size=(K, D),
                      stride=(1, 1),
                      padding=(K // 2, 0),
                      dilation=1,
                      bias=False) for K in Ks
        ])

        self.hidden = nn.Linear(len(Ks) * Co, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, num_classes)
        self.dropout = nn.Dropout(opt.dropout)
Code Example #3
	def __init__(self, vocab, num_classes, char_alphabet):
		super(AttenCNN,self).__init__()
		self.embed_size = opt.word_emb_size
		self.embedding = vocab.init_embed_layer()
		self.hidden_size = opt.hidden_size
		self.char_hidden_dim = 10
		self.char_embedding_dim = 20
		self.char_feature = CharCNN(len(char_alphabet), None, self.char_embedding_dim, self.char_hidden_dim,
									opt.dropout, opt.gpu)
		self.input_size = self.embed_size + self.char_hidden_dim

		self.W = nn.Linear(self.input_size, 1, bias=False)

		self.hidden = nn.Linear(self.input_size, self.hidden_size)
		self.out = nn.Linear(self.hidden_size, num_classes)
		self.dropout = nn.Dropout(opt.dropout)
Code Example #4
class AttenCNN(nn.Module):
	def __init__(self, vocab, num_classes, char_alphabet):
		super(AttenCNN,self).__init__()
		self.embed_size = opt.word_emb_size
		self.embedding = vocab.init_embed_layer()
		self.hidden_size = opt.hidden_size
		self.char_hidden_dim = 10
		self.char_embedding_dim = 20
		self.char_feature = CharCNN(len(char_alphabet), None, self.char_embedding_dim, self.char_hidden_dim,
									opt.dropout, opt.gpu)
		self.input_size = self.embed_size + self.char_hidden_dim

		self.W = nn.Linear(self.input_size, 1, bias=False)

		self.hidden = nn.Linear(self.input_size, self.hidden_size)
		self.out = nn.Linear(self.hidden_size, num_classes)
		self.dropout = nn.Dropout(opt.dropout)

	def forward(self, input, char_inputs):
		"""
		inputs: (unpacked_padded_output: batch_size x seq_len x hidden_size, lengths: batch_size)
		"""

		entity_words, _, entity_lengths, entity_seq_recover = input
		entity_words = autograd.Variable(entity_words)
		entity_words_embeds = self.embedding(entity_words)
		batch_size, max_len, _ = entity_words_embeds.size()

		char_inputs, _, char_seq_lengths, char_seq_recover = char_inputs
		char_features = self.char_feature.get_last_hiddens(char_inputs)
		char_features = char_features[char_seq_recover]
		char_features = char_features.view(batch_size, max_len, -1)

		input_embeds = torch.cat((entity_words_embeds, char_features), 2)

		flat_input = input_embeds.contiguous().view(-1, self.input_size)
		logits = self.W(flat_input).view(batch_size, max_len)
		alphas = functional.softmax(logits, dim=1)

		# computing mask
		tmp = torch.LongTensor(max_len)
		if opt.gpu >= 0 and torch.cuda.is_available():
			tmp = tmp.cuda(opt.gpu)
		idxes = torch.arange(0, max_len, out=tmp).unsqueeze(0)
		# idxes = torch.arange(0, max_len, out=torch.LongTensor(max_len)).unsqueeze(0).cuda(opt.gpu)
		mask = autograd.Variable((idxes < entity_lengths.unsqueeze(1)).float())

		alphas = alphas * mask
		alphas = alphas / torch.sum(alphas, 1).view(-1, 1)
		atten_input = torch.bmm(alphas.unsqueeze(1), input_embeds).squeeze(1)
		atten_input = self.dropout(atten_input)

		hidden = self.hidden(atten_input)
		output = self.out(hidden)
		return output
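
The mask-and-renormalize attention pooling in AttenCNN.forward above can be exercised in isolation. The toy sketch below (random tensors and a plain torch.arange mask instead of the Variable/out= idiom; all dimensions are assumptions) illustrates the same computation: padded positions receive zero attention weight, the remaining weights are renormalized, and a weighted sum over the sequence is taken with bmm.

import torch
import torch.nn.functional as F

batch_size, max_len, dim = 2, 5, 7
input_embeds = torch.randn(batch_size, max_len, dim)
logits = torch.randn(batch_size, max_len)      # stand-in for self.W(flat_input)
lengths = torch.tensor([5, 3])                 # real token counts per sequence

alphas = F.softmax(logits, dim=1)
mask = (torch.arange(max_len).unsqueeze(0) < lengths.unsqueeze(1)).float()
alphas = alphas * mask                         # zero out padded positions
alphas = alphas / alphas.sum(1, keepdim=True)  # renormalize over real tokens
atten_input = torch.bmm(alphas.unsqueeze(1), input_embeds).squeeze(1)
print(atten_input.size())                      # torch.Size([2, 7])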
Code Example #5
    def __init__(self, data):
        super(WordRep, self).__init__()
        self.char_hidden_dim = data.char_hidden_dim  # 50
        self.char_embedding_dim = data.char_emb_dim  # 300
        self.char_feature = CharCNN(data.char_alphabet_size,
                                    data.pretrain_char_embedding,
                                    self.char_embedding_dim,
                                    self.char_hidden_dim, data.dropout)
        self.drop = nn.Dropout(data.dropout)
        self.word_embedding = nn.Embedding(data.word_alphabet_size,
                                           data.word_emb_dim)

        self.word_embedding.weight.data.copy_(
            torch.from_numpy(
                self.random_embedding(data.word_alphabet_size,
                                      data.word_emb_dim)))
        self.feature_embedding = nn.Embedding(data.feat_alphabet_size,
                                              data.feature_emb_dim)
        self.feature_embedding.weight.data.copy_(
            torch.from_numpy(data.pretrain_feature_embeddings))
Code Example #6
class WordRep(nn.Module):
    def __init__(self, data):
        super(WordRep, self).__init__()
        self.char_hidden_dim = data.char_hidden_dim  # 50
        self.char_embedding_dim = data.char_emb_dim  # 300
        self.char_feature = CharCNN(data.char_alphabet_size,
                                    data.pretrain_char_embedding,
                                    self.char_embedding_dim,
                                    self.char_hidden_dim, data.dropout)
        self.drop = nn.Dropout(data.dropout)
        self.word_embedding = nn.Embedding(data.word_alphabet_size,
                                           data.word_emb_dim)

        self.word_embedding.weight.data.copy_(
            torch.from_numpy(
                self.random_embedding(data.word_alphabet_size,
                                      data.word_emb_dim)))
        self.feature_embedding = nn.Embedding(data.feat_alphabet_size,
                                              data.feature_emb_dim)
        self.feature_embedding.weight.data.copy_(
            torch.from_numpy(data.pretrain_feature_embeddings))

    def random_embedding(self, vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale,
                                                       [1, embedding_dim])
        return pretrain_emb

    def forward(self, word_inputs, feature_inputs, word_seq_lengths,
                char_inputs, char_seq_lengths, char_seq_recover):
        batch_size = word_inputs.size(0)
        sent_len = word_inputs.size(1)
        word_embs = self.word_embedding(word_inputs)
        word_list = [word_embs]
        word_list.append(self.feature_embedding(feature_inputs))

        char_features = self.char_feature.get_last_hiddens(char_inputs)
        char_features = char_features[char_seq_recover]
        char_features = char_features.view(batch_size, sent_len, -1)
        word_list.append(char_features)

        word_embs = torch.cat(word_list, 2)
        word_represent = self.drop(word_embs)
        return word_represent
Code Example #7
    def __init__(self, data):
        super(WordRep, self).__init__()
        print("build word representation...")
        self.gpu = data.HP_gpu
        self.use_char = data.use_char
        self.use_trans = data.use_trans
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        self.char_all_feature = False
        self.w = nn.Linear(data.word_emb_dim, data.HP_trans_hidden_dim)

        if self.use_trans:
            self.trans_hidden_dim = data.HP_trans_hidden_dim
            self.trans_embedding_dim = data.trans_emb_dim
            self.trans_feature = TransBiLSTM(data.translation_alphabet.size(),
                                             self.trans_embedding_dim,
                                             self.trans_hidden_dim,
                                             data.HP_dropout,
                                             data.pretrain_trans_embedding,
                                             self.gpu)

        if self.use_char:
            self.char_hidden_dim = data.HP_char_hidden_dim
            self.char_embedding_dim = data.char_emb_dim
            if data.char_seq_feature == "CNN":
                self.char_feature = CharCNN(data.char_alphabet.size(),
                                            self.char_embedding_dim,
                                            self.char_hidden_dim,
                                            data.HP_dropout, self.gpu)
            elif data.char_seq_feature == "LSTM":
                self.char_feature = CharBiLSTM(data.char_alphabet.size(),
                                               self.char_embedding_dim,
                                               self.char_hidden_dim,
                                               data.HP_dropout,
                                               data.pretrain_char_embedding,
                                               self.gpu)
            elif data.char_seq_feature == "GRU":
                self.char_feature = CharBiGRU(data.char_alphabet.size(),
                                              self.char_embedding_dim,
                                              self.char_hidden_dim,
                                              data.HP_dropout, self.gpu)
            elif data.char_seq_feature == "ALL":
                self.char_all_feature = True
                self.char_feature = CharCNN(data.char_alphabet.size(),
                                            self.char_embedding_dim,
                                            self.char_hidden_dim,
                                            data.HP_dropout, self.gpu)
                self.char_feature_extra = CharBiLSTM(data.char_alphabet.size(),
                                                     self.char_embedding_dim,
                                                     self.char_hidden_dim,
                                                     data.HP_dropout, self.gpu)
            else:
                print(
                    "Error char feature selection, please check parameter data.char_seq_feature (CNN/LSTM/GRU/ALL)."
                )
                exit(0)
        self.embedding_dim = data.word_emb_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.word_embedding = nn.Embedding(data.word_alphabet.size(),
                                           self.embedding_dim)
        if data.pretrain_word_embedding is not None:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.word_alphabet.size(),
                                          self.embedding_dim)))

        self.feature_num = data.feature_num
        self.feature_embedding_dims = data.feature_emb_dims
        self.feature_embeddings = nn.ModuleList()
        for idx in range(self.feature_num):
            self.feature_embeddings.append(
                nn.Embedding(data.feature_alphabets[idx].size(),
                             self.feature_embedding_dims[idx]))
        for idx in range(self.feature_num):
            if data.pretrain_feature_embeddings[idx] is not None:
                self.feature_embeddings[idx].weight.data.copy_(
                    torch.from_numpy(data.pretrain_feature_embeddings[idx]))
            else:
                self.feature_embeddings[idx].weight.data.copy_(
                    torch.from_numpy(
                        self.random_embedding(
                            data.feature_alphabets[idx].size(),
                            self.feature_embedding_dims[idx])))

        if self.gpu:
            self.drop = self.drop.cuda()
            self.word_embedding = self.word_embedding.cuda()
            for idx in range(self.feature_num):
                self.feature_embeddings[idx] = self.feature_embeddings[
                    idx].cuda()
Code Example #8
class WordRep(nn.Module):
    def __init__(self, data):
        super(WordRep, self).__init__()
        print("build word representation...")
        self.gpu = data.HP_gpu
        self.use_char = data.use_char
        self.use_trans = data.use_trans
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        self.char_all_feature = False
        self.w = nn.Linear(data.word_emb_dim, data.HP_trans_hidden_dim)

        if self.use_trans:
            self.trans_hidden_dim = data.HP_trans_hidden_dim
            self.trans_embedding_dim = data.trans_emb_dim
            self.trans_feature = TransBiLSTM(data.translation_alphabet.size(),
                                             self.trans_embedding_dim,
                                             self.trans_hidden_dim,
                                             data.HP_dropout,
                                             data.pretrain_trans_embedding,
                                             self.gpu)

        if self.use_char:
            self.char_hidden_dim = data.HP_char_hidden_dim
            self.char_embedding_dim = data.char_emb_dim
            if data.char_seq_feature == "CNN":
                self.char_feature = CharCNN(data.char_alphabet.size(),
                                            self.char_embedding_dim,
                                            self.char_hidden_dim,
                                            data.HP_dropout, self.gpu)
            elif data.char_seq_feature == "LSTM":
                self.char_feature = CharBiLSTM(data.char_alphabet.size(),
                                               self.char_embedding_dim,
                                               self.char_hidden_dim,
                                               data.HP_dropout,
                                               data.pretrain_char_embedding,
                                               self.gpu)
            elif data.char_seq_feature == "GRU":
                self.char_feature = CharBiGRU(data.char_alphabet.size(),
                                              self.char_embedding_dim,
                                              self.char_hidden_dim,
                                              data.HP_dropout, self.gpu)
            elif data.char_seq_feature == "ALL":
                self.char_all_feature = True
                self.char_feature = CharCNN(data.char_alphabet.size(),
                                            self.char_embedding_dim,
                                            self.char_hidden_dim,
                                            data.HP_dropout, self.gpu)
                self.char_feature_extra = CharBiLSTM(data.char_alphabet.size(),
                                                     self.char_embedding_dim,
                                                     self.char_hidden_dim,
                                                     data.HP_dropout, self.gpu)
            else:
                print(
                    "Error char feature selection, please check parameter data.char_seq_feature (CNN/LSTM/GRU/ALL)."
                )
                exit(0)
        self.embedding_dim = data.word_emb_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.word_embedding = nn.Embedding(data.word_alphabet.size(),
                                           self.embedding_dim)
        if data.pretrain_word_embedding is not None:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.word_alphabet.size(),
                                          self.embedding_dim)))

        self.feature_num = data.feature_num
        self.feature_embedding_dims = data.feature_emb_dims
        self.feature_embeddings = nn.ModuleList()
        for idx in range(self.feature_num):
            self.feature_embeddings.append(
                nn.Embedding(data.feature_alphabets[idx].size(),
                             self.feature_embedding_dims[idx]))
        for idx in range(self.feature_num):
            if data.pretrain_feature_embeddings[idx] is not None:
                self.feature_embeddings[idx].weight.data.copy_(
                    torch.from_numpy(data.pretrain_feature_embeddings[idx]))
            else:
                self.feature_embeddings[idx].weight.data.copy_(
                    torch.from_numpy(
                        self.random_embedding(
                            data.feature_alphabets[idx].size(),
                            self.feature_embedding_dims[idx])))

        if self.gpu:
            self.drop = self.drop.cuda()
            self.word_embedding = self.word_embedding.cuda()
            for idx in range(self.feature_num):
                self.feature_embeddings[idx] = self.feature_embeddings[
                    idx].cuda()

    def random_embedding(self, vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale,
                                                       [1, embedding_dim])
        return pretrain_emb

    def forward(self, word_inputs, feature_inputs, word_seq_lengths,
                char_inputs, char_seq_lengths, char_seq_recover, trans_inputs,
                trans_seq_length, trans_seq_recover):
        """
            input:
                word_inputs: (batch_size, sent_len)
                features: list [(batch_size, sent_len), (batch_size, sent_len),...]
                word_seq_lengths: list of batch_size, (batch_size,1)
                char_inputs: (batch_size*sent_len, word_length)
                char_seq_lengths: list of whole batch_size for char, (batch_size*sent_len, 1)
                char_seq_recover: variable which records the char order information, used to recover char order
            output: 
                Variable(batch_size, sent_len, hidden_dim)
        """
        batch_size = word_inputs.size(0)
        sent_len = word_inputs.size(1)
        word_embs = self.word_embedding(word_inputs)
        word_list = [word_embs]

        for idx in range(self.feature_num):
            word_list.append(self.feature_embeddings[idx](feature_inputs[idx]))

        if self.use_char:
            # calculate char lstm last hidden
            char_features, _ = self.char_feature.get_last_hiddens(
                char_inputs,
                char_seq_lengths.cpu().numpy())
            char_features = char_features[char_seq_recover]
            char_features = char_features.view(batch_size, sent_len, -1)
            # concat word and char together
            word_list.append(char_features)
            # word_embs = torch.cat([word_embs, char_features], 2)
            if self.char_all_feature:
                char_features_extra, _ = self.char_feature_extra.get_last_hiddens(
                    char_inputs,
                    char_seq_lengths.cpu().numpy())
                char_features_extra = char_features_extra[char_seq_recover]
                char_features_extra = char_features_extra.view(
                    batch_size, sent_len, -1)
                # concat word and char together
                word_list.append(char_features_extra)

        if self.use_trans:
            trans_features, trans_rnn_length = self.trans_feature.get_last_hiddens(
                trans_inputs,
                trans_seq_length.cpu().numpy())

            trans_features_wc = trans_features
            if self.gpu:
                trans_features_wc = trans_features_wc.cuda()  # .cuda() is not in-place; keep the returned tensor
            trans_features_wc = trans_features_wc[trans_seq_recover]
            trans_inputs = trans_inputs[trans_seq_recover]
            word_embs_temp = word_embs.view(batch_size * sent_len, -1)
            for index, line in enumerate(trans_inputs):
                if line[0].data.cpu().numpy()[0] == 0:
                    trans_features_wc[index] = self.w(word_embs_temp[index])

            trans_features_wc_temp = trans_features_wc
            trans_features_wc = trans_features_wc.view(batch_size, sent_len,
                                                       -1)

            word_list.append(trans_features_wc)

        word_embs = torch.cat(word_list, 2)
        word_represent = self.drop(word_embs)
        return word_represent, self.w(word_embs_temp), trans_features_wc_temp
Code Example #9
class CNNCNN_SentLSTM(nn.Module):
	def __init__(self, vocab, num_classes, char_alphabet):
		super(CNNCNN_SentLSTM,self).__init__()
		self.embedding = vocab.init_embed_layer()
		self.hidden_size = opt.hidden_size

		# charcnn
		self.char_hidden_dim = 10
		self.char_embedding_dim = 20
		self.char_feature = CharCNN(len(char_alphabet), None, self.char_embedding_dim, self.char_hidden_dim,
									opt.dropout, opt.gpu)

		self.embedding_size = self.embedding.weight.size(1)
		self.hidden_size = opt.hidden_size

		Ci = 1
		Co = opt.kernel_num
		Ks = [int(k) for k in list(opt.kernel_sizes) if k != ","]
		# mention char_cnn
		D = self.embedding_size + self.char_hidden_dim
		self.convs1 = nn.ModuleList([nn.Conv2d(in_channels=Ci, out_channels=Co, kernel_size=(K, D), stride=(1, 1),
											   padding=(K // 2, 0), dilation=1, bias=False) for K in Ks])
		self.mention_hidden = nn.Linear(len(Ks) * Co, self.hidden_size)

		#sentence lstm
		self.lstm_hidden = opt.hidden_size
		self.lstm = nn.GRU(self.embedding_size, self.lstm_hidden, num_layers=1, batch_first=True,
							bidirectional=True)
		self.sent_hidden_size = opt.sent_hidden_size
		self.sent_hidden = nn.Linear(self.lstm_hidden*2, self.sent_hidden_size)
		self.hidden = nn.Linear(self.hidden_size + self.sent_hidden_size, self.hidden_size)  # mention_hidden_size + sentence_hidden_size
		self.out = nn.Linear(self.hidden_size, num_classes)
		self.dropout = nn.Dropout(opt.dropout)

	def conv_and_pool(self, x, conv):
		x = F.relu(conv(x)).squeeze(3)  # (N, Co, W)
		x = F.max_pool1d(x, x.size(2)).squeeze(2)
		return x

	def forward(self, mention_inputs, char_inputs, sent_inputs):
		inputs, lengths, seq_recover = mention_inputs
		mention_embedding = self.embedding(inputs)  # (N, W, D)

		batch_size, max_len = inputs.size()
		char_inputs, char_seq_lengths, char_seq_recover = char_inputs
		char_features = self.char_feature.get_last_hiddens(char_inputs)
		char_features = char_features[char_seq_recover]
		char_features = char_features.view(batch_size, max_len, -1)
		mention_char = torch.cat((mention_embedding, char_features), 2)
		mention_char = mention_char.unsqueeze(1)  # (N, Ci, W, D)
		mention_char = [F.relu(conv(mention_char)).squeeze(3) for conv in self.convs1]  # [(N, Co, W), ...]*len(Ks)
		mention_char = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in mention_char]  # [(N, Co), ...]*len(Ks)
		mention_char = torch.cat(mention_char, 1)
		mention_hidden = self.mention_hidden(mention_char)

		sent_inputs, sent_seq_lengths = sent_inputs
		sent_embedding = self.embedding(sent_inputs)
		packed_words = pack_padded_sequence(sent_embedding, sent_seq_lengths.cpu().numpy(), True)
		hidden = None
		lstm_out, hidden = self.lstm(packed_words, hidden)
		lstm_out, _ = pad_packed_sequence(lstm_out)
		hid_size = lstm_out.size(2) // 2
		sents_bilstm_out = torch.cat([lstm_out[0, :, :hid_size], lstm_out[-1, :, hid_size:]],
									 dim=1)
		sent_hidden = self.sent_hidden(sents_bilstm_out)

		x = torch.cat((mention_hidden, sent_hidden), 1)
		x = self.dropout(x)  # (N, len(Ks)*Co)
		hidden = self.hidden(x)  # (N, hidden)
		output = self.out(hidden)
		return output
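
For reference, here is a standalone shape check (toy dimensions, not the project's data) of the sentence-GRU pooling used in CNNCNN_SentLSTM.forward above. Because pad_packed_sequence is called without batch_first=True, lstm_out comes back time-major as (seq_len, batch, 2*hidden), so lstm_out[0, :, :hid_size] is the forward-direction output at the first timestep and lstm_out[-1, :, hid_size:] is the backward-direction output at the last timestep.

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

batch, max_len, emb_dim, hidden = 3, 7, 10, 5
gru = nn.GRU(emb_dim, hidden, num_layers=1, batch_first=True, bidirectional=True)
sent_embedding = torch.randn(batch, max_len, emb_dim)
lengths = torch.tensor([7, 5, 4])              # sorted in decreasing order

packed = pack_padded_sequence(sent_embedding, lengths, batch_first=True)
lstm_out, _ = gru(packed)
lstm_out, _ = pad_packed_sequence(lstm_out)    # (max_len, batch, 2*hidden)
hid_size = lstm_out.size(2) // 2
sent_repr = torch.cat([lstm_out[0, :, :hid_size], lstm_out[-1, :, hid_size:]], dim=1)
print(sent_repr.size())                        # torch.Size([3, 10])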
Code Example #10
File: shared_soft.py Project: foxlf823/e2e_ner_re
    def __init__(self, data, use_position, use_cap, use_postag, use_char):
        super(WordRep, self).__init__()

        self.gpu = data.HP_gpu
        self.use_char = use_char
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        self.char_all_feature = False
        if self.use_char:
            self.char_hidden_dim = data.HP_char_hidden_dim
            self.char_embedding_dim = data.char_emb_dim

            self.char_feature = CharCNN(data.char_alphabet.size(),
                                        data.pretrain_char_embedding,
                                        self.char_embedding_dim,
                                        self.char_hidden_dim, data.HP_dropout,
                                        self.gpu)

        self.embedding_dim = data.word_emb_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.word_embedding = nn.Embedding(data.word_alphabet.size(),
                                           self.embedding_dim)
        if data.pretrain_word_embedding is not None:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.word_alphabet.size(),
                                          self.embedding_dim)))

        self.feature_num = 0
        self.feature_embedding_dims = data.feature_emb_dims
        self.feature_embeddings = nn.ModuleList()

        if use_cap:
            self.feature_num += 1
            alphabet_id = data.feature_name2id['[Cap]']
            emb = nn.Embedding(data.feature_alphabets[alphabet_id].size(),
                               self.feature_embedding_dims[alphabet_id])
            emb.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(
                        data.feature_alphabets[alphabet_id].size(),
                        self.feature_embedding_dims[alphabet_id])))
            self.feature_embeddings.append(emb)

        if use_postag:
            self.feature_num += 1
            alphabet_id = data.feature_name2id['[POS]']
            emb = nn.Embedding(data.feature_alphabets[alphabet_id].size(),
                               self.feature_embedding_dims[alphabet_id])
            emb.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(
                        data.feature_alphabets[alphabet_id].size(),
                        self.feature_embedding_dims[alphabet_id])))
            self.feature_embeddings.append(emb)

        self.use_position = use_position
        if self.use_position:

            position_alphabet_id = data.re_feature_name2id['[POSITION]']
            self.position_embedding_dim = data.re_feature_emb_dims[
                position_alphabet_id]
            self.position1_emb = nn.Embedding(
                data.re_feature_alphabet_sizes[position_alphabet_id],
                self.position_embedding_dim, data.pad_idx)
            self.position1_emb.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(
                        data.re_feature_alphabet_sizes[position_alphabet_id],
                        self.position_embedding_dim)))

            self.position2_emb = nn.Embedding(
                data.re_feature_alphabet_sizes[position_alphabet_id],
                self.position_embedding_dim, data.pad_idx)
            self.position2_emb.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(
                        data.re_feature_alphabet_sizes[position_alphabet_id],
                        self.position_embedding_dim)))

        if torch.cuda.is_available():
            self.drop = self.drop.cuda(self.gpu)
            self.word_embedding = self.word_embedding.cuda(self.gpu)
            for idx in range(self.feature_num):
                self.feature_embeddings[idx] = self.feature_embeddings[
                    idx].cuda(self.gpu)
            if self.use_position:
                self.position1_emb = self.position1_emb.cuda(self.gpu)
                self.position2_emb = self.position2_emb.cuda(self.gpu)
Code Example #11
File: shared_soft.py Project: foxlf823/e2e_ner_re
class WordRep(nn.Module):
    def __init__(self, data, use_position, use_cap, use_postag, use_char):
        super(WordRep, self).__init__()

        self.gpu = data.HP_gpu
        self.use_char = use_char
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        self.char_all_feature = False
        if self.use_char:
            self.char_hidden_dim = data.HP_char_hidden_dim
            self.char_embedding_dim = data.char_emb_dim

            self.char_feature = CharCNN(data.char_alphabet.size(),
                                        data.pretrain_char_embedding,
                                        self.char_embedding_dim,
                                        self.char_hidden_dim, data.HP_dropout,
                                        self.gpu)

        self.embedding_dim = data.word_emb_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.word_embedding = nn.Embedding(data.word_alphabet.size(),
                                           self.embedding_dim)
        if data.pretrain_word_embedding is not None:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.word_alphabet.size(),
                                          self.embedding_dim)))

        self.feature_num = 0
        self.feature_embedding_dims = data.feature_emb_dims
        self.feature_embeddings = nn.ModuleList()

        if use_cap:
            self.feature_num += 1
            alphabet_id = data.feature_name2id['[Cap]']
            emb = nn.Embedding(data.feature_alphabets[alphabet_id].size(),
                               self.feature_embedding_dims[alphabet_id])
            emb.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(
                        data.feature_alphabets[alphabet_id].size(),
                        self.feature_embedding_dims[alphabet_id])))
            self.feature_embeddings.append(emb)

        if use_postag:
            self.feature_num += 1
            alphabet_id = data.feature_name2id['[POS]']
            emb = nn.Embedding(data.feature_alphabets[alphabet_id].size(),
                               self.feature_embedding_dims[alphabet_id])
            emb.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(
                        data.feature_alphabets[alphabet_id].size(),
                        self.feature_embedding_dims[alphabet_id])))
            self.feature_embeddings.append(emb)

        self.use_position = use_position
        if self.use_position:

            position_alphabet_id = data.re_feature_name2id['[POSITION]']
            self.position_embedding_dim = data.re_feature_emb_dims[
                position_alphabet_id]
            self.position1_emb = nn.Embedding(
                data.re_feature_alphabet_sizes[position_alphabet_id],
                self.position_embedding_dim, data.pad_idx)
            self.position1_emb.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(
                        data.re_feature_alphabet_sizes[position_alphabet_id],
                        self.position_embedding_dim)))

            self.position2_emb = nn.Embedding(
                data.re_feature_alphabet_sizes[position_alphabet_id],
                self.position_embedding_dim, data.pad_idx)
            self.position2_emb.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(
                        data.re_feature_alphabet_sizes[position_alphabet_id],
                        self.position_embedding_dim)))

        if torch.cuda.is_available():
            self.drop = self.drop.cuda(self.gpu)
            self.word_embedding = self.word_embedding.cuda(self.gpu)
            for idx in range(self.feature_num):
                self.feature_embeddings[idx] = self.feature_embeddings[
                    idx].cuda(self.gpu)
            if self.use_position:
                self.position1_emb = self.position1_emb.cuda(self.gpu)
                self.position2_emb = self.position2_emb.cuda(self.gpu)

    def random_embedding(self, vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale,
                                                       [1, embedding_dim])
        return pretrain_emb

    def forward(self, word_inputs, feature_inputs, word_seq_lengths,
                char_inputs, char_seq_lengths, char_seq_recover,
                position1_inputs, position2_inputs):
        """
            input:
                word_inputs: (batch_size, sent_len)
                features: list [(batch_size, sent_len), (batch_size, sent_len),...]
                word_seq_lengths: list of batch_size, (batch_size,1)
                char_inputs: (batch_size*sent_len, word_length)
                char_seq_lengths: list of whole batch_size for char, (batch_size*sent_len, 1)
                char_seq_recover: variable which records the char order information, used to recover char order
            output:
                Variable(batch_size, sent_len, hidden_dim)
        """
        batch_size = word_inputs.size(0)
        sent_len = word_inputs.size(1)
        word_embs = self.word_embedding(word_inputs)
        word_list = [word_embs]
        for idx in range(self.feature_num):
            word_list.append(self.feature_embeddings[idx](feature_inputs[idx]))

        if self.use_char:
            ## calculate char lstm last hidden
            char_features = self.char_feature.get_last_hiddens(
                char_inputs,
                char_seq_lengths.cpu().numpy())
            char_features = char_features[char_seq_recover]
            char_features = char_features.view(batch_size, sent_len, -1)
            ## concat word and char together
            word_list.append(char_features)
            word_embs = torch.cat([word_embs, char_features], 2)
            if self.char_all_feature:
                char_features_extra = self.char_feature_extra.get_last_hiddens(
                    char_inputs,
                    char_seq_lengths.cpu().numpy())
                char_features_extra = char_features_extra[char_seq_recover]
                char_features_extra = char_features_extra.view(
                    batch_size, sent_len, -1)
                ## concat word and char together
                word_list.append(char_features_extra)

        if self.use_position:
            position1_feature = self.position1_emb(position1_inputs)
            position2_feature = self.position2_emb(position2_inputs)
            word_list.append(position1_feature)
            word_list.append(position2_feature)

        word_embs = torch.cat(word_list, 2)
        word_represent = self.drop(word_embs)
        return word_represent
Code Example #12
    def __init__(self, data):
        super(BiLSTM, self).__init__()
        print( "build batched bilstm...")
        self.use_bigram = data.use_bigram
        self.gpu = data.HP_gpu
        self.use_char = data.HP_use_char
        self.use_gaz = data.HP_use_gaz
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        if self.use_char:
            self.char_hidden_dim = data.HP_char_hidden_dim
            self.char_embedding_dim = data.char_emb_dim
            if data.char_features == "CNN":
                self.char_feature = CharCNN(data.char_alphabet.size(), self.char_embedding_dim, self.char_hidden_dim, data.HP_dropout, self.gpu)
            elif data.char_features == "LSTM":
                self.char_feature = CharBiLSTM(data.char_alphabet.size(), self.char_embedding_dim, self.char_hidden_dim, data.HP_dropout, self.gpu)
            else:
                print( "Error char feature selection, please check parameter data.char_features (either CNN or LSTM).")
                exit(0)
        self.embedding_dim = data.word_emb_dim
        self.hidden_dim = data.HP_hidden_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.droplstm = nn.Dropout(data.HP_dropout)
        self.word_embeddings = nn.Embedding(data.word_alphabet.size(), self.embedding_dim)
        self.biword_embeddings = nn.Embedding(data.biword_alphabet.size(), data.biword_emb_dim)
        self.bilstm_flag = data.HP_bilstm
        # self.bilstm_flag = False
        self.lstm_layer = data.HP_lstm_layer
        if data.pretrain_word_embedding is not None:
            self.word_embeddings.weight.data.copy_(torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embeddings.weight.data.copy_(torch.from_numpy(self.random_embedding(data.word_alphabet.size(), self.embedding_dim)))
            
        if data.pretrain_biword_embedding is not None:
            self.biword_embeddings.weight.data.copy_(torch.from_numpy(data.pretrain_biword_embedding))
        else:
            self.biword_embeddings.weight.data.copy_(torch.from_numpy(self.random_embedding(data.biword_alphabet.size(), data.biword_emb_dim)))
        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        
        if self.bilstm_flag:
            lstm_hidden = data.HP_hidden_dim // 2
        else:
            lstm_hidden = data.HP_hidden_dim
        lstm_input = self.embedding_dim + self.char_hidden_dim
        if self.use_bigram:
            lstm_input += data.biword_emb_dim
        print("********************use_lattice",self.use_gaz)
        if self.use_gaz:
            self.forward_lstm = LatticeLSTM(lstm_input, lstm_hidden, data.gaz_dropout, data.gaz_alphabet.size(), data.gaz_emb_dim, data.pretrain_gaz_embedding, True, data.HP_fix_gaz_emb, self.gpu)
            if self.bilstm_flag:
                self.backward_lstm = LatticeLSTM(lstm_input, lstm_hidden, data.gaz_dropout, data.gaz_alphabet.size(), data.gaz_emb_dim, data.pretrain_gaz_embedding, False, data.HP_fix_gaz_emb, self.gpu)
        else:
            self.lstm = nn.LSTM(lstm_input, lstm_hidden, num_layers=self.lstm_layer, batch_first=True, bidirectional=self.bilstm_flag)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(data.HP_hidden_dim, data.label_alphabet_size)
        self.hidden2tag_ner = nn.Linear(data.HP_hidden_dim, data.label_alphabet_size_ner)
        self.hidden2tag_general = nn.Linear(data.HP_hidden_dim, data.label_alphabet_size_general)

        if self.gpu:
            self.drop = self.drop.cuda()
            self.droplstm = self.droplstm.cuda()
            self.word_embeddings = self.word_embeddings.cuda()
            self.biword_embeddings = self.biword_embeddings.cuda()
            if self.use_gaz:
                self.forward_lstm = self.forward_lstm.cuda()
                if self.bilstm_flag:
                    self.backward_lstm = self.backward_lstm.cuda()
            else:
                self.lstm = self.lstm.cuda()
            self.hidden2tag = self.hidden2tag.cuda()
            self.hidden2tag_ner = self.hidden2tag_ner.cuda()
            self.hidden2tag_general = self.hidden2tag_general.cuda()
Code Example #13
File: model.py Project: Haitons/Definition_Modeling
    def __init__(self,
                 rnn_type,
                 vocab_size,
                 embedding_dim,
                 hidden_dim,
                 num_layers,
                 tie_weights,
                 dropout,
                 device,
                 pretrain_emb=None,
                 use_ch=False,
                 use_he=False,
                 use_i=False,
                 use_h=False,
                 use_g=True,
                 **kwargs):
        super(RNNModel, self).__init__()
        self.rnn_type = rnn_type
        self.n_layers = num_layers
        self.hi_dim = hidden_dim

        self.device = device
        self.use_i = use_i
        self.use_h = use_h
        self.use_g = use_g
        self.use_ch = use_ch
        self.use_he = use_he

        self.drop = nn.Dropout(dropout)

        char_hid_dim = 0
        char_len = 0
        he_dim = 0

        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        if pretrain_emb is not None:
            self.embedding.weight.data.copy_(torch.from_numpy(pretrain_emb))
        else:
            self.embedding.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(vocab_size, embedding_dim)))
        self.embedding.weight.requires_grad = False
        # ch
        if use_ch:
            char_vocab_size = kwargs['char_vocab_size']
            char_emb_dim = kwargs['char_emb_dim']
            char_hid_dim = kwargs['char_hid_dim']
            char_len = kwargs['char_len']
            self.ch = CharCNN(char_vocab_size, None, char_emb_dim,
                              char_hid_dim, dropout).to(device)
        # he
        if use_he:
            print("Build Hypernym Embeddings...")
            he_dim = embedding_dim
            self.he = Hypernym(embedding_dim, self.embedding, device)
        concat_embedding_dim = embedding_dim + char_len * char_hid_dim + he_dim
        if self.use_i:
            embedding_dim = embedding_dim + concat_embedding_dim
        if self.use_h:
            self.h_linear = nn.Linear(concat_embedding_dim + hidden_dim,
                                      hidden_dim)
        if self.use_g:
            self.zt_linear = nn.Linear(concat_embedding_dim + hidden_dim,
                                       hidden_dim)
            self.rt_linear = nn.Linear(concat_embedding_dim + hidden_dim,
                                       concat_embedding_dim)
            self.ht_linear = nn.Linear(concat_embedding_dim + hidden_dim,
                                       hidden_dim)
        if rnn_type in ['LSTM', 'GRU']:
            self.rnn = getattr(nn, rnn_type)(embedding_dim,
                                             hidden_dim,
                                             num_layers,
                                             dropout=dropout)
        else:
            try:
                nonlinearity = {
                    'RNN_TANH': 'tanh',
                    'RNN_RELU': 'relu'
                }[rnn_type]
            except KeyError:
                raise ValueError(
                    """An invalid option for `--model` was supplied,
                                               options are ['LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU']"""
                )
            self.rnn = nn.RNN(embedding_dim,
                              hidden_dim,
                              num_layers,
                              nonlinearity=nonlinearity,
                              dropout=dropout)
        self.word2hidden = nn.Linear(concat_embedding_dim, hidden_dim)
        self.decoder = nn.Linear(hidden_dim, vocab_size)
        if tie_weights:
            if hidden_dim != embedding_dim:
                raise ValueError(
                    'When using the tied flag, nhid must be equal to emsize')
            self.decoder.weight = self.embedding.weight
        self.init_weights()
Code Example #14
File: defseq.py Project: styxjedi/pytorch-defseq
    def __init__(self,
                 vocab_size,
                 emb_dim,
                 hid_dim,
                 device,
                 pretrain_emb=None,
                 dropout=0,
                 use_i=False,
                 use_h=False,
                 use_g=True,
                 use_ch=True,
                 use_he=False,
                 **kwargs):
        super(DefSeq, self).__init__()

        self.device = device
        self.use_i = use_i
        self.use_h = use_h
        self.use_g = use_g
        self.use_ch = use_ch
        self.use_he = use_he

        char_emb_dim = 0
        char_hid_dim = 0
        char_len = 0
        he_dim = 0

        def weight_init(m):
            if isinstance(m, nn.Embedding):
                nn.init.orthogonal_(m.weight.data)
            if isinstance(m, nn.Linear):
                nn.init.orthogonal_(m.weight.data)
                nn.init.constant_(m.bias.data, 0.5)
            if isinstance(m, nn.LSTMCell):
                nn.init.orthogonal_(m.weight_hh.data)
                nn.init.orthogonal_(m.weight_ih.data)
                nn.init.constant_(m.bias_hh.data, 0.5)
                nn.init.constant_(m.bias_ih.data, 0.5)

        self.embedding = nn.Embedding(vocab_size, emb_dim)
        if pretrain_emb is not None:
            # self.embedding.weight.data.copy_(pretrain_emb)
            self.embedding.from_pretrained(pretrain_emb, freeze=True)
            # self.embedding.weight.requires_grad = False
        else:
            weight_init(self.embedding)

        if self.use_ch:
            print("build char sequence feature extractor: CNN ...")
            char_vocab_size = kwargs['char_vocab_size']
            char_emb_dim = kwargs['char_emb_dim']
            char_hid_dim = kwargs['char_hid_dim']
            char_len = kwargs['char_len']
            self.ch = CharCNN(char_vocab_size, None, char_emb_dim,
                              char_hid_dim, dropout, device)
        if self.use_he:
            print("build Hypernym Embeddings...")
            he_dim = emb_dim
            self.he = Hypernym(emb_dim, self.embedding, device)

        final_word_dim = emb_dim + char_hid_dim * char_len + he_dim
        self.word_linear = nn.Linear(final_word_dim, hid_dim)
        weight_init(self.word_linear)
        self.s_lstm = nn.LSTMCell(emb_dim, hid_dim)
        weight_init(self.s_lstm)
        if self.use_i:
            self.i_lstm = nn.LSTMCell(final_word_dim + emb_dim, hid_dim)
            weight_init(self.i_lstm)
        if self.use_h:
            self.h_linear = nn.Linear(final_word_dim + hid_dim, hid_dim)
            weight_init(self.h_linear)
        if self.use_g:
            self.g_zt_linear = nn.Linear(final_word_dim + hid_dim, hid_dim)
            weight_init(self.g_zt_linear)
            self.g_rt_linear = nn.Linear(final_word_dim + hid_dim,
                                         final_word_dim)
            weight_init(self.g_rt_linear)
            self.g_ht_linear = nn.Linear(final_word_dim + hid_dim, hid_dim)
            weight_init(self.g_ht_linear)

        self.hidden2tag_linear = nn.Linear(hid_dim, vocab_size)
        weight_init(self.hidden2tag_linear)
        self.dropout = nn.Dropout(p=dropout)
Code Example #15
class CNNCNN_SentATTEN(nn.Module):
    def __init__(self, vocab, num_classes, char_alphabet):
        super(CNNCNN_SentATTEN, self).__init__()
        self.embedding = vocab.init_embed_layer()
        self.hidden_size = opt.hidden_size

        # charcnn
        self.char_hidden_dim = 10
        self.char_embedding_dim = 20
        self.char_feature = CharCNN(len(char_alphabet), None,
                                    self.char_embedding_dim,
                                    self.char_hidden_dim, opt.dropout, opt.gpu)

        self.embedding_size = self.embedding.weight.size(1)
        self.hidden_size = opt.hidden_size

        Ci = 1
        Co = opt.kernel_num
        Ks = [int(k) for k in list(opt.kernel_sizes) if k != ","]
        # mention char_cnn
        D = self.embedding_size + self.char_hidden_dim
        self.convs1 = nn.ModuleList([
            nn.Conv2d(in_channels=Ci,
                      out_channels=Co,
                      kernel_size=(K, D),
                      stride=(1, 1),
                      padding=(K // 2, 0),
                      dilation=1,
                      bias=False) for K in Ks
        ])
        self.mention_hidden = nn.Linear(len(Ks) * Co, self.hidden_size)

        #sentence atten
        self.atten_W = nn.Linear(self.embedding_size, 1, bias=False)
        self.sent_hidden_size = opt.sent_hidden_size
        self.sent_hidden = nn.Linear(self.embedding_size,
                                     self.sent_hidden_size)
        self.hidden = nn.Linear(
            self.hidden_size + self.sent_hidden_size,
            self.hidden_size)  # mention_hidden_size + sentence_hidden_size
        self.out = nn.Linear(self.hidden_size, num_classes)
        self.dropout = nn.Dropout(opt.dropout)

    def conv_and_pool(self, x, conv):
        x = F.relu(conv(x)).squeeze(3)  # (N, Co, W)
        x = F.max_pool1d(x, x.size(2)).squeeze(2)
        return x

    def forward(self, mention_inputs, char_inputs, sent_inputs):
        inputs, lengths, seq_recover = mention_inputs
        mention_embedding = self.embedding(inputs)  # (N, W, D)

        batch_size, max_len = inputs.size()
        char_inputs, char_seq_lengths, char_seq_recover = char_inputs
        char_features = self.char_feature.get_last_hiddens(char_inputs)
        char_features = char_features[char_seq_recover]
        char_features = char_features.view(batch_size, max_len, -1)
        mention_char = torch.cat((mention_embedding, char_features), 2)
        mention_char = mention_char.unsqueeze(1)  # (N, Ci, W, D)
        mention_char = [
            F.relu(conv(mention_char)).squeeze(3) for conv in self.convs1
        ]  # [(N, Co, W), ...]*len(Ks)
        mention_char = [
            F.max_pool1d(i, i.size(2)).squeeze(2) for i in mention_char
        ]  # [(N, Co), ...]*len(Ks)
        mention_char = torch.cat(mention_char, 1)
        mention_hidden = self.mention_hidden(mention_char)

        sent_inputs, sent_seq_lengths = sent_inputs
        sent_embedding = self.embedding(sent_inputs)
        sent_batch_size, sent_max_len, _ = sent_embedding.size()
        flat_input = sent_embedding.contiguous().view(-1, self.embedding_size)
        logits = self.atten_W(flat_input).view(sent_batch_size, sent_max_len)
        alphas = F.softmax(logits, dim=1)

        # computing mask
        idxes = torch.arange(
            0, sent_max_len,
            out=torch.LongTensor(sent_max_len)).unsqueeze(0).cuda(opt.gpu)
        mask = autograd.Variable(
            (idxes < sent_seq_lengths.unsqueeze(1)).float())

        alphas = alphas * mask
        # renormalize
        alphas = alphas / torch.sum(alphas, 1).view(-1, 1)
        sent_atten_input = torch.bmm(alphas.unsqueeze(1),
                                     sent_embedding).squeeze(1)
        sent_atten_input = self.dropout(sent_atten_input)
        sent_hidden = self.sent_hidden(sent_atten_input)

        x = torch.cat((mention_hidden, sent_hidden), 1)
        x = self.dropout(x)
        hidden = self.hidden(x)  # (N, hidden)
        output = self.out(hidden)
        return output
Code Example #16
class CNNCNN(nn.Module):
    def __init__(self, vocab, num_classes, char_alphabet):

        super(CNNCNN, self).__init__()
        self.embedding = vocab.init_embed_layer()
        self.hidden_size = opt.hidden_size

        # charcnn
        self.char_hidden_dim = 10
        self.char_embedding_dim = 20
        self.char_feature = CharCNN(len(char_alphabet), None,
                                    self.char_embedding_dim,
                                    self.char_hidden_dim, opt.dropout, opt.gpu)

        D = self.embedding.weight.size(1)
        self.hidden_size = opt.hidden_size
        D = D + self.char_hidden_dim

        #mention cnn
        Ci = 1
        Co = opt.kernel_num
        Ks = [int(k) for k in list(opt.kernel_sizes) if k != ","]
        self.convs1 = nn.ModuleList([
            nn.Conv2d(in_channels=Ci,
                      out_channels=Co,
                      kernel_size=(K, D),
                      stride=(1, 1),
                      padding=(K // 2, 0),
                      dilation=1,
                      bias=False) for K in Ks
        ])

        self.hidden = nn.Linear(len(Ks) * Co, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, num_classes)
        self.dropout = nn.Dropout(opt.dropout)

    def conv_and_pool(self, x, conv):
        x = F.relu(conv(x)).squeeze(3)  # (N, Co, W)
        x = F.max_pool1d(x, x.size(2)).squeeze(2)
        return x

    def forward(self, x, char_inputs):
        inputs, lengths, seq_recover = x
        x = self.embedding(inputs)  # (N, W, D)

        batch_size, max_len = inputs.size()
        char_inputs, char_seq_lengths, char_seq_recover = char_inputs
        char_features = self.char_feature.get_last_hiddens(char_inputs)
        char_features = char_features[char_seq_recover]
        char_features = char_features.view(batch_size, max_len, -1)

        x = torch.cat((x, char_features), 2)

        x = x.unsqueeze(1)  # (N, Ci, W, D)
        x = [F.relu(conv(x)).squeeze(3)
             for conv in self.convs1]  # [(N, Co, W), ...]*len(Ks)
        x = [F.max_pool1d(i, i.size(2)).squeeze(2)
             for i in x]  # [(N, Co), ...]*len(Ks)
        x = torch.cat(x, 1)
        '''
		x1 = self.conv_and_pool(x,self.conv13) #(N,Co)
		x2 = self.conv_and_pool(x,self.conv14) #(N,Co)
		x3 = self.conv_and_pool(x,self.conv15) #(N,Co)
		x = torch.cat((x1, x2, x3), 1) # (N,len(Ks)*Co)
		'''
        x = self.dropout(x)  # (N, len(Ks)*Co)
        hidden = self.hidden(x)  # (N, hidden)
        output = self.out(hidden)
        return output
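
As a closing reference, the conv-and-pool pattern shared by the CNN-based examples above reduces to a few lines. The sketch below uses toy dimensions (assumptions, not taken from any listed project): a (N, W, D) tensor of concatenated word and character embeddings is unsqueezed to (N, 1, W, D), convolved with kernels of height K spanning the full width D, and max-pooled over the word dimension, giving one (N, Co) feature vector per kernel size.

import torch
import torch.nn as nn
import torch.nn.functional as F

N, W, D, Co, Ks = 4, 12, 30, 8, [3, 4, 5]
convs = nn.ModuleList([
    nn.Conv2d(1, Co, kernel_size=(K, D), padding=(K // 2, 0), bias=False)
    for K in Ks
])

x = torch.randn(N, W, D).unsqueeze(1)                           # (N, 1, W, D)
feats = [F.relu(conv(x)).squeeze(3) for conv in convs]          # each (N, Co, ~W)
feats = [F.max_pool1d(f, f.size(2)).squeeze(2) for f in feats]  # each (N, Co)
out = torch.cat(feats, 1)                                       # (N, len(Ks)*Co)
print(out.size())                                               # torch.Size([4, 24])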