Example #1
class AttenCNN(nn.Module):
	def __init__(self, vocab, num_classes, char_alphabet):
		super(AttenCNN,self).__init__()
		self.embed_size = opt.word_emb_size
		self.embedding = vocab.init_embed_layer()
		self.hidden_size = opt.hidden_size
		self.char_hidden_dim = 10
		self.char_embedding_dim = 20
		self.char_feature = CharCNN(len(char_alphabet), None, self.char_embedding_dim, self.char_hidden_dim,
									opt.dropout, opt.gpu)
		self.input_size = self.embed_size + self.char_hidden_dim

		self.W = nn.Linear(self.input_size, 1, bias=False)

		self.hidden = nn.Linear(self.input_size, self.hidden_size)
		self.out = nn.Linear(self.hidden_size, num_classes)
		self.dropout = nn.Dropout(opt.dropout)

	def forward(self, input, char_inputs):
		"""
		input: (entity_words, _, entity_lengths, entity_seq_recover)
		char_inputs: (char_inputs, _, char_seq_lengths, char_seq_recover)
		"""

		entity_words, _, entity_lengths, entity_seq_recover = input
		entity_words = autograd.Variable(entity_words)
		entity_words_embeds = self.embedding(entity_words)
		batch_size, max_len, _ = entity_words_embeds.size()

		char_inputs, _, char_seq_lengths, char_seq_recover = char_inputs
		char_features = self.char_feature.get_last_hiddens(char_inputs)
		char_features = char_features[char_seq_recover]
		char_features = char_features.view(batch_size, max_len, -1)

		input_embeds = torch.cat((entity_words_embeds, char_features), 2)

		flat_input = input_embeds.contiguous().view(-1, self.input_size)
		logits = self.W(flat_input).view(batch_size, max_len)
		alphas = functional.softmax(logits, dim=1)

		# computing mask
		tmp = torch.LongTensor(max_len)
		if opt.gpu >= 0 and torch.cuda.is_available():
			tmp = tmp.cuda(opt.gpu)
		idxes = torch.arange(0, max_len, out=tmp).unsqueeze(0)
		# idxes = torch.arange(0, max_len, out=torch.LongTensor(max_len)).unsqueeze(0).cuda(opt.gpu)
		mask = autograd.Variable((idxes < entity_lengths.unsqueeze(1)).float())

		alphas = alphas * mask
		alphas = alphas / torch.sum(alphas, 1).view(-1, 1)
		atten_input = torch.bmm(alphas.unsqueeze(1), input_embeds).squeeze(1)
		atten_input = self.dropout(atten_input)

		hidden = self.hidden(atten_input)
		output = self.out(hidden)
		return output
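AttenCNN pools the token representations with a single-query attention: each token gets a scalar score from self.W, padded positions are masked out, the weights are renormalized, and a weighted sum yields one vector per entity mention. A minimal standalone sketch of that masked pooling step (the tensor names and shapes are illustrative, not taken from the class above):

import torch
import torch.nn.functional as F

def masked_attention_pool(embeds, scores, lengths):
    # embeds: (batch, max_len, dim), scores: (batch, max_len), lengths: (batch,)
    max_len = embeds.size(1)
    alphas = F.softmax(scores, dim=1)
    # zero out padded positions, then renormalize so every row sums to 1 again
    mask = (torch.arange(max_len).unsqueeze(0) < lengths.unsqueeze(1)).float()
    alphas = alphas * mask
    alphas = alphas / alphas.sum(dim=1, keepdim=True)
    # weighted sum over the time dimension -> (batch, dim)
    return torch.bmm(alphas.unsqueeze(1), embeds).squeeze(1)

pooled = masked_attention_pool(torch.randn(2, 5, 8), torch.randn(2, 5), torch.tensor([5, 3]))  # (2, 8)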
Example #2
class WordRep(nn.Module):
    def __init__(self, data):
        super(WordRep, self).__init__()
        self.char_hidden_dim = data.char_hidden_dim  # 50
        self.char_embedding_dim = data.char_emb_dim  # 300
        self.char_feature = CharCNN(data.char_alphabet_size,
                                    data.pretrain_char_embedding,
                                    self.char_embedding_dim,
                                    self.char_hidden_dim, data.dropout)
        self.drop = nn.Dropout(data.dropout)
        self.word_embedding = nn.Embedding(data.word_alphabet_size,
                                           data.word_emb_dim)

        self.word_embedding.weight.data.copy_(
            torch.from_numpy(
                self.random_embedding(data.word_alphabet_size,
                                      data.word_emb_dim)))
        self.feature_embedding = nn.Embedding(data.feat_alphabet_size,
                                              data.feature_emb_dim)
        self.feature_embedding.weight.data.copy_(
            torch.from_numpy(data.pretrain_feature_embeddings))

    def random_embedding(self, vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale,
                                                       [1, embedding_dim])
        return pretrain_emb

    def forward(self, word_inputs, feature_inputs, word_seq_lengths,
                char_inputs, char_seq_lengths, char_seq_recover):
        batch_size = word_inputs.size(0)
        sent_len = word_inputs.size(1)
        word_embs = self.word_embedding(word_inputs)
        word_list = [word_embs]
        word_list.append(self.feature_embedding(feature_inputs))

        char_features = self.char_feature.get_last_hiddens(char_inputs)
        char_features = char_features[char_seq_recover]
        char_features = char_features.view(batch_size, sent_len, -1)
        word_list.append(char_features)

        word_embs = torch.cat(word_list, 2)
        word_represent = self.drop(word_embs)
        return word_represent
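The random_embedding helper shared by these examples draws each weight uniformly from [-sqrt(3/d), +sqrt(3/d)], which gives every embedding dimension a variance of roughly 1/d. A vectorized equivalent, kept separate here as a sketch:

import numpy as np

def random_embedding(vocab_size, embedding_dim):
    # Var(U(-a, a)) = a^2 / 3, so a = sqrt(3/d) gives per-dimension variance 1/d
    scale = np.sqrt(3.0 / embedding_dim)
    return np.random.uniform(-scale, scale, [vocab_size, embedding_dim])

emb = random_embedding(100, 50)
print(emb.shape, emb.var())  # (100, 50), roughly 1/50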
Example #3
class WordRep(nn.Module):
    def __init__(self, data):
        super(WordRep, self).__init__()
        print("build word representation...")
        self.gpu = data.HP_gpu
        self.use_char = data.use_char
        self.use_trans = data.use_trans
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        self.char_all_feature = False
        self.w = nn.Linear(data.word_emb_dim, data.HP_trans_hidden_dim)

        if self.use_trans:
            self.trans_hidden_dim = data.HP_trans_hidden_dim
            self.trans_embedding_dim = data.trans_emb_dim
            self.trans_feature = TransBiLSTM(data.translation_alphabet.size(),
                                             self.trans_embedding_dim,
                                             self.trans_hidden_dim,
                                             data.HP_dropout,
                                             data.pretrain_trans_embedding,
                                             self.gpu)

        if self.use_char:
            self.char_hidden_dim = data.HP_char_hidden_dim
            self.char_embedding_dim = data.char_emb_dim
            if data.char_seq_feature == "CNN":
                self.char_feature = CharCNN(data.char_alphabet.size(),
                                            self.char_embedding_dim,
                                            self.char_hidden_dim,
                                            data.HP_dropout, self.gpu)
            elif data.char_seq_feature == "LSTM":
                self.char_feature = CharBiLSTM(data.char_alphabet.size(),
                                               self.char_embedding_dim,
                                               self.char_hidden_dim,
                                               data.HP_dropout,
                                               data.pretrain_char_embedding,
                                               self.gpu)
            elif data.char_seq_feature == "GRU":
                self.char_feature = CharBiGRU(data.char_alphabet.size(),
                                              self.char_embedding_dim,
                                              self.char_hidden_dim,
                                              data.HP_dropout, self.gpu)
            elif data.char_seq_feature == "ALL":
                self.char_all_feature = True
                self.char_feature = CharCNN(data.char_alphabet.size(),
                                            self.char_embedding_dim,
                                            self.char_hidden_dim,
                                            data.HP_dropout, self.gpu)
                self.char_feature_extra = CharBiLSTM(data.char_alphabet.size(),
                                                     self.char_embedding_dim,
                                                     self.char_hidden_dim,
                                                     data.HP_dropout, self.gpu)
            else:
                print(
                    "Error char feature selection, please check parameter data.char_seq_feature (CNN/LSTM/GRU/ALL)."
                )
                exit(0)
        self.embedding_dim = data.word_emb_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.word_embedding = nn.Embedding(data.word_alphabet.size(),
                                           self.embedding_dim)
        if data.pretrain_word_embedding is not None:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.word_alphabet.size(),
                                          self.embedding_dim)))

        self.feature_num = data.feature_num
        self.feature_embedding_dims = data.feature_emb_dims
        self.feature_embeddings = nn.ModuleList()
        for idx in range(self.feature_num):
            self.feature_embeddings.append(
                nn.Embedding(data.feature_alphabets[idx].size(),
                             self.feature_embedding_dims[idx]))
        for idx in range(self.feature_num):
            if data.pretrain_feature_embeddings[idx] is not None:
                self.feature_embeddings[idx].weight.data.copy_(
                    torch.from_numpy(data.pretrain_feature_embeddings[idx]))
            else:
                self.feature_embeddings[idx].weight.data.copy_(
                    torch.from_numpy(
                        self.random_embedding(
                            data.feature_alphabets[idx].size(),
                            self.feature_embedding_dims[idx])))

        if self.gpu:
            self.drop = self.drop.cuda()
            self.word_embedding = self.word_embedding.cuda()
            for idx in range(self.feature_num):
                self.feature_embeddings[idx] = self.feature_embeddings[
                    idx].cuda()

    def random_embedding(self, vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale,
                                                       [1, embedding_dim])
        return pretrain_emb

    def forward(self, word_inputs, feature_inputs, word_seq_lengths,
                char_inputs, char_seq_lengths, char_seq_recover, trans_inputs,
                trans_seq_length, trans_seq_recover):
        """
            input:
                word_inputs: (batch_size, sent_len)
                features: list [(batch_size, sent_len), (batch_size, sent_len),...]
                word_seq_lengths: list of batch_size, (batch_size,1)
                char_inputs: (batch_size*sent_len, word_length)
                char_seq_lengths: list of whole batch_size for char, (batch_size*sent_len, 1)
                char_seq_recover: variable which records the char order information, used to recover char order
            output: 
                Variable(batch_size, sent_len, hidden_dim)
        """
        batch_size = word_inputs.size(0)
        sent_len = word_inputs.size(1)
        word_embs = self.word_embedding(word_inputs)
        word_list = [word_embs]

        for idx in range(self.feature_num):
            word_list.append(self.feature_embeddings[idx](feature_inputs[idx]))

        if self.use_char:
            # calculate char lstm last hidden
            char_features, _ = self.char_feature.get_last_hiddens(
                char_inputs,
                char_seq_lengths.cpu().numpy())
            char_features = char_features[char_seq_recover]
            char_features = char_features.view(batch_size, sent_len, -1)
            # concat word and char together
            word_list.append(char_features)
            # word_embs = torch.cat([word_embs, char_features], 2)
            if self.char_all_feature:
                char_features_extra, _ = self.char_feature_extra.get_last_hiddens(
                    char_inputs,
                    char_seq_lengths.cpu().numpy())
                char_features_extra = char_features_extra[char_seq_recover]
                char_features_extra = char_features_extra.view(
                    batch_size, sent_len, -1)
                # concat word and char together
                word_list.append(char_features_extra)

        if self.use_trans:
            trans_features, trans_rnn_length = self.trans_feature.get_last_hiddens(
                trans_inputs,
                trans_seq_length.cpu().numpy())

            trans_features_wc = trans_features
            if self.gpu:
                # .cuda() returns a new tensor; keep the result instead of discarding it
                trans_features_wc = trans_features_wc.cuda()
            trans_features_wc = trans_features_wc[trans_seq_recover]
            trans_inputs = trans_inputs[trans_seq_recover]
            word_embs_temp = word_embs.view(batch_size * sent_len, -1)
            for index, line in enumerate(trans_inputs):
                if line[0].data.cpu().numpy()[0] == 0:
                    trans_features_wc[index] = self.w(word_embs_temp[index])

            trans_features_wc_temp = trans_features_wc
            trans_features_wc = trans_features_wc.view(batch_size, sent_len,
                                                       -1)

            word_list.append(trans_features_wc)

        word_embs = torch.cat(word_list, 2)
        word_represent = self.drop(word_embs)
        return word_represent, self.w(word_embs_temp), trans_features_wc_temp
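char_seq_recover and trans_seq_recover follow the usual pack_padded_sequence convention: the sub-sequences are sorted by length before being packed, and the recover index is the inverse permutation that puts the encoder outputs back into the original order. A small sketch of that convention with made-up lengths:

import torch

lengths = torch.tensor([3, 7, 2, 5])                         # original order
sorted_lengths, perm_idx = lengths.sort(0, descending=True)  # order used for packing
_, recover_idx = perm_idx.sort(0, descending=False)          # inverse permutation

features_sorted = torch.randn(4, 10)              # pretend these came out of the encoder in sorted order
features_original = features_sorted[recover_idx]  # back in the original sequence order
assert torch.equal(sorted_lengths[recover_idx], lengths)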
Example #4
class CNNCNN_SentLSTM(nn.Module):
	def __init__(self, vocab, num_classes, char_alphabet):
		super(CNNCNN_SentLSTM,self).__init__()
		self.embedding = vocab.init_embed_layer()
		self.hidden_size = opt.hidden_size

		# charcnn
		self.char_hidden_dim = 10
		self.char_embedding_dim = 20
		self.char_feature = CharCNN(len(char_alphabet), None, self.char_embedding_dim, self.char_hidden_dim,
									opt.dropout, opt.gpu)

		self.embedding_size = self.embedding.weight.size(1)
		self.hidden_size = opt.hidden_size

		Ci = 1
		Co = opt.kernel_num
		Ks = [int(k) for k in list(opt.kernel_sizes) if k != ","]
		# mention char_cnn
		D = self.embedding_size + self.char_hidden_dim
		self.convs1 = nn.ModuleList([nn.Conv2d(in_channels=Ci, out_channels=Co, kernel_size=(K, D), stride=(1, 1),
											   padding=(K // 2, 0), dilation=1, bias=False) for K in Ks])
		self.mention_hidden = nn.Linear(len(Ks) * Co, self.hidden_size)

		#sentence lstm
		self.lstm_hidden = opt.hidden_size
		self.lstm = nn.GRU(self.embedding_size, self.lstm_hidden, num_layers=1, batch_first=True,
							bidirectional=True)
		self.sent_hidden_size = opt.sent_hidden_size
		self.sent_hidden = nn.Linear(self.lstm_hidden*2, self.sent_hidden_size)
		self.hidden = nn.Linear(self.hidden_size + self.sent_hidden_size, self.hidden_size)  # mention_hidden_size + sentence_hidden_size
		self.out = nn.Linear(self.hidden_size, num_classes)
		self.dropout = nn.Dropout(opt.dropout)

	def conv_and_pool(self, x, conv):
		x = F.relu(conv(x)).squeeze(3)  # (N, Co, W)
		x = F.max_pool1d(x, x.size(2)).squeeze(2)
		return x

	def forward(self, mention_inputs, char_inputs, sent_inputs):
		inputs, lengths, seq_recover = mention_inputs
		mention_embedding = self.embedding(inputs)  # (N, W, D)

		batch_size, max_len = inputs.size()
		char_inputs, char_seq_lengths, char_seq_recover = char_inputs
		char_features = self.char_feature.get_last_hiddens(char_inputs)
		char_features = char_features[char_seq_recover]
		char_features = char_features.view(batch_size, max_len, -1)
		mention_char = torch.cat((mention_embedding, char_features), 2)
		mention_char = mention_char.unsqueeze(1)  # (N, Ci, W, D)
		mention_char = [F.relu(conv(mention_char)).squeeze(3) for conv in self.convs1]  # [(N, Co, W), ...]*len(Ks)
		mention_char = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in mention_char]  # [(N, Co), ...]*len(Ks)
		mention_char = torch.cat(mention_char, 1)
		mention_hidden = self.mention_hidden(mention_char)

		sent_inputs, sent_seq_lengths = sent_inputs
		sent_embedding = self.embedding(sent_inputs)
		packed_words = pack_padded_sequence(sent_embedding, sent_seq_lengths.cpu().numpy(), True)
		hidden = None
		lstm_out, hidden = self.lstm(packed_words, hidden)
		lstm_out, _ = pad_packed_sequence(lstm_out)
		hid_size = lstm_out.size(2) // 2
		sents_bilstm_out = torch.cat([lstm_out[0, :, :hid_size], lstm_out[-1, :, hid_size:]],
									 dim=1)
		sent_hidden = self.sent_hidden(sents_bilstm_out)

		x = torch.cat((mention_hidden, sent_hidden), 1)
		x = self.dropout(x)  # (N, len(Ks)*Co)
		hidden = self.hidden(x)  # (N, hidden)
		output = self.out(hidden)
		return output
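The mention encoder in CNNCNN_SentLSTM is the standard Kim-style text CNN: the concatenated word+char embedding is treated as a one-channel 2-D image, convolved with kernels of height K spanning the full embedding width D, and max-pooled over time. A shape walkthrough with assumed toy dimensions:

import torch
import torch.nn as nn
import torch.nn.functional as F

N, W, D, Co = 2, 9, 30, 16   # batch, tokens, embedding dim, filters per kernel size
Ks = [3, 4, 5]
convs = nn.ModuleList([
    nn.Conv2d(1, Co, kernel_size=(K, D), padding=(K // 2, 0), bias=False) for K in Ks
])

x = torch.randn(N, W, D).unsqueeze(1)                           # (N, 1, W, D)
feats = [F.relu(conv(x)).squeeze(3) for conv in convs]          # each (N, Co, ~W)
feats = [F.max_pool1d(f, f.size(2)).squeeze(2) for f in feats]  # each (N, Co)
out = torch.cat(feats, 1)
print(out.shape)  # torch.Size([2, 48]) == (N, len(Ks) * Co)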
Example #5
class WordRep(nn.Module):
    def __init__(self, data, use_position, use_cap, use_postag, use_char):
        super(WordRep, self).__init__()

        self.gpu = data.HP_gpu
        self.use_char = use_char
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        self.char_all_feature = False
        if self.use_char:
            self.char_hidden_dim = data.HP_char_hidden_dim
            self.char_embedding_dim = data.char_emb_dim

            self.char_feature = CharCNN(data.char_alphabet.size(),
                                        data.pretrain_char_embedding,
                                        self.char_embedding_dim,
                                        self.char_hidden_dim, data.HP_dropout,
                                        self.gpu)

        self.embedding_dim = data.word_emb_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.word_embedding = nn.Embedding(data.word_alphabet.size(),
                                           self.embedding_dim)
        if data.pretrain_word_embedding is not None:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.word_alphabet.size(),
                                          self.embedding_dim)))

        self.feature_num = 0
        self.feature_embedding_dims = data.feature_emb_dims
        self.feature_embeddings = nn.ModuleList()

        if use_cap:
            self.feature_num += 1
            alphabet_id = data.feature_name2id['[Cap]']
            emb = nn.Embedding(data.feature_alphabets[alphabet_id].size(),
                               self.feature_embedding_dims[alphabet_id])
            emb.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(
                        data.feature_alphabets[alphabet_id].size(),
                        self.feature_embedding_dims[alphabet_id])))
            self.feature_embeddings.append(emb)

        if use_postag:
            self.feature_num += 1
            alphabet_id = data.feature_name2id['[POS]']
            emb = nn.Embedding(data.feature_alphabets[alphabet_id].size(),
                               self.feature_embedding_dims[alphabet_id])
            emb.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(
                        data.feature_alphabets[alphabet_id].size(),
                        self.feature_embedding_dims[alphabet_id])))
            self.feature_embeddings.append(emb)

        self.use_position = use_position
        if self.use_position:

            position_alphabet_id = data.re_feature_name2id['[POSITION]']
            self.position_embedding_dim = data.re_feature_emb_dims[
                position_alphabet_id]
            self.position1_emb = nn.Embedding(
                data.re_feature_alphabet_sizes[position_alphabet_id],
                self.position_embedding_dim, data.pad_idx)
            self.position1_emb.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(
                        data.re_feature_alphabet_sizes[position_alphabet_id],
                        self.position_embedding_dim)))

            self.position2_emb = nn.Embedding(
                data.re_feature_alphabet_sizes[position_alphabet_id],
                self.position_embedding_dim, data.pad_idx)
            self.position2_emb.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(
                        data.re_feature_alphabet_sizes[position_alphabet_id],
                        self.position_embedding_dim)))

        if torch.cuda.is_available():
            self.drop = self.drop.cuda(self.gpu)
            self.word_embedding = self.word_embedding.cuda(self.gpu)
            for idx in range(self.feature_num):
                self.feature_embeddings[idx] = self.feature_embeddings[
                    idx].cuda(self.gpu)
            if self.use_position:
                self.position1_emb = self.position1_emb.cuda(self.gpu)
                self.position2_emb = self.position2_emb.cuda(self.gpu)

    def random_embedding(self, vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale,
                                                       [1, embedding_dim])
        return pretrain_emb

    def forward(self, word_inputs, feature_inputs, word_seq_lengths,
                char_inputs, char_seq_lengths, char_seq_recover,
                position1_inputs, position2_inputs):
        """
            input:
                word_inputs: (batch_size, sent_len)
                features: list [(batch_size, sent_len), (batch_size, sent_len),...]
                word_seq_lengths: list of batch_size, (batch_size,1)
                char_inputs: (batch_size*sent_len, word_length)
                char_seq_lengths: list of whole batch_size for char, (batch_size*sent_len, 1)
                char_seq_recover: variable which records the char order information, used to recover char order
            output:
                Variable(batch_size, sent_len, hidden_dim)
        """
        batch_size = word_inputs.size(0)
        sent_len = word_inputs.size(1)
        word_embs = self.word_embedding(word_inputs)
        word_list = [word_embs]
        for idx in range(self.feature_num):
            word_list.append(self.feature_embeddings[idx](feature_inputs[idx]))

        if self.use_char:
            ## calculate char lstm last hidden
            char_features = self.char_feature.get_last_hiddens(
                char_inputs,
                char_seq_lengths.cpu().numpy())
            char_features = char_features[char_seq_recover]
            char_features = char_features.view(batch_size, sent_len, -1)
            ## concat word and char together
            word_list.append(char_features)
            word_embs = torch.cat([word_embs, char_features], 2)
            if self.char_all_feature:
                char_features_extra = self.char_feature_extra.get_last_hiddens(
                    char_inputs,
                    char_seq_lengths.cpu().numpy())
                char_features_extra = char_features_extra[char_seq_recover]
                char_features_extra = char_features_extra.view(
                    batch_size, sent_len, -1)
                ## concat word and char together
                word_list.append(char_features_extra)

        if self.use_position:
            position1_feature = self.position1_emb(position1_inputs)
            position2_feature = self.position2_emb(position2_inputs)
            word_list.append(position1_feature)
            word_list.append(position2_feature)

        word_embs = torch.cat(word_list, 2)
        word_represent = self.drop(word_embs)
        return word_represent
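The two position tables in this WordRep variant encode each token's (already bucketed) distance to the two candidate entities for relation extraction; both are looked up per token and concatenated with the word representation. A hedged sketch with hypothetical sizes (num_positions, pos_dim, and the pad index 0 are assumptions, not values from the class above):

import torch
import torch.nn as nn

num_positions, pos_dim, pad_idx = 200, 20, 0  # hypothetical sizes
position1_emb = nn.Embedding(num_positions, pos_dim, padding_idx=pad_idx)  # distance to entity 1
position2_emb = nn.Embedding(num_positions, pos_dim, padding_idx=pad_idx)  # distance to entity 2

batch_size, sent_len = 2, 6
position1_inputs = torch.randint(1, num_positions, (batch_size, sent_len))
position2_inputs = torch.randint(1, num_positions, (batch_size, sent_len))

p1 = position1_emb(position1_inputs)  # (batch_size, sent_len, pos_dim)
p2 = position2_emb(position2_inputs)  # (batch_size, sent_len, pos_dim)
# in WordRep.forward these are appended to word_list and concatenated along dim 2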
Example #6
class CNNCNN_SentATTEN(nn.Module):
    def __init__(self, vocab, num_classes, char_alphabet):
        super(CNNCNN_SentATTEN, self).__init__()
        self.embedding = vocab.init_embed_layer()
        self.hidden_size = opt.hidden_size

        # charcnn
        self.char_hidden_dim = 10
        self.char_embedding_dim = 20
        self.char_feature = CharCNN(len(char_alphabet), None,
                                    self.char_embedding_dim,
                                    self.char_hidden_dim, opt.dropout, opt.gpu)

        self.embedding_size = self.embedding.weight.size(1)
        self.hidden_size = opt.hidden_size

        Ci = 1
        Co = opt.kernel_num
        Ks = [int(k) for k in list(opt.kernel_sizes) if k != ","]
        # mention char_cnn
        D = self.embedding_size + self.char_hidden_dim
        self.convs1 = nn.ModuleList([
            nn.Conv2d(in_channels=Ci,
                      out_channels=Co,
                      kernel_size=(K, D),
                      stride=(1, 1),
                      padding=(K // 2, 0),
                      dilation=1,
                      bias=False) for K in Ks
        ])
        self.mention_hidden = nn.Linear(len(Ks) * Co, self.hidden_size)

        #sentence atten
        self.atten_W = nn.Linear(self.embedding_size, 1, bias=False)
        self.sent_hidden_size = opt.sent_hidden_size
        self.sent_hidden = nn.Linear(self.embedding_size,
                                     self.sent_hidden_size)
        self.hidden = nn.Linear(
            self.hidden_size + self.sent_hidden_size,
            self.hidden_size)  # mention_hidden_size + sentence_hidden_size
        self.out = nn.Linear(self.hidden_size, num_classes)
        self.dropout = nn.Dropout(opt.dropout)

    def conv_and_pool(self, x, conv):
        x = F.relu(conv(x)).squeeze(3)  # (N, Co, W)
        x = F.max_pool1d(x, x.size(2)).squeeze(2)
        return x

    def forward(self, mention_inputs, char_inputs, sent_inputs):
        inputs, lengths, seq_recover = mention_inputs
        mention_embedding = self.embedding(inputs)  # (N, W, D)

        batch_size, max_len = inputs.size()
        char_inputs, char_seq_lengths, char_seq_recover = char_inputs
        char_features = self.char_feature.get_last_hiddens(char_inputs)
        char_features = char_features[char_seq_recover]
        char_features = char_features.view(batch_size, max_len, -1)
        mention_char = torch.cat((mention_embedding, char_features), 2)
        mention_char = mention_char.unsqueeze(1)  # (N, Ci, W, D)
        mention_char = [
            F.relu(conv(mention_char)).squeeze(3) for conv in self.convs1
        ]  # [(N, Co, W), ...]*len(Ks)
        mention_char = [
            F.max_pool1d(i, i.size(2)).squeeze(2) for i in mention_char
        ]  # [(N, Co), ...]*len(Ks)
        mention_char = torch.cat(mention_char, 1)
        mention_hidden = self.mention_hidden(mention_char)

        sent_inputs, sent_seq_lengths = sent_inputs
        sent_embedding = self.embedding(sent_inputs)
        sent_batch_size, sent_max_len, _ = sent_embedding.size()
        flat_input = sent_embedding.contiguous().view(-1, self.embedding_size)
        logits = self.atten_W(flat_input).view(sent_batch_size, sent_max_len)
        alphas = F.softmax(logits, dim=1)

        # computing mask
        idxes = torch.arange(
            0, sent_max_len,
            out=torch.LongTensor(sent_max_len)).unsqueeze(0)
        if opt.gpu >= 0 and torch.cuda.is_available():
            idxes = idxes.cuda(opt.gpu)
        mask = autograd.Variable(
            (idxes < sent_seq_lengths.unsqueeze(1)).float())

        alphas = alphas * mask
        # renormalize
        alphas = alphas / torch.sum(alphas, 1).view(-1, 1)
        sent_atten_input = torch.bmm(alphas.unsqueeze(1),
                                     sent_embedding).squeeze(1)
        sent_atten_input = self.dropout(sent_atten_input)
        sent_hidden = self.sent_hidden(sent_atten_input)

        x = torch.cat((mention_hidden, sent_hidden), 1)
        x = self.dropout(x)
        hidden = self.hidden(x)  # (N, hidden)
        output = self.out(hidden)
        return output
Example #7
class CNNCNN(nn.Module):
    def __init__(self, vocab, num_classes, char_alphabet):

        super(CNNCNN, self).__init__()
        self.embedding = vocab.init_embed_layer()
        self.hidden_size = opt.hidden_size

        # charcnn
        self.char_hidden_dim = 10
        self.char_embedding_dim = 20
        self.char_feature = CharCNN(len(char_alphabet), None,
                                    self.char_embedding_dim,
                                    self.char_hidden_dim, opt.dropout, opt.gpu)

        D = self.embedding.weight.size(1)
        self.hidden_size = opt.hidden_size
        D = D + self.char_hidden_dim

        #mention cnn
        Ci = 1
        Co = opt.kernel_num
        Ks = [int(k) for k in list(opt.kernel_sizes) if k != ","]
        self.convs1 = nn.ModuleList([
            nn.Conv2d(in_channels=Ci,
                      out_channels=Co,
                      kernel_size=(K, D),
                      stride=(1, 1),
                      padding=(K // 2, 0),
                      dilation=1,
                      bias=False) for K in Ks
        ])

        self.hidden = nn.Linear(len(Ks) * Co, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, num_classes)
        self.dropout = nn.Dropout(opt.dropout)

    def conv_and_pool(self, x, conv):
        x = F.relu(conv(x)).squeeze(3)  # (N, Co, W)
        x = F.max_pool1d(x, x.size(2)).squeeze(2)
        return x

    def forward(self, x, char_inputs):
        inputs, lengths, seq_recover = x
        x = self.embedding(inputs)  # (N, W, D)

        batch_size, max_len = inputs.size()
        char_inputs, char_seq_lengths, char_seq_recover = char_inputs
        char_features = self.char_feature.get_last_hiddens(char_inputs)
        char_features = char_features[char_seq_recover]
        char_features = char_features.view(batch_size, max_len, -1)

        x = torch.cat((x, char_features), 2)

        x = x.unsqueeze(1)  # (N, Ci, W, D)
        x = [F.relu(conv(x)).squeeze(3)
             for conv in self.convs1]  # [(N, Co, W), ...]*len(Ks)
        x = [F.max_pool1d(i, i.size(2)).squeeze(2)
             for i in x]  # [(N, Co), ...]*len(Ks)
        x = torch.cat(x, 1)
        '''
        x1 = self.conv_and_pool(x, self.conv13)  # (N, Co)
        x2 = self.conv_and_pool(x, self.conv14)  # (N, Co)
        x3 = self.conv_and_pool(x, self.conv15)  # (N, Co)
        x = torch.cat((x1, x2, x3), 1)  # (N, len(Ks)*Co)
        '''
        x = self.dropout(x)  # (N, len(Ks)*Co)
        hidden = self.hidden(x)  # (N, hidden)
        output = self.out(hidden)
        return output