Example #1
class BiLSTM_CRF(nn.Module):
    def __init__(self, data):
        super(BiLSTM_CRF, self).__init__()
        print "build batched lstmcrf..."
        self.gpu = data.HP_gpu
        label_size = data.label_alphabet_size
        data.label_alphabet_size += 2
        self.lstm = BiLSTM(data)
        self.crf = CRF(label_size, self.gpu)


    def neg_log_likelihood_loss(self, gaz_list, word_inputs, biword_inputs, word_seq_lengths,  char_inputs, char_seq_lengths, char_seq_recover, batch_label, mask):
        outs = self.lstm.get_output_score(gaz_list, word_inputs, biword_inputs, word_seq_lengths,  char_inputs, char_seq_lengths, char_seq_recover)
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        total_loss = self.crf.neg_log_likelihood_loss(outs, mask, batch_label)
        scores, tag_seq = self.crf._viterbi_decode(outs, mask)
        return total_loss, tag_seq


    def forward(self, gaz_list, word_inputs, biword_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover, mask):
        outs = self.lstm.get_output_score(gaz_list, word_inputs, biword_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover)
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        scores, tag_seq = self.crf._viterbi_decode(outs, mask)
        return tag_seq


    def get_lstm_features(self, gaz_list, word_inputs, biword_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover):
        return self.lstm.get_lstm_features(gaz_list, word_inputs, biword_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover)
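
All of the examples in this section assume a CRF module exposing neg_log_likelihood_loss(outs, mask, batch_label), _viterbi_decode(outs, mask) and, in some cases, _viterbi_decode_nbest(outs, mask, nbest); its implementation is not shown here. The following is a minimal, self-contained sketch of a batch-first linear-chain CRF with the first two methods, ending in a toy usage. It is illustrative only: it omits the extra START/STOP labels the examples reserve (label_alphabet_size += 2) and any GPU handling.

import torch
import torch.nn as nn

class MinimalCRF(nn.Module):
    """Simplified batch-first linear-chain CRF (no extra START/STOP states)."""

    def __init__(self, num_tags):
        super().__init__()
        self.num_tags = num_tags
        # transitions[i, j]: score of moving from tag i to tag j
        self.transitions = nn.Parameter(torch.randn(num_tags, num_tags) * 0.01)

    def _log_partition(self, feats, mask):
        # feats: (batch, seq_len, num_tags); mask: (batch, seq_len) bool
        alpha = feats[:, 0]
        for t in range(1, feats.size(1)):
            scores = alpha.unsqueeze(2) + self.transitions.unsqueeze(0) + feats[:, t].unsqueeze(1)
            alpha = torch.where(mask[:, t].unsqueeze(1), torch.logsumexp(scores, dim=1), alpha)
        return torch.logsumexp(alpha, dim=1)

    def _gold_score(self, feats, mask, tags):
        score = feats[:, 0].gather(1, tags[:, :1]).squeeze(1)
        for t in range(1, feats.size(1)):
            emit = feats[:, t].gather(1, tags[:, t:t + 1]).squeeze(1)
            trans = self.transitions[tags[:, t - 1], tags[:, t]]
            score = score + (emit + trans) * mask[:, t].float()
        return score

    def neg_log_likelihood_loss(self, feats, mask, tags):
        return (self._log_partition(feats, mask) - self._gold_score(feats, mask, tags)).sum()

    def _viterbi_decode(self, feats, mask):
        batch_size, seq_len, _ = feats.shape
        score, history = feats[:, 0], []
        for t in range(1, seq_len):
            broadcast = score.unsqueeze(2) + self.transitions.unsqueeze(0) + feats[:, t].unsqueeze(1)
            best_score, best_prev = broadcast.max(dim=1)
            score = torch.where(mask[:, t].unsqueeze(1), best_score, score)
            history.append(best_prev)
        best_final, best_last = score.max(dim=1)
        lengths = mask.long().sum(dim=1)
        paths = []
        for b in range(batch_size):
            length = int(lengths[b])
            tags_b = [int(best_last[b])]
            # follow back-pointers only over the valid part of the sequence
            for t in range(length - 2, -1, -1):
                tags_b.append(int(history[t][b, tags_b[-1]]))
            tags_b.reverse()
            paths.append(tags_b + [0] * (seq_len - length))
        return best_final, torch.tensor(paths)

# toy usage
crf = MinimalCRF(num_tags=5)
feats = torch.randn(2, 7, 5)
mask = torch.tensor([[True] * 7, [True] * 4 + [False] * 3])
tags = torch.randint(0, 5, (2, 7))
loss = crf.neg_log_likelihood_loss(feats, mask, tags)
scores, tag_seq = crf._viterbi_decode(feats, mask)
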
        
Example #2
class SeqModel(nn.Module):
    def __init__(self, data):
        super(SeqModel, self).__init__()

        self.gpu = data.HP_gpu

        ## add two more labels for the down-layer LSTM; keep the original label size for the CRF
        label_size = data.label_alphabet_size
        # data.label_alphabet_size += 2
        # self.word_hidden = WordSequence(data, False, True, data.use_char)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(data.HP_hidden_dim, label_size + 2)

        self.crf = CRF(label_size, self.gpu)

        if torch.cuda.is_available():
            self.hidden2tag = self.hidden2tag.cuda(self.gpu)

    # def neg_log_likelihood_loss(self, word_inputs, feature_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover, batch_label, mask):
    # outs = self.word_hidden(word_inputs,feature_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover, None, None)
    def neg_log_likelihood_loss(self, hidden, hidden_adv, batch_label, mask):
        if hidden_adv is not None:
            hidden = (hidden + hidden_adv)

        outs = self.hidden2tag(hidden)

        batch_size = hidden.size(0)

        total_loss = self.crf.neg_log_likelihood_loss(outs, mask, batch_label)
        scores, tag_seq = self.crf._viterbi_decode(outs, mask)

        total_loss = total_loss / batch_size
        return total_loss, tag_seq

    def forward(self, hidden, mask):

        outs = self.hidden2tag(hidden)

        scores, tag_seq = self.crf._viterbi_decode(outs, mask)

        return tag_seq

    # def get_lstm_features(self, word_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover):
    #     return self.word_hidden(word_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover)

    def decode_nbest(self, hidden, mask, nbest):

        outs = self.hidden2tag(hidden)

        scores, tag_seq = self.crf._viterbi_decode_nbest(outs, mask, nbest)
        return scores, tag_seq
Example #3
class SeqModel(nn.Module):
    def __init__(self, data, opt):
        super(SeqModel, self).__init__()

        self.gpu = opt.gpu

        ## add two more labels for the down-layer LSTM; keep the original label size for the CRF
        self.word_hidden = WordSequence(data, opt)
        self.crf = CRF(data.label_alphabet.size(), self.gpu)

    def neg_log_likelihood_loss(self, word_inputs, word_seq_lengths,
                                char_inputs, char_seq_lengths,
                                char_seq_recover, batch_label, mask,
                                feature_inputs, text_inputs):

        outs = self.word_hidden(word_inputs, word_seq_lengths, char_inputs,
                                char_seq_lengths, char_seq_recover,
                                feature_inputs, text_inputs)
        batch_size = word_inputs.size(0)

        total_loss = self.crf.neg_log_likelihood_loss(outs, mask, batch_label)

        scores, tag_seq = self.crf._viterbi_decode(outs, mask)

        total_loss = total_loss / batch_size

        return total_loss, tag_seq

    def forward(self, word_inputs, word_seq_lengths, char_inputs,
                char_seq_lengths, char_seq_recover, mask, feature_inputs,
                text_inputs):
        outs = self.word_hidden(word_inputs, word_seq_lengths, char_inputs,
                                char_seq_lengths, char_seq_recover,
                                feature_inputs, text_inputs)

        scores, tag_seq = self.crf._viterbi_decode(outs, mask)

        return tag_seq

    def decode_nbest(self, word_inputs, word_seq_lengths, char_inputs,
                     char_seq_lengths, char_seq_recover, mask, nbest,
                     feature_inputs, text_inputs):

        outs = self.word_hidden(word_inputs, word_seq_lengths, char_inputs,
                                char_seq_lengths, char_seq_recover,
                                feature_inputs, text_inputs)

        scores, tag_seq = self.crf._viterbi_decode_nbest(outs, mask, nbest)
        return scores, tag_seq
Example #4
class Net(nn.Module):
    def __init__(self, args):
        super().__init__()
        self.args = args

        self.wemb = Wemb(args)
        self.drop = nn.Dropout(args.dropout)
        odim = len(args.tag_stoi)
        if args.ner:
            self.crf = CRF(args.tag_stoi)
            odim = len(args.tag_stoi) + 2
        if not args.lstm:
            self.ffn = nn.Sequential(nn.Linear(300, 400), nn.ReLU(),
                                     nn.Dropout(args.dropout))
        else:
            self.lstm = nn.LSTM(input_size=300,
                                hidden_size=200,
                                num_layers=2,
                                bias=True,
                                batch_first=True,
                                dropout=args.dropout,
                                bidirectional=True)
        self.hid2tag = nn.Linear(400, odim)

    def forward(self, batch):
        mask = pad_sequence([torch.ones(len(x)) for x in batch], True,
                            0).byte().cuda()
        if self.args.fix:
            with torch.no_grad():
                x = self.wemb.eval()(batch)
        else:
            x = self.wemb(batch)
        x = self.drop(x)
        if not self.args.lstm:
            x = self.ffn(x)
        else:
            x = Lstm(self.lstm, x, mask.sum(-1))
        x = self.hid2tag(x)
        return x, mask

    def train_batch(self, batch, tags):
        x, mask = self.forward(batch)
        tag_ids = pad_sequence([
            torch.LongTensor([self.args.tag_stoi[t] for t in s]) for s in tags
        ], True, self.args.tag_stoi["<pad>"]).cuda()
        if not self.args.ner:
            loss = nn.functional.cross_entropy(x[mask], tag_ids[mask])
        else:
            loss = self.crf.neg_log_likelihood_loss(x, mask, tag_ids)
        return loss

    def test_batch(self, batch):
        x, mask = self.forward(batch)
        if not self.args.ner:
            path = x.max(-1)[1]
        else:
            _, path = self.crf._viterbi_decode(x, mask)
        path = [p[m].tolist() for p, m in zip(path, mask)]
        tags = [[self.args.tag_itos[i] for i in s] for s in path]
        return tags
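
The mask in forward above is built by padding per-sentence vectors of ones. A minimal sketch of that step, dropping the .cuda() call and using a bool mask instead of .byte():

import torch
from torch.nn.utils.rnn import pad_sequence

batch = [torch.tensor([3, 7, 2]), torch.tensor([5, 1])]    # two sentences of token ids
mask = pad_sequence([torch.ones(len(x)) for x in batch],
                    batch_first=True, padding_value=0).bool()
# mask -> tensor([[ True,  True,  True],
#                 [ True,  True, False]])
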
Example #5
class deepBiLSTM_CRF(nn.Module):
    def __init__(self, word_HPs, char_HPs, num_labels=None, drop_final=0.5):
        super(deepBiLSTM_CRF, self).__init__()
        [word_size, word_dim, word_pre_embs, word_hidden_dim, word_dropout, word_layers, word_bidirect] = word_HPs
        if char_HPs:
            [char_size, char_dim, char_pred_embs, char_hidden_dim, char_dropout, char_layers, char_bidirect] = char_HPs
       
        self.lstm = Deep_bisltm(word_HPs, char_HPs, num_labels, att=True)
        # add two more labels for CRF
        self.crf = CRF(num_labels+2, use_cuda)
        ## add two more labels to learn hidden features for start and end transition 
        self.hidden2tag = nn.Linear(2*word_hidden_dim, num_labels+2)
        self.dropfinal = nn.Dropout(drop_final)
        if use_cuda:
            self.hidden2tag = self.hidden2tag.cuda()
            self.dropfinal = self.dropfinal.cuda()


    def NLL_loss(self, label_score, mask_tensor, label_tensor):
        batch_loss = self.crf.neg_log_likelihood_loss(label_score, mask_tensor, label_tensor)
        return batch_loss

    def inference(self, label_score, mask_tensor):
        label_prob, label_pred = self.crf._viterbi_decode(label_score, mask_tensor)
        return label_prob, label_pred

    def forward(self, word_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover):
        # (batch_size,sequence_len,hidden_dim)
        rnn_out = self.lstm.get_all_atthiddens(word_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover)
        # (batch_size,sequence_len,num_labels+2)
        label_score = self.hidden2tag(rnn_out)
        label_score = self.dropfinal(label_score)
        return label_score
Example #6
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = BertForTokenClassification.from_pretrained('bert-base-uncased', num_labels=len(tag_stoi) + 2)
        self.g2b = nn.Linear(300, 768)
        self.gate = nn.Linear(768, 1)
        self.crf = CRF(tag_stoi)

    def forward(self, inputs, wids, attention_mask, labels):
        b = self.net.bert.embeddings(input_ids=inputs)
        a = self.gate(b).sigmoid()
        g = self.g2b(wvec[wids].cuda())
        x = (1 - a) * b + a * g

        logits = self.net(inputs_embeds=x, attention_mask=attention_mask)[0]
        first_mask = labels != -100
        mask = lens2mask(first_mask.sum(-1)).cuda()
        logits = torch.zeros(*mask.shape, logits.shape[-1]).cuda().masked_scatter(mask[:, :, None], logits[first_mask])
        labels = torch.zeros(*mask.shape).long().cuda().masked_scatter(mask, labels[first_mask])
        return logits, mask, labels

    def train_batch(self, inputs, wids, attention_mask, labels):
        logits, mask, labels = self.forward(inputs, wids, attention_mask, labels)
        loss = self.crf.neg_log_likelihood_loss(logits, mask, labels)
        return loss

    def test_batch(self, inputs, wids, attention_mask, labels):
        logits, mask, labels = self.forward(inputs, wids, attention_mask, labels)
        _, path = self.crf._viterbi_decode(logits, mask)
        pred = [[tag_itos[i] for i in p[m]] for p, m in zip(path, mask)]
        return pred
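
This example relies on globals (wvec, tag_stoi, tag_itos) and a lens2mask helper defined elsewhere. The realignment in forward keeps only the logits at first sub-token positions (labels != -100) and packs them into word-level tensors via masked_scatter; below is a toy sketch with a hypothetical lens2mask, shown purely to illustrate the tensor manipulation:

import torch

def lens2mask(lengths, max_len=None):
    # hypothetical helper matching the usage above: (batch,) lengths -> (batch, max_len) bool mask
    max_len = max_len or int(lengths.max())
    return torch.arange(max_len).unsqueeze(0) < lengths.unsqueeze(1)

logits = torch.randn(2, 6, 3)                         # (batch, subword_len, num_tags)
labels = torch.tensor([[5, -100, 2, -100, -100, -100],
                       [1, 4, -100, 0, -100, -100]])  # -100 marks non-first subtokens / padding
first_mask = labels != -100
mask = lens2mask(first_mask.sum(-1))                  # (batch, word_len) bool
word_logits = torch.zeros(*mask.shape, logits.shape[-1]).masked_scatter(
    mask[:, :, None], logits[first_mask])
word_labels = torch.zeros(*mask.shape).long().masked_scatter(mask, labels[first_mask])
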
Example #7
class BiLSTM_CRF(nn.Module):
    def __init__(self, data):
        super(BiLSTM_CRF, self).__init__()
        print "build batched lstmcrf..."
        self.gpu = data.HP_gpu
        self.average_batch = data.HP_average_batch_loss
        ## add two more labels for the down-layer LSTM; keep the original label size for the CRF
        label_size = data.label_alphabet_size
        data.label_alphabet_size += 2
        self.lstm = BiLSTM(data)
        self.crf = CRF(label_size, self.gpu)

    def neg_log_likelihood_loss(self, word_inputs, word_seq_lengths,
                                char_inputs, char_seq_lengths,
                                char_seq_recover, batch_label, mask):
        outs = self.lstm.get_output_score(word_inputs, word_seq_lengths,
                                          char_inputs, char_seq_lengths,
                                          char_seq_recover)
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        total_loss = self.crf.neg_log_likelihood_loss(outs, mask, batch_label)
        scores, tag_seq = self.crf._viterbi_decode(outs, mask)
        if self.average_batch:
            total_loss = total_loss / batch_size
        return total_loss, tag_seq

    def forward(self, word_inputs, word_seq_lengths, char_inputs,
                char_seq_lengths, char_seq_recover, mask):
        outs = self.lstm.get_output_score(word_inputs, word_seq_lengths,
                                          char_inputs, char_seq_lengths,
                                          char_seq_recover)
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        scores, tag_seq = self.crf._viterbi_decode(outs, mask)
        return tag_seq

    def get_lstm_features(self, word_inputs, word_seq_lengths, char_inputs,
                          char_seq_lengths, char_seq_recover):
        return self.lstm.get_lstm_features(word_inputs, word_seq_lengths,
                                           char_inputs, char_seq_lengths,
                                           char_seq_recover)
Example #8
class SeqModel(nn.Module):
    def __init__(self, data):
        super(SeqModel, self).__init__()
        self.use_crf = data.use_crf
        print "build network..."
        print "use_char: ", data.use_char 
        if data.use_char:
            print "char feature extractor: ", data.char_feature_extractor
        print "word feature extractor: ", data.word_feature_extractor
        print "use crf: ", self.use_crf

        self.gpu = data.HP_gpu
        self.average_batch = data.average_batch_loss
        ## add two more labels for the down-layer LSTM; keep the original label size for the CRF
        label_size = data.label_alphabet_size
        data.label_alphabet_size += 2
        self.word_hidden = WordSequence(data)        
        if self.use_crf:
            self.crf = CRF(label_size, self.gpu)


    def neg_log_likelihood_loss(self, word_inputs, feature_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover, batch_label, mask):
        outs = self.word_hidden(word_inputs,feature_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover)
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        if self.use_crf:
            total_loss = self.crf.neg_log_likelihood_loss(outs, mask, batch_label)
            scores, tag_seq = self.crf._viterbi_decode(outs, mask)
        else:
            loss_function = nn.NLLLoss(ignore_index=0, reduction='sum')
            outs = outs.view(batch_size * seq_len, -1)
            score = F.log_softmax(outs, 1)
            total_loss = loss_function(score, batch_label.view(batch_size * seq_len))
            _, tag_seq  = torch.max(score, 1)
            tag_seq = tag_seq.view(batch_size, seq_len)
        if self.average_batch:
            total_loss = total_loss / batch_size
        return total_loss, tag_seq


    def forward(self, word_inputs, feature_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover, mask):
        outs = self.word_hidden(word_inputs,feature_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover)
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        if self.use_crf:
            scores, tag_seq = self.crf._viterbi_decode(outs, mask)
        else:
            outs = outs.view(batch_size * seq_len, -1)
            _, tag_seq  = torch.max(outs, 1)
            tag_seq = tag_seq.view(batch_size, seq_len)
            ## filter padded position with zero
            tag_seq = mask.long() * tag_seq
        return tag_seq


    # def get_lstm_features(self, word_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover):
    #     return self.word_hidden(word_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover)


    def decode_nbest(self, word_inputs, feature_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover, mask, nbest):
        if not self.use_crf:
            print "Nbest output is currently supported only for CRF! Exit..."
            exit(0)
        outs = self.word_hidden(word_inputs,feature_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover)
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        scores, tag_seq = self.crf._viterbi_decode_nbest(outs, mask, nbest)
        return scores, tag_seq
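
When use_crf is disabled, the model above falls back to a per-token softmax classifier: label id 0 acts as the padding label via ignore_index, and padded predictions are zeroed with the mask. A self-contained sketch of that branch with toy tensors (reduction='sum' replaces the deprecated size_average=False):

import torch
import torch.nn as nn
import torch.nn.functional as F

batch_size, seq_len, label_size = 2, 5, 4
outs = torch.randn(batch_size, seq_len, label_size)                       # per-token label scores
mask = torch.tensor([[1, 1, 1, 1, 0], [1, 1, 0, 0, 0]])
batch_label = torch.randint(1, label_size, (batch_size, seq_len)) * mask  # 0 = padding label

loss_function = nn.NLLLoss(ignore_index=0, reduction='sum')
score = F.log_softmax(outs.view(batch_size * seq_len, -1), 1)
total_loss = loss_function(score, batch_label.view(batch_size * seq_len))

_, tag_seq = torch.max(score, 1)
tag_seq = tag_seq.view(batch_size, seq_len) * mask.long()                 # zero out padded positions
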

        
Example #9
class NamedEntityRecog(nn.Module):
    def __init__(self,
                 vocab_size,
                 word_embed_dim,
                 word_hidden_dim,
                 alphabet_size,
                 char_embedding_dim,
                 char_hidden_dim,
                 feature_extractor,
                 tag_num,
                 dropout,
                 pretrain_embed=None,
                 use_char=False,
                 use_crf=False,
                 use_gpu=False):
        super(NamedEntityRecog, self).__init__()
        self.use_crf = use_crf
        self.use_char = use_char
        self.drop = nn.Dropout(dropout)
        self.input_dim = word_embed_dim
        self.feature_extractor = feature_extractor

        self.embeds = nn.Embedding(vocab_size, word_embed_dim, padding_idx=0)
        if pretrain_embed is not None:
            self.embeds.weight.data.copy_(torch.from_numpy(pretrain_embed))
        else:
            self.embeds.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(vocab_size, word_embed_dim)))

        if self.use_char:
            self.input_dim += char_hidden_dim
            self.char_feature = CharCNN(alphabet_size, char_embedding_dim,
                                        char_hidden_dim, dropout)

        if feature_extractor == 'lstm':
            self.lstm = nn.LSTM(self.input_dim,
                                word_hidden_dim,
                                batch_first=True,
                                bidirectional=True)
        else:
            self.word2cnn = nn.Linear(self.input_dim, word_hidden_dim * 2)
            self.cnn_list = list()
            for _ in range(4):
                self.cnn_list.append(
                    nn.Conv1d(word_hidden_dim * 2,
                              word_hidden_dim * 2,
                              kernel_size=3,
                              padding=1))
                self.cnn_list.append(nn.ReLU())
                self.cnn_list.append(nn.Dropout(dropout))
                self.cnn_list.append(nn.BatchNorm1d(word_hidden_dim * 2))
            self.cnn = nn.Sequential(*self.cnn_list)

        if self.use_crf:
            self.hidden2tag = nn.Linear(word_hidden_dim * 2, tag_num + 2)
            self.crf = CRF(tag_num, use_gpu)
        else:
            self.hidden2tag = nn.Linear(word_hidden_dim * 2, tag_num)

    def random_embedding(self, vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(1, vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale,
                                                       [1, embedding_dim])
        return pretrain_emb

    def neg_log_likelihood_loss(self, word_inputs, word_seq_lengths,
                                char_inputs, batch_label, mask):
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        word_embeding = self.embeds(word_inputs)
        word_list = [word_embeding]
        if self.use_char:
            char_features = self.char_feature(char_inputs).contiguous().view(
                batch_size, seq_len, -1)
            word_list.append(char_features)
        word_embeding = torch.cat(word_list, 2)
        word_represents = self.drop(word_embeding)
        if self.feature_extractor == 'lstm':
            packed_words = pack_padded_sequence(word_represents,
                                                word_seq_lengths, True)
            hidden = None
            lstm_out, hidden = self.lstm(packed_words, hidden)
            lstm_out, _ = pad_packed_sequence(lstm_out)
            lstm_out = lstm_out.transpose(0, 1)
            feature_out = self.drop(lstm_out)
        else:
            batch_size = word_inputs.size(0)
            word_in = torch.tanh(self.word2cnn(word_represents)).transpose(
                2, 1).contiguous()
            feature_out = self.cnn(word_in).transpose(1, 2).contiguous()

        feature_out = self.hidden2tag(feature_out)

        if self.use_crf:
            total_loss = self.crf.neg_log_likelihood_loss(
                feature_out, mask, batch_label)
        else:
            loss_function = nn.CrossEntropyLoss(ignore_index=0,
                                                reduction='sum')
            feature_out = feature_out.contiguous().view(
                batch_size * seq_len, -1)
            total_loss = loss_function(
                feature_out,
                batch_label.contiguous().view(batch_size * seq_len))
        return total_loss

    def forward(self, word_inputs, word_seq_lengths, char_inputs, batch_label,
                mask):
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        word_embeding = self.embeds(word_inputs)
        word_list = [word_embeding]
        if self.use_char:
            char_features = self.char_feature(char_inputs).contiguous().view(
                batch_size, seq_len, -1)
            word_list.append(char_features)
        word_embeding = torch.cat(word_list, 2)
        word_represents = self.drop(word_embeding)
        if self.feature_extractor == 'lstm':
            packed_words = pack_padded_sequence(word_represents,
                                                word_seq_lengths, True)
            hidden = None
            lstm_out, hidden = self.lstm(packed_words, hidden)
            lstm_out, _ = pad_packed_sequence(lstm_out)
            lstm_out = lstm_out.transpose(0, 1)
            feature_out = self.drop(lstm_out)
        else:
            batch_size = word_inputs.size(0)
            word_in = torch.tanh(self.word2cnn(word_represents)).transpose(
                2, 1).contiguous()
            feature_out = self.cnn(word_in).transpose(1, 2).contiguous()

        feature_out = self.hidden2tag(feature_out)

        if self.use_crf:
            scores, tag_seq = self.crf._viterbi_decode(feature_out, mask)
        else:
            feature_out = feature_out.contiguous().view(
                batch_size * seq_len, -1)
            _, tag_seq = torch.max(feature_out, 1)
            tag_seq = tag_seq.view(batch_size, seq_len)
            tag_seq = mask.long() * tag_seq
        return tag_seq
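
In the LSTM branch above, the padded batch is packed before the LSTM and unpacked afterwards; since pad_packed_sequence defaults to (seq_len, batch, hidden), the output is transposed back to batch-first. A minimal sketch of that round trip with hypothetical dimensions:

import torch
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

lstm = torch.nn.LSTM(input_size=8, hidden_size=4, batch_first=True, bidirectional=True)
x = torch.randn(3, 6, 8)                      # (batch, seq_len, input_dim)
lengths = torch.tensor([6, 4, 2])             # true lengths, sorted in decreasing order

packed = pack_padded_sequence(x, lengths, batch_first=True)
out, _ = lstm(packed)
out, _ = pad_packed_sequence(out)             # (seq_len, batch, 2 * hidden)
out = out.transpose(0, 1)                     # back to (batch, seq_len, 2 * hidden)
assert out.shape == (3, 6, 8)
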
Example #10
class entityRelation(nn.Module):
    def __init__(self, args, model_params):
        super(entityRelation, self).__init__()
        print("build network...")
        print("bbb")
        self.gpu = args.ifgpu
        self.label_size = model_params.label_alphabet.size()
        self.bert_encoder_dim = args.encoder_dim
        self.targetHiddenDim = args.targetHiddenDim
        self.relationHiddenDim = args.relationHiddenDim
        self.relation_num = args.relationNum
        self.drop = args.dropout
        # building model
        # encoding layer
        self.Embedding = WordEmbedding(args, model_params)
        self.encoder = WordHiddenRep(args, model_params)
        # module linear
        self.u_input_Linear = nn.Linear(self.bert_encoder_dim,
                                        self.targetHiddenDim)
        self.r_input_Linear = nn.Linear(self.bert_encoder_dim,
                                        self.relationHiddenDim)
        # Tag Linear
        self.targetHidden2Tag = nn.Linear(self.targetHiddenDim,
                                          self.label_size + 2)
        # CRF
        self.crf = CRF(self.label_size, self.gpu)
        # Relation
        self.relationAttention = RelationAttention(args)
        # Dropout
        self.dropout = nn.Dropout(self.drop)

        if self.gpu:
            self.Embedding = self.Embedding.cuda()
            self.encoder = self.encoder.cuda()
            self.u_input_Linear = self.u_input_Linear.cuda()
            self.r_input_Linear = self.r_input_Linear.cuda()
            self.targetHidden2Tag = self.targetHidden2Tag.cuda()
            self.crf = self.crf.cuda()
            self.relationAttention = self.relationAttention.cuda()
            self.dropout = self.dropout.cuda()

    def neg_log_likelihood_loss(self, all_input_ids, input_length,
                                all_input_mask, all_char_ids, char_length,
                                char_recover, all_relations, all_labels):

        batch_size = all_input_ids.size(0)
        seq_len = all_input_ids.size(1)

        targetPredictScore, R_tensor = self.mainStructure(
            all_input_ids, input_length, all_input_mask, all_char_ids,
            char_length, char_recover)

        target_loss = self.crf.neg_log_likelihood_loss(
            targetPredictScore, all_input_mask.byte(),
            all_labels) / (batch_size)
        scores, tag_seq = self.crf._viterbi_decode(targetPredictScore,
                                                   all_input_mask.byte())

        relationScale = all_relations.transpose(1, 3).contiguous().view(
            -1, self.relation_num)
        relation_loss_function = nn.BCELoss(reduction='sum')
        relationScoreLoss = R_tensor.transpose(1, 3).contiguous().view(
            -1, self.relation_num)
        relation_loss = relation_loss_function(
            relationScoreLoss, relationScale.float()) / (batch_size * seq_len)

        return target_loss, relation_loss, tag_seq, R_tensor

    def forward(self, all_input_ids, input_length, all_input_mask,
                all_char_ids, char_length, char_recover):

        targetPredictScore, R_tensor = self.mainStructure(
            all_input_ids, input_length, all_input_mask, all_char_ids,
            char_length, char_recover)
        scores, tag_seq = self.crf._viterbi_decode(targetPredictScore,
                                                   all_input_mask.byte())

        return tag_seq, R_tensor

    def mainStructure(self, all_input_ids, input_length, all_input_mask,
                      all_char_ids, char_length, char_recover):
        batch_size = all_input_ids.size(0)
        seq_len = all_input_ids.size(1)

        # encoding layer
        wordEmbedding = self.Embedding(all_input_ids, all_char_ids,
                                       char_length, char_recover)
        maskEmb = all_input_mask.view(batch_size, seq_len,
                                      1).repeat(1, 1, wordEmbedding.size(2))
        wordEmbedding = wordEmbedding * (maskEmb.float())
        sequence_output = self.encoder(wordEmbedding, input_length)

        # module linear
        h_t = self.u_input_Linear(sequence_output)
        h_r = self.r_input_Linear(sequence_output)

        # entity extraction module
        targetPredictInput = self.targetHidden2Tag(self.dropout(h_t))

        # relation detection module
        relationScore = self.relationAttention(self.dropout(h_r))

        return targetPredictInput, relationScore
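
The relation loss in neg_log_likelihood_loss treats R_tensor as per-pair relation probabilities and compares it to a 0/1 gold tensor with binary cross-entropy. A toy sketch, assuming both tensors have shape (batch, relation_num, seq_len, seq_len) and using reduction='sum' in place of the deprecated size_average=False:

import torch
import torch.nn as nn

batch_size, seq_len, relation_num = 2, 4, 3
R_tensor = torch.rand(batch_size, relation_num, seq_len, seq_len)              # predicted probabilities
all_relations = torch.randint(0, 2, (batch_size, relation_num, seq_len, seq_len))

relation_loss_function = nn.BCELoss(reduction='sum')
pred = R_tensor.transpose(1, 3).contiguous().view(-1, relation_num)
gold = all_relations.transpose(1, 3).contiguous().view(-1, relation_num)
relation_loss = relation_loss_function(pred, gold.float()) / (batch_size * seq_len)
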
Example #11
class Bilstmcrf(nn.Module):
    """
    BiLSTM-CRF model
    """
    def __init__(self, args, pretrain_word_embedding, label_size):
        super(Bilstmcrf, self).__init__()
        self.use_crf = args.use_crf
        self.use_char = args.use_char
        self.gpu = args.gpu
        self.rnn_hidden_dim = args.rnn_hidden_dim
        self.rnn_type = args.rnn_type
        self.max_seq_length = args.max_seq_length
        self.use_highway = args.use_highway
        self.dropoutlstm = nn.Dropout(args.dropoutlstm)
        self.wordrep = WordRep(args, pretrain_word_embedding)

        if self.use_char:
            self.lstm = nn.LSTM(350,
                                self.rnn_hidden_dim,
                                num_layers=args.num_layers,
                                batch_first=True,
                                bidirectional=True)
            self.gru = nn.GRU(350,
                              self.rnn_hidden_dim,
                              num_layers=args.num_layers,
                              batch_first=True,
                              bidirectional=True)
        else:
            self.lstm = nn.LSTM(300,
                                self.rnn_hidden_dim,
                                num_layers=args.num_layers,
                                batch_first=True,
                                bidirectional=True)
            self.gru = nn.GRU(300,
                              self.rnn_hidden_dim,
                              num_layers=args.num_layers,
                              batch_first=True,
                              bidirectional=True)

        self.label_size = label_size
        if self.use_crf:
            self.crf = CRF(self.label_size, self.gpu)
            self.label_size += 2
        if self.use_highway:
            self.highway = Highway(args.rnn_hidden_dim * 2, 1)

        self.hidden2tag = nn.Linear(args.rnn_hidden_dim * 2, self.label_size)

    # uses pack_padded_sequence / pad_packed_sequence for variable-length batches
    def forward(self, word_input, input_mask, labels, char_input=None):
        # word_input input_mask   FloatTensor
        if self.use_char:
            word_input = self.wordrep(word_input, char_input)
        else:
            word_input = self.wordrep(word_input)

        input_mask.requires_grad = False
        word_input = word_input * (input_mask.unsqueeze(-1).float())
        batch_size = word_input.size(0)

        total_length = word_input.size(1)
        ttt = input_mask.ge(1)
        word_seq_lengths = [int(torch.sum(i).cpu().numpy()) for i in ttt]

        if self.rnn_type == 'LSTM':
            packed_words = pack_padded_sequence(word_input,
                                                word_seq_lengths,
                                                True,
                                                enforce_sorted=False)
            lstm_out, hidden = self.lstm(packed_words)
            output, _ = pad_packed_sequence(lstm_out,
                                            batch_first=True,
                                            total_length=total_length)
        elif self.rnn_type == 'GRU':
            packed_words = pack_padded_sequence(word_input,
                                                word_seq_lengths,
                                                True,
                                                enforce_sorted=False)
            lstm_out, hidden = self.gru(packed_words)
            output, _ = pad_packed_sequence(lstm_out,
                                            batch_first=True,
                                            total_length=total_length)

        if self.use_highway:
            output = self.highway(output)

        output = self.dropoutlstm(output)
        output = self.hidden2tag(output)
        maskk = input_mask.ge(1)

        if self.use_crf:
            total_loss = self.crf.neg_log_likelihood_loss(
                output, maskk, labels)
            scores, tag_seq = self.crf._viterbi_decode(output, input_mask)
            return total_loss / batch_size, tag_seq
        else:
            loss_fct = nn.CrossEntropyLoss(ignore_index=0)
            active_loss = input_mask.view(-1) == 1
            active_logits = output.view(-1, self.label_size)[active_loss]
            active_labels = labels.view(-1)[active_loss]
            loss = loss_fct(active_logits, active_labels)
            return loss, output

    def calculate_loss(self, word_input, input_mask, labels, char_input=None):
        # word_input input_mask   FloatTensor
        if self.use_char:
            word_input = self.wordrep(word_input, char_input)
        else:
            word_input = self.wordrep(word_input)

#         print(word_input.shape)
        input_mask.requires_grad = False
        word_input = word_input * (input_mask.unsqueeze(-1).float())

        batch_size = word_input.size(0)
        if self.rnn_type == 'LSTM':
            output, _ = self.lstm(word_input)
        elif self.rnn_type == 'GRU':
            output, _ = self.gru(word_input)

        if self.use_highway:
            output = self.highway(output)

        output = self.dropoutlstm(output)
        output = self.hidden2tag(output)
        maskk = input_mask.ge(1)
        if self.use_crf:
            total_loss = self.crf.neg_log_likelihood_loss(
                output, maskk, labels)
            scores, tag_seq = self.crf._viterbi_decode(output, input_mask)
            return total_loss / batch_size, tag_seq
        else:
            loss_fct = nn.CrossEntropyLoss(ignore_index=0)

            active_loss = input_mask.view(-1) == 1
            active_logits = output.view(-1, self.label_size)[active_loss]
            active_labels = labels.view(-1)[active_loss]
            loss = loss_fct(active_logits, active_labels)

            return loss, output
Example #12
class SeqModel(nn.Module):
    def __init__(self, data):
        super(SeqModel, self).__init__()
        self.use_crf = data.use_crf
        self.use_trans = data.use_trans
        self.use_mapping = data.use_mapping
        print "build network..."
        print "use_char: ", data.use_char
        if data.use_char:
            print "char feature extractor: ", data.char_seq_feature

        print "use_trans: ", data.use_trans
        print "word feature extractor: ", data.word_feature_extractor
        print "use crf: ", self.use_crf

        self.gpu = data.gpu
        self.average_batch = data.average_batch_loss
        # add two more labels for the down-layer LSTM; keep the original label size for the CRF
        label_size = data.label_alphabet_size
        data.label_alphabet_size += 2

        self.word_hidden = WordSequence(data)

        if self.use_crf:
            self.crf = CRF(label_size, self.gpu)

    def neg_log_likelihood_loss(self, word_inputs, feature_inputs,
                                word_seq_lengths, char_inputs,
                                char_seq_lengths, char_seq_recover,
                                batch_label, mask, trans_inputs,
                                trans_seq_length, trans_seq_recover):
        outs, w_word_embs, trans_features_wc = self.word_hidden(
            word_inputs, feature_inputs, word_seq_lengths, char_inputs,
            char_seq_lengths, char_seq_recover, trans_inputs, trans_seq_length,
            trans_seq_recover)
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        wc_loss = 0
        if self.use_trans:
            if self.use_crf:
                total_loss = self.crf.neg_log_likelihood_loss(
                    outs, mask, batch_label)
                scores, tag_seq = self.crf._viterbi_decode(outs, mask)
                if self.use_mapping:
                    wc_loss = torch.norm(w_word_embs - trans_features_wc)
            else:
                loss_function = nn.NLLLoss(ignore_index=0, reduction='sum')
                outs = outs.view(batch_size * seq_len, -1)
                score = F.log_softmax(outs, 1)
                total_loss = loss_function(
                    score, batch_label.view(batch_size * seq_len))
                _, tag_seq = torch.max(score, 1)
                tag_seq = tag_seq.view(batch_size, seq_len)
                if self.use_mapping:
                    wc_loss = torch.norm(w_word_embs - trans_features_wc)
        else:
            if self.use_crf:
                total_loss = self.crf.neg_log_likelihood_loss(
                    outs, mask, batch_label)
                scores, tag_seq = self.crf._viterbi_decode(outs, mask)
            else:
                loss_function = nn.NLLLoss(ignore_index=0, reduction='sum')
                outs = outs.view(batch_size * seq_len, -1)
                score = F.log_softmax(outs, 1)
                total_loss = loss_function(
                    score, batch_label.view(batch_size * seq_len))
                _, tag_seq = torch.max(score, 1)
                tag_seq = tag_seq.view(batch_size, seq_len)

        if self.average_batch:
            total_loss = total_loss / batch_size
            if self.use_mapping:
                wc_loss = wc_loss / batch_size

        return total_loss, tag_seq, wc_loss

    def forward(self, word_inputs, feature_inputs, word_seq_lengths,
                char_inputs, char_seq_lengths, char_seq_recover, mask,
                trans_inputs, trans_seq_length, trans_seq_recover):
        # outs:(after hidden) [batch * seq_len * label_size]
        outs, w_word_embs, trans_features_wc = self.word_hidden(
            word_inputs, feature_inputs, word_seq_lengths, char_inputs,
            char_seq_lengths, char_seq_recover, trans_inputs, trans_seq_length,
            trans_seq_recover)
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        if self.use_crf:

            scores, tag_seq = self.crf._viterbi_decode(outs, mask)

        else:
            outs = outs.view(batch_size * seq_len,
                             -1)  # [batch_size * seq_len,label_size]
            _, tag_seq = torch.max(
                outs, 1
            )  # tag_seq:[batch_size * seq_len , 1] range from 0 to label_size-1
            tag_seq = tag_seq.view(batch_size, seq_len)  # [batch_size,seq_len]
            # print "before mask:{}".format(tag_seq)
            # print "mask:{}".format(mask)

            # filter padded position with zero
            tag_seq = mask.long() * tag_seq

        return tag_seq  # [batch_size,seq_len] and padding part is zero

    # def get_lstm_features(self, word_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover):
    #     return self.word_hidden(word_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover)

    def decode_nbest(self, word_inputs, feature_inputs, word_seq_lengths,
                     char_inputs, char_seq_lengths, char_seq_recover, mask,
                     nbest, trans_inputs, trans_seq_length, trans_seq_recover):
        if not self.use_crf:
            print "Nbest output is currently supported only for CRF! Exit..."
            exit(0)
        outs, w_word_embs, trans_features_wc = self.word_hidden(
            word_inputs, feature_inputs, word_seq_lengths, char_inputs,
            char_seq_lengths, char_seq_recover, trans_inputs, trans_seq_length,
            trans_seq_recover)
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        scores, tag_seq = self.crf._viterbi_decode_nbest(outs, mask, nbest)
        return scores, tag_seq

    def decode_output_intermediate_result(self, word_inputs, feature_inputs,
                                          word_seq_lengths, char_inputs,
                                          char_seq_lengths, char_seq_recover,
                                          mask, trans_inputs, trans_seq_length,
                                          trans_seq_recover):
        outs, w_word_embs, trans_features_wc = self.word_hidden(
            word_inputs, feature_inputs, word_seq_lengths, char_inputs,
            char_seq_lengths, char_seq_recover, trans_inputs, trans_seq_length,
            trans_seq_recover)
        return outs, self.crf.transitions
Example #13
class SeqModel(nn.Module):
    def __init__(self, data):
        super(SeqModel, self).__init__()
        self.use_crf = data.use_crf
        print "build network..."
        print "use_char: ", data.use_char
        if data.use_char:
            print "char feature extractor: ", data.char_feature_extractor
        print "word feature extractor: ", data.word_feature_extractor
        print "use crf: ", self.use_crf

        self.gpu = data.HP_gpu
        self.average_batch = data.average_batch_loss
        ## add two more labels for the down-layer LSTM; keep the original label size for the CRF
        label_size = data.label_alphabet_size
        # data.label_alphabet_size += 2
        # self.word_hidden = WordSequence(data, False, True, data.use_char)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(data.HP_hidden_dim, label_size + 2)

        if self.use_crf:
            self.crf = CRF(label_size, self.gpu)

        if torch.cuda.is_available():
            self.hidden2tag = self.hidden2tag.cuda(self.gpu)

        self.frozen = False

    # def neg_log_likelihood_loss(self, word_inputs, feature_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover, batch_label, mask):
    # outs = self.word_hidden(word_inputs,feature_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover, None, None)
    def neg_log_likelihood_loss(self, hidden, hidden_adv, batch_label, mask):
        if hidden_adv is not None:
            hidden = (hidden + hidden_adv)

        outs = self.hidden2tag(hidden)

        batch_size = hidden.size(0)
        seq_len = hidden.size(1)
        if self.use_crf:
            total_loss = self.crf.neg_log_likelihood_loss(
                outs, mask, batch_label)
            scores, tag_seq = self.crf._viterbi_decode(outs, mask)
        else:
            loss_function = nn.NLLLoss(ignore_index=0, reduction='sum')
            outs = outs.view(batch_size * seq_len, -1)
            score = F.log_softmax(outs, 1)
            total_loss = loss_function(score,
                                       batch_label.view(batch_size * seq_len))
            _, tag_seq = torch.max(score, 1)
            tag_seq = tag_seq.view(batch_size, seq_len)
        if self.average_batch:
            total_loss = total_loss / batch_size
        return total_loss, tag_seq

    def forward(self, hidden, mask):

        outs = self.hidden2tag(hidden)

        batch_size = hidden.size(0)
        seq_len = hidden.size(1)
        if self.use_crf:
            scores, tag_seq = self.crf._viterbi_decode(outs, mask)
        else:
            outs = outs.view(batch_size * seq_len, -1)
            _, tag_seq = torch.max(outs, 1)
            tag_seq = tag_seq.view(batch_size, seq_len)
            ## filter padded position with zero
            tag_seq = mask.long() * tag_seq
        return tag_seq

    # def get_lstm_features(self, word_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover):
    #     return self.word_hidden(word_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover)

    def decode_nbest(self, hidden, mask, nbest):
        if not self.use_crf:
            print "Nbest output is currently supported only for CRF! Exit..."
            exit(0)

        outs = self.hidden2tag(hidden)

        batch_size = hidden.size(0)
        seq_len = hidden.size(1)
        scores, tag_seq = self.crf._viterbi_decode_nbest(outs, mask, nbest)
        return scores, tag_seq

    def freeze_net(self):
        if self.frozen:
            return
        self.frozen = True

        for p in self.parameters():
            p.requires_grad = False

    def unfreeze_net(self):
        if not self.frozen:
            return
        self.frozen = False

        for p in self.parameters():
            p.requires_grad = True