Example #1
0
class SeqModel(nn.Module):
    def __init__(self, data):
        super(SeqModel, self).__init__()
        self.use_crf = data.use_crf

        self.gpu = data.HP_gpu
        self.average_batch = data.average_batch_loss
        ## add two more label for downlayer lstm, use original label size for CRF
        label_size = data.label_alphabet_size
        # data.label_alphabet_size += 2
        # self.word_hidden = WordSequence(data, False, True, data.use_char)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(data.HP_hidden_dim, label_size + 2)

        if self.use_crf:
            self.crf = CRF(label_size, self.gpu)

        if torch.cuda.is_available():
            self.hidden2tag = self.hidden2tag.cuda(self.gpu)

        self.frozen = False

    def neg_log_likelihood_loss(self, hidden, batch_label, mask):

        outs = self.hidden2tag(hidden)

        batch_size = hidden.size(0)
        seq_len = hidden.size(1)
        if self.use_crf:
            total_loss = self.crf.neg_log_likelihood_loss(
                outs, mask, batch_label)
            scores, tag_seq = self.crf._viterbi_decode(outs, mask)
        else:
            loss_function = nn.NLLLoss(ignore_index=0, size_average=False)
            outs = outs.view(batch_size * seq_len, -1)
            score = F.log_softmax(outs, 1)
            total_loss = loss_function(score,
                                       batch_label.view(batch_size * seq_len))
            _, tag_seq = torch.max(score, 1)
            tag_seq = tag_seq.view(batch_size, seq_len)

        if self.average_batch:
            total_loss = total_loss / batch_size
        return total_loss, tag_seq

    def forward(self, hidden, mask):

        outs = self.hidden2tag(hidden)

        batch_size = hidden.size(0)
        seq_len = hidden.size(1)
        if self.use_crf:
            scores, tag_seq = self.crf._viterbi_decode(outs, mask)
        else:
            outs = outs.view(batch_size * seq_len, -1)
            _, tag_seq = torch.max(outs, 1)
            tag_seq = tag_seq.view(batch_size, seq_len)
            ## filter padded position with zero
            tag_seq = mask.long() * tag_seq
        return tag_seq

    def decode_nbest(self, hidden, mask, nbest):
        if not self.use_crf:
            print "Nbest output is currently supported only for CRF! Exit..."
            exit(0)

        outs = self.hidden2tag(hidden)

        batch_size = hidden.size(0)
        seq_len = hidden.size(1)
        scores, tag_seq = self.crf._viterbi_decode_nbest(outs, mask, nbest)
        return scores, tag_seq
Example #2
0
class Elmo_SeqLabel(nn.Module):
    def __init__(self, data):
        super(Elmo_SeqLabel, self).__init__()
        self.use_crf = data.use_crf
        print("build elmo sequence labeling network...")
        print("use crf: ", self.use_crf)

        self.gpu = data.HP_gpu
        self.average_batch = data.average_batch_loss
        ## add two more label for downlayer lstm, use original label size for CRF
        label_size = data.label_alphabet_size
        data.label_alphabet_size += 2

        self.word_hidden = Elmo(data.elmo_options_file,
                                data.elmo_weight_file,
                                1,
                                requires_grad=data.elmo_tune,
                                dropout=data.elmo_dropout)

        with open(data.elmo_options_file, 'r') as fin:
            self._options = json.load(fin)
        self.hidden2tag = nn.Linear(
            self._options['lstm']['projection_dim'] * 2,
            data.label_alphabet_size)

        if self.use_crf:
            self.crf = CRF(label_size, self.gpu)

        if self.gpu >= 0 and torch.cuda.is_available():
            self.word_hidden = self.word_hidden.cuda(self.gpu)
            self.hidden2tag = self.hidden2tag.cuda(self.gpu)

    def calculate_loss(self, word_inputs, feature_inputs, word_seq_lengths,
                       char_inputs, char_seq_lengths, char_seq_recover,
                       batch_label, mask):
        elmo_outputs = self.word_hidden(char_inputs)
        outs = elmo_outputs['elmo_representations'][0]
        # mask = elmo_outputs['mask']
        batch_size = char_inputs.size(0)
        seq_len = char_inputs.size(1)
        outs = self.hidden2tag(outs)
        if self.use_crf:
            total_loss = self.crf.neg_log_likelihood_loss(
                outs, mask, batch_label)
            scores, tag_seq = self.crf._viterbi_decode(outs, mask)
        else:
            loss_function = nn.NLLLoss(ignore_index=0, size_average=False)
            outs = outs.view(batch_size * seq_len, -1)
            score = F.log_softmax(outs, 1)
            total_loss = loss_function(score,
                                       batch_label.view(batch_size * seq_len))
            _, tag_seq = torch.max(score, 1)
            tag_seq = tag_seq.view(batch_size, seq_len)
        if self.average_batch:
            total_loss = total_loss / batch_size
        return total_loss, tag_seq

    def forward(self, word_inputs, feature_inputs, word_seq_lengths,
                char_inputs, char_seq_lengths, char_seq_recover, mask):
        elmo_outputs = self.word_hidden(char_inputs)
        outs = elmo_outputs['elmo_representations'][0]
        # mask = elmo_outputs['mask']
        batch_size = char_inputs.size(0)
        seq_len = char_inputs.size(1)
        outs = self.hidden2tag(outs)
        if self.use_crf:
            scores, tag_seq = self.crf._viterbi_decode(outs, mask)
        else:
            outs = outs.view(batch_size * seq_len, -1)
            _, tag_seq = torch.max(outs, 1)
            tag_seq = tag_seq.view(batch_size, seq_len)
            ## filter padded position with zero
            tag_seq = mask.long() * tag_seq
        return tag_seq

    def decode_nbest(self, word_inputs, feature_inputs, word_seq_lengths,
                     char_inputs, char_seq_lengths, char_seq_recover, mask,
                     nbest):
        if not self.use_crf:
            print("Nbest output is currently supported only for CRF! Exit...")
            exit(0)
        elmo_outputs = self.word_hidden(char_inputs)
        outs = elmo_outputs['elmo_representations'][0]
        # mask = elmo_outputs['mask']
        outs = self.hidden2tag(outs)
        scores, tag_seq = self.crf._viterbi_decode_nbest(outs, mask, nbest)
        return scores, tag_seq