Esempio n. 1
0
    def __init__(self, data):
        super(GazLSTM, self).__init__()

        self.gpu = data.HP_gpu
        self.use_biword = data.use_bigram
        self.hidden_dim = data.HP_hidden_dim
        self.gaz_alphabet = data.gaz_alphabet
        self.gaz_emb_dim = data.gaz_emb_dim
        self.word_emb_dim = data.word_emb_dim
        self.biword_emb_dim = data.biword_emb_dim
        self.use_char = data.HP_use_char
        self.bilstm_flag = data.HP_bilstm
        self.lstm_layer = data.HP_lstm_layer
        self.use_count = data.HP_use_count
        self.num_layer = data.HP_num_layer
        self.model_type = data.model_type
        self.use_bert = data.use_bert

        # self.use_gazcount = data.use_gazcount
        #设置是否使用词典
        self.use_dictionary = data.use_dictionary
        self.simi_dic_emb = data.simi_dic_emb
        self.simi_dic_dim = data.simi_dic_dim

        scale = np.sqrt(3.0 / self.gaz_emb_dim)
        data.pretrain_gaz_embedding[0, :] = np.random.uniform(
            -scale, scale, [1, self.gaz_emb_dim])

        if self.use_char:
            scale = np.sqrt(3.0 / self.word_emb_dim)
            data.pretrain_word_embedding[0, :] = np.random.uniform(
                -scale, scale, [1, self.word_emb_dim])

        self.gaz_embedding = nn.Embedding(data.gaz_alphabet.size(),
                                          self.gaz_emb_dim)  #初始化gaz随机矩阵
        self.word_embedding = nn.Embedding(data.word_alphabet.size(),
                                           self.word_emb_dim)  #初始化word随机矩阵
        if self.use_biword:
            self.biword_embedding = nn.Embedding(data.biword_alphabet.size(),
                                                 self.biword_emb_dim)

        if data.pretrain_gaz_embedding is not None:
            self.gaz_embedding.weight.data.copy_(
                torch.from_numpy(data.pretrain_gaz_embedding)
            )  #将data.pretrain_gaz_embedding的值拷贝到gaz_embedding中
        else:
            self.gaz_embedding.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.gaz_alphabet.size(),
                                          self.gaz_emb_dim)))

        if data.pretrain_word_embedding is not None:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.word_alphabet.size(),
                                          self.word_emb_dim)))
        if self.use_biword:
            if data.pretrain_biword_embedding is not None:
                self.biword_embedding.weight.data.copy_(
                    torch.from_numpy(data.pretrain_biword_embedding))
            else:
                self.biword_embedding.weight.data.copy_(
                    torch.from_numpy(
                        self.random_embedding(data.biword_alphabet.size(),
                                              self.word_emb_dim)))

        use_gazcount = True
        #字符的特征纬度
        char_feature_dim = self.word_emb_dim + 4 * self.gaz_emb_dim
        if self.use_dictionary:
            if use_gazcount:
                char_feature_dim += self.simi_dic_dim
            else:
                char_feature_dim = self.word_emb_dim  #+ self.simi_dic_dim

        if self.use_biword:
            char_feature_dim += self.biword_emb_dim

        if self.use_bert:
            char_feature_dim = char_feature_dim + 768

        ## lstm model
        if self.model_type == 'lstm':
            lstm_hidden = self.hidden_dim
            if self.bilstm_flag:
                self.hidden_dim *= 2

            self.NERmodel = NERmodel(model_type='lstm',
                                     input_dim=char_feature_dim,
                                     hidden_dim=lstm_hidden,
                                     num_layer=self.lstm_layer,
                                     biflag=self.bilstm_flag)

        ## cnn model
        if self.model_type == 'cnn':
            self.NERmodel = NERmodel(model_type='cnn',
                                     input_dim=char_feature_dim,
                                     hidden_dim=self.hidden_dim,
                                     num_layer=self.num_layer,
                                     dropout=data.HP_dropout,
                                     gpu=self.gpu)

        ## attention model
        if self.model_type == 'transformer':
            self.NERmodel = NERmodel(model_type='transformer',
                                     input_dim=char_feature_dim,
                                     hidden_dim=self.hidden_dim,
                                     num_layer=self.num_layer,
                                     dropout=data.HP_dropout)

        self.drop = nn.Dropout(p=data.HP_dropout)  #按照0.5的概率改为零
        self.hidden2tag = nn.Linear(self.hidden_dim,
                                    data.label_alphabet_size + 2)
        self.crf = CRF(data.label_alphabet_size, self.gpu)

        if self.use_bert:
            self.bert_encoder = BertModel.from_pretrained('bert-base-chinese')
            for p in self.bert_encoder.parameters():
                p.requires_grad = False

        if self.gpu:
            self.gaz_embedding = self.gaz_embedding.cuda()
            self.word_embedding = self.word_embedding.cuda()
            if self.use_biword:
                self.biword_embedding = self.biword_embedding.cuda()
            self.NERmodel = self.NERmodel.cuda()
            self.hidden2tag = self.hidden2tag.cuda()
            self.crf = self.crf.cuda()

            if self.use_bert:
                self.bert_encoder = self.bert_encoder.cuda()
Esempio n. 2
0
class GazLSTM(nn.Module):
    def __init__(self, data):
        super(GazLSTM, self).__init__()
        self.gpu = data.HP_gpu
        self.use_biword = data.use_bigram
        self.hidden_dim = data.HP_hidden_dim
        self.gaz_alphabet = data.gaz_alphabet
        self.gaz_emb_dim = data.gaz_emb_dim
        self.word_emb_dim = data.word_emb_dim
        self.biword_emb_dim = data.biword_emb_dim
        self.use_char = data.HP_use_char
        self.bilstm_flag = data.HP_bilstm
        self.lstm_layer = data.HP_lstm_layer
        self.use_count = data.HP_use_count
        self.num_layer = data.HP_num_layer
        self.model_type = data.model_type

        scale = np.sqrt(3.0 / self.gaz_emb_dim)
        data.pretrain_gaz_embedding[0, :] = np.random.uniform(
            -scale, scale, [1, self.gaz_emb_dim])

        if self.use_char:
            scale = np.sqrt(3.0 / self.word_emb_dim)
            data.pretrain_word_embedding[0, :] = np.random.uniform(
                -scale, scale, [1, self.word_emb_dim])

        self.gaz_embedding = nn.Embedding(data.gaz_alphabet.size(),
                                          self.gaz_emb_dim)
        self.word_embedding = nn.Embedding(data.word_alphabet.size(),
                                           self.word_emb_dim)
        if self.use_biword:
            self.biword_embedding = nn.Embedding(data.biword_alphabet.size(),
                                                 self.biword_emb_dim)

        if data.pretrain_gaz_embedding is not None:
            self.gaz_embedding.weight.data.copy_(
                torch.from_numpy(data.pretrain_gaz_embedding))
        else:
            self.gaz_embedding.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.gaz_alphabet.size(),
                                          self.gaz_emb_dim)))

        if data.pretrain_word_embedding is not None:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.word_alphabet.size(),
                                          self.word_emb_dim)))
        if self.use_biword:
            if data.pretrain_biword_embedding is not None:
                self.biword_embedding.weight.data.copy_(
                    torch.from_numpy(data.pretrain_biword_embedding))
            else:
                self.biword_embedding.weight.data.copy_(
                    torch.from_numpy(
                        self.random_embedding(data.biword_alphabet.size(),
                                              self.word_emb_dim)))

        char_feature_dim = self.word_emb_dim + 4 * self.gaz_emb_dim
        if self.use_biword:
            char_feature_dim += self.biword_emb_dim

        ## lstm model
        if self.model_type == 'lstm':
            lstm_hidden = self.hidden_dim
            if self.bilstm_flag:
                self.hidden_dim *= 2
            self.NERmodel = NERmodel(model_type='lstm',
                                     input_dim=char_feature_dim,
                                     hidden_dim=lstm_hidden,
                                     num_layer=self.lstm_layer,
                                     biflag=self.bilstm_flag)

        ## cnn model
        if self.model_type == 'cnn':
            self.NERmodel = NERmodel(model_type='cnn',
                                     input_dim=char_feature_dim,
                                     hidden_dim=self.hidden_dim,
                                     num_layer=self.num_layer,
                                     dropout=data.HP_dropout,
                                     gpu=self.gpu)

        ## attention model
        if self.model_type == 'transformer':
            self.NERmodel = NERmodel(model_type='transformer',
                                     input_dim=char_feature_dim,
                                     hidden_dim=self.hidden_dim,
                                     num_layer=self.num_layer,
                                     dropout=data.HP_dropout)

        self.drop = nn.Dropout(p=data.HP_dropout)
        self.hidden2tag = nn.Linear(self.hidden_dim,
                                    data.label_alphabet_size + 2)
        self.crf = CRF(data.label_alphabet_size, self.gpu)

        if self.gpu:
            #self.drop = self.drop.cuda()
            self.gaz_embedding = self.gaz_embedding.cuda()
            self.word_embedding = self.word_embedding.cuda()
            if self.use_biword:
                self.biword_embedding = self.biword_embedding.cuda()
            self.NERmodel = self.NERmodel.cuda()
            self.hidden2tag = self.hidden2tag.cuda()
            self.crf = self.crf.cuda()

    def get_tags(self, gaz_list, word_inputs, biword_inputs, layer_gaz,
                 gaz_count, gaz_chars, gaz_mask_input, gazchar_mask_input,
                 mask):

        batch_size = word_inputs.size()[0]
        seq_len = word_inputs.size()[1]
        max_gaz_num = layer_gaz.size(-1)
        gaz_match = []

        word_embs = self.word_embedding(word_inputs)

        if self.use_biword:
            biword_embs = self.biword_embedding(biword_inputs)
            word_embs = torch.cat([word_embs, biword_embs], dim=-1)

        if self.model_type != 'transformer':
            word_inputs_d = self.drop(word_embs)  #(b,l,we)
        else:
            word_inputs_d = word_embs

        if self.use_char:
            gazchar_embeds = self.word_embedding(gaz_chars)

            gazchar_mask = gazchar_mask_input.unsqueeze(-1).repeat(
                1, 1, 1, 1, 1, self.word_emb_dim)
            gazchar_embeds = gazchar_embeds.data.masked_fill_(
                gazchar_mask.data, 0)  #(b,l,4,gl,cl,ce)

            # gazchar_mask_input:(b,l,4,gl,cl)
            gaz_charnum = (gazchar_mask_input == 0).sum(
                dim=-1, keepdim=True).float()  #(b,l,4,gl,1)
            gaz_charnum = gaz_charnum + (gaz_charnum == 0).float()
            gaz_embeds = gazchar_embeds.sum(-2) / gaz_charnum  #(b,l,4,gl,ce)

            if self.model_type != 'transformer':
                gaz_embeds = self.drop(gaz_embeds)
            else:
                gaz_embeds = gaz_embeds

        else:  #use gaz embedding
            gaz_embeds = self.gaz_embedding(layer_gaz)

            if self.model_type != 'transformer':
                gaz_embeds_d = self.drop(gaz_embeds)
            else:
                gaz_embeds_d = gaz_embeds

            gaz_mask = gaz_mask_input.unsqueeze(-1).repeat(
                1, 1, 1, 1, self.gaz_emb_dim)

            gaz_embeds = gaz_embeds_d.data.masked_fill_(
                gaz_mask.data, 0)  #(b,l,4,g,ge)  ge:gaz_embed_dim

        if self.use_count:
            count_sum = torch.sum(gaz_count, dim=3, keepdim=True)  #(b,l,4,gn)
            count_sum = torch.sum(count_sum, dim=2, keepdim=True)  #(b,l,1,1)

            weights = gaz_count.div(count_sum)  #(b,l,4,g)
            weights = weights * 4
            weights = weights.unsqueeze(-1)
            gaz_embeds = weights * gaz_embeds  #(b,l,4,g,e)
            gaz_embeds = torch.sum(gaz_embeds, dim=3)  #(b,l,4,e)

        else:
            gaz_num = (gaz_mask_input == 0).sum(
                dim=-1, keepdim=True).float()  #(b,l,4,1)
            gaz_embeds = gaz_embeds.sum(-2) / gaz_num  #(b,l,4,ge)/(b,l,4,1)

        gaz_embeds_cat = gaz_embeds.view(batch_size, seq_len, -1)  #(b,l,4*ge)

        word_input_cat = torch.cat([word_inputs_d, gaz_embeds_cat],
                                   dim=-1)  #(b,l,we+4*ge)

        feature_out_d = self.NERmodel(word_input_cat)

        tags = self.hidden2tag(feature_out_d)

        return tags, gaz_match

    def neg_log_likelihood_loss(self, gaz_list, word_inputs, biword_inputs,
                                word_seq_lengths, layer_gaz, gaz_count,
                                gaz_chars, gaz_mask, gazchar_mask, mask,
                                batch_label):

        tags, _ = self.get_tags(gaz_list, word_inputs, biword_inputs,
                                layer_gaz, gaz_count, gaz_chars, gaz_mask,
                                gazchar_mask, mask)

        total_loss = self.crf.neg_log_likelihood_loss(tags, mask, batch_label)
        scores, tag_seq = self.crf._viterbi_decode(tags, mask)

        return total_loss, tag_seq

    def forward(self, gaz_list, word_inputs, biword_inputs, word_seq_lengths,
                layer_gaz, gaz_count, gaz_chars, gaz_mask, gazchar_mask, mask):

        tags, gaz_match = self.get_tags(gaz_list, word_inputs, biword_inputs,
                                        layer_gaz, gaz_count, gaz_chars,
                                        gaz_mask, gazchar_mask, mask)

        scores, tag_seq = self.crf._viterbi_decode(tags, mask)

        return tag_seq, gaz_match
Esempio n. 3
0
class GazLSTM(nn.Module):
    def __init__(self, data):
        super(GazLSTM, self).__init__()

        self.gpu = data.HP_gpu
        self.use_biword = data.use_bigram
        self.hidden_dim = data.HP_hidden_dim
        self.gaz_alphabet = data.gaz_alphabet
        self.gaz_emb_dim = data.gaz_emb_dim
        self.word_emb_dim = data.word_emb_dim
        self.biword_emb_dim = data.biword_emb_dim
        self.use_char = data.HP_use_char
        self.bilstm_flag = data.HP_bilstm
        self.lstm_layer = data.HP_lstm_layer
        self.use_count = data.HP_use_count
        self.num_layer = data.HP_num_layer
        self.model_type = data.model_type
        self.use_bert = data.use_bert

        # self.use_gazcount = data.use_gazcount
        #设置是否使用词典
        self.use_dictionary = data.use_dictionary
        self.simi_dic_emb = data.simi_dic_emb
        self.simi_dic_dim = data.simi_dic_dim

        scale = np.sqrt(3.0 / self.gaz_emb_dim)
        data.pretrain_gaz_embedding[0, :] = np.random.uniform(
            -scale, scale, [1, self.gaz_emb_dim])

        if self.use_char:
            scale = np.sqrt(3.0 / self.word_emb_dim)
            data.pretrain_word_embedding[0, :] = np.random.uniform(
                -scale, scale, [1, self.word_emb_dim])

        self.gaz_embedding = nn.Embedding(data.gaz_alphabet.size(),
                                          self.gaz_emb_dim)  #初始化gaz随机矩阵
        self.word_embedding = nn.Embedding(data.word_alphabet.size(),
                                           self.word_emb_dim)  #初始化word随机矩阵
        if self.use_biword:
            self.biword_embedding = nn.Embedding(data.biword_alphabet.size(),
                                                 self.biword_emb_dim)

        if data.pretrain_gaz_embedding is not None:
            self.gaz_embedding.weight.data.copy_(
                torch.from_numpy(data.pretrain_gaz_embedding)
            )  #将data.pretrain_gaz_embedding的值拷贝到gaz_embedding中
        else:
            self.gaz_embedding.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.gaz_alphabet.size(),
                                          self.gaz_emb_dim)))

        if data.pretrain_word_embedding is not None:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.word_alphabet.size(),
                                          self.word_emb_dim)))
        if self.use_biword:
            if data.pretrain_biword_embedding is not None:
                self.biword_embedding.weight.data.copy_(
                    torch.from_numpy(data.pretrain_biword_embedding))
            else:
                self.biword_embedding.weight.data.copy_(
                    torch.from_numpy(
                        self.random_embedding(data.biword_alphabet.size(),
                                              self.word_emb_dim)))

        use_gazcount = True
        #字符的特征纬度
        char_feature_dim = self.word_emb_dim + 4 * self.gaz_emb_dim
        if self.use_dictionary:
            if use_gazcount:
                char_feature_dim += self.simi_dic_dim
            else:
                char_feature_dim = self.word_emb_dim  #+ self.simi_dic_dim

        if self.use_biword:
            char_feature_dim += self.biword_emb_dim

        if self.use_bert:
            char_feature_dim = char_feature_dim + 768

        ## lstm model
        if self.model_type == 'lstm':
            lstm_hidden = self.hidden_dim
            if self.bilstm_flag:
                self.hidden_dim *= 2

            self.NERmodel = NERmodel(model_type='lstm',
                                     input_dim=char_feature_dim,
                                     hidden_dim=lstm_hidden,
                                     num_layer=self.lstm_layer,
                                     biflag=self.bilstm_flag)

        ## cnn model
        if self.model_type == 'cnn':
            self.NERmodel = NERmodel(model_type='cnn',
                                     input_dim=char_feature_dim,
                                     hidden_dim=self.hidden_dim,
                                     num_layer=self.num_layer,
                                     dropout=data.HP_dropout,
                                     gpu=self.gpu)

        ## attention model
        if self.model_type == 'transformer':
            self.NERmodel = NERmodel(model_type='transformer',
                                     input_dim=char_feature_dim,
                                     hidden_dim=self.hidden_dim,
                                     num_layer=self.num_layer,
                                     dropout=data.HP_dropout)

        self.drop = nn.Dropout(p=data.HP_dropout)  #按照0.5的概率改为零
        self.hidden2tag = nn.Linear(self.hidden_dim,
                                    data.label_alphabet_size + 2)
        self.crf = CRF(data.label_alphabet_size, self.gpu)

        if self.use_bert:
            self.bert_encoder = BertModel.from_pretrained('bert-base-chinese')
            for p in self.bert_encoder.parameters():
                p.requires_grad = False

        if self.gpu:
            self.gaz_embedding = self.gaz_embedding.cuda()
            self.word_embedding = self.word_embedding.cuda()
            if self.use_biword:
                self.biword_embedding = self.biword_embedding.cuda()
            self.NERmodel = self.NERmodel.cuda()
            self.hidden2tag = self.hidden2tag.cuda()
            self.crf = self.crf.cuda()

            if self.use_bert:
                self.bert_encoder = self.bert_encoder.cuda()

    #获取嵌入的函数
    def get_tags(self, gaz_list, word_inputs, biword_inputs, layer_gaz,
                 gaz_count, gaz_chars, gaz_mask_input, gazchar_mask_input,
                 mask, word_seq_lengths, batch_bert, bert_mask, simi_value):
        use_gazcount = True
        batch_size = word_inputs.size()[0]
        seq_len = word_inputs.size()[1]
        max_gaz_num = layer_gaz.size(-1)
        gaz_match = []

        word_embs = self.word_embedding(word_inputs)

        if self.use_biword:
            biword_embs = self.biword_embedding(biword_inputs)
            word_embs = torch.cat([word_embs, biword_embs], dim=-1)

        if self.model_type != 'transformer':
            word_inputs_d = self.drop(word_embs)  #(b,l,we)
            print(type(word_inputs_d))
        else:
            word_inputs_d = word_embs

        if self.use_char:
            gazchar_embeds = self.word_embedding(gaz_chars)

            gazchar_mask = gazchar_mask_input.unsqueeze(-1).repeat(
                1, 1, 1, 1, 1, self.word_emb_dim)
            gazchar_embeds = gazchar_embeds.data.masked_fill_(
                gazchar_mask.data, 0)  #(b,l,4,gl,cl,ce)

            # gazchar_mask_input:(b,l,4,gl,cl)
            gaz_charnum = (gazchar_mask_input == 0).sum(
                dim=-1, keepdim=True).float()  #(b,l,4,gl,1)
            gaz_charnum = gaz_charnum + (gaz_charnum == 0).float()
            gaz_embeds = gazchar_embeds.sum(-2) / gaz_charnum  #(b,l,4,gl,ce)

            if self.model_type != 'transformer':
                gaz_embeds = self.drop(gaz_embeds)
            else:
                gaz_embeds = gaz_embeds

        else:  #use gaz embedding
            gaz_embeds = self.gaz_embedding(layer_gaz)

            if self.model_type != 'transformer':
                gaz_embeds_d = self.drop(gaz_embeds)
            else:
                gaz_embeds_d = gaz_embeds

            gaz_mask = gaz_mask_input.unsqueeze(-1).repeat(
                1, 1, 1, 1, self.gaz_emb_dim)

            gaz_embeds = gaz_embeds_d.data.masked_fill_(
                gaz_mask.data, 0)  #(b,l,4,g,ge)  ge:gaz_embed_dim

        if self.use_count:
            count_sum = torch.sum(gaz_count, dim=3, keepdim=True)  #(b,l,4,gn)
            count_sum = torch.sum(count_sum, dim=2, keepdim=True)  #(b,l,1,1)

            weights = gaz_count.div(count_sum)  #(b,l,4,g)
            weights = weights * 4
            weights = weights.unsqueeze(-1)
            gaz_embeds = weights * gaz_embeds  #(b,l,4,g,e)
            gaz_embeds = torch.sum(gaz_embeds, dim=3)  #(b,l,4,e)

        else:
            gaz_num = (gaz_mask_input == 0).sum(
                dim=-1, keepdim=True).float()  #(b,l,4,1)
            gaz_embeds = gaz_embeds.sum(-2) / gaz_num  #(b,l,4,ge)/(b,l,4,1)

        gaz_embeds_cat = gaz_embeds.view(batch_size, seq_len, -1)  #(b,l,4*ge)
        print(type(gaz_embeds_cat))
        if self.use_dictionary:  #拼接词典的向量
            simi_embeds = []
            for key in simi_value:
                for value in key:
                    simi = [value for i in range(self.simi_dic_dim)]
                    simi_embeds.append(simi)
            print(simi_embeds)
            simi_embeds = torch.Tensor(simi_embeds)
            simi_embeds = simi_embeds.cuda()
            print(simi_embeds)
            simi_embeds_cat = simi_embeds.view(batch_size, seq_len, -1)
            self.simi_dic_emb = simi_embeds
            if use_gazcount:
                word_input_cat = torch.cat(
                    [word_inputs_d, gaz_embeds_cat, simi_embeds_cat], dim=-1)
            else:
                #word_input_cat = torch.cat([word_inputs_d, simi_embeds_cat],dim = -1)
                word_input_cat = torch.cat([word_inputs_d], dim=-1)

        else:
            word_input_cat = torch.cat([word_inputs_d, gaz_embeds_cat],
                                       dim=-1)  #(b,l,we+4*ge)
        #print(len(word_input_cat))
        # if only_char:
        #     word_input_cat= torch.cat()

        ### cat bert feature
        if self.use_bert:
            seg_id = torch.zeros(bert_mask.size()).long().cuda()
            outputs = self.bert_encoder(batch_bert, bert_mask, seg_id)
            outputs = outputs[0][:, 1:-1, :]
            word_input_cat = torch.cat([word_input_cat, outputs], dim=-1)

        feature_out_d = self.NERmodel(word_input_cat)

        tags = self.hidden2tag(feature_out_d)

        return tags, gaz_match

    def neg_log_likelihood_loss(self, gaz_list, word_inputs, biword_inputs,
                                word_seq_lengths, layer_gaz, gaz_count,
                                gaz_chars, gaz_mask, gazchar_mask, mask,
                                batch_label, batch_bert, bert_mask,
                                simi_value):

        tags, _ = self.get_tags(gaz_list, word_inputs, biword_inputs,
                                layer_gaz, gaz_count, gaz_chars, gaz_mask,
                                gazchar_mask, mask, word_seq_lengths,
                                batch_bert, bert_mask, simi_value)

        total_loss = self.crf.neg_log_likelihood_loss(tags, mask, batch_label)
        scores, tag_seq = self.crf._viterbi_decode(tags, mask)

        return total_loss, tag_seq

    def forward(self, gaz_list, word_inputs, biword_inputs, word_seq_lengths,
                layer_gaz, gaz_count, gaz_chars, gaz_mask, gazchar_mask, mask,
                batch_bert, bert_mask, simi_value):

        tags, gaz_match = self.get_tags(gaz_list, word_inputs, biword_inputs,
                                        layer_gaz, gaz_count, gaz_chars,
                                        gaz_mask, gazchar_mask, mask,
                                        word_seq_lengths, batch_bert,
                                        bert_mask, simi_value)

        scores, tag_seq = self.crf._viterbi_decode(tags, mask)

        return tag_seq, gaz_match
Esempio n. 4
0
    def __init__(self, data):
        super(GazLSTM, self).__init__()
        self.gpu = data.HP_gpu
        self.use_biword = data.use_bigram
        self.hidden_dim = data.HP_hidden_dim
        self.gaz_alphabet = data.gaz_alphabet
        self.gaz_emb_dim = data.gaz_emb_dim
        self.word_emb_dim = data.word_emb_dim
        self.biword_emb_dim = data.biword_emb_dim
        self.use_char = data.HP_use_char
        self.bilstm_flag = data.HP_bilstm
        self.lstm_layer = data.HP_lstm_layer
        self.use_count = data.HP_use_count
        self.num_layer = data.HP_num_layer
        self.model_type = data.model_type

        scale = np.sqrt(3.0 / self.gaz_emb_dim)
        data.pretrain_gaz_embedding[0, :] = np.random.uniform(
            -scale, scale, [1, self.gaz_emb_dim])

        if self.use_char:
            scale = np.sqrt(3.0 / self.word_emb_dim)
            data.pretrain_word_embedding[0, :] = np.random.uniform(
                -scale, scale, [1, self.word_emb_dim])

        self.gaz_embedding = nn.Embedding(data.gaz_alphabet.size(),
                                          self.gaz_emb_dim)
        self.word_embedding = nn.Embedding(data.word_alphabet.size(),
                                           self.word_emb_dim)
        if self.use_biword:
            self.biword_embedding = nn.Embedding(data.biword_alphabet.size(),
                                                 self.biword_emb_dim)

        if data.pretrain_gaz_embedding is not None:
            self.gaz_embedding.weight.data.copy_(
                torch.from_numpy(data.pretrain_gaz_embedding))
        else:
            self.gaz_embedding.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.gaz_alphabet.size(),
                                          self.gaz_emb_dim)))

        if data.pretrain_word_embedding is not None:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embedding.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.word_alphabet.size(),
                                          self.word_emb_dim)))
        if self.use_biword:
            if data.pretrain_biword_embedding is not None:
                self.biword_embedding.weight.data.copy_(
                    torch.from_numpy(data.pretrain_biword_embedding))
            else:
                self.biword_embedding.weight.data.copy_(
                    torch.from_numpy(
                        self.random_embedding(data.biword_alphabet.size(),
                                              self.word_emb_dim)))

        char_feature_dim = self.word_emb_dim + 4 * self.gaz_emb_dim
        if self.use_biword:
            char_feature_dim += self.biword_emb_dim

        ## lstm model
        if self.model_type == 'lstm':
            lstm_hidden = self.hidden_dim
            if self.bilstm_flag:
                self.hidden_dim *= 2
            self.NERmodel = NERmodel(model_type='lstm',
                                     input_dim=char_feature_dim,
                                     hidden_dim=lstm_hidden,
                                     num_layer=self.lstm_layer,
                                     biflag=self.bilstm_flag)

        ## cnn model
        if self.model_type == 'cnn':
            self.NERmodel = NERmodel(model_type='cnn',
                                     input_dim=char_feature_dim,
                                     hidden_dim=self.hidden_dim,
                                     num_layer=self.num_layer,
                                     dropout=data.HP_dropout,
                                     gpu=self.gpu)

        ## attention model
        if self.model_type == 'transformer':
            self.NERmodel = NERmodel(model_type='transformer',
                                     input_dim=char_feature_dim,
                                     hidden_dim=self.hidden_dim,
                                     num_layer=self.num_layer,
                                     dropout=data.HP_dropout)

        self.drop = nn.Dropout(p=data.HP_dropout)
        self.hidden2tag = nn.Linear(self.hidden_dim,
                                    data.label_alphabet_size + 2)
        self.crf = CRF(data.label_alphabet_size, self.gpu)

        if self.gpu:
            #self.drop = self.drop.cuda()
            self.gaz_embedding = self.gaz_embedding.cuda()
            self.word_embedding = self.word_embedding.cuda()
            if self.use_biword:
                self.biword_embedding = self.biword_embedding.cuda()
            self.NERmodel = self.NERmodel.cuda()
            self.hidden2tag = self.hidden2tag.cuda()
            self.crf = self.crf.cuda()