Example #1
    def test_case2(self):
        # Test that the CRF works correctly.
        import json
        import torch
        from fastNLP import seq_len_to_mask

        with open('tests/data_for_tests/modules/decoder/crf.json', 'r') as f:
            data = json.load(f)

        bio_logits = torch.FloatTensor(data['bio_logits'])
        bio_scores = data['bio_scores']
        bio_path = data['bio_path']
        bio_trans_m = torch.FloatTensor(data['bio_trans_m'])
        bio_seq_lens = torch.LongTensor(data['bio_seq_lens'])

        bmes_logits = torch.FloatTensor(data['bmes_logits'])
        bmes_scores = data['bmes_scores']
        bmes_path = data['bmes_path']
        bmes_trans_m = torch.FloatTensor(data['bmes_trans_m'])
        bmes_seq_lens = torch.LongTensor(data['bmes_seq_lens'])

        labels = ['O']
        for label in ['X', 'Y']:
            for tag in 'BI':
                labels.append('{}-{}'.format(tag, label))
        id2label = {idx: label for idx, label in enumerate(labels)}
        num_tags = len(id2label)

        mask = seq_len_to_mask(bio_seq_lens)

        from fastNLP.modules.decoder.crf import ConditionalRandomField, allowed_transitions
        fast_CRF = ConditionalRandomField(
            num_tags=num_tags,
            allowed_transitions=allowed_transitions(id2label, include_start_end=True))
        fast_CRF.trans_m.data = bio_trans_m
        fast_res = fast_CRF.viterbi_decode(bio_logits, mask, unpad=True)
        # score equal
        self.assertListEqual(bio_scores, [round(s, 4) for s in fast_res[1].tolist()])
        # seq equal
        self.assertListEqual(bio_path, fast_res[0])

        labels = []
        for label in ['X', 'Y']:
            for tag in 'BMES':
                labels.append('{}-{}'.format(tag, label))
        id2label = {idx: label for idx, label in enumerate(labels)}
        num_tags = len(id2label)

        mask = seq_len_to_mask(bmes_seq_lens)

        fast_CRF = ConditionalRandomField(
            num_tags=num_tags,
            allowed_transitions=allowed_transitions(id2label, encoding_type='BMES',
                                                    include_start_end=True))
        fast_CRF.trans_m.data = bmes_trans_m
        fast_res = fast_CRF.viterbi_decode(bmes_logits, mask, unpad=True)
        # score equal
        self.assertListEqual(bmes_scores, [round(s, 4) for s in fast_res[1].tolist()])
        # seq equal
        self.assertListEqual(bmes_path, fast_res[0])
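
For reference, a minimal sketch of the constraint format fed into ConditionalRandomField above: allowed_transitions turns an id-to-label map into the legal (from_tag_id, to_tag_id) pairs, and with include_start_end=True the virtual start/end tags get ids num_tags and num_tags + 1 (fastNLP's documented convention).

from fastNLP.modules.decoder.crf import allowed_transitions

id2label = {0: 'O', 1: 'B-X', 2: 'I-X'}
# Legal (from_tag_id, to_tag_id) pairs; id 3 is the virtual <start> tag
# and id 4 the virtual <end> tag.
print(allowed_transitions(id2label, include_start_end=True))
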
Example #2
    def test_masking(self):
        # Test that the CRF's pad masking behaves correctly.
        import torch
        from fastNLP.modules.decoder.crf import ConditionalRandomField
        max_len = 5
        n_tags = 5
        pad_len = 5

        torch.manual_seed(4)
        logit = torch.rand(1, max_len+pad_len, n_tags)
        # logit[0, -1, :] = 0.0
        mask = torch.ones(1, max_len+pad_len)
        mask[0, -pad_len:] = 0  # zero out the entire padded tail
        model = ConditionalRandomField(n_tags)
        pred, score = model.viterbi_decode(logit[:,:-pad_len], mask[:,:-pad_len])
        mask_pred, mask_score = model.viterbi_decode(logit, mask)
        self.assertEqual(pred[0].tolist(), mask_pred[0,:-pad_len].tolist())
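
The hand-built mask above corresponds to a mask derived from sequence lengths. A small sketch using fastNLP's seq_len_to_mask (the same helper used in Examples #1 and #3), with the numbers of this test:

import torch
from fastNLP import seq_len_to_mask

# One sequence of true length 5, padded out to length 10, reproduces the
# [1, 1, 1, 1, 1, 0, 0, 0, 0, 0] mask built by hand above.
seq_lens = torch.LongTensor([5])
mask = seq_len_to_mask(seq_lens, 10)
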
Example #3
import copy

import torch
import torch.nn as nn

import fastNLP.modules.decoder.crf
from fastNLP import seq_len_to_mask
from fastNLP.modules.decoder.crf import ConditionalRandomField


class CWSModel(nn.Module):
    def __init__(self,
                 encoder,
                 src_embed,
                 position,
                 d_model,
                 tag_size,
                 crf=None):
        super(CWSModel, self).__init__()
        self.encoder = encoder
        self.src_embed = src_embed
        self.pos = copy.deepcopy(position)
        self.proj = nn.Linear(d_model, tag_size)
        self.tag_size = tag_size
        if crf is None:
            self.crf = None
            self.loss_f = nn.CrossEntropyLoss(reduction='sum')
        else:
            print("crf")
            trans = fastNLP.modules.decoder.crf.allowed_transitions(
                crf, encoding_type='bmes')
            self.crf = ConditionalRandomField(tag_size,
                                              allowed_transitions=trans)
        #self.norm=nn.LayerNorm(d_model)

    def forward(self, task, uni, seq_len, bi1=None, bi2=None, tags=None):
        #mask=fastNLP.core.utils.seq_len_to_mask(seq_len)
        mask = seq_len_to_mask(seq_len, uni.size(1))
        out = self.src_embed(task, uni, bi1, bi2)
        out = self.pos(out)
        #out=self.norm(out)
        #print(uni.size(),out.size(),mask.size(),seq_len)
        out = self.proj(self.encoder(out, mask.float()))

        if self.crf is not None:
            if tags is not None:
                out = self.crf(out, tags, mask)
                return {"loss": out}
            else:
                out, _ = self.crf.viterbi_decode(out, mask)
                return {"pred": out}
        else:
            if tags is not None:
                num = out.size(0)
                valid_logits = torch.masked_select(
                    out, mask.unsqueeze(-1).expand_as(out)).view(-1, self.tag_size)
                valid_tags = torch.masked_select(tags, mask)
                loss = self.loss_f(valid_logits, valid_tags)
                return {"loss": loss / num}
            else:
                out = torch.argmax(out, dim=-1)
                return {"pred": out}
Example #4
import copy

import torch
import torch.nn as nn

import fastNLP.modules.decoder.crf
from fastNLP import seq_len_to_mask
from fastNLP.modules.decoder.crf import ConditionalRandomField


class CWSModel(nn.Module):
    def __init__(self, encoder, src_embed, position, d_model, tag_size, crf=None):
        super(CWSModel, self).__init__()
        self.encoder = encoder
        self.src_embed = src_embed
        self.pos = copy.deepcopy(position)
        self.proj = nn.Linear(d_model, tag_size)
        self.tag_size = tag_size
        if crf is None:
            self.crf = None
            self.loss_f = nn.CrossEntropyLoss(reduction="mean", ignore_index=-100)
        else:
            print("crf")
            trans = fastNLP.modules.decoder.crf.allowed_transitions(
                crf, encoding_type="bmes"
            )
            self.crf = ConditionalRandomField(tag_size, allowed_transitions=trans)
        # self.norm=nn.LayerNorm(d_model)

    def forward(self, task, uni, seq_len, bi1=None, bi2=None, tags=None):
        # mask=fastNLP.core.utils.seq_len_to_mask(seq_len,uni.size(1)) # for dev 0.5.1
        mask = seq_len_to_mask(seq_len, uni.size(1))
        out = self.src_embed(task, uni, bi1, bi2)
        out = self.pos(out)
        # out=self.norm(out)
        out = self.proj(self.encoder(out, mask.float()))

        if self.crf is not None:
            if tags is not None:
                out = self.crf(out, tags, mask)
                return {"loss": out}
            else:
                out, _ = self.crf.viterbi_decode(out, mask)
                return {"pred": out}
        else:
            if tags is not None:
                out = out.contiguous().view(-1, self.tag_size)
                tags = tags.masked_fill(mask.eq(False), -100).view(-1)
                loss = self.loss_f(out, tags)
                return {"loss": loss}
            else:
                out = torch.argmax(out, dim=-1)
                return {"pred": out}
Example #5
import torch
import torch.nn as nn

# Assumed import paths: these names come from fastNLP, but their exact
# locations vary across fastNLP versions.
from fastNLP.core.utils import seq_len_to_byte_mask
from fastNLP.modules.decoder.crf import ConditionalRandomField, allowed_transitions
from fastNLP.modules.encoder.transformer import TransformerEncoder


class TransformerCWS(nn.Module):
    def __init__(self,
                 vocab_num,
                 embed_dim=100,
                 bigram_vocab_num=None,
                 bigram_embed_dim=100,
                 num_bigram_per_char=None,
                 hidden_size=200,
                 embed_drop_p=0.3,
                 num_layers=1,
                 num_heads=8,
                 tag_size=4):
        super().__init__()

        self.embedding = nn.Embedding(vocab_num, embed_dim)
        input_size = embed_dim
        if bigram_vocab_num:
            self.bigram_embedding = nn.Embedding(bigram_vocab_num,
                                                 bigram_embed_dim)
            input_size += num_bigram_per_char * bigram_embed_dim

        self.drop = nn.Dropout(embed_drop_p, inplace=True)

        self.fc1 = nn.Linear(input_size, hidden_size)

        # value_size = hidden_size//num_heads
        # self.transformer = TransformerEncoder(num_layers, model_size=hidden_size, inner_size=hidden_size,
        #                                       key_size=value_size,
        #                                       value_size=value_size, num_head=num_heads)
        self.transformer = TransformerEncoder(num_layers=num_layers,
                                              model_size=hidden_size,
                                              num_heads=num_heads,
                                              hidden_size=hidden_size)
        self.fc2 = nn.Linear(hidden_size, tag_size)

        allowed_trans = allowed_transitions(
            {0: 'b', 1: 'm', 2: 'e', 3: 's'}, encoding_type='bmes')
        self.crf = ConditionalRandomField(num_tags=tag_size,
                                          include_start_end_trans=False,
                                          allowed_transitions=allowed_trans)

    def forward(self, chars, target, seq_lens, bigrams=None):
        masks = seq_len_to_byte_mask(seq_lens)
        x = self.embedding(chars)
        batch_size = x.size(0)
        length = x.size(1)
        if hasattr(self, 'bigram_embedding'):
            bigrams = self.bigram_embedding(
                bigrams)  # batch_size x seq_lens x per_char x embed_size
            x = torch.cat([x, bigrams.view(batch_size, length, -1)], dim=-1)
        self.drop(x)  # Dropout was built with inplace=True, so x is modified in place
        x = self.fc1(x)
        feats = self.transformer(x, masks)
        feats = self.fc2(feats)
        losses = self.crf(feats, target, masks.float())

        pred_dict = {}
        pred_dict['seq_lens'] = seq_lens
        pred_dict['loss'] = torch.mean(losses)

        return pred_dict

    def predict(self, chars, seq_lens, bigrams=None):
        masks = seq_len_to_byte_mask(seq_lens)

        x = self.embedding(chars)
        batch_size = x.size(0)
        length = x.size(1)
        if hasattr(self, 'bigram_embedding'):
            bigrams = self.bigram_embedding(
                bigrams)  # batch_size x seq_lens x per_char x embed_size
            x = torch.cat([x, bigrams.view(batch_size, length, -1)], dim=-1)
        self.drop(x)  # Dropout was built with inplace=True, so x is modified in place
        x = self.fc1(x)
        feats = self.transformer(x, masks)
        feats = self.fc2(feats)

        paths = self.crf.viterbi_decode(feats, masks, get_score=False)

        return {'pred': paths, 'seq_lens': seq_lens}
Example #6
import torch

from fastNLP.modules.decoder.crf import ConditionalRandomField, allowed_transitions
# BaseModel, MLP, CWSBiLSTMEncoder and seq_lens_to_mask are assumed to be
# provided by the surrounding project code.


class CWSBiLSTMCRF(BaseModel):
    def __init__(self,
                 vocab_num,
                 embed_dim=100,
                 bigram_vocab_num=None,
                 bigram_embed_dim=100,
                 num_bigram_per_char=None,
                 hidden_size=200,
                 bidirectional=True,
                 embed_drop_p=0.2,
                 num_layers=1,
                 tag_size=4):
        """
        默认使用BMES的标注方式
        :param vocab_num:
        :param embed_dim:
        :param bigram_vocab_num:
        :param bigram_embed_dim:
        :param num_bigram_per_char:
        :param hidden_size:
        :param bidirectional:
        :param embed_drop_p:
        :param num_layers:
        :param tag_size:
        """
        super(CWSBiLSTMCRF, self).__init__()

        self.tag_size = tag_size

        self.encoder_model = CWSBiLSTMEncoder(vocab_num, embed_dim,
                                              bigram_vocab_num,
                                              bigram_embed_dim,
                                              num_bigram_per_char, hidden_size,
                                              bidirectional, embed_drop_p,
                                              num_layers)

        size_layer = [hidden_size, 200, tag_size]
        self.decoder_model = MLP(size_layer)
        allowed_trans = allowed_transitions(
            {0: 'b', 1: 'm', 2: 'e', 3: 's'}, encoding_type='bmes')
        self.crf = ConditionalRandomField(num_tags=tag_size,
                                          include_start_end_trans=False,
                                          allowed_transitions=allowed_trans)

    def forward(self, chars, target, seq_lens, bigrams=None):
        device = next(self.parameters()).device
        chars = chars.to(device).long()
        if bigrams is not None:
            bigrams = bigrams.to(device).long()
        seq_lens = seq_lens.to(device).long()
        masks = seq_lens_to_mask(seq_lens)
        feats = self.encoder_model(chars, bigrams, seq_lens)
        feats = self.decoder_model(feats)
        losses = self.crf(feats, target, masks)

        pred_dict = {}
        pred_dict['seq_lens'] = seq_lens
        pred_dict['loss'] = torch.mean(losses)

        return pred_dict

    def predict(self, chars, seq_lens, bigrams=None):
        device = next(self.parameters()).device
        chars = chars.to(device).long()
        if bigrams is not None:
            bigrams = bigrams.to(device).long()
        seq_lens = seq_lens.to(device).long()
        masks = seq_lens_to_mask(seq_lens)
        feats = self.encoder_model(chars, bigrams, seq_lens)
        feats = self.decoder_model(feats)
        paths, _ = self.crf.viterbi_decode(feats, masks)

        return {'pred': paths, 'seq_lens': seq_lens}
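
Examples #5 and #6 share the same CRF usage pattern: calling the module with gold tags yields one loss per sequence, while viterbi_decode yields the best tag paths. A minimal sketch with made-up shapes, assuming the fastNLP API as used in the examples above:

import torch
from fastNLP.modules.decoder.crf import ConditionalRandomField

crf = ConditionalRandomField(num_tags=4)
feats = torch.randn(2, 6, 4)                        # batch x seq_len x num_tags
tags = torch.randint(0, 4, (2, 6))
mask = torch.tensor([[1]*6, [1]*4 + [0]*2]).bool()  # second sequence is padded

loss = crf(feats, tags, mask).mean()                # training objective
paths, scores = crf.viterbi_decode(feats, mask)     # decoding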