Example 1
    def test_case3(self):
        # test that the CRF loss never becomes negative
        import torch
        from fastNLP.modules.decoder.crf import ConditionalRandomField
        from fastNLP.core.utils import seq_len_to_mask
        from torch import optim
        from torch import nn

        num_tags, include_start_end_trans = 4, True
        num_samples = 4
        lengths = torch.randint(3, 50, size=(num_samples, )).long()
        max_len = lengths.max()
        tags = torch.randint(num_tags, size=(num_samples, max_len))
        masks = seq_len_to_mask(lengths)
        feats = nn.Parameter(torch.randn(num_samples, max_len, num_tags))
        crf = ConditionalRandomField(num_tags, include_start_end_trans)
        optimizer = optim.SGD(
            [param
             for param in crf.parameters() if param.requires_grad] + [feats],
            lr=0.1)
        for i in range(10):
            loss = crf(feats, tags, masks).mean()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if i % 1000 == 0:
                print(loss)
            self.assertGreater(loss.item(), 0,
                               "CRF loss cannot be less than 0.")
Example 2
    def __init__(self, vocab_num, embed_dim=100, bigram_vocab_num=None, bigram_embed_dim=100, num_bigram_per_char=None,
                 hidden_size=200, bidirectional=True, embed_drop_p=0.2, num_layers=1, tag_size=4):
        """
        Uses the BMES tagging scheme by default.

        :param vocab_num: size of the character vocabulary
        :param embed_dim: dimension of the character embeddings
        :param bigram_vocab_num: size of the bigram vocabulary, if bigram features are used
        :param bigram_embed_dim: dimension of the bigram embeddings
        :param num_bigram_per_char: number of bigram features per character
        :param hidden_size: hidden size of the BiLSTM encoder
        :param bidirectional: whether the LSTM is bidirectional
        :param embed_drop_p: dropout probability applied to the embeddings
        :param num_layers: number of LSTM layers
        :param tag_size: number of output tags (4 for BMES)
        """
        super(CWSBiLSTMCRF, self).__init__()

        self.tag_size = tag_size

        self.encoder_model = CWSBiLSTMEncoder(vocab_num, embed_dim, bigram_vocab_num,
                                              bigram_embed_dim, num_bigram_per_char,
                                              hidden_size, bidirectional, embed_drop_p,
                                              num_layers)

        size_layer = [hidden_size, 200, tag_size]
        self.decoder_model = MLP(size_layer)
        allowed_trans = allowed_transitions({0:'b', 1:'m', 2:'e', 3:'s'}, encoding_type='bmes')
        self.crf = ConditionalRandomField(num_tags=tag_size, include_start_end_trans=False,
                                          allowed_transitions=allowed_trans)
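
The allowed_transitions helper used above turns a tag-id-to-label mapping into the list of legal (from_tag_id, to_tag_id) transitions for the given encoding; the CRF then forbids everything else, e.g. 'b' followed by another 'b'. A minimal sketch of the call in isolation:

from fastNLP.modules.decoder.crf import allowed_transitions

# Under BMES, 'b' may be followed by 'm' or 'e', never by another 'b' or by 's'.
trans = allowed_transitions({0: 'b', 1: 'm', 2: 'e', 3: 's'}, encoding_type='bmes')
print(trans)  # a list of (from_tag_id, to_tag_id) pairs passed to ConditionalRandomField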
Example 3
    def __init__(self,
                 vocab_num,
                 embed_dim=100,
                 bigram_vocab_num=None,
                 bigram_embed_dim=100,
                 num_bigram_per_char=None,
                 embed_drop_p=0.3,
                 hidden_size=200,
                 kernel_size=3,
                 dilate='none',
                 num_layers=1,
                 num_heads=8,
                 tag_size=4,
                 relative_pos_embed_dim=0):
        super().__init__()

        self.embedding = nn.Embedding(vocab_num, embed_dim)
        input_size = embed_dim
        if bigram_vocab_num:
            self.bigram_embedding = nn.Embedding(bigram_vocab_num,
                                                 bigram_embed_dim)
            input_size += num_bigram_per_char * bigram_embed_dim

        self.drop = nn.Dropout(embed_drop_p, inplace=True)

        self.fc1 = nn.Linear(input_size, hidden_size)

        # value_size = hidden_size//num_heads
        # self.transformer = TransformerEncoder(num_layers, model_size=hidden_size, inner_size=hidden_size,
        #                                       key_size=value_size,
        #                                       value_size=value_size, num_head=num_heads)
        self.transformer = TransformerDilateEncoder(
            num_layers=num_layers,
            model_size=hidden_size,
            num_heads=num_heads,
            hidden_size=hidden_size,
            kernel_size=kernel_size,
            dilate=dilate,
            relative_pos_embed_dim=relative_pos_embed_dim)
        self.fc2 = nn.Linear(hidden_size, tag_size)

        allowed_trans = allowed_transitions({0: 'b', 1: 'm', 2: 'e', 3: 's'},
                                            encoding_type='bmes')
        self.crf = ConditionalRandomField(num_tags=tag_size,
                                          include_start_end_trans=False,
                                          allowed_transitions=allowed_trans)
Example 4
    def __init__(self, encoder, src_embed, position, d_model, tag_size, crf=None):
        super(CWSModel, self).__init__()
        self.encoder = encoder
        self.src_embed = src_embed
        self.pos = copy.deepcopy(position)
        self.proj = nn.Linear(d_model, tag_size)
        self.tag_size = tag_size
        if crf is None:
            self.crf = None
            self.loss_f = nn.CrossEntropyLoss(reduction="mean", ignore_index=-100)
        else:
            print("crf")
            trans = fastNLP.modules.decoder.crf.allowed_transitions(
                crf, encoding_type="bmes"
            )
            self.crf = ConditionalRandomField(tag_size, allowed_transitions=trans)
Example 5
    def test_masking(self):
        # test that the CRF's pad masking runs correctly
        import torch
        from fastNLP.modules.decoder.crf import ConditionalRandomField
        max_len = 5
        n_tags = 5
        pad_len = 5

        torch.manual_seed(4)
        logit = torch.rand(1, max_len+pad_len, n_tags)
        # logit[0, -1, :] = 0.0
        mask = torch.ones(1, max_len+pad_len)
        mask[0, -pad_len:] = 0  # mask out the trailing pad positions
        model = ConditionalRandomField(n_tags)
        pred, score = model.viterbi_decode(logit[:, :-pad_len], mask[:, :-pad_len])
        mask_pred, mask_score = model.viterbi_decode(logit, mask)
        self.assertEqual(pred[0].tolist(), mask_pred[0, :-pad_len].tolist())
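
The mask passed to viterbi_decode follows the usual fastNLP convention: nonzero for real tokens, zero for padding. seq_len_to_mask builds such a mask from sequence lengths; a tiny self-contained sketch (the dtype is bool or uint8 depending on the fastNLP version):

import torch
from fastNLP.core.utils import seq_len_to_mask

print(seq_len_to_mask(torch.tensor([3, 1])))
# tensor([[ True,  True,  True],
#         [ True, False, False]])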
Example 6
class CWSModel(nn.Module):
    def __init__(self,
                 encoder,
                 src_embed,
                 position,
                 d_model,
                 tag_size,
                 crf=None):
        super(CWSModel, self).__init__()
        self.encoder = encoder
        self.src_embed = src_embed
        self.pos = copy.deepcopy(position)
        self.proj = nn.Linear(d_model, tag_size)
        self.tag_size = tag_size
        if crf is None:
            self.crf = None
            self.loss_f = nn.CrossEntropyLoss(reduction='sum')  # size_average=False is deprecated; the sum is divided by batch size below
        else:
            print("crf")
            trans = fastNLP.modules.decoder.crf.allowed_transitions(
                crf, encoding_type='bmes')
            self.crf = ConditionalRandomField(tag_size,
                                              allowed_transitions=trans)
        #self.norm=nn.LayerNorm(d_model)

    def forward(self, task, uni, seq_len, bi1=None, bi2=None, tags=None):
        #mask=fastNLP.core.utils.seq_len_to_mask(seq_len)
        mask = seq_len_to_mask(seq_len, uni.size(1))
        out = self.src_embed(task, uni, bi1, bi2)
        out = self.pos(out)
        #out=self.norm(out)
        #print(uni.size(),out.size(),mask.size(),seq_len)
        out = self.proj(self.encoder(out, mask.float()))

        if self.crf is not None:
            if tags is not None:
                out = self.crf(out, tags, mask)
                return {"loss": out}
            else:
                out, _ = self.crf.viterbi_decode(out, mask)
                return {"pred": out}
        else:
            if tags is not None:
                num = out.size(0)
                loss = self.loss_f(
                    torch.masked_select(
                        out,
                        mask.unsqueeze(-1).expand_as(out)).contiguous().view(
                            -1, self.tag_size),
                    torch.masked_select(tags, mask))
                return {"loss": loss / num}
            else:
                out = torch.argmax(out, dim=-1)
                return {"pred": out}
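
The non-CRF branch above relies on a masked_select trick: expanding the (batch, seq) mask over the tag dimension keeps the full logit row of every unpadded position, so the flat result reshapes cleanly to (num_valid_tokens, tag_size). A minimal illustration with made-up shapes:

import torch

out = torch.randn(2, 3, 4)                 # (batch, seq, tag_size)
mask = torch.tensor([[True, True, False],
                     [True, False, False]])
flat = torch.masked_select(out, mask.unsqueeze(-1).expand_as(out))
print(flat.view(-1, 4).shape)              # torch.Size([3, 4]): three real tokens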
Example 7
    def test_case2(self):
        # test that the CRF works correctly
        import json
        import torch
        from fastNLP import seq_len_to_mask

        with open('tests/data_for_tests/modules/decoder/crf.json', 'r') as f:
            data = json.load(f)

        bio_logits = torch.FloatTensor(data['bio_logits'])
        bio_scores = data['bio_scores']
        bio_path = data['bio_path']
        bio_trans_m = torch.FloatTensor(data['bio_trans_m'])
        bio_seq_lens = torch.LongTensor(data['bio_seq_lens'])

        bmes_logits = torch.FloatTensor(data['bmes_logits'])
        bmes_scores = data['bmes_scores']
        bmes_path = data['bmes_path']
        bmes_trans_m = torch.FloatTensor(data['bmes_trans_m'])
        bmes_seq_lens = torch.LongTensor(data['bmes_seq_lens'])

        labels = ['O']
        for label in ['X', 'Y']:
            for tag in 'BI':
                labels.append('{}-{}'.format(tag, label))
        id2label = {idx: label for idx, label in enumerate(labels)}
        num_tags = len(id2label)

        mask = seq_len_to_mask(bio_seq_lens)

        from fastNLP.modules.decoder.crf import ConditionalRandomField, allowed_transitions
        fast_CRF = ConditionalRandomField(
            num_tags=num_tags,
            allowed_transitions=allowed_transitions(id2label, include_start_end=True))
        fast_CRF.trans_m.data = bio_trans_m
        fast_res = fast_CRF.viterbi_decode(bio_logits, mask, unpad=True)
        # score equal
        self.assertListEqual(bio_scores, [round(s, 4) for s in fast_res[1].tolist()])
        # seq equal
        self.assertListEqual(bio_path, fast_res[0])

        labels = []
        for label in ['X', 'Y']:
            for tag in 'BMES':
                labels.append('{}-{}'.format(tag, label))
        id2label = {idx: label for idx, label in enumerate(labels)}
        num_tags = len(id2label)

        mask = seq_len_to_mask(bmes_seq_lens)

        from fastNLP.modules.decoder.crf import ConditionalRandomField, allowed_transitions
        fast_CRF = ConditionalRandomField(
            num_tags=num_tags,
            allowed_transitions=allowed_transitions(id2label, encoding_type='BMES',
                                                    include_start_end=True))
        fast_CRF.trans_m.data = bmes_trans_m
        fast_res = fast_CRF.viterbi_decode(bmes_logits, mask, unpad=True)
        # score equal
        self.assertListEqual(bmes_scores, [round(s, 4) for s in fast_res[1].tolist()])
        # seq equal
        self.assertListEqual(bmes_path, fast_res[0])
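
Stripped of the fixture data, the decode API exercised by this test reduces to a few lines. A minimal sketch (tag count and shapes are illustrative):

import torch
from fastNLP.modules.decoder.crf import ConditionalRandomField
from fastNLP.core.utils import seq_len_to_mask

crf = ConditionalRandomField(num_tags=5)
logits = torch.randn(2, 6, 5)                 # (batch, max_len, num_tags)
mask = seq_len_to_mask(torch.tensor([6, 4]))  # second sequence has 2 pad positions

paths, scores = crf.viterbi_decode(logits, mask, unpad=True)
# with unpad=True, paths is a list of per-sequence tag lists (padding removed);
# scores holds the viterbi score of each best path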
Example 8
class CWSModel(nn.Module):
    def __init__(self, encoder, src_embed, position, d_model, tag_size, crf=None):
        super(CWSModel, self).__init__()
        self.encoder = encoder
        self.src_embed = src_embed
        self.pos = copy.deepcopy(position)
        self.proj = nn.Linear(d_model, tag_size)
        self.tag_size = tag_size
        if crf is None:
            self.crf = None
            self.loss_f = nn.CrossEntropyLoss(reduction="mean", ignore_index=-100)
        else:
            print("crf")
            trans = fastNLP.modules.decoder.crf.allowed_transitions(
                crf, encoding_type="bmes"
            )
            self.crf = ConditionalRandomField(tag_size, allowed_transitions=trans)
        # self.norm=nn.LayerNorm(d_model)

    def forward(self, task, uni, seq_len, bi1=None, bi2=None, tags=None):
        # mask=fastNLP.core.utils.seq_len_to_mask(seq_len,uni.size(1)) # for dev 0.5.1
        mask = seq_len_to_mask(seq_len, uni.size(1))
        out = self.src_embed(task, uni, bi1, bi2)
        out = self.pos(out)
        # out=self.norm(out)
        out = self.proj(self.encoder(out, mask.float()))

        if self.crf is not None:
            if tags is not None:
                out = self.crf(out, tags, mask)
                return {"loss": out}
            else:
                out, _ = self.crf.viterbi_decode(out, mask)
                return {"pred": out}
        else:
            if tags is not None:
                out = out.contiguous().view(-1, self.tag_size)
                tags = tags.masked_fill(mask.eq(False), -100).view(-1)  # avoid mutating the input tags in place
                loss = self.loss_f(out, tags)
                return {"loss": loss}
            else:
                out = torch.argmax(out, dim=-1)
                return {"pred": out}
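
This variant handles padding differently from Example 6: instead of selecting the valid positions, it overwrites padded targets with -100 so that CrossEntropyLoss(ignore_index=-100) skips them. A self-contained illustration of the same trick:

import torch
from torch import nn

loss_f = nn.CrossEntropyLoss(reduction="mean", ignore_index=-100)
logits = torch.randn(2, 5, 4)              # (batch, seq, tag_size)
tags = torch.randint(4, (2, 5))
mask = torch.tensor([[True, True, True, False, False],
                     [True, True, True, True, True]])
tags = tags.masked_fill(~mask, -100)       # padded positions contribute no loss
loss = loss_f(logits.view(-1, 4), tags.view(-1))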
Example 9
class TransformerCWS(nn.Module):
    def __init__(self,
                 vocab_num,
                 embed_dim=100,
                 bigram_vocab_num=None,
                 bigram_embed_dim=100,
                 num_bigram_per_char=None,
                 hidden_size=200,
                 embed_drop_p=0.3,
                 num_layers=1,
                 num_heads=8,
                 tag_size=4):
        super().__init__()

        self.embedding = nn.Embedding(vocab_num, embed_dim)
        input_size = embed_dim
        if bigram_vocab_num:
            self.bigram_embedding = nn.Embedding(bigram_vocab_num,
                                                 bigram_embed_dim)
            input_size += num_bigram_per_char * bigram_embed_dim

        self.drop = nn.Dropout(embed_drop_p, inplace=True)

        self.fc1 = nn.Linear(input_size, hidden_size)

        # value_size = hidden_size//num_heads
        # self.transformer = TransformerEncoder(num_layers, model_size=hidden_size, inner_size=hidden_size,
        #                                       key_size=value_size,
        #                                       value_size=value_size, num_head=num_heads)
        self.transformer = TransformerEncoder(num_layers=num_layers,
                                              model_size=hidden_size,
                                              num_heads=num_heads,
                                              hidden_size=hidden_size)
        self.fc2 = nn.Linear(hidden_size, tag_size)

        allowed_trans = allowed_transitions({0: 'b', 1: 'm', 2: 'e', 3: 's'},
                                            encoding_type='bmes')
        self.crf = ConditionalRandomField(num_tags=tag_size,
                                          include_start_end_trans=False,
                                          allowed_transitions=allowed_trans)

    def forward(self, chars, target, seq_lens, bigrams=None):
        masks = seq_len_to_byte_mask(seq_lens)
        x = self.embedding(chars)
        batch_size = x.size(0)
        length = x.size(1)
        if hasattr(self, 'bigram_embedding'):
            bigrams = self.bigram_embedding(
                bigrams)  # batch_size x seq_lens x per_char x embed_size
            x = torch.cat([x, bigrams.view(batch_size, length, -1)], dim=-1)
        self.drop(x)  # dropout is applied in place (inplace=True above), so no reassignment is needed
        x = self.fc1(x)
        feats = self.transformer(x, masks)
        feats = self.fc2(feats)
        losses = self.crf(feats, target, masks.float())

        pred_dict = {}
        pred_dict['seq_lens'] = seq_lens
        pred_dict['loss'] = torch.mean(losses)

        return pred_dict

    def predict(self, chars, seq_lens, bigrams=None):
        masks = seq_len_to_byte_mask(seq_lens)

        x = self.embedding(chars)
        batch_size = x.size(0)
        length = x.size(1)
        if hasattr(self, 'bigram_embedding'):
            bigrams = self.bigram_embedding(
                bigrams)  # batch_size x seq_lens x per_char x embed_size
            x = torch.cat([x, bigrams.view(batch_size, length, -1)], dim=-1)
        self.drop(x)  # dropout is applied in place (inplace=True above), so no reassignment is needed
        x = self.fc1(x)
        feats = self.transformer(x, masks)
        feats = self.fc2(feats)

        probs = self.crf.viterbi_decode(feats, masks, get_score=False)

        return {'pred': probs, 'seq_lens': seq_lens}
Example 10
class CWSBiLSTMCRF(BaseModel):
    def __init__(self,
                 vocab_num,
                 embed_dim=100,
                 bigram_vocab_num=None,
                 bigram_embed_dim=100,
                 num_bigram_per_char=None,
                 hidden_size=200,
                 bidirectional=True,
                 embed_drop_p=0.2,
                 num_layers=1,
                 tag_size=4):
        """
        Uses the BMES tagging scheme by default.

        :param vocab_num: size of the character vocabulary
        :param embed_dim: dimension of the character embeddings
        :param bigram_vocab_num: size of the bigram vocabulary, if bigram features are used
        :param bigram_embed_dim: dimension of the bigram embeddings
        :param num_bigram_per_char: number of bigram features per character
        :param hidden_size: hidden size of the BiLSTM encoder
        :param bidirectional: whether the LSTM is bidirectional
        :param embed_drop_p: dropout probability applied to the embeddings
        :param num_layers: number of LSTM layers
        :param tag_size: number of output tags (4 for BMES)
        """
        super(CWSBiLSTMCRF, self).__init__()

        self.tag_size = tag_size

        self.encoder_model = CWSBiLSTMEncoder(vocab_num, embed_dim,
                                              bigram_vocab_num,
                                              bigram_embed_dim,
                                              num_bigram_per_char, hidden_size,
                                              bidirectional, embed_drop_p,
                                              num_layers)

        size_layer = [hidden_size, 200, tag_size]
        self.decoder_model = MLP(size_layer)
        allowed_trans = allowed_transitions({0: 'b', 1: 'm', 2: 'e', 3: 's'},
                                            encoding_type='bmes')
        self.crf = ConditionalRandomField(num_tags=tag_size,
                                          include_start_end_trans=False,
                                          allowed_transitions=allowed_trans)

    def forward(self, chars, target, seq_lens, bigrams=None):
        device = next(self.parameters()).device
        chars = chars.to(device).long()
        if bigrams is not None:
            bigrams = bigrams.to(device).long()
        seq_lens = seq_lens.to(device).long()
        masks = seq_lens_to_mask(seq_lens)
        feats = self.encoder_model(chars, bigrams, seq_lens)
        feats = self.decoder_model(feats)
        losses = self.crf(feats, target, masks)

        pred_dict = {}
        pred_dict['seq_lens'] = seq_lens
        pred_dict['loss'] = torch.mean(losses)

        return pred_dict

    def predict(self, chars, seq_lens, bigrams=None):
        device = next(self.parameters()).device
        chars = chars.to(device).long()
        if bigrams is not None:
            bigrams = bigrams.to(device).long()
        seq_lens = seq_lens.to(device).long()
        masks = seq_lens_to_mask(seq_lens)
        feats = self.encoder_model(chars, bigrams, seq_lens)
        feats = self.decoder_model(feats)
        paths, _ = self.crf.viterbi_decode(feats, masks)

        return {'pred': paths, 'seq_lens': seq_lens}
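
Once viterbi_decode returns BMES paths, recovering word spans is a small post-processing step. Below is a hypothetical helper (not part of the repo) matching the 0:'b', 1:'m', 2:'e', 3:'s' mapping used by these models:

def bmes_to_spans(path):
    """Convert a BMES tag-id path into (start, end) word spans; hypothetical helper."""
    spans, start = [], 0
    for i, tag in enumerate(path):
        if tag in (0, 3):   # 'b' or 's' opens a word
            start = i
        if tag in (2, 3):   # 'e' or 's' closes it
            spans.append((start, i + 1))
    return spans

print(bmes_to_spans([0, 1, 2, 3, 0, 2]))  # [(0, 3), (3, 4), (4, 6)]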