Example #1
    def __init__(self, args):
        super().__init__()
        self.args = args
        self.hidden_dim = args.hidden_dim
        self.tag_num = args.tag_num
        self.batch_size = args.batch_size
        self.bidirectional = True
        self.num_layers = args.num_layers
        self.pad_index = args.pad_index
        self.dropout = args.dropout

        vocabulary_size = args.vocabulary_size
        embedding_dimension = args.embedding_dim

        self.embedding = nn.Embedding(vocabulary_size,
                                      embedding_dimension,
                                      padding_idx=self.pad_index).to(device)

        self.lstm = nn.LSTM(embedding_dimension,
                            self.hidden_dim // 2,
                            bidirectional=self.bidirectional,
                            num_layers=self.num_layers,
                            dropout=self.dropout).to(device)

        self.hidden2label = nn.Linear(self.hidden_dim, self.tag_num).to(device)

        self.crflayer = CRF(self.tag_num).to(device)
        self.dropoutlayer = nn.Dropout(self.dropout)
Example #2
    def __init__(self,
                 num_labels: int,
                 rnn_hidden_size: int,
                 word_emb_dim: int,
                 char_emb_dim: int,
                 pos_emb_dim: int,
                 dropout_rate: float = 0.5):
        """

        Args:
            num_labels (int): [description]
            rnn_hidden_size (int): [description]
            word_emb_dim (int): [description]
            char_emb_dim (int): [description]
            pos_emb_dim (int): [description]
            dropout_rate (float, optional): [description]. Defaults to 0.5.
        """

        super().__init__()
        input_dim = word_emb_dim + char_emb_dim + pos_emb_dim * 2
        self.num_labels = num_labels
        # bilstm output -> next bilstm input. So, hidden_size == input_size
        self.bilstm = nn.LSTM(input_size=input_dim,
                              hidden_size=rnn_hidden_size // 2,
                              num_layers=1,
                              bias=True,
                              batch_first=True,
                              bidirectional=True)
        self.linear = nn.Linear(rnn_hidden_size, num_labels)
        self.dropout_layer = nn.Dropout(p=dropout_rate)
        self.crf = CRF(num_labels, 0)
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
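
The hidden_size=rnn_hidden_size // 2 above relies on the bidirectional LSTM concatenating its forward and backward states, so each token comes out with rnn_hidden_size features, exactly what the following nn.Linear expects. A minimal shape check, with made-up dimensions (not part of the original example):

import torch
import torch.nn as nn

# Sketch: a bidirectional LSTM with hidden_size = H // 2 emits H-dimensional
# per-token outputs, matching nn.Linear(H, num_labels). H and the input size are assumptions.
H = 256
bilstm = nn.LSTM(input_size=100, hidden_size=H // 2, num_layers=1,
                 batch_first=True, bidirectional=True)
x = torch.randn(4, 7, 100)   # (batch, seq_len, input_dim)
out, _ = bilstm(x)
print(out.shape)             # torch.Size([4, 7, 256])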
Example #3
    def __init__(self,
                 vocab_size,
                 tag_to_ix,
                 embedding_dim,
                 hidden_dim,
                 embedding_mat=None):
        super(BiLSTM_CRF, self).__init__()
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.vocab_size = vocab_size
        self.tag_to_ix = tag_to_ix
        self.tagset_size = len(tag_to_ix) - 2  # subtract the start and stop tags

        # Note: the embedding weights can still be fine-tuned during training
        self.word_embeds = nn.Embedding(vocab_size, embedding_dim)
        if embedding_mat is not None:
            # print(embedding_mat)
            self.word_embeds.load_state_dict(
                {'weight': torch.tensor(embedding_mat)})

        # num_layers -- number of stacked LSTM layers
        # hidden_size -- dimension of the hidden state (note: it can differ from the input dimension!)
        # each direction gets hidden_dim // 2, so the concatenated bidirectional output is hidden_dim
        self.lstm = nn.LSTM(embedding_dim,
                            hidden_dim // 2,
                            batch_first=True,
                            num_layers=1,
                            bidirectional=True)

        # Maps the output of the LSTM into tag space.
        # maps the hidden states to tagset_size with a single linear layer
        self.hidden2tag = nn.Linear(hidden_dim, self.tagset_size)

        # the CRF implementation adds the start and end tags automatically
        self.crf = CRF(len(tag_to_ix) - 2, use_gpu=False)
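
The "- 2" works because tag_to_ix is assumed to still contain the start and stop entries that this CRF implementation adds on its own. A tiny illustration with a hypothetical tag map:

# Hypothetical tag map: the start/stop entries live in tag_to_ix but are handled
# internally by the CRF, so they are excluded from tagset_size.
tag_to_ix = {"B": 0, "I": 1, "O": 2, "<START>": 3, "<STOP>": 4}
tagset_size = len(tag_to_ix) - 2   # 3 emission tags: B, I, O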
Example #4
    def __init__(self, args):
        super(BiLstmCrf, self).__init__(args)
        self.args = args
        self.hidden_dim = 300
        self.tag_num = args.tag_num
        self.batch_size = args.batch_size
        self.bidirectional = True
        self.num_layers = args.num_layers
        self.pad_index = args.pad_index
        self.dropout = args.dropout
        self.save_path = args.save_path

        vocabulary_size = args.vocabulary_size
        embedding_dimension = args.embedding_dim

        self.embedding = nn.Embedding(vocabulary_size,
                                      embedding_dimension).to(DEVICE)
        if args.static:
            logger.info('loading word vectors from {}/{}'.format(
                args.pretrained_path, args.pretrained_name))
            vectors = Vectors(args.pretrained_name,
                              args.pretrained_path).vectors
            self.embedding = self.embedding.from_pretrained(
                vectors, freeze=not args.non_static).to(DEVICE)

        self.lstm = nn.LSTM(embedding_dimension,
                            self.hidden_dim // 2,
                            bidirectional=self.bidirectional,
                            num_layers=self.num_layers,
                            dropout=self.dropout).to(DEVICE)
        self.hidden2label = nn.Linear(self.hidden_dim, self.tag_num).to(DEVICE)
        self.crflayer = CRF(self.tag_num).to(DEVICE)
Example #5
    def __init__(self, params):
        """
        We define an recurrent network that predicts the NER tags for each token in the sentence. The components
        required are:

        - an embedding layer: this layer maps each index in range(params.vocab_size) to a params.embedding_dim vector
        - lstm: applying the LSTM on the sequential input returns an output for each token in the sentence
        - fc: a fully connected layer that converts the LSTM output for each token to a distribution over NER tags

        Args:
            params: (Params) contains vocab_size, embedding_dim, lstm_hidden_dim
        """
        super(Netcppos, self).__init__()

        # the embedding takes as input the vocab_size and the embedding_dim
        self.embedding = nn.Embedding(params.vocab_size, params.embedding_dim)

        # the LSTM takes as input the size of its input (embedding_dim), its hidden size
        # for more details on how to use it, check out the documentation
        self.lstm = nn.LSTM(params.embedding_dim,
                            params.lstm_hidden_dim,
                            batch_first=True)
        #self.lstm = nn.LSTM(params.embedding_dim+params.pos_dim, params.lstm_hidden_dim, batch_first=True)

        # the fully connected layer transforms the output to give the final output layer
        self.fc1 = nn.Linear(params.lstm_hidden_dim, params.pos_dim)

        self.fc = nn.Linear(params.pos_dim + params.pos_dim,
                            params.number_of_tags)

        #self.crf_model = CRF(9, batch_first=False) #num_tags=9
        self.crf_model = CRF(9)
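
The docstring's pipeline (the embedding maps token indices to vectors, the LSTM runs over the sequence, a fully connected layer turns each LSTM output into tag scores) can be sketched on dummy tensors as follows. All sizes are made up, and the real module's forward pass, which also wires in fc1 and the CRF, is not shown here:

import torch
import torch.nn as nn

# Minimal sketch of the documented pipeline: embedding -> LSTM -> linear tag scores.
vocab_size, embedding_dim, lstm_hidden_dim, number_of_tags = 1000, 50, 64, 9
embedding = nn.Embedding(vocab_size, embedding_dim)
lstm = nn.LSTM(embedding_dim, lstm_hidden_dim, batch_first=True)
fc = nn.Linear(lstm_hidden_dim, number_of_tags)

tokens = torch.randint(0, vocab_size, (2, 11))   # (batch, seq_len) token indices
emb = embedding(tokens)                          # (2, 11, 50)
lstm_out, _ = lstm(emb)                          # (2, 11, 64)
emissions = fc(lstm_out)                         # (2, 11, 9) per-token tag scores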
Example #6
class Model(nn.Module):
    def __init__(self, config):
        super(Model, self).__init__()

        self.embedding_dim = config.embedding_dim
        self.hidden_dim = config.hidden_dim
        self.vocab_size = config.vocab_size
        self.num_tags = config.num_tags

        self.embeds = nn.Embedding(self.vocab_size, self.embedding_dim)
        self.lstm = nn.LSTM(
            self.embedding_dim,
            self.hidden_dim // 2,
            num_layers=1,
            bidirectional=True,
            batch_first=True,
        )
        self.dropout = nn.Dropout(config.dropout)
        self.linear = nn.Linear(self.hidden_dim, self.num_tags)
        self.crf = CRF(self.num_tags)

    def forward(self, x, mask):
        embeddings = self.embeds(x)
        feats, hidden = self.lstm(embeddings)
        emissions = self.linear(self.dropout(feats))
        outputs = self.crf.viterbi_decode(emissions, mask)
        return outputs

    def log_likelihood(self, x, labels, mask):
        embeddings = self.embeds(x)
        feats, hidden = self.lstm(embeddings)
        emissions = self.linear(self.dropout(feats))
        loss = -self.crf.forward(emissions, labels, mask)
        return torch.sum(loss)
Example #7
    def __init__(
        self,
        tag_to_idx: Dict,
        embeddings_dim: int = 300,
        hidden_dim: int = 256,
        num_lstm_layers: int = 2,
        spatial_dropout: float = 0.2,
        **kwargs: Dict
    ):
        super().__init__()
        self.embedding_dim = embeddings_dim
        self.hidden_dim = hidden_dim
        self.num_lstm_layers = num_lstm_layers
        self.tag_to_idx = tag_to_idx
        self.tagset_size = len(tag_to_idx.values())

        self.crf = CRF(self.tagset_size, batch_first=True)

        self.embedding_dropout = SpatialDropout(spatial_dropout)

        self.lstm = nn.LSTM(embeddings_dim, hidden_dim // 2, num_layers=self.num_lstm_layers,
                            bidirectional=True, batch_first=True)
        # Maps the output of the LSTM into tag space.
        self.hidden2tag = nn.Linear(hidden_dim, hidden_dim // 2)
        self.hidden2tag2 = nn.Linear(hidden_dim // 2, self.tagset_size)
Example #8
    def setUp(self):

        self.batch_size = 2
        self.sequence_size = 3
        self.num_labels = 5
        self.crf = CRF(self.num_labels)
        self.mask = torch.FloatTensor([[1, 1, 1], [1, 1, 0]])
        self.labels = torch.LongTensor([[0, 2, 3], [1, 4, 1]])
        self.hidden = torch.autograd.Variable(
            torch.randn(self.batch_size, self.sequence_size, self.num_labels),
            requires_grad=True)
Example #9
    def __init__(self, num_classes, model_name) -> None:
        super(bertCRF, self).__init__()

        if model_name == "bert-base-cased-crf":
            self.bert = BertModel(BertConfig())
        if model_name == "roberta-base-crf":
            self.bert = RobertaModel(RobertaConfig())

        self.dropout = nn.Dropout(0.1)
        self.position_wise_ff = nn.Linear(768, num_classes)
        self.crf = CRF(num_classes)
Example #10
    def setUp(self):

        device = "cuda" if torch.cuda.is_available() else "cpu"
        self.batch_size = 2
        self.sequence_size = 3
        self.num_labels = 5
        self.crf = CRF(self.num_labels)
        self.mask = torch.ByteTensor([[1, 1, 1], [1, 1, 0]]).to(device)
        self.labels = torch.LongTensor([[0, 2, 3], [1, 4, 1]]).to(device)
        self.hidden = torch.autograd.Variable(
            torch.randn(self.batch_size, self.sequence_size, self.num_labels),
            requires_grad=True,
        ).to(device)
Example #11
    def __init__(self, embed_dim, num_layers, hidden_dim, text_vocab,
                 bio_vocab):
        super(LSTM_CRF_Model, self).__init__()
        self.bio_vocab = bio_vocab
        self.text_vocab = text_vocab
        self.embed_dim = embed_dim
        self.num_layers = num_layers
        self.hidden_dim = hidden_dim
        self.vocab_dim = len(text_vocab)
        self.n_classes = len(bio_vocab)

        self.word_embeddings = Embeddings(self.embed_dim, self.vocab_dim)
        self.dropout = nn.Dropout(args.dropout)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, num_layers=num_layers)
        self.hidden2tag = nn.Linear(hidden_dim, self.n_classes)
        self.crf = CRF(self.n_classes)
Example #12
    def __init__(self, num_labels: int, hidden_size: int, dropout_rate: float,
                 wordemb_dim: int, charemb_dim: int):
        """

        :param num_labels: number of labels
        :param hidden_size: size of hidden state
        :param dropout_rate: dropout rate (0.0 <= dropout_rate < 1.0)
        :param wordemb_dim: dimension of word embedding
        :param charemb_dim: dimension of character embedding
        """

        super().__init__()
        self.blstm = BLSTM(num_labels, hidden_size, dropout_rate, wordemb_dim,
                           charemb_dim)
        self.crf = CRF(num_labels)
        self = self.cuda() if BLSTM.CUDA else self
Example #13
    def test_initialize_score_when_set_padidx(self):
        crf = CRF(self.num_labels, 1)
        self.assertTrue(0.1 > torch.max(crf.trans_matrix))
        self.assertTrue(-10000.0 == torch.min(crf.trans_matrix))
        self.assertTrue(0.1 > torch.max(crf.start_trans))
        self.assertTrue(-10000.0 == torch.min(crf.start_trans))
        self.assertTrue(0.1 > torch.max(crf.end_trans))
        self.assertTrue(-0.1 < torch.min(crf.end_trans))
Example #14
    def __init__(self, vocab_size, input_size, hidden_size, num_labels, n_layers, lr, dropout):
        super(BLSTM_CRF, self).__init__()
        self.name = "BLSTM_CRF"
        self.blstm = BLSTM(vocab_size, input_size, hidden_size, num_labels, n_layers, dropout)
        self.crf = CRF(num_labels)

        self.blstm_optimizer = optim.Adam(self.blstm.parameters(), lr=lr, weight_decay=1e-4)
        self.crf_optimizer = optim.Adam(self.crf.parameters(), lr=lr, weight_decay=1e-4)

        self.loss = 0
        self.print_every = 1
        self.max_score = 0.


        if USE_CUDA:
            self.blstm = self.blstm.cuda()
            self.crf = self.crf.cuda()
Example #15
    def __init__(self,
                 vocabs,
                 word_dim,
                 pos_dim,
                 hidden_size,
                 rnn_layers,
                 dropout_rate,
                 device,
                 bidirectional=True,
                 use_crf=False,
                 embedding=None):
        super(Model, self).__init__()

        word2id, tag2id, label2id = vocabs

        # word embedding set
        self.word_embeddings = nn.Embedding(len(word2id), word_dim)

        if embedding is not None:
            self.word_embeddings.weight.data.copy_(torch.from_numpy(embedding))

        self.tag_embeddings = nn.Embedding(len(tag2id), pos_dim)

        # lstm set
        self.lstm = nn.LSTM(word_dim + pos_dim,
                            hidden_size,
                            rnn_layers,
                            batch_first=True,
                            bidirectional=bidirectional,
                            dropout=dropout_rate)

        # output size is hidden_size * 2 if bidirectional, otherwise hidden_size
        output_size = hidden_size * 2 if bidirectional else hidden_size

        # output size
        self.linear = nn.Linear(output_size, len(label2id))

        self.dropout_rate = dropout_rate

        self.use_crf = use_crf
        if use_crf:
            self.crf = CRF(len(label2id), batch_first=True)

        self.cross_entropy = nn.CrossEntropyLoss(reduction='none')
Example #16
    def __init__(self, config):
        super(Model, self).__init__()

        self.embedding_dim = config.embedding_dim
        self.hidden_dim = config.hidden_dim
        self.vocab_size = config.vocab_size
        self.num_tags = config.num_tags

        self.embeds = nn.Embedding(self.vocab_size, self.embedding_dim)
        self.lstm = nn.LSTM(
            self.embedding_dim,
            self.hidden_dim // 2,
            num_layers=1,
            bidirectional=True,
            batch_first=True,
        )
        self.dropout = nn.Dropout(config.dropout)
        self.linear = nn.Linear(self.hidden_dim, self.num_tags)
        self.crf = CRF(self.num_tags)
Example #17
    def __init__(self, config):
        super(BERT_CRF, self).__init__()

        self.num_tags = config.num_tags
        self.hidden_size = config.hidden_size

        self.bert_layer = BertModel.from_pretrained(args.model_name)
        self.dropout = nn.Dropout(args.dropout_prob)

        self.hidden_to_tag_layer = nn.Linear(self.hidden_size, self.num_tags)

        self.crf_layer = CRF(self.num_tags)
Example #18
class LSTMCRF(CRFLayer):
    def __init__(self, tagset_size, start_tag_idx, stop_tag_idx):
        from ..models.lstm_crf import CRF
        self.crf = CRF(tagset_size, start_tag_idx, stop_tag_idx)

    def get_loss(self, logits, labels, mask=None):
        loss = self.crf.neg_log_likelihood(logits, labels)
        return loss

    def decode(self, logits, mask=None):
        seq_path = self.crf(logits)
        return seq_path
Example #19
    def test_initialize_variables(self):

        self.assertEqual(self.crf.num_labels, self.num_labels)
        self.assertEqual(self.crf.trans_matrix.size(),
                         (self.num_labels, self.num_labels))
        self.assertEqual(self.crf.start_trans.size(), (self.num_labels, ))
        self.assertEqual(self.crf.end_trans.size(), (self.num_labels, ))

        num_labels = -1
        with self.assertRaises(ValueError) as er:
            CRF(num_labels)
        exception = er.exception
        self.assertEqual(exception.args[0], 'invalid number of labels: -1')
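
A guess at the constructor validation this test exercises; the library code itself is not shown here, so this stand-in is only a sketch matching the asserted message:

class _CRFSketch:
    """Hypothetical stand-in showing the validation the test above checks."""

    def __init__(self, num_labels: int):
        if num_labels < 1:
            raise ValueError('invalid number of labels: {}'.format(num_labels))
        self.num_labels = num_labels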
Example #20
    def __init__(self):
        super(RNN2, self).__init__()
        self.rnn1 = nn.GRU(
            input_size=912,
            hidden_size=100,   # rnn hidden units
            num_layers=2,      # number of rnn layers
            batch_first=True,  # input & output have the batch size as the first dimension, e.g. (batch, time_step, input_size)
            dropout=0.3,
            bidirectional=True)

        self.fc = nn.Linear(200, 5)
        # output = nn.Softmax(fc)
        self.crf = CRF(5)
Example #21
    def __init__(self, num_labels: int, dropout_rate: float,
                 word_emb_dim: int, char_emb_dim: int, pos_emb_dim: int,
                 pad_idx: int = 0, other_idx: int = 1):
        """

        Args:
            num_labels (int): [description]
            dropout_rate (float): [description]
            word_emb_dim (int): [description]
            char_emb_dim (int): [description]
            pos_emb_dim (int): [description]
            pad_idx (int, optional): [description]. Defaults to 0.
            other_idx (int, optional): [description]. Defaults to 1.
        """

        super().__init__()
        input_dim = word_emb_dim + char_emb_dim + pos_emb_dim * 2
        self.USE_CHAR = char_emb_dim > 0
        self.USE_POS = pos_emb_dim > 0
        self.num_labels = num_labels
        # bilstm output -> next bilstm input. So, hidden_size == input_size
        self.bilstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=input_dim // 2,
            num_layers=1,
            bias=True,
            batch_first=True,
            bidirectional=True
        )
        self.linear = nn.Linear(input_dim, num_labels)
        self.dropout_layer = nn.Dropout(p=dropout_rate)
        self.crf = CRF(num_labels, pad_idx)
        self.pad_idx = pad_idx
        self.other_idx = other_idx
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu'
        )
Example #22
class BiLstmTagger(nn.Module):
    def __init__(self,
                 embedding,
                 nemb,
                 nhid,
                 nlayers,
                 drop,
                 ntags,
                 batch_first=True):

        super(BiLstmTagger, self).__init__()

        self.embedding = embedding

        self.tagger_rnn = nn.LSTM(input_size=nemb,
                                  hidden_size=nhid,
                                  num_layers=nlayers,
                                  dropout=drop,
                                  bidirectional=True)

        self.projection = nn.Sequential(
            nn.Linear(in_features=nhid * 2, out_features=ntags))

        self.crf_tagger = CRF(ntags)
        self._batch_first = batch_first

    def _rnn_forward(self, x, seq_len):
        packed_sequence = pack_padded_sequence(x,
                                               seq_len,
                                               batch_first=self._batch_first)
        out, _ = self.tagger_rnn(packed_sequence)
        out, lengths = pad_packed_sequence(out, batch_first=self._batch_first)
        projection = self.projection(out)

        return projection

    def forward(self, x, x_word, seq_len, y):
        embed = self.embedding(x, x_word)
        projection = self._rnn_forward(embed, seq_len)
        llikelihood = self.crf_tagger(projection, y)

        return -llikelihood

    def decode(self, x, x_word, seq_len):
        embed = self.embedding(x, x_word)
        projection = self._rnn_forward(embed, seq_len)
        result = self.crf_tagger.decode(projection)

        return result
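
_rnn_forward packs the padded batch so the LSTM skips pad positions, then re-pads before the projection. A small sketch of that pack/unpack round trip on dummy data; the shapes are made up, and enforce_sorted=False is an assumption (the original omits it, which requires batches pre-sorted by length):

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

# Sketch: pack -> LSTM -> unpack, as in _rnn_forward above.
rnn = nn.LSTM(input_size=8, hidden_size=16, num_layers=1,
              bidirectional=True, batch_first=True)
x = torch.randn(3, 5, 8)            # (batch, max_seq_len, emb_dim)
seq_len = torch.tensor([5, 3, 2])   # true length of each sequence
packed = pack_padded_sequence(x, seq_len, batch_first=True, enforce_sorted=False)
out, _ = rnn(packed)
out, lengths = pad_packed_sequence(out, batch_first=True)
print(out.shape)                    # torch.Size([3, 5, 32]) -> Linear(nhid * 2, ntags)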
Example #23
    def __init__(self,
                 embedding,
                 nemb,
                 nhid,
                 nlayers,
                 drop,
                 ntags,
                 batch_first=True):

        super(BiLstmTagger, self).__init__()

        self.embedding = embedding

        self.tagger_rnn = nn.LSTM(input_size=nemb,
                                  hidden_size=nhid,
                                  num_layers=nlayers,
                                  dropout=drop,
                                  bidirectional=True)

        self.projection = nn.Sequential(
            nn.Linear(in_features=nhid * 2, out_features=ntags))

        self.crf_tagger = CRF(ntags)
        self._batch_first = batch_first
Example #24
class NewCRF(CRFLayer):
    def __init__(self, tagset_size, start_tag_idx, stop_tag_idx):
        from ..models.crf import CRF
        self.crf = CRF(tagset_size, start_tag_idx, stop_tag_idx)

    def get_loss(self, logits, labels, mask=None):
        loss = self.crf(logits, labels)
        # batch_second
        #  loss = self.crf(logits.transpose(0, 1), labels.transpose(0, 1))
        return loss

    def decode(self, logits, mask=None):
        seq_path = self.crf.decode(logits, mask)
        seq_path = seq_path.cpu().numpy().tolist()[0]
        return seq_path
Example #25
class PyTorchCRF(CRFLayer):
    def __init__(self, tagset_size, start_tag_idx, stop_tag_idx):
        from torchcrf import CRF
        # https://github.com/kmkurn/pytorch-crf/blob/master/torchcrf/__init__.py
        self.crf = CRF(tagset_size, batch_first=True)

    def get_loss(self, logits, labels, mask=None):
        #  # preds = np.argmax(logits.cpu().detach().numpy(), axis=2)
        #  # preds = torch.argmax(logits, axis=2)
        loss = -self.crf(logits, labels)  #, mask)
        return loss

    def decode(self, logits, mask=None):
        seq_path = self.crf.decode(logits)  #, mask.bool())
        return seq_path
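
For reference, a minimal sketch of the linked pytorch-crf API with made-up tensor sizes: the CRF takes a plain batch_first=True flag, calling it returns the sequence log-likelihood (negated here to get a loss), and decode returns the best tag path per batch item.

import torch
from torchcrf import CRF   # pip install pytorch-crf

batch, seq_len, num_tags = 2, 4, 5
emissions = torch.randn(batch, seq_len, num_tags)    # e.g. logits from a tagger
tags = torch.randint(0, num_tags, (batch, seq_len))
mask = torch.tensor([[1, 1, 1, 1],
                     [1, 1, 1, 0]], dtype=torch.bool)

crf = CRF(num_tags, batch_first=True)
loss = -crf(emissions, tags, mask=mask)    # negative log-likelihood for training
paths = crf.decode(emissions, mask=mask)   # list of best tag sequences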
Example #26
class BertCRFTagger(nn.Module):
    def __init__(self, bert, hidden_size, num_tags, dropout):
        super().__init__()
        self.bert = bert
        self.crf = CRF(num_tags)
        self.fc = nn.Linear(hidden_size, num_tags)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input_ids, mask, tags=None):
        bert_output = self.bert(input_ids)
        last_hidden_state = bert_output['hidden_states'][-1]

        emission = self.fc(last_hidden_state)

        if tags is not None:
            loss = -self.crf(
                torch.log_softmax(emission, dim=2), tags, mask=mask)
            return loss.mean()
        else:
            prediction = self.crf.viterbi_decode(emission, mask=mask)
            return prediction
Example #27
class DeepPunctuationCRF(nn.Module):
    def __init__(self, pretrained_model, freeze_bert=False, lstm_dim=-1):
        super(DeepPunctuationCRF, self).__init__()
        self.bert_lstm = DeepPunctuation(pretrained_model, freeze_bert,
                                         lstm_dim)
        self.crf = CRF(len(punctuation_dict), batch_first=True)

    def log_likelihood(self, x, attn_masks, y):
        x = self.bert_lstm(x, attn_masks)
        attn_masks = attn_masks.byte()
        return -self.crf(x, y, mask=attn_masks, reduction='token_mean')

    def forward(self, x, attn_masks, y):
        if len(x.shape) == 1:
            x = x.view(1, x.shape[0])  # add dummy batch for single sample
        x = self.bert_lstm(x, attn_masks)
        attn_masks = attn_masks.byte()
        dec_out = self.crf.decode(x, mask=attn_masks)
        y_pred = torch.zeros(y.shape).long().to(y.device)
        for i in range(len(dec_out)):
            y_pred[i, :len(dec_out[i])] = torch.tensor(dec_out[i]).to(y.device)
        return y_pred
Example #28
    def __init__(self, bert_config, args, intent_label_lst, slot_label_lst):
        super(JointBERT, self).__init__(bert_config)
        self.args = args
        self.num_intent_labels = len(intent_label_lst)
        self.num_slot_labels = len(slot_label_lst)
        # prediction task (what is the difference between these two branches?)
        if args.do_pred:
            self.bert = PRETRAINED_MODEL_MAP[args.model_type](
                config=bert_config)
        # training task
        else:
            self.bert = PRETRAINED_MODEL_MAP[args.model_type].from_pretrained(
                args.model_name_or_path,
                config=bert_config)  # Load pretrained bert

        self.intent_classifier = IntentClassifier(bert_config.hidden_size,
                                                  self.num_intent_labels,
                                                  args.dropout_rate)
        self.slot_classifier = SlotClassifier(bert_config.hidden_size,
                                              self.num_slot_labels,
                                              args.dropout_rate)

        if args.use_crf:
            self.crf = CRF(num_tags=self.num_slot_labels, batch_first=True)
Example #29
class Model(nn.Module):
    def __init__(self,
                 vocabs,
                 word_dim,
                 pos_dim,
                 hidden_size,
                 rnn_layers,
                 dropout_rate,
                 device,
                 bidirectional=True,
                 use_crf=False,
                 embedding=None):
        super(Model, self).__init__()

        word2id, tag2id, label2id = vocabs

        # word embedding set
        self.word_embeddings = nn.Embedding(len(word2id), word_dim)

        if embedding is not None:
            self.word_embeddings.weight.data.copy_(torch.from_numpy(embedding))

        self.tag_embeddings = nn.Embedding(len(tag2id), pos_dim)

        # lstm set
        self.lstm = nn.LSTM(word_dim + pos_dim,
                            hidden_size,
                            rnn_layers,
                            batch_first=True,
                            bidirectional=bidirectional,
                            dropout=dropout_rate)

        # output size is hidden_size * 2 if bidirectional, otherwise hidden_size
        output_size = hidden_size * 2 if bidirectional else hidden_size

        # output size
        self.linear = nn.Linear(output_size, len(label2id))

        self.dropout_rate = dropout_rate

        self.use_crf = use_crf
        if use_crf:
            self.crf = CRF(len(label2id), batch_first=True)

        self.cross_entropy = nn.CrossEntropyLoss(reduction='none')

    # forward function
    def forward(self, word_ids, tag_ids, label_ids):
        # embedding set
        word_emb = self.word_embeddings(word_ids)
        tag_emb = self.tag_embeddings(tag_ids)

        rnn_input = torch.cat([word_emb, tag_emb], dim=-1)

        rnn_input = F.dropout(rnn_input, self.dropout_rate, self.training)

        rnn_outputs, (hn, cn) = self.lstm(rnn_input)

        # project LSTM outputs to label logits
        logits = self.linear(rnn_outputs)

        # [1, 1, 1, 0, 0]
        # [1, 1, 1, 1, 1]
        mask = word_ids.ne(0)
        if self.training:  # training
            if self.use_crf:
                loss = -self.crf(logits, label_ids, mask=mask.byte())
                return loss

            else:
                batch, seq_len, num_label = logits.size()

                logits = logits.view(-1, logits.data.shape[-1])
                label_ids = label_ids.view(-1)

                loss = F.cross_entropy(logits, label_ids, reduction='none')
                loss = loss.view(batch, seq_len)

                loss = loss * mask.float()

                num_tokens = mask.sum(1).sum(0)

                loss = loss.sum(1).sum(0) / num_tokens
                return loss

        label_ids = label_ids.data.cpu().numpy().tolist()
        lengths = mask.sum(1).long().tolist()

        answers = []
        for answer, length in zip(label_ids, lengths):
            answers.append(answer[:length])

        if self.use_crf:
            predictions = self.crf.decode(logits, mask)

            return answers, predictions

        batch_preds = torch.argmax(logits, dim=-1)
        batch_preds = batch_preds.data.cpu().numpy().tolist()

        predictions = []
        for pred, length in zip(batch_preds, lengths):
            predictions.append(pred[:length])

        return answers, predictions
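
The non-CRF branch above computes a per-token cross-entropy and averages it over the unmasked tokens only. The same masked averaging on dummy tensors (all sizes are assumptions):

import torch
import torch.nn.functional as F

batch, seq_len, num_label = 2, 5, 4
logits = torch.randn(batch, seq_len, num_label)
label_ids = torch.randint(0, num_label, (batch, seq_len))
mask = torch.tensor([[1, 1, 1, 0, 0],
                     [1, 1, 1, 1, 1]], dtype=torch.bool)

# per-token loss, reshaped back to (batch, seq_len), then masked and averaged
loss = F.cross_entropy(logits.view(-1, num_label), label_ids.view(-1),
                       reduction='none').view(batch, seq_len)
loss = (loss * mask.float()).sum() / mask.sum()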
Example #30
    def __init__(self,
                 vocabs,
                 word_dim,
                 pos_dim,
                 hidden_size,
                 rnn_layers,
                 dropout_rate,
                 device,
                 bidirectional=True,
                 use_crf=False,
                 embedding=None):
        super(LabelAttention, self).__init__()

        word2id, tag2id, label2id = vocabs  # vocabs == (word2id, tag2id, label2id)

        output_size = hidden_size * 2 if bidirectional else hidden_size  # because bidirectional

        # word embedding set
        self.word_embeddings = nn.Embedding(len(word2id),
                                            word_dim)  # dimension == 100

        # the embedding argument comes from preprocessing (pretrained vectors or None)
        # copy the pretrained matrix into the embedding weights
        if embedding is not None:
            self.word_embeddings.weight.data.copy_(torch.from_numpy(embedding))

        # tags and labels are not embedded during preprocessing, so embed them here
        self.tag_embeddings = nn.Embedding(len(tag2id),
                                           pos_dim)  # tag embedding

        # label embeddings (used by the label attention layers)
        self.label_embeddings = nn.Embedding(len(label2id), output_size)

        # lstm set
        # word_dim + pos_dim == 150
        self.lstm1 = nn.LSTM(word_dim + pos_dim,
                             hidden_size,
                             1,
                             batch_first=True,
                             bidirectional=bidirectional,
                             dropout=dropout_rate)

        self.label_attn1 = MultiHeadAttention(input_size=output_size,
                                              hidden_size=hidden_size,
                                              n_head=8,
                                              dropout=dropout_rate,
                                              device=device)
        self.lstm2 = nn.LSTM(hidden_size,
                             hidden_size,
                             1,
                             batch_first=True,
                             bidirectional=bidirectional,
                             dropout=dropout_rate)

        self.label_attn2 = MultiHeadAttention(input_size=output_size,
                                              hidden_size=hidden_size,
                                              n_head=1,
                                              dropout=dropout_rate,
                                              device=device)

        # output size is hidden_size * 2 if bidirectional, otherwise hidden_size

        # output size
        self.linear = nn.Linear(output_size, len(label2id))

        # drop out set
        self.dropout_rate = dropout_rate

        # using crf
        self.use_crf = use_crf
        if use_crf:
            self.crf = CRF(len(label2id),
                           batch_first=True)  # parameter: number of labels

        # loss function: cross entropy
        self.cross_entropy = nn.CrossEntropyLoss(reduction='none')

        # label total size
        self.label_size = len(label2id)
        self.device = device