Example #1
    def __init__(self,
                 words_size,
                 divs_size,
                 chs_size,
                 embedding_dim,
                 hidden_dim,
                 feature_dim=128):
        super().__init__()
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.words_size = words_size
        self.tagset_size1 = divs_size
        self.tagset_size2 = chs_size

        self.word_embeds = nn.Embedding(words_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim,
                            hidden_dim // 2,
                            num_layers=1,
                            bidirectional=True,
                            batch_first=True)

        self.linear = nn.Linear(hidden_dim, feature_dim)

        self.hidden2tag1 = nn.Linear(feature_dim, self.tagset_size1)
        self.hidden2tag2 = nn.Linear(feature_dim, self.tagset_size2)

        self.div_crf = CRF(self.tagset_size1, batch_first=True)
        self.ch_crf = CRF(self.tagset_size2, batch_first=True)
        self.hidden = self.init_hidden()
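For orientation, here is a minimal sketch of how these layers are typically wired into emissions and a joint CRF loss; the method names, the optional mask argument, and the ReLU after the linear projection are illustrative assumptions, not part of the original snippet:

    def _emissions(self, sentences):
        # sentences: (batch, seq_len) word ids
        embeds = self.word_embeds(sentences)            # (batch, seq_len, embedding_dim)
        lstm_out, _ = self.lstm(embeds)                 # (batch, seq_len, hidden_dim)
        feats = torch.relu(self.linear(lstm_out))       # (batch, seq_len, feature_dim)
        return self.hidden2tag1(feats), self.hidden2tag2(feats)

    def loss(self, sentences, div_tags, ch_tags, mask=None):
        div_emissions, ch_emissions = self._emissions(sentences)
        # pytorch-crf returns the log likelihood; negate it for a loss
        nll_div = -self.div_crf(div_emissions, div_tags, mask=mask, reduction='mean')
        nll_ch = -self.ch_crf(ch_emissions, ch_tags, mask=mask, reduction='mean')
        return nll_div + nll_ch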
Example #2
    def __init__(self, freeze_bert, tokenizer, device, bidirectional):
        super(GPT2LSTMLogRegCRF, self).__init__()
        # Instantiate the GPT-2 model object
        self.gpt2_layer = GPT2Model.from_pretrained('gpt2', output_hidden_states=True, output_attentions=False)
        
        # Freeze GPT-2 layers: if True, freeze the GPT-2 weights
        if freeze_bert:
            for p in self.gpt2_layer.parameters():
                p.requires_grad = False

        self.tokenizer = tokenizer
        self.device = device
        self.bidirectional = bidirectional

        self.dropout = nn.Dropout(0.5)

        # lstm layer
        self.lstm_layer = nn.LSTM(input_size=768, hidden_size=512, num_layers=1, bidirectional=bidirectional, batch_first=True)

        # log reg
        if bidirectional:
            self.hidden2tag = nn.Linear(1024, clf_P_num_labels)
            self.hidden2tag_fine = nn.Linear(1024, clf_P_fine_num_labels)
        else:
            self.hidden2tag = nn.Linear(512, clf_P_num_labels)
            self.hidden2tag_fine = nn.Linear(512, clf_P_fine_num_labels)

        # crf (coarse)
        self.crf_layer = CRF(clf_P_num_labels, batch_first=True)
        # crf (fine)
        self.crf_layer_fine = CRF(clf_P_fine_num_labels, batch_first=True)
    def __init__(self,
                 freeze_bert,
                 tokenizer,
                 device,
                 bidirectional,
                 class_weight_c=None,
                 class_weight_f=None):
        super(GPT2LSTMattenLogRegCRF, self).__init__()

        self.hidden_dim = 512
        self.tokenizer = tokenizer
        self.device = device
        self.bidirectional = bidirectional
        self.class_weight_c = class_weight_c
        self.class_weight_f = class_weight_f

        # Instantiate the GPT-2 model object
        self.gpt2_layer = GPT2Model.from_pretrained('gpt2',
                                                    output_hidden_states=True,
                                                    output_attentions=False)

        # Freeze GPT-2 layers: if True, freeze the GPT-2 weights
        if freeze_bert:
            for p in self.gpt2_layer.parameters():
                p.requires_grad = False

        # lstm layer
        self.lstm_layer = nn.LSTM(input_size=768,
                                  hidden_size=512,
                                  num_layers=1,
                                  bidirectional=bidirectional,
                                  batch_first=True)

        # attention mechanism
        if bidirectional:
            self.self_attention = nn.MultiheadAttention(
                self.hidden_dim * 2, 1,
                bias=True)  # attention mechanism from PyTorch
        else:
            self.self_attention = nn.MultiheadAttention(self.hidden_dim,
                                                        1,
                                                        bias=True)

        # log reg
        if bidirectional:
            self.hidden2tag = nn.Linear(1024, clf_P_num_labels)
            self.hidden2tag_fine = nn.Linear(1024, clf_P_fine_num_labels)
        else:
            self.hidden2tag = nn.Linear(512, clf_P_num_labels)
            self.hidden2tag_fine = nn.Linear(512, clf_P_fine_num_labels)

        # crf (coarse)
        self.crf_layer = CRF(clf_P_num_labels, batch_first=True)
        # crf (fine)
        self.crf_layer_fine = CRF(clf_P_fine_num_labels, batch_first=True)
    def __init__(self, freeze_bert, tokenizer, device, bidirectional, class_weight_c=None, class_weight_f=None):
        super(BERTLSTMMulattenLogRegCRF, self).__init__()

        self.hidden_dim = 512
        self.tokenizer = tokenizer
        self.device = device
        self.bidirectional = bidirectional
        self.class_weight_c = class_weight_c
        self.class_weight_f = class_weight_f

        #Instantiating BERT model object 
        self.bert_layer = BertModel.from_pretrained('bert-base-uncased', output_hidden_states=True, output_attentions=False)
        
        # Freeze BERT layers: if True, freeze the BERT weights
        if freeze_bert:
            for p in self.bert_layer.parameters():
                p.requires_grad = False

        # lstm layer
        self.lstm_layer = nn.LSTM(input_size=768, hidden_size=512, num_layers=1, bidirectional=bidirectional, batch_first=True)

        # attention mechanism
        if bidirectional:
            self.self_attention = nn.MultiheadAttention(self.hidden_dim * 2, 1, bias=True) # attention mechanism from PyTorch
            self.self_attention2 = nn.MultiheadAttention(self.hidden_dim * 2, 2, bias=True) # Multi-head attention mechanism

        else:
            self.self_attention = nn.MultiheadAttention(self.hidden_dim, 1, bias=True) 
            self.self_attention2 = nn.MultiheadAttention(self.hidden_dim, 2, bias=True)

        # log reg
        if bidirectional:
            self.hidden2tag = nn.Linear(1024, clf_P_num_labels)
            self.hidden2tag_fine = nn.Linear(1024, clf_P_fine_num_labels)
        else:
            self.hidden2tag = nn.Linear(512, clf_P_num_labels)
            self.hidden2tag_fine = nn.Linear(512, clf_P_fine_num_labels)

        # crf (coarse)
        self.crf_layer = CRF(clf_P_num_labels, batch_first=True)
        # crf (fine)
        self.crf_layer_fine = CRF(clf_P_fine_num_labels, batch_first=True)

        # loss calculation (for fine labels)
        self.loss_fct_fine = nn.CrossEntropyLoss(weight=class_weight_f)

        # Weighted uncertainty-based loss (if args.loss == 'mtl')
        # create the parameter directly on the target device (reassigning a
        # registered nn.Parameter with a plain tensor would raise a TypeError)
        self.log_vars = nn.Parameter(torch.zeros(2, device=device), requires_grad=True)
 def build_model(self):
     '''
     build the BERT encoder, the tag projection layer, and the CRF layer
     '''
     self.hidden2tag = nn.Linear(self.embedding_dim, self.n_tags)
     self.crf = CRF(self.n_tags, batch_first=True)
     self.bert = transformers.BertModel.from_pretrained('bert-base-chinese')
    def __init__(self, opt):
        super(NER_NET, self).__init__()
        self.opt = opt
        self.hidden_size = opt.hidden_size
        self.bert = BertModel.from_pretrained(common.bert_path)
        self.pos_embedding = nn.Embedding(len(common.posDic),
                                          opt.pos_embed_size,
                                          padding_idx=common.posDic['PAD'])
        self.bio_label_embedding = nn.Embedding(len(common.labelDic) + 1,
                                                opt.label_embed_size,
                                                padding_idx=len(common.labelDic))

        # if opt.pos_after_gcn:
        #     gcn_input_size = self.hidden_size
        #     biaffine_input_size = self.hidden_size + opt.label_embed_size + opt.pos_embed_size
        # else:
        biaffine_input_size = self.hidden_size + opt.label_embed_size + opt.pos_embed_size
        self.gcn = GCN(in_dim=self.hidden_size,
                       mem_dim=self.hidden_size,
                       num_layers=opt.gcn_layer,
                       in_drop=opt.gcn_dropout,
                       out_drop=opt.gcn_dropout,
                       batch=True)

        self.biaffine = Biaffine(in1_features=biaffine_input_size,
                                 in2_features=biaffine_input_size,
                                 out_features=2)
        self.dropout = nn.Dropout(opt.dropout)
        self.classifier = nn.Linear(self.hidden_size + opt.pos_embed_size, len(common.labelDic))
        self.crf = CRF(len(common.labelDic), batch_first=True)

        self.cross_entropy_loss = nn.CrossEntropyLoss(ignore_index=-1)
Example #7
    def __init__(self,
                 text_field,
                 label_field,
                 rnn_size,
                 emb_dim,
                 update_pretrained=False):
        super().__init__()

        voc_size = len(text_field.vocab)
        self.n_labels = len(label_field.vocab)

        self.embedding = nn.Embedding(voc_size, emb_dim)
        if text_field.vocab.vectors is not None:
            self.embedding.weight = torch.nn.Parameter(
                text_field.vocab.vectors, requires_grad=update_pretrained)

        self.rnn = nn.LSTM(input_size=emb_dim,
                           hidden_size=rnn_size,
                           bidirectional=True,
                           num_layers=1)

        self.top_layer = nn.Linear(2 * rnn_size, self.n_labels)

        self.pad_word_id = text_field.vocab.stoi[text_field.pad_token]
        self.pad_label_id = label_field.vocab.stoi[label_field.pad_token]

        self.crf = CRF(self.n_labels)
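Note that this CRF is constructed without `batch_first=True`, so pytorch-crf expects sequence-first tensors of shape (seq_len, batch, n_labels). A minimal scoring sketch under that assumption (the `compute_loss` method below is illustrative, not part of the original snippet):

    def compute_loss(self, words, labels):
        # words, labels: (seq_len, batch); padding marked by pad_word_id
        embedded = self.embedding(words)
        rnn_out, _ = self.rnn(embedded)                 # (seq_len, batch, 2 * rnn_size)
        emissions = self.top_layer(rnn_out)             # (seq_len, batch, n_labels)
        mask = (words != self.pad_word_id)              # bool mask, (seq_len, batch)
        return -self.crf(emissions, labels, mask=mask, reduction='mean')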
Example #8
 def __init__(self,
              embedding_size,
              hidden_size,
              vocab_size,
              target_size,
              num_layers,
              lstm_drop_out,
              nn_drop_out,
              pretrained_embedding=False,
              embedding_weight=None):
     super(BiLSTM_CRF, self).__init__()
     self.hidden_size = hidden_size
     self.nn_drop_out = nn_drop_out
     # nn.Embedding: parameter size (num_words, embedding_dim)
     # for every word id, output an embedding for this word
     # input size: N x W, N is batch size, W is max sentence len
     # output size: (N, W, embedding_dim), embedding all the words
     if pretrained_embedding:
         self.embedding = nn.Embedding.from_pretrained(embedding_weight)
         self.embedding.weight.requires_grad = True
     else:
         self.embedding = nn.Embedding(vocab_size, embedding_size)
     self.bilstm = nn.LSTM(input_size=embedding_size,
                           hidden_size=hidden_size,
                           batch_first=True,
                           num_layers=num_layers,
                           dropout=lstm_drop_out if num_layers > 1 else 0,
                           bidirectional=True)
     if nn_drop_out > 0:
         self.dropout = nn.Dropout(nn_drop_out)
     self.classifier = nn.Linear(hidden_size * 2, target_size)
     # https://pytorch-crf.readthedocs.io/en/stable/_modules/torchcrf.html
     self.crf = CRF(target_size, batch_first=True)
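As a rough usage sketch, the layers above can be combined into emissions, a CRF negative log-likelihood, and Viterbi decoding as follows (batch-first tensors; this `forward` signature is an assumption, not the repository's actual method):

 def forward(self, sentences, tags, mask):
     # sentences, tags, mask: (batch, seq_len); mask marks non-pad tokens
     embeds = self.embedding(sentences)
     lstm_out, _ = self.bilstm(embeds)                  # (batch, seq_len, hidden_size * 2)
     if self.nn_drop_out > 0:
         lstm_out = self.dropout(lstm_out)
     emissions = self.classifier(lstm_out)              # (batch, seq_len, target_size)
     loss = -self.crf(emissions, tags, mask=mask)       # negative log likelihood
     return loss, self.crf.decode(emissions, mask=mask)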
    def __init__(self,
                 number_of_categories,
                 vocab_size=VocabSize,
                 hidden=HiddenSize,
                 max_len=SentenceLength,
                 num_hidden_layers=HiddenLayerNum,
                 attention_heads=AttentionHeadNum,
                 dropout_prob=DropOut,
                 intermediate_size=IntermediateSize):
        super(RobertaNer, self).__init__()
        self.vocab_size = vocab_size
        self.hidden_size = hidden
        self.max_len = max_len
        self.num_hidden_layers = num_hidden_layers
        self.attention_head_num = attention_heads
        self.dropout_prob = dropout_prob
        self.attention_head_size = hidden // attention_heads
        self.tokenizer = Tokenizer(VocabPath)
        self.intermediate_size = intermediate_size
        self.number_of_categories = number_of_categories

        # declare the network modules
        self.roberta_emb = TokenEmbedding()
        self.position_emb = PositionEmbedding()
        self.bi_gru = BiGRU(self.number_of_categories,
                            self.number_of_categories)
        self.transformer_blocks = nn.ModuleList(
            Transformer(hidden_size=self.hidden_size,
                        attention_head_num=self.attention_head_num,
                        attention_head_size=self.attention_head_size,
                        intermediate_size=self.intermediate_size).to(device)
            for _ in range(self.num_hidden_layers))
        self.mlm = Mlm(self.hidden_size, self.number_of_categories)
        self.crf = CRF(self.number_of_categories, batch_first=True)
Example #10
    def __init__(self, bert_dir, num_tags, opt, dropout_prob=0.1, **kwargs):
        super(CRFModel, self).__init__(bert_dir=bert_dir,
                                       dropout_prob=dropout_prob)
        self.opt = opt

        out_dims = self.bert_config.hidden_size

        mid_linear_dims = kwargs.pop('mid_linear_dims', 128)

        self.mid_linear = nn.Sequential(nn.Linear(out_dims, mid_linear_dims),
                                        nn.ReLU(), nn.Dropout(dropout_prob))

        out_dims = mid_linear_dims

        self.classifier = nn.Linear(out_dims, num_tags)

        self.loss_weight = nn.Parameter(torch.FloatTensor(1),
                                        requires_grad=True)
        self.loss_weight.data.fill_(-0.5)

        self.crf_module = CRF(num_tags=num_tags, batch_first=True)

        init_blocks = [self.mid_linear, self.classifier]

        self._init_weights(
            init_blocks, initializer_range=self.bert_config.initializer_range)
Example #11
 def __init__(self,
              char_vocab_dim,
              char_embed_dim,
              char_hidden_dim,
              sub_vocab_dims,
              sub_embed_dims,
              sub_hidden_dims,
              tag_dim,
              batch_size,
              use_gpu=True):
     super().__init__()
     self.use_gpu = use_gpu
     self.embed_char = nn.Embedding(char_vocab_dim, char_embed_dim)
     # use nn.ModuleList so the sub-embeddings are registered with the model
     self.embed_subs = nn.ModuleList([
         nn.Embedding(vocab_dim, embed_dim, tag_dim).cuda()
         for vocab_dim, embed_dim in zip(sub_vocab_dims, sub_embed_dims)
     ])
     self.lstm_block_char = BiLSTMBlock(char_embed_dim, char_hidden_dim,
                                        tag_dim, batch_size)
     # likewise registered via nn.ModuleList
     self.lstm_block_subs = nn.ModuleList([
         BiLSTMBlock(embed_dim, hidden_dim, tag_dim, batch_size).cuda()
         for embed_dim, hidden_dim in zip(sub_embed_dims, sub_hidden_dims)
     ])
     self.outs2tag = nn.Linear(tag_dim * (1 + len(sub_hidden_dims)),
                               tag_dim)
     self.crf = CRF(tag_dim)
    def __init__(self, config):
        """Initialize token classification model with a CRF layer.

        :param config: Transformers config object.
        """
        super().__init__(config)
        self.crf = CRF(config.num_labels, batch_first=True)
Example #13
    def __init__(self, distilbert_config, args, intent_label_lst,
                 slot_label_lst):
        super(JointDistilBERT, self).__init__(distilbert_config)
        self.args = args
        self.num_intent_labels = len(intent_label_lst)
        self.num_slot_labels = len(slot_label_lst)
        if args.do_pred:
            self.distilbert = PRETRAINED_MODEL_MAP[args.model_type](
                config=distilbert_config)
        else:
            self.distilbert = PRETRAINED_MODEL_MAP[
                args.model_type].from_pretrained(
                    args.model_name_or_path,
                    config=distilbert_config)  # Load pretrained bert

        self.intent_classifier = IntentClassifier(
            distilbert_config.hidden_size, self.num_intent_labels,
            args.dropout_rate)
        self.slot_classifier = SlotClassifier(distilbert_config.hidden_size,
                                              self.num_slot_labels,
                                              args.dropout_rate)

        if args.use_crf:
            self.crf = CRF(num_tags=self.num_slot_labels, batch_first=True)

        self.slot_pad_token_idx = slot_label_lst.index(args.slot_pad_label)
Example #14
    def __init__(self, config, num_labels, num_ner_labels):
        super(BertForTABSAJoint_CRF_T, self).__init__()
        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(
            config.hidden_size,
            num_labels)  # num_labels is the number of label types (0 & 1)
        self.ner_hidden2tag = nn.Linear(
            config.hidden_size, num_ner_labels
        )  # num_ner_labels is the number of NER tag types (TO, BIO, etc.)
        self.num_labels = num_labels
        self.num_ner_labels = num_ner_labels
        # CRF
        self.CRF_model = CRF(num_ner_labels, batch_first=True)

        def init_weights(module):
            if isinstance(module, (nn.Linear, nn.Embedding)):
                # Slightly different from the TF version which uses truncated_normal for initialization
                # cf https://github.com/pytorch/pytorch/pull/5617
                module.weight.data.normal_(mean=0.0,
                                           std=config.initializer_range)
            elif isinstance(module, BERTLayerNorm):
                module.beta.data.normal_(mean=0.0,
                                         std=config.initializer_range)
                module.gamma.data.normal_(mean=0.0,
                                          std=config.initializer_range)
            if isinstance(module, nn.Linear):
                module.bias.data.zero_()

        self.apply(init_weights)
    def __init__(self, config, pad_idx, lstm_hidden_dim, num_lstm_layers,
                 bidirectional, num_labels):
        super(BertLstmCrf, self).__init__(config)
        self.dropout_prob = config.hidden_dropout_prob
        self.pad_idx = pad_idx
        self.lstm_hidden_dim = lstm_hidden_dim
        self.num_lstm_layers = num_lstm_layers
        self.bidirectional = bidirectional
        self.num_labels = num_labels

        self.bert = BertModel(config)

        if self.num_lstm_layers > 1:
            self.lstm = nn.LSTM(input_size=config.hidden_size,
                                hidden_size=self.lstm_hidden_dim,
                                num_layers=self.num_lstm_layers,
                                bidirectional=self.bidirectional,
                                dropout=self.dropout_prob,
                                batch_first=True)
        else:
            self.lstm = nn.LSTM(input_size=config.hidden_size,
                                hidden_size=self.lstm_hidden_dim,
                                num_layers=self.num_lstm_layers,
                                bidirectional=self.bidirectional,
                                batch_first=True)
        if self.bidirectional:
            self.linear = nn.Linear(self.lstm_hidden_dim * 2, self.num_labels)
        else:
            self.linear = nn.Linear(self.lstm_hidden_dim, self.num_labels)

        self.crf_layer = CRF(self.num_labels, batch_first=True)
        self.dropout_layer = nn.Dropout(self.dropout_prob)

        self.init_weights()
Example #16
    def __init__(self, args, pretrained_word_matrix):
        super(BiLSTM_CNN_CRF, self).__init__()
        self.args = args

        self.char_cnn = CharCNN(max_word_len=args.max_word_len,
                                kernel_lst=args.kernel_lst,
                                num_filters=args.num_filters,
                                char_vocab_size=args.char_vocab_size,
                                char_emb_dim=args.char_emb_dim,
                                final_char_dim=args.final_char_dim)

        if pretrained_word_matrix is not None:
            self.word_emb = nn.Embedding.from_pretrained(pretrained_word_matrix)
        else:
            self.word_emb = nn.Embedding(args.word_vocab_size, args.word_emb_dim, padding_idx=0)
            nn.init.uniform_(self.word_emb.weight, -0.25, 0.25)

        self.bi_lstm = nn.LSTM(input_size=args.word_emb_dim + args.final_char_dim,
                               hidden_size=args.hidden_dim // 2,  # Bidirectional will double the hidden_size
                               bidirectional=True,
                               batch_first=True)

        self.output_linear = nn.Linear(args.hidden_dim, len(get_labels(args)))

        self.crf = CRF(num_tags=len(get_labels(args)), batch_first=True)
Example #17
    def __init__(self, bert_name: str, num_labels: int, num_layers: int,
                 hidden_size: int, dropout_prob: float, rnn_type: str,
                 bidirectional: bool, use_crf: bool, freeze_bert: bool):

        super().__init__()
        self.bert = BertModel.from_pretrained(bert_name)
        if freeze_bert:
            # setting requires_grad on the module itself does not freeze its
            # parameters; requires_grad_(False) freezes every BERT weight
            self.bert.requires_grad_(False)
        if num_layers > 0:
            if rnn_type == "gru":
                self.rnn = nn.GRU(self.bert.config.hidden_size,
                                  hidden_size,
                                  num_layers=num_layers,
                                  bidirectional=bidirectional,
                                  batch_first=True)
            else:
                self.rnn = nn.LSTM(self.bert.config.hidden_size,
                                   hidden_size,
                                   num_layers=num_layers,
                                   bidirectional=bidirectional,
                                   batch_first=True)
        else:
            self.rnn = nn.Identity()
        self.classifier = nn.Linear((1 + bidirectional) * hidden_size,
                                    num_labels)
        self.dropout = nn.Dropout(dropout_prob)
        self.use_crf = use_crf
        if self.use_crf:
            self.crf = CRF(num_labels, batch_first=True)
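A minimal forward sketch for this configuration, showing how `use_crf` would typically switch between a CRF negative log-likelihood and raw emissions; the method, its argument names, and the use of `last_hidden_state` are assumptions for illustration:

    def forward(self, input_ids, attention_mask, labels=None):
        # (batch, seq_len, bert_hidden) from BERT, optionally re-encoded by the RNN
        hidden = self.bert(input_ids, attention_mask=attention_mask).last_hidden_state
        if not isinstance(self.rnn, nn.Identity):
            hidden, _ = self.rnn(hidden)
        emissions = self.classifier(self.dropout(hidden))
        if labels is not None and self.use_crf:
            # pytorch-crf expects a bool/byte mask aligned with the labels
            return -self.crf(emissions, labels, mask=attention_mask.bool())
        return emissions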
Example #18
 def __init__(self, args1, args2, args3, num_tags):
     super(CNN_biLSTM_CRF, self).__init__()
     self.num_tags = num_tags
     self.cnn = TextCNN(args1)
     self.bilstm = BILSTM(args2)
     self.convs = CNNlayers(args3)
     self.crf = CRF(self.num_tags, batch_first=True)
    def __init__(self,
                 data_config,
                 model_config="embs:8|conv:16|l1:16|do:0.0|oc:BI"):
        super(Model, self).__init__()

        no_syllables = data_config['num_tokens']
        log.info("no. syllables: %d" % no_syllables)

        config = utils.parse_model_params(model_config)
        conv_filters = config['conv']
        dropout_rate = config.get("do", 0)

        self.output_scheme = output_tags.get_scheme(config["oc"])

        self.sy_embeddings = prepare_embedding(data_config, config)

        if "crf" in config:
            self.crf = CRF(self.output_scheme.num_tags, batch_first=True)

        emb_dim = self.sy_embeddings.weight.shape[1]

        self.id_conv = IteratedDilatedConvolutions(emb_dim, conv_filters,
                                                   dropout_rate)

        self.linear1 = nn.Linear(conv_filters, config['l1'])
        self.linear2 = nn.Linear(config['l1'], self.output_scheme.num_tags)

        self.model_params = model_config
Example #20
    def __init__(
        self,
        word_vocab: Vocab,
        tag_vocab: Vocab,
        embedding: nn.Embedding,
        hidden_dim: int,
        num_layers: int,
        dropout: float = 0.2,
    ):
        super().__init__(word_vocab, tag_vocab)
        self.hidden_dim = hidden_dim

        self.dropout = nn.Dropout(dropout)

        self.embedding = embedding
        self.lstm = nn.LSTM(
            embedding.embedding_dim,
            hidden_dim,
            num_layers=num_layers,
            bidirectional=True,
        )

        # Maps the output of the LSTM into tag space.
        self.fc = nn.Linear(hidden_dim * 2, self.tagset_size)

        self.f1 = F1(num_classes=self.tagset_size)
        self.crf = CRF(num_tags=self.tagset_size)
        self.save_hyperparameters()
    def __init__(self, vocab_size, embed_size, hidden_dim, weight, dropout,
                 word2id, tag2id):
        super(NERLSTM_CRF, self).__init__()

        # self.embedding_dim = embedding_dim
        self.embed_size = embed_size
        self.hidden_dim = hidden_dim
        self.vocab_size = len(word2id) + 1
        self.tag_to_ix = tag2id
        self.tagset_size = len(tag2id)

        # self.word_embeds = nn.Embedding(self.vocab_size, self.embedding_dim)
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.embedding.weight.data.copy_(weight)
        # whether the embedding is frozen; requires_grad=True keeps it trainable
        self.embedding.weight.requires_grad = True
        self.dropout = nn.Dropout(dropout)

        # BiLSTM + CRF
        self.lstm = nn.LSTM(self.embed_size,
                            self.hidden_dim // 2,
                            num_layers=1,
                            bidirectional=True,
                            batch_first=False)

        self.hidden2tag = nn.Linear(self.hidden_dim, self.tagset_size)
        self.crf = CRF(self.tagset_size)
Example #22
    def __init__(self, config):
        super(BiLstmCRF, self).__init__()

        # the embedding is updated during training (freeze=False)
        self.embedding = nn.Embedding.from_pretrained(
            config.embedding_pretrained, freeze=False)

        self.device = config.device

        self.tag_num = config.tag_num
        self.seqLen = config.pad_size

        self.lstm = nn.LSTM(
            input_size=config.input_size,
            hidden_size=config.hidden_size // 2,
            num_layers=config.hidden_layer_num,
            dropout=config.dropout,
            bidirectional=True,  # bidirectional LSTM
            batch_first=True)  # batch is the first tensor dimension

        self.attention = SelfAttention(config.pad_size)

        self.pos_fc = PositionFeedForward(config.pad_size, config.pad_size)

        self.conditionLinear = nn.Linear(config.hidden_size, config.pad_size)
        self.attLinear = nn.Linear(config.pad_size, 1)
        self.linear = nn.Linear(config.pad_size, config.tag_num)

        self.conditionNorm = ConditionalLayerNorm(config.pad_size)

        self.crf = CRF(config.tag_num, batch_first=True)
Example #23
    def __init__(self,
                 model_path_list,
                 bert_dir_list,
                 num_tags,
                 device,
                 lamb=1 / 3):

        self.models = []
        self.crf_module = CRF(num_tags=num_tags, batch_first=True)
        self.lamb = lamb

        for idx, _path in enumerate(model_path_list):
            print(f'Load model from {_path}')

            print(f'Load model type: {bert_dir_list[0]}')
            model = CRFModel(bert_dir=bert_dir_list[0], num_tags=num_tags)

            model.load_state_dict(
                torch.load(_path, map_location=torch.device('cpu')))

            model.eval()
            model.to(device)

            self.models.append(model)
            if idx == 0:
                print(f'Load CRF weight from {_path}')
                self.crf_module.load_state_dict(model.crf_module.state_dict())
                self.crf_module.to(device)
Example #24
    def __init__(self, config):
        super(TransformerEncoderModel, self).__init__(config)
        self.src_mask = None
        self._dim_embedding = self._config.model.dim_embedding
        self._dim_hidden = self._config.model.dim_hidden
        self._num_layer = self._config.model.nlayer

        self._num_vocab = self._config.data.num_vocab
        self._num_tag = self._config.data.num_tag

        self._dropout = self._config.learn.dropout_rate
        self.pos_encoder = PositionalEncoding(self._dim_embedding,
                                              self._dropout)
        self._embedding = nn.Embedding(self._num_vocab, self._dim_embedding)
        encoder_layers = TransformerEncoderLayer(
            self._dim_hidden, self._config.model.nhead,
            self._config.model.nhid, self._config.learn.dropout_rate)
        self.transformer_encoder = TransformerEncoder(
            encoder_layers, self._config.model.nlayer)
        self.lstm = nn.LSTM(input_size=self._dim_embedding,
                            hidden_size=self._dim_hidden // 2,
                            bidirectional=True,
                            num_layers=self._num_layer,
                            dropout=self._dropout)
        self._hidden2label = nn.Linear(self._dim_hidden, self._num_tag)
        self._crf = CRF(self._num_tag)
    def __init__(self, conf, vocab, char_vocab, tag_vocab):
        super(C_RNN, self).__init__()
        # CHARACTER EMBEDDINGS AND WORD-LEVEL BILSTM
        self.char_RNN = char_RNN(conf, char_vocab)
        in_shape = self.char_RNN.output_size

        # Main BiLSTM
        # ======================================================================
        self.word_RNN = word_RNN(in_shape, None, conf["w_rnn_layers"])
        output_size = self.word_RNN.output_size
        self.mid_dropout = nn.Dropout(conf["mid_dropout"])
        self.w_bin_out = conf["w_bin_out"]
        out_bin_size = 2 * output_size if self.w_bin_out == "maxpool" else output_size

        # Output Layers
        # ======================================================================
        # Binary
        self.bin_out = nn.Linear(out_bin_size, 1)

        # NER
        self.n_tags = len(tag_vocab)
        self.ner_out = nn.Linear(output_size, self.n_tags)

        # CRF Layer
        self.use_crf = conf["use_crf"]
        if self.use_crf:
            self.crf = CRF(self.n_tags, batch_first=True)
        # ======================================================================

        # Maybe move to GPU
        self.to(self.device)
Example #26
    def __init__(self, hparams):
        super(CRF_Model, self).__init__()
        self.name = hparams.model_name
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'

        self.word_embedding = nn.Embedding(hparams.vocab_size,
                                           hparams.embedding_dim,
                                           padding_idx=0)
        self.word_dropout = nn.Dropout(hparams.dropout)

        if hparams.embeddings is not None:
            print("initializing embeddings from pretrained")
            self.word_embedding.weight.data.copy_(hparams.embeddings)

        self.pos_embedding = nn.Embedding(hparams.pos_vocab_size,
                                          hparams.embedding_dim,
                                          padding_idx=0)
        self.pos_dropout = nn.Dropout(hparams.dropout)

        if hparams.pos_embeddings is not None:
            print("initializing embeddings from pretrained")
            self.pos_embedding.weight.data.copy_(hparams.pos_embeddings)

        self.lstm = nn.LSTM(
            hparams.embedding_dim * 2,
            hparams.hidden_dim,
            bidirectional=hparams.bidirectional,
            num_layers=hparams.num_layers,
            dropout=hparams.dropout if hparams.num_layers > 1 else 0,
            batch_first=True)

        lstm_output_dim = hparams.hidden_dim if hparams.bidirectional is False else hparams.hidden_dim * 2
        self.dropout = nn.Dropout(hparams.dropout)
        self.classifier = nn.Linear(lstm_output_dim, hparams.num_classes)
        self.crf = CRF(hparams.num_classes, batch_first=True)
Example #27
    def __init__(self, vocab_size, emb_dim, hidden_size, out_size, dropout):
        """
        模型结构搭建
        :param vocab_size: 词表大小
        :param emb_dim: 词向量维数
        :param hidden_size: LSTM隐含层大小
        :param out_size: 标签数量
        """
        super(BiLSTM_CRF, self).__init__()
        # character embedding matrix
        self.embedding = nn.Embedding(num_embeddings=vocab_size,
                                      embedding_dim=emb_dim)

        self.dropout = nn.Dropout(dropout)

        self.bilstm = nn.LSTM(input_size=emb_dim,
                              hidden_size=hidden_size,
                              num_layers=1,
                              batch_first=True,
                              bidirectional=True)
        # the BiLSTM concatenates the forward and backward hidden states, hence hidden_size * 2
        self.hidden2tag = nn.Linear(in_features=hidden_size * 2,
                                    out_features=out_size)

        self.crf = CRF(num_tags=out_size, batch_first=True)
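For prediction, the CRF's Viterbi decoding is typically applied to the emissions computed from the layers above; a minimal sketch (the `predict` method and its mask argument are assumptions, not part of the original snippet):

    def predict(self, sentences, mask=None):
        # sentences: (batch, seq_len) character ids; mask marks non-pad positions
        emb = self.dropout(self.embedding(sentences))
        lstm_out, _ = self.bilstm(emb)                  # (batch, seq_len, hidden_size * 2)
        emissions = self.hidden2tag(lstm_out)           # (batch, seq_len, out_size)
        return self.crf.decode(emissions, mask=mask)    # list of best tag-id sequences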
 def __init__(self, config, word_vocab, vocab_size, tag_num, vector_path):
     super(TransformerEncoderModel, self).__init__()
     self.use_dae = config.use_dae
     self.dae_lambda = config.dae_lambda
     self.use_dice = config.use_dice
     self.dice_lambda = config.dice_lambda
     self.vocab_size = vocab_size
     self.word_vocab = word_vocab
     self.tag_num = tag_num
     self.embedding_dim = config.embedding_dim
     self.hidden_dim = config.hidden_dim
     self.bidirectional = True
     self.num_layers = config.num_layers
     self.dropout = config.dropout
     self.drop = nn.Dropout(self.dropout)
     self.vector_path = vector_path
     self.src_mask = None
     self.pos_encoder = PositionalEncoding(self.embedding_dim, self.dropout)
     encoder_layers = TransformerEncoderLayer(self.embedding_dim, config.n_head, config.n_hid, self.dropout)
     self.transformer_encoder = TransformerEncoder(encoder_layers, config.n_layers)
     self.embedding = nn.Embedding(self.vocab_size, self.embedding_dim)
     if config.use_vectors:
         logger.info('Loading word vectors from {}...'.format(self.vector_path))
         embed_weights = load_word2vec(self.vector_path, self.word_vocab, embedding_dim=self.embedding_dim)
         logger.info('Finished load word vectors')
         self.embedding = nn.Embedding.from_pretrained(embed_weights, freeze=False).to(DEVICE)
     # self.embedding.weight.requires_grad = True
     self.lstm = nn.LSTM(input_size=self.embedding_dim, hidden_size=self.hidden_dim // 2,
                         bidirectional=self.bidirectional, num_layers=1).to(DEVICE)
     self.linear = nn.Linear(self.hidden_dim, self.tag_num)
     self.lm_decoder = nn.Linear(self.hidden_dim, self.vocab_size)
     self.init_weights()
     self.crf_layer = CRF(self.tag_num)
     self.dice_loss = DiceLoss1()
     self.criterion = nn.CrossEntropyLoss()
Example #29
    def __init__(self,
                 word_embed_dim,
                 extra_embed_dim,
                 hidden_dim,
                 vocab_size,
                 extra_size,
                 tagset_size,
                 device='cuda'):
        super(BiLSTM_CRF_extra, self).__init__()
        self.tagset_size = tagset_size

        # Word embedding layer
        self.embedding = nn.Embedding(vocab_size, word_embed_dim)
        self.extra_embedding = nn.Embedding(extra_size, extra_embed_dim)

        # Bidirectional LSTM layer
        self.half_hidden = hidden_dim // 2
        self.lstm = nn.LSTM(word_embed_dim + extra_embed_dim,
                            hidden_dim // 2,
                            bidirectional=True)

        # Mapping lstm output to tags
        self.fc = nn.Linear(hidden_dim, tagset_size)

        # CRF layer
        self.crf = CRF(tagset_size)

        self.device = device
        self.to(device)
        self.init_hidden()
 def __init__(self, input_size, hidden_size, num_layers, num_classes):
     super(bilstm_crf, self).__init__()
     self.hidden_size = hidden_size
     self.num_layers = num_layers
     # note: despite the class name, this uses a bidirectional GRU
     self.lstm = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True)
     self.fc = nn.Linear(hidden_size * 2, num_classes)
     self.crf = CRF(num_classes, batch_first=True)
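Finally, a self-contained sketch of the pytorch-crf calls that the snippets above rely on; the import and the toy tensor shapes are assumptions for illustration:

import torch
from torchcrf import CRF  # pip install pytorch-crf

# toy batch: 2 sentences of length 5, 4 possible tags
emissions = torch.randn(2, 5, 4)                  # (batch, seq_len, num_tags)
tags = torch.randint(0, 4, (2, 5))                # (batch, seq_len)
mask = torch.ones(2, 5, dtype=torch.bool)         # non-pad positions

crf = CRF(4, batch_first=True)
loss = -crf(emissions, tags, mask=mask)           # negative log likelihood
best_paths = crf.decode(emissions, mask=mask)     # list of tag-id lists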