def __init__(self, words_size, divs_size, chs_size, embedding_dim, hidden_dim, feature_dim=128):
    super().__init__()
    self.embedding_dim = embedding_dim
    self.hidden_dim = hidden_dim
    self.words_size = words_size
    self.tagset_size1 = divs_size
    self.tagset_size2 = chs_size
    self.word_embeds = nn.Embedding(words_size, embedding_dim)
    self.lstm = nn.LSTM(embedding_dim, hidden_dim // 2, num_layers=1,
                        bidirectional=True, batch_first=True)
    self.linear = nn.Linear(hidden_dim, feature_dim)
    self.hidden2tag1 = nn.Linear(feature_dim, self.tagset_size1)
    self.hidden2tag2 = nn.Linear(feature_dim, self.tagset_size2)
    self.div_crf = CRF(self.tagset_size1, batch_first=True)
    self.ch_crf = CRF(self.tagset_size2, batch_first=True)
    self.hidden = self.init_hidden()
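# --- Hypothetical usage sketch (not from the original source) ---
# One way the joint loss for the two-headed model above might look:
# shared BiLSTM features feed two emission heads, and each CRF
# contributes a negative log-likelihood. The method name `loss` and
# the `mask` argument are assumptions for illustration.
def loss(self, sentences, div_tags, ch_tags, mask=None):
    embeds = self.word_embeds(sentences)               # (B, T, E)
    lstm_out, _ = self.lstm(embeds)                    # (B, T, H)
    feats = self.linear(lstm_out)                      # (B, T, F)
    div_emissions = self.hidden2tag1(feats)            # (B, T, n_div_tags)
    ch_emissions = self.hidden2tag2(feats)             # (B, T, n_ch_tags)
    # torchcrf's CRF.forward returns the log-likelihood; negate it for a loss.
    nll_div = -self.div_crf(div_emissions, div_tags, mask=mask, reduction='mean')
    nll_ch = -self.ch_crf(ch_emissions, ch_tags, mask=mask, reduction='mean')
    return nll_div + nll_ch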
def __init__(self, freeze_bert, tokenizer, device, bidirectional):
    super(GPT2LSTMLogRegCRF, self).__init__()
    # Instantiate the GPT-2 model object
    self.gpt2_layer = GPT2Model.from_pretrained('gpt2', output_hidden_states=True,
                                                output_attentions=False)
    # If freeze_bert is True, freeze the GPT-2 weights
    if freeze_bert:
        for p in self.gpt2_layer.parameters():
            p.requires_grad = False
    self.tokenizer = tokenizer
    self.device = device
    self.bidirectional = bidirectional
    self.dropout = nn.Dropout(0.5)
    # LSTM layer
    self.lstm_layer = nn.LSTM(input_size=768, hidden_size=512, num_layers=1,
                              bidirectional=bidirectional, batch_first=True)
    # Log-reg (emission) heads; a bidirectional LSTM doubles the feature size
    if bidirectional:
        self.hidden2tag = nn.Linear(1024, clf_P_num_labels)
        self.hidden2tag_fine = nn.Linear(1024, clf_P_fine_num_labels)
    else:
        self.hidden2tag = nn.Linear(512, clf_P_num_labels)
        self.hidden2tag_fine = nn.Linear(512, clf_P_fine_num_labels)
    # CRF (coarse)
    self.crf_layer = CRF(clf_P_num_labels, batch_first=True)
    # CRF (fine)
    self.crf_layer_fine = CRF(clf_P_fine_num_labels, batch_first=True)
def __init__(self, freeze_bert, tokenizer, device, bidirectional,
             class_weight_c=None, class_weight_f=None):
    super(GPT2LSTMattenLogRegCRF, self).__init__()
    self.hidden_dim = 512
    self.tokenizer = tokenizer
    self.device = device
    self.bidirectional = bidirectional
    self.class_weight_c = class_weight_c
    self.class_weight_f = class_weight_f
    # Instantiate the GPT-2 model object
    self.gpt2_layer = GPT2Model.from_pretrained('gpt2', output_hidden_states=True,
                                                output_attentions=False)
    # If freeze_bert is True, freeze the GPT-2 weights
    if freeze_bert:
        for p in self.gpt2_layer.parameters():
            p.requires_grad = False
    # LSTM layer
    self.lstm_layer = nn.LSTM(input_size=768, hidden_size=512, num_layers=1,
                              bidirectional=bidirectional, batch_first=True)
    # Attention mechanism (PyTorch's nn.MultiheadAttention)
    if bidirectional:
        self.self_attention = nn.MultiheadAttention(self.hidden_dim * 2, 1, bias=True)
    else:
        self.self_attention = nn.MultiheadAttention(self.hidden_dim, 1, bias=True)
    # Log-reg (emission) heads
    if bidirectional:
        self.hidden2tag = nn.Linear(1024, clf_P_num_labels)
        self.hidden2tag_fine = nn.Linear(1024, clf_P_fine_num_labels)
    else:
        self.hidden2tag = nn.Linear(512, clf_P_num_labels)
        self.hidden2tag_fine = nn.Linear(512, clf_P_fine_num_labels)
    # CRF (coarse)
    self.crf_layer = CRF(clf_P_num_labels, batch_first=True)
    # CRF (fine)
    self.crf_layer_fine = CRF(clf_P_fine_num_labels, batch_first=True)
def __init__(self, freeze_bert, tokenizer, device, bidirectional,
             class_weight_c=None, class_weight_f=None):
    super(BERTLSTMMulattenLogRegCRF, self).__init__()
    self.hidden_dim = 512
    self.tokenizer = tokenizer
    self.device = device
    self.bidirectional = bidirectional
    self.class_weight_c = class_weight_c
    self.class_weight_f = class_weight_f
    # Instantiate the BERT model object
    self.bert_layer = BertModel.from_pretrained('bert-base-uncased', output_hidden_states=True,
                                                output_attentions=False)
    # If freeze_bert is True, freeze the BERT weights
    if freeze_bert:
        for p in self.bert_layer.parameters():
            p.requires_grad = False
    # LSTM layer
    self.lstm_layer = nn.LSTM(input_size=768, hidden_size=512, num_layers=1,
                              bidirectional=bidirectional, batch_first=True)
    # Attention mechanisms (PyTorch's nn.MultiheadAttention)
    if bidirectional:
        self.self_attention = nn.MultiheadAttention(self.hidden_dim * 2, 1, bias=True)   # single-head attention
        self.self_attention2 = nn.MultiheadAttention(self.hidden_dim * 2, 2, bias=True)  # multi-head attention
    else:
        self.self_attention = nn.MultiheadAttention(self.hidden_dim, 1, bias=True)
        self.self_attention2 = nn.MultiheadAttention(self.hidden_dim, 2, bias=True)
    # Log-reg (emission) heads
    if bidirectional:
        self.hidden2tag = nn.Linear(1024, clf_P_num_labels)
        self.hidden2tag_fine = nn.Linear(1024, clf_P_fine_num_labels)
    else:
        self.hidden2tag = nn.Linear(512, clf_P_num_labels)
        self.hidden2tag_fine = nn.Linear(512, clf_P_fine_num_labels)
    # CRF (coarse)
    self.crf_layer = CRF(clf_P_num_labels, batch_first=True)
    # CRF (fine)
    self.crf_layer_fine = CRF(clf_P_fine_num_labels, batch_first=True)
    # Loss calculation (for fine labels)
    self.loss_fct_fine = nn.CrossEntropyLoss(weight=class_weight_f)
    # Weighted uncertainty-based loss (if args.loss == 'mtl').
    # Create the parameter directly on the target device; reassigning a
    # Parameter via .to() would replace it with a plain, unregistered tensor.
    self.log_vars = nn.Parameter(torch.zeros(2, device=device), requires_grad=True)
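# --- Hypothetical sketch (assumed, not from the original source) ---
# The usual reading of the `log_vars` parameter above is homoscedastic
# uncertainty weighting (Kendall et al.) over the coarse and fine task
# losses. The method name and the two loss inputs are assumptions.
def mtl_loss(self, loss_coarse, loss_fine):
    precision_c = torch.exp(-self.log_vars[0])
    precision_f = torch.exp(-self.log_vars[1])
    # Each task loss is scaled by its learned precision, plus a
    # regularizer that keeps the uncertainties from growing unboundedly.
    return (precision_c * loss_coarse + self.log_vars[0]
            + precision_f * loss_fine + self.log_vars[1])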
def build_model(self):
    '''build the BERT encoder, the tag projection layer and the CRF layer'''
    self.hidden2tag = nn.Linear(self.embedding_dim, self.n_tags)
    self.crf = CRF(self.n_tags, batch_first=True)
    self.bert = transformers.BertModel.from_pretrained('bert-base-chinese')
def __init__(self, opt):
    super(NER_NET, self).__init__()
    self.opt = opt
    self.hidden_size = opt.hidden_size
    self.bert = BertModel.from_pretrained(common.bert_path)
    self.pos_embedding = nn.Embedding(len(common.posDic), opt.pos_embed_size,
                                      padding_idx=common.posDic['PAD'])
    self.bio_label_embedding = nn.Embedding(len(common.labelDic) + 1, opt.label_embed_size,
                                            padding_idx=len(common.labelDic))
    # if opt.pos_after_gcn:
    #     gcn_input_size = self.hidden_size
    #     biaffine_input_size = self.hidden_size + opt.label_embed_size + opt.pos_embed_size
    # else:
    biaffine_input_size = self.hidden_size + opt.label_embed_size + opt.pos_embed_size
    self.gcn = GCN(in_dim=self.hidden_size, mem_dim=self.hidden_size,
                   num_layers=opt.gcn_layer, in_drop=opt.gcn_dropout,
                   out_drop=opt.gcn_dropout, batch=True)
    self.biaffine = Biaffine(in1_features=biaffine_input_size,
                             in2_features=biaffine_input_size, out_features=2)
    self.dropout = nn.Dropout(opt.dropout)
    self.classifier = nn.Linear(self.hidden_size + opt.pos_embed_size, len(common.labelDic))
    self.crf = CRF(len(common.labelDic), batch_first=True)
    self.cross_entropy_loss = nn.CrossEntropyLoss(ignore_index=-1)
def __init__(self, text_field, label_field, rnn_size, emb_dim, update_pretrained=False):
    super().__init__()
    voc_size = len(text_field.vocab)
    self.n_labels = len(label_field.vocab)
    self.embedding = nn.Embedding(voc_size, emb_dim)
    if text_field.vocab.vectors is not None:
        self.embedding.weight = torch.nn.Parameter(text_field.vocab.vectors,
                                                   requires_grad=update_pretrained)
    self.rnn = nn.LSTM(input_size=emb_dim, hidden_size=rnn_size,
                       bidirectional=True, num_layers=1)
    self.top_layer = nn.Linear(2 * rnn_size, self.n_labels)
    self.pad_word_id = text_field.vocab.stoi[text_field.pad_token]
    self.pad_label_id = label_field.vocab.stoi[label_field.pad_token]
    self.crf = CRF(self.n_labels)
def __init__(self, embedding_size, hidden_size, vocab_size, target_size, num_layers,
             lstm_drop_out, nn_drop_out, pretrained_embedding=False, embedding_weight=None):
    super(BiLSTM_CRF, self).__init__()
    self.hidden_size = hidden_size
    self.nn_drop_out = nn_drop_out
    # nn.Embedding: parameter size (num_words, embedding_dim);
    # for every word id it outputs that word's embedding.
    # Input size: (N, W), where N is the batch size and W is the max sentence length.
    # Output size: (N, W, embedding_dim), embedding all the words.
    if pretrained_embedding:
        self.embedding = nn.Embedding.from_pretrained(embedding_weight)
        self.embedding.weight.requires_grad = True  # fine-tune the pretrained weights
    else:
        self.embedding = nn.Embedding(vocab_size, embedding_size)
    self.bilstm = nn.LSTM(input_size=embedding_size,
                          hidden_size=hidden_size,
                          batch_first=True,
                          num_layers=num_layers,
                          dropout=lstm_drop_out if num_layers > 1 else 0,
                          bidirectional=True)
    if nn_drop_out > 0:
        self.dropout = nn.Dropout(nn_drop_out)
    self.classifier = nn.Linear(hidden_size * 2, target_size)
    # https://pytorch-crf.readthedocs.io/en/stable/_modules/torchcrf.html
    self.crf = CRF(target_size, batch_first=True)
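# --- Hypothetical usage sketch (assumed, not part of the original class) ---
# The standard pytorch-crf pattern for a model shaped like the one above:
# train on the negative log-likelihood, decode with Viterbi. Method names
# and the `mask` argument are assumptions for illustration.
def forward(self, sentences):
    embeds = self.embedding(sentences)            # (N, W, embedding_size)
    lstm_out, _ = self.bilstm(embeds)             # (N, W, 2 * hidden_size)
    if self.nn_drop_out > 0:
        lstm_out = self.dropout(lstm_out)
    return self.classifier(lstm_out)              # emission scores (N, W, target_size)

def loss(self, sentences, tags, mask):
    emissions = self.forward(sentences)
    return -self.crf(emissions, tags, mask=mask, reduction='mean')

def predict(self, sentences, mask):
    emissions = self.forward(sentences)
    return self.crf.decode(emissions, mask=mask)  # list of best tag sequences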
def __init__(self, number_of_categories, vocab_size=VocabSize, hidden=HiddenSize,
             max_len=SentenceLength, num_hidden_layers=HiddenLayerNum,
             attention_heads=AttentionHeadNum, dropout_prob=DropOut,
             intermediate_size=IntermediateSize):
    super(RobertaNer, self).__init__()
    self.vocab_size = vocab_size
    self.hidden_size = hidden
    self.max_len = max_len
    self.num_hidden_layers = num_hidden_layers
    self.attention_head_num = attention_heads
    self.dropout_prob = dropout_prob
    self.attention_head_size = hidden // attention_heads
    self.tokenizer = Tokenizer(VocabPath)
    self.intermediate_size = intermediate_size
    self.number_of_categories = number_of_categories
    # Declare the network layers
    self.roberta_emb = TokenEmbedding()
    self.position_emb = PositionEmbedding()
    self.bi_gru = BiGRU(self.number_of_categories, self.number_of_categories)
    self.transformer_blocks = nn.ModuleList(
        Transformer(hidden_size=self.hidden_size,
                    attention_head_num=self.attention_head_num,
                    attention_head_size=self.attention_head_size,
                    intermediate_size=self.intermediate_size).to(device)
        for _ in range(self.num_hidden_layers))
    self.mlm = Mlm(self.hidden_size, self.number_of_categories)
    self.crf = CRF(self.number_of_categories, batch_first=True)
def __init__(self, bert_dir, num_tags, opt, dropout_prob=0.1, **kwargs):
    super(CRFModel, self).__init__(bert_dir=bert_dir, dropout_prob=dropout_prob)
    self.opt = opt
    out_dims = self.bert_config.hidden_size
    mid_linear_dims = kwargs.pop('mid_linear_dims', 128)
    self.mid_linear = nn.Sequential(nn.Linear(out_dims, mid_linear_dims),
                                    nn.ReLU(),
                                    nn.Dropout(dropout_prob))
    out_dims = mid_linear_dims
    self.classifier = nn.Linear(out_dims, num_tags)
    self.loss_weight = nn.Parameter(torch.FloatTensor(1), requires_grad=True)
    self.loss_weight.data.fill_(-0.5)
    self.crf_module = CRF(num_tags=num_tags, batch_first=True)
    init_blocks = [self.mid_linear, self.classifier]
    self._init_weights(init_blocks, initializer_range=self.bert_config.initializer_range)
def __init__(self, char_vocab_dim, char_embed_dim, char_hidden_dim, sub_vocab_dims,
             sub_embed_dims, sub_hidden_dims, tag_dim, batch_size, use_gpu=True):
    super().__init__()
    self.use_gpu = use_gpu
    self.embed_char = nn.Embedding(char_vocab_dim, char_embed_dim)
    # Use nn.ModuleList so the sub-embeddings and sub-LSTMs are registered
    # as submodules; a plain Python list would hide their parameters from
    # model.parameters() and hence from the optimizer.
    self.embed_subs = nn.ModuleList(
        nn.Embedding(vocab_dim, embed_dim).cuda()
        for vocab_dim, embed_dim in zip(sub_vocab_dims, sub_embed_dims))
    self.lstm_block_char = BiLSTMBlock(char_embed_dim, char_hidden_dim, tag_dim, batch_size)
    self.lstm_block_subs = nn.ModuleList(
        BiLSTMBlock(embed_dim, hidden_dim, tag_dim, batch_size).cuda()
        for embed_dim, hidden_dim in zip(sub_embed_dims, sub_hidden_dims))
    self.outs2tag = nn.Linear(tag_dim * (1 + len(sub_hidden_dims)), tag_dim)
    self.crf = CRF(tag_dim)
def __init__(self, config):
    """Initialize the token classification model with a CRF layer.

    :param config: Transformers config object.
    """
    super().__init__(config)
    self.crf = CRF(config.num_labels, batch_first=True)
def __init__(self, distilbert_config, args, intent_label_lst, slot_label_lst):
    super(JointDistilBERT, self).__init__(distilbert_config)
    self.args = args
    self.num_intent_labels = len(intent_label_lst)
    self.num_slot_labels = len(slot_label_lst)
    if args.do_pred:
        self.distilbert = PRETRAINED_MODEL_MAP[args.model_type](config=distilbert_config)
    else:
        # Load the pretrained DistilBERT weights
        self.distilbert = PRETRAINED_MODEL_MAP[args.model_type].from_pretrained(
            args.model_name_or_path, config=distilbert_config)
    self.intent_classifier = IntentClassifier(distilbert_config.hidden_size,
                                              self.num_intent_labels, args.dropout_rate)
    self.slot_classifier = SlotClassifier(distilbert_config.hidden_size,
                                          self.num_slot_labels, args.dropout_rate)
    if args.use_crf:
        self.crf = CRF(num_tags=self.num_slot_labels, batch_first=True)
    self.slot_pad_token_idx = slot_label_lst.index(args.slot_pad_label)
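# --- Hypothetical sketch (assumed, not from the original source) ---
# The usual joint objective for models of this shape: cross-entropy on
# the intent head plus CRF negative log-likelihood on the slot head.
# The method name and `self.args.slot_loss_coef` are assumptions.
def joint_loss(self, intent_logits, intent_labels, slot_logits, slot_labels, attention_mask):
    intent_loss = nn.CrossEntropyLoss()(
        intent_logits.view(-1, self.num_intent_labels), intent_labels.view(-1))
    # The CRF returns a log-likelihood; negate it to get a loss.
    slot_loss = -self.crf(slot_logits, slot_labels,
                          mask=attention_mask.bool(), reduction='mean')
    return intent_loss + self.args.slot_loss_coef * slot_loss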
def __init__(self, config, num_labels, num_ner_labels):
    super(BertForTABSAJoint_CRF_T, self).__init__()
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, num_labels)  # num_labels is the type sum of 0 & 1
    self.ner_hidden2tag = nn.Linear(config.hidden_size, num_ner_labels)  # num_ner_labels is the type sum of NER labels: TO, BIO, etc.
    self.num_labels = num_labels
    self.num_ner_labels = num_ner_labels
    # CRF
    self.CRF_model = CRF(num_ner_labels, batch_first=True)

    def init_weights(module):
        if isinstance(module, (nn.Linear, nn.Embedding)):
            # Slightly different from the TF version, which uses truncated_normal for initialization
            # cf https://github.com/pytorch/pytorch/pull/5617
            module.weight.data.normal_(mean=0.0, std=config.initializer_range)
        elif isinstance(module, BERTLayerNorm):
            module.beta.data.normal_(mean=0.0, std=config.initializer_range)
            module.gamma.data.normal_(mean=0.0, std=config.initializer_range)
        if isinstance(module, nn.Linear):
            module.bias.data.zero_()

    self.apply(init_weights)
def __init__(self, config, pad_idx, lstm_hidden_dim, num_lstm_layers, bidirectional, num_labels):
    super(BertLstmCrf, self).__init__(config)
    self.dropout_prob = config.hidden_dropout_prob
    self.pad_idx = pad_idx
    self.lstm_hidden_dim = lstm_hidden_dim
    self.num_lstm_layers = num_lstm_layers
    self.bidirectional = bidirectional
    self.num_labels = num_labels
    self.bert = BertModel(config)
    # Inter-layer dropout only applies when the LSTM is stacked.
    self.lstm = nn.LSTM(input_size=config.hidden_size,
                        hidden_size=self.lstm_hidden_dim,
                        num_layers=self.num_lstm_layers,
                        bidirectional=self.bidirectional,
                        dropout=self.dropout_prob if self.num_lstm_layers > 1 else 0,
                        batch_first=True)
    if self.bidirectional:
        self.linear = nn.Linear(self.lstm_hidden_dim * 2, self.num_labels)
    else:
        self.linear = nn.Linear(self.lstm_hidden_dim, self.num_labels)
    self.crf_layer = CRF(self.num_labels, batch_first=True)
    self.dropout_layer = nn.Dropout(self.dropout_prob)
    self.init_weights()
def __init__(self, args, pretrained_word_matrix):
    super(BiLSTM_CNN_CRF, self).__init__()
    self.args = args
    self.char_cnn = CharCNN(max_word_len=args.max_word_len,
                            kernel_lst=args.kernel_lst,
                            num_filters=args.num_filters,
                            char_vocab_size=args.char_vocab_size,
                            char_emb_dim=args.char_emb_dim,
                            final_char_dim=args.final_char_dim)
    if pretrained_word_matrix is not None:
        self.word_emb = nn.Embedding.from_pretrained(pretrained_word_matrix)
    else:
        self.word_emb = nn.Embedding(args.word_vocab_size, args.word_emb_dim, padding_idx=0)
        nn.init.uniform_(self.word_emb.weight, -0.25, 0.25)
    self.bi_lstm = nn.LSTM(input_size=args.word_emb_dim + args.final_char_dim,
                           hidden_size=args.hidden_dim // 2,  # bidirectionality doubles this back to hidden_dim
                           bidirectional=True,
                           batch_first=True)
    self.output_linear = nn.Linear(args.hidden_dim, len(get_labels(args)))
    self.crf = CRF(num_tags=len(get_labels(args)), batch_first=True)
def __init__(self, bert_name: str, num_labels: int, num_layers: int, hidden_size: int,
             dropout_prob: float, rnn_type: str, bidirectional: bool, use_crf: bool,
             freeze_bert: bool):
    super().__init__()
    self.bert = BertModel.from_pretrained(bert_name)
    if freeze_bert:
        # Freeze all BERT parameters (assigning to the module's
        # .requires_grad attribute would not affect its parameters).
        self.bert.requires_grad_(False)
    if num_layers > 0:
        if rnn_type == "gru":
            self.rnn = nn.GRU(self.bert.config.hidden_size, hidden_size,
                              num_layers=num_layers, bidirectional=bidirectional,
                              batch_first=True)
        else:
            self.rnn = nn.LSTM(self.bert.config.hidden_size, hidden_size,
                               num_layers=num_layers, bidirectional=bidirectional,
                               batch_first=True)
    else:
        self.rnn = nn.Identity()
    self.classifier = nn.Linear((1 + bidirectional) * hidden_size, num_labels)
    self.dropout = nn.Dropout(dropout_prob)
    self.use_crf = use_crf
    if self.use_crf:
        self.crf = CRF(num_labels, batch_first=True)
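# --- Hypothetical decoding sketch (assumed, not from the original source) ---
# With use_crf the emissions go through Viterbi decoding, otherwise a plain
# argmax over the label dimension. The method name and the use of
# last_hidden_state (transformers v4 style) are assumptions.
def decode(self, input_ids, attention_mask):
    hidden = self.bert(input_ids, attention_mask=attention_mask).last_hidden_state
    rnn_out = self.rnn(hidden)
    # nn.LSTM/nn.GRU return (output, state); nn.Identity returns the tensor itself.
    if isinstance(rnn_out, tuple):
        rnn_out = rnn_out[0]
    emissions = self.classifier(self.dropout(rnn_out))
    if self.use_crf:
        return self.crf.decode(emissions, mask=attention_mask.bool())
    return emissions.argmax(dim=-1)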
def __init__(self, args1, args2, args3, num_tags):
    super(CNN_biLSTM_CRF, self).__init__()
    self.num_tags = num_tags
    self.cnn = TextCNN(args1)
    self.bilstm = BILSTM(args2)
    self.convs = CNNlayers(args3)
    self.crf = CRF(self.num_tags, batch_first=True)
def __init__(self, data_config, model_config="embs:8|conv:16|l1:16|do:0.0|oc:BI"):
    super(Model, self).__init__()
    no_syllables = data_config['num_tokens']
    log.info("no. syllables: %d" % no_syllables)
    config = utils.parse_model_params(model_config)
    conv_filters = config['conv']
    dropout_rate = config.get("do", 0)
    self.output_scheme = output_tags.get_scheme(config["oc"])
    self.sy_embeddings = prepare_embedding(data_config, config)
    if "crf" in config:
        self.crf = CRF(self.output_scheme.num_tags, batch_first=True)
    emb_dim = self.sy_embeddings.weight.shape[1]
    self.id_conv = IteratedDilatedConvolutions(emb_dim, conv_filters, dropout_rate)
    self.linear1 = nn.Linear(conv_filters, config['l1'])
    self.linear2 = nn.Linear(config['l1'], self.output_scheme.num_tags)
    self.model_params = model_config
def __init__(
    self,
    word_vocab: Vocab,
    tag_vocab: Vocab,
    embedding: nn.Embedding,
    hidden_dim: int,
    num_layers: int,
    dropout: float = 0.2,
):
    super().__init__(word_vocab, tag_vocab)
    self.hidden_dim = hidden_dim
    self.dropout = nn.Dropout(dropout)
    self.embedding = embedding
    self.lstm = nn.LSTM(
        embedding.embedding_dim,
        hidden_dim,
        num_layers=num_layers,
        bidirectional=True,
    )
    # Maps the output of the LSTM into tag space.
    self.fc = nn.Linear(hidden_dim * 2, self.tagset_size)
    self.f1 = F1(num_classes=self.tagset_size)
    self.crf = CRF(num_tags=self.tagset_size)
    self.save_hyperparameters()
def __init__(self, vocab_size, embed_size, hidden_dim, weight, dropout, word2id, tag2id):
    super(NERLSTM_CRF, self).__init__()
    # self.embedding_dim = embedding_dim
    self.embed_size = embed_size
    self.hidden_dim = hidden_dim
    self.vocab_size = len(word2id) + 1
    self.tag_to_ix = tag2id
    self.tagset_size = len(tag2id)
    # self.word_embeds = nn.Embedding(self.vocab_size, self.embedding_dim)
    self.embedding = nn.Embedding(vocab_size, embed_size)
    self.embedding.weight.data.copy_(weight)
    # Keep the embedding trainable (set to False to freeze it)
    self.embedding.weight.requires_grad = True
    self.dropout = nn.Dropout(dropout)
    # BiLSTM
    self.lstm = nn.LSTM(self.embed_size, self.hidden_dim // 2,
                        num_layers=1, bidirectional=True, batch_first=False)
    self.hidden2tag = nn.Linear(self.hidden_dim, self.tagset_size)
    # CRF
    self.crf = CRF(self.tagset_size)
def __init__(self, config):
    super(BiLstmCRF, self).__init__()
    # The embedding is updated during training (freeze=False)
    self.embedding = nn.Embedding.from_pretrained(config.embedding_pretrained, freeze=False)
    self.device = config.device
    self.tag_num = config.tag_num
    self.seqLen = config.pad_size
    self.lstm = nn.LSTM(
        input_size=config.input_size,
        hidden_size=config.hidden_size // 2,
        num_layers=config.hidden_layer_num,
        dropout=config.dropout,
        bidirectional=True,   # bidirectional LSTM
        batch_first=True)     # batch is the first tensor dimension
    self.attention = SelfAttention(config.pad_size)
    self.pos_fc = PositionFeedForward(config.pad_size, config.pad_size)
    self.conditionLinear = nn.Linear(config.hidden_size, config.pad_size)
    self.attLinear = nn.Linear(config.pad_size, 1)
    self.linear = nn.Linear(config.pad_size, config.tag_num)
    self.conditionNorm = ConditionalLayerNorm(config.pad_size)
    self.crf = CRF(config.tag_num, batch_first=True)
def __init__(self, model_path_list, bert_dir_list, num_tags, device, lamb=1/3):
    self.models = []
    self.crf_module = CRF(num_tags=num_tags, batch_first=True)
    self.lamb = lamb
    for idx, _path in enumerate(model_path_list):
        print(f'Load model from {_path}')
        print(f'Load model type: {bert_dir_list[0]}')
        model = CRFModel(bert_dir=bert_dir_list[0], num_tags=num_tags)
        model.load_state_dict(torch.load(_path, map_location=torch.device('cpu')))
        model.eval()
        model.to(device)
        self.models.append(model)
        if idx == 0:
            print(f'Load CRF weight from {_path}')
            self.crf_module.load_state_dict(model.crf_module.state_dict())
            self.crf_module.to(device)
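# --- Hypothetical ensembling sketch (assumed, not from the original source) ---
# Average the member models' emission scores, weighted by `lamb`, then
# Viterbi-decode once with the shared CRF weights loaded above. The method
# name and CRFModel's forward signature/return shape are assumptions.
def predict(self, token_ids, attention_masks, token_type_ids):
    weighted_logits = None
    with torch.no_grad():
        for model in self.models:
            # Assumes the model's forward returns per-token emission logits first.
            logits = model(token_ids, attention_masks, token_type_ids)[0]
            weighted_logits = logits * self.lamb if weighted_logits is None \
                else weighted_logits + logits * self.lamb
    return self.crf_module.decode(weighted_logits, mask=attention_masks.bool())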
def __init__(self, config):
    super(TransformerEncoderModel, self).__init__(config)
    self.src_mask = None
    self._dim_embedding = self._config.model.dim_embedding
    self._dim_hidden = self._config.model.dim_hidden
    self._num_layer = self._config.model.nlayer
    self._num_vocab = self._config.data.num_vocab
    self._num_tag = self._config.data.num_tag
    self._dropout = self._config.learn.dropout_rate
    self.pos_encoder = PositionalEncoding(self._dim_embedding, self._dropout)
    self._embedding = nn.Embedding(self._num_vocab, self._dim_embedding)
    encoder_layers = TransformerEncoderLayer(self._dim_hidden, self._config.model.nhead,
                                             self._config.model.nhid,
                                             self._config.learn.dropout_rate)
    self.transformer_encoder = TransformerEncoder(encoder_layers, self._config.model.nlayer)
    self.lstm = nn.LSTM(input_size=self._dim_embedding,
                        hidden_size=self._dim_hidden // 2,
                        bidirectional=True,
                        num_layers=self._num_layer,
                        dropout=self._dropout)
    self._hidden2label = nn.Linear(self._dim_hidden, self._num_tag)
    self._crf = CRF(self._num_tag)
def __init__(self, conf, vocab, char_vocab, tag_vocab):
    super(C_RNN, self).__init__()
    # CHARACTER EMBEDDINGS AND WORD-LEVEL BILSTM
    self.char_RNN = char_RNN(conf, char_vocab)
    in_shape = self.char_RNN.output_size

    # Main BiLSTM
    # ======================================================================
    self.word_RNN = word_RNN(in_shape, None, conf["w_rnn_layers"])
    output_size = self.word_RNN.output_size
    self.mid_dropout = nn.Dropout(conf["mid_dropout"])
    self.w_bin_out = conf["w_bin_out"]
    out_bin_size = 2 * output_size if self.w_bin_out == "maxpool" else output_size

    # Output Layers
    # ======================================================================
    # Binary
    self.bin_out = nn.Linear(out_bin_size, 1)
    # NER
    self.n_tags = len(tag_vocab)
    self.ner_out = nn.Linear(output_size, self.n_tags)
    # CRF Layer
    self.use_crf = conf["use_crf"]
    if self.use_crf:
        self.crf = CRF(self.n_tags, batch_first=True)
    # ======================================================================
    # Maybe move to GPU
    self.to(self.device)
def __init__(self, hparams):
    super(CRF_Model, self).__init__()
    self.name = hparams.model_name
    self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
    self.word_embedding = nn.Embedding(hparams.vocab_size, hparams.embedding_dim, padding_idx=0)
    self.word_dropout = nn.Dropout(hparams.dropout)
    if hparams.embeddings is not None:
        print("initializing word embeddings from pretrained")
        self.word_embedding.weight.data.copy_(hparams.embeddings)
    self.pos_embedding = nn.Embedding(hparams.pos_vocab_size, hparams.embedding_dim, padding_idx=0)
    self.pos_dropout = nn.Dropout(hparams.dropout)
    if hparams.pos_embeddings is not None:
        print("initializing POS embeddings from pretrained")
        self.pos_embedding.weight.data.copy_(hparams.pos_embeddings)
    # Word and POS embeddings are concatenated, hence the doubled input size.
    self.lstm = nn.LSTM(hparams.embedding_dim * 2,
                        hparams.hidden_dim,
                        bidirectional=hparams.bidirectional,
                        num_layers=hparams.num_layers,
                        dropout=hparams.dropout if hparams.num_layers > 1 else 0,
                        batch_first=True)
    lstm_output_dim = hparams.hidden_dim if hparams.bidirectional is False else hparams.hidden_dim * 2
    self.dropout = nn.Dropout(hparams.dropout)
    self.classifier = nn.Linear(lstm_output_dim, hparams.num_classes)
    self.crf = CRF(hparams.num_classes, batch_first=True)
def __init__(self, vocab_size, emb_dim, hidden_size, out_size, dropout):
    """Build the model structure.

    :param vocab_size: vocabulary size
    :param emb_dim: word-embedding dimension
    :param hidden_size: LSTM hidden-layer size
    :param out_size: number of tags
    :param dropout: dropout probability
    """
    super(BiLSTM_CRF, self).__init__()
    # Character-embedding matrix
    self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=emb_dim)
    self.dropout = nn.Dropout(dropout)
    self.bilstm = nn.LSTM(input_size=emb_dim,
                          hidden_size=hidden_size,
                          num_layers=1,
                          batch_first=True,
                          bidirectional=True)
    # The BiLSTM concatenates the forward and backward hidden states,
    # so the output feature size is hidden_size * 2.
    self.hidden2tag = nn.Linear(in_features=hidden_size * 2, out_features=out_size)
    self.crf = CRF(num_tags=out_size, batch_first=True)
def __init__(self, config, word_vocab, vocab_size, tag_num, vector_path):
    super(TransformerEncoderModel, self).__init__()
    self.use_dae = config.use_dae
    self.dae_lambda = config.dae_lambda
    self.use_dice = config.use_dice
    self.dice_lambda = config.dice_lambda
    self.vocab_size = vocab_size
    self.word_vocab = word_vocab
    self.tag_num = tag_num
    self.embedding_dim = config.embedding_dim
    self.hidden_dim = config.hidden_dim
    self.bidirectional = True
    self.num_layers = config.num_layers
    self.dropout = config.dropout
    self.drop = nn.Dropout(self.dropout)
    self.vector_path = vector_path
    self.src_mask = None
    self.pos_encoder = PositionalEncoding(self.embedding_dim, self.dropout)
    encoder_layers = TransformerEncoderLayer(self.embedding_dim, config.n_head,
                                             config.n_hid, self.dropout)
    self.transformer_encoder = TransformerEncoder(encoder_layers, config.n_layers)
    self.embedding = nn.Embedding(self.vocab_size, self.embedding_dim)
    if config.use_vectors:
        logger.info('Loading word vectors from {}...'.format(self.vector_path))
        embed_weights = load_word2vec(self.vector_path, self.word_vocab,
                                      embedding_dim=self.embedding_dim)
        logger.info('Finished loading word vectors')
        self.embedding = nn.Embedding.from_pretrained(embed_weights, freeze=False).to(DEVICE)
        # self.embedding.weight.requires_grad = True
    self.lstm = nn.LSTM(input_size=self.embedding_dim,
                        hidden_size=self.hidden_dim // 2,
                        bidirectional=self.bidirectional,
                        num_layers=1).to(DEVICE)
    self.linear = nn.Linear(self.hidden_dim, self.tag_num)
    self.lm_decoder = nn.Linear(self.hidden_dim, self.vocab_size)
    self.init_weights()
    self.crf_layer = CRF(self.tag_num)
    self.dice_loss = DiceLoss1()
    self.criterion = nn.CrossEntropyLoss()
def __init__(self, word_embed_dim, extra_embed_dim, hidden_dim, vocab_size,
             extra_size, tagset_size, device='cuda'):
    super(BiLSTM_CRF_extra, self).__init__()
    self.tagset_size = tagset_size
    # Word embedding layers
    self.embedding = nn.Embedding(vocab_size, word_embed_dim)
    self.extra_embedding = nn.Embedding(extra_size, extra_embed_dim)
    # Bidirectional LSTM layer
    self.half_hidden = hidden_dim // 2
    self.lstm = nn.LSTM(word_embed_dim + extra_embed_dim, self.half_hidden,
                        bidirectional=True)
    # Mapping LSTM output to tags
    self.fc = nn.Linear(hidden_dim, tagset_size)
    # CRF layer
    self.crf = CRF(tagset_size)
    self.device = device
    self.to(device)
    self.init_hidden()
def __init__(self, input_size, hidden_size, num_layers, num_classes):
    super(bilstm_crf, self).__init__()
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    # Note: despite the class name, the recurrent layer here is a bidirectional GRU.
    self.lstm = nn.GRU(input_size, hidden_size, num_layers,
                       batch_first=True, bidirectional=True)
    self.fc = nn.Linear(hidden_size * 2, num_classes)
    self.crf = CRF(num_classes, batch_first=True)