def __init__(self, embed, label_vocab, pos_idx=31, Parsing_rnn_layers=3, Parsing_arc_mlp_size=500,
             Parsing_label_mlp_size=100, Parsing_use_greedy_infer=False, encoding_type='bmeso',
             embedding_dim=768, dropout=0.1, use_pos_embedding=True, use_average=True):
    super().__init__()
    self.embed = embed
    self.use_pos_embedding = use_pos_embedding
    self.use_average = use_average
    self.label_vocab = label_vocab
    self.pos_idx = pos_idx
    self.user_dict_weight = 0.05
    embedding_dim_1 = 512
    embedding_dim_2 = 256
    self.layers_map = {'CWS': '-1', 'POS': '-1', 'Parsing': '-1', 'NER': '-1'}

    # NER head: linear projection + CRF constrained to valid BMESO transitions
    self.ner_linear = nn.Linear(embedding_dim, len(label_vocab['NER']))
    trans = allowed_transitions(label_vocab['NER'], encoding_type='bmeso', include_start_end=True)
    self.ner_crf = ConditionalRandomField(len(label_vocab['NER']), include_start_end_trans=True,
                                          allowed_transitions=trans)

    # Dependency-parsing head: biaffine parser over character-level features
    self.biaffine_parser = BertCharParser(
        app_index=self.label_vocab['Parsing'].to_index('APP'),
        vector_size=768,
        num_label=len(label_vocab['Parsing']),
        rnn_layers=Parsing_rnn_layers,
        arc_mlp_size=Parsing_arc_mlp_size,
        label_mlp_size=Parsing_label_mlp_size,
        dropout=dropout,
        use_greedy_infer=Parsing_use_greedy_infer)
    if self.use_pos_embedding:
        self.pos_embedding = nn.Embedding(len(self.label_vocab['pos']), embedding_dim, padding_idx=0)
    self.loss = CrossEntropyLoss(padding_idx=0)

    # CWS head: MLP + CRF
    self.cws_mlp = MLP([embedding_dim, embedding_dim_1, embedding_dim_2, len(label_vocab['CWS'])],
                       'relu', output_activation=None)
    trans = allowed_transitions(label_vocab['CWS'], include_start_end=True)
    self.cws_crf = ConditionalRandomField(len(label_vocab['CWS']), include_start_end_trans=True,
                                          allowed_transitions=trans)

    # POS head: MLP + CRF
    self.pos_mlp = MLP([embedding_dim, embedding_dim_1, embedding_dim_2, len(label_vocab['POS'])],
                       'relu', output_activation=None)
    trans = allowed_transitions(label_vocab['POS'], include_start_end=True)
    self.pos_crf = ConditionalRandomField(len(label_vocab['POS']), include_start_end_trans=True,
                                          allowed_transitions=trans)
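# A hedged sketch of the label_vocab argument the joint CWS/POS/NER/Parsing model above
# expects: a dict of fastNLP Vocabulary objects keyed by task name. The keys
# ('CWS', 'POS', 'NER', 'Parsing', 'pos') and the 'APP' parsing label come from the
# snippet; the concrete label sets below are illustrative assumptions.
from fastNLP import Vocabulary

def _make_vocab(labels):
    vocab = Vocabulary(padding='<pad>', unknown=None)   # padding at index 0, matching padding_idx=0 above
    vocab.add_word_lst(labels)
    return vocab

label_vocab = {
    'CWS': _make_vocab(['B', 'M', 'E', 'S']),
    'POS': _make_vocab(['B-NN', 'M-NN', 'E-NN', 'S-NN', 'B-VV', 'S-VV']),
    'NER': _make_vocab(['O', 'B-NAME', 'M-NAME', 'E-NAME', 'S-NAME']),
    'Parsing': _make_vocab(['APP', 'nn', 'nsubj', 'dobj', 'root']),
    'pos': _make_vocab(['NN', 'VV', 'PU', 'AD']),
}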
def __init__(self, embed, hidden_size, num_layers, tag_vocab, dropout=0.5, encoding_type='bioes'):
    super().__init__()
    self.embedding = embed
    self.lstm = LSTM(input_size=self.embedding.embedding_dim, hidden_size=hidden_size // 2,
                     num_layers=num_layers, bidirectional=True, batch_first=True)
    self.fc = nn.Linear(hidden_size, len(tag_vocab))

    transitions = allowed_transitions(tag_vocab.idx2word, encoding_type=encoding_type,
                                      include_start_end=True)
    self.crf = ConditionalRandomField(len(tag_vocab), include_start_end_trans=True,
                                      allowed_transitions=transitions)

    self.dropout = nn.Dropout(dropout, inplace=True)

    # Xavier-initialize the projection weights, zero its biases, and start the
    # CRF transition scores from zero.
    for name, param in self.named_parameters():
        if 'fc' in name:
            if param.data.dim() > 1:
                nn.init.xavier_uniform_(param)
            else:
                nn.init.constant_(param, 0)
        if 'crf' in name:
            nn.init.zeros_(param)
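# A hedged sketch of the forward/decoding path that usually accompanies a BiLSTM-CRF
# __init__ like the one above. The method names and the assumption that index 0 is the
# padding id are not taken from the original source; the loss and viterbi_decode calls
# follow fastNLP's ConditionalRandomField API.
def _forward(self, words, seq_len, target=None):
    mask = words.ne(0)                               # (batch, seq_len), True on real tokens
    feats = self.embedding(words)                    # (batch, seq_len, embed_dim)
    feats, _ = self.lstm(feats, seq_len=seq_len)     # (batch, seq_len, hidden_size)
    feats = self.dropout(feats)
    feats = self.fc(feats)                           # (batch, seq_len, num_tags)
    if target is not None:
        loss = self.crf(feats, target, mask).mean()  # negative log-likelihood
        return {'loss': loss}
    paths, _ = self.crf.viterbi_decode(feats, mask)  # constrained Viterbi decoding
    return {'pred': paths}

def forward(self, words, seq_len, target):
    return self._forward(words, seq_len, target)

def predict(self, words, seq_len):
    return self._forward(words, seq_len, None)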
def __init__(self, config, data_bundle, embed, num_layers, d_model, n_head, feedforward_dim, dropout,
             after_norm=True, attn_type='adatrans', bi_embed=None, fc_dropout=0.3, pos_embed=None,
             scale=False, dropout_attn=None):
    """
    :param config: model configuration (stored for later use)
    :param data_bundle: fastNLP DataBundle; the 'target' Vocabulary is read from it
    :param embed: fastNLP TokenEmbedding
    :param num_layers: number of self-attention layers
    :param d_model: input size
    :param n_head: number of attention heads
    :param feedforward_dim: dimension of the FFN
    :param dropout: dropout in self-attention
    :param after_norm: whether layer normalization is applied after the sub-layer
    :param attn_type: 'adatrans' or 'naive'
    :param pos_embed: type of position embedding; supports sin, fix, or None (may be None for relative attention)
    :param bi_embed: used in the Chinese scenario
    :param fc_dropout: dropout rate before the fc layer
    """
    super().__init__()
    self.config = config
    self.data_bundle = data_bundle
    tag_vocab = data_bundle.get_vocab('target')

    self.embed = embed
    embed_size = self.embed.embed_size
    self.bi_embed = None
    if bi_embed is not None:
        self.bi_embed = bi_embed
        embed_size += self.bi_embed.embed_size

    self.in_fc = nn.Linear(embed_size, d_model)
    self.transformer = TransformerEncoder(num_layers, d_model, n_head, feedforward_dim, dropout,
                                          after_norm=after_norm, attn_type=attn_type,
                                          scale=scale, dropout_attn=dropout_attn,
                                          pos_embed=pos_embed)
    self.fc_dropout = nn.Dropout(fc_dropout)
    self.out_fc = nn.Linear(d_model, len(tag_vocab))

    trans = allowed_transitions(tag_vocab, include_start_end=True)
    self.crf = ConditionalRandomField(len(tag_vocab), include_start_end_trans=True, allowed_transitions=trans)
def __init__(self, char_embed, hidden_size, num_layers, target_vocab=None, bigram_embed=None,
             trigram_embed=None, dropout=0.5):
    super().__init__()
    embed_size = char_embed.embed_size
    self.char_embed = char_embed
    if bigram_embed:
        embed_size += bigram_embed.embed_size
    self.bigram_embed = bigram_embed
    if trigram_embed:
        embed_size += trigram_embed.embed_size
    self.trigram_embed = trigram_embed

    self.lstm = LSTM(embed_size, hidden_size=hidden_size // 2, bidirectional=True,
                     batch_first=True, num_layers=num_layers)
    self.dropout = nn.Dropout(p=dropout)
    self.fc = nn.Linear(hidden_size, len(target_vocab))

    transitions = None
    if target_vocab:
        transitions = allowed_transitions(target_vocab, include_start_end=True, encoding_type='bmes')
    self.crf = ConditionalRandomField(num_tags=len(target_vocab), allowed_transitions=transitions)
def __init__(self, tag_vocabs, embed, num_layers, d_model, n_head, feedforward_dim, dropout,
             after_norm=True, attn_type='adatrans', bi_embed=None, fc_dropout=0.3, pos_embed=None,
             scale=False, dropout_attn=None):
    super().__init__()
    self.embed = embed
    embed_size = self.embed.embed_size
    self.bi_embed = None
    if bi_embed is not None:
        self.bi_embed = bi_embed
        embed_size += self.bi_embed.embed_size

    # One output projection and one constrained CRF per tagging corpus/vocabulary.
    self.tag_vocabs = []
    self.out_fcs = nn.ModuleList()
    self.crfs = nn.ModuleList()
    for i in range(len(tag_vocabs)):
        self.tag_vocabs.append(tag_vocabs[i])
        out_fc = nn.Linear(1536, len(tag_vocabs[i]))
        self.out_fcs.append(out_fc)
        trans = allowed_transitions(tag_vocabs[i], encoding_type='bioes', include_start_end=True)
        crf = ConditionalRandomField(len(tag_vocabs[i]), include_start_end_trans=True,
                                     allowed_transitions=trans)
        self.crfs.append(crf)

    self.in_fc = nn.Linear(embed_size, d_model)
    self.transformer = TransformerEncoder(num_layers, d_model, n_head, feedforward_dim, dropout,
                                          after_norm=after_norm, attn_type=attn_type,
                                          scale=scale, dropout_attn=dropout_attn,
                                          pos_embed=pos_embed)
    self.fc_dropout = nn.Dropout(fc_dropout)
def __init__(self, embed, tag_vocab, encoding_type='bio'):
    super().__init__()
    self.embed = embed
    self.fc = nn.Linear(self.embed.embed_size, len(tag_vocab))
    trans = allowed_transitions(tag_vocab, encoding_type=encoding_type, include_start_end=True)
    self.crf = ConditionalRandomField(len(tag_vocab), include_start_end_trans=True,
                                      allowed_transitions=trans)
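# A hedged, self-contained sketch of the allowed_transitions + ConditionalRandomField
# pattern shared by every model in this collection, on a toy BIO vocabulary. The example
# tags, sentences, and tensor shapes are illustrative assumptions; the API calls mirror
# the ones used above.
import torch
from fastNLP import Vocabulary
from fastNLP.modules import ConditionalRandomField, allowed_transitions

tag_vocab = Vocabulary(padding=None, unknown=None)
tag_vocab.add_word_lst(['O', 'B-PER', 'I-PER', 'B-LOC', 'I-LOC'])

trans = allowed_transitions(tag_vocab, encoding_type='bio', include_start_end=True)
crf = ConditionalRandomField(len(tag_vocab), include_start_end_trans=True, allowed_transitions=trans)

emissions = torch.randn(2, 7, len(tag_vocab))         # (batch, seq_len, num_tags), e.g. fc output
gold = [['B-PER', 'I-PER', 'O', 'B-LOC', 'I-LOC', 'O', 'O'],
        ['O', 'B-PER', 'O', 'O', 'B-LOC', 'O', 'O']]
tags = torch.tensor([[tag_vocab.to_index(t) for t in sent] for sent in gold])
mask = torch.ones(2, 7, dtype=torch.bool)

loss = crf(emissions, tags, mask).mean()               # per-sequence negative log-likelihood
paths, scores = crf.viterbi_decode(emissions, mask)    # transition-constrained decoding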
def __init__(self, tag_vocab, embed, d_model, n_heads, d_k, d_v, n_layers, d_label=10,
             fc_dropout=0.3, dropout=0.15, gpu=0, pos_embed=None, scale=False):
    """
    :param tag_vocab: fastNLP Vocabulary
    :param embed: fastNLP TokenEmbedding
    :param d_model: input size
    :param n_heads: number of attention heads
    :param d_k: dimension of the key vectors
    :param d_v: dimension of the value vectors
    :param n_layers: number of self-attention layers
    :param fc_dropout: dropout rate before the fc layer
    :param dropout: dropout in self-attention
    :param pos_embed: type of position embedding; supports sin, fix, or None
    """
    super().__init__()
    self.embed = embed
    embed_size = self.embed.embed_size

    self.in_fc = nn.Linear(embed_size, d_model)
    self.encoder = Encoder(d_model, n_heads, d_k, d_v, n_layers, d_label, dropout,
                           feedforward_dim=int(2 * d_model))
    self.fc_dropout = nn.Dropout(fc_dropout)
    self.out_fc = nn.Linear(d_model, len(tag_vocab))

    trans = allowed_transitions(tag_vocab, include_start_end=True)
    self.crf = ConditionalRandomField(len(tag_vocab), include_start_end_trans=True, allowed_transitions=trans)
def __init__(self, embed, tag_vocabs, encoding_type='bio'):
    super().__init__()
    self.embed = embed
    self.tag_vocabs = []
    self.fcs = nn.ModuleList()
    self.crfs = nn.ModuleList()
    # One linear projection and one constrained CRF per tag vocabulary.
    for i in range(len(tag_vocabs)):
        self.tag_vocabs.append(tag_vocabs[i])
        linear = nn.Linear(self.embed.embed_size, len(tag_vocabs[i]))
        self.fcs.append(linear)
        trans = allowed_transitions(tag_vocabs[i], encoding_type=encoding_type, include_start_end=True)
        crf = ConditionalRandomField(len(tag_vocabs[i]), include_start_end_trans=True,
                                     allowed_transitions=trans)
        self.crfs.append(crf)
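# A hedged sketch of how a multi-corpus tagger like the one above typically routes a batch
# through the head matching its dataset. The task_id argument, the padding index 0, and the
# method name are assumptions; the per-task fcs/crfs ModuleLists come from the snippet above.
def forward(self, words, seq_len, target, task_id):
    mask = words.ne(0)                        # True on real tokens
    feats = self.embed(words)                 # shared token representations
    logits = self.fcs[task_id](feats)         # task-specific projection
    loss = self.crfs[task_id](logits, target, mask).mean()
    return {'loss': loss}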
def __init__(self, char_embed, num_classes, bigram_embed=None, trigram_embed=None, num_layers=1,
             hidden_size=100, dropout=0.5, target_vocab=None, encoding_type=None):
    super().__init__()
    self.char_embed = get_embeddings(char_embed)
    embed_size = self.char_embed.embedding_dim
    if bigram_embed:
        self.bigram_embed = get_embeddings(bigram_embed)
        embed_size += self.bigram_embed.embedding_dim
    if trigram_embed:
        self.trigram_embed = get_embeddings(trigram_embed)
        embed_size += self.trigram_embed.embedding_dim

    # PyTorch only applies inter-layer dropout when num_layers > 1, so pass it conditionally.
    if num_layers > 1:
        self.lstm = LSTM(embed_size, num_layers=num_layers, hidden_size=hidden_size // 2,
                         bidirectional=True, batch_first=True, dropout=dropout)
    else:
        self.lstm = LSTM(embed_size, num_layers=num_layers, hidden_size=hidden_size // 2,
                         bidirectional=True, batch_first=True)

    self.dropout = nn.Dropout(dropout)
    self.fc = nn.Linear(hidden_size, num_classes)

    trans = None
    if target_vocab is not None and encoding_type is not None:
        trans = allowed_transitions(target_vocab.idx2word, encoding_type=encoding_type,
                                    include_start_end=True)
    self.crf = ConditionalRandomField(num_classes, include_start_end_trans=True, allowed_transitions=trans)
def __init__(self, config: BertConfig):
    super(KnowledgePointExtractionModel, self).__init__(config=config)
    self.bert = BertModel(config=config, add_pooling_layer=False)  # token embeddings only, no pooled output
    # MLP layer sizes: [hidden_size, middle_size1, middle_size2, len(config.crf_labels)]
    self.kpe_mlp = MLP(size_layer=config.mlp_layer_sizes, activation='relu', output_activation=None)

    # crf_labels is an id2label map, e.g. {0: "<pad>", 1: "S", 2: "B", 3: "M", 4: "E"}.
    # Configs loaded from JSON store the ids as strings, so convert the keys back to int.
    tag_labels = {}
    for key, value in config.crf_labels.items():
        if not isinstance(key, int):
            tag_labels[int(key)] = value
    if tag_labels:
        config.crf_labels = tag_labels

    trans = allowed_transitions(tag_vocab=config.crf_labels, include_start_end=True)
    self.kpe_crf = ConditionalRandomField(num_tags=len(config.crf_labels), include_start_end_trans=True,
                                          allowed_transitions=trans)
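# A hedged sketch of how the extended BertConfig consumed above might be prepared. The
# crf_labels and mlp_layer_sizes attribute names and the label set come from the snippet;
# the middle layer sizes and the base checkpoint name are illustrative assumptions.
from transformers import BertConfig

config = BertConfig.from_pretrained('bert-base-chinese')
config.crf_labels = {0: '<pad>', 1: 'S', 2: 'B', 3: 'M', 4: 'E'}
config.mlp_layer_sizes = [config.hidden_size, 512, 256, len(config.crf_labels)]

model = KnowledgePointExtractionModel(config)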
def __init__(self, tag_vocab, embed, num_layers, d_model, n_head, feedforward_dim, dropout,
             after_norm=True, attn_type='adatrans', bi_embed=None, fc_dropout=0.3, pos_embed=None,
             scale=False, dropout_attn=None, use_knowledge=False, feature2count=None, vocab_size=None,
             feature_vocab_size=None, kv_attn_type="dot", memory_dropout=0.2, fusion_dropout=0.2,
             fusion_type='concat', highway_layer=0, key_embed_dropout=0.2, knowledge_type="all",
             use_zen=False, zen_model=None):
    """
    :param tag_vocab: fastNLP Vocabulary
    :param embed: fastNLP TokenEmbedding
    :param num_layers: number of self-attention layers
    :param d_model: input size
    :param n_head: number of attention heads
    :param feedforward_dim: dimension of the FFN
    :param dropout: dropout in self-attention
    :param after_norm: whether layer normalization is applied after the sub-layer
    :param attn_type: 'adatrans' or 'naive'
    :param pos_embed: type of position embedding; supports sin, fix, or None (may be None for relative attention)
    :param bi_embed: used in the Chinese scenario
    :param fc_dropout: dropout rate before the fc layer
    :param use_knowledge: whether to use knowledge extracted with Stanford CoreNLP
    :param feature2count: dict, {"gram2count": dict, "pos_tag2count": dict,
        "chunk_tag2count": dict, "dep_tag2count": dict}
    """
    super().__init__()
    self.use_knowledge = use_knowledge
    self.feature2count = feature2count
    self.vocab_size = vocab_size
    self.feature_vocab_size = feature_vocab_size
    # add ZEN
    self.use_zen = use_zen

    self.embed = embed
    embed_size = self.embed.embed_size
    self.bi_embed = None
    if bi_embed is not None:
        self.bi_embed = bi_embed
        embed_size += self.bi_embed.embed_size

    self.in_fc = nn.Linear(embed_size, d_model)
    self.transformer = TransformerEncoder(num_layers, d_model, n_head, feedforward_dim, dropout,
                                          after_norm=after_norm, attn_type=attn_type,
                                          scale=scale, dropout_attn=dropout_attn,
                                          pos_embed=pos_embed)

    # Key-value memory over the extracted features, fused with the encoder output.
    self.kv_memory = KeyValueMemoryNetwork(vocab_size=vocab_size, feature_vocab_size=feature_vocab_size,
                                           attn_type=kv_attn_type, emb_size=d_model, scaled=True,
                                           key_embed_dropout=key_embed_dropout,
                                           knowledge_type=knowledge_type)
    self.output_dim = d_model * _dim_map[fusion_type]
    self.fusion = FusionModule(fusion_type=fusion_type, layer=highway_layer, input_size=d_model,
                               output_size=self.output_dim, dropout=fusion_dropout)
    self.memory_dropout = nn.Dropout(p=memory_dropout)

    self.out_fc = nn.Linear(self.output_dim, len(tag_vocab))
    self.fc_dropout = nn.Dropout(fc_dropout)

    trans = allowed_transitions(tag_vocab, include_start_end=True)
    self.crf = ConditionalRandomField(len(tag_vocab), include_start_end_trans=True, allowed_transitions=trans)
def __init__(self, tag_vocab, embed, num_layers, d_model, n_head, feedforward_dim, dropout,
             after_norm=True, attn_type='adatrans', bi_embed=None, fc_dropout=0.3, pos_embed=None,
             scale=False, dropout_attn=None, use_knowledge=False, multi_att_dropout=0.3,
             use_ngram=False, gram2id=None, cat_num=5, device=None):
    """
    :param tag_vocab: fastNLP Vocabulary
    :param embed: fastNLP TokenEmbedding
    :param num_layers: number of self-attention layers
    :param d_model: input size
    :param n_head: number of attention heads
    :param feedforward_dim: dimension of the FFN
    :param dropout: dropout in self-attention
    :param after_norm: whether layer normalization is applied after the sub-layer
    :param attn_type: 'adatrans' or 'naive'
    :param pos_embed: type of position embedding; supports sin, fix, or None (may be None for relative attention)
    :param bi_embed: used in the Chinese scenario
    :param fc_dropout: dropout rate before the fc layer
    :param use_knowledge: whether to use knowledge extracted with Stanford CoreNLP
    :param use_ngram: whether to attend over n-gram features with multi-channel attention
    :param gram2id: mapping from n-gram to id, used to size the attention memory
    :param cat_num: number of n-gram channels
    """
    super().__init__()
    self.use_knowledge = use_knowledge
    self.use_ngram = use_ngram
    self.gram2id = gram2id
    self.embed = embed
    self.cat_num = cat_num
    self.use_attention = use_ngram

    embed_size = self.embed.embed_size
    self.bi_embed = None
    if bi_embed is not None:
        self.bi_embed = bi_embed
        embed_size += self.bi_embed.embed_size

    self.in_fc = nn.Linear(embed_size, d_model)
    self.transformer = TransformerEncoder(num_layers, d_model, n_head, feedforward_dim, dropout,
                                          after_norm=after_norm, attn_type=attn_type,
                                          scale=scale, dropout_attn=dropout_attn,
                                          pos_embed=pos_embed)
    self.hidden_size = d_model

    if self.use_attention:
        print("use multi_attention")
        # Multi-channel attention over n-gram features, gated into the encoder output.
        self.multi_attention = MultiChannelAttention(len(self.gram2id), self.hidden_size, self.cat_num)
        self.attention_fc = nn.Linear(self.hidden_size * self.cat_num, self.hidden_size, bias=False)
        self.multi_att_dropout = nn.Dropout(multi_att_dropout)
        self.out_fc = nn.Linear(self.hidden_size * 2, len(tag_vocab), bias=False)
        self.gate = GateConcMechanism(hidden_size=self.hidden_size)
    else:
        self.multi_attention = None
        self.out_fc = nn.Linear(self.hidden_size, len(tag_vocab), bias=False)

    self.fc_dropout = nn.Dropout(fc_dropout)
    trans = allowed_transitions(tag_vocab, include_start_end=True)
    self.crf = ConditionalRandomField(len(tag_vocab), include_start_end_trans=True, allowed_transitions=trans)
def __init__(self, model_conf, attn_type='adatrans', pos_embed=None, dropout_attn=None):
    """
    :param model_conf: dict holding the tag vocabulary and pretrained embeddings; expected keys are
        'entity_type' (tag -> index map), 'char_emb' and 'bichar_emb' (embedding weight tensors,
        'bichar_emb' may be None)
    :param attn_type: 'adatrans' or 'naive'
    :param pos_embed: type of position embedding; supports sin, fix, or None
    :param dropout_attn: dropout applied inside the attention, or None
    """
    super().__init__()
    print('current model is TENER')

    # hyper-parameters from the original TENER paper
    n_head = 6
    head_dims = 80
    num_layers = 2
    d_model = n_head * head_dims
    feedforward_dim = int(2 * d_model)
    dropout = 0.15
    fc_dropout = 0.4
    after_norm = 1
    scale = attn_type == 'transformer'

    tag_vocab = model_conf['entity_type']

    # embeddings
    embed = model_conf['char_emb']
    bi_embed = model_conf['bichar_emb']
    self.embed = nn.Embedding(num_embeddings=embed.shape[0], embedding_dim=embed.shape[1],
                              padding_idx=0, _weight=embed)
    embed_size = embed.size()[1]
    self.bi_embed = None
    if bi_embed is not None:
        self.bi_embed = nn.Embedding(num_embeddings=bi_embed.shape[0], embedding_dim=bi_embed.shape[1],
                                     padding_idx=0, _weight=bi_embed)
        embed_size += bi_embed.size()[1]

    self.in_fc = nn.Linear(embed_size, d_model)
    self.transformer = TransformerEncoder(num_layers, d_model, n_head, feedforward_dim, dropout,
                                          after_norm=after_norm, attn_type=attn_type,
                                          scale=scale, dropout_attn=dropout_attn,
                                          pos_embed=pos_embed)
    self.fc_dropout = nn.Dropout(fc_dropout)
    self.out_fc = nn.Linear(d_model, len(tag_vocab))

    # entity_type maps tag -> index; invert it to the id -> tag dict that allowed_transitions expects
    trans = allowed_transitions({item: key for key, item in tag_vocab.items()},
                                include_start_end=True, encoding_type='bmeso')
    self.crf = ConditionalRandomField(len(tag_vocab), include_start_end_trans=True, allowed_transitions=trans)
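# A hedged sketch of the model_conf dictionary this TENER variant expects. The key names
# ('entity_type', 'char_emb', 'bichar_emb') come from the snippet; the class name, tag set,
# and embedding shapes below are illustrative assumptions.
import torch

model_conf = {
    'entity_type': {'O': 0, 'B-PER': 1, 'M-PER': 2, 'E-PER': 3, 'S-PER': 4},  # tag -> index, BMESO scheme
    'char_emb': torch.randn(5000, 100),   # (char_vocab_size, emb_dim) pretrained weights
    'bichar_emb': None,                   # or a (bichar_vocab_size, emb_dim) tensor
}
model = TENER(model_conf, attn_type='adatrans')   # assumes the enclosing class is named TENER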