def __init__(self, embed, label_vocab, pos_idx=31, Parsing_rnn_layers=3, Parsing_arc_mlp_size=500,
             Parsing_label_mlp_size=100, Parsing_use_greedy_infer=False, encoding_type='bmeso',
             embedding_dim=768, dropout=0.1, use_pos_embedding=True, use_average=True):
    super().__init__()
    self.embed = embed
    self.use_pos_embedding = use_pos_embedding
    self.use_average = use_average
    self.label_vocab = label_vocab
    self.pos_idx = pos_idx
    self.user_dict_weight = 0.05
    embedding_dim_1 = 512
    embedding_dim_2 = 256
    self.layers_map = {'CWS': '-1', 'POS': '-1', 'Parsing': '-1', 'NER': '-1'}

    # NER: linear emission layer followed by a CRF with BMESO transition constraints
    self.ner_linear = nn.Linear(embedding_dim, len(label_vocab['NER']))
    trans = allowed_transitions(label_vocab['NER'], encoding_type='bmeso', include_start_end=True)
    self.ner_crf = ConditionalRandomField(len(label_vocab['NER']), include_start_end_trans=True,
                                          allowed_transitions=trans)

    # Parsing: character-level biaffine dependency parser over 768-d features
    self.biaffine_parser = BertCharParser(
        app_index=self.label_vocab['Parsing'].to_index('APP'),
        vector_size=768,
        num_label=len(label_vocab['Parsing']),
        rnn_layers=Parsing_rnn_layers,
        arc_mlp_size=Parsing_arc_mlp_size,
        label_mlp_size=Parsing_label_mlp_size,
        dropout=dropout,
        use_greedy_infer=Parsing_use_greedy_infer)
    if self.use_pos_embedding:
        self.pos_embedding = nn.Embedding(len(self.label_vocab['pos']), embedding_dim, padding_idx=0)
    self.loss = CrossEntropyLoss(padding_idx=0)

    # CWS: MLP tagger with a CRF on top
    self.cws_mlp = MLP([embedding_dim, embedding_dim_1, embedding_dim_2, len(label_vocab['CWS'])],
                       'relu', output_activation=None)
    trans = allowed_transitions(label_vocab['CWS'], include_start_end=True)
    self.cws_crf = ConditionalRandomField(len(label_vocab['CWS']), include_start_end_trans=True,
                                          allowed_transitions=trans)

    # POS: MLP tagger with a CRF on top
    self.pos_mlp = MLP([embedding_dim, embedding_dim_1, embedding_dim_2, len(label_vocab['POS'])],
                       'relu', output_activation=None)
    trans = allowed_transitions(label_vocab['POS'], include_start_end=True)
    self.pos_crf = ConditionalRandomField(len(label_vocab['POS']), include_start_end_trans=True,
                                          allowed_transitions=trans)
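A minimal, self-contained sketch of the MLP -> CRF tagging-head pattern used above for the CWS and POS heads: the MLP produces per-character emission scores, the CRF gives a sequence-level negative log-likelihood for training and a constrained Viterbi path at inference. It assumes a fastNLP 0.x install where MLP, ConditionalRandomField and allowed_transitions are importable from fastNLP.modules; the tiny BMES vocabulary, shapes and random tensors are illustrative only.

import torch
from fastNLP import Vocabulary
from fastNLP.modules import MLP, ConditionalRandomField, allowed_transitions

tag_vocab = Vocabulary(padding=None, unknown=None)
tag_vocab.add_word_lst(['S', 'B', 'M', 'E'])

mlp = MLP([768, 512, 256, len(tag_vocab)], 'relu', output_activation=None)
crf = ConditionalRandomField(len(tag_vocab), include_start_end_trans=True,
                             allowed_transitions=allowed_transitions(tag_vocab, include_start_end=True))

chars = torch.randn(2, 5, 768)                    # stand-in for BERT character features
mask = torch.ones(2, 5, dtype=torch.bool)         # all five positions are real characters
tags = torch.randint(0, len(tag_vocab), (2, 5))   # gold tag ids

feats = mlp(chars)                                # emission scores: [batch, seq_len, num_tags]
loss = crf(feats, tags, mask).mean()              # CRF negative log-likelihood, one value per sample
paths, _ = crf.viterbi_decode(feats, mask)        # best tag sequence under the transition constraints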
def __init__(self, args, embedding, hid_dim):
    super(GCN, self).__init__()
    self.args = args
    self.layers = args.num_layers
    self.mem_dim = hid_dim
    self.in_dim = args.tok_dim + args.pos_dim + args.post_dim
    self.tok_emb, self.pos_emb, self.post_emb = embedding

    # dropout
    self.rnn_drop = nn.Dropout(args.rnn_dropout)
    self.in_drop = nn.Dropout(args.input_dropout)
    self.gcn_drop = nn.Dropout(args.gcn_dropout)

    # LSTM encoder over the concatenated token/POS/position embeddings
    input_size = self.in_dim
    self.rnn = LSTM(
        input_size,
        args.rnn_hidden,
        args.rnn_layers,
        batch_first=True,
        dropout=args.rnn_dropout,
        bidirectional=args.bidirect,
    )
    if args.bidirect:
        self.in_dim = args.rnn_hidden * 2
    else:
        self.in_dim = args.rnn_hidden

    # GCN layers: the first layer maps from the LSTM output size, later layers stay at mem_dim
    self.G = nn.ModuleList()
    for layer in range(self.layers):
        input_dim = self.in_dim if layer == 0 else self.mem_dim
        self.G.append(MLP([input_dim, self.mem_dim]))
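The forward pass of this GCN is not shown here, so the following is only a plausible sketch of how a ModuleList of single-layer MLPs like self.G is commonly applied: each hop aggregates neighbour states through the adjacency matrix, normalises by node degree, and transforms the result with that layer's MLP. The shapes, the identity adjacency and the update rule itself are assumptions, not the repository's actual code.

import torch
import torch.nn.functional as F
from torch import nn
from fastNLP.modules import MLP

def gcn_stack(layers, adj, h):
    # adj: [batch, n_nodes, n_nodes], h: [batch, n_nodes, in_dim]
    denom = adj.sum(dim=2, keepdim=True) + 1       # node degree, +1 avoids division by zero
    for mlp in layers:
        h = F.relu(mlp(adj.bmm(h) / denom))        # aggregate neighbours, normalise, transform
    return h

layers = nn.ModuleList([MLP([300, 200]), MLP([200, 200])])             # first layer: in_dim -> mem_dim
out = gcn_stack(layers, torch.eye(4).repeat(2, 1, 1), torch.randn(2, 4, 300))   # [2, 4, 200]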
def __init__(self, pre_name, word2bpes, pad_id, num_languages):
    # word2bpes: one list of max_word_len BPE ids per target word
    super().__init__()
    self.model = LMBertForMaskedLM.from_pretrained(pre_name)
    self.model.bert.add_language_embedding(num_languages)
    self.model.set_start_end(1, 1 + len(word2bpes[0]))
    # self.model = LMBertModel.from_pretrained(pre_name)
    # self.model.add_language_embedding(num_languages)
    self.max_word_len = len(word2bpes[0])
    word2bpes = torch.LongTensor(word2bpes).transpose(0, 1).unsqueeze(0)  # [1, max_word_len, num_words]
    self.register_buffer('word2bpes', word2bpes)
    self.lg_fc = MLP([768, 1024, num_languages], activation='relu', dropout=0.3)
    self.pad_id = pad_id
def __init__(self, config: BertConfig):
    super(KnowledgePointExtractionModel, self).__init__(config=config)
    self.bert = BertModel(config=config, add_pooling_layer=False)  # word to vector (embeddings)
    # MLP input/output layer sizes, mlp_layer_sizes: [hidden_size, middle_size1, middle_size2, len(config.crf_labels)]
    self.kpe_mlp = MLP(size_layer=config.mlp_layer_sizes, activation='relu', output_activation=None)
    # crf_labels = {0: "<pad>", 1: "S", 2: "B", 3: "M", 4: "E"} (id2label)
    # keys may arrive as strings (e.g. from a JSON config), so normalise them to ints before building the CRF
    tag_labels = {}
    for key, value in config.crf_labels.items():
        if not isinstance(key, int):
            tag_labels[int(key)] = value
    if tag_labels:
        config.crf_labels = tag_labels
    trans = allowed_transitions(tag_vocab=config.crf_labels, include_start_end=True)
    self.kpe_crf = ConditionalRandomField(num_tags=len(config.crf_labels), include_start_end_trans=True,
                                          allowed_transitions=trans)
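At inference time the CRF above emits one of S/B/M/E (plus <pad>) per token. A small helper like the following, which is hypothetical and not part of the original class, turns a decoded tag-id sequence back into (start, end) knowledge-point spans using the id2label mapping shown in the comment.

def bmes_to_spans(tag_ids, id2label):
    # tag_ids: decoded tag ids for one sequence; id2label: e.g. {0: "<pad>", 1: "S", 2: "B", 3: "M", 4: "E"}
    spans, start = [], None
    for i, tag_id in enumerate(tag_ids):
        tag = id2label[int(tag_id)]
        if tag == 'S':                       # single-token knowledge point
            spans.append((i, i + 1))
            start = None
        elif tag == 'B':                     # span opens here
            start = i
        elif tag == 'E' and start is not None:   # span closes here
            spans.append((start, i + 1))
            start = None
        elif tag == '<pad>':
            start = None
    return spans

# e.g. bmes_to_spans([2, 3, 4, 1, 0], {0: "<pad>", 1: "S", 2: "B", 3: "M", 4: "E"}) -> [(0, 3), (3, 4)]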
def __init__(self, config, vocab=None):
    super(CGSum, self).__init__()
    self.use_cuda = config.use_gpu and torch.cuda.is_available()
    encoder = Encoder(config)
    decoder = Decoder(config)
    # share the embedding matrix between encoder and decoder
    decoder.embedding.weight = encoder.embedding.weight
    reduce_state = ReduceState(config)

    self.config = config
    self.vocab = vocab
    self.mlp = MLP(size_layer=[config.hidden_dim * 2, config.hidden_dim * 2, config.hidden_dim * 2])
    self.W_h = nn.Linear(config.hidden_dim * 2, config.hidden_dim * 2, bias=False)
    self.encoder = encoder
    self.decoder = decoder
    self.gnnEncoder = GNNEncoder(config)
    self.reduce_state = reduce_state
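The one non-obvious line above is the embedding tie: assigning decoder.embedding.weight = encoder.embedding.weight makes both modules read and update a single shared parameter matrix. A standalone illustration with plain nn.Embedding (sizes are arbitrary):

from torch import nn

enc_emb = nn.Embedding(1000, 128)
dec_emb = nn.Embedding(1000, 128)
dec_emb.weight = enc_emb.weight          # both modules now point at the same Parameter
assert dec_emb.weight.data_ptr() == enc_emb.weight.data_ptr()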
def __init__(self, config):
    super(Encoder, self).__init__()
    self.config = config
    self.embedding = nn.Embedding(config.vocab_size, config.emb_dim)
    init_wt_normal(config, self.embedding.weight)
    self.join = nn.Linear(4 * config.hidden_dim, 2 * config.hidden_dim)
    init_linear_wt(config, self.join)
    self.lstm = LSTM(config.emb_dim, config.hidden_dim, num_layers=1, batch_first=True, bidirectional=True)
    self.graph_feature_lstm = LSTM(config.emb_dim, config.hidden_dim, num_layers=1, batch_first=True,
                                   bidirectional=True)
    self.mlp = MLP(size_layer=[config.hidden_dim * 4, config.hidden_dim * 2, config.hidden_dim * 2, 1],
                   activation="tanh")
    self.criterion = nn.MSELoss(reduction="sum")
def __init__(self, in_feature_dim, out_feature_dim):
    super(NaiveClassifier4, self).__init__()
    self.mlp = MLP([in_feature_dim, in_feature_dim, out_feature_dim])
def __init__(self, args, emb_matrix):
    super().__init__()
    in_dim = args.hidden_dim
    self.gcn_model = GCNAbsaModel(args, emb_matrix=emb_matrix)
    self.classifier = MLP([in_dim, args.num_class])
def __init__(self, embed, tag_size):
    super().__init__()
    self.embedding = embed
    self.tag_size = tag_size
    self.mlp = MLP(size_layer=[self.embedding.embedding_dim, tag_size])
def __init__(self, embed, tag_size):
    super().__init__()
    self.embedding = Embedding(embed, dropout=0.1)
    self.tag_size = tag_size
    self.mlp = MLP(size_layer=[self.embedding.embedding_dim, tag_size])
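A brief usage sketch for the embedding -> MLP tagger pattern in the last two snippets, assuming fastNLP's Embedding wrapper (which accepts a (num_words, dim) tuple for a randomly initialised lookup table); the toy vocabulary and the tag_size of 7 are illustrative only.

import torch
from fastNLP import Vocabulary
from fastNLP.embeddings import Embedding
from fastNLP.modules import MLP

vocab = Vocabulary()
vocab.add_word_lst("the cat sat on the mat".split())

embed = Embedding((len(vocab), 50), dropout=0.1)       # random lookup table with dropout
mlp = MLP(size_layer=[embed.embedding_dim, 7])         # one linear layer: embedding dim -> tag_size

tokens = torch.LongTensor([[vocab.to_index(w) for w in "the cat sat".split()]])
logits = mlp(embed(tokens))                            # [1, 3, 7] per-token tag scores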