Example #1
    def __init__(self, embed, label_vocab, pos_idx=31,
                 Parsing_rnn_layers=3, Parsing_arc_mlp_size=500,
                 Parsing_label_mlp_size=100, Parsing_use_greedy_infer=False,
                 encoding_type='bmeso', embedding_dim=768, dropout=0.1,
                 use_pos_embedding=True, use_average=True):
        super().__init__()
        self.embed = embed
        self.use_pos_embedding = use_pos_embedding
        self.use_average = use_average
        self.label_vocab = label_vocab
        self.pos_idx = pos_idx
        self.user_dict_weight = 0.05
        embedding_dim_1 = 512
        embedding_dim_2 = 256

        self.layers_map = {'CWS': '-1', 'POS': '-1', 'Parsing': '-1', 'NER': '-1'}

        # NER: linear projection + CRF constrained to valid BMESO transitions
        self.ner_linear = nn.Linear(embedding_dim, len(label_vocab['NER']))
        trans = allowed_transitions(label_vocab['NER'], encoding_type='bmeso', include_start_end=True)
        self.ner_crf = ConditionalRandomField(len(label_vocab['NER']), include_start_end_trans=True, allowed_transitions=trans)

        # Parsing: biaffine dependency parser over BERT character representations
        self.biaffine_parser = BertCharParser(
            app_index=self.label_vocab['Parsing'].to_index('APP'),
            vector_size=768,
            num_label=len(label_vocab['Parsing']),
            rnn_layers=Parsing_rnn_layers,
            arc_mlp_size=Parsing_arc_mlp_size,
            label_mlp_size=Parsing_label_mlp_size,
            dropout=dropout,
            use_greedy_infer=Parsing_use_greedy_infer)

        if self.use_pos_embedding:
            self.pos_embedding = nn.Embedding(len(self.label_vocab['pos']), embedding_dim, padding_idx=0)

        self.loss = CrossEntropyLoss(padding_idx=0)

        # CWS: 768 -> 512 -> 256 -> n_tags MLP + CRF
        self.cws_mlp = MLP([embedding_dim, embedding_dim_1, embedding_dim_2, len(label_vocab['CWS'])], 'relu', output_activation=None)
        trans = allowed_transitions(label_vocab['CWS'], include_start_end=True)
        self.cws_crf = ConditionalRandomField(len(label_vocab['CWS']), include_start_end_trans=True, allowed_transitions=trans)

        # POS: same MLP + CRF head over the POS tag vocabulary
        self.pos_mlp = MLP([embedding_dim, embedding_dim_1, embedding_dim_2, len(label_vocab['POS'])], 'relu', output_activation=None)
        trans = allowed_transitions(label_vocab['POS'], include_start_end=True)
        self.pos_crf = ConditionalRandomField(len(label_vocab['POS']), include_start_end_trans=True, allowed_transitions=trans)
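
The CWS and POS heads above share one pattern: a 768 -> 512 -> 256 -> n_tags MLP whose per-character scores feed a transition-constrained CRF. A minimal sketch of the MLP part alone, assuming fastNLP's MLP API; the batch size, sequence length, and tag count are made up:

import torch
from fastNLP.modules import MLP

# hidden states -> 512 -> 256 -> one score per tag, ReLU between layers
cws_mlp = MLP([768, 512, 256, 4], 'relu', output_activation=None)

chars = torch.randn(2, 30, 768)   # (batch, chars, BERT hidden size)
scores = cws_mlp(chars)           # (2, 30, 4): per-character tag scores for the CRF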
Example #2
    def __init__(self, args, embedding, hid_dim):
        super(GCN, self).__init__()

        self.args = args
        self.layers = args.num_layers
        self.mem_dim = hid_dim
        self.in_dim = args.tok_dim + args.pos_dim + args.post_dim
        self.tok_emb, self.pos_emb, self.post_emb = embedding
        # dropout layers
        self.rnn_drop = nn.Dropout(args.rnn_dropout)
        self.in_drop = nn.Dropout(args.input_dropout)
        self.gcn_drop = nn.Dropout(args.gcn_dropout)

        # lstm
        input_size = self.in_dim
        self.rnn = LSTM(
            input_size,
            args.rnn_hidden,
            args.rnn_layers,
            batch_first=True,
            dropout=args.rnn_dropout,
            bidirectional=args.bidirect,
        )
        if args.bidirect:
            self.in_dim = args.rnn_hidden * 2
        else:
            self.in_dim = args.rnn_hidden

        # gcn layer
        self.G = nn.ModuleList()
        for layer in range(self.layers):
            # first layer takes the LSTM output size; later layers stay at mem_dim
            input_dim = self.in_dim if layer == 0 else self.mem_dim
            self.G.append(MLP([input_dim, self.mem_dim]))
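
With only two sizes, MLP([input_dim, self.mem_dim]) has no hidden layers and reduces to a single linear projection, so self.G is effectively a stack of per-layer linear maps. A rough sketch of how such a stack could be applied with a normalized adjacency; the aggregation step is an assumption about the surrounding GCN, not something shown in the snippet:

import torch
from fastNLP.modules import MLP

in_dim, mem_dim, n_layers = 128, 64, 2
G = [MLP([in_dim if i == 0 else mem_dim, mem_dim]) for i in range(n_layers)]

h = torch.randn(2, 10, in_dim)                    # (batch, tokens, features)
adj = torch.softmax(torch.randn(2, 10, 10), -1)   # stand-in normalized adjacency
for g in G:
    h = torch.relu(g(adj.bmm(h)))                 # aggregate neighbours, project, activate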
Example #3
    def __init__(self, pre_name, word2bpes, pad_id, num_languages):
        # word2bpes: one list of BPE ids per vocabulary word, padded to a common length

        super().__init__()
        self.model = LMBertForMaskedLM.from_pretrained(pre_name)
        self.model.bert.add_language_embedding(num_languages)
        self.model.set_start_end(1, 1+len(word2bpes[0]))

        # self.model = LMBertModel.from_pretrained(pre_name)
        # self.model.add_language_embedding(num_languages)

        self.max_word_len = len(word2bpes[0])
        word2bpes = torch.LongTensor(word2bpes).transpose(0, 1).unsqueeze(0)
        self.register_buffer('word2bpes', word2bpes)

        self.lg_fc = MLP([768, 1024, num_languages], activation='relu', dropout=0.3)
        self.pad_id = pad_id
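
register_buffer makes word2bpes part of the module's state: it is saved in state_dict and moved by .to(device), but it is not a trainable parameter. A self-contained sketch of the idiom with a hypothetical Lookup module:

import torch
import torch.nn as nn

class Lookup(nn.Module):
    def __init__(self, table):
        super().__init__()
        # stored with the model and moved across devices, but never optimized
        self.register_buffer('table', torch.LongTensor(table))

m = Lookup([[1, 2], [3, 4]])
print('table' in m.state_dict())       # True
print(sum(1 for _ in m.parameters()))  # 0: buffers are not parameters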
Example #4
    def __init__(self, config: BertConfig):
        super(KnowledgePointExtractionModel, self).__init__(config=config)

        self.bert = BertModel(
            config=config,
            add_pooling_layer=False)  # word to vector(embeddings)
        # MLP layer sizes, mlp_layer_sizes: [hidden_size, middle_size1, middle_size2, len(config.crf_labels)]
        self.kpe_mlp = MLP(size_layer=config.mlp_layer_sizes,
                           activation='relu',
                           output_activation=None)
        # crf_labels = {0:"<pad>", 1: "S", 2: "B", 3: "M", 4: "E"} (id2label)
        # configs loaded from JSON store the id keys as strings; convert them back to ints
        tag_labels = {}
        for key, value in config.crf_labels.items():
            if not isinstance(key, int):
                tag_labels[int(key)] = value
        if tag_labels:
            config.crf_labels = tag_labels
        trans = allowed_transitions(tag_vocab=config.crf_labels,
                                    include_start_end=True)
        self.kpe_crf = ConditionalRandomField(num_tags=len(config.crf_labels),
                                              include_start_end_trans=True,
                                              allowed_transitions=trans)
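
Examples #1 and #4 both wire an MLP into a ConditionalRandomField whose transition matrix is masked by allowed_transitions. A minimal end-to-end sketch, assuming fastNLP's CRF API; the label map, shapes, and dummy tensors are illustrative only:

import torch
from fastNLP.modules import MLP, ConditionalRandomField, allowed_transitions

id2label = {0: "<pad>", 1: "S", 2: "B", 3: "M", 4: "E"}
trans = allowed_transitions(tag_vocab=id2label, include_start_end=True)

mlp = MLP(size_layer=[768, 256, 128, len(id2label)], activation='relu', output_activation=None)
crf = ConditionalRandomField(num_tags=len(id2label), include_start_end_trans=True,
                             allowed_transitions=trans)

hidden = torch.randn(2, 16, 768)                  # stand-in for BERT hidden states
mask = torch.ones(2, 16, dtype=torch.bool)        # all positions valid
logits = mlp(hidden)                              # (batch, seq_len, num_tags)

tags = torch.randint(1, 5, (2, 16))               # dummy gold tag ids
loss = crf(logits, tags, mask).mean()             # CRF negative log-likelihood
paths, scores = crf.viterbi_decode(logits, mask)  # constrained best tag sequences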
Example #5
    def __init__(self, config, vocab=None):
        super(CGSum, self).__init__()
        self.use_cuda = config.use_gpu and torch.cuda.is_available()

        encoder = Encoder(config)
        decoder = Decoder(config)
        decoder.embedding.weight = encoder.embedding.weight  # tie encoder/decoder embeddings

        reduce_state = ReduceState(config)
        self.config = config
        self.vocab = vocab

        self.mlp = MLP(size_layer=[
            config.hidden_dim * 2, config.hidden_dim * 2, config.hidden_dim * 2
        ])
        self.W_h = nn.Linear(config.hidden_dim * 2,
                             config.hidden_dim * 2,
                             bias=False)

        self.encoder = encoder
        self.decoder = decoder
        self.gnnEncoder = GNNEncoder(config)
        self.reduce_state = reduce_state
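
The assignment decoder.embedding.weight = encoder.embedding.weight ties the two embedding tables to a single shared parameter, which halves the embedding memory and keeps encoder and decoder token vectors in sync. A minimal sketch of the same idiom (the module names are hypothetical):

import torch.nn as nn

src_emb = nn.Embedding(30000, 128)
tgt_emb = nn.Embedding(30000, 128)
tgt_emb.weight = src_emb.weight   # both modules now read and update one tensor

assert tgt_emb.weight.data_ptr() == src_emb.weight.data_ptr()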
Example #6
    def __init__(self, config):
        super(Encoder, self).__init__()
        self.config = config
        self.embedding = nn.Embedding(config.vocab_size, config.emb_dim)
        init_wt_normal(config, self.embedding.weight)
        self.join = nn.Linear(4 * config.hidden_dim, 2 * config.hidden_dim)  # 4H -> 2H projection
        init_linear_wt(config, self.join)
        self.lstm = LSTM(config.emb_dim,
                         config.hidden_dim,
                         num_layers=1,
                         batch_first=True,
                         bidirectional=True)
        self.graph_feature_lstm = LSTM(config.emb_dim,
                                       config.hidden_dim,
                                       num_layers=1,
                                       batch_first=True,
                                       bidirectional=True)
        self.mlp = MLP(size_layer=[config.hidden_dim * 4, config.hidden_dim * 2,
                                   config.hidden_dim * 2, 1],
                       activation="tanh")

        self.criterion = nn.MSELoss(reduction="sum")
Example #7
    def __init__(self, in_feature_dim, out_feature_dim):
        super(NaiveClassifier4, self).__init__()
        self.mlp = MLP([in_feature_dim, in_feature_dim, out_feature_dim])
Example #8
    def __init__(self, args, emb_matrix):
        super().__init__()

        in_dim = args.hidden_dim
        self.gcn_model = GCNAbsaModel(args, emb_matrix=emb_matrix)
        self.classifier = MLP([in_dim, args.num_class])
Example #9
    def __init__(self, embed, tag_size):
        super().__init__()
        self.embedding = embed
        self.tag_size = tag_size
        self.mlp = MLP(size_layer=[self.embedding.embedding_dim, tag_size])
Example #10
    def __init__(self, embed, tag_size):
        super().__init__()

        self.embedding = Embedding(embed, dropout=0.1)
        self.tag_size = tag_size
        self.mlp = MLP(size_layer=[self.embedding.embedding_dim, tag_size])
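
Examples #9 and #10 reduce the head to a single projection: with two sizes, MLP is just a linear map from embedding_dim to tag_size. A minimal usage sketch, with a plain nn.Embedding standing in for the embed argument and a made-up tag count:

import torch
import torch.nn as nn
from fastNLP.modules import MLP

embed = nn.Embedding(5000, 100)                  # stand-in for the pretrained embed
mlp = MLP(size_layer=[embed.embedding_dim, 17])  # 17 is a hypothetical tag_size

tokens = torch.randint(0, 5000, (2, 20))
logits = mlp(embed(tokens))                      # (batch, seq_len, tag_size)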