Code example #1
    def model(self):
        word_input = Input(shape=(self.maxlen_sentence,))  # [batch, sentence]
        char_input = Input(shape=(self.maxlen_sentence, self.maxlen_word,))  # [batch, sentence, word] char ids
        ner_label = Input(shape=(self.maxlen_sentence,))
        # relation_label = Input(shape=(self.maxlen_sentence,))

        mask = Lambda(lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(word_input)

        word_embedding = Embedding(self.word_vocab_size, self.word_embed_size, mask_zero=True, weights=[self.embedding_martrix], name='word_embedding', trainable=True)(word_input)  # [batch, sentence, word_embed]
        char_embedding = Embedding(self.char_vocab_size, self.char_embed_size, mask_zero=True, name='char_embedding', trainable=True)(char_input)  # [batch, sentence, word, char_embed]

        if self.embedding_dropout_prob:
            word_embedding = Dropout(self.embedding_dropout_prob)(word_embedding)
            char_embedding = Dropout(self.embedding_dropout_prob)(char_embedding)

        if self.is_use_char_embedding:
            # char-level pooling part (attention over the char BiLSTM states)
            char_embedding_shape = K.int_shape(char_embedding)  # [batch,sentence,word,dim]
            # char_embedding_reshaped = K.reshape(char_embedding, shape=(-1, char_embedding_shape[-2],self.char_embed_size))  # [batch*sentence,word,dim of char embedding]
            char_embedding_reshaped = self.reshape_layer_1(char_embedding,char_embedding_shape)
            char_lstm = Bidirectional(MaskedLSTM(units=self.char_embed_size // 2, return_sequences=True, name='char_lstm_layer'))(
                char_embedding_reshaped)
            # attention pooling: score each char state, softmax over chars, weighted sum
            attention = TimeDistributed(Dense(1, activation='tanh'))(char_lstm)
            attention = MaskFlatten()(attention)
            attention = Activation('softmax')(attention)
            attention = MaskRepeatVector(self.char_embed_size)(attention)
            attention = MaskPermute([2, 1])(attention)
            sent_representation = multiply([char_lstm, attention])
            attention = Lambda(lambda xin: K.sum(xin, axis=1))(sent_representation)

            # char_maxpool = GlobalMaxPooling1D(char_lstm)  # [batch*sentence,hidden_size]
            # char_att = Attention_Layer()(char_lstm)  # [batch*sentence,hidden_size]
            # char_embedding = K.reshape(char_maxpool, shape=[-1, char_embedding_shape[1],
            #                                                 self.hidden_size])  # [batch,sentence,hidden_size]
            # char_embedding = K.reshape(attention, shape=[-1, char_embedding_shape[-1], self.char_embed_size])  # [batch,sentence,hidden_size]
            char_embedding = self.reshape_layer_2(attention,char_embedding_shape)
            if self.word_char_embed_mode == 'concate':
                embedding = Concatenate(axis=-1)([word_embedding, char_embedding])
            else:
                embedding = Gate_Add_Lyaer()([word_embedding, char_embedding])
        else:
            embedding = word_embedding
        # multi-layer self-attention for NER prediction
        if self.embedding_dropout_prob:
            embedding = Dropout(self.embedding_dropout_prob)(embedding)


        # part 1: multi self-attention block (CNN/LSTM/FNN + self-attention)
        lstm = Bidirectional(MaskedLSTM(units=self.hidden_size // 2, return_sequences=True))(embedding)
        attention = TimeDistributed(Dense(1, activation='tanh'))(lstm)
        attention = MaskFlatten()(attention)
        attention = Activation('softmax')(attention)
        attention = MaskRepeatVector(self.hidden_size)(attention)
        attention = MaskPermute([2, 1])(attention)
        sent_representation = multiply([lstm, attention])
        attention = Lambda(lambda xin: K.sum(xin, axis=1))(sent_representation)
        # lstm_attention = Lambda(seq_and_vec, output_shape=(None, self.hidden_size * 2))(
        #     [lstm, attention])  # [consider plain addition here, or gated addition]
        attention = MaskRepeatVector(self.maxlen_sentence)(attention) #[batch,sentence,hidden_size]
        lstm = Gate_Add_Lyaer()([lstm,attention])
        if self.nn_dropout_prob:
            lstm = Dropout(self.nn_dropout_prob)(lstm)

        lstm_attention = MaskedConv1D(filters=self.hidden_size,kernel_size=3,activation='relu',padding='same')(lstm)
        bio_pred = Dense(self.num_classes, activation='softmax')(lstm_attention)
        pred_model = Model([word_input, char_input], bio_pred)
        # part 2: multi-head selection for relation classification (not present in this snippet)
        train_model = Model([word_input, char_input, ner_label], bio_pred)

        loss = K.sparse_categorical_crossentropy(ner_label, bio_pred)
        loss = K.sum(loss * mask[:, :, 0]) / K.sum(mask)

        train_model.summary()
        train_model.add_loss(loss)
        train_model.compile(optimizer=keras.optimizers.Adam(lr=self.learning_rate))

        return train_model,pred_model
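
The snippet above relies on several custom layers (MaskedLSTM, MaskedConv1D, MaskFlatten, MaskRepeatVector, MaskPermute, Gate_Add_Lyaer) defined elsewhere in the repository. To make the gated fusion step easier to follow, here is a minimal sketch of what a gated-addition layer can look like; the class name GatedAdd is hypothetical, it assumes both inputs share the same last dimension, and it is not the project's actual Gate_Add_Lyaer implementation.

from keras import backend as K
from keras.layers import Layer

class GatedAdd(Layer):
    """Sketch: a sigmoid gate decides, per dimension, how much of the first
    input versus the second input to keep (gate * a + (1 - gate) * b)."""
    def __init__(self, **kwargs):
        super(GatedAdd, self).__init__(**kwargs)
        self.supports_masking = True  # inputs may carry masks from mask_zero embeddings

    def build(self, input_shape):
        dim = int(input_shape[0][-1])
        self.W = self.add_weight(name='gate_w', shape=(dim, dim),
                                 initializer='glorot_uniform')
        self.b = self.add_weight(name='gate_b', shape=(dim,),
                                 initializer='zeros')
        super(GatedAdd, self).build(input_shape)

    def call(self, inputs, mask=None):
        a, b = inputs  # both [batch, steps, dim]
        gate = K.sigmoid(K.dot(a, self.W) + self.b)
        return gate * a + (1.0 - gate) * b

    def compute_mask(self, inputs, mask=None):
        return mask[0] if isinstance(mask, list) else mask

    def compute_output_shape(self, input_shape):
        return input_shape[0]
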
Code example #2
def build_model_from_config(
    config_file,
    checkpoint_file,
    training=False,
    trainable=False,
    seq_len=None,
):
    """Build the model from config file.

    :param config_file: The path to the JSON configuration file.
    :param training: If training, the whole model will be returned.
    :param trainable: Whether the model is trainable.
    :param seq_len: If it is not None and it is shorter than the value in the config file, the weights in
                    position embeddings will be sliced to fit the new length.
    :return: model and config
    """
    with open(config_file, 'r') as reader:
        config = json.loads(reader.read())
    if seq_len is not None:
        config['max_position_embeddings'] = min(
            seq_len, config['max_position_embeddings'])
    if trainable is None:
        trainable = training
    model = get_model(
        token_num=config['vocab_size'],
        pos_num=config['max_position_embeddings'],
        seq_len=config['max_position_embeddings'],
        embed_dim=config['hidden_size'],
        transformer_num=config['num_hidden_layers'],
        head_num=config['num_attention_heads'],
        feed_forward_dim=config['intermediate_size'],
        training=False,   # note: the function's training/trainable arguments are
        trainable=True,   # overridden here; the encoder is always built trainable
    )
    inputs, outputs = model
    bio_label = Input(shape=(maxlen, ))
    event = Input(shape=(1, ))

    mask = Lambda(
        lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(
            inputs[0])
    event_embedding = Embedding(len(event2id), hidden_size,
                                mask_zero=True)(event)

    outputs = Dropout(0.15)(outputs)
    # attention pooling over the BERT outputs: score each token, softmax, weighted sum
    attention = TimeDistributed(Dense(1, activation='tanh'))(outputs)
    attention = MaskFlatten()(attention)
    attention = Activation('softmax')(attention)
    attention = MaskRepeatVector(config['hidden_size'])(attention)
    attention = MaskPermute([2, 1])(attention)
    sent_representation = multiply([outputs, attention])
    attention = Lambda(lambda xin: K.sum(xin, axis=1))(sent_representation)
    t_dim = K.int_shape(outputs)[-1]
    bert_attention = Lambda(seq_and_vec,
                            output_shape=(None,
                                          t_dim * 2))([outputs, attention])

    cnn1 = MaskedConv1D(filters=hidden_size,
                        kernel_size=3,
                        activation='relu',
                        padding='same')(bert_attention)
    # broadcast the event embedding across every timestep of cnn1
    event_bc = Lambda(lambda x: x[0] * 0 + x[1])([cnn1, event_embedding])
    con_cnn_event = Concatenate(axis=-1)([cnn1, event_bc])
    dens1 = Dense(hidden_size, activation='relu', use_bias=True)(con_cnn_event)
    # BIOE tagging scheme (4 classes)
    bio_pred = Dense(4, activation='softmax')(dens1)
    entity_model = keras.models.Model([inputs[0], inputs[1], event],
                                      [bio_pred])  # model for predicting subject entities
    train_model = keras.models.Model([inputs[0], inputs[1], bio_label, event],
                                     [bio_pred])

    loss = K.sparse_categorical_crossentropy(bio_label, bio_pred)
    loss = K.sum(loss * mask[:, :, 0]) / K.sum(mask)

    train_model.add_loss(loss)
    train_model.summary()
    train_model.compile(optimizer=keras.optimizers.Adam(lr=3e-5))
    load_model_weights_from_checkpoint(train_model, config, checkpoint_file,
                                       training)
    return train_model, entity_model
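
Several names used above (seq_and_vec, hidden_size, maxlen, event2id, get_model, load_model_weights_from_checkpoint) come from the surrounding module and are not defined in this snippet. For orientation, a common definition of seq_and_vec in similar Keras extraction baselines tiles a sentence-level vector along the time axis and concatenates it to every timestep, which is consistent with the output_shape=(None, t_dim * 2) used above; treat the sketch below as that assumption rather than the exact helper used here.

from keras import backend as K

def seq_and_vec(x):
    """x = [seq, vec]: seq is [batch, seq_len, s_size], vec is [batch, v_size].
    Returns [batch, seq_len, s_size + v_size] by repeating vec at every step."""
    seq, vec = x
    vec = K.expand_dims(vec, 1)              # [batch, 1, v_size]
    vec = K.zeros_like(seq[:, :, :1]) + vec  # broadcast to [batch, seq_len, v_size]
    return K.concatenate([seq, vec], 2)

The effect is that every token position sees both its contextual BERT vector and the attention-pooled sentence summary before the masked CNN and the event-conditioned dense layer.
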
Code example #3
    def model(self):
        word_input = Input(shape=(self.maxlen_sentence,))  # [batch, sentence]
        char_input = Input(shape=(self.maxlen_sentence, self.maxlen_word,))  # [batch, sentence, word] char ids
        ner_label = Input(shape=(self.maxlen_sentence,))
        # relation_label = Input(shape=self.maxlen_sentence,) #[batch,sentence,n_classes]
        mask = Lambda(lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(word_input)

        word_embedding = Embedding(self.word_vocab_size, self.word_embed_size, mask_zero=True, weights=[self.embedding_martrix], name='word_embedding', trainable=True)(word_input)  # [batch, sentence, word_embed]
        char_embedding = Embedding(self.char_vocab_size, self.char_embed_size, mask_zero=True, name='char_embedding', trainable=True)(char_input)  # [batch, sentence, word, char_embed]

        if self.embedding_dropout_prob:
            word_embedding = Dropout(self.embedding_dropout_prob)(word_embedding)
            char_embedding = Dropout(self.embedding_dropout_prob)(char_embedding)

        if self.is_use_char_embedding:
            # char-level pooling part (attention over the char BiLSTM states)
            char_embedding_shape = K.int_shape(char_embedding)  # [batch,sentence,word,dim]
            # char_embedding_reshaped = K.reshape(char_embedding, shape=(-1, char_embedding_shape[-2],self.char_embed_size))  # [batch*sentence,word,dim of char embedding]
            char_embedding_reshaped = self.reshape_layer_1(char_embedding,char_embedding_shape)
            char_lstm = Bidirectional(MaskedLSTM(units=self.char_embed_size // 2, return_sequences=True, name='char_lstm_layer'))(
                char_embedding_reshaped)

            # attention pooling: score each char state, softmax over chars, weighted sum
            attention = TimeDistributed(Dense(1, activation='tanh'))(char_lstm)
            attention = MaskFlatten()(attention)
            attention = Activation('softmax')(attention)
            attention = MaskRepeatVector(self.char_embed_size)(attention)
            attention = MaskPermute([2, 1])(attention)
            sent_representation = multiply([char_lstm, attention])
            attention = Lambda(lambda xin: K.sum(xin, axis=1))(sent_representation)

            char_embedding = self.reshape_layer_2(attention,char_embedding_shape)
            if self.word_char_embed_mode == 'concate':
                embedding = Concatenate(axis=-1)([word_embedding, char_embedding])
            else:
                embedding = Gate_Add_Lyaer()([word_embedding, char_embedding])
        else:
            embedding = word_embedding
        # multi-layer self-attention for NER prediction
        if self.embedding_dropout_prob:
            embedding = Dropout(self.embedding_dropout_prob)(embedding)

        # part 1: multi self-attention block (CNN/LSTM/FNN + self-attention)
        lstm = Bidirectional(MaskedLSTM(units=self.hidden_size // 2, return_sequences=True), name='lstm_layer0')(embedding)
        if self.nn_dropout_prob:
            lstm = Dropout(self.nn_dropout_prob)(lstm)
        # # multi_lstm_layers
        # if self.multi_layers >= 2:
        #     for i in range(self.multi_layers - 1):
        #         i+=1
        #         lstm = Bidirectional(CuDNNLSTM(self.hidden_size // 2, return_sequences=True), name='lstm_layer{}'.format(i))(lstm)
        #         if self.nn_dropout_prob:
        #             lstm = Dropout(self.nn_dropout_prob)(lstm)
        bio_pred = Dense(self.num_classes, activation='softmax')(lstm)
        pred_model = Model([word_input, char_input], bio_pred)


        train_model = Model([word_input, char_input, ner_label], bio_pred)

        loss = K.sparse_categorical_crossentropy(ner_label, bio_pred)
        loss = K.sum(loss * mask[:, :, 0]) / K.sum(mask)

        train_model.summary()
        train_model.add_loss(loss)
        train_model.compile(optimizer=keras.optimizers.Adam(lr=self.learning_rate))

        return train_model,pred_model
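
reshape_layer_1 and reshape_layer_2 are class helpers referenced in examples #1 and #3 but not shown. Judging from the commented-out K.reshape calls in example #1, they fold the sentence axis into the batch axis before the char-level BiLSTM and unfold the pooled char vectors afterwards. The methods below are a hypothetical sketch of that behaviour (mask propagation from mask_zero=True is ignored for brevity), not the repository's actual implementation.

    def reshape_layer_1(self, char_embedding, char_embedding_shape):
        # [batch, sentence, word, char_embed] -> [batch*sentence, word, char_embed]
        return Lambda(lambda x: K.reshape(
            x, (-1, char_embedding_shape[-2], self.char_embed_size)))(char_embedding)

    def reshape_layer_2(self, pooled_chars, char_embedding_shape):
        # [batch*sentence, char_embed] -> [batch, sentence, char_embed]
        return Lambda(lambda x: K.reshape(
            x, (-1, self.maxlen_sentence, self.char_embed_size)))(pooled_chars)
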