Code Example #1
    # Assumes module-level imports: Input, Dense, Lambda, Reshape, Model (keras);
    # build_transformer_model, LayerNormalization, K, Adam,
    # extend_with_exponential_moving_average (bert4keras).
    def build_model(self):
        import tensorflow as tf
        from keras.backend.tensorflow_backend import set_session
        config = tf.ConfigProto()
        config.gpu_options.allocator_type = 'BFC'  # A "Best-fit with coalescing" algorithm, simplified from a version of dlmalloc.
        if self.memory_fraction:
            config.gpu_options.per_process_gpu_memory_fraction = self.memory_fraction
            config.gpu_options.allow_growth = False
        else:
            config.gpu_options.allow_growth = True
        set_session(tf.Session(config=config))

        # Additional label/id inputs
        subject_labels = Input(shape=(None, 2), name='Subject-Labels')
        subject_ids = Input(shape=(2, ), name='Subject-Ids')
        object_labels = Input(shape=(None, self.num_classes, 2),
                              name='Object-Labels')
        # Load the pre-trained model
        bert = build_transformer_model(
            config_path=self.bert_config_path,
            checkpoint_path=self.bert_checkpoint_path,
            return_keras_model=False,
        )
        # Predict subject spans
        output = Dense(units=2,
                       activation='sigmoid',
                       kernel_initializer=bert.initializer)(bert.model.output)
        # Squaring the sigmoid scores biases predictions toward 0 (most tokens
        # are not subject boundaries)
        subject_preds = Lambda(lambda x: x**2)(output)
        self.subject_model = Model(bert.model.inputs, subject_preds)
        # Feed in the subject and predict the object: Conditional Layer
        # Normalization fuses the subject representation into the object branch
        output = bert.model.layers[-2].get_output_at(-1)
        subject = Lambda(self.extrac_subject)([output, subject_ids])
        output = LayerNormalization(conditional=True)([output, subject])
        output = Dense(units=self.num_classes * 2,
                       activation='sigmoid',
                       kernel_initializer=bert.initializer)(output)
        output = Lambda(lambda x: x**4)(output)  # x**4: stronger bias toward 0 for object scores
        object_preds = Reshape((-1, self.num_classes, 2))(output)
        self.object_model = Model(bert.model.inputs + [subject_ids],
                                  object_preds)
        # Full training model
        self.model = Model(
            bert.model.inputs + [subject_labels, subject_ids, object_labels],
            [subject_preds, object_preds])

        # Mask padded positions out of the loss
        mask = bert.model.get_layer('Embedding-Token').output_mask
        mask = K.cast(mask, K.floatx())
        subject_loss = K.binary_crossentropy(subject_labels, subject_preds)
        subject_loss = K.mean(subject_loss, 2)
        subject_loss = K.sum(subject_loss * mask) / K.sum(mask)
        object_loss = K.binary_crossentropy(object_labels, object_preds)
        object_loss = K.sum(K.mean(object_loss, 3), 2)
        object_loss = K.sum(object_loss * mask) / K.sum(mask)
        self.model.add_loss(subject_loss + object_loss)
        AdamEMA = extend_with_exponential_moving_average(Adam, name='AdamEMA')
        self.optimizer = AdamEMA(lr=1e-4)
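The method builds the AdamEMA optimizer but never compiles the model here; presumably compilation happens elsewhere. For reference, the EMA wrapper in bert4keras exposes apply_ema_weights() and reset_old_weights(), typically used around validation. A minimal sketch (the surrounding calls are assumptions, not part of the original class):

optimizer = AdamEMA(lr=1e-4)
model.compile(optimizer=optimizer)   # the loss was attached via add_loss
# ... training ...
optimizer.apply_ema_weights()        # evaluate with the averaged weights
# ... run validation with subject_model / object_model ...
optimizer.reset_old_weights()        # restore raw weights before resuming training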
Code Example #2
# Assumes the same keras / bert4keras imports as Example #1, plus predicate2id
# and Config defined elsewhere.
def build_model():
    bert_model = build_transformer_model(
        config_path=Config.config_path,
        checkpoint_path=Config.checkpoint_path,
        return_keras_model=False)

    # Additional label/id inputs
    subject_labels = Input(shape=(None, 2))
    subject_ids = Input(shape=(2, ))
    object_labels = Input(shape=(None, len(predicate2id), 2))

    # Predict subject spans
    output = Dense(units=2,
                   activation='sigmoid',
                   kernel_initializer=bert_model.initializer)(
                       bert_model.model.output)

    subject_preds = Lambda(lambda x: x**2)(output)  # square to bias predictions toward 0

    subject_model = Model(bert_model.model.inputs, subject_preds)
    # Feed in the subject, predict the object
    output = bert_model.model.layers[-2].get_output_at(-1)
    subject = Lambda(extrac_subject)([output, subject_ids])
    output = LayerNormalization(conditional=True)([output, subject])
    output = Dense(units=len(predicate2id) * 2,
                   activation='sigmoid',
                   kernel_initializer=bert_model.initializer)(output)
    output = Lambda(lambda x: x**4)(output)  # stronger bias toward 0 for object scores
    object_preds = Reshape((-1, len(predicate2id), 2))(output)

    object_model = Model(bert_model.model.inputs + [subject_ids], object_preds)

    # Full training model
    train_model = Model(
        bert_model.model.inputs + [subject_labels, subject_ids, object_labels],
        [subject_preds, object_preds])

    mask = bert_model.model.get_layer('Embedding-Token').output_mask
    mask = K.cast(mask, K.floatx())

    subject_loss = K.binary_crossentropy(subject_labels, subject_preds)
    subject_loss = K.mean(subject_loss, 2)
    subject_loss = K.sum(subject_loss * mask) / K.sum(mask)

    object_loss = K.binary_crossentropy(object_labels, object_preds)
    object_loss = K.sum(K.mean(object_loss, 3), 2)
    object_loss = K.sum(object_loss * mask) / K.sum(mask)

    train_model.add_loss(subject_loss + object_loss)

    optimizer = Adam(Config.learning_rate)

    train_model.compile(optimizer=optimizer)
    return train_model, subject_model, object_model
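Both examples call extrac_subject (sic) without defining it. A plausible implementation, adapted from the standard bert4keras relation-extraction example (an assumption here; batch_gather comes from bert4keras.backend):

from bert4keras.backend import batch_gather

def extrac_subject(inputs):
    """Gather the subject's start and end vectors from the encoder output
    and concatenate them into a single subject representation."""
    output, subject_ids = inputs                      # (btz, seq_len, dim), (btz, 2)
    start = batch_gather(output, subject_ids[:, :1])  # (btz, 1, dim)
    end = batch_gather(output, subject_ids[:, 1:])    # (btz, 1, dim)
    subject = K.concatenate([start, end], 2)          # (btz, 1, 2*dim)
    return subject[:, 0]                              # (btz, 2*dim)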
Code Example #3
    def compute_loss(self, inputs, mask=None):
        subject_labels, object_labels = inputs[:2]
        subject_preds, object_preds, _ = inputs[2:]
        # mask[4] is the padding mask of the fifth input (the BERT output)
        if mask[4] is None:
            mask = 1.0
        else:
            mask = K.cast(mask[4], K.floatx())
        # subject loss
        subject_loss = K.binary_crossentropy(subject_labels, subject_preds)
        subject_loss = K.mean(subject_loss, 2)
        subject_loss = K.sum(subject_loss * mask) / K.sum(mask)
        # object loss
        object_loss = K.binary_crossentropy(object_labels, object_preds)
        object_loss = K.sum(K.mean(object_loss, 3), 2)
        object_loss = K.sum(object_loss * mask) / K.sum(mask)
        # total loss
        return subject_loss + object_loss
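A compute_loss with this signature is the hook of bert4keras's Loss layer. A sketch of how it is typically wired into the graph (the TotalLoss name and the exact input list are assumptions, following the standard bert4keras relation-extraction example):

from bert4keras.layers import Loss

class TotalLoss(Loss):
    """Sum of subject and object binary cross-entropies."""
    def compute_loss(self, inputs, mask=None):
        ...  # body as in the example above

# output_axis=[2, 3]: return subject_preds / object_preds unchanged while
# attaching the loss to the graph
subject_preds, object_preds = TotalLoss([2, 3])([
    subject_labels, object_labels, subject_preds, object_preds,
    bert.model.output,
])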
Code Example #4
    def compute_loss(self, inputs, mask=None):
        subject_labels, object_labels = inputs[:2]
        subject_preds, object_preds, _ = inputs[2:]
        # mask[4] is the padding mask of the fifth input (the BERT output)
        if mask[4] is None:
            mask = 1.0
        else:
            mask = K.cast(mask[4], K.floatx())
        subject_loss = K.binary_crossentropy(
            subject_labels, subject_preds)  # (btz, seq_len, 2): per start/end logit
        subject_loss = K.mean(
            subject_loss, 2)  # (btz, seq_len): average loss per token
        subject_loss = K.sum(subject_loss * mask) / K.sum(mask)

        object_loss = K.binary_crossentropy(object_labels, object_preds)
        object_loss = K.sum(K.mean(object_loss, 3), 2)
        object_loss = K.sum(object_loss * mask) / K.sum(mask)
        return subject_loss + object_loss
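The masked average K.sum(loss * mask) / K.sum(mask) counts only real tokens. A small NumPy check with illustrative values:

import numpy as np

loss = np.array([[0.2, 0.4, 0.9, 0.9]])  # per-token loss; last two are padding
mask = np.array([[1.0, 1.0, 0.0, 0.0]])  # 1 for real tokens, 0 for padding

print((loss * mask).sum() / mask.sum())  # 0.3 -- padding does not dilute the loss
print(loss.mean())                       # 0.6 -- a naive mean would be inflated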
Code Example #5
# (The object branch is commented out in this variant; only the subject model
# is trained.)
#                activation='sigmoid',
#                kernel_initializer=bert.initializer)(output)
# output = Reshape((-1, len(predicate2id), 2))(output)  # (btz, seq_len, 49*2) -> (btz, seq_len, 49, 2)
# object_preds = Lambda(lambda x: x**4)(output)
#
# object_model = Model(bert.model.inputs + [subject_ids], object_preds)  # (sub, text) -> (obj, predicate)

# Training model (subject only)
# train_model = Model(bert.model.inputs + [subject_labels,
#                                         subject_ids, object_labels],
#                     [subject_preds, object_preds])
train_model = Model(bert.model.inputs + [subject_labels], [subject_preds])

mask = bert.model.get_layer('Sequence-Mask').output_mask
mask = K.cast(mask, K.floatx())  # cast, as in the other examples, so the mask can multiply the float loss

subject_loss = K.binary_crossentropy(subject_labels, subject_preds)
# subject_preds is assumed (btz, seq_len, 2), so average over axis 2; the
# original reduced over a non-existent axis 3, copied from the 4-D object loss
subject_loss = K.mean(subject_loss, 2)
subject_loss = K.sum(subject_loss * mask) / K.sum(mask)

#
# object_loss = K.binary_crossentropy(object_labels, object_preds)
# object_loss = K.sum(K.mean(object_loss, 3), 2)
# object_loss = K.sum(object_loss * mask) / K.sum(mask)

train_model.add_loss(subject_loss)
train_model.compile(optimizer=Adam(1e-5))

#
# def extract_spoes(text):
#     # extract the (s, p, o) triples contained in the input text
#
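With the loss attached via add_loss and compile(optimizer=...) taking no loss argument, fit receives inputs only. A minimal sketch of a training call for this variant (the batch arrays are assumed names from a data generator):

train_model.fit(
    [batch_token_ids, batch_segment_ids, batch_subject_labels],
    None,  # no targets: the loss is already part of the graph
    batch_size=32,
    epochs=10,
)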
Code Example #6
def build_model():
    """
    Build the event-extraction model from the configured parameters: first the
    trigger-word model, then the other argument models around the trigger indices.
    :return: the individual argument models plus the training model
    """
    with SESS.as_default():
        with SESS.graph.as_default():
            # Build the BERT backbone
            bert_model = build_transformer_model(
                config_path=bert_config.config_path,
                checkpoint_path=bert_config.checkpoint_path,
                return_keras_model=False,
                model=bert_config.model_type)

            # bert_model.layers maps layer name (str) -> layer; unfreeze everything
            for l in bert_model.layers:
                bert_model.model.get_layer(l).trainable = True

            # Build the model inputs.
            # Keras prepends a batch_size dimension to every placeholder.
            # Trigger (verb) labels (batch_size, seq_len)
            trigger_start_in = Input(shape=(None, ))
            trigger_end_in = Input(shape=(None, ))
            # Trigger index inputs (batch_size, 1)
            trigger_index_start_in = Input(shape=(1, ))
            trigger_index_end_in = Input(shape=(1, ))
            # Object labels (batch_size, seq_len)
            object_start_in = Input(shape=(None, ))
            object_end_in = Input(shape=(None, ))
            # Subject labels (batch_size, seq_len)
            subject_start_in = Input(shape=(None, ))
            subject_end_in = Input(shape=(None, ))
            # Location labels (batch_size, seq_len)
            loc_start_in = Input(shape=(None, ))
            loc_end_in = Input(shape=(None, ))
            # Time labels (batch_size, seq_len)
            time_start_in = Input(shape=(None, ))
            time_end_in = Input(shape=(None, ))
            # Negation-word labels (batch_size, seq_len)
            negative_start_in = Input(shape=(None, ))
            negative_end_in = Input(shape=(None, ))

            # Alias the placeholders to shorter names (for convenience only;
            # no structural meaning)
            trigger_start, trigger_end = trigger_start_in, trigger_end_in
            trigger_index_start, trigger_index_end = trigger_index_start_in, trigger_index_end_in
            object_start, object_end = object_start_in, object_end_in
            subject_start, subject_end = subject_start_in, subject_end_in
            loc_start, loc_end = loc_start_in, loc_end_in
            time_start, time_end = time_start_in, time_end_in
            negative_start, negative_end = negative_start_in, negative_end_in

            # bert_model.model.inputs is [token_ids (btz, seq_len), segment_ids (btz, seq_len)].
            # Build the padding mask: expand token_ids to (btz, seq_len, 1) and map
            # non-zero ids to 1.0 and padding (id 0) to 0.0, so padded positions are
            # excluded from the loss and hence from backprop.
            mask = Lambda(lambda x: K.cast(
                K.greater(K.expand_dims(x[0], 2), 0), 'float32'))(
                    bert_model.model.inputs)
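            # e.g. token_ids [[101, 2769, 102, 0]] -> mask [[[1.], [1.], [1.], [0.]]]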

            # Trigger start/end scores; bert_model.model.output is (btz, seq_len, 768)
            trigger_start_out = Dense(1, activation='sigmoid')(
                bert_model.model.output)
            trigger_end_out = Dense(1, activation='sigmoid')(
                bert_model.model.output)
            # Model that predicts the trigger (verb)
            trigger_model = Model(bert_model.model.inputs,
                                  [trigger_start_out, trigger_end_out])

            # Gather the vectors at the trigger start/end indices and average them
            k1v = Lambda(seq_gather)(
                [bert_model.model.output, trigger_index_start])
            k2v = Lambda(seq_gather)(
                [bert_model.model.output, trigger_index_end])
            kv = Average()([k1v, k2v])
            # Sentence tensor conditioned on the trigger vector, used to predict the other arguments
            t = LayerNormalization(conditional=True)(
                [bert_model.model.output, kv])

            # Object outputs
            object_start_out = Dense(1, activation='sigmoid')(t)
            object_end_out = Dense(1, activation='sigmoid')(t)
            # Subject outputs
            subject_start_out = Dense(1, activation='sigmoid')(t)
            subject_end_out = Dense(1, activation='sigmoid')(t)
            # Location outputs
            loc_start_out = Dense(1, activation='sigmoid')(t)
            loc_end_out = Dense(1, activation='sigmoid')(t)
            # Time outputs
            time_start_out = Dense(1, activation='sigmoid')(t)
            time_end_out = Dense(1, activation='sigmoid')(t)
            # Negation outputs
            negative_start_out = Dense(1, activation='sigmoid')(t)
            negative_end_out = Dense(1, activation='sigmoid')(t)
            # Given text and trigger, predict the object
            object_model = Model(
                bert_model.model.inputs +
                [trigger_index_start_in, trigger_index_end_in],
                [object_start_out, object_end_out])
            # Given text and trigger, predict the subject
            subject_model = Model(
                bert_model.model.inputs +
                [trigger_index_start_in, trigger_index_end_in],
                [subject_start_out, subject_end_out])
            # Given text and trigger, predict the location
            loc_model = Model(
                bert_model.model.inputs +
                [trigger_index_start_in, trigger_index_end_in],
                [loc_start_out, loc_end_out])
            # Given text and trigger, predict the time
            time_model = Model(
                bert_model.model.inputs +
                [trigger_index_start_in, trigger_index_end_in],
                [time_start_out, time_end_out])
            # Given text and trigger, predict the negation word
            negative_model = Model(
                bert_model.model.inputs +
                [trigger_index_start_in, trigger_index_end_in],
                [negative_start_out, negative_end_out])

            # Main training model
            train_model = Model(
                bert_model.model.inputs + [
                    trigger_start_in, trigger_end_in, trigger_index_start_in,
                    trigger_index_end_in, object_start_in, object_end_in,
                    subject_start_in, subject_end_in, loc_start_in, loc_end_in,
                    time_start_in, time_end_in, negative_start_in,
                    negative_end_in
                ], [
                    trigger_start_out, trigger_end_out, object_start_out,
                    object_end_out, subject_start_out, subject_end_out,
                    loc_start_out, loc_end_out, time_start_out, time_end_out,
                    negative_start_out, negative_end_out
                ])

            # Expand label dims to (btz, seq_len, 1) to match the outputs and the mask
            trigger_start = K.expand_dims(trigger_start, 2)
            trigger_end = K.expand_dims(trigger_end, 2)
            object_start = K.expand_dims(object_start, 2)
            object_end = K.expand_dims(object_end, 2)
            subject_start = K.expand_dims(subject_start, 2)
            subject_end = K.expand_dims(subject_end, 2)
            loc_start = K.expand_dims(loc_start, 2)
            loc_end = K.expand_dims(loc_end, 2)
            time_start = K.expand_dims(time_start, 2)
            time_end = K.expand_dims(time_end, 2)
            negative_start = K.expand_dims(negative_start, 2)
            negative_end = K.expand_dims(negative_end, 2)

            # Build the losses; multiply by the mask so padded positions do not
            # contribute to the loss (and hence not to backprop).
            # Trigger loss
            trigger_start_loss = K.binary_crossentropy(trigger_start,
                                                       trigger_start_out)
            trigger_start_loss = K.sum(trigger_start_loss * mask) / K.sum(mask)
            trigger_end_loss = K.binary_crossentropy(trigger_end,
                                                     trigger_end_out)
            trigger_end_loss = K.sum(trigger_end_loss * mask) / K.sum(mask)
            # Object loss. (The original wrapped each cross-entropy below in an
            # extra K.sum, collapsing it to a scalar before masking, which
            # silently disabled the mask; removed so it matches the trigger loss.)
            object_start_loss = K.binary_crossentropy(object_start,
                                                      object_start_out)
            object_start_loss = K.sum(object_start_loss * mask) / K.sum(mask)
            object_end_loss = K.binary_crossentropy(object_end, object_end_out)
            object_end_loss = K.sum(object_end_loss * mask) / K.sum(mask)
            # Subject loss
            subject_start_loss = K.binary_crossentropy(subject_start,
                                                       subject_start_out)
            subject_start_loss = K.sum(subject_start_loss * mask) / K.sum(mask)
            subject_end_loss = K.binary_crossentropy(subject_end,
                                                     subject_end_out)
            subject_end_loss = K.sum(subject_end_loss * mask) / K.sum(mask)
            # Location loss
            loc_start_loss = K.binary_crossentropy(loc_start, loc_start_out)
            loc_start_loss = K.sum(loc_start_loss * mask) / K.sum(mask)
            loc_end_loss = K.binary_crossentropy(loc_end, loc_end_out)
            loc_end_loss = K.sum(loc_end_loss * mask) / K.sum(mask)
            # Time loss
            time_start_loss = K.binary_crossentropy(time_start, time_start_out)
            time_start_loss = K.sum(time_start_loss * mask) / K.sum(mask)
            time_end_loss = K.binary_crossentropy(time_end, time_end_out)
            time_end_loss = K.sum(time_end_loss * mask) / K.sum(mask)
            # Negation loss
            negative_start_loss = K.binary_crossentropy(negative_start,
                                                        negative_start_out)
            negative_start_loss = K.sum(
                negative_start_loss * mask) / K.sum(mask)
            negative_end_loss = K.binary_crossentropy(negative_end,
                                                      negative_end_out)
            negative_end_loss = K.sum(negative_end_loss * mask) / K.sum(mask)

            # Total loss
            loss = (trigger_start_loss + trigger_end_loss +
                    object_start_loss + object_end_loss +
                    subject_start_loss + subject_end_loss +
                    loc_start_loss + loc_end_loss +
                    time_start_loss + time_end_loss +
                    negative_start_loss + negative_end_loss)

            train_model.add_loss(loss)
            train_model.compile(
                optimizer=Adam(extract_train_config.learning_rate))
            train_model.summary()

    return trigger_model, subject_model, object_model, time_model, loc_model, negative_model, train_model
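seq_gather is called above but not defined in the snippet. A plausible implementation (an assumption, in the style of the event-extraction code this appears to derive from): for each batch item, gather the encoder vector at the given position.

import tensorflow as tf

def seq_gather(x):
    """x = [seq, idxs]: seq is (btz, seq_len, dim), idxs is (btz, 1).
    Returns the (btz, dim) vectors at the given positions."""
    seq, idxs = x
    idxs = K.cast(idxs, 'int32')
    batch_idxs = K.arange(0, K.shape(seq)[0])    # (btz,)
    batch_idxs = K.expand_dims(batch_idxs, 1)    # (btz, 1)
    idxs = K.concatenate([batch_idxs, idxs], 1)  # (btz, 2): [batch, position]
    return tf.gather_nd(seq, idxs)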
Code Example #7
# Assumes bert_model, output_layers, start_labels, end_labels, mask and
# learning_rate are defined earlier.
x = bert_model.get_layer(output_layers).output

start_output = Dense(2, activation='sigmoid', name='start')(x)
end_output = Dense(2, activation='sigmoid', name='end')(x)

# Square the sigmoid scores to bias predictions toward 0
start_output = Lambda(lambda x: x**2)(start_output)
end_output = Lambda(lambda x: x**2)(end_output)

start_model = Model(bert_model.input, start_output)
end_model = Model(bert_model.input, end_output)

model = Model(bert_model.input + [start_labels, end_labels],
              [start_output, end_output])
model.summary()

start_loss = K.binary_crossentropy(start_labels, start_output)
start_loss = K.mean(start_loss, 2)
start_loss = K.sum(start_loss * mask) / K.sum(mask)

end_loss = K.binary_crossentropy(end_labels, end_output)
end_loss = K.mean(end_loss, 2)
end_loss = K.sum(end_loss * mask) / K.sum(mask)

loss = start_loss + end_loss
model.add_loss(loss)
model.compile(optimizer=Adam(learning_rate))



def extract(qtext):
    # 'fengefu' appears to be a literal separator token inside qtext
    v = qtext.split('fengefu')[0]
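start_labels, end_labels and mask are not defined in the snippet; a plausible construction consistent with the loss shapes above (an assumption, not the author's code):

# Labels: independent start/end indicators per token, shape (btz, seq_len, 2)
start_labels = Input(shape=(None, 2), name='start_labels')
end_labels = Input(shape=(None, 2), name='end_labels')
# Padding mask, shape (btz, seq_len), derived from the token-id input
mask = Lambda(lambda x: K.cast(K.greater(x, 0), K.floatx()))(bert_model.input[0])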
Code Example #8
# object_model = Model(bert.model.inputs + [subject_ids], object_preds)  # (sub, text) -> (obj, predicate)

# Training model (object only, given subject and predicate)
# train_model = Model(bert.model.inputs + [subject_labels, subject_ids, object_labels],
#                     [subject_preds, object_preds])
train_model = Model(
    bert.model.inputs + [subject_ids, predicate_id, object_labels],
    [object_preds])

mask = bert.model.get_layer('Sequence-Mask').output_mask
mask = K.cast(mask, K.floatx())  # cast so the mask can multiply the float loss

# subject_loss = K.binary_crossentropy(subject_labels, subject_preds)
# subject_loss = K.mean(subject_loss, 2)
# subject_loss = K.sum(subject_loss * mask) / K.sum(mask)

object_loss = K.binary_crossentropy(object_labels,
                                    object_preds)  # [batch step 2]
object_loss = K.mean(object_loss, 2)
object_loss = K.sum(object_loss * mask) / K.sum(mask)

train_model.add_loss(object_loss)
train_model.compile(optimizer=Adam(1e-5))


def extract_spoes(text):
    """Extract the (subject, predicate, object) triples contained in text."""

    tokens = tokenizer.tokenize(text, max_length=maxlen)
    token_ids, segment_ids = tokenizer.encode(text, max_length=maxlen)
    # Predict subject spans
    subject_preds = subject_model.predict([[token_ids], [segment_ids]])
    start = np.where(subject_preds[0, :, 0] > 0.6)[0]
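The snippet cuts off mid-function. The standard bert4keras example continues by thresholding the end scores and pairing each start with the nearest end at or after it; a sketch of that continuation (not the author's verbatim code):

end = np.where(subject_preds[0, :, 1] > 0.5)[0]
subjects = []
for i in start:
    j = end[end >= i]               # candidate ends at or after this start
    if len(j) > 0:
        subjects.append((i, j[0]))  # pair with the nearest end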