Example #1
    def build_model(self):
        import tensorflow as tf
        from keras.backend.tensorflow_backend import set_session
        config = tf.ConfigProto()
        config.gpu_options.allocator_type = 'BFC'  # A "Best-fit with coalescing" algorithm, simplified from a version of dlmalloc.
        if self.memory_fraction:
            config.gpu_options.per_process_gpu_memory_fraction = self.memory_fraction
            config.gpu_options.allow_growth = False
        else:
            config.gpu_options.allow_growth = True
        set_session(tf.Session(config=config))

        # Additional inputs
        subject_labels = Input(shape=(None, 2), name='Subject-Labels')
        subject_ids = Input(shape=(2, ), name='Subject-Ids')
        object_labels = Input(shape=(None, self.num_classes, 2),
                              name='Object-Labels')
        # Load the pre-trained model
        bert = build_transformer_model(
            config_path=self.bert_config_path,
            checkpoint_path=self.bert_checkpoint_path,
            return_keras_model=False,
        )
        # Predict the subject
        output = Dense(units=2,
                       activation='sigmoid',
                       kernel_initializer=bert.initializer)(bert.model.output)
        subject_preds = Lambda(lambda x: x**2)(output)  # square the probabilities to ease the class imbalance
        self.subject_model = Model(bert.model.inputs, subject_preds)
        # Feed in the subject to predict the object
        # Conditional Layer Normalization fuses the subject into the object prediction
        output = bert.model.layers[-2].get_output_at(-1)
        subject = Lambda(self.extrac_subject)([output, subject_ids])
        output = LayerNormalization(conditional=True)([output, subject])
        output = Dense(units=self.num_classes * 2,
                       activation='sigmoid',
                       kernel_initializer=bert.initializer)(output)
        output = Lambda(lambda x: x**4)(output)
        object_preds = Reshape((-1, self.num_classes, 2))(output)
        self.object_model = Model(bert.model.inputs + [subject_ids],
                                  object_preds)
        # Training model
        self.model = Model(
            bert.model.inputs + [subject_labels, subject_ids, object_labels],
            [subject_preds, object_preds])

        mask = bert.model.get_layer('Embedding-Token').output_mask
        mask = K.cast(mask, K.floatx())
        subject_loss = K.binary_crossentropy(subject_labels, subject_preds)
        subject_loss = K.mean(subject_loss, 2)
        subject_loss = K.sum(subject_loss * mask) / K.sum(mask)
        object_loss = K.binary_crossentropy(object_labels, object_preds)
        object_loss = K.sum(K.mean(object_loss, 3), 2)
        object_loss = K.sum(object_loss * mask) / K.sum(mask)
        self.model.add_loss(subject_loss + object_loss)
        AdamEMA = extend_with_exponential_moving_average(Adam, name='AdamEMA')
        self.optimizer = AdamEMA(lr=1e-4)
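
The extrac_subject helper (a method here, a plain function in the later snippets) is never shown on this page. A minimal sketch of what it usually looks like in bert4keras relation-extraction code, assuming batch_gather from bert4keras.backend (a reconstruction, not this author's exact code):

from bert4keras.backend import K, batch_gather

def extrac_subject(inputs):
    """Gather the subject's start/end token vectors and concatenate them."""
    output, subject_ids = inputs  # output: (batch, seq_len, 768), subject_ids: (batch, 2)
    start = batch_gather(output, subject_ids[:, :1])  # (batch, 1, 768)
    end = batch_gather(output, subject_ids[:, 1:])    # (batch, 1, 768)
    subject = K.concatenate([start, end], 2)          # (batch, 1, 1536)
    return subject[:, 0]                              # (batch, 1536)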
Example #2
def build_model():
    bert_model = build_transformer_model(
        config_path=Config.config_path,
        checkpoint_path=Config.checkpoint_path,
        return_keras_model=False)

    # Additional inputs
    subject_labels = Input(shape=(None, 2))
    subject_ids = Input(shape=(2, ))
    object_labels = Input(shape=(None, len(predicate2id), 2))

    # Predict the subject
    output = Dense(units=2,
                   activation='sigmoid',
                   kernel_initializer=bert_model.initializer)(
                       bert_model.model.output)

    subject_preds = Lambda(lambda x: x**2)(output)

    subject_model = Model(bert_model.inputs, subject_preds)
    # Feed in the subject to predict the object
    output = bert_model.model.layers[-2].get_output_at(-1)
    subject = Lambda(extrac_subject)([output, subject_ids])
    output = LayerNormalization(conditional=True)([output, subject])
    output = Dense(units=len(predicate2id) * 2,
                   activation='sigmoid',
                   kernel_initializer=bert_model.initializer)(output)
    output = Lambda(lambda x: x**4)(output)
    object_preds = Reshape((-1, len(predicate2id), 2))(output)

    object_model = Model(bert_model.model.inputs + [subject_ids], object_preds)

    # Training model
    train_model = Model(
        bert_model.model.inputs + [subject_labels, subject_ids, object_labels],
        [subject_preds, object_preds])

    mask = bert_model.model.get_layer('Embedding-Token').output_mask
    mask = K.cast(mask, K.floatx())

    subject_loss = K.binary_crossentropy(subject_labels, subject_preds)
    subject_loss = K.mean(subject_loss, 2)
    subject_loss = K.sum(subject_loss * mask) / K.sum(mask)

    object_loss = K.binary_crossentropy(object_labels, object_preds)
    object_loss = K.sum(K.mean(object_loss, 3), 2)
    object_loss = K.sum(object_loss * mask) / K.sum(mask)

    train_model.add_loss(subject_loss + object_loss)

    optimizer = Adam(Config.learning_rate)

    train_model.compile(optimizer=optimizer)
    return train_model, subject_model, object_model
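
Since every loss term is attached via add_loss, the model compiles without a loss argument and trains on the inputs alone. A hypothetical training call (the batch arrays and hyperparameters are illustrative):

train_model.fit(
    [batch_token_ids, batch_segment_ids,
     batch_subject_labels, batch_subject_ids, batch_object_labels],
    None,  # labels enter through the extra inputs; add_loss supplies the loss
    batch_size=32,
    epochs=20,
)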
Example #3
    def build(self, input_shape):
        super(ResidualGatedConv1D, self).build(input_shape)
        self.conv1d = Conv1D(
            filters=self.filters * 2,
            kernel_size=self.kernel_size,
            dilation_rate=self.dilation_rate,
            padding='same',
        )
        self.layernorm = LayerNormalization()

        if self.filters != input_shape[-1]:
            self.dense = Dense(self.filters, use_bias=False)

        self.alpha = self.add_weight(
            name='alpha', shape=[1], initializer='zeros'
        )
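
build() only creates the sublayers; the matching call() is not shown. A sketch of the standard residual gated convolution (GLU gating, layer normalization, then an alpha-scaled residual), assuming K is the Keras backend; a reconstruction, not the author's verbatim code:

    def call(self, inputs, mask=None):
        if mask is not None:
            # zero out padded positions before convolving
            inputs = inputs * K.cast(mask, K.floatx())[:, :, None]
        outputs = self.conv1d(inputs)
        # GLU: the first half of the channels, gated by a sigmoid of the second half
        gate = K.sigmoid(outputs[..., self.filters:])
        outputs = self.layernorm(outputs[..., :self.filters] * gate)
        # project the residual branch if the channel counts differ
        if hasattr(self, 'dense'):
            inputs = self.dense(inputs)
        # alpha starts at zero, so the block is initially an identity mapping
        return inputs + self.alpha * outputs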
Example #4
def build_model():
    """
    Build the model body.
    :return: the model object
    """
    with SESS.as_default():
        with SESS.graph.as_default():
            # Build the BERT backbone
            bert_model = build_transformer_model(
                config_path=bert_config.config_path,
                checkpoint_path=bert_config.checkpoint_path,
                return_keras_model=False,
                model=bert_config.model_type
            )

            # l is a layer name inside the model (a str)
            for l in bert_model.layers:
                bert_model.model.get_layer(l).trainable = True

            # Start/end indices of the trigger verb; Keras adds the batch dimension automatically
            # [batch_size, 1]
            trigger_start_index = Input(shape=(1,))
            trigger_end_index = Input(shape=(1,))

            # Gather the token vectors at the trigger indices and average them
            k1v = Lambda(seq_gather)([bert_model.model.output, trigger_start_index])
            k2v = Lambda(seq_gather)([bert_model.model.output, trigger_end_index])
            kv = Average()([k1v, k2v])
            # Sentence tensor fused with the trigger-verb vector
            t = LayerNormalization(conditional=True)([bert_model.model.output, kv])
            # Take the [CLS] vector for classification
            t = Lambda(lambda x: x[:, 0])(t)
            # Predict the event state
            state_out_put = Dense(3, activation='softmax')(t)
            # Build the state-prediction model
            state_model = Model(bert_model.model.inputs + [trigger_start_index, trigger_end_index], state_out_put)
            # Set the learning rate, optimizer, loss, and per-batch metrics
            state_model.compile(loss='sparse_categorical_crossentropy',
                                optimizer=Adam(state_train_config.learning_rate),
                                metrics=['accuracy'])

            state_model.summary()

    return state_model
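
seq_gather, used above to pull out the vectors at the trigger positions, is not defined in these snippets. A minimal sketch of the usual implementation (assuming a TF1-style Keras backend; a reconstruction):

import tensorflow as tf
from keras import backend as K

def seq_gather(x):
    """seq: (batch, seq_len, dim), idxs: (batch, 1) -> (batch, dim)"""
    seq, idxs = x
    idxs = K.cast(idxs, 'int32')
    batch_idxs = K.arange(0, K.shape(seq)[0])       # (batch,)
    batch_idxs = K.expand_dims(batch_idxs, 1)       # (batch, 1)
    indices = K.concatenate([batch_idxs, idxs], 1)  # (batch, 2)
    return tf.gather_nd(seq, indices)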
Example #5
def get_state_model():
    """
    Build the event-state model, load its weights, and return the model object.
    Uses the BERT output fused with the trigger indices to predict the event state.
    :return: state_model
    """
    with state_sess.as_default():
        with state_sess.graph.as_default():
            # Build the BERT backbone
            bert_model = build_transformer_model(
                config_path=bert_config.config_path,
                return_keras_model=False,
                model=bert_config.model_type
            )
            # Trigger index inputs
            trigger_start_index = Input(shape=(1,))
            trigger_end_index = Input(shape=(1,))
            # Gather the trigger-verb vectors
            k1v = Lambda(seq_gather)([bert_model.model.output, trigger_start_index])
            k2v = Lambda(seq_gather)([bert_model.model.output, trigger_end_index])
            kv = Average()([k1v, k2v])
            # Fuse the verb vector with the BERT output (the sentence tensor)
            t = LayerNormalization(conditional=True)([bert_model.model.output, kv])
            # Take the [CLS] vector for classification
            t = Lambda(lambda x: x[:, 0])(t)
            # Predict the event state
            state_out_put = Dense(3, activation='softmax')(t)
            # Main model
            state_model = Model(bert_model.model.inputs + [trigger_start_index, trigger_end_index], state_out_put)

            # Load the model weights
            logger.info("Loading event-state model weights...")
            state_model.load_weights(pre_config.event_state_model_path)
            logger.info("Event-state model weights loaded!")

    return state_model
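
A hypothetical inference call (the tokenizer and index variables are illustrative; the session scoping mirrors the construction above):

import numpy as np

with state_sess.as_default():
    with state_sess.graph.as_default():
        token_ids, segment_ids = tokenizer.encode(text)  # hypothetical tokenizer
        probs = state_model.predict([
            np.array([token_ids]), np.array([segment_ids]),
            np.array([[trigger_start]]), np.array([[trigger_end]]),
        ])
        state = probs.argmax(-1)  # one of the 3 event states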
Example #6
bert = build_transformer_model(
    config_path=config_path,
    checkpoint_path=checkpoint_path,
    return_keras_model=False,
)

# Predict the subject
output = Dense(units=2,
               activation='sigmoid',
               kernel_initializer=bert.initializer)(bert.model.output)
subject_preds = Lambda(lambda x: x**2)(output)

subject_model = Model(bert.model.inputs, subject_preds)

# Feed in the subject to predict the object
# Conditional Layer Normalization fuses the subject into the object prediction
output = bert.model.layers[-2].get_output_at(-1)
subject = Lambda(extrac_subject)([output, subject_ids])
output = LayerNormalization(conditional=True)([output, subject])

output = Dense(units=len(predicate2id) * 2,
               activation='sigmoid',
               kernel_initializer=bert.initializer)(output)
output = Lambda(lambda x: x**4)(output)
object_preds = Reshape((-1, len(predicate2id), 2))(output)

object_model = Model(bert.model.inputs + [subject_ids], object_preds)

# Training model
train_model = Model(
    bert.model.inputs + [subject_labels, subject_ids, object_labels],
    [subject_preds, object_preds])
# train_model.summary()
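
At inference time subject_model and object_model run in two stages. A sketch of the usual subject decoding (the thresholds are illustrative):

import numpy as np

subject_preds = subject_model.predict([token_ids, segment_ids])
start = np.where(subject_preds[0, :, 0] > 0.6)[0]  # candidate start positions
end = np.where(subject_preds[0, :, 1] > 0.5)[0]    # candidate end positions
subjects = []
for i in start:
    j = end[end >= i]  # nearest end at or after each start
    if len(j) > 0:
        subjects.append((i, j[0]))
# each (start, end) pair is then fed to object_model as subject_ids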
Example #7
def build_model():
    """
    Build the event-extraction model body: first the trigger model, then the
    other argument models built around the trigger indices.
    :return: the individual argument model objects
    """
    with SESS.as_default():
        with SESS.graph.as_default():
            # Build the BERT backbone
            bert_model = build_transformer_model(
                config_path=bert_config.config_path,
                checkpoint_path=bert_config.checkpoint_path,
                return_keras_model=False,
                model=bert_config.model_type)

            # l is a layer name inside the model (a str)
            for l in bert_model.layers:
                bert_model.model.get_layer(l).trainable = True

            # Build the model
            # Keras automatically adds the batch_size dimension to every placeholder
            # Trigger inputs (batch_size, seq_len)
            trigger_start_in = Input(shape=(None, ))
            trigger_end_in = Input(shape=(None, ))
            # Trigger index inputs (batch_size, 1)
            trigger_index_start_in = Input(shape=(1, ))
            trigger_index_end_in = Input(shape=(1, ))
            # Object inputs (batch_size, seq_len)
            object_start_in = Input(shape=(None, ))
            object_end_in = Input(shape=(None, ))
            # Subject inputs (batch_size, seq_len)
            subject_start_in = Input(shape=(None, ))
            subject_end_in = Input(shape=(None, ))
            # Location inputs (batch_size, seq_len)
            loc_start_in = Input(shape=(None, ))
            loc_end_in = Input(shape=(None, ))
            # Time inputs (batch_size, seq_len)
            time_start_in = Input(shape=(None, ))
            time_end_in = Input(shape=(None, ))
            # Negation inputs (batch_size, seq_len)
            negative_start_in = Input(shape=(None, ))
            negative_end_in = Input(shape=(None, ))

            # Alias the input placeholders (purely for convenience; no structural meaning)
            # Trigger inputs
            trigger_start, trigger_end = trigger_start_in, trigger_end_in
            # Trigger indices
            trigger_index_start, trigger_index_end = trigger_index_start_in, trigger_index_end_in
            # Object inputs
            object_start, object_end = object_start_in, object_end_in
            # Subject inputs
            subject_start, subject_end = subject_start_in, subject_end_in
            # Location inputs
            loc_start, loc_end = loc_start_in, loc_end_in
            # Time inputs
            time_start, time_end = time_start_in, time_end_in
            # Negation inputs
            negative_start, negative_end = negative_start_in, negative_end_in

            # bert_model.model.inputs is a list of two tensors: [token_ids (batch, seq_len), segment_ids (batch, seq_len)]
            # Mask: expand token_ids to [batch_size, seq_len, 1], then put 0 at the
            # padded positions and 1 everywhere else. This prepares the loss
            # computation so that padded positions do not receive gradients.
            mask = Lambda(lambda x: K.cast(
                K.greater(K.expand_dims(x[0], 2), 0), 'float32'))(
                    bert_model.model.inputs)

            # Trigger start/end predictions; bert_model.model.output is [batch_size, seq_len, 768]
            trigger_start_out = Dense(1, activation='sigmoid')(
                bert_model.model.output)
            trigger_end_out = Dense(1, activation='sigmoid')(
                bert_model.model.output)
            # Model that predicts the trigger verb
            trigger_model = Model(bert_model.model.inputs,
                                  [trigger_start_out, trigger_end_out])

            # Gather the token vectors at the trigger indices and average them
            k1v = Lambda(seq_gather)(
                [bert_model.model.output, trigger_index_start])
            k2v = Lambda(seq_gather)(
                [bert_model.model.output, trigger_index_end])
            kv = Average()([k1v, k2v])
            # Sentence tensor fused with the trigger-verb vector, used to predict the other arguments
            t = LayerNormalization(conditional=True)(
                [bert_model.model.output, kv])

            # Object outputs
            object_start_out = Dense(1, activation='sigmoid')(t)
            object_end_out = Dense(1, activation='sigmoid')(t)
            # Subject outputs
            subject_start_out = Dense(1, activation='sigmoid')(t)
            subject_end_out = Dense(1, activation='sigmoid')(t)
            # Location outputs
            loc_start_out = Dense(1, activation='sigmoid')(t)
            loc_end_out = Dense(1, activation='sigmoid')(t)
            # Time outputs
            time_start_out = Dense(1, activation='sigmoid')(t)
            time_end_out = Dense(1, activation='sigmoid')(t)
            # Negation outputs
            negative_start_out = Dense(1, activation='sigmoid')(t)
            negative_end_out = Dense(1, activation='sigmoid')(t)
            # Given text and trigger, predict the object
            object_model = Model(
                bert_model.model.inputs +
                [trigger_index_start_in, trigger_index_end_in],
                [object_start_out, object_end_out])
            # Given text and trigger, predict the subject
            subject_model = Model(
                bert_model.model.inputs +
                [trigger_index_start_in, trigger_index_end_in],
                [subject_start_out, subject_end_out])
            # Given text and trigger, predict the location
            loc_model = Model(
                bert_model.model.inputs +
                [trigger_index_start_in, trigger_index_end_in],
                [loc_start_out, loc_end_out])
            # Given text and trigger, predict the time
            time_model = Model(
                bert_model.model.inputs +
                [trigger_index_start_in, trigger_index_end_in],
                [time_start_out, time_end_out])
            # Given text and trigger, predict the negation
            negative_model = Model(
                bert_model.model.inputs +
                [trigger_index_start_in, trigger_index_end_in],
                [negative_start_out, negative_end_out])

            # Main (training) model
            train_model = Model(
                bert_model.model.inputs + [
                    trigger_start_in, trigger_end_in, trigger_index_start_in,
                    trigger_index_end_in, object_start_in, object_end_in,
                    subject_start_in, subject_end_in, loc_start_in, loc_end_in,
                    time_start_in, time_end_in, negative_start_in,
                    negative_end_in
                ], [
                    trigger_start_out, trigger_end_out, object_start_out,
                    object_end_out, subject_start_out, subject_end_out,
                    loc_start_out, loc_end_out, time_start_out, time_end_out,
                    negative_start_out, negative_end_out
                ])

            # Expand dims to match the mask's shape for the loss computation below
            trigger_start = K.expand_dims(trigger_start, 2)
            trigger_end = K.expand_dims(trigger_end, 2)
            object_start = K.expand_dims(object_start, 2)
            object_end = K.expand_dims(object_end, 2)
            subject_start = K.expand_dims(subject_start, 2)
            subject_end = K.expand_dims(subject_end, 2)
            loc_start = K.expand_dims(loc_start, 2)
            loc_end = K.expand_dims(loc_end, 2)
            time_start = K.expand_dims(time_start, 2)
            time_end = K.expand_dims(time_end, 2)
            negative_start = K.expand_dims(negative_start, 2)
            negative_end = K.expand_dims(negative_end, 2)

            # Build the loss. The mask zeroes out padded positions so they do not backpropagate.
            # Trigger loss
            trigger_start_loss = K.binary_crossentropy(trigger_start,
                                                       trigger_start_out)
            trigger_start_loss = K.sum(trigger_start_loss * mask) / K.sum(mask)
            trigger_end_loss = K.binary_crossentropy(trigger_end,
                                                     trigger_end_out)
            trigger_end_loss = K.sum(trigger_end_loss * mask) / K.sum(mask)
            # Object loss (no inner K.sum here: summing to a scalar before
            # multiplying by the mask would make the mask a no-op)
            object_start_loss = K.binary_crossentropy(object_start,
                                                      object_start_out)
            object_start_loss = K.sum(object_start_loss * mask) / K.sum(mask)
            object_end_loss = K.binary_crossentropy(object_end, object_end_out)
            object_end_loss = K.sum(object_end_loss * mask) / K.sum(mask)
            # Subject loss
            subject_start_loss = K.binary_crossentropy(subject_start,
                                                       subject_start_out)
            subject_start_loss = K.sum(subject_start_loss * mask) / K.sum(mask)
            subject_end_loss = K.binary_crossentropy(subject_end,
                                                     subject_end_out)
            subject_end_loss = K.sum(subject_end_loss * mask) / K.sum(mask)
            # Location loss
            loc_start_loss = K.binary_crossentropy(loc_start, loc_start_out)
            loc_start_loss = K.sum(loc_start_loss * mask) / K.sum(mask)
            loc_end_loss = K.binary_crossentropy(loc_end, loc_end_out)
            loc_end_loss = K.sum(loc_end_loss * mask) / K.sum(mask)
            # Time loss
            time_start_loss = K.binary_crossentropy(time_start, time_start_out)
            time_start_loss = K.sum(time_start_loss * mask) / K.sum(mask)
            time_end_loss = K.binary_crossentropy(time_end, time_end_out)
            time_end_loss = K.sum(time_end_loss * mask) / K.sum(mask)
            # Negation loss
            negative_start_loss = K.binary_crossentropy(negative_start,
                                                        negative_start_out)
            negative_start_loss = K.sum(
                negative_start_loss * mask) / K.sum(mask)
            negative_end_loss = K.binary_crossentropy(negative_end,
                                                      negative_end_out)
            negative_end_loss = K.sum(negative_end_loss * mask) / K.sum(mask)

            # Combine the losses
            loss = (trigger_start_loss + trigger_end_loss +
                    object_start_loss + object_end_loss +
                    subject_start_loss + subject_end_loss +
                    loc_start_loss + loc_end_loss +
                    time_start_loss + time_end_loss +
                    negative_start_loss + negative_end_loss)

            train_model.add_loss(loss)
            train_model.compile(
                optimizer=Adam(extract_train_config.learning_rate))
            train_model.summary()

    return trigger_model, subject_model, object_model, time_model, loc_model, negative_model, train_model
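
The twelve loss terms above all follow one pattern; a hypothetical helper that would collapse the repetition (a sketch, the name is illustrative):

def masked_bce(y_true, y_pred, mask):
    """Binary cross-entropy averaged over non-padded positions only."""
    loss = K.binary_crossentropy(y_true, y_pred)
    return K.sum(loss * mask) / K.sum(mask)

# e.g. trigger_start_loss = masked_bce(trigger_start, trigger_start_out, mask)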
Example #8
# Predict the subject
output = Dense(units=2,
               activation='sigmoid',
               kernel_initializer=bert.initializer)(
                   bert.model.output)  # [?, ?, 768] -> [?, ?, 2]
subject_preds = Lambda(lambda x: x**2)(output)

subject_model = Model(bert.model.inputs, subject_preds)  # text -> subject

# Feed in the subject to predict the object
# Conditional Layer Normalization fuses the subject into the object prediction
output = bert.model.layers[-2].get_output_at(-1)
subject = Lambda(extrac_subject)(
    [output, subject_ids])  # output [?, ?, 768], subject_ids [?, 2] -> [?, 768*2]
output = LayerNormalization(conditional=True)(
    [output, subject])  # [?, ?, 768]; mean/std conditioned on the subject vector
output = Dense(units=len(predicate2id) * 2,
               activation='sigmoid',
               kernel_initializer=bert.initializer)(output)
output = Reshape(
    (-1, len(predicate2id), 2))(output)  # [?, ?, 49*2] -> [?, ?, 49, 2]
object_preds = Lambda(lambda x: x**4)(output)

object_model = Model(bert.model.inputs + [subject_ids],
                     object_preds)  # (text, subject) -> (object, predicate)

# Training model
train_model = Model(
    bert.model.inputs + [subject_labels, subject_ids, object_labels],
    [subject_preds, object_preds])
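
This fragment assumes the extra inputs defined earlier on this page; for completeness, the usual definitions (as in the full scripts above):

subject_labels = Input(shape=(None, 2))                    # per-token subject start/end labels
subject_ids = Input(shape=(2, ))                           # start/end indices of one sampled subject
object_labels = Input(shape=(None, len(predicate2id), 2))  # per-token, per-predicate object labels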
Example #9
def get_extract_model():
    """
    Build the event-extraction model, load its weights, and return the model objects.
    1. Use the BERT output to predict the trigger indices.
    2. Fuse the BERT output with the trigger indices to predict the time, location,
       subject, object, and negation arguments.
    :return: the individual model objects
    """
    with extract_sess.as_default():
        with extract_sess.graph.as_default():
            # Build the BERT backbone
            bert_model = build_transformer_model(
                config_path=bert_config.config_path,
                return_keras_model=False,
                model=bert_config.model_type
            )

            # Build the model
            # Trigger inputs
            trigger_start_in = Input(shape=(None,))
            trigger_end_in = Input(shape=(None,))
            # Trigger index inputs
            trigger_index_start_in = Input(shape=(1,))
            trigger_index_end_in = Input(shape=(1,))
            # Object inputs
            object_start_in = Input(shape=(None,))
            object_end_in = Input(shape=(None,))
            # Subject inputs
            subject_start_in = Input(shape=(None,))
            subject_end_in = Input(shape=(None,))
            # Location inputs
            loc_start_in = Input(shape=(None,))
            loc_end_in = Input(shape=(None,))
            # Time inputs
            time_start_in = Input(shape=(None,))
            time_end_in = Input(shape=(None,))
            # Negation inputs
            negative_start_in = Input(shape=(None,))
            negative_end_in = Input(shape=(None,))
            # Alias the index inputs (only to separate internal use from the Model input list)
            trigger_index_start, trigger_index_end = trigger_index_start_in, trigger_index_end_in

            trigger_start_out = Dense(1, activation='sigmoid')(bert_model.model.output)
            trigger_end_out = Dense(1, activation='sigmoid')(bert_model.model.output)
            # Model that predicts the trigger verb
            trigger_model = Model(bert_model.model.inputs, [trigger_start_out, trigger_end_out])

            # Gather token vectors at the trigger indices
            k1v = Lambda(seq_gather)([bert_model.model.output, trigger_index_start])
            k2v = Lambda(seq_gather)([bert_model.model.output, trigger_index_end])
            kv = Average()([k1v, k2v])
            # Fuse the verb vector into the sentence tensor via conditional layer normalization
            t = LayerNormalization(conditional=True)([bert_model.model.output, kv])

            # Object outputs
            object_start_out = Dense(1, activation='sigmoid')(t)
            object_end_out = Dense(1, activation='sigmoid')(t)
            # Subject outputs
            subject_start_out = Dense(1, activation='sigmoid')(t)
            subject_end_out = Dense(1, activation='sigmoid')(t)
            # Location outputs
            loc_start_out = Dense(1, activation='sigmoid')(t)
            loc_end_out = Dense(1, activation='sigmoid')(t)
            # Time outputs
            time_start_out = Dense(1, activation='sigmoid')(t)
            time_end_out = Dense(1, activation='sigmoid')(t)
            # Negation outputs
            negative_start_out = Dense(1, activation='sigmoid')(t)
            negative_end_out = Dense(1, activation='sigmoid')(t)

            # Given text and trigger, predict the object
            object_model = Model(bert_model.model.inputs + [trigger_index_start_in, trigger_index_end_in],
                                 [object_start_out, object_end_out])
            # Given text and trigger, predict the subject
            subject_model = Model(bert_model.model.inputs + [trigger_index_start_in, trigger_index_end_in],
                                  [subject_start_out, subject_end_out])
            # Given text and trigger, predict the location
            loc_model = Model(bert_model.model.inputs + [trigger_index_start_in, trigger_index_end_in],
                              [loc_start_out, loc_end_out])
            # Given text and trigger, predict the time
            time_model = Model(bert_model.model.inputs + [trigger_index_start_in, trigger_index_end_in],
                               [time_start_out, time_end_out])
            # Given text and trigger, predict the negation
            negative_model = Model(bert_model.model.inputs + [trigger_index_start_in, trigger_index_end_in],
                                   [negative_start_out, negative_end_out])

            # Main model
            train_model = Model(
                bert_model.model.inputs + [trigger_start_in, trigger_end_in, trigger_index_start_in, trigger_index_end_in,
                                           object_start_in, object_end_in, subject_start_in, subject_end_in, loc_start_in,
                                           loc_end_in, time_start_in, time_end_in, negative_start_in, negative_end_in],
                [trigger_start_out, trigger_end_out, object_start_out, object_end_out, subject_start_out, subject_end_out,
                 loc_start_out, loc_end_out, time_start_out, time_end_out, negative_start_out, negative_end_out])
            # Load the event-extraction model weights
            logger.info("Loading event-extraction model weights...")
            train_model.load_weights(pre_config.event_extract_model_path)
            logger.info("Event-extraction model weights loaded!")

    return trigger_model, object_model, subject_model, loc_model, time_model, negative_model
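
A hypothetical two-stage inference with the returned models (the tokenization and argmax decoding are illustrative):

import numpy as np

with extract_sess.as_default():
    with extract_sess.graph.as_default():
        token_ids, segment_ids = tokenizer.encode(text)  # hypothetical tokenizer
        x = [np.array([token_ids]), np.array([segment_ids])]
        # stage 1: locate the trigger verb
        t_start, t_end = trigger_model.predict(x)
        i = int(t_start[0, :, 0].argmax())
        j = int(t_end[0, :, 0].argmax())
        # stage 2: predict an argument conditioned on the trigger indices
        o_start, o_end = object_model.predict(x + [np.array([[i]]), np.array([[j]])])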
Example #10
## embed predicate
predicate_n = 49
emb_l = Embedding(predicate_n, 32,
                  name='p_emb')  # keras.layers.Embedding, not tf.keras.layers
predicate_emb = emb_l(predicate_id)

output = bert.model.layers[-2].get_output_at(-1)
subject = Lambda(extrac_subject)(
    [output, subject_ids])  # output [?, ?, 768], subject_ids [?, 2] -> subject [?, 768*2]

###
#subject_predicate=K.concatenate([subject,predicate_emb[:,0,:]],axis=-1)
#subject_predicate=Concatenate(-1)([subject,predicate_emb[:,0,:]])
subject_predicate = Lambda(concat)([subject, predicate_emb])
output = LayerNormalization(conditional=True, name='specialNorm')(
    [output, subject_predicate])  # [?, ?, 768]; mean/std conditioned on the subject-predicate vector
output = Dense(
    units=2,
    #units=len(predicate2id) * 2,
    activation='sigmoid',
    kernel_initializer=bert.initializer)(output)
#output = Reshape((-1, 2))(output) #[? ? 2]
object_preds = Lambda(lambda x: x**4)(output)

#object_model = Model(bert.model.inputs + [subject_ids], object_preds)  # (text, subject) -> (object, predicate)

# Training model
# train_model = Model(bert.model.inputs + [subject_labels, subject_ids, object_labels],
#                     [subject_preds, object_preds])
train_model = Model(
    bert.model.inputs + [subject_ids, predicate_id, object_labels],
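
The snippet breaks off mid-call. The concat helper passed to Lambda above is also not shown; a sketch matching the commented-out alternatives (an assumption, not the author's code):

def concat(inputs):
    """Concatenate the subject vector with the predicate embedding."""
    subject, predicate_emb = inputs  # (?, 1536), (?, 1, 32)
    return K.concatenate([subject, predicate_emb[:, 0, :]], axis=-1)  # (?, 1568)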
Example #11
subject_preds = Lambda(lambda x: x**2)(output)  # square the probabilities to ease the class imbalance
# The subject_model
# bert.model.inputs holds Input-Token and Input-Segment:
# [Tensor("Input-Token:0", shape=(?, ?)), Tensor("Input-Segment:0", shape=(?, ?))]
subject_model = Model(bert.model.inputs, subject_preds)

# 3.2 Feed in subject_ids; the pointers steer the object prediction
# Conditional Layer Normalization fuses the subject into the object prediction
output = bert.model.layers[-2].get_output_at(
    -1
)  # (?, ?, 768): skip BERT's final LayerNorm and apply our own conditional LN instead;
# get_output_at fetches the output of a shared (reused) layer
subject = Lambda(extrac_subject)(
    [output,
     subject_ids])  # shape=(?, 1536): gather the subject start/end vectors via subject_ids
output = LayerNormalization(conditional=True)(
    [output, subject]
)  # condition on the subject: the (?, 1536) vector is projected by a Dense to
# (?, 1, 768) and shifts the normalization's beta/gamma (scale and offset)
# Given subject_ids plus the corresponding p, predict o; the exact value of p is not used here
output = Dense(
    units=len(predicate2id) * 2,  # the dense layer outputs probabilities
    activation='sigmoid',
    kernel_initializer=bert.initializer)(output)
output = Lambda(lambda x: x**4)(output)
object_preds = Reshape(
    (-1, len(predicate2id),
     2))(output)  # (?, ?, 49, 2); -1 lets Keras infer that dimension from the rest
# The object_model
object_model = Model(bert.model.inputs + [subject_ids], object_preds)


# Define the loss: simply add the subject and object prediction losses
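
A sketch of that loss, mirroring the masked construction from Example #1 (the mask and variable names follow the earlier examples):

mask = bert.model.get_layer('Embedding-Token').output_mask
mask = K.cast(mask, K.floatx())

subject_loss = K.binary_crossentropy(subject_labels, subject_preds)
subject_loss = K.mean(subject_loss, 2)                   # average over start/end
subject_loss = K.sum(subject_loss * mask) / K.sum(mask)  # mask out padding

object_loss = K.binary_crossentropy(object_labels, object_preds)
object_loss = K.sum(K.mean(object_loss, 3), 2)           # mean over start/end, sum over predicates
object_loss = K.sum(object_loss * mask) / K.sum(mask)

train_model.add_loss(subject_loss + object_loss)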