예제 #1
0
def build_model():
    """Build and compile the hierarchical BERT classifier.

    Inputs are two (batch, max_segment, maxlen) int32 tensors of token
    and segment ids.  Each segment is encoded by BERT independently, the
    per-segment [CLS] vectors are masked, attention-pooled over the
    segment axis, and classified.

    Returns:
        A compiled ``keras.models.Model`` mapping
        [token_ids, segment_ids] -> (batch, num_classes) softmax output.
    """
    token_ids = Input(shape=(max_segment, maxlen), dtype='int32')
    segment_ids = Input(shape=(max_segment, maxlen), dtype='int32')

    # Per-segment mask: 1.0 where the segment contains any non-zero token.
    input_mask = Masking(mask_value=0)(token_ids)
    # K.any() reduces over the token axis first; the boolean result is
    # then cast to float so it can be multiplied with embeddings below.
    input_mask = Lambda(lambda x: K.cast(K.any(x, axis=2, keepdims=True),
                                         'float32'))(input_mask)

    # Fold the segment axis into the batch axis so BERT sees
    # (batch * max_segment, maxlen) sequences.
    token_ids1 = Lambda(lambda x: K.reshape(x, shape=(-1, maxlen)))(token_ids)
    segment_ids1 = Lambda(lambda x: K.reshape(x, shape=(-1, maxlen)))(
        segment_ids)

    # Load the pretrained transformer.
    bert = build_transformer_model(
        config_path=config_path,
        checkpoint_path=checkpoint_path,
        return_keras_model=False,
    )
    output = bert.model([token_ids1, segment_ids1])
    output = Lambda(lambda x: x[:, 0])(output)  # [CLS] vector per segment

    # FIX: the original reshape lambda closed over `output` itself
    # (late-binding closure on the variable being rebound) and later used
    # `output.shape[-1].value`, which is TF1-only.  Capture the hidden
    # size once and bind it as a lambda default instead.
    hidden_size = K.int_shape(output)[-1]
    output = Lambda(lambda x, h=hidden_size: K.reshape(
        x, shape=(-1, max_segment, h)))(output)
    # Zero out the [CLS] vectors of padded (empty) segments.
    output = Multiply()([output, input_mask])
    output = Dropout(drop)(output)

    # Attention pooling over the segment axis (padded segments masked).
    output = Attention(hidden_size)([output, input_mask])
    output = Dropout(drop)(output)
    # Final classification head.
    output = Dense(units=num_classes,
                   activation='softmax',
                   kernel_initializer=bert.initializer)(output)

    model = keras.models.Model([token_ids, segment_ids], output)

    # Adam extended with gradient accumulation to emulate larger batches.
    optimizer_params = {
        'learning_rate': lr,
        'grad_accum_steps': grad_accum_steps
    }
    optimizer = extend_with_gradient_accumulation(Adam)
    optimizer = optimizer(**optimizer_params)

    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=optimizer,
        metrics=['sparse_categorical_accuracy'],
    )

    return model
예제 #2
0
def build_model():
    """Build and compile the hierarchical BERT classifier.

    Takes (batch, max_segment, maxlen) token/segment id tensors, runs
    every segment through BERT, masks and attention-pools the per-segment
    [CLS] vectors, and applies a softmax classification head.

    Returns:
        A compiled ``keras.models.Model``:
        [token_ids, segment_ids] -> (batch, num_classes) probabilities.
    """
    token_ids = Input(shape=(max_segment, maxlen), dtype='int32')
    segment_ids = Input(shape=(max_segment, maxlen), dtype='int32')

    # 1.0 for segments that contain at least one non-zero token.
    input_mask = Masking(mask_value=0)(token_ids)
    input_mask = Lambda(
        lambda x: K.cast(K.any(x, axis=2, keepdims=True), 'float32')
    )(input_mask)

    # Merge batch and segment axes: BERT consumes (batch*max_segment, maxlen).
    token_ids1 = Lambda(
        lambda x: K.reshape(x, shape=(-1, maxlen))
    )(token_ids)
    segment_ids1 = Lambda(
        lambda x: K.reshape(x, shape=(-1, maxlen))
    )(segment_ids)

    # Load the pretrained transformer.
    bert = build_transformer_model(
        config_path=config_path,
        checkpoint_path=checkpoint_path,
        return_keras_model=False,
    )
    output = bert.model([token_ids1, segment_ids1])
    output = Lambda(lambda x: x[:, 0])(output)  # per-segment [CLS] vector

    # FIX: the original lambda referenced `output` from inside its own
    # replacement expression (late-binding closure) and relied on the
    # TF1-only `Dimension.value` further down; capture the hidden size
    # explicitly and bind it as a default argument.
    hidden_size = K.int_shape(output)[-1]
    output = Lambda(
        lambda x, h=hidden_size: K.reshape(x, shape=(-1, max_segment, h))
    )(output)
    # Zero out [CLS] vectors belonging to padded segments.
    output = Multiply()([output, input_mask])
    output = Dropout(drop)(output)

    # Attention pooling over segments.
    output = Attention(hidden_size)([output, input_mask])
    output = Dropout(drop)(output)

    # Classification head.
    output = Dense(
        units=num_classes,
        activation='softmax',
        kernel_initializer=bert.initializer
    )(output)

    model = keras.models.Model([token_ids, segment_ids], output)

    # Adam with gradient accumulation for effectively larger batches.
    optimizer_params = {
        'learning_rate': lr,
        'grad_accum_steps': grad_accum_steps
    }
    optimizer = extend_with_gradient_accumulation(Adam)
    optimizer = optimizer(**optimizer_params)
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=optimizer,
        metrics=['sparse_categorical_accuracy'],
    )

    return model
예제 #3
0
 def new_update(x, new_x):
     """Wrapped optimizer update that applies lazy (sparse-aware) updates.

     NOTE(review): this is a closure — `var`, `grad`, `indices`,
     `old_update` and `self` come from the enclosing optimizer scope,
     which is not visible here.
     """
     if x is var and self._do_lazy_optimization(x):
         if indices is None:
             # Dense gradient: rows whose entire gradient is zero keep
             # their old value (the blend factor r is 0 there).
             r = K.any(K.not_equal(grad, 0.),
                       axis=-1,
                       keepdims=True)
             new_x = x + (new_x - x) * K.cast(r, K.floatx())
             return old_update(x, new_x)
         else:
             # Sparse gradient: scatter-add the delta only into the
             # rows actually touched by `indices`.
             return self._resource_scatter_add(
                 x, indices, K.gather(new_x - x, indices))
     # Not the lazily-optimized variable: fall through to the original
     # update op.
     return old_update(x, new_x)
예제 #4
0
def build_model():
    """Build the (uncompiled) hierarchical BERT classifier.

    Encodes each of the ``max_segment`` segments with BERT, masks and
    attention-pools the per-segment [CLS] vectors, then classifies.

    Returns:
        A ``keras.models.Model``:
        [token_ids, segment_ids] -> (batch, num_classes) softmax output.
    """
    token_ids = Input(shape=(max_segment, maxlen), dtype='int32')
    segment_ids = Input(shape=(max_segment, maxlen), dtype='int32')

    # Per-segment validity mask: 1.0 where any token id is non-zero.
    input_mask = Masking(mask_value=0)(token_ids)
    input_mask = Lambda(lambda x: K.cast(K.any(x, axis=2, keepdims=True),
                                         'float32'))(input_mask)

    # Collapse the segment axis into the batch axis for BERT.
    token_ids1 = Lambda(lambda x: K.reshape(x, shape=(-1, maxlen)))(token_ids)
    segment_ids1 = Lambda(lambda x: K.reshape(x, shape=(-1, maxlen)))(
        segment_ids)

    # Load the pretrained transformer.
    bert = build_transformer_model(
        config_path=config_path,
        checkpoint_path=checkpoint_path,
        return_keras_model=False,
    )
    output = bert.model([token_ids1, segment_ids1])
    output = Lambda(lambda x: x[:, 0])(output)  # [CLS] vector per segment

    # FIX: the original reshape lambda closed over `output` itself
    # (late-binding closure) and the Attention call used the TF1-only
    # `Dimension.value`; capture the hidden size once instead.
    hidden_size = K.int_shape(output)[-1]
    output = Lambda(lambda x, h=hidden_size: K.reshape(
        x, shape=(-1, max_segment, h)))(output)
    # Zero out [CLS] vectors of padded segments.
    output = Multiply()([output, input_mask])
    output = Dropout(drop)(output)

    # Attention pooling over the segment axis.
    output = Attention(hidden_size)([output, input_mask])
    output = Dropout(drop)(output)

    # Classification head.
    output = Dense(units=num_classes,
                   activation='softmax',
                   kernel_initializer=bert.initializer)(output)

    model = keras.models.Model([token_ids, segment_ids], output)

    return model
예제 #5
0
 def new_update(x, new_x):
     """Wrapped optimizer update: freeze rows whose gradient is all-zero.

     NOTE(review): closure over `params`, `is_one_of`, `old_update` and
     `self` from the enclosing optimizer scope (not visible here).
     """
     if is_one_of(x, params) and self._do_lazy_optimization(x):
         g = self.grads[x]
         # r is 1 for rows with any non-zero gradient entry, 0 otherwise;
         # blending with r leaves untouched rows at their old value.
         r = K.any(K.not_equal(g, 0.), axis=-1, keepdims=True)
         new_x = x + (new_x - x) * K.cast(r, K.floatx())
     return old_update(x, new_x)