def build_model():
    """Build and compile the hierarchical BERT document classifier.

    Each document is split into ``max_segment`` segments of ``maxlen``
    tokens.  Every segment is encoded independently by a shared BERT
    encoder, the per-segment [CLS] vectors are mask-aware attention-pooled
    across segments, and a softmax layer produces class probabilities.
    Compiled with Adam extended with gradient accumulation.

    Relies on module-level globals: ``max_segment``, ``maxlen``,
    ``config_path``, ``checkpoint_path``, ``drop``, ``num_classes``,
    ``lr``, ``grad_accum_steps``.

    Returns:
        A compiled ``keras.models.Model`` taking ``[token_ids,
        segment_ids]`` (each of shape ``(batch, max_segment, maxlen)``)
        and producing probabilities of shape ``(batch, num_classes)``.
    """
    token_ids = Input(shape=(max_segment, maxlen), dtype='int32')
    segment_ids = Input(shape=(max_segment, maxlen), dtype='int32')

    # Segment-level mask: K.any reduces over the token axis, so the mask
    # is 1.0 for segments containing at least one non-zero token and 0.0
    # for all-padding segments.  Shape: (batch, max_segment, 1).
    input_mask = Masking(mask_value=0)(token_ids)
    input_mask = Lambda(
        lambda x: K.cast(K.any(x, axis=2, keepdims=True), 'float32')
    )(input_mask)

    # Collapse batch and segment axes: (batch, max_segment, maxlen)
    # -> (batch * max_segment, maxlen) so BERT sees one sequence per row.
    token_ids1 = Lambda(lambda x: K.reshape(x, shape=(-1, maxlen)))(token_ids)
    segment_ids1 = Lambda(lambda x: K.reshape(x, shape=(-1, maxlen)))(
        segment_ids)

    # Load the pre-trained BERT encoder.
    bert = build_transformer_model(
        config_path=config_path,
        checkpoint_path=checkpoint_path,
        return_keras_model=False,
    )

    output = bert.model([token_ids1, segment_ids1])
    output = Lambda(lambda x: x[:, 0])(output)  # [CLS] vector per segment

    # Restore the segment axis: (batch, max_segment, hidden).
    # Bind the hidden size through a default argument instead of closing
    # over `output`: the name is rebound right below, and a late-binding
    # closure would capture the wrong tensor if the lambda were ever
    # re-evaluated (e.g. on model deserialization).
    hidden_size = output.shape[-1]
    output = Lambda(
        lambda x, d=hidden_size: K.reshape(x, shape=(-1, max_segment, d))
    )(output)

    # Element-wise multiply (not concatenation): zero out the [CLS]
    # vectors of all-padding segments; broadcasting keeps the shape.
    output = Multiply()([output, input_mask])
    output = Dropout(drop)(output)
    output = Attention(output.shape[-1].value)([output, input_mask])
    output = Dropout(drop)(output)

    # Final fully-connected classification layer.
    output = Dense(
        units=num_classes,
        activation='softmax',
        kernel_initializer=bert.initializer
    )(output)

    model = keras.models.Model([token_ids, segment_ids], output)

    # Adam with gradient accumulation to simulate larger batches.
    optimizer_params = {
        'learning_rate': lr,
        'grad_accum_steps': grad_accum_steps
    }
    optimizer = extend_with_gradient_accumulation(Adam)
    optimizer = optimizer(**optimizer_params)

    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=optimizer,
        metrics=['sparse_categorical_accuracy'],
    )
    return model
def build_model():
    """Construct and compile the segment-level BERT classifier.

    Inputs are ``(batch, max_segment, maxlen)`` token and segment ids.
    Segments are encoded by one shared BERT encoder, [CLS]-pooled,
    attention-pooled over the segment axis with a padding mask, and
    classified via softmax.  Uses module-level configuration globals.
    """
    tok_in = Input(shape=(max_segment, maxlen), dtype='int32')
    seg_in = Input(shape=(max_segment, maxlen), dtype='int32')

    # 1.0 for segments with at least one non-padding token, else 0.0.
    seg_mask = Masking(mask_value=0)(tok_in)
    seg_mask = Lambda(
        lambda t: K.cast(K.any(t, axis=2, keepdims=True), 'float32')
    )(seg_mask)

    # Merge batch and segment axes so BERT encodes one segment per row.
    flat_tok = Lambda(lambda t: K.reshape(t, shape=(-1, maxlen)))(tok_in)
    flat_seg = Lambda(lambda t: K.reshape(t, shape=(-1, maxlen)))(seg_in)

    # Shared pre-trained encoder.
    encoder = build_transformer_model(
        config_path=config_path,
        checkpoint_path=checkpoint_path,
        return_keras_model=False,
    )

    feats = encoder.model([flat_tok, flat_seg])
    feats = Lambda(lambda t: t[:, 0])(feats)  # per-segment [CLS] vector

    # Recover the segment axis: (batch, max_segment, hidden).
    feat_dim = feats.shape[-1]
    feats = Lambda(
        lambda t: K.reshape(t, shape=(-1, max_segment, feat_dim))
    )(feats)

    feats = Multiply()([feats, seg_mask])  # silence padded segments
    feats = Dropout(drop)(feats)
    feats = Attention(feats.shape[-1].value)([feats, seg_mask])
    feats = Dropout(drop)(feats)

    probs = Dense(
        units=num_classes,
        activation='softmax',
        kernel_initializer=encoder.initializer
    )(feats)

    model = keras.models.Model([tok_in, seg_in], probs)

    # Adam wrapped with gradient accumulation.
    opt_cls = extend_with_gradient_accumulation(Adam)
    optimizer = opt_cls(learning_rate=lr, grad_accum_steps=grad_accum_steps)

    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=optimizer,
        metrics=['sparse_categorical_accuracy'],
    )
    return model
def new_update(x, new_x):
    """Lazily update variable `x` to `new_x`, skipping untouched rows.

    NOTE(review): closure-based replacement for an optimizer's update op.
    `var`, `indices`, `grad`, `old_update` and `self` are captured from
    the enclosing scope, which is not visible here — presumably this
    wraps a Keras optimizer's resource-update method; confirm against
    the surrounding class.
    """
    if x is var and self._do_lazy_optimization(x):
        if indices is None:
            # Dense gradient: keep a row of `x` unchanged when its whole
            # gradient row is zero (r is a per-row boolean mask).
            r = K.any(K.not_equal(grad, 0.), axis=-1, keepdims=True)
            new_x = x + (new_x - x) * K.cast(r, K.floatx())
            return old_update(x, new_x)
        else:
            # Sparse gradient: only add the delta at the gathered indices.
            return self._resource_scatter_add(
                x, indices, K.gather(new_x - x, indices))
    # Not the lazily-optimized variable: fall through to the original op.
    return old_update(x, new_x)
def build_model():
    """Build the (uncompiled) hierarchical BERT classification model.

    Token/segment ids of shape ``(batch, max_segment, maxlen)`` are
    flattened, run through a shared pre-trained BERT encoder, reduced to
    per-segment [CLS] vectors, masked, attention-pooled across segments
    and classified with a softmax head.  Reads module-level globals for
    all hyper-parameters and checkpoint paths.
    """
    ids_tokens = Input(shape=(max_segment, maxlen), dtype='int32')
    ids_segments = Input(shape=(max_segment, maxlen), dtype='int32')

    # Per-segment validity mask (1.0 if any token id is non-zero).
    mask = Masking(mask_value=0)(ids_tokens)
    mask = Lambda(
        lambda t: K.cast(K.any(t, axis=2, keepdims=True), 'float32')
    )(mask)

    # Fold the segment axis into the batch axis for the encoder.
    tokens_2d = Lambda(lambda t: K.reshape(t, shape=(-1, maxlen)))(ids_tokens)
    segments_2d = Lambda(lambda t: K.reshape(t, shape=(-1, maxlen)))(
        ids_segments)

    # Pre-trained BERT encoder shared across all segments.
    bert = build_transformer_model(
        config_path=config_path,
        checkpoint_path=checkpoint_path,
        return_keras_model=False,
    )

    h = bert.model([tokens_2d, segments_2d])
    h = Lambda(lambda t: t[:, 0])(h)  # take the [CLS] position only

    # Unfold back to (batch, max_segment, hidden).
    hidden = h.shape[-1]
    h = Lambda(
        lambda t: K.reshape(t, shape=(-1, max_segment, hidden))
    )(h)

    h = Multiply()([h, mask])  # zero vectors for padded segments
    h = Dropout(drop)(h)
    h = Attention(h.shape[-1].value)([h, mask])
    h = Dropout(drop)(h)

    h = Dense(
        units=num_classes,
        activation='softmax',
        kernel_initializer=bert.initializer
    )(h)

    return keras.models.Model([ids_tokens, ids_segments], h)
def new_update(x, new_x):
    """Lazily update parameter `x`: leave rows with an all-zero gradient
    untouched, then delegate to the original update op.

    NOTE(review): `is_one_of`, `params`, `old_update` and `self` are
    captured from an enclosing scope not visible here — presumably this
    monkey-patches an optimizer's update routine; confirm against the
    surrounding class.
    """
    if is_one_of(x, params) and self._do_lazy_optimization(x):
        g = self.grads[x]
        # Per-row boolean mask: True where the gradient row has any
        # non-zero entry; rows with r == 0 keep their old value.
        r = K.any(K.not_equal(g, 0.), axis=-1, keepdims=True)
        new_x = x + (new_x - x) * K.cast(r, K.floatx())
    return old_update(x, new_x)