def model_fn(features, labels, mode, params):
    """MMoE multi-task model_fn for tf.estimator.

    :param features: dict produced by input_fn; features['sparse_features']
        maps feature name -> index tensor (sparse features store indices,
        dense features would store raw values).
    :param labels: dict with keys 'income_50k' and 'marital_stat'
        (None in PREDICT mode).
    :param mode: tf.estimator.ModeKeys — TRAIN / EVAL / PREDICT.
    :param params: hyper-parameters: 'batch_size', 'lr', 'embedding_size',
        'task_nums', 'sparse_feature_columns'.
    :return: tf.estimator.EstimatorSpec for the requested mode.
    """
    # ---- read inputs & config ----
    sparse_features_input = features['sparse_features']

    batch_size = params['batch_size']
    lr = params['lr']
    embedding_size = params['embedding_size']
    task_nums = params['task_nums']
    sparse_feature_columns = params['sparse_feature_columns']

    # One embedding tensor per sparse feature.
    sparse_embeddings = model_util.get_feature_embeddings(
        sparse_feature_columns,
        dict(sparse_features_input),
        embedding_size=embedding_size)

    # Concatenate all sparse-feature embeddings into one vector per example.
    concat_embeddings = tf.concat(sparse_embeddings, axis=1)

    # Shared MMoE layer: one gated mixture-of-experts output per task.
    mmoe_layers = layers.MMoE(units=4, num_experts=8,
                              num_tasks=task_nums)(concat_embeddings)

    # Per-task tower: Dense(8, relu) -> Dense(1) -> squeeze to shape (batch,).
    logits = []
    for task_layer in mmoe_layers:
        tower_layer = tf.keras.layers.Dense(
            units=8,
            activation='relu',
            kernel_initializer=tf.keras.initializers.VarianceScaling())(task_layer)
        # batch_size * 1
        output_layer = tf.keras.layers.Dense(
            units=1,
            activation=None,
            kernel_initializer=tf.keras.initializers.VarianceScaling())(tower_layer)
        logits.append(tf.squeeze(output_layer, axis=1))

    preds = [tf.sigmoid(logit) for logit in logits]
    predictions = {
        "prob_income_50k": preds[0],
        "prob_marital_stat": preds[1],
    }
    export_outputs = {
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            tf.estimator.export.PredictOutput(predictions)}

    # ---- predict ----
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode, predictions=predictions, export_outputs=export_outputs)

    # BUG FIX: labels were previously unpacked *before* the PREDICT branch,
    # which crashes when labels is None in PREDICT mode. Unpack only for
    # TRAIN / EVAL, where the estimator guarantees labels is present.
    labels = [labels['income_50k'], labels['marital_stat']]

    # Total loss: sum over tasks of the summed sigmoid cross-entropy.
    losses = 0
    for label, logit in zip(labels, logits):
        loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=label, logits=logit)
        losses += tf.reduce_sum(loss)

    # ---- eval ----
    if mode == tf.estimator.ModeKeys.EVAL:
        eval_hooks = [
            metrics.BinaryMetricHook(batch_size=batch_size,
                                     pred_tensor=pred,
                                     label_tensor=label,
                                     prefix='Eval_{}'.format(index))
            for index, (label, pred) in enumerate(zip(labels, preds))
        ]
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            loss=losses,
            evaluation_hooks=eval_hooks)

    # ---- train ----
    if mode == tf.estimator.ModeKeys.TRAIN:
        training_hooks = [
            metrics.BinaryMetricHook(batch_size=batch_size,
                                     pred_tensor=pred,
                                     label_tensor=label,
                                     prefix='Train_{}'.format(index))
            for index, (label, pred) in enumerate(zip(labels, preds))
        ]
        optimizer = tf.train.AdamOptimizer(learning_rate=lr, beta1=0.9,
                                           beta2=0.999, epsilon=1e-8)
        train_op = optimizer.minimize(
            losses, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            loss=losses,
            train_op=train_op,
            training_hooks=training_hooks)
def model_fn(features, labels, mode, params):
    """FM (linear + factorized pairwise interactions) model_fn for tf.estimator.

    :param features: dict produced by input_fn; features['sparse_features']
        maps feature name -> index tensor (sparse features store indices,
        dense features would store raw values).
    :param labels: binary label tensor (None in PREDICT mode).
    :param mode: tf.estimator.ModeKeys — TRAIN / EVAL / PREDICT.
    :param params: hyper-parameters: 'batch_size', 'lr', 'embedding_size',
        'sparse_feature_columns'.
    :return: tf.estimator.EstimatorSpec for the requested mode.
    """
    # ---- read inputs & config ----
    sparse_features_input = features['sparse_features']

    batch_size = params['batch_size']
    lr = params['lr']
    embedding_size = params['embedding_size']
    sparse_feature_columns = params['sparse_feature_columns']

    # First-order (linear) part of the FM score; no dense features here.
    linear_logits = model_util.get_linear_logits(
        sparse_feature_columns=sparse_feature_columns,
        sparse_features_input=dict(sparse_features_input),
        dense_features_input=dict())

    # Second-order part: per-feature embeddings stacked to
    # (batch, num_features, embedding_size) for the FM cross layer.
    sparse_embeddings = model_util.get_feature_embeddings(
        sparse_feature_columns,
        dict(sparse_features_input),
        embedding_size=embedding_size)
    sparse_embeddings = [tf.expand_dims(embedding, axis=1)
                         for embedding in sparse_embeddings]
    sparse_embeddings = tf.concat(sparse_embeddings, axis=1)

    fm_logits = layers.FM()(sparse_embeddings)

    # Final score = linear + pairwise interaction logits, squeezed to (batch,).
    logits = tf.add_n([fm_logits, linear_logits])
    logits = tf.squeeze(logits, axis=1)
    pred = tf.sigmoid(logits)

    predictions = {"prob": pred}
    export_outputs = {
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            tf.estimator.export.PredictOutput(predictions)}

    # ---- predict ----
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode, predictions=predictions, export_outputs=export_outputs)

    # Summed sigmoid cross-entropy over the batch.
    loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels)
    loss = tf.reduce_sum(loss)

    # ---- eval ----
    if mode == tf.estimator.ModeKeys.EVAL:
        eval_hooks = [
            metrics.BinaryMetricHook(batch_size=batch_size,
                                     pred_tensor=pred,
                                     label_tensor=labels,
                                     prefix='Eval'),
        ]
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            loss=loss,
            evaluation_hooks=eval_hooks)

    # ---- train ----
    if mode == tf.estimator.ModeKeys.TRAIN:
        training_hooks = [
            metrics.BinaryMetricHook(batch_size=batch_size,
                                     pred_tensor=pred,
                                     label_tensor=labels,
                                     prefix='Train'),
        ]
        optimizer = tf.train.AdamOptimizer(learning_rate=lr, beta1=0.9,
                                           beta2=0.999, epsilon=1e-8)
        train_op = optimizer.minimize(
            loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            loss=loss,
            train_op=train_op,
            training_hooks=training_hooks)
def model_fn(features, labels, mode, params):
    """PNN (product-based neural network) model_fn for tf.estimator.

    :param features: dict produced by input_fn; 'sparse_features' maps
        feature name -> index tensor, 'dense_features' maps feature
        name -> value tensor.
    :param labels: binary label tensor (None in PREDICT mode).
    :param mode: tf.estimator.ModeKeys — TRAIN / EVAL / PREDICT.
    :param params: hyper-parameters: 'batch_size', 'lr', 'embedding_size',
        'linear_feature_names', 'dnn_feature_names',
        'sparse_feature_columns'.
    :return: tf.estimator.EstimatorSpec for the requested mode.
    """
    # ---- read inputs & config ----
    sparse_features_input = features['sparse_features']
    # NOTE(review): dense_features_input and linear_feature_names are read
    # but never used below — kept so missing keys still fail fast; confirm
    # whether they were meant to feed a linear part.
    dense_features_input = features['dense_features']

    batch_size = params['batch_size']
    lr = params['lr']
    embedding_size = params['embedding_size']
    linear_feature_names = params['linear_feature_names']
    dnn_feature_names = params['dnn_feature_names']
    sparse_feature_columns = params['sparse_feature_columns']

    # Embed only the sparse features selected for the DNN part.
    sparse_embeddings = model_util.get_feature_embeddings(
        sparse_feature_columns,
        {feature_name: tensor
         for feature_name, tensor in sparse_features_input.items()
         if feature_name in dnn_feature_names},
        embedding_size=embedding_size)

    # z part: raw concatenated embeddings.
    z_part = tf.concat(sparse_embeddings, axis=1)

    # p part: pairwise products over all C(n, 2) embedding pairs.
    num_pairs = int(len(sparse_embeddings) * (len(sparse_embeddings) - 1) / 2)
    p_part = layers.PNN(
        mode=0,
        embedding_size=embedding_size,
        num_pairs=num_pairs,
        num_embeddings=len(sparse_embeddings))(sparse_embeddings)

    # MLP head over [z; p], squeezed to shape (batch,).
    concat_embedding = tf.concat([z_part, p_part], axis=1)
    logits = common_layer.get_nn_layers(concat_embedding, dims=[128, 32, 1])
    logits = tf.squeeze(logits, axis=1)
    pred = tf.sigmoid(logits)

    predictions = {"prob": pred}
    export_outputs = {
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            tf.estimator.export.PredictOutput(predictions)
    }

    # ---- predict ----
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          export_outputs=export_outputs)

    # Summed sigmoid cross-entropy over the batch.
    loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels)
    loss = tf.reduce_sum(loss)

    # ---- eval ----
    if mode == tf.estimator.ModeKeys.EVAL:
        eval_hooks = [
            metrics.BinaryMetricHook(batch_size=batch_size,
                                     pred_tensor=pred,
                                     label_tensor=labels,
                                     prefix='Eval'),
        ]
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          loss=loss,
                                          evaluation_hooks=eval_hooks)

    # ---- train ----
    if mode == tf.estimator.ModeKeys.TRAIN:
        training_hooks = [
            metrics.BinaryMetricHook(batch_size=batch_size,
                                     pred_tensor=pred,
                                     label_tensor=labels,
                                     prefix='Train'),
        ]
        optimizer = tf.train.AdamOptimizer(learning_rate=lr, beta1=0.9,
                                           beta2=0.999, epsilon=1e-8)
        train_op = optimizer.minimize(
            loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          loss=loss,
                                          train_op=train_op,
                                          training_hooks=training_hooks)