import tensorflow as tf
from tensorflow.python.estimator.canned import head as head_lib
from tensorflow.python.feature_column import feature_column_v2
from tensorflow.python.ops import metrics as metrics_lib
from tensorflow.python.ops import nn
from tensorflow.python.ops.losses import losses

# Helpers such as build_deep_layers, build_cross_layers, build_afm_layers,
# _build_deep_layers, _build_xdeepfm_layers, _check_fm_columns, attention_layer,
# TransformerNetwork and _dnn_model_fn are defined elsewhere in this repo.


def _dssm_model(features, labels, mode, params):
    """Builds the two-tower (DSSM) model."""
    user_emb = tf.feature_column.input_layer(
        features, params['feature_columns']['user_columns'])
    good_emb = tf.feature_column.input_layer(
        features, params['feature_columns']['good_columns'])
    with tf.name_scope('user'):
        user_emb = build_deep_layers(user_emb, params, mode, name='user')
    with tf.name_scope('goods'):
        good_emb = build_deep_layers(good_emb, params, mode, name='good')

    head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
        n_classes=2,
        weight_column=None,
        label_vocabulary=None,
        loss_reduction=losses.Reduction.SUM)
    logits = tf.layers.dense(
        tf.multiply(user_emb, good_emb),
        units=head.logits_dimension,
        kernel_initializer=tf.glorot_uniform_initializer())
    preds = tf.sigmoid(logits)
    # Alternative scoring: dot product of the two towers.
    # similarity = tf.reduce_sum(tf.multiply(user_emb, good_emb), axis=-1)
    # predictions = tf.nn.sigmoid(similarity)

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {'probabilities': preds}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    loss = tf.reduce_sum(
        tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.cast(labels['ctr_label'], tf.float32),
            logits=tf.cast(logits, tf.float32)))
    auc = tf.metrics.auc(labels['ctr_label'], preds)
    metrics = {'auc': auc}
    tf.summary.scalar('auc', auc[1])
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode, loss=loss,
                                          eval_metric_ops=metrics)

    assert mode == tf.estimator.ModeKeys.TRAIN
    start_learning_rate = params['learning_rate']
    global_step = tf.train.get_global_step()
    learning_rate = tf.train.exponential_decay(
        learning_rate=start_learning_rate,
        global_step=global_step,
        decay_steps=params['decay_steps'],
        decay_rate=params['decay_rate'],
        staircase=False)
    tf.summary.scalar('learning_rate', learning_rate)
    optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss, global_step=global_step)
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
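# A minimal usage sketch (illustrative, not from this repo): wiring _dssm_model
# into tf.estimator.Estimator. The concrete feature columns and the
# 'hidden_units' key assumed to be consumed by build_deep_layers are
# hypothetical.
def _dssm_estimator_example(model_dir):
    user_columns = [
        tf.feature_column.embedding_column(
            tf.feature_column.categorical_column_with_hash_bucket(
                'user_id', 100000), 20)
    ]
    good_columns = [
        tf.feature_column.embedding_column(
            tf.feature_column.categorical_column_with_hash_bucket(
                'creative_id', 200000), 20)
    ]
    params = {
        'feature_columns': {'user_columns': user_columns,
                            'good_columns': good_columns},
        'hidden_units': [128, 64, 32],  # assumed key for build_deep_layers
        'learning_rate': 0.1,
        'decay_steps': 10000,
        'decay_rate': 0.9,
    }
    return tf.estimator.Estimator(
        model_fn=_dssm_model, model_dir=model_dir, params=params)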
def xdeepfm_model_fn(features, labels, mode, params):
    net = tf.feature_column.input_layer(features, params['feature_columns'])
    last_deep_layer = _build_deep_layers(net, params)
    last_xdeepfm_layer = _build_xdeepfm_layers(net, params)
    if params['use_xdeepfm']:
        print('--use xdeepfm layer--')
        last_layer = tf.concat([last_deep_layer, last_xdeepfm_layer], 1)
    else:
        last_layer = last_deep_layer

    # head = tf.contrib.estimator.binary_classification_head(loss_reduction=losses.Reduction.SUM)
    head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
        n_classes=2,
        weight_column=None,
        label_vocabulary=None,
        loss_reduction=losses.Reduction.SUM)
    logits = tf.layers.dense(
        last_layer,
        units=head.logits_dimension,
        kernel_initializer=tf.glorot_uniform_initializer())
    optimizer = tf.train.AdagradOptimizer(learning_rate=params['learning_rate'])
    # optimizer = tf.train.AdamOptimizer(learning_rate=params['learning_rate'])
    preds = tf.sigmoid(logits)
    user_id = features['user_id']
    label = features['label']

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'probabilities': preds,
            'user_id': user_id,
            'label': label
        }
        export_outputs = {
            'regression':
                tf.estimator.export.RegressionOutput(predictions['probabilities'])
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions,
                                          export_outputs=export_outputs)

    loss = tf.reduce_sum(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))
    # loss = focal_loss(logits=logits, labels=labels, alpha=0.5, gamma=6, beta=1)
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        logits=logits,
        train_op_fn=lambda loss: optimizer.minimize(
            loss, global_step=tf.train.get_global_step()))
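# Sketch of a standard binary focal loss, for the commented-out focal_loss call
# above (illustrative; the repo's focal_loss also takes a 'beta' argument whose
# role is not shown in this file, so it is omitted here).
def focal_loss_sketch(logits, labels, alpha=0.5, gamma=2.0):
    probs = tf.sigmoid(logits)
    labels = tf.cast(labels, tf.float32)
    # p_t: probability assigned to the true class.
    p_t = labels * probs + (1.0 - labels) * (1.0 - probs)
    alpha_t = labels * alpha + (1.0 - labels) * (1.0 - alpha)
    # Down-weight easy examples by (1 - p_t)^gamma.
    return tf.reduce_sum(
        -alpha_t * tf.pow(1.0 - p_t, gamma) * tf.log(p_t + 1e-8))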
def din_model_fn(features, labels, mode, params):
    net = tf.feature_column.input_layer(features, params['feature_columns'])

    attention_keyword = tf.string_to_hash_bucket_fast(
        features["keyword_attention"], 500000)
    attention_keyword_embeddings = tf.get_variable(
        name="attention_keyword_embeddings",
        dtype=tf.float32,
        shape=[500000, 20])
    # shape (batch_size, len, embedding_size)
    attention_keyword_emb = tf.nn.embedding_lookup(
        attention_keyword_embeddings, attention_keyword)

    attention_creativeid = tf.string_to_hash_bucket_fast(
        tf.as_string(features["creative_id"]), 200000)
    attention_creativeid_embeddings = tf.get_variable(
        name="attention_creativeid_embeddings",
        dtype=tf.float32,
        shape=[200000, 20])
    # shape (batch_size, 1, embedding_size)
    attention_creativeid_emb = tf.nn.embedding_lookup(
        attention_creativeid_embeddings, attention_creativeid)

    # (batch_size, embedding_size)
    keyword_creativeid_attention = attention_layer(attention_creativeid_emb,
                                                   attention_keyword_emb)

    last_deep_layer = build_deep_layers(net, params)
    last_cross_layer = build_cross_layers(net, params)
    last_layer = tf.concat(
        [last_deep_layer, last_cross_layer, keyword_creativeid_attention], 1)

    # head = tf.contrib.estimator.binary_classification_head(loss_reduction=losses.Reduction.SUM)
    head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
        n_classes=2,
        weight_column=None,
        label_vocabulary=None,
        loss_reduction=losses.Reduction.SUM)
    logits = tf.layers.dense(
        last_layer,
        units=head.logits_dimension,
        kernel_initializer=tf.glorot_uniform_initializer())
    optimizer = tf.train.AdagradOptimizer(learning_rate=params['learning_rate'])
    preds = tf.sigmoid(logits)
    user_id = features['user_id']
    label = features['label']

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'probabilities': preds,
            'user_id': user_id,
            'label': label
        }
        export_outputs = {
            'regression':
                tf.estimator.export.RegressionOutput(predictions['probabilities'])
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions,
                                          export_outputs=export_outputs)

    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        logits=logits,
        train_op_fn=lambda loss: optimizer.minimize(
            loss, global_step=tf.train.get_global_step()))
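# Minimal sketch of a DIN-style attention_layer (illustrative; the repo's own
# attention_layer may differ). query: (batch, 1, d) target-item embedding;
# keys: (batch, T, d) behavior-sequence embeddings. Returns (batch, d).
def attention_layer_sketch(query, keys, hidden_units=36):
    seq_len = tf.shape(keys)[1]
    queries = tf.tile(query, [1, seq_len, 1])  # (B, T, d)
    # DIN feeds the query, the keys, and their interactions to a small MLP.
    din_input = tf.concat(
        [queries, keys, queries - keys, queries * keys], axis=-1)
    hidden = tf.layers.dense(din_input, hidden_units, activation=tf.nn.relu)
    scores = tf.layers.dense(hidden, 1)           # (B, T, 1)
    weights = tf.nn.softmax(scores, axis=1)       # attention over the sequence
    return tf.reduce_sum(weights * keys, axis=1)  # weighted sum -> (B, d)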
def esmm_model_fn(features, labels, mode, params):
    net = tf.feature_column.input_layer(features, params['feature_columns'])
    last_ctr_layer = build_deep_layers(net, params)
    last_cvr_layer = build_deep_layers(net, params)

    # head = tf.contrib.estimator.binary_classification_head(loss_reduction=losses.Reduction.SUM)
    head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
        n_classes=2,
        weight_column=None,
        label_vocabulary=None,
        loss_reduction=losses.Reduction.SUM)
    ctr_logits = tf.layers.dense(
        last_ctr_layer,
        units=head.logits_dimension,
        kernel_initializer=tf.glorot_uniform_initializer())
    cvr_logits = tf.layers.dense(
        last_cvr_layer,
        units=head.logits_dimension,
        kernel_initializer=tf.glorot_uniform_initializer())
    ctr_preds = tf.sigmoid(ctr_logits)
    cvr_preds = tf.sigmoid(cvr_logits)
    ctcvr_preds = tf.multiply(ctr_preds, cvr_preds)
    optimizer = tf.train.AdagradOptimizer(learning_rate=params['learning_rate'])

    ctr_label = labels['ctr_label']
    cvr_label = labels['cvr_label']
    ctr_loss = tf.reduce_sum(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=ctr_label,
                                                logits=ctr_logits))
    ctcvr_loss = tf.reduce_sum(
        tf.losses.log_loss(labels=cvr_label, predictions=ctcvr_preds))
    # A task-weighting coefficient could be added to this loss, following
    # common multi-task loss-weighting schemes.
    loss = ctr_loss + ctcvr_loss

    user_id = features['user_id']
    click_label = features['label']
    conversion_label = features['is_conversion']

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'ctr_preds': ctr_preds,
            'cvr_preds': cvr_preds,
            'ctcvr_preds': ctcvr_preds,
            'user_id': user_id,
            'click_label': click_label,
            'conversion_label': conversion_label
        }
        export_outputs = {
            'regression':
                tf.estimator.export.RegressionOutput(
                    predictions['cvr_preds'])  # needed for online serving
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions,
                                          export_outputs=export_outputs)
    elif mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode, loss=loss)
    else:
        train_op = optimizer.minimize(loss,
                                      global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
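# Sketch of the task-weighted loss suggested in the comment above
# (illustrative; the weight values are assumptions, not from this repo).
# ESMM factorizes p(click & convert) = p(click) * p(convert | click), so both
# losses are supervised over the full impression space.
def weighted_esmm_loss(ctr_loss, ctcvr_loss, ctr_weight=1.0, ctcvr_weight=1.0):
    return ctr_weight * ctr_loss + ctcvr_weight * ctcvr_loss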
def afm_model_fn(features, labels, mode, params):
    net = tf.feature_column.input_layer(features, params['feature_columns'])
    # Note: last_deep_layer is computed but not used below; only the AFM
    # interaction layer feeds the output.
    last_deep_layer = build_deep_layers(net, params)
    last_layer = build_afm_layers(net, params)

    # head = tf.contrib.estimator.binary_classification_head(loss_reduction=losses.Reduction.SUM)
    head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
        n_classes=2,
        weight_column=None,
        label_vocabulary=None,
        loss_reduction=losses.Reduction.SUM)
    logits = tf.layers.dense(
        last_layer,
        units=head.logits_dimension,
        kernel_initializer=tf.glorot_uniform_initializer())
    optimizer = tf.train.AdagradOptimizer(learning_rate=params['learning_rate'])
    preds = tf.sigmoid(logits)
    user_id = features['user_id']
    label = features['label']

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'probabilities': preds,
            'user_id': user_id,
            'label': label
        }
        export_outputs = {
            'regression':
                tf.estimator.export.RegressionOutput(predictions['probabilities'])
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions,
                                          export_outputs=export_outputs)

    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        logits=logits,
        train_op_fn=lambda loss: optimizer.minimize(
            loss, global_step=tf.train.get_global_step()))
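# Minimal sketch of an AFM interaction layer (illustrative; the repo's
# build_afm_layers may differ). Input: (batch, num_fields, d) field embeddings;
# output: (batch, d) attention-weighted sum of pairwise interactions.
def afm_layer_sketch(feature_embeddings, attention_units=16):
    num_fields = feature_embeddings.get_shape().as_list()[1]
    products = []
    for i in range(num_fields):
        for j in range(i + 1, num_fields):
            products.append(feature_embeddings[:, i] * feature_embeddings[:, j])
    pairwise = tf.stack(products, axis=1)  # (B, P, d), P = C(num_fields, 2)
    # Attention network scores each interaction, then softmax-normalizes.
    att_hidden = tf.layers.dense(pairwise, attention_units,
                                 activation=tf.nn.relu)
    att_scores = tf.nn.softmax(tf.layers.dense(att_hidden, 1), axis=1)
    return tf.reduce_sum(att_scores * pairwise, axis=1)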
def dfm_model_fn(features, labels, mode, params):
    # shape (batch_size, column_num * embedding_size)
    net = tf.feature_column.input_layer(features, params['feature_columns'])
    last_deep_layer = build_deep_layers(net, params)

    column_num, dimension = _check_fm_columns(params['feature_columns'])
    # (batch_size, column_num, embedding_size)
    feature_embeddings = tf.reshape(net, (-1, column_num, dimension))

    # sum-square part
    summed_feature_embeddings = tf.reduce_sum(
        feature_embeddings, 1)  # (batch_size, embedding_size)
    summed_square_feature_embeddings = tf.square(summed_feature_embeddings)

    # square-sum part
    squared_feature_embeddings = tf.square(feature_embeddings)
    squared_sum_feature_embeddings = tf.reduce_sum(squared_feature_embeddings, 1)

    fm_second_order = 0.5 * tf.subtract(summed_square_feature_embeddings,
                                        squared_sum_feature_embeddings)

    if params['use_fm']:
        print('--use fm--')
        last_layer = tf.concat([fm_second_order, last_deep_layer], 1)
    else:
        last_layer = last_deep_layer

    # head = tf.contrib.estimator.binary_classification_head(loss_reduction=losses.Reduction.SUM)
    head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
        n_classes=2,
        weight_column=None,
        label_vocabulary=None,
        loss_reduction=losses.Reduction.SUM)
    logits = tf.layers.dense(
        last_layer,
        units=head.logits_dimension,
        kernel_initializer=tf.glorot_uniform_initializer())
    optimizer = tf.train.AdagradOptimizer(learning_rate=params['learning_rate'])
    preds = tf.sigmoid(logits)
    user_id = features['user_id']
    label = features['label']

    if mode == tf.estimator.ModeKeys.EVAL:
        accuracy = tf.metrics.accuracy(
            labels=labels['class'],
            predictions=tf.to_float(tf.greater_equal(preds, 0.5)))
        auc = tf.metrics.auc(labels['class'], preds)
        label_mean = metrics_lib.mean(labels['class'])
        prediction_mean = metrics_lib.mean(preds)
        prediction_squared_difference = tf.math.squared_difference(
            preds, prediction_mean[0])
        prediction_squared_sum = tf.reduce_sum(prediction_squared_difference)
        num_predictions = tf.to_float(tf.size(preds))
        # The second tuple element reuses accuracy's value tensor as a stand-in
        # so these fit the (value, update_op) format eval_metric_ops expects.
        s_deviation = (tf.sqrt(prediction_squared_sum / num_predictions),
                       accuracy[0])  # standard deviation
        c_variation = (tf.to_float(s_deviation[0] / prediction_mean[0]),
                       accuracy[0])  # coefficient of variation
        # group_auc = (tf.to_float(cal_group_auc(labels['class'], preds,
        #                                        labels['user_id'])),
        #              accuracy[0])  # group AUC
        metrics = {
            'accuracy': accuracy,
            'auc': auc,
            'label/mean': label_mean,
            'prediction/mean': prediction_mean,
            'standard deviation': s_deviation,
            'coefficient of variation': c_variation
            # 'group auc': group_auc
        }
        tf.summary.scalar('accuracy', accuracy[1])
        tf.summary.scalar('auc', auc[1])
        tf.summary.scalar('label/mean', label_mean[1])
        tf.summary.scalar('prediction/mean', prediction_mean[1])
        tf.summary.scalar('s_deviation', s_deviation[1])
        tf.summary.scalar('c_variation', c_variation[1])
        # tf.summary.scalar('group_auc', group_auc[1])
        loss = tf.reduce_sum(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=labels['class'],
                                                    logits=logits))
        return tf.estimator.EstimatorSpec(mode, loss=loss,
                                          eval_metric_ops=metrics)

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'probabilities': preds,
            'user_id': user_id,
            'label': label
        }
        export_outputs = {
            'prediction': tf.estimator.export.PredictOutput(predictions)
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions,
                                          export_outputs=export_outputs)

    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        logits=logits,
        train_op_fn=lambda loss: optimizer.minimize(
            loss, global_step=tf.train.get_global_step()))
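# Sanity-check sketch (illustrative): the 0.5 * ((sum_i e_i)^2 - sum_i e_i^2)
# trick above equals the brute-force sum of pairwise element-wise products
# sum_{i<j} e_i * e_j, i.e. FM's second-order term, in O(F*d) instead of
# O(F^2*d) time.
def _fm_second_order_check():
    import numpy as np
    emb = tf.constant([[[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]])  # (1, 3, 2)
    fast = 0.5 * (tf.square(tf.reduce_sum(emb, 1)) -
                  tf.reduce_sum(tf.square(emb), 1))
    slow = tf.add_n([emb[:, i] * emb[:, j]
                     for i in range(3) for j in range(i + 1, 3)])
    with tf.Session() as sess:
        fast_v, slow_v = sess.run([fast, slow])
    assert np.allclose(fast_v, slow_v)  # both give [[23., 44.]]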
def __init__(
    self,
    hidden_units,
    feature_columns,
    model_dir=None,
    n_classes=2,
    weight_column=None,
    label_vocabulary=None,
    optimizer='Adagrad',
    activation_fn=nn.relu,
    dropout=None,
    input_layer_partitioner=None,
    config=None,
    warm_start_from=None,
    loss_reduction=losses.Reduction.SUM,
):
    """Initializes a `DNNClassifier` instance.

    Args:
      hidden_units: Iterable of number hidden units per layer. All layers are
        fully connected. Ex. `[64, 32]` means first layer has 64 nodes and
        second one has 32.
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `_FeatureColumn`.
      model_dir: Directory to save model parameters, graph and etc. This can
        also be used to load checkpoints from the directory into an estimator
        to continue training a previously saved model.
      n_classes: Number of label classes. Defaults to 2, namely binary
        classification. Must be > 1.
      weight_column: A string or a `_NumericColumn` created by
        `tf.feature_column.numeric_column` defining feature column representing
        weights. It is used to down weight or boost examples during training.
        It will be multiplied by the loss of the example. If it is a string, it
        is used as a key to fetch weight tensor from the `features`. If it is a
        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
        then weight_column.normalizer_fn is applied on it to get weight tensor.
      label_vocabulary: A list of strings representing possible label values.
        If given, labels must be string type and have any value in
        `label_vocabulary`. If it is not given, that means labels are already
        encoded as integer or float within [0, 1] for `n_classes=2` and encoded
        as integer values in {0, 1,..., n_classes-1} for `n_classes`>2. Also
        there will be errors if vocabulary is not provided and labels are
        string.
      optimizer: An instance of `tf.Optimizer` used to train the model. Can
        also be a string (one of 'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'),
        or callable. Defaults to Adagrad optimizer.
      activation_fn: Activation function applied to each layer. If `None`, will
        use `tf.nn.relu`.
      dropout: When not `None`, the probability we will drop out a given
        coordinate.
      input_layer_partitioner: Optional. Partitioner for input layer. Defaults
        to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
      config: `RunConfig` object to configure the runtime settings.
      warm_start_from: A string filepath to a checkpoint to warm-start from, or
        a `WarmStartSettings` object to fully configure warm-starting. If the
        string filepath is provided instead of a `WarmStartSettings`, then all
        weights are warm-started, and it is assumed that vocabularies and
        Tensor names are unchanged.
      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
        to reduce training loss over batch. Defaults to `SUM`.
    """
    head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
        n_classes, weight_column, label_vocabulary, loss_reduction)

    def _model_fn(features, labels, mode, config):
        """Call the defined shared _dnn_model_fn."""
        return _dnn_model_fn(
            features=features,
            labels=labels,
            mode=mode,
            head=head,
            hidden_units=hidden_units,
            feature_columns=tuple(feature_columns or []),
            optimizer=optimizer,
            activation_fn=activation_fn,
            dropout=dropout,
            input_layer_partitioner=input_layer_partitioner,
            config=config)

    super(DNNClassifier, self).__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config,
        warm_start_from=warm_start_from)
def __init__(
    self,
    hidden_units,
    feature_columns,
    model_dir=None,
    n_classes=2,
    weight_column=None,
    label_vocabulary=None,
    optimizer='Adagrad',
    activation_fn=nn.relu,
    dropout=None,
    input_layer_partitioner=None,
    config=None,
    warm_start_from=None,
    loss_reduction=losses.Reduction.SUM,
    batch_norm=False,
):
    """Initializes a `DNNClassifier` instance.

    Args:
      hidden_units: Iterable of number hidden units per layer. All layers are
        fully connected. Ex. `[64, 32]` means first layer has 64 nodes and
        second one has 32.
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `_FeatureColumn`.
      model_dir: Directory to save model parameters, graph and etc. This can
        also be used to load checkpoints from the directory into an estimator
        to continue training a previously saved model.
      n_classes: Number of label classes. Defaults to 2, namely binary
        classification. Must be > 1.
      weight_column: A string or a `_NumericColumn` created by
        `tf.feature_column.numeric_column` defining feature column representing
        weights. It is used to down weight or boost examples during training.
        It will be multiplied by the loss of the example. If it is a string, it
        is used as a key to fetch weight tensor from the `features`. If it is a
        `_NumericColumn`, raw tensor is fetched by key `weight_column.key`,
        then weight_column.normalizer_fn is applied on it to get weight tensor.
      label_vocabulary: A list of strings representing possible label values.
        If given, labels must be string type and have any value in
        `label_vocabulary`. If it is not given, that means labels are already
        encoded as integer or float within [0, 1] for `n_classes=2` and encoded
        as integer values in {0, 1,..., n_classes-1} for `n_classes`>2. Also
        there will be errors if vocabulary is not provided and labels are
        string.
      optimizer: An instance of `tf.Optimizer` used to train the model. Can
        also be a string (one of 'Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD'),
        or callable. Defaults to Adagrad optimizer.
      activation_fn: Activation function applied to each layer. If `None`, will
        use `tf.nn.relu`.
      dropout: When not `None`, the probability we will drop out a given
        coordinate.
      input_layer_partitioner: Optional. Partitioner for input layer. Defaults
        to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
      config: `RunConfig` object to configure the runtime settings.
      warm_start_from: A string filepath to a checkpoint to warm-start from, or
        a `WarmStartSettings` object to fully configure warm-starting. If the
        string filepath is provided instead of a `WarmStartSettings`, then all
        weights are warm-started, and it is assumed that vocabularies and
        Tensor names are unchanged.
      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
        to reduce training loss over batch. Defaults to `SUM`.
      batch_norm: Whether to use batch normalization after each hidden layer.
    """
    head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
        n_classes, weight_column, label_vocabulary, loss_reduction)
    shared_state_manager = feature_column_v2.maybe_create_shared_state_manager(
        feature_columns)

    def _model_fn(features, labels, mode, config):
        """Call the defined shared _dnn_model_fn."""
        return _dnn_model_fn(
            features=features,
            labels=labels,
            mode=mode,
            head=head,
            hidden_units=hidden_units,
            feature_columns=tuple(feature_columns or []),
            optimizer=optimizer,
            activation_fn=activation_fn,
            dropout=dropout,
            input_layer_partitioner=input_layer_partitioner,
            config=config,
            batch_norm=batch_norm,
            shared_state_manager=shared_state_manager)

    super(DNNClassifier, self).__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config,
        warm_start_from=warm_start_from)
def transformer_model_fn(features, labels, mode, params):
    net = tf.feature_column.input_layer(features, params['feature_columns'])

    last_click_creativeid = tf.string_to_hash_bucket_fast(
        features["user_click_creatives_att"], 200000)
    creativeid_embeddings = tf.get_variable(
        name="attention_creativeid_embeddings",
        dtype=tf.float32,
        shape=[200000, 20])
    last_click_creativeid_emb = tf.nn.embedding_lookup(creativeid_embeddings,
                                                       last_click_creativeid)

    last_click_productid = tf.string_to_hash_bucket_fast(
        features["user_click_products_att"], 40000)
    productid_embeddings = tf.get_variable(
        name="attention_productid_embeddings",
        dtype=tf.float32,
        shape=[40000, 20])
    last_click_productid_emb = tf.nn.embedding_lookup(productid_embeddings,
                                                      last_click_productid)

    # (batch_size, 10, emb_size * 2)
    his_click_emb = tf.concat(
        [last_click_creativeid_emb, last_click_productid_emb], 2)

    transformerNetwork_click = TransformerNetwork(
        params['transformer_num_units'],
        params['num_blocks'],
        params['num_heads'],
        max_len=10,
        dropout_rate=params['dropout_rate'],
        pos_fixed=True)
    # (batch_size, 10, 1)
    mask_click = tf.expand_dims(
        tf.to_float(
            tf.cast(tf.not_equal(features["user_click_creatives_att"], "0"),
                    tf.float32)), -1)
    # (batch_size, max_len, num_units)
    transformer_click_outputs = transformerNetwork_click(his_click_emb,
                                                         mask_click)
    transformer_click_outputs = tf.reshape(
        tf.reduce_sum(transformer_click_outputs, 1),
        shape=[-1, params['transformer_num_units']])

    last_deep_layer = build_deep_layers(net, params)
    last_cross_layer = build_cross_layers(net, params)
    last_layer = tf.concat(
        [last_deep_layer, last_cross_layer, transformer_click_outputs], 1)

    # head = tf.contrib.estimator.binary_classification_head(loss_reduction=losses.Reduction.SUM)
    head = head_lib._binary_logistic_or_multi_class_head(  # pylint: disable=protected-access
        n_classes=2,
        weight_column=None,
        label_vocabulary=None,
        loss_reduction=losses.Reduction.SUM)
    logits = tf.layers.dense(
        last_layer,
        units=head.logits_dimension,
        kernel_initializer=tf.glorot_uniform_initializer())
    optimizer = tf.train.AdagradOptimizer(learning_rate=params['learning_rate'])
    preds = tf.sigmoid(logits)
    user_id = features['user_id']
    label = features['label']

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'probabilities': preds,
            'user_id': user_id,
            'label': label
        }
        export_outputs = {
            'regression':
                tf.estimator.export.RegressionOutput(predictions['probabilities'])
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions,
                                          export_outputs=export_outputs)

    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        logits=logits,
        train_op_fn=lambda loss: optimizer.minimize(
            loss, global_step=tf.train.get_global_step()))
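# Export sketch (illustrative): building a raw serving input receiver and
# exporting a SavedModel for one of the model_fns above. Only the two
# attention features are shown; a real export would declare a placeholder for
# every feature the feature columns consume.
def _export_example(estimator, export_dir):
    serving_features = {
        'user_click_creatives_att': tf.placeholder(tf.string, [None, 10]),
        'user_click_products_att': tf.placeholder(tf.string, [None, 10]),
    }
    serving_input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(
        serving_features)
    return estimator.export_savedmodel(export_dir, serving_input_fn)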