Example #1
def cal_fm_first_logits():
  # Fragment of a larger model_fn: builds the first-order FM logits.
  # `head`, `fm_first_feature_columns`, `features`, and `scope` come from the
  # enclosing function's scope.
  logit_fn = linear._linear_logit_fn_builder(  # pylint: disable=protected-access
      units=head.logits_dimension,
      feature_columns=fm_first_feature_columns)
  fm_first_logits = logit_fn(features=features)
  _add_layer_summary(fm_first_logits, scope.name)
  return fm_first_logits
def _dnn_linear_combined_model_fn(features,
                                  labels,
                                  mode,
                                  head,
                                  linear_feature_columns=None,
                                  linear_optimizer='Ftrl',
                                  dnn_feature_columns=None,
                                  dnn_optimizer='Adagrad',
                                  dnn_hidden_units=None,
                                  dnn_activation_fn=nn.relu,
                                  dnn_dropout=None,
                                  input_layer_partitioner=None,
                                  config=None):
  """Deep Neural Net and Linear combined model_fn.

  Args:
    features: dict of `Tensor`.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    head: A `Head` instance.
    linear_feature_columns: An iterable containing all the feature columns used
      by the Linear model.
    linear_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the Linear model. Defaults to the Ftrl
      optimizer.
    dnn_feature_columns: An iterable containing all the feature columns used by
      the DNN model.
    dnn_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the DNN model. Defaults to the Adagrad
      optimizer.
    dnn_hidden_units: List of hidden units per DNN layer.
    dnn_activation_fn: Activation function applied to each DNN layer. If `None`,
      will use `tf.nn.relu`.
    dnn_dropout: When not `None`, the probability we will drop out a given DNN
      coordinate.
    input_layer_partitioner: Partitioner for input layer.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_feature_columns`
      are empty at the same time, or `input_layer_partitioner` is missing,
      or features has the wrong type.
  """
  if not isinstance(features, dict):
    raise ValueError('features should be a dictionary of `Tensor`s. '
                     'Given type: {}'.format(type(features)))
  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        'Either linear_feature_columns or dnn_feature_columns must be defined.')

  num_ps_replicas = config.num_ps_replicas if config else 0
  input_layer_partitioner = input_layer_partitioner or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))

  # Build DNN Logits.
  dnn_parent_scope = 'dnn'

  if not dnn_feature_columns:
    dnn_logits = None
  else:
    dnn_optimizer = optimizers.get_optimizer_instance(
        dnn_optimizer, learning_rate=_DNN_LEARNING_RATE)
    _check_no_sync_replicas_optimizer(dnn_optimizer)
    if not dnn_hidden_units:
      raise ValueError(
          'dnn_hidden_units must be defined when dnn_feature_columns is '
          'specified.')
    dnn_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
    with variable_scope.variable_scope(
        dnn_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=dnn_partitioner):

      dnn_logit_fn = dnn._dnn_logit_fn_builder(  # pylint: disable=protected-access
          units=head.logits_dimension,
          hidden_units=dnn_hidden_units,
          feature_columns=dnn_feature_columns,
          activation_fn=dnn_activation_fn,
          dropout=dnn_dropout,
          input_layer_partitioner=input_layer_partitioner)
      dnn_logits = dnn_logit_fn(features=features, mode=mode)

  linear_parent_scope = 'linear'

  if not linear_feature_columns:
    linear_logits = None
  else:
    linear_optimizer = optimizers.get_optimizer_instance(
        linear_optimizer,
        learning_rate=_linear_learning_rate(len(linear_feature_columns)))
    _check_no_sync_replicas_optimizer(linear_optimizer)
    with variable_scope.variable_scope(
        linear_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=input_layer_partitioner) as scope:
      logit_fn = linear._linear_logit_fn_builder(  # pylint: disable=protected-access
          units=head.logits_dimension,
          feature_columns=linear_feature_columns)
      linear_logits = logit_fn(features=features)
      _add_layer_summary(linear_logits, scope.name)

  # Combine logits and build full model.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _train_op_fn(loss):
    """Returns the op to optimize the loss."""
    train_ops = []
    global_step = training_util.get_global_step()
    if dnn_logits is not None:
      train_ops.append(
          dnn_optimizer.minimize(
              loss,
              var_list=ops.get_collection(
                  ops.GraphKeys.TRAINABLE_VARIABLES,
                  scope=dnn_parent_scope)))
    if linear_logits is not None:
      train_ops.append(
          linear_optimizer.minimize(
              loss,
              var_list=ops.get_collection(
                  ops.GraphKeys.TRAINABLE_VARIABLES,
                  scope=linear_parent_scope)))

    train_op = control_flow_ops.group(*train_ops)
    with ops.control_dependencies([train_op]):
      return distribute_lib.increment_var(global_step)

  return head.create_estimator_spec(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_train_op_fn,
      logits=logits)
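# --- Usage sketch (not part of the original example) ---
# The model_fn above is typically wrapped in a closure and handed to
# tf.estimator.Estimator. The feature columns and the head below are
# illustrative assumptions; in TF 1.x a binary head can be obtained from
# tf.contrib.estimator.binary_classification_head().
import tensorflow as tf

def build_estimator(model_dir):
  linear_cols = [
      tf.feature_column.categorical_column_with_hash_bucket('ad_id', 1000)]
  dnn_cols = [tf.feature_column.numeric_column('ctr_history')]
  head = tf.contrib.estimator.binary_classification_head()

  def model_fn(features, labels, mode, config):
    return _dnn_linear_combined_model_fn(
        features=features, labels=labels, mode=mode, head=head,
        linear_feature_columns=linear_cols,
        dnn_feature_columns=dnn_cols,
        dnn_hidden_units=[128, 64],
        config=config)

  return tf.estimator.Estimator(model_fn=model_fn, model_dir=model_dir)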
Example #3
def _dnn_linear_combined_model_fn(features,
                                  labels,
                                  mode,
                                  head,
                                  linear_feature_columns=None,
                                  linear_optimizer='Ftrl',
                                  dnn_feature_columns=None,
                                  dnn_optimizer='Adagrad',
                                  dnn_hidden_units=None,
                                  dnn_activation_fn=nn.relu,
                                  dnn_dropout=None,
                                  input_layer_partitioner=None,
                                  config=None):
    """Deep Neural Net and Linear combined model_fn.

  Args:
    features: dict of `Tensor`.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    head: A `Head` instance.
    linear_feature_columns: An iterable containing all the feature columns used
      by the Linear model.
    linear_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the Linear model. Defaults to the Ftrl
      optimizer.
    dnn_feature_columns: An iterable containing all the feature columns used by
      the DNN model.
    dnn_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the DNN model. Defaults to the Adagrad
      optimizer.
    dnn_hidden_units: List of hidden units per DNN layer.
    dnn_activation_fn: Activation function applied to each DNN layer. If `None`,
      will use `tf.nn.relu`.
    dnn_dropout: When not `None`, the probability we will drop out a given DNN
      coordinate.
    input_layer_partitioner: Partitioner for input layer.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_feature_columns`
      are empty at the same time, or `input_layer_partitioner` is missing,
      or features has the wrong type.
  """
    if not isinstance(features, dict):
        raise ValueError('features should be a dictionary of `Tensor`s. '
                         'Given type: {}'.format(type(features)))
    if not linear_feature_columns and not dnn_feature_columns:
        raise ValueError(
            'Either linear_feature_columns or dnn_feature_columns must be defined.'
        )

    num_ps_replicas = config.num_ps_replicas if config else 0
    input_layer_partitioner = input_layer_partitioner or (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas, min_slice_size=64 << 20))

    # Build DNN Logits.
    dnn_parent_scope = 'dnn'

    if not dnn_feature_columns:
        dnn_logits = None
    else:
        dnn_optimizer = optimizers.get_optimizer_instance(
            dnn_optimizer, learning_rate=_DNN_LEARNING_RATE)
        _check_no_sync_replicas_optimizer(dnn_optimizer)
        if not dnn_hidden_units:
            raise ValueError(
                'dnn_hidden_units must be defined when dnn_feature_columns is '
                'specified.')
        dnn_partitioner = (partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
        with variable_scope.variable_scope(dnn_parent_scope,
                                           values=tuple(
                                               six.itervalues(features)),
                                           partitioner=dnn_partitioner):

            dnn_logit_fn = dnn._dnn_logit_fn_builder(  # pylint: disable=protected-access
                units=head.logits_dimension,
                hidden_units=dnn_hidden_units,
                feature_columns=dnn_feature_columns,
                activation_fn=dnn_activation_fn,
                dropout=dnn_dropout,
                input_layer_partitioner=input_layer_partitioner)
            dnn_logits = dnn_logit_fn(features=features, mode=mode)

    linear_parent_scope = 'linear'

    if not linear_feature_columns:
        linear_logits = None
    else:
        linear_optimizer = optimizers.get_optimizer_instance(
            linear_optimizer,
            learning_rate=_linear_learning_rate(len(linear_feature_columns)))
        _check_no_sync_replicas_optimizer(linear_optimizer)
        with variable_scope.variable_scope(
                linear_parent_scope,
                values=tuple(six.itervalues(features)),
                partitioner=input_layer_partitioner) as scope:
            logit_fn = linear._linear_logit_fn_builder(  # pylint: disable=protected-access
                units=head.logits_dimension,
                feature_columns=linear_feature_columns)
            linear_logits = logit_fn(features=features)
            _add_layer_summary(linear_logits, scope.name)

    # Combine logits and build full model.
    if dnn_logits is not None and linear_logits is not None:
        logits = dnn_logits + linear_logits
    elif dnn_logits is not None:
        logits = dnn_logits
    else:
        logits = linear_logits

    def _train_op_fn(loss):
        """Returns the op to optimize the loss."""
        train_ops = []
        global_step = training_util.get_global_step()
        if dnn_logits is not None:
            train_ops.append(
                dnn_optimizer.minimize(loss,
                                       var_list=ops.get_collection(
                                           ops.GraphKeys.TRAINABLE_VARIABLES,
                                           scope=dnn_parent_scope)))
        if linear_logits is not None:
            train_ops.append(
                linear_optimizer.minimize(
                    loss,
                    var_list=ops.get_collection(
                        ops.GraphKeys.TRAINABLE_VARIABLES,
                        scope=linear_parent_scope)))

        train_op = control_flow_ops.group(*train_ops)
        with ops.control_dependencies([train_op]):
            with ops.colocate_with(global_step):
                return state_ops.assign_add(global_step, 1)

    return head.create_estimator_spec(features=features,
                                      mode=mode,
                                      labels=labels,
                                      train_op_fn=_train_op_fn,
                                      logits=logits)
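# --- Helper sketch (not part of the original examples) ---
# Both examples call `_add_layer_summary`, which is not defined here. A minimal
# version consistent with its call sites (a value tensor and a scope name),
# modeled on the helper of the same name in the tf.estimator canned-estimator
# source:
from tensorflow.python.ops import nn
from tensorflow.python.summary import summary

def _add_layer_summary(value, tag):
  summary.scalar('%s/fraction_of_zero_values' % tag, nn.zero_fraction(value))
  summary.histogram('%s/activation' % tag, value)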
Example #4
def esmm_model_fn(features, labels, mode, params):
  """ESMM model_fn: shared inputs feed separate CTR and CVR towers; pCTCVR = pCTR * pCVR."""
  batch_weight = tf.feature_column.input_layer(features, params['weight_columns'])
  inputs, embedding_table = build_input(features, params)
  hidden_units = params['hidden_units']
  linear_parent_scope = 'linear'
  dnn_parent_scope = 'dnn'
  is_dynamic = params['dynamic']
  print("is_dynamic:", is_dynamic)
  reg = 1e-4
  if params['model'] == 'linear':
    with tf.variable_scope(linear_parent_scope, values=tuple(six.itervalues(features)), reuse=tf.AUTO_REUSE):
      with tf.variable_scope('linear_ctr'):
        ctr_logit_fn = linear._linear_logit_fn_builder(1, params['linear_columns'])
        ctr_logits = ctr_logit_fn(features=features)
      with tf.variable_scope('linear_cvr'):
        cvr_logit_fn = linear._linear_logit_fn_builder(1, params['linear_columns'])
        cvr_logits = cvr_logit_fn(features=features)
  if params['model'] == 'dnn':
    with tf.variable_scope(dnn_parent_scope):
      with tf.variable_scope('dnn_ctr'):
        ctr_logits = build_deep_layers(inputs, hidden_units, mode, params['ctr_reg'])
        #ctr_logit_fn = dnn._dnn_logit_fn_builder(1, hidden_units, params['dnn_columns'], tf.nn.relu, None, None, True)
        #ctr_logits = ctr_logit_fn(features=features, mode=mode)
      with tf.variable_scope('dnn_cvr'):
        cvr_logits = build_deep_layers(inputs, hidden_units, mode, params['cvr_reg'])
        #cvr_logit_fn = dnn._dnn_logit_fn_builder(1, hidden_units, params['dnn_columns'], tf.nn.relu, None, None, True)
        #cvr_logits = cvr_logit_fn(features=features, mode=mode)
  ctr_preds = tf.nn.sigmoid(ctr_logits)
  cvr_preds = tf.nn.sigmoid(cvr_logits)
  ctcvr_preds = tf.stop_gradient(ctr_preds) * cvr_preds
  #ctcvr_preds = ctr_preds * cvr_preds
  tf.summary.histogram("esmm/ctr_preds", ctr_preds) 
  tf.summary.histogram("esmm/ctcvr_preds", ctcvr_preds)
  if mode == tf.estimator.ModeKeys.PREDICT:
    #redundant_items = ctr_preds
    predictions = {
      'prob': tf.concat([ctcvr_preds, ctr_preds], 1)
    }
    export_outputs = {
      tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: tf.estimator.export.PredictOutput(predictions)  # needed for online serving
    }
    return tf.estimator.EstimatorSpec(mode, predictions=predictions, export_outputs=export_outputs)

  else:
    #for variable in tf.trainable_variables('fm'):
    #  print(variable_name)
    #print(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='fm'))
    #print(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=dnn_parent_scope))
    #shared_weights = tf.trainable_variables(dnn_parent_scope + '/SharedLayer/kernel')[0]
    linear_weights = list(embedding_table.get_linear_weights().values())
    embed_weights = list(embedding_table.get_embed_weights().values())
    shared_weights = {'linear':linear_weights, 'embed':embed_weights}
    ctr_labels = labels['ctr']
    ctcvr_labels = labels['ctcvr']
    linear_optimizer = tf.train.FtrlOptimizer(0.01, l1_regularization_strength=0.01, l2_regularization_strength=0.001)
    dnn_optimizer = optimizers.get_optimizer_instance('Adam', params['learning_rate'])
    loss_optimizer = optimizers.get_optimizer_instance('Adam', 0.001)
    ctr_auc = tf.metrics.auc(labels=ctr_labels, predictions=ctr_preds)
    ctcvr_auc = tf.metrics.auc(labels=ctcvr_labels, predictions=ctcvr_preds)
    ctr_precision, ctr_precision_update_op = tf.metrics.precision(labels=ctr_labels, predictions=ctr_preds)
    ctr_recall, ctr_recall_update_op = tf.metrics.recall(labels=ctr_labels, predictions=ctr_preds)
    ctr_loss = tf.losses.log_loss(ctr_labels, ctr_preds, reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE, weights=batch_weight)
    ctcvr_loss = tf.losses.log_loss(ctcvr_labels, ctcvr_preds, reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE)
    reg_loss = tf.reduce_sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

    weight_loss, update_list, w_list, loss_gradnorm = get_weight_loss([ctr_loss, ctcvr_loss], is_dynamic, shared_weights)
    print("get_weight_loss:", weight_loss, update_list)
    loss = tf.add_n(weight_loss + [reg_loss])
    tf.summary.scalar("esmm/ctr_loss", tf.reduce_sum(ctr_loss))
    tf.summary.scalar("esmm/ctcvr_loss", tf.reduce_sum(ctcvr_loss))
    tf.summary.scalar("esmm/loss", tf.reduce_sum(loss))
    def _train_op_fn(loss):
      train_ops = []
      global_step = tf.train.get_global_step()
      if params['model'] == 'dnn':
        var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='fm') + tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=dnn_parent_scope)
        train_ops.append(
          dnn_optimizer.minimize(
            loss,
            var_list=var_list))
      if params['model'] == 'linear':
        train_ops.append(
          linear_optimizer.minimize(
              loss,
              var_list=tf.get_collection(
                  tf.GraphKeys.TRAINABLE_VARIABLES,
                  scope=linear_parent_scope)))
      if w_list and update_list and loss_gradnorm is not None:
        train_ops.append(
            loss_optimizer.minimize(
                loss_gradnorm,
                var_list=w_list))
        train_ops.append(update_list)
      train_op = tf.group(*train_ops)
      with tf.control_dependencies([train_op]):
        return distribute_lib.increment_var(global_step)
    metrics = {'ctr_auc': ctr_auc, 'ctcvr_auc': ctcvr_auc, 'ctr_precision':(ctr_precision, ctr_precision_update_op), 'ctr_recall':(ctr_recall, ctr_recall_update_op)}
    train_op = _train_op_fn(loss)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    if update_ops:
      train_op = tf.group(train_op, *update_ops)
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op, eval_metric_ops=metrics)
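# --- Input sketch (not part of the original example) ---
# esmm_model_fn expects `labels` to be a dict with 'ctr' and 'ctcvr' entries of
# shape [batch_size, 1]. A minimal helper that turns a tf.data dataset of
# feature dicts into (features, labels) pairs; the column names 'click' and
# 'conversion' are hypothetical.
import tensorflow as tf

def esmm_input_fn(dataset, batch_size=512):
  def _split(example):
    features = dict(example)
    ctr = tf.reshape(tf.to_float(features.pop('click')), [-1, 1])
    ctcvr = tf.reshape(tf.to_float(features.pop('conversion')), [-1, 1])
    return features, {'ctr': ctr, 'ctcvr': ctcvr}
  return dataset.batch(batch_size).map(_split)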
def _dnn_linear_combined_model_fn(
    features, labels, mode, head, num_workers, opt,
    linear_feature_columns=None,
    dnn_feature_columns=None, dnn_hidden_units=None,
    dnn_activation_fn=nn.relu, dnn_dropout=None,
    input_layer_partitioner=None, config=None):
  """DNN + Linear combined model_fn that delegates all optimization to the
  externally supplied `opt`."""

  num_ps_replicas = config.num_ps_replicas if config else 0
  input_layer_partitioner = input_layer_partitioner or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))

  # combined_optimizer = CombinedOptimizer(linear_feature_columns)
  # sync_optimizer = tf.train.SyncReplicasOptimizer(combined_optimizer, replicas_to_aggregate=num_workers, total_num_replicas=num_workers)

  dnn_parent_scope = 'dnn'
  linear_parent_scope = 'linear'

  dnn_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas))
  with variable_scope.variable_scope(
      dnn_parent_scope,
      values=tuple(six.itervalues(features)),
      partitioner=dnn_partitioner):

    dnn_logit_fn = dnn._dnn_logit_fn_builder(
        units=head.logits_dimension,
        hidden_units=dnn_hidden_units,
        feature_columns=dnn_feature_columns,
        activation_fn=dnn_activation_fn,
        dropout=dnn_dropout,
        input_layer_partitioner=input_layer_partitioner)
    dnn_logits = dnn_logit_fn(features=features, mode=mode)

  with variable_scope.variable_scope(
      linear_parent_scope,
      values=tuple(six.itervalues(features)),
      partitioner=input_layer_partitioner) as scope:
    logit_fn = linear._linear_logit_fn_builder(
        units=head.logits_dimension,
        feature_columns=linear_feature_columns)
    linear_logits = logit_fn(features=features)

  logits = dnn_logits + linear_logits


  def _train_op_fn(loss):
    """Returns the op to optimize the loss."""
    global_step = training_util.get_global_step()

    pairs = opt.compute_gradients(loss)
    # `apply_gradients` already increments `global_step`, so no extra
    # assign_add is needed afterwards.
    return opt.apply_gradients(pairs, global_step)

  return head.create_estimator_spec(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_train_op_fn,
      logits=logits)
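# --- Caller sketch (not part of the original example) ---
# This variant delegates all optimization to the single `opt` supplied by the
# caller; the commented-out lines above hint at wrapping it in a
# SyncReplicasOptimizer. A minimal construction; the inner Adagrad optimizer
# and learning rate are assumptions.
import tensorflow as tf

def build_sync_optimizer(num_workers, learning_rate=0.05):
  base_opt = tf.train.AdagradOptimizer(learning_rate)
  return tf.train.SyncReplicasOptimizer(
      base_opt,
      replicas_to_aggregate=num_workers,
      total_num_replicas=num_workers)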
Example #6
def esmm_model_fn(features, labels, mode, params):
  """ESMM model_fn variant without dynamic loss weighting; CVR metrics are computed on clicked examples only."""
  batch_weight = tf.feature_column.input_layer(features, params['weight_columns'])
  inputs, shared_weights = build_input(features, params)
  hidden_units = params['hidden_units']
  linear_parent_scope = 'linear'
  dnn_parent_scope = 'dnn'
  is_dynamic = params['dynamic']
  print("is_dynamic:", is_dynamic)
  reg = 1e-4
  if params['model'] == 'linear':
    with tf.variable_scope(linear_parent_scope, values=tuple(six.itervalues(features)), reuse=tf.AUTO_REUSE):
      with tf.variable_scope('linear_ctr'):
        ctr_logit_fn = linear._linear_logit_fn_builder(1, params['linear_columns'])
        ctr_logits = ctr_logit_fn(features=features)
      with tf.variable_scope('linear_cvr'):
        cvr_logit_fn = linear._linear_logit_fn_builder(1, params['linear_columns'])
        cvr_logits = cvr_logit_fn(features=features)
  if params['model'] == 'dnn':
    with tf.variable_scope(dnn_parent_scope):
      with tf.variable_scope('dnn_ctr'):
        ctr_logits = build_deep_layers(inputs, hidden_units, mode, params['ctr_reg'])
        #ctr_logit_fn = dnn._dnn_logit_fn_builder(1, hidden_units, params['dnn_columns'], tf.nn.relu, None, None, True)
        #ctr_logits = ctr_logit_fn(features=features, mode=mode)
      with tf.variable_scope('dnn_cvr'):
        cvr_logits = build_deep_layers(inputs, hidden_units, mode, params['cvr_reg'])
        #cvr_logit_fn = dnn._dnn_logit_fn_builder(1, hidden_units, params['dnn_columns'], tf.nn.relu, None, None, True)
        #cvr_logits = cvr_logit_fn(features=features, mode=mode)
  logits = {'ctr': ctr_logits, 'ctcvr': ctr_logits * cvr_logits}  # unused below; predictions are built from the sigmoid probabilities instead
  ctr_preds = tf.nn.sigmoid(ctr_logits)
  cvr_preds = tf.nn.sigmoid(cvr_logits)
  #ctcvr_preds = tf.stop_gradient(ctr_preds) * cvr_preds
  ctcvr_preds = ctr_preds * cvr_preds
  tf.summary.histogram("esmm/ctr_preds", ctr_preds) 
  tf.summary.histogram("esmm/ctcvr_preds", ctcvr_preds)
  if mode == tf.estimator.ModeKeys.PREDICT:
    #redundant_items = ctr_preds
    predictions = {
      'prob': tf.concat([ctcvr_preds, ctr_preds], 1)
    }
    export_outputs = {
      tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: tf.estimator.export.PredictOutput(predictions)  # needed for online serving
    }
    return tf.estimator.EstimatorSpec(mode, predictions=predictions, export_outputs=export_outputs)

  else:
    #shared_weights = tf.trainable_variables(dnn_parent_scope + '/SharedLayer/kernel')[0]
    ctr_labels = labels['ctr']
    ctcvr_labels = labels['ctcvr']
    linear_optimizer = tf.train.FtrlOptimizer(0.01, l1_regularization_strength=0.001, l2_regularization_strength=0.001)
    dnn_optimizer = optimizers.get_optimizer_instance('Adam', params['learning_rate'])
    loss_optimizer = optimizers.get_optimizer_instance('Adam', 0.001)
    ctr_loss = tf.losses.log_loss(ctr_labels, ctr_preds, reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE, weights=batch_weight)
    ctcvr_loss = tf.losses.log_loss(ctcvr_labels, ctcvr_preds, reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE)
    #reg_loss = tf.reduce_sum(ops.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
    ctr_auc = tf.metrics.auc(labels=ctr_labels, predictions=ctr_preds, weights=batch_weight)
    ctcvr_auc = tf.metrics.auc(labels=ctcvr_labels, predictions=ctcvr_preds)
    # Keep only clicked examples (ctr == 1) when computing CVR metrics.
    mask = tf.equal(tf.squeeze(labels['ctr']), 1)
    cvr_preds = tf.boolean_mask(cvr_preds, mask)
    cvr_labels = tf.boolean_mask(labels['ctcvr'], mask)
    cvr_auc = tf.metrics.auc(labels=cvr_labels, predictions=cvr_preds)
    cvr_loss = tf.losses.log_loss(cvr_labels, cvr_preds, reduction=tf.losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)
    tf.summary.scalar("cvr_auc", cvr_auc[1])
    tf.summary.scalar("cvr_loss", cvr_loss)
    tf.summary.scalar('ctr_loss', ctr_loss)
    tf.summary.scalar('ctcvr_loss', ctcvr_loss)
    tf.summary.scalar('ctr_auc', ctr_auc[1])
    tf.summary.scalar('ctcvr_auc', ctcvr_auc[1])
    loss = tf.add_n([ctr_loss, ctcvr_loss])
    #weight_loss, update_list, w_list, loss_gradnorm = get_weight_loss([ctr_loss, ctcvr_loss], is_dynamic, shared_weights)
    #print("get_weight_loss:", weight_loss, update_list)
    def _train_op_fn(loss):
      train_ops = []
      global_step = tf.train.get_global_step()
      if params['model'] == 'dnn':
        fm_var_list = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES, scope='fm') 
        dnn_var_list = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES, scope=dnn_parent_scope) + ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES, scope='dnn_embed')
        train_ops.append(
          dnn_optimizer.minimize(
            loss,
            var_list=dnn_var_list))
        train_ops.append(
          linear_optimizer.minimize(
            loss,
            var_list=fm_var_list))
      if params['model'] == 'linear':
        train_ops.append(
          linear_optimizer.minimize(
              loss,
              var_list=tf.get_collection(
                  tf.GraphKeys.TRAINABLE_VARIABLES,
                  scope=linear_parent_scope)))
      '''
      if w_list is not None and update_list is not None and loss_gradnorm is not None:
        train_ops.append(
            loss_optimizer.minimize(
                loss_gradnorm,
                var_list=w_list))
        train_ops.append(update_list)
      '''
      train_op = control_flow_ops.group(*train_ops)
      with ops.control_dependencies([train_op]):
        return state_ops.assign_add(global_step, 1).op
    hooks = tf.train.LoggingTensorHook({'ctr_loss':ctr_loss, 'ctcvr_loss':ctcvr_loss, 'cvr_loss':cvr_loss}, every_n_iter=10000)
    train_op = _train_op_fn(loss)
    train_op = head_v1._append_update_ops(train_op)
    metrics = {'ctr_auc': ctr_auc, 'ctcvr_auc': ctcvr_auc, 'cvr_auc': cvr_auc}
    #return _TPUEstimatorSpec(mode, loss=loss, train_op=train_op).as_estimator_spec()
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op, training_hooks=[hooks], eval_metric_ops=metrics)
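# --- Helper sketch (not part of the original examples) ---
# Both ESMM variants call `build_deep_layers(inputs, hidden_units, mode, reg)`,
# which is not shown. A minimal version, assuming a stack of ReLU dense layers
# with L2 regularization followed by a single-unit logit layer; `mode` could
# gate dropout or batch norm but is unused in this sketch.
import tensorflow as tf

def build_deep_layers(inputs, hidden_units, mode, reg):
  net = inputs
  regularizer = tf.contrib.layers.l2_regularizer(reg)
  for i, units in enumerate(hidden_units):
    net = tf.layers.dense(net, units, activation=tf.nn.relu,
                          kernel_regularizer=regularizer,
                          name='dense_%d' % i)
  # Final layer produces one logit per example: shape [batch_size, 1].
  return tf.layers.dense(net, 1, activation=None,
                         kernel_regularizer=regularizer, name='logits')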
def _base_model(features=None,
                parent_scope_name=None,
                mode=None,
                linear_feature_columns=None,
                linear_optimizer='Ftrl',
                dnn_feature_columns=None,
                dnn_optimizer='Adagrad',
                dnn_hidden_units=None,
                dnn_activation_fn=nn.relu,
                dnn_dropout=None,
                input_layer_partitioner=None,
                config=None):
    """Builds linear and/or DNN logits and returns `(logits, train_op_fn)`.

    Expected to be called inside an outer variable scope named
    `parent_scope_name`, which the optimizers use to look up their variables.
    """
    if not isinstance(features, dict):
        raise ValueError('features should be a dictionary of `Tensor`s. '
                         'Given type: {}'.format(type(features)))
    if not linear_feature_columns and not dnn_feature_columns:
        raise ValueError(
            'Either linear_feature_columns or dnn_feature_columns must be defined.'
        )

    num_ps_replicas = config.num_ps_replicas if config else 0
    input_layer_partitioner = input_layer_partitioner or (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas, min_slice_size=64 << 20))

    # Build DNN Logits.
    dnn_parent_scope = 'dnn'

    if not dnn_feature_columns:
        dnn_logits = None
    else:
        dnn_optimizer = optimizers.get_optimizer_instance(
            dnn_optimizer, learning_rate=_DNN_LEARNING_RATE)
        _check_no_sync_replicas_optimizer(dnn_optimizer)
        if not dnn_hidden_units:
            raise ValueError(
                'dnn_hidden_units must be defined when dnn_feature_columns is '
                'specified.')
        dnn_partitioner = (partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
        with variable_scope.variable_scope(dnn_parent_scope,
                                           values=tuple(
                                               six.itervalues(features)),
                                           partitioner=dnn_partitioner):

            dnn_logit_fn = dnn._dnn_logit_fn_builder(  # pylint: disable=protected-access
                units=1,
                hidden_units=dnn_hidden_units,
                feature_columns=dnn_feature_columns,
                activation_fn=dnn_activation_fn,
                dropout=dnn_dropout,
                input_layer_partitioner=None)
            dnn_logits = dnn_logit_fn(features=features, mode=mode)

    linear_parent_scope = 'linear'

    if not linear_feature_columns:
        linear_logits = None
    else:
        linear_optimizer = optimizers.get_optimizer_instance(
            linear_optimizer,
            learning_rate=_linear_learning_rate(len(linear_feature_columns)))
        _check_no_sync_replicas_optimizer(linear_optimizer)
        with variable_scope.variable_scope(
                linear_parent_scope,
                values=tuple(six.itervalues(features)),
                partitioner=input_layer_partitioner) as scope:
            logit_fn = linear._linear_logit_fn_builder(  # pylint: disable=protected-access
                units=1,
                feature_columns=linear_feature_columns)
            linear_logits = logit_fn(features=features)
            _add_layer_summary(linear_logits, scope.name)

    # Combine logits
    if dnn_logits is not None and linear_logits is not None:
        logits = dnn_logits + linear_logits
    elif dnn_logits is not None:
        logits = dnn_logits
    else:
        logits = linear_logits

    def _train_op_fn(loss):
        """Returns the op to optimize the loss."""
        train_ops = []
        if dnn_logits is not None:
            train_ops.append(
                dnn_optimizer.minimize(
                    loss,
                    var_list=ops.get_collection(
                        ops.GraphKeys.TRAINABLE_VARIABLES,
                        scope=parent_scope_name + '/' + dnn_parent_scope)))
        if linear_logits is not None:
            train_ops.append(
                linear_optimizer.minimize(
                    loss,
                    var_list=ops.get_collection(
                        ops.GraphKeys.TRAINABLE_VARIABLES,
                        scope=parent_scope_name + '/' + linear_parent_scope)))

        train_op = control_flow_ops.group(*train_ops)
        with ops.control_dependencies([train_op]):
            return tf.no_op()

    return logits, _train_op_fn
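# --- Caller sketch (not part of the original example) ---
# `_base_model` prefixes its var_list lookups with `parent_scope_name`, so it is
# meant to be invoked inside an outer variable scope of that name. A minimal
# two-tower caller; the tower names, column lists, and hidden units are
# hypothetical.
from tensorflow.python.ops import variable_scope

def build_two_tower_logits(features, mode, linear_cols, dnn_cols, config):
  towers = {}
  for name in ('ctr', 'cvr'):
    with variable_scope.variable_scope(name):
      logits, train_op_fn = _base_model(
          features=features,
          parent_scope_name=name,
          mode=mode,
          linear_feature_columns=linear_cols,
          dnn_feature_columns=dnn_cols,
          dnn_hidden_units=[256, 128],
          config=config)
      towers[name] = (logits, train_op_fn)
  return towers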