Exemple #1
0
    def model_fn(features, labels, mode):
      """BaselineModel model_fn.

      Builds a feed-forward network with one `Dense` layer per entry of
      `self._hidden_units` followed by a single-unit logit layer, and computes
      the loss with `self._loss(class_labels, logits)`.

      Args:
        features: `Tensor` or `dict` of `Tensor`.
        labels: A `dict` of `Tensor` objects. Expects to have a key/value pair
          for the key `self._label_column_name`.
        mode: Defines whether this is training, evaluation or prediction. See
          `ModeKeys`. Only TRAIN and EVAL are implemented.

      Returns:
        An instance of `tf.estimator.EstimatorSpec`, which encapsulates the
        `mode`, `predictions`, `loss` and either the `eval_metric_ops` (EVAL)
        or the `train_op` (TRAIN). Here `predictions` is a `dict` of `Tensor`
        objects keyed by `(label_column_name, 'class_ids')` and
        `(label_column_name, 'logistic')`, representing the prediction of the
        binary classification model. `loss` is the value returned by
        `self._loss` and `train_op` is the op for training.

      Raises:
        ValueError: if `mode` is neither TRAIN nor EVAL.
      """
      # Fail fast with a clear message: only TRAIN and EVAL build an
      # EstimatorSpec below, so any other mode would otherwise surface as an
      # unrelated error (e.g. an unbound `estimator_spec`).
      supported_modes = (tf.estimator.ModeKeys.TRAIN,
                         tf.estimator.ModeKeys.EVAL)
      if mode not in supported_modes:
        raise ValueError(
            'model_fn only supports TRAIN and EVAL modes, got: {}'.format(
                mode))

      # Instantiates a tensor with true class labels
      class_labels = labels[self._label_column_name]

      tf.logging.info('model_fn for mode: {}'.format(mode))

      with tf.name_scope('model'):
        input_layer = tf.feature_column.input_layer(features,
                                                    self._feature_columns)
        layer = input_layer
        for unit in self._hidden_units:
          layer = tf.layers.Dense(unit, activation=self._activation)(layer)
        logits = tf.layers.Dense(1)(layer)
        sigmoid_output = tf.nn.sigmoid(logits, name='sigmoid')
        # Hard 0/1 predictions: threshold the sigmoid output at 0.5.
        class_predictions = tf.cast(tf.greater(sigmoid_output, 0.5), tf.float32)
        tf.summary.histogram('class_predictions', class_predictions)

      # Initializes Loss Functions
      loss = self._loss(class_labels, logits)

      # Sets up dictionaries used for computing performance metrics
      predictions = {
          (self._label_column_name, 'class_ids'):
              tf.reshape(class_predictions, [-1]),
          (self._label_column_name, 'logistic'):
              tf.reshape(sigmoid_output, [-1])
      }

      # EVAL Mode
      if mode == tf.estimator.ModeKeys.EVAL:
        # Thresholded predictions feed the count/rate metrics; the raw sigmoid
        # output feeds the AUC metrics.
        class_id_kwargs = {
            'labels': class_labels,
            'predictions': class_predictions
        }
        logistics_kwargs = {
            'labels': class_labels,
            'predictions': sigmoid_output
        }
        with tf.name_scope('eval_metrics'):
          eval_metric_ops = {
              'accuracy': tf.metrics.accuracy(**class_id_kwargs),
              'precision': tf.metrics.precision(**class_id_kwargs),
              'recall': tf.metrics.recall(**class_id_kwargs),
              'fp': tf.metrics.false_positives(**class_id_kwargs),
              'fn': tf.metrics.false_negatives(**class_id_kwargs),
              'tp': tf.metrics.true_positives(**class_id_kwargs),
              'tn': tf.metrics.true_negatives(**class_id_kwargs),
              'fpr': contrib_metrics.streaming_false_positive_rate(**class_id_kwargs),  # pylint: disable=line-too-long
              'fnr': contrib_metrics.streaming_false_negative_rate(**class_id_kwargs),  # pylint: disable=line-too-long
              'auc': tf.metrics.auc(curve='ROC', **logistics_kwargs),
              'aucpr': tf.metrics.auc(curve='PR', **logistics_kwargs)
          }

          # EstimatorSpec object for evaluation
          estimator_spec = tf.estimator.EstimatorSpec(
              mode=mode,
              predictions=predictions,
              loss=loss,
              eval_metric_ops=eval_metric_ops)
        return estimator_spec

      # TRAIN Mode (the only other mode admitted by the guard above)
      train_op_primary = contrib_layers.optimize_loss(
          loss=loss,
          learning_rate=self._learning_rate,
          global_step=contrib_framework.get_global_step(),
          optimizer=self._optimizer)

      return tf.estimator.EstimatorSpec(
          mode=mode,
          predictions=predictions,
          loss=loss,
          train_op=train_op_primary)
        def model_fn(features, labels, mode):
            """BaselineModel model_fn with IPS example re-weighting.

            Builds a feed-forward network with one `Dense` layer per entry of
            `self._hidden_units` followed by a single-unit logit layer, and
            computes the loss with
            `self._loss(class_labels, logits, example_weights)`, where
            `example_weights` is selected by `self._reweighting_type`.

            Args:
              features: `Tensor` or `dict` of `Tensor`.
              labels: A `dict` of `Tensor` objects. Expects to have key/value
                pairs for the keys `self._label_column_name`,
                `IPS_WITH_LABEL_TARGET_COLUMN_NAME` and
                `IPS_WITHOUT_LABEL_TARGET_COLUMN_NAME`.
                IPS stands for inverse propensity score, wherein each example
                is assigned a weight inversely proportional to its propensity
                of appearing in the training distribution. Concretely,
                ips-weight = 1/p(x), where p(x) is the probability of x in the
                training distribution.
                In "IPS_without_label", each example is given a weight as the
                inverse propensity score of its subgroup. For example,
                1/p("Black Female").
                In "IPS_with_label", each example is assigned a weight as the
                inverse propensity score of its subgroup and class membership.
                For example, 1/p("Black Female", "class 0").
              mode: Defines whether this is training, evaluation or
                prediction. See `ModeKeys`. Only TRAIN and EVAL are
                implemented.

            Returns:
              An instance of `tf.estimator.EstimatorSpec`, which encapsulates
              the `mode`, `predictions`, `loss` and either the
              `eval_metric_ops` (EVAL) or the `train_op` (TRAIN). Here
              `predictions` is a `dict` of `Tensor` objects keyed by
              `(label_column_name, 'class_ids')` and
              `(label_column_name, 'logistic')`, representing the prediction
              of the binary classification model. `loss` is the value returned
              by `self._loss` and `train_op` is the op for training.

            Raises:
              ValueError: if `mode` is neither TRAIN nor EVAL, or if
                `self._reweighting_type` is neither 'IPS_with_label' nor
                'IPS_without_label'.
            """
            # Fail fast with a clear message: only TRAIN and EVAL build an
            # EstimatorSpec below, so any other mode would otherwise surface
            # as an unrelated error (e.g. an unbound `estimator_spec`).
            supported_modes = (tf_estimator.ModeKeys.TRAIN,
                               tf_estimator.ModeKeys.EVAL)
            if mode not in supported_modes:
                raise ValueError(
                    'model_fn only supports TRAIN and EVAL modes, got: {}'.
                    format(mode))

            # Instantiates a tensor with true class labels
            class_labels = labels[self._label_column_name]

            # Both weight columns are read up front; only one is used below.
            ips_example_weights_with_label = labels[
                IPS_WITH_LABEL_TARGET_COLUMN_NAME]
            ips_example_weights_without_label = labels[
                IPS_WITHOUT_LABEL_TARGET_COLUMN_NAME]

            tf.logging.info('model_fn for mode: {}'.format(mode))

            with tf.name_scope('model'):
                input_layer = tf.feature_column.input_layer(
                    features, self._feature_columns)
                layer = input_layer
                for unit in self._hidden_units:
                    layer = tf.layers.Dense(unit,
                                            activation=self._activation)(layer)
                logits = tf.layers.Dense(1)(layer)
                sigmoid_output = tf.nn.sigmoid(logits, name='sigmoid')
                # Hard 0/1 predictions: threshold the sigmoid output at 0.5.
                class_predictions = tf.cast(tf.greater(sigmoid_output, 0.5), tf.float32)  # pylint: disable=line-too-long
                tf.summary.histogram('class_predictions', class_predictions)

            # Picks the per-example weights for the configured scheme. An
            # unknown scheme is rejected explicitly instead of leaving
            # `example_weights` unbound.
            if self._reweighting_type == 'IPS_with_label':
                example_weights = ips_example_weights_with_label
            elif self._reweighting_type == 'IPS_without_label':
                example_weights = ips_example_weights_without_label
            else:
                raise ValueError(
                    'Unsupported reweighting_type: {}. Expected '
                    '"IPS_with_label" or "IPS_without_label".'.format(
                        self._reweighting_type))

            # Initializes Loss Functions
            loss = self._loss(class_labels, logits, example_weights)

            # Sets up dictionaries used for computing performance metrics
            predictions = {
                (self._label_column_name, 'class_ids'):
                tf.reshape(class_predictions, [-1]),
                (self._label_column_name, 'logistic'):
                tf.reshape(sigmoid_output, [-1])
            }

            # EVAL Mode
            if mode == tf_estimator.ModeKeys.EVAL:
                # Thresholded predictions feed the count/rate metrics; the raw
                # sigmoid output feeds the AUC metrics.
                class_id_kwargs = {
                    'labels': class_labels,
                    'predictions': class_predictions
                }
                logistics_kwargs = {
                    'labels': class_labels,
                    'predictions': sigmoid_output
                }
                with tf.name_scope('eval_metrics'):
                    eval_metric_ops = {
                        'accuracy':
                        tf.metrics.accuracy(**class_id_kwargs),
                        'precision':
                        tf.metrics.precision(**class_id_kwargs),
                        'recall':
                        tf.metrics.recall(**class_id_kwargs),
                        'fp':
                        tf.metrics.false_positives(**class_id_kwargs),
                        'fn':
                        tf.metrics.false_negatives(**class_id_kwargs),
                        'tp':
                        tf.metrics.true_positives(**class_id_kwargs),
                        'tn':
                        tf.metrics.true_negatives(**class_id_kwargs),
                        'fpr':
                        contrib_metrics.streaming_false_positive_rate(
                            **class_id_kwargs),  # pylint: disable=line-too-long
                        'fnr':
                        contrib_metrics.streaming_false_negative_rate(
                            **class_id_kwargs),  # pylint: disable=line-too-long
                        'auc':
                        tf.metrics.auc(curve='ROC', **logistics_kwargs),
                        'aucpr':
                        tf.metrics.auc(curve='PR', **logistics_kwargs)
                    }

                    # EstimatorSpec object for evaluation
                    estimator_spec = tf_estimator.EstimatorSpec(
                        mode=mode,
                        predictions=predictions,
                        loss=loss,
                        eval_metric_ops=eval_metric_ops)
                return estimator_spec

            # TRAIN Mode (the only other mode admitted by the guard above)
            train_op_primary = contrib_layers.optimize_loss(
                loss=loss,
                learning_rate=self._learning_rate,
                global_step=contrib_framework.get_global_step(),
                optimizer=self._optimizer)

            return tf_estimator.EstimatorSpec(
                mode=mode,
                predictions=predictions,
                loss=loss,
                train_op=train_op_primary)
        def model_fn(features, labels, mode):
            """robustModel model_fn.

            Builds two networks: a primary classifier (one `Dense` layer per
            entry of `self._primary_hidden_units` plus a single-unit logit
            layer) and an adversary (one `Dense` layer per entry of
            `self._adversary_hidden_units` plus a single-unit output layer)
            whose output is turned into per-example weights by
            `self._compute_example_weights` once the step counter exceeds
            `self._pretrain_steps`; before that, all weights are ones.

            Args:
              features: `dict` of `Tensor`.
              labels: A `dict` of `Tensor` objects. Expects to have a
                key/value pair for the key `self._label_column_name`.
              mode: Defines whether this is training, evaluation or
                prediction. See `ModeKeys`. Only TRAIN and EVAL are
                implemented.

            Returns:
              An instance of `tf.estimator.EstimatorSpec`, which encapsulates
              the `mode`, `predictions`, `loss` and either the
              `eval_metric_ops` (EVAL) or the `train_op` (TRAIN). Here
              `predictions` is a `dict` of `Tensor` objects keyed by
              `(label_column_name, 'class_ids')`,
              `(label_column_name, 'logistic')` and `'example_weights'`. The
              reported `loss` is the primary loss in EVAL and the sum of the
              primary and adversary losses in TRAIN.

            Raises:
              ValueError: if a protected column name is not a key of
                `features`, or if `mode` is neither TRAIN nor EVAL.
            """
            for col in self._protected_column_names:
                if col not in features:
                    raise ValueError(
                        'Protected column <{}> should be in features.'.format(
                            col))

            # Fail fast with a clear message: only TRAIN and EVAL build an
            # EstimatorSpec below, so any other mode would otherwise surface
            # as an unrelated error (e.g. an unbound `estimator_spec`).
            supported_modes = (tf.estimator.ModeKeys.TRAIN,
                               tf.estimator.ModeKeys.EVAL)
            if mode not in supported_modes:
                raise ValueError(
                    'model_fn only supports TRAIN and EVAL modes, got: {}'.
                    format(mode))

            # Instantiates a tensor with true class labels
            class_labels = labels[self._label_column_name]

            # Initialize a global step variable used for alternate training
            current_step = self._get_or_create_global_step_var()

            tf.logging.info('model_fn for mode: {}'.format(mode))

            # The 'primary' / 'adversary' variable scopes matter: the TRAIN
            # branch below selects each network's variables by substring match
            # on these names.
            with tf.name_scope('primary_NN'):
                with tf.variable_scope('primary'):
                    input_layer = tf.feature_column.input_layer(
                        features, self._feature_columns)
                    layer = input_layer
                    for unit in self._primary_hidden_units:
                        layer = tf.layers.Dense(
                            unit, activation=self._activation)(layer)
                    logits = tf.layers.Dense(1)(layer)
                    sigmoid_output = tf.nn.sigmoid(logits, name='sigmoid')
                    # Hard 0/1 predictions: threshold the sigmoid at 0.5.
                    class_predictions = tf.cast(tf.greater(sigmoid_output, 0.5), tf.float32)  # pylint: disable=line-too-long
                    tf.summary.histogram('class_predictions',
                                         class_predictions)

            with tf.name_scope('adversary_NN'):
                with tf.variable_scope('adversary'):
                    # Filters and keeps only protected features and feature columns.
                    adversarial_features, adversary_feature_columns = self._get_adversary_features_and_feature_columns(features)  # pylint: disable=line-too-long

                    adv_input_layer = tf.feature_column.input_layer(
                        adversarial_features, adversary_feature_columns)

                    # Hidden adversary layers are built without an explicit
                    # activation argument.
                    adv_layer = adv_input_layer
                    for adv_unit in self._adversary_hidden_units:
                        adv_layer = tf.layers.Dense(adv_unit)(adv_layer)
                    adv_output_layer = tf.layers.Dense(
                        1, use_bias=True)(adv_layer)

                    # Uniform weights during pretraining; adversary-derived
                    # weights afterwards.
                    example_weights = tf.cond(
                        tf.greater(current_step, self._pretrain_steps),
                        true_fn=lambda: self._compute_example_weights(
                            adv_output_layer),
                        false_fn=lambda: tf.ones_like(class_labels))

            # Initializes Loss Functions
            primary_loss = self._primary_loss(class_labels, logits,
                                              example_weights)
            adversary_loss = self._adversary_loss(class_labels, logits,
                                                  example_weights)

            # Sets up dictionaries used for computing performance metrics
            predictions = {
                (self._label_column_name, 'class_ids'):
                tf.reshape(class_predictions, [-1]),
                (self._label_column_name, 'logistic'):
                tf.reshape(sigmoid_output, [-1]),
                'example_weights':
                tf.reshape(example_weights, [-1])
            }

            # EVAL Mode
            if mode == tf.estimator.ModeKeys.EVAL:
                # Thresholded predictions feed the count/rate metrics; the raw
                # sigmoid output feeds the AUC metrics.
                class_id_kwargs = {
                    'labels': class_labels,
                    'predictions': class_predictions
                }
                logistics_kwargs = {
                    'labels': class_labels,
                    'predictions': sigmoid_output
                }
                with tf.name_scope('eval_metrics'):
                    eval_metric_ops = {
                        'accuracy':
                        tf.metrics.accuracy(**class_id_kwargs),
                        'precision':
                        tf.metrics.precision(**class_id_kwargs),
                        'recall':
                        tf.metrics.recall(**class_id_kwargs),
                        'fp':
                        tf.metrics.false_positives(**class_id_kwargs),
                        'fn':
                        tf.metrics.false_negatives(**class_id_kwargs),
                        'tp':
                        tf.metrics.true_positives(**class_id_kwargs),
                        'tn':
                        tf.metrics.true_negatives(**class_id_kwargs),
                        'fpr':
                        contrib_metrics.streaming_false_positive_rate(
                            **class_id_kwargs),  # pylint: disable=line-too-long
                        'fnr':
                        contrib_metrics.streaming_false_negative_rate(
                            **class_id_kwargs),  # pylint: disable=line-too-long
                        'auc':
                        tf.metrics.auc(curve='ROC', **logistics_kwargs),
                        'aucpr':
                        tf.metrics.auc(curve='PR', **logistics_kwargs)
                    }

                    # EstimatorSpec object for evaluation
                    estimator_spec = tf.estimator.EstimatorSpec(
                        mode=mode,
                        predictions=predictions,
                        loss=primary_loss,
                        eval_metric_ops=eval_metric_ops)
                return estimator_spec

            # TRAIN Mode (the only other mode admitted by the guard above)
            # Filters trainable variables for each task
            all_trainable_vars = tf.trainable_variables()
            primary_trainable_vars = [
                v for v in all_trainable_vars if 'primary' in v.op.name
            ]
            adversary_trainable_vars = [
                v for v in all_trainable_vars if 'adversary' in v.op.name
            ]

            # TRAIN_OP for adversary DNN
            train_op_adversary = contrib_layers.optimize_loss(
                loss=adversary_loss,
                variables=adversary_trainable_vars,
                global_step=contrib_framework.get_global_step(),
                learning_rate=self._adversary_learning_rate,
                optimizer=self._optimizer)

            # TRAIN_OP for primary DNN
            train_op_primary = contrib_layers.optimize_loss(
                loss=primary_loss,
                variables=primary_trainable_vars,
                global_step=contrib_framework.get_global_step(),
                learning_rate=self._primary_learning_rate,
                optimizer=self._optimizer)

            # Up to `pretrain_steps` the selected train op contains the
            # primary update only; beyond that it groups the primary and the
            # adversary updates together.
            # NOTE(review): both train ops are created outside the tf.cond
            # branches. In TF1 graph mode, ops created outside a cond branch
            # may run regardless of the predicate - confirm the pretraining
            # gate behaves as intended. Both optimize_loss calls are also
            # handed the same global step; verify the resulting step counting.
            return tf.estimator.EstimatorSpec(
                mode=mode,
                predictions=predictions,
                loss=primary_loss + adversary_loss,
                train_op=tf.cond(
                    tf.greater(current_step, self._pretrain_steps),
                    true_fn=lambda: tf.group(
                        [train_op_primary, train_op_adversary]),  # pylint: disable=line-too-long
                    false_fn=lambda: tf.group([train_op_primary])))