def model_fn(features, labels, mode):
  """BaselineModel model_fn.

  Builds a feed-forward binary classifier (one `Dense` layer per entry of
  `self._hidden_units`, followed by a single-unit logit layer) and wraps it
  in an `EstimatorSpec` for training or evaluation.

  Args:
    features: `Tensor` or `dict` of `Tensor`.
    labels: A `dict` of `Tensor` objects. Expects to have a key/value pair
      for the key `self._label_column_name`.
    mode: Defines whether this is training, evaluation or prediction. See
      `ModeKeys`. Only TRAIN and EVAL are implemented.

  Returns:
    An instance of `tf.estimator.EstimatorSpec`, which encapsulates the
    `mode`, `predictions`, `loss` and, in TRAIN mode, the `train_op` (in
    EVAL mode, the `eval_metric_ops`). Here `predictions` is a `dict` of
    `Tensor` objects representing the prediction of the binary
    classification model, `loss` is the value returned by `self._loss`, and
    `train_op` is the op for training.

  Raises:
    ValueError: if `mode` is neither TRAIN nor EVAL.
  """
  # Fail fast with a clear message: previously an unsupported mode fell
  # through both branches below and hit an unbound `estimator_spec`.
  supported_modes = (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL)
  if mode not in supported_modes:
    raise ValueError(
        'model_fn only supports TRAIN and EVAL modes, got: {}'.format(mode))

  # Instantiates a tensor with true class labels
  class_labels = labels[self._label_column_name]

  tf.logging.info('model_fn for mode: {}'.format(mode))

  with tf.name_scope('model'):
    input_layer = tf.feature_column.input_layer(features,
                                                self._feature_columns)
    layer = input_layer
    for unit in self._hidden_units:
      layer = tf.layers.Dense(unit, activation=self._activation)(layer)
    logits = tf.layers.Dense(1)(layer)
    sigmoid_output = tf.nn.sigmoid(logits, name='sigmoid')
    # Hard 0/1 predictions obtained by thresholding the sigmoid at 0.5.
    class_predictions = tf.cast(tf.greater(sigmoid_output, 0.5), tf.float32)
    tf.summary.histogram('class_predictions', class_predictions)

  # Initializes Loss Functions
  loss = self._loss(class_labels, logits)

  # Sets up dictionaries used for computing performance metrics.
  # Prediction keys are (label_column_name, head) tuples.
  predictions = {
      (self._label_column_name, 'class_ids'):
          tf.reshape(class_predictions, [-1]),
      (self._label_column_name, 'logistic'):
          tf.reshape(sigmoid_output, [-1])
  }

  # EVAL Mode
  if mode == tf.estimator.ModeKeys.EVAL:
    class_id_kwargs = {
        'labels': class_labels,
        'predictions': class_predictions
    }
    logistics_kwargs = {
        'labels': class_labels,
        'predictions': sigmoid_output
    }
    with tf.name_scope('eval_metrics'):
      eval_metric_ops = {
          'accuracy': tf.metrics.accuracy(**class_id_kwargs),
          'precision': tf.metrics.precision(**class_id_kwargs),
          'recall': tf.metrics.recall(**class_id_kwargs),
          'fp': tf.metrics.false_positives(**class_id_kwargs),
          'fn': tf.metrics.false_negatives(**class_id_kwargs),
          'tp': tf.metrics.true_positives(**class_id_kwargs),
          'tn': tf.metrics.true_negatives(**class_id_kwargs),
          'fpr': contrib_metrics.streaming_false_positive_rate(
              **class_id_kwargs),
          'fnr': contrib_metrics.streaming_false_negative_rate(
              **class_id_kwargs),
          'auc': tf.metrics.auc(curve='ROC', **logistics_kwargs),
          'aucpr': tf.metrics.auc(curve='PR', **logistics_kwargs)
      }

    # EstimatorSpec object for evaluation
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=loss,
        eval_metric_ops=eval_metric_ops)

  # TRAIN Mode (the only remaining supported mode after the guard above).
  train_op_primary = contrib_layers.optimize_loss(
      loss=loss,
      learning_rate=self._learning_rate,
      global_step=contrib_framework.get_global_step(),
      optimizer=self._optimizer)

  return tf.estimator.EstimatorSpec(
      mode=mode,
      predictions=predictions,
      loss=loss,
      train_op=train_op_primary)
def model_fn(features, labels, mode):
  """model_fn for the IPS-reweighted binary classifier.

  Builds the same feed-forward network as the baseline (one `Dense` layer
  per entry of `self._hidden_units` plus a single-unit logit layer), but
  passes per-example inverse-propensity-score (IPS) weights to `self._loss`.

  Args:
    features: `Tensor` or `dict` of `Tensor`.
    labels: A `dict` of `Tensor` objects. Expects to have a key/value pair
      for the key `self._label_column_name`, and for the keys
      `IPS_WITH_LABEL_TARGET_COLUMN_NAME` and
      `IPS_WITHOUT_LABEL_TARGET_COLUMN_NAME`.
      IPS stands for inverse propensity score, wherein each example is
      assigned a weight inversely proportional to its propensity of
      appearing in the training distribution. Concretely,
      ips-weight = 1/p(x), where p(x) is the probability of x in the
      training distribution.
      In "IPS_without_label", each example is given a weight as the inverse
      propensity score of its subgroup. For example, 1/p("Black Female").
      In "IPS_with_label", each example is assigned a weight as the inverse
      propensity score of its subgroup and class membership. For example,
      1/p("Black Female", "class 0").
    mode: Defines whether this is training, evaluation or prediction. See
      `ModeKeys`. Only TRAIN and EVAL are implemented.

  Returns:
    An instance of `tf.estimator.EstimatorSpec`, which encapsulates the
    `mode`, `predictions`, `loss` and, in TRAIN mode, the `train_op` (in
    EVAL mode, the `eval_metric_ops`). Here `predictions` is a `dict` of
    `Tensor` objects representing the prediction of the binary
    classification model, `loss` is the value returned by `self._loss`, and
    `train_op` is the op for training.

  Raises:
    ValueError: if `mode` is neither TRAIN nor EVAL, or if
      `self._reweighting_type` is not one of 'IPS_with_label' or
      'IPS_without_label'.
  """
  # Fail fast with a clear message: previously an unsupported mode fell
  # through both branches below and hit an unbound `estimator_spec`.
  supported_modes = (tf_estimator.ModeKeys.TRAIN, tf_estimator.ModeKeys.EVAL)
  if mode not in supported_modes:
    raise ValueError(
        'model_fn only supports TRAIN and EVAL modes, got: {}'.format(mode))

  # Instantiates a tensor with true class labels
  class_labels = labels[self._label_column_name]

  # Both weight columns are read up front, regardless of which is used.
  ips_example_weights_with_label = labels[IPS_WITH_LABEL_TARGET_COLUMN_NAME]
  ips_example_weights_without_label = labels[
      IPS_WITHOUT_LABEL_TARGET_COLUMN_NAME]

  tf.logging.info('model_fn for mode: {}'.format(mode))

  with tf.name_scope('model'):
    input_layer = tf.feature_column.input_layer(features,
                                                self._feature_columns)
    layer = input_layer
    for unit in self._hidden_units:
      layer = tf.layers.Dense(unit, activation=self._activation)(layer)
    logits = tf.layers.Dense(1)(layer)
    sigmoid_output = tf.nn.sigmoid(logits, name='sigmoid')
    # Hard 0/1 predictions obtained by thresholding the sigmoid at 0.5.
    class_predictions = tf.cast(tf.greater(sigmoid_output, 0.5), tf.float32)
    tf.summary.histogram('class_predictions', class_predictions)

  # Selects the example weights matching the configured reweighting scheme.
  if self._reweighting_type == 'IPS_with_label':
    example_weights = ips_example_weights_with_label
  elif self._reweighting_type == 'IPS_without_label':
    example_weights = ips_example_weights_without_label
  else:
    # Without this branch `example_weights` would be unbound below.
    raise ValueError(
        'Unsupported reweighting_type <{}>; expected "IPS_with_label" or '
        '"IPS_without_label".'.format(self._reweighting_type))

  # Initializes Loss Functions
  loss = self._loss(class_labels, logits, example_weights)

  # Sets up dictionaries used for computing performance metrics.
  # Prediction keys are (label_column_name, head) tuples.
  predictions = {
      (self._label_column_name, 'class_ids'):
          tf.reshape(class_predictions, [-1]),
      (self._label_column_name, 'logistic'):
          tf.reshape(sigmoid_output, [-1])
  }

  # EVAL Mode
  if mode == tf_estimator.ModeKeys.EVAL:
    class_id_kwargs = {
        'labels': class_labels,
        'predictions': class_predictions
    }
    logistics_kwargs = {
        'labels': class_labels,
        'predictions': sigmoid_output
    }
    with tf.name_scope('eval_metrics'):
      eval_metric_ops = {
          'accuracy': tf.metrics.accuracy(**class_id_kwargs),
          'precision': tf.metrics.precision(**class_id_kwargs),
          'recall': tf.metrics.recall(**class_id_kwargs),
          'fp': tf.metrics.false_positives(**class_id_kwargs),
          'fn': tf.metrics.false_negatives(**class_id_kwargs),
          'tp': tf.metrics.true_positives(**class_id_kwargs),
          'tn': tf.metrics.true_negatives(**class_id_kwargs),
          'fpr': contrib_metrics.streaming_false_positive_rate(
              **class_id_kwargs),
          'fnr': contrib_metrics.streaming_false_negative_rate(
              **class_id_kwargs),
          'auc': tf.metrics.auc(curve='ROC', **logistics_kwargs),
          'aucpr': tf.metrics.auc(curve='PR', **logistics_kwargs)
      }

    # EstimatorSpec object for evaluation
    return tf_estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=loss,
        eval_metric_ops=eval_metric_ops)

  # TRAIN Mode (the only remaining supported mode after the guard above).
  train_op_primary = contrib_layers.optimize_loss(
      loss=loss,
      learning_rate=self._learning_rate,
      global_step=contrib_framework.get_global_step(),
      optimizer=self._optimizer)

  return tf_estimator.EstimatorSpec(
      mode=mode,
      predictions=predictions,
      loss=loss,
      train_op=train_op_primary)
def model_fn(features, labels, mode):
  """robustModel model_fn.

  Builds two networks: a primary binary classifier (variable scope
  'primary') and an adversary (variable scope 'adversary') whose output is
  turned into per-example weights by `self._compute_example_weights`. Until
  the step counter exceeds `self._pretrain_steps`, the example weights are
  all ones.

  Args:
    features: `dict` of `Tensor`.
    labels: A `dict` of `Tensor` objects. Expects to have a key/value pair
      for the key `self._label_column_name`.
    mode: Defines whether this is training, evaluation or prediction. See
      `ModeKeys`. Only TRAIN and EVAL are implemented.

  Returns:
    An instance of `tf.estimator.EstimatorSpec`, which encapsulates the
    `mode`, `predictions`, `loss` and, in TRAIN mode, the `train_op` (in
    EVAL mode, the `eval_metric_ops`). Here `predictions` is a `dict` of
    `Tensor` objects representing the prediction of the binary
    classification model together with the example weights. In EVAL mode
    `loss` is the primary loss; in TRAIN mode it is the sum of the primary
    and adversary losses.

  Raises:
    ValueError: if a name in `self._protected_column_names` is missing from
      `features`, or if `mode` is neither TRAIN nor EVAL.
  """
  for col in self._protected_column_names:
    if col not in features:
      raise ValueError(
          'Protected column <{}> should be in features.'.format(col))

  # Fail fast with a clear message: previously an unsupported mode fell
  # through both branches below and hit an unbound `estimator_spec`.
  supported_modes = (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL)
  if mode not in supported_modes:
    raise ValueError(
        'model_fn only supports TRAIN and EVAL modes, got: {}'.format(mode))

  # Instantiates a tensor with true class labels
  class_labels = labels[self._label_column_name]

  # Initialize a global step variable used for alternate training
  current_step = self._get_or_create_global_step_var()

  tf.logging.info('model_fn for mode: {}'.format(mode))

  with tf.name_scope('primary_NN'):
    with tf.variable_scope('primary'):
      input_layer = tf.feature_column.input_layer(features,
                                                  self._feature_columns)
      layer = input_layer
      for unit in self._primary_hidden_units:
        layer = tf.layers.Dense(unit, activation=self._activation)(layer)
      logits = tf.layers.Dense(1)(layer)
      sigmoid_output = tf.nn.sigmoid(logits, name='sigmoid')
      # Hard 0/1 predictions obtained by thresholding the sigmoid at 0.5.
      class_predictions = tf.cast(
          tf.greater(sigmoid_output, 0.5), tf.float32)
      tf.summary.histogram('class_predictions', class_predictions)

  with tf.name_scope('adversary_NN'):
    with tf.variable_scope('adversary'):
      # Filters and keeps only protected features and feature columns.
      adversarial_features, adversary_feature_columns = (
          self._get_adversary_features_and_feature_columns(features))
      adv_input_layer = tf.feature_column.input_layer(
          adversarial_features, adversary_feature_columns)
      adv_layer = adv_input_layer
      # Adversary hidden layers are built without an activation argument.
      for adv_unit in self._adversary_hidden_units:
        adv_layer = tf.layers.Dense(adv_unit)(adv_layer)
      adv_output_layer = tf.layers.Dense(1, use_bias=True)(adv_layer)

      # Uniform weights until `pretrain_steps` is exceeded, then weights
      # derived from the adversary output.
      example_weights = tf.cond(
          tf.greater(current_step, self._pretrain_steps),
          true_fn=lambda: self._compute_example_weights(adv_output_layer),
          false_fn=lambda: tf.ones_like(class_labels))

  # Initializes Loss Functions
  primary_loss = self._primary_loss(class_labels, logits, example_weights)
  adversary_loss = self._adversary_loss(class_labels, logits,
                                        example_weights)

  # Sets up dictionaries used for computing performance metrics
  predictions = {
      (self._label_column_name, 'class_ids'):
          tf.reshape(class_predictions, [-1]),
      (self._label_column_name, 'logistic'):
          tf.reshape(sigmoid_output, [-1]),
      'example_weights':
          tf.reshape(example_weights, [-1])
  }

  # EVAL Mode
  if mode == tf.estimator.ModeKeys.EVAL:
    class_id_kwargs = {
        'labels': class_labels,
        'predictions': class_predictions
    }
    logistics_kwargs = {
        'labels': class_labels,
        'predictions': sigmoid_output
    }
    with tf.name_scope('eval_metrics'):
      eval_metric_ops = {
          'accuracy': tf.metrics.accuracy(**class_id_kwargs),
          'precision': tf.metrics.precision(**class_id_kwargs),
          'recall': tf.metrics.recall(**class_id_kwargs),
          'fp': tf.metrics.false_positives(**class_id_kwargs),
          'fn': tf.metrics.false_negatives(**class_id_kwargs),
          'tp': tf.metrics.true_positives(**class_id_kwargs),
          'tn': tf.metrics.true_negatives(**class_id_kwargs),
          'fpr': contrib_metrics.streaming_false_positive_rate(
              **class_id_kwargs),
          'fnr': contrib_metrics.streaming_false_negative_rate(
              **class_id_kwargs),
          'auc': tf.metrics.auc(curve='ROC', **logistics_kwargs),
          'aucpr': tf.metrics.auc(curve='PR', **logistics_kwargs)
      }

    # EstimatorSpec object for evaluation
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=primary_loss,
        eval_metric_ops=eval_metric_ops)

  # TRAIN Mode (the only remaining supported mode after the guard above).
  # Filters trainable variables for each task by variable-scope name.
  all_trainable_vars = tf.trainable_variables()
  primary_trainable_vars = [
      v for v in all_trainable_vars if 'primary' in v.op.name
  ]
  adversary_trainable_vars = [
      v for v in all_trainable_vars if 'adversary' in v.op.name
  ]

  # TRAIN_OP for adversary DNN
  train_op_adversary = contrib_layers.optimize_loss(
      loss=adversary_loss,
      variables=adversary_trainable_vars,
      global_step=contrib_framework.get_global_step(),
      learning_rate=self._adversary_learning_rate,
      optimizer=self._optimizer)

  # TRAIN_OP for primary DNN
  train_op_primary = contrib_layers.optimize_loss(
      loss=primary_loss,
      variables=primary_trainable_vars,
      global_step=contrib_framework.get_global_step(),
      learning_rate=self._primary_learning_rate,
      optimizer=self._optimizer)

  # Up to `pretrain_steps` the returned train_op groups only the primary
  # op; beyond `pretrain_steps` it groups the primary and adversary ops.
  # NOTE(review): both train ops are created outside the `tf.cond` branch
  # functions, and `tf.cond` only gates ops defined inside `true_fn` /
  # `false_fn` -- confirm the adversary update is really skipped during
  # pretraining. Both `optimize_loss` calls are also handed the same global
  # step; verify how often it is incremented per training step.
  train_op = tf.cond(
      tf.greater(current_step, self._pretrain_steps),
      true_fn=lambda: tf.group([train_op_primary, train_op_adversary]),
      false_fn=lambda: tf.group([train_op_primary]))

  return tf.estimator.EstimatorSpec(
      mode=mode,
      predictions=predictions,
      loss=primary_loss + adversary_loss,
      train_op=train_op)