Example #1
 def build_train_op(self, learning_rate, unconstrained=False):
     ctx = tfco.rate_context(self.predictions_tensor, self.labels_placeholder)
     positive_slice = ctx.subset(self.labels_placeholder > 0)
     overall_tpr = tfco.positive_prediction_rate(positive_slice)
     constraints = []
     if not unconstrained:
         for placeholder in self.protected_placeholders:
             slice_tpr = tfco.positive_prediction_rate(ctx.subset((placeholder > 0) & (self.labels_placeholder > 0)))
             constraints.append(slice_tpr <= overall_tpr + self.tpr_max_diff)
     mp = tfco.RateMinimizationProblem(tfco.error_rate(ctx), constraints)
     opt = tfco.ProxyLagrangianOptimizer(tf.train.AdamOptimizer(learning_rate))
     self.train_op = opt.minimize(minimization_problem=mp)
     return self.train_op
Example #2
 def __init__(self, num_classes):
   # Set up TFCO library objects. We will create a constrained optimization
   # problem object with dummy constraints on the false negative rates, so
   # when we read out the "constraint value", we get the false negative rates.
   self.labels_placeholder = Placeholder()
   self.logits_placeholder = Placeholder()
   context = tfco.multiclass_rate_context(
       num_classes, self.logits_placeholder, self.labels_placeholder)
   constraints = [
       tfco.false_negative_rate(context, ii) <= 0.0
       for ii in range(num_classes)]
   self.problem = tfco.RateMinimizationProblem(
       tfco.wrap_rate(0.0), constraints)
Example #3
 def build_train_op_tfco(self,
                         learning_rate=0.1):
     ctx = tfco.multiclass_rate_context(self.num_classes,
                                        self.predictions_tensor,
                                        self.labels_placeholder)
     # positive_slice = ctx.subset(self.labels_placeholder > 0)
     # overall_tpr = tfco.positive_prediction_rate(positive_slice)
     constraints = []
     for c in range(self.num_classes):
         pos_rate = tfco.positive_prediction_rate(ctx, c)
         constraints.append(pos_rate <= (1.05 / self.num_classes))
     mp = tfco.RateMinimizationProblem(tfco.error_rate(ctx), constraints)
     self.opt = tfco.ProxyLagrangianOptimizerV1(tf.train.AdamOptimizer(learning_rate))
     self.train_op = self.opt.minimize(mp)
     return self.train_op
Example #4
    def build_train_op_ctx(self, learning_rate=.001):
        # We initialize the constrained problem using the rate helpers.
        ctx = tfco.rate_context(self.predictions_tensor,
                                self.labels_placeholder)
        positive_slice = ctx.subset(self.labels_placeholder > 0)
        overall_tpr = tfco.positive_prediction_rate(positive_slice)
        constraints = []
        for placeholder in self.protected_placeholders:
            slice_tpr = tfco.positive_prediction_rate(
                ctx.subset((placeholder > 0) & (self.labels_placeholder > 0)))
            tmp = 2 * (overall_tpr - slice_tpr)
            constraints.append(tmp)

        constraint = sum(constraints) <= 0.2 * self.didi_tr
        mp = tfco.RateMinimizationProblem(tfco.error_rate(ctx), [constraint])
        opt = tfco.ProxyLagrangianOptimizerV1(
            tf.train.AdamOptimizer(learning_rate))
        self.train_op = opt.minimize(mp)
        return self.train_op
    def build_train_op(self, learning_rate, unconstrained=False):
        ctx = tfco.rate_context(self.predictions_tensor,
                                self.labels_placeholder)
        positive_slice = ctx.subset(self.labels_placeholder > 0)
        overall_tpr = tfco.positive_prediction_rate(positive_slice)
        constraints = []

        # add constraints
        if not unconstrained:

            for constraint in self.constraints:

                print(constraint)
                if len(constraint) == 1:
                    placeholder = self.protected_placeholders_dict[
                        constraint[0]]
                    slice_tpr = tfco.positive_prediction_rate(
                        ctx.subset((placeholder > 0)
                                   & (self.labels_placeholder > 0)))
                elif len(constraint) == 2:
                    placeholder0 = self.protected_placeholders_dict[
                        constraint[0]]
                    placeholder1 = self.protected_placeholders_dict[
                        constraint[1]]
                    slice_tpr = tfco.positive_prediction_rate(
                        ctx.subset((placeholder0 > 0) & (placeholder1 > 0)
                                   & (self.labels_placeholder > 0)))

                constraints.append(
                    slice_tpr >= overall_tpr - self.tpr_max_diff)

        mp = tfco.RateMinimizationProblem(tfco.error_rate(ctx), constraints)
        opt = tfco.ProxyLagrangianOptimizer(
            tf.train.AdamOptimizer(learning_rate))
        self.train_op = opt.minimize(minimization_problem=mp)
        return self.train_op
Example #6
def lagrangian_optimizer_kld(train_set, additive_slack, learning_rate,
                             learning_rate_constraint, loops):
    """Implements surrogate-based Lagrangian optimizer (Algorithm 2).

  Specifically solves:
    min_{theta} sum_{G = 0, 1} KLD(p, pprG(theta))
      s.t. error_rate <= additive_slack,
    where p is the overall proportion of positives and pprG is the positive
    prediction rate for group G.

  We frame this as a constrained optimization problem:
    min_{theta, xi_pos0, xi_pos1, xi_neg0, xi_neg1} {
      -p log(xi_pos0) - (1-p) log(xi_neg0) - p log(xi_pos1)
        -(1-p) log(xi_neg1)}
    s.t.
      error_rate <= additive_slack,
        xi_pos0 <= ppr0(theta), xi_neg0 <= npr0(theta),
        xi_pos1 <= ppr1(theta), xi_neg1 <= npr1(theta),
  and formulate the Lagrangian:
    max_{lambda's >= 0} min_{xi's} {
      -p log(xi_pos0) - (1-p) log(xi_neg0) - p log(xi_pos1)
        -(1-p) log(xi_neg1)
       + lambda_pos0 (xi_pos0 - ppr0(theta))
       + lambda_neg0 (xi_neg0 - npr0(theta))
       + lambda_pos1 (xi_pos1 - ppr1(theta))
       + lambda_neg1 (xi_neg1 - npr1(theta))}
    s.t.
      error_rate <= additive_slack.

  We do best response for the slack variables xi (setting the derivative of the
  Lagrangian w.r.t. each slack to zero, e.g. -p / xi_pos0 + lambda_pos0 = 0):
    BR for xi_pos0 = p / lambda_pos0
    BR for xi_neg0 = (1 - p) / lambda_neg0
    BR for xi_pos1 = p / lambda_pos1
    BR for xi_neg1 = (1 - p) / lambda_neg1
  We do gradient ascent on the lambda's, where
    Gradient w.r.t. lambda_pos0
      = BR for xi_pos0 - ppr0(theta)
      = p / lambda_pos0 - ppr0(theta)
      = Gradient w.r.t. lambda_pos0 of
        (p log(lambda_pos0) - lambda_pos0 ppr0(theta))
    Gradient w.r.t. lambda_neg0
      = Gradient w.r.t. lambda_neg0 of
        ((1 - p) log(lambda_neg0) - lambda_neg0 npr0(theta))
    Gradient w.r.t. lambda_pos1
      = Gradient w.r.t. lambda_pos1 of
        (p log(lambda_pos1) - lambda_pos1 ppr1(theta))
    Gradient w.r.t. lambda_neg1
      = Gradient w.r.t. lambda_neg1 of
        ((1 - p) log(lambda_neg1) - lambda_neg1 npr1(theta)).
  We do gradient descent on thetas's, with ppr's and npr's replaced with hinge
  surrogates. We use concave lower bounds on ppr's and npr's, so that when they
  get negated in the updates, we get convex upper bounds.

  See Appendix D.1 in the paper for more details.

  Args:
    train_set: (features, labels, groups)
    additive_slack: float, additive slack on error rate constraint
    learning_rate: float, learning rate for model parameters
    learning_rate_constraint: float, learning rate for Lagrange multipliers
    loops: int, number of iterations

  Returns:
    stochastic_model containing list of models and probabilities,
    deterministic_model.
  """
    x_train, y_train, z_train = train_set
    dimension = x_train.shape[-1]

    tf.reset_default_graph()

    # Data tensors.
    features_tensor = tf.constant(x_train.astype("float32"), name="features")
    labels_tensor = tf.constant(y_train.astype("float32"), name="labels")

    # Linear model.
    weights = tf.Variable(tf.zeros(dimension, dtype=tf.float32),
                          name="weights")
    threshold = tf.Variable(0, name="threshold", dtype=tf.float32)
    predictions_tensor = (tf.tensordot(features_tensor, weights, axes=(1, 0)) +
                          threshold)

    # Group-specific predictions.
    predictions_group0 = tf.boolean_mask(predictions_tensor,
                                         mask=(z_train < 1))
    num_examples0 = np.sum(z_train < 1)
    predictions_group1 = tf.boolean_mask(predictions_tensor,
                                         mask=(z_train > 0))
    num_examples1 = np.sum(z_train > 0)

    # We use the TF Constrained Optimization (TFCO) library to set up the
    # constrained optimization problem. The library doesn't currently support best
    # responses for slack variables. So we maintain explicit Lagrange multipliers
    # for the slack variables, and let the library deal with the Lagrange
    # multipliers for the error rate constraint.

    # Since we need to perform a gradient descent update on the model parameters,
    # and an ascent update on the Lagrange multipliers on the slack variables, we
    # create a single "minimization" objective using stop gradients, where a
    # descent gradient update has the effect of minimizing over the model
    # parameters and maximizing over the Lagrange multipliers for the slack
    # variables. As noted above, the ascent update on the Lagrange multipliers for
    # the error rate constraint is done by the library internally.

    # Lagrange multiplier variables for the four slack variables.
    lambda_pos0 = tf.Variable(0.5, dtype=tf.float32, name="lambda_pos0")
    lambda_neg0 = tf.Variable(0.5, dtype=tf.float32, name="lambda_neg0")
    lambda_pos1 = tf.Variable(0.5, dtype=tf.float32, name="lambda_pos1")
    lambda_neg1 = tf.Variable(0.5, dtype=tf.float32, name="lambda_neg1")

    # Set up prediction rates and surrogate relaxations on them.
    p = np.mean(y_train)  # Proportion of positives.

    # Positive and negative prediction rates for group 0 and group 1.
    ppr_group0 = tf.reduce_sum(
        tf.cast(
            tf.greater(predictions_group0,
                       tf.zeros(num_examples0, dtype="float32")),
            "float32")) / num_examples0
    npr_group0 = 1 - ppr_group0
    ppr_group1 = tf.reduce_sum(
        tf.cast(
            tf.greater(predictions_group1,
                       tf.zeros(num_examples1, dtype="float32")),
            "float32")) / num_examples1
    npr_group1 = 1 - ppr_group1

    # Hinge concave lower bounds on the positive and negative prediction rates.
    # In the gradient updates, these get negated and become convex upper bounds.
    # For group 0:
    ppr_hinge_group0 = tf.reduce_sum(
        1 - tf.nn.relu(1 - predictions_group0)) * 1.0 / num_examples0
    npr_hinge_group0 = tf.reduce_sum(
        1 - tf.nn.relu(1 + predictions_group0)) * 1.0 / num_examples0
    # For group 1:
    ppr_hinge_group1 = tf.reduce_sum(
        1 - tf.nn.relu(1 - predictions_group1)) * 1.0 / num_examples1
    npr_hinge_group1 = tf.reduce_sum(
        1 - tf.nn.relu(1 + predictions_group1)) * 1.0 / num_examples1

    # Set up KL-divergence objective for constrained optimization.
    # We use stop gradients to ensure that a single descent gradient update on the
    # objective has the effect of minimizing over the model parameters and
    # maximizing over the Lagrange multipliers for the slack variables.

    # KL-divergence for group 0.
    kld_hinge_pos_group0 = (-tf.stop_gradient(lambda_pos0) * ppr_hinge_group0 -
                            p * tf.log(lambda_pos0) +
                            lambda_pos0 * tf.stop_gradient(ppr_group0))
    kld_hinge_neg_group0 = (-tf.stop_gradient(lambda_neg0) * npr_hinge_group0 -
                            (1 - p) * tf.log(lambda_neg0) +
                            lambda_neg0 * tf.stop_gradient(npr_group0))
    kld_hinge_group0 = kld_hinge_pos_group0 + kld_hinge_neg_group0

    # KL-divergence for group 1.
    kld_hinge_pos_group1 = (-tf.stop_gradient(lambda_pos1) * ppr_hinge_group1 -
                            p * tf.log(lambda_pos1) +
                            lambda_pos1 * tf.stop_gradient(ppr_group1))
    kld_hinge_neg_group1 = (-tf.stop_gradient(lambda_neg1) * npr_hinge_group1 -
                            (1 - p) * tf.log(lambda_neg1) +
                            lambda_neg1 * tf.stop_gradient(npr_group1))
    kld_hinge_group1 = kld_hinge_pos_group1 + kld_hinge_neg_group1

    # Wrap the objective into a rate object.
    objective = tfco.wrap_rate(kld_hinge_group0 + kld_hinge_group1)

    # Set up error rate constraint for constrained optimization.
    context = tfco.rate_context(predictions_tensor, labels_tensor)
    error = tfco.error_rate(context)
    constraints = [error <= additive_slack]

    # Create rate minimization problem object.
    problem = tfco.RateMinimizationProblem(objective, constraints)

    # Set up optimizer.
    optimizer = tfco.LagrangianOptimizerV1(
        tf.train.AdamOptimizer(learning_rate=learning_rate),
        constraint_optimizer=tf.train.AdamOptimizer(
            learning_rate=learning_rate_constraint))
    train_op = optimizer.minimize(problem)

    # Start TF session and initialize variables.
    session = tf.Session()
    session.run(tf.global_variables_initializer())

    # We maintain a list of objectives and model weights during training.
    objectives = []
    violations = []
    models = []

    # Perform full gradient updates.
    for ii in range(loops):

        # Gradient updates.
        session.run(train_op)

        # Checkpoint once in 10 iterations.
        if ii % 10 == 0:
            # Model weights.
            model = [session.run(weights), session.run(threshold)]
            models.append(model)

            # Objective.
            klds = evaluation.expected_group_klds(x_train, y_train, z_train,
                                                  [model], [1.0])
            objectives.append(sum(klds))

            # Violation.
            error = evaluation.expected_error_rate(x_train, y_train, [model],
                                                   [1.0])
            violations.append([error - additive_slack])

    # Use the recorded objectives and constraints to find the best iterate.
    best_iterate = tfco.find_best_candidate_index(np.array(objectives),
                                                  np.array(violations))
    deterministic_model = models[best_iterate]

    # Use shrinking to find a sparse distribution over iterates.
    probabilities = tfco.find_best_candidate_distribution(
        np.array(objectives), np.array(violations))
    models_pruned = [
        models[i] for i in range(len(models)) if probabilities[i] > 0.0
    ]
    probabilities_pruned = probabilities[probabilities > 0.0]

    return (models_pruned, probabilities_pruned), deterministic_model
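The stochastic model returned above is just a list of [weights, threshold] pairs together with a probability for each. A minimal sketch of how it might be used at prediction time (the helper name stochastic_predict is hypothetical and not part of the original module; it assumes x is a feature matrix matching the training features):

import numpy as np

def stochastic_predict(x, stochastic_model, seed=0):
    """Samples one linear model per example and returns 0/1 predictions."""
    models, probabilities = stochastic_model
    rng = np.random.RandomState(seed)
    # Draw a model index for every example according to the distribution.
    idx = rng.choice(len(models), size=x.shape[0], p=probabilities)
    # Scores of every model on every example: shape (num_examples, num_models).
    scores = np.stack([x.dot(w) + t for (w, t) in models], axis=1)
    return (scores[np.arange(x.shape[0]), idx] > 0).astype(int)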
def train_constrained(dataset,
                      group_info,
                      epsilon=0.01,
                      learning_rate=0.1,
                      dual_scale=5.0,
                      loops=10000,
                      feature_dependent_multiplier=True,
                      hidden_layers=None,
                      skip_steps=400):
    """Train constrained classifier wth Lagrangian model.

  Args:
    dataset: train, vali and test sets
    group_info: group memberships on train, vali and test sets and thresholds
    epsilon: constraint slack
    learning_rate: learning rate for theta
    dual_scale: learning rate for gamma = dual_scale * learning_rate
    loops: number of gradient steps
    feature_dependent_multiplier: should the multiplier model be feature
      dependent. If False, a common multiplier is used for all constraints
    hidden_layers: list of hidden layer nodes to be used for multiplier model
    skip_steps: steps to skip before snapshotting metrics
  """
    tf.set_random_seed(121212)
    np.random.seed(212121)
    random.seed(333333)

    x_train, y_train, z_train, x_vali, y_vali, _, x_test, y_test, _ = dataset

    (group_memberships_list_train, group_memberships_list_vali,
     group_memberships_list_test,
     group_memberships_thresholds_train) = group_info

    # Models and group thresholds tensor.
    model = create_model(x_train.shape[-1])
    multiplier_model, multiplier_weights = create_multiplier_model(
        feature_dependent_multiplier=feature_dependent_multiplier,
        dim=3,
        hidden_layers=hidden_layers)
    group_thresholds = tf.Variable(np.ones(3) * 0.1, dtype=tf.float32)

    # Features, labels, predictions, multipliers.
    features_tensor = tf.constant(x_train)
    labels_tensor = tf.constant(y_train)
    features_tensor_vali = tf.constant(x_vali)

    predictions = lambda: model(features_tensor)
    predictions_vali = lambda: model(features_tensor_vali)
    predictions_test = lambda: model(x_test)

    def multiplier_values():
        return tf.abs(
            multiplier_model(tf.reshape(group_thresholds, shape=(1, -1))))

    # Lagrangian loss function.
    def lagrangian_loss():
        # Separate out objective, constraints and proxy constraints.
        objective = problem.objective()
        constraints = problem.constraints()
        proxy_constraints = problem.proxy_constraints()

        # Set-up custom Lagrangian loss.
        primal = objective
        multipliers = multiplier_values()
        primal += tf.stop_gradient(multipliers) * proxy_constraints
        dual = dual_scale * multipliers * tf.stop_gradient(constraints)
        return primal - dual

    # Objective.
    context = tfco.rate_context(predictions, labels=lambda: labels_tensor)
    overall_error = tfco.error_rate(context)

    # Slice and subset group predictions and labels.
    def group_membership():
        return (z_train[:, 0] > group_thresholds[0]) & (
            z_train[:, 1] > group_thresholds[1]) & (z_train[:, 2] >
                                                    group_thresholds[2])

    def group_predictions():
        pred = predictions()
        groups = tf.reshape(group_membership(), (-1, 1))
        return pred[groups]

    def group_labels():
        groups = tf.reshape(group_membership(), (-1, ))
        return labels_tensor[groups]

    # Constraint.
    group_context = tfco.rate_context(group_predictions, labels=group_labels)
    group_error = tfco.error_rate(group_context)
    constraints = [group_error <= overall_error + epsilon]

    # Set up constrained optimization problem and optimizer.
    problem = tfco.RateMinimizationProblem(overall_error, constraints)
    optimizer = tf.keras.optimizers.Adagrad(learning_rate)
    var_list = model.trainable_weights + multiplier_weights

    objectives_list = []
    objectives_list_test = []
    objectives_list_vali = []
    violations_list = []
    violations_list_test = []
    violations_list_vali = []
    model_weights = []

    # Training
    for ii in range(loops):
        # Sample a group membership at random.
        random_index = np.random.randint(
            group_memberships_thresholds_train.shape[0])
        group_thresholds.assign(
            group_memberships_thresholds_train[random_index, :])

        # Gradient op.
        problem.update_ops()
        optimizer.minimize(lagrangian_loss, var_list=var_list)

        # Snapshot the iterate once every skip_steps loops.
        if ii % skip_steps == 0:
            pred = np.reshape(predictions(), (-1, ))
            err = error_rate(y_train, pred)
            max_viol, viol_list = violation(y_train, pred, epsilon,
                                            group_memberships_list_train)

            pred_test = np.reshape(predictions_test(), (-1, ))
            err_test = error_rate(y_test, pred_test)
            _, viol_list_test = violation(y_test, pred_test, epsilon,
                                          group_memberships_list_test)

            pred_vali = np.reshape(predictions_vali(), (-1, ))
            err_vali = error_rate(y_vali, pred_vali)
            max_viol_vali, viol_list_vali = violation(
                y_vali, pred_vali, epsilon, group_memberships_list_vali)

            objectives_list.append(err)
            objectives_list_test.append(err_test)
            objectives_list_vali.append(err_vali)
            violations_list.append(viol_list)
            violations_list_test.append(viol_list_test)
            violations_list_vali.append(viol_list_vali)
            model_weights.append(model.get_weights())

            if ii % 1000 == 0:
                print(
                    "Epoch %d | Error = %.3f | Viol = %.3f | Viol_vali = %.3f"
                    % (ii, err, max_viol, max_viol_vali),
                    flush=True)

    # Best candidate index.
    best_ind = tfco.find_best_candidate_index(np.array(objectives_list),
                                              np.array(violations_list),
                                              rank_objectives=False)
    model.set_weights(model_weights[best_ind])

    print("Train:")
    evaluate(x_train, y_train, model, epsilon, group_memberships_list_train)
    print("\nVali:")
    evaluate(x_vali, y_vali, model, epsilon, group_memberships_list_vali)
    print("\nTest:")
    evaluate(x_test, y_test, model, epsilon, group_memberships_list_test)
def train_unconstrained(dataset,
                        group_info,
                        epsilon=0.01,
                        loops=10000,
                        skip_steps=400):
    """Train unconstrained classifier.

  Args:
    dataset: train, vali and test sets
    group_info: group memberships on train, vali and test sets and thresholds
    epsilon: constraint slack
    loops: number of gradient steps
    skip_steps: steps to skip before snapshotting metrics
  """
    tf.set_random_seed(121212)
    np.random.seed(212121)
    random.seed(333333)

    x_train, y_train, _, x_vali, y_vali, _, x_test, y_test, _ = dataset

    (group_memberships_list_train, group_memberships_list_vali,
     group_memberships_list_test, _) = group_info

    model = create_model(x_train.shape[-1])
    features_tensor = tf.constant(x_train)
    labels_tensor = tf.constant(y_train)

    predictions = lambda: model(features_tensor)
    predictions_vali = lambda: model(x_vali)
    predictions_test = lambda: model(x_test)

    context = tfco.rate_context(predictions, labels=lambda: labels_tensor)
    overall_error = tfco.error_rate(context, penalty_loss=tfco.HingeLoss())
    problem = tfco.RateMinimizationProblem(overall_error)

    loss_fn, update_ops_fn, _ = tfco.create_lagrangian_loss(problem)
    optimizer = tf.keras.optimizers.Adagrad(0.1)

    objectives_list = []
    objectives_list_test = []
    objectives_list_vali = []
    violations_list = []
    violations_list_test = []
    violations_list_vali = []
    model_weights = []

    for ii in range(loops):
        update_ops_fn()
        optimizer.minimize(loss_fn, var_list=model.trainable_weights)

        # Snapshot the iterate once every skip_steps loops.
        if ii % skip_steps == 0:
            pred = np.reshape(predictions(), (-1, ))
            err = error_rate(y_train, pred)
            max_viol, viol_list = violation(y_train, pred, epsilon,
                                            group_memberships_list_train)

            pred_test = np.reshape(predictions_test(), (-1, ))
            err_test = error_rate(y_test, pred_test)
            _, viol_list_test = violation(y_test, pred_test, epsilon,
                                          group_memberships_list_test)

            pred_vali = np.reshape(predictions_vali(), (-1, ))
            err_vali = error_rate(y_vali, pred_vali)
            max_viol_vali, viol_list_vali = violation(
                y_vali, pred_vali, epsilon, group_memberships_list_vali)

            objectives_list.append(err)
            objectives_list_test.append(err_test)
            objectives_list_vali.append(err_vali)
            violations_list.append(viol_list)
            violations_list_test.append(viol_list_test)
            violations_list_vali.append(viol_list_vali)
            model_weights.append(model.get_weights())

            if ii % 1000 == 0:
                print(
                    "Epoch %d | Error = %.3f | Viol = %.3f | Viol_vali = %.3f"
                    % (ii, err, max_viol, max_viol_vali),
                    flush=True)

    # Best candidate index.
    best_ind = np.argmin(objectives_list)
    model.set_weights(model_weights[best_ind])

    print("Train:")
    evaluate(x_train, y_train, model, epsilon, group_memberships_list_train)
    print("\nVali:")
    evaluate(x_vali, y_vali, model, epsilon, group_memberships_list_vali)
    print("\nTest:")
    evaluate(x_test, y_test, model, epsilon, group_memberships_list_test)
def formulate_problem(features,
                      groups,
                      labels,
                      dimension,
                      constraint_groups,
                      constraint_slack=None):
  """Formulates a constrained problem."""
  #   Formulates a constrained problem that optimizes the error rate for a linear

  #   model on the specified dataset, subject to pairwise fairness constraints
  #   specified by the constraint_groups and the constraint_slack.

  #   Args:
  #     features: Nullary function returning features
  #     groups: Nullary function returning groups
  #     labels: Nullary function returning labels
  #     dimension: Input dimension for ranking model
  #     constraint_groups: List containing tuples of the form ((pos_group0,
  #       neg_group0), (pos_group1, neg_group1)), specifying the group memberships
  #       for the document pairs to compare in the constraints.
  #     constraint_slack: slackness '\epsilon' allowed in the constraints.

  #   Returns:
  #     A RateMinimizationProblem object, and a Keras ranking model.

  # Create linear ranking model: we get back a Keras model and a nullary
  # function returning predictions on the features.
  ranking_model, predictions = create_ranking_model(features, dimension)

  # Context for the optimization objective.
  context = tfco.rate_context(predictions, labels)

  # Constraint set.
  constraint_set = []

  # Context for the constraints.
  for ((pos_group0, neg_group0), (pos_group1, neg_group1)) in constraint_groups:
    # Context for group 0.
    group0_predictions, group0_labels = group_tensors(
        predictions, groups, pos_group0, neg_group=neg_group0)
    context_group0 = tfco.rate_context(group0_predictions, group0_labels)

    # Context for group 1.
    group1_predictions, group1_labels = group_tensors(
        predictions, groups, pos_group1, neg_group=neg_group1)
    context_group1 = tfco.rate_context(group1_predictions, group1_labels)

    # Add constraints to constraint set.
    constraint_set.append(
        tfco.false_negative_rate(context_group0) <= (
            tfco.false_negative_rate(context_group1) + constraint_slack))
    constraint_set.append(
        tfco.false_negative_rate(context_group1) <= (
            tfco.false_negative_rate(context_group0) + constraint_slack))

  # Formulate constrained minimization problem.
  problem = tfco.RateMinimizationProblem(
      tfco.error_rate(context, penalty_loss=tfco.SoftmaxCrossEntropyLoss()),
      constraint_set)

  return problem, ranking_model
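A rough sketch of how the returned problem and ranking model might be trained, reusing the tfco.create_lagrangian_loss pattern from the train_unconstrained example above (the helper name train_problem, the variable list, and the step count are illustrative assumptions, not part of the original code; the tf and tfco imports are assumed as elsewhere in these examples):

def train_problem(problem, ranking_model, learning_rate=0.1, num_steps=1000):
  # Build the Lagrangian loss; the third return value is the Lagrange
  # multiplier variable, which also has to be included in the optimized
  # variables (ascent on it is arranged internally via stop-gradients).
  loss_fn, update_ops_fn, multipliers = tfco.create_lagrangian_loss(problem)
  optimizer = tf.keras.optimizers.Adagrad(learning_rate)
  var_list = (ranking_model.trainable_weights +
              list(problem.trainable_variables) + [multipliers])
  for _ in range(num_steps):
    update_ops_fn()
    optimizer.minimize(loss_fn, var_list=var_list)
  return ranking_model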
Example #10
def lagrangian_optimizer_fmeasure(
    train_set, epsilon, learning_rate, learning_rate_constraint, loops):
  """Implements surrogate-based Lagrangian optimizer (Algorithm 3).

  Specifically solves:
    max F-measure s.t. F-measure(group1) >= F-measure(group0) - epsilon.

  Args:
    train_set: (features, labels, groups)
    epsilon: float, constraint slack.
    learning_rate: float, learning rate for model parameters.
    learning_rate_constraint: float, learning rate for Lagrange multipliers.
    loops: int, number of iterations.

  Returns:
    stochastic_model containing list of models and probabilities,
    deterministic_model
  """
  x_train, y_train, z_train = train_set
  dimension = x_train.shape[-1]

  tf.reset_default_graph()

  # Data tensors.
  features_tensor = tf.constant(x_train.astype("float32"), name="features")
  labels_tensor = tf.constant(y_train.astype("float32"), name="labels")

  # Linear model.
  weights = tf.Variable(tf.zeros(dimension, dtype=tf.float32),
                        name="weights")
  threshold = tf.Variable(0, name="threshold", dtype=tf.float32)
  predictions_tensor = (tf.tensordot(features_tensor, weights, axes=(1, 0))
                        + threshold)

  # Contexts.
  context = tfco.rate_context(predictions_tensor, labels_tensor)
  context0 = context.subset(z_train < 1)
  context1 = context.subset(z_train > 0)

  # F-measure rates.
  fm_overall = tfco.f_score_lower_bound(context)
  fm1 = tfco.f_score_lower_bound(context1)
  fm0 = tfco.f_score_upper_bound(context0)

  # Rate minimization.
  problem = tfco.RateMinimizationProblem(-fm_overall, [fm0 <= fm1 + epsilon])

  # Optimizer.
  optimizer = tfco.LagrangianOptimizer(
      tf.train.AdamOptimizer(learning_rate=learning_rate),
      constraint_optimizer=tf.train.AdamOptimizer(
          learning_rate=learning_rate_constraint))
  train_op = optimizer.minimize(problem)

  # Start TF session and initialize variables.
  session = tf.Session()
  session.run(tf.global_variables_initializer())

  # We maintain a list of objectives and model weights during training.
  objectives = []
  violations = []
  models = []

  # Perform full gradient updates.
  for ii in range(loops):

    # Gradient updates.
    session.run(train_op)

    # Checkpoint once in 10 iterations.
    if ii % 10 == 0:
      # Model weights.
      model = [session.run(weights), session.run(threshold)]
      models.append(model)

      # Objective.
      objective = -evaluation.expected_fmeasure(
          x_train, y_train, [model], [1.0])
      objectives.append(objective)

      # Violation.
      fmeasure0, fmeasure1 = evaluation.expected_group_fmeasures(
          x_train, y_train, z_train, [model], [1.0])
      violations.append([fmeasure0 - fmeasure1 - epsilon])

  # Use the recorded objectives and constraints to find the best iterate.
  best_iterate = tfco.find_best_candidate_index(
      np.array(objectives), np.array(violations))
  deterministic_model = models[best_iterate]

  # Use shrinking to find a sparse distribution over iterates.
  probabilities = tfco.find_best_candidate_distribution(
      np.array(objectives), np.array(violations))
  models_pruned = [models[i] for i in range(len(models)) if
                   probabilities[i] > 0.0]
  probabilities_pruned = probabilities[probabilities > 0.0]

  return (models_pruned, probabilities_pruned), deterministic_model
Example #11
def train_helper(train_df,
                 val_df,
                 test_df,
                 feature_names,
                 label_name,
                 proxy_columns,
                 protected_columns,
                 feature_dependent_multiplier=True,
                 learning_rate=0.1,
                 batch_size=None,
                 skip_iterations=100,
                 num_steps=1000,
                 dual_scale=1.0,
                 epsilon=0.03,
                 unconstrained=False,
                 standard_lagrangian=False,
                 use_noise_array=True,
                 resample_proxy_groups=True,
                 epochs_per_resample=1,
                 n_resamples_per_candidate=10,
                 group_features_type='full_group_vec',
                 num_group_clusters=100,
                 multiplier_model_hidden_layers=[100],
                 uniform_groups=False,
                 min_group_frac=0.05):
    """Helper function for training a model."""
    tf.keras.backend.clear_session()

    # init_proxy_groups_train is the initial noisy group assignments.
    features_train, labels_train, init_proxy_groups_train, _ = extract_features(
        train_df,
        feature_names=feature_names,
        label_name=label_name,
        proxy_group_names=proxy_columns,
        true_group_names=protected_columns,
        uniform_groups=uniform_groups,
        min_group_frac=min_group_frac)

    num_groups = init_proxy_groups_train.shape[1]
    noise_array = None
    if use_noise_array and not uniform_groups:
        noise_array = get_noise_array(train_df,
                                      protected_columns=protected_columns,
                                      proxy_columns=proxy_columns,
                                      num_groups=num_groups)

    num_examples = len(train_df)
    num_features = len(feature_names)

    if batch_size is None:
        batch_size = num_examples

    # Get number of group features.
    kmeans_model = None
    num_group_features = None
    if group_features_type == 'full_group_vec':
        num_group_features = num_examples
    elif group_features_type == 'size_alone':
        num_group_features = 1
    elif group_features_type == 'size_and_pr':
        num_group_features = 2
    elif group_features_type == 'avg_features':
        num_group_features = num_features + 1
    elif group_features_type == 'kmeans':
        kmeans_model = KMeans(n_clusters=num_group_clusters,
                              random_state=0).fit(features_train)
        num_group_features = num_group_clusters

    # Features
    features_tensor = tf.Variable(np.zeros((batch_size, num_features),
                                           dtype='float32'),
                                  name='features')
    # Labels
    labels_tensor = tf.Variable(np.zeros((batch_size, 1), dtype='float32'),
                                name='labels')
    # Protected groups
    # We will resample these groups every epoch during training.
    groups_tensor = tf.Variable(np.zeros((batch_size, num_groups),
                                         dtype='float32'),
                                name='groups')
    # Protected group features.
    groups_features_tensor = tf.Variable(np.zeros(
        (num_groups, num_group_features), dtype='float32'),
                                         name='group_features')

    # Linear model with no hidden layers.
    layers = []
    layers.append(tf.keras.Input(shape=(num_features, )))
    layers.append(tf.keras.layers.Dense(1))

    # Keras model.
    model = tf.keras.Sequential(layers)

    # Set up rate minimization problem.
    # We set up a constrained optimization problem, where we minimize the
    # overall error rate subject to the TPR of each individual group being
    # within an epsilon of the overall TPR.
    def predictions():
        return model(features_tensor)

    context = tfco.rate_context(predictions, labels=lambda: labels_tensor)
    overall_error = tfco.error_rate(context)
    constraints = []
    if not unconstrained:
        # Add group rate constraints.
        pos_context = context.subset(lambda: labels_tensor > 0)
        overall_tpr = tfco.positive_prediction_rate(pos_context)
        for jj in range(num_groups):
            group_pos_context = pos_context.subset(
                lambda kk=jj: groups_tensor[:, kk] > 0)
            group_tpr = tfco.positive_prediction_rate(group_pos_context)
            constraints.append(group_tpr >= overall_tpr - epsilon)

    problem = tfco.RateMinimizationProblem(overall_error, constraints)

    # Set up multiplier model.
    if not unconstrained:
        if standard_lagrangian:
            common_multiplier = tf.Variable(np.ones((len(constraints), 1)),
                                            dtype='float32',
                                            name='common_multiplier')
            multiplier_weights = [common_multiplier]
        else:
            multiplier_model, multiplier_weights = create_multiplier_model(
                feature_dependent_multiplier=feature_dependent_multiplier,
                num_group_features=num_group_features,
                hidden_layers=multiplier_model_hidden_layers)

    # Set up lagrangian loss.
    def lagrangian_loss():
        # Separate out objective, constraints and proxy constraints.
        objective = problem.objective()
        constraints = problem.constraints()
        proxy_constraints = problem.proxy_constraints()

        # Set-up custom Lagrangian loss.
        multipliers = tf.abs(multiplier_model(groups_features_tensor))
        primal = objective + tf.stop_gradient(multipliers) * proxy_constraints
        dual = dual_scale * multipliers * tf.stop_gradient(constraints)

        return primal - dual

    # Standard lagrangian loss with a different multiplier for each constraint.
    def lagrangian_loss_standard():
        objective = problem.objective()
        constraints = problem.constraints()
        proxy_constraints = problem.proxy_constraints()

        # Set up standard lagrangian loss.
        multipliers = tf.abs(common_multiplier)
        primal = objective + tf.stop_gradient(multipliers) * proxy_constraints
        dual = dual_scale * multipliers * tf.stop_gradient(constraints)

        return primal - dual

    # Set up unconstrained loss.
    def unconstrained_loss():
        return problem.objective()

    # Create optimizer
    optimizer = tf.keras.optimizers.Adagrad(learning_rate=learning_rate)

    # List of variables to optimize (in this case, the model parameters).
    if unconstrained:
        var_list = model.trainable_weights
    else:
        var_list = model.trainable_weights + multiplier_weights

    # Set up counter for the minibatch stream
    batch_index = 0

    # Record objectives and constraint violations.
    results_dict = {
        'train.objectives': [],
        'train.batch_violations': [],
        'train.true_error_rates': [],
        'train.sampled_violations_max': [],
        'train.sampled_violations_90p': [],
        'train.proxy_group_violations': [],
        'train.true_group_violations': [],
        'val.true_error_rates': [],
        'val.sampled_violations_max': [],
        'val.sampled_violations_90p': [],
        'val.proxy_group_violations': [],
        'val.true_group_violations': [],
        'test.true_error_rates': [],
        'test.sampled_violations_max': [],
        'test.sampled_violations_90p': [],
        'test.proxy_group_violations': [],
        'test.true_group_violations': []
    }
    group_sample_epochs = 0

    # Loop over minibatches.
    groups_train = init_proxy_groups_train
    if not unconstrained:
        group_features = extract_group_features(
            groups_train,
            features_train,
            labels_train,
            group_features_type,
            num_group_clusters=num_group_features,
            kmeans_model=kmeans_model)
        groups_features_tensor.assign(group_features)
    for ii in range(num_steps):
        # Indices for current minibatch in the stream.
        batch_indices = np.arange(batch_index * batch_size,
                                  (batch_index + 1) * batch_size)

        # Check for the beginning of a new epoch.
        if resample_proxy_groups and not unconstrained:
            if new_epoch(batch_index,
                         batch_size=batch_size,
                         num_examples=num_examples):
                # Only resample proxy groups every epochs_per_resample epochs.
                if group_sample_epochs % epochs_per_resample == 0:
                    # Resample the group at the beginning of the epoch.
                    # Get groups_train from a ball around init_proxy_groups_train.
                    if uniform_groups:
                        groups_train = generate_proxy_groups_uniform(
                            num_examples, min_group_frac=min_group_frac)
                    elif use_noise_array:
                        groups_train = generate_proxy_groups_noise_array(
                            init_proxy_groups_train, noise_array=noise_array)
                    else:
                        groups_train = generate_proxy_groups_single_noise(
                            init_proxy_groups_train,
                            noise_param=FLAGS.noise_level)
                    # Recompute group features at the beginning of the epoch.
                    group_features = extract_group_features(
                        groups_train,
                        features_train,
                        labels_train,
                        group_features_type,
                        num_group_clusters=num_group_features,
                        kmeans_model=kmeans_model)
                    groups_features_tensor.assign(group_features)
                group_sample_epochs += 1

        # Cycle back to the beginning if we have reached the end of the stream.
        batch_indices = [ind % num_examples for ind in batch_indices]

        # Assign features, labels.
        features_tensor.assign(features_train[batch_indices, :])
        labels_tensor.assign(labels_train[batch_indices].reshape(-1, 1))
        groups_tensor.assign(groups_train[batch_indices])

        # Gradient update.
        with tf.control_dependencies(problem.update_ops()):
            if unconstrained:
                optimizer.minimize(unconstrained_loss, var_list=var_list)
            elif standard_lagrangian:
                optimizer.minimize(lagrangian_loss_standard, var_list=var_list)
            else:
                optimizer.minimize(lagrangian_loss, var_list=var_list)

        if (ii % skip_iterations == 0) or (ii == num_steps - 1):
            # Record metrics.
            results_dict['train.objectives'].append(
                problem.objective().numpy())
            if not unconstrained:
                results_dict['train.batch_violations'].append(
                    np.max(problem.constraints().numpy()))
            else:
                results_dict['train.batch_violations'].append(0)
            add_summary_viols_to_results_dict(
                train_df,
                model,
                results_dict,
                'train',
                feature_names=feature_names,
                label_name=label_name,
                proxy_columns=proxy_columns,
                protected_columns=protected_columns,
                epsilon=epsilon,
                n_resamples_per_candidate=n_resamples_per_candidate,
                use_noise_array=use_noise_array,
                noise_array=noise_array,
                uniform_groups=uniform_groups,
                min_group_frac=min_group_frac)
            add_summary_viols_to_results_dict(
                val_df,
                model,
                results_dict,
                'val',
                feature_names=feature_names,
                label_name=label_name,
                proxy_columns=proxy_columns,
                protected_columns=protected_columns,
                epsilon=epsilon,
                n_resamples_per_candidate=n_resamples_per_candidate,
                use_noise_array=use_noise_array,
                noise_array=noise_array,
                uniform_groups=uniform_groups,
                min_group_frac=min_group_frac)
            add_summary_viols_to_results_dict(
                test_df,
                model,
                results_dict,
                'test',
                feature_names=feature_names,
                label_name=label_name,
                proxy_columns=proxy_columns,
                protected_columns=protected_columns,
                epsilon=epsilon,
                n_resamples_per_candidate=n_resamples_per_candidate,
                use_noise_array=use_noise_array,
                noise_array=noise_array,
                uniform_groups=uniform_groups,
                min_group_frac=min_group_frac)

            print(
                '%d: batch obj: %.3f | batch viol: %.3f | true error: %.3f | sampled viol: %.3f | true group viol: %.3f'
                % (ii, results_dict['train.objectives'][-1],
                   results_dict['train.batch_violations'][-1],
                   results_dict['train.true_error_rates'][-1],
                   results_dict['train.sampled_violations_max'][-1],
                   results_dict['train.true_group_violations'][-1]))

        batch_index += 1
    return model, results_dict
Example #12
    def lagrangian_optimizer(train_set,
                             epsilon=epsilon,
                             learning_rate=0.01,
                             learning_rate_constraint=0.01,
                             loops=2000):
        tf.reset_default_graph()

        x_train, y_train, z_train = train_set
        num_examples = x_train.shape[0]
        dimension = x_train.shape[-1]

        # Data tensors.
        features_tensor = tf.constant(x_train.astype("float32"),
                                      name="features")
        labels_tensor = tf.constant(y_train.astype("float32"), name="labels")

        # Linear model.
        weights = tf.Variable(tf.zeros(dimension, dtype=tf.float32),
                              name="weights")
        threshold = tf.Variable(0, name="threshold", dtype=tf.float32)
        predictions_tensor = (
            tf.tensordot(features_tensor, weights, axes=(1, 0)) + threshold)

        predictions_group0 = tf.boolean_mask(predictions_tensor,
                                             mask=(z_train < 1))
        num0 = np.sum(z_train < 1)
        predictions_group1 = tf.boolean_mask(predictions_tensor,
                                             mask=(z_train > 0))
        num1 = np.sum(z_train > 0)

        # Set up rates.
        context = tfco.rate_context(predictions_tensor, labels_tensor)
        true_positive_rate = tfco.true_positive_rate(context)
        true_negative_rate = tfco.true_negative_rate(context)

        context0 = context.subset(z_train < 1)
        true_positive_rate0 = tfco.true_positive_rate(context0)

        context1 = context.subset(z_train > 0)
        true_positive_rate1 = tfco.true_positive_rate(context1)

        # Set up slack variables.
        slack_tpr = tf.Variable(0.5, dtype=tf.float32)
        slack_tnr = tf.Variable(0.5, dtype=tf.float32)

        # Projection ops for slacks.
        projection_ops = []
        projection_ops.append(
            tf.assign(slack_tpr, tf.clip_by_value(slack_tpr, 0.001, 0.999)))
        projection_ops.append(
            tf.assign(slack_tnr, tf.clip_by_value(slack_tnr, 0.001, 0.999)))

        # Set up 1 - G-mean objective.
        objective = tfco.wrap_rate(1.0 - tf.sqrt(slack_tpr * slack_tnr))

        # Set up slack constraints.
        constraints = []
        constraints.append(tfco.wrap_rate(slack_tpr) <= true_positive_rate)
        constraints.append(tfco.wrap_rate(slack_tnr) <= true_negative_rate)

        # Set up fairness equal-opportunity constraints.
        constraints.append(
            true_positive_rate0 <= true_positive_rate1 + epsilon)
        constraints.append(
            true_positive_rate1 <= true_positive_rate0 + epsilon)

        # Set up constraint optimization problem.
        problem = tfco.RateMinimizationProblem(objective, constraints)

        # Set up solver.
        optimizer = tf.train.AdamOptimizer(learning_rate)
        constraint_optimizer = tf.train.AdamOptimizer(learning_rate_constraint)
        lagrangian_optimizer = tfco.ProxyLagrangianOptimizerV1(
            optimizer=optimizer, constraint_optimizer=constraint_optimizer)
        train_op = lagrangian_optimizer.minimize(problem)

        # Start TF session and initialize variables.
        session = tf.Session()
        tf.set_random_seed(654321)  # Set random seed for reproducibility.
        session.run(tf.global_variables_initializer())

        # We maintain a list of objectives and model weights during training.
        objectives = []
        violations = []
        models = []

        # Perform full gradient updates.
        for ii in range(loops):
            # Gradient update.
            session.run(train_op)
            # Projection.
            session.run(projection_ops)

            # Checkpoint once in 100 iterations.
            if ii % 100 == 0:
                # Model weights.
                model = [session.run(weights), session.run(threshold)]
                models.append(model)

                # Snapshot performance.
                error, tpr0, tpr1 = evaluate_expected_results(
                    train_set, [model], [1.0])
                objectives.append(error)
                violations.append(
                    [tpr0 - tpr1 - epsilon, tpr1 - tpr0 - epsilon])

        # Use the recorded objectives and constraints to find the best iterate.
        # Best model
        best_iterate = tfco.find_best_candidate_index(np.array(objectives),
                                                      np.array(violations))
        best_model = models[best_iterate]

        # Stochastic model over a subset of classifiers.
        probabilities = tfco.find_best_candidate_distribution(
            np.array(objectives), np.array(violations))
        models_pruned = [
            models[i] for i in range(len(models)) if probabilities[i] > 0.0
        ]
        probabilities_pruned = probabilities[probabilities > 0.0]

        # Stochastic model over all classifiers.
        probabilities_all = probabilities * 0.0 + 1.0 / len(probabilities)

        # Return Pruned models, Avg models, Best model
        results = {
            'stochastic': (models, probabilities_all),
            'pruned': (models_pruned, probabilities_pruned),
            'best': best_model,
            'objectives': objectives,
            'violations': violations
        }
        return results
Example #13
# Randomly flip a subset of the labels to simulate label noise. (num_examples is
# assumed to be the total number of examples, defined earlier in this example.)
mislabeled_indices = np.random.choice(num_examples,
                                      num_mislabeled_examples,
                                      replace=False)
labels[mislabeled_indices] = 1 - labels[mislabeled_indices]

# Create variables containing the model parameters.
weights = tf.Variable(tf.zeros(dimension), dtype=tf.float32, name="weights")
threshold = tf.Variable(0.0, dtype=tf.float32, name="threshold")

# Create the optimization problem.
constant_labels = tf.constant(labels, dtype=tf.float32)
constant_features = tf.constant(features, dtype=tf.float32)

predictions = tf.tensordot(constant_features, weights, axes=(1, 0)) - threshold

context = tfco.rate_context(predictions, labels=constant_labels)
problem = tfco.RateMinimizationProblem(
    tfco.error_rate(context), [tfco.recall(context) >= recall_lower_bound])


def average_hinge_loss(labels, predictions):
    # Recall that the labels are binary (0 or 1).
    signed_labels = (labels * 2) - 1
    return np.mean(np.maximum(0.0, 1.0 - signed_labels * predictions))


def recall(labels, predictions):
    # Recall that the labels are binary (0 or 1).
    positive_count = np.sum(labels)
    true_positives = labels * (predictions > 0)
    true_positive_count = np.sum(true_positives)
    return true_positive_count / positive_count
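
The snippet above only builds the recall-constrained problem and two evaluation helpers; a minimal sketch of one way to solve it, mirroring the graph-mode Lagrangian pattern of the earlier examples (the learning rate and number of steps are illustrative assumptions):

# Solve the problem with the V1 Lagrangian optimizer (illustrative settings).
optimizer = tfco.LagrangianOptimizerV1(
    tf.train.AdamOptimizer(learning_rate=0.01))
train_op = optimizer.minimize(problem)

with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    for _ in range(1000):
        session.run(train_op)
    trained_predictions = session.run(predictions)

# Evaluate with the helpers defined above.
print("Average hinge loss:", average_hinge_loss(labels, trained_predictions))
print("Recall:", recall(labels, trained_predictions))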