Example #1
    def build_train_op(self, learning_rate, unconstrained=False):
        # Rate context over the model's predictions and labels.
        ctx = tfco.rate_context(self.predictions_tensor,
                                self.labels_placeholder)
        # Overall TPR: positive prediction rate restricted to positive labels.
        positive_slice = ctx.subset(self.labels_placeholder > 0)
        overall_tpr = tfco.positive_prediction_rate(positive_slice)
        constraints = []
        if not unconstrained:
            # One constraint per protected group: the group's TPR may be at
            # most tpr_max_diff above the overall TPR.
            for placeholder in self.protected_placeholders:
                slice_tpr = tfco.positive_prediction_rate(
                    ctx.subset((placeholder > 0)
                               & (self.labels_placeholder > 0)))
                constraints.append(slice_tpr <= overall_tpr + self.tpr_max_diff)
        # Minimize the error rate subject to the rate constraints.
        mp = tfco.RateMinimizationProblem(tfco.error_rate(ctx), constraints)
        opt = tfco.ProxyLagrangianOptimizer(tf.train.AdamOptimizer(learning_rate))
        self.train_op = opt.minimize(minimization_problem=mp)
        return self.train_op
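For context, here is a minimal, self-contained sketch of the same rate-constraint pattern outside the class, assuming TF1 graph mode and the tensorflow_constrained_optimization package imported as tfco; all names below are illustrative and not taken from the example above.

import tensorflow.compat.v1 as tf
import tensorflow_constrained_optimization as tfco
tf.disable_v2_behavior()

# Placeholders for features, binary labels, and one protected-group indicator.
features = tf.placeholder(tf.float32, shape=(None, 4), name='features')
labels = tf.placeholder(tf.float32, shape=(None,), name='labels')
group = tf.placeholder(tf.float32, shape=(None,), name='group')

# Simple linear model.
weights = tf.Variable(tf.zeros(4), name='weights')
bias = tf.Variable(0.0, name='bias')
predictions = tf.tensordot(features, weights, axes=(1, 0)) + bias

# Overall and per-group TPR, expressed as rates on label/group subsets.
ctx = tfco.rate_context(predictions, labels)
overall_tpr = tfco.positive_prediction_rate(ctx.subset(labels > 0))
group_tpr = tfco.positive_prediction_rate(
    ctx.subset((group > 0) & (labels > 0)))

# Minimize the error rate subject to the group TPR staying near the overall TPR.
problem = tfco.RateMinimizationProblem(
    tfco.error_rate(ctx), [group_tpr >= overall_tpr - 0.05])
optimizer = tfco.ProxyLagrangianOptimizerV1(tf.train.AdamOptimizer(0.01))
train_op = optimizer.minimize(problem)
# train_op would then be run in a tf.Session with a feed_dict for the
# placeholders, as in Example #6 below.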
Example #2
    def build_train_op_ctx(self, learning_rate=.001):
        # We initialize the constrained problem using the rate helpers.
        ctx = tfco.rate_context(self.predictions_tensor,
                                self.labels_placeholder)
        positive_slice = ctx.subset(self.labels_placeholder > 0)
        overall_tpr = tfco.positive_prediction_rate(positive_slice)
        constraints = []
        for placeholder in self.protected_placeholders:
            # Group TPR: positive prediction rate on the group's positives.
            slice_tpr = tfco.positive_prediction_rate(
                ctx.subset((placeholder > 0) & (self.labels_placeholder > 0)))
            # Per-group disparity between the overall TPR and the group TPR.
            tmp = 2 * (overall_tpr - slice_tpr)
            constraints.append(tmp)

        # Bound the summed disparity by 20% of the reference value didi_tr.
        constraint = sum(constraints) <= 0.2 * self.didi_tr
        mp = tfco.RateMinimizationProblem(tfco.error_rate(ctx), [constraint])
        opt = tfco.ProxyLagrangianOptimizerV1(
            tf.train.AdamOptimizer(learning_rate))
        self.train_op = opt.minimize(mp)
        return self.train_op
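For intuition, the quantity bounded in Example #2 can be checked on hard predictions with plain NumPy. This is an illustrative sketch, not part of the original code; didi_tr is presumably a precomputed reference disparity for the training data.

import numpy as np

def tpr_disparity(y_true, y_pred, group_masks):
    """Sum over groups of 2 * (overall TPR - group TPR), mirroring Example #2."""
    positives = y_true > 0
    overall_tpr = np.mean(y_pred[positives] > 0)
    total = 0.0
    for mask in group_masks:
        group_tpr = np.mean(y_pred[positives & (mask > 0)] > 0)
        total += 2 * (overall_tpr - group_tpr)
    return total

The TFCO constraint keeps this aggregate below 0.2 * didi_tr while the error rate is minimized.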
Example #3
    def build_train_op_tfco(self, learning_rate=0.1):
        # Multiclass rate context over the model's logits and labels.
        ctx = tfco.multiclass_rate_context(self.num_classes,
                                           self.predictions_tensor,
                                           self.labels_placeholder)
        # positive_slice = ctx.subset(self.labels_placeholder > 0)
        # overall_tpr = tfco.positive_prediction_rate(positive_slice)
        constraints = []
        for c in range(self.num_classes):
            # Cap each class's prediction rate at 5% above a uniform share.
            pos_rate = tfco.positive_prediction_rate(ctx, c)
            constraints.append(pos_rate <= (1.05 / self.num_classes))
        mp = tfco.RateMinimizationProblem(tfco.error_rate(ctx), constraints)
        self.opt = tfco.ProxyLagrangianOptimizerV1(
            tf.train.AdamOptimizer(learning_rate))
        self.train_op = self.opt.minimize(mp)
        return self.train_op
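The multiclass variant above caps each class's prediction rate at 1.05 / num_classes, i.e. at most 5% above a uniform share of the predictions. A small NumPy check of that quantity (illustrative only; the names are not from the original code):

import numpy as np

def class_prediction_rates(logits):
    """Fraction of examples argmax-assigned to each class."""
    predicted = np.argmax(logits, axis=1)
    return np.bincount(predicted, minlength=logits.shape[1]) / len(predicted)

# Once the constraints are satisfied, every entry of
# class_prediction_rates(logits) should be <= 1.05 / num_classes.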
Example #4
    def build_train_op(self, learning_rate, unconstrained=False):
        ctx = tfco.rate_context(self.predictions_tensor,
                                self.labels_placeholder)
        positive_slice = ctx.subset(self.labels_placeholder > 0)
        overall_tpr = tfco.positive_prediction_rate(positive_slice)
        constraints = []

        # add constraints
        if not unconstrained:

            for constraint in self.constraints:

                print(constraint)
                if len(constraint) == 1:
                    placeholder = self.protected_placeholders_dict[
                        constraint[0]]
                    slice_tpr = tfco.positive_prediction_rate(
                        ctx.subset((placeholder > 0)
                                   & (self.labels_placeholder > 0)))
                elif len(constraint) == 2:
                    placeholder0 = self.protected_placeholders_dict[
                        constraint[0]]
                    placeholder1 = self.protected_placeholders_dict[
                        constraint[1]]
                    slice_tpr = tfco.positive_prediction_rate(
                        ctx.subset((placeholder0 > 0) & (placeholder1 > 0)
                                   & (self.labels_placeholder > 0)))

                constraints.append(
                    slice_tpr >= overall_tpr - self.tpr_max_diff)

        mp = tfco.RateMinimizationProblem(tfco.error_rate(ctx), constraints)
        opt = tfco.ProxyLagrangianOptimizer(
            tf.train.AdamOptimizer(learning_rate))
        self.train_op = opt.minimize(minimization_problem=mp)
        return self.train_op
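Example #4 reads its constraint specification from self.constraints, where each entry names one or two keys of self.protected_placeholders_dict and the corresponding (possibly intersectional) slice must keep its TPR within tpr_max_diff of the overall TPR. A hypothetical specification (the attribute names are illustrative):

# Hypothetical constraint specification; each entry defines one slice.
constraints = [
    ('gender_female',),                 # single protected attribute
    ('gender_female', 'race_black'),    # intersection of two attributes
]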
Example #5
def train_helper(train_df,
                 val_df,
                 test_df,
                 feature_names,
                 label_name,
                 proxy_columns,
                 protected_columns,
                 feature_dependent_multiplier=True,
                 learning_rate=0.1,
                 batch_size=None,
                 skip_iterations=100,
                 num_steps=1000,
                 dual_scale=1.0,
                 epsilon=0.03,
                 unconstrained=False,
                 standard_lagrangian=False,
                 use_noise_array=True,
                 resample_proxy_groups=True,
                 epochs_per_resample=1,
                 n_resamples_per_candidate=10,
                 group_features_type='full_group_vec',
                 num_group_clusters=100,
                 multiplier_model_hidden_layers=[100],
                 uniform_groups=False,
                 min_group_frac=0.05):
    """Helper function for training a model."""
    tf.keras.backend.clear_session()

    # init_proxy_groups_train is the initial noisy group assignments.
    features_train, labels_train, init_proxy_groups_train, _ = extract_features(
        train_df,
        feature_names=feature_names,
        label_name=label_name,
        proxy_group_names=proxy_columns,
        true_group_names=protected_columns,
        uniform_groups=uniform_groups,
        min_group_frac=min_group_frac)

    num_groups = init_proxy_groups_train.shape[1]
    noise_array = None
    if use_noise_array and not uniform_groups:
        noise_array = get_noise_array(train_df,
                                      protected_columns=protected_columns,
                                      proxy_columns=proxy_columns,
                                      num_groups=num_groups)

    num_examples = len(train_df)
    num_features = len(feature_names)

    if batch_size is None:
        batch_size = num_examples

    # Get number of group features.
    kmeans_model = None
    num_group_features = None
    if group_features_type == 'full_group_vec':
        num_group_features = num_examples
    elif group_features_type == 'size_alone':
        num_group_features = 1
    elif group_features_type == 'size_and_pr':
        num_group_features = 2
    elif group_features_type == 'avg_features':
        num_group_features = num_features + 1
    elif group_features_type == 'kmeans':
        kmeans_model = KMeans(n_clusters=num_group_clusters,
                              random_state=0).fit(features_train)
        num_group_features = num_group_clusters

    # Features
    features_tensor = tf.Variable(np.zeros((batch_size, num_features),
                                           dtype='float32'),
                                  name='features')
    # Labels
    labels_tensor = tf.Variable(np.zeros((batch_size, 1), dtype='float32'),
                                name='labels')
    # Protected groups
    # We will resample these groups every epoch during training.
    groups_tensor = tf.Variable(np.zeros((batch_size, num_groups),
                                         dtype='float32'),
                                name='groups')
    # Protected group features.
    groups_features_tensor = tf.Variable(np.zeros(
        (num_groups, num_group_features), dtype='float32'),
                                         name='group_features')

    # Linear model with no hidden layers.
    layers = []
    layers.append(tf.keras.Input(shape=(num_features, )))
    layers.append(tf.keras.layers.Dense(1))

    # Keras model.
    model = tf.keras.Sequential(layers)

    # Set up rate minimization problem.
    # We set up a constrained optimization problem where we minimize the overall
    # error rate subject to the TPR of each individual group being within
    # epsilon of the overall TPR.
    def predictions():
        return model(features_tensor)

    context = tfco.rate_context(predictions, labels=lambda: labels_tensor)
    overall_error = tfco.error_rate(context)
    constraints = []
    if not unconstrained:
        # Add group rate constraints.
        pos_context = context.subset(lambda: labels_tensor > 0)
        overall_tpr = tfco.positive_prediction_rate(pos_context)
        for jj in range(num_groups):
            group_pos_context = pos_context.subset(
                lambda kk=jj: groups_tensor[:, kk] > 0)
            group_tpr = tfco.positive_prediction_rate(group_pos_context)
            constraints.append(group_tpr >= overall_tpr - epsilon)

    problem = tfco.RateMinimizationProblem(overall_error, constraints)

    # Set up multiplier model.
    if not unconstrained:
        if standard_lagrangian:
            common_multiplier = tf.Variable(np.ones((len(constraints), 1)),
                                            dtype='float32',
                                            name='common_multiplier')
            multiplier_weights = [common_multiplier]
        else:
            multiplier_model, multiplier_weights = create_multiplier_model(
                feature_dependent_multiplier=feature_dependent_multiplier,
                num_group_features=num_group_features,
                hidden_layers=multiplier_model_hidden_layers)

    # Set up lagrangian loss.
    def lagrangian_loss():
        # Separate out objective, constraints and proxy constraints.
        objective = problem.objective()
        constraints = problem.constraints()
        proxy_constraints = problem.proxy_constraints()

        # Set-up custom Lagrangian loss.
        multipliers = tf.abs(multiplier_model(groups_features_tensor))
        primal = objective + tf.stop_gradient(multipliers) * proxy_constraints
        dual = dual_scale * multipliers * tf.stop_gradient(constraints)

        return primal - dual

    # Standard lagrangian loss with a different multiplier for each constraint.
    def lagrangian_loss_standard():
        objective = problem.objective()
        constraints = problem.constraints()
        proxy_constraints = problem.proxy_constraints()

        # Set up standard lagrangian loss.
        multipliers = tf.abs(common_multiplier)
        primal = objective + tf.stop_gradient(multipliers) * proxy_constraints
        dual = dual_scale * multipliers * tf.stop_gradient(constraints)

        return primal - dual
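    # Note: in both Lagrangian losses above, tf.stop_gradient splits the
    # update. The primal term moves the model parameters against the proxy
    # constraints with the multipliers held fixed, while the dual term
    # (scaled by dual_scale) moves the multipliers to grow on violated
    # constraints, since minimizing `primal - dual` maximizes the dual term.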

    # Set up unconstrained loss.
    def unconstrained_loss():
        return problem.objective()

    # Create optimizer
    optimizer = tf.keras.optimizers.Adagrad(learning_rate=learning_rate)

    # List of variables to optimize (in this case, the model parameters).
    if unconstrained:
        var_list = model.trainable_weights
    else:
        var_list = model.trainable_weights + multiplier_weights

    # Set up counter for the minibatch stream
    batch_index = 0

    # Record objectives and constraint violations.
    results_dict = {
        'train.objectives': [],
        'train.batch_violations': [],
        'train.true_error_rates': [],
        'train.sampled_violations_max': [],
        'train.sampled_violations_90p': [],
        'train.proxy_group_violations': [],
        'train.true_group_violations': [],
        'val.true_error_rates': [],
        'val.sampled_violations_max': [],
        'val.sampled_violations_90p': [],
        'val.proxy_group_violations': [],
        'val.true_group_violations': [],
        'test.true_error_rates': [],
        'test.sampled_violations_max': [],
        'test.sampled_violations_90p': [],
        'test.proxy_group_violations': [],
        'test.true_group_violations': []
    }
    group_sample_epochs = 0

    # Loop over minibatches.
    groups_train = init_proxy_groups_train
    if not unconstrained:
        group_features = extract_group_features(
            groups_train,
            features_train,
            labels_train,
            group_features_type,
            num_group_clusters=num_group_features,
            kmeans_model=kmeans_model)
        groups_features_tensor.assign(group_features)
    for ii in range(num_steps):
        # Indices for current minibatch in the stream.
        batch_indices = np.arange(batch_index * batch_size,
                                  (batch_index + 1) * batch_size)

        # Check for the beginning of a new epoch.
        if resample_proxy_groups and not unconstrained:
            if new_epoch(batch_index,
                         batch_size=batch_size,
                         num_examples=num_examples):
                # Only resample proxy groups every epochs_per_resample epochs.
                if group_sample_epochs % epochs_per_resample == 0:
                    # Resample the group at the beginning of the epoch.
                    # Get groups_train from a ball around init_proxy_groups_train.
                    if uniform_groups:
                        groups_train = generate_proxy_groups_uniform(
                            num_examples, min_group_frac=min_group_frac)
                    elif use_noise_array:
                        groups_train = generate_proxy_groups_noise_array(
                            init_proxy_groups_train, noise_array=noise_array)
                    else:
                        groups_train = generate_proxy_groups_single_noise(
                            init_proxy_groups_train,
                            noise_param=FLAGS.noise_level)
                    # Recompute group features at the beginning of the epoch.
                    group_features = extract_group_features(
                        groups_train,
                        features_train,
                        labels_train,
                        group_features_type,
                        num_group_clusters=num_group_features,
                        kmeans_model=kmeans_model)
                    groups_features_tensor.assign(group_features)
                group_sample_epochs += 1

        # Cycle back to the beginning if we have reached the end of the stream.
        batch_indices = [ind % num_examples for ind in batch_indices]

        # Assign features, labels.
        features_tensor.assign(features_train[batch_indices, :])
        labels_tensor.assign(labels_train[batch_indices].reshape(-1, 1))
        groups_tensor.assign(groups_train[batch_indices])

        # Gradient update.
        with tf.control_dependencies(problem.update_ops()):
            if unconstrained:
                optimizer.minimize(unconstrained_loss, var_list=var_list)
            elif standard_lagrangian:
                optimizer.minimize(lagrangian_loss_standard, var_list=var_list)
            else:
                optimizer.minimize(lagrangian_loss, var_list=var_list)

        if (ii % skip_iterations == 0) or (ii == num_steps - 1):
            # Record metrics.
            results_dict['train.objectives'].append(
                problem.objective().numpy())
            if not unconstrained:
                results_dict['train.batch_violations'].append(
                    np.max(problem.constraints().numpy()))
            else:
                results_dict['train.batch_violations'].append(0)
            add_summary_viols_to_results_dict(
                train_df,
                model,
                results_dict,
                'train',
                feature_names=feature_names,
                label_name=label_name,
                proxy_columns=proxy_columns,
                protected_columns=protected_columns,
                epsilon=epsilon,
                n_resamples_per_candidate=n_resamples_per_candidate,
                use_noise_array=use_noise_array,
                noise_array=noise_array,
                uniform_groups=uniform_groups,
                min_group_frac=min_group_frac)
            add_summary_viols_to_results_dict(
                val_df,
                model,
                results_dict,
                'val',
                feature_names=feature_names,
                label_name=label_name,
                proxy_columns=proxy_columns,
                protected_columns=protected_columns,
                epsilon=epsilon,
                n_resamples_per_candidate=n_resamples_per_candidate,
                use_noise_array=use_noise_array,
                noise_array=noise_array,
                uniform_groups=uniform_groups,
                min_group_frac=min_group_frac)
            add_summary_viols_to_results_dict(
                test_df,
                model,
                results_dict,
                'test',
                feature_names=feature_names,
                label_name=label_name,
                proxy_columns=proxy_columns,
                protected_columns=protected_columns,
                epsilon=epsilon,
                n_resamples_per_candidate=n_resamples_per_candidate,
                use_noise_array=use_noise_array,
                noise_array=noise_array,
                uniform_groups=uniform_groups,
                min_group_frac=min_group_frac)

            print(
                '%d: batch obj: %.3f | batch viol: %.3f | true error: %.3f | sampled viol: %.3f | true group viol: %.3f'
                % (ii, results_dict['train.objectives'][-1],
                   results_dict['train.batch_violations'][-1],
                   results_dict['train.true_error_rates'][-1],
                   results_dict['train.sampled_violations_max'][-1],
                   results_dict['train.true_group_violations'][-1]))

        batch_index += 1
    return model, results_dict
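A hypothetical invocation of train_helper (the column names are illustrative, and the helpers it relies on, such as extract_features and add_summary_viols_to_results_dict, are defined elsewhere):

model, results = train_helper(
    train_df, val_df, test_df,
    feature_names=['age', 'education_num', 'hours_per_week'],
    label_name='income',
    proxy_columns=['proxy_race_white', 'proxy_race_black'],
    protected_columns=['race_white', 'race_black'],
    learning_rate=0.1,
    batch_size=512,
    num_steps=1000,
    epsilon=0.03)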
Example #6
    def lagrangian_optimizer(train_set, epsilon=epsilon, learning_rate=0.1, 
                            learning_rate_constraint=0.1, loops=2000):
        tf.reset_default_graph()
        
        x_train, y_train, z_train = train_set
        num_examples = x_train.shape[0]
        dimension = x_train.shape[-1]
        
        # Data tensors.
        features_tensor = tf.constant(x_train.astype("float32"), name="features")
        labels_tensor = tf.constant(y_train.astype("float32"), name="labels")

        # Linear model.
        weights = tf.Variable(tf.zeros(dimension, dtype=tf.float32), 
                                name="weights")
        threshold = tf.Variable(0, name="threshold", dtype=tf.float32)
        predictions_tensor = (tf.tensordot(features_tensor, weights, axes=(1, 0))
                                + threshold)

        predictions_group0 = tf.boolean_mask(predictions_tensor, mask=(z_train < 1))
        num0 = np.sum(z_train < 1)
        predictions_group1 = tf.boolean_mask(predictions_tensor, mask=(z_train > 0))
        num1 = np.sum(z_train > 0)

        # Set up rates.
        context = tfco.rate_context(predictions_tensor, labels_tensor)
        true_positive_rate = tfco.true_positive_rate(context)
        true_negative_rate = tfco.true_negative_rate(context)
        pred_positive = tfco.positive_prediction_rate(context)

        # Set up slack variables.
        slack_tpr = tf.Variable(0.5, dtype=tf.float32)
        slack_tnr = tf.Variable(0.5, dtype=tf.float32)
        
        # Projection ops to keep the slack variables strictly inside (0, 1).
        projection_ops = []
        projection_ops.append(
            tf.assign(slack_tpr, tf.clip_by_value(slack_tpr, 0.001, 0.999)))
        projection_ops.append(
            tf.assign(slack_tnr, tf.clip_by_value(slack_tnr, 0.001, 0.999)))
        
        # Set up 1 - G-mean objective.
        objective = tfco.wrap_rate(1.0 - tf.sqrt(slack_tpr * slack_tnr))

        # Set up slack constraints.
        constraints = []
        constraints.append(tfco.wrap_rate(slack_tpr) <= true_positive_rate)
        constraints.append(tfco.wrap_rate(slack_tnr) <= true_negative_rate)
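        # Because the objective 1 - sqrt(slack_tpr * slack_tnr) decreases as
        # the slacks grow, both constraints are tight at the optimum, so the
        # slacks stand in for the true TPR/TNR (tfco rate expressions cannot
        # be placed under a square root directly).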

        # Set up COV constraints.
        constraints.append(pred_positive <= epsilon)

        # Set up constraint optimization problem.
        problem = tfco.RateMinimizationProblem(objective, constraints)

        # Set up solver.
        optimizer = tf.train.AdamOptimizer(learning_rate)
        constraint_optimizer = tf.train.AdamOptimizer(learning_rate_constraint)
        lagrangian_optimizer = tfco.ProxyLagrangianOptimizerV1(
            optimizer=optimizer, constraint_optimizer=constraint_optimizer)
        train_op = lagrangian_optimizer.minimize(problem)

        # Start TF session and initialize variables.
        session = tf.Session()
        tf.set_random_seed(654321)  # Set random seed for reproducibility.
        session.run(tf.global_variables_initializer())

        # We maintain a list of objectives and model weights during training.
        objectives = []
        violations = []
        models = []
        
        # Perform full gradient updates.
        for ii in range(loops):
            # Gradient update.
            session.run(train_op)
            # Projection.
            session.run(projection_ops)
            
            # Checkpoint once every 100 iterations.
            if ii % 100 == 0:
                # Model weights.
                model = [session.run(weights), session.run(threshold)]
                models.append(model)

                # Snapshot performance.
                error, pp = evaluate_expected_results(
                    train_set, [model], [1.0])
                objectives.append(error)
                violations.append([pp - epsilon])

            # print("Step %d | G-mean error = %3f | COV violation = %.3f" % (
            #     ii, objectives[-1], max(violations[-1])))
            
        # Use the recorded objectives and constraints to find the best iterate.
        # Best model
        best_iterate = tfco.find_best_candidate_index(
            np.array(objectives), np.array(violations))
        best_model = models[best_iterate]
        
        # Stochastic model over a subset of classifiers.
        probabilities = tfco.find_best_candidate_distribution(
            np.array(objectives), np.array(violations))
        models_pruned = [models[i] for i in range(len(models)) if probabilities[i] > 0.0]
        probabilities_pruned = probabilities[probabilities > 0.0]

        # Stochastic model over all classifiers.
        probabilities_all = probabilities * 0.0 + 1.0 / len(probabilities)
            
        # Return stochastic, pruned, and best models, plus recorded metrics.
        results = {
            'stochastic': (models, probabilities_all),
            'pruned': (models_pruned, probabilities_pruned),
            'best': ([best_model[0]], best_model[1]),
            'objectives': objectives,
            'violations': violations
        }
        return results
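The returned dictionary describes a deterministic best iterate plus stochastic classifiers over the recorded iterates. Below is a sketch of scoring a held-out set with the 'pruned' stochastic model, mirroring what evaluate_expected_results presumably computes; it is illustrative only, and x_test, y_test, and results are assumed to exist.

import numpy as np

def expected_error_rate(x, y, models, probabilities):
    """Error rate of a stochastic classifier choosing model i with prob. p_i."""
    error = 0.0
    for (weights, threshold), p in zip(models, probabilities):
        scores = np.dot(x, weights) + threshold
        error += p * np.mean((scores > 0) != (y > 0))
    return error

models_pruned, probabilities_pruned = results['pruned']
print(expected_error_rate(x_test, y_test, models_pruned, probabilities_pruned))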