def build_train_op(self, learning_rate, unconstrained=False):
  # Build a rate context over the model's predictions and labels.
  ctx = tfco.rate_context(self.predictions_tensor, self.labels_placeholder)
  # Overall TPR: positive prediction rate on the positively-labeled slice.
  positive_slice = ctx.subset(self.labels_placeholder > 0)
  overall_tpr = tfco.positive_prediction_rate(positive_slice)
  constraints = []
  if not unconstrained:
    # One TPR constraint per protected group.
    for placeholder in self.protected_placeholders:
      slice_tpr = tfco.positive_prediction_rate(
          ctx.subset((placeholder > 0) & (self.labels_placeholder > 0)))
      constraints.append(slice_tpr <= overall_tpr + self.tpr_max_diff)
  # Minimize the error rate subject to the group TPR constraints.
  mp = tfco.RateMinimizationProblem(tfco.error_rate(ctx), constraints)
  opt = tfco.ProxyLagrangianOptimizerV1(tf.train.AdamOptimizer(learning_rate))
  self.train_op = opt.minimize(minimization_problem=mp)
  return self.train_op
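
# The constraints built above read, for each protected group g,
#     TPR_g <= TPR_overall + tpr_max_diff,
# an equal-opportunity-style bound on how far any group's true positive rate
# may rise above the overall one (a variant further below instead enforces
# the lower bound TPR_g >= TPR_overall - tpr_max_diff).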
def build_train_op_ctx(self, learning_rate=.001):
  # We initialize the constrained problem using the rate helpers.
  ctx = tfco.rate_context(self.predictions_tensor, self.labels_placeholder)
  positive_slice = ctx.subset(self.labels_placeholder > 0)
  overall_tpr = tfco.positive_prediction_rate(positive_slice)
  constraints = []
  for placeholder in self.protected_placeholders:
    slice_tpr = tfco.positive_prediction_rate(
        ctx.subset((placeholder > 0) & (self.labels_placeholder > 0)))
    # Each term is the (doubled) TPR gap between the full data and one
    # protected slice.
    constraints.append(2 * (overall_tpr - slice_tpr))
  # Bound the summed gaps by a fraction of the training data's DIDI value.
  constraint = sum(constraints) <= 0.2 * self.didi_tr
  mp = tfco.RateMinimizationProblem(tfco.error_rate(ctx), [constraint])
  opt = tfco.ProxyLagrangianOptimizerV1(tf.train.AdamOptimizer(learning_rate))
  self.train_op = opt.minimize(mp)
  return self.train_op
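
# For reference, a hedged sketch of the kind of disparate-impact index that
# self.didi_tr above presumably stores. The helper below is a hypothetical
# illustration of one common DIDI definition, not the original computation:
def didi_reference(y, group_masks):
  # y: binary outcome array; group_masks: boolean masks, one per protected
  # group (each assumed non-empty).
  overall = np.mean(y)
  return sum(2 * abs(overall - np.mean(y[mask])) for mask in group_masks)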
def build_train_op_tfco(self, learning_rate=0.1):
  ctx = tfco.multiclass_rate_context(self.num_classes,
                                     self.predictions_tensor,
                                     self.labels_placeholder)
  constraints = []
  # Cap each class's prediction rate slightly above the uniform rate 1/C.
  for c in range(self.num_classes):
    pos_rate = tfco.positive_prediction_rate(ctx, c)
    constraints.append(pos_rate <= (1.05 / self.num_classes))
  mp = tfco.RateMinimizationProblem(tfco.error_rate(ctx), constraints)
  self.opt = tfco.ProxyLagrangianOptimizerV1(
      tf.train.AdamOptimizer(learning_rate))
  self.train_op = self.opt.minimize(mp)
  return self.train_op
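
# For intuition, the per-class positive prediction rate constrained above is
# the fraction of examples assigned to that class. A NumPy sketch of the
# empirical quantity (assumes `scores` is a (num_examples, num_classes)
# array of model scores; both names are illustrative):
def empirical_class_rates(scores, num_classes):
  pred = np.argmax(scores, axis=1)
  return np.bincount(pred, minlength=num_classes) / float(len(pred))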
def build_train_op(self, learning_rate, unconstrained=False):
  ctx = tfco.rate_context(self.predictions_tensor, self.labels_placeholder)
  positive_slice = ctx.subset(self.labels_placeholder > 0)
  overall_tpr = tfco.positive_prediction_rate(positive_slice)
  constraints = []
  # Add one TPR constraint per protected attribute, or per intersection of
  # two attributes.
  if not unconstrained:
    for constraint in self.constraints:
      print(constraint)
      if len(constraint) == 1:
        placeholder = self.protected_placeholders_dict[constraint[0]]
        slice_tpr = tfco.positive_prediction_rate(
            ctx.subset((placeholder > 0) & (self.labels_placeholder > 0)))
      elif len(constraint) == 2:
        placeholder0 = self.protected_placeholders_dict[constraint[0]]
        placeholder1 = self.protected_placeholders_dict[constraint[1]]
        slice_tpr = tfco.positive_prediction_rate(
            ctx.subset((placeholder0 > 0) & (placeholder1 > 0) &
                       (self.labels_placeholder > 0)))
      else:
        raise ValueError('Only single or pairwise constraints are supported.')
      constraints.append(slice_tpr >= overall_tpr - self.tpr_max_diff)
  mp = tfco.RateMinimizationProblem(tfco.error_rate(ctx), constraints)
  opt = tfco.ProxyLagrangianOptimizerV1(tf.train.AdamOptimizer(learning_rate))
  self.train_op = opt.minimize(minimization_problem=mp)
  return self.train_op
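
# A minimal driver sketch for the build_train_op variants above, assuming a
# hypothetical wrapper object `clf` that owns the referenced placeholders
# and an iterable `feed_dicts` of per-step feed dictionaries (both names are
# assumptions); standard TF1 graph/session semantics.
def example_fit(clf, feed_dicts, learning_rate=0.01):
  train_op = clf.build_train_op(learning_rate)
  with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    for feed_dict in feed_dicts:  # one feed_dict per training step
      session.run(train_op, feed_dict=feed_dict)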
def train_helper(train_df,
                 val_df,
                 test_df,
                 feature_names,
                 label_name,
                 proxy_columns,
                 protected_columns,
                 feature_dependent_multiplier=True,
                 learning_rate=0.1,
                 batch_size=None,
                 skip_iterations=100,
                 num_steps=1000,
                 dual_scale=1.0,
                 epsilon=0.03,
                 unconstrained=False,
                 standard_lagrangian=False,
                 use_noise_array=True,
                 resample_proxy_groups=True,
                 epochs_per_resample=1,
                 n_resamples_per_candidate=10,
                 group_features_type='full_group_vec',
                 num_group_clusters=100,
                 multiplier_model_hidden_layers=[100],
                 uniform_groups=False,
                 min_group_frac=0.05):
  """Helper function for training a model."""
  tf.keras.backend.clear_session()

  # init_proxy_groups_train is the initial noisy group assignments.
  features_train, labels_train, init_proxy_groups_train, _ = extract_features(
      train_df,
      feature_names=feature_names,
      label_name=label_name,
      proxy_group_names=proxy_columns,
      true_group_names=protected_columns,
      uniform_groups=uniform_groups,
      min_group_frac=min_group_frac)

  num_groups = init_proxy_groups_train.shape[1]
  noise_array = None
  if use_noise_array and not uniform_groups:
    noise_array = get_noise_array(train_df,
                                  protected_columns=protected_columns,
                                  proxy_columns=proxy_columns,
                                  num_groups=num_groups)

  num_examples = len(train_df)
  num_features = len(feature_names)
  if batch_size is None:
    batch_size = num_examples

  # Get the number of group features.
  kmeans_model = None
  num_group_features = None
  if group_features_type == 'full_group_vec':
    num_group_features = num_examples
  elif group_features_type == 'size_alone':
    num_group_features = 1
  elif group_features_type == 'size_and_pr':
    num_group_features = 2
  elif group_features_type == 'avg_features':
    num_group_features = num_features + 1
  elif group_features_type == 'kmeans':
    kmeans_model = KMeans(n_clusters=num_group_clusters,
                          random_state=0).fit(features_train)
    num_group_features = num_group_clusters

  # Features.
  features_tensor = tf.Variable(np.zeros((batch_size, num_features),
                                         dtype='float32'),
                                name='features')
  # Labels.
  labels_tensor = tf.Variable(np.zeros((batch_size, 1), dtype='float32'),
                              name='labels')
  # Protected groups.
  # We will resample these groups every epoch during training.
  groups_tensor = tf.Variable(np.zeros((batch_size, num_groups),
                                       dtype='float32'),
                              name='groups')
  # Protected group features.
  groups_features_tensor = tf.Variable(np.zeros(
      (num_groups, num_group_features), dtype='float32'),
                                       name='group_features')

  # Linear model with no hidden layers.
  layers = []
  layers.append(tf.keras.Input(shape=(num_features,)))
  layers.append(tf.keras.layers.Dense(1))
  # Keras model.
  model = tf.keras.Sequential(layers)

  # Set up the rate minimization problem: we minimize the overall error rate
  # subject to the TPR of each individual group being within epsilon of the
  # overall TPR.
  def predictions():
    return model(features_tensor)

  context = tfco.rate_context(predictions, labels=lambda: labels_tensor)
  overall_error = tfco.error_rate(context)
  constraints = []
  if not unconstrained:
    # Add group rate constraints.
    pos_context = context.subset(lambda: labels_tensor > 0)
    overall_tpr = tfco.positive_prediction_rate(pos_context)
    for jj in range(num_groups):
      group_pos_context = pos_context.subset(
          lambda kk=jj: groups_tensor[:, kk] > 0)
      group_tpr = tfco.positive_prediction_rate(group_pos_context)
      constraints.append(group_tpr >= overall_tpr - epsilon)
  problem = tfco.RateMinimizationProblem(overall_error, constraints)

  # Set up the multiplier model.
  if not unconstrained:
    if standard_lagrangian:
      common_multiplier = tf.Variable(np.ones((len(constraints), 1)),
                                      dtype='float32',
                                      name='common_multiplier')
      multiplier_weights = [common_multiplier]
    else:
      multiplier_model, multiplier_weights = create_multiplier_model(
          feature_dependent_multiplier=feature_dependent_multiplier,
          num_group_features=num_group_features,
          hidden_layers=multiplier_model_hidden_layers)

  # Set up the Lagrangian loss.
  def lagrangian_loss():
    # Separate out the objective, constraints and proxy constraints.
    objective = problem.objective()
    constraints = problem.constraints()
    proxy_constraints = problem.proxy_constraints()

    # Set up a custom Lagrangian loss with feature-dependent multipliers.
    multipliers = tf.abs(multiplier_model(groups_features_tensor))
    primal = objective + tf.stop_gradient(multipliers) * proxy_constraints
    dual = dual_scale * multipliers * tf.stop_gradient(constraints)
    return primal - dual

  # Standard Lagrangian loss with a separate multiplier for each constraint.
  def lagrangian_loss_standard():
    objective = problem.objective()
    constraints = problem.constraints()
    proxy_constraints = problem.proxy_constraints()

    # Set up the standard Lagrangian loss.
    multipliers = tf.abs(common_multiplier)
    primal = objective + tf.stop_gradient(multipliers) * proxy_constraints
    dual = dual_scale * multipliers * tf.stop_gradient(constraints)
    return primal - dual

  # Set up the unconstrained loss.
  def unconstrained_loss():
    return problem.objective()

  # Create the optimizer.
  optimizer = tf.keras.optimizers.Adagrad(learning_rate=learning_rate)

  # List of variables to optimize: the model parameters and, in the
  # constrained case, the multiplier weights.
  if unconstrained:
    var_list = model.trainable_weights
  else:
    var_list = model.trainable_weights + multiplier_weights

  # Set up a counter for the minibatch stream.
  batch_index = 0

  # Record objectives and constraint violations.
  results_dict = {
      'train.objectives': [],
      'train.batch_violations': [],
      'train.true_error_rates': [],
      'train.sampled_violations_max': [],
      'train.sampled_violations_90p': [],
      'train.proxy_group_violations': [],
      'train.true_group_violations': [],
      'val.true_error_rates': [],
      'val.sampled_violations_max': [],
      'val.sampled_violations_90p': [],
      'val.proxy_group_violations': [],
      'val.true_group_violations': [],
      'test.true_error_rates': [],
      'test.sampled_violations_max': [],
      'test.sampled_violations_90p': [],
      'test.proxy_group_violations': [],
      'test.true_group_violations': []
  }
  group_sample_epochs = 0

  # Loop over minibatches.
  groups_train = init_proxy_groups_train
  if not unconstrained:
    group_features = extract_group_features(
        groups_train,
        features_train,
        labels_train,
        group_features_type,
        num_group_clusters=num_group_features,
        kmeans_model=kmeans_model)
    groups_features_tensor.assign(group_features)
  for ii in range(num_steps):
    # Indices for the current minibatch in the stream.
    batch_indices = np.arange(batch_index * batch_size,
                              (batch_index + 1) * batch_size)

    # Check for the beginning of a new epoch.
    if resample_proxy_groups and not unconstrained:
      if new_epoch(batch_index,
                   batch_size=batch_size,
                   num_examples=num_examples):
        # Only resample proxy groups every epochs_per_resample epochs.
        if group_sample_epochs % epochs_per_resample == 0:
          # Resample the groups at the beginning of the epoch.
          # Draw groups_train from a ball around init_proxy_groups_train.
          if uniform_groups:
            groups_train = generate_proxy_groups_uniform(
                num_examples, min_group_frac=min_group_frac)
          elif use_noise_array:
            groups_train = generate_proxy_groups_noise_array(
                init_proxy_groups_train, noise_array=noise_array)
          else:
            groups_train = generate_proxy_groups_single_noise(
                init_proxy_groups_train, noise_param=FLAGS.noise_level)
          # Recompute group features at the beginning of the epoch.
          group_features = extract_group_features(
              groups_train,
              features_train,
              labels_train,
              group_features_type,
              num_group_clusters=num_group_features,
              kmeans_model=kmeans_model)
          groups_features_tensor.assign(group_features)
        group_sample_epochs += 1

    # Cycle back to the beginning if we have reached the end of the stream.
    batch_indices = [ind % num_examples for ind in batch_indices]

    # Assign features, labels and groups for the minibatch.
    features_tensor.assign(features_train[batch_indices, :])
    labels_tensor.assign(labels_train[batch_indices].reshape(-1, 1))
    groups_tensor.assign(groups_train[batch_indices])

    # Gradient update.
    with tf.control_dependencies(problem.update_ops()):
      if unconstrained:
        optimizer.minimize(unconstrained_loss, var_list=var_list)
      elif standard_lagrangian:
        optimizer.minimize(lagrangian_loss_standard, var_list=var_list)
      else:
        optimizer.minimize(lagrangian_loss, var_list=var_list)

    if (ii % skip_iterations == 0) or (ii == num_steps - 1):
      # Record metrics.
      results_dict['train.objectives'].append(problem.objective().numpy())
      if not unconstrained:
        results_dict['train.batch_violations'].append(
            np.max(problem.constraints().numpy()))
      else:
        results_dict['train.batch_violations'].append(0)
      for df, split in [(train_df, 'train'), (val_df, 'val'),
                        (test_df, 'test')]:
        add_summary_viols_to_results_dict(
            df,
            model,
            results_dict,
            split,
            feature_names=feature_names,
            label_name=label_name,
            proxy_columns=proxy_columns,
            protected_columns=protected_columns,
            epsilon=epsilon,
            n_resamples_per_candidate=n_resamples_per_candidate,
            use_noise_array=use_noise_array,
            noise_array=noise_array,
            uniform_groups=uniform_groups,
            min_group_frac=min_group_frac)
      print('%d: batch obj: %.3f | batch viol: %.3f | true error: %.3f | '
            'sampled viol: %.3f | true group viol: %.3f' %
            (ii, results_dict['train.objectives'][-1],
             results_dict['train.batch_violations'][-1],
             results_dict['train.true_error_rates'][-1],
             results_dict['train.sampled_violations_max'][-1],
             results_dict['train.true_group_violations'][-1]))

    batch_index += 1

  return model, results_dict
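
# A hedged example of invoking train_helper; the dataframes and column lists
# are assumptions about the surrounding experiment code, not names defined
# here:
def example_run(train_df, val_df, test_df, feature_names, label_name,
                proxy_columns, protected_columns):
  model, results = train_helper(
      train_df, val_df, test_df,
      feature_names=feature_names,
      label_name=label_name,
      proxy_columns=proxy_columns,
      protected_columns=protected_columns,
      num_steps=1000)
  # results is keyed by split and metric, e.g. results['val.true_error_rates'].
  return model, results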
def lagrangian_optimizer(train_set,
                         epsilon=epsilon,
                         learning_rate=0.1,
                         learning_rate_constraint=0.1,
                         loops=2000):
  tf.reset_default_graph()

  x_train, y_train, z_train = train_set
  num_examples = x_train.shape[0]
  dimension = x_train.shape[-1]

  # Data tensors.
  features_tensor = tf.constant(x_train.astype("float32"), name="features")
  labels_tensor = tf.constant(y_train.astype("float32"), name="labels")

  # Linear model.
  weights = tf.Variable(tf.zeros(dimension, dtype=tf.float32), name="weights")
  threshold = tf.Variable(0, name="threshold", dtype=tf.float32)
  predictions_tensor = (
      tf.tensordot(features_tensor, weights, axes=(1, 0)) + threshold)
  predictions_group0 = tf.boolean_mask(predictions_tensor, mask=(z_train < 1))
  num0 = np.sum(z_train < 1)
  predictions_group1 = tf.boolean_mask(predictions_tensor, mask=(z_train > 0))
  num1 = np.sum(z_train > 0)

  # Set up rates.
  context = tfco.rate_context(predictions_tensor, labels_tensor)
  true_positive_rate = tfco.true_positive_rate(context)
  true_negative_rate = tfco.true_negative_rate(context)
  pred_positive = tfco.positive_prediction_rate(context)

  # Set up slack variables.
  slack_tpr = tf.Variable(0.5, dtype=tf.float32)
  slack_tnr = tf.Variable(0.5, dtype=tf.float32)

  # Projection ops to keep the slacks within (0, 1).
  projection_ops = []
  projection_ops.append(
      tf.assign(slack_tpr, tf.clip_by_value(slack_tpr, 0.001, 0.999)))
  projection_ops.append(
      tf.assign(slack_tnr, tf.clip_by_value(slack_tnr, 0.001, 0.999)))

  # Set up the 1 - G-mean objective.
  objective = tfco.wrap_rate(1.0 - tf.sqrt(slack_tpr * slack_tnr))

  # Set up the slack constraints.
  constraints = []
  constraints.append(tfco.wrap_rate(slack_tpr) <= true_positive_rate)
  constraints.append(tfco.wrap_rate(slack_tnr) <= true_negative_rate)

  # Set up the coverage (COV) constraint.
  constraints.append(pred_positive <= epsilon)

  # Set up the constrained optimization problem.
  problem = tfco.RateMinimizationProblem(objective, constraints)

  # Set up the solver.
  optimizer = tf.train.AdamOptimizer(learning_rate)
  constraint_optimizer = tf.train.AdamOptimizer(learning_rate_constraint)
  lagrangian_optimizer = tfco.ProxyLagrangianOptimizerV1(
      optimizer=optimizer, constraint_optimizer=constraint_optimizer)
  train_op = lagrangian_optimizer.minimize(problem)

  # Start a TF session and initialize variables.
  session = tf.Session()
  tf.set_random_seed(654321)  # Set a random seed for reproducibility.
  session.run(tf.global_variables_initializer())

  # We maintain a list of objectives and model weights during training.
  objectives = []
  violations = []
  models = []

  # Perform full gradient updates.
  for ii in range(loops):
    # Gradient update.
    session.run(train_op)
    # Projection.
    session.run(projection_ops)

    # Checkpoint once every 100 iterations.
    if ii % 100 == 0:
      # Model weights.
      model = [session.run(weights), session.run(threshold)]
      models.append(model)

      # Snapshot performance.
      error, pp = evaluate_expected_results(train_set, [model], [1.0])
      objectives.append(error)
      violations.append([pp - epsilon])
      # print("Step %d | G-mean error = %.3f | COV violation = %.3f" % (
      #     ii, objectives[-1], max(violations[-1])))

  # Use the recorded objectives and constraints to find the best iterate.
  best_iterate = tfco.find_best_candidate_index(
      np.array(objectives), np.array(violations))
  best_model = models[best_iterate]

  # Stochastic model over a subset of classifiers.
  probabilities = tfco.find_best_candidate_distribution(
      np.array(objectives), np.array(violations))
  models_pruned = [
      models[i] for i in range(len(models)) if probabilities[i] > 0.0
  ]
  probabilities_pruned = probabilities[probabilities > 0.0]

  # Stochastic model over all classifiers.
  probabilities_all = probabilities * 0.0 + 1.0 / len(probabilities)

  # Return the stochastic, pruned and best models, each as a
  # (models, probabilities) pair.
  results = {
      'stochastic': (models, probabilities_all),
      'pruned': (models_pruned, probabilities_pruned),
      'best': ([best_model], [1.0]),
      'objectives': objectives,
      'violations': violations
  }
  return results
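
# Hedged sketch (not part of the original code): applying the stochastic
# classifier returned above by first sampling one linear model according to
# its probability weights; assumes `features` is a (num_examples, dimension)
# array matching the training features.
def stochastic_predict(results, features, seed=0):
  models, probabilities = results['pruned']
  rng = np.random.default_rng(seed)
  probabilities = np.asarray(probabilities, dtype='float64')
  idx = rng.choice(len(models), p=probabilities / probabilities.sum())
  weights, threshold = models[idx]
  return (np.dot(features, weights) + threshold > 0).astype(int)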