def print_results(test_set, models, probabilities, title):
  """Prints and returns F-measure and constraint violation on test set."""
  x_test, y_test, z_test = test_set
  fm = evaluation.expected_fmeasure(x_test, y_test, models, probabilities)
  fm0, fm1 = evaluation.expected_group_fmeasures(
      x_test, y_test, z_test, models, probabilities)
  print(title + ": %.3f (%.3f)" % (fm, fm0 - fm1))
  return fm, fm0 - fm1
def lagrangian_optimizer_fmeasure(
    train_set, epsilon, learning_rate, learning_rate_constraint, loops):
  """Implements surrogate-based Lagrangian optimizer (Algorithm 3).

  Specifically solves:
    max F-measure s.t. F-measure(group1) >= F-measure(group0) - epsilon.

  Args:
    train_set: (features, labels, groups)
    epsilon: float, constraint slack.
    learning_rate: float, learning rate for model parameters.
    learning_rate_constraint: float, learning rate for Lagrange multipliers.
    loops: int, number of iterations.

  Returns:
    stochastic_model containing a list of models and probabilities,
    deterministic_model
  """
  x_train, y_train, z_train = train_set
  dimension = x_train.shape[-1]

  tf.reset_default_graph()

  # Data tensors.
  features_tensor = tf.constant(x_train.astype("float32"), name="features")
  labels_tensor = tf.constant(y_train.astype("float32"), name="labels")

  # Linear model.
  weights = tf.Variable(tf.zeros(dimension, dtype=tf.float32),
                        name="weights")
  threshold = tf.Variable(0, name="threshold", dtype=tf.float32)
  predictions_tensor = (tf.tensordot(features_tensor, weights, axes=(1, 0))
                        + threshold)

  # Contexts.
  context = tfco.rate_context(predictions_tensor, labels_tensor)
  context0 = context.subset(z_train < 1)
  context1 = context.subset(z_train > 0)

  # F-measure rates: lower bounds for the quantities we want large (the
  # overall and group-1 F-measures) and an upper bound for group 0, which
  # sits on the larger side of the constraint, so the surrogate problem is
  # conservative.
  fm_overall = tfco.f_score_lower_bound(context)
  fm1 = tfco.f_score_lower_bound(context1)
  fm0 = tfco.f_score_upper_bound(context0)

  # Rate minimization problem.
  problem = tfco.RateMinimizationProblem(-fm_overall, [fm0 <= fm1 + epsilon])

  # Optimizer.
  optimizer = tfco.LagrangianOptimizer(
      tf.train.AdamOptimizer(learning_rate=learning_rate),
      constraint_optimizer=tf.train.AdamOptimizer(
          learning_rate=learning_rate_constraint))
  train_op = optimizer.minimize(problem)

  # Start TF session and initialize variables.
  session = tf.Session()
  session.run(tf.global_variables_initializer())

  # We maintain a list of objectives, violations and model weights during
  # training.
  objectives = []
  violations = []
  models = []

  # Perform full gradient updates.
  for ii in range(loops):
    # Gradient update.
    session.run(train_op)

    # Checkpoint once every 10 iterations.
    if ii % 10 == 0:
      # Model weights.
      model = [session.run(weights), session.run(threshold)]
      models.append(model)

      # Objective.
      objective = -evaluation.expected_fmeasure(
          x_train, y_train, [model], [1.0])
      objectives.append(objective)

      # Constraint violation.
      fmeasure0, fmeasure1 = evaluation.expected_group_fmeasures(
          x_train, y_train, z_train, [model], [1.0])
      violations.append([fmeasure0 - fmeasure1 - epsilon])

  # Use the recorded objectives and constraints to find the best iterate.
  best_iterate = tfco.find_best_candidate_index(
      np.array(objectives), np.array(violations))
  deterministic_model = models[best_iterate]

  # Use shrinking to find a sparse distribution over iterates.
  probabilities = tfco.find_best_candidate_distribution(
      np.array(objectives), np.array(violations))
  models_pruned = [models[i] for i in range(len(models))
                   if probabilities[i] > 0.0]
  probabilities_pruned = probabilities[probabilities > 0.0]

  return (models_pruned, probabilities_pruned), deterministic_model
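# Illustrative usage of the optimizer above: a minimal sketch on synthetic
# data. The data shapes, random labels/groups and hyper-parameter values are
# assumptions for illustration only, not the original experiment settings;
# it relies on this module's existing numpy import (np).
def _demo_lagrangian_optimizer():
  """Runs lagrangian_optimizer_fmeasure on small synthetic data."""
  rng = np.random.RandomState(0)
  x = rng.normal(size=(1000, 10)).astype(np.float32)
  y = (rng.uniform(size=1000) < 0.3).astype(np.float32)  # Binary labels.
  z = (rng.uniform(size=1000) < 0.5).astype(np.float32)  # Group membership.
  (models, probabilities), deterministic_model = (
      lagrangian_optimizer_fmeasure(
          (x, y, z), epsilon=0.01, learning_rate=0.1,
          learning_rate_constraint=0.1, loops=100))
  print("Stochastic solution supported on %d iterates" % len(models))
  print("Iterate probabilities:", probabilities)
  print("Deterministic threshold: %.3f" % deterministic_model[1])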
def run_experiment():
  """Run experiments comparing unconstrained and constrained methods."""
  # Range of hyper-parameters for unconstrained and constrained optimization.
  lr_range_unc = [0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0]
  lr_range_con = [0.001, 0.01, 0.1, 1.0]

  # Load dataset.
  with open(FLAGS.data_file, "rb") as f:
    train_set, vali_set, test_set = np.load(f, allow_pickle=True,
                                            fix_imports=True)
  x_vali, y_vali, z_vali = vali_set

  ##################################################
  # Unconstrained Error Optimization.
  print("Running unconstrained error optimization")

  models_unc = []
  param_objectives_unc = []

  # Find best learning rate.
  for lr_model in lr_range_unc:
    model = methods.error_rate_optimizer(
        train_set, learning_rate=lr_model, loops=FLAGS.loops_unc)
    error = evaluation.expected_error_rate(x_vali, y_vali, [model], [1.0])
    param_objectives_unc.append(error)
    models_unc.append(model)

  best_param_index_unc = np.argmin(param_objectives_unc)
  model_er = models_unc[best_param_index_unc]
  print()

  ##################################################
  # Post-shift F1 Optimization.
  print("Running unconstrained F-measure optimization (Post-shift)")

  # First train a logistic regression model.
  models_log = []
  param_objectives_log = []

  # Find best learning rate.
  for lr_model in lr_range_unc:
    model = methods.logistic_regression(
        train_set, learning_rate=lr_model, loops=FLAGS.loops_unc)
    loss = evaluation.cross_entropy_loss(x_vali, y_vali, model[0], model[1])
    param_objectives_log.append(loss)
    models_log.append(model)

  best_param_index_log = np.argmin(param_objectives_log)
  logreg_model = models_log[best_param_index_log]

  # Post-shift the logistic regression model to optimize F-measure.
  model_ps = methods.post_shift_fmeasure(vali_set, logreg_model)
  print()

  ##################################################
  # Surrogate-based Lagrangian Optimizer for Sums-of-ratios (Algorithm 3).
  print("Running constrained Lagrangian optimization (Algorithm 3)")

  # Maintain list of models, objectives and violations for hyper-parameters.
  stochastic_models_list = []
  deterministic_models_list = []
  param_objectives_con = []
  param_violations_con = []

  # Find best learning rates for model parameters and Lagrange multipliers.
  for lr_model in lr_range_con:
    for lr_constraint in lr_range_con:
      stochastic_model, deterministic_model = (
          methods.lagrangian_optimizer_fmeasure(
              train_set,
              learning_rate=lr_model,
              learning_rate_constraint=lr_constraint,
              loops=FLAGS.loops_con,
              epsilon=FLAGS.epsilon))
      stochastic_models_list.append(stochastic_model)
      deterministic_models_list.append(deterministic_model)

      # Record objective and constraint violations for stochastic model.
      fm = -evaluation.expected_fmeasure(
          x_vali, y_vali, stochastic_model[0], stochastic_model[1])
      param_objectives_con.append(fm)

      fm0, fm1 = evaluation.expected_group_fmeasures(
          x_vali, y_vali, z_vali, stochastic_model[0], stochastic_model[1])
      param_violations_con.append([fm0 - fm1 - FLAGS.epsilon])

      print("Parameters (%.3f, %.3f): %.3f (%.3f)" % (
          lr_model, lr_constraint, -param_objectives_con[-1],
          max(param_violations_con[-1])))

  # Best param.
  best_param_index_con = tfco.find_best_candidate_index(
      np.array(param_objectives_con), np.array(param_violations_con))
  stochastic_model_con = stochastic_models_list[best_param_index_con]
  deterministic_model_con = deterministic_models_list[best_param_index_con]
  print()

  # Print summary of performance on test set.
  results = {}

  results["UncError"] = print_results(
      test_set, [model_er], [1.0], "UncError")
  results["UncF1"] = print_results(
      test_set, [model_ps], [1.0], "UncF1")
  results["Stochastic"] = print_results(
      test_set, stochastic_model_con[0], stochastic_model_con[1],
      "Constrained (Stochastic)")
  results["Deterministic"] = print_results(
      test_set, [deterministic_model_con], [1.0],
      "Constrained (Deterministic)")
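# Entry-point sketch: a minimal way to run the experiment as a script. This
# is an assumption for illustration; the original file may already define
# its own main elsewhere. It presumes the absl flags referenced above
# (--data_file, --loops_unc, --loops_con, --epsilon) are declared near the
# top of this module.
from absl import app  # May duplicate an import in the file header.


def main(argv):
  del argv  # Unused.
  run_experiment()


if __name__ == "__main__":
  app.run(main)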