Ejemplo n.º 1
0
def print_results(test_set, models, probabilities, title):
    """Print and return the F-measure and the group F-measure gap.

    Args:
      test_set: triple unpacked as (x, y, z) and forwarded to the
        `evaluation` helpers.
      models: list of models forwarded to the `evaluation` helpers.
      probabilities: weights over `models`, forwarded likewise.
      title: label printed in front of the two numbers.

    Returns:
      Tuple (overall F-measure, first group F-measure minus the second), in
      the order the pair is returned by
      `evaluation.expected_group_fmeasures`.
    """
    features, labels, groups = test_set

    overall = evaluation.expected_fmeasure(
        features, labels, models, probabilities)
    first_group, second_group = evaluation.expected_group_fmeasures(
        features, labels, groups, models, probabilities)

    # The same (value, gap) pair is both printed and returned.
    summary = (overall, first_group - second_group)
    print(title + ": %.3f (%.3f)" % summary)
    return summary
Ejemplo n.º 2
0
def lagrangian_optimizer_fmeasure(
    train_set, epsilon, learning_rate, learning_rate_constraint, loops):
  """Implements surrogate-based Lagrangian optimizer (Algorithm 3).

  Specifically solves:
    max F-measure s.t. F-measure(group1) >= F-measure(group0) - epsilon.

  Args:
    train_set: (features, labels, groups)
    epsilon: float, constraint slack.
    learning_rate: float, learning rate for model parameters.
    learning_rate_constraint: float, learning rate for Lagrange multipliers.
    loops: int, number of iterations.

  Returns:
    A pair (stochastic_model, deterministic_model):
      stochastic_model: (models, probabilities) restricted to the
        checkpointed iterates that received non-zero probability.
      deterministic_model: the single best checkpointed iterate, as a
        [weights, threshold] list.
  """
  x_train, y_train, z_train = train_set
  dimension = x_train.shape[-1]

  tf.reset_default_graph()

  # Data tensors.
  features_tensor = tf.constant(x_train.astype("float32"), name="features")
  labels_tensor = tf.constant(y_train.astype("float32"), name="labels")

  # Linear model.
  weights = tf.Variable(tf.zeros(dimension, dtype=tf.float32),
                        name="weights")
  threshold = tf.Variable(0, name="threshold", dtype=tf.float32)
  predictions_tensor = (tf.tensordot(features_tensor, weights, axes=(1, 0))
                        + threshold)

  # Contexts: the full data set plus one subset per group.
  context = tfco.rate_context(predictions_tensor, labels_tensor)
  context0 = context.subset(z_train < 1)
  context1 = context.subset(z_train > 0)

  # F-measure rates. Lower bounds are used where the F-measure should be
  # large (objective, group 1) and an upper bound where it appears on the
  # small side of the constraint (group 0).
  fm_overall = tfco.f_score_lower_bound(context)
  fm1 = tfco.f_score_lower_bound(context1)
  fm0 = tfco.f_score_upper_bound(context0)

  # Rate minimization: maximizing the F-measure == minimizing its negation.
  problem = tfco.RateMinimizationProblem(-fm_overall, [fm0 <= fm1 + epsilon])

  # Optimizer.
  optimizer = tfco.LagrangianOptimizer(
      tf.train.AdamOptimizer(learning_rate=learning_rate),
      constraint_optimizer=tf.train.AdamOptimizer(
          learning_rate=learning_rate_constraint))
  train_op = optimizer.minimize(problem)

  # We maintain a list of objectives and model weights during training.
  objectives = []
  violations = []
  models = []

  # Run training inside a context manager so the session (and the resources
  # it holds) is released on exit, including when an exception is raised.
  # This function is typically called many times in a hyper-parameter sweep,
  # so an unclosed session per call would accumulate.
  with tf.Session() as session:
    session.run(tf.global_variables_initializer())

    # Perform full gradient updates.
    for ii in range(loops):

      # Gradient updates.
      session.run(train_op)

      # Checkpoint once in 10 iterations. ii starts at 0, so the first
      # checkpoint is taken right after the first update.
      if ii % 10 == 0:
        # Model weights, fetched in a single run call.
        model = session.run([weights, threshold])
        models.append(model)

        # Objective (negated so that smaller is better).
        objective = -evaluation.expected_fmeasure(
            x_train, y_train, [model], [1.0])
        objectives.append(objective)

        # Violation: positive when group 0 exceeds group 1 by more than
        # epsilon.
        fmeasure0, fmeasure1 = evaluation.expected_group_fmeasures(
            x_train, y_train, z_train, [model], [1.0])
        violations.append([fmeasure0 - fmeasure1 - epsilon])

  objectives_array = np.array(objectives)
  violations_array = np.array(violations)

  # Use the recorded objectives and constraints to find the best iterate.
  best_iterate = tfco.find_best_candidate_index(
      objectives_array, violations_array)
  deterministic_model = models[best_iterate]

  # Use shrinking to find a sparse distribution over iterates.
  probabilities = tfco.find_best_candidate_distribution(
      objectives_array, violations_array)

  # Keep only the iterates that carry probability mass; the same mask is
  # applied to both lists so they stay aligned.
  keep = probabilities > 0.0
  models_pruned = [model for model, kept in zip(models, keep) if kept]
  probabilities_pruned = probabilities[keep]

  return (models_pruned, probabilities_pruned), deterministic_model
Ejemplo n.º 3
0
def run_experiment():
    """Run experiments comparing unconstrained and constrained methods.

    Three approaches are trained on the training split, tuned on the
    validation split and summarized on the test split via `print_results`:

      1. Unconstrained error-rate optimization
         (`methods.error_rate_optimizer`).
      2. Logistic regression followed by a post-shift step
         (`methods.post_shift_fmeasure`).
      3. Constrained Lagrangian optimization
         (`methods.lagrangian_optimizer_fmeasure`), swept over a grid of
         model / constraint learning rates.

    Reads `FLAGS.data_file`, `FLAGS.loops_unc`, `FLAGS.loops_con` and
    `FLAGS.epsilon`.
    """
    # Range of hyper-parameters for unconstrained and constrained optimization.
    lr_range_unc = [0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0]
    lr_range_con = [0.001, 0.01, 0.1, 1.0]

    # Load dataset.
    # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects,
    # which can execute code; only point FLAGS.data_file at trusted files.
    with open(FLAGS.data_file, "rb") as f:
        train_set, vali_set, test_set = np.load(f,
                                                allow_pickle=True,
                                                fix_imports=True)
    x_vali, y_vali, z_vali = vali_set

    ##################################################
    # Unconstrained Error Optimization.
    print("Running unconstrained error optimization")

    models_unc = []
    param_objectives_unc = []

    # Find best learning rate: train one model per rate and record its
    # validation error.
    for lr_model in lr_range_unc:
        model = methods.error_rate_optimizer(train_set,
                                             learning_rate=lr_model,
                                             loops=FLAGS.loops_unc)
        error = evaluation.expected_error_rate(x_vali, y_vali, [model], [1.0])
        param_objectives_unc.append(error)
        models_unc.append(model)

    # Keep the model with the lowest validation error.
    best_param_index_unc = np.argmin(param_objectives_unc)
    model_er = models_unc[best_param_index_unc]
    print()

    ##################################################
    # Post-shift F1 Optimization.
    print("Running unconstrained F-measure optimization (Post-shift)")

    # First train logistic regression model.
    models_log = []
    param_objectives_log = []

    # Find best learning rate: train one model per rate and record its
    # validation cross-entropy loss.
    for lr_model in lr_range_unc:
        model = methods.logistic_regression(train_set,
                                            learning_rate=lr_model,
                                            loops=FLAGS.loops_unc)
        # model is indexed as a two-element sequence here; presumably
        # (weights, threshold) -- confirm against methods.logistic_regression.
        loss = evaluation.cross_entropy_loss(x_vali, y_vali, model[0],
                                             model[1])
        param_objectives_log.append(loss)
        models_log.append(model)

    # Keep the model with the lowest validation loss.
    best_param_index_log = np.argmin(param_objectives_log)
    logreg_model = models_log[best_param_index_log]

    # Post-shift logistic regression model to optimize F-measure.
    # Note that the post-shift step is given the validation set, not the
    # training set.
    model_ps = methods.post_shift_fmeasure(vali_set, logreg_model)
    print()

    ##################################################
    # Surrogate-based Lagrangian Optimizer for Sums-of-ratios (Algorithm 3).
    print("Running constrained Lagrangian optimization (Algorithm 3)")

    # Maintain list of models, objectives and violations for hyper-parameters.
    # All four lists are appended to in lock-step, so one index selects the
    # same hyper-parameter pair in each of them.
    stochastic_models_list = []
    deterministic_models_list = []
    param_objectives_con = []
    param_violations_con = []

    # Find best learning rates for model parameters and Lagrange multipliers.
    for lr_model in lr_range_con:
        for lr_constraint in lr_range_con:
            stochastic_model, deterministic_model = (
                methods.lagrangian_optimizer_fmeasure(
                    train_set,
                    learning_rate=lr_model,
                    learning_rate_constraint=lr_constraint,
                    loops=FLAGS.loops_con,
                    epsilon=FLAGS.epsilon))
            stochastic_models_list.append(stochastic_model)
            deterministic_models_list.append(deterministic_model)

            # Record objective and constraint violations for stochastic model.
            # The F-measure is negated so that smaller objectives are better.
            fm = -evaluation.expected_fmeasure(
                x_vali, y_vali, stochastic_model[0], stochastic_model[1])
            param_objectives_con.append(fm)

            fm0, fm1 = evaluation.expected_group_fmeasures(
                x_vali, y_vali, z_vali, stochastic_model[0],
                stochastic_model[1])
            # Single-constraint violation, wrapped in a list so the stacked
            # array has one row per hyper-parameter pair.
            param_violations_con.append([fm0 - fm1 - FLAGS.epsilon])

            # Report the (un-negated) validation F-measure and the violation.
            print("Parameters (%.3f, %.3f): %.3f (%.3f)" %
                  (lr_model, lr_constraint, -param_objectives_con[-1],
                   max(param_violations_con[-1])))

    # Best param, chosen from the recorded validation objectives and
    # violations of the stochastic models.
    best_param_index_con = tfco.find_best_candidate_index(
        np.array(param_objectives_con), np.array(param_violations_con))

    # The deterministic model is taken at the same index, i.e. it is not
    # selected independently.
    stochastic_model_con = stochastic_models_list[best_param_index_con]
    deterministic_model_con = deterministic_models_list[best_param_index_con]
    print()

    # Print summary of performance on test set.
    # NOTE(review): `results` is filled in but neither returned nor read in
    # the code shown here -- confirm whether it is used further on.
    results = {}
    results["UncError"] = print_results(test_set, [model_er], [1.0],
                                        "UncError")
    results["UncF1"] = print_results(test_set, [model_ps], [1.0], "UncF1")
    results["Stochastic"] = print_results(test_set, stochastic_model_con[0],
                                          stochastic_model_con[1],
                                          "Constrained (Stochastic)")
    results["Deterministic"] = print_results(test_set,
                                             [deterministic_model_con], [1.0],
                                             "Constrained (Deterministic)")