Ejemplo n.º 1
0
def error_rate_optimizer(train_set, learning_rate, loops):
  """Trains a linear model on the hinge loss and returns the best checkpoint.

  A linear model `features . weights + threshold` is fit with full-batch Adam
  updates on the hinge loss. Every 10th iteration (starting with iteration 0)
  the current parameters are checkpointed and scored with
  `evaluation.expected_error_rate` on the training data. The checkpoint with
  the lowest score is returned, so the result is not necessarily the final
  iterate.

  Args:
    train_set: Tuple `(x_train, y_train, _)`; the third entry is ignored.
      `x_train` and `y_train` must support `.astype` and `x_train.shape[-1]`
      is used as the number of features (presumably `x_train` is a 2-D
      array of shape (num_examples, dimension) -- confirm with the caller).
    learning_rate: Learning rate passed to `tf.train.AdamOptimizer`.
    loops: Number of gradient updates to perform; must be at least 1.

  Returns:
    A list `[weights, threshold]` holding the parameter values of the
    checkpoint with the lowest recorded training error rate.

  Raises:
    ValueError: If `loops` is less than 1, since no checkpoint would be
      recorded and there would be nothing to return.
  """
  x_train, y_train, _ = train_set
  dimension = x_train.shape[-1]

  # Without at least one iteration no checkpoint exists; fail with a clear
  # message instead of the opaque ValueError np.argmin raises on an empty list.
  if loops < 1:
    raise ValueError("loops must be at least 1, got %r" % (loops,))

  # Start from an empty graph so repeated calls do not accumulate ops.
  tf.reset_default_graph()

  # Data tensors.
  features_tensor = tf.constant(x_train.astype("float32"), name="features")
  labels_tensor = tf.constant(y_train.astype("float32"), name="labels")

  # Linear model.
  weights = tf.Variable(tf.zeros(dimension, dtype=tf.float32),
                        name="weights")
  threshold = tf.Variable(0, name="threshold", dtype=tf.float32)
  predictions_tensor = (tf.tensordot(features_tensor, weights, axes=(1, 0))
                        + threshold)

  # Set up hinge loss objective.
  hinge_loss = tf.losses.hinge_loss(labels=labels_tensor,
                                    logits=predictions_tensor)

  # Set up the optimizer and get `train_op` for gradient updates.
  solver = tf.train.AdamOptimizer(learning_rate=learning_rate)
  train_op = solver.minimize(hinge_loss)

  # Checkpointed models and their training error rates, index-aligned.
  error_rates = []
  models = []

  # The context manager closes the session (and frees its resources) even if
  # training or evaluation raises.
  with tf.Session() as session:
    session.run(tf.global_variables_initializer())

    # Perform full gradient updates.
    for ii in range(loops):
      session.run(train_op)

      # Checkpoint once in 10 iterations.
      if ii % 10 == 0:
        # Fetch both parameters in a single run call.
        model = list(session.run([weights, threshold]))
        models.append(model)

        # Score the checkpoint on the training data. This is kept under a
        # separate name so it does not shadow the hinge-loss tensor.
        error_rates.append(evaluation.expected_error_rate(
            x_train, y_train, [model], [1.0]))

  # Return the checkpoint with the lowest recorded error rate.
  best_iterate = np.argmin(error_rates)
  return models[best_iterate]
Ejemplo n.º 2
0
def run_experiment():
    """Run experiments comparing unconstrained and constrained methods.

    Loads the train/validation/test splits from `FLAGS.data_file`, then
    trains and selects (on the validation split) the following models:

      1. An unconstrained error-rate model (`methods.error_rate_optimizer`),
         chosen by lowest validation expected error rate.
      2. A logistic regression model chosen by lowest validation
         cross-entropy, then post-shifted with `methods.post_shift_fmeasure`.
      3. A stochastic and a deterministic model from
         `methods.lagrangian_optimizer_fmeasure`, chosen over a grid of
         learning rates with `tfco.find_best_candidate_index`.

    Finally `print_results` is called on the test split for each selected
    model and its return value is stored in a local `results` dict.

    Reads `FLAGS.data_file`, `FLAGS.loops_unc`, `FLAGS.loops_con` and
    `FLAGS.epsilon`.
    """
    # Range of hyper-parameters for unconstrained and constrained optimization.
    lr_range_unc = [0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0]
    lr_range_con = [0.001, 0.01, 0.1, 1.0]

    # Load dataset.
    # NOTE(review): allow_pickle=True unpickles the file contents, which can
    # execute arbitrary code; only point FLAGS.data_file at trusted files.
    with open(FLAGS.data_file, "rb") as f:
        train_set, vali_set, test_set = np.load(f,
                                                allow_pickle=True,
                                                fix_imports=True)
    # Validation split: features, labels and a third array that is passed to
    # the per-group F-measure evaluation below (presumably the group
    # membership -- confirm against the data file).
    x_vali, y_vali, z_vali = vali_set

    ##################################################
    # Unconstrained Error Optimization.
    print("Running unconstrained error optimization")

    # Index-aligned lists: one trained model and one validation error per
    # learning rate.
    models_unc = []
    param_objectives_unc = []

    # Find best learning rate.
    for lr_model in lr_range_unc:
        model = methods.error_rate_optimizer(train_set,
                                             learning_rate=lr_model,
                                             loops=FLAGS.loops_unc)
        error = evaluation.expected_error_rate(x_vali, y_vali, [model], [1.0])
        param_objectives_unc.append(error)
        models_unc.append(model)

    # Keep the model with the lowest validation error rate.
    best_param_index_unc = np.argmin(param_objectives_unc)
    model_er = models_unc[best_param_index_unc]
    print()

    ##################################################
    # Post-shift F1 Optimization.
    print("Running unconstrained F-measure optimization (Post-shift)")

    # First train logistic regression model.
    models_log = []
    param_objectives_log = []

    # Find best learning rate.
    for lr_model in lr_range_unc:
        model = methods.logistic_regression(train_set,
                                            learning_rate=lr_model,
                                            loops=FLAGS.loops_unc)
        loss = evaluation.cross_entropy_loss(x_vali, y_vali, model[0],
                                             model[1])
        param_objectives_log.append(loss)
        models_log.append(model)

    # Keep the model with the lowest validation cross-entropy loss.
    best_param_index_log = np.argmin(param_objectives_log)
    logreg_model = models_log[best_param_index_log]

    # Post-shift logistic regression model to optimize F-measure.
    # Note that the validation split (not the training split) is passed here.
    model_ps = methods.post_shift_fmeasure(vali_set, logreg_model)
    print()

    ##################################################
    # Surrogate-based Lagrangian Optimizer for Sums-of-ratios (Algorithm 3).
    print("Running constrained Lagrangian optimization (Algorithm 3)")

    # Maintain list of models, objectives and violations for hyper-parameters.
    # All four lists are index-aligned, with one entry per
    # (lr_model, lr_constraint) pair.
    stochastic_models_list = []
    deterministic_models_list = []
    param_objectives_con = []
    param_violations_con = []

    # Find best learning rates for model parameters and Lagrange multipliers.
    # The same candidate range is used for both, giving a full grid search.
    for lr_model in lr_range_con:
        for lr_constraint in lr_range_con:
            stochastic_model, deterministic_model = (
                methods.lagrangian_optimizer_fmeasure(
                    train_set,
                    learning_rate=lr_model,
                    learning_rate_constraint=lr_constraint,
                    loops=FLAGS.loops_con,
                    epsilon=FLAGS.epsilon))
            stochastic_models_list.append(stochastic_model)
            deterministic_models_list.append(deterministic_model)

            # Record objective and constraint violations for stochastic model.
            # The F-measure is negated before being stored, presumably so the
            # candidate search below can treat lower values as better --
            # confirm against tfco.find_best_candidate_index.
            fm = -evaluation.expected_fmeasure(
                x_vali, y_vali, stochastic_model[0], stochastic_model[1])
            param_objectives_con.append(fm)

            # Single constraint per candidate, stored as a one-element list:
            # the gap between the two group F-measures minus the allowed
            # slack FLAGS.epsilon.
            fm0, fm1 = evaluation.expected_group_fmeasures(
                x_vali, y_vali, z_vali, stochastic_model[0],
                stochastic_model[1])
            param_violations_con.append([fm0 - fm1 - FLAGS.epsilon])

            # Report the (un-negated) validation F-measure and the largest
            # violation for this pair of learning rates.
            print("Parameters (%.3f, %.3f): %.3f (%.3f)" %
                  (lr_model, lr_constraint, -param_objectives_con[-1],
                   max(param_violations_con[-1])))

    # Best param.
    # Selection is based on the stochastic models' validation metrics only;
    # the deterministic model at the same index is taken along with it.
    best_param_index_con = tfco.find_best_candidate_index(
        np.array(param_objectives_con), np.array(param_violations_con))

    stochastic_model_con = stochastic_models_list[best_param_index_con]
    deterministic_model_con = deterministic_models_list[best_param_index_con]
    print()

    # Print summary of performance on test set.
    # Single models are wrapped in a one-element list paired with [1.0]; the
    # stochastic model's two components are passed through directly
    # (presumably a list of models and matching probabilities -- confirm
    # against print_results).
    # NOTE(review): `results` is populated but not read in the visible part
    # of this function.
    results = {}
    results["UncError"] = print_results(test_set, [model_er], [1.0],
                                        "UncError")
    results["UncF1"] = print_results(test_set, [model_ps], [1.0], "UncF1")
    results["Stochastic"] = print_results(test_set, stochastic_model_con[0],
                                          stochastic_model_con[1],
                                          "Constrained (Stochastic)")
    results["Deterministic"] = print_results(test_set,
                                             [deterministic_model_con], [1.0],
                                             "Constrained (Deterministic)")