Example #1
def main(unused_argv):
  """Runs the main script."""

  opts = register_optimizers()

  # Choose a set of problems to optimize. By default this includes quadratics,
  # 2-dimensional bowls, 2-class softmax problems, and non-noisy optimization
  # test problems (e.g. Rosenbrock, Beale)
  problems_and_data = []

  if FLAGS.include_sparse_softmax_problems:
    problems_and_data.extend(ps.sparse_softmax_2_class_sparse_problems())

  if FLAGS.include_one_hot_sparse_softmax_problems:
    problems_and_data.extend(
        ps.one_hot_sparse_softmax_2_class_sparse_problems())

  if FLAGS.include_quadratic_problems:
    problems_and_data.extend(ps.quadratic_problems())

  if FLAGS.include_noisy_quadratic_problems:
    problems_and_data.extend(ps.quadratic_problems_noisy())

  if FLAGS.include_large_quadratic_problems:
    problems_and_data.extend(ps.quadratic_problems_large())

  if FLAGS.include_bowl_problems:
    problems_and_data.extend(ps.bowl_problems())

  if FLAGS.include_noisy_bowl_problems:
    problems_and_data.extend(ps.bowl_problems_noisy())

  if FLAGS.include_softmax_2_class_problems:
    problems_and_data.extend(ps.softmax_2_class_problems())

  if FLAGS.include_noisy_softmax_2_class_problems:
    problems_and_data.extend(ps.softmax_2_class_problems_noisy())

  if FLAGS.include_optimization_test_problems:
    problems_and_data.extend(ps.optimization_test_problems())

  if FLAGS.include_noisy_optimization_test_problems:
    problems_and_data.extend(ps.optimization_test_problems_noisy())

  if FLAGS.include_fully_connected_random_2_class_problems:
    problems_and_data.extend(ps.fully_connected_random_2_class_problems())

  if FLAGS.include_matmul_problems:
    problems_and_data.extend(ps.matmul_problems())

  if FLAGS.include_log_objective_problems:
    problems_and_data.extend(ps.log_objective_problems())

  if FLAGS.include_rescale_problems:
    problems_and_data.extend(ps.rescale_problems())

  if FLAGS.include_norm_problems:
    problems_and_data.extend(ps.norm_problems())

  if FLAGS.include_noisy_norm_problems:
    problems_and_data.extend(ps.norm_problems_noisy())

  if FLAGS.include_sum_problems:
    problems_and_data.extend(ps.sum_problems())

  if FLAGS.include_noisy_sum_problems:
    problems_and_data.extend(ps.sum_problems_noisy())

  if FLAGS.include_sparse_gradient_problems:
    problems_and_data.extend(ps.sparse_gradient_problems())
    if FLAGS.include_fully_connected_random_2_class_problems:
      problems_and_data.extend(ps.sparse_gradient_problems_mlp())

  if FLAGS.include_min_max_well_problems:
    problems_and_data.extend(ps.min_max_well_problems())

  if FLAGS.include_sum_of_quadratics_problems:
    problems_and_data.extend(ps.sum_of_quadratics_problems())

  if FLAGS.include_projection_quadratic_problems:
    problems_and_data.extend(ps.projection_quadratic_problems())

  if FLAGS.include_outward_snake_problems:
    problems_and_data.extend(ps.outward_snake_problems())

  if FLAGS.include_dependency_chain_problems:
    problems_and_data.extend(ps.dependency_chain_problems())

  # log directory
  logdir = os.path.join(FLAGS.train_dir,
                        "{}_{}_{}_{}".format(FLAGS.optimizer,
                                             FLAGS.cell_cls,
                                             FLAGS.cell_size,
                                             FLAGS.num_cells))
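  # e.g. "<train_dir>/HierarchicalRNN_LSTMCell_20_2" for hypothetical flag
  # values; the directory name encodes the optimizer configuration.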

  # get the optimizer class and arguments
  optimizer_cls = opts[FLAGS.optimizer]

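  # HRNN_CELL_SIZES is defined at module level: a list of RNN cell sizes,
  # presumably one per level of the hierarchical RNN optimizer, which the
  # check below limits to one, two, or three levels.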
  assert len(HRNN_CELL_SIZES) in [1, 2, 3]
  optimizer_args = (HRNN_CELL_SIZES,)

  optimizer_kwargs = {
      "init_lr_range": (FLAGS.min_lr, FLAGS.max_lr),
      "learnable_decay": FLAGS.learnable_decay,
      "dynamic_output_scale": FLAGS.dynamic_output_scale,
      "cell_cls": getattr(tf.contrib.rnn, FLAGS.cell_cls),
      "use_attention": FLAGS.use_attention,
      "use_log_objective": FLAGS.use_log_objective,
      "num_gradient_scales": FLAGS.num_gradient_scales,
      "zero_init_lr_weights": FLAGS.zero_init_lr_weights,
      "use_log_means_squared": FLAGS.use_log_means_squared,
      "use_relative_lr": FLAGS.use_relative_lr,
      "use_extreme_indicator": FLAGS.use_extreme_indicator,
      "max_log_lr": FLAGS.max_log_lr,
      "obj_train_max_multiplier": FLAGS.objective_training_max_multiplier,
      "use_problem_lr_mean": FLAGS.use_problem_lr_mean,
      "use_gradient_shortcut": FLAGS.use_gradient_shortcut,
      "use_second_derivatives": FLAGS.use_second_derivatives,
      "use_lr_shortcut": FLAGS.use_lr_shortcut,
      "use_grad_products": FLAGS.use_grad_products,
      "use_multiple_scale_decays": FLAGS.use_multiple_scale_decays,
      "use_numerator_epsilon": FLAGS.use_numerator_epsilon,
      "learnable_inp_decay": FLAGS.learnable_inp_decay,
      "learnable_rnn_init": FLAGS.learnable_rnn_init,
  }
  optimizer_spec = problem_spec.Spec(
      optimizer_cls, optimizer_args, optimizer_kwargs)

  # make log directory
  tf.gfile.MakeDirs(logdir)

  is_chief = FLAGS.task == 0
  # if this is a distributed run, make the chief run through problems in order
  select_random_problems = FLAGS.worker_tasks == 1 or not is_chief

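  # num_unrolls and num_partial_unroll_itrs are deliberately passed to
  # train_optimizer as callables (not called here), so that a fresh
  # iteration count can be sampled each time they are invoked.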
  def num_unrolls():
    return metaopt.sample_numiter(FLAGS.num_unroll_scale, FLAGS.min_num_unrolls)

  def num_partial_unroll_itrs():
    return metaopt.sample_numiter(FLAGS.num_partial_unroll_itr_scale,
                                  FLAGS.min_num_itr_partial_unroll)

  # Run meta-training of the learned optimizer.
  metaopt.train_optimizer(
      logdir,
      optimizer_spec,
      problems_and_data,
      FLAGS.num_problems,
      FLAGS.num_meta_iterations,
      num_unrolls,
      num_partial_unroll_itrs,
      learning_rate=FLAGS.meta_learning_rate,
      gradient_clip=FLAGS.gradient_clip_level,
      is_chief=is_chief,
      select_random_problems=select_random_problems,
      obj_train_max_multiplier=FLAGS.objective_training_max_multiplier,
      callbacks=[])

  return 0
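The chain of `if FLAGS.include_*:` blocks above pairs each boolean flag with one problem-set constructor on `ps`. A minimal sketch of the same selection written as a data-driven table, reusing the module-level `FLAGS` and `ps` objects the example already assumes (`_PROBLEM_SET_FNS` is an illustrative name, and only a few of the entries are shown):

_PROBLEM_SET_FNS = [
    ("include_quadratic_problems", ps.quadratic_problems),
    ("include_noisy_quadratic_problems", ps.quadratic_problems_noisy),
    ("include_bowl_problems", ps.bowl_problems),
    # ... one (flag name, constructor) pair per include_* flag above ...
]

problems_and_data = []
for flag_name, problem_set_fn in _PROBLEM_SET_FNS:
  if getattr(FLAGS, flag_name):
    problems_and_data.extend(problem_set_fn())

The one nested case above (the sparse-gradient MLP problems, gated on two flags at once) would still need its own explicit check.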
Example #2
def main(_):
    """Runs the main script."""

    opts = register_optimizers()

    # Choose a set of problems to optimize. By default this includes quadratics,
    # 2-dimensional bowls, 2-class softmax problems, and non-noisy optimization
    # test problems (e.g. Rosenbrock, Beale)
    problems_and_data = []

    if FLAGS.include_sparse_softmax_problems:
        problems_and_data.extend(ps.sparse_softmax_2_class_sparse_problems())

    if FLAGS.include_mnist_conv_problems:
        problems_and_data.extend(ps.mnist_conv_problems())

    if FLAGS.include_cifar10_conv_problems:
        problems_and_data.extend(ps.cifar10_conv_problems())

    if FLAGS.include_mnist_mlp_problems:
        problems_and_data.extend(ps.mnist_mlp_problems())

    if FLAGS.include_one_hot_sparse_softmax_problems:
        problems_and_data.extend(
            ps.one_hot_sparse_softmax_2_class_sparse_problems())

    if FLAGS.include_quadratic_problems:
        problems_and_data.extend(ps.quadratic_problems())

    if FLAGS.include_noisy_quadratic_problems:
        problems_and_data.extend(ps.quadratic_problems_noisy())

    if FLAGS.include_large_quadratic_problems:
        problems_and_data.extend(ps.quadratic_problems_large())

    if FLAGS.include_bowl_problems:
        problems_and_data.extend(ps.bowl_problems())

    if FLAGS.include_noisy_bowl_problems:
        problems_and_data.extend(ps.bowl_problems_noisy())

    if FLAGS.include_softmax_2_class_problems:
        problems_and_data.extend(ps.softmax_2_class_problems())

    if FLAGS.include_noisy_softmax_2_class_problems:
        problems_and_data.extend(ps.softmax_2_class_problems_noisy())

    if FLAGS.include_optimization_test_problems:
        problems_and_data.extend(ps.optimization_test_problems())

    if FLAGS.include_noisy_optimization_test_problems:
        problems_and_data.extend(ps.optimization_test_problems_noisy())

    if FLAGS.include_fully_connected_random_2_class_problems:
        problems_and_data.extend(ps.fully_connected_random_2_class_problems())

    if FLAGS.include_matmul_problems:
        problems_and_data.extend(ps.matmul_problems())

    if FLAGS.include_log_objective_problems:
        problems_and_data.extend(ps.log_objective_problems())

    if FLAGS.include_rescale_problems:
        problems_and_data.extend(ps.rescale_problems())

    if FLAGS.include_norm_problems:
        problems_and_data.extend(ps.norm_problems())

    if FLAGS.include_noisy_norm_problems:
        problems_and_data.extend(ps.norm_problems_noisy())

    if FLAGS.include_sum_problems:
        problems_and_data.extend(ps.sum_problems())

    if FLAGS.include_noisy_sum_problems:
        problems_and_data.extend(ps.sum_problems_noisy())

    if FLAGS.include_sparse_gradient_problems:
        problems_and_data.extend(ps.sparse_gradient_problems())
        if FLAGS.include_fully_connected_random_2_class_problems:
            problems_and_data.extend(ps.sparse_gradient_problems_mlp())

    if FLAGS.include_min_max_well_problems:
        problems_and_data.extend(ps.min_max_well_problems())

    if FLAGS.include_sum_of_quadratics_problems:
        problems_and_data.extend(ps.sum_of_quadratics_problems())

    if FLAGS.include_projection_quadratic_problems:
        problems_and_data.extend(ps.projection_quadratic_problems())

    if FLAGS.include_outward_snake_problems:
        problems_and_data.extend(ps.outward_snake_problems())

    if FLAGS.include_dependency_chain_problems:
        problems_and_data.extend(ps.dependency_chain_problems())

    if FLAGS.include_lasso_problems:
        problems_and_data.extend(ps.lasso_problems())

    if FLAGS.include_rastrigin_problems:
        problems_and_data.extend(ps.rastrigin_problems())

    # log directory
    logdir = os.path.join(
        FLAGS.train_dir,
        "{}_{}_{}_{}".format(FLAGS.optimizer, FLAGS.cell_cls, FLAGS.cell_size,
                             FLAGS.num_cells))

    # get the optimizer class and arguments
    optimizer_cls = opts[FLAGS.optimizer]

    assert len(HRNN_CELL_SIZES) in [1, 2, 3]
    optimizer_args = (HRNN_CELL_SIZES,)

    optimizer_kwargs = {
        "init_lr_range": (FLAGS.min_lr, FLAGS.max_lr),
        "learnable_decay": FLAGS.learnable_decay,
        "dynamic_output_scale": FLAGS.dynamic_output_scale,
        "cell_cls": getattr(tf.contrib.rnn, FLAGS.cell_cls),
        "use_attention": FLAGS.use_attention,
        "use_log_objective": FLAGS.use_log_objective,
        "num_gradient_scales": FLAGS.num_gradient_scales,
        "zero_init_lr_weights": FLAGS.zero_init_lr_weights,
        "use_log_means_squared": FLAGS.use_log_means_squared,
        "use_relative_lr": FLAGS.use_relative_lr,
        "use_extreme_indicator": FLAGS.use_extreme_indicator,
        "max_log_lr": FLAGS.max_log_lr,
        "obj_train_max_multiplier": FLAGS.objective_training_max_multiplier,
        "use_problem_lr_mean": FLAGS.use_problem_lr_mean,
        "use_gradient_shortcut": FLAGS.use_gradient_shortcut,
        "use_second_derivatives": FLAGS.use_second_derivatives,
        "use_lr_shortcut": FLAGS.use_lr_shortcut,
        "use_grad_products": FLAGS.use_grad_products,
        "use_multiple_scale_decays": FLAGS.use_multiple_scale_decays,
        "use_numerator_epsilon": FLAGS.use_numerator_epsilon,
        "learnable_inp_decay": FLAGS.learnable_inp_decay,
        "learnable_rnn_init": FLAGS.learnable_rnn_init,
    }
    optimizer_spec = problem_spec.Spec(optimizer_cls, optimizer_args,
                                       optimizer_kwargs)

    # make log directory
    tf.gfile.MakeDirs(logdir)

    is_chief = FLAGS.task == 0
    # if this is a distributed run, make the chief run through problems in order
    select_random_problems = FLAGS.worker_tasks == 1 or not is_chief

    def num_unrolls():
        return metaopt.sample_numiter(FLAGS.num_unroll_scale,
                                      FLAGS.min_num_unrolls)

    def num_partial_unroll_itrs():
        return metaopt.sample_numiter(FLAGS.num_partial_unroll_itr_scale,
                                      FLAGS.min_num_itr_partial_unroll)

    # Run meta-training. This variant forwards extra flags (fixed unrolls,
    # periodic evaluation and saving, if_cl) that Example #1's call does not.
    metaopt.train_optimizer(
        logdir,
        optimizer_spec,
        problems_and_data,
        FLAGS.num_problems,
        FLAGS.num_meta_iterations,
        num_unrolls,
        num_partial_unroll_itrs,
        learning_rate=FLAGS.meta_learning_rate,
        gradient_clip=FLAGS.gradient_clip_level,
        is_chief=is_chief,
        select_random_problems=select_random_problems,
        obj_train_max_multiplier=FLAGS.objective_training_max_multiplier,
        callbacks=[],
        fix_unroll=FLAGS.fix_unroll,
        fix_unroll_length=FLAGS.fix_unroll_length,
        fix_num_steps=FLAGS.fix_num_steps,
        fix_num_steps_eval=FLAGS.fix_num_steps_eval,
        evaluation_period=FLAGS.evaluation_period,
        evaluation_epochs=FLAGS.evaluation_epochs,
        save_period=FLAGS.save_period,
        if_cl=FLAGS.if_cl)
    # Commented out in the original source: an alternative path that tests a
    # trained optimizer on each problem instead of meta-training it. The loop
    # variable is renamed to `spec` so it would not shadow the problem_spec
    # module if re-enabled.
    #
    # for problem_itr, (spec, dataset, batch_size) in enumerate(
    #         problems_and_data):
    #
    #     # if dataset is None, use the EMPTY_DATASET
    #     if dataset is None:
    #         dataset = datasets.EMPTY_DATASET
    #         batch_size = dataset.size
    #
    #     # build a new graph for this problem
    #     graph = tf.Graph()
    #
    #     with graph.as_default():
    #
    #         # initialize a problem
    #         problem = spec.build()
    #         metaopt.test_optimizer(
    #             optimizer_spec,
    #             problem,
    #             num_iter=FLAGS.num_testing_itrs,
    #             dataset=dataset,
    #             batch_size=batch_size,
    #             seed=None,
    #             graph=graph,
    #             logdir=logdir,
    #             record_every=None)

    return 0
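Both examples are `main` functions excerpted from a larger module: `os`, `tf`, `FLAGS`, `ps`, `problem_spec`, `metaopt`, `HRNN_CELL_SIZES`, and `register_optimizers` must all exist at module level. A minimal sketch of that surrounding boilerplate, assuming TensorFlow 1.x and import paths modeled on the learned_optimizer research code (the paths, the flags shown, and their defaults are assumptions):

import os

import tensorflow as tf  # TensorFlow 1.x: the code relies on tf.contrib and tf.gfile

# Assumed import paths; adjust to wherever these modules actually live.
from learned_optimizer import metaopt
from learned_optimizer.problems import problem_sets as ps
from learned_optimizer.problems import problem_spec

FLAGS = tf.flags.FLAGS
tf.flags.DEFINE_string("train_dir", "/tmp/learned_optimizer",
                       "Directory for logs and checkpoints.")
tf.flags.DEFINE_string("optimizer", "HierarchicalRNN",
                       "Which registered optimizer class to meta-train.")
tf.flags.DEFINE_integer("task", 0, "Task id of this worker.")
tf.flags.DEFINE_integer("worker_tasks", 1, "Number of worker tasks.")
# ... one DEFINE_* per FLAGS.<name> referenced in main() ...

# Also defined at module level in the real file (contents assumed):
#   HRNN_CELL_SIZES     - list of RNN cell sizes, one per hierarchy level
#   register_optimizers - returns a dict mapping optimizer names to classes

if __name__ == "__main__":
  tf.app.run(main)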