Example #1
 def testPiecewiseConstantEdgeCases(self):
   with self.test_session():
     with self.assertRaises(ValueError):
       x_int = variables.Variable(0, dtype=variables.dtypes.int32)
       boundaries, values = [-1.0, 1.0], [1, 2, 3]
       learning_rate_decay.piecewise_constant(x_int, boundaries, values)
     with self.assertRaises(ValueError):
       x = variables.Variable(0.0)
       boundaries, values = [-1.0, 1.0], [1.0, 2, 3]
       learning_rate_decay.piecewise_constant(x, boundaries, values)
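Both ValueError cases above come from dtype mismatches: the first call passes an int32 x with float boundaries, the second mixes float and int entries in values. Below is a minimal sketch of a call that keeps the dtypes consistent; the tf.compat.v1 module path is an assumption about the environment (the test above uses TensorFlow-internal modules instead).

import tensorflow as tf

# Hedged sketch: step, boundaries and values use consistent dtypes, so
# piecewise_constant should not raise. Run in graph mode for simplicity.
tf.compat.v1.disable_eager_execution()

step = tf.constant(105, dtype=tf.int32)   # integer step counter
boundaries = [100, 110, 120]              # same integer family as `step`
values = [1.0, 0.1, 0.01, 0.001]          # a single float dtype throughout
lr = tf.compat.v1.train.piecewise_constant(step, boundaries, values)

with tf.compat.v1.Session() as sess:
    print(sess.run(lr))   # 0.1, since 100 < 105 <= 110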
Example #2
 def testPiecewiseConstantEdgeCases(self):
   with self.test_session():
     x_int = variables.Variable(0, dtype=variables.dtypes.int32)
     boundaries, values = [-1.0, 1.0], [1, 2, 3]
     with self.assertRaises(ValueError):
       learning_rate_decay.piecewise_constant(x_int, boundaries, values)
     x = variables.Variable(0.0)
     boundaries, values = [-1.0, 1.0], [1.0, 2, 3]
     with self.assertRaises(ValueError):
       learning_rate_decay.piecewise_constant(x, boundaries, values)
Example #3
    def decay_fn(learning_rate, global_step):
        """The computed learning rate decay function.
        """
        global_step = tf.to_int32(global_step)
        decayed_learning_rate = eval(decay_type)(
            learning_rate=learning_rate,
            global_step=tf.minimum(global_step, stop_decay_at) -
            start_decay_at,
            decay_steps=decay_steps,
            decay_rate=decay_rate,
            staircase=staircase,
            name="decayed_learning_rate",
            **kwargs)
        other_tensor_dict = {}
        if isinstance(decayed_learning_rate, tuple):
            decayed_learning_rate, other_tensor_dict = decayed_learning_rate

        final_lr = learning_rate_decay.piecewise_constant(
            x=global_step,
            boundaries=[start_decay_at],
            values=[learning_rate, decayed_learning_rate])

        if min_learning_rate:
            final_lr = tf.maximum(final_lr, min_learning_rate)
        return final_lr, other_tensor_dict
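In the decay_fn above, piecewise_constant acts as a switch: before start_decay_at the raw learning_rate is returned, and afterwards the decayed value takes over. A standalone sketch of that pattern follows; the tf.compat.v1 path and all constants are assumptions for illustration, not values from the original project.

import tensorflow as tf

# Hedged sketch: hold the learning rate constant until start_decay_at,
# then switch to an exponentially decayed value.
tf.compat.v1.disable_eager_execution()

learning_rate = 0.5
start_decay_at = 1000
global_step = tf.compat.v1.train.get_or_create_global_step()

decayed = tf.compat.v1.train.exponential_decay(
    learning_rate,
    global_step - start_decay_at,   # may be negative early on; unused then
    decay_steps=100,
    decay_rate=0.9,
    staircase=True)

final_lr = tf.compat.v1.train.piecewise_constant(
    global_step,
    boundaries=[start_decay_at],          # one boundary ...
    values=[learning_rate, decayed])      # ... two values: before / after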
Example #4
  def testPiecewiseConstantEdgeCases(self):
    with self.test_session():
      x_int = variables.Variable(0, dtype=variables.dtypes.int32)
      boundaries, values = [-1.0, 1.0], [1, 2, 3]
      with self.assertRaises(ValueError):
        learning_rate_decay.piecewise_constant(x_int, boundaries, values)
      x = variables.Variable(0.0)
      boundaries, values = [-1.0, 1.0], [1.0, 2, 3]
      with self.assertRaises(ValueError):
        learning_rate_decay.piecewise_constant(x, boundaries, values)

      # Test that ref types are valid.
      x_ref = x.op.outputs[0]   # float32_ref tensor should be accepted
      boundaries, values = [1.0, 2.0], [1, 2, 3]
      learning_rate_decay.piecewise_constant(x_ref, boundaries, values)

      # Test casting boundaries from int32 to int64.
      x_int64 = variables.Variable(0, dtype=variables.dtypes.int64)
      assign_1 = x_int64.assign(1)
      assign_2 = x_int64.assign(2)
      assign_3 = x_int64.assign(3)
      assign_4 = x_int64.assign(4)
      boundaries, values = [1, 2, 3], [0.4, 0.5, 0.6, 0.7]
      pc = learning_rate_decay.piecewise_constant(x_int64, boundaries, values)

      variables.global_variables_initializer().run()
      self.assertAllClose(pc.eval(), 0.4, 1e-6)
      assign_1.op.run()
      self.assertAllClose(pc.eval(), 0.4, 1e-6)
      assign_2.op.run()
      self.assertAllClose(pc.eval(), 0.5, 1e-6)
      assign_3.op.run()
      self.assertAllClose(pc.eval(), 0.6, 1e-6)
      assign_4.op.run()
      self.assertAllClose(pc.eval(), 0.7, 1e-6)
Example #5
  def testPiecewiseConstantEdgeCases(self):
    with self.test_session():
      x_int = variables.Variable(0, dtype=variables.dtypes.int32)
      boundaries, values = [-1.0, 1.0], [1, 2, 3]
      with self.assertRaises(ValueError):
        learning_rate_decay.piecewise_constant(x_int, boundaries, values)
      x = variables.Variable(0.0)
      boundaries, values = [-1.0, 1.0], [1.0, 2, 3]
      with self.assertRaises(ValueError):
        learning_rate_decay.piecewise_constant(x, boundaries, values)

      # Test that ref types are valid.
      x_ref = x.op.outputs[0]   # float32_ref tensor should be accepted
      boundaries, values = [1.0, 2.0], [1, 2, 3]
      learning_rate_decay.piecewise_constant(x_ref, boundaries, values)

      # Test casting boundaries from int32 to int64.
      x_int64 = variables.Variable(0, dtype=variables.dtypes.int64)
      assign_1 = x_int64.assign(1)
      assign_2 = x_int64.assign(2)
      assign_3 = x_int64.assign(3)
      assign_4 = x_int64.assign(4)
      boundaries, values = [1, 2, 3], [0.4, 0.5, 0.6, 0.7]
      pc = learning_rate_decay.piecewise_constant(x_int64, boundaries, values)

      variables.global_variables_initializer().run()
      self.assertAllClose(pc.eval(), 0.4, 1e-6)
      assign_1.op.run()
      self.assertAllClose(pc.eval(), 0.4, 1e-6)
      assign_2.op.run()
      self.assertAllClose(pc.eval(), 0.5, 1e-6)
      assign_3.op.run()
      self.assertAllClose(pc.eval(), 0.6, 1e-6)
      assign_4.op.run()
      self.assertAllClose(pc.eval(), 0.7, 1e-6)
Example #6
    def testPiecewiseConstantEdgeCases(self):
        x_int = resource_variable_ops.ResourceVariable(
            0, dtype=variables.dtypes.int32)
        boundaries, values = [-1.0, 1.0], [1, 2, 3]
        with self.assertRaises(ValueError):
            decayed_lr = learning_rate_decay.piecewise_constant(
                x_int, boundaries, values)
            if context.executing_eagerly():
                decayed_lr()

        x = resource_variable_ops.ResourceVariable(0.0)
        boundaries, values = [-1.0, 1.0], [1.0, 2, 3]
        with self.assertRaises(ValueError):
            decayed_lr = learning_rate_decay.piecewise_constant(
                x, boundaries, values)
            if context.executing_eagerly():
                decayed_lr()

        # Test that ref types are valid.
        if not context.executing_eagerly():
            x = variables.Variable(0.0)
            x_ref = x.op.outputs[0]  # float32_ref tensor should be accepted
            boundaries, values = [1.0, 2.0], [1, 2, 3]
            learning_rate_decay.piecewise_constant(x_ref, boundaries, values)

        # Test casting boundaries from int32 to int64.
        x_int64 = resource_variable_ops.ResourceVariable(
            0, dtype=variables.dtypes.int64)
        boundaries, values = [1, 2, 3], [0.4, 0.5, 0.6, 0.7]
        decayed_lr = learning_rate_decay.piecewise_constant(
            x_int64, boundaries, values)

        self.evaluate(variables.global_variables_initializer())
        self.assertAllClose(self.evaluate(decayed_lr), 0.4, 1e-6)
        self.evaluate(x_int64.assign(1))
        self.assertAllClose(self.evaluate(decayed_lr), 0.4, 1e-6)
        self.evaluate(x_int64.assign(2))
        self.assertAllClose(self.evaluate(decayed_lr), 0.5, 1e-6)
        self.evaluate(x_int64.assign(3))
        self.assertAllClose(self.evaluate(decayed_lr), 0.6, 1e-6)
        self.evaluate(x_int64.assign(4))
        self.assertAllClose(self.evaluate(decayed_lr), 0.7, 1e-6)
Example #7
  def testPiecewiseConstantEdgeCases(self):
    x_int = resource_variable_ops.ResourceVariable(
        0, dtype=variables.dtypes.int32)
    boundaries, values = [-1.0, 1.0], [1, 2, 3]
    with self.assertRaises(ValueError):
      decayed_lr = learning_rate_decay.piecewise_constant(
          x_int, boundaries, values)
      if context.executing_eagerly():
        decayed_lr()

    x = resource_variable_ops.ResourceVariable(0.0)
    boundaries, values = [-1.0, 1.0], [1.0, 2, 3]
    with self.assertRaises(ValueError):
      decayed_lr = learning_rate_decay.piecewise_constant(
          x, boundaries, values)
      if context.executing_eagerly():
        decayed_lr()

    # Test that ref types are valid.
    if not context.executing_eagerly():
      x = variables.VariableV1(0.0)
      x_ref = x.op.outputs[0]   # float32_ref tensor should be accepted
      boundaries, values = [1.0, 2.0], [1, 2, 3]
      learning_rate_decay.piecewise_constant(x_ref, boundaries, values)

    # Test casting boundaries from int32 to int64.
    x_int64 = resource_variable_ops.ResourceVariable(
        0, dtype=variables.dtypes.int64)
    boundaries, values = [1, 2, 3], [0.4, 0.5, 0.6, 0.7]
    decayed_lr = learning_rate_decay.piecewise_constant(
        x_int64, boundaries, values)

    self.evaluate(variables.global_variables_initializer())
    self.assertAllClose(self.evaluate(decayed_lr), 0.4, 1e-6)
    self.evaluate(x_int64.assign(1))
    self.assertAllClose(self.evaluate(decayed_lr), 0.4, 1e-6)
    self.evaluate(x_int64.assign(2))
    self.assertAllClose(self.evaluate(decayed_lr), 0.5, 1e-6)
    self.evaluate(x_int64.assign(3))
    self.assertAllClose(self.evaluate(decayed_lr), 0.6, 1e-6)
    self.evaluate(x_int64.assign(4))
    self.assertAllClose(self.evaluate(decayed_lr), 0.7, 1e-6)
Example #8
  def testPiecewiseConstant(self):
    x = resource_variable_ops.ResourceVariable(-999)
    decayed_lr = learning_rate_decay.piecewise_constant(
        x, [100, 110, 120], [1.0, 0.1, 0.01, 0.001])

    self.evaluate(variables.global_variables_initializer())

    self.assertAllClose(self.evaluate(decayed_lr), 1.0, 1e-6)
    self.evaluate(x.assign(100))
    self.assertAllClose(self.evaluate(decayed_lr), 1.0, 1e-6)
    self.evaluate(x.assign(105))
    self.assertAllClose(self.evaluate(decayed_lr), 0.1, 1e-6)
    self.evaluate(x.assign(110))
    self.assertAllClose(self.evaluate(decayed_lr), 0.1, 1e-6)
    self.evaluate(x.assign(120))
    self.assertAllClose(self.evaluate(decayed_lr), 0.01, 1e-6)
    self.evaluate(x.assign(999))
    self.assertAllClose(self.evaluate(decayed_lr), 0.001, 1e-6)
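The assertions above pin down the interval semantics: the result is values[0] while x <= boundaries[0], values[i+1] once boundaries[i] < x <= boundaries[i+1], and values[-1] when x is past the last boundary. A plain-Python reference that mirrors those assertions (a readability sketch inferred from the test, not TensorFlow's actual implementation):

def piecewise_constant_ref(x, boundaries, values):
    # Reference semantics inferred from the test above.
    for b, v in zip(boundaries, values):
        if x <= b:
            return v
    return values[-1]

assert piecewise_constant_ref(-999, [100, 110, 120], [1.0, 0.1, 0.01, 0.001]) == 1.0
assert piecewise_constant_ref(100,  [100, 110, 120], [1.0, 0.1, 0.01, 0.001]) == 1.0
assert piecewise_constant_ref(105,  [100, 110, 120], [1.0, 0.1, 0.01, 0.001]) == 0.1
assert piecewise_constant_ref(120,  [100, 110, 120], [1.0, 0.1, 0.01, 0.001]) == 0.01
assert piecewise_constant_ref(999,  [100, 110, 120], [1.0, 0.1, 0.01, 0.001]) == 0.001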
Example #9
    def testPiecewiseConstant(self):
        x = resource_variable_ops.ResourceVariable(-999)
        decayed_lr = learning_rate_decay.piecewise_constant(
            x, [100, 110, 120], [1.0, 0.1, 0.01, 0.001])

        self.evaluate(variables.global_variables_initializer())

        self.assertAllClose(self.evaluate(decayed_lr), 1.0, 1e-6)
        self.evaluate(x.assign(100))
        self.assertAllClose(self.evaluate(decayed_lr), 1.0, 1e-6)
        self.evaluate(x.assign(105))
        self.assertAllClose(self.evaluate(decayed_lr), 0.1, 1e-6)
        self.evaluate(x.assign(110))
        self.assertAllClose(self.evaluate(decayed_lr), 0.1, 1e-6)
        self.evaluate(x.assign(120))
        self.assertAllClose(self.evaluate(decayed_lr), 0.01, 1e-6)
        self.evaluate(x.assign(999))
        self.assertAllClose(self.evaluate(decayed_lr), 0.001, 1e-6)
Example #10
  def testPiecewiseConstantEdgeCases(self):
    with self.test_session():
      x_int = variables.Variable(0, dtype=variables.dtypes.int32)
      boundaries, values = [-1.0, 1.0], [1, 2, 3]
      with self.assertRaises(ValueError):
        learning_rate_decay.piecewise_constant(x_int, boundaries, values)
      x = variables.Variable(0.0)
      boundaries, values = [-1.0, 1.0], [1.0, 2, 3]
      with self.assertRaises(ValueError):
        learning_rate_decay.piecewise_constant(x, boundaries, values)

      # Test that ref types are valid.
      x_ref = x.op.outputs[0]   # float32_ref tensor should be accepted
      boundaries, values = [1.0, 2.0], [1, 2, 3]
      learning_rate_decay.piecewise_constant(x_ref, boundaries, values)
Example #11
def apply_lr_decay(cfg, global_step):
    # Learning rate schedule
    if cfg.lr_decay is None:
        lr = cfg.lr
    elif cfg.lr_decay == 'exp':
        lr = exponential_decay(cfg.lr,
                               global_step,
                               cfg.decay_steps,
                               cfg.decay_rate,
                               staircase=cfg.staircase)
    elif cfg.lr_decay == 'piecewise':
        lr = piecewise_constant(global_step, cfg.lr_boundaries, cfg.lr_values)
    elif cfg.lr_decay == 'polynomial':
        lr = polynomial_decay(cfg.lr,
                              global_step,
                              cfg.decay_steps,
                              end_learning_rate=cfg.end_lr,
                              power=cfg.power,
                              cycle=cfg.staircase)

    elif cfg.lr_decay == 'natural_exp':
        lr = natural_exp_decay(cfg.lr,
                               global_step,
                               cfg.decay_steps,
                               cfg.decay_rate,
                               staircase=cfg.staircase)
    elif cfg.lr_decay == 'inverse_time':
        lr = inverse_time_decay(cfg.lr,
                                global_step,
                                cfg.decay_steps,
                                cfg.decay_rate,
                                staircase=cfg.staircase)

    elif cfg.lr_decay == 'STN':
        epoch = tf.cast(global_step / cfg.decay_steps, tf.int32)
        lr = cfg.lr * tf.pow(0.5, tf.cast(epoch / 50, cfg._FLOATX))
    else:
        raise NotImplementedError()
    return lr
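A minimal usage sketch for the 'piecewise' branch of apply_lr_decay above. The cfg object and its field values are hypothetical; only the field names read by that branch are taken from the function, and graph-mode TF 1.x-style execution is assumed.

from types import SimpleNamespace
import tensorflow as tf

# Hypothetical config hitting only the 'piecewise' branch; values are illustrative.
cfg = SimpleNamespace(
    lr_decay='piecewise',
    lr_boundaries=[30000, 60000],      # step boundaries
    lr_values=[1e-3, 1e-4, 1e-5])      # one value per interval (len(boundaries) + 1)

global_step = tf.compat.v1.train.get_or_create_global_step()
lr = apply_lr_decay(cfg, global_step)  # resolves to piecewise_constant(global_step, ...)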
Example #12
  def testPiecewiseConstant(self):
    with self.test_session():
      x = variables.Variable(-999)
      assign_100 = x.assign(100)
      assign_105 = x.assign(105)
      assign_110 = x.assign(110)
      assign_120 = x.assign(120)
      assign_999 = x.assign(999)
      pc = learning_rate_decay.piecewise_constant(x, [100, 110, 120],
                                                  [1.0, 0.1, 0.01, 0.001])

      variables.initialize_all_variables().run()
      self.assertAllClose(pc.eval(), 1.0, 1e-6)
      assign_100.op.run()
      self.assertAllClose(pc.eval(), 1.0, 1e-6)
      assign_105.op.run()
      self.assertAllClose(pc.eval(), 0.1, 1e-6)
      assign_110.op.run()
      self.assertAllClose(pc.eval(), 0.1, 1e-6)
      assign_120.op.run()
      self.assertAllClose(pc.eval(), 0.01, 1e-6)
      assign_999.op.run()
      self.assertAllClose(pc.eval(), 0.001, 1e-6)
Example #13
    def testPiecewiseConstant(self):
        with self.test_session():
            x = variables.Variable(-999)
            assign_100 = x.assign(100)
            assign_105 = x.assign(105)
            assign_110 = x.assign(110)
            assign_120 = x.assign(120)
            assign_999 = x.assign(999)
            pc = learning_rate_decay.piecewise_constant(
                x, [100, 110, 120], [1.0, 0.1, 0.01, 0.001])

            variables.initialize_all_variables().run()
            self.assertAllClose(pc.eval(), 1.0, 1e-6)
            assign_100.op.run()
            self.assertAllClose(pc.eval(), 1.0, 1e-6)
            assign_105.op.run()
            self.assertAllClose(pc.eval(), 0.1, 1e-6)
            assign_110.op.run()
            self.assertAllClose(pc.eval(), 0.1, 1e-6)
            assign_120.op.run()
            self.assertAllClose(pc.eval(), 0.01, 1e-6)
            assign_999.op.run()
            self.assertAllClose(pc.eval(), 0.001, 1e-6)
Example #14
    def decay_fn(learning_rate, global_step):
        """The computed learning rate decay function.
        """
        global_step = tf.to_int32(global_step)
        decayed_learning_rate = eval(decay_type)(
            learning_rate=learning_rate,
            global_step=tf.minimum(global_step, stop_decay_at) - start_decay_at,
            decay_steps=decay_steps,
            decay_rate=decay_rate,
            staircase=staircase,
            name="decayed_learning_rate",
            **kwargs)
        other_tensor_dict = {}
        if isinstance(decayed_learning_rate, tuple):
            decayed_learning_rate, other_tensor_dict = decayed_learning_rate

        final_lr = learning_rate_decay.piecewise_constant(
            x=global_step,
            boundaries=[start_decay_at],
            values=[learning_rate, decayed_learning_rate])

        if min_learning_rate:
            final_lr = tf.maximum(final_lr, min_learning_rate)
        return final_lr, other_tensor_dict
Example #15
def train():
  """Train CIFAR-10 for a number of steps."""
  with tf.Graph().as_default(), tf.device('/cpu:0'):
    # Create a variable to count the number of train() calls. This equals the
    # number of batches processed * FLAGS.num_gpus.

    global_step = tf.get_variable(
        'global_step', [],
        dtype=tf.int32,
        initializer=tf.constant_initializer(0), trainable=False)

    # global_step = tf.Variable(0, trainable=False)

    # Calculate the learning rate schedule.
    num_batches_per_epoch = (cifar10.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /
                             FLAGS.batch_size)
    print(cifar10.NUM_EPOCHS_PER_DECAY)
    decay_steps = int(num_batches_per_epoch * cifar10.NUM_EPOCHS_PER_DECAY)

    # # Decay the learning rate exponentially based on the number of steps.
    # lr = tf.train.exponential_decay(cifar10.INITIAL_LEARNING_RATE,
    #                                 global_step,
    #                                 decay_steps,
    #                                 cifar10.LEARNING_RATE_DECAY_FACTOR,
    #                                 staircase=True)

    # lr manual control
    lr_boundaries = list()
    lr_values = list()
    for drop_no in range(1, 21):
      cifar10.LR_DROP_EVERY_NO_STEPS = cifar10.LR_DROP_EVERY_NO_EPOCHS * num_batches_per_epoch
      lr_boundary = int(drop_no * cifar10.LR_DROP_EVERY_NO_STEPS)
      lr_boundaries.append(lr_boundary)

      lr_value = cifar10.INITIAL_LEARNING_RATE / 2 ** (drop_no - 1)
      lr_values.append(lr_value)
    
    print(lr_boundaries)
    print(lr_values)

    # boundaries = [100000, 110000]
    # values = [1.0, 0.5, 0.1]
    # int_global_step = int(global_step)
    lr = learning_rate_decay.piecewise_constant(global_step, lr_boundaries, lr_values)


    # Create an optimizer that performs gradient descent.
    # opt = tf.train.GradientDescentOptimizer(lr)
    opt = tf.train.MomentumOptimizer(lr, cifar10.MOMENTUM)

    # Calculate the gradients for each model tower.
    tower_grads = []
    for i in xrange(FLAGS.num_gpus):
      with tf.device('/gpu:%d' % i):
        with tf.name_scope('%s_%d' % (cifar10.TOWER_NAME, i)) as scope:
          # Calculate the loss for one tower of the CIFAR model. This function
          # constructs the entire CIFAR model but shares the variables across
          # all towers.
          loss = tower_loss(scope)

          # Reuse variables for the next tower.
          tf.get_variable_scope().reuse_variables()

          # Retain the summaries from the final tower.
          summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)

          # Calculate the gradients for the batch of data on this CIFAR tower.
          grads = opt.compute_gradients(loss)

          # Keep track of the gradients across all towers.
          tower_grads.append(grads)

    # We must calculate the mean of each gradient. Note that this is the
    # synchronization point across all towers.
    grads = average_gradients(tower_grads)

    # Add a summary to track the learning rate.
    summaries.append(tf.scalar_summary('learning_rate', lr))

    # Add histograms for gradients.
    for grad, var in grads:
      if grad is not None:
        summaries.append(
            tf.histogram_summary(var.op.name + '/gradients', grad))

    # Apply the gradients to adjust the shared variables.
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    # Add histograms for trainable variables.
    for var in tf.trainable_variables():
      summaries.append(tf.histogram_summary(var.op.name, var))

    # Track the moving averages of all trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(
        cifar10.MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    # Group all updates to into a single train op.
    train_op = tf.group(apply_gradient_op, variables_averages_op)

    # Create a saver.
    saver = tf.train.Saver(tf.all_variables())

    # Build the summary operation from the last tower summaries.
    summary_op = tf.merge_summary(summaries)

    # Build an initialization operation to run below.
    init = tf.initialize_all_variables()

    # Start running operations on the Graph. allow_soft_placement must be set to
    # True to build towers on GPU, as some of the ops do not have GPU
    # implementations.
    sess = tf.Session(config=tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=FLAGS.log_device_placement))
    sess.run(init)

    # Start the queue runners.
    tf.train.start_queue_runners(sess=sess)

    summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

    for step in xrange(FLAGS.max_steps):
      start_time = time.time()
      _, loss_value = sess.run([train_op, loss])
      duration = time.time() - start_time

      assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

      if step % 10 == 0:
        num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
        examples_per_sec = num_examples_per_step / duration
        sec_per_batch = duration / FLAGS.num_gpus

        format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                      'sec/batch)')
        print (format_str % (datetime.now(), step, loss_value,
                             examples_per_sec, sec_per_batch))

      if step % 100 == 0:
        summary_str = sess.run(summary_op)
        summary_writer.add_summary(summary_str, step)

      # Save the model checkpoint periodically.
      if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
        checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)
Example #16
 def pc():
   return learning_rate_decay.piecewise_constant(x_int64, boundaries, values)
Example #17
 def pc():
   return learning_rate_decay.piecewise_constant(x, [100, 110, 120],
                                                 [1.0, 0.1, 0.01, 0.001])
Example #18
 def pc():
     return learning_rate_decay.piecewise_constant(
         x, [100, 110, 120], [1.0, 0.1, 0.01, 0.001])
Example #19
 def pc():
     return learning_rate_decay.piecewise_constant(
         x_int64, boundaries, values)
Example #20
 def _nested_func(global_step):
     return learning_rate_decay.piecewise_constant(global_step,
                                                   boundaries=step_list,
                                                   values=values)
Example #21
def train(total_loss, global_step):
  """Train CIFAR-10 model.

  Create an optimizer and apply to all trainable variables. Add moving
  average for all trainable variables.

  Args:
    total_loss: Total loss from loss().
    global_step: Integer Variable counting the number of training steps
      processed.
  Returns:
    train_op: op for training.
  """
  # Variables that affect learning rate.
  num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / FLAGS.batch_size
  decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)

  # Drop learning rate every "epoch_step" epochs
  #if epoch % opt.epoch_step == 0 then optimState.learningRate = optimState.learningRate/2 end

  # Decay the learning rate exponentially based on the number of steps.
  
  # debug
  print("global_step: " + str(global_step))
  print("NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN: " + str(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN))
  print("batch size: " + str(FLAGS.batch_size))
  # epochs_done = math.floor(float(global_step) * FLAGS.batch_size / NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN)
  # print("epochs done: " + str(epochs_done))

  # # lr decay
  # lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
  #                                 global_step,
  #                                 decay_steps,
  #                                 LEARNING_RATE_DECAY_FACTOR,
  #                                 staircase=True)

  # lr manual control
  lr_boundaries = list()
  lr_values = list()
  for drop_no in range(1, 21):
    LR_DROP_EVERY_NO_STEPS = LR_DROP_EVERY_NO_EPOCHS * num_batches_per_epoch
    lr_boundary = int(drop_no * LR_DROP_EVERY_NO_STEPS)
    lr_boundaries.append(lr_boundary)

    lr_value = INITIAL_LEARNING_RATE / 2 ** (drop_no - 1)
    lr_values.append(lr_value)
  
  print(lr_boundaries)
  print(lr_values)

  # boundaries = [100000, 110000]
  # values = [1.0, 0.5, 0.1]
  lr = learning_rate_decay.piecewise_constant(global_step, lr_boundaries, lr_values)

  # sess = tf.Session()
  # lr_val = sess.run(lr)
  # print("lr_val: " + lr_val)

  # if epochs_done % LR_DROP_EVER_NO_EPOCHS == 0:
  #   lr = lr  / LR_DROP_SCALE


  tf.scalar_summary('learning_rate', lr)

  # Generate moving averages of all losses and associated summaries.
  loss_averages_op = _add_loss_summaries(total_loss)

  # Compute gradients.
  with tf.control_dependencies([loss_averages_op]):
    # opt = tf.train.GradientDescentOptimizer(lr)
    opt = tf.train.MomentumOptimizer(lr, MOMENTUM)
    grads = opt.compute_gradients(total_loss)

    #debug
    # print("grads")
    # print(grads)

    # #debug
    # # Add histograms for gradients.
    # for grad, var in grads:
    #   if grad is not None:
    #     g_norm = tf.global_norm([grad])
    #     g_norm = tf.Print(g_norm, [g_norm], "grad norm for " + var.op.name)
    #     tf.histogram_summary("g_norm_for_" + var.op.name, g_norm)

    #debug
    # grads_only = [g_pair[0] for g_pair in grads]
    # g_norm = tf.global_norm(grads_only)
    # g_norm = tf.Print(g_norm, [g_norm], "whole gradient norm")

    #debug
    # # just to print it mostly
    # tf.histogram_summary("g_norm", g_norm)

  # Apply gradients.
  apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

  # Add histograms for trainable variables.
  for var in tf.trainable_variables():
    tf.histogram_summary(var.op.name, var)

  # Add histograms for gradients.
  for grad, var in grads:
    if grad is not None:
      tf.histogram_summary(var.op.name + '/gradients', grad)

  # Track the moving averages of all trainable variables.
  variable_averages = tf.train.ExponentialMovingAverage(
      MOVING_AVERAGE_DECAY, global_step)
  variables_averages_op = variable_averages.apply(tf.trainable_variables())

  with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
    train_op = tf.no_op(name='train')

  return train_op
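For reference, the lr_boundaries / lr_values loop in this example (and in the multi-GPU variant in Example #15) halves the learning rate every LR_DROP_EVERY_NO_EPOCHS epochs for 20 drops. A standalone sketch with illustrative constants follows; the numbers are assumptions, not the project's real settings. Note that piecewise_constant documents len(values) == len(boundaries) + 1, so one extra value is kept for steps past the last boundary, whereas the loops above build equal-length lists.

# Illustrative constants; the real project values are not shown in the snippets.
INITIAL_LEARNING_RATE = 0.1
LR_DROP_EVERY_NO_EPOCHS = 25
num_batches_per_epoch = 390            # e.g. 50000 training examples / batch size 128

lr_boundaries = []
lr_values = []
for drop_no in range(1, 21):
    lr_boundaries.append(int(drop_no * LR_DROP_EVERY_NO_EPOCHS * num_batches_per_epoch))
    lr_values.append(INITIAL_LEARNING_RATE / 2 ** (drop_no - 1))

# One more value than boundaries, per the documented piecewise_constant contract.
lr_values.append(lr_values[-1] / 2)

# lr_boundaries -> [9750, 19500, 29250, ...]; lr_values -> [0.1, 0.05, 0.025, ...]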