def testConditionalMaskUpdate(self):
        """Run ten pruning steps under a hand-written sparsity schedule.

        After every step the number of non-zero entries of the weight
        variable is recorded and finally compared with the expected counts.
        """
        weights_var = K.variable(np.linspace(1.0, 100.0, 100), name="weights")
        mask_var = K.ones(weights_var.get_shape())
        threshold_var = K.zeros([])

        def linear_sparsity(step):
            # Returns (always-true update flag, sparsity looked up at `step`).
            schedule = tf.convert_to_tensor(
                [0.0, 0.1, 0.1, 0.3, 0.3, 0.5, 0.5, 0.5, 0.5, 0.5])
            return tf.convert_to_tensor(True), schedule[step]

        # Set up pruning
        p = pruning_impl.Pruning(
            pruning_vars=[(weights_var, mask_var, threshold_var)],
            training_step_fn=self.training_step_fn,
            pruning_schedule=linear_sparsity,
            block_size=self.block_size,
            block_pooling_type=self.block_pooling_type)

        def execute(make_op):
            # In eager mode building the op already executes it; in graph
            # mode the built op has to be run through the Keras session.
            if tf.executing_eagerly():
                make_op()
            else:
                K.get_session().run(make_op())

        remaining = []
        for _ in range(10):
            execute(p.conditional_mask_update)
            execute(p.weight_mask_op)
            execute(lambda: tf.assign_add(self.global_step, 1))
            remaining.append(np.count_nonzero(K.get_value(weights_var)))

        # Weights pruned at steps 1,3,5
        self.assertAllEqual(
            [100, 90, 90, 70, 70, 50, 50, 50, 50, 50], remaining)
Exemple #2
0
    def __init__(self, epsilon=1e-2, shape=()):
        """Create the running-statistics variables and the ops built on them.

        Args:
            epsilon: Initial value of the sum-of-squares and count
                accumulators.
            shape: Shape of the sum and sum-of-squares accumulators.
        """

        def _accumulator(var_name, var_shape, start_value):
            # Every accumulator is a non-trainable float64 variable.
            return tf.get_variable(
                dtype=tf.float64,
                shape=var_shape,
                initializer=tf.constant_initializer(start_value),
                name=var_name, trainable=False)

        self._sum = _accumulator("runningsum", shape, 0.0)
        self._sumsq = _accumulator("runningsumsq", shape, epsilon)
        self._count = _accumulator("count", (), epsilon)
        self.shape = shape

        # mean = sum / count; std = sqrt(max(E[x^2] - mean^2, 1e-2)).
        self.mean = tf.to_float(self._sum / self._count)
        mean_of_squares = tf.to_float(self._sumsq / self._count)
        variance = mean_of_squares - tf.square(self.mean)
        self.std = tf.sqrt(tf.maximum(variance, 1e-2))

        # Callable that adds the fed increments onto the three accumulators.
        sum_in = tf.placeholder(shape=self.shape, dtype=tf.float64, name='sum')
        sumsq_in = tf.placeholder(shape=self.shape, dtype=tf.float64, name='var')
        count_in = tf.placeholder(shape=[], dtype=tf.float64, name='count')
        increments = [
            tf.assign_add(self._sum, sum_in),
            tf.assign_add(self._sumsq, sumsq_in),
            tf.assign_add(self._count, count_in),
        ]
        self.incfiltparams = U.function(
            [sum_in, sumsq_in, count_in], [], updates=increments)
Exemple #3
0
    def _ref_add_batch(self):
        """Build one op that folds the current batch into the reference sums.

        Each reference gradient is incremented by the matching gradient
        multiplied by `self._num_ref_samples_batch` (cast to the dtype of
        `self._loss`), and `self._num_ref_samples` is incremented by
        `self._num_ref_samples_batch`.

        Returns:
            The `tf.group` of all increment ops.
        """
        accumulate_ops = [
            tf.assign_add(
                ref,
                grad * tf.cast(self._num_ref_samples_batch,
                               dtype=self._loss.dtype))
            for ref, grad in zip(self._grads_ref, self._grads)
        ]
        accumulate_ops.append(
            tf.assign_add(self._num_ref_samples, self._num_ref_samples_batch))
        return tf.group(accumulate_ops)
  def testPlateauOpHook(self):
    """Check that PlateauOpHook runs its op only once the metric plateaus."""
    global_step = tf.train.create_global_step()
    counter = tf.get_variable("count", initializer=0, dtype=tf.int32)
    indicator = tf.get_variable("indicator", initializer=0, dtype=tf.int32)
    tf.summary.scalar("count", counter)
    incr_global_step = tf.assign_add(global_step, 1)
    incr_counter = tf.assign_add(counter, 1)
    incr_indicator = tf.assign_add(indicator, 1)

    # The hook is handed the "count_1" tag and `incr_indicator` as the op to
    # run (presumably "count_1" is the tag of the scalar summary above --
    # confirm against the summary naming rules).
    ckpt_dir = self.ckpt_dir("plateauop")
    stop_hook = metrics_hook.PlateauOpHook(
        ckpt_dir,
        "count_1",
        incr_indicator,
        num_plateau_steps=20,
        plateau_delta=1.,
        plateau_decrease=False,
        every_n_steps=10)

    with self.sess(stop_hook, ckpt_dir) as sess:

      def advance(fetches, num_steps):
        for _ in range(num_steps):
          sess.run(fetches)

      both_increments = (incr_global_step, incr_counter)

      advance(both_increments, 20)
      # Summary files should now have 2 values in them
      self.flush()

      # Run for more steps so that the hook gets triggered and we verify that
      # we don't stop.
      advance(both_increments, 30)
      self.flush()

      # Run without incrementing the counter
      advance(incr_global_step, 30)
      self.flush()

      self.assertTrue(sess.run(indicator) < 1)

      # Metrics should be written such that now the counter has gone >20 steps
      # without being incremented.
      # Check that we run the incr_indicator op several times
      for _ in range(3):
        advance(incr_global_step, 10)
        self.flush()

      self.assertTrue(sess.run(indicator) > 1)
Exemple #5
0
    def _dense_moving_average(self, x_tm1, a_t, name, beta=.9):
        """Update the moving-average accumulator `name` of `x_tm1` with `a_t`.

        Two accumulators are fetched through `self.get_accumulator`: the
        average itself and a scalar step counter (`<name>/tm1`). The counter
        is incremented, the average is scaled by a decay factor and
        `(1 - decay) * a_t` is added to it.

        For `beta < 1` the decay is `beta * (1 - beta**tm1) / (1 - beta**t)`;
        otherwise it is `tm1 / t`, where `tm1` is the counter variable and
        `t` is the result of incrementing it.

        Args:
            x_tm1: Object the accumulators are attached to.
            a_t: New value to fold into the average.
            name: Accumulator name.
            beta: Decay rate.

        Returns:
            Tuple `(updated_average, t)`.
        """
        average = self.get_accumulator(x_tm1, '%s' % name)
        prev_step = self.get_accumulator(x_tm1, '%s/tm1' % name, shape=[])
        step = tf.assign_add(prev_step, 1)

        if beta < 1:
            decay = tf.convert_to_tensor(beta, name='%s/decay' % name)
            scaled = decay * (1 - beta**prev_step)
            decay = scaled / (1 - beta**step)
        else:
            decay = prev_step / step

        decayed_average = tf.assign(average, decay * average)
        updated_average = tf.assign_add(decayed_average, (1 - decay) * a_t)
        return updated_average, step
        def session_run_job():
            """Increment two variables once through a debugger-wrapped session.

            The result of running the grouped increment op is appended to
            `session_run_results`.
            """
            with tf.Session() as raw_sess:
                var_a = tf.Variable(10, dtype=tf.int32, name="a")
                var_b = tf.Variable(20, dtype=tf.int32, name="b")
                one = tf.constant(1, dtype=tf.int32, name="d")
                inc_a = tf.assign_add(var_a, one, name="inc_a")
                inc_b = tf.assign_add(var_b, one, name="inc_b")
                inc_ab = tf.group([inc_a, inc_b], name="inc_ab")

                # Initialize with the plain session, before wrapping it.
                raw_sess.run(tf.global_variables_initializer())

                debug_sess = tf_debug.TensorBoardDebugWrapperSession(
                    raw_sess, self._debugger_url)
                result = debug_sess.run(inc_ab)
                session_run_results.append(result)
Exemple #7
0
            def computation_fn():
                """Build a tiny Mesh TensorFlow model and its training op.

                Returns:
                    Tuple `(lr, train_op)`: the value returned by
                    `optimization_lib.create_optimizer` and a grouped op that
                    runs the lowered update ops plus a global-step increment.
                """
                graph = mtf.Graph()
                mesh = mtf.Mesh(graph, 'my_mesh')

                # Mesh layout and SIMD implementation.
                mesh_shape = mtf.convert_to_shape('all:2')
                layout_rules = mtf.convert_to_layout_rules('none:all')
                mesh_devices = [''] * mesh_shape.size
                mesh_impl = mtf.simd_mesh_impl.SimdMeshImpl(
                    mesh_shape, layout_rules, mesh_devices, device_assignment)

                # loss = mean((x - w)^2) over a 3-element "hidden" dimension.
                hidden_dim = mtf.Dimension('hidden', 3)
                w_init = tf.constant_initializer([0.1, -0.2, -0.1])
                w = mtf.get_variable(
                    mesh, 'w', shape=[hidden_dim], initializer=w_init)
                x = mtf.constant(
                    mesh, [0.4, 0.2, -0.5], [hidden_dim], dtype=tf.float32)
                loss = mtf.reduce_mean(mtf.square(x - w))

                lr, update_ops = optimization_lib.create_optimizer(
                    loss, 0.2, 100, 10)
                self.lowering = mtf.Lowering(graph, {mesh: mesh_impl})

                tf_update_ops = []
                for mtf_op in update_ops:
                    tf_update_ops.append(
                        self.lowering.lowered_operation(mtf_op))
                step_increment = tf.assign_add(
                    tf.train.get_or_create_global_step(), 1)
                tf_update_ops.append(step_increment)

                return lr, tf.group(tf_update_ops)
Exemple #8
0
    def testPeriodicTargetUpdate(self, use_locking, update_period):
        """Tests that the simple success case works as expected.

        This is an integration test. The periodically and update parts are
        unit-tested in the preceding.

        Args:
          use_locking: value for `periodic_target_update`'s `use_locking`
            argument.
          update_period: how often an update should happen.
        """
        target_variables = [tf.Variable(tf.zeros([1, 2]))]
        source_variables = [tf.Variable(tf.random_normal([1, 2]))]

        # Op that shifts the source by one on every run.
        update_source_op = tf.assign_add(
            source_variables[0], tf.ones([1, 2]))
        updated = target_update_ops.periodic_target_update(
            target_variables,
            source_variables,
            update_period=update_period,
            use_locking=use_locking)

        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())

            total_steps = 3 * update_period
            for step in range(total_steps):
                sess.run(update_source_op)
                sess.run(updated)
                targets, sources = sess.run(
                    [target_variables, source_variables])

                # Targets must equal sources exactly on the update steps and
                # differ from them on every other step.
                check = (self.assertAllClose
                         if step % update_period == 0
                         else self.assertNotAllClose)
                check(targets, sources)
Exemple #9
0
    def get_train_ops(self, graph_ops, infeed_queue, i_tr, X_b_tr, y_b_tr):
        """Add training operations to the graph.

        Args:
            graph_ops: Dict that receives the created ops; it is mutated and
                returned.
            infeed_queue: Queue passed to `loops_repeat` when `self.use_infeed`
                is set.
            i_tr: Not used by this method.
            X_b_tr: Input batch passed to the training function when no infeed
                is used.
            y_b_tr: Not used by this method.

        Returns:
            `graph_ops`, with 'incr_global_step' always set and, when the
            experiment is configured for training, 'train', 'lr' and 'epochs'.
        """
        maybe_compile = self.device_config['maybe_xla_compile']

        def tr(X):
            return self.train_ops(X)

        # Need to close over scope of `self` for GPU XLA
        def train_op(loss, i, X, y):
            return tr(X)

        def tr_infeed():
            initial_loss = tf.zeros(self.loss_shape, self.experiment.dtype)
            return loops_repeat(self.device_config['device'],
                                self.iters_per_sess_run,
                                train_op, [initial_loss],
                                infeed_queue,
                                maybe_xla=maybe_compile)

        with self.graph.as_default():
            # One session run covers `iters_per_sess_run` iterations.
            graph_ops['incr_global_step'] = tf.assign_add(
                self.global_step, self.iters_per_sess_run)
            with self.device_config['scoper']():
                if self.experiment.config.training:
                    graph_ops['train'] = (
                        tr_infeed() if self.use_infeed
                        else maybe_compile(tr, [X_b_tr]))
                    graph_ops['lr'] = self.get_current_learning_rate()
                    graph_ops['epochs'] = self.get_epoch()
        return graph_ops
 def _apply_and_zero():
     """Apply the accumulated gradients, then zero them and bump the counter.

     Returns:
         A grouped op containing the zeroing ops (which run after the apply
         op) and the increment of `self._counter`.
     """
     apply_op = self._opt.apply_gradients(list(zip(accums, variables)))
     # Zeroing must wait until the accumulated values have been applied.
     with tf.control_dependencies([apply_op]):
         zero_ops = []
         for accum in accums:
             zero_ops.append(tf.assign(accum, tf.zeros_like(accum)))
     counter_increment = tf.assign_add(self._counter, 1)
     return tf.group(zero_ops, counter_increment)
  def testWeightSpecificSparsity(self):
    """Check that per-weight sparsity targets from the sparsity map are met."""
    test_spec = ",".join([
        "begin_pruning_step=1", "pruning_frequency=1", "end_pruning_step=100",
        "target_sparsity=0.5",
        "weight_sparsity_map=[layer1:0.6,layer2/weights:0.75,.*kernel:0.6]",
        "threshold_decay=0.0"
    ])
    pruning_hparams = pruning.get_pruning_hparams().parse(test_spec)

    # One masked 100-element weight vector per (scope, variable name) pair.
    masked_layers = (
        ("layer1", "weights"),
        ("layer2", "weights"),
        ("layer3", "kernel"),
    )
    for scope_name, var_name in masked_layers:
      with tf.variable_scope(scope_name):
        layer_weights = tf.Variable(
            tf.linspace(1.0, 100.0, 100), name=var_name)
        pruning.apply_mask(layer_weights)

    p = pruning.Pruning(pruning_hparams)
    mask_update_op = p.conditional_mask_update_op()
    increment_global_step = tf.assign_add(self.global_step, 1)

    with self.cached_session() as session:
      tf.global_variables_initializer().run()
      for _ in range(110):
        session.run(mask_update_op)
        session.run(increment_global_step)

      achieved = session.run(pruning.get_weight_sparsity())
      self.assertAllClose(achieved, [0.6, 0.75, 0.6])
Exemple #12
0
    def _update_dy(self):
        """Build one op that adds each gradient onto its matching `dy`.

        Returns:
            The `tf.group` of the per-variable `assign_add` ops, pairing
            `self._dys` with `self._grads` element by element.
        """
        increments = [
            tf.assign_add(dy, grad)
            for dy, grad in zip(self._dys, self._grads)
        ]
        return tf.group(increments)
Exemple #13
0
 def __init__(self, train_time, time_limit=None):
     """Store the hook configuration and build the time-increment op.

     Args:
         train_time: Variable that `self._increment_op` adds to.
         time_limit: Optional limit, stored as `self._time_limit`.
     """
     super(TrainTimeHook, self).__init__()
     self._time_limit = time_limit
     self._train_time = train_time
     self._last_run_duration = None
     # The amount to add is fed through this float placeholder.
     self._increment_amount = tf.placeholder(dtype=tf.float32, shape=None)
     self._increment_op = tf.assign_add(
         train_time, self._increment_amount)
Exemple #14
0
 def build(self, input_shape):
   """Create the `train_step` weight and register its increment as an update.

   The increment is the Keras learning phase cast to float32.

   Args:
     input_shape: Forwarded unchanged to the parent `build`.
   """
   self.train_step = self.add_weight(
       name='train_step', shape=[], initializer='zeros', trainable=False)
   phase_as_float = tf.cast(tf.keras.backend.learning_phase(), tf.float32)
   self.add_update(tf.assign_add(self.train_step, phase_as_float))
   super().build(input_shape)
Exemple #15
0
def learner(model, params):
    """Run a learner job.

    Loads the 'train' split from `FLAGS.dataset_dir`, builds the loss from
    `model.loss`, and trains with Adam inside a `MonitoredTrainingSession`
    until `FLAGS.num_training_steps` is reached.

    Args:
        model: Object exposing `loss(inputs)`.
        params: Mapping read for the keys 'field', 'history', 'noise' and
            'gamma'.
    """
    train_ds = dataset.load_dataset(FLAGS.dataset_dir, 'train')
    train_ds = dataset.add_targets(
        train_ds, [params['field']], add_history=params['history'])
    train_ds = dataset.split_and_preprocess(
        train_ds,
        noise_field=params['field'],
        noise_scale=params['noise'],
        noise_gamma=params['gamma'])
    inputs = tf.data.make_one_shot_iterator(train_ds).get_next()

    loss_op = model.loss(inputs)
    global_step = tf.train.create_global_step()

    # Exponentially decayed learning rate with a floor of 1e-6.
    decayed_lr = tf.train.exponential_decay(learning_rate=1e-4,
                                            global_step=global_step,
                                            decay_steps=int(5e6),
                                            decay_rate=0.1)
    lr = decayed_lr + 1e-6
    optimizer = tf.train.AdamOptimizer(learning_rate=lr)
    train_op = optimizer.minimize(loss_op, global_step=global_step)

    # Don't train for the first few steps, just accumulate normalization stats
    # NOTE(review): `train_op` is created outside the `tf.cond` branches, so
    # it may execute regardless of the branch taken -- confirm that the
    # warm-up really skips the optimizer update.
    in_warmup = tf.less(global_step, 1000)
    train_op = tf.cond(in_warmup,
                       lambda: tf.group(tf.assign_add(global_step, 1)),
                       lambda: tf.group(train_op))

    stop_hook = tf.train.StopAtStepHook(last_step=FLAGS.num_training_steps)
    with tf.train.MonitoredTrainingSession(
            hooks=[stop_hook],
            checkpoint_dir=FLAGS.checkpoint_dir,
            save_checkpoint_secs=600) as sess:

        while not sess.should_stop():
            _, step, loss = sess.run([train_op, global_step, loss_op])
            if step % 1000 == 0:
                logging.info('Step %d: Loss %g', step, loss)
        logging.info('Training complete.')
Exemple #16
0
    def _update_weights(self):
        """Build one op that adds `lr * y` onto every weight.

        Returns:
            The `tf.group` of the per-weight `assign_add` ops, pairing
            `self._weights` with `self._ys`; `lr` is `self._conf['lr']`.
        """
        steps = [
            tf.assign_add(weight, self._conf['lr'] * y)
            for weight, y in zip(self._weights, self._ys)
        ]
        return tf.group(steps)
    def update_fisher_diag(self, n_task):
        """Recompute the Fisher diagonals and snapshot them for a task.

        In order: call `reset_fisher_diag`, zero the `fisher_diagcs`
        accumulators, run `fisher_sum_up_ops` over
        `self.it.n // self.fisher_batch_size` minibatches, divide the
        accumulators by the number of examples processed, add them onto
        `fisher_diags`, and copy `fisher_diags` into fresh variables stored at
        `self.saved_fishers[n_task - 1]`.

        Note that new assign ops and variables are created on every call.

        Args:
            n_task: Task number; the snapshot is stored under `n_task - 1`.
        """
        # Reset is mandatory
        print('Mandatory fisher diagonal reset')
        self.reset_fisher_diag()

        print("Reset fishers computed")
        sess = self.objs['sess']
        accumulators = self.objs['fisher_diagcs']
        sess.run([tf.assign(acc, tf.zeros_like(acc)) for acc in accumulators])

        n_minibatches = self.it.n // self.fisher_batch_size
        n_examples = n_minibatches * self.fisher_batch_size
        self.it.i = 0
        # The accumulator totals before and after the pass are evaluated but
        # not otherwise used.
        orig = sess.run(utils.sum_up(accumulators))
        for _ in range(n_minibatches):
            nX, nY = next(self.it)
            feed = {self.phs['fisher_X']: nX, self.phs['fisher_Y']: nY}
            sess.run(self.objs['fisher_sum_up_ops'], feed_dict=feed)
        newv = sess.run(utils.sum_up(accumulators))
        print('Ran fisher_sum_up_ops (examples: %d)' % n_examples)

        # Turn the accumulated sums into per-example averages.
        sess.run([
            tf.assign(acc, tf.divide(acc, n_examples))
            for acc in accumulators
        ])

        # Add the averages onto the running Fisher diagonals and report the
        # total before and after.
        fisher_diags = self.objs['fisher_diags']
        orig = sess.run(utils.sum_up(fisher_diags))
        sess.run([
            tf.assign_add(diag, acc)
            for acc, diag in zip(accumulators, fisher_diags)
        ])
        newv = sess.run(utils.sum_up(fisher_diags))
        print("changed %s => %s" % ("%.2f" % orig, "%.2f" % newv))

        # Snapshot the diagonals into new variables for this task.
        task_index = n_task - 1
        snapshots = []
        self.saved_fishers[task_index] = snapshots  # say task 0
        save_ops = []
        for diag in fisher_diags:
            snapshot = tf.Variable(tf.zeros_like(diag))
            snapshots.append(snapshot)
            save_ops.append(tf.assign(snapshot, diag))
        sess.run(save_ops)
        print("Saved fishers for task %d" % task_index)
  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    """Accumulate gradients and apply them every `self._grad_steps` calls.

    Each gradient is divided by `self._grad_steps` and added to the
    'grad_accum' slot of its variable. When
    `global_step % self._grad_steps == self._grad_steps - 1` the accumulated
    values are applied through the wrapped optimizer `self._opt`, the
    accumulators are zeroed and `self._counter` is incremented; otherwise the
    gradients are only accumulated. `global_step` is incremented afterwards
    in both cases.

    Args:
      grads_and_vars: Iterable of `(gradient, variable)` pairs. One-shot
        iterables such as `zip(...)` are accepted.
      global_step: Variable counting calls; required even though it defaults
        to `None`, because it selects between accumulating and applying.
      name: Unused.

    Returns:
      An op that performs the accumulate/apply step and then increments
      `global_step`.

    Raises:
      ValueError: If `global_step` is `None`.
    """
    if global_step is None:
      raise ValueError(
          'global_step must be provided: it selects between accumulating '
          'and applying gradients.')

    # Materialize once: the pairs are traversed twice below, and a one-shot
    # iterable (e.g. `zip(...)` on Python 3) would be exhausted by the first
    # traversal, silently leaving nothing to accumulate.
    grads_and_vars = list(grads_and_vars)

    with tf.init_scope():
      self._create_slots([v for (_, v) in grads_and_vars])

    accums = []
    variables = []

    for g, v in grads_and_vars:
      accum = self.get_slot(v, 'grad_accum')
      variables.append(v)
      if isinstance(g, tf.IndexedSlices):
        # Scale only the values; indices and dense shape are kept.
        scaled_grad = tf.IndexedSlices(
            g.values / self._grad_steps, g.indices, dense_shape=g.dense_shape)
        accums.append(accum.assign_add(scaled_grad))  # pytype: disable=attribute-error
      else:
        accums.append(accum.assign_add(g / self._grad_steps))  # pytype: disable=attribute-error

    def _apply_and_zero():
      apply_op = self._opt.apply_gradients(list(zip(accums, variables)))
      # Zero the accumulators only after their values have been applied.
      with tf.control_dependencies([apply_op]):
        zero_op = [tf.assign(accum, tf.zeros_like(accum)) for accum in accums]
      return tf.group(zero_op, tf.assign_add(self._counter, 1))

    def _accum():
      return tf.group(accums)

    # Apply on the last step of each `_grad_steps`-long window.
    accum_step = tf.cond(
        tf.equal(tf.mod(global_step, self._grad_steps), self._grad_steps - 1),
        _apply_and_zero, _accum)

    with tf.control_dependencies([accum_step]):
      global_step = tf.assign_add(global_step, 1)
      return tf.group(global_step)
Exemple #19
0
  def test_prune_every_n_steps(self):
    """Check that the pruning hook fires at step 0 and every `every_steps`."""
    every_steps = 10
    pruning_obj = MockPruningObject()

    with tf.Graph().as_default():
      listener = pruning_hook.ModelPruningListener(pruning_obj)
      hook = pruning_hook.ModelPruningHook(every_steps=every_steps,
                                           listeners=[listener])
      global_step = tf.train.get_or_create_global_step()
      train_op = tf.constant(0)
      global_step_increment_op = tf.assign_add(global_step, 1)
      session_creator = tf.train.ChiefSessionCreator()
      with tf.train.MonitoredSession(session_creator,
                                     hooks=[hook]) as mon_sess:

        def train_steps(count):
          # One "training step" = run the train op, then bump the step.
          for _ in range(count):
            mon_sess.run(train_op)
            mon_sess.run(global_step_increment_op)

        mon_sess.run(tf.global_variables_initializer())

        train_steps(1)
        # ModelPruningHook runs once after session creation, at step 0.
        self.assertEqual(len(pruning_obj.logged_steps), 1)
        self.assertEqual(pruning_obj.logged_steps[0], 0)

        train_steps(every_steps - 1)
        self.assertEqual(len(pruning_obj.logged_steps), 2)
        self.assertSameElements(pruning_obj.logged_steps, [0, every_steps])

        # One step short of the next period: nothing new may be logged.
        train_steps(every_steps - 1)
        self.assertEqual(len(pruning_obj.logged_steps), 2)
        self.assertSameElements(pruning_obj.logged_steps, [0, every_steps])
 def testConditionalMaskUpdate(self):
   """Check that the mask is only refreshed on the configured pruning steps.

   The sparsity variable is raised on every step, while the number of
   non-zero masked weights is expected to change only every second step
   within the pruning window.
   """
   test_spec = ",".join([
       "pruning_frequency=2", "begin_pruning_step=1", "end_pruning_step=6",
       "nbins=100"
   ])
   pruning_hparams = pruning.get_pruning_hparams().parse(test_spec)
   weights = tf.Variable(tf.linspace(1.0, 100.0, 100), name="weights")
   masked_weights = pruning.apply_mask(weights)
   sparsity = tf.Variable(0.00, name="sparsity")
   # Set up pruning
   p = pruning.Pruning(pruning_hparams, sparsity=sparsity)
   p._spec.threshold_decay = 0.0
   mask_update_op = p.conditional_mask_update_op()
   sparsity_val = tf.linspace(0.0, 0.9, 10)
   increment_global_step = tf.assign_add(self.global_step, 1)

   observed_counts = []
   with self.cached_session() as session:
     tf.global_variables_initializer().run()
     for step in range(10):
       set_sparsity = tf.assign(sparsity, sparsity_val[step])
       session.run(set_sparsity)
       session.run(mask_update_op)
       session.run(increment_global_step)
       observed_counts.append(np.count_nonzero(masked_weights.eval()))

   # Weights pruned at steps 0,2,4,and,6
   self.assertAllEqual(
       [100, 100, 80, 80, 60, 60, 40, 40, 40, 40], observed_counts)
Exemple #21
0
def _create_var(name: str, value_expr: TfExpression) -> TfExpression:
    """Internal helper for creating autosummary accumulators.

    Builds a 3-element accumulator variable holding
    `[sum(1), sum(x), sum(x**2)]` for `value_expr`, registers it under `name`
    in the module-level `_vars`, and returns the op that updates it.

    Args:
        name: Summary name; '/' is replaced by '_' for the name scope.
        value_expr: Expression to accumulate; it is cast to `_dtype`.

    Returns:
        Op that adds the current moments to the variable, or assigns them if
        the variable is not initialized yet.
    """
    assert not _finalized
    scope_suffix = name.replace("/", "_")
    value = tf.cast(value_expr, _dtype)

    # Element count: static when the shape is fully known, dynamic otherwise.
    if value.shape.is_fully_defined():
        num_elems = np.prod(value.shape.as_list())
        num_elems_expr = tf.constant(num_elems, dtype=_dtype)
    else:
        num_elems = None
        num_elems_expr = tf.reduce_prod(tf.cast(tf.shape(value), _dtype))

    if num_elems == 1:
        # Single element: use it directly as a scalar, no reduction needed.
        if value.shape.ndims != 0:
            value = tf.reshape(value, [])
        moments = [num_elems_expr, value, tf.square(value)]
    else:
        moments = [
            num_elems_expr,
            tf.reduce_sum(value),
            tf.reduce_sum(tf.square(value)),
        ]

    # Contribute nothing when the summed value is not finite.
    delta = tf.cond(tf.is_finite(moments[1]),
                    lambda: tf.stack(moments),
                    lambda: tf.zeros(3, dtype=_dtype))

    with tfutil.absolute_name_scope("Autosummary/" + scope_suffix), \
            tf.control_dependencies(None):
        var = tf.Variable(tf.zeros(3, dtype=_dtype),
                          trainable=False)  # [sum(1), sum(x), sum(x**2)]
    update_op = tf.cond(tf.is_variable_initialized(var),
                        lambda: tf.assign_add(var, delta),
                        lambda: tf.assign(var, delta))

    _vars.setdefault(name, []).append(var)
    return update_op
  def testWeightSparsityTiebreaker(self):
    """Check the target sparsity is reached when all weights are equal."""
    test_spec = ",".join([
        "begin_pruning_step=1", "pruning_frequency=1", "end_pruning_step=100",
        "target_sparsity=0.5",
        "threshold_decay=0.0"
    ])
    pruning_hparams = pruning.get_pruning_hparams().parse(test_spec)

    # Every weight has the same magnitude, so pruning has to break ties.
    identical_weights = np.ones([100], dtype=np.float32)
    with tf.variable_scope("layer1"):
      w1 = tf.Variable(identical_weights, name="weights")
      pruning.apply_mask(w1)

    p = pruning.Pruning(pruning_hparams)
    mask_update_op = p.conditional_mask_update_op()
    increment_global_step = tf.assign_add(self.global_step, 1)

    with self.cached_session() as session:
      tf.global_variables_initializer().run()
      for _ in range(110):
        session.run(mask_update_op)
        session.run(increment_global_step)

      achieved = session.run(pruning.get_weight_sparsity())
      self.assertAllClose(achieved, [0.5])
Exemple #23
0
    def __init__(self, update_batchnorm_params=True):
        """Set up step counting and the learning-rate schedule from FLAGS.

        Args:
            update_batchnorm_params: Stored as
                `self.update_batchnorm_params`.
        """
        self.update_batchnorm_params = update_batchnorm_params

        # The dataset size is always looked up; an explicit number of
        # supervised examples overrides it.
        num_samples = datasets.get_count(FLAGS.train_split)
        if FLAGS.num_supervised_examples:
            num_samples = FLAGS.num_supervised_examples
        self.steps_per_epoch = num_samples // FLAGS.batch_size

        global_step = tf.train.get_or_create_global_step()
        self.global_step_inc = tf.assign_add(global_step, 1)

        # lr_scale_batch_size defines a canonical batch size that is coupled
        # with the initial learning rate. If the actual batch size differs
        # from the canonical one, the learning rate is scaled linearly. This
        # is convenient because it allows varying the batch size without
        # recomputing the learning rate.
        lr_factor = (FLAGS.batch_size / float(FLAGS.lr_scale_batch_size)
                     if FLAGS.lr_scale_batch_size else 1.0)

        # We actually also accept fractional epochs.
        schedule_in_steps = utils.get_schedule_from_config(
            FLAGS.schedule, self.steps_per_epoch)
        # First entry is the warm-up length; the last entry is not used here.
        warmup = schedule_in_steps[0]
        decays = schedule_in_steps[1:-1]

        self.lr = get_lr(global_step,
                         base_lr=FLAGS.lr * lr_factor,
                         decay_steps=decays,
                         lr_decay_factor=FLAGS.lr_decay_factor,
                         warmup_steps=warmup)
    def _reset_non_empty(self, indices):
        """Reset the batch of environments.

        The ops are chained with control dependencies so that they run in
        this order: video-writer reset (only if `self._video_condition`
        holds), history-buffer reset plus episode-counter increment,
        observation assignment plus initial frame dump, reset-model flag
        assignment, and finally the gather of the observations.

        Args:
          indices: The batch indices of the environments to reset.

        Returns:
          `self._observ` gathered at `indices`, read after all of the above
          ops have run.
        """
        # Conditionally reset the video writer; otherwise do nothing.
        reset_video_op = tf.cond(
            self._video_condition,
            lambda: tf.py_func(self._video_reset_writer, [], []), tf.no_op)
        with tf.control_dependencies([reset_video_op]):
            inc_op = tf.assign_add(self._episode_counter, 1)
            # Everything below waits for the history reset and the counter.
            with tf.control_dependencies(
                [self.history_buffer.reset(indices), inc_op]):
                # Conditionally dump the freshly reset history as frames.
                initial_frame_dump_op = tf.cond(
                    self._video_condition,
                    lambda: tf.py_func(
                        self._video_dump_frames,  # pylint: disable=g-long-lambda
                        [self.history_buffer.get_all_elements()],
                        []),
                    tf.no_op)
                # Store the last entry along axis 1 of the history as the
                # current observation (presumably the most recent frame --
                # confirm against the history buffer layout).
                observ_assign_op = self._observ.assign(
                    self.history_buffer.get_all_elements()[:, -1, ...])
                with tf.control_dependencies(
                    [observ_assign_op, initial_frame_dump_op]):
                    # Set the reset-model variable to 1.0.
                    reset_model_op = tf.assign(self._reset_model,
                                               tf.constant(1.0))
                    with tf.control_dependencies([reset_model_op]):
                        return tf.gather(self._observ.read_value(), indices)
Exemple #25
0
def get_train_op(loss,
                 initial_learning_rate,
                 momentum,
                 lr_decay_factor,
                 decay_steps,
                 warmup_steps,
                 use_tpu=False):
    """Builds an SGD-with-momentum update operation.

    The learning rate is piecewise constant: it starts at
    `initial_learning_rate` and is multiplied by `lr_decay_factor` once more
    at every boundary in `decay_steps`. The result is then passed through
    `apply_warmup_lr`.

    Args:
      loss: Tensor to minimize.
      initial_learning_rate: Learning rate used before the first boundary.
      momentum: Momentum passed to `tf.train.MomentumOptimizer`.
      lr_decay_factor: Multiplier applied at each decay boundary.
      decay_steps: Global-step boundaries for the piecewise-constant schedule.
      warmup_steps: Forwarded to `apply_warmup_lr`.
      use_tpu: If true, wraps the optimizer in `CrossShardOptimizer`.

    Returns:
      A single op grouping the minimize op, the ops in the `UPDATE_OPS`
      collection and the global-step increment.
    """
    step = tf.train.get_or_create_global_step()

    # One value per interval: len(decay_steps) boundaries -> len + 1 values.
    num_intervals = len(decay_steps) + 1
    interval_rates = [
        initial_learning_rate * (lr_decay_factor**power)
        for power in range(num_intervals)
    ]
    schedule = tf.train.piecewise_constant(step, decay_steps, interval_rates)
    schedule = apply_warmup_lr(step, schedule, initial_learning_rate,
                               warmup_steps)

    sgd = tf.train.MomentumOptimizer(learning_rate=schedule,
                                     momentum=momentum)
    optimizer = tf.contrib.tpu.CrossShardOptimizer(sgd) if use_tpu else sgd

    minimize_op = optimizer.minimize(loss)
    pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    # minimize() is called without a global_step, so it is advanced here.
    step_increment = tf.assign_add(step, 1)

    return tf.group([minimize_op, pending_updates, step_increment])
    def testStop(self):
        """Checks that DummyHook collects metrics and then stops the session.

        The global step is advanced in batches of steps; after 30 steps the
        hook must have recorded at least two values of the "global_step_1"
        summary, and after 40 steps any further `run` call must raise.
        """
        global_step = tf.train.create_global_step()
        tf.summary.scalar("global_step", global_step)
        incr_global_step = tf.assign_add(global_step, 1)

        ckpt_dir = self.ckpt_dir("stop")
        dummy = DummyHook(ckpt_dir, every_n_steps=10)
        with self.sess(dummy, ckpt_dir) as sess:
            for _ in range(20):
                sess.run(incr_global_step)

            # Summary files should now have 2 global step values in them
            self.flush()

            # Run for 10 more so that the hook gets triggered again
            for _ in range(10):
                sess.run(incr_global_step)

            # Check that the metrics have actually been collected. The
            # dedicated assert helpers report the offending values on failure,
            # unlike assertTrue on a pre-evaluated boolean.
            self.assertIn("", dummy.test_metrics)
            metrics = dummy.test_metrics[""]
            self.assertIn("global_step_1", metrics)
            steps, vals = metrics["global_step_1"]
            self.assertEqual(len(steps), len(vals))
            self.assertGreaterEqual(len(steps), 2)

            # Run for 10 more so that the hook triggers stoppage
            for _ in range(10):
                sess.run(incr_global_step)

            with self.assertRaisesRegexp(RuntimeError,
                                         "after should_stop requested"):
                sess.run(incr_global_step)
Exemple #27
0
    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
        """Applies gradients and tunes hyperparameters with YellowFin.

        Args:
          grads_and_vars: List of (gradient, variable) pairs as returned by
            compute_gradients(). Pairs whose gradient is None are dropped.
          global_step: Optional Variable to increment by one after the
            variables have been updated.
          name: Optional name for the returned operation. Defaults to the
            name passed to the Optimizer constructor.

        Returns:
          A group of operations:
            variable update with momentum ops,
            YellowFin (curvature, variance, distance) ops,
            single-step and lr/mu tuning ops,
            step increment op.
        """
        self._grad, self._vars = zip(*[(g, t) for g, t in grads_and_vars
                                       if g is not None])

        # Var update with Momentum.
        with tf.variable_scope("apply_updates"):
            # Clip by global norm only when a clipping threshold is set; the
            # momentum update itself is the same in both cases.
            if self._clip_thresh_var is not None:
                self._grad, _ = tf.clip_by_global_norm(self._grad,
                                                       self._clip_thresh_var)

            apply_grad_op = self._momentum_optimizer.apply_gradients(
                zip(self._grad, self._vars),
                global_step=global_step,
                name=name)

        # Begin lr and mu tuning.
        with tf.variable_scope("prepare_yellowFin_variables"):
            # the dependencies ideally only need to be after clip is done,
            # i.e. depends on self._grad. However, the control_dependencies
            # does not support indexed slice for sparse gradients.
            # The alternative dependencies here might be slightly slower due
            # to less parallelization.
            with tf.control_dependencies([
                    apply_grad_op,
            ]):
                prepare_variables_op = self._prepare_variables()

        with tf.variable_scope("yellowfin"):
            with tf.control_dependencies([prepare_variables_op]):
                yellowfin_op = self._yellowfin()

        # Update YellowFin step variable.
        with tf.control_dependencies([yellowfin_op]):
            self._increment_step_op = tf.assign_add(self._step, 1).op

        return tf.group(apply_grad_op, prepare_variables_op, yellowfin_op,
                        self._increment_step_op)
Exemple #28
0
    def _update_weights(self):
        """Builds one op that applies the scaled update to every weight.

        Each entry of `self._weights` is paired with the entry of `self._dws`
        at the same position and incremented by
        `self._step_size_placeh * dw`.

        Returns:
          The `tf.group` of all per-weight `assign_add` ops.
        """
        increments = [
            tf.assign_add(weight, self._step_size_placeh * delta)
            for weight, delta in zip(self._weights, self._dws)
        ]
        return tf.group(increments)
Exemple #29
0
    def _build(self, x, weights=None):
        """Folds a batch into the running statistics and returns them.

        Three non-trainable variables are kept: `total` (accumulated sum of
        weights), `mean` and `M2`. Each call adds an increment to all three.
        NOTE(review): this looks like a weighted, Welford-style running
        mean/variance update -- confirm against the class documentation.

        Args:
          x: Tensor of new values; reduced over `self._axis`.
          weights: Optional tensor of weights. Defaults to ones shaped like
            `x`; broadcast to the static shape of `x` when the static shapes
            differ.

        Returns:
          A tuple `(mean, m2 / (total - self._ddof), total)`, built under a
          control dependency on the last of the three updates.
        """
        if weights is None:
            weights = tf.ones_like(x)
        # Compare static shapes; broadcast only when they are not identical.
        if weights.get_shape().as_list() != x.get_shape().as_list():
            weights = tf.broadcast_to(weights, x.get_shape().as_list())

        sum_weights = tf.reduce_sum(weights, axis=self._axis)
        # The accumulators take the static shape of the reduced weights.
        shape = sum_weights.get_shape().as_list()

        total = tf.get_variable(
            "total",
            shape=shape,
            dtype=weights.dtype,
            initializer=tf.zeros_initializer(),
            trainable=False,
        )
        mean = tf.get_variable(
            "mean",
            shape=shape,
            dtype=x.dtype,
            initializer=tf.zeros_initializer(),
            trainable=False,
        )
        m2 = tf.get_variable(
            "M2",
            shape=shape,
            dtype=x.dtype,
            initializer=tf.zeros_initializer(),
            trainable=False,
        )

        # The updates are chained: total first, then mean, then M2.
        total_update = tf.assign_add(total, sum_weights)

        with tf.control_dependencies([total_update]):
            # delta is built before mean_update exists, i.e. from the mean as
            # it is prior to this call's increment. The division uses `total`
            # under a dependency on total_update.
            # NOTE(review): if the accumulated weight is zero this divides by
            # zero -- confirm callers never pass all-zero weights first.
            delta = (x - mean) * weights
            mean_update = tf.assign_add(
                mean,
                tf.reduce_sum(delta, axis=self._axis) / total)

        with tf.control_dependencies([mean_update]):
            # delta2 is built under a dependency on mean_update, so it is
            # meant to see the already-incremented mean.
            delta2 = x - mean
            m2_update = tf.assign_add(
                m2, tf.reduce_sum(delta * delta2, axis=self._axis))

        with tf.control_dependencies([m2_update]):
            return tf.identity(mean), m2 / (total -
                                            self._ddof), tf.identity(total)
    def _build_model(self, weights=None):
        """
        Builds the TensorFlow graph: placeholders, hidden/visible activation ops,
        the Gibbs sampling chain and the parameter update ops.

        :param weights: forwarded unchanged to self._initialize_weights.
        :return: the resulting placeholders and ops are stored as attributes of self.
        """
        # initialize weights and biases
        self._initialize_weights(weights)

        # TensorFlow operations
        # The two matmuls below (W x visible^T, and hidden x W) imply that W is
        # laid out as [n_hidden_units, n_visible_units].
        self.visible_units_placeholder = tf.placeholder(tf.float32, shape=[None, self.n_visible_units])
        self.compute_hidden_units_op = self._activation_function_class(
            tf.transpose(tf.matmul(self.W, tf.transpose(self.visible_units_placeholder))) + self.c)
        self.hidden_units_placeholder = tf.placeholder(tf.float32, shape=[None, self.n_hidden_units])
        self.compute_visible_units_op = self._activation_function_class(
            tf.matmul(self.hidden_units_placeholder, self.W) + self.b)
        # The uniform noise lives in a Variable, so it keeps its value until the
        # variable is initialized again. It is collected in self.random_variables,
        # presumably so the caller can re-initialize it -- TODO confirm.
        self.random_uniform_values = tf.Variable(tf.random_uniform([self.batch_size, self.n_hidden_units]))
        # 1.0 where the noise is below the hidden activation, 0.0 elsewhere.
        sample_hidden_units_op = tf.to_float(self.random_uniform_values < self.compute_hidden_units_op)
        self.random_variables = [self.random_uniform_values]

        # Positive gradient
        # Outer product. N is the batch size length.
        # From http://stackoverflow.com/questions/35213787/tensorflow-batch-outer-product
        positive_gradient_op = tf.matmul(tf.expand_dims(sample_hidden_units_op, 2),  # [N, U, 1]
                                         tf.expand_dims(self.visible_units_placeholder, 1))  # [N, 1, V]

        # Negative gradient
        # Gibbs sampling
        # Each iteration rebuilds the visible units from the current hidden sample
        # and then draws a new hidden sample with its own noise Variable.
        # NOTE(review): if self.contrastive_divergence_iter is 0 the loop body never
        # runs and compute_visible_units_op is unbound when used below.
        sample_hidden_units_gibbs_step_op = sample_hidden_units_op
        for t in range(self.contrastive_divergence_iter):
            compute_visible_units_op = self._activation_function_class(
                tf.matmul(sample_hidden_units_gibbs_step_op, self.W) + self.b)
            compute_hidden_units_gibbs_step_op = self._activation_function_class(
                tf.transpose(tf.matmul(self.W, tf.transpose(compute_visible_units_op))) + self.c)
            random_uniform_values = tf.Variable(tf.random_uniform([self.batch_size, self.n_hidden_units]))
            sample_hidden_units_gibbs_step_op = tf.to_float(random_uniform_values < compute_hidden_units_gibbs_step_op)
            self.random_variables.append(random_uniform_values)

        negative_gradient_op = tf.matmul(tf.expand_dims(sample_hidden_units_gibbs_step_op, 2),  # [N, U, 1]
                                         tf.expand_dims(compute_visible_units_op, 1))  # [N, 1, V]

        # Parameter deltas: batch mean of (positive - negative) statistics.
        compute_delta_W = tf.reduce_mean(positive_gradient_op - negative_gradient_op, 0)
        compute_delta_b = tf.reduce_mean(self.visible_units_placeholder - compute_visible_units_op, 0)
        compute_delta_c = tf.reduce_mean(sample_hidden_units_op - sample_hidden_units_gibbs_step_op, 0)

        # In-place updates: each parameter is incremented by learning_rate * delta.
        self.update_W = tf.assign_add(self.W, self.learning_rate * compute_delta_W)
        self.update_b = tf.assign_add(self.b, self.learning_rate * compute_delta_b)
        self.update_c = tf.assign_add(self.c, self.learning_rate * compute_delta_c)