Exemple #1
0
def streaming_tp_fp_arrays(num_gbboxes, tp, fp,
                           metrics_collections=None,
                           updates_collections=None,
                           name=None):
    """Streaming computation of True and False Positive arrays.

    Each call to the returned update ops adds the sum of `num_gbboxes` to a
    running counter and appends the flattened `tp` / `fp` flags to two
    growing boolean vectors.

    Args:
      num_gbboxes: Tensor cast to int32; its sum is added to the counter.
      tp: Tensor cast to bool and flattened before being appended.
      fp: Tensor cast to bool and flattened before being appended.
      metrics_collections: Optional collection name(s) to which the value
        tuple is added.
      updates_collections: Optional collection name(s) to which the update
        tuple is added.
      name: Optional variable scope name; defaults to 'streaming_tp_fp'.

    Returns:
      A pair `(val, update_op)` where `val` is the tuple of local variables
      `(v_num_objects, v_tp, v_fp)` and `update_op` is the tuple of the
      corresponding update operations.
    """
    with variable_scope.variable_scope(name, 'streaming_tp_fp',
                                       [num_gbboxes, tp, fp]):
        num_gbboxes = tf.cast(num_gbboxes, tf.int32)
        tp = tf.cast(tp, tf.bool)
        fp = tf.cast(fp, tf.bool)
        # Flatten the TP and FP tensors to 1-D so they can be concatenated
        # onto the accumulators.
        tp = tf.reshape(tp, [-1])
        fp = tf.reshape(fp, [-1])

        # Local variables accumulating information over batches.
        v_num_objects = _create_local('v_num_gbboxes', shape=[], dtype=tf.int32)
        v_tp = _create_local('v_tp', shape=[0, ], dtype=tf.bool)
        v_fp = _create_local('v_fp', shape=[0, ], dtype=tf.bool)

        # Update operations. The accumulators grow on every update, hence
        # validate_shape=False on the assignments.
        num_objects_op = state_ops.assign_add(v_num_objects,
                                              tf.reduce_sum(num_gbboxes))
        tp_op = state_ops.assign(v_tp, tf.concat([v_tp, tp], axis=0),
                                 validate_shape=False)
        fp_op = state_ops.assign(v_fp, tf.concat([v_fp, fp], axis=0),
                                 validate_shape=False)

        # Value and update ops.
        val = (v_num_objects, v_tp, v_fp)
        update_op = (num_objects_op, tp_op, fp_op)

        # Honour the collection arguments instead of silently ignoring them.
        if metrics_collections:
            ops.add_to_collections(metrics_collections, val)
        if updates_collections:
            ops.add_to_collections(updates_collections, update_op)

        return val, update_op
Exemple #2
0
    def _Update_global_variables():
      """Builds the op that applies accumulated gradients and resyncs state.

      The steps are chained with control dependencies:
        1. every variable in `grad_vars` is divided by `self._period`;
        2. the wrapped optimizer applies them to `global_center_vars`;
        3. `global_step` (if given) is incremented, each local variable is
           overwritten with its entry from `self._global_map`, and the
           variables in `grad_vars` are reset to zero.

      Returns:
        A grouped op covering all updates of step 3.
      """
      # a = a / t
      scale_ops = [state_ops.assign(g, g / self._period) for g in grad_vars]
      # apply
      with ops.control_dependencies(scale_ops):
        apply_global_op = self._opt.apply_gradients(
            zip(grad_vars, global_center_vars))

      # pull
      with ops.control_dependencies([apply_global_op]):
        update_ops = []
        # Test against None explicitly: the truth value of a TensorFlow
        # object is not a reliable "was it provided" check, and a plain
        # tf.Tensor raises when converted to bool.
        if global_step is not None:
          with ops.colocate_with(global_step):
            update_ops.append(state_ops.assign_add(global_step, 1))

        for lvar in local_vars:
          g_val = self._global_map[lvar].read_value()
          update_ops.append(state_ops.assign(lvar, g_val))
        for grad_var in grad_vars:
          update_ops.append(
              state_ops.assign(grad_var, array_ops.zeros_like(grad_var)))
        variable_update = control_flow_ops.group(*(update_ops))
      return variable_update
Exemple #3
0
  def _apply_sparse(self, grad, var):
    """Builds the sparse update ops for `var` with a max-based "v" slot.

    Args:
      grad: Sparse gradient; only its `values` and `indices` attributes are
        read (presumably an `IndexedSlices` -- confirm against the caller).
      var: Variable to update; its "m" and "v" slots are read and written.

    Returns:
      An op grouping the variable update with the "m" and "v" slot updates.
    """
    beta1_power = math_ops.cast(self._beta1_power, var.dtype.base_dtype)
    beta2_power = math_ops.cast(self._beta2_power, var.dtype.base_dtype)
    lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
    beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
    beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
    lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, "m")
    m_scaled_g_values = grad.values * (1 - beta1_t)
    m_t = state_ops.assign(m, m * beta1_t,
                           use_locking=self._use_locking)
    m_t = state_ops.scatter_add(m_t, grad.indices, m_scaled_g_values,
                                use_locking=self._use_locking)

    # u_t = max(beta_2 * u_{t-1}, |g_t|)
    # theta_t = theta_{t-1} - alpha/(1-beta_1).m_t/u_t
    v = self.get_slot(var, "v")
    # The gradient values live on `grad.values`; the previous code referred
    # to an undefined `g_t` and to an unimported `tensorflow` module.
    g_abs_values = math_ops.abs(grad.values)
    v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking)
    # Element-wise max restricted to the touched rows needs scatter_max;
    # there is no indexed `assign_max` in state_ops.
    v_t = state_ops.scatter_max(v_t, grad.indices, g_abs_values,
                                use_locking=self._use_locking)
    # NOTE(review): `lr` already carries the bias-correction factors and no
    # epsilon is added to the denominator; the expression below keeps the
    # original arithmetic and only resolves the undefined names. Confirm the
    # intended update rule.
    var_update = state_ops.assign_sub(var,
                                      lr * m_t / (v_t * (1 - beta1_t)),
                                      use_locking=self._use_locking)

    return control_flow_ops.group(*[var_update, m_t, v_t])
  def testMultiplyInverseAgainstExplicit(self):
    """Compares multiply_inverse with a dense inverse of the full block."""
    with ops.Graph().as_default(), self.test_session() as sess:
      random_seed.set_random_seed(200)
      batch_size, in_dim, out_dim = 32, 3, 2
      num_params = in_dim * out_dim
      layer_inputs = array_ops.zeros([batch_size, in_dim])
      layer_outputs = array_ops.zeros([batch_size, out_dim])
      weights = array_ops.zeros([in_dim, out_dim])
      fc_block = fb.FullyConnectedKFACBasicFB(
          lc.LayerCollection(), layer_inputs, layer_outputs, has_bias=False)
      output_grads = layer_outputs**2
      damping = 0.  # This test is only valid without damping.
      fc_block.instantiate_factors((output_grads,), damping)

      # Overwrite both covariance factors, then refresh their inverses.
      in_factor = fc_block._input_factor
      out_factor = fc_block._output_factor
      sess.run(state_ops.assign(in_factor._cov, _make_psd(in_dim)))
      sess.run(state_ops.assign(out_factor._cov, _make_psd(out_dim)))
      sess.run(in_factor.make_inverse_update_ops())
      sess.run(out_factor.make_inverse_update_ops())

      flat_vector = np.arange(num_params, dtype=np.float32)
      shaped_vector = utils.column_to_tensors(
          weights, array_ops.constant(flat_vector))
      product = fc_block.multiply_inverse(shaped_vector)
      actual = sess.run(utils.tensors_to_column(product)).ravel()

      # Reference result from explicitly inverting the dense block.
      dense_block = sess.run(fc_block.full_fisher_block())
      damped_block = dense_block + damping * np.eye(num_params)
      expected = np.dot(np.linalg.inv(damped_block), flat_vector)

      self.assertAllClose(actual, expected)
 def _f():
   """Publishes the updated cluster centers and resets the batch counters.

   Returns:
     `array_ops.identity(update_in_steps)`, created under control
     dependencies on the three assignments made below.
   """
   # Note that there is a race condition here, so we do a best effort
   # updates here. We reset update_in_steps first so that other workers
   # don't duplicate the updates. Also we update cluster_center_vars
   # before resetting total_counts to avoid large updates to
   # cluster_centers_updated based on partially updated
   # cluster_center_vars.
   with ops.control_dependencies([
       state_ops.assign(update_in_steps,
                        self._mini_batch_steps_per_iteration - 1)
   ]):
     with ops.colocate_with(
         cluster_centers_updated, ignore_existing=True):
       # Cosine distance uses L2-normalized centers; any other metric
       # publishes the updated centers unchanged.
       if self._distance_metric == COSINE_DISTANCE:
         cluster_centers = nn_impl.l2_normalize(
             cluster_centers_updated, dim=1)
       else:
         cluster_centers = cluster_centers_updated
     with ops.colocate_with(cluster_centers_var, ignore_existing=True):
       with ops.control_dependencies(
           [state_ops.assign(cluster_centers_var, cluster_centers)]):
         with ops.colocate_with(None, ignore_existing=True):
           # total_counts is zeroed only after the centers were assigned.
           with ops.control_dependencies([
               state_ops.assign(total_counts,
                                array_ops.zeros_like(total_counts))
           ]):
             return array_ops.identity(update_in_steps)
  def _testDefaultGraphInThread(self, constructed_event, continue_event, i):
    """Builds and runs ops in a new Session and checks its default graph.

    Args:
      constructed_event: Event set once the first ops have been built.
      continue_event: Event waited on before the rest of the body runs.
      i: Value formatted into the variable name ('var_%d').
    """
    with session.Session() as s:
      self.assertEqual(ops.get_default_graph(), s.graph)
      a = constant_op.constant(1.0, shape=[1, 2])
      b = constant_op.constant(2.0, shape=[2, 3])
      c = math_ops.matmul(a, b)
      v = variables.Variable(c, name='var_%d' % i)

      # Block here until all threads have constructed their graph.
      constructed_event.set()
      continue_event.wait()

      assign_c_to_v = state_ops.assign(v, c)
      v.initializer.run()
      assign_c_to_v.eval()
      v_val = v.eval()
      self.assertAllEqual([[4.0, 4.0, 4.0]], v_val)
      d = constant_op.constant(3.0, shape=[2, 3])
      e = math_ops.matmul(a, d)
      assign_e_to_v = state_ops.assign(v, e)
      # Building the assign op and evaluating `e` must leave `v` unchanged;
      # it only changes once the assign op itself is run.
      e_val = e.eval()
      self.assertAllEqual([[6.0, 6.0, 6.0]], e_val)
      v_val = v.eval()
      self.assertAllEqual([[4.0, 4.0, 4.0]], v_val)
      s.run(assign_e_to_v)
      v_val = v.eval()
      self.assertAllEqual([[6.0, 6.0, 6.0]], v_val)
      self.assertEqual(ops.get_default_graph(), s.graph)
  def testMultiplyInverseAgainstExplicit(self):
    """Compares multiply_inverse with a dense inverse of the full block."""
    with ops.Graph().as_default(), self.test_session() as sess:
      random_seed.set_random_seed(200)
      tensor_shape = (2, 2, 2, 2)
      num_params = 16
      kernel = array_ops.zeros(tensor_shape)
      layer_inputs = array_ops.zeros(tensor_shape)
      layer_outputs = array_ops.zeros(tensor_shape)
      conv_block = fb.ConvKFCBasicFB(lc.LayerCollection(), kernel,
                                     (1, 1, 1, 1), 'SAME')
      conv_block.register_additional_minibatch(layer_inputs, layer_outputs)
      output_grads = layer_outputs**2
      damping = 0.  # This test is only valid without damping.
      conv_block.instantiate_factors(([output_grads],), damping)

      # Overwrite both covariance factors, then refresh their inverses.
      in_factor = conv_block._input_factor
      out_factor = conv_block._output_factor
      sess.run(state_ops.assign(in_factor._cov, _make_psd(8)))
      sess.run(state_ops.assign(out_factor._cov, _make_psd(2)))
      for factor in (in_factor, out_factor):
        sess.run(factor.make_inverse_update_ops())

      flat_vector = np.arange(num_params, dtype=np.float32)
      shaped_vector = utils.column_to_tensors(
          kernel, array_ops.constant(flat_vector))
      product = conv_block.multiply_inverse(shaped_vector)
      actual = sess.run(utils.tensors_to_column(product)).ravel()

      # Reference result from explicitly inverting the dense block.
      dense_block = sess.run(conv_block.full_fisher_block())
      damped_block = dense_block + damping * np.eye(num_params)
      expected = np.dot(np.linalg.inv(damped_block), flat_vector)

      self.assertAllClose(actual, expected)
Exemple #8
0
  def get_updates(self, loss, params):
    """Builds the update ops for one momentum step over `params`.

    Args:
      loss: Loss passed to `self.get_gradients`.
      params: Parameters to update; one moment buffer is created per entry.

    Returns:
      `self.updates`: the iteration increment followed by, per parameter,
      the moment assignment and the parameter assignment.
    """
    gradients = self.get_gradients(loss, params)
    self.updates = [state_ops.assign_add(self.iterations, 1)]

    step_size = self.lr
    if self.initial_decay > 0:
      step_size = step_size * (  # pylint: disable=g-no-augmented-assignment
          1. / (1. + self.decay * math_ops.cast(self.iterations,
                                                K.dtype(self.decay))))

    # One zero-initialised moment buffer per parameter.
    moments = [K.zeros(K.int_shape(param)) for param in params]
    self.weights = [self.iterations] + moments

    for param, grad, moment in zip(params, gradients, moments):
      velocity = self.momentum * moment - step_size * grad
      self.updates.append(state_ops.assign(moment, velocity))

      if self.nesterov:
        candidate = param + self.momentum * velocity - step_size * grad
      else:
        candidate = param + velocity

      # Apply constraints.
      constraint = getattr(param, 'constraint', None)
      if constraint is not None:
        candidate = constraint(candidate)

      self.updates.append(state_ops.assign(param, candidate))
    return self.updates
 def test_fn(a):
   # Fixture: two assign calls whose results are discarded, interleaved with
   # arithmetic that builds d as ((a + 1) + 1) + 1.
   state_ops.assign(a, a + 1)
   b = a + 1
   state_ops.assign(a, a + 1)
   c = b + 1
   d = c + 1
   return d
  def get_placements(self, *args, **kwargs):
    """Builds the "sample", "target" and "greedy" decoding outputs.

    Extra positional and keyword arguments are accepted but not used.

    Returns:
      A pair of dicts `(actions, log_probs)` keyed by "sample", "target" and
      "greedy". `actions["sample"]` is the result of assigning either the
      sampled or the target actions to the local `actions_cache` variable,
      depending on `global_step < hparams.stop_sampling`.
    """
    child_count = self.hparams.num_children
    with variable_scope.variable_scope("controller_{}".format(self.ctrl_id)):
      cache_var = variable_scope.get_local_variable(
          "actions_cache",
          initializer=init_ops.zeros_initializer,
          dtype=dtypes.int32,
          shape=[child_count, self.num_groups],
          trainable=False)

    tiled_input = array_ops.tile(self.seq2seq_input_layer,
                                 [child_count, 1, 1])
    last_c, last_h, attn_mem = self.encode(tiled_input)

    sample_actions, sample_log_probs = self.decode(
        tiled_input, last_c, last_h, attn_mem, mode="sample")
    target_actions, target_log_probs = self.decode(
        tiled_input, last_c, last_h, attn_mem, mode="target", y=cache_var)
    greedy_actions, greedy_log_probs = self.decode(
        tiled_input, last_c, last_h, attn_mem, mode="greedy")

    # Store whichever action set is selected into the cache variable.
    cached_actions = control_flow_ops.cond(
        self.global_step < self.hparams.stop_sampling,
        lambda: state_ops.assign(cache_var, sample_actions),
        lambda: state_ops.assign(cache_var, target_actions))
    self.actions_cache = cache_var

    actions = {
        "sample": cached_actions,
        "target": target_actions,
        "greedy": greedy_actions,
    }
    log_probs = {
        "sample": sample_log_probs,
        "target": target_log_probs,
        "greedy": greedy_log_probs,
    }
    return actions, log_probs
Exemple #11
0
  def get_updates(self, loss, params):
    """Builds the update ops for one step using two running averages.

    Per parameter, one accumulator tracks squared gradients and a second one
    tracks squared updates; both decay with `self.rho`.

    Args:
      loss: Loss passed to `self.get_gradients`.
      params: Parameters to update.

    Returns:
      `self.updates`: the iteration increment followed by, per parameter,
      the accumulator, parameter and delta-accumulator assignments.
    """
    gradients = self.get_gradients(loss, params)
    param_shapes = [K.int_shape(param) for param in params]
    grad_accums = [K.zeros(shape) for shape in param_shapes]
    delta_accums = [K.zeros(shape) for shape in param_shapes]
    self.weights = grad_accums + delta_accums
    self.updates = [state_ops.assign_add(self.iterations, 1)]

    step_size = self.lr
    if self.initial_decay > 0:
      step_size = step_size * (  # pylint: disable=g-no-augmented-assignment
          1. / (1. + self.decay * math_ops.cast(self.iterations,
                                                K.dtype(self.decay))))

    for param, grad, grad_accum, delta_accum in zip(
        params, gradients, grad_accums, delta_accums):
      # update accumulator
      next_grad_accum = (
          self.rho * grad_accum + (1. - self.rho) * math_ops.square(grad))
      self.updates.append(state_ops.assign(grad_accum, next_grad_accum))

      # use the new accumulator and the *old* delta_accumulator
      update = grad * K.sqrt(delta_accum + self.epsilon) / K.sqrt(
          next_grad_accum + self.epsilon)
      candidate = param - step_size * update

      # Apply constraints.
      constraint = getattr(param, 'constraint', None)
      if constraint is not None:
        candidate = constraint(candidate)

      self.updates.append(state_ops.assign(param, candidate))

      # update delta_accumulator
      next_delta_accum = (
          self.rho * delta_accum + (1 - self.rho) * math_ops.square(update))
      self.updates.append(state_ops.assign(delta_accum, next_delta_accum))
    return self.updates
 def testIsVariableInitialized(self):
   """Checks is_variable_initialized before and after the first assign."""
   for gpu_enabled in (True, False):
     with self.test_session(use_gpu=gpu_enabled):
       raw_var = state_ops.variable_op([1, 2], dtypes.float32)
       before = variables.is_variable_initialized(raw_var).eval()
       self.assertEqual(False, before)
       state_ops.assign(raw_var, [[2.0, 3.0]]).eval()
       after = variables.is_variable_initialized(raw_var).eval()
       self.assertEqual(True, after)
  def test_stop_based_on_num_step(self):
    """Checks StopAtStepHook(num_steps=10) when the session starts at step 5."""
    hook = basic_session_run_hooks.StopAtStepHook(num_steps=10)

    with ops.Graph().as_default():
      global_step = variables.get_or_create_global_step()
      no_op = control_flow_ops.no_op()
      hook.begin()
      with session_lib.Session() as sess:
        mon_sess = monitored_session._HookedSession(sess, [hook])

        # The hook sees step 5 when the session is created.
        sess.run(state_ops.assign(global_step, 5))
        hook.after_create_session(sess, None)
        mon_sess.run(no_op)
        self.assertFalse(mon_sess.should_stop())

        # A stop is requested only once the step reaches 5 + 10.
        for step, expect_stop in ((13, False), (14, False), (15, True)):
          sess.run(state_ops.assign(global_step, step))
          mon_sess.run(no_op)
          check = self.assertTrue if expect_stop else self.assertFalse
          check(mon_sess.should_stop())

        # Clearing the flag by hand is undone by the next run past the limit.
        sess.run(state_ops.assign(global_step, 16))
        mon_sess._should_stop = False
        mon_sess.run(no_op)
        self.assertTrue(mon_sess.should_stop())
Exemple #14
0
  def get_updates(self, loss, params):
    """Builds the update ops for one step with a squared-gradient accumulator.

    Args:
      loss: Loss passed to `self.get_gradients`.
      params: Parameters to update; one accumulator is created per entry.

    Returns:
      `self.updates`: the iteration increment followed by, per parameter,
      the accumulator assignment and the parameter assignment.
    """
    gradients = self.get_gradients(loss, params)
    grad_accums = [K.zeros(K.int_shape(param)) for param in params]
    self.weights = grad_accums
    self.updates = [state_ops.assign_add(self.iterations, 1)]

    step_size = self.lr
    if self.initial_decay > 0:
      step_size = step_size * (  # pylint: disable=g-no-augmented-assignment
          1. /
          (1. +
           self.decay * math_ops.cast(self.iterations, K.dtype(self.decay))))

    for param, grad, grad_accum in zip(params, gradients, grad_accums):
      # The accumulator only ever grows by the squared gradient.
      next_accum = grad_accum + math_ops.square(grad)
      self.updates.append(state_ops.assign(grad_accum, next_accum))
      candidate = param - step_size * grad / (K.sqrt(next_accum) + self.epsilon)

      # Apply constraints.
      constraint = getattr(param, 'constraint', None)
      if constraint is not None:
        candidate = constraint(candidate)

      self.updates.append(state_ops.assign(param, candidate))
    return self.updates
Exemple #15
0
    def _apply_dense(self, grad, var):
        """Builds the dense update ops for `var` with a running max of "v".

        Args:
          grad: Dense gradient for `var`.
          var: Variable to update; its "m", "v" and "vhat" slots are written.

        Returns:
          An op grouping the variable update with the three slot updates.
        """
        dtype = var.dtype.base_dtype
        beta1_power = math_ops.cast(self._beta1_power, dtype)
        beta2_power = math_ops.cast(self._beta2_power, dtype)
        lr_t = math_ops.cast(self._lr_t, dtype)
        beta1_t = math_ops.cast(self._beta1_t, dtype)
        beta2_t = math_ops.cast(self._beta2_t, dtype)
        epsilon_t = math_ops.cast(self._epsilon_t, dtype)

        step_size = lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power)

        # m_t = beta1 * m + (1 - beta1) * g_t
        first_moment = self.get_slot(var, "m")
        scaled_grad = grad * (1 - beta1_t)
        m_t = state_ops.assign(
            first_moment,
            beta1_t * first_moment + scaled_grad,
            use_locking=self._use_locking)

        # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
        second_moment = self.get_slot(var, "v")
        scaled_sq_grad = (grad * grad) * (1 - beta2_t)
        v_t = state_ops.assign(
            second_moment,
            beta2_t * second_moment + scaled_sq_grad,
            use_locking=self._use_locking)

        # amsgrad: the denominator uses the largest "v" seen so far.
        max_second_moment = self.get_slot(var, "vhat")
        vhat_t = state_ops.assign(
            max_second_moment, math_ops.maximum(v_t, max_second_moment))
        denom = math_ops.sqrt(vhat_t) + epsilon_t

        var_update = state_ops.assign_sub(
            var, step_size * m_t / denom, use_locking=self._use_locking)
        return control_flow_ops.group(var_update, m_t, v_t, vhat_t)
Exemple #16
0
  def _resource_apply_sparse(self, grad, var, indices):
    """Builds the sparse update ops for `var` at the given `indices`.

    Args:
      grad: Gradient values for the rows selected by `indices`.
      var: Variable to update; its 'm' and 'v' slots are read and written.
      indices: Indices of the rows of `var` that `grad` applies to.

    Returns:
      An op grouping the variable update, `m_bar` and the 'v' slot update.
    """
    var_dtype = var.dtype.base_dtype
    lr_t = self._decayed_lr(var_dtype)
    beta_1_t = self._get_hyper('beta_1', var_dtype)
    beta_2_t = self._get_hyper('beta_2', var_dtype)
    # Powers are taken at step `iterations + 1`.
    local_step = math_ops.cast(self.iterations + 1, var_dtype)
    beta_1_power = math_ops.pow(beta_1_t, local_step)
    beta_2_power = math_ops.pow(beta_2_t, local_step)
    epsilon_t = self._get_hyper('epsilon', var_dtype)
    lr = (lr_t * math_ops.sqrt(1 - beta_2_power) / (1 - beta_1_power))

    # m_t = beta1 * m + (1 - beta1) * g_t
    m = self.get_slot(var, 'm')
    m_scaled_g_values = grad * (1 - beta_1_t)
    # Decay the whole slot first, then add the scaled gradient only at
    # `indices`; the control dependency orders the two writes.
    m_t = state_ops.assign(m, m * beta_1_t, use_locking=self._use_locking)
    with ops.control_dependencies([m_t]):
      m_t = self._resource_scatter_add(m, indices, m_scaled_g_values)
      # m_bar = (1 - beta1) * g_t + beta1 * m_t
      m_bar = m_scaled_g_values + beta_1_t * array_ops.gather(m_t, indices)

    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
    v = self.get_slot(var, 'v')
    v_scaled_g_values = (grad * grad) * (1 - beta_2_t)
    v_t = state_ops.assign(v, v * beta_2_t, use_locking=self._use_locking)
    with ops.control_dependencies([v_t]):
      v_t = self._resource_scatter_add(v, indices, v_scaled_g_values)

    # Only the rows at `indices` enter the denominator and the update.
    v_t_slice = array_ops.gather(v_t, indices)
    v_sqrt = math_ops.sqrt(v_t_slice)
    var_update = self._resource_scatter_add(var, indices,
                                            -lr * m_bar / (v_sqrt + epsilon_t))
    return control_flow_ops.group(*[var_update, m_bar, v_t])
  def testDeferredSlotRestoration(self):
    """Checks that a deferred restore fills slot variables created later.

    Two checkpoints are written: one before the optimizer is attached to
    `root` ("no_slots") and one after ("with_slots"). Both are restored into
    an initially empty object, and the test tracks which values are matched
    as the variable, optimizer and slot variables are created.
    """
    checkpoint_directory = self.get_temp_dir()

    root = trackable_utils.Checkpoint()
    root.var = trackable_utils.add_variable(
        root, name="var", initializer=0.)
    optimizer = adam.AdamOptimizer(0.1)
    if context.executing_eagerly():
      optimizer.minimize(root.var.read_value)
    else:
      train_op = optimizer.minimize(root.var)
      # Note that `optimizer` has not been added as a dependency of
      # `root`. Create a one-off grouping so that slot variables for `root.var`
      # get initialized too.
      self.evaluate(trackable_utils.gather_initializers(
          trackable_utils.Checkpoint(root=root, optimizer=optimizer)))
      self.evaluate(train_op)
    # First checkpoint: var == 12, optimizer not yet attached to `root`.
    self.evaluate(state_ops.assign(root.var, 12.))
    no_slots_path = root.save(os.path.join(checkpoint_directory, "no_slots"))
    # Second checkpoint: var == 13 and the "m" slot == 14, optimizer attached.
    root.optimizer = optimizer
    self.evaluate(state_ops.assign(root.var, 13.))
    self.evaluate(state_ops.assign(optimizer.get_slot(name="m", var=root.var),
                                   14.))
    slots_path = root.save(os.path.join(checkpoint_directory, "with_slots"))
    new_root = trackable_utils.Checkpoint()
    # Load the slot-containing checkpoint (deferred), then immediately overwrite
    # the non-slot variable (also deferred).
    slot_status = new_root.restore(slots_path)
    no_slot_status = new_root.restore(no_slots_path)
    # Nothing exists on `new_root` yet, so nothing can have been consumed.
    with self.assertRaises(AssertionError):
      no_slot_status.assert_consumed()
    new_root.var = trackable_utils.add_variable(
        new_root, name="var", shape=[])
    no_slot_status.assert_consumed()
    no_slot_status.run_restore_ops()
    self.assertEqual(12., self.evaluate(new_root.var))
    new_root.optimizer = adam.AdamOptimizer(0.1)
    slot_status.assert_existing_objects_matched()
    with self.assertRaisesRegexp(AssertionError, "beta1_power"):
      slot_status.assert_consumed()
    self.assertEqual(12., self.evaluate(new_root.var))
    if context.executing_eagerly():
      # Slot variables are only created with restoring initializers when
      # executing eagerly.
      self.assertEqual(14., self.evaluate(
          new_root.optimizer.get_slot(name="m", var=new_root.var)))
    else:
      self.assertIs(new_root.optimizer.get_slot(name="m", var=new_root.var),
                    None)
    if context.executing_eagerly():
      new_root.optimizer.minimize(new_root.var.read_value)
    else:
      train_op = new_root.optimizer.minimize(new_root.var)
      # The slot variable now exists; restore() didn't create it, but we should
      # now have a restore op for it.
      slot_status.run_restore_ops()
      self.assertEqual(14., self.evaluate(
          new_root.optimizer.get_slot(name="m", var=new_root.var)))
      self.evaluate(train_op)
    slot_status.assert_consumed()
Exemple #18
0
 def _apply_sparse_shared(self, grad, var, indices, scatter_add):
   """Builds the sparse update ops for `var` using the given scatter op.

   Args:
     grad: Gradient values for the rows selected by `indices`.
     var: Variable to update; its "m" and "v" slots are read and written.
     indices: Indices of the rows that `grad` applies to.
     scatter_add: Callable `(slot, indices, values)` that adds `values` into
       `slot` at `indices` and returns the result.

   Returns:
     An op grouping the variable update with the "m" and "v" slot updates.
   """
   dtype = var.dtype.base_dtype
   beta1_power = math_ops.cast(self._beta1_power, dtype)
   beta2_power = math_ops.cast(self._beta2_power, dtype)
   lr_t = math_ops.cast(self._lr_t, dtype)
   beta1_t = math_ops.cast(self._beta1_t, dtype)
   beta2_t = math_ops.cast(self._beta2_t, dtype)
   epsilon_t = math_ops.cast(self._epsilon_t, dtype)
   step_size = lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power)

   # m_t = beta1 * m + (1 - beta1) * g_t
   first_moment = self.get_slot(var, "m")
   scaled_grad = grad * (1 - beta1_t)
   # Decay the whole slot, then add the scaled gradient at `indices` only.
   m_t = state_ops.assign(
       first_moment, first_moment * beta1_t, use_locking=self._use_locking)
   with ops.control_dependencies([m_t]):
     m_t = scatter_add(first_moment, indices, scaled_grad)

   # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
   second_moment = self.get_slot(var, "v")
   scaled_sq_grad = (grad * grad) * (1 - beta2_t)
   v_t = state_ops.assign(
       second_moment, second_moment * beta2_t, use_locking=self._use_locking)
   with ops.control_dependencies([v_t]):
     v_t = scatter_add(second_moment, indices, scaled_sq_grad)

   denom = math_ops.sqrt(v_t) + epsilon_t
   var_update = state_ops.assign_sub(
       var, step_size * m_t / denom, use_locking=self._use_locking)
   return control_flow_ops.group(var_update, m_t, v_t)
 def test_fn(a):
   # Fixture: one assign call with a discarded result inside name scope
   # 'foo', followed by arithmetic that builds d as ((a + 1) + 1) + 1.
   with ops.name_scope('foo'):
     state_ops.assign(a, a + 1)
     b = a + 1
     # state_ops.assign(a, b + 1)
     c = b + 1
     d = c + 1
   return d
 def testSaveRestore(self):
   """Saves a checkpoint, perturbs state, and checks both restore paths.

   The immediate restore is checked in both execution modes; the deferred
   (restore-on-create) part only runs when executing eagerly.
   """
   network = MyNetwork()
   optimizer = adam.AdamOptimizer(0.001)
   root_checkpointable = checkpointable_utils.Checkpoint(
       optimizer=optimizer, network=network)
   input_value = constant_op.constant([[3.]])
   if context.in_eager_mode():
     optimizer.minimize(
         lambda: network(input_value))
   else:
     train_op = optimizer.minimize(network(input_value))
     # TODO(allenl): Make initialization more pleasant when graph building.
     root_checkpointable.save_counter  # pylint: disable=pointless-statement
     self.evaluate(checkpointable_utils.gather_initializers(
         root_checkpointable))
     self.evaluate(train_op)
   prefix = os.path.join(self.get_temp_dir(), "ckpt")
   # Values that go into the checkpoint: variables[1] == 42, "m" slot == 1.5.
   self.evaluate(state_ops.assign(network._named_dense.variables[1], [42.]))
   m_bias_slot = optimizer.get_slot(network._named_dense.variables[1], "m")
   self.evaluate(state_ops.assign(m_bias_slot, [1.5]))
   save_path = root_checkpointable.save(file_prefix=prefix)
   # Overwrite everything after saving so a successful restore is observable.
   self.evaluate(state_ops.assign(network._named_dense.variables[1], [43.]))
   self.evaluate(state_ops.assign(root_checkpointable.save_counter, 3))
   optimizer_variables = self.evaluate(optimizer.variables())
   self.evaluate(state_ops.assign(m_bias_slot, [-2.]))
   # Immediate restoration
   status = root_checkpointable.restore(save_path=save_path).assert_consumed()
   status.run_restore_ops()
   self.assertAllEqual([42.], self.evaluate(network._named_dense.variables[1]))
   self.assertAllEqual(1, self.evaluate(root_checkpointable.save_counter))
   self.assertAllEqual([1.5], self.evaluate(m_bias_slot))
   if context.in_graph_mode():
     return  # Restore-on-create is only supported when executing eagerly
   on_create_network = MyNetwork()
   on_create_optimizer = adam.AdamOptimizer(0.001)
   on_create_root = checkpointable_utils.Checkpoint(
       optimizer=on_create_optimizer, network=on_create_network)
   # Deferred restoration
   status = on_create_root.restore(save_path=save_path)
   on_create_network(constant_op.constant([[3.]]))  # create variables
   self.assertAllEqual(1, self.evaluate(on_create_root.save_counter))
   self.assertAllEqual([42.],
                       self.evaluate(
                           on_create_network._named_dense.variables[1]))
   on_create_m_bias_slot = on_create_optimizer.get_slot(
       on_create_network._named_dense.variables[1], "m")
   # Optimizer slot variables are created when the original variable is
   # restored.
   self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot))
   self.assertAllEqual(optimizer_variables[2:],
                       self.evaluate(on_create_optimizer.variables()))
   on_create_optimizer._create_slots(
       [resource_variable_ops.ResourceVariable([1.])])
   status.assert_consumed()
   # The first two captured optimizer variables are compared with the beta
   # accumulators of the new optimizer.
   beta1_power, beta2_power = on_create_optimizer._get_beta_accumulators()
   self.assertAllEqual(optimizer_variables[0], self.evaluate(beta1_power))
   self.assertAllEqual(optimizer_variables[1], self.evaluate(beta2_power))
  def testDeferredSlotRestoration(self):
    """Checks that a deferred restore fills slot variables created later.

    Two checkpoints are written: one before the optimizer is attached to
    `root` ("no_slots") and one after ("with_slots"). Both are restored into
    an initially empty object, and the test tracks which values are matched
    as the variable, optimizer and slot variables are created.
    """
    checkpoint_directory = self.get_temp_dir()

    root = checkpointable.Checkpointable()
    root.var = checkpointable_utils.add_variable(
        root, name="var", initializer=0.)
    optimizer = CheckpointableAdam(0.1)
    if context.in_graph_mode():
      train_op = optimizer.minimize(root.var)
      self.evaluate(variables.global_variables_initializer())
      self.evaluate(train_op)
    else:
      optimizer.minimize(root.var.read_value)
    # First checkpoint: var == 12, optimizer not yet attached to `root`.
    self.evaluate(state_ops.assign(root.var, 12.))
    no_slots_path = checkpointable_utils.Saver(root).save(
        os.path.join(checkpoint_directory, "no_slots"))
    # Second checkpoint: var == 13 and the "m" slot == 14, optimizer attached.
    root.optimizer = optimizer
    self.evaluate(state_ops.assign(root.var, 13.))
    self.evaluate(state_ops.assign(optimizer.get_slot(name="m", var=root.var),
                                   14.))
    slots_path = checkpointable_utils.Saver(root).save(
        os.path.join(checkpoint_directory, "with_slots"))
    new_root = checkpointable.Checkpointable()
    # Load the slot-containing checkpoint (deferred), then immediately overwrite
    # the non-slot variable (also deferred).
    slot_status = checkpointable_utils.Saver(new_root).restore(slots_path)
    no_slot_status = checkpointable_utils.Saver(new_root).restore(no_slots_path)
    # Nothing exists on `new_root` yet, so nothing can have been consumed.
    with self.assertRaises(AssertionError):
      no_slot_status.assert_consumed()
    new_root.var = checkpointable_utils.add_variable(
        new_root, name="var", shape=[])
    no_slot_status.assert_consumed()
    no_slot_status.run_restore_ops()
    self.assertEqual(12., self.evaluate(new_root.var))
    new_root.optimizer = CheckpointableAdam(0.1)
    with self.assertRaisesRegexp(AssertionError, "beta1_power"):
      slot_status.assert_consumed()
    self.assertEqual(12., self.evaluate(new_root.var))
    if context.in_eager_mode():
      # Slot variables are only created with restoring initializers when
      # executing eagerly.
      self.assertEqual(14., self.evaluate(
          new_root.optimizer.get_slot(name="m", var=new_root.var)))
    else:
      self.assertIs(new_root.optimizer.get_slot(name="m", var=new_root.var),
                    None)
    if context.in_graph_mode():
      train_op = new_root.optimizer.minimize(new_root.var)
      # The slot variable now exists; restore() didn't create it, but we should
      # now have a restore op for it.
      slot_status.run_restore_ops()
      self.assertEqual(14., self.evaluate(
          new_root.optimizer.get_slot(name="m", var=new_root.var)))
      self.evaluate(train_op)
    else:
      new_root.optimizer.minimize(new_root.var.read_value)
    slot_status.assert_consumed()
Exemple #22
0
  def test_resource_variable(self):
    """Checks that a resource variable can be used inside the test context."""
    resource_var = variable_scope.get_variable(
        name='variable_a', shape=(1), use_resource=True)

    # The assign op is built between Enter() and Exit(); the test passes as
    # long as building it does not raise.
    compile_context = self.create_test_xla_compile_context()
    compile_context.Enter()
    incremented = resource_var + 1
    state_ops.assign(resource_var, incremented)
    compile_context.Exit()
Exemple #23
0
 def _resource_apply_sparse(self, grad, var, indices):
   """Builds the sparse update ops for `var` at the given `indices`.

   With momentum enabled the work is delegated to the
   `training_ops.resource_sparse_apply_*` ops; otherwise the "rms" slot (and
   the "mg" slot when `self.centered` is set) is updated with explicit
   assign and scatter-add ops.

   Args:
     grad: Gradient values for the rows selected by `indices`.
     var: Variable to update.
     indices: Indices of the rows of `var` that `grad` applies to.

   Returns:
     The op returned by the selected `training_ops` call, or an op grouping
     the variable update with the slot updates.
   """
   var_dtype = var.dtype.base_dtype
   lr_t = self._decayed_lr(var_dtype)
   rms = self.get_slot(var, "rms")
   rho = self._get_hyper("rho", var_dtype)
   momentum = self._get_hyper("momentum", var_dtype)
   epsilon = self._get_hyper("epsilon", var_dtype)
   # NOTE(review): the branch is chosen by `self._momentum`, not by the
   # `momentum` hyperparameter read above -- presumably a boolean flag;
   # confirm against the constructor.
   if self._momentum:
     mom = self.get_slot(var, "momentum")
     if self.centered:
       mg = self.get_slot(var, "mg")
       return training_ops.resource_sparse_apply_centered_rms_prop(
           var.handle,
           mg.handle,
           rms.handle,
           mom.handle,
           lr_t,
           rho,
           momentum,
           epsilon,
           grad,
           indices,
           use_locking=self._use_locking)
     else:
       return training_ops.resource_sparse_apply_rms_prop(
           var.handle,
           rms.handle,
           mom.handle,
           lr_t,
           rho,
           momentum,
           epsilon,
           grad,
           indices,
           use_locking=self._use_locking)
   else:
     # rms = rho * rms everywhere, then add (1 - rho) * g^2 at `indices`.
     rms_scaled_g_values = (grad * grad) * (1. - rho)
     rms_t = state_ops.assign(rms, rms * rho, use_locking=self._use_locking)
     with ops.control_dependencies([rms_t]):
       rms_t = self._resource_scatter_add(rms, indices, rms_scaled_g_values)
       rms_slice = array_ops.gather(rms_t, indices)
     denom_slice = rms_slice
     if self.centered:
       # mg = rho * mg everywhere, then add (1 - rho) * g at `indices`; its
       # square is subtracted from the denominator.
       mg = self.get_slot(var, "mg")
       mg_scaled_g_values = grad * (1. - rho)
       mg_t = state_ops.assign(mg, mg * rho, use_locking=self._use_locking)
       with ops.control_dependencies([mg_t]):
         mg_t = self._resource_scatter_add(mg, indices, mg_scaled_g_values)
         mg_slice = array_ops.gather(mg_t, indices)
         denom_slice = rms_slice - math_ops.square(mg_slice)
     var_update = self._resource_scatter_add(
         var, indices, -lr_t * grad / (math_ops.sqrt(denom_slice) + epsilon))
     if self.centered:
       return control_flow_ops.group(*[var_update, rms_t, mg_t])
     return control_flow_ops.group(*[var_update, rms_t])
 def testDuplicateTemporaryVariable(self):
   """Checks that two temporary variables named "dup" cannot coexist."""
   with test_util.use_gpu():
     first = gen_state_ops.temporary_variable(
         [1, 2], dtypes.float32, var_name="dup")
     first_assigned = state_ops.assign(first, [[1.0, 2.0]])
     second = gen_state_ops.temporary_variable(
         [1, 2], dtypes.float32, var_name="dup")
     second_assigned = state_ops.assign(second, [[3.0, 4.0]])
     # Evaluating the sum needs both variables, which triggers the clash.
     total = first_assigned + second_assigned
     with self.assertRaises(errors.AlreadyExistsError):
       self.evaluate(total)
Exemple #25
0
  def _cached_copy(self, var, name, pass_through=False):
    """Creates transient cache Variable(s) mirroring `var`.

    `var` may be a single Variable or a non-empty list of Variables. For a
    list, the init assignments are chained with control dependencies so they
    run one after another. With `pass_through=True` no cache is created and
    `var` itself is handed back together with two no-ops.

    Args:
      var: A Variable, a list of Variables, or None.
      name: Name of the cache Variable; list entries are named
        "<name>_shard_<i>".
      pass_through: If True, return `var` unchanged instead of caching it.

    Returns:
      A tuple `(cache, cache_init, cache_reset)`:
      cache: the transient Variable, or a list of them matching `var`
        one-to-one (or `var` itself when passing through).
      cache_init: op that copies `var` into the cache.
      cache_reset: op that assigns 1.0 to the cache Variable(s).
      All three are None when `var` is None.
    """
    if var is None:
      return None, None, None

    if pass_through:
      return var, control_flow_ops.no_op(), control_flow_ops.no_op()

    if isinstance(var, variables.Variable):
      cache = WALSModel._transient_var(name=name)
      with ops.colocate_with(cache):
        cache_init = state_ops.assign(cache, var, validate_shape=False)
        cache_reset = state_ops.assign(cache, 1.0, validate_shape=False)
      return cache, cache_init, cache_reset

    assert isinstance(var, list)
    assert var
    cache = [
        WALSModel._transient_var(name="%s_shard_%d" % (name, shard))
        for shard, _ in enumerate(var)
    ]
    cache_init = None
    reset_ops = []
    for shard_cache, shard_var in zip(cache, var):
      with ops.colocate_with(shard_cache):
        if cache_init is None:
          # First shard: nothing to wait for.
          cache_init = state_ops.assign(
              shard_cache, shard_var, validate_shape=False)
        else:
          # Later shards wait for the previous shard's copy to finish.
          with ops.control_dependencies([cache_init]):
            cache_init = state_ops.assign(
                shard_cache, shard_var, validate_shape=False)
        reset_ops.append(
            state_ops.assign(shard_cache, 1.0, validate_shape=False))
    cache_reset = control_flow_ops.group(*reset_ops)

    return cache, cache_init, cache_reset
Exemple #26
0
    def create_axis_ops(sp_input, num_items, update_fn, axis_name):
      """Builds the book-keeping and training ops for one axis.

      Args:
        sp_input: A SparseTensor corresponding to the row or column batch.
        num_items: An integer, the total number of items of this axis.
        update_fn: A function that takes one argument (`sp_input`), and that
          returns a tuple of
          * new_factors: A float Tensor of the factor values after update.
          * update_op: A TensorFlow op which updates the factors.
          * loss: A float Tensor, the unregularized loss.
          * reg_loss: A float Tensor, the regularization loss.
          * sum_weights: A float Tensor, the sum of factor weights.
        axis_name: A string that specifies the name of the axis.

      Returns:
        A tuple consisting of:
          * reset_processed_items_op: A TensorFlow op, to be run before the
            beginning of any sweep. It marks all items as not-processed.
          * axis_train_op: A TensorFlow op, to be run during this axis' sweeps.
      """
      # Boolean vector with one "already processed" flag per item.
      all_unprocessed = array_ops.fill(dims=[num_items], value=False)
      with ops.colocate_with(all_unprocessed):
        processed_items = variable_scope.variable(
            all_unprocessed,
            collections=[ops.GraphKeys.GLOBAL_VARIABLES],
            trainable=False,
            name="processed_" + axis_name)
      reset_processed_items_op = state_ops.assign(
          processed_items, all_unprocessed,
          name="reset_processed_" + axis_name)

      _, update_op, loss, reg, sum_weights = update_fn(sp_input)
      # First column of the sparse indices: the items touched by this batch.
      input_indices = sp_input.indices[:, 0]

      record_loss = state_ops.assign(loss_var, loss + reg)
      record_rwse = state_ops.assign(
          rwse_var, math_ops.sqrt(loss / sum_weights))
      # Flag the batch items only after the factor update and the metric
      # assignments have run.
      with ops.control_dependencies([update_op, record_loss, record_rwse]):
        with ops.colocate_with(processed_items):
          batch_flags = array_ops.ones_like(input_indices, dtype=dtypes.bool)
          update_processed_items = state_ops.scatter_update(
              processed_items,
              input_indices,
              batch_flags,
              name="update_processed_{}_indices".format(axis_name))
        with ops.control_dependencies([update_processed_items]):
          # The sweep is done once every item has been flagged.
          is_sweep_done = math_ops.reduce_all(processed_items)
          record_sweep_done = state_ops.assign(is_sweep_done_var, is_sweep_done)
          count_sweep = state_ops.assign_add(
              completed_sweeps_var,
              math_ops.cast(is_sweep_done, dtypes.int32))
          axis_train_op = control_flow_ops.group(
              global_step_incr_op,
              record_sweep_done,
              count_sweep,
              name="{}_sweep_train_op".format(axis_name))
      return reset_processed_items_op, axis_train_op
Exemple #27
0
  def test_non_resource_variable_error(self):
    """Tests that non-resource variable usage is disallowed.

    Assigning to a variable created with `use_resource=False` while the test
    XLA compile context is entered must raise `NotImplementedError`.
    """
    a = variable_scope.get_variable(
        name='variable_a', shape=(1), use_resource=False)

    context = self.create_test_xla_compile_context()
    context.Enter()
    # Always leave the context, even when the assertion below fails, so a
    # failing test does not leave the compile context entered.
    try:
      with self.assertRaisesRegexp(
          NotImplementedError, 'Non-resource Variables are not supported inside '
          r'XLA computations \(operator name: Assign\)'):
        state_ops.assign(a, a + 1)
    finally:
      context.Exit()
Exemple #28
0
def _Update(variable, gradients, accum, linear, base_lr, lr_power, l1, l2):
  """Builds the ops updating "variable", "accum" and "linear" from "gradients".

  Notation: "variable" is W, "accum" is N, "linear" is Z, "gradients" is G,
  and N(t) is "accum" at step t. With lr_power = -0.5 (the adagrad learning
  rate):
    N = N + G^2
    Z = Z + G - W * (sqrt(N(t)) - sqrt(N(t-1))) / base_lr
  REQUIRES: variable, gradients, accum and linear must have the same
            dimensionality.

  Args:
    variable: A Variable.
    gradients: A Tensor of same shape as 'variable'.
    accum: A Variable containing the sum of the squares of gradients.
    linear: A Variable containing approximation info.
    base_lr: A constant, the base learning rate.
    lr_power: A constant used to adjust the learning rate.
    l1: A constant, the l1 regularization strength.
    l2: A constant, the l2 regularization strength.

  Returns:
    A group op wrapping three Assign ops, for "accum", "linear" and
    "variable".
  """
  dtype = variable.dtype.base_dtype
  # Convert every scalar hyper-parameter to a tensor of the variable's dtype.
  base_lr, lr_power, l1, l2 = [
      ops.convert_to_tensor(constant, dtype=dtype)
      for constant in (base_lr, lr_power, l1, l2)
  ]

  # New accumulator: N + G^2.
  new_accum = math_ops.square(gradients) + accum

  # New linear term: Z + G - sigma * W.
  exponent = math_ops.neg(lr_power)
  sigma = math_ops.pow(new_accum, exponent) - math_ops.pow(accum, exponent)
  sigma = sigma / base_lr
  proximal_adjust = sigma * variable
  new_linear = linear + gradients - proximal_adjust

  # New value of "variable".
  new_variable = _Compute(new_accum, new_linear, base_lr, lr_power, l1, l2)

  # sigma reads the old "accum", so it must be evaluated before "accum" is
  # overwritten.
  with ops.control_dependencies([sigma]):
    accum_update_op = state_ops.assign(accum, new_accum)
  linear_update_op = state_ops.assign(linear, new_linear)
  variable_update_op = state_ops.assign(variable, new_variable)
  return control_flow_ops.group(linear_update_op, accum_update_op,
                                variable_update_op)
Exemple #29
0
 def setUp(self):
   """Creates the sizes, flag variables, flag-setting ops and placeholders."""
   self._num_rows, self._num_cols = 5, 7
   self._train_op = control_flow_ops.no_op()

   # Boolean variables, all starting out False.
   self._row_prep_done = variables.Variable(False)
   self._col_prep_done = variables.Variable(False)
   self._init_done = variables.Variable(False)

   def set_true_ops(flag):
     # Single-element list holding the op that sets `flag` to True.
     return [state_ops.assign(flag, True)]

   self._row_prep_ops = set_true_ops(self._row_prep_done)
   self._col_prep_ops = set_true_ops(self._col_prep_done)
   self._init_ops = set_true_ops(self._init_done)

   # int64 placeholders for the input row and column indices.
   self._input_row_indices_ph, self._input_col_indices_ph = (
       array_ops.placeholder(dtypes.int64),
       array_ops.placeholder(dtypes.int64))
Exemple #30
0
 def series_start_updates():
   """Returns an op that overwrites the series-start mean and variance."""
   # Used when this is the lowest-time chunk seen so far: the series start
   # moments are updated to reflect it. These statistics are "best effort";
   # the update has race conditions, but it should eventually converge if the
   # start of the series is presented enough times.
   window = values[min_time_batch, :self._starting_variance_window_size]
   mean, variance = nn.moments(window, axes=[0])
   start_moments = statistics.series_start_moments
   assign_mean = state_ops.assign(start_moments.mean, mean)
   assign_variance = state_ops.assign(start_moments.variance, variance)
   return control_flow_ops.group(assign_mean, assign_variance)
Exemple #31
0
    def _resource_apply_dense(self, grad, var):
        """Builds the dense update op for `var` given its gradient `grad`.

        Updates the 'm' and 'v' slots as exponential moving averages of the
        gradient and of the squared gradient, optionally keeps their running
        maximum in 'vhat' (when `self.amsgrad` is set), and subtracts the
        learning-rate-scaled step from `var`.

        Args:
            grad: Gradient tensor for `var`.
            var: The variable to update.

        Returns:
            An op grouping the variable update with the slot updates.
        """
        dtype = var.dtype.base_dtype
        lr = self._decayed_lr(dtype)
        m_slot = self.get_slot(var, 'm')
        v_slot = self.get_slot(var, 'v')
        beta1 = self._get_hyper('beta_1', dtype)
        beta2 = self._get_hyper('beta_2', dtype)
        eps = tf.convert_to_tensor(self.epsilon, dtype)
        step = math_ops.cast(self.iterations + 1, dtype)
        beta1_pow = math_ops.pow(beta1, step)
        beta2_pow = math_ops.pow(beta2, step)

        if self._initial_total_steps > 0:
            # Scale the learning rate up linearly during warmup, then move it
            # linearly towards min_lr over the remaining steps.
            total_steps = self._get_hyper('total_steps', dtype)
            warmup_steps = total_steps * self._get_hyper(
                'warmup_proportion', dtype)
            min_lr = self._get_hyper('min_lr', dtype)
            decay_steps = K.maximum(total_steps - warmup_steps, 1)
            decay_rate = (min_lr - lr) / decay_steps
            in_warmup = step <= warmup_steps
            warmup_lr = lr * (step / warmup_steps)
            decayed_lr = lr + decay_rate * K.minimum(step - warmup_steps,
                                                     decay_steps)
            lr = tf.where(in_warmup, warmup_lr, decayed_lr)

        sma_inf = 2.0 / (1.0 - beta2) - 1.0
        sma_t = sma_inf - 2.0 * step * beta2_pow / (1.0 - beta2_pow)

        # First moment and its bias-corrected value.
        new_m = beta1 * m_slot + (1.0 - beta1) * grad
        m_t = state_ops.assign(m_slot, new_m, use_locking=self._use_locking)
        m_corr = m_t / (1.0 - beta1_pow)

        # Second moment.
        new_v = beta2 * v_slot + (1.0 - beta2) * math_ops.square(grad)
        v_t = state_ops.assign(v_slot, new_v, use_locking=self._use_locking)

        slot_updates = [m_t, v_t]
        if self.amsgrad:
            # Use the running maximum of the second moment instead of v_t.
            vhat_slot = self.get_slot(var, 'vhat')
            second_moment = state_ops.assign(
                vhat_slot,
                math_ops.maximum(vhat_slot, v_t),
                use_locking=self._use_locking)
            slot_updates.append(second_moment)
        else:
            second_moment = v_t
        v_corr = math_ops.sqrt(second_moment / (1.0 - beta2_pow))

        r_t = math_ops.sqrt((sma_t - 4.0) / (sma_inf - 4.0) * (sma_t - 2.0) /
                            (sma_inf - 2.0) * sma_inf / sma_t)

        # The r_t-scaled adaptive step is used only once sma_t reaches 5.0;
        # before that the step is the bias-corrected first moment alone.
        use_adaptive = sma_t >= 5.0
        adaptive_step = r_t * m_corr / (v_corr + eps)
        step_t = tf.where(use_adaptive, adaptive_step, m_corr)

        if self._initial_weight_decay > 0.0:
            step_t = step_t + self._get_hyper('weight_decay', dtype) * var

        var_update = state_ops.assign_sub(var,
                                          lr * step_t,
                                          use_locking=self._use_locking)
        return control_flow_ops.group(var_update, *slot_updates)
 def update_var_v1(x):
   """Creates variable 'v' (initial value 3) and returns the op assigning x."""
   target = variables.Variable(3, name='v')
   return state_ops.assign(target, x).op
Exemple #33
0
    def _resource_apply_sparse(self, grad, var, indices, apply_state=None):
        """Builds the sparse update op for the rows of `var` at `indices`.

        With momentum enabled the work is delegated to the fused
        `training_ops` sparse RMSProp kernels (centered or not). Without
        momentum the 'rms' slot (and the 'mg' slot when `self.centered`) is
        decayed as a whole, the gradient contribution is scatter-added at
        `indices`, and the step is scatter-added into `var`.

        Args:
            grad: Gradient values for the selected rows.
            var: The variable to update.
            indices: Indices of the rows of `var` that `grad` refers to.
            apply_state: Optional dict keyed by (device, dtype) holding
                precomputed coefficients; `_fallback_apply_state` is used
                when the entry is missing or empty.

        Returns:
            The fused kernel op, or an op grouping the variable update with
            the slot updates.
        """
        state_key = (var.device, var.dtype.base_dtype)
        coeffs = ((apply_state or {}).get(state_key)
                  or self._fallback_apply_state(*state_key))

        rms = self.get_slot(var, "rms")

        if self._momentum:
            mom = self.get_slot(var, "momentum")
            if not self.centered:
                return training_ops.resource_sparse_apply_rms_prop(
                    var.handle,
                    rms.handle,
                    mom.handle,
                    coeffs["lr_t"],
                    coeffs["rho"],
                    coeffs["momentum"],
                    coeffs["epsilon"],
                    grad,
                    indices,
                    use_locking=self._use_locking)
            mg = self.get_slot(var, "mg")
            return training_ops.resource_sparse_apply_centered_rms_prop(
                var.handle,
                mg.handle,
                rms.handle,
                mom.handle,
                coeffs["lr_t"],
                coeffs["rho"],
                coeffs["momentum"],
                coeffs["epsilon"],
                grad,
                indices,
                use_locking=self._use_locking)

        # No momentum: decay the whole rms slot, then add the squared-gradient
        # contribution at the touched indices.
        rms_increment = (grad * grad) * coeffs["one_minus_rho"]
        rms_decayed = state_ops.assign(rms,
                                       rms * coeffs["rho"],
                                       use_locking=self._use_locking)
        with ops.control_dependencies([rms_decayed]):
            rms_t = self._resource_scatter_add(rms, indices, rms_increment)
            rms_slice = array_ops.gather(rms_t, indices)
        denom_slice = rms_slice
        slot_updates = [rms_t]

        if self.centered:
            # Same decay-then-scatter scheme for the gradient average; its
            # square is subtracted from the denominator.
            mg = self.get_slot(var, "mg")
            mg_increment = grad * coeffs["one_minus_rho"]
            mg_decayed = state_ops.assign(mg,
                                          mg * coeffs["rho"],
                                          use_locking=self._use_locking)
            with ops.control_dependencies([mg_decayed]):
                mg_t = self._resource_scatter_add(mg, indices, mg_increment)
                mg_slice = array_ops.gather(mg_t, indices)
                denom_slice = rms_slice - math_ops.square(mg_slice)
            slot_updates.append(mg_t)

        scaled_grad = coeffs["neg_lr_t"] * grad
        denominator = math_ops.sqrt(denom_slice) + coeffs["epsilon"]
        var_update = self._resource_scatter_add(var, indices,
                                                scaled_grad / denominator)
        return control_flow_ops.group(var_update, *slot_updates)
    def build_controller(self):
        """RL optimization interface.

        Builds the controller's reward/ready bookkeeping variables, the
        baseline and its update op, the loss, the summaries and the training
        ops, and stores the resulting dictionary on `self.ops`.

        Returns:
          ops: A dictionary holding handles of the model used for training.
            `ops["loss"]` is set to 0 and `ops["controller"]` holds the
            dictionary of controller tensors and ops built here.
        """

        # NOTE(review): this stores `_global_step`, while the code below reads
        # `self.global_step` -- presumably a property exposing it; confirm.
        self._global_step = training_util.get_or_create_global_step()
        ops = {}
        ops["loss"] = 0

        failing_signal = self.compute_reward(self.hparams.failing_signal)

        ctr = {}

        with tf_ops.name_scope("controller_{}".format(self.ctrl_id)):
            with variable_scope.variable_scope("controller_{}".format(
                    self.ctrl_id)):
                # Per-child bookkeeping: each entry keeps parallel lists of
                # variables ("value"), feed placeholders ("ph") and assign ops
                # ("update").
                ctr["reward"] = {"value": [], "ph": [], "update": []}
                ctr["ready"] = {"value": [], "ph": [], "update": []}
                ctr["best_reward"] = {"value": [], "update": []}
                for i in range(self.hparams.num_children):
                    # Reward of child i, written from a scalar placeholder.
                    reward_value = variable_scope.get_local_variable(
                        "reward_{}".format(i),
                        initializer=0.0,
                        dtype=dtypes.float32,
                        trainable=False)
                    reward_ph = array_ops.placeholder(
                        dtypes.float32,
                        shape=(),
                        name="reward_ph_{}".format(i))
                    reward_update = state_ops.assign(reward_value,
                                                     reward_ph,
                                                     use_locking=True)
                    ctr["reward"]["value"].append(reward_value)
                    ctr["reward"]["ph"].append(reward_ph)
                    ctr["reward"]["update"].append(reward_update)
                    # Best reward of child i, initialised to the failing
                    # signal; its update keeps the minimum of the current best
                    # and the freshly assigned reward.
                    best_reward = variable_scope.get_local_variable(
                        "best_reward_{}".format(i),
                        initializer=failing_signal,
                        dtype=dtypes.float32,
                        trainable=False)
                    ctr["best_reward"]["value"].append(best_reward)
                    ctr["best_reward"]["update"].append(
                        state_ops.assign(
                            best_reward,
                            math_ops.minimum(best_reward, reward_update)))

                    # Boolean "ready" flag of child i, written from a
                    # placeholder.
                    ready_value = variable_scope.get_local_variable(
                        "ready_{}".format(i),
                        initializer=True,
                        dtype=dtypes.bool,
                        trainable=False)
                    ready_ph = array_ops.placeholder(
                        dtypes.bool, shape=(), name="ready_ph_{}".format(i))
                    ready_update = state_ops.assign(ready_value,
                                                    ready_ph,
                                                    use_locking=True)
                    ctr["ready"]["value"].append(ready_value)
                    ctr["ready"]["ph"].append(ready_ph)
                    ctr["ready"]["update"].append(ready_update)

            ctr["grouping_y_preds"], ctr[
                "grouping_log_probs"] = self.get_groupings()
            summary.histogram(
                "grouping_actions",
                array_ops.slice(ctr["grouping_y_preds"]["sample"], [0, 0],
                                [1, array_ops.shape(self.op_embeddings)[0]]))

            with variable_scope.variable_scope("controller_{}".format(
                    self.ctrl_id)):
                # Baseline starts at the failing signal or at 0.0, depending
                # on hparams.start_with_failing_signal.
                ctr["baseline"] = variable_scope.get_local_variable(
                    "baseline",
                    initializer=failing_signal
                    if self.hparams.start_with_failing_signal else 0.0,
                    dtype=dtypes.float32,
                    trainable=False)

            # Moving average of the mean reward, with decay hparams.bl_dec.
            new_baseline = self.hparams.bl_dec * ctr["baseline"] + (
                1 - self.hparams.bl_dec) * math_ops.reduce_mean(
                    ctr["reward"]["value"])
            if not self.hparams.always_update_baseline:
                # Alternative baseline computed only from the rewards that are
                # below the failing signal.
                baseline_mask = math_ops.less(ctr["reward"]["value"],
                                              failing_signal)
                selected_reward = array_ops.boolean_mask(
                    ctr["reward"]["value"], baseline_mask)
                # Mean of the selected rewards, or 0 when none is selected.
                selected_baseline = control_flow_ops.cond(
                    math_ops.reduce_any(baseline_mask),
                    lambda: math_ops.reduce_mean(selected_reward),
                    lambda: constant_op.constant(0, dtype=dtypes.float32))
                ctr["pos_reward"] = selected_baseline
                pos_ = math_ops.less(
                    constant_op.constant(0, dtype=dtypes.float32),
                    selected_baseline)
                selected_baseline = self.hparams.bl_dec * ctr["baseline"] + (
                    1 - self.hparams.bl_dec) * selected_baseline
                # Keep the current baseline unless the selected mean is
                # strictly positive.
                selected_baseline = control_flow_ops.cond(
                    pos_, lambda: selected_baseline, lambda: ctr["baseline"])
                # Use the plain moving average until
                # hparams.stop_updating_after_steps, the selected one after.
                new_baseline = control_flow_ops.cond(
                    math_ops.less(self.global_step,
                                  self.hparams.stop_updating_after_steps),
                    lambda: new_baseline, lambda: selected_baseline)
            ctr["baseline_update"] = state_ops.assign(ctr["baseline"],
                                                      new_baseline,
                                                      use_locking=True)

            ctr["y_preds"], ctr["log_probs"] = self.get_placements()
            summary.histogram("actions", ctr["y_preds"]["sample"])
            mask = math_ops.less(ctr["reward"]["value"], failing_signal)
            # (reward - baseline), weighted by the summed log-probabilities of
            # the sampled placements and groupings.
            ctr["loss"] = ctr["reward"]["value"] - ctr["baseline"]
            ctr["loss"] *= (ctr["log_probs"]["sample"] +
                            ctr["grouping_log_probs"]["sample"])

            # Mean negated loss over the entries whose reward is below the
            # failing signal, or 0 when there is none.
            selected_loss = array_ops.boolean_mask(ctr["loss"], mask)
            selected_loss = control_flow_ops.cond(
                math_ops.reduce_any(mask),
                lambda: math_ops.reduce_mean(-selected_loss),
                lambda: constant_op.constant(0, dtype=dtypes.float32))

            # Mean over all entries until hparams.stop_updating_after_steps,
            # the masked variant after.
            ctr["loss"] = control_flow_ops.cond(
                math_ops.less(self.global_step,
                              self.hparams.stop_updating_after_steps),
                lambda: math_ops.reduce_mean(-ctr["loss"]),
                lambda: selected_loss)

            ctr["reward_s"] = math_ops.reduce_mean(ctr["reward"]["value"])
            summary.scalar("loss", ctr["loss"])
            summary.scalar("avg_reward", ctr["reward_s"])
            # NOTE(review): `best_reward` is the loop variable left over from
            # the per-child loop above, so only the last child's best reward is
            # summarized, and it is undefined when num_children is 0 -- confirm
            # this is intended.
            summary.scalar("best_reward_so_far", best_reward)
            summary.scalar(
                "advantage",
                math_ops.reduce_mean(ctr["reward"]["value"] - ctr["baseline"]))

        with variable_scope.variable_scope("optimizer",
                                           reuse=variable_scope.AUTO_REUSE):
            # Training op, learning rate and gradient norms over all trainable
            # variables in the graph collection.
            (ctr["train_op"], ctr["lr"], ctr["grad_norm"],
             ctr["grad_norms"]) = self._get_train_ops(
                 ctr["loss"],
                 tf_ops.get_collection(tf_ops.GraphKeys.TRAINABLE_VARIABLES),
                 self.global_step,
                 grad_bound=self.hparams.grad_bound,
                 lr_init=self.hparams.lr,
                 lr_dec=self.hparams.lr_dec,
                 start_decay_step=self.hparams.start_decay_step,
                 decay_steps=self.hparams.decay_steps,
                 optimizer_type=self.hparams.optimizer_type)

        summary.scalar("gradnorm", ctr["grad_norm"])
        summary.scalar("lr", ctr["lr"])
        ctr["summary"] = summary.merge_all()
        ops["controller"] = ctr

        self.ops = ops
        return ops
Exemple #35
0
 def _assign_new_value(self, variable, value):
   """Returns an op assigning `value` to `variable`, colocated with it.

   The assign op is named after the 'AssignNewValue' name scope.
   """
   with K.name_scope('AssignNewValue') as scope, ops.colocate_with(variable):
     assign_op = state_ops.assign(variable, value, name=scope)
   return assign_op
Exemple #36
0
def LastValueQuantize(inputs,
                      per_channel=False,
                      init_min=-6.0,
                      init_max=6.0,
                      vars_collection=None,
                      name_prefix='LastValueQuant',
                      reuse=None,
                      is_training=True,
                      num_bits=8,
                      narrow_range=False,
                      symmetric=False):
  """Adds a layer that uses the range of the last input as quantization range.

  Creates variables called 'min' and 'max' holding the interval used for
  quantization and clamping. In training mode both are overwritten with the
  range of the current `inputs`; in eval mode the stored values are used
  as-is.

  Args:
    inputs: a tensor containing values to be quantized.
    per_channel: (Optional) a boolean specifying whether to use different
      quantization ranges per output channel.
    init_min: a float scalar, the initial value for variable min.
    init_max: a float scalar, the initial value for variable max.
    vars_collection: (Optional) collection where to store variables for
      quantization interval ends.
    name_prefix: name_prefix for created nodes.
    reuse: whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    is_training: Whether the op is applied to a training or eval graph.
    num_bits: Number of bits to use for quantization, must be between 2 and 8.
    narrow_range: Whether to use the narrow quantization range
      [1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1].
    symmetric: If true, use symmetric quantization limits instead of training
      the minimum and maximum of each quantization range separately.
  Returns:
    a tensor containing quantized values.
  """
  with variable_scope.variable_scope(
      None, default_name=name_prefix, values=[inputs], reuse=reuse) as scope:
    scope.set_partitioner(None)
    input_shape = inputs.get_shape()
    input_dim = len(input_shape)
    if per_channel:
      # Only 1-, 2- and 4-dimensional tensors can be quantized per channel.
      assert input_dim in [1, 2, 4], (
          'Expected 1D, 2D or 4D input, was: %s in  scope: %s' %
          (input_shape, name_prefix))
    # One range entry per channel (last dimension), or a scalar range.
    min_max_shape = [input_shape[-1]] if per_channel else []

    vars_collections = [vars_collection] if vars_collection else []

    def _range_variable(var_name, initial_value):
      # Non-trainable variable holding one end of the quantization interval.
      return _ModelVariable(
          var_name,
          shape=min_max_shape,
          initializer=init_ops.constant_initializer(initial_value),
          collections=vars_collections,
          trainable=False)

    min_var = _range_variable('min', init_min)
    max_var = _range_variable('max', init_max)

    # Keyword arguments shared by the eval and training fake-quant calls.
    quant_kwargs = dict(
        per_channel=per_channel,
        num_bits=num_bits,
        narrow_range=narrow_range)

    if not is_training:
      return _FakeQuantWithMinMaxVars(inputs, min_var, max_var, **quant_kwargs)

    if per_channel and input_dim >= 2:
      # Reduce over every dimension except the channel one.
      if input_dim == 2:
        reduce_dims = [0]
      elif input_dim == 4:
        reduce_dims = [0, 1, 2]
      batch_min = math_ops.reduce_min(
          inputs, axis=reduce_dims, name='BatchMin')
      batch_max = math_ops.reduce_max(
          inputs, axis=reduce_dims, name='BatchMax')
    elif per_channel:
      # 1D input: every element is its own channel.
      batch_min = inputs
      batch_max = inputs
    else:
      batch_min = math_ops.reduce_min(inputs, name='BatchMin')
      batch_max = math_ops.reduce_max(inputs, name='BatchMax')

    if symmetric:
      if narrow_range:
        min_max_ratio = -1
      else:
        # In two's complement notation, the negative range is slightly larger
        # than the positive range.
        num_levels = 1 << num_bits
        min_max_ratio = -(num_levels - 2) / num_levels

      # TFLite requires that 0.0 is always in the [min; max] range. Because
      # batch_min <= batch_max, it follows that range_min <= 0 <= range_max.
      range_min = math_ops.minimum(batch_min, batch_max / min_max_ratio)
      range_max = math_ops.maximum(batch_max, batch_min * min_max_ratio)
    else:
      # TFLite requires that 0.0 is always in the [min; max] range.
      range_min = math_ops.minimum(batch_min, 0.0)
      range_max = math_ops.maximum(batch_max, 0.0)

    # Quantize with the outputs of the assign ops.
    assign_min = state_ops.assign(min_var, range_min, name='AssignMinLast')
    assign_max = state_ops.assign(max_var, range_max, name='AssignMaxLast')

    return _FakeQuantWithMinMaxVars(
        inputs, assign_min, assign_max, **quant_kwargs)
Exemple #37
0
def _warm_start_var_with_vocab(var,
                               current_vocab_path,
                               current_vocab_size,
                               prev_ckpt,
                               prev_vocab_path,
                               previous_vocab_size=-1,
                               current_oov_buckets=0,
                               prev_tensor_name=None,
                               initializer=None):
  """Warm-starts given variable from `prev_tensor_name` tensor in `prev_ckpt`.

  Use this method when the `var` is backed by vocabulary. This method stitches
  the given `var` such that values corresponding to individual features in the
  vocabulary remain consistent irrespective of changing order of the features
  between old and new vocabularies.

  Args:
    var: Current graph's variable that needs to be warm-started (initialized).
      Can be either of the following:
      (i) `Variable`
      (ii) `ResourceVariable`
      (iii) list of `Variable`: The list must contain slices of the same larger
        variable.
      (iv) `PartitionedVariable`
    current_vocab_path: Path to the vocab file used for the given `var`.
    current_vocab_size: An `int` specifying the number of entries in the current
      vocab.
    prev_ckpt: A string specifying the directory with checkpoint file(s) or path
      to checkpoint. The given checkpoint must have tensor with name
      `prev_tensor_name` (if not None) or tensor with name same as given `var`.
    prev_vocab_path: Path to the vocab file used for the tensor in `prev_ckpt`.
    previous_vocab_size: If provided, will constrain previous vocab to the first
      `previous_vocab_size` entries.  -1 means use the entire previous vocab.
    current_oov_buckets: An `int` specifying the number of out-of-vocabulary
      buckets used for given `var`.
    prev_tensor_name: Name of the tensor to lookup in provided `prev_ckpt`. If
      None, we lookup tensor with same name as given `var`.
    initializer: Variable initializer to be used for missing entries.  If None,
      missing entries will be zero-initialized.

  Raises:
    ValueError: If required args are not provided.
  """
  if not (current_vocab_path and current_vocab_size and prev_ckpt and
          prev_vocab_path):
    raise ValueError("Invalid args: Must provide all of [current_vocab_path, "
                     "current_vocab_size, prev_ckpt, prev_vocab_path}.")
  if _is_variable(var):
    var = [var]
  elif isinstance(var, list) and all(_is_variable(v) for v in var):
    var = var
  elif isinstance(var, variables_lib.PartitionedVariable):
    var = var._get_variable_list()
  else:
    raise TypeError(
        "var MUST be one of the following: a Variable, list of Variable or "
        "PartitionedVariable, but is {}".format(type(var)))

  if not prev_tensor_name:
    # Assume tensor name remains the same.
    prev_tensor_name = _infer_var_name(var)

  for v in var:
    v_shape = v.get_shape().as_list()
    slice_info = v._get_save_slice_info()
    partition_info = None
    if slice_info:
      partition_info = variable_scope._PartitionInfo(
          full_shape=slice_info.full_shape,
          var_offset=slice_info.var_offset)

    # TODO(eddz): Support WarmStartSettings where class vocabularies need
    # remapping too.
    init = checkpoint_ops._load_and_remap_matrix_initializer(
        ckpt_path=checkpoint_utils._get_checkpoint_filename(prev_ckpt),
        old_tensor_name=prev_tensor_name,
        new_row_vocab_size=current_vocab_size,
        new_col_vocab_size=v_shape[1],
        old_row_vocab_size=previous_vocab_size,
        old_row_vocab_file=prev_vocab_path,
        new_row_vocab_file=current_vocab_path,
        old_col_vocab_file=None,
        new_col_vocab_file=None,
        num_row_oov_buckets=current_oov_buckets,
        num_col_oov_buckets=0,
        initializer=initializer)
    new_init_val = ops.convert_to_tensor(
        init(shape=v_shape, partition_info=partition_info))
    v._initializer_op = state_ops.assign(v, new_init_val)
    def streaming_tp_fp_arrays(self,
                               num_gbboxes,
                               tp,
                               fp,
                               scores,
                               remove_zero_scores=True,
                               metrics_collections=None,
                               updates_collections=None,
                               name=None):
        """Accumulates TP/FP flags, scores and counts across batches.

        Inputs may be tensors or dictionaries. If `scores` or `fp` is a dict,
        the method calls itself once per key of `num_gbboxes` (indexing `tp`,
        `fp` and `scores` with the same key) and returns two dicts.

        Args:
            num_gbboxes: Cast to int64; its sum is added to the running
                `v_num_gbboxes` total.
            tp: True-positive flags; cast to bool and flattened.
            fp: False-positive flags; cast to bool and flattened.
            scores: Scores; cast to float and flattened.
            remove_zero_scores: If true, keep only entries that are TP or FP
                and whose score is greater than 1e-4. If false, nothing is
                filtered out.
            metrics_collections: Optional collections the value tuple is
                added to.
            updates_collections: Optional collections the update tuple is
                added to.
            name: Optional variable scope name.

        Returns:
            A pair `(values, update_ops)`. `values` is the tuple of local
            variables (num objects, num detections, tp, fp, scores) and
            `update_ops` the tuple of their update ops in the same order.
            For dict inputs, each element of the pair is a dict holding one
            such tuple per key.
        """
        # Dictionary inputs: recurse per key and collect results in dicts.
        if isinstance(scores, dict) or isinstance(fp, dict):
            values_by_key = {}
            updates_by_key = {}
            for key in num_gbboxes.keys():
                key_scope = 'streaming_tp_fp_%s' % key
                values_by_key[key], updates_by_key[key] = (
                    self.streaming_tp_fp_arrays(num_gbboxes[key],
                                                tp[key],
                                                fp[key],
                                                scores[key],
                                                remove_zero_scores,
                                                metrics_collections,
                                                updates_collections,
                                                name=key_scope))
            return values_by_key, updates_by_key

        # Tensor inputs.
        scope_inputs = [num_gbboxes, tp, fp, scores]
        with variable_scope.variable_scope(name, 'streaming_tp_fp',
                                           scope_inputs):
            flag_dtype = tf.bool
            flat = [-1]

            num_gbboxes = math_ops.to_int64(num_gbboxes)
            scores = math_ops.to_float(scores)
            tp = tf.cast(tp, flag_dtype)
            fp = tf.cast(fp, flag_dtype)
            # Flatten everything to rank 1.
            scores = tf.reshape(scores, flat)
            tp = tf.reshape(tp, flat)
            fp = tf.reshape(fp, flat)

            # Entries that are neither TP nor FP are masked out, but the mask
            # is only applied when low scores are being removed as well.
            keep = tf.logical_or(tp, fp)
            if remove_zero_scores:
                rm_threshold = 1e-4
                keep = tf.logical_and(keep, tf.greater(scores, rm_threshold))
                scores = tf.boolean_mask(scores, keep)
                tp = tf.boolean_mask(tp, keep)
                fp = tf.boolean_mask(fp, keep)

            # Local variables accumulating information over batches.
            total_objects = self._create_local(
                'v_num_gbboxes', shape=[], dtype=tf.int64)
            total_detections = self._create_local(
                'v_num_detections', shape=[], dtype=tf.int32)
            all_scores = self._create_local('v_scores', shape=[0])
            all_tp = self._create_local('v_tp', shape=[0], dtype=flag_dtype)
            all_fp = self._create_local('v_fp', shape=[0], dtype=flag_dtype)

            # Update operations: counters are incremented, arrays are grown
            # by concatenation (hence validate_shape=False).
            objects_update = state_ops.assign_add(
                total_objects, tf.reduce_sum(num_gbboxes))
            detections_update = state_ops.assign_add(
                total_detections, tf.size(scores, out_type=tf.int32))
            scores_update = state_ops.assign(
                all_scores,
                tf.concat([all_scores, scores], axis=0),
                validate_shape=False)
            tp_update = state_ops.assign(
                all_tp,
                tf.concat([all_tp, tp], axis=0),
                validate_shape=False)
            fp_update = state_ops.assign(
                all_fp,
                tf.concat([all_fp, fp], axis=0),
                validate_shape=False)

            # Value and update ops.
            val = (total_objects, total_detections, all_tp, all_fp,
                   all_scores)
            update_deps = [objects_update, detections_update, scores_update,
                           tp_update, fp_update]
            # NOTE(review): only a tuple is built inside this scope, so no
            # new op appears to pick up these dependencies -- confirm.
            with ops.control_dependencies(update_deps):
                update_op = (objects_update, detections_update, tp_update,
                             fp_update, scores_update)

            if metrics_collections:
                ops.add_to_collections(metrics_collections, val)
            if updates_collections:
                ops.add_to_collections(updates_collections, update_op)
            return val, update_op
Exemple #39
0
  def call(self, inputs, mask=None, training=None, initial_state=None):
    """Runs the LSTM over `inputs` using the fallback loop or the fused ops.

    Args:
      inputs: Input tensor. A ragged input is converted through
        `K.convert_inputs_if_ragged` and its row lengths are used as sequence
        lengths.
      mask: Optional mask. If a list is given, only its first element is used.
      training: Forwarded to the cell (fallback path) or to dropout-mask
        creation (fused path).
      initial_state: Optional initial state, normalized by `_process_inputs`.

    Returns:
      `[output] + states` when `self.return_state` is set; otherwise
      `(output, runtime)` when `self.return_runtime` is set; otherwise
      `output`. `output` is the full sequence (converted back to ragged if
      the input was ragged) when `self.return_sequences` is set, else the
      last output.
    """
    # The input should be dense, padded with zeros. Ragged input is padded and
    # `row_lengths` (None for dense input) is then used for masking.
    inputs, row_lengths = K.convert_inputs_if_ragged(inputs)
    is_ragged_input = row_lengths is not None
    self._validate_args_if_ragged(is_ragged_input, mask)

    # LSTM does not support constants, so none are passed in or kept.
    inputs, initial_state, _ = self._process_inputs(inputs, initial_state, None)

    if isinstance(mask, list):
      mask = mask[0]

    input_shape = K.int_shape(inputs)
    timesteps = input_shape[0 if self.time_major else 1]

    if self._could_use_gpu_kernel:
      # Use the defun approach for backend implementation swap. The different
      # implementations need the same function signature (same tensor shapes
      # and dtypes). Since CuDNN has an extra set of bias, that bias is passed
      # to both the normal and the CuDNN implementation.
      self.reset_dropout_mask()
      dropout_mask = self.get_dropout_mask_for_cell(inputs, training, count=4)
      if dropout_mask is not None:
        inputs = inputs * dropout_mask[0]
      gpu_lstm_kwargs = dict(
          inputs=inputs,
          init_h=initial_state[0],
          init_c=initial_state[1],
          kernel=self.cell.kernel,
          recurrent_kernel=self.cell.recurrent_kernel,
          bias=self.cell.bias,
          mask=mask,
          time_major=self.time_major,
          go_backwards=self.go_backwards,
          sequence_lengths=row_lengths)
      # The non-GPU implementation takes the same arguments plus a few more.
      normal_lstm_kwargs = dict(
          gpu_lstm_kwargs,
          activation=self.activation,
          recurrent_activation=self.recurrent_activation,
          zero_output_for_mask=self.zero_output_for_mask)

      if context.executing_eagerly():
        # Under eager context, check the device placement and prefer the GPU
        # implementation when GPU is available.
        device_type = _get_context_device_type()
        # Either user specified GPU or unspecified but GPU is available.
        gpu_selected = (
            device_type == _GPU_DEVICE_NAME or
            (device_type is None and context.num_gpus() > 0))
        can_use_gpu = gpu_selected and (
            mask is None or is_sequence_right_padded(mask, self.time_major))
        if can_use_gpu:
          lstm_fn, lstm_kwargs = gpu_lstm, gpu_lstm_kwargs
        else:
          lstm_fn, lstm_kwargs = standard_lstm, normal_lstm_kwargs
        last_output, outputs, new_h, new_c, runtime = lstm_fn(**lstm_kwargs)
      else:
        last_output, outputs, new_h, new_c, runtime = (
            lstm_with_backend_selection(**normal_lstm_kwargs))

      states = [new_h, new_c]
    else:
      # Fall back to use the normal LSTM.
      cell_kwargs = {'training': training}
      self._maybe_reset_cell_dropout_mask(self.cell)

      def step(step_inputs, step_states):
        return self.cell(step_inputs, step_states, **cell_kwargs)

      input_length = timesteps if row_lengths is None else row_lengths
      last_output, outputs, states = K.rnn(
          step,
          inputs,
          initial_state,
          constants=None,
          go_backwards=self.go_backwards,
          mask=mask,
          unroll=self.unroll,
          input_length=input_length,
          time_major=self.time_major,
          zero_output_for_mask=self.zero_output_for_mask)
      runtime = _runtime(_RUNTIME_UNKNOWN)

    if self.stateful:
      # Write the freshly computed states back into the layer's state
      # variables.
      self.add_update([
          state_ops.assign(self.states[idx], new_state)
          for idx, new_state in enumerate(states)
      ])

    if self.return_sequences:
      output = K.maybe_convert_to_ragged(is_ragged_input, outputs, row_lengths)
    else:
      output = last_output

    if self.return_state:
      return [output] + list(states)
    if self.return_runtime:
      return output, runtime
    return output
Exemple #40
0
    def _apply_sparse_shared(self, grad, var, indices, scatter_add):
        """Builds the sparse update for `var` and its optimizer slots.

        Args:
            grad: Gradient values for the rows selected by `indices`.
            var: Variable being optimized.
            indices: Indices of the rows of `var` that `grad` applies to.
            scatter_add: Callable invoked as `scatter_add(slot, indices,
                values)` to add the gradient terms into the "m" and "v"
                slots.

        Returns:
            A grouped op covering the variable update and the "m"/"v" slot
            updates, plus the "vhat" update when `self._amsgrad` is set.
        """
        step, beta1_power, beta2_power = self._get_beta_accumulators()
        dtype = var.dtype.base_dtype
        beta1_power = math_ops.cast(beta1_power, dtype)
        beta2_power = math_ops.cast(beta2_power, dtype)
        lr_t = math_ops.cast(self._lr_t, dtype)

        if self._initial_total_steps > 0:
            # Learning-rate schedule: scale up linearly during warmup, then
            # move linearly towards `min_lr` over the remaining steps.
            total_steps = math_ops.cast(self._total_steps_t, dtype)
            warmup_proportion = math_ops.cast(self._warmup_proportion_t, dtype)
            min_lr = math_ops.cast(self._min_lr_t, dtype)
            warmup_steps = total_steps * warmup_proportion
            decay_steps = math_ops.maximum(total_steps - warmup_steps, 1)
            decay_rate = (min_lr - lr_t) / decay_steps
            in_warmup = step <= warmup_steps
            warmup_lr = lr_t * (step / warmup_steps)
            decayed_lr = lr_t + decay_rate * math_ops.minimum(
                step - warmup_steps, decay_steps)
            lr_t = tf.where(in_warmup, warmup_lr, decayed_lr)

        beta1_t = math_ops.cast(self._beta1_t, dtype)
        beta2_t = math_ops.cast(self._beta2_t, dtype)
        epsilon_t = math_ops.cast(self._epsilon_t, dtype)

        sma_inf = 2.0 / (1.0 - beta2_t) - 1.0
        sma_t = sma_inf - 2.0 * step * beta2_power / (1.0 - beta2_power)

        # First moment: scale the whole slot by beta1, then scatter-add the
        # gradient term into the touched rows.
        m_slot = self.get_slot(var, "m")
        m_grad_term = grad * (1 - beta1_t)
        m_new = state_ops.assign(
            m_slot, m_slot * beta1_t, use_locking=self._use_locking)
        with ops.control_dependencies([m_new]):
            m_new = scatter_add(m_slot, indices, m_grad_term)
        m_hat = m_new / (1.0 - beta1_power)

        # Second moment: same pattern with beta2 and the squared gradient.
        v_slot = self.get_slot(var, "v")
        v_grad_term = (grad * grad) * (1 - beta2_t)
        v_new = state_ops.assign(
            v_slot, v_slot * beta2_t, use_locking=self._use_locking)
        with ops.control_dependencies([v_new]):
            v_new = scatter_add(v_slot, indices, v_grad_term)

        second_moment = v_new
        if self._amsgrad:
            # Keep the element-wise maximum seen so far in the "vhat" slot.
            vhat_slot = self.get_slot(var, 'vhat')
            vhat_new = state_ops.assign(
                vhat_slot,
                math_ops.maximum(vhat_slot, v_new),
                use_locking=self._use_locking)
            second_moment = vhat_new
        v_hat = math_ops.sqrt(second_moment / (1.0 - beta2_power))

        r_t = math_ops.sqrt(
            (sma_t - 4.0) / (sma_inf - 4.0) *
            (sma_t - 2.0) / (sma_inf - 2.0) *
            sma_inf / sma_t)

        # Use the rectified adaptive step once sma_t reaches 5, otherwise the
        # bias-corrected first moment alone.
        use_rectified = sma_t >= 5.0
        rectified_step = r_t * m_hat / (v_hat + epsilon_t)
        delta = tf.where(use_rectified, rectified_step, m_hat)

        if self._initial_weight_decay > 0.0:
            delta += math_ops.cast(self._weight_decay_t, dtype) * var

        delta = lr_t * delta
        # Only the rows named by `indices` are subtracted from `var`.
        var_update = state_ops.scatter_sub(
            var,
            indices,
            array_ops.gather(delta, indices),
            use_locking=self._use_locking)

        updates = [var_update, m_new, v_new]
        if self._amsgrad:
            updates.append(vhat_new)
        return control_flow_ops.group(*updates)
Exemple #41
0
    def _testSaveRestoreOutput(self, rnn_mode, direction, dtype):
        """Checks that model output is unchanged by a save/restore round trip.

        Builds a model in a fresh graph, records the summed outputs, saves
        the parameters, zeroes them in a second session, restores from the
        checkpoint and asserts the summed outputs match within 1e-5.

        Args:
            rnn_mode: RNN mode passed to `_CreateModel`; an extra `input_c`
                is fed when it equals `cudnn_rnn_ops.CUDNN_LSTM`.
            direction: RNN direction passed to `_CreateModel`.
            dtype: Data type used for parameters and inputs.
        """
        with ops.Graph().as_default():
            num_layers = 2
            num_units = 7
            input_size = 7
            seq_length = 10
            batch_size = 5
            if direction == cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION:
                dir_count = 1
            else:
                dir_count = 2

            model = _CreateModel(
                rnn_mode,
                num_layers,
                num_units,
                input_size,
                direction=direction,
                dtype=dtype)
            params_size_t = model.params_size()
            initial_params = array_ops.ones([params_size_t], dtype=dtype)
            params = variables.VariableV1(
                initial_params, validate_shape=False, dtype=dtype)
            _CreateParamsSavable(params, model)
            save_path = os.path.join(
                self.get_temp_dir(), "save-restore-output-test")
            saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2)

            # Fixed seed; the draws below happen in the order
            # input_data, input_h, (input_c).
            np.random.seed(1234)
            state_shape = (num_layers * dir_count, batch_size, num_units)
            model_kwargs = {}
            model_kwargs["input_data"] = constant_op.constant(
                np.random.randn(seq_length, batch_size, input_size),
                dtype=dtype)
            model_kwargs["input_h"] = constant_op.constant(
                np.random.randn(*state_shape), dtype=dtype)
            if rnn_mode == cudnn_rnn_ops.CUDNN_LSTM:
                model_kwargs["input_c"] = constant_op.constant(
                    np.random.randn(*state_shape), dtype=dtype)
            model_kwargs["params"] = params
            model_kwargs["is_training"] = False
            outputs = model(**model_kwargs)
            total_sum = sum(map(math_ops.reduce_sum, outputs))

            # Passing graph explicitly, otherwise an old sess would be reused.
            with self.test_session(
                    use_gpu=True, graph=ops.get_default_graph()) as sess:
                sess.run(variables.global_variables_initializer())
                expected_sum = sess.run(total_sum)
                written_path = saver.save(sess, save_path)
                self.assertEqual(save_path, written_path)

            # Passing graph explicitly, otherwise an old sess would be reused.
            with self.test_session(
                    use_gpu=True, graph=ops.get_default_graph()) as sess:
                # Overwrite the parameters with zeros so that matching output
                # afterwards can only come from the restored values.
                zeroed = array_ops.zeros([params_size_t], dtype=dtype)
                reset_params = state_ops.assign(
                    params, zeroed, validate_shape=False)
                sess.run(reset_params)
                saver.restore(sess, save_path)
                restored_sum = sess.run(total_sum)
                self.assertAllClose(expected_sum, restored_sum, atol=1e-5)
Exemple #42
0
def _wals_factorization_model_function(features, labels, mode, params):
  """Model function for the WALSFactorization estimator.

  Args:
    features: Dictionary of features. See WALSMatrixFactorization.
    labels: Must be None.
    mode: A model_fn.ModeKeys object.
    params: Dictionary of parameters containing arguments passed to the
      WALSMatrixFactorization constructor.

  Returns:
    A ModelFnOps object.

  Raises:
    ValueError: If `mode` is not recognized.
  """
  assert labels is None
  use_factors_weights_cache = (params["use_factors_weights_cache_for_training"]
                               and mode == model_fn.ModeKeys.TRAIN)
  use_gramian_cache = (params["use_gramian_cache_for_training"] and
                       mode == model_fn.ModeKeys.TRAIN)
  max_sweeps = params["max_sweeps"]
  model = factorization_ops.WALSModel(
      params["num_rows"],
      params["num_cols"],
      params["embedding_dimension"],
      unobserved_weight=params["unobserved_weight"],
      regularization=params["regularization_coeff"],
      row_init=params["row_init"],
      col_init=params["col_init"],
      num_row_shards=params["num_row_shards"],
      num_col_shards=params["num_col_shards"],
      row_weights=params["row_weights"],
      col_weights=params["col_weights"],
      use_factors_weights_cache=use_factors_weights_cache,
      use_gramian_cache=use_gramian_cache)

  # Get input rows and cols. We either update rows or columns depending on
  # the value of row_sweep, which is maintained using a session hook.
  input_rows = features[WALSMatrixFactorization.INPUT_ROWS]
  input_cols = features[WALSMatrixFactorization.INPUT_COLS]

  # TRAIN mode:
  if mode == model_fn.ModeKeys.TRAIN:
    # Training consists of the following ops (controlled using a SweepHook).
    # Before a row sweep:
    #   row_update_prep_gramian_op
    #   initialize_row_update_op
    # During a row sweep:
    #   update_row_factors_op
    # Before a col sweep:
    #   col_update_prep_gramian_op
    #   initialize_col_update_op
    # During a col sweep:
    #   update_col_factors_op

    is_row_sweep_var = variable_scope.variable(
        True,
        trainable=False,
        name="is_row_sweep",
        collections=[ops.GraphKeys.GLOBAL_VARIABLES])
    is_sweep_done_var = variable_scope.variable(
        False,
        trainable=False,
        name="is_sweep_done",
        collections=[ops.GraphKeys.GLOBAL_VARIABLES])
    completed_sweeps_var = variable_scope.variable(
        0,
        trainable=False,
        name=WALSMatrixFactorization.COMPLETED_SWEEPS,
        collections=[ops.GraphKeys.GLOBAL_VARIABLES])
    loss_var = variable_scope.variable(
        0.,
        trainable=False,
        name=WALSMatrixFactorization.LOSS,
        collections=[ops.GraphKeys.GLOBAL_VARIABLES])
    # The root weighted squared error =
    #   \\(\sqrt( \sum_{i,j} w_ij * (a_ij - r_ij)^2 / \sum_{i,j} w_ij )\\)
    rwse_var = variable_scope.variable(
        0.,
        trainable=False,
        name=WALSMatrixFactorization.RWSE,
        collections=[ops.GraphKeys.GLOBAL_VARIABLES])

    summary.scalar("loss", loss_var)
    summary.scalar("root_weighted_squared_error", rwse_var)
    summary.scalar("completed_sweeps", completed_sweeps_var)

    def create_axis_ops(sp_input, num_items, update_fn, axis_name):
      """Creates book-keeping and training ops for a given axis.

      Args:
        sp_input: A SparseTensor corresponding to the row or column batch.
        num_items: An integer, the total number of items of this axis.
        update_fn: A function that takes one argument (`sp_input`), and that
        returns a tuple of
          * new_factors: A float Tensor of the factor values after update.
          * update_op: a TensorFlow op which updates the factors.
          * loss: A float Tensor, the unregularized loss.
          * reg_loss: A float Tensor, the regularization loss.
          * sum_weights: A float Tensor, the sum of factor weights.
        axis_name: A string that specifies the name of the axis.

      Returns:
        A tuple consisting of:
          * reset_processed_items_op: A TensorFlow op, to be run before the
            beginning of any sweep. It marks all items as not-processed.
          * axis_train_op: A Tensorflow op, to be run during this axis' sweeps.
      """
      processed_items_init = array_ops.fill(dims=[num_items], value=False)
      with ops.colocate_with(processed_items_init):
        processed_items = variable_scope.variable(
            processed_items_init,
            collections=[ops.GraphKeys.GLOBAL_VARIABLES],
            trainable=False,
            name="processed_" + axis_name)
      _, update_op, loss, reg, sum_weights = update_fn(sp_input)
      input_indices = sp_input.indices[:, 0]
      with ops.control_dependencies([
          update_op,
          state_ops.assign(loss_var, loss + reg),
          state_ops.assign(rwse_var, math_ops.sqrt(loss / sum_weights))]):
        with ops.colocate_with(processed_items):
          update_processed_items = state_ops.scatter_update(
              processed_items,
              input_indices,
              array_ops.ones_like(input_indices, dtype=dtypes.bool),
              name="update_processed_{}_indices".format(axis_name))
        with ops.control_dependencies([update_processed_items]):
          is_sweep_done = math_ops.reduce_all(processed_items)
          axis_train_op = control_flow_ops.group(
              state_ops.assign(is_sweep_done_var, is_sweep_done),
              state_ops.assign_add(
                  completed_sweeps_var,
                  math_ops.cast(is_sweep_done, dtypes.int32)),
              name="{}_sweep_train_op".format(axis_name))
      return processed_items.initializer, axis_train_op

    reset_processed_rows_op, row_train_op = create_axis_ops(
        input_rows,
        params["num_rows"],
        lambda x: model.update_row_factors(sp_input=x, transpose_input=False),
        "rows")
    reset_processed_cols_op, col_train_op = create_axis_ops(
        input_cols,
        params["num_cols"],
        lambda x: model.update_col_factors(sp_input=x, transpose_input=True),
        "cols")
    switch_op = control_flow_ops.group(
        state_ops.assign(
            is_row_sweep_var, math_ops.logical_not(is_row_sweep_var)),
        reset_processed_rows_op,
        reset_processed_cols_op,
        name="sweep_switch_op")
    row_prep_ops = [
        model.row_update_prep_gramian_op, model.initialize_row_update_op]
    col_prep_ops = [
        model.col_update_prep_gramian_op, model.initialize_col_update_op]
    init_op = model.worker_init
    sweep_hook = _SweepHook(
        is_row_sweep_var, is_sweep_done_var, init_op,
        row_prep_ops, col_prep_ops, row_train_op, col_train_op, switch_op)
    global_step_hook = _IncrementGlobalStepHook()
    training_hooks = [sweep_hook, global_step_hook]
    if max_sweeps is not None:
      training_hooks.append(_StopAtSweepHook(max_sweeps))

    return model_fn.ModelFnOps(
        mode=model_fn.ModeKeys.TRAIN,
        predictions={},
        loss=loss_var,
        eval_metric_ops={},
        train_op=control_flow_ops.no_op(),
        training_hooks=training_hooks)

  # INFER mode
  elif mode == model_fn.ModeKeys.INFER:
    projection_weights = features.get(
        WALSMatrixFactorization.PROJECTION_WEIGHTS)

    def get_row_projection():
      return model.project_row_factors(
          sp_input=input_rows,
          projection_weights=projection_weights,
          transpose_input=False)

    def get_col_projection():
      return model.project_col_factors(
          sp_input=input_cols,
          projection_weights=projection_weights,
          transpose_input=True)

    predictions = {
        WALSMatrixFactorization.PROJECTION_RESULT: control_flow_ops.cond(
            features[WALSMatrixFactorization.PROJECT_ROW],
            get_row_projection,
            get_col_projection)
    }

    return model_fn.ModelFnOps(
        mode=model_fn.ModeKeys.INFER,
        predictions=predictions,
        loss=None,
        eval_metric_ops={},
        train_op=control_flow_ops.no_op(),
        training_hooks=[])

  # EVAL mode
  elif mode == model_fn.ModeKeys.EVAL:
    def get_row_loss():
      _, _, loss, reg, _ = model.update_row_factors(
          sp_input=input_rows, transpose_input=False)
      return loss + reg
    def get_col_loss():
      _, _, loss, reg, _ = model.update_col_factors(
          sp_input=input_cols, transpose_input=True)
      return loss + reg
    loss = control_flow_ops.cond(
        features[WALSMatrixFactorization.PROJECT_ROW],
        get_row_loss,
        get_col_loss)
    return model_fn.ModelFnOps(
        mode=model_fn.ModeKeys.EVAL,
        predictions={},
        loss=loss,
        eval_metric_ops={},
        train_op=control_flow_ops.no_op(),
        training_hooks=[])

  else:
    raise ValueError("mode=%s is not recognized." % str(mode))
  def _init_from_args(self,
                      initial_value=None,
                      trainable=True,
                      collections=None,
                      validate_shape=True,
                      caching_device=None,
                      name=None,
                      dtype=None,
                      expected_shape=None):
    """Creates a new variable from arguments.

    Builds the variable op, its initializer (an assign of the initial value)
    and a `read` snapshot, then adds the variable to `collections`.

    Args:
      initial_value: A `Tensor`, or Python object convertible to a `Tensor`,
        which is the initial value for the Variable. The initial value must have
        a shape specified unless `validate_shape` is set to False. Can also be a
        callable with no argument that returns the initial value when called.
        (Note that initializer functions  from init_ops.py must first be bound
         to a shape before being used here.)
      trainable: If `True`, the default, also adds the variable to the graph
        collection `GraphKeys.TRAINABLE_VARIABLES`. This collection is used as
        the default list of variables to use by the `Optimizer` classes.
      collections: List of graph collections keys. The new variable is added to
        these collections. Defaults to `[GraphKeys.GLOBAL_VARIABLES]`.
      validate_shape: If `False`, allows the variable to be initialized with a
        value of unknown shape. If `True`, the default, the shape of
        `initial_value` must be known.
      caching_device: Optional device string or function describing where the
        Variable should be cached for reading.  Defaults to the Variable's
        device.  If not `None`, caches on another device.  Typical use is to
        cache on the device where the Ops using the Variable reside, to
        deduplicate copying through `Switch` and other conditional statements.
      name: Optional name for the variable. Defaults to `'Variable'` and gets
        uniquified automatically.
      dtype: If set, initial_value will be converted to the given type.
        If None, either the datatype will be kept (if initial_value is
        a Tensor) or float32 will be used (if it is a Python object convertible
        to a Tensor).
      expected_shape: Deprecated. Ignored.

    Raises:
      ValueError: If the initial value is not specified, if `collections` is
        not a list, tuple or set, or if the initial value does not have a
        fully defined shape and `validate_shape` is `True`.
    """
    # `expected_shape` is accepted but deliberately unused.
    _ = expected_shape
    if initial_value is None:
      raise ValueError("initial_value must be specified.")
    # A callable initial value is invoked later, inside the "Initializer"
    # name scope.
    init_from_fn = callable(initial_value)

    if collections is None:
      collections = [ops.GraphKeys.GLOBAL_VARIABLES]
    if not isinstance(collections, (list, tuple, set)):
      raise ValueError(
          "collections argument to Variable constructor must be a list, tuple, "
          "or set. Got %s of type %s" % (collections, type(collections)))
    # Trainable variables are also registered in TRAINABLE_VARIABLES; a new
    # list is built so the caller's collection object is not mutated.
    if trainable and ops.GraphKeys.TRAINABLE_VARIABLES not in collections:
      collections = list(collections) + [ops.GraphKeys.TRAINABLE_VARIABLES]
    # NOTE(review): passing None is expected to clear any control dependencies
    # of the enclosing context for the ops built below -- see the
    # `control_dependencies` documentation.
    with ops.control_dependencies(None):
      with ops.name_scope(name, "Variable", [] if init_from_fn else
                          [initial_value]) as name:

        if init_from_fn:
          # Use attr_scope and device(None) to simulate the behavior of
          # colocate_with when the variable we want to colocate with doesn't
          # yet exist.
          true_name = ops._name_from_scope_name(name)
          attr = attr_value_pb2.AttrValue(
              list=attr_value_pb2.AttrValue.ListValue(
                  s=[compat.as_bytes("loc:@%s" % true_name)]))
          # pylint: disable=protected-access
          with ops.get_default_graph()._attr_scope({"_class": attr}):
            with ops.name_scope("Initializer"),  ops.device(None):
              self._initial_value = ops.convert_to_tensor(
                  initial_value(), name="initial_value", dtype=dtype)
              # Without shape validation the variable op is created with an
              # unknown shape.
              shape = (self._initial_value.get_shape()
                       if validate_shape else tensor_shape.unknown_shape())
            self._variable = state_ops.variable_op_v2(
                shape,
                self._initial_value.dtype.base_dtype,
                name=name)

        # Or get the initial value from a Tensor or Python object.
        else:
          self._initial_value = ops.convert_to_tensor(
              initial_value, name="initial_value", dtype=dtype)
          shape = (self._initial_value.get_shape()
                   if validate_shape else tensor_shape.unknown_shape())
          # In this case, the variable op can't be created until after the
          # initial_value has been converted to a Tensor with a known type.
          self._variable = state_ops.variable_op_v2(
              shape,
              self._initial_value.dtype.base_dtype,
              name=name)

        # With shape validation enabled, the initial value's static shape
        # must be fully defined; nothing is overridden here, only checked.
        if validate_shape:
          initial_value_shape = self._initial_value.get_shape()
          if not initial_value_shape.is_fully_defined():
            raise ValueError("initial_value must have a shape specified: %s" %
                             self._initial_value)

        # Assigns initial value. The assign's Operation (`.op`), not its
        # output tensor, is kept as the initializer.
        self._initializer_op = state_ops.assign(
            self._variable, self._initial_value,
            validate_shape=validate_shape).op

        # TODO(vrv): Change this class to not take caching_device, but
        # to take the op to colocate the snapshot with, so we can use
        # colocation rather than devices.
        # The "read" identity is placed on the caching device when one is
        # given, otherwise it is colocated with the variable op.
        if caching_device is not None:
          with ops.device(caching_device):
            self._snapshot = array_ops.identity(self._variable, name="read")
        else:
          with ops.colocate_with(self._variable.op):
            self._snapshot = array_ops.identity(self._variable, name="read")

    ops.add_to_collections(collections, self)
    self._caching_device = caching_device
    self._save_slice_info = None
    def _resource_apply_dense(self, grad, var):
        """Build the grouped update op for a dense gradient on `var`.

        The step is computed from the `m` and `v` slot variables using a
        warming momentum schedule. Learning-rate multipliers are applied
        when `self.lr_multipliers` is set and weight decays are applied when
        `var.name` is listed in `self.weight_decays`. The `t_cur` / `eta_t`
        updates returned by `_update_t_cur_eta_t_v2` are added to the group
        when that helper reports the iteration as done.

        Args:
          grad: Dense gradient for `var`.
          var: Variable to update.

        Returns:
          The op produced by `control_flow_ops.group` over all updates.
        """
        dtype = var.dtype.base_dtype
        lr_t = array_ops.identity(self._get_hyper('learning_rate', dtype))
        beta_1_t = array_ops.identity(self._get_hyper('beta_1', dtype))
        beta_2_t = array_ops.identity(self._get_hyper('beta_2', dtype))
        epsilon_t = ops.convert_to_tensor(self.epsilon, dtype)
        m_slot = self.get_slot(var, 'm')
        v_slot = self.get_slot(var, 'v')
        step_now = math_ops.cast(self.iterations + 1, dtype)
        step_next = math_ops.cast(self.iterations + 2, dtype)
        decay_base = math_ops.cast(0.96, dtype)

        # Optional per-variable learning rate scaling.
        if self.lr_multipliers is not None:
            lr_t = _apply_lr_multiplier(self, lr_t, var)

        def _warmed_momentum(step):
            # Warming momentum schedule, following the recommendations in [2].
            return beta_1_t * (1. - 0.5 * (math_ops.pow(
                decay_base, self._initial_decay * step)))

        mu_now = _warmed_momentum(step_now)
        mu_next = _warmed_momentum(step_next)
        schedule_now = math_ops.cast(self._m_cache_read, dtype) * mu_now
        # The cache variable is only written when the dtypes are identical.
        if dtype is self._m_cache.dtype:
            cache_assign = state_ops.assign(self._m_cache,
                                            schedule_now,
                                            use_locking=self._use_locking)
            schedule_now = array_ops.identity(cache_assign)
        schedule_next = schedule_now * mu_next

        # The following equations are given in [1].
        grad_hat = grad / (1. - schedule_now)
        m_new = beta_1_t * m_slot + (1. - beta_1_t) * grad
        m_hat = m_new / (1. - schedule_next)
        v_new = beta_2_t * v_slot + (1. - beta_2_t) * math_ops.square(grad)
        v_hat = v_new / (1. - math_ops.pow(beta_2_t, step_now))
        m_bar = (1. - mu_now) * grad_hat + (mu_now * m_hat)

        m_assign = state_ops.assign(m_slot,
                                    m_new,
                                    use_locking=self._use_locking)
        v_assign = state_ops.assign(v_slot,
                                    v_new,
                                    use_locking=self._use_locking)

        # The variable step uses the un-assigned moment estimates from above.
        step_size = self.eta_t * lr_t * m_bar / (
            math_ops.sqrt(v_hat + epsilon_t))
        var_new = math_ops.sub(var, step_size)

        # Weight decays
        if var.name in self.weight_decays.keys():
            var_new = _apply_weight_decays(self, var, var_new)

        var_assign = state_ops.assign(var,
                                      var_new,
                                      use_locking=self._use_locking)

        # Cosine annealing
        iteration_done, t_cur_update, eta_t_update = _update_t_cur_eta_t_v2(
            self, lr_t, var)
        if iteration_done and not self._init_notified:
            self._init_notified = True

        updates = [var_assign, m_assign, v_assign]
        if iteration_done:
            updates.append(t_cur_update)
        if self.use_cosine_annealing and iteration_done:
            updates.append(eta_t_update)
        return control_flow_ops.group(*updates)
Exemple #45
0
 def _projection_op(self, state, name=None):
     """Return an op that overwrites `state` with its projection.

     The projected value comes from
     `_project_stochastic_matrix_wrt_euclidean_norm(state)`; both it and the
     assignment are created under `ops.colocate_with(state)`.

     Args:
       state: The variable to project and assign to.
       name: Optional name passed to the assign op.

     Returns:
       The result of `state_ops.assign`.
     """
     with ops.colocate_with(state):
         projected = _project_stochastic_matrix_wrt_euclidean_norm(state)
         return state_ops.assign(state, projected, name=name)
    def testDeferredSlotRestoration(self):
        """Checks deferred restoration of a variable and its Adam "m" slot.

        Two checkpoints are written from `root`: "no_slots" (var == 12, saved
        before the optimizer is attached to `root`) and "with_slots"
        (var == 13, slot "m" == 14). Both are then restored into a fresh
        `Checkpoint` before the variable or the optimizer exist, and the test
        verifies the values that appear once those objects are created.
        """
        checkpoint_directory = self.get_temp_dir()

        root = trackable_utils.Checkpoint()
        root.var = trackable_utils.add_variable(root,
                                                name="var",
                                                initializer=0.)
        optimizer = adam.AdamOptimizer(0.1)
        if context.executing_eagerly():
            optimizer.minimize(root.var.read_value)
        else:
            train_op = optimizer.minimize(root.var)
            # Note that `optimizer` has not been added as a dependency of
            # `root`. Create a one-off grouping so that slot variables for `root.var`
            # get initialized too.
            self.evaluate(
                trackable_utils.gather_initializers(
                    trackable_utils.Checkpoint(root=root,
                                               optimizer=optimizer)))
            self.evaluate(train_op)
        self.evaluate(state_ops.assign(root.var, 12.))
        # Saved while the optimizer is not yet attached to `root`.
        no_slots_path = root.save(
            os.path.join(checkpoint_directory, "no_slots"))
        root.optimizer = optimizer
        self.evaluate(state_ops.assign(root.var, 13.))
        self.evaluate(
            state_ops.assign(optimizer.get_slot(name="m", var=root.var), 14.))
        slots_path = root.save(os.path.join(checkpoint_directory,
                                            "with_slots"))
        new_root = trackable_utils.Checkpoint()
        # Load the slot-containing checkpoint (deferred), then immediately overwrite
        # the non-slot variable (also deferred).
        slot_status = new_root.restore(slots_path)
        no_slot_status = new_root.restore(no_slots_path)
        # Nothing has been created on `new_root` yet, so nothing is consumed.
        with self.assertRaises(AssertionError):
            no_slot_status.assert_consumed()
        new_root.var = trackable_utils.add_variable(new_root,
                                                    name="var",
                                                    shape=[])
        no_slot_status.assert_consumed()
        no_slot_status.run_restore_ops()
        self.assertEqual(12., self.evaluate(new_root.var))
        new_root.optimizer = adam.AdamOptimizer(0.1)
        slot_status.assert_existing_objects_matched()
        # `assertRaisesRegex` replaces the deprecated `assertRaisesRegexp`
        # alias, which was removed from unittest in Python 3.12.
        with self.assertRaisesRegex(AssertionError, "beta1_power"):
            slot_status.assert_consumed()
        self.assertEqual(12., self.evaluate(new_root.var))
        if context.executing_eagerly():
            # Slot variables are only created with restoring initializers when
            # executing eagerly.
            self.assertEqual(
                14.,
                self.evaluate(
                    new_root.optimizer.get_slot(name="m", var=new_root.var)))
        else:
            self.assertIsNone(
                new_root.optimizer.get_slot(name="m", var=new_root.var))
        if context.executing_eagerly():
            new_root.optimizer.minimize(new_root.var.read_value)
        else:
            train_op = new_root.optimizer.minimize(new_root.var)
            # The slot variable now exists; restore() didn't create it, but we should
            # now have a restore op for it.
            slot_status.run_restore_ops()
            self.assertEqual(
                14.,
                self.evaluate(
                    new_root.optimizer.get_slot(name="m", var=new_root.var)))
            self.evaluate(train_op)
        slot_status.assert_consumed()
Exemple #47
0
    def doBasicsOneExportPath(self,
                              export_path,
                              clear_devices=False,
                              global_step=GLOBAL_STEP,
                              sharded=True,
                              export_count=1):
        """Exports a small two-variable graph and validates the result.

        The first session builds the graph, writes an asset file and runs
        `export_count` exports. The second session imports the meta graph of
        the last exported version and checks the stored graph_def, init op,
        signatures, assets and restored variable values.

        Args:
          export_path: Directory the exporter writes versions into.
          clear_devices: Forwarded to `Exporter.init`; when True, device
            fields are also cleared on the reference graph_def before the
            comparison.
          global_step: Version number of the first export; each further
            export uses the next integer.
          sharded: Forwarded to the `Saver`; also selects which variables
            filename constant is used when restoring.
          export_count: Number of consecutive exports to run.
        """
        # Build a graph with 2 parameter nodes on different devices.
        ops.reset_default_graph()
        with session.Session(target="",
                             config=config_pb2.ConfigProto(
                                 device_count={"CPU": 2})) as sess:
            # v2 is an unsaved variable derived from v0 and v1.  It is used to
            # exercise the ability to run an init op when restoring a graph.
            with sess.graph.device("/cpu:0"):
                v0 = variables.Variable(10, name="v0")
            with sess.graph.device("/cpu:1"):
                v1 = variables.Variable(20, name="v1")
            v2 = variables.Variable(1,
                                    name="v2",
                                    trainable=False,
                                    collections=[])
            assign_v2 = state_ops.assign(v2, math_ops.add(v0, v1))
            init_op = control_flow_ops.group(assign_v2, name="init_op")

            ops.add_to_collection("v", v0)
            ops.add_to_collection("v", v1)
            ops.add_to_collection("v", v2)

            named_tensor_bindings = {
                "logical_input_A": v0,
                "logical_input_B": v1
            }
            signatures = {
                "foo":
                exporter.regression_signature(input_tensor=v0,
                                              output_tensor=v1),
                "generic":
                exporter.generic_signature(named_tensor_bindings)
            }

            asset_filepath_orig = os.path.join(test.get_temp_dir(),
                                               "hello42.txt")
            asset_file = constant_op.constant(asset_filepath_orig,
                                              name="filename42")
            ops.add_to_collection(ops.GraphKeys.ASSET_FILEPATHS, asset_file)

            with gfile.FastGFile(asset_filepath_orig, "w") as f:
                f.write("your data here")
            assets_collection = ops.get_collection(
                ops.GraphKeys.ASSET_FILEPATHS)

            # Written to disk but never added to the assets collection; the
            # restore half asserts that it was not exported.
            ignored_asset = os.path.join(test.get_temp_dir(), "ignored.txt")
            with gfile.FastGFile(ignored_asset, "w") as f:
                f.write("additional data here")

            variables.global_variables_initializer().run()

            # Run an export.
            save = saver.Saver({"v0": v0, "v1": v1},
                               restore_sequentially=True,
                               sharded=sharded,
                               write_version=saver_pb2.SaverDef.V1)
            export = exporter.Exporter(save)
            compare_def = ops.get_default_graph().as_graph_def()
            export.init(
                compare_def,
                init_op=init_op,
                clear_devices=clear_devices,
                default_graph_signature=exporter.classification_signature(
                    input_tensor=v0),
                named_graph_signatures=signatures,
                assets_collection=assets_collection)

            for x in range(export_count):
                export.export(export_path,
                              constant_op.constant(global_step + x),
                              sess,
                              exports_to_keep=gc.largest_export_versions(2))
            # Set global_step to the last exported version, as the rest of the test
            # uses it to construct model export path, loads model from it, and does
            # verifications. We want to make sure to always use the last exported
            # version, as old ones may have been garbage-collected.
            global_step += export_count - 1

        # Restore graph.
        ops.reset_default_graph()
        with session.Session(target="",
                             config=config_pb2.ConfigProto(
                                 device_count={"CPU": 2})) as sess:
            save = saver.import_meta_graph(
                os.path.join(export_path,
                             constants.VERSION_FORMAT_SPECIFIER % global_step,
                             constants.META_GRAPH_DEF_FILENAME))
            self.assertIsNotNone(save)
            meta_graph_def = save.export_meta_graph()
            collection_def = meta_graph_def.collection_def

            # Validate custom graph_def.
            graph_def_any = collection_def[constants.GRAPH_KEY].any_list.value
            self.assertEqual(len(graph_def_any), 1)
            graph_def = graph_pb2.GraphDef()
            graph_def_any[0].Unpack(graph_def)
            if clear_devices:
                for node in compare_def.node:
                    node.device = ""
            self.assertProtoEquals(compare_def, graph_def)

            # Validate init_op.
            init_ops = collection_def[constants.INIT_OP_KEY].node_list.value
            self.assertEqual(len(init_ops), 1)
            self.assertEqual(init_ops[0], "init_op")

            # Validate signatures.
            signatures_any = collection_def[
                constants.SIGNATURES_KEY].any_list.value
            self.assertEqual(len(signatures_any), 1)
            signatures = manifest_pb2.Signatures()
            signatures_any[0].Unpack(signatures)
            default_signature = signatures.default_signature
            self.assertEqual(
                default_signature.classification_signature.input.tensor_name,
                "v0:0")
            bindings = signatures.named_signatures[
                "generic"].generic_signature.map
            self.assertEqual(bindings["logical_input_A"].tensor_name, "v0:0")
            self.assertEqual(bindings["logical_input_B"].tensor_name, "v1:0")
            read_foo_signature = (
                signatures.named_signatures["foo"].regression_signature)
            self.assertEqual(read_foo_signature.input.tensor_name, "v0:0")
            self.assertEqual(read_foo_signature.output.tensor_name, "v1:0")

            # Validate the assets.
            assets_any = collection_def[constants.ASSETS_KEY].any_list.value
            self.assertEqual(len(assets_any), 1)
            asset = manifest_pb2.AssetFile()
            assets_any[0].Unpack(asset)
            assets_path = os.path.join(
                export_path, constants.VERSION_FORMAT_SPECIFIER % global_step,
                constants.ASSETS_DIRECTORY, "hello42.txt")
            # Read through a context manager so the handle is always closed.
            with gfile.GFile(assets_path) as asset_reader:
                asset_contents = asset_reader.read()
            # NOTE(review): the expected value is a bytes literal while the
            # file is opened in the default mode -- confirm this comparison
            # holds on the Python version the suite runs under.
            self.assertEqual(asset_contents, b"your data here")
            self.assertEqual("hello42.txt", asset.filename)
            self.assertEqual("filename42:0", asset.tensor_binding.tensor_name)
            ignored_asset_path = os.path.join(
                export_path, constants.VERSION_FORMAT_SPECIFIER % global_step,
                constants.ASSETS_DIRECTORY, "ignored.txt")
            self.assertFalse(gfile.Exists(ignored_asset_path))

            # Validate graph restoration.
            variables_filename = (constants.VARIABLES_FILENAME_PATTERN
                                  if sharded else constants.VARIABLES_FILENAME)
            save.restore(
                sess,
                os.path.join(
                    export_path,
                    constants.VERSION_FORMAT_SPECIFIER % global_step,
                    variables_filename))
            self.assertEqual(10, ops.get_collection("v")[0].eval())
            self.assertEqual(20, ops.get_collection("v")[1].eval())
            # v2 is not saved; running the exported init op recomputes it.
            ops.get_collection(constants.INIT_OP_KEY)[0].run()
            self.assertEqual(30, ops.get_collection("v")[2].eval())
    def testDeferredSlotRestoration(self):
        """Checks deferred restoration of a variable and its Adam "m" slot.

        Two checkpoints are written from `root`: "no_slots" (var == 12, saved
        before the optimizer is attached to `root`) and "with_slots"
        (var == 13, slot "m" == 14). Both are restored into a fresh
        `Checkpoint` before the variable or the optimizer exist; the test then
        creates those objects and verifies which values they receive.
        """
        with self.test_session():
            checkpoint_directory = self.get_temp_dir()

            root = trackable_utils.Checkpoint()
            root.var = trackable_utils.add_variable(root,
                                                    name="var",
                                                    initializer=0.)
            optimizer = adam.Adam(0.1)
            variables = [root.var]
            gradients = [1.]
            train_op = optimizer.apply_gradients(zip(gradients, variables))
            # Note that `optimizer` has not been added as a dependency of
            # `root`. Create a one-off grouping so that slot variables for `root.var`
            # get initialized too.
            self.evaluate(
                trackable_utils.gather_initializers(
                    trackable_utils.Checkpoint(root=root,
                                               optimizer=optimizer)))
            self.evaluate(train_op)
            self.evaluate(state_ops.assign(root.var, 12.))
            # Saved while the optimizer is not yet attached to `root`.
            no_slots_path = root.save(
                os.path.join(checkpoint_directory, "no_slots"))
            root.optimizer = optimizer
            self.evaluate(state_ops.assign(root.var, 13.))
            self.evaluate(
                state_ops.assign(
                    optimizer.get_slot(slot_name="m", var=root.var), 14.))
            slots_path = root.save(
                os.path.join(checkpoint_directory, "with_slots"))
            new_root = trackable_utils.Checkpoint()
            # Load the slot-containing checkpoint (deferred), then immediately
            # overwrite the non-slot variable (also deferred).
            slot_status = new_root.restore(slots_path)
            no_slot_status = new_root.restore(no_slots_path)
            # Nothing has been created on `new_root` yet, so nothing is consumed.
            with self.assertRaises(AssertionError):
                no_slot_status.assert_consumed()
            new_root.var = trackable_utils.add_variable(new_root,
                                                        name="var",
                                                        shape=[])
            no_slot_status.assert_consumed()
            no_slot_status.run_restore_ops()
            self.assertEqual(12., self.evaluate(new_root.var))
            new_root.optimizer = adam.Adam(0.1)
            slot_status.assert_existing_objects_matched()
            if not context.executing_eagerly():
                with self.assertRaisesRegex(AssertionError,
                                            "Unresolved object"):
                    slot_status.assert_consumed()
            self.assertEqual(12., self.evaluate(new_root.var))
            if context.executing_eagerly():
                # Slot variables are only created with restoring initializers when
                # executing eagerly.
                self.assertEqual(
                    14.,
                    self.evaluate(
                        new_root.optimizer.get_slot(slot_name="m",
                                                    var=new_root.var)))
            else:
                # Slot variables are not created eagerly when graph building.
                with self.assertRaises(KeyError):
                    new_root.optimizer.get_slot(slot_name="m",
                                                var=new_root.var)
            variables = [new_root.var]
            gradients = [1.]
            train_op = new_root.optimizer.apply_gradients(
                zip(gradients, variables))
            # The slot variable now exists; restore() didn't create it, but we should
            # now have a restore op for it.
            slot_status.run_restore_ops()
            if not context.executing_eagerly():
                # The train op hasn't run when graph building, so the slot variable has
                # its restored value. It has run in eager, so the value will
                # be different.
                self.assertEqual(
                    14.,
                    self.evaluate(
                        new_root.optimizer.get_slot(slot_name="m",
                                                    var=new_root.var)))
            self.evaluate(train_op)
            slot_status.assert_consumed()
    def apply_gradients(self,
                        grads_and_vars,
                        worker_id,
                        global_step=None,
                        name=None,
                        collect_cdfs=False):
        """Apply gradients to variables.

        This contains most of the synchronization implementation and also
        wraps the apply_gradients() from the real optimizer.

        Besides returning the train op, this sets several attributes on
        `self`, among them `local_step_init_op`, `ready_for_local_init_op`,
        `chief_init_op`, `_wait_op`, `_update_op`, `_chief_queue_runner` and
        the `print_*` debug ops.

        Args:
          grads_and_vars: Iterable of (gradient, variable) pairs as returned
            by compute_gradients(). Gradients may be None.
          worker_id: Integer index of the calling worker. Selects this
            worker's token queue and the "job:worker/task:<id>" device on
            which its gradients are pushed into the accumulators.
          global_step: Variable passed on to the wrapped optimizer's
            apply_gradients(). Required.
          name: Optional name for the returned operation. Currently not used
            by this implementation.
          collect_cdfs: If True, each accumulator is drained only after
            `self._total_num_replicas` gradients were accumulated; otherwise
            after a single gradient.

        Returns:
          train_op: A Print op on the local step that depends on this
          worker's gradients having been applied to the accumulators.

        Raises:
          ValueError: If the grads_and_vars is empty.
          ValueError: If global step is not provided, the staleness cannot be
            checked.
          ValueError: If a gradient is neither a Tensor nor IndexedSlices.
        """
        # Materialize once: the pairs are traversed several times below, which
        # would silently yield nothing for a one-shot iterable such as zip().
        grads_and_vars = list(grads_and_vars) if grads_and_vars else []
        if not grads_and_vars:
            raise ValueError("Must supply at least one variable")

        if global_step is None:
            raise ValueError("Global step is required to check staleness")

        self._global_step = global_step
        train_ops = []
        aggregated_grad = []
        var_list = []
        # Accumulator of each (grad, var) pair keyed by the pair's position.
        # Pairs with a None gradient have no accumulator, so positions in
        # `self._accumulator_list` do not line up with positions in
        # `grads_and_vars`.
        accum_by_index = {}

        self._local_step = variables.Variable(
            initial_value=0,
            trainable=False,
            collections=[ops.GraphKeys.LOCAL_VARIABLES],
            dtype=global_step.dtype.base_dtype,
            name="sync_rep_local_step")
        self.local_step_init_op = state_ops.assign(self._local_step,
                                                   global_step._ref())
        chief_init_ops = [self.local_step_init_op]
        self.ready_for_local_init_op = variables.report_uninitialized_variables(
            variables.all_variables())

        # The wait op waits for the current worker to dequeue a token from its
        # respective token queue.
        self._wait_op = self._sync_token_queues[worker_id].dequeue()

        # Replicas have to wait until they can get a token from the token queue
        # BEFORE beginning to compute gradients.
        with ops.device(global_step.device):
            # Not referenced afterwards; kept so the constructed graph stays
            # the same as before.
            queue_size = self._sync_token_queues[worker_id].size()
            update_local_step_op = state_ops.assign(self._local_step,
                                                    global_step._ref())

        # Gradient accum creation
        with ops.name_scope(None, self._name):
            for index, (grad, var) in enumerate(grads_and_vars):
                var_list.append(var)
                tf.logging.info("Grad " + str(grad) + " assigned to " +
                                str(var.device))
                with ops.device(var.device):
                    if grad is None:
                        continue
                    elif isinstance(grad, ops.Tensor):
                        grad_accum = data_flow_ops.ConditionalAccumulator(
                            grad.dtype,
                            shape=var.get_shape(),
                            shared_name=var.name + "/grad_accum")
                    else:
                        if not isinstance(grad, ops.IndexedSlices):
                            raise ValueError("Unknown grad type!")
                        grad_accum = data_flow_ops.SparseConditionalAccumulator(
                            grad.dtype,
                            shape=(),
                            shared_name=var.name + "/grad_accum")

                    self._accumulator_list.append((grad_accum, var))
                    accum_by_index[index] = grad_accum

            # Phase 1 gradient computation
            with ops.control_dependencies([update_local_step_op]):
                for index, (grad, var) in enumerate(grads_and_vars):
                    print_start_op = logging_ops.Print(
                        global_step, [global_step],
                        message="Starting to apply grads for variable %d" %
                        index)
                    with ops.device(var.device):
                        if grad is None:
                            continue

                        grad_accum = accum_by_index[index]
                        # Unknown gradient types were already rejected while
                        # the accumulators were created.
                        if isinstance(grad, ops.Tensor):
                            apply_fn = grad_accum.apply_grad
                        else:
                            apply_fn = grad_accum.apply_indexed_slices_grad

                        with ops.control_dependencies([print_start_op]):
                            with tf.device("job:worker/task:%d" % worker_id):
                                apply_grad_op = apply_fn(
                                    grad, local_step=self._local_step._ref())
                                with ops.control_dependencies(
                                    [apply_grad_op]):
                                    finished_print_op = logging_ops.Print(
                                        global_step, [global_step],
                                        message=
                                        "Done applying grads for variable %d"
                                        % index)
                                    train_ops.append(finished_print_op)

            # Phase 2 gradient applying
            num_required = self._total_num_replicas if collect_cdfs else 1
            for index, (grad, var) in enumerate(grads_and_vars):
                with ops.device(var.device):
                    if grad is None:
                        aggregated_grad.append(None)
                        continue

                    grad_accum = accum_by_index[index]
                    if isinstance(grad, ops.Tensor):
                        aggregated_grad.append(
                            grad_accum.take_grad(num_required))
                    else:
                        # Sparse accumulators must be drained as
                        # IndexedSlices regardless of `collect_cdfs`.
                        aggregated_grad.append(
                            grad_accum.take_indexed_slices_grad(num_required))

            aggregated_grads_and_vars = list(zip(aggregated_grad, var_list))

            # Some debug operations
            self.print_sizes = logging_ops.Print(global_step, [
                self._sync_token_queues[i].size()
                for i in range(self._total_num_replicas)
            ],
                                                 message="queue sizes")
            self.print_accum_sizes = logging_ops.Print(
                self._local_step,
                [x[0].num_accumulated()
                 for x in self._accumulator_list] + [worker_id],
                message="Accum sizes")
            self.print_local_step = logging_ops.Print(
                self._local_step,
                [self._local_step._ref(),
                 global_step._ref()],
                message="local vs global step")

            # sync_op will be assigned to the same device as the global step.
            with ops.device(global_step.device), ops.name_scope(""):
                with ops.control_dependencies([self.print_accum_sizes]):
                    update_op = self._opt.apply_gradients(
                        aggregated_grads_and_vars, global_step)
                    self._update_op = update_op
                    with ops.control_dependencies([update_op]):
                        # After the update, hand one token to every worker.
                        sync_op = []
                        for cur_worker_id in range(self._total_num_replicas):
                            sync_op.append(
                                self._sync_token_queues[cur_worker_id].enqueue(
                                    global_step))
                        sync_op = control_flow_ops.group(*(sync_op))

                # dummy_queue is passed to the queue runner. Don't use the real queues
                # because the queue runner doesn't automatically reopen it once it
                # closed queues in PS devices.
                dummy_queue = (data_flow_ops.FIFOQueue(
                    1,
                    types_pb2.DT_INT32,
                    shapes=(),
                    shared_name="dummy_queue"))

                self._chief_queue_runner = queue_runner.QueueRunner(
                    dummy_queue, [sync_op])

            with ops.device(global_step.device), ops.name_scope(""):
                with ops.control_dependencies(train_ops):
                    # Worker finished applying gradients.
                    train_op = logging_ops.Print(
                        self._local_step._ref(), [
                            x[0].num_accumulated()
                            for x in self._accumulator_list
                        ] + [worker_id],
                        message="Finished worker updates",
                        name="FinishedWorkerUpdatesPrint")

            for accum, var in self._accumulator_list:
                with ops.device(var.device):
                    chief_init_ops.append(
                        accum.set_global_step(global_step,
                                              name="SetGlobalStep"))
            self.chief_init_op = control_flow_ops.group(*(chief_init_ops))
            self._gradients_applied = True

            return train_op
Exemple #50
0
  def training_graph(self,
                     input_data,
                     input_labels,
                     random_seed,
                     data_spec,
                     epoch=None,
                     input_weights=None):

    """Constructs a TF graph for training a random tree.

    The graph is built in stages: per-batch statistics are counted and added
    to the node/split/accumulator variables, new candidate splits are sampled,
    finished leaves are detected and split, and accumulator slots are released
    and re-assigned. Ordering between the stages is enforced with
    `ops.control_dependencies`, so the placement of each op inside or outside
    a given `with` block is significant.

    Args:
      input_data: A tensor or SparseTensor or placeholder for input data.
      input_labels: A tensor or placeholder for labels associated with
        input_data.
      random_seed: The random number generator seed to use for this tree.  0
        means use the current time as the seed.
      data_spec: A list of tf.dtype values specifying the original types of
        each column.
      epoch: A tensor or placeholder for the epoch the training data comes from.
        Defaults to `[0]` when None.
      input_weights: A float tensor or placeholder holding per-input weights,
        or None if all inputs are to be weighted equally (an empty list is
        passed to the underlying ops in that case).

    Returns:
      A single op (`control_flow_ops.group`) grouping every variable update
      built here together with the ops returned by `self.finish_iteration()`.
    """
    epoch = [0] if epoch is None else epoch

    if input_weights is None:
      input_weights = []

    # For sparse input, hand the three SparseTensor components to the training
    # ops and replace the dense input with an empty list.
    sparse_indices = []
    sparse_values = []
    sparse_shape = []
    if isinstance(input_data, ops.SparseTensor):
      sparse_indices = input_data.indices
      sparse_values = input_data.values
      sparse_shape = input_data.shape
      input_data = []

    # Count extremely random stats.
    (node_sums, node_squares, splits_indices, splits_sums, splits_squares,
     totals_indices, totals_sums, totals_squares,
     input_leaves) = (self.training_ops.count_extremely_random_stats(
         input_data,
         sparse_indices,
         sparse_values,
         sparse_shape,
         data_spec,
         input_labels,
         input_weights,
         self.variables.tree,
         self.variables.tree_thresholds,
         self.variables.node_to_accumulator_map,
         self.variables.candidate_split_features,
         self.variables.candidate_split_thresholds,
         self.variables.start_epoch,
         epoch,
         num_classes=self.params.num_output_columns,
         regression=self.params.regression))
    # Ops that fold this batch's per-node statistics into the node variables.
    node_update_ops = []
    node_update_ops.append(
        state_ops.assign_add(self.variables.node_sums, node_sums))

    # Ops that fold this batch's statistics into the candidate-split and
    # accumulator-total variables.
    splits_update_ops = []
    splits_update_ops.append(self.training_ops.scatter_add_ndim(
        self.variables.candidate_split_sums,
        splits_indices, splits_sums))
    splits_update_ops.append(self.training_ops.scatter_add_ndim(
        self.variables.accumulator_sums, totals_indices,
        totals_sums))

    # The squared statistics are only accumulated for regression.
    if self.params.regression:
      node_update_ops.append(state_ops.assign_add(self.variables.node_squares,
                                                  node_squares))
      splits_update_ops.append(self.training_ops.scatter_add_ndim(
          self.variables.candidate_split_squares,
          splits_indices, splits_squares))
      splits_update_ops.append(self.training_ops.scatter_add_ndim(
          self.variables.accumulator_squares, totals_indices,
          totals_squares))

    # Sample inputs.
    update_indices, feature_updates, threshold_updates = (
        self.training_ops.sample_inputs(
            input_data,
            sparse_indices,
            sparse_values,
            sparse_shape,
            input_weights,
            self.variables.node_to_accumulator_map,
            input_leaves,
            self.variables.candidate_split_features,
            self.variables.candidate_split_thresholds,
            split_initializations_per_input=(
                self.params.split_initializations_per_input),
            split_sampling_random_seed=random_seed))
    update_features_op = state_ops.scatter_update(
        self.variables.candidate_split_features, update_indices,
        feature_updates)
    update_thresholds_op = state_ops.scatter_update(
        self.variables.candidate_split_thresholds, update_indices,
        threshold_updates)

    # Calculate finished nodes.
    # Runs only after the split statistics for this batch have been applied.
    with ops.control_dependencies(splits_update_ops):
      # The first column of the tree variable is compared against LEAF_NODE to
      # find the indices of all current leaves.
      children = array_ops.squeeze(array_ops.slice(
          self.variables.tree, [0, 0], [-1, 1]), squeeze_dims=[1])
      is_leaf = math_ops.equal(constants.LEAF_NODE, children)
      leaves = math_ops.to_int32(array_ops.squeeze(array_ops.where(is_leaf),
                                                   squeeze_dims=[1]))
      finished, stale = self.training_ops.finished_nodes(
          leaves, self.variables.node_to_accumulator_map,
          self.variables.candidate_split_sums,
          self.variables.candidate_split_squares,
          self.variables.accumulator_sums,
          self.variables.accumulator_squares,
          self.variables.start_epoch, epoch,
          num_split_after_samples=self.params.split_after_samples,
          min_split_samples=self.params.min_split_samples)

    # Update leaf scores.
    # Non-fertile leaves are the leaves whose node_to_accumulator_map entry is
    # negative.
    non_fertile_leaves = array_ops.boolean_mask(
        leaves, math_ops.less(array_ops.gather(
            self.variables.node_to_accumulator_map, leaves), 0))

    # TODO(gilberth): It should be possible to limit the number of non
    # fertile leaves we calculate scores for, especially since we can only take
    # at most array_ops.shape(finished)[0] of them.
    # Scores are read only after the node statistics for this batch are applied.
    with ops.control_dependencies(node_update_ops):
      sums = array_ops.gather(self.variables.node_sums, non_fertile_leaves)
      if self.params.regression:
        squares = array_ops.gather(self.variables.node_squares,
                                   non_fertile_leaves)
        non_fertile_leaf_scores = self._variance(sums, squares)
      else:
        non_fertile_leaf_scores = self._weighted_gini(sums)

    # Calculate best splits.
    with ops.control_dependencies(splits_update_ops):
      split_indices = self.training_ops.best_splits(
          finished, self.variables.node_to_accumulator_map,
          self.variables.candidate_split_sums,
          self.variables.candidate_split_squares,
          self.variables.accumulator_sums,
          self.variables.accumulator_squares,
          regression=self.params.regression)

    # Grow tree.
    # Waits for the newly sampled candidate features/thresholds to be written.
    with ops.control_dependencies([update_features_op, update_thresholds_op]):
      (tree_update_indices, tree_children_updates, tree_threshold_updates,
       new_eot) = (self.training_ops.grow_tree(
           self.variables.end_of_tree, self.variables.node_to_accumulator_map,
           finished, split_indices, self.variables.candidate_split_features,
           self.variables.candidate_split_thresholds))
      tree_update_op = state_ops.scatter_update(
          self.variables.tree, tree_update_indices, tree_children_updates)
      thresholds_update_op = state_ops.scatter_update(
          self.variables.tree_thresholds, tree_update_indices,
          tree_threshold_updates)
      # TODO(thomaswc): Only update the epoch on the new leaves.
      # Every updated tree row gets the current epoch as its start epoch.
      new_epoch_updates = epoch * array_ops.ones_like(tree_threshold_updates,
                                                      dtype=dtypes.int32)
      epoch_update_op = state_ops.scatter_update(
          self.variables.start_epoch, tree_update_indices,
          new_epoch_updates)

    # Update fertile slots.
    with ops.control_dependencies([tree_update_op]):
      (node_map_updates, accumulators_cleared, accumulators_allocated) = (
          self.training_ops.update_fertile_slots(
              finished,
              non_fertile_leaves,
              non_fertile_leaf_scores,
              self.variables.end_of_tree,
              self.variables.accumulator_sums,
              self.variables.node_to_accumulator_map,
              stale,
              regression=self.params.regression))

    # Ensure end_of_tree doesn't get updated until UpdateFertileSlots has
    # used it to calculate new leaves.
    gated_new_eot, = control_flow_ops.tuple([new_eot],
                                            control_inputs=[node_map_updates])
    eot_update_op = state_ops.assign(self.variables.end_of_tree, gated_new_eot)

    # Collect every op that must run for one training step.
    updates = []
    updates.append(eot_update_op)
    updates.append(tree_update_op)
    updates.append(thresholds_update_op)
    updates.append(epoch_update_op)

    # Row 0 of node_map_updates supplies the indices and row 1 the new values
    # written into node_to_accumulator_map.
    updates.append(state_ops.scatter_update(
        self.variables.node_to_accumulator_map,
        array_ops.squeeze(array_ops.slice(node_map_updates, [0, 0], [1, -1]),
                          squeeze_dims=[0]),
        array_ops.squeeze(array_ops.slice(node_map_updates, [1, 0], [1, -1]),
                          squeeze_dims=[0])))

    cleared_and_allocated_accumulators = array_ops.concat(
        0, [accumulators_cleared, accumulators_allocated])
    # Calculate values to put into scatter update for candidate counts.
    # Candidate split counts are always reset back to 0 for both cleared
    # and allocated accumulators. This means some accumulators might be doubly
    # reset to 0 if they were released and not allocated, then later allocated.
    split_values = array_ops.tile(
        array_ops.expand_dims(array_ops.expand_dims(
            array_ops.zeros_like(cleared_and_allocated_accumulators,
                                 dtype=dtypes.float32), 1), 2),
        [1, self.params.num_splits_to_consider, self.params.num_output_columns])
    updates.append(state_ops.scatter_update(
        self.variables.candidate_split_sums,
        cleared_and_allocated_accumulators, split_values))
    if self.params.regression:
      updates.append(state_ops.scatter_update(
          self.variables.candidate_split_squares,
          cleared_and_allocated_accumulators, split_values))

    # Calculate values to put into scatter update for total counts.
    # Cleared accumulators are filled with -1 and newly allocated ones with 0.
    total_cleared = array_ops.tile(
        array_ops.expand_dims(
            math_ops.neg(array_ops.ones_like(accumulators_cleared,
                                             dtype=dtypes.float32)), 1),
        [1, self.params.num_output_columns])
    total_reset = array_ops.tile(
        array_ops.expand_dims(
            array_ops.zeros_like(accumulators_allocated,
                                 dtype=dtypes.float32), 1),
        [1, self.params.num_output_columns])
    accumulator_updates = array_ops.concat(0, [total_cleared, total_reset])
    updates.append(state_ops.scatter_update(
        self.variables.accumulator_sums,
        cleared_and_allocated_accumulators, accumulator_updates))
    if self.params.regression:
      updates.append(state_ops.scatter_update(
          self.variables.accumulator_squares,
          cleared_and_allocated_accumulators, accumulator_updates))

    # Calculate values to put into scatter update for candidate splits.
    # Candidate split features of every cleared or allocated accumulator are
    # overwritten with -1.
    split_features_updates = array_ops.tile(
        array_ops.expand_dims(
            math_ops.neg(array_ops.ones_like(
                cleared_and_allocated_accumulators)), 1),
        [1, self.params.num_splits_to_consider])
    updates.append(state_ops.scatter_update(
        self.variables.candidate_split_features,
        cleared_and_allocated_accumulators, split_features_updates))

    updates += self.finish_iteration()

    return control_flow_ops.group(*updates)
Exemple #51
0
def LastValueQuantize(inputs,
                      per_channel=False,
                      init_min=-6.0,
                      init_max=6.0,
                      updates_collection=ops.GraphKeys.UPDATE_OPS,
                      vars_collection=ops.GraphKeys.MOVING_AVERAGE_VARIABLES,
                      name_prefix='LastValueQuant',
                      reuse=None,
                      is_training=True,
                      num_bits=8,
                      narrow_range=False):
  """Fake-quantizes `inputs` with the range observed on the latest batch.

  Two non-trainable variables, 'min' and 'max', hold the quantization
  interval. In training mode they are overwritten on every call with the
  current batch minimum / maximum (widened to include 0.0) and the assign ops
  are registered in `updates_collection`. In eval mode the stored values are
  used unchanged.

  Args:
    inputs: a tensor containing values to be quantized.
    per_channel: (Optional) a boolean specifying whether to use different
      quantization ranges per output channel.
    init_min: a float scalar, the initial value for variable min.
    init_max: a float scalar, the initial value for variable max.
    updates_collection: (Optional) collections to collect the update ops for
      computation.
    vars_collection: (Optional) collection where to store variables for
      quantization interval ends.
    name_prefix: name_prefix for created nodes.
    reuse: whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    is_training: Whether the op is applied to a training or eval graph.
    num_bits: Number of bits to use for quantization, must be between 2 and 8.
    narrow_range: Whether to use the narrow quantization range
      [1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1].
  Returns:
    a tensor containing quantized values.
  """
  with variable_scope.variable_scope(
      None, default_name=name_prefix, values=[inputs], reuse=reuse):
    input_shape = inputs.get_shape()
    input_dim = len(input_shape)

    # Scalar range by default; one entry per last-axis channel otherwise.
    min_max_shape = []
    if per_channel:
      # Only support quantizing 1-, 2- and 4-dimensional tensors.
      assert input_dim in [1, 2, 4], ('Expected 1D, 2D or 4D input, was: %s in '
                                      ' scope: %s' % (input_shape, name_prefix))
      min_max_shape = [input_shape[-1]]

    def _range_variable(var_name, initial_value):
      """Creates one non-trainable end point of the quantization interval."""
      return model_variable(
          var_name,
          shape=min_max_shape,
          initializer=init_ops.constant_initializer(initial_value),
          collections=[vars_collection],
          trainable=False)

    def _fake_quant(range_min, range_max):
      """Applies fake quantization to `inputs` over the given interval."""
      return _FakeQuantWithMinMaxVars(
          inputs,
          range_min,
          range_max,
          per_channel=per_channel,
          num_bits=num_bits,
          narrow_range=narrow_range)

    min_var = _range_variable('min', init_min)
    max_var = _range_variable('max', init_max)

    # Eval graphs only read the stored range.
    if not is_training:
      return _fake_quant(min_var, max_var)

    # Axes to reduce over so that only the channel axis remains.
    if per_channel:
      if input_dim == 2:
        reduce_dims = [0]
      elif input_dim == 4:
        reduce_dims = [0, 1, 2]

    def _batch_extreme(reduce_fn, op_name):
      """Returns the batch min or max, per channel when requested."""
      if not per_channel:
        return reduce_fn(inputs, name=op_name)
      if input_dim < 2:
        # A 1-D per-channel input already has one value per channel.
        return inputs
      return reduce_fn(inputs, reduction_indices=reduce_dims, name=op_name)

    # TFLite requires that 0.0 is always in the [min; max] range.
    batch_min = math_ops.minimum(
        _batch_extreme(math_ops.reduce_min, 'BatchMin'), 0.0)
    assign_min = state_ops.assign(min_var, batch_min, name='AssignMinLast')
    ops.add_to_collection(updates_collection, assign_min.op)

    # TFLite requires that 0.0 is always in the [min; max] range.
    batch_max = math_ops.maximum(
        _batch_extreme(math_ops.reduce_max, 'BatchMax'), 0.0)
    assign_max = state_ops.assign(max_var, batch_max, name='AssignMaxLast')
    ops.add_to_collection(updates_collection, assign_max.op)

    # Quantize against the freshly assigned range so the update is part of
    # the forward computation.
    return _fake_quant(assign_min, assign_max)
Exemple #52
0
 def testSaveRestore(self):
     """Checks immediate and deferred checkpoint restoration with AdamOptimizer.

     Saves a checkpoint after pinning the dense bias and its "m" slot to known
     values, perturbs them, and verifies that restoring brings the saved
     values back. When executing eagerly, additionally restores into freshly
     constructed objects whose variables do not exist yet.
     """
     with self.test_session():
         model = MyModel()
         optimizer = adam.AdamOptimizer(0.001)
         root_trackable = trackable_utils.Checkpoint(optimizer=optimizer,
                                                     model=model)
         input_value = constant_op.constant([[3.]])
         if not context.executing_eagerly():
             train_op = optimizer.minimize(model(input_value))
             # TODO(allenl): Make initialization more pleasant when graph building.
             root_trackable.save_counter  # pylint: disable=pointless-statement
             initializers = trackable_utils.gather_initializers(root_trackable)
             self.evaluate(initializers)
             self.evaluate(train_op)
         else:
             optimizer.minimize(lambda: model(input_value))
         prefix = os.path.join(self.get_temp_dir(), "ckpt")

         # Pin the bias and its first-moment slot to known values, then save.
         bias = model._named_dense.variables[1]
         self.evaluate(state_ops.assign(bias, [42.]))
         m_bias_slot = optimizer.get_slot(bias, "m")
         self.evaluate(state_ops.assign(m_bias_slot, [1.5]))
         save_path = root_trackable.save(file_prefix=prefix)

         # Overwrite everything that was saved so a restore is observable.
         self.evaluate(state_ops.assign(bias, [43.]))
         self.evaluate(state_ops.assign(root_trackable.save_counter, 3))
         optimizer_variables = self.evaluate(optimizer.variables())
         self.evaluate(state_ops.assign(m_bias_slot, [-2.]))

         # Immediate restoration
         restore_status = root_trackable.restore(
             save_path=save_path).assert_consumed()
         restore_status.run_restore_ops()
         self.assertAllEqual([42.], self.evaluate(bias))
         self.assertAllEqual(1, self.evaluate(root_trackable.save_counter))
         self.assertAllEqual([1.5], self.evaluate(m_bias_slot))
         if not context.executing_eagerly():
             return  # Restore-on-create is only supported when executing eagerly

         on_create_model = MyModel()
         # Preserve beta1_power and beta2_power when applying gradients
         # so we can test that they've been restored correctly.
         on_create_optimizer = adam.AdamOptimizer(0.001, beta1=1.0, beta2=1.0)
         on_create_root = trackable_utils.Checkpoint(
             optimizer=on_create_optimizer, model=on_create_model)

         # Deferred restoration
         deferred_status = on_create_root.restore(save_path=save_path)
         deferred_status.assert_nontrivial_match()
         deferred_status.assert_existing_objects_matched()
         with self.assertRaises(AssertionError):
             deferred_status.assert_consumed()
         on_create_model(constant_op.constant([[3.]]))  # create variables
         self.assertAllEqual(1, self.evaluate(on_create_root.save_counter))
         on_create_bias = on_create_model._named_dense.variables[1]
         self.assertAllEqual([42.], self.evaluate(on_create_bias))
         on_create_m_bias_slot = on_create_optimizer.get_slot(
             on_create_bias, "m")
         deferred_status.assert_existing_objects_matched()
         with self.assertRaises(AssertionError):
             deferred_status.assert_consumed()
         # Optimizer slot variables are created when the original variable is
         # restored.
         self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot))
         self.assertAllEqual(optimizer_variables[2:],
                             self.evaluate(on_create_optimizer.variables()))

         # Applying gradients once creates the remaining optimizer state, after
         # which the checkpoint is expected to be fully consumed.
         dummy_var = variables.Variable([1.])
         on_create_optimizer.minimize(loss=dummy_var.read_value)
         deferred_status.assert_existing_objects_matched()
         deferred_status.assert_consumed()
         beta1_power, beta2_power = (
             on_create_optimizer._get_beta_accumulators())
         self.assertAllEqual(optimizer_variables[0],
                             self.evaluate(beta1_power))
         self.assertAllEqual(optimizer_variables[1],
                             self.evaluate(beta2_power))
 def testSaveRestore(self):
     """Checks immediate and deferred checkpoint restoration with `adam.Adam`.

     Runs one gradient step computed with a GradientTape, saves a checkpoint
     after pinning the dense bias and its "m" slot to known values, perturbs
     them, and verifies that restoring brings the saved values back. When
     executing eagerly, additionally restores into freshly constructed objects
     whose variables do not exist yet.
     """
     with self.test_session():
         model = MyModel()
         optimizer = adam.Adam(0.001)
         root_trackable = trackable_utils.Checkpoint(optimizer=optimizer,
                                                     model=model)
         input_value = constant_op.constant([[3.]])
         with backprop.GradientTape() as tape:
             loss = model(input_value)
         trainable_vars = model.trainable_variables
         grads = tape.gradient(loss, trainable_vars)
         train_op = optimizer.apply_gradients(zip(grads, trainable_vars))
         self.assertFalse(root_trackable.save_counter.trainable)
         self.evaluate(trackable_utils.gather_initializers(root_trackable))
         self.evaluate(train_op)
         prefix = os.path.join(self.get_temp_dir(), "ckpt")

         # Pin the bias and its first-moment slot to known values, then save.
         bias = model._named_dense.variables[1]
         self.evaluate(state_ops.assign(bias, [42.]))
         m_bias_slot = optimizer.get_slot(bias, "m")
         self.evaluate(state_ops.assign(m_bias_slot, [1.5]))
         save_path = root_trackable.save(file_prefix=prefix)

         # Overwrite everything that was saved so a restore is observable.
         self.evaluate(state_ops.assign(bias, [43.]))
         self.evaluate(state_ops.assign(root_trackable.save_counter, 3))
         optimizer_variables = self.evaluate(
             sorted(optimizer.variables(), key=lambda v: v.name))
         self.evaluate(state_ops.assign(m_bias_slot, [-2.]))

         # Immediate restoration
         restore_status = root_trackable.restore(
             save_path=save_path).assert_consumed()
         restore_status.run_restore_ops()
         self.assertAllEqual([42.], self.evaluate(bias))
         self.assertAllEqual(1, self.evaluate(root_trackable.save_counter))
         self.assertAllEqual([1.5], self.evaluate(m_bias_slot))
         if not context.executing_eagerly():
             return  # Restore-on-create is only supported when executing eagerly

         on_create_model = MyModel()
         on_create_optimizer = adam.Adam(0.001)
         on_create_root = trackable_utils.Checkpoint(
             optimizer=on_create_optimizer, model=on_create_model)

         # Deferred restoration
         deferred_status = on_create_root.restore(save_path=save_path)
         deferred_status.assert_nontrivial_match()
         deferred_status.assert_existing_objects_matched()
         with self.assertRaises(AssertionError):
             deferred_status.assert_consumed()
         on_create_model(constant_op.constant([[3.]]))  # create variables
         self.assertAllEqual(1, self.evaluate(on_create_root.save_counter))
         on_create_bias = on_create_model._named_dense.variables[1]
         self.assertAllEqual([42.], self.evaluate(on_create_bias))
         on_create_m_bias_slot = on_create_optimizer.get_slot(
             on_create_bias, "m")
         deferred_status.assert_existing_objects_matched()
         # NOTE(review): the early return above already exits in graph mode,
         # so this guard looks unreachable here; kept as in the original.
         if not context.executing_eagerly():
             with self.assertRaises(AssertionError):
                 deferred_status.assert_consumed()
         # Optimizer slot variables are created when the original variable is
         # restored.
         self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot))

         # Applying gradients once creates the remaining optimizer state, after
         # which the checkpoint is expected to be fully consumed.
         dummy_var = resource_variable_ops.ResourceVariable([1.])
         on_create_optimizer.minimize(loss=dummy_var.read_value,
                                      var_list=[dummy_var])
         deferred_status.assert_existing_objects_matched()
         deferred_status.assert_consumed()
         # Creation order is different, so .variables() needs to be re-sorted.
         resorted_variables = self.evaluate(
             sorted(optimizer.variables(), key=lambda v: v.name))
         self.assertAllEqual(optimizer_variables, resorted_variables)
    def _update_statistics_from_mini_batch(self, statistics,
                                           auxiliary_variables, times, values):
        """Given mini-batch input, update `statistics` and `auxiliary_variables`.

        Builds ops that first accumulate raw per-chunk quantities (max time,
        chunk/example counts, feature sums and sums of squares) into
        `auxiliary_variables`, and then, gated on those accumulations, refresh
        the derived values stored in `statistics` (overall mean/variance,
        series-start moments, start time, and estimated total observation
        count).

        Args:
          statistics: Object holding the variables to refresh; this method
            writes `overall_feature_moments`, `series_start_moments`,
            `start_time` and `total_observation_count`.
          auxiliary_variables: Object holding the running accumulators; this
            method writes `max_time_seen`, `chunk_count`,
            `inter_observation_duration_sum`, `example_count`,
            `overall_feature_sum` and `overall_feature_sum_of_squares`.
          times: Tensor of observation times. It is reduced along axis 1 and
            indexed as `times[:, 0]`; presumably shaped [batch, window] --
            TODO confirm.
          values: Tensor of observed values, cast to `self._dtype`. It is
            summed over axes [0, 1]; presumably shaped
            [batch, window, features] -- TODO confirm.

        Returns:
          A grouped op that runs all of the statistics updates.
        """
        values = math_ops.cast(values, self._dtype)
        # The density (measured in times per observation) that we see in each part
        # of the mini-batch.
        batch_inter_observation_duration = (
            math_ops.cast(
                math_ops.reduce_max(times, axis=1) -
                math_ops.reduce_min(times, axis=1), self._dtype) /
            math_ops.cast(array_ops.shape(times)[1] - 1, self._dtype))
        # Co-locate updates with their variables to minimize race conditions when
        # updating statistics.
        with ops.colocate_with(auxiliary_variables.max_time_seen):
            # There is a race condition if this value is being updated from multiple
            # workers. However, it should eventually reach the correct value if the
            # last chunk is presented enough times.
            max_time_seen_assign = state_ops.assign(
                auxiliary_variables.max_time_seen,
                gen_math_ops.maximum(auxiliary_variables.max_time_seen,
                                     math_ops.reduce_max(times)))
        with ops.colocate_with(auxiliary_variables.chunk_count):
            # One chunk per row of `times`.
            chunk_count_assign = state_ops.assign_add(
                auxiliary_variables.chunk_count,
                array_ops.shape(times, out_type=dtypes.int64)[0])
        with ops.colocate_with(
                auxiliary_variables.inter_observation_duration_sum):
            inter_observation_duration_assign = state_ops.assign_add(
                auxiliary_variables.inter_observation_duration_sum,
                math_ops.reduce_sum(batch_inter_observation_duration))
        with ops.colocate_with(auxiliary_variables.example_count):
            # One example per element of `times`.
            example_count_assign = state_ops.assign_add(
                auxiliary_variables.example_count,
                array_ops.size(times, out_type=dtypes.int64))
        # Note: These mean/variance updates assume that all points are equally
        # likely, which is not true if _chunks_ are sampled uniformly from the space
        # of all possible contiguous chunks, since points at the start and end of
        # the series are then members of fewer chunks. For series which are much
        # longer than the chunk size (the usual/expected case), this effect becomes
        # irrelevant.
        with ops.colocate_with(auxiliary_variables.overall_feature_sum):
            overall_feature_sum_assign = state_ops.assign_add(
                auxiliary_variables.overall_feature_sum,
                math_ops.reduce_sum(values, axis=[0, 1]))
        with ops.colocate_with(
                auxiliary_variables.overall_feature_sum_of_squares):
            overall_feature_sum_of_squares_assign = state_ops.assign_add(
                auxiliary_variables.overall_feature_sum_of_squares,
                math_ops.reduce_sum(values**2, axis=[0, 1]))
        per_chunk_aux_updates = control_flow_ops.group(
            max_time_seen_assign, chunk_count_assign,
            inter_observation_duration_assign, example_count_assign,
            overall_feature_sum_assign, overall_feature_sum_of_squares_assign)
        # Everything below reads the accumulators, so it is gated on the
        # accumulator updates above.
        with ops.control_dependencies([per_chunk_aux_updates]):
            example_count_float = math_ops.cast(
                auxiliary_variables.example_count, self._dtype)
            new_feature_mean = (auxiliary_variables.overall_feature_sum /
                                example_count_float)
            overall_feature_mean_update = state_ops.assign(
                statistics.overall_feature_moments.mean, new_feature_mean)
            overall_feature_var_update = state_ops.assign(
                statistics.overall_feature_moments.variance,
                # De-biased n / (n - 1) variance correction
                example_count_float / (example_count_float - 1.) *
                (auxiliary_variables.overall_feature_sum_of_squares /
                 example_count_float - new_feature_mean**2))
            # Index of the batch row whose first time is smallest.
            # TODO(b/35675805): Remove this cast
            min_time_batch = math_ops.cast(math_ops.argmin(times[:, 0]),
                                           dtypes.int32)

            def series_start_updates():
                # If this is the lowest-time chunk that we have seen so far, update
                # series start moments to reflect that. Note that these statistics are
                # "best effort", as there are race conditions in the update (however,
                # they should eventually converge if the start of the series is
                # presented enough times).
                mean, variance = nn.moments(values[
                    min_time_batch, :self._starting_variance_window_size],
                                            axes=[0])
                return control_flow_ops.group(
                    state_ops.assign(statistics.series_start_moments.mean,
                                     mean),
                    state_ops.assign(statistics.series_start_moments.variance,
                                     variance))

            with ops.colocate_with(statistics.start_time):
                series_start_update = control_flow_ops.cond(
                    # Update moments whenever we even match the lowest time seen so far,
                    # to ensure that series start statistics are eventually updated to
                    # their correct values, despite race conditions (i.e. eventually
                    # statistics.start_time will reflect the global lowest time, and
                    # given that we will eventually update the series start moments to
                    # their correct values).
                    math_ops.less_equal(times[min_time_batch, 0],
                                        statistics.start_time),
                    series_start_updates,
                    control_flow_ops.no_op)
                # The start time is only lowered after the comparison against
                # its previous value (inside the cond above) has run.
                with ops.control_dependencies([series_start_update]):
                    # There is a race condition if this update is performed in parallel on
                    # multiple workers. Since models may be sensitive to being presented
                    # with times before the putative start time, the value of this
                    # variable is post-processed above to guarantee that each worker is
                    # presented with a start time which is at least as low as the lowest
                    # time in its current mini-batch.
                    start_time_update = state_ops.assign(
                        statistics.start_time,
                        gen_math_ops.minimum(statistics.start_time,
                                             math_ops.reduce_min(times)))
            inter_observation_duration_estimate = (
                auxiliary_variables.inter_observation_duration_sum /
                math_ops.cast(auxiliary_variables.chunk_count, self._dtype))
            # Estimate the total number of observations as:
            #   (end time - start time + 1) * average intra-chunk time density
            total_observation_count_update = state_ops.assign(
                statistics.total_observation_count,
                math_ops.cast(
                    gen_math_ops.round(
                        math_ops.cast(
                            max_time_seen_assign - start_time_update + 1,
                            self._dtype) /
                        inter_observation_duration_estimate), dtypes.int64))
            per_chunk_stat_updates = control_flow_ops.group(
                overall_feature_mean_update, overall_feature_var_update,
                series_start_update, start_time_update,
                total_observation_count_update)
        return per_chunk_stat_updates
 def _initialized_limit_check():
     """Resets or advances the discarded-windows limiter.

     If any entry of `non_decreasing` is true the limiter is assigned 0;
     otherwise it is incremented via `count_up_to(self._discard_limit)`.
     """

     def _reset_limiter():
         return state_ops.assign(discarded_windows_limiter, 0)

     def _advance_limiter():
         return discarded_windows_limiter.count_up_to(self._discard_limit)

     any_non_decreasing = math_ops.reduce_any(non_decreasing)
     return control_flow_ops.cond(
         any_non_decreasing, _reset_limiter, _advance_limiter)
Exemple #56
0
    def _apply_dense(self, grad, var):
        """Build the dense update op for `var` from its gradient `grad`.

        The first moment `m` is updated with the current gradient. The second
        moment `v` is updated from the slot `g`, which at that point still
        holds the gradient stored by the previous call. `self._pred_g_op`
        selects how the squared stored gradient enters `v`:

        * `'none'`: element-wise square,
        * `'max'`: the maximum of the squares,
        * `'mean'`: the mean of the squares.

        Args:
            grad: Gradient tensor for `var`.
            var: Variable to update.

        Returns:
            An op grouping the update of `var` and the refresh of slot `g`.

        Raises:
            ValueError: If `self._pred_g_op` is not one of `'none'`, `'max'`
                or `'mean'`.
        """
        lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
        beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
        beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
        epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)

        m = self.get_slot(var, "m")
        v = self.get_slot(var, "v")
        g = self.get_slot(var, "g")
        z = self.get_slot(var, "z")
        b1p = self.get_slot(var, "b1p")
        b2p = self.get_slot(var, "b2p")

        m_t = state_ops.assign(m, beta1_t * m + grad * (1 - beta1_t), use_locking=self._use_locking)

        # Second-moment input comes from the previously stored gradient `g`,
        # not from the current `grad`.
        g_squared = tf.square(g)
        if self._pred_g_op == 'none':
            v_input = g_squared
        elif self._pred_g_op == 'max':
            v_input = tf.reduce_max(g_squared)
        elif self._pred_g_op == 'mean':
            v_input = tf.reduce_mean(g_squared)
        else:
            # Raise explicitly: a bare `assert False` is stripped under
            # `python -O`, which would leave `v_t` undefined below.
            raise ValueError(
                'Unknown pred_g_op: %r; expected one of "none", "max", "mean".'
                % (self._pred_g_op,))
        v_t = state_ops.assign(v, v * beta2_t + v_input * (1 - beta2_t), use_locking=self._use_locking)

        # `g` may only be overwritten after `v_t` has consumed its old value,
        # hence the control dependency.
        with ops.control_dependencies([v_t]):
            # `z` is a 0/1 mask that becomes 1 once `v_t` (or `z` itself) is
            # positive.
            # NOTE(review): the mask is cast to float32 regardless of the dtype
            # of `var` -- confirm the `z` slot is always float32.
            z_t = state_ops.assign(z, tf.cast(tf.logical_or(v_t > 0.0, z > 0.0), tf.float32))
            g_t = state_ops.assign(g, grad, use_locking=self._use_locking)

        # Per-element beta powers: multiplied by beta where the mask is set,
        # reset to 1.0 where it is not.
        b1p_t = state_ops.assign(b1p, b1p * beta1_t * tf.sign(z_t) + (1.0 - tf.sign(z_t)), use_locking=self._use_locking)
        b2p_t = state_ops.assign(b2p, b2p * beta2_t * tf.sign(z_t) + (1.0 - tf.sign(z_t)), use_locking=self._use_locking)

        # Correction denominators, clamped away from zero.
        b1_fix = tf.maximum(1e-8, 1.0 - b1p_t)
        b2_fix = tf.maximum(1e-8, 1.0 - b2p_t)

        # Masked elements (z_t == 0) receive no step.
        step_t = z_t * (m_t / b1_fix) / (math_ops.sqrt(v_t / b2_fix) + epsilon_t)

        var_update = state_ops.assign_sub(var, lr_t * step_t, use_locking=self._use_locking)
        return control_flow_ops.group(var_update, g_t)
Exemple #57
0
    def set_model(self, model):
        """Sets Keras model and creates summary ops.

        Stores `model`, initialises the writer and, in graph mode, builds the
        histogram summary ops and merges all summaries into `self.merged`.

        When `self.embeddings_freq` is truthy and `self.embeddings_data` is
        set, this also builds, for every selected layer, a variable holding
        its flattened output plus an op assigning one batch slice of it, a
        `Saver` over those variables, and the TensorBoard projector config.

        Args:
            model: The Keras model this callback is attached to.

        Raises:
            ImportError: If the TensorBoard projector plugin cannot be
                imported.
        """

        self.model = model
        self._init_writer(model)
        # histogram summaries only enabled in graph mode
        if not context.executing_eagerly():
            self._make_histogram_ops(model)
            self.merged = tf_summary.merge_all()

        # If both embedding_freq and embeddings_data are available, we will
        # visualize embeddings.
        if self.embeddings_freq and self.embeddings_data is not None:
            # Avoid circular dependency.
            from tensorflow.python.keras.engine import training_utils_v1  # pylint: disable=g-import-not-at-top
            self.embeddings_data = training_utils_v1.standardize_input_data(
                self.embeddings_data, model.input_names)

            # If embedding_layer_names are not provided, get all of the embedding
            # layers from the model.
            embeddings_layer_names = self.embeddings_layer_names
            if not embeddings_layer_names:
                embeddings_layer_names = [
                    layer.name for layer in self.model.layers
                    if type(layer).__name__ == 'Embedding'
                ]

            self.assign_embeddings = []
            embeddings_vars = {}

            # Fed at run time: offset of the current batch inside the
            # embedding variable, and the number of rows in that batch.
            self.batch_id = batch_id = array_ops.placeholder(dtypes.int32)
            self.step = step = array_ops.placeholder(dtypes.int32)

            for layer in self.model.layers:
                if layer.name in embeddings_layer_names:
                    embedding_input = self.model.get_layer(layer.name).output
                    # Flatten every non-batch dimension into one.
                    embedding_size = np.prod(embedding_input.shape[1:])
                    embedding_input = array_ops.reshape(
                        embedding_input, (step, int(embedding_size)))
                    # One row per sample of the first embeddings input.
                    shape = (self.embeddings_data[0].shape[0],
                             int(embedding_size))
                    embedding = variables.Variable(array_ops.zeros(shape),
                                                   name=layer.name +
                                                   '_embedding')
                    embeddings_vars[layer.name] = embedding
                    batch = state_ops.assign(
                        embedding[batch_id:batch_id + step], embedding_input)
                    self.assign_embeddings.append(batch)

            self.saver = saver.Saver(list(embeddings_vars.values()))

            # Create embeddings_metadata dictionary
            if isinstance(self.embeddings_metadata, str):
                # A single metadata path is shared by every embedding layer.
                embeddings_metadata = {
                    layer_name: self.embeddings_metadata
                    for layer_name in embeddings_vars
                }
            else:
                # If embedding_metadata is already a dictionary
                embeddings_metadata = self.embeddings_metadata

            try:
                from tensorboard.plugins import projector
            except ImportError:
                raise ImportError(
                    'Failed to import TensorBoard. Please make sure that '
                    'TensorBoard integration is complete.')

            # TODO(psv): Add integration tests to test embedding visualization
            # with TensorBoard callback. We are unable to write a unit test for this
            # because TensorBoard dependency assumes TensorFlow package is installed.
            config = projector.ProjectorConfig()
            for layer_name, tensor in embeddings_vars.items():
                embedding = config.embeddings.add()
                embedding.tensor_name = tensor.name

                if (embeddings_metadata is not None
                        and layer_name in embeddings_metadata):
                    embedding.metadata_path = embeddings_metadata[layer_name]

            projector.visualize_embeddings(self.writer, config)
    def _resource_apply_sparse(self, grad, var, indices, apply_state=None):
        """Build the sparse update op for `var` at the rows given by `indices`.

        The moment slots `m` and `v` are first decayed as a whole and then
        receive the scaled gradient contributions at `indices`. The variable
        step is computed from the gathered rows only.
        NOTE(review): the momentum schedule and `m_t_bar` look like the Nadam
        update rule -- confirm against the optimizer's dense path.

        Args:
            grad: Gradient values for the rows selected by `indices`.
            var: Variable to update.
            indices: Row indices of `var` that `grad` refers to.
            apply_state: Not used by this method.

        Returns:
            An op grouping the variable update, the moment computations and,
            when `iteration_done` is truthy, the `t_cur` / `eta_t` updates.
        """
        var_dtype = var.dtype.base_dtype
        lr_t = array_ops.identity(self._get_hyper('learning_rate', var_dtype))
        beta_1_t = array_ops.identity(self._get_hyper('beta_1', var_dtype))
        beta_2_t = array_ops.identity(self._get_hyper('beta_2', var_dtype))
        epsilon_t = ops.convert_to_tensor(self.epsilon, var_dtype)
        m = self.get_slot(var, 'm')
        v = self.get_slot(var, 'v')
        # Step counters for the current and the following update.
        local_step = math_ops.cast(self.iterations + 1, var_dtype)
        next_step = math_ops.cast(self.iterations + 2, var_dtype)
        decay_base = math_ops.cast(0.96, var_dtype)

        # Learning rate multipliers
        if self.lr_multipliers is not None:
            lr_t = _apply_lr_multiplier(self, lr_t, var)

        # Momentum coefficients for this step and the next one:
        #   beta_1 * (1 - 0.5 * decay_base ** (initial_decay * step))
        momentum_cache_t = beta_1_t * (
            1. - 0.5 *
            (math_ops.pow(decay_base, self._initial_decay * local_step)))
        momentum_cache_t_1 = beta_1_t * (
            1. - 0.5 *
            (math_ops.pow(decay_base, self._initial_decay * next_step)))
        # Running product of momentum coefficients, read from the cache.
        m_schedule_new = math_ops.cast(self._m_cache_read,
                                       var_dtype) * momentum_cache_t
        # The cache is only written back when the variable dtype is the
        # cache's own dtype object.
        if var_dtype is self._m_cache.dtype:
            m_schedule_new = array_ops.identity(
                state_ops.assign(self._m_cache,
                                 m_schedule_new,
                                 use_locking=self._use_locking))
        m_schedule_next = m_schedule_new * momentum_cache_t_1

        # First moment: decay all of `m`, then add the scaled gradient at
        # `indices` once the decay has been applied.
        m_scaled_g_values = grad * (1. - beta_1_t)
        m_t = state_ops.assign(m, m * beta_1_t, use_locking=self._use_locking)
        with ops.control_dependencies([m_t]):
            m_t = self._resource_scatter_add(m, indices, m_scaled_g_values)
            m_t_slice = array_ops.gather(m_t, indices)

        # Rescaled moment and gradient, blended with the current and next
        # momentum coefficients.
        m_t_prime = m_t_slice / (1. - m_schedule_next)
        g_prime = grad / (1. - m_schedule_new)
        m_t_bar = (1. - momentum_cache_t) * g_prime + (momentum_cache_t_1 *
                                                       m_t_prime)

        # Second moment: same decay-then-scatter pattern as for `m`.
        v_scaled_g_values = (grad * grad) * (1. - beta_2_t)
        v_t = state_ops.assign(v, v * beta_2_t, use_locking=self._use_locking)

        with ops.control_dependencies([v_t]):
            v_t = self._resource_scatter_add(v, indices, v_scaled_g_values)
            v_t_slice = array_ops.gather(v_t, indices)

        v_t_prime_denominator = 1. - math_ops.pow(beta_2_t, local_step)
        v_t_prime = v_t_slice / v_t_prime_denominator
        v_prime_sqrt_plus_eps = math_ops.sqrt(v_t_prime) + epsilon_t

        # Apply the step to the selected rows only.
        # NOTE(review): presumably `_resource_scatter_add` returns the updated
        # variable value -- confirm, since `var_t` is assigned back to `var`
        # below.
        var_t = self._resource_scatter_add(
            var, indices, -self.eta_t * lr_t * m_t_bar / v_prime_sqrt_plus_eps)

        # Weight decays
        if var.name in self.weight_decays.keys():
            var_t = _apply_weight_decays(self, var, var_t)

        var_update = state_ops.assign(var,
                                      var_t,
                                      use_locking=self._use_locking)

        # Cosine annealing
        (iteration_done, t_cur_update,
         eta_t_update) = _update_t_cur_eta_t_v2(self, lr_t, var)
        if iteration_done and not self._init_notified:
            self._init_notified = True

        # The schedule updates are only grouped in when `iteration_done` is
        # truthy.
        updates = [var_update, m_t_bar, v_t]
        if iteration_done:
            updates += [t_cur_update]
        if self.use_cosine_annealing and iteration_done:
            updates += [eta_t_update]
        return control_flow_ops.group(*updates)
Exemple #59
0
def streaming_precision_recall_arrays(n_gbboxes,
                                      rclasses,
                                      rscores,
                                      tp_tensor,
                                      fp_tensor,
                                      remove_zero_labels=True,
                                      metrics_collections=None,
                                      updates_collections=None,
                                      name=None):
    """Accumulate detections over batches and derive precision / recall.

    Scores and true / false positive flags are flattened, optionally filtered
    to entries whose class is greater than 0, and appended to local variables
    that grow with every update. The number of ground-truth boxes and the
    number of detections are accumulated as scalar counters.

    Args:
        n_gbboxes: Number of ground-truth boxes; summed into a counter.
        rclasses: Classes of the detections, used for the zero-label filter.
        rscores: Scores of the detections.
        tp_tensor: True positive flags, cast to int32.
        fp_tensor: False positive flags, cast to int32.
        remove_zero_labels: If true, drop entries whose class is not > 0.
        metrics_collections: Optional collections to add the value to.
        updates_collections: Optional collections to add the update op to.
        name: Optional variable scope name.

    Returns:
        A pair `(value, update_op)`, both built by `_precision_recall`: the
        first from the accumulator variables, the second from the update ops.
    """
    scope_values = [n_gbboxes, rclasses, tp_tensor, fp_tensor]
    with variable_scope.variable_scope(name, 'stream_precision_recall',
                                       scope_values):
        n_gbboxes = math_ops.to_int64(n_gbboxes)
        rclasses = math_ops.to_int64(rclasses)
        rscores = math_ops.to_float(rscores)

        flag_dtype = tf.int32
        tp_tensor = tf.cast(tp_tensor, flag_dtype)
        fp_tensor = tf.cast(fp_tensor, flag_dtype)

        # Flatten everything to 1-D, then optionally drop class-0 entries.
        rclasses, rscores, tp_tensor, fp_tensor = [
            tf.reshape(tensor, [-1])
            for tensor in (rclasses, rscores, tp_tensor, fp_tensor)
        ]
        if remove_zero_labels:
            keep = tf.greater(rclasses, 0)
            rclasses, rscores, tp_tensor, fp_tensor = [
                tf.boolean_mask(tensor, keep)
                for tensor in (rclasses, rscores, tp_tensor, fp_tensor)
            ]

        # Local variables accumulating information over batches.
        v_nobjects = _create_local('v_nobjects', shape=[], dtype=tf.int64)
        v_ndetections = _create_local('v_ndetections', shape=[],
                                      dtype=tf.int32)
        v_scores = _create_local('v_scores', shape=[0])
        v_tp = _create_local('v_tp', shape=[0], dtype=flag_dtype)
        v_fp = _create_local('v_fp', shape=[0], dtype=flag_dtype)

        def _append(accumulator, values):
            # The accumulator grows on every update, so its static shape
            # cannot be validated.
            return state_ops.assign(accumulator,
                                    tf.concat([accumulator, values], axis=0),
                                    validate_shape=False)

        # Update operations.
        nobjects_op = state_ops.assign_add(v_nobjects,
                                           tf.reduce_sum(n_gbboxes))
        ndetections_op = state_ops.assign_add(
            v_ndetections, tf.size(rscores, out_type=tf.int32))
        scores_op = _append(v_scores, rscores)
        tp_op = _append(v_tp, tp_tensor)
        fp_op = _append(v_fp, fp_tensor)

        # Precision and recall computed from the current accumulator state.
        r = _precision_recall(v_nobjects, v_ndetections, v_scores, v_tp, v_fp,
                              'value')

        # Precision and recall computed from the freshly updated accumulators.
        all_updates = [nobjects_op, ndetections_op, scores_op, tp_op, fp_op]
        with ops.control_dependencies(all_updates):
            update_op = _precision_recall(nobjects_op, ndetections_op,
                                          scores_op, tp_op, fp_op, 'update_op')

        if metrics_collections:
            ops.add_to_collections(metrics_collections, r)
        if updates_collections:
            ops.add_to_collections(updates_collections, update_op)
        return r, update_op
Exemple #60
0
  def test_save_restore_multi_variables(self, rnn_mode, num_units, input_size,
                                        num_layers, direction):
    """Checks save / restore of several opaque params in one checkpoint.

    For each opaque param, the tf canonical weights and biases read after
    restoring from the checkpoint must match those read before the param was
    zeroed out.
    """
    if not context.context().num_gpus():
      self.skipTest("No GPUs found")
    with self.session(use_gpu=True) as sess:
      num_opaque_params = 2
      opaque_params = []
      saveables = []
      for idx in range(num_opaque_params):
        param = self._create_opaque_param(
            rnn_mode,
            num_units,
            input_size,
            num_layers,
            direction,
            name="opaque_param_%d" % idx)
        opaque_params.append(param)
        param_saveable = self._create_saveable(param, rnn_mode, num_units,
                                               input_size, num_layers,
                                               direction)
        ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, param_saveable)
        saveables.append(param_saveable)

      # Ops converting each opaque param to its tf canonical weights / biases.
      weights_ops = []
      biases_ops = []
      for param_saveable in saveables:
        converter = param_saveable.format_converter
        weights_op, biases_op = converter.opaque_to_tf_canonical(
            param_saveable._variables)
        weights_ops.append(weights_op)
        biases_ops.append(biases_op)

      checkpoint_path = os.path.join(self.get_temp_dir(),
                                     "save_restore_var_test")
      saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2)

      init_op = variables.global_variables_initializer()
      # One op per opaque param that overwrites it with zeros.
      reset_ops = [
          state_ops.assign(param, array_ops.zeros_like(param))
          for param in opaque_params
      ]
      sess.run(init_op)
      self.assertEqual(checkpoint_path, saver.save(sess, checkpoint_path))

      for weights_op, biases_op, reset_op in zip(weights_ops, biases_ops,
                                                 reset_ops):
        canonical_ops = [weights_op, biases_op]

        # Canonical values before the reset / restore round trip.
        weights, biases = sess.run(canonical_ops)

        # Zero the opaque param and verify that the reset took effect.
        sess.run(reset_op)
        weights_z, biases_z = sess.run(canonical_ops)
        for w in weights_z:
          self.assertAllClose(w, np.zeros_like(w))
        for b in biases_z:
          self.assertAllClose(b, np.zeros_like(b))

        # Restore from the checkpoint and compare with the original values.
        saver.restore(sess, checkpoint_path)
        weights_r, biases_r = sess.run(canonical_ops)
        self._compare_weights(weights, weights_r)
        self._compare_biases(biases, biases_r)