Example #1
 def _setup_last_update_step(self):
     with tf.variable_scope(self._spec.name,
                            use_resource=self._spec.use_tpu) as scope:
         try:
             last_update_step = tf.get_variable(
                 'last_mask_update_step', [],
                 initializer=tf.zeros_initializer(),
                 trainable=False,
                 dtype=tf.int32)
         except ValueError:
             scope.reuse_variables()
             last_update_step = tf.get_variable('last_mask_update_step',
                                                dtype=tf.int32)
     return last_update_step
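The try/except above is a create-or-reuse pattern: the first call creates 'last_mask_update_step', while any later call hits the ValueError that tf.get_variable raises for an already-existing variable and falls back to reusing it. A minimal standalone sketch of the same pattern (the scope and variable names here are illustrative, not taken from the library):

 import tensorflow.compat.v1 as tf
 tf.disable_eager_execution()

 def get_or_create_counter():
     with tf.variable_scope('demo') as scope:
         try:
             counter = tf.get_variable('counter', [],
                                       initializer=tf.zeros_initializer(),
                                       trainable=False,
                                       dtype=tf.int32)
         except ValueError:
             # Second and later calls land here: reuse the existing variable.
             scope.reuse_variables()
             counter = tf.get_variable('counter', dtype=tf.int32)
     return counter

 a = get_or_create_counter()
 b = get_or_create_counter()
 assert a is b  # both calls return the same underlying variable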
Example #2
 def testExponentialMovingAverageIncludingMovingVars(self):
   task = BaseTaskTest.TestParams()
   task.input = base_input_generator.BaseSequenceInputGenerator.Params()
   task.train.ema_decay = 0.9
   task.train.ema_decay_moving_vars = True
   p = base_model.SingleTaskModel.Params(task)
   model = p.Instantiate()
   self.assertIsNotNone(model.ema)
   model.ConstructFPropBPropGraph()
   with tf.variable_scope('base_mdl', reuse=True):
     beta = tf.get_variable('x/beta/var')
     mean = tf.get_variable('x/moving_mean/var')
     self.assertIsNotNone(model.ema.average(beta))
     self.assertIsNotNone(model.ema.average(mean))
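These EMA tests ultimately exercise tf.train.ExponentialMovingAverage, which keeps a shadow copy of every variable passed to apply() and whose average() returns None for variables it was never applied to; that distinction is exactly what the assertIsNotNone/assertIsNone checks in these examples verify. A minimal sketch of the primitive on its own (values are illustrative):

 import tensorflow.compat.v1 as tf
 tf.disable_eager_execution()

 w = tf.get_variable('w', shape=[2], initializer=tf.ones_initializer())
 ema = tf.train.ExponentialMovingAverage(decay=0.9)
 maintain_op = ema.apply([w])   # creates the shadow variable for w
 shadow = ema.average(w)        # would be None for a variable never passed to apply()

 with tf.Session() as sess:
     sess.run(tf.global_variables_initializer())
     sess.run(w.assign([3.0, 3.0]))
     sess.run(maintain_op)      # shadow := 0.9 * shadow + 0.1 * w
     print(sess.run(shadow))    # approximately [1.2, 1.2]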
Example #3
 def testExponentialMovingAverage(self):
   p = base_model.SingleTaskModel.Params()
   p.task = BaseTaskTest.TestParams()
   p.task.input = base_input_generator.BaseSequenceInputGenerator.Params()
   p.train.ema_decay = 0.9
   model = p.Instantiate()
   model._task.CreateChild('a',
                           layers.BatchNormLayer.Params().Set(name='a', dim=1))
   model._task._train_op = tf.no_op()
   model._task.ApplyExponentialMovingAverage(model.ema)
   with tf.variable_scope('', reuse=True):
     beta = tf.get_variable('a/beta/var')
     mean = tf.get_variable('a/moving_mean/var')
     self.assertIsNotNone(model.ema.average(beta))
     self.assertIsNone(model.ema.average(mean))
Example #4
 def testExponentialMovingAverage(self):
     p = base_model.SingleTaskModel.Params()
     p.task = BaseTaskTest.TestParams()
     p.task.input = base_input_generator.BaseSequenceInputGenerator.Params()
     p.task.train.ema_decay = 0.9
     p.task.train.ema_decay_moving_vars = False
     model = p.Instantiate()
     task = model._task
     task._train_op = tf.no_op()
     task.ApplyExponentialMovingAverage(model.ema)
     with tf.variable_scope('base_mdl', reuse=True):
         beta = tf.get_variable('x/beta/var')
         mean = tf.get_variable('x/moving_mean/var')
         self.assertIsNotNone(model.ema.average(beta))
         self.assertIsNone(model.ema.average(mean))
Example #5
 def testNoPS(self):
     p = cluster_factory.Cluster.Params()
     p.worker.name = '/job:trainer'
     p.worker.replicas = 1
     p.ps.name = '/job:trainer'
     p.ps.replicas = 1
     c = cluster_factory.Cluster(p)
     g = tf.Graph()
     vs = []
     with g.as_default():
         with tf.device(c.GetPlacer()):
             for i in range(10):
                 vs.append(tf.get_variable('x%d' % i, (10, 10, 10)))
             sum_all = tf.add_n(vs)
     for v in vs:
         self.assertEqual(
             v.device,
             cluster.MakeDeviceString(job_name='/job:trainer',
                                      task_id=0,
                                      device_name='CPU',
                                      device_id=0))
     self.assertEqual(
         sum_all.device,
         cluster.MakeDeviceString(job_name='/job:trainer',
                                  task_id=0,
                                  device_name='CPU',
                                  device_id=0))
Example #6
 def _CreateVariableStub(name,
                         params,
                         reuse=None,
                         trainable=True,
                         collections=None,
                         default_seed=None,
                         synchronization=None,
                         aggregation=None):
     """Return a zero tensor of the right shape instead of creating variable."""
     del reuse
     del default_seed
     del synchronization
     del aggregation
     dtype = params.dtype
     shape = py_utils.ToStaticShape(params.shape)
     # For total samples counters we have to actually create variables so that
     # we can access the 'value' attribute during construction.
     if 'total_samples' in name:
         var = tf.get_variable(name,
                               shape,
                               dtype,
                               tf.constant_initializer(0),
                               collections=collections,
                               trainable=trainable,
                               validate_shape=True)
     else:
         key = (tf.get_default_graph(), tuple(shape))
         if key in variable_cache:
             var = variable_cache[key]
         else:
             var = tf.zeros(shape, dtype)
             variable_cache[key] = var
     return var, var
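A stub like this is presumably installed in place of the framework's real variable-creation helper so that layer construction in tests stays cheap. The snippet itself does not show the wiring; the sketch below assumes the patch target is lingvo's py_utils.CreateVariable and that variable_cache is a module-level dict, both of which are guesses:

 from unittest import mock

 from lingvo.core import py_utils  # assumed import, as in the surrounding test module

 variable_cache = {}  # shared across _CreateVariableStub calls, keyed by (graph, shape)

 # Hypothetical wiring: route variable creation through the stub for the test's duration.
 with mock.patch.object(py_utils, 'CreateVariable', side_effect=_CreateVariableStub):
     layer = layer_params.Instantiate()  # layer_params is an illustrative layer config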
Example #7
 def testDefaultParams(self):
   p = cluster_factory.Cluster.Params()
   c = cluster_factory.Cluster(p)
   self.assertFalse(c.add_summary)
   g = tf.Graph()
   vs = []
   with g.as_default():
     with tf.device(c.GetPlacer()):
       for i in range(10):
         vs.append(tf.get_variable('x%d' % i, (10, 10, 10)))
       sum_all = tf.add_n(vs)
   for v in vs:
     self.assertEqual(
         v.device,
         c._MakeDeviceString(
             job_name='/job:localhost',
             task_id=0,
             device_name='CPU',
             device_id=0))
   self.assertEqual(
       sum_all.device,
       c._MakeDeviceString(
           job_name='/job:localhost',
           task_id=0,
           device_name='CPU',
           device_id=0))
Example #8
 def testPSWithGPUs(self):
     p = cluster_factory.Cluster.Params()
     p.worker.name = '/job:trainer'
     p.worker.replicas = 1
     p.ps.name = '/job:ps'
     p.ps.replicas = 4
     p.ps.gpus_per_replica = 2
     c = cluster_factory.Cluster(p)
     g = tf.Graph()
     vs = []
     with g.as_default():
         with tf.device(c.GetPlacer()):
             for i in range(10):
                 vs.append(tf.get_variable('x%d' % i, (10, 10, 10)))
             sum_all = tf.add_n(vs)
     for i, v in enumerate(vs):
         self.assertEqual(
             v.device,
             cluster.MakeDeviceString(job_name='/job:ps',
                                      task_id=(i // 2) % 4,
                                      device_name='GPU',
                                      device_id=i % 2))
     self.assertEqual(
         sum_all.device,
         cluster.MakeDeviceString(job_name='/job:trainer',
                                  task_id=0,
                                  device_name='CPU',
                                  device_id=0))
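The strings these placement tests compare are ordinary TensorFlow device strings; tf.DeviceSpec renders the same format, so (assuming lingvo's MakeDeviceString follows the standard form) a quick way to see what a placement like GPU 0 of '/job:ps' task 1 looks like is:

 import tensorflow.compat.v1 as tf

 spec = tf.DeviceSpec(job='ps', task=1, device_type='GPU', device_index=0)
 print(spec.to_string())  # /job:ps/task:1/device:GPU:0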
Example #9
    def ModuleFn(training):
        """Builds the graph and signature for the stub TF-hub module."""
        image_data = tf.placeholder(
            shape=[None, input_image_height, input_image_width, 3],
            dtype=tf.float32)
        # Linearly project image_data to shape [1, output_feature_dim] features.
        encoder_output = tf.compat.v1.layers.dense(
            tf.reshape(image_data,
                       [-1, input_image_height * input_image_width * 3]),
            output_feature_dim)

        # Add a non-trainable 'count' variable that can be updated through an
        # UPDATE_OP. This is analogous to a batch-norm moving average that should be
        # updated during fine-tuning.
        v = tf.get_variable('count',
                            initializer=0,
                            dtype=tf.int32,
                            trainable=False)
        if training:
            update_op = v.assign_add(1).op
            tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_op)

        hub.add_signature('default',
                          inputs={'images': image_data},
                          outputs=encoder_output)
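A module_fn like ModuleFn is the kind of callable hub.create_module_spec consumes. Below is a sketch of how this stub would typically be built and exported with the TF1 hub API; the export path is illustrative, and input_image_height, input_image_width and output_feature_dim are assumed to be defined in the enclosing test, as in the original:

 import tensorflow.compat.v1 as tf
 import tensorflow_hub as hub

 spec = hub.create_module_spec(
     ModuleFn,
     tags_and_args=[({'train'}, dict(training=True)),
                    (set(), dict(training=False))])

 with tf.Graph().as_default():
     module = hub.Module(spec, trainable=True, tags={'train'})
     with tf.Session() as sess:
         sess.run(tf.global_variables_initializer())
         module.export('/tmp/stub_module', sess)  # illustrative path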
Example #10
 def testDefaultParamsWithDynamicShape(self):
     p = cluster_factory.Cluster.Params()
     c = cluster_factory.Cluster(p)
     g = tf.Graph()
     vs = []
     with g.as_default():
         with tf.device(c.GetPlacer()):
             for i in range(10):
                 dyn_shape = tf.constant([2], dtype=tf.int32)
                 dyn_shape = tf.placeholder_with_default(dyn_shape,
                                                         shape=[None])
                 v = tf.get_variable('x%d_wb/var' % i,
                                     initializer=tf.random.uniform(
                                         dyn_shape, dtype=tf.float64),
                                     validate_shape=False)
                 vs.append(v)
             sum_all = tf.add_n(vs)
     for v in vs:
         self.assertEqual(
             v.device,
             cluster.MakeDeviceString(job_name='/job:localhost',
                                      task_id=0,
                                      device_name='CPU',
                                      device_id=0))
     self.assertEqual(
         sum_all.device,
         cluster.MakeDeviceString(job_name='/job:localhost',
                                  task_id=0,
                                  device_name='CPU',
                                  device_id=0))
Example #11
 def _CreateVariableStub(name,
                         params,
                         reuse=None,
                         trainable=True,
                         init_wrapper=None,
                         collections=None):
     """Return a zero tensor of the right shape instead of creating variable."""
     del reuse
     dtype = params.dtype
     shape = py_utils.ToStaticShape(params.shape)
     if init_wrapper:
         var = init_wrapper(dtype, tf.constant_initializer(0, dtype=dtype))
     # For total samples counters we have to actually create variables so that
     # we can access the 'value' attribute during construction.
     elif 'total_samples' in name:
         var = tf.get_variable(name,
                               shape,
                               dtype,
                               tf.constant_initializer(0, dtype=dtype),
                               collections=collections,
                               trainable=trainable,
                               validate_shape=True)
     else:
         key = hash(tuple(shape))
         if key in variable_cache:
             var = variable_cache[key]
         else:
             var = tf.zeros(shape, dtype)
             variable_cache[key] = var
     return var, var
Example #12
 def testExponentialMovingAverage(self):
   task = BaseTaskTest.TestParams()
   task.input = base_input_generator.BaseSequenceInputGenerator.Params()
   task.train.ema_decay = 0.9
   task.train.ema_decay_moving_vars = False
   p = base_model.SingleTaskModel.Params(task)
   model = p.Instantiate()
   self.assertIsNotNone(model.ema)
   model.ConstructFPropBPropGraph()
   # Test that EMA is accessible by a sublayer.
   x = model.GetTask().x
   self.assertIsNotNone(x.ema)
   self.assertIs(x.ema, model.ema)
   with tf.variable_scope('base_mdl', reuse=True):
     beta = tf.get_variable('x/beta/var')
     mean = tf.get_variable('x/moving_mean/var')
     self.assertIsNotNone(model.ema.average(beta))
     self.assertIsNone(model.ema.average(mean))
Example #13
 def testFactorizedMaxPool(self, input_shape, window_shape):
   weights = tf.get_variable("weights", shape=input_shape)
   pooling_kwargs = {
       "window_shape": window_shape,
       "pooling_type": "MAX",
       "strides": window_shape,
       "padding": "SAME"
   }
   self._compare_pooling_methods(weights, pooling_kwargs)
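The pooling_kwargs keys above mirror the arguments of tf.nn.pool; presumably _compare_pooling_methods runs a direct and a factorized version of that op over the weights and checks they agree. With plain TensorFlow the same kwargs would be used roughly like this (shapes are illustrative):

 import tensorflow.compat.v1 as tf
 tf.disable_eager_execution()

 weights = tf.get_variable('demo_weights', shape=[1, 8, 8, 4])
 pooled = tf.nn.pool(weights, window_shape=[2, 2], pooling_type='MAX',
                     strides=[2, 2], padding='SAME')  # shape [1, 4, 4, 4]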
Example #14
 def testVarWrapperTrackAssign(self):
     with tf.Graph().as_default():
         var = tf.get_variable('v0', shape=[8, 16], dtype=tf.float32)
         wrapper = var_tmp_wrappers.VarWrapperTrackAssign(var)
         ones = tf.ones_like(wrapper)
         a = wrapper.assign(ones)
         b = wrapper.assign_add(ones)
         c = wrapper.assign_sub(ones)
         self.assertSameElements(wrapper.previous_assigns(), [a, b, c])
Example #15
 def testStackedVarWrapperWithManualSharding(self):
     with tf.Graph().as_default():
         var = tf.get_variable('v2', shape=[8, 16], dtype=tf.float32)
         wrapper = var_tmp_wrappers.StackedVarWrapperWithManualSharding(var)
         ones = tf.ones_like(wrapper)
         wrapper.assign(ones)
         wrapper.assign_add(ones)
         wrapper.assign_sub(ones)
         self.assertEqual(ones.shape, [16])
Example #16
 def testTensorPartitioner(self):
     with tf.Session():
         w1 = tf.get_variable('w1', [255, 255], tf.float32)
         self.evaluate(tf.global_variables_initializer())
         partition_info = distributed_shampoo.PartitionConfig(200, 128)
         grad = tf.constant(w1.eval())
         metadata = distributed_shampoo.TensorPartitioner.partition_metadata(
             w1, partition_info)
         partitioned_grad = distributed_shampoo.TensorPartitioner.partition_tensor(
             w1, partition_info)
         reformed_grad = distributed_shampoo.TensorPartitioner.reform_tensor(
             partitioned_grad, metadata.num_splits_per_dim)
         self.assertAllCloseAccordingToType(reformed_grad, grad)
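The assertion checks that partitioning a tensor and reforming it is lossless. The exact semantics of PartitionConfig(200, 128) are not shown here, but conceptually the 255x255 variable is cut into a grid of blocks of bounded size per dimension and then stitched back together. A plain NumPy sketch of that round trip, with a block size of 128 chosen purely for illustration:

 import numpy as np

 def partition(x, block):
     """Split a 2-D array into a grid of blocks of at most `block` per dimension."""
     row_groups = np.array_split(x, list(range(block, x.shape[0], block)), axis=0)
     return [np.array_split(rows, list(range(block, x.shape[1], block)), axis=1)
             for rows in row_groups]

 def reform(blocks):
     """Stitch the grid of blocks back into a single array."""
     return np.concatenate([np.concatenate(row, axis=1) for row in blocks], axis=0)

 x = np.arange(255 * 255, dtype=np.float32).reshape(255, 255)
 blocks = partition(x, 128)                     # a 2x2 grid of 128- and 127-sized blocks
 np.testing.assert_allclose(reform(blocks), x)  # the round trip is exact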
Example #17
  def _create_slots(self, var_list):
    if not self._counter:
      self._counter = tf.get_variable(
          shape=[], initializer=tf.zeros_initializer, name='update_count')

    for v in var_list:
      vo = self._opt._zeros_slot(v, 'grad_accum', 'GradientAccumulator')  # pylint: disable=protected-access
      sharding = None
      try:
        sharding = gshard_utils.GetVarSharding(v)
      except ValueError:
        continue
      if sharding and not sharding.is_replicated:
        sharding.ApplyToVariable(vo)
Example #18
 def testPartitionedVariableMasking(self):
     partitioner = tf.variable_axis_size_partitioner(40)
     with self.cached_session() as session:
         with tf.variable_scope("", partitioner=partitioner):
             sparsity = tf.Variable(0.5, name="Sparsity")
             weights = tf.get_variable("weights",
                                       initializer=tf.linspace(
                                           1.0, 100.0, 100))
             masked_weights = pruning.apply_mask(
                 weights, scope=tf.get_variable_scope())
         p = pruning.Pruning(sparsity=sparsity)
         p._spec.threshold_decay = 0.0
         mask_update_op = p.mask_update_op()
         tf.global_variables_initializer().run()
         masked_weights_val = masked_weights.eval()
         session.run(mask_update_op)
         masked_weights_val = masked_weights.eval()
         self.assertAllEqual(np.count_nonzero(masked_weights_val), 50)
Example #19
def weight_threshold_variable(var, scope):
    """Create a scalar threshold for the weights.

  This function adds a variable 'threshold' to the graph.

  Args:
    var: The weight variable that needs to be masked
    scope: The variable scope of the variable var

  Returns:
    A scalar threshold variable initialized to 0.
  """
    with tf.variable_scope(scope):
        threshold = tf.get_variable('threshold', [],
                                    initializer=tf.zeros_initializer(),
                                    trainable=False,
                                    dtype=var.dtype)
        return threshold
Example #20
def weight_mask_variable(var, scope):
    """Create a mask for the weights.

  This function adds a variable 'mask' to the graph.

  Args:
    var: the weight variable that needs to be masked
    scope: The variable scope of the variable var

  Returns:
    the mask variable of the same size and shape as var, initialized to all 1s.
  """
    with tf.variable_scope(scope):
        mask = tf.get_variable('mask',
                               var.get_shape(),
                               initializer=tf.ones_initializer(),
                               trainable=False,
                               dtype=var.dtype)
    return mask
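Together, weight_mask_variable and weight_threshold_variable create the per-weight pruning state. A rough sketch of how a mask could be recomputed from the threshold and applied to the weights follows; this is illustrative only, since the library's actual update logic lives in pruning.apply_mask and Pruning.mask_update_op (see Example #18):

 import tensorflow.compat.v1 as tf
 tf.disable_eager_execution()

 weights = tf.get_variable('weights', initializer=tf.linspace(1.0, 100.0, 100))
 mask = weight_mask_variable(weights, 'pruning_demo')
 threshold = weight_threshold_variable(weights, 'pruning_demo')

 # Keep only weights whose magnitude exceeds the threshold.
 new_mask = tf.cast(tf.greater(tf.abs(weights), threshold), weights.dtype)
 update_mask_op = tf.assign(mask, new_mask)
 masked_weights = weights * mask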
Example #21
 def testRematerialize(self):
     # Test the dropout consistency between fprop and bprop.
     b = builder.Base.Params()
     b = b.Instantiate()
     start_block = layers.DeterministicDropoutLayer.Params().Set(
         name='start_dropout', keep_prob=0.7)
     # Build 4 dropout layers, each wrapped by RematerializeFn.
     num_blocks = 4
     blocks = []
     blocks_per_cell = 2
     for i in range(num_blocks):
         blocks.append(layers.DeterministicDropoutLayer.Params().Set(
             name='dropout_{}'.format(i), keep_prob=0.7))
     cells = []
     while blocks:
         heads, blocks = blocks[:blocks_per_cell], blocks[blocks_per_cell:]
         cell_name = 'cell_{}'.format(len(cells))
         cells.append(
             b._Rematerialize(name=cell_name,
                              body=b._Seq(cell_name, *heads)))
     with self.session(use_gpu=False, graph=tf.Graph()) as sess:
         tf.random.set_seed(12345)
         p = b._Seq('test', start_block, *cells)
         mdl = p.Instantiate()
         # y = mdl.FProp(x * w)
         # Fake input
         x = tf.ones([4, 5])
         # Construct weights.
         w = tf.get_variable('w',
                             shape=[4, 5],
                             initializer=tf.constant_initializer([[1] * 5] *
                                                                 4))
         y = mdl.FPropDefaultTheta(x * w)
         # Construct loss function such that gradients = final activation.
         # dy/dw = y = mdl.FProp(x * w) when w is 1.
         loss = tf.reduce_sum(y)
         grads = py_utils.ComputeGradients(loss, py_utils.NestedMap(w=w))
         tf.global_variables_initializer().run()
         y_val, grads_val = sess.run([y, grads.Transform(tuple)])
         grads_val = grads_val['w'][1]
         self.assertAllClose(y_val, grads_val)
         self.assertEqual(py_utils.GetStepSeed().eval(), 1553244033)
Example #22
 def testPSRandomSize(self):
   p = cluster_factory.Cluster.Params()
   p.worker.name = '/job:trainer'
   p.ps.name = '/job:ps'
   p.ps.replicas = 10
   c = cluster_factory.Cluster(p)
   g = tf.Graph()
   vs = []
   np.random.seed(301)
   with g.as_default():
     with tf.device(c.GetPlacer()):
       # Creates 200 variables with different sizes.
       for i in range(200):
         if i % 13:
           size = np.random.randint(10000)
         elif i % 7:
           size = np.random.randint(100)
         else:
           size = np.random.randint(10)
         vs.append(tf.get_variable('x%d' % i, shape=(size)))
       sum_all = tf.add_n([tf.reduce_sum(x) for x in vs])
   # Computes the total size of variables placed on each device.
   total_size = {}  # device name -> size
   for v in vs:
     size = tf.TensorShape(v.op.get_attr('shape')).num_elements()
     if v.device in total_size:
       total_size[v.device] += size
     else:
       total_size[v.device] = size
   for (device, allocated) in zip(
       sorted(total_size),
       [91701, 91361, 90346, 88738, 87240, 89265, 91944, 92472, 88051, 95053]):
     self.assertEqual(total_size[device], allocated)
   self.assertEqual(
       sum_all.device,
       cluster.MakeDeviceString(
           job_name='/job:trainer',
           replica_id=0,
           task_id=0,
           device_name='CPU',
           device_id=0))
Example #23
    def _TestSaveRestoreHelper(self, direction):
        """Test opaque params stay 'equivalent' after save-restore."""
        input_dim = 4
        cell_dim = 3

        with tf.variable_scope('s1'):
            params_size_t = self._ParamsSize(input_dim, cell_dim, direction)
            params = tf.get_variable('cudnn_params',
                                     initializer=tf.random_uniform(
                                         [params_size_t]),
                                     validate_shape=False)
            reset_params_op = tf.assign(params, tf.zeros_like(params))
            cur_scope_name = tf.get_variable_scope().name
            saveable = self._CreateSaveable(params, input_dim, cell_dim,
                                            direction, cur_scope_name)
            canonical_wts, canonical_bs = (
                saveable.format_converter._opaque_to_cu_canonical(
                    saveable._variables))
            saver = saver_lib.Saver()
        with self.session(use_gpu=True) as sess:
            sess.run(tf.global_variables_initializer())
            save_path = os.path.join(self.get_temp_dir(), 'save-restore-unidi')
            saver.save(sess, save_path)
            canonical_wts_v, canonical_bs_v = sess.run(
                [canonical_wts, canonical_bs])

        with self.session(use_gpu=False) as sess:
            sess.run(tf.global_variables_initializer())
            sess.run(reset_params_op)
            saver.restore(sess, save_path)
            canonical_wts_v_restored, canonical_bs_v_restored = sess.run(
                [canonical_wts, canonical_bs])
            # The weight portion of the opaque params is exactly the same. For the
            # bias portion, the sum of the biases for each gate is expected to stay
            # the same.
            self._CompareWeights(canonical_wts_v, canonical_wts_v_restored)
            self._CompareBiases(canonical_bs_v, canonical_bs_v_restored,
                                direction)
Example #24
    def testDropoutInRecurrent(self, graph_seed):
        with self.session() as sess:
            if graph_seed:
                tf.random.set_seed(12345)
            l = lingvo_layers.DeterministicDropoutLayer.Params().Set(
                name='dropout', keep_prob=0.7).Instantiate()
            # Input variable.
            w = tf.get_variable('w',
                                shape=[9, 20],
                                initializer=tf.ones_initializer())
            sess.run(tf.global_variables_initializer())
            prev_sum = np.sum(np.isclose(sess.run(w), 0.0))

            def Step(theta, state0, unused_inputs):
                w = l.FProp(theta.l, state0.w)
                state1 = py_utils.NestedMap(w=w)
                return state1, py_utils.NestedMap()

            acc, final = recurrent.Recurrent(
                theta=py_utils.NestedMap(l=l.theta),
                state0=py_utils.NestedMap(w=w),
                inputs=py_utils.NestedMap(x=tf.zeros([4])),
                cell_fn=Step)

            acc_w = sess.run(acc.w)
            self.assertLen(acc_w, 4)
            for acc_w_i in acc_w:
                next_sum = np.sum(np.isclose(acc_w_i, 0.0))
                self.assertGreater(next_sum, prev_sum)
                prev_sum = next_sum

            # Construct loss function such that gradients = final activation.
            loss = tf.reduce_sum(final.w)
            grads = py_utils.ComputeGradients(loss, py_utils.NestedMap(w=w))
            w_val, grads_val = sess.run([final.w, grads.w.grad])
            self.assertAllClose(w_val, grads_val)