def _CreateVariableStub(name,
                         params,
                         reuse=None,
                         trainable=True,
                         init_wrapper=None,
                         collections=None,
                         default_seed=None):
     """Return a zero tensor of the right shape instead of creating variable."""
     del reuse
     del default_seed
     dtype = params.dtype
     shape = py_utils.ToStaticShape(params.shape)
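     # When an init_wrapper is supplied, delegate to it with a zero
     # initializer so the caller controls how the stand-in tensor is built.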
     if init_wrapper:
         var = init_wrapper(dtype, tf.constant_initializer(0, dtype=dtype))
     # For total samples counters we have to actually create variables so that
     # we can access the 'value' attribute during construction.
     elif 'total_samples' in name:
         var = tf.get_variable(name,
                               shape,
                               dtype,
                               tf.constant_initializer(0, dtype=dtype),
                               collections=collections,
                               trainable=trainable,
                               validate_shape=True)
     else:
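         # Reuse one zeros tensor per (graph, shape) pair via the
         # module-level variable_cache dict, assumed to be defined above.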
         key = (tf.get_default_graph(), tuple(shape))
         if key in variable_cache:
             var = variable_cache[key]
         else:
             var = tf.zeros(shape, dtype)
             variable_cache[key] = var
     return var, var
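In practice a stub like this is monkey-patched over the framework's variable-creation routine so that layer construction runs without allocating real variables. A minimal sketch of that pattern, assuming the stub mirrors the signature of lingvo's py_utils.CreateVariable (the patch target and the params object p are assumptions for illustration, not confirmed by this snippet):

from unittest import mock

# The stub reads a module-level cache; it must exist before any call.
variable_cache = {}

# Hypothetical patch target: py_utils.CreateVariable is assumed to be the
# routine layers invoke to create weights, matching the stub's signature.
with mock.patch.object(py_utils, 'CreateVariable', _CreateVariableStub):
    mdl = p.Instantiate()  # p: hypothetical layer params; built variable-free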
Example #2
 def testRematerialize(self):
     # Test the dropout consistency between fprop and bprop.
     b = builder.Base.Params()
     b = b.Instantiate()
     start_block = layers.DeterministicDropoutLayer.Params().Set(
         name='start_dropout', keep_prob=0.7)
     # Build 4 dropout layers; below they are grouped into cells, each
     # wrapped by RematerializeFn.
     num_blocks = 4
     blocks = []
     blocks_per_cell = 2
     for i in range(num_blocks):
         blocks.append(layers.DeterministicDropoutLayer.Params().Set(
             name='dropout_{}'.format(i), keep_prob=0.7))
     cells = []
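     # Pop blocks_per_cell layers at a time; each group becomes one cell.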
     while blocks:
         heads, blocks = blocks[:blocks_per_cell], blocks[blocks_per_cell:]
         cell_name = 'cell_{}'.format(len(cells))
         cells.append(
             b._Rematerialize(name=cell_name,
                              body=b._Seq(cell_name, *heads)))
     with self.session(use_gpu=False, graph=tf.Graph()) as sess:
         tf.random.set_seed(12345)
         p = b._Seq('test', start_block, *cells)
         mdl = p.Instantiate()
         # y = mdl.FProp(x * w)
         # Fake input
         x = tf.ones([4, 5])
         # Construct weights.
         w = tf.get_variable(
             'w', shape=[4, 5],
             initializer=tf.constant_initializer([[1] * 5] * 4))
         y = mdl.FPropDefaultTheta(x * w)
         # Construct loss function such that gradients = final activation.
         # dy/dw = y = mdl.FProp(x * w) when w is 1.
         loss = tf.reduce_sum(y)
         grads = py_utils.ComputeGradients(loss, py_utils.NestedMap(w=w))
         tf.global_variables_initializer().run()
         y_val, grads_val = sess.run([y, grads.Transform(tuple)])
         grads_val = grads_val['w'][1]
         self.assertAllClose(y_val, grads_val)
         self.assertEqual(py_utils.GetStepSeed().eval(), 1553244033)
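For context, _Rematerialize implements gradient checkpointing: each cell discards its forward activations and recomputes them during backprop, trading compute for memory. A minimal standalone sketch of the same idea, using TensorFlow's public tf.recompute_grad (an assumption for illustration; the test itself goes through lingvo's RematerializeFn), with a stateless dropout mask standing in for DeterministicDropoutLayer so the recomputed forward pass reuses the exact same mask:

import tensorflow as tf


@tf.recompute_grad  # forward activations are recomputed during backprop
def cell(x):
    # Deterministic dropout: a fixed seed guarantees the recomputed
    # forward pass draws the identical mask, mirroring what
    # DeterministicDropoutLayer provides in the test above.
    keep_prob = 0.7
    mask = tf.cast(
        tf.random.stateless_uniform(tf.shape(x), seed=[12345, 0]) < keep_prob,
        x.dtype)
    return x * mask / keep_prob


x = tf.ones([4, 5])
with tf.GradientTape() as tape:
    tape.watch(x)
    y = tf.reduce_sum(cell(cell(x)))
grad = tape.gradient(y, x)  # activations are rebuilt here, masks unchanged

Without the fixed seed, a stateful dropout op would draw a fresh mask during the recomputation, and gradients would no longer match the forward pass; that consistency is exactly what this test asserts.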