Code Example #1
File: gshard_builder_test.py Project: vcj-huy/lingvo
    def testEmbedding(self):
        builder = gshard_builder.DenseBuilder.Params().Set(
            model_dim=4, model_dim_reshape_segments=2).Instantiate()
        ids = [[1, 2, 3], [3, 2, 1]]
        graph = tf.Graph()
        with graph.as_default():
            tf.random.set_seed(24332)
            py_utils.GetOrCreateGlobalStepVar()
            emb_layer_p = builder.Embedding('emb', vocab_dim=4)
            emb_layer = emb_layer_p.Instantiate()
            enc_out = emb_layer.FPropDefaultTheta(
                tf.convert_to_tensor(ids, dtype=tf.int32))

        expected_val = [[[[-0.67452705, -2.6386688], [1.1666715, 0.04592554]],
                         [[-1.0561675, -0.48270327], [0.7765603, 0.6768117]],
                         [[0.8349989, 0.67100984], [-0.15557083, 1.275625]]],
                        [[[0.8349989, 0.67100984], [-0.15557083, 1.275625]],
                         [[-1.0561675, -0.48270327], [0.7765603, 0.6768117]],
                         [[-0.67452705, -2.6386688], [1.1666715, 0.04592554]]]]
        with self.session(graph=graph) as sess:
            sess.run(tf.global_variables_initializer())
            enc_out_vals = sess.run(enc_out)
            self.assertAllClose(expected_val, enc_out_vals)
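Note: the check above reduces to a table lookup followed by reshaping the model dimension into segments. A minimal NumPy sketch of the same computation (the table values here are made up for illustration):

import numpy as np

# Hypothetical 4x4 table: vocab_dim=4 rows, model_dim=4 columns.
emb = np.arange(16, dtype=np.float32).reshape(4, 4)
ids = np.array([[1, 2, 3], [3, 2, 1]])

out = emb[ids]                 # Plain lookup: [batch=2, time=3, model_dim=4].
out = out.reshape(2, 3, 2, 2)  # model_dim_reshape_segments=2 splits the last dim.
print(out.shape)               # (2, 3, 2, 2) -- the shape of expected_val above.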
Code Example #2
  def testBProp(self):
    with self.session():
      tf.random.set_seed(_TF_RANDOM_SEED)
      p = self._testParams()
      mdl = p.Instantiate()
      mdl.FPropDefaultTheta()
      mdl.BProp()
      loss = mdl.loss
      logp = mdl.eval_metrics['log_pplx'][0]

      self.evaluate(tf.global_variables_initializer())
      vals = []
      for _ in range(5):
        vals += [self.evaluate((loss, logp, mdl.train_op))[:2]]
      print('bprop actual vals = %s' % np.array_repr(np.array(vals)))
      expected_vals = [
          [226.92014, 10.373492],
          [225.25146, 9.585169],
          [248.49757, 9.8904505],
          [212.02884, 10.943424],
          [314.57098, 11.983657],
      ]
      self.assertAllClose(vals, expected_vals, atol=1e-3)
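Note: fetching `(loss, logp, mdl.train_op)` in one `evaluate` call runs a single training step and returns the metrics computed by the same forward pass that produced the gradients. A self-contained sketch of this graph-mode pattern on a toy model (plain TensorFlow, no lingvo):

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

x = tf.constant([[1.], [2.], [3.]])
y = tf.constant([[2.], [4.], [6.]])
w = tf.get_variable('w', shape=[1, 1], initializer=tf.zeros_initializer())
loss = tf.reduce_mean((tf.matmul(x, w) - y)**2)
train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  # Each run applies one update; the slice keeps the loss and drops the op
  # result, mirroring the [:2] slice in the lingvo test above.
  vals = [sess.run((loss, train_op))[:1] for _ in range(5)]
  print(vals)  # Losses decrease on this convex toy problem.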
Code Example #3
File: decoder_test.py Project: wangfeng012316/lingvo
  def testDecoderFPropWithMeanSeqLoss(self):
    """Create and fprop a decoder with different dims per layer."""
    with self.session(use_gpu=False):
      tf.random.set_seed(8372749040)

      p = _DecoderParams(
          vn_config=py_utils.VariationalNoiseParams(
              None, True, False, seed=12345))
      p.token_normalized_per_seq_loss = True
      p.per_token_avg_loss = False

      metrics, per_sequence_loss = self._getDecoderFPropMetrics(params=p)
      self.evaluate(tf.global_variables_initializer())
      metrics_val, per_sequence_loss_val = self.evaluate(
          [metrics, per_sequence_loss])
      tf.logging.info('metrics=%s, per_sequence_loss=%s', metrics_val,
                      per_sequence_loss_val)

      self.assertNotEqual(metrics_val['loss'][0], metrics_val['log_pplx'][0])
      self.assertAllClose(metrics_val['loss'], (3.484608, 4.0))
      self.assertAllClose(metrics_val['log_pplx'], (3.496482, 15.0))
      # Target batch size is 4. Therefore, we should expect 4 here.
      self.assertEqual(per_sequence_loss_val.shape, (4,))
Code Example #4
  def testForwardPass(self):
    with self.session(use_gpu=False):
      tf.random.set_seed(8372749040)
      p = self._EncoderParams()
      mt_enc = encoder.MTEncoderV1(p)
      batch = py_utils.NestedMap()
      batch.ids = tf.transpose(tf.reshape(tf.range(0, 8, 1), [4, 2]))
      batch.paddings = tf.zeros([2, 4])
      enc_out = mt_enc.FPropDefaultTheta(batch).encoded

      self.evaluate(tf.global_variables_initializer())
      actual_enc_out = enc_out.eval()
      expected_enc_out = [
          [[1.5309354e-06, -1.7816075e-07, 3.8047763e-06, -5.6422067e-07],
           [1.9017770e-06, -2.9778969e-06, -4.5083775e-06, -1.7054812e-06]],
          [[-2.1852782e-06, -1.8208171e-06, -1.4747930e-06, -5.8206351e-06],
           [6.7667429e-07, -3.6828042e-06, -1.0916860e-05, -3.2522742e-06]],
          [[-3.2333378e-07, 3.2147584e-06, 5.0556650e-07, -7.0188378e-07],
           [-6.5340635e-07, 1.9502845e-06, -9.2459632e-06, 5.1955390e-06]],
          [[2.0232728e-06, 4.9331529e-06, 1.1346837e-06, 7.5571520e-06],
           [-5.8475212e-07, 3.5547487e-06, -3.9037773e-06, 8.9575424e-06]]
      ]
      self.assertAllClose(expected_enc_out, actual_enc_out)
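Note: `tf.transpose(tf.reshape(tf.range(0, 8, 1), [4, 2]))`, used here and in several encoder tests below, builds a deterministic `[batch=2, time=4]` id matrix. The NumPy equivalent, for reference:

import numpy as np

ids = np.arange(8).reshape(4, 2).T
print(ids)
# [[0 2 4 6]
#  [1 3 5 7]]  -- shape (2, 4), matching paddings = tf.zeros([2, 4]).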
Code Example #5
    def testScaleGradientsCheckNumerics(self):
        """ScaleGradients when enable_check_numerics=True."""
        FLAGS.enable_check_numerics = True
        p = self.TestParams()
        p.input = base_input_generator.BaseSequenceInputGenerator.Params()
        task = p.Instantiate()
        task.CreateVariable(
            'a',
            py_utils.WeightParams(shape=[],
                                  init=py_utils.WeightInit.Constant(0)))
        var_a = task.theta.a
        # Make a NaN gradient.
        var_grads = py_utils.NestedMap(
            a=py_utils.VarGrad(var_a, 0. * tf.math.log(0.)))
        scaled_grads_map = task.learners[0].ScaleGradients(var_grads)

        with self.session():
            self.evaluate(tf.global_variables_initializer())
            self.assertEqual(0., scaled_grads_map.grad_scale.eval())
            # Fetching the gradient raises an exception with enable_check_numerics.
            with self.assertRaisesRegex(tf.errors.InvalidArgumentError,
                                        'is not finite'):
                _ = scaled_grads_map.final_var_grads.a[1].eval()
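Note: the NaN gradient is manufactured as `0. * tf.math.log(0.)`, i.e. `0 * -inf`. How a check-numerics guard turns such a value into a fetch-time error can be sketched with stock TensorFlow (lingvo's own wrapper may attach a differently worded message):

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

nan_value = 0. * tf.math.log(0.)  # 0 * -inf == NaN
checked = tf.debugging.check_numerics(nan_value, 'gradient is not finite')

with tf.Session() as sess:
  try:
    sess.run(checked)
  except tf.errors.InvalidArgumentError as e:
    print(e.message)  # Includes 'gradient is not finite' and mentions NaN.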
Code Example #6
File: learner_test.py Project: vcj-huy/lingvo
def _testLearner(self, layer, learner_p):
    tf.train.get_or_create_global_step()  # needed for lr_schedule
    lrnr = learner_p.Instantiate()
    if isinstance(learner_p.loss_name, (list, tuple)):
        main_loss = layer.MainLoss(layer.theta)
        aux_loss = layer.AuxLoss(layer.theta)
        metrics = {
            'main_loss': (main_loss, 1.),
            'aux_loss': (aux_loss, 1.)
        }
        expected_losses = [main_loss, aux_loss]
    else:
        loss = layer.Loss(layer.theta)
        metrics = {learner_p.name: (loss, 1.)}
        expected_losses = [loss]
    losses, update_op, eval_metrics = lrnr.Apply(metrics, layer.vars)
    self.assertAllEqual(losses, expected_losses)
    with self.session():
        self.evaluate(tf.global_variables_initializer())
        var_grads = self.evaluate(lrnr.GetVarGrads().Transform(tuple))
        update_op.run()
        updated_vars = self.evaluate(layer.vars)
        return var_grads, updated_vars, eval_metrics
Code Example #7
    def __init__(self, train_dir, model):
        """Initialize Checkpointer.

    Args:
     train_dir: Training directory for saving checkpoints.
     model: Model.
    """
        self._train_dir = train_dir
        self._model = model
        self._params = model.params

        self._vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        self._uninitialized_vars = tf.report_uninitialized_variables(
            self._vars)
        self._initialize_vars = tf.global_variables_initializer()

        self._save_path = os.path.join(self._train_dir, 'ckpt')
        self._model_tasks = model.tasks

        tp = self._params.train
        self._save_interval_seconds = tp.save_interval_seconds
        self._next_checkpoint_seconds = 0
        self._saver = self._GetSaver()
Code Example #8
  def testBiEncoderForwardPassWithDropout(self):
    with self.session(use_gpu=False):
      tf.random.set_seed(8372749040)
      p = self._BiEncoderParams()
      p.dropout_prob = 0.5
      mt_enc = encoder.MTEncoderBiRNN(p)
      batch = py_utils.NestedMap()
      batch.ids = tf.transpose(tf.reshape(tf.range(0, 8, 1), [4, 2]))
      batch.paddings = tf.zeros([2, 4])
      enc_out = mt_enc.FPropDefaultTheta(batch).encoded

      self.evaluate(tf.global_variables_initializer())
      actual_enc_out = enc_out.eval()
      print('bi_enc_actual_enc_out_with_dropout', np.array_repr(actual_enc_out))
      expected_enc_out = [[[1.60383240e-06, 1.22550023e-06],
                           [-7.21660126e-06, 1.05704457e-05]],
                          [[1.42539475e-05, -2.06075638e-05],
                           [-4.98754298e-06, 1.51066461e-05]],
                          [[-7.15192800e-06, -6.44075908e-06],
                           [5.02962678e-07, -3.40795486e-06]],
                          [[-6.54424548e-06, 9.88359807e-06],
                           [1.42836643e-06, -1.68607176e-06]]]
      self.assertAllClose(expected_enc_out, actual_enc_out)
Code Example #9
File: encoder_test.py Project: galv/lingvo-copy
    def testBiEncoderForwardPass(self):
        with self.session(use_gpu=False):
            tf.random.set_seed(8372749040)
            p = self._BiEncoderParams()
            mt_enc = encoder.MTEncoderBiRNN(p)
            batch = py_utils.NestedMap()
            batch.ids = tf.transpose(tf.reshape(tf.range(0, 8, 1), [4, 2]))
            batch.paddings = tf.zeros([2, 4])
            enc_out = mt_enc.FPropDefaultTheta(batch).encoded

            self.evaluate(tf.global_variables_initializer())
            actual_enc_out = enc_out.eval()
            tf.logging.info('testBiEncoderForwardPass actual_enc_out %r' %
                            actual_enc_out)
            expected_enc_out = [[[-2.47998378e-06, 7.36457878e-06],
                                 [7.89248020e-07, -2.67464316e-06]],
                                [[-2.98803275e-06, 8.20233890e-06],
                                 [1.00139073e-06, -2.24554151e-06]],
                                [[-5.06675951e-06, 1.15983785e-05],
                                 [-4.58391014e-07, -2.99553108e-07]],
                                [[-4.34937465e-06, 8.58816838e-06],
                                 [-1.74859031e-06, 3.99598093e-06]]]
            self.assertAllClose(expected_enc_out, actual_enc_out)
Code Example #10
File: gpipe_test.py Project: ruby11dog/lingvo
def testDummyPipelineCnnNestedMapInput(self):
  batch_size = 16
  num_layers = 4
  cells = []
  with self.session(graph=tf.Graph()) as sess:
    for i in range(num_layers):
      cells.append(_SimpyLayerWithNestedMapInput.Params().Set(
          name='layer_{}'.format(i)))
    p = PipeliningLayer.Params().Set(
        name='pipeline',
        num_micro_batches=8,
        micro_batch_size=2,
        nested_map_fprop=True,
        cell_tpl=cells,
        before_tpl=[])
    layer = p.Instantiate()
    tf.set_random_seed(1245)
    inputs = tf.random_uniform([batch_size, 8, 8, 1], seed=12345)
    outputs = layer.FPropDefaultTheta(
        py_utils.NestedMap(vec=inputs, paddings=None))
    sess.run(tf.global_variables_initializer())
    sess.run(outputs.vec)
    self.assertEqual(outputs.vec.shape, (batch_size, 8, 8, 1))
Code Example #11
File: gshard_builder_test.py Project: vcj-huy/lingvo
    def testEncNotVisible(self):
        def _Notvisible(x):
            a, b = tf.expand_dims(x, -1), tf.expand_dims(x, -2)
            return tf.cast(
                tf.math.logical_or(
                    tf.not_equal(a, b),
                    # also ignoring segment_id=0
                    tf.math.logical_not(
                        tf.math.logical_or(tf.cast(a, tf.bool),
                                           tf.cast(b, tf.bool)))),
                tf.float32)

        builder = gshard_builder.DenseBuilder.Params().Set(
            dtype=tf.float32).Instantiate()
        graph = tf.Graph()
        with graph.as_default():
            segment_ids = tf.convert_to_tensor([[1, 1, 1, 1]], dtype=tf.int32)
            y = builder._EncNotVisible(segment_ids, segment_ids)
            y2 = _Notvisible(segment_ids)
        with self.session(graph=graph) as sess:
            sess.run(tf.global_variables_initializer())
            y_val, y2_val = sess.run([y, y2])
            self.assertAllEqual(y_val, y2_val)
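Note: `_Notvisible` computes an attention mask over segment ids: a pair of positions is masked (1.0) when their segment ids differ, or when both ids are 0 (padding). A direct NumPy transcription of the reference function above, with a small worked example:

import numpy as np

def not_visible(seg_ids):
  a = seg_ids[..., :, None]  # [..., len, 1]
  b = seg_ids[..., None, :]  # [..., 1, len]
  both_padding = np.logical_not(np.logical_or(a != 0, b != 0))
  return np.logical_or(a != b, both_padding).astype(np.float32)

print(not_visible(np.array([[1, 1, 2, 0]]))[0])
# [[0. 0. 1. 1.]
#  [0. 0. 1. 1.]
#  [1. 1. 0. 1.]
#  [1. 1. 1. 1.]]
# Positions 0-1 (segment 1) see each other, position 2 sees only itself,
# and position 3 (segment 0, padding) is masked even against itself.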
Code Example #12
File: encoder_test.py Project: galv/lingvo-copy
    def testUniEncoderForwardPass(self):
        with self.session(use_gpu=False):
            tf.random.set_seed(8372749040)
            p = self._UniEncoderParams()
            mt_enc = encoder.MTEncoderUniRNN(p)
            batch = py_utils.NestedMap()
            batch.ids = tf.transpose(tf.reshape(tf.range(0, 8, 1), [4, 2]))
            batch.paddings = tf.zeros([2, 4])
            enc_out = mt_enc.FPropDefaultTheta(batch).encoded

            self.evaluate(tf.global_variables_initializer())
            actual_enc_out = enc_out.eval()
            tf.logging.info('testUniEncoderForwardPass actual_enc_out %r' %
                            actual_enc_out)
            expected_enc_out = [[[-4.3304257e-07, 5.4100457e-07],
                                 [-4.0170832e-07, -2.6441572e-07]],
                                [[-1.7024040e-07, -1.8555815e-07],
                                 [-6.4563977e-07, -3.7835261e-07]],
                                [[-2.4001852e-07, 5.1114228e-07],
                                 [-3.4349023e-07, -1.0049351e-06]],
                                [[1.8068013e-07, -6.8982729e-08],
                                 [3.3005003e-07, -8.8834116e-07]]]
            self.assertAllClose(expected_enc_out, actual_enc_out)
Code Example #13
    def testScaleGradients(self):
        p = self.TestParams()
        p.input = base_input_generator.BaseSequenceInputGenerator.Params()
        task = p.Instantiate()
        task.CreateVariable(
            'a',
            py_utils.WeightParams(shape=[],
                                  init=py_utils.WeightInit.Constant(0)))
        var_a = task.theta.a
        var_grads = py_utils.NestedMap(
            a=py_utils.VarGrad(var_a, tf.ones_like(var_a)))
        scaled_grads_map = task.learners[0].ScaleGradients(var_grads)

        FLAGS.enable_check_numerics = False
        with self.session():
            self.evaluate(tf.global_variables_initializer())
            self.assertEqual(1.0, scaled_grads_map.grad_scale.eval())
            # The final gradient must be finite.
            self.assertFalse(
                tf.math.is_nan(scaled_grads_map.final_var_grads.a[1]).eval())
            self.assertTrue(
                tf.math.is_finite(
                    scaled_grads_map.final_var_grads.a[1]).eval())
Code Example #14
    def testFPropNoPostGatingRNN(self):
        vocab, time, batch = 7, 13, 3
        p = self._MoeLmParams(vocab, False, False)

        with self.session(graph=tf.Graph()) as sess:
            np.random.seed(54321)
            tf.random.set_seed(123456)
            lm = p.Instantiate()
            inputs, paddings, labels = self._GetData(vocab, time, batch)
            sess.run(tf.global_variables_initializer())
            xent_output, state1 = lm.FPropDefaultTheta(inputs=inputs,
                                                       paddings=paddings,
                                                       state0=lm.zero_state(
                                                           lm.theta, batch),
                                                       labels=labels)

            xent_output_val, state1_val = sess.run([xent_output, state1])

            print('xent_output_val', xent_output_val)
            print('state1', state1_val)
            test_utils.CompareToGoldenSingleFloat(self, 1.9443978, xent_output_val.avg_xent)  # pyformat: disable pylint: disable=line-too-long
            self.assertAllEqual(xent_output_val.per_example_argmax,
                                np.argmax(xent_output_val.logits, axis=-1))
Code Example #15
    def testBProp(self):
        with self.session() as sess:
            tf.random.set_seed(_TF_RANDOM_SEED)
            p = self._testParams()
            mdl = p.Instantiate()
            mdl.FPropDefaultTheta()
            mdl.BProp()
            loss = mdl.loss
            logp = mdl.eval_metrics['log_pplx'][0]

            self.evaluate(tf.global_variables_initializer())
            vals = []
            for _ in range(5):
                vals += [sess.run((loss, logp, mdl.train_op))[:2]]
            print('BProp actual vals = ', vals)
            expected_vals = [
                [233.57518, 10.381119],
                [236.05138, 10.375884],
                [217.9087, 10.376605],
                [217.77725, 10.370345],
                [159.43497, 10.369753],
            ]
            self.assertAllClose(vals, expected_vals)
Code Example #16
  def testRnnStackStepNoContext(self):
    with self.session(use_gpu=False):
      p = rnn_steps.RnnStackStep.Params()
      p.name = 'rnn_stack_step'
      p.rnn_cell_tpl.params_init = py_utils.WeightInit.Uniform(1.24, 429891685)
      p.rnn_cell_tpl.bias_init = py_utils.WeightInit.Uniform(1.24, 429891685)
      p.rnn_cell_tpl.vn.global_vn = False
      p.rnn_cell_tpl.vn.per_step_vn = False
      p.step_input_dim = 1
      p.rnn_cell_dim = 3
      p.rnn_cell_hidden_dim = 3
      p.rnn_layers = 2
      p.residual_start = 0
      rnn_stack = p.Instantiate()

      packed = rnn_stack.PrepareExternalInputs(rnn_stack.theta,
                                               py_utils.NestedMap())
      state0 = rnn_stack.ZeroState(rnn_stack.theta, packed, 1)
      output1, state1 = rnn_stack.FProp(
          rnn_stack.theta, packed,
          py_utils.NestedMap(inputs=[tf.constant([[4]], tf.float32)]),
          tf.constant([0.0], dtype=tf.float32), state0)

      self.evaluate(tf.global_variables_initializer())
      output1, state1 = self.evaluate([output1, state1])

      self.assertAllClose(output1.output, [[5.900284, 3.0231729, 3.0207822]])
      self.assertAllClose(
          state1, {
              'sub': [{
                  'm': [[1.1416901, -0.32166323, -0.5909376]],
                  'c': [[-0.98086286, 0.9052862, 0.10041453]]
              }, {
                  'm': [[0.7585938, -0.655164, -0.3882802]],
                  'c': [[-8.3011830e-01, 1.8685710e-01, 1.0723456e-04]]
              }]
          })
Code Example #17
  def _testGradDrop(self, graddrop_params):
    batch_size, dims = 4, 5
    gd_layer = graddrop_params.Set(name='test_gd_layer').Instantiate()
    linear_layer = builder_layers.LinearLayer.Params().Set(
        name='test_linear_layer', input_dims=dims,
        output_dims=dims).Instantiate()

    x = tf.random.uniform((batch_size, dims))
    x = linear_layer.FPropDefaultTheta(x)

    # Make a copy of x after graddrop.
    x_gd = gd_layer.FPropDefaultTheta(x)

    # Compute a loss based on graddrop's version of x.
    gd_loss_0 = tf.reduce_sum(x_gd**2)
    gd_loss_1 = tf.reduce_sum(-tf.abs(x_gd))
    gd_layer.SetLosses([
        (gd_loss_0, 0.1),
        (gd_loss_1, 0.2),
    ])
    gd_total_loss = gd_loss_0 + gd_loss_1
    gd_grad = tf.gradients(gd_total_loss, x)

    # Compute the same loss based on the regular version of x.
    loss_0 = tf.reduce_sum(x**2)
    loss_1 = tf.reduce_sum(-tf.abs(x))
    total_loss = loss_0 + loss_1
    grad = tf.gradients(total_loss, x)

    with self.session() as sess:
      sess.run(tf.global_variables_initializer())
      actual_total_loss, actual_grad, actual_gd_total_loss, actual_gd_grad = (
          sess.run([total_loss, grad, gd_total_loss, gd_grad]))

    # Verify that losses are similar, but the gradients are different.
    self.assertAllClose(actual_total_loss, actual_gd_total_loss)
    self.assertNotAllClose(actual_grad, actual_gd_grad)
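Note: a GradDrop-style layer is the identity in the forward pass but rewrites gradients in the backward pass, which is why the two losses agree while the gradients differ. The general mechanism can be sketched with `tf.custom_gradient` (an illustration of the pattern only, not lingvo's GradDrop algorithm):

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

@tf.custom_gradient
def identity_with_halved_grad(x):
  def grad(dy):
    return 0.5 * dy  # Forward is identity; backward rescales the gradient.
  return tf.identity(x), grad

x = tf.constant([1., 2., 3.])
y = identity_with_halved_grad(x)
loss_plain = tf.reduce_sum(x**2)
loss_via_layer = tf.reduce_sum(y**2)
g_plain = tf.gradients(loss_plain, x)[0]
g_layer = tf.gradients(loss_via_layer, x)[0]

with tf.Session() as sess:
  print(sess.run([loss_plain, loss_via_layer]))  # Equal: [14.0, 14.0]
  print(sess.run([g_plain, g_layer]))  # Differ: [2. 4. 6.] vs [1. 2. 3.]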
Code Example #18
File: layers_test.py Project: shadowkun/lingvo
  def testTransformerStackAlternateLayers(self):
    batch = 3
    tf.flags.FLAGS.tpu_compatible = True
    with self.session(use_gpu=False) as sess:
      model_dim = 2
      num_transformer_layers = 2
      transformer_tpl = layers_with_attention.TransformerLayer.Params()
      transformer_tpl.tr_atten_tpl.num_attention_heads = 1
      transformer_tpl.tr_fflayer_tpl.hidden_dim = 2

      params = mt_layers.TransformerStack.Params().Set(
          name='transformer',
          model_dim=model_dim,
          num_transformer_layers=num_transformer_layers,
          transformer_tpl=[
              transformer_tpl.Copy() for _ in range(num_transformer_layers)
          ],
          random_seed=123456)

      xformer = mt_layers.TransformerStack(params)
      input_arr = np.array([
          [[0, 1]] * batch,
          [[1, -1]] * batch,
      ], dtype=int)
      paddings_arr = np.array([[0] * batch, [0] * batch], dtype=int)
      inputs = tf.constant(
          input_arr.tolist(), dtype=py_utils.FPropDtype(params))
      paddings = tf.constant(
          paddings_arr.tolist(), dtype=py_utils.FPropDtype(params))
      output, _, _ = xformer.FProp(xformer.theta, inputs, paddings)

      self.evaluate(tf.global_variables_initializer())
      output = sess.run(output)
      print(repr(output))
      self.assertAllCloseAccordingToType(
          np.array([[[-0.940543, 1.479253]] * batch,
                    [[-0.413938, -2.550903]] * batch]), output)
Code Example #19
    def testParalellMultiOutputsLayer(self):
        g = tf.Graph()
        with g.as_default():
            tf.set_random_seed(24332)

            def Merge(xs):
                rets = []
                for x in zip(*xs):
                    if x[0] is None:
                        rets.append(None)
                    else:
                        rets.append(tf.add_n(list(x)))
                return tuple(rets)

            p = layers.ParallelLayer.Params().Set(
                name='parallel',
                merge=Merge,
                sub=[
                    lingvo_layers.ConvLayer.Params().Set(
                        name='p%d' % i,
                        filter_shape=(3, 3, 3, 5),
                        filter_stride=(1, 1),
                        batch_norm=False) for i in range(3)
                ])
            l = p.Instantiate()
            x = tf.zeros(shape=[2, 32, 32, 3])
            y0, y1 = l.FPropDefaultTheta(x)
            y_sum = tf.reduce_sum(y0)
            # Ensures the 2nd return value (None) is handled properly.
            self.assertEqual(None, y1)

        with self.session(graph=g) as sess:
            sess.run(tf.global_variables_initializer())
            y_sum_val = sess.run(y_sum)

        self.assertEqual(y_sum_val, 0.)
Code Example #20
    def testParallelRepeatLayerLayer(self):
        repeat = 100
        body_p = layers.SequentialLayer.Params().Set(
            name='body',
            sub=[
                layers.LinearLayer.Params().Set(name='ln1',
                                                input_dims=2,
                                                output_dims=4),
                layers.FnLayer.Params().Set(
                    name='relu',
                    fn=tf.nn.relu,
                    fn_meta=lambda x: py_utils.NestedMap(flops=1,
                                                         out_shapes=(x, ))),
                layers.LinearLayer.Params().Set(name='ln2',
                                                input_dims=4,
                                                output_dims=2)
            ])
        with self.session(use_gpu=False, graph=tf.Graph()):
            tf.random.set_seed(24332)
            p = layers.ParallelRepeatLayer.Params().Set(name='moe',
                                                        repeat=repeat,
                                                        body=body_p)
            l = p.Instantiate()
            x = tf.random.normal(shape=[repeat, 2, 2])
            y = l.FPropDefaultTheta(x)
            self.evaluate(tf.global_variables_initializer())
            x_val, y_val, w = self.evaluate([x, y, l.vars])

        np_val = []

        for i in range(repeat):
            # relu(act \dot w_1) \dot w_2
            np_val.append(
                np.dot(np.maximum(0, np.dot(x_val[i], w.body.ln1.w[i])),
                       w.body.ln2.w[i]))
        np_val = np.stack(np_val)
        self.assertAllClose(np_val, y_val)
Code Example #21
  def _TestRightContextStackingLayersHelper(self, **kwargs):
    """Applicable only if the layer implements StreamStep() with right context."""
    batch_size, max_seqlen, input_dim = 2, 32, kwargs['input_dim']

    stride = kwargs['stride']
    num_layers = kwargs['num_layers']
    right_context = kwargs.get('right_context', 0)

    assert max_seqlen % stride == 0

    # Prepares inputs.
    inputs, paddings = self._GetInputs(batch_size, max_seqlen, input_dim)

    # Gets params.
    p = self._GetParams(**kwargs)
    ps = [p.Copy().Set(name=f'base{i}') for i in range(num_layers)]

    # Builds graphs.
    layers = [x.Instantiate() for x in ps]
    base_outputs = self._BuildStackingBaseGraph(layers, num_layers, inputs,
                                                paddings)

    outputs = self._BuildStackingStreamGraph(layers, num_layers, inputs,
                                             paddings, stride, right_context)

    init_op = tf.global_variables_initializer()
    with self.session(use_gpu=False) as sess:
      sess.run(init_op)

      expected, actual = sess.run([base_outputs, outputs])
      print(f'expected: {repr(expected)}, {expected.shape}')
      print(f'actual: {repr(actual)}, {actual.shape}')
      print(f'np.sum(np.abs(expected)): {np.sum(np.abs(expected))}')
      print(f'np.sum(np.abs(actual)): {np.sum(np.abs(actual))}')
      self.assertAllClose(expected, actual, atol=5e-5)
      self.assertEqual(
          tuple(expected.shape), (batch_size, max_seqlen, input_dim))
Code Example #22
    def _TestHelperWithState(self, params, list_of_batches):
        """Returns the expected outputs for the tests.

    Args:
      params: Babelfish configuration parameters for setting up the
        cumulative_statistics_layer.
      list_of_batches: A list of padded batches of examples.
        The structure is a list of the following: {
        'features': tf.tensor(float32) of shape(len, batch, dim)
        'paddings': tf.tensor(float32) of shape(len, batch) }

    Returns:
      A dictionary containing numpy arrays of the expected test outputs.
      The structure is as follows:
      {
        'features': np.array(float32) of shape(len, batch, dim)
        'paddings': np.array(float32) of shape(len, batch)
      }
    """

        with self.session() as sess:
            tf.random.set_seed(_TF_RANDOM_SEED)
            network = params.Instantiate()

            batch_size = list_of_batches[0].features.shape[1]
            state = network.zero_state(network.theta, batch_size)

            for batch_t in list_of_batches:
                output = network.FProp(network.theta, batch_t, state)
                # Pass the output state over to the next batch as input state.
                state = output.state

            sess.run(
                tf.group(tf.global_variables_initializer(),
                         tf.tables_initializer()))

            return sess.run(output)
Code Example #23
File: encoder_test.py Project: luweishuang/lingvo
  def testForwardPassWithTaskEmb(self):
    with self.session(use_gpu=False):
      bs = 2
      sl = 21
      tf.random.set_seed(8372749040)
      p = self._EncoderParams()
      p.task_emb = p.token_emb.Copy()
      p.task_emb.vocab_size = 4
      mt_enc = encoder.TransformerEncoder(p)
      batch = py_utils.NestedMap()
      batch.ids = tf.constant(
          np.random.randint(low=0, high=63, size=[bs, sl], dtype=np.int32))
      batch.task_ids = tf.constant(
          np.random.randint(low=0, high=3, size=[bs, sl], dtype=np.int32))
      batch.paddings = tf.zeros([bs, sl])

      enc_out = mt_enc.FPropDefaultTheta(batch)
      enc_out_sum = tf.reduce_sum(enc_out.encoded, 0)

      self.evaluate(tf.global_variables_initializer())
      actual_enc_out = enc_out_sum.eval()

      # pyformat: disable
      # pylint: disable=bad-whitespace
      expected_enc_out = [
          [ 1.2796677,  -31.786997, -0.4054339, -32.61311 ,
            42.41403,   11.020338,  54.115948,  -61.322887,
            39.593548,  15.315696,  -20.373957, 1.8548622,
            -17.743631, 3.140956,   30.730812,  41.4348],
          [ -1.0373995, -31.306532, -2.6323462, -32.078648,
            45.80049,   16.409424,  55.00114,   -63.102333,
            40.4261,    14.198621,  -23.027012, 1.0839912,
            -20.739473, 0.7242553,  32.49956,   41.592197]]
      # pylint: enable=bad-whitespace
      # pyformat: enable
      self.assertAllClose(
          expected_enc_out, actual_enc_out, rtol=1e-05, atol=1e-05)
Code Example #24
    def testSequentialLayer(self):
        g = tf.Graph()
        with g.as_default():
            tf.set_random_seed(24332)
            p = layers.SequentialLayer.Params().Set(
                name='seq',
                repeat=2,
                sub=[
                    lingvo_layers.FCLayer.Params().Set(name='foo',
                                                       input_dim=32,
                                                       output_dim=8),
                    lingvo_layers.FCLayer.Params().Set(name='bar',
                                                       input_dim=8,
                                                       output_dim=8),
                    lingvo_layers.FCLayer.Params().Set(name='baz',
                                                       input_dim=8,
                                                       output_dim=32),
                    lingvo_layers.DropoutLayer.Params().Set(name='dropout',
                                                            keep_prob=0.5)
                ])
            p.is_eval = True
            l = p.Instantiate()
            x = tf.random_normal(shape=[2, 32])
            y = l.FPropDefaultTheta(x)
            l.vars.Transform(lambda x: x.shape).VLog(0, 'vars: ')

        with self.session(graph=g) as sess:
            sess.run(tf.global_variables_initializer())
            x_val, y_val, w = sess.run([x, y, l.vars])

        act = x_val
        # relu(act \dot w + b)
        for i in range(2):
            act = np.maximum(0, np.dot(act, w.rep[i].foo.w) + w.rep[i].foo.b)
            act = np.maximum(0, np.dot(act, w.rep[i].bar.w) + w.rep[i].bar.b)
            act = np.maximum(0, np.dot(act, w.rep[i].baz.w) + w.rep[i].baz.b)
        self.assertAllClose(act, y_val)
Code Example #25
    def testEagerMultiLearnerCheckpointCompatibility(self):
        self.assertTrue(tf.executing_eagerly())
        cfg = model_registry.GetParams('test.LinearModelParams', 'Train')
        mdl = cfg.Instantiate()
        # Disable async checkpointing.
        cfg.task.train.async_checkpointing = False
        cfg.train.async_checkpointing = False
        with py_utils.GradientTape(persistent=True):
            mdl.ConstructFPropBPropGraph()

        eager_v1_logdir = os.path.join(self.get_temp_dir(), 'eager_v1')
        eager_v2_logdir = os.path.join(self.get_temp_dir(), 'eager_v2')
        checkpointer.EagerCheckpointerV1(eager_v1_logdir, mdl).Save(gsteps=0)
        checkpointer.EagerCheckpointerV2(eager_v2_logdir, mdl).Save(gsteps=0)
        eager_v1_keys = _GetCheckpointKeys(
            os.path.join(eager_v1_logdir, 'ckpt_V1', 'ckpt-00000000'))
        eager_v2_keys = _GetCheckpointKeys(
            os.path.join(eager_v2_logdir, 'ckpt_V2', 'ckpt-0'))
        # Expecting two more variables in V2 checkpoints:
        # _CHECKPOINTABLE_OBJECT_GRAPH
        # save_counter
        self.assertEqual(len(eager_v1_keys) + 2, len(eager_v2_keys))  # pylint:disable=g-generic-assert

        py_utils.SetEagerMode(False)
        self.assertFalse(tf.executing_eagerly())
        graph_logdir = os.path.join(self.get_temp_dir(), 'graph')
        os.mkdir(graph_logdir)
        with self.session(graph=tf.Graph()) as sess:
            mdl = cfg.Instantiate()
            for lrn in mdl.GetTask().learners:
                lrn.optimizer.params.clear_variable_scope = False
            mdl.ConstructFPropBPropGraph()
            sess.run(tf.global_variables_initializer())
            checkpointer.Checkpointer(graph_logdir, mdl).Save(sess, gsteps=0)
        graph_keys = _GetCheckpointKeys(
            os.path.join(graph_logdir, 'ckpt-00000000'))
        self.assertEqual(eager_v1_keys, graph_keys)
Code Example #26
def testInference(self):
    with self.session() as sess:
        tf.random.set_seed(1618)
        p = model_registry.GetParams('test.MnistV2', 'Test')
        p.random_seed = 73234288
        p.input.ckpt = self.data_path
        p.task.params_init = py_utils.WeightInit.Uniform(0.1, seed=73234288)
        model = p.Instantiate()
        subgraphs = model.GetTask().Inference()
        self.assertCountEqual(['default'], list(subgraphs.keys()))
        fetches, feeds = subgraphs['default']
        self.assertCountEqual(['normalized_image'], list(feeds.keys()))
        self.assertCountEqual(['logits', 'probs', 'prediction'],
                              list(fetches.keys()))
        self.evaluate(tf.global_variables_initializer())
        fetch_results = sess.run(
            fetches,
            {feeds['normalized_image']: np.zeros(p.input.data_shape)})
        self.assertAllEqual([p.task.softmax.num_classes],
                            fetch_results['logits'].shape)
        self.assertAllEqual([p.task.softmax.num_classes],
                            fetch_results['probs'].shape)
        self.assertAllEqual([], fetch_results['prediction'].shape)
Code Example #27
  def testGraphLayer(self):
    g = tf.Graph()
    with g.as_default(), self.SetEval(True):
      tf.random.set_seed(24332)

      def _FnMeta(*shapes):
        return py_utils.NestedMap(flops=1, out_shapes=shapes)

      p = layers.GraphLayer.Params().Set(
          name='graph',
          input_endpoints=['x'],
          output_endpoints=['y'],
          sub=[
              ('x.a->y.c',
               layers.FnLayer.Params().Set(fn=lambda x: 2 * x,
                                           fn_meta=_FnMeta)),
              ('x.b->y.d', layers.FnLayer.Params().Set(
                  name='bar', fn=lambda x: x + 2, fn_meta=_FnMeta)),
              ('y.c,y.d->y.e, y.f', layers.FnLayer.Params().Set(
                  name='baz', fn=lambda x, y: (x + y, x - y), fn_meta=_FnMeta)),
          ])
      l = p.Instantiate()
      x = py_utils.NestedMap(a=tf.constant(1.0), b=tf.constant(2.0))
      y = l.FProp(l.theta, x)
      y_shape = l.FPropMeta(
          p, py_utils.Transform(lambda t: tshape.Shape(t.shape),
                                x)).out_shapes[0]
      self.assertDictEqual(
          py_utils.Transform(lambda t: t.shape.as_list(), y),
          py_utils.Transform(lambda t: t.ToTensorShape().as_list(), y_shape))

    with self.session(graph=g):
      self.evaluate(tf.global_variables_initializer())
      y_val = self.evaluate(y)
      print(y_val)
      self.assertEqual(py_utils.NestedMap(c=2.0, d=4.0, e=6.0, f=-2.0), y_val)
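Note: each `sub` entry above is a routing rule of the form `'inputs->outputs'` plus the layer that maps one to the other. The routing itself is simple enough to sketch in plain Python (this mimics only the endpoint plumbing, not lingvo's GraphLayer):

def run_graph(sub, inputs):
  # sub: list of ('in1,in2->out1,out2', fn); inputs: dict of endpoint values.
  env = dict(inputs)
  for signature, fn in sub:
    in_names, out_names = (s.split(',') for s in signature.split('->'))
    outs = fn(*(env[name.strip()] for name in in_names))
    if len(out_names) == 1:
      outs = (outs,)
    env.update(zip((n.strip() for n in out_names), outs))
  return env

env = run_graph(
    [('x.a->y.c', lambda a: 2 * a),
     ('x.b->y.d', lambda b: b + 2),
     ('y.c,y.d->y.e, y.f', lambda c, d: (c + d, c - d))],
    {'x.a': 1.0, 'x.b': 2.0})
print(env)  # y.c=2.0, y.d=4.0, y.e=6.0, y.f=-2.0, matching the test above.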
Code Example #28
def testRepeatLayerNestedMapFPropInputSignature(self):
  """Tests RepeatLayer having body layer with NestedMap in FProp signature."""
  repeat = 100
  input_dim, output_dim = 2, 2
  # Reference RepeatLayer.
  ref_p = layers.RepeatLayer.Params().Set(
      name='ref_recurrent',
      repeat=repeat,
      body=lingvo_layers.FCLayer.Params().Set(
          input_dim=input_dim, output_dim=output_dim))
  # RepeatLayer with NestedMap in `body` FProp input signature.
  new_p = layers.RepeatLayer.Params().Set(
      name='nested_map_recurrent',
      repeat=repeat,
      body=FCLayerTestNestedMapFPropInput.Params().Set(
          input_dim=input_dim, output_dim=output_dim))
  # Verify FProp output equality for both layers.
  ref_layer = ref_p.Instantiate()
  new_layer = new_p.Instantiate()
  assign_op = [
      tf.assign(dst, src)
      for (src, dst) in zip(ref_layer.vars.Flatten(), new_layer.vars.Flatten())
  ]
  with self.session() as sess:
    tf.random.set_seed(24332)
    sess.run(tf.global_variables_initializer())
    sess.run(assign_op)
    inputs = tf.random.normal(shape=[2, 2])
    paddings = tf.zeros((2, 1))
    ref_outputs = ref_layer.FPropDefaultTheta(inputs)
    new_out_nmap = new_layer.FPropDefaultTheta(
        py_utils.NestedMap(features=inputs, paddings=paddings))
    ref_out_vals = sess.run(ref_outputs)
    new_out_vals = sess.run(new_out_nmap.features)
    self.assertAllClose(ref_out_vals, new_out_vals)
Code Example #29
    def __init__(self, train_dir, model, train_params=None, save_only=False):
        """Initialize Checkpointer.

    Args:
     train_dir: Training directory for saving checkpoints.
     model: A BaseModel instance or None.
     train_params: If specified, use these training params instead of those
       in the `model`.
     save_only: This checkpointer is only intended for saving checkpoints.
    """
        self._train_dir = train_dir
        self._save_only = save_only

        self._vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        self._uninitialized_vars = tf.report_uninitialized_variables(
            self._vars)
        self._initialize_vars = tf.global_variables_initializer()

        self._save_path = os.path.join(self._train_dir, 'ckpt')

        if train_params:
            self._train_params = train_params
            self._model = None
        else:
            assert model
            self._train_params = model.params.train
            self._model = model

        if not self._save_only:
            self._params = model.params
            self._model_tasks = model.tasks
            self._model = model

        self._next_checkpoint_seconds = 0
        self._save_interval_seconds = self._train_params.save_interval_seconds
        self._saver = self._GetSaver()
Code Example #30
    def testLinearLayer(self):
        g = tf.Graph()
        with g.as_default():
            tf.set_random_seed(24332)
            p = layers.LinearLayer.Params().Set(name='test',
                                                input_dims=10,
                                                output_dims=5)
            l = p.Instantiate()
            xs = []
            ys = []
            for shape in ([2, 10], [2, 3, 10], [2, 3, 5, 10],
                          [2, 3, 5, 7, 10]):
                x = tf.random_normal(shape=shape)
                y = l.FPropDefaultTheta(x)
                xs += [x]
                ys += [y]

        with self.session(graph=g) as sess:
            sess.run(tf.global_variables_initializer())
            xs_val, ys_val, w_val = sess.run([xs, ys, l.vars])

        self.assertEqual(w_val.w.shape, (10, 5))
        for (xv, yv) in zip(xs_val, ys_val):
            self.assertAllClose(np.matmul(xv, w_val.w), yv)
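Note: the final loop relies on `np.matmul` broadcasting a fixed `[10, 5]` weight matrix over arbitrary leading dimensions, the same contraction the LinearLayer applies:

import numpy as np

w = np.random.randn(10, 5).astype(np.float32)
for shape in ([2, 10], [2, 3, 10], [2, 3, 5, 10], [2, 3, 5, 7, 10]):
  x = np.random.randn(*shape).astype(np.float32)
  print(x.shape, '->', np.matmul(x, w).shape)
# (2, 10) -> (2, 5), (2, 3, 10) -> (2, 3, 5), and so on: only the last
# dimension is contracted.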