def _Apply2(proj_layer, opt):
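      # Applies two optimizer steps on proj_layer (one per input batch) and
      # returns the variable values after each Apply() plus both gradients.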
      inputs1 = np_input1
      output1 = proj_layer.FPropDefaultTheta(inputs1, in_padding1)
      loss2_1 = tf.reduce_sum(output1)
      var_grads2_1 = py_utils.ComputeGradients(loss2_1, proj_layer.vars)
      grads2_1 = var_grads2_1.Transform(tuple)

      inputs1 = np_input2
      output1 = proj_layer.FPropDefaultTheta(inputs1, in_padding1)
      loss2_2 = tf.reduce_sum(output1)
      var_grads2_2 = py_utils.ComputeGradients(loss2_2, proj_layer.vars)
      grads2_2 = var_grads2_2.Transform(tuple)

      with cluster_factory.ForTestingWorker(add_summary=True):
        _ = opt.Apply(lr, var_grads2_1)

      # Take snapshots of the intermediate variable values.
      vars2_intermediate = [v.read_value() for v in proj_layer.vars.Flatten()]
      tf.assign_add(py_utils.GetOrCreateGlobalStepVar(), 1)

      with cluster_factory.ForTestingWorker(add_summary=True):
        _ = opt.Apply(lr, var_grads2_2)

      vars2_1 = proj_layer.vars.Flatten()

      return vars2_intermediate, vars2_1, grads2_1, grads2_2
Example #2
    def testBatchParallel(self):
        # pyformat: disable
        b = builder.Base.Params()
        b = b.Instantiate()
        p = b._BatchParallel(
            'bp',
            b._Seq('main', b._Linear('l', 8, 4), b._PrintShape('debug'),
                   b._Bias('b', 4)))
        # pyformat: enable
        g = tf.Graph()
        with g.as_default():
            l = p.Instantiate()
            x = tf.random.normal(shape=[4, 8])
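            # Run the same input under three split_size settings; BatchParallel
            # should produce identical outputs (asserted below).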
            with cluster_factory.ForTestingWorker(cpus=4, split_size=1):
                y1 = l.FPropDefaultTheta(x)
            with cluster_factory.ForTestingWorker(cpus=4, split_size=2):
                y2 = l.FPropDefaultTheta(x)
            with cluster_factory.ForTestingWorker(cpus=4, split_size=4):
                y4 = l.FPropDefaultTheta(x)

        cfg = tf.config_pb2.ConfigProto()
        cfg.device_count['CPU'] = 4
        with self.session(config=cfg, graph=g) as sess:
            sess.run(tf.global_variables_initializer())
            v1, v2, v4 = sess.run([y1, y2, y4])

        self.assertAllClose(v1, v2)
        self.assertAllClose(v1, v4)
Example #3
 def _buildGraphAndSaver(logdir,
                         keep_latest_n=5,
                         keep_every_n_hours=None,
                         save_async=False):
   tf.random.set_seed(123)
   g = tf.Graph()
   with g.as_default():
     p = mnist.LeNet5().Task()
     p.input = mnist.LeNet5().Train()
     with cluster_factory.ForTestingWorker(mode='sync', job='controller'):
       _ = p.Instantiate()
     gsv = py_utils.GetOrCreateGlobalStepVar()
     inc = gsv.assign_add(1)
     variables = tf.all_variables()
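     # Sanity checks: saver.InRange(0, 10) on the global step and
     # saver.IsFinite() on every variable.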
     sanity_checks = [([gsv], saver.InRange(0, 10))]
     for var in variables:
       sanity_checks.append(([var], saver.IsFinite()))
     sav = saver.Saver(
         logdir,
         variables,
         sanity_checks,
         keep_latest_n=keep_latest_n,
         keep_every_n_hours=keep_every_n_hours,
         async_save=save_async)
   return g, sav, inc
Example #4
    def testMetrics(self, packing_factor, expected_ratio, expected_count):
        p = input_generator.TextPackedInput.Params()
        p.flush_every_n = 0
        p.repeat_count = -1
        p.file_pattern = 'text:' + test_helper.test_src_dir_path(
            'tasks/mt/testdata/en_de.text')
        p.tokenizer = tokenizers.WpmTokenizer.Params().Set(
            vocab_filepath=test_helper.test_src_dir_path(
                'tasks/mt/wpm-ende-2k.voc'),
            vocab_size=2000)
        p.source_max_length = 20
        p.target_max_length = 20
        p.bucket_batch_limit = [8]
        p.packing_factor = packing_factor
        with cluster_factory.ForTestingWorker(add_summary=True):
            with self.session() as sess:
                inp = p.Instantiate()
                inp.GetPreprocessedInputBatch()
                summary_str = sess.run(tf.summary.merge_all(scope='examples'))
                summary = tf.summary.Summary.FromString(summary_str)

                self.assertLen(summary.value, 3)
                self.assertEqual(summary.value[0].tag,
                                 'examples/src_packed_token_ratio')
                self.assertEqual(summary.value[1].tag,
                                 'examples/tgt_packed_token_ratio')
                self.assertEqual(summary.value[2].tag,
                                 'examples/num_packed_examples')
                self.assertAllClose(summary.value[0].simple_value,
                                    expected_ratio,
                                    atol=0.0001)
                self.assertAllClose(summary.value[1].simple_value,
                                    expected_ratio,
                                    atol=0.0001)
                self.assertEqual(summary.value[2].simple_value, expected_count)
Example #5
 def testLinearRampupExponentialDecayScaledByNumSplitScheduleWarmUpInit(
         self):
     p = lr_schedule.LinearRampupExponentialDecayScaledByNumSplitSchedule.Params(
     ).Set(warmup_init=0,
           warmup=250000,
           decay_start=32000000,
           decay_end=64000000,
           min=0.5)
     with self.session(), cluster_factory.ForTestingWorker(
             mode='sync', job='trainer_client', gpus=8):
         lrs = p.cls(p)
         pts = [[i, lrs.Value(i).eval()]
                for i in range(0, 10000000, 1000000)]
         self.assertAllClose(
             pts,
             [
                 # Linear increasing from warmup_init=0.
                 [0, 0],
                 [1000000, 4.0],
                 # Constant
                 [2000000, 8.0],
                 [3000000, 8.0],
                 # Exponentially decreasing.
                 [4000000, 8.0],
                 [5000000, 4.0],
                 [6000000, 2.0],
                 [7000000, 1.0],
                 [8000000, 0.5],
                 [9000000, 0.5]
             ])
Example #6
 def testLinearRampupExponentialDecayScaledByNumSplitScheduleWithNumSplits(
         self):
     p = lr_schedule.LinearRampupExponentialDecayScaledByNumSplitSchedule.Params(
     ).Set(warmup=250000,
           decay_start=32000000,
           decay_end=64000000,
           min=0.5,
           max=5.0,
           num_splits=8)
     # Increases the number of splits to 32.
     with self.session(), cluster_factory.ForTestingWorker(
             mode='sync', job='trainer_client', gpus=8, split_size=4):
         lrs = p.cls(p)
         pts = [[i, lrs.Value(i).eval()]
                for i in range(0, 10000000, 1000000)]
         # Values are copied from
         # testLinearRampupExponentialDecayScaledByNumSplitScheduleWithCap.
         self.assertAllClose(
             pts,
             [
                 # Linear increasing.
                 [0, 1.0],
                 [1000000, 4.5],
                 # Constant
                 [2000000, 5.0],
                 [3000000, 5.0],
                 # Exponentially decreasing.
                 [4000000, 5.0],
                 [5000000, 4.0],
                 [6000000, 2.0],
                 [7000000, 1.0],
                 [8000000, 0.5],
                 [9000000, 0.5]
             ])
Example #7
    def _testDecoderFPropFloatHelper(self,
                                     func_inline=False,
                                     num_decoder_layers=1,
                                     target_seq_len=5,
                                     residual_start=0):
        """Builds a decoder from params and computes the loss on random inputs."""
        cluster = cluster_factory.ForTestingWorker(add_summary=True)
        config = tf.ConfigProto(graph_options=tf.GraphOptions(
            optimizer_options=tf.OptimizerOptions(
                do_function_inlining=func_inline)))
        with cluster, self.session(use_gpu=False, config=config) as sess:
            tf.set_random_seed(8372749040)
            vn_config = py_utils.VariationalNoiseParams(None, False, False)
            p = self._DecoderParams(vn_config)
            p.rnn_layers = num_decoder_layers
            p.residual_start = residual_start
            p.target_seq_len = target_seq_len
            dec = p.Instantiate()
            src_seq_len = 5
            src_enc = tf.random_normal([src_seq_len, 2, 8], seed=9283748)
            src_enc_padding = tf.constant(
                [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 1.0], [1.0, 1.0]],
                dtype=tf.float32)
            encoder_outputs = py_utils.NestedMap(encoded=src_enc,
                                                 padding=src_enc_padding)
            target_ids = tf.transpose(
                tf.constant([[0, 1, 2, 3], [1, 2, 3, 4], [10, 11, 12, 15],
                             [5, 6, 7, 8], [10, 5, 2, 5]],
                            dtype=tf.int32))
            target_labels = tf.transpose(
                tf.constant([[0, 1, 2, 3], [1, 2, 3, 4], [10, 11, 12, 13],
                             [5, 7, 8, 10], [10, 5, 2, 4]],
                            dtype=tf.int32))
            target_paddings = tf.transpose(
                tf.constant([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 0],
                             [0, 1, 0, 0], [1, 1, 1, 1]],
                            dtype=tf.float32))
            target_transcripts = tf.constant(
                ['abcd', 'bcde', 'klmp', 'fghi', 'kfcf'])
            target_weights = 1.0 - target_paddings
            targets = py_utils.NestedMap({
                'ids': target_ids,
                'labels': target_labels,
                'weights': target_weights,
                'paddings': target_paddings,
                'transcripts': target_transcripts,
            })
            metrics = dec.FPropDefaultTheta(encoder_outputs, targets).metrics
            loss = metrics['loss'][0]
            correct_predicts = metrics['fraction_of_correct_next_step_preds'][
                0]
            summaries = tf.summary.merge(
                tf.get_collection(tf.GraphKeys.SUMMARIES))

            tf.global_variables_initializer().run()
            loss_v, _ = sess.run([loss, correct_predicts])

            summaries.eval()

            return loss_v
Example #8
 def testLinearRampupExponentialDecayScaledByNumSplitScheduleNoWarmUp(self):
     p = schedule.LinearRampupExponentialDecayScaledByNumSplitSchedule.Params(
     ).Set(warmup=0, decay_start=32000000, decay_end=64000000, min=0.5)
     with self.session(), cluster_factory.ForTestingWorker(
             mode='sync', job='trainer_client', gpus=8):
         lrs = p.Instantiate()
         pts = []
         for step in range(0, 10000000, 1000000):
             with py_utils.GlobalStepContext(step):
                 pts.append([step, lrs.Value().eval()])
         self.assertAllClose(
             pts,
             [
                 # Constant
                 [0, 8.0],
                 [1000000, 8.0],
                 [2000000, 8.0],
                 [3000000, 8.0],
                 # Exponentially decreasing.
                 [4000000, 8.0],
                 [5000000, 4.0],
                 [6000000, 2.0],
                 [7000000, 1.0],
                 [8000000, 0.5],
                 [9000000, 0.5]
             ])
Example #9
 def testScaleSplitToInfeedGPU(self, use_per_host_infeed, split_size):
     with cluster_factory.ForTestingWorker(
             gpus=128, split_size=split_size) as cluster:
         num_splits = 128 // split_size
         self.assertEqual(cluster.num_splits_per_client, num_splits)
         self.assertEqual(
             batch_utils.scale_split_to_infeed(1024, use_per_host_infeed),
             1024 * num_splits)
Example #10
 def testScaleInfeedToGlobalTPU(self, use_per_host_infeed, num_tpu_hosts):
     with flagsaver.flagsaver(xla_device='tpu', enable_asserts=False):
         with cluster_factory.ForTestingWorker(tpus=128,
                                               num_tpu_hosts=num_tpu_hosts):
             num_infeeds = num_tpu_hosts if use_per_host_infeed else 1
             self.assertEqual(
                 batch_utils.scale_infeed_to_global(1024,
                                                    use_per_host_infeed),
                 1024 * num_infeeds)
Example #11
    def testBatchSizePerHostInfeed(self):
        with cluster_factory.ForTestingWorker(tpus=128, num_tpu_hosts=8):
            p = base_input_generator.BaseInputGenerator.Params()
            p.batch_size = 16
            p.use_per_host_infeed = True
            input_generator = p.Instantiate()

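            # The asserted values follow from batch_size=16 with 128 TPU cores
            # and 8 hosts: global batch 16 * 128 = 2048, per-host infeed batch
            # 2048 / 8 = 256.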
            self.assertEqual(256, input_generator.InfeedBatchSize())
            self.assertEqual(2048, input_generator.GlobalBatchSize())
Example #12
 def testLinearRampupSqrtDecayByBatchSizeAndReplicasSchedule(self):
     p = schedule.LinearRampupSqrtDecayByBatchSizeAndReplicas.Params().Set(
         warmup_examples=100000, batch_size=100)
     with self.session(), cluster_factory.ForTestingWorker(
             mode='sync', job='trainer_client', gpus=10):
         lrs = p.Instantiate()
         self.assertAllClose(lrs.Value(-1).eval(), 0.0)
         self.assertAllClose(lrs.Value(49).eval(), 0.05)
         self.assertAllClose(lrs.Value(99).eval(), 0.1)
         self.assertAllClose(lrs.Value(399).eval(), 0.05)
         self.assertAllClose(lrs.Value(1599).eval(), 0.025)
Example #13
 def testScaleSplitToInfeedTPU(self, use_per_host_infeed, split_size,
                               num_tpu_hosts):
     with cluster_factory.ForTestingWorker(
             tpus=128, split_size=split_size,
             num_tpu_hosts=num_tpu_hosts) as cluster:
         num_splits = 128 // split_size
         num_infeeds = num_tpu_hosts if use_per_host_infeed else 1
         self.assertEqual(cluster.num_splits_per_client, num_splits)
         self.assertEqual(
             batch_utils.scale_split_to_infeed(1024, use_per_host_infeed),
             1024 * num_splits // num_infeeds)
Example #14
 def testPostProcessLogUtf8(self):
     p = self._testParams()
     p.decoder_metrics.log_utf8 = True
     mdl = p.Instantiate()
     fake_dec_out = {
         'utt_id': ['utt1', 'utt2'],
         'transcripts': ['あいうえ'.encode('utf-8'), 'あ'.encode('utf-8')],
         'topk_decoded': [
             ['あいうえ'.encode('utf-8'), 'あいう'.encode('utf-8')],
             ['wrong'.encode('utf-8'), ''.encode('utf-8')],
         ],
         'topk_scores': [[1.0, 0.9], [1.0, 0.9]],
         'topk_ids': [[1, 2, 3, 4], [2, 3, 4, 5], [3, 4, 5, 6],
                      [4, 5, 6, 7]],
         'topk_lens': [2, 4, 4, 2],
         'target_labels': [[1, 2, 3, 4], [2, 3, 4, 5]],
         'target_paddings': [[0, 0, 0, 1], [0, 0, 0, 1]],
         'norm_wer_errors': [[0, 0], [1, 1]],
         'norm_wer_words': [[4, 4], [1, 1]],
     }
     fake_dec_out = {k: np.array(v) for k, v in fake_dec_out.items()}
     metrics_dict = mdl.CreateDecoderMetrics()
     with cluster_factory.ForTestingWorker(add_summary=True):
         with mock.patch.object(tf.logging, 'info',
                                autospec=True) as mock_info:
             mdl.PostProcessDecodeOut(fake_dec_out, metrics_dict)
             mock_info.assert_has_calls([
                 mock.call('utt_id: %s', 'utt1'),
                 mock.call('  ref_str: %s', 'あいうえ'),
                 mock.call('  ref_ids: %s', [1, 2, 3]),
             ])
             mock_info.assert_has_calls([
                  # Skip np.array values; comparing them raises ValueError
                  # in the `inspect` module.
                 # mock.call('  top_hyp_ids: %s', np.array([1, 2])),
                 mock.call('  %f: %s', 1.0, 'あいうえ'),
                 mock.call('  ins: %d, subs: %d, del: %d, total: %d', 0, 0,
                           0, 0),
                 mock.call('  %f: %s', 0.9, 'あいう'),
                 mock.call('  ins: %d, subs: %d, del: %d, total: %d', 0, 1,
                           0, 1),
                 mock.call('utt_id: %s', 'utt2'),
                 mock.call('  ref_str: %s', 'あ'),
                 mock.call('  ref_ids: %s', [2, 3, 4]),
             ])
             mock_info.assert_has_calls([
                  # Skip np.array values; comparing them raises ValueError
                  # in the `inspect` module.
                 # mock.call('  top_hyp_ids: %s', np.array([3, 4, 5, 6])),
                 mock.call('  %f: %s', 1.0, 'wrong'),
                 mock.call('  ins: %d, subs: %d, del: %d, total: %d', 0, 1,
                           0, 1),
                 mock.call('  %f: %s', 0.9, ''),
                 mock.call('  ins: %d, subs: %d, del: %d, total: %d', 0, 0,
                           1, 1)
             ])
Example #15
 def testBatchSizeInInputGenerator(self):
   with self.session() as sess:
     tf.set_random_seed(_TF_RANDOM_SEED)
     p = self._testParams()
     with cluster_factory.ForTestingWorker(
         mode='sync', job='trainer_client', gpus=5):
       mdl = p.cls(p)
       mdl.FPropDefaultTheta()
     loss = mdl.loss
     tf.global_variables_initializer().run()
     _ = sess.run(loss)
     self.assertEqual(mdl.input_generator.scaled_bucket_batch_limit, [20, 40])
Example #16
 def testBatchSizeInInputGenerator(self):
   with self.session():
     tf.random.set_seed(_TF_RANDOM_SEED)
     p = self._testParams()
     with cluster_factory.ForTestingWorker(
         mode='sync', job='trainer_client', gpus=5):
       mdl = p.Instantiate()
       mdl.FPropDefaultTheta()
       loss = mdl.loss
       self.evaluate(tf.global_variables_initializer())
       _ = self.evaluate(loss)
       self.assertEqual(mdl.input_generator.infeed_bucket_batch_limit, [40])
Example #17
 def Run(num_splits):
     with self.session(use_gpu=False, graph=tf.Graph()) as sess:
         tf.set_random_seed(93820981)
         p = self._testParams()
         p.input.bucket_batch_limit = [
             b * 2 / num_splits for b in p.input.bucket_batch_limit
         ]
         with cluster_factory.ForTestingWorker(gpus=num_splits):
             mdl = p.cls(p)
             metrics = mdl.FPropDefaultTheta()[0]
         tf.global_variables_initializer().run()
         return sess.run(metrics['loss'])
Example #18
 def Run(num_splits):
   p = self._testParams()
   with self.session(use_gpu=False, graph=tf.Graph()):
     tf.random.set_seed(93820981)
     p.input.cur_iter_in_seed = False
     p.input.bucket_batch_limit = [
         b * 2 / num_splits for b in p.input.bucket_batch_limit
     ]
     with cluster_factory.ForTestingWorker(gpus=num_splits, do_eval=True):
       mdl = p.Instantiate()
       metrics = mdl.FPropDefaultTheta()[0]
     self.evaluate(tf.global_variables_initializer())
     return self.evaluate(metrics['loss'])
Example #19
    def testCreateTpuEnqueueOpsPerHostInfeed(self):
        class FooInputGenerator(base_input_generator.BaseInputGenerator):
            def _InputBatch(self):
                return py_utils.NestedMap(
                    inp=tf.constant(1.0, shape=[128, 3], dtype=tf.float32))

        with cluster_factory.ForTestingWorker(tpus=128, num_tpu_hosts=16):
            with self._DeviceAssignment():
                p = FooInputGenerator.Params()
                p.use_per_host_infeed = True
                input_generator = p.Instantiate()
                input_generator.CreateTpuEnqueueOps()
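                # With 128 cores over 16 hosts, each host feeds 8 cores, so its
                # [128, 3] per-host batch dequeues as [16, 3] per core.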
                batch = input_generator.TpuDequeueBatch()
                self.assertEqual(batch.inp.shape.as_list(), [16, 3])
Example #20
  def testLinearRampupPiecewiseConstantSchedule(self):
    p = lr_schedule.LinearRampupPiecewiseConstantSchedule.Params().Set(
        boundaries=[40, 64, 80, 96],
        lrs=[1.0, 0.1, 0.01, 0.001],
    )
    with self.session(), cluster_factory.ForTestingWorker(
        mode='sync', job='trainer_client', tpus=8):
      lrs = p.cls(p)
      pts = [[i, lrs.Value(i).eval()] for i in range(0, 15, 1)]

      self.assertAllClose(
          pts, [[0, 0.0], [1, 1.6], [2, 3.2], [3, 4.8], [4, 6.4], [5, 8.0],
                [6, 8.0], [7, 8.0], [8, 8.], [9, 0.8], [10, 0.8], [11, 0.08],
                [12, 0.08], [13, 0.008], [14, 0.008]])
Example #21
    def testStatsCounter(self):
        with self.session() as sess:
            with cluster_factory.ForTestingWorker(add_summary=True):
                foo = summary_utils.StatsCounter('foo')
                val = foo.Value()
                inc = foo.IncBy(100)

            tf.global_variables_initializer().run()
            self.assertAllEqual(0, val.eval())
            self.assertAllEqual(100, sess.run(inc))
            self.assertAllEqual(100, val.eval())
            self.assertAllEqual([100, 200], sess.run([val, inc]))
            self.assertAllEqual([200, 300], sess.run([val, inc]))
            summary = tf.Summary.FromString(sess.run(tf.summary.merge_all()))
            self.assertTrue(any('foo' in v.tag for v in summary.value))
Example #22
 def testMnistLeNet5(self):
   g = tf.Graph()
   with g.as_default():
     tf.set_random_seed(1618)
     p = model_registry.GetParams('image.mnist.LeNet5', 'Test')
     p.random_seed = 73234288
     p.input.ckpt = self.data_path
     p.task.params_init = py_utils.WeightInit.Uniform(0.1, seed=73234288)
     with cluster_factory.ForTestingWorker(mode='sync', job='trainer_client'):
       model = p.cls(p)
       model.ConstructFPropBPropGraph()
   with self.session(graph=g) as sess:
     sess.run(tf.global_variables_initializer())
     CompareToGoldenSingleFloat(self, 2.302583, self._runOneStep(model, sess))
     CompareToGoldenSingleFloat(self, 2.302405, self._runOneStep(model, sess))
Example #23
 def testMnistV2(self):
     g = tf.Graph()
     with g.as_default():
         tf.random.set_seed(1618)
         p = model_registry.GetParams('test.MnistV2', 'Test')
         p.random_seed = 73234288
         p.input.ckpt = self.data_path
         p.task.params_init = py_utils.WeightInit.Uniform(0.1,
                                                          seed=73234288)
         with cluster_factory.ForTestingWorker(mode='sync',
                                               job='trainer_client'):
             model = p.Instantiate()
             model.ConstructFPropBPropGraph()
     with self.session(graph=g):
         self.evaluate(tf.global_variables_initializer())
         CompareToGoldenSingleFloat(self, 2.303070, self._runOneStep(model))
         CompareToGoldenSingleFloat(self, 2.297364, self._runOneStep(model))
Example #24
  def testLinearRampupPiecewiseConstantSchedule(self):
    p = schedule.LinearRampupPiecewiseConstantSchedule.Params().Set(
        boundaries=[40, 64, 80, 96],
        lrs=[1.0, 0.1, 0.01, 0.001],
    )
    with self.session(), cluster_factory.ForTestingWorker(
        mode='sync', job='trainer_client', tpus=8):
      lrs = p.Instantiate()
      pts = []
      for step in range(0, 15, 1):
        with py_utils.GlobalStepContext(step):
          pts.append([step, lrs.Value().eval()])

      self.assertAllClose(
          pts, [[0, 0.0], [1, 1.6], [2, 3.2], [3, 4.8], [4, 6.4], [5, 8.0],
                [6, 8.0], [7, 8.0], [8, 0.8], [9, 0.8], [10, 0.08], [11, 0.08],
                [12, 0.008], [13, 0.008], [14, 0.008]])
Example #25
 def testLinearRampupExponentialDecayScaledByNumSplitScheduleExpOnly(self):
     p = schedule.LinearRampupExponentialDecayScaledByNumSplitSchedule.Params(
     ).Set(warmup=0, decay_start=0, decay_end=32000000, min=0.5)
     with self.session(), cluster_factory.ForTestingWorker(
             mode='sync', job='trainer_client', gpus=8):
         lrs = p.Instantiate()
         pts = [[i, lrs.Value(i).eval()]
                for i in range(0, 6000000, 1000000)]
         self.assertAllClose(
             pts,
             [
                 # Exponentially decreasing.
                 [0, 8.0],
                 [1000000, 4.0],
                 [2000000, 2.0],
                 [3000000, 1.0],
                 [4000000, 0.5],
                 [5000000, 0.5]
             ])
Example #26
  def testOverrideVarsFromCheckpointWithIgnoreRules(self):

    with self.session(use_gpu=False) as sess:
      tf.set_random_seed(8372749040)
      cfg = model_registry.GetParams('image.mnist.LeNet5', 'Train')
      with cluster_factory.ForTestingWorker(mode='sync', job='trainer_client'):
        cfg.cls(cfg)
      tf.global_variables_initializer().run()
      self.assertAllClose(
          # These are initialized values before overriding with checkpoint.
          self._GetLeNetVarsFirstVal(sess),
          [-0.005945, -0.036722, 0.0])
      checkpoint_path = test_helper.test_src_dir_path(
          'core/testdata/lenet_test_model')
      variable_loading_rules = [('lenet5/conv0/w/var', 'lenet5/conv0/w/var'),
                                ('lenet5/conv1/w/var', 'lenet5/conv1/w/var')]
      variable_ignore_rules = ['lenet5/conv1/w/var']
      py_utils._OverrideVarsFromCheckpoint(
          sess, tf.all_variables(), checkpoint_path, variable_loading_rules,
          variable_ignore_rules)
      self.assertAllClose(
          # Now only conv0 weights have been overridden.
          self._GetLeNetVarsFirstVal(sess),
          [0.043092, -0.036722, 0.0])
Example #27
  def testRepeatLayerUnrolledEval(self):
    repeat = 100
    with cluster_factory.ForTestingWorker(
        mode='sync', job='trainer_client', do_eval=True):
      tf.random.set_seed(24332)
      p = layers.RepeatLayer.Params().Set(
          name='recurrent',
          repeat=repeat,
          per_layer_vars=True,
          unrolled_in_eval=True,
          body=lingvo_layers.FCLayer.Params().Set(input_dim=2, output_dim=2))
      l = p.Instantiate()
      x = tf.random.normal(shape=[2, 2])
      y = l.FPropDefaultTheta(x)
      self.evaluate(tf.global_variables_initializer())
      x_val, y_val, w = self.evaluate([x, y, l.vars])

    np_val = x_val

    # relu(act \dot w + b)
    for i in range(repeat):
      body_i = w['body_iter_%05d' % i]
      np_val = np.maximum(0, np.dot(np_val, body_i.w) + body_i.b)
    self.assertAllClose(np_val, y_val)
Example #28
    def testBasic(self):
        logdir = tempfile.mkdtemp()
        # Create a dummy file that looks like a checkpoint that shouldn't
        # be touched.
        with tf.io.gfile.GFile(logdir + '/ckpt-foo', 'w') as f:
            f.write('contents')

        g = tf.Graph()
        with g.as_default():
            p = mnist.LeNet5().Task()
            p.input = mnist.LeNet5().Train()
            with cluster_factory.ForTestingWorker(mode='sync',
                                                  job='controller'):
                _ = p.Instantiate()
            gsv = py_utils.GetOrCreateGlobalStepVar()
            inc = gsv.assign_add(1)
            variables = tf.all_variables()
            sanity_checks = [([gsv], saver.InRange(0, 10))]
            for var in variables:
                sanity_checks.append(([var], saver.IsFinite()))
            sav = saver.Saver(logdir,
                              variables,
                              sanity_checks,
                              keep_latest_n=5,
                              keep_every_n_hours=1e-9)

        with self.session(graph=g) as sess:
            # Creates a few checkpoints.
            sess.run(tf.global_variables_initializer())
            for _ in range(10):
                sess.run(inc)
                _ = sav.Save(sess)

            # Restore to the latest.
            sess.run(tf.global_variables_initializer())
            _ = sav.Restore(sess)

            # Restore to a specific checkpoint.
            sess.run(tf.global_variables_initializer())
            _ = sav.Restore(sess, 6)

            # Increments global_step out of range, Save() fails.
            for _ in range(5):
                sess.run(inc)
            with self.assertRaises(tf.errors.AbortedError):
                _ = sav.Save(sess)

        filenames = tf.io.gfile.glob('{}/*'.format(logdir))
        filenames = [x[len(logdir) + 1:] for x in filenames]
        print('\n'.join(filenames))
        self.assertIn('checkpoint', filenames)

        meta_files = []
        for f in filenames:
            if f.endswith('.meta'):
                meta_files.append(f)
        # A .meta for each checkpoint.
        self.assertEqual(len(meta_files), 6)

        # 1 for checkpoint. 3 files per checkpoint. 5 good checkpoints, 1 bad.
        # 1 extra file contains the error message, and 1 dummy file
        self.assertEqual(len(filenames), 1 + (5 + 1) * 3 + 1 + 1)
Example #29
    def testAccumulator(self):
        # testAccumulator compares
        #   - explicit averaging of independently computed var_grads1 and
        #     var_grads2,
        #   - Accumulator(SGD) optimizer effectively doing this over 2 steps.
        np.random.seed(12345)
        np_input1 = np.random.normal(0.1, 0.5, [2, 4, 3])
        np.random.seed(12346)
        np_input2 = np.random.normal(0.1, 0.5, [2, 4, 3])

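        # First graph: compute var_grads1 and var_grads2 separately and apply
        # each, scaled by 1/2, with plain SGD (for SGD this is equivalent to a
        # single update with the averaged gradient).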
        with self.session(use_gpu=True, graph=tf.Graph()) as sess:
            tf.random.set_seed(123456)
            params = layers.ProjectionLayer.Params()
            params.name = 'proj'
            params.dtype = tf.float64
            params.input_dim = 3
            params.output_dim = 2
            params.params_init = py_utils.WeightInit.Gaussian(0.01, 123456)

            params.batch_norm = False
            proj_layer = layers.ProjectionLayer(params)
            inputs1 = tf.placeholder(shape=[2, 4, 3], dtype=tf.float64)
            in_padding1 = tf.zeros([2, 4, 1], dtype=tf.float64)
            inputs2 = tf.placeholder(shape=[2, 4, 3], dtype=tf.float64)
            in_padding2 = tf.zeros([2, 4, 1], dtype=tf.float64)
            output1 = proj_layer.FPropDefaultTheta(inputs1, in_padding1)
            output2 = proj_layer.FPropDefaultTheta(inputs2, in_padding2)
            loss1 = tf.reduce_sum(output1)
            loss2 = tf.reduce_sum(output2)
            var_grads1 = py_utils.ComputeGradients(loss1, proj_layer.vars)
            var_grads2 = py_utils.ComputeGradients(loss2, proj_layer.vars)
            op = optimizer.SGD.Params()
            opt = op.Instantiate()
            lr = 1e-1
            with tf.control_dependencies([loss1, loss2]):
                var_update_op1 = opt.Apply(
                    lr, py_utils.ApplyGradMultiplier(var_grads1, 1. / 2.))
                with tf.control_dependencies([var_update_op1]):
                    var_update_op2 = opt.Apply(
                        lr, py_utils.ApplyGradMultiplier(var_grads2, 1. / 2.))

            self.evaluate(tf.global_variables_initializer())
            vars1 = self.evaluate(proj_layer.vars.Flatten())
            loss1_1, grads1_1, loss1_2, grads1_2 = sess.run(
                [
                    loss1,
                    var_grads1.Transform(tuple), loss2,
                    var_grads2.Transform(tuple)
                ],
                feed_dict={
                    inputs1: np_input1,
                    inputs2: np_input2,
                },
            )
            sess.run([var_update_op2],
                     feed_dict={
                         inputs1: np_input1,
                         inputs2: np_input2,
                     })
            vars1_1 = self.evaluate(proj_layer.vars.Flatten())

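        # Second graph: the same layer trained with Accumulator(SGD) and
        # accum_steps=2; the first Apply() only accumulates the gradient, the
        # second one applies the accumulated update.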
        with self.session(use_gpu=True, graph=tf.Graph()) as sess:
            tf.random.set_seed(123456)
            params = layers.ProjectionLayer.Params()
            params.name = 'proj'
            params.dtype = tf.float64
            params.input_dim = 3
            params.output_dim = 2
            params.params_init = py_utils.WeightInit.Gaussian(0.01, 123456)

            params.batch_norm = False
            proj_layer = layers.ProjectionLayer(params)
            in_padding1 = tf.zeros([2, 4, 1], dtype=tf.float64)
            inputs1 = tf.placeholder(shape=[2, 4, 3], dtype=tf.float64)
            output1 = proj_layer.FPropDefaultTheta(inputs1, in_padding1)
            loss = tf.reduce_sum(output1)
            var_grads = py_utils.ComputeGradients(loss, proj_layer.vars)
            op = optimizer.Accumulator.Params().Set(
                accum_steps=2,
                dtype=tf.float64,
                optimizer_tpl=optimizer.SGD.Params())
            opt = op.Instantiate()
            lr = 1e-1
            with cluster_factory.ForTestingWorker(add_summary=True):
                var_update_op = opt.Apply(lr, var_grads)
            increment_global_step_op = tf.assign_add(
                py_utils.GetOrCreateGlobalStepVar(), 1)

            self.evaluate(tf.global_variables_initializer())
            vars2 = self.evaluate(proj_layer.vars.Flatten())
            loss2_1, grads2_1 = sess.run(
                [loss, var_grads.Transform(tuple)],
                feed_dict={
                    inputs1: np_input1,
                })
            loss2_2, grads2_2 = sess.run(
                [loss, var_grads.Transform(tuple)],
                feed_dict={
                    inputs1: np_input2,
                })
            acc_0 = self.evaluate([
                v for v in tf.global_variables()
                if 'grad_accumulator' in v.name
            ])[0]
            sess.run([var_update_op], feed_dict={
                inputs1: np_input1,
            })
            acc_1 = self.evaluate([
                v for v in tf.global_variables()
                if 'grad_accumulator' in v.name
            ])[0]
            vars2_intermediate = self.evaluate(proj_layer.vars.Flatten())
            self.evaluate(increment_global_step_op)
            sess.run([var_update_op], feed_dict={
                inputs1: np_input2,
            })
            acc_2 = self.evaluate([
                v for v in tf.global_variables()
                if 'grad_accumulator' in v.name
            ])[0]
            vars2_1 = self.evaluate(proj_layer.vars.Flatten())

            summary = tf.Summary.FromString(
                self.evaluate(tf.summary.merge_all()))
            tf.logging.info(f'summary: {summary}')
            self.assertEqual(summary.value[0].tag, 'sgd_lr')

        self.assertAllClose(vars1, vars2)

        self.assertAllClose(acc_0, np.zeros_like(acc_0))
        self.assertAllClose(acc_1, grads2_1['w'][1])
        self.assertAllClose(acc_2, np.zeros_like(acc_0))

        self.assertAllClose(loss1_1, loss2_1)
        self.assertAllClose(loss1_2, loss2_2)
        self.assertAllClose(grads1_1, grads2_1)
        self.assertAllClose(grads1_2, grads2_2)

        self.assertAllClose(vars1, vars2_intermediate)

        self.assertAllClose(vars2[0], grads2_1['w'][0])
        self.assertAllClose(vars2[0], grads2_2['w'][0])

        self.assertAllClose(
            vars1[0] - 0.5 * lr * (grads1_1['w'][1] + grads1_2['w'][1]),
            vars1_1[0])

        self.assertAllClose(
            vars2[0] - 0.5 * lr * (grads2_1['w'][1] + grads2_2['w'][1]),
            vars2_1[0])

        self.assertAllClose(vars2, vars2_intermediate)
        self.assertAllClose(vars1_1, vars2_1)
Example #30
 def testScaleInfeedToGlobalGPU(self, use_per_host_infeed):
     with cluster_factory.ForTestingWorker(gpus=128):
         self.assertEqual(
             batch_utils.scale_infeed_to_global(1024, use_per_host_infeed),
             1024)