def _Apply2(proj_layer, opt):
  """Applies `opt` over two steps and returns variable/gradient snapshots."""
  inputs1 = np_input1
  output1 = proj_layer.FPropDefaultTheta(inputs1, in_padding1)
  loss2_1 = tf.reduce_sum(output1)
  var_grads2_1 = py_utils.ComputeGradients(loss2_1, proj_layer.vars)
  grads2_1 = var_grads2_1.Transform(tuple)
  inputs1 = np_input2
  output1 = proj_layer.FPropDefaultTheta(inputs1, in_padding1)
  loss2_2 = tf.reduce_sum(output1)
  var_grads2_2 = py_utils.ComputeGradients(loss2_2, proj_layer.vars)
  grads2_2 = var_grads2_2.Transform(tuple)
  with cluster_factory.ForTestingWorker(add_summary=True):
    _ = opt.Apply(lr, var_grads2_1)
  # Get `snapshots` of the intermediate variables.
  vars2_intermediate = [v.read_value() for v in proj_layer.vars.Flatten()]
  tf.assign_add(py_utils.GetOrCreateGlobalStepVar(), 1)
  with cluster_factory.ForTestingWorker(add_summary=True):
    _ = opt.Apply(lr, var_grads2_2)
  vars2_1 = proj_layer.vars.Flatten()
  return vars2_intermediate, vars2_1, grads2_1, grads2_2

def testBatchParallel(self):
  # pyformat: disable
  b = builder.Base.Params()
  b = b.Instantiate()
  p = b._BatchParallel(
      'bp',
      b._Seq('main',
             b._Linear('l', 8, 4),
             b._PrintShape('debug'),
             b._Bias('b', 4)))
  # pyformat: enable
  g = tf.Graph()
  with g.as_default():
    l = p.Instantiate()
    x = tf.random.normal(shape=[4, 8])
    with cluster_factory.ForTestingWorker(cpus=4, split_size=1):
      y1 = l.FPropDefaultTheta(x)
    with cluster_factory.ForTestingWorker(cpus=4, split_size=2):
      y2 = l.FPropDefaultTheta(x)
    with cluster_factory.ForTestingWorker(cpus=4, split_size=4):
      y4 = l.FPropDefaultTheta(x)
  cfg = tf.config_pb2.ConfigProto()
  cfg.device_count['CPU'] = 4
  with self.session(config=cfg, graph=g) as sess:
    sess.run(tf.global_variables_initializer())
    v1, v2, v4 = sess.run([y1, y2, y4])
  self.assertAllClose(v1, v2)
  self.assertAllClose(v1, v4)

def _buildGraphAndSaver(logdir,
                        keep_latest_n=5,
                        keep_every_n_hours=None,
                        save_async=False):
  """Builds an MNIST LeNet5 graph and a Saver writing checkpoints to `logdir`."""
  tf.random.set_seed(123)
  g = tf.Graph()
  with g.as_default():
    p = mnist.LeNet5().Task()
    p.input = mnist.LeNet5().Train()
    with cluster_factory.ForTestingWorker(mode='sync', job='controller'):
      _ = p.Instantiate()
    gsv = py_utils.GetOrCreateGlobalStepVar()
    inc = gsv.assign_add(1)
    variables = tf.all_variables()
    sanity_checks = [([gsv], saver.InRange(0, 10))]
    for var in variables:
      sanity_checks.append(([var], saver.IsFinite()))
    sav = saver.Saver(
        logdir,
        variables,
        sanity_checks,
        keep_latest_n=keep_latest_n,
        keep_every_n_hours=keep_every_n_hours,
        async_save=save_async)
  return g, sav, inc

def testMetrics(self, packing_factor, expected_ratio, expected_count):
  p = input_generator.TextPackedInput.Params()
  p.flush_every_n = 0
  p.repeat_count = -1
  p.file_pattern = 'text:' + test_helper.test_src_dir_path(
      'tasks/mt/testdata/en_de.text')
  p.tokenizer = tokenizers.WpmTokenizer.Params().Set(
      vocab_filepath=test_helper.test_src_dir_path('tasks/mt/wpm-ende-2k.voc'),
      vocab_size=2000)
  p.source_max_length = 20
  p.target_max_length = 20
  p.bucket_batch_limit = [8]
  p.packing_factor = packing_factor
  with cluster_factory.ForTestingWorker(add_summary=True):
    with self.session() as sess:
      inp = p.Instantiate()
      inp.GetPreprocessedInputBatch()
      summary_str = sess.run(tf.summary.merge_all(scope='examples'))
      summary = tf.summary.Summary.FromString(summary_str)
      self.assertLen(summary.value, 3)
      self.assertEqual(summary.value[0].tag, 'examples/src_packed_token_ratio')
      self.assertEqual(summary.value[1].tag, 'examples/tgt_packed_token_ratio')
      self.assertEqual(summary.value[2].tag, 'examples/num_packed_examples')
      self.assertAllClose(
          summary.value[0].simple_value, expected_ratio, atol=0.0001)
      self.assertAllClose(
          summary.value[1].simple_value, expected_ratio, atol=0.0001)
      self.assertEqual(summary.value[2].simple_value, expected_count)

def testLinearRampupExponentialDecayScaledByNumSplitScheduleWarmUpInit(
    self):
  p = lr_schedule.LinearRampupExponentialDecayScaledByNumSplitSchedule.Params(
  ).Set(
      warmup_init=0,
      warmup=250000,
      decay_start=32000000,
      decay_end=64000000,
      min=0.5)
  with self.session(), cluster_factory.ForTestingWorker(
      mode='sync', job='trainer_client', gpus=8):
    lrs = p.cls(p)
    pts = [[i, lrs.Value(i).eval()] for i in range(0, 10000000, 1000000)]
    self.assertAllClose(
        pts,
        [
            # Linear increasing from warmup_init=0.
            [0, 0],
            [1000000, 4.0],
            # Constant
            [2000000, 8.0],
            [3000000, 8.0],
            # Exponentially decreasing.
            [4000000, 8.0],
            [5000000, 4.0],
            [6000000, 2.0],
            [7000000, 1.0],
            [8000000, 0.5],
            [9000000, 0.5]
        ])

def testLinearRampupExponentialDecayScaledByNumSplitScheduleWithNumSplits(
    self):
  p = lr_schedule.LinearRampupExponentialDecayScaledByNumSplitSchedule.Params(
  ).Set(
      warmup=250000,
      decay_start=32000000,
      decay_end=64000000,
      min=0.5,
      max=5.0,
      num_splits=8)
  # Increases the number of splits to 32.
  with self.session(), cluster_factory.ForTestingWorker(
      mode='sync', job='trainer_client', gpus=8, split_size=4):
    lrs = p.cls(p)
    pts = [[i, lrs.Value(i).eval()] for i in range(0, 10000000, 1000000)]
    # Values are copied from
    # testLinearRampupExponentialDecayScaledByNumSplitScheduleWithCap.
    self.assertAllClose(
        pts,
        [
            # Linear increasing.
            [0, 1.0],
            [1000000, 4.5],
            # Constant
            [2000000, 5.0],
            [3000000, 5.0],
            # Exponentially decreasing.
            [4000000, 5.0],
            [5000000, 4.0],
            [6000000, 2.0],
            [7000000, 1.0],
            [8000000, 0.5],
            [9000000, 0.5]
        ])

def _testDecoderFPropFloatHelper(self,
                                 func_inline=False,
                                 num_decoder_layers=1,
                                 target_seq_len=5,
                                 residual_start=0):
  """Computes decoder from params and computes loss with random inputs."""
  cluster = cluster_factory.ForTestingWorker(add_summary=True)
  config = tf.ConfigProto(
      graph_options=tf.GraphOptions(
          optimizer_options=tf.OptimizerOptions(
              do_function_inlining=func_inline)))
  with cluster, self.session(use_gpu=False, config=config) as sess:
    tf.set_random_seed(8372749040)
    vn_config = py_utils.VariationalNoiseParams(None, False, False)
    p = self._DecoderParams(vn_config)
    p.rnn_layers = num_decoder_layers
    p.residual_start = residual_start
    p.target_seq_len = target_seq_len
    dec = p.Instantiate()
    src_seq_len = 5
    src_enc = tf.random_normal([src_seq_len, 2, 8], seed=9283748)
    src_enc_padding = tf.constant(
        [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 1.0], [1.0, 1.0]],
        dtype=tf.float32)
    encoder_outputs = py_utils.NestedMap(
        encoded=src_enc, padding=src_enc_padding)
    target_ids = tf.transpose(
        tf.constant([[0, 1, 2, 3], [1, 2, 3, 4], [10, 11, 12, 15],
                     [5, 6, 7, 8], [10, 5, 2, 5]],
                    dtype=tf.int32))
    target_labels = tf.transpose(
        tf.constant([[0, 1, 2, 3], [1, 2, 3, 4], [10, 11, 12, 13],
                     [5, 7, 8, 10], [10, 5, 2, 4]],
                    dtype=tf.int32))
    target_paddings = tf.transpose(
        tf.constant([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0],
                     [1, 1, 1, 1]],
                    dtype=tf.float32))
    target_transcripts = tf.constant(['abcd', 'bcde', 'klmp', 'fghi', 'kfcf'])
    target_weights = 1.0 - target_paddings
    targets = py_utils.NestedMap({
        'ids': target_ids,
        'labels': target_labels,
        'weights': target_weights,
        'paddings': target_paddings,
        'transcripts': target_transcripts,
    })
    metrics = dec.FPropDefaultTheta(encoder_outputs, targets).metrics
    loss = metrics['loss'][0]
    correct_predicts = metrics['fraction_of_correct_next_step_preds'][0]
    summaries = tf.summary.merge(tf.get_collection(tf.GraphKeys.SUMMARIES))
    tf.global_variables_initializer().run()
    loss_v, _ = sess.run([loss, correct_predicts])
    summaries.eval()
    return loss_v

def testLinearRampupExponentialDecayScaledByNumSplitScheduleNoWarmUp(self):
  p = schedule.LinearRampupExponentialDecayScaledByNumSplitSchedule.Params(
  ).Set(warmup=0, decay_start=32000000, decay_end=64000000, min=0.5)
  with self.session(), cluster_factory.ForTestingWorker(
      mode='sync', job='trainer_client', gpus=8):
    lrs = p.Instantiate()
    pts = []
    for step in range(0, 10000000, 1000000):
      with py_utils.GlobalStepContext(step):
        pts.append([step, lrs.Value().eval()])
    self.assertAllClose(
        pts,
        [
            # Constant
            [0, 8.0],
            [1000000, 8.0],
            [2000000, 8.0],
            [3000000, 8.0],
            # Exponentially decreasing.
            [4000000, 8.0],
            [5000000, 4.0],
            [6000000, 2.0],
            [7000000, 1.0],
            [8000000, 0.5],
            [9000000, 0.5]
        ])

def testScaleSplitToInfeedGPU(self, use_per_host_infeed, split_size):
  with cluster_factory.ForTestingWorker(
      gpus=128, split_size=split_size) as cluster:
    num_splits = 128 // split_size
    self.assertEqual(cluster.num_splits_per_client, num_splits)
    self.assertEqual(
        batch_utils.scale_split_to_infeed(1024, use_per_host_infeed),
        1024 * num_splits)

def testScaleInfeedToGlobalTPU(self, use_per_host_infeed, num_tpu_hosts):
  with flagsaver.flagsaver(xla_device='tpu', enable_asserts=False):
    with cluster_factory.ForTestingWorker(
        tpus=128, num_tpu_hosts=num_tpu_hosts):
      num_infeeds = num_tpu_hosts if use_per_host_infeed else 1
      self.assertEqual(
          batch_utils.scale_infeed_to_global(1024, use_per_host_infeed),
          1024 * num_infeeds)

def testBatchSizePerHostInfeed(self):
  with cluster_factory.ForTestingWorker(tpus=128, num_tpu_hosts=8):
    p = base_input_generator.BaseInputGenerator.Params()
    p.batch_size = 16
    p.use_per_host_infeed = True
    input_generator = p.Instantiate()
    self.assertEqual(256, input_generator.InfeedBatchSize())
    self.assertEqual(2048, input_generator.GlobalBatchSize())

def testLinearRampupSqrtDecayByBatchSizeAndReplicasSchedule(self):
  p = schedule.LinearRampupSqrtDecayByBatchSizeAndReplicas.Params().Set(
      warmup_examples=100000, batch_size=100)
  with self.session(), cluster_factory.ForTestingWorker(
      mode='sync', job='trainer_client', gpus=10):
    lrs = p.Instantiate()
    self.assertAllClose(lrs.Value(-1).eval(), 0.0)
    self.assertAllClose(lrs.Value(49).eval(), 0.05)
    self.assertAllClose(lrs.Value(99).eval(), 0.1)
    self.assertAllClose(lrs.Value(399).eval(), 0.05)
    self.assertAllClose(lrs.Value(1599).eval(), 0.025)

def testScaleSplitToInfeedTPU(self, use_per_host_infeed, split_size,
                              num_tpu_hosts):
  with cluster_factory.ForTestingWorker(
      tpus=128, split_size=split_size,
      num_tpu_hosts=num_tpu_hosts) as cluster:
    num_splits = 128 // split_size
    num_infeeds = num_tpu_hosts if use_per_host_infeed else 1
    self.assertEqual(cluster.num_splits_per_client, num_splits)
    self.assertEqual(
        batch_utils.scale_split_to_infeed(1024, use_per_host_infeed),
        1024 * num_splits // num_infeeds)

def testPostProcessLogUtf8(self):
  p = self._testParams()
  p.decoder_metrics.log_utf8 = True
  mdl = p.Instantiate()
  fake_dec_out = {
      'utt_id': ['utt1', 'utt2'],
      'transcripts': ['あいうえ'.encode('utf-8'), 'あ'.encode('utf-8')],
      'topk_decoded': [
          ['あいうえ'.encode('utf-8'), 'あいう'.encode('utf-8')],
          ['wrong'.encode('utf-8'), ''.encode('utf-8')],
      ],
      'topk_scores': [[1.0, 0.9], [1.0, 0.9]],
      'topk_ids': [[1, 2, 3, 4], [2, 3, 4, 5], [3, 4, 5, 6], [4, 5, 6, 7]],
      'topk_lens': [2, 4, 4, 2],
      'target_labels': [[1, 2, 3, 4], [2, 3, 4, 5]],
      'target_paddings': [[0, 0, 0, 1], [0, 0, 0, 1]],
      'norm_wer_errors': [[0, 0], [1, 1]],
      'norm_wer_words': [[4, 4], [1, 1]],
  }
  fake_dec_out = {k: np.array(v) for k, v in fake_dec_out.items()}
  metrics_dict = mdl.CreateDecoderMetrics()
  with cluster_factory.ForTestingWorker(add_summary=True):
    with mock.patch.object(tf.logging, 'info', autospec=True) as mock_info:
      mdl.PostProcessDecodeOut(fake_dec_out, metrics_dict)
      mock_info.assert_has_calls([
          mock.call('utt_id: %s', 'utt1'),
          mock.call(' ref_str: %s', 'あいうえ'),
          mock.call(' ref_ids: %s', [1, 2, 3]),
      ])
      mock_info.assert_has_calls([
          # Skips np.array values for ValueError from `inspect` module.
          # mock.call(' top_hyp_ids: %s', np.array([1, 2])),
          mock.call(' %f: %s', 1.0, 'あいうえ'),
          mock.call(' ins: %d, subs: %d, del: %d, total: %d', 0, 0, 0, 0),
          mock.call(' %f: %s', 0.9, 'あいう'),
          mock.call(' ins: %d, subs: %d, del: %d, total: %d', 0, 1, 0, 1),
          mock.call('utt_id: %s', 'utt2'),
          mock.call(' ref_str: %s', 'あ'),
          mock.call(' ref_ids: %s', [2, 3, 4]),
      ])
      mock_info.assert_has_calls([
          # Skips np.array values for ValueError from `inspect` module.
          # mock.call(' top_hyp_ids: %s', np.array([3, 4, 5, 6])),
          mock.call(' %f: %s', 1.0, 'wrong'),
          mock.call(' ins: %d, subs: %d, del: %d, total: %d', 0, 1, 0, 1),
          mock.call(' %f: %s', 0.9, ''),
          mock.call(' ins: %d, subs: %d, del: %d, total: %d', 0, 0, 1, 1)
      ])

def testBatchSizeInInputGenerator(self):
  with self.session() as sess:
    tf.set_random_seed(_TF_RANDOM_SEED)
    p = self._testParams()
    with cluster_factory.ForTestingWorker(
        mode='sync', job='trainer_client', gpus=5):
      mdl = p.cls(p)
      mdl.FPropDefaultTheta()
      loss = mdl.loss
      tf.global_variables_initializer().run()
      _ = sess.run(loss)
      self.assertEqual(mdl.input_generator.scaled_bucket_batch_limit,
                       [20, 40])

def testBatchSizeInInputGenerator(self):
  with self.session():
    tf.random.set_seed(_TF_RANDOM_SEED)
    p = self._testParams()
    with cluster_factory.ForTestingWorker(
        mode='sync', job='trainer_client', gpus=5):
      mdl = p.Instantiate()
      mdl.FPropDefaultTheta()
      loss = mdl.loss
      self.evaluate(tf.global_variables_initializer())
      _ = self.evaluate(loss)
      self.assertEqual(mdl.input_generator.infeed_bucket_batch_limit, [40])

def Run(num_splits):
  with self.session(use_gpu=False, graph=tf.Graph()) as sess:
    tf.set_random_seed(93820981)
    p = self._testParams()
    p.input.bucket_batch_limit = [
        b * 2 / num_splits for b in p.input.bucket_batch_limit
    ]
    with cluster_factory.ForTestingWorker(gpus=num_splits):
      mdl = p.cls(p)
      metrics = mdl.FPropDefaultTheta()[0]
    tf.global_variables_initializer().run()
    return sess.run(metrics['loss'])

def Run(num_splits):
  p = self._testParams()
  with self.session(use_gpu=False, graph=tf.Graph()):
    tf.random.set_seed(93820981)
    p.input.cur_iter_in_seed = False
    p.input.bucket_batch_limit = [
        b * 2 / num_splits for b in p.input.bucket_batch_limit
    ]
    with cluster_factory.ForTestingWorker(gpus=num_splits, do_eval=True):
      mdl = p.Instantiate()
      metrics = mdl.FPropDefaultTheta()[0]
    self.evaluate(tf.global_variables_initializer())
    return self.evaluate(metrics['loss'])

def testCreateTpuEnqueueOpsPerHostInfeed(self):

  class FooInputGenerator(base_input_generator.BaseInputGenerator):

    def _InputBatch(self):
      return py_utils.NestedMap(
          inp=tf.constant(1.0, shape=[128, 3], dtype=tf.float32))

  with cluster_factory.ForTestingWorker(tpus=128, num_tpu_hosts=16):
    with self._DeviceAssignment():
      p = FooInputGenerator.Params()
      p.use_per_host_infeed = True
      input_generator = p.Instantiate()
      input_generator.CreateTpuEnqueueOps()
      batch = input_generator.TpuDequeueBatch()
      self.assertEqual(batch.inp.shape.as_list(), [16, 3])

def testLinearRampupPiecewiseConstantSchedule(self):
  p = lr_schedule.LinearRampupPiecewiseConstantSchedule.Params().Set(
      boundaries=[40, 64, 80, 96],
      lrs=[1.0, 0.1, 0.01, 0.001],
  )
  with self.session(), cluster_factory.ForTestingWorker(
      mode='sync', job='trainer_client', tpus=8):
    lrs = p.cls(p)
    pts = [[i, lrs.Value(i).eval()] for i in range(0, 15, 1)]
    self.assertAllClose(
        pts,
        [[0, 0.0], [1, 1.6], [2, 3.2], [3, 4.8], [4, 6.4], [5, 8.0],
         [6, 8.0], [7, 8.0], [8, 8.0], [9, 0.8], [10, 0.8], [11, 0.08],
         [12, 0.08], [13, 0.008], [14, 0.008]])

def testStatsCounter(self):
  with self.session() as sess:
    with cluster_factory.ForTestingWorker(add_summary=True):
      foo = summary_utils.StatsCounter('foo')
      val = foo.Value()
      inc = foo.IncBy(100)
    tf.global_variables_initializer().run()
    self.assertAllEqual(0, val.eval())
    self.assertAllEqual(100, sess.run(inc))
    self.assertAllEqual(100, val.eval())
    self.assertAllEqual([100, 200], sess.run([val, inc]))
    self.assertAllEqual([200, 300], sess.run([val, inc]))
    summary = tf.Summary.FromString(sess.run(tf.summary.merge_all()))
    self.assertTrue(any('foo' in v.tag for v in summary.value))

def testMnistLeNet5(self):
  g = tf.Graph()
  with g.as_default():
    tf.set_random_seed(1618)
    p = model_registry.GetParams('image.mnist.LeNet5', 'Test')
    p.random_seed = 73234288
    p.input.ckpt = self.data_path
    p.task.params_init = py_utils.WeightInit.Uniform(0.1, seed=73234288)
    with cluster_factory.ForTestingWorker(mode='sync', job='trainer_client'):
      model = p.cls(p)
      model.ConstructFPropBPropGraph()
  with self.session(graph=g) as sess:
    sess.run(tf.global_variables_initializer())
    CompareToGoldenSingleFloat(self, 2.302583, self._runOneStep(model, sess))
    CompareToGoldenSingleFloat(self, 2.302405, self._runOneStep(model, sess))

def testMnistV2(self):
  g = tf.Graph()
  with g.as_default():
    tf.random.set_seed(1618)
    p = model_registry.GetParams('test.MnistV2', 'Test')
    p.random_seed = 73234288
    p.input.ckpt = self.data_path
    p.task.params_init = py_utils.WeightInit.Uniform(0.1, seed=73234288)
    with cluster_factory.ForTestingWorker(mode='sync', job='trainer_client'):
      model = p.Instantiate()
      model.ConstructFPropBPropGraph()
  with self.session(graph=g):
    self.evaluate(tf.global_variables_initializer())
    CompareToGoldenSingleFloat(self, 2.303070, self._runOneStep(model))
    CompareToGoldenSingleFloat(self, 2.297364, self._runOneStep(model))

def testLinearRampupPiecewiseConstantSchedule(self):
  p = schedule.LinearRampupPiecewiseConstantSchedule.Params().Set(
      boundaries=[40, 64, 80, 96],
      lrs=[1.0, 0.1, 0.01, 0.001],
  )
  with self.session(), cluster_factory.ForTestingWorker(
      mode='sync', job='trainer_client', tpus=8):
    lrs = p.Instantiate()
    pts = []
    for step in range(0, 15, 1):
      with py_utils.GlobalStepContext(step):
        pts.append([step, lrs.Value().eval()])
    self.assertAllClose(
        pts,
        [[0, 0.0], [1, 1.6], [2, 3.2], [3, 4.8], [4, 6.4], [5, 8.0],
         [6, 8.0], [7, 8.0], [8, 0.8], [9, 0.8], [10, 0.08], [11, 0.08],
         [12, 0.008], [13, 0.008], [14, 0.008]])

def testLinearRampupExponentialDecayScaledByNumSplitScheduleExpOnly(self):
  p = schedule.LinearRampupExponentialDecayScaledByNumSplitSchedule.Params(
  ).Set(warmup=0, decay_start=0, decay_end=32000000, min=0.5)
  with self.session(), cluster_factory.ForTestingWorker(
      mode='sync', job='trainer_client', gpus=8):
    lrs = p.Instantiate()
    pts = [[i, lrs.Value(i).eval()] for i in range(0, 6000000, 1000000)]
    self.assertAllClose(
        pts,
        [
            # Exponentially decreasing.
            [0, 8.0],
            [1000000, 4.0],
            [2000000, 2.0],
            [3000000, 1.0],
            [4000000, 0.5],
            [5000000, 0.5]
        ])

def testOverrideVarsFromCheckpointWithIgnoreRules(self):
  with self.session(use_gpu=False) as sess:
    tf.set_random_seed(8372749040)
    cfg = model_registry.GetParams('image.mnist.LeNet5', 'Train')
    with cluster_factory.ForTestingWorker(mode='sync', job='trainer_client'):
      cfg.cls(cfg)
    tf.global_variables_initializer().run()
    self.assertAllClose(
        # These are initialized values before overriding with checkpoint.
        self._GetLeNetVarsFirstVal(sess),
        [-0.005945, -0.036722, 0.0])
    checkpoint_path = test_helper.test_src_dir_path(
        'core/testdata/lenet_test_model')
    variable_loading_rules = [('lenet5/conv0/w/var', 'lenet5/conv0/w/var'),
                              ('lenet5/conv1/w/var', 'lenet5/conv1/w/var')]
    variable_ignore_rules = ['lenet5/conv1/w/var']
    py_utils._OverrideVarsFromCheckpoint(sess, tf.all_variables(),
                                         checkpoint_path,
                                         variable_loading_rules,
                                         variable_ignore_rules)
    self.assertAllClose(
        # Now only conv0 weights have been overridden.
        self._GetLeNetVarsFirstVal(sess),
        [0.043092, -0.036722, 0.0])

def testRepeatLayerUnrolledEval(self):
  repeat = 100
  with cluster_factory.ForTestingWorker(
      mode='sync', job='trainer_client', do_eval=True):
    tf.random.set_seed(24332)
    p = layers.RepeatLayer.Params().Set(
        name='recurrent',
        repeat=repeat,
        per_layer_vars=True,
        unrolled_in_eval=True,
        body=lingvo_layers.FCLayer.Params().Set(input_dim=2, output_dim=2))
    l = p.Instantiate()
    x = tf.random.normal(shape=[2, 2])
    y = l.FPropDefaultTheta(x)
    self.evaluate(tf.global_variables_initializer())
    x_val, y_val, w = self.evaluate([x, y, l.vars])
    np_val = x_val
    # relu(act \dot w + b)
    for i in range(repeat):
      body_i = w['body_iter_%05d' % i]
      np_val = np.maximum(0, np.dot(np_val, body_i.w) + body_i.b)
    self.assertAllClose(np_val, y_val)

def testBasic(self):
  logdir = tempfile.mkdtemp()
  # Create a dummy file that looks like a checkpoint that shouldn't
  # be touched.
  with tf.io.gfile.GFile(logdir + '/ckpt-foo', 'w') as f:
    f.write('contents')
  g = tf.Graph()
  with g.as_default():
    p = mnist.LeNet5().Task()
    p.input = mnist.LeNet5().Train()
    with cluster_factory.ForTestingWorker(mode='sync', job='controller'):
      _ = p.Instantiate()
    gsv = py_utils.GetOrCreateGlobalStepVar()
    inc = gsv.assign_add(1)
    variables = tf.all_variables()
    sanity_checks = [([gsv], saver.InRange(0, 10))]
    for var in variables:
      sanity_checks.append(([var], saver.IsFinite()))
    sav = saver.Saver(
        logdir,
        variables,
        sanity_checks,
        keep_latest_n=5,
        keep_every_n_hours=1e-9)
  with self.session(graph=g) as sess:
    # Creates a few checkpoints.
    sess.run(tf.global_variables_initializer())
    for _ in range(10):
      sess.run(inc)
      _ = sav.Save(sess)
    # Restore to the latest.
    sess.run(tf.global_variables_initializer())
    _ = sav.Restore(sess)
    # Restore to a specific checkpoint.
    sess.run(tf.global_variables_initializer())
    _ = sav.Restore(sess, 6)
    # Increments global_step out of range, Save() fails.
    for _ in range(5):
      sess.run(inc)
    with self.assertRaises(tf.errors.AbortedError):
      _ = sav.Save(sess)
  filenames = tf.io.gfile.glob('{}/*'.format(logdir))
  filenames = [x[len(logdir) + 1:] for x in filenames]
  print('\n'.join(filenames))
  self.assertIn('checkpoint', filenames)
  meta_files = []
  for f in filenames:
    if f.endswith('.meta'):
      meta_files.append(f)
  # A .meta for each checkpoint.
  self.assertEqual(len(meta_files), 6)
  # 1 for checkpoint. 3 files per checkpoint. 5 good checkpoints, 1 bad.
  # 1 extra file contains the error message, and 1 dummy file.
  self.assertEqual(len(filenames), 1 + (5 + 1) * 3 + 1 + 1)

def testAccumulator(self):
  # testAccumulator compares
  # - explicit averaging of independently computed var_grads1 and
  #   var_grads2,
  # - Accumulator(SGD) optimizer effectively doing this over 2 steps.
  np.random.seed(12345)
  np_input1 = np.random.normal(0.1, 0.5, [2, 4, 3])
  np.random.seed(12346)
  np_input2 = np.random.normal(0.1, 0.5, [2, 4, 3])

  with self.session(use_gpu=True, graph=tf.Graph()) as sess:
    tf.random.set_seed(123456)
    params = layers.ProjectionLayer.Params()
    params.name = 'proj'
    params.dtype = tf.float64
    params.input_dim = 3
    params.output_dim = 2
    params.params_init = py_utils.WeightInit.Gaussian(0.01, 123456)
    params.batch_norm = False
    proj_layer = layers.ProjectionLayer(params)
    inputs1 = tf.placeholder(shape=[2, 4, 3], dtype=tf.float64)
    in_padding1 = tf.zeros([2, 4, 1], dtype=tf.float64)
    inputs2 = tf.placeholder(shape=[2, 4, 3], dtype=tf.float64)
    in_padding2 = tf.zeros([2, 4, 1], dtype=tf.float64)
    output1 = proj_layer.FPropDefaultTheta(inputs1, in_padding1)
    output2 = proj_layer.FPropDefaultTheta(inputs2, in_padding2)
    loss1 = tf.reduce_sum(output1)
    loss2 = tf.reduce_sum(output2)
    var_grads1 = py_utils.ComputeGradients(loss1, proj_layer.vars)
    var_grads2 = py_utils.ComputeGradients(loss2, proj_layer.vars)
    op = optimizer.SGD.Params()
    opt = op.Instantiate()
    lr = 1e-1
    with tf.control_dependencies([loss1, loss2]):
      var_update_op1 = opt.Apply(
          lr, py_utils.ApplyGradMultiplier(var_grads1, 1. / 2.))
    with tf.control_dependencies([var_update_op1]):
      var_update_op2 = opt.Apply(
          lr, py_utils.ApplyGradMultiplier(var_grads2, 1. / 2.))

    self.evaluate(tf.global_variables_initializer())
    vars1 = self.evaluate(proj_layer.vars.Flatten())
    loss1_1, grads1_1, loss1_2, grads1_2 = sess.run(
        [
            loss1,
            var_grads1.Transform(tuple), loss2,
            var_grads2.Transform(tuple)
        ],
        feed_dict={
            inputs1: np_input1,
            inputs2: np_input2,
        },
    )
    sess.run([var_update_op2],
             feed_dict={
                 inputs1: np_input1,
                 inputs2: np_input2,
             })
    vars1_1 = self.evaluate(proj_layer.vars.Flatten())

  with self.session(use_gpu=True, graph=tf.Graph()) as sess:
    tf.random.set_seed(123456)
    params = layers.ProjectionLayer.Params()
    params.name = 'proj'
    params.dtype = tf.float64
    params.input_dim = 3
    params.output_dim = 2
    params.params_init = py_utils.WeightInit.Gaussian(0.01, 123456)
    params.batch_norm = False
    proj_layer = layers.ProjectionLayer(params)
    in_padding1 = tf.zeros([2, 4, 1], dtype=tf.float64)
    inputs1 = tf.placeholder(shape=[2, 4, 3], dtype=tf.float64)
    output1 = proj_layer.FPropDefaultTheta(inputs1, in_padding1)
    loss = tf.reduce_sum(output1)
    var_grads = py_utils.ComputeGradients(loss, proj_layer.vars)
    op = optimizer.Accumulator.Params().Set(
        accum_steps=2, dtype=tf.float64, optimizer_tpl=optimizer.SGD.Params())
    opt = op.Instantiate()
    lr = 1e-1
    with cluster_factory.ForTestingWorker(add_summary=True):
      var_update_op = opt.Apply(lr, var_grads)
    increment_global_step_op = tf.assign_add(
        py_utils.GetOrCreateGlobalStepVar(), 1)

    self.evaluate(tf.global_variables_initializer())
    vars2 = self.evaluate(proj_layer.vars.Flatten())
    loss2_1, grads2_1 = sess.run(
        [loss, var_grads.Transform(tuple)], feed_dict={
            inputs1: np_input1,
        })
    loss2_2, grads2_2 = sess.run(
        [loss, var_grads.Transform(tuple)], feed_dict={
            inputs1: np_input2,
        })
    acc_0 = self.evaluate(
        [v for v in tf.global_variables() if 'grad_accumulator' in v.name])[0]
    sess.run([var_update_op], feed_dict={
        inputs1: np_input1,
    })
    acc_1 = self.evaluate(
        [v for v in tf.global_variables() if 'grad_accumulator' in v.name])[0]
    vars2_intermediate = self.evaluate(proj_layer.vars.Flatten())
    self.evaluate(increment_global_step_op)
    sess.run([var_update_op], feed_dict={
        inputs1: np_input2,
    })
    acc_2 = self.evaluate(
        [v for v in tf.global_variables() if 'grad_accumulator' in v.name])[0]
    vars2_1 = self.evaluate(proj_layer.vars.Flatten())

    summary = tf.Summary.FromString(self.evaluate(tf.summary.merge_all()))
    tf.logging.info(f'summary: {summary}')
    self.assertEqual(summary.value[0].tag, 'sgd_lr')

  self.assertAllClose(vars1, vars2)
  self.assertAllClose(acc_0, np.zeros_like(acc_0))
  self.assertAllClose(acc_1, grads2_1['w'][1])
  self.assertAllClose(acc_2, np.zeros_like(acc_0))
  self.assertAllClose(loss1_1, loss2_1)
  self.assertAllClose(loss1_2, loss2_2)
  self.assertAllClose(grads1_1, grads2_1)
  self.assertAllClose(grads1_2, grads2_2)
  self.assertAllClose(vars1, vars2_intermediate)
  self.assertAllClose(vars2[0], grads2_1['w'][0])
  self.assertAllClose(vars2[0], grads2_2['w'][0])
  self.assertAllClose(
      vars1[0] - 0.5 * lr * (grads1_1['w'][1] + grads1_2['w'][1]), vars1_1[0])
  self.assertAllClose(
      vars2[0] - 0.5 * lr * (grads2_1['w'][1] + grads2_2['w'][1]), vars2_1[0])
  self.assertAllClose(vars2, vars2_intermediate)
  self.assertAllClose(vars1_1, vars2_1)

def testScaleInfeedToGlobalGPU(self, use_per_host_infeed):
  with cluster_factory.ForTestingWorker(gpus=128):
    self.assertEqual(
        batch_utils.scale_infeed_to_global(1024, use_per_host_infeed), 1024)