def _DecoderGradientCheckerHelper(self,
                                  decoder_cls,
                                  feed_att_context_to_softmax=False):
  """Checks symbolic decoder gradients against numeric estimates.

  Builds a float64 decoder in a fresh graph, takes the gradient of the
  'loss' metric w.r.t. every trainable variable, and asserts that each
  symbolic gradient is close to the value returned by
  test_utils.ComputeNumericGradient (delta=1e-5).

  Args:
    decoder_cls: Decoder class forwarded to self._DecoderParams.
    feed_att_context_to_softmax: Value assigned to
      p.feed_attention_context_vec_to_softmax.

  Returns:
    A dict mapping each variable name to its evaluated symbolic gradient.

  Raises:
    ValueError: If the gradient of some variable is neither a tf.Tensor nor
      a tf.IndexedSlices (for example None).
  """
  with self.session(use_gpu=True, graph=tf.Graph()) as sess:
    tf.set_random_seed(_TF_RANDOM_SEED)
    p = self._DecoderParams(dtype=tf.float64, decoder_cls=decoder_cls)
    p.feed_attention_context_vec_to_softmax = feed_att_context_to_softmax
    dec = p.Instantiate()
    encoder_outputs, targets = self._Inputs(dtype=tf.float64)
    loss, _ = dec.FPropDefaultTheta(encoder_outputs, targets).metrics['loss']
    all_vars = tf.trainable_variables()
    grads = tf.gradients(loss, all_vars)
    print('num of vars ', len(all_vars))

    def DenseGrad(var, grad):
      """Returns `grad` as a dense tensor."""
      if isinstance(grad, tf.Tensor):
        return grad
      if isinstance(grad, tf.IndexedSlices):
        # Scatter-add the sparse rows into a dense tensor whose leading
        # dimension matches that of the variable.
        return tf.unsorted_segment_sum(grad.values, grad.indices,
                                       tf.shape(var)[0])
      # Fail fast with the variable name instead of returning None, which
      # would only surface later as an AttributeError on `.eval()`.
      raise ValueError('Unsupported gradient %r for variable %s' %
                       (grad, var.name))

    grads = [DenseGrad(v, g) for v, g in zip(all_vars, grads)]
    tf.global_variables_initializer().run()
    symbolic_grads = [gd.eval() for gd in grads]
    numerical_grads = [
        test_utils.ComputeNumericGradient(sess, loss, v, delta=1e-5)
        for v in all_vars
    ]
    rets = {}
    for v, x, y in zip(all_vars, symbolic_grads, numerical_grads):
      print('symbolic_grads, numerical_grads :', v.name)
      print(x)
      print(y)
      self.assertAllClose(x, y)
      rets[v.name] = x
    return rets
def testGraphLayer(self):
  """Runs a three-node GraphLayer and checks output shapes and values.

  The graph doubles `x.a` into `y.c`, adds 2 to `x.b` to produce `y.d`, and
  emits their sum and difference as `y.e` and `y.f`.
  """
  graph = tf.Graph()
  with graph.as_default():
    with self.SetEval(True):
      tf.set_random_seed(24332)

      def _FnMeta(*shapes):
        return py_utils.NestedMap(flops=1, out_shapes=shapes)

      p = layers.GraphLayer.Params().Set(
          name='graph',
          input_endpoints=['x'],
          output_endpoints=['y'],
          sub=[
              ('x.a->y.c',
               layers.FnLayer.Params().Set(
                   fn=lambda x: 2 * x, fn_meta=_FnMeta)),
              ('x.b->y.d',
               layers.FnLayer.Params().Set(
                   name='bar', fn=lambda x: x + 2, fn_meta=_FnMeta)),
              ('y.c,y.d->y.e, y.f',
               layers.FnLayer.Params().Set(
                   name='baz',
                   fn=lambda x, y: (x + y, x - y),
                   fn_meta=_FnMeta)),
          ])
      graph_layer = p.Instantiate()
      inputs = py_utils.NestedMap(a=tf.constant(1.0), b=tf.constant(2.0))
      outputs = graph_layer.FProp(graph_layer.theta, inputs)

      # The shapes reported by FPropMeta must match the actual outputs.
      input_shapes = py_utils.Transform(lambda t: tshape.Shape(t.shape),
                                        inputs)
      meta_shapes = graph_layer.FPropMeta(p, input_shapes).out_shapes[0]
      actual_dims = py_utils.Transform(lambda t: t.shape.as_list(), outputs)
      meta_dims = py_utils.Transform(lambda t: t.ToTensorShape().as_list(),
                                     meta_shapes)
      self.assertDictEqual(actual_dims, meta_dims)

  with self.session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())
    y_val = sess.run(outputs)
    print(y_val)
    expected = py_utils.NestedMap(c=2.0, d=4.0, e=6.0, f=-2.0)
    self.assertEqual(expected, y_val)
def testDecoderFProp(self):
  """Builds a decoder with variational noise params and runs FProp once.

  Verifies that the accuracy metric is present, that 'loss' equals
  'log_pplx', and that the per-sequence loss has one entry per target.
  """
  with self.session(use_gpu=False) as sess:
    tf.set_random_seed(8372749040)
    vn_config = py_utils.VariationalNoiseParams(None, True, False, seed=12345)
    p = self._DecoderParams(vn_config=vn_config)
    metrics, per_sequence_loss = self._getDecoderFPropMetrics(params=p)
    self.assertIn('fraction_of_correct_next_step_preds', metrics)
    tf.global_variables_initializer().run()
    fetched = sess.run([metrics, per_sequence_loss])
    metrics_val, per_sequence_loss_val = fetched
    tf.logging.info('metrics=%s, per_sequence_loss=%s', metrics_val,
                    per_sequence_loss_val)
    self.assertEqual(metrics_val['loss'], metrics_val['log_pplx'])
    # The target batch holds 4 sequences, hence 4 per-sequence losses.
    self.assertEqual(per_sequence_loss_val.shape, (4,))
def testFPropEvalMode(self):
  """Runs FProp three times in eval mode and compares to golden values."""
  expected_vals = [
      [326.765106, 10.373495],
      [306.018066, 10.373494],
      [280.08429, 10.373492],
  ]
  with self.session() as sess:
    with self.SetEval(True):
      tf.set_random_seed(_TF_RANDOM_SEED)
      params = self._testParams()
      model = params.Instantiate()
      model.FPropDefaultTheta()
      fetches = (model.loss, model.eval_metrics['log_pplx'][0])
      tf.global_variables_initializer().run()
      # Each run yields one (loss, log_pplx) pair.
      vals = [sess.run(fetches) for _ in range(3)]
      print('actual vals = ', vals)
      self.assertAllClose(vals, expected_vals)
def _DecoderFPropHelper(self, decoder_cls, dtype, feed_att_context_to_softmax):
  """Runs decoder FProp and compares the loss with a golden value.

  Args:
    decoder_cls: Decoder class forwarded to self._DecoderParams.
    dtype: dtype used for both the decoder params and the test inputs.
    feed_att_context_to_softmax: Value assigned to
      p.feed_attention_context_vec_to_softmax; also selects which golden
      loss is expected.
  """
  with self.session(use_gpu=True):
    tf.set_random_seed(_TF_RANDOM_SEED)
    p = self._DecoderParams(dtype=dtype, decoder_cls=decoder_cls)
    p.feed_attention_context_vec_to_softmax = feed_att_context_to_softmax
    decoder_layer = p.Instantiate()
    encoder_outputs, targets = self._Inputs(dtype=dtype)
    fprop_out = decoder_layer.FPropDefaultTheta(encoder_outputs, targets)
    loss, _ = fprop_out.metrics['loss']
    tf.global_variables_initializer().run()
    actual_loss = loss.eval()
    print('actual loss = ', actual_loss)
    golden_loss = (7.618915
                   if p.feed_attention_context_vec_to_softmax else 7.624220)
    CompareToGoldenSingleFloat(self, golden_loss, actual_loss)
def testDropout(self):
  """Runs a small TransformerLm and checks avg_xent and the argmax output.

  Inputs are random ids; targets are the inputs shifted by one step, and the
  final time step is marked as padding.
  """
  seed = 12345
  tf.set_random_seed(seed)
  np.random.seed(seed)
  time, batch, dims, hidden_dim, vocab = 5, 3, 6, 4, 8

  p = lm_layers.TransformerLm.Params()
  p.name = 'transformerlm'
  p.vocab_size = vocab
  p.emb.vocab_size = vocab
  p.emb.embedding_dim = dims
  p.model_dim = dims
  p.num_trans_layers = 3
  p.trans_tpl.source_dim = dims
  p.trans_tpl.tr_atten_tpl.num_attention_heads = 2
  p.trans_tpl.tr_fflayer_tpl.hidden_dim = hidden_dim
  p.softmax.input_dim = dims
  p.softmax.num_classes = vocab

  with self.session(use_gpu=True) as sess:
    lm = p.Instantiate()

    input_ids = np.random.randint(vocab, size=[time, batch])
    target_ids = np.zeros([time, batch])
    # Next-token targets: row t of the targets is row t + 1 of the inputs.
    target_ids[:-1] = input_ids[1:]
    inputs = tf.constant(input_ids, tf.int32)
    padding_vals = np.zeros([time, batch])
    padding_vals[-1] = 1.0
    paddings = tf.constant(padding_vals, tf.float32)
    targets = tf.constant(target_ids, tf.int32)

    sess.run(tf.global_variables_initializer())
    xent_output, _ = lm.FPropDefaultTheta(
        inputs=inputs,
        paddings=paddings,
        labels=py_utils.NestedMap(
            class_weights=1 - paddings, class_ids=targets))
    xent_output_val = sess.run(xent_output)
    print('xent_output_val', xent_output_val)
    test_utils.CompareToGoldenSingleFloat(self, 3.038596,
                                          xent_output_val.avg_xent)
    expected_argmax = np.argmax(xent_output_val.logits, axis=-1)
    self.assertAllEqual(xent_output_val.per_example_argmax, expected_argmax)
def testBatchSizeInInputGenerator(self):
  """Checks infeed_bucket_batch_limit under a 5-GPU sync trainer cluster."""
  with self.session() as sess:
    tf.set_random_seed(_TF_RANDOM_SEED)
    p = self._testParams()

    cluster_p = cluster_factory.Cluster.Params()
    cluster_p.mode = 'sync'
    cluster_p.job = 'trainer_client'
    cluster_p.worker.name = '/job:localhost'
    cluster_p.worker.gpus_per_replica = 5
    cluster_p.input.name = '/job:localhost'
    cluster_p.input.replicas = 1
    cluster_p.input.gpus_per_replica = 0

    with cluster_p.Instantiate():
      model = p.Instantiate()
      model.FPropDefaultTheta()
      loss = model.loss
      tf.global_variables_initializer().run()
      # The loss value is not inspected; the run only has to succeed.
      sess.run(loss)
      batch_limit = model.input_generator.infeed_bucket_batch_limit
      self.assertEqual(batch_limit, [40])
def testLmInference(self):
  """Runs the 'default' inference subgraph on two sentences.

  Feeds two text strings and checks the shapes of the per-sample and
  per-token log perplexities and of the paddings.
  """
  tf.set_random_seed(93820986)
  p = self._Params()
  p.input = self._InputParams(for_training=False)
  tf.logging.info('Params: %s', p.ToText())
  with self.session(use_gpu=False) as sess:
    mdl = p.Instantiate()
    subgraphs = mdl.Inference()
    # assertIn reports both the missing key and the container on failure.
    self.assertIn('default', subgraphs)
    fetches, feeds = subgraphs['default']
    tf.global_variables_initializer().run()
    vals = sess.run(
        fetches=fetches,
        feed_dict={feeds['text']: ['pray for world peace', 'happy birthday']})
    print('actual vals = ', vals)
    self.assertEqual(vals['log_pplx_per_sample'].shape, (2,))
    self.assertEqual(vals['log_pplx_per_token'].shape, (2, 20))
    self.assertEqual(vals['paddings'].shape, (2, 20))
def testFProp(self):
  """Runs FProp five times and compares (loss, log_pplx) to golden values."""
  expected_vals = [
      [233.403564, 10.373495],
      [235.996948, 10.373494],
      [217.843338, 10.373493],
      [217.843338, 10.373491],
      [159.492432, 10.373494],
  ]
  with self.session() as sess:
    tf.set_random_seed(_TF_RANDOM_SEED)
    params = self._testParams()
    model = params.Instantiate()
    model.FPropDefaultTheta()
    fetches = (model.loss, model.eval_metrics['log_pplx'][0])
    tf.global_variables_initializer().run()
    vals = [sess.run(fetches) for _ in range(5)]
    self.assertAllClose(vals, expected_vals)
def testMnistV2(self):
  """Trains 'test.MnistV2' for two steps and checks both golden values."""
  graph = tf.Graph()
  with graph.as_default():
    tf.set_random_seed(1618)
    p = model_registry.GetParams('test.MnistV2', 'Test')
    p.random_seed = 73234288
    p.input.ckpt = self.data_path
    p.task.params_init = py_utils.WeightInit.Uniform(0.1, seed=73234288)
    with cluster_factory.ForTestingWorker(mode='sync', job='trainer_client'):
      model = p.Instantiate()
      model.ConstructFPropBPropGraph()

  with self.session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())
    # Steps run strictly in sequence; each has its own golden value.
    first_step = self._runOneStep(model, sess)
    CompareToGoldenSingleFloat(self, 2.302583, first_step)
    second_step = self._runOneStep(model, sess)
    CompareToGoldenSingleFloat(self, 2.142516, second_step)
def _testDecoderBeamSearchDecodeHelperWithOutput(self,
                                                 params,
                                                 src_seq_len=None,
                                                 src_enc_padding=None):
  """Runs beam search decode on random encodings and returns one hypothesis.

  Note that `params` is modified in place: num_hyps_per_beam, dtype and
  target_seq_len are overwritten before the decoder is instantiated.

  Args:
    params: Decoder params; mutated and then instantiated here.
    src_seq_len: Length of the random encoder output; 5 is used when None.
    src_enc_padding: Source paddings tensor; a fixed 5x2 padding constant is
      used when None.

  Returns:
    The Hypothesis parsed from entry [2, 5] of the fetched done_hyps.
  """
  session_config = tf.ConfigProto(
      graph_options=tf.GraphOptions(
          optimizer_options=tf.OptimizerOptions(do_function_inlining=False)))
  with self.session(use_gpu=False, config=session_config) as sess:
    with self.SetEval(True):
      tf.set_random_seed(837274904)
      np.random.seed(837575)
      params.beam_search.num_hyps_per_beam = 4
      params.dtype = tf.float32
      params.target_seq_len = 5
      dec = params.Instantiate()

      if src_seq_len is None:
        src_seq_len = 5
      random_enc = np.random.uniform(size=(src_seq_len, 2, 8))
      src_enc = tf.constant(random_enc, tf.float32)
      if src_enc_padding is None:
        src_enc_padding = tf.constant(
            [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 1.0], [1.0, 1.0]],
            dtype=tf.float32)
      encoder_outputs = py_utils.NestedMap(
          encoded=src_enc, padding=src_enc_padding)
      done_hyps = dec.BeamSearchDecode(encoder_outputs).done_hyps
      tf.global_variables_initializer().run()

      softmax_wts = sess.run(dec.vars.softmax)
      print('softmax wts = ', softmax_wts)

      (done_hyps_serialized,) = sess.run([done_hyps])
      hyp = Hypothesis()
      print('done hyps shape = ', done_hyps_serialized.shape)
      # Dump the serialized length of every entry in the 5x8 window.
      for row in range(5):
        for col in range(8):
          print(row, col, len(done_hyps_serialized[row, col]))
      hyp.ParseFromString(done_hyps_serialized[2, 5])
      print('hyp = ', hyp)
      return hyp
def testParallelLayer(self):
  """Compares ParallelLayer output with a numpy sum of its three branches.

  Each branch is recomputed in numpy as relu(x . w + b) from the fetched
  variables of the 'foo', 'bar' and 'seq/baz' FC layers.
  """
  graph = tf.Graph()
  with graph.as_default():
    with self.SetEval(True):
      tf.set_random_seed(24332)
      p = layers.ParallelLayer.Params().Set(
          name='test',
          merge=lambda xs: tuple([tf.add_n(x) for x in zip(*xs)]),
          sub=[
              lingvo_layers.FCLayer.Params().Set(
                  name='foo', input_dim=32, output_dim=4),
              lingvo_layers.FCLayer.Params().Set(
                  name='bar', input_dim=32, output_dim=4),
              layers.SequentialLayer.Params().Set(
                  name='seq',
                  sub=[
                      lingvo_layers.FCLayer.Params().Set(
                          name='baz', input_dim=32, output_dim=4),
                      lingvo_layers.DropoutLayer.Params().Set(
                          name='dropout', keep_prob=0.5)
                  ])
          ])
      layer = p.Instantiate()
      x = tf.random_normal(shape=[2, 32])
      y = layer.FPropDefaultTheta(x)

  with self.session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())
    # Fetch input, output and weights in one run so they are consistent.
    x_val, y_val, w = sess.run([x, y, layer.vars])

    branch_outs = []
    for fc_vars in (w.foo, w.bar, w.seq.baz):
      # relu(act \dot w + b)
      activation = np.matmul(x_val, fc_vars.w) + fc_vars.b
      branch_outs.append(np.maximum(0, activation))
      self.assertEqual(branch_outs[-1].shape, (2, 4))

    np_result = branch_outs[0]
    for branch_out in branch_outs[1:]:
      np_result = np.add(np_result, branch_out)
    self.assertAllClose(np_result, y_val)
def testForwardPassWithInputPacking(self):
  """Checks that packed and unpacked TransformerEncoder outputs agree.

  Three sequences of length three are encoded once as a [3, 3] batch and
  once as a single packed row of nine tokens with segment ids/positions.
  The two encodings are compared after summing over the first axis.
  """
  with self.session(use_gpu=False) as sess:
    with tf.variable_scope('transformer_test', reuse=tf.AUTO_REUSE):
      batch_size = 3
      seq_len = 3
      tf.set_random_seed(8372749040)
      p = self._EncoderParams()
      mt_enc = encoder.TransformerEncoder(p)
      packed_params = p.Copy()
      packed_params.packed_input = True
      mt_enc_packed = encoder.TransformerEncoder(packed_params)

      random_ids = np.random.randint(
          low=0, high=63, size=[batch_size, seq_len], dtype=np.int32)
      batch = py_utils.NestedMap(
          ids=tf.constant(random_ids),
          paddings=tf.zeros([batch_size, seq_len]))

      # Same tokens laid out as one row; segments mark the original rows.
      packed_batch = py_utils.NestedMap()
      packed_batch.ids = tf.reshape(batch.ids, [1, -1])
      packed_batch.paddings = tf.reshape(batch.paddings, [1, -1])
      packed_batch.segment_ids = tf.constant(
          [[0, 0, 0, 1, 1, 1, 2, 2, 2]], dtype=tf.float32)
      packed_batch.segment_pos = tf.constant(
          [[0, 1, 2, 0, 1, 2, 0, 1, 2]], dtype=tf.int32)

      unpacked_encoded = mt_enc.FPropDefaultTheta(batch).encoded
      unpacked_encoded = tf.transpose(unpacked_encoded, [1, 0, 2])
      packed_out = mt_enc_packed.FPropDefaultTheta(packed_batch)
      packed_encoded = tf.reshape(packed_out.encoded,
                                  tf.shape(unpacked_encoded))
      unpacked_sum = tf.reduce_sum(unpacked_encoded, axis=0)
      packed_sum = tf.reduce_sum(packed_encoded, axis=0)

      tf.global_variables_initializer().run()
      actual_enc_out, actual_packed_enc_out = sess.run(
          [unpacked_sum, packed_sum])
      self.assertAllClose(actual_packed_enc_out, actual_enc_out)
def testSampleCanvasAndTargets(self):
  """Checks _SampleCanvasAndTargets output against golden arrays.

  Compares the sampled canvas, canvas paddings, target indices and target
  weights for a fixed 4x7 batch of ids and paddings.
  """
  with self.session() as sess:
    tf.set_random_seed(_TF_RANDOM_SEED)
    ids = np.asarray(
        [[10, 11, 12, 13, 14, 15, 2],
         [10, 11, 12, 13, 14, 15, 2],
         [2, 0, 0, 0, 0, 0, 0],
         [10, 11, 12, 13, 14, 2, 0]], np.int32)
    id_paddings = np.asarray(
        [[0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0],
         [0, 1, 1, 1, 1, 1, 1],
         [0, 0, 0, 0, 0, 0, 1]], np.float32)

    p = self._testParams()
    mdl = p.Instantiate()
    descriptor = mdl._SampleCanvasAndTargets(
        tf.convert_to_tensor(ids), tf.convert_to_tensor(id_paddings))
    fetched = sess.run([
        descriptor.canvas,
        descriptor.canvas_paddings,
        descriptor.target_indices,
        descriptor.target_weights,
    ])
    canvas, canvas_paddings, target_indices, target_weights = fetched

    canvas_gold = np.asarray(
        [[13, 15, 2, 0, 0],
         [10, 11, 14, 2, 0],
         [2, 0, 0, 0, 0],
         [10, 11, 13, 14, 2]], np.int32)
    canvas_paddings_gold = np.asarray(
        [[0., 0., 0., 1., 1.],
         [0., 0., 0., 0., 1.],
         [0., 1., 1., 1., 1.],
         [0., 0., 0., 0., 0.]], np.float32)
    target_indices_gold = np.asarray(
        [[0, 0, 10], [0, 0, 11], [0, 0, 12], [0, 0, 2],
         [0, 1, 14], [0, 1, 2],
         [0, 2, 2],
         [1, 0, 2],
         [1, 1, 2],
         [1, 2, 12], [1, 2, 13], [1, 2, 2],
         [1, 3, 15], [1, 3, 2],
         [2, 0, 2],
         [3, 0, 2],
         [3, 1, 2],
         [3, 2, 12], [3, 2, 2],
         [3, 3, 2],
         [3, 4, 2]], np.int32)
    # One weight per row of target_indices_gold, as a column vector.
    flat_weights = np.asarray(
        [1, 1, 1, 0, 1, 0, 1] + [1, 1, 1, 1, 0, 1, 0] + [1] +
        [1, 1, 1, 0, 1, 1], np.float32)
    target_weights_gold = flat_weights.reshape([flat_weights.shape[0], 1])

    self.assertAllEqual(canvas, canvas_gold)
    self.assertAllEqual(canvas_paddings, canvas_paddings_gold)
    self.assertAllEqual(target_indices, target_indices_gold)
    self.assertAllEqual(target_weights, target_weights_gold)
def _verify_timestep_counts(self, num_splits, auto_partition=False):
  """Checks gradient norm and accumulator step counts of a pipelined net.

  Args:
    num_splits: Number of pipeline splits. With more than one split every
      accumulator value is expected to equal the number of micro batches (8);
      otherwise 1.
    auto_partition: If True, builds the net by partitioning 16 _SimpyLayer
      params with _Partition; otherwise uses _BuildDummyPipelineCnn.
  """
  num_micro_batches = 8
  batch_size = 16
  with self.session(graph=tf.Graph()) as sess:
    tf.set_random_seed(1245)
    inputs = tf.random_uniform([batch_size, 8, 8, 1], seed=12345)
    if not auto_partition:
      net = _BuildDummyPipelineCnn(
          num_splits=num_splits, num_micro_batches=num_micro_batches)
    else:
      layer_params = [
          _SimpyLayer.Params().Set(name='layer_{}'.format(idx))
          for idx in range(16)
      ]
      cell_tpl = _Partition(layer_params, num_splits,
                            tshape.Shape([batch_size, 8, 8, 1]))
      net = PipeliningLayer.Params().Set(
          name='pipeline',
          num_micro_batches=num_micro_batches,
          cell_tpl=cell_tpl).Instantiate()

    endpoints = net.FPropDefaultTheta(inputs)
    aux_logits = None
    if isinstance(endpoints, (list, tuple)):
      logits, aux_logits = endpoints
    else:
      logits = endpoints

    loss = tf.reduce_mean(logits)
    grads = tf.gradients(loss, tf.trainable_variables())
    grad_norm = tf.sqrt(py_utils.SumSquared(grads))
    ts = net.GetAccumulatorValues().Flatten()

    sess.run(tf.global_variables_initializer())
    grad_norm_val, ts_vals = sess.run([grad_norm, ts])
    test_utils.CompareToGoldenSingleFloat(self, 0.268087, grad_norm_val)

    # Accumulator values should be equal to number of time steps in pipeline.
    expected_ts = num_micro_batches if num_splits > 1 else 1
    for ts_val in ts_vals:
      self.assertEqual(ts_val, expected_ts)

    if aux_logits is not None:
      aux_logit_tensor = sess.run(aux_logits)
      self.assertEqual(aux_logit_tensor.shape, (batch_size, 8, 8, 1))
def testBiEncoderForwardPass(self):
  """Runs MTEncoderBiRNN on a 2x4 id batch and checks golden encodings."""
  expected_enc_out = [
      [[1.42110639e-06, 1.31101151e-05],
       [-6.62138473e-06, -1.11313329e-06]],
      [[1.14506956e-05, 2.98347204e-05],
       [-5.89276988e-06, 5.54328744e-06]],
      [[1.35346390e-05, 1.00745674e-05],
       [-4.80002745e-06, -1.23648788e-05]],
      [[2.00507566e-06, -1.51463591e-05],
       [-5.71241526e-06, -1.87959231e-05]],
  ]
  with self.session(use_gpu=False):
    tf.set_random_seed(8372749040)
    params = self._BiEncoderParams()
    bi_enc = encoder.MTEncoderBiRNN(params)
    batch = py_utils.NestedMap()
    # Ids 0..7 arranged as [4, 2] and transposed into a [2, 4] batch.
    batch.ids = tf.transpose(tf.reshape(tf.range(0, 8, 1), [4, 2]))
    batch.paddings = tf.zeros([2, 4])
    encoded = bi_enc.FPropDefaultTheta(batch).encoded
    tf.global_variables_initializer().run()
    actual_enc_out = encoded.eval()
    self.assertAllClose(expected_enc_out, actual_enc_out)
def testLmFprop(self):
  """Runs FProp on the LM and checks four eval metrics against goldens."""
  tf.set_random_seed(93820986)
  p = self._Params()
  p.input = self._InputParams(for_training=False)

  with self.session(use_gpu=False) as sess:
    mdl = p.Instantiate()
    mdl.FPropDefaultTheta()
    # Each eval metric is indexed at [0] to obtain the tensor to fetch.
    metric_tensors = [
        mdl.eval_metrics['loss'][0],
        mdl.eval_metrics['log_pplx'][0],
        mdl.eval_metrics['log_pplx_per_word'][0],
        mdl.eval_metrics['fraction_of_correct_next_step_preds'][0],
    ]
    tf.global_variables_initializer().run()
    loss_val, logp_val, logp_per_word_val, accuracy_val = sess.run(
        metric_tensors)
    test_utils.CompareToGoldenSingleFloat(self, 4.160992, loss_val)
    test_utils.CompareToGoldenSingleFloat(self, 4.160992, logp_val)
    test_utils.CompareToGoldenSingleFloat(self, 5.944274, logp_per_word_val)
    test_utils.CompareToGoldenSingleFloat(self, 0.000000, accuracy_val)
def testFPropEvalMode(self):
  """Runs FProp five times in eval mode and compares to golden values."""
  expected_vals = [
      [233.57518, 10.381119],
      [236.10052, 10.378047],
      [217.99896, 10.380901],
      [217.94647, 10.378406],
      [159.5997, 10.380468],
  ]
  with self.session() as sess:
    with self.SetEval(True):
      tf.set_random_seed(_TF_RANDOM_SEED)
      params = self._testParams()
      model = params.Instantiate()
      model.FPropDefaultTheta()
      fetches = (model.loss, model.eval_metrics['log_pplx'][0])
      tf.global_variables_initializer().run()
      vals = [sess.run(fetches) for _ in range(5)]
      print('actual vals = ', vals)
      self.assertAllClose(vals, expected_vals)
def testRepeatLayer(self):
  """Compares RepeatLayer output with 100 numpy FC applications."""
  repeat = 100
  with self.session(use_gpu=False, graph=tf.Graph()) as sess:
    tf.set_random_seed(24332)
    body_p = lingvo_layers.FCLayer.Params().Set(input_dim=2, output_dim=2)
    p = layers.RepeatLayer.Params().Set(
        name='recurrent', repeat=repeat, body=body_p)
    layer = p.Instantiate()
    x = tf.random_normal(shape=[2, 2])
    y = layer.FPropDefaultTheta(x)
    tf.global_variables_initializer().run()
    x_val, y_val, w = sess.run([x, y, layer.vars])

    expected = x_val
    for step in range(repeat):
      # relu(act \dot w + b) using the step-th slice of the body variables.
      pre_act = np.dot(expected, w.body.w[step]) + w.body.b[step]
      expected = np.maximum(0, pre_act)
    self.assertAllClose(expected, y_val)
def testForwardPass(self):
  """Runs MTEncoderV1 on a 2x4 id batch and checks golden encodings."""
  expected_enc_out = [
      [[1.5309354e-06, -1.7816075e-07, 3.8047763e-06, -5.6422067e-07],
       [1.9017770e-06, -2.9778969e-06, -4.5083775e-06, -1.7054812e-06]],
      [[-2.1852782e-06, -1.8208171e-06, -1.4747930e-06, -5.8206351e-06],
       [6.7667429e-07, -3.6828042e-06, -1.0916860e-05, -3.2522742e-06]],
      [[-3.2333378e-07, 3.2147584e-06, 5.0556650e-07, -7.0188378e-07],
       [-6.5340635e-07, 1.9502845e-06, -9.2459632e-06, 5.1955390e-06]],
      [[2.0232728e-06, 4.9331529e-06, 1.1346837e-06, 7.5571520e-06],
       [-5.8475212e-07, 3.5547487e-06, -3.9037773e-06, 8.9575424e-06]],
  ]
  with self.session(use_gpu=False):
    tf.set_random_seed(8372749040)
    params = self._EncoderParams()
    mt_enc = encoder.MTEncoderV1(params)
    batch = py_utils.NestedMap()
    # Ids 0..7 arranged as [4, 2] and transposed into a [2, 4] batch.
    batch.ids = tf.transpose(tf.reshape(tf.range(0, 8, 1), [4, 2]))
    batch.paddings = tf.zeros([2, 4])
    encoded = mt_enc.FPropDefaultTheta(batch).encoded
    tf.global_variables_initializer().run()
    actual_enc_out = encoded.eval()
    self.assertAllClose(expected_enc_out, actual_enc_out)
def testBeamSearchDecodeUseZeroAttenState(self, dtype=tf.float32):
  """Beam search decode with use_zero_atten_state=True.

  Checks the shapes of all decode outputs and compares the top-k ids,
  lengths and scores with golden values.

  Args:
    dtype: dtype used for the decoder params and the test inputs.
  """
  with self.session(use_gpu=True) as sess:
    with self.SetEval(True):
      tf.set_random_seed(_TF_RANDOM_SEED)
      src_batch = 2
      p = self._DecoderParams(dtype=dtype)
      src_time = p.target_seq_len
      p.beam_search.num_hyps_per_beam = 2
      p.use_zero_atten_state = True
      p.rnn_cell_dim = 32
      dec = decoder.MTDecoderV1(p)
      encoder_outputs, _ = self._Inputs(dtype=dtype)
      decode = dec.BeamSearchDecode(encoder_outputs)
      # topk_decoded is None in MT decoder, set it to a fake tensor to pass
      # sess.run(decode).
      decode = decode._replace(topk_decoded=tf.constant(0, tf.float32))

      tf.global_variables_initializer().run()
      actual_decode = sess.run(decode)

      num_hyps = p.beam_search.num_hyps_per_beam
      total_hyps = src_batch * num_hyps
      self.assertTupleEqual((src_time, total_hyps),
                            actual_decode.done_hyps.shape)
      self.assertTupleEqual((src_batch, num_hyps),
                            actual_decode.topk_hyps.shape)
      self.assertTupleEqual((total_hyps, src_time),
                            actual_decode.topk_ids.shape)
      self.assertTupleEqual((total_hyps,), actual_decode.topk_lens.shape)
      self.assertTupleEqual((src_batch, num_hyps),
                            actual_decode.topk_scores.shape)

      expected_topk_ids = [
          [2, 0, 0, 0, 0],
          [13, 2, 0, 0, 0],
          [0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0],
      ]
      expected_topk_lens = [1, 2, 0, 0]
      expected_topk_scores = [[-3.783176, -5.767704], [0., 0.]]
      self.assertAllEqual(expected_topk_ids, actual_decode.topk_ids)
      self.assertAllEqual(expected_topk_lens, actual_decode.topk_lens)
      self.assertAllClose(expected_topk_scores, actual_decode.topk_scores)
def testBeamSearchDecode(self, dtype=tf.float32):
  """Beam search decode with MTDecoderV1 in eval mode.

  The decode graph is built before the session is opened. Checks the shapes
  of all decode outputs and compares the top-k ids, lengths and scores with
  golden values.

  Args:
    dtype: dtype used for the decoder params and the test inputs.
  """
  tf.set_random_seed(_TF_RANDOM_SEED)
  src_batch = 2
  p = self._DecoderParams(dtype=dtype)
  p.is_eval = True
  src_time = p.target_seq_len
  p.beam_search.num_hyps_per_beam = 2
  p.rnn_cell_dim = 32
  dec = decoder.MTDecoderV1(p)
  encoder_outputs, _ = self._Inputs(dtype=dtype)
  decode = dec.BeamSearchDecode(encoder_outputs)
  # topk_decoded is None in MT decoder, set it to a fake tensor to pass
  # sess.run(decode).
  decode = decode._replace(topk_decoded=tf.constant(0, tf.float32))

  with self.session(use_gpu=True) as sess:
    tf.global_variables_initializer().run()
    actual_decode = sess.run(decode)

  num_hyps = p.beam_search.num_hyps_per_beam
  total_hyps = src_batch * num_hyps
  self.assertTupleEqual((src_time, total_hyps), actual_decode.done_hyps.shape)
  self.assertTupleEqual((src_batch, num_hyps), actual_decode.topk_hyps.shape)
  self.assertTupleEqual((total_hyps, src_time), actual_decode.topk_ids.shape)
  self.assertTupleEqual((total_hyps,), actual_decode.topk_lens.shape)
  self.assertTupleEqual((src_batch, num_hyps),
                        actual_decode.topk_scores.shape)

  expected_topk_ids = [
      [2, 0, 0, 0, 0],
      [11, 2, 0, 0, 0],
      [2, 0, 0, 0, 0],
      [6, 2, 0, 0, 0],
  ]
  expected_topk_lens = [1, 2, 1, 2]
  expected_topk_scores = [[-3.78467, -5.771077], [-3.334115, -5.597376]]
  self.assertAllEqual(expected_topk_ids, actual_decode.topk_ids)
  self.assertAllEqual(expected_topk_lens, actual_decode.topk_lens)
  self.assertAllClose(expected_topk_scores, actual_decode.topk_scores)
def testBiEncoderForwardPass(self):
  """Runs MTEncoderBiRNN on a 2x4 id batch and checks golden encodings."""
  expected_enc_out = [
      [[4.0744379e-07, -2.0108675e-06], [-4.2056736e-06, 9.2221135e-06]],
      [[1.2086311e-06, -2.2510878e-07], [-2.2938407e-06, 9.3108029e-06]],
      [[3.4632390e-06, -3.1495360e-06], [9.1814104e-07, 1.9459947e-06]],
      [[-9.0593801e-08, -1.2912932e-06], [-5.8420886e-07, -6.5603672e-07]],
  ]
  with self.session(use_gpu=False):
    tf.set_random_seed(8372749040)
    params = self._BiEncoderParams()
    bi_enc = encoder.MTEncoderBiRNN(params)
    batch = py_utils.NestedMap()
    # Ids 0..7 arranged as [4, 2] and transposed into a [2, 4] batch.
    batch.ids = tf.transpose(tf.reshape(tf.range(0, 8, 1), [4, 2]))
    batch.paddings = tf.zeros([2, 4])
    encoded = bi_enc.FPropDefaultTheta(batch).encoded
    tf.global_variables_initializer().run()
    actual_enc_out = encoded.eval()
    self.assertAllClose(expected_enc_out, actual_enc_out)
def testPoolingWithUnknowShapeInput(self):
  """Runs the 'AVG' pooling helper on a tensor with unknown static shape.

  The static shape is erased by passing the input through an identity
  tf.Defun; the expected output is the numpy mean over axes 1 and 2.
  """

  @tf.Defun(tf.float32)
  def remove_shape(tensor):
    return tensor

  graph = tf.Graph()
  with graph.as_default(), tf.Session(graph=graph):
    tf.set_random_seed(24332)
    input_shape = [3, 5, 2, 4]
    inputs = np.random.random(input_shape) - 0.5
    expected_avg_output = np.mean(inputs, axis=(1, 2), keepdims=True)
    shaped_tensor = tf.convert_to_tensor(inputs, dtype=tf.float32)
    # Before the Defun the static shape is fully known: [3, 5, 2, 4].
    self.assertEqual(py_utils.GetShape(shaped_tensor), input_shape)
    # After the Defun the shape is only available as a tensor and the rank
    # is unknown.
    unshaped_tensor = remove_shape(shaped_tensor)
    self.assertIsInstance(py_utils.GetShape(unshaped_tensor), tf.Tensor)
    self.assertIsNone(unshaped_tensor.shape.rank)
    self._testHelper('AVG', unshaped_tensor, None, expected_avg_output, None)
def _testNormalizedDepthwiseConv2DHelper(self,
                                         is_causal=False,
                                         dropconnect_prob=0):
  """Builds a normalized depthwise conv layer and returns its FProp output.

  Args:
    is_causal: If True, uses CausalNormalizedDepthwiseConv2DLayer; otherwise
      NormalizedDepthwiseConv2DLayer.
    dropconnect_prob: Value passed as the layer's dropconnect_prob param.

  Returns:
    The first element of the FPropDefaultTheta result for a random
    [2, 4, 1, 4] input with all-zero paddings.
  """
  module = conv_layers_with_time_padding
  conv_cls = (
      module.CausalNormalizedDepthwiseConv2DLayer
      if is_causal else module.NormalizedDepthwiseConv2DLayer)
  tf.set_random_seed(398847392)
  np.random.seed(12345)
  params = conv_cls.Params().Set(
      name='conv',
      weight_tiling_factor=2,
      filter_shape=[3, 1, 2, 1],
      dropconnect_prob=dropconnect_prob,
      deterministic_dropout=True)
  conv_layer = params.Instantiate()
  in_padding = tf.zeros([2, 4], dtype=tf.float32)
  random_inputs = np.random.normal(0.1, 0.5, [2, 4, 1, 4])
  inputs = tf.constant(random_inputs, dtype=tf.float32)
  output, _ = conv_layer.FPropDefaultTheta(inputs, in_padding)
  return output
def testFProp(self, dtype=tf.float32, fprop_dtype=tf.float32):
  """Runs FProp five times and compares (loss, log_pplx) to golden values.

  Args:
    dtype: Value assigned to p.dtype.
    fprop_dtype: If truthy, assigned to both p.fprop_dtype and
      p.input.dtype.
  """
  expected_vals = [
      [233.57518, 10.381119],
      [236.10052, 10.378047],
      [217.99896, 10.380901],
      [217.94647, 10.378406],
      [159.5997, 10.380468],
  ]
  with self.session() as sess:
    tf.set_random_seed(_TF_RANDOM_SEED)
    params = self._testParams()
    params.dtype = dtype
    if fprop_dtype:
      params.fprop_dtype = fprop_dtype
      params.input.dtype = fprop_dtype
    model = params.Instantiate()
    model.FPropDefaultTheta()
    fetches = (model.loss, model.eval_metrics['log_pplx'][0])
    tf.global_variables_initializer().run()
    vals = [sess.run(fetches) for _ in range(5)]
    print('actual vals = %s' % np.array_repr(np.array(vals)))
    self.assertAllClose(vals, expected_vals)
def testFProp(self, dtype=tf.float32):
  """Runs FProp three times and compares (loss, log_pplx) to golden values.

  Args:
    dtype: Value assigned to p.dtype before the model is instantiated.
  """
  expected_vals = [
      [326.765106, 10.373495],
      [306.018066, 10.373494],
      [280.08429, 10.373492],
  ]
  with self.session() as sess:
    tf.set_random_seed(_TF_RANDOM_SEED)
    params = self._testParams()
    params.dtype = dtype
    model = params.Instantiate()
    model.FPropDefaultTheta()
    fetches = (model.loss, model.eval_metrics['log_pplx'][0])
    tf.global_variables_initializer().run()
    vals = [sess.run(fetches) for _ in range(3)]
    print('actual vals = %s' % np.array_repr(np.array(vals)))
    self.assertAllClose(vals, expected_vals)
def testSoftCondLayer(self):
  """Compares SoftCondLayer output with a numpy reference computation.

  The reference applies a sigmoid to the summed input projected by the
  layer's `w`, uses the result to mix the 100 experts' FC weights and
  biases, and then applies relu(x . w + b).
  """
  num_experts = 100
  with self.session(use_gpu=False, graph=tf.Graph()) as sess:
    tf.set_random_seed(24332)
    body_p = lingvo_layers.FCLayer.Params().Set(input_dim=2, output_dim=2)
    p = layers.SoftCondLayer.Params().Set(
        name='soft_cond', cond_dim=2, num_experts=num_experts, body=body_p)
    layer = p.Instantiate()
    x = tf.random_normal(shape=[1, 2, 2])
    y = layer.FPropDefaultTheta(x)
    tf.global_variables_initializer().run()
    x_val, y_val, vars_val = sess.run([x, y, layer.vars])

    act = x_val[0]
    # Sigmoid gate per expert, written out as 1 / (1 + exp(-z)).
    neg_exp = np.exp(-1.0 * np.dot(np.sum(act, 0), vars_val.w))
    task_weight = 1.0 / (1.0 + neg_exp)
    mixed_w = np.einsum('i,ijk->jk', task_weight, vars_val.body.w)
    mixed_b = np.einsum('i,ij->j', task_weight, vars_val.body.b)
    expected = np.maximum(0, np.dot(act, mixed_w) + mixed_b)
    self.assertAllClose(expected, y_val[0])
def testBProp(self):
  """Runs three training steps and compares (loss, log_pplx) to goldens."""
  expected_vals = [
      [326.765106, 10.373495],
      [306.013123, 10.373326],
      [280.07666, 10.37321],
  ]
  with self.session() as sess:
    tf.set_random_seed(_TF_RANDOM_SEED)
    params = self._testParams()
    model = params.Instantiate()
    model.FPropDefaultTheta()
    model.BProp()
    loss = model.loss
    logp = model.eval_metrics['log_pplx'][0]
    tf.global_variables_initializer().run()
    vals = []
    for _ in range(3):
      # train_op is fetched to apply the update; its result is dropped.
      step_out = sess.run((loss, logp, model.train_op))
      vals.append(step_out[:2])
    print('BProp actual vals = ', vals)
    self.assertAllClose(vals, expected_vals)
def testUniEncoderForwardPass(self):
  """Runs MTEncoderUniRNN on a 2x4 id batch and checks golden encodings."""
  expected_enc_out = [
      [[-1.74790625e-06, -5.04228524e-07], [2.04836829e-06, 1.48639378e-06]],
      [[-1.10486064e-06, -5.77133278e-07], [4.66779238e-06, 3.72350723e-06]],
      [[-5.65139544e-07, -1.84634030e-06], [3.99908731e-06, 1.90148887e-06]],
      [[7.14102157e-07, -2.31352783e-06], [7.05981620e-06, 2.68004328e-06]],
  ]
  with self.session(use_gpu=False):
    tf.set_random_seed(8372749040)
    params = self._UniEncoderParams()
    uni_enc = encoder.MTEncoderUniRNN(params)
    batch = py_utils.NestedMap()
    # Ids 0..7 arranged as [4, 2] and transposed into a [2, 4] batch.
    batch.ids = tf.transpose(tf.reshape(tf.range(0, 8, 1), [4, 2]))
    batch.paddings = tf.zeros([2, 4])
    encoded = uni_enc.FPropDefaultTheta(batch).encoded
    tf.global_variables_initializer().run()
    actual_enc_out = encoded.eval()
    self.assertAllClose(expected_enc_out, actual_enc_out)