def testMelFeaturesPaddedRightStacked(self):
  self._CreateFrontendParams()
  p = self.params
  p.stack_right_context = 2
  p.frame_stride = p.stack_right_context + 1
  mel_frontend = p.Instantiate()
  sample_rate, pcm = self._GetPcm()
  pcm *= 32768

  # Convert to 4D [batch, time, packet, channels].
  sample_count = tf.shape(pcm)[1]
  packet_size = 11  # A non-round number.
  trimmed_pcm = pcm[:, 0:(sample_count // packet_size) * packet_size]
  src_inputs = tf.reshape(trimmed_pcm, (1, -1, packet_size, 1))

  # Create paddings such that the first 455 packets are unpadded.
  paddings = tf.concat([
      tf.zeros([1, 455], dtype=tf.float32),
      tf.ones([1, tf.shape(src_inputs)[1] - 455], dtype=tf.float32)
  ],
                       axis=1)
  # frame_step=240, frame_size=600, +1200 right padded frames.
  # 455 packets * 11 samples = 5005 samples, which rounds down to 21 mel
  # frames. Divide by 3 for stacking = 7.
  # TODO(talremez): Make sure this makes sense.
  expected_unpadded = 6

  outputs = mel_frontend.FPropDefaultTheta(
      py_utils.NestedMap(src_inputs=src_inputs, paddings=paddings))
  log_mel = outputs.src_inputs
  paddings = outputs.paddings
  with self.session():
    pcm = self.evaluate(pcm)
    tf.logging.info('pcm: ~ %s = %s', pcm.shape, pcm)
    self.assertGreater(33000, np.amax(pcm))
    self.assertGreater(np.amax(pcm), 2.)
    log_mel, paddings, sample_rate = self.evaluate(
        [log_mel, paddings, sample_rate])
    self.assertEqual(sample_rate, p.sample_rate)
    self.assertEqual(paddings.shape, log_mel.shape[0:2])
    self.assertAllEqual(paddings[:, 0:expected_unpadded],
                        np.zeros([1, expected_unpadded]))
    self.assertAllEqual(paddings[:, expected_unpadded:],
                        np.ones([1, paddings.shape[1] - expected_unpadded]))
def testForwardPass(self):
  with self.session(use_gpu=False):
    tf.set_random_seed(8372749040)
    p = self._EncoderParams()
    mt_enc = encoder.MTEncoderV1(p)
    batch = py_utils.NestedMap()
    batch.ids = tf.transpose(tf.reshape(tf.range(0, 8, 1), [4, 2]))
    batch.paddings = tf.zeros([2, 4])
    enc_out = mt_enc.FPropDefaultTheta(batch).encoded

    tf.global_variables_initializer().run()
    actual_enc_out = enc_out.eval()
    expected_enc_out = [
        [[-7.51581979e-07, 1.55304758e-06, -3.39117889e-07, 2.79457527e-06],
         [-1.06733505e-05, 7.56898862e-06, -4.18875834e-06, -9.10360086e-06]],
        [[1.58444971e-06, 5.11627661e-07, 1.33408967e-05, 1.81603957e-06],
         [-1.59942228e-05, 1.26068180e-05, 4.49321249e-07, -1.43790385e-05]],
        [[5.56546365e-06, -8.01007627e-06, 8.96620350e-06, 3.96485439e-06],
         [-8.77006005e-06, 4.04282991e-06, -4.79895652e-06, -5.90156833e-06]],
        [[-8.59513818e-07, -7.63760727e-06, -5.57065960e-06, 1.80756274e-06],
         [-2.96017470e-06, -1.51323195e-06, -1.03562079e-05, 1.23328198e-06]]
    ]
    self.assertAllClose(expected_enc_out, actual_enc_out)
def testForwardPassSplitBatch(self):
  with self.session(use_gpu=False) as sess:
    bs = 8
    sl = 20
    tf.set_random_seed(8372749040)
    p = self._EncoderParams()
    p.random_seed = 1234
    mt_enc = encoder.TransformerEncoder(p)

    batch = py_utils.NestedMap()
    batch.ids = tf.constant(
        np.random.randint(low=0, high=63, size=[bs, sl], dtype=np.int32))
    batch.paddings = tf.zeros([bs, sl])
    out = mt_enc.FPropDefaultTheta(batch)
    enc_out = out.encoded
    emb_out = out.embedded_inputs

    inputs1, inputs2 = tf.split(batch.ids, 2, 0)
    paddings1, paddings2 = tf.split(batch.paddings, 2, 0)

    batch.ids = inputs1
    batch.paddings = paddings1
    out1 = mt_enc.FPropDefaultTheta(batch)
    enc_out1 = out1.encoded
    emb_out1 = out1.embedded_inputs

    batch.ids = inputs2
    batch.paddings = paddings2
    out2 = mt_enc.FPropDefaultTheta(batch)
    enc_out2 = out2.encoded
    emb_out2 = out2.embedded_inputs

    tf.global_variables_initializer().run()
    actual_enc_out, actual_enc_out1, actual_enc_out2, \
        actual_emb_out, actual_emb_out1, actual_emb_out2 = sess.run(
            [enc_out, enc_out1, enc_out2, emb_out, emb_out1, emb_out2])
    self.assertAllClose(actual_enc_out,
                        np.concatenate([actual_enc_out1, actual_enc_out2], 1))
    self.assertAllClose(actual_emb_out,
                        np.concatenate([actual_emb_out1, actual_emb_out2], 1))
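# Added note (not original code): batch.ids is batch-major, so the split is
# along axis 0; TransformerEncoder outputs, however, are time-major
# ([time, batch, dim]), which is why the two halves are re-joined along
# axis 1 before comparison.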
def NullLike():
  """A function to return the same Tensor signature as Preprocess.

  This is necessary for the tf.cond() to avoid executing the preprocessor
  for examples that are going to be dropped because they exceed the bucket
  limit; tf.cond() requires that the outputs of both branches yield the same
  structure.

  Returns:
    A structure with the same Tensor dtype and shape as the output of
    Preprocess.
  """
  shapes = self.Shape()
  rets = [
      tf.zeros(dtype=dtype, shape=shape)
      for (dtype, shape) in zip(self.DType().Flatten(), shapes.Flatten())
  ]
  return shapes.Pack(rets)
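# Illustrative sketch only (hypothetical names, not part of the original
# code): the structure-matching requirement that NullLike works around. Both
# tf.cond() branches must return the same nest of dtypes and shapes, so the
# "drop" branch returns zeros shaped like the real output.
def _CondSketch(should_drop, raw_bytes):

  def _Process():
    ids = tf.strings.to_hash_bucket_fast(raw_bytes, 1000)
    return {'ids': ids, 'weight': tf.ones([], tf.float32)}

  def _Null():
    # Must mirror _Process exactly: same keys, dtypes and shapes.
    return {'ids': tf.zeros([], tf.int64), 'weight': tf.zeros([], tf.float32)}

  return tf.cond(should_drop, _Null, _Process)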
def zero_state(self, batch_size):
  """Returns the initial state given the batch size.

  Args:
    batch_size: the batch size.

  Returns:
    state0: A NestedMap of tensors including:
      - context: A Tensor of shape [b, w, 1, c].
  """
  p = self.params
  assert p.filter_shape[1] == 1, (
      'StreamStep only supports 1d causal convolution.')
  context = tf.zeros(
      shape=[batch_size, p.filter_shape[0], 1, p.filter_shape[2]],
      dtype=tf.float32)
  return py_utils.NestedMap(context=context)
def _TransformerAttentionLayerInputs(self, input_dim=4, dtype=tf.float32):
  np.random.seed(6348575)
  query_vec = tf.transpose(
      tf.stack([
          tf.constant(np.random.rand(2, input_dim), dtype=dtype)
          for _ in range(5)
      ]), [1, 0, 2])
  paddings = tf.constant([[0, 0, 1, 1, 0], [1, 0, 0, 0, 1]], dtype=dtype)
  aux_vec = tf.transpose(
      tf.stack([
          tf.constant(np.random.rand(2, input_dim), dtype=dtype)
          for _ in range(7)
      ]), [1, 0, 2])
  aux_paddings = tf.constant([[0, 1, 0, 1, 0, 1, 0], [1, 0, 1, 0, 1, 0, 1]],
                             dtype=dtype)
  segment_mask = tf.zeros([2, 1, 5, 5])
  return query_vec, paddings, aux_vec, aux_paddings, segment_mask
def _testOutShape(self, p, input_shape, expected_shape):
  batch_size, num_points, _ = input_shape
  g = tf.Graph()
  with g.as_default():
    net = p.Instantiate()
    input_data = py_utils.NestedMap(
        points=tf.random_uniform((batch_size, num_points, 3)),
        features=tf.random_uniform(input_shape),
        padding=tf.zeros((batch_size, num_points), dtype=tf.float32),
        label=tf.random_uniform((batch_size,),
                                minval=0,
                                maxval=16,
                                dtype=tf.int32))
    result = net.FPropDefaultTheta(input_data)
  with self.session(graph=g) as sess:
    sess.run(tf.global_variables_initializer())
    np_result = sess.run(result)
  self.assertEqual(np_result.shape, expected_shape)
def FProp(self, theta, in_nmap):
  p = self.params
  if not p.remat:
    return self._FProp(theta, in_nmap)

  def CellFn(theta, state0, unused_inputs):
    out_nmap = self._FProp(theta, state0)
    return out_nmap, py_utils.NestedMap()

  _, state1 = recurrent.Recurrent(
      theta=theta,
      state0=in_nmap,
      inputs=py_utils.NestedMap(
          inputs=tf.zeros([1, 0])),  # A dummy input of shape [T, ?].
      cell_fn=CellFn,
      allow_implicit_capture=p.allow_implicit_capture)
  return state1
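# Added note (not original code): recurrent.Recurrent recomputes CellFn
# during the backward pass rather than storing its intermediate activations,
# so driving _FProp through a single-step recurrence over a dummy [T, ?]
# input effectively gives gradient rematerialization, trading compute for
# memory when p.remat is set.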
def testEvolvedTransformerDecoderLayerExtendStep(self):
  with self.session(use_gpu=True) as sess:
    np.random.seed(6348575)
    depth = 4
    p = GPipeEvolvedTransformerDecoderLayer.Params()
    p.name = 'gpipe_evolved_transformer_decoder'
    p.source_dim = depth
    p.has_aux_atten = True
    p.mask_self_atten = True
    p.tr_double_heads_atten_tpl.num_attention_heads = 2
    p.tr_atten_tpl.num_attention_heads = 2
    p.transformer_tpl.tr_atten_tpl.num_attention_heads = 2
    et_decoder = GPipeEvolvedTransformerDecoderLayer(p)

    (source_vecs, _, aux_vecs, aux_paddings) = self._testInputs(depth=depth)
    source_padding = tf.zeros([5, 2])

    h1 = et_decoder.FPropDefaultTheta(
        aux_vecs,
        aux_paddings,
        source_vecs,
        source_padding,
        None,
        None,
        None,
        None,
    )[2]

    h2 = []
    double_head_attention_states = py_utils.NestedMap(
        key=tf.zeros([0, 2, 4]), value=tf.zeros([0, 2, 4]))
    transformer_layer_states = py_utils.NestedMap(
        key=tf.zeros([0, 2, 4]), value=tf.zeros([0, 2, 4]))
    branched_convs_input = tf.zeros([0, 2, 4])
    prefix_states = py_utils.NestedMap(
        double_head_attention_states=double_head_attention_states,
        transformer_layer_states=transformer_layer_states,
        branched_convs_input=branched_convs_input)
    for i in range(5):
      h, _, prefix_states = et_decoder.ExtendStep(et_decoder.theta,
                                                  source_vecs[i, :, :],
                                                  prefix_states, aux_vecs,
                                                  aux_paddings)
      h2.append(h)

    h2 = tf.stack(h2)

    tf.global_variables_initializer().run()
    h1_v, h2_v = sess.run([h1, h2])
    self.assertAllClose(h1_v, h2_v)
def testMelFeaturesUnstacked(self):
  self._CreateFrontendParams()
  p = self.params
  mel_frontend = p.Instantiate()
  sample_rate, pcm = self._GetPcm()
  pcm *= 32768

  # Convert to 4D [batch, time, packet, channels].
  sample_count = tf.shape(pcm)[1]
  packet_size = 11  # A non-round number.
  trimmed_pcm = pcm[:, 0:(sample_count // packet_size) * packet_size]
  src_inputs = tf.reshape(trimmed_pcm, (1, -1, packet_size, 1))
  paddings = tf.zeros(tf.shape(src_inputs)[0:2])

  outputs = mel_frontend.FPropDefaultTheta(
      py_utils.NestedMap(src_inputs=src_inputs, paddings=paddings))
  log_mel = outputs.src_inputs
  paddings = outputs.paddings
  with self.session() as sess:
    pcm = sess.run(pcm)
    tf.logging.info('pcm: ~ %s = %s', pcm.shape, pcm)
    self.assertGreater(33000, np.amax(pcm))
    self.assertGreater(np.amax(pcm), 2.)
    log_mel, paddings, sample_rate = sess.run(
        [log_mel, paddings, sample_rate])
    self.assertEqual(sample_rate, p.sample_rate)
    self.assertEqual(paddings.shape, log_mel.shape[0:2])
    self.assertAllEqual(paddings, np.zeros_like(paddings))
    # log_mel ~ [batch, time, feature_size, channel]
    tf.logging.info('mel ~ %s', log_mel.shape)
    self.assertEqual(log_mel.shape[2], 2)  # 2 bins
    # Squeeze the batch and channel dimensions out.
    log_mel = np.squeeze(log_mel, axis=(0, 3))
    t = log_mel.shape[0]
    mu = np.sum(log_mel, axis=0) / t
    d = log_mel - mu
    v = np.sum(d * d, axis=0) / (t - 1)
    s = np.sqrt(v)
    tf.logging.info('Found mean = %s', mu)
    tf.logging.info('Found stddev = %s', s)
    ref_unstacked_mean = [13.46184731, 13.30099297]
    ref_unstacked_stddev = [1.3840059, 1.24434352]
    self.assertAllClose(mu, ref_unstacked_mean, atol=1e-4)
    self.assertAllClose(s, ref_unstacked_stddev, atol=1e-3)
def test_ensure_full_beam_more_strict(self, ensure_full_beam):
  hyp_size = 2
  num_beams = 1
  seq_len = 4
  probs = [
      np.log([[0.1, 0.1, 0.8], [0.1, 0.1, 0.8]]),
      np.log([[0.1, 0.1, 0.8], [0.9, 0.05, 0.05]]),
  ]
  common_args = dict(
      hyp_size=hyp_size,
      num_beams=num_beams,
      seq_len=seq_len,
      init_best_score=_MIN_SCORE,
      probs=probs,
      init_atten_probs=tf.zeros([hyp_size, 0]),
      atten_probs=np.zeros([seq_len, hyp_size, 0]),
      ensure_full_beam=ensure_full_beam,
      use_v2=True,
  )
  # After two steps, we found 2 terminated hyps.
  # Regardless of p.ensure_full_beam, we are not done because beam_size is
  # large.
  results = self._runBeamSearchOpHelper(
      beam_size=3.0, local_eos_threshold=-1.0, **common_args)
  all_done = results[7]
  self.assertAllEqual([all_done], results[8])
  self.assertFalse(all_done)
  # With a smaller beam_size, we are done.
  results = self._runBeamSearchOpHelper(
      beam_size=0.1, local_eos_threshold=-1.0, **common_args)
  all_done = results[7]
  self.assertTrue(all_done)
  # If we found 3 terminated hyps, we are similarly not done.
  results = self._runBeamSearchOpHelper(
      beam_size=3.0, local_eos_threshold=-100.0, **common_args)
  all_done = results[7]
  self.assertFalse(all_done)
def testFarthestPointSamplerGatherPoints(self):
  points = tf.constant([
      [[0, 1, 1], [1, 1, 1], [2, 1, 1], [3, 1, 1], [4, 1, 1], [5, 1, 1]],
      [[0, 2, 1], [1, 2, 1], [2, 2, 1], [3, 2, 1], [4, 2, 1], [5, 2, 1]],
      [[0, 2, 3], [1, 2, 3], [2, 2, 3], [3, 2, 3], [4, 2, 3], [5, 2, 3]],
      [[0, 2, 1], [1, 2, 1], [2, 2, 1], [3, 2, 1], [4, 2, 1], [5, 2, 1]],
  ], dtype=tf.float32)  # pyformat: disable
  padding = tf.zeros((4, 6), dtype=tf.float32)
  n = 4
  num_points = 3
  selected_idx, _ = car_lib.FarthestPointSampler(points, padding, num_points)
  gather_indices = tf.stack([
      tf.tile(tf.expand_dims(tf.range(n), 1), [1, num_points]), selected_idx
  ],
                            axis=2)
  sampled_points = tf.gather_nd(points, gather_indices)
  with self.session() as sess:
    sampled_points = sess.run(sampled_points)
    self.assertEqual(sampled_points.shape, (n, num_points, 3))
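# Added worked example (not original code): gather_indices pairs each batch
# row with its sampled point indices. For n=2, num_points=2 and a
# hypothetical selected_idx of [[5, 0], [3, 1]], the stacked indices are
# [[[0, 5], [0, 0]], [[1, 3], [1, 1]]], so tf.gather_nd returns
# points[0, 5], points[0, 0], points[1, 3] and points[1, 1].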
def _InputsForAttentionTest(self, dtype=tf.float32, has_task_ids=False):
  np.random.seed(_NUMPY_RANDOM_SEED)
  src_time = 5
  src_batch = 2
  num_hyps = 2
  emb_dims = 4
  src_enc = tf.constant(
      np.random.normal(size=[src_time, src_batch, emb_dims]), dtype=dtype)
  src_paddings = tf.constant(
      [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 1.0], [0.0, 1.0]],
      dtype=dtype)
  tgt_time = 5
  tgt_batch = src_batch * num_hyps
  self.tgt_batch = tgt_batch
  tgt_ids = tf.constant(
      np.random.randint(20, size=[tgt_batch, tgt_time]), dtype=tf.int32)
  tgt_labels = tf.constant(
      np.random.randint(20, size=[tgt_batch, tgt_time]), dtype=tf.int32)
  tgt_paddings = tf.zeros([tgt_batch, tgt_time], dtype=dtype)
  tgt_weights = 1.0 - tgt_paddings
  tgts = py_utils.NestedMap({
      'ids': tgt_ids,
      'labels': tgt_labels,
      'weights': tgt_weights,
      'paddings': tgt_paddings
  })
  encoder_outputs = py_utils.NestedMap(
      encoded=src_enc, padding=src_paddings, segment_id=None)
  if has_task_ids:
    task_ids = tf.constant(
        np.random.randint(4, size=[src_batch]), dtype=tf.int32)
    tgts['task_ids'] = tf.tile(
        tf.expand_dims(tf.tile(task_ids, [num_hyps]), 1), [1, tgt_time])
    encoder_outputs['target_task_ids'] = task_ids
  return (encoder_outputs, tgts, num_hyps)
def zero_state(self, batch_size):
  """Returns the initial state given the batch size.

  Args:
    batch_size: the batch size.

  Returns:
    state0: A NestedMap of tensors including:
      - context: A Tensor of shape [b, filter_shape[0]-1, 1, c].
  """
  p = self.params
  assert p.filter_shape[1] == 1, (
      'zero_state() only supports 1d causal convolution.')
  context = tf.zeros(
      shape=[batch_size] +
      [p.filter_shape[0] - 1, p.filter_shape[1], p.filter_shape[2]],
      dtype=py_utils.FPropDtype(p))
  return py_utils.NestedMap(context=context)
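# Added note (not original code): a 1d causal convolution with kernel size
# filter_shape[0] needs the previous filter_shape[0] - 1 input frames to
# produce its next output, which is exactly what the zero-initialized
# `context` buffer above holds per example.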
def testForwardPassWithInputPacking(self):
  with self.session(use_gpu=False) as sess:
    with tf.variable_scope('transformer_test', reuse=tf.AUTO_REUSE):
      bs = 3
      sl = 3
      tf.set_random_seed(8372749040)
      p = self._EncoderParams()
      mt_enc = encoder.TransformerEncoder(p)
      packed_params = p.Copy()
      packed_params.packed_input = True
      mt_enc_packed = encoder.TransformerEncoder(packed_params)

      batch = py_utils.NestedMap()
      batch.ids = tf.constant(
          np.random.randint(low=0, high=63, size=[bs, sl], dtype=np.int32))
      batch.paddings = tf.zeros([bs, sl])

      packed_batch = py_utils.NestedMap()
      packed_batch.ids = tf.reshape(batch.ids, [1, -1])
      packed_batch.paddings = tf.reshape(batch.paddings, [1, -1])
      packed_batch.segment_ids = tf.constant(
          [[0, 0, 0, 1, 1, 1, 2, 2, 2]], dtype=tf.float32)
      packed_batch.segment_pos = tf.constant(
          [[0, 1, 2, 0, 1, 2, 0, 1, 2]], dtype=tf.int32)
      enc_out = mt_enc.FPropDefaultTheta(batch).encoded
      enc_out = tf.transpose(enc_out, [1, 0, 2])

      packed_enc_out = mt_enc_packed.FPropDefaultTheta(packed_batch)
      packed_enc_out = tf.reshape(packed_enc_out.encoded, tf.shape(enc_out))

      enc_out = tf.reduce_sum(enc_out, axis=0)
      packed_enc_out = tf.reduce_sum(packed_enc_out, axis=0)

      tf.global_variables_initializer().run()
      actual_enc_out, actual_packed_enc_out = sess.run(
          [enc_out, packed_enc_out])

      self.assertAllClose(actual_packed_enc_out, actual_enc_out)
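# Added note (not original code): with packed_input=True, the three length-3
# sequences above share one length-9 row. segment_ids ([0,0,0,1,1,1,2,2,2])
# tell the encoder which sequence each position belongs to so attention is
# masked across segment boundaries, and segment_pos restarts the positional
# encoding within each segment; hence the packed and unpacked outputs match.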
def testForwardPass(self):
  with self.session(use_gpu=False):
    tf.random.set_seed(8372749040)
    p = self._EncoderParams()
    mt_enc = encoder.MTEncoderV1(p)
    batch = py_utils.NestedMap()
    batch.ids = tf.transpose(tf.reshape(tf.range(0, 8, 1), [4, 2]))
    batch.paddings = tf.zeros([2, 4])
    enc_out = mt_enc.FPropDefaultTheta(batch).encoded

    self.evaluate(tf.global_variables_initializer())
    actual_enc_out = enc_out.eval()
    tf.logging.info('testForwardPass actual_enc_out %r' % actual_enc_out)
    expected_enc_out = [
        [[-2.5584161e-06, -5.6742726e-07, -8.1548797e-06, 2.6712776e-06],
         [1.1781749e-06, -4.7786052e-08, 4.2439538e-06, -3.3840388e-06]],
        [[-2.6852279e-06, 2.0878532e-07, -1.0491179e-05, 5.9619756e-06],
         [2.0423495e-06, 3.1651740e-07, 5.7234793e-06, -3.8120934e-06]],
        [[3.0904158e-07, -1.2983286e-06, -1.2469604e-05, 6.6027828e-06],
         [-3.8620223e-07, 3.8890593e-07, 1.9976458e-06, 1.0078909e-06]],
        [[1.0130438e-07, -1.1145677e-06, -1.2745468e-05, 8.0924037e-06],
         [-1.3496270e-06, -3.2355717e-06, -3.0266469e-06, -3.9747570e-06]]
    ]
    self.assertAllClose(expected_enc_out, actual_enc_out)
def testMelFeaturesUnstacked3D(self):
  # TODO(laurenzo): Remove this test once 3D inputs support removed.
  self._CreateFrontendParams()
  p = self.params
  mel_frontend = p.Instantiate()
  sample_rate, pcm = self._GetPcm()
  pcm *= 32768

  # Leave in 3D [batch, time, 1].
  src_inputs = tf.expand_dims(pcm, axis=2)
  paddings = tf.zeros(tf.shape(src_inputs)[0:2])

  outputs = mel_frontend.FPropDefaultTheta(
      py_utils.NestedMap(src_inputs=src_inputs, paddings=paddings))
  log_mel = outputs.src_inputs
  paddings = outputs.paddings
  with self.session() as sess:
    pcm = sess.run(pcm)
    tf.logging.info('pcm: ~ %s = %s', pcm.shape, pcm)
    self.assertGreater(33000, np.amax(pcm))
    self.assertGreater(np.amax(pcm), 2.)
    log_mel, paddings, sample_rate = sess.run(
        [log_mel, paddings, sample_rate])
    self.assertEqual(sample_rate, p.sample_rate)
    self.assertEqual(paddings.shape, log_mel.shape[0:2])
    self.assertAllEqual(paddings, np.zeros_like(paddings))
    # log_mel ~ [batch, time, feature_size, channel]
    tf.logging.info('mel ~ %s', log_mel.shape)
    self.assertEqual(log_mel.shape[2], 2)  # 2 bins
    # Squeeze the batch and channel dimensions out.
    log_mel = np.squeeze(log_mel, axis=(0, 3))
    t = log_mel.shape[0]
    mu = np.sum(log_mel, axis=0) / t
    d = log_mel - mu
    v = np.sum(d * d, axis=0) / (t - 1)
    s = np.sqrt(v)
    tf.logging.info('Found mean = %s', mu)
    tf.logging.info('Found stddev = %s', s)
    ref_unstacked_mean = [13.46184731, 13.30099297]
    ref_unstacked_stddev = [1.3840059, 1.24434352]
    self.assertAllClose(mu, ref_unstacked_mean, atol=1e-4)
    self.assertAllClose(s, ref_unstacked_stddev, atol=1e-3)
def _MaybePadSourceInputs(self, src_inputs, src_paddings):
  p = self.params
  if not p.append_eos_frame:
    return src_inputs, src_paddings

  per_src_len = tf.reduce_sum(1 - src_paddings, 1)
  per_src_len += 1
  max_src_len = tf.reduce_max(per_src_len)
  input_shape = tf.shape(src_inputs)
  input_len = tf.maximum(input_shape[1], tf.cast(max_src_len, tf.int32))
  pad_steps = input_len - input_shape[1]
  src_inputs = tf.concat([
      src_inputs,
      tf.zeros(
          inplace_ops.inplace_update(input_shape, 1, pad_steps),
          src_inputs.dtype)
  ], 1)
  src_paddings = 1 - tf.sequence_mask(
      tf.reshape(per_src_len, [input_shape[0]]), tf.reshape(input_len, []),
      src_paddings.dtype)
  return src_inputs, src_paddings
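# Added worked example (not original code): with p.append_eos_frame and
# src_paddings = [[0., 0., 0.]], per_src_len becomes [4], so one all-zero
# frame is appended (pad_steps = 1) and the new paddings are
# [[0., 0., 0., 0.]]. With src_paddings = [[0., 0., 1.]], per_src_len becomes
# [3], pad_steps = 0, and the existing padded frame is re-labeled as the
# valid EOS frame: the new paddings are [[0., 0., 0.]].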
def FProp(self, theta, inputs, paddings):
  p = self.params
  if not p.remat:
    return self._FProp(theta, inputs, paddings)

  def CellFn(theta, state0, unused_inputs):
    outs, out_paddings = self._FProp(theta, state0.inputs, state0.paddings)
    return py_utils.NestedMap(
        inputs=outs, paddings=out_paddings), py_utils.NestedMap()

  state0 = py_utils.NestedMap(inputs=inputs, paddings=paddings)
  _, state1 = recurrent.Recurrent(
      theta=theta,
      state0=state0,
      inputs=py_utils.NestedMap(
          inputs=tf.zeros([1, 0])),  # A dummy input of shape [T, ?].
      cell_fn=CellFn,
      allow_implicit_capture=p.allow_implicit_capture)
  return state1.inputs, state1.paddings
def testBiEncoderForwardPass(self):
  with self.session(use_gpu=False):
    tf.set_random_seed(8372749040)
    p = self._BiEncoderParams()
    mt_enc = encoder.MTEncoderBiRNN(p)
    batch = py_utils.NestedMap()
    batch.ids = tf.transpose(tf.reshape(tf.range(0, 8, 1), [4, 2]))
    batch.paddings = tf.zeros([2, 4])
    enc_out = mt_enc.FPropDefaultTheta(batch).encoded

    tf.global_variables_initializer().run()
    actual_enc_out = enc_out.eval()
    expected_enc_out = [
        [[1.42110639e-06, 1.31101151e-05], [-6.62138473e-06, -1.11313329e-06]],
        [[1.14506956e-05, 2.98347204e-05], [-5.89276988e-06, 5.54328744e-06]],
        [[1.35346390e-05, 1.00745674e-05], [-4.80002745e-06, -1.23648788e-05]],
        [[2.00507566e-06, -1.51463591e-05], [-5.71241526e-06, -1.87959231e-05]]
    ]
    self.assertAllClose(expected_enc_out, actual_enc_out)
def _testNormalizedDepthwiseConv2DHelper(self,
                                         is_causal=False,
                                         dropconnect_prob=0):
  if is_causal:
    conv_cls = (conv_layers.CausalNormalizedDepthwiseConv2DLayer)
  else:
    conv_cls = conv_layers.NormalizedDepthwiseConv2DLayer
  tf.random.set_seed(398847392)
  np.random.seed(12345)
  params = conv_cls.Params().Set(
      name='conv',
      weight_tiling_factor=2,
      filter_shape=[3, 1, 2, 1],
      dropconnect_prob=dropconnect_prob,
      deterministic_dropout=True)
  conv_layer = params.Instantiate()
  in_padding = tf.zeros([2, 4], dtype=tf.float32)
  inputs = tf.constant(
      np.random.normal(0.1, 0.5, [2, 4, 1, 4]), dtype=tf.float32)
  output, _ = conv_layer.FPropDefaultTheta(inputs, in_padding)
  return output
def testSpectrumAugmenterWithFrequencyMask(self):
  with self.session(use_gpu=False, graph=tf.Graph()):
    tf.random.set_seed(1234)
    inputs = tf.ones([3, 5, 10, 1], dtype=tf.float32)
    paddings = tf.zeros([3, 5])
    hs = []
    for p in [
        spectrum_augmenter.SpectrumAugmenter.Params(),
        spectrum_augmenter_on_device.SpectrumAugmenterOnDevice.Params()
    ]:
      p.name = 'specAug_layers'
      p.freq_mask_max_bins = 6
      p.freq_mask_count = 2
      p.time_mask_max_frames = 0
      p.random_seed = 34567
      specaug_layer = p.Instantiate()
      h, _ = specaug_layer.FPropDefaultTheta(inputs, paddings)
      hs.append(h)
    layer_output, layer_output_on_device = self.evaluate(hs)
    self.assertAllClose(layer_output, layer_output_on_device)
def _CreateDynamicShapeInputs(self, batch_dim, length_dim, input_dim):
  inputs = tf.random.normal([batch_dim, length_dim, input_dim],
                            seed=92837472)
  # Create segment_ids with a random number of 1s, followed by 0s at the end.
  num_ones = tf.random.uniform(
      shape=(), minval=1, maxval=length_dim, dtype=tf.int32)
  segment_ids = tf.concat([
      tf.ones([batch_dim, num_ones]),
      tf.zeros([batch_dim, length_dim - num_ones])
  ],
                          axis=1)
  # Remove padded positions from the end.
  max_seq_len = tf.cast(
      tf.reduce_max(tf.reduce_sum(segment_ids, -1)), tf.int32)
  inputs = inputs[:, :max_seq_len, :]
  segment_ids = segment_ids[:, :max_seq_len]
  unused_segment_pos = tf.zeros_like(segment_ids)
  return inputs, segment_ids, unused_segment_pos
def testForwardPass(self):
  with self.session(use_gpu=False) as sess:
    bs = 2
    sl = 21
    d = 16
    tf.random.set_seed(8372749040)
    p = self._EncoderParams()
    mt_enc = p.Instantiate()
    batch = py_utils.NestedMap()
    batch.ids = tf.constant(
        np.random.randint(low=0, high=63, size=[bs, sl], dtype=np.int32))
    batch.paddings = tf.zeros([bs, sl])
    out = mt_enc.FPropDefaultTheta(batch)
    enc_out_sum = tf.reduce_sum(out.encoded)

    tf.global_variables_initializer().run()
    actual_enc_out, actual_enc_out_sum = sess.run([out.encoded, enc_out_sum])

    self.assertAllEqual([sl, bs, d], actual_enc_out.shape)
    self.assertAllClose(306.010132, actual_enc_out_sum)
def AddMultiCurveSubplot(fig,
                         tensors,
                         paddings,
                         labels,
                         xlabels=None,
                         **kwargs):
  """Adds a multi-curve subplot to a Matplotlib figure.

  Plots one line for each entry in tensors and assigns a plot label legend.

  Args:
    fig: The Matplotlib figure.
    tensors: List of tensors of shape [batch, length].
    paddings: Paddings for 'tensors' with shape [batch, length], with 0. in
      valid positions and 1. in invalid. Alternatively, a list of padding
      tensors of the same length as 'tensors'.
    labels: A list of tensor names (strings) of the same length as 'tensors'.
    xlabels: A string tensor of shape [batch] with an xlabel per batch.
    **kwargs: Optional keyword args: title, xlabel, ylabel, fontsize.
  """
  data = []
  row_labels = []
  if isinstance(paddings, tf.Tensor):
    paddings = [paddings] * len(tensors)
  batch_size = py_utils.GetShape(paddings[0])[0]
  max_lengths = tf.zeros([batch_size], tf.int32)
  for t, l, p in zip(tensors, labels, paddings):
    max_lengths = tf.maximum(max_lengths, py_utils.LengthsFromPaddings(p))
    if t is not None:
      data.append(py_utils.ApplyPadding(p, t))
      row_labels.append(l)
  shape = py_utils.GetShape(data[0], 2)
  data = tf.reshape(tf.concat(data, -1), [shape[0], len(data), shape[1]])

  args = [data, max_lengths]
  if xlabels is not None:
    args.append(xlabels)
  fig.AddSubplot(
      args, plot_func=_AddMultiCurveRowPlots, row_labels=row_labels, **kwargs)
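# Hypothetical usage sketch (the summary-figure setup is an assumption, not
# part of the original code): overlay a predicted curve and its target on
# one subplot row per batch element.
#
#   fig = plot.MatplotlibFigureSummary('curves', figsize=(8, 4))
#   AddMultiCurveSubplot(
#       fig, [predictions, targets], paddings,
#       labels=['prediction', 'target'], title='Prediction vs. target')
#   summary = fig.Finalize()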
def _InitIterator(self):
  """Override of the root's _InitIterator to support dataset repeat."""
  if self.host_id in self._dataset:
    return
  p = self.params
  self._repeat_steps = getattr(self._input_generator.params, 'repeat_steps',
                               None)
  self._repeat_with_sentinel = getattr(self._input_generator.params,
                                       'repeat_with_sentinel', None)
  with py_utils.GlobalStepContext(None):
    # Hide global_step tensor from being captured by dataset function.
    ds = self.GetDataset()
  if self._repeat_steps:
    tf.logging.info('Repeating dataset every %d steps.', self._repeat_steps)
    ds = ds.take(self._repeat_steps).repeat()
  elif self._repeat_with_sentinel:
    tf.logging.info('Attaching sentinel to end of dataset and repeat.')
    # Dataset should contain batches of type NestedMap.
    sentinel_batch = ds.element_spec.Transform(
        lambda x: tf.zeros(x.shape, dtype=x.dtype))
    # Fill the dummy sentinel batch's sentinel_key tensor with
    # sentinel_value.
    sentinel_batch[p.sentinel_key] = tf.fill(
        sentinel_batch[p.sentinel_key].shape, p.sentinel_value)
    tf.logging.info('attaching sentinel %r', sentinel_batch[p.sentinel_key])
    tf.logging.info('sentinel type %r', sentinel_batch[p.sentinel_key].dtype)
    ds = ds.concatenate(
        tf.data.Dataset.from_tensors(sentinel_batch)).repeat()
  options = tf.data.Options()
  options.experimental_deterministic = bool(self.cluster.in_unit_test)
  ds = ds.with_options(options)
  self._dataset[self.host_id] = ds
  if tf.executing_eagerly_outside_functions():
    it = iter(ds)
  else:
    it = tf.data.make_initializable_iterator(ds)
  self._iterator[self.host_id] = it
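# Illustrative sketch (assumed tensors and keys, not original code): the
# sentinel-repeat pattern in plain tf.data. A dummy batch carrying a
# recognizable marker value is appended so downstream code can detect the
# epoch boundary of an otherwise endlessly repeated dataset.
real = tf.data.Dataset.from_tensor_slices({'x': tf.ones([4, 2])})
sentinel = tf.data.Dataset.from_tensors({'x': tf.fill([2], -1.0)})
ds_with_sentinel = real.concatenate(sentinel).repeat()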
def testMelMeanVarNormalization(self):
  self._CreateFrontendParams()
  p = self.params
  p.stack_left_context = 2
  p.frame_stride = p.stack_left_context + 1
  ref_mean = (13.38236332, 13.2698698, 13.45229626, 13.26469517, 13.46731281,
              13.31649303)
  ref_stddev = (1.52104115, 1.27433181, 1.41266346, 1.27072334, 1.41251481,
                1.28583682)
  p.per_bin_mean = ref_mean[:p.num_bins]
  p.per_bin_stddev = ref_stddev[:p.num_bins]
  mel_frontend = p.Instantiate()
  _, pcm = self._GetPcm()
  pcm *= 32768

  # Convert to 4D [batch, time, packet, channels].
  sample_count = tf.shape(pcm)[1]
  packet_size = 11  # A non-round number.
  trimmed_pcm = pcm[:, 0:(sample_count // packet_size) * packet_size]
  src_inputs = tf.reshape(trimmed_pcm, (1, -1, packet_size, 1))
  paddings = tf.zeros(tf.shape(src_inputs)[0:2])

  outputs = mel_frontend.FPropDefaultTheta(
      py_utils.NestedMap(src_inputs=src_inputs, paddings=paddings))
  log_mel = outputs.src_inputs
  with self.session() as sess:
    log_mel = sess.run(log_mel)
    # log_mel ~ [batch, time, feature_size, channel]
    tf.logging.info('mel ~ %s', log_mel.shape)
    # Squeeze the batch and channel dimensions out.
    log_mel = np.squeeze(log_mel, axis=(0, 3))
    t = log_mel.shape[0]
    mu = np.sum(log_mel, axis=0) / t
    d = log_mel - mu
    v = np.sum(d * d, axis=0) / (t - 1)
    s = np.sqrt(v)
    # Only take the base bin values:
    mu = mu[:p.num_bins]
    s = s[:p.num_bins]
    self.assertAllClose(mu, np.zeros_like(mu), atol=1e-4)
    self.assertAllClose(s, np.ones_like(s), atol=1e-3)
def testRepeatMoEFProp(self):
  """Test to verify RecurrentDenseBuilder.DecoderLayerStack().

  The test fails without this change.
  """
  batch_dim = 2
  length_dim = 4
  input_dim = 4
  builder = gshard_builder.RecurrentDenseBuilder.Params().Set(
      model_dim=input_dim,
      num_devices=2,
      moe_hidden_dim=16,
      e_dim=2,
      attention_key_value_dim=input_dim,
      attention_num_heads=1,
      c_dim=2,
      emh_split=[-1, 0, -1, -1],
      ehm_split=[-1, 0, -1, -1])
  b = builder.Instantiate()
  layers = [
      b.DecSelfAttention('dec_self_attention'),
      b.MoE('moe', decoder=True)
  ]
  p = b.DecoderLayerStack('rep', layers, 2)
  with self.session(graph=tf.Graph()) as sess:
    tf.random.set_seed(2019)
    layer = p.Instantiate()
    inputs = tf.ones([batch_dim, length_dim, input_dim])
    segment_ids = tf.ones([batch_dim, length_dim])
    segment_pos = tf.ones([batch_dim, length_dim])
    layer_inputs = py_utils.NestedMap(
        vec=inputs,
        segment_id=segment_ids,
        segment_pos=segment_pos,
        encoder_output=inputs,
        encoder_segment_id=segment_ids,
        encoder_segment_pos=segment_pos,
        aux_loss=tf.zeros([]))
    outputs = layer.FPropDefaultTheta(layer_inputs)
    sess.run(tf.global_variables_initializer())
    sess.run(outputs)
def testForwardPass(self):
  with self.session(use_gpu=False):
    tf.random.set_seed(8372749040)
    p = self._EncoderParams()
    mt_enc = encoder.MTEncoderV1(p)
    batch = py_utils.NestedMap()
    batch.ids = tf.transpose(tf.reshape(tf.range(0, 8, 1), [4, 2]))
    batch.paddings = tf.zeros([2, 4])
    enc_out = mt_enc.FPropDefaultTheta(batch).encoded

    tf.global_variables_initializer().run()
    actual_enc_out = enc_out.eval()
    expected_enc_out = [
        [[1.5309354e-06, -1.7816075e-07, 3.8047763e-06, -5.6422067e-07],
         [1.9017770e-06, -2.9778969e-06, -4.5083775e-06, -1.7054812e-06]],
        [[-2.1852782e-06, -1.8208171e-06, -1.4747930e-06, -5.8206351e-06],
         [6.7667429e-07, -3.6828042e-06, -1.0916860e-05, -3.2522742e-06]],
        [[-3.2333378e-07, 3.2147584e-06, 5.0556650e-07, -7.0188378e-07],
         [-6.5340635e-07, 1.9502845e-06, -9.2459632e-06, 5.1955390e-06]],
        [[2.0232728e-06, 4.9331529e-06, 1.1346837e-06, 7.5571520e-06],
         [-5.8475212e-07, 3.5547487e-06, -3.9037773e-06, 8.9575424e-06]]
    ]
    self.assertAllClose(expected_enc_out, actual_enc_out)
def testBiEncoderForwardPass(self):
  with self.session(use_gpu=False):
    tf.random.set_seed(8372749040)
    p = self._BiEncoderParams()
    mt_enc = encoder.MTEncoderBiRNN(p)
    batch = py_utils.NestedMap()
    batch.ids = tf.transpose(tf.reshape(tf.range(0, 8, 1), [4, 2]))
    batch.paddings = tf.zeros([2, 4])
    enc_out = mt_enc.FPropDefaultTheta(batch).encoded

    self.evaluate(tf.global_variables_initializer())
    actual_enc_out = enc_out.eval()
    expected_enc_out = [
        [[4.0744379e-07, -2.0108675e-06], [-4.2056736e-06, 9.2221135e-06]],
        [[1.2086311e-06, -2.2510878e-07], [-2.2938407e-06, 9.3108029e-06]],
        [[3.4632390e-06, -3.1495360e-06], [9.1814104e-07, 1.9459947e-06]],
        [[-9.0593801e-08, -1.2912932e-06], [-5.8420886e-07, -6.5603672e-07]]
    ]
    self.assertAllClose(expected_enc_out, actual_enc_out)