def testCondFC(self):
    """Checks that a `_CondFC` layer produces a finite (3, 4, 5, 12) output."""
    builder = builder_lib.ModelBuilderBase()
    layer_params = builder._CondFC('p', idims=10, adims=8, odims=12)
    layer = layer_params.Instantiate()
    # Trailing dimensions of the two inputs line up with idims=10 and adims=8.
    idims_input = tf.random_uniform((3, 4, 5, 10))
    adims_input = tf.random_uniform((3, 4, 1, 8))
    y = layer.FPropDefaultTheta(idims_input, adims_input)
    with self.session() as sess:
        sess.run(tf.global_variables_initializer())
        actual_y = sess.run(y)
        # Trailing output dimension matches odims=12.
        self.assertAllEqual(actual_y.shape, (3, 4, 5, 12))
        self.assertTrue(np.all(np.isfinite(actual_y)))
def testTransformBBoxes3DConsistentWithPoints(self):
    """Checks point-in-box membership survives a shared transform.

    The same transform is applied to the points and to the boxes, and the
    `IsWithinBBox3D` result must be identical before and after.
    """
    num_boxes, num_points = 20, 100
    points = tf.random_uniform((num_points, 3))
    bboxes_3d = tf.random_uniform((num_boxes, 7))
    membership_before = geometry.IsWithinBBox3D(points, bboxes_3d)

    # Build a batch of one transform and take its single element.
    transform = self._MakeTransformTestTranslationMatrices(1)[0]
    moved_points = geometry.TransformPoints(points, transform)
    moved_bboxes = geometry.TransformBBoxes3D(bboxes_3d, transform)
    membership_after = geometry.IsWithinBBox3D(moved_points, moved_bboxes)

    with self.session() as sess:
        actual_before, actual_after = sess.run(
            (membership_before, membership_after))
        self.assertAllEqual(actual_before, actual_after)
def Rand(theta, state, inputs):
    """Advances `state.value` by `inputs.coeff` times a uniform scalar sample.

    Args:
      theta: Unused; deleted immediately.
      state: Object exposing a `value` tensor attribute.
      inputs: Object exposing a `coeff` attribute.

    Returns:
      A pair `(next_state, extras)` where `next_state` is a NestedMap holding
      the updated `value` and `extras` is an empty NestedMap.
    """
    del theta
    # Scalar sample drawn with the same dtype as the running value.
    sample = tf.random_uniform(shape=[], dtype=state.value.dtype)
    next_state = py_utils.NestedMap()
    next_state.value = state.value + inputs.coeff * sample
    return next_state, py_utils.NestedMap()
def testDefaultParamsWithDynamicShape(self):
    """Checks device placement of variables whose shape is only known at run time.

    Ten float64 variables are created under the cluster's placer with an
    initializer whose shape comes from a placeholder; every variable and
    their sum are expected on `/job:localhost` task 0, CPU 0.
    """
    cluster_params = cluster_factory.Cluster.Params()
    c = cluster_factory.Cluster(cluster_params)
    graph = tf.Graph()
    variables = []
    with graph.as_default():
        with tf.device(c.GetPlacer()):
            for idx in range(10):
                # Hide the static shape behind a placeholder with shape [None].
                dyn_shape = tf.placeholder_with_default(
                    tf.constant([2], dtype=tf.int32), shape=[None])
                variables.append(
                    tf.get_variable(
                        'x%d_wb/var' % idx,
                        initializer=tf.random_uniform(
                            dyn_shape, dtype=tf.float64),
                        validate_shape=False))
            sum_all = tf.add_n(variables)

    for var in variables:
        self.assertEqual(
            var.device,
            c._MakeDeviceString(
                job_name='/job:localhost',
                task_id=0,
                device_name='CPU',
                device_id=0))
    self.assertEqual(
        sum_all.device,
        c._MakeDeviceString(
            job_name='/job:localhost',
            task_id=0,
            device_name='CPU',
            device_id=0))
def _verify_timestep_counts(self, num_splits):
    """Runs the dummy pipeline CNN and checks its gradient norm and accumulators.

    Args:
      num_splits: Number of splits forwarded to `_BuildDummyPipelineCnn`.
    """
    num_micro_batches = 8
    batch_size = 16
    with self.session(graph=tf.Graph()) as sess:
        tf.set_random_seed(1245)
        inputs = tf.random_uniform([batch_size, 8, 8, 1], seed=12345)
        net = _BuildDummyPipelineCnn(
            num_splits=num_splits, num_micro_batches=num_micro_batches)
        endpoints = net.FPropDefaultTheta(inputs)
        # The network may return either logits alone or a (logits, aux) pair.
        if isinstance(endpoints, (list, tuple)):
            logits, aux_logits = endpoints
        else:
            logits, aux_logits = endpoints, None

        loss = tf.reduce_mean(logits)
        grads = tf.gradients(loss, tf.trainable_variables())
        grad_norm = tf.sqrt(py_utils.SumSquared(grads))
        ts = net.GetAccumulatorValues().Flatten()

        sess.run(tf.global_variables_initializer())
        grad_norm_val, ts_vals = sess.run([grad_norm, ts])
        test_utils.CompareToGoldenSingleFloat(self, 0.268087, grad_norm_val)

        # Accumulator values should be equal to number of time steps in
        # pipeline.
        for ts_val in list(ts_vals):
            expected_ts = num_micro_batches if num_splits > 1 else 1
            self.assertEqual(ts_val, expected_ts)

        if aux_logits is not None:
            aux_logit_tensor = sess.run(aux_logits)
            self.assertEqual(aux_logit_tensor.shape, (batch_size, 8, 8, 1))
def SampleIds(self):
    """Returns random sample ids rendered as strings.

    The op seed is `p.random_seed * 2000`, additionally multiplied by
    `self._cur_iter` when `p.cur_iter_in_seed` is set.

    Returns:
      A string tensor produced by `tf.as_string` over uniform samples whose
      shape is `p.target_shape[:1]`.
    """
    p = self.params
    seed = p.random_seed * 2000
    if p.cur_iter_in_seed:
        seed = seed * self._cur_iter
    samples = tf.random_uniform(p.target_shape[:1], seed=seed)
    return tf.as_string(samples)
def testEinsumReplacementBxycBzxBzyc(self):
    """Compares `EinsumBxycBzxBzyc` against `tf.einsum('bxyc,bzx->bzyc')`."""
    with self.session(use_gpu=False, graph=tf.Graph()) as sess:
        a = tf.random_uniform(
            shape=[20, 7, 4, 3], minval=0, maxval=1, dtype=tf.float32)
        b = tf.random_uniform(
            shape=[20, 5, 7], minval=0, maxval=1, dtype=tf.float32)
        reference = tf.einsum('bxyc,bzx->bzyc', a, b)

        layer_params = (
            spectrum_augmenter_on_device.SpectrumAugmenterOnDevice.Params())
        layer_params.name = 'specAug_layers'
        specaug_layer = layer_params.Instantiate()
        candidate = specaug_layer.EinsumBxycBzxBzyc(a, b)

        # Fetch both in one run so they see the same random inputs.
        reference_val, candidate_val = sess.run([reference, candidate])
        self.assertAllClose(reference_val, candidate_val)
def _testOutShape(self, p, input_shape, expected_shape):
    """Instantiates `p`, runs it on random inputs and checks the output shape.

    Args:
      p: Layer params exposing `Instantiate()`.
      input_shape: 3-element shape; its first two entries are used as the
        batch size and the number of points.
      expected_shape: Shape the evaluated result must have.
    """
    batch_size, num_points, _ = input_shape
    graph = tf.Graph()
    with graph.as_default():
        net = p.Instantiate()
        points = tf.random_uniform((batch_size, num_points, 3))
        features = tf.random_uniform(input_shape)
        padding = tf.zeros((batch_size, num_points), dtype=tf.float32)
        label = tf.random_uniform(
            (batch_size, ), minval=0, maxval=16, dtype=tf.int32)
        input_data = py_utils.NestedMap(
            points=points, features=features, padding=padding, label=label)
        result = net.FPropDefaultTheta(input_data)
    with self.session(graph=graph) as sess:
        sess.run(tf.global_variables_initializer())
        np_result = sess.run(result)
    self.assertEqual(np_result.shape, expected_shape)
def testNeighborSquaredDistance(self):
    """Checks `NeighborSquaredDistanceMatrix` against the numpy reference."""
    n, p1, k = 2, 10, 3
    points = tf.random_uniform((n, p1, 3))
    # Random neighbor indices in [0, p1).
    neighbor_idx = tf.random_uniform(
        (n, p1, k), minval=0, maxval=p1, dtype=tf.int32)
    neighbor_points = car_lib.MatmulGather(points, neighbor_idx)
    sq_dist = car_lib.NeighborSquaredDistanceMatrix(points, neighbor_points)

    with self.session() as sess:
        np_points, np_neighbor_idx, np_sq_dist = sess.run(
            [points, neighbor_idx, sq_dist])
        expected = self._np_sq_dis_neighbors(np_points, np_neighbor_idx)
        self.assertAllClose(np_sq_dist, expected)
def _Targets(self, target_shape):
    """Builds the dict of target tensors for the given shape.

    Ids and labels are either the fixed values from params or uniform samples
    scaled by the tokenizer's vocab size and cast to int32.

    Args:
      target_shape: Shape of the ids/labels tensors.

    Returns:
      A dict with keys 'ids', 'labels', 'weights' and 'paddings'; it also has
      'transcripts' unless `p.for_mt` is set, and 'alignments' when
      `p.align_label_with_frame` is set.
    """
    p = self.params
    if p.cur_iter_in_seed:
        self._cur_iter += 1
    base_seed = p.random_seed * 2000 * self._cur_iter

    if p.fixed_target_ids is not None:
        tids = p.fixed_target_ids
        assert tids.shape_as_list() == target_shape
    else:
        id_samples = tf.random_uniform(target_shape, seed=base_seed)
        tids = tf.cast(id_samples * p.tokenizer.vocab_size, tf.int32)

    if p.fixed_target_labels is not None:
        tlabels = p.fixed_target_labels
        assert tlabels.shape_as_list() == target_shape
        tpaddings = tf.constant(0.0, shape=target_shape)
    else:
        label_samples = tf.random_uniform(target_shape, seed=base_seed + 1)
        tlabels = tf.cast(label_samples * p.tokenizer.vocab_size, tf.int32)
        # A position is padded once the running sum of uniform samples
        # along axis 1 exceeds 0.4 * target_shape[1].
        padding_samples = tf.random_uniform(
            target_shape[:2], seed=p.random_seed + 1001 * self._cur_iter)
        tpaddings = tf.cast(
            tf.cumsum(padding_samples, axis=1) > 0.4 * target_shape[1],
            tf.float32)
        tpaddings = self._check_paddings(tpaddings)

    tweights = 1.0 - tpaddings
    d = {
        'ids': tids,
        'labels': tlabels,
        'weights': tweights,
        'paddings': tpaddings,
    }
    if not p.for_mt:
        d['transcripts'] = tf.constant(
            p.target_transcript, shape=[target_shape[0]])
    if p.align_label_with_frame:
        source_len = p.source_shape[1]
        alignment_samples = tf.random_uniform(target_shape, seed=p.random_seed)
        d['alignments'] = tf.cast(alignment_samples * source_len, tf.int32)
    return d
def _Padding():
    """Pads each tensor in `tensor_list` by repeating randomly chosen rows.

    Reads `num_points_out`, `actual_num`, `seed` and `tensor_list` from the
    enclosing scope. The same sampled row indices are used for every tensor.

    Returns:
      A list with one padded tensor per entry of `tensor_list`.
    """
    # Sample (num_points_out - actual_num) row indices in [0, actual_num).
    fill_indices = tf.random_uniform([num_points_out - actual_num],
                                     minval=0,
                                     maxval=actual_num,
                                     dtype=tf.int32,
                                     seed=seed)
    return [
        tf.concat([t, tf.gather(t, fill_indices, axis=0)], axis=0)
        for t in tensor_list
    ]
def _MakeTransformTestTranslationMatrices(self, batch_size):
    """Makes a batch of 4x4 matrices that translate in all directions.

    Args:
      batch_size: Number of matrices to generate.

    Returns:
      The generated matrices stacked along axis 0.
    """

    def _OneTranslation():
        offset = tf.random_uniform([3, 1])
        # Pad one row below and three columns to the left, which puts the
        # random offset in the last column of a 4x4 matrix.
        padded = tf.pad(offset, [[0, 1], [3, 0]])
        return padded + tf.diag([1., 1., 1., 1.])

    return tf.stack([_OneTranslation() for _ in range(batch_size)], axis=0)
def _MakeTransformTestRotationMatrices(self, batch_size):
    """Makes a batch of 4x4 matrices that only rotate around the z-axis.

    Args:
      batch_size: Number of matrices to generate.

    Returns:
      The generated matrices stacked along axis 0.
    """

    def _OneRotation():
        # Random first angle; the other two angles are fixed at zero.
        rotation = geometry._MakeRotationMatrix(tf.random_uniform([]), 0., 0.)
        # Embed the rotation matrix into a 4 x 4 matrix.
        return tf.pad(rotation, [[0, 1], [0, 1]]) + tf.diag([0, 0, 0, 1.])

    return tf.stack([_OneRotation() for _ in range(batch_size)], axis=0)
def testDecoderWithOrientedPerClassNMS(self):
    """Checks output shapes and per-class filtering of oriented per-class NMS."""
    batch_size = 4
    num_preds = 8
    num_classes = 10

    # An example of setting the score threshold high and IOU threshold low
    # for classes we don't care about: only class 1 gets usable thresholds.
    score_threshold = [0.05 if c == 1 else 1.0 for c in range(num_classes)]
    nms_iou_threshold = [0.5 if c == 1 else 0.0 for c in range(num_classes)]

    with tf.Graph().as_default():
        tf.set_random_seed(12345)
        predicted_bboxes = tf.random_normal([batch_size, num_preds, 7])
        classification_scores = tf.random_uniform(
            [batch_size, num_preds, num_classes], minval=0, maxval=1)
        bboxes, bbox_scores, valid_mask = detection_decoder.DecodeWithNMS(
            predicted_bboxes,
            classification_scores,
            nms_iou_threshold=nms_iou_threshold,
            score_threshold=score_threshold,
            use_oriented_per_class_nms=True)

        with self.session() as sess:
            (input_bboxes, input_scores, output_bboxes, output_scores,
             mask) = sess.run([
                 predicted_bboxes, classification_scores, bboxes, bbox_scores,
                 valid_mask
             ])

            self.assertEqual((batch_size, num_preds, 7), input_bboxes.shape)
            self.assertEqual((batch_size, num_classes, num_preds, 7),
                             output_bboxes.shape)
            self.assertEqual((batch_size, num_preds, num_classes),
                             input_scores.shape)
            self.assertEqual((batch_size, num_classes, num_preds),
                             output_scores.shape)
            self.assertEqual((batch_size, num_classes, num_preds), mask.shape)

            # Assert that NMS did some kind of filtering for each class: the
            # mask count must match the above-threshold count on both the
            # input scores and the output scores.
            for cls_idx in range(num_classes):
                threshold = score_threshold[cls_idx]
                self.assertEqual(mask[:, cls_idx, :].sum(),
                                 (input_scores[:, :, cls_idx] > threshold).sum())
                self.assertEqual(
                    mask[:, cls_idx, :].sum(),
                    (output_scores[:, cls_idx, :] > threshold).sum())
def testTransformPointsRotation(self):
    """Checks the rotation test transforms keep z fixed but move the points."""
    batch_size, num_points = 10, 8
    points = tf.random_uniform((batch_size, num_points, 3))
    rotations = self._MakeTransformTestRotationMatrices(batch_size)
    rotated = geometry.TransformPoints(points, rotations)
    with self.session() as sess:
        np_points, np_rotated = sess.run((points, rotated))
        # Points are the same on the z-axis (no rotation).
        self.assertAllClose(np_points[:, :, 2], np_rotated[:, :, 2])
        # Points are transformed, and different.
        self.assertNotAllClose(np_points, np_rotated)
def testTransformPointsTranslation(self):
    """Checks translated points equal the originals plus the translation."""
    batch_size, num_points = 10, 8
    points = tf.random_uniform((batch_size, num_points, 3))
    translations = self._MakeTransformTestTranslationMatrices(batch_size)
    translated = geometry.TransformPoints(points, translations)
    with self.session() as sess:
        np_points, np_translated, np_transforms = sess.run(
            (points, translated, translations))
        # Points are transformed, and different.
        self.assertNotAllClose(np_points, np_translated)
        # Manually transform points and check that they are as expected: the
        # offset is the first three rows of each matrix's last column.
        offsets = np_transforms[:, :3, 3]
        self.assertAllClose(np_points + offsets[:, np.newaxis, :],
                            np_translated)
def testWrapAngleRad(self):
    """Checks `WrapAngleRad` preserves sine values and stays in [-pi, pi]."""
    raw = tf.random_uniform([100], minval=-100., maxval=100., dtype=tf.float32)
    wrapped = geometry.WrapAngleRad(raw)
    with self.session() as sess:
        np_raw, np_wrapped = sess.run((raw, wrapped))
        # The sine values of the angles should remain the same after wrapping.
        self.assertAllClose(np.sin(np_raw), np.sin(np_wrapped), atol=1e-5)
        # Check ranges match the wrapped expectations.
        self.assertTrue(np.all(np_wrapped >= -np.pi))
        self.assertTrue(np.all(np_wrapped <= np.pi))
def GenerateStepSeedPair(p, unused_global_step=None, op_seed=None):
    """Override py_utils.GenerateStepSeedPair to use GetOverWriteGlobalStep.

    Args:
      p: Params exposing `is_inference` and `random_seed`.
      unused_global_step: Ignored.
      op_seed: Optional value added to both seeds.

    Returns:
      A tensor of two seeds. The dtype is int32 when `py_utils.use_tpu()` is
      true, and int64 otherwise.
    """
    seed_dtype = tf.int32 if py_utils.use_tpu() else tf.int64
    needs_fresh_randomness = p.is_inference and p.random_seed is None
    if needs_fresh_randomness:
        # Unlike tf.random*, stateless random ops are completely determined by
        # the passed-in seeds. This means at inference time the same inputs
        # will produce the same outputs, even if the model is supposed to have
        # randomness such as dropout during inference. We inject additional
        # randomness only during inference if the graph is exported with
        # random_seed=None as a workaround.
        return tf.random_uniform([2], maxval=seed_dtype.max, dtype=seed_dtype)

    with tf.name_scope('op_seed') as scope:
        # Seed pair is [global step, seed derived from the name scope].
        pair = tf.stack([
            tf.cast(GetOverWriteGlobalStep(), seed_dtype),
            tf.cast(py_utils.GenerateSeedFromName(scope), seed_dtype),
        ])
        for offset in (p.random_seed, op_seed):
            if offset is not None:
                pair += offset
        return pair
def testDecoderSingleClassNMS(self):
    """Checks `DecodeWithNMS` output shapes with per-class oriented NMS off."""
    batch_size, num_preds, num_classes = 4, 8, 10
    score_threshold = 0.05
    nms_iou_threshold = 0.5

    with tf.Graph().as_default():
        tf.set_random_seed(12345)
        predicted_bboxes = tf.random_normal([batch_size, num_preds, 7])
        classification_scores = tf.random_uniform(
            [batch_size, num_preds, num_classes], minval=0, maxval=1)
        bboxes, bbox_scores, valid_mask = detection_decoder.DecodeWithNMS(
            predicted_bboxes,
            classification_scores,
            nms_iou_threshold=nms_iou_threshold,
            score_threshold=score_threshold,
            use_oriented_per_class_nms=False)

        with self.session() as sess:
            (input_bboxes, input_scores, output_bboxes, output_scores,
             mask) = sess.run([
                 predicted_bboxes, classification_scores, bboxes, bbox_scores,
                 valid_mask
             ])

            # Inputs are [batch, preds, ...]; outputs are [batch, classes,
            # preds, ...].
            self.assertEqual((batch_size, num_preds, 7), input_bboxes.shape)
            self.assertEqual((batch_size, num_classes, num_preds, 7),
                             output_bboxes.shape)
            self.assertEqual((batch_size, num_preds, num_classes),
                             input_scores.shape)
            self.assertEqual((batch_size, num_classes, num_preds),
                             output_scores.shape)
            self.assertEqual((batch_size, num_classes, num_preds), mask.shape)
def testDummyPipelineCnnNestedMapInput(self):
    """Runs a 4-cell `PipeliningLayer` on a NestedMap and checks the shape."""
    batch_size = 16
    num_layers = 4
    with self.session(graph=tf.Graph()) as sess:
        cells = [
            _SimpyLayerWithNestedMapInput.Params().Set(
                name='layer_{}'.format(i)) for i in range(num_layers)
        ]
        pipeline_params = PipeliningLayer.Params().Set(
            name='pipeline',
            num_micro_batches=8,
            micro_batch_size=2,
            nested_map_fprop=True,
            cell_tpl=cells,
            before_tpl=[])
        layer = pipeline_params.Instantiate()

        tf.set_random_seed(1245)
        inputs = tf.random_uniform([batch_size, 8, 8, 1], seed=12345)
        nested_inputs = py_utils.NestedMap(vec=inputs, paddings=None)
        outputs = layer.FPropDefaultTheta(nested_inputs)

        sess.run(tf.global_variables_initializer())
        sess.run(outputs.vec)
        # The static shape of the output must match the input shape.
        self.assertEqual(outputs.vec.shape, (batch_size, 8, 8, 1))
def testTransformBBoxes3D(self):
    """Checks translating boxes changes only their centers."""
    batch_size, num_boxes = 10, 20
    bboxes_3d = tf.random_uniform((batch_size, num_boxes, 7))
    translations = self._MakeTransformTestTranslationMatrices(batch_size)
    moved_bboxes = geometry.TransformBBoxes3D(bboxes_3d, translations)
    with self.session() as sess:
        np_before, np_after = sess.run((bboxes_3d, moved_bboxes))
        self.assertAllEqual(np_before.shape, np_after.shape)
        # Dimensions (slice 3:6) should remain unchanged.
        self.assertAllClose(np_before[..., 3:6], np_after[..., 3:6])
        # Rotation should remain unchanged.
        self.assertAllClose(np_before[..., 6], np_after[..., 6])
        # Center xyz should be different.
        self.assertNotAllClose(np_before[..., :3], np_after[..., :3])
def _TestSaveRestoreHelper(self, direction):
    """Test opaque params stay 'equivalent' after save-restore.

    The params are saved in a `use_gpu=True` session, then zeroed and restored
    in a `use_gpu=False` session; canonical weights and biases from both
    sessions are compared.

    Args:
      direction: Forwarded to `_ParamsSize`, `_CreateSaveable` and
        `_CompareBiases`.
    """
    input_dim = 4
    cell_dim = 3

    with tf.variable_scope('s1'):
        params_size_t = self._ParamsSize(input_dim, cell_dim, direction)
        opaque_params = tf.get_variable(
            'cudnn_params',
            initializer=tf.random_uniform([params_size_t]),
            validate_shape=False)
        reset_params_op = tf.assign(opaque_params,
                                    tf.zeros_like(opaque_params))
        scope_name = tf.get_variable_scope().name
        saveable = self._CreateSaveable(opaque_params, input_dim, cell_dim,
                                        direction, scope_name)
        converter = saveable.format_converter
        canonical_wts, canonical_bs = converter._opaque_to_cu_canonical(
            saveable._variables)
        saver = saver_lib.Saver()

    with self.session(use_gpu=True) as sess:
        sess.run(tf.global_variables_initializer())
        save_path = os.path.join(self.get_temp_dir(), 'save-restore-unidi')
        saver.save(sess, save_path)
        saved_wts, saved_bs = sess.run([canonical_wts, canonical_bs])

    with self.session(use_gpu=False) as sess:
        sess.run(tf.global_variables_initializer())
        # Zero the params first so the restore has to bring the values back.
        sess.run(reset_params_op)
        saver.restore(sess, save_path)
        restored_wts, restored_bs = sess.run([canonical_wts, canonical_bs])
        # Weight portion of the opaque params are exactly the same. For biases
        # portion, it's expected that the sum of biases each gate stays the
        # same.
        self._CompareWeights(saved_wts, restored_wts)
        self._CompareBiases(saved_bs, restored_bs, direction)
def testBeamSearchOp(self):
    """Runs the beam search helper on seeded random scores against goldens.

    Two score tensors (seeds 12345 and 12346) are passed to
    `_testBeamSearchOpHelper` together with the expected values below, and
    the hypothesis at `done_hyps[1, 5]` is compared with `hyp_str_expected`.
    """
    b_size = 8
    num_beams = 2
    seq_len = 6
    num_classes = 5
    best_scores_expected = [1.769434, 1.640316]
    cum_scores_expected = [
        1.823942, 1.609159, 1.610366, 1.454234, 1.348811, 1.3167, 1.346274,
        1.045735
    ]
    # One row per entry of seq_len; only the first two rows are non-zero,
    # matching the two score tensors supplied below.
    scores_expected = [
        [
            0.86230338, 0.84442794, 0.45372832, 0.38127339, 0.42067075,
            0.25818801, 0.38612545, 0.18693292
        ],
        [
            0.96163845, 0.76473117, 0.74806261, 0.60980642, 0.9281404,
            0.47227204, 0.89254606, 0.20130682
        ],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.],
    ]
    hyps_expected = [[1, 0, 0, 3, 4, 1, 3, 4], [1, 4, 4, 1, 1, 3, 1, 0],
                     [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                     [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0]]
    prev_hyps_expected = [[0, 1, 0, 1, 0, 1, 0, 1], [0, 1, 0, 1, 4, 1, 2, 1],
                          [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
                          [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0]]
    # Expected hypothesis, compared via `_SameHyp` at the end of the test.
    hyp_str_expected = """ beam_id: 1 ids: 1 ids: 2 scores: 0.25818801 scores: 0.65319967 atten_vecs { prob: 0.38612545 prob: 0.42067075 prob: 0.84442794 } atten_vecs { prob: 0.45298624 prob: 0.53518069 prob: 0.57700801 } """
    # Nested as [seq_len][b_size][3]; only the first two steps are non-zero.
    atten_probs_expected = [
        [
            [0.45372832, 0.86230338, 0.65504861],
            [0.38612545, 0.42067075, 0.84442794],
            [0.45372832, 0.86230338, 0.65504861],
            [0.38612545, 0.42067075, 0.84442794],
            [0.45372832, 0.86230338, 0.65504861],
            [0.38612545, 0.42067075, 0.84442794],
            [0.45372832, 0.86230338, 0.65504861],
            [0.38612545, 0.42067075, 0.84442794],
        ],
        [
            [0.45372832, 0.86230338, 0.65504861],
            [0.38612545, 0.42067075, 0.84442794],
            [0.45372832, 0.86230338, 0.65504861],
            [0.38612545, 0.42067075, 0.84442794],
            [0.0532794, 0.53777719, 0.07609642],
            [0.38612545, 0.42067075, 0.84442794],
            [0.25818801, 0.03645897, 0.38127339],
            [0.38612545, 0.42067075, 0.84442794],
        ],
        [[0., 0., 0.], [0., 0., 0.], [0., 0., 0.], [0., 0., 0.],
         [0., 0., 0.], [0., 0., 0.], [0., 0., 0.], [0., 0., 0.]],
        [[0., 0., 0.], [0., 0., 0.], [0., 0., 0.], [0., 0., 0.],
         [0., 0., 0.], [0., 0., 0.], [0., 0., 0.], [0., 0., 0.]],
        [[0., 0., 0.], [0., 0., 0.], [0., 0., 0.], [0., 0., 0.],
         [0., 0., 0.], [0., 0., 0.], [0., 0., 0.], [0., 0., 0.]],
        [[0., 0., 0.], [0., 0., 0.], [0., 0., 0.], [0., 0., 0.],
         [0., 0., 0.], [0., 0., 0.], [0., 0., 0.], [0., 0., 0.]],
    ]

    # Fixed seeds keep the random inputs reproducible so the golden values
    # above stay valid.
    scores = [
        tf.random_uniform([b_size, num_classes], seed=12345),
        tf.random_uniform([b_size, num_classes], seed=12346),
    ]
    init_atten_probs = tf.random_uniform([b_size, 3], seed=12345)
    atten_probs = tf.zeros([seq_len, b_size, 3])
    done_hyps = self._testBeamSearchOpHelper(
        b_size, num_beams, seq_len, 0., scores, init_atten_probs, atten_probs,
        best_scores_expected, cum_scores_expected, scores_expected,
        hyps_expected, prev_hyps_expected, atten_probs_expected)

    self._SameHyp(hyp_str_expected, done_hyps[1, 5])
def Rand(shape):
    """Returns a float64 tensor of `shape` sampled uniformly with bounds -0.2 and 0.2."""
    return tf.random_uniform(
        shape=shape, minval=-0.2, maxval=0.2, dtype=tf.float64)
def RandWithCoeff(coeff):
    """Returns `coeff` multiplied by a uniform scalar sample of the same dtype."""
    sample = tf.random_uniform(shape=[], dtype=coeff.dtype)
    return coeff * sample
def _TimeMask(self,
              inputs,
              seq_lengths,
              noisify=False,
              gaussian_noise=False,
              dtype=tf.float32,
              domain_id_index=0):
    """Applies time masking with given degree to inputs.

    Masking settings are read from the params entries selected by
    `domain_id_index`.

    Args:
      inputs: Batch of input features of shape (batch_size, time_length,
        num_freq, channels).
      seq_lengths: The actual sequence lengths from which the masks are
        sampled, of shape (batch_size,).
      noisify: Whether to noisify the masked out regions.
      gaussian_noise: Whether to use gaussian noise when noisifying.
      dtype: Data type.
      domain_id_index: domain id index.

    Returns:
      Inputs with random time masking applied. The inputs are returned
      unchanged when masking is disabled by the params.
    """
    p = self.params

    # Get time masking parameters.
    max_frames = p.time_mask_max_frames[domain_id_index]
    masks_per_frame = p.time_masks_per_frame[domain_id_index]
    dynamic_max_frames = p.use_dynamic_time_mask_max_frames[domain_id_index]
    mask_count = p.time_mask_count[domain_id_index]
    max_ratio = p.time_mask_max_ratio[domain_id_index]

    # Nothing to do when the maximum mask length is zero, when the maximum
    # ratio is not positive, or when no masks are requested.
    zero_fixed_length = max_frames == 0 and not dynamic_max_frames
    if zero_fixed_length or max_ratio <= 0.0 or mask_count == 0:
        return inputs

    seq_lengths = tf.cast(seq_lengths, tf.int32)
    batch_size, time_length, _, _ = py_utils.GetShape(inputs)

    # When using dynamic time mask size, discard upper-bound on
    # maximum allowed frames for time mask.
    if dynamic_max_frames:
        max_frames = None

    # Create masks in time direction and apply.
    block_arrays = self._GetMask(
        batch_size,
        choose_range=seq_lengths,
        mask_size=time_length,
        max_length=max_frames,
        masks_per_frame=masks_per_frame,
        multiplicity=mask_count,
        dtype=dtype,
        max_ratio=max_ratio)
    outputs = tf.einsum(
        'bxyc,bx->bxyc', inputs, block_arrays, name='einsum_formasking')
    if not noisify:
        return outputs

    # Sample noise with standard deviation with factor * 0.1 + 0.0001
    # TODO(ngyuzh): Make sure this won't affect EOS.
    if gaussian_noise:
        stddev = 1.0
    else:
        factor = tf.random_uniform((),
                                   minval=1.0,
                                   maxval=2.0,
                                   dtype=dtype,
                                   seed=p.random_seed)
        stddev = factor * 0.1 + 0.0001
    noise = tf.random.normal(
        [tf.shape(inputs)[0],
         tf.shape(inputs)[1],
         tf.shape(inputs)[2]],
        stddev=stddev,
        seed=p.random_seed)
    if p.fprop_dtype is not None and p.fprop_dtype != p.dtype:
        noise = tf.cast(noise, p.fprop_dtype)
    # Scale the noise by (1 - block_arrays) before adding it, broadcast over
    # the last axis.
    outputs_mask = tf.einsum(
        'bxy,bx->bxy',
        noise,
        1.0 - block_arrays,
        name='einsum_fornoisymasking')
    return outputs + tf.expand_dims(outputs_mask, -1)
def _TimeMask(self,
              inputs,
              seq_lengths,
              max_ratio=1.0,
              time_length=2560,
              noisify=False,
              dtype=tf.float32):
    """Applies time masking with given degree to inputs.

    Args:
      inputs: Batch of input features of shape (batch_size, time_length,
        num_freq, channels).
      seq_lengths: The actual sequence lengths from which the masks are
        sampled, of shape (batch_size,).
      max_ratio: Maximum portion of the utterance allowed to be time-masked.
      time_length: Total length of time series.
      noisify: whether to noisify the masked out regions.
      dtype: Data type.

    Returns:
      Inputs with random time masking applied. The inputs are returned
      unchanged when `p.time_mask_max_frames` is 0 and dynamic max frames is
      off.
    """
    p = self.params

    # If maximum mask length is zero, do nothing
    if p.time_mask_max_frames == 0 and not p.use_dynamic_time_mask_max_frames:
        return inputs

    seq_lengths = tf.cast(seq_lengths, tf.int32)
    batch_size = tf.shape(inputs)[0]

    # Choose random masked length
    if p.use_dynamic_time_mask_max_frames:
        # TODO(ngyuzh): if an utterance is too short, it will never been
        # masked.
        length_range = tf.cast(seq_lengths, dtype=tf.float32) * max_ratio
        fraction = tf.random.uniform((batch_size, ),
                                     maxval=1.0,
                                     seed=p.random_seed)
        max_length = tf.cast(fraction * length_range, tf.int32)
    else:
        max_length = tf.random.uniform((batch_size, ),
                                       maxval=p.time_mask_max_frames,
                                       dtype=tf.int32,
                                       seed=p.random_seed)

    # Create masks in time direction and apply
    block_arrays = self._GetMask(
        batch_size,
        max_length,
        choose_range=seq_lengths,
        mask_size=time_length,
        dtype=dtype,
        max_ratio=max_ratio)
    outputs = tf.einsum(
        'bxyc,bx->bxyc', inputs, block_arrays, name='einsum_formasking')
    if not noisify:
        return outputs

    # Sample noise with standard deviation with factor * 0.1 + 0.0001
    # TODO(ngyuzh): Make sure this won't affect EOS.
    factor = tf.random_uniform((),
                               minval=1.0,
                               maxval=2.0,
                               dtype=dtype,
                               seed=p.random_seed)
    stddev = factor * 0.1 + 0.0001
    noise = tf.random.normal(
        [tf.shape(inputs)[0],
         tf.shape(inputs)[1],
         tf.shape(inputs)[2]],
        stddev=stddev,
        seed=p.random_seed)
    if p.fprop_dtype is not None and p.fprop_dtype != p.dtype:
        noise = tf.cast(noise, p.fprop_dtype)
    # Scale the noise by (1 - block_arrays) before adding it, broadcast over
    # the last axis.
    outputs_mask = tf.einsum(
        'bxy,bx->bxy',
        noise,
        1.0 - block_arrays,
        name='einsum_fornoisymasking')
    return outputs + tf.expand_dims(outputs_mask, -1)
def NeighborhoodIndices(points,
                        query_points,
                        k,
                        points_padding=None,
                        max_distance=None,
                        sample_neighbors_uniformly=False):
    """Get indices to k-neighbors of query_points in points.

    Padding is returned along-side indices. Non-padded points are guaranteed
    to be unique (non-repeated) points from original non-padded points.

    Padded points arise due to either a lack of points (k exceeds the number
    of original non-padded points) or points are too far away (exceeds max
    distance).

    Note: Padded point indices may refer to padded points from the original,
    or may be duplicates of the closest point.

    TODO(weihan,jngiam): PointCNN implementation makes an assumption that
    padded points are repeated points from the original points. This behavior
    is maintained here, but we should update PointCNN to respect indices
    paddings.

    Args:
      points: tensor of shape [N, P1, dims].
      query_points: tensor of shape [N, P2, dims]
      k: Integer.
      points_padding: optional tensor of shape [N, P1] containing True/1.0 iff
        the point is a padded point. if None, then all points are considered
        real points.
      max_distance: float representing the maximum distance that each neighbor
        can be. If there are no points within the distance, then the closest
        point is returned (regardless of distance). If this is set to None,
        then no filtering by distance is performed.
      sample_neighbors_uniformly: boolean specifying whether to sample
        neighbors uniformly if they are within max distance.

    Returns:
      A pair of tensors:

      - indices: tensor of shape [N, P2, k].
      - padding: tensor of shape [N, P2, k] where 1 represents a padded point,
        and 0 represents an unpadded (real) point.

    Raises:
      ValueError: if `sample_neighbors_uniformly` is set without
        `max_distance`.
    """
    n, p1 = py_utils.GetShape(points, 2)
    query_points = py_utils.HasShape(query_points, [n, -1, -1])
    _, p2 = py_utils.GetShape(query_points, 2)

    # Compute pair-wise squared distances.
    # Note that dist_mat contains the squared distance (without sqrt). Thus,
    # when using max_distance, we will need to square max_distance to make
    # sure it's in the same units.
    dist_mat = SquaredDistanceMatrix(query_points, points)
    dist_mat = py_utils.HasShape(dist_mat, [n, p2, p1])

    # Add a large scalar to the distances for padded points.
    # dist_mat[i, j, k] will be:
    #   if k < valid_num[i]: distance between points[i, k] and
    #     query_points[i, j]
    #   otherwise:           a large scalar added to dist_mat[i, j, k]
    has_padding = points_padding is not None
    if has_padding:
        points_padding = tf.cast(tf.expand_dims(points_padding, 1), tf.float32)
        points_padding = py_utils.HasShape(points_padding, [n, 1, p1])
        large_scalar = tf.reduce_max(dist_mat) + 1
        dist_mat += points_padding * large_scalar

    # To perform sampling neighbors uniformly efficiently, we set all
    # neighbors that are within the distance threshold to have distances be
    # drawn uniformly at random. Using top_k with this enables selecting a
    # random set quickly without replacement.
    if sample_neighbors_uniformly:
        if max_distance is None:
            raise ValueError(
                'Uniform sampling requires specifying max_distance.')
        within_range = tf.less_equal(dist_mat, max_distance**2)
        dist_mat = tf.where(
            within_range,
            tf.square(max_distance) * tf.random_uniform(tf.shape(dist_mat)),
            dist_mat)

    # Negate so that top_k picks the smallest distances.
    top_k_dist, indices = tf.nn.top_k(-dist_mat, k=k, sorted=True)  # N x P2 x K

    # Set padding using top_k_dist; padded points will have distance exceeding
    # the large_scalar.
    if has_padding:
        paddings = tf.greater_equal(-top_k_dist, large_scalar)
    else:
        paddings = tf.zeros_like(top_k_dist, dtype=tf.bool)

    # Filter by max_distances by setting all indices that exceed the
    # max_distance to the closest point.
    if max_distance is not None:
        # Mask is true for points that are further than max_distance.
        too_far = tf.greater(-top_k_dist, tf.square(max_distance))
        closest_idx = tf.tile(indices[:, :, :1], [1, 1, k])
        indices = tf.where(too_far, closest_idx, indices)
        paddings |= too_far

    indices = tf.reshape(indices, [n, p2, k])
    paddings = tf.cast(paddings, tf.float32)
    return indices, paddings
def testTopKTerminatedHypsOp(self):
    """Runs two beam search steps and checks the top-k terminated hyps.

    The done hyps after the second `beam_search_step` are passed to
    `top_k_terminated_hyps` (k=2) and unpacked with `unpack_hyp`; the results
    are compared against golden values.
    """
    with self.session(use_gpu=False) as sess:
        b_size = 8
        num_beams = 2
        # Floor division keeps this count an int; `/` would produce the float
        # 4.0 on Python 3.
        num_hyps_per_beam = b_size // num_beams
        seq_len = 6
        # Fixed seeds keep the golden values below reproducible.
        scores = tf.random_uniform([b_size, 5], seed=12345)
        atten_probs = tf.random_uniform([b_size, 3], seed=12345)
        src_seq_lengths = [3, 3]
        best_scores = tf.zeros([num_beams])
        cumulative_scores = tf.zeros([b_size])
        in_scores = tf.zeros([seq_len, b_size])
        in_hyps = tf.zeros([seq_len, b_size], dtype=tf.int32)
        in_prev_hyps = tf.zeros([seq_len, b_size], dtype=tf.int32)
        in_done_hyps = tf.as_string(
            tf.zeros([seq_len, b_size], dtype=tf.int32))
        in_atten_probs = tf.zeros([seq_len, b_size, 3])

        # First step (step id 0) starts from the all-zero state.
        (out_best_scores_0, out_cumulative_scores_0, out_scores_0, out_hyps_0,
         out_prev_hyps_0, out_done_hyps_0, out_atten_probs_0,
         _) = ops.beam_search_step(
             scores,
             atten_probs,
             best_scores,
             cumulative_scores,
             in_scores,
             in_hyps,
             in_prev_hyps,
             in_done_hyps,
             in_atten_probs, [],
             0,
             eos_id=2,
             beam_size=3.0,
             num_hyps_per_beam=num_hyps_per_beam)

        # Second step (step id 1) consumes the outputs of the first one.
        outputs = ops.beam_search_step(
            scores,
            atten_probs,
            out_best_scores_0,
            out_cumulative_scores_0,
            out_scores_0,
            out_hyps_0,
            out_prev_hyps_0,
            out_done_hyps_0,
            out_atten_probs_0, [],
            1,
            eos_id=2,
            beam_size=3.0,
            num_hyps_per_beam=num_hyps_per_beam)

        # Get the topk terminated hyps.
        in_done_hyps = outputs[5]
        topk_hyps = ops.top_k_terminated_hyps(
            in_done_hyps,
            src_seq_lengths,
            k=2,
            num_hyps_per_beam=num_hyps_per_beam,
            length_normalization=0.2,
            coverage_penalty=0.2,
            target_seq_length_ratio=1.0)
        seq_ids, seq_lens, seq_scores = ops.unpack_hyp(
            tf.reshape(topk_hyps, [-1]), max_seq_length=5)

        k1, k2, k3, k4 = sess.run([topk_hyps, seq_ids, seq_lens, seq_scores])
        print(np.array_repr(k1))
        # Use a unittest assertion so the check is not stripped under `-O`.
        self.assertEqual(k1.size, 4)

        expected_top1_for_beam_0 = """ beam_id: 0 ids: 1 ids: 2 scores: 0.86230338 scores: 0.65504861 atten_vecs { prob: 0.45372832 prob: 0.86230338 prob: 0.65504861 } atten_vecs { prob: 0.45372832 prob: 0.86230338 prob: 0.65504861 } normalized_score: 1.002714 """
        expected_top2_for_beam_1 = """ beam_id: 1 ids: 3 ids: 2 scores: 0.38127339 scores: 0.57700801 atten_vecs { prob: 0.38612545 prob: 0.42067075 prob: 0.84442794 } atten_vecs { prob: 0.18693292 prob: 0.17821217 prob: 0.66380036 } normalized_score: 0.480028 """
        self._SameHyp(expected_top1_for_beam_0, k1[0, 0])
        self._SameHyp(expected_top2_for_beam_1, k1[1, 1])

        self.assertAllClose(k2, [[1, 2, 0, 0, 0], [4, 2, 0, 0, 0],
                                 [4, 2, 0, 0, 0], [3, 2, 0, 0, 0]])
        self.assertAllClose(k3, [2, 2, 2, 2])
        self.assertAllClose(k4, [1.002714, 0.684296, 0.522484, 0.480028])