def testDecNotVisible(self):

  def _Notvisible(seg_id, seg_pos):
    a, b = tf.expand_dims(seg_id, -1), tf.expand_dims(seg_id, -2)
    return tf.cast(
        tf.math.logical_or(
            tf.less(tf.expand_dims(seg_pos, -1), tf.expand_dims(seg_pos, -2)),
            tf.math.logical_or(
                tf.not_equal(a, b),
                tf.math.logical_not(
                    tf.math.logical_or(
                        tf.cast(a, tf.bool), tf.cast(b, tf.bool))))),
        tf.float32)

  builder = gshard_builder.DenseBuilder.Params().Set(
      dtype=tf.float32).Instantiate()
  graph = tf.Graph()
  with graph.as_default():
    segment_ids = tf.convert_to_tensor([[1, 1, 1, 1]], dtype=tf.int32)
    segment_pos = tf.convert_to_tensor([[1, 2, 3, 4]], dtype=tf.int32)
    y = builder._DecNotVisible(segment_ids, segment_pos)
    y2 = _Notvisible(segment_ids, segment_pos)
  with self.session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())
    y_val, y2_val = sess.run([y, y2])
    self.assertAllEqual(y_val, y2_val)

def testSourceTargetValues(self):
  max_length = 50
  p = self._CreatePunctuatorInputParams()
  with self.session(use_gpu=False):
    inp = input_generator.PunctuatorInput(p)
    fetched = py_utils.NestedMap(
        self.evaluate(inp.GetPreprocessedInputBatch()))
    source_ids = fetched.src.ids
    tgt_ids = fetched.tgt.ids
    tgt_labels = fetched.tgt.labels

    expected_ref = (
        b'Elk calling -- a skill that hunters perfected long ago to lure '
        b'game with the promise of a little romance -- is now its own sport .')

    normalized_ref = expected_ref.lower().translate(
        None, string.punctuation.encode('utf-8'))
    normalized_ref = b' '.join(normalized_ref.split())
    _, expected_src_ids, _ = self.evaluate(
        inp.tokenizer.StringsToIds(
            tf.convert_to_tensor([normalized_ref]), max_length=max_length))
    expected_tgt_ids, expected_tgt_labels, _ = self.evaluate(
        inp.tokenizer.StringsToIds(
            tf.convert_to_tensor([expected_ref]), max_length=max_length))

    self.assertAllEqual(expected_src_ids[0], source_ids[0, :max_length])
    self.assertAllEqual(expected_tgt_ids[0], tgt_ids[0, :max_length])
    self.assertAllEqual(expected_tgt_labels[0], tgt_labels[0, :max_length])

def _GetInputs(self, batch_size, max_seqlen, input_dim, full_seq=False):
  # Prepares inputs.
  np.random.seed(None)
  if self.input_rank == 3:
    inputs = np.random.normal(
        0.5, 1, [batch_size, max_seqlen, input_dim]).astype(np.float32)
  else:
    assert self.input_rank == 4
    inputs = np.random.normal(
        0.5, 1, [batch_size, max_seqlen, 1, input_dim]).astype(np.float32)
  print(f'np.sum(inputs): {np.sum(inputs)}')
  inputs = tf.convert_to_tensor(inputs)

  if not full_seq:
    seqlen = np.random.randint(
        low=max_seqlen // 2,
        high=max_seqlen + 1,
        size=(batch_size,),
        dtype=np.int32)
  else:
    seqlen = np.full((batch_size,), max_seqlen, dtype=np.int32)
  print(f'seqlen: {seqlen}')
  seqlen = tf.convert_to_tensor(seqlen)
  paddings = py_utils.PaddingsFromLengths(seqlen, max_seqlen)
  return inputs, paddings

def testManyHotLabels(self):
  batch_size = 7
  num_classes = 400
  num_positive = 5
  # To help keep the test simple, we put the positive labels on the
  # first 'num_positive' classes in every example.
  labels = np.zeros((batch_size, num_classes), np.float32)
  labels[:, :num_positive] = 1.0
  logits = np.random.uniform(size=labels.shape).astype(np.float32) * 10 + 1e7

  losses = label_lib.MultiLabelContrastiveLoss(
      tf.convert_to_tensor(labels, dtype=tf.float32),
      tf.convert_to_tensor(logits, dtype=tf.float32))

  # Verify that the multi-label loss is equivalent to the average softmax
  # cross entropy of each positive pair vs. all negative pairs.
  negative_pair_logits = logits[:, num_positive:]
  one_vs_all_labels = np.zeros((batch_size, num_classes - num_positive + 1),
                               np.float32)
  one_vs_all_labels[:, 0] = 1

  expected_loss_terms = []
  for i in range(num_positive):
    one_vs_all_logits = np.concatenate(
        [logits[:, i:(i + 1)], negative_pair_logits], axis=1)
    expected_loss_terms.append(
        tf.nn.softmax_cross_entropy_with_logits(
            labels=one_vs_all_labels, logits=one_vs_all_logits))
  expected_loss = tf.add_n(expected_loss_terms) / num_positive
  self.assertAllClose(expected_loss, losses)

def testSourceTargetValues(self):
  max_length = 50
  p = self._CreatePunctuatorInputParams()
  with self.session(use_gpu=False) as sess:
    inp = input_generator.PunctuatorInput(p)
    tokenizer = inp.tokenizer_dict[base_input_generator.DEFAULT_TOKENIZER_KEY]
    fetched = py_utils.NestedMap(sess.run(inp.GetPreprocessedInputBatch()))
    source_ids = fetched.src.ids
    tgt_ids = fetched.tgt.ids
    tgt_labels = fetched.tgt.labels

    expected_ref = (
        b'His approach was inquisitive , a meeting of artful '
        b'hesitation with fluid technique .')

    normalized_ref = expected_ref.lower().translate(
        None, string.punctuation.encode('utf-8'))
    normalized_ref = b' '.join(normalized_ref.split())
    _, expected_src_ids, _ = sess.run(
        tokenizer.StringsToIds(
            tf.convert_to_tensor([normalized_ref]), max_length=max_length))
    expected_tgt_ids, expected_tgt_labels, _ = sess.run(
        tokenizer.StringsToIds(
            tf.convert_to_tensor([expected_ref]), max_length=max_length))

    self.assertAllEqual(expected_src_ids[0], source_ids[0, :max_length])
    self.assertAllEqual(expected_tgt_ids[0], tgt_ids[0, :max_length])
    self.assertAllEqual(expected_tgt_labels[0], tgt_labels[0, :max_length])

def test_max_assign_no_epsilon_scaling(self):
  score = tf.convert_to_tensor([[1.0, 1.5], [0.5, 1.1]])
  row_sums = tf.convert_to_tensor([1.0, 1.0])
  col_sums = tf.convert_to_tensor([1.0, 1.0])
  upper_bound = tf.convert_to_tensor([[0.0, 1.0], [1.0, 1.0]])
  score = score[tf.newaxis]
  row_sums = row_sums[tf.newaxis]
  col_sums = col_sums[tf.newaxis]
  upper_bound = upper_bound[tf.newaxis]

  results = differentiable_assignment.max_assignment(
      score,
      elementwise_upper_bound=upper_bound,
      row_sums=row_sums,
      col_sums=col_sums,
      epsilon=1e-3,
      num_iterations=800,
      use_epsilon_scaling=False)
  assignment, used_iter, eps, delta = results
  correct_assignment = [[0.0, 1.0], [1.0, 0.0]]

  print("")
  print("Test case 3:")
  print("Used iter:", used_iter)
  print("Last eps:", eps)
  print("Last delta:", delta)
  print("Assignment:", assignment[0])
  self.assertShapeEqual(np.ones((1, 2, 2)), assignment)
  self.assertNDArrayNear(assignment[0], correct_assignment, err=1e-2)

def testStackingStreamStepRightContext(self):
  tf.random.set_seed(2021)
  batch_size, max_seqlen, input_dim, kernel = 2, 16, 8, 3
  left_context, right_context = 6, 3
  num_heads, ffn_dim = 2, 4
  stride = 1
  num_layers = 3
  num_groups = 2

  # Prepares inputs.
  np.random.seed(None)
  inputs = np.random.normal(
      0.1, 1, [batch_size, max_seqlen, input_dim]).astype(np.float32)
  print(f'np.sum(inputs): {np.sum(inputs)}')
  inputs = tf.convert_to_tensor(inputs)

  seqlen = np.random.randint(
      low=max_seqlen // 2,
      high=max_seqlen + 1,
      size=(batch_size,),
      dtype=np.int32)
  print(f'seqlen: {seqlen}')
  seqlen = tf.convert_to_tensor(seqlen)
  paddings = py_utils.PaddingsFromLengths(seqlen, max_seqlen)

  p = conformer_layer.ConformerLayer.CommonParams(
      input_dim=input_dim,
      is_causal=True,
      layer_order='conv_before_mhsa',
      atten_num_heads=num_heads,
      atten_left_context=left_context,
      atten_right_context=right_context,
      use_relative_atten=False,
      fflayer_hidden_dim=ffn_dim,
      kernel_size=kernel)
  p.lconv_tpl.conv_norm_layer_tpl = bn_layers.GroupNormLayer.Params().Set(
      num_groups=num_groups, cumulative=True)
  p.params_init = py_utils.WeightInit.Xavier(scale=1.0, seed=0)

  ps = [p.Copy().Set(name=f'base{i}') for i in range(num_layers)]
  layers = [x.Instantiate() for x in ps]

  base_outputs = self._BuildStackingBaseGraph(layers, num_layers, inputs,
                                              paddings)
  outputs = self._BuildStackingStreamGraph(layers, num_layers, inputs,
                                           paddings, stride)

  init_op = tf.global_variables_initializer()
  with self.session(use_gpu=False) as sess:
    sess.run(init_op)
    expected, actual = sess.run([base_outputs, outputs])
    print(f'expected: {repr(expected)}, {expected.shape}')
    print(f'actual: {repr(actual)}, {actual.shape}')
    print(f'np.sum(np.abs(expected)): {np.sum(np.abs(expected))}')
    print(f'np.sum(np.abs(actual)): {np.sum(np.abs(actual))}')
    self.assertAllClose(expected, actual, atol=2e-6, rtol=2e-6)
    self.assertEqual(
        tuple(expected.shape), (batch_size, max_seqlen, input_dim))

def testSequenceAppendToken(self):
  x = np.asarray([[1, 2, 3, 0], [1, 2, 3, 4], [0, 0, 0, 0], [1, 0, 0, 0]],
                 np.int32)
  x_paddings = np.asarray(
      [[0, 0, 0, 1], [0, 0, 0, 1], [1, 1, 1, 1], [0, 1, 1, 1]], np.float32)

  with self.session():
    x_appended, x_appended_paddings = insertion.SequenceAppendToken(
        tf.convert_to_tensor(x), tf.convert_to_tensor(x_paddings), 10)
    x_appended, x_appended_paddings = self.evaluate(
        [x_appended, x_appended_paddings])

    # `x_appended_gold` is the same as `x` w/ token `10` appended.
    # `x_appended_paddings_gold` is the corresponding paddings.
    x_appended_gold = np.asarray(
        [[1, 2, 3, 10], [1, 2, 3, 10], [10, 0, 0, 0], [1, 10, 0, 0]],
        np.int32)
    x_appended_paddings_gold = np.asarray(
        [[0, 0, 0, 0], [0, 0, 0, 0], [0, 1, 1, 1], [0, 0, 1, 1]], np.float32)

    self.assertAllEqual(x_appended, x_appended_gold)
    self.assertAllEqual(x_appended_paddings, x_appended_paddings_gold)

def testSequenceAppendTokenExtend(self):
  x = np.asarray([[1, 2, 3, 0], [1, 2, 3, 4], [0, 0, 0, 0], [1, 0, 0, 0]],
                 np.int32)
  x_paddings = np.asarray(
      [[0, 0, 0, 1], [0, 0, 0, 0], [1, 1, 1, 1], [0, 1, 1, 1]], np.int32)

  with self.session():
    x_appended, x_appended_paddings = insertion.SequenceAppendToken(
        tf.convert_to_tensor(x), tf.convert_to_tensor(x_paddings), 10, True)
    x_appended, x_appended_paddings = self.evaluate(
        [x_appended, x_appended_paddings])

    # `x_appended_gold` is the same as `x` w/ token `10` appended; we also
    # test for the condition of extend=True, which requires +1 dim in the
    # time dimension.
    # `x_appended_paddings_gold` is the corresponding paddings.
    x_appended_gold = np.asarray(
        [[1, 2, 3, 10, 0], [1, 2, 3, 4, 10], [10, 0, 0, 0, 0],
         [1, 10, 0, 0, 0]], np.int32)
    x_appended_paddings_gold = np.asarray(
        [[0, 0, 0, 0, 1], [0, 0, 0, 0, 0], [0, 1, 1, 1, 1], [0, 0, 1, 1, 1]],
        np.int32)

    self.assertAllEqual(x_appended, x_appended_gold)
    self.assertAllEqual(x_appended_paddings, x_appended_paddings_gold)

def BatchedOrientedNMSIndices(self, bboxes, scores, nms_iou_threshold,
                              score_threshold, max_boxes_per_class):
  """Runs batched version of a Per-Class 3D (7-DOF) Non Max Suppression.

  All outputs have shape [batch_size, num_classes, max_boxes_per_class].

  Args:
    bboxes: A [batch_size, num_boxes, 7] floating point Tensor of bounding
      boxes in [x, y, z, dx, dy, dz, phi] format.
    scores: A [batch_size, num_boxes, num_classes] floating point Tensor
      containing box scores.
    nms_iou_threshold: Either a float or a list of floats of len num_classes
      with the IoU threshold to use when determining whether two boxes
      overlap for purposes of suppression.
    score_threshold: Either a float or a list of floats of len num_classes
      with the score threshold that allows NMS to quickly ignore boxes.
    max_boxes_per_class: An integer scalar with the maximum number of boxes
      per example to emit per class.

  Returns:
    A tuple of 3 tensors:

    - bbox_indices: An int32 Tensor with the indices of the chosen boxes.
      Values are in sort order until the class_idx switches.
    - bbox_scores: A float32 Tensor with the score for each box.
    - valid_mask: A float32 Tensor with 1/0 values indicating the validity
      of each box. 1 indicates valid, and 0 invalid.
  """
  bboxes = py_utils.HasShape(bboxes, [-1, -1, 7])
  batch_size, num_boxes = py_utils.GetShape(bboxes, 2)
  scores = py_utils.HasShape(scores, [batch_size, num_boxes, -1])
  _, _, num_classes = py_utils.GetShape(scores)

  # Force the thresholds to be tensors of len num_classes.
  nms_iou_threshold = tf.broadcast_to(
      tf.convert_to_tensor(nms_iou_threshold), [num_classes])
  score_threshold = tf.broadcast_to(
      tf.convert_to_tensor(score_threshold), [num_classes])

  def NMSBody(args):
    per_sample_bboxes, per_sample_scores = args
    indices, scores, mask = ops.non_max_suppression_3d(
        per_sample_bboxes,
        per_sample_scores,
        nms_iou_threshold=nms_iou_threshold,
        score_threshold=score_threshold,
        max_boxes_per_class=max_boxes_per_class)
    return indices, scores, mask

  bbox_indices, bbox_scores, valid_mask = tf.map_fn(
      fn=NMSBody,
      elems=(bboxes, scores),
      dtype=(tf.int32, tf.float32, tf.float32),
      back_prop=False)

  output_shape = [batch_size, num_classes, max_boxes_per_class]
  bbox_indices = py_utils.PadOrTrimTo(bbox_indices, output_shape)
  bbox_scores = py_utils.PadOrTrimTo(bbox_scores, output_shape)
  valid_mask = py_utils.PadOrTrimTo(valid_mask, output_shape)
  return bbox_indices, bbox_scores, valid_mask

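# Usage sketch for BatchedOrientedNMSIndices (illustrative only, not from the
# original file): `utils_3d` is assumed to be an instance of the class that
# defines the method above, and all shapes and thresholds below are made-up
# example values.
def ExampleBatchedOrientedNMS(utils_3d):
  batch_size, num_boxes, num_classes = 4, 128, 3
  bboxes = tf.random.uniform([batch_size, num_boxes, 7])
  scores = tf.nn.softmax(
      tf.random.uniform([batch_size, num_boxes, num_classes]), axis=-1)
  # Scalar thresholds are broadcast to every class; passing length-3 lists
  # instead would configure per-class thresholds.
  indices, nms_scores, mask = utils_3d.BatchedOrientedNMSIndices(
      bboxes,
      scores,
      nms_iou_threshold=0.1,
      score_threshold=0.3,
      max_boxes_per_class=64)
  # Each output has shape [4, 3, 64]; `mask` marks the valid entries.
  return indices, nms_scores, mask
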
def testSequenceTrimLastToken(self):
  x = np.asarray([[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]],
                 np.int32)
  x_paddings = np.asarray(
      [[0, 0, 0, 0], [0, 0, 0, 1], [1, 1, 1, 1], [0, 1, 1, 1]], np.float32)

  with self.session() as sess:
    x_trimmed, x_trimmed_paddings = insertion.SequenceTrimLastToken(
        tf.convert_to_tensor(x), tf.convert_to_tensor(x_paddings))
    x_trimmed, x_trimmed_paddings = sess.run([x_trimmed, x_trimmed_paddings])

    # `x_trimmed_gold` is the same as `x` w/ last token removed.
    # `x_trimmed_paddings_gold` is the corresponding paddings.
    x_trimmed_gold = np.asarray(
        [[1, 2, 3, 0], [1, 2, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], np.int32)
    x_trimmed_paddings_gold = np.asarray(
        [[0, 0, 0, 1], [0, 0, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]], np.float32)

    self.assertAllEqual(x_trimmed, x_trimmed_gold)
    self.assertAllEqual(x_trimmed_paddings, x_trimmed_paddings_gold)

def _testHelper(self,
                pooling_type,
                inputs,
                input_paddings,
                expected_output,
                expected_output_padding,
                feed_dict=None):
  param = conv_layers.GlobalPoolingLayer.Params().Set(
      name='test_layer', pooling_type=pooling_type)
  pooling_layer = param.Instantiate()
  with self.session(use_gpu=True) as sess:
    inputs = tf.convert_to_tensor(inputs, dtype=tf.float32)
    input_paddings = None if input_paddings is None else tf.convert_to_tensor(
        input_paddings, dtype=tf.float32)
    output, output_paddings = pooling_layer.FPropDefaultTheta(
        inputs, input_paddings)
    self.evaluate(tf.global_variables_initializer())
    if input_paddings is None:
      self.assertIsNone(output_paddings)
      output_val = sess.run(output, feed_dict=feed_dict)
    else:
      output_val, output_paddings_val = sess.run([output, output_paddings],
                                                 feed_dict=feed_dict)

  self.assertAllClose(expected_output, output_val)
  if input_paddings is not None:
    self.assertAllEqual(expected_output_padding, output_paddings_val)

def testApplyGShard(self, use_relative_atten):
  with self.session() as sess:
    conformer_p = conformer_layer.ConformerLayer.CommonParams(
        input_dim=self.dim,
        atten_num_heads=self.heads,
        atten_local_context=self.context,
        use_relative_atten=use_relative_atten,
        kernel_size=2,
        fflayer_hidden_dim=4 * self.dim)
    conformer_p.name = 'conformer_layer'
    conformer_layer.ApplyGshard(
        conformer_p,
        device_mesh=[1, 2],
        proj_w_split_list=[[0, 1], [1, 0]],
        proj_activation_split_list=[[0, -1, 1], [0, -1, -1]],
        atten_dnh_w_split=[0, 1, -1],
        atten_blnh_activation_split=[0, -1, 1, -1],
        atten_bld_activation_split=[0, -1, -1],
        lconv_df_w_split=[0, 1],
        lconv_hwim_w_split=[-1, -1, 1, -1],
        lconv_fd_w_split=[-1, -1],
        lconv_blf_activation_split=[0, -1, 1],
        lconv_bld_activation_split=[0, -1, -1])
    inputs, paddings = self._GetInputs()
    conformer_l = conformer_p.Instantiate()
    outputs = conformer_l.FProp(
        conformer_l.theta,
        py_utils.NestedMap(
            features=tf.convert_to_tensor(inputs),
            paddings=tf.convert_to_tensor(paddings)))
    tf.logging.info('outputs=%s', outputs)
    tf.global_variables_initializer().run()
    out_vals = sess.run(outputs)
    print([x.shape for x in out_vals.Flatten()])

def testMoEFFLayerFProp(self, use_fflayer_start_moe, use_fflayer_end_moe,
                        expected_aux_loss):
  p = self._GetParams()
  if use_fflayer_start_moe:
    p.fflayer_start_tpl = gshard_builder.MoEBuilder.Params().Set(
        e_dim=2, c_dim=2, num_devices=2)
  if use_fflayer_end_moe:
    p.fflayer_end_tpl = gshard_builder.MoEBuilder.Params().Set(
        e_dim=2, c_dim=2, num_devices=2)
  l = p.Instantiate()
  inputs, paddings = self._GetInputs()
  inputs = tf.convert_to_tensor(inputs)
  paddings = tf.convert_to_tensor(paddings)
  in_nmap = py_utils.NestedMap(features=inputs, paddings=paddings)
  in_nmap.aux_loss = tf.convert_to_tensor(0., py_utils.FPropDtype(p))
  out_nmap = l.FPropDefaultTheta(in_nmap)
  self.assertIn('aux_loss', out_nmap)
  loss = tf.reduce_sum(out_nmap.features) + 0.01 * out_nmap.aux_loss
  grads = tf.gradients(
      loss,
      l.vars.Flatten(),
      unconnected_gradients=tf.UnconnectedGradients.ZERO)

  with self.session() as sess:
    tf.global_variables_initializer().run()
    out_vals = sess.run(out_nmap.features)
    grad_vals = sess.run(grads)
    self.assertEqual(out_nmap.aux_loss.shape, ())
    aux_loss = sess.run(out_nmap.aux_loss)
    self.assertAlmostEqual(expected_aux_loss, aux_loss, places=5)
    print([x.shape for x in out_vals])
    print([g.shape for g in grad_vals])

def testCheckNumerics(self):
  checked = py_utils.CheckNumerics(
      tf.convert_to_tensor([2.0, 3.0], tf.float32))
  self.assertListEqual([2.0, 3.0], checked.numpy().tolist())

  with self.assertRaisesRegex(tf.errors.InvalidArgumentError, 'NaN'):
    py_utils.CheckNumerics(
        tf.reduce_mean(tf.convert_to_tensor([], tf.float32)))

def add_labels(self, feature, labels, points_xyz):
  """Adds 3d bounding box labels into the output feature map.

  Args:
    feature: A tf.Example feature map.
    labels: A repeated car.open_dataset.Label proto.
    points_xyz: A numpy array of shape [-1, 3] with the pointcloud. This is
      used to calculate the number of points in each 3D bounding box.
  """
  label_classes = []
  label_ids = []
  detection_difficulty_levels = []
  tracking_difficulty_levels = []
  bboxes = []
  label_md = []

  for label in labels:
    box = label.box
    bbox_3d = [
        box.center_x, box.center_y, box.center_z, box.length, box.width,
        box.height, box.heading
    ]
    md = [
        label.metadata.speed_x, label.metadata.speed_y,
        label.metadata.accel_x, label.metadata.accel_y
    ]
    label_md += md
    bboxes += bbox_3d
    label_classes += [label.type]
    label_ids += [tf.compat.as_bytes(label.id)]
    detection_difficulty_levels += [label.detection_difficulty_level]
    tracking_difficulty_levels += [label.tracking_difficulty_level]

  # Calculate the number of points in each ground truth box, which is needed
  # to fill in difficulty levels for each ground truth and to filter boxes
  # with fewer points than a configurable minimum.
  points_xyz = tf.convert_to_tensor(points_xyz, dtype=tf.float32)
  bboxes_3d = tf.convert_to_tensor(
      np.array(bboxes).reshape(-1, 7), dtype=tf.float32)
  points_in_bboxes_mask = geometry.IsWithinBBox3D(points_xyz, bboxes_3d)
  bboxes_3d_num_points = tf.reduce_sum(
      tf.cast(points_in_bboxes_mask, tf.int32), axis=0, keepdims=False)
  bboxes_3d_num_points = bboxes_3d_num_points.numpy().reshape([-1])

  bboxes = np.array(bboxes).reshape(-1)
  label_md = np.array(label_md).reshape(-1)
  feature['labels'].int64_list.value[:] = label_classes
  feature['label_ids'].bytes_list.value[:] = label_ids
  feature['detection_difficulties'].int64_list.value[:] = (
      detection_difficulty_levels)
  feature['tracking_difficulties'].int64_list.value[:] = (
      tracking_difficulty_levels)
  feature['bboxes_3d'].float_list.value[:] = list(bboxes)
  feature['label_metadata'].float_list.value[:] = list(label_md)
  feature['bboxes_3d_num_points'].int64_list.value[:] = list(
      bboxes_3d_num_points)

def testCreateCanvasAndTargets(self):
  with self.session() as sess:
    tf.set_random_seed(_TF_RANDOM_SEED)
    batch = py_utils.NestedMap(
        src=py_utils.NestedMap(
            ids=tf.convert_to_tensor(
                np.asarray([
                    [10, 11, 12, 14, 2, 0],
                    [20, 21, 22, 24, 25, 2],
                ], np.int32)),
            paddings=tf.convert_to_tensor(
                np.asarray([[0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0]],
                           np.float32))),
        tgt=py_utils.NestedMap(
            ids=tf.convert_to_tensor(
                np.asarray([[100, 101, 102, 104, 2, 0],
                            [200, 201, 202, 204, 205, 2]], np.int32)),
            paddings=tf.convert_to_tensor(
                np.asarray([[0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0]],
                           np.float32))))

    p = self._testParams()
    mdl = p.Instantiate()

    descriptor = mdl._CreateCanvasAndTargets(batch)
    canvas, canvas_paddings, target_indices, target_weights = sess.run([
        descriptor.canvas, descriptor.canvas_paddings,
        descriptor.target_indices, descriptor.target_weights
    ])

    canvas_gold = np.asarray(
        [[32010, 32012, 32002, 2, 0, 0, 0, 0, 0, 0, 0, 0],
         [32020, 32021, 32022, 32024, 32025, 32002, 200, 201, 202, 204, 205,
          2]], np.int32)
    canvas_paddings_gold = np.asarray(
        [[0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], np.float32)
    target_indices_gold = np.asarray(
        [[0, 0, 2], [0, 1, 11], [0, 1, 2], [0, 2, 14], [0, 2, 2], [1, 0, 2],
         [1, 1, 2], [1, 2, 2], [1, 3, 2], [1, 4, 2], [1, 5, 2], [0, 3, 100],
         [0, 3, 101], [0, 3, 102], [0, 3, 104], [0, 3, 2], [1, 6, 2],
         [1, 7, 2], [1, 8, 2], [1, 9, 2], [1, 10, 2], [1, 11, 2]], np.int32)
    target_weights_gold = np.asarray(
        [1, 1, 0, 1, 0] + [1, 1, 1, 1, 1, 1] + [1, 1, 1, 1, 0] +
        [1, 1, 1, 1, 1, 1], np.float32)
    target_weights_gold = np.reshape(target_weights_gold,
                                     [target_weights_gold.shape[0], 1])

    self.assertAllEqual(canvas, canvas_gold)
    self.assertAllEqual(canvas_paddings, canvas_paddings_gold)
    self.assertAllEqual(target_indices, target_indices_gold)
    self.assertAllEqual(target_weights, target_weights_gold)

def testGetValidCanvasAndTargetsUnderUniformOraclePolicyForcedSample(self):
  """Tests for canvas+targets under uniform (rollin+oracle) policy."""
  with self.session(use_gpu=True) as sess:
    params = insertion.SymbolInsertionLayer.Params()
    params.name = 'insertion'
    params.rollin_policy = 'oracle'
    params.oracle_policy = 'uniform'
    params.random_seed = 12345

    insertion_layer = insertion.SymbolInsertionLayer(params)

    x = np.asarray(
        [[10, 11, 12, 13, 14, 15, 16, 1], [10, 11, 12, 13, 14, 15, 16, 1],
         [10, 1, 0, 0, 0, 0, 0, 0], [10, 11, 12, 13, 14, 15, 1, 0]],
        np.int32)
    x_paddings = np.asarray(
        [[0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 1, 1, 1, 1, 1, 1], [0, 0, 0, 0, 0, 0, 0, 1]], np.float32)

    spec = insertion_layer.FProp(None, tf.convert_to_tensor(x),
                                 tf.convert_to_tensor(x_paddings))

    (canvas, canvas_indices, canvas_paddings, target_indices,
     target_weights) = sess.run([
         spec.canvas, spec.canvas_indices, spec.canvas_paddings,
         spec.target_indices, spec.target_weights
     ])

    canvas_gold = np.asarray(
        [[10, 12, 13, 15, 16, 1], [13, 1, 0, 0, 0, 0], [10, 1, 0, 0, 0, 0],
         [10, 12, 14, 1, 0, 0]], np.int32)
    canvas_indices_gold = np.asarray(
        [[0, 2, 3, 5, 6, 7], [3, 7, 7, 7, 7, 7], [0, 1, 7, 7, 7, 7],
         [0, 2, 4, 6, 7, 7]], np.int32)
    canvas_paddings_gold = np.asarray(
        [[0., 0., 0., 0., 0., 0.], [0., 0., 1., 1., 1., 1.],
         [0., 0., 1., 1., 1., 1.], [0., 0., 0., 0., 1., 1.]], np.float32)
    target_indices_gold = np.asarray(
        [[0, 0, 1], [0, 1, 11], [0, 1, 1], [0, 2, 1], [0, 3, 14], [0, 3, 1],
         [0, 4, 1], [0, 5, 1], [1, 0, 10], [1, 0, 11], [1, 0, 12], [1, 0, 1],
         [1, 1, 14], [1, 1, 15], [1, 1, 16], [1, 1, 1], [2, 0, 1], [2, 1, 1],
         [3, 0, 1], [3, 1, 11], [3, 1, 1], [3, 2, 13], [3, 2, 1], [3, 3, 15],
         [3, 3, 1]], np.int32)
    target_weights_gold = np.asarray(
        [1, 1, 0, 1, 1, 0, 1, 1] + [1, 1, 1, 0, 1, 1, 1, 0] + [1, 1] +
        [1, 1, 0, 1, 0, 1, 0], np.float32)
    target_weights_gold = np.reshape(target_weights_gold,
                                     [target_weights_gold.shape[0], 1])

    self.assertAllEqual(canvas, canvas_gold)
    self.assertAllEqual(canvas_indices, canvas_indices_gold)
    self.assertAllEqual(canvas_paddings, canvas_paddings_gold)
    self.assertAllEqual(target_indices, target_indices_gold)
    self.assertAllEqual(target_weights, target_weights_gold)

def Apply(self, metrics, vmap, gradient_mask=None, gradient_adjuster=None):
  """Computes updates on 'vmap' to optimize 'loss'.

  TODO(rpang): explore merging gradient_mask and gradient_adjuster.

  Args:
    metrics: A Dict[str, (value, weight)], from which loss can be extracted
      according to p.loss_name.
    vmap: A `.NestedMap` object containing variables to optimize.
    gradient_mask: if not None, a dict mapping variable names to a 0/1
      scalar.
    gradient_adjuster: if not None, a function that mutates a given
      var_grads.

  Returns:
    (losses, op, eval_metrics), where

    - losses is a list of scalar tensors;
    - op is a tf.Operation to update variables;
    - eval_metrics is a Dict[str, (value, weight)], where each value/weight
      is a scalar tensor.
  """
  # We apply gradients outside the name_scope to maintain backwards
  # compatibility on variables created by self.optimizer.Apply().
  losses, var_grads, eval_metrics = self._ComputeLossesAndGradients(
      metrics, vmap)
  if 'tpu_embedding_var_grads' in var_grads:
    tpu_embedding_var_grads = var_grads.tpu_embedding_var_grads
    del var_grads.tpu_embedding_var_grads

    tpu_embedding_collection = py_utils.GetTpuEmbeddingGraphCollection()[0]
    assert tpu_embedding_collection
    tpu_emb_update_op, stats = tpu_embedding_collection.ApplyGradients(
        py_utils.GetTaskCallScope(),
        tpu_embedding_var_grads.Transform(lambda var_grad: var_grad.grad))
    eval_metrics.update(stats)
  else:
    tpu_emb_update_op = tf.no_op()

  assert py_utils.GetGlobalStep() is not None
  lr = self.LearningRate()

  var_grads, stats = self.AdjustGradients(
      var_grads,
      gradient_mask=gradient_mask,
      gradient_adjuster=gradient_adjuster)
  eval_metrics.update(stats)
  self._var_grads = var_grads

  eval_metrics['learning_rate'] = (tf.convert_to_tensor(lr),
                                   tf.convert_to_tensor(1.))

  var_update_op = tf.group(
      [tpu_emb_update_op, self.optimizer.Apply(lr, var_grads)])
  return losses, var_update_op, eval_metrics

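# Hypothetical call sketch for Apply (illustrative only; the `learner`,
# `task_vars`, and metric names here are assumptions, not from the original
# file). `metrics` maps metric names to (value, weight) pairs, and the loss
# tensor is selected by p.loss_name.
def ExampleApply(learner, task_vars, loss, num_frames):
  losses, var_update_op, eval_metrics = learner.Apply(
      metrics={'loss': (loss, num_frames)}, vmap=task_vars)
  # `losses` is a list of scalar tensors, `var_update_op` updates the
  # variables, and `eval_metrics` includes entries such as 'learning_rate'.
  return losses, var_update_op, eval_metrics
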
def testStreamStep(self, testonly_skip_norm_layers=False, norm_type='ln'):
  with flagsaver.flagsaver(
      testonly_skip_norm_layers=testonly_skip_norm_layers
  ), cluster_factory.SetEval(True):
    assert norm_type in ('ln', 'gn')
    batch, max_seqlen, input_dim, kernel = 2, 8, 2, 3
    p = conformer_layer.LConvLayer.CommonParams(
        input_dim=input_dim, is_causal=True, kernel_size=kernel)
    if norm_type == 'ln':
      p.conv_norm_layer_tpl = lingvo_layers.LayerNorm.Params()
    else:
      p.conv_norm_layer_tpl = bn_layers.GroupNormLayer.Params().Set(
          num_groups=2, cumulative=True)
    p.name = 'lconv'

    l = p.Instantiate()
    init_op = tf.global_variables_initializer()

    np.random.seed(None)
    inputs = np.random.normal(
        0.1, 0.5, [batch, max_seqlen, input_dim]).astype(np.float32)
    print(f'np.sum(inputs): {np.sum(inputs)}')
    inputs = tf.convert_to_tensor(inputs)

    seqlen = np.random.randint(
        low=1, high=max_seqlen + 1, size=(batch,), dtype=np.int32)
    print(repr(seqlen))
    seqlen = tf.convert_to_tensor(seqlen)
    paddings = py_utils.PaddingsFromLengths(seqlen, max_seqlen)

    base_outputs, _ = l.FProp(l.theta, inputs, paddings)
    base_outputs *= tf.expand_dims(1. - paddings, -1)

    outputs = []
    state = l.zero_state(batch)
    for i in range(max_seqlen):
      output, _, state = l.StreamStep(l.theta, inputs[:, i:(i + 1), :],
                                      paddings[:, i:(i + 1)], state)
      outputs.append(output)
    # [b, t, d]
    outputs = tf.concat(outputs, axis=1)
    outputs *= tf.expand_dims(1. - paddings, -1)

    with self.session(use_gpu=False) as sess:
      sess.run(init_op)
      expected, actual = sess.run([base_outputs, outputs])
      print(repr(expected))
      print(repr(actual))
      print(f'np.sum(np.abs(expected)): {np.sum(np.abs(expected))}')
      print(f'np.sum(np.abs(actual)): {np.sum(np.abs(actual))}')
      self.assertAllClose(expected, actual)

def test_normalize_trailing_eos(self, need_trailing_eos, results, data):
  ids = tf.convert_to_tensor(data[0], dtype=tf.int32)
  id_lens = tf.convert_to_tensor(data[1], dtype=tf.int32)
  with self.session(use_gpu=False) as sess:
    new_ids, new_id_lens = eos_normalization.NormalizeTrailingEos(
        ids, id_lens, need_trailing_eos=need_trailing_eos, eos_id=2)
    new_ids_np, ids_np, new_id_lens_np, id_lens_np = sess.run(
        [new_ids, ids, new_id_lens, id_lens])
    self.assertAllEqual(new_id_lens_np, results)
    self._assert_label_equivalence(ids_np, id_lens_np, new_ids_np,
                                   new_id_lens_np)

def testLayerNormalizedLSTMCellLeanExt(self):
  cell_p = self._GetParams()

  seqlen, batch, input_dim = 4, 2, 2
  inputs = tf.convert_to_tensor(
      np.random.rand(seqlen, batch, input_dim).astype(np.float32))
  input_lens = np.random.randint(1, seqlen + 1, size=batch)
  paddings = 1. - tf.sequence_mask(
      input_lens, maxlen=seqlen, dtype=tf.float32)
  paddings = tf.transpose(paddings)
  reset_mask = tf.zeros((seqlen, batch), tf.float32)
  m0 = tf.convert_to_tensor(
      np.random.rand(batch, input_dim).astype(np.float32))
  c0 = tf.convert_to_tensor(
      np.random.rand(batch, input_dim).astype(np.float32))
  state0 = py_utils.NestedMap(m=m0, c=c0)

  with self.session():
    cell = cell_p.Instantiate()
    self.evaluate(tf.global_variables_initializer())

    # The canonical path.
    state = state0
    for i in range(seqlen):
      state, _ = cell.FPropDefaultTheta(
          state,
          py_utils.NestedMap(
              act=[inputs[i, :, :]],
              padding=paddings[i, :, tf.newaxis],
              reset_mask=reset_mask[i, :, tf.newaxis]))
    expected_state = self.evaluate(state)

    # Taking input projection outside of the loop.
    cell_theta = cell.theta.copy()
    cell_theta.wm_i = cell_theta.wm[:cell.params.num_input_nodes, :]
    cell_theta.wm_h = cell_theta.wm[cell.params.num_input_nodes:, :]
    proj_inputs = cell.ProjectInputSequence(
        cell_theta, py_utils.NestedMap(act=[inputs]))
    state = state0
    for i in range(seqlen):
      state, _ = cell.FPropWithProjectedInput(
          cell_theta, state,
          py_utils.NestedMap(
              proj_inputs=proj_inputs[i, :, :],
              padding=paddings[i, :, tf.newaxis],
              reset_mask=reset_mask[i, :, tf.newaxis]))
    actual_state = self.evaluate(state)

    tf.logging.info('expected_state:{}'.format(expected_state))
    tf.logging.info('actual_state:{}'.format(actual_state))
    self.assertAllClose(expected_state.m, actual_state.m)
    self.assertAllClose(expected_state.c, actual_state.c)

def _ProcessLine(self, line):
  """A single-text-line processor.

  Gets a string tensor representing a line of text that has been read from
  the input file, and splits it into graphemes (characters). We use the
  original characters as the target labels, and the lowercased and
  punctuation-removed characters as the source labels.

  Args:
    line: a 1D string tensor.

  Returns:
    A list of tensors, in the expected order by __init__.
  """
  # Tokenize the input into integer ids.
  # tgt_ids has the start-of-sentence token prepended, and tgt_labels has the
  # end-of-sentence token appended.
  tgt_ids, tgt_labels, tgt_paddings = self.StringsToIds(
      tf.convert_to_tensor([line]))

  def Normalize(line):
    # Lowercase and remove punctuation.
    line = line.lower().translate(None, string.punctuation.encode('utf-8'))
    # Convert multiple consecutive spaces to a single one.
    line = b' '.join(line.split())
    return line

  normalized_line = tf.py_func(Normalize, [line], tf.string, stateful=False)
  _, src_labels, src_paddings = self.StringsToIds(
      tf.convert_to_tensor([normalized_line]), is_source=True)
  # The model expects the source without a start-of-sentence token.
  src_ids = src_labels

  # Compute the length for bucketing.
  bucket_key = tf.cast(
      tf.round(
          tf.maximum(
              tf.reduce_sum(1.0 - src_paddings),
              tf.reduce_sum(1.0 - tgt_paddings))), tf.int32)
  tgt_weights = 1.0 - tgt_paddings

  # Return tensors in an order consistent with __init__.
  out_tensors = [
      src_ids, src_paddings, tgt_ids, tgt_paddings, tgt_labels, tgt_weights
  ]
  return [tf.squeeze(t, axis=0) for t in out_tensors], bucket_key

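# A quick standalone illustration (not from the original file) of what
# Normalize() above does to a byte string, shown outside of tf.py_func.
def _ExampleNormalize():
  line = b'Hello,  World!'
  # Lowercase and strip punctuation: b'hello  world'.
  line = line.lower().translate(None, string.punctuation.encode('utf-8'))
  # Collapse runs of whitespace: b'hello world'.
  line = b' '.join(line.split())
  assert line == b'hello world'
  return line
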
def _TestcausalDepthwiseConv2DLayerStreamStepHelper(
    self, test_only_skip_norm_layers, stride):
  batch_size, max_seqlen, channel = 2, 32, 3
  kernel, channel_multiplier = 5, 1

  params = conv_layers.CausalDepthwiseConv2DLayer.Params().Set(
      name='conv',
      filter_stride=[1, 1],
      filter_shape=[kernel, 1, channel, channel_multiplier],
      params_init=py_utils.WeightInit.Gaussian(0.1))
  conv_layer = params.Instantiate()
  init_op = tf.global_variables_initializer()

  np.random.seed(None)
  inputs = np.random.normal(
      0.5, 1, [batch_size, max_seqlen, 1, channel]).astype(np.float32)
  print(f'np.sum(inputs): {np.sum(inputs)}')
  inputs = tf.convert_to_tensor(inputs)

  seqlen = np.random.randint(
      low=1, high=max_seqlen + 1, size=(batch_size,), dtype=np.int32)
  print(repr(seqlen))
  seqlen = tf.convert_to_tensor(seqlen)
  input_padding = py_utils.PaddingsFromLengths(seqlen, max_seqlen)

  base_outputs, _ = conv_layer.FProp(conv_layer.theta, inputs, input_padding)
  base_outputs *= tf.reshape(1. - input_padding,
                             [batch_size, max_seqlen, 1, 1])

  outputs = []
  state = conv_layer.zero_state(batch_size)
  assert max_seqlen % stride == 0
  for i in range(0, max_seqlen // stride):
    output, _, state = conv_layer.StreamStep(
        conv_layer.theta, inputs[:, stride * i:stride * (i + 1), :, :],
        input_padding[:, stride * i:stride * (i + 1)], state)
    outputs.append(output)
  # [b, t, 1, c * channel_multiplier]
  outputs = tf.concat(outputs, axis=1)
  outputs *= tf.reshape(1. - input_padding, [batch_size, max_seqlen, 1, 1])

  with self.session(use_gpu=True) as sess:
    sess.run(init_op)
    expected, actual = sess.run([base_outputs, outputs])
    print(repr(expected))
    print(repr(actual))
    print(f'np.sum(np.abs(expected)): {np.sum(np.abs(expected))}')
    print(f'np.sum(np.abs(actual)): {np.sum(np.abs(actual))}')
    self.assertAllClose(expected, actual)

def testMaskedLoss(self):
  p = self._DualEncoderParamsForTest()
  p.encoder_configs['x'].output_dim = 5
  p.encoder_configs['y'].output_dim = 5

  x2y_weight = 0.75
  y2x_weight = 0.25
  p.loss_weights = {('x', 'y'): x2y_weight, ('y', 'x'): y2x_weight}

  # Mock the label_fn so it gives the in-batch pairs the following labels.
  x = label_lib.IGNORE_PAIR_LABEL
  example_pair_labels = tf.constant(
      [
          # pyformat: disable
          [1, 0, 0],
          [0, 1, x],
          [0, x, 1]
          # pyformat: enable
      ],
      dtype=tf.int32)
  p.label_fn = lambda _: example_pair_labels

  x_input = np.arange(15, dtype=np.float32).reshape((3, 5)) / 10.0
  y_input = np.arange(5, 20, dtype=np.float32).reshape((3, 5)) / 10.0

  model = p.Instantiate()
  input_batch = py_utils.NestedMap(
      x_input=tf.convert_to_tensor(x_input),
      x_ids=tf.constant([1, 2, 3], dtype=tf.int64),
      y_input=tf.convert_to_tensor(y_input),
      y_ids=tf.constant([4, 2, 2], dtype=tf.int64))

  # Check that pairs labeled "ignore" are excluded from the loss.
  # TODO(austinwaters): Instead of computing and checking the real loss
  # values, this test should just check that DualEncoder forwards the correct
  # inputs and labels to the loss function and applies the correct weight
  # to the result.
  expected_x2y_losses = np.array([1.6802696, 0.1602242, 0.00247565])
  expected_y2x_losses = np.array([3.6856253, 0.048587330, 0.00020346955])
  expected_average_loss = np.mean(
      x2y_weight * expected_x2y_losses + y2x_weight * expected_y2x_losses)

  with py_utils.GlobalStepContext(2):
    preds = model.ComputePredictions(model.theta, input_batch)
    metrics, _ = model.ComputeLoss(model.theta, preds, input_batch)
  loss, _ = metrics['loss']

  with self.session() as sess:
    loss = sess.run(loss)
  self.assertAllClose(expected_average_loss, loss)

def testExtractBlockContext(self, block_size, left_context, right_context):
  x_val = np.random.random([2, 6, 2, 3, 4])
  with self.session() as sess:
    x = tf.convert_to_tensor(x_val, tf.float32)
    x_context = attention_util.ExtractBlockContext(
        x, block_size, left_context, right_context)
    x_context_val = sess.run(x_context)

  # Check shape.
  batch_size = x_val.shape[0]
  other_dims = x_val.shape[2:]
  num_blocks = int(np.ceil(x_val.shape[1] / float(block_size)))
  context_size = block_size + left_context - 1 + right_context
  expected_shape = (batch_size, num_blocks, context_size) + other_dims
  self.assertAllEqual(expected_shape, x_context_val.shape)

  # Check values block by block.
  for block_idx in range(num_blocks):
    context_start = block_idx * block_size - left_context + 1
    context_end = (block_idx + 1) * block_size + right_context
    slice_start = max(0, context_start)
    slice_end = min(x_val.shape[1], context_end)
    expected_val = x_val[:, slice_start:slice_end, ...]
    actual_val = x_context_val[:, block_idx, ...]
    # Remove paddings.
    front_padding = slice_start - context_start
    back_padding = context_end - slice_end
    actual_val = actual_val[:, front_padding:context_size - back_padding,
                            ...]
    self.assertAllClose(expected_val, actual_val)

def scalar(name, value, while_loop_reduce='mean'):
  """Adds summary scalar.

  Outside of tpu_summary.context() does nothing.

  Args:
    name: string name
    value: scalar tensor value
    while_loop_reduce: optional argument, determines what to do when this
      summary appears inside a tf.while_loop. Can be 'mean' or 'sum'.

  Raises:
    RuntimeError: if the function is called in Eager mode.
  """
  if py_utils.IsEagerMode():
    raise RuntimeError(EAGER_MODE_EXCEPTION_STR)
  assert while_loop_reduce in ('mean', 'sum')
  ctx = TpuSummaryContext.current()
  if ctx is None:
    return
  x = TpuSummaryScalar()
  x.name = str(name)
  x.value = tf.convert_to_tensor(value)
  if x.value.shape != ():  # pylint: disable=g-explicit-bool-comparison
    raise ValueError('use tpu_summary.tensor() instead: %r' % value)
  x.name_scope = tf.get_default_graph().get_name_scope()
  x.while_loop_reduce = while_loop_reduce
  ctx.summary_tensors.append(x)

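# Usage sketch from a client's perspective (illustrative only; `loss` and
# `num_tokens` are assumed scalar tensors). As the docstring notes, calls
# made outside of tpu_summary.context() are no-ops.
def ExampleScalarSummaries(loss, num_tokens):
  with tpu_summary.context():
    tpu_summary.scalar('loss', loss)  # reduced by mean inside a while_loop
    tpu_summary.scalar('num_tokens', num_tokens, while_loop_reduce='sum')
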
def testPoolingWithUnknowShapeInput(self):
  """Tests GlobalPooling layer with an unknown-shape tensor."""

  def remove_shape(tensor):
    shape = tf.placeholder(tf.int32, name='removed_shape')
    return tf.reshape(tensor, shape)

  g = tf.Graph()
  with g.as_default(), tf.Session(graph=g) as _:
    tf.random.set_seed(24332)
    input_shape = [3, 5, 2, 4]
    inputs = np.random.random(input_shape) - 0.5
    expected_avg_output = np.mean(inputs, axis=(1, 2), keepdims=True)
    input_tensor = tf.convert_to_tensor(inputs, dtype=tf.float32)

    # Initial shape is [3, 5, 2, 4].
    self.assertEqual(py_utils.GetShape(input_tensor), input_shape)
    # Remove the static shape information via a placeholder-fed reshape and
    # verify that the tensor shape is now dynamic.
    input_tensor = remove_shape(input_tensor)
    self.assertIsInstance(py_utils.GetShape(input_tensor), tf.Tensor)
    self.assertIsNone(input_tensor.shape.rank)

    self._testHelper(
        'AVG',
        input_tensor,
        None,
        expected_avg_output,
        None,
        feed_dict={'removed_shape:0': input_shape})

def _DoPadding(x, b, l, r, d=None, padding_val=0.0):
  """A helper function to do padding in the front and rear.

  Padding is done along axis 1.

  Args:
    x: a [b, t, d] tensor if d is not None, else a [b, t] tensor.
    b: batch size.
    l: the length to be padded on the left.
    r: the length to be padded on the right.
    d: last dimension size if x is a 3d tensor.
    padding_val: which value is used to pad.

  Returns:
    The padded tensor.
  """
  padding_val = tf.convert_to_tensor(padding_val, dtype=x.dtype)
  front_pad_shape = [b, l] if d is None else [b, l, d]
  rear_pad_shape = [b, r] if d is None else [b, r, d]
  to_concate = [
      tf.ones(front_pad_shape, dtype=x.dtype) * padding_val,
      x,
      tf.ones(rear_pad_shape, dtype=x.dtype) * padding_val,
  ]
  return tf.concat(to_concate, axis=1)

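# A small worked example (illustrative values, not from the original file):
# padding a [b=2, t=3] tensor with l=1 step on the left and r=2 steps on the
# right yields shape [2, 6].
def _ExampleDoPadding():
  x = tf.ones([2, 3])
  padded = _DoPadding(x, b=2, l=1, r=2, padding_val=0.0)
  # padded == [[0., 1., 1., 1., 0., 0.],
  #            [0., 1., 1., 1., 0., 0.]]
  return padded
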
def Update(x, i, v, *, inplace_update=None):
  """Performs scatter update: x[i] = v.

  A drop-in replacement for inplace_ops.alias_inplace_update (aka
  tf.InplaceUpdate).

  Args:
    x: the source tensor.
    i: the index tensor. If None, do x = v. If a scalar, do x[i, ...] = v. If
      a vector, do x[j, ...] = v[j, ...] for j in i.
    v: the update value tensor.
    inplace_update: whether to perform inplace updates. If None, follows the
      current context set by SetInplaceUpdate.

  Returns:
    The updated tensor.
  """
  if inplace_update is None:
    inplace_update = UseInplaceUpdate()
  if inplace_update:
    return tf.InplaceUpdate(x, i, v)
  if i is None:
    return py_utils.HasShape(v, tf.shape(x))
  i = tf.convert_to_tensor(i)
  assert i.shape, i
  assert i.shape.rank in (0, 1), i
  if i.shape.rank == 0:
    y = tf.concat([x[:i, ...], v[None, ...], x[i + 1:, ...]], axis=0)
    y.set_shape(x.shape)
    return y
  return tf.tensor_scatter_nd_update(x, i[:, None], v)

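# Illustrative examples (not from the original file) of the three index forms
# Update accepts; inplace_update=False forces the pure-TF path shown above.
def _ExampleUpdate():
  x = tf.zeros([4, 2])
  # Scalar index: row 1 of x is replaced by v.
  y1 = Update(x, 1, tf.ones([2]), inplace_update=False)
  # Vector index: rows 0 and 3 receive the corresponding rows of v.
  y2 = Update(x, tf.constant([0, 3]), tf.ones([2, 2]), inplace_update=False)
  # i=None: the whole tensor is replaced; v's shape must match x's.
  y3 = Update(x, None, tf.ones([4, 2]), inplace_update=False)
  return y1, y2, y3
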