def _CompareStackedElman(self, seqlen, batch, dims, layers):
  """Tests that StackedRecurrent computes the same output as Recurrent().

  Both outputs are built with two trailing padding steps; those steps are
  sliced off before the values are compared.
  """
  num_trailing_pads = 2
  graph = tf.Graph()
  with graph.as_default():
    expected, actual, _, _, _ = self._BuildStackedRecurrentElman(
        seqlen, num_trailing_pads, batch, dims, layers)
    # Drop the padded tail from both sequences.
    expected = expected[:-num_trailing_pads]
    actual = actual[:-num_trailing_pads]
  with self.session(graph=graph):
    expected_val, actual_val = self.evaluate([expected, actual])
  self._LogDiff(expected_val, actual_val)
  self.assertAllClose(expected_val, actual_val)
def testEmptySequentialLayerFPropMeta(self):
  """FPropMeta of an empty SequentialLayer reports the same shape as FProp."""
  graph = tf.Graph()
  with graph.as_default():
    params = layers.SequentialLayer.Params().Set(name='seq')
    layer = params.Instantiate()
    inputs = py_utils.NestedMap(val=tf.random.normal(shape=[2, 32]))
    outputs = layer.FPropDefaultTheta(inputs)
    self.assertIsInstance(outputs.val, tf.Tensor)

    def _ToShape(t):
      return tshape.Shape(t.shape)

    # Feed FPropMeta the symbolic shapes of the very same inputs.
    input_shapes = py_utils.Transform(_ToShape, inputs)
    meta = layer.FPropMeta(params, input_shapes)
    meta_out_shape = meta.out_shapes[0]
    self.assertEqual(outputs.val.shape.as_list(),
                     meta_out_shape.val.ToTensorShape().as_list())
def _TestFProp(self, p, in_shape, expected_out_shape):
  """Runs FProp of the layer built from `p` and checks the output shape.

  Args:
    p: Layer params; instantiated inside a fresh graph.
    in_shape: Shape of the random-normal input fed to the layer.
    expected_out_shape: Shape the evaluated output must have.
  """
  graph = tf.Graph()
  with graph.as_default():
    layer = p.Instantiate()
    inputs = tf.random.normal(shape=in_shape)
    outputs = layer.FPropDefaultTheta(inputs)
    # A list/tuple result must hold exactly one element, which is unwrapped.
    if isinstance(outputs, (list, tuple)):
      self.assertEqual(len(outputs), 1)
      outputs = outputs[0]
  with self.session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())
    output_val = sess.run(outputs)
  self.assertEqual(output_val.shape, expected_out_shape)
def testDecoderWithOrientedPerClassNMS(self):
  """Checks output shapes and per-class filtering of DecodeWithNMS."""
  batch_size, num_preds, num_classes = 4, 8, 10

  # An example of setting the score threshold high and IOU threshold low
  # for classes we don't care about: only class 1 gets usable thresholds.
  score_threshold = [0.05 if c == 1 else 1.0 for c in range(num_classes)]
  nms_iou_threshold = [0.5 if c == 1 else 0.0 for c in range(num_classes)]

  with tf.Graph().as_default():
    tf.random.set_seed(12345)
    predicted_bboxes = tf.random.normal([batch_size, num_preds, 7])
    classification_scores = tf.random.uniform(
        [batch_size, num_preds, num_classes], minval=0, maxval=1)
    bboxes, bbox_scores, valid_mask = detection_decoder.DecodeWithNMS(
        predicted_bboxes,
        classification_scores,
        nms_iou_threshold=nms_iou_threshold,
        score_threshold=score_threshold,
        use_oriented_per_class_nms=True)
    with self.session() as sess:
      (input_bboxes, input_scores, output_bboxes, output_scores,
       mask) = sess.run([
           predicted_bboxes, classification_scores, bboxes, bbox_scores,
           valid_mask
       ])

    expected_shapes = (
        ((batch_size, num_preds, 7), input_bboxes),
        ((batch_size, num_classes, num_preds, 7), output_bboxes),
        ((batch_size, num_preds, num_classes), input_scores),
        ((batch_size, num_classes, num_preds), output_scores),
        ((batch_size, num_classes, num_preds), mask),
    )
    for expected_shape, array in expected_shapes:
      self.assertEqual(expected_shape, array.shape)

    # Assert that NMS did some kind of filtering for each class: the number
    # of valid boxes must equal the number of scores above the threshold,
    # both on the input side and on the output side.
    for cls_idx, threshold in enumerate(score_threshold):
      num_valid = mask[:, cls_idx, :].sum()
      self.assertEqual(num_valid,
                       (input_scores[:, :, cls_idx] > threshold).sum())
      num_valid = mask[:, cls_idx, :].sum()
      self.assertEqual(num_valid,
                       (output_scores[:, cls_idx, :] > threshold).sum())
def testUniTransformerParallelFPropEntmax(self):
  """Golden-value FProp test of UniTransformer with parallel FFN and entmax."""
  length_dim = 4
  graph = tf.Graph()

  builder_params = (
      gshard_builder.RecurrentDenseBuilderParallelDecode.Params().Set(
          device_mesh_shape=[1, 1],
          device_mesh=None,
          relative_attention_num_buckets=32,
          relative_attention_type='bias',
          relative_attention_max_distance=128,
          dtype=tf.float32,
          num_devices=1,  # we call .Split num_devices on axis 0 (batch)
          relative_attention_use_universal_1d_position=True,
          model_dim=32,
          model_dim_reshape_segments=2,
          attention_num_memory_heads=1,
          proj_weight_hdim=2,
          attention_num_heads=8,
          ff_dim=128,
          attention_key_value_dim=8,
          attention_combine_dims=True))
  params = gshard_builder.UniTransformer.Params().Set(
      gated_gelu=False,
      gated_ffn_activation=tf.nn.relu,
      positional_embedding=False,
      dtype=tf.float32,
      name='transformer',
      parallel_ffn=True,
      hidden_dim_reshape_segments=2,
      conv_kernel_size=2,
      builder=builder_params,
      batch_size=32,
      sequence_length=length_dim,
      num_transformer_layers=2,
      aux_loss_coef=0.0,
      loss_denominator=None,
      label_smoothing=0,
      vocab_size=128,
      max_length=length_dim,
      use_entmax=True)

  with graph.as_default():
    py_utils.GetOrCreateGlobalStepVar()
    params.params_init = py_utils.WeightInit.Xavier(scale=1.0, seed=0)
    tf.random.set_seed(24332)
    model = params.Instantiate()

  with tf.Session(graph=graph) as sess:
    input_batch = self._PreLoadInput()
    fprop_out = model.FPropDefaultTheta(input_batch)
    loss = fprop_out[0]['loss'][0]
    sess.run(tf.global_variables_initializer())
    loss_eval = sess.run(loss)
    test_utils.CompareToGoldenSingleFloat(self, 5.146667, loss_eval)
def _ComputeFinalMetrics(self,
                         classids=None,
                         difficulty=None,
                         distance=None,
                         num_points=None,
                         rotation=None):
  """Compute precision-recall curves as well as average precision.

  One metric sub-graph is built per class id in a fresh graph, and all of
  them are evaluated in a single session run.

  Args:
    classids: A list of N int32.
    difficulty: Not used.
    distance: int32 specifying a binned Euclidean distance of the ground truth
      bounding box. If None is specified, all distances are selected.
    num_points: int32 specifying a binned number of laser points within the
      ground truth bounding box. If None is specified, all boxes are
      selected.
    rotation: int32 specifying a binned rotation within the ground truth
      bounding box. If None is specified, all boxes are selected.

  Returns:
    dict. Each entry in the dict is a list of C (number of classes) dicts
    containing mapping from metric names to individual results. Individual
    entries may be the following items.
    - scalars: A list of C (number of classes) dicts mapping metric
      names to scalar values.
    - curves: A list of C dicts mapping metrics names to np.float32
      arrays of shape [NumberOfPrecisionRecallPoints()+1, 2]. In the last
      dimension, 0 indexes precision and 1 indexes recall.
  """
  del difficulty  # Unused.
  tf.logging.info('Computing final Waymo metrics.')
  assert classids is not None, 'classids must be supplied.'

  graph = tf.Graph()
  per_class_scalars = []
  per_class_curves = []
  feeds = {}
  with graph.as_default():
    for classid in classids:
      class_data = self._GetData(
          classid, distance=distance, num_points=num_points,
          rotation=rotation)
      class_metrics = self._BuildMetric(class_data, classid)
      per_class_scalars.append(class_metrics.scalar_metrics)
      per_class_curves.append(class_metrics.curve_metrics)
      feeds.update(class_metrics.feed_dict)

  with tf.Session(graph=graph) as sess:
    fetched = sess.run([per_class_scalars, per_class_curves],
                       feed_dict=feeds)

  tf.logging.info('Finished computing final Waymo metrics.')
  return {'scalars': fetched[0], 'curves': fetched[1]}
def _ComputeFinalMetrics(self,
                         classids=None,
                         difficulty=None,
                         distance=None,
                         num_points=None,
                         rotation=None):
  """Compute precision-recall curves as well as average precision.

  One metric sub-graph is built per class id in a fresh graph, and all of
  them are evaluated in a single session run.

  Args:
    classids: A list of N int32.
    difficulty: String in [easy, moderate, hard]. If None specified, all
      difficulty levels are permitted.
    distance: int32 specifying a binned Euclidean distance of the ground truth
      bounding box. If None is specified, all distances are selected.
    num_points: int32 specifying a binned number of laser points within the
      ground truth bounding box. If None is specified, all boxes are
      selected.
    rotation: int32 specifying a binned rotation within the ground truth
      bounding box. If None is specified, all boxes are selected.

  Returns:
    (dict, dict):
    - scalar_metrics: A list of C (number of classes) dicts mapping metric
      names to scalar values.
    - curve_metrics: A list of C dicts mapping metrics names to np.float32
      arrays of shape [NumberOfPrecisionRecallPoints()+1, 2]. In the last
      dimension, 0 indexes precision and 1 indexes recall.
  """
  assert classids is not None, 'classids must be supplied.'

  graph = tf.Graph()
  per_class_scalars = []
  per_class_curves = []
  feeds = {}
  with graph.as_default():
    for classid in classids:
      class_data = self._GetData(
          classid,
          difficulty=difficulty,
          distance=distance,
          num_points=num_points,
          rotation=rotation)
      built = self._BuildMetric(class_data, classid)
      class_scalars, class_curves, class_feeds = built
      per_class_scalars.append(class_scalars)
      per_class_curves.append(class_curves)
      feeds.update(class_feeds)

  with tf.Session(graph=graph) as sess:
    fetched = sess.run([per_class_scalars, per_class_curves],
                       feed_dict=feeds)
  return fetched[0], fetched[1]
def testSoftCondLayer(self):
  """Compares SoftCondLayer output against a numpy reference computation.

  Two variants are checked: the default one, whose conditioning input is the
  mean over axis 0 of the example, and `nonzeros_mean=True`, whose
  conditioning input is the sum over axis 0 divided by the non-zero count.
  """
  num_experts = 100

  def _ExpectedOutput(example, cond_input, layer_vars):
    """Numpy reference: sigmoid-gated mix of expert FC weights, then relu."""
    gate = np.exp(-1.0 * np.dot(cond_input, layer_vars.w))
    gate = 1.0 / (1.0 + gate)
    mixed_w = np.einsum('i,ijk->jk', gate, layer_vars.body.w)
    mixed_b = np.einsum('i,ij->j', gate, layer_vars.body.b)
    return np.maximum(0, np.dot(example, mixed_w) + mixed_b)

  with self.session(use_gpu=False, graph=tf.Graph()) as sess:
    tf.random.set_seed(24332)

    # Default variant.
    p = layers.SoftCondLayer.Params().Set(
        name='soft_cond',
        cond_dim=2,
        num_experts=num_experts,
        body=lingvo_layers.FCLayer.Params().Set(input_dim=2, output_dim=2))
    layer = p.Instantiate()
    x = tf.random.normal(shape=[1, 2, 2])
    y = layer.FPropDefaultTheta(x)
    tf.global_variables_initializer().run()
    x_val, y_val, vars_val = sess.run([x, y, layer.vars])

    # Variant averaging over non-zero entries only.
    p_nz = layers.SoftCondLayer.Params().Set(
        name='soft_cond_nonzeros',
        cond_dim=2,
        num_experts=num_experts,
        nonzeros_mean=True,
        body=lingvo_layers.FCLayer.Params().Set(input_dim=2, output_dim=2))
    layer_nz = p_nz.Instantiate()
    x_nz = tf.random.normal(shape=[1, 2, 2])
    y_nz = layer_nz.FPropDefaultTheta(x_nz)
    tf.global_variables_initializer().run()
    x_nz_val, y_nz_val, vars_nz_val = sess.run([x_nz, y_nz, layer_nz.vars])

    example = x_val[0]
    example_nz = x_nz_val[0]

    # The tiny epsilon keeps the division finite if a column is all zeros.
    nonzero_counts = (
        np.count_nonzero(example_nz, 0).astype('float32') + 1e-10)

    expected = _ExpectedOutput(example, np.mean(example, 0), vars_val)
    expected_nz = _ExpectedOutput(
        example_nz, np.sum(example_nz, 0) / nonzero_counts, vars_nz_val)

    self.assertAllClose(expected, y_val[0])
    self.assertAllClose(expected_nz, y_nz_val[0])
def Run(num_splits):
  """Builds the test model for `num_splits` GPUs and returns its loss value.

  Every bucket batch limit is scaled by `2 / num_splits` before the model is
  instantiated under a testing worker cluster with `num_splits` GPUs.
  """
  params = self._testParams()
  with self.session(use_gpu=False, graph=tf.Graph()) as sess:
    tf.set_random_seed(93820981)
    params.input.cur_iter_in_seed = False
    # NOTE(review): `/` is true division, so the limits become floats under
    # Python 3 — confirm that this is intended.
    scaled_limits = []
    for limit in params.input.bucket_batch_limit:
      scaled_limits.append(limit * 2 / num_splits)
    params.input.bucket_batch_limit = scaled_limits
    with cluster_factory.ForTestingWorker(gpus=num_splits, do_eval=True):
      model = params.Instantiate()
      metrics = model.FPropDefaultTheta()[0]
    tf.global_variables_initializer().run()
    return sess.run(metrics['loss'])
def testSpectrumAugmenterWithPerDomainPolicyFreqMask(self):
  """Checks per-domain frequency masking against a fixed expected output."""
  with self.session(use_gpu=False, graph=tf.Graph()):
    tf.random.set_seed(1234)
    inputs = tf.ones([6, 5, 4, 2], dtype=tf.float32)
    # One domain id per example, repeated across the 5 entries of axis 1.
    input_domain_ids = tf.constant(
        [[domain_id] * 5 for domain_id in (1, 2, 0, 2, 0, 1)],
        dtype=tf.float32)
    paddings = tf.zeros([3, 5])

    p = spectrum_augmenter.SpectrumAugmenter.Params()
    p.name = 'specAug_layers'
    p.domain_ids = [0, 1, 2]
    p.freq_mask_max_bins = [0, 3, 8]
    p.time_mask_max_frames = 0
    p.random_seed = 1234
    specaug_layer = p.Instantiate()

    # In the expected [6, 5, 4, 2] output every example has a single pattern
    # over axis 2 that is constant over axes 1 and 3, so only that pattern is
    # spelled out (1 = kept, 0 = masked) and then tiled to the full shape.
    kept_per_example = np.array([
        [0., 0., 1., 1.],
        [1., 0., 0., 0.],
        [1., 1., 1., 1.],
        [0., 0., 0., 0.],
        [1., 1., 1., 1.],
        [1., 0., 0., 1.],
    ])
    expected_output = np.tile(kept_per_example[:, np.newaxis, :, np.newaxis],
                              [1, 5, 1, 2])

    h, _ = specaug_layer.FPropDefaultTheta(
        inputs, paddings, domain_ids=input_domain_ids)
    actual_layer_output = self.evaluate(h)
    print(np.array_repr(actual_layer_output))
    self.assertAllClose(actual_layer_output, expected_output)
def _testUniTransformerFProp(self, use_moe=False):
  """Golden-value FProp test of UniTransformer, with or without MoE.

  Args:
    use_moe: Whether to enable the MoE settings (`moe`, `moe_gated_gelu`, and
      the builder's `e_dim`, `num_groups` and `c_dim`).
  """
  length_dim = 4
  graph = tf.Graph()

  # The MoE-only builder dimensions are left as None in the dense case.
  moe_dim = 2 if use_moe else None
  moe_groups = 1 if use_moe else None
  builder_params = gshard_builder.DenseBuilder.Params().Set(
      device_mesh_shape=[1, 1],
      device_mesh=None,
      relative_attention_num_buckets=32,
      relative_attention_type='bias',
      relative_attention_max_distance=128,
      dtype=tf.float32,
      num_devices=1,  # we call .Split num_devices on axis 0 (batch)
      relative_attention_use_universal_1d_position=True,
      e_dim=moe_dim,
      num_groups=moe_groups,
      c_dim=moe_dim,
      model_dim=32,
      attention_num_heads=8,
      moe_hidden_dim=128,
      ff_dim=128,
      attention_key_value_dim=8,
      attention_combine_dims=True)
  params = gshard_builder.UniTransformer.Params().Set(
      gated_gelu=False,
      moe=use_moe,
      moe_gated_gelu=use_moe,
      positional_embedding=False,
      dtype=tf.float32,
      name='transformer',
      builder=builder_params,
      batch_size=32,
      sequence_length=length_dim,
      num_transformer_layers=2,
      aux_loss_coef=0.0,
      loss_denominator=None,
      label_smoothing=0,
      vocab_size=128,
      max_length=length_dim)

  with graph.as_default():
    py_utils.GetOrCreateGlobalStepVar()
    params.params_init = py_utils.WeightInit.Xavier(scale=1.0, seed=0)
    tf.random.set_seed(24332)
    model = params.Instantiate()

  with tf.Session(graph=graph) as sess:
    input_batch = self._PreLoadInput()
    fprop_out = model.FPropDefaultTheta(input_batch)
    loss = fprop_out[0]['loss'][0]
    sess.run(tf.global_variables_initializer())
    loss_eval = sess.run(loss)
    if use_moe:
      golden_float = 5.761248
    else:
      golden_float = 5.635831
    test_utils.CompareToGoldenSingleFloat(self, golden_float, loss_eval)
def testParallelMatmulLayer(self):
  """ParallelLayer with a matmul-chaining merge matches a numpy reference."""
  graph = tf.Graph()
  with graph.as_default():
    tf.set_random_seed(24332)

    def _ChainMatmul(tensors):
      product = tensors[0]
      for t in tensors[1:]:
        product = tf.matmul(product, t)
      return product

    def MergeFn(xs):
      # zip(*xs) groups the i-th output of every sub-layer together.
      return tuple([_ChainMatmul(group) for group in zip(*xs)])

    sub_names = ('foo', 'bar', 'baz')
    p = layers.ParallelLayer.Params().Set(
        name='parallel',
        merge=MergeFn,
        sub=[
            lingvo_layers.FCLayer.Params().Set(
                name=sub_name, input_dim=32, output_dim=4)
            for sub_name in sub_names
        ])
    layer = p.Instantiate()
    x = tf.random_normal(shape=[2, 4, 32])
    y = layer.FPropDefaultTheta(x)

  with self.session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())
    x_val, y_val, w = sess.run([x, y, layer.vars])

  np_result = None
  for sub_name in sub_names:
    sub_vars = getattr(w, sub_name)
    # relu(act \dot w + b)
    sub_out = np.maximum(0, np.matmul(x_val, sub_vars.w) + sub_vars.b)
    self.assertEqual(sub_out.shape, (2, 4, 4))
    if np_result is None:
      np_result = sub_out
    else:
      np_result = np.matmul(np_result, sub_out)
  self.assertAllClose(np_result, y_val, atol=1e-5, rtol=1e-5)
def testEmptySequentialLayer(self):
  """A SequentialLayer with no sub-layers returns its input unchanged."""
  graph = tf.Graph()
  with graph.as_default():
    tf.set_random_seed(24332)
    layer = layers.SequentialLayer.Params().Set(name='seq').Instantiate()
    inputs = tf.random_normal(shape=[2, 32])
    outputs = layer.FPropDefaultTheta(inputs)
    self.assertIsInstance(outputs, tf.Tensor)
  with self.session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())
    # Fetch both in one run so they come from the same random draw.
    input_val, output_val = sess.run([inputs, outputs])
  self.assertAllEqual(input_val, output_val)
def FakeMnistData(tmpdir, train_size=60000, test_size=10000):
  """Fake Mnist data for unit tests.

  Saves all-ones uint8 tensors under the names 'x_train', 'y_train', 'x_test'
  and 'y_test' with `io_ops.save_v2`.

  Args:
    tmpdir: Directory in which the data is written, under the prefix 'ckpt'.
    train_size: Number of training examples.
    test_size: Number of test examples.

  Returns:
    The path prefix the tensors were saved to (`tmpdir`/ckpt).
  """
  data_path = os.path.join(tmpdir, 'ckpt')
  with tf.Graph().as_default(), tf.Session() as sess:
    x_train = tf.ones((train_size, 28, 28, 1), dtype=tf.uint8)
    # Labels are 1-D. The shape is spelled as an explicit 1-tuple; the
    # previous `(train_size)` was just a parenthesised scalar.
    y_train = tf.ones((train_size,), dtype=tf.uint8)
    x_test = tf.ones((test_size, 28, 28, 1), dtype=tf.uint8)
    y_test = tf.ones((test_size,), dtype=tf.uint8)
    tensor_names = ['x_train', 'y_train', 'x_test', 'y_test']
    tensors = [x_train, y_train, x_test, y_test]
    sess.run(
        io_ops.save_v2(data_path, tensor_names, [''] * len(tensors), tensors))
  return data_path
def testBasicGrad(self):
  """Symbolic gradients of TransformerLm's avg_xent match numeric ones."""
  time, batch, dims, hidden_dim, vocab = 5, 3, 6, 4, 8

  params = lm_layers.TransformerLm.Params()
  params.dtype = tf.float64
  params.name = 'transformerlm'
  params.vocab_size = vocab
  params.emb.vocab_size = vocab
  params.emb.embedding_dim = dims
  params.model_dim = dims
  params.num_trans_layers = 1
  params.trans_tpl.source_dim = dims
  params.trans_tpl.tr_atten_tpl.num_attention_heads = 2
  params.trans_tpl.tr_fflayer_tpl.hidden_dim = hidden_dim
  params.softmax.input_dim = dims
  params.softmax.num_classes = vocab

  def _Densify(grad):
    """Turns an IndexedSlices gradient into a dense tensor."""
    if not isinstance(grad, tf.IndexedSlices):
      return grad
    return tf.math.unsorted_segment_sum(grad.values, grad.indices,
                                        grad.dense_shape[0])

  with self.session(use_gpu=False, graph=tf.Graph()) as sess:
    lm = params.Instantiate()

    np.random.seed(12345)
    input_ids = np.random.randint(vocab, size=[time, batch])
    # Targets are the inputs shifted by one step; the last step stays zero.
    target_ids = np.zeros([time, batch])
    target_ids[:-1] = input_ids[1:]
    # Only the last time step is padding.
    padding_vals = np.zeros([time, batch])
    padding_vals[-1] = 1.0

    inputs = tf.constant(input_ids, tf.int32)
    paddings = tf.constant(padding_vals, tf.float64)
    targets = tf.constant(target_ids, tf.int32)

    labels = py_utils.NestedMap(class_weights=1 - paddings, class_ids=targets)
    xent_output, _ = lm.FPropDefaultTheta(
        inputs=inputs, paddings=paddings, labels=labels)

    lm_vars = lm.vars.Flatten()
    grads = [
        _Densify(grad) for grad in tf.gradients(xent_output.avg_xent, lm_vars)
    ]

    tf.global_variables_initializer().run()
    self.assertEqual(len(lm_vars), len(grads))
    for var, grad in zip(lm_vars, grads):
      grad_symbolic = sess.run(grad)
      grad_numeric = test_utils.ComputeNumericGradient(
          sess, xent_output.avg_xent, var, delta=1e-6)
      self.assertAllClose(grad_symbolic, grad_numeric, atol=0.005)
def testMix(self):
  """Records are drawn from three sources according to the given weights."""

  def generate_test_data(tag, cnt):
    """Writes `cnt` records of the form '<tag>:<index>'; returns the path."""
    path = os.path.join(tf.test.get_temp_dir(), tag)
    with tf.python_io.TFRecordWriter(path) as writer:
      for i in range(cnt):
        writer.write(('%s:%08d' % (tag, i)).encode('utf-8'))
    return path

  # Generate couple files.
  path1 = generate_test_data('input1', 100)
  path2 = generate_test_data('input2', 200)
  path3 = generate_test_data('input3', 10)

  batch_size = 8
  graph = tf.Graph()
  with graph.as_default():

    # A record processor written in TF graph.
    def _process(record):
      return [record, record], 1

    # Samples random records from the data files and processes them
    # to generate batches.
    sources = ['tfrecord:' + path1, 'tfrecord:' + path2, 'tfrecord:' + path3]
    (strs, vals), buckets = generic_input.GenericInput(
        file_pattern=','.join(sources),
        input_source_weights=[0.2, 0.3, 0.5],
        file_random_seed=0,
        file_buffer_size=32,
        file_parallelism=4,
        bucket_batch_limit=[batch_size],
        bucket_upper_bound=[1],
        processor=_process)

  with self.session(graph=graph) as sess:
    tags_count = collections.defaultdict(int)
    total_count = 10000
    for _ in range(total_count):
      ans_strs, ans_vals, ans_buckets = sess.run([strs, vals, buckets])
      # The tag is everything before the first ':' of each record.
      for record in ans_strs:
        tag = record.split(b':')[0]
        tags_count[tag] += 1
      self.assertEqual(ans_strs.shape, (8,))
      self.assertEqual(ans_vals.shape, (8,))
      self.assertAllEqual(ans_buckets, [1] * 8)
    self.assertEqual(sum(tags_count.values()), total_count * 8)

    mix_ratios = {
        tag: float(count) / total_count / 8
        for tag, count in tags_count.items()
    }
    self.assertAlmostEqual(mix_ratios[b'input1'], 0.2, delta=0.01)
    self.assertAlmostEqual(mix_ratios[b'input2'], 0.3, delta=0.01)
    self.assertAlmostEqual(mix_ratios[b'input3'], 0.5, delta=0.01)
def testBiasLayer(self):
  """BiasLayer adds its `b` variable of shape (10,) to the input."""
  graph = tf.Graph()
  with graph.as_default():
    tf.set_random_seed(24332)
    layer = layers.BiasLayer.Params().Set(name='test', dims=10).Instantiate()
    inputs = tf.random_normal(shape=[2, 10])
    outputs = layer.FPropDefaultTheta(inputs)
  with self.session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())
    input_val, output_val, layer_vars = sess.run(
        [inputs, outputs, layer.vars])
  self.assertEqual(layer_vars.b.shape, (10,))
  self.assertAllClose(input_val + layer_vars.b, output_val)
def _WaitTillInit(job=None):
  """Initializes the TPU system and sets up the device assignment.

  Args:
    job: Forwarded to `tf.tpu.initialize_system` (graph mode only) and to
      `py_utils.SetTpuDeviceAssignment`.

  Raises:
    Any of `py_utils.transient_tf_errors`: logged and then re-raised.
  """
  try:
    if py_utils.IsEagerMode():
      topology = tf.tpu.experimental.initialize_tpu_system(resolver)
    else:
      # tpu.initialize_system() is called with None as embedding_config, as
      # embedding_config is not available yet. Later in _Loop, it is called
      # with the correct embedding_config. Since it cannot be called twice
      # in the same graph with different embedding_config, we use a
      # dummy_graph here.
      dummy_graph = tf.Graph()
      with dummy_graph.as_default():
        tpu_initialize_system_op = tf.tpu.initialize_system(
            embedding_config=None, job=job)
      with self._GetSession(graph=dummy_graph) as sess:
        topology = sess.run(tpu_initialize_system_op)

    # Use the configured computation shape, or derive one from the topology.
    computation_shape = train_cfg.train.tpu_computation_shape
    if computation_shape is None:
      computation_shape = py_utils.ComputationShape(num_devices_per_split,
                                                    topology)
    assert num_devices_per_split == np.prod(computation_shape)

    # device_order_mode is only passed along when it is configured.
    assignment_kwargs = {
        'computation_shape': computation_shape,
        'num_replicas': data_parallelism,
    }
    device_order_mode = train_cfg.train.tpu_device_order_mode
    if device_order_mode is not None:
      assignment_kwargs['device_order_mode'] = device_order_mode
    self.device_assignment = device_assignment_lib.device_assignment(
        topology, **assignment_kwargs)

    py_utils.SetTpuDeviceAssignment(self.device_assignment, job)
    tf.logging.info('device_assignment.core_assignment: %s',
                    str(self.device_assignment.core_assignment))
    tf.logging.info('device_assignment.topology.device_coordinates: %s',
                    str(self.device_assignment.topology.device_coordinates))
  except py_utils.transient_tf_errors as e:
    tf.logging.info('TPU initialization failed: %s', e)
    raise
def _testOneModelParams(self, registry, name):
  """Instantiates the registered model `name` and checks its dataset inputs.

  Args:
    registry: Object whose `GetClass(name)` returns the model params class.
    name: Name of the registered model to check.
  """
  with tf.Graph().as_default():
    model_params = registry.GetClass(name)()
    try:
      all_datasets = model_params.GetAllDatasetParams()
    except datasets.GetAllDatasetParamsNotImplementedError:
      # Fall back to calling every dataset method individually, skipping the
      # ones that are not implemented.
      all_datasets = {}
      for dataset_name in datasets.GetDatasets(model_params):
        dataset_fn = getattr(model_params, dataset_name)
        try:
          all_datasets[dataset_name] = dataset_fn()
        except NotImplementedError:
          pass

    p = model_params.Model()
    p.input = all_datasets['Train']
    self.assertTrue(issubclass(p.cls, base_model.BaseModel))
    self.assertIsNot(p.model, None)
    p.cluster.mode = 'sync'
    p.cluster.job = 'decoder'
    p.cluster.decoder.replicas = 1
    with p.cluster.Instantiate():
      # Instantiate the params class, to help catch errors in layer
      # constructors due to misconfigurations.
      mdl = p.Instantiate()
      self._ValidateEMA(name, mdl)
    p = mdl.params

    for dataset, input_p in all_datasets.items():
      if not issubclass(p.cls, base_model.SingleTaskModel):
        self.assertTrue(issubclass(p.cls, base_model.MultiTaskModel))
        continue

      is_input_generator = (
          isinstance(input_p, hyperparams.InstantiableParams) and
          issubclass(input_p.cls, base_input_generator.BaseInputGenerator))
      if not is_input_generator:
        # Assume this function is not a dataset function but some helper.
        continue

      is_finite_eval_sequence_input = (
          dataset != 'Train' and issubclass(
              input_p.cls, base_input_generator.BaseSequenceInputGenerator)
          and input_p.num_samples != 0)
      if is_finite_eval_sequence_input:
        self.assertEqual(
            input_p.num_batcher_threads, 1,
            f'num_batcher_threads too large in {dataset}. Decoder or eval '
            f'runs over this set might not span exactly one epoch.')
def _load_graph_from_inference_graph(self, inference_graph):
  """Returns a tf.Graph() constructed from `inference_graph`.

  The graph def is imported under the device "/<type>:0", where <type> is
  "cpu" when `self._device_type` is "tpu" and `self._device_type` otherwise.

  Args:
    inference_graph: An InferenceGraph proto from which a graph_def is loaded
      from.

  Returns:
    A loaded tf.Graph().
  """
  # The conditional must be parenthesised (here: hoisted into a variable).
  # `"/%s:0" % "cpu" if cond else dev` parses as
  # `("/%s:0" % "cpu") if cond else dev`, which handed the bare device type
  # (e.g. "gpu") to tf.device for every non-TPU device type.
  import_device_type = "cpu" if self._device_type == "tpu" else self._device_type
  graph = tf.Graph()
  with graph.as_default():
    with tf.device("/%s:0" % import_device_type):
      tf.import_graph_def(inference_graph.graph_def, name="")
  return graph
def testSaveOnly(self):
  """A save_only Checkpointer writes a checkpoint but refuses to restore."""
  train_dir = os.path.join(self.get_temp_dir(), 'testSaveOnly')
  os.mkdir(train_dir)
  expected_index_file = os.path.join(train_dir, 'ckpt-00000000.index')

  model_params = base_model.SingleTaskModel.Params(LinearModel.Params())
  model_params.input = base_input_generator.BaseInputGenerator.Params()

  with self.session(graph=tf.Graph()) as sess:
    model = model_params.Instantiate()
    self.evaluate(tf.global_variables_initializer())
    ckpt = checkpointer.Checkpointer(train_dir, model, save_only=True)
    ckpt.Save(sess, model.global_step)
    # Restoring through a save-only checkpointer must trip an assertion.
    with self.assertRaises(AssertionError):
      ckpt.RestoreIfNeeded(sess)
  self.assertTrue(os.path.isfile(expected_index_file))
def testMapLayer(self):
  """MapLayer applies tf.reduce_max over axis 1 to each of its inputs."""
  graph = tf.Graph()
  with graph.as_default():
    tf.random.set_seed(24332)
    layer = layers.MapLayer.Params().Set(
        name='map', fn=tf.reduce_max, kwargs={'axis': 1}).Instantiate()
    # Both inputs are the same tensor object.
    x0 = x1 = tf.random.normal(shape=[2, 3, 5])
    y0, y1 = layer.FPropDefaultTheta(x0, x1)
  with self.session(graph=graph):
    self.evaluate(tf.global_variables_initializer())
    x0_val, x1_val, y0_val, y1_val = self.evaluate([x0, x1, y0, y1])
  self.assertAllClose(np.max(x0_val, 1), y0_val)
  self.assertAllClose(np.max(x1_val, 1), y1_val)
def testParallelLayer(self):
  """ParallelLayer with an add_n merge matches a numpy reference.

  The third branch is a SequentialLayer of an FC layer followed by dropout;
  the layer is built with `is_eval = True` and the numpy reference applies no
  dropout.
  """

  def _SumBranches(xs):
    # zip(*xs) groups the i-th output of every branch together.
    return tuple([tf.add_n(group) for group in zip(*xs)])

  def _FC(layer_name):
    return lingvo_layers.FCLayer.Params().Set(
        name=layer_name, input_dim=32, output_dim=4)

  graph = tf.Graph()
  with graph.as_default():
    tf.set_random_seed(24332)
    p = layers.ParallelLayer.Params().Set(
        name='test',
        merge=_SumBranches,
        sub=[
            _FC('foo'),
            _FC('bar'),
            layers.SequentialLayer.Params().Set(
                name='seq',
                sub=[
                    _FC('baz'),
                    lingvo_layers.DropoutLayer.Params().Set(
                        name='dropout', keep_prob=0.5)
                ])
        ])
    p.is_eval = True
    layer = p.Instantiate()
    x = tf.random_normal(shape=[2, 32])
    y = layer.FPropDefaultTheta(x)

  with self.session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())
    x_val, y_val, w = sess.run([x, y, layer.vars])

  np_result = None
  for fc_vars in (w.foo, w.bar, w.seq.baz):
    # relu(act \dot w + b)
    branch_out = np.maximum(0, np.matmul(x_val, fc_vars.w) + fc_vars.b)
    self.assertEqual(branch_out.shape, (2, 4))
    if np_result is None:
      np_result = branch_out
    else:
      np_result = np.add(np_result, branch_out)
  self.assertAllClose(np_result, y_val)
def _testOneModelParams(self, registry, name):
  """Instantiates the registered model `name` and checks its dataset inputs.

  Args:
    registry: Object whose `GetParams(name, dataset)` returns model params.
    name: Name of the registered model to check.

  Raises:
    base_model_params.DatasetError: If the 'Train' or 'Test' dataset is not
      defined. A missing 'Dev' dataset is tolerated.
  """
  with tf.Graph().as_default():
    p = registry.GetParams(name, 'Train')
    self.assertTrue(issubclass(p.cls, base_model.BaseModel))
    self.assertIsNot(p.model, None)
    p.cluster.mode = 'sync'
    p.cluster.job = 'decoder'
    p.cluster.decoder.replicas = 1
    with p.cluster.Instantiate():
      # Instantiate the params class, to help catch errors in layer
      # constructors due to misconfigurations.
      mdl = p.Instantiate()
      self._ValidateEMA(name, mdl)
    p = mdl.params

    for dataset in ('Train', 'Dev', 'Test'):
      try:
        input_p = registry.GetParams(name, dataset).input
      except base_model_params.DatasetError:
        # Dataset not defined.
        if dataset != 'Dev':
          raise
        # Dev can be optional. Skip the checks below: falling through would
        # re-validate the stale `input_p` left over from 'Train' and report
        # any failure as a 'Dev' problem.
        continue

      if issubclass(p.cls, base_model.SingleTaskModel):
        self.assertTrue(
            issubclass(input_p.cls, base_input_generator.BaseInputGenerator),
            'Error in %s' % dataset)
        if (dataset != 'Train') and issubclass(
            input_p.cls, base_input_generator.BaseSequenceInputGenerator
        ) and (input_p.num_samples != 0):
          self.assertEqual(
              input_p.num_batcher_threads, 1,
              'num_batcher_threads too large in %s. Decoder '
              'or eval runs over this set might not span '
              'exactly one epoch.' % dataset)
      else:
        self.assertTrue(issubclass(p.cls, base_model.MultiTaskModel))
        for _, v in input_p.IterParams():
          self.assertTrue(
              issubclass(v.cls, base_input_generator.BaseInputGenerator),
              'Error in %s' % dataset)
def _verify_timestep_counts(self,
                            num_splits,
                            auto_partition=False,
                            micro_batch_size=None):
  """Checks the gradient norm and accumulator values of a pipelined net.

  Args:
    num_splits: Number of pipeline splits.
    auto_partition: If True, build a PipeliningLayer from 16 `_SimpyLayer`s
      partitioned by `_Partition`; otherwise use `_BuildDummyPipelineCnn`.
    micro_batch_size: Forwarded to `_BuildDummyPipelineCnn` (unused when
      `auto_partition` is True).
  """
  num_micro_batches = 8
  batch_size = 16
  input_shape = [batch_size, 8, 8, 1]
  # Accumulator values should be equal to number of time steps in pipeline.
  expected_ts = num_micro_batches if num_splits > 1 else 1

  with self.session(graph=tf.Graph()) as sess:
    tf.random.set_seed(1245)
    inputs = tf.random.uniform(input_shape, seed=12345)

    if not auto_partition:
      net = _BuildDummyPipelineCnn(
          num_splits=num_splits,
          micro_batch_size=micro_batch_size,
          num_micro_batches=num_micro_batches)
    else:
      sub_layers = [
          _SimpyLayer.Params().Set(name='layer_{}'.format(i))
          for i in range(16)
      ]
      cell_tpl = _Partition(sub_layers, num_splits,
                            tshape.Shape([batch_size, 8, 8, 1]))
      net = PipeliningLayer.Params().Set(
          name='pipeline',
          num_micro_batches=num_micro_batches,
          cell_tpl=cell_tpl).Instantiate()

    endpoints = net.FPropDefaultTheta(inputs)
    # A list/tuple result carries auxiliary logits as its second element.
    aux_logits = None
    if isinstance(endpoints, (list, tuple)):
      logits, aux_logits = endpoints
    else:
      logits = endpoints

    loss = tf.reduce_mean(logits)
    grads = tf.gradients(loss, tf.trainable_variables())
    grad_norm = tf.sqrt(py_utils.SumSquared(grads))
    ts = net.GetAccumulatorValues().Flatten()

    sess.run(tf.global_variables_initializer())
    grad_norm_val, ts_vals = sess.run([grad_norm, ts])
    test_utils.CompareToGoldenSingleFloat(self, 0.268087, grad_norm_val)

    for ts_val in list(ts_vals):
      self.assertEqual(ts_val, expected_ts)

    if aux_logits is not None:
      aux_logit_tensor = sess.run(aux_logits)
      self.assertEqual(aux_logit_tensor.shape, (batch_size, 8, 8, 1))
def testEinsumReplacementBxycBzxBzyc(self):
  """EinsumBxycBzxBzyc matches tf.einsum('bxyc,bzx->bzyc')."""
  with self.session(use_gpu=False, graph=tf.Graph()) as sess:
    lhs = tf.random.uniform(
        shape=[20, 7, 4, 3], minval=0, maxval=1, dtype=tf.float32)
    rhs = tf.random.uniform(
        shape=[20, 5, 7], minval=0, maxval=1, dtype=tf.float32)
    reference = tf.einsum('bxyc,bzx->bzyc', lhs, rhs)

    p = spectrum_augmenter_on_device.SpectrumAugmenterOnDevice.Params()
    p.name = 'specAug_layers'
    specaug_layer = p.Instantiate()
    replacement = specaug_layer.EinsumBxycBzxBzyc(lhs, rhs)

    # Fetch both in one run so they see the same random inputs.
    reference_val, replacement_val = sess.run([reference, replacement])
    self.assertAllClose(reference_val, replacement_val)
def testEinsumReplacementBBmBm(self):
  """EinsumBBmBm matches tf.einsum('b,bm->bm')."""
  with self.session(use_gpu=False, graph=tf.Graph()):
    lhs = tf.random.uniform(shape=[20], minval=0, maxval=1, dtype=tf.float32)
    rhs = tf.random.uniform(
        shape=[20, 10], minval=0, maxval=1, dtype=tf.float32)
    reference = tf.einsum('b,bm->bm', lhs, rhs)

    p = spectrum_augmenter_on_device.SpectrumAugmenterOnDevice.Params()
    p.name = 'specAug_layers'
    specaug_layer = p.Instantiate()
    replacement = specaug_layer.EinsumBBmBm(lhs, rhs)

    # Evaluate both together so they see the same random inputs.
    reference_val, replacement_val = self.evaluate([reference, replacement])
    self.assertAllClose(reference_val, replacement_val)
def testMnistV2(self):
  """Two consecutive steps of test.MnistV2 match their golden values."""
  seed = 73234288
  graph = tf.Graph()
  with graph.as_default():
    tf.random.set_seed(1618)
    params = model_registry.GetParams('test.MnistV2', 'Test')
    params.random_seed = seed
    params.input.ckpt = self.data_path
    params.task.params_init = py_utils.WeightInit.Uniform(0.1, seed=seed)
    with cluster_factory.ForTestingWorker(mode='sync', job='trainer_client'):
      model = params.Instantiate()
      model.ConstructFPropBPropGraph()
  with self.session(graph=graph):
    self.evaluate(tf.global_variables_initializer())
    # One golden value per consecutive call to _runOneStep.
    for golden in (2.303070, 2.297364):
      CompareToGoldenSingleFloat(self, golden, self._runOneStep(model))
def __init__(self,
             tpu=None,
             worker_job_name=None,
             prefix_max_len=128,
             is_cloud_tpu_node=False):
  """Stores the configuration and resets all lazily-populated state.

  Args:
    tpu: TPU identifier. When `is_cloud_tpu_node` is True it is passed to
      `TPUClusterResolver` and then replaced by the resolver's master.
    worker_job_name: Worker job name, with or without a leading '/job:'.
    prefix_max_len: Stored as-is on the instance.
    is_cloud_tpu_node: Whether to resolve the cluster definition through
      `tf.distribute.cluster_resolver.TPUClusterResolver`.
  """
  job_prefix = '/job:'

  self._tpu = tpu
  self._worker_job = worker_job_name
  self._prefix_max_len = prefix_max_len
  self._c = threading.Condition()  # lock
  self._heartbeat = False
  self._saver_reshape = True

  # set in reset_session
  self._sess = None

  # set in configure_cluster_params
  self.cluster_params = None

  # set in init_graph
  self.cluster = None
  self.graph = tf.Graph()
  self.task = None
  self.compile_op = None
  self.init_vars_op = None
  self.infeed_op = None
  self.infeed_args = None
  self.outfeed_op = None
  self.outfeed = None
  self.decode_loop = None
  self.saver = None
  self.num_batches = None
  self.session_timeout_in_ms = None

  # set in load_spm
  self.spm = None

  # Normalise the job name: `self._worker_job` carries the '/job:' prefix,
  # while the local `worker_job_name` given to the resolver does not.
  if worker_job_name is not None:
    if worker_job_name.startswith(job_prefix):
      worker_job_name = worker_job_name.split(':')[1]
    else:
      self._worker_job = job_prefix + worker_job_name

  if not is_cloud_tpu_node:
    self._cluster_def = None
  else:
    cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
        self._tpu, job_name=worker_job_name)
    self._cluster_def = cluster_resolver.cluster_spec().as_cluster_def()
    self._tpu = cluster_resolver.master()
def __init__(self,
             inference_graph,
             subgraph_name=None,
             checkpoint=None,
             device_type="gpu",
             tf_master="",
             session_config=None):
  """Loads an inference graph and creates the first session for it.

  Args:
    inference_graph: An inference graph object, or a string that is passed to
      `LoadInferenceGraph` to load one.
    subgraph_name: Name of the subgraph whose feeds and fetches are used.
      Falls back to "default" when empty or None.
    checkpoint: Stored as-is on the instance.
    device_type: One of "cpu", "gpu" or "tpu". The graph def is imported
      under "/<type>:0", with "cpu" substituted for "tpu".
    tf_master: Stored as-is on the instance.
    session_config: Stored as-is on the instance.

  Raises:
    ValueError: If the inference graph defines subgraphs and `subgraph_name`
      is not one of them.
  """
  assert device_type in ["cpu", "gpu", "tpu"]
  subgraph_name = subgraph_name or "default"
  if isinstance(inference_graph, six.string_types):
    tf.logging.info("Reading inference graph from %s.", inference_graph)
    inference_graph = LoadInferenceGraph(inference_graph)
  self._inference_graph = inference_graph
  self._checkpoint = checkpoint
  self._device_type = device_type
  self._tf_master = tf_master
  self._session_config = session_config

  # The conditional must be parenthesised (here: hoisted into a variable).
  # `"/%s:0" % "cpu" if cond else dev` parses as
  # `("/%s:0" % "cpu") if cond else dev`, which handed the bare device type
  # (e.g. "gpu") to tf.device for every non-TPU device type.
  import_device_type = "cpu" if device_type == "tpu" else device_type

  self._graph = tf.Graph()
  with self._graph.as_default():
    tf.logging.info(
        "Loading inference graph for prediction subgraph_name={}.".format(
            subgraph_name))
    self._saver = tf.train.Saver(saver_def=inference_graph.saver_def)
    with tf.device("/%s:0" % import_device_type):
      tf.import_graph_def(inference_graph.graph_def, name="")
  self._graph.finalize()

  if inference_graph.subgraphs:
    if subgraph_name not in inference_graph.subgraphs:
      raise ValueError(
          "Subgraph %s not defined. Valid subgraphs: %s" %
          (subgraph_name, list(inference_graph.subgraphs.keys())))
    subgraph = inference_graph.subgraphs[subgraph_name]
    self._fetches = subgraph.fetches
    self._feeds = subgraph.feeds
  else:
    # No subgraphs: use the graph-level feeds and fetches.
    self._fetches = inference_graph.fetches
    self._feeds = inference_graph.feeds

  # Lock for creating new sessions.
  self._sess_lock = threading.Lock()
  self._cur_sess_id = 0
  self._CreateNewSession()