class BaseTest(tf.test.TestCase):
    """Tests for the Wide Deep model on the meals dataset.

    Every test runs against user and recipe CSV fixtures that ``setUp``
    writes into a per-test temporary directory.
    """

    @classmethod
    def setUpClass(cls):  # pylint: disable=invalid-name
        """Define the meal flags once before any test in this class runs."""
        super(BaseTest, cls).setUpClass()
        meals_main.define_meal_flags()

    def setUp(self):
        """Write the user and recipe CSV fixtures under a temporary directory."""
        # Fixtures live at <temp_dir>/<meals.FOOD>/<file name>; the same
        # temp_dir is later handed to the code under test as its data_dir.
        self.temp_dir = self.get_temp_dir()
        tf.io.gfile.makedirs(os.path.join(self.temp_dir, meals.FOOD))
        self.user_csv = os.path.join(
            self.temp_dir, meals.FOOD, meals.USERS_FILE)
        self.recipe_csv = os.path.join(
            self.temp_dir, meals.FOOD, meals.RECIPES_FILE)

        # Write the CSV fixtures.
        with tf.io.gfile.GFile(self.recipe_csv, "w") as f:
            f.write(TEST_RECIPE_DATA)
        with tf.io.gfile.GFile(self.user_csv, "w") as f:
            f.write(TEST_USER_DATA)

    @unittest.skipIf(keras_utils.is_v2_0(), "TF 1.0 only test.")
    def test_input_fn(self):
        """Compare the first element of the first batch to TEST_INPUT_VALUES."""
        train_input_fn, _, _ = meals_dataset.construct_input_fns(
            dataset=meals.FOOD, data_dir=self.temp_dir, batch_size=8,
            repeat=1)

        dataset = train_input_fn()
        features, labels = dataset.make_one_shot_iterator().get_next()

        with self.session() as sess:
            features, labels = sess.run((features, labels))

            # Compare the two features dictionaries.
            for key, expected in TEST_INPUT_VALUES.items():
                # assertIn names the missing key on failure, unlike
                # assertTrue(key in features), which only says "False".
                self.assertIn(key, features)
                self.assertAllClose(expected, features[key][0])

            self.assertAllClose(labels[0], [1.0])

    @unittest.skipIf(keras_utils.is_v2_0(), "TF 1.0 only test.")
    def test_end_to_end_deep(self):
        """Run the meals main for one epoch on the CSV fixtures."""
        integration.run_synthetic(
            main=meals_main.main,
            tmp_root=self.temp_dir,
            extra_flags=[
                "--data_dir", self.temp_dir,
                "--download_if_missing=false",
                "--train_epochs", "1",
                "--epochs_between_evals", "1",
            ],
            synth=False,
            max_train=None)
def test_collect_run_params(self):
    """Check the entries `logger._collect_run_params` writes into run_info.

    The test expects one entry per parameter, looked up by position in
    alphabetical order of the parameter names, each carrying a type-specific
    value key.
    """
    collected = {}
    params = {
        "batch_size": 32,
        "synthetic_data": True,
        "train_epochs": 100.00,
        "dtype": "fp16",
        "resnet_size": 50,
        "random_tensor": tf.constant(2.0),
    }
    logger._collect_run_params(collected, params)

    # The expected string form of the tensor depends on the TF major version.
    if keras_utils.is_v2_0():
        tensor_text = "tf.Tensor(2.0, shape=(), dtype=float32)"
    else:
        tensor_text = "Tensor(\"Const:0\", shape=(), dtype=float32)"

    expected_entries = [
        {"name": "batch_size", "long_value": 32},
        {"name": "dtype", "string_value": "fp16"},
        {"name": "random_tensor", "string_value": tensor_text},
        {"name": "resnet_size", "long_value": 50},
        {"name": "synthetic_data", "bool_value": "True"},
        {"name": "train_epochs", "float_value": 100.00},
    ]

    recorded = collected["run_parameters"]
    self.assertEqual(len(recorded), 6)
    for position, expected in enumerate(expected_entries):
        self.assertEqual(recorded[position], expected)
def setUp(self):
    """Disable eager execution when on TF 2.0, then run the parent setUp."""
    on_tf2 = keras_utils.is_v2_0()
    if on_tf2:
        tf.compat.v1.disable_eager_execution()
    super(GoldenBaseTest, self).setUp()
class GoldenBaseTest(reference_data.BaseTest):
    """Checks that the reference data test machinery itself works."""

    def setUp(self):
        """Disable eager execution when on TF 2.0, then run the parent setUp."""
        on_tf2 = keras_utils.is_v2_0()
        if on_tf2:
            tf.compat.v1.disable_eager_execution()
        super(GoldenBaseTest, self).setUp()

    @property
    def test_name(self):
        """Name under which this test's reference data is identified."""
        return "reference_data_test"

    def _uniform_random_ops(self, test=False, wrong_name=False,
                            wrong_shape=False, bad_seed=False,
                            bad_function=False):
        """Build a one-variable random graph and save or test it.

        The graph only creates a small uniformly random variable. The
        ``wrong_*`` / ``bad_*`` switches each break one aspect on purpose so
        callers can confirm that the checks really do fail.

        Args:
          test: Whether or not to run as a test case.
          wrong_name: Whether to assign the wrong name to the tensor.
          wrong_shape: Whether to create a tensor with the wrong shape.
          bad_seed: Whether or not to perturb the random seed.
          bad_function: Whether to perturb the correctness function.
        """
        name = "uniform_random"

        graph = tf.Graph()
        with graph.as_default():
            seed = self.name_to_seed(name)
            if bad_seed:
                seed = seed + 1
            tf.compat.v1.set_random_seed(seed)

            if wrong_name:
                variable_name = "wrong_tensor"
            else:
                variable_name = "input_tensor"

            if wrong_shape:
                variable_shape = (1, 2)
            else:
                variable_shape = (1, 1)

            random_variable = tf.compat.v1.get_variable(
                variable_name,
                dtype=tf.float32,
                initializer=tf.random.uniform(variable_shape, maxval=1))

        def check_result(tensor_result):
            # Reduce the evaluated tensor to its [0, 0] entry, optionally
            # shifted so the comparison is forced to fail.
            value = float(tensor_result[0, 0])
            if bad_function:
                value = value + 0.1
            return [value]

        self._save_or_test_ops(
            name=name,
            graph=graph,
            ops_to_eval=[random_variable],
            test=test,
            correctness_function=check_result)

    def _dense_ops(self, test=False):
        """Build a two-layer dense graph and save or test it.

        Args:
          test: Whether or not to run as a test case.
        """
        name = "dense"

        graph = tf.Graph()
        with graph.as_default():
            tf.compat.v1.set_random_seed(self.name_to_seed(name))
            random_input = tf.compat.v1.get_variable(
                "input_tensor",
                dtype=tf.float32,
                initializer=tf.random.uniform((1, 2), maxval=1))
            # Two stacked dense layers: 4 units, then 1 unit.
            hidden = tf.compat.v1.layers.dense(inputs=random_input, units=4)
            output = tf.compat.v1.layers.dense(inputs=hidden, units=1)

        self._save_or_test_ops(
            name=name,
            graph=graph,
            ops_to_eval=[output],
            test=test,
            correctness_function=self.default_correctness_function)

    def test_uniform_random(self):
        """The unmodified random graph passes in test mode."""
        self._uniform_random_ops(test=True)

    def test_tensor_name_error(self):
        """A wrongly named tensor raises AssertionError."""
        with self.assertRaises(AssertionError):
            self._uniform_random_ops(test=True, wrong_name=True)

    @unittest.skipIf(keras_utils.is_v2_0(),
                     "TODO:(b/136010138) Fails on TF 2.0.")
    def test_tensor_shape_error(self):
        """A wrongly shaped tensor raises AssertionError."""
        with self.assertRaises(AssertionError):
            self._uniform_random_ops(test=True, wrong_shape=True)

    def test_incorrectness_function(self):
        """A perturbed correctness function raises AssertionError."""
        with self.assertRaises(AssertionError):
            self._uniform_random_ops(test=True, bad_function=True)

    def test_dense(self):
        """The dense graph passes in test mode."""
        self._dense_ops(test=True)

    def regenerate(self):
        """Run both graph builders with test=False."""
        self._uniform_random_ops(test=False)
        self._dense_ops(test=False)
def _build_autodist(flags_obj):
    """Create the AutoDist instance for the strategy named in the flags."""
    # Chunk size passed to the AllReduce and Parallax strategies, per model.
    chunk_sizes = {'vgg16': 25, 'resnet101': 200, 'inceptionv3': 30}
    chunk = chunk_sizes.get(flags_obj.cnn_model, 512)

    # Builders are lazy so only the selected strategy is constructed and
    # only the flags it needs are read.
    strategy_builders = {
        'PS': lambda: PS(local_proxy_variable=flags_obj.proxy),
        'PSLoadBalancing': lambda: PSLoadBalancing(
            local_proxy_variable=flags_obj.proxy),
        'PartitionedPS': lambda: PartitionedPS(
            local_proxy_variable=flags_obj.proxy),
        'AllReduce': lambda: AllReduce(chunk_size=chunk),
        'Parallax': lambda: Parallax(
            chunk_size=chunk, local_proxy_variable=flags_obj.proxy),
    }
    build_strategy = strategy_builders.get(flags_obj.autodist_strategy)
    if build_strategy is None:
        raise ValueError(
            'the strategy can be only from PS, PSLoadBalancing, PartitionedPS, AllReduce, Parallax'
        )
    return AutoDist(resource_spec_file, build_strategy())


def _configure_mixed_precision(flags_obj, dtype):
    """Install the global Keras mixed-precision policy matching `dtype`."""
    if dtype == tf.float16:
        # Reject the unsupported combination before touching the global
        # policy, so a failed call leaves no side effect behind.
        if not keras_utils.is_v2_0():
            raise ValueError('--dtype=fp16 is not supported in TensorFlow 1.')
        loss_scale = flags_core.get_loss_scale(flags_obj, default_for_fp16=128)
        policy = tf.compat.v1.keras.mixed_precision.experimental.Policy(
            'mixed_float16', loss_scale=loss_scale)
        tf.compat.v1.keras.mixed_precision.experimental.set_policy(policy)
    elif dtype == tf.bfloat16:
        policy = tf.compat.v1.keras.mixed_precision.experimental.Policy(
            'mixed_bfloat16')
        tf.compat.v1.keras.mixed_precision.experimental.set_policy(policy)


def _build_cnn_model(cnn_model):
    """Instantiate the untrained `tf.keras.applications` model for a flag."""
    # Class names are resolved by attribute lookup only for the chosen
    # model, so an unrelated missing class cannot break the selection.
    class_names = {
        'resnet101': 'ResNet101',
        'vgg16': 'VGG16',
        'inceptionv3': 'InceptionV3',
        'densenet121': 'DenseNet121',
    }
    class_name = class_names.get(cnn_model)
    if class_name is None:
        raise ValueError('Other Model Undeveloped')
    model_cls = getattr(tf.keras.applications, class_name)
    return model_cls(weights=None, classes=imagenet_preprocessing.NUM_CLASSES)


def run(flags_obj):
    """Train a Keras CNN on ImageNet in graph mode under AutoDist.

    Builds the AutoDist strategy, the training input pipeline, the model and
    an Adam optimizer from ``flags_obj``, then runs the training op in an
    AutoDist distributed session, reporting every epoch and batch to a
    ``TimeHistory`` instance. No evaluation is run.

    Args:
        flags_obj: Flags object. Attributes read here are
            ``autodist_patch_tf``, ``autodist_strategy``, ``cnn_model``,
            ``proxy`` (for the strategies that take it), ``enable_xla``,
            ``use_tensor_lr``, ``batch_size``, ``data_dir``,
            ``train_epochs``, ``train_steps``,
            ``datasets_num_private_threads``,
            ``tf_data_experimental_slack`` and ``training_dataset_cache``;
            it is also passed to the ``flags_core`` dtype and loss-scale
            helpers.

    Raises:
        ValueError: If ``autodist_strategy`` or ``cnn_model`` is not one of
            the supported names, or if fp16 is requested while not running
            TensorFlow 2.0.

    Returns:
        None.
    """
    #########################################################################
    # Construct AutoDist with ResourceSpec for Different Strategies
    # The environment variable is set before AutoDist is constructed.
    os.environ['AUTODIST_PATCH_TF'] = (
        '1' if flags_obj.autodist_patch_tf else '0')
    autodist = _build_autodist(flags_obj)
    #########################################################################

    dtype = flags_core.get_tf_dtype(flags_obj)
    _configure_mixed_precision(flags_obj, dtype)

    drop_remainder = flags_obj.enable_xla

    # Constant base learning rate, replaced by a warmup/decay schedule when
    # use_tensor_lr is set.
    lr_schedule = 0.01 if 'vgg' in flags_obj.cnn_model else 0.1
    if flags_obj.use_tensor_lr:
        lr_schedule = common.PiecewiseConstantDecayWithWarmup(
            batch_size=flags_obj.batch_size,
            epoch_size=imagenet_preprocessing.NUM_IMAGES['train'],
            warmup_epochs=common.LR_SCHEDULE[0][1],
            boundaries=[p[1] for p in common.LR_SCHEDULE[1:]],
            multipliers=[p[0] for p in common.LR_SCHEDULE],
            compute_lr_on_cpu=True)

    #########################################################################
    # Build with Graph mode, and put all under AutoDist scope.
    with tf.Graph().as_default(), autodist.scope():
        train_input_dataset = imagenet_preprocessing.input_fn(
            is_training=True,
            data_dir=flags_obj.data_dir,
            batch_size=flags_obj.batch_size,
            num_epochs=flags_obj.train_epochs,
            parse_record_fn=imagenet_preprocessing.parse_record,
            datasets_num_private_threads=(
                flags_obj.datasets_num_private_threads),
            dtype=dtype,
            drop_remainder=drop_remainder,
            tf_data_experimental_slack=flags_obj.tf_data_experimental_slack,
            training_dataset_cache=flags_obj.training_dataset_cache,
        )

        model = _build_cnn_model(flags_obj.cnn_model)

        optimizer = tf.keras.optimizers.Adam(
            learning_rate=lr_schedule,
            beta_1=0.9,
            beta_2=0.999,
            epsilon=1e-08)

        train_input_iterator = tf.compat.v1.data.make_one_shot_iterator(
            train_input_dataset)
        train_input, train_target = train_input_iterator.get_next()

        steps_per_epoch = (
            imagenet_preprocessing.NUM_IMAGES['train'] // flags_obj.batch_size)
        train_epochs = flags_obj.train_epochs

        # NOTE: this loop writes no checkpoints and runs no evaluation, so
        # the checkpoint path and eval step count that used to be computed
        # here (and never used) have been dropped.

        # A short run: cap the single epoch at train_steps when it is given.
        if train_epochs <= 1 and flags_obj.train_steps:
            steps_per_epoch = min(flags_obj.train_steps, steps_per_epoch)
            train_epochs = 1

        train_output = model(train_input, training=True)
        scc_loss = tf.keras.losses.SparseCategoricalCrossentropy()
        loss = scc_loss(train_target, train_output)

        var_list = (
            variables.trainable_variables()
            + ops.get_collection(ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
        grad = optimizer.get_gradients(loss, var_list)
        train_op = optimizer.apply_gradients(zip(grad, var_list))

        #####################################################################
        # Create distributed session.
        #   Instead of using the original TensorFlow session for graph
        #   execution, use AutoDist's distributed session, in which a
        #   computational graph for distributed training is constructed.
        #
        # [original line]
        # >>> sess = tf.compat.v1.Session()
        #
        sess = autodist.create_distributed_session()
        #####################################################################

        summary = TimeHistory(flags_obj.batch_size, steps_per_epoch)
        for epoch_id in range(train_epochs):
            summary.on_epoch_begin(epoch_id)
            for batch_id in range(steps_per_epoch):
                summary.on_batch_begin(batch_id)
                loss_v, _ = sess.run([loss, train_op])
                summary.on_batch_end(batch_id, loss_v)
            summary.on_epoch_end(epoch_id)
        summary.on_train_end()