def testSidecarEvaluatorOutputsSummary(self):
    """SidecarEvaluator writes summary files when a log_dir is provided."""
    # Build a small model and train it briefly on random data.
    model = keras.models.Sequential([keras.layers.Dense(10)])
    model.compile(
        gradient_descent.SGD(),
        loss='mse',
        metrics=keras.metrics.CategoricalAccuracy())
    features = np.random.random((1000, 32))
    targets = np.random.random((1000, 10))
    train_set = dataset_ops.Dataset.from_tensor_slices((features, targets))
    train_set = train_set.batch(32)
    model.fit(train_set, epochs=1)

    # Persist a checkpoint for the evaluator to pick up.
    checkpoint_dir = os.path.join(self.get_temp_dir(), 'ckpt')
    log_dir = os.path.join(self.get_temp_dir(), 'summary')
    logging.info('checkpoint_dir = %s, log_dir = %s', checkpoint_dir, log_dir)
    checkpoint = tracking_util.Checkpoint(
        model=model, optimizer=model.optimizer)
    manager = checkpoint_management.CheckpointManager(
        checkpoint, checkpoint_dir, max_to_keep=2)
    logging.info('Checkpoint manager saved to: %s', manager.save())

    # Run exactly one sidecar evaluation pass.
    sidecar_evaluator_lib.SidecarEvaluator(
        model,
        data=train_set,
        checkpoint_dir=checkpoint_dir,
        log_dir=log_dir,
        max_evaluations=1).start()

    # Both the checkpoint dir and the summary dir must now be non-empty.
    summary_files = file_io.list_directory_v2(log_dir)
    self.assertNotEmpty(
        file_io.list_directory_v2(checkpoint_dir),
        'Checkpoint should have been written and '
        'checkpoint_dir should not be empty.')
    self.assertNotEmpty(
        summary_files,
        'Summary should have been written and '
        'log_dir should not be empty.')

    # Inspect the first summary file: every event at a non-zero step must be
    # at step 32 (1000 samples in batches of 32 -> 32 batches) and carry the
    # categorical accuracy tag.
    event_pb_written = False
    for event_pb in summary_iterator.summary_iterator(
            os.path.join(log_dir, summary_files[0])):
        if event_pb.step > 0:
            self.assertEqual(event_pb.step, 32)
            self.assertEqual(
                event_pb.summary.value[0].tag, 'categorical_accuracy')
            event_pb_written = True
    # Verifying at least one non-zeroth step is written to summary.
    self.assertTrue(event_pb_written)
def proc_tensorboard_saves_on_chief_but_not_otherwise(test_obj):
    """Checks that only the chief worker writes TensorBoard summaries."""
    model, _, train_ds, steps = _model_setup(test_obj, file_format='')
    num_epoch = 2

    # Derive a per-worker log path from the task type and index so every
    # worker writes to its own directory. In normal use all workers share one
    # path; distinct paths here let us tell chief and non-chief output apart.
    task_config = _get_task_config()
    saving_filepath = os.path.join(
        test_obj.get_temp_dir(),
        'logfile_%s_%d' % (task_config['type'], task_config['index']))

    # Being unique, the path must not exist before training starts.
    test_obj.assertFalse(file_io.file_exists_v2(saving_filepath))

    model.fit(
        x=train_ds,
        epochs=num_epoch,
        steps_per_epoch=steps,
        callbacks=[callbacks.TensorBoard(log_dir=saving_filepath)])

    # The chief's directory holds summaries; a non-chief's directory may get
    # created but stays empty. `list_directory_v2` is used (not existence)
    # because the directory may exist either way.
    test_obj.assertEqual(
        bool(file_io.list_directory_v2(saving_filepath)),
        test_base.is_chief())
def testSidecarEvaluatorOutputsSummarySavedWithCallback(self):
    """SidecarEvaluator restores callback-saved checkpoints and logs summaries."""
    checkpoint_dir = os.path.join(self.get_temp_dir(), 'checkpoints')
    log_dir = os.path.join(self.get_temp_dir(), 'summary')

    # Train a model for one epoch, checkpointing weights via callback.
    model = self.createTestModel(compile_model=True)
    features = np.random.random((1000, 32))
    targets = np.random.random((1000, 10))
    dataset = dataset_ops.Dataset.from_tensor_slices(
        (features, targets)).batch(32)
    save_callback = keras.callbacks.ModelCheckpoint(
        filepath=os.path.join(checkpoint_dir, 'ckpt-{epoch}'),
        save_weights_only=True)
    model.fit(dataset, epochs=1, callbacks=[save_callback])
    self.assertNotEmpty(
        file_io.list_directory_v2(checkpoint_dir),
        'Checkpoint should have been written and '
        'checkpoint_dir should not be empty.')

    # Evaluate once with a fresh model instance that must load the
    # checkpoint written above.
    eval_model = self.createTestModel(compile_model=True)
    sidecar_evaluator_lib.SidecarEvaluator(
        eval_model,
        data=dataset,
        checkpoint_dir=checkpoint_dir,
        log_dir=log_dir,
        max_evaluations=1).start()

    # Restoration must leave eval_model in the same state as the trained
    # model; mismatched weights mean the restore failed.
    self.assertModelsSameVariables(model, eval_model)
    self.assertSummaryEventsWritten(log_dir)
def assertSummaryEventsWritten(self, log_dir):
    """Asserts summaries exist under `log_dir` with the expected metric tags."""
    summary_files = file_io.list_directory_v2(log_dir)
    self.assertNotEmpty(
        summary_files,
        'Summary should have been written and '
        'log_dir should not be empty.')

    # Walk every event in every summary file, collecting the tags recorded
    # at non-zero steps.
    event_pb_written = False
    event_tags = []
    for summary_file in summary_files:
        summary_path = os.path.join(log_dir, summary_file)
        for event_pb in summary_iterator.summary_iterator(summary_path):
            if event_pb.step > 0:
                self.assertEqual(event_pb.step, 32)
                event_tags.append(event_pb.summary.value[0].tag)
                event_pb_written = True
    self.assertCountEqual(event_tags, [
        'evaluation_categorical_accuracy_vs_iterations',
        'evaluation_loss_vs_iterations',
        'evaluation_mean_squared_error_1_vs_iterations',
        'evaluation_mean_squared_error_2_vs_iterations',
    ])
    # Verifying at least one non-zeroth step is written to summary.
    self.assertTrue(event_pb_written)
def list_dir(dir_path: Text, only_file_names: bool = False):
    """Lists the entries directly under `dir_path`.

    Args:
        dir_path (str): Path in filesystem.
        only_file_names (bool): Returns only file names if True; otherwise
            each name is joined with `dir_path` into a full path.

    Returns:
        A list of entry names (when `only_file_names` is True) or full
        paths of the entries under `dir_path`.
    """
    entries = file_io.list_directory_v2(dir_path)
    if only_file_names:
        return list(entries)
    # Hoisted out of the comprehension: the flag is loop-invariant, so there
    # is no need to re-test it per entry.
    return [os.path.join(dir_path, entry) for entry in entries]
def testSidecarEvaluatorOutputsSummarySavedWithCallback(
        self, model_type, build_model):
    """End-to-end SidecarEvaluator run with a TensorBoard callback."""
    checkpoint_dir = os.path.join(self.get_temp_dir(), 'checkpoints')
    log_dir = os.path.join(self.get_temp_dir(), 'summary')

    # Train a freshly built model for one epoch on random data, saving the
    # weights through a ModelCheckpoint callback.
    model = _test_model_builder(
        model_type=model_type, compile_model=True, build_model=False)
    features = np.random.random((1000, 32))
    targets = np.random.random((1000, 10))
    dataset = dataset_ops.Dataset.from_tensor_slices(
        (features, targets)).batch(_BATCH_SIZE)
    save_callback = keras.callbacks.ModelCheckpoint(
        filepath=os.path.join(checkpoint_dir, 'ckpt-{epoch}'),
        save_weights_only=True)
    model.fit(dataset, epochs=1, callbacks=[save_callback])
    self.assertNotEmpty(
        file_io.list_directory_v2(checkpoint_dir),
        'Checkpoint should have been written and '
        'checkpoint_dir should not be empty.')

    # A separate model instance performs the evaluation.
    eval_model = _test_model_builder(
        model_type=model_type, compile_model=True, build_model=build_model)
    sidecar_evaluator = sidecar_evaluator_lib.SidecarEvaluator(
        eval_model,
        data=dataset,
        checkpoint_dir=checkpoint_dir,
        max_evaluations=1,
        callbacks=[keras.callbacks.TensorBoard(log_dir=log_dir)])

    # Run one evaluation pass while capturing its logging output.
    with self.assertLogs() as cm:
        sidecar_evaluator.start()

    metrics_logging = [
        line for line in cm.output if 'End of evaluation' in line
    ]
    self.assertLen(metrics_logging, 1)
    expected_logged_metrics = [
        'loss', 'categorical_accuracy', 'mean_squared_error_1',
        'mean_squared_error_2'
    ]
    # Each expected metric must appear as 'name=' in the logged summary line.
    for metric_name in expected_logged_metrics:
        self.assertRegex(metrics_logging[0], f'{metric_name}=')

    # Eval model has been restored to the same state as the original model,
    # so their weights should match. If not, restoration didn't work.
    self.assertModelsSameVariables(model, eval_model)

    # check the iterations is restored.
    self.assertEqual(sidecar_evaluator._iterations.numpy(), _BATCH_SIZE)
    self.assertSummaryEventsWritten(os.path.join(log_dir, 'validation'))
def test_basic_module(self):
    """A fingerprint file is emitted alongside the saved model."""
    flags.config().saved_model_fingerprinting.reset(True)
    save_dir = self._create_saved_model()
    saved_files = file_io.list_directory_v2(save_dir)

    self.assertLen(saved_files, 4)
    self.assertIn(constants.FINGERPRINT_FILENAME, saved_files)

    fingerprint_def = self._read_fingerprint(
        file_io.join(save_dir, constants.FINGERPRINT_FILENAME))
    # Serialization is non-deterministic, so the exact hash value cannot be
    # checked — only that it was populated.
    self.assertGreater(fingerprint_def.graph_def_hash, 0)
def proc_tensorboard_works_with_same_file_path(test_obj, saving_filepath):
    """All workers share one TensorBoard log path and summaries get written."""
    model, _, train_ds, steps = _model_setup(test_obj, file_format='')
    num_epoch = 2

    # The shared path is expected to be absent before any worker trains.
    test_obj.assertFalse(file_io.file_exists_v2(saving_filepath))

    # Synchronize all workers both before and after fitting.
    multi_process_runner.get_barrier().wait()
    model.fit(
        x=train_ds,
        epochs=num_epoch,
        steps_per_epoch=steps,
        callbacks=[callbacks.TensorBoard(log_dir=saving_filepath)])
    multi_process_runner.get_barrier().wait()

    test_obj.assertTrue(file_io.list_directory_v2(saving_filepath))
def test_basic_module(self):
    """The saved model's fingerprint carries the expected hash values."""
    save_dir = self._create_saved_model()
    saved_files = file_io.list_directory_v2(save_dir)

    self.assertLen(saved_files, 4)
    self.assertIn(constants.FINGERPRINT_FILENAME, saved_files)

    fingerprint_def = self._read_fingerprint(
        file_io.join(save_dir, constants.FINGERPRINT_FILENAME))
    # graph_def_checksum is non-deterministic across serializations, so only
    # assert that it was populated.
    self.assertGreater(fingerprint_def.graph_def_checksum, 0)
    self.assertEqual(fingerprint_def.graph_def_program_hash,
                     14830488309055091319)
    self.assertEqual(fingerprint_def.signature_def_hash, 1050878586713189074)
    # TODO(b/242348400): The checkpoint hash is non-deterministic, so we
    # cannot check its value here.
    self.assertGreater(fingerprint_def.checkpoint_hash, 0)
def assertSummaryEventsWritten(self, log_dir):
    """Asserts a summary file exists in `log_dir` with the expected events."""
    summary_files = file_io.list_directory_v2(log_dir)
    self.assertNotEmpty(
        summary_files,
        'Summary should have been written and '
        'log_dir should not be empty.')

    # Scan the first summary file: every event at a non-zero step must be at
    # step 32 and carry the categorical accuracy tag.
    event_pb_written = False
    summary_path = os.path.join(log_dir, summary_files[0])
    for event_pb in summary_iterator.summary_iterator(summary_path):
        if event_pb.step > 0:
            self.assertEqual(event_pb.step, 32)
            self.assertEqual(
                event_pb.summary.value[0].tag, 'categorical_accuracy')
            event_pb_written = True
    # Verifying at least one non-zeroth step is written to summary.
    self.assertTrue(event_pb_written)
def testSidecarEvaluatorOutputsSummary(self, model_type, build_model):
    """SidecarEvaluator plus TensorBoard callback writes validation summaries."""
    # Train a freshly built model for one epoch on random data.
    model = _test_model_builder(
        model_type=model_type, compile_model=True, build_model=False)
    features = np.random.random((1000, 32))
    targets = np.random.random((1000, 10))
    dataset = dataset_ops.Dataset.from_tensor_slices(
        (features, targets)).batch(32)
    model.fit(dataset, epochs=1)

    # Save a checkpoint for the evaluator to restore from.
    checkpoint_dir = os.path.join(self.get_temp_dir(), 'ckpt')
    log_dir = os.path.join(self.get_temp_dir(), 'summary')
    logging.info('checkpoint_dir = %s, log_dir = %s', checkpoint_dir, log_dir)
    checkpoint = tracking_util.Checkpoint(
        model=model, optimizer=model.optimizer)
    checkpoint_manager = checkpoint_management.CheckpointManager(
        checkpoint, checkpoint_dir, max_to_keep=2)
    logging.info('Checkpoint manager saved to: %s', checkpoint_manager.save())
    self.assertNotEmpty(
        file_io.list_directory_v2(checkpoint_dir),
        'Checkpoint should have been written and '
        'checkpoint_dir should not be empty.')

    # Evaluate once with a separate model instance.
    eval_model = _test_model_builder(
        model_type=model_type, compile_model=True, build_model=build_model)
    sidecar_evaluator = sidecar_evaluator_lib.SidecarEvaluator(
        eval_model,
        data=dataset,
        checkpoint_dir=checkpoint_dir,
        max_evaluations=1,
        callbacks=[keras.callbacks.TensorBoard(log_dir=log_dir)])
    sidecar_evaluator.start()

    # Eval model has been restored to the same state as the original model,
    # so their weights should match. If not, restoration didn't work.
    self.assertModelsSameVariables(model, eval_model)
    self.assertSummaryEventsWritten(os.path.join(log_dir, 'validation'))
def _verify_output_dir(output_dir: Optional[str], overwrite: bool) -> None:
    """Verifies that `output_dir` can receive the output saved model.

    Only inspects the directory; never creates or modifies `output_dir`.

    Args:
        output_dir: Output directory.
        overwrite: An option allowing to overwrite the existing output
            directory if set to true.

    Raises:
        FileExistsError: Iff `output_dir` is not empty and `overwrite` is
            false.
    """
    if overwrite or output_dir is None:
        # Nothing to verify: either overwriting is explicitly allowed, or no
        # directory was given.
        return
    if file_io.file_exists_v2(output_dir) and file_io.list_directory_v2(
            output_dir):
        raise FileExistsError(
            f'Output directory already exists: {output_dir} . '
            'Please set overwrite_output_directory to true to '
            'overwrite the existing directory.')