Exemple #1
0
def rm_dir(dir_path: Text):
    """
    Remove a directory together with everything beneath it.

    This is a destructive operation: the removal is delegated straight to
    `file_io.delete_recursively_v2` without any confirmation step.

    Args:
        dir_path (str): Path of the directory to remove.
    """
    file_io.delete_recursively_v2(dir_path)
    def delete_backup(self):
        """Remove the backup directory once it is no longer needed.

        Backups should not exist after `fit()` finishes successfully. The
        directory of the write checkpoint manager is deleted recursively, but
        only when that manager is the very same object as the read checkpoint
        manager; otherwise nothing is touched.
        """
        if self.write_checkpoint_manager is not self.read_checkpoint_manager:
            return
        backup_dir = self.write_checkpoint_manager.directory
        file_io.delete_recursively_v2(backup_dir)
 def test_create_and_delete_directory_tree_recursive(self):
     # Build a nested directory tree under the `ram://` scheme, parents first.
     for directory in (
             'ram://testdirectory',
             'ram://testdirectory/subdir1',
             'ram://testdirectory/subdir2',
             'ram://testdirectory/subdir1/subdir3',
     ):
         file_io.create_dir_v2(directory)

     # Put a file at the deepest level so the tree is not made of empty
     # directories only.
     leaf_file = gfile.GFile('ram://testdirectory/subdir1/subdir3/a.txt', 'w')
     with leaf_file as handle:
         handle.write('Hello, world.')

     # Removing the root must leave nothing behind beneath it.
     file_io.delete_recursively_v2('ram://testdirectory')
     leftovers = gfile.Glob('ram://testdirectory/*')
     self.assertEqual(leftovers, [])
  def delete_backup(self):
    """Remove the backup files left behind by checkpointing.

    Backups should not exist once `fit()` has finished successfully, so every
    path matching the write checkpoint manager's prefix is deleted first,
    followed by any `checkpoint` entry inside the manager's directory.
    """
    # pylint: disable=protected-access
    prefix_pattern = self.write_checkpoint_manager._prefix + '*'
    for matched_path in file_io.get_matching_files_v2(prefix_pattern):
      file_io.delete_recursively_v2(matched_path)

    # The second lookup deliberately runs only after the first batch of
    # deletions has completed.
    state_pattern = os.path.join(
        self.write_checkpoint_manager.directory, 'checkpoint')
    for matched_path in file_io.get_matching_files_v2(state_pattern):
      file_io.delete_recursively_v2(matched_path)
def _create_empty_output_dir(output_directory: str) -> None:
  """Ensures `output_directory` exists and starts out empty.

  Anything already present at `output_directory` is removed recursively
  before the directory is created again, together with any missing parent
  and intermediate directories.

  Args:
    output_directory: Path of the output directory to (re)create.
  """
  already_exists = file_io.file_exists_v2(output_directory)
  if already_exists:
    logging.info(
        'Deleting existing directory for quantized model output: %s .',
        output_directory)
    file_io.delete_recursively_v2(output_directory)

  file_io.recursive_create_dir_v2(output_directory)
 def test_create_and_delete_directory(self):
     # A freshly created, empty directory can be removed again right away.
     directory = 'ram://testdirectory'
     file_io.create_dir_v2(directory)
     file_io.delete_recursively_v2(directory)
        def fn(model_path, checkpoint_dir):
            """Train a model, then save/reload it and checkpoint/restore it.

            Uses names from the enclosing scope that are not visible in this
            block (`per_worker_batch_size`, `num_workers`, `shard_policy`,
            `self`, `build_and_compile_cnn_model`, `mnist_dataset`).
            NOTE(review): presumably executed once per task by a multi-process
            runner, given the barrier calls below -- confirm against caller.

            Args:
                model_path: Path the model is saved to by the chief and loaded
                    from afterwards. Non-chief tasks save to a temporary
                    sub-directory next to it instead.
                checkpoint_dir: Directory the chief's checkpoint manager
                    writes to and from which the latest checkpoint is
                    restored. Non-chief tasks write to a temporary
                    sub-directory instead.

            Raises:
                RuntimeError: If, after a barrier, `model_path` or
                    `checkpoint_dir` does not exist, or if the existence of
                    this task's own write path does not match whether the
                    task is chief.
            """
            global_batch_size = per_worker_batch_size * num_workers
            strategy = collective_all_reduce_strategy.CollectiveAllReduceStrategy(
            )
            # The model is built and compiled inside the strategy's scope.
            with strategy.scope():
                multi_worker_model = build_and_compile_cnn_model()

            # Checkpoint callback writing to `checkpoint` under the test's
            # temp dir.
            callbacks = [
                keras.callbacks.ModelCheckpoint(
                    filepath=os.path.join(self.get_temp_dir(), 'checkpoint'))
            ]

            multi_worker_dataset = mnist_dataset(global_batch_size)
            # Only override the auto-shard policy when one was supplied.
            if shard_policy:
                options = dataset_ops.Options()
                options.experimental_distribute.auto_shard_policy = shard_policy
                multi_worker_dataset = multi_worker_dataset.with_options(
                    options)

            multi_worker_model.fit(multi_worker_dataset,
                                   epochs=2,
                                   steps_per_epoch=20,
                                   callbacks=callbacks)

            def _is_chief(task_type, task_id):
                # Chief means: no task type at all, an explicit 'chief', or
                # worker number 0.
                return task_type is None or task_type == 'chief' or (
                    task_type == 'worker' and task_id == 0)

            def _get_temp_dir(dirpath, task_id):
                # Creates (if needed) and returns `<dirpath>/workertemp_<id>`.
                base_dirpath = 'workertemp_' + str(task_id)
                temp_dir = os.path.join(dirpath, base_dirpath)
                file_io.recursive_create_dir_v2(temp_dir)
                return temp_dir

            def write_filepath(filepath, task_type, task_id):
                # The chief keeps the directory of `filepath`; any other task
                # is redirected into its own temporary sub-directory, keeping
                # the same base name.
                dirpath = os.path.dirname(filepath)
                base = os.path.basename(filepath)
                if not _is_chief(task_type, task_id):
                    dirpath = _get_temp_dir(dirpath, task_id)
                return os.path.join(dirpath, base)

            task_type, task_id = (strategy.cluster_resolver.task_type,
                                  strategy.cluster_resolver.task_id)
            write_model_path = write_filepath(model_path, task_type, task_id)

            # Every task saves; non-chief tasks then remove the temporary
            # directory that held their copy.
            multi_worker_model.save(write_model_path)
            if not _is_chief(task_type, task_id):
                file_io.delete_recursively_v2(
                    os.path.dirname(write_model_path))

            # Make sure chief finishes saving before non-chief's assertions.
            multi_process_runner.get_barrier().wait()

            # `model_path` must exist, and this task's own write path must
            # exist exactly when the task is chief.
            if not file_io.file_exists_v2(model_path):
                raise RuntimeError()
            if file_io.file_exists_v2(write_model_path) != _is_chief(
                    task_type, task_id):
                raise RuntimeError()

            # Reload the saved model from `model_path` and train it further.
            loaded_model = keras.saving.save.load_model(model_path)
            loaded_model.fit(multi_worker_dataset,
                             epochs=2,
                             steps_per_epoch=20)

            # Same save / clean-up / verify sequence, this time for
            # checkpoints.
            checkpoint = tracking_util.Checkpoint(model=multi_worker_model)
            write_checkpoint_dir = write_filepath(checkpoint_dir, task_type,
                                                  task_id)
            checkpoint_manager = checkpoint_management.CheckpointManager(
                checkpoint, directory=write_checkpoint_dir, max_to_keep=1)

            checkpoint_manager.save()
            if not _is_chief(task_type, task_id):
                file_io.delete_recursively_v2(write_checkpoint_dir)

            # Make sure chief finishes saving before non-chief's assertions.
            multi_process_runner.get_barrier().wait()

            # `checkpoint_dir` must exist, and this task's own checkpoint
            # directory must exist exactly when the task is chief.
            if not file_io.file_exists_v2(checkpoint_dir):
                raise RuntimeError()
            if file_io.file_exists_v2(write_checkpoint_dir) != _is_chief(
                    task_type, task_id):
                raise RuntimeError()

            # Restore the latest checkpoint found in `checkpoint_dir` and
            # train again.
            latest_checkpoint = checkpoint_management.latest_checkpoint(
                checkpoint_dir)
            checkpoint.restore(latest_checkpoint)
            multi_worker_model.fit(multi_worker_dataset,
                                   epochs=2,
                                   steps_per_epoch=20)

            logging.info('testMultiWorkerTutorial successfully ends')