    def test_create_and_validate_inference_load_config_from_saved_models(
            self, use_recent):
        project_dir = self.get_temp_dir()
        saved_models_dir = os.path.join(project_dir,
                                        ProjectDirs.TRAINER.saved_models)
        maybe_mkdir(saved_models_dir)
        saved_models_tag_dirs = [
            os.path.join(saved_models_dir, sd)
            for sd in ['first', 'second', 'third']
        ]
        for d in saved_models_tag_dirs:
            maybe_mkdir(d)

        if use_recent:
            saved_model_path = None
            saved_model_path_must = saved_models_tag_dirs[-1]
        else:
            saved_model_path = "first"
            saved_model_path_must = saved_models_tag_dirs[0]

        config = cconfigs.create_and_validate_inference_load_config(
            project_dir=project_dir, saved_model_path=saved_model_path)
        self.assertIsInstance(config, cconfigs.InferenceLoadConfig)
        self.assertEqual(config.saved_model, saved_model_path_must)
        self.assertIsNone(config.checkpoint)
        self.assertIsNone(config.meta_graph)

        with self.assertRaises(ValueError):
            _ = cconfigs.create_and_validate_inference_load_config(
                project_dir=project_dir,
                saved_model_path="path_without_file",
                checkpoint_path="path_without_file")
        with self.assertRaises(FileNotFoundError):
            _ = cconfigs.create_and_validate_inference_load_config(
                project_dir=project_dir, saved_model_path="path_without_file")
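
Every example on this page depends on io_utils.maybe_mkdir, whose definition is not shown. A minimal sketch of such a helper, assuming it does nothing more than create the directory when it is missing (a guess, not the nucleus7 implementation):

import os

def maybe_mkdir(dir_name: str) -> None:
    # hypothetical re-implementation: create the directory tree only if it
    # does not exist yet; exist_ok avoids a race between check and creation
    os.makedirs(dir_name, exist_ok=True)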
Example 2
def _create_project_dirs_with_run_subfolders(project_dir,
                                             subdir,
                                             project_dirs_structure,
                                             run_name: Optional[str] = None,
                                             continue_last=False):
    logger = logging.getLogger(__name__)
    io_utils.maybe_mkdir(project_dir)
    lock = project_utils.ProjectLock(project_dir)
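    # serialize concurrent runs on the same project: presumably blocks here
    # until the project lock can be acquired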
    lock.lock_or_wait()
    if continue_last:
        allowed_content_for_run = _get_allowed_content_for_run_to_continue(
            project_dirs_structure)
    else:
        allowed_content_for_run = ["configs"]
    run_subfolder = _select_run_subfolder(
        project_dir,
        subdir,
        run_name=run_name,
        allowed_content_for_run=allowed_content_for_run)
    logger.info("Use %s run", run_subfolder)
    project_structure = _update_project_structure_with_run_dir(
        run_subfolder, project_dirs_structure)
    kpi_dirs = _create_project_directories(project_dir, project_structure)
    dir_with_runs = os.path.join(project_dir, subdir)
    _add_symlink_for_last_run(os.path.join(dir_with_runs, run_subfolder))
    lock.release()
    return kpi_dirs
Example 3
    def save(self, name: str, values):
        io_utils.maybe_mkdir(self.save_target)
        save_fname = os.path.join(self.save_target, name + ".json")
        values_filtered, values_filtered_out = kpi_utils.filter_kpi_values(
            values)
        if values_filtered_out:
            logging.info(
                "Following KPI keys will not be stored to json: "
                "%s", list(values_filtered_out.keys()))
        with open(save_fname, 'w') as file:
            json.dump(values_filtered, file, indent=2, sort_keys=True)
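
kpi_utils.filter_kpi_values is not shown on this page; a minimal stand-in, assuming it splits the values into JSON-serializable entries and the rest, might look like this:

import json

def filter_kpi_values(values: dict):
    # hypothetical stand-in: keep entries json.dump can encode and collect
    # the rest so the caller can log which keys were dropped
    serializable, filtered_out = {}, {}
    for key, value in values.items():
        try:
            json.dumps(value)
            serializable[key] = value
        except TypeError:
            filtered_out[key] = value
    return serializable, filtered_out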
Example 4
    def cache(self, values):
        io_utils.maybe_mkdir(self.cache_target)
        cache_fname = self._get_cache_fname()
        if os.path.exists(cache_fname):
            logger = logging.getLogger(__name__)
            logger.warning("Cache with name %s already exists!", cache_fname)
            return

        inputs_flatten = nest_utils.flatten_nested_struct(values)
        with open(cache_fname, "w") as file:
            json.dump(inputs_flatten, file, default=lambda x: x.tolist())
Example 5
    def begin(self):
        """
        Add graph to summary writer and create directory for summaries if it
        does not exist

        overridden from :obj:`tf.train.SummarySaverHook`. See its documentation
        for more information
        """
        self._summary_op = tf.get_collection(tf.GraphKeys.SUMMARIES)
        super().begin()
        io_utils.maybe_mkdir(self._output_dir)
        graph = self._get_graph_fn()
        self._summary_writer.add_graph(graph)
Example 6
    def restore(self):
        io_utils.maybe_mkdir(self.cache_target)
        cache_fname = self._get_cache_fname()
        if not os.path.exists(cache_fname):
            return None
        with open(cache_fname, "r") as file:
            restored = json.load(file)
        for each_key in restored:
            if isinstance(restored[each_key], list):
                restored[each_key] = np.array(restored[each_key])
        restored_unflatten = nest_utils.unflatten_dict_to_nested(restored)
        logger = logging.getLogger(__name__)
        logger.debug("restoring KPI values from %s", cache_fname)
        return restored_unflatten
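
Examples 4 and 6 form a round trip: cache() serializes numpy arrays as plain lists through the default=lambda x: x.tolist() hook, and restore() promotes lists back to arrays after loading. A self-contained sketch of that round trip:

import json

import numpy as np

values = {"accuracy": np.array([0.9, 0.95]), "step": 100}
# arrays are not JSON-serializable, so the default hook turns them into lists
encoded = json.dumps(values, default=lambda x: x.tolist())
restored = json.loads(encoded)
# mirror restore(): promote every list back to a numpy array
for each_key in restored:
    if isinstance(restored[each_key], list):
        restored[each_key] = np.array(restored[each_key])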
Example 7
    def set_save_name(self, save_name: Optional[str] = None):
        """
        Set the full save target by joining prefix, save name and suffix
        together with the save target

        Parameters
        ----------
        save_name
            save name
        """
        logger = logging.getLogger(__name__)
        if isinstance(save_name, bytes):
            save_name = save_name.decode()

        if self.remove_save_ext and save_name:
            save_name = os.path.splitext(save_name)[0]
        if save_name:
            save_name = os.path.join(*file_utils.get_basename_with_depth(
                save_name, self.save_name_depth))

        if not save_name:
            save_name = self.get_save_name_from_iteration_info()

        additional_names = [self.save_prefix, save_name, self.save_suffix]
        additional_names_concat = "-".join(
            [each_name for each_name in additional_names if
             each_name is not None])
        self._save_name = self.target_separator.join(
            [self.log_dir, additional_names_concat])

        if save_name and os.path.sep in save_name:
            root_directory = os.path.split(self._save_name)[0]
            try:
                os.makedirs(root_directory)
                logger.info("Directory %s was created by %s",
                            root_directory, self.name)
            except FileExistsError:
                logger.debug("Directory %s needed by %s already exists",
                             root_directory, self.name)
                io_utils.maybe_mkdir(root_directory)
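
The try/except around os.makedirs at the end of set_save_name can be collapsed on Python 3.2+; a shorter equivalent of that block (a simplification, not the library code):

import os

# exist_ok=True makes the FileExistsError branch unnecessary
os.makedirs(root_directory, exist_ok=True)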
Example 8
def _get_cache_fname(cache_dir: str, file_name: str):
    """
    Create name for the cache file with a unique temporary suffix

    Parameters
    ----------
    cache_dir
        directory for cache
    file_name
        file name; a unique temporary suffix will be appended to it

    Returns
    -------
    cache_fname
        file name for cache
    """
    if cache_dir is None:
        return None
    io_utils.maybe_mkdir(cache_dir)
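    # mkstemp creates a real file to reserve a unique name; the file is
    # removed right away and only its name is reused for the cache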
    _, cache_fname = tempfile.mkstemp(prefix=file_name + "-", dir=cache_dir)
    os.remove(cache_fname)
    logger = logging.getLogger(__name__)
    logger.info("Use cache with file file_name %s", cache_fname)
    return cache_fname
Example 9
def _get_and_maybe_create_summary_dir_for_mode(summary_dir: str,
                                               mode: str) -> str:
    summary_dir = os.path.join(summary_dir, mode)
    io_utils.maybe_mkdir(summary_dir)
    return summary_dir
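
A hypothetical call, with a made-up summaries root:

train_summary_dir = _get_and_maybe_create_summary_dir_for_mode(
    "/tmp/project/summaries", "train")
# -> "/tmp/project/summaries/train", created if it was missing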
Example 10
def create_trainer_project_dirs(project_dir: str,
                                continue_training: bool = False
                                ) -> _TrainerDirs:
    """
    Create project directories for training if needed and check if a training
    project already exists

    Create following directories under project_dir:

    - training:

        - artifacts - artifacts of the training, e.g. dna, file_lists etc.
        - callbacks/{train/eval} - callbacks can write here
        - summaries/{train/eval} - tensorflow summaries

    - saved_models - saved models in timestamp
      subfolders and inference_graph.meta together with
      input_output_names.json
    - checkpoints - checkpoints like meta graph and the weights
    - (optional, if no mlflow URI was specified) mlruns - mlflow tracking uri

    Parameters
    ----------
    project_dir
        path to project directory
    continue_training
        if the training should be continued in the same project folder; if no
        project directories exist, it has no effect; otherwise, if this is set
        to False, no training will be started

    Returns
    -------
    training_dirs
        trainer directories

    Raises
    ------
    FileExistsError
        if this run was already used and exists inside the
        nucleus7_project.json file under the runs key
    FileExistsError
        if project_dir has content other than the allowed one
    """
    lock = project_utils.ProjectLock(project_dir)
    lock.lock_or_wait()
    io_utils.maybe_mkdir(os.path.join(project_dir, _TRAINING_DIR))
    try:
        _validate_training_project(project_dir, continue_training)
    except Exception:
        lock.release()
        raise

    training_dirs = _create_project_directories(project_dir,
                                                ProjectDirs.TRAINER)
    for each_mode in ["train", "eval"]:
        io_utils.maybe_mkdir(os.path.join(training_dirs.summaries, each_mode))
        io_utils.maybe_mkdir(os.path.join(training_dirs.callbacks, each_mode))
    project_utils.collect_and_add_project_meta_info(project_dir)
    project_utils.add_runtype_to_project_meta_info(project_dir)
    lock.release()
    return training_dirs
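
A hypothetical invocation of the function above (the project path is made up):

training_dirs = create_trainer_project_dirs(
    "/tmp/my_nucleus7_project", continue_training=False)
# training_dirs carries the created paths, e.g. training_dirs.summaries
# now contains train/ and eval/ subdirectories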