def test_new_registry():
    """Round-trip a simple value through a registry whose directory is missing.

    The registry path points at a sub-directory of a fresh temporary directory
    that has not been created, so the write must cope with a non-existing
    directory before the value can be read back.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        fresh_registry = pathlib.Path(scratch_dir) / "newregistry"
        disk_registry.write_key(fresh_registry, "akey", "aval")
        stored = disk_registry.get_value(fresh_registry, "akey")
        assert stored == "aval"
def test_complicated_happy_path():
    """Round-trip a multi-line value containing non-ASCII characters."""
    # The payload deliberately mixes line breaks, indentation and non-ASCII
    # characters to check that nothing is altered between write and read.
    payload = """
        A long
        value with many weird character lie åøæ
        and some linebreaks"""
    with tempfile.TemporaryDirectory() as registry_dir:
        disk_registry.write_key(registry_dir, "akey", payload)
        read_back = disk_registry.get_value(registry_dir, "akey")
        assert read_back == payload
Esempio n. 3
0
def provide_saved_model(
    name: str,
    model_config: dict,
    data_config: dict,
    metadata: dict,
    output_dir: Union[os.PathLike, str],
    model_register_dir: Union[os.PathLike, str, None] = None,
    replace_cache=False,
) -> Union[os.PathLike, str]:
    """
    Ensures that the desired model exists on disk, and returns the path to it.

    If ``model_register_dir`` is set, the registry is first asked for a
    model-location stored under the key computed from ``name``, ``model_config``,
    ``data_config`` and ``metadata``. When that location is registered and exists
    on disk it is returned as-is. Otherwise the model is built, saved to
    ``output_dir`` and, if ``model_register_dir`` is set, its location is written
    to the registry under the same key.

    Parameters
    ----------
    name: str
        Name of the model to be built
    model_config: dict
        Config for the model. See
        :func:`gordo_components.builder.build_model.build_model`.
    data_config: dict
        Config for the data-configuration. See
        :func:`gordo_components.builder.build_model.build_model`.
    metadata: dict
        Extra metadata to be added to the built models if it is built. See
        :func:`gordo_components.builder.build_model.build_model`.
    output_dir: Union[os.PathLike, str]
        A path to where the model will be deposited if it is built.
    model_register_dir: Union[os.PathLike, str, None]
        A path to a register, see `gordo_components.util.disk_registry`. If this is
        None (or otherwise falsy) the model is always built and nothing is read
        from or written to a registry.
    replace_cache: bool
        If true, the registry entry for this model is deleted before the lookup,
        so that (assuming the deleted key no longer resolves) the model is rebuilt
        and the entry is replaced with the new location.

    Returns
    -------
    Union[os.PathLike, str]:
        Path to the model: either the location found in the registry or the
        location the freshly built model was saved to.
    """
    # The key is computed once and reused for delete, lookup and write.
    cache_key = calculate_model_key(
        name, model_config, data_config, metadata=metadata
    )

    if model_register_dir:
        logger.info(
            f"Model caching activated, attempting to read model-location with key "
            f"{cache_key} from register {model_register_dir}")
        if replace_cache:
            logger.info(
                "replace_cache activated, deleting any existing cache entry")
            disk_registry.delete_value(model_register_dir, cache_key)

        existing_model_location = disk_registry.get_value(
            model_register_dir, cache_key)

        # Check that the model is actually there: a registry entry may outlive
        # the directory it points at.
        if existing_model_location and Path(existing_model_location).exists():
            logger.debug(
                f"Found existing model at path {existing_model_location}, returning it"
            )

            return existing_model_location
        elif existing_model_location:
            logger.warning(
                f"Found that the model-path {existing_model_location} stored in the "
                f"registry did not exist.")
        else:
            logger.info(
                f"Did not find the model with key {cache_key} in the register at "
                f"{model_register_dir}.")

    # No usable registry entry (or no registry at all): build and save the model.
    model, metadata = build_model(name=name,
                                  model_config=model_config,
                                  data_config=data_config,
                                  metadata=metadata)
    model_location = _save_model_for_workflow(model=model,
                                              metadata=metadata,
                                              output_dir=output_dir)
    logger.info(f"Successfully built model, and deposited at {model_location}")
    if model_register_dir:
        logger.info("Writing model-location to model registry")
        disk_registry.write_key(model_register_dir, cache_key, model_location)
    return model_location
def provide_saved_model(
    name: str,
    model_config: dict,
    data_config: dict,
    metadata: dict,
    output_dir: Union[os.PathLike, str],
    model_register_dir: Union[os.PathLike, str, None] = None,
    replace_cache=False,
    evaluation_config: Union[dict, None] = None,
) -> Union[os.PathLike, str]:
    """
    Ensures that the desired model exists on disk in `output_dir`, and returns the
    path to it.

    If ``model_register_dir`` is set and ``replace_cache`` is false, the registry
    is checked for a cached model under the key computed from ``name``,
    ``model_config``, ``data_config``, ``evaluation_config`` and ``metadata``.
    A cached model is copied into ``output_dir`` (unless it already is
    ``output_dir``) and ``output_dir`` is returned. If the copy raises
    ``FileExistsError`` the model is assumed to already be in ``output_dir``
    (no validation) and that path is returned.

    Otherwise the model is built, saved to ``output_dir`` and, if
    ``model_register_dir`` is set, its location is written to the registry for
    future use.

    Parameters
    ----------
    name: str
        Name of the model to be built
    model_config: dict
        Config for the model. See
        :func:`gordo_components.builder.build_model.build_model`.
    data_config: dict
        Config for the data-configuration. See
        :func:`gordo_components.builder.build_model.build_model`.
    metadata: dict
        Extra metadata to be added to the built models if it is built. See
        :func:`gordo_components.builder.build_model.build_model`.
    output_dir: Union[os.PathLike, str]
        A path to where the model will be deposited if it is built, or copied to
        if it is found in the cache.
    model_register_dir: Union[os.PathLike, str, None]
        A path to a register, see `gordo_components.util.disk_registry`. If this is
        None (or otherwise falsy) the model is always built and nothing is read
        from or written to a registry.
    replace_cache: bool
        If true, the existing registry entry is deleted and the cache is not
        consulted, so the model is rebuilt and the entry replaced with the new
        model location.
    evaluation_config: Union[dict, None]
        Config for the evaluation. See
        :func:`gordo_components.builder.build_model.build_model`. When None, a
        fresh ``{"cv_mode": "full_build"}`` is used.

    Returns
    -------
    Union[os.PathLike, str]:
        Path to the model
    """
    # Build the default per call: a dict literal in the signature would be one
    # shared object, so a mutation by any callee would leak into later calls.
    if evaluation_config is None:
        evaluation_config = {"cv_mode": "full_build"}

    cache_key = calculate_model_key(name,
                                    model_config,
                                    data_config,
                                    evaluation_config,
                                    metadata=metadata)
    if model_register_dir:
        logger.info(
            f"Model caching activated, attempting to read model-location with key "
            f"{cache_key} from register {model_register_dir}")
        if replace_cache:
            logger.info(
                "replace_cache activated, deleting any existing cache entry")
            disk_registry.delete_value(model_register_dir, cache_key)
        else:
            cached_model_location = check_cache(model_register_dir, cache_key)
            if cached_model_location:
                logger.info(
                    f"Found model in cache, copying from {cached_model_location} to "
                    f"new location {output_dir} ")
                # Compare as strings (the same conversion used for the copy
                # below) so that a str location and a PathLike output_dir naming
                # the same path are recognised as equal.
                if str(cached_model_location) == str(output_dir):
                    return output_dir

                try:
                    # Why not shutil.copytree? Because in python <3.7 it causes
                    # errors on Azure NFS, see:
                    # - https://bugs.python.org/issue24564
                    # - https://stackoverflow.com/questions/51616058/shutil-copystat-fails-inside-docker-on-azure/51635427#51635427
                    # NOTE(review): presumably this is
                    # distutils.dir_util.copy_tree; distutils is removed in
                    # Python 3.12 - confirm before upgrading.
                    copy_tree(
                        str(cached_model_location),
                        str(output_dir),
                        preserve_mode=0,
                        preserve_times=0,
                    )
                except FileExistsError:
                    logger.warning(
                        f"Found that output directory {output_dir} "
                        f"already exists, assuming model is "
                        f"already located there")
                return output_dir

    # Cache disabled, bypassed or missed: build and save the model.
    model, metadata = build_model(
        name=name,
        model_config=model_config,
        data_config=data_config,
        metadata=metadata,
        evaluation_config=evaluation_config,
    )
    model_location = _save_model_for_workflow(model=model,
                                              metadata=metadata,
                                              output_dir=output_dir)
    logger.info(f"Successfully built model, and deposited at {model_location}")
    if model_register_dir:
        logger.info("Writing model-location to model registry")
        disk_registry.write_key(model_register_dir, cache_key, model_location)
    return model_location