def _endpoints_from_watchman(self, endpoint: str) -> typing.List[EndpointMetadata]:
    """
    Get a list of endpoints by querying Watchman
    """
    resp = requests.get(endpoint)
    if not resp.ok:
        raise IOError(f"Failed to get endpoints: {resp.content}")
    return [
        EndpointMetadata(
            target_name=data["endpoint-metadata"]["metadata"]["name"],
            healthy=data["healthy"],
            endpoint=f'{self.base_url}{data["endpoint"].rstrip("/")}',
            tag_list=normalize_sensor_tags(
                data["endpoint-metadata"]["metadata"]["dataset"]["tag_list"]
            ),
            target_tag_list=normalize_sensor_tags(
                data["endpoint-metadata"]["metadata"]["dataset"]["target_tag_list"]
            ),
            resolution=data["endpoint-metadata"]["metadata"]["dataset"]["resolution"],
            model_offset=data["endpoint-metadata"]["metadata"]["model"].get(
                "model-offset", 0
            ),
        )
        if data["healthy"]
        else EndpointMetadata(
            target_name=None,
            healthy=data["healthy"],
            endpoint=f'{self.base_url}{data["endpoint"].rstrip("/")}',
            tag_list=None,
            target_tag_list=None,
            resolution=None,
            model_offset=None,
        )
        for data in resp.json()["endpoints"]
    ]
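# Hedged sketch of the Watchman JSON shape that _endpoints_from_watchman expects,
# inferred from the key accesses above. All values are made-up placeholders for
# illustration only.
example_watchman_response = {
    "endpoints": [
        {
            "healthy": True,
            "endpoint": "/gordo/v0/some-project/some-machine/",
            "endpoint-metadata": {
                "metadata": {
                    "name": "some-machine",
                    "dataset": {
                        "tag_list": ["TRC-123"],
                        "target_tag_list": ["TRC-123"],
                        "resolution": "10T",
                    },
                    "model": {"model-offset": 0},
                }
            },
        }
    ]
}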
def __init__(
    self,
    data_provider: GordoBaseDataProvider,
    from_ts: datetime,
    to_ts: datetime,
    tag_list: List[Union[str, Dict, SensorTag]],
    target_tag_list: Optional[List[Union[str, Dict, SensorTag]]] = None,
    resolution: str = "10T",
    row_filter: str = "",
    **_kwargs,
):
    """
    Creates a TimeSeriesDataset backed by a provided dataprovider.

    A TimeSeriesDataset is a dataset backed by timeseries, but resampled,
    aligned, and (optionally) filtered.

    Parameters
    ----------
    data_provider: GordoBaseDataProvider
        A dataprovider which can provide dataframes for tags from from_ts to to_ts
    from_ts: datetime
        Earliest possible point in the dataset (inclusive)
    to_ts: datetime
        Latest possible point in the dataset (exclusive)
    tag_list: List[Union[str, Dict, sensor_tag.SensorTag]]
        List of tags to include in the dataset. The elements can be strings,
        dictionaries or SensorTag namedtuples.
    target_tag_list: Optional[List[Union[str, Dict, sensor_tag.SensorTag]]]
        List of tags to set as the dataset y. These will be treated the same as
        tag_list when fetching and pre-processing (resampling) but will be split
        into the y return from ``.get_data()``
    resolution: str
        The bucket size for grouping all incoming time data (e.g. "10T").
    row_filter: str
        Filter on the rows. Only rows satisfying the filter will be in the dataset.
        See :func:`gordo_components.dataset.filter_rows.pandas_filter_rows` for
        further documentation of the filter format.
    _kwargs
    """
    self.from_ts = from_ts
    self.to_ts = to_ts
    self.tag_list = normalize_sensor_tags(tag_list)
    self.target_tag_list = (
        normalize_sensor_tags(target_tag_list) if target_tag_list else []
    )
    self.resolution = resolution
    self.data_provider = data_provider
    self.row_filter = row_filter

    if not self.from_ts.tzinfo or not self.to_ts.tzinfo:
        raise ValueError(
            f"Timestamps ({self.from_ts}, {self.to_ts}) need to include timezone "
            f"information"
        )
def target_tags(self) -> typing.List[SensorTag]:
    if "target_tag_list" in current_app.metadata["dataset"]:
        return normalize_sensor_tags(
            current_app.metadata["dataset"]["target_tag_list"]
        )
    else:
        return []
def test_load_series_dry_run(dates, ncs_reader):
    valid_tag_list_no_asset = normalize_sensor_tags(["TRC-123", "TRC-321"])
    for frame in ncs_reader.load_series(
        dates[0], dates[1], valid_tag_list_no_asset, dry_run=True
    ):
        assert len(frame) == 0
def test_load_series_known_prefix(dates, ncs_reader):
    valid_tag_list_no_asset = normalize_sensor_tags(["TRC-123", "TRC-321"])

    for frame in ncs_reader.load_series(dates[0], dates[1], valid_tag_list_no_asset):
        assert len(frame) == 20

    for frame in ncs_reader.load_series(dates[0], dates[1], valid_tag_list_no_asset):
        assert len(frame) == 20
def _get_default_dataset_config():
    from_ts = dateutil.parser.isoparse("2017-01-01T08:56:00+00:00")
    to_ts = dateutil.parser.isoparse("2017-01-01T10:01:00+00:00")
    return {
        "type": "TimeSeriesDataset",
        "from_ts": from_ts,
        "to_ts": to_ts,
        "tag_list": normalize_sensor_tags(
            ["TRC-FIQ -39-0706", "GRA-EM-23-0003ARV.PV"]
        ),
        "data_provider": DataLakeProvider(),
    }
def build(output_dir, model_config, data_config, metadata, model_register_dir):
    """
    Build a model and deposit it into 'output_dir' given the appropriate config
    settings.

    \b
    Parameters
    ----------
    output_dir: str
        Directory to save model & metadata to.
    model_config: dict
        kwargs to be used in initializing the model. Should also contain kwarg
        'type' which references the model to use. ie. KerasAutoEncoder
    data_config: dict
        kwargs to be used in initializing the dataset. Should also contain kwarg
        'type' which references the dataset to use. ie. InfluxBackedDataset
    metadata: dict
        Any additional metadata to save under the key 'user-defined'
    model_register_dir: path
        Path to a directory which will index existing models and their locations,
        used for re-using old models instead of rebuilding them. If omitted then
        always rebuild
    """
    # TODO: Move all data related input from environment variable to data_config,
    # TODO: thereby removing all these data_config['variable'] lines
    data_config["tag_list"] = data_config.pop("tags")

    # TODO: Move parsing from here, into the InfluxDataSet class
    data_config["from_ts"] = dateutil.parser.isoparse(
        data_config.pop("train_start_date")
    )

    # TODO: Move parsing from here, into the InfluxDataSet class
    data_config["to_ts"] = dateutil.parser.isoparse(data_config.pop("train_end_date"))

    # Set default data provider for data config
    data_config["data_provider"] = DataLakeProvider()

    asset = data_config.get("asset", None)
    tag_list = normalize_sensor_tags(data_config["tag_list"], asset)
    data_config["tag_list"] = tag_list

    logger.info(f"Building, output will be at: {output_dir}")
    logger.info(f"Model config: {model_config}")
    logger.info(f"Data config: {data_config}")
    logger.info(f"Register dir: {model_register_dir}")

    model_location = provide_saved_model(
        model_config, data_config, metadata, output_dir, model_register_dir
    )
    with open("/tmp/model-location.txt", "w") as f:
        f.write(model_location)
    return 0
def test_load_series_with_filter_bad_data(dates, remove_status_codes):
    ncs_reader = NcsReader(
        AzureDLFileSystemMock(), remove_status_codes=remove_status_codes
    )

    valid_tag_list = normalize_sensor_tags(["TRC-322"])
    series_gen = ncs_reader.load_series(dates[0], dates[1], valid_tag_list)

    # Checks that the bad data from the files under
    # tests/gordo_components/data_provider/data/datalake/TRC-322
    # is filtered out. 20 rows exist, 5 of them have the value 0.
    n_expected = 15 if remove_status_codes != [] else 20
    assert all(len(series) == n_expected for series in series_gen)
def test_can_handle_tag_unknow_prefix_raise(ncs_reader):
    with pytest.raises(ValueError):
        ncs_reader.can_handle_tag(normalize_sensor_tags(["XYZ-123"])[0])
def test_normalize_sensor_tags_not_ok():
    with pytest.raises(ValueError):
        tag_list_as_list_of_strings_nonsense = [TAG_NAME1, TAG_NAME2]
        normalize_sensor_tags(tag_list_as_list_of_strings_nonsense)
@pytest.fixture
def ncs_reader():
    return NcsReader(AzureDLFileSystemMock())


@pytest.fixture
def dates():
    return (
        dateutil.parser.isoparse("2000-01-01T08:56:00+00:00"),
        dateutil.parser.isoparse("2001-09-01T10:01:00+00:00"),
    )


@pytest.mark.parametrize(
    "tag_to_check",
    [normalize_sensor_tags(["TRC-123"])[0], SensorTag("XYZ-123", "1776-TROC")],
)
def test_can_handle_tag_ok(tag_to_check, ncs_reader):
    assert ncs_reader.can_handle_tag(tag_to_check)


@pytest.mark.parametrize(
    "tag_to_check", [SensorTag("TRC-123", None), SensorTag("XYZ-123", "123-XXX")]
)
def test_can_handle_tag_notok(tag_to_check, ncs_reader):
    assert not ncs_reader.can_handle_tag(tag_to_check)


def test_can_handle_tag_unknow_prefix_raise(ncs_reader):
    with pytest.raises(ValueError):
        ncs_reader.can_handle_tag(normalize_sensor_tags(["XYZ-123"])[0])
def build(
    name,
    output_dir,
    model_config,
    data_config,
    metadata,
    model_register_dir,
    print_cv_scores,
    model_parameter,
    model_location_file,
    data_provider_threads,
):
    """
    Build a model and deposit it into 'output_dir' given the appropriate config
    settings.

    \b
    Parameters
    ----------
    name: str
        Name given to the model to build
    output_dir: str
        Directory to save model & metadata to.
    model_config: str
        String containing a yaml which will be parsed to a dict which will be used
        in initializing the model. Should also contain key 'type' which references
        the model to use. ie. KerasAutoEncoder
    data_config: dict
        kwargs to be used in initializing the dataset. Should also contain kwarg
        'type' which references the dataset to use. ie. InfluxBackedDataset
    metadata: dict
        Any additional metadata to save under the key 'user-defined'
    model_register_dir: path
        Path to a directory which will index existing models and their locations,
        used for re-using old models instead of rebuilding them. If omitted then
        always rebuild
    print_cv_scores: bool
        Print cross validation scores to stdout
    model_parameter: List[Tuple]
        List of model key-values, where the values will be injected into the model
        config wherever there is a jinja variable with the key.
    model_location_file: str/path
        Path to a file to open and write the location of the serialized model to.
    data_provider_threads: int
        Number of threads to use for the data provider when fetching data.
    """
    # TODO: Move all data related input from environment variable to data_config,
    # TODO: thereby removing all these data_config['variable'] lines
    data_config["tag_list"] = data_config.pop("tags")

    # TODO: Move parsing from here, into the InfluxDataSet class
    data_config["from_ts"] = dateutil.parser.isoparse(
        data_config.pop("train_start_date")
    )

    # TODO: Move parsing from here, into the InfluxDataSet class
    data_config["to_ts"] = dateutil.parser.isoparse(data_config.pop("train_end_date"))

    # Set default data provider for data config
    data_config["data_provider"] = DataLakeProvider(threads=data_provider_threads)

    asset = data_config.get("asset", None)
    tag_list = normalize_sensor_tags(data_config["tag_list"], asset)
    data_config["tag_list"] = tag_list

    logger.info(f"Building, output will be at: {output_dir}")
    logger.info(f"Raw model config: {model_config}")
    logger.info(f"Data config: {data_config}")
    logger.info(f"Register dir: {model_register_dir}")

    model_parameter = dict(model_parameter)
    model_config = expand_model(model_config, model_parameter)
    model_config = yaml.full_load(model_config)

    # Convert the config into a pipeline, and back into definition to ensure
    # all default parameters are part of the config.
    logger.debug(f"Ensuring the passed model config is fully expanded.")
    model_config = pipeline_into_definition(pipeline_from_definition(model_config))

    model_location = provide_saved_model(
        name, model_config, data_config, metadata, output_dir, model_register_dir
    )

    # If the model is cached but without CV scores then we force a rebuild. We do this
    # by deleting the entry in the cache and then rerun `provide_saved_model`
    # (leaving the old model laying around)
    if print_cv_scores:
        saved_metadata = load_metadata(model_location)
        all_scores = get_all_score_strings(saved_metadata)
        if not all_scores:
            logger.warning(
                "Found that loaded model does not have cross validation values "
                "even though we were asked to print them, clearing cache and "
                "rebuilding model"
            )
            model_location = provide_saved_model(
                name,
                model_config,
                data_config,
                metadata,
                output_dir,
                model_register_dir,
                replace_cache=True,
            )
            saved_metadata = load_metadata(model_location)
            all_scores = get_all_score_strings(saved_metadata)

        for score in all_scores:
            print(score)

    # Write out the model location to this file.
    model_location_file.write(model_location)
    return 0
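# A hedged illustration of the `model_parameter` injection described in the
# docstring above: `expand_model` fills jinja variables in the raw yaml model
# config with the given key-values before `yaml.full_load` parses it. The config
# content and parameter name below are made up for illustration only.
raw_model_config = "type: KerasAutoEncoder\nepochs: {{ epochs }}"
model_parameter = dict([("epochs", 10)])  # e.g. from `--model-parameter epochs 10`
expanded = expand_model(raw_model_config, model_parameter)
# `expanded` should now contain "epochs: 10" and be ready for yaml.full_load(expanded).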
def __init__(
    self,
    data_provider: GordoBaseDataProvider,
    from_ts: datetime,
    to_ts: datetime,
    tag_list: List[Union[str, Dict, SensorTag]],
    target_tag_list: Optional[List[Union[str, Dict, SensorTag]]] = None,
    resolution: str = "10T",
    row_filter: str = "",
    aggregation_methods: Union[str, List[str], Callable] = "mean",
    **_kwargs,
):
    """
    Creates a TimeSeriesDataset backed by a provided dataprovider.

    A TimeSeriesDataset is a dataset backed by timeseries, but resampled,
    aligned, and (optionally) filtered.

    Parameters
    ----------
    data_provider: GordoBaseDataProvider
        A dataprovider which can provide dataframes for tags from from_ts to to_ts
    from_ts: datetime
        Earliest possible point in the dataset (inclusive)
    to_ts: datetime
        Latest possible point in the dataset (exclusive)
    tag_list: List[Union[str, Dict, sensor_tag.SensorTag]]
        List of tags to include in the dataset. The elements can be strings,
        dictionaries or SensorTag namedtuples.
    target_tag_list: Optional[List[Union[str, Dict, sensor_tag.SensorTag]]]
        List of tags to set as the dataset y. These will be treated the same as
        tag_list when fetching and pre-processing (resampling) but will be split
        into the y return from ``.get_data()``
    resolution: str
        The bucket size for grouping all incoming time data (e.g. "10T").
    row_filter: str
        Filter on the rows. Only rows satisfying the filter will be in the dataset.
        See :func:`gordo_components.dataset.filter_rows.pandas_filter_rows` for
        further documentation of the filter format.
    aggregation_methods
        Aggregation method(s) to use for the resampled buckets. If a single
        resample method is provided then the resulting dataframe will have names
        identical to the names of the series it got in. If several
        aggregation-methods are provided then the resulting dataframe will have a
        multi-level column index, with the series-name as the first level, and the
        aggregation method as the second level. See
        :py:func:`pandas.core.resample.Resampler.aggregate` for more information
        on possible aggregation methods.
    _kwargs
    """
    self.from_ts = from_ts
    self.to_ts = to_ts
    self.tag_list = normalize_sensor_tags(tag_list)
    self.target_tag_list = (
        normalize_sensor_tags(target_tag_list) if target_tag_list else []
    )
    self.resolution = resolution
    self.data_provider = data_provider
    self.row_filter = row_filter
    self.aggregation_methods = aggregation_methods

    if not self.from_ts.tzinfo or not self.to_ts.tzinfo:
        raise ValueError(
            f"Timestamps ({self.from_ts}, {self.to_ts}) need to include timezone "
            f"information"
        )
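# A minimal usage sketch of the constructor above. The provider, tag names and
# aggregation methods are placeholders; any GordoBaseDataProvider implementation
# and any tags it can serve would do. Timestamps must carry timezone information.
dataset = TimeSeriesDataset(
    data_provider=DataLakeProvider(),  # assumed provider, for illustration
    from_ts=dateutil.parser.isoparse("2017-01-01T00:00:00+00:00"),
    to_ts=dateutil.parser.isoparse("2017-01-02T00:00:00+00:00"),
    tag_list=["TRC-123", "TRC-321"],
    resolution="10T",
    aggregation_methods=["mean", "max"],  # multiple methods -> multi-level columns
)
X, y = dataset.get_data()  # y holds the target tags when target_tag_list is given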
def test_normalize_iroc_tags():
    normalized_tags = normalize_sensor_tags(IROC_MANY_ASSETS_TAG_LIST)
    assert normalized_tags == IROC_MANY_ASSETS_SENSOR_TAG_LIST
def test_normalize_sensor_tags_ok(good_input_tags, asset, expected_output_tags):
    tag_list_as_list_of_sensor_tag = normalize_sensor_tags(good_input_tags, asset)
    assert tag_list_as_list_of_sensor_tag == expected_output_tags
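# A short, hedged summary of what the normalize_sensor_tags tests exercise:
# string tags whose prefix identifies the asset (e.g. "TRC-...") can be
# normalized without passing an asset, an explicit asset can be supplied for
# tags the function cannot resolve on its own, and unresolvable input raises
# ValueError. The tag and asset values below are placeholders.
tags_from_prefix = normalize_sensor_tags(["TRC-123"])                 # asset inferred
tags_with_asset = normalize_sensor_tags(["SOME-TAG"], "some-asset")   # asset supplied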
@pytest.fixture
def ncs_reader():
    return NcsReader(AzureDLFileSystemMock())


@pytest.fixture
def dates():
    return (
        dateutil.parser.isoparse("2000-01-01T08:56:00+00:00"),
        dateutil.parser.isoparse("2001-09-01T10:01:00+00:00"),
    )


@pytest.mark.parametrize(
    "tag_to_check",
    [normalize_sensor_tags(["TRC-123"])[0], SensorTag("XYZ-123", "1776-TROC")],
)
def test_can_handle_tag_ok(tag_to_check, ncs_reader):
    assert ncs_reader.can_handle_tag(tag_to_check)


@pytest.mark.parametrize(
    "tag_to_check", [SensorTag("TRC-123", None), SensorTag("XYZ-123", "123-XXX")]
)
def test_can_handle_tag_notok(tag_to_check, ncs_reader):
    assert not ncs_reader.can_handle_tag(tag_to_check)


def test_can_handle_tag_unknow_prefix_raise(ncs_reader):
    with pytest.raises(ValueError):
        ncs_reader.can_handle_tag(normalize_sensor_tags(["XYZ-123"])[0])
def get(self):
    context = dict()  # type: typing.Dict[str, typing.Any]
    context["status-code"] = 200
    start_time = timeit.default_timer()

    params = request.get_json() or request.args

    if not all(k in params for k in ("start", "end")):
        return (
            {
                "error": "must provide iso8601 formatted dates with "
                "timezone-information for parameters 'start' and 'end'"
            },
            400,
        )

    try:
        start = self._parse_iso_datetime(params["start"])
        end = self._parse_iso_datetime(params["end"])
    except ValueError:
        logger.error(
            f"Failed to parse start and/or end date to ISO: start: "
            f"{params['start']} - end: {params['end']}"
        )
        return (
            {
                "error": "Could not parse start/end date(s) into ISO datetime. "
                "must provide iso8601 formatted dates for both."
            },
            400,
        )

    # Restrict the requested time span to less than one day
    if (end - start).days:
        return {"error": "Need to request a time span less than 24 hours."}, 400

    freq = pd.tseries.frequencies.to_offset(
        current_app.metadata["dataset"]["resolution"]
    )

    dataset = TimeSeriesDataset(
        data_provider=g.data_provider,
        from_ts=start - freq.delta,
        to_ts=end,
        resolution=current_app.metadata["dataset"]["resolution"],
        tag_list=sensor_tag.normalize_sensor_tags(
            current_app.metadata["dataset"]["tag_list"]
        ),
    )
    X, _y = dataset.get_data()

    # Want resampled buckets equal or greater than start, but less than end
    # b/c if end == 00:00:00 and req = 10 mins, a resampled bucket starting
    # at 00:00:00 would imply it has data until 00:10:00; which is past
    # the requested end datetime
    X = X[(X.index > start - freq.delta) & (X.index + freq.delta < end)]

    try:
        xhat = self.get_predictions(X).tolist()

    # Model may only be a transformer, probably an AttributeError, but catch all
    # to avoid logging other exceptions twice if it happens.
    except Exception as exc:
        logger.critical(f"Failed to predict or transform; error: {exc}")
        return (
            {"error": "Something unexpected happened; check your input data"},
            400,
        )

    # In GET requests we need to pair the resulting predictions with their
    # specific timestamp and additionally match the predictions to the
    # corresponding tags.
    data = []

    # This tags list is just for display/informative purposes, skipping the asset
    tags = [tag["name"] for tag in current_app.metadata["dataset"]["tag_list"]]

    for prediction, time_stamp in zip(xhat, X.index[-len(xhat):]):

        # Auto encoders return double their input.
        # First half is input to model, second half is output of model
        tag_inputs = np.array(prediction[:len(tags)])
        tag_outputs = np.array(prediction[len(tags):])
        tag_errors = np.abs(tag_inputs - tag_outputs)

        data.append(
            {
                "start": f"{time_stamp}",
                "end": f"{time_stamp + freq}",
                "tags": {tag: error for tag, error in zip(tags, tag_errors)},
                "total_anomaly": np.linalg.norm(tag_inputs - tag_outputs),
            }
        )

    context["output"] = data
    context["time-seconds"] = f"{timeit.default_timer() - start_time:.4f}"
    return context, context["status-code"]
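# Hedged sketch of the response body this view builds, inferred from the
# `context` and `data.append(...)` calls above. Tag names, timestamps and error
# values are made-up placeholders.
example_response = {
    "status-code": 200,
    "output": [
        {
            "start": "2019-01-01 00:00:00+00:00",
            "end": "2019-01-01 00:10:00+00:00",
            "tags": {"TRC-123": 0.02},  # per-tag absolute reconstruction error
            "total_anomaly": 0.15,      # norm of the input-output difference
        }
    ],
    "time-seconds": "0.1234",
}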
def tags(self) -> typing.List[SensorTag]:
    return normalize_sensor_tags(g.metadata["dataset"]["tag_list"])
def build(
    name,
    output_dir,
    model_config,
    data_config,
    data_provider,
    metadata,
    model_register_dir,
    print_cv_scores,
    model_parameter,
    evaluation_config,
):
    """
    Build a model and deposit it into 'output_dir' given the appropriate config
    settings.

    \b
    Parameters
    ----------
    name: str
        Name given to the model to build
    output_dir: str
        Directory to save model & metadata to.
    model_config: str
        String containing a yaml which will be parsed to a dict which will be used
        in initializing the model. Should also contain key 'type' which references
        the model to use. ie. KerasAutoEncoder
    data_config: dict
        kwargs to be used in initializing the dataset. Should also contain kwarg
        'type' which references the dataset to use. ie. InfluxBackedDataset
    data_provider: str
        A quoted data provider configuration in JSON/YAML format.
        Should also contain key 'type' which references the data provider to use.

        Example::

            '{"type": "DataLakeProvider", "storename" : "example_store"}'

    metadata: dict
        Any additional metadata to save under the key 'user-defined'
    model_register_dir: path
        Path to a directory which will index existing models and their locations,
        used for re-using old models instead of rebuilding them. If omitted then
        always rebuild
    print_cv_scores: bool
        Print cross validation scores to stdout
    model_parameter: List[Tuple]
        List of model key-values, where the values will be injected into the model
        config wherever there is a jinja variable with the key.
    evaluation_config: dict
        Dict of parameters which are exposed to build_model.

        - cv_mode: str
            String which enables three different modes, represented as a key value
            in evaluation_config:

            * cross_val_only: Only perform cross validation
            * build_only: Skip cross validation and only build the model
            * full_build: Cross validation and full build of the model, default value

        Example::

            {"cv_mode": "cross_val_only"}
    """
    data_config["tag_list"] = data_config.pop("tags")

    data_config["from_ts"] = dateutil.parser.isoparse(
        data_config.pop("train_start_date")
    )

    data_config["to_ts"] = dateutil.parser.isoparse(data_config.pop("train_end_date"))

    # Set default data provider for data config
    data_config["data_provider"] = data_provider

    asset = data_config.get("asset", None)
    tag_list = normalize_sensor_tags(data_config["tag_list"], asset)
    data_config["tag_list"] = tag_list

    # Normalize target tag list if present
    if "target_tag_list" in data_config:
        target_tag_list = normalize_sensor_tags(data_config["target_tag_list"], asset)
        data_config["target_tag_list"] = target_tag_list

    logger.info(f"Building, output will be at: {output_dir}")
    logger.info(f"Raw model config: {model_config}")
    logger.info(f"Data config: {data_config}")
    logger.info(f"Register dir: {model_register_dir}")

    model_parameter = dict(model_parameter)
    model_config = expand_model(model_config, model_parameter)
    model_config = yaml.full_load(model_config)

    # Convert the config into a pipeline, and back into definition to ensure
    # all default parameters are part of the config.
    logger.debug(f"Ensuring the passed model config is fully expanded.")
    model_config = pipeline_into_definition(pipeline_from_definition(model_config))
    logger.debug(f"Fully expanded model config: {model_config}")

    if evaluation_config["cv_mode"] == "cross_val_only":
        cache_model_location = None
        if model_register_dir is not None:
            cache_key = calculate_model_key(
                name, model_config, data_config, evaluation_config, metadata=metadata
            )
            cache_model_location = check_cache(model_register_dir, cache_key)
        if cache_model_location:
            metadata = load_metadata(cache_model_location)
        else:
            _, metadata = build_model(
                name, model_config, data_config, metadata, evaluation_config
            )
    else:
        model_location = provide_saved_model(
            name,
            model_config,
            data_config,
            metadata,
            output_dir,
            model_register_dir,
            evaluation_config=evaluation_config,
        )
        metadata = load_metadata(model_location)

    # If the model is cached but without CV scores then we force a rebuild. We do this
    # by deleting the entry in the cache and then rerun `provide_saved_model`
    # (leaving the old model laying around)
    if print_cv_scores:
        retrieved_metadata = metadata
        all_scores = get_all_score_strings(retrieved_metadata)
        if not all_scores:
            logger.warning(
                "Found that loaded model does not have cross validation values "
                "even though we were asked to print them, clearing cache and "
                "rebuilding model"
            )
            model_location = provide_saved_model(
                name,
                model_config,
                data_config,
                metadata,
                output_dir,
                model_register_dir,
                replace_cache=True,
                evaluation_config=evaluation_config,
            )
            saved_metadata = load_metadata(model_location)
            all_scores = get_all_score_strings(saved_metadata)

        for score in all_scores:
            print(score)

    return 0