Example #1
def test_postgres_reporter(postgresdb, metadata):
    """
    Check logging of a machine into Postgres.
    """
    reporter1 = PostgresReporter(host="localhost")
    machine1 = Machine(**metadata)

    # Before inserting, the machine does not exist.
    with pytest.raises(peewee.DoesNotExist):
        PostgresMachine.get(PostgresMachine.name == machine1.name)

    reporter1.report(machine1)

    record = PostgresMachine.get(PostgresMachine.name == machine1.name)
    assert record.name == machine1.name

    # Create another reporter to ensure nothing happened to the DB
    reporter2 = PostgresReporter(host="localhost")
    machine2 = Machine(**metadata)
    machine2.name = "another-machine"

    reporter2.report(machine2)

    # The first machine is still there
    record = PostgresMachine.get(PostgresMachine.name == machine1.name)
    assert record.name == machine1.name

    # And the second
    record = PostgresMachine.get(PostgresMachine.name == machine2.name)
    assert record.name == machine2.name
Example #2
    def __init__(self, machine: Machine):
        """
        Build a model for a given :class:`gordo.machine.Machine`

        Parameters
        ----------
        machine: Machine

        Example
        -------
        >>> from gordo_dataset.sensor_tag import SensorTag
        >>> from gordo.machine import Machine
        >>> from gordo.dependencies import configure_once
        >>> configure_once()
        >>> machine = Machine(
        ...     name="special-model-name",
        ...     model={"sklearn.decomposition.PCA": {"svd_solver": "auto"}},
        ...     dataset={
        ...         "type": "RandomDataset",
        ...         "train_start_date": "2017-12-25 06:00:00Z",
        ...         "train_end_date": "2017-12-30 06:00:00Z",
        ...         "tag_list": [SensorTag("Tag 1", None), SensorTag("Tag 2", None)],
        ...         "target_tag_list": [SensorTag("Tag 3", None), SensorTag("Tag 4", None)]
        ...     },
        ...     project_name='test-proj',
        ... )
        >>> builder = ModelBuilder(machine=machine)
        >>> model, machine = builder.build()
        """
        # Avoid overwriting the passed machine; copying doesn't work if it holds a
        # reference to a loaded TensorFlow model, so .to_dict() serializes it to a
        # primitive dict representation instead.
        self.machine = Machine(**machine.to_dict())
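A minimal sketch of the copy-by-serialization pattern used in the constructor above, assuming the same gordo environment as the doctest (configure_once, RandomDataset and SensorTag are taken from that example):

from gordo_dataset.sensor_tag import SensorTag
from gordo.machine import Machine
from gordo.dependencies import configure_once

configure_once()

machine = Machine(
    name="copy-demo",
    model={"sklearn.decomposition.PCA": {"svd_solver": "auto"}},
    dataset={
        "type": "RandomDataset",
        "train_start_date": "2017-12-25 06:00:00Z",
        "train_end_date": "2017-12-30 06:00:00Z",
        "tag_list": [SensorTag("Tag 1", None), SensorTag("Tag 2", None)],
    },
    project_name="test-proj",
)

# .to_dict() yields a primitive representation, so rebuilding from it gives an
# independent Machine that shares no mutable state with the original.
copied = Machine(**machine.to_dict())
assert copied.name == machine.name
assert copied is not machine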
Example #3
def machine():
    return Machine(
        name="test-model",
        model=MODEL_CONFIG,
        dataset=DATA_CONFIG,
        project_name="project-name",
    )
Example #4
def test_model_builder_metrics_list(metrics_: Optional[List[str]]):
    model_config = {
        "sklearn.multioutput.MultiOutputRegressor": {
            "estimator": "sklearn.linear_model.LinearRegression"
        }
    }
    data_config = get_random_data()

    evaluation_config: Dict[str, Any] = {"cv_mode": "full_build"}
    if metrics_:
        evaluation_config.update({"metrics": metrics_})

    machine = Machine(
        name="model-name",
        dataset=data_config,
        model=model_config,
        evaluation=evaluation_config,
        project_name="test",
    )
    _model, machine = ModelBuilder(machine).build()

    expected_metrics = metrics_ or [
        "sklearn.metrics.explained_variance_score",
        "sklearn.metrics.r2_score",
        "sklearn.metrics.mean_squared_error",
        "sklearn.metrics.mean_absolute_error",
    ]

    assert all(
        metric.split(".")[-1].replace("_", "-") in
        machine.metadata.build_metadata.model.cross_validation.scores
        for metric in expected_metrics)
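The assertion above relies on the naming convention that a metric path such as "sklearn.metrics.mean_squared_error" becomes the score key "mean-squared-error"; a standalone sketch of that conversion (plain Python, no gordo required):

metrics = [
    "sklearn.metrics.explained_variance_score",
    "sklearn.metrics.r2_score",
    "sklearn.metrics.mean_squared_error",
    "sklearn.metrics.mean_absolute_error",
]

# Take the last dotted segment and swap underscores for hyphens, mirroring the
# keys used in machine.metadata.build_metadata.model.cross_validation.scores.
score_keys = [m.split(".")[-1].replace("_", "-") for m in metrics]
assert score_keys == [
    "explained-variance-score",
    "r2-score",
    "mean-squared-error",
    "mean-absolute-error",
]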
Example #5
def test_setting_seed(seed, model_config):
    """
    Test that we can set the seed and get the same results.
    """

    data_config = get_random_data()
    evaluation_config = {"cv_mode": "full_build", "seed": seed}

    # Training two instances without a seed should result in different scores,
    # while doing it with a seed should result in the same scores.
    machine = Machine(
        name="model-name",
        dataset=data_config,
        model=model_config,
        evaluation=evaluation_config,
        project_name="test",
    )
    _model, machine1 = ModelBuilder(machine).build()
    _model, machine2 = ModelBuilder(machine).build()

    df1 = pd.DataFrame.from_dict(
        machine1.metadata.build_metadata.model.cross_validation.scores)
    df2 = pd.DataFrame.from_dict(
        machine2.metadata.build_metadata.model.cross_validation.scores)

    # Equality depends on the seed being set.
    if seed:
        assert df1.equals(df2)
    else:
        assert not df1.equals(df2)
Example #6
def test_n_splits_from_config(mocked_pipeline_from_definition, cv):
    """
    Test that we can set arbitrary splitters and parameters in the config file, which are then passed to the serializer.
    """
    data_config = get_random_data()
    evaluation_config = {"cv_mode": "full_build"}
    if cv:
        evaluation_config["cv"] = cv

    model_config = {
        "sklearn.multioutput.MultiOutputRegressor": {
            "estimator": "sklearn.ensemble.forest.RandomForestRegressor"
        }
    }

    machine = Machine(
        name="model-name",
        dataset=data_config,
        model=model_config,
        evaluation=evaluation_config,
        project_name="test",
    )

    ModelBuilder(machine).build()

    if cv:
        mocked_pipeline_from_definition.assert_called_with(cv)
    else:
        mocked_pipeline_from_definition.assert_called_with(
            {"sklearn.model_selection.TimeSeriesSplit": {
                "n_splits": 3
            }})
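For reference, the default definition asserted above corresponds to scikit-learn's TimeSeriesSplit; a small sketch of what that splitter does (assumes only numpy and scikit-learn):

import numpy as np
from sklearn.model_selection import TimeSeriesSplit

X = np.arange(20).reshape(-1, 1)

# {"sklearn.model_selection.TimeSeriesSplit": {"n_splits": 3}} resolves to this
# splitter: each fold trains on an expanding prefix and tests on the block after it.
cv = TimeSeriesSplit(n_splits=3)
for train_idx, test_idx in cv.split(X):
    print(len(train_idx), len(test_idx))  # (5, 5), (10, 5), (15, 5)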
Example #7
def test_scores_metadata(raw_model_config):
    data_config = get_random_data()
    model_config = yaml.load(raw_model_config, Loader=yaml.FullLoader)
    machine = Machine(
        dataset=data_config, model=model_config, name="model-name", project_name="test"
    )
    model, machine_out = ModelBuilder(machine).build()
    machine_check(machine_out, False)
Example #8
def test_get_machine_log_items(metadata):
    """
    Test that dicts are correctly converted to MLflow types or that errors are raised.
    """
    metrics, params = mlu.get_machine_log_items(Machine(**metadata))

    assert all(type(m) == Metric for m in metrics)
    assert all(type(p) == Param for p in params)
Example #9
def test_builder_calls_machine_report(mocked_report_method, metadata):
    """
    When building a machine, ModelBuilder.build() should call Machine.report()
    so that it can run any reporters in the Machine's runtime.
    """
    machine = Machine(**metadata)
    ModelBuilder(machine).build()
    mocked_report_method.assert_called_once()
Example #10
def test_overwrite_report(postgresdb, metadata):
    """
    Ensure saving the same machine twice is OK.
    """
    reporter1 = PostgresReporter(host="localhost")
    reporter2 = PostgresReporter(host="localhost")

    machine1 = Machine(**metadata)
    machine2 = Machine(**metadata)

    reporter1.report(machine1)

    # Reporting twice should be ok.
    reporter2.report(machine2)

    results = PostgresMachine.select().where(
        PostgresMachine.name == machine1.name)
    assert len(list(results)) == 1
Example #11
    def _machine_from_server(self, name: str, revision: str) -> Machine:
        resp = self.session.get(
            f"{self.base_url}/gordo/v0/{self.project_name}/{name}/metadata",
            params={"revision": revision},
        )
        metadata = _handle_response(
            resp=resp, resource_name=f"Machine metadata for {name}")
        if isinstance(metadata, dict) and metadata.get("metadata", None):
            return Machine(**metadata.get("metadata", None))
        else:
            raise NotFound(f"Machine {name} not found")
Example #12
def test_builder_metadata(raw_model_config):
    """
    Ensure the builder works with various model configs and that each has
    expected/valid metadata results.
    """
    model_config = yaml.load(raw_model_config, Loader=yaml.FullLoader)
    data_config = get_random_data()
    machine = Machine(
        name="model-name", dataset=data_config, model=model_config, project_name="test"
    )
    model, machine_out = ModelBuilder(machine).build()
    # Check metadata, and only verify 'history' if it's a *Keras* type model
    machine_check(machine_out, "Keras" in raw_model_config)
Example #13
def test_builder_with_reporter(postgresdb, metadata):
    """
    Verify a machine can take a reporter and that .report() will run any given reporters.
    """
    reporter = PostgresReporter(host="localhost")
    metadata["runtime"]["reporters"].append(reporter.to_dict())

    machine = Machine(**metadata)

    with pytest.raises(peewee.DoesNotExist):
        PostgresMachine.get(PostgresMachine.name == machine.name)
    machine.report()
    PostgresMachine.get(PostgresMachine.name == machine.name)
Example #14
def test_client_get_dataset(gordo_project, metadata, ml_server):
    data_provider = providers.RandomDataProvider(min_size=10)
    client = Client(project=gordo_project, data_provider=data_provider)
    start = isoparse("2016-01-01T00:00:00+00:00")
    end = isoparse("2016-01-01T12:00:00+00:00")
    machine = Machine(**metadata)
    assert type(machine.dataset) is TimeSeriesDataset
    machine.dataset.row_filter_buffer_size = 12
    machine.dataset.n_samples_threshold = 10
    dataset = client._get_dataset(machine, start, end)
    assert dataset.row_filter_buffer_size == 0
    assert dataset.n_samples_threshold == 0
    assert dataset.low_threshold is None
    assert dataset.high_threshold is None
Example #15
def test_mlflow_context_log_error(MockClient, metadata):
    """
    Test that an error while logging metadata as an artifact raises MlflowLoggingError
    """
    metadata = Machine(**metadata)
    mock_client = MockClient()
    mock_client.log_artifacts.side_effect = Exception(
        "Some unknown exception!")

    with pytest.raises(mlu.MlflowLoggingError):
        with mlu.mlflow_context("returns metadata", "unique_key", {}, {}) as (
                mlflow_client,
                run_id,
        ):
            mlu.log_machine(mlflow_client, run_id, metadata)
Example #16
def test_provide_saved_model_simple_happy_path(tmpdir):
    """
    Test provide_saved_model with no caching
    """
    model_config = {"sklearn.decomposition.PCA": {"svd_solver": "auto"}}
    data_config = get_random_data()
    output_dir = os.path.join(tmpdir, "model")
    machine = Machine(
        name="model-name", dataset=data_config, model=model_config, project_name="test"
    )
    ModelBuilder(machine).build(output_dir=output_dir)

    # Assert the model was saved at the location
    # Should be model file, and the metadata
    assert len(os.listdir(output_dir)) == 2
Example #17
def test_output_scores_metadata():
    data_config = get_random_data()
    raw_model_config = f"""
            gordo.machine.model.anomaly.diff.DiffBasedAnomalyDetector:
                scaler: sklearn.preprocessing.MinMaxScaler
                base_estimator:
                    sklearn.compose.TransformedTargetRegressor:
                        transformer: sklearn.preprocessing.MinMaxScaler
                        regressor:
                            sklearn.pipeline.Pipeline:
                                steps:
                                - sklearn.preprocessing.MinMaxScaler
                                - gordo.machine.model.models.KerasAutoEncoder:
                                    kind: feedforward_hourglass
                                    batch_size: 3
                                    compression_factor: 0.5
                                    encoding_layers: 1
                                    func: tanh
                                    out_func: linear
                                    epochs: 1
            """

    model_config = yaml.load(raw_model_config, Loader=yaml.FullLoader)
    machine = Machine(name="model-name",
                      dataset=data_config,
                      model=model_config,
                      project_name="test")
    model, machine_out = ModelBuilder(machine).build()
    scores_metadata = machine_out.metadata.build_metadata.model.cross_validation.scores
    assert (scores_metadata["explained-variance-score-Tag-1"]["fold-mean"] +
            scores_metadata["explained-variance-score-Tag-2"]["fold-mean"]
            ) / 2 == pytest.approx(
                scores_metadata["explained-variance-score"]["fold-mean"])

    assert (
        scores_metadata["r2-score-Tag-1"]["fold-mean"] +
        scores_metadata["r2-score-Tag-2"]["fold-mean"]) / 2 == pytest.approx(
            scores_metadata["r2-score"]["fold-mean"])

    assert (scores_metadata["mean-squared-error-Tag-1"]["fold-mean"] +
            scores_metadata["mean-squared-error-Tag-2"]["fold-mean"]
            ) / 2 == pytest.approx(
                scores_metadata["mean-squared-error"]["fold-mean"])

    assert (scores_metadata["mean-absolute-error-Tag-1"]["fold-mean"] +
            scores_metadata["mean-absolute-error-Tag-2"]["fold-mean"]
            ) / 2 == pytest.approx(
                scores_metadata["mean-absolute-error"]["fold-mean"])
Example #18
def test_provide_saved_model_caching_handle_existing_same_dir(tmpdir):
    """If the model exists in the model register, and the path there is the
    same as output_dir, output_dir is returned"""
    model_config = {"sklearn.decomposition.PCA": {"svd_solver": "auto"}}
    data_config = get_random_data()
    output_dir = os.path.join(tmpdir, "model")
    registry_dir = os.path.join(tmpdir, "registry")
    machine = Machine(
        name="model-name", dataset=data_config, model=model_config, project_name="test"
    )
    builder = ModelBuilder(machine)
    builder.build(output_dir=output_dir, model_register_dir=registry_dir)
    assert builder.cached_model_path == output_dir

    # Saving to the same output_dir as the one saved in the registry just returns the output_dir
    builder.build(output_dir=output_dir, model_register_dir=registry_dir)
    assert builder.cached_model_path == output_dir
Example #19
def test_mlflow_context_log_metadata(MockClient, tmpdir, metadata):
    """
    Test that a call to the wrapped function initiates MLflow logging or throws a warning.
    """
    metadata = Machine(**metadata)
    mlflow.set_tracking_uri(f"file:{tmpdir}")

    mock_client = MockClient()
    mock_client.log_batch.return_value = "test"

    # Function with a metadata dict returned
    with mlu.mlflow_context("returns metadata", "unique_key", {}, {}) as (
            mlflow_client,
            run_id,
    ):
        mlu.log_machine(mlflow_client, run_id, metadata)

    assert mock_client.log_batch.called
Example #20
def test_output_dir(tmpdir):
    """
    Test that building a model will create subdirectories for model saving if needed.
    """
    model_config = {"sklearn.decomposition.PCA": {"svd_solver": "auto"}}
    data_config = get_random_data()
    output_dir = os.path.join(tmpdir, "some", "sub", "directories")
    machine = Machine(
        name="model-name", dataset=data_config, model=model_config, project_name="test"
    )
    builder = ModelBuilder(machine)
    model, machine_out = builder.build()
    machine_check(machine_out, False)

    builder._save_model(model=model, machine=machine_out, output_dir=output_dir)

    # Assert the model was saved at the location
    # Should be model file, and the metadata
    assert len(os.listdir(output_dir)) == 2
Example #21
def test_provide_saved_model_caching_handle_existing_different_register(tmpdir):
    """If the model exists in the model register, but the output_dir is not where
    the model is, the model is copied to the new location, unless the new location
    already exists. If it does, that location is returned."""
    model_config = {"sklearn.decomposition.PCA": {"svd_solver": "auto"}}
    data_config = get_random_data()
    output_dir1 = os.path.join(tmpdir, "model1")
    output_dir2 = os.path.join(tmpdir, "model2")

    registry_dir = os.path.join(tmpdir, "registry")
    machine = Machine(
        name="model-name", dataset=data_config, model=model_config, project_name="test"
    )
    builder = ModelBuilder(machine)
    builder.build(output_dir=output_dir1, model_register_dir=registry_dir)

    builder.build(output_dir=output_dir2, model_register_dir=registry_dir)
    assert builder.cached_model_path == output_dir2

    builder.build(output_dir=output_dir2, model_register_dir=registry_dir)
    assert builder.cached_model_path == output_dir2
Example #22
    def _build(self) -> Tuple[sklearn.base.BaseEstimator, Machine]:
        """
        Build the model using the current state of the Builder

        Returns
        -------
            Tuple[sklearn.base.BaseEstimator, Machine]
        """
        # Default the random seed to 0 if not specified.
        self.set_seed(seed=self.machine.evaluation.get("seed", 0))

        # Get the dataset from config
        logger.debug(
            f"Initializing Dataset with config {self.machine.dataset.to_dict()}"
        )

        dataset = _get_dataset(self.machine.dataset.to_dict())

        logger.debug("Fetching training data")
        start = time.time()

        X, y = dataset.get_data()

        time_elapsed_data = time.time() - start

        # Get the model from config
        logger.debug(f"Initializing Model with config: {self.machine.model}")
        model = serializer.from_definition(self.machine.model)

        cv_duration_sec = None

        machine: Machine = Machine(
            name=self.machine.name,
            dataset=self.machine.dataset.to_dict(),
            metadata=self.machine.metadata,
            model=self.machine.model,
            project_name=self.machine.project_name,
            evaluation=self.machine.evaluation,
            runtime=self.machine.runtime,
        )

        split_metadata: Dict[str, Any] = dict()
        scores: Dict[str, Any] = dict()
        if self.machine.evaluation["cv_mode"].lower() in (
                "cross_val_only",
                "full_build",
        ):

            # Build up a metrics list.
            metrics_list = self.metrics_from_list(
                self.machine.evaluation.get("metrics"))

            # Cross validate
            if hasattr(model, "predict"):
                logger.debug("Starting cross validation")
                start = time.time()

                scaler = self.machine.evaluation.get("scoring_scaler")
                metrics_dict = self.build_metrics_dict(metrics_list,
                                                       y,
                                                       scaler=scaler)

                split_obj = serializer.from_definition(
                    self.machine.evaluation.get(
                        "cv",
                        {
                            "sklearn.model_selection.TimeSeriesSplit": {
                                "n_splits": 3
                            }
                        },
                    ))
                # Generate metadata about CV train, test splits
                split_metadata = ModelBuilder.build_split_dict(X, split_obj)

                cv_kwargs = dict(X=X,
                                 y=y,
                                 scoring=metrics_dict,
                                 return_estimator=True,
                                 cv=split_obj)
                if hasattr(model, "cross_validate"):
                    cv = model.cross_validate(**cv_kwargs)
                else:
                    cv = cross_validate(model, **cv_kwargs)

                for metric, test_metric in map(lambda k: (k, f"test_{k}"),
                                               metrics_dict):
                    val = {
                        "fold-mean": cv[test_metric].mean(),
                        "fold-std": cv[test_metric].std(),
                        "fold-max": cv[test_metric].max(),
                        "fold-min": cv[test_metric].min(),
                    }
                    val.update({
                        f"fold-{i + 1}": raw_value
                        for i, raw_value in enumerate(cv[test_metric].tolist())
                    })
                    scores.update({metric: val})

                cv_duration_sec = time.time() - start
            else:
                logger.debug(
                    "Unable to score model, has no attribute 'predict'.")

            # If cross_val_only, return without fitting to the whole dataset
            if self.machine.evaluation["cv_mode"] == "cross_val_only":
                machine.metadata.build_metadata = BuildMetadata(
                    model=ModelBuildMetadata(
                        cross_validation=CrossValidationMetaData(
                            cv_duration_sec=cv_duration_sec,
                            scores=scores,
                            splits=split_metadata,
                        )),
                    dataset=DatasetBuildMetadata(
                        query_duration_sec=time_elapsed_data,
                        dataset_meta=dataset.get_metadata(),
                    ),
                )
                return model, machine

        # Train
        logger.debug("Starting to train model.")
        start = time.time()
        model.fit(X, y)
        time_elapsed_model = time.time() - start

        # Build specific metadata
        machine.metadata.build_metadata = BuildMetadata(
            model=ModelBuildMetadata(
                model_offset=self._determine_offset(model, X),
                model_creation_date=str(
                    datetime.datetime.now(datetime.timezone.utc).astimezone()),
                model_builder_version=__version__,
                model_training_duration_sec=time_elapsed_model,
                cross_validation=CrossValidationMetaData(
                    cv_duration_sec=cv_duration_sec,
                    scores=scores,
                    splits=split_metadata,
                ),
                model_meta=self._extract_metadata_from_model(model),
            ),
            dataset=DatasetBuildMetadata(
                query_duration_sec=time_elapsed_data,
                dataset_meta=dataset.get_metadata(),
            ),
        )
        return model, machine
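A minimal standalone sketch of the fold-statistics aggregation performed in _build above, using plain scikit-learn; LinearRegression and make_regression are stand-ins for the configured gordo model and dataset:

from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import TimeSeriesSplit, cross_validate

X, y = make_regression(n_samples=60, n_features=4, random_state=0)

# Mirrors the shape of the call above: a dict of scorers, a TimeSeriesSplit,
# and per-metric fold statistics gathered from the "test_<metric>" arrays.
metrics_dict = {"r2-score": "r2", "explained-variance-score": "explained_variance"}
cv = cross_validate(
    LinearRegression(),
    X=X,
    y=y,
    scoring=metrics_dict,
    return_estimator=True,
    cv=TimeSeriesSplit(n_splits=3),
)

scores = {}
for metric, test_metric in ((k, f"test_{k}") for k in metrics_dict):
    val = {
        "fold-mean": cv[test_metric].mean(),
        "fold-std": cv[test_metric].std(),
        "fold-max": cv[test_metric].max(),
        "fold-min": cv[test_metric].min(),
    }
    val.update({f"fold-{i + 1}": raw for i, raw in enumerate(cv[test_metric].tolist())})
    scores[metric] = val

print(scores["r2-score"])  # fold-mean, fold-std, fold-max, fold-min, fold-1..3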
Example #23
def test_provide_saved_model_caching(
    should_be_equal: bool,
    metadata: Optional[Metadata],
    tag_list: Optional[List[SensorTag]],
    replace_cache,
    tmpdir,
):
    """
    Test provide_saved_model with caching and possible cache busting if tag_list or replace_cache is set.

    Builds two models and checks whether their model-creation-dates are the same,
    which will be the case if and only if there is caching.

    Parameters
    ----------
    should_be_equal : bool
        Do we expect the two generated models to be at the same location or not? I.e. do
        we expect caching.
    metadata: Metadata
        Optional metadata which will be used as metadata for the second model.
    tag_list
        Optional list of tags which will be used as the tag_list in the dataset for the
        second model.
    replace_cache: bool
        Should we force a model cache replacement?

    """

    if tag_list is None:
        tag_list = []
    if metadata is None:
        metadata = Metadata()

    model_config = {"sklearn.decomposition.pca.PCA": {"svd_solver": "auto"}}
    data_config = get_random_data()
    output_dir = os.path.join(tmpdir, "model")
    registry_dir = os.path.join(tmpdir, "registry")
    machine = Machine(name="model-name",
                      dataset=data_config,
                      model=model_config,
                      project_name="test")
    _, first_machine = ModelBuilder(machine).build(
        output_dir=output_dir, model_register_dir=registry_dir)

    if tag_list:
        data_config["tag_list"] = tag_list

    new_output_dir = os.path.join(tmpdir, "model2")
    _, second_machine = ModelBuilder(machine=Machine(
        name="model-name",
        dataset=data_config,
        model=model_config,
        metadata=metadata,
        project_name="test",
        runtime={"something": True},
    )).build(
        output_dir=new_output_dir,
        model_register_dir=registry_dir,
        replace_cache=replace_cache,
    )

    model1_creation_date = (
        first_machine.metadata.build_metadata.model.model_creation_date)
    model2_creation_date = (
        second_machine.metadata.build_metadata.model.model_creation_date)
    assert "something" in second_machine.runtime

    if should_be_equal:
        assert model1_creation_date == model2_creation_date
    else:
        assert model1_creation_date != model2_creation_date

    if metadata is not None:
        assert metadata.user_defined == second_machine.metadata.user_defined
Example #24
    def build(
        self,
        output_dir: Optional[Union[os.PathLike, str]] = None,
        model_register_dir: Optional[Union[os.PathLike, str]] = None,
        replace_cache=False,
    ) -> Tuple[sklearn.base.BaseEstimator, Machine]:
        """
        Always return a model and its metadata.

        If ``output_dir`` is supplied, it will save the model there.
        ``model_register_dir`` points to the model cache directory from which it
        will attempt to read the model. Supplying both has the combined effect:
        the model is read from the cache and that cached model is saved to the
        new output directory.

        Parameters
        ----------
        output_dir: Optional[Union[os.PathLike, str]]
            A path to where the model will be deposited.
        model_register_dir: Optional[Union[os.PathLike, str]]
            A path to a register, see :func:`gordo.util.disk_registry`.
            If this is None then always build the model, otherwise try to resolve
            the model from the registry.
        replace_cache: bool
            Forces a rebuild of the model, and replaces the entry in the cache
            with the new model.

        Returns
        -------
        Tuple[sklearn.base.BaseEstimator, Machine]
            Built model and an updated ``Machine``
        """
        if not model_register_dir:
            model, machine = self._build()
        else:
            logger.debug(
                f"Model caching activated, attempting to read model-location with key "
                f"{self.cache_key} from register {model_register_dir}")
            self.cached_model_path = self.check_cache(model_register_dir)

            if replace_cache:
                logger.info(
                    "replace_cache=True, deleting any existing cache entry")
                disk_registry.delete_value(model_register_dir, self.cache_key)
                self.cached_model_path = None

            # Load the model from previous cached directory
            if self.cached_model_path:
                model = serializer.load(self.cached_model_path)
                metadata = serializer.load_metadata(self.cached_model_path)
                metadata["metadata"][
                    "user_defined"] = self.machine.metadata.user_defined

                metadata["runtime"] = self.machine.runtime

                machine = Machine(**metadata)

            # Otherwise build and cache the model
            else:
                model, machine = self._build()
                self.cached_model_path = self._save_model(
                    model=model,
                    machine=machine,
                    output_dir=output_dir  # type: ignore
                )
                logger.info(
                    f"Built model, and deposited at {self.cached_model_path}")
                logger.info(f"Writing model-location to model registry")
                disk_registry.write_key(  # type: ignore
                    model_register_dir, self.cache_key, self.cached_model_path)

        # Save model to disk, if we're not building for cv only purposes.
        if output_dir and (self.machine.evaluation.get("cv_mode") !=
                           "cross_val_only"):
            self.cached_model_path = self._save_model(model=model,
                                                      machine=machine,
                                                      output_dir=output_dir)
        return model, machine
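A hedged usage sketch of the two code paths described in the docstring, assuming ModelBuilder, Machine and the get_random_data helper are in scope exactly as in the test examples above (the paths are illustrative only):

import os
import tempfile

tmpdir = tempfile.mkdtemp()
machine = Machine(
    name="model-name",
    dataset=get_random_data(),  # test helper used throughout the examples above
    model={"sklearn.decomposition.PCA": {"svd_solver": "auto"}},
    project_name="test",
)
builder = ModelBuilder(machine)

# Without a register: always builds, saving to output_dir if given.
model, machine_out = builder.build(output_dir=os.path.join(tmpdir, "model1"))

# With a register: the first call builds and caches; the second resolves the
# cached model and saves it to the new output_dir instead of rebuilding.
builder.build(output_dir=os.path.join(tmpdir, "model2"),
              model_register_dir=os.path.join(tmpdir, "registry"))
builder.build(output_dir=os.path.join(tmpdir, "model3"),
              model_register_dir=os.path.join(tmpdir, "registry"))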