Example no. 1
def test_client_cli_download_model(
    gordo_project, gordo_single_target, ml_server, tmpdir
):
    """
    Test proper execution of the client download-model sub-command
    """
    runner = CliRunner()

    # The output directory should be empty before downloading
    assert len(os.listdir(tmpdir)) == 0

    out = runner.invoke(
        gordo_client,
        args=[
            "--project",
            gordo_project,
            "download-model",
            str(tmpdir),
            "--target",
            gordo_single_target,
        ],
    )
    assert (
        out.exit_code == 0
    ), f"Expected output code 0 got '{out.exit_code}', {out.output}"

    # The output directory should no longer be empty
    assert len(os.listdir(tmpdir)) > 0

    model_output_dir = os.path.join(tmpdir, gordo_single_target)
    assert os.path.isdir(model_output_dir)

    model = serializer.load(model_output_dir)
    assert isinstance(model, BaseEstimator)
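
The downloaded directory can then be loaded anywhere via the serializer. A minimal sketch, assuming serializer is gordo's gordo.serializer module (as used in the test above) and a hypothetical download path:

import numpy as np
from gordo import serializer  # assumption: gordo's package layout

# Hypothetical path: the directory written by the download-model sub-command
model = serializer.load("/tmp/models/my-target")
X = np.random.random(size=(10, 10))
y_hat = model.predict(X)  # the deserialized object is a fitted estimator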
Example no. 2
    def test_pipeline_serialization(self):

        pipe = Pipeline(
            [
                ("pca1", PCA(n_components=10)),
                (
                    "fu",
                    FeatureUnion(
                        [
                            ("pca2", PCA(n_components=3)),
                            (
                                "pipe",
                                Pipeline(
                                    [
                                        ("minmax", MinMaxScaler()),
                                        ("truncsvd", TruncatedSVD(n_components=7)),
                                    ]
                                ),
                            ),
                        ]
                    ),
                ),
                ("ae", KerasAutoEncoder(kind="feedforward_hourglass")),
            ]
        )

        X = np.random.random(size=100).reshape(10, 10)
        pipe.fit(X.copy(), X.copy())

        with TemporaryDirectory() as tmp:

            # Test dump
            metadata = {"key": "value"}
            serializer.dump(pipe, tmp, metadata=metadata)

            # Test load from the serialized pipeline above
            pipe_clone = serializer.load(tmp)
            metadata_clone = serializer.load_metadata(tmp)

            # Ensure the metadata was saved and loaded back
            self.assertEqual(metadata, metadata_clone)

            # Verify same state for both pipelines
            y_hat_pipe1 = pipe.predict(X.copy()).flatten()
            y_hat_pipe2 = pipe_clone.predict(X.copy()).flatten()
            self.assertTrue(np.allclose(y_hat_pipe1, y_hat_pipe2))

            # Now use dumps/loads
            serialized = serializer.dumps(pipe)
            pipe_clone = serializer.loads(serialized)

            # Verify same state for both pipelines
            y_hat_pipe1 = pipe.predict(X.copy()).flatten()
            y_hat_pipe2 = pipe_clone.predict(X.copy()).flatten()
            self.assertTrue(np.allclose(y_hat_pipe1, y_hat_pipe2))
Example no. 3
def test_dump_load_models(model):

    X = np.random.random(size=100).reshape(10, 10)
    model.fit(X.copy(), X.copy())
    model_out = model.predict(X.copy())

    with TemporaryDirectory() as tmp:
        serializer.dump(model, tmp)

        model_clone = serializer.load(tmp)
        model_clone_out = model_clone.predict(X.copy())

        assert np.allclose(model_out.flatten(), model_clone_out.flatten())
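
The model argument above is presumably supplied by pytest parametrization. A hypothetical sketch of such a parametrization, using plain sklearn estimators that support fit(X, X) and predict (the real suite likely supplies gordo models here):

import pytest
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

# Hypothetical parametrization; the estimator list is illustrative only
@pytest.mark.parametrize(
    "model",
    [
        LinearRegression(),
        Pipeline([("minmax", MinMaxScaler()), ("linreg", LinearRegression())]),
    ],
)
def test_dump_load_models(model):
    ...  # body as in the example above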
Example no. 4
def load_model(directory: str, name: str) -> BaseEstimator:
    """
    Load a given model from the directory by name.

    Parameters
    ----------
    directory: str
        Directory to look for the model
    name: str
        Name of the model to load; this is the subdirectory within the
        ``directory`` parameter.

    Returns
    -------
    BaseEstimator
    """
    start_time = timeit.default_timer()
    model = serializer.load(os.path.join(directory, name))
    logger.debug(f"Time to load model: {timeit.default_timer() - start_time}s")
    return model
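
A short usage sketch, assuming a hypothetical layout where each model lives in a subdirectory named after its machine:

# Hypothetical layout: /models/<name>/ contains one serialized model each
model = load_model(directory="/models", name="my-machine")
print(type(model))  # some fitted sklearn.base.BaseEstimator subclass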
Example no. 5
    def build(
        self,
        output_dir: Optional[Union[os.PathLike, str]] = None,
        model_register_dir: Optional[Union[os.PathLike, str]] = None,
        replace_cache=False,
    ) -> Tuple[sklearn.base.BaseEstimator, Machine]:
        """
        Always return a model and its metadata.

        If ``output_dir`` is supplied, the model will be saved there.
        ``model_register_dir`` points to the model cache directory from which it
        will attempt to read the model. Supplying both has the combined effect:
        the model is read from the cache and that cached model is saved to the
        new output directory.

        Parameters
        ----------
        output_dir: Optional[Union[os.PathLike, str]]
            A path to where the model will be deposited.
        model_register_dir: Optional[Union[os.PathLike, str]]
            A path to a register; see :func:`gordo.util.disk_registry`.
            If this is None then always build the model, otherwise try to resolve
            the model from the registry.
        replace_cache: bool
            Forces a rebuild of the model, and replaces the entry in the cache
            with the new model.

        Returns
        -------
        Tuple[sklearn.base.BaseEstimator, Machine]
            Built model and an updated ``Machine``
        """
        if not model_register_dir:
            model, machine = self._build()
        else:
            logger.debug(
                f"Model caching activated, attempting to read model-location with key "
                f"{self.cache_key} from register {model_register_dir}"
            )
            self.cached_model_path = self.check_cache(model_register_dir)

            if replace_cache:
                logger.info("replace_cache=True, deleting any existing cache entry")
                disk_registry.delete_value(model_register_dir, self.cache_key)
                self.cached_model_path = None

            # Load the model from previous cached directory
            if self.cached_model_path:
                model = serializer.load(self.cached_model_path)
                metadata = serializer.load_metadata(self.cached_model_path)
                metadata["metadata"][
                    "user_defined"] = self.machine.metadata.user_defined

                metadata["runtime"] = self.machine.runtime

                machine = Machine(**metadata)

            # Otherwise build and cache the model
            else:
                model, machine = self._build()
                self.cached_model_path = self._save_model(
                    model=model, machine=machine, output_dir=output_dir  # type: ignore
                )
                logger.info(f"Built model, and deposited at {self.cached_model_path}")
                logger.info("Writing model-location to model registry")
                disk_registry.write_key(  # type: ignore
                    model_register_dir, self.cache_key, self.cached_model_path
                )

        # Save the model to disk, unless we are building for cross-validation only.
        if output_dir and (self.machine.evaluation.get("cv_mode") != "cross_val_only"):
            self.cached_model_path = self._save_model(
                model=model, machine=machine, output_dir=output_dir
            )
        return model, machine
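
A sketch of how a caller might exercise both the cache and the output directory; the builder construction below is an assumption inferred from this snippet, not a verified API:

# Assumed usage (ModelBuilder construction and paths are hypothetical)
builder = ModelBuilder(machine)
model, machine_out = builder.build(
    output_dir="/models/out",            # where the built model is deposited
    model_register_dir="/models/cache",  # registry consulted before building
)
# A second call with an unchanged cache_key resolves the model from the
# registry instead of rebuilding; replace_cache=True forces a rebuild.
model_again, _ = builder.build(model_register_dir="/models/cache")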