Example #1
0
 def load_from_dir(cls, directory: str):
     """
     Build an instance of ``cls`` from a directory on disk.

     The keyword arguments for ``cls`` are read from ``params.json`` inside
     ``directory``. The ``base_estimator`` and ``scaler`` entries are then
     set to whatever ``serializer.load`` returns for the sub-paths of the
     same names, replacing any value ``params.json`` held under those keys.

     Parameters
     ----------
     directory: str
         Directory holding ``params.json`` plus the ``base_estimator`` and
         ``scaler`` sub-paths

     Returns
     -------
     The result of calling ``cls`` with the assembled keyword arguments
     """
     params_path = os.path.join(directory, "params.json")
     with open(params_path, "r") as fh:
         kwargs = json.load(fh)

     # Same order as the keys were originally assigned: estimator, then scaler.
     for component in ("base_estimator", "scaler"):
         kwargs[component] = serializer.load(os.path.join(directory, component))

     return cls(**kwargs)
Example #2
0
def load_model_and_metadata(
        model_dir_env_var: str) -> typing.Tuple[BaseEstimator, dict]:
    """
    Load a model and its metadata from the directory named by an
    environment variable.

    Parameters
    ----------
    model_dir_env_var: str
        Name of the environment variable whose value is the model directory

    Returns
    -------
    BaseEstimator, dict
        Two-element tuple: the object returned by ``serializer.load`` followed
        by the object returned by ``serializer.load_metadata`` for that
        directory

    Raises
    ------
    ValueError
        If the environment variable is not set
    NotADirectoryError
        If the value of the environment variable is not an existing directory
    """
    logger.debug("Determining model location...")
    model_location = os.getenv(model_dir_env_var)

    if model_location is None:
        raise ValueError(
            f'Environment variable "{model_dir_env_var}" not set!')

    if os.path.isdir(model_location):
        # Model is loaded before its metadata, and returned in that order.
        return (
            serializer.load(model_location),
            serializer.load_metadata(model_location),
        )

    raise NotADirectoryError(
        f'The supplied directory: "{model_location}" does not exist!')
def test_client_cli_download_model(watchman_service):
    """
    Test proper execution of client download-model sub-command
    """
    runner = CliRunner()

    with tempfile.TemporaryDirectory() as output_dir:

        # Nothing should be in the output directory before the download
        assert not os.listdir(output_dir)

        cli_args = [
            "client",
            "--project",
            tu.GORDO_PROJECT,
            "--target",
            tu.GORDO_SINGLE_TARGET,
            "download-model",
            output_dir,
        ]
        out = runner.invoke(cli.gordo, args=cli_args)
        assert (
            out.exit_code == 0
        ), f"Expected output code 0 got '{out.exit_code}', {out.output}"

        # The download must have written something into the directory
        assert os.listdir(output_dir)

        # The model lands in a sub directory named after the target
        model_output_dir = os.path.join(output_dir, tu.GORDO_SINGLE_TARGET)
        assert os.path.isdir(model_output_dir)

        # What was downloaded must load back as an estimator
        assert isinstance(serializer.load(model_output_dir), BaseEstimator)
def test_dump_load_models(model):
    """
    Fit ``model``, round-trip it through ``serializer.dump`` and
    ``serializer.load``, and check the reloaded model predicts the same
    values as the one that was dumped.
    """
    data = np.random.random(size=100).reshape(10, 10)
    model.fit(data.copy(), data.copy())
    expected = model.predict(data.copy())

    with TemporaryDirectory() as dump_dir:
        serializer.dump(model, dump_dir)
        restored = serializer.load(dump_dir)

        actual = restored.predict(data.copy())
        assert np.allclose(expected.flatten(), actual.flatten())
    def test_dump_load_keras_directly(self):
        """
        Fit a ``KerasAutoEncoder`` on its own, round-trip it through
        ``serializer.dump`` and ``serializer.load``, and check both copies
        give the same predictions.
        """
        data = np.random.random(size=100).reshape(10, 10)

        autoencoder = KerasAutoEncoder(kind="feedforward_hourglass")
        autoencoder.fit(data.copy(), data.copy())

        with TemporaryDirectory() as dump_dir:
            serializer.dump(autoencoder, dump_dir)
            restored = serializer.load(dump_dir)

            # The original model predicts first, then the reloaded one.
            expected = autoencoder.predict(data.copy()).flatten()
            actual = restored.predict(data.copy()).flatten()
            self.assertTrue(np.allclose(expected, actual))
def load_model(directory: str, name: str) -> BaseEstimator:
    """
    Load the model stored under ``name`` inside ``directory``.

    Parameters
    ----------
    directory: str
        Parent directory that contains the model sub directory
    name: str
        Name of the model, which is the sub directory of ``directory``
        passed to ``serializer.load``

    Returns
    -------
    BaseEstimator
        Whatever ``serializer.load`` returns for the joined path
    """
    model_path = os.path.join(directory, name)
    return serializer.load(model_path)
    def test_pipeline_serialization(self):
        """
        Round-trip a fitted, nested Pipeline through the serializer.

        The pipeline (PCA -> FeatureUnion[PCA, Pipeline[MinMaxScaler,
        TruncatedSVD]] -> KerasAutoEncoder) is fitted, dumped to a temporary
        directory together with metadata, and the resulting layout is handed
        to ``self._structure_verifier``. It is then loaded back with
        ``serializer.load`` / ``serializer.load_metadata`` and its predictions
        compared to the original. Finally the same prediction comparison is
        repeated for the ``serializer.dumps`` / ``serializer.loads`` pair.
        """

        pipe = Pipeline([
            ("pca1", PCA(n_components=10)),
            (
                "fu",
                FeatureUnion([
                    ("pca2", PCA(n_components=3)),
                    (
                        "pipe",
                        Pipeline([
                            ("minmax", MinMaxScaler()),
                            ("truncsvd", TruncatedSVD(n_components=7)),
                        ]),
                    ),
                ]),
            ),
            ("ae", KerasAutoEncoder(kind="feedforward_hourglass")),
        ])

        # 10 samples x 10 features; the data is used as both input and target
        X = np.random.random(size=100).reshape(10, 10)
        pipe.fit(X.copy(), X.copy())

        with TemporaryDirectory() as tmp:

            # Test dump
            metadata = {"key": "value"}
            serializer.dump(pipe, tmp, metadata=metadata)

            # Assert that dirs are created for each step in Pipeline
            #
            # NOTE(review): several keys are listed twice within the same
            # OrderedDict pair list below (the top-level Pipeline dir, the
            # FeatureUnion dir and the KerasAutoEncoder dir). An OrderedDict
            # built from pairs keeps only the LAST value for a repeated key,
            # so the first value of each repeated key ("metadata.json", the
            # FeatureUnion "params.json" and the KerasAutoEncoder "model.h5")
            # is dropped before _structure_verifier receives the structure.
            # Confirm whether those files are meant to be verified as well.
            expected_structure = OrderedDict([
                ("n_step=000-class=sklearn.pipeline.Pipeline",
                 "metadata.json"),
                (
                    "n_step=000-class=sklearn.pipeline.Pipeline",
                    OrderedDict([
                        (
                            "n_step=000-class=sklearn.decomposition.pca.PCA",
                            "pca1.pkl.gz",
                        ),
                        (
                            "n_step=001-class=sklearn.pipeline.FeatureUnion",
                            "params.json",
                        ),
                        (
                            "n_step=001-class=sklearn.pipeline.FeatureUnion",
                            OrderedDict([
                                (
                                    "n_step=000-class=sklearn.decomposition.pca.PCA",
                                    "pca2.pkl.gz",
                                ),
                                (
                                    "n_step=001-class=sklearn.pipeline.Pipeline",
                                    OrderedDict([
                                        (
                                            "n_step=000-class=sklearn.preprocessing.data.MinMaxScaler",
                                            "minmax.pkl.gz",
                                        ),
                                        (
                                            "n_step=001-class=sklearn.decomposition.truncated_svd.TruncatedSVD",
                                            "truncsvd.pkl.gz",
                                        ),
                                    ]),
                                ),
                            ]),
                        ),
                        (
                            "n_step=002-class=gordo_components.model.models.KerasAutoEncoder",
                            "model.h5",
                        ),
                        (
                            "n_step=002-class=gordo_components.model.models.KerasAutoEncoder",
                            "params.json",
                        ),
                    ]),
                ),
            ])

            # Hand the expected layout, rooted at the dump directory, to the
            # verifier helper defined elsewhere on this test class
            self._structure_verifier(prefix_dir=tmp,
                                     structure=expected_structure)

            # Test load from the serialized pipeline above
            pipe_clone = serializer.load(tmp)
            metadata_clone = serializer.load_metadata(tmp)

            # Ensure the metadata was saved and loaded back
            self.assertEqual(metadata, metadata_clone)

            # Verify same state for both pipelines
            y_hat_pipe1 = pipe.predict(X.copy()).flatten()
            y_hat_pipe2 = pipe_clone.predict(X.copy()).flatten()
            self.assertTrue(np.allclose(y_hat_pipe1, y_hat_pipe2))

            # Now use dumps/loads
            serialized = serializer.dumps(pipe)
            pipe_clone = serializer.loads(serialized)

            # Verify same state for both pipelines
            y_hat_pipe1 = pipe.predict(X.copy()).flatten()
            y_hat_pipe2 = pipe_clone.predict(X.copy()).flatten()
            self.assertTrue(np.allclose(y_hat_pipe1, y_hat_pipe2))