# Example #1 (0 votes)
def test_diff_detector_serializability(config):
    """
    Should play well with the gordo serializer.

    Round-trips a model built from a YAML definition through every
    serializer entry point: definition -> pipeline -> definition, and
    pipeline -> bytes -> pipeline.

    Parameters
    ----------
    config: str
        YAML document describing a pipeline definition (pytest fixture).
    """
    # yaml.load() without an explicit Loader is deprecated since PyYAML 5.1
    # and can execute arbitrary Python on untrusted input; safe_load is
    # sufficient for plain configuration documents.
    config = yaml.safe_load(config)

    model = serializer.pipeline_from_definition(config)
    serializer.pipeline_into_definition(model)
    serialized_bytes = serializer.dumps(model)
    serializer.loads(serialized_bytes)
def test_download_model(gordo_ml_server_client):
    """
    Test we can download a model, loadable via serializer.loads()
    """
    response = gordo_ml_server_client.get("/download-model")

    raw_model = response.get_data()
    loaded_model = serializer.loads(raw_model)

    # Every model exposes a fit method
    assert hasattr(loaded_model, "fit")

    # ...and at least one of predict / transform
    assert any(hasattr(loaded_model, attr) for attr in ("predict", "transform"))
# Example #3 (0 votes)
 def download_model(self) -> typing.Dict[str, BaseEstimator]:
     """
     Download the actual model(s) from the ML server /download-model

     Returns
     -------
     Dict[str, BaseEstimator]
         Mapping of target name to the model

     Raises
     ------
     IOError
         If any endpoint responds with a non-OK HTTP status.
     """
     downloaded: typing.Dict[str, BaseEstimator] = {}
     for endpoint in self.endpoints:
         url = f"{self.base_url + endpoint.endpoint}/download-model"
         resp = self.session.get(url)
         # Fail fast on the first endpoint that cannot serve its model.
         if not resp.ok:
             raise IOError(f"Failed to download model: '{repr(resp.content)}'")
         downloaded[endpoint.name] = serializer.loads(resp.content)
     return downloaded
    def test_pipeline_serialization(self):
        """
        Round-trip a nested Pipeline (PCA -> FeatureUnion -> KerasAutoEncoder)
        through serializer.dump/load (directory based, with metadata) and
        serializer.dumps/loads (bytes based), verifying the on-disk layout
        and that the clone predicts identically to the original.
        """

        # Deliberately nested: Pipeline containing a FeatureUnion which itself
        # contains a sub-Pipeline, to exercise recursive serialization.
        pipe = Pipeline([
            ("pca1", PCA(n_components=10)),
            (
                "fu",
                FeatureUnion([
                    ("pca2", PCA(n_components=3)),
                    (
                        "pipe",
                        Pipeline([
                            ("minmax", MinMaxScaler()),
                            ("truncsvd", TruncatedSVD(n_components=7)),
                        ]),
                    ),
                ]),
            ),
            ("ae", KerasAutoEncoder(kind="feedforward_hourglass")),
        ])

        # 10 samples x 10 features; autoencoder target == input.
        X = np.random.random(size=100).reshape(10, 10)
        pipe.fit(X.copy(), X.copy())

        with TemporaryDirectory() as tmp:

            # Test dump
            metadata = {"key": "value"}
            serializer.dump(pipe, tmp, metadata=metadata)

            # Assert that a dirs are created for each step in Pipeline
            # NOTE(review): OrderedDict collapses duplicate keys (later value
            # wins), so the repeated "n_step=..." keys below do not all survive
            # construction — presumably _structure_verifier only needs the
            # surviving entries; confirm against its implementation.
            expected_structure = OrderedDict([
                ("n_step=000-class=sklearn.pipeline.Pipeline",
                 "metadata.json"),
                (
                    "n_step=000-class=sklearn.pipeline.Pipeline",
                    OrderedDict([
                        (
                            "n_step=000-class=sklearn.decomposition.pca.PCA",
                            "pca1.pkl.gz",
                        ),
                        (
                            "n_step=001-class=sklearn.pipeline.FeatureUnion",
                            "params.json",
                        ),
                        (
                            "n_step=001-class=sklearn.pipeline.FeatureUnion",
                            OrderedDict([
                                (
                                    "n_step=000-class=sklearn.decomposition.pca.PCA",
                                    "pca2.pkl.gz",
                                ),
                                (
                                    "n_step=001-class=sklearn.pipeline.Pipeline",
                                    OrderedDict([
                                        (
                                            "n_step=000-class=sklearn.preprocessing.data.MinMaxScaler",
                                            "minmax.pkl.gz",
                                        ),
                                        (
                                            "n_step=001-class=sklearn.decomposition.truncated_svd.TruncatedSVD",
                                            "truncsvd.pkl.gz",
                                        ),
                                    ]),
                                ),
                            ]),
                        ),
                        (
                            "n_step=002-class=gordo_components.model.models.KerasAutoEncoder",
                            "model.h5",
                        ),
                        (
                            "n_step=002-class=gordo_components.model.models.KerasAutoEncoder",
                            "params.json",
                        ),
                    ]),
                ),
            ])

            self._structure_verifier(prefix_dir=tmp,
                                     structure=expected_structure)

            # Test load from the serialized pipeline above
            pipe_clone = serializer.load(tmp)
            metadata_clone = serializer.load_metadata(tmp)

            # Ensure the metadata was saved and loaded back
            self.assertEqual(metadata, metadata_clone)

            # Verify same state for both pipelines
            y_hat_pipe1 = pipe.predict(X.copy()).flatten()
            y_hat_pipe2 = pipe_clone.predict(X.copy()).flatten()
            self.assertTrue(np.allclose(y_hat_pipe1, y_hat_pipe2))

            # Now use dumps/loads
            serialized = serializer.dumps(pipe)
            pipe_clone = serializer.loads(serialized)

            # Verify same state for both pipelines
            y_hat_pipe1 = pipe.predict(X.copy()).flatten()
            y_hat_pipe2 = pipe_clone.predict(X.copy()).flatten()
            self.assertTrue(np.allclose(y_hat_pipe1, y_hat_pipe2))