import yaml

from gordo import serializer  # import path assumes the current gordo package layout


def test_diff_detector_serializability(config):
    """
    Should play well with the gordo serializer
    """
    # yaml.load without an explicit Loader is deprecated in PyYAML >= 5.1
    config = yaml.safe_load(config)
    model = serializer.from_definition(config)
    serializer.into_definition(model)
    serialized_bytes = serializer.dumps(model)
    serializer.loads(serialized_bytes)
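# For context, a minimal sketch of the kind of definition the round-trip above
# exercises. The exact pipeline below is an illustrative assumption, not the
# contents of the test's `config` fixture.
example_definition = yaml.safe_load(
    """
    sklearn.pipeline.Pipeline:
      steps:
        - sklearn.preprocessing.MinMaxScaler
        - sklearn.decomposition.PCA:
            n_components: 2
    """
)
example_pipe = serializer.from_definition(example_definition)
serializer.into_definition(example_pipe)  # back to a plain definition dict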
def download_model(
    self, revision=None, targets: Optional[List[str]] = None
) -> typing.Dict[str, BaseEstimator]:
    """
    Download the actual model(s) from the ML server /download-model

    Returns
    -------
    Dict[str, BaseEstimator]
        Mapping of target name to the model
    """
    models = dict()
    for machine_name in targets or self.get_machine_names(revision=revision):
        resp = self.session.get(
            f"{self.base_url}/gordo/v0/{self.project_name}/{machine_name}/download-model"
        )
        content = _handle_response(
            resp, resource_name=f"Model download for model {machine_name}"
        )
        if isinstance(content, bytes):
            models[machine_name] = serializer.loads(content)
        else:
            raise ValueError(
                f"Got unexpected return type: {type(content)} when attempting to"
                f" download the model {machine_name}."
            )
    return models
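# Usage sketch for download_model, with assumptions flagged: `Client` stands in
# for whatever class defines the method above, and the project/target names are
# hypothetical. It needs a reachable Gordo ML server, so it is shown as a
# comment rather than executable code.
#
#   client = Client(project="my-project")
#   models = client.download_model(targets=["machine-a"])
#   models["machine-a"].predict(X)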
def test_pipeline_serialization(self):
    pipe = Pipeline(
        [
            ("pca1", PCA(n_components=10)),
            (
                "fu",
                FeatureUnion(
                    [
                        ("pca2", PCA(n_components=3)),
                        (
                            "pipe",
                            Pipeline(
                                [
                                    ("minmax", MinMaxScaler()),
                                    ("truncsvd", TruncatedSVD(n_components=7)),
                                ]
                            ),
                        ),
                    ]
                ),
            ),
            ("ae", KerasAutoEncoder(kind="feedforward_hourglass")),
        ]
    )
    X = np.random.random(size=100).reshape(10, 10)
    pipe.fit(X.copy(), X.copy())

    with TemporaryDirectory() as tmp:
        # Test dump
        metadata = {"key": "value"}
        serializer.dump(pipe, tmp, metadata=metadata)

        # Test load from the serialized pipeline above
        pipe_clone = serializer.load(tmp)
        metadata_clone = serializer.load_metadata(tmp)

        # Ensure the metadata was saved and loaded back
        self.assertEqual(metadata, metadata_clone)

        # Verify same state for both pipelines
        y_hat_pipe1 = pipe.predict(X.copy()).flatten()
        y_hat_pipe2 = pipe_clone.predict(X.copy()).flatten()
        self.assertTrue(np.allclose(y_hat_pipe1, y_hat_pipe2))

        # Now use dumps/loads
        serialized = serializer.dumps(pipe)
        pipe_clone = serializer.loads(serialized)

        # Verify same state for both pipelines
        y_hat_pipe1 = pipe.predict(X.copy()).flatten()
        y_hat_pipe2 = pipe_clone.predict(X.copy()).flatten()
        self.assertTrue(np.allclose(y_hat_pipe1, y_hat_pipe2))
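# The same dump/load round-trip as above, condensed into a standalone sketch.
# The MinMaxScaler is just an illustrative stand-in for any fitted estimator;
# it avoids the Keras dependency of the full test.
from tempfile import TemporaryDirectory

import numpy as np
from sklearn.preprocessing import MinMaxScaler

from gordo import serializer

scaler = MinMaxScaler().fit(np.random.random((10, 4)))
with TemporaryDirectory() as tmp:
    serializer.dump(scaler, tmp, metadata={"trained-on": "random data"})
    scaler_clone = serializer.load(tmp)
    assert serializer.load_metadata(tmp) == {"trained-on": "random data"}
    assert np.allclose(
        scaler.transform(np.ones((1, 4))), scaler_clone.transform(np.ones((1, 4)))
    )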
def test_download_model(api_version, gordo_project, gordo_name, gordo_ml_server_client):
    """
    Test we can download a model, loadable via serializer.loads()
    """
    resp = gordo_ml_server_client.get(
        f"/gordo/{api_version}/{gordo_project}/{gordo_name}/download-model"
    )
    serialized_model = resp.get_data()
    model = serializer.loads(serialized_model)

    # All models have a fit method
    assert hasattr(model, "fit")

    # Models MUST have either predict or transform
    assert hasattr(model, "predict") or hasattr(model, "transform")

    # Asking for a model that doesn't exist gives 404
    resp = gordo_ml_server_client.get(
        f"/gordo/{api_version}/{gordo_project}/invalid-model-name/download-model"
    )
    assert resp.status_code == 404