def load_from_dir(cls, directory: str):
    """
    Reconstruct an instance from the contents of ``directory``.

    Reads constructor keyword arguments from ``params.json`` in the
    directory, then replaces the ``base_estimator`` and ``scaler`` entries
    with objects deserialized from same-named sub-directories before
    invoking the class constructor.

    Parameters
    ----------
    directory: str
        Directory holding ``params.json`` plus the ``base_estimator``
        and ``scaler`` serialized sub-directories.

    Returns
    -------
    An instance of ``cls`` built from the loaded parameters.
    """
    with open(os.path.join(directory, "params.json"), "r") as fh:
        kwargs = json.load(fh)
    # These two params are serialized objects, not JSON values — load each
    # from its own sub-directory.
    for component in ("base_estimator", "scaler"):
        kwargs[component] = serializer.load(os.path.join(directory, component))
    return cls(**kwargs)
def load_model_and_metadata(
    model_dir_env_var: str
) -> typing.Tuple[BaseEstimator, dict]:
    """
    Load a model and its metadata from the directory named by the
    ``model_dir_env_var`` environment variable.

    Parameters
    ----------
    model_dir_env_var: str
        The name of the environment variable which stores the location of
        the model

    Returns
    -------
    BaseEstimator, dict
        Tuple where the 0th element is the model, and the 1st element is
        the metadata associated with the model

    Raises
    ------
    ValueError
        If the environment variable is not set.
    NotADirectoryError
        If the environment variable points at a non-existent directory.
    """
    logger.debug("Determining model location...")
    location = os.getenv(model_dir_env_var)
    # Guard clauses: unset variable and bad path each raise a distinct error.
    if location is None:
        raise ValueError(
            f'Environment variable "{model_dir_env_var}" not set!')
    if not os.path.isdir(location):
        raise NotADirectoryError(
            f'The supplied directory: "{location}" does not exist!')
    return serializer.load(location), serializer.load_metadata(location)
def test_client_cli_download_model(watchman_service):
    """
    Test proper execution of client predict sub-command
    """
    runner = CliRunner()
    with tempfile.TemporaryDirectory() as output_dir:
        # Sanity check: nothing has been written here yet.
        assert len(os.listdir(output_dir)) == 0

        cli_args = [
            "client",
            "--project",
            tu.GORDO_PROJECT,
            "--target",
            tu.GORDO_SINGLE_TARGET,
            "download-model",
            output_dir,
        ]
        result = runner.invoke(cli.gordo, args=cli_args)
        assert (
            result.exit_code == 0
        ), f"Expected output code 0 got '{result.exit_code}', {result.output}"

        # The download should have populated the directory...
        assert len(os.listdir(output_dir)) > 0

        # ...with one sub-directory per target, loadable as an estimator.
        model_output_dir = os.path.join(output_dir, tu.GORDO_SINGLE_TARGET)
        assert os.path.isdir(model_output_dir)
        loaded = serializer.load(model_output_dir)
        assert isinstance(loaded, BaseEstimator)
def test_dump_load_models(model):
    """
    A fitted model serialized with ``serializer.dump`` and restored with
    ``serializer.load`` must produce the same predictions as the original.
    """
    X = np.random.random(size=100).reshape(10, 10)
    model.fit(X.copy(), X.copy())
    expected = model.predict(X.copy())

    with TemporaryDirectory() as tmp:
        serializer.dump(model, tmp)
        restored = serializer.load(tmp)
        actual = restored.predict(X.copy())
        assert np.allclose(expected.flatten(), actual.flatten())
def test_dump_load_keras_directly(self):
    """
    Dump/load round-trip of a KerasAutoEncoder (not wrapped in a pipeline)
    must preserve its predictions.
    """
    model = KerasAutoEncoder(kind="feedforward_hourglass")
    X = np.random.random(size=100).reshape(10, 10)
    model.fit(X.copy(), X.copy())

    with TemporaryDirectory() as tmp:
        serializer.dump(model, tmp)
        clone = serializer.load(tmp)
        original_out = model.predict(X.copy()).flatten()
        clone_out = clone.predict(X.copy()).flatten()
        self.assertTrue(np.allclose(original_out, clone_out))
def load_model(directory: str, name: str) -> BaseEstimator:
    """
    Load a given model from the directory by name.

    Parameters
    ----------
    directory: str
        Directory to look for the model
    name: str
        Name of the model to load, this would be the sub directory within
        the directory parameter.

    Returns
    -------
    BaseEstimator
    """
    return serializer.load(os.path.join(directory, name))
def test_pipeline_serialization(self):
    """
    Round-trip a nested Pipeline (PCA -> FeatureUnion -> KerasAutoEncoder)
    through both serializer.dump/load (directory based) and
    serializer.dumps/loads (bytes based), verifying the on-disk directory
    layout, metadata persistence, and prediction equivalence of the clone.
    """
    # Nested structure deliberately exercises every serialization path:
    # a plain sklearn step, a FeatureUnion containing a sub-Pipeline,
    # and a Keras-backed model as the final step.
    pipe = Pipeline([
        ("pca1", PCA(n_components=10)),
        (
            "fu",
            FeatureUnion([
                ("pca2", PCA(n_components=3)),
                (
                    "pipe",
                    Pipeline([
                        ("minmax", MinMaxScaler()),
                        ("truncsvd", TruncatedSVD(n_components=7)),
                    ]),
                ),
            ]),
        ),
        ("ae", KerasAutoEncoder(kind="feedforward_hourglass")),
    ])
    X = np.random.random(size=100).reshape(10, 10)
    pipe.fit(X.copy(), X.copy())
    with TemporaryDirectory() as tmp:

        # Test dump
        metadata = {"key": "value"}
        serializer.dump(pipe, tmp, metadata=metadata)

        # Assert that a dirs are created for each step in Pipeline
        # NOTE(review): several keys in this literal repeat within the same
        # OrderedDict (e.g. the top-level Pipeline key appears with both
        # "metadata.json" and the nested OrderedDict); since OrderedDict
        # keeps only the last value per key, the earlier pairs are
        # discarded. Presumably _structure_verifier only needs the
        # surviving entries — confirm against its implementation.
        expected_structure = OrderedDict([
            ("n_step=000-class=sklearn.pipeline.Pipeline", "metadata.json"),
            (
                "n_step=000-class=sklearn.pipeline.Pipeline",
                OrderedDict([
                    (
                        "n_step=000-class=sklearn.decomposition.pca.PCA",
                        "pca1.pkl.gz",
                    ),
                    (
                        "n_step=001-class=sklearn.pipeline.FeatureUnion",
                        "params.json",
                    ),
                    (
                        "n_step=001-class=sklearn.pipeline.FeatureUnion",
                        OrderedDict([
                            (
                                "n_step=000-class=sklearn.decomposition.pca.PCA",
                                "pca2.pkl.gz",
                            ),
                            (
                                "n_step=001-class=sklearn.pipeline.Pipeline",
                                OrderedDict([
                                    (
                                        "n_step=000-class=sklearn.preprocessing.data.MinMaxScaler",
                                        "minmax.pkl.gz",
                                    ),
                                    (
                                        "n_step=001-class=sklearn.decomposition.truncated_svd.TruncatedSVD",
                                        "truncsvd.pkl.gz",
                                    ),
                                ]),
                            ),
                        ]),
                    ),
                    (
                        "n_step=002-class=gordo_components.model.models.KerasAutoEncoder",
                        "model.h5",
                    ),
                    (
                        "n_step=002-class=gordo_components.model.models.KerasAutoEncoder",
                        "params.json",
                    ),
                ]),
            ),
        ])

        self._structure_verifier(prefix_dir=tmp, structure=expected_structure)

        # Test load from the serialized pipeline above
        pipe_clone = serializer.load(tmp)
        metadata_clone = serializer.load_metadata(tmp)

        # Ensure the metadata was saved and loaded back
        self.assertEqual(metadata, metadata_clone)

        # Verify same state for both pipelines
        y_hat_pipe1 = pipe.predict(X.copy()).flatten()
        y_hat_pipe2 = pipe_clone.predict(X.copy()).flatten()
        self.assertTrue(np.allclose(y_hat_pipe1, y_hat_pipe2))

        # Now use dumps/loads
        serialized = serializer.dumps(pipe)
        pipe_clone = serializer.loads(serialized)

        # Verify same state for both pipelines
        y_hat_pipe1 = pipe.predict(X.copy()).flatten()
        y_hat_pipe2 = pipe_clone.predict(X.copy()).flatten()
        self.assertTrue(np.allclose(y_hat_pipe1, y_hat_pipe2))