def test_dump_load_models(model):
    X = np.random.random(size=100).reshape(10, 10)
    model.fit(X.copy(), X.copy())
    model_out = model.predict(X.copy())

    with TemporaryDirectory() as tmp:
        serializer.dump(model, tmp)
        model_clone = serializer.load(tmp)
        model_clone_out = model_clone.predict(X.copy())
        assert np.allclose(model_out.flatten(), model_clone_out.flatten())

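
# A minimal sketch of how the ``model`` argument above might be supplied as a
# pytest fixture; the fixture below and its single KerasAutoEncoder entry are
# assumptions for illustration, not necessarily the project's actual test setup.
import pytest

from gordo_components.model.models import KerasAutoEncoder


@pytest.fixture
def model():
    # Any fit/predict estimator supported by the serializer would do here
    return KerasAutoEncoder(kind="feedforward_hourglass")
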
def trained_model_directory(
    gordo_project: str, gordo_name: str, sensors: List[SensorTag]
):
    """
    Fixture: train a basic AutoEncoder and save it to a given directory,
    along with some metadata for the model.
    """
    with tempfile.TemporaryDirectory() as model_dir:

        # This is a model collection directory
        collection_dir = os.path.join(model_dir, gordo_project)

        # Directory specific to the model being trained here
        model_dir = os.path.join(collection_dir, gordo_name)
        os.makedirs(model_dir, exist_ok=True)

        definition = ruamel.yaml.load(
            """
            gordo_components.model.anomaly.diff.DiffBasedAnomalyDetector:
                base_estimator:
                    sklearn.pipeline.Pipeline:
                        steps:
                          - sklearn.preprocessing.data.MinMaxScaler
                          - gordo_components.model.models.KerasAutoEncoder:
                                kind: feedforward_hourglass
                        memory:
            """,
            Loader=ruamel.yaml.Loader,
        )
        model = serializer.pipeline_from_definition(definition)
        X = np.random.random(size=len(sensors) * 10).reshape(10, len(sensors))
        model.fit(X, X)
        serializer.dump(
            model,
            model_dir,
            metadata={
                "dataset": {
                    "tag_list": sensors,
                    "resolution": "10T",
                    "target_tag_list": sensors,
                },
                "name": "machine-1",
                "model": {"model-offset": 0},
                "user-defined": {"model-name": "test-model"},
            },
        )
        yield collection_dir

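
# A sketch of how a test might consume the fixture above: it yields the
# *collection* directory, so the serialized model sits one level down under
# ``gordo_name``. The test name below is an assumption for illustration;
# serializer.load / serializer.load_metadata are the calls used elsewhere here.
def test_trained_model_directory_roundtrip(trained_model_directory, gordo_name: str):
    model_dir = os.path.join(trained_model_directory, gordo_name)
    model_clone = serializer.load(model_dir)
    metadata = serializer.load_metadata(model_dir)
    assert metadata["name"] == "machine-1"
    assert hasattr(model_clone, "predict")
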
def test_dump_load_keras_directly(self):
    model = KerasAutoEncoder(kind="feedforward_hourglass")

    X = np.random.random(size=100).reshape(10, 10)
    model.fit(X.copy(), X.copy())

    with TemporaryDirectory() as tmp:
        serializer.dump(model, tmp)
        model_clone = serializer.load(tmp)
        self.assertTrue(
            np.allclose(
                model.predict(X.copy()).flatten(),
                model_clone.predict(X.copy()).flatten(),
            )
        )

def download_model(ctx: click.Context, output_dir: str):
    """
    Download the actual model(s) from the target(s) and write them to an
    output directory.
    """
    client = Client(*ctx.obj["args"], **ctx.obj["kwargs"])
    models = client.download_model()

    # Iterate over the mapping of models and save each into its own
    # sub-directory of output_dir
    for target, model in models.items():
        model_out_dir = os.path.join(output_dir, target)
        os.mkdir(model_out_dir)
        click.secho(
            f"Writing model '{target}' to directory: '{model_out_dir}'...", nl=False
        )
        serializer.dump(model, model_out_dir)
        click.secho("done")

    click.secho(f"Wrote all models to directory: {output_dir}", fg="green")

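
# ``download_model`` pulls its Client construction arguments from ``ctx.obj``,
# so it has to hang off a parent click group that seeds that mapping. A minimal
# sketch of such wiring, assuming a single ``--project`` option; the group
# name, its options, and the Client signature here are assumptions.
import click


@click.group("client")
@click.option("--project", help="Name of the project the targets belong to")
@click.pass_context
def client_group(ctx: click.Context, project: str):
    # Collect args/kwargs that download_model will forward to Client()
    ctx.obj = {"args": (project,), "kwargs": {}}


@client_group.command("download-model")
@click.argument("output-dir", type=click.Path(exists=True))
@click.pass_context
def download_model_cmd(ctx: click.Context, output_dir: str):
    download_model(ctx, output_dir)
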
def _save_model_for_workflow(
    model: BaseEstimator, metadata: dict, output_dir: Union[os.PathLike, str]
):
    """
    Save a model according to the expected Argo workflow procedure.

    Parameters
    ----------
    model: BaseEstimator
        The model to save to the directory with the gordo serializer.
    metadata: dict
        Various mappings of metadata to save alongside the model.
    output_dir: Union[os.PathLike, str]
        The directory in which to save the model; directories are created
        as needed.

    Returns
    -------
    Union[os.PathLike, str]
        Path to the saved model
    """
    os.makedirs(output_dir, exist_ok=True)  # Ok if some dirs exist
    serializer.dump(model, output_dir, metadata=metadata)
    return output_dir

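
# A quick sketch of calling _save_model_for_workflow directly, assuming a plain
# scikit-learn estimator is acceptable to serializer.dump (the bare-estimator
# dumps above suggest it is); the function name and output path are
# illustrative assumptions.
import numpy as np
from sklearn.preprocessing import MinMaxScaler


def example_save_for_workflow():
    X = np.random.random((10, 4))
    return _save_model_for_workflow(
        model=MinMaxScaler().fit(X),
        metadata={"name": "machine-1"},
        output_dir="/tmp/model-output",  # hypothetical path
    )
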
def trained_model_directory(sensors: List[SensorTag]):
    """
    Fixture: train a basic AutoEncoder and save it to a given directory,
    along with some metadata for the model.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        definition = ruamel.yaml.load(
            """
            gordo_components.model.anomaly.diff.DiffBasedAnomalyDetector:
                base_estimator:
                    sklearn.pipeline.Pipeline:
                        steps:
                          - sklearn.preprocessing.data.MinMaxScaler
                          - gordo_components.model.models.KerasAutoEncoder:
                                kind: feedforward_hourglass
                        memory:
            """,
            Loader=ruamel.yaml.Loader,
        )
        model = serializer.pipeline_from_definition(definition)
        X = np.random.random(size=len(sensors) * 10).reshape(10, len(sensors))
        model.fit(X, X)
        serializer.dump(
            model,
            tmp_dir,
            metadata={
                "dataset": {
                    "tag_list": sensors,
                    "resolution": "10T",
                    "target_tag_list": sensors,
                },
                "name": "machine-1",
                "user-defined": {"model-name": "test-model"},
            },
        )
        yield tmp_dir

def test_pipeline_serialization(self):
    pipe = Pipeline(
        [
            ("pca1", PCA(n_components=10)),
            (
                "fu",
                FeatureUnion(
                    [
                        ("pca2", PCA(n_components=3)),
                        (
                            "pipe",
                            Pipeline(
                                [
                                    ("minmax", MinMaxScaler()),
                                    ("truncsvd", TruncatedSVD(n_components=7)),
                                ]
                            ),
                        ),
                    ]
                ),
            ),
            ("ae", KerasAutoEncoder(kind="feedforward_hourglass")),
        ]
    )

    X = np.random.random(size=100).reshape(10, 10)
    pipe.fit(X.copy(), X.copy())

    with TemporaryDirectory() as tmp:

        # Test dump
        metadata = {"key": "value"}
        serializer.dump(pipe, tmp, metadata=metadata)

        # Assert that a directory is created for each step in the Pipeline
        expected_structure = OrderedDict(
            [
                ("n_step=000-class=sklearn.pipeline.Pipeline", "metadata.json"),
                (
                    "n_step=000-class=sklearn.pipeline.Pipeline",
                    OrderedDict(
                        [
                            (
                                "n_step=000-class=sklearn.decomposition.pca.PCA",
                                "pca1.pkl.gz",
                            ),
                            (
                                "n_step=001-class=sklearn.pipeline.FeatureUnion",
                                "params.json",
                            ),
                            (
                                "n_step=001-class=sklearn.pipeline.FeatureUnion",
                                OrderedDict(
                                    [
                                        (
                                            "n_step=000-class=sklearn.decomposition.pca.PCA",
                                            "pca2.pkl.gz",
                                        ),
                                        (
                                            "n_step=001-class=sklearn.pipeline.Pipeline",
                                            OrderedDict(
                                                [
                                                    (
                                                        "n_step=000-class=sklearn.preprocessing.data.MinMaxScaler",
                                                        "minmax.pkl.gz",
                                                    ),
                                                    (
                                                        "n_step=001-class=sklearn.decomposition.truncated_svd.TruncatedSVD",
                                                        "truncsvd.pkl.gz",
                                                    ),
                                                ]
                                            ),
                                        ),
                                    ]
                                ),
                            ),
                            (
                                "n_step=002-class=gordo_components.model.models.KerasAutoEncoder",
                                "model.h5",
                            ),
                            (
                                "n_step=002-class=gordo_components.model.models.KerasAutoEncoder",
                                "params.json",
                            ),
                        ]
                    ),
                ),
            ]
        )

        self._structure_verifier(prefix_dir=tmp, structure=expected_structure)

        # Test load from the serialized pipeline above
        pipe_clone = serializer.load(tmp)
        metadata_clone = serializer.load_metadata(tmp)

        # Ensure the metadata was saved and loaded back
        self.assertEqual(metadata, metadata_clone)

        # Verify same state for both pipelines
        y_hat_pipe1 = pipe.predict(X.copy()).flatten()
        y_hat_pipe2 = pipe_clone.predict(X.copy()).flatten()
        self.assertTrue(np.allclose(y_hat_pipe1, y_hat_pipe2))

    # Now use dumps/loads
    serialized = serializer.dumps(pipe)
    pipe_clone = serializer.loads(serialized)

    # Verify same state for both pipelines
    y_hat_pipe1 = pipe.predict(X.copy()).flatten()
    y_hat_pipe2 = pipe_clone.predict(X.copy()).flatten()
    self.assertTrue(np.allclose(y_hat_pipe1, y_hat_pipe2))

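
# ``_structure_verifier`` is referenced above but not defined in this section.
# A minimal sketch of what such a helper could look like as a method on the
# TestCase, assuming each key is a directory name mapped to either an expected
# file (str) or a nested mapping; this is an assumption, not the actual
# gordo_components helper. Note that duplicate keys in an OrderedDict literal
# collapse to the last value, so a verifier written this way only ever sees
# the final entry for each repeated key in the expected structure.
import os
from collections import OrderedDict


def _structure_verifier(self, prefix_dir: str, structure: OrderedDict):
    for directory, expected in structure.items():
        path = os.path.join(prefix_dir, directory)
        self.assertTrue(os.path.isdir(path), msg=f"Missing directory: {path}")
        if isinstance(expected, str):
            # A single file is expected inside this directory
            file_path = os.path.join(path, expected)
            self.assertTrue(os.path.isfile(file_path), msg=f"Missing file: {file_path}")
        else:
            # Nested structure: recurse into the sub-directory
            self._structure_verifier(prefix_dir=path, structure=expected)
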