def _save_model(
    model: BaseEstimator,
    machine: Union[Machine, dict],
    output_dir: Union[os.PathLike, str],
):
    """
    Persist a model to disk following the expected Argo workflow procedure.

    Parameters
    ----------
    model: BaseEstimator
        Model to write out through the gordo serializer.
    machine: Union[Machine, dict]
        Machine used to build the model; stored as metadata next to it.
        A ``Machine`` is converted with ``to_dict()``, a dict is passed
        through unchanged.
    output_dir: Union[os.PathLike, str]
        Destination directory; missing directories are created.

    Returns
    -------
    Union[os.PathLike, str]
        The ``output_dir`` that was passed in.
    """
    # Directories that already exist are fine; only missing ones are created.
    os.makedirs(output_dir, exist_ok=True)

    if isinstance(machine, Machine):
        metadata = machine.to_dict()
    else:
        metadata = machine

    serializer.dump(model, output_dir, metadata=metadata)
    return output_dir
def test_pipeline_serialization(self):
    """
    Round-trip a nested pipeline through the serializer.

    Covers both the directory based ``dump``/``load`` (including metadata)
    and the in-memory ``dumps``/``loads``, checking that the restored
    pipeline predicts the same values as the fitted original.
    """
    scaled_svd = Pipeline(
        [
            ("minmax", MinMaxScaler()),
            ("truncsvd", TruncatedSVD(n_components=7)),
        ]
    )
    union = FeatureUnion(
        [
            ("pca2", PCA(n_components=3)),
            ("pipe", scaled_svd),
        ]
    )
    pipe = Pipeline(
        [
            ("pca1", PCA(n_components=10)),
            ("fu", union),
            ("ae", KerasAutoEncoder(kind="feedforward_hourglass")),
        ]
    )

    X = np.random.random(size=100).reshape(10, 10)
    pipe.fit(X.copy(), X.copy())

    def assert_same_predictions(restored):
        # Predict with the original first, then the restored pipeline, each
        # on its own copy of X.
        expected = pipe.predict(X.copy()).flatten()
        actual = restored.predict(X.copy()).flatten()
        self.assertTrue(np.allclose(expected, actual))

    with TemporaryDirectory() as tmp:
        # Directory based round trip, including metadata
        metadata = {"key": "value"}
        serializer.dump(pipe, tmp, metadata=metadata)

        restored_from_dir = serializer.load(tmp)
        restored_metadata = serializer.load_metadata(tmp)

        # The metadata must come back exactly as it was stored
        self.assertEqual(metadata, restored_metadata)
        assert_same_predictions(restored_from_dir)

        # In-memory round trip via dumps/loads
        payload = serializer.dumps(pipe)
        restored_from_bytes = serializer.loads(payload)
        assert_same_predictions(restored_from_bytes)
def test_dump_load_models(model):
    """
    Fit ``model``, dump it to a temporary directory, load it back and check
    that the reloaded model predicts the same values as the original.
    """
    data = np.random.random(size=100).reshape(10, 10)
    model.fit(data.copy(), data.copy())
    expected = model.predict(data.copy())

    with TemporaryDirectory() as workdir:
        serializer.dump(model, workdir)
        restored = serializer.load(workdir)
        actual = restored.predict(data.copy())
        assert np.allclose(expected.flatten(), actual.flatten())
def trained_model_directory(
    model_collection_directory: str, config_str: str, gordo_name: str
):
    """
    Fixture: build the first model produced by ``local_build`` for
    ``config_str`` and save it, with its metadata, under
    ``model_collection_directory/gordo_name``.

    Yields the directory the model was written to.
    """
    # Each model gets a directory named after it inside the collection
    target_dir = os.path.join(model_collection_directory, gordo_name)
    os.makedirs(target_dir, exist_ok=True)

    trained, machine = next(local_build(config_str=config_str))  # type: ignore
    serializer.dump(trained, target_dir, metadata=machine.to_dict())
    yield target_dir
def trained_model_directories(model_collection_directory: str, config_str: str):
    """
    Fixture: build every model produced by ``local_build`` for ``config_str``
    and save each one, together with its metadata, into its own
    sub-directory of ``model_collection_directory``.

    The sub-directory is named after the ``"name"`` entry of the model's
    metadata.

    Yields
    ------
    dict
        Mapping of model name to the directory that model was saved in.
    """
    model_directories = {}
    for model, metadata in local_build(config_str=config_str):
        # Convert once; the same dict supplies the name and is what gets saved.
        metadata_dict = metadata.to_dict()
        model_name = metadata_dict.get("name")

        model_dir = os.path.join(model_collection_directory, model_name)
        os.makedirs(model_dir, exist_ok=True)

        serializer.dump(model, model_dir, metadata=metadata_dict)
        model_directories[model_name] = model_dir

    yield model_directories
def download_model(ctx: click.Context, output_dir: str, target: typing.List[str]):
    """
    Download the actual model from the target and write to an output directory

    Each downloaded model is written to its own sub-directory of
    ``output_dir``, named after the model. Sub-directories (and
    ``output_dir`` itself) are created when missing and reused when they
    already exist, so the command can be re-run against the same directory.

    Parameters
    ----------
    ctx: click.Context
        Click context; ``ctx.obj["args"]`` and ``ctx.obj["kwargs"]`` are
        forwarded to ``Client``.
    output_dir: str
        Directory under which one sub-directory per model is written.
    target: typing.List[str]
        Targets passed to ``Client.download_model``.
    """
    client = Client(*ctx.obj["args"], **ctx.obj["kwargs"])
    models = client.download_model(targets=target)

    # Iterate over mapping of models and save into their own sub dirs of the output_dir
    for model_name, model in models.items():
        model_out_dir = os.path.join(output_dir, model_name)
        # makedirs(exist_ok=True) instead of mkdir: tolerate an existing model
        # directory and a missing parent rather than aborting mid-download.
        # NOTE(review): an existing directory is written into as-is; confirm
        # serializer.dump replaces any earlier content as intended.
        os.makedirs(model_out_dir, exist_ok=True)
        click.secho(
            f"Writing model '{model_name}' to directory: '{model_out_dir}'...", nl=False
        )
        serializer.dump(model, model_out_dir)
        click.secho("done")

    click.secho(f"Wrote all models to directory: {output_dir}", fg="green")