Exemple #1
0
    def _save_model(
        model: BaseEstimator,
        machine: Union[Machine, dict],
        output_dir: Union[os.PathLike, str],
    ):
        """
        Save the model according to the expected Argo workflow procedure.

        Parameters
        ----------
        model: BaseEstimator
            The model to save to the directory with gordo serializer.
        machine: Union[Machine, dict]
            Machine instance used to build this model.
        output_dir: Union[os.PathLike, str]
            The directory where to save the model, will create directories if needed.

        Returns
        -------
        Union[os.PathLike, str]
            Path to the saved model
        """
        os.makedirs(output_dir, exist_ok=True)  # Ok if some dirs exist
        serializer.dump(
            model,
            output_dir,
            metadata=machine.to_dict()
            if isinstance(machine, Machine) else machine,
        )
        return output_dir
    def test_pipeline_serialization(self):

        pipe = Pipeline(
            [
                ("pca1", PCA(n_components=10)),
                (
                    "fu",
                    FeatureUnion(
                        [
                            ("pca2", PCA(n_components=3)),
                            (
                                "pipe",
                                Pipeline(
                                    [
                                        ("minmax", MinMaxScaler()),
                                        ("truncsvd", TruncatedSVD(n_components=7)),
                                    ]
                                ),
                            ),
                        ]
                    ),
                ),
                ("ae", KerasAutoEncoder(kind="feedforward_hourglass")),
            ]
        )

        X = np.random.random(size=100).reshape(10, 10)
        pipe.fit(X.copy(), X.copy())

        with TemporaryDirectory() as tmp:

            # Test dump
            metadata = {"key": "value"}
            serializer.dump(pipe, tmp, metadata=metadata)

            # Test load from the serialized pipeline above
            pipe_clone = serializer.load(tmp)
            metadata_clone = serializer.load_metadata(tmp)

            # Ensure the metadata was saved and loaded back
            self.assertEqual(metadata, metadata_clone)

            # Verify same state for both pipelines
            y_hat_pipe1 = pipe.predict(X.copy()).flatten()
            y_hat_pipe2 = pipe_clone.predict(X.copy()).flatten()
            self.assertTrue(np.allclose(y_hat_pipe1, y_hat_pipe2))

            # Now use dumps/loads
            serialized = serializer.dumps(pipe)
            pipe_clone = serializer.loads(serialized)

            # Verify same state for both pipelines
            y_hat_pipe1 = pipe.predict(X.copy()).flatten()
            y_hat_pipe2 = pipe_clone.predict(X.copy()).flatten()
            self.assertTrue(np.allclose(y_hat_pipe1, y_hat_pipe2))
def test_dump_load_models(model):

    X = np.random.random(size=100).reshape(10, 10)
    model.fit(X.copy(), X.copy())
    model_out = model.predict(X.copy())

    with TemporaryDirectory() as tmp:
        serializer.dump(model, tmp)

        model_clone = serializer.load(tmp)
        model_clone_out = model_clone.predict(X.copy())

        assert np.allclose(model_out.flatten(), model_clone_out.flatten())
Exemple #4
0
def trained_model_directory(
    model_collection_directory: str, config_str: str, gordo_name: str
):
    """
    Fixture: Train a basic AutoEncoder and save it to a given directory
    will also save some metadata with the model
    """

    # Model specific to the model being trained here
    model_dir = os.path.join(model_collection_directory, gordo_name)
    os.makedirs(model_dir, exist_ok=True)

    builder = local_build(config_str=config_str)
    model, metadata = next(builder)  # type: ignore
    serializer.dump(model, model_dir, metadata=metadata.to_dict())
    yield model_dir
Exemple #5
0
def trained_model_directories(model_collection_directory: str,
                              config_str: str):
    """
    Fixture: Train a basic AutoEncoder and save it to a given directory
    will also save some metadata with the model
    """

    # Model specific to the model being trained here
    builder = local_build(config_str=config_str)
    model_directories = {}
    for model, metadata in builder:
        metadata_dict = metadata.to_dict()
        model_name = metadata_dict.get("name")
        model_dir = os.path.join(model_collection_directory, model_name)
        os.makedirs(model_dir, exist_ok=True)
        serializer.dump(model, model_dir, metadata=metadata.to_dict())
        model_directories[model_name] = model_dir
    yield model_directories
Exemple #6
0
def download_model(ctx: click.Context, output_dir: str, target: typing.List[str]):
    """
    Download the actual model from the target and write to an output directory
    """
    client = Client(*ctx.obj["args"], **ctx.obj["kwargs"])
    models = client.download_model(targets=target)

    # Iterate over mapping of models and save into their own sub dirs of the output_dir
    for model_name, model in models.items():
        model_out_dir = os.path.join(output_dir, model_name)
        os.mkdir(model_out_dir)
        click.secho(
            f"Writing model '{model_name}' to directory: '{model_out_dir}'...", nl=False
        )
        serializer.dump(model, model_out_dir)
        click.secho(f"done")

    click.secho(f"Wrote all models to directory: {output_dir}", fg="green")