Example #1
def test_output_dir(tmp_dir):
    """
    Test building of model will create subdirectories for model saving if needed.
    """
    from gordo_components.builder import build_model

    model_config = {"sklearn.decomposition.pca.PCA": {"svd_solver": "auto"}}
    data_config = get_random_data()
    output_dir = os.path.join(tmp_dir.name, "some", "sub", "directories")

    model, metadata = build_model(
        name="model-name",
        model_config=model_config,
        data_config=data_config,
        metadata={},
    )
    metadata_check(metadata, False)

    _save_model_for_workflow(model=model,
                             metadata=metadata,
                             output_dir=output_dir)

    # Assert the model was saved at the given location;
    # saving via gordo_components.serializer should create some subdir(s)
    # which start with 'n_step'
    dirs = [d for d in os.listdir(output_dir) if d.startswith("n_step")]
    assert (
        len(dirs) >= 1
    ), f"Expected saving of model to create at least one subdir, but got {len(dirs)}"
Example #2
    def test_model_builder_pipeline_in_pipeline(self):
        from gordo_components.builder import build_model
        import yaml

        raw_model_config = """
            sklearn.pipeline.Pipeline:
                steps:
                  - sklearn.pipeline.Pipeline:
                      steps:
                        - sklearn.preprocessing.data.MinMaxScaler
                  - sklearn.pipeline.Pipeline:
                      steps:
                        - sklearn.decomposition.pca.PCA:
                            svd_solver: auto
            """

        model_config = yaml.load(raw_model_config, Loader=yaml.FullLoader)
        data_config = get_random_data()

        model, metadata = build_model(
            name="model-name",
            model_config=model_config,
            data_config=data_config,
            metadata={},
        )

        self.metadata_check(metadata, False)
Example #3
def test_model_builder_cv_scores_only(should_be_equal: bool,
                                      evaluation_config: dict):
    """
    Test checks that the model is None if cross_val_only is used as the cv_mode.
    If the default mode ('full_build') is used, the model should not be None.

    Parameters
    ----------
    should_be_equal: bool
        Refers to whether or not the cv_mode should be equal to full (default) or cross_val only.
    evaluation_config: dict
        The mode which is tested from within the evaluation_config, is either full or cross_val_only

    """

    model_config = {"sklearn.decomposition.pca.PCA": {"svd_solver": "auto"}}
    data_config = get_random_data()

    model, metadata = build_model(
        name="model-name",
        model_config=model_config,
        data_config=data_config,
        metadata={},
        evaluation_config=evaluation_config,
    )
    if should_be_equal:
        assert model is not None
    else:
        assert model is None
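
The parameters above arrive via a pytest parametrize decorator that the snippet does not show. A plausible reconstruction, assuming evaluation_config carries a 'cv_mode' key with the two values named in the docstring:

import pytest

@pytest.mark.parametrize(
    "should_be_equal,evaluation_config",
    [
        (True, {"cv_mode": "full_build"}),       # default mode: a model is returned
        (False, {"cv_mode": "cross_val_only"}),  # CV only: model stays None
    ],
)
def test_model_builder_cv_scores_only(should_be_equal: bool,
                                      evaluation_config: dict):
    ...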
Example #4
def test_scores_metadata(raw_model_config):
    data_config = get_random_data()
    model_config = yaml.load(raw_model_config, Loader=yaml.FullLoader)
    model, metadata = build_model(
        name="model-name",
        model_config=model_config,
        data_config=data_config,
        metadata={},
    )
    metadata_check(metadata, False)
Example #5
    def test_model_builder_model_without_pipeline(self):
        raw_model_config = """
        gordo_components.model.models.KerasAutoEncoder:
            kind: feedforward_hourglass
        """

        model_config = yaml.load(raw_model_config, Loader=yaml.FullLoader)
        data_config = get_random_data()

        model, metadata = build_model(model_config=model_config,
                                      data_config=data_config,
                                      metadata={})

        self.metadata_check(metadata, True)
Example #6
def test_output_scores_metadata():
    data_config = get_random_data()
    raw_model_config = f"""
            gordo_components.model.anomaly.diff.DiffBasedAnomalyDetector:
                scaler: sklearn.preprocessing.data.MinMaxScaler
                base_estimator:
                    sklearn.compose.TransformedTargetRegressor:
                        transformer: sklearn.preprocessing.data.MinMaxScaler
                        regressor:
                            sklearn.pipeline.Pipeline:
                                steps:
                                - sklearn.preprocessing.data.MinMaxScaler
                                - gordo_components.model.models.KerasAutoEncoder:
                                    kind: feedforward_hourglass
                                    batch_size: 3
                                    compression_factor: 0.5
                                    encoding_layers: 1
                                    func: tanh
                                    out_func: linear
                                    epochs: 1
            """

    model_config = yaml.load(raw_model_config, Loader=yaml.FullLoader)
    model, metadata = build_model(
        name="model-name",
        model_config=model_config,
        data_config=data_config,
        metadata={},
    )
    scores_metadata = metadata["model"]["cross-validation"]["scores"]
    assert (scores_metadata["explained-variance-score-Tag-1"]["fold-mean"] +
            scores_metadata["explained-variance-score-Tag-2"]["fold-mean"]
            ) / 2 == pytest.approx(
                scores_metadata["explained-variance-score"]["fold-mean"])

    assert (
        scores_metadata["r2-score-Tag-1"]["fold-mean"] +
        scores_metadata["r2-score-Tag-2"]["fold-mean"]) / 2 == pytest.approx(
            scores_metadata["r2-score"]["fold-mean"])

    assert (scores_metadata["mean-squared-error-Tag-1"]["fold-mean"] +
            scores_metadata["mean-squared-error-Tag-2"]["fold-mean"]
            ) / 2 == pytest.approx(
                scores_metadata["mean-squared-error"]["fold-mean"])

    assert (scores_metadata["mean-absolute-error-Tag-1"]["fold-mean"] +
            scores_metadata["mean-absolute-error-Tag-2"]["fold-mean"]
            ) / 2 == pytest.approx(
                scores_metadata["mean-absolute-error"]["fold-mean"])
Example #7
def test_builder_metadata(raw_model_config):
    """
    Ensure the builder works with various model configs and that each has
    expected/valid metadata results.
    """
    model_config = yaml.load(raw_model_config, Loader=yaml.FullLoader)
    data_config = get_random_data()

    model, metadata = build_model(
        name="model-name",
        model_config=model_config,
        data_config=data_config,
        metadata={},
    )
    # Check metadata, and only verify 'history' if it's a *Keras* type model
    metadata_check(metadata, "Keras" in raw_model_config)
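
Here too, raw_model_config is supplied by a parametrize decorator that is not shown. One way it might be populated, reusing configs that appear elsewhere in this collection (the actual parameter list is an assumption):

import pytest

@pytest.mark.parametrize(
    "raw_model_config",
    [
        # Plain sklearn model: no training history expected in the metadata
        """
        sklearn.decomposition.pca.PCA:
            svd_solver: auto
        """,
        # Keras model: metadata_check should also find the 'history' entry
        """
        gordo_components.model.models.KerasAutoEncoder:
            kind: feedforward_hourglass
        """,
    ],
)
def test_builder_metadata(raw_model_config):
    ...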
Example #8
    def test_model_builder_pipeline(self):
        raw_model_config = """
        sklearn.pipeline.Pipeline:
            steps:
              - sklearn.preprocessing.data.MinMaxScaler
              - gordo_components.model.models.KerasAutoEncoder:
                  kind: feedforward_hourglass
        """

        model_config = yaml.load(raw_model_config, Loader=yaml.FullLoader)
        data_config = get_random_data()

        model, metadata = build_model(model_config=model_config,
                                      data_config=data_config,
                                      metadata={})

        self.metadata_check(metadata, True)
Example #9
    def test_model_builder_save_history(self):
        """Checks that the metadata contains the keras model build history"""
        raw_model_config = """
        gordo_components.model.models.KerasAutoEncoder:
            kind: feedforward_hourglass
        """

        model_config = yaml.load(raw_model_config, Loader=yaml.FullLoader)
        data_config = get_random_data()

        model, metadata = build_model(
            name="model-name",
            model_config=model_config,
            data_config=data_config,
            metadata={},
        )

        self.metadata_check(metadata, True)
Example #10
    def test_model_builder_model_without_pipeline(self):

        # MinMaxScaler is only a transformer and has no score method either.
        raw_model_config = """
        sklearn.preprocessing.data.MinMaxScaler:
            feature_range: [-1, 1]
        """

        model_config = yaml.load(raw_model_config, Loader=yaml.FullLoader)
        data_config = get_random_data()

        model, metadata = build_model(
            name="model-name",
            model_config=model_config,
            data_config=data_config,
            metadata={},
        )

        self.metadata_check(metadata, False)
Example #11
    def test_output_dir(self):
        """
        Test building of model will create subdirectories for model saving if needed.
        """
        from gordo_components.builder import build_model

        with TemporaryDirectory() as tmpdir:

            model_config = {
                "gordo_components.model.models.KerasAutoEncoder": {
                    "kind": "feedforward_hourglass"
                }
            }
            data_config = get_random_data()
            output_dir = os.path.join(tmpdir, "some", "sub", "directories")

            model, metadata = build_model(model_config=model_config,
                                          data_config=data_config,
                                          metadata={})

            self.metadata_check(metadata, True)

            _save_model_for_workflow(model=model,
                                     metadata=metadata,
                                     output_dir=output_dir)

            # Assert the model was saved at the given location;
            # saving via gordo_components.serializer should create some subdir(s)
            # which start with 'n_step'
            dirs = [
                d for d in os.listdir(output_dir) if d.startswith("n_step")
            ]
            self.assertGreaterEqual(
                len(dirs),
                1,
                msg="Expected saving of model to create at "
                f"least one subdir, but got {len(dirs)}",
            )