def test_output_dir(tmp_dir):
    """
    Test that building a model creates subdirectories for model saving when needed.
    """
    from gordo_components.builder import build_model

    model_config = {"sklearn.decomposition.pca.PCA": {"svd_solver": "auto"}}
    data_config = get_random_data()
    output_dir = os.path.join(tmp_dir.name, "some", "sub", "directories")

    model, metadata = build_model(
        name="model-name",
        model_config=model_config,
        data_config=data_config,
        metadata={},
    )
    metadata_check(metadata, False)

    _save_model_for_workflow(model=model, metadata=metadata, output_dir=output_dir)

    # Assert the model was saved at the location;
    # using gordo_components.serializer should create some subdir(s)
    # which start with 'n_step'
    dirs = [d for d in os.listdir(output_dir) if d.startswith("n_step")]
    assert (
        len(dirs) >= 1
    ), f"Expected saving of model to create at least one subdir, but got {len(dirs)}"

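# The tests in this module lean on module-level helpers (`get_random_data`,
# `metadata_check`) defined elsewhere. A minimal sketch of what they could
# look like, assuming a random-data provider keyed by the two tags asserted
# on below; the names and shapes here are assumptions for illustration, not
# the canonical implementations:
from datetime import datetime, timezone


def _example_get_random_data():
    """Hypothetical data_config for a random-data provider over two tags."""
    return {
        "type": "RandomDataset",
        "from_ts": datetime(2017, 12, 25, 6, tzinfo=timezone.utc),
        "to_ts": datetime(2017, 12, 30, 6, tzinfo=timezone.utc),
        "tag_list": ["Tag-1", "Tag-2"],
    }


def _example_metadata_check(metadata: dict, check_history: bool):
    """Hypothetical sanity check over the builder's metadata output."""
    assert "model" in metadata
    assert "cross-validation" in metadata["model"]
    if check_history:  # only Keras-type models record a training history
        assert "history" in metadata["model"]
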
def test_model_builder_pipeline_in_pipeline(self):
    from gordo_components.builder import build_model
    import yaml

    raw_model_config = """
    sklearn.pipeline.Pipeline:
        steps:
          - sklearn.pipeline.Pipeline:
              steps:
                - sklearn.preprocessing.data.MinMaxScaler
          - sklearn.pipeline.Pipeline:
              steps:
                - sklearn.decomposition.pca.PCA:
                    svd_solver: auto
    """
    model_config = yaml.load(raw_model_config, Loader=yaml.FullLoader)
    data_config = get_random_data()
    model, metadata = build_model(
        name="model-name",
        model_config=model_config,
        data_config=data_config,
        metadata={},
    )
    self.metadata_check(metadata, False)

def test_model_builder_cv_scores_only(should_be_equal: bool, evaluation_config: dict):
    """
    Test that the model is None when 'cross_val_only' is used as the cv_mode.
    If the default mode ('full_build') is used, the model should not be None.

    Parameters
    ----------
    should_be_equal: bool
        Whether the cv_mode is expected to be 'full_build' (the default)
        rather than 'cross_val_only'.
    evaluation_config: dict
        Contains the cv_mode under test, either 'full_build' or 'cross_val_only'.
    """
    model_config = {"sklearn.decomposition.pca.PCA": {"svd_solver": "auto"}}
    data_config = get_random_data()

    model, metadata = build_model(
        name="model-name",
        model_config=model_config,
        data_config=data_config,
        metadata={},
        evaluation_config=evaluation_config,
    )

    if should_be_equal:
        assert model is not None
    else:
        assert model is None

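# test_model_builder_cv_scores_only receives its arguments via pytest
# parametrization. A plausible pairing, assuming the two cv_mode values named
# in the docstring; the exact parametrized values are an assumption:
import pytest


@pytest.mark.parametrize(
    "should_be_equal,evaluation_config",
    [
        (True, {"cv_mode": "full_build"}),
        (False, {"cv_mode": "cross_val_only"}),
    ],
)
def _example_cv_mode_params(should_be_equal, evaluation_config):
    """Stub showing only the shape of the parametrization."""
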
def test_scores_metadata(raw_model_config):
    data_config = get_random_data()
    model_config = yaml.load(raw_model_config, Loader=yaml.FullLoader)
    model, metadata = build_model(
        name="model-name",
        model_config=model_config,
        data_config=data_config,
        metadata={},
    )
    metadata_check(metadata, False)

def test_model_builder_model_without_pipeline(self):
    raw_model_config = """
    gordo_components.model.models.KerasAutoEncoder:
        kind: feedforward_hourglass
    """
    model_config = yaml.load(raw_model_config, Loader=yaml.FullLoader)
    data_config = get_random_data()
    model, metadata = build_model(
        model_config=model_config, data_config=data_config, metadata={}
    )
    self.metadata_check(metadata, True)

def test_output_scores_metadata():
    data_config = get_random_data()
    raw_model_config = f"""
    gordo_components.model.anomaly.diff.DiffBasedAnomalyDetector:
        scaler: sklearn.preprocessing.data.MinMaxScaler
        base_estimator:
            sklearn.compose.TransformedTargetRegressor:
                transformer: sklearn.preprocessing.data.MinMaxScaler
                regressor:
                    sklearn.pipeline.Pipeline:
                        steps:
                          - sklearn.preprocessing.data.MinMaxScaler
                          - gordo_components.model.models.KerasAutoEncoder:
                              kind: feedforward_hourglass
                              batch_size: 3
                              compression_factor: 0.5
                              encoding_layers: 1
                              func: tanh
                              out_func: linear
                              epochs: 1
    """
    model_config = yaml.load(raw_model_config, Loader=yaml.FullLoader)
    model, metadata = build_model(
        name="model-name",
        model_config=model_config,
        data_config=data_config,
        metadata={},
    )
    scores_metadata = metadata["model"]["cross-validation"]["scores"]

    assert (
        scores_metadata["explained-variance-score-Tag-1"]["fold-mean"]
        + scores_metadata["explained-variance-score-Tag-2"]["fold-mean"]
    ) / 2 == pytest.approx(scores_metadata["explained-variance-score"]["fold-mean"])

    assert (
        scores_metadata["r2-score-Tag-1"]["fold-mean"]
        + scores_metadata["r2-score-Tag-2"]["fold-mean"]
    ) / 2 == pytest.approx(scores_metadata["r2-score"]["fold-mean"])

    assert (
        scores_metadata["mean-squared-error-Tag-1"]["fold-mean"]
        + scores_metadata["mean-squared-error-Tag-2"]["fold-mean"]
    ) / 2 == pytest.approx(scores_metadata["mean-squared-error"]["fold-mean"])

    assert (
        scores_metadata["mean-absolute-error-Tag-1"]["fold-mean"]
        + scores_metadata["mean-absolute-error-Tag-2"]["fold-mean"]
    ) / 2 == pytest.approx(scores_metadata["mean-absolute-error"]["fold-mean"])

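# The four assertions above all encode the same expectation: for every metric,
# the aggregate "fold-mean" is the plain average of the per-tag fold-means.
# For example, with r2-score fold-means of 0.80 (Tag-1) and 0.60 (Tag-2), the
# aggregate r2-score fold-mean should be (0.80 + 0.60) / 2 == 0.70.
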
def test_builder_metadata(raw_model_config):
    """
    Ensure the builder works with various model configs and that each has
    expected/valid metadata results.
    """
    model_config = yaml.load(raw_model_config, Loader=yaml.FullLoader)
    data_config = get_random_data()
    model, metadata = build_model(
        name="model-name",
        model_config=model_config,
        data_config=data_config,
        metadata={},
    )
    # Check metadata, and only verify 'history' if it's a *Keras* type model
    metadata_check(metadata, "Keras" in raw_model_config)

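# test_builder_metadata (like test_scores_metadata above) expects its
# raw_model_config from pytest parametrization. A sketch using configs already
# exercised elsewhere in this module; the fixture wiring itself is an
# assumption:
_example_raw_model_configs = [
    """
    sklearn.decomposition.pca.PCA:
        svd_solver: auto
    """,
    """
    gordo_components.model.models.KerasAutoEncoder:
        kind: feedforward_hourglass
    """,
]


@pytest.mark.parametrize("raw_model_config", _example_raw_model_configs)
def _example_parametrized(raw_model_config):
    """Stub showing only how raw_model_config would be injected."""
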
def test_model_builder_pipeline(self):
    raw_model_config = """
    sklearn.pipeline.Pipeline:
        steps:
          - sklearn.preprocessing.data.MinMaxScaler
          - gordo_components.model.models.KerasAutoEncoder:
              kind: feedforward_hourglass
    """
    model_config = yaml.load(raw_model_config, Loader=yaml.FullLoader)
    data_config = get_random_data()
    model, metadata = build_model(
        model_config=model_config, data_config=data_config, metadata={}
    )
    self.metadata_check(metadata, True)

def test_model_builder_save_history(self):
    """Check that the metadata contains the Keras model build history"""
    raw_model_config = """
    gordo_components.model.models.KerasAutoEncoder:
        kind: feedforward_hourglass
    """
    model_config = yaml.load(raw_model_config, Loader=yaml.FullLoader)
    data_config = get_random_data()
    model, metadata = build_model(
        name="model-name",
        model_config=model_config,
        data_config=data_config,
        metadata={},
    )
    self.metadata_check(metadata, True)

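# A hypothetical shape for the saved training history, assuming it mirrors
# what Keras' History callback records per epoch (the exact keys depend on the
# compiled metrics and are an assumption here):
#
#     metadata["model"]["history"] == {"loss": [0.42], ...}
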
def test_model_builder_transformer_without_pipeline(self):
    # MinMaxScaler is only a transformer and does not have a score method either.
    raw_model_config = """
    sklearn.preprocessing.data.MinMaxScaler:
        feature_range: [-1, 1]
    """
    model_config = yaml.load(raw_model_config, Loader=yaml.FullLoader)
    data_config = get_random_data()
    model, metadata = build_model(
        name="model-name",
        model_config=model_config,
        data_config=data_config,
        metadata={},
    )
    self.metadata_check(metadata, False)

def test_output_dir(self):
    """
    Test that building a model creates subdirectories for model saving when needed.
    """
    from gordo_components.builder import build_model

    with TemporaryDirectory() as tmpdir:
        model_config = {
            "gordo_components.model.models.KerasAutoEncoder": {
                "kind": "feedforward_hourglass"
            }
        }
        data_config = get_random_data()
        output_dir = os.path.join(tmpdir, "some", "sub", "directories")

        model, metadata = build_model(
            model_config=model_config, data_config=data_config, metadata={}
        )
        self.metadata_check(metadata, True)

        _save_model_for_workflow(model=model, metadata=metadata, output_dir=output_dir)

        # Assert the model was saved at the location;
        # using gordo_components.serializer should create some subdir(s)
        # which start with 'n_step'
        dirs = [d for d in os.listdir(output_dir) if d.startswith("n_step")]
        self.assertGreaterEqual(
            len(dirs),
            1,
            msg="Expected saving of model to create at "
            f"least one subdir, but got {len(dirs)}",
        )

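# Both test_output_dir variants rely on the serializer's on-disk layout: each
# serialized step lands in its own directory whose name starts with "n_step".
# A hypothetical listing after _save_model_for_workflow (only the "n_step"
# prefix is actually asserted; the rest of the name is an assumption):
#
#     some/sub/directories/
#         n_step=000_class=gordo_components.model.models.KerasAutoEncoder/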