def test_build_use_registry(self):
    """
    Using a registry causes the second build of a model to return the
    path to the first.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        with temp_env_vars(
            OUTPUT_DIR=os.path.join(tmpdir, "dir1"),
            DATA_CONFIG=DATA_CONFIG,
            MODEL_CONFIG=json.dumps(MODEL_CONFIG),
            MODEL_REGISTER_DIR=os.path.join(tmpdir, "reg"),
        ):
            result1 = self.runner.invoke(cli.gordo, ["build"])
        self.assertEqual(result1.exit_code, 0, msg=f"Command failed: {result1}")

        with open("/tmp/model-location.txt") as f:
            first_path = f.read()

        # OUTPUT_DIR is the only difference
        with temp_env_vars(
            OUTPUT_DIR=os.path.join(tmpdir, "dir2"),
            DATA_CONFIG=DATA_CONFIG,
            MODEL_CONFIG=json.dumps(MODEL_CONFIG),
            MODEL_REGISTER_DIR=os.path.join(tmpdir, "reg"),
        ):
            result2 = self.runner.invoke(cli.gordo, ["build"])
        self.assertEqual(result2.exit_code, 0, msg=f"Command failed: {result2}")

        with open("/tmp/model-location.txt") as f:
            second_path = f.read()

        assert first_path == second_path

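# The temp_env_vars helper used throughout these tests is not shown in this
# file. A minimal sketch of such a context manager, assuming it simply patches
# os.environ for the duration of the block (an illustration, not necessarily
# the project's actual implementation):
import contextlib
import os
from typing import Iterator


@contextlib.contextmanager
def temp_env_vars(**env_vars: str) -> Iterator[None]:
    """Temporarily set environment variables, restoring the originals on exit."""
    original = {key: os.environ.get(key) for key in env_vars}
    os.environ.update(env_vars)
    try:
        yield
    finally:
        for key, value in original.items():
            if value is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = value
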
def test_build_cv_mode_cross_val_cache(
    tmpdir,
    should_save_model: bool,
    cv_mode_1: str,
    cv_mode_2: str,
    runner: CliRunner,
    machine: Machine,
):
    """
    Checks that cv_scores uses the cache if run after a full build. Loads the
    same model, and can print the cv_scores from it.
    """
    logger.info(f"MODEL_CONFIG={json.dumps(machine.model)}")

    machine.evaluation = cv_mode_1  # type: ignore
    with temp_env_vars(MACHINE=json.dumps(machine.to_dict()), OUTPUT_DIR=str(tmpdir)):
        runner.invoke(cli.gordo, ["build"])

    machine.evaluation = cv_mode_2  # type: ignore
    with temp_env_vars(MACHINE=json.dumps(machine.to_dict()), OUTPUT_DIR=str(tmpdir)):
        runner.invoke(cli.gordo, ["build"])

    if should_save_model:
        assert len(os.listdir(tmpdir)) > 0
    else:
        assert len(os.listdir(tmpdir)) == 0

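# cv_mode_1, cv_mode_2, and should_save_model above come from pytest
# parametrization that is not shown here. Based on the cv_mode values used
# elsewhere in these tests ("build_only", "cross_val_only", and the default
# full build), plausible pairings would be a full build followed by
# cross_val_only (model saved) versus cross_val_only twice (nothing saved).
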
def test_build_use_registry(runner, tmpdir, machine):
    """
    Using a registry causes the second build of a model to copy the first
    to the new location.
    """
    output_dir_1 = os.path.join(tmpdir, "dir1")
    output_dir_2 = os.path.join(tmpdir, "dir2")

    with temp_env_vars(
        MACHINE=json.dumps(machine.to_dict()),
        OUTPUT_DIR=output_dir_1,
        MODEL_REGISTER_DIR=os.path.join(tmpdir, "reg"),
    ):
        result1 = runner.invoke(cli.gordo, ["build"])
    assert result1.exit_code == 0, f"Command failed: {result1}"

    # OUTPUT_DIR is the only difference
    with temp_env_vars(
        MACHINE=json.dumps(machine.to_dict()),
        OUTPUT_DIR=output_dir_2,
        MODEL_REGISTER_DIR=os.path.join(tmpdir, "reg"),
    ):
        result2 = runner.invoke(cli.gordo, ["build"])
    assert result2.exit_code == 0, f"Command failed: {result2}"

    first_metadata = serializer.load_metadata(output_dir_1)
    second_metadata = serializer.load_metadata(output_dir_2)

    # The metadata contains the model build date, so if it got rebuilt these
    # two would differ.
    assert (
        first_metadata["metadata"]["build_metadata"]["model"]["model_creation_date"]
        == second_metadata["metadata"]["build_metadata"]["model"]["model_creation_date"]
    )

def test_mlflow_reporter_set_cli_build(
    MockClient,
    mock_get_workspace_kwargs,
    mock_get_spauth_kwargs,
    monkeypatch,
    runner,
    tmpdir,
    machine,
):
    """
    Tests disabling MlFlow logging in the CLI, and the missing env var case
    when logging is enabled.
    """
    mlflow.set_tracking_uri(f"file:{tmpdir}")
    machine.runtime = dict(
        reporters=[{"gordo.reporters.mlflow.MlFlowReporter": dict()}]
    )

    with temp_env_vars(MACHINE=json.dumps(machine.to_dict()), OUTPUT_DIR=str(tmpdir)):
        # Logging enabled, without env vars set:
        # Raise error
        result = runner.invoke(cli.gordo, ["build"])
        assert result.exit_code != 0

        # Logging enabled, with env vars set:
        # Build success, remote logging executed
        with monkeypatch.context() as m:
            m.setenv("DL_SERVICE_AUTH_STR", "test:test:test")
            m.setenv("AZUREML_WORKSPACE_STR", "test:test:test")

            result = runner.invoke(cli.gordo, ["build"])
            assert result.exit_code == 0
            assert MockClient.called
            assert mock_get_workspace_kwargs.called
            assert mock_get_spauth_kwargs.called

        # Reset call counts
        for m in [MockClient, mock_get_workspace_kwargs, mock_get_spauth_kwargs]:
            m.reset_mock()

    # Logging not enabled:
    # Build success, remote logging not executed
    machine.runtime = dict(builder=dict(remote_logging=dict(enable=False)))
    with temp_env_vars(MACHINE=json.dumps(machine.to_dict()), OUTPUT_DIR=str(tmpdir)):
        result = runner.invoke(cli.gordo, ["build"])
        assert result.exit_code == 0
        assert not MockClient.called
        assert not mock_get_workspace_kwargs.called
        assert not mock_get_spauth_kwargs.called

def test_build_cv_mode(
    tmpdir, runner: CliRunner, should_save_model: bool, cv_mode: str, machine: Machine
):
    """
    Testing build with cv_mode set to full and cross_val_only. Checks that
    cv_scores are printed and models are only saved when using the default
    (full) value.
    """
    machine.model = MODEL_CONFIG_WITH_PREDICT
    machine.evaluation = cv_mode  # type: ignore

    logger.info(f"MODEL_CONFIG={json.dumps(machine.model)}")

    tmp_model_dir = os.path.join(tmpdir, "tmp")
    os.makedirs(tmp_model_dir, exist_ok=True)

    with temp_env_vars(MACHINE=json.dumps(machine.to_dict()), OUTPUT_DIR=tmp_model_dir):
        result = runner.invoke(cli.gordo, ["build", "--print-cv-scores"])
        assert result.exit_code == 0

        # Checks that the output directory is empty or not, depending on the mode.
        if should_save_model:
            assert len(os.listdir(tmp_model_dir)) != 0
        else:
            assert len(os.listdir(tmp_model_dir)) == 0

        # Checks that the output contains the expected cross-validation metrics
        assert "r2-score" in result.output
        assert "mean-squared-error" in result.output
        assert "mean-absolute-error" in result.output
        assert "explained-variance-score" in result.output

def test_list_revisions_listdir_fail(caplog):
    """
    Verify the server does not fail if listing the directories above its
    current model collection directory fails.
    """

    def listdir_fail(*args, **kwargs):
        raise FileNotFoundError()

    expected_revision = "some-project-revision-123"

    with patch.object(os, "listdir", side_effect=listdir_fail) as mocked_listdir:
        with caplog.at_level(logging.CRITICAL):
            with tu.temp_env_vars(MODEL_COLLECTION_DIR=expected_revision):
                app = server.build_app({"ENABLE_PROMETHEUS": False})
                app.testing = True
                client = app.test_client()
                resp = client.get("/gordo/v0/test-project/revisions")

    assert mocked_listdir.called
    assert set(resp.json.keys()) == {"latest", "available-revisions", "revision"}
    assert resp.json["latest"] == expected_revision
    assert isinstance(resp.json["available-revisions"], list)
    assert resp.json["available-revisions"] == [expected_revision]

def test_list_revisions(tmpdir, revisions: List[str]):
    """
    Verify the server can list the project revisions it is able to serve.
    """
    # Server gets the 'latest' directory to serve models from, but knows other
    # revisions should be available a step up from this directory.
    model_dir = os.path.join(tmpdir, revisions[0])

    # Make revision directories under the tmpdir
    for revision in revisions:
        os.mkdir(os.path.join(tmpdir, revision))

    # Request from the server what revisions it can serve; it should match.
    with tu.temp_env_vars(MODEL_COLLECTION_DIR=model_dir):
        app = server.build_app({"ENABLE_PROMETHEUS": False})
        app.testing = True
        client = app.test_client()
        resp = client.get("/gordo/v0/test-project/revisions")
        resp_with_revision = client.get(
            f"/gordo/v0/test-project/revisions?revision={revisions[-1]}"
        )

    assert set(resp.json.keys()) == {"latest", "available-revisions", "revision"}
    assert resp.json["latest"] == os.path.basename(model_dir)
    assert resp.json["revision"] == os.path.basename(model_dir)
    assert isinstance(resp.json["available-revisions"], list)
    assert set(resp.json["available-revisions"]) == set(revisions)

    # The request asking for a specific revision gives back that revision,
    # but still reports the expected latest available.
    assert resp_with_revision.json["latest"] == os.path.basename(model_dir)
    assert resp_with_revision.json["revision"] == revisions[-1]

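# For reference, the /revisions responses asserted above have this shape
# (values illustrative only, inferred from the assertions):
#
# {
#     "latest": "rev-b",
#     "revision": "rev-b",
#     "available-revisions": ["rev-a", "rev-b"],
# }
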
def test_build_cv_mode_build_only(tmp_dir: tempfile.TemporaryDirectory):
    """
    Testing build with cv_mode set to build_only. Checks that OUTPUT_DIR gets
    a model saved to it. It also checks that the metadata contains
    cv-duration-sec=None and cv-scores={}.
    """
    runner = CliRunner()

    logger.info(f"MODEL_CONFIG={json.dumps(MODEL_CONFIG)}")

    with temp_env_vars(
        MODEL_NAME="model-name",
        OUTPUT_DIR=tmp_dir.name,
        DATA_CONFIG=DATA_CONFIG,
        MODEL_CONFIG=json.dumps(MODEL_CONFIG),
    ):
        metadata_file = os.path.join(tmp_dir.name, "metadata.json")
        runner.invoke(
            cli.gordo, ["build", '--evaluation-config={"cv_mode": "build_only"}']
        )

        # A model has been saved
        assert len(os.listdir(tmp_dir.name)) != 0
        with open(metadata_file) as f:
            metadata_json = json.loads(f.read())
            assert metadata_json["model"]["cross-validation"]["cv-duration-sec"] is None
            assert metadata_json["model"]["cross-validation"]["scores"] == {}

def test_build_cv_mode_cross_val_cache(
    should_be_equal: bool,
    cv_mode_1: str,
    cv_mode_2: str,
    tmp_dir: tempfile.TemporaryDirectory,
):
    """
    Checks that cv_scores uses the cache if run after a full build. Loads the
    same model, and can print the cv_scores from it.
    """
    runner = CliRunner()

    logger.info(f"MODEL_CONFIG={json.dumps(MODEL_CONFIG)}")

    model_register_dir = os.path.join(tmp_dir.name, "reg")

    with temp_env_vars(
        MODEL_NAME="model-name",
        OUTPUT_DIR=tmp_dir.name,
        DATA_CONFIG=DATA_CONFIG,
        MODEL_CONFIG=json.dumps(MODEL_CONFIG),
        MODEL_REGISTER_DIR=model_register_dir,
    ):
        runner.invoke(cli.gordo, ["build", f"--evaluation-config={cv_mode_1}"])
        runner.invoke(cli.gordo, ["build", f"--evaluation-config={cv_mode_2}"])

    if should_be_equal:
        assert os.path.exists(model_register_dir)
    else:
        assert not os.path.exists(model_register_dir)

def test_build_cv_mode(
    should_save_model: bool, cv_mode: str, tmp_dir: tempfile.TemporaryDirectory
):
    """
    Testing build with cv_mode set to full and cross_val_only. Checks that
    cv_scores are printed and models are only saved when using the default
    (full) value.
    """
    runner = CliRunner()

    logger.info(f"MODEL_CONFIG={json.dumps(MODEL_CONFIG_WITH_PREDICT)}")

    with temp_env_vars(
        MODEL_NAME="model-name",
        OUTPUT_DIR=tmp_dir.name,
        DATA_CONFIG=DATA_CONFIG,
        MODEL_CONFIG=json.dumps(MODEL_CONFIG_WITH_PREDICT),
    ):
        result = runner.invoke(
            cli.gordo, ["build", "--print-cv-scores", f"--evaluation-config={cv_mode}"]
        )

        # Checks that the output directory is empty or not, depending on the mode.
        if should_save_model:
            assert len(os.listdir(tmp_dir.name)) != 0
        else:
            assert len(os.listdir(tmp_dir.name)) == 0

        # Checks that the output contains the expected cross-validation metrics
        assert "r2-score" in result.output
        assert "mean-squared-error" in result.output
        assert "mean-absolute-error" in result.output
        assert "explained-variance-score" in result.output

def test_build_env_args(self):
    """
    Rather than passing OUTPUT_DIR directly to the CLI, the build should be
    able to read it from environment variables.
    """
    logger.info(f"MODEL_CONFIG={json.dumps(MODEL_CONFIG)}")

    with tempfile.TemporaryDirectory() as tmpdir:
        with temp_env_vars(
            MODEL_NAME="model-name",
            OUTPUT_DIR=tmpdir,
            DATA_CONFIG=DATA_CONFIG,
            MODEL_CONFIG=json.dumps(MODEL_CONFIG),
        ):
            result = self.runner.invoke(cli.gordo, ["build"])

        self.assertEqual(result.exit_code, 0, msg=f"Command failed: {result}")
        self.assertTrue(
            os.path.exists("/tmp/model-location.txt"),
            msg='Building was supposed to create a "model-location.txt", but it did not!',
        )

def test_build_use_registry_bust_cache(self):
    """
    Even when using a registry, we get separate model paths when we ask for
    models with different configurations.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        output_dir_1 = os.path.join(tmpdir, "dir1")
        output_dir_2 = os.path.join(tmpdir, "dir2")

        with temp_env_vars(
            MODEL_NAME="model-name",
            OUTPUT_DIR=output_dir_1,
            DATA_CONFIG=DATA_CONFIG,
            MODEL_CONFIG=json.dumps(MODEL_CONFIG),
            MODEL_REGISTER_DIR=os.path.join(tmpdir, "reg"),
        ):
            result1 = self.runner.invoke(cli.gordo, ["build"])
        self.assertEqual(result1.exit_code, 0, msg=f"Command failed: {result1}")

        with temp_env_vars(
            MODEL_NAME="model-name",
            OUTPUT_DIR=output_dir_2,
            # NOTE: Different train dates!
            DATA_CONFIG=(
                "{"
                ' "type": "RandomDataset",'
                ' "train_start_date": "2019-01-01T00:00:00+00:00", '
                ' "train_end_date": "2019-06-01T00:00:00+00:00",'
                ' "tags": ["TRC1", "TRC2"]'
                "}"
            ),
            MODEL_CONFIG=json.dumps(MODEL_CONFIG),
            MODEL_REGISTER_DIR=os.path.join(tmpdir, "reg"),
        ):
            result2 = self.runner.invoke(cli.gordo, ["build"])
        self.assertEqual(result2.exit_code, 0, msg=f"Command failed: {result2}")

        first_metadata = serializer.load_metadata(output_dir_1)
        second_metadata = serializer.load_metadata(output_dir_2)

        # The metadata contains the model build date, so if it got rebuilt
        # these two would differ.
        assert (
            first_metadata["model"]["model-creation-date"]
            != second_metadata["model"]["model-creation-date"]
        )

def gordo_ml_server_client(request, trained_model_directory):
    with tu.temp_env_vars(MODEL_COLLECTION_DIR=trained_model_directory):
        app = server.build_app()
        app.testing = True
        yield app.test_client()

def test_model_list_view_non_existant_proj():
    with tu.temp_env_vars(MODEL_COLLECTION_DIR=os.path.join("does", "not", "exist")):
        app = server.build_app()
        app.testing = True
        client = app.test_client()
        resp = client.get("/gordo/v0/test-project/models")
        assert resp.status_code == 200
        assert resp.json["models"] == []

def gordo_ml_server_client(request, trained_model_directory):
    with tu.temp_env_vars(MODEL_LOCATION=trained_model_directory):
        app = server.build_app(data_provider=request.param)
        app.testing = True
        yield app.test_client()

def test_build_use_registry_bust_cache(self):
    """
    Even when using a registry, we get separate model paths when we ask for
    models with different configurations.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        with temp_env_vars(
            MODEL_NAME="model-name",
            OUTPUT_DIR=os.path.join(tmpdir, "dir1"),
            DATA_CONFIG=DATA_CONFIG,
            MODEL_CONFIG=json.dumps(MODEL_CONFIG),
            MODEL_REGISTER_DIR=os.path.join(tmpdir, "reg"),
        ):
            result1 = self.runner.invoke(cli.gordo, ["build"])
        self.assertEqual(result1.exit_code, 0, msg=f"Command failed: {result1}")

        with open("/tmp/model-location.txt") as f:
            first_path = f.read()

        with temp_env_vars(
            MODEL_NAME="model-name",
            OUTPUT_DIR=os.path.join(tmpdir, "dir2"),
            # NOTE: Different train dates!
            DATA_CONFIG=(
                "{"
                ' "type": "RandomDataset",'
                ' "train_start_date": "2019-01-01T00:00:00+00:00", '
                ' "train_end_date": "2019-06-01T00:00:00+00:00",'
                ' "tags": ["TRC1", "TRC2"]'
                "}"
            ),
            MODEL_CONFIG=json.dumps(MODEL_CONFIG),
            MODEL_REGISTER_DIR=os.path.join(tmpdir, "reg"),
        ):
            result2 = self.runner.invoke(cli.gordo, ["build"])
        self.assertEqual(result2.exit_code, 0, msg=f"Command failed: {result2}")

        with open("/tmp/model-location.txt") as f:
            second_path = f.read()

        assert first_path != second_path

def test_request_specific_revision(trained_model_directory, tmpdir, revisions):
    model_name = "test-model"
    current_revision = revisions[0]
    collection_dir = os.path.join(tmpdir, current_revision)

    # Copy the trained model into the revision model folders
    for revision in revisions:
        model_dir = os.path.join(tmpdir, revision, model_name)
        shutil.copytree(trained_model_directory, model_dir)

        # Now overwrite the metadata.json file to ensure the server actually
        # reads the metadata for this specific revision
        metadata_file = os.path.join(model_dir, "metadata.json")
        assert os.path.isfile(metadata_file)
        with open(metadata_file, "w") as fp:
            json.dump({"revision": revision, "model": model_name}, fp)

    with tu.temp_env_vars(MODEL_COLLECTION_DIR=collection_dir):
        app = server.build_app({"ENABLE_PROMETHEUS": False})
        app.testing = True
        client = app.test_client()

        for revision in revisions:
            resp = client.get(
                f"/gordo/v0/test-project/{model_name}/metadata?revision={revision}"
            )
            assert resp.status_code == 200
            assert resp.json["revision"] == revision

            # Verify the server read the metadata.json file we had overwritten
            assert resp.json["metadata"] == {"revision": revision, "model": model_name}

        # Asking for a revision which doesn't exist gives a 410 Gone.
        resp = client.get(
            f"/gordo/v0/test-project/{model_name}/metadata?revision=does-not-exist"
        )
        assert resp.status_code == 410
        assert resp.json == {
            "error": "Revision 'does-not-exist' not found.",
            "revision": "does-not-exist",
        }

        # Again, but by setting the header, to ensure we also check the header
        resp = client.get(
            f"/gordo/v0/test-project/{model_name}/metadata",
            headers={"revision": "does-not-exist"},
        )
        assert resp.status_code == 410
        assert resp.json == {
            "error": "Revision 'does-not-exist' not found.",
            "revision": "does-not-exist",
        }

def test_build_model_with_parameters(self):
    """
    It works to build a simple model with parameters set
    """
    model = """
    {
     "sklearn.decomposition.pca.PCA": {
       "svd_solver": "{{svd_solver}}",
       "n_components": {{n_components}}
     }
    }
    """

    svd_solver = "auto"
    n_components = 0.5

    logger.info(f"MODEL_CONFIG={json.dumps(model)}")

    with tempfile.TemporaryDirectory() as tmpdir:
        with temp_env_vars(
            MODEL_NAME="model-name",
            OUTPUT_DIR=tmpdir,
            DATA_CONFIG=DATA_CONFIG,
            MODEL_CONFIG=model,
        ):
            location_file = os.path.join(tmpdir, "special-model-location.txt")
            args = [
                "build",
                "--model-parameter",
                f"svd_solver,{svd_solver}",
                "--model-parameter",
                f"n_components,{n_components}",
                "--model-location-file",
                location_file,
            ]

            # Run it twice to ensure the model location in the location file
            # is only written once and not appended.
            for _ in range(2):
                result = self.runner.invoke(cli.gordo, args=args)
                self.assertEqual(result.exit_code, 0, msg=f"Command failed: {result}")
                self.assertTrue(
                    os.path.exists(location_file),
                    msg=f'Building was supposed to create a model location file at "{location_file}", but it did not!',
                )
                with open(location_file, "r") as f:
                    assert f.read() == tmpdir

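# The "{{svd_solver}}" / "{{n_components}}" placeholders in the model config
# above are filled in by the build command from the --model-parameter options.
# A minimal sketch of that substitution step, assuming plain Jinja2 templating
# (an illustration only; gordo's actual rendering may differ):
import json

import jinja2

_template = """
{
 "sklearn.decomposition.pca.PCA": {
   "svd_solver": "{{svd_solver}}",
   "n_components": {{n_components}}
 }
}
"""

# Render the placeholders, then parse the result as ordinary JSON
_rendered = jinja2.Template(_template).render(svd_solver="auto", n_components=0.5)
_model_config = json.loads(_rendered)
assert _model_config["sklearn.decomposition.pca.PCA"]["n_components"] == 0.5
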
def gordo_ml_server_client(
    request, model_collection_directory, trained_model_directory
):
    with tu.temp_env_vars(MODEL_COLLECTION_DIR=model_collection_directory):
        app = server.build_app()
        app.testing = True

        # Always return a valid asset for any tag name
        with patch.object(sensor_tag, "_asset_from_tag_name", return_value="default"):
            yield app.test_client()

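# gordo_ml_server_client above is used as a pytest fixture: a test receives
# the Flask test client by naming the fixture as an argument. A hypothetical
# usage sketch (the test name is illustrative; the route appears elsewhere in
# these tests):
def test_models_route_example(gordo_ml_server_client):
    resp = gordo_ml_server_client.get("/gordo/v0/test-project/models")
    assert resp.status_code == 200
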
def test_server_version_route(model_collection_directory, gordo_revision):
    """
    Simple route which returns the current version
    """
    with tu.temp_env_vars(MODEL_COLLECTION_DIR=model_collection_directory):
        app = server.build_app()
        app.testing = True
        client = app.test_client()

        resp = client.get("/server-version")
        assert resp.status_code == 200
        assert resp.json == {"revision": gordo_revision, "version": __version__}

def test_build_use_registry(self):
    """
    Using a registry causes the second build of a model to copy the first
    to the new location.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        output_dir_1 = os.path.join(tmpdir, "dir1")
        output_dir_2 = os.path.join(tmpdir, "dir2")

        with temp_env_vars(
            MODEL_NAME="model-name",
            OUTPUT_DIR=output_dir_1,
            DATA_CONFIG=DATA_CONFIG,
            MODEL_CONFIG=json.dumps(MODEL_CONFIG),
            MODEL_REGISTER_DIR=os.path.join(tmpdir, "reg"),
        ):
            result1 = self.runner.invoke(cli.gordo, ["build"])
        self.assertEqual(result1.exit_code, 0, msg=f"Command failed: {result1}")

        # OUTPUT_DIR is the only difference
        with temp_env_vars(
            MODEL_NAME="model-name",
            OUTPUT_DIR=output_dir_2,
            DATA_CONFIG=DATA_CONFIG,
            MODEL_CONFIG=json.dumps(MODEL_CONFIG),
            MODEL_REGISTER_DIR=os.path.join(tmpdir, "reg"),
        ):
            result2 = self.runner.invoke(cli.gordo, ["build"])
        self.assertEqual(result2.exit_code, 0, msg=f"Command failed: {result2}")

        first_metadata = serializer.load_metadata(output_dir_1)
        second_metadata = serializer.load_metadata(output_dir_2)

        # The metadata contains the model build date, so if it got rebuilt
        # these two would differ.
        assert (
            first_metadata["model"]["model-creation-date"]
            == second_metadata["model"]["model-creation-date"]
        )

def test_build_use_registry_bust_cache(runner, tmpdir, machine):
    """
    Even when using a registry, we get separate model paths when we ask for
    models with different configurations.
    """
    output_dir_1 = os.path.join(tmpdir, "dir1")
    output_dir_2 = os.path.join(tmpdir, "dir2")

    with temp_env_vars(
        MACHINE=json.dumps(machine.to_dict()),
        OUTPUT_DIR=output_dir_1,
        MODEL_REGISTER_DIR=os.path.join(tmpdir, "reg"),
    ):
        result1 = runner.invoke(cli.gordo, ["build"])
    assert result1.exit_code == 0, f"Command failed: {result1}"

    # NOTE: Different train dates!
    machine.dataset = machine.dataset.from_dict(
        {
            "type": "RandomDataset",
            "train_start_date": "2019-01-01T00:00:00+00:00",
            "train_end_date": "2019-06-01T00:00:00+00:00",
            "tags": ["TRC1", "TRC2"],
        }
    )

    with temp_env_vars(
        MACHINE=json.dumps(machine.to_dict()),
        OUTPUT_DIR=output_dir_2,
        MODEL_REGISTER_DIR=os.path.join(tmpdir, "reg"),
    ):
        result2 = runner.invoke(cli.gordo, ["build"])
    assert result2.exit_code == 0, f"Command failed: {result2}"

    first_metadata = serializer.load_metadata(output_dir_1)
    second_metadata = serializer.load_metadata(output_dir_2)

    # The metadata contains the model build date, so if it got rebuilt these
    # two would differ.
    assert (
        first_metadata["metadata"]["build_metadata"]["model"]["model_creation_date"]
        != second_metadata["metadata"]["build_metadata"]["model"]["model_creation_date"]
    )

def test_non_existant_model_metadata(tmpdir, gordo_project, api_version):
    """
    Requesting metadata for a model that does not exist returns a 404.
    """
    with tu.temp_env_vars(MODEL_COLLECTION_DIR=str(tmpdir)):
        app = server.build_app({"ENABLE_PROMETHEUS": False})
        app.testing = True
        client = app.test_client()
        resp = client.get(
            f"/gordo/{api_version}/{gordo_project}/model-does-not-exist/metadata"
        )
    assert resp.status_code == 404

def test_build_model_with_parameters(self):
    """
    It works to build a simple model with parameters set
    """
    model = """
    {
     "sklearn.decomposition.pca.PCA": {
       "svd_solver": "{{svd_solver}}",
       "n_components": {{n_components}}
     }
    }
    """

    svd_solver = "auto"
    n_components = 0.5

    logger.info(f"MODEL_CONFIG={json.dumps(model)}")

    with tempfile.TemporaryDirectory() as tmpdir:
        with temp_env_vars(
            MODEL_NAME="model-name",
            OUTPUT_DIR=tmpdir,
            DATA_CONFIG=DATA_CONFIG,
            MODEL_CONFIG=model,
        ):
            args = [
                "build",
                "--model-parameter",
                f"svd_solver,{svd_solver}",
                "--model-parameter",
                f"n_components,{n_components}",
            ]

            # Run it twice to ensure repeated builds with the same parameters
            # also succeed.
            for _ in range(2):
                result = self.runner.invoke(cli.gordo, args=args)
                self.assertEqual(result.exit_code, 0, msg=f"Command failed: {result}")

        self.assertGreater(
            len(os.listdir(tmpdir)),
            1,
            msg="Building was supposed to create at least two files "
            "(model and metadata) in OUTPUT_DIR, but it did not!",
        )

def test_build_env_args(runner, tmpdir, machine):
    """
    Rather than passing OUTPUT_DIR directly to the CLI, the build should be
    able to read it from environment variables.
    """
    logger.info(f"MODEL_CONFIG={json.dumps(MODEL_CONFIG)}")

    with temp_env_vars(MACHINE=json.dumps(machine.to_dict()), OUTPUT_DIR=str(tmpdir)):
        result = runner.invoke(cli.gordo, ["build"])

    assert result.exit_code == 0, f"Command failed: {result}, {result.exception}"
    assert (
        len(os.listdir(tmpdir)) > 1
    ), "Building was supposed to create at least two files (model and metadata) in OUTPUT_DIR, but it did not!"

def test_expected_models_route(tmpdir):
    """
    Route that gives back the expected model names, which are read straight
    from the 'EXPECTED_MODELS' env var.
    """
    with tu.temp_env_vars(
        MODEL_COLLECTION_DIR=str(tmpdir),
        EXPECTED_MODELS=json.dumps(["model-a", "model-b"]),
    ):
        app = server.build_app({"ENABLE_PROMETHEUS": False})
        app.testing = True
        client = app.test_client()
        resp = client.get("/gordo/v0/test-project/expected-models")
        assert resp.json["expected-models"] == ["model-a", "model-b"]

def test_models_by_revision_list_view(caplog, tmpdir, revision_to_models):
    """
    Server returns the expected models it can serve under specific revisions.

    revision_to_models: Dict[str, Tuple[str, ...]]
        Maps revision codes to models belonging to that revision.
        Simulates serving some revision while having access to other
        revisions and their models.
    """
    # Current collection dir for the server; order isn't important.
    if revision_to_models:
        collection_dir = os.path.join(tmpdir, list(revision_to_models.keys())[0])
    else:
        # This will cause a failure to look up a certain revision
        collection_dir = str(tmpdir)

    # Make all the revision and model subfolders
    for revision in revision_to_models.keys():
        os.mkdir(os.path.join(tmpdir, revision))
        for model in revision_to_models[revision]:
            os.makedirs(os.path.join(tmpdir, revision, model), exist_ok=True)

    with tu.temp_env_vars(MODEL_COLLECTION_DIR=collection_dir):
        app = server.build_app({"ENABLE_PROMETHEUS": False})
        app.testing = True
        client = app.test_client()

        for revision in revision_to_models:
            resp = client.get(f"/gordo/v0/test-project/models?revision={revision}")
            assert resp.status_code == 200
            assert "models" in resp.json
            assert sorted(resp.json["models"]) == sorted(revision_to_models[revision])
        else:
            # When revision_to_models is empty there is nothing on the server.
            # Asking for some arbitrary revision then gives a 410 Gone and an
            # error message.
            resp = client.get(
                "/gordo/v0/test-project/models?revision=revision-does-not-exist"
            )
            assert resp.status_code == 410
            assert resp.json == {
                "error": "Revision 'revision-does-not-exist' not found.",
                "revision": "revision-does-not-exist",
            }

def test_build_exit_code(exception, exit_code, runner, tmpdir, machine):
    """
    Test that the CLI build exits with different exit codes for different errors.
    """
    machine.model = MODEL_CONFIG_WITH_PREDICT
    machine.evaluation = {"cv_mode": "cross_val_only"}

    logger.info(f"MODEL_CONFIG={json.dumps(machine.model)}")

    with mock.patch(
        "gordo.cli.cli.ModelBuilder.build",
        mock.MagicMock(side_effect=exception, autospec=True, return_value=None),
    ):
        with temp_env_vars(
            MACHINE=json.dumps(machine.to_dict()), OUTPUT_DIR=str(tmpdir)
        ):
            result = runner.invoke(cli.gordo, ["build"])
            assert result.exit_code == exit_code

def test_build_model_with_parameters(runner, tmpdir, machine):
    """
    It works to build a simple model with parameters set
    """
    machine._strict = False
    machine.model = """
    {
     "sklearn.decomposition.pca.PCA": {
       "svd_solver": "{{svd_solver}}",
       "n_components": {{n_components}}
     }
    }
    """

    svd_solver = "auto"
    n_components = 0.5

    logger.info(f"MODEL_CONFIG={json.dumps(machine.model)}")

    with temp_env_vars(MACHINE=json.dumps(machine.to_dict()), OUTPUT_DIR=str(tmpdir)):
        args = [
            "build",
            "--model-parameter",
            f"svd_solver,{svd_solver}",
            "--model-parameter",
            f"n_components,{n_components}",
        ]

        # Run it twice to ensure repeated builds with the same parameters
        # also succeed.
        for _ in range(2):
            result = runner.invoke(cli.gordo, args=args)
            assert (
                result.exit_code == 0
            ), f"Command failed: {result}, {result.exception}"

    assert (
        len(os.listdir(tmpdir)) > 1
    ), "Building was supposed to create at least two files (model and metadata) in OUTPUT_DIR, but it did not!"

def test_build_cv_mode_build_only(tmpdir, runner: CliRunner, machine: Machine):
    """
    Testing build with cv_mode set to build_only. Checks that OUTPUT_DIR gets
    a model saved to it. It also checks that the metadata contains
    cv-duration-sec=None and cv-scores={}.
    """
    logger.info(f"MODEL_CONFIG={json.dumps(machine.model)}")
    machine.evaluation = {"cv_mode": "build_only"}

    with temp_env_vars(MACHINE=json.dumps(machine.to_dict()), OUTPUT_DIR=str(tmpdir)):
        metadata_file = os.path.join(tmpdir, "metadata.json")
        runner.invoke(cli.gordo, ["build"])

        # A model has been saved
        assert len(os.listdir(tmpdir)) != 0
        with open(metadata_file) as f:
            metadata_json = json.loads(f.read())
            assert (
                metadata_json["metadata"]["build_metadata"]["model"][
                    "cross_validation"
                ]["cv_duration_sec"]
                is None
            )
            assert (
                metadata_json["metadata"]["build_metadata"]["model"][
                    "cross_validation"
                ]["scores"]
                == {}
            )