def test_download_and_save_from_mlflow(self, tmp_path):
     """Check that the mocked artifact body ends up in the file at the saving path."""
     target_file = Path(tmp_path) / "file.txt"
     artifact_url = "http://test-tracking-uri:8080/get-artifact?path=full_model.pkl&run_uuid=123-abc"
     expected_body = "Hello World"

     with requests_mock.Mocker() as http_mock:
         # Serve the artifact request for run "123-abc" from the mock.
         http_mock.get(artifact_url, text=expected_body)
         ModelCache.download_and_save_from_ml_flow(target_file, "123-abc")
         assert target_file.read_text() == expected_body
    def test_get_latest_model_no_latest(self, tmp_path):
        """Expect ``None`` for the "latest" identifier of the "groceries" scenario.

        The mocked MLflow search returns a single run whose
        ``tags.DidPassAcceptanceTest`` is ``'no'``.
        """
        # Imported locally so this block does not depend on the file header.
        from unittest.mock import patch

        cache = ModelCache(tmp_path)

        def get_search_return_values(experiment_ids, *args, **kwargs):
            # Stand-in for mlflow.search_runs: always one run, which did not
            # pass the acceptance test.
            df = pd.DataFrame(columns=list(cache.columns_of_interest.keys()), data=[{
                'run_id': "123",
                'tags.mlflow.runName': '1',
                'end_time': datetime(2020, 8, 29, 8, 0, 0),
                'params.MLPipelineParamsName': 'default',
                'params.FeatureSetName': 'default',
                'params.AlgorithmName': 'default',
                'params.AlgorithmParamsName': 'default',
                'tags.DidPassAcceptanceTest': 'no'
            }])
            df["end_time"] = pd.to_datetime(df["end_time"])
            return df

        def get_experiment_id(scenario, *args, **kwargs):
            # Stand-in for mlflow.get_experiment_by_name: "houses" has no
            # experiment, every other scenario gets a synthetic id.
            if scenario == "houses":
                return None
            return SimpleNamespace(experiment_id="id_" + scenario)

        model_as_bytes = self.get_sample_model_path().read_bytes()
        # patch.object restores the real mlflow functions on exit, so the
        # mocks cannot leak into other tests (plain assignment onto the
        # mlflow module would stay in place for the rest of the session).
        with patch.object(mlflow, "get_experiment_by_name", side_effect=get_experiment_id), \
                patch.object(mlflow, "search_runs", side_effect=get_search_return_values), \
                requests_mock.Mocker() as mocked_req:
            mocked_req.get("http://test-tracking-uri:8080/get-artifact?path=full_model.pkl&run_uuid=123",
                           content=model_as_bytes)
            loaded_model = cache.get_loaded_model_for_scenario_and_run_id("groceries", "latest")
            assert loaded_model is None
    def test_return_a_couple_models(self):
        """Check the listing of available models when MLflow reports two runs.

        The mocked search returns runs "123" (acceptance test 'no') and
        "456" (acceptance test 'yes', later ``end_time``); only "456" is
        expected to be flagged ``is_latest``.
        """
        # Imported locally so this block does not depend on the file header.
        from unittest.mock import patch

        cache = ModelCache()

        def get_search_return_values(experiment_ids, *args, **kwargs):
            # Stand-in for mlflow.search_runs: the same two runs for every
            # experiment queried.
            df = pd.DataFrame(columns=list(cache.columns_of_interest.keys()),
                              data=[{
                                  'run_id': "123",
                                  'tags.mlflow.runName': 'my_run',
                                  'tags.mlflow.BuildNumber': '4',
                                  'end_time': datetime(2020, 8, 29, 8, 0, 0),
                                  'params.MLPipelineParamsName': 'default',
                                  'params.FeatureSetName': 'default',
                                  'params.AlgorithmName': 'default',
                                  'params.AlgorithmParamsName': 'default',
                                  'tags.DidPassAcceptanceTest': 'no'
                              }, {
                                  'run_id': "456",
                                  'tags.mlflow.runName': 'my_second_run',
                                  'tags.mlflow.BuildNumber': '5',
                                  'end_time': datetime(2020, 8, 29, 9, 0, 0),
                                  'params.MLPipelineParamsName': 'default',
                                  'params.FeatureSetName': 'default',
                                  'params.AlgorithmName': 'default',
                                  'params.AlgorithmParamsName': 'default',
                                  'tags.DidPassAcceptanceTest': 'yes'
                              }])
            df["end_time"] = pd.to_datetime(df["end_time"])
            return df

        def get_experiment_id(scenario, *args, **kwargs):
            # Stand-in for mlflow.get_experiment_by_name: "houses" has no
            # experiment, every other scenario gets a synthetic id.
            if scenario == "houses":
                return None
            return SimpleNamespace(experiment_id="id_" + scenario)

        # patch.object restores the real mlflow functions on exit, so the
        # mocks cannot leak into other tests (plain assignment onto the
        # mlflow module would stay in place for the rest of the session).
        with patch.object(mlflow, "get_experiment_by_name", side_effect=get_experiment_id), \
                patch.object(mlflow, "search_runs", side_effect=get_search_return_values):
            available_models = cache.list_available_models_from_ml_flow()

        assert set(available_models.keys()) == {"groceries", "iris"}

        groceries = available_models["groceries"]
        assert len(groceries) == 2
        assert [x["run_id"] for x in groceries] == ['123', '456']

        # Run ids were just asserted to be unique, so a dict lookup is
        # equivalent to filtering the list and taking the first hit.
        is_latest_by_run = {x["run_id"]: x["is_latest"] for x in groceries}
        assert is_latest_by_run["456"]
        assert not is_latest_by_run["123"]
    def test_list_no_models_available(self):
        """Check that every scenario maps to an empty list when MLflow reports no runs."""
        # Imported locally so this block does not depend on the file header.
        from unittest.mock import patch

        cache = ModelCache()

        def get_search_return_values(experiment_ids, *args, **kwargs):
            # Stand-in for mlflow.search_runs: an empty frame that still has
            # the expected columns.
            df = pd.DataFrame(columns=list(cache.columns_of_interest.keys()))
            df["end_time"] = pd.to_datetime(df["end_time"])
            return df

        def get_experiment_id(scenario, *args, **kwargs):
            # Stand-in for mlflow.get_experiment_by_name: every scenario
            # resolves to a synthetic experiment id.
            return SimpleNamespace(experiment_id="id_" + scenario)

        # patch.object restores the real mlflow functions on exit, so the
        # mocks cannot leak into other tests (plain assignment onto the
        # mlflow module would stay in place for the rest of the session).
        with patch.object(mlflow, "get_experiment_by_name", side_effect=get_experiment_id), \
                patch.object(mlflow, "search_runs", side_effect=get_search_return_values):
            available_models = cache.list_available_models_from_ml_flow()

        assert available_models == {"groceries": [], "houses": []}
 def test_is_not_latest_deployable_model(self):
     """A row whose algorithm_name is 'my_params' must not count as the latest deployable model."""
     # Every field is 'default' / 'yes' except algorithm_name.
     candidate = dict(
         ml_pipeline_params_name='default',
         feature_set_name='default',
         algorithm_name='my_params',
         algorithm_params_name='default',
         passed_acceptance_test='yes',
     )
     assert not ModelCache.is_latest_deployable_model(candidate)
Ejemplo n.º 6
0
"""
Web app
"""
import logging

import requests
from flask import Flask, request, render_template

from cd4ml.logger.fluentd_logging import FluentdLogger
from cd4ml.webapp.model_cache import ModelCache
from cd4ml.webapp.webapp_data_scorer import get_form_from_model

# Module-level logger, Fluentd logger and model cache instances.
logger = logging.getLogger(__name__)
fluentd_logger = FluentdLogger()
cache = ModelCache()

# Error message templates; the "{}" placeholder is presumably filled with the
# scenario name via str.format by callers (not visible in this section).
ERROR_NO_AVAILABLE_LATEST_MODEL = (
    "No model is available for the latest scenario {}. "
    "Please re-run jenkins and reload this page"
)
ERROR_NO_MODEL_AT_LOCATION = (
    "No model is available for this identifier scenario {}. "
    "Please navigate back to the model picker and try again."
)

# Flask application with explicit template and static folders.
app = Flask(
    __name__,
    template_folder='webapp/templates',
    static_folder='webapp/static',
)


def make_page_for_scenario_and_identifier(scenario_name, identifier, request_data):
    """Build the page pieces for one scenario/model identifier.

    Fetches the model from the module-level cache and passes it, together
    with ``request_data`` as the initial form values, to
    ``get_form_from_model``.

    :param scenario_name: scenario whose model should be used
    :param identifier: run identifier handed to the model cache
    :param request_data: values forwarded as ``initial_values``
    :return: tuple ``(header_text, form_div, prediction)``
    """
    loaded_model = cache.get_loaded_model_for_scenario_and_run_id(
        scenario_name, identifier
    )
    page_parts = get_form_from_model(
        scenario_name,
        identifier,
        loaded_model,
        initial_values=request_data,
    )
    # Unpack explicitly so the result is always a 3-tuple.
    header, form, predicted = page_parts
    return header, form, predicted