def test_with_managed_run_sets_specified_run_tags():
    client = MlflowClient()
    tags_to_set = {
        "foo": "bar",
        "num_layers": "7",
    }

    patch_function_1 = with_managed_run(
        lambda original, *args, **kwargs: mlflow.active_run(),
        tags=tags_to_set)
    run1 = patch_function_1(lambda: "foo")
    assert tags_to_set.items() <= client.get_run(
        run1.info.run_id).data.tags.items()

    class PatchFunction2(PatchFunction):
        def _patch_implementation(self, original, *args, **kwargs):
            return mlflow.active_run()

        def _on_exception(self, exception):
            pass

    patch_function_2 = with_managed_run(PatchFunction2, tags=tags_to_set)
    run2 = patch_function_2.call(lambda: "foo")
    assert tags_to_set.items() <= client.get_run(
        run2.info.run_id).data.tags.items()
Example #2
def test_with_managed_run_with_throwing_class_exhibits_expected_behavior():
    client = MlflowClient()
    patch_function_active_run = None

    @with_managed_run
    class TestPatch(PatchFunction):
        def _patch_implementation(self, original, *args, **kwargs):
            nonlocal patch_function_active_run
            patch_function_active_run = mlflow.active_run()
            raise Exception("bad implementation")

        def _on_exception(self, exception):
            pass

    with pytest.raises(Exception):
        TestPatch.call(lambda: "foo")

    assert patch_function_active_run is not None
    status1 = client.get_run(patch_function_active_run.info.run_id).info.status
    assert RunStatus.from_string(status1) == RunStatus.FAILED

    with mlflow.start_run() as active_run, pytest.raises(Exception):
        TestPatch.call(lambda: "foo")

    assert patch_function_active_run == active_run
    # `with_managed_run` should not terminate a preexisting MLflow run,
    # even if the patch function throws
    status2 = client.get_run(active_run.info.run_id).info.status
    assert RunStatus.from_string(status2) == RunStatus.FINISHED
def _set_run_tag(run_id, path, version, data_format):
    client = MlflowClient()
    table_info_string = _get_table_info_string(path, version, data_format)
    existing_run = client.get_run(run_id)
    existing_tag = existing_run.data.tags.get(_SPARK_TABLE_INFO_TAG_NAME)
    new_table_info = _merge_tag_lines(existing_tag, table_info_string)
    client.set_tag(run_id, _SPARK_TABLE_INFO_TAG_NAME, new_table_info)
def test_safe_patch_manages_run_if_specified_and_sets_expected_run_tags(
    patch_destination, test_autologging_integration
):
    client = MlflowClient()
    active_run = None

    def patch_impl(original, *args, **kwargs):
        nonlocal active_run
        active_run = mlflow.active_run()
        return original(*args, **kwargs)

    with mock.patch(
        "mlflow.utils.autologging_utils.safety.with_managed_run", wraps=with_managed_run
    ) as managed_run_mock:
        safe_patch(
            test_autologging_integration, patch_destination, "fn", patch_impl, manage_run=True
        )
        patch_destination.fn()
        assert managed_run_mock.call_count == 1
        assert active_run is not None
        assert active_run.info.run_id is not None
        assert (
            client.get_run(active_run.info.run_id).data.tags[MLFLOW_AUTOLOGGING]
            == "test_integration"
        )
Example #5
def test_with_managed_run_with_non_throwing_function_exhibits_expected_behavior():
    client = MlflowClient()

    @with_managed_run
    def patch_function(original, *args, **kwargs):
        return mlflow.active_run()

    run1 = patch_function(lambda: "foo")
    run1_status = client.get_run(run1.info.run_id).info.status
    assert RunStatus.from_string(run1_status) == RunStatus.FINISHED

    with mlflow.start_run() as active_run:
        run2 = patch_function(lambda: "foo")

    assert run2 == active_run
    run2_status = client.get_run(run2.info.run_id).info.status
    assert RunStatus.from_string(run2_status) == RunStatus.FINISHED
Example #6
def get_best_model(**kwargs):
    """ For all the models we logged, determine the best performing run """
    ids = [r for ids in kwargs["ti"].xcom_pull(task_ids=["model_lr", "model_rf"], key="run_id") for r in ids]
    client = MlflowClient()
    runs = [client.get_run(run_id) for run_id in ids]
    run_r2 = [run.data.metrics["r2"] for run in runs]
    best_run = runs[np.argmax(run_r2)]
    kwargs["ti"].xcom_push(key="best_model_run_id", value=best_run.info.run_id)
Example #7
def test_with_managed_run_with_non_throwing_class_exhibits_expected_behavior():
    client = MlflowClient()

    @with_managed_run
    class TestPatch(PatchFunction):
        def _patch_implementation(self, original, *args, **kwargs):
            return mlflow.active_run()

        def _on_exception(self, exception):
            pass

    run1 = TestPatch.call(lambda: "foo")
    run1_status = client.get_run(run1.info.run_id).info.status
    assert RunStatus.from_string(run1_status) == RunStatus.FINISHED

    with mlflow.start_run() as active_run:
        run2 = TestPatch.call(lambda: "foo")

    assert run2 == active_run
    run2_status = client.get_run(run2.info.run_id).info.status
    assert RunStatus.from_string(run2_status) == RunStatus.FINISHED
Example #8
def extract_run_meta_metrics(experiment, float_cols=[], filter_unfinished=True):
    client = MlflowClient()
    # pull runs for experiment 
    experiment_id = client.get_experiment_by_name(experiment).experiment_id
    run_infos = client.list_run_infos(experiment_id)
    runs = [client.get_run(run_info.run_id) for run_info in run_infos]
    
    # extract run metrics and metadata from runs
    run_metrics = []
    run_metas = []
    for run in runs:
        # extract run metadata from run info
        run_id = run.info.run_id
        run_status = run.info.status
    
        # filter out unfinished runs if specified
        if filter_unfinished and run_status != "FINISHED":
            continue
        
        time_ms = (run.info.end_time - run.info.start_time
                   if run.info.status == "FINISHED" else np.nan)
        meta = {
            "artifact_uri": run.info.artifact_uri,
            "time_ms": time_ms,
            "run_id": run_id,
            "status": run.info.status,
        }
        meta.update(run.data.params)
        meta.update(run.data.tags)
        meta.update(run.data.metrics)
        run_metas.append(meta)

        # metric histories have to be extracted separately because they
        # have a separate step dimension
        metric_names = run.data.metrics.keys()
        histories = [client.get_metric_history(run_id, name) for name in metric_names]
        for history in histories:
            for metric in history:
                metric_dict = {
                    "run_id": run_id,
                    "metric": metric.key,
                    "step": metric.step,
                    "value": metric.value,
                }
                run_metrics.append(metric_dict)
    meta_df = pd.DataFrame(run_metas).astype({
        col: "float64" for col in float_cols
    }).set_index("run_id")
    
    metrics_df = pd.DataFrame(run_metrics)
    return meta_df, metrics_df
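A hypothetical usage sketch for extract_run_meta_metrics; the experiment name, float column, and metric name are assumptions chosen for illustration. meta_df carries one row per finished run (indexed by run_id), while metrics_df holds the per-step history in long format and pivots into per-run curves.
# Illustrative only: "my-experiment", "lr", and "loss" are placeholder names.
meta_df, metrics_df = extract_run_meta_metrics("my-experiment", float_cols=["lr"])
print(meta_df[["status", "time_ms"]].head())

# One row per step and one column per run for a single metric's history.
loss_curves = metrics_df[metrics_df["metric"] == "loss"].pivot(
    index="step", columns="run_id", values="value"
)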
def _poll_once(
    client: MlflowClient, prev_run_id: Optional[str] = None
) -> Optional[str]:
    lst = client.get_latest_versions(MLFLOW_MODEL_NAME, stages=[MLFLOW_MODEL_STAGE])
    if not lst:
        logger.info(f"No model name={MLFLOW_MODEL_NAME} in stage={MLFLOW_MODEL_STAGE}")
        logger.info(f"Deleting existing model deployments.")
        subprocess.run(
            f"kubectl -n {K8S_NS} delete --ignore-not-found cm {MS_CONFIGMAP_NAME}",
            shell=True,
            check=True,
        )
        subprocess.run(
            f"kubectl -n {K8S_NS} delete --ignore-not-found deployment {MS_DEPLOYMENT_NAME}",
            shell=True,
            check=True,
        )
        return None
    assert len(lst) == 1, (len(lst), lst)
    m: ModelVersion = lst[0]
    if m.run_id == prev_run_id:
        logger.info(f"Already deployed model: run_id={prev_run_id} version={m.version}")
    else:
        logger.info(f"Found a model to deploy! run_id={m.run_id} version={m.version}")
        logger.info(f"Model details:\n{m.to_proto()}")
        r: Run = client.get_run(m.run_id)
        logger.info(f"Model run details:\n{yaml.dump(r.to_dictionary())}")

        logger.info(f"Deploying model run_id={m.run_id} version={m.version}")
        k = f"kubectl -n {K8S_NS}"
        configmap_path = _render_configmap(MS_CONFIGMAP_TEMPLATE, run_id=m.run_id)
        # logger.debug(f"Rendered configmap:\n{configmap_path.read_text()}")
        subprocess.run(f"{k} apply -f {configmap_path}", shell=True, check=True)

        # No zero-downtime :(
        subprocess.run(
            f"{k} delete --ignore-not-found deployment {MS_DEPLOYMENT_NAME}",
            shell=True,
            check=True,
        )
        subprocess.run(
            f"envsubst '$$GIT_BRANCH' < {MS_DEPLOYMENT_PATH} | {k} apply -f -",
            shell=True,
            check=True,
        )
        logger.info(f"")
        logger.info(
            f"[+] Successfully patched model deployment to run_id={m.run_id} version={m.version}"
        )
        logger.info(f"")

    return m.run_id
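_poll_once returns the run_id it last saw (or None) so a caller can feed it back on the next call and skip redundant redeploys. A minimal driver sketch, assuming a fixed sleep interval; the function name and interval are not from the original source, and it reuses the module's MlflowClient and Optional imports.
import time

def poll_forever(interval_s: float = 30.0) -> None:
    # Hypothetical outer loop: re-check the registry forever; _poll_once only
    # redeploys when the latest version's run_id differs from prev_run_id.
    client = MlflowClient()
    prev_run_id: Optional[str] = None
    while True:
        prev_run_id = _poll_once(client, prev_run_id)
        time.sleep(interval_s)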
def test_with_managed_run_ends_run_on_keyboard_interrupt():
    client = MlflowClient()
    run = None

    def original():
        nonlocal run
        run = mlflow.active_run()
        raise KeyboardInterrupt

    patch_function_1 = with_managed_run(
        "test_integration", lambda original, *args, **kwargs: original(*args, **kwargs)
    )

    with pytest.raises(KeyboardInterrupt):
        patch_function_1(original)

    assert not mlflow.active_run()
    run_status_1 = client.get_run(run.info.run_id).info.status
    assert RunStatus.from_string(run_status_1) == RunStatus.FAILED

    class PatchFunction2(PatchFunction):
        def _patch_implementation(self, original, *args, **kwargs):
            return original(*args, **kwargs)

        def _on_exception(self, exception):
            pass

    patch_function_2 = with_managed_run("test_integration", PatchFunction2)

    with pytest.raises(KeyboardInterrupt):
        patch_function_2.call(original)

    assert not mlflow.active_run()
    run_status_2 = client.get_run(run.info.run_id).info.status
    assert RunStatus.from_string(run_status_2) == RunStatus.FAILED
Example #11
def _validate_autologging_run(autologging_integration, run_id):
    """
    For testing purposes, verifies that an MLflow run produced by an `autologging_integration`
    satisfies the following properties:

        - The run has an autologging tag whose value is the name of the autologging integration
        - The run has a terminal status (e.g., KILLED, FAILED, FINISHED)
    """
    client = MlflowClient()
    run = client.get_run(run_id)
    autologging_tag_value = run.data.tags.get(MLFLOW_AUTOLOGGING)
    assert autologging_tag_value == autologging_integration, (
        "Autologging run with id {} failed to set autologging tag with expected value. Expected: "
        "'{}', Actual: '{}'".format(run_id, autologging_integration, autologging_tag_value)
    )
    assert RunStatus.is_terminated(
        RunStatus.from_string(run.info.status)
    ), "Autologging run with id {} has a non-terminal status '{}'".format(run_id, run.info.status)