def test_search_runs_no_arguments():
    """
    When no experiment ID is specified, it should try to get the implicit one.
    """
    mock_experiment_id = mock.Mock()
    experiment_id_patch = mock.patch(
        "mlflow.tracking.fluent._get_experiment_id", return_value=mock_experiment_id
    )
    get_paginated_runs_patch = mock.patch("mlflow.tracking.fluent._paginate", return_value=[])
    with experiment_id_patch, get_paginated_runs_patch:
        search_runs()
        mlflow.tracking.fluent._paginate.assert_called_once()
        mlflow.tracking.fluent._get_experiment_id.assert_called_once()


def test_search_runs_no_arguments():
    """
    When no experiment ID is specified, it should try to get the implicit one or
    create a new experiment
    """
    mock_experiment_id = mock.Mock()
    experiment_id_patch = mock.patch(
        "mlflow.tracking.fluent._get_experiment_id", return_value=mock_experiment_id)
    get_paginated_runs_patch = mock.patch(
        'mlflow.tracking.fluent._get_paginated_runs', return_value=[])
    with experiment_id_patch, get_paginated_runs_patch:
        search_runs()
        mlflow.tracking.fluent._get_paginated_runs.assert_called_once_with(
            mock_experiment_id, '', ViewType.ACTIVE_ONLY, SEARCH_MAX_RESULTS_PANDAS, None)


def test_search_runs_attributes():
    runs = [
        create_run(status=RunStatus.FINISHED, a_uri="dbfs:/test", run_id="abc", exp_id="123"),
        create_run(status=RunStatus.SCHEDULED, a_uri="dbfs:/test2", run_id="def", exp_id="321"),
    ]
    with mock.patch("mlflow.tracking.fluent._paginate", return_value=runs):
        pdf = search_runs()
        data = {
            "status": [RunStatus.FINISHED, RunStatus.SCHEDULED],
            "artifact_uri": ["dbfs:/test", "dbfs:/test2"],
            "run_id": ["abc", "def"],
            "experiment_id": ["123", "321"],
            "start_time": [pd.to_datetime(0, utc=True), pd.to_datetime(0, utc=True)],
            "end_time": [pd.to_datetime(0, utc=True), pd.to_datetime(0, utc=True)],
        }
        expected_df = pd.DataFrame(data)
        pd.testing.assert_frame_equal(pdf, expected_df, check_like=True, check_frame_type=False)


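# The create_run helper used throughout these tests is not part of this excerpt.
# Below is a minimal sketch of what it could look like, assuming it assembles an
# mlflow.entities.Run from RunInfo/RunData; the parameter defaults and the ACTIVE
# lifecycle stage are assumptions, not the test module's actual implementation.
def create_run(status=RunStatus.FINISHED, a_uri=None, run_id="", exp_id="",
               uid="", start=0, end=0, metrics=None, params=None, tags=None):
    from mlflow.entities import LifecycleStage, Run, RunData, RunInfo

    return Run(
        RunInfo(
            run_uuid=run_id,
            run_id=run_id,
            experiment_id=exp_id,
            user_id=uid,
            status=status,
            start_time=start,
            end_time=end,
            lifecycle_stage=LifecycleStage.ACTIVE,
            artifact_uri=a_uri,
        ),
        RunData(metrics=metrics, params=params, tags=tags),
    )

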
def test_search_runs_data():
    runs = [
        create_run(metrics=[Metric("mse", 0.2, 0, 0)],
                   params=[Param("param", "value")],
                   tags=[RunTag("tag", "value")]),
        create_run(
            metrics=[Metric("mse", 0.6, 0, 0), Metric("loss", 1.2, 0, 5)],
            params=[Param("param2", "val"), Param("k", "v")],
            tags=[RunTag("tag2", "v2")])
    ]
    with mock.patch.object(MlflowClient, "search_runs", return_value=runs):
        pdf = search_runs()
        data = {
            'status': [RunStatus.FINISHED] * 2,
            'artifact_uri': [None] * 2,
            'run_id': [''] * 2,
            'experiment_id': [""] * 2,
            'metrics.mse': [0.2, 0.6],
            'metrics.loss': [np.nan, 1.2],
            'params.param': ["value", None],
            'params.param2': [None, "val"],
            'params.k': [None, "v"],
            'tags.tag': ["value", None],
            'tags.tag2': [None, "v2"]
        }
        expected_df = pd.DataFrame(data)
        pd.testing.assert_frame_equal(pdf, expected_df, check_like=True, check_frame_type=False)


def test_search_runs_attributes():
    runs = [
        create_run(status=RunStatus.FINISHED, a_uri="dbfs:/test", run_id='abc', exp_id="123"),
        create_run(status=RunStatus.SCHEDULED, a_uri="dbfs:/test2", run_id='def', exp_id="321")
    ]
    with mock.patch('mlflow.tracking.fluent._get_paginated_runs', return_value=runs):
        pdf = search_runs()
        data = {
            'status': [RunStatus.FINISHED, RunStatus.SCHEDULED],
            'artifact_uri': ["dbfs:/test", "dbfs:/test2"],
            'run_id': ['abc', 'def'],
            'experiment_id': ["123", "321"],
            'start_time': [pd.to_datetime(0, utc=True), pd.to_datetime(0, utc=True)],
            'end_time': [pd.to_datetime(0, utc=True), pd.to_datetime(0, utc=True)]
        }
        expected_df = pd.DataFrame(data)
        pd.testing.assert_frame_equal(pdf, expected_df, check_like=True, check_frame_type=False)


def test_search_runs_data():
    runs = [
        create_run(
            metrics=[Metric("mse", 0.2, 0, 0)],
            params=[Param("param", "value")],
            tags=[RunTag("tag", "value")],
            start=1564675200000,
            end=1564683035000),
        create_run(
            metrics=[Metric("mse", 0.6, 0, 0), Metric("loss", 1.2, 0, 5)],
            params=[Param("param2", "val"), Param("k", "v")],
            tags=[RunTag("tag2", "v2")],
            start=1564765200000,
            end=1564783200000)]
    with mock.patch('mlflow.tracking.fluent._get_paginated_runs', return_value=runs):
        pdf = search_runs()
        data = {
            'status': [RunStatus.FINISHED] * 2,
            'artifact_uri': [None] * 2,
            'run_id': [''] * 2,
            'experiment_id': [""] * 2,
            'metrics.mse': [0.2, 0.6],
            'metrics.loss': [np.nan, 1.2],
            'params.param': ["value", None],
            'params.param2': [None, "val"],
            'params.k': [None, "v"],
            'tags.tag': ["value", None],
            'tags.tag2': [None, "v2"],
            'start_time': [pd.to_datetime(1564675200000, unit="ms", utc=True),
                           pd.to_datetime(1564765200000, unit="ms", utc=True)],
            'end_time': [pd.to_datetime(1564683035000, unit="ms", utc=True),
                         pd.to_datetime(1564783200000, unit="ms", utc=True)]}
        expected_df = pd.DataFrame(data)
        pd.testing.assert_frame_equal(pdf, expected_df, check_like=True, check_frame_type=False)


def test_search_runs_with_arguments():
    mock_experiment_ids = mock.Mock()
    mock_filter_string = mock.Mock()
    mock_view_type = mock.Mock()
    mock_max_results = mock.Mock()
    mock_order_by = mock.Mock()
    with mock.patch.object(MlflowClient, "search_runs", return_value=[]):
        pdf = search_runs(mock_experiment_ids, mock_filter_string, mock_view_type,
                          mock_max_results, mock_order_by)
        MlflowClient.search_runs.assert_called_once_with(
            mock_experiment_ids, mock_filter_string, mock_view_type,
            mock_max_results, mock_order_by)


def test_search_runs_all_experiments(search_runs_output_format):
    """
    When no experiment ID is specified but flag is passed, it should search all experiments.
    """
    from mlflow.entities import Experiment

    mock_experiment_id = mock.Mock()
    mock_experiment = mock.Mock(Experiment)
    experiment_id_patch = mock.patch(
        "mlflow.tracking.fluent._get_experiment_id", return_value=mock_experiment_id)
    experiment_list_patch = mock.patch(
        "mlflow.tracking.fluent.list_experiments", return_value=[mock_experiment])
    get_paginated_runs_patch = mock.patch("mlflow.tracking.fluent._paginate", return_value=[])
    with experiment_id_patch, experiment_list_patch, get_paginated_runs_patch:
        search_runs(output_format=search_runs_output_format, search_all_experiments=True)
        mlflow.tracking.fluent.list_experiments.assert_called_once()
        mlflow.tracking.fluent._get_experiment_id.assert_not_called()


def generate_csv_with_runs(experiment_id, filename):
    # type: (str, str) -> None
    """
    Generate CSV with all runs for an experiment
    """
    runs = fluent.search_runs(experiment_ids=experiment_id)
    if filename:
        runs.to_csv(filename, index=False)
        print("Experiment with ID %s has been exported as a CSV to file: %s."
              % (experiment_id, filename))
    else:
        print(runs.to_csv(index=False))


def test_search_runs_no_arguments():
    # When no experiment ID is specified,
    # it should try to get the implicit one or create a new experiment
    mock_experiment_id = mock.Mock()
    experiment_id_patch = mock.patch(
        "mlflow.tracking.fluent._get_experiment_id", return_value=mock_experiment_id)
    with experiment_id_patch, mock.patch.object(MlflowClient, "search_runs", return_value=[]):
        pdf = search_runs()
        MlflowClient.search_runs.assert_called_once_with(
            mock_experiment_id, '', ViewType.ACTIVE_ONLY, SEARCH_MAX_RESULTS_PANDAS, None)


def test_search_runs_by_experiment_name():
    name = f"Random experiment {random.randint(1, 1e6)}"
    exp_id = uuid.uuid4().hex
    experiment = create_experiment(experiment_id=exp_id, name=name)
    runs, data = create_test_runs_and_expected_data("pandas", exp_id)

    get_experiment_patch = mock.patch(
        "mlflow.tracking.fluent.get_experiment_by_name", return_value=experiment)
    get_paginated_runs_patch = mock.patch("mlflow.tracking.fluent._paginate", return_value=runs)

    with get_experiment_patch, get_paginated_runs_patch:
        result = search_runs(experiment_names=[name])
        validate_search_runs(result, data, "pandas")


def test_search_runs_data():
    import numpy as np
    import pandas as pd

    runs = [
        create_run(
            metrics=[Metric("mse", 0.2, 0, 0)],
            params=[Param("param", "value")],
            tags=[RunTag("tag", "value")],
            start=1564675200000,
            end=1564683035000,
        ),
        create_run(
            metrics=[Metric("mse", 0.6, 0, 0), Metric("loss", 1.2, 0, 5)],
            params=[Param("param2", "val"), Param("k", "v")],
            tags=[RunTag("tag2", "v2")],
            start=1564765200000,
            end=1564783200000,
        ),
    ]
    with mock.patch("mlflow.tracking.fluent._paginate", return_value=runs):
        pdf = search_runs()
        data = {
            "status": [RunStatus.FINISHED] * 2,
            "artifact_uri": [None] * 2,
            "run_id": [""] * 2,
            "experiment_id": [""] * 2,
            "metrics.mse": [0.2, 0.6],
            "metrics.loss": [np.nan, 1.2],
            "params.param": ["value", None],
            "params.param2": [None, "val"],
            "params.k": [None, "v"],
            "tags.tag": ["value", None],
            "tags.tag2": [None, "v2"],
            "start_time": [
                pd.to_datetime(1564675200000, unit="ms", utc=True),
                pd.to_datetime(1564765200000, unit="ms", utc=True),
            ],
            "end_time": [
                pd.to_datetime(1564683035000, unit="ms", utc=True),
                pd.to_datetime(1564783200000, unit="ms", utc=True),
            ],
        }
        validate_search_runs(pdf, data, "pandas")


def test_search_runs_attributes(search_runs_output_format):
    start_times = [
        get_search_runs_timestamp(search_runs_output_format),
        get_search_runs_timestamp(search_runs_output_format),
    ]
    end_times = [
        get_search_runs_timestamp(search_runs_output_format),
        get_search_runs_timestamp(search_runs_output_format),
    ]
    runs = [
        create_run(
            status=RunStatus.FINISHED,
            a_uri="dbfs:/test",
            run_id="abc",
            exp_id="123",
            start=start_times[0],
            end=end_times[0],
        ),
        create_run(
            status=RunStatus.SCHEDULED,
            a_uri="dbfs:/test2",
            run_id="def",
            exp_id="321",
            start=start_times[1],
            end=end_times[1],
        ),
    ]
    with mock.patch("mlflow.tracking.fluent._paginate", return_value=runs):
        pdf = search_runs(output_format=search_runs_output_format)
        data = {
            "status": [RunStatus.FINISHED, RunStatus.SCHEDULED],
            "artifact_uri": ["dbfs:/test", "dbfs:/test2"],
            "run_id": ["abc", "def"],
            "experiment_id": ["123", "321"],
            "start_time": start_times,
            "end_time": end_times,
        }
        validate_search_runs(pdf, data, search_runs_output_format)


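# get_search_runs_timestamp is another helper that is not included in this excerpt.
# A plausible sketch, assuming the "list" output format keeps raw epoch timestamps
# while the "pandas" format expects timezone-aware Timestamps; both behaviors are
# assumptions rather than the module's actual helper.
def get_search_runs_timestamp(output_format):
    import time

    if output_format == "list":
        return time.time()
    return pd.to_datetime(0, utc=True)

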
def test_search_runs_attributes():
    runs = [
        create_run(status=RunStatus.FINISHED, a_uri="dbfs:/test", run_id='abc', exp_id="123"),
        create_run(status=RunStatus.SCHEDULED, a_uri="dbfs:/test2", run_id='def', exp_id="321")
    ]
    with mock.patch.object(MlflowClient, "search_runs", return_value=runs):
        pdf = search_runs()
        data = {
            'status': [RunStatus.FINISHED, RunStatus.SCHEDULED],
            'artifact_uri': ["dbfs:/test", "dbfs:/test2"],
            'run_id': ['abc', 'def'],
            'experiment_id': ["123", "321"]
        }
        expected_df = pd.DataFrame(data)
        pd.testing.assert_frame_equal(pdf, expected_df, check_like=True, check_frame_type=False)


def test_search_runs_by_experiment_id_and_name():
    """When both experiment_ids and experiment_names are used, it should throw an exception"""
    err_msg = "Only experiment_ids or experiment_names can be used, but not both"
    with pytest.raises(MlflowException, match=err_msg):
        search_runs(experiment_ids=["id"], experiment_names=["name"])


def test_search_runs_by_non_existing_experiment_name():
    """When invalid experiment names are used (including None),
    it should return an empty collection.
    """
    for name in [None, f"Random {random.randint(1, 1e6)}"]:
        assert search_runs(experiment_names=[name], output_format="list") == []


def test_search_runs_attributes(search_runs_output_format):
    runs, data = create_test_runs_and_expected_data(search_runs_output_format)
    with mock.patch("mlflow.tracking.fluent._paginate", return_value=runs):
        pdf = search_runs(output_format=search_runs_output_format)
        validate_search_runs(pdf, data, search_runs_output_format)


def test_search_runs_data():
    runs, data = create_test_runs_and_expected_data("pandas")
    with mock.patch("mlflow.tracking.fluent._paginate", return_value=runs):
        pdf = search_runs()
        validate_search_runs(pdf, data, "pandas")


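# validate_search_runs is also defined elsewhere in the test module. A minimal sketch,
# assuming pandas results are compared column-wise against a DataFrame built from the
# expected data, and list results are checked by run_id order via run.info; both the
# comparison strategy and the error handling below are assumptions for illustration.
def validate_search_runs(results, data, output_format):
    if output_format == "pandas":
        expected_df = pd.DataFrame(data)
        pd.testing.assert_frame_equal(results, expected_df, check_like=True, check_frame_type=False)
    elif output_format == "list":
        assert [run.info.run_id for run in results] == data["run_id"]
    else:
        raise ValueError(f"Invalid output format: {output_format}")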