Example #1
0
 def list_experiments(self,
                      view_type: str = ViewType.ACTIVE_ONLY
                      ) -> List[Experiment]:
     """Return the experiments whose lifecycle stage matches ``view_type``.

     The view type is first translated into a list of lifecycle stages,
     which is then used as a ``terms`` filter on the ``lifecycle_stage``
     field of the ``mlflow-experiments`` index.

     :param view_type: View type selecting which lifecycle stages to include.
     :return: One experiment entity per hit in the search response.
     """
     wanted_stages = LifecycleStage.view_type_to_stages(view_type)
     # NOTE(review): no explicit result size is set on this search, so the
     # number of hits is whatever the backend returns by default -- confirm
     # that this is intended when many experiments exist.
     query = Search(index="mlflow-experiments")
     query = query.filter("terms", lifecycle_stage=wanted_stages)
     hits = query.execute()
     return list(map(self._hit_to_mlflow_experiment, hits))
Example #2
0
 def list_all_columns(self, experiment_id: str,
                      run_view_type: str) -> 'Columns':
     """Collect the metric, param and tag column names of an experiment.

     ``_list_columns`` is invoked once per column type (``latest_metrics``,
     ``params``, ``tags``) with the lifecycle stages derived from
     ``run_view_type``.

     :param experiment_id: Experiment whose columns are listed.
     :param run_view_type: View type selecting which run lifecycle stages
         are taken into account.
     :return: A ``Columns`` object built from the three collected lists.
     """
     stages = LifecycleStage.view_type_to_stages(run_view_type)
     metrics: List[str] = []
     params: List[str] = []
     tags: List[str] = []
     # The helper's return value is ignored, so it is expected to fill the
     # list it is handed in place.
     buckets = (
         ('latest_metrics', metrics),
         ('params', params),
         ('tags', tags),
     )
     for column_type, bucket in buckets:
         self._list_columns(experiment_id, stages, column_type, bucket)
     return Columns(metrics=metrics, params=params, tags=tags)
Example #3
0
def test_list_experiments(view_type, tmpdir):
    """Check how many experiments ``mlflow.list_experiments`` returns.

    A SQLite-backed ``SqlAlchemyStore`` is filled with
    ``SEARCH_MAX_RESULTS_DEFAULT`` extra experiments whose lifecycle stage is
    drawn from the stages matching ``view_type``. A server is then started on
    that database and ``mlflow.list_experiments`` is called with
    ``max_results`` unspecified, larger than, equal to and smaller than the
    expected number of experiments.

    :param view_type: View type under test.
    :param tmpdir: Temporary directory holding the SQLite file; also used as
        artifact root.
    """
    sqlite_uri = "sqlite:///" + os.path.join(tmpdir.strpath, "test.db")
    store = SqlAlchemyStore(sqlite_uri, default_artifact_root=tmpdir.strpath)

    # The assertions below expect one more experiment than the number
    # inserted by hand (the default experiment "0" accounts for the extra one).
    num_experiments = SEARCH_MAX_RESULTS_DEFAULT + 1

    if view_type == ViewType.DELETED_ONLY:
        # Delete the default experiment
        mlflow.tracking.MlflowClient(sqlite_uri).delete_experiment("0")

    # This is a bit hacky but much faster than creating experiments one by one with
    # `mlflow.create_experiment`
    with store.ManagedSessionMaker() as session:
        lifecycle_stages = LifecycleStage.view_type_to_stages(view_type)
        experiments = [
            SqlExperiment(
                name=f"exp_{i + 1}",
                lifecycle_stage=random.choice(lifecycle_stages),
                artifact_location=tmpdir.strpath,
            ) for i in range(num_experiments - 1)
        ]
        session.add_all(experiments)

    # Start the server *before* entering the ``try``: if start-up fails there
    # is no process to terminate, and the ``finally`` clause must not hide the
    # real error behind an UnboundLocalError on ``process``.
    url, process = _init_server(sqlite_uri,
                                root_artifact_uri=tmpdir.strpath)
    try:
        print(f"In process {process}")
        mlflow.set_tracking_uri(url)
        # `max_results` is unspecified
        assert len(mlflow.list_experiments(view_type)) == num_experiments
        # `max_results` is larger than the number of experiments in the database
        assert len(mlflow.list_experiments(view_type, num_experiments +
                                           1)) == num_experiments
        # `max_results` is equal to the number of experiments in the database
        assert len(mlflow.list_experiments(view_type,
                                           num_experiments)) == num_experiments
        # `max_results` is smaller than the number of experiments in the database
        assert len(mlflow.list_experiments(view_type, num_experiments -
                                           1)) == num_experiments - 1
    finally:
        process.terminate()
Example #4
0
def test_list_experiments(search_filter_mock, create_store):
    """Verify ``list_experiments`` filters on active stages and maps the hits.

    The mocked filter call returns a ``Search`` whose ``execute`` yields one
    fake hit. The test checks that the filter was called once with the stages
    for ``ViewType.ACTIVE_ONLY``, that ``execute`` was called once without
    arguments, and that the first returned experiment equals the store's own
    conversion of the fake hit.
    """
    fake_hit = SimpleNamespace(
        meta=SimpleNamespace(id="1"),
        name="name",
        lifecycle_stage=LifecycleStage.ACTIVE,
        artifact_location="artifact_location",
    )
    response = [fake_hit]

    # Whatever the filter call returns is what the store will execute.
    stubbed_search = Search()
    stubbed_search.execute = mock.MagicMock(return_value=response)
    search_filter_mock.return_value = stubbed_search

    real_experiments = create_store.list_experiments()

    expected_stages = LifecycleStage.view_type_to_stages(ViewType.ACTIVE_ONLY)
    search_filter_mock.assert_called_once_with(
        "terms", lifecycle_stage=expected_stages)
    stubbed_search.execute.assert_called_once_with()

    expected_experiments = []
    for hit in response:
        expected_experiments.append(create_store._hit_to_mlflow_experiment(hit))
    assert real_experiments[0].__dict__ == expected_experiments[0].__dict__
Example #5
0
    def _search_runs(
            self,
            experiment_ids: List[str],
            filter_string: str,
            run_view_type: str,
            max_results: int = SEARCH_MAX_RESULTS_DEFAULT,
            order_by: List[str] = None,
            page_token: str = None,
            columns_to_whitelist: List[str] = None) -> Tuple[List[Run], str]:
        """Search the ``mlflow-runs`` index and return one page of runs.

        :param experiment_ids: Experiment ids to search; see the review note
            in the body about which of them are actually used.
        :param filter_string: Search filter, parsed with
            ``SearchUtils.parse_search_filter``.
        :param run_view_type: View type, translated into the lifecycle stages
            the runs must be in.
        :param max_results: Page size; must not exceed 10000.
        :param order_by: Ordering expressions handed to
            ``_get_orderby_clauses``.
        :param page_token: String form of the ``search_after`` values from
            which to continue; ``None`` or ``""`` starts at the first page.
        :param columns_to_whitelist: Columns handed to
            ``_build_columns_to_whitelist_key_dict`` and then to the hit
            conversion.
        :return: ``(runs, next_page_token)``. The token is the string form of
            the last hit's sort values when the page is full, otherwise the
            string form of an empty list.
        :raises MlflowException: If ``max_results`` is greater than 10000.
        """
        if max_results > 10000:
            raise MlflowException(
                "Invalid value for request parameter max_results. It must be at "
                "most {}, but got value {}".format(10000, max_results),
                INVALID_PARAMETER_VALUE)
        stages = LifecycleStage.view_type_to_stages(run_view_type)
        parsed_filters = SearchUtils.parse_search_filter(filter_string)
        # NOTE(review): only the first experiment id takes part in the query;
        # any further ids are ignored and an empty list raises IndexError --
        # confirm callers always pass exactly one id.
        filter_queries = [
            Q("match", experiment_id=experiment_ids[0]),
            Q("terms", lifecycle_stage=stages)
        ]
        filter_queries += self._build_elasticsearch_query(parsed_filters)
        sort_clauses = self._get_orderby_clauses(order_by)
        s = Search(index="mlflow-runs").query('bool', filter=filter_queries)
        s = s.sort(*sort_clauses)
        if page_token != "" and page_token is not None:
            # The token is the string form of a list of sort values (see the
            # return statement), so it is parsed back with literal_eval.
            s = s.extra(search_after=ast.literal_eval(page_token))
        response = s.params(size=max_results).execute()
        columns_to_whitelist_key_dict = self._build_columns_to_whitelist_key_dict(
            columns_to_whitelist)
        runs = [
            self._hit_to_mlflow_run(hit, columns_to_whitelist_key_dict)
            for hit in response
        ]
        # A full page means more results may follow. The extra ``runs`` check
        # covers ``max_results == 0``: the page is then empty, and indexing
        # the last hit would raise IndexError.
        if runs and len(runs) == max_results:
            next_page_token = response.hits.hits[-1].sort
        else:
            next_page_token = []
        return runs, str(next_page_token)