def list_experiments(self, view_type: str = ViewType.ACTIVE_ONLY) -> List[Experiment]:
    """Return the experiments whose lifecycle stage matches ``view_type``.

    The view type is translated to a list of lifecycle stages, and the
    ``mlflow-experiments`` index is queried with a ``terms`` filter on
    ``lifecycle_stage``. Every hit in the response is converted with
    ``self._hit_to_mlflow_experiment``.

    :param view_type: Which experiments to include; defaults to
        ``ViewType.ACTIVE_ONLY``.
    :return: One converted experiment per hit in the search response.
    """
    allowed_stages = LifecycleStage.view_type_to_stages(view_type)

    # NOTE(review): no explicit result size is set on this query, so the
    # number of hits is whatever the backend returns by default -- confirm
    # that this is intended for stores with many experiments.
    query = Search(index="mlflow-experiments").filter(
        "terms", lifecycle_stage=allowed_stages)
    hits = query.execute()

    experiments = []
    for hit in hits:
        experiments.append(self._hit_to_mlflow_experiment(hit))
    return experiments
def list_all_columns(self, experiment_id: str, run_view_type: str) -> 'Columns':
    """Collect the metric, param and tag column names of an experiment.

    ``self._list_columns`` is called once per column type, in the order
    ``latest_metrics``, ``params``, ``tags``, and is handed the list that
    belongs to that type.

    :param experiment_id: Experiment whose columns are listed.
    :param run_view_type: View type, translated to lifecycle stages before
        being passed to ``self._list_columns``.
    :return: A ``Columns`` built from the three lists.
    """
    metric_names: List[str] = []
    param_names: List[str] = []
    tag_names: List[str] = []

    stages = LifecycleStage.view_type_to_stages(run_view_type)

    # Presumably ``_list_columns`` fills the list it receives in place; its
    # return value is not used here.
    targets = (
        ("latest_metrics", metric_names),
        ("params", param_names),
        ("tags", tag_names),
    )
    for column_type, names in targets:
        self._list_columns(experiment_id, stages, column_type, names)

    return Columns(metrics=metric_names, params=param_names, tags=tag_names)
def test_list_experiments(view_type, tmpdir):
    """Check ``mlflow.list_experiments`` against a server backed by SQLite.

    The database is seeded with ``num_experiments - 1`` experiments whose
    lifecycle stages are drawn from the stages allowed by ``view_type``.
    The assertions expect ``num_experiments`` results in total, i.e. the
    seeded rows plus one more (presumably the default experiment "0").
    ``num_experiments`` is ``SEARCH_MAX_RESULTS_DEFAULT + 1``, so the
    unspecified-``max_results`` case exceeds that default.

    :param view_type: View type under test.
    :param tmpdir: Temporary directory holding the SQLite file and artifacts.
    """
    sqlite_uri = "sqlite:///" + os.path.join(tmpdir.strpath, "test.db")
    store = SqlAlchemyStore(sqlite_uri, default_artifact_root=tmpdir.strpath)

    num_experiments = SEARCH_MAX_RESULTS_DEFAULT + 1

    if view_type == ViewType.DELETED_ONLY:
        # Delete the default experiment
        mlflow.tracking.MlflowClient(sqlite_uri).delete_experiment("0")

    # This is a bit hacky but much faster than creating experiments one by one with
    # `mlflow.create_experiment`
    with store.ManagedSessionMaker() as session:
        lifecycle_stages = LifecycleStage.view_type_to_stages(view_type)
        experiments = [
            SqlExperiment(
                name=f"exp_{i + 1}",
                lifecycle_stage=random.choice(lifecycle_stages),
                artifact_location=tmpdir.strpath,
            )
            for i in range(num_experiments - 1)
        ]
        session.add_all(experiments)

    # Start the server *before* entering ``try``: if startup fails there is
    # no process to terminate, and an unbound ``process`` in ``finally``
    # would otherwise mask the real error.
    url, process = _init_server(sqlite_uri, root_artifact_uri=tmpdir.strpath)
    try:
        # ``print`` does not interpolate ``%s`` arguments, so format explicitly.
        print(f"In process {process}")
        mlflow.set_tracking_uri(url)
        # `max_results` is unspecified
        assert len(mlflow.list_experiments(view_type)) == num_experiments
        # `max_results` is larger than the number of experiments in the database
        assert len(mlflow.list_experiments(view_type, num_experiments + 1)) == num_experiments
        # `max_results` is equal to the number of experiments in the database
        assert len(mlflow.list_experiments(view_type, num_experiments)) == num_experiments
        # `max_results` is smaller than the number of experiments in the database
        assert len(mlflow.list_experiments(view_type, num_experiments - 1)) == num_experiments - 1
    finally:
        process.terminate()
def test_list_experiments(search_filter_mock, create_store):
    """``list_experiments`` filters on active stages and converts each hit.

    The mocked filter returns a ``Search`` whose ``execute`` yields one fake
    hit. The test checks the filter arguments, that ``execute`` was called
    once with no arguments, and that the returned experiment equals the
    store's own conversion of the fake hit.
    """
    fake_hit = SimpleNamespace(
        meta=SimpleNamespace(id="1"),
        name="name",
        lifecycle_stage=LifecycleStage.ACTIVE,
        artifact_location="artifact_location",
    )
    response = [fake_hit]

    filtered_search = Search()
    search_filter_mock.return_value = filtered_search
    filtered_search.execute = mock.MagicMock(return_value=response)

    real_experiments = create_store.list_experiments()

    expected_stages = LifecycleStage.view_type_to_stages(ViewType.ACTIVE_ONLY)
    search_filter_mock.assert_called_once_with(
        "terms", lifecycle_stage=expected_stages)
    search_filter_mock.return_value.execute.assert_called_once_with()

    expected_experiment = create_store._hit_to_mlflow_experiment(fake_hit)
    assert vars(real_experiments[0]) == vars(expected_experiment)
def _search_runs(
    self,
    experiment_ids: List[str],
    filter_string: str,
    run_view_type: str,
    max_results: int = SEARCH_MAX_RESULTS_DEFAULT,
    order_by: List[str] = None,
    page_token: str = None,
    columns_to_whitelist: List[str] = None,
) -> Tuple[List[Run], str]:
    """Search the ``mlflow-runs`` index and return one page of runs.

    :param experiment_ids: Experiment ids to search. NOTE(review): only
        ``experiment_ids[0]`` is used in the query; an empty list raises
        ``IndexError`` -- confirm whether multi-experiment search is expected.
    :param filter_string: Filter expression, parsed with
        ``SearchUtils.parse_search_filter``.
    :param run_view_type: View type, translated to lifecycle stages.
    :param max_results: Page size; must not exceed 10000.
    :param order_by: Ordering clauses handed to ``self._get_orderby_clauses``.
    :param page_token: String form of the previous page's last sort values,
        as returned by this method. ``None`` or ``""`` starts from the
        beginning.
    :param columns_to_whitelist: Columns handed to
        ``self._build_columns_to_whitelist_key_dict`` and used when
        converting hits to runs.
    :return: ``(runs, next_page_token)``. The token is ``str()`` of the last
        hit's sort values when the page is full, otherwise ``"[]"``.
    :raises MlflowException: With ``INVALID_PARAMETER_VALUE`` when
        ``max_results`` is greater than 10000.
    """
    max_allowed = 10000
    if max_results > max_allowed:
        raise MlflowException(
            "Invalid value for request parameter max_results. It must be at "
            "most {}, but got value {}".format(max_allowed, max_results),
            INVALID_PARAMETER_VALUE)

    stages = LifecycleStage.view_type_to_stages(run_view_type)
    parsed_filters = SearchUtils.parse_search_filter(filter_string)

    filter_queries = [
        Q("match", experiment_id=experiment_ids[0]),
        Q("terms", lifecycle_stage=stages),
    ]
    filter_queries += self._build_elasticsearch_query(parsed_filters)
    sort_clauses = self._get_orderby_clauses(order_by)

    s = Search(index="mlflow-runs").query('bool', filter=filter_queries)
    s = s.sort(*sort_clauses)
    if page_token is not None and page_token != "":
        # The token is the ``str()`` of a list of sort values, so it is
        # parsed back with ``literal_eval`` (literals only, no code execution).
        s = s.extra(search_after=ast.literal_eval(page_token))
    response = s.params(size=max_results).execute()

    columns_to_whitelist_key_dict = self._build_columns_to_whitelist_key_dict(
        columns_to_whitelist)
    runs = [
        self._hit_to_mlflow_run(hit, columns_to_whitelist_key_dict)
        for hit in response
    ]

    # A full page means there may be more results. Require at least one run
    # so an empty page (e.g. ``max_results == 0``) does not index into an
    # empty hit list.
    if runs and len(runs) == max_results:
        next_page_token = response.hits.hits[-1].sort
    else:
        next_page_token = []
    return runs, str(next_page_token)