def test_bad_comparators(entity_type, bad_comparators, key, entity_value):
    run = Run(
        run_info=RunInfo(
            run_uuid="hi", run_id="hi", experiment_id=0, user_id="user-id",
            status=RunStatus.to_string(RunStatus.FAILED),
            start_time=0, end_time=1, lifecycle_stage=LifecycleStage.ACTIVE),
        run_data=RunData(metrics=[], params=[], tags=[]))
    for bad_comparator in bad_comparators:
        bad_filter = "{entity_type}.{key} {comparator} {value}".format(
            entity_type=entity_type, key=key, comparator=bad_comparator,
            value=entity_value)
        with pytest.raises(MlflowException) as e:
            SearchUtils.filter([run], bad_filter)
        assert "Invalid comparator" in str(e.value.message)

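# A hedged sketch of parametrization that could drive test_bad_comparators above.
# The cases are illustrative assumptions, not the project's actual fixture data:
# params and tags accept only equality-style comparators in MLflow's filter
# grammar, so ordering comparators should trigger the "Invalid comparator" error.
@pytest.mark.parametrize("entity_type, bad_comparators, key, entity_value", [
    ("params", ["<", "<=", ">", ">="], "my_param", "'a-value'"),   # assumed case
    ("tags", ["<", "<=", ">", ">="], "my_tag", "'a-value'"),       # assumed case
])
def test_bad_comparators_params_and_tags(entity_type, bad_comparators, key, entity_value):
    # Delegates to the same assertions as test_bad_comparators above.
    test_bad_comparators(entity_type, bad_comparators, key, entity_value)
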
def _search_runs(self, experiment_ids, filter_string, run_view_type, max_results,
                 order_by, page_token):
    # TODO: push search query into backend database layer
    if max_results > SEARCH_MAX_RESULTS_THRESHOLD:
        raise MlflowException(
            "Invalid value for request parameter max_results. It must be at "
            "most {}, but got value {}".format(
                SEARCH_MAX_RESULTS_THRESHOLD, max_results),
            INVALID_PARAMETER_VALUE)
    stages = set(LifecycleStage.view_type_to_stages(run_view_type))
    with self.ManagedSessionMaker() as session:
        # Fetch the appropriate runs and eagerly load their summary metrics, params,
        # and tags. These run attributes are referenced during the invocation of
        # ``run.to_mlflow_entity()``, so eager loading helps avoid additional database
        # queries that are otherwise executed at attribute access time under a lazy
        # loading model.
        queried_runs = session \
            .query(SqlRun) \
            .options(*self._get_eager_run_query_options()) \
            .filter(
                SqlRun.experiment_id.in_(experiment_ids),
                SqlRun.lifecycle_stage.in_(stages)) \
            .all()
        runs = [run.to_mlflow_entity() for run in queried_runs]
    filtered = SearchUtils.filter(runs, filter_string)
    sorted_runs = SearchUtils.sort(filtered, order_by)
    runs, next_page_token = SearchUtils.paginate(sorted_runs, page_token, max_results)
    return runs, next_page_token

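# A plausible sketch of the eager-load options referenced above, assuming SqlRun
# defines "latest_metrics", "params", and "tags" relationships; the real helper
# may differ. sqlalchemy.orm.joinedload fetches the related rows alongside the
# runs, which is what lets to_mlflow_entity() avoid per-attribute lazy loads.
import sqlalchemy.orm

def _get_eager_run_query_options_sketch():
    return [
        sqlalchemy.orm.joinedload(SqlRun.latest_metrics),  # assumed relationship name
        sqlalchemy.orm.joinedload(SqlRun.params),          # assumed relationship name
        sqlalchemy.orm.joinedload(SqlRun.tags),            # assumed relationship name
    ]
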
def test_correct_filtering(filter_string, matching_runs):
    runs = [
        Run(run_info=RunInfo(
                run_uuid="hi", run_id="hi", experiment_id=0, user_id="user-id",
                status=RunStatus.to_string(RunStatus.FAILED),
                start_time=0, end_time=1, lifecycle_stage=LifecycleStage.ACTIVE),
            run_data=RunData(
                metrics=[Metric("key1", 121, 1, 0)],
                params=[Param("my_param", "A")], tags=[])),
        Run(run_info=RunInfo(
                run_uuid="hi2", run_id="hi2", experiment_id=0, user_id="user-id",
                status=RunStatus.to_string(RunStatus.FINISHED),
                start_time=0, end_time=1, lifecycle_stage=LifecycleStage.ACTIVE),
            run_data=RunData(
                metrics=[Metric("key1", 123, 1, 0)],
                params=[Param("my_param", "A")], tags=[RunTag("tag1", "C")])),
        Run(run_info=RunInfo(
                run_uuid="hi3", run_id="hi3", experiment_id=1, user_id="user-id",
                status=RunStatus.to_string(RunStatus.FAILED),
                start_time=0, end_time=1, lifecycle_stage=LifecycleStage.ACTIVE),
            run_data=RunData(
                metrics=[Metric("key1", 125, 1, 0)],
                params=[Param("my_param", "B")], tags=[RunTag("tag1", "D")])),
    ]
    filtered_runs = SearchUtils.filter(runs, filter_string)
    assert set(filtered_runs) == set([runs[i] for i in matching_runs])

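# A hedged sketch of cases that would exercise test_correct_filtering above.
# The expected indices follow directly from the run data (key1 = 121, 123, 125
# and my_param = "A", "A", "B"), but the exact fixture list is an assumption.
@pytest.mark.parametrize("filter_string, matching_runs", [
    ("metrics.key1 > 122", [1, 2]),                          # runs with key1 = 123, 125
    ("params.my_param = 'A'", [0, 1]),                       # runs with my_param = "A"
    ("metrics.key1 > 122 and params.my_param = 'A'", [1]),   # intersection of the two
])
def test_correct_filtering_examples(filter_string, matching_runs):
    test_correct_filtering(filter_string, matching_runs)
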
def test_filter_runs_by_start_time():
    runs = [
        Run(
            run_info=RunInfo(
                run_uuid=run_id,
                run_id=run_id,
                experiment_id=0,
                user_id="user-id",
                status=RunStatus.to_string(RunStatus.FINISHED),
                start_time=idx,
                end_time=1,
                lifecycle_stage=LifecycleStage.ACTIVE,
            ),
            run_data=RunData(),
        )
        for idx, run_id in enumerate(["a", "b", "c"])
    ]
    assert SearchUtils.filter(runs, "attribute.start_time >= 0") == runs
    assert SearchUtils.filter(runs, "attribute.start_time > 1") == runs[2:]
    assert SearchUtils.filter(runs, "attribute.start_time = 2") == runs[2:]

def _search_runs(self, experiment_ids, filter_string, run_view_type, max_results,
                 order_by, page_token):
    if max_results > SEARCH_MAX_RESULTS_THRESHOLD:
        raise MlflowException(
            "Invalid value for request parameter max_results. It must be at "
            "most {}, but got value {}".format(
                SEARCH_MAX_RESULTS_THRESHOLD, max_results),
            databricks_pb2.INVALID_PARAMETER_VALUE)
    runs = []
    for experiment_id in experiment_ids:
        run_infos = self._list_run_infos(experiment_id, run_view_type)
        runs.extend(self.get_run(r.run_id) for r in run_infos)
    filtered = SearchUtils.filter(runs, filter_string)
    sorted_runs = SearchUtils.sort(filtered, order_by)
    runs, next_page_token = SearchUtils.paginate(sorted_runs, page_token, max_results)
    return runs, next_page_token

def _search_runs(self, experiment_ids, filter_string, run_view_type, max_results,
                 order_by, page_token):
    if page_token:
        raise MlflowException("SQLAlchemy-backed tracking stores do not yet support "
                              "pagination tokens.")
    # TODO: push search query into backend database layer
    if max_results > SEARCH_MAX_RESULTS_THRESHOLD:
        raise MlflowException(
            "Invalid value for request parameter max_results. It must be at "
            "most {}, but got value {}".format(
                SEARCH_MAX_RESULTS_THRESHOLD, max_results),
            INVALID_PARAMETER_VALUE)
    with self.ManagedSessionMaker() as session:
        runs = [run.to_mlflow_entity()
                for exp in experiment_ids
                for run in self._list_runs(session, exp, run_view_type)]
        filtered = SearchUtils.filter(runs, filter_string)
        runs = SearchUtils.sort(filtered, order_by)[:max_results]
    # Pagination is unsupported here, so there is never a next-page token.
    return runs, None

def search_runs(self, experiment_ids, filter_string, run_view_type,
                max_results=SEARCH_MAX_RESULTS_THRESHOLD, order_by=None):
    # TODO: push search query into backend database layer
    if max_results > SEARCH_MAX_RESULTS_THRESHOLD:
        raise MlflowException(
            "Invalid value for request parameter max_results. It must be at "
            "most {}, but got value {}".format(
                SEARCH_MAX_RESULTS_THRESHOLD, max_results),
            INVALID_PARAMETER_VALUE)
    with self.ManagedSessionMaker() as session:
        runs = [
            run.to_mlflow_entity()
            for exp in experiment_ids
            for run in self._list_runs(session, exp, run_view_type)
        ]
        filtered = SearchUtils.filter(runs, filter_string)
        return SearchUtils.sort(filtered, order_by)[:max_results]

def _search_runs(
    self,
    experiment_ids,
    filter_string,
    run_view_type,
    max_results,
    order_by,
    page_token,
):
    if max_results > SEARCH_MAX_RESULTS_THRESHOLD:
        raise MlflowException(
            "Invalid value for request parameter max_results. It must be at "
            "most {}, but got value {}".format(
                SEARCH_MAX_RESULTS_THRESHOLD, max_results),
            INVALID_PARAMETER_VALUE,
        )
    runs = []
    for experiment_id in experiment_ids:
        run_ids = self._list_runs_ids(experiment_id, run_view_type)
        run_infos = [_dict_to_run_info(r) for r in self._get_run_list(run_ids)]
        for run_info in run_infos:
            # Load the metrics, params and tags for the run
            run_id = run_info.run_id
            metrics = self.get_all_metrics(run_id)
            params = self.get_all_params(run_id)
            tags = self.get_all_tags(run_id)
            run = Run(run_info, RunData(metrics, params, tags))
            runs.append(run)
    filtered = SearchUtils.filter(runs, filter_string)
    sorted_runs = SearchUtils.sort(filtered, order_by)
    runs, next_page_token = SearchUtils.paginate(sorted_runs, page_token, max_results)
    return runs, next_page_token

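# A minimal sketch of offset-based pagination consistent with how
# SearchUtils.paginate is called above: (sorted_runs, page_token, max_results)
# -> (page, next_page_token). Encoding the token as base64-wrapped JSON is an
# assumption for illustration, not necessarily MLflow's actual token scheme.
import base64
import json

def paginate_sketch(runs, page_token, max_results):
    offset = 0
    if page_token:
        # Decode the previous page's token to recover the starting offset.
        offset = json.loads(base64.b64decode(page_token))["offset"]
    page = runs[offset:offset + max_results]
    next_page_token = None
    if offset + max_results < len(runs):
        # More results remain, so hand back a token pointing at the next page.
        next_page_token = base64.b64encode(
            json.dumps({"offset": offset + max_results}).encode("utf-8")
        ).decode("utf-8")
    return page, next_page_token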