def test_space_order_by_search_runs(order_by, ascending_expected):
    """Check parsing of an order_by clause whose metric name contains spaces.

    The parsed clause must identify the metric "Mean Square Error" and report
    the sort direction given by ``ascending_expected``.
    """
    parsed_clause = SearchUtils.parse_order_by_for_search_runs(order_by)
    identifier_type, identifier_name, ascending = parsed_clause

    assert identifier_type == "metric"
    assert identifier_name == "Mean Square Error"
    assert ascending == ascending_expected
def _get_orderby_clauses(order_by_list, session):
    """Build the ORDER BY clauses (and supporting subqueries) for a run search.

    Each requested order_by contributes two clauses: a CASE expression that
    pushes missing values (and NaN metrics) to the end, followed by the value
    itself in the requested direction. The natural ordering of runs -- start
    time descending, then run id as a tie-breaker -- is always appended last.

    :param order_by_list: Order_by strings understood by
        ``SearchUtils.parse_order_by_for_search_runs``; may be empty or None.
    :param session: Object whose ``query(...)`` is used to build the
        per-key subqueries (presumably a SQLAlchemy session -- confirm
        against the caller).
    :return: A ``(clauses, ordering_joins)`` tuple: the ordered list of
        sorting clauses and the list of subqueries that were created for
        metric/tag/param keys.
    :raises MlflowException: If a clause refers to an identifier type that is
        not an attribute, metric, tag or param.
    """
    sort_clauses = []
    joined_subqueries = []

    # Unlike filters, attribute sorts and joined-table sorts cannot be handled
    # separately: every clause has to stay in the order it was requested.
    for position, raw_order_by in enumerate(order_by_list or (), start=1):
        key_type, key, ascending = SearchUtils.parse_order_by_for_search_runs(raw_order_by)

        if SearchUtils.is_attribute(key_type, "="):
            sort_column = getattr(SqlRun, SqlRun.get_attribute_name(key))
        else:
            # "=" is only passed because a valid comparator is required.
            if SearchUtils.is_metric(key_type, "="):
                model = SqlLatestMetric
            elif SearchUtils.is_tag(key_type, "="):
                model = SqlTag
            elif SearchUtils.is_param(key_type, "="):
                model = SqlParam
            else:
                raise MlflowException(
                    "Invalid identifier type '%s'" % key_type,
                    error_code=INVALID_PARAMETER_VALUE,
                )

            # The subquery is joined into the main request later, which makes
            # the value we sort on available to the sorting clause.
            key_subquery = session.query(model).filter(model.key == key).subquery()
            joined_subqueries.append(key_subquery)
            sort_column = key_subquery.c.value

        # sqlite has no NULLS LAST, so sort on "is the value missing" (plus
        # is_nan for metrics) before sorting on the value itself. Only metric
        # subqueries carry an 'is_nan' field.
        missing_whens = []
        if SearchUtils.is_metric(key_type, "="):
            missing_whens.append((key_subquery.c.is_nan.is_(True), 1))
        missing_whens.append((sort_column.is_(None), 1))

        # The subqueries are built independently but end up in one query, so
        # each CASE column needs its own label to stay unambiguous.
        sort_clauses.append(sql.case(missing_whens, else_=0).label("clause_%s" % position))

        sort_clauses.append(sort_column if ascending else sort_column.desc())

    sort_clauses.extend([SqlRun.start_time.desc(), SqlRun.run_uuid])
    return sort_clauses, joined_subqueries
def _get_orderby_clauses(order_by_list, session):
    """Build the ORDER BY clauses (and supporting subqueries) for a run search.

    Each requested order_by contributes two clauses: a CASE expression that
    pushes missing values (and NaN metrics) to the end, followed by the value
    itself in the requested direction. The natural ordering of runs is then
    appended: start time descending (unless start time was already requested
    explicitly), then run id as a tie-breaker.

    :param order_by_list: Order_by strings understood by
        ``SearchUtils.parse_order_by_for_search_runs``; may be empty or None.
    :param session: Object whose ``query(...)`` is used to build the
        per-key subqueries (presumably a SQLAlchemy session -- confirm
        against the caller).
    :return: A ``(clauses, ordering_joins)`` tuple: the ordered list of
        sorting clauses and the list of subqueries that were created for
        metric/tag/param keys.
    :raises MlflowException: If a clause refers to an unsupported identifier
        type, or if the same (type, key) pair is requested more than once.
    """
    sort_clauses = []
    joined_subqueries = []
    seen_fields = set()

    # Unlike filters, attribute sorts and joined-table sorts cannot be handled
    # separately: every clause has to stay in the order it was requested.
    for position, raw_order_by in enumerate(order_by_list or (), start=1):
        key_type, key, ascending = SearchUtils.parse_order_by_for_search_runs(raw_order_by)
        field = (key_type, key)

        sorts_on_run_attribute = SearchUtils.is_string_attribute(
            key_type, key, "="
        ) or SearchUtils.is_numeric_attribute(key_type, key, "=")

        if sorts_on_run_attribute:
            sort_column = getattr(SqlRun, SqlRun.get_attribute_name(key))
        else:
            # "=" is only passed because a valid comparator is required.
            if SearchUtils.is_metric(key_type, "="):
                model = SqlLatestMetric
            elif SearchUtils.is_tag(key_type, "="):
                model = SqlTag
            elif SearchUtils.is_param(key_type, "="):
                model = SqlParam
            else:
                raise MlflowException(
                    "Invalid identifier type '%s'" % key_type,
                    error_code=INVALID_PARAMETER_VALUE,
                )

            # The subquery is joined into the main request later, which makes
            # the value we sort on available to the sorting clause.
            key_subquery = session.query(model).filter(model.key == key).subquery()
            joined_subqueries.append(key_subquery)
            sort_column = key_subquery.c.value

        # sqlite has no NULLS LAST, so sort on "is the value missing" (plus
        # is_nan for metrics) before sorting on the value itself. Only metric
        # subqueries carry an 'is_nan' field.
        missing_whens = []
        if SearchUtils.is_metric(key_type, "="):
            # "IS" would be the preferred comparison, but the way sqlalchemy
            # translates it for MSSQL forces "=" here. The two are equivalent
            # only because is_nan is not nullable; revisit this if that
            # precondition ever changes.
            missing_whens.append((key_subquery.c.is_nan == sqlalchemy.true(), 1))
        missing_whens.append((sort_column.is_(None), 1))

        # The subqueries are built independently but end up in one query, so
        # each CASE column needs its own label to stay unambiguous.
        sort_clauses.append(sql.case(missing_whens, else_=0).label("clause_%s" % position))

        if field in seen_fields:
            raise MlflowException(
                "`order_by` contains duplicate fields: {}".format(order_by_list)
            )
        seen_fields.add(field)

        sort_clauses.append(sort_column if ascending else sort_column.desc())

    # Only fall back to the default start-time ordering when the caller did
    # not already ask to sort on start time.
    start_time_field = (SearchUtils._ATTRIBUTE_IDENTIFIER, SqlRun.start_time.key)
    if start_time_field not in seen_fields:
        sort_clauses.append(SqlRun.start_time.desc())
    sort_clauses.append(SqlRun.run_uuid)
    return sort_clauses, joined_subqueries
def test_invalid_order_by_search_runs(order_by, error_message):
    """Check that an invalid order_by clause is rejected with the expected message.

    Parsing must raise ``MlflowException`` and ``error_message`` must appear
    as a substring of the exception's ``message`` attribute.
    """
    with pytest.raises(MlflowException) as exc_info:
        SearchUtils.parse_order_by_for_search_runs(order_by)

    raised_message = exc_info.value.message
    assert error_message in raised_message
def test_invalid_order_by_search_runs(order_by, error_message):
    """Check that an invalid order_by clause is rejected with the expected message.

    Parsing must raise ``MlflowException``; ``error_message`` is handed to
    ``pytest.raises`` as its ``match`` pattern.
    """
    expected_failure = pytest.raises(MlflowException, match=error_message)
    with expected_failure:
        SearchUtils.parse_order_by_for_search_runs(order_by)