Ejemplo n.º 1
0
def test_space_order_by_search_runs(order_by, ascending_expected):
    identifier_type, identifier_name, ascending = SearchUtils.parse_order_by_for_search_runs(
        order_by
    )
    assert identifier_type == "metric"
    assert identifier_name == "Mean Square Error"
    assert ascending == ascending_expected
Ejemplo n.º 2
0
def _get_orderby_clauses(order_by_list, session):
    """Sorts a set of runs based on their natural ordering and an overriding set of order_bys.
    Runs are naturally ordered first by start time descending, then by run id for tie-breaking.
    """

    clauses = []
    ordering_joins = []
    clause_id = 0
    # contrary to filters, it is not easily feasible to separately handle sorting
    # on attributes and on joined tables as we must keep all clauses in the same order
    if order_by_list:
        for order_by_clause in order_by_list:
            clause_id += 1
            (key_type, key, ascending) = SearchUtils.parse_order_by_for_search_runs(order_by_clause)
            if SearchUtils.is_attribute(key_type, "="):
                order_value = getattr(SqlRun, SqlRun.get_attribute_name(key))
            else:
                if SearchUtils.is_metric(key_type, "="):  # any valid comparator
                    entity = SqlLatestMetric
                elif SearchUtils.is_tag(key_type, "="):
                    entity = SqlTag
                elif SearchUtils.is_param(key_type, "="):
                    entity = SqlParam
                else:
                    raise MlflowException(
                        "Invalid identifier type '%s'" % key_type,
                        error_code=INVALID_PARAMETER_VALUE,
                    )

                # build a subquery first because we will join it in the main request so that the
                # metric we want to sort on is available when we apply the sorting clause
                subquery = session.query(entity).filter(entity.key == key).subquery()

                ordering_joins.append(subquery)
                order_value = subquery.c.value

            # sqlite does not support NULLS LAST expression, so we sort first by
            # presence of the field (and is_nan for metrics), then by actual value
            # As the subqueries are created independently and used later in the
            # same main query, the CASE WHEN columns need to have unique names to
            # avoid ambiguity
            if SearchUtils.is_metric(key_type, "="):
                clauses.append(
                    sql.case(
                        [(subquery.c.is_nan.is_(True), 1), (order_value.is_(None), 1)], else_=0
                    ).label("clause_%s" % clause_id)
                )
            else:  # other entities do not have an 'is_nan' field
                clauses.append(
                    sql.case([(order_value.is_(None), 1)], else_=0).label("clause_%s" % clause_id)
                )

            if ascending:
                clauses.append(order_value)
            else:
                clauses.append(order_value.desc())

    clauses.append(SqlRun.start_time.desc())
    clauses.append(SqlRun.run_uuid)
    return clauses, ordering_joins
Ejemplo n.º 3
0
def _get_orderby_clauses(order_by_list, session):
    """Sorts a set of runs based on their natural ordering and an overriding set of order_bys.
    Runs are naturally ordered first by start time descending, then by run id for tie-breaking.
    """

    clauses = []
    ordering_joins = []
    clause_id = 0
    observed_order_by_clauses = set()
    # contrary to filters, it is not easily feasible to separately handle sorting
    # on attributes and on joined tables as we must keep all clauses in the same order
    if order_by_list:
        for order_by_clause in order_by_list:
            clause_id += 1
            (key_type, key, ascending
             ) = SearchUtils.parse_order_by_for_search_runs(order_by_clause)
            if SearchUtils.is_string_attribute(
                    key_type, key, "=") or SearchUtils.is_numeric_attribute(
                        key_type, key, "="):
                order_value = getattr(SqlRun, SqlRun.get_attribute_name(key))
            else:
                if SearchUtils.is_metric(key_type,
                                         "="):  # any valid comparator
                    entity = SqlLatestMetric
                elif SearchUtils.is_tag(key_type, "="):
                    entity = SqlTag
                elif SearchUtils.is_param(key_type, "="):
                    entity = SqlParam
                else:
                    raise MlflowException(
                        "Invalid identifier type '%s'" % key_type,
                        error_code=INVALID_PARAMETER_VALUE,
                    )

                # build a subquery first because we will join it in the main request so that the
                # metric we want to sort on is available when we apply the sorting clause
                subquery = session.query(entity).filter(
                    entity.key == key).subquery()

                ordering_joins.append(subquery)
                order_value = subquery.c.value

            # sqlite does not support NULLS LAST expression, so we sort first by
            # presence of the field (and is_nan for metrics), then by actual value
            # As the subqueries are created independently and used later in the
            # same main query, the CASE WHEN columns need to have unique names to
            # avoid ambiguity
            if SearchUtils.is_metric(key_type, "="):
                clauses.append(
                    sql.case(
                        [
                            # Ideally the use of "IS" is preferred here but owing to sqlalchemy
                            # translation in MSSQL we are forced to use "=" instead.
                            # These 2 options are functionally identical / unchanged because
                            # the column (is_nan) is not nullable. However it could become an issue
                            # if this precondition changes in the future.
                            (subquery.c.is_nan == sqlalchemy.true(), 1),
                            (order_value.is_(None), 1),
                        ],
                        else_=0,
                    ).label("clause_%s" % clause_id))
            else:  # other entities do not have an 'is_nan' field
                clauses.append(
                    sql.case([(order_value.is_(None), 1)],
                             else_=0).label("clause_%s" % clause_id))

            if (key_type, key) in observed_order_by_clauses:
                raise MlflowException(
                    "`order_by` contains duplicate fields: {}".format(
                        order_by_list))
            observed_order_by_clauses.add((key_type, key))

            if ascending:
                clauses.append(order_value)
            else:
                clauses.append(order_value.desc())

    if (SearchUtils._ATTRIBUTE_IDENTIFIER,
            SqlRun.start_time.key) not in observed_order_by_clauses:
        clauses.append(SqlRun.start_time.desc())
    clauses.append(SqlRun.run_uuid)
    return clauses, ordering_joins
Ejemplo n.º 4
0
def test_invalid_order_by_search_runs(order_by, error_message):
    with pytest.raises(MlflowException) as e:
        SearchUtils.parse_order_by_for_search_runs(order_by)
    assert error_message in e.value.message
Ejemplo n.º 5
0
def test_invalid_order_by_search_runs(order_by, error_message):
    with pytest.raises(MlflowException, match=error_message):
        SearchUtils.parse_order_by_for_search_runs(order_by)