Example #1
0
def _get_column_quantiles_mysql(
    column, quantiles: Iterable, selectable, sqlalchemy_engine
) -> list:
    """Compute discrete quantiles of ``column`` on a MySQL backend.

    MySQL does not support ``percentile_disc``, so the quantiles are obtained
    with a compound query built from window functions. Please see
    https://stackoverflow.com/questions/19770026/calculate-percentile-value-using-mysql
    for reference.

    Args:
        column: SQLAlchemy column expression whose quantiles are requested.
        quantiles: Iterable of quantile values; each one is cast to
            ``DECIMAL(18, 15)`` and compared against the percent rank.
        selectable: SQLAlchemy selectable the column is read from.
        sqlalchemy_engine: Engine used to execute the resulting query.

    Returns:
        The first row of the query result as a list, holding one value per
        requested quantile (labelled ``q_0``, ``q_1``, ... in the query).

    Raises:
        ProgrammingError: Logged (with traceback) and re-raised when the
            database rejects the generated SQL.
    """
    # CTE "t": every column value paired with its percent rank "p".
    percent_rank_query: CTE = (
        sa.select(
            [
                column,
                sa.cast(
                    sa.func.percent_rank().over(order_by=column.asc()),
                    sa.dialects.mysql.DECIMAL(18, 15),
                ).label("p"),
            ]
        )
        .order_by(sa.column("p").asc())
        .select_from(selectable)
        .cte("t")
    )

    selects: List[WithinGroup] = []
    for idx, quantile in enumerate(quantiles):
        # pymysql cannot handle conversion of numpy floating scalars; convert
        # just in case. ``np.floating`` is used instead of the ``np.float_``
        # alias because that alias was removed in NumPy 2.0.
        if isinstance(quantile, np.floating):
            quantile = float(quantile)
        # Order rows by "p" where p <= quantile (NULL otherwise), descending,
        # and take the first column value. Presumably this relies on MySQL
        # sorting NULL last for DESC so that the largest qualifying percent
        # rank comes first -- verify against the MySQL documentation.
        quantile_column: Label = (
            sa.func.first_value(column)
            .over(
                order_by=sa.case(
                    [
                        (
                            percent_rank_query.c.p
                            <= sa.cast(quantile, sa.dialects.mysql.DECIMAL(18, 15)),
                            percent_rank_query.c.p,
                        )
                    ],
                    else_=None,
                ).desc()
            )
            .label(f"q_{idx}")
        )
        selects.append(quantile_column)
    quantiles_query: Select = (
        sa.select(selects).distinct().order_by(percent_rank_query.c.p.desc())
    )

    try:
        # NOTE(review): fetchone() yields None when the query produces no rows
        # (e.g. an empty selectable); list(None) would then raise TypeError.
        # Confirm what callers expect in that case.
        quantiles_results: RowProxy = sqlalchemy_engine.execute(
            quantiles_query
        ).fetchone()
        return list(quantiles_results)
    except ProgrammingError as pe:
        exception_message: str = "An SQL syntax Exception occurred."
        exception_traceback: str = traceback.format_exc()
        exception_message += (
            f'{type(pe).__name__}: "{str(pe)}".  Traceback: "{exception_traceback}".'
        )
        logger.error(exception_message)
        # Bare raise re-raises the active exception with its original traceback.
        raise
 def _sqlalchemy(
     cls,
     execution_engine: "SqlAlchemyExecutionEngine",
     metric_domain_kwargs: Dict,
     metric_value_kwargs: Dict,
     metrics: Dict[Tuple, Any],
     runtime_configuration: Dict,
 ):
     """Compute column quantiles through the helper matching the SQL dialect.

     The compute domain is resolved for a single column, then the work is
     delegated to the mssql, bigquery or mysql specific helper when the
     engine's dialect name matches; every other dialect uses the generic
     SQLAlchemy helper, which additionally receives ``allow_relative_error``
     (default ``False``) and the dialect object.

     Returns:
         Whatever the selected quantile helper returns.
     """
     domain = execution_engine.get_compute_domain(
         metric_domain_kwargs, domain_type=MetricDomainTypes.COLUMN
     )
     # Only the selectable and the accessor kwargs are needed here.
     selectable, _compute_domain_kwargs, accessor_domain_kwargs = domain

     target_column = sa.column(accessor_domain_kwargs["column"])
     engine = execution_engine.engine
     dialect = engine.dialect
     requested_quantiles = metric_value_kwargs["quantiles"]
     allow_relative_error = metric_value_kwargs.get("allow_relative_error", False)

     # Keyword arguments accepted by every quantile helper.
     shared_kwargs = {
         "column": target_column,
         "quantiles": requested_quantiles,
         "selectable": selectable,
         "sqlalchemy_engine": engine,
     }

     dialect_name = dialect.name.lower()
     if dialect_name not in ("mssql", "bigquery", "mysql"):
         # No dedicated implementation: fall back to the generic helper.
         return _get_column_quantiles_generic_sqlalchemy(
             allow_relative_error=allow_relative_error,
             dialect=dialect,
             **shared_kwargs,
         )

     if dialect_name == "mssql":
         return _get_column_quantiles_mssql(**shared_kwargs)
     if dialect_name == "bigquery":
         return _get_column_quantiles_bigquery(**shared_kwargs)
     return _get_column_quantiles_mysql(**shared_kwargs)
Example #3
0
 def _sqlalchemy(
     cls,
     execution_engine: "SqlAlchemyExecutionEngine",
     metric_domain_kwargs: Dict,
     metric_value_kwargs: Dict,
     metrics: Dict[Tuple, Any],
     runtime_configuration: Dict,
 ):
     """Compute column quantiles through the helper matching the SQL dialect.

     The compute domain is resolved for a single column, then the work is
     delegated to the mssql, bigquery or mysql specific helper when the
     engine's dialect name matches. Snowflake and every remaining dialect use
     the generic SQLAlchemy helper, which additionally receives
     ``allow_relative_error`` (default ``False``) and the dialect object; for
     snowflake the quantiles are first rounded to 10 decimal places.

     Returns:
         Whatever the selected quantile helper returns.
     """
     domain = execution_engine.get_compute_domain(
         metric_domain_kwargs, domain_type=MetricDomainTypes.COLUMN
     )
     # Only the selectable and the accessor kwargs are needed here.
     selectable, _compute_domain_kwargs, accessor_domain_kwargs = domain

     target_column = sa.column(accessor_domain_kwargs["column"])
     engine = execution_engine.engine
     dialect = engine.dialect
     requested_quantiles = metric_value_kwargs["quantiles"]
     allow_relative_error = metric_value_kwargs.get("allow_relative_error", False)

     dialect_name = dialect.name.lower()

     if dialect_name in ("mssql", "bigquery", "mysql"):
         # Dialects with a dedicated implementation share one call signature.
         dedicated_kwargs = {
             "column": target_column,
             "quantiles": requested_quantiles,
             "selectable": selectable,
             "sqlalchemy_engine": engine,
         }
         if dialect_name == "mssql":
             return _get_column_quantiles_mssql(**dedicated_kwargs)
         if dialect_name == "bigquery":
             return _get_column_quantiles_bigquery(**dedicated_kwargs)
         return _get_column_quantiles_mysql(**dedicated_kwargs)

     if dialect_name == "snowflake":
         # NOTE: 20201216 - JPC - snowflake's percentile_disc has a
         # representation/precision limitation that produces an error unless
         # the quantiles are rounded. Why round() changes the outcome is
         # unclear (the binary representation should be identical and no type
         # difference was observed), but the issue is reproducible in the
         # snowflake console and visible in side-by-side comparisons with and
         # without the call to round().
         requested_quantiles = [round(q, 10) for q in requested_quantiles]

     # Snowflake (with rounded quantiles) and all other dialects.
     return _get_column_quantiles_generic_sqlalchemy(
         column=target_column,
         quantiles=requested_quantiles,
         allow_relative_error=allow_relative_error,
         dialect=dialect,
         selectable=selectable,
         sqlalchemy_engine=engine,
     )