def _get_column_quantiles_mysql(
    column, quantiles: Iterable, selectable, sqlalchemy_engine
) -> list:
    """Compute quantiles of ``column`` on MySQL with a compound query.

    MySQL does not support "percentile_disc", so it is emulated: a CTE attaches
    a ``percent_rank()`` value ``p`` to every row, and for each requested
    quantile a ``first_value`` window picks the column value from the rows
    ordered by ``p`` descending, where ranks above the quantile are mapped to
    NULL by the CASE expression.
    Please see
    https://stackoverflow.com/questions/19770026/calculate-percentile-value-using-mysql
    for reference.

    Args:
        column: SQLAlchemy column expression to compute quantiles for.
        quantiles: Iterable of quantile values; one output value is produced
            per entry, in iteration order.
        selectable: SQLAlchemy selectable the column is read from.
        sqlalchemy_engine: Engine (or connection) used to execute the query.

    Returns:
        The first row of the query result converted to a list (one entry per
        requested quantile).

    Raises:
        ProgrammingError: Re-raised after being logged when the database
            rejects the generated SQL.
    """
    percent_rank_query: CTE = (
        sa.select(
            [
                column,
                sa.cast(
                    sa.func.percent_rank().over(order_by=column.asc()),
                    sa.dialects.mysql.DECIMAL(18, 15),
                ).label("p"),
            ]
        )
        .order_by(sa.column("p").asc())
        .select_from(selectable)
        .cte("t")
    )

    selects: List[Label] = []
    for idx, quantile in enumerate(quantiles):
        # pymysql cannot handle conversion of numpy floating scalars to float;
        # convert just in case. ``np.floating`` is used instead of the
        # ``np.float_`` alias because that alias was removed in NumPy 2.0.
        if isinstance(quantile, np.floating):
            quantile = float(quantile)
        quantile_column: Label = (
            sa.func.first_value(column)
            .over(
                order_by=sa.case(
                    [
                        (
                            percent_rank_query.c.p
                            <= sa.cast(quantile, sa.dialects.mysql.DECIMAL(18, 15)),
                            percent_rank_query.c.p,
                        )
                    ],
                    else_=None,
                ).desc()
            )
            .label(f"q_{idx}")
        )
        selects.append(quantile_column)

    quantiles_query: Select = (
        sa.select(selects).distinct().order_by(percent_rank_query.c.p.desc())
    )

    try:
        quantiles_results: RowProxy = sqlalchemy_engine.execute(
            quantiles_query
        ).fetchone()
        return list(quantiles_results)
    except ProgrammingError as pe:
        exception_message: str = "An SQL syntax Exception occurred."
        exception_traceback: str = traceback.format_exc()
        exception_message += (
            f'{type(pe).__name__}: "{str(pe)}".  Traceback: "{exception_traceback}".'
        )
        logger.error(exception_message)
        # Bare ``raise`` re-raises the same exception with its original traceback.
        raise
def _sqlalchemy(
    cls,
    execution_engine: "SqlAlchemyExecutionEngine",
    metric_domain_kwargs: Dict,
    metric_value_kwargs: Dict,
    metrics: Dict[Tuple, Any],
    runtime_configuration: Dict,
):
    """Compute column quantiles through SQLAlchemy, dispatching on the dialect.

    Args:
        execution_engine: Engine providing the compute domain and the
            underlying SQLAlchemy engine.
        metric_domain_kwargs: Domain kwargs resolved to a column-level domain.
        metric_value_kwargs: Must contain ``"quantiles"``; may contain
            ``"allow_relative_error"`` (defaults to ``False``).
        metrics: Not used by this implementation.
        runtime_configuration: Not used by this implementation.

    Returns:
        Whatever the dialect-specific quantile helper returns.
    """
    (
        selectable,
        compute_domain_kwargs,
        accessor_domain_kwargs,
    ) = execution_engine.get_compute_domain(
        metric_domain_kwargs, domain_type=MetricDomainTypes.COLUMN
    )
    column_name = accessor_domain_kwargs["column"]
    column = sa.column(column_name)
    sqlalchemy_engine = execution_engine.engine
    dialect = sqlalchemy_engine.dialect
    quantiles = metric_value_kwargs["quantiles"]
    allow_relative_error = metric_value_kwargs.get("allow_relative_error", False)

    # Resolve the dialect name once instead of on every comparison.
    dialect_name = dialect.name.lower()

    if dialect_name == "mssql":
        return _get_column_quantiles_mssql(
            column=column,
            quantiles=quantiles,
            selectable=selectable,
            sqlalchemy_engine=sqlalchemy_engine,
        )
    elif dialect_name == "bigquery":
        return _get_column_quantiles_bigquery(
            column=column,
            quantiles=quantiles,
            selectable=selectable,
            sqlalchemy_engine=sqlalchemy_engine,
        )
    elif dialect_name == "mysql":
        return _get_column_quantiles_mysql(
            column=column,
            quantiles=quantiles,
            selectable=selectable,
            sqlalchemy_engine=sqlalchemy_engine,
        )

    if dialect_name == "snowflake":
        # Snowflake's percentile_disc has a representation/precision
        # limitation that causes an error when the quantiles are not rounded;
        # rounding to 10 decimal places avoids it.
        quantiles = [round(x, 10) for x in quantiles]

    return _get_column_quantiles_generic_sqlalchemy(
        column=column,
        quantiles=quantiles,
        allow_relative_error=allow_relative_error,
        dialect=dialect,
        selectable=selectable,
        sqlalchemy_engine=sqlalchemy_engine,
    )
def _sqlalchemy(
    cls,
    execution_engine: "SqlAlchemyExecutionEngine",
    metric_domain_kwargs: Dict,
    metric_value_kwargs: Dict,
    metrics: Dict[Tuple, Any],
    runtime_configuration: Dict,
):
    """Return the requested quantiles of a column using the SQLAlchemy backend.

    The column-level compute domain is resolved from ``metric_domain_kwargs``,
    then the work is handed to a helper chosen by the engine's dialect name:
    dedicated helpers for "mssql", "bigquery" and "mysql", and the generic
    helper for everything else (with quantiles rounded first on "snowflake").

    ``metric_value_kwargs`` must provide ``"quantiles"`` and may provide
    ``"allow_relative_error"`` (default ``False``). ``metrics`` and
    ``runtime_configuration`` are accepted but not used here.
    """
    domain = execution_engine.get_compute_domain(
        metric_domain_kwargs, domain_type=MetricDomainTypes.COLUMN
    )
    selectable, _compute_kwargs, accessor_kwargs = domain

    column = sa.column(accessor_kwargs["column"])
    engine = execution_engine.engine
    dialect = engine.dialect
    quantiles = metric_value_kwargs["quantiles"]
    allow_relative_error = metric_value_kwargs.get("allow_relative_error", False)

    dialect_name = dialect.name.lower()

    # Dialects with a dedicated implementation all take the same arguments.
    if dialect_name == "mssql":
        return _get_column_quantiles_mssql(
            column=column,
            quantiles=quantiles,
            selectable=selectable,
            sqlalchemy_engine=engine,
        )
    if dialect_name == "bigquery":
        return _get_column_quantiles_bigquery(
            column=column,
            quantiles=quantiles,
            selectable=selectable,
            sqlalchemy_engine=engine,
        )
    if dialect_name == "mysql":
        return _get_column_quantiles_mysql(
            column=column,
            quantiles=quantiles,
            selectable=selectable,
            sqlalchemy_engine=engine,
        )

    if dialect_name == "snowflake":
        # NOTE (2020-12-16, JPC): snowflake's percentile_disc implementation
        # has a representation/precision limitation that produces an error
        # unless the quantiles are rounded. Why round() changes anything is
        # unclear -- the binary representation should be the same before and
        # after, and no type difference was observed -- but the problem can be
        # reproduced in the snowflake console and seen in side-by-side
        # comparisons with and without the call to round().
        quantiles = [round(q, 10) for q in quantiles]

    # Snowflake (after rounding) and every remaining dialect share this path.
    return _get_column_quantiles_generic_sqlalchemy(
        column=column,
        quantiles=quantiles,
        allow_relative_error=allow_relative_error,
        dialect=dialect,
        selectable=selectable,
        sqlalchemy_engine=engine,
    )