def _sqlalchemy_window(cls, column, _table, **kwargs):
     # Will - 20210126
     # This is a special case that needs to be handled for mysql, where you cannot refer to a temp_table
     # more than once in the same query. So instead of passing dup_query as-is, a second temp_table is created with
     # the column we will be performing the expectation on, and the query is performed against it.
     dialect = kwargs.get("_dialect", None)
     sql_engine = kwargs.get("_sqlalchemy_engine", None)
     try:
         dialect_name = dialect.dialect.name
     except AttributeError:
         try:
             dialect_name = dialect.name
         except AttributeError:
             dialect_name = ""
     if sql_engine and dialect and dialect_name == "mysql":
         temp_table_name = generate_temporary_table_name()
         temp_table_stmt = "CREATE TEMPORARY TABLE {new_temp_table} AS SELECT tmp.{column_name} FROM {source_table} tmp".format(
             new_temp_table=temp_table_name,
             source_table=_table,
             column_name=column.name,
         )
         sql_engine.execute(temp_table_stmt)
         dup_query = (sa.select([column]).select_from(
             sa.text(temp_table_name)).group_by(column).having(
                 sa.func.count(column) > 1))
     else:
         dup_query = (sa.select(
             [column]).select_from(_table).group_by(column).having(
                 sa.func.count(column) > 1))
     return column.notin_(dup_query)
Ejemplo n.º 2
0
def _get_column_quantiles_athena(
    column,
    quantiles: Iterable,
    selectable,
    sqlalchemy_engine,
) -> list:
    approx_percentiles = f"approx_percentile({column}, ARRAY{list(quantiles)})"
    selects_approx: List[TextClause] = [sa.text(approx_percentiles)]
    quantiles_query_approx: Select = sa.select(selects_approx).select_from(
        selectable)
    try:
        quantiles_results: Row = sqlalchemy_engine.execute(
            quantiles_query_approx).fetchone()
        # the ast literal eval is needed because the method is returning a json string and not a dict
        results = ast.literal_eval(quantiles_results[0])
        return results

        return results
    except ProgrammingError as pe:
        exception_message: str = "An SQL syntax Exception occurred."
        exception_traceback: str = traceback.format_exc()
        exception_message += (
            f'{type(pe).__name__}: "{str(pe)}".  Traceback: "{exception_traceback}".'
        )
        logger.error(exception_message)
        raise pe
def _get_column_quantiles_generic_sqlalchemy(
    column,
    quantiles: Iterable,
    allow_relative_error: bool,
    dialect,
    selectable,
    sqlalchemy_engine,
) -> list:
    selects: List[WithinGroup] = [
        sa.func.percentile_disc(quantile).within_group(column.asc())
        for quantile in quantiles
    ]
    quantiles_query: Select = sa.select(selects).select_from(selectable)

    try:
        quantiles_results: Row = sqlalchemy_engine.execute(quantiles_query).fetchone()
        return list(quantiles_results)
    except ProgrammingError:
        # ProgrammingError: (psycopg2.errors.SyntaxError) Aggregate function "percentile_disc" is not supported;
        # use approximate percentile_disc or percentile_cont instead.
        if attempt_allowing_relative_error(dialect):
            # Redshift does not have a percentile_disc method, but does support an approximate version.
            sql_approx: str = get_approximate_percentile_disc_sql(
                selects=selects, sql_engine_dialect=dialect
            )
            selects_approx: List[TextClause] = [sa.text(sql_approx)]
            quantiles_query_approx: Select = sa.select(selects_approx).select_from(
                selectable
            )
            if allow_relative_error:
                try:
                    quantiles_results: Row = sqlalchemy_engine.execute(
                        quantiles_query_approx
                    ).fetchone()
                    return list(quantiles_results)
                except ProgrammingError as pe:
                    exception_message: str = "An SQL syntax Exception occurred."
                    exception_traceback: str = traceback.format_exc()
                    exception_message += f'{type(pe).__name__}: "{str(pe)}".  Traceback: "{exception_traceback}".'
                    logger.error(exception_message)
                    raise pe
            else:
                raise ValueError(
                    f'The SQL engine dialect "{str(dialect)}" does not support computing quantiles '
                    "without approximation error; set allow_relative_error to True to allow approximate quantiles."
                )
        else:
            raise ValueError(
                f'The SQL engine dialect "{str(dialect)}" does not support computing quantiles with '
                "approximation error; set allow_relative_error to False to disable approximate quantiles."
            )
def _get_column_quantiles_trino(column, quantiles: Iterable, selectable,
                                sqlalchemy_engine) -> list:
    # Trino does not have the percentile_disc func, but instead has approx_percentile
    sql_approx: str = f"approx_percentile({column}, ARRAY{list(quantiles)})"
    selects_approx: List[TextClause] = [sa.text(sql_approx)]
    quantiles_query: Select = sa.select(selects_approx).select_from(selectable)

    try:
        quantiles_results: Row = sqlalchemy_engine.execute(
            quantiles_query).fetchone()
        return list(quantiles_results)[0]
    except (ProgrammingError, TrinoUserError) as pe:
        exception_message: str = "An SQL syntax Exception occurred."
        exception_traceback: str = traceback.format_exc()
        exception_message += (
            f'{type(pe).__name__}: "{str(pe)}".  Traceback: "{exception_traceback}".'
        )
        logger.error(exception_message)
        raise pe
Ejemplo n.º 5
0
    def _sqlalchemy(
        cls,
        execution_engine: SqlAlchemyExecutionEngine,
        metric_domain_kwargs: dict,
        metric_value_kwargs: dict,
        metrics: Dict[str, Any],
        runtime_configuration: dict,
    ) -> List[sqlalchemy_engine_Row]:
        query: Optional[str] = metric_value_kwargs.get(
            "query"
        ) or cls.default_kwarg_values.get("query")

        selectable: Union[sa.sql.Selectable, str]
        selectable, _, _ = execution_engine.get_compute_domain(
            metric_domain_kwargs, domain_type=MetricDomainTypes.TABLE
        )

        column: str = metric_value_kwargs.get("column")
        if isinstance(selectable, sa.Table):
            query = query.format(col=column, active_batch=selectable)
        elif isinstance(
            selectable, sa.sql.Subquery
        ):  # Specifying a runtime query in a RuntimeBatchRequest returns the active bacth as a Subquery; sectioning the active batch off w/ parentheses ensures flow of operations doesn't break
            query = query.format(col=column, active_batch=f"({selectable})")
        elif isinstance(
            selectable, sa.sql.Select
        ):  # Specifying a row_condition returns the active batch as a Select object, requiring compilation & aliasing when formatting the parameterized query
            query = query.format(
                col=column,
                active_batch=f'({selectable.compile(compile_kwargs={"literal_binds": True})}) AS subselect',
            )
        else:
            query = query.format(col=column, active_batch=f"({selectable})")

        engine: sqlalchemy_engine_Engine = execution_engine.engine
        result: List[sqlalchemy_engine_Row] = engine.execute(sa.text(query)).fetchall()

        return result