コード例 #1
0
    def _sqlalchemy(cls, column, regex_list, match_on, _dialect, **kwargs):
        if match_on not in ["any", "all"]:
            raise ValueError("match_on must be any or all")

        if len(regex_list) == 0:
            raise ValueError("At least one regex must be supplied in the regex_list.")

        regex_expression = get_dialect_regex_expression(column, regex_list[0], _dialect)
        if regex_expression is None:
            logger.warning(f"Regex is not supported for dialect {str(_dialect)}")
            raise NotImplementedError

        if match_on == "any":
            condition = sa.or_(
                *(
                    get_dialect_regex_expression(column, regex, _dialect)
                    for regex in regex_list
                )
            )
        else:
            condition = sa.and_(
                *(
                    get_dialect_regex_expression(column, regex, _dialect)
                    for regex in regex_list
                )
            )
        return condition
    def _sqlalchemy(cls, column, like_pattern_list, match_on, _dialect,
                    **kwargs):
        if not match_on:
            match_on = "any"

        if match_on not in ["any", "all"]:
            raise ValueError("match_on must be any or all")

        if len(like_pattern_list) == 0:
            raise ValueError(
                "At least one like_pattern must be supplied in the like_pattern_list."
            )

        like_pattern_expression = get_dialect_like_pattern_expression(
            column, _dialect, like_pattern_list[0])
        if like_pattern_expression is None:
            logger.warning(
                f"Like patterns are not supported for dialect {str(_dialect.dialect.name)}"
            )
            raise NotImplementedError

        if match_on == "any":
            condition = sa.or_(*(get_dialect_like_pattern_expression(
                column, _dialect, like_pattern)
                                 for like_pattern in like_pattern_list))
        else:
            condition = sa.and_(*(get_dialect_like_pattern_expression(
                column, _dialect, like_pattern)
                                  for like_pattern in like_pattern_list))
        return condition
コード例 #3
0
    def _sqlalchemy(cls, column_A, column_B, **kwargs):
        """Build a row-wise condition: is ``(column_A, column_B)`` an allowed pair?

        Args:
            column_A: First column expression.
            column_B: Second column expression.
            **kwargs: ``value_pairs_set`` is read from here; an iterable of
                2-item pairs ``(a, b)``, or absent/``None``.

        Returns:
            A SQLAlchemy boolean expression. With no ``value_pairs_set`` the
            result is a CASE that yields True on both branches. Otherwise it is
            the OR over all pairs of ``column_A == a AND column_B == b``; an
            empty set produces a constant-false condition.
        """
        value_pairs_set = kwargs.get("value_pairs_set")

        if value_pairs_set is None:
            # vacuously true: both the WHEN branch and the ELSE branch are True
            return sa.case([(column_A == column_B, True)], else_=True)

        # An OR of per-pair ANDs is used instead of a tuple IN (...) because
        # the original implementation notes mssql issues with in_.
        pair_conditions = [
            sa.and_(column_A == x, column_B == y) for x, y in value_pairs_set
        ]

        # Seed with false() so an empty value_pairs_set gives a well-defined
        # "no row matches" instead of an argument-less or_(), which SQLAlchemy
        # treats as ambiguous/deprecated. With at least one real condition the
        # false() element is dropped from the rendered SQL.
        return sa.or_(sa.false(), *pair_conditions)
    def _sqlalchemy(cls, column_list, **kwargs):
        """Build a row-wise condition that all columns in ``column_list`` differ.

        A row fails when any two columns are equal, or are both NULL. The
        condition is built from every unordered pair of columns, so its size
        grows as O(num_columns^2). The original author notes that this is the
        only feasible mechanism while map metric providers must return a
        single condition, and cites a 1GB SQL query length limit as sufficient
        for most practical scenarios (engine-specific; not verified here).

        Args:
            column_list: Sequence of column expressions.
            **kwargs: Accepted for interface compatibility; not read here.

        Returns:
            ``NOT (pair_1 OR pair_2 OR ...)``. With fewer than two columns
            there are no pairs and the result is the negation of a constant
            false, i.e. every row is trivially unique.
        """
        import itertools

        num_columns = len(column_list)

        # An arbitrary "num_columns" value used for issuing an explanatory message as a warning.
        if num_columns > 100:
            logger.warning(
                f"Batch data with {num_columns} columns is detected.  Computing the "
                f'"{cls.condition_metric_name}" metric for wide tables using SQLAlchemy leads to long '
                "WHERE clauses for the underlying database engine to process.\n"
            )

        # combinations() yields (i, j) pairs with i < j in the same order as
        # the nested index loops it replaces.
        duplicate_pair_conditions = [
            sa.or_(
                left == right,
                # `== None` is intentional: SQLAlchemy renders it as IS NULL,
                # so two NULLs count as a duplicate pair.
                sa.and_(left == None, right == None),  # noqa: E711
            )
            for left, right in itertools.combinations(column_list, 2)
        ]

        # Seed with false() so zero pairs (fewer than two columns) does not
        # call or_() without arguments, which SQLAlchemy deprecates as
        # ambiguous. The seed is dropped when real conditions are present.
        any_duplicate = sa.or_(sa.false(), *duplicate_pair_conditions)
        return sa.not_(any_duplicate)
コード例 #5
0
    def _sqlalchemy(cls, column_A, column_B, **kwargs):
        allow_cross_type_comparisons: bool = (
            kwargs.get("allow_cross_type_comparisons") or False
        )
        if allow_cross_type_comparisons:
            raise NotImplementedError

        parse_strings_as_datetimes: bool = (
            kwargs.get("parse_strings_as_datetimes") or False
        )
        if parse_strings_as_datetimes:
            raise NotImplementedError

        or_equal: bool = kwargs.get("or_equal") or False
        if or_equal:
            return sa.or_(
                column_A >= column_B, sa.and_(column_A == None, column_B == None)
            )
        else:
            return column_A > column_B
コード例 #6
0
 def _sqlalchemy(cls, column_A, column_B, **kwargs):
     """Build a row-wise condition: the two columns are equal and neither is NULL.

     Args:
         column_A: First column expression.
         column_B: Second column expression.
         **kwargs: Accepted for interface compatibility; not read here.

     Returns:
         ``column_A == column_B AND NOT (column_A IS NULL OR column_B IS NULL)``
         as a SQLAlchemy boolean expression.
     """
     values_match = column_A == column_B
     # `== None` is intentional: SQLAlchemy renders it as IS NULL.
     either_null = sa.or_(column_A == None, column_B == None)  # noqa: E711
     return sa.and_(values_match, sa.not_(either_null))
 def _sqlalchemy(cls, column_A, column_B, **kwargs):
     """Build a row-wise condition: the columns differ by exactly 3 and neither is NULL.

     Args:
         column_A: First column expression.
         column_B: Second column expression.
         **kwargs: Accepted for interface compatibility; not read here.

     Returns:
         ``abs(column_A - column_B) == 3 AND NOT (column_A IS NULL OR
         column_B IS NULL)`` as a SQLAlchemy boolean expression.
     """
     gap_is_three = sa.func.abs(column_A - column_B) == 3
     # `== None` is intentional: SQLAlchemy renders it as IS NULL.
     either_null = sa.or_(column_A == None, column_B == None)  # noqa: E711
     return sa.and_(gap_is_three, sa.not_(either_null))