def _sqlalchemy(cls, column, regex_list, match_on, _dialect, **kwargs):
    """Build a condition matching ``column`` against the regexes in ``regex_list``.

    Args:
        column: Column expression handed to ``get_dialect_regex_expression``.
        regex_list: Non-empty sequence of regex patterns.
        match_on: ``"any"`` to OR the per-regex expressions together, ``"all"``
            to AND them. A falsy value (e.g. ``None``) is treated as ``"any"``,
            mirroring the like-pattern-list variant of this condition.
        _dialect: Dialect object passed through to
            ``get_dialect_regex_expression``.
        **kwargs: Accepted and ignored.

    Returns:
        ``sa.or_(...)`` or ``sa.and_(...)`` over one expression per regex.

    Raises:
        ValueError: If ``match_on`` is neither ``"any"`` nor ``"all"``, or if
            ``regex_list`` is empty.
        NotImplementedError: If ``get_dialect_regex_expression`` returns
            ``None`` for the first regex.
    """
    # Consistency with the like-pattern variant: an unset match_on means "any".
    if not match_on:
        match_on = "any"
    if match_on not in ("any", "all"):
        raise ValueError("match_on must be any or all")

    if len(regex_list) == 0:
        raise ValueError("At least one regex must be supplied in the regex_list.")

    # Probe with the first regex: a None result is the "unsupported" signal.
    if get_dialect_regex_expression(column, regex_list[0], _dialect) is None:
        logger.warning("Regex is not supported for dialect %s", _dialect)
        raise NotImplementedError(f"Regex is not supported for dialect {_dialect}")

    combine = sa.or_ if match_on == "any" else sa.and_
    return combine(
        *(
            get_dialect_regex_expression(column, regex, _dialect)
            for regex in regex_list
        )
    )
def _sqlalchemy(cls, column, like_pattern_list, match_on, _dialect, **kwargs):
    """Build a condition matching ``column`` against the given LIKE patterns.

    Args:
        column: Column expression handed to
            ``get_dialect_like_pattern_expression``.
        like_pattern_list: Non-empty sequence of LIKE patterns.
        match_on: ``"any"`` (OR the per-pattern expressions) or ``"all"``
            (AND them). A falsy value is treated as ``"any"``.
        _dialect: Dialect object passed through to
            ``get_dialect_like_pattern_expression``.
        **kwargs: Accepted and ignored.

    Returns:
        ``sa.or_(...)`` or ``sa.and_(...)`` over one expression per pattern.

    Raises:
        ValueError: If ``match_on`` is invalid or ``like_pattern_list`` is empty.
        NotImplementedError: If the helper returns ``None`` for the first
            pattern.
    """
    match_on = match_on or "any"
    if match_on not in ("any", "all"):
        raise ValueError("match_on must be any or all")

    if len(like_pattern_list) == 0:
        raise ValueError(
            "At least one like_pattern must be supplied in the like_pattern_list."
        )

    def to_expression(pattern):
        # One dialect-specific LIKE expression for a single pattern.
        return get_dialect_like_pattern_expression(column, _dialect, pattern)

    # Probe with the first pattern: a None result is the "unsupported" signal.
    if to_expression(like_pattern_list[0]) is None:
        logger.warning(
            f"Like patterns are not supported for dialect {str(_dialect.dialect.name)}"
        )
        raise NotImplementedError

    combine = sa.or_ if match_on == "any" else sa.and_
    return combine(*map(to_expression, like_pattern_list))
def _sqlalchemy(cls, column_A, column_B, **kwargs):
    """Build a row condition: ``(column_A, column_B)`` is one of the allowed pairs.

    Args:
        column_A: First column expression.
        column_B: Second column expression.
        **kwargs: ``value_pairs_set`` is read from here; it is an iterable of
            2-item pairs ``(a_value, b_value)``. Other keys are ignored.

    Returns:
        * If ``value_pairs_set`` is ``None``: a ``CASE`` expression whose WHEN
          and ELSE branches both yield ``True`` (vacuously true).
        * Otherwise: an OR over ``column_A == x AND column_B == y`` for each
          pair. An empty ``value_pairs_set`` yields a constant-false condition.

    Raises:
        ValueError / TypeError: If an entry of ``value_pairs_set`` cannot be
            unpacked into exactly two items.
    """
    value_pairs_set = kwargs.get("value_pairs_set")

    if value_pairs_set is None:
        # vacuously true
        return sa.case([(column_A == column_B, True)], else_=True)

    # Materialize first; the unpacking validates that every entry is a pair.
    value_pairs = [(x, y) for x, y in value_pairs_set]

    # or_ implementation was required due to mssql issues with in_
    # (the single-argument or_ around each and_ is kept from the original so
    # the expression for non-empty input is built exactly as before).
    conditions = [
        sa.or_(sa.and_(column_A == x, column_B == y)) for x, y in value_pairs
    ]

    # Seed with false(): SQLAlchemy deprecates an argument-less or_(), and an
    # empty set of allowed pairs must match no row. For non-empty input the
    # false() operand is dropped by or_().
    return sa.or_(sa.false(), *conditions)
def _sqlalchemy(cls, column_list, **kwargs):
    """Build a row condition: no two columns in ``column_list`` hold the same value.

    Every unordered pair of columns is compared, so the generated condition
    grows as O(num_columns^2). Map metric providers must return a single
    condition under the current architecture, which is why the pairwise
    expansion is used here until a cheaper SQLAlchemy-compatible approach
    exists. (The original author notes a 1GB SQL query length limit, deemed
    sufficient for most practical scenarios.)

    Two columns count as "the same" when they compare equal or when both
    compare equal to ``None``.

    Args:
        column_list: Sequence of column expressions to compare pairwise.
        **kwargs: Accepted and ignored.

    Returns:
        ``sa.not_`` applied to the OR of all pairwise "same value" conditions.
    """
    num_columns = len(column_list)

    # 100 is an arbitrary threshold, used only to emit an explanatory warning.
    if num_columns > 100:
        logger.warning(
            f"""Batch data with {num_columns} columns is detected. Computing the "{cls.condition_metric_name}" \
metric for wide tables using SQLAlchemy leads to long WHERE clauses for the underlying database engine to process.
"""
        )

    pairwise_matches = []
    for idx_src in range(num_columns - 1):
        left = column_list[idx_src]
        for idx_dest in range(idx_src + 1, num_columns):
            right = column_list[idx_dest]
            both_null = sa.and_(left == None, right == None)  # noqa: E711
            pairwise_matches.append(sa.or_(left == right, both_null))

    any_pair_matches = sa.or_(*pairwise_matches)
    return sa.not_(any_pair_matches)
def _sqlalchemy(cls, column_A, column_B, **kwargs):
    """Build a row condition: ``column_A`` is greater than ``column_B``.

    Args:
        column_A: Left-hand column expression.
        column_B: Right-hand column expression.
        **kwargs: Recognized keys:
            ``allow_cross_type_comparisons`` - not implemented; truthy raises.
            ``parse_strings_as_datetimes`` - not implemented; truthy raises.
            ``or_equal`` - when truthy, use ``>=`` and also accept rows where
            both columns compare equal to ``None``.

    Returns:
        ``column_A > column_B`` by default; with ``or_equal`` the OR of
        ``column_A >= column_B`` and "both are None".

    Raises:
        NotImplementedError: If either unsupported option is truthy.
    """
    # Neither option has an implementation here; reject them up front.
    for unsupported_option in (
        "allow_cross_type_comparisons",
        "parse_strings_as_datetimes",
    ):
        if kwargs.get(unsupported_option):
            raise NotImplementedError

    if not kwargs.get("or_equal"):
        return column_A > column_B

    at_least = column_A >= column_B
    both_null = sa.and_(column_A == None, column_B == None)  # noqa: E711
    return sa.or_(at_least, both_null)
def _sqlalchemy(cls, column_A, column_B, **kwargs):
    """Build a row condition: the two columns are equal and neither is None.

    Args:
        column_A: First column expression.
        column_B: Second column expression.
        **kwargs: Accepted and ignored.

    Returns:
        ``sa.and_`` of ``column_A == column_B`` and the negation of
        "``column_A`` is None or ``column_B`` is None".
    """
    values_match = column_A == column_B
    either_is_null = sa.or_(column_A == None, column_B == None)  # noqa: E711
    return sa.and_(values_match, sa.not_(either_is_null))
def _sqlalchemy(cls, column_A, column_B, **kwargs):
    """Build a row condition: the columns differ by exactly 3 and neither is None.

    Args:
        column_A: First column expression.
        column_B: Second column expression.
        **kwargs: Accepted and ignored.

    Returns:
        ``sa.and_`` of ``abs(column_A - column_B) == 3`` and the negation of
        "``column_A`` is None or ``column_B`` is None".
    """
    differs_by_three = sa.func.abs(column_A - column_B) == 3
    either_is_null = sa.or_(column_A == None, column_B == None)  # noqa: E711
    return sa.and_(differs_by_three, sa.not_(either_is_null))