def prop_filter_json_extract( prop: Property, idx: int, prepend: str = "", prop_var: str = "properties", allow_denormalized_props: bool = True, transform_expression: Optional[Callable[[str], str]] = None, ) -> Tuple[str, Dict[str, Any]]: # TODO: Once all queries are migrated over we can get rid of allow_denormalized_props if transform_expression is not None: prop_var = transform_expression(prop_var) property_expr, is_denormalized = get_property_string_expr( property_table(prop), prop.key, f"%(k{prepend}_{idx})s", prop_var, allow_denormalized_props) if is_denormalized and transform_expression: property_expr = transform_expression(property_expr) operator = prop.operator params: Dict[str, Any] = {} if operator == "is_not": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): box_value(prop.value) } return ( "AND NOT has(%(v{prepend}_{idx})s, {left})".format( idx=idx, prepend=prepend, left=property_expr), params, ) elif operator == "icontains": value = "%{}%".format(prop.value) params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): value } return ( "AND {left} ILIKE %(v{prepend}_{idx})s".format(idx=idx, prepend=prepend, left=property_expr), params, ) elif operator == "not_icontains": value = "%{}%".format(prop.value) params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): value } return ( "AND NOT ({left} ILIKE %(v{prepend}_{idx})s)".format( idx=idx, prepend=prepend, left=property_expr), params, ) elif operator in ("regex", "not_regex"): if not is_valid_regex(prop.value): return "AND 1 = 2", {} params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } return ( "AND {regex_function}({left}, %(v{prepend}_{idx})s)".format( regex_function="match" if operator == "regex" else "NOT match", idx=idx, prepend=prepend, left=property_expr, ), params, ) elif operator == "is_set": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } if is_denormalized: return ( "AND notEmpty({left})".format(left=property_expr), params, ) return ( "AND JSONHas({prop_var}, %(k{prepend}_{idx})s)".format( idx=idx, prepend=prepend, prop_var=prop_var), params, ) elif operator == "is_not_set": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } if is_denormalized: return ( "AND empty({left})".format(left=property_expr), params, ) return ( "AND (isNull({left}) OR NOT JSONHas({prop_var}, %(k{prepend}_{idx})s))" .format(idx=idx, prepend=prepend, prop_var=prop_var, left=property_expr), params, ) elif operator == "gt": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } return ( "AND toFloat64OrNull(trim(BOTH '\"' FROM replaceRegexpAll({left}, ' ', ''))) > %(v{prepend}_{idx})s" .format( idx=idx, prepend=prepend, left=property_expr, ), params, ) elif operator == "lt": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } return ( "AND toFloat64OrNull(trim(BOTH '\"' FROM replaceRegexpAll({left}, ' ', ''))) < %(v{prepend}_{idx})s" .format( idx=idx, prepend=prepend, left=property_expr, ), params, ) else: if is_json(prop.value) and not is_denormalized: clause = "AND has(%(v{prepend}_{idx})s, replaceRegexpAll(visitParamExtractRaw({prop_var}, %(k{prepend}_{idx})s),' ', ''))" params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): box_value(prop.value, remove_spaces=True), } else: clause = "AND has(%(v{prepend}_{idx})s, {left})" params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): box_value(prop.value) } return ( clause.format(left=property_expr, idx=idx, prepend=prepend, prop_var=prop_var), params, )
def prop_filter_json_extract( prop: Property, idx: int, prepend: str = "", prop_var: str = "properties") -> Tuple[str, Dict[str, Any]]: operator = prop.operator if operator == "is_not": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } return ( "AND NOT (trim(BOTH '\"' FROM JSONExtractRaw({prop_var}, %(k{prepend}_{idx})s)) = %(v{prepend}_{idx})s)" .format(idx=idx, prepend=prepend, prop_var=prop_var), params, ) elif operator == "icontains": value = "%{}%".format(prop.value) params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): value } return ( "AND trim(BOTH '\"' FROM JSONExtractRaw({prop_var}, %(k{prepend}_{idx})s)) LIKE %(v{prepend}_{idx})s" .format(idx=idx, prepend=prepend, prop_var=prop_var), params, ) elif operator == "not_icontains": value = "%{}%".format(prop.value) params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): value } return ( "AND NOT (trim(BOTH '\"' FROM JSONExtractRaw({prop_var}, %(k{prepend}_{idx})s)) LIKE %(v{prepend}_{idx})s)" .format(idx=idx, prepend=prepend, prop_var=prop_var), params, ) elif operator == "regex": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } return ( "AND match(trim(BOTH '\"' FROM JSONExtractRaw({prop_var}, %(k{prepend}_{idx})s)), %(v{prepend}_{idx})s)" .format(idx=idx, prepend=prepend, prop_var=prop_var), params, ) elif operator == "not_regex": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } return ( "AND NOT match(trim(BOTH '\"' FROM JSONExtractRaw({prop_var}, %(k{prepend}_{idx})s)), %(v{prepend}_{idx})s)" .format(idx=idx, prepend=prepend, prop_var=prop_var), params, ) elif operator == "is_set": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } return ( "AND JSONHas({prop_var}, %(k{prepend}_{idx})s)".format( idx=idx, prepend=prepend, prop_var=prop_var), params, ) elif operator == "is_not_set": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } return ( "AND (isNull(trim(BOTH '\"' FROM JSONExtractRaw({prop_var}, %(k{prepend}_{idx})s))) OR NOT JSONHas({prop_var}, %(k{prepend}_{idx})s))" .format(idx=idx, prepend=prepend, prop_var=prop_var), params, ) elif operator == "gt": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } return ( "AND toInt64OrNull(replaceRegexpAll(visitParamExtractRaw({prop_var}, %(k{prepend}_{idx})s), ' ', '')) > %(v{prepend}_{idx})s" .format(idx=idx, prepend=prepend, prop_var=prop_var), params, ) elif operator == "lt": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } return ( "AND toInt64OrNull(replaceRegexpAll(visitParamExtractRaw({prop_var}, %(k{prepend}_{idx})s), ' ', '')) < %(v{prepend}_{idx})s" .format(idx=idx, prepend=prepend, prop_var=prop_var), params, ) else: if is_int(prop.value): clause = "AND JSONExtractInt({prop_var}, %(k{prepend}_{idx})s) = %(v{prepend}_{idx})s" elif is_json(prop.value): clause = "AND replaceRegexpAll(visitParamExtractRaw({prop_var}, %(k{prepend}_{idx})s),' ', '') = replaceRegexpAll(toString(%(v{prepend}_{idx})s),' ', '')" else: clause = "AND trim(BOTH '\"' FROM JSONExtractRaw({prop_var}, %(k{prepend}_{idx})s)) = %(v{prepend}_{idx})s" params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } return ( clause.format(idx=idx, prepend=prepend, prop_var=prop_var), params, )
def prop_filter_json_extract( prop: Property, idx: int, prepend: str = "", prop_var: str = "properties", allow_denormalized_props: bool = True, transform_expression: Optional[Callable[[str], str]] = None, property_operator: PropertyOperatorType = PropertyOperatorType.AND, ) -> Tuple[str, Dict[str, Any]]: # TODO: Once all queries are migrated over we can get rid of allow_denormalized_props if transform_expression is not None: prop_var = transform_expression(prop_var) property_expr, is_denormalized = get_property_string_expr( property_table(prop), prop.key, f"%(k{prepend}_{idx})s", prop_var, allow_denormalized_props) if is_denormalized and transform_expression: property_expr = transform_expression(property_expr) operator = prop.operator params: Dict[str, Any] = {} if operator == "is_not": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): box_value(prop.value) } return ( " {property_operator} NOT has(%(v{prepend}_{idx})s, {left})". format(idx=idx, prepend=prepend, left=property_expr, property_operator=property_operator), params, ) elif operator == "icontains": value = "%{}%".format(prop.value) params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): value } return ( " {property_operator} {left} ILIKE %(v{prepend}_{idx})s".format( idx=idx, prepend=prepend, left=property_expr, property_operator=property_operator), params, ) elif operator == "not_icontains": value = "%{}%".format(prop.value) params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): value } return ( " {property_operator} NOT ({left} ILIKE %(v{prepend}_{idx})s)". format(idx=idx, prepend=prepend, left=property_expr, property_operator=property_operator), params, ) elif operator in ("regex", "not_regex"): if not is_valid_regex(prop.value): # If OR'ing, shouldn't be a problem since nothing will match this specific clause return f"{property_operator} 1 = 2", {} params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } return ( " {property_operator} {regex_function}({left}, %(v{prepend}_{idx})s)" .format( regex_function="match" if operator == "regex" else "NOT match", idx=idx, prepend=prepend, left=property_expr, property_operator=property_operator, ), params, ) elif operator == "is_set": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } if is_denormalized: return ( " {property_operator} notEmpty({left})".format( left=property_expr, property_operator=property_operator), params, ) return ( " {property_operator} JSONHas({prop_var}, %(k{prepend}_{idx})s)". format(idx=idx, prepend=prepend, prop_var=prop_var, property_operator=property_operator), params, ) elif operator == "is_not_set": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } if is_denormalized: return ( " {property_operator} empty({left})".format( left=property_expr, property_operator=property_operator), params, ) return ( " {property_operator} (isNull({left}) OR NOT JSONHas({prop_var}, %(k{prepend}_{idx})s))" .format( idx=idx, prepend=prepend, prop_var=prop_var, left=property_expr, property_operator=property_operator, ), params, ) elif operator == "is_date_exact": # TODO introducing duplication in these branches now rather than refactor too early assert isinstance(prop.value, str) prop_value_param_key = "v{}_{}".format(prepend, idx) # if we're comparing against a date with no time, # truncate the values in the DB which may have times granularity = "day" if re.match(r"^\d{4}-\d{2}-\d{2}$", prop.value) else "second" query = f"""AND date_trunc('{granularity}', coalesce( parseDateTimeBestEffortOrNull({property_expr}), parseDateTimeBestEffortOrNull(substring({property_expr}, 1, 10)) )) = %({prop_value_param_key})s""" return ( query, { "k{}_{}".format(prepend, idx): prop.key, prop_value_param_key: prop.value, }, ) elif operator == "is_date_after": # TODO introducing duplication in these branches now rather than refactor too early assert isinstance(prop.value, str) prop_value_param_key = "v{}_{}".format(prepend, idx) # if we're comparing against a date with no time, # then instead of 2019-01-01 (implied 00:00:00) # use 2019-01-01 23:59:59 is_date_only = re.match(r"^\d{4}-\d{2}-\d{2}$", prop.value) try_parse_as_date = f"parseDateTimeBestEffortOrNull({property_expr})" try_parse_as_timestamp = f"parseDateTimeBestEffortOrNull(substring({property_expr}, 1, 10))" first_of_date_or_timestamp = f"coalesce({try_parse_as_date},{try_parse_as_timestamp})" if is_date_only: adjusted_value = f"subtractSeconds(addDays(toDate(%({prop_value_param_key})s), 1), 1)" else: adjusted_value = f"%({prop_value_param_key})s" query = f"""{property_operator} {first_of_date_or_timestamp} > {adjusted_value}""" return ( query, { "k{}_{}".format(prepend, idx): prop.key, prop_value_param_key: prop.value, }, ) elif operator == "is_date_before": # TODO introducing duplication in these branches now rather than refactor too early assert isinstance(prop.value, str) prop_value_param_key = "v{}_{}".format(prepend, idx) try_parse_as_date = f"parseDateTimeBestEffortOrNull({property_expr})" try_parse_as_timestamp = f"parseDateTimeBestEffortOrNull(substring({property_expr}, 1, 10))" first_of_date_or_timestamp = f"coalesce({try_parse_as_date},{try_parse_as_timestamp})" query = f"""{property_operator} {first_of_date_or_timestamp} < %({prop_value_param_key})s""" return ( query, { "k{}_{}".format(prepend, idx): prop.key, prop_value_param_key: prop.value, }, ) elif operator == "gt": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } return ( " {property_operator} toFloat64OrNull(trim(BOTH '\"' FROM replaceRegexpAll({left}, ' ', ''))) > %(v{prepend}_{idx})s" .format( idx=idx, prepend=prepend, left=property_expr, property_operator=property_operator, ), params, ) elif operator == "lt": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } return ( " {property_operator} toFloat64OrNull(trim(BOTH '\"' FROM replaceRegexpAll({left}, ' ', ''))) < %(v{prepend}_{idx})s" .format( idx=idx, prepend=prepend, left=property_expr, property_operator=property_operator, ), params, ) else: if is_json(prop.value) and not is_denormalized: clause = " {property_operator} has(%(v{prepend}_{idx})s, replaceRegexpAll(visitParamExtractRaw({prop_var}, %(k{prepend}_{idx})s),' ', ''))" params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): box_value(prop.value, remove_spaces=True), } else: clause = " {property_operator} has(%(v{prepend}_{idx})s, {left})" params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): box_value(prop.value) } return ( clause.format(left=property_expr, idx=idx, prepend=prepend, prop_var=prop_var, property_operator=property_operator), params, )
def prop_filter_json_extract( prop: Property, idx: int, prepend: str = "", prop_var: str = "properties", allow_denormalized_props: bool = False) -> Tuple[str, Dict[str, Any]]: # Once all queries are migrated over we can get rid of allow_denormalized_props is_denormalized = prop.key.lower( ) in settings.CLICKHOUSE_DENORMALIZED_PROPERTIES and allow_denormalized_props json_extract = "trim(BOTH '\"' FROM JSONExtractRaw({prop_var}, %(k{prepend}_{idx})s))".format( idx=idx, prepend=prepend, prop_var=prop_var) denormalized = "properties_{}".format(prop.key.lower()) operator = prop.operator params: Dict[str, Any] = {} if operator == "is_not": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): box_value(prop.value) } return ( "AND NOT has(%(v{prepend}_{idx})s, {left})".format( idx=idx, prepend=prepend, left=denormalized if is_denormalized else json_extract), params, ) elif operator == "icontains": value = "%{}%".format(prop.value) params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): value } return ( "AND {left} LIKE %(v{prepend}_{idx})s".format( idx=idx, prepend=prepend, left=denormalized if is_denormalized else json_extract), params, ) elif operator == "not_icontains": value = "%{}%".format(prop.value) params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): value } return ( "AND NOT ({left} LIKE %(v{prepend}_{idx})s)".format( idx=idx, prepend=prepend, left=denormalized if is_denormalized else json_extract), params, ) elif operator in ("regex", "not_regex"): if not is_valid_regex(prop.value): return "AND 1 = 2", {} params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } return ( "AND {regex_function}({left}, %(v{prepend}_{idx})s)".format( regex_function="match" if operator == "regex" else "NOT match", idx=idx, prepend=prepend, left=denormalized if is_denormalized else json_extract, ), params, ) elif operator == "is_set": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } if is_denormalized: return ( "AND NOT isNull({left})".format(left=denormalized), params, ) return ( "AND JSONHas({prop_var}, %(k{prepend}_{idx})s)".format( idx=idx, prepend=prepend, prop_var=prop_var), params, ) elif operator == "is_not_set": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } if is_denormalized: return ( "AND isNull({left})".format(left=denormalized), params, ) return ( "AND (isNull({left}) OR NOT JSONHas({prop_var}, %(k{prepend}_{idx})s))" .format(idx=idx, prepend=prepend, prop_var=prop_var, left=json_extract), params, ) elif operator == "gt": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } return ( "AND toInt64OrNull(trim(BOTH '\"' FROM replaceRegexpAll({left}, ' ', ''))) > %(v{prepend}_{idx})s" .format( idx=idx, prepend=prepend, left=denormalized if is_denormalized else "visitParamExtractRaw({prop_var}, %(k{prepend}_{idx})s)". format( idx=idx, prepend=prepend, prop_var=prop_var, ), ), params, ) elif operator == "lt": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } return ( "AND toInt64OrNull(trim(BOTH '\"' FROM replaceRegexpAll({left}, ' ', ''))) < %(v{prepend}_{idx})s" .format( idx=idx, prepend=prepend, left=denormalized if is_denormalized else "visitParamExtractRaw({prop_var}, %(k{prepend}_{idx})s)". format( idx=idx, prepend=prepend, prop_var=prop_var, ), ), params, ) else: if is_json(prop.value) and not is_denormalized: clause = "AND has(%(v{prepend}_{idx})s, replaceRegexpAll(visitParamExtractRaw({prop_var}, %(k{prepend}_{idx})s),' ', ''))" params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): box_value(prop.value, remove_spaces=True), } else: clause = "AND has(%(v{prepend}_{idx})s, {left})" params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): box_value(prop.value) } return ( clause.format( left=denormalized if is_denormalized else json_extract, idx=idx, prepend=prepend, prop_var=prop_var), params, )