Exemple #1
0
def prop_filter_json_extract(
    prop: Property,
    idx: int,
    prepend: str = "",
    prop_var: str = "properties",
    allow_denormalized_props: bool = True,
    transform_expression: Optional[Callable[[str], str]] = None,
) -> Tuple[str, Dict[str, Any]]:
    # TODO: Once all queries are migrated over we can get rid of allow_denormalized_props
    if transform_expression is not None:
        prop_var = transform_expression(prop_var)

    property_expr, is_denormalized = get_property_string_expr(
        property_table(prop), prop.key, f"%(k{prepend}_{idx})s", prop_var,
        allow_denormalized_props)

    if is_denormalized and transform_expression:
        property_expr = transform_expression(property_expr)

    operator = prop.operator
    params: Dict[str, Any] = {}

    if operator == "is_not":
        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): box_value(prop.value)
        }
        return (
            "AND NOT has(%(v{prepend}_{idx})s, {left})".format(
                idx=idx, prepend=prepend, left=property_expr),
            params,
        )
    elif operator == "icontains":
        value = "%{}%".format(prop.value)
        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): value
        }
        return (
            "AND {left} ILIKE %(v{prepend}_{idx})s".format(idx=idx,
                                                           prepend=prepend,
                                                           left=property_expr),
            params,
        )
    elif operator == "not_icontains":
        value = "%{}%".format(prop.value)
        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): value
        }
        return (
            "AND NOT ({left} ILIKE %(v{prepend}_{idx})s)".format(
                idx=idx, prepend=prepend, left=property_expr),
            params,
        )
    elif operator in ("regex", "not_regex"):
        if not is_valid_regex(prop.value):
            return "AND 1 = 2", {}

        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): prop.value
        }

        return (
            "AND {regex_function}({left}, %(v{prepend}_{idx})s)".format(
                regex_function="match" if operator == "regex" else "NOT match",
                idx=idx,
                prepend=prepend,
                left=property_expr,
            ),
            params,
        )
    elif operator == "is_set":
        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): prop.value
        }
        if is_denormalized:
            return (
                "AND notEmpty({left})".format(left=property_expr),
                params,
            )
        return (
            "AND JSONHas({prop_var}, %(k{prepend}_{idx})s)".format(
                idx=idx, prepend=prepend, prop_var=prop_var),
            params,
        )
    elif operator == "is_not_set":
        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): prop.value
        }
        if is_denormalized:
            return (
                "AND empty({left})".format(left=property_expr),
                params,
            )
        return (
            "AND (isNull({left}) OR NOT JSONHas({prop_var}, %(k{prepend}_{idx})s))"
            .format(idx=idx,
                    prepend=prepend,
                    prop_var=prop_var,
                    left=property_expr),
            params,
        )
    elif operator == "gt":
        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): prop.value
        }
        return (
            "AND toFloat64OrNull(trim(BOTH '\"' FROM replaceRegexpAll({left}, ' ', ''))) > %(v{prepend}_{idx})s"
            .format(
                idx=idx,
                prepend=prepend,
                left=property_expr,
            ),
            params,
        )
    elif operator == "lt":
        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): prop.value
        }
        return (
            "AND toFloat64OrNull(trim(BOTH '\"' FROM replaceRegexpAll({left}, ' ', ''))) < %(v{prepend}_{idx})s"
            .format(
                idx=idx,
                prepend=prepend,
                left=property_expr,
            ),
            params,
        )
    else:
        if is_json(prop.value) and not is_denormalized:
            clause = "AND has(%(v{prepend}_{idx})s, replaceRegexpAll(visitParamExtractRaw({prop_var}, %(k{prepend}_{idx})s),' ', ''))"
            params = {
                "k{}_{}".format(prepend, idx):
                prop.key,
                "v{}_{}".format(prepend, idx):
                box_value(prop.value, remove_spaces=True),
            }
        else:
            clause = "AND has(%(v{prepend}_{idx})s, {left})"
            params = {
                "k{}_{}".format(prepend, idx): prop.key,
                "v{}_{}".format(prepend, idx): box_value(prop.value)
            }
        return (
            clause.format(left=property_expr,
                          idx=idx,
                          prepend=prepend,
                          prop_var=prop_var),
            params,
        )
Exemple #2
0
def prop_filter_json_extract(
        prop: Property,
        idx: int,
        prepend: str = "",
        prop_var: str = "properties") -> Tuple[str, Dict[str, Any]]:
    operator = prop.operator
    if operator == "is_not":
        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): prop.value
        }
        return (
            "AND NOT (trim(BOTH '\"' FROM JSONExtractRaw({prop_var}, %(k{prepend}_{idx})s)) = %(v{prepend}_{idx})s)"
            .format(idx=idx, prepend=prepend, prop_var=prop_var),
            params,
        )
    elif operator == "icontains":
        value = "%{}%".format(prop.value)
        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): value
        }
        return (
            "AND trim(BOTH '\"' FROM JSONExtractRaw({prop_var}, %(k{prepend}_{idx})s)) LIKE %(v{prepend}_{idx})s"
            .format(idx=idx, prepend=prepend, prop_var=prop_var),
            params,
        )
    elif operator == "not_icontains":
        value = "%{}%".format(prop.value)
        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): value
        }
        return (
            "AND NOT (trim(BOTH '\"' FROM JSONExtractRaw({prop_var}, %(k{prepend}_{idx})s)) LIKE %(v{prepend}_{idx})s)"
            .format(idx=idx, prepend=prepend, prop_var=prop_var),
            params,
        )
    elif operator == "regex":
        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): prop.value
        }
        return (
            "AND match(trim(BOTH '\"' FROM JSONExtractRaw({prop_var}, %(k{prepend}_{idx})s)), %(v{prepend}_{idx})s)"
            .format(idx=idx, prepend=prepend, prop_var=prop_var),
            params,
        )
    elif operator == "not_regex":
        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): prop.value
        }
        return (
            "AND NOT match(trim(BOTH '\"' FROM JSONExtractRaw({prop_var}, %(k{prepend}_{idx})s)), %(v{prepend}_{idx})s)"
            .format(idx=idx, prepend=prepend, prop_var=prop_var),
            params,
        )
    elif operator == "is_set":
        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): prop.value
        }
        return (
            "AND JSONHas({prop_var}, %(k{prepend}_{idx})s)".format(
                idx=idx, prepend=prepend, prop_var=prop_var),
            params,
        )
    elif operator == "is_not_set":
        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): prop.value
        }
        return (
            "AND (isNull(trim(BOTH '\"' FROM JSONExtractRaw({prop_var}, %(k{prepend}_{idx})s))) OR NOT JSONHas({prop_var}, %(k{prepend}_{idx})s))"
            .format(idx=idx, prepend=prepend, prop_var=prop_var),
            params,
        )
    elif operator == "gt":
        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): prop.value
        }
        return (
            "AND toInt64OrNull(replaceRegexpAll(visitParamExtractRaw({prop_var}, %(k{prepend}_{idx})s), ' ', '')) > %(v{prepend}_{idx})s"
            .format(idx=idx, prepend=prepend, prop_var=prop_var),
            params,
        )
    elif operator == "lt":
        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): prop.value
        }
        return (
            "AND toInt64OrNull(replaceRegexpAll(visitParamExtractRaw({prop_var}, %(k{prepend}_{idx})s), ' ', '')) < %(v{prepend}_{idx})s"
            .format(idx=idx, prepend=prepend, prop_var=prop_var),
            params,
        )
    else:
        if is_int(prop.value):
            clause = "AND JSONExtractInt({prop_var}, %(k{prepend}_{idx})s) = %(v{prepend}_{idx})s"
        elif is_json(prop.value):
            clause = "AND replaceRegexpAll(visitParamExtractRaw({prop_var}, %(k{prepend}_{idx})s),' ', '') = replaceRegexpAll(toString(%(v{prepend}_{idx})s),' ', '')"
        else:
            clause = "AND trim(BOTH '\"' FROM JSONExtractRaw({prop_var}, %(k{prepend}_{idx})s)) = %(v{prepend}_{idx})s"

        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): prop.value
        }
        return (
            clause.format(idx=idx, prepend=prepend, prop_var=prop_var),
            params,
        )
Exemple #3
0
def prop_filter_json_extract(
    prop: Property,
    idx: int,
    prepend: str = "",
    prop_var: str = "properties",
    allow_denormalized_props: bool = True,
    transform_expression: Optional[Callable[[str], str]] = None,
    property_operator: PropertyOperatorType = PropertyOperatorType.AND,
) -> Tuple[str, Dict[str, Any]]:
    # TODO: Once all queries are migrated over we can get rid of allow_denormalized_props
    if transform_expression is not None:
        prop_var = transform_expression(prop_var)

    property_expr, is_denormalized = get_property_string_expr(
        property_table(prop), prop.key, f"%(k{prepend}_{idx})s", prop_var,
        allow_denormalized_props)

    if is_denormalized and transform_expression:
        property_expr = transform_expression(property_expr)

    operator = prop.operator
    params: Dict[str, Any] = {}

    if operator == "is_not":
        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): box_value(prop.value)
        }
        return (
            " {property_operator} NOT has(%(v{prepend}_{idx})s, {left})".
            format(idx=idx,
                   prepend=prepend,
                   left=property_expr,
                   property_operator=property_operator),
            params,
        )
    elif operator == "icontains":
        value = "%{}%".format(prop.value)
        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): value
        }
        return (
            " {property_operator} {left} ILIKE %(v{prepend}_{idx})s".format(
                idx=idx,
                prepend=prepend,
                left=property_expr,
                property_operator=property_operator),
            params,
        )
    elif operator == "not_icontains":
        value = "%{}%".format(prop.value)
        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): value
        }
        return (
            " {property_operator} NOT ({left} ILIKE %(v{prepend}_{idx})s)".
            format(idx=idx,
                   prepend=prepend,
                   left=property_expr,
                   property_operator=property_operator),
            params,
        )
    elif operator in ("regex", "not_regex"):
        if not is_valid_regex(prop.value):
            # If OR'ing, shouldn't be a problem since nothing will match this specific clause
            return f"{property_operator} 1 = 2", {}

        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): prop.value
        }

        return (
            " {property_operator} {regex_function}({left}, %(v{prepend}_{idx})s)"
            .format(
                regex_function="match" if operator == "regex" else "NOT match",
                idx=idx,
                prepend=prepend,
                left=property_expr,
                property_operator=property_operator,
            ),
            params,
        )
    elif operator == "is_set":
        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): prop.value
        }
        if is_denormalized:
            return (
                " {property_operator} notEmpty({left})".format(
                    left=property_expr, property_operator=property_operator),
                params,
            )
        return (
            " {property_operator} JSONHas({prop_var}, %(k{prepend}_{idx})s)".
            format(idx=idx,
                   prepend=prepend,
                   prop_var=prop_var,
                   property_operator=property_operator),
            params,
        )
    elif operator == "is_not_set":
        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): prop.value
        }
        if is_denormalized:
            return (
                " {property_operator} empty({left})".format(
                    left=property_expr, property_operator=property_operator),
                params,
            )
        return (
            " {property_operator} (isNull({left}) OR NOT JSONHas({prop_var}, %(k{prepend}_{idx})s))"
            .format(
                idx=idx,
                prepend=prepend,
                prop_var=prop_var,
                left=property_expr,
                property_operator=property_operator,
            ),
            params,
        )
    elif operator == "is_date_exact":
        # TODO introducing duplication in these branches now rather than refactor too early
        assert isinstance(prop.value, str)
        prop_value_param_key = "v{}_{}".format(prepend, idx)

        # if we're comparing against a date with no time,
        # truncate the values in the DB which may have times
        granularity = "day" if re.match(r"^\d{4}-\d{2}-\d{2}$",
                                        prop.value) else "second"
        query = f"""AND date_trunc('{granularity}', coalesce(
            parseDateTimeBestEffortOrNull({property_expr}),
            parseDateTimeBestEffortOrNull(substring({property_expr}, 1, 10))
        )) = %({prop_value_param_key})s"""

        return (
            query,
            {
                "k{}_{}".format(prepend, idx): prop.key,
                prop_value_param_key: prop.value,
            },
        )
    elif operator == "is_date_after":
        # TODO introducing duplication in these branches now rather than refactor too early
        assert isinstance(prop.value, str)
        prop_value_param_key = "v{}_{}".format(prepend, idx)

        # if we're comparing against a date with no time,
        # then instead of 2019-01-01 (implied 00:00:00)
        # use 2019-01-01 23:59:59
        is_date_only = re.match(r"^\d{4}-\d{2}-\d{2}$", prop.value)

        try_parse_as_date = f"parseDateTimeBestEffortOrNull({property_expr})"
        try_parse_as_timestamp = f"parseDateTimeBestEffortOrNull(substring({property_expr}, 1, 10))"
        first_of_date_or_timestamp = f"coalesce({try_parse_as_date},{try_parse_as_timestamp})"

        if is_date_only:
            adjusted_value = f"subtractSeconds(addDays(toDate(%({prop_value_param_key})s), 1), 1)"
        else:
            adjusted_value = f"%({prop_value_param_key})s"

        query = f"""{property_operator} {first_of_date_or_timestamp} > {adjusted_value}"""

        return (
            query,
            {
                "k{}_{}".format(prepend, idx): prop.key,
                prop_value_param_key: prop.value,
            },
        )
    elif operator == "is_date_before":
        # TODO introducing duplication in these branches now rather than refactor too early
        assert isinstance(prop.value, str)
        prop_value_param_key = "v{}_{}".format(prepend, idx)
        try_parse_as_date = f"parseDateTimeBestEffortOrNull({property_expr})"
        try_parse_as_timestamp = f"parseDateTimeBestEffortOrNull(substring({property_expr}, 1, 10))"
        first_of_date_or_timestamp = f"coalesce({try_parse_as_date},{try_parse_as_timestamp})"
        query = f"""{property_operator} {first_of_date_or_timestamp} < %({prop_value_param_key})s"""

        return (
            query,
            {
                "k{}_{}".format(prepend, idx): prop.key,
                prop_value_param_key: prop.value,
            },
        )
    elif operator == "gt":
        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): prop.value
        }
        return (
            " {property_operator} toFloat64OrNull(trim(BOTH '\"' FROM replaceRegexpAll({left}, ' ', ''))) > %(v{prepend}_{idx})s"
            .format(
                idx=idx,
                prepend=prepend,
                left=property_expr,
                property_operator=property_operator,
            ),
            params,
        )
    elif operator == "lt":
        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): prop.value
        }
        return (
            " {property_operator} toFloat64OrNull(trim(BOTH '\"' FROM replaceRegexpAll({left}, ' ', ''))) < %(v{prepend}_{idx})s"
            .format(
                idx=idx,
                prepend=prepend,
                left=property_expr,
                property_operator=property_operator,
            ),
            params,
        )
    else:
        if is_json(prop.value) and not is_denormalized:
            clause = " {property_operator} has(%(v{prepend}_{idx})s, replaceRegexpAll(visitParamExtractRaw({prop_var}, %(k{prepend}_{idx})s),' ', ''))"
            params = {
                "k{}_{}".format(prepend, idx):
                prop.key,
                "v{}_{}".format(prepend, idx):
                box_value(prop.value, remove_spaces=True),
            }
        else:
            clause = " {property_operator} has(%(v{prepend}_{idx})s, {left})"
            params = {
                "k{}_{}".format(prepend, idx): prop.key,
                "v{}_{}".format(prepend, idx): box_value(prop.value)
            }
        return (
            clause.format(left=property_expr,
                          idx=idx,
                          prepend=prepend,
                          prop_var=prop_var,
                          property_operator=property_operator),
            params,
        )
Exemple #4
0
def prop_filter_json_extract(
        prop: Property,
        idx: int,
        prepend: str = "",
        prop_var: str = "properties",
        allow_denormalized_props: bool = False) -> Tuple[str, Dict[str, Any]]:
    # Once all queries are migrated over we can get rid of allow_denormalized_props
    is_denormalized = prop.key.lower(
    ) in settings.CLICKHOUSE_DENORMALIZED_PROPERTIES and allow_denormalized_props
    json_extract = "trim(BOTH '\"' FROM JSONExtractRaw({prop_var}, %(k{prepend}_{idx})s))".format(
        idx=idx, prepend=prepend, prop_var=prop_var)
    denormalized = "properties_{}".format(prop.key.lower())
    operator = prop.operator
    params: Dict[str, Any] = {}
    if operator == "is_not":
        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): box_value(prop.value)
        }
        return (
            "AND NOT has(%(v{prepend}_{idx})s, {left})".format(
                idx=idx,
                prepend=prepend,
                left=denormalized if is_denormalized else json_extract),
            params,
        )
    elif operator == "icontains":
        value = "%{}%".format(prop.value)
        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): value
        }
        return (
            "AND {left} LIKE %(v{prepend}_{idx})s".format(
                idx=idx,
                prepend=prepend,
                left=denormalized if is_denormalized else json_extract),
            params,
        )
    elif operator == "not_icontains":
        value = "%{}%".format(prop.value)
        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): value
        }
        return (
            "AND NOT ({left} LIKE %(v{prepend}_{idx})s)".format(
                idx=idx,
                prepend=prepend,
                left=denormalized if is_denormalized else json_extract),
            params,
        )
    elif operator in ("regex", "not_regex"):
        if not is_valid_regex(prop.value):
            return "AND 1 = 2", {}

        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): prop.value
        }

        return (
            "AND {regex_function}({left}, %(v{prepend}_{idx})s)".format(
                regex_function="match" if operator == "regex" else "NOT match",
                idx=idx,
                prepend=prepend,
                left=denormalized if is_denormalized else json_extract,
            ),
            params,
        )
    elif operator == "is_set":
        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): prop.value
        }
        if is_denormalized:
            return (
                "AND NOT isNull({left})".format(left=denormalized),
                params,
            )
        return (
            "AND JSONHas({prop_var}, %(k{prepend}_{idx})s)".format(
                idx=idx, prepend=prepend, prop_var=prop_var),
            params,
        )
    elif operator == "is_not_set":
        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): prop.value
        }
        if is_denormalized:
            return (
                "AND isNull({left})".format(left=denormalized),
                params,
            )
        return (
            "AND (isNull({left}) OR NOT JSONHas({prop_var}, %(k{prepend}_{idx})s))"
            .format(idx=idx,
                    prepend=prepend,
                    prop_var=prop_var,
                    left=json_extract),
            params,
        )
    elif operator == "gt":
        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): prop.value
        }
        return (
            "AND toInt64OrNull(trim(BOTH '\"' FROM replaceRegexpAll({left}, ' ', ''))) > %(v{prepend}_{idx})s"
            .format(
                idx=idx,
                prepend=prepend,
                left=denormalized if is_denormalized else
                "visitParamExtractRaw({prop_var}, %(k{prepend}_{idx})s)".
                format(
                    idx=idx,
                    prepend=prepend,
                    prop_var=prop_var,
                ),
            ),
            params,
        )
    elif operator == "lt":
        params = {
            "k{}_{}".format(prepend, idx): prop.key,
            "v{}_{}".format(prepend, idx): prop.value
        }
        return (
            "AND toInt64OrNull(trim(BOTH '\"' FROM replaceRegexpAll({left}, ' ', ''))) < %(v{prepend}_{idx})s"
            .format(
                idx=idx,
                prepend=prepend,
                left=denormalized if is_denormalized else
                "visitParamExtractRaw({prop_var}, %(k{prepend}_{idx})s)".
                format(
                    idx=idx,
                    prepend=prepend,
                    prop_var=prop_var,
                ),
            ),
            params,
        )
    else:
        if is_json(prop.value) and not is_denormalized:
            clause = "AND has(%(v{prepend}_{idx})s, replaceRegexpAll(visitParamExtractRaw({prop_var}, %(k{prepend}_{idx})s),' ', ''))"
            params = {
                "k{}_{}".format(prepend, idx):
                prop.key,
                "v{}_{}".format(prepend, idx):
                box_value(prop.value, remove_spaces=True),
            }
        else:
            clause = "AND has(%(v{prepend}_{idx})s, {left})"
            params = {
                "k{}_{}".format(prepend, idx): prop.key,
                "v{}_{}".format(prepend, idx): box_value(prop.value)
            }
        return (
            clause.format(
                left=denormalized if is_denormalized else json_extract,
                idx=idx,
                prepend=prepend,
                prop_var=prop_var),
            params,
        )