def property_to_Q(self) -> Q: from .cohort import CohortPeople if self.type in CLICKHOUSE_ONLY_PROPERTY_TYPES: raise ValueError( f"property_to_Q: type is not supported: {repr(self.type)}") value = self._parse_value(self.value) if self.type == "cohort": from posthog.models.cohort import Cohort cohort_id = int(cast(Union[str, int], value)) cohort = Cohort.objects.get(pk=cohort_id) return Q( Exists( CohortPeople.objects.filter( cohort_id=cohort.pk, person_id=OuterRef("id"), version=cohort.version).only("id"))) column = "group_properties" if self.type == "group" else "properties" if self.operator == "is_not": return Q(~lookup_q(f"{column}__{self.key}", value) | ~Q(**{f"{column}__has_key": self.key})) if self.operator == "is_set": return Q(**{f"{column}__{self.key}__isnull": False}) if self.operator == "is_not_set": return Q(**{f"{column}__{self.key}__isnull": True}) if self.operator in ("regex", "not_regex") and not is_valid_regex(value): # Return no data for invalid regexes return Q(pk=-1) if isinstance(self.operator, str) and self.operator.startswith("not_"): return Q( ~Q(**{f"{column}__{self.key}__{self.operator[4:]}": value}) | ~Q(**{f"{column}__has_key": self.key}) | Q(**{f"{column}__{self.key}": None})) if self.operator == "exact" or self.operator is None: return lookup_q(f"{column}__{self.key}", value) else: assert not isinstance(value, list) return Q(**{f"{column}__{self.key}__{self.operator}": value})
def property_to_Q(self) -> Q: from .cohort import CohortPeople if self.type in CLICKHOUSE_ONLY_PROPERTY_TYPES: raise ValueError( f"property_to_Q: type is not supported: {repr(self.type)}") value = self._parse_value(self.value) if self.type == "cohort": cohort_id = int(cast(Union[str, int], value)) return Q( Exists( CohortPeople.objects.filter( cohort_id=cohort_id, person_id=OuterRef("id"), ).only("id"))) if self.operator == "is_not": return Q(~lookup_q(f"properties__{self.key}", value) | ~Q(properties__has_key=self.key)) if self.operator == "is_set": return Q(**{"properties__{}__isnull".format(self.key): False}) if self.operator == "is_not_set": return Q(**{"properties__{}__isnull".format(self.key): True}) if self.operator in ("regex", "not_regex") and not is_valid_regex(value): # Return no data for invalid regexes return Q(pk=-1) if isinstance(self.operator, str) and self.operator.startswith("not_"): return Q(~Q( **{ "properties__{}__{}".format(self.key, self.operator[4:]): value }) | ~Q(properties__has_key=self.key) | Q(**{"properties__{}".format(self.key): None})) if self.operator == "exact" or self.operator is None: return lookup_q(f"properties__{self.key}", value) else: assert not isinstance(value, list) return Q(**{f"properties__{self.key}__{self.operator}": value})
def property_to_Q(self) -> Q: from .cohort import CohortPeople value = self._parse_value(self.value) if self.type == "cohort": return Q( Exists( CohortPeople.objects.filter( cohort_id=int(value), person_id=OuterRef("id"), ).only("id"))) if self.operator == "is_not": return Q(~Q(**{"properties__{}".format(self.key): value}) | ~Q(properties__has_key=self.key)) if self.operator == "is_set": return Q(**{"properties__{}__isnull".format(self.key): False}) if self.operator == "is_not_set": return Q(**{"properties__{}__isnull".format(self.key): True}) if self.operator in ("regex", "not_regex") and not is_valid_regex(value): # Return no data for invalid regexes return Q(pk=-1) if isinstance(self.operator, str) and self.operator.startswith("not_"): return Q(~Q( **{ "properties__{}__{}".format(self.key, self.operator[4:]): value }) | ~Q(properties__has_key=self.key) | Q(**{"properties__{}".format(self.key): None})) return Q( **{ "properties__{}{}".format( self.key, f"__{self.operator}" if self.operator else ""): value })
def prop_filter_json_extract( prop: Property, idx: int, prepend: str = "", prop_var: str = "properties", allow_denormalized_props: bool = True, transform_expression: Optional[Callable[[str], str]] = None, ) -> Tuple[str, Dict[str, Any]]: # TODO: Once all queries are migrated over we can get rid of allow_denormalized_props if transform_expression is not None: prop_var = transform_expression(prop_var) property_expr, is_denormalized = get_property_string_expr( property_table(prop), prop.key, f"%(k{prepend}_{idx})s", prop_var, allow_denormalized_props) if is_denormalized and transform_expression: property_expr = transform_expression(property_expr) operator = prop.operator params: Dict[str, Any] = {} if operator == "is_not": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): box_value(prop.value) } return ( "AND NOT has(%(v{prepend}_{idx})s, {left})".format( idx=idx, prepend=prepend, left=property_expr), params, ) elif operator == "icontains": value = "%{}%".format(prop.value) params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): value } return ( "AND {left} ILIKE %(v{prepend}_{idx})s".format(idx=idx, prepend=prepend, left=property_expr), params, ) elif operator == "not_icontains": value = "%{}%".format(prop.value) params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): value } return ( "AND NOT ({left} ILIKE %(v{prepend}_{idx})s)".format( idx=idx, prepend=prepend, left=property_expr), params, ) elif operator in ("regex", "not_regex"): if not is_valid_regex(prop.value): return "AND 1 = 2", {} params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } return ( "AND {regex_function}({left}, %(v{prepend}_{idx})s)".format( regex_function="match" if operator == "regex" else "NOT match", idx=idx, prepend=prepend, left=property_expr, ), params, ) elif operator == "is_set": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } if is_denormalized: return ( "AND notEmpty({left})".format(left=property_expr), params, ) return ( "AND JSONHas({prop_var}, %(k{prepend}_{idx})s)".format( idx=idx, prepend=prepend, prop_var=prop_var), params, ) elif operator == "is_not_set": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } if is_denormalized: return ( "AND empty({left})".format(left=property_expr), params, ) return ( "AND (isNull({left}) OR NOT JSONHas({prop_var}, %(k{prepend}_{idx})s))" .format(idx=idx, prepend=prepend, prop_var=prop_var, left=property_expr), params, ) elif operator == "gt": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } return ( "AND toFloat64OrNull(trim(BOTH '\"' FROM replaceRegexpAll({left}, ' ', ''))) > %(v{prepend}_{idx})s" .format( idx=idx, prepend=prepend, left=property_expr, ), params, ) elif operator == "lt": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } return ( "AND toFloat64OrNull(trim(BOTH '\"' FROM replaceRegexpAll({left}, ' ', ''))) < %(v{prepend}_{idx})s" .format( idx=idx, prepend=prepend, left=property_expr, ), params, ) else: if is_json(prop.value) and not is_denormalized: clause = "AND has(%(v{prepend}_{idx})s, replaceRegexpAll(visitParamExtractRaw({prop_var}, %(k{prepend}_{idx})s),' ', ''))" params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): box_value(prop.value, remove_spaces=True), } else: clause = "AND has(%(v{prepend}_{idx})s, {left})" params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): box_value(prop.value) } return ( clause.format(left=property_expr, idx=idx, prepend=prepend, prop_var=prop_var), params, )
def prop_filter_json_extract( prop: Property, idx: int, prepend: str = "", prop_var: str = "properties", allow_denormalized_props: bool = True, transform_expression: Optional[Callable[[str], str]] = None, property_operator: PropertyOperatorType = PropertyOperatorType.AND, ) -> Tuple[str, Dict[str, Any]]: # TODO: Once all queries are migrated over we can get rid of allow_denormalized_props if transform_expression is not None: prop_var = transform_expression(prop_var) property_expr, is_denormalized = get_property_string_expr( property_table(prop), prop.key, f"%(k{prepend}_{idx})s", prop_var, allow_denormalized_props) if is_denormalized and transform_expression: property_expr = transform_expression(property_expr) operator = prop.operator params: Dict[str, Any] = {} if operator == "is_not": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): box_value(prop.value) } return ( " {property_operator} NOT has(%(v{prepend}_{idx})s, {left})". format(idx=idx, prepend=prepend, left=property_expr, property_operator=property_operator), params, ) elif operator == "icontains": value = "%{}%".format(prop.value) params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): value } return ( " {property_operator} {left} ILIKE %(v{prepend}_{idx})s".format( idx=idx, prepend=prepend, left=property_expr, property_operator=property_operator), params, ) elif operator == "not_icontains": value = "%{}%".format(prop.value) params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): value } return ( " {property_operator} NOT ({left} ILIKE %(v{prepend}_{idx})s)". format(idx=idx, prepend=prepend, left=property_expr, property_operator=property_operator), params, ) elif operator in ("regex", "not_regex"): if not is_valid_regex(prop.value): # If OR'ing, shouldn't be a problem since nothing will match this specific clause return f"{property_operator} 1 = 2", {} params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } return ( " {property_operator} {regex_function}({left}, %(v{prepend}_{idx})s)" .format( regex_function="match" if operator == "regex" else "NOT match", idx=idx, prepend=prepend, left=property_expr, property_operator=property_operator, ), params, ) elif operator == "is_set": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } if is_denormalized: return ( " {property_operator} notEmpty({left})".format( left=property_expr, property_operator=property_operator), params, ) return ( " {property_operator} JSONHas({prop_var}, %(k{prepend}_{idx})s)". format(idx=idx, prepend=prepend, prop_var=prop_var, property_operator=property_operator), params, ) elif operator == "is_not_set": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } if is_denormalized: return ( " {property_operator} empty({left})".format( left=property_expr, property_operator=property_operator), params, ) return ( " {property_operator} (isNull({left}) OR NOT JSONHas({prop_var}, %(k{prepend}_{idx})s))" .format( idx=idx, prepend=prepend, prop_var=prop_var, left=property_expr, property_operator=property_operator, ), params, ) elif operator == "is_date_exact": # TODO introducing duplication in these branches now rather than refactor too early assert isinstance(prop.value, str) prop_value_param_key = "v{}_{}".format(prepend, idx) # if we're comparing against a date with no time, # truncate the values in the DB which may have times granularity = "day" if re.match(r"^\d{4}-\d{2}-\d{2}$", prop.value) else "second" query = f"""AND date_trunc('{granularity}', coalesce( parseDateTimeBestEffortOrNull({property_expr}), parseDateTimeBestEffortOrNull(substring({property_expr}, 1, 10)) )) = %({prop_value_param_key})s""" return ( query, { "k{}_{}".format(prepend, idx): prop.key, prop_value_param_key: prop.value, }, ) elif operator == "is_date_after": # TODO introducing duplication in these branches now rather than refactor too early assert isinstance(prop.value, str) prop_value_param_key = "v{}_{}".format(prepend, idx) # if we're comparing against a date with no time, # then instead of 2019-01-01 (implied 00:00:00) # use 2019-01-01 23:59:59 is_date_only = re.match(r"^\d{4}-\d{2}-\d{2}$", prop.value) try_parse_as_date = f"parseDateTimeBestEffortOrNull({property_expr})" try_parse_as_timestamp = f"parseDateTimeBestEffortOrNull(substring({property_expr}, 1, 10))" first_of_date_or_timestamp = f"coalesce({try_parse_as_date},{try_parse_as_timestamp})" if is_date_only: adjusted_value = f"subtractSeconds(addDays(toDate(%({prop_value_param_key})s), 1), 1)" else: adjusted_value = f"%({prop_value_param_key})s" query = f"""{property_operator} {first_of_date_or_timestamp} > {adjusted_value}""" return ( query, { "k{}_{}".format(prepend, idx): prop.key, prop_value_param_key: prop.value, }, ) elif operator == "is_date_before": # TODO introducing duplication in these branches now rather than refactor too early assert isinstance(prop.value, str) prop_value_param_key = "v{}_{}".format(prepend, idx) try_parse_as_date = f"parseDateTimeBestEffortOrNull({property_expr})" try_parse_as_timestamp = f"parseDateTimeBestEffortOrNull(substring({property_expr}, 1, 10))" first_of_date_or_timestamp = f"coalesce({try_parse_as_date},{try_parse_as_timestamp})" query = f"""{property_operator} {first_of_date_or_timestamp} < %({prop_value_param_key})s""" return ( query, { "k{}_{}".format(prepend, idx): prop.key, prop_value_param_key: prop.value, }, ) elif operator == "gt": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } return ( " {property_operator} toFloat64OrNull(trim(BOTH '\"' FROM replaceRegexpAll({left}, ' ', ''))) > %(v{prepend}_{idx})s" .format( idx=idx, prepend=prepend, left=property_expr, property_operator=property_operator, ), params, ) elif operator == "lt": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } return ( " {property_operator} toFloat64OrNull(trim(BOTH '\"' FROM replaceRegexpAll({left}, ' ', ''))) < %(v{prepend}_{idx})s" .format( idx=idx, prepend=prepend, left=property_expr, property_operator=property_operator, ), params, ) else: if is_json(prop.value) and not is_denormalized: clause = " {property_operator} has(%(v{prepend}_{idx})s, replaceRegexpAll(visitParamExtractRaw({prop_var}, %(k{prepend}_{idx})s),' ', ''))" params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): box_value(prop.value, remove_spaces=True), } else: clause = " {property_operator} has(%(v{prepend}_{idx})s, {left})" params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): box_value(prop.value) } return ( clause.format(left=property_expr, idx=idx, prepend=prepend, prop_var=prop_var, property_operator=property_operator), params, )
def prop_filter_json_extract( prop: Property, idx: int, prepend: str = "", prop_var: str = "properties", allow_denormalized_props: bool = False) -> Tuple[str, Dict[str, Any]]: # Once all queries are migrated over we can get rid of allow_denormalized_props is_denormalized = prop.key.lower( ) in settings.CLICKHOUSE_DENORMALIZED_PROPERTIES and allow_denormalized_props json_extract = "trim(BOTH '\"' FROM JSONExtractRaw({prop_var}, %(k{prepend}_{idx})s))".format( idx=idx, prepend=prepend, prop_var=prop_var) denormalized = "properties_{}".format(prop.key.lower()) operator = prop.operator params: Dict[str, Any] = {} if operator == "is_not": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): box_value(prop.value) } return ( "AND NOT has(%(v{prepend}_{idx})s, {left})".format( idx=idx, prepend=prepend, left=denormalized if is_denormalized else json_extract), params, ) elif operator == "icontains": value = "%{}%".format(prop.value) params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): value } return ( "AND {left} LIKE %(v{prepend}_{idx})s".format( idx=idx, prepend=prepend, left=denormalized if is_denormalized else json_extract), params, ) elif operator == "not_icontains": value = "%{}%".format(prop.value) params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): value } return ( "AND NOT ({left} LIKE %(v{prepend}_{idx})s)".format( idx=idx, prepend=prepend, left=denormalized if is_denormalized else json_extract), params, ) elif operator in ("regex", "not_regex"): if not is_valid_regex(prop.value): return "AND 1 = 2", {} params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } return ( "AND {regex_function}({left}, %(v{prepend}_{idx})s)".format( regex_function="match" if operator == "regex" else "NOT match", idx=idx, prepend=prepend, left=denormalized if is_denormalized else json_extract, ), params, ) elif operator == "is_set": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } if is_denormalized: return ( "AND NOT isNull({left})".format(left=denormalized), params, ) return ( "AND JSONHas({prop_var}, %(k{prepend}_{idx})s)".format( idx=idx, prepend=prepend, prop_var=prop_var), params, ) elif operator == "is_not_set": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } if is_denormalized: return ( "AND isNull({left})".format(left=denormalized), params, ) return ( "AND (isNull({left}) OR NOT JSONHas({prop_var}, %(k{prepend}_{idx})s))" .format(idx=idx, prepend=prepend, prop_var=prop_var, left=json_extract), params, ) elif operator == "gt": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } return ( "AND toInt64OrNull(trim(BOTH '\"' FROM replaceRegexpAll({left}, ' ', ''))) > %(v{prepend}_{idx})s" .format( idx=idx, prepend=prepend, left=denormalized if is_denormalized else "visitParamExtractRaw({prop_var}, %(k{prepend}_{idx})s)". format( idx=idx, prepend=prepend, prop_var=prop_var, ), ), params, ) elif operator == "lt": params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): prop.value } return ( "AND toInt64OrNull(trim(BOTH '\"' FROM replaceRegexpAll({left}, ' ', ''))) < %(v{prepend}_{idx})s" .format( idx=idx, prepend=prepend, left=denormalized if is_denormalized else "visitParamExtractRaw({prop_var}, %(k{prepend}_{idx})s)". format( idx=idx, prepend=prepend, prop_var=prop_var, ), ), params, ) else: if is_json(prop.value) and not is_denormalized: clause = "AND has(%(v{prepend}_{idx})s, replaceRegexpAll(visitParamExtractRaw({prop_var}, %(k{prepend}_{idx})s),' ', ''))" params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): box_value(prop.value, remove_spaces=True), } else: clause = "AND has(%(v{prepend}_{idx})s, {left})" params = { "k{}_{}".format(prepend, idx): prop.key, "v{}_{}".format(prepend, idx): box_value(prop.value) } return ( clause.format( left=denormalized if is_denormalized else json_extract, idx=idx, prepend=prepend, prop_var=prop_var), params, )