예제 #1
0
def parse_prop_clauses(filters: List[Property],
                       team_id: int,
                       prepend: str = "global",
                       table_name: str = "") -> Tuple[str, Dict]:
    """Turn property filters into SQL clause text plus query parameters.

    Each filter contributes one fragment, chosen by ``prop.type``:

    * ``"cohort"``: ``distinct_id IN (<cohort query>)``; the cohort is looked
      up by primary key and a failed lookup is not handled here.
    * ``"person"``: ``distinct_id IN (<distinct ids by property sub-query>)``.
    * anything else: the extracted property condition followed by a
      ``team_id`` check.

    Args:
        filters: Property filters to translate.
        team_id: Stored under ``"team_id"`` in the returned parameters.
        prepend: Namespace passed to ``prop_filter_json_extract``; person
            filters use ``prepend + "person"``.
        table_name: Optional table name; when non-empty it is used as a
            ``"<table_name>."`` column prefix.

    Returns:
        ``(sql, params)`` where ``sql`` is the fragments joined by single
        spaces and ``params`` holds every placeholder value collected.
    """
    column_prefix = f"{table_name}." if table_name != "" else ""
    query_params: Dict[str, Any] = {"team_id": team_id}
    clauses = []

    for position, prop in enumerate(filters):
        if prop.type == "cohort":
            cohort = Cohort.objects.get(pk=prop.value)
            cohort_sql, cohort_params = format_filter_query(cohort)
            query_params.update(cohort_params)
            clauses.append(
                f"AND {column_prefix}distinct_id IN ({cohort_sql})")
            continue

        if prop.type == "person":
            person_sql, person_params = prop_filter_json_extract(
                prop, position, f"{prepend}person")
            distinct_ids_sql = GET_DISTINCT_IDS_BY_PROPERTY_SQL.format(
                filters=person_sql)
            clauses.append(
                f"AND {column_prefix}distinct_id IN ({distinct_ids_sql})")
            query_params.update(person_params)
            continue

        # Every other property type is handled as a property on the row itself.
        event_sql, event_params = prop_filter_json_extract(
            prop, position, prepend, prop_var=f"{column_prefix}properties")
        clauses.append(
            f"{event_sql} AND {column_prefix}team_id = %(team_id)s")
        query_params.update(event_params)

    return " ".join(clauses), query_params
예제 #2
0
def parse_prop_clauses(key: str, filters: List[Property], team: Team, prepend: str = "") -> Tuple[str, Dict]:
    """Translate property filters into ``AND ... IN (...)`` SQL fragments.

    Args:
        key: Column compared against the event-property sub-query for
            non-person, non-cohort filters.
        filters: Property filters to translate.
        team: Team whose primary key is formatted into ``EVENT_PROP_CLAUSE``.
        prepend: Namespace used in the generated placeholder names so that
            parameters from separate calls do not collide.

    Returns:
        ``(sql, params)``: the concatenated fragments (each ends with a
        space) and the placeholder values they reference.
    """
    clauses = []
    params: Dict[str, Any] = {}
    for idx, prop in enumerate(filters):
        if prop.type == "cohort":
            cohort = Cohort.objects.get(pk=prop.value)
            person_id_query, cohort_filter_params = format_filter_query(cohort)
            params.update(cohort_filter_params)
            clauses.append("AND distinct_id IN ({clause}) ".format(clause=person_id_query))
            continue

        is_person_prop = prop.type == "person"
        # Derive the namespace per property instead of overwriting `prepend`:
        # reassigning the parameter made every later event property use the
        # "person" namespace and dropped the caller's prefix altogether.
        namespace = "{}person".format(prepend) if is_person_prop else prepend
        key_param = "k{}_{}".format(namespace, idx)
        value_param = "v{}_{}".format(namespace, idx)
        condition = "(ep.key = %({key_param})s) AND (ep.value {operator} %({value_param})s)".format(
            key_param=key_param, operator=get_operator(prop.operator), value_param=value_param
        )

        if is_person_prop:
            clause = GET_DISTINCT_IDS_BY_PROPERTY_SQL.format(filters=condition)
            clauses.append("AND distinct_id IN ({clause}) ".format(clause=clause))
        else:
            clause = EVENT_PROP_CLAUSE.format(team_id=team.pk, filters=condition)
            clauses.append("AND {key} IN ({clause}) ".format(key=key, clause=clause))

        params[key_param] = prop.key
        params[value_param] = _pad_value(prop.value)

    return "".join(clauses), params
예제 #3
0
def _process_content_sql(team: Team, entity: Entity, filter: Filter):
    """Build the person-trend SQL and its parameters for one entity.

    A cohort breakdown (other than ``"all"``) becomes a
    ``distinct_id IN (...)`` clause. A person breakdown whose key and value
    are both strings is appended to ``filter.properties`` as a person
    property, which mutates that list in place.

    Returns:
        ``(content_sql, params)``: the formatted ``PERSON_TREND_SQL`` and the
        merged query parameters.
    """
    filter = _handle_date_interval(filter)

    date_from_sql, date_to_sql, _ = parse_timestamps(filter=filter, team_id=team.pk)
    entity_sql, entity_params = format_entity_filter(entity=entity)

    cohort_clause = ""
    cohort_params: Dict[str, Any] = {}

    if filter.breakdown_type == "cohort" and filter.breakdown_value != "all":
        cohort = Cohort.objects.get(pk=filter.breakdown_value)
        cohort_sql, cohort_params = format_filter_query(cohort)
        cohort_clause = f"AND distinct_id IN ({cohort_sql})"
    elif (
        filter.breakdown_type == "person"
        and isinstance(filter.breakdown, str)
        and isinstance(filter.breakdown_value, str)
    ):
        filter.properties.append(
            Property(key=filter.breakdown, value=filter.breakdown_value, type="person")
        )

    prop_filters, prop_filter_params = parse_prop_clauses(filter.properties, team.pk)
    base_params: Dict = {
        "team_id": team.pk,
        **prop_filter_params,
        **entity_params,
        "offset": filter.offset,
    }

    content_sql = PERSON_TREND_SQL.format(
        entity_filter=f"AND {entity_sql}",
        parsed_date_from=date_from_sql,
        parsed_date_to=date_to_sql,
        filters=prop_filters,
        breakdown_filter="",
        person_filter=cohort_clause,
    )
    # Cohort parameters are merged last, so they win on any key clash.
    return content_sql, {**base_params, **cohort_params}
예제 #4
0
    def test_cohort_updated_props(self):
        """A cohort must be evaluated against the newest version of a person.

        Two person rows share one uuid: the earlier one has
        ``$some_prop == "updated"`` and the later one ``"something"``. A
        cohort filtering on ``"updated"`` is therefore expected to match
        nobody.
        """
        # The way clickhouse works is that updates aren't instant, so two people with the same ID are in the database
        # Make sure we get the last one.
        team_pk = self.team.pk
        older_version = _create_person(
            distinct_ids=["some_other_id_2"],
            team_id=team_pk,
            properties={"$some_prop": "updated"},
            timestamp=datetime(2020, 1, 1, 12, 0, 1),
        )
        _create_person(
            uuid=older_version.uuid,
            distinct_ids=["some_other_id"],
            team_id=team_pk,
            properties={"$some_prop": "something"},
            timestamp=datetime(2020, 1, 1, 12, 0, 4),
        )

        cohort = Cohort.objects.create(
            team=self.team,
            groups=[{"properties": {"$some_prop": "updated"}}],
            name="cohort1",
        )

        cohort_sql, cohort_params = format_filter_query(cohort)
        rows = sync_execute(cohort_sql, {**cohort_params, "team_id": team_pk})

        self.assertEqual(len(rows), 0)
예제 #5
0
def parse_prop_clauses(
    filters: List[Property],
    team_id: Optional[int],
    prepend: str = "global",
    table_name: str = "",
    allow_denormalized_props: bool = False,
    filter_test_accounts=False,
) -> Tuple[str, Dict]:
    """Turn property filters into SQL clause text plus query parameters.

    Args:
        filters: Property filters to translate. The list is not modified.
        team_id: Team primary key. When not ``None`` it is stored under
            ``"team_id"`` in the returned parameters; it also scopes the
            cohort lookup and the test-account-filter lookup.
        prepend: Namespace passed to ``prop_filter_json_extract``; person
            filters use ``prepend + "person"``.
        table_name: Optional table name, used as a ``"<table_name>."`` prefix.
        allow_denormalized_props: Forwarded to ``prop_filter_json_extract``.
        filter_test_accounts: When truthy, the team's ``test_account_filters``
            are applied after the supplied filters.

    Returns:
        ``(sql, params)`` where ``sql`` is the fragments joined by single
        spaces and ``params`` holds the collected placeholder values.
    """
    final = []
    params: Dict[str, Any] = {}
    if team_id is not None:
        params["team_id"] = team_id
    if table_name != "":
        table_name += "."

    if filter_test_accounts:
        test_account_filters = Team.objects.only("test_account_filters").get(
            id=team_id).test_account_filters
        # Build a new list instead of extending in place: the caller's list
        # (typically `filter.properties`) would otherwise gain the
        # test-account filters again on every call.
        filters = [
            *filters,
            *(Property(**prop) for prop in test_account_filters),
        ]

    for idx, prop in enumerate(filters):
        if prop.type == "cohort":
            cohort = Cohort.objects.get(pk=prop.value, team_id=team_id)
            person_id_query, cohort_filter_params = format_filter_query(cohort)
            params.update(cohort_filter_params)
            final.append("AND {table_name}distinct_id IN ({clause})".format(
                table_name=table_name, clause=person_id_query))
        elif prop.type == "person":
            filter_query, filter_params = prop_filter_json_extract(
                prop,
                idx,
                "{}person".format(prepend),
                allow_denormalized_props=allow_denormalized_props)
            final.append(
                "AND {table_name}distinct_id IN ({filter_query})".format(
                    filter_query=GET_DISTINCT_IDS_BY_PROPERTY_SQL.format(
                        filters=filter_query),
                    table_name=table_name))
            params.update(filter_params)
        elif prop.type == "element":
            query, filter_params = filter_element({prop.key: prop.value},
                                                  prepend="{}_".format(idx))
            # Only the first item returned by filter_element is used.
            final.append("AND {}".format(query[0]))
            params.update(filter_params)
        else:
            filter_query, filter_params = prop_filter_json_extract(
                prop,
                idx,
                prepend,
                prop_var="{}properties".format(table_name),
                allow_denormalized_props=allow_denormalized_props,
            )

            # NOTE: this is a truthiness test, so a team_id of 0 skips the
            # team check even though it was added to `params` above.
            final.append(
                f"{filter_query} AND {table_name}team_id = %(team_id)s"
                if team_id else filter_query)
            params.update(filter_params)
    return " ".join(final), params
예제 #6
0
def _process_content_sql(team: Team, entity: Entity, filter: Filter):
    """Build the persons SQL and its parameters for one entity.

    A cohort breakdown (other than ``"all"``) becomes a
    ``distinct_id IN (...)`` clause. Any other breakdown whose key and value
    are both strings is appended to ``filter.properties`` as a property of
    the breakdown's type, which mutates that list in place.

    Entities whose ``math`` is weekly or monthly active use
    ``PERSONS_ACTIVE_USER_SQL``; all others use ``PERSON_TREND_SQL``.

    Returns:
        ``(content_sql, params)``: the formatted SQL and the merged query
        parameters.
    """
    filter = _handle_date_interval(filter)

    date_from_sql, date_to_sql, _ = parse_timestamps(filter=filter,
                                                     team_id=team.pk)
    entity_sql, entity_params = format_entity_filter(entity=entity)

    cohort_clause = ""
    cohort_params: Dict[str, Any] = {}

    if filter.breakdown_type == "cohort" and filter.breakdown_value != "all":
        cohort = Cohort.objects.get(pk=filter.breakdown_value)
        cohort_sql, cohort_params = format_filter_query(cohort)
        cohort_clause = f"AND distinct_id IN ({cohort_sql})"
    elif (filter.breakdown_type
          and isinstance(filter.breakdown, str)
          and isinstance(filter.breakdown_value, str)):
        filter.properties.append(
            Property(key=filter.breakdown,
                     value=filter.breakdown_value,
                     type=filter.breakdown_type))

    prop_filters, prop_filter_params = parse_prop_clauses(
        filter.properties,
        team.pk,
        filter_test_accounts=filter.filter_test_accounts)
    base_params: Dict = {
        "team_id": team.pk,
        **prop_filter_params,
        **entity_params,
        "offset": filter.offset,
    }

    # Template arguments shared by both SQL variants.
    shared_sql_args = {
        "parsed_date_from": date_from_sql,
        "parsed_date_to": date_to_sql,
        "filters": prop_filters,
        "breakdown_filter": "",
        "person_filter": cohort_clause,
    }
    entity_clause = f"AND {entity_sql}"

    if entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]:
        active_user_params = get_active_user_params(filter, entity, team.pk)
        content_sql = PERSONS_ACTIVE_USER_SQL.format(
            entity_query=entity_clause,
            GET_TEAM_PERSON_DISTINCT_IDS=GET_TEAM_PERSON_DISTINCT_IDS,
            **shared_sql_args,
            **active_user_params,
        )
    else:
        content_sql = PERSON_TREND_SQL.format(entity_filter=entity_clause,
                                              **shared_sql_args)

    # Cohort parameters are merged last, so they win on any key clash.
    return content_sql, {**base_params, **cohort_params}
예제 #7
0
 def _parse_breakdown_cohorts(self, cohorts: BaseManager) -> Tuple[List[str], Dict]:
     queries = []
     params: Dict[str, Any] = {}
     for cohort in cohorts:
         person_id_query, cohort_filter_params = format_filter_query(cohort)
         params = {**params, **cohort_filter_params}
         cohort_query = person_id_query.replace(
             "SELECT distinct_id", "SELECT distinct_id, {} as value".format(cohort.pk)
         )
         queries.append(cohort_query)
     return queries, params
예제 #8
0
def _parse_breakdown_cohorts(cohorts: BaseManager) -> Tuple[List[str], Dict]:
    queries = []
    params: Dict[str, Any] = {}
    for idx, cohort in enumerate(cohorts):
        person_id_query, cohort_filter_params = format_filter_query(
            cohort, idx)
        params = {**params, **cohort_filter_params}
        cohort_query = person_id_query.replace(
            "SELECT distinct_id", f"SELECT distinct_id, {cohort.pk} as value")
        queries.append(cohort_query)
    return queries, params
예제 #9
0
 def _parse_breakdown_cohorts(self,
                              cohorts: BaseManager) -> Tuple[str, Dict]:
     queries = []
     params: Dict[str, Any] = {}
     for cohort in cohorts:
         person_id_query, cohort_filter_params = format_filter_query(cohort)
         params = {**params, **cohort_filter_params}
         cohort_query = BREAKDOWN_COHORT_FILTER_SQL.format(
             clause=person_id_query, cohort_pk=cohort.pk)
         queries.append(cohort_query)
     return " UNION ALL ".join(queries), params
예제 #10
0
def _parse_breakdown_cohorts(cohorts: List[Cohort]) -> Tuple[List[str], Dict]:
    queries = []
    params: Dict[str, Any] = {}
    for idx, cohort in enumerate(cohorts):
        person_id_query, cohort_filter_params = format_filter_query(
            cohort, idx)
        params = {**params, **cohort_filter_params}
        cohort_query = person_id_query.replace(
            "SELECT distinct_id", f"SELECT distinct_id, {cohort.pk} as value",
            1)  # only replace the first top level occurrence
        queries.append(cohort_query)
    return queries, params
예제 #11
0
    def _calculate_entity_people(self, team: Team, entity: Entity,
                                 filter: Filter):
        """Query and serialize the people behind one entity of a trend.

        A cohort breakdown (other than ``"all"``) becomes a
        ``distinct_id IN (...)`` clause. A person breakdown whose key and
        value are both strings is appended to ``filter.properties`` as a
        person property, which mutates that list in place.

        Returns:
            The ``.data`` of ``ClickhousePersonSerializer`` for the rows
            returned by the people query.
        """
        date_from_sql, date_to_sql, _ = parse_timestamps(filter=filter,
                                                         team_id=team.pk)
        entity_sql, entity_params = format_entity_filter(entity=entity)

        cohort_clause = ""
        cohort_params: Dict[str, Any] = {}

        if filter.breakdown_type == "cohort" and filter.breakdown_value != "all":
            cohort = Cohort.objects.get(pk=filter.breakdown_value)
            cohort_sql, cohort_params = format_filter_query(cohort)
            cohort_clause = f"AND distinct_id IN ({cohort_sql})"
        elif (filter.breakdown_type == "person"
              and isinstance(filter.breakdown, str)
              and isinstance(filter.breakdown_value, str)):
            filter.properties.append(
                Property(key=filter.breakdown,
                         value=filter.breakdown_value,
                         type="person"))

        prop_filters, prop_filter_params = parse_prop_clauses(
            filter.properties, team.pk)
        base_params: Dict = {
            "team_id": team.pk,
            **prop_filter_params,
            **entity_params,
            "offset": filter.offset,
        }

        content_sql = PERSON_TREND_SQL.format(
            entity_filter=f"AND {entity_sql}",
            parsed_date_from=date_from_sql,
            parsed_date_to=date_to_sql,
            filters=prop_filters,
            breakdown_filter="",
            person_filter=cohort_clause,
        )

        people_sql = PEOPLE_THROUGH_DISTINCT_SQL.format(
            content_sql=content_sql,
            latest_person_sql=GET_LATEST_PERSON_SQL.format(query=""))
        # Cohort parameters are merged last, so they win on any key clash.
        people = sync_execute(people_sql, {**base_params, **cohort_params})

        return ClickhousePersonSerializer(people, many=True).data
예제 #12
0
    def test_static_cohort_precalculated(self):
        """Check the SQL generated for a static cohort with precalculation on.

        With ``USE_PRECALCULATED_CH_COHORT_PEOPLE`` enabled, the query from
        ``format_filter_query`` must equal the expected statement that reads
        from ``person_static_cohort``. Both sides are normalized with
        ``sqlparse`` before comparison.
        """
        for distinct_id in ("1", "123", "2"):
            Person.objects.create(team_id=self.team.pk, distinct_ids=[distinct_id])
        # Team leakage
        other_team = Team.objects.create(organization=self.organization)
        Person.objects.create(team=other_team, distinct_ids=["1"])

        static_cohort = Cohort.objects.create(
            team=self.team,
            groups=[],
            is_static=True,
            last_calculation=timezone.now(),
        )
        static_cohort.insert_users_by_list(["1", "123"])

        with freeze_time("2020-01-10"):
            static_cohort.calculate_people_ch()

        expected_sql = """
                SELECT distinct_id
                FROM
                (SELECT distinct_id,
                        argMax(person_id, _timestamp) as person_id
                FROM
                    (SELECT distinct_id,
                            person_id,
                            max(_timestamp) as _timestamp
                    FROM person_distinct_id
                    WHERE team_id = %(team_id)s
                    GROUP BY person_id,
                            distinct_id,
                            team_id
                    HAVING max(is_deleted) = 0)
                GROUP BY distinct_id)
                WHERE person_id IN
                    (SELECT person_id
                    FROM person_static_cohort
                    WHERE cohort_id = %(_cohort_id_0)s
                    AND team_id = %(team_id)s)
                """

        with self.settings(USE_PRECALCULATED_CH_COHORT_PEOPLE=True):
            generated_sql, _ = format_filter_query(static_cohort)
            self.assertEqual(
                sqlparse.format(generated_sql, reindent=True),
                sqlparse.format(expected_sql, reindent=True),
            )
예제 #13
0
    def test_static_cohort_precalculated(self):
        """Snapshot the SQL generated for a static cohort with precalculation on.

        With ``USE_PRECALCULATED_CH_COHORT_PEOPLE`` enabled, the query from
        ``format_filter_query`` is compared against the stored snapshot.
        """
        for distinct_id in ("1", "123", "2"):
            Person.objects.create(team_id=self.team.pk, distinct_ids=[distinct_id])
        # Team leakage
        other_team = Team.objects.create(organization=self.organization)
        Person.objects.create(team=other_team, distinct_ids=["1"])

        static_cohort = Cohort.objects.create(
            team=self.team,
            groups=[],
            is_static=True,
            last_calculation=timezone.now(),
        )
        static_cohort.insert_users_by_list(["1", "123"])

        with freeze_time("2020-01-10"):
            static_cohort.calculate_people_ch(pending_version=0)

        with self.settings(USE_PRECALCULATED_CH_COHORT_PEOPLE=True):
            generated_sql, _ = format_filter_query(static_cohort)
            self.assertQueryMatchesSnapshot(generated_sql)
예제 #14
0
def parse_prop_clauses(
    key: str, filters: List[Property], team: Team, prepend: str = "", json_extract: bool = False
) -> Tuple[str, Dict]:
    """Translate property filters into SQL fragments and their parameters.

    Args:
        key: Not read by this implementation.
        filters: Property filters to translate.
        team: Not read by this implementation.
        prepend: Namespace passed to ``prop_filter_json_extract``; person
            filters use ``prepend + "person"``.
        json_extract: Not read by this implementation.

    Returns:
        ``(sql, params)``: the concatenated fragments and the collected
        placeholder values. Event-property fragments reference
        ``%(team_id)s``, which is not added to ``params`` here, so the
        caller has to supply it.
    """
    fragments = []
    query_params: Dict[str, Any] = {}

    for position, prop in enumerate(filters):
        if prop.type == "cohort":
            cohort = Cohort.objects.get(pk=prop.value)
            cohort_sql, cohort_params = format_filter_query(cohort)
            query_params.update(cohort_params)
            fragments.append(f"AND distinct_id IN ({cohort_sql}) ")
            continue

        if prop.type == "person":
            person_sql, extracted_params = prop_filter_json_extract(prop, position, f"{prepend}person")
            distinct_ids_sql = GET_DISTINCT_IDS_BY_PROPERTY_SQL.format(filters=person_sql)
            fragments.append(f" AND distinct_id IN ({distinct_ids_sql})")
        else:
            event_sql, extracted_params = prop_filter_json_extract(prop, position, prepend)
            fragments.append(f" {event_sql} AND team_id = %(team_id)s")
        query_params.update(extracted_params)

    return "".join(fragments), query_params
예제 #15
0
def parse_prop_clauses(
    filters: List[Property],
    team_id: Optional[int],
    prepend: str = "global",
    table_name: str = "",
    allow_denormalized_props: bool = True,
    has_person_id_joined: bool = True,
    person_properties_mode: PersonPropertiesMode = PersonPropertiesMode.
    USING_SUBQUERY,
) -> Tuple[str, Dict]:
    """Turn property filters into SQL clause text plus query parameters.

    One fragment is produced per filter, chosen by ``prop.type`` (cohort,
    person, element, event, group, static-cohort / precalculated-cohort).
    Filters of any other type, and person filters when
    ``person_properties_mode`` is ``EXCLUDE``, contribute nothing.

    Args:
        filters: Property filters to translate.
        team_id: When not ``None`` it is stored under ``"team_id"`` in the
            returned parameters; it also scopes the cohort lookup.
        prepend: Namespace passed to the per-property helpers.
        table_name: Optional table name, used as a ``"<table_name>."`` prefix.
        allow_denormalized_props: Forwarded to ``prop_filter_json_extract``.
        has_person_id_joined: When true, static/precalculated cohort
            conditions are added directly; otherwise they are wrapped in a
            ``distinct_id IN (...)`` sub-query.
        person_properties_mode: Selects how person properties are filtered.

    Returns:
        ``(sql, params)`` where ``sql`` is the fragments joined by single
        spaces and ``params`` holds the collected placeholder values.
    """
    column_prefix = f"{table_name}." if table_name != "" else ""
    query_params: Dict[str, Any] = {} if team_id is None else {"team_id": team_id}
    clauses: List[str] = []

    for position, prop in enumerate(filters):
        if prop.type == "cohort":
            try:
                cohort = Cohort.objects.get(pk=prop.value, team_id=team_id)
            except Cohort.DoesNotExist:
                # If cohort doesn't exist, nothing can match
                clauses.append("AND 0 = 13")
                continue
            cohort_sql, cohort_params = format_filter_query(cohort, position)
            query_params.update(cohort_params)
            clauses.append(
                f"AND {column_prefix}distinct_id IN ({cohort_sql})")

        elif prop.type == "person" and person_properties_mode != PersonPropertiesMode.EXCLUDE:
            # :TODO: Clean this up by using ClickhousePersonQuery over GET_DISTINCT_IDS_BY_PROPERTY_SQL to have access
            #   to materialized columns
            # :TODO: (performance) Avoid subqueries whenever possible, use joins instead
            reads_person_column = person_properties_mode == PersonPropertiesMode.USING_PERSON_PROPERTIES_COLUMN
            person_sql, person_params = prop_filter_json_extract(
                prop,
                position,
                f"{prepend}person",
                prop_var="person_props" if reads_person_column else "properties",
                allow_denormalized_props=allow_denormalized_props
                and reads_person_column,
            )
            if not reads_person_column:
                distinct_ids_sql = GET_DISTINCT_IDS_BY_PROPERTY_SQL.format(
                    filters=person_sql)
                person_sql = f"AND {column_prefix}distinct_id IN ({distinct_ids_sql})"
            clauses.append(person_sql)
            query_params.update(person_params)

        elif prop.type == "element":
            element_sql, element_params = filter_element(
                {prop.key: prop.value},
                operator=prop.operator,
                prepend=f"{position}_")
            if element_sql:
                clauses.append(f" AND {element_sql}")
                query_params.update(element_params)

        elif prop.type == "event":
            event_sql, event_params = prop_filter_json_extract(
                prop,
                position,
                prepend,
                prop_var=f"{column_prefix}properties",
                allow_denormalized_props=allow_denormalized_props,
            )
            if team_id:
                event_sql = f"{event_sql} AND {column_prefix}team_id = %(team_id)s"
            clauses.append(event_sql)
            query_params.update(event_params)

        elif prop.type == "group":
            # :TRICKY: This assumes group properties have already been joined, as in trends query
            group_sql, group_params = prop_filter_json_extract(
                prop,
                position,
                prepend,
                prop_var=f"group_properties_{prop.group_type_index}",
                allow_denormalized_props=False)
            clauses.append(group_sql)
            query_params.update(group_params)

        elif prop.type in ("static-cohort", "precalculated-cohort"):
            cohort_id = cast(int, prop.value)

            if prop.type == "static-cohort":
                build_cohort_sql = format_static_cohort_query
            else:
                build_cohort_sql = format_precalculated_cohort_query
            membership_sql, membership_params = build_cohort_sql(
                cohort_id,
                position,
                prepend=prepend,
                custom_match_field="person_id")  # type: ignore

            if has_person_id_joined:
                clauses.append(f" AND {membership_sql}")
            else:
                # :TODO: (performance) Avoid subqueries whenever possible, use joins instead
                distinct_ids_sql = GET_DISTINCT_IDS_BY_PERSON_ID_FILTER.format(
                    filters=membership_sql)
                clauses.append(
                    f"AND {column_prefix}distinct_id IN ({distinct_ids_sql})")
            query_params.update(membership_params)

    return " ".join(clauses), query_params
예제 #16
0
파일: property.py 프로젝트: akbansa/posthog
def parse_prop_clauses(
    team_id: int,
    filters: List[Property],
    prepend: str = "global",
    table_name: str = "",
    allow_denormalized_props: bool = True,
    has_person_id_joined: bool = True,
    person_properties_mode: PersonPropertiesMode = PersonPropertiesMode.
    USING_SUBQUERY,
    person_id_joined_alias: str = "person_id",
    group_properties_joined: bool = True,
    property_operator: PropertyOperatorType = PropertyOperatorType.AND,
) -> Tuple[str, Dict]:
    """Turn property filters into one SQL condition plus query parameters.

    One fragment is produced per filter, chosen by ``prop.type`` (cohort,
    person, element, event, group, static-cohort / precalculated-cohort).
    Filters of any other type contribute nothing. The fragments are joined
    with single spaces and the first occurrence of ``property_operator`` in
    the joined text is removed before returning.

    Args:
        team_id: Used to build the team distinct-ids query embedded in the
            sub-query branches. It is not added to the returned parameters,
            and the cohort lookup is not scoped by it.
        filters: Property filters to translate.
        prepend: Namespace passed to the per-property helpers.
        table_name: Optional table name, used as a ``"<table_name>."`` prefix.
        allow_denormalized_props: Forwarded to ``prop_filter_json_extract``
            for event properties, and for person properties only when they
            are read from the person properties column.
        has_person_id_joined: When true, static/precalculated cohort
            conditions are added directly; otherwise they are wrapped in a
            ``distinct_id IN (...)`` sub-query.
        person_properties_mode: Selects how person properties and cohorts
            are filtered.
        person_id_joined_alias: Match field handed to the cohort helpers.
        group_properties_joined: When false, group filters are expressed as
            a sub-query over group ids.
        property_operator: Operator text placed in front of the fragments.
            NOTE(review): presumably a str-valued enum, since it is passed
            to ``str.replace``; confirm against ``PropertyOperatorType``.

    Returns:
        ``(sql, params)``; ``sql`` is ``""`` when no fragment was produced.
    """
    final = []
    params: Dict[str, Any] = {}
    # Turn a bare table name into a column prefix ("events" -> "events.").
    if table_name != "":
        table_name += "."

    for idx, prop in enumerate(filters):
        if prop.type == "cohort":
            try:
                cohort = Cohort.objects.get(pk=prop.value)
            except Cohort.DoesNotExist:
                final.append(
                    f"{property_operator} 0 = 13"
                )  # If cohort doesn't exist, nothing can match, unless an OR operator is used
            else:

                if person_properties_mode == PersonPropertiesMode.USING_SUBQUERY:
                    person_id_query, cohort_filter_params = format_filter_query(
                        cohort, idx)
                    params = {**params, **cohort_filter_params}
                    final.append(
                        f"{property_operator} {table_name}distinct_id IN ({person_id_query})"
                    )
                else:
                    # Every other mode matches on the joined person id column.
                    person_id_query, cohort_filter_params = format_cohort_subquery(
                        cohort,
                        idx,
                        custom_match_field=f"{person_id_joined_alias}")
                    params = {**params, **cohort_filter_params}
                    final.append(f"{property_operator} {person_id_query}")
        elif prop.type == "person" and person_properties_mode != PersonPropertiesMode.DIRECT:
            # :TODO: Clean this up by using ClickhousePersonQuery over GET_DISTINCT_IDS_BY_PROPERTY_SQL to have access
            #   to materialized columns
            # :TODO: (performance) Avoid subqueries whenever possible, use joins instead
            is_direct_query = person_properties_mode == PersonPropertiesMode.USING_PERSON_PROPERTIES_COLUMN
            filter_query, filter_params = prop_filter_json_extract(
                prop,
                idx,
                "{}person".format(prepend),
                prop_var="person_props" if is_direct_query else "properties",
                allow_denormalized_props=allow_denormalized_props
                and is_direct_query,
                property_operator=property_operator,
            )
            if is_direct_query:
                final.append(filter_query)
                params.update(filter_params)
            else:
                # Subquery filter here always should be blank as it's the first
                filter_query = filter_query.replace(property_operator, "", 1)
                final.append(
                    " {property_operator} {table_name}distinct_id IN ({filter_query})"
                    .format(
                        filter_query=GET_DISTINCT_IDS_BY_PROPERTY_SQL.format(
                            filters=filter_query,
                            GET_TEAM_PERSON_DISTINCT_IDS=
                            get_team_distinct_ids_query(team_id),
                        ),
                        table_name=table_name,
                        property_operator=property_operator,
                    ))
                params.update(filter_params)
        elif prop.type == "person" and person_properties_mode == PersonPropertiesMode.DIRECT:
            # this setting is used to generate the ClickhousePersonQuery SQL.
            # When using direct mode, there should only be person properties in the entire
            # property group
            filter_query, filter_params = prop_filter_json_extract(
                prop,
                idx,
                prepend=f"personquery_{prepend}",
                allow_denormalized_props=True,
                transform_expression=lambda column_name:
                f"argMax(person.{column_name}, _timestamp)",
                property_operator=property_operator,
            )
            final.append(filter_query)
            params.update(filter_params)
        elif prop.type == "element":
            query, filter_params = filter_element({prop.key: prop.value},
                                                  operator=prop.operator,
                                                  prepend="{}_".format(idx))
            # A falsy query adds neither a fragment nor its parameters.
            if query:
                final.append(f"{property_operator} {query}")
                params.update(filter_params)
        elif prop.type == "event":
            filter_query, filter_params = prop_filter_json_extract(
                prop,
                idx,
                prepend,
                prop_var="{}properties".format(table_name),
                allow_denormalized_props=allow_denormalized_props,
                property_operator=property_operator,
            )
            final.append(f" {filter_query}")
            params.update(filter_params)
        elif prop.type == "group":
            if group_properties_joined:
                filter_query, filter_params = prop_filter_json_extract(
                    prop,
                    idx,
                    prepend,
                    prop_var=f"group_properties_{prop.group_type_index}",
                    allow_denormalized_props=False,
                    property_operator=property_operator,
                )
                final.append(filter_query)
                params.update(filter_params)
            else:
                # :TRICKY: offer groups support for queries which don't support automatically joining with groups table yet (e.g. lifecycle)
                # NOTE(review): property_operator is not forwarded here, so the
                # helper's own default applies inside the sub-query; confirm
                # this is intended.
                filter_query, filter_params = prop_filter_json_extract(
                    prop,
                    idx,
                    prepend,
                    prop_var=f"group_properties",
                    allow_denormalized_props=False)
                group_type_index_var = f"{prepend}_group_type_index_{idx}"
                groups_subquery = GET_GROUP_IDS_BY_PROPERTY_SQL.format(
                    filters=filter_query,
                    group_type_index_var=group_type_index_var)
                final.append(
                    f"{property_operator} {table_name}$group_{prop.group_type_index} IN ({groups_subquery})"
                )
                params.update(filter_params)
                params[group_type_index_var] = prop.group_type_index
        elif prop.type in ("static-cohort", "precalculated-cohort"):
            cohort_id = cast(int, prop.value)

            method = format_static_cohort_query if prop.type == "static-cohort" else format_precalculated_cohort_query
            filter_query, filter_params = method(
                cohort_id,
                idx,
                prepend=prepend,
                custom_match_field=person_id_joined_alias)  # type: ignore
            if has_person_id_joined:
                final.append(f"{property_operator} {filter_query}")
            else:
                # :TODO: (performance) Avoid subqueries whenever possible, use joins instead
                subquery = GET_DISTINCT_IDS_BY_PERSON_ID_FILTER.format(
                    filters=filter_query,
                    GET_TEAM_PERSON_DISTINCT_IDS=get_team_distinct_ids_query(
                        team_id),
                )
                final.append(
                    f"{property_operator} {table_name}distinct_id IN ({subquery})"
                )
            params.update(filter_params)

    if final:
        # remove the first operator
        return " ".join(final).replace(property_operator, "", 1), params

    return "", params
예제 #17
0
def parse_prop_clauses(key: str,
                       filters: List[Property],
                       team: Team,
                       prepend: str = "") -> Tuple[str, Dict]:
    """Translate property filters into a SQL fragment plus its query params.

    Every filter contributes one clause that starts with ``AND`` and ends with
    a single space, so the result can be appended to an existing ``WHERE``:

    * ``cohort``: ``distinct_id IN (<query from format_filter_query>)``.
    * ``person``: ``distinct_id IN (<GET_DISTINCT_IDS_BY_PROPERTY_SQL>)``.
    * any other type: ``<key> [NOT ]IN (<EVENT_PROP_CLAUSE>)`` followed by
      ``AND team_id = %(team_id)s``.

    Args:
        key: Column name compared against the event-property subquery.
        filters: Property filters to translate, processed in order.
        team: Team whose ``pk`` is formatted into ``EVENT_PROP_CLAUSE``.
        prepend: Prefix for the generated parameter names
            (``k<prefix>_<idx>`` / ``v<prefix>_<idx>``). Person filters use
            ``<prepend>person`` as their prefix.

    Returns:
        ``(clauses, params)``. ``clauses`` is ``""`` when ``filters`` is empty.
        The event-property clause references ``%(team_id)s`` but this function
        never adds ``team_id`` to ``params``; the caller has to supply it.
    """
    clauses: List[str] = []
    params: Dict[str, Any] = {}

    for idx, prop in enumerate(filters):
        if prop.type == "cohort":
            cohort = Cohort.objects.get(pk=prop.value)
            person_id_query, cohort_filter_params = format_filter_query(cohort)
            params.update(cohort_filter_params)
            clauses.append(
                "AND distinct_id IN ({clause}) ".format(clause=person_id_query))
            continue

        is_person = prop.type == "person"
        # Derive a per-filter prefix instead of rebinding `prepend`: the old
        # code overwrote the parameter with "person", which leaked into every
        # later event-property filter and discarded the caller's prefix.
        param_prefix = "{}person".format(prepend) if is_person else prepend
        key_arg = "k{}_{}".format(param_prefix, idx)
        value_arg = "v{}_{}".format(param_prefix, idx)

        operator_clause, value = get_operator(prop, value_arg)
        negation = "NOT " if prop.operator and "not" in prop.operator else ""

        prop_filter = "(ep.key = %({key_arg})s) {and_statement} {operator_clause}".format(
            key_arg=key_arg,
            and_statement="AND" if operator_clause else "",
            operator_clause=operator_clause,
        )

        if is_person:
            subquery = GET_DISTINCT_IDS_BY_PROPERTY_SQL.format(
                filters=prop_filter, negation=negation)
            clauses.append(
                "AND distinct_id IN ({clause}) ".format(clause=subquery))
        else:
            subquery = EVENT_PROP_CLAUSE.format(team_id=team.pk,
                                                filters=prop_filter)
            clauses.append(
                "AND {key} {negation}IN ({clause}) AND team_id = %(team_id)s ".format(
                    key=key, negation=negation, clause=subquery))

        params.update({key_arg: prop.key, value_arg: value})

    return "".join(clauses), params
예제 #18
0
    def _ch_filter_request(self, request: Request, team: Team) -> List:
        """Query people for ``team``, filtered by the request's query params.

        Recognised query parameters:

        * ``category``: ``identified`` / ``anonymous`` filter on
          ``is_identified``.
        * ``search``: space-separated terms. A term containing ``:`` matches
          people having the property key given after the first colon; any
          other term is a ``LIKE`` match against person properties or
          distinct ids.
        * ``cohort``: restricts to people whose distinct ids are in the cohort.
        * ``id``: comma-separated ids. When present, ``PEOPLE_SQL`` is run
          instead and none of the filters above are applied.

        Args:
            request: Incoming request whose query parameters are read.
            team: Team whose ``pk`` is bound as ``team_id``.

        Returns:
            Whatever ``sync_execute`` returns for the chosen query.
        """
        all_filters = ""
        params: Dict[str, Any] = {"offset": 0, "team_id": team.pk}

        category = request.query_params.get("category")
        if category == "identified":
            all_filters += "AND is_identified = 1 "
        elif category == "anonymous":
            all_filters += "AND is_identified = 0 "

        if request.GET.get("search"):
            contains = []
            for idx, part in enumerate(request.GET["search"].split(" ")):
                if ":" not in part:
                    contains.append(part)
                    continue
                arg = "person_{idx}".format(idx=idx)
                all_filters += """
                    AND person_id IN (
                        SELECT id FROM persons_properties_up_to_date_view WHERE key = %({arg})s
                    )
                    """.format(arg=arg)
                params[arg] = part.split(":")[1]

            for idx, search in enumerate(contains):
                arg = "search_{idx}".format(idx=idx)
                # The two alternatives must be parenthesised: AND binds tighter
                # than OR, so without the parentheses the distinct_id branch
                # would bypass every filter appended before this one.
                all_filters += """
                AND (
                    person_id IN (
                        SELECT id FROM person WHERE properties LIKE %({arg})s AND team_id = %(team_id)s
                    ) OR person_id IN (
                        SELECT person_id FROM person_distinct_id WHERE distinct_id LIKE %({arg})s AND team_id = %(team_id)s
                    )
                )
                """.format(arg=arg)
                params[arg] = "%{}%".format(search)

        if request.GET.get("cohort"):
            cohort = Cohort.objects.get(pk=request.GET["cohort"])
            cohort_query, cohort_params = format_filter_query(cohort)
            all_filters += """
            AND person_id IN (
                SELECT person_id FROM person_distinct_id WHERE distinct_id IN (
                    {clause}
                )
            ) """.format(clause=cohort_query)
            params.update(cohort_params)

        # TODO: filtering by the request's "properties" is not implemented.

        if request.GET.get("id"):
            people = request.GET["id"].split(",")
            # NOTE(review): the request-derived list is formatted straight into
            # the SQL text and no team_id is bound here. Confirm PEOPLE_SQL
            # handles this safely, or switch to bound parameters.
            return sync_execute(PEOPLE_SQL.format(content_sql=people),
                                {"offset": 0})

        return sync_execute(
            PEOPLE_BY_TEAM_SQL.format(filters=all_filters),
            params,
        )