예제 #1
0
    def _get_props(self,
                   filters: List[Property],
                   allow_denormalized_props: bool = False) -> Tuple[str, Dict]:

        filter_test_accounts = self._filter.filter_test_accounts
        team_id = self._team_id
        table_name = f"{self.EVENT_TABLE_ALIAS}."
        prepend = "global"

        final = []
        params: Dict[str, Any] = {}

        if filter_test_accounts:
            test_account_filters = Team.objects.only(
                "test_account_filters").get(id=team_id).test_account_filters
            filters.extend([Property(**prop) for prop in test_account_filters])

        for idx, prop in enumerate(filters):
            if prop.type == "cohort":
                person_id_query, cohort_filter_params = self._get_cohort_subquery(
                    prop)
                params = {**params, **cohort_filter_params}
                final.append(f"AND {person_id_query}")

            elif prop.type == "person":
                filter_query, filter_params = prop_filter_json_extract(
                    prop,
                    idx,
                    "{}person".format(prepend),
                    allow_denormalized_props=allow_denormalized_props,
                    prop_var=self._PERSON_PROPERTIES_ALIAS,
                )
                final.append(filter_query)
                params.update(filter_params)
            elif prop.type == "element":
                query, filter_params = filter_element(
                    {prop.key: prop.value}, prepend="{}_".format(idx))
                final.append("AND {}".format(query[0]))
                params.update(filter_params)
            else:
                filter_query, filter_params = prop_filter_json_extract(
                    prop,
                    idx,
                    prepend,
                    prop_var="properties",
                    allow_denormalized_props=allow_denormalized_props,
                )

                final.append(filter_query)
                params.update(filter_params)
        return " ".join(final), params
예제 #2
0
파일: cohort.py 프로젝트: zhang1998/posthog
def format_person_query(cohort: Cohort) -> Tuple[str, Dict[str, Any]]:
    """Translate a cohort's groups into a person filter and its parameters.

    A group with a truthy ``action_id`` becomes a parenthesised sub-select of
    ``distinct_id`` over the events matched by that action. A group with
    truthy ``properties`` becomes ``GET_LATEST_PERSON_ID_SQL`` formatted with
    the property filters. Groups with neither are skipped.

    Returns:
        A ``(sql, params)`` tuple; the per-group clauses are joined with
        ``" OR person_id IN "``.
    """
    clauses = []
    params: Dict[str, Any] = {}

    for group_idx, group in enumerate(cohort.groups):
        if group.get("action_id"):
            action = Action.objects.get(pk=group["action_id"],
                                        team_id=cohort.team.pk)
            action_sql, action_params = format_action_filter(action)
            params.update(action_params)
            extract_person = "SELECT distinct_id FROM events WHERE uuid IN ({query})".format(
                query=action_sql)
            clauses.append(f"({extract_person})")
            continue

        if not group.get("properties"):
            continue

        # Imported only when a property group is actually processed.
        from ee.clickhouse.models.property import prop_filter_json_extract

        group_filter = Filter(data=group)
        fragments = []
        for idx, prop in enumerate(group_filter.properties):
            fragment, fragment_params = prop_filter_json_extract(
                prop=prop,
                idx=idx,
                prepend="{}_{}_{}_person".format(cohort.pk, group_idx, idx))
            params.update(fragment_params)
            fragments.append(" {}".format(fragment))
        clauses.append(
            GET_LATEST_PERSON_ID_SQL.format(query="".join(fragments)))

    return " OR person_id IN ".join(clauses), params
예제 #3
0
    def _get_group_filters(self):
        if self.is_aggregating_by_groups:
            conditions, params = [""], {}

            properties = self._filter.correlation_property_values

            if properties:
                for index, property in enumerate(properties):
                    if property.type != "group":
                        continue

                    expr, prop_params = prop_filter_json_extract(
                        property,
                        index,
                        prepend=f"group_type_{property.group_type_index}",
                        prop_var=
                        f"group_properties_{property.group_type_index}",
                        allow_denormalized_props=True,
                    )

                    conditions.append(expr)
                    params.update(prop_params)

            return " ".join(conditions), params
        else:
            return "", {}
예제 #4
0
def format_person_query(cohort: Cohort) -> Tuple[str, Dict[str, Any]]:
    """Translate a cohort into a person filter clause and its parameters.

    Static cohorts are resolved with a lookup in
    ``PERSON_STATIC_COHORT_TABLE``. For other cohorts, each group with a
    truthy ``action_id`` contributes a ``distinct_id IN (...)`` clause,
    optionally restricted by the group's ``days``. All groups with truthy
    ``properties`` are OR-ed together inside a single
    ``person_id IN <GET_LATEST_PERSON_ID_SQL>`` clause, which is appended
    after the action clauses.

    Returns:
        A ``(sql, params)`` tuple; the clauses are joined with ``" OR "``.
    """
    if cohort.is_static:
        static_sql = (
            "person_id IN (SELECT person_id FROM {} WHERE cohort_id = %(cohort_id)s AND team_id = %(team_id)s)"
            .format(PERSON_STATIC_COHORT_TABLE))
        return static_sql, {"cohort_id": cohort.pk, "team_id": cohort.team_id}

    clauses = []
    property_alternatives = []
    params: Dict[str, Any] = {}

    for group_idx, group in enumerate(cohort.groups):
        if group.get("action_id"):
            action = Action.objects.get(pk=group["action_id"],
                                        team_id=cohort.team.pk)
            action_sql, action_params = format_action_filter(
                action, prepend="_{}_action".format(group_idx))

            date_query: str = ""
            date_params: Dict[str, str] = {}
            if group.get("days"):
                date_query, date_params = parse_action_timestamps(
                    int(group.get("days")))

            extract_person = "SELECT distinct_id FROM events WHERE team_id = %(team_id)s {date_query} AND {query}".format(
                query=action_sql, date_query=date_query)
            params.update(action_params)
            params.update(date_params)
            clauses.append(f"distinct_id IN ({extract_person})")
            continue

        if not group.get("properties"):
            continue

        # Imported only when a property group is actually processed.
        from ee.clickhouse.models.property import prop_filter_json_extract

        group_filter = Filter(data=group)
        fragments = []
        for idx, prop in enumerate(group_filter.properties):
            fragment, fragment_params = prop_filter_json_extract(
                prop=prop,
                idx=idx,
                prepend="{}_{}_{}_person".format(cohort.pk, group_idx, idx))
            params.update(fragment_params)
            fragments.append(fragment)
        # Remove the first "AND " so the group can be used as an OR operand.
        property_alternatives.append("".join(fragments).replace("AND ", "", 1))

    if property_alternatives:
        combined = "AND ({})".format(" OR ".join(property_alternatives))
        clauses.append("person_id IN {}".format(
            GET_LATEST_PERSON_ID_SQL.format(query=combined)))

    return " OR ".join(clauses), params
예제 #5
0
def test_prop_filter_json_extract(test_events, property,
                                  expected_event_indexes):
    """Check that the filter built for ``property`` selects the expected events.

    The generated fragment is appended to an always-true ``WHERE`` clause and
    the returned uuids are compared, order-insensitively, with the entries of
    ``test_events`` at ``expected_event_indexes``.
    """
    query, params = prop_filter_json_extract(property, 0)

    rows = sync_execute(f"SELECT uuid FROM events WHERE 1 = 1 {query}", params)
    actual = sorted(uuid for (uuid, ) in rows)
    wanted = sorted(test_events[index] for index in expected_event_indexes)

    assert actual == wanted
예제 #6
0
파일: cohort.py 프로젝트: EDsCODE/posthog
def get_properties_cohort_subquery(cohort: Cohort, cohort_group: Dict,
                                   group_idx: int):
    """Build the property-filter SQL for one cohort group.

    Each property of ``Filter(data=cohort_group)`` is translated with
    ``prop_filter_json_extract`` and the fragments are concatenated without a
    separator.

    Returns:
        A ``(sql, params)`` tuple, where ``sql`` has its first ``"AND "``
        occurrence removed.
    """
    from ee.clickhouse.models.property import prop_filter_json_extract

    params: Dict[str, Any] = {}
    fragments = []

    for idx, prop in enumerate(Filter(data=cohort_group).properties):
        fragment, fragment_params = prop_filter_json_extract(
            prop=prop,
            idx=idx,
            prepend="{}_{}_{}_person".format(cohort.pk, group_idx, idx))
        params.update(fragment_params)
        fragments.append(fragment)

    return "".join(fragments).replace("AND ", "", 1), params
예제 #7
0
def test_prop_filter_json_extract(test_events, property,
                                  expected_event_indexes, team):
    """Check the non-denormalized filter for ``property`` against ``team``'s events.

    The filter is built with ``allow_denormalized_props=False``. The returned
    uuids are compared, order-insensitively, with the entries of
    ``test_events`` at ``expected_event_indexes``.
    """
    query, params = prop_filter_json_extract(property,
                                             0,
                                             allow_denormalized_props=False)

    rows = sync_execute(
        f"SELECT uuid FROM events WHERE team_id = %(team_id)s {query}",
        {
            "team_id": team.pk,
            **params
        })
    actual = sorted(uuid for (uuid, ) in rows)
    wanted = sorted(test_events[index] for index in expected_event_indexes)

    assert actual == wanted
예제 #8
0
    def _get_person_filters(self) -> Tuple[str, Dict]:
        conditions, params = [""], {}

        properties = self._filter.properties + (self._entity.properties
                                                if self._entity else [])

        for index, property in enumerate(properties):
            if property.type != "person":
                continue

            expr, prop_params = prop_filter_json_extract(
                property,
                index,
                prepend="personquery",
                allow_denormalized_props=True,
                transform_expression=lambda column_name:
                f"argMax(person.{column_name}, _timestamp)",
            )

            conditions.append(expr)
            params.update(prop_params)

        return " ".join(conditions), params
예제 #9
0
def get_properties_cohort_subquery(
        cohort: Cohort, cohort_group: Dict,
        group_idx: int) -> Tuple[str, Dict[str, Any]]:
    """Build the property-filter SQL for one cohort group.

    Walks the flattened property groups of ``Filter(data=cohort_group)``.
    Cohort-type properties are resolved to the referenced cohort in the same
    team; every other property goes through ``prop_filter_json_extract``.

    Returns:
        A ``(sql, params)`` tuple. The clauses are joined with newlines and
        the first ``"AND "`` occurrence is removed. If a referenced cohort
        does not exist, ``("0 = 14", {})`` is returned immediately.
    """
    from ee.clickhouse.models.property import prop_filter_json_extract

    params: Dict[str, Any] = {}
    clauses = []

    # Cohorts don't yet support OR filters
    for idx, prop in enumerate(Filter(data=cohort_group).property_groups.flat):
        if prop.type != "cohort":
            clause, clause_params = prop_filter_json_extract(
                prop=prop,
                idx=idx,
                prepend="{}_{}_{}_person".format(cohort.pk, group_idx, idx),
                allow_denormalized_props=False,
            )
            params.update(clause_params)
            clauses.append(clause)
            continue

        try:
            prop_cohort: Cohort = Cohort.objects.get(pk=prop.value,
                                                     team_id=cohort.team_id)
        except Cohort.DoesNotExist:
            return "0 = 14", {}

        if prop_cohort.pk == cohort.pk:
            # Cyclic dependency (this cohort depends on itself): treat the
            # condition as satisfied for all persons.
            clauses.append("AND 11 = 11")
            continue

        person_id_query, cohort_filter_params = format_filter_query(
            prop_cohort, idx, "person_id")
        params.update(cohort_filter_params)
        clauses.append(f"AND person.id IN ({person_id_query})")

    return "\n".join(clauses).replace("AND ", "", 1), params
예제 #10
0
def test_prop_filter_json_extract_materialized(test_events, property,
                                               expected_event_indexes, team):
    """Check the filter for ``property`` once ``attr`` and ``email`` are materialized.

    With ``allow_denormalized_props=True`` the generated SQL must not contain
    ``JSONExtract``. The returned uuids are then compared, order-insensitively,
    with the entries of ``test_events`` at ``expected_event_indexes``.
    """
    for column in ("attr", "email"):
        materialize("events", column)

    query, params = prop_filter_json_extract(property,
                                             0,
                                             allow_denormalized_props=True)

    assert "JSONExtract" not in query

    rows = sync_execute(
        f"SELECT uuid FROM events WHERE team_id = %(team_id)s {query}",
        {
            "team_id": team.pk,
            **params
        })
    actual = sorted(uuid for (uuid, ) in rows)
    wanted = sorted(test_events[index] for index in expected_event_indexes)

    assert actual == wanted