Example #1
0
 def _person_join_condition(self) -> Tuple[str, Dict]:
     """Build the SQL fragment (and its params) joining events to persons.

     Returns a ``(sql, params)`` pair:

     * when the person query is used: ``EVENT_JOIN_PERSON_SQL`` followed by
       an inner join on the person subquery, with that subquery's params;
     * when only the entity math is ``"dau"``: ``EVENT_JOIN_PERSON_SQL``
       alone with no params;
     * otherwise an empty fragment with no params.
     """
     person_query = ClickhousePersonQuery(
         self.filter,
         self.team_id,
         self.column_optimizer,
         entity=self.entity,
     )

     if not person_query.is_used:
         if self.entity.math == "dau":
             # Only join distinct_ids
             return EVENT_JOIN_PERSON_SQL, {}
         return "", {}

     subquery, subquery_params = person_query.get_query()
     join_sql = f"""
         {EVENT_JOIN_PERSON_SQL}
         INNER JOIN ({subquery}) person
         ON person.id = pdi.person_id
         """
     return join_sql, subquery_params
Example #2
0
    def _run_query(self, filter: Filter, join_person_tables=False) -> List:
        """Run ``SELECT uuid FROM events`` restricted by ``filter.properties``.

        The property clauses come from ``parse_prop_clauses`` with
        denormalized props allowed and person properties excluded. When
        ``join_person_tables`` is true, the distinct-id subquery and the
        person subquery are inner-joined onto ``events``. The generated SQL
        is asserted not to contain the substring "json" before it is
        executed; the result of ``sync_execute`` is returned as-is.
        """
        team_pk = self.team.pk
        where_clause, params = parse_prop_clauses(
            filter.properties,
            team_pk,
            allow_denormalized_props=True,
            person_properties_mode=PersonPropertiesMode.EXCLUDE,
        )

        if join_person_tables:
            person_subquery, person_join_params = ClickhousePersonQuery(filter, team_pk).get_query()
            params.update(person_join_params)
            joins = f"""
                INNER JOIN ({GET_TEAM_PERSON_DISTINCT_IDS}) AS pdi ON events.distinct_id = pdi.distinct_id
                INNER JOIN ({person_subquery}) person ON pdi.person_id = person.id
            """
        else:
            joins = ""

        final_query = f"SELECT uuid FROM events {joins} WHERE team_id = %(team_id)s {where_clause}"
        # Make sure we don't accidentally use json on the properties field
        self.assertNotIn("json", final_query.lower())
        return sync_execute(final_query, {**params, "team_id": team_pk})
Example #3
0
 def _person_join_condition(self) -> Tuple[str, Dict]:
     """Build the SQL fragment (and its params) joining events to persons.

     ``EVENT_JOIN_PERSON_SQL`` is first rendered with the team's
     distinct-ids query substituted for ``GET_TEAM_PERSON_DISTINCT_IDS``.
     The returned ``(sql, params)`` pair is then:

     * when the person query is used: the rendered event join followed by
       an inner join on the person subquery, with that subquery's params;
     * when only the entity math is ``"dau"``: the rendered event join
       alone with no params;
     * otherwise an empty fragment with no params.
     """
     person_query = ClickhousePersonQuery(
         self.filter,
         self.team_id,
         self.column_optimizer,
         entity=self.entity,
     )
     # Rendered up front, regardless of which branch below is taken.
     distinct_ids_sql = get_team_distinct_ids_query(self.team_id)
     event_join = EVENT_JOIN_PERSON_SQL.format(
         GET_TEAM_PERSON_DISTINCT_IDS=distinct_ids_sql)

     if not person_query.is_used:
         if self.entity.math == "dau":
             # Only join distinct_ids
             return event_join, {}
         return "", {}

     subquery, subquery_params = person_query.get_query()
     join_sql = f"""
         {event_join}
         INNER JOIN ({subquery}) person
         ON person.id = pdi.person_id
         """
     return join_sql, subquery_params
Example #4
0
def get_breakdown_prop_values(
    filter: Filter,
    entity: Entity,
    aggregate_operation: str,
    team_id: int,
    limit: int = BREAKDOWN_VALUES_LIMIT,
    extra_params: Optional[dict] = None,
    column_optimizer: Optional[ColumnOptimizer] = None,
):
    """
    Returns the top N breakdown prop values for event/person breakdown

    e.g. for Browser with limit 3 might return ['Chrome', 'Safari', 'Firefox', 'Other']

    Args:
        filter: Filter supplying the breakdown, date range, offset and
            property groups.
        entity: Entity whose property groups are AND-combined with the
            filter's and which drives the entity filtering params.
        aggregate_operation: Passed through to the SQL template.
        team_id: Team the query is scoped to.
        limit: Bound as the ``limit`` query parameter.
        extra_params: Additional query parameters merged into the final
            parameter dict; ``None`` (the default) means none.
        column_optimizer: Optimizer to reuse; one is built from ``filter``
            and ``team_id`` when omitted.

    Returns:
        The first column of the first row returned by ``sync_execute``.
    """
    column_optimizer = column_optimizer or ColumnOptimizer(filter, team_id)
    parsed_date_from, parsed_date_to, date_params = parse_timestamps(
        filter=filter, team_id=team_id)

    props_to_filter = filter.property_groups.combine_property_group(
        PropertyOperatorType.AND, entity.property_groups)
    outer_properties = column_optimizer.property_optimizer.parse_property_groups(
        props_to_filter).outer

    prop_filters, prop_filter_params = parse_prop_grouped_clauses(
        team_id=team_id,
        property_group=outer_properties,
        table_name="e",
        prepend="e_brkdwn",
        person_properties_mode=PersonPropertiesMode.
        USING_PERSON_PROPERTIES_COLUMN,
        allow_denormalized_props=True,
    )

    entity_params, entity_format_params = get_entity_filtering_params(
        entity=entity, team_id=team_id, table_name="e")

    value_expression = _to_value_expression(filter.breakdown_type,
                                            filter.breakdown,
                                            filter.breakdown_group_type_index)

    # Person tables are only joined when the person query reports it is used.
    person_join_clauses = ""
    person_join_params: Dict = {}
    person_query = ClickhousePersonQuery(filter,
                                         team_id,
                                         column_optimizer=column_optimizer,
                                         entity=entity)
    if person_query.is_used:
        person_subquery, person_join_params = person_query.get_query()
        person_join_clauses = f"""
            INNER JOIN ({get_team_distinct_ids_query(team_id)}) AS pdi ON e.distinct_id = pdi.distinct_id
            INNER JOIN ({person_subquery}) person ON pdi.person_id = person.id
        """

    groups_join_condition, groups_join_params = GroupsJoinQuery(
        filter, team_id, column_optimizer).get_join_query()

    elements_query = TOP_ELEMENTS_ARRAY_OF_KEY_SQL.format(
        value_expression=value_expression,
        parsed_date_from=parsed_date_from,
        parsed_date_to=parsed_date_to,
        prop_filters=prop_filters,
        aggregate_operation=aggregate_operation,
        person_join_clauses=person_join_clauses,
        groups_join_clauses=groups_join_condition,
        **entity_format_params,
    )

    # Later entries win on key collisions, so the merge order is significant.
    return sync_execute(
        elements_query,
        {
            "key": filter.breakdown,
            "limit": limit,
            "team_id": team_id,
            "offset": filter.offset,
            **prop_filter_params,
            **entity_params,
            **person_join_params,
            **groups_join_params,
            **(extra_params or {}),
            **date_params,
        },
    )[0][0]
Example #5
0
class ClickhouseEventQuery(metaclass=ABCMeta):
    """Abstract base for ClickHouse event queries.

    Holds the filter, team id, column optimizer and person query, decides
    whether the distinct-id and person tables must be joined, and provides
    helpers that produce SQL fragments together with their parameters.
    Subclasses implement ``get_query`` and
    ``_determine_should_join_distinct_ids``.
    """

    # Aliases interpolated into the generated SQL fragments.
    DISTINCT_ID_TABLE_ALIAS = "pdi"
    PERSON_TABLE_ALIAS = "person"
    EVENT_TABLE_ALIAS = "e"

    _filter: Union[Filter, PathFilter, RetentionFilter]
    _team_id: int
    _column_optimizer: ColumnOptimizer
    _person_query: ClickhousePersonQuery
    _should_join_distinct_ids = False
    _should_join_persons = False
    _should_round_interval = False
    _extra_fields: List[ColumnName]
    _extra_person_fields: List[ColumnName]

    def __init__(
        self,
        filter: Union[Filter, PathFilter, RetentionFilter],
        team_id: int,
        round_interval=False,
        should_join_distinct_ids=False,
        should_join_persons=False,
        # Extra events/person table columns to fetch since parent query needs them
        extra_fields: List[ColumnName] = [],
        extra_person_fields: List[ColumnName] = [],
        **kwargs,
    ) -> None:
        """Store the query inputs and work out which joins are required.

        Args:
            filter: Filter the query is built from.
            team_id: Team the query is scoped to.
            round_interval: Stored as ``_should_round_interval``.
            should_join_distinct_ids: Force the distinct-id join; when falsy
                ``_determine_should_join_distinct_ids`` decides.
            should_join_persons: Force the person join; when falsy
                ``_determine_should_join_persons`` decides.
            extra_fields: Extra events table columns needed by the parent
                query.
            extra_person_fields: Extra person table columns needed by the
                parent query; also forwarded to the person query.
            **kwargs: Accepted and ignored by this base class.
        """
        self._filter = filter
        self._team_id = team_id
        self._column_optimizer = ColumnOptimizer(self._filter, self._team_id)

        # Copy the incoming lists so the (mutable) default list objects are
        # never stored on an instance or handed to the person query, where a
        # later in-place mutation would leak into every other instance.
        self._extra_fields = list(extra_fields)
        self._extra_person_fields = list(extra_person_fields)

        self._person_query = ClickhousePersonQuery(
            self._filter,
            self._team_id,
            self._column_optimizer,
            extra_fields=self._extra_person_fields)
        self.params: Dict[str, Any] = {
            "team_id": self._team_id,
        }

        self._should_join_distinct_ids = should_join_distinct_ids
        self._should_join_persons = should_join_persons

        if not self._should_join_distinct_ids:
            self._determine_should_join_distinct_ids()

        if not self._should_join_persons:
            self._determine_should_join_persons()

        self._should_round_interval = round_interval

    @abstractmethod
    def get_query(self) -> Tuple[str, Dict[str, Any]]:
        """Return the query as a ``(sql, params)`` pair."""

    @abstractmethod
    def _determine_should_join_distinct_ids(self) -> None:
        """Hook called from ``__init__`` when the distinct-id join was not forced.

        NOTE(review): presumably implementations set
        ``_should_join_distinct_ids``; confirm against subclasses.
        """

    # NOTE: the method name is misspelled ("disintct"); it is kept as-is
    # because renaming it would change the class interface.
    def _get_disintct_id_query(self) -> str:
        """Return the distinct-id join fragment, or "" when it is not needed."""
        if not self._should_join_distinct_ids:
            return ""
        return f"""
            INNER JOIN ({GET_TEAM_PERSON_DISTINCT_IDS}) AS {self.DISTINCT_ID_TABLE_ALIAS}
            ON events.distinct_id = {self.DISTINCT_ID_TABLE_ALIAS}.distinct_id
            """

    def _determine_should_join_persons(self) -> None:
        """Enable the distinct-id and person joins when anything needs persons.

        The checks run in order and stop at the first hit: the person query
        being used, a filter-level cohort property, an entity-level cohort
        property, and finally a "person" breakdown.
        """
        # :KLUDGE: The following is mostly making sure if cohorts are included as well.
        #   Can be simplified significantly after https://github.com/PostHog/posthog/issues/5854
        needs_persons = (
            self._person_query.is_used
            or any(
                self._should_property_join_persons(prop)
                for prop in self._filter.properties)
            or any(
                self._should_property_join_persons(prop)
                for entity in self._filter.entities
                for prop in entity.properties)
            or self._filter.breakdown_type == "person"
        )
        if needs_persons:
            self._should_join_distinct_ids = True
            self._should_join_persons = True

    def _should_property_join_persons(self, prop: Property) -> bool:
        """Return True for a cohort property whose cohort needs the person join."""
        return prop.type == "cohort" and self._does_cohort_need_persons(prop)

    def _does_cohort_need_persons(self, prop: Property) -> bool:
        """Return whether the cohort referenced by ``prop`` requires persons.

        A cohort that does not exist for this team never does. Otherwise it
        does when it is precalculated, static, or any of its groups has a
        truthy "properties" entry.
        """
        try:
            cohort: Cohort = Cohort.objects.get(pk=prop.value,
                                                team_id=self._team_id)
        except Cohort.DoesNotExist:
            return False
        if is_precalculated_query(cohort) or cohort.is_static:
            return True
        return any(group.get("properties") for group in cohort.groups)

    def _get_person_query(self) -> Tuple[str, Dict]:
        """Return the person join fragment and its params, or ``("", {})``."""
        if not self._should_join_persons:
            return "", {}

        person_query, params = self._person_query.get_query()
        join_sql = f"""
            INNER JOIN ({person_query}) {self.PERSON_TABLE_ALIAS}
            ON {self.PERSON_TABLE_ALIAS}.id = {self.DISTINCT_ID_TABLE_ALIAS}.person_id
            """
        return join_sql, params

    def _get_groups_query(self) -> Tuple[str, Dict]:
        """Return the groups join fragment and params from ``GroupsJoinQuery``."""
        return GroupsJoinQuery(self._filter, self._team_id,
                               self._column_optimizer).get_join_query()

    def _get_date_filter(self) -> Tuple[str, Dict]:
        """Return the from/to date clauses and their params for the filter."""
        parsed_date_from, parsed_date_to, date_params = parse_timestamps(
            filter=self._filter, team_id=self._team_id)

        query = f"""
        {parsed_date_from}
        {parsed_date_to}
        """

        return query, date_params

    def _get_props(self, filters: List[Property]) -> Tuple[str, Dict]:
        """Return the space-joined clauses and merged params for ``filters``.

        Cohort properties become an ``AND <cohort subquery>`` clause; every
        other property is parsed on its own with a ``global_<index>``
        prepend, denormalized props allowed and person properties excluded.
        """
        final = []
        params: Dict[str, Any] = {}

        for idx, prop in enumerate(filters):
            if prop.type == "cohort":
                person_id_query, cohort_filter_params = self._get_cohort_subquery(
                    prop)
                params.update(cohort_filter_params)
                final.append(f"AND {person_id_query}")
            else:
                filter_query, filter_params = parse_prop_clauses(
                    [prop],
                    self._team_id,
                    prepend=f"global_{idx}",
                    allow_denormalized_props=True,
                    person_properties_mode=PersonPropertiesMode.EXCLUDE,
                )
                final.append(filter_query)
                params.update(filter_params)
        return " ".join(final), params

    def _get_cohort_subquery(self, prop) -> Tuple[str, Dict[str, Any]]:
        """Return the cohort membership condition and its params for ``prop``.

        The condition is matched against the distinct-id table's
        ``person_id`` column. A cohort that does not exist for this team
        yields an always-false condition with no params.
        """
        try:
            cohort: Cohort = Cohort.objects.get(pk=prop.value,
                                                team_id=self._team_id)
        except Cohort.DoesNotExist:
            return "0 = 11", {}  # If cohort doesn't exist, nothing can match

        match_field = f"{self.DISTINCT_ID_TABLE_ALIAS}.person_id"
        if is_precalculated_query(cohort):
            person_id_query, cohort_filter_params = format_precalculated_cohort_query(
                cohort.pk, 0, custom_match_field=match_field)
        else:
            person_id_query, cohort_filter_params = format_person_query(
                cohort, 0, custom_match_field=match_field)

        return person_id_query, cohort_filter_params
Example #6
0
def get_breakdown_prop_values(
    filter: Filter,
    entity: Entity,
    aggregate_operation: str,
    team_id: int,
    limit: int = 25,
    extra_params: Optional[dict] = None,
    column_optimizer: Optional[ColumnOptimizer] = None,
):
    """Returns the top N breakdown prop values for event/person breakdown

    Args:
        filter: Filter supplying the breakdown, date range, offset and
            properties.
        entity: Entity whose properties are appended to the filter's and
            which drives the entity filtering params.
        aggregate_operation: Passed through to the SQL template.
        team_id: Team the query is scoped to.
        limit: Bound as the ``limit`` query parameter.
        extra_params: Additional query parameters merged into the final
            parameter dict; ``None`` (the default) means none.
        column_optimizer: Forwarded unchanged to the person and groups
            join queries.

    Returns:
        The first column of the first row returned by ``sync_execute``.
    """
    parsed_date_from, parsed_date_to, date_params = parse_timestamps(
        filter=filter, team_id=team_id)
    prop_filters, prop_filter_params = parse_prop_clauses(
        filter.properties + entity.properties,
        team_id,
        table_name="e",
        prepend="e_brkdwn",
        person_properties_mode=PersonPropertiesMode.EXCLUDE,
        allow_denormalized_props=True,
    )

    entity_params, entity_format_params = get_entity_filtering_params(
        entity, team_id, table_name="e")

    # Pick the table and properties column the breakdown key is read from;
    # anything other than "person" or "group" falls back to event properties.
    if filter.breakdown_type == "person":
        table, column = "person", "person_props"
    elif filter.breakdown_type == "group":
        table = "groups"
        column = f"group_properties_{filter.breakdown_group_type_index}"
    else:
        table, column = "events", "properties"
    value_expression, _ = get_property_string_expr(
        table, cast(str, filter.breakdown), "%(key)s", column)

    # Person tables are only joined when the person query reports it is used.
    person_join_clauses = ""
    person_join_params: Dict = {}
    person_query = ClickhousePersonQuery(filter,
                                         team_id,
                                         column_optimizer=column_optimizer,
                                         entity=entity)
    if person_query.is_used:
        person_subquery, person_join_params = person_query.get_query()
        person_join_clauses = f"""
            INNER JOIN ({GET_TEAM_PERSON_DISTINCT_IDS}) AS pdi ON e.distinct_id = pdi.distinct_id
            INNER JOIN ({person_subquery}) person ON pdi.person_id = person.id
        """

    groups_join_condition, groups_join_params = GroupsJoinQuery(
        filter, team_id, column_optimizer).get_join_query()

    elements_query = TOP_ELEMENTS_ARRAY_OF_KEY_SQL.format(
        value_expression=value_expression,
        parsed_date_from=parsed_date_from,
        parsed_date_to=parsed_date_to,
        prop_filters=prop_filters,
        aggregate_operation=aggregate_operation,
        person_join_clauses=person_join_clauses,
        groups_join_clauses=groups_join_condition,
        **entity_format_params,
    )

    # Later entries win on key collisions, so the merge order is significant.
    return sync_execute(
        elements_query,
        {
            "key": filter.breakdown,
            "limit": limit,
            "team_id": team_id,
            "offset": filter.offset,
            **prop_filter_params,
            **entity_params,
            **person_join_params,
            **groups_join_params,
            **(extra_params or {}),
            **date_params,
        },
    )[0][0]