def _get_actor_subquery(self) -> Tuple[str, Dict[str, Any]]:
    """Return the JOIN clause (and its params) that attaches actors to ``funnel_actors``.

    When aggregating by groups the clause comes from ``GroupsJoinQuery`` keyed
    on ``funnel_actors.actor_id``. Otherwise a person subquery is built with
    the filter's ``correlation_property_values`` passed as the entity's
    properties, and joined on ``person.id``.
    """
    if self.is_aggregating_by_groups:
        groups_join = GroupsJoinQuery(self._filter, self._team.pk, join_key="funnel_actors.actor_id")
        join_sql, join_params = groups_join.get_join_query()
        return join_sql, join_params

    person_entity = Entity(
        {
            "id": "person",
            "type": "events",
            "properties": self._filter.correlation_property_values,
        }
    )
    person_query, join_params = ClickhousePersonQuery(
        self._filter,
        self._team.pk,
        entity=person_entity,
    ).get_query()
    join_sql = f""" JOIN ({person_query}) person ON person.id = funnel_actors.actor_id """
    return join_sql, join_params
def test_groups_join_query_filtering(snapshot):
    """A single group property filter yields the snapshotted join query."""
    industry_property = {
        "key": "industry",
        "value": "finance",
        "type": "group",
        "group_type_index": 0,
    }
    group_filter = Filter(data={"properties": [industry_property]})

    join_query = GroupsJoinQuery(group_filter, 2).get_join_query()

    assert join_query == snapshot
def _get_aggregation_join_query(self):
    """Return ``(join_sql, params)`` for joining the aggregation target onto ``funnel_actors``.

    With a group type index set on the filter, the join comes from
    ``GroupsJoinQuery``; otherwise a person subquery is joined on ``person.id``.
    """
    if self._filter.aggregation_group_type_index is not None:
        return GroupsJoinQuery(self._filter, self._team.pk, join_key="funnel_actors.actor_id").get_join_query()

    column_optimizer = ColumnOptimizer(self._filter, self._team.pk)
    person_query, person_query_params = ClickhousePersonQuery(
        self._filter,
        self._team.pk,
        column_optimizer,
    ).get_query()
    join_sql = f""" JOIN ({person_query}) person ON person.id = funnel_actors.actor_id """
    return join_sql, person_query_params
def test_groups_join_query_filtering_with_custom_key_names(snapshot):
    """Two group property filters plus a custom ``join_key`` yield the snapshotted join query."""
    industry_property = {
        "key": "industry",
        "value": "finance",
        "type": "group",
        "group_type_index": 0,
    }
    company_property = {
        "key": "company",
        "value": "crashed",
        "type": "group",
        "group_type_index": 2,
    }
    group_filter = Filter(data={"properties": [industry_property, company_property]})

    join_query = GroupsJoinQuery(group_filter, 2, join_key="call_me_industry").get_join_query()

    assert join_query == snapshot
def _get_aggregation_join_query(self): if self._team.actor_on_events_querying_enabled: return "", {} if self._filter.aggregation_group_type_index is None: person_query, person_query_params = PersonQuery( self._filter, self._team.pk, EnterpriseColumnOptimizer(self._filter, self._team.pk)).get_query() return ( f""" JOIN ({person_query}) person ON person.id = funnel_actors.actor_id """, person_query_params, ) else: return GroupsJoinQuery( self._filter, self._team.pk, join_key="funnel_actors.actor_id").get_join_query()
def get_query(self) -> Tuple[str, Dict, Callable]:
    """Build the breakdown query for ``self.entity`` under ``self.filter``.

    Side effect: ``self.params`` is reassigned/updated along the way with the
    math, property-filter, action, date, breakdown and join parameters.

    Returns:
        A ``(sql, params, parse_fn)`` tuple. When the breakdown has no values,
        a dummy zero-row SELECT is returned with empty params and a parser
        that always yields ``[]``. When the display is one of
        ``TRENDS_DISPLAY_BY_VALUE`` the aggregate query is returned together
        with the single-aggregate result parser; otherwise the per-interval
        query is returned with the trend result parser.
    """
    interval_annotation = get_trunc_func_ch(self.filter.interval)
    num_intervals, seconds_in_interval, round_interval = get_time_diff(
        self.filter.interval, self.filter.date_from, self.filter.date_to, self.team_id)
    _, parsed_date_to, date_params = parse_timestamps(filter=self.filter, team_id=self.team_id)

    # Filter-level and entity-level properties are AND-ed together.
    props_to_filter = self.filter.property_groups.combine_property_group(
        PropertyOperatorType.AND, self.entity.property_groups)

    # Only the "outer" part of the parsed property groups is turned into clauses here.
    outer_properties = self.column_optimizer.property_optimizer.parse_property_groups(
        props_to_filter).outer
    prop_filters, prop_filter_params = parse_prop_grouped_clauses(
        team_id=self.team_id,
        property_group=outer_properties,
        table_name="e",
        person_properties_mode=PersonPropertiesMode.USING_PERSON_PROPERTIES_COLUMN,
    )
    aggregate_operation, _, math_params = process_math(self.entity)

    # Action entities get an action filter; otherwise the plain event filter below is used.
    action_query = ""
    action_params: Dict = {}
    if self.entity.type == TREND_FILTER_TYPE_ACTIONS:
        action = self.entity.get_action()
        action_query, action_params = format_action_filter(
            team_id=self.team_id, action=action, table_name="e")

    self.params = {
        **self.params,
        **math_params,
        **prop_filter_params,
        **action_params,
        "event": self.entity.id,
        "key": self.filter.breakdown,
        **date_params,
    }

    # Values substituted into the breakdown filter template further down.
    breakdown_filter_params = {
        "parsed_date_from": date_from_clause(interval_annotation, round_interval),
        "parsed_date_to": parsed_date_to,
        "actions_query": "AND {}".format(action_query) if action_query else "",
        "event_filter": "AND event = %(event)s" if not action_query else "",
        "filters": prop_filters if props_to_filter.values else "",
    }

    _params, _breakdown_filter_params = {}, {}

    if self.filter.breakdown_type == "cohort":
        _params, breakdown_filter, _breakdown_filter_params, breakdown_value = self._breakdown_cohort_params()
    else:
        _params, breakdown_filter, _breakdown_filter_params, breakdown_value = self._breakdown_prop_params(
            "count(*)" if self.entity.math == "dau" else aggregate_operation,
            math_params,
        )

    if len(_params["values"]) == 0:
        # If there are no breakdown values, we are sure that there are no relevant events, so instead of
        # adjusting a "real" SELECT for this, we only include the below dummy SELECT.
        # It's a drop-in replacement for a "real" one, simply always returning 0 rows.
        # See https://github.com/PostHog/posthog/pull/5674 for context.
        return (
            "SELECT [now()] AS date, [0] AS data, '' AS breakdown_value LIMIT 0",
            {},
            lambda _: [],
        )

    person_join_condition, person_join_params = self._person_join_condition()
    groups_join_condition, groups_join_params = GroupsJoinQuery(
        self.filter, self.team_id, self.column_optimizer).get_join_query()
    self.params = {**self.params, **_params, **person_join_params, **groups_join_params}
    breakdown_filter_params = {**breakdown_filter_params, **_breakdown_filter_params}

    if self.filter.display in TRENDS_DISPLAY_BY_VALUE:
        # By-value displays use the aggregate query template and the single-aggregate parser.
        breakdown_filter = breakdown_filter.format(**breakdown_filter_params)
        content_sql = BREAKDOWN_AGGREGATE_QUERY_SQL.format(
            breakdown_filter=breakdown_filter,
            person_join=person_join_condition,
            groups_join=groups_join_condition,
            aggregate_operation=aggregate_operation,
            breakdown_value=breakdown_value,
        )
        time_range = enumerate_time_range(self.filter, seconds_in_interval)

        return (
            content_sql,
            self.params,
            self._parse_single_aggregate_result(self.filter, self.entity, {"days": time_range}),
        )

    else:
        breakdown_filter = breakdown_filter.format(**breakdown_filter_params)

        # Choose the inner SQL template: weekly/monthly active math, cumulative "dau", or the default.
        if self.entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]:
            active_user_params = get_active_user_params(self.filter, self.entity, self.team_id)
            conditions = BREAKDOWN_ACTIVE_USER_CONDITIONS_SQL.format(
                **breakdown_filter_params, **active_user_params)
            inner_sql = BREAKDOWN_ACTIVE_USER_INNER_SQL.format(
                breakdown_filter=breakdown_filter,
                person_join=person_join_condition,
                groups_join=groups_join_condition,
                aggregate_operation=aggregate_operation,
                interval_annotation=interval_annotation,
                breakdown_value=breakdown_value,
                conditions=conditions,
                GET_TEAM_PERSON_DISTINCT_IDS=get_team_distinct_ids_query(self.team_id),
                **active_user_params,
                **breakdown_filter_params,
            )
        elif self.filter.display == TRENDS_CUMULATIVE and self.entity.math == "dau":
            inner_sql = BREAKDOWN_CUMULATIVE_INNER_SQL.format(
                breakdown_filter=breakdown_filter,
                person_join=person_join_condition,
                groups_join=groups_join_condition,
                aggregate_operation=aggregate_operation,
                interval_annotation=interval_annotation,
                breakdown_value=breakdown_value,
                **breakdown_filter_params,
            )
        else:
            inner_sql = BREAKDOWN_INNER_SQL.format(
                breakdown_filter=breakdown_filter,
                person_join=person_join_condition,
                groups_join=groups_join_condition,
                aggregate_operation=aggregate_operation,
                interval_annotation=interval_annotation,
                breakdown_value=breakdown_value,
            )

        # Wrap the chosen inner SQL in the outer per-interval breakdown query.
        breakdown_query = BREAKDOWN_QUERY_SQL.format(
            interval=interval_annotation,
            num_intervals=num_intervals,
            inner_sql=inner_sql,
        )
        self.params.update({
            "seconds_in_interval": seconds_in_interval,
            "num_intervals": num_intervals,
        })

        return breakdown_query, self.params, self._parse_trend_result(self.filter, self.entity)
def test_groups_join_query_blank():
    """With no properties on the filter, the join query is empty SQL and empty params."""
    empty_filter = Filter(data={"properties": []})

    join_query = GroupsJoinQuery(empty_filter, 2).get_join_query()

    assert join_query == ("", {})
def get_breakdown_prop_values(
    filter: Filter,
    entity: Entity,
    aggregate_operation: str,
    team_id: int,
    limit: int = BREAKDOWN_VALUES_LIMIT,
    extra_params: Optional[Dict] = None,
    column_optimizer: Optional[EnterpriseColumnOptimizer] = None,
):
    """
    Returns the top N breakdown prop values for event/person breakdown

    e.g. for Browser with limit 3 might return ['Chrome', 'Safari', 'Firefox', 'Other']

    Args:
        filter: Filter supplying the breakdown key/type, offset, date range and properties.
        entity: Entity whose property groups are AND-ed with the filter's.
        aggregate_operation: Aggregate expression substituted into the top-elements SQL.
        team_id: Team the query is scoped to.
        limit: Bound to the ``limit`` query parameter.
        extra_params: Optional additional query parameters merged into the
            execution params; ``None`` (the default) means no extras.
        column_optimizer: Optional optimizer; a new ``EnterpriseColumnOptimizer``
            is created when omitted.

    Returns:
        The first column of the first row returned by the top-elements query.
    """
    column_optimizer = column_optimizer or EnterpriseColumnOptimizer(filter, team_id)
    parsed_date_from, parsed_date_to, date_params = parse_timestamps(filter=filter, team_id=team_id)

    # Filter-level and entity-level properties are AND-ed together.
    props_to_filter = filter.property_groups.combine_property_group(
        PropertyOperatorType.AND, entity.property_groups)
    outer_properties = column_optimizer.property_optimizer.parse_property_groups(props_to_filter).outer

    prop_filters, prop_filter_params = parse_prop_grouped_clauses(
        team_id=team_id,
        property_group=outer_properties,
        table_name="e",
        prepend="e_brkdwn",
        person_properties_mode=PersonPropertiesMode.USING_PERSON_PROPERTIES_COLUMN,
        allow_denormalized_props=True,
    )

    entity_params, entity_format_params = get_entity_filtering_params(
        entity=entity, team_id=team_id, table_name="e")

    value_expression = _to_value_expression(
        filter.breakdown_type, filter.breakdown, filter.breakdown_group_type_index)

    # The person join is only added when the person query reports it is needed.
    person_join_clauses = ""
    person_join_params: Dict = {}
    person_query = PersonQuery(filter, team_id, column_optimizer=column_optimizer, entity=entity)
    if person_query.is_used:
        person_subquery, person_join_params = person_query.get_query()
        person_join_clauses = f""" INNER JOIN ({get_team_distinct_ids_query(team_id)}) AS pdi ON e.distinct_id = pdi.distinct_id INNER JOIN ({person_subquery}) person ON pdi.person_id = person.id """

    groups_join_condition, groups_join_params = GroupsJoinQuery(
        filter, team_id, column_optimizer).get_join_query()

    elements_query = TOP_ELEMENTS_ARRAY_OF_KEY_SQL.format(
        value_expression=value_expression,
        parsed_date_from=parsed_date_from,
        parsed_date_to=parsed_date_to,
        prop_filters=prop_filters,
        aggregate_operation=aggregate_operation,
        person_join_clauses=person_join_clauses,
        groups_join_clauses=groups_join_condition,
        **entity_format_params,
    )

    return sync_execute(
        elements_query,
        {
            "key": filter.breakdown,
            "limit": limit,
            "team_id": team_id,
            "offset": filter.offset,
            **prop_filter_params,
            **entity_params,
            **person_join_params,
            **groups_join_params,
            # ``None`` sentinel replaces the former shared mutable ``{}`` default.
            **(extra_params or {}),
            **date_params,
        },
    )[0][0]
def _get_groups_query(self) -> Tuple[str, Dict]:
    """Return the groups join SQL and params for this query's filter, team and column optimizer."""
    groups_join = GroupsJoinQuery(
        self._filter,
        self._team_id,
        self._column_optimizer,
    )
    return groups_join.get_join_query()
def _get_groups_query(self) -> Tuple[str, Dict]:
    """Return the groups join SQL and params, forwarding the ``using_person_on_events`` setting."""
    groups_join = GroupsJoinQuery(
        self._filter,
        self._team_id,
        self._column_optimizer,
        using_person_on_events=self._using_person_on_events,
    )
    return groups_join.get_join_query()
def get_breakdown_prop_values(
    filter: Filter,
    entity: Entity,
    aggregate_operation: str,
    team_id: int,
    limit: int = 25,
    extra_params: Optional[Dict] = None,
    column_optimizer: Optional[ColumnOptimizer] = None,
):
    """Returns the top N breakdown prop values for event/person breakdown

    Args:
        filter: Filter supplying the breakdown key/type, offset, date range and properties.
        entity: Entity whose properties are combined with the filter's.
        aggregate_operation: Aggregate expression substituted into the top-elements SQL.
        team_id: Team the query is scoped to.
        limit: Bound to the ``limit`` query parameter.
        extra_params: Optional additional query parameters merged into the
            execution params; ``None`` (the default) means no extras.
        column_optimizer: Optional optimizer forwarded to the person and groups join queries.

    Returns:
        The first column of the first row returned by the top-elements query.
    """
    parsed_date_from, parsed_date_to, date_params = parse_timestamps(filter=filter, team_id=team_id)
    prop_filters, prop_filter_params = parse_prop_clauses(
        filter.properties + entity.properties,
        team_id,
        table_name="e",
        prepend="e_brkdwn",
        person_properties_mode=PersonPropertiesMode.EXCLUDE,
        allow_denormalized_props=True,
    )

    entity_params, entity_format_params = get_entity_filtering_params(entity, team_id, table_name="e")

    # Pick the table/column the breakdown key is read from, based on the breakdown type.
    if filter.breakdown_type == "person":
        value_expression, _ = get_property_string_expr(
            "person", cast(str, filter.breakdown), "%(key)s", "person_props")
    elif filter.breakdown_type == "group":
        value_expression, _ = get_property_string_expr(
            "groups",
            cast(str, filter.breakdown),
            "%(key)s",
            f"group_properties_{filter.breakdown_group_type_index}",
        )
    else:
        value_expression, _ = get_property_string_expr(
            "events", cast(str, filter.breakdown), "%(key)s", "properties")

    # The person join is only added when the person query reports it is needed.
    person_join_clauses = ""
    person_join_params: Dict = {}
    person_query = ClickhousePersonQuery(filter, team_id, column_optimizer=column_optimizer, entity=entity)
    if person_query.is_used:
        person_subquery, person_join_params = person_query.get_query()
        person_join_clauses = f""" INNER JOIN ({GET_TEAM_PERSON_DISTINCT_IDS}) AS pdi ON e.distinct_id = pdi.distinct_id INNER JOIN ({person_subquery}) person ON pdi.person_id = person.id """

    groups_join_condition, groups_join_params = GroupsJoinQuery(
        filter, team_id, column_optimizer).get_join_query()

    elements_query = TOP_ELEMENTS_ARRAY_OF_KEY_SQL.format(
        value_expression=value_expression,
        parsed_date_from=parsed_date_from,
        parsed_date_to=parsed_date_to,
        prop_filters=prop_filters,
        aggregate_operation=aggregate_operation,
        person_join_clauses=person_join_clauses,
        groups_join_clauses=groups_join_condition,
        **entity_format_params,
    )

    return sync_execute(
        elements_query,
        {
            "key": filter.breakdown,
            "limit": limit,
            "team_id": team_id,
            "offset": filter.offset,
            **prop_filter_params,
            **entity_params,
            **person_join_params,
            **groups_join_params,
            # ``None`` sentinel replaces the former shared mutable ``{}`` default.
            **(extra_params or {}),
            **date_params,
        },
    )[0][0]