def test_group_types_to_query(self):
    """Check `ColumnOptimizer.group_types_to_query` for each fixture filter.

    The base filter is expected to yield an empty set; the filters with
    properties and with groups are both expected to yield `{2}`.
    """

    def group_types_for(target_filter):
        # Build a fresh optimizer per call so nothing is shared between cases.
        optimizer = ColumnOptimizer(target_filter, self.team.id)
        return optimizer.group_types_to_query

    cases = (
        (BASE_FILTER, set()),
        (FILTER_WITH_PROPERTIES, {2}),
        (FILTER_WITH_GROUPS, {2}),
    )
    for target_filter, expected_group_types in cases:
        self.assertEqual(group_types_for(target_filter), expected_group_types)
def __init__(
    self,
    filter: Union[Filter, PathFilter, RetentionFilter, StickinessFilter],
    team_id: int,
    column_optimizer: Optional[ColumnOptimizer] = None,
    *,
    entity: Optional[Entity] = None,
    extra_fields: List[ColumnName] = [],
) -> None:
    """Set up the query state for the given filter and team.

    Args:
        filter: Filter whose property groups drive the query.
        team_id: Team the query is scoped to.
        column_optimizer: Optimizer to reuse; when falsy a new
            `ColumnOptimizer(filter, team_id)` is created.
        entity: Optional entity whose property groups are AND-combined with
            the filter's property groups.
        extra_fields: Additional column names to track. They are copied into
            a set, and `PERSON_PROPERTIES_ALIAS` is swapped for "properties".
    """
    self._filter = filter
    self._team_id = team_id
    self._entity = entity

    if not column_optimizer:
        column_optimizer = ColumnOptimizer(self._filter, self._team_id)
    self._column_optimizer = column_optimizer

    # Work on a private copy so the caller's (or the default) list is untouched.
    requested_fields = set(extra_fields)
    if self.PERSON_PROPERTIES_ALIAS in requested_fields:
        requested_fields.discard(self.PERSON_PROPERTIES_ALIAS)
        requested_fields.add("properties")
    self._extra_fields = requested_fields

    filter_property_groups = self._filter.property_groups
    entity_property_groups = self._entity.property_groups if self._entity else None
    properties = filter_property_groups.combine_property_group(
        PropertyOperatorType.AND,
        entity_property_groups,
    )

    # Only the `.inner` part of the parsed property groups is kept here.
    parsed_groups = self._column_optimizer.property_optimizer.parse_property_groups(properties)
    self._inner_person_properties = parsed_groups.inner
def __init__(
    self,
    filter: Union[Filter, PathFilter, RetentionFilter, StickinessFilter, SessionRecordingsFilter],
    team_id: int,
    round_interval=False,
    should_join_distinct_ids=False,
    should_join_persons=False,
    # Extra events/person table columns to fetch since parent query needs them
    extra_fields: List[ColumnName] = [],
    extra_event_properties: List[PropertyName] = [],
    extra_person_fields: List[ColumnName] = [],
    **kwargs,
) -> None:
    """Initialise the shared state of an event query.

    Args:
        filter: Filter the query is built from.
        team_id: Team the query is scoped to; also seeded into `self.params`.
        round_interval: Stored as `self._should_round_interval`.
        should_join_distinct_ids: When falsy,
            `_determine_should_join_distinct_ids()` is called to decide.
        should_join_persons: When falsy, `_determine_should_join_persons()`
            is called to decide.
        extra_fields: Extra columns the parent query needs.
        extra_event_properties: Extra event properties the parent query needs.
        extra_person_fields: Extra person columns the parent query needs.
        **kwargs: Accepted for signature compatibility; not used here.
    """
    self._filter = filter
    self._team_id = team_id
    self._column_optimizer = ColumnOptimizer(self._filter, self._team_id)
    self.params: Dict[str, Any] = {
        "team_id": self._team_id,
    }

    # The list defaults in the signature are created once, at definition time,
    # and shared by every call. Store private copies so that mutating one
    # instance's lists can never leak into the defaults (and thus into other
    # instances) or into a list object still owned by the caller.
    self._extra_fields = list(extra_fields or [])
    self._extra_event_properties = list(extra_event_properties or [])
    self._extra_person_fields = list(extra_person_fields or [])

    self._should_join_distinct_ids = should_join_distinct_ids
    self._should_join_persons = should_join_persons

    # Explicit True from the caller wins; otherwise let the hooks decide.
    # These run after the attributes above are set, as in the original.
    if not self._should_join_distinct_ids:
        self._determine_should_join_distinct_ids()

    if not self._should_join_persons:
        self._determine_should_join_persons()

    self._should_round_interval = round_interval
def __init__(
    self,
    filter: Union[Filter, PathFilter, RetentionFilter],
    team_id: int,
    column_optimizer: Optional[ColumnOptimizer] = None,
) -> None:
    """Store the filter and team id, plus a column optimizer for them.

    Args:
        filter: Filter this object works from.
        team_id: Team the filter is evaluated for.
        column_optimizer: Optimizer to reuse; when falsy a new
            `ColumnOptimizer(filter, team_id)` is created.
    """
    self._filter = filter
    self._team_id = team_id

    if not column_optimizer:
        column_optimizer = ColumnOptimizer(self._filter, self._team_id)
    self._column_optimizer = column_optimizer
def __init__(
    self,
    entity: Entity,
    filter: Filter,
    team_id: int,
    column_optimizer: Optional[ColumnOptimizer] = None,
):
    """Store the entity, filter and team, and seed the query parameters.

    Args:
        entity: Entity this object is built for.
        filter: Filter this object works from.
        team_id: Team id; also placed in `self.params` under "team_id".
        column_optimizer: Optimizer to reuse; when falsy a new
            `ColumnOptimizer(filter, team_id)` is created.
    """
    self.entity = entity
    self.filter = filter
    self.team_id = team_id
    self.params: Dict[str, Any] = dict(team_id=team_id)

    if not column_optimizer:
        column_optimizer = ColumnOptimizer(self.filter, self.team_id)
    self.column_optimizer = column_optimizer
def test_properties_used_in_filter_with_actions(self):
    """Properties referenced by an action's steps are counted for the filter.

    The action has one step matching an exact URL and one step with a person
    property. The same expected usage is asserted whether the action is a
    filter series or is referenced through `exclusions`.
    """
    action = Action.objects.create(team=self.team)
    ActionStep.objects.create(
        event="$autocapture",
        action=action,
        url="https://example.com/donate",
        url_matching=ActionStep.EXACT,
    )
    ActionStep.objects.create(
        action=action,
        event="$autocapture",
        tag_name="button",
        text="Pay $10",
        properties=[{"key": "$browser", "value": "Chrome", "type": "person"}],
    )

    expected_usage = {
        ("$current_url", "event", None): 1,
        ("$browser", "person", None): 1,
    }

    # Action used directly as a series.
    series_filter = Filter(data={"actions": [{"id": action.id, "math": "dau"}]})
    series_optimizer = ColumnOptimizer(series_filter, self.team.id)
    self.assertEqual(series_optimizer.properties_used_in_filter, expected_usage)

    # Action used as an exclusion on top of the base filter.
    exclusion_filter = BASE_FILTER.with_data({"exclusions": [{"id": action.id, "type": "actions"}]})
    exclusion_optimizer = ColumnOptimizer(exclusion_filter, self.team.id)
    self.assertEqual(exclusion_optimizer.properties_used_in_filter, expected_usage)
def test_should_query_element_chain_column_with_actions(self):
    """Check `should_query_elements_chain_column` for action-based filters.

    Expected False while the action only has a URL-matching step, True once a
    step with `tag_name`/`text` is added, and True when the action is
    referenced through `exclusions`.
    """

    def needs_elements_chain(target_filter):
        # A new optimizer each time, so the second step created below is seen.
        return ColumnOptimizer(target_filter, self.team.id).should_query_elements_chain_column

    action = Action.objects.create(team=self.team)
    ActionStep.objects.create(
        event="$autocapture",
        action=action,
        url="https://example.com/donate",
        url_matching=ActionStep.EXACT,
    )

    series_filter = Filter(data={"actions": [{"id": action.id, "math": "dau"}]})
    self.assertEqual(needs_elements_chain(series_filter), False)

    ActionStep.objects.create(
        action=action,
        event="$autocapture",
        tag_name="button",
        text="Pay $10",
    )
    self.assertEqual(needs_elements_chain(series_filter), True)

    exclusion_filter = BASE_FILTER.with_data({"exclusions": [{"id": action.id, "type": "actions"}]})
    self.assertEqual(needs_elements_chain(exclusion_filter), True)
def test_materialized_columns_checks(self):
    """Columns to query switch to materialized columns once they exist.

    Before `materialize(...)` both the event and person column sets are
    expected to be `{"properties"}`; afterwards `{"mat_event_prop"}` and
    `{"pmat_person_prop"}` respectively.
    """

    def fresh_optimizer():
        # Rebuilt on every call so each assertion starts from a new optimizer.
        return ColumnOptimizer(FILTER_WITH_PROPERTIES, self.team.id)

    self.assertEqual(fresh_optimizer().event_columns_to_query, {"properties"})
    self.assertEqual(fresh_optimizer().person_columns_to_query, {"properties"})

    materialize("events", "event_prop")
    materialize("person", "person_prop")

    self.assertEqual(fresh_optimizer().event_columns_to_query, {"mat_event_prop"})
    self.assertEqual(fresh_optimizer().person_columns_to_query, {"pmat_person_prop"})
def _get_aggregation_join_query(self):
    """Build the join that attaches actor data to `funnel_actors`.

    When the filter has an `aggregation_group_type_index`, the result of
    `GroupsJoinQuery(...).get_join_query()` is returned as-is. Otherwise a
    person subquery is joined on `funnel_actors.actor_id` and a
    `(join_sql, params)` tuple is returned.
    """
    if self._filter.aggregation_group_type_index is not None:
        groups_join = GroupsJoinQuery(self._filter, self._team.pk, join_key="funnel_actors.actor_id")
        return groups_join.get_join_query()

    person_query_builder = ClickhousePersonQuery(
        self._filter,
        self._team.pk,
        ColumnOptimizer(self._filter, self._team.pk),
    )
    person_query, person_query_params = person_query_builder.get_query()

    join_sql = f""" JOIN ({person_query}) person ON person.id = funnel_actors.actor_id """
    return join_sql, person_query_params
def test_should_query_element_chain_column(self):
    """Check `should_query_elements_chain_column` for the fixture filters.

    Expected False for the base filter, True for the filter with properties,
    and True when the properties are attached to an event entity.
    """

    def needs_elements_chain(target_filter):
        return ColumnOptimizer(target_filter, self.team.id).should_query_elements_chain_column

    self.assertEqual(needs_elements_chain(BASE_FILTER), False)
    self.assertEqual(needs_elements_chain(FILTER_WITH_PROPERTIES), True)

    pageview_entity = {
        "id": "$pageview",
        "type": "events",
        "order": 0,
        "properties": PROPERTIES_OF_ALL_TYPES,
    }
    entity_filter = Filter(data={"events": [pageview_entity]})
    self.assertEqual(needs_elements_chain(entity_filter), True)
def __init__(
    self,
    filter: Union[Filter, PathFilter, RetentionFilter],
    team_id: int,
    column_optimizer: Optional[ColumnOptimizer] = None,
    *,
    entity: Optional[Entity] = None,
    extra_fields: List[ColumnName] = [],
) -> None:
    """Set up the query state for the given filter and team.

    Args:
        filter: Filter this query works from.
        team_id: Team the query is scoped to.
        column_optimizer: Optimizer to reuse; when falsy a new
            `ColumnOptimizer(filter, team_id)` is created.
        entity: Optional entity, stored as `self._entity`.
        extra_fields: Additional column names to track. They are copied into
            a set, and `PERSON_PROPERTIES_ALIAS` is swapped for "properties".
    """
    self._filter = filter
    self._team_id = team_id
    self._entity = entity

    if not column_optimizer:
        column_optimizer = ColumnOptimizer(self._filter, self._team_id)
    self._column_optimizer = column_optimizer

    # Work on a private copy so the caller's (or the default) list is untouched.
    requested_fields = set(extra_fields)
    if self.PERSON_PROPERTIES_ALIAS in requested_fields:
        requested_fields.discard(self.PERSON_PROPERTIES_ALIAS)
        requested_fields.add("properties")
    self._extra_fields = requested_fields
def test_properties_used_in_filter(self):
    """Check `ColumnOptimizer.properties_used_in_filter` across filter shapes.

    Covers the plain fixture filters, breakdowns of each type, funnel
    correlation settings, and entity-level math properties.
    """

    def used_properties(target_filter):
        return ColumnOptimizer(target_filter, self.team.id).properties_used_in_filter

    def used_with_base(overrides):
        # Same lookup, for BASE_FILTER extended with the given data.
        return used_properties(BASE_FILTER.with_data(overrides))

    self.assertEqual(used_properties(BASE_FILTER), {})
    self.assertEqual(
        used_properties(FILTER_WITH_PROPERTIES),
        {
            ("event_prop", "event", None): 1,
            ("person_prop", "person", None): 1,
            ("id", "cohort", None): 1,
            ("tag_name", "element", None): 1,
            ("group_prop", "group", 2): 1,
        },
    )

    # Breakdown cases
    self.assertEqual(
        used_with_base({"breakdown": "some_prop", "breakdown_type": "person"}),
        {("some_prop", "person", None): 1},
    )
    self.assertEqual(
        used_with_base({"breakdown": "some_prop", "breakdown_type": "event"}),
        {("some_prop", "event", None): 1},
    )
    self.assertEqual(
        used_with_base({"breakdown": [11], "breakdown_type": "cohort"}),
        {},
    )
    self.assertEqual(
        used_with_base(
            {
                "breakdown": "some_prop",
                "breakdown_type": "group",
                "breakdown_group_type_index": 1,
            }
        ),
        {("some_prop", "group", 1): 1},
    )

    # Funnel Correlation cases
    self.assertEqual(
        used_with_base(
            {
                "funnel_correlation_type": "events",
                "funnel_correlation_names": ["random_column"],
            }
        ),
        {},
    )
    self.assertEqual(
        used_with_base(
            {
                "funnel_correlation_type": "properties",
                "funnel_correlation_names": ["random_column", "$browser"],
            }
        ),
        {
            ("random_column", "person", None): 1,
            ("$browser", "person", None): 1,
        },
    )
    self.assertEqual(
        used_with_base({"funnel_correlation_type": "properties"}),
        {},
    )

    # Entity math cases
    sum_entity = {
        "id": "$pageview",
        "type": "events",
        "order": 0,
        "math": "sum",
        "math_property": "numeric_prop",
        "properties": PROPERTIES_OF_ALL_TYPES,
    }
    self.assertEqual(
        used_properties(Filter(data={"events": [sum_entity]})),
        {
            ("numeric_prop", "event", None): 1,
            ("event_prop", "event", None): 1,
            ("person_prop", "person", None): 1,
            ("id", "cohort", None): 1,
            ("tag_name", "element", None): 1,
            ("group_prop", "group", 2): 1,
        },
    )

    unique_group_entity = {
        "id": "$pageview",
        "type": "events",
        "order": 0,
        "math": "unique_group",
        "math_group_type_index": 1,
    }
    self.assertEqual(
        used_properties(Filter(data={"events": [unique_group_entity]})),
        {
            ("$group_1", "event", None): 1,
        },
    )
def get_properties_query(self) -> Tuple[str, Dict[str, Any]]:
    """Build the property-correlation query and its parameters.

    Returns:
        A `(query, params)` tuple. The query counts, per
        `PropertyName::Value` pair, the distinct persons whose `steps` equal
        the target step (success) and those whose `steps` differ (failure),
        and appends one total row named `self.TOTAL_IDENTIFIER`.

    Raises:
        ValidationError: if the filter has no correlation property names.
    """
    if not self._filter.correlation_property_names:
        raise ValidationError(
            "Property Correlation expects atleast one Property to run correlation on"
        )

    # The three sub-queries are built in the same order as before, in case
    # any of the builders touches shared state.
    funnel_persons_query, funnel_persons_params = self.get_funnel_persons_cte()
    person_prop_query, person_prop_params = self._get_properties_prop_clause()

    person_query_builder = ClickhousePersonQuery(
        self._filter,
        self._team.pk,
        ColumnOptimizer(self._filter, self._team.pk),
    )
    person_query, person_query_params = person_query_builder.get_query()

    # NOTE(review): this literal contains `--` SQL line comments, which only
    # work if the text keeps its original line breaks. It is reproduced here
    # exactly as it appears in this view - confirm the layout against the
    # version-controlled file before reformatting it.
    query = f""" WITH funnel_people as ({funnel_persons_query}), %(target_step)s AS target_step SELECT concat(prop.1, '::', prop.2) as name, -- We generate a unique identifier for each property value as: PropertyName::Value countDistinctIf(person_id, steps = target_step) AS success_count, countDistinctIf(person_id, steps <> target_step) AS failure_count FROM ( SELECT person_id, funnel_people.steps as steps, /* We can extract multiple property values at the same time, since we're already querying the person table. This gives us something like: -------------------- person1, steps, [property_value_0, property_value_1, property_value_2] person2, steps, [property_value_0, property_value_1, property_value_2] To group by property name, we need to extract the property from the array. ArrayJoin helps us do that. 
It transforms the above into: -------------------- person1, steps, property_value_0 person1, steps, property_value_1 person1, steps, property_value_2 person2, steps, property_value_0 person2, steps, property_value_1 person2, steps, property_value_2 To avoid clashes and clarify the values, we also zip with the property name, to generate tuples like: (property_name, property_value), which we then group by */ {person_prop_query} FROM funnel_people JOIN ({person_query}) person ON person.id = funnel_people.person_id ) person_with_props -- Group by the tuple items: (property_name, property_value) generated by zip GROUP BY prop.1, prop.2 HAVING prop.1 NOT IN %(exclude_property_names)s UNION ALL SELECT '{self.TOTAL_IDENTIFIER}' as name, countDistinctIf(person_id, steps = target_step) AS success_count, countDistinctIf(person_id, steps <> target_step) AS failure_count FROM funnel_people """

    # Later updates win on key clashes, matching the original merge order.
    params: Dict[str, Any] = {}
    params.update(funnel_persons_params)
    params.update(person_prop_params)
    params.update(person_query_params)
    params.update(
        {
            "target_step": len(self._filter.entities),
            "property_names": self._filter.correlation_property_names,
            "exclude_property_names": self._filter.correlation_property_exclude_names,
        }
    )

    return query, params
def get_breakdown_prop_values(
    filter: Filter,
    entity: Entity,
    aggregate_operation: str,
    team_id: int,
    limit: int = BREAKDOWN_VALUES_LIMIT,
    extra_params={},
    column_optimizer: Optional[ColumnOptimizer] = None,
):
    """
    Return the top N breakdown prop values for an event/person breakdown.

    e.g. for Browser with limit 3 might return ['Chrome', 'Safari', 'Firefox', 'Other']

    Args:
        filter: Filter supplying the breakdown, property groups and offset.
        entity: Entity whose property groups and filtering params are applied.
        aggregate_operation: Passed into the SQL template as-is.
        team_id: Team the query is scoped to.
        limit: Bound to the query's "limit" parameter.
        extra_params: Extra query parameters merged into the execution params.
        column_optimizer: Optimizer to reuse; when falsy a new
            `ColumnOptimizer(filter, team_id)` is created.

    Returns:
        The first column of the first row returned by `sync_execute`.
    """
    if not column_optimizer:
        column_optimizer = ColumnOptimizer(filter, team_id)

    parsed_date_from, parsed_date_to, date_params = parse_timestamps(filter=filter, team_id=team_id)

    # Filter-level and entity-level properties are AND-ed together; only the
    # `.outer` part of the parsed result is applied in this query.
    combined_properties = filter.property_groups.combine_property_group(
        PropertyOperatorType.AND,
        entity.property_groups,
    )
    outer_properties = column_optimizer.property_optimizer.parse_property_groups(combined_properties).outer

    prop_filters, prop_filter_params = parse_prop_grouped_clauses(
        team_id=team_id,
        property_group=outer_properties,
        table_name="e",
        prepend="e_brkdwn",
        person_properties_mode=PersonPropertiesMode.USING_PERSON_PROPERTIES_COLUMN,
        allow_denormalized_props=True,
    )

    entity_params, entity_format_params = get_entity_filtering_params(
        entity=entity,
        team_id=team_id,
        table_name="e",
    )

    value_expression = _to_value_expression(
        filter.breakdown_type,
        filter.breakdown,
        filter.breakdown_group_type_index,
    )

    # The person join is only added when the person query reports it is used.
    person_join_params: Dict
    person_query = ClickhousePersonQuery(filter, team_id, column_optimizer=column_optimizer, entity=entity)
    if person_query.is_used:
        person_subquery, person_join_params = person_query.get_query()
        person_join_clauses = f""" INNER JOIN ({get_team_distinct_ids_query(team_id)}) AS pdi ON e.distinct_id = pdi.distinct_id INNER JOIN ({person_subquery}) person ON pdi.person_id = person.id """
    else:
        person_join_clauses = ""
        person_join_params = {}

    groups_join_condition, groups_join_params = GroupsJoinQuery(filter, team_id, column_optimizer).get_join_query()

    elements_query = TOP_ELEMENTS_ARRAY_OF_KEY_SQL.format(
        value_expression=value_expression,
        parsed_date_from=parsed_date_from,
        parsed_date_to=parsed_date_to,
        prop_filters=prop_filters,
        aggregate_operation=aggregate_operation,
        person_join_clauses=person_join_clauses,
        groups_join_clauses=groups_join_condition,
        **entity_format_params,
    )

    # Merge order is significant: later mappings override earlier keys.
    execution_params = {
        "key": filter.breakdown,
        "limit": limit,
        "team_id": team_id,
        "offset": filter.offset,
        **prop_filter_params,
        **entity_params,
        **person_join_params,
        **groups_join_params,
        **extra_params,
        **date_params,
    }

    rows = sync_execute(elements_query, execution_params)
    return rows[0][0]