Beispiel #1
0
    def test_group_types_to_query(self):
        """Check ``ColumnOptimizer.group_types_to_query`` for each fixture filter."""

        def queried_group_types(candidate):
            optimizer = ColumnOptimizer(candidate, self.team.id)
            return optimizer.group_types_to_query

        # (filter, expected set of group type indexes)
        expectations = (
            (BASE_FILTER, set()),
            (FILTER_WITH_PROPERTIES, {2}),
            (FILTER_WITH_GROUPS, {2}),
        )
        for candidate, expected in expectations:
            self.assertEqual(queried_group_types(candidate), expected)
Beispiel #2
0
    def __init__(
        self,
        filter: Union[Filter, PathFilter, RetentionFilter, StickinessFilter],
        team_id: int,
        column_optimizer: Optional[ColumnOptimizer] = None,
        *,
        entity: Optional[Entity] = None,
        extra_fields: List[ColumnName] = [],
    ) -> None:
        """Set up the query state for ``filter`` and ``team_id``.

        Args:
            filter: Filter whose property groups are parsed.
            team_id: Team the query is scoped to.
            column_optimizer: Optimizer to reuse; a new ``ColumnOptimizer`` is
                built from ``filter`` and ``team_id`` when a falsy value is given.
            entity: Optional entity whose property groups are AND-combined with
                the filter's property groups.
            extra_fields: Additional column names; copied into a set, with
                ``PERSON_PROPERTIES_ALIAS`` replaced by ``"properties"``.
        """
        self._filter = filter
        self._team_id = team_id
        self._entity = entity

        if not column_optimizer:
            column_optimizer = ColumnOptimizer(self._filter, self._team_id)
        self._column_optimizer = column_optimizer

        requested_fields = set(extra_fields)
        if self.PERSON_PROPERTIES_ALIAS in requested_fields:
            # Swap the alias for the real column name.
            requested_fields.discard(self.PERSON_PROPERTIES_ALIAS)
            requested_fields.add("properties")
        self._extra_fields = requested_fields

        filter_groups = self._filter.property_groups
        entity_groups = self._entity.property_groups if self._entity else None
        combined_groups = filter_groups.combine_property_group(
            PropertyOperatorType.AND, entity_groups)

        parsed_groups = self._column_optimizer.property_optimizer.parse_property_groups(
            combined_groups)
        self._inner_person_properties = parsed_groups.inner
Beispiel #3
0
    def __init__(
        self,
        filter: Union[Filter, PathFilter, RetentionFilter, StickinessFilter,
                      SessionRecordingsFilter],
        team_id: int,
        round_interval=False,
        should_join_distinct_ids=False,
        should_join_persons=False,
        # Extra events/person table columns to fetch since parent query needs them
        extra_fields: List[ColumnName] = [],
        extra_event_properties: List[PropertyName] = [],
        extra_person_fields: List[ColumnName] = [],
        **kwargs,
    ) -> None:
        """Initialise the shared state of an event query.

        Args:
            filter: Filter the query is built from.
            team_id: Team the query is scoped to; also seeded into ``params``.
            round_interval: Stored as ``_should_round_interval``.
            should_join_distinct_ids: Force the distinct-ids join. When falsy,
                ``_determine_should_join_distinct_ids()`` is called to decide.
            should_join_persons: Force the persons join. When falsy,
                ``_determine_should_join_persons()`` is called to decide.
            extra_fields: Extra columns requested by the parent query.
            extra_event_properties: Extra event properties requested by the
                parent query.
            extra_person_fields: Extra person columns requested by the parent
                query.
            **kwargs: Accepted and ignored by this initialiser.
        """
        self._filter = filter
        self._team_id = team_id
        self._column_optimizer = ColumnOptimizer(self._filter, self._team_id)
        self.params: Dict[str, Any] = {
            "team_id": self._team_id,
        }

        # Store copies: the signature defaults are shared list objects, so
        # keeping a reference would let an in-place mutation on one instance
        # leak into every later instance (and into the caller's own list).
        self._extra_fields = list(extra_fields)
        self._extra_event_properties = list(extra_event_properties)
        self._extra_person_fields = list(extra_person_fields)

        self._should_join_distinct_ids = should_join_distinct_ids
        self._should_join_persons = should_join_persons

        # Only auto-detect a join when the caller did not force it.
        if not self._should_join_distinct_ids:
            self._determine_should_join_distinct_ids()

        if not self._should_join_persons:
            self._determine_should_join_persons()

        self._should_round_interval = round_interval
Beispiel #4
0
 def __init__(
     self,
     filter: Union[Filter, PathFilter, RetentionFilter],
     team_id: int,
     column_optimizer: Optional[ColumnOptimizer] = None,
 ) -> None:
     """Store the filter and team id and ensure a column optimizer exists.

     When ``column_optimizer`` is falsy, a new ``ColumnOptimizer`` is built
     from the stored filter and team id.
     """
     self._filter = filter
     self._team_id = team_id
     if not column_optimizer:
         column_optimizer = ColumnOptimizer(self._filter, self._team_id)
     self._column_optimizer = column_optimizer
Beispiel #5
0
 def __init__(self,
              entity: Entity,
              filter: Filter,
              team_id: int,
              column_optimizer: Optional[ColumnOptimizer] = None):
     """Keep the entity, filter and team id, and seed the query params.

     ``params`` starts out holding only ``team_id``. A new ``ColumnOptimizer``
     is built from the filter and team id when ``column_optimizer`` is falsy.
     """
     self.entity = entity
     self.filter = filter
     self.team_id = team_id
     self.params: Dict[str, Any] = {"team_id": team_id}
     if not column_optimizer:
         column_optimizer = ColumnOptimizer(self.filter, self.team_id)
     self.column_optimizer = column_optimizer
    def test_properties_used_in_filter_with_actions(self):
        """Check property usage when an action is an entity or an exclusion.

        Both filters below are expected to report the same property usage
        for the action created here.
        """
        action = Action.objects.create(team=self.team)
        ActionStep.objects.create(
            action=action,
            event="$autocapture",
            url="https://example.com/donate",
            url_matching=ActionStep.EXACT,
        )
        person_property = {
            "key": "$browser",
            "value": "Chrome",
            "type": "person"
        }
        ActionStep.objects.create(
            action=action,
            event="$autocapture",
            tag_name="button",
            text="Pay $10",
            properties=[person_property],
        )

        expected_usage = {
            ("$current_url", "event", None): 1,
            ("$browser", "person", None): 1,
        }

        # Action used directly as a queried entity.
        entity_filter = Filter(
            data={"actions": [{"id": action.id, "math": "dau"}]})
        entity_optimizer = ColumnOptimizer(entity_filter, self.team.id)
        self.assertEqual(entity_optimizer.properties_used_in_filter,
                         expected_usage)

        # Same action referenced only through an exclusion.
        exclusion_filter = BASE_FILTER.with_data(
            {"exclusions": [{"id": action.id, "type": "actions"}]})
        exclusion_optimizer = ColumnOptimizer(exclusion_filter, self.team.id)
        self.assertEqual(exclusion_optimizer.properties_used_in_filter,
                         expected_usage)
    def test_should_query_element_chain_column_with_actions(self):
        """Check ``should_query_elements_chain_column`` for action-based filters.

        The flag is expected to be False while the action only has a URL step,
        and True once a step with ``tag_name``/``text`` is added. The same is
        asserted when the action is referenced through an exclusion.
        """
        action = Action.objects.create(team=self.team)
        # The created step is only needed in the database, so it is not bound
        # to a local name.
        ActionStep.objects.create(
            event="$autocapture",
            action=action,
            url="https://example.com/donate",
            url_matching=ActionStep.EXACT,
        )

        filter = Filter(data={"actions": [{"id": action.id, "math": "dau"}]})
        self.assertEqual(
            ColumnOptimizer(filter,
                            self.team.id).should_query_elements_chain_column,
            False,
        )

        ActionStep.objects.create(
            action=action,
            event="$autocapture",
            tag_name="button",
            text="Pay $10",
        )

        # Same filter object, fresh optimizer: the new step must be picked up.
        self.assertEqual(
            ColumnOptimizer(filter,
                            self.team.id).should_query_elements_chain_column,
            True,
        )

        filter = BASE_FILTER.with_data(
            {"exclusions": [{
                "id": action.id,
                "type": "actions"
            }]})
        self.assertEqual(
            ColumnOptimizer(filter,
                            self.team.id).should_query_elements_chain_column,
            True,
        )
    def test_materialized_columns_checks(self):
        """Check the queried columns before and after materializing properties.

        A fresh optimizer is built for every assertion.
        """

        def fresh_optimizer():
            return ColumnOptimizer(FILTER_WITH_PROPERTIES, self.team.id)

        # Before materialization both tables are read via "properties".
        self.assertEqual(fresh_optimizer().event_columns_to_query,
                         {"properties"})
        self.assertEqual(fresh_optimizer().person_columns_to_query,
                         {"properties"})

        materialize("events", "event_prop")
        materialize("person", "person_prop")

        # Afterwards the materialized column names are expected instead.
        self.assertEqual(fresh_optimizer().event_columns_to_query,
                         {"mat_event_prop"})
        self.assertEqual(fresh_optimizer().person_columns_to_query,
                         {"pmat_person_prop"})
Beispiel #9
0
    def _get_aggregation_join_query(self):
        """Return the join clause and params for the funnel actors.

        When the filter has an ``aggregation_group_type_index``, the result of
        ``GroupsJoinQuery.get_join_query()`` is returned. Otherwise a person
        subquery is joined on ``funnel_actors.actor_id``.
        """
        if self._filter.aggregation_group_type_index is not None:
            groups_join = GroupsJoinQuery(
                self._filter, self._team.pk, join_key="funnel_actors.actor_id")
            return groups_join.get_join_query()

        persons = ClickhousePersonQuery(
            self._filter, self._team.pk, ColumnOptimizer(self._filter, self._team.pk)
        )
        person_query, person_query_params = persons.get_query()

        join_clause = f"""
                JOIN ({person_query}) person
                    ON person.id = funnel_actors.actor_id
            """
        return join_clause, person_query_params
    def test_should_query_element_chain_column(self):
        """Check ``should_query_elements_chain_column`` for property filters.

        Covers the base filter, the filter-level properties fixture, and the
        same properties attached to a single event entity.
        """

        def needs_elements_chain(candidate):
            optimizer = ColumnOptimizer(candidate, self.team.id)
            return optimizer.should_query_elements_chain_column

        self.assertEqual(needs_elements_chain(BASE_FILTER), False)
        self.assertEqual(needs_elements_chain(FILTER_WITH_PROPERTIES), True)

        pageview_entity = {
            "id": "$pageview",
            "type": "events",
            "order": 0,
            "properties": PROPERTIES_OF_ALL_TYPES,
        }
        entity_filter = Filter(data={"events": [pageview_entity]})
        self.assertEqual(needs_elements_chain(entity_filter), True)
Beispiel #11
0
    def __init__(
        self,
        filter: Union[Filter, PathFilter, RetentionFilter],
        team_id: int,
        column_optimizer: Optional[ColumnOptimizer] = None,
        *,
        entity: Optional[Entity] = None,
        extra_fields: List[ColumnName] = [],
    ) -> None:
        """Set up the query state for ``filter`` and ``team_id``.

        Args:
            filter: Filter the query is built from.
            team_id: Team the query is scoped to.
            column_optimizer: Optimizer to reuse; a new ``ColumnOptimizer`` is
                built from ``filter`` and ``team_id`` when a falsy value is given.
            entity: Optional entity stored alongside the filter.
            extra_fields: Additional column names; copied into a set, with
                ``PERSON_PROPERTIES_ALIAS`` replaced by ``"properties"``.
        """
        self._filter = filter
        self._team_id = team_id
        self._entity = entity

        if not column_optimizer:
            column_optimizer = ColumnOptimizer(self._filter, self._team_id)
        self._column_optimizer = column_optimizer

        requested_fields = set(extra_fields)
        if self.PERSON_PROPERTIES_ALIAS in requested_fields:
            # Swap the alias for the real column name.
            requested_fields.discard(self.PERSON_PROPERTIES_ALIAS)
            requested_fields.add("properties")
        self._extra_fields = requested_fields
    def test_properties_used_in_filter(self):
        """Check ``ColumnOptimizer.properties_used_in_filter`` across filter shapes.

        Covers filter-level properties, breakdowns, funnel correlation
        settings, and entity-level ``math`` settings.
        """

        def used_properties(candidate):
            optimizer = ColumnOptimizer(candidate, self.team.id)
            return optimizer.properties_used_in_filter

        def used_with(overrides):
            return used_properties(BASE_FILTER.with_data(overrides))

        # Usage expected wherever the all-types property fixture is applied.
        all_types_usage = {
            ("event_prop", "event", None): 1,
            ("person_prop", "person", None): 1,
            ("id", "cohort", None): 1,
            ("tag_name", "element", None): 1,
            ("group_prop", "group", 2): 1,
        }

        self.assertEqual(used_properties(BASE_FILTER), {})
        self.assertEqual(used_properties(FILTER_WITH_PROPERTIES),
                         all_types_usage)

        # Breakdown cases
        self.assertEqual(
            used_with({"breakdown": "some_prop", "breakdown_type": "person"}),
            {("some_prop", "person", None): 1},
        )
        self.assertEqual(
            used_with({"breakdown": "some_prop", "breakdown_type": "event"}),
            {("some_prop", "event", None): 1},
        )
        self.assertEqual(
            used_with({"breakdown": [11], "breakdown_type": "cohort"}),
            {},
        )
        self.assertEqual(
            used_with({
                "breakdown": "some_prop",
                "breakdown_type": "group",
                "breakdown_group_type_index": 1,
            }),
            {("some_prop", "group", 1): 1},
        )

        # Funnel Correlation cases
        self.assertEqual(
            used_with({
                "funnel_correlation_type": "events",
                "funnel_correlation_names": ["random_column"],
            }),
            {},
        )
        self.assertEqual(
            used_with({
                "funnel_correlation_type": "properties",
                "funnel_correlation_names": ["random_column", "$browser"],
            }),
            {
                ("random_column", "person", None): 1,
                ("$browser", "person", None): 1,
            },
        )
        self.assertEqual(
            used_with({"funnel_correlation_type": "properties"}),
            {},
        )

        # Entity-level math on a property, combined with entity properties.
        sum_entity = {
            "id": "$pageview",
            "type": "events",
            "order": 0,
            "math": "sum",
            "math_property": "numeric_prop",
            "properties": PROPERTIES_OF_ALL_TYPES,
        }
        self.assertEqual(
            used_properties(Filter(data={"events": [sum_entity]})),
            {("numeric_prop", "event", None): 1, **all_types_usage},
        )

        # Entity-level unique-group math.
        unique_group_entity = {
            "id": "$pageview",
            "type": "events",
            "order": 0,
            "math": "unique_group",
            "math_group_type_index": 1,
        }
        self.assertEqual(
            used_properties(Filter(data={"events": [unique_group_entity]})),
            {("$group_1", "event", None): 1},
        )
Beispiel #13
0
    def get_properties_query(self) -> Tuple[str, Dict[str, Any]]:
        """Build the SQL and parameters for property correlation.

        The query selects one row per ``(property name, property value)`` pair,
        named ``PropertyName::Value``, with a ``success_count`` and a
        ``failure_count``. A row's person counts as a success when its
        ``steps`` equals ``target_step`` and as a failure otherwise. One extra
        row named ``TOTAL_IDENTIFIER`` carries the same counts over all of
        ``funnel_people``.

        Returns:
            A ``(query, params)`` tuple.

        Raises:
            ValidationError: If the filter has no ``correlation_property_names``.
        """

        if not self._filter.correlation_property_names:
            raise ValidationError(
                "Property Correlation expects atleast one Property to run correlation on"
            )

        # CTE body that provides the `funnel_people` rows used below.
        funnel_persons_query, funnel_persons_params = self.get_funnel_persons_cte(
        )

        # SELECT expression spliced into the inner query; presumably the one
        # that yields the `prop` tuples grouped on below -- TODO confirm.
        person_prop_query, person_prop_params = self._get_properties_prop_clause(
        )

        # Person subquery joined to funnel_people on person.id.
        person_query, person_query_params = ClickhousePersonQuery(
            self._filter, self._team.pk,
            ColumnOptimizer(self._filter, self._team.pk)).get_query()

        query = f"""
            WITH
                funnel_people as ({funnel_persons_query}),
                %(target_step)s AS target_step
            SELECT
                concat(prop.1, '::', prop.2) as name,
                -- We generate a unique identifier for each property value as: PropertyName::Value
                countDistinctIf(person_id, steps = target_step) AS success_count,
                countDistinctIf(person_id, steps <> target_step) AS failure_count
            FROM (
                SELECT
                    person_id,
                    funnel_people.steps as steps,
                    /*
                        We can extract multiple property values at the same time, since we're
                        already querying the person table.
                        This gives us something like:
                        --------------------
                        person1, steps, [property_value_0, property_value_1, property_value_2]
                        person2, steps, [property_value_0, property_value_1, property_value_2]

                        To group by property name, we need to extract the property from the array. ArrayJoin helps us do that.
                        It transforms the above into:

                        --------------------

                        person1, steps, property_value_0
                        person1, steps, property_value_1
                        person1, steps, property_value_2

                        person2, steps, property_value_0
                        person2, steps, property_value_1
                        person2, steps, property_value_2

                        To avoid clashes and clarify the values, we also zip with the property name, to generate
                        tuples like: (property_name, property_value), which we then group by
                    */
                    {person_prop_query}
                FROM funnel_people
                JOIN ({person_query}) person
                ON person.id = funnel_people.person_id
            ) person_with_props
            -- Group by the tuple items: (property_name, property_value) generated by zip
            GROUP BY prop.1, prop.2
            HAVING prop.1 NOT IN %(exclude_property_names)s
            UNION ALL
            SELECT
                '{self.TOTAL_IDENTIFIER}' as name,
                countDistinctIf(person_id, steps = target_step) AS success_count,
                countDistinctIf(person_id, steps <> target_step) AS failure_count
            FROM funnel_people
        """
        # Later entries win on key clashes, so the explicit keys below
        # override anything the sub-queries supplied under the same name.
        params = {
            **funnel_persons_params,
            **person_prop_params,
            **person_query_params,
            # The target step is the number of entities in the filter.
            "target_step":
            len(self._filter.entities),
            "property_names":
            self._filter.correlation_property_names,
            "exclude_property_names":
            self._filter.correlation_property_exclude_names,
        }

        return query, params
Beispiel #14
0
def get_breakdown_prop_values(
    filter: Filter,
    entity: Entity,
    aggregate_operation: str,
    team_id: int,
    limit: int = BREAKDOWN_VALUES_LIMIT,
    extra_params={},
    column_optimizer: Optional[ColumnOptimizer] = None,
):
    """
    Fetch the top N breakdown property values for an event/person breakdown.

    For example, a Browser breakdown with limit 3 might return
    ['Chrome', 'Safari', 'Firefox', 'Other'].

    A ColumnOptimizer is built from `filter` and `team_id` when none is passed.
    The result is the first column of the first row returned by the query.
    """
    if not column_optimizer:
        column_optimizer = ColumnOptimizer(filter, team_id)

    parsed_date_from, parsed_date_to, date_params = parse_timestamps(
        filter=filter, team_id=team_id)

    # Filter-level and entity-level properties are AND-ed together; only the
    # "outer" part of the parsed groups is applied here.
    combined_groups = filter.property_groups.combine_property_group(
        PropertyOperatorType.AND, entity.property_groups)
    parsed_groups = column_optimizer.property_optimizer.parse_property_groups(
        combined_groups)
    outer_properties = parsed_groups.outer

    prop_filters, prop_filter_params = parse_prop_grouped_clauses(
        team_id=team_id,
        property_group=outer_properties,
        table_name="e",
        prepend="e_brkdwn",
        person_properties_mode=PersonPropertiesMode.USING_PERSON_PROPERTIES_COLUMN,
        allow_denormalized_props=True,
    )

    entity_params, entity_format_params = get_entity_filtering_params(
        entity=entity, team_id=team_id, table_name="e")

    value_expression = _to_value_expression(
        filter.breakdown_type,
        filter.breakdown,
        filter.breakdown_group_type_index,
    )

    # Join the person tables only when the person query says it is used.
    person_query = ClickhousePersonQuery(
        filter, team_id, column_optimizer=column_optimizer, entity=entity)
    if not person_query.is_used:
        person_join_clauses = ""
        person_join_params: Dict = {}
    else:
        person_subquery, person_join_params = person_query.get_query()
        person_join_clauses = f"""
            INNER JOIN ({get_team_distinct_ids_query(team_id)}) AS pdi ON e.distinct_id = pdi.distinct_id
            INNER JOIN ({person_subquery}) person ON pdi.person_id = person.id
        """

    groups_join = GroupsJoinQuery(filter, team_id, column_optimizer)
    groups_join_condition, groups_join_params = groups_join.get_join_query()

    elements_query = TOP_ELEMENTS_ARRAY_OF_KEY_SQL.format(
        value_expression=value_expression,
        parsed_date_from=parsed_date_from,
        parsed_date_to=parsed_date_to,
        prop_filters=prop_filters,
        aggregate_operation=aggregate_operation,
        person_join_clauses=person_join_clauses,
        groups_join_clauses=groups_join_condition,
        **entity_format_params,
    )

    query_params = {
        "key": filter.breakdown,
        "limit": limit,
        "team_id": team_id,
        "offset": filter.offset,
    }
    # Merge in this order: later sources override earlier ones on key clashes.
    for source in (
            prop_filter_params,
            entity_params,
            person_join_params,
            groups_join_params,
            extra_params,
            date_params,
    ):
        query_params.update(source)

    rows = sync_execute(elements_query, query_params)
    return rows[0][0]