Example #1
0
    def test_event_properties_filter(self):
        """Check that a property filter yields the same SQL on the filter or on the entity."""
        date_range = {
            "date_from": "2021-05-01 00:00:00",
            "date_to": "2021-05-07 00:00:00",
        }
        viewed_event = {"id": "viewed", "order": 0}
        some_key_property = {
            "key": "some_key",
            "value": "test_val",
            "operator": "exact",
            "type": "event",
        }

        # Variant 1: the property is attached to the filter as a whole.
        global_filter = Filter(data={
            **date_range,
            "events": [dict(viewed_event)],
            "properties": [dict(some_key_property)],
        })
        bare_entity = Entity({"id": "viewed", "type": "events"})
        global_prop_query, global_prop_query_params = TrendsEventQuery(
            filter=global_filter,
            entity=bare_entity,
            team_id=self.team.pk,
        ).get_query()
        sync_execute(global_prop_query, global_prop_query_params)

        # Variant 2: the same property is attached to the entity instead.
        entity_filter = Filter(data={
            **date_range,
            "events": [dict(viewed_event)],
        })
        entity_with_property = Entity({
            "id": "viewed",
            "type": "events",
            "properties": [dict(some_key_property)],
        })
        entity_prop_query, entity_prop_query_params = TrendsEventQuery(
            filter=entity_filter,
            entity=entity_with_property,
            team_id=self.team.pk,
        ).get_query()

        # The global query and the entity query should be the same.
        self.assertEqual(
            sqlparse.format(global_prop_query, reindent=True),
            sqlparse.format(entity_prop_query, reindent=True),
        )

        sync_execute(entity_prop_query, entity_prop_query_params)
Example #2
0
    def _total_volume_query(self, entity: Entity, filter: Filter, team_id: int) -> Tuple[str, Dict, Callable]:
        """Assemble the total-volume trend query for a single entity.

        Returns a ``(sql, params, parser)`` tuple: the SQL text, the merged
        parameter dict (team id, math params and event-query params), and a
        callable for post-processing the query result.
        """
        trunc_func = get_trunc_func_ch(filter.interval)
        num_intervals, seconds_in_interval, _ = get_time_diff(
            filter.interval or "day", filter.date_from, filter.date_to, team_id=team_id
        )
        aggregate_operation, join_condition, math_params = process_math(entity)

        # Distinct ids are joined when the math needs a join condition or when
        # counting weekly/monthly active users.
        needs_distinct_ids = join_condition != "" or entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]
        trend_event_query = TrendsEventQuery(
            filter=filter,
            entity=entity,
            team_id=team_id,
            should_join_distinct_ids=needs_distinct_ids,
        )
        event_query, event_query_params = trend_event_query.get_query()

        content_sql_params = dict(
            aggregate_operation=aggregate_operation,
            timestamp="e.timestamp",
            interval=trunc_func,
        )
        params: Dict = {"team_id": team_id, **math_params, **event_query_params}

        # Displays that show a single value only need the aggregate total.
        if filter.display in TRENDS_DISPLAY_BY_VALUE:
            content_sql = VOLUME_TOTAL_AGGREGATE_SQL.format(event_query=event_query, **content_sql_params)
            time_range = enumerate_time_range(filter, seconds_in_interval)

            def _parse_aggregate(result):
                # An empty result means no matching events: report zero.
                if result and len(result):
                    aggregated_value = result[0][0]
                else:
                    aggregated_value = 0
                return [{"aggregated_value": aggregated_value, "days": time_range}]

            return content_sql, params, _parse_aggregate

        if entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]:
            content_sql = ACTIVE_USER_SQL.format(
                event_query=event_query,
                parsed_date_to=trend_event_query.parsed_date_to,
                parsed_date_from=trend_event_query.parsed_date_from,
                **content_sql_params,
                **trend_event_query.active_user_params,
            )
        else:
            content_sql = VOLUME_SQL.format(event_query=event_query, **content_sql_params)

        null_sql = NULL_SQL.format(
            interval=trunc_func,
            seconds_in_interval=seconds_in_interval,
            num_intervals=num_intervals,
            date_to=filter.date_to.strftime("%Y-%m-%d %H:%M:%S"),
        )
        final_query = AGGREGATE_SQL.format(null_sql=null_sql, content_sql=content_sql)
        return final_query, params, self._parse_total_volume_result(filter)
Example #3
0
    def _total_volume_query(self, entity: Entity, filter: Filter,
                            team_id: int) -> Tuple[str, Dict, Callable]:
        """Assemble the total-volume trend query for a single entity.

        Returns a ``(sql, params, parser)`` tuple: the SQL text, the merged
        parameter dict, and the result parser obtained from
        ``_parse_aggregate_volume_result`` or ``_parse_total_volume_result``.
        """
        trunc_func = get_trunc_func_ch(filter.interval)
        interval_func = get_interval_func_ch(filter.interval)
        aggregate_operation, join_condition, math_params = process_math(entity)

        # Distinct ids are joined when the math needs a join condition or when
        # counting weekly/monthly active users.
        needs_distinct_ids = (join_condition != ""
                              or entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE])
        trend_event_query = TrendsEventQuery(
            filter=filter,
            entity=entity,
            team_id=team_id,
            should_join_distinct_ids=needs_distinct_ids,
        )
        event_query, event_query_params = trend_event_query.get_query()

        content_sql_params = dict(
            aggregate_operation=aggregate_operation,
            timestamp="e.timestamp",
            interval=trunc_func,
        )
        params: Dict = {
            "team_id": team_id,
            **math_params,
            **event_query_params,
        }

        # Displays that show a single value only need the aggregate total.
        if filter.display in TRENDS_DISPLAY_BY_VALUE:
            content_sql = VOLUME_TOTAL_AGGREGATE_SQL.format(
                event_query=event_query, **content_sql_params)
            aggregate_parser = self._parse_aggregate_volume_result(
                filter, entity, team_id)
            return content_sql, params, aggregate_parser

        if entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]:
            content_sql = ACTIVE_USER_SQL.format(
                event_query=event_query,
                parsed_date_to=trend_event_query.parsed_date_to,
                parsed_date_from=trend_event_query.parsed_date_from,
                **content_sql_params,
                **trend_event_query.active_user_params,
            )
        elif filter.display == TRENDS_CUMULATIVE and entity.math == "dau":
            # Cumulative DAU wraps the event query before counting volume.
            cumulative_sql = CUMULATIVE_SQL.format(event_query=event_query)
            content_sql = VOLUME_SQL.format(event_query=cumulative_sql,
                                            **content_sql_params)
        else:
            content_sql = VOLUME_SQL.format(event_query=event_query,
                                            **content_sql_params)

        null_sql = NULL_SQL.format(trunc_func=trunc_func,
                                   interval_func=interval_func)
        params["interval"] = filter.interval
        final_query = AGGREGATE_SQL.format(null_sql=null_sql,
                                           content_sql=content_sql)
        total_parser = self._parse_total_volume_result(filter, entity, team_id)
        return final_query, params, total_parser
Example #4
0
    def test_basic_event_filter(self):
        """Check the SQL generated for a single event with only a date range."""
        trends_filter = Filter(data={
            "date_from": "2021-05-01 00:00:00",
            "date_to": "2021-05-07 00:00:00",
            "events": [{"id": "viewed", "order": 0}],
        })
        viewed_entity = Entity({"id": "viewed", "type": "events"})

        event_query = TrendsEventQuery(
            filter=trends_filter,
            entity=viewed_entity,
            team_id=self.team.pk,
        )
        query, params = event_query.get_query()

        expected_sql = """
        SELECT e.timestamp as timestamp,
        e.properties as properties
        FROM events e
        WHERE team_id = %(team_id)s
            AND event = %(event)s
            AND toStartOfDay(timestamp) >= toStartOfDay(toDateTime(%(date_from)s))
            AND timestamp <= '2021-05-07 23:59:59'
        """

        # Both sides go through sqlparse so whitespace differences are ignored.
        self.assertEqual(
            sqlparse.format(query, reindent=True),
            sqlparse.format(expected_sql, reindent=True),
        )

        # The generated query must also run.
        sync_execute(query, params)
Example #5
0
    def test_static_cohort_filter(self):
        """Check that a query filtered by a static cohort builds and executes."""
        static_cohort = _create_cohort(
            team=self.team,
            name="cohort1",
            groups=[],
            is_static=True,
        )

        cohort_property = {
            "key": "id",
            "value": static_cohort.pk,
            "type": "cohort",
        }
        trends_filter = Filter(data={
            "date_from": "2021-05-01 00:00:00",
            "date_to": "2021-05-07 00:00:00",
            "events": [{"id": "viewed", "order": 0}],
            "properties": [cohort_property],
        })
        viewed_entity = Entity({"id": "viewed", "type": "events"})

        event_query = TrendsEventQuery(
            filter=trends_filter,
            entity=viewed_entity,
            team_id=self.team.pk,
        )
        query, params = event_query.get_query()
        sync_execute(query, params)
Example #6
0
    def _run_query(self, filter: Filter, entity=None):
        """Build and execute the trends event query, returning ``(result, query)``.

        When ``entity`` is falsy, the filter's first entity is used instead.
        """
        target_entity = entity if entity else filter.entities[0]
        event_query = TrendsEventQuery(filter=filter, entity=target_entity, team=self.team)
        query, params = event_query.get_query()
        return sync_execute(query, params), query
Example #7
0
    def test_denormalised_props(self):
        """Check that a denormalized property is referenced by its own column name."""
        test_prop_filter = {"key": "test_prop", "value": "hi"}
        filters = {
            "events": [{
                "id": "user signed up",
                "type": "events",
                "order": 0,
                "properties": [dict(test_prop_filter)],
            }],
            "date_from": "2020-01-01",
            "properties": [dict(test_prop_filter)],
            "date_to": "2020-01-14",
        }

        with self.settings(CLICKHOUSE_DENORMALIZED_PROPERTIES=["test_prop"]):
            # Two persons, each with one event carrying the property.
            Person.objects.create(
                team_id=self.team.pk,
                distinct_ids=["p1"],
                properties={"key": "value"},
            )
            _create_event(
                team=self.team,
                event="$pageview",
                distinct_id="p1",
                timestamp="2020-01-02T12:00:00Z",
                properties={"test_prop": "hi"},
            )

            Person.objects.create(
                team_id=self.team.pk,
                distinct_ids=["p2"],
                properties={"key_2": "value_2"},
            )
            _create_event(
                team=self.team,
                event="$pageview",
                distinct_id="p2",
                timestamp="2020-01-02T12:00:00Z",
                properties={"test_prop": "hi"},
            )

            trends_filter = Filter(data=filters)
            event_query = TrendsEventQuery(
                filter=trends_filter,
                entity=trends_filter.entities[0],
                team_id=self.team.pk,
            )
            query, params = event_query.get_query()
            sync_execute(query, params)
            self.assertIn("properties_test_prop", query)
Example #8
0
    def test_entity_filtered_by_cohort(self):
        """Check that a query whose entity is filtered by a cohort builds and executes."""
        name_cohort = _create_cohort(
            team=self.team,
            name="cohort1",
            groups=[{"properties": {"name": "test"}}],
        )

        pageview_entity = {
            "id": "$pageview",
            "order": 0,
            "properties": [{
                "key": "id",
                "type": "cohort",
                "value": name_cohort.pk,
            }],
        }
        trends_filter = Filter(data={
            "date_from": "2021-05-01 00:00:00",
            "date_to": "2021-05-07 00:00:00",
            "events": [pageview_entity],
        })

        # One person whose name matches the cohort definition, one whose name does not.
        Person.objects.create(
            team_id=self.team.pk,
            distinct_ids=["p1"],
            properties={"name": "test"},
        )
        _create_event(
            team=self.team,
            event="$pageview",
            distinct_id="p1",
            timestamp="2020-01-02T12:00:00Z",
        )

        Person.objects.create(
            team_id=self.team.pk,
            distinct_ids=["p2"],
            properties={"name": "foo"},
        )
        _create_event(
            team=self.team,
            event="$pageview",
            distinct_id="p2",
            timestamp="2020-01-02T12:01:00Z",
        )

        event_query = TrendsEventQuery(
            filter=trends_filter,
            entity=trends_filter.entities[0],
            team_id=self.team.pk,
        )
        query, params = event_query.get_query()
        sync_execute(query, params)
Example #9
0
    def actor_query(self, limit_actors: Optional[bool] = True) -> Tuple[str, Dict]:
        """Build the SQL and parameters that fetch the actors behind a trend.

        If the filter carries a breakdown, a matching property is first
        AND-combined into ``self._filter`` (this replaces ``self._filter``).
        The LIMIT/OFFSET clauses are only rendered when ``limit_actors`` is
        truthy; the ``limit`` (fixed at 200) and ``offset`` parameters are
        always included in the returned dict.
        """
        breakdown_type = self._filter.breakdown_type
        breakdown_prop = None

        if breakdown_type == "cohort" and self._filter.breakdown_value != "all":
            # Cohort breakdown: restrict to members of the selected cohort.
            cohort = Cohort.objects.get(pk=self._filter.breakdown_value, team_id=self._team.pk)
            breakdown_prop = Property(key="id", value=cohort.pk, type="cohort")
        elif (
            breakdown_type
            and isinstance(self._filter.breakdown, str)
            and isinstance(self._filter.breakdown_value, str)
        ):
            property_kwargs = {
                "key": self._filter.breakdown,
                "value": self._filter.breakdown_value,
                "type": breakdown_type,
            }
            if breakdown_type == "group":
                property_kwargs["group_type_index"] = self._filter.breakdown_group_type_index
            breakdown_prop = Property(**property_kwargs)

        if breakdown_prop is not None:
            combined = self._filter.property_groups.combine_properties(
                PropertyOperatorType.AND, [breakdown_prop]
            )
            self._filter = self._filter.with_data({"properties": combined.to_dict()})

        by_groups = self.is_aggregating_by_groups
        with_recordings = self._filter.include_recordings

        extra_fields: List[str] = [] if by_groups else ["distinct_id", "team_id"]
        if with_recordings:
            extra_fields += ["uuid"]

        if with_recordings:
            recording_properties = ["$window_id", "$session_id"]
            matching_events_select_statement = (
                ", groupUniqArray(10)((timestamp, uuid, $session_id, $window_id)) as matching_events"
            )
        else:
            recording_properties = []
            matching_events_select_statement = ""

        events_query, params = TrendsEventQuery(
            filter=self._filter,
            team=self._team,
            entity=self.entity,
            should_join_distinct_ids=not by_groups,
            should_join_persons=not by_groups,
            extra_event_properties=recording_properties,
            extra_fields=extra_fields,
        ).get_query()

        actors_sql = GET_ACTORS_FROM_EVENT_QUERY.format(
            id_field=self._aggregation_actor_field,
            matching_events_select_statement=matching_events_select_statement,
            events_query=events_query,
            limit="LIMIT %(limit)s" if limit_actors else "",
            offset="OFFSET %(offset)s" if limit_actors else "",
        )
        actors_params = {**params, "offset": self._filter.offset, "limit": 200}
        return actors_sql, actors_params
Example #10
0
    def _total_volume_query(self, entity: Entity, filter: Filter,
                            team: Team) -> Tuple[str, Dict, Callable]:
        """Assemble the total-volume trend query for a single entity.

        Returns a ``(sql, params, parser)`` tuple: the SQL text, the merged
        parameter dict, and the result parser obtained from
        ``_parse_aggregate_volume_result`` or ``_parse_total_volume_result``.
        """
        trunc_func = get_trunc_func_ch(filter.interval)
        interval_func = get_interval_func_ch(filter.interval)
        aggregate_operation, join_condition, math_params = process_math(
            entity, team)

        # Distinct ids are joined when the math needs a join condition, or for
        # weekly/monthly active users unless the team aggregates by distinct id.
        needs_distinct_ids = join_condition != "" or (
            entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]
            and not team.aggregate_users_by_distinct_id)
        trend_event_query = TrendsEventQuery(
            filter=filter,
            entity=entity,
            team=team,
            should_join_distinct_ids=needs_distinct_ids,
        )
        event_query, event_query_params = trend_event_query.get_query()

        content_sql_params = dict(
            aggregate_operation=aggregate_operation,
            timestamp="e.timestamp",
            interval=trunc_func,
        )
        params: Dict = {
            "team_id": team.id,
            **math_params,
            **event_query_params,
        }

        # Displays that show a single value only need the aggregate total.
        if filter.display in TRENDS_DISPLAY_BY_VALUE:
            content_sql = VOLUME_TOTAL_AGGREGATE_SQL.format(
                event_query=event_query, **content_sql_params)
            aggregate_parser = self._parse_aggregate_volume_result(
                filter, entity, team.id)
            return content_sql, params, aggregate_parser

        if entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]:
            if team.aggregate_users_by_distinct_id:
                aggregator = "distinct_id"
            else:
                aggregator = "person_id"
            content_sql = ACTIVE_USER_SQL.format(
                event_query=event_query,
                parsed_date_to=trend_event_query.parsed_date_to,
                parsed_date_from=trend_event_query.parsed_date_from,
                aggregator=aggregator,
                **content_sql_params,
                **trend_event_query.active_user_params,
            )
        elif filter.display == TRENDS_CUMULATIVE and entity.math == "dau":
            # Cumulative DAU wraps the event query before counting volume.
            cumulative_sql = CUMULATIVE_SQL.format(event_query=event_query)
            content_sql = VOLUME_SQL.format(event_query=cumulative_sql,
                                            **content_sql_params)
        else:
            content_sql = VOLUME_SQL.format(event_query=event_query,
                                            **content_sql_params)

        null_sql = NULL_SQL.format(trunc_func=trunc_func,
                                   interval_func=interval_func)
        params["interval"] = filter.interval

        # With more than one smoothing interval, wrap the sum in a rolling
        # average window; otherwise a plain sum is used. Skipping the window
        # in the single-interval case is possibly an unnecessary optimization.
        if filter.smoothing_intervals > 1:
            smoothing_operation = f"""
                    AVG(SUM(total))
                    OVER (
                        ORDER BY day_start
                        ROWS BETWEEN {filter.smoothing_intervals - 1} PRECEDING
                        AND CURRENT ROW
                    )"""
        else:
            smoothing_operation = "SUM(total)"

        # Kept as a separate check from the one above so non-integer smoothing
        # values are treated exactly as before.
        if filter.smoothing_intervals < 2:
            aggregate = "count"
        else:
            aggregate = "floor(count)"

        final_query = AGGREGATE_SQL.format(
            null_sql=null_sql,
            content_sql=content_sql,
            smoothing_operation=smoothing_operation,
            aggregate=aggregate,
        )
        total_parser = self._parse_total_volume_result(filter, entity, team.id)
        return final_query, params, total_parser