Example #1
0
    def _get_date_filter(self) -> Tuple[str, Dict]:
        date_filter = ""
        date_params: Dict[str, Any] = {}
        interval_annotation = get_trunc_func_ch(self._filter.interval)
        _, _, round_interval = get_time_diff(self._filter.interval or "day",
                                             self._filter.date_from,
                                             self._filter.date_to,
                                             team_id=self._team_id)
        _, parsed_date_to, date_params = parse_timestamps(
            filter=self._filter, team_id=self._team_id)
        parsed_date_from = date_from_clause(interval_annotation,
                                            round_interval)

        self.parsed_date_from = parsed_date_from
        self.parsed_date_to = parsed_date_to

        if self._entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]:
            date_filter = "{parsed_date_from_prev_range} {parsed_date_to}"
            format_params = get_active_user_params(self._filter, self._entity,
                                                   self._team_id)
            self.active_user_params = format_params

            date_filter = date_filter.format(**format_params,
                                             parsed_date_to=parsed_date_to)
        else:
            date_filter = "{parsed_date_from} {parsed_date_to}".format(
                parsed_date_from=parsed_date_from,
                parsed_date_to=parsed_date_to)

        return date_filter, date_params
    def stickiness(self, entity: Entity, filter: StickinessFilter, team_id: int) -> Dict[str, Any]:

        parsed_date_from, parsed_date_to, _ = parse_timestamps(filter=filter, team_id=team_id)
        prop_filters, prop_filter_params = parse_prop_clauses(filter.properties, team_id)
        trunc_func = get_trunc_func_ch(filter.interval)

        params: Dict = {"team_id": team_id}
        params = {**params, **prop_filter_params, "num_intervals": filter.num_intervals}
        if entity.type == TREND_FILTER_TYPE_ACTIONS:
            action = Action.objects.get(pk=entity.id)
            action_query, action_params = format_action_filter(action)
            if action_query == "":
                return {}

            params = {**params, **action_params}
            content_sql = STICKINESS_ACTIONS_SQL.format(
                team_id=team_id,
                actions_query=action_query,
                parsed_date_from=parsed_date_from,
                parsed_date_to=parsed_date_to,
                filters=prop_filters,
                trunc_func=trunc_func,
            )
        else:
            content_sql = STICKINESS_SQL.format(
                team_id=team_id,
                event=entity.id,
                parsed_date_from=parsed_date_from,
                parsed_date_to=parsed_date_to,
                filters=prop_filters,
                trunc_func=trunc_func,
            )

        counts = sync_execute(content_sql, params)
        return self.process_result(counts, filter)
Example #3
0
    def _format_lifecycle_query(self, entity: Entity, filter: Filter,
                                team_id: int) -> Tuple[str, Dict, Callable]:
        date_from = filter.date_from

        if not date_from:
            date_from = get_earliest_timestamp(team_id)

        interval = filter.interval or "day"
        num_intervals, seconds_in_interval, _ = get_time_diff(
            interval, filter.date_from, filter.date_to, team_id)
        interval_increment, interval_string, sub_interval_string = self.get_interval(
            interval)
        trunc_func = get_trunc_func_ch(interval)
        event_query = ""
        event_params: Dict[str, Any] = {}

        props_to_filter = [*filter.properties, *entity.properties]
        prop_filters, prop_filter_params = parse_prop_clauses(
            props_to_filter,
            team_id,
            filter_test_accounts=filter.filter_test_accounts)

        _, _, date_params = parse_timestamps(filter=filter, team_id=team_id)

        if entity.type == TREND_FILTER_TYPE_ACTIONS:
            try:
                action = entity.get_action()
                event_query, event_params = format_action_filter(action)
            except:
                return "", {}, self._parse_result(filter, entity)
        else:
            event_query = "event = %(event)s"
            event_params = {"event": entity.id}

        return (
            LIFECYCLE_SQL.format(
                interval=interval_string,
                trunc_func=trunc_func,
                event_query=event_query,
                filters=prop_filters,
                sub_interval=sub_interval_string,
                GET_TEAM_PERSON_DISTINCT_IDS=GET_TEAM_PERSON_DISTINCT_IDS,
            ),
            {
                "team_id":
                team_id,
                "prev_date_from":
                (date_from - interval_increment).strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.interval == "hour"
                    or filter.interval == "minute" else " 00:00:00")),
                "num_intervals":
                num_intervals,
                "seconds_in_interval":
                seconds_in_interval,
                **event_params,
                **date_params,
                **prop_filter_params,
            },
            self._parse_result(filter, entity),
        )
def _process_content_sql(target_entity: Entity, filter: StickinessFilter,
                         team: Team) -> Tuple[str, Dict[str, Any]]:
    parsed_date_from, parsed_date_to, _ = parse_timestamps(filter=filter,
                                                           team_id=team.pk)
    prop_filters, prop_filter_params = parse_prop_clauses(
        filter.properties,
        team.pk,
        filter_test_accounts=filter.filter_test_accounts)
    entity_sql, entity_params = _format_entity_filter(entity=target_entity)
    trunc_func = get_trunc_func_ch(filter.interval)

    params: Dict = {
        "team_id": team.pk,
        **prop_filter_params,
        "stickiness_day": filter.selected_interval,
        **entity_params,
        "offset": filter.offset,
    }

    content_sql = STICKINESS_PEOPLE_SQL.format(
        entity_filter=entity_sql,
        parsed_date_from=parsed_date_from,
        parsed_date_to=parsed_date_to,
        filters=prop_filters,
        trunc_func=trunc_func,
        GET_TEAM_PERSON_DISTINCT_IDS=GET_TEAM_PERSON_DISTINCT_IDS,
    )
    return content_sql, params
    def _retrieve_people(self, target_entity: Entity, filter: StickinessFilter,
                         team: Team) -> ReturnDict:

        parsed_date_from, parsed_date_to, _ = parse_timestamps(filter=filter,
                                                               team_id=team.pk)
        prop_filters, prop_filter_params = parse_prop_clauses(
            filter.properties, team.pk)
        entity_sql, entity_params = self._format_entity_filter(
            entity=target_entity)
        trunc_func = get_trunc_func_ch(filter.interval)

        params: Dict = {
            "team_id": team.pk,
            **prop_filter_params,
            "stickiness_day": filter.selected_interval,
            **entity_params,
            "offset": filter.offset,
        }

        content_sql = STICKINESS_PEOPLE_SQL.format(
            entity_filter=entity_sql,
            parsed_date_from=parsed_date_from,
            parsed_date_to=parsed_date_to,
            filters=prop_filters,
            trunc_func=trunc_func,
        )

        people = sync_execute(
            PEOPLE_SQL.format(
                content_sql=content_sql,
                query="",
                latest_person_sql=GET_LATEST_PERSON_SQL.format(query="")),
            params,
        )
        return ClickhousePersonSerializer(people, many=True).data
    def _retrieve_people(self, filter: RetentionFilter, team: Team):
        period = filter.period
        is_first_time_retention = filter.retention_type == RETENTION_FIRST_TIME
        trunc_func = get_trunc_func_ch(period)
        prop_filters, prop_filter_params = parse_prop_clauses(
            filter.properties, team.pk)

        returning_entity = filter.returning_entity if filter.selected_interval > 0 else filter.target_entity
        target_query, target_params = self._get_condition(filter.target_entity,
                                                          table="e")
        target_query_formatted = "AND {target_query}".format(
            target_query=target_query)
        return_query, return_params = self._get_condition(returning_entity,
                                                          table="e",
                                                          prepend="returning")
        return_query_formatted = "AND {return_query}".format(
            return_query=return_query)

        reference_event_query = (REFERENCE_EVENT_UNIQUE_SQL
                                 if is_first_time_retention else
                                 REFERENCE_EVENT_SQL).format(
                                     target_query=target_query_formatted,
                                     filters=prop_filters,
                                     trunc_func=trunc_func,
                                 )
        reference_date_from = filter.date_from
        reference_date_to = filter.date_from + filter.period_increment
        date_from = filter.date_from + filter.selected_interval * filter.period_increment
        date_to = date_from + filter.period_increment

        result = sync_execute(
            RETENTION_PEOPLE_SQL.format(
                reference_event_query=reference_event_query,
                target_query=return_query_formatted,
                filters=prop_filters),
            {
                "team_id":
                team.pk,
                "start_date":
                date_from.strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                "end_date":
                date_to.strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                "reference_start_date":
                reference_date_from.strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                "reference_end_date":
                reference_date_to.strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                "offset":
                filter.offset,
                **target_params,
                **return_params,
                **prop_filter_params,
            },
        )
        serialized = ClickhousePersonSerializer(result, many=True).data
        return serialized
Example #7
0
    def _total_volume_query(self, entity: Entity, filter: Filter, team_id: int) -> Tuple[str, Dict, Callable]:

        interval_annotation = get_trunc_func_ch(filter.interval)
        num_intervals, seconds_in_interval, _ = get_time_diff(
            filter.interval or "day", filter.date_from, filter.date_to, team_id=team_id
        )
        aggregate_operation, join_condition, math_params = process_math(entity)

        trend_event_query = TrendsEventQuery(
            filter=filter,
            entity=entity,
            team_id=team_id,
            should_join_distinct_ids=True
            if join_condition != "" or entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]
            else False,
        )
        event_query, event_query_params = trend_event_query.get_query()

        content_sql_params = {
            "aggregate_operation": aggregate_operation,
            "timestamp": "e.timestamp",
            "interval": interval_annotation,
        }
        params: Dict = {"team_id": team_id}
        params = {**params, **math_params, **event_query_params}

        if filter.display in TRENDS_DISPLAY_BY_VALUE:
            content_sql = VOLUME_TOTAL_AGGREGATE_SQL.format(event_query=event_query, **content_sql_params)
            time_range = enumerate_time_range(filter, seconds_in_interval)

            return (
                content_sql,
                params,
                lambda result: [
                    {"aggregated_value": result[0][0] if result and len(result) else 0, "days": time_range}
                ],
            )
        else:

            if entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]:
                content_sql = ACTIVE_USER_SQL.format(
                    event_query=event_query,
                    **content_sql_params,
                    parsed_date_to=trend_event_query.parsed_date_to,
                    parsed_date_from=trend_event_query.parsed_date_from,
                    **trend_event_query.active_user_params
                )
            else:
                content_sql = VOLUME_SQL.format(event_query=event_query, **content_sql_params)

            null_sql = NULL_SQL.format(
                interval=interval_annotation,
                seconds_in_interval=seconds_in_interval,
                num_intervals=num_intervals,
                date_to=filter.date_to.strftime("%Y-%m-%d %H:%M:%S"),
            )
            final_query = AGGREGATE_SQL.format(null_sql=null_sql, content_sql=content_sql)
            return final_query, params, self._parse_total_volume_result(filter)
Example #8
0
    def calculate_avg(self, filter: Filter, team: Team):

        parsed_date_from, parsed_date_to, _ = parse_timestamps(filter, team.pk)

        filters, params = parse_prop_clauses(
            filter.properties, team.pk, filter_test_accounts=filter.filter_test_accounts
        )

        interval_notation = get_trunc_func_ch(filter.interval)
        num_intervals, seconds_in_interval, _ = get_time_diff(
            filter.interval or "day", filter.date_from, filter.date_to, team.pk
        )

        entity_conditions, entity_params = entity_query_conditions(filter, team)
        if not entity_conditions:
            entity_conditions = ["event != '$feature_flag_called'"]  # default conditino

        params = {**params, **entity_params}
        entity_query = " OR ".join(entity_conditions)

        avg_query = SESSIONS_NO_EVENTS_SQL.format(
            team_id=team.pk,
            date_from=parsed_date_from,
            date_to=parsed_date_to,
            filters=filters,
            sessions_limit="",
            entity_filter=f"AND ({entity_query})",
        )
        per_period_query = AVERAGE_PER_PERIOD_SQL.format(sessions=avg_query, interval=interval_notation)

        null_sql = NULL_SQL.format(
            date_to=filter.date_to.strftime("%Y-%m-%d 00:00:00"),
            interval=interval_notation,
            num_intervals=num_intervals,
            seconds_in_interval=seconds_in_interval,
        )

        final_query = AVERAGE_SQL.format(sessions=per_period_query, null_sql=null_sql)

        params = {**params, "team_id": team.pk}
        response = sync_execute(final_query, params)
        values = self.clean_values(filter, response)
        time_series_data = append_data(values, interval=filter.interval, math=None)
        scaled_data, _ = scale_time_series(time_series_data["data"])
        time_series_data.update({"data": scaled_data})
        # calculate average
        total = sum(val[1] for val in values)

        if total == 0:
            return []

        valid_days = sum(1 if val[1] else 0 for val in values)
        overall_average = (total / valid_days) if valid_days else 0

        result = self._format_avg(overall_average)
        time_series_data.update(result)

        return [time_series_data]
Example #9
0
    def get_query(self) -> str:
        step_counts = self.get_step_counts_without_aggregation_query()
        # Expects multiple rows for same person, first event time, steps taken.
        self.params.update(self.funnel_order.params)

        reached_from_step_count_condition, reached_to_step_count_condition, _ = self.get_steps_reached_conditions(
        )
        trunc_func = get_trunc_func_ch(self._filter.interval)
        interval_func = get_interval_func_ch(self._filter.interval)

        if self._filter.date_from is None:
            _date_from = get_earliest_timestamp(self._team.pk)
        else:
            _date_from = self._filter.date_from

        breakdown_clause = self._get_breakdown_prop()
        formatted_date_from = format_ch_timestamp(_date_from, self._filter)
        formatted_date_to = format_ch_timestamp(self._filter.date_to,
                                                self._filter)

        self.params.update({
            "formatted_date_from": formatted_date_from,
            "formatted_date_to": formatted_date_to,
            "interval": self._filter.interval,
        })

        query = f"""
            SELECT
                entrance_period_start,
                reached_from_step_count,
                reached_to_step_count,
                if(reached_from_step_count > 0, round(reached_to_step_count / reached_from_step_count * 100, 2), 0) AS conversion_rate
                {breakdown_clause}
            FROM (
                SELECT
                    entrance_period_start,
                    countIf({reached_from_step_count_condition}) AS reached_from_step_count,
                    countIf({reached_to_step_count_condition}) AS reached_to_step_count
                    {breakdown_clause}
                FROM (
                    {step_counts}
                ) GROUP BY entrance_period_start {breakdown_clause}
            ) data
            RIGHT OUTER JOIN (
                SELECT
                    {trunc_func}(toDateTime(%(formatted_date_from)s) + {interval_func}(number)) AS entrance_period_start
                    {', breakdown_value as prop' if breakdown_clause else ''}
                FROM numbers(dateDiff(%(interval)s, toDateTime(%(formatted_date_from)s), toDateTime(%(formatted_date_to)s)) + 1) AS period_offsets
                {'ARRAY JOIN (%(breakdown_values)s) AS breakdown_value' if breakdown_clause else ''}
            ) fill
            USING (entrance_period_start {breakdown_clause})
            ORDER BY entrance_period_start ASC
            SETTINGS allow_experimental_window_functions = 1"""

        return query
Example #10
0
    def _total_volume_query(self, entity: Entity, filter: Filter,
                            team_id: int) -> Tuple[str, Dict, Callable]:
        trunc_func = get_trunc_func_ch(filter.interval)
        interval_func = get_interval_func_ch(filter.interval)
        aggregate_operation, join_condition, math_params = process_math(entity)

        trend_event_query = TrendsEventQuery(
            filter=filter,
            entity=entity,
            team_id=team_id,
            should_join_distinct_ids=True if join_condition != ""
            or entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE] else False,
        )
        event_query, event_query_params = trend_event_query.get_query()

        content_sql_params = {
            "aggregate_operation": aggregate_operation,
            "timestamp": "e.timestamp",
            "interval": trunc_func,
        }
        params: Dict = {"team_id": team_id}
        params = {**params, **math_params, **event_query_params}

        if filter.display in TRENDS_DISPLAY_BY_VALUE:
            content_sql = VOLUME_TOTAL_AGGREGATE_SQL.format(
                event_query=event_query, **content_sql_params)

            return (content_sql, params,
                    self._parse_aggregate_volume_result(
                        filter, entity, team_id))
        else:

            if entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]:
                content_sql = ACTIVE_USER_SQL.format(
                    event_query=event_query,
                    **content_sql_params,
                    parsed_date_to=trend_event_query.parsed_date_to,
                    parsed_date_from=trend_event_query.parsed_date_from,
                    **trend_event_query.active_user_params,
                )
            elif filter.display == TRENDS_CUMULATIVE and entity.math == "dau":
                cumulative_sql = CUMULATIVE_SQL.format(event_query=event_query)
                content_sql = VOLUME_SQL.format(event_query=cumulative_sql,
                                                **content_sql_params)
            else:
                content_sql = VOLUME_SQL.format(event_query=event_query,
                                                **content_sql_params)

            null_sql = NULL_SQL.format(trunc_func=trunc_func,
                                       interval_func=interval_func)
            params["interval"] = filter.interval
            final_query = AGGREGATE_SQL.format(null_sql=null_sql,
                                               content_sql=content_sql)
            return final_query, params, self._parse_total_volume_result(
                filter, entity, team_id)
Example #11
0
 def _get_funnel_trend_null_sql(self):
     interval_annotation = get_trunc_func_ch(self._filter.interval)
     num_intervals, seconds_in_interval, round_interval = get_time_diff(
         self._filter.interval or "day", self._filter.date_from, self._filter.date_to, team_id=self._team.id
     )
     funnel_trend_null_sql = NULL_SQL_FUNNEL_TRENDS.format(
         interval=interval_annotation,
         seconds_in_interval=seconds_in_interval,
         num_intervals=num_intervals,
         date_to=self._filter.date_to.strftime("%Y-%m-%d %H:%M:%S"),
     )
     return funnel_trend_null_sql
Example #12
0
    def get_query(self) -> str:

        steps_per_person_query = self.funnel_order.get_step_counts_without_aggregation_query(
        )
        # expects multiple rows for same person, first event time, steps taken.
        self.params.update(self.funnel_order.params)

        num_intervals, seconds_in_interval, _ = get_time_diff(
            self._filter.interval or "day",
            self._filter.date_from,
            self._filter.date_to,
            team_id=self._team.pk)
        interval_method = get_trunc_func_ch(self._filter.interval)

        # How many steps must have been done to count for the denominator
        from_step = self._filter.funnel_from_step or 1
        # How many steps must have been done to count for the numerator
        to_step = self._filter.funnel_to_step or len(self._filter.entities)

        reached_from_step_count_condition = f"steps_completed >= {from_step}"
        reached_to_step_count_condition = f"steps_completed >= {to_step}"

        query = f"""
            SELECT
                entrance_period_start,
                reached_from_step_count,
                reached_to_step_count,
                if(reached_from_step_count > 0, round(reached_to_step_count / reached_from_step_count * 100, 2), 0) AS conversion_rate
            FROM (
                SELECT
                    entrance_period_start,
                    countIf({reached_from_step_count_condition}) AS reached_from_step_count,
                    countIf({reached_to_step_count_condition}) AS reached_to_step_count
                FROM (
                    SELECT
                        person_id,
                        {interval_method}(timestamp) AS entrance_period_start,
                        max(steps) AS steps_completed
                    FROM (
                        {steps_per_person_query}
                    ) GROUP BY person_id, entrance_period_start
                ) GROUP BY entrance_period_start
            ) data
            RIGHT JOIN (
                SELECT
                    {interval_method}(toDateTime('{self._filter.date_from.strftime(TIMESTAMP_FORMAT)}') + number * {seconds_in_interval}) AS entrance_period_start
                FROM numbers({num_intervals}) AS period_offsets
            ) fill
            USING (entrance_period_start)
            ORDER BY entrance_period_start ASC
            SETTINGS allow_experimental_window_functions = 1"""

        return query
Example #13
0
    def _normal_query(self, entity: Entity, filter: Filter, team_id: int) -> Tuple[str, Dict, Callable]:

        interval_annotation = get_trunc_func_ch(filter.interval)
        num_intervals, seconds_in_interval, round_interval = get_time_diff(
            filter.interval or "day", filter.date_from, filter.date_to, team_id=team_id
        )
        _, parsed_date_to, date_params = parse_timestamps(filter=filter, team_id=team_id)

        props_to_filter = [*filter.properties, *entity.properties]
        prop_filters, prop_filter_params = parse_prop_clauses(props_to_filter, team_id)

        aggregate_operation, join_condition, math_params = process_math(entity)

        params: Dict = {"team_id": team_id}
        params = {**params, **prop_filter_params, **math_params, **date_params}
        content_sql_params = {
            "interval": interval_annotation,
            "parsed_date_from": date_from_clause(interval_annotation, round_interval),
            "parsed_date_to": parsed_date_to,
            "timestamp": "timestamp",
            "team_id": team_id,
            "filters": prop_filters,
            "event_join": join_condition,
            "aggregate_operation": aggregate_operation,
        }

        entity_params, entity_format_params = self._populate_entity_params(entity)
        params = {**params, **entity_params}
        content_sql_params = {**content_sql_params, **entity_format_params}

        if filter.display in TRENDS_DISPLAY_BY_VALUE:
            agg_query = self._determine_single_aggregate_query(filter, entity)
            content_sql = agg_query.format(**content_sql_params)

            return (
                content_sql,
                params,
                lambda result: [{"aggregated_value": result[0][0] if result and len(result) else 0}],
            )
        else:
            content_sql = self._determine_trend_aggregate_query(filter, entity)
            content_sql = content_sql.format(**content_sql_params)

            null_sql = NULL_SQL.format(
                interval=interval_annotation,
                seconds_in_interval=seconds_in_interval,
                num_intervals=num_intervals,
                date_to=filter.date_to.strftime("%Y-%m-%d %H:%M:%S"),
            )
            final_query = AGGREGATE_SQL.format(null_sql=null_sql, content_sql=content_sql)
            return final_query, params, self._parse_normal_result(filter)
Example #14
0
    def calculate_avg(self, filter: Filter, team: Team):

        parsed_date_from, parsed_date_to, _ = parse_timestamps(filter, team.pk)

        filters, params = parse_prop_clauses(filter.properties, team.pk)

        interval_notation = get_trunc_func_ch(filter.interval)
        num_intervals, seconds_in_interval = get_time_diff(
            filter.interval or "day", filter.date_from, filter.date_to,
            team.pk)

        avg_query = SESSIONS_NO_EVENTS_SQL.format(
            team_id=team.pk,
            date_from=parsed_date_from,
            date_to=parsed_date_to,
            filters=filters,
            sessions_limit="",
        )
        per_period_query = AVERAGE_PER_PERIOD_SQL.format(
            sessions=avg_query, interval=interval_notation)

        null_sql = NULL_SQL.format(
            date_to=filter.date_to.strftime("%Y-%m-%d 00:00:00"),
            interval=interval_notation,
            num_intervals=num_intervals,
            seconds_in_interval=seconds_in_interval,
        )

        final_query = AVERAGE_SQL.format(sessions=per_period_query,
                                         null_sql=null_sql)

        params = {**params, "team_id": team.pk}
        response = sync_execute(final_query, params)
        values = self.clean_values(filter, response)
        time_series_data = append_data(values,
                                       interval=filter.interval,
                                       math=None)
        # calculate average
        total = sum(val[1] for val in values)

        if total == 0:
            return []

        valid_days = sum(1 if val[1] else 0 for val in values)
        overall_average = (total / valid_days) if valid_days else 0

        result = self._format_avg(overall_average)
        time_series_data.update(result)

        return [time_series_data]
Example #15
0
    def get_step_counts_without_aggregation_query(
            self,
            *,
            specific_entrance_period_start: Optional[datetime] = None) -> str:
        steps_per_person_query = self.funnel_order.get_step_counts_without_aggregation_query(
        )
        interval_method = get_trunc_func_ch(self._filter.interval)

        # This is used by funnel trends when we only need data for one period, e.g. person per data point
        if specific_entrance_period_start:
            self.params[
                "entrance_period_start"] = specific_entrance_period_start.strftime(
                    TIMESTAMP_FORMAT)

        return f"""
Example #16
0
    def get_query(self) -> str:
        step_counts = self.get_step_counts_without_aggregation_query()
        # Expects multiple rows for same person, first event time, steps taken.
        self.params.update(self.funnel_order.params)

        reached_from_step_count_condition, reached_to_step_count_condition, _ = self.get_steps_reached_conditions(
        )
        interval_method = get_trunc_func_ch(self._filter.interval)
        num_intervals, seconds_in_interval, _ = get_time_diff(
            self._filter.interval or "day",
            self._filter.date_from,
            self._filter.date_to,
            team_id=self._team.pk)

        breakdown_clause = self._get_breakdown_prop()

        query = f"""
            SELECT
                entrance_period_start,
                reached_from_step_count,
                reached_to_step_count,
                if(reached_from_step_count > 0, round(reached_to_step_count / reached_from_step_count * 100, 2), 0) AS conversion_rate
                {breakdown_clause}
            FROM (
                SELECT
                    entrance_period_start,
                    countIf({reached_from_step_count_condition}) AS reached_from_step_count,
                    countIf({reached_to_step_count_condition}) AS reached_to_step_count
                    {breakdown_clause}
                FROM (
                    {step_counts}
                ) GROUP BY entrance_period_start {breakdown_clause}
            ) data
            RIGHT OUTER JOIN (
                SELECT
                    {interval_method}(toDateTime('{self._filter.date_from.strftime(TIMESTAMP_FORMAT)}') + number * {seconds_in_interval}) AS entrance_period_start
                    {', breakdown_value as prop' if breakdown_clause else ''}
                FROM numbers({num_intervals}) AS period_offsets
                {'ARRAY JOIN (%(breakdown_values)s) AS breakdown_value' if breakdown_clause else ''}
            ) fill
            USING (entrance_period_start {breakdown_clause})
            ORDER BY entrance_period_start ASC
            SETTINGS allow_experimental_window_functions = 1"""

        return query
Example #17
0
    def _configure_sql(self):
        funnel_trend_null_sql = self._get_funnel_trend_null_sql()
        parsed_date_from, parsed_date_to, _ = self._get_dates()
        prop_filters, _ = self._get_filters()
        steps = self._get_steps()
        step_count = len(steps)
        interval_method = get_trunc_func_ch(self._filter.interval)

        sql = FUNNEL_TREND_SQL.format(
            team_id=self._team.pk,
            steps=", ".join(steps),
            step_count=step_count,
            filters=prop_filters.replace("uuid IN", "events.uuid IN", 1),
            parsed_date_from=parsed_date_from,
            parsed_date_to=parsed_date_to,
            within_time=self._filter.milliseconds_from_days(self._filter.funnel_window_days),
            latest_distinct_id_sql=GET_LATEST_PERSON_DISTINCT_ID_SQL,
            funnel_trend_null_sql=funnel_trend_null_sql,
            interval_method=interval_method,
        )
        return sql
Example #18
0
    def get_step_counts_without_aggregation_query(
            self,
            *,
            specific_entrance_period_start: Optional[datetime] = None) -> str:
        steps_per_person_query = self.funnel_order.get_step_counts_without_aggregation_query(
        )

        trunc_func = get_trunc_func_ch(self._filter.interval)

        # This is used by funnel trends when we only need data for one period, e.g. person per data point
        if specific_entrance_period_start:
            self.params[
                "entrance_period_start"] = specific_entrance_period_start.strftime(
                    TIMESTAMP_FORMAT)

        event_select_clause = ""
        if self._filter.include_recordings:
            max_steps = len(self._filter.entities)
            event_select_clause = self._get_matching_event_arrays(max_steps)

        breakdown_clause = self._get_breakdown_prop()
        return f"""
Example #19
0
    def get_people(
        self,
        filter: Filter,
        team_id: int,
        target_date: datetime,
        lifecycle_type: str,
        request: Request,
        limit: int = 100,
    ):
        entity = filter.entities[0]
        date_from = filter.date_from

        if not date_from:
            date_from = get_earliest_timestamp(team_id)

        interval = filter.interval or "day"
        num_intervals, seconds_in_interval, _ = get_time_diff(interval,
                                                              filter.date_from,
                                                              filter.date_to,
                                                              team_id=team_id)
        interval_increment, interval_string, sub_interval_string = self.get_interval(
            interval)
        trunc_func = get_trunc_func_ch(interval)
        event_query = ""
        event_params: Dict[str, Any] = {}

        _, _, date_params = parse_timestamps(filter=filter, team_id=team_id)

        if entity.type == TREND_FILTER_TYPE_ACTIONS:
            try:
                action = entity.get_action()
                event_query, event_params = format_action_filter(action)
            except:
                return []
        else:
            event_query = "event = %(event)s"
            event_params = {"event": entity.id}

        props_to_filter = [*filter.properties, *entity.properties]
        prop_filters, prop_filter_params = parse_prop_clauses(
            props_to_filter,
            team_id,
            filter_test_accounts=filter.filter_test_accounts)

        result = sync_execute(
            LIFECYCLE_PEOPLE_SQL.format(
                interval=interval_string,
                trunc_func=trunc_func,
                event_query=event_query,
                filters=prop_filters,
                sub_interval=sub_interval_string,
                GET_TEAM_PERSON_DISTINCT_IDS=GET_TEAM_PERSON_DISTINCT_IDS,
            ),
            {
                "team_id":
                team_id,
                "prev_date_from":
                (date_from - interval_increment).strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.interval == "hour"
                    or filter.interval == "minute" else " 00:00:00")),
                "num_intervals":
                num_intervals,
                "seconds_in_interval":
                seconds_in_interval,
                **event_params,
                **date_params,
                **prop_filter_params,
                "status":
                lifecycle_type,
                "target_date":
                target_date.strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.interval == "hour"
                    or filter.interval == "minute" else " 00:00:00")),
                "offset":
                filter.offset,
                "limit":
                limit,
            },
        )
        people = get_persons_by_uuids(team_id=team_id,
                                      uuids=[p[0] for p in result])
        people = people.prefetch_related(
            Prefetch("persondistinctid_set", to_attr="distinct_ids_cache"))

        from posthog.api.person import PersonSerializer

        return PersonSerializer(people, many=True).data
    def _execute_sql(
        self,
        filter: RetentionFilter,
        team: Team,
    ) -> Dict[Tuple[int, int], Dict[str, Any]]:
        period = filter.period
        prop_filters, prop_filter_params = parse_prop_clauses(
            filter.properties,
            team.pk,
            filter_test_accounts=filter.filter_test_accounts)
        target_entity = filter.target_entity
        returning_entity = filter.returning_entity
        is_first_time_retention = filter.retention_type == RETENTION_FIRST_TIME
        date_from = filter.date_from
        date_to = filter.date_to

        target_query = ""
        target_params: Dict = {}
        trunc_func = get_trunc_func_ch(period)

        target_query, target_params = self._get_condition(target_entity,
                                                          table="e")
        returning_query, returning_params = self._get_condition(
            returning_entity, table="e", prepend="returning")

        target_query_formatted = "AND {target_query}".format(
            target_query=target_query)
        returning_query_formatted = "AND {returning_query}".format(
            returning_query=returning_query)

        reference_event_sql = (REFERENCE_EVENT_UNIQUE_SQL
                               if is_first_time_retention else
                               REFERENCE_EVENT_SQL).format(
                                   target_query=target_query_formatted,
                                   filters=prop_filters,
                                   trunc_func=trunc_func,
                               )

        target_condition, _ = self._get_condition(target_entity,
                                                  table="reference_event")
        if is_first_time_retention:
            target_condition = target_condition.replace(
                "reference_event.uuid", "reference_event.min_uuid")
            target_condition = target_condition.replace(
                "reference_event.event", "reference_event.min_event")
        returning_condition, _ = self._get_condition(returning_entity,
                                                     table="event",
                                                     prepend="returning")
        result = sync_execute(
            RETENTION_SQL.format(
                target_query=target_query_formatted,
                returning_query=returning_query_formatted,
                filters=prop_filters,
                trunc_func=trunc_func,
                extra_union="UNION ALL {} ".format(reference_event_sql),
                reference_event_sql=reference_event_sql,
                target_condition=target_condition,
                returning_condition=returning_condition,
            ),
            {
                "team_id":
                team.pk,
                "start_date":
                date_from.strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                "end_date":
                date_to.strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                "reference_start_date":
                date_from.strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                "reference_end_date":
                ((date_from + filter.period_increment) if filter.display
                 == TRENDS_LINEAR else date_to).strftime("%Y-%m-%d{}".format(
                     " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                **prop_filter_params,
                **target_params,
                **returning_params,
                "period":
                period,
            },
        )

        initial_interval_result = sync_execute(
            INITIAL_INTERVAL_SQL.format(
                reference_event_sql=reference_event_sql,
                trunc_func=trunc_func,
            ),
            {
                "team_id":
                team.pk,
                "start_date":
                date_from.strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                "end_date":
                date_to.strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                "reference_start_date":
                date_from.strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                "reference_end_date":
                ((date_from + filter.period_increment) if filter.display
                 == TRENDS_LINEAR else date_to).strftime("%Y-%m-%d{}".format(
                     " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                **prop_filter_params,
                **target_params,
                **returning_params,
                "period":
                period,
            },
        )

        result_dict = {}
        for initial_res in initial_interval_result:
            result_dict.update({
                (initial_res[0], 0): {
                    "count": initial_res[1],
                    "people": []
                }
            })

        for res in result:
            result_dict.update({
                (res[0], res[1]): {
                    "count": res[2],
                    "people": []
                }
            })

        return result_dict
    def _retrieve_people_in_period(self, filter: RetentionFilter, team: Team):
        period = filter.period
        is_first_time_retention = filter.retention_type == RETENTION_FIRST_TIME
        trunc_func = get_trunc_func_ch(period)
        prop_filters, prop_filter_params = parse_prop_clauses(
            filter.properties, team.pk)

        target_query, target_params = self._get_condition(filter.target_entity,
                                                          table="e")
        target_query_formatted = "AND {target_query}".format(
            target_query=target_query)
        return_query, return_params = self._get_condition(
            filter.returning_entity, table="e", prepend="returning")
        return_query_formatted = "AND {return_query}".format(
            return_query=return_query)

        first_event_sql = (REFERENCE_EVENT_UNIQUE_PEOPLE_PER_PERIOD_SQL
                           if is_first_time_retention else
                           REFERENCE_EVENT_PEOPLE_PER_PERIOD_SQL).format(
                               target_query=target_query_formatted,
                               filters=prop_filters,
                               trunc_func=trunc_func,
                           )
        default_event_query = (
            DEFAULT_REFERENCE_EVENT_UNIQUE_PEOPLE_PER_PERIOD_SQL
            if is_first_time_retention else
            DEFAULT_REFERENCE_EVENT_PEOPLE_PER_PERIOD_SQL).format(
                target_query=target_query_formatted,
                filters=prop_filters,
                trunc_func=trunc_func,
            )

        date_from = filter.date_from + filter.selected_interval * filter.period_increment
        date_to = filter.date_to

        filter = filter.with_data({
            "total_intervals":
            filter.total_intervals - filter.selected_interval
        })

        query_result = sync_execute(
            RETENTION_PEOPLE_PER_PERIOD_SQL.format(
                returning_query=return_query_formatted,
                filters=prop_filters,
                first_event_sql=first_event_sql,
                first_event_default_sql=default_event_query,
                trunc_func=trunc_func,
            ),
            {
                "team_id":
                team.pk,
                "start_date":
                date_from.strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                "end_date":
                date_to.strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                "offset":
                filter.offset,
                "limit":
                100,
                "period":
                period,
                **target_params,
                **return_params,
                **prop_filter_params,
            },
        )
        people_dict = {}

        from posthog.api.person import PersonSerializer

        people = get_persons_by_uuids(team_id=team.pk,
                                      uuids=[val[0] for val in query_result])
        people = people.prefetch_related(
            Prefetch("persondistinctid_set", to_attr="distinct_ids_cache"))

        for person in people:
            people_dict.update(
                {str(person.uuid): PersonSerializer(person).data})

        result = self.process_people_in_period(filter, query_result,
                                               people_dict)
        return result
Example #22
0
    def _execute_sql(
        self,
        filter: RetentionFilter,
        team: Team,
    ) -> Dict[Tuple[int, int], Dict[str, Any]]:
        period = filter.period
        is_first_time_retention = filter.retention_type == RETENTION_FIRST_TIME
        date_from = filter.date_from
        trunc_func = get_trunc_func_ch(period)

        returning_event_query, returning_event_params = RetentionEventsQuery(
            filter=filter,
            team_id=team.pk,
            event_query_type=RetentionQueryType.RETURNING).get_query()
        target_event_query, target_event_params = RetentionEventsQuery(
            filter=filter,
            team_id=team.pk,
            event_query_type=RetentionQueryType.TARGET_FIRST_TIME
            if is_first_time_retention else RetentionQueryType.TARGET,
        ).get_query()

        all_params = {
            "team_id":
            team.pk,
            "start_date":
            date_from.strftime("%Y-%m-%d{}".format(
                " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
            **returning_event_params,
            **target_event_params,
            "period":
            period,
        }

        result = sync_execute(
            RETENTION_SQL.format(
                returning_event_query=returning_event_query,
                trunc_func=trunc_func,
                target_event_query=target_event_query,
            ),
            all_params,
        )

        initial_interval_result = sync_execute(
            INITIAL_INTERVAL_SQL.format(
                reference_event_sql=target_event_query,
                trunc_func=trunc_func,
            ), all_params)

        result_dict = {}
        for initial_res in initial_interval_result:
            result_dict.update({
                (initial_res[0], 0): {
                    "count": initial_res[1],
                    "people": []
                }
            })

        for res in result:
            result_dict.update({
                (res[0], res[1]): {
                    "count": res[2],
                    "people": []
                }
            })

        return result_dict
Example #23
0
    def _format_breakdown_query(self, entity: Entity, filter: Filter,
                                team_id: int) -> Tuple[str, Dict, Callable]:
        # process params
        params: Dict[str, Any] = {"team_id": team_id}
        interval_annotation = get_trunc_func_ch(filter.interval)
        num_intervals, seconds_in_interval, round_interval = get_time_diff(
            filter.interval or "day", filter.date_from, filter.date_to,
            team_id)
        _, parsed_date_to, date_params = parse_timestamps(filter=filter,
                                                          team_id=team_id)

        props_to_filter = [*filter.properties, *entity.properties]
        prop_filters, prop_filter_params = parse_prop_clauses(
            props_to_filter,
            team_id,
            table_name="e",
            filter_test_accounts=filter.filter_test_accounts)
        aggregate_operation, _, math_params = process_math(entity)

        if entity.math == "dau" or filter.breakdown_type == "person":
            join_condition = EVENT_JOIN_PERSON_SQL
        else:
            join_condition = ""

        action_query = ""
        action_params: Dict = {}
        if entity.type == TREND_FILTER_TYPE_ACTIONS:
            action = Action.objects.get(pk=entity.id)
            action_query, action_params = format_action_filter(action)

        null_sql = NULL_BREAKDOWN_SQL.format(
            interval=interval_annotation,
            seconds_in_interval=seconds_in_interval,
            num_intervals=num_intervals,
            date_to=(filter.date_to).strftime("%Y-%m-%d %H:%M:%S"),
        )

        params = {
            **params,
            **math_params,
            **prop_filter_params,
            **action_params,
            "event": entity.id,
            "key": filter.breakdown,
            **date_params,
        }

        breakdown_filter_params = {
            "parsed_date_from":
            date_from_clause(interval_annotation, round_interval),
            "parsed_date_to":
            parsed_date_to,
            "actions_query":
            "AND {}".format(action_query) if action_query else "",
            "event_filter":
            "AND event = %(event)s" if not action_query else "",
            "filters":
            prop_filters if props_to_filter else "",
        }
        breakdown_query = self._get_breakdown_query(filter)

        _params, _breakdown_filter_params = {}, {}

        if filter.breakdown_type == "cohort":
            _params, breakdown_filter, _breakdown_filter_params, breakdown_value = self._breakdown_cohort_params(
                team_id, filter, entity)
        elif filter.breakdown_type == "person":
            _params, breakdown_filter, _breakdown_filter_params, breakdown_value = self._breakdown_person_params(
                filter, team_id)
        else:
            _params, breakdown_filter, _breakdown_filter_params, breakdown_value = self._breakdown_prop_params(
                filter, team_id)

        if len(_params["values"]) == 0:
            return "SELECT 1", {}, lambda _: []

        params = {**params, **_params}
        breakdown_filter_params = {
            **breakdown_filter_params,
            **_breakdown_filter_params
        }

        if filter.display in TRENDS_DISPLAY_BY_VALUE:
            breakdown_filter = breakdown_filter.format(
                **breakdown_filter_params)
            content_sql = breakdown_query.format(
                breakdown_filter=breakdown_filter,
                event_join=join_condition,
                aggregate_operation=aggregate_operation,
                breakdown_value=breakdown_value,
            )

            return content_sql, params, self._parse_single_aggregate_result(
                filter, entity)

        else:

            null_sql = null_sql.format(
                interval=interval_annotation,
                seconds_in_interval=seconds_in_interval,
                num_intervals=num_intervals,
                date_to=(filter.date_to).strftime("%Y-%m-%d %H:%M:%S"),
            )
            breakdown_filter = breakdown_filter.format(
                **breakdown_filter_params)
            breakdown_query = breakdown_query.format(
                null_sql=null_sql,
                breakdown_filter=breakdown_filter,
                event_join=join_condition,
                aggregate_operation=aggregate_operation,
                interval_annotation=interval_annotation,
                breakdown_value=breakdown_value,
            )

            return breakdown_query, params, self._parse_trend_result(
                filter, entity)
Example #24
0
    def get_query(self) -> Tuple[str, Dict, Callable]:
        interval_annotation = get_trunc_func_ch(self.filter.interval)
        num_intervals, seconds_in_interval, round_interval = get_time_diff(
            self.filter.interval, self.filter.date_from, self.filter.date_to,
            self.team_id)
        _, parsed_date_to, date_params = parse_timestamps(filter=self.filter,
                                                          team_id=self.team_id)

        props_to_filter = self.filter.property_groups.combine_property_group(
            PropertyOperatorType.AND, self.entity.property_groups)

        outer_properties = self.column_optimizer.property_optimizer.parse_property_groups(
            props_to_filter).outer
        prop_filters, prop_filter_params = parse_prop_grouped_clauses(
            team_id=self.team_id,
            property_group=outer_properties,
            table_name="e",
            person_properties_mode=PersonPropertiesMode.
            USING_PERSON_PROPERTIES_COLUMN,
        )
        aggregate_operation, _, math_params = process_math(self.entity)

        action_query = ""
        action_params: Dict = {}
        if self.entity.type == TREND_FILTER_TYPE_ACTIONS:
            action = self.entity.get_action()
            action_query, action_params = format_action_filter(
                team_id=self.team_id, action=action, table_name="e")

        self.params = {
            **self.params,
            **math_params,
            **prop_filter_params,
            **action_params,
            "event": self.entity.id,
            "key": self.filter.breakdown,
            **date_params,
        }

        breakdown_filter_params = {
            "parsed_date_from":
            date_from_clause(interval_annotation, round_interval),
            "parsed_date_to":
            parsed_date_to,
            "actions_query":
            "AND {}".format(action_query) if action_query else "",
            "event_filter":
            "AND event = %(event)s" if not action_query else "",
            "filters":
            prop_filters if props_to_filter.values else "",
        }

        _params, _breakdown_filter_params = {}, {}

        if self.filter.breakdown_type == "cohort":
            _params, breakdown_filter, _breakdown_filter_params, breakdown_value = self._breakdown_cohort_params(
            )
        else:
            _params, breakdown_filter, _breakdown_filter_params, breakdown_value = self._breakdown_prop_params(
                "count(*)"
                if self.entity.math == "dau" else aggregate_operation,
                math_params,
            )

        if len(_params["values"]) == 0:
            # If there are no breakdown values, we are sure that there's no relevant events, so instead of adjusting
            # a "real" SELECT for this, we only include the below dummy SELECT.
            # It's a drop-in replacement for a "real" one, simply always returning 0 rows.
            # See https://github.com/PostHog/posthog/pull/5674 for context.
            return (
                "SELECT [now()] AS date, [0] AS data, '' AS breakdown_value LIMIT 0",
                {},
                lambda _: [],
            )

        person_join_condition, person_join_params = self._person_join_condition(
        )
        groups_join_condition, groups_join_params = GroupsJoinQuery(
            self.filter, self.team_id, self.column_optimizer).get_join_query()
        self.params = {
            **self.params,
            **_params,
            **person_join_params,
            **groups_join_params
        }
        breakdown_filter_params = {
            **breakdown_filter_params,
            **_breakdown_filter_params
        }

        if self.filter.display in TRENDS_DISPLAY_BY_VALUE:
            breakdown_filter = breakdown_filter.format(
                **breakdown_filter_params)
            content_sql = BREAKDOWN_AGGREGATE_QUERY_SQL.format(
                breakdown_filter=breakdown_filter,
                person_join=person_join_condition,
                groups_join=groups_join_condition,
                aggregate_operation=aggregate_operation,
                breakdown_value=breakdown_value,
            )
            time_range = enumerate_time_range(self.filter, seconds_in_interval)

            return (
                content_sql,
                self.params,
                self._parse_single_aggregate_result(self.filter, self.entity,
                                                    {"days": time_range}),
            )

        else:

            breakdown_filter = breakdown_filter.format(
                **breakdown_filter_params)

            if self.entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]:
                active_user_params = get_active_user_params(
                    self.filter, self.entity, self.team_id)
                conditions = BREAKDOWN_ACTIVE_USER_CONDITIONS_SQL.format(
                    **breakdown_filter_params, **active_user_params)
                inner_sql = BREAKDOWN_ACTIVE_USER_INNER_SQL.format(
                    breakdown_filter=breakdown_filter,
                    person_join=person_join_condition,
                    groups_join=groups_join_condition,
                    aggregate_operation=aggregate_operation,
                    interval_annotation=interval_annotation,
                    breakdown_value=breakdown_value,
                    conditions=conditions,
                    GET_TEAM_PERSON_DISTINCT_IDS=get_team_distinct_ids_query(
                        self.team_id),
                    **active_user_params,
                    **breakdown_filter_params,
                )
            elif self.filter.display == TRENDS_CUMULATIVE and self.entity.math == "dau":
                inner_sql = BREAKDOWN_CUMULATIVE_INNER_SQL.format(
                    breakdown_filter=breakdown_filter,
                    person_join=person_join_condition,
                    groups_join=groups_join_condition,
                    aggregate_operation=aggregate_operation,
                    interval_annotation=interval_annotation,
                    breakdown_value=breakdown_value,
                    **breakdown_filter_params,
                )
            else:
                inner_sql = BREAKDOWN_INNER_SQL.format(
                    breakdown_filter=breakdown_filter,
                    person_join=person_join_condition,
                    groups_join=groups_join_condition,
                    aggregate_operation=aggregate_operation,
                    interval_annotation=interval_annotation,
                    breakdown_value=breakdown_value,
                )

            breakdown_query = BREAKDOWN_QUERY_SQL.format(
                interval=interval_annotation,
                num_intervals=num_intervals,
                inner_sql=inner_sql,
            )
            self.params.update({
                "seconds_in_interval": seconds_in_interval,
                "num_intervals": num_intervals,
            })

            return breakdown_query, self.params, self._parse_trend_result(
                self.filter, self.entity)
Example #25
0
    def _get_trends(self) -> List[Dict[str, Any]]:
        serialized: Dict[str, Any] = {
            "count": 0,
            "data": [],
            "days": [],
            "labels": []
        }
        prop_filters, prop_filter_params = parse_prop_clauses(
            self._filter.properties,
            self._team.pk,
            prepend="global",
            allow_denormalized_props=True)
        parsed_date_from, parsed_date_to, _ = parse_timestamps(
            filter=self._filter, table="events.", team_id=self._team.pk)
        self.params.update(prop_filter_params)
        steps = [
            self._build_steps_query(entity, index)
            for index, entity in enumerate(self._filter.entities)
        ]
        funnel_query = FUNNEL_SQL.format(
            team_id=self._team.id,
            steps=", ".join(steps),
            filters=prop_filters.replace("uuid IN", "events.uuid IN", 1),
            parsed_date_from=parsed_date_from,
            parsed_date_to=parsed_date_to,
            top_level_groupby=", date",
            extra_select="{}(timestamp) as date,".format(
                get_trunc_func_ch(self._filter.interval)),
            extra_groupby=",{}(timestamp)".format(
                get_trunc_func_ch(self._filter.interval)),
            within_time="86400000000",
        )
        results = sync_execute(funnel_query, self.params)
        parsed_results = []

        for result in results:
            temp = [item for item in result]
            temp[1] = datetime(
                result[1].year,
                result[1].month,
                result[1].day,
                getattr(result[1], "hour", 0),
                getattr(result[1], "minute", 0),
                getattr(result[1], "second", 0),
                tzinfo=pytz.utc,
            )
            parsed_results.append(temp)

        date_range = get_daterange(self._filter.date_from
                                   or parsed_results[0][1],
                                   self._filter.date_to,
                                   frequency=self._filter.interval)

        # Rejig the data from a row for each date and step to one row per date
        data_dict: Dict[datetime, Dict] = {}
        for item in parsed_results:
            if not data_dict.get(item[1]):
                data_dict[item[1]] = {
                    "date": item[1],
                    "total_people": item[2],
                    "count": 0
                }
            else:
                # the query gives people who made it to that step
                # so we need to count all the people from each step
                data_dict[item[1]]["total_people"] += item[2]
                data_dict[item[1]]["count"] = round(
                    item[2] / data_dict[item[1]]["total_people"] * 100)
        data_array = [value for _, value in data_dict.items()]

        if self._filter.interval == "week":
            for df in data_array:
                df["date"] -= timedelta(days=df["date"].weekday() + 1)
        elif self._filter.interval == "month":
            for df in data_array:
                df["date"] = df["date"].replace(day=1)
        for df in data_array:
            df["date"] = df["date"].isoformat()

        datewise_data = {d["date"]: d["count"] for d in data_array}
        values = [(key, datewise_data.get(key.isoformat(), 0))
                  for key in date_range]

        for data_item in values:
            serialized["days"].append(data_item[0])
            serialized["data"].append(data_item[1])
            serialized["labels"].append(
                format_label_date(data_item[0], self._filter.interval))
        return [serialized]
Example #26
0
    def _format_breakdown_query(self, entity: Entity, filter: Filter,
                                team_id: int) -> Tuple[str, Dict, Callable]:
        # process params
        params: Dict[str, Any] = {"team_id": team_id}
        interval_annotation = get_trunc_func_ch(filter.interval)
        num_intervals, seconds_in_interval, round_interval = get_time_diff(
            filter.interval or "day", filter.date_from, filter.date_to,
            team_id)
        _, parsed_date_to, date_params = parse_timestamps(filter=filter,
                                                          team_id=team_id)

        props_to_filter = [*filter.properties, *entity.properties]
        prop_filters, prop_filter_params = parse_prop_clauses(
            props_to_filter,
            team_id,
            table_name="e",
            filter_test_accounts=filter.filter_test_accounts)
        aggregate_operation, _, math_params = process_math(entity)

        if entity.math == "dau" or filter.breakdown_type == "person":
            join_condition = EVENT_JOIN_PERSON_SQL
        else:
            join_condition = ""

        action_query = ""
        action_params: Dict = {}
        if entity.type == TREND_FILTER_TYPE_ACTIONS:
            action = entity.get_action()
            action_query, action_params = format_action_filter(action,
                                                               table_name="e")

        params = {
            **params,
            **math_params,
            **prop_filter_params,
            **action_params,
            "event": entity.id,
            "key": filter.breakdown,
            **date_params,
        }

        breakdown_filter_params = {
            "parsed_date_from":
            date_from_clause(interval_annotation, round_interval),
            "parsed_date_to":
            parsed_date_to,
            "actions_query":
            "AND {}".format(action_query) if action_query else "",
            "event_filter":
            "AND event = %(event)s" if not action_query else "",
            "filters":
            prop_filters if props_to_filter else "",
        }

        _params, _breakdown_filter_params = {}, {}

        if filter.breakdown_type == "cohort":
            _params, breakdown_filter, _breakdown_filter_params, breakdown_value = self._breakdown_cohort_params(
                team_id, filter, entity)
        elif filter.breakdown_type == "person":
            (
                _params,
                breakdown_filter,
                _breakdown_filter_params,
                breakdown_value,
            ) = self._breakdown_person_params(
                "count(*)" if entity.math == "dau" else aggregate_operation,
                entity, filter, team_id)
        else:
            (
                _params,
                breakdown_filter,
                _breakdown_filter_params,
                breakdown_value,
            ) = self._breakdown_prop_params(
                "count(*)" if entity.math == "dau" else aggregate_operation,
                entity, filter, team_id)

        if len(_params["values"]) == 0:
            return "SELECT 1", {}, lambda _: []

        params = {**params, **_params}
        breakdown_filter_params = {
            **breakdown_filter_params,
            **_breakdown_filter_params
        }

        if filter.display in TRENDS_DISPLAY_BY_VALUE:
            breakdown_filter = breakdown_filter.format(
                **breakdown_filter_params)
            content_sql = BREAKDOWN_AGGREGATE_QUERY_SQL.format(
                breakdown_filter=breakdown_filter,
                event_join=join_condition,
                aggregate_operation=aggregate_operation,
                breakdown_value=breakdown_value,
            )
            time_range = enumerate_time_range(filter, seconds_in_interval)

            return content_sql, params, self._parse_single_aggregate_result(
                filter, entity, {"days": time_range})

        else:

            breakdown_filter = breakdown_filter.format(
                **breakdown_filter_params)

            if entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]:
                active_user_params = get_active_user_params(
                    filter, entity, team_id)
                conditions = BREAKDOWN_ACTIVE_USER_CONDITIONS_SQL.format(
                    **breakdown_filter_params, **active_user_params)
                inner_sql = BREAKDOWN_ACTIVE_USER_INNER_SQL.format(
                    breakdown_filter=breakdown_filter,
                    event_join=join_condition,
                    aggregate_operation=aggregate_operation,
                    interval_annotation=interval_annotation,
                    breakdown_value=breakdown_value,
                    conditions=conditions,
                    GET_TEAM_PERSON_DISTINCT_IDS=GET_TEAM_PERSON_DISTINCT_IDS,
                    **active_user_params,
                    **breakdown_filter_params)
            else:
                inner_sql = BREAKDOWN_INNER_SQL.format(
                    breakdown_filter=breakdown_filter,
                    event_join=join_condition,
                    aggregate_operation=aggregate_operation,
                    interval_annotation=interval_annotation,
                    breakdown_value=breakdown_value,
                )

            breakdown_query = BREAKDOWN_QUERY_SQL.format(
                interval=interval_annotation,
                num_intervals=num_intervals,
                inner_sql=inner_sql,
            )
            params.update({
                "date_to":
                filter.date_to.strftime("%Y-%m-%d %H:%M:%S"),
                "seconds_in_interval":
                seconds_in_interval,
                "num_intervals":
                num_intervals,
            })

            return breakdown_query, params, self._parse_trend_result(
                filter, entity)
Example #27
0
    def _retrieve_people(self, filter: RetentionFilter, team: Team):
        period = filter.period
        is_first_time_retention = filter.retention_type == RETENTION_FIRST_TIME
        trunc_func = get_trunc_func_ch(period)
        prop_filters, prop_filter_params = parse_prop_clauses(
            filter.properties,
            team.pk,
            filter_test_accounts=filter.filter_test_accounts)

        returning_entity = filter.returning_entity if filter.selected_interval > 0 else filter.target_entity
        target_query, target_params = self._get_condition(filter.target_entity,
                                                          table="e")
        target_query_formatted = "AND {target_query}".format(
            target_query=target_query)
        return_query, return_params = self._get_condition(returning_entity,
                                                          table="e",
                                                          prepend="returning")
        return_query_formatted = "AND {return_query}".format(
            return_query=return_query)

        reference_event_query = (
            REFERENCE_EVENT_UNIQUE_SQL
            if is_first_time_retention else REFERENCE_EVENT_SQL).format(
                target_query=target_query_formatted,
                filters=prop_filters,
                trunc_func=trunc_func,
                GET_TEAM_PERSON_DISTINCT_IDS=GET_TEAM_PERSON_DISTINCT_IDS,
            )
        reference_date_from = filter.date_from
        reference_date_to = filter.date_from + filter.period_increment
        date_from = filter.date_from + filter.selected_interval * filter.period_increment
        date_to = date_from + filter.period_increment

        result = sync_execute(
            RETENTION_PEOPLE_SQL.format(
                reference_event_query=reference_event_query,
                target_query=return_query_formatted,
                filters=prop_filters,
                GET_TEAM_PERSON_DISTINCT_IDS=GET_TEAM_PERSON_DISTINCT_IDS,
            ),
            {
                "team_id":
                team.pk,
                "start_date":
                date_from.strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                "end_date":
                date_to.strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                "reference_start_date":
                reference_date_from.strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                "reference_end_date":
                reference_date_to.strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
                "offset":
                filter.offset,
                **target_params,
                **return_params,
                **prop_filter_params,
            },
        )
        people = Person.objects.filter(team_id=team.pk,
                                       uuid__in=[val[0] for val in result])

        from posthog.api.person import PersonSerializer

        return PersonSerializer(people, many=True).data
Example #28
0
    def _serialize_lifecycle(self, entity: Entity, filter: Filter,
                             team_id: int) -> List[Dict[str, Any]]:

        date_from = filter.date_from

        if not date_from:
            date_from = get_earliest_timestamp(team_id)

        interval = filter.interval or "day"
        num_intervals, seconds_in_interval, _ = get_time_diff(
            interval, filter.date_from, filter.date_to, team_id)
        interval_increment, interval_string, sub_interval_string = self.get_interval(
            interval)
        trunc_func = get_trunc_func_ch(interval)
        event_query = ""
        event_params: Dict[str, Any] = {}

        props_to_filter = [*filter.properties, *entity.properties]
        prop_filters, prop_filter_params = parse_prop_clauses(
            props_to_filter, team_id)

        _, _, date_params = parse_timestamps(filter=filter, team_id=team_id)

        if entity.type == TREND_FILTER_TYPE_ACTIONS:
            try:
                action = Action.objects.get(pk=entity.id)
                event_query, event_params = format_action_filter(action)
            except:
                return []
        else:
            event_query = "event = %(event)s"
            event_params = {"event": entity.id}

        result = sync_execute(
            LIFECYCLE_SQL.format(
                interval=interval_string,
                trunc_func=trunc_func,
                event_query=event_query,
                filters=prop_filters,
                sub_interval=sub_interval_string,
            ),
            {
                "team_id":
                team_id,
                "prev_date_from":
                (date_from - interval_increment).strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.interval == "hour"
                    or filter.interval == "minute" else " 00:00:00")),
                "num_intervals":
                num_intervals,
                "seconds_in_interval":
                seconds_in_interval,
                **event_params,
                **date_params,
                **prop_filter_params,
            },
        )

        res = []
        for val in result:
            label = "{} - {}".format(entity.name, val[2])
            additional_values = {"label": label, "status": val[2]}
            parsed_result = parse_response(val, filter, additional_values)
            res.append(parsed_result)

        return res
Example #29
0
    def _normal_query(self, entity: Entity, filter: Filter,
                      team_id: int) -> Tuple[str, Dict, Callable]:

        interval_annotation = get_trunc_func_ch(filter.interval)
        num_intervals, seconds_in_interval, round_interval = get_time_diff(
            filter.interval or "day",
            filter.date_from,
            filter.date_to,
            team_id=team_id)
        _, parsed_date_to, date_params = parse_timestamps(filter=filter,
                                                          team_id=team_id)

        props_to_filter = [*filter.properties, *entity.properties]
        prop_filters, prop_filter_params = parse_prop_clauses(
            props_to_filter,
            team_id,
            filter_test_accounts=filter.filter_test_accounts)

        aggregate_operation, join_condition, math_params = process_math(entity)

        params: Dict = {"team_id": team_id}
        params = {**params, **prop_filter_params, **math_params, **date_params}
        content_sql_params = {
            "interval":
            interval_annotation,
            "parsed_date_from":
            date_from_clause(interval_annotation, round_interval),
            "parsed_date_to":
            parsed_date_to,
            "timestamp":
            "timestamp",
            "filters":
            prop_filters,
            "event_join":
            join_condition,
            "aggregate_operation":
            aggregate_operation,
            "entity_query":
            "AND {actions_query}" if entity.type == TREND_FILTER_TYPE_ACTIONS
            else "AND event = %(event)s",
        }

        entity_params, entity_format_params = self._populate_entity_params(
            entity)
        params = {**params, **entity_params}

        if filter.display in TRENDS_DISPLAY_BY_VALUE:
            content_sql = VOLUME_TOTAL_AGGREGATE_SQL.format(
                **content_sql_params).format(**entity_format_params)
            time_range = self._enumerate_time_range(filter,
                                                    seconds_in_interval)

            return (
                content_sql,
                params,
                lambda result: [{
                    "aggregated_value":
                    result[0][0] if result and len(result) else 0,
                    "days":
                    time_range
                }],
            )
        else:

            if entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]:
                sql_params = get_active_user_params(filter, entity, team_id)
                content_sql = ACTIVE_USER_SQL.format(
                    **content_sql_params,
                    **sql_params).format(**entity_format_params)
            else:
                # entity_format_params depends on format clause from content_sql_params
                content_sql = VOLUME_SQL.format(**content_sql_params).format(
                    **entity_format_params)

            null_sql = NULL_SQL.format(
                interval=interval_annotation,
                seconds_in_interval=seconds_in_interval,
                num_intervals=num_intervals,
                date_to=filter.date_to.strftime("%Y-%m-%d %H:%M:%S"),
            )
            final_query = AGGREGATE_SQL.format(null_sql=null_sql,
                                               content_sql=content_sql)
            return final_query, params, self._parse_normal_result(filter)
Example #30
0
    def _format_breakdown_query(
        self, entity: Entity, filter: Filter, breakdown: List, team_id: int
    ) -> List[Dict[str, Any]]:
        # process params
        params: Dict[str, Any] = {"team_id": team_id}
        interval_annotation = get_trunc_func_ch(filter.interval)
        num_intervals, seconds_in_interval, round_interval = get_time_diff(
            filter.interval or "day", filter.date_from, filter.date_to, team_id
        )
        _, parsed_date_to, date_params = parse_timestamps(filter=filter, team_id=team_id)

        props_to_filter = [*filter.properties, *entity.properties]
        prop_filters, prop_filter_params = parse_prop_clauses(props_to_filter, team_id, table_name="e")
        aggregate_operation, _, math_params = process_math(entity)

        if entity.math == "dau" or filter.breakdown_type == "person":
            join_condition = EVENT_JOIN_PERSON_SQL
        else:
            join_condition = ""

        action_query = ""
        action_params: Dict = {}
        if entity.type == TREND_FILTER_TYPE_ACTIONS:
            action = Action.objects.get(pk=entity.id)
            action_query, action_params = format_action_filter(action)

        null_sql = NULL_BREAKDOWN_SQL.format(
            interval=interval_annotation,
            seconds_in_interval=seconds_in_interval,
            num_intervals=num_intervals,
            date_to=(filter.date_to).strftime("%Y-%m-%d %H:%M:%S"),
        )

        params = {
            **params,
            **math_params,
            **prop_filter_params,
            **action_params,
            "event": entity.id,
            "key": filter.breakdown,
            **date_params,
        }

        breakdown_filter_params = {
            "parsed_date_from": date_from_clause(interval_annotation, round_interval),
            "parsed_date_to": parsed_date_to,
            "actions_query": "AND {}".format(action_query) if action_query else "",
            "event_filter": "AND event = %(event)s" if not action_query else "",
            "filters": prop_filters if props_to_filter else "",
        }
        breakdown_query = self._get_breakdown_query(filter, breakdown)

        _params, _breakdown_filter_params = {}, {}

        if filter.breakdown_type == "cohort":
            if "all" in breakdown:
                null_sql = NULL_SQL
                breakdown_filter = BREAKDOWN_CONDITIONS_SQL
                breakdown_value = ""
            else:
                _params, breakdown_filter, _breakdown_filter_params, breakdown_value = self._breakdown_cohort_params(
                    breakdown, team_id
                )
        elif filter.breakdown_type == "person":
            _params, breakdown_filter, _breakdown_filter_params, breakdown_value = self._breakdown_person_params(
                filter, team_id
            )
        else:
            _params, breakdown_filter, _breakdown_filter_params, breakdown_value = self._breakdown_prop_params(
                filter, team_id
            )

        params = {**params, **_params}
        breakdown_filter_params = {**breakdown_filter_params, **_breakdown_filter_params}

        if filter.display == TRENDS_TABLE or filter.display == TRENDS_PIE:
            breakdown_filter = breakdown_filter.format(**breakdown_filter_params)
            content_sql = breakdown_query.format(
                breakdown_filter=breakdown_filter,
                event_join=join_condition,
                aggregate_operation=aggregate_operation,
                breakdown_value=breakdown_value,
            )

            result = sync_execute(content_sql, params)
            parsed_results = self._parse_single_aggregate_result(result, filter, entity, breakdown)

            return parsed_results

        else:

            null_sql = null_sql.format(
                interval=interval_annotation,
                seconds_in_interval=seconds_in_interval,
                num_intervals=num_intervals,
                date_to=(filter.date_to).strftime("%Y-%m-%d %H:%M:%S"),
            )
            breakdown_filter = breakdown_filter.format(**breakdown_filter_params)
            breakdown_query = breakdown_query.format(
                null_sql=null_sql,
                breakdown_filter=breakdown_filter,
                event_join=join_condition,
                aggregate_operation=aggregate_operation,
                interval_annotation=interval_annotation,
                breakdown_value=breakdown_value,
            )

            try:
                result = sync_execute(breakdown_query, params)
            except:
                result = []

            parsed_results = self._parse_trend_result(result, filter, entity, breakdown)

            return parsed_results