Exemple #1
0
def test_get_earliest_timestamp(db, team):
    _create_event(team=team,
                  event="sign up",
                  distinct_id="1",
                  timestamp="2020-01-04T14:10:00Z")
    _create_event(team=team,
                  event="sign up",
                  distinct_id="1",
                  timestamp="2020-01-06T14:10:00Z")

    assert get_earliest_timestamp(team.id) == datetime(2020,
                                                       1,
                                                       4,
                                                       14,
                                                       10,
                                                       tzinfo=pytz.UTC)

    _create_event(team=team,
                  event="sign up",
                  distinct_id="1",
                  timestamp="1984-01-06T14:10:00Z")
    _create_event(team=team,
                  event="sign up",
                  distinct_id="1",
                  timestamp="2014-01-01T01:00:00Z")
    _create_event(team=team,
                  event="sign up",
                  distinct_id="1",
                  timestamp="2015-01-01T01:00:00Z")

    assert get_earliest_timestamp(team.id) == datetime(2015,
                                                       1,
                                                       1,
                                                       1,
                                                       tzinfo=pytz.UTC)
Exemple #2
0
    def _format_lifecycle_query(self, entity: Entity, filter: Filter,
                                team_id: int) -> Tuple[str, Dict, Callable]:
        date_from = filter.date_from

        if not date_from:
            date_from = get_earliest_timestamp(team_id)

        interval = filter.interval or "day"
        num_intervals, seconds_in_interval, _ = get_time_diff(
            interval, filter.date_from, filter.date_to, team_id)
        interval_increment, interval_string, sub_interval_string = self.get_interval(
            interval)
        trunc_func = get_trunc_func_ch(interval)
        event_query = ""
        event_params: Dict[str, Any] = {}

        props_to_filter = [*filter.properties, *entity.properties]
        prop_filters, prop_filter_params = parse_prop_clauses(
            props_to_filter,
            team_id,
            filter_test_accounts=filter.filter_test_accounts)

        _, _, date_params = parse_timestamps(filter=filter, team_id=team_id)

        if entity.type == TREND_FILTER_TYPE_ACTIONS:
            try:
                action = entity.get_action()
                event_query, event_params = format_action_filter(action)
            except:
                return "", {}, self._parse_result(filter, entity)
        else:
            event_query = "event = %(event)s"
            event_params = {"event": entity.id}

        return (
            LIFECYCLE_SQL.format(
                interval=interval_string,
                trunc_func=trunc_func,
                event_query=event_query,
                filters=prop_filters,
                sub_interval=sub_interval_string,
                GET_TEAM_PERSON_DISTINCT_IDS=GET_TEAM_PERSON_DISTINCT_IDS,
            ),
            {
                "team_id":
                team_id,
                "prev_date_from":
                (date_from - interval_increment).strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.interval == "hour"
                    or filter.interval == "minute" else " 00:00:00")),
                "num_intervals":
                num_intervals,
                "seconds_in_interval":
                seconds_in_interval,
                **event_params,
                **date_params,
                **prop_filter_params,
            },
            self._parse_result(filter, entity),
        )
Exemple #3
0
def get_active_user_params(filter: Filter, entity: Entity,
                           team_id: int) -> Dict[str, Any]:
    params = {}
    params.update({
        "prev_interval":
        "7 DAY" if entity.math == WEEKLY_ACTIVE else "30 day"
    })
    diff = timedelta(days=7) if entity.math == WEEKLY_ACTIVE else timedelta(
        days=30)
    if filter.date_from:
        params.update({
            "parsed_date_from_prev_range":
            f"AND timestamp >= '{format_ch_timestamp(filter.date_from - diff, filter)}'"
        })
    else:
        try:
            earliest_date = get_earliest_timestamp(team_id)
        except IndexError:
            raise ValidationError(
                "Active User queries require a lower date bound")
        else:
            params.update({
                "parsed_date_from_prev_range":
                f"AND timestamp >= '{format_ch_timestamp(earliest_date - diff, filter)}'"
            })

    return params
Exemple #4
0
def earliest_timestamp_func(team_id: int):
    if is_clickhouse_enabled():
        from ee.clickhouse.queries.util import get_earliest_timestamp

        return get_earliest_timestamp(team_id)
    from posthog.models.event import Event

    return Event.objects.earliest_timestamp(team_id)
Exemple #5
0
    def get_query(self) -> str:
        step_counts = self.get_step_counts_without_aggregation_query()
        # Expects multiple rows for same person, first event time, steps taken.
        self.params.update(self.funnel_order.params)

        reached_from_step_count_condition, reached_to_step_count_condition, _ = self.get_steps_reached_conditions(
        )
        trunc_func = get_trunc_func_ch(self._filter.interval)
        interval_func = get_interval_func_ch(self._filter.interval)

        if self._filter.date_from is None:
            _date_from = get_earliest_timestamp(self._team.pk)
        else:
            _date_from = self._filter.date_from

        breakdown_clause = self._get_breakdown_prop()
        formatted_date_from = format_ch_timestamp(_date_from, self._filter)
        formatted_date_to = format_ch_timestamp(self._filter.date_to,
                                                self._filter)

        self.params.update({
            "formatted_date_from": formatted_date_from,
            "formatted_date_to": formatted_date_to,
            "interval": self._filter.interval,
        })

        query = f"""
            SELECT
                entrance_period_start,
                reached_from_step_count,
                reached_to_step_count,
                if(reached_from_step_count > 0, round(reached_to_step_count / reached_from_step_count * 100, 2), 0) AS conversion_rate
                {breakdown_clause}
            FROM (
                SELECT
                    entrance_period_start,
                    countIf({reached_from_step_count_condition}) AS reached_from_step_count,
                    countIf({reached_to_step_count_condition}) AS reached_to_step_count
                    {breakdown_clause}
                FROM (
                    {step_counts}
                ) GROUP BY entrance_period_start {breakdown_clause}
            ) data
            RIGHT OUTER JOIN (
                SELECT
                    {trunc_func}(toDateTime(%(formatted_date_from)s) + {interval_func}(number)) AS entrance_period_start
                    {', breakdown_value as prop' if breakdown_clause else ''}
                FROM numbers(dateDiff(%(interval)s, toDateTime(%(formatted_date_from)s), toDateTime(%(formatted_date_to)s)) + 1) AS period_offsets
                {'ARRAY JOIN (%(breakdown_values)s) AS breakdown_value' if breakdown_clause else ''}
            ) fill
            USING (entrance_period_start {breakdown_clause})
            ORDER BY entrance_period_start ASC
            SETTINGS allow_experimental_window_functions = 1"""

        return query
Exemple #6
0
    def calculate_trends(self, request: Request) -> List[Dict[str, Any]]:
        team = self.team
        filter = Filter(request=request)

        if filter.shown_as == TRENDS_STICKINESS:
            earliest_timestamp_func = lambda team_id: get_earliest_timestamp(team_id)
            stickiness_filter = StickinessFilter(
                request=request, team=team, get_earliest_timestamp=earliest_timestamp_func
            )
            result = ClickhouseStickiness().run(stickiness_filter, team)
        else:
            result = ClickhouseTrends().run(filter, team)

        self._refresh_dashboard(request=request)
        return result
Exemple #7
0
    def get_people(
        self,
        filter: Filter,
        team_id: int,
        target_date: datetime,
        lifecycle_type: str,
        request: Request,
        limit: int = 100,
    ):
        entity = filter.entities[0]
        date_from = filter.date_from

        if not date_from:
            date_from = get_earliest_timestamp(team_id)

        interval = filter.interval or "day"
        num_intervals, seconds_in_interval, _ = get_time_diff(interval,
                                                              filter.date_from,
                                                              filter.date_to,
                                                              team_id=team_id)
        interval_increment, interval_string, sub_interval_string = self.get_interval(
            interval)
        trunc_func = get_trunc_func_ch(interval)
        event_query = ""
        event_params: Dict[str, Any] = {}

        _, _, date_params = parse_timestamps(filter=filter, team_id=team_id)

        if entity.type == TREND_FILTER_TYPE_ACTIONS:
            try:
                action = entity.get_action()
                event_query, event_params = format_action_filter(action)
            except:
                return []
        else:
            event_query = "event = %(event)s"
            event_params = {"event": entity.id}

        props_to_filter = [*filter.properties, *entity.properties]
        prop_filters, prop_filter_params = parse_prop_clauses(
            props_to_filter,
            team_id,
            filter_test_accounts=filter.filter_test_accounts)

        result = sync_execute(
            LIFECYCLE_PEOPLE_SQL.format(
                interval=interval_string,
                trunc_func=trunc_func,
                event_query=event_query,
                filters=prop_filters,
                sub_interval=sub_interval_string,
                GET_TEAM_PERSON_DISTINCT_IDS=GET_TEAM_PERSON_DISTINCT_IDS,
            ),
            {
                "team_id":
                team_id,
                "prev_date_from":
                (date_from - interval_increment).strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.interval == "hour"
                    or filter.interval == "minute" else " 00:00:00")),
                "num_intervals":
                num_intervals,
                "seconds_in_interval":
                seconds_in_interval,
                **event_params,
                **date_params,
                **prop_filter_params,
                "status":
                lifecycle_type,
                "target_date":
                target_date.strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.interval == "hour"
                    or filter.interval == "minute" else " 00:00:00")),
                "offset":
                filter.offset,
                "limit":
                limit,
            },
        )
        people = get_persons_by_uuids(team_id=team_id,
                                      uuids=[p[0] for p in result])
        people = people.prefetch_related(
            Prefetch("persondistinctid_set", to_attr="distinct_ids_cache"))

        from posthog.api.person import PersonSerializer

        return PersonSerializer(people, many=True).data
Exemple #8
0
def test_get_earliest_timestamp_with_no_events(db, team):
    assert get_earliest_timestamp(team.id) == datetime(2021,
                                                       1,
                                                       14,
                                                       tzinfo=pytz.UTC)
Exemple #9
0
    def _serialize_lifecycle(self, entity: Entity, filter: Filter,
                             team_id: int) -> List[Dict[str, Any]]:

        date_from = filter.date_from

        if not date_from:
            date_from = get_earliest_timestamp(team_id)

        interval = filter.interval or "day"
        num_intervals, seconds_in_interval, _ = get_time_diff(
            interval, filter.date_from, filter.date_to, team_id)
        interval_increment, interval_string, sub_interval_string = self.get_interval(
            interval)
        trunc_func = get_trunc_func_ch(interval)
        event_query = ""
        event_params: Dict[str, Any] = {}

        props_to_filter = [*filter.properties, *entity.properties]
        prop_filters, prop_filter_params = parse_prop_clauses(
            props_to_filter, team_id)

        _, _, date_params = parse_timestamps(filter=filter, team_id=team_id)

        if entity.type == TREND_FILTER_TYPE_ACTIONS:
            try:
                action = Action.objects.get(pk=entity.id)
                event_query, event_params = format_action_filter(action)
            except:
                return []
        else:
            event_query = "event = %(event)s"
            event_params = {"event": entity.id}

        result = sync_execute(
            LIFECYCLE_SQL.format(
                interval=interval_string,
                trunc_func=trunc_func,
                event_query=event_query,
                filters=prop_filters,
                sub_interval=sub_interval_string,
            ),
            {
                "team_id":
                team_id,
                "prev_date_from":
                (date_from - interval_increment).strftime("%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.interval == "hour"
                    or filter.interval == "minute" else " 00:00:00")),
                "num_intervals":
                num_intervals,
                "seconds_in_interval":
                seconds_in_interval,
                **event_params,
                **date_params,
                **prop_filter_params,
            },
        )

        res = []
        for val in result:
            label = "{} - {}".format(entity.name, val[2])
            additional_values = {"label": label, "status": val[2]}
            parsed_result = parse_response(val, filter, additional_values)
            res.append(parsed_result)

        return res
Exemple #10
0
 def track_earliest_timestamp(self):
     get_earliest_timestamp(2)
Exemple #11
0
def earliest_timestamp_func(team_id: int):
    from ee.clickhouse.queries.util import get_earliest_timestamp

    return get_earliest_timestamp(team_id)