def test_get_earliest_timestamp(db, team):
    """get_earliest_timestamp should return the oldest plausible event timestamp for a team."""
    _create_event(team=team, event="sign up", distinct_id="1", timestamp="2020-01-04T14:10:00Z")
    _create_event(team=team, event="sign up", distinct_id="1", timestamp="2020-01-06T14:10:00Z")
    # With only 2020 events, the minimum (Jan 4) is returned.
    assert get_earliest_timestamp(team.id) == datetime(2020, 1, 4, 14, 10, tzinfo=pytz.UTC)
    _create_event(team=team, event="sign up", distinct_id="1", timestamp="1984-01-06T14:10:00Z")
    _create_event(team=team, event="sign up", distinct_id="1", timestamp="2014-01-01T01:00:00Z")
    _create_event(team=team, event="sign up", distinct_id="1", timestamp="2015-01-01T01:00:00Z")
    # NOTE(review): expected value is 2015 even though 1984 and 2014 events exist —
    # presumably get_earliest_timestamp discards outlier timestamps far from the
    # bulk of the data; confirm against its implementation.
    assert get_earliest_timestamp(team.id) == datetime(2015, 1, 1, 1, tzinfo=pytz.UTC)
def _format_lifecycle_query(self, entity: Entity, filter: Filter, team_id: int) -> Tuple[str, Dict, Callable]:
    """Build the lifecycle trend SQL for one entity.

    Returns a 3-tuple of (sql, query params, result-parser callable). When the
    entity is an action that cannot be resolved or formatted, returns an empty
    query so the caller degrades gracefully instead of erroring.
    """
    date_from = filter.date_from
    if not date_from:
        # No explicit lower bound: anchor the query at the team's first event.
        date_from = get_earliest_timestamp(team_id)
    interval = filter.interval or "day"
    num_intervals, seconds_in_interval, _ = get_time_diff(interval, filter.date_from, filter.date_to, team_id)
    interval_increment, interval_string, sub_interval_string = self.get_interval(interval)
    trunc_func = get_trunc_func_ch(interval)
    event_query = ""
    event_params: Dict[str, Any] = {}
    props_to_filter = [*filter.properties, *entity.properties]
    prop_filters, prop_filter_params = parse_prop_clauses(
        props_to_filter, team_id, filter_test_accounts=filter.filter_test_accounts
    )
    _, _, date_params = parse_timestamps(filter=filter, team_id=team_id)
    if entity.type == TREND_FILTER_TYPE_ACTIONS:
        try:
            action = entity.get_action()
            event_query, event_params = format_action_filter(action)
        except Exception:
            # Was a bare `except:`; narrowed so KeyboardInterrupt/SystemExit
            # are no longer swallowed. A missing/invalid action yields an
            # empty query rather than a 500.
            return "", {}, self._parse_result(filter, entity)
    else:
        event_query = "event = %(event)s"
        event_params = {"event": entity.id}
    return (
        LIFECYCLE_SQL.format(
            interval=interval_string,
            trunc_func=trunc_func,
            event_query=event_query,
            filters=prop_filters,
            sub_interval=sub_interval_string,
            GET_TEAM_PERSON_DISTINCT_IDS=GET_TEAM_PERSON_DISTINCT_IDS,
        ),
        {
            "team_id": team_id,
            # Hour/minute granularity keeps the time-of-day; coarser intervals pin to midnight.
            "prev_date_from": (date_from - interval_increment).strftime(
                "%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.interval == "hour" or filter.interval == "minute" else " 00:00:00"
                )
            ),
            "num_intervals": num_intervals,
            "seconds_in_interval": seconds_in_interval,
            **event_params,
            **date_params,
            **prop_filter_params,
        },
        self._parse_result(filter, entity),
    )
def get_active_user_params(filter: Filter, entity: Entity, team_id: int) -> Dict[str, Any]:
    """Return the query params describing the trailing active-user window.

    Weekly-active entities look back 7 days, everything else 30. The lower
    bound comes from the filter's date_from when present, otherwise from the
    team's earliest event; a team with no events raises ValidationError.
    """
    is_weekly = entity.math == WEEKLY_ACTIVE
    window = timedelta(days=7) if is_weekly else timedelta(days=30)
    params: Dict[str, Any] = {"prev_interval": "7 DAY" if is_weekly else "30 day"}

    if filter.date_from:
        lower_bound = filter.date_from - window
    else:
        try:
            lower_bound = get_earliest_timestamp(team_id) - window
        except IndexError:
            raise ValidationError("Active User queries require a lower date bound")

    params["parsed_date_from_prev_range"] = f"AND timestamp >= '{format_ch_timestamp(lower_bound, filter)}'"
    return params
def earliest_timestamp_func(team_id: int):
    """Delegate the earliest-timestamp lookup to the active event backend.

    Uses the ClickHouse implementation when ClickHouse is enabled, otherwise
    falls back to the Postgres Event model. Imports are local to avoid pulling
    in the unused backend at module load time.
    """
    if not is_clickhouse_enabled():
        from posthog.models.event import Event

        return Event.objects.earliest_timestamp(team_id)

    from ee.clickhouse.queries.util import get_earliest_timestamp

    return get_earliest_timestamp(team_id)
def get_query(self) -> str: step_counts = self.get_step_counts_without_aggregation_query() # Expects multiple rows for same person, first event time, steps taken. self.params.update(self.funnel_order.params) reached_from_step_count_condition, reached_to_step_count_condition, _ = self.get_steps_reached_conditions( ) trunc_func = get_trunc_func_ch(self._filter.interval) interval_func = get_interval_func_ch(self._filter.interval) if self._filter.date_from is None: _date_from = get_earliest_timestamp(self._team.pk) else: _date_from = self._filter.date_from breakdown_clause = self._get_breakdown_prop() formatted_date_from = format_ch_timestamp(_date_from, self._filter) formatted_date_to = format_ch_timestamp(self._filter.date_to, self._filter) self.params.update({ "formatted_date_from": formatted_date_from, "formatted_date_to": formatted_date_to, "interval": self._filter.interval, }) query = f""" SELECT entrance_period_start, reached_from_step_count, reached_to_step_count, if(reached_from_step_count > 0, round(reached_to_step_count / reached_from_step_count * 100, 2), 0) AS conversion_rate {breakdown_clause} FROM ( SELECT entrance_period_start, countIf({reached_from_step_count_condition}) AS reached_from_step_count, countIf({reached_to_step_count_condition}) AS reached_to_step_count {breakdown_clause} FROM ( {step_counts} ) GROUP BY entrance_period_start {breakdown_clause} ) data RIGHT OUTER JOIN ( SELECT {trunc_func}(toDateTime(%(formatted_date_from)s) + {interval_func}(number)) AS entrance_period_start {', breakdown_value as prop' if breakdown_clause else ''} FROM numbers(dateDiff(%(interval)s, toDateTime(%(formatted_date_from)s), toDateTime(%(formatted_date_to)s)) + 1) AS period_offsets {'ARRAY JOIN (%(breakdown_values)s) AS breakdown_value' if breakdown_clause else ''} ) fill USING (entrance_period_start {breakdown_clause}) ORDER BY entrance_period_start ASC SETTINGS allow_experimental_window_functions = 1""" return query
def calculate_trends(self, request: Request) -> List[Dict[str, Any]]:
    """Run a trends (or stickiness) query for the request's team.

    Dispatches to ClickhouseStickiness when the filter is shown as stickiness,
    otherwise to ClickhouseTrends, then refreshes the owning dashboard item.
    """
    team = self.team
    filter = Filter(request=request)
    if filter.shown_as == TRENDS_STICKINESS:
        # Pass get_earliest_timestamp directly — the previous
        # `lambda team_id: get_earliest_timestamp(team_id)` wrapper added
        # nothing over the function itself.
        stickiness_filter = StickinessFilter(
            request=request, team=team, get_earliest_timestamp=get_earliest_timestamp
        )
        result = ClickhouseStickiness().run(stickiness_filter, team)
    else:
        result = ClickhouseTrends().run(filter, team)
    self._refresh_dashboard(request=request)
    return result
def get_people(
    self,
    filter: Filter,
    team_id: int,
    target_date: datetime,
    lifecycle_type: str,
    request: Request,
    limit: int = 100,
):
    """Return serialized persons in one lifecycle bucket (`lifecycle_type`) for
    the period starting at `target_date`.

    Builds the same event/property clauses as the lifecycle trend query, runs
    LIFECYCLE_PEOPLE_SQL, then hydrates and serializes the matching persons.
    Returns [] when the entity's action cannot be resolved.
    """
    entity = filter.entities[0]
    date_from = filter.date_from
    if not date_from:
        # No explicit lower bound: anchor at the team's first event.
        date_from = get_earliest_timestamp(team_id)
    interval = filter.interval or "day"
    num_intervals, seconds_in_interval, _ = get_time_diff(interval, filter.date_from, filter.date_to, team_id=team_id)
    interval_increment, interval_string, sub_interval_string = self.get_interval(interval)
    trunc_func = get_trunc_func_ch(interval)
    event_query = ""
    event_params: Dict[str, Any] = {}
    _, _, date_params = parse_timestamps(filter=filter, team_id=team_id)
    if entity.type == TREND_FILTER_TYPE_ACTIONS:
        try:
            action = entity.get_action()
            event_query, event_params = format_action_filter(action)
        except Exception:
            # Was a bare `except:`; narrowed so KeyboardInterrupt/SystemExit
            # are no longer swallowed. A missing action yields no people.
            return []
    else:
        event_query = "event = %(event)s"
        event_params = {"event": entity.id}
    props_to_filter = [*filter.properties, *entity.properties]
    prop_filters, prop_filter_params = parse_prop_clauses(
        props_to_filter, team_id, filter_test_accounts=filter.filter_test_accounts
    )
    # Hour/minute granularity keeps the time-of-day; coarser intervals pin to
    # midnight. Hoisted because it was previously duplicated for both
    # prev_date_from and target_date.
    time_format = "%Y-%m-%d{}".format(
        " %H:%M:%S" if filter.interval == "hour" or filter.interval == "minute" else " 00:00:00"
    )
    result = sync_execute(
        LIFECYCLE_PEOPLE_SQL.format(
            interval=interval_string,
            trunc_func=trunc_func,
            event_query=event_query,
            filters=prop_filters,
            sub_interval=sub_interval_string,
            GET_TEAM_PERSON_DISTINCT_IDS=GET_TEAM_PERSON_DISTINCT_IDS,
        ),
        {
            "team_id": team_id,
            "prev_date_from": (date_from - interval_increment).strftime(time_format),
            "num_intervals": num_intervals,
            "seconds_in_interval": seconds_in_interval,
            **event_params,
            **date_params,
            **prop_filter_params,
            "status": lifecycle_type,
            "target_date": target_date.strftime(time_format),
            "offset": filter.offset,
            "limit": limit,
        },
    )
    people = get_persons_by_uuids(team_id=team_id, uuids=[p[0] for p in result])
    # Pre-fetch distinct ids so the serializer doesn't issue N+1 queries.
    people = people.prefetch_related(Prefetch("persondistinctid_set", to_attr="distinct_ids_cache"))

    from posthog.api.person import PersonSerializer

    return PersonSerializer(people, many=True).data
def test_get_earliest_timestamp_with_no_events(db, team):
    """With no events, get_earliest_timestamp falls back to a clock-derived default."""
    # NOTE(review): the hard-coded 2021-01-14 expectation only holds if the test
    # clock is frozen (e.g. freezegun) somewhere outside this snippet — confirm,
    # otherwise this test is time-dependent and will break.
    assert get_earliest_timestamp(team.id) == datetime(2021, 1, 14, tzinfo=pytz.UTC)
def _serialize_lifecycle(self, entity: Entity, filter: Filter, team_id: int) -> List[Dict[str, Any]]:
    """Run the lifecycle query for one entity and serialize each status series.

    Returns one dict per lifecycle status (e.g. new/returning/resurrecting/
    dormant), labelled "<entity name> - <status>". Returns [] when the
    entity's action cannot be resolved.
    """
    date_from = filter.date_from
    if not date_from:
        # No explicit lower bound: anchor at the team's first event.
        date_from = get_earliest_timestamp(team_id)
    interval = filter.interval or "day"
    num_intervals, seconds_in_interval, _ = get_time_diff(interval, filter.date_from, filter.date_to, team_id)
    interval_increment, interval_string, sub_interval_string = self.get_interval(interval)
    trunc_func = get_trunc_func_ch(interval)
    event_query = ""
    event_params: Dict[str, Any] = {}
    props_to_filter = [*filter.properties, *entity.properties]
    prop_filters, prop_filter_params = parse_prop_clauses(props_to_filter, team_id)
    _, _, date_params = parse_timestamps(filter=filter, team_id=team_id)
    if entity.type == TREND_FILTER_TYPE_ACTIONS:
        try:
            action = Action.objects.get(pk=entity.id)
            event_query, event_params = format_action_filter(action)
        except Exception:
            # Was a bare `except:`; narrowed so KeyboardInterrupt/SystemExit
            # are no longer swallowed. A missing action yields no series.
            return []
    else:
        event_query = "event = %(event)s"
        event_params = {"event": entity.id}
    result = sync_execute(
        LIFECYCLE_SQL.format(
            interval=interval_string,
            trunc_func=trunc_func,
            event_query=event_query,
            filters=prop_filters,
            sub_interval=sub_interval_string,
        ),
        {
            "team_id": team_id,
            # Hour/minute granularity keeps the time-of-day; coarser intervals pin to midnight.
            "prev_date_from": (date_from - interval_increment).strftime(
                "%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.interval == "hour" or filter.interval == "minute" else " 00:00:00"
                )
            ),
            "num_intervals": num_intervals,
            "seconds_in_interval": seconds_in_interval,
            **event_params,
            **date_params,
            **prop_filter_params,
        },
    )
    res = []
    for val in result:
        # val[2] is the lifecycle status column returned by LIFECYCLE_SQL.
        label = "{} - {}".format(entity.name, val[2])
        additional_values = {"label": label, "status": val[2]}
        parsed_result = parse_response(val, filter, additional_values)
        res.append(parsed_result)
    return res
def track_earliest_timestamp(self, team_id: int = 2):
    """Exercise the earliest-timestamp lookup (appears to be a benchmark/tracking probe).

    `team_id` was previously hard-coded to 2; it is now a parameter with the
    same default so existing callers are unaffected.
    """
    get_earliest_timestamp(team_id)
def earliest_timestamp_func(team_id: int):
    """Thin delegate to the ClickHouse earliest-timestamp helper.

    The import is deferred to call time so this module does not require the
    ClickHouse package at import time.
    """
    from ee.clickhouse.queries.util import get_earliest_timestamp as _earliest

    return _earliest(team_id)