def _set_default_dates(self, filter: Filter) -> None:
    """Fill in missing date bounds on *filter*, in place.

    Avg/dist session queries default to a trailing 7-day window; every
    other session type defaults to "today" (midnight through the next day).
    """
    if filter.session_type in (SESSION_AVG, SESSION_DIST):
        # Aggregate session views: last 7 days up to now.
        if not filter._date_from:
            filter._date_from = relative_date_parse("-7d")
        if not filter._date_to:
            filter._date_to = timezone.now()
    else:
        # Session list view: just the current day.
        if not filter._date_from:
            filter._date_from = timezone.now().replace(
                hour=0, minute=0, second=0, microsecond=0
            )
        if not filter._date_to and filter.date_from:
            filter._date_to = filter.date_from + relativedelta(days=1)
def calculate_trends(self, filter: Filter, team_id: int) -> List[Dict[str, Any]]:
    """Serialize trend results for every entity on the filter.

    Defaults the date range to [team's earliest event, now] when unset,
    resolves action names from the database, and delegates per-entity
    serialization (including compare-period handling) to handle_compare.
    """
    actions = Action.objects.filter(team_id=team_id).order_by("-id")
    if len(filter.actions) > 0:
        # Restrict to the actions explicitly referenced by the filter.
        actions = Action.objects.filter(
            pk__in=[entity.id for entity in filter.actions], team_id=team_id)
    actions = actions.prefetch_related(
        Prefetch("steps", queryset=ActionStep.objects.order_by("id")))
    entities_list = []
    if not filter.date_from:
        # Default start: midnight of the team's earliest event.
        # NOTE(review): indexing [0] raises IndexError when the team has no
        # events — confirm callers guarantee at least one event exists.
        filter._date_from = (Event.objects.filter(
            team_id=team_id).order_by("timestamp")[0].timestamp.replace(
                hour=0, minute=0, second=0, microsecond=0).isoformat())
    if not filter.date_to:
        filter._date_to = now().isoformat()
    for entity in filter.entities:
        if entity.type == TREND_FILTER_TYPE_ACTIONS:
            try:
                db_action = [
                    action for action in actions if action.id == entity.id
                ][0]
                entity.name = db_action.name
            except IndexError:
                # Entity references an action that no longer exists; skip it.
                continue
        entities_list.extend(
            handle_compare(entity=entity,
                           filter=filter,
                           func=self._serialize_entity,
                           team_id=team_id))
    return entities_list
def calculate_list(self, filter: Filter, team: Team, limit: int, offset: int):
    """Return a paginated list of sessions from ClickHouse.

    Defaults the window to "today" when no explicit dates are set, runs
    SESSION_SQL with property filters and LIMIT/OFFSET pagination, then
    enriches the parsed rows with person properties.
    """
    filters, params = parse_prop_clauses("uuid", filter.properties, team)
    # Default window: midnight today through date_from + 1 day.
    if not filter._date_from:
        filter._date_from = timezone.now().replace(hour=0,
                                                   minute=0,
                                                   second=0,
                                                   microsecond=0)
    if not filter._date_to and filter.date_from:
        filter._date_to = filter.date_from + relativedelta(days=1)
    date_from, date_to = parse_timestamps(filter)
    params = {
        **params,
        "team_id": team.pk,
        "limit": limit,
        "offset": offset
    }
    query = SESSION_SQL.format(
        date_from=date_from,
        date_to=date_to,
        filters="{}".format(filters) if filter.properties else "",
        sessions_limit="LIMIT %(offset)s, %(limit)s",
    )
    query_result = sync_execute(query, params)
    result = self._parse_list_results(query_result)
    # Attach person properties to each parsed session row (mutates result).
    self._add_person_properties(team, result)
    return result
def stickiness(self, entity: Entity, filter: Filter, team_id: int) -> Dict[str, Any]:
    """Compute stickiness: how many people were active on exactly N distinct days.

    Defaults date_from to midnight of the team's earliest event, counts
    distinct active days per person via the ORM, then aggregates person
    counts per day-count bucket with a raw wrapper query.

    Raises:
        ValueError: if date_to (or a derivable date_from) is missing.
    """
    if not filter.date_from:
        # Default start: midnight of the team's earliest event.
        filter._date_from = (Event.objects.filter(
            team_id=team_id).order_by("timestamp")[0].timestamp.replace(
                hour=0, minute=0, second=0, microsecond=0).isoformat())
    if not filter.date_to or not filter.date_from:
        raise ValueError("_stickiness needs date_to and date_from set")
    range_days = (filter.date_to - filter.date_from).days + 2
    events = process_entity_for_events(
        entity=entity,
        team_id=team_id,
        order_by=None,
    )
    # Bug fix: filter_events(...) was previously applied twice — once on its
    # own and again at the start of this chain — duplicating the WHERE
    # conditions and any joins they introduce. Apply it exactly once.
    events = (events.filter(filter_events(
        team_id, filter, entity)).values("person_id").annotate(day_count=Count(
            functions.TruncDay("timestamp"), distinct=True)).filter(
                day_count__lte=range_days))
    events_sql, events_sql_params = events.query.sql_with_params()
    # Aggregate in raw SQL: number of people per distinct-day count.
    aggregated_query = "select count(v.person_id), v.day_count from ({}) as v group by v.day_count".format(
        events_sql)
    counts = execute_custom_sql(aggregated_query, events_sql_params)
    return self.process_result(counts, range_days)
def _determine_compared_filter(self, filter, request):
    """Build a Filter shifted back to the comparison (previous) period."""
    previous_start, previous_end = get_compare_period_dates(filter.date_from,
                                                            filter.date_to)
    compared = Filter(request=request)
    compared._date_from = previous_start.date().isoformat()
    compared._date_to = previous_end.date().isoformat()
    return compared
def trends(self, request: request.Request, *args: Any, **kwargs: Any) -> Response:
    """Trend endpoint: serialize trend data for each requested entity.

    Falls back to all non-deleted actions when the filter has no entities,
    and defaults the date range to [team's earliest event, now].
    """
    actions = self.get_queryset()
    actions = actions.filter(deleted=False)
    team = request.user.team_set.get()
    entities_list = []
    filter = Filter(request=request)
    if len(filter.entities) == 0:
        # If no filters, automatically grab all actions and show those instead
        filter.entities = [Entity({'id': action.id, 'name': action.name, 'type': TREND_FILTER_TYPE_ACTIONS}) for action in actions]
    if not filter.date_from:
        # Default start: midnight of the team's earliest event.
        # NOTE(review): [0] raises IndexError for a team with no events — confirm.
        filter._date_from = Event.objects.filter(team=team)\
            .order_by('timestamp')[0]\
            .timestamp\
            .replace(hour=0, minute=0, second=0, microsecond=0)\
            .isoformat()
    if not filter.date_to:
        filter._date_to = now().isoformat()
    for entity in filter.entities:
        if entity.type == TREND_FILTER_TYPE_ACTIONS:
            try:
                db_action = [action for action in actions if action.id == entity.id][0]
                entity.name = db_action.name
            except IndexError:
                # Entity references an action that no longer exists; skip it.
                continue
        trend_entity = self._serialize_entity(
            entity=entity, filter=filter, request=request, team=team
        )
        entities_list.extend(trend_entity)
    return Response(entities_list)
def preprocess_params(self, filter: Filter, total_intervals=11):
    """Normalize the filter's date range for a retention query.

    Extends date_to by one interval so the final bucket is fully included,
    truncates to midnight for day/week granularity (weeks also snap back to
    the start of the week), and resolves default target/returning entities.

    Returns:
        (filter, entity, returning_entity, first_time_retention,
         date_from, date_to)
    """
    period = filter.period or "Day"
    tdelta, t1 = self.determineTimedelta(total_intervals, period)
    # Push date_to one interval forward so the last bucket is fully covered.
    filter._date_to = (filter.date_to + t1).isoformat()
    first_time_retention = filter.retention_type == RETENTION_FIRST_TIME
    if period == "Hour":
        date_to = filter.date_to
        date_from: datetime.datetime = date_to - tdelta
    elif period == "Week":
        date_to = filter.date_to.replace(hour=0, minute=0, second=0, microsecond=0)
        date_from = date_to - tdelta
        # Snap the window start back to the beginning of the week.
        date_from = date_from - timedelta(days=date_from.isoweekday() % 7)
    else:
        date_to = filter.date_to.replace(hour=0, minute=0, second=0, microsecond=0)
        date_from = date_to - tdelta
    filter._date_from = date_from.isoformat()
    filter._date_to = date_to.isoformat()
    # Default both the cohorting and returning entities to $pageview events.
    entity = (
        Entity({"id": "$pageview", "type": TREND_FILTER_TYPE_EVENTS})
        if not filter.target_entity
        else filter.target_entity
    )
    returning_entity = (
        Entity({"id": "$pageview", "type": TREND_FILTER_TYPE_EVENTS})
        if not len(filter.entities) > 0
        else filter.entities[0]
    )
    # need explicit handling of date_from so it's not optional but also need filter object for date_filter_Q
    return filter, entity, returning_entity, first_time_retention, date_from, date_to
def calculate_retention(self, request: request.Request) -> List[Dict[str, Any]]:
    """Run the retention query for the user's team over the last 11 days."""
    team = request.user.team
    retention_filter = Filter(request=request)
    retention_filter._date_from = "-11d"
    return retention.Retention().run(retention_filter, team)
def retention(self, request: request.Request, *args: Any, **kwargs: Any) -> Response:
    """Retention endpoint: fixed 11-day lookback for the user's team."""
    team = request.user.team
    retention_filter = Filter(request=request)
    retention_filter._date_from = "-11d"
    return Response({"data": retention.Retention().run(retention_filter, team)})
def calculate_trends(filter: Filter, params: dict, team_id: int, actions: QuerySet) -> List[Dict[str, Any]]:
    """Serialize trend results for each entity, optionally with a compare period.

    When params["compare"] is set, each entity yields two labelled series
    ("current" and "previous"); otherwise a single series per entity.
    Defaults the date range to [team's earliest event, now] when unset.
    """
    compare = params.get("compare")
    entities_list = []
    actions = actions.filter(deleted=False)
    if len(filter.entities) == 0:
        # If no filters, automatically grab all actions and show those instead
        filter.entities = [
            Entity({
                "id": action.id,
                "name": action.name,
                "type": TREND_FILTER_TYPE_ACTIONS,
            }) for action in actions
        ]
    if not filter.date_from:
        # Default start: midnight of the team's earliest event.
        filter._date_from = (Event.objects.filter(
            team_id=team_id).order_by("timestamp")[0].timestamp.replace(
                hour=0, minute=0, second=0, microsecond=0).isoformat())
    if not filter.date_to:
        filter._date_to = now().isoformat()
    compared_filter = None
    if compare:
        compared_filter = determine_compared_filter(filter)
    for entity in filter.entities:
        if entity.type == TREND_FILTER_TYPE_ACTIONS:
            try:
                db_action = [
                    action for action in actions if action.id == entity.id
                ][0]
                entity.name = db_action.name
            except IndexError:
                # Entity references an action that no longer exists; skip it.
                continue
        trend_entity = serialize_entity(entity=entity,
                                        filter=filter,
                                        params=params,
                                        team_id=team_id)
        if compare and compared_filter:
            # Label the current-period series, then append the previous-period
            # series computed over the shifted filter.
            trend_entity = convert_to_comparison(
                trend_entity, filter, "{} - {}".format(entity.name, "current"))
            entities_list.extend(trend_entity)
            compared_trend_entity = serialize_entity(entity=entity,
                                                     filter=compared_filter,
                                                     params=params,
                                                     team_id=team_id)
            compared_trend_entity = convert_to_comparison(
                compared_trend_entity,
                compared_filter,
                "{} - {}".format(entity.name, "previous"),
            )
            entities_list.extend(compared_trend_entity)
        else:
            entities_list.extend(trend_entity)
    return entities_list
def retention(self, request: request.Request, *args: Any, **kwargs: Any) -> Response:
    """Retention endpoint filtered by request properties, 11-day lookback."""
    team = request.user.team_set.get()
    props_json = request.GET.get("properties", "{}")
    retention_filter = Filter(data={"properties": json.loads(props_json)})
    retention_filter._date_from = "-11d"
    return Response(calculate_retention(retention_filter, team))
def retention(self, request: request.Request, *args: Any, **kwargs: Any) -> Response:
    """Retention endpoint: fixed 11-day lookback for the user's team."""
    team = request.user.team_set.get()
    retention_filter = Filter(request=request)
    retention_filter._date_from = "-11d"
    return Response(calculate_retention(retention_filter, team))
def calculate_sessions(self, events: QuerySet, session_type: Optional[str], filter: Filter, team: Team, offset: int) -> List[Dict[str, Any]]:
    """Compute session results (list, average, or distribution) from events.

    Sessions are delimited by 30+ minutes of inactivity per distinct_id,
    detected with window functions over the raw events and then numbered
    with running SUMs in a wrapping raw-SQL query.
    """
    # format date filter for session view
    _date_gte = Q()
    if session_type is None:
        # if _date_from is not explicitely set we only want to get the last day worth of data
        # otherwise the query is very slow
        if filter._date_from and filter.date_to:
            _date_gte = Q(
                timestamp__gte=filter.date_from,
                timestamp__lte=filter.date_to + relativedelta(days=1),
            )
        else:
            dt = now()
            dt = dt.replace(hour=0, minute=0, second=0, microsecond=0)
            _date_gte = Q(timestamp__gte=dt,
                          timestamp__lte=dt + relativedelta(days=1))
    else:
        if not filter.date_from:
            # Default start: midnight of the team's earliest event.
            # NOTE(review): `team` is annotated as a Team instance, yet it is
            # passed to the integer `team_id` lookup — looks like it should be
            # team.pk; confirm against the Django version in use.
            filter._date_from = (Event.objects.filter(
                team_id=team).order_by("timestamp")[0].timestamp.replace(
                    hour=0, minute=0, second=0, microsecond=0).isoformat())
    # Annotate each event with the previous event/timestamp for the same
    # distinct_id so a session boundary (>= 30 min gap) is detectable in SQL.
    sessions = (events.filter(_date_gte).annotate(
        previous_timestamp=Window(
            expression=Lag("timestamp", default=None),
            partition_by=F("distinct_id"),
            order_by=F("timestamp").asc(),
        )).annotate(previous_event=Window(
            expression=Lag("event", default=None),
            partition_by=F("distinct_id"),
            order_by=F("timestamp").asc(),
        )))
    sessions_sql, sessions_sql_params = sessions.query.sql_with_params()
    # new_session flags a 30+ minute gap (or the first event of a distinct_id);
    # the running SUMs turn those flags into global and per-user session ids.
    all_sessions = "\
        SELECT *,\
        SUM(new_session) OVER (ORDER BY distinct_id, timestamp) AS global_session_id,\
        SUM(new_session) OVER (PARTITION BY distinct_id ORDER BY timestamp) AS user_session_id\
        FROM (SELECT id, distinct_id, event, elements_hash, timestamp, properties, CASE WHEN EXTRACT('EPOCH' FROM (timestamp - previous_timestamp)) >= (60 * 30)\
            OR previous_timestamp IS NULL \
            THEN 1 ELSE 0 END AS new_session \
            FROM ({}) AS inner_sessions\
        ) AS outer_sessions".format(sessions_sql)
    result: List = []
    if session_type == "avg":
        result = self._session_avg(all_sessions, sessions_sql_params, filter)
    elif session_type == "dist":
        result = self._session_dist(all_sessions, sessions_sql_params)
    else:
        result = self._session_list(all_sessions, sessions_sql_params, team,
                                    filter, offset)
    return result
def _set_default_dates(self, filter: Filter, team_id: int) -> None:
    """Default the filter's start to midnight of the team's earliest event.

    Mutates *filter* in place; does nothing when date_from is already set.
    """
    if filter.date_from:
        return
    earliest_event = Event.objects.filter(team_id=team_id).order_by("timestamp")[0]
    midnight = earliest_event.timestamp.replace(
        hour=0, minute=0, second=0, microsecond=0
    )
    filter._date_from = midnight.isoformat()
def calculate_avg(self, filter: Filter, team: Team):
    """Average session length per interval, plus the overall average.

    Builds a ClickHouse query from SESSIONS_NO_EVENTS_SQL, averages per
    period, zero-fills missing intervals with NULL_SQL, and merges the
    overall average into the time-series payload. Returns [] when there
    were no sessions at all.
    """
    # format default dates
    if not filter._date_from:
        filter._date_from = relative_date_parse("-7d")
    if not filter._date_to:
        filter._date_to = timezone.now()
    parsed_date_from, parsed_date_to = parse_timestamps(filter)
    filters, params = parse_prop_clauses("uuid", filter.properties, team)
    interval_notation = get_interval_annotation_ch(filter.interval)
    num_intervals, seconds_in_interval = get_time_diff(
        filter.interval or "day", filter.date_from, filter.date_to)
    avg_query = SESSIONS_NO_EVENTS_SQL.format(
        team_id=team.pk,
        date_from=parsed_date_from,
        date_to=parsed_date_to,
        filters="{}".format(filters) if filter.properties else "",
        sessions_limit="",
    )
    per_period_query = AVERAGE_PER_PERIOD_SQL.format(
        sessions=avg_query, interval=interval_notation)
    # Zero-fill buckets that have no sessions so the series is continuous.
    null_sql = NULL_SQL.format(
        date_to=(filter.date_to or timezone.now()).strftime("%Y-%m-%d 00:00:00"),
        interval=interval_notation,
        num_intervals=num_intervals,
        seconds_in_interval=seconds_in_interval,
    )
    final_query = AVERAGE_SQL.format(sessions=per_period_query, null_sql=null_sql)
    params = {**params, "team_id": team.pk}
    response = sync_execute(final_query, params)
    values = self.clean_values(filter, response)
    time_series_data = append_data(values, interval=filter.interval, math=None)
    # calculate average
    total = sum(val[1] for val in values)
    if total == 0:
        return []
    # Average only over intervals that actually had sessions.
    valid_days = sum(1 if val[1] else 0 for val in values)
    overall_average = (total / valid_days) if valid_days else 0
    result = self._format_avg(overall_average)
    time_series_data.update(result)
    return [time_series_data]
def calculate_retention(self, filter: Filter, team: Team, total_intervals=11):
    """Build the retention matrix over *total_intervals* periods.

    Returns one row per cohort start offset, zero-filling (cohort, period)
    pairs that are missing from the query resultset.
    """
    def _determineTimedelta(
        total_intervals: int, period: str
    ) -> Tuple[Union[timedelta, relativedelta], Union[timedelta, relativedelta]]:
        # Returns (span of the whole window, length of one interval).
        if period == "Hour":
            return timedelta(hours=total_intervals), timedelta(hours=1)
        elif period == "Week":
            return timedelta(weeks=total_intervals), timedelta(weeks=1)
        elif period == "Month":
            return relativedelta(months=total_intervals), relativedelta(
                months=1)
        elif period == "Day":
            return timedelta(days=total_intervals), timedelta(days=1)
        else:
            raise ValueError(f"Period {period} is unsupported.")

    period = filter.period or "Day"
    tdelta, t1 = _determineTimedelta(total_intervals, period)
    # Push date_to one interval forward so the last bucket is fully covered.
    filter._date_to = ((filter.date_to if filter.date_to else now()) + t1).isoformat()
    if period == "Hour":
        date_to = filter.date_to if filter.date_to else now()
        date_from = date_to - tdelta
    else:
        # Truncate to midnight for day/week/month granularity.
        date_to = (filter.date_to if filter.date_to else now()).replace(
            hour=0, minute=0, second=0, microsecond=0)
        date_from = date_to - tdelta
    filter._date_from = date_from.isoformat()
    filter._date_to = date_to.isoformat()
    resultset = Event.objects.query_retention(filter, team)
    result = [{
        "values": [
            resultset.get((first_day, day), {
                "count": 0,
                "people": []
            }) for day in range(total_intervals - first_day)
        ],
        "label": "{} {}".format(period, first_day),
        "date": (date_from + _determineTimedelta(first_day, period)[0]),
    } for first_day in range(total_intervals)]
    return result
def calculate_retention(self, filter: Filter, team: Team, total_intervals=11):
    """Build the retention matrix, labelling each cohort with its start date."""
    def _determineTimedelta(
            total_intervals: int,
            period: str) -> Union[timedelta, relativedelta]:
        # Span covered by total_intervals periods of the given granularity.
        if period == "Hour":
            return timedelta(hours=total_intervals)
        elif period == "Week":
            return timedelta(weeks=total_intervals)
        elif period == "Month":
            return relativedelta(months=total_intervals)
        elif period == "Day":
            return timedelta(days=total_intervals)
        else:
            raise ValueError(f"Period {period} is unsupported.")

    period = filter.period or "Day"
    if period == "Hour":
        # NOTE(review): assumes filter.date_from is already set for hourly
        # retention — it is not defaulted here; confirm callers guarantee it.
        date_from: datetime.datetime = filter.date_from  # type: ignore
        filter._date_to = (
            date_from + _determineTimedelta(total_intervals, period)).isoformat()
    else:
        # Truncate the start to midnight, then derive date_to from it.
        filter._date_from = ((filter.date_from.replace(
            hour=0, minute=0, second=0, microsecond=0)).isoformat()
                             if filter.date_from else filter._date_from)
        date_from: datetime.datetime = filter.date_from  # type: ignore
        filter._date_to = (
            date_from + _determineTimedelta(total_intervals, period)).isoformat()
    labels_format = "%a. %-d %B"
    hourly_format = "%-H:%M %p"
    resultset = Event.objects.query_retention(filter, team)
    result = [{
        "values": [
            resultset.get((first_day, day), {
                "count": 0,
                "people": []
            }) for day in range(total_intervals - first_day)
        ],
        "label": "{} {}".format(period, first_day),
        "date": (date_from + _determineTimedelta(first_day, period)
                 ).strftime(labels_format +
                            (hourly_format if period == "Hour" else "")),
    } for first_day in range(total_intervals)]
    return result
def calculate_sessions(self, events: QuerySet, filter: Filter, team: Team) -> List[Dict[str, Any]]:
    """Compute session metrics (average length or distribution) for *events*.

    Dispatches on filter.session: SESSION_AVG runs the average-length query
    (defaulting date_from to midnight of the team's earliest event); any
    other value falls through to the distribution query.
    """
    all_sessions, sessions_sql_params = self.build_all_sessions_query(
        events)
    if filter.session == SESSION_AVG:
        if not filter.date_from:
            # Default start: midnight of the team's earliest event.
            # Bug fix: previously filtered team_id=team (a Team instance);
            # the integer team_id lookup needs the primary key.
            filter._date_from = (Event.objects.filter(
                team_id=team.pk).order_by("timestamp")[0].timestamp.replace(
                    hour=0, minute=0, second=0, microsecond=0).isoformat())
        return self._session_avg(all_sessions, sessions_sql_params, filter)
    else:  # SESSION_DIST
        return self._session_dist(all_sessions, sessions_sql_params)
def retention(self, request: request.Request, *args: Any, **kwargs: Any) -> Response:
    """Retention endpoint with optional start entity, 11-day lookback."""
    team = request.user.team_set.get()
    props_json = request.GET.get("properties", "{}")
    raw_start_entity = request.GET.get("start_entity", None)
    start_entity: Optional[Entity] = None
    if raw_start_entity:
        parsed = json.loads(raw_start_entity)
        start_entity = Entity({"id": parsed["id"], "type": parsed["type"]})
    retention_filter = Filter(data={"properties": json.loads(props_json)})
    retention_filter._date_from = "-11d"
    return Response(calculate_retention(retention_filter, team, start_entity=start_entity))
def list(self, request: Request, *args: Any, **kwargs: Any) -> Response:
    """Event list endpoint backed by ClickHouse, with cursor pagination.

    Falls back to the default implementation when the ClickHouse endpoint
    flag is off for this user. Fetches 101 rows so the presence of a 101st
    signals that a next-page URL is needed.
    """
    if not endpoint_enabled(CH_EVENT_ENDPOINT, request.user.distinct_id):
        return super().list(request)
    team = request.user.team
    filter = Filter(request=request)
    # after/before act as the pagination cursor on the timestamp column.
    if request.GET.get("after"):
        filter._date_from = request.GET["after"]
    if request.GET.get("before"):
        filter._date_to = request.GET["before"]
    limit = "LIMIT 101"
    conditions, condition_params = determine_event_conditions(request.GET.dict())
    prop_filters, prop_filter_params = parse_prop_clauses("uuid", filter.properties, team)
    if prop_filters != "":
        query_result = sync_execute(
            SELECT_EVENT_WITH_PROP_SQL.format(conditions=conditions, limit=limit, filters=prop_filters),
            {"team_id": team.pk, **condition_params, **prop_filter_params},
        )
    else:
        query_result = sync_execute(
            SELECT_EVENT_WITH_ARRAY_PROPS_SQL.format(conditions=conditions, limit=limit),
            {"team_id": team.pk, **condition_params},
        )
    result = ClickhouseEventSerializer(
        query_result,
        many=True,
        context={
            "elements": self._get_elements(query_result, team),
            "people": self._get_people(query_result, team),
        },
    ).data
    if len(query_result) > 100:
        # More rows exist: build a cursor URL from the 100th row's timestamp
        # (column index 3 of the raw ClickHouse row).
        path = request.get_full_path()
        reverse = request.GET.get("orderBy", "-timestamp") != "-timestamp"
        next_url: Optional[str] = request.build_absolute_uri(
            "{}{}{}={}".format(
                path,
                "&" if "?" in path else "?",
                "after" if reverse else "before",
                query_result[99][3].strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
            )
        )
    else:
        next_url = None
    return Response({"next": next_url, "results": result})
def retention(self, request: request.Request, *args: Any, **kwargs: Any) -> Response:
    """Retention endpoint with optional start entity, 11-day lookback."""
    team = request.user.team
    props_json = request.GET.get("properties", "{}")
    retention_filter = Filter(data={"properties": json.loads(props_json)})
    raw_start_entity = request.GET.get("start_entity", None)
    if raw_start_entity:
        parsed = json.loads(raw_start_entity)
        retention_filter.entities = [Entity({"id": parsed["id"], "type": parsed["type"]})]
    retention_filter._date_from = "-11d"
    return Response({"data": retention.Retention().run(retention_filter, team)})
def list(self, request: Request, *args: Any, **kwargs: Any) -> Response:
    """Event list endpoint backed by ClickHouse, with cursor pagination and
    optional action filtering.

    Fetches 101 rows; the 101st only signals that a next-page URL is needed.
    """
    team = request.user.team
    assert team is not None
    filter = Filter(request=request)
    # after/before act as the pagination cursor on the timestamp column.
    if request.GET.get("after"):
        filter._date_from = request.GET["after"]
    if request.GET.get("before"):
        filter._date_to = request.GET["before"]
    limit = "LIMIT 101"
    conditions, condition_params = determine_event_conditions(request.GET.dict())
    prop_filters, prop_filter_params = parse_prop_clauses(filter.properties, team.pk)
    if request.GET.get("action_id"):
        action = Action.objects.get(pk=request.GET["action_id"])
        if action.steps.count() == 0:
            # An action with no steps can never match any event.
            return Response({"next": False, "results": []})
        action_query, params = format_action_filter(action)
        prop_filters += " AND {}".format(action_query)
        prop_filter_params = {**prop_filter_params, **params}
    if prop_filters != "":
        query_result = sync_execute(
            SELECT_EVENT_WITH_PROP_SQL.format(conditions=conditions, limit=limit, filters=prop_filters),
            {"team_id": team.pk, **condition_params, **prop_filter_params},
        )
    else:
        query_result = sync_execute(
            SELECT_EVENT_WITH_ARRAY_PROPS_SQL.format(conditions=conditions, limit=limit),
            {"team_id": team.pk, **condition_params},
        )
    # Serialize at most 100 rows; the 101st row is only a pagination signal.
    result = ClickhouseEventSerializer(
        query_result[0:100],
        many=True,
        context={"people": self._get_people(query_result, team),},
    ).data
    if len(query_result) > 100:
        # Build a cursor URL from the 100th row's timestamp (column index 3).
        path = request.get_full_path()
        reverse = request.GET.get("orderBy", "-timestamp") != "-timestamp"
        next_url: Optional[str] = request.build_absolute_uri(
            "{}{}{}={}".format(
                path,
                "&" if "?" in path else "?",
                "after" if reverse else "before",
                query_result[99][3].strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
            )
        )
    else:
        next_url = None
    return Response({"next": next_url, "results": result})
def calculate_dist(self, filter: Filter, team: Team):
    """Session duration distribution bucketed into fixed human-readable ranges.

    Defaults the window to the trailing 7 days, runs DIST_SQL against
    ClickHouse, and pairs each bucket count with its label.
    """
    # Default window: trailing 7 days up to now.
    if not filter._date_from:
        filter._date_from = relative_date_parse("-7d")
    if not filter._date_to:
        filter._date_to = timezone.now()
    parsed_date_from, parsed_date_to = parse_timestamps(filter)
    filters, params = parse_prop_clauses("uuid", filter.properties, team)
    query = DIST_SQL.format(
        team_id=team.pk,
        date_from=parsed_date_from,
        date_to=parsed_date_to,
        filters="{}".format(filters) if filter.properties else "",
        sessions_limit="",
    )
    result = sync_execute(query, {**params, "team_id": team.pk})
    dist_labels = [
        "0 seconds (1 event)",
        "0-3 seconds",
        "3-10 seconds",
        "10-30 seconds",
        "30-60 seconds",
        "1-3 minutes",
        "3-10 minutes",
        "10-30 minutes",
        "30-60 minutes",
        "1+ hours",
    ]
    # The query returns one row whose columns line up with dist_labels.
    return [
        {"label": label, "count": result[0][index]}
        for index, label in enumerate(dist_labels)
    ]
def run(self, filter: Filter, team: Team, *args, **kwargs) -> List[Dict[str, Any]]:
    """Serialize every entity on the filter, defaulting missing date bounds.

    date_from defaults to midnight of the team's earliest event and date_to
    to now; action entities have their names resolved from the database.
    """
    if not filter.date_from:
        first_event = Event.objects.filter(team_id=team.pk).order_by("timestamp")[0]
        filter._date_from = first_event.timestamp.replace(
            hour=0, minute=0, second=0, microsecond=0
        ).isoformat()
    if not filter.date_to:
        filter._date_to = now().isoformat()
    results: List[Dict[str, Any]] = []
    for entity in filter.entities:
        if entity.type == TREND_FILTER_TYPE_ACTIONS:
            entity.name = Action.objects.only("name").get(team=team, pk=entity.id).name
        results.extend(
            handle_compare(entity=entity,
                           filter=filter,
                           func=self._serialize_entity,
                           team_id=team.pk)
        )
    return results
def run(self, filter: Filter, team: Team, *args, **kwargs) -> List[Dict[str, Any]]:
    """Serialize every entity on the filter with default window and interval.

    Defaults: trailing 7 days for the date range and "day" for the interval.
    Action entities have their names resolved from the database.
    """
    if not filter._date_from:
        filter._date_from = relative_date_parse("-7d")
    if not filter._date_to:
        filter._date_to = timezone.now()
    if filter.interval is None:
        filter.interval = "day"
    results: List[Dict[str, Any]] = []
    for entity in filter.entities:
        if entity.type == TREND_FILTER_TYPE_ACTIONS:
            entity.name = Action.objects.only("name").get(team=team, pk=entity.id).name
        results.extend(
            handle_compare(filter=filter,
                           func=self._serialize_entity,
                           team=team,
                           entity=entity)
        )
    return results