def preprocess_params(self, filter: Filter, total_intervals=11):
    """Normalise the filter's date range for a retention query.

    Mutates ``filter`` in place (``_date_from``/``_date_to``) and resolves
    the target and returning entities, both defaulting to a ``$pageview``
    event entity when not supplied.

    Returns (filter, entity, returning_entity, first_time_retention,
    date_from, date_to) — date_from/date_to are returned explicitly because
    the filter's own date fields are optional, while ``date_filter_Q``
    still needs the filter object.
    """
    period = filter.period or "Day"
    tdelta, t1 = self.determineTimedelta(total_intervals, period)
    # Widen date_to by one interval FIRST: the filter.date_to reads below
    # see this extended value, making the final bucket inclusive.
    filter._date_to = (filter.date_to + t1).isoformat()
    first_time_retention = filter.retention_type == RETENTION_FIRST_TIME
    if period == "Hour":
        date_to = filter.date_to
        date_from: datetime.datetime = date_to - tdelta
    elif period == "Week":
        # Truncate to midnight, then snap back to the start of the week;
        # isoweekday() % 7 is 0 for Sunday, so weeks start on Sunday.
        date_to = filter.date_to.replace(hour=0, minute=0, second=0, microsecond=0)
        date_from = date_to - tdelta
        date_from = date_from - timedelta(days=date_from.isoweekday() % 7)
    else:
        # Day/Month: truncate to midnight.
        date_to = filter.date_to.replace(hour=0, minute=0, second=0, microsecond=0)
        date_from = date_to - tdelta
    # Write the final normalised bounds back onto the filter.
    filter._date_from = date_from.isoformat()
    filter._date_to = date_to.isoformat()
    entity = (
        Entity({"id": "$pageview", "type": TREND_FILTER_TYPE_EVENTS})
        if not filter.target_entity
        else filter.target_entity
    )
    returning_entity = (
        Entity({"id": "$pageview", "type": TREND_FILTER_TYPE_EVENTS})
        if not len(filter.entities) > 0
        else filter.entities[0]
    )
    # need explicit handling of date_from so it's not optional but also need filter object for date_filter_Q
    return filter, entity, returning_entity, first_time_retention, date_from, date_to
def calculate_retention(self, filter: Filter, team: Team, total_intervals=11):
    """Build a triangular retention table over ``total_intervals`` periods.

    Each row is a cohort (its first period) and its values are counts for
    each subsequent period; mutates the filter's date bounds in place.
    """

    def _determineTimedelta(
        total_intervals: int, period: str
    ) -> Tuple[Union[timedelta, relativedelta], Union[timedelta, relativedelta]]:
        # Returns (total span of the query, step of one interval).
        if period == "Hour":
            return timedelta(hours=total_intervals), timedelta(hours=1)
        elif period == "Week":
            return timedelta(weeks=total_intervals), timedelta(weeks=1)
        elif period == "Month":
            return relativedelta(months=total_intervals), relativedelta(months=1)
        elif period == "Day":
            return timedelta(days=total_intervals), timedelta(days=1)
        else:
            raise ValueError(f"Period {period} is unsupported.")

    period = filter.period or "Day"
    tdelta, t1 = _determineTimedelta(total_intervals, period)
    # Widen date_to by one interval first; the filter.date_to reads below
    # observe this extended value, making the final bucket inclusive.
    filter._date_to = ((filter.date_to if filter.date_to else now()) + t1).isoformat()
    if period == "Hour":
        date_to = filter.date_to if filter.date_to else now()
        date_from = date_to - tdelta
    else:
        # Non-hourly periods are truncated to midnight.
        date_to = (filter.date_to if filter.date_to else now()).replace(
            hour=0, minute=0, second=0, microsecond=0)
        date_from = date_to - tdelta
    filter._date_from = date_from.isoformat()
    filter._date_to = date_to.isoformat()
    resultset = Event.objects.query_retention(filter, team)
    # Triangular table: cohort `first_day` only has (total - first_day) cells.
    # NOTE: "date" here is a datetime object, not a formatted string.
    result = [{
        "values": [
            resultset.get((first_day, day), {
                "count": 0,
                "people": []
            }) for day in range(total_intervals - first_day)
        ],
        "label": "{} {}".format(period, first_day),
        "date": (date_from + _determineTimedelta(first_day, period)[0]),
    } for first_day in range(total_intervals)]
    return result
def _set_default_dates(self, filter: Filter) -> None:
    """Fill in any missing date bounds on the filter, in place.

    Avg/dist session charts default to the last seven days ending now;
    session lists default to "today" (midnight to midnight plus one day).
    """
    is_chart = filter.session_type == SESSION_AVG or filter.session_type == SESSION_DIST
    if is_chart:
        if not filter._date_from:
            filter._date_from = relative_date_parse("-7d")
        if not filter._date_to:
            filter._date_to = timezone.now()
    else:
        if not filter._date_from:
            midnight = timezone.now().replace(hour=0, minute=0, second=0, microsecond=0)
            filter._date_from = midnight
        if not filter._date_to and filter.date_from:
            filter._date_to = filter.date_from + relativedelta(days=1)
def calculate_retention(self, filter: Filter, team: Team, total_intervals=11):
    """Build a triangular retention table with human-readable date labels.

    Mutates the filter's date bounds in place so that the query spans
    exactly ``total_intervals`` periods starting at ``filter.date_from``.
    """

    def _determineTimedelta(
            total_intervals: int,
            period: str) -> Union[timedelta, relativedelta]:
        # Maps a period name to the span covering `total_intervals` of it.
        if period == "Hour":
            return timedelta(hours=total_intervals)
        elif period == "Week":
            return timedelta(weeks=total_intervals)
        elif period == "Month":
            return relativedelta(months=total_intervals)
        elif period == "Day":
            return timedelta(days=total_intervals)
        else:
            raise ValueError(f"Period {period} is unsupported.")

    period = filter.period or "Day"
    if period == "Hour":
        # NOTE(review): assumes filter.date_from is set for hourly retention;
        # a None here would raise on the addition below — confirm callers.
        date_from: datetime.datetime = filter.date_from  # type: ignore
        filter._date_to = (
            date_from + _determineTimedelta(total_intervals, period)).isoformat()
    else:
        # Truncate the start to midnight first, then read it back via the
        # date_from property so the widened _date_to is based on it.
        filter._date_from = ((filter.date_from.replace(
            hour=0, minute=0, second=0, microsecond=0)).isoformat()
                             if filter.date_from else filter._date_from)
        date_from: datetime.datetime = filter.date_from  # type: ignore
        filter._date_to = (
            date_from + _determineTimedelta(total_intervals, period)).isoformat()
    # NOTE(review): "%-d" / "%-H" are glibc extensions — not portable to
    # Windows strftime; confirm deployment targets.
    labels_format = "%a. %-d %B"
    hourly_format = "%-H:%M %p"
    resultset = Event.objects.query_retention(filter, team)
    # Triangular table: cohort `first_day` has (total - first_day) cells.
    result = [{
        "values": [
            resultset.get((first_day, day), {
                "count": 0,
                "people": []
            }) for day in range(total_intervals - first_day)
        ],
        "label": "{} {}".format(period, first_day),
        "date": (date_from + _determineTimedelta(first_day, period)
                 ).strftime(labels_format +
                            (hourly_format if period == "Hour" else "")),
    } for first_day in range(total_intervals)]
    return result
def calculate_retention(self, filter: Filter, team: Team, total_intervals=11):
    """Build a day-granularity triangular retention table.

    Sets the filter's end date to ``total_intervals`` days after its start,
    then lays out one row per cohort day with per-day count/people cells.
    """
    start: datetime.datetime = filter.date_from  # type: ignore
    filter._date_to = (start + timedelta(days=total_intervals)).isoformat()
    date_format = "%a. %-d %B"
    counts = Event.objects.query_retention(filter, team)

    rows = []
    for cohort_day in range(total_intervals):
        # Cohort N only has (total - N) follow-up days → triangular shape.
        cells = [
            counts.get((cohort_day, offset), {"count": 0, "people": []})
            for offset in range(total_intervals - cohort_day)
        ]
        rows.append({
            "values": cells,
            "label": "Day {}".format(cohort_day),
            "date": (start + timedelta(days=cohort_day)).strftime(date_format),
        })
    return rows
def calculate_list(self, filter: Filter, team: Team, limit: int, offset: int):
    """Return one page of sessions, enriched with person properties.

    Defaults a missing date range to "today", then runs SESSION_SQL with
    LIMIT/OFFSET pagination.
    """
    prop_filters, prop_params = parse_prop_clauses("uuid", filter.properties, team)

    # Default to today when no explicit range was requested.
    if not filter._date_from:
        filter._date_from = timezone.now().replace(
            hour=0, minute=0, second=0, microsecond=0)
    if not filter._date_to and filter.date_from:
        filter._date_to = filter.date_from + relativedelta(days=1)

    parsed_from, parsed_to = parse_timestamps(filter)
    query_params = {
        **prop_params,
        "team_id": team.pk,
        "limit": limit,
        "offset": offset,
    }
    sql = SESSION_SQL.format(
        date_from=parsed_from,
        date_to=parsed_to,
        filters="{}".format(prop_filters) if filter.properties else "",
        sessions_limit="LIMIT %(offset)s, %(limit)s",
    )
    rows = sync_execute(sql, query_params)
    sessions = self._parse_list_results(rows)
    self._add_person_properties(team, sessions)
    return sessions
def run(self, filter: Filter, team: Team, *args, **kwargs) -> List[Dict[str, Any]]:
    """Entry point for session queries, with optional compare-period output.

    When comparison is requested (avg chart, non-"all" range), computes the
    current and previous periods and labels each; otherwise runs a single
    calculation, skipping date filtering entirely for plain session lists.
    """
    base_events = (
        Event.objects.filter(team=team)
        .filter(filter.properties_to_Q(team_id=team.pk))
        .add_person_id(team.pk)
        .order_by("-timestamp")
    )
    session_type = kwargs.get("session_type", None)
    offset = kwargs.get("offset", 0)
    if not filter.date_to:
        filter._date_to = now().isoformat()

    wants_comparison = (
        filter.compare and filter._date_from != "all" and session_type == "avg"
    )
    if wants_comparison:
        current = self.calculate_sessions(
            base_events.filter(filter.date_filter_Q),
            session_type, filter, team, offset)
        results = self._convert_to_comparison(current, "current")
        previous_filter = determine_compared_filter(filter)
        previous = self.calculate_sessions(
            base_events.filter(previous_filter.date_filter_Q),
            session_type, previous_filter, team, offset)
        results.extend(self._convert_to_comparison(previous, "previous"))
        return results

    # session_type None means a session list, which gets no date filtering.
    if session_type is not None:
        base_events = base_events.filter(filter.date_filter_Q)
    return self.calculate_sessions(base_events, session_type, filter, team, offset)
def calculate_trends(self, filter: Filter, team_id: int) -> List[Dict[str, Any]]:
    """Serialize trend data for every entity on the filter.

    Defaults a missing date range to [first event's day, now], resolves
    action entity names from the database, and skips entities whose action
    no longer exists.
    """
    actions = Action.objects.filter(team_id=team_id).order_by("-id")
    if len(filter.actions) > 0:
        actions = Action.objects.filter(
            pk__in=[entity.id for entity in filter.actions], team_id=team_id)
    actions = actions.prefetch_related(
        Prefetch("steps", queryset=ActionStep.objects.order_by("id")))
    entities_list = []
    if not filter.date_from:
        filter._date_from = (Event.objects.filter(
            team_id=team_id).order_by("timestamp")[0].timestamp.replace(
                hour=0, minute=0, second=0, microsecond=0).isoformat())
    if not filter.date_to:
        filter._date_to = now().isoformat()
    for entity in filter.entities:
        if entity.type == TREND_FILTER_TYPE_ACTIONS:
            # next() stops at the first match instead of materialising the
            # whole list only to index [0]; None means the action is gone.
            db_action = next(
                (action for action in actions if action.id == entity.id), None)
            if db_action is None:
                continue
            entity.name = db_action.name
        entities_list.extend(
            handle_compare(entity=entity,
                           filter=filter,
                           func=self._serialize_entity,
                           team_id=team_id))
    return entities_list
def _determine_compared_filter(self, filter, request):
    """Build a new Filter covering the comparison period for this filter.

    The comparison bounds come from get_compare_period_dates and are
    written back as date-only ISO strings.
    """
    compare_from, compare_to = get_compare_period_dates(
        filter.date_from, filter.date_to)
    compared = Filter(request=request)
    compared._date_from = compare_from.date().isoformat()
    compared._date_to = compare_to.date().isoformat()
    return compared
def trends(self, request: request.Request, *args: Any, **kwargs: Any) -> Response:
    """Serialize trend data for the requested actions (or all of them)."""
    actions = self.get_queryset().filter(deleted=False)
    team = request.user.team_set.get()
    filter = Filter(request=request)

    if len(filter.entities) == 0:
        # No explicit entities: trend every (non-deleted) action instead.
        filter.entities = [
            Entity({'id': action.id, 'name': action.name, 'type': TREND_FILTER_TYPE_ACTIONS})
            for action in actions
        ]
    if not filter.date_from:
        earliest = Event.objects.filter(team=team).order_by('timestamp')[0].timestamp
        filter._date_from = earliest.replace(
            hour=0, minute=0, second=0, microsecond=0).isoformat()
    if not filter.date_to:
        filter._date_to = now().isoformat()

    entities_list = []
    for entity in filter.entities:
        if entity.type == TREND_FILTER_TYPE_ACTIONS:
            matches = [action for action in actions if action.id == entity.id]
            if not matches:
                # The referenced action no longer exists — skip it.
                continue
            entity.name = matches[0].name
        entities_list.extend(
            self._serialize_entity(
                entity=entity, filter=filter, request=request, team=team))
    return Response(entities_list)
def run(self, filter: Filter, team: Team, *args, **kwargs) -> List[Dict[str, Any]]:
    """Entry point for session queries, with optional previous-period compare.

    Comparison only applies to the average chart on a bounded range;
    plain session lists (session_type None) get no date filtering at all.
    """
    events = (
        Event.objects.filter(team=team)
        .filter(filter.properties_to_Q(team_id=team.pk))
        .add_person_id(team.pk)
        .order_by("-timestamp")
    )
    limit = int(kwargs.get("limit", SESSIONS_LIST_DEFAULT_LIMIT))
    offset = filter.offset
    if not filter.date_to:
        filter._date_to = now().isoformat()

    comparing = (
        filter.compare
        and filter._date_from != "all"
        and filter.session_type == SESSION_AVG
    )
    if comparing:
        current = self.calculate_sessions(
            events.filter(filter.date_filter_Q), filter, team, limit, offset)
        results = convert_to_comparison(current, "current", filter)
        compare_filter = determine_compared_filter(filter)
        previous = self.calculate_sessions(
            events.filter(compare_filter.date_filter_Q),
            compare_filter, team, limit, offset)
        results.extend(convert_to_comparison(previous, "previous", filter))
        return results

    if filter.session_type is not None:
        events = events.filter(filter.date_filter_Q)
    return self.calculate_sessions(events, filter, team, limit, offset)
def calculate_trends(filter: Filter, params: dict, team_id: int,
                     actions: QuerySet) -> List[Dict[str, Any]]:
    """Serialize trend data for every entity, optionally with comparison.

    Defaults entities to all non-deleted actions and a missing date range
    to [first event's day, now]. When params["compare"] is set, each entity
    is serialized twice (current and previous period) with labelled names.
    """
    compare = params.get("compare")
    entities_list = []
    actions = actions.filter(deleted=False)
    if len(filter.entities) == 0:
        # If no filters, automatically grab all actions and show those instead
        filter.entities = [
            Entity({
                "id": action.id,
                "name": action.name,
                "type": TREND_FILTER_TYPE_ACTIONS,
            }) for action in actions
        ]
    if not filter.date_from:
        filter._date_from = (Event.objects.filter(
            team_id=team_id).order_by("timestamp")[0].timestamp.replace(
                hour=0, minute=0, second=0, microsecond=0).isoformat())
    if not filter.date_to:
        filter._date_to = now().isoformat()
    compared_filter = None
    if compare:
        compared_filter = determine_compared_filter(filter)
    for entity in filter.entities:
        if entity.type == TREND_FILTER_TYPE_ACTIONS:
            # next() stops at the first matching action instead of building
            # the whole list only to take element [0]; None → action gone.
            db_action = next(
                (action for action in actions if action.id == entity.id), None)
            if db_action is None:
                continue
            entity.name = db_action.name
        trend_entity = serialize_entity(entity=entity,
                                        filter=filter,
                                        params=params,
                                        team_id=team_id)
        if compare and compared_filter:
            trend_entity = convert_to_comparison(
                trend_entity, filter, "{} - {}".format(entity.name, "current"))
            entities_list.extend(trend_entity)

            compared_trend_entity = serialize_entity(entity=entity,
                                                     filter=compared_filter,
                                                     params=params,
                                                     team_id=team_id)
            compared_trend_entity = convert_to_comparison(
                compared_trend_entity,
                compared_filter,
                "{} - {}".format(entity.name, "previous"),
            )
            entities_list.extend(compared_trend_entity)
        else:
            entities_list.extend(trend_entity)
    return entities_list
def calculate_avg(self, filter: Filter, team: Team):
    """Compute average session length per interval plus an overall average.

    Assembles a nested ClickHouse query (sessions → per-period averages,
    zero-filled over the full range) and returns a single time-series dict
    in a list, or [] when there is no data at all.
    """
    # format default dates
    if not filter._date_from:
        filter._date_from = relative_date_parse("-7d")
    if not filter._date_to:
        filter._date_to = timezone.now()
    parsed_date_from, parsed_date_to = parse_timestamps(filter)
    filters, params = parse_prop_clauses("uuid", filter.properties, team)
    interval_notation = get_interval_annotation_ch(filter.interval)
    num_intervals, seconds_in_interval = get_time_diff(
        filter.interval or "day", filter.date_from, filter.date_to)
    # Innermost query: raw sessions, no limit.
    avg_query = SESSIONS_NO_EVENTS_SQL.format(
        team_id=team.pk,
        date_from=parsed_date_from,
        date_to=parsed_date_to,
        filters="{}".format(filters) if filter.properties else "",
        sessions_limit="",
    )
    per_period_query = AVERAGE_PER_PERIOD_SQL.format(
        sessions=avg_query, interval=interval_notation)
    # NULL_SQL zero-fills intervals that had no sessions so the series
    # covers the whole requested range.
    null_sql = NULL_SQL.format(
        date_to=(filter.date_to or timezone.now()).strftime("%Y-%m-%d 00:00:00"),
        interval=interval_notation,
        num_intervals=num_intervals,
        seconds_in_interval=seconds_in_interval,
    )
    final_query = AVERAGE_SQL.format(sessions=per_period_query, null_sql=null_sql)
    params = {**params, "team_id": team.pk}
    response = sync_execute(final_query, params)
    values = self.clean_values(filter, response)
    time_series_data = append_data(values, interval=filter.interval, math=None)
    # calculate average
    total = sum(val[1] for val in values)
    if total == 0:
        return []
    # Only intervals with activity count toward the overall average.
    valid_days = sum(1 if val[1] else 0 for val in values)
    overall_average = (total / valid_days) if valid_days else 0
    result = self._format_avg(overall_average)
    time_series_data.update(result)
    return [time_series_data]
def list(self, request: Request, *args: Any, **kwargs: Any) -> Response:
    """List events from ClickHouse with cursor-style pagination.

    Fetches up to 101 rows; when a 101st row exists, a `next` URL anchored
    on the 100th row's timestamp is returned. Falls back to the default
    (non-ClickHouse) implementation when the endpoint flag is off.
    """
    if not endpoint_enabled(CH_EVENT_ENDPOINT, request.user.distinct_id):
        return super().list(request)
    team = request.user.team
    filter = Filter(request=request)
    if request.GET.get("after"):
        filter._date_from = request.GET["after"]
    if request.GET.get("before"):
        filter._date_to = request.GET["before"]
    # One extra row beyond the page size signals that a next page exists.
    limit = "LIMIT 101"
    conditions, condition_params = determine_event_conditions(request.GET.dict())
    prop_filters, prop_filter_params = parse_prop_clauses("uuid", filter.properties, team)
    if prop_filters != "":
        query_result = sync_execute(
            SELECT_EVENT_WITH_PROP_SQL.format(conditions=conditions, limit=limit, filters=prop_filters),
            {"team_id": team.pk, **condition_params, **prop_filter_params},
        )
    else:
        query_result = sync_execute(
            SELECT_EVENT_WITH_ARRAY_PROPS_SQL.format(conditions=conditions, limit=limit),
            {"team_id": team.pk, **condition_params},
        )
    # Serialize at most 100 rows: the 101st row only signals another page,
    # and returning it would overlap with the next page (the cursor below
    # is anchored on row index 99).
    result = ClickhouseEventSerializer(
        query_result[0:100],
        many=True,
        context={
            "elements": self._get_elements(query_result, team),
            "people": self._get_people(query_result, team),
        },
    ).data
    if len(query_result) > 100:
        path = request.get_full_path()
        reverse = request.GET.get("orderBy", "-timestamp") != "-timestamp"
        next_url: Optional[str] = request.build_absolute_uri(
            "{}{}{}={}".format(
                path,
                "&" if "?" in path else "?",
                "after" if reverse else "before",
                # Column 3 is assumed to be the event timestamp — matches
                # the sibling ClickHouse list endpoint.
                query_result[99][3].strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
            )
        )
    else:
        next_url = None
    return Response({"next": next_url, "results": result})
def list(self, request: Request, *args: Any, **kwargs: Any) -> Response:
    """List events from ClickHouse with cursor-style pagination.

    Supports optional date bounds (?after / ?before), property filters,
    and an optional ?action_id filter. Fetches up to 101 rows and serves
    100, using the 101st row only to decide whether a `next` URL exists.
    """
    team = request.user.team
    assert team is not None
    filter = Filter(request=request)
    if request.GET.get("after"):
        filter._date_from = request.GET["after"]
    if request.GET.get("before"):
        filter._date_to = request.GET["before"]
    # One extra row beyond the page size signals that a next page exists.
    limit = "LIMIT 101"
    conditions, condition_params = determine_event_conditions(request.GET.dict())
    prop_filters, prop_filter_params = parse_prop_clauses(filter.properties, team.pk)
    if request.GET.get("action_id"):
        action = Action.objects.get(pk=request.GET["action_id"])
        if action.steps.count() == 0:
            # An action with no steps can never match an event.
            return Response({"next": False, "results": []})
        action_query, params = format_action_filter(action)
        prop_filters += " AND {}".format(action_query)
        prop_filter_params = {**prop_filter_params, **params}
    if prop_filters != "":
        query_result = sync_execute(
            SELECT_EVENT_WITH_PROP_SQL.format(conditions=conditions, limit=limit, filters=prop_filters),
            {"team_id": team.pk, **condition_params, **prop_filter_params},
        )
    else:
        query_result = sync_execute(
            SELECT_EVENT_WITH_ARRAY_PROPS_SQL.format(conditions=conditions, limit=limit),
            {"team_id": team.pk, **condition_params},
        )
    # Serialize only the page itself (100 rows), not the extra sentinel row.
    result = ClickhouseEventSerializer(
        query_result[0:100],
        many=True,
        context={"people": self._get_people(query_result, team),},
    ).data
    if len(query_result) > 100:
        path = request.get_full_path()
        reverse = request.GET.get("orderBy", "-timestamp") != "-timestamp"
        next_url: Optional[str] = request.build_absolute_uri(
            "{}{}{}={}".format(
                path,
                "&" if "?" in path else "?",
                "after" if reverse else "before",
                # Column 3 is the event timestamp; the cursor is the last
                # row of the served page.
                query_result[99][3].strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
            )
        )
    else:
        next_url = None
    return Response({"next": next_url, "results": result})
def calculate_dist(self, filter: Filter, team: Team):
    """Return the distribution of session lengths over fixed duration buckets.

    Defaults a missing date range to the last seven days, runs DIST_SQL
    (one row whose columns are bucket counts), and pairs each count with
    its human-readable label.
    """
    # format default dates
    if not filter._date_from:
        filter._date_from = relative_date_parse("-7d")
    if not filter._date_to:
        filter._date_to = timezone.now()
    parsed_date_from, parsed_date_to = parse_timestamps(filter)
    filters, params = parse_prop_clauses("uuid", filter.properties, team)
    dist_query = DIST_SQL.format(
        team_id=team.pk,
        date_from=parsed_date_from,
        date_to=parsed_date_to,
        filters="{}".format(filters) if filter.properties else "",
        sessions_limit="",
    )
    params = {**params, "team_id": team.pk}
    result = sync_execute(dist_query, params)
    dist_labels = [
        "0 seconds (1 event)",
        "0-3 seconds",
        "3-10 seconds",
        "10-30 seconds",
        "30-60 seconds",
        "1-3 minutes",
        "3-10 minutes",
        "10-30 minutes",
        "30-60 minutes",
        "1+ hours",
    ]
    # Defensive: an empty resultset would otherwise raise IndexError below.
    if not result:
        return []
    counts = result[0]
    # enumerate pairs each label with its column, replacing the
    # range(len(...)) index loop.
    return [{"label": label, "count": counts[index]}
            for index, label in enumerate(dist_labels)]
def run(self, filter: Filter, team: Team, *args, **kwargs) -> List[Dict[str, Any]]:
    """Serialize each entity's trend, defaulting missing date bounds.

    date_from defaults to the day of the team's earliest event; date_to
    defaults to now. Action entities get their display name resolved from
    the database before serialization.
    """
    if not filter.date_from:
        first_event = Event.objects.filter(team_id=team.pk).order_by("timestamp")[0]
        filter._date_from = first_event.timestamp.replace(
            hour=0, minute=0, second=0, microsecond=0).isoformat()
    if not filter.date_to:
        filter._date_to = now().isoformat()

    response: List[Dict[str, Any]] = []
    for entity in filter.entities:
        if entity.type == TREND_FILTER_TYPE_ACTIONS:
            entity.name = Action.objects.only("name").get(
                team=team, pk=entity.id).name
        response.extend(
            handle_compare(entity=entity,
                           filter=filter,
                           func=self._serialize_entity,
                           team_id=team.pk))
    return response
def run(self, filter: Filter, team: Team, *args, **kwargs) -> List[Dict[str, Any]]:
    """Serialize trends for each entity, filling in default dates/interval.

    Missing bounds default to the last seven days ending now; a missing
    interval defaults to "day". Action entities get their display name
    looked up before serialization.
    """
    if not filter._date_from:
        filter._date_from = relative_date_parse("-7d")
    if not filter._date_to:
        filter._date_to = timezone.now()
    if filter.interval is None:
        filter.interval = "day"

    results: List[Dict[str, Any]] = []
    for entity in filter.entities:
        if entity.type == TREND_FILTER_TYPE_ACTIONS:
            action_name = Action.objects.only("name").get(team=team, pk=entity.id).name
            entity.name = action_name
        results.extend(
            handle_compare(filter=filter,
                           func=self._serialize_entity,
                           team=team,
                           entity=entity))
    return results
def calculate_retention(filter: Filter, team: Team, total_days=11):
    """Build a day-granularity triangular retention table.

    Sets the filter's end date to ``total_days`` after its start, indexes
    the query rows by (cohort day, return day), and returns
    {"data": [row, ...]} with one labelled row per cohort day.
    """
    start: datetime.datetime = filter.date_from  # type: ignore
    filter._date_to = (start + timedelta(days=total_days)).isoformat()
    label_format = "%a. %-d %B"
    rows = Event.objects.query_retention(filter, team)
    counts = {(int(row.first_date), int(row.date)): row.count for row in rows}

    data = []
    for cohort in range(total_days):
        # Cohort N only has (total - N) follow-up days → triangular shape.
        data.append({
            "values": [
                counts.get((cohort, day), 0)
                for day in range(total_days - cohort)
            ],
            "label": "Day {}".format(cohort),
            "date": (start + timedelta(days=cohort)).strftime(label_format),
        })
    return {"data": data}