def _get_trends(self) -> List[Dict[str, Any]]:
    """Build the funnel conversion-trend series, one bucket per interval.

    Returns a single-element list containing a serialized series dict with
    ``days`` (bucket datetimes), ``data`` (conversion % per bucket),
    ``labels`` (formatted bucket dates) and ``count`` (always 0 here).
    """
    serialized: Dict[str, Any] = {"count": 0, "data": [], "days": [], "labels": []}

    with connection.cursor() as cursor:
        qstring = self._build_trends_query(self._filter).as_string(cursor.connection)
        cursor.execute(qstring)
        steps_at_dates = namedtuplefetchall(cursor)

    # No rows at all: return the empty series instead of raising IndexError
    # on steps_at_dates[0] below when date_from is unset.
    if not steps_at_dates:
        return [serialized]

    date_range = get_daterange(
        self._filter.date_from or steps_at_dates[0].date,
        self._filter.date_to,
        frequency=self._filter.interval,
    )

    data_array = [
        {
            "date": step.date,
            # Conversion % = people at the last step / people at step 0.
            # Guard the division: a bucket with zero entrants converts at 0%.
            "count": round(self._get_last_step_attr(step) / step.step_0_count * 100)
            if step.step_0_count
            else 0,
        }
        for step in steps_at_dates
    ]

    if self._filter.interval == "week":
        # Snap each date back to the start of its week.
        for df in data_array:
            df["date"] -= timedelta(days=df["date"].weekday() + 1)
    elif self._filter.interval == "month":
        # Snap each date to the first of its month.
        for df in data_array:
            df["date"] = df["date"].replace(day=1)
    for df in data_array:
        df["date"] = df["date"].replace(tzinfo=pytz.utc).isoformat()

    # Dense series: every bucket in the range appears, missing buckets are 0.
    datewise_data = {d["date"]: d["count"] for d in data_array}
    values = [(key, datewise_data.get(key.isoformat(), 0)) for key in date_range]

    for item in values:
        serialized["days"].append(item[0])
        serialized["data"].append(item[1])
        serialized["labels"].append(format_label_date(item[0], self._filter.interval))
    return [serialized]
def group_events_to_date(
    date_from: Optional[datetime.datetime],
    date_to: Optional[datetime.datetime],
    aggregates: QuerySet,
    interval: str,
    breakdown: Optional[str] = None,
) -> Dict[str, Dict[datetime.datetime, int]]:
    """Bucket aggregated event rows into dense per-interval time series.

    Returns a mapping of breakdown value -> {bucket datetime -> count} where
    every bucket in [date_from, date_to] is present (missing buckets are 0).
    An empty ``aggregates`` yields a single all-zero "total" series.
    """
    if interval == "day":
        # Align day buckets to midnight so lookups match get_daterange keys.
        if date_from is not None:
            date_from = date_from.replace(hour=0, minute=0, second=0, microsecond=0)
        if date_to is not None:
            date_to = date_to.replace(hour=0, minute=0, second=0, microsecond=0)

    time_index = get_daterange(date_from, date_to, frequency=interval)

    if not len(aggregates):
        # No data at all: emit one zero-filled series.
        return {"total": {bucket: 0 for bucket in time_index}}

    dataframe, unique_cohorts = build_dataarray(aggregates, interval, breakdown)

    grouped: Dict[str, Dict[datetime.datetime, int]] = {}
    for cohort in unique_cohorts:
        counts_by_date = {
            row["date"]: row["count"] for row in dataframe if row["breakdown"] == cohort
        }
        # A None breakdown value is keyed as the string "nan".
        key = "nan" if cohort is None else cohort
        grouped[key] = {bucket: counts_by_date.get(bucket, 0) for bucket in time_index}
    return grouped
def _get_trends(self) -> List[Dict[str, Any]]:
    """Build the funnel conversion-trend series (ClickHouse implementation).

    Runs FUNNEL_SQL grouped by the filter's interval and converts the result
    rows into a single serialized series dict with ``days``, ``data``
    (conversion % of the last reached step over all entrants per bucket)
    and ``labels``.
    """
    serialized: Dict[str, Any] = {"count": 0, "data": [], "days": [], "labels": []}

    prop_filters, prop_filter_params = parse_prop_clauses(
        self._filter.properties, self._team.pk, prepend="global", allow_denormalized_props=True
    )
    parsed_date_from, parsed_date_to, _ = parse_timestamps(
        filter=self._filter, table="events.", team_id=self._team.pk
    )
    self.params.update(prop_filter_params)

    steps = [
        self._build_steps_query(entity, index)
        for index, entity in enumerate(self._filter.entities)
    ]
    # Hoisted: the same truncation function is used in both SELECT and GROUP BY.
    trunc_func = get_trunc_func_ch(self._filter.interval)
    funnel_query = FUNNEL_SQL.format(
        team_id=self._team.id,
        steps=", ".join(steps),
        # Qualify only the first "uuid IN" so it targets the events table.
        filters=prop_filters.replace("uuid IN", "events.uuid IN", 1),
        parsed_date_from=parsed_date_from,
        parsed_date_to=parsed_date_to,
        top_level_groupby=", date",
        extra_select="{}(timestamp) as date,".format(trunc_func),
        extra_groupby=",{}(timestamp)".format(trunc_func),
        within_time="86400000000",  # funnel conversion window in microseconds (24h)
    )
    results = sync_execute(funnel_query, self.params)

    # No matching events: return the empty series instead of raising
    # IndexError on parsed_results[0] below when date_from is unset.
    if not results:
        return [serialized]

    parsed_results = []
    for result in results:
        temp = list(result)
        # Normalize the date column to a tz-aware datetime; coarser-than-hour
        # buckets may come back as dates without time-of-day attributes.
        temp[1] = datetime(
            result[1].year,
            result[1].month,
            result[1].day,
            getattr(result[1], "hour", 0),
            getattr(result[1], "minute", 0),
            getattr(result[1], "second", 0),
            tzinfo=pytz.utc,
        )
        parsed_results.append(temp)

    date_range = get_daterange(
        self._filter.date_from or parsed_results[0][1],
        self._filter.date_to,
        frequency=self._filter.interval,
    )

    # Rejig the data from a row for each date and step to one row per date
    data_dict: Dict[datetime, Dict] = {}
    for item in parsed_results:
        if not data_dict.get(item[1]):
            data_dict[item[1]] = {"date": item[1], "total_people": item[2], "count": 0}
        else:
            # the query gives people who made it to that step
            # so we need to count all the people from each step
            data_dict[item[1]]["total_people"] += item[2]
            total_people = data_dict[item[1]]["total_people"]
            # Guard the division: a bucket with zero entrants converts at 0%.
            data_dict[item[1]]["count"] = (
                round(item[2] / total_people * 100) if total_people else 0
            )
    data_array = [value for _, value in data_dict.items()]

    if self._filter.interval == "week":
        # Snap each date back to the start of its week.
        for df in data_array:
            df["date"] -= timedelta(days=df["date"].weekday() + 1)
    elif self._filter.interval == "month":
        # Snap each date to the first of its month.
        for df in data_array:
            df["date"] = df["date"].replace(day=1)
    for df in data_array:
        df["date"] = df["date"].isoformat()

    # Dense series: every bucket in the range appears, missing buckets are 0.
    datewise_data = {d["date"]: d["count"] for d in data_array}
    values = [(key, datewise_data.get(key.isoformat(), 0)) for key in date_range]

    for data_item in values:
        serialized["days"].append(data_item[0])
        serialized["data"].append(data_item[1])
        serialized["labels"].append(format_label_date(data_item[0], self._filter.interval))
    return [serialized]
def _session_avg(self, base_query: Query, params: QueryParams, filter: Filter) -> List[Dict[str, Any]]:
    """Compute the average-session-length time series.

    Wraps ``base_query`` to measure each session's duration, buckets the
    sessions by the filter's interval and returns a single-element list
    with the serialized series plus overall-average label/count metadata.
    Returns [] when the query yields no rows.
    """

    def _determine_interval(interval):
        # Map our interval name to (SQL date_trunc unit, frequency code);
        # anything unrecognized falls back to daily buckets.
        if interval == "minute":
            return "minute", "min"
        elif interval == "hour":
            return "hour", "H"
        elif interval == "week":
            return "week", "W"
        elif interval == "month":
            return "month", "M"
        else:
            return "day", "D"

    interval, interval_freq = _determine_interval(filter.interval)

    # NOTE: ``interval`` is interpolated directly into the SQL; this is safe
    # only because _determine_interval restricts it to a fixed whitelist.
    average_length_time = (
        "SELECT date_trunc('{interval}', timestamp) as start_time, "
        "AVG(length) AS average_session_length_per_day, "
        "SUM(length) AS total_session_length_per_day, "
        "COUNT(1) as num_sessions_per_day "
        "FROM (SELECT global_session_id, "
        "EXTRACT('EPOCH' FROM (MAX(timestamp) - MIN(timestamp))) AS length, "
        "MIN(timestamp) as timestamp FROM ({}) as count GROUP BY 1) as agg "
        "group by 1 order by start_time"
    ).format(base_query, interval=interval)

    # Context-manage the cursor so it is always closed (the previous version
    # leaked it on every call).
    with connection.cursor() as cursor:
        cursor.execute(average_length_time, params)
        time_series_avg = cursor.fetchall()
    if len(time_series_avg) == 0:
        return []

    date_range = get_daterange(filter.date_from, filter.date_to, frequency=interval)
    # Row layout: (start_time, avg_length, total_length, num_sessions).
    data_array = [{"date": a[0], "count": a[1], "breakdown": "Total"} for a in time_series_avg]

    if interval == "week":
        # Snap each date back to the start of its week.
        for df in data_array:
            df["date"] -= datetime.timedelta(days=df["date"].weekday() + 1)
    elif interval == "month":
        # Snap each date to the last day of its month (add 32 days to land in
        # the next month, then back up one day from its first).
        for df in data_array:
            df["date"] = (
                df["date"].replace(day=1) + datetime.timedelta(days=32)
            ).replace(day=1) - datetime.timedelta(days=1)

    datewise_data = {d["date"]: d["count"] for d in data_array}
    values = [(key, datewise_data.get(key, 0)) for key in date_range]
    time_series_data = append_data(values, interval=filter.interval, math=None)
    scaled_data, label = scale_time_series(time_series_data["data"])
    time_series_data.update({"data": scaled_data})

    # calculate average: columns 2 and 3 are total length and session count,
    # so the overall average is sum(total lengths) / sum(session counts).
    totals = [sum(x) for x in list(zip(*time_series_avg))[2:4]]
    overall_average = (totals[0] / totals[1]) if totals else 0
    avg_formatted = friendly_time(overall_average)
    avg_split = avg_formatted.split(" ")
    time_series_data.update(
        {
            "label": "Average Session Length ({})".format(avg_split[1]),
            "count": int(avg_split[0]),
            "aggregated_value": int(avg_split[0]),
        }
    )
    time_series_data.update({"chartLabel": "Average Session Length ({})".format(label)})
    result = [time_series_data]
    return result