Exemple #1
0
    def _get_trends(self) -> List[Dict[str, Any]]:
        serialized: Dict[str, Any] = {"count": 0, "data": [], "days": [], "labels": []}
        with connection.cursor() as cursor:
            qstring = self._build_trends_query(self._filter).as_string(cursor.connection)
            cursor.execute(qstring)
            steps_at_dates = namedtuplefetchall(cursor)

        date_range = get_daterange(
            self._filter.date_from or steps_at_dates[0].date, self._filter.date_to, frequency=self._filter.interval
        )

        data_array = [
            {"date": step.date, "count": round(self._get_last_step_attr(step) / step.step_0_count * 100)}
            for step in steps_at_dates
        ]

        if self._filter.interval == "week":
            for df in data_array:
                df["date"] -= timedelta(days=df["date"].weekday() + 1)
        elif self._filter.interval == "month":
            for df in data_array:
                df["date"] = df["date"].replace(day=1)
        for df in data_array:
            df["date"] = df["date"].replace(tzinfo=pytz.utc).isoformat()

        datewise_data = {d["date"]: d["count"] for d in data_array}
        values = [(key, datewise_data.get(key.isoformat(), 0)) for key in date_range]

        for item in values:
            serialized["days"].append(item[0])
            serialized["data"].append(item[1])
            serialized["labels"].append(format_label_date(item[0], self._filter.interval))
        return [serialized]
Exemple #2
0
def group_events_to_date(
    date_from: Optional[datetime.datetime],
    date_to: Optional[datetime.datetime],
    aggregates: QuerySet,
    interval: str,
    breakdown: Optional[str] = None,
) -> Dict[str, Dict[datetime.datetime, int]]:
    response = {}

    if interval == "day":
        if date_from:
            date_from = date_from.replace(hour=0, minute=0, second=0, microsecond=0)
        if date_to:
            date_to = date_to.replace(hour=0, minute=0, second=0, microsecond=0)

    time_index = get_daterange(date_from, date_to, frequency=interval)
    if len(aggregates) > 0:
        dataframe, unique_cohorts = build_dataarray(aggregates, interval, breakdown)

        for value in unique_cohorts:
            filtered = list(filter(lambda d: d["breakdown"] == value, dataframe))
            datewise_data = {d["date"]: d["count"] for d in filtered}
            if value is None:
                value = "nan"
            response[value] = {key: datewise_data.get(key, 0) for key in time_index}
    else:
        response["total"] = {key: 0 for key in time_index}

    return response
Exemple #3
0
    def _get_trends(self) -> List[Dict[str, Any]]:
        serialized: Dict[str, Any] = {
            "count": 0,
            "data": [],
            "days": [],
            "labels": []
        }
        prop_filters, prop_filter_params = parse_prop_clauses(
            self._filter.properties,
            self._team.pk,
            prepend="global",
            allow_denormalized_props=True)
        parsed_date_from, parsed_date_to, _ = parse_timestamps(
            filter=self._filter, table="events.", team_id=self._team.pk)
        self.params.update(prop_filter_params)
        steps = [
            self._build_steps_query(entity, index)
            for index, entity in enumerate(self._filter.entities)
        ]
        funnel_query = FUNNEL_SQL.format(
            team_id=self._team.id,
            steps=", ".join(steps),
            filters=prop_filters.replace("uuid IN", "events.uuid IN", 1),
            parsed_date_from=parsed_date_from,
            parsed_date_to=parsed_date_to,
            top_level_groupby=", date",
            extra_select="{}(timestamp) as date,".format(
                get_trunc_func_ch(self._filter.interval)),
            extra_groupby=",{}(timestamp)".format(
                get_trunc_func_ch(self._filter.interval)),
            within_time="86400000000",
        )
        results = sync_execute(funnel_query, self.params)
        parsed_results = []

        for result in results:
            temp = [item for item in result]
            temp[1] = datetime(
                result[1].year,
                result[1].month,
                result[1].day,
                getattr(result[1], "hour", 0),
                getattr(result[1], "minute", 0),
                getattr(result[1], "second", 0),
                tzinfo=pytz.utc,
            )
            parsed_results.append(temp)

        date_range = get_daterange(self._filter.date_from
                                   or parsed_results[0][1],
                                   self._filter.date_to,
                                   frequency=self._filter.interval)

        # Rejig the data from a row for each date and step to one row per date
        data_dict: Dict[datetime, Dict] = {}
        for item in parsed_results:
            if not data_dict.get(item[1]):
                data_dict[item[1]] = {
                    "date": item[1],
                    "total_people": item[2],
                    "count": 0
                }
            else:
                # the query gives people who made it to that step
                # so we need to count all the people from each step
                data_dict[item[1]]["total_people"] += item[2]
                data_dict[item[1]]["count"] = round(
                    item[2] / data_dict[item[1]]["total_people"] * 100)
        data_array = [value for _, value in data_dict.items()]

        if self._filter.interval == "week":
            for df in data_array:
                df["date"] -= timedelta(days=df["date"].weekday() + 1)
        elif self._filter.interval == "month":
            for df in data_array:
                df["date"] = df["date"].replace(day=1)
        for df in data_array:
            df["date"] = df["date"].isoformat()

        datewise_data = {d["date"]: d["count"] for d in data_array}
        values = [(key, datewise_data.get(key.isoformat(), 0))
                  for key in date_range]

        for data_item in values:
            serialized["days"].append(data_item[0])
            serialized["data"].append(data_item[1])
            serialized["labels"].append(
                format_label_date(data_item[0], self._filter.interval))
        return [serialized]
Exemple #4
0
    def _session_avg(self, base_query: Query, params: QueryParams,
                     filter: Filter) -> List[Dict[str, Any]]:
        def _determineInterval(interval):
            if interval == "minute":
                return (
                    "minute",
                    "min",
                )
            elif interval == "hour":
                return "hour", "H"
            elif interval == "week":
                return "week", "W"
            elif interval == "month":
                return "month", "M"
            else:
                return "day", "D"

        interval, interval_freq = _determineInterval(filter.interval)

        average_length_time = "SELECT date_trunc('{interval}', timestamp) as start_time,\
                        AVG(length) AS average_session_length_per_day,\
                        SUM(length) AS total_session_length_per_day, \
                        COUNT(1) as num_sessions_per_day\
                        FROM (SELECT global_session_id, EXTRACT('EPOCH' FROM (MAX(timestamp) - MIN(timestamp)))\
                            AS length,\
                            MIN(timestamp) as timestamp FROM ({}) as count GROUP BY 1) as agg group by 1 order by start_time".format(
            base_query, interval=interval)

        cursor = connection.cursor()
        cursor.execute(average_length_time, params)
        time_series_avg = cursor.fetchall()
        if len(time_series_avg) == 0:
            return []

        date_range = get_daterange(filter.date_from,
                                   filter.date_to,
                                   frequency=interval)
        data_array = [{
            "date": a[0],
            "count": a[1],
            "breakdown": "Total"
        } for a in time_series_avg]

        if interval == "week":
            for df in data_array:
                df["date"] -= datetime.timedelta(days=df["date"].weekday() + 1)
        elif interval == "month":
            for df in data_array:
                df["date"] = (df["date"].replace(day=1) + datetime.timedelta(
                    days=32)).replace(day=1) - datetime.timedelta(days=1)

        datewise_data = {d["date"]: d["count"] for d in data_array}
        values = [(key, datewise_data.get(key, 0)) for key in date_range]

        time_series_data = append_data(values,
                                       interval=filter.interval,
                                       math=None)
        scaled_data, label = scale_time_series(time_series_data["data"])
        time_series_data.update({"data": scaled_data})
        # calculate average
        totals = [sum(x) for x in list(zip(*time_series_avg))[2:4]]
        overall_average = (totals[0] / totals[1]) if totals else 0
        avg_formatted = friendly_time(overall_average)
        avg_split = avg_formatted.split(" ")

        time_series_data.update({
            "label":
            "Average Session Length ({})".format(avg_split[1]),
            "count":
            int(avg_split[0]),
            "aggregated_value":
            int(avg_split[0]),
        })
        time_series_data.update(
            {"chartLabel": "Average Session Length ({})".format(label)})
        result = [time_series_data]
        return result