Esempio n. 1
0
 def run(self, *args, **kwargs) -> List[Dict[str, Any]]:
     """Build the query from the lateral bodies, execute it, and shape the rows.

     Positional and keyword arguments are accepted but not used.

     Returns:
         Whatever ``self.data_to_return`` produces from the fetched rows.
     """
     with connection.cursor() as db_cursor:
         lateral_bodies = self._gen_lateral_bodies()
         composed_query = self._build_query(lateral_bodies)
         # The query object is rendered to a plain string against the live
         # connection before being handed to the cursor.
         db_cursor.execute(composed_query.as_string(db_cursor.connection))
         fetched_rows = namedtuplefetchall(db_cursor)
     return self.data_to_return(fetched_rows)
Esempio n. 2
0
def query_sessions_in_range(team: Team, start_time: datetime.datetime,
                            end_time: datetime.datetime,
                            filter: SessionsFilter) -> List[dict]:
    """Run SESSIONS_IN_RANGE_QUERY for one team and time window.

    Args:
        team: Team whose ``id`` is bound to the ``team_id`` query parameter.
        start_time: Bound to the ``start_time`` query parameter.
        end_time: Bound to the ``end_time`` query parameter.
        filter: When ``filter.recording_duration_filter`` is truthy, an extra
            ``AND duration <op> INTERVAL ...`` clause is spliced into the
            query, with ``<op>`` looked up in ``OPERATORS``.

    Returns:
        One dict per fetched row (``row._asdict()``).
    """
    duration_filter = filter.recording_duration_filter
    if duration_filter:
        comparison = OPERATORS[duration_filter.operator]
        # NOTE(review): the placeholder sits inside a quoted SQL literal, so
        # this presumably relies on the value being numeric -- confirm.
        duration_clause = f"AND duration {comparison} INTERVAL '%(min_recording_duration)s seconds'"
        extra_params = {"min_recording_duration": duration_filter.value}
    else:
        duration_clause, extra_params = "", {}

    with connection.cursor() as db_cursor:
        sql_text = SESSIONS_IN_RANGE_QUERY.format(filter_query=duration_clause)
        bound_params = {
            "team_id": team.id,
            "start_time": start_time,
            "end_time": end_time,
            **extra_params,
        }
        db_cursor.execute(sql_text, bound_params)
        session_rows = namedtuplefetchall(db_cursor)

    return [session_row._asdict() for session_row in session_rows]
Esempio n. 3
0
    def _get_trends(self) -> List[Dict[str, Any]]:
        """Return the conversion percentage per date bucket as a single series.

        Runs the trends query, converts each row into a percentage
        (last-step value divided by ``step_0_count``, times 100, rounded) and
        aligns the rows with the buckets produced by ``get_daterange``.
        Buckets without a matching row get 0.

        Returns:
            A one-element list holding a dict with ``count`` (always 0),
            ``data`` (percentages), ``days`` (the bucket values) and
            ``labels`` (formatted bucket labels).
        """
        with connection.cursor() as db_cursor:
            trends_sql = self._build_trends_query(self._filter).as_string(db_cursor.connection)
            db_cursor.execute(trends_sql)
            rows = namedtuplefetchall(db_cursor)

        # NOTE(review): with no explicit date_from and an empty result set,
        # rows[0] raises IndexError -- confirm callers never hit that case.
        buckets = get_daterange(
            self._filter.date_from or rows[0].date, self._filter.date_to, frequency=self._filter.interval
        )

        # NOTE(review): a row whose step_0_count is 0 raises ZeroDivisionError;
        # presumably the query never returns such rows -- confirm.
        percentages = [
            (row.date, round(self._get_last_step_attr(row) / row.step_0_count * 100))
            for row in rows
        ]

        interval = self._filter.interval

        def bucket_key(moment):
            """Snap a row date to its bucket start and render it as UTC ISO text."""
            if interval == "week":
                # weekday() is 0 for Monday, so this steps back to the
                # preceding Sunday.
                moment = moment - timedelta(days=moment.weekday() + 1)
            elif interval == "month":
                moment = moment.replace(day=1)
            return moment.replace(tzinfo=pytz.utc).isoformat()

        percent_by_bucket = {bucket_key(day): pct for day, pct in percentages}
        aligned = [(bucket, percent_by_bucket.get(bucket.isoformat(), 0)) for bucket in buckets]

        days: List[Any] = []
        data: List[Any] = []
        labels: List[Any] = []
        for bucket, pct in aligned:
            days.append(bucket)
            data.append(pct)
            labels.append(format_label_date(bucket, self._filter.interval))

        return [{"count": 0, "data": data, "days": days, "labels": labels}]
Esempio n. 4
0
    def _execute_sql(self, filter: RetentionFilter, team: Team,) -> Dict[Tuple[int, int], Dict[str, Any]]:
        """Run the retention query and index the counts by (first_date, date).

        The SQL fragments and the bound parameters both come from
        ``self._determine_query_params(filter, team)``.

        Returns:
            A dict keyed by ``(int(row.first_date), int(row.date))`` whose
            values are ``{"count": row.count}``. The ``people`` column is
            selected by the query but not included in the result.
        """
        sql_fragments, sql_params = self._determine_query_params(filter, team)

        query_template = """
            SELECT
                {fields}
                COUNT(DISTINCT "events"."person_id"),
                array_agg(DISTINCT "events"."person_id") as people
            FROM ({event_query}) events
            LEFT JOIN ({reference_event_query}) first_event_date
              ON (events.person_id = first_event_date.person_id)
            WHERE event_date >= first_date
            AND {target_condition} AND {return_condition}
            OR ({target_condition} AND event_date = first_date)
            GROUP BY date, first_date
        """
        retention_sql = query_template.format(**sql_fragments)

        with connection.cursor() as db_cursor:
            db_cursor.execute(retention_sql, sql_params)
            fetched = namedtuplefetchall(db_cursor)

            # Later rows with the same key overwrite earlier ones.
            counts_by_period = {
                (int(record.first_date), int(record.date)): {"count": record.count}
                for record in fetched
            }

        return counts_by_period
Esempio n. 5
0
 def run(self, *args, **kwargs) -> SessionRecordingQueryResult:
     """Execute the built query and return the paginated recordings.

     Positional and keyword arguments are accepted but not used.

     Returns:
         The result of ``self._paginate_results`` applied to the rows after
         they have been shaped by ``self._data_to_return``.
     """
     with connection.cursor() as db_cursor:
         sql_text, sql_params = self._build_query()
         db_cursor.execute(sql_text, sql_params)
         raw_rows = namedtuplefetchall(db_cursor)
     return self._paginate_results(self._data_to_return(raw_rows))
Esempio n. 6
0
    def run(self, *args, **kwargs) -> List[Dict[str, Any]]:
        """
        Query for every person who passed through the funnel steps listed in
        `self._filter.entities`. As an example, the entities could be:

            1. event with event name "user signed up"
            2. event with event name "user looked at report"

        A person only matches when they went through all `entities` in order,
        and only the earliest such chain of entities is returned for them.

        Positional and keyword arguments are accepted but not used.
        """
        active_filter = self._filter

        # Without any steps there is nothing to match, so skip the database.
        if len(active_filter.entities) == 0:
            return []

        # The linear-trends display is served by a separate code path.
        if active_filter.display == TRENDS_LINEAR:
            return self._get_trends()

        with connection.cursor() as db_cursor:
            # Build the query that looks for the steps in order, then run it.
            db_cursor.execute(self._build_query(within_time=None))
            matched_rows = namedtuplefetchall(db_cursor)
        return self.data_to_return(matched_rows)
Esempio n. 7
0
    def run(self, *args, **kwargs) -> List[Dict[str, Any]]:
        """Run the query for the configured entities and return the shaped rows.

        Returns an empty list when no entities are configured, and delegates
        to ``self._get_trends()`` when the display mode is ``TRENDS_LINEAR``.
        Positional and keyword arguments are accepted but not used.
        """
        active_filter = self._filter

        # No entities means nothing to query for.
        if len(active_filter.entities) == 0:
            return []

        if active_filter.display == TRENDS_LINEAR:
            return self._get_trends()

        with connection.cursor() as db_cursor:
            lateral_bodies = self._gen_lateral_bodies()
            composed_query = self._build_query(lateral_bodies)
            db_cursor.execute(composed_query.as_string(db_cursor.connection))
            fetched_rows = namedtuplefetchall(db_cursor)
        return self.data_to_return(fetched_rows)
Esempio n. 8
0
def fetch_sql(sql_: str, params: Tuple[Any, ...]) -> List[Any]:
    """Execute a raw SQL string with bound parameters and return all rows.

    Args:
        sql_: SQL text; it is wrapped in ``sql.SQL`` before execution.
        params: Values bound to the statement's placeholders.

    Returns:
        The rows as produced by ``namedtuplefetchall``.
    """
    with connection.cursor() as db_cursor:
        statement = sql.SQL(sql_)
        db_cursor.execute(statement, params)
        fetched_rows = namedtuplefetchall(db_cursor)
    return fetched_rows
Esempio n. 9
0
def status_report() -> None:
    """Collect usage statistics for the previous week and send them to PostHog.

    The report contains the PostHog version, the reporting period, the users
    who logged in since the period start and, for every team, event/person
    counts plus event breakdowns by ``$lib`` and by event name. It is
    captured as an ``"instance status report"`` event for this machine's id.

    Side effects:
        * Overwrites ``posthoganalytics.api_key``.
        * Temporarily forces ``posthoganalytics.disabled = False`` so the
          report is sent; the previous value is restored afterwards, even if
          the capture call raises.
    """
    period_start, period_end = get_previous_week()
    period_start_iso = period_start.isoformat()
    period_end_iso = period_end.isoformat()
    report: Dict[str, Any] = {
        "posthog_version": VERSION,
        "period": {
            "start_inclusive": period_start_iso,
            "end_inclusive": period_end_iso,
        },
    }
    # Anonymized users are reported without their name and email.
    report["users_who_logged_in"] = [
        {"id": user.id, "distinct_id": user.distinct_id}
        if user.anonymize_data
        else {
            "id": user.id,
            "distinct_id": user.distinct_id,
            "first_name": user.first_name,
            "email": user.email,
        }
        for user in User.objects.filter(last_login__gte=period_start)
    ]
    report["teams"] = {}
    for team in Team.objects.all():
        team_report: Dict[str, Any] = {}
        events_considered_total = Event.objects.filter(team_id=team.id)
        events_considered_new_in_period = events_considered_total.filter(
            created_at__gte=period_start,
            created_at__lte=period_end,
        )
        # NOTE(review): these "persons" querysets are built from Event, so the
        # persons_count_* values equal the event counts. This looks like a
        # copy-paste slip (a Person model was presumably intended); left
        # unchanged because no such model is visibly in scope here.
        persons_considered_total = Event.objects.filter(team_id=team.id)
        persons_considered_total_new_in_period = persons_considered_total.filter(
            created_at__gte=period_start,
            created_at__lte=period_end,
        )
        team_report["events_count_total"] = events_considered_total.count()
        team_report["events_count_new_in_period"] = events_considered_new_in_period.count()
        team_report["persons_count_total"] = persons_considered_total.count()
        team_report["persons_count_new_in_period"] = persons_considered_total_new_in_period.count()

        # The period dict only defines "end_inclusive"; the raw queries below
        # therefore use an inclusive upper bound (<=), matching the
        # created_at__lte ORM filters above.
        period_params = (team.id, period_start_iso, period_end_iso)

        with connection.cursor() as cursor:
            cursor.execute(
                sql.SQL("""
                SELECT COUNT(DISTINCT person_id) as persons_count
                FROM posthog_event JOIN posthog_persondistinctid ON (posthog_event.distinct_id = posthog_persondistinctid.distinct_id) WHERE posthog_event.team_id = %s AND posthog_event.created_at >= %s AND posthog_event.created_at <= %s
            """),
                period_params,
            )
            team_report["persons_count_active_in_period"] = cursor.fetchone()[0]

            cursor.execute(
                sql.SQL("""
                SELECT properties->>'$lib' as lib, COUNT(*) as count
                FROM posthog_event WHERE team_id = %s AND created_at >= %s AND created_at <= %s GROUP BY lib
            """),
                period_params,
            )
            team_report["events_count_by_lib"] = {
                result.lib: result.count
                for result in namedtuplefetchall(cursor)
            }

            cursor.execute(
                sql.SQL("""
                SELECT event as name, COUNT(*) as count
                FROM posthog_event WHERE team_id = %s AND created_at >= %s AND created_at <= %s GROUP BY name
            """),
                period_params,
            )
            team_report["events_count_by_name"] = {
                result.name: result.count
                for result in namedtuplefetchall(cursor)
            }
        report["teams"][team.id] = team_report

    posthoganalytics.api_key = "sTMFPsFhdP1Ssg"
    previously_disabled = posthoganalytics.disabled
    posthoganalytics.disabled = False
    try:
        posthoganalytics.capture(get_machine_id(), "instance status report",
                                 report)
    finally:
        # Restore the caller's setting even when the capture call fails.
        posthoganalytics.disabled = previously_disabled
Esempio n. 10
0
    def _execute_sql(
        self,
        filter: RetentionFilter,
        team: Team,
    ) -> Dict[Tuple[int, int], Dict[str, Any]]:
        """Run the retention query and return counts and people per cohort cell.

        Two querysets are built with the ORM -- the "reference" events that
        match ``filter.target_entity`` (``first_date``) and the returning
        events unioned with those reference rows -- and their SQL is embedded
        into a hand-written aggregate query that is executed on a raw cursor.

        Side effect: for every result row the full, sorted people list is
        stored in the cache for 600 seconds under a freshly generated key.

        Args:
            filter: Retention filter supplying the period, retention type,
                target/returning entities, date range and property filters.
            team: Team whose events are queried.

        Returns:
            A dict keyed by ``(int(row.first_date), int(row.date))``. Each
            value holds ``count``, the first 100 ``people``, a fixed
            ``offset`` of 100, and ``next`` (the cache key when more than 100
            people exist, otherwise ``None``).
        """

        period = filter.period
        is_first_time_retention = filter.retention_type == RETENTION_FIRST_TIME

        # NOTE(review): this empty QuerySet is overwritten a few lines below
        # before it is ever read.
        events: QuerySet = QuerySet()
        # Each call yields the condition in two forms: one used with the ORM
        # (.filter) and a string form spliced into the raw SQL further down.
        entity_condition, entity_condition_strigified = self.get_entity_condition(
            filter.target_entity, "first_event_date")
        returning_condition, returning_condition_stringified = self.get_entity_condition(
            filter.returning_entity, "events")
        events = Event.objects.filter(team_id=team.pk).add_person_id(
            team.pk).annotate(event_date=F("timestamp"))

        trunc, fields = self._get_trunc_func("timestamp", period)

        if is_first_time_retention:
            # First-time retention: the reference date is the minimum
            # truncated timestamp per (person_id, event, action) group, and
            # the date filter is applied to that aggregated first_date rather
            # than to the raw events.
            filtered_events = events.filter(
                filter.properties_to_Q(team_id=team.pk))
            first_date = (filtered_events.filter(entity_condition).values(
                "person_id", "event",
                "action").annotate(first_date=Min(trunc)).filter(
                    filter.custom_date_filter_Q("first_date")).distinct())
            # NOTE(review): the two sides of the union list their columns in
            # a different order (person_id/event_date vs first_date/person_id)
            # -- confirm this is intentional.
            final_query = (filtered_events.filter(
                filter.date_filter_Q).filter(returning_condition).values_list(
                    "person_id", "event_date", "event", "action").union(
                        first_date.values_list("first_date", "person_id",
                                               "event", "action")))
        else:
            # Otherwise every matching target event inside the date range
            # contributes its own truncated timestamp as a reference date.
            filtered_events = events.filter(filter.date_filter_Q).filter(
                filter.properties_to_Q(team_id=team.pk))
            first_date = (filtered_events.filter(entity_condition).annotate(
                first_date=trunc).values("first_date", "person_id", "event",
                                         "action").distinct())

            final_query = (
                filtered_events.filter(returning_condition).values_list(
                    "person_id", "event_date", "event", "action").union(
                        first_date.values_list("first_date", "person_id",
                                               "event", "action")))

        # Extract the SQL text and positional parameters of both querysets so
        # they can be embedded as subqueries in the raw statement.
        event_query, events_query_params = final_query.query.sql_with_params()
        reference_event_query, first_date_params = first_date.query.sql_with_params(
        )

        # From here on final_query is rebound from a queryset to the raw SQL string.
        final_query = """
            SELECT
                {fields}
                COUNT(DISTINCT "events"."person_id"),
                array_agg(DISTINCT "events"."person_id") as people
            FROM ({event_query}) events
            LEFT JOIN ({reference_event_query}) first_event_date
              ON (events.person_id = first_event_date.person_id)
            WHERE event_date >= first_date
            AND {target_condition} AND {return_condition}
            OR ({target_condition} AND event_date = first_date)
            GROUP BY date, first_date
        """.format(
            event_query=event_query,
            reference_event_query=reference_event_query,
            fields=fields,
            return_condition=returning_condition_stringified,
            target_condition=entity_condition_strigified,
        )
        # Ids for the target / return / target conditions, in the order those
        # conditions appear in the WHERE clause above.
        event_params = (filter.target_entity.id, filter.returning_entity.id,
                        filter.target_entity.id)

        # Presumably {fields} contains one date_from placeholder, or two for
        # the "Month" and "Hour" periods -- verify against _get_trunc_func.
        start_params = ((filter.date_from, filter.date_from)
                        if period == "Month" or period == "Hour" else
                        (filter.date_from, ))

        with connection.cursor() as cursor:
            # Parameters are positional, so the concatenation order must
            # follow the order of the placeholders in the final SQL text.
            cursor.execute(
                final_query,
                start_params + events_query_params + first_date_params +
                event_params,
            )
            data = namedtuplefetchall(cursor)

            # scores[first_date][person] = number of result rows for that
            # first_date in which the person appears.
            scores: dict = {}
            for datum in data:
                key = round(datum.first_date, 1)
                if not scores.get(key, None):
                    scores.update({key: {}})
                for person in datum.people:
                    if not scores[key].get(person, None):
                        scores[key].update({person: 1})
                    else:
                        scores[key][person] += 1

        by_dates = {}
        for row in data:
            # People who appear in the most rows of this cohort come first.
            # NOTE(review): scores is keyed by the raw person value while the
            # lookup uses int(p) -- assumes people are already ints; confirm.
            people = sorted(
                row.people,
                key=lambda p: scores[round(row.first_date, 1)][int(p)],
                reverse=True,
            )

            # A random 10-character token (uppercase letters and digits) goes
            # into the cache key, so every call stores its own entry.
            random_key = "".join(
                random.SystemRandom().choice(string.ascii_uppercase +
                                             string.digits) for _ in range(10))
            cache_key = generate_cache_key("{}{}{}".format(
                random_key, str(round(row.first_date, 0)), str(team.pk)))
            # The full sorted list is cached for 600 seconds; only the first
            # 100 people are returned inline below.
            cache.set(
                cache_key,
                people,
                600,
            )
            by_dates.update({
                (int(row.first_date), int(row.date)): {
                    "count": row.count,
                    "people": people[0:100],
                    "offset": 100,
                    "next": cache_key if len(people) > 100 else None,
                }
            })

        return by_dates