コード例 #1
0
    def sessions(self, request: request.Request) -> response.Response:
        """Return the calculated sessions for the requesting user's team.

        The date range is read from the query string through
        ``request_to_date_query``. A missing lower bound defaults to midnight
        of the day of the team's earliest event (or of the current day when
        the team has no events yet); a missing upper bound defaults to the
        current time.

        When ``compare`` is set and ``date_from`` is not ``'all'``, the
        current-period result is converted with ``_convert_to_comparison``
        and extended with the converted result of ``_handle_compared``.
        """
        team = self.request.user.team_set.get()
        date_filter = request_to_date_query(request.GET.dict())

        if not date_filter.get('timestamp__gte'):
            # .first() returns None for a team without events, whereas
            # indexing the queryset with [0] raised IndexError.
            earliest_event = Event.objects.filter(team=team)\
                .order_by('timestamp')\
                .first()
            range_start = earliest_event.timestamp if earliest_event else now()
            date_filter['timestamp__gte'] = range_start.replace(
                hour=0, minute=0, second=0, microsecond=0)

        if not date_filter.get('timestamp__lte'):
            date_filter['timestamp__lte'] = now()

        events = self.get_queryset().filter(**date_filter)
        session_type = self.request.GET.get('session')

        # The current period is needed with and without comparison.
        calculated = self.calculate_sessions(events, session_type,
                                             date_filter)

        # get compared period
        compare = request.GET.get('compare')
        if compare and request.GET.get('date_from') != 'all':
            calculated = self._convert_to_comparison(calculated, 'current')
            compared_calculated = self._handle_compared(
                date_filter, session_type)
            calculated.extend(
                self._convert_to_comparison(compared_calculated, 'previous'))

        return response.Response(calculated)
コード例 #2
0
    def get_steps(self) -> List[Dict[str, Any]]:
        """Serialize each funnel step together with the people who reached it.

        The steps are the ``actions`` and ``events`` entries of
        ``self.filters`` sorted by their ``order`` key. After all steps are
        serialized, each step's ``people`` list is reordered with
        ``_order_people_in_step`` and cut to at most 100 entries.
        """
        unsorted_steps = self.filters.get('actions', []) + self.filters.get(
            'events', [])
        funnel_steps = sorted(unsorted_steps, key=lambda step: step['order'])

        candidates = Person.objects.all().filter(
            team_id=self.team_id,
            persondistinctid__distinct_id__isnull=False,
        )
        step_annotations = self._annotate_steps(
            team_id=self.team_id,
            funnel_steps=funnel_steps,
            date_query=request_to_date_query(self.filters),
        )
        # Only people annotated with a non-null step_0 are kept.
        people = candidates.annotate(**step_annotations)\
            .filter(step_0__isnull=False)\
            .distinct('pk')

        steps = [
            self._serialize_step(
                funnel_step,
                [
                    person.id for person in people
                    if getattr(person, 'step_{}'.format(position))
                ],
            )
            for position, funnel_step in enumerate(funnel_steps)
        ]

        for step in steps:
            ordered_people = self._order_people_in_step(steps, step['people'])
            step['people'] = ordered_people[0:100]
        return steps
コード例 #3
0
    def get_steps(self) -> List[Dict[str, Any]]:
        """Build the serialized funnel steps, each with the people who hit it.

        Steps come from the ``actions`` and ``events`` lists in
        ``self.filters`` (sorted by ``order``); the ``properties`` filter is
        forwarded to ``_annotate_steps``. Every step's ``people`` list is
        reordered by ``_order_people_in_step`` and limited to 100 entries.
        """
        combined_steps = self.filters.get("actions", []) + self.filters.get(
            "events", [])
        properties = self.filters.get("properties", [])
        funnel_steps = sorted(combined_steps, key=lambda step: step["order"])

        base_people = Person.objects.all().filter(
            team_id=self.team_id,
            persondistinctid__distinct_id__isnull=False,
        )
        annotations = self._annotate_steps(
            team_id=self.team_id,
            funnel_steps=funnel_steps,
            date_query=request_to_date_query(self.filters),
            properties=properties,
        )
        people = base_people.annotate(**annotations)
        people = people.filter(step_0__isnull=False).distinct("pk")

        steps = []
        for position, funnel_step in enumerate(funnel_steps):
            # A truthy step_<n> annotation marks a person as having
            # reached step n.
            step_attr = "step_{}".format(position)
            person_ids = [
                person.id for person in people if getattr(person, step_attr)
            ]
            steps.append(self._serialize_step(funnel_step, person_ids))

        for serialized in steps:
            serialized["people"] = self._order_people_in_step(
                steps, serialized["people"])[0:100]
        return steps
コード例 #4
0
ファイル: paths.py プロジェクト: GalDayan/posthog
 def get_list(self, request):
     """Run the paths query for this view's team and return its raw result.

     The date range, optional ``start`` point and optional ``type`` are all
     read from the request's query string.
     """
     team = self.team
     query_params = request.GET
     date_query = request_to_date_query(query_params, exact=False)
     path_filter = PathFilter(request=request)
     start_point = query_params.get("start")
     request_type = query_params.get("type")
     return paths.Paths().run(
         filter=path_filter,
         start_point=start_point,
         date_query=date_query,
         request_type=request_type,
         team=team,
     )
コード例 #5
0
ファイル: paths.py プロジェクト: hupratt/posthog
    def list(self, request):
        """Return the most common page-to-page transitions for the user's team.

        ``$pageview`` events in the requested date range are split into
        sessions: a new session starts when a distinct_id's previous event is
        30 minutes or more in the past, or when there is no previous event.
        Transitions between consecutive pageviews are counted over the first
        four events of each session, skipping transitions where source and
        target URL are the same.

        Returns a ``Response`` with at most 15 dicts of the form
        ``{'source', 'target', 'value'}``, sorted by descending ``value``.
        """
        team = request.user.team_set.get()
        date_query = request_to_date_query(request.GET)

        sessions = Event.objects.filter(
                team=team,
                event='$pageview',
                **date_query
            )\
            .annotate(previous_timestamp=Window(
                expression=Lag('timestamp', default=None),
                partition_by=F('distinct_id'),
                order_by=F('timestamp').asc()
            ))

        # The ORM-generated SQL is embedded as the innermost subquery; its
        # parameters are still passed separately to execute().
        sessions_sql, sessions_sql_params = sessions.query.sql_with_params()

        # The context manager closes the cursor even when the query fails.
        with connection.cursor() as cursor:
            cursor.execute(
                '\
        SELECT source_event, target_event, count(*) from (\
            SELECT event_number || \'_\' || current_url as target_event,LAG(event_number || \'_\' || current_url, 1) OVER (\
                            PARTITION BY session\
                            ) AS source_event from \
        (\
            SELECT properties->> \'$current_url\' as current_url, sessionified.session\
                ,ROW_NUMBER() OVER (\
                        PARTITION BY distinct_id\
                        ,session ORDER BY timestamp\
                        ) AS event_number\
        FROM (\
            SELECT events_notated.*, SUM(new_session) OVER (\
                ORDER BY distinct_id\
                        ,timestamp\
                ) AS session\
            FROM (\
                SELECT *, CASE WHEN EXTRACT(\'EPOCH\' FROM (timestamp - previous_timestamp)) >= (60 * 30) OR previous_timestamp IS NULL THEN 1 ELSE 0 END AS new_session\
                FROM ({}) AS inner_sessions \
            ) as events_notated \
        ) as sessionified\
        ) as final\
        where event_number <= 4\
        ) as counts\
        where source_event is not null and target_event is not null and SUBSTRING(source_event, 3) != SUBSTRING(target_event, 3)\
        group by source_event, target_event order by count desc limit 15\
        '.format(sessions_sql), sessions_sql_params)
            rows = cursor.fetchall()

        resp = [
            {'source': row[0], 'target': row[1], 'value': row[2]}
            for row in rows
        ]
        resp = sorted(resp, key=lambda x: x['value'], reverse=True)
        return Response(resp)
コード例 #6
0
ファイル: paths.py プロジェクト: sitedata/posthog
 def list(self, request):
     """Run the paths query for the requesting user's team.

     The date range, ``start`` point and ``type`` come from the query
     string; the result of ``Paths().run`` is wrapped in a ``Response``.
     """
     team = request.user.team_set.get()
     query_params = request.GET
     date_query = request_to_date_query(query_params, exact=False)
     path_filter = Filter(request=request)
     start_point = query_params.get("start")
     request_type = query_params.get("type")
     run_kwargs = {
         "filter": path_filter,
         "start_point": start_point,
         "date_query": date_query,
         "request_type": request_type,
         "team": team,
     }
     return Response(paths.Paths().run(**run_kwargs))
コード例 #7
0
    def sessions(self, request: request.Request) -> response.Response:
        """Return calculated sessions, optionally compared or paginated.

        The date range is read from the query string. A missing lower bound
        defaults to midnight of the day of the team's earliest event (or of
        the current day when the team has no events); a missing upper bound
        defaults to the current time. The date filter is applied to the
        queryset only when a ``session`` type is given.

        With ``compare`` set, ``date_from`` other than ``'all'`` and session
        type ``'avg'``, the current-period result is converted with
        ``_convert_to_comparison`` and extended with the converted result
        for the events returned by ``_handle_compared``.

        When no session type is given and at least 50 entries were
        calculated, the response also carries ``offset`` (request offset plus
        50) and ``date_from`` (ISO ``start_time`` of the first entry).
        """
        team = self.request.user.team_set.get()
        session_type = self.request.GET.get('session')

        date_filter = request_to_date_query(request.GET.dict(), exact=True)
        if not date_filter.get('timestamp__gte'):
            # .first() returns None for a team without events, whereas
            # indexing the queryset with [0] raised IndexError.
            first_event = Event.objects.filter(team=team)\
                .order_by('timestamp')\
                .first()
            lower_bound = first_event.timestamp if first_event is not None else now()
            date_filter['timestamp__gte'] = lower_bound.replace(
                hour=0, minute=0, second=0, microsecond=0)

        if not date_filter.get('timestamp__lte'):
            date_filter['timestamp__lte'] = now()

        events = self.get_queryset()
        if session_type is not None:
            events = events.filter(**date_filter)

        # The current period is needed with and without comparison.
        calculated = self.calculate_sessions(events, session_type,
                                             date_filter, team, request)

        # get compared period
        compare = request.GET.get('compare')
        if compare and request.GET.get(
                'date_from') != 'all' and session_type == 'avg':
            calculated = self._convert_to_comparison(calculated, 'current')
            compared_events = self._handle_compared(date_filter)
            compared_calculated = self.calculate_sessions(
                compared_events, session_type, date_filter, team, request)
            calculated.extend(
                self._convert_to_comparison(compared_calculated, 'previous'))

        result: Dict[str, Any] = {'result': calculated}

        # add pagination
        if session_type is None:
            offset = int(request.GET.get('offset', '0')) + 50
            if len(calculated) > 49:
                date_from = calculated[0]['start_time'].isoformat()
                result.update({'offset': offset})
                result.update({'date_from': date_from})
        return response.Response(result)
コード例 #8
0
    def sessions(self, request: request.Request) -> response.Response:
        """Return calculated sessions for the requesting user's team.

        Query-string inputs: the date range (via ``request_to_date_query``),
        ``session`` (the session type), ``compare`` and ``offset``.

        * A missing ``timestamp__gte`` becomes midnight of the day of the
          team's earliest event, or of the current day if the team has no
          events. A missing ``timestamp__lte`` becomes the current time.
        * The queryset is date-filtered only when a session type is given.
        * For session type ``"avg"`` with ``compare`` set and ``date_from``
          not ``"all"``, the converted current result is extended with the
          converted result for the events from ``_handle_compared``.
        * Without a session type, once 50 or more entries are returned the
          payload also contains ``offset`` and ``date_from`` for the next
          page.
        """
        team = self.request.user.team_set.get()
        session_type = self.request.GET.get("session")

        date_filter = request_to_date_query(request.GET.dict(), exact=True)
        if not date_filter.get("timestamp__gte"):
            # Indexing the queryset with [0] raised IndexError for a team
            # that has no events; .first() returns None in that case.
            earliest = Event.objects.filter(team=team).order_by(
                "timestamp").first()
            start = now() if earliest is None else earliest.timestamp
            date_filter["timestamp__gte"] = start.replace(
                hour=0, minute=0, second=0, microsecond=0)

        if not date_filter.get("timestamp__lte"):
            date_filter["timestamp__lte"] = now()

        events = self.get_queryset()
        if session_type is not None:
            events = events.filter(**date_filter)

        # get compared period
        compare = request.GET.get("compare")
        use_comparison = bool(
            compare and request.GET.get("date_from") != "all"
            and session_type == "avg")

        calculated = self.calculate_sessions(events, session_type,
                                             date_filter, team, request)
        if use_comparison:
            calculated = self._convert_to_comparison(calculated, "current")
            compared_events = self._handle_compared(date_filter)
            compared_calculated = self.calculate_sessions(
                compared_events, session_type, date_filter, team, request)
            calculated.extend(
                self._convert_to_comparison(compared_calculated, "previous"))

        result: Dict[str, Any] = {"result": calculated}

        # add pagination
        if session_type is None:
            offset = int(request.GET.get("offset", "0")) + 50
            if len(calculated) > 49:
                date_from = calculated[0]["start_time"].isoformat()
                result.update({"offset": offset})
                result.update({"date_from": date_from})
        return response.Response(result)
コード例 #9
0
ファイル: paths.py プロジェクト: sonoftherock/posthog
    def list(self, request):
        """Return the most frequent URL transitions between positions 1 to 4.

        For each pair of consecutive positions the six most frequent
        ``(url_n, url_n+1)`` pairs are collected. From the second pair
        onwards the source URL is restricted to the target URLs found in the
        previous round, unless that round produced no rows. The collected
        links are returned sorted by descending ``value``.
        """
        team = request.user.team_set.get()
        date_query = request_to_date_query(request.GET)
        aggregate: QuerySet[
            PersonDistinctId] = PersonDistinctId.objects.filter(team=team)
        aggregate = self._add_event_and_url_at_position(
            aggregate, team, 1, date_query)

        links = []
        previous_targets: List[str] = []
        for position in range(1, 4):
            next_position = position + 1
            aggregate = self._add_event_and_url_at_position(
                aggregate, team, next_position, date_query)
            source_key = 'url_{}'.format(position)
            target_key = 'url_{}'.format(next_position)

            conditions = {}
            if previous_targets:
                conditions['{}__in'.format(source_key)] = previous_targets
            conditions['{}__isnull'.format(target_key)] = False

            top_pairs = aggregate.filter(**conditions)
            top_pairs = top_pairs.values(source_key, target_key)
            top_pairs = top_pairs.annotate(count=Count('pk'))
            top_pairs = top_pairs.order_by('-count')[0: 6]

            # Rebind rather than clear: the lazy queryset above still
            # references the previous list.
            previous_targets = []
            for pair in top_pairs:
                source_label = '{}_{}'.format(position, pair[source_key])
                target_label = '{}_{}'.format(next_position, pair[target_key])
                links.append({
                    'source': source_label,
                    'target': target_label,
                    'value': pair['count'],
                })
                previous_targets.append(pair[target_key])

        links = sorted(links, key=lambda link: link['value'], reverse=True)
        return Response(links)
コード例 #10
0
ファイル: funnel.py プロジェクト: maximmarakov/posthog
    def get_steps(self, funnel: Funnel) -> List[Dict[str, Any]]:
        """Serialize the steps of ``funnel``, with people counts on retrieve.

        Outside the ``retrieve`` action, or when the request carries
        ``exclude_count``, steps are serialized without people. Otherwise
        each step gets the ids of the people who reached it, reordered by
        ``_order_people_in_step`` and limited to 100 entries.

        A repeated call for the same funnel object returns an empty list.
        """
        # rest_framework runs a SerializerMethodField several times, which
        # would repeat the slow queries below, so the funnel is flagged on
        # the first call and later calls bail out early.
        # Known issue: https://stackoverflow.com/questions/55023511/serializer-being-called-multiple-times-django-python
        if hasattr(funnel, 'steps_cache'):
            return []
        funnel.steps_cache = True # type: ignore

        funnel_steps = funnel.steps.all().order_by('order').prefetch_related('action')

        with_people = (
            self.context['view'].action == 'retrieve'
            and not self.context['request'].GET.get('exclude_count')
        )
        if not with_people:
            return [self._serialize_step(step) for step in funnel_steps]

        if not funnel_steps:
            return []

        candidates = Person.objects.all().filter(
            team_id=funnel.team_id,
            persondistinctid__distinct_id__isnull=False,
        )
        step_annotations = self._annotate_steps(
            team_id=funnel.team_id,
            funnel_steps=funnel_steps,
            date_query=request_to_date_query(self.context['request']),
        )
        people = candidates.annotate(**step_annotations)\
            .filter(step_0__isnull=False)\
            .distinct('pk')

        steps = []
        for position, funnel_step in enumerate(funnel_steps):
            step_attr = 'step_{}'.format(position)
            person_ids = [person.id for person in people if getattr(person, step_attr)]
            steps.append(self._serialize_step(funnel_step, person_ids))

        for serialized in steps:
            serialized['people'] = self._order_people_in_step(steps, serialized['people'])[0:100]
        return steps
コード例 #11
0
    def calculate_paths(self, filter: PathFilter, team: Team):
        """Return the 20 most frequent event-to-event transitions for ``team``.

        Events matching ``filter`` are split into sessions per person: a new
        session starts when the person's previous event is 30 minutes or more
        in the past, or when there is no previous event. Transitions between
        consecutive events are counted over the first four events of each
        session.

        Returns a list of dicts with the keys ``source``, ``target``,
        ``target_id``, ``source_id`` and ``value``, sorted by descending
        ``value``.
        """
        date_query = request_to_date_query({"date_from": filter._date_from, "date_to": filter._date_to}, exact=False)
        prop_type = filter.prop_type
        event, event_filter = filter.target_event
        start_comparator = filter.comparator

        sessions = (
            Event.objects.add_person_id(team.pk)
            .filter(team=team, **(event_filter), **date_query)
            .filter(
                ~Q(event__in=["$autocapture", "$pageview", "$identify", "$pageleave", "$screen"])
                if event is None
                else Q()
            )
            .filter(
                properties_to_Q(filter.properties, team_id=team.pk, filter_test_accounts=filter.filter_test_accounts)
                if filter and (filter.properties or filter.filter_test_accounts)
                else Q()
            )
            .annotate(
                previous_timestamp=Window(
                    expression=Lag("timestamp", default=None),
                    partition_by=F("person_id"),
                    order_by=F("timestamp").asc(),
                )
            )
        )

        # The ORM-generated SQL becomes the innermost subquery; its
        # parameters are passed separately to execute() below.
        sessions_sql, sessions_sql_params = sessions.query.sql_with_params()

        if event == "$autocapture":
            sessions_sql = self._add_elements(query_string=sessions_sql)

        # Flag every event that opens a new session.
        events_notated = "\
        SELECT *, CASE WHEN EXTRACT('EPOCH' FROM (timestamp - previous_timestamp)) >= (60 * 30) OR previous_timestamp IS NULL THEN 1 ELSE 0 END AS new_session\
        FROM ({}) AS inner_sessions\
        ".format(sessions_sql)

        # A running sum of the flags gives each event its session number.
        sessionified = "\
        SELECT events_notated.*, SUM(new_session) OVER (\
            ORDER BY person_id\
                    ,timestamp\
            ) AS session\
        FROM ({}) as events_notated\
        ".format(events_notated)

        if filter and filter.start_point:
            sessionified = self._apply_start_point(
                start_comparator=start_comparator, query_string=sessionified, start_point=filter.start_point,
            )

        # Number the events inside each person's session.
        # NOTE(review): prop_type is spliced into the SQL text as-is;
        # confirm PathFilter.prop_type can never carry request-controlled text.
        final = "\
        SELECT {} as path_type, id, sessionified.session\
            ,ROW_NUMBER() OVER (\
                    PARTITION BY person_id\
                    ,session ORDER BY timestamp\
                    ) AS event_number\
        FROM ({}) as sessionified\
        ".format(prop_type, sessionified)

        # Pair every event with the previous row of its session.
        counts = "\
        SELECT event_number || '_' || path_type as target_event, id as target_id, LAG(event_number || '_' || path_type, 1) OVER (\
            PARTITION BY session\
            ) AS source_event , LAG(id, 1) OVER (\
            PARTITION BY session\
            ) AS source_id from \
        ({}) as final\
        where event_number <= 4\
        ".format(counts_source := final) if False else "\
        SELECT event_number || '_' || path_type as target_event, id as target_id, LAG(event_number || '_' || path_type, 1) OVER (\
            PARTITION BY session\
            ) AS source_event , LAG(id, 1) OVER (\
            PARTITION BY session\
            ) AS source_id from \
        ({}) as final\
        where event_number <= 4\
        ".format(final)

        query = "\
        SELECT source_event, target_event, MAX(target_id), MAX(source_id), count(*) from ({}) as counts\
        where source_event is not null and target_event is not null\
        group by source_event, target_event order by count desc limit 20\
        ".format(counts)

        # The context manager closes the cursor even when the query fails.
        with connection.cursor() as cursor:
            cursor.execute(query, sessions_sql_params)
            rows = cursor.fetchall()

        resp = [
            {"source": row[0], "target": row[1], "target_id": row[2], "source_id": row[3], "value": row[4]}
            for row in rows
        ]
        return sorted(resp, key=lambda x: x["value"], reverse=True)
コード例 #12
0
    def list(self, request):
        """Return the 15 most frequent path transitions for the user's team.

        The event to follow and the SQL expression used as its path label
        come from ``_determine_path_type``. Without a specific event, only
        events whose name does not start with ``$`` are considered. Events
        are split into sessions per distinct_id (a new session starts after
        a gap of 30 minutes or more, or when there is no previous event).
        Transitions are counted over the first four events of each session,
        skipping transitions whose source and target label are identical.

        Returns a ``Response`` with dicts holding ``source``, ``target``,
        ``target_id``, ``source_id`` and ``value``, sorted by descending
        ``value``.
        """
        team = request.user.team_set.get()
        date_query = request_to_date_query(request.GET)
        event, path_type = self._determine_path_type(request)

        # Raw string: "\$" is an invalid escape sequence in a normal literal.
        event_lookup = {"event": event} if event else {'event__regex': r'^[^\$].*'}  # anything without $ (default)

        sessions = Event.objects.filter(
                team=team,
                **event_lookup,
                **date_query
            )\
            .annotate(previous_timestamp=Window(
                expression=Lag('timestamp', default=None),
                partition_by=F('distinct_id'),
                order_by=F('timestamp').asc()
            ))

        # The ORM-generated SQL becomes the innermost subquery; its
        # parameters are passed separately to execute() below.
        sessions_sql, sessions_sql_params = sessions.query.sql_with_params()

        if event == "$autocapture":
            # Join each event to its element group and to the element with
            # the lowest "order" in that group.
            element = 'SELECT \'<\'|| e."tag_name" || \'> \'  || e."text" as tag_name_source, e."text" as text_source FROM "posthog_element" e JOIN \
                    ( SELECT group_id, MIN("posthog_element"."order") as minOrder FROM "posthog_element" GROUP BY group_id) e2 ON e.order = e2.minOrder AND e.group_id = e2.group_id where e.group_id = v2.group_id'

            element_group = 'SELECT g."id" as group_id FROM "posthog_elementgroup" g where v1."elements_hash" = g."hash"'
            sessions_sql = 'SELECT * FROM ({}) as v1 JOIN LATERAL ({}) as v2 on true JOIN LATERAL ({}) as v3 on true'.format(
                sessions_sql, element_group, element)

        # NOTE(review): path_type is spliced into the SQL text as-is; confirm
        # _determine_path_type never returns request-controlled text.
        query = '\
        SELECT source_event, target_event, MAX(target_id), MAX(source_id), count(*) from (\
            SELECT event_number || \'_\' || path_type as target_event, id as target_id, LAG(event_number || \'_\' || path_type, 1) OVER (\
                            PARTITION BY session\
                            ) AS source_event , LAG(id, 1) OVER (\
                            PARTITION BY session\
                            ) AS source_id from \
        (\
            SELECT {} as path_type, id, sessionified.session\
                ,ROW_NUMBER() OVER (\
                        PARTITION BY distinct_id\
                        ,session ORDER BY timestamp\
                        ) AS event_number\
        FROM (\
            SELECT events_notated.*, SUM(new_session) OVER (\
                ORDER BY distinct_id\
                        ,timestamp\
                ) AS session\
            FROM (\
                SELECT *, CASE WHEN EXTRACT(\'EPOCH\' FROM (timestamp - previous_timestamp)) >= (60 * 30) OR previous_timestamp IS NULL THEN 1 ELSE 0 END AS new_session\
                FROM ({}) AS inner_sessions \
            ) as events_notated \
        ) as sessionified\
        ) as final\
        where event_number <= 4\
        ) as counts\
        where source_event is not null and target_event is not null and SUBSTRING(source_event, 3) != SUBSTRING(target_event, 3)\
        group by source_event, target_event order by count desc limit 15\
        '.format(path_type, sessions_sql)

        # The context manager closes the cursor even when the query fails.
        with connection.cursor() as cursor:
            cursor.execute(query, sessions_sql_params)
            rows = cursor.fetchall()

        resp = [
            {
                'source': row[0],
                'target': row[1],
                'target_id': row[2],
                'source_id': row[3],
                'value': row[4]
            }
            for row in rows
        ]

        resp = sorted(resp, key=lambda x: x['value'], reverse=True)
        return Response(resp)
コード例 #13
0
 def sessions(self, request: request.Request) -> response.Response:
     """Return the calculated sessions for events in the requested date range.

     The date range is built from the query string and the session type is
     read from its ``session`` parameter.
     """
     queryset = self.get_queryset()
     date_filter = request_to_date_query(request.GET.dict())
     events = queryset.filter(**date_filter)
     session_type = self.request.GET.get('session')
     return response.Response(self.calculate_sessions(events, session_type))
コード例 #14
0
    def list(self, request):
        """Return the 20 most frequent path transitions for the user's team.

        The event to follow, its path label expression, the event filter and
        the start-point comparator come from ``_determine_path_type``. The
        optional ``properties`` (a JSON string) and ``start`` query
        parameters narrow the events and the sessions' starting point.
        Events are split into sessions per distinct_id: a new session starts
        after a gap of 30 minutes or more, or when there is no previous
        event. Transitions are counted over the first four events of each
        session.

        Returns a ``Response`` with dicts holding ``source``, ``target``,
        ``target_id``, ``source_id`` and ``value``, sorted by descending
        ``value``.
        """
        team = request.user.team_set.get()
        date_query = request_to_date_query(request.GET)
        event, path_type, event_filter, start_comparator = self._determine_path_type(
            request)
        properties = request.GET.get('properties')
        start_point = request.GET.get('start')

        sessions = Event.objects.add_person_id(team.pk).filter(
                team=team,
                **(event_filter),
                **date_query
            )\
            .filter(~Q(event__in=['$autocapture', '$pageview', '$identify', '$pageleave']) if event is None else Q())\
            .filter(Filter(data={'properties': json.loads(properties)}).properties_to_Q() if properties else Q())\
            .annotate(previous_timestamp=Window(
                expression=Lag('timestamp', default=None),
                partition_by=F('distinct_id'),
                order_by=F('timestamp').asc()
            ))

        # The ORM-generated SQL becomes the innermost subquery; its
        # parameters are passed separately to execute() below.
        sessions_sql, sessions_sql_params = sessions.query.sql_with_params()

        if event == "$autocapture":
            sessions_sql = self._add_elements(query_string=sessions_sql)

        # Flag every event that opens a new session.
        events_notated = '\
        SELECT *, CASE WHEN EXTRACT(\'EPOCH\' FROM (timestamp - previous_timestamp)) >= (60 * 30) OR previous_timestamp IS NULL THEN 1 ELSE 0 END AS new_session\
        FROM ({}) AS inner_sessions\
        '.format(sessions_sql)

        # A running sum of the flags gives each event its session number.
        sessionified = '\
        SELECT events_notated.*, SUM(new_session) OVER (\
            ORDER BY distinct_id\
                    ,timestamp\
            ) AS session\
        FROM ({}) as events_notated\
        '.format(events_notated)

        if start_point:
            sessionified = self._apply_start_point(
                start_comparator=start_comparator,
                query_string=sessionified,
                start_point=start_point)

        # Number the events inside each distinct_id's session.
        # NOTE(review): path_type is spliced into the SQL text as-is; confirm
        # _determine_path_type never returns request-controlled text.
        final = '\
        SELECT {} as path_type, id, sessionified.session\
            ,ROW_NUMBER() OVER (\
                    PARTITION BY distinct_id\
                    ,session ORDER BY timestamp\
                    ) AS event_number\
        FROM ({}) as sessionified\
        '.format(path_type, sessionified)

        # Pair every event with the previous row of its session.
        counts = '\
        SELECT event_number || \'_\' || path_type as target_event, id as target_id, LAG(event_number || \'_\' || path_type, 1) OVER (\
            PARTITION BY session\
            ) AS source_event , LAG(id, 1) OVER (\
            PARTITION BY session\
            ) AS source_id from \
        ({}) as final\
        where event_number <= 4\
        '.format(final)

        query = '\
        SELECT source_event, target_event, MAX(target_id), MAX(source_id), count(*) from ({}) as counts\
        where source_event is not null and target_event is not null\
        group by source_event, target_event order by count desc limit 20\
        '.format(counts)

        # The context manager closes the cursor even when the query fails.
        with connection.cursor() as cursor:
            cursor.execute(query, sessions_sql_params)
            rows = cursor.fetchall()

        resp = [
            {
                'source': row[0],
                'target': row[1],
                'target_id': row[2],
                'source_id': row[3],
                'value': row[4]
            }
            for row in rows
        ]

        resp = sorted(resp, key=lambda x: x['value'], reverse=True)
        return Response(resp)