def sessions(self, request: request.Request) -> response.Response:
    """Return calculated sessions for the requesting user's team.

    The date range comes from the query string.  A missing lower bound
    defaults to midnight on the day of the team's earliest event, and a
    missing upper bound defaults to ``now()``.

    When ``compare`` is set and ``date_from`` is not ``'all'``, the result
    holds the current period (converted with the ``'current'`` label)
    followed by the compared period (``'previous'`` label).

    Returns:
        Response wrapping the list produced by ``calculate_sessions``; an
        empty list when no lower bound was given and the team has no events.
    """
    team = self.request.user.team_set.get()
    date_filter = request_to_date_query(request.GET.dict())
    if not date_filter.get('timestamp__gte'):
        # .first() instead of [0]: indexing an empty queryset raises
        # IndexError, which turned into a server error for brand-new teams.
        first_event = Event.objects.filter(team=team).order_by('timestamp').first()
        if first_event is None:
            return response.Response([])
        date_filter['timestamp__gte'] = first_event.timestamp.replace(
            hour=0, minute=0, second=0, microsecond=0)
    if not date_filter.get('timestamp__lte'):
        date_filter['timestamp__lte'] = now()

    events = self.get_queryset().filter(**date_filter)
    session_type = self.request.GET.get('session')

    # Both the plain and the comparison response start from the same calculation.
    calculated = self.calculate_sessions(events, session_type, date_filter)

    # get compared period
    compare = request.GET.get('compare')
    if compare and request.GET.get('date_from') != 'all':
        calculated = self._convert_to_comparison(calculated, 'current')
        compared_calculated = self._handle_compared(date_filter, session_type)
        calculated.extend(
            self._convert_to_comparison(compared_calculated, 'previous'))
    return response.Response(calculated)
def get_steps(self) -> List[Dict[str, Any]]:
    """Serialize every funnel step together with the people who reached it.

    Steps are the ``actions`` and ``events`` entries of ``self.filters``,
    sorted by their ``order`` key.  Each person of the team is annotated
    with one ``step_<index>`` attribute per step; a person counts for a
    step when that attribute is truthy.

    Returns:
        One serialized step per funnel step, each with its ``people`` entry
        ordered by ``_order_people_in_step`` and cut to 100 items.
    """
    unordered_steps = self.filters.get('actions', []) + self.filters.get('events', [])
    funnel_steps = sorted(unordered_steps, key=lambda step: step['order'])

    people = (
        Person.objects.all()
        .filter(team_id=self.team_id, persondistinctid__distinct_id__isnull=False)
        .annotate(**self._annotate_steps(
            team_id=self.team_id,
            funnel_steps=funnel_steps,
            date_query=request_to_date_query(self.filters),
        ))
        # Only people who completed the first step are of interest.
        .filter(step_0__isnull=False)
        .distinct('pk')
    )

    steps = []
    for position, funnel_step in enumerate(funnel_steps):
        step_attr = 'step_{}'.format(position)
        reached = [person.id for person in people if getattr(person, step_attr)]
        steps.append(self._serialize_step(funnel_step, reached))

    # Ordering looks at all steps, so it runs only after every step is serialized.
    for step in steps:
        step['people'] = self._order_people_in_step(steps, step['people'])[0:100]
    return steps
def get_steps(self) -> List[Dict[str, Any]]:
    """Serialize every funnel step together with the people who reached it.

    Steps are the ``actions`` and ``events`` entries of ``self.filters``,
    sorted by their ``order`` key.  The ``properties`` entry of the filters
    is forwarded to ``_annotate_steps``.  Each person of the team is
    annotated with one ``step_<index>`` attribute per step; a person counts
    for a step when that attribute is truthy.

    Returns:
        One serialized step per funnel step, each with its ``people`` entry
        ordered by ``_order_people_in_step`` and cut to 100 items.
    """
    unordered_steps = self.filters.get("actions", []) + self.filters.get("events", [])
    properties = self.filters.get("properties", [])
    funnel_steps = sorted(unordered_steps, key=lambda step: step["order"])

    team_people = Person.objects.all().filter(
        team_id=self.team_id, persondistinctid__distinct_id__isnull=False)
    step_annotations = self._annotate_steps(
        team_id=self.team_id,
        funnel_steps=funnel_steps,
        date_query=request_to_date_query(self.filters),
        properties=properties,
    )
    # Only people who completed the first step are of interest.
    people = (team_people.annotate(**step_annotations)
              .filter(step_0__isnull=False)
              .distinct("pk"))

    steps = []
    for position, funnel_step in enumerate(funnel_steps):
        step_attr = "step_{}".format(position)
        reached = [person.id for person in people if getattr(person, step_attr)]
        steps.append(self._serialize_step(funnel_step, reached))

    # Ordering looks at all steps, so it runs only after every step is serialized.
    for step in steps:
        step["people"] = self._order_people_in_step(steps, step["people"])[0:100]
    return steps
def get_list(self, request):
    """Run the paths query for ``self.team`` with parameters from ``request.GET``.

    Reads the ``start`` and ``type`` query parameters, builds a date query
    (``exact=False``) and a ``PathFilter`` from the request, and returns
    whatever ``paths.Paths().run`` produces.
    """
    params = request.GET
    team = self.team
    date_query = request_to_date_query(params, exact=False)
    path_filter = PathFilter(request=request)
    start_point = params.get("start")
    request_type = params.get("type", None)
    return paths.Paths().run(
        filter=path_filter,
        start_point=start_point,
        date_query=date_query,
        request_type=request_type,
        team=team,
    )
def list(self, request):
    """Return the 15 most common transitions between consecutive pageview URLs.

    ``$pageview`` events of the requesting user's team (restricted by the
    date query built from ``request.GET``) are split into sessions: a new
    session starts when an event has no previous event for the same
    ``distinct_id`` or follows it by 30 minutes or more.  Only the first
    four events of a session are considered.  Transitions whose source and
    target are equal once the two-character position prefix is stripped are
    dropped.

    Returns:
        Response wrapping a list of ``{'source', 'target', 'value'}`` dicts
        sorted by ``value`` descending.  ``source`` and ``target`` have the
        form ``<event_number>_<current_url>``.
    """
    team = request.user.team_set.get()
    date_query = request_to_date_query(request.GET)
    sessions = Event.objects.filter(
        team=team,
        event='$pageview',
        **date_query
    ).annotate(
        # Timestamp of the same distinct_id's preceding event (NULL for the first).
        previous_timestamp=Window(
            expression=Lag('timestamp', default=None),
            partition_by=F('distinct_id'),
            order_by=F('timestamp').asc(),
        )
    )
    sessions_sql, sessions_sql_params = sessions.query.sql_with_params()

    # Only the ORM-generated SQL is interpolated; its values stay in
    # sessions_sql_params and are bound by the database driver.
    query = """
        SELECT source_event, target_event, count(*) from (
            SELECT event_number || '_' || current_url as target_event,LAG(event_number || '_' || current_url, 1) OVER (
                PARTITION BY session
            ) AS source_event from
            (
                SELECT properties->> '$current_url' as current_url, sessionified.session
                    ,ROW_NUMBER() OVER (
                        PARTITION BY distinct_id
                        ,session ORDER BY timestamp
                    ) AS event_number
                FROM (
                    SELECT events_notated.*, SUM(new_session) OVER (
                        ORDER BY distinct_id
                        ,timestamp
                    ) AS session
                    FROM (
                        SELECT *, CASE WHEN EXTRACT('EPOCH' FROM (timestamp - previous_timestamp)) >= (60 * 30) OR previous_timestamp IS NULL THEN 1 ELSE 0 END AS new_session
                        FROM ({}) AS inner_sessions
                    ) as events_notated
                ) as sessionified
            ) as final
            where event_number <= 4
        ) as counts
        where source_event is not null and target_event is not null and SUBSTRING(source_event, 3) != SUBSTRING(target_event, 3)
        group by source_event, target_event order by count desc limit 15
    """.format(sessions_sql)

    # The context manager closes the cursor even if execution fails.
    with connection.cursor() as cursor:
        cursor.execute(query, sessions_sql_params)
        rows = cursor.fetchall()

    resp = [{'source': row[0], 'target': row[1], 'value': row[2]} for row in rows]
    resp = sorted(resp, key=lambda x: x['value'], reverse=True)
    return Response(resp)
def list(self, request):
    """Return the paths computed by ``paths.Paths().run`` for the user's team.

    Reads the ``start`` and ``type`` query parameters, builds a date query
    (``exact=False``) and a ``Filter`` from the request, and wraps the
    result of the paths query in a ``Response``.
    """
    params = request.GET
    team = request.user.team_set.get()
    date_query = request_to_date_query(params, exact=False)
    path_filter = Filter(request=request)
    start_point = params.get("start")
    request_type = params.get("type", None)
    path_data = paths.Paths().run(
        filter=path_filter,
        start_point=start_point,
        date_query=date_query,
        request_type=request_type,
        team=team,
    )
    return Response(path_data)
def sessions(self, request: request.Request) -> response.Response:
    """Return calculated sessions for the requesting user's team.

    The date range comes from the query string (``exact=True``).  A missing
    lower bound defaults to midnight on the day of the team's earliest
    event, and a missing upper bound defaults to ``now()``.  The date filter
    is applied to the queryset only when a ``session`` type was requested;
    it is always passed on to ``calculate_sessions``.

    When ``compare`` is set, ``date_from`` is not ``'all'`` and the session
    type is ``'avg'``, the result holds the current period (``'current'``
    label) followed by the compared period (``'previous'`` label).

    When no session type is requested and at least 50 entries were
    calculated, pagination hints are added: ``offset`` (requested offset
    plus 50) and ``date_from`` (``start_time`` of the first entry).

    Returns:
        Response wrapping ``{'result': [...]}`` plus the optional
        pagination keys.  The result is empty when no lower bound was given
        and the team has no events.
    """
    team = self.request.user.team_set.get()
    session_type = self.request.GET.get('session')
    result: Dict[str, Any] = {'result': []}

    date_filter = request_to_date_query(request.GET.dict(), exact=True)
    if not date_filter.get('timestamp__gte'):
        # .first() instead of [0]: indexing an empty queryset raises
        # IndexError, which turned into a server error for brand-new teams.
        first_event = Event.objects.filter(team=team).order_by('timestamp').first()
        if first_event is None:
            return response.Response(result)
        date_filter['timestamp__gte'] = first_event.timestamp.replace(
            hour=0, minute=0, second=0, microsecond=0)
    if not date_filter.get('timestamp__lte'):
        date_filter['timestamp__lte'] = now()

    events = self.get_queryset()
    if session_type is not None:
        events = events.filter(**date_filter)

    # Both the plain and the comparison response start from the same calculation.
    calculated = self.calculate_sessions(events, session_type, date_filter,
                                         team, request)

    # get compared period
    compare = request.GET.get('compare')
    if compare and request.GET.get('date_from') != 'all' and session_type == 'avg':
        calculated = self._convert_to_comparison(calculated, 'current')
        compared_events = self._handle_compared(date_filter)
        compared_calculated = self.calculate_sessions(
            compared_events, session_type, date_filter, team, request)
        calculated.extend(
            self._convert_to_comparison(compared_calculated, 'previous'))
    result.update({'result': calculated})

    # add pagination
    if session_type is None:
        offset = int(request.GET.get('offset', '0')) + 50
        if len(calculated) > 49:
            date_from = calculated[0]['start_time'].isoformat()
            result.update({'offset': offset})
            result.update({'date_from': date_from})
    return response.Response(result)
def sessions(self, request: request.Request) -> response.Response:
    """Return calculated sessions for the requesting user's team.

    The date range comes from the query string (``exact=True``).  A missing
    lower bound defaults to midnight on the day of the team's earliest
    event, and a missing upper bound defaults to ``now()``.  The date filter
    is applied to the queryset only when a ``session`` type was requested;
    it is always passed on to ``calculate_sessions``.

    When ``compare`` is set, ``date_from`` is not ``"all"`` and the session
    type is ``"avg"``, the result holds the current period (``"current"``
    label) followed by the compared period (``"previous"`` label).

    When no session type is requested and at least 50 entries were
    calculated, pagination hints are added: ``offset`` (requested offset
    plus 50) and ``date_from`` (``start_time`` of the first entry).

    Returns:
        Response wrapping ``{"result": [...]}`` plus the optional
        pagination keys.  The result is empty when no lower bound was given
        and the team has no events.
    """
    team = self.request.user.team_set.get()
    session_type = self.request.GET.get("session")
    result: Dict[str, Any] = {"result": []}

    date_filter = request_to_date_query(request.GET.dict(), exact=True)
    if not date_filter.get("timestamp__gte"):
        # .first() instead of [0]: indexing an empty queryset raises
        # IndexError, which turned into a server error for brand-new teams.
        first_event = Event.objects.filter(team=team).order_by("timestamp").first()
        if first_event is None:
            return response.Response(result)
        date_filter["timestamp__gte"] = first_event.timestamp.replace(
            hour=0, minute=0, second=0, microsecond=0)
    if not date_filter.get("timestamp__lte"):
        date_filter["timestamp__lte"] = now()

    events = self.get_queryset()
    if session_type is not None:
        events = events.filter(**date_filter)

    # Both the plain and the comparison response start from the same calculation.
    calculated = self.calculate_sessions(events, session_type, date_filter,
                                         team, request)

    # get compared period
    compare = request.GET.get("compare")
    if compare and request.GET.get("date_from") != "all" and session_type == "avg":
        calculated = self._convert_to_comparison(calculated, "current")
        compared_events = self._handle_compared(date_filter)
        compared_calculated = self.calculate_sessions(
            compared_events, session_type, date_filter, team, request)
        calculated.extend(
            self._convert_to_comparison(compared_calculated, "previous"))
    result.update({"result": calculated})

    # add pagination
    if session_type is None:
        offset = int(request.GET.get("offset", "0")) + 50
        if len(calculated) > 49:
            date_from = calculated[0]["start_time"].isoformat()
            result.update({"offset": offset})
            result.update({"date_from": date_from})
    return response.Response(result)
def list(self, request):
    """Return URL-to-URL transitions for positions 1 to 4, most frequent first.

    Starting from the team's ``PersonDistinctId`` rows, the queryset is
    annotated position by position via ``_add_event_and_url_at_position``.
    For each pair of adjacent positions the six most frequent
    ``(url_<n>, url_<n+1>)`` combinations are collected.  From the second
    pair on, source URLs are limited to the target URLs found for the
    previous pair, provided that pair produced any.

    Returns:
        Response wrapping a list of ``{'source', 'target', 'value'}`` dicts
        sorted by ``value`` descending; ``source`` and ``target`` have the
        form ``<position>_<url>``.
    """
    team = request.user.team_set.get()
    date_query = request_to_date_query(request.GET)
    transitions = []

    aggregate: QuerySet[PersonDistinctId] = PersonDistinctId.objects.filter(team=team)
    aggregate = self._add_event_and_url_at_position(aggregate, team, 1, date_query)

    urls: List[str] = []
    for position in range(1, 4):
        following = position + 1
        aggregate = self._add_event_and_url_at_position(
            aggregate, team, following, date_query)
        first_url_key = 'url_{}'.format(position)
        second_url_key = 'url_{}'.format(following)

        # The source restriction applies only once an earlier pair yielded URLs.
        lookups = {}
        if urls:
            lookups['{}__in'.format(first_url_key)] = urls
        lookups['{}__isnull'.format(second_url_key)] = False

        rows = (
            aggregate
            .filter(**lookups)
            .values(first_url_key, second_url_key)
            .annotate(count=Count('pk'))
            .order_by('-count')[0:6]
        )

        # Rebind (do not mutate) so the lazy queryset above keeps its own list.
        urls = []
        for row in rows:
            transitions.append({
                'source': '{}_{}'.format(position, row[first_url_key]),
                'target': '{}_{}'.format(following, row[second_url_key]),
                'value': row['count'],
            })
            urls.append(row[second_url_key])

    transitions.sort(key=lambda x: x['value'], reverse=True)
    return Response(transitions)
def get_steps(self, funnel: Funnel) -> List[Dict[str, Any]]:
    """Serialize the steps of ``funnel``, with people counts on retrieve.

    Outside the ``retrieve`` action, or when the request carries
    ``exclude_count``, the steps are serialized without people.  Otherwise
    every person of the funnel's team is annotated with one
    ``step_<index>`` attribute per step, and each step lists the people for
    whom that attribute is truthy, ordered by ``_order_people_in_step`` and
    cut to 100 items.

    Returns:
        The serialized steps, or an empty list when the funnel has no steps
        or this method already ran for the same ``funnel`` object.
    """
    # rest_framework executes SerializerMethodField multiple times, which
    # caused lots of slow queries, so repeated calls are short-circuited.
    # Known issue: https://stackoverflow.com/questions/55023511/serializer-being-called-multiple-times-django-python
    if hasattr(funnel, 'steps_cache'):
        return []
    funnel.steps_cache = True  # type: ignore

    funnel_steps = funnel.steps.all().order_by('order').prefetch_related('action')

    with_counts = (
        self.context['view'].action == 'retrieve'
        and not self.context['request'].GET.get('exclude_count')
    )
    if not with_counts:
        return [self._serialize_step(step) for step in funnel_steps]

    if not funnel_steps:
        return []

    team_people = Person.objects.all().filter(
        team_id=funnel.team_id,
        persondistinctid__distinct_id__isnull=False,
    )
    step_annotations = self._annotate_steps(
        team_id=funnel.team_id,
        funnel_steps=funnel_steps,
        date_query=request_to_date_query(self.context['request']),
    )
    # Only people who completed the first step are of interest.
    people = (team_people.annotate(**step_annotations)
              .filter(step_0__isnull=False)
              .distinct('pk'))

    steps = []
    for position, funnel_step in enumerate(funnel_steps):
        step_attr = 'step_{}'.format(position)
        reached = [person.id for person in people if getattr(person, step_attr)]
        steps.append(self._serialize_step(funnel_step, reached))

    # Ordering looks at all steps, so it runs only after every step is serialized.
    for step in steps:
        step['people'] = self._order_people_in_step(steps, step['people'])[0:100]
    return steps
def calculate_paths(self, filter: PathFilter, team: Team):
    """Return the 20 most common transitions between consecutive path events.

    Events of ``team`` are restricted by the filter's date range, target
    event filter and (when present) properties / test-account filtering.
    When the filter names no target event, ``$autocapture``, ``$pageview``,
    ``$identify``, ``$pageleave`` and ``$screen`` events are excluded.
    Events are split into sessions: a new session starts when an event has
    no previous event for the same person or follows it by 30 minutes or
    more.  Only the first four events of a session are considered.

    Args:
        filter: supplies the date range, ``prop_type``, ``target_event``,
            ``comparator``, ``properties``, ``filter_test_accounts`` and
            ``start_point``.
        team: team whose events are queried.

    Returns:
        A list of dicts with ``source``, ``target``, ``target_id``,
        ``source_id`` and ``value`` keys, sorted by ``value`` descending.
        ``source`` and ``target`` have the form
        ``<event_number>_<path_type>``.
    """
    date_query = request_to_date_query(
        {"date_from": filter._date_from, "date_to": filter._date_to}, exact=False)
    prop_type = filter.prop_type
    event, event_filter = filter.target_event
    start_comparator = filter.comparator

    sessions = (
        Event.objects.add_person_id(team.pk)
        .filter(team=team, **(event_filter), **date_query)
        .filter(
            ~Q(event__in=["$autocapture", "$pageview", "$identify", "$pageleave", "$screen"])
            if event is None
            else Q()
        )
        .filter(
            properties_to_Q(filter.properties, team_id=team.pk, filter_test_accounts=filter.filter_test_accounts)
            if filter and (filter.properties or filter.filter_test_accounts)
            else Q()
        )
        .annotate(
            # Timestamp of the same person's preceding event (NULL for the first).
            previous_timestamp=Window(
                expression=Lag("timestamp", default=None),
                partition_by=F("person_id"),
                order_by=F("timestamp").asc(),
            )
        )
    )
    sessions_sql, sessions_sql_params = sessions.query.sql_with_params()

    if event == "$autocapture":
        sessions_sql = self._add_elements(query_string=sessions_sql)

    # The query is assembled inside-out.  Only SQL fragments are
    # interpolated; values stay in sessions_sql_params for the driver.
    events_notated = """
        SELECT *, CASE WHEN EXTRACT('EPOCH' FROM (timestamp - previous_timestamp)) >= (60 * 30) OR previous_timestamp IS NULL THEN 1 ELSE 0 END AS new_session
        FROM ({}) AS inner_sessions
    """.format(sessions_sql)

    sessionified = """
        SELECT events_notated.*, SUM(new_session) OVER (
            ORDER BY person_id
            ,timestamp
        ) AS session
        FROM ({}) as events_notated
    """.format(events_notated)

    if filter and filter.start_point:
        sessionified = self._apply_start_point(
            start_comparator=start_comparator,
            query_string=sessionified,
            start_point=filter.start_point,
        )

    final = """
        SELECT {} as path_type, id, sessionified.session
            ,ROW_NUMBER() OVER (
                PARTITION BY person_id
                ,session ORDER BY timestamp
            ) AS event_number
        FROM ({}) as sessionified
    """.format(prop_type, sessionified)

    counts = """
        SELECT event_number || '_' || path_type as target_event, id as target_id, LAG(event_number || '_' || path_type, 1) OVER (
            PARTITION BY session
        ) AS source_event , LAG(id, 1) OVER (
            PARTITION BY session
        ) AS source_id from
        ({}) as final
        where event_number <= 4
    """.format(final)

    query = """
        SELECT source_event, target_event, MAX(target_id), MAX(source_id), count(*) from ({}) as counts
        where source_event is not null and target_event is not null
        group by source_event, target_event order by count desc limit 20
    """.format(counts)

    # The context manager closes the cursor even if execution fails.
    with connection.cursor() as cursor:
        cursor.execute(query, sessions_sql_params)
        rows = cursor.fetchall()

    resp = [
        {"source": row[0], "target": row[1], "target_id": row[2], "source_id": row[3], "value": row[4]}
        for row in rows
    ]
    resp = sorted(resp, key=lambda x: x["value"], reverse=True)
    return resp
def list(self, request):
    """Return the 15 most common transitions between consecutive path events.

    The event name and the SQL expression used as path value come from
    ``_determine_path_type``.  Without an event name, only events whose
    name does not start with ``$`` are used.  For ``$autocapture`` the
    event query is joined with its element group and that group's
    lowest-order element.  Events are split into sessions: a new session
    starts when an event has no previous event for the same ``distinct_id``
    or follows it by 30 minutes or more.  Only the first four events of a
    session are considered.  Transitions whose source and target are equal
    once the two-character position prefix is stripped are dropped.

    Returns:
        Response wrapping a list of dicts with ``source``, ``target``,
        ``target_id``, ``source_id`` and ``value`` keys, sorted by
        ``value`` descending.
    """
    team = request.user.team_set.get()
    date_query = request_to_date_query(request.GET)
    event, path_type = self._determine_path_type(request)

    # Raw string: "\$" is an invalid escape sequence in a normal literal.
    # The pattern matches anything without a leading $ (the default).
    event_filter = {"event": event} if event else {'event__regex': r'^[^\$].*'}
    sessions = Event.objects.filter(
        team=team,
        **event_filter,
        **date_query
    ).annotate(
        # Timestamp of the same distinct_id's preceding event (NULL for the first).
        previous_timestamp=Window(
            expression=Lag('timestamp', default=None),
            partition_by=F('distinct_id'),
            order_by=F('timestamp').asc(),
        )
    )
    sessions_sql, sessions_sql_params = sessions.query.sql_with_params()

    if event == "$autocapture":
        element = """SELECT '<'|| e."tag_name" || '> ' || e."text" as tag_name_source, e."text" as text_source FROM "posthog_element" e JOIN
            ( SELECT group_id, MIN("posthog_element"."order") as minOrder FROM "posthog_element" GROUP BY group_id) e2 ON e.order = e2.minOrder AND e.group_id = e2.group_id where e.group_id = v2.group_id"""
        element_group = 'SELECT g."id" as group_id FROM "posthog_elementgroup" g where v1."elements_hash" = g."hash"'
        sessions_sql = 'SELECT * FROM ({}) as v1 JOIN LATERAL ({}) as v2 on true JOIN LATERAL ({}) as v3 on true'.format(
            sessions_sql, element_group, element)

    # Only SQL fragments are interpolated; values stay in
    # sessions_sql_params and are bound by the database driver.
    query = """
        SELECT source_event, target_event, MAX(target_id), MAX(source_id), count(*) from (
            SELECT event_number || '_' || path_type as target_event, id as target_id, LAG(event_number || '_' || path_type, 1) OVER (
                PARTITION BY session
            ) AS source_event , LAG(id, 1) OVER (
                PARTITION BY session
            ) AS source_id from
            (
                SELECT {} as path_type, id, sessionified.session
                    ,ROW_NUMBER() OVER (
                        PARTITION BY distinct_id
                        ,session ORDER BY timestamp
                    ) AS event_number
                FROM (
                    SELECT events_notated.*, SUM(new_session) OVER (
                        ORDER BY distinct_id
                        ,timestamp
                    ) AS session
                    FROM (
                        SELECT *, CASE WHEN EXTRACT('EPOCH' FROM (timestamp - previous_timestamp)) >= (60 * 30) OR previous_timestamp IS NULL THEN 1 ELSE 0 END AS new_session
                        FROM ({}) AS inner_sessions
                    ) as events_notated
                ) as sessionified
            ) as final
            where event_number <= 4
        ) as counts
        where source_event is not null and target_event is not null and SUBSTRING(source_event, 3) != SUBSTRING(target_event, 3)
        group by source_event, target_event order by count desc limit 15
    """.format(path_type, sessions_sql)

    # The context manager closes the cursor even if execution fails.
    with connection.cursor() as cursor:
        cursor.execute(query, sessions_sql_params)
        rows = cursor.fetchall()

    resp = [
        {
            'source': row[0],
            'target': row[1],
            'target_id': row[2],
            'source_id': row[3],
            'value': row[4],
        }
        for row in rows
    ]
    resp = sorted(resp, key=lambda x: x['value'], reverse=True)
    return Response(resp)
def sessions(self, request: request.Request) -> response.Response:
    """Return sessions calculated from the date-filtered event queryset.

    The view's queryset is narrowed by the date query built from the
    request's query string.  The ``session`` query parameter is passed to
    ``calculate_sessions`` as the session type.
    """
    queryset = self.get_queryset()
    events = queryset.filter(**request_to_date_query(request.GET.dict()))
    session_type = self.request.GET.get('session')
    return response.Response(self.calculate_sessions(events, session_type))
def list(self, request):
    """Return the 20 most common transitions between consecutive path events.

    The event name, the SQL expression used as path value, the event filter
    and the start-point comparator come from ``_determine_path_type``.
    When no event name is given, ``$autocapture``, ``$pageview``,
    ``$identify`` and ``$pageleave`` events are excluded.  The optional
    ``properties`` query parameter is parsed as JSON and applied as a
    filter; the optional ``start`` parameter is handed to
    ``_apply_start_point``.  Events are split into sessions: a new session
    starts when an event has no previous event for the same ``distinct_id``
    or follows it by 30 minutes or more.  Only the first four events of a
    session are considered.

    Returns:
        Response wrapping a list of dicts with ``source``, ``target``,
        ``target_id``, ``source_id`` and ``value`` keys, sorted by
        ``value`` descending.
    """
    team = request.user.team_set.get()
    date_query = request_to_date_query(request.GET)
    event, path_type, event_filter, start_comparator = self._determine_path_type(
        request)
    properties = request.GET.get('properties')
    start_point = request.GET.get('start')

    sessions = (
        Event.objects.add_person_id(team.pk)
        .filter(team=team, **(event_filter), **date_query)
        .filter(~Q(event__in=['$autocapture', '$pageview', '$identify', '$pageleave']) if event is None else Q())
        .filter(Filter(data={'properties': json.loads(properties)}).properties_to_Q() if properties else Q())
        .annotate(
            # Timestamp of the same distinct_id's preceding event (NULL for the first).
            previous_timestamp=Window(
                expression=Lag('timestamp', default=None),
                partition_by=F('distinct_id'),
                order_by=F('timestamp').asc(),
            )
        )
    )
    sessions_sql, sessions_sql_params = sessions.query.sql_with_params()

    if event == "$autocapture":
        sessions_sql = self._add_elements(query_string=sessions_sql)

    # The query is assembled inside-out.  Only SQL fragments are
    # interpolated; values stay in sessions_sql_params for the driver.
    events_notated = """
        SELECT *, CASE WHEN EXTRACT('EPOCH' FROM (timestamp - previous_timestamp)) >= (60 * 30) OR previous_timestamp IS NULL THEN 1 ELSE 0 END AS new_session
        FROM ({}) AS inner_sessions
    """.format(sessions_sql)

    sessionified = """
        SELECT events_notated.*, SUM(new_session) OVER (
            ORDER BY distinct_id
            ,timestamp
        ) AS session
        FROM ({}) as events_notated
    """.format(events_notated)

    if start_point:
        sessionified = self._apply_start_point(
            start_comparator=start_comparator,
            query_string=sessionified,
            start_point=start_point)

    final = """
        SELECT {} as path_type, id, sessionified.session
            ,ROW_NUMBER() OVER (
                PARTITION BY distinct_id
                ,session ORDER BY timestamp
            ) AS event_number
        FROM ({}) as sessionified
    """.format(path_type, sessionified)

    counts = """
        SELECT event_number || '_' || path_type as target_event, id as target_id, LAG(event_number || '_' || path_type, 1) OVER (
            PARTITION BY session
        ) AS source_event , LAG(id, 1) OVER (
            PARTITION BY session
        ) AS source_id from
        ({}) as final
        where event_number <= 4
    """.format(final)

    query = """
        SELECT source_event, target_event, MAX(target_id), MAX(source_id), count(*) from ({}) as counts
        where source_event is not null and target_event is not null
        group by source_event, target_event order by count desc limit 20
    """.format(counts)

    # The context manager closes the cursor even if execution fails.
    with connection.cursor() as cursor:
        cursor.execute(query, sessions_sql_params)
        rows = cursor.fetchall()

    resp = [
        {
            'source': row[0],
            'target': row[1],
            'target_id': row[2],
            'source_id': row[3],
            'value': row[4],
        }
        for row in rows
    ]
    resp = sorted(resp, key=lambda x: x['value'], reverse=True)
    return Response(resp)