def test_filter_test_accounts(self):
    """Run the "dist" and "avg" session queries with FILTER_TEST_ACCOUNTS enabled."""
    # NOTE(review): the source indentation was lost; both queries and their
    # assertions are assumed to run under the final frozen clock - confirm.
    # 0 seconds
    person_factory(team_id=self.team.pk, distinct_ids=["2"], properties={"email": "*****@*****.**"})
    person_factory(team_id=self.team.pk, distinct_ids=["4"])

    # (frozen time, distinct ids that fire "1st action" at that time)
    timeline = [
        ("2012-01-11T01:25:30.000Z", ["2", "4"]),
        ("2012-01-11T01:31:30.000Z", ["2"]),
        ("2012-01-11T01:51:30.000Z", ["4"]),
    ]
    for frozen_at, distinct_ids in timeline:
        with freeze_time(frozen_at):
            for distinct_id in distinct_ids:
                event_factory(team=self.team, event="1st action", distinct_id=distinct_id)

    with freeze_time("2012-01-12T03:40:30.000Z"):
        dist_filter = SessionsFilter(
            data={
                "date_from": "all",
                "session": "dist",
                FILTER_TEST_ACCOUNTS: True,
                "events": [{"id": "1st action"}],
            }
        )
        response = sessions().run(dist_filter, self.team)
        self.assertEqual(response[6]["count"], 0)
        self.assertEqual(response[7]["count"], 1)

        avg_filter = SessionsFilter(
            data={
                "interval": "day",
                "session": "avg",
                FILTER_TEST_ACCOUNTS: True,
                "events": [{"id": "1st action"}],
            }
        )
        response = sessions().run(avg_filter, self.team)
        self.assertEqual(response[0]["data"][6], 26)
def test_sessions_by_distinct_id(self):
    """A ``distinct_id`` filter yields that id's single session, and nothing for an unknown id."""
    self.create_large_testset()

    known_id_filter = SessionsFilter(data={"distinct_id": "88"})
    found, _ = self.run_query(known_id_filter)
    self.assertLength(found, 1)
    self.assertEqual(found[0]["distinct_id"], "88")

    unknown_id_filter = SessionsFilter(data={"distinct_id": "foobar"})
    missing, _ = self.run_query(unknown_id_filter)
    self.assertLength(missing, 0)
def test_sessions_list(self):
    """List sessions without property filters, then narrowed by the ``$os`` property."""
    self.create_test_data()

    unfiltered = SessionsFilter(data={"properties": []})
    all_sessions, _ = self.run_query(unfiltered)
    self.assertEqual(len(all_sessions), 2)
    self.assertEqual(all_sessions[0]["distinct_id"], "2")

    mac_only = SessionsFilter(data={"properties": [{"key": "$os", "value": "Mac OS X"}]})
    mac_sessions, _ = self.run_query(mac_only)
    self.assertEqual(len(mac_sessions), 1)
    self.assertEqual(mac_sessions[0]["distinct_id"], "1")
def test_sessions_list(self):
    """List sessions under a frozen clock, with and without an ``$os`` property filter."""
    self.create_test_data()

    # NOTE(review): the source indentation was lost; both queries are assumed
    # to run while the clock is frozen - confirm.
    with freeze_time("2012-01-15T04:01:34.000Z"):
        unfiltered = SessionsFilter(data={"properties": []})
        all_sessions, _ = self.run_query(unfiltered)
        self.assertEqual(len(all_sessions), 2)
        self.assertEqual(all_sessions[0]["distinct_id"], "2")

        mac_only = SessionsFilter(data={"properties": [{"key": "$os", "value": "Mac OS X"}]})
        mac_sessions, _ = self.run_query(mac_only)
        self.assertEqual(len(mac_sessions), 1)
def test_filter_by_entity_action(self):
    """Filter sessions by action entities, optionally narrowed by an event property."""
    action1 = _create_action(name="custom-event", team=self.team)
    action2 = _create_action(name="another-event", team=self.team)
    self.create_test_data()

    # (single entity filter, expected number of matching sessions)
    cases = [
        ({"type": "action_type", "key": "id", "value": action1.id}, 2),
        ({"type": "action_type", "key": "id", "value": action2.id}, 1),
        (
            {
                "type": "action_type",
                "key": "id",
                "value": action1.id,
                "properties": [{"key": "$os", "value": "Mac OS X"}],
            },
            1,
        ),
    ]
    for entity_filter, expected_count in cases:
        matched = self.run_query(SessionsFilter(data={"filters": [entity_filter]}))[0]
        self.assertLength(matched, expected_count)
def test_filter_with_pagination(self):
    """Run several filter types over the large test set and check results and pagination."""
    self.create_large_testset()

    # Person property filter on email: one match, no further page expected.
    email_filter = SessionsFilter(
        data={"filters": [{"type": "person", "key": "email", "value": "*****@*****.**"}]}
    )
    sessions, pagination = self.run_query(email_filter)
    self.assertLength(sessions, 1)
    self.assertEqual(sessions[0]["distinct_id"], "99")
    self.assertIsNone(pagination)

    # Event filter narrowed by an event property.
    pageview_filter = SessionsFilter(
        data={
            "filters": [
                {
                    "type": "event_type",
                    "key": "id",
                    "value": "$pageview",
                    "properties": [{"key": "$some_property", "value": 88}],
                }
            ]
        }
    )
    sessions, pagination = self.run_query(pageview_filter)
    self.assertLength(sessions, 1)
    self.assertEqual(sessions[0]["distinct_id"], "88")
    self.assertIsNone(pagination)

    # Recording duration filter.
    recording_filter = SessionsFilter(
        data={"filters": [{"type": "recording", "key": "duration", "operator": "gt", "value": 0}]}
    )
    sessions, pagination = self.run_query(recording_filter)
    self.assertLength(sessions, 1)
    self.assertEqual(sessions[0]["distinct_id"], "77")
    self.assertIsNone(pagination)

    # Person property filter with six expected matches and no further page.
    mod15_filter = SessionsFilter(data={"filters": [{"type": "person", "key": "mod15", "value": 10}]})
    sessions, pagination = self.run_query(mod15_filter)
    returned_ids = [session["distinct_id"] for session in sessions]
    self.assertEqual(returned_ids, ["10", "25", "40", "55", "70", "85"])
    self.assertIsNone(pagination)

    # Person property filter where a pagination cursor is expected back.
    mod4_filter = SessionsFilter(data={"filters": [{"type": "person", "key": "mod4", "value": 3}]})
    sessions, pagination = self.run_query(mod4_filter)
    returned_ids = [session["distinct_id"] for session in sessions]
    self.assertEqual(returned_ids, [str(number) for number in range(3, 42, 4)])
    self.assertIsNotNone(pagination)
def test_sessions_avg_length(self):
    """The "avg" session query returns the same series with and without explicit entities."""
    # make sure out of range event doesn't get included
    with freeze_time("2012-01-01T03:21:34.000Z"):
        event_factory(team=self.team, event="bad action", distinct_id="1")

    # (frozen time, event name); every event is recorded for both users.
    timeline = [
        ("2012-01-14T03:21:34.000Z", "1st action"),
        ("2012-01-14T03:25:34.000Z", "2nd action"),  # 4 minutes
        ("2012-01-15T03:59:34.000Z", "3rd action"),
        ("2012-01-15T04:01:34.000Z", "4th action"),  # 2 minutes
    ]
    for frozen_at, event_name in timeline:
        with freeze_time(frozen_at):
            for distinct_id in ("1", "2"):
                event_factory(team=self.team, event=event_name, distinct_id=distinct_id)

    with freeze_time("2012-01-21T04:01:34.000Z"):
        entity_filter = SessionsFilter(
            data={
                "session": "avg",
                "events": [
                    {"id": "1st action"},
                    {"id": "2nd action"},
                    {"id": "3rd action"},
                    {"id": "4th action"},
                ],
            }
        )
        response = sessions().run(entity_filter, self.team)

    with freeze_time("2012-01-21T04:01:34.000Z"):
        no_entity_filter = SessionsFilter(data={"session": "avg"})
        no_entity_response = sessions().run(no_entity_filter, self.team)

    series = response[0]
    self.assertEqual(series["count"], 3)  # average length of all sessions

    # time series
    self.assertEqual(series["data"][0], 4.0)
    self.assertEqual(series["data"][1], 2.0)
    self.assertEqual(series["labels"][0], "Sat. 14 January")
    self.assertEqual(series["labels"][1], "Sun. 15 January")
    self.assertEqual(series["days"][0], "2012-01-14")
    self.assertEqual(series["days"][1], "2012-01-15")
    self.assertEqual(series["chartLabel"], "Average Session Length (minutes)")

    self.assertEqual(response, no_entity_response)
def get_filter(team, data: Optional[dict] = None, request: Optional[HttpRequest] = None):
    """Build the filter object matching the requested insight type.

    The insight is read from ``data["insight"]`` and, when absent there, from
    the ``insight`` query parameter of ``request``.

    Args:
        team: Only forwarded to ``StickinessFilter``.
        data: Raw filter payload. ``None`` (the default) is treated as an
            empty dict, so no dict instance is shared between calls.
        request: Optional HTTP request, passed through to the filter class.

    Returns:
        A ``RetentionFilter``, ``SessionsFilter``, ``StickinessFilter`` or
        ``PathFilter`` for the matching insight, otherwise a plain ``Filter``.
    """
    # Imported inside the function, as in the original.
    # NOTE(review): presumably to avoid an import cycle - confirm.
    from posthog.models.filters.filter import Filter
    from posthog.models.filters.retention_filter import RetentionFilter
    from posthog.models.filters.sessions_filter import SessionsFilter
    from posthog.models.filters.stickiness_filter import StickinessFilter

    if data is None:
        data = {}

    insight = data.get("insight")
    if not insight and request:
        insight = request.GET.get("insight")

    if insight == INSIGHT_RETENTION:
        return RetentionFilter(data={**data, "insight": INSIGHT_RETENTION}, request=request)
    elif insight == INSIGHT_SESSIONS:
        return SessionsFilter(data={**data, "insight": INSIGHT_SESSIONS}, request=request)
    elif insight == INSIGHT_STICKINESS or (insight == INSIGHT_TRENDS and data.get("shown_as") == "Stickiness"):
        # Trends shown as "Stickiness" are handled by the stickiness filter too.
        return StickinessFilter(
            data=data, request=request, team=team, get_earliest_timestamp=earliest_timestamp_func
        )
    elif insight == INSIGHT_PATHS:
        return PathFilter(data={**data, "insight": INSIGHT_PATHS}, request=request)
    return Filter(data=data, request=request)
def test_match_multiple_action_filters(self):
    """Two event filters match one session and report one timestamp per filter."""
    self.create_test_data()

    both_events_filter = SessionsFilter(
        data={
            "filters": [
                {"type": "event_type", "key": "id", "value": "custom-event"},
                {"type": "event_type", "key": "id", "value": "another-event"},
            ]
        }
    )
    matched = self.run_query(both_events_filter)
    self.assertLength(matched, 1)

    # Both filters are expected to report the same timestamp.
    expected_time = datetime(2012, 1, 15, 4, 1, 34).replace(tzinfo=pytz.UTC)
    self.assertEqual(matched[0]["action_filter_times"], [expected_time, expected_time])
def session(self, request: request.Request, *args: Any, **kwargs: Any) -> Response:
    """Return session query results and, for the list view, the cursor for the next page.

    When the filter has no ``session_type`` and more than
    ``SESSIONS_LIST_DEFAULT_LIMIT`` rows came back, the extra row is dropped
    and ``OFFSET`` / ``DATE_FROM`` entries are added to the payload.
    """
    from posthog.queries.sessions import SESSIONS_LIST_DEFAULT_LIMIT

    team = self.team
    sessions_filter = SessionsFilter(request=request)

    # Fetch one row more than a page holds; its presence signals another page.
    fetch_limit = SESSIONS_LIST_DEFAULT_LIMIT + 1
    rows = sessions.Sessions().run(filter=sessions_filter, team=team, limit=fetch_limit)
    result: Dict[str, Any] = {"result": rows}

    if sessions_filter.distinct_id:
        result = self._filter_sessions_by_distinct_id(sessions_filter.distinct_id, result)

    if sessions_filter.session_type is None:
        next_offset = sessions_filter.offset + fetch_limit - 1
        if len(result["result"]) > SESSIONS_LIST_DEFAULT_LIMIT:
            result["result"].pop()
            first_start = result["result"][0]["start_time"].isoformat()
            result.update({OFFSET: next_offset, DATE_FROM: first_start})

    return Response(result)
def sessions(self, request: Request, *args: Any, **kwargs: Any) -> Response:
    """Return one page of ClickHouse sessions, optionally restricted to one person.

    ``limit`` and ``offset`` are read from the query string. One extra row is
    requested; when it comes back it is dropped and the response also carries
    the ``offset`` of the next page.
    """
    team = self.team
    sessions_filter = SessionsFilter(request=request)
    limit = int(request.GET.get("limit", SESSIONS_LIST_DEFAULT_LIMIT))
    offset = int(request.GET.get("offset", 0))

    response = ClickhouseSessionsList().run(team=team, filter=sessions_filter, limit=limit + 1, offset=offset)

    if sessions_filter.distinct_id:
        # Only the person lookup is guarded: an empty lookup result means the
        # distinct id is unknown, so no session can match.
        try:
            person_ids = get_persons_by_distinct_ids(team.pk, [sessions_filter.distinct_id])[0].distinct_ids
        except IndexError:
            response = []
        else:
            response = [session for session in response if session["distinct_id"] in person_ids]

    if len(response) > limit:
        # Drop the extra row and tell the caller where the next page starts.
        response.pop()
        return Response({"result": response, "offset": offset + limit})
    return Response({"result": response})
def test_compare(self):
    """The "avg" session query with ``compare`` enabled returns a current and a previous series."""
    # (frozen time, event name); every event is recorded for both users.
    timeline = [
        ("2012-01-14T03:21:34.000Z", "1st action"),
        ("2012-01-14T03:25:34.000Z", "2nd action"),
        ("2012-01-25T03:59:34.000Z", "3rd action"),
        ("2012-01-25T04:01:34.000Z", "4th action"),
    ]
    for frozen_at, event_name in timeline:
        with freeze_time(frozen_at):
            for distinct_id in ("1", "2"):
                event_factory(team=self.team, event=event_name, distinct_id=distinct_id)

    compare_filter = SessionsFilter(
        data={
            "date_from": "2012-01-20",
            "date_to": "2012-01-30",
            "interval": "day",
            "compare": True,
            "session": "avg",
        }
    )

    # Run without anything to compare to
    compare_response = sessions().run(filter=compare_filter, team=self.team)
    current_series, previous_series = compare_response[0], compare_response[1]
    self.assertEqual(current_series["data"][5], 2.0)
    self.assertEqual(previous_series["data"][4], 4.0)
def sessions(self, request: request.Request, *args: Any, **kwargs: Any) -> Response:
    """Return the sessions list and its pagination info for the request's filter."""
    from posthog.queries.sessions.sessions_list import SessionsList

    sessions_filter = SessionsFilter(request=request)
    page, next_page = SessionsList.run(filter=sessions_filter, team=self.team)
    payload = {"result": page, "pagination": next_page}
    return Response(payload)
def test_filter_sessions_by_recordings(self):
    """Recordings are flagged ``viewed`` only for a view recorded on this test's own team."""
    _, team2, user2 = User.objects.bootstrap("Test2", "*****@*****.**", None)

    # One view on this team, one on the other team.
    SessionRecordingViewed.objects.create(team=self.team, user_id=self.user.pk, session_id="1")
    SessionRecordingViewed.objects.create(team=team2, user_id=user2.pk, session_id="2")

    user_filter = SessionsFilter(data={"user_id": self.user.pk})
    expected_recordings = [
        [{"id": "1", "viewed": True}, {"id": "3", "viewed": False}],
        [],
        [{"id": "2", "viewed": False}],
        [],
    ]
    self._test_filter_sessions(user_filter, expected_recordings)
def sessions(self, request: Request, *args: Any, **kwargs: Any) -> Response:  # type: ignore
    """Return the ClickHouse sessions list and its pagination info for the request's filter."""
    sessions_filter = SessionsFilter(request=request, team=self.team)
    page, next_page = ClickhouseSessionsList.run(team=self.team, filter=sessions_filter)
    payload = {"result": page, "pagination": next_page}
    return Response(payload)
def test_filter_sessions_by_unseen_recording(self):
    """The ``unseen`` recording filter leaves out the recording this user already viewed."""
    SessionRecordingViewed.objects.create(team=self.team, user_id=self.user.pk, session_id="2")

    unseen_filter = SessionsFilter(
        data={
            "filters": [{"type": "recording", "key": "unseen", "value": 1}],
            "user_id": self.user.pk,
        }
    )
    expected_recordings = [
        [
            {"id": "1", "recording_duration": 25, "viewed": False},
            {"id": "3", "recording_duration": 45, "viewed": False},
        ]
    ]
    self._test_filter_sessions(unseen_filter, expected_recordings)
def test_filter_by_entity_event(self):
    """Filter sessions by one or more event entities, optionally narrowed by a property."""
    self.create_test_data()

    custom_event = {"type": "event_type", "key": "id", "value": "custom-event"}
    another_event = {"type": "event_type", "key": "id", "value": "another-event"}

    # (entity filters, expected number of matching sessions)
    cases = [
        ([dict(custom_event)], 2),
        ([dict(another_event)], 1),
        ([dict(custom_event), dict(another_event)], 1),
        ([dict(custom_event, properties=[{"key": "$os", "value": "Mac OS X"}])], 1),
    ]
    for entity_filters, expected_count in cases:
        matched = self.run_query(SessionsFilter(data={"filters": entity_filters}))[0]
        self.assertLength(matched, expected_count)
def test_no_events(self):
    """With no events recorded, the "avg" session query returns an empty list."""
    empty_range_filter = SessionsFilter(
        data={
            "date_from": "2012-01-20",
            "date_to": "2012-01-30",
            "interval": "day",
            "session": "avg",
        }
    )
    response = sessions().run(empty_range_filter, self.team)
    self.assertEqual(response, [])
def test_start_end_url(self):
    """Each listed session exposes the expected ``start_url`` and ``end_url``.

    ``assertDictContainsSubset`` is deprecated and was removed from
    ``unittest`` in Python 3.12, so the subset check is spelled out: overlaying
    the expected pairs on the actual dict must leave it unchanged.
    """
    self.create_test_data()

    response, _ = self.run_query(SessionsFilter(data={"properties": []}))

    expected_by_index = [
        {"distinct_id": "2", "start_url": "aloha.com/2", "end_url": "aloha.com/lastpage"},
        {"distinct_id": "1", "start_url": None, "end_url": None},
    ]
    for index, expected in enumerate(expected_by_index):
        actual = response[index]
        # Equal only if every expected key is present with the expected value.
        self.assertEqual(actual, {**actual, **expected})
def test_sessions_count_buckets_default(self):
    """With the default date range, every "dist" bucket has a count of zero.

    The only event is created ten days before the frozen query time.
    """
    with freeze_time("2012-01-11T01:25:30.000Z"):
        event_factory(team=self.team, event="1st action", distinct_id="2")

    with freeze_time("2012-01-21T01:25:30.000Z"):
        response = sessions().run(SessionsFilter(data={"session": "dist"}), self.team)

    # Iterate the buckets directly; the index from enumerate() was never used.
    for item in response:
        self.assertEqual(item["count"], 0)
def calculate_session(self, request: Request) -> Dict[str, Any]:
    """Run the ClickHouse sessions query for the request and wrap it under ``"result"``."""
    query = ClickhouseSessions()
    team = self.team
    sessions_filter = SessionsFilter(request=request, data={"insight": INSIGHT_SESSIONS})
    return {"result": query.run(team=team, filter=sessions_filter)}
def calculate_sessions(self, events: QuerySet, filter: SessionsFilter, team: Team, limit: int, offset: int) -> List[Dict[str, Any]]:
    """Split ``events`` into sessions with raw SQL and dispatch on the filter's session type.

    Args:
        events: Base event queryset; it is date-filtered and annotated here.
        filter: Sessions filter. Its ``_date_from`` is overwritten when a
            session type is set and no ``date_from`` was given.
        team: Team used to look up the earliest event and passed to the list query.
        limit: Forwarded to ``_session_list`` only.
        offset: Forwarded to ``_session_list`` only.

    Returns:
        Whatever ``_session_avg``, ``_session_dist`` or ``_session_list``
        returns for the selected session type.
    """
    # NOTE(review): this block was reconstructed from a whitespace-collapsed
    # source; whitespace inside the SQL literal below may differ from the original.

    # format date filter for session view
    _date_gte = Q()
    if filter.session_type is None:
        # if _date_from is not explicitly set we only want to get the last day worth of data
        # otherwise the query is very slow
        if filter._date_from and filter.date_to:
            _date_gte = Q(
                timestamp__gte=filter.date_from,
                timestamp__lte=filter.date_to + relativedelta(days=1),
            )
        else:
            # Default window: from midnight of the current day to one day later.
            dt = now()
            dt = dt.replace(hour=0, minute=0, second=0, microsecond=0)
            _date_gte = Q(timestamp__gte=dt, timestamp__lte=dt + relativedelta(days=1))
    else:
        if not filter.date_from:
            # Fall back to midnight of the team's earliest event.
            # NOTE(review): presumably raises IndexError when the team has no events - confirm.
            filter._date_from = (
                Event.objects.filter(team_id=team)
                .order_by("timestamp")[0]
                .timestamp.replace(hour=0, minute=0, second=0, microsecond=0)
                .isoformat()
            )

    # Annotate each event with the previous event and timestamp of the same distinct_id.
    sessions = (
        events.filter(_date_gte)
        .annotate(
            previous_timestamp=Window(
                expression=Lag("timestamp", default=None),
                partition_by=F("distinct_id"),
                order_by=F("timestamp").asc(),
            )
        )
        .annotate(
            previous_event=Window(
                expression=Lag("event", default=None),
                partition_by=F("distinct_id"),
                order_by=F("timestamp").asc(),
            )
        )
    )

    sessions_sql, sessions_sql_params = sessions.query.sql_with_params()

    # An event opens a new session when it has no predecessor or the gap to the
    # previous event is at least 60 * 30 seconds; running sums of that flag
    # give global and per-user session ids.
    all_sessions = "\
        SELECT *,\
            SUM(new_session) OVER (ORDER BY distinct_id, timestamp) AS global_session_id,\
            SUM(new_session) OVER (PARTITION BY distinct_id ORDER BY timestamp) AS user_session_id\
            FROM (SELECT id, team_id, distinct_id, event, elements_hash, timestamp, properties, CASE WHEN EXTRACT('EPOCH' FROM (timestamp - previous_timestamp)) >= (60 * 30)\
                OR previous_timestamp IS NULL \
                THEN 1 ELSE 0 END AS new_session \
                FROM ({}) AS inner_sessions\
            ) AS outer_sessions".format(sessions_sql)

    result: List = []
    if filter.session_type == SESSION_AVG:
        result = self._session_avg(all_sessions, sessions_sql_params, filter)
    elif filter.session_type == SESSION_DIST:
        result = self._session_dist(all_sessions, sessions_sql_params)
    else:
        # No (or any other) session type: plain paginated session list.
        result = self._session_list(all_sessions, sessions_sql_params, team, filter, limit, offset)

    return result
def test_sessions_and_cohort(self):
    """A cohort property filter limits the sessions list to one session."""
    self.create_test_data()

    cohort = Cohort.objects.create(team=self.team, groups=[{"properties": {"email": "bla"}}])
    cohort.calculate_people()

    with freeze_time("2012-01-15T04:01:34.000Z"):
        cohort_property = {"key": "id", "value": cohort.pk, "type": "cohort"}
        cohort_filter = SessionsFilter(data={"properties": [cohort_property]})
        response, _ = self.run_query(cohort_filter)
        self.assertEqual(len(response), 1)
def test_filter_sessions_by_recording_duration_lt(self):
    """A ``duration lt 30`` recording filter keeps only the two shorter recordings."""
    duration_filter = SessionsFilter(
        data={"filters": [{"type": "recording", "key": "duration", "operator": "lt", "value": 30}]}
    )
    expected_recordings = [
        [{"id": "1", "recording_duration": 25, "viewed": False}],
        [{"id": "2", "recording_duration": 13, "viewed": False}],
    ]
    self._test_filter_sessions(duration_filter, expected_recordings)
def sessions(self, request: request.Request, *args: Any, **kwargs: Any) -> Response:
    """Return a page of sessions, continuing from the ``pagination`` query parameter.

    The ``pagination`` parameter is parsed as JSON (default ``"{}"``) and
    its entries are passed to ``SessionsList().run`` as keyword arguments.
    """
    from posthog.queries.sessions.sessions_list import SessionsList

    sessions_filter = SessionsFilter(request=request)
    raw_cursor = request.GET.get("pagination", "{}")
    cursor_kwargs = json.loads(raw_cursor)

    page, next_page = SessionsList().run(filter=sessions_filter, team=self.team, **cursor_kwargs)

    if sessions_filter.distinct_id:
        page = self._filter_sessions_by_distinct_id(sessions_filter.distinct_id, page)

    return Response({"result": page, "pagination": next_page})
def test_filter_sessions_by_recording_duration_gt(self):
    """A ``duration gt 15`` recording filter keeps only the two longer recordings."""
    duration_filter = SessionsFilter(
        data={"filters": [{"type": "recording", "key": "duration", "operator": "gt", "value": 15}]}
    )
    expected_recordings = [
        [
            {"id": "1", "recording_duration": 25, "viewed": False},
            {"id": "3", "recording_duration": 45, "viewed": False},
        ]
    ]
    self._test_filter_sessions(duration_filter, expected_recordings)
def run(cls, filter: SessionsFilter, team: Team, *args, **kwargs) -> Tuple[List[Dict], Optional[Dict]]:
    """Fetch pages until at least ``limit`` sessions are collected or no page remains.

    Sessions queries do post-filtering based on session recordings, so a
    single page can come back short. This makes sure we return some data
    every page.

    Returns:
        The collected sessions and the pagination value of the last fetched
        page (``None`` when there is nothing further to fetch).
    """
    limit = kwargs.get("limit", SESSIONS_LIST_DEFAULT_LIMIT)
    collected = []
    current_filter = filter

    while True:
        page, pagination = cls(current_filter, team, limit=limit).fetch_page()
        collected.extend(page)

        exhausted = pagination is None
        if len(collected) >= limit or exhausted:
            return collected, pagination

        # Continue from where the previous page stopped.
        current_filter = current_filter.with_data({"pagination": pagination})
def sessions(self, request: Request, *args: Any, **kwargs: Any) -> Response:
    """Return ClickHouse sessions and pagination info, optionally restricted to one person.

    When the filter carries a ``distinct_id``, only sessions whose
    ``distinct_id`` belongs to the matching person are kept; an unknown
    distinct id yields an empty result.
    """
    sessions_filter = SessionsFilter(request=request)
    sessions, pagination = ClickhouseSessionsList().run(team=self.team, filter=sessions_filter)

    if sessions_filter.distinct_id:
        # Only the person lookup is guarded: an empty lookup result means the
        # distinct id is unknown, so no session can match.
        try:
            person_ids = get_persons_by_distinct_ids(self.team.pk, [sessions_filter.distinct_id])[0].distinct_ids
        except IndexError:
            sessions = []
        else:
            sessions = [session for session in sessions if session["distinct_id"] in person_ids]

    return Response({"result": sessions, "pagination": pagination})
def test_filter_sessions_by_recording_duration_lt(self):
    """A ``duration lt 30`` recording filter keeps recordings "1" and "2"."""
    duration_condition = {"type": "recording", "key": "duration", "operator": "lt", "value": 30}
    duration_filter = SessionsFilter(data={"filters": [duration_condition]})
    expected_recording_ids = [["1"], ["2"]]
    self._test_filter_sessions(duration_filter, expected_recording_ids)
def test_match_multiple_action_filters(self):
    """Two event filters match a single session that carries three matching events."""
    self.create_test_data()

    both_events_filter = SessionsFilter(
        data={
            "filters": [
                {"type": "event_type", "key": "id", "value": "custom-event"},
                {"type": "event_type", "key": "id", "value": "another-event"},
            ]
        }
    )
    matched, _ = self.run_query(both_events_filter)

    self.assertLength(matched, 1)
    self.assertLength(matched[0]["matching_events"], 3)