def _retrieve_people(self, target_entity: Entity, filter: StickinessFilter, team: Team) -> ReturnDict:
    """Return serialized people who performed `target_entity` on exactly
    `filter.selected_interval` distinct intervals (stickiness drill-down).

    Raises ValueError when the entity is neither an action nor an event;
    returns an empty serialized list when the referenced action is missing
    or deleted.
    """
    from posthog.api.person import PersonSerializer

    if target_entity.type == TREND_FILTER_TYPE_ACTIONS:
        # Validate the action exists for this team (and is not deleted)
        # before building the event query.
        actions = Action.objects.filter(deleted=False, team=team)
        actions = actions.prefetch_related(Prefetch("steps", queryset=ActionStep.objects.order_by("id")))
        try:
            actions.get(pk=target_entity.id)
        except Action.DoesNotExist:
            return PersonSerializer([], many=True).data
    elif target_entity.type != TREND_FILTER_TYPE_EVENTS:
        raise ValueError("target entity must be action or event")

    # Both entity types use identical event filtering; previously this
    # expression was duplicated in each branch — computed once here.
    filtered_events = base.process_entity_for_events(target_entity, team_id=team.pk, order_by=None).filter(
        base.filter_events(team.pk, filter, target_entity)
    )

    # Count distinct truncated-timestamp buckets per person and keep only
    # people active on exactly the selected number of intervals.
    events = (
        filtered_events.values("person_id")
        .annotate(day_count=Count(filter.trunc_func("timestamp"), distinct=True))
        .filter(day_count=filter.selected_interval)
    )

    # Page of 100 people per request.
    people = Person.objects.filter(
        team=team,
        id__in=[p["person_id"] for p in events[filter.offset : filter.offset + 100]],
    )
    # Warm the distinct-id cache the serializer reads.
    people = people.prefetch_related(Prefetch("persondistinctid_set", to_attr="distinct_ids_cache"))
    return PersonSerializer(people, many=True).data
def get_people(
    self,
    filter: Filter,
    team_id: int,
    target_date: datetime,
    lifecycle_type: str,
    request: Request,
    limit: int = 100,
):
    """Serialize the people in a given lifecycle status on `target_date`
    (ClickHouse backend)."""
    from posthog.api.person import PersonSerializer

    events_query, events_params = LifecycleEventQuery(team_id=team_id, filter=filter).get_query()
    sql = LIFECYCLE_PEOPLE_SQL.format(events_query=events_query, interval_expr=filter.interval)
    rows = sync_execute(
        sql,
        {
            **events_params,
            "status": lifecycle_type,
            "target_date": target_date,
            "offset": filter.offset,
            "limit": limit,
        },
    )

    uuids = [row[0] for row in rows]
    people = get_persons_by_uuids(team_id=team_id, uuids=uuids)
    # Warm the distinct-id cache the serializer reads.
    people = people.prefetch_related(Prefetch("persondistinctid_set", to_attr="distinct_ids_cache"))
    return PersonSerializer(people, many=True).data
def _format_results(self, results):
    """Map query rows (first column = person uuid) to serialized persons."""
    from posthog.api.person import PersonSerializer

    uuids = [row[0] for row in results]
    matched = Person.objects.filter(team_id=self._team.pk, uuid__in=uuids)
    return PersonSerializer(matched, many=True).data
def run(self) -> Dict[str, Any]:
    """Assemble one page of session-recording snapshots together with the
    recording's person, total duration and pagination metadata."""
    from posthog.api.person import PersonSerializer

    distinct_id, start_time, snapshots = self.get_snapshot_data()

    # Apply limit and offset after decompressing to account for non-fully formed chunks.
    snapshots_subset = snapshots[self._offset:(self._offset + self._limit)]

    duration = 0
    if len(snapshots) > 1:
        # Millisecond span between last and first snapshot; snapshot
        # timestamps are in ms, hence / 1000.0 to get seconds for fromtimestamp.
        duration = get_milliseconds_between_dates(
            datetime.fromtimestamp(snapshots[-1].get("timestamp", 0) / 1000.0),
            datetime.fromtimestamp(snapshots[0].get("timestamp", 0) / 1000.0),
        )

    # NOTE(review): the "+ 1" makes has_next False when exactly one extra
    # snapshot remains beyond this page — confirm this off-by-one is intentional.
    has_next = len(snapshots) > (self._offset + self._limit + 1)
    next_url = (format_query_params_absolute_url(
        self._request, self._offset + self._limit, self._limit) if has_next else None)

    # The recording may have no resolvable person (empty distinct_id).
    person = (PersonSerializer(
        Person.objects.get(team=self._team, persondistinctid__distinct_id=distinct_id)).data
              if distinct_id else None)

    return {
        "snapshots": snapshots_subset,
        "person": person,
        "start_time": start_time,
        "next": next_url,
        "duration": duration,
    }
def on_perform(self, event):
    """Fire the `action_performed` REST hook, with the event (person data
    inlined) serialized as the payload."""
    from posthog.api.event import EventSerializer
    from posthog.api.person import PersonSerializer

    event.action = self
    event.serialized_person = PersonSerializer(event.person).data
    raw_hook_event.send(
        sender=None,
        event_name="action_performed",
        instance=self,
        payload=EventSerializer(event).data,
        user=event.team,
    )
def _retrieve_people(self, filter: RetentionFilter, team: Team):
    """Return serialized people belonging to one retention-table cell
    (Postgres ORM backend).

    For the cohort column (selected_interval == 0) the target-entity
    condition is applied; for later columns the returning-entity condition
    is used instead.
    """
    period = filter.period
    trunc, fields = self._get_trunc_func("timestamp", period)
    is_first_time_retention = filter.retention_type == RETENTION_FIRST_TIME

    entity_condition, _ = self.get_entity_condition(filter.target_entity, "events")
    returning_condition, _ = self.get_entity_condition(filter.returning_entity, "first_event_date")
    # Column 0 is the cohort itself (target entity); later columns are returns.
    _entity_condition = returning_condition if filter.selected_interval > 0 else entity_condition

    events = Event.objects.filter(team_id=team.pk).add_person_id(team.pk)
    filtered_events = events.filter(filter.recurring_date_filter_Q()).filter(
        properties_to_Q(filter.properties, team_id=team.pk)
    )

    # Correlated subquery on the outer Person id: for first-time retention the
    # person's *first* matching event must fall inside the reference window;
    # otherwise any matching event in the reference window qualifies.
    inner_events = (
        Event.objects.filter(team_id=team.pk)
        .filter(properties_to_Q(filter.properties, team_id=team.pk))
        .add_person_id(team.pk)
        .filter(**{"person_id": OuterRef("id")})
        .filter(entity_condition)
        .values("person_id")
        .annotate(first_date=Min(trunc))
        .filter(filter.reference_date_filter_Q("first_date"))
        .distinct()
        if is_first_time_retention
        else Event.objects.filter(team_id=team.pk)
        .filter(filter.reference_date_filter_Q())
        .filter(properties_to_Q(filter.properties, team_id=team.pk))
        .add_person_id(team.pk)
        .filter(**{"person_id": OuterRef("id")})
        .filter(entity_condition)
    )

    # Keep events matching the cell's entity condition, restricted to people
    # for whom the reference-window subquery matches; collapse to person ids.
    filtered_events = (
        filtered_events.filter(_entity_condition)
        .filter(
            Exists(
                Person.objects.filter(**{
                    "id": OuterRef("person_id"),
                }).filter(Exists(inner_events)).only("id")
            )
        )
        .values("person_id")
        .distinct()
    ).all()

    # Page of 100 people per request.
    people = Person.objects.filter(
        team=team,
        id__in=[
            p["person_id"]
            for p in filtered_events[filter.offset:filter.offset + 100]
        ],
    )
    # Warm the distinct-id cache the serializer reads.
    people = people.prefetch_related(
        Prefetch("persondistinctid_set", to_attr="distinct_ids_cache"))

    from posthog.api.person import PersonSerializer
    return PersonSerializer(people, many=True).data
def run(self, filter: Filter, team: Team, *args, **kwargs) -> Dict[str, Any]:
    """Return a recording's snapshots sorted by timestamp, plus its person."""
    from posthog.api.person import PersonSerializer

    distinct_id, snapshots = self.query_recording_snapshots(team, kwargs["session_recording_id"])

    if distinct_id:
        owner = Person.objects.get(team=team, persondistinctid__distinct_id=distinct_id)
        person = PersonSerializer(owner).data
    else:
        person = None

    ordered = sorted(snapshots, key=lambda snapshot: snapshot["timestamp"])
    return {"snapshots": ordered, "person": person}
def _retrieve_people(self, target_entity: Entity, filter: StickinessFilter, team: Team) -> ReturnDict:
    """Serialize the people matching the selected stickiness interval."""
    from posthog.api.person import PersonSerializer

    interval_counts = stickiness_format_intervals(
        stickiness_process_entity_type(target_entity, team, filter), filter
    )
    people = stickiness_fetch_people(interval_counts, team, filter).prefetch_related(
        Prefetch("persondistinctid_set", to_attr="distinct_ids_cache")
    )
    return PersonSerializer(people, many=True).data
def retrieve(self, request: request.Request, *args: Any, **kwargs: Any) -> response.Response:
    """Return one session recording's metadata (segments, per-window times,
    viewed flag) together with its person; 404 when the recording is unknown."""
    session_recording_id = kwargs["pk"]
    session_recording_meta_data = self._get_session_recording_meta_data(
        request, session_recording_id)
    if not session_recording_meta_data:
        raise exceptions.NotFound("Session not found")

    if not request.user.is_authenticated:  # for mypy
        raise exceptions.NotAuthenticated()

    viewed_session_recording = SessionRecordingViewed.objects.filter(
        team=self.team, user=request.user,
        session_id=session_recording_id).exists()

    session_recording_serializer = SessionRecordingMetadataSerializer(
        data={
            # Segments are dataclasses; the serializer expects plain dicts.
            "segments": [
                dataclasses.asdict(segment)
                for segment in session_recording_meta_data.segments
            ],
            "start_and_end_times_by_window_id":
            session_recording_meta_data.start_and_end_times_by_window_id,
            "session_id": session_recording_id,
            "viewed": viewed_session_recording,
        })
    session_recording_serializer.is_valid(raise_exception=True)

    # The recording's person may have been deleted; serialize None then.
    try:
        person: Union[Person, None] = Person.objects.get(
            persondistinctid__distinct_id=session_recording_meta_data.
            distinct_id,
            persondistinctid__team_id=self.team,
            team=self.team,
        )
    except Person.DoesNotExist:
        person = None

    # ?save_view marks the recording as viewed by the requesting user.
    if request.GET.get("save_view"):
        SessionRecordingViewed.objects.get_or_create(
            team=self.team,
            user=request.user,
            session_id=session_recording_id)

    return response.Response({
        "result": {
            "session_recording": session_recording_serializer.data,
            "person": PersonSerializer(instance=person).data,
        }
    })
def list(self, request: request.Request, *args: Any, **kwargs: Any) -> Response:
    """List session recordings with their viewed-status and person attached."""
    filter = SessionRecordingsFilter(request=request)
    (session_recordings, more_recordings_available) = self._get_session_recording_list(filter)

    if not request.user.is_authenticated:  # for mypy
        raise exceptions.NotAuthenticated()

    viewed_session_recordings = set(
        SessionRecordingViewed.objects.filter(team=self.team, user=request.user).values_list(
            "session_id", flat=True
        )
    )

    # Resolve every recording's distinct_id to its Person in one query.
    distinct_ids = [recording["distinct_id"] for recording in session_recordings]
    person_distinct_ids = PersonDistinctId.objects.filter(
        distinct_id__in=distinct_ids, team=self.team
    ).select_related("person")
    distinct_id_to_person = {pdi.distinct_id: pdi.person for pdi in person_distinct_ids}

    session_recordings = [
        {
            **recording,
            "viewed": recording["session_id"] in viewed_session_recordings,
        }
        for recording in session_recordings
    ]

    session_recording_serializer = SessionRecordingSerializer(data=session_recordings, many=True)
    session_recording_serializer.is_valid(raise_exception=True)

    session_recording_serializer_with_person = [
        {
            **recording,
            "person": PersonSerializer(instance=distinct_id_to_person.get(recording["distinct_id"])).data,
        }
        for recording in session_recording_serializer.data
    ]

    return Response(
        {"results": session_recording_serializer_with_person, "has_next": more_recordings_available}
    )
def _retrieve_people(self, filter: RetentionFilter, team: Team):
    """Serialize the people in one retention-table cell (ClickHouse backend)."""
    period = filter.period
    is_first_time_retention = filter.retention_type == RETENTION_FIRST_TIME
    trunc_func = get_trunc_func_ch(period)
    prop_filters, prop_filter_params = parse_prop_clauses(filter.properties, team.pk)

    # Column 0 re-uses the target entity; later columns use the returning entity.
    returning_entity = filter.returning_entity if filter.selected_interval > 0 else filter.target_entity
    target_query, target_params = self._get_condition(filter.target_entity, table="e")
    target_query_formatted = "AND {target_query}".format(target_query=target_query)
    return_query, return_params = self._get_condition(returning_entity, table="e", prepend="returning")
    return_query_formatted = "AND {return_query}".format(return_query=return_query)

    # First-time retention anchors on each person's first matching event.
    reference_event_query = (REFERENCE_EVENT_UNIQUE_SQL if is_first_time_retention else REFERENCE_EVENT_SQL).format(
        target_query=target_query_formatted,
        filters=prop_filters,
        trunc_func=trunc_func,
    )
    # Reference window = the cohort's first period; selected window = the
    # cell's period, shifted by the chosen interval.
    reference_date_from = filter.date_from
    reference_date_to = filter.date_from + filter.period_increment
    date_from = filter.date_from + filter.selected_interval * filter.period_increment
    date_to = date_from + filter.period_increment

    result = sync_execute(
        RETENTION_PEOPLE_SQL.format(
            reference_event_query=reference_event_query, target_query=return_query_formatted, filters=prop_filters
        ),
        {
            "team_id": team.pk,
            # Hourly periods keep clock time; coarser ones floor to midnight.
            "start_date": date_from.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")
            ),
            "end_date": date_to.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")
            ),
            "reference_start_date": reference_date_from.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")
            ),
            "reference_end_date": reference_date_to.strftime(
                "%Y-%m-%d{}".format(" %H:%M:%S" if filter.period == "Hour" else " 00:00:00")
            ),
            "offset": filter.offset,
            **target_params,
            **return_params,
            **prop_filter_params,
        },
    )
    people = Person.objects.filter(team_id=team.pk, uuid__in=[val[0] for val in result])

    from posthog.api.person import PersonSerializer
    return PersonSerializer(people, many=True).data
def run(self, team: Team, session_recording_id: str, *args, **kwargs) -> Dict[str, Any]:
    """Fetch a recording's snapshots together with its start time and person."""
    from posthog.api.person import PersonSerializer

    distinct_id, start_time, snapshots = self.query_recording_snapshots(team, session_recording_id)

    person = None
    if distinct_id:
        owner = Person.objects.get(team=team, persondistinctid__distinct_id=distinct_id)
        person = PersonSerializer(owner).data

    return {
        "snapshots": snapshots,
        "person": person,
        "start_time": start_time,
    }
def _retrieve_people_in_period(self, filter: RetentionFilter, team: Team):
    """People-per-period retention drill-down (Postgres backend): for each
    person, count and list the periods they appeared in, ordered by
    appearance count."""
    # Shrink the window so only intervals at/after the selected one count.
    new_data = filter._data
    new_data.update({
        "total_intervals": filter.total_intervals - filter.selected_interval
    })
    filter = RetentionFilter(data=new_data)

    format_fields, params = self._determine_query_params(filter, team)

    # NOTE(review): `WHERE first_date = 0` presumably selects the cohort
    # anchored at the first interval — confirm against _determine_query_params.
    final_query = """
        SELECT person_id, count(person_id) appearance_count, array_agg(date) appearances FROM (
            SELECT DISTINCT
                {fields}
                "events"."person_id"
            FROM ({event_query}) events
            LEFT JOIN ({reference_event_query}) first_event_date
              ON (events.person_id = first_event_date.person_id)
            WHERE event_date >= first_date
              AND {target_condition} AND {return_condition}
              OR ({target_condition} AND event_date = first_date)
        ) person_appearances
        WHERE first_date = 0
        GROUP BY person_id
        ORDER BY appearance_count DESC
        LIMIT %s OFFSET %s
    """.format(**format_fields)

    result = []
    from posthog.api.person import PersonSerializer

    with connection.cursor() as cursor:
        # Page size fixed at 100 people per request.
        cursor.execute(
            final_query,
            params + (100, filter.offset),
        )
        raw_results = cursor.fetchall()

    # Map person pk -> serialized person for the rows we got back.
    people_dict = {}
    for person in Person.objects.filter(
            team_id=team.pk, id__in=[val[0] for val in raw_results]):
        people_dict.update({person.pk: PersonSerializer(person).data})

    result = self.process_people_in_period(filter, raw_results, people_dict)
    return result
def retrieve(self, request: request.Request, *args: Any, **kwargs: Any) -> response.Response:
    """Return one session recording's metadata, viewed flag and person;
    404 when the recording resolves to no session_id."""
    session_recording_id = kwargs["pk"]
    filter = SessionRecordingsFilter(request=request)
    session_recording_meta_data = self._get_session_recording_meta_data(
        request, filter, session_recording_id)
    if not session_recording_meta_data.get("session_id"):
        raise exceptions.NotFound("Session not found")

    if not request.user.is_authenticated:  # for mypy
        raise exceptions.NotAuthenticated()

    viewed_session_recording = SessionRecordingViewed.objects.filter(
        team=self.team, user=request.user,
        session_id=session_recording_id).exists()

    session_recording_serializer = SessionRecordingSerializer(
        data={
            **session_recording_meta_data, "session_id": session_recording_id,
            "viewed": viewed_session_recording
        })
    session_recording_serializer.is_valid(raise_exception=True)

    distinct_id = session_recording_meta_data["distinct_id"]
    # The person may have been deleted since the recording was captured.
    try:
        person: Union[Person, None] = Person.objects.get(
            persondistinctid__distinct_id=distinct_id,
            persondistinctid__team_id=self.team,
            team=self.team)
    except Person.DoesNotExist:
        person = None

    # ?save_view marks the recording as viewed by the requesting user.
    if request.GET.get("save_view"):
        SessionRecordingViewed.objects.get_or_create(
            team=self.team,
            user=request.user,
            session_id=session_recording_id)

    return response.Response({
        "result": {
            "session_recording": session_recording_serializer.data,
            "person": PersonSerializer(instance=person).data,
        }
    })
def test_merge_people(self, mock_capture_internal) -> None:
    """Merging two people into a third emits $create_alias events, returns
    the union of distinct_ids, and writes activity-log entries for all
    three people involved."""
    # created first
    person3 = _create_person(team=self.team, distinct_ids=["distinct_id_3"], properties={"oh": "hello"})
    person1 = _create_person(
        team=self.team, distinct_ids=["1"], properties={"$browser": "whatever", "$os": "Mac OS X"}
    )
    person2 = _create_person(team=self.team, distinct_ids=["2"], properties={"random_prop": "asdf"})

    response = self.client.post("/api/person/%s/merge/" % person1.pk, {"ids": [person2.pk, person3.pk]},)

    # Each merged-in person is aliased onto person1's distinct_id ("1").
    mock_capture_internal.assert_has_calls(
        [
            mock.call(
                {"event": "$create_alias", "properties": {"alias": "2"}},
                "1",
                None,
                None,
                unittest.mock.ANY,
                unittest.mock.ANY,
                self.team.id,
            ),
            mock.call(
                {"event": "$create_alias", "properties": {"alias": "distinct_id_3"}},
                "1",
                None,
                None,
                unittest.mock.ANY,
                unittest.mock.ANY,
                self.team.id,
            ),
        ],
        any_order=True,
    )

    self.assertEqual(response.status_code, 201)
    self.assertCountEqual(response.json()["distinct_ids"], ["1", "2", "distinct_id_3"])

    person_one_dict = PersonSerializer(person1).data
    person_two_dict = PersonSerializer(person2).data
    person_three_dict = PersonSerializer(person3).data

    # Expected activity-log entries; created_at is frozen by the test setup.
    person_three_log = {
        "user": {"first_name": "", "email": "*****@*****.**"},
        "activity": "was_merged_into_person",
        "scope": "Person",
        "item_id": str(person3.pk),
        "detail": {
            "changes": None,
            "name": None,
            "merge": {"type": "Person", "source": person_three_dict, "target": person_one_dict},
        },
        "created_at": "2021-08-25T22:09:14.252000Z",
    }
    person_one_log = {
        "user": {"first_name": "", "email": "*****@*****.**"},
        "activity": "people_merged_into",
        "scope": "Person",
        # don't store deleted person's name, so user primary key
        "item_id": str(person1.pk),
        "detail": {
            "changes": None,
            "name": None,
            "merge": {"type": "Person", "source": [person_three_dict, person_two_dict], "target": person_one_dict},
        },
        "created_at": "2021-08-25T22:09:14.252000Z",
    }
    person_two_log = {
        "user": {"first_name": "", "email": "*****@*****.**"},
        "activity": "was_merged_into_person",
        "scope": "Person",
        "item_id": str(person2.pk),
        "detail": {
            "changes": None,
            "name": None,
            "merge": {"type": "Person", "source": person_two_dict, "target": person_one_dict},
        },
        "created_at": "2021-08-25T22:09:14.252000Z",
    }

    self._assert_person_activity(
        person_id=None,  # changes for all three people
        expected=[person_three_log, person_one_log, person_two_log,],
    )
    self._assert_person_activity(
        person_id=person1.pk, expected=[person_one_log,],
    )
    self._assert_person_activity(
        person_id=person2.pk, expected=[person_two_log,],
    )
    self._assert_person_activity(
        person_id=person3.pk, expected=[person_three_log,],
    )
def get_people(
    self,
    filter: Filter,
    team_id: int,
    target_date: datetime,
    lifecycle_type: str,
    request: Request,
    limit: int = 100,
):
    """Serialize the people in a given lifecycle status on `target_date`
    (ClickHouse backend). Returns [] when the entity's action cannot be
    resolved into a filter."""
    entity = filter.entities[0]
    date_from = filter.date_from
    if not date_from:
        date_from = get_earliest_timestamp(team_id)
    interval = filter.interval or "day"
    num_intervals, seconds_in_interval, _ = get_time_diff(interval, filter.date_from, filter.date_to, team_id=team_id)
    interval_increment, interval_string, sub_interval_string = self.get_interval(interval)
    trunc_func = get_trunc_func_ch(interval)
    event_query = ""
    event_params: Dict[str, Any] = {}

    _, _, date_params = parse_timestamps(filter=filter, team_id=team_id)

    if entity.type == TREND_FILTER_TYPE_ACTIONS:
        try:
            action = entity.get_action()
            event_query, event_params = format_action_filter(action)
        # Was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; narrowed to Exception, behavior otherwise kept.
        except Exception:
            return []
    else:
        event_query = "event = %(event)s"
        event_params = {"event": entity.id}

    props_to_filter = [*filter.properties, *entity.properties]
    prop_filters, prop_filter_params = parse_prop_clauses(
        props_to_filter, team_id, filter_test_accounts=filter.filter_test_accounts
    )

    # Hour/minute intervals keep the time-of-day; coarser intervals floor to
    # midnight. Hoisted: the same format was previously built twice inline.
    date_format = "%Y-%m-%d{}".format(
        " %H:%M:%S" if filter.interval == "hour" or filter.interval == "minute" else " 00:00:00"
    )

    result = sync_execute(
        LIFECYCLE_PEOPLE_SQL.format(
            interval=interval_string,
            trunc_func=trunc_func,
            event_query=event_query,
            filters=prop_filters,
            sub_interval=sub_interval_string,
            GET_TEAM_PERSON_DISTINCT_IDS=GET_TEAM_PERSON_DISTINCT_IDS,
        ),
        {
            "team_id": team_id,
            "prev_date_from": (date_from - interval_increment).strftime(date_format),
            "num_intervals": num_intervals,
            "seconds_in_interval": seconds_in_interval,
            **event_params,
            **date_params,
            **prop_filter_params,
            "status": lifecycle_type,
            "target_date": target_date.strftime(date_format),
            "offset": filter.offset,
            "limit": limit,
        },
    )
    people = get_persons_by_uuids(team_id=team_id, uuids=[p[0] for p in result])
    # Warm the distinct-id cache the serializer reads.
    people = people.prefetch_related(Prefetch("persondistinctid_set", to_attr="distinct_ids_cache"))

    from posthog.api.person import PersonSerializer
    return PersonSerializer(people, many=True).data
def _retrieve_people_in_period(self, filter: RetentionFilter, team: Team):
    """People-per-period retention drill-down (ClickHouse backend): maps each
    person uuid in the query result to serialized person data and delegates
    row shaping to process_people_in_period."""
    period = filter.period
    is_first_time_retention = filter.retention_type == RETENTION_FIRST_TIME
    trunc_func = get_trunc_func_ch(period)
    prop_filters, prop_filter_params = parse_prop_clauses(
        filter.properties, team.pk)

    target_query, target_params = self._get_condition(filter.target_entity,
                                                      table="e")
    target_query_formatted = "AND {target_query}".format(
        target_query=target_query)
    return_query, return_params = self._get_condition(
        filter.returning_entity, table="e", prepend="returning")
    return_query_formatted = "AND {return_query}".format(
        return_query=return_query)

    # First-time retention anchors on each person's first matching event.
    first_event_sql = (REFERENCE_EVENT_UNIQUE_PEOPLE_PER_PERIOD_SQL
                       if is_first_time_retention else
                       REFERENCE_EVENT_PEOPLE_PER_PERIOD_SQL).format(
                           target_query=target_query_formatted,
                           filters=prop_filters,
                           trunc_func=trunc_func,
                       )
    default_event_query = (
        DEFAULT_REFERENCE_EVENT_UNIQUE_PEOPLE_PER_PERIOD_SQL
        if is_first_time_retention else
        DEFAULT_REFERENCE_EVENT_PEOPLE_PER_PERIOD_SQL).format(
            target_query=target_query_formatted,
            filters=prop_filters,
            trunc_func=trunc_func,
        )

    # Window starts at the selected interval.
    date_from = filter.date_from + filter.selected_interval * filter.period_increment
    date_to = filter.date_to

    # Shrink the window so only intervals at/after the selected one count.
    filter = filter.with_data({
        "total_intervals": filter.total_intervals - filter.selected_interval
    })

    query_result = sync_execute(
        RETENTION_PEOPLE_PER_PERIOD_SQL.format(
            returning_query=return_query_formatted,
            filters=prop_filters,
            first_event_sql=first_event_sql,
            first_event_default_sql=default_event_query,
            trunc_func=trunc_func,
        ),
        {
            "team_id": team.pk,
            # Hourly periods keep clock time; coarser ones floor to midnight.
            "start_date": date_from.strftime("%Y-%m-%d{}".format(
                " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
            "end_date": date_to.strftime("%Y-%m-%d{}".format(
                " %H:%M:%S" if filter.period == "Hour" else " 00:00:00")),
            "offset": filter.offset,
            "limit": 100,
            "period": period,
            **target_params,
            **return_params,
            **prop_filter_params,
        },
    )
    people_dict = {}

    from posthog.api.person import PersonSerializer

    people = get_persons_by_uuids(team_id=team.pk,
                                  uuids=[val[0] for val in query_result])
    # Warm the distinct-id cache the serializer reads.
    people = people.prefetch_related(
        Prefetch("persondistinctid_set", to_attr="distinct_ids_cache"))
    for person in people:
        people_dict.update(
            {str(person.uuid): PersonSerializer(person).data})

    result = self.process_people_in_period(filter, query_result, people_dict)
    return result
def get_people(
    self,
    filter: Filter,
    team_id: int,
    target_date: datetime,
    lifecycle_type: str,
    limit: int = 100,
):
    """Serialize the people in a given lifecycle status on `target_date`
    (Postgres backend, raw SQL over named Django querysets)."""
    entity = filter.entities[0]
    period = filter.interval or "day"
    num_intervals, prev_date_from, date_from, date_to, after_date_to = get_time_diff(
        period, filter.date_from, filter.date_to, team_id)
    interval_trunc, sub_interval = get_trunc_func(period=period)

    # include the before and after when filtering all events
    filter = Filter(
        data={
            **filter._data,
            "date_from": prev_date_from.isoformat(),
            "date_to": after_date_to.isoformat()
        })

    filtered_events = Event.objects.filter(team_id=team_id).filter(
        filter_events(team_id, filter, entity))
    event_query, event_params = queryset_to_named_query(filtered_events)

    # Second query without the date range — presumably to find each person's
    # earliest event; confirm against filter_events(include_dates=False).
    earliest_events_filtered = Event.objects.filter(
        team_id=team_id).filter(
            filter_events(team_id, filter, entity, include_dates=False))
    earliest_events_query, earliest_events_params = queryset_to_named_query(
        earliest_events_filtered, "earliest_events")

    with connection.cursor() as cursor:
        cursor.execute(
            LIFECYCLE_PEOPLE_SQL.format(
                # Actions need a join table and match on action_id; plain
                # events match on the event name.
                action_join=ACTION_JOIN
                if entity.type == TREND_FILTER_TYPE_ACTIONS else "",
                event_condition="{} = %(event)s".format(
                    "action_id"
                    if entity.type == TREND_FILTER_TYPE_ACTIONS else "event"),
                events=event_query,
                earliest_events=earliest_events_query,
            ),
            {
                "team_id": team_id,
                "event": entity.id,
                "interval": interval_trunc,
                "one_interval": "1 " + interval_trunc,
                "sub_interval": "1 " + sub_interval,
                "num_intervals": num_intervals,
                "prev_date_from": prev_date_from,
                "date_from": date_from,
                "date_to": date_to,
                "after_date_to": after_date_to,
                "target_date": target_date,
                "status": lifecycle_type,
                "offset": filter.offset,
                "limit": limit,
                **event_params,
                **earliest_events_params,
            },
        )
        pids = cursor.fetchall()

    people = Person.objects.filter(
        team_id=team_id,
        id__in=[p[0] for p in pids],
    )
    # Warm the distinct-id cache the serializer reads.
    people = people.prefetch_related(
        Prefetch("persondistinctid_set", to_attr="distinct_ids_cache"))

    from posthog.api.person import PersonSerializer
    return PersonSerializer(people, many=True).data