def test_retention_people_first_time(self):
    """Check first-time retention actors for interval 1 over two different windows.

    Expects exactly one actor (p3) for the window ending on day 10 and none
    for the window ending on day 14.
    """
    _, _, p3, _ = self._create_first_time_retention_events()
    signup_entity = json.dumps({"id": "$user_signed_up", "type": TREND_FILTER_TYPE_EVENTS})

    def actors_until(date_to):
        # Both queries use the same first-time filter; only the end date differs.
        return retention().actors(
            RetentionFilter(
                data={
                    "date_to": date_to,
                    RETENTION_TYPE: RETENTION_FIRST_TIME,
                    "target_entity": signup_entity,
                    "returning_entity": {"id": "$pageview", "type": "events"},
                    "selected_interval": 1,
                }
            ),
            self.team,
        )

    # even if set to hour 6 it should default to beginning of day and include all pageviews above
    first_window = actors_until(self._date(10, hour=6))
    self.assertEqual(len(first_window), 1)
    self.assertIn(first_window[0]["id"], [p3.pk, p3.uuid])

    second_window = actors_until(self._date(14, hour=6))
    self.assertEqual(len(second_window), 0)
def calculate_retention(self, request: request.Request) -> List[Dict[str, Any]]:
    """Run the retention query described by ``request`` for ``self.team``.

    When the filter built from the request has no ``date_from``, its
    ``_date_from`` is set to the relative value ``"-11d"`` before running.
    """
    current_team = self.team
    retention_filter = RetentionFilter(request=request)
    if not retention_filter.date_from:
        # No start date supplied by the caller: fall back to the default window.
        retention_filter._date_from = "-11d"
    return retention.Retention().run(retention_filter, current_team)
def _retrieve_people(self, filter: RetentionFilter, team: Team):
    """Return serialized persons matching the retention filter for one page.

    Builds an ORM query over the team's events, keeps persons for whom a
    matching reference event exists (``inner_events``), and serializes up to
    100 persons starting at ``filter.offset`` with ``PersonSerializer``.
    """
    period = filter.period
    # NOTE(review): `fields` is unpacked but never used in this method.
    trunc, fields = self._get_trunc_func("timestamp", period)
    is_first_time_retention = filter.retention_type == RETENTION_FIRST_TIME
    entity_condition, _ = self.get_entity_condition(
        filter.target_entity, "events")
    returning_condition, _ = self.get_entity_condition(
        filter.returning_entity, "first_event_date")
    # Interval 0 is matched on the target entity; later intervals on the returning entity.
    _entity_condition = returning_condition if filter.selected_interval > 0 else entity_condition
    events = Event.objects.filter(team_id=team.pk).add_person_id(team.pk)
    filtered_events = events.filter(
        filter.recurring_date_filter_Q()).filter(
            properties_to_Q(filter.properties, team_id=team.pk))
    # Correlated subquery (via OuterRef("id")) selecting the person's reference events.
    # First-time retention: the minimum truncated timestamp per person must satisfy
    # the reference date filter. Otherwise: any target event inside the reference date filter.
    inner_events = (
        Event.objects.filter(team_id=team.pk)
        .filter(properties_to_Q(filter.properties, team_id=team.pk))
        .add_person_id(team.pk)
        .filter(**{"person_id": OuterRef("id")})
        .filter(entity_condition)
        .values("person_id")
        .annotate(first_date=Min(trunc))
        .filter(filter.reference_date_filter_Q("first_date"))
        .distinct()
        if is_first_time_retention
        else Event.objects.filter(team_id=team.pk)
        .filter(filter.reference_date_filter_Q())
        .filter(properties_to_Q(filter.properties, team_id=team.pk))
        .add_person_id(team.pk)
        .filter(**{"person_id": OuterRef("id")})
        .filter(entity_condition)
    )
    # Keep only events whose person has at least one row in `inner_events`,
    # then reduce to distinct person ids.
    filtered_events = (
        filtered_events.filter(_entity_condition)
        .filter(
            Exists(
                Person.objects.filter(**{"id": OuterRef("person_id"),})
                .filter(Exists(inner_events))
                .only("id")
            )
        )
        .values("person_id")
        .distinct()
    ).all()
    # Page size is hard-coded to 100 ids starting at filter.offset.
    people = Person.objects.filter(
        team=team,
        id__in=[
            p["person_id"] for p in filtered_events[filter.offset:filter.offset + 100]
        ],
    )
    people = people.prefetch_related(
        Prefetch("persondistinctid_set", to_attr="distinct_ids_cache"))
    # Imported locally — presumably to avoid a circular import; verify before moving.
    from posthog.api.person import PersonSerializer
    return PersonSerializer(people, many=True).data
def test_first_time_retention(self):
    """Check the first-time retention table ("$user_signed_up" then "$pageview") over 7 intervals."""
    self._create_first_time_retention_events()
    signup_entity = json.dumps({"id": "$user_signed_up", "type": TREND_FILTER_TYPE_EVENTS})
    filter_data = {
        "date_to": self._date(5, hour=6),
        RETENTION_TYPE: RETENTION_FIRST_TIME,
        "target_entity": signup_entity,
        "returning_entity": {"id": "$pageview", "type": "events"},
        "total_intervals": 7,
    }

    rows = retention().run(RetentionFilter(data=filter_data), self.team)

    expected_labels = ["Day {}".format(index) for index in range(7)]
    expected_counts = [
        [2, 1, 2, 2, 1, 0, 1],
        [1, 1, 0, 1, 1, 1],
        [0, 0, 0, 0, 0],
        [1, 1, 0, 1],
        [0, 0, 0],
        [0, 0],
        [0],
    ]
    self.assertEqual(len(rows), 7)
    self.assertEqual(self.pluck(rows, "label"), expected_labels)
    self.assertEqual(self.pluck(rows, "values", "count"), expected_counts)
def test_retention_people_in_period(self):
    """Check per-person appearances from ``people_in_period`` for selected interval 2.

    Expects person2 first and person1 second, each with nine appearance flags.
    """
    person1 = person_factory(team_id=self.team.pk, distinct_ids=["person1", "alias1"])
    person2 = person_factory(team_id=self.team.pk, distinct_ids=["person2"])

    # Built in the same order as the events are meant to be created.
    events = [("person1", self._date(day)) for day in (0, 1, 2, 5)]
    events.append(("alias1", self._date(5, 9)))
    events.append(("person1", self._date(6)))
    events.extend(("person2", self._date(day)) for day in (1, 2, 3, 6, 7))
    self._create_events(events)

    # even if set to hour 6 it should default to beginning of day and include all pageviews above
    filter_data = {"date_to": self._date(10, hour=6), "selected_interval": 2}
    rows = retention().people_in_period(RetentionFilter(data=filter_data), self.team)

    top_row = rows[0]
    self.assertEqual(top_row["person"]["id"], person2.pk)
    self.assertEqual(top_row["appearances"], [1, 1, 0, 0, 1, 1, 0, 0, 0])

    next_row = rows[1]
    self.assertEqual(next_row["person"]["id"], person1.pk)
    self.assertEqual(next_row["appearances"], [1, 0, 0, 1, 1, 0, 0, 0, 0])
def test_first_time_retention_people(self):
    """Check that first-time ``people_in_period`` for interval 0 reports only p3 under "detail"."""
    _, _, p3, _ = self._create_first_time_retention_events()
    signup_entity = json.dumps({"id": "$user_signed_up", "type": TREND_FILTER_TYPE_EVENTS})
    filter_data = {
        "date_to": self._date(6, hour=6),
        RETENTION_TYPE: RETENTION_FIRST_TIME,
        "target_entity": signup_entity,
        "returning_entity": {"id": "$pageview", "type": "events"},
        "total_intervals": 7,
        "selected_interval": 0,
    }

    result = retention().people_in_period(RetentionFilter(data=filter_data), self.team)

    self.assertEqual(len(result["detail"]), 1)
    only_row = result["detail"][0]
    self.assertEqual(only_row["person"]["id"], p3.pk)
    self.assertEqual(only_row["appearances"], [1, 1, 0, 1, 1, 1, 0])
def test_retention_people_basic(self):
    """Check that ``actors`` for selected interval 2 returns only person1.

    The returned id may be either the person's primary key or its uuid, so
    the assertion accepts both.
    """
    person1 = person_factory(team_id=self.team.pk, distinct_ids=["person1", "alias1"])
    # person2 only needs to exist; it is not referenced by the assertions.
    person_factory(team_id=self.team.pk, distinct_ids=["person2"])

    self._create_events(
        [
            ("person1", self._date(0)),
            ("person1", self._date(1)),
            ("person1", self._date(2)),
            ("person1", self._date(5)),
            ("alias1", self._date(5, 9)),
            ("person1", self._date(6)),
            ("person2", self._date(1)),
            ("person2", self._date(2)),
            ("person2", self._date(3)),
            ("person2", self._date(6)),
        ]
    )

    # even if set to hour 6 it should default to beginning of day and include all pageviews above
    result = retention().actors(
        RetentionFilter(data={"date_to": self._date(10, hour=6), "selected_interval": 2}), self.team
    )

    self.assertEqual(len(result), 1)
    # The previous assertTrue(a == pk or a, uuid) always passed: the `or` operand
    # was the (truthy) id itself and the uuid was only the failure message.
    self.assertIn(result[0]["id"], [person1.pk, person1.uuid])
def test_retention_people_in_perieod_first_time(self):
    """Check that first-time ``actors_in_period`` for interval 0 returns only p3 with 11 appearance flags."""
    _, _, p3, _ = self._create_first_time_retention_events()
    signup_entity = json.dumps({"id": "$user_signed_up", "type": TREND_FILTER_TYPE_EVENTS})

    # even if set to hour 6 it should default to beginning of day and include all pageviews above
    filter_data = {
        "date_to": self._date(10, hour=6),
        RETENTION_TYPE: RETENTION_FIRST_TIME,
        "target_entity": signup_entity,
        "returning_entity": {"id": "$pageview", "type": "events"},
        "selected_interval": 0,
    }
    result1 = retention().actors_in_period(RetentionFilter(data=filter_data), self.team)

    self.assertEqual(len(result1), 1)
    # The id may be reported either as the primary key or as the uuid.
    self.assertTrue(result1[0]["person"]["id"] == p3.pk or result1[0]["person"]["id"] == p3.uuid)
    self.assertEqual(result1[0]["appearances"], [1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0])
def test_day_interval(self):
    """Check the default daily retention table: 11 rows, labels, first date and counts."""
    person_factory(team_id=self.team.pk, distinct_ids=["person1", "alias1"])
    person_factory(team_id=self.team.pk, distinct_ids=["person2"])

    events = [("person1", self._date(day)) for day in (0, 1, 2, 5)]
    events.append(("alias1", self._date(5, 9)))
    events.append(("person1", self._date(6)))
    events.extend(("person2", self._date(day)) for day in (1, 2, 3, 6))
    self._create_events(events)

    # even if set to hour 6 it should default to beginning of day and include all pageviews above
    rows = retention().run(RetentionFilter(data={"date_to": self._date(10, hour=6)}), self.team)

    expected_counts = [
        [1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0],
        [2, 2, 1, 0, 1, 2, 0, 0, 0, 0],
        [2, 1, 0, 1, 2, 0, 0, 0, 0],
        [1, 0, 0, 1, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
        [1, 1, 0, 0, 0, 0],
        [2, 0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0],
        [0, 0],
        [0],
    ]
    self.assertEqual(len(rows), 11)
    self.assertEqual(self.pluck(rows, "label"), ["Day {}".format(index) for index in range(11)])
    self.assertEqual(rows[0]["date"], datetime(2020, 6, 10, 0, tzinfo=pytz.UTC))
    self.assertEqual(self.pluck(rows, "values", "count"), expected_counts)
def calculate_retention(self, request: request.Request) -> Dict[str, Any]:
    """Run the retention query for ``self.team`` and wrap it as ``{"result": ...}``.

    If the request's GET parameters carry no ``date_from``, the filter is
    given ``"-11d"`` as its ``date_from``.
    """
    current_team = self.team
    # Only inject a default start date when the caller did not send one.
    overrides = {} if request.GET.get("date_from") else {"date_from": "-11d"}
    retention_filter = RetentionFilter(data=overrides, request=request)
    return {"result": retention.Retention().run(retention_filter, current_team)}
def test_retention_graph(self):
    """Check the first series returned when retention is run with the linear display."""
    person_factory(team_id=self.team.pk, distinct_ids=["person1", "alias1"])
    person_factory(team_id=self.team.pk, distinct_ids=["person2"])

    events = [("person1", self._date(day)) for day in (0, 1, 2, 5)]
    events.append(("alias1", self._date(5, 9)))
    events.append(("person1", self._date(6)))
    events.extend(("person2", self._date(day)) for day in (0, 1, 2, 3, 6))
    self._create_events(events)

    filter_data = {"date_to": self._date(10, hour=6), "display": TRENDS_LINEAR}
    series = retention().run(RetentionFilter(data=filter_data), self.team)

    self.assertEqual(series[0]["count"], 2)
    self.assertEqual(series[0]["labels"], ["Day {}".format(index) for index in range(11)])
    # Eleven consecutive days, 2020-06-10 through 2020-06-20.
    self.assertEqual(series[0]["days"], ["2020-06-{}".format(day) for day in range(10, 21)])
    self.assertEqual(
        series[0]["data"],
        [100.0, 100.0, 100.0, 50.0, 0.0, 50.0, 100.0, 0.0, 0.0, 0.0, 0.0],
    )
def calculate_retention(self, request: request.Request) -> Dict[str, Any]:
    """Run ``ClickhouseRetention`` for ``self.team`` and wrap it as ``{"result": ...}``.

    If the request's GET parameters carry no ``date_from``, the filter is
    given ``"-11d"``. The absolute URI of ``"/"`` for this request is passed
    to ``ClickhouseRetention`` as ``base_uri``.
    """
    current_team = self.team
    # Only inject a default start date when the caller did not send one.
    overrides = {} if request.GET.get("date_from") else {"date_from": "-11d"}
    retention_filter = RetentionFilter(data=overrides, request=request, team=self.team)
    root_uri = request.build_absolute_uri("/")
    rows = ClickhouseRetention(base_uri=root_uri).run(retention_filter, current_team)
    return {"result": rows}
def _construct_people_url_for_trend_breakdown_interval(
    self,
    filter: RetentionFilter,
    selected_interval: int,
    breakdown_values: BreakdownValues,
):
    """Build the person-retention API URL for one breakdown value and interval.

    Copies the filter's data, overrides ``breakdown_values`` and
    ``selected_interval``, and URL-encodes the resulting params after
    ``self._base_uri``.
    """
    overridden = dict(filter._data)
    overridden["breakdown_values"] = breakdown_values
    overridden["selected_interval"] = selected_interval
    query_string = urlencode(RetentionFilter(overridden).to_params())
    return f"{self._base_uri}api/person/retention/?{query_string}"
def test_interval_rounding(self):
    """Check the weekly retention table: labels, counts and the start date of each week."""
    Person.objects.create(
        team=self.team,
        distinct_ids=["person1", "alias1"],
        properties={"email": "*****@*****.**"},
    )
    Person.objects.create(
        team=self.team,
        distinct_ids=["person2"],
        properties={"email": "*****@*****.**"},
    )

    events = [
        ("person1", self._date(0)),
        ("person2", self._date(0)),
        ("person1", self._date(1)),
        ("person2", self._date(1)),
        ("person1", self._date(7)),
        ("person2", self._date(7)),
        ("person1", self._date(14)),
        ("person1", self._date(month=1, day=-6)),
        ("person2", self._date(month=1, day=-6)),
        ("person2", self._date(month=1, day=1)),
        ("person1", self._date(month=1, day=1)),
        ("person2", self._date(month=1, day=15)),
    ]
    self._create_events(events)

    filter_data = {"date_to": self._date(14, month=1, hour=0), "period": "Week", "total_intervals": 7}
    rows = retention().run(RetentionFilter(data=filter_data), self.team)

    expected_counts = [
        [2, 2, 1, 2, 2, 0, 1],
        [2, 1, 2, 2, 0, 1],
        [1, 1, 1, 0, 0],
        [2, 2, 0, 1],
        [2, 0, 1],
        [0, 0],
        [1],
    ]
    expected_dates = [
        datetime(2020, 6, 7, 0, tzinfo=pytz.UTC),
        datetime(2020, 6, 14, 0, tzinfo=pytz.UTC),
        datetime(2020, 6, 21, 0, tzinfo=pytz.UTC),
        datetime(2020, 6, 28, 0, tzinfo=pytz.UTC),
        datetime(2020, 7, 5, 0, tzinfo=pytz.UTC),
        datetime(2020, 7, 12, 0, tzinfo=pytz.UTC),
        datetime(2020, 7, 19, 0, tzinfo=pytz.UTC),
    ]
    self.assertEqual(self.pluck(rows, "label"), ["Week {}".format(index) for index in range(7)])
    self.assertEqual(self.pluck(rows, "values", "count"), expected_counts)
    self.assertEqual(self.pluck(rows, "date"), expected_dates)
def test_retention_with_properties(self):
    """Check the daily retention table when filtering on the "$some_property" property."""
    person_factory(team_id=self.team.pk, distinct_ids=["person1", "alias1"])
    person_factory(team_id=self.team.pk, distinct_ids=["person2"])

    events = [("person1", self._date(day)) for day in (0, 1, 2, 5)]
    events.append(("alias1", self._date(5, 9)))
    events.append(("person1", self._date(6)))
    events.extend(("person2", self._date(day)) for day in (1, 2, 3, 6))
    self._create_events(events)

    filter_data = {
        "properties": [{"key": "$some_property", "value": "value"}],
        "date_to": self._date(10, hour=0),
    }
    rows = retention().run(RetentionFilter(data=filter_data), self.team)

    expected_counts = [
        [1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0],
        [1, 0, 1, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 1, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0],
        [0, 0],
        [0],
    ]
    self.assertEqual(len(rows), 11)
    self.assertEqual(self.pluck(rows, "label"), ["Day {}".format(index) for index in range(11)])
    self.assertEqual(rows[0]["date"], datetime(2020, 6, 10, 0, tzinfo=pytz.UTC))
    self.assertEqual(self.pluck(rows, "values", "count"), expected_counts)
def build_returning_event_query(filter: RetentionFilter, team: Team):
    """Return the returning-event query with its parameters substituted in.

    The query is built by ``RetentionEventsQuery`` with
    ``RetentionQueryType.RETURNING`` from a copy of ``filter`` whose
    ``breakdowns`` are emptied.
    """
    # Avoid pulling in breakdown values from returning event query
    filter_without_breakdowns = filter.with_data({"breakdowns": []})
    events_query = RetentionEventsQuery(
        filter=filter_without_breakdowns,
        team_id=team.pk,
        event_query_type=RetentionQueryType.RETURNING,
    )
    templated_sql, sql_params = events_query.get_query()
    return substitute_params(templated_sql, sql_params)
def test_filter_test_accounts(self):
    """Check the daily retention table when ``FILTER_TEST_ACCOUNTS`` is enabled on the filter."""
    person_factory(
        team_id=self.team.pk, distinct_ids=["person1", "alias1"], properties={"email": "*****@*****.**"}
    )
    person_factory(team_id=self.team.pk, distinct_ids=["person2"])

    events = [("person1", self._date(day)) for day in (0, 1, 2, 5)]
    events.append(("alias1", self._date(5, 9)))
    events.append(("person1", self._date(6)))
    events.extend(("person2", self._date(day)) for day in (1, 2, 3, 6))
    self._create_events(events)

    # even if set to hour 6 it should default to beginning of day and include all pageviews above
    filter_data = {"date_to": self._date(10, hour=6), FILTER_TEST_ACCOUNTS: True}
    rows = retention().run(RetentionFilter(data=filter_data, team=self.team), self.team)

    expected_counts = [
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 0, 0, 1, 0, 0, 0, 0],
        [1, 1, 0, 0, 1, 0, 0, 0, 0],
        [1, 0, 0, 1, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0],
        [0, 0],
        [0],
    ]
    self.assertEqual(len(rows), 11)
    self.assertEqual(self.pluck(rows, "label"), ["Day {}".format(index) for index in range(11)])
    self.assertEqual(rows[0]["date"], datetime(2020, 6, 10, 0, tzinfo=pytz.UTC))
    self.assertEqual(self.pluck(rows, "values", "count"), expected_counts)
def retention(self, request: request.Request) -> response.Response:
    """Return the people matching the request's retention filter, with a next-page URL.

    Responds with status 400 when the requesting user has no team.
    """
    user_team = request.user.team
    if not user_team:
        error_body = {
            "message": "Could not retrieve team",
            "detail": "Could not validate team associated with user",
        }
        return response.Response(error_body, status=400)

    retention_filter = RetentionFilter(request=request)
    matched_people = self.retention_class().people(retention_filter, user_team)
    payload = {
        "result": matched_people,
        "next": paginated_result(matched_people, request, retention_filter.offset),
    }
    return response.Response(payload)
def _retrieve_people_in_period(self, filter: RetentionFilter, team: Team):
    """Return per-person appearances for the selected retention interval.

    Rebuilds the filter with ``total_intervals`` reduced by
    ``selected_interval``, runs a raw SQL query assembled from
    ``_determine_query_params`` (limit 100, offset ``filter.offset``), loads
    the matching ``Person`` rows for ``team`` and hands everything to
    ``process_people_in_period``.

    The caller's ``filter`` is left untouched: the adjusted data is built as
    a new dict instead of updating ``filter._data`` in place.
    """
    # Copy rather than mutate: updating filter._data directly would change
    # the filter object owned by the caller.
    new_data = {
        **filter._data,
        "total_intervals": filter.total_intervals - filter.selected_interval,
    }
    filter = RetentionFilter(data=new_data)

    format_fields, params = self._determine_query_params(filter, team)

    # The literal is split across lines only for readability; the concatenated
    # text is the same query string as before.
    final_query = (
        ' SELECT person_id, count(person_id) appearance_count, array_agg(date) appearances'
        ' FROM ( SELECT DISTINCT {fields} "events"."person_id"'
        ' FROM ({event_query}) events'
        ' LEFT JOIN ({reference_event_query}) first_event_date'
        ' ON (events.person_id = first_event_date.person_id)'
        ' WHERE event_date >= first_date'
        ' AND {target_condition} AND {return_condition}'
        ' OR ({target_condition} AND event_date = first_date)'
        ' ) person_appearances'
        ' WHERE first_date = 0'
        ' GROUP BY person_id'
        ' ORDER BY appearance_count DESC'
        ' LIMIT %s OFFSET %s '
    ).format(**format_fields)

    # Imported locally — presumably to avoid a circular import; verify before moving.
    from posthog.api.person import PersonSerializer

    with connection.cursor() as cursor:
        # LIMIT and OFFSET are the last two positional parameters.
        cursor.execute(
            final_query,
            params + (100, filter.offset),
        )
        raw_results = cursor.fetchall()

        # Map person primary key -> serialized person for the returned rows.
        people_dict = {}
        for person in Person.objects.filter(
            team_id=team.pk, id__in=[val[0] for val in raw_results]
        ):
            people_dict.update({person.pk: PersonSerializer(person).data})

        result = self.process_people_in_period(filter, raw_results, people_dict)
    return result
def test_retention_multiple_events(self):
    """Check retention with "$some_event" as the target entity and "$pageview" as the returning entity."""
    for distinct_ids in (["person1", "alias1"], ["person2"], ["person3"], ["person4"]):
        person_factory(team_id=self.team.pk, distinct_ids=distinct_ids)

    first_event = "$some_event"
    target_events = [("person1", self._date(day)) for day in (0, 1, 2, 3)]
    target_events.extend(("person2", self._date(day)) for day in (0, 1, 2, 3))
    target_events.append(("person3", self._date(5)))
    self._create_events(target_events, first_event)

    returning_events = [
        ("person1", self._date(5)),
        ("person1", self._date(6)),
        ("person2", self._date(5)),
    ]
    self._create_events(returning_events, "$pageview")

    filter_data = {
        "date_to": self._date(6, hour=6),
        "target_entity": json.dumps({"id": first_event, "type": TREND_FILTER_TYPE_EVENTS}),
        "returning_entity": {"id": "$pageview", "type": "events"},
        "total_intervals": 7,
    }
    rows = retention().run(RetentionFilter(data=filter_data), self.team)

    expected_counts = [
        [2, 0, 0, 0, 0, 2, 1],
        [2, 0, 0, 0, 2, 1],
        [2, 0, 0, 2, 1],
        [2, 0, 2, 1],
        [0, 0, 0],
        [1, 0],
        [0],
    ]
    self.assertEqual(len(rows), 7)
    self.assertEqual(self.pluck(rows, "label"), ["Day {}".format(index) for index in range(7)])
    self.assertEqual(self.pluck(rows, "values", "count"), expected_counts)
def process_table_result(
    self,
    resultset: Dict[Tuple[int, int], Dict[str, Any]],
    filter: RetentionFilter,
):
    """Turn the ``(first_day, day)``-keyed result set into a list of table rows.

    One row is produced per interval in ``filter.total_intervals``. Each row
    carries its label, its start date and a ``values`` list that uses
    ``{"count": 0, "people": []}`` wherever the result set has no entry.
    """
    rows = []
    for first_day in range(filter.total_intervals):
        # Later cohorts have fewer following intervals, so rows get shorter.
        cells = []
        for day in range(filter.total_intervals - first_day):
            cells.append(resultset.get((first_day, day), {"count": 0, "people": []}))
        row_label = "{} {}".format(filter.period, first_day)
        offset = RetentionFilter.determine_time_delta(first_day, filter.period)[0]
        rows.append(
            {
                "values": cells,
                "label": row_label,
                "date": filter.date_from + offset,
            }
        )
    return rows
def test_retention_event_action(self):
    """Check retention with an action as the target entity and "$some_event" as the returning entity."""
    person_factory(team=self.team, distinct_ids=["person1", "alias1"])
    person_factory(team=self.team, distinct_ids=["person2"])

    signups = [("person1", self._date(day)) for day in (0, 1, 2, 3)]
    signups.extend(("person2", self._date(day)) for day in (0, 1, 2, 3))
    action = self._create_signup_actions(signups)

    some_event = "$some_event"
    returning_events = [("person1", self._date(3)), ("person2", self._date(5))]
    self._create_events(returning_events, some_event)

    filter_data = {
        "date_to": self._date(6, hour=0),
        "target_entity": json.dumps({"id": action.pk, "type": TREND_FILTER_TYPE_ACTIONS}),
        "returning_entity": {"id": some_event, "type": TREND_FILTER_TYPE_EVENTS},
        "total_intervals": 7,
    }
    rows = retention().run(RetentionFilter(data=filter_data), self.team)

    expected_counts = [
        [2, 0, 0, 1, 0, 1, 0],
        [2, 0, 1, 0, 1, 0],
        [2, 1, 0, 1, 0],
        [2, 0, 1, 0],
        [0, 0, 0],
        [0, 0],
        [0],
    ]
    self.assertEqual(len(rows), 7)
    self.assertEqual(self.pluck(rows, "label"), ["Day {}".format(index) for index in range(7)])
    self.assertEqual(rows[0]["date"], datetime(2020, 6, 10, 0, tzinfo=pytz.UTC))
    self.assertEqual(self.pluck(rows, "values", "count"), expected_counts)
def test_retention_with_user_properties(self):
    """Check the retention table when filtering on the person property "email"."""
    person_factory(
        team_id=self.team.pk,
        distinct_ids=["person1", "alias1"],
        properties={"email": "*****@*****.**"},
    )
    person_factory(
        team_id=self.team.pk,
        distinct_ids=["person2"],
        properties={"email": "*****@*****.**"},
    )

    events = [("person1", self._date(day)) for day in (0, 1, 2, 5)]
    events.append(("alias1", self._date(5, 9)))
    events.append(("person1", self._date(6)))
    events.extend(("person2", self._date(day)) for day in (1, 2, 3, 6))
    self._create_events(events)

    filter_data = {
        "properties": [{"key": "email", "value": "*****@*****.**", "type": "person"}],
        "date_to": self._date(6, hour=0),
        "total_intervals": 7,
    }
    rows = retention().run(RetentionFilter(data=filter_data), self.team)

    expected_counts = [
        [1, 1, 1, 0, 0, 1, 1],
        [1, 1, 0, 0, 1, 1],
        [1, 0, 0, 1, 1],
        [0, 0, 0, 0],
        [0, 0, 0],
        [1, 1],
        [1],
    ]
    self.assertEqual(len(rows), 7)
    self.assertEqual(self.pluck(rows, "label"), ["Day {}".format(index) for index in range(7)])
    self.assertEqual(rows[0]["date"], datetime(2020, 6, 10, 0, tzinfo=pytz.UTC))
    self.assertEqual(self.pluck(rows, "values", "count"), expected_counts)
def retention(self, request: request.Request) -> response.Response:
    """Return retention people for the request, with a next-page URL.

    Uses ``people_in_period`` when the ``display`` GET parameter equals
    ``TRENDS_TABLE`` and ``people`` otherwise. Responds with status 400 when
    the requesting user has no team.
    """
    display = request.GET.get("display", None)
    user_team = cast(User, request.user).team
    if not user_team:
        error_body = {
            "message": "Could not retrieve team",
            "detail": "Could not validate team associated with user",
        }
        return response.Response(error_body, status=400)

    retention_filter = RetentionFilter(request=request)
    retention_query = self.retention_class()
    if display == TRENDS_TABLE:
        matched_people = retention_query.people_in_period(retention_filter, user_team)
    else:
        matched_people = retention_query.people(retention_filter, user_team)

    payload = {
        "result": matched_people,
        "next": paginated_result(matched_people, request, retention_filter.offset),
    }
    return response.Response(payload)
def test_retention_default(self):
    """Check that a filter with only dummy data yields an all-zero 11-row retention table."""
    person_factory(team_id=self.team.pk, distinct_ids=["person1", "alias1"])
    person_factory(team_id=self.team.pk, distinct_ids=["person2"])

    events = [("person1", self._date(day)) for day in (0, 1, 2, 5)]
    events.append(("alias1", self._date(5, 9)))
    events.append(("person1", self._date(6)))
    events.extend(("person2", self._date(day)) for day in (1, 2, 3, 6))
    self._create_events(events)

    rows = retention().run(RetentionFilter(data={"dummy": "dummy"}), self.team)

    # Triangular table of zeros: row lengths run from 11 down to 1.
    expected_counts = [[0] * width for width in range(11, 0, -1)]
    self.assertEqual(self.pluck(rows, "values", "count"), expected_counts)
def process_table_result(
    self,
    resultset: Dict[CohortKey, Dict[str, Any]],
    filter: RetentionFilter,
):
    """Build the REST API response rows for the case without a breakdown.

    The non-breakdown case is handled separately from the breakdown case to
    stay compatible with responses from before breakdowns existed. The main
    difference is zero filling: every cohort in the range gets a row, and any
    cell missing from ``resultset`` is reported as
    ``{"count": 0, "people": []}``.
    """

    def people_url_for(first_day):
        # Same filter data, switched to the table display and narrowed to this cohort.
        url_filter = RetentionFilter(
            {**filter._data, "display": "ActionsTable", "breakdown_values": [first_day]}
        )
        return "/api/person/retention/?" f"{urlencode(url_filter.to_params())}"

    rows = []
    for first_day in range(filter.total_intervals):
        # Later cohorts have fewer following intervals, so rows get shorter.
        cells = []
        for day in range(filter.total_intervals - first_day):
            cells.append(resultset.get(CohortKey((first_day,), day), {"count": 0, "people": []}))
        row_label = "{} {}".format(filter.period, first_day)
        offset = RetentionFilter.determine_time_delta(first_day, filter.period)[0]
        rows.append(
            {
                "values": cells,
                "label": row_label,
                "date": filter.date_from + offset,
                "people_url": people_url_for(first_day),
            }
        )
    return rows
def test_month_interval(self):
    """Check the monthly retention table over 11 intervals: labels, counts and dates."""
    Person.objects.create(
        team=self.team,
        distinct_ids=["person1", "alias1"],
        properties={"email": "*****@*****.**"},
    )
    Person.objects.create(
        team=self.team,
        distinct_ids=["person2"],
        properties={"email": "*****@*****.**"},
    )

    # (distinct id, month offset) pairs, in creation order.
    schedule = [
        ("person1", -5),
        ("person2", -5),
        ("person1", -4),
        ("person2", -4),
        ("person1", -3),
        ("person2", -3),
        ("person1", -1),
        ("person1", 0),
        ("person2", 0),
        ("person2", 1),
        ("person1", 3),
        ("person2", 5),
    ]
    self._create_events(
        [(distinct_id, self._date(day=0, month=offset)) for distinct_id, offset in schedule]
    )

    filter = RetentionFilter(data={"date_to": self._date(0, month=5, hour=0), "period": "Month"})
    rows = retention().run(filter, self.team, total_intervals=11)

    expected_counts = [
        [2, 2, 2, 0, 1, 2, 1, 0, 1, 0, 1],
        [2, 2, 0, 1, 2, 1, 0, 1, 0, 1],
        [2, 0, 1, 2, 1, 0, 1, 0, 1],
        [0, 0, 0, 0, 0, 0, 0, 0],
        [1, 1, 0, 0, 1, 0, 0],
        [2, 1, 0, 1, 0, 1],
        [1, 0, 0, 0, 1],
        [0, 0, 0, 0],
        [1, 0, 0],
        [0, 0],
        [1],
    ]
    # The 10th of each month, January through November 2020.
    expected_dates = [datetime(2020, month, 10, 0, tzinfo=pytz.UTC) for month in range(1, 12)]

    self.assertEqual(self.pluck(rows, "label"), ["Month {}".format(index) for index in range(11)])
    self.assertEqual(self.pluck(rows, "values", "count"), expected_counts)
    self.assertEqual(self.pluck(rows, "date"), expected_dates)
def _retrieve_people_in_period(self, filter: RetentionFilter, team: Team):
    """Return per-person appearances for the selected retention interval (ClickHouse).

    Formats ``RETENTION_PEOPLE_PER_PERIOD_SQL`` with the target/returning
    conditions and property filters, executes it with a limit of 100 at
    ``filter.offset``, loads the persons by uuid and hands everything to
    ``process_people_in_period``.
    """
    period = filter.period
    is_first_time_retention = filter.retention_type == RETENTION_FIRST_TIME
    trunc_func = get_trunc_func_ch(period)
    prop_filters, prop_filter_params = parse_prop_clauses(filter.properties, team.pk)

    target_query, target_params = self._get_condition(filter.target_entity, table="e")
    target_clause = f"AND {target_query}"
    return_query, return_params = self._get_condition(
        filter.returning_entity, table="e", prepend="returning"
    )
    return_clause = f"AND {return_query}"

    # First-time retention uses the "unique people" variants of the reference-event templates.
    if is_first_time_retention:
        reference_template = REFERENCE_EVENT_UNIQUE_PEOPLE_PER_PERIOD_SQL
        default_template = DEFAULT_REFERENCE_EVENT_UNIQUE_PEOPLE_PER_PERIOD_SQL
    else:
        reference_template = REFERENCE_EVENT_PEOPLE_PER_PERIOD_SQL
        default_template = DEFAULT_REFERENCE_EVENT_PEOPLE_PER_PERIOD_SQL
    template_args = {
        "target_query": target_clause,
        "filters": prop_filters,
        "trunc_func": trunc_func,
    }
    first_event_sql = reference_template.format(**template_args)
    default_event_query = default_template.format(**template_args)

    # The date range is taken from the original filter, before total_intervals is reduced.
    date_from = filter.date_from + filter.selected_interval * filter.period_increment
    date_to = filter.date_to
    filter = filter.with_data(
        {"total_intervals": filter.total_intervals - filter.selected_interval}
    )

    people_sql = RETENTION_PEOPLE_PER_PERIOD_SQL.format(
        returning_query=return_clause,
        filters=prop_filters,
        first_event_sql=first_event_sql,
        first_event_default_sql=default_event_query,
        trunc_func=trunc_func,
    )
    # Hourly periods keep the time of day; every other period is pinned to midnight.
    if filter.period == "Hour":
        timestamp_format = "%Y-%m-%d %H:%M:%S"
    else:
        timestamp_format = "%Y-%m-%d 00:00:00"
    query_params = {
        "team_id": team.pk,
        "start_date": date_from.strftime(timestamp_format),
        "end_date": date_to.strftime(timestamp_format),
        "offset": filter.offset,
        "limit": 100,
        "period": period,
        **target_params,
        **return_params,
        **prop_filter_params,
    }
    query_result = sync_execute(people_sql, query_params)

    # Imported locally — presumably to avoid a circular import; verify before moving.
    from posthog.api.person import PersonSerializer

    people = get_persons_by_uuids(team_id=team.pk, uuids=[row[0] for row in query_result])
    people = people.prefetch_related(
        Prefetch("persondistinctid_set", to_attr="distinct_ids_cache")
    )
    # Map uuid string -> serialized person for the returned rows.
    people_dict = {str(person.uuid): PersonSerializer(person).data for person in people}

    return self.process_people_in_period(filter, query_result, people_dict)
def _determine_query_params(self, filter: RetentionFilter, team: Team):
    """Build the SQL fragments and positional parameters for a retention query.

    Returns a 2-tuple: a dict of format fields (``event_query``,
    ``reference_event_query``, ``fields``, ``return_condition``,
    ``target_condition``) and a tuple of positional parameters made of the
    start params, the event query params, the reference query params and the
    entity ids, concatenated in that order.
    """
    period = filter.period
    is_first_time_retention = filter.retention_type == RETENTION_FIRST_TIME
    # NOTE(review): this empty QuerySet is overwritten below before it is used.
    events: QuerySet = QuerySet()
    entity_condition, entity_condition_strigified = self.get_entity_condition(
        filter.target_entity, "first_event_date"
    )
    returning_condition, returning_condition_stringified = self.get_entity_condition(
        filter.returning_entity, "events"
    )
    events = Event.objects.filter(team_id=team.pk).add_person_id(team.pk).annotate(event_date=F("timestamp"))
    trunc, fields = self._get_trunc_func("timestamp", period)
    if is_first_time_retention:
        # First-time retention: the property filter is applied without a date filter,
        # so the minimum target-event date is computed first and date-filtered afterwards.
        filtered_events = events.filter(properties_to_Q(filter.properties, team_id=team.pk))
        first_date = (
            filtered_events.filter(entity_condition)
            .values("person_id", "event", "action")
            .annotate(first_date=Min(trunc))
            .filter(filter.custom_date_filter_Q("first_date"))
            .distinct()
        )
        final_query = (
            filtered_events.filter(filter.date_filter_Q)
            .filter(returning_condition)
            .values_list("person_id", "event_date", "event", "action")
            .union(first_date.values_list("first_date", "person_id", "event", "action"))
        )
    else:
        # Recurring retention: date and property filters are applied up front.
        filtered_events = events.filter(filter.date_filter_Q).filter(
            properties_to_Q(filter.properties, team_id=team.pk)
        )
        first_date = (
            filtered_events.filter(entity_condition)
            .annotate(first_date=trunc)
            .values("first_date", "person_id", "event", "action")
            .distinct()
        )
        final_query = (
            filtered_events.filter(returning_condition)
            .values_list("person_id", "event_date", "event", "action")
            .union(first_date.values_list("first_date", "person_id", "event", "action"))
        )
    # "Month" and "Hour" periods pass date_from twice, other periods once —
    # presumably matching the placeholders in `fields`; verify against _get_trunc_func.
    start_params = (
        (filter.date_from, filter.date_from) if period == "Month" or period == "Hour" else (filter.date_from,)
    )
    event_query, events_query_params = final_query.query.sql_with_params()
    reference_event_query, first_date_params = first_date.query.sql_with_params()
    # Entity ids in target, returning, target order — presumably mirroring where the
    # stringified conditions appear in the caller's SQL; confirm against the caller.
    event_params = (filter.target_entity.id, filter.returning_entity.id, filter.target_entity.id)
    return (
        {
            "event_query": event_query,
            "reference_event_query": reference_event_query,
            "fields": fields,
            "return_condition": returning_condition_stringified,
            "target_condition": entity_condition_strigified,
        },
        # Positional order matters: it must line up with the placeholders in the final SQL.
        start_params + events_query_params + first_date_params + event_params,
    )
def test_hour_interval(self):
    """Check the hourly retention table over 11 intervals: labels, counts and dates."""
    Person.objects.create(
        team=self.team,
        distinct_ids=["person1", "alias1"],
        properties={"email": "*****@*****.**"},
    )
    Person.objects.create(
        team=self.team,
        distinct_ids=["person2"],
        properties={"email": "*****@*****.**"},
    )

    # (distinct id, hour of day 0) pairs, in creation order.
    schedule = [
        ("person1", 6),
        ("person2", 6),
        ("person1", 7),
        ("person2", 7),
        ("person1", 8),
        ("person2", 8),
        ("person1", 10),
        ("person1", 11),
        ("person2", 11),
        ("person2", 12),
        ("person1", 14),
        ("person2", 16),
    ]
    self._create_events(
        [(distinct_id, self._date(day=0, hour=hour)) for distinct_id, hour in schedule]
    )

    filter = RetentionFilter(data={"date_to": self._date(0, hour=16), "period": "Hour"})
    rows = retention().run(filter, self.team, total_intervals=11)

    expected_counts = [
        [2, 2, 2, 0, 1, 2, 1, 0, 1, 0, 1],
        [2, 2, 0, 1, 2, 1, 0, 1, 0, 1],
        [2, 0, 1, 2, 1, 0, 1, 0, 1],
        [0, 0, 0, 0, 0, 0, 0, 0],
        [1, 1, 0, 0, 1, 0, 0],
        [2, 1, 0, 1, 0, 1],
        [1, 0, 0, 0, 1],
        [0, 0, 0, 0],
        [1, 0, 0],
        [0, 0],
        [1],
    ]
    # Every hour from 06:00 through 16:00 on 2020-06-10.
    expected_dates = [datetime(2020, 6, 10, hour, tzinfo=pytz.UTC) for hour in range(6, 17)]

    self.assertEqual(self.pluck(rows, "label"), ["Hour {}".format(index) for index in range(11)])
    self.assertEqual(self.pluck(rows, "values", "count"), expected_counts)
    self.assertEqual(self.pluck(rows, "date"), expected_dates)