def test_groups_aggregating(self):
    """Check weekly retention counts when aggregating by group type 0 and by group type 1."""
    self._create_groups_and_events()

    def weekly_filter(group_type_index):
        # Seven weekly intervals ending at the fixture date, aggregated by the given group type.
        return RetentionFilter(
            data={
                "date_to": self._date(10, month=1, hour=0),
                "period": "Week",
                "total_intervals": 7,
                "aggregation_group_type_index": group_type_index,
            },
            team=self.team,
        )

    # Group type 0: retention table plus the actors of the first interval.
    first_type_filter = weekly_filter(0)
    first_type_result = ClickhouseRetention().run(first_type_filter, self.team)
    self.assertEqual(
        self.pluck(first_type_result, "values", "count"),
        [
            [2, 2, 1, 2, 2, 0, 1],
            [2, 1, 2, 2, 0, 1],
            [1, 1, 1, 0, 0],
            [2, 2, 0, 1],
            [2, 0, 1],
            [0, 0],
            [1],
        ],
    )

    first_interval_actors = ClickhouseRetention().actors(
        first_type_filter.with_data({"selected_interval": 0}), self.team
    )
    assert [actor["id"] for actor in first_interval_actors] == ["org:5", "org:6"]

    # Group type 1: only the retention table is checked.
    second_type_filter = weekly_filter(1)
    second_type_result = ClickhouseRetention().run(second_type_filter, self.team)
    self.assertEqual(
        self.pluck(second_type_result, "values", "count"),
        [
            [1, 0, 0, 1, 0, 0, 1],
            [0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0],
            [1, 0, 0, 1],
            [0, 0, 0],
            [0, 0],
            [1],
        ],
    )
def test_simplify_when_aggregating_by_group(self):
    """Simplifying a filter aggregated by group type 0 yields a non-empty `$group_0` event property."""
    expected_properties = {
        "properties": {
            "type": "AND",
            "values": [{"key": "$group_0", "operator": "is_not", "value": "", "type": "event"}],
        }
    }

    simplified = RetentionFilter(data={"aggregation_group_type_index": 0}).simplify(self.team)

    self.assertEqual(simplified.properties_to_dict(), expected_properties)
def test_groups_aggregating(self):
    """Check weekly retention counts for each aggregation group type index (0, then 1)."""
    self._create_groups_and_events()

    # (aggregation_group_type_index, expected count table) pairs, checked in order.
    expectations = [
        (
            0,
            [
                [2, 2, 1, 2, 2, 0, 1],
                [2, 1, 2, 2, 0, 1],
                [1, 1, 1, 0, 0],
                [2, 2, 0, 1],
                [2, 0, 1],
                [0, 0],
                [1],
            ],
        ),
        (
            1,
            [
                [1, 0, 0, 1, 0, 0, 1],
                [0, 0, 0, 0, 0, 0],
                [0, 0, 0, 0, 0],
                [1, 0, 0, 1],
                [0, 0, 0],
                [0, 0],
                [1],
            ],
        ),
    ]

    for group_type_index, expected_counts in expectations:
        group_filter = RetentionFilter(
            data={
                "date_to": self._date(10, month=1, hour=0),
                "period": "Week",
                "total_intervals": 7,
                "aggregation_group_type_index": group_type_index,
            },
            team=self.team,
        )
        table = ClickhouseRetention().run(group_filter, self.team)
        self.assertEqual(self.pluck(table, "values", "count"), expected_counts)
def test_refresh_dashboard_cache_types(
    self, patch_update_cache_item: MagicMock, _patch_apply_async: MagicMock
) -> None:
    """Run the shared cache-type check for a retention, a trends and a stickiness filter."""
    # Each filter is built immediately before its check, one insight type at a time.
    retention_filter = RetentionFilter(
        data={"insight": "RETENTION", "events": [{"id": "cache this"}], "date_to": now().isoformat()}
    )
    self._test_refresh_dashboard_cache_types(retention_filter, CacheType.RETENTION, patch_update_cache_item)

    trends_filter = Filter(data={"insight": "TRENDS", "events": [{"id": "$pageview"}]})
    self._test_refresh_dashboard_cache_types(trends_filter, CacheType.TRENDS, patch_update_cache_item)

    stickiness_filter = StickinessFilter(
        data={
            "insight": "TRENDS",
            "shown_as": "Stickiness",
            "date_from": "2020-01-01",
            "events": [{"id": "watched movie"}],
            ENTITY_TYPE: "events",
            ENTITY_ID: "watched movie",
        },
        team=self.team,
        get_earliest_timestamp=Event.objects.earliest_timestamp,
    )
    self._test_refresh_dashboard_cache_types(stickiness_filter, CacheType.STICKINESS, patch_update_cache_item)
def retention(self, request: Request, *args: Any, **kwargs: Any) -> Response:
    """Run the retention query described by the request and return it under the "data" key."""
    team = self.team
    retention_filter = RetentionFilter(request=request)
    return Response({"data": ClickhouseRetention().run(retention_filter, team)})
def track_retention_filter_by_person_property_materialized(self):
    """Run a first-time weekly `$pageview` retention query filtered on the person property `email`."""
    email_contains_dot_com = {
        "key": "email",
        "operator": "icontains",
        "value": ".com",
        "type": "person",
    }
    query_data = {
        "insight": "RETENTION",
        "target_event": {"id": "$pageview"},
        "returning_event": {"id": "$pageview"},
        "total_intervals": 14,
        "retention_type": "retention_first_time",
        "period": "Week",
        "properties": [email_contains_dot_com],
        **DATE_RANGE,
    }

    retention_filter = RetentionFilter(data=query_data, team=self.team)
    ClickhouseRetention().run(retention_filter, self.team)
def get_filter(team, data: Optional[dict] = None, request: Optional[HttpRequest] = None):
    """Build the filter object matching the requested insight type.

    The insight is read from ``data["insight"]`` and, when that is missing and a
    request is given, from the ``insight`` query parameter of ``request.GET``.

    Args:
        team: Team passed through to ``StickinessFilter``; unused by the other filters.
        data: Raw filter data. ``None`` (the default) is treated as an empty dict.
        request: Optional HTTP request forwarded to the filter constructor.

    Returns:
        A ``RetentionFilter``, ``SessionsFilter``, ``StickinessFilter`` or ``PathFilter``
        for the corresponding insight, otherwise a plain ``Filter``.
    """
    # Imported locally, as in the original — presumably to avoid import cycles (not visible here).
    from posthog.models.filters.filter import Filter
    from posthog.models.filters.retention_filter import RetentionFilter
    from posthog.models.filters.sessions_filter import SessionsFilter
    from posthog.models.filters.stickiness_filter import StickinessFilter

    # A `{}` default would be one dict object shared by every call and handed by
    # reference to StickinessFilter/Filter; start from a fresh dict instead.
    if data is None:
        data = {}

    insight = data.get("insight")
    if not insight and request:
        insight = request.GET.get("insight")

    if insight == INSIGHT_RETENTION:
        return RetentionFilter(data={**data, "insight": INSIGHT_RETENTION}, request=request)
    elif insight == INSIGHT_SESSIONS:
        return SessionsFilter(data={**data, "insight": INSIGHT_SESSIONS}, request=request)
    elif insight == INSIGHT_STICKINESS or (insight == INSIGHT_TRENDS and data.get("shown_as") == "Stickiness"):
        # NOTE(review): `earliest_timestamp_func` is not defined in this block — expected in module scope.
        return StickinessFilter(data=data, request=request, team=team, get_earliest_timestamp=earliest_timestamp_func)
    elif insight == INSIGHT_PATHS:
        # NOTE(review): `PathFilter` is not imported in this block — expected in module scope.
        return PathFilter(data={**data, "insight": INSIGHT_PATHS}, request=request)
    return Filter(data=data, request=request)
def calculate_retention(self, request: Request) -> Dict[str, Any]:
    """Run the retention query for the request, defaulting `date_from` to "-11d" when absent."""
    team = self.team
    # Only supply a default start date when the request does not carry one.
    data = {} if request.GET.get("date_from") else {"date_from": "-11d"}
    retention_filter = RetentionFilter(data=data, request=request, team=self.team)
    return {"result": ClickhouseRetention().run(retention_filter, team)}
def _construct_people_url_for_trend_breakdown_interval(
    self,
    filter: RetentionFilter,
    selected_interval: int,
    breakdown_values: BreakdownValues,
):
    """Return the person-retention API URL for one breakdown value set and interval.

    The query string is the given filter's data with `breakdown_values` and
    `selected_interval` overridden, serialised via `RetentionFilter.to_params()`.
    """
    overrides = {
        "breakdown_values": breakdown_values,
        "selected_interval": selected_interval,
    }
    people_params = RetentionFilter({**filter._data, **overrides}).to_params()
    return f"{self._base_uri}api/person/retention/?{urlencode(people_params)}"
def test_entities(self):
    """Entities given as a dict or as a JSON string both come back as dicts from `to_dict()`."""
    entity_data = {
        "target_entity": {"id": "$autocapture", "type": "events"},
        "returning_entity": '{"id": "signup", "type": "events"}',
    }

    serialized = RetentionFilter(data=entity_data).to_dict()

    self.assertEqual(serialized["target_entity"]["id"], "$autocapture")
    self.assertEqual(serialized["returning_entity"]["id"], "signup")
def test_groups_in_period(self):
    """Check the actors and per-interval appearances returned for the first weekly interval.

    Retention is aggregated by group type 0 over seven weekly intervals.
    """
    self._create_groups_and_events()

    retention_filter = RetentionFilter(
        data={
            "date_to": self._date(10, month=1, hour=0),
            "period": "Week",
            "total_intervals": 7,
            "aggregation_group_type_index": 0,
        },
        team=self.team,
    )

    actor_result = ClickhouseRetention().actors_in_period(
        retention_filter.with_data({"selected_interval": 0}), self.team
    )

    # assertEqual (instead of assertTrue(a == b)) reports both values when the check fails.
    self.assertEqual(actor_result[0]["person"]["id"], "org:6")
    self.assertEqual(actor_result[0]["appearances"], [1, 1, 0, 1, 1, 0, 1])

    self.assertEqual(actor_result[1]["person"]["id"], "org:5")
    self.assertEqual(actor_result[1]["appearances"], [1, 1, 1, 1, 1, 0, 0])
def track_retention(self):
    """Run a first-time weekly `$pageview` retention query over 14 intervals."""
    query_data = {
        "insight": "RETENTION",
        "target_event": {"id": "$pageview"},
        "returning_event": {"id": "$pageview"},
        "total_intervals": 14,
        "retention_type": "retention_first_time",
        "period": "Week",
        **DATE_RANGE,
    }
    retention_filter = RetentionFilter(data=query_data, team=self.team)
    ClickhouseRetention().run(retention_filter, self.team)
def build_returning_event_query(
    filter: RetentionFilter,
    team: Team,
    aggregate_users_by_distinct_id: Optional[bool] = None,
):
    """Return the returning-event retention query with its parameters substituted in."""
    # Avoid pulling in breakdown values from returning event query
    filter_without_breakdowns = filter.with_data({"breakdowns": []})

    events_query = RetentionEventsQuery(
        filter=filter_without_breakdowns,
        team=team,
        event_query_type=RetentionQueryType.RETURNING,
        aggregate_users_by_distinct_id=aggregate_users_by_distinct_id,
    )
    query_template, query_params = events_query.get_query()

    return substitute_params(query_template, query_params)
def get_filter(team, data: Optional[dict] = None, request: Optional[HttpRequest] = None) -> Filter:
    """Build the filter object matching the requested insight type.

    The insight is read from ``data["insight"]`` and, when that is missing and a
    request is given, from the ``insight`` query parameter of ``request.GET``.

    Args:
        team: Team passed through to ``StickinessFilter``; unused by the other filters.
        data: Raw filter data. ``None`` (the default) is treated as an empty dict.
        request: Optional HTTP request forwarded to the filter constructor.

    Returns:
        A ``RetentionFilter`` for retention insights, a ``StickinessFilter`` for trends
        insights shown as "Stickiness", otherwise a plain ``Filter``.
    """
    # Imported locally, as in the original — presumably to avoid import cycles (not visible here).
    from posthog.models.filters.retention_filter import RetentionFilter
    from posthog.models.filters.sessions_filter import SessionsFilter
    from posthog.models.filters.stickiness_filter import StickinessFilter

    # A `{}` default would be one dict object shared by every call and handed by
    # reference to StickinessFilter/Filter; start from a fresh dict instead.
    if data is None:
        data = {}

    insight = data.get("insight")
    if not insight and request:
        insight = request.GET.get("insight")

    if insight == INSIGHT_RETENTION:
        return RetentionFilter(data={**data, "insight": INSIGHT_RETENTION}, request=request)
    elif insight == INSIGHT_TRENDS and data.get("shown_as") == "Stickiness":
        return StickinessFilter(data=data, request=request, team=team)
    return Filter(data=data, request=request)
def retention(self, request: request.Request) -> response.Response:
    """Return the actors behind a retention result, plus a pagination link.

    Responds with HTTP 400 when the requesting user has no team. When the
    `display` query parameter equals `TRENDS_TABLE` the actors come from
    `actors_in_period`, otherwise from `actors`.
    """
    display = request.GET.get("display")
    team = cast(User, request.user).team

    # Guard clause: nothing can be computed without a team.
    if not team:
        error_body = {
            "message": "Could not retrieve team",
            "detail": "Could not validate team associated with user",
        }
        return response.Response(error_body, status=400)

    retention_filter = RetentionFilter(request=request, team=team)
    base_uri = request.build_absolute_uri("/")

    retention_query = self.retention_class(base_uri=base_uri)
    fetch_people = retention_query.actors_in_period if display == TRENDS_TABLE else retention_query.actors
    people = fetch_people(retention_filter, team)

    next_url = paginated_result(people, request, retention_filter.offset)
    return response.Response({"result": people, "next": next_url})
def track_retention_with_person_breakdown(self):
    """Run first-time weekly `$pageview` retention broken down by two person properties.

    The query is executed inside `no_materialized_columns()`.
    """
    person_breakdowns = [
        {"type": "person", "property": "$browser"},
        {"type": "person", "property": "$browser_version"},
    ]
    query_data = {
        "insight": "RETENTION",
        "target_event": {"id": "$pageview"},
        "returning_event": {"id": "$pageview"},
        "total_intervals": 14,
        "retention_type": "retention_first_time",
        "breakdown_type": "person",
        "breakdowns": person_breakdowns,
        "period": "Week",
        **DATE_RANGE,
    }
    retention_filter = RetentionFilter(data=query_data, team=self.team)

    with no_materialized_columns():
        ClickhouseRetention().run(retention_filter, self.team)
def process_table_result(
    self,
    resultset: Dict[CohortKey, Dict[str, Any]],
    filter: RetentionFilter,
):
    """
    Build the REST API response for the case where no breakdown is specified.

    The non-breakdown case is handled separately from the breakdown case so
    that compatibility with the pre-breakdown response format is easy to keep.

    The key difference is that cohorts are "zero filled": every cohort in the
    range gets a row, and any cohort/interval pair missing from `resultset`
    is reported as `{"count": 0, "people": []}`.
    """

    def construct_url(first_day):
        # Link to the people behind the cohort that starts at `first_day`.
        overrides = {"display": "ActionsTable", "breakdown_values": [first_day]}
        params = RetentionFilter({**filter._data, **overrides}).to_params()
        return "/api/person/retention/?" + urlencode(params)

    rows = []
    for first_day in range(filter.total_intervals):
        # Later cohorts have fewer intervals left to be observed in.
        values = []
        for day in range(filter.total_intervals - first_day):
            cohort_key = CohortKey((first_day,), day)
            values.append(resultset.get(cohort_key, {"count": 0, "people": []}))

        rows.append(
            {
                "values": values,
                "label": "{} {}".format(filter.period, first_day),
                "date": (filter.date_from + RetentionFilter.determine_time_delta(first_day, filter.period)[0]),
                "people_url": construct_url(first_day),
            }
        )
    return rows
def test_fill_date_from_and_date_to(self):
    """Check the computed date range and serialised form, without and with an explicit `date_to`."""
    # Case 1: no dates supplied — the range is derived from the frozen clock.
    with freeze_time("2020-10-01T12:00:00Z"):
        default_filter = RetentionFilter(data={})
        # Assert while time is frozen so the derived dates are deterministic.
        self.assertEqual(default_filter.date_from.isoformat(), "2020-09-21T00:00:00+00:00")
        self.assertEqual(default_filter.date_to.isoformat(), "2020-10-02T00:00:00+00:00")

        # Make sure these dates aren't present in final filter to ensure rolling retention
        expected_default = {
            "display": "RetentionTable",
            "insight": "RETENTION",
            "period": "Day",
            "retention_type": "retention_recurring",
            "total_intervals": 11,
        }
        self.assertEqual(default_filter.to_dict(), expected_default)

    # Case 2: explicit date_to — the range is anchored on it instead of the clock.
    with freeze_time("2020-10-01T12:00:00Z"):
        anchored_filter = RetentionFilter(data={"date_to": "2020-08-01"})
        self.assertEqual(anchored_filter.date_from.isoformat(), "2020-07-22T00:00:00+00:00")
        self.assertEqual(anchored_filter.date_to.isoformat(), "2020-08-02T00:00:00+00:00")

        # date_from is still absent from the serialised filter; only date_to is kept.
        expected_anchored = {
            "date_to": "2020-08-02T00:00:00+00:00",
            "display": "RetentionTable",
            "insight": "RETENTION",
            "period": "Day",
            "retention_type": "retention_recurring",
            "total_intervals": 11,
        }
        self.assertEqual(anchored_filter.to_dict(), expected_anchored)
def test_groups_filtering(self):
    """Check weekly retention counts when filtering on the group property `industry`."""
    self._create_groups_and_events()

    def counts_for(group_property):
        # Run seven weekly intervals with a single group property filter applied.
        table = ClickhouseRetention().run(
            RetentionFilter(
                data={
                    "date_to": self._date(10, month=1, hour=0),
                    "period": "Week",
                    "total_intervals": 7,
                    "properties": [group_property],
                },
                team=self.team,
            ),
            self.team,
        )
        return self.pluck(table, "values", "count")

    # Exact match on industry == "technology".
    technology_counts = counts_for(
        {"key": "industry", "value": "technology", "type": "group", "group_type_index": 0}
    )
    self.assertEqual(
        technology_counts,
        [
            [1, 1, 0, 1, 1, 0, 1],
            [1, 0, 1, 1, 0, 1],
            [0, 0, 0, 0, 0],
            [1, 1, 0, 1],
            [1, 0, 1],
            [0, 0],
            [1],
        ],
    )

    # Any group that has the industry property set.
    industry_set_counts = counts_for(
        {"key": "industry", "value": "", "type": "group", "group_type_index": 0, "operator": "is_set"}
    )
    self.assertEqual(
        industry_set_counts,
        [
            [2, 2, 1, 2, 2, 0, 1],
            [2, 1, 2, 2, 0, 1],
            [1, 1, 1, 0, 0],
            [2, 2, 0, 1],
            [2, 0, 1],
            [0, 0],
            [1],
        ],
    )
def construct_url(first_day):
    """Return the person-retention API URL for the cohort starting at `first_day`.

    Relies on a `filter` object from the enclosing scope for the base filter data.
    """
    overrides = {"display": "ActionsTable", "breakdown_values": [first_day]}
    people_filter = RetentionFilter({**filter._data, **overrides})
    return "/api/person/retention/?" + urlencode(people_filter.to_params())
def test_fill_date_from_and_date_to(self):
    """Check the computed date range and serialised form, without and with an explicit `date_to`."""

    def pageview_entity():
        # Entity expected for both target and returning entities; a fresh dict per use.
        return {
            "id": "$pageview",
            "math": None,
            "math_property": None,
            "math_group_type_index": None,
            "name": "$pageview",
            "custom_name": None,
            "order": None,
            "properties": {},
            "type": "events",
        }

    # Case 1: no dates supplied — the range is derived from the frozen clock.
    with freeze_time("2020-10-01T12:00:00Z"):
        default_filter = RetentionFilter(data={})
        # Assert while time is frozen so the derived dates are deterministic.
        self.assertEqual(default_filter.date_from.isoformat(), "2020-09-21T00:00:00+00:00")
        self.assertEqual(default_filter.date_to.isoformat(), "2020-10-02T00:00:00+00:00")

        # Make sure these dates aren't present in final filter to ensure rolling retention
        expected_default = {
            "display": "RetentionTable",
            "insight": "RETENTION",
            "period": "Day",
            "retention_type": "retention_recurring",
            "total_intervals": 11,
            "returning_entity": pageview_entity(),
            "target_entity": pageview_entity(),
        }
        self.assertEqual(default_filter.to_dict(), expected_default)

    # Case 2: explicit date_to — the range is anchored on it instead of the clock.
    with freeze_time("2020-10-01T12:00:00Z"):
        anchored_filter = RetentionFilter(data={"date_to": "2020-08-01"})
        self.assertEqual(anchored_filter.date_from.isoformat(), "2020-07-22T00:00:00+00:00")
        self.assertEqual(anchored_filter.date_to.isoformat(), "2020-08-02T00:00:00+00:00")

        # Make sure date_from isn't present in the final filter to ensure rolling retention.
        # The date_to below is the base value that was provided when the object was created (_date_to).
        # It doesn't match the date_to above because the retention filter transforms date_to
        # to include one period ahead.
        expected_anchored = {
            "date_to": "2020-08-01",
            "display": "RetentionTable",
            "insight": "RETENTION",
            "period": "Day",
            "retention_type": "retention_recurring",
            "total_intervals": 11,
            "returning_entity": pageview_entity(),
            "target_entity": pageview_entity(),
        }
        self.assertEqual(anchored_filter.to_dict(), expected_anchored)