def test_filter_properties(self):
    """Check ``StickinessFilter.to_dict()`` for a monthly ``$pageview`` filter with compare on."""

    def earliest_timestamp_func(team_id):
        # Resolve the Event manager lazily, at call time.
        return Event.objects.earliest_timestamp(team_id)

    payload = {
        "interval": "month",
        "date_from": "2020-01-01T20:00:00Z",
        "date_to": "2020-02-01T20:00:00Z",
        "events": [{"id": "$pageview"}],
        "compare": True,
    }
    expected_event = {
        "id": "$pageview",
        "type": "events",
        "order": None,
        "name": "$pageview",
        "math": None,
        "math_property": None,
        "properties": [],
    }
    expected = {
        "compare": True,
        "date_from": "2020-01-01T20:00:00Z",
        "date_to": "2020-02-01T20:00:00Z",
        "events": [expected_event],
        "insight": "TRENDS",
        "interval": "month",
    }

    stickiness_filter = StickinessFilter(
        data=payload,
        team=self.team,
        get_earliest_timestamp=earliest_timestamp_func,
    )

    self.assertEqual(stickiness_filter.to_dict(), expected)
def test_filter_properties(self):
    """Check ``StickinessFilter.to_dict()`` when the event carries a ``custom_name``."""
    payload = {
        "interval": "month",
        "date_from": "2020-01-01T20:00:00Z",
        "date_to": "2020-02-01T20:00:00Z",
        "events": [{"id": "$pageview", "custom_name": "Custom event"}],
        "compare": True,
    }
    expected_event = {
        "id": "$pageview",
        "type": "events",
        "order": None,
        "name": "$pageview",
        "custom_name": "Custom event",
        "math": None,
        "math_property": None,
        "math_group_type_index": None,
        "properties": {},
    }
    expected = {
        "compare": True,
        "date_from": "2020-01-01T20:00:00Z",
        "date_to": "2020-02-01T20:00:00Z",
        "events": [expected_event],
        "insight": "TRENDS",
        "interval": "month",
    }

    stickiness_filter = StickinessFilter(
        data=payload,
        team=self.team,
        get_earliest_timestamp=get_earliest_timestamp,
    )

    self.assertEqual(stickiness_filter.to_dict(), expected)
def test_stickiness_regression(self, patch_update_cache_item: MagicMock, patch_apply_async: MagicMock) -> None:
    """Cache a stickiness item and a plain item on one shared dashboard and check both label sets."""
    # Stickiness used to be a "shown_as" item and was later moved to its own insight.
    # That move caused issues, hence this regression test.

    def cache_key_for(insight_filter):
        # Same key recipe for both items: serialized filter + "_" + team pk.
        return generate_cache_key(insight_filter.toJSON() + "_" + str(self.team.pk))

    filter_stickiness = StickinessFilter(
        data={
            "events": [{"id": "$pageview"}],
            "properties": [{"key": "$browser", "value": "Mac OS X"}],
            "date_from": "2012-01-10",
            "date_to": "2012-01-15",
            "insight": INSIGHT_STICKINESS,
            "shown_as": "Stickiness",
        },
        team=self.team,
        get_earliest_timestamp=Event.objects.earliest_timestamp,
    )
    plain_filter = Filter(
        data={
            "events": [{"id": "$pageview"}],
            "properties": [{"key": "$browser", "value": "Mac OS X"}],
            "date_from": "2012-01-10",
            "date_to": "2012-01-15",
        }
    )

    shared_dashboard = Dashboard.objects.create(team=self.team, is_shared=True)
    DashboardItem.objects.create(dashboard=shared_dashboard, filters=filter_stickiness.to_dict(), team=self.team)
    DashboardItem.objects.create(dashboard=shared_dashboard, filters=plain_filter.to_dict(), team=self.team)

    item_stickiness_key = cache_key_for(filter_stickiness)
    item_key = cache_key_for(plain_filter)

    update_cached_items()

    # Replay every call the patched update_cache_item received, using the real function.
    for recorded_call in patch_update_cache_item.call_args_list:
        positional_args = recorded_call[0]
        update_cache_item(*positional_args)

    expected_stickiness_labels = ["1 day", "2 days", "3 days", "4 days", "5 days", "6 days"]
    expected_date_labels = [
        "Tue. 10 January",
        "Wed. 11 January",
        "Thu. 12 January",
        "Fri. 13 January",
        "Sat. 14 January",
        "Sun. 15 January",
    ]

    self.assertEqual(get_safe_cache(item_stickiness_key)["result"][0]["labels"], expected_stickiness_labels)
    self.assertEqual(get_safe_cache(item_key)["result"][0]["labels"], expected_date_labels)
def test_timezones(self, patch_feature_enabled):
    """Run the same stickiness query before and after switching the team to US/Pacific."""

    def run_stickiness():
        # A fresh filter is built on every call, after any change made to self.team.
        stickiness_filter = StickinessFilter(
            data={
                "shown_as": "Stickiness",
                "date_from": "2021-05-01",
                "date_to": "2021-05-15",
                "events": [{"id": "$pageview"}],
            },
            team=self.team,
        )
        return ClickhouseStickiness().run(filter=stickiness_filter, team=self.team)

    journeys_for(
        {
            "person1": [
                # this time will fall on 5/1 in US Pacific
                {"event": "$pageview", "timestamp": datetime(2021, 5, 2, 1)},
                {"event": "$pageview", "timestamp": datetime(2021, 5, 2, 9)},
                {"event": "$pageview", "timestamp": datetime(2021, 5, 4, 3)},
            ],
        },
        self.team,
    )

    expected_days = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]

    data = run_stickiness()
    self.assertEqual(data[0]["days"], expected_days)
    self.assertEqual(data[0]["data"], [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

    self.team.timezone = "US/Pacific"
    self.team.save()

    data_pacific = run_stickiness()
    self.assertEqual(data_pacific[0]["days"], expected_days)
    self.assertEqual(data_pacific[0]["data"], [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
def _retrieve_people(self, target_entity: Entity, filter: StickinessFilter, team: Team) -> ReturnDict:
    """Serialize the people whose distinct-interval count equals ``filter.selected_interval``.

    Returns serialized data for an empty list when the entity is an action that
    cannot be found among the team's non-deleted actions.

    Raises:
        ValueError: if the entity type is neither events nor actions.
    """
    from posthog.api.person import PersonSerializer

    entity_type = target_entity.type
    if entity_type not in (TREND_FILTER_TYPE_EVENTS, TREND_FILTER_TYPE_ACTIONS):
        raise ValueError("target entity must be action or event")

    if entity_type == TREND_FILTER_TYPE_ACTIONS:
        # Bail out early when the action does not exist for this team.
        team_actions = Action.objects.filter(deleted=False, team=team).prefetch_related(
            Prefetch("steps", queryset=ActionStep.objects.order_by("id"))
        )
        try:
            team_actions.get(pk=target_entity.id)
        except Action.DoesNotExist:
            return PersonSerializer([], many=True).data

    # Both entity types build the event queryset the same way.
    entity_events = base.process_entity_for_events(target_entity, team_id=team.pk, order_by=None)
    filtered_events = entity_events.filter(base.filter_events(team.pk, filter, target_entity))

    per_person = filtered_events.values("person_id")
    per_person = per_person.annotate(day_count=Count(filter.trunc_func("timestamp"), distinct=True))
    per_person = per_person.filter(day_count=filter.selected_interval)

    # Page of at most 100 rows starting at the filter's offset.
    page = per_person[filter.offset : filter.offset + 100]
    person_ids = [row["person_id"] for row in page]

    people = Person.objects.filter(team=team, id__in=person_ids).prefetch_related(
        Prefetch("persondistinctid_set", to_attr="distinct_ids_cache")
    )
    return PersonSerializer(people, many=True).data
def test_stickiness_prop_filter(self):
    """Run daily stickiness for "watched movie" restricted to ``$browser == Chrome``."""
    self._create_multiple_people()

    filter_data = {
        "shown_as": "Stickiness",
        "date_from": "2020-01-01",
        "date_to": "2020-01-08",
        "events": [{"id": "watched movie"}],
        "properties": [{"key": "$browser", "value": "Chrome"}],
    }

    with freeze_time("2020-01-08T13:01:01Z"):
        stickiness_filter = StickinessFilter(
            data=filter_data,
            team=self.team,
            get_earliest_timestamp=get_earliest_timestamp,
        )
        response = stickiness().run(stickiness_filter, self.team)

    series = response[0]
    self.assertEqual(series["count"], 4)

    # (index, expected label, expected value) for each bucket under test.
    expected_buckets = [
        (0, "1 day", 2),
        (1, "2 days", 1),
        (2, "3 days", 1),
        (6, "7 days", 0),
    ]
    for index, label, value in expected_buckets:
        self.assertEqual(series["labels"][index], label)
        self.assertEqual(series["data"][index], value)
def test_stickiness_people_endpoint(self):
    """People lookup for an action entity with ``stickiness_days=1`` returns person1 and person4."""
    person1, _, _, person4 = self._create_multiple_people()
    watched_movie = action_factory(team=self.team, name="watch movie action", event_name="watched movie")

    filter_data = {
        "shown_as": "Stickiness",
        "stickiness_days": 1,
        "date_from": "2020-01-01",
        "date_to": "2020-01-08",
    }
    stickiness_filter = StickinessFilter(
        data=filter_data,
        team=self.team,
        get_earliest_timestamp=get_earliest_timestamp,
    )
    target_entity = Entity({"id": watched_movie.id, "type": "actions"})
    request = APIRequestFactory().get("/person/stickiness")

    people = stickiness().people(target_entity, stickiness_filter, self.team, request)

    # Compare as sorted string ids so ordering does not matter.
    returned_ids = sorted(str(person["id"]) for person in people)
    expected_ids = sorted(str(person.pk) for person in (person1, person4))
    self.assertListEqual(returned_ids, expected_ids)
def test_stickiness_people_with_entity_filter(self):
    """People lookup with a ``$browser == Chrome`` property on the entity returns only person1."""
    person1, _, _, _ = self._create_multiple_people()

    filter_data = {
        "shown_as": "Stickiness",
        "stickiness_days": 1,
        "date_from": "2020-01-01",
        "date_to": "2020-01-08",
    }
    stickiness_filter = StickinessFilter(
        data=filter_data,
        team=self.team,
        get_earliest_timestamp=get_earliest_timestamp,
    )
    entity_data = {
        "id": "watched movie",
        "type": "events",
        "properties": [{"key": "$browser", "value": "Chrome"}],
    }
    target_entity = Entity(entity_data)
    request = APIRequestFactory().get("/person/stickiness")

    people = stickiness().people(target_entity, stickiness_filter, self.team, request)

    self.assertEqual(len(people), 1)
    self.assertEqual(people[0]["id"], person1.id)
def stickiness(self, request: request.Request) -> response.Response:
    """Return the people behind a stickiness data point, plus a pagination URL.

    Responds with HTTP 400 when the requesting user has no team.
    """
    team = cast(User, request.user).team
    if not team:
        error_body = {
            "message": "Could not retrieve team",
            "detail": "Could not validate team associated with user",
        }
        return response.Response(error_body, status=400)

    def earliest_timestamp_func(team_id):
        # Resolve the Event manager lazily, at call time.
        return Event.objects.earliest_timestamp(team_id)

    stickiness_filter = StickinessFilter(
        request=request,
        team=team,
        get_earliest_timestamp=earliest_timestamp_func,
    )
    target_entity = get_target_entity(request)
    people = self.stickiness_class().people(target_entity, stickiness_filter, team, request)
    next_url = paginated_result(people, request, stickiness_filter.offset)

    page = {"people": people, "count": len(people)}
    return response.Response({"results": [page], "next": next_url})
def test_filter_test_accounts(self):
    """Run daily stickiness for "watched movie" with ``filter_test_accounts`` set to "true"."""
    self._create_multiple_people()

    # Extra person plus one event; the factory's return value is not needed,
    # so it is no longer bound to an unused local.
    person_factory(team_id=self.team.id, distinct_ids=["ph"], properties={"email": "*****@*****.**"})
    event_factory(
        team=self.team,
        event="watched movie",
        distinct_id="ph",
        timestamp=datetime.fromisoformat("2020-01-01T12:00:00.000000").replace(tzinfo=timezone.utc).isoformat(),
        properties={"$browser": "Chrome"},
    )

    with freeze_time("2020-01-08T13:01:01Z"):
        stickiness_filter = StickinessFilter(
            data={
                "shown_as": "Stickiness",
                "date_from": "2020-01-01",
                "date_to": "2020-01-08",
                "events": [{"id": "watched movie"}],
                "filter_test_accounts": "true",
            },
            team=self.team,
            get_earliest_timestamp=get_earliest_timestamp,
        )
        response = stickiness().run(stickiness_filter, self.team)

    series = response[0]
    self.assertEqual(series["count"], 4)

    # (index, expected label, expected value) for each bucket under test.
    expected_buckets = [
        (0, "1 day", 2),
        (1, "2 days", 1),
        (2, "3 days", 1),
        (6, "7 days", 0),
    ]
    for index, label, value in expected_buckets:
        self.assertEqual(series["labels"][index], label)
        self.assertEqual(series["data"][index], value)
def test_refresh_dashboard_cache_types(
    self, patch_update_cache_item: MagicMock, _patch_apply_async: MagicMock
) -> None:
    """Exercise the shared cache-type check for retention, trends and stickiness filters."""
    # Each filter is built immediately before its check, one after another.
    retention_filter = RetentionFilter(
        data={"insight": "RETENTION", "events": [{"id": "cache this"}], "date_to": now().isoformat()}
    )
    self._test_refresh_dashboard_cache_types(retention_filter, CacheType.RETENTION, patch_update_cache_item)

    trends_filter = Filter(data={"insight": "TRENDS", "events": [{"id": "$pageview"}]})
    self._test_refresh_dashboard_cache_types(trends_filter, CacheType.TRENDS, patch_update_cache_item)

    stickiness_data = {
        "insight": "TRENDS",
        "shown_as": "Stickiness",
        "date_from": "2020-01-01",
        "events": [{"id": "watched movie"}],
        ENTITY_TYPE: "events",
        ENTITY_ID: "watched movie",
    }
    stickiness_filter = StickinessFilter(
        data=stickiness_data,
        team=self.team,
        get_earliest_timestamp=Event.objects.earliest_timestamp,
    )
    self._test_refresh_dashboard_cache_types(stickiness_filter, CacheType.STICKINESS, patch_update_cache_item)
def stickiness(self, request: request.Request) -> response.Response:
    """Return the people behind a stickiness data point, plus a pagination URL.

    Responds with HTTP 400 when the requesting user has no team. When the
    filter has no limit, a limit of 100 is applied.
    """
    team = cast(User, request.user).team
    if not team:
        error_body = {
            "message": "Could not retrieve team",
            "detail": "Could not validate team associated with user",
        }
        return response.Response(error_body, status=400)

    requested_filter = StickinessFilter(request=request, team=team, get_earliest_timestamp=get_earliest_timestamp)
    # Fall back to a limit of 100 when the request did not provide one.
    stickiness_filter = requested_filter if requested_filter.limit else requested_filter.with_data({LIMIT: 100})

    target_entity = get_target_entity(stickiness_filter)
    people = self.stickiness_class().people(target_entity, stickiness_filter, team, request)
    next_url = paginated_result(people, request, stickiness_filter.offset)

    page = {"people": people, "count": len(people)}
    return response.Response({"results": [page], "next": next_url})
def get_filter(team, data: Optional[dict] = None, request: Optional[HttpRequest] = None):
    """Build the filter object that matches the requested insight type.

    The insight is read from ``data["insight"]`` and, when absent, from the
    request's ``insight`` query parameter.

    Args:
        team: Team passed through to ``StickinessFilter``.
        data: Raw filter data; defaults to an empty dict.
        request: Optional HTTP request forwarded to the filter constructors.

    Returns:
        A ``RetentionFilter``, ``SessionsFilter``, ``StickinessFilter`` or
        ``PathFilter`` for the matching insight, otherwise a plain ``Filter``.
    """
    from posthog.models.filters.filter import Filter
    from posthog.models.filters.retention_filter import RetentionFilter
    from posthog.models.filters.sessions_filter import SessionsFilter
    from posthog.models.filters.stickiness_filter import StickinessFilter

    # A None default replaces the shared mutable ``{}`` default.
    if data is None:
        data = {}

    insight = data.get("insight")
    if not insight and request:
        insight = request.GET.get("insight")

    if insight == INSIGHT_RETENTION:
        return RetentionFilter(data={**data, "insight": INSIGHT_RETENTION}, request=request)
    elif insight == INSIGHT_SESSIONS:
        return SessionsFilter(data={**data, "insight": INSIGHT_SESSIONS}, request=request)
    elif insight == INSIGHT_STICKINESS or (insight == INSIGHT_TRENDS and data.get("shown_as") == "Stickiness"):
        # NOTE(review): earliest_timestamp_func is not defined in this function;
        # presumably it is a module-level name - confirm.
        return StickinessFilter(data=data, request=request, team=team, get_earliest_timestamp=earliest_timestamp_func)
    elif insight == INSIGHT_PATHS:
        # NOTE(review): PathFilter is not imported here; presumably imported at module level - confirm.
        return PathFilter(data={**data, "insight": INSIGHT_PATHS}, request=request)
    return Filter(data=data, request=request)
def insert_cohort_from_query(cohort_id: int, insight_type: str, filter_data: Dict[str, Any], entity_data: Dict[str, Any]) -> None:
    """Populate a cohort from an insight query; does nothing unless ClickHouse is enabled.

    Stickiness insights go through the stickiness inserter, every other
    insight type through the entity inserter. The cohort's people are then
    inserted into Postgres.
    """
    if not is_clickhouse_enabled():
        return

    from ee.clickhouse.queries.clickhouse_stickiness import insert_stickiness_people_into_cohort
    from ee.clickhouse.queries.util import get_earliest_timestamp
    from ee.clickhouse.views.actions import insert_entity_people_into_cohort
    from ee.clickhouse.views.cohort import insert_cohort_people_into_pg
    from posthog.models.entity import Entity
    from posthog.models.filters.filter import Filter
    from posthog.models.filters.stickiness_filter import StickinessFilter

    target_cohort = Cohort.objects.get(pk=cohort_id)
    target_entity = Entity(data=entity_data)

    if insight_type == INSIGHT_STICKINESS:
        stickiness_filter = StickinessFilter(
            data=filter_data,
            team=target_cohort.team,
            get_earliest_timestamp=get_earliest_timestamp,
        )
        insert_stickiness_people_into_cohort(target_cohort, target_entity, stickiness_filter)
    else:
        insert_entity_people_into_cohort(target_cohort, target_entity, Filter(data=filter_data))

    insert_cohort_people_into_pg(cohort=target_cohort)
def test_stickiness_hours(self):
    """Run stickiness for "watched movie" with an hourly interval."""
    self._create_multiple_people(period=timedelta(hours=1))

    filter_data = {
        "shown_as": "Stickiness",
        "date_from": "2020-01-01T12:00:00.00Z",
        "date_to": "2020-01-01T20:00:00.00Z",
        "events": [{"id": "watched movie"}],
        "interval": "hour",
    }

    with freeze_time("2020-01-01T20:01:01Z"):
        stickiness_filter = StickinessFilter(
            data=filter_data,
            team=self.team,
            get_earliest_timestamp=get_earliest_timestamp,
        )
        response = stickiness().run(stickiness_filter, self.team)

    series = response[0]
    self.assertEqual(series["count"], 4)

    # (index, expected label, expected value) for each bucket under test.
    expected_buckets = [
        (0, "1 hour", 2),
        (1, "2 hours", 1),
        (2, "3 hours", 1),
        (6, "7 hours", 0),
    ]
    for index, label, value in expected_buckets:
        self.assertEqual(series["labels"][index], label)
        self.assertEqual(series["data"][index], value)
def track_stickiness_filter_by_person_property(self):
    """Run a ``$pageview`` stickiness query filtered on a person ``email`` property.

    The query itself runs inside ``no_materialized_columns()``.
    """
    email_contains_dot_com = {
        "key": "email",
        "operator": "icontains",
        "value": ".com",
        "type": "person",
    }
    filter_data = {
        "insight": "STICKINESS",
        "events": [{"id": "$pageview"}],
        "shown_as": "Stickiness",
        "display": "ActionsLineGraph",
        "properties": [email_contains_dot_com],
        # Spread last so DATE_RANGE keys win over any earlier duplicates.
        **DATE_RANGE,
    }
    stickiness_filter = StickinessFilter(data=filter_data, team=self.team)

    with no_materialized_columns():
        ClickhouseStickiness().run(stickiness_filter, self.team)
def test_stickiness_months(self):
    """Run stickiness for "watched movie" with a monthly interval."""
    self._create_multiple_people(period=relativedelta(months=1))

    filter_data = {
        "shown_as": "Stickiness",
        "date_from": "2020-01-01",
        "date_to": "2020-09-08",
        "events": [{"id": "watched movie"}],
        "interval": "month",
    }

    with freeze_time("2020-02-08T13:01:01Z"):
        stickiness_filter = StickinessFilter(
            data=filter_data,
            team=self.team,
            get_earliest_timestamp=get_earliest_timestamp,
        )
        response = stickiness().run(stickiness_filter, self.team)

    series = response[0]
    self.assertEqual(series["count"], 4)

    # (index, expected label, expected value) for each bucket under test.
    expected_buckets = [
        (0, "1 month", 2),
        (1, "2 months", 1),
        (2, "3 months", 1),
        (6, "7 months", 0),
    ]
    for index, label, value in expected_buckets:
        self.assertEqual(series["labels"][index], label)
        self.assertEqual(series["data"][index], value)
def insert_cohort_actors_into_ch(cohort: Cohort, filter_data: Dict):
    """Insert the actors matched by an insight query into ``cohort``.

    A query builder is chosen from ``filter_data["insight"]`` (trends,
    stickiness, funnels or paths). Unsupported insight types and
    group-aggregated queries raise ``ValueError`` when ``settings.DEBUG`` is
    on; otherwise they are reported via ``capture_exception`` and nothing is
    inserted.

    Args:
        cohort: Cohort to populate; its team is used to build the query.
        filter_data: Raw filter data, including the ``insight`` key.

    Raises:
        ValueError: in DEBUG mode, for unsupported insight types or group-based queries.
    """
    insight_type = filter_data.get("insight")
    query_builder: ActorBaseQuery

    if insight_type == INSIGHT_TRENDS:
        filter = Filter(data=filter_data, team=cohort.team)
        entity = get_target_entity(filter)
        query_builder = ClickhouseTrendsActors(cohort.team, entity, filter)
    elif insight_type == INSIGHT_STICKINESS:
        stickiness_filter = StickinessFilter(data=filter_data, team=cohort.team)
        entity = get_target_entity(stickiness_filter)
        query_builder = ClickhouseStickinessActors(cohort.team, entity, stickiness_filter)
    elif insight_type == INSIGHT_FUNNELS:
        funnel_filter = Filter(data=filter_data, team=cohort.team)
        if funnel_filter.correlation_person_entity:
            query_builder = FunnelCorrelationActors(filter=funnel_filter, team=cohort.team)
        else:
            funnel_actor_class = get_funnel_actor_class(funnel_filter)
            query_builder = funnel_actor_class(filter=funnel_filter, team=cohort.team)
    elif insight_type == INSIGHT_PATHS:
        path_filter = PathFilter(data=filter_data, team=cohort.team)
        query_builder = ClickhousePathsActors(path_filter, cohort.team, funnel_filter=None)
    else:
        if settings.DEBUG:
            raise ValueError(f"Insight type: {insight_type} not supported for cohort creation")
        capture_exception(Exception(f"Insight type: {insight_type} not supported for cohort creation"))
        # No query builder was created: stop here instead of falling through to
        # the attribute access below, which would raise UnboundLocalError.
        return

    if query_builder.is_aggregating_by_groups:
        if settings.DEBUG:
            raise ValueError("Query type: Group based queries are not supported for cohort creation")
        capture_exception(Exception("Query type: Group based queries are not supported for cohort creation"))
        return

    query, params = query_builder.actor_query(limit_actors=False)
    insert_actors_into_cohort_by_query(cohort, query, params)
def track_stickiness(self):
    """Run a ``$pageview`` stickiness query over ``DATE_RANGE``."""
    filter_data = {
        "insight": "STICKINESS",
        "events": [{"id": "$pageview"}],
        "shown_as": "Stickiness",
        "display": "ActionsLineGraph",
        # Spread last so DATE_RANGE keys win over any earlier duplicates.
        **DATE_RANGE,
    }
    stickiness_filter = StickinessFilter(data=filter_data, team=self.team)
    ClickhouseStickiness().run(stickiness_filter, self.team)
def trend(self, request: Request, *args: Any, **kwargs: Any) -> Response:
    """Run a stickiness or trends query for the request and wrap the result in a ``Response``."""
    team = self.team
    base_filter = Filter(request=request)
    wants_stickiness = base_filter.shown_as == TRENDS_STICKINESS

    if not wants_stickiness:
        result = ClickhouseTrends().run(base_filter, team)
    else:
        stickiness_filter = StickinessFilter(
            request=request,
            team=team,
            get_earliest_timestamp=get_earliest_timestamp,
        )
        result = ClickhouseStickiness().run(stickiness_filter, team)

    self._refresh_dashboard(request=request)
    return Response(result)
def calculate_trends(self, request: Request) -> List[Dict[str, Any]]:
    """Run a stickiness or trends query for the request, then refresh the dashboard."""
    team = self.team
    base_filter = Filter(request=request)
    wants_stickiness = base_filter.shown_as == TRENDS_STICKINESS

    if not wants_stickiness:
        result = ClickhouseTrends().run(base_filter, team)
    else:

        def earliest_timestamp_func(team_id):
            # Thin wrapper so the lookup of get_earliest_timestamp happens at call time.
            return get_earliest_timestamp(team_id)

        stickiness_filter = StickinessFilter(
            request=request,
            team=team,
            get_earliest_timestamp=earliest_timestamp_func,
        )
        result = ClickhouseStickiness().run(stickiness_filter, team)

    self._refresh_dashboard(request=request)
    return result
def calculate_trends(self, request: Request) -> Dict[str, Any]:
    """Run a stickiness or trends query and return it under the ``"result"`` key."""
    team = self.team
    base_filter = Filter(request=request)
    wants_stickiness = base_filter.shown_as == TRENDS_STICKINESS

    if not wants_stickiness:
        result = ClickhouseTrends().run(base_filter, team)
    else:
        stickiness_filter = StickinessFilter(
            request=request,
            team=team,
            get_earliest_timestamp=get_earliest_timestamp,
        )
        result = ClickhouseStickiness().run(stickiness_filter, team)

    self._refresh_dashboard(request=request)
    return {"result": result}
def calculate_trends(self, request: request.Request) -> Dict[str, Any]:
    """Run a stickiness or trends query and return it under the ``"result"`` key."""
    team = self.team
    base_filter = Filter(request=request)
    wants_stickiness = base_filter.shown_as == TRENDS_STICKINESS

    if not wants_stickiness:
        result = trends.Trends().run(base_filter, team)
    else:

        def earliest_timestamp_func(team_id):
            # Resolve the Event manager lazily, at call time.
            return Event.objects.earliest_timestamp(team_id)

        stickiness_filter = StickinessFilter(
            request=request,
            team=team,
            get_earliest_timestamp=earliest_timestamp_func,
        )
        result = stickiness.Stickiness().run(stickiness_filter, team)

    self._refresh_dashboard(request=request)
    return {"result": result}
def calculate_trends(self, request: request.Request) -> List[Dict[str, Any]]:
    """Run a stickiness or trends query for the request, then refresh the dashboard."""
    team = self.team
    base_filter = Filter(request=request)
    wants_stickiness = base_filter.shown_as == TRENDS_STICKINESS

    if not wants_stickiness:
        result = trends.Trends().run(base_filter, team)
    else:
        stickiness_filter = StickinessFilter(
            request=request,
            team=team,
            get_earliest_timestamp=Event.objects.earliest_timestamp,
        )
        result = stickiness.Stickiness().run(stickiness_filter, team)

    self._refresh_dashboard(request=request)
    return result
def _calculate_trends(self, request: request.Request) -> List[Dict[str, Any]]:
    """Run a stickiness or trends query for the request.

    When the request carries a ``from_dashboard`` query parameter, the
    ``DashboardItem`` with that primary key gets its ``last_refresh`` updated.
    """
    team = self.team
    base_filter = Filter(request=request)
    wants_stickiness = base_filter.shown_as == TRENDS_STICKINESS

    if not wants_stickiness:
        result = trends.Trends().run(base_filter, team)
    else:
        stickiness_filter = StickinessFilter(
            request=request,
            team=team,
            get_earliest_timestamp=Event.objects.earliest_timestamp,
        )
        result = stickiness.Stickiness().run(stickiness_filter, team)

    dashboard_id = request.GET.get("from_dashboard", None)
    if dashboard_id:
        matching_items = DashboardItem.objects.filter(pk=dashboard_id)
        matching_items.update(last_refresh=now())

    return result
def get_filter(team, data: Optional[dict] = None, request: Optional[HttpRequest] = None) -> Filter:
    """Build the filter object that matches the requested insight type.

    The insight is read from ``data["insight"]`` and, when absent, from the
    request's ``insight`` query parameter.

    Args:
        team: Team passed through to ``StickinessFilter``.
        data: Raw filter data; defaults to an empty dict.
        request: Optional HTTP request forwarded to the filter constructors.

    Returns:
        A ``RetentionFilter`` for retention, a ``StickinessFilter`` for trends
        shown as "Stickiness", otherwise a plain ``Filter``.
    """
    from posthog.models.filters.retention_filter import RetentionFilter
    from posthog.models.filters.sessions_filter import SessionsFilter
    from posthog.models.filters.stickiness_filter import StickinessFilter

    # A None default replaces the shared mutable ``{}`` default.
    if data is None:
        data = {}

    insight = data.get("insight")
    if not insight and request:
        insight = request.GET.get("insight")

    if insight == INSIGHT_RETENTION:
        return RetentionFilter(data={**data, "insight": INSIGHT_RETENTION}, request=request)
    elif insight == INSIGHT_TRENDS and data.get("shown_as") == "Stickiness":
        return StickinessFilter(data=data, request=request, team=team)
    return Filter(data=data, request=request)
def _handle_static(self, cohort: Cohort, request: Request):
    """Populate a static cohort from an uploaded CSV or from the request's insight filter.

    When the request contains a ``csv`` file it is handed to
    ``_calculate_static_by_csv``. Otherwise the people are derived from the
    request's filter: stickiness people when shown as stickiness, trend
    people in every other case.

    Raises:
        ValueError: if anything fails on the filter path; the original
            exception is reported via ``capture_exception`` and chained as the cause.
    """
    if request.FILES.get("csv"):
        self._calculate_static_by_csv(request.FILES["csv"], cohort)
        return

    try:
        filter = Filter(request=request)
        team = request.user.team
        target_entity = get_target_entity(request)
        if filter.shown_as == TRENDS_STICKINESS:
            stickiness_filter = StickinessFilter(
                request=request, team=team, get_earliest_timestamp=self.earliest_timestamp_func
            )
            self._handle_stickiness_people(target_entity, cohort, stickiness_filter)
        else:
            self._handle_trend_people(target_entity, cohort, filter)
    except Exception as e:
        capture_exception(e)
        # Chain explicitly so the root cause stays attached to the ValueError.
        raise ValueError("This cohort has no conditions") from e
def stickiness(self, entity: Entity, filter: StickinessFilter, team_id: int) -> Dict[str, Any]:
    """Count people per number of distinct intervals in which they triggered ``entity``.

    Builds a per-person queryset annotated with the distinct count of
    truncated timestamps, wraps its SQL in an outer ``group by`` on that
    count, and hands the rows to ``process_result``.

    Args:
        entity: Event or action entity to count.
        filter: Stickiness filter providing ``trunc_func`` and ``total_intervals``.
        team_id: Team whose events are queried.

    Returns:
        Whatever ``self.process_result`` builds from the aggregated rows.
    """
    events = process_entity_for_events(entity=entity, team_id=team_id, order_by=None)

    # The event predicate is applied once; the original applied the identical
    # predicate twice in a row.
    per_person_intervals = (
        events.filter(filter_events(team_id, filter, entity))
        .values("person_id")
        .annotate(interval_count=Count(filter.trunc_func("timestamp"), distinct=True))
        .filter(interval_count__lte=filter.total_intervals)
    )

    # Only ORM-generated SQL is interpolated; its parameters are passed separately.
    events_sql, events_sql_params = per_person_intervals.query.sql_with_params()
    aggregated_query = "select count(v.person_id), v.interval_count from ({}) as v group by v.interval_count".format(
        events_sql
    )
    counts = execute_custom_sql(aggregated_query, events_sql_params)
    return self.process_result(counts, filter)
def test_stickiness_action(self):
    """Run daily stickiness for an action and check its label, count and first bucket label."""
    self._create_multiple_people()
    watched_movie = action_factory(team=self.team, name="watch movie action", event_name="watched movie")

    filter_data = {
        "shown_as": "Stickiness",
        "date_from": "2020-01-01",
        "date_to": "2020-01-08",
        "actions": [{"id": watched_movie.pk}],
    }

    with freeze_time("2020-01-08T13:01:01Z"):
        stickiness_filter = StickinessFilter(
            data=filter_data,
            team=self.team,
            get_earliest_timestamp=get_earliest_timestamp,
        )
        response = stickiness().run(stickiness_filter, self.team)

    series = response[0]
    self.assertEqual(series["label"], "watch movie action")
    self.assertEqual(series["count"], 4)
    self.assertEqual(series["labels"][0], "1 day")
def _calculate_trends(self, request: request.Request) -> List[Dict[str, Any]]:
    """Run a stickiness or trends query for the request.

    Stickiness is used when the filter's insight is stickiness or it is shown
    as stickiness. When the request carries a ``from_dashboard`` query
    parameter, the ``Insight`` with that primary key gets its ``last_refresh`` updated.
    """
    team = self.team
    base_filter = Filter(request=request, team=self.team)
    wants_stickiness = base_filter.insight == INSIGHT_STICKINESS or base_filter.shown_as == TRENDS_STICKINESS

    if not wants_stickiness:
        result = trends.Trends().run(base_filter, team)
    else:

        def earliest_timestamp_func(team_id):
            # Resolve the Event manager lazily, at call time.
            return Event.objects.earliest_timestamp(team_id)

        stickiness_filter = StickinessFilter(
            request=request,
            team=team,
            get_earliest_timestamp=earliest_timestamp_func,
        )
        result = stickiness.Stickiness().run(stickiness_filter, team)

    dashboard_id = request.GET.get("from_dashboard", None)
    if dashboard_id:
        matching_insights = Insight.objects.filter(pk=dashboard_id)
        matching_insights.update(last_refresh=now())

    return result