def calculate_list(self, filter: Filter, team: Team, limit: int, offset: int):
    """Return a paginated list of sessions for `team`, enriched with person
    properties and session recording ids."""
    prop_filters, params = parse_prop_clauses("uuid", filter.properties, team)

    # Default the window to "today" when the caller supplied no explicit range.
    if not filter._date_from:
        filter._date_from = timezone.now().replace(hour=0, minute=0, second=0, microsecond=0)
    if not filter._date_to and filter.date_from:
        filter._date_to = filter.date_from + relativedelta(days=1)
    date_from, date_to = parse_timestamps(filter)

    params = {
        **params,
        "team_id": team.pk,
        "limit": limit,
        "offset": offset,
        "distinct_id_limit": limit + offset,
    }
    query = SESSION_SQL.format(
        date_from=date_from,
        date_to=date_to,
        filters="{}".format(prop_filters) if filter.properties else "",
        sessions_limit="LIMIT %(offset)s, %(limit)s",
    )
    rows = sync_execute(query, params)
    result = self._parse_list_results(rows)
    self._add_person_properties(team, result)
    add_session_recording_ids(team, result)
    return result
def test_old_style_properties(self):
    """Legacy `key__operator` property dicts are parsed into Property objects."""
    filter = Filter(data={"properties": {"$browser__is_not": "IE7", "$OS": "Mac"}})

    first, second = filter.properties[0], filter.properties[1]
    self.assertEqual(first.key, "$browser")
    self.assertEqual(first.operator, "is_not")
    self.assertEqual(first.value, "IE7")
    self.assertEqual(first.type, "event")
    # A bare key maps to operator None (exact match).
    self.assertEqual(second.key, "$OS")
    self.assertEqual(second.operator, None)
    self.assertEqual(second.value, "Mac")
def test_retention_action_start_point(self):
    """Retention where the start entity is an action rather than a raw event."""
    # "alias1" is a second distinct_id of person1, so its event counts as person1's.
    person1 = Person.objects.create(team=self.team, distinct_ids=["person1", "alias1"])
    person2 = Person.objects.create(team=self.team, distinct_ids=["person2"])
    action = self._create_signup_actions([
        ("person1", self._date(0)),
        ("person1", self._date(1)),
        ("person1", self._date(2)),
        ("person1", self._date(5)),
        ("alias1", self._date(5, 9)),
        ("person1", self._date(6)),
        ("person2", self._date(1)),
        ("person2", self._date(2)),
        ("person2", self._date(3)),
        ("person2", self._date(6)),
    ])
    start_entity = Entity({"id": action.pk, "type": TREND_FILTER_TYPE_ACTIONS})
    result = Retention().run(
        Filter(data={"date_from": self._date(0, hour=0), "entities": [start_entity]}),
        self.team,
        total_days=7)
    self.assertEqual(len(result), 7)
    self.assertEqual(
        self.pluck(result, "label"),
        ["Day 0", "Day 1", "Day 2", "Day 3", "Day 4", "Day 5", "Day 6"],
    )
    self.assertEqual(result[0]["date"], "Wed. 10 June")
    # Each row is a cohort day; each entry counts that cohort's returns on later days.
    self.assertEqual(
        self.pluck(result, "values", "count"),
        [
            [1, 1, 1, 0, 0, 1, 1],
            [2, 2, 1, 0, 1, 2],
            [2, 1, 0, 1, 2],
            [1, 0, 0, 1],
            [0, 0, 0],
            [1, 1],
            [2],
        ],
    )
def test_trends_compare(self):
    """With compare=true, trends returns a current and a previous-period series;
    with compare=false, a single series with absolute date labels."""
    self._create_events()
    with freeze_time("2020-01-04T13:00:01Z"):
        response = Trends().run(
            Filter(data={"compare": "true", "events": [{"id": "sign up"}]}),
            self.team)
    self.assertEqual(response[0]["label"], "sign up - current")
    # Compare mode uses relative "day N" labels so both series align.
    self.assertEqual(response[0]["labels"][4], "day 4")
    self.assertEqual(response[0]["data"][4], 3.0)
    self.assertEqual(response[0]["labels"][5], "day 5")
    self.assertEqual(response[0]["data"][5], 1.0)
    self.assertEqual(response[1]["label"], "sign up - previous")
    self.assertEqual(response[1]["labels"][4], "day 4")
    self.assertEqual(response[1]["data"][4], 1.0)
    self.assertEqual(response[1]["labels"][5], "day 5")
    self.assertEqual(response[1]["data"][5], 0.0)

    with freeze_time("2020-01-04T13:00:01Z"):
        no_compare_response = Trends().run(
            Filter(data={"compare": "false", "events": [{"id": "sign up"}]}),
            self.team)
    self.assertEqual(no_compare_response[0]["label"], "sign up")
    # Non-compare mode labels buckets with actual dates.
    self.assertEqual(no_compare_response[0]["labels"][4], "Wed. 1 January")
    self.assertEqual(no_compare_response[0]["data"][4], 3.0)
    self.assertEqual(no_compare_response[0]["labels"][5], "Thu. 2 January")
    self.assertEqual(no_compare_response[0]["data"][5], 1.0)
def test_retention_with_properties(self):
    """Retention restricted to persons matching a person-property (email) filter."""
    person1 = Person.objects.create(
        team=self.team, distinct_ids=["person1", "alias1"], properties={"email": "*****@*****.**"})
    person2 = Person.objects.create(
        team=self.team, distinct_ids=["person2"], properties={"email": "*****@*****.**"})
    self._create_pageviews([
        ("person1", self._date(0)),
        ("person1", self._date(1)),
        ("person1", self._date(2)),
        ("person1", self._date(5)),
        ("alias1", self._date(5, 9)),
        ("person1", self._date(6)),
        ("person2", self._date(1)),
        ("person2", self._date(2)),
        ("person2", self._date(3)),
        ("person2", self._date(6)),
    ])
    result = Retention().run(
        Filter(
            data={
                "properties": [{"key": "email", "value": "*****@*****.**", "type": "person"}],
                "date_from": self._date(0, hour=0),
            }),
        self.team,
        total_days=7,
    )
    self.assertEqual(len(result), 7)
    self.assertEqual(
        self.pluck(result, "label"),
        ["Day 0", "Day 1", "Day 2", "Day 3", "Day 4", "Day 5", "Day 6"],
    )
    self.assertEqual(result[0]["date"], "Wed. 10 June")
    # Counts only reflect the person(s) matching the email filter.
    self.assertEqual(
        self.pluck(result, "values", "count"),
        [[1, 1, 1, 0, 0, 1, 1],
         [1, 1, 0, 0, 1, 1],
         [1, 0, 0, 1, 1],
         [0, 0, 0, 0],
         [0, 0, 0],
         [1, 1],
         [1]],
    )
def format_action_filter(action: Action, prepend: str = "action", index=0, use_loop: bool = False) -> Tuple[str, Dict]:
    """Build a ClickHouse WHERE fragment (and its query params) that matches events
    satisfying any step of `action`; steps are OR-ed, conditions within a step AND-ed.

    NOTE(review): the `index` parameter is immediately shadowed by the loop variable
    below, so the value passed in is never used — confirm whether callers rely on it.
    """
    # get action steps
    params = {"team_id": action.team.pk}
    steps = action.steps.all()
    if len(steps) == 0:
        # If no steps, it shouldn't match this part of the query
        return "1=2", {}
    or_queries = []
    for index, step in enumerate(steps):
        conditions: List[str] = []
        # filter element
        if step.event == AUTOCAPTURE_EVENT:
            el_conditions, element_params = filter_element(
                step, "{}{}".format(index, prepend))
            params = {**params, **element_params}
            conditions += el_conditions
        # filter event conditions (ie URL)
        event_conditions, event_params = filter_event(
            step, "{}{}".format(index, prepend), index)
        params = {**params, **event_params}
        conditions += event_conditions
        if step.properties:
            # Imported locally — presumably to avoid a circular import; confirm.
            from ee.clickhouse.models.property import parse_prop_clauses
            prop_query, prop_params = parse_prop_clauses(
                Filter(data={
                    "properties": step.properties
                }).properties,
                action.team.pk,
                prepend="action_props_{}".format(index),
            )
            # parse_prop_clauses emits a leading "AND"; strip the first occurrence only.
            conditions.append(prop_query.replace("AND", "", 1))
            params = {**params, **prop_params}
        if len(conditions) > 0:
            or_queries.append(" AND ".join(conditions))
    if use_loop:
        # Chain each step's conditions through nested uuid-IN subqueries.
        formatted_query = "SELECT uuid FROM events WHERE {} AND team_id = %(team_id)s".format(
            ") OR uuid IN (SELECT uuid FROM events WHERE team_id = %(team_id)s AND "
            .join(or_queries))
    else:
        formatted_query = "(({}))".format(") OR (".join(or_queries))
    return formatted_query, params
def get_actions(queryset: QuerySet, params: dict, team_id: int) -> QuerySet:
    """Filter an Action queryset by requested action ids, optionally annotate event
    counts, prefetch ordered steps, and scope to the team (newest first)."""
    if params.get(TREND_FILTER_TYPE_ACTIONS):
        requested = Filter({"actions": json.loads(params.get("actions", "[]"))}).actions
        queryset = queryset.filter(pk__in=[a.id for a in requested])

    if params.get("include_count"):
        queryset = queryset.annotate(count=Count(TREND_FILTER_TYPE_EVENTS))

    ordered_steps = ActionStep.objects.order_by("id")
    queryset = queryset.prefetch_related(Prefetch("steps", queryset=ordered_steps))
    return queryset.filter(team_id=team_id).order_by("-id")
def test_regex(self):
    """Regex and negated-regex property operators filter events by property value."""
    event1 = event_factory(team=self.team, distinct_id="test", event="$pageview")
    event2 = event_factory(
        team=self.team,
        event="$pageview",
        distinct_id="test",
        properties={"$current_url": "https://whatever.com"},
    )

    # Raw strings for regex patterns: "\." in a plain literal is an invalid
    # escape sequence (SyntaxWarning in modern Python). Values are unchanged.
    filter = Filter(data={"properties": {"$current_url__regex": r"\.com$"}})
    events = filter_events(filter, self.team)
    self.assertEqual(events[0]["id"], event2.pk)

    # not_regex: both events pass (event1 has no $current_url, event2 doesn't match).
    filter = Filter(data={"properties": {"$current_url__not_regex": r"\.eee$"}})
    events = filter_events(filter, self.team, order_by="timestamp")
    self.assertEqual(events[0]["id"], event1.pk)
    self.assertEqual(events[1]["id"], event2.pk)
def calculate_trends(self, request: request.Request) -> List[Dict[str, Any]]:
    """Run a stickiness calculation when shown_as requests it, otherwise a
    regular trends calculation, then refresh the originating dashboard."""
    team = self.team
    filter = Filter(request=request)

    if filter.shown_as == TRENDS_STICKINESS:
        # Named function instead of a lambda (PEP 8 E731); same callable behavior.
        def earliest_timestamp_func(team_id):
            return Event.objects.earliest_timestamp(team_id)

        stickiness_filter = StickinessFilter(
            request=request, team=team, get_earliest_timestamp=earliest_timestamp_func
        )
        result = stickiness.Stickiness().run(stickiness_filter, team)
    else:
        result = trends.Trends().run(filter, team)

    self._refresh_dashboard(request=request)
    return result
def test_basic_results(self):
    """
    The `basic` query parameter can be passed so that only a list of objects with a
    reduced, fixed set of fields is returned, without the actual query data. This can
    speed things up if it's not needed.
    """
    filter_dict = {
        "events": [{"id": "$pageview"}],
    }
    DashboardItem.objects.create(
        filters=Filter(data=filter_dict).to_dict(),
        team=self.team,
        short_id="12345678",
    )
    DashboardItem.objects.create(
        filters=Filter(data=filter_dict).to_dict(),
        team=self.team,
        saved=True,
    )
    response = self.client.get("/api/insight/?basic=true")
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    self.assertEqual(len(response.json()["results"]), 2)
    # Only this reduced field set is serialized in basic mode.
    self.assertEqual(
        list(response.json()["results"][0].keys()),
        ["id", "short_id", "name", "filters", "dashboard", "color", "last_refresh", "refreshing", "saved"],
    )
def get_action_tables_and_properties(
        action: Action) -> Counter[PropertyIdentifier]:
    """Tally the property identifiers referenced by each step of `action`.

    A step with a URL contributes one ("$current_url", "event", None) entry;
    its property filters contribute via extract_tables_and_properties.
    """
    from ee.clickhouse.models.property import extract_tables_and_properties

    counts: Counter[PropertyIdentifier] = Counter()
    for step in action.steps.all():
        if step.url:
            counts[("$current_url", "event", None)] += 1
        step_filter = Filter(data={"properties": step.properties or []})
        counts += extract_tables_and_properties(step_filter.properties)
    return counts
def _calculate_trends(self, request: request.Request) -> List[Dict[str, Any]]:
    """Compute trends (or stickiness) results and, when launched from a dashboard,
    bump that dashboard item's last_refresh timestamp."""
    team = request.user.team
    filter = Filter(request=request)
    # NOTE(review): this compares against the literal "Stickiness" where sibling
    # code uses the TRENDS_STICKINESS constant — confirm the two stay in sync.
    if filter.shown_as == "Stickiness":
        result = stickiness.Stickiness().run(filter, team)
    else:
        result = trends.Trends().run(filter, team)
    dashboard_id = request.GET.get("from_dashboard", None)
    if dashboard_id:
        DashboardItem.objects.filter(pk=dashboard_id).update(
            last_refresh=now())
    return result
def trend(self, request: request.Request, *args: Any, **kwargs: Any) -> Response:
    """Trend endpoint: run the calculation and attach a "next" pagination URL
    for breakdown values when the current page is full."""
    try:
        serializer = TrendSerializer(request=request)
        serializer.is_valid(raise_exception=True)
    except Exception as e:
        # Validation failures are only reported to error tracking, not raised —
        # the calculation below still runs for invalid payloads.
        capture_exception(e)
    result = self.calculate_trends(request)
    filter = Filter(request=request, team=self.team)
    # Only offer a next page when this page hit the breakdown-values limit.
    next = (
        format_paginated_url(request, filter.offset, BREAKDOWN_VALUES_LIMIT)
        if len(result["result"]) >= BREAKDOWN_VALUES_LIMIT
        else None
    )
    return Response({**result, "next": next})
def test_is_not_persons(self):
    """`is_not` on a person property excludes matching persons from the result."""
    person_factory(team_id=self.team.pk, distinct_ids=["p1"], properties={"url": "https://whatever.com"})
    p2_uuid = person_factory(
        team_id=self.team.pk, distinct_ids=["p2"], properties={"url": "https://example.com"}
    )

    excluded = {"type": "person", "key": "url", "value": "https://whatever.com", "operator": "is_not"}
    filter = Filter(data={"properties": [excluded]})
    results = filter_persons(filter, self.team)

    # Only p2 survives: p1's url is exactly the excluded value.
    self.assertCountEqual(results, [p2_uuid])
def test_property_filtering(self):
    """Event property filters restrict which events count toward each trend series."""
    self._create_events()
    with freeze_time("2020-01-04"):
        response = Trends().run(
            Filter(data={
                "properties": [{
                    "key": "$some_property",
                    "value": "value"
                }]
            }), self.team)
    self.assertEqual(response[0]["labels"][4], "Wed. 1 January")
    self.assertEqual(response[0]["data"][4], 1.0)
    self.assertEqual(response[0]["labels"][5], "Thu. 2 January")
    self.assertEqual(response[0]["data"][5], 0)
    # The second series has no events matching the property filter.
    self.assertEqual(response[1]["count"], 0)
def get_queryset(self):
    """Actions for the current user's team, optionally filtered by the `actions`
    query param and annotated with event counts; steps are prefetched in id order."""
    queryset = super().get_queryset()
    if self.action == 'list':  # type: ignore
        # Listing hides soft-deleted actions; other view actions can still access them.
        queryset = queryset.filter(deleted=False)
    if self.request.GET.get(TREND_FILTER_TYPE_ACTIONS):
        queryset = queryset.filter(pk__in=[action.id for action in Filter({'actions': json.loads(self.request.GET['actions'])}).actions])
    if self.request.GET.get('include_count'):
        queryset = queryset.annotate(count=Count(TREND_FILTER_TYPE_EVENTS))
    queryset = queryset.prefetch_related(Prefetch('steps', queryset=ActionStep.objects.order_by('id')))
    return queryset\
        .filter(team=self.request.user.team_set.get())\
        .order_by('-id')
def test_filter_by_all(self):
    """date_from="all" produces an empty Q object, i.e. no date restriction."""
    name_value = json.dumps({"first_name": "Mary", "last_name": "Smith"})
    filter = Filter(
        data={
            "properties": [{"key": "name", "value": name_value, "type": "person"}],
            "date_from": "all",
        }
    )
    self.assertEqual(filter.date_filter_Q, Q())
def test_default_filter_by_date_from(self):
    """Without an explicit date_from, the filter defaults to midnight one week ago."""
    name_value = json.dumps({"first_name": "Mary", "last_name": "Smith"})
    filter = Filter(
        data={
            "properties": [{"key": "name", "value": name_value, "type": "person"}],
        }
    )

    midnight_today = timezone.now().replace(hour=0, minute=0, second=0, microsecond=0)
    one_week_ago = midnight_today - relativedelta(days=7)
    self.assertEqual(filter.date_filter_Q, Q(timestamp__gte=one_week_ago))
def test_json_object(self):
    """A person property whose value is a JSON object can be matched exactly."""
    person1 = Person.objects.create(
        team=self.team,
        distinct_ids=["person1"],
        properties={"name": {"first_name": "Mary", "last_name": "Smith"}},
    )
    event1 = Event.objects.create(
        team=self.team,
        distinct_id="person1",
        event="$pageview",
        properties={"$current_url": "https://something.com"},
    )
    # The filter value is the JSON-serialized form of the stored object.
    filter = Filter(
        data={
            "properties": [
                {
                    "key": "name",
                    "value": json.dumps({"first_name": "Mary", "last_name": "Smith"}),
                    "type": "person",
                }
            ]
        }
    )
    events = Event.objects.add_person_id(self.team.pk).filter(filter.properties_to_Q(team_id=self.team.pk))
    self.assertEqual(events[0], event1)
    self.assertEqual(len(events), 1)
def create(self, validated_data: Dict, *args: Any, **kwargs: Any) -> DashboardItem:
    """Create a dashboard item for the requesting user's team.

    Items attached to a dashboard get their filters normalised via Filter and a
    fresh last_refresh; attaching to another team's dashboard is rejected.
    """
    request = self.context["request"]
    team = request.user.team
    # last_refresh sometimes gets sent if dashboard_item is duplicated
    validated_data.pop("last_refresh", None)

    if not validated_data.get("dashboard", None):
        return DashboardItem.objects.create(team=team, created_by=request.user, **validated_data)

    if validated_data["dashboard"].team == team:
        filter_data = validated_data.pop("filters", None)
        filters = Filter(data=filter_data) if filter_data else None
        return DashboardItem.objects.create(
            team=team,
            last_refresh=now(),
            filters=filters.to_dict() if filters else {},
            **validated_data,
        )

    raise serializers.ValidationError("Dashboard not found")
def calculate_trends(self, request: request.Request) -> Dict[str, Any]:
    """Run the ClickHouse stickiness or trends query for this request and
    refresh the originating dashboard."""
    team = self.team
    filter = Filter(request=request, team=self.team)

    wants_stickiness = (
        filter.insight == INSIGHT_STICKINESS or filter.shown_as == TRENDS_STICKINESS
    )
    if wants_stickiness:
        stickiness_filter = StickinessFilter(
            request=request, team=team, get_earliest_timestamp=get_earliest_timestamp
        )
        result = ClickhouseStickiness().run(stickiness_filter, team)
    else:
        result = ClickhouseTrends().run(filter, team)

    self._refresh_dashboard(request=request)
    return {"result": result}
def test_lifecycle_trend(self):
    """Lifecycle trend buckets users per day as new / returning / resurrecting /
    dormant based on their pageview history."""
    self._create_events(data=[
        (
            "p1",
            [
                "2020-01-11T12:00:00Z",
                "2020-01-12T12:00:00Z",
                "2020-01-13T12:00:00Z",
                "2020-01-15T12:00:00Z",
                "2020-01-17T12:00:00Z",
                "2020-01-19T12:00:00Z",
            ],
        ),
        ("p2", ["2020-01-09T12:00:00Z", "2020-01-12T12:00:00Z"]),
        ("p3", ["2020-01-12T12:00:00Z"]),
        ("p4", ["2020-01-15T12:00:00Z"]),
    ])
    result = trends().run(
        Filter(
            data={
                "date_from": "2020-01-12T00:00:00Z",
                "date_to": "2020-01-19T00:00:00Z",
                "events": [{
                    "id": "$pageview",
                    "type": "events",
                    "order": 0
                }],
                "shown_as": TRENDS_LIFECYCLE,
            }),
        self.team,
    )
    # One series per lifecycle status.
    self.assertEqual(len(result), 4)
    self.assertEqual(sorted([res["status"] for res in result]), ["dormant", "new", "resurrecting", "returning"])
    for res in result:
        if res["status"] == "dormant":
            # Dormant counts are negative (users lost that day).
            self.assertEqual(res["data"], [0, -2, -1, 0, -2, 0, -1, 0])
        elif res["status"] == "returning":
            self.assertEqual(res["data"], [1, 1, 0, 0, 0, 0, 0, 0])
        elif res["status"] == "resurrecting":
            self.assertEqual(res["data"], [1, 0, 0, 1, 0, 1, 0, 1])
        elif res["status"] == "new":
            self.assertEqual(res["data"], [1, 0, 0, 1, 0, 0, 0, 0])
def test_retention(self):
    """Basic retention: default window, cohorts per day, alias ids merged into
    the owning person."""
    person1 = Person.objects.create(team=self.team, distinct_ids=["person1", "alias1"])
    person2 = Person.objects.create(team=self.team, distinct_ids=["person2"])
    self._create_pageviews([
        ("person1", self._date(0)),
        ("person1", self._date(1)),
        ("person1", self._date(2)),
        ("person1", self._date(5)),
        ("alias1", self._date(5, 9)),
        ("person1", self._date(6)),
        ("person2", self._date(1)),
        ("person2", self._date(2)),
        ("person2", self._date(3)),
        ("person2", self._date(6)),
    ])
    result = Retention().run(
        Filter(data={"date_from": self._date(0, hour=0)}), self.team)
    # Default run covers 11 day-cohorts.
    self.assertEqual(len(result), 11)
    self.assertEqual(
        self.pluck(result, "label"),
        [
            "Day 0", "Day 1", "Day 2", "Day 3", "Day 4", "Day 5", "Day 6",
            "Day 7", "Day 8", "Day 9", "Day 10"
        ],
    )
    self.assertEqual(result[0]["date"], "Wed. 10 June")
    # Row i, entry j: users from day-i cohort active again j days later.
    self.assertEqual(
        self.pluck(result, "values", "count"),
        [
            [1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0],
            [2, 2, 1, 0, 1, 2, 0, 0, 0, 0],
            [2, 1, 0, 1, 2, 0, 0, 0, 0],
            [1, 0, 0, 1, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0],
            [1, 1, 0, 0, 0, 0],
            [2, 0, 0, 0, 0],
            [0, 0, 0, 0],
            [0, 0, 0],
            [0, 0],
            [0],
        ],
    )
def calculate_sessions(self, events: QuerySet, filter: Filter, team: Team) -> List[Dict[str, Any]]:
    """Compute session metrics from `events`: average duration when the filter
    requests SESSION_AVG, otherwise the duration distribution."""
    all_sessions, sessions_sql_params = self.build_all_sessions_query(events)
    if filter.session == SESSION_AVG:
        if not filter.date_from:
            # Default date_from to midnight of the team's earliest event.
            # NOTE(review): indexing [0] on the ordered queryset raises
            # IndexError for a team with no events — confirm callers guarantee
            # at least one event exists.
            filter = filter.with_data(
                {
                    "date_from": Event.objects.filter(team=team)
                    .order_by("timestamp")[0]
                    .timestamp.replace(hour=0, minute=0, second=0, microsecond=0)
                    .isoformat(),
                }
            )
        return self._session_avg(all_sessions, sessions_sql_params, filter)
    else:  # SESSION_DIST
        return self._session_dist(all_sessions, sessions_sql_params)
def update_cache_item(key: str, cache_type: str, payload: dict) -> None:
    """Recompute the cached trends/funnel result for `key` and store it for 25 minutes.

    Nothing is cached when the calculation yields a falsy result.
    """
    filter = Filter(data=json.loads(payload["filter"]))
    team_id = int(payload["team_id"])

    result: Optional[Union[List, Dict]] = None
    if cache_type == CacheType.TRENDS:
        result = _calculate_trends(filter, team_id)
    elif cache_type == CacheType.FUNNEL:
        result = _calculate_funnel(filter, team_id)

    if result:
        cache_entry = {"result": result, "details": payload, "type": cache_type}
        cache.set(key, cache_entry, 25 * 60)
def forwards_func(apps, schema_editor):
    """Migration: backfill a share token on every dashboard and normalise stored
    dashboard-item filters (copying funnel_id onto the item)."""
    # Use historical models so this migration runs against old schema states.
    Dashboard = apps.get_model("posthog", "Dashboard")
    DashboardItem = apps.get_model("posthog", "DashboardItem")
    dashboards = Dashboard.objects.all()
    for dashboard in dashboards:
        dashboard.share_token = secrets.token_urlsafe(22)
        dashboard.save()
    items = DashboardItem.objects.filter(filters__isnull=False)
    for item in items:
        if item.filters == {}:
            continue
        if item.filters.get("funnel_id"):
            item.funnel_id = item.filters["funnel_id"]
        # Round-trip through Filter to normalise legacy filter formats.
        item.filters = Filter(data=item.filters).to_dict()
        item.save()
def test_contains(self):
    """`icontains` matches events whose property value contains the substring."""
    event_factory(team=self.team, distinct_id="test", event="$pageview")
    matching_event = event_factory(
        team=self.team,
        event="$pageview",
        distinct_id="test",
        properties={"$current_url": "https://whatever.com"},
    )

    filter = Filter(data={"properties": {"$current_url__icontains": "whatever"}})
    events = filter_events(filter, self.team)
    self.assertEqual(events[0]["id"], matching_event.pk)
def test_person_cohort_properties(self):
    """A cohort-type property filter matches persons belonging to the cohort."""
    person1_distinct_id = "person1"
    person1 = Person.objects.create(
        team=self.team, distinct_ids=[person1_distinct_id], properties={"$some_prop": 1}
    )
    cohort1 = Cohort.objects.create(team=self.team, groups=[{"properties": {"$some_prop": 1}}], name="cohort1")
    cohort1.people.add(person1)
    # The filter references the cohort by its primary key.
    filter = Filter(data={"properties": [{"key": "id", "value": cohort1.pk, "type": "cohort"}],})
    matched_person = (
        Person.objects.filter(team_id=self.team.pk, persondistinctid__distinct_id=person1_distinct_id)
        .filter(properties_to_Q(filter.property_groups.flat, team_id=self.team.pk, is_direct_query=True))
        .exists()
    )
    self.assertTrue(matched_person)
def test_simple(self):
    """An exact-match property filter returns only the event with that value."""
    event_factory(team=self.team, distinct_id="test", event="$pageview")
    event_factory(
        team=self.team,
        event="$pageview",
        distinct_id="test",
        properties={"$current_url": "https://whatever.com"},
    )

    filter = Filter(data={"properties": {"$current_url": "https://whatever.com"}})
    matched = filter_events(filter, self.team)
    self.assertEqual(len(matched), 1)
def _calculate_trends(self, request: request.Request) -> List[Dict[str, Any]]:
    """Run stickiness or trends for the request, then bump last_refresh on the
    dashboard item the request came from (if any)."""
    team = self.team
    filter = Filter(request=request)

    if filter.shown_as == TRENDS_STICKINESS:
        filter = StickinessFilter(
            request=request,
            team=team,
            get_earliest_timestamp=Event.objects.earliest_timestamp,
        )
        result = stickiness.Stickiness().run(filter, team)
    else:
        result = trends.Trends().run(filter, team)

    dashboard_id = request.GET.get("from_dashboard", None)
    if dashboard_id:
        DashboardItem.objects.filter(pk=dashboard_id).update(last_refresh=now())

    return result