def test_is_set(self):
    """Check the ``is_set`` operator for both of its ``value`` settings.

    Two ``$pageview`` events are created; only the second one carries the
    ``is_first_user`` property. With ``value='false'`` the filter must match
    the event lacking the property, with ``value='true'`` the event having it.
    """
    event1 = Event.objects.create(team=self.team, event='$pageview')
    event2 = Event.objects.create(
        team=self.team,
        event='$pageview',
        properties={'is_first_user': True},
    )

    filter = Filter(data={
        'properties': [{
            'key': 'is_first_user',
            'operator': 'is_set',
            'value': 'false',
        }]
    })
    events = Event.objects.filter(filter.properties_to_Q())
    self.assertEqual(events[0], event1)
    self.assertEqual(len(events), 1)

    filter = Filter(data={
        'properties': [{
            'key': 'is_first_user',
            'operator': 'is_set',
            'value': 'true',
        }]
    })
    events = Event.objects.filter(filter.properties_to_Q())
    # The positive case needs its own assertions; building the queryset
    # alone does not even execute the query.
    self.assertEqual(events[0], event2)
    self.assertEqual(len(events), 1)
def test_is_not_set_and_is_set(self):
    """Check the ``is_not_set`` and ``is_set`` operators against each other.

    Two ``$pageview`` events are created; only the second one carries the
    ``is_first_user`` property. ``is_not_set`` must match the first event
    only, ``is_set`` the second event only.
    """
    event1 = Event.objects.create(team=self.team, event="$pageview")
    event2 = Event.objects.create(
        team=self.team,
        event="$pageview",
        properties={"is_first_user": True},
    )

    filter = Filter(data={
        "properties": [{
            "key": "is_first_user",
            "operator": "is_not_set",
            "value": "is_not_set",
        }]
    })
    events = Event.objects.filter(filter.properties_to_Q(team_id=self.team.pk))
    self.assertEqual(events[0], event1)
    self.assertEqual(len(events), 1)

    filter = Filter(data={
        "properties": [{
            "key": "is_first_user",
            "operator": "is_set",
            "value": "is_set",
        }]
    })
    events = Event.objects.filter(filter.properties_to_Q(team_id=self.team.pk))
    # The ``is_set`` half needs its own assertions; building the queryset
    # alone does not even execute the query.
    self.assertEqual(events[0], event2)
    self.assertEqual(len(events), 1)
def test_user_properties(self):
    """A ``person``-type property filter keeps only the matching person's events."""
    team = self.team

    # Two people in different groups; only group 1 should match below.
    for distinct_id, group in (('person1', 1), ('person2', 2)):
        Person.objects.create(
            team=team,
            distinct_ids=[distinct_id],
            properties={'group': group},
        )

    url = 'https://something.com'
    expected = Event.objects.create(
        team=team,
        distinct_id='person1',
        event='$pageview',
        properties={'$current_url': url, 'another_key': 'value'},
    )
    Event.objects.create(
        team=team,
        distinct_id='person2',
        event='$pageview',
        properties={'$current_url': url},
    )

    person_filter = Filter(data={
        'properties': [{'key': 'group', 'value': 1, 'type': 'person'}],
    })
    matching = Event.objects.add_person_id(team.pk).filter(
        person_filter.properties_to_Q())

    self.assertEqual(matching[0], expected)
    self.assertEqual(len(matching), 1)
def stats(self, request: request.Request) -> response.Response:
    """Return click counts for the 100 most frequent ``$autocapture`` element groups.

    The team is taken from the authenticated user; events are narrowed by the
    property and date filters parsed from ``request``. Each response item has
    the keys ``count``, ``hash`` and ``elements`` (the serialized elements of
    the group with that hash, ordered by ``order`` then ``id``).

    An item whose hash has no ``ElementGroup`` gets an empty ``elements`` list
    instead of raising ``IndexError``.
    """
    team = self.request.user.team_set.get()
    filter = Filter(request=request)

    events = (
        Event.objects.filter(team=team, event="$autocapture")
        .filter(filter.properties_to_Q(team_id=team.pk))
        .filter(filter.date_filter_Q)
    )
    events = (
        events.values("elements_hash")
        .annotate(count=Count(1))
        .order_by("-count")[0:100]
    )

    groups = ElementGroup.objects.filter(
        team=team,
        hash__in=[item["elements_hash"] for item in events],
    ).prefetch_related(
        Prefetch("element_set", queryset=Element.objects.order_by("order", "id"))
    )

    # Index the groups once instead of rescanning the whole list for every
    # item. ``setdefault`` keeps the first group seen per hash, which is the
    # one a linear scan followed by ``[0]`` would pick.
    group_by_hash = {}
    for group in groups:
        group_by_hash.setdefault(group.hash, group)

    payload = []
    for item in events:
        group = group_by_hash.get(item["elements_hash"])
        elements = group.element_set.all() if group is not None else []
        payload.append({
            "count": item["count"],
            "hash": item["elements_hash"],
            "elements": [ElementSerializer(element).data for element in elements],
        })
    return response.Response(payload)
def run(self, filter: Filter, team: Team, *args, **kwargs) -> List[Dict[str, Any]]:
    """Compute sessions for ``team`` according to ``filter``.

    ``kwargs["limit"]`` (default ``SESSIONS_LIST_DEFAULT_LIMIT``) caps the
    result; the offset comes from ``filter.offset``. When a comparison is
    requested for the average session type, the current and the compared
    period are both calculated and returned in one list.
    """
    team_events = (
        Event.objects.filter(team=team)
        .filter(filter.properties_to_Q(team_id=team.pk))
        .add_person_id(team.pk)
        .order_by("-timestamp")
    )
    limit = int(kwargs.get("limit", SESSIONS_LIST_DEFAULT_LIMIT))
    offset = filter.offset

    wants_comparison = (
        filter.compare
        and filter._date_from != "all"
        and filter.session_type == SESSION_AVG
    )
    if not wants_comparison:
        # A session_type of None means a plain session list, which is
        # deliberately not restricted by date.
        if filter.session_type is not None:
            team_events = team_events.filter(filter.date_filter_Q)
        return self.calculate_sessions(team_events, filter, team, limit, offset)

    # Current period first ...
    current = self.calculate_sessions(
        team_events.filter(filter.date_filter_Q), filter, team, limit, offset)
    current = convert_to_comparison(current, filter, "current")

    # ... then the period it is compared against.
    compare_filter = determine_compared_filter(filter)
    previous = self.calculate_sessions(
        team_events.filter(compare_filter.date_filter_Q),
        compare_filter, team, limit, offset)
    current.extend(convert_to_comparison(previous, filter, "previous"))
    return current
def test_person_cohort_properties(self):
    """A ``cohort``-type property filter matches a person added to that cohort."""
    distinct_id = "person1"
    member = Person.objects.create(
        team=self.team,
        distinct_ids=[distinct_id],
        properties={"$some_prop": 1},
    )
    cohort = Cohort.objects.create(
        team=self.team,
        groups=[{"properties": {"$some_prop": 1}}],
        name="cohort1",
    )
    cohort.people.add(member)

    cohort_filter = Filter(data={
        "properties": [{"key": "id", "value": cohort.pk, "type": "cohort"}],
    })

    people = Person.objects.filter(
        team_id=self.team.pk,
        persondistinctid__distinct_id=distinct_id,
    )
    cohort_q = cohort_filter.properties_to_Q(
        team_id=self.team.pk, is_person_query=True)

    self.assertTrue(people.filter(cohort_q).exists())
def test_boolean_filters(self):
    """The string value ``"true"`` matches an event whose property is boolean ``True``."""
    # First event has no properties and must not match.
    Event.objects.create(team=self.team, event="$pageview")
    flagged = Event.objects.create(
        team=self.team,
        event="$pageview",
        properties={"is_first_user": True},
    )

    bool_filter = Filter(data={
        "properties": [{"key": "is_first_user", "value": "true"}],
    })
    matching = Event.objects.filter(
        bool_filter.properties_to_Q(team_id=self.team.pk))

    self.assertEqual(matching[0], flagged)
    self.assertEqual(len(matching), 1)
def stats(self, request: request.Request) -> response.Response:
    """Return click counts for the 100 most frequent ``$autocapture`` element groups.

    The team is taken from the authenticated user; events are narrowed by the
    property and date filters parsed from ``request``. Each response item has
    the keys ``count``, ``hash`` and ``elements`` (the serialized elements of
    the group with that hash).

    An item whose hash has no ``ElementGroup`` gets an empty ``elements`` list
    instead of raising ``IndexError``.
    """
    team = self.request.user.team_set.get()
    filter = Filter(request=request)

    events = (
        Event.objects
        .filter(team=team, event='$autocapture')
        .filter(filter.properties_to_Q(team_id=team.pk))
        .filter(filter.date_filter_Q)
    )
    events = (
        events
        .values('elements_hash')
        .annotate(count=Count(1))
        .order_by('-count')[0:100]
    )

    groups = (
        ElementGroup.objects
        .filter(team=team, hash__in=[item['elements_hash'] for item in events])
        .prefetch_related('element_set')
    )

    # Index the groups once instead of rescanning the whole list for every
    # item. ``setdefault`` keeps the first group seen per hash, which is the
    # one a linear scan followed by ``[0]`` would pick.
    group_by_hash = {}
    for group in groups:
        group_by_hash.setdefault(group.hash, group)

    payload = []
    for item in events:
        group = group_by_hash.get(item['elements_hash'])
        elements = group.element_set.all() if group is not None else []
        payload.append({
            'count': item['count'],
            'hash': item['elements_hash'],
            'elements': [ElementSerializer(element).data for element in elements],
        })
    return response.Response(payload)
def events_query(self, filter: Filter, team: Team) -> QuerySet:
    """Build the team's event queryset, property-filtered and newest first."""
    team_events = Event.objects.filter(team=team).add_person_id(team.pk)
    property_q = filter.properties_to_Q(team_id=team.pk)
    return team_events.filter(property_q).order_by("-timestamp")
def _filter_request(self, request: request.Request, queryset: QuerySet, team: Team) -> QuerySet:
    """Narrow ``queryset`` by the recognised GET parameters of ``request``.

    Recognised keys: ``event``, ``after``, ``before``, ``distinct_id``,
    ``person_id``, ``action_id`` and ``properties`` (a JSON document handed
    to ``Filter``). Unrecognised keys are ignored. Filters are applied in the
    order the parameters are iterated.
    """
    # Parameters that translate one-to-one into a queryset lookup.
    direct_lookups = {
        "event": "event",
        "after": "timestamp__gt",
        "before": "timestamp__lt",
        "distinct_id": "distinct_id",
    }
    params = request.GET

    for key, value in params.items():
        lookup = direct_lookups.get(key)
        if lookup is not None:
            queryset = queryset.filter(**{lookup: params[key]})
            continue

        if key == "person_id":
            # The fetched person is not used afterwards; the lookup is kept
            # because it is part of the existing behaviour (presumably it
            # fails the request when the id is unknown — verify).
            Person.objects.get(pk=params["person_id"])
            distinct_ids = PersonDistinctId.objects.filter(
                person_id=params["person_id"]).values("distinct_id")
            queryset = queryset.filter(distinct_id__in=distinct_ids)
        elif key == "action_id":
            action = Action.objects.get(pk=value)
            queryset = queryset.filter_by_action(action)  # type: ignore
        elif key == "properties":
            property_filter = Filter(data={"properties": json.loads(value)})
            queryset = queryset.filter(
                property_filter.properties_to_Q(team_id=team.pk))

    return queryset
def test_multiple(self):
    """Several property conditions in one filter must all hold for an event to match."""
    url = "https://something.com"
    expected = Event.objects.create(
        team=self.team,
        event="$pageview",
        properties={"$current_url": url, "another_key": "value"},
    )
    # Same URL but without ``another_key``: must be filtered out.
    Event.objects.create(
        team=self.team,
        event="$pageview",
        properties={"$current_url": url},
    )

    conditions = {
        "$current_url__icontains": "something.com",
        "another_key": "value",
    }
    multi_filter = Filter(data={"properties": conditions})
    matching = Event.objects.filter(
        multi_filter.properties_to_Q(team_id=self.team.pk))

    self.assertEqual(matching[0], expected)
    self.assertEqual(len(matching), 1)
def test_json_object(self):
    """A person property holding a JSON object can be matched by its JSON-encoded value."""
    full_name = {'first_name': 'Mary', 'last_name': 'Smith'}

    Person.objects.create(
        team=self.team,
        distinct_ids=['person1'],
        properties={'name': full_name},
    )
    expected = Event.objects.create(
        team=self.team,
        distinct_id='person1',
        event='$pageview',
        properties={'$current_url': 'https://something.com'},
    )

    name_condition = {
        'key': 'name',
        'value': json.dumps({'first_name': 'Mary', 'last_name': 'Smith'}),
        'type': 'person',
    }
    json_filter = Filter(data={'properties': [name_condition]})
    matching = Event.objects.add_person_id(self.team.pk).filter(
        json_filter.properties_to_Q())

    self.assertEqual(matching[0], expected)
    self.assertEqual(len(matching), 1)
def test_user_properties(self):
    """A ``person``-type property filter keeps only the matching person's events."""
    team = self.team

    # Two people in different groups; only group 1 should match below.
    for distinct_id, group in (("person1", 1), ("person2", 2)):
        Person.objects.create(
            team=team,
            distinct_ids=[distinct_id],
            properties={"group": group},
        )

    url = "https://something.com"
    expected = Event.objects.create(
        team=team,
        distinct_id="person1",
        event="$pageview",
        properties={"$current_url": url, "another_key": "value"},
    )
    Event.objects.create(
        team=team,
        distinct_id="person2",
        event="$pageview",
        properties={"$current_url": url},
    )

    person_filter = Filter(data={
        "properties": [{"key": "group", "value": 1, "type": "person"}],
    })
    matching = Event.objects.add_person_id(team.pk).filter(
        person_filter.properties_to_Q(team_id=team.pk))

    self.assertEqual(matching[0], expected)
    self.assertEqual(len(matching), 1)
def _filter_request(self, request: request.Request, queryset: QuerySet) -> QuerySet:
    """Narrow ``queryset`` by the recognised GET parameters of ``request``.

    Recognised keys: ``event``, ``after``, ``before``, ``distinct_id``,
    ``person_id``, ``action_id`` and ``properties`` (a JSON document handed
    to ``Filter``). Unrecognised keys are ignored. Filters are applied in the
    order the parameters are iterated.
    """
    # Parameters that translate one-to-one into a queryset lookup.
    direct_lookups = {
        'event': 'event',
        'after': 'timestamp__gt',
        'before': 'timestamp__lt',
        'distinct_id': 'distinct_id',
    }
    params = request.GET

    for key, value in params.items():
        lookup = direct_lookups.get(key)
        if lookup is not None:
            queryset = queryset.filter(**{lookup: params[key]})
            continue

        if key == 'person_id':
            # The fetched person is not used afterwards; the lookup is kept
            # because it is part of the existing behaviour (presumably it
            # fails the request when the id is unknown — verify).
            Person.objects.get(pk=params['person_id'])
            distinct_ids = PersonDistinctId.objects.filter(
                person_id=params['person_id']).values('distinct_id')
            queryset = queryset.filter(distinct_id__in=distinct_ids)
        elif key == 'action_id':
            action = Action.objects.get(pk=value)
            queryset = queryset.filter_by_action(action)  # type: ignore
        elif key == 'properties':
            property_filter = Filter(data={'properties': json.loads(value)})
            queryset = queryset.filter(property_filter.properties_to_Q())

    return queryset
def test_json_object(self):
    """A person property holding a JSON object can be matched by its JSON-encoded value."""
    full_name = {"first_name": "Mary", "last_name": "Smith"}

    Person.objects.create(
        team=self.team,
        distinct_ids=["person1"],
        properties={"name": full_name},
    )
    expected = Event.objects.create(
        team=self.team,
        distinct_id="person1",
        event="$pageview",
        properties={"$current_url": "https://something.com"},
    )

    name_condition = {
        "key": "name",
        "value": json.dumps({"first_name": "Mary", "last_name": "Smith"}),
        "type": "person",
    }
    json_filter = Filter(data={"properties": [name_condition]})
    matching = Event.objects.add_person_id(self.team.pk).filter(
        json_filter.properties_to_Q(team_id=self.team.pk))

    self.assertEqual(matching[0], expected)
    self.assertEqual(len(matching), 1)
def test_does_not_contain(self):
    """``__not_icontains`` keeps events lacking the property, holding another value, or holding ``None``."""
    team = self.team

    without_prop = Event.objects.create(team=team, event="$pageview")
    other_url = Event.objects.create(
        team=team,
        event="$pageview",
        properties={"$current_url": "https://something.com"},
    )
    # The only event whose URL contains the needle; it must be excluded.
    Event.objects.create(
        team=team,
        event="$pageview",
        properties={"$current_url": "https://whatever.com"},
    )
    null_url = Event.objects.create(
        team=team,
        event="$pageview",
        properties={"$current_url": None},
    )

    negative_filter = Filter(data={
        "properties": {"$current_url__not_icontains": "whatever.com"},
    })
    matching = Event.objects.filter(
        negative_filter.properties_to_Q(team_id=team.pk)).order_by("id")

    self.assertEqual(matching[0], without_prop)
    self.assertEqual(matching[1], other_url)
    self.assertEqual(matching[2], null_url)
    self.assertEqual(len(matching), 3)
def run(self, filter: Filter, team: Team, *args, **kwargs) -> List[Dict[str, Any]]:
    """Compute sessions for ``team`` according to ``filter``.

    ``kwargs`` may carry ``session_type`` (default ``None``) and ``offset``
    (default ``0``). If the filter has no end date, ``filter._date_to`` is set
    to the current time as a side effect. When a comparison is requested for
    the ``"avg"`` session type, the current and the compared period are both
    calculated and returned in one list.
    """
    team_events = (
        Event.objects.filter(team=team)
        .filter(filter.properties_to_Q(team_id=team.pk))
        .add_person_id(team.pk)
        .order_by("-timestamp")
    )
    session_type = kwargs.get("session_type", None)
    offset = kwargs.get("offset", 0)

    if not filter.date_to:
        filter._date_to = now().isoformat()

    wants_comparison = (
        filter.compare
        and filter._date_from != "all"
        and session_type == "avg"
    )
    if not wants_comparison:
        # A session_type of None means a plain session list, which is
        # deliberately not restricted by date.
        if session_type is not None:
            team_events = team_events.filter(filter.date_filter_Q)
        return self.calculate_sessions(team_events, session_type, filter, team, offset)

    # Current period first ...
    current = self.calculate_sessions(
        team_events.filter(filter.date_filter_Q), session_type, filter, team, offset)
    current = self._convert_to_comparison(current, "current")

    # ... then the period it is compared against.
    compare_filter = determine_compared_filter(filter)
    previous = self.calculate_sessions(
        team_events.filter(compare_filter.date_filter_Q),
        session_type, compare_filter, team, offset)
    current.extend(self._convert_to_comparison(previous, "previous"))
    return current
def test_contains(self):
    """``__icontains`` matches only the event whose property contains the substring."""
    # Event without the property: must not match.
    Event.objects.create(team=self.team, event='$pageview')
    expected = Event.objects.create(
        team=self.team,
        event='$pageview',
        properties={'$current_url': 'https://whatever.com'},
    )

    contains_filter = Filter(data={
        'properties': {'$current_url__icontains': 'whatever'},
    })
    property_q = contains_filter.properties_to_Q(team_id=self.team.pk)
    matching = Event.objects.filter(property_q)

    # ``get()`` also proves that exactly one row matched.
    self.assertEqual(matching.get(), expected)
def test_simple(self):
    """An exact-match property filter selects only the event carrying that value."""
    url = 'https://whatever.com'
    for event_properties in (None, {'$current_url': url}):
        if event_properties is None:
            # Event without the property: must not match.
            Event.objects.create(team=self.team, event='$pageview')
        else:
            Event.objects.create(
                team=self.team, event='$pageview', properties=event_properties)

    exact_filter = Filter(data={'properties': {'$current_url': 'https://whatever.com'}})
    property_q = exact_filter.properties_to_Q(team_id=self.team.pk)

    self.assertEqual(Event.objects.filter(property_q).count(), 1)
def test_regex(self):
    """``__regex`` matches only the event whose property satisfies the pattern."""
    # Event without the property: must not match.
    Event.objects.create(team=self.team, event="$pageview")
    event2 = Event.objects.create(
        team=self.team,
        event="$pageview",
        properties={"$current_url": "https://whatever.com"},
    )

    # Raw string: ``\.`` in a normal literal is an invalid escape sequence,
    # which Python warns about. The pattern value itself is unchanged.
    filter = Filter(data={"properties": {"$current_url__regex": r"\.com$"}})
    events = Event.objects.filter(filter.properties_to_Q(team_id=self.team.pk))

    # ``get()`` also proves that exactly one row matched.
    self.assertEqual(events.get(), event2)
def test_multiple(self):
    """Several property conditions in one filter must all hold for an event to match."""
    url = 'https://something.com'
    expected = Event.objects.create(
        team=self.team,
        event='$pageview',
        properties={'$current_url': url, 'another_key': 'value'},
    )
    # Same URL but without ``another_key``: must be filtered out.
    Event.objects.create(
        team=self.team,
        event='$pageview',
        properties={'$current_url': url},
    )

    conditions = {
        '$current_url__icontains': 'something.com',
        'another_key': 'value',
    }
    multi_filter = Filter(data={'properties': conditions})
    matching = Event.objects.filter(
        multi_filter.properties_to_Q(team_id=self.team.pk))

    self.assertEqual(matching[0], expected)
    self.assertEqual(len(matching), 1)
def test_boolean_filters(self):
    """The string value ``'true'`` matches an event whose property is boolean ``True``."""
    # First event has no properties and must not match.
    Event.objects.create(team=self.team, event='$pageview')
    flagged = Event.objects.create(
        team=self.team,
        event='$pageview',
        properties={'is_first_user': True},
    )

    bool_filter = Filter(data={
        'properties': [{'key': 'is_first_user', 'value': 'true'}],
    })
    matching = Event.objects.filter(
        bool_filter.properties_to_Q(team_id=self.team.pk))

    self.assertEqual(matching[0], flagged)
    self.assertEqual(len(matching), 1)
def _filter_events(filter: Filter, team: Team, person_query: Optional[bool] = False, order_by: Optional[str] = None):
    """Return event rows (as ``.values()`` dicts) matching the filter's properties.

    ``person_query`` adds person ids to the queryset before filtering;
    ``order_by`` is applied only when it is truthy.

    NOTE(review): apart from the ``team_id`` handed to ``properties_to_Q``,
    no team restriction is applied to the queryset here — confirm that this
    is intended.
    """
    base = Event.objects.add_person_id(team.pk) if person_query else Event.objects
    filtered = base.filter(filter.properties_to_Q(team_id=team.pk))
    if not order_by:
        return filtered.values()
    return filtered.order_by(order_by).values()
def test_selectors(self):
    """An ``element``-type selector filter matches only the event with matching elements."""
    anchor = Element.objects.create(tag_name="a")
    container = Element.objects.create(tag_name="div")
    Event.objects.create(
        team=self.team,
        event="$autocapture",
        elements=[anchor, container],
    )
    # An autocapture event without elements: must not match.
    Event.objects.create(team=self.team, event="$autocapture")

    selector_condition = {"key": "selector", "value": "div > a", "type": "element"}
    selector_filter = Filter(data={"properties": [selector_condition]})
    property_q = selector_filter.properties_to_Q(team_id=self.team.pk)

    self.assertEqual(Event.objects.filter(property_q).count(), 1)
def test_does_not_contain(self):
    """``__not_icontains`` keeps events lacking the property or holding another value."""
    event1 = Event.objects.create(team=self.team, event='$pageview')
    event2 = Event.objects.create(
        team=self.team,
        event='$pageview',
        properties={'$current_url': 'https://something.com'},
    )
    # The only event whose URL contains the needle; it must be excluded.
    Event.objects.create(
        team=self.team,
        event='$pageview',
        properties={'$current_url': 'https://whatever.com'},
    )

    filter = Filter(data={
        'properties': {'$current_url__not_icontains': 'whatever.com'},
    })
    # Explicit ordering: the positional assertions below are only meaningful
    # on an ordered queryset, since each index runs its own query.
    events = Event.objects.filter(
        filter.properties_to_Q(team_id=self.team.pk)).order_by('id')

    self.assertEqual(events[0], event1)
    self.assertEqual(events[1], event2)
    self.assertEqual(len(events), 2)
def test_selectors(self):
    """An ``element``-type selector filter matches only the event with matching elements."""
    anchor = Element.objects.create(tag_name='a', order=0)
    container = Element.objects.create(tag_name='div', order=1)
    Event.objects.create(
        team=self.team,
        event='$autocapture',
        elements=[anchor, container],
    )
    # An autocapture event without elements: must not match.
    Event.objects.create(team=self.team, event='$autocapture')

    selector_condition = {'key': 'selector', 'value': 'div > a', 'type': 'element'}
    selector_filter = Filter(data={'properties': [selector_condition]})
    property_q = selector_filter.properties_to_Q(team_id=self.team.pk)

    self.assertEqual(Event.objects.filter(property_q).count(), 1)
def test_is_not(self):
    """``__is_not`` keeps events lacking the property or holding a different value."""
    event1 = Event.objects.create(team=self.team, event="$pageview")
    event2 = Event.objects.create(
        team=self.team,
        event="$pageview",
        properties={"$current_url": "https://something.com"},
    )
    # The only event with the excluded URL; it must be filtered out.
    Event.objects.create(
        team=self.team,
        event="$pageview",
        properties={"$current_url": "https://whatever.com"},
    )

    filter = Filter(data={"properties": {"$current_url__is_not": "https://whatever.com"}})
    # Explicit ordering: the positional assertions below are only meaningful
    # on an ordered queryset, since each index runs its own query.
    events = Event.objects.filter(
        filter.properties_to_Q(team_id=self.team.pk)).order_by("id")

    self.assertEqual(events[0], event1)
    self.assertEqual(events[1], event2)
    self.assertEqual(len(events), 2)
def filter_events(team_id: int, filter: Filter, entity: Optional[Entity] = None) -> Q:
    """Combine the filter's date range and property conditions into one ``Q``.

    The upper date bound is pushed forward by one interval unit (one day by
    default) so the interval containing ``date_to`` is included. Properties
    of ``entity``, when given and non-empty, are AND-ed in as well.
    """
    one_day = relativedelta(days=1)
    conditions = Q()

    if filter.date_from:
        conditions &= Q(timestamp__gte=filter.date_from)

    if filter.date_to:
        padding_by_interval = {
            "hour": relativedelta(hours=1),
            "minute": relativedelta(minutes=1),
            "week": relativedelta(weeks=1),
            # go to last day of month instead of first of next
            "month": relativedelta(months=1) - one_day,
        }
        padding = padding_by_interval.get(filter.interval, one_day)
        conditions &= Q(timestamp__lte=filter.date_to + padding)

    if filter.properties:
        conditions &= filter.properties_to_Q(team_id=team_id)

    if entity and entity.properties:
        conditions &= entity.properties_to_Q(team_id=team_id)

    return conditions
def _filter_events(self, team: Team, filter: Filter, entity: Optional[Entity]=None) -> Q:
    """Combine the filter's date range and property conditions into one ``Q``.

    The upper date bound is pushed forward by one interval unit (one day by
    default) so the interval containing ``date_to`` is included. Properties
    of ``entity``, when given and non-empty, are AND-ed in as well.
    """
    conditions = Q()

    if filter.date_from:
        conditions &= Q(timestamp__gte=filter.date_from)

    if filter.date_to:
        if filter.interval == 'month':
            # go to last day of month instead of first of next
            padding = relativedelta(months=1) - relativedelta(days=1)
        else:
            # Intervals measured in a single relativedelta unit; anything
            # unrecognised falls back to one day.
            unit_by_interval = {'hour': 'hours', 'minute': 'minutes', 'week': 'weeks'}
            unit = unit_by_interval.get(filter.interval, 'days')
            padding = relativedelta(**{unit: 1})
        conditions &= Q(timestamp__lte=filter.date_to + padding)

    if filter.properties:
        conditions &= filter.properties_to_Q(team_id=team.pk)

    if entity and entity.properties:
        conditions &= entity.properties_to_Q(team_id=team.pk)

    return conditions
# calculate_paths: compute the most frequent event-to-event transitions
# ("paths") for a team and return them as a list of dicts with the keys
# "source", "target", "target_id", "source_id" and "value", sorted by "value"
# in descending order.
#
# Steps, in the order the code performs them:
#   1. Build date lookups from filter._date_from / filter._date_to with
#      request_to_date_query(..., exact=False).
#   2. Ask self._determine_path_type() for the event name, the SQL expression
#      used as path_type, extra queryset lookups and a start-point comparator.
#   3. Build an Event queryset (person ids added) restricted to the team, the
#      path-type lookups and the dates. When `event` is None, the events
#      $autocapture, $pageview, $identify, $pageleave and $screen are
#      excluded. filter.properties_to_Q() is applied only when the filter has
#      properties. Every row is annotated with previous_timestamp, the LAG of
#      timestamp partitioned by person_id and ordered by timestamp ascending.
#   4. Take the queryset's SQL and parameters; for "$autocapture" the SQL is
#      first passed through self._add_elements().
#   5. Wrap that SQL in nested subqueries:
#        - new_session is 1 when the gap to previous_timestamp is at least
#          60 * 30 seconds or there is no previous timestamp, else 0;
#        - session is the running SUM(new_session) ordered by person_id,
#          timestamp;
#        - if filter.start_point is set, self._apply_start_point() rewrites
#          the query;
#        - event_number is ROW_NUMBER() per (person_id, session) ordered by
#          timestamp;
#        - source/target pairs come from LAG(...) partitioned by session,
#          keeping only rows with event_number <= 4;
#        - the outer query groups by (source_event, target_event), drops rows
#          where either is NULL, and keeps the 20 highest counts.
#   6. Execute the query with the queryset's original parameters and map each
#      row to a dict.
#
# NOTE(review): the SQL is assembled with str.format(); path_type is
# interpolated directly — confirm that _determine_path_type() never returns
# user-controlled text.
# NOTE(review): the cursor obtained from connection.cursor() is not
# explicitly closed in this function.
# NOTE(review): in this view the function is collapsed onto two lines; the
# whitespace inside the backslash-continued SQL literals should be checked
# against the real file before editing them.
def calculate_paths(self, filter: Filter, team: Team): date_query = request_to_date_query( { "date_from": filter._date_from, "date_to": filter._date_to }, exact=False) resp = [] event, path_type, event_filter, start_comparator = self._determine_path_type( filter.path_type if filter else None) sessions = (Event.objects.add_person_id(team.pk).filter( team=team, **(event_filter), **date_query ).filter(~Q(event__in=[ "$autocapture", "$pageview", "$identify", "$pageleave", "$screen" ]) if event is None else Q()).filter( filter.properties_to_Q(team_id=team.pk) if filter and filter.properties else Q()).annotate(previous_timestamp=Window( expression=Lag("timestamp", default=None), partition_by=F("person_id"), order_by=F("timestamp").asc(), ))) sessions_sql, sessions_sql_params = sessions.query.sql_with_params() if event == "$autocapture": sessions_sql = self._add_elements(query_string=sessions_sql) events_notated = "\ SELECT *, CASE WHEN EXTRACT('EPOCH' FROM (timestamp - previous_timestamp)) >= (60 * 30) OR previous_timestamp IS NULL THEN 1 ELSE 0 END AS new_session\ FROM ({}) AS inner_sessions\ ".format(sessions_sql) sessionified = "\ SELECT events_notated.*, SUM(new_session) OVER (\ ORDER BY person_id\ ,timestamp\ ) AS session\ FROM ({}) as events_notated\ ".format(events_notated) if filter and filter.start_point: sessionified = self._apply_start_point( start_comparator=start_comparator, query_string=sessionified, start_point=filter.start_point, ) final = "\ SELECT {} as path_type, id, sessionified.session\ ,ROW_NUMBER() OVER (\ PARTITION BY person_id\ ,session ORDER BY timestamp\ ) AS event_number\ FROM ({}) as sessionified\ ".format(path_type, sessionified) counts = "\ SELECT event_number || '_' || path_type as target_event, id as target_id, LAG(event_number || '_' || path_type, 1) OVER (\ PARTITION BY session\ ) AS source_event , LAG(id, 1) OVER (\ PARTITION BY session\ ) AS source_id from \ ({}) as final\ where event_number <= 4\ ".format(final) query = "\ SELECT 
source_event, target_event, MAX(target_id), MAX(source_id), count(*) from ({}) as counts\ where source_event is not null and target_event is not null\ group by source_event, target_event order by count desc limit 20\ ".format(counts) cursor = connection.cursor() cursor.execute(query, sessions_sql_params) rows = cursor.fetchall() for row in rows: resp.append({ "source": row[0], "target": row[1], "target_id": row[2], "source_id": row[3], "value": row[4], }) resp = sorted(resp, key=lambda x: x["value"], reverse=True) return resp