def calculate_list(self, filter: Filter, team: Team, limit: int, offset: int):
    """Run the session list query for ``team`` and return the parsed rows.

    If ``filter`` has no start date it is set to midnight of the current day;
    if it then has no end date, the end is set to one day after the start.
    Both defaults are written back onto ``filter``.

    ``limit`` and ``offset`` are passed to the query as bound parameters.
    """
    prop_clause, prop_params = parse_prop_clauses("uuid", filter.properties, team)

    if not filter._date_from:
        start_of_today = timezone.now().replace(hour=0, minute=0, second=0, microsecond=0)
        filter._date_from = start_of_today
    if not filter._date_to and filter.date_from:
        filter._date_to = filter.date_from + relativedelta(days=1)

    date_from, date_to = parse_timestamps(filter)

    # Only splice the property clause in when properties were supplied.
    filters_sql = "{}".format(prop_clause) if filter.properties else ""
    query = SESSION_SQL.format(
        date_from=date_from,
        date_to=date_to,
        filters=filters_sql,
        sessions_limit="LIMIT %(offset)s, %(limit)s",
    )

    query_params = {**prop_params}
    query_params.update(team_id=team.pk, limit=limit, offset=offset)

    sessions = self._parse_list_results(sync_execute(query, query_params))
    self._add_person_properties(team, sessions)
    return sessions
def _calculate_trends(self, filter: Filter, team: Team) -> List[Dict[str, Any]]:
    """Serialize every entity of ``filter`` into one flat result list.

    Missing dates are defaulted on ``filter`` ("-7d" for the start, now for
    the end). When ``filter.compare`` is truthy each entity contributes two
    series: one for ``filter`` labelled "<name> - current" and one for the
    compared filter labelled "<name> - previous".
    """
    # format default dates
    if not filter._date_from:
        filter._date_from = relative_date_parse("-7d")
    if not filter._date_to:
        filter._date_to = timezone.now()

    collected = []
    for entity in filter.entities:
        if not filter.compare:
            collected.extend(self._serialize_entity(entity, filter, team))
            continue

        compare_filter = determine_compared_filter(filter=filter)

        current_series = convert_to_comparison(
            self._serialize_entity(entity, filter, team),
            filter,
            "{} - {}".format(entity.name, "current"),
        )
        collected.extend(current_series)

        previous_series = convert_to_comparison(
            self._serialize_entity(entity, compare_filter, team),
            filter,
            "{} - {}".format(entity.name, "previous"),
        )
        collected.extend(previous_series)
    return collected
def test_prop_cohort_basic(self):
    # Three people: one with only "$some_prop", one with both props, one with none.
    team_id = self.team.pk
    _create_person(
        distinct_ids=["some_other_id"],
        team_id=team_id,
        properties={"$some_prop": "something"},
    )
    _create_person(
        distinct_ids=["some_id"],
        team_id=team_id,
        properties={"$some_prop": "something", "$another_prop": "something"},
    )
    _create_person(distinct_ids=["no_match"], team_id=team_id)

    # One pageview for each of the two people that have properties.
    for distinct_id in ("some_id", "some_other_id"):
        _create_event(
            event="$pageview",
            team=self.team,
            distinct_id=distinct_id,
            properties={"attr": "some_val"},
        )

    # The cohort requires both properties.
    cohort_groups = [{"properties": {"$some_prop": "something", "$another_prop": "something"}}]
    cohort1 = Cohort.objects.create(team=self.team, groups=cohort_groups, name="cohort1")

    cohort_filter = Filter(
        data={"properties": [{"key": "id", "value": cohort1.pk, "type": "cohort"}]}
    )
    clause, clause_params = parse_prop_clauses(cohort_filter.properties, self.team)

    sql_template = "SELECT uuid FROM events WHERE team_id = %(team_id)s {}"
    rows = sync_execute(sql_template.format(clause), {**clause_params, "team_id": self.team.pk})
    self.assertEqual(len(rows), 1)

    # The same filter drives a feature flag.
    feature_flag = FeatureFlag.objects.create(
        filters=cohort_filter.to_dict(),
        created_by=self.user,
        name="test",
        key="test",
        team=self.team,
    )
    self.assertTrue(feature_flag.distinct_id_matches("some_id"))
    self.assertFalse(feature_flag.distinct_id_matches("no_match"))
def test_get_insight_items(self):
    # Two dashboard items share the same filters; only the first has a creator.
    filter_dict = {
        "events": [{"id": "$pageview"}],
        "properties": [{"key": "$browser", "value": "Mac OS X"}],
    }

    # First with the creating user, then: create without user
    for creator_kwargs in ({"created_by": self.user}, {}):
        DashboardItem.objects.create(
            filters=Filter(data=filter_dict).to_dict(),
            team=self.team,
            **creator_kwargs,
        )

    http_response = self.client.get("/api/insight/", data={"user": "******"})
    payload = http_response.json()

    self.assertEqual(len(payload["results"]), 1)
def calculate_paths(self, filter: Filter, team: Team):
    """Run the paths query and return its rows as dicts, largest ``value`` first.

    Missing dates are defaulted on ``filter`` ("-7d" for the start, now for
    the end). Each returned dict has the keys ``source``, ``source_id``,
    ``target``, ``target_id`` and ``value``, taken from columns 0-4 of a row.
    """
    # format default dates
    if not filter._date_from:
        filter._date_from = relative_date_parse("-7d")
    if not filter._date_to:
        filter._date_to = timezone.now()

    parsed_date_from, parsed_date_to = parse_timestamps(filter=filter)
    event, path_type, start_comparator = self._determine_path_type(filter.path_type if filter else None)
    prop_filters, prop_filter_params = parse_prop_clauses("uuid", filter.properties, team)

    has_start_point = bool(filter and filter.start_point)
    is_autocapture = event == AUTOCAPTURE_EVENT

    # Step 0. Event culling subexpression for step 1.
    # Make an expression that removes events in a session that are definitely unused.
    # For example the 4th, 5th, etc row after a "new_session = 1" or "marked_session_start = 1" row gets removed
    row_conditions = []
    for lookback in range(4):
        row_conditions.append("neighbor(new_session, {}, 0) = 1".format(-lookback))
        if has_start_point:
            row_conditions.append("neighbor(marked_session_start, {}, 0) = 1".format(-lookback))
    excess_row_filter = "(" + " or ".join(row_conditions) + ")"

    if event:
        event_query = "event = %(event)s"
    else:
        event_query = "event NOT IN ('$autocapture', '$pageview', '$identify', '$pageleave', '$screen')"

    if has_start_point:
        marked_session_start = "{} = %(start_point)s".format(start_comparator)
    else:
        marked_session_start = "new_session"

    paths_query = PATHS_QUERY_FINAL.format(
        event_query=event_query,
        path_type=path_type,
        parsed_date_from=parsed_date_from,
        parsed_date_to=parsed_date_to,
        filters=prop_filters if filter.properties else "",
        marked_session_start=marked_session_start,
        excess_row_filter=excess_row_filter,
        select_elements_chain=", events.elements_chain as elements_chain" if is_autocapture else "",
        group_by_elements_chain=", events.elements_chain" if is_autocapture else "",
    )

    # Property-filter parameters come last, so they win on any key clash.
    params: Dict = {
        "team_id": team.pk,
        "property": "$current_url",
        "event": event,
        "start_point": filter.start_point,
        **prop_filter_params,
    }
    rows = sync_execute(paths_query, params)

    resp: List[Dict[str, str]] = [
        {
            "source": row[0],
            "source_id": row[1],
            "target": row[2],
            "target_id": row[3],
            "value": row[4],
        }
        for row in rows
    ]
    resp.sort(key=lambda edge: edge["value"], reverse=True)
    return resp
def people(self, request: Request, *args: Any, **kwargs: Any) -> Response:
    """Return one page (at most 100) of people for the first entity of the request.

    When the ClickHouse action endpoint is not enabled for the requesting
    user, the call is delegated to the parent class's ``get_people``.

    The response carries ``results`` (a single item with ``people`` and
    ``count``), ``next`` (URL with the offset advanced by 100, or ``None``
    when no further page is indicated) and ``previous`` (the current full
    path without its leading character).

    Raises:
        KeyError: if the filter has no entities and ``entityId``/``type``
            are missing from the query string, or if ``shown_as`` is
            "Stickiness" and ``stickiness_days`` is missing.
    """
    if not endpoint_enabled(CH_ACTION_ENDPOINT, request.user.distinct_id):
        return Response(super().get_people(request))

    page_size = 100
    team = request.user.team
    filter = Filter(request=request)
    shown_as = request.GET.get("shown_as")

    if len(filter.entities) >= 1:
        entity = filter.entities[0]
    else:
        entity = Entity({"id": request.GET["entityId"], "type": request.GET["type"]})

    # adhoc date handling. parsed differently with django orm
    if filter.interval == "month":
        if not filter.date_from:
            filter._date_to = timezone.now()
        else:
            filter._date_to = (filter.date_from + timedelta(days=31)).strftime("%Y-%m-%d %H:%M:%S")

    if shown_as == "Stickiness":
        stickiness_day = int(request.GET["stickiness_days"])
        serialized_people = self._calculate_stickiness_entity_people(team, entity, filter, stickiness_day)
    else:
        serialized_people = self._calculate_entity_people(team, entity, filter)

    current_url = request.get_full_path()
    next_url: Optional[str] = request.get_full_path()
    offset = filter.offset
    if len(serialized_people) > page_size and next_url:
        if "offset" in next_url:
            # Drop the leading character, then bump the existing offset value.
            next_url = next_url[1:]
            next_url = next_url.replace("offset=" + str(offset), "offset=" + str(offset + page_size))
        else:
            next_url = request.build_absolute_uri(
                "{}{}offset={}".format(next_url, "&" if "?" in next_url else "?", offset + page_size)
            )
    else:
        next_url = None

    # The count must describe the page actually returned; it was previously
    # computed from a 99-element slice and under-reported a full page.
    page = serialized_people[0:page_size]
    return Response(
        {
            "results": [{"people": page, "count": len(page)}],
            "next": next_url,
            "previous": current_url[1:],
        }
    )
def test_breakdown_by_person_property(self):
    # Break a trend down by the person property "name", once via an action and
    # once via the equivalent event, and check both give the same series.
    person1, person2, person3, person4 = self._create_multiple_people()
    action = _create_action(name="watched movie", team=self.team)

    action_filter_data = {
        "date_from": "-14d",
        "breakdown": "name",
        "breakdown_type": "person",
        "actions": [{"id": action.pk, "type": "actions", "order": 0}],
    }
    event_filter_data = {
        "date_from": "-14d",
        "breakdown": "name",
        "breakdown_type": "person",
        "events": [
            {"id": "watched movie", "name": "watched movie", "type": "events", "order": 0},
        ],
    }

    with freeze_time("2020-01-04T13:01:01Z"):
        action_response = ClickhouseTrends().run(Filter(data=action_filter_data), self.team)
        event_response = ClickhouseTrends().run(Filter(data=event_filter_data), self.team)

    breakdown_values = [series["breakdown_value"] for series in event_response]
    self.assertListEqual(breakdown_values, ["person1", "person2", "person3"])

    expected_counts = {"person1": 1, "person2": 3, "person3": 3}
    for series in event_response:
        value = series["breakdown_value"]
        if value in expected_counts:
            self.assertEqual(series["count"], expected_counts[value])
        if value == "person1":
            self.assertEqual(series["label"], "watched movie - person1")

    same = self._compare_entity_response(event_response, action_response)
    self.assertTrue(same)
def test_dau_with_breakdown_filtering_with_prop_filter(self):
    # DAU trend broken down by "$some_property" and restricted to "$os" == "Windows",
    # queried through an action and through the matching event.
    sign_up_action, _ = self._create_events()

    with freeze_time("2020-01-02T13:01:01Z"):
        _create_event(
            team=self.team,
            event="sign up",
            distinct_id="blabla",
            properties={"$some_property": "other_value", "$os": "Windows"},
        )

    def build_filter(entity_key, entity_id):
        # Identical filter for both runs apart from the entity being measured.
        return Filter(
            data={
                "breakdown": "$some_property",
                entity_key: [{"id": entity_id, "math": "dau"}],
                "properties": [{"key": "$os", "value": "Windows"}],
            }
        )

    with freeze_time("2020-01-04T13:01:01Z"):
        action_response = ClickhouseTrends().run(build_filter("actions", sign_up_action.id), self.team)
        event_response = ClickhouseTrends().run(build_filter("events", "sign up"), self.team)

    first_series, second_series = event_response[0], event_response[1]
    self.assertEqual(first_series["label"], "sign up - value")
    self.assertEqual(second_series["label"], "sign up - other_value")

    self.assertEqual(sum(second_series["data"]), 1)
    self.assertEqual(second_series["data"][5], 1)  # property not defined

    self.assertTrue(self._compare_entity_response(action_response, event_response))
def _calculate_stickiness(self, filter: Filter, team: Team) -> List[Dict[str, Any]]:
    """Serialize every entity of ``filter`` into one flat result list.

    Missing dates are defaulted on ``filter`` ("-7d" for the start, now for
    the end). For action entities the entity name is overwritten with the
    name of the ``Action`` row looked up by ``team`` and ``entity.id``.
    """
    if not filter._date_from:
        filter._date_from = relative_date_parse("-7d")
    if not filter._date_to:
        filter._date_to = timezone.now()

    collected = []
    for entity in filter.entities:
        if entity.type == TREND_FILTER_TYPE_ACTIONS:
            # Only the name column is needed from the action row.
            action = Action.objects.only("name").get(team=team, pk=entity.id)
            entity.name = action.name
        collected.extend(self._serialize_entity(entity, filter, team))
    return collected
def test_prop_person(self):
    # Filter events by a person property ("email") and expect a single matching event.
    team_id = self.team.pk
    _create_person(
        distinct_ids=["some_other_id"],
        team_id=team_id,
        properties={"email": "*****@*****.**"},
    )
    _create_person(
        distinct_ids=["some_id"],
        team_id=team_id,
        properties={"email": "*****@*****.**"},
    )
    _create_event(
        event="$pageview",
        team=self.team,
        distinct_id="some_id",
        properties={"attr": "some_val"},
    )

    person_filter = Filter(
        data={"properties": [{"key": "email", "value": "*****@*****.**", "type": "person"}]}
    )
    clause, clause_params = parse_prop_clauses(person_filter.properties, self.team.pk)

    sql_template = "SELECT uuid FROM events WHERE team_id = %(team_id)s {}"
    rows = sync_execute(sql_template.format(clause), {**clause_params, "team_id": self.team.pk})

    self.assertEqual(len(rows), 1)
def stats(self, request: request.Request) -> response.Response:
    """Return element statistics for the requesting user's team.

    Runs ``GET_ELEMENTS`` with the request's date range and property filters.
    Each result row becomes a dict with ``count`` (column 1), ``hash``
    (always ``None``) and ``elements`` (column 0 expanded through
    ``chain_to_elements`` and serialized).
    """
    filter = Filter(request=request)
    team = request.user.team
    assert team is not None

    date_from, date_to = parse_timestamps(filter)
    prop_filters, prop_filter_params = parse_prop_clauses(filter.properties, team.pk)

    sql = GET_ELEMENTS.format(date_from=date_from, date_to=date_to, query=prop_filters)
    rows = sync_execute(sql, {"team_id": team.id, **prop_filter_params})

    payload = []
    for row in rows:
        serialized_elements = [ElementSerializer(element).data for element in chain_to_elements(row[0])]
        payload.append({"count": row[1], "hash": None, "elements": serialized_elements})
    return response.Response(payload)
def test_retention_period(self):
    # Weekly retention over 7 intervals for two people with scattered pageviews.
    people = [
        (["person1", "alias1"], {"email": "*****@*****.**"}),
        (["person2"], {"email": "*****@*****.**"}),
    ]
    for distinct_ids, properties in people:
        Person.objects.create(team=self.team, distinct_ids=distinct_ids, properties=properties)

    pageviews = [
        ("person1", self._date(0)),
        ("person1", self._date(1)),
        ("person1", self._date(2, month=1)),
        ("person1", self._date(10, month=1)),
        ("person1", self._date(15)),
        ("person1", self._date(18)),
        ("person2", self._date(13)),
    ]
    self._create_pageviews(pageviews)

    weekly_filter = Filter(data={"date_from": self._date(0, hour=0), "period": "Week"})
    result = ClickhouseRetention().run(weekly_filter, self.team, total_intervals=7)

    expected_counts = [
        [1, 0, 1, 1, 0, 1, 1],
        [0, 0, 0, 0, 0, 0],
        [2, 1, 0, 1, 1],
        [1, 0, 1, 1],
        [0, 0, 0],
        [1, 1],
        [1],
    ]
    self.assertEqual(self.pluck(result, "values", "count"), expected_counts)
def test_prop_event(self):
    # Two pageviews differ only in "attr"; filtering on "some_val" must match exactly one.
    for attr_value in ("some_other_val", "some_val"):
        _create_event(
            event="$pageview",
            team=self.team,
            distinct_id="whatever",
            properties={"attr": attr_value},
        )

    event_filter = Filter(data={"properties": [{"key": "attr", "value": "some_val"}]})
    clause, clause_params = parse_prop_clauses(event_filter.properties, self.team.pk)

    sql_template = "SELECT uuid FROM events WHERE team_id = %(team_id)s {}"
    rows = sync_execute(sql_template.format(clause), {**clause_params, "team_id": self.team.pk})

    self.assertEqual(len(rows), 1)
def session(self, request: Request, *args: Any, **kwargs: Any) -> Response:
    """Return a page of sessions for the requesting user's team.

    ``limit`` (default ``SESSIONS_LIST_DEFAULT_LIMIT``) and ``offset``
    (default 0) come from the query string. One row more than ``limit`` is
    requested; if more than ``limit`` rows remain, the last one is dropped
    and the response also carries ``offset`` set to ``offset + limit``.

    A non-empty ``distinct_id`` query parameter narrows the rows to sessions
    whose ``distinct_id`` belongs to the first person found for that id; if
    no person is found the result is empty.
    """
    team = request.user.team
    filter = Filter(request=request)
    limit = int(request.GET.get("limit", SESSIONS_LIST_DEFAULT_LIMIT))
    offset = int(request.GET.get("offset", 0))

    sessions = ClickhouseSessions().run(team=team, filter=filter, limit=limit + 1, offset=offset)

    requested_distinct_id = request.GET.get("distinct_id")
    if requested_distinct_id:
        try:
            person_ids = get_persons_by_distinct_ids(team.pk, [requested_distinct_id])[0].distinct_ids
            sessions = [row for row in sessions if row["distinct_id"] in person_ids]
        except IndexError:
            sessions = []

    payload = {"result": sessions}
    if len(sessions) > limit:
        # Drop the extra row that was only fetched to detect another page.
        sessions.pop()
        payload["offset"] = offset + limit
    return Response(payload)
def retention(self, request: Request, *args: Any, **kwargs: Any) -> Response:
    """Run the ClickHouse retention query for the request and wrap it under ``data``."""
    team = request.user.team
    retention_filter = Filter(request=request)
    return Response({"data": ClickhouseRetention().run(retention_filter, team)})
def test_breakdown_filtering(self):
    self._create_events()

    # test breakdown filtering
    filter_data = {
        "date_from": "-14d",
        "breakdown": "$some_property",
        "events": [
            {"id": "sign up", "name": "sign up", "type": "events", "order": 0},
            {"id": "no events"},
        ],
    }
    with freeze_time("2020-01-04T13:01:01Z"):
        response = ClickhouseTrends().run(Filter(data=filter_data), self.team)

    # Labels come back as "<event> - <breakdown value>", in this order.
    expected_labels = [
        'sign up - "value"',
        'sign up - "other_value"',
        'no events - "value"',
        'no events - "other_value"',
    ]
    for position, expected_label in enumerate(expected_labels):
        self.assertEqual(response[position]["label"], expected_label)

    first_series, second_series = response[0], response[1]
    self.assertEqual(sum(first_series["data"]), 2)
    self.assertEqual(first_series["breakdown_value"], '"value"')
    self.assertEqual(sum(second_series["data"]), 1)
    self.assertEqual(second_series["breakdown_value"], '"other_value"')
def path(self, request: Request, *args: Any, **kwargs: Any) -> Response:
    """Run the ClickHouse paths query for the requesting user's team."""
    team = request.user.team
    assert team is not None
    paths_filter = Filter(request=request)
    return Response(ClickhousePaths().run(filter=paths_filter, team=team))
def funnel(self, request: Request, *args: Any, **kwargs: Any) -> Response:
    """Run the ClickHouse funnel for the requesting user's team."""
    team = request.user.team
    assert team is not None
    funnel_filter = Filter(request=request)
    return Response(ClickhouseFunnel(team=team, filter=funnel_filter).run())
def _serialize_breakdown(self, entity: Entity, filter: Filter, team_id: int):
    """Run the breakdown query for ``entity``, treating "all" separately.

    When ``filter.breakdown`` is a list containing "all", the specific
    properties (if any) are queried first and "all" is queried on its own
    afterwards; both result sets are concatenated in that order. Otherwise
    a single breakdown query is run with the filter unchanged.

    ``filter.breakdown`` is swapped temporarily for each query and restored
    before returning, even if a query raises. The caller's list is never
    modified in place, so the same filter can be reused for further entities
    without losing its specific breakdown properties.

    Returns:
        Whatever ``_format_breakdown_query`` yields, as one list.
    """
    requested = filter.breakdown
    if not (isinstance(requested, list) and "all" in requested):
        return self._format_breakdown_query(entity, filter, team_id)

    # handle breakdown by all and by specific props separately
    specific_props = [prop for prop in requested if prop != "all"]
    result = []
    try:
        if specific_props:
            filter.breakdown = specific_props
            result.extend(self._format_breakdown_query(entity, filter, team_id))
        filter.breakdown = ["all"]
        result.extend(self._format_breakdown_query(entity, filter, team_id))
    finally:
        filter.breakdown = requested
    return result
def _serialize_entity(self, entity: Entity, filter: Filter, team: Team) -> List[Dict[str, Any]]:
    """Query one entity and merge each result row onto a default template.

    The query used depends on ``filter.breakdown``:

    * falsy: the normal query;
    * a list containing "all": the specific properties (if any) are queried
      first, then "all" on its own, and the results are concatenated;
    * anything else: a single breakdown query.

    ``filter.breakdown`` is swapped temporarily for the split queries and
    restored afterwards, even if a query raises. The caller's list is never
    modified in place, so the same filter can be reused for further entities
    without losing its specific breakdown properties.

    Every row is applied on top of a deep copy of the template (``action``,
    ``label``, ``count``, ``data``, ``labels``, ``days``). When
    ``filter.display`` equals ``TRENDS_CUMULATIVE`` the rows are passed
    through ``_handle_cumulative`` before being returned.
    """
    serialized: Dict[str, Any] = {
        "action": entity.to_dict(),
        "label": entity.name,
        "count": 0,
        "data": [],
        "labels": [],
        "days": [],
    }

    requested = filter.breakdown
    if not requested:
        result = self._format_normal_query(entity, filter, team)
    elif isinstance(requested, list) and "all" in requested:
        specific_props = [prop for prop in requested if prop != "all"]
        result = []
        try:
            if specific_props:
                filter.breakdown = specific_props
                result.extend(self._format_breakdown_query(entity, filter, team))
            filter.breakdown = ["all"]
            result.extend(self._format_breakdown_query(entity, filter, team))
        finally:
            filter.breakdown = requested
    else:
        result = self._format_breakdown_query(entity, filter, team)

    serialized_data = []
    for queried_metric in result:
        # Deep copy so the template's list values are not shared between rows.
        serialized_copy = copy.deepcopy(serialized)
        serialized_copy.update(queried_metric)
        serialized_data.append(serialized_copy)

    if filter.display == TRENDS_CUMULATIVE:
        serialized_data = self._handle_cumulative(serialized_data)
    return serialized_data
def funnel(self, request: Request, *args: Any, **kwargs: Any) -> Response:
    """Return funnel results, from ClickHouse when that endpoint is enabled.

    If the ClickHouse funnel endpoint is not enabled for the requesting user,
    the parent class's ``calculate_funnel`` supplies the result instead.
    """
    clickhouse_enabled = endpoint_enabled(CH_FUNNEL_ENDPOINT, request.user.distinct_id)
    if not clickhouse_enabled:
        return Response(super().calculate_funnel(request))

    team = request.user.team
    funnel_filter = Filter(request=request)
    return Response(ClickhouseFunnel(team=team, filter=funnel_filter).run())
def path(self, request: Request, *args: Any, **kwargs: Any) -> Response:
    """Return paths results, from ClickHouse when that endpoint is enabled.

    If the ClickHouse path endpoint is not enabled for the requesting user,
    the parent class's ``calculate_path`` supplies the result instead.
    """
    clickhouse_enabled = endpoint_enabled(CH_PATH_ENDPOINT, request.user.distinct_id)
    if not clickhouse_enabled:
        return Response(super().calculate_path(request))

    team = request.user.team
    paths_filter = Filter(request=request)
    return Response(ClickhousePaths().run(filter=paths_filter, team=team))
def test_person_cohort_properties(self):
    # cohort1 matches "$some_prop" == "something"; cohort2 uses the "__is_not" form.
    person1_distinct_id = "person1"
    Person.objects.create(
        team=self.team,
        distinct_ids=[person1_distinct_id],
        properties={"$some_prop": "something"},
    )
    cohort1 = Cohort.objects.create(
        team=self.team,
        groups=[{"properties": {"$some_prop": "something"}}],
        name="cohort1",
    )

    person2_distinct_id = "person2"
    Person.objects.create(
        team=self.team,
        distinct_ids=[person2_distinct_id],
        properties={"$some_prop": "different"},
    )
    cohort2 = Cohort.objects.create(
        team=self.team,
        groups=[{"properties": {"$some_prop__is_not": "something"}}],
        name="cohort2",
    )

    def first_distinct_id_in(cohort):
        # Query person_distinct_id restricted to the cohort, return column 1 of the first row.
        cohort_filter = Filter(
            data={"properties": [{"key": "id", "value": cohort.pk, "type": "cohort"}]}
        )
        prop_clause, prop_clause_params = parse_prop_clauses("uuid", cohort_filter.properties, self.team)
        query = """ SELECT * FROM person_distinct_id WHERE team_id = %(team_id)s {prop_clause} """.format(
            prop_clause=prop_clause
        )
        rows = sync_execute(query, {"team_id": self.team.pk, **prop_clause_params})
        # get distinct_id column of result
        return rows[0][1]

    self.assertEqual(first_distinct_id_in(cohort1), person1_distinct_id)

    # test cohort2 with negation
    self.assertEqual(first_distinct_id_in(cohort2), person2_distinct_id)
def _single_step_funnel(self, properties=None, filters=None):
    """Build a ``Funnel`` for this test's team with a single-step default.

    Args:
        properties: optional value stored under the ``"properties"`` key of
            the filter data.
        filters: optional filter data; defaults to one "user signed up"
            event step. The dict passed in is never modified.

    Returns:
        A ``Funnel`` built from the resulting filter and ``self.team``.
    """
    if filters is None:
        filters = {
            "events": [{"id": "user signed up", "type": "events", "order": 0}],
        }
    if properties is not None:
        # Merge into a new dict so the caller's ``filters`` stays untouched.
        filters = {**filters, "properties": properties}
    return Funnel(filter=Filter(data=filters), team=self.team)
def retention(self, request: Request, *args: Any, **kwargs: Any) -> Response:
    """Return retention results under ``data``, from ClickHouse when enabled.

    If the ClickHouse retention endpoint is not enabled for the requesting
    user, the parent class's ``calculate_retention`` supplies the result.
    """
    clickhouse_enabled = endpoint_enabled(CH_RETENTION_ENDPOINT, request.user.distinct_id)
    if not clickhouse_enabled:
        return Response({"data": super().calculate_retention(request)})

    team = request.user.team
    retention_filter = Filter(request=request)
    return Response({"data": ClickhouseRetention().run(retention_filter, team)})
def trend(self, request: Request, *args: Any, **kwargs: Any) -> Response:
    """Run the stickiness or trends query for the request.

    Stickiness is used when ``filter.shown_as`` equals ``TRENDS_STICKINESS``,
    trends otherwise. ``_refresh_dashboard`` is called after the query has run.
    """
    team = request.user.team
    trend_filter = Filter(request=request)

    if trend_filter.shown_as == TRENDS_STICKINESS:
        runner = ClickhouseStickiness()
    else:
        runner = ClickhouseTrends()
    result = runner.run(trend_filter, team)

    self._refresh_dashboard(request=request)
    return Response(result)
def test_prop_cohort_basic_action(self):
    # The cohort is defined by an action named "$pageview"; one "$pageview"
    # event and one "$not_pageview" event exist, and one event is expected back.
    team_id = self.team.pk
    _create_person(
        distinct_ids=["some_other_id"],
        team_id=team_id,
        properties={"$some_prop": "something"},
    )
    _create_person(
        distinct_ids=["some_id"],
        team_id=team_id,
        properties={"$some_prop": "something", "$another_prop": "something"},
    )
    _create_person(distinct_ids=["no_match"], team_id=team_id)

    action = _create_action(team=self.team, name="$pageview")

    events = [("$pageview", "some_id"), ("$not_pageview", "some_other_id")]
    for event_name, distinct_id in events:
        _create_event(
            event=event_name,
            team=self.team,
            distinct_id=distinct_id,
            properties={"attr": "some_val"},
        )

    cohort1 = Cohort.objects.create(
        team=self.team,
        groups=[{"action_id": action.pk}],
        name="cohort1",
    )

    cohort_filter = Filter(
        data={"properties": [{"key": "id", "value": cohort1.pk, "type": "cohort"}]}
    )
    clause, clause_params = parse_prop_clauses(cohort_filter.properties, self.team.pk)

    sql_template = "SELECT uuid FROM events WHERE team_id = %(team_id)s {}"
    rows = sync_execute(sql_template.format(clause), {**clause_params, "team_id": self.team.pk})

    self.assertEqual(len(rows), 1)
def trend(self, request: Request, *args: Any, **kwargs: Any) -> Response:
    """Return trend results, from ClickHouse when that endpoint is enabled.

    If the ClickHouse trend endpoint is not enabled for the requesting user,
    the parent class's ``calculate_trends`` supplies the result. Otherwise
    stickiness is used when ``filter.shown_as`` equals ``TRENDS_STICKINESS``
    and trends otherwise, and ``_refresh_dashboard`` is called after the
    query has run.
    """
    clickhouse_enabled = endpoint_enabled(CH_TREND_ENDPOINT, request.user.distinct_id)
    if not clickhouse_enabled:
        return Response(super().calculate_trends(request))

    team = request.user.team
    trend_filter = Filter(request=request)

    if trend_filter.shown_as == TRENDS_STICKINESS:
        runner = ClickhouseStickiness()
    else:
        runner = ClickhouseTrends()
    result = runner.run(trend_filter, team)

    self._refresh_dashboard(request=request)
    return Response(result)
def _basic_funnel(self, properties=None, filters=None):
    """Create two actions and build a ``Funnel`` for this test's team.

    The "paid" and "watched movie" actions (one ``$autocapture`` step each)
    are created on every call, whether or not ``filters`` is supplied.

    Args:
        properties: optional value stored under the ``"properties"`` key of
            the filter data.
        filters: optional filter data; defaults to a three-step funnel of a
            "user signed up" event followed by the two actions. The dict
            passed in is never modified.

    Returns:
        A ``Funnel`` built from the resulting filter and ``self.team``.
    """
    action_credit_card = Action.objects.create(team=self.team, name="paid")
    ActionStep.objects.create(
        action=action_credit_card,
        event="$autocapture",
        tag_name="button",
        text="Pay $10",
    )

    action_play_movie = Action.objects.create(team=self.team, name="watched movie")
    ActionStep.objects.create(
        action=action_play_movie,
        event="$autocapture",
        tag_name="a",
        href="/movie",
    )

    if filters is None:
        filters = {
            "events": [
                {"id": "user signed up", "type": "events", "order": 0},
            ],
            "actions": [
                {"id": action_credit_card.pk, "type": "actions", "order": 1},
                {"id": action_play_movie.pk, "type": "actions", "order": 2},
            ],
        }
    if properties is not None:
        # Merge into a new dict so the caller's ``filters`` stays untouched.
        filters = {**filters, "properties": properties}
    return Funnel(filter=Filter(data=filters), team=self.team)
def list(self, request):
    """Return serialized events for the requesting user's team.

    A ``LIMIT 100`` clause is applied only when the filter has neither a
    start nor an end date. When property filters are present the
    ``SELECT_EVENT_WITH_PROP_SQL`` query is used, otherwise
    ``SELECT_EVENT_WITH_ARRAY_PROPS_SQL``. ``next`` is always ``None``.
    """
    team = request.user.team
    filter = Filter(request=request)

    has_date_bound = filter._date_from or filter._date_to
    limit = "" if has_date_bound else "LIMIT 100"

    conditions, condition_params = determine_event_conditions(request.GET)
    prop_filters, prop_filter_params = parse_filter(filter.properties)

    if prop_filters:
        sql = SELECT_EVENT_WITH_PROP_SQL.format(conditions=conditions, limit=limit, filters=prop_filters)
        sql_params = {"team_id": team.pk, **condition_params, **prop_filter_params}
    else:
        sql = SELECT_EVENT_WITH_ARRAY_PROPS_SQL.format(conditions=conditions, limit=limit)
        sql_params = {"team_id": team.pk, **condition_params}
    query_result = sync_execute(sql, sql_params)

    serializer = ClickhouseEventSerializer(
        query_result,
        many=True,
        context={"elements": None, "people": None},
    )
    return Response({"next": None, "results": serializer.data})