def test_funnel_multiple_actions(self):
    """Regression test for funnels with several property-filtered actions.

    On ClickHouse, multiple actions with different property filters used to
    incorrectly pick up only the last action's properties.
    """
    person_factory(distinct_ids=["person1"], team_id=self.team.pk)
    event_factory(distinct_id="person1", event="event1", team=self.team)
    event_factory(distinct_id="person1", event="event2", properties={"test_prop": "a"}, team=self.team)

    # Two actions on the same event that differ only in the expected
    # value of ``test_prop``; the recorded event carries the value "a".
    created_actions = []
    for prop_value in ("a", "c"):
        action = Action.objects.create(team_id=self.team.pk, name="event2")
        ActionStep.objects.create(
            action=action,
            event="event2",
            properties=[{"key": "test_prop", "value": prop_value}],
        )
        action.calculate_events()
        created_actions.append(action)
    first_action, second_action = created_actions

    funnel_filter = Filter(
        data={
            "events": [{"id": "event1", "order": 0}],
            "actions": [
                {"id": first_action.pk, "order": 1},
                {"id": second_action.pk, "order": 2},
            ],
            "insight": INSIGHT_FUNNELS,
            "funnel_window_days": 14,
        }
    )
    result = Funnel(filter=funnel_filter, team=self.team).run()

    for step_index, expected_count in enumerate((1, 1, 0)):
        self.assertEqual(result[step_index]["count"], expected_count)
def _run(self, date_from=None, date_to=None, interval=None):
    """Create the fixture events and run a "sign up" -> "pay" funnel.

    ``interval`` falls back to "day" when falsy; ``date_to`` is only added
    to the filter data when it is truthy.
    """
    self._create_events()

    filter_data = {
        "insight": INSIGHT_FUNNELS,
        "display": TRENDS_LINEAR,
        "interval": interval or "day",
        "date_from": date_from,
    }
    if date_to:
        filter_data["date_to"] = date_to
    filter_data["events"] = [
        {"id": "sign up", "order": 0},
        {"id": "pay", "order": 1},
    ]

    funnel = Funnel(team=self.team, filter=Filter(data=filter_data))
    return funnel.run()
def test_prop_event(self):
    """An event-property filter selects only the event with the matching value."""
    for attr_value in ("some_other_val", "some_val"):
        _create_event(
            event="$pageview",
            team=self.team,
            distinct_id="whatever",
            properties={"attr": attr_value},
        )

    prop_filter = Filter(data={"properties": [{"key": "attr", "value": "some_val"}]})
    clause, clause_params = parse_prop_clauses(prop_filter.properties, self.team.pk)

    sql_template = "SELECT uuid FROM events WHERE team_id = %(team_id)s {}"
    final_query = sql_template.format(clause)
    rows = sync_execute(final_query, {**clause_params, "team_id": self.team.pk})

    self.assertEqual(len(rows), 1)
def test_basic_event_filter(self):
    """The generated event query matches the expected SQL and executes."""
    event_filter = Filter(
        data={
            "date_from": "2021-05-01 00:00:00",
            "date_to": "2021-05-07 00:00:00",
            "events": [{"id": "viewed", "order": 0}],
        }
    )
    viewed_entity = Entity({"id": "viewed", "type": "events"})

    event_query = ClickhouseEventQuery(event_filter, viewed_entity, self.team.pk)
    query, params = event_query.get_query()

    expected_sql = """ SELECT e.timestamp as timestamp, e.properties as properties FROM events e WHERE team_id = %(team_id)s AND event = %(event)s AND timestamp >= '2021-05-01 00:00:00' AND timestamp <= '2021-05-07 23:59:59' """

    # Compare after sqlparse re-indentation so both sides are formatted alike.
    actual_formatted = sqlparse.format(query, reindent=True)
    expected_formatted = sqlparse.format(expected_sql, reindent=True)
    self.assertEqual(actual_formatted, expected_formatted)

    # The query must also run without raising.
    sync_execute(query, params)
def test_second_step(self):
    """Requesting ``funnel_step`` 2 of the strict funnel yields 10 persons."""
    self._create_sample_data_multiple_dropoffs()

    step_names = ("step one", "step two", "step three")
    funnel_filter = Filter(
        data={
            "insight": INSIGHT_FUNNELS,
            "interval": "day",
            "date_from": "2021-05-01 00:00:00",
            "date_to": "2021-05-07 00:00:00",
            "funnel_window_days": 7,
            "funnel_step": 2,
            "events": [{"id": name, "order": position} for position, name in enumerate(step_names)],
        }
    )

    persons_query = ClickhouseFunnelStrictPersons(funnel_filter, self.team)
    results, _ = persons_query.run()

    self.assertEqual(10, len(results))
def test_groups_filters_mixed(self):
    """A group property filter combined with a person property filter returns 2 rows."""
    self._create_groups_test_data()

    group_property = {"key": "industry", "value": "finance", "type": "group", "group_type_index": 0}
    person_property = {"key": "$browser", "value": "test", "type": "person"}
    mixed_filter = Filter(
        {
            "date_from": "2020-01-01T00:00:00Z",
            "date_to": "2020-01-12T00:00:00Z",
            "events": [{"id": "$pageview", "type": "events", "order": 0}],
            "properties": [group_property, person_property],
        },
        team=self.team,
    )

    results, _ = self._run_query(mixed_filter)

    self.assertEqual(len(results), 2)
def session(self, request: Request, *args: Any, **kwargs: Any) -> Response:
    """Return one page of sessions, optionally limited to a single person.

    Query parameters read from ``request.GET``:
      * ``limit``  - page size (defaults to ``SESSIONS_LIST_DEFAULT_LIMIT``)
      * ``offset`` - starting offset (defaults to 0)
      * ``distinct_id`` - when non-empty, keep only sessions whose
        ``distinct_id`` belongs to the person owning that id.

    The response carries an ``offset`` key for the next page only when more
    than ``limit`` sessions remain after filtering.
    """
    team = self.team
    session_filter = Filter(request=request)
    limit = int(request.GET.get("limit", SESSIONS_LIST_DEFAULT_LIMIT))
    offset = int(request.GET.get("offset", 0))

    # One extra row is requested so that its presence signals a further page.
    sessions = ClickhouseSessions().run(team=team, filter=session_filter, limit=limit + 1, offset=offset)

    distinct_id = request.GET.get("distinct_id")
    if distinct_id:
        try:
            # [0] raises IndexError when no person owns this distinct id.
            owner = get_persons_by_distinct_ids(team.pk, [distinct_id])[0]
            owner_ids = owner.distinct_ids
            sessions = [entry for entry in sessions if entry["distinct_id"] in owner_ids]
        except IndexError:
            sessions = []

    if len(sessions) <= limit:
        return Response({"result": sessions})

    # Drop the look-ahead row before returning the page.
    sessions.pop()
    return Response({"result": sessions, "offset": offset + limit})
def test_breakdown_filtering_persons_with_action_props(self):
    """Person-property breakdown over an action that has its own property filter."""
    person_fixtures = (
        ("person1", {"email": "*****@*****.**"}),
        ("person2", {"email": "*****@*****.**"}),
        ("person3", {}),
    )
    for distinct_id, person_properties in person_fixtures:
        Person.objects.create(team_id=self.team.pk, distinct_ids=[distinct_id], properties=person_properties)
    for distinct_id, _ in person_fixtures:
        _create_event(event="sign up", distinct_id=distinct_id, team=self.team, properties={"key": "val"})

    action = _create_action(
        name="sign up",
        team=self.team,
        properties=[{"key": "key", "type": "event", "value": ["val"], "operator": "exact"}],
    )

    breakdown_filter = Filter(
        data={
            "date_from": "-14d",
            "breakdown": "email",
            "breakdown_type": "person",
            "actions": [{"id": action.pk, "type": "actions", "order": 0}],
        }
    )
    response = ClickhouseTrends().run(breakdown_filter, self.team)

    expected_labels = ("sign up - none", "sign up - [email protected]", "sign up - [email protected]")
    for index, label in enumerate(expected_labels):
        self.assertEqual(response[index]["label"], label)
    for index in range(3):
        self.assertEqual(response[index]["count"], 1)
def test_action_with_prop(self):
    """The action's own property must not override the filter-level property."""
    Person.objects.create(
        team_id=self.team.pk,
        distinct_ids=["blabla", "anonymous_id"],
        properties={"$some_prop": "some_val"},
    )
    sign_up_action = Action.objects.create(team=self.team, name="sign up")
    ActionStep.objects.create(
        action=sign_up_action,
        event="sign up",
        properties={"$current_url": "https://posthog.com/feedback/1234"},
    )

    with freeze_time("2020-01-02T13:01:01Z"):
        _create_event(
            team=self.team,
            event="sign up",
            distinct_id="blabla",
            properties={"$current_url": "https://posthog.com/feedback/1234"},
        )

    trends_filter_data = {
        "actions": [{"id": sign_up_action.id, "math": "dau"}],
        "properties": [{"key": "$current_url", "value": "fake"}],
    }
    with freeze_time("2020-01-04T13:01:01Z"):
        action_response = ClickhouseTrends().run(Filter(data=trends_filter_data), self.team)

    # if the params were shared it would be 1 because action would take precedence
    self.assertEqual(action_response[0]["count"], 0)
def test_first_step(self):
    """Requesting ``funnel_step`` 1 of the strict funnel yields 35 serialized actors."""
    self._create_sample_data_multiple_dropoffs()

    funnel_events = [
        {"id": "step one", "order": 0},
        {"id": "step two", "order": 1},
        {"id": "step three", "order": 2},
    ]
    funnel_filter = Filter(
        data={
            "insight": INSIGHT_FUNNELS,
            "interval": "day",
            "date_from": "2021-05-01 00:00:00",
            "date_to": "2021-05-07 00:00:00",
            "funnel_window_days": 7,
            "funnel_step": 1,
            "events": funnel_events,
        }
    )

    actors_query = ClickhouseFunnelStrictActors(funnel_filter, self.team)
    _, serialized_results = actors_query.get_actors()

    self.assertEqual(35, len(serialized_results))
def test_breakdown_filtering(self):
    """Breakdown by ``$some_property`` over two event series yields four labelled rows."""
    self._create_events()

    breakdown_filter = Filter(
        data={
            "date_from": "-14d",
            "breakdown": "$some_property",
            "events": [
                {"id": "sign up", "name": "sign up", "type": "events", "order": 0},
                {"id": "no events"},
            ],
        }
    )
    # test breakdown filtering
    with freeze_time("2020-01-04T13:01:01Z"):
        response = ClickhouseTrends().run(breakdown_filter, self.team)

    expected_labels = (
        "sign up - none",
        "sign up - value",
        "sign up - other_value",
        "no events - none",
    )
    for index, label in enumerate(expected_labels):
        self.assertEqual(response[index]["label"], label)

    expected_totals = (2, 2, 1, 1)
    for index, total in enumerate(expected_totals):
        self.assertEqual(sum(response[index]["data"]), total)
def test_month_interval(self):
    """A one-week range with a "month" interval produces a single result row."""
    step_names = ("step one", "step two", "step three")
    filter_data = {
        "insight": INSIGHT_FUNNELS,
        "display": TRENDS_LINEAR,
        "interval": "month",
        "date_from": "2021-05-01 00:00:00",
        "date_to": "2021-05-07 00:00:00",
        "funnel_window_days": 7,
        "events": [{"id": name, "order": position} for position, name in enumerate(step_names)],
    }

    trends = ClickhouseFunnelTrends(Filter(data=filter_data), self.team)
    results = trends.perform_query()

    self.assertEqual(len(results), 1)
def test_no_event_in_period(self):
    """With the only event dated before the range, one row per day is still returned."""
    _create_person(distinct_ids=["user a"], team=self.team)
    # This event is timestamped before ``date_from`` below.
    _create_event(event="step one", distinct_id="user a", team=self.team, timestamp="2021-06-06 21:00:00")

    filter_data = {
        "insight": INSIGHT_FUNNELS,
        "display": TRENDS_LINEAR,
        "interval": "day",
        "date_from": "2021-06-07 00:00:00",
        "date_to": "2021-06-13 23:59:59",
        "funnel_window_days": 7,
        "events": [
            {"id": "step one", "order": 0},
            {"id": "step two", "order": 1},
            {"id": "step three", "order": 2},
        ],
    }
    funnel_trends = ClickhouseFunnelTrends(Filter(data=filter_data), self.team, ClickhouseFunnel)

    raw_results = funnel_trends._exec_query()
    formatted_results = funnel_trends._format_results(raw_results)

    self.assertEqual(len(raw_results), 7)
    self.assertEqual(formatted_results[0]["days"][0], "2021-06-07")
def test_steps_performed_in_period_but_in_reverse(self):
    """Steps performed in reverse order count as entering but not converting."""
    _create_person(distinct_ids=["user_one"], team=self.team)

    # Events happen in the opposite order to the funnel definition.
    reversed_events = (
        ("step three", "2021-05-01 01:00:00"),
        ("step two", "2021-05-01 02:00:00"),
        ("step one", "2021-05-01 03:00:00"),
    )
    for event_name, event_time in reversed_events:
        _create_event(event=event_name, distinct_id="user_one", team=self.team, timestamp=event_time)

    funnel_filter = Filter(
        data={
            "insight": INSIGHT_FUNNELS,
            "display": TRENDS_LINEAR,
            "interval": "day",
            "date_from": "2021-05-01 00:00:00",
            "date_to": "2021-05-01 23:59:59",
            "funnel_window_days": 1,
            "events": [
                {"id": "step one", "order": 0},
                {"id": "step two", "order": 1},
                {"id": "step three", "order": 2},
            ],
        }
    )
    trends = ClickhouseFunnelTrends(funnel_filter, self.team, ClickhouseFunnel)
    results = trends._exec_query()

    self.assertEqual(len(results), 1)

    first_day = results[0]  # 2021-05-01
    self.assertEqual(first_day["reached_from_step_count"], 1)
    self.assertEqual(first_day["reached_to_step_count"], 0)
    self.assertEqual(first_day["conversion_rate"], 0)
    self.assertEqual(first_day["is_period_final"], True)
def _get_persons_url(self, filter: Filter, entity: Entity, team_id: int, dates: List[str]) -> List[Dict[str, Any]]:
    """Build one "people" API link per date.

    Each returned dict has:
      * ``filter`` - the per-date parameters (entity id/type/math plus the
        date range); ``date_from`` is the filter's own ``date_from`` for
        cumulative displays and the date itself otherwise.
      * ``url`` - a project-relative URL whose query string is the filter's
        params overlaid with those per-date parameters.
    """
    links = []
    for date in dates:
        base_params = filter.to_params()
        # Cumulative charts count from the filter's start date up to ``date``.
        range_start = filter.date_from if filter.display == TRENDS_CUMULATIVE else date
        extra_params = {
            "entity_id": entity.id,
            "entity_type": entity.type,
            "entity_math": entity.math,
            "date_from": range_start,
            "date_to": date,
        }
        merged_params = dict(base_params)
        merged_params.update(extra_params)
        parsed_params: Dict[str, str] = encode_get_request_params(merged_params)
        link = {
            "filter": extra_params,
            "url": f"api/projects/{team_id}/actions/people/?{urllib.parse.urlencode(parsed_params)}",
        }
        links.append(link)
    return links
def test_breakdown_user_props_with_filter(self):
    """Person-property breakdown combined with person and event property filters."""
    for distinct_id in ("person1", "person2"):
        Person.objects.create(
            team_id=self.team.pk,
            distinct_ids=[distinct_id],
            properties={"email": "*****@*****.**"},
        )
    third_person = Person.objects.create(
        team_id=self.team.pk,
        distinct_ids=["person3"],
        properties={"email": "*****@*****.**"},
    )
    # Register "person1" as a distinct id of the third person as well.
    create_person_distinct_id(third_person.id, self.team.pk, "person1", str(third_person.uuid))

    for distinct_id in ("person1", "person2"):
        _create_event(event="sign up", distinct_id=distinct_id, team=self.team, properties={"key": "val"})

    trends_filter = Filter(
        data={
            "date_from": "-14d",
            "breakdown": "email",
            "breakdown_type": "person",
            "events": [{"id": "sign up", "name": "sign up", "type": "events", "order": 0}],
            "properties": [
                {"key": "email", "value": "@posthog.com", "operator": "not_icontains", "type": "person"},
                {"key": "key", "value": "val"},
            ],
        }
    )
    response = ClickhouseTrends().run(trends_filter, self.team)

    self.assertEqual(len(response), 1)
    self.assertEqual(response[0]["breakdown_value"], "*****@*****.**")
def test_static_cohort_filter(self):
    """Smoke test: a query filtered by a static cohort runs without raising."""
    static_cohort = _create_cohort(team=self.team, name="cohort1", groups=[], is_static=True)

    cohort_property = {"key": "id", "value": static_cohort.pk, "type": "cohort"}
    cohort_filter = Filter(
        data={
            "date_from": "2021-05-01 00:00:00",
            "date_to": "2021-05-07 00:00:00",
            "events": [{"id": "viewed", "order": 0}],
            "properties": [cohort_property],
        },
        team=self.team,
    )

    # No assertion on the result; the call itself is the check.
    self._run_query(cohort_filter)
def test_breakdown_filtering_bar_chart_by_value(self):
    """Breakdown with the bar-value display reports aggregated values and the day list."""
    self._create_events()

    bar_filter = Filter(
        data={
            "date_from": "-7d",
            "breakdown": "$some_property",
            "events": [{"id": "sign up", "name": "sign up", "type": "events", "order": 0}],
            "display": TRENDS_BAR_VALUE,
        }
    )
    # test breakdown filtering
    with freeze_time("2020-01-04T13:01:01Z"):
        response = ClickhouseTrends().run(bar_filter, self.team)

    # Index 0 holds the events without breakdown value.
    expected_aggregates = (2, 1, 1)
    for index, aggregate in enumerate(expected_aggregates):
        self.assertEqual(response[index]["aggregated_value"], aggregate)

    expected_days = [
        "2019-12-28",
        "2019-12-29",
        "2019-12-30",
        "2019-12-31",
        "2020-01-01",
        "2020-01-02",
        "2020-01-03",
        "2020-01-04",
    ]
    self.assertEqual(response[0]["days"], expected_days)
def test_combine_all_cohort_and_icontains(self):
    """Cohort + "all" breakdown together with an ``icontains`` property filter.

    This combination previously caused issues with SQL parsing.
    """
    sign_up_action, _ = self._create_events()
    cohort = Cohort.objects.create(
        team=self.team,
        name="a",
        groups=[{"properties": {"key": "value"}}],
    )

    trends_filter = Filter(
        data={
            "actions": [{"id": sign_up_action.id, "math": "dau"}],
            "properties": [{"key": "$current_url", "value": "ii", "operator": "icontains"}],
            "breakdown": [cohort.pk, "all"],
            "breakdown_type": "cohort",
        }
    )
    action_response = ClickhouseTrends().run(trends_filter, self.team)

    self.assertEqual(action_response[0]["count"], 0)
def test_static_cohort_filter(self):
    """Smoke test: the trends event query for a static cohort filter executes."""
    static_cohort = _create_cohort(team=self.team, name="cohort1", groups=[], is_static=True)

    cohort_filter = Filter(
        data={
            "date_from": "2021-05-01 00:00:00",
            "date_to": "2021-05-07 00:00:00",
            "events": [{"id": "viewed", "order": 0}],
            "properties": [{"key": "id", "value": static_cohort.pk, "type": "cohort"}],
        }
    )
    viewed_entity = Entity({"id": "viewed", "type": "events"})

    event_query = TrendsEventQuery(filter=cohort_filter, entity=viewed_entity, team_id=self.team.pk)
    query, params = event_query.get_query()

    # No assertion on the rows; the query only has to run without raising.
    sync_execute(query, params)
def test_get_insight_items(self):
    """Listing insights with the ``user`` parameter returns only the item that has a creator."""
    filter_dict = {
        "events": [{"id": "$pageview"}],
        "properties": [{"key": "$browser", "value": "Mac OS X"}],
    }

    owned_filters = Filter(data=filter_dict).to_dict()
    DashboardItem.objects.create(filters=owned_filters, team=self.team, created_by=self.user)

    # create without user
    anonymous_filters = Filter(data=filter_dict).to_dict()
    DashboardItem.objects.create(filters=anonymous_filters, team=self.team)

    http_response = self.client.get("/api/insight/", data={"user": "******"})
    response = http_response.json()

    self.assertEqual(len(response["results"]), 1)
def test_prop_cohort_with_negation(self):
    """A cohort defined by an ``is_not`` person property matches no events here."""
    team2 = Organization.objects.bootstrap(None)[2]

    _create_person(distinct_ids=["some_other_id"], team_id=self.team.pk, properties={"$some_prop": "something"})
    # This person is created on the second team, not on ``self.team``.
    _create_person(distinct_ids=["some_id"], team_id=team2.pk, properties={"$another_prop": "something"})

    for distinct_id in ("some_id", "some_other_id"):
        _create_event(
            event="$pageview",
            team=self.team,
            distinct_id=distinct_id,
            properties={"attr": "some_val"},
        )

    negated_property = {"type": "person", "key": "$some_prop", "operator": "is_not", "value": "something"}
    cohort1 = Cohort.objects.create(
        team=self.team,
        groups=[{"properties": [negated_property]}],
        name="cohort1",
    )

    cohort_filter = Filter(
        data={"properties": [{"key": "id", "value": cohort1.pk, "type": "cohort"}]},
        team=self.team,
    )
    clause, clause_params = parse_prop_grouped_clauses(
        team_id=self.team.pk, property_group=cohort_filter.property_groups
    )
    sql_template = "SELECT uuid FROM events WHERE team_id = %(team_id)s {}"
    final_query = sql_template.format(clause)

    self.assertIn("\nFROM person_distinct_id2\n", final_query)

    rows = sync_execute(final_query, {**clause_params, "team_id": self.team.pk})
    self.assertEqual(len(rows), 0)
def test_prop_cohort_basic_action(self):
    """A cohort defined by an action selects exactly one of the created events."""
    _create_person(distinct_ids=["some_other_id"], team_id=self.team.pk, properties={"$some_prop": "something"})
    _create_person(
        distinct_ids=["some_id"],
        team_id=self.team.pk,
        properties={"$some_prop": "something", "$another_prop": "something"},
    )
    _create_person(distinct_ids=["no_match"], team_id=self.team.pk)

    action = _create_action(team=self.team, name="$pageview")

    event_fixtures = (
        ("$pageview", "some_id"),
        ("$not_pageview", "some_other_id"),
    )
    for event_name, distinct_id in event_fixtures:
        _create_event(
            event=event_name,
            team=self.team,
            distinct_id=distinct_id,
            properties={"attr": "some_val"},
        )

    cohort1 = Cohort.objects.create(team=self.team, groups=[{"action_id": action.pk}], name="cohort1")

    cohort_filter = Filter(data={"properties": [{"key": "id", "value": cohort1.pk, "type": "cohort"}]})
    clause, clause_params = parse_prop_clauses(cohort_filter.properties, self.team.pk)
    sql_template = "SELECT uuid FROM events WHERE team_id = %(team_id)s {}"
    final_query = sql_template.format(clause)

    rows = sync_execute(final_query, {**clause_params, "team_id": self.team.pk})
    self.assertEqual(len(rows), 1)
def test_incomplete_status(self):
    """The current day's funnel trend row is reported as not yet complete.

    The date range runs from "today" to "tomorrow". Both bounds are derived
    from a single clock reading so they are always exactly one day apart,
    even if the test happens to run across midnight UTC.
    """
    now = datetime.utcnow()
    today = now.strftime(FORMAT_TIME)
    tomorrow = (now + timedelta(days=1)).strftime(FORMAT_TIME)

    filter = Filter(
        data={
            "insight": INSIGHT_FUNNELS,
            "display": TRENDS_LINEAR,
            "interval": "day",
            "date_from": today,
            "date_to": tomorrow,
            "funnel_window_days": 1,
            "events": [
                {"id": "step one", "order": 0},
                {"id": "step two", "order": 1},
                {"id": "step three", "order": 2},
            ],
        }
    )
    results = ClickhouseFunnelTrends(filter, self.team).perform_query()

    current = results[0]  # today
    self.assertEqual(1, current["completed_funnels"])
    self.assertEqual(1, current["total"])
    self.assertEqual(100.00, current["percent_complete"])
    self.assertEqual(False, current["is_complete"])
    self.assertEqual(1, len(current["cohort"]))
def _serialize_lifecycle(self, entity: Entity, filter: Filter, team_id: int) -> List[Dict[str, Any]]:
    """Run the lifecycle SQL for one entity and parse each returned row.

    Returns a list with one ``parse_response`` result per row; every row is
    labelled ``"<entity name> - <status>"`` where the status is the row's
    third column.
    """
    period = filter.interval or "day"
    num_intervals, prev_date_from, date_from, date_to, after_date_to = get_time_diff(
        period, filter.date_from, filter.date_to, team_id
    )
    interval_trunc, sub_interval = get_trunc_func(period=period)

    # include the before and after when filtering all events
    widened_filter = filter.with_data(
        {"date_from": prev_date_from.isoformat(), "date_to": after_date_to.isoformat()}
    )

    # Events restricted to the widened date range.
    dated_condition = filter_events(team_id, widened_filter, entity)
    dated_events = Event.objects.filter(team_id=team_id).add_person_id(team_id).filter(dated_condition)
    event_query, event_params = queryset_to_named_query(dated_events, "events")

    # Same events with the date restriction switched off.
    undated_condition = filter_events(team_id, widened_filter, entity, include_dates=False)
    undated_events = Event.objects.filter(team_id=team_id).add_person_id(team_id).filter(undated_condition)
    earliest_events_query, earliest_events_params = queryset_to_named_query(undated_events, "earliest_events")

    is_action = entity.type == TREND_FILTER_TYPE_ACTIONS
    lifecycle_sql = LIFECYCLE_SQL.format(
        action_join=ACTION_JOIN if is_action else "",
        event_condition="{} = %(event)s".format("action_id" if is_action else "event"),
        events=event_query,
        earliest_events=earliest_events_query,
    )
    sql_params = {
        "team_id": team_id,
        "event": entity.id,
        "interval": interval_trunc,
        "one_interval": "1 " + interval_trunc,
        "sub_interval": "1 " + sub_interval,
        "num_intervals": num_intervals,
        "prev_date_from": prev_date_from,
        "date_from": date_from,
        "date_to": date_to,
        "after_date_to": after_date_to,
        **event_params,
        **earliest_events_params,
    }

    with connection.cursor() as cursor:
        cursor.execute(lifecycle_sql, sql_params)
        serialized = []
        for row in cursor.fetchall():
            status = row[2]
            extra_values = {"label": "{} - {}".format(entity.name, status), "status": status}
            serialized.append(parse_response(row, widened_filter, extra_values))
    return serialized
def _exec_query(self) -> List[Tuple]:
    """Assemble the funnel SQL from the current filter and execute it.

    Side effects: replaces ``self._filter`` with a copy that has default
    dates filled in, and (re)initialises ``self.params``.
    """
    prop_filters, prop_filter_params = parse_prop_clauses(
        self._filter.properties,
        self._team.pk,
        prepend="global",
        allow_denormalized_props=True,
    )

    # format default dates
    date_defaults = {}
    if not self._filter._date_from:
        date_defaults["date_from"] = relative_date_parse("-7d")
    if not self._filter._date_to:
        date_defaults["date_to"] = timezone.now()
    self._filter = Filter(data={**self._filter._data, **date_defaults})

    parsed_date_from, parsed_date_to, _ = parse_timestamps(
        filter=self._filter, table="events.", team_id=self._team.pk
    )

    self.params: Dict = {
        "team_id": self._team.pk,
        "events": [],  # purely a speed optimization, don't need this for filtering
        **prop_filter_params,
    }

    step_clauses = []
    for index, entity in enumerate(self._filter.entities):
        step_clauses.append(self._build_steps_query(entity, index))

    # Only the first "uuid IN" occurrence is qualified with the table name.
    qualified_filters = prop_filters.replace("uuid IN", "events.uuid IN", 1)
    query = FUNNEL_SQL.format(
        team_id=self._team.id,
        steps=", ".join(step_clauses),
        filters=qualified_filters,
        parsed_date_from=parsed_date_from,
        parsed_date_to=parsed_date_to,
    )
    return sync_execute(query, self.params)
def test_prop_person(self):
    """A person-property filter applied to the events table returns one event."""
    for distinct_id in ("some_other_id", "some_id"):
        _create_person(
            distinct_ids=[distinct_id],
            team_id=self.team.pk,
            properties={"email": "*****@*****.**"},
        )
    _create_event(
        event="$pageview",
        team=self.team,
        distinct_id="some_id",
        properties={"attr": "some_val"},
    )

    email_property = {"key": "email", "value": "*****@*****.**", "type": "person"}
    person_filter = Filter(data={"properties": [email_property]})

    clause, clause_params = parse_prop_clauses(person_filter.properties, self.team.pk)
    sql_template = "SELECT uuid FROM events WHERE team_id = %(team_id)s {}"
    final_query = sql_template.format(clause)

    rows = sync_execute(final_query, {**clause_params, "team_id": self.team.pk})
    self.assertEqual(len(rows), 1)
def people(self, request: Request, *args: Any, **kwargs: Any) -> Response:
    """Return up to 100 people for the requested entity, with paging links.

    The response body contains:
      * ``results`` - a single-element list holding ``people`` (the first
        100 serialized people) and ``count`` (how many people that list
        contains).
      * ``next`` - a link with the offset advanced by 100 when more than
        100 people were computed, otherwise ``None``.
      * ``previous`` - the current request path without its leading
        character.
    """
    page_size = 100
    team = self.team
    people_filter = Filter(request=request)
    entity = get_target_entity(request)
    serialized_people = calculate_entity_people(team, entity, people_filter)

    current_url = request.get_full_path()
    offset = people_filter.offset

    next_url: Optional[str] = None
    if len(serialized_people) > page_size and current_url:
        if "offset" in current_url:
            # Rewrite the existing offset in place; the leading character is
            # dropped, as it is for ``previous`` below.
            next_url = current_url[1:].replace(
                "offset=" + str(offset), "offset=" + str(offset + page_size)
            )
        else:
            separator = "&" if "?" in current_url else "?"
            next_url = request.build_absolute_uri(
                "{}{}offset={}".format(current_url, separator, offset + page_size)
            )

    page = serialized_people[0:page_size]
    return Response(
        {
            # ``count`` is derived from the same slice as ``people`` so the
            # two cannot disagree.
            "results": [{"people": page, "count": len(page)}],
            "next": next_url,
            "previous": current_url[1:],
        }
    )
def test_prop_person(self):
    """Running the query with a person-property filter yields a result of length 1."""
    for distinct_id in ("some_other_id", "some_id"):
        _create_person(
            distinct_ids=[distinct_id],
            team_id=self.team.pk,
            properties={"email": "*****@*****.**"},
        )
    _create_event(
        event="$pageview",
        team=self.team,
        distinct_id="some_id",
        properties={"attr": "some_val"},
    )

    email_property = {"key": "email", "value": "*****@*****.**", "type": "person"}
    person_filter = Filter(data={"properties": [email_property]})

    query_result = self._run_query(person_filter)
    self.assertEqual(len(query_result), 1)
def test_person_query_does_not_include_recording_events_if_flag_not_set(self):
    """The first serialized actor carries no ``matched_recordings`` value."""
    _create_person(team_id=self.team.pk, distinct_ids=["u1"], properties={"email": "bla"})
    _create_event(event="pageview", distinct_id="u1", team=self.team, timestamp=timezone.now())

    pageview = {
        "id": "pageview",
        "name": "pageview",
        "type": "events",
        "order": 0,
    }
    actors_filter = Filter(
        data={
            "date_from": "2021-01-21T00:00:00Z",
            "date_to": "2021-01-22T00:00:00Z",
            "events": [pageview],
        }
    )
    entity = Entity(pageview)

    actors_query = ClickhouseTrendsActors(self.team, entity, actors_filter)
    _, serialized_actors = actors_query.get_actors()

    first_actor = serialized_actors[0]
    self.assertEqual(first_actor.get("matched_recordings"), None)