def test_status_report_duplicate_distinct_ids(self) -> None:
    """Check the ``duplicate_distinct_ids`` section of the dry-run status report.

    Rows written for this team before the report is taken:
    two for ``duplicate_id1`` and three for ``duplicate_id2`` through
    ``create_person_distinct_id`` (each with a fresh person UUID), plus two
    ``duplicate_id_old`` rows inserted with raw SQL and explicit
    2020-01-01 timestamps.
    """
    for distinct_id, copies in (("duplicate_id1", 2), ("duplicate_id2", 3)):
        for _ in range(copies):
            create_person_distinct_id(self.team.id, distinct_id, str(UUIDT()))

    # NOTE(review): these rows presumably feed the "prev_*" counters because of
    # their old timestamps - confirm against status_report.
    insert_old_row = (
        "INSERT INTO person_distinct_id SELECT %(distinct_id)s, %(person_id)s, %(team_id)s, 1, %(timestamp)s, 0 VALUES"
    )
    for second in range(2):
        row_params = {
            "distinct_id": "duplicate_id_old",
            "person_id": str(UUIDT()),
            "team_id": self.team.id,
            "timestamp": "2020-01-01 12:01:0%s" % second,
        }
        sync_execute(insert_old_row, row_params)

    teams_report = status_report(dry_run=True).get("teams")
    team_report = teams_report[self.team.id]  # type: ignore

    self.assertEqual(
        team_report["duplicate_distinct_ids"],
        {
            "prev_total_ids_with_duplicates": 1,
            "prev_total_extra_distinct_id_rows": 1,
            "new_total_ids_with_duplicates": 2,
            "new_total_extra_distinct_id_rows": 4,
        },
    )
def test_breakdown_user_props_with_filter(self):
    """Break "sign up" events down by the person ``email`` property while filtering.

    Three persons are created (``person1``..``person3``); ``person1`` is then
    additionally mapped to the third person via ``create_person_distinct_id``.
    The query combines a person-property ``not_icontains`` filter on ``email``
    with an event-property filter on ``key``, and the test expects two
    breakdown rows.
    """
    for distinct_id in ("person1", "person2"):
        Person.objects.create(
            team_id=self.team.pk, distinct_ids=[distinct_id], properties={"email": "*****@*****.**"}
        )
    third_person = Person.objects.create(
        team_id=self.team.pk, distinct_ids=["person3"], properties={"email": "*****@*****.**"}
    )
    create_person_distinct_id(third_person.id, self.team.pk, "person1", str(third_person.uuid))

    for distinct_id in ("person1", "person2"):
        _create_event(event="sign up", distinct_id=distinct_id, team=self.team, properties={"key": "val"})

    filter_data = {
        "date_from": "-14d",
        "breakdown": "email",
        "breakdown_type": "person",
        "events": [{"id": "sign up", "name": "sign up", "type": "events", "order": 0}],
        "properties": [
            {"key": "email", "value": "@posthog.com", "operator": "not_icontains", "type": "person"},
            {"key": "key", "value": "val"},
        ],
    }
    response = ClickhouseTrends().run(Filter(data=filter_data), self.team)

    self.assertEqual(len(response), 2)
    self.assertEqual(response[1]["breakdown_value"], "*****@*****.**")
def split_person(self, main_distinct_id: Optional[str]):
    """Move every distinct id except the main one onto its own new person.

    Args:
        main_distinct_id: the distinct id that stays attached to this person.
            When falsy, this person's properties are reset to ``{}`` and saved,
            and the first entry of the stored distinct ids is kept as the main one.

    For each other distinct id, the ``PersonDistinctId`` row is re-pointed to a
    newly created ``Person`` (with its ``version`` bumped) inside a transaction;
    afterwards the change is propagated through the helpers imported from
    ``ee.clickhouse.models.person``.
    """
    stored_person = Person.objects.get(pk=self.pk)
    distinct_ids = stored_person.distinct_ids

    if not main_distinct_id:
        self.properties = {}
        self.save()
        main_distinct_id = distinct_ids[0]

    for distinct_id in distinct_ids:
        if distinct_id == main_distinct_id:
            continue

        # Lock the mapping row while it is re-pointed to the new person.
        with transaction.atomic():
            locked_rows = PersonDistinctId.objects.select_for_update()
            mapping = locked_rows.get(person=self, distinct_id=distinct_id)
            new_person = Person.objects.create(team_id=self.team_id)
            mapping.person_id = str(new_person.id)
            mapping.version = (mapping.version or 0) + 1
            mapping.save(update_fields=["version", "person_id"])

        # Imported lazily, and only once a split actually happens.
        from ee.clickhouse.models.person import create_person, create_person_distinct_id

        # sign=-1 for the old owner, then sign=1 (with the new version) for the new owner.
        create_person_distinct_id(
            team_id=self.team_id, distinct_id=distinct_id, person_id=str(self.uuid), sign=-1
        )
        create_person_distinct_id(
            team_id=self.team_id,
            distinct_id=distinct_id,
            person_id=str(new_person.uuid),
            sign=1,
            version=mapping.version,
        )
        create_person(team_id=self.team_id, uuid=str(new_person.uuid))
def create_people(self):
    """Create ``self.n_people`` persons, one distinct id each, and mirror them.

    Persons come from ``self.make_person`` and are bulk-inserted together with
    one ``PersonDistinctId`` per person (distinct ids are fresh ``UUIDT``
    strings). The same persons and distinct ids are then passed to
    ``create_person`` / ``create_person_distinct_id`` from
    ``ee.clickhouse.models.person``. Results are kept on ``self.people`` and
    ``self.distinct_ids``.
    """
    self.people = list(map(self.make_person, range(self.n_people)))
    self.distinct_ids = [str(UUIDT()) for _person in self.people]
    self.people = Person.objects.bulk_create(self.people)

    distinct_id_rows = []
    for person, distinct_id in zip(self.people, self.distinct_ids):
        distinct_id_rows.append(PersonDistinctId(team=self.team, person=person, distinct_id=distinct_id))
    PersonDistinctId.objects.bulk_create(distinct_id_rows)

    from ee.clickhouse.models.person import create_person, create_person_distinct_id

    for person in self.people:
        create_person(
            uuid=str(person.uuid),
            team_id=person.team.pk,
            properties=person.properties,
            is_identified=person.is_identified,
        )

    for row in distinct_id_rows:
        create_person_distinct_id(row.team.pk, row.distinct_id, str(row.person.uuid))
def _create_person(**kwargs) -> Person:
    """Create a person plus its distinct-id rows and return a ``Person`` stub.

    Keyword Args:
        uuid: optional identifier for the person. A fresh ``UUIDT`` is used
            when it is missing or falsy.
        distinct_ids: required iterable of distinct ids to attach
            (``KeyError`` if absent).
        team_id: forwarded to ``create_person`` and used for every distinct-id
            row.
        **kwargs: everything else is forwarded unchanged to ``create_person``.

    Returns:
        A ``Person`` whose ``id`` and ``uuid`` are both set to the value
        returned by ``create_person``.
    """
    # Always pop "uuid". Guarding the pop with `kwargs.get("uuid")` left a falsy
    # value (None, "") behind in kwargs, which made the call below fail with
    # "got multiple values for keyword argument 'uuid'".
    uuid = str(kwargs.pop("uuid", None) or UUIDT())
    distinct_ids = kwargs.pop("distinct_ids")

    person = create_person(uuid=uuid, **kwargs)
    for distinct_id in distinct_ids:
        # NOTE(review): the leading 0 is presumably a placeholder id - confirm
        # against create_person_distinct_id's signature.
        create_person_distinct_id(0, kwargs["team_id"], distinct_id, str(person))
    return Person(id=person, uuid=person)
def test_delete_persons(self):
    """Deleting data for the first two teams must leave only the third team's rows.

    One person (properties ``{"x": <index>}``) and one distinct id are created
    for each of the first three teams; after ``delete_teams_data`` runs for
    teams 0 and 1, only team 2's person and distinct id should remain.
    """
    team_indexes = (0, 1, 2)
    person_uuids = [create_person(self.teams[index].pk, properties={"x": index}) for index in team_indexes]

    for index, distinct_id in enumerate(("0", "1", "2")):
        create_person_distinct_id(self.teams[index].pk, distinct_id, person_uuids[index])

    deleted_team_ids = [self.teams[index].pk for index in (0, 1)]
    delete_teams_data(deleted_team_ids)

    remaining_properties = self.select_remaining("person", "properties")
    self.assertEqual(remaining_properties, ['{"x": 2}'])

    remaining_distinct_ids = self.select_remaining("person_distinct_id", "distinct_id")
    self.assertEqual(remaining_distinct_ids, ["2"])
def test_status_report_multiple_ids_per_person(self) -> None:
    """Check the ``multiple_ids_per_person`` section of the dry-run status report.

    Two person ids are generated; the first receives distinct ids
    ``id1``..``id5`` and the second ``id6``..``id8``, all for this team.
    """
    first_person = str(UUIDT())
    second_person = str(UUIDT())

    ids_by_person = (
        (first_person, ("id1", "id2", "id3", "id4", "id5")),
        (second_person, ("id6", "id7", "id8")),
    )
    for person_id, distinct_ids in ids_by_person:
        for distinct_id in distinct_ids:
            create_person_distinct_id(self.team.id, distinct_id, person_id)

    teams_report = status_report(dry_run=True).get("teams")
    team_report = teams_report[self.team.id]  # type: ignore

    self.assertEqual(
        team_report["multiple_ids_per_person"],
        {
            "total_persons_with_more_than_2_ids": 2,
            "max_distinct_ids_for_one_person": 5,
        },
    )