Esempio n. 1
0
    def test_status_report_duplicate_distinct_ids(self) -> None:
        """Check the ``duplicate_distinct_ids`` section of the dry-run status report.

        Seeds two rows for ``duplicate_id1`` and three rows for ``duplicate_id2``
        through ``create_person_distinct_id`` (each with a fresh person UUID), then
        inserts two ``duplicate_id_old`` rows with 2020 timestamps directly into
        ``person_distinct_id``, and finally compares this team's report entry with
        the expected counters.
        """
        team_id = self.team.id

        # (distinct_id, number of rows); every row points at a brand-new person UUID.
        for distinct_id, copies in (("duplicate_id1", 2), ("duplicate_id2", 3)):
            for _ in range(copies):
                create_person_distinct_id(team_id, distinct_id, str(UUIDT()))

        insert_sql = "INSERT INTO person_distinct_id SELECT %(distinct_id)s, %(person_id)s, %(team_id)s, 1, %(timestamp)s, 0 VALUES"
        for second in range(2):
            params = {
                "distinct_id": "duplicate_id_old",
                "person_id": str(UUIDT()),
                "team_id": team_id,
                # The two rows differ only in the seconds digit of the timestamp.
                "timestamp": "2020-01-01 12:01:0%s" % second,
            }
            sync_execute(insert_sql, params)

        teams_report = status_report(dry_run=True).get("teams")
        duplicate_ids_report = teams_report[team_id]["duplicate_distinct_ids"]  # type: ignore

        self.assertEqual(
            duplicate_ids_report,
            {
                "prev_total_ids_with_duplicates": 1,
                "prev_total_extra_distinct_id_rows": 1,
                "new_total_ids_with_duplicates": 2,
                "new_total_extra_distinct_id_rows": 4,
            },
        )
Esempio n. 2
0
    def test_breakdown_user_props_with_filter(self):
        """Trends broken down by the ``email`` person property, with filters applied.

        Creates three persons and two ``sign up`` events, runs ``ClickhouseTrends``
        with a person-property filter (``not_icontains`` "@posthog.com") plus an
        event-property filter (``key`` == "val"), and expects two result rows.
        """
        # NOTE(review): the email literals look redacted in this copy of the file;
        # they are kept exactly as found.
        team_pk = self.team.pk

        for distinct_id in ("person1", "person2"):
            Person.objects.create(
                team_id=team_pk, distinct_ids=[distinct_id], properties={"email": "*****@*****.**"}
            )
        third_person = Person.objects.create(
            team_id=team_pk, distinct_ids=["person3"], properties={"email": "*****@*****.**"}
        )
        # Additionally register "person1" as a distinct ID of the third person.
        create_person_distinct_id(third_person.id, team_pk, "person1", str(third_person.uuid))

        for distinct_id in ("person1", "person2"):
            _create_event(event="sign up", distinct_id=distinct_id, team=self.team, properties={"key": "val"})

        filter_data = {
            "date_from": "-14d",
            "breakdown": "email",
            "breakdown_type": "person",
            "events": [{"id": "sign up", "name": "sign up", "type": "events", "order": 0}],
            "properties": [
                {"key": "email", "value": "@posthog.com", "operator": "not_icontains", "type": "person"},
                {"key": "key", "value": "val"},
            ],
        }
        response = ClickhouseTrends().run(Filter(data=filter_data), self.team)

        self.assertEqual(len(response), 2)
        self.assertEqual(response[1]["breakdown_value"], "*****@*****.**")
Esempio n. 3
0
    def split_person(self, main_distinct_id: Optional[str]):
        """Detach every distinct ID except the main one onto a newly created person.

        Args:
            main_distinct_id: The distinct ID that stays attached to this person.
                When falsy, this person's properties are cleared and saved, and
                the first of its distinct IDs is used as the main one.

        For each other distinct ID, the ``PersonDistinctId`` row is re-pointed at
        a new ``Person`` (with its version bumped) inside a transaction, and the
        change is then mirrored through ``create_person_distinct_id`` and
        ``create_person``.
        """
        all_distinct_ids = Person.objects.get(pk=self.pk).distinct_ids
        if not main_distinct_id:
            self.properties = {}
            self.save()
            # NOTE(review): raises IndexError if this person has no distinct IDs —
            # confirm whether callers can hit that case.
            main_distinct_id = all_distinct_ids[0]

        for current_id in all_distinct_ids:
            if current_id == main_distinct_id:
                continue

            # Row lock + create + re-point happen atomically.
            with transaction.atomic():
                mapping = PersonDistinctId.objects.select_for_update().get(
                    person=self, distinct_id=current_id
                )
                new_person = Person.objects.create(team_id=self.team_id)
                mapping.person_id = str(new_person.id)
                mapping.version = (mapping.version or 0) + 1
                mapping.save(update_fields=["version", "person_id"])

            # Imported lazily, only once there is something to mirror.
            from ee.clickhouse.models.person import create_person, create_person_distinct_id

            new_person_uuid = str(new_person.uuid)

            # Emit a sign=-1 row for the old (this person) mapping ...
            create_person_distinct_id(
                team_id=self.team_id,
                distinct_id=current_id,
                person_id=str(self.uuid),
                sign=-1,
            )
            # ... followed by a sign=1 row for the new mapping at the bumped version.
            create_person_distinct_id(
                team_id=self.team_id,
                distinct_id=current_id,
                person_id=new_person_uuid,
                sign=1,
                version=mapping.version,
            )
            create_person(team_id=self.team_id, uuid=new_person_uuid)
Esempio n. 4
0
    def create_people(self):
        """Bulk-create ``self.n_people`` persons with one random distinct ID each.

        Populates ``self.people`` and ``self.distinct_ids``, bulk-inserts the
        ``Person`` and ``PersonDistinctId`` rows, then replays every person and
        every distinct-ID mapping through ``create_person`` and
        ``create_person_distinct_id``.
        """
        self.people = [self.make_person(index) for index in range(self.n_people)]
        # One freshly generated distinct ID per person, paired by position.
        self.distinct_ids = [str(UUIDT()) for _ in range(len(self.people))]
        self.people = Person.objects.bulk_create(self.people)

        mappings = []
        for owner, distinct_id in zip(self.people, self.distinct_ids):
            mappings.append(PersonDistinctId(team=self.team, person=owner, distinct_id=distinct_id))
        PersonDistinctId.objects.bulk_create(mappings)

        from ee.clickhouse.models.person import create_person, create_person_distinct_id

        for owner in self.people:
            create_person(
                uuid=str(owner.uuid),
                team_id=owner.team.pk,
                properties=owner.properties,
                is_identified=owner.is_identified,
            )

        for mapping in mappings:
            create_person_distinct_id(mapping.team.pk, mapping.distinct_id, str(mapping.person.uuid))
Esempio n. 5
0
def _create_person(**kwargs) -> Person:
    """Create a person via ``create_person`` and register its distinct IDs.

    Keyword Args:
        uuid: Optional identifier for the person. When missing or falsy, a new
            ``UUIDT`` is generated.
        distinct_ids: Required iterable of distinct IDs to attach to the person.
        team_id: Required; also used when registering each distinct ID.
        **kwargs: Everything else is forwarded unchanged to ``create_person``.

    Returns:
        A ``Person`` whose ``id`` and ``uuid`` are both set to the value returned
        by ``create_person``.

    Raises:
        KeyError: If ``distinct_ids`` or ``team_id`` is not supplied.
    """
    # Always remove "uuid" from kwargs: if it were left behind when falsy
    # (e.g. uuid=None), create_person(uuid=..., **kwargs) would receive the
    # keyword twice and raise TypeError.
    uuid = str(kwargs.pop("uuid", None) or UUIDT())
    distinct_ids = kwargs.pop("distinct_ids")

    person = create_person(uuid=uuid, **kwargs)
    for distinct_id in distinct_ids:
        # First positional argument is passed as a constant 0 here.
        create_person_distinct_id(0, kwargs["team_id"], distinct_id, str(person))
    return Person(id=person, uuid=person)
Esempio n. 6
0
    def test_delete_persons(self):
        """Deleting two teams' data leaves only the third team's person rows.

        Creates one person (properties ``{"x": i}``) and one distinct ID per team
        for the first three teams, calls ``delete_teams_data`` for teams 0 and 1,
        and checks that ``select_remaining`` only returns team 2's values.
        """
        # Create all persons first, then all distinct IDs, one per team.
        person_uuids = [
            create_person(self.teams[index].pk, properties={"x": index}) for index in range(3)
        ]
        for index, distinct_id in enumerate(("0", "1", "2")):
            create_person_distinct_id(self.teams[index].pk, distinct_id, person_uuids[index])

        doomed_team_ids = [self.teams[0].pk, self.teams[1].pk]
        delete_teams_data(doomed_team_ids)

        remaining_properties = self.select_remaining("person", "properties")
        self.assertEqual(remaining_properties, ['{"x": 2}'])
        remaining_distinct_ids = self.select_remaining("person_distinct_id", "distinct_id")
        self.assertEqual(remaining_distinct_ids, ["2"])
Esempio n. 7
0
    def test_status_report_multiple_ids_per_person(self) -> None:
        """Check the ``multiple_ids_per_person`` section of the dry-run status report.

        Attaches five distinct IDs to one person UUID and three to another, then
        compares this team's report entry with the expected counters.
        """
        first_person = str(UUIDT())
        second_person = str(UUIDT())

        # Person UUID -> distinct IDs to attach, in creation order.
        ids_by_person = (
            (first_person, ("id1", "id2", "id3", "id4", "id5")),
            (second_person, ("id6", "id7", "id8")),
        )
        for person_uuid, distinct_ids in ids_by_person:
            for distinct_id in distinct_ids:
                create_person_distinct_id(self.team.id, distinct_id, person_uuid)

        teams_report = status_report(dry_run=True).get("teams")
        multiple_ids_report = teams_report[self.team.id]["multiple_ids_per_person"]  # type: ignore

        self.assertEqual(
            multiple_ids_report,
            {
                "total_persons_with_more_than_2_ids": 2,
                "max_distinct_ids_for_one_person": 5,
            },
        )