예제 #1
0
    def handle(self, *args, **options):
        """
        Rebuild the ``names`` field of every person and tidy up name parts.

        For each Person the Ukrainian name parts are coerced to strings and
        passed to ``self.transliterate``; the same is done for every
        non-empty line of ``also_known_as_uk`` after splitting it with
        ``parse_fullname``. All variants are merged and stored,
        newline-separated, in ``names``. Afterwards the Ukrainian name
        parts are stripped, a dot is appended to a one-letter first name or
        patronymic, and the record is saved.
        """
        for person in Person.objects.all():
            # Missing (falsy) name parts become empty strings
            for field in ("last_name_uk", "first_name_uk", "patronymic_uk"):
                setattr(person, field, getattr(person, field) or "")

            # The result is merged with |= below, so it is presumably a set
            names = self.transliterate(
                person.last_name_uk,
                person.first_name_uk,
                person.patronymic_uk,
            )

            aliases = person.also_known_as_uk
            if aliases:
                for aka_name in aliases.split("\n"):
                    if not aka_name:
                        continue

                    aka_last, aka_first, aka_patronymic, _ = parse_fullname(
                        aka_name)
                    names |= self.transliterate(
                        aka_last, aka_first, aka_patronymic)

            person.names = "\n".join(names)

            for field in ("first_name_uk", "last_name_uk", "patronymic_uk"):
                setattr(person, field, getattr(person, field).strip())

            # A single letter is treated as an initial and gets a dot
            for field in ("first_name", "patronymic"):
                if len(getattr(person, field)) == 1:
                    setattr(person, field, getattr(person, field) + ".")

            person.save()
예제 #2
0
    def family(self):
        """
        Return the family members mentioned in the declaration.

        Members are taken from ``source["general"]["family"]`` when that
        list is present and non-empty; otherwise they are parsed out of the
        semicolon-separated ``source["general"]["family_raw"]`` string with
        ``parse_family_member``. Structured members without a name or
        without any relation are skipped.

        Every returned dict has ``relation`` and ``name`` plus the derived
        keys ``mapped`` (looked up in ``RELATIONS_MAPPING``), ``last_name``,
        ``first_name``, ``patronymic`` and ``dob`` (from
        ``parse_fullname``).

        Always returns a list; it is empty when there is no source or no
        family data.
        """
        if not self.source:
            return []

        general = self.source["general"]
        members = []

        if "family" in general and general["family"]:
            for member in general["family"]:
                name = member.get("family_name", "")
                # Prefer the predefined relation, fall back to the free-form
                # one. Both keys are optional and either may be empty.
                relation = (member.get("relations")
                            or member.get("relations_other") or "")

                if name and relation:
                    members.append({"relation": relation, "name": name})
        elif "family_raw" in general and general["family_raw"]:
            members = [
                parse_family_member(chunk)
                for chunk in general["family_raw"].split(";") if chunk
            ]

        # Build a real list (filter() is lazy on Python 3) and drop the
        # entries the parser could not make sense of.
        members = [member for member in members if member]

        for member in members:
            member["mapped"] = RELATIONS_MAPPING.get(
                member["relation"].lower(), "особи, які спільно проживають")

            (
                member["last_name"],
                member["first_name"],
                member["patronymic"],
                member["dob"],
            ) = parse_fullname(member["name"])

        return members
예제 #3
0
    def to_dict(self):
        """
        Convert Person model to an indexable presentation for ES.

        The result starts from ``model_to_dict`` over a fixed list of
        fields and is then extended with related persons, countries,
        companies and confirmed declarations, rendered manhunt records,
        absolute URLs of the supporting documents and photo, termination
        and last workplace details, translated display values, full names
        and the ``full_name_suggest`` list of weighted inputs.

        Returns the resulting dict; its ``_id`` key duplicates ``id``.
        """
        d = model_to_dict(
            self,
            fields=[
                "id",
                "last_name",
                "first_name",
                "patronymic",
                "dob",
                "last_name_en",
                "first_name_en",
                "patronymic_en",
                "dob_details",
                "is_pep",
                "names",
                "wiki_uk",
                "wiki_en",
                "city_of_birth_uk",
                "city_of_birth_en",
                "reputation_sanctions_uk",
                "reputation_sanctions_en",
                "reputation_convictions_uk",
                "reputation_convictions_en",
                "reputation_assets_uk",
                "reputation_assets_en",
                "reputation_crimes_uk",
                "reputation_crimes_en",
                "reputation_manhunt_uk",
                "reputation_manhunt_en",
                "also_known_as_uk",
                "also_known_as_en",
                "last_change",
                "inn",
                "inn_source",
                "passport",
                "passport_source",
            ],
        )

        # Connections are stored in both directions, index both of them
        d["related_persons"] = [
            i.to_dict() for i in self.to_persons.prefetch_related("to_person")
        ] + [
            i.to_dict_reverse()
            for i in self.from_persons.prefetch_related("from_person")
        ]
        d["related_countries"] = [
            i.to_dict()
            for i in self.person2country_set.prefetch_related("to_country")
        ]
        d["related_companies"] = [
            i.to_company_dict()
            for i in self.person2company_set.prefetch_related("to_company")
        ]

        d["declarations"] = [
            i.to_dict()
            for i in Declaration.objects.filter(person=self, confirmed="a")
        ]

        manhunt_records = self.manhunt_records
        if manhunt_records:
            curr_lang = get_language()

            # Each template is rendered under its own language. The
            # previously active language is restored even if rendering
            # fails, so that the switch does not leak to the caller.
            try:
                for lang, template in (("uk", "_manhunt_records_uk.jinja"),
                                       ("en", "_manhunt_records_en.jinja")):
                    activate(lang)
                    key = "reputation_manhunt_" + lang
                    d[key] = render_to_string(
                        template,
                        {"manhunt_records": manhunt_records}) + (d[key] or "")
            finally:
                activate(curr_lang)

        d["inn_source"] = (
            (settings.SITE_URL + self.inn_source.doc.url)
            if self.inn_source else "")
        d["passport_source"] = (
            (settings.SITE_URL + self.passport_source.doc.url)
            if self.passport_source else "")

        d["photo"] = (settings.SITE_URL + self.photo.url) if self.photo else ""
        d["photo_path"] = self.photo.name if self.photo else ""
        d["date_of_birth"] = self.date_of_birth
        d["terminated"] = self.terminated
        d["last_modified"] = self.last_modified
        d["died"] = self.died
        if d["terminated"]:
            d["reason_of_termination"] = self.get_reason_of_termination_display(
            )
            d["reason_of_termination_en"] = translate_into(
                self.get_reason_of_termination_display(), "en")
            d["termination_date_human"] = self.termination_date_human

        last_workplace = self.last_workplace
        if last_workplace:
            d["last_workplace"] = last_workplace["company"]
            d["last_job_title"] = last_workplace["position"]
            d["last_job_id"] = last_workplace["company_id"]

            last_workplace_en = self.last_workplace_en
            d["last_workplace_en"] = last_workplace_en["company"]
            d["last_job_title_en"] = last_workplace_en["position"]

        d["type_of_official"] = self.get_type_of_official_display()

        d["type_of_official_en"] = translate_into(
            self.get_type_of_official_display(), "en")

        d["full_name"] = self.full_name
        d["full_name_en"] = self.full_name_en

        def generate_suggestions(last_name, first_name, patronymic, *args):
            # Extra positional values (the fourth item returned by
            # parse_fullname) are accepted and ignored.
            if not last_name:
                return []

            return [
                {
                    "input": " ".join([last_name, first_name, patronymic]),
                    "weight": 5
                },
                {
                    "input": " ".join([first_name, patronymic, last_name]),
                    "weight": 2
                },
                {
                    "input": " ".join([first_name, last_name]),
                    "weight": 2
                },
            ]

        input_variants = [
            generate_suggestions(d["last_name"], d["first_name"],
                                 d["patronymic"])
        ]

        input_variants += [
            generate_suggestions(*parse_fullname(name))
            for name in self.parsed_names
        ]

        d["full_name_suggest"] = list(chain.from_iterable(input_variants))

        d["_id"] = d["id"]

        return d
예제 #4
0
    def handle(self, *args, **options):
        """
        Import persons and their positions in companies from a spreadsheet.

        Every record of the first sheet is processed as follows:

        * the company is resolved with ``self.process_company``; rows
          without a company are skipped and the resolved company pk is
          written back to the sheet;
        * the person is looked up by id, then by exact (case-insensitive)
          name, and created when nothing is found; names, transliterations,
          date of birth and photo are updated and the person pk is written
          back to the sheet;
        * supporting documents are downloaded (unless already stored under
          the same hash) and the Person2Company link is created or updated
          with the position, proofs and confirmation date.
        """
        # User recorded as the uploader of every downloaded document
        peklun = User.objects.get(username="******")

        wks = get_spreadsheet().sheet1

        for i, l in enumerate(wks.get_all_records()):
            # Reopen the sheet from time to time to avoid a disconnect by
            # timeout
            if i % 2000 == 0 and i:
                wks = get_spreadsheet().sheet1

            self.stdout.write('Processing line #{}'.format(i))

            company_ipn = l.get("ІПН", "")
            company_name = l.get("Назва", "")
            person_id = l.get("id персони", "")
            company_id = l.get("id компанії", "")
            photo_url = l.get("Фото", "")

            person = None
            # First let's search for appropriate company
            company = self.process_company(company_id, company_ipn,
                                           company_name)

            # No company — no go
            if company is None:
                continue

            # Let's backwrite company id to the spreadsheet for further use.
            # NOTE(review): presumably row i + 2 accounts for the header row
            # plus 1-based indexing and the company id lives in the last
            # column — confirm against the sheet layout.
            if company.pk != company_id:
                company_id = company.pk
                wks.update_cell(i + 2, len(l.keys()), company.pk)

            person_name = l.get("ПІБ", "").strip()
            position = l.get("Посада", "").strip()
            person_dob = unicode(l.get("Дата народження", "")).strip()
            person_from = parse_date(l.get("Дата призначення", ""))
            person_to = parse_date(l.get("Дата звільнення", ""))

            doc_received = parse_date(l.get("Дата відповіді", ""))
            docs = l.get("Лінк на відповідь", "").strip()
            website = l.get("лінк на сайт", "").strip()

            # Now let's search for the person
            if person_name:
                last_name, first_name, patronymic, _ = parse_fullname(
                    person_name)

                # The name could not be split, nothing to match against
                if not last_name:
                    continue

                # First we search by person_id (if it's present)
                if person_id:
                    try:
                        person = Person.objects.get(pk=person_id)
                    except Person.DoesNotExist:
                        pass

                # If nothing is found we search for name (for now)
                if not person:
                    try:
                        person = Person.objects.get(
                            first_name_uk__iexact=first_name,
                            last_name_uk__iexact=last_name,
                            patronymic_uk__iexact=patronymic)
                    except Person.MultipleObjectsReturned:
                        # NOTE(review): person stays None here, so a new
                        # record is created below in addition to the
                        # existing namesakes — confirm this is intended.
                        self.stderr.write(
                            "Double person {}!".format(person_name))
                    except Person.DoesNotExist:
                        pass

                # If nothing is found, let's create a record for that person
                if not person:
                    person = Person()
                    self.stderr.write(
                        "Created new person {}".format(person_name))

                person.first_name_uk = first_name
                person.last_name_uk = last_name
                person.patronymic_uk = patronymic

                # Make sure every name part has an entry in the Ua2Ru
                # dictionary
                Ua2RuDictionary.objects.get_or_create(term=first_name)
                Ua2RuDictionary.objects.get_or_create(term=last_name)
                Ua2RuDictionary.objects.get_or_create(term=patronymic)

                person.first_name_en = translitua(first_name)
                person.last_name_en = translitua(last_name)
                person.patronymic_en = translitua(patronymic)

                person.is_pep = True
                person.imported = True
                person.type_of_official = 1

                # Parsing date (can be a full date or just a year or
                # year/month)
                if person_dob:
                    person.dob = parse_date(person_dob)
                    if len(person_dob) == 4:
                        person.dob_details = 2  # Only year

                    if len(person_dob) > 4 and len(person_dob) < 7:
                        person.dob_details = 1  # month and year

                # Let's download the photo (if any)
                if not person.photo and photo_url:
                    photo_name, photo_san_name, photo_content = download(
                        photo_url, translitua(person_name))

                    if photo_name:
                        person.photo.save(photo_san_name,
                                          ContentFile(photo_content))
                    else:
                        self.stdout.write("Cannot download image %s for %s" %
                                          (photo_url, person_name))

                person.save()

                # Let's write the person id back to the table (the column
                # right before the one used for the company id above).
                if person.pk != person_id:
                    person_id = person.pk
                    wks.update_cell(i + 2, len(l.keys()) - 1, person.pk)

                # Now let's download all supporting docs
                docs_downloaded = []
                first_doc_name = False

                # There might be many of them
                for doc in docs.split(", "):
                    doc_instance = None

                    # we cannot download folders from google docs, so let's
                    # skip them

                    if doc and "folderview" not in doc \
                            and "drive/#folders" not in doc:
                        doc = expand_gdrive_download_url(doc)
                        doc_hash = sha1(doc).hexdigest()

                        # Check if the document with the same URL hash was
                        # already stored; download it otherwise
                        try:
                            doc_instance = Document.objects.get(hash=doc_hash)
                        except Document.DoesNotExist:
                            self.stdout.write(
                                'Downloading file {}'.format(doc))
                            doc_name, doc_san_name, doc_content = download(doc)
                            doc_san_name = translitua(doc_san_name)

                            if doc_name:
                                doc_instance = Document(name_uk=doc_name,
                                                        uploader=peklun,
                                                        hash=doc_hash)

                                doc_instance.doc.save(doc_san_name,
                                                      ContentFile(doc_content))
                                doc_instance.save()
                            else:
                                self.stdout.write(
                                    'Cannot download file {}'.format(doc))

                        # The name of the last available document ends up
                        # in first_doc_name, as it is overwritten on every
                        # iteration
                        if doc_instance:
                            first_doc_name = doc_instance.name_uk
                            docs_downloaded.append(doc_instance.doc.url)

                # Now let's setup links between person and companies
                links = Person2Company.objects.filter(
                    (Q(date_established=person_from)
                     | Q(date_established=mangle_date(person_from))
                     | Q(date_established__isnull=True)),
                    (Q(date_finished=person_to)
                     | Q(date_finished=mangle_date(person_to))
                     | Q(date_finished__isnull=True)),
                    from_person=person,
                    to_company=company)

                # Delete if there are doubling links
                # including those cases when dates were imported incorrectly
                # because of parse_date
                if len(links) > 1:
                    links.delete()

                # No defaults are passed, so this either fetches the link
                # matching all the values below or creates it
                link, _ = Person2Company.objects.update_or_create(
                    from_person=person,
                    to_company=company,
                    date_established=person_from,
                    date_established_details=0,
                    date_finished=person_to,
                    date_finished_details=0)

                # An already filled position is not overwritten
                if not link.relationship_type:
                    link.relationship_type = position

                # And translate them
                Ua2EnDictionary.objects.get_or_create(
                    term=lookup_term(position))

                # oh, and add links to supporting docs
                all_docs = docs_downloaded + website.split(", ")
                if all_docs:
                    link.proof = ", ".join(filter(None, all_docs))

                    if first_doc_name:
                        link.proof_title = first_doc_name

                link.date_confirmed = doc_received
                link.is_employee = True

                link.save()
예제 #5
0
    def resolve_person(self, family_id):
        """
        Find the relative mentioned in the declaration in the PEP db.

        Returns a tuple of the matched person record and a flag that is
        True when the match was fuzzy, i.e. the stored name differs from
        the declared one. A ``family_id`` of "1" resolves to the declarant
        (``self.person``) and is never fuzzy.

        Raises CannotResolveRelativeException when the declaration has no
        family section, the member or their name cannot be found, more than
        one connection matches, or no connection matches at all.
        """

        if str(family_id) == "1":
            return self.person, False

        def _is_fuzzy_match(lastname, firstname, middlename, person_rec):
            # Fuzzy means that any of the name parts differs after trimming
            # and lowercasing. Parts are compared lazily, in this order.
            pairs = (
                (lastname, "last_name"),
                (firstname, "first_name"),
                (middlename, "patronymic"),
            )
            return any(
                declared.strip().lower() !=
                getattr(person_rec, attr).strip().lower()
                for declared, attr in pairs)

        family = self.source["nacp_orig"].get("step_2")

        if not isinstance(family, dict):
            raise CannotResolveRelativeException(
                "Cannot find family section in the declaration %s" %
                (self.declaration_id))

        if not family_id or family_id not in family:
            raise CannotResolveRelativeException(
                "Cannot find person %s in the declaration %s" %
                (family_id, self.declaration_id))

        member = family[family_id]

        try:
            lastname = member["lastname"].strip()
            firstname = member["firstname"].strip()
            middlename = member["middlename"].strip()
        except KeyError:
            # Some records only carry the full name as a single string
            if "ukr_full_name" not in member:
                raise CannotResolveRelativeException(
                    "Cannot find name of a person %s in the declaration %s" %
                    (family_id, self.declaration_id))

            lastname, firstname, middlename, _ = parse_fullname(
                member["ukr_full_name"])

        def _connections(own_side, other_side):
            # Exact (case-insensitive) matches first, trigram-similar ones
            # second; the same connection may show up in both result sets.
            found = []
            for lookup in ("iexact", "trigram_similar"):
                criteria = {
                    "%s_id" % own_side: self.person_id,
                    "%s__last_name_uk__%s" % (other_side, lookup):
                    lastname.strip(),
                    "%s__first_name_uk__%s" % (other_side, lookup):
                    firstname.strip(),
                    "%s__patronymic_uk__%s" % (other_side, lookup):
                    middlename.strip(),
                }
                found += list(
                    Person2Person.objects.filter(
                        **criteria).select_related(other_side))

            return found

        # Connections are stored in one direction only, so check both
        chunk1 = _connections("from_person", "to_person")
        chunk2 = _connections("to_person", "from_person")

        if len(set(chunk1)) + len(set(chunk2)) > 1:
            raise CannotResolveRelativeException(
                "Uh, oh, more than one connection between %s and %s %s %s" %
                (self.person, lastname, firstname, middlename))

        def _answer(relative):
            fuzzy_match = _is_fuzzy_match(lastname, firstname, middlename,
                                          relative)
            if fuzzy_match:
                logger.warning(
                    "It was fuzzy match between %s %s %s and the declarant %s"
                    % (lastname, firstname, middlename, relative))

            return relative, fuzzy_match

        if chunk1:
            return _answer(chunk1[0].to_person)

        if chunk2:
            return _answer(chunk2[0].from_person)

        raise CannotResolveRelativeException(
            "Cannot find person %s %s %s for the declarant %s" %
            (lastname, firstname, middlename, self.person))
예제 #6
0
    def create_person(self, person_name, is_pep, yob, real_run=False):
        """
        Match ``person_name`` to an existing Person or create a new one.

        A person found by name is accepted only when they are connected to
        one of the companies listed for that name in ``self.smida_p2c``;
        otherwise a new Person is created. The result is remembered in
        ``self.persons_dict`` and the counters in ``self.persons_stats``
        are updated.

        :param person_name: full name, split with ``parse_fullname``
        :param is_pep: value for ``is_pep``; also selects the
            ``type_of_official`` (1 when true, 4 otherwise) of a new person
        :param yob: year of birth, stored for a new person only when it is
            greater than 1850
        :param real_run: when false, a new person is built but not saved
        :return: the matched or created Person, or None when the name
            cannot be split into at least last and first name
        """
        last_name, first_name, patronymic, _ = parse_fullname(person_name)
        if not last_name or not first_name:
            tqdm.write("Can not split name: {}".format(person_name))
            return None

        def create_new_person():
            person = Person(
                last_name=title(last_name),
                first_name=title(first_name),
                patronymic=title(patronymic),
                is_pep=is_pep,
                type_of_official=1 if is_pep else 4
            )

            if yob and yob > 1850:
                # Only the year is known: store January 1st and mark it
                # with dob_details = 2
                person.dob = dt_parse("{}-01-01".format(yob))
                person.dob_details = 2

            if real_run:
                person.save()
                self.new_persons_pk.append(person.pk)

            self.persons_dict[person_name] = person
            self.persons_stats["created_total"] += 1
            return person

        qs = Person.objects.filter(last_name_uk__icontains=last_name,
                                   first_name_uk__icontains=first_name)

        if patronymic:
            qs = qs.filter(patronymic_uk__icontains=patronymic)

        if qs.count() == 0:
            tqdm.write("No matches for: {}. Person will be created"
                       .format(person_name))
            return create_new_person()

        # The codes depend on the name only: build the list once instead of
        # once per candidate. Codes are left-padded with zeros to 8 chars.
        edrpou_list = [
            edrpou.rjust(8, "0") for edrpou in self.smida_p2c[person_name]
        ]

        for person in qs.iterator():
            p2c_qs = Person2Company.objects.filter(
                from_person=person, to_company__edrpou__in=edrpou_list)

            if not p2c_qs.exists():
                continue

            tqdm.write("Matched {}. Found common P2C relation: {}, [{}]"
                       .format(person.full_name, person.url_uk,
                               " ".join([p2c.to_company.url_uk
                                         for p2c in p2c_qs.iterator()])))
            self.persons_dict[person_name] = person
            self.persons_stats["matched_resolved"] += 1

            # Update DoB
            self.update_person_dob(person, yob, real_run)

            return person

        tqdm.write("Found matches for name: {}. Person with same name will be created."
                   .format(person_name))

        self.persons_stats["matched_not_resolved"] += 1
        return create_new_person()
예제 #7
0
    def handle(self, *args, **options):
        """
        Confirm or create heads of state companies using the EDR index.

        For every state company with a non-empty EDRPOU code the matching
        registry record is looked up. The name of its head is then matched
        against persons already connected to the company: first exactly
        (case-insensitive), then by last name plus first letters of the
        first name and patronymic. Matching connections are marked as
        confirmed by the registry. When nothing matches, a new person and
        a connection are created, but only if the ``real_run`` option is
        set; the counters are increased either way.

        Companies that cannot be resolved to exactly one registry record
        with a parseable head name are reported to stderr and counted as
        failed. Summary counters are written to stdout at the end.
        """
        activate(settings.LANGUAGE_CODE)

        successful = 0
        failed = 0

        exact_matches = 0
        fuzzy_matches = 0
        connections_created = 0
        persons_created = 0

        def confirm_links(links, edr_company, fuzzy=False):
            # Mark existing connections as confirmed by the registry and
            # report the ones that need attention
            for link in links:
                link.created_from_edr = True
                link.date_confirmed = edr_company.last_update
                link.date_confirmed_details = 0
                link.save()

                if fuzzy:
                    self.stdout.write(
                        "Fuzzy match: %s vs %s" %
                        (edr_company.head, link.from_person.full_name))

                if link.relationship_type != "Керівник":
                    self.stdout.write(
                        "Relation %s exists but has different type: %s" %
                        (link, link.relationship_type))

        for company in Company.objects.filter(state_company=True).exclude(
                edrpou=""):
            k = company.edrpou.lstrip("0")

            # Because open copy of registry has no dates and some of companies
            # has more than one record we are using heuristic here to determine
            # latest record using registration status (they have "priorities")
            ans = None  # stays empty when status_order has no entries
            for order in self.status_order:
                ans = EDRPOU.search().query(
                    "bool",
                    must=[Q("term", edrpou=k),
                          Q("term", status=order)]).execute()
                if ans:
                    break

            # Last attempt
            if not ans:
                ans = EDRPOU.search().query(
                    "term",
                    edrpou=k,
                ).execute()

            if len(ans) > 1:
                self.stderr.write(
                    "Too many companies found by code %s, for the name %s, skipping"
                    % (k, company))

                failed += 1
                continue

            if len(ans) == 0:
                self.stderr.write("Cannot find the company by code %s" % (k, ))

                failed += 1
                continue

            edr_company = ans[0]
            if not edr_company.head:
                self.stderr.write("Cannot find head for the company %s, (%s)" %
                                  (ans[0].name, k))

                failed += 1
                continue

            lastname, firstname, patronymic, _ = parse_fullname(
                edr_company.head)

            # A name that cannot be split can neither be matched (the fuzzy
            # lookup needs the first letter of the first name) nor used to
            # create a meaningful person record
            if not lastname or not firstname:
                self.stderr.write(
                    "Cannot parse the name of the head %s for the company %s"
                    % (edr_company.head, k))

                failed += 1
                continue

            successful += 1

            exact_links = Person2Company.objects.select_related(
                "from_person").filter(
                    to_company_id=company.pk,
                    from_person__first_name__iexact=firstname,
                    from_person__last_name__iexact=lastname)

            if patronymic:
                exact_links = exact_links.filter(
                    from_person__patronymic__iexact=patronymic)

            if exact_links.count():
                exact_matches += 1
                confirm_links(exact_links, edr_company)
                continue

            fuzzy_links = Person2Company.objects.select_related(
                "from_person").filter(
                    to_company_id=company.pk,
                    from_person__last_name__iexact=lastname,
                    from_person__first_name__istartswith=firstname[0],
                )

            if patronymic:
                fuzzy_links = fuzzy_links.filter(
                    from_person__patronymic__istartswith=patronymic[0])

            if fuzzy_links:
                fuzzy_matches += 1
                confirm_links(fuzzy_links, edr_company, fuzzy=True)
                continue

            # Nothing matched: create the person and the connection. The
            # counters are increased in a dry run too.
            try:
                if options["real_run"]:
                    person = Person.objects.create(first_name=firstname,
                                                   last_name=lastname,
                                                   patronymic=patronymic,
                                                   is_pep=True,
                                                   type_of_official=1)
                persons_created += 1

                if options["real_run"]:
                    Person2Company.objects.create(
                        from_person=person,
                        to_company=company,
                        relationship_type="Керівник",
                        is_employee=True,
                        created_from_edr=True,
                        date_confirmed=edr_company.last_update,
                        # TODO: decide what to do with connection proofs
                        proof_title="Інформація, отримана з ЄДР",
                    )

                connections_created += 1
            except DataError:
                self.stdout.write("Cannot create %s person or connection" %
                                  edr_company.head)

        self.stdout.write("Creation failed: %s, creation successful: %s" %
                          (failed, successful))
        self.stdout.write("Exact matches: %s, fuzzy matches: %s" %
                          (exact_matches, fuzzy_matches))
        self.stdout.write("Persons created: %s, connections created: %s" %
                          (persons_created, connections_created))
예제 #8
0
    def handle(self, *args, **options):
        """
        Find probable duplicates among persons and queue them for review.

        Every person contributes one record for the main name and one for
        each alias from ``also_known_as_uk`` (comma- or newline-separated)
        that parses into last name, first name and patronymic. Candidates
        are then collected in three passes:

        1. records sharing the same full name, or the same last name plus
           initials when a record has initials only;
        2. records with initials against full names with the same last
           name and initials;
        3. remaining full names whose Jaro similarity is above 0.93
           (saved with ``fuzzy=True``).

        A person is never paired with themselves, even when one of their
        aliases equals or resembles their main name. Each pair is saved as
        a PersonDeduplication record; an IntegrityError on save is ignored.
        """
        activate(settings.LANGUAGE_CODE)
        all_persons = []

        def make_record(pk, last_name, first_name, patronymic):
            return {
                "pk": pk,
                # Last name plus initials
                "key": ("%s %s %s" % (
                    last_name, first_name[:1], patronymic[:1])).lower(),
                "fullname": ("%s %s %s" % (
                    last_name, first_name, patronymic)).lower(),
                "has_initials": (is_initial(first_name)
                                 or is_initial(patronymic)),
                "last_name": last_name,
                "first_name": first_name,
                "patronymic": patronymic,
            }

        for p in Person.objects.all().nocache().iterator():
            all_persons.append(
                make_record(p.pk, p.last_name, p.first_name, p.patronymic))

            aliases = (p.also_known_as_uk or "").replace(",", "\n")
            for aka in aliases.split("\n"):
                aka = aka.strip()
                if not aka:
                    continue

                last_name, first_name, patronymic, _ = parse_fullname(aka)
                if not(all([last_name, first_name, patronymic])):
                    continue

                all_persons.append(
                    make_record(p.pk, last_name, first_name, patronymic))

        grouped_by_fullname = defaultdict(list)
        grouped_by_shortenedname = defaultdict(list)

        # First pass: exact matches by full name (even if those are given with initials)
        for l in tqdm(all_persons):
            if l["has_initials"]:
                grouped_by_shortenedname[l["key"]].append(l["pk"])
            else:
                grouped_by_fullname[l["fullname"]].append(l["pk"])

        spoiled_ids = set()
        chunks_to_review = list()

        def collect_chunks(groups):
            # Keep the groups which mention more than one distinct person.
            # The same pk may appear several times in a group, once for the
            # main name and once per alias.
            for pks in groups:
                distinct = []
                for pk in pks:
                    if pk not in distinct:
                        distinct.append(pk)

                if len(distinct) > 1:
                    spoiled_ids.update(distinct)
                    chunks_to_review.append(distinct)

        collect_chunks(grouped_by_fullname.values())
        collect_chunks(grouped_by_shortenedname.values())

        mixed_grouping = defaultdict(list)

        # Second pass: initials vs full names
        for l in tqdm(all_persons):
            if l["pk"] not in spoiled_ids and l["has_initials"]:
                mixed_grouping[l["key"]].append(l["pk"])

        for l in tqdm(all_persons):
            if l["pk"] not in spoiled_ids and not l["has_initials"] and l["key"] in mixed_grouping:
                mixed_grouping[l["key"]].append(l["pk"])

        collect_chunks(tqdm(mixed_grouping.values()))

        def save_pair(pk1, pk2, **extra):
            try:
                PersonDeduplication(
                    person1_id=pk1,
                    person2_id=pk2,
                    person1_json=Person.objects.get(pk=pk1).to_dict(),
                    person2_json=Person.objects.get(pk=pk2).to_dict(),
                    **extra
                ).save()
            except IntegrityError:
                # Presumably the pair is already queued for review
                # (NOTE(review): confirm the unique constraint on the model)
                pass

        # NOTE(review): only the first two persons of a chunk are paired,
        # as before; larger groups rely on subsequent runs.
        for chunk in chunks_to_review:
            save_pair(chunk[0], chunk[1])

        candidates_for_fuzzy = [
            l for l in all_persons
            if l["pk"] not in spoiled_ids and not l["has_initials"]
        ]

        # Third pass: similar full names
        for a, b in tqdm(combinations(candidates_for_fuzzy, 2)):
            # Main name vs alias of the very same person
            if a["pk"] == b["pk"]:
                continue

            score = jaro(a["fullname"], b["fullname"])
            if score > 0.93:
                save_pair(a["pk"], b["pk"], fuzzy=True)