Exemple #1
0
    def link_sections_to_a_new_person(self, sections, section_distances,
                                      person_id):
        """Link every section of one cluster to a single person record.

        When ``person_id`` is None a new id is requested from
        ``self.permalinks_db``; otherwise the given id is reused. The person
        row is loaded if it already exists and created otherwise. Each section
        is then passed to ``self.link_section_to_person`` together with its
        distance.

        :param sections: sections to be linked to the person.
        :param section_distances: one distance per section, in the same order.
        :param person_id: person id to reuse, or None to allocate a new one.
        :raises AssertionError: if the two sequences differ in length.
        """
        assert len(section_distances) == len(sections)
        if person_id is None:
            person_id = self.permalinks_db._get_new_id()
            self.logger.debug("create new person.id: {}".format(person_id))
        else:
            self.logger.debug("use old person.id: {}".format(person_id))

        # Only the lookup can raise DoesNotExist, so keep the try body minimal.
        try:
            person = models.Person.objects.get(id=person_id)
        except models.Person.DoesNotExist:
            # create new person record
            person = models.Person(id=person_id)
            person.save()
        else:
            # reuse person record from declarator
            if person.declarator_person_id is None:
                # The literals below are concatenated, so each one must end
                # with a space to keep the words of the message apart.
                self.logger.error(
                    "Warning! Reuse existing person_id = {} for different sections (cluster), it could happen "
                    "if this person_id was used for different person created by copy_person_id.py "
                    "but should not happen if declarator_person_id is None. ".
                    format(person_id))

        for (section, distance) in zip(sections, section_distances):
            self.link_section_to_person(section, person, distance)
    def test(self):
        """Dedupe must attach an unlinked section to the already existing person.

        Section 1 is saved linked to person 2 (declarator_person_id=1111) and
        section 2 is saved without a person. After RunDedupe is run with
        fake_dedupe=True both sections are expected to point at that person.
        """
        full_name = "Иванов Иван Иванович"
        source_document = create_default_source_document()

        person = models.Person(id=2, declarator_person_id=1111, person_name=full_name)
        person.save()

        linked_section = models.Section(id=1,
                                        source_document=source_document,
                                        person_name=full_name,
                                        person=person)
        linked_section.save()
        unlinked_section = models.Section(id=2,
                                          source_document=source_document,
                                          person_name="Иванов И. И.")
        unlinked_section.save()

        person.refresh_from_db()

        # The permalinks db is created next to this test file; the next
        # primary key value saved for Person is 3.
        db_path = os.path.join(os.path.dirname(__file__), "permalinks.dbm")
        permalinks_db = TPermaLinksDB(db_path)
        permalinks_db.create_db()
        permalinks_db.save_next_primary_key_value(models.Person, 3)
        permalinks_db.create_sql_sequences()
        permalinks_db.close()

        dedupe_options = dict(permanent_links_db=db_path,
                              write_to_db=True,
                              fake_dedupe=True,
                              surname_bounds=',',
                              rebuild=True)
        RunDedupe(None, None).handle(None, **dedupe_options)

        for section_id in (1, 2):
            section = models.Section.objects.get(id=section_id)
            self.assertEqual(section.person_id, person.id)
Exemple #3
0
    def test(self):
        """An existing person without a declarator id must be reused.

        Person 1 is saved with no declarator_person_id and linked to the first
        test section. After ``run_copy_person_id(False, False)`` the test
        expects exactly one person, still with id 1, that now carries
        ``self.declarator_person_id``.
        """
        self.create_test_db()

        person_id = 1
        existing_person = models.Person(id=person_id, person_name=self.fio)
        self.assertIsNone(existing_person.declarator_person_id)
        existing_person.save()

        section = models.Section.objects.get(id=self.section_id1)
        section.person = existing_person
        section.save()

        folder = CopyPersonIdTestCaseBase.permalinks_folder
        TPermaLinksPerson(folder).create_and_save_empty_db()
        CreatePermalinksStorageCommand(None, None).handle(None, directory=folder)

        permalinks_db = TPermaLinksPerson(folder)
        permalinks_db.open_db_read_only()
        permalinks_db.recreate_auto_increment_table()

        self.run_copy_person_id(False, False)

        self.assertEqual(models.Person.objects.count(), 1)
        # Re-read the section so that the person comes from the database.
        linked_person = models.Section.objects.get(id=self.section_id1).person
        self.assertEqual(linked_person.declarator_person_id,
                         self.declarator_person_id)
        self.assertEqual(linked_person.id, person_id)
    def test(self):
        """Dedupe must reuse the person id recorded in the permalinks db.

        Two sections are saved without a person. An unsaved person with id 99
        and the section set {"1", "2"} is put into the permalinks db. After
        RunDedupe is run both sections are expected to have person_id 99.
        """
        source_document = create_default_source_document()
        section_names = {1: "Иванов Иван Иванович", 2: "Иванов И. И."}
        for section_id, person_name in section_names.items():
            models.Section(id=section_id,
                           source_document=source_document,
                           person_name=person_name).save()

        db_path = os.path.join(os.path.dirname(__file__), "permalinks.dbm")
        permalinks_db = TPermaLinksDB(db_path)
        permalinks_db.create_db()
        # This person is only registered in the permalinks db, it is never
        # saved to the Django database by the test itself.
        person = models.Person(id=99)
        person.tmp_section_set = {"1", "2"}
        permalinks_db.put_record_id(person)
        permalinks_db.save_next_primary_key_value(models.Person, 100)
        permalinks_db.create_sql_sequences()
        permalinks_db.close()

        dedupe_options = dict(permanent_links_db=db_path,
                              write_to_db=True,
                              fake_dedupe=True,
                              surname_bounds=',',
                              rebuild=True)
        RunDedupe(None, None).handle(None, **dedupe_options)

        for section_id in section_names:
            section = models.Section.objects.get(id=section_id)
            self.assertEqual(section.person_id, person.id)
 def link_sections_to_a_new_person(self, section_ids):
     """Create a person and link every listed section to it.

     ``section_ids`` holds ``(section_id, score)`` pairs and is iterated
     twice. The stringified section ids are stored on the new person as
     ``tmp_section_set`` before its id is requested from
     ``self.primary_keys_builder``. Every section is then loaded and handed to
     ``self.link_section_to_person`` with its score.
     """
     new_person = models.Person()
     new_person.tmp_section_set = {str(section_id) for section_id, _ in section_ids}
     new_person.id = self.primary_keys_builder.get_record_id(new_person)
     new_person.save()
     for section_id, score in section_ids:
         self.link_section_to_person(
             models.Section.objects.get(id=section_id), new_person, score)
 def copy_human_merge(self, section, declarator_person_id):
     """Link ``section`` to a person built from a declarator person id.

     The person id comes from ``self.primary_keys_builder``. The section's
     name is copied to the person when the person has no name or a shorter
     one. The person and the section are both saved, and the section's
     ``dedupe_score`` is reset to None.
     """
     # we think that person ids in declarator db are stable
     person = models.Person(declarator_person_id=declarator_person_id)
     person.id = self.primary_keys_builder.get_record_id(person)

     message = "connect section {} to person {}, declarator_person_id={}".format(
         section.id, person.id, declarator_person_id)
     self.logger.debug(message)

     current_name = person.person_name
     if current_name is None or len(current_name) < len(section.person_name):
         person.person_name = section.person_name
     person.save()

     section.person = person
     section.dedupe_score = None
     section.save()
Exemple #7
0
    def test_rating(self):
        """BuildRatingCommand must rank three sections by their income.

        Three sections (ids 1..3) of the same office and year are saved for
        one person, with main-declarant incomes of size 1, 2 and 3. After the
        command is run with min_members_count=3 the test expects three rating
        items: place 1 with value 3, place 2 with value 2, place 3 with
        value 1.
        """
        models.Person.objects.all().delete()
        models.Section.objects.all().delete()
        models.Source_Document.objects.all().delete()

        src_doc = models.Source_Document(id=1)
        src_doc.save()

        person = models.Person(id=99)
        person.save()

        section_ids = (1, 2, 3)
        for section_id in section_ids:
            models.Section(id=section_id,
                           source_document=src_doc,
                           person_name="i{}".format(section_id),
                           income_year=2019,
                           office_id=1,
                           person=person).save()
        # The income size equals the section id, so section 3 is the richest.
        for section_id in section_ids:
            models.Income(section_id=section_id,
                          size=section_id,
                          relative=models.Relative.main_declarant_code).save()

        builder = BuildRatingCommand(None, None)
        builder.handle(None, min_members_count=3)

        self.assertEqual(models.Person_Rating_Items.objects.count(), 3)
        # Row order of an unordered queryset is not guaranteed, so order the
        # items explicitly before comparing them position by position.
        rating = list(models.Person_Rating_Items.objects.order_by("person_place"))

        expected = [(3, 1), (2, 2), (1, 3)]  # (rating_value, person_place)
        self.assertEqual(len(rating), len(expected))
        for item, (rating_value, person_place) in zip(rating, expected):
            self.assertEqual(item.rating_value, rating_value)
            self.assertEqual(item.person_place, person_place)
    def test(self):
        """Check the person ids assigned when sections are kept separate.

        Person 99 is saved with two identically named sections and
        CreatePermalinksStorageCommand is run while these links exist. Both
        sections are then unlinked and the person is deleted. RunDedupe is run
        with separate_sections=True and the test expects two persons:
        section 1 with id 99 and section 2 with id 100.
        """
        self.initialize()
        permalinks_folder = os.path.dirname(__file__)

        person_id = 99
        old_person = models.Person(id=person_id)
        old_person.save()
        full_name = "Иванов Иван Иванович"
        sections = [
            self.create_section(1, full_name, person=old_person),
            self.create_section(2, full_name, person=old_person),
        ]

        storage_command = CreatePermalinksStorageCommand(None, None)
        storage_command.handle(None, directory=permalinks_folder)
        readonly_db = TPermaLinksPerson(permalinks_folder).open_db_read_only()
        readonly_db.recreate_auto_increment_table()

        # Remove the links first and only then the person itself.
        for section in sections:
            section.person = None
            section.save()
        old_person.delete()

        dedupe_options = dict(permalinks_folder=permalinks_folder,
                              write_to_db=True,
                              fake_dedupe=True,
                              separate_sections=True,
                              surname_bounds=',',
                              take_sections_with_empty_income=True,
                              rebuild=True)
        RunDedupe(None, None).handle(None, **dedupe_options)

        self.assertEqual(2, models.Person.objects.count())

        # "person_id" is inherited by the minimal section_id, if there is no
        # other grounds; the second section gets a new person_id.
        expected_person_ids = {1: person_id, 2: person_id + 1}
        for section_id, expected_person_id in expected_person_ids.items():
            section = models.Section.objects.get(id=section_id)
            self.assertEqual(section.person_id, expected_person_id)
Exemple #9
0
 def read_dumped_objects(self, file_name):
     """Load dumped deduplication objects from a file with one JSON per line.

     Every line is parsed with ``json.loads`` and converted by
     ``TDeduplicationObject().from_json``. The object is appended to
     ``self.cluster_by_minimal_fio`` under its FIO with initials.

     When the ``recreate_db`` option is set, the Section table must be empty
     beforehand. A Section row (also stored in ``self.section_cache``) or a
     Person row is then saved for every object that is read.

     :param file_name: path of the dump file, read as UTF-8.
     """
     # The option does not change while the file is being read.
     recreate_db = self.options.get('recreate_db')
     if recreate_db:
         assert models.Section.objects.count() == 0
     # Use an explicit encoding so the result does not depend on the locale.
     with open(file_name, encoding="utf-8") as inp:
         for line in inp:
             js = json.loads(line)
             o = TDeduplicationObject().from_json(js)
             if recreate_db:
                 if o.record_id.source_table == TDeduplicationObject.SECTION:
                     assert len(o.offices) == 1
                     s = models.Section(id=o.record_id.id,
                                        office_id=list(o.offices)[0])
                     self.section_cache[o.record_id.id] = s
                     s.save()
                 else:
                     models.Person(id=o.record_id.id).save()
             self.cluster_by_minimal_fio[
                 o.fio.build_fio_with_initials()].append(o)
Exemple #10
0
    def test(self):
        """Dedupe must give the old person id back to all three sections.

        Person 99 is saved with sections 1 and 2; section 3 has another
        surname and no person. The permalinks db is saved from this state,
        then sections 1 and 2 are unlinked and the person is deleted. After
        RunDedupe the test expects a single person and all three sections
        linked to id 99.
        """
        self.initialize()

        person_id = 99
        old_person = models.Person(id=person_id)
        old_person.save()
        linked_sections = [
            self.create_section(1, "Иванов Иван Иванович", person=old_person),
            self.create_section(2, "Иванов И. И.", person=old_person),
        ]
        self.create_section(3, "Петров И. И.")

        permalinks_folder = os.path.dirname(__file__)
        permalinks_db = TPermaLinksPerson(permalinks_folder)
        permalinks_db.create_db()
        permalinks_db.save_dataset(setup_logging())
        permalinks_db.recreate_auto_increment_table()
        permalinks_db.close_db()

        # Remove the links first and only then the person itself.
        for section in linked_sections:
            section.person = None
            section.save()
        old_person.delete()

        dedupe_options = dict(permalinks_folder=permalinks_folder,
                              write_to_db=True,
                              fake_dedupe=True,
                              surname_bounds=',',
                              take_sections_with_empty_income=True,
                              rebuild=True)
        RunDedupe(None, None).handle(None, **dedupe_options)

        self.assertEqual(models.Person.objects.count(), 1)

        for section_id in (1, 2, 3):
            section = models.Section.objects.get(id=section_id)
            self.assertEqual(section.person_id, person_id)
Exemple #11
0
    def create_records(self, records):
        """Replace the test data in the database with the given records.

        All sections, source documents, persons and person redirects are
        deleted first. Model instances are then saved from the keyword
        dictionaries found under the optional keys ``source_documents``,
        ``persons``, ``sections`` and ``redirects``. An office named "aaa" is
        created for a section whose ``office_id`` is not in the database yet.

        :param records: dict that maps the keys above to lists of dicts.
        :raises AssertionError: if the Office table is empty.
        """
        models.Section.objects.all().delete()
        models.Source_Document.objects.all().delete()
        models.Person.objects.all().delete()
        models.PersonRedirect.objects.all().delete()
        assert models.Office.objects.exists()

        for document_fields in records.get('source_documents', []):
            models.Source_Document(**document_fields).save()

        for person_fields in records.get('persons', []):
            models.Person(**person_fields).save()

        for section_fields in records.get('sections', []):
            office_id = section_fields['office_id']
            # exists() asks the database for a single row instead of loading
            # every matching office only to count them.
            if not models.Office.objects.filter(id=office_id).exists():
                models.Office(id=office_id, name="aaa").save()
            models.Section(**section_fields).save()

        for redirect_fields in records.get('redirects', []):
            models.PersonRedirect(**redirect_fields).save()
Exemple #12
0
    def test(self):
        """Dedupe must keep the declarator person and link both sections to it.

        Person 2 (declarator_person_id=1111) is saved with section 1; section 2
        is saved without a person. After RunDedupe the test expects a single
        person with an unchanged declarator id and both sections linked to it.
        """
        self.initialize()

        person_id = 2
        declarator_person_id = 1111
        full_name = "Иванов Иван Иванович"
        models.Person(id=person_id,
                      declarator_person_id=declarator_person_id,
                      person_name=full_name).save()
        declarator_person = models.Person.objects.get(id=person_id)

        self.create_section(1, full_name, declarator_person)
        self.create_section(2, "Иванов И. И.")

        permalinks_folder = os.path.dirname(__file__)
        permalinks_db = TPermaLinksPerson(permalinks_folder)
        permalinks_db.create_db()
        permalinks_db.save_dataset(setup_logging())
        #permalinks_db.save_max_plus_one_primary_key(3)
        permalinks_db.recreate_auto_increment_table()
        permalinks_db.close_db()

        dedupe_options = dict(permalinks_folder=permalinks_folder,
                              write_to_db=True,
                              fake_dedupe=True,
                              surname_bounds=',',
                              take_sections_with_empty_income=True,
                              rebuild=True)
        RunDedupe(None, None).handle(None, **dedupe_options)

        self.assertEqual(models.Person.objects.count(), 1)
        person = models.Person.objects.get(id=person_id)
        self.assertIsNotNone(person)
        self.assertEqual(declarator_person_id, person.declarator_person_id)

        for section_id in (1, 2):
            section = models.Section.objects.get(id=section_id)
            self.assertEqual(section.person_id, person.id)
Exemple #13
0
    def copy_human_merge(self, section, declarator_person_id):
        """Link ``section`` to the person that matches a declarator person id.

        The person is looked up in this order:

        1. ``self.declarator_person_id_to_disclosures_person``. On a hit the
           person's name is replaced by the section's name when it is missing
           or shorter.
        2. ``self.disclosures_person_id_to_disclosures_person``, by the person
           id returned from ``self.permalinks_db``. A differing declarator id
           is logged as an error and the cached person is still used.
        3. Otherwise a new person is saved and stored in both caches.

        Finally the section is saved with this person and with
        ``dedupe_score`` reset to None.

        :param section: section to be linked; ``id`` and ``person_name`` are read.
        :param declarator_person_id: person id in the declarator database.
        :raises AssertionError: if the chosen person has no declarator_person_id.
        """
        person = self.declarator_person_id_to_disclosures_person.get(
            declarator_person_id)
        if person is None:
            person_id = self.permalinks_db.get_person_id_by_declarator_id(
                declarator_person_id, section.id)
            if person_id in self.disclosures_person_id_to_disclosures_person:
                person = self.disclosures_person_id_to_disclosures_person[
                    person_id]
                if declarator_person_id != person.declarator_person_id:
                    self.logger.error(
                        "Person id={} has conflict declarator_person_id ({} != {}), use the first person id {}"
                        .format(person_id, declarator_person_id,
                                person.declarator_person_id,
                                person.declarator_person_id))

            else:
                person = models.Person(
                    id=person_id,
                    declarator_person_id=declarator_person_id,
                    person_name=section.person_name)
                person.save()
                self.declarator_person_id_to_disclosures_person[
                    declarator_person_id] = person
                # This cache is read by the disclosures person id above, so
                # it has to be filled under the same key, not under the
                # declarator id.
                self.disclosures_person_id_to_disclosures_person[
                    person_id] = person
        elif person.person_name is None or len(person.person_name) < len(
                section.person_name):
            person.person_name = section.person_name
            person.save()

        assert person.declarator_person_id is not None
        self.logger.debug(
            "connect section {} to person {}, declarator_person_id={}".format(
                section.id, person.id, person.declarator_person_id))

        section.person = person
        section.dedupe_score = None
        section.save()
Exemple #14
0
    def test_corrected_person(self):
        """A corrected section must not be listed twice for its person.

        SECTION_CORRECTIONS is required to map section 8048661 to 9798543.
        Both sections are saved for person 1 and
        ``person.sections_ordered_by_year`` is expected to hold one section.
        """
        for model in (models.Section, models.Source_Document, models.Person):
            model.objects.all().delete()

        src_doc = models.Source_Document(id=1)
        src_doc.save()

        old_section_id, corrected_section_id = 8048661, 9798543
        assert SECTION_CORRECTIONS.get_corrected_section_id(8048661) == 9798543

        full_name = "Иванов Иван Ильич"
        models.Person(id=1, person_name=full_name).save()
        for section_id in (old_section_id, corrected_section_id):
            models.Section(id=section_id,
                           income_year=2016,
                           person_name=full_name,
                           source_document=src_doc,
                           office_id=1,
                           person_id=1).save()

        person = models.Person.objects.get(id=1)
        self.assertEqual(1, len(person.sections_ordered_by_year))