예제 #1
0
    def test(self):
        """Run RunDedupe with the pickled model and check one section's person.

        All sections are deleted first, the permalinks database located next
        to this test file is opened read-only so its auto-increment table can
        be recreated, and then the dedupe command is run on the dumped dedupe
        objects with ``write_to_db=True``. The test expects section 757036 to
        end up attached to person 1406125.
        """
        # The returned logger is not needed here; only the call itself is kept.
        setup_logging(logger_name="test_real_dedupe")
        models.Section.objects.all().delete()

        test_dir = os.path.dirname(__file__)

        permalinks = TPermaLinksPerson(test_dir)
        permalinks.open_db_read_only()
        permalinks.recreate_auto_increment_table()
        permalinks.close_db()

        options = {
            "permalinks_folder": test_dir,
            "input_dedupe_objects": os.path.join(test_dir,
                                                 "dedupe_objects.dump"),
            "model_file": os.path.join(
                test_dir,
                "../../../deduplicate/model/random_forest.pickle"),
            "threshold": 0.6,
            "recreate_db": True,
            "surname_bounds": ',',
            "write_to_db": True,
        }
        RunDedupe(None, None).handle(None, **options)

        section = models.Section.objects.get(id=757036)
        self.assertEqual(1406125, section.person_id)
예제 #2
0
    def test(self):
        """Check that copying person ids keeps a person that already exists.

        A person with id 1 and no declarator id is saved and linked to the
        first test section. After the permalinks storage is rebuilt and
        ``run_copy_person_id`` is executed, there must still be exactly one
        person, with the same id, now carrying ``self.declarator_person_id``.
        """
        self.create_test_db()

        expected_person_id = 1
        existing_person = models.Person(id=expected_person_id,
                                        person_name=self.fio)
        # A freshly constructed person must not have a declarator id yet.
        self.assertIsNone(existing_person.declarator_person_id)
        existing_person.save()

        linked_section = models.Section.objects.get(id=self.section_id1)
        linked_section.person = existing_person
        linked_section.save()

        folder = CopyPersonIdTestCaseBase.permalinks_folder
        TPermaLinksPerson(folder).create_and_save_empty_db()

        # Rebuild the permalinks storage from the current database state.
        CreatePermalinksStorageCommand(None, None).handle(None,
                                                          directory=folder)
        storage = TPermaLinksPerson(folder)
        storage.open_db_read_only()
        storage.recreate_auto_increment_table()

        self.run_copy_person_id(False, False)

        self.assertEqual(models.Person.objects.count(), 1)
        reloaded_section = models.Section.objects.get(id=self.section_id1)
        owner = reloaded_section.person
        self.assertEqual(owner.declarator_person_id,
                         self.declarator_person_id)
        self.assertEqual(owner.id, expected_person_id)
예제 #3
0
    def test(self):
        """Check that a repeated copy run reuses the stored person ids.

        ``run_copy_person_id`` is executed once against an empty permalinks
        database, the permalinks storage is then rebuilt from the result, and
        the copy is executed a second time. After the second run
        ``get_last_inserted_id_for_testing()`` must return ``None``
        (presumably meaning that no new person id had to be allocated --
        confirm against TPermaLinksPerson).
        """
        TPermaLinksPerson(CopyPersonIdTestCaseBase.permalinks_folder
                          ).create_and_save_empty_db()
        self.run_copy_person_id(False, False)

        # check that we reuse old person ids
        CreatePermalinksStorageCommand(None, None).handle(
            None, directory=CopyPersonIdTestCaseBase.permalinks_folder)
        permalinks_db = TPermaLinksPerson(
            CopyPersonIdTestCaseBase.permalinks_folder)
        permalinks_db.open_db_read_only()
        permalinks_db.recreate_auto_increment_table()

        self.run_copy_person_id(False, False)
        # Identity check against the None singleton instead of assertEqual:
        # it cannot be fooled by a custom __eq__ and reports a clearer failure.
        self.assertIsNone(permalinks_db.get_last_inserted_id_for_testing())
예제 #4
0
    def test(self):
        """Check that a rebuild gives all sections the previously stored person id.

        Person 99 initially owns sections 1 and 2, while section 3 has no
        person. That state is saved to the permalinks database, after which
        the person is detached from its sections and deleted. Running the
        dedupe command with ``fake_dedupe=True`` and ``rebuild=True`` must
        leave exactly one person and attach all three sections to id 99.
        """
        self.initialize()

        person_id = 99
        owner = models.Person(id=person_id)
        owner.save()
        owned_sections = (
            self.create_section(1, "Иванов Иван Иванович", person=owner),
            self.create_section(2, "Иванов И. И.", person=owner),
        )
        # Section 3 never gets a person assigned in this setup.
        self.create_section(3, "Петров И. И.")

        permalinks_folder = os.path.dirname(__file__)
        permalinks = TPermaLinksPerson(permalinks_folder)
        permalinks.create_db()
        permalinks.save_dataset(setup_logging())
        permalinks.recreate_auto_increment_table()
        permalinks.close_db()

        # Drop the person so that only the permalinks database remembers it.
        for section in owned_sections:
            section.person = None
            section.save()
        owner.delete()

        options = {
            "permalinks_folder": permalinks_folder,
            "write_to_db": True,
            "fake_dedupe": True,
            "surname_bounds": ',',
            "take_sections_with_empty_income": True,
            "rebuild": True,
        }
        RunDedupe(None, None).handle(None, **options)

        self.assertEqual(models.Person.objects.count(), 1)

        for section_id in (1, 2, 3):
            stored = models.Section.objects.get(id=section_id)
            self.assertEqual(stored.person_id, person_id)
예제 #5
0
    def test(self):
        """Run the real dedupe model on the person 5295 fixture and compare results.

        The SQL fixture ``disclosures.sql.person_id_5295.n`` is loaded, the
        current state is saved to a permalinks database next to this file,
        and the dedupe command is run with the pickled model at threshold
        0.6. The test then expects three persons in total, person 5295 with
        declarator id 5295, and an exact list of
        ``(section id, person id, has dedupe score)`` triples.
        """
        logger = setup_logging(logger_name="test_real_dedupe")
        sql_script = os.path.join(os.path.dirname(__file__),
                                  "disclosures.sql.person_id_5295.n")
        run_sql_script(logger, sql_script)

        permalinks_folder = os.path.dirname(__file__)
        db = TPermaLinksPerson(permalinks_folder)
        db.create_db()
        db.save_dataset(setup_logging())
        db.recreate_auto_increment_table()
        db.close_db()

        model_path = os.path.join(
            os.path.dirname(__file__),
            "../../../deduplicate/model/random_forest.pickle")
        run_dedupe = RunDedupe(None, None)
        run_dedupe.handle(None,
                          permalinks_folder=permalinks_folder,
                          write_to_db=True,
                          surname_bounds=',',
                          model_file=model_path,
                          threshold=0.6)

        person_id = 5295
        self.assertEqual(models.Person.objects.count(), 3)
        person = models.Person.objects.get(id=person_id)
        self.assertIsNotNone(person)
        self.assertEqual(5295, person.declarator_person_id)

        # Expected (section id, person id, has dedupe score), sorted by id.
        canon_sections = [
            (451721, 5295, True),
            (452066, 5295, True),
            (452420, 5295, True),
            (453686, 5295, False),
            (455039, 5295, False),
            (1801614, 5296, True),
            (5105303, 5295, True),
            (6437989, 5297, True),
            (6672563, 5297, True),
            (6674154, 5297, True),
            (6773981, 5297, True),
        ]
        # Order explicitly by id: without order_by the row order of a
        # queryset is not guaranteed, while the expected list is id-sorted.
        sections = [
            (s.id, s.person_id, s.dedupe_score is not None)
            for s in models.Section.objects.all().order_by("id")
        ]
        self.assertListEqual(canon_sections, sections)
예제 #6
0
    def test(self):
        """Check that a rebuild keeps the person's id and declarator id.

        Person 2 (declarator id 1111) owns section 1, while section 2 has no
        person. After the state is saved to the permalinks database and the
        dedupe command is run with ``fake_dedupe=True`` and ``rebuild=True``,
        there must be exactly one person, still with id 2 and declarator id
        1111, and both sections must point to it.
        """
        self.initialize()

        person_id = 2
        declarator_person_id = 1111
        models.Person(id=person_id,
                      declarator_person_id=declarator_person_id,
                      person_name="Иванов Иван Иванович").save()
        original_person = models.Person.objects.get(id=person_id)

        self.create_section(1, "Иванов Иван Иванович", original_person)
        self.create_section(2, "Иванов И. И.")

        permalinks_folder = os.path.dirname(__file__)
        permalinks = TPermaLinksPerson(permalinks_folder)
        permalinks.create_db()
        permalinks.save_dataset(setup_logging())
        permalinks.recreate_auto_increment_table()
        permalinks.close_db()

        options = {
            "permalinks_folder": permalinks_folder,
            "write_to_db": True,
            "fake_dedupe": True,
            "surname_bounds": ',',
            "take_sections_with_empty_income": True,
            "rebuild": True,
        }
        RunDedupe(None, None).handle(None, **options)

        self.assertEqual(models.Person.objects.count(), 1)
        rebuilt_person = models.Person.objects.get(id=person_id)
        self.assertIsNotNone(rebuilt_person)
        self.assertEqual(declarator_person_id,
                         rebuilt_person.declarator_person_id)

        for section_id in (1, 2):
            stored = models.Section.objects.get(id=section_id)
            self.assertEqual(stored.person_id, rebuilt_person.id)
예제 #7
0
    def test(self):
        """Check that a changed declarator id updates the person but keeps its id.

        The copy is run once against an empty permalinks database and the
        permalinks storage is rebuilt from the result. A second copy run uses
        ``self.declarator_person_id + 1``; afterwards there must still be one
        person with id 1, now carrying the new declarator id.
        """
        folder = CopyPersonIdTestCaseBase.permalinks_folder
        TPermaLinksPerson(folder).create_and_save_empty_db()
        self.run_copy_person_id(False, False)

        # check that we reuse old person ids
        CreatePermalinksStorageCommand(None, None).handle(None,
                                                          directory=folder)
        storage = TPermaLinksPerson(folder)
        storage.open_db_read_only()
        storage.recreate_auto_increment_table()

        changed_declarator_id = self.declarator_person_id + 1
        self.run_copy_person_id(False,
                                False,
                                declarator_person_id=changed_declarator_id)

        self.assertEqual(models.Person.objects.count(), 1)
        owner = models.Section.objects.get(id=self.section_id1).person
        self.assertEqual(owner.declarator_person_id, changed_declarator_id)
        self.assertEqual(owner.id, 1)