Beispiel #1
0
    def test(self):
        self.initialize()

        person_id = 99
        person = models.Person(id=person_id)
        person.save()
        section1 = self.create_section(1,
                                       "Иванов Иван Иванович",
                                       person=person)
        section2 = self.create_section(2, "Иванов И. И.", person=person)
        section3 = self.create_section(3, "Петров И. И.")

        permalinks_folder = os.path.dirname(__file__)
        db = TPermaLinksPerson(permalinks_folder)
        db.create_db()
        db.save_dataset(setup_logging())
        db.recreate_auto_increment_table()
        db.close_db()

        section1.person = None
        section1.save()
        section2.person = None
        section2.save()
        person.delete()

        run_dedupe = RunDedupe(None, None)
        run_dedupe.handle(None,
                          permalinks_folder=permalinks_folder,
                          write_to_db=True,
                          fake_dedupe=True,
                          surname_bounds=',',
                          take_sections_with_empty_income=True,
                          rebuild=True)

        self.assertEqual(models.Person.objects.count(), 1)

        sec1 = models.Section.objects.get(id=1)
        self.assertEqual(sec1.person_id, person_id)

        sec2 = models.Section.objects.get(id=2)
        self.assertEqual(sec2.person_id, person_id)

        sec3 = models.Section.objects.get(id=3)
        self.assertEqual(sec3.person_id, person_id)
Beispiel #2
0
    def test(self):
        logger = setup_logging(logger_name="test_real_dedupe")
        sql_script = os.path.join( os.path.dirname(__file__), "disclosures.sql.person_id_5295.n")
        run_sql_script(logger, sql_script)

        permalinks_folder = os.path.dirname(__file__)
        db = TPermaLinksPerson(permalinks_folder)
        db.create_db()
        db.save_dataset(setup_logging())
        db.recreate_auto_increment_table()
        db.close_db()

        model_path = os.path.join(os.path.dirname(__file__), "../../../deduplicate/model/random_forest.pickle" )
        run_dedupe = RunDedupe(None, None)
        run_dedupe.handle(None,
                          permalinks_folder=permalinks_folder,
                          write_to_db=True,
                          surname_bounds=',',
                          model_file=model_path,
                          threshold=0.6
                          )

        person_id = 5295
        self.assertEqual(models.Person.objects.count(), 3)
        person = models.Person.objects.get(id=person_id)
        self.assertIsNotNone(person)
        self.assertEqual(5295, person.declarator_person_id)
        canon_sections  =  [
            (451721,	5295,	True),
            (452066,	5295,	True),
            (452420,	5295, True),
            (453686,	5295, False),
            (455039,	5295,	False),
            (1801614,	5296,	True),
            (5105303,	5295,	True),
            (6437989,	5297,	True),
            (6672563,	5297,	True),
            (6674154,	5297,	True),
            (6773981,	5297,	True),
        ]
        sections = []
        for s in models.Section.objects.all():
            sections.append ((s.id, s.person_id, s.dedupe_score is not None))
        self.assertListEqual(canon_sections, sections)
Beispiel #3
0
    def test(self):
        self.initialize()

        person_id = 2
        declarator_person_id = 1111
        person = models.Person(id=person_id,
                               declarator_person_id=declarator_person_id,
                               person_name="Иванов Иван Иванович")
        person.save()

        self.create_section(1, "Иванов Иван Иванович", person)
        self.create_section(2, "Иванов И. И.")

        permalinks_folder = os.path.dirname(__file__)
        db = TPermaLinksPerson(permalinks_folder)
        db.create_db()
        db.save_dataset(setup_logging())
        #db.save_max_plus_one_primary_key(3)
        db.recreate_auto_increment_table()
        db.close_db()

        run_dedupe = RunDedupe(None, None)
        run_dedupe.handle(None,
                          permalinks_folder=permalinks_folder,
                          write_to_db=True,
                          fake_dedupe=True,
                          surname_bounds=',',
                          take_sections_with_empty_income=True,
                          rebuild=True)

        self.assertEqual(models.Person.objects.count(), 1)
        person = models.Person.objects.get(id=person_id)
        self.assertIsNotNone(person)
        self.assertEqual(declarator_person_id, person.declarator_person_id)

        sec1 = models.Section.objects.get(id=1)
        self.assertEqual(sec1.person_id, person.id)

        sec2 = models.Section.objects.get(id=2)
        self.assertEqual(sec2.person_id, person.id)