Beispiel #1
0
 def test_record_generation_catches_invalid_profiles(self):
     """Check that gen_record raises ValueError for each invalid profile.

     The profiles below carry only a forename, only a birth surname, or
     only an unrelated key. Each case runs as its own subtest so that one
     failing profile does not hide the result of the others.
     """
     invalid_profiles = (
         {'forename': 'Joe'},
         {'birth_surname': 'Smith'},
         {'test': 'test'},
     )
     for profile in invalid_profiles:
         with self.subTest(profile=profile):
             # The return value is irrelevant; only the exception matters.
             with self.assertRaises(ValueError):
                 gen_record(profile)
Beispiel #2
0
 def setUp(self):
     """Create the female and male records used by the tests."""
     # Female profile: birth surname differs from the current surname.
     female_profile = dict(
         forename='Adelyn',
         mid_forename='Heidenreich',
         current_surname='Bartell',
         birth_surname='Gerlach',
     )
     self.female_record = gen_record(female_profile)

     # Male profile: only forename and current surname are supplied.
     male_profile = dict(forename='Oliver', current_surname='Nader')
     self.male_record = gen_record(male_profile)
Beispiel #3
0
 def setUp(self):
     """Populate a fresh Herd from the 'profiles_100.json' fixture.

     The fixture is read from the directory containing this test module.
     """
     fixture_dir = os.path.dirname(__file__)
     fixture_path = os.path.join(fixture_dir, 'profiles_100.json')
     with open(fixture_path, 'r') as fixture:
         profiles = tuple(json.load(fixture))
     # Generate every record before creating the herd.
     generated = list(map(gen_record, profiles))
     self.herd = Herd()
     self.herd.populate(generated)
Beispiel #4
0
 def setUp(self):
     """Build a corralled Herd from four hand-written name-only profiles."""
     profiles = (
         dict(
             forename='Adelyn',
             mid_forename='Heidenreich',
             current_surname='Bartell',
             birth_surname='Gerlach',
         ),
         dict(
             forename='John',
             mid_forename='Frederich',
             current_surname='Sanders',
         ),
         dict(forename='Joseph', current_surname='Smith'),
         dict(
             forename='John',
             mid_forename='Heidenreich',
             current_surname='Smith',
             birth_surname='Gerlach',
         ),
     )
     generated = list(map(gen_record, profiles))
     self.herd = Herd()
     self.herd.populate(generated)
     self.herd.corral()
Beispiel #5
0
 def test_no_errors(self):
     """See how the probability matrix looks with no textual errors.

     Loads 'error_free_3x2.json' from this module's directory, builds and
     corrals a Herd from it, and prints the resulting similarity matrix.
     Diagnostic only: the test makes no assertions.
     """
     data_path = os.path.join(os.path.dirname(__file__),
                              'error_free_3x2.json')
     with open(data_path, 'r') as data_file:
         population = tuple(json.load(data_file))
     records = [gen_record(profile) for profile in population]
     # A single Herd suffices; an earlier duplicate instance was discarded.
     herd = Herd()
     herd.populate(records)
     herd.corral()
     # Empty print first, presumably to separate the matrix from runner output.
     print()
     print(herd.similarity_matrix)
Beispiel #6
0
 def test_gender_misclassification(self):
     """See how gender misclassification impacts probability matrix.

     Loads 'gender_misclassification_3x2.json' from this module's
     directory, builds and corrals a Herd from it, and prints the
     resulting similarity matrix. Diagnostic only: no assertions are made.
     """
     data_path = os.path.join(os.path.dirname(__file__),
                              'gender_misclassification_3x2.json')
     with open(data_path, 'r') as data_file:
         population = tuple(json.load(data_file))
     records = [gen_record(profile) for profile in population]
     # A single Herd suffices; an earlier duplicate instance was discarded.
     herd = Herd()
     herd.populate(records)
     herd.corral()
     # Empty print first, presumably to separate the matrix from runner output.
     print()
     print(herd.similarity_matrix)
Beispiel #7
0
 def test_character_transposition(self):
     """See how character transposition in someone's name impacts probability
     matrix.

     Loads 'character_transposition_3x2.json' from this module's
     directory, builds and corrals a Herd from it, and prints the
     resulting similarity matrix. Diagnostic only: no assertions are made.
     """
     data_path = os.path.join(os.path.dirname(__file__),
                              'character_transposition_3x2.json')
     with open(data_path, 'r') as data_file:
         population = tuple(json.load(data_file))
     records = [gen_record(profile) for profile in population]
     # A single Herd suffices; an earlier duplicate instance was discarded.
     herd = Herd()
     herd.populate(records)
     herd.corral()
     # Empty print first, presumably to separate the matrix from runner output.
     print()
     print(herd.similarity_matrix)
Beispiel #8
0
 def setUp(self):
     """Build a corralled Herd from three profiles sharing sex and birth date.

     The first two profiles also share an address; the third repeats the
     first profile's forename and current surname at a different address.
     """
     # Fields common to all three profiles; kept last to preserve key order.
     born = dict(
         sex='F',
         birth_year='1977',
         birth_month='08',
         birth_day='27',
     )
     # Address plus birth data shared by the first two profiles.
     jones_resident = dict(
         address1='448 Jones Street',
         postal_code='95786',
         **born
     )
     profiles = (
         dict(
             forename='Adelyn',
             mid_forename='Heidenreich',
             current_surname='Bartell',
             birth_surname='Gerlach',
             **jones_resident
         ),
         dict(
             forename='Jane',
             current_surname='Doe',
             **jones_resident
         ),
         dict(
             forename='Adelyn',
             current_surname='Bartell',
             address1='612 Johson Ave',
             postal_code='92436',
             **born
         ),
     )
     generated = list(map(gen_record, profiles))
     self.herd = Herd()
     self.herd.populate(generated)
     self.herd.corral()
Beispiel #9
0
 def test_populating_herd(self):
     """Populate a Herd from self.profiles and expect a size of 100."""
     generated = list(map(gen_record, self.profiles))
     populated = Herd()
     populated.populate(generated)
     self.assertEqual(populated.size, 100)
Beispiel #10
0
 def setUp(self):
     """Build a corralled Herd from five profiles with varying detail.

     The first and third profiles carry identical address, sex, national
     id and birth date values; the second profile has name fields only.
     """
     # Non-name fields shared verbatim by the first and third profiles;
     # unpacked last so each profile keeps its original key order.
     shared_identity = dict(
         address1='448 Jones Street',
         postal_code='95786',
         sex='M',
         national_id1='D599776',
         birth_year='1977',
         birth_month='08',
         birth_day='27',
     )
     profiles = (
         dict(
             forename='Adelyn',
             mid_forename='Heidenreich',
             current_surname='Bartell',
             birth_surname='Gerlach',
             **shared_identity
         ),
         dict(
             forename='Adelyn',
             mid_forename='Frederich',
             current_surname='Gerlach',
         ),
         dict(
             forename='Joseph',
             current_surname='Smith',
             **shared_identity
         ),
         dict(
             forename='John',
             mid_forename='Heidenreich',
             current_surname='Smith',
             birth_surname='Gerlach',
             address1='448 Jones Avenue',
             postal_code='97856',
             sex='F',
             national_id1='D599886',
             birth_year='1986',
             birth_month='10',
             birth_day='27',
         ),
         dict(
             forename='Jason',
             current_surname='Sanders',
             address1='448 Jones Street',
             address2='Apt. A',
             postal_code='97586',
             sex='F',
             national_id1='D597976',
             birth_year='1977',
             birth_month='10',
             birth_day='27',
         ),
     )
     generated = list(map(gen_record, profiles))
     self.herd = Herd()
     self.herd.populate(generated)
     self.herd.corral()
Beispiel #11
0
 def test_record_generation_from_fake_profiles(self):
     """Generate a record per entry of self.profiles and expect 100 records."""
     generated = list(map(gen_record, self.profiles))
     self.assertEqual(len(generated), 100)