def test_record_generation_catches_invalid_profiles(self):
    """Check that ``gen_record`` raises ``ValueError`` for invalid profiles.

    Three profiles are tried: one with only a forename, one with only a
    birth surname, and one whose only key is ``'test'``.
    """
    invalid_profiles = (
        {'forename': 'Joe'},
        {'birth_surname': 'Smith'},
        {'test': 'test'},
    )
    for profile in invalid_profiles:
        # subTest reports each profile separately and keeps checking the
        # remaining ones even if an earlier profile fails the assertion.
        with self.subTest(profile=profile):
            with self.assertRaises(ValueError):
                gen_record(profile)
def setUp(self):
    """Create ``self.female_record`` and ``self.male_record`` fixtures.

    Both records are produced by passing a profile dict to ``gen_record``.
    """
    # Female profile whose birth surname differs from her current surname
    female_profile = {
        'forename': 'Adelyn',
        'mid_forename': 'Heidenreich',
        'current_surname': 'Bartell',
        'birth_surname': 'Gerlach',
    }
    # Male profile that leaves some of the name fields out
    male_profile = {
        'forename': 'Oliver',
        'current_surname': 'Nader',
    }
    self.female_record = gen_record(female_profile)
    self.male_record = gen_record(male_profile)
def setUp(self):
    """Populate ``self.herd`` with records built from ``profiles_100.json``.

    The fixture file is read from the directory containing this test
    module. Every profile in it is converted with ``gen_record`` and the
    resulting records are handed to ``Herd.populate``.
    """
    data_path = os.path.join(os.path.dirname(__file__), 'profiles_100.json')
    # Pin the encoding: without it open() falls back to the locale default,
    # which can differ between machines, whereas JSON text is UTF-8.
    with open(data_path, 'r', encoding='utf-8') as data_file:
        population = tuple(json.load(data_file))
    records = [gen_record(profile) for profile in population]
    self.herd = Herd()
    self.herd.populate(records)
def setUp(self):
    """Build ``self.herd`` from four name-only profiles and corral it.

    Each profile is turned into a record with ``gen_record``; the records
    are passed to ``populate`` and then ``corral`` is called on the herd.
    """
    profiles = (
        {
            'forename': 'Adelyn',
            'mid_forename': 'Heidenreich',
            'current_surname': 'Bartell',
            'birth_surname': 'Gerlach',
        },
        {
            'forename': 'John',
            'mid_forename': 'Frederich',
            'current_surname': 'Sanders',
        },
        {
            'forename': 'Joseph',
            'current_surname': 'Smith',
        },
        {
            'forename': 'John',
            'mid_forename': 'Heidenreich',
            'current_surname': 'Smith',
            'birth_surname': 'Gerlach',
        },
    )
    generated = list(map(gen_record, profiles))
    herd = self.herd = Herd()
    herd.populate(generated)
    herd.corral()
def test_no_errors(self):
    """See how the probability matrix looks with no textual errors.

    Loads ``error_free_3x2.json`` from this module's directory, builds a
    herd from its profiles, corrals it, and prints the herd's
    ``similarity_matrix`` for manual inspection. Nothing is asserted.
    """
    data_path = os.path.join(os.path.dirname(__file__),
                             'error_free_3x2.json')
    # Pin the encoding so the fixture is not read with a locale default.
    with open(data_path, 'r', encoding='utf-8') as data_file:
        population = tuple(json.load(data_file))
    records = [gen_record(profile) for profile in population]
    # A single Herd is enough; the original built one and discarded it.
    herd = Herd()
    herd.populate(records)
    herd.corral()
    print()
    print(herd.similarity_matrix)
def test_gender_misclassification(self):
    """See how gender misclassification impacts probability matrix.

    Loads ``gender_misclassification_3x2.json`` from this module's
    directory, builds a herd from its profiles, corrals it, and prints the
    herd's ``similarity_matrix`` for manual inspection. Nothing is
    asserted.
    """
    data_path = os.path.join(os.path.dirname(__file__),
                             'gender_misclassification_3x2.json')
    # Pin the encoding so the fixture is not read with a locale default.
    with open(data_path, 'r', encoding='utf-8') as data_file:
        population = tuple(json.load(data_file))
    records = [gen_record(profile) for profile in population]
    # A single Herd is enough; the original built one and discarded it.
    herd = Herd()
    herd.populate(records)
    herd.corral()
    print()
    print(herd.similarity_matrix)
def test_character_transposition(self):
    """See how character transposition in a name impacts probability matrix.

    Loads ``character_transposition_3x2.json`` from this module's
    directory, builds a herd from its profiles, corrals it, and prints the
    herd's ``similarity_matrix`` for manual inspection. Nothing is
    asserted.
    """
    data_path = os.path.join(os.path.dirname(__file__),
                             'character_transposition_3x2.json')
    # Pin the encoding so the fixture is not read with a locale default.
    with open(data_path, 'r', encoding='utf-8') as data_file:
        population = tuple(json.load(data_file))
    records = [gen_record(profile) for profile in population]
    # A single Herd is enough; the original built one and discarded it.
    herd = Herd()
    herd.populate(records)
    herd.corral()
    print()
    print(herd.similarity_matrix)
def setUp(self):
    """Build ``self.herd`` from three profiles and corral it.

    The profiles carry name, address, postal code, sex and birth date
    fields. Each is converted with ``gen_record`` before the herd is
    populated and ``corral`` is called.
    """
    profiles = (
        {
            'forename': 'Adelyn',
            'mid_forename': 'Heidenreich',
            'current_surname': 'Bartell',
            'birth_surname': 'Gerlach',
            'address1': '448 Jones Street',
            'postal_code': '95786',
            'sex': 'F',
            'birth_year': '1977',
            'birth_month': '08',
            'birth_day': '27',
        },
        {
            'forename': 'Jane',
            'current_surname': 'Doe',
            'address1': '448 Jones Street',
            'postal_code': '95786',
            'sex': 'F',
            'birth_year': '1977',
            'birth_month': '08',
            'birth_day': '27',
        },
        {
            'forename': 'Adelyn',
            'current_surname': 'Bartell',
            'address1': '612 Johson Ave',
            'postal_code': '92436',
            'sex': 'F',
            'birth_year': '1977',
            'birth_month': '08',
            'birth_day': '27',
        },
    )
    generated = list(map(gen_record, profiles))
    herd = self.herd = Herd()
    herd.populate(generated)
    herd.corral()
def test_populating_herd(self):
    """A herd populated from ``self.profiles`` should report a size of 100."""
    # self.profiles is presumably prepared by this class's setUp, which is
    # not shown here.
    generated = list(map(gen_record, self.profiles))
    populated = Herd()
    populated.populate(generated)
    self.assertEqual(populated.size, 100)
def setUp(self):
    """Build ``self.herd`` from five profiles and corral it.

    Four of the profiles carry address, sex, national id and birth date
    fields alongside the names; one has name fields only. Each is
    converted with ``gen_record`` before the herd is populated and
    ``corral`` is called.
    """
    profiles = (
        {
            'forename': 'Adelyn',
            'mid_forename': 'Heidenreich',
            'current_surname': 'Bartell',
            'birth_surname': 'Gerlach',
            'address1': '448 Jones Street',
            'postal_code': '95786',
            'sex': 'M',
            'national_id1': 'D599776',
            'birth_year': '1977',
            'birth_month': '08',
            'birth_day': '27',
        },
        {
            'forename': 'Adelyn',
            'mid_forename': 'Frederich',
            'current_surname': 'Gerlach',
        },
        {
            'forename': 'Joseph',
            'current_surname': 'Smith',
            'address1': '448 Jones Street',
            'postal_code': '95786',
            'sex': 'M',
            'national_id1': 'D599776',
            'birth_year': '1977',
            'birth_month': '08',
            'birth_day': '27',
        },
        {
            'forename': 'John',
            'mid_forename': 'Heidenreich',
            'current_surname': 'Smith',
            'birth_surname': 'Gerlach',
            'address1': '448 Jones Avenue',
            'postal_code': '97856',
            'sex': 'F',
            'national_id1': 'D599886',
            'birth_year': '1986',
            'birth_month': '10',
            'birth_day': '27',
        },
        {
            'forename': 'Jason',
            'current_surname': 'Sanders',
            'address1': '448 Jones Street',
            'address2': 'Apt. A',
            'postal_code': '97586',
            'sex': 'F',
            'national_id1': 'D597976',
            'birth_year': '1977',
            'birth_month': '10',
            'birth_day': '27',
        },
    )
    generated = list(map(gen_record, profiles))
    herd = self.herd = Herd()
    herd.populate(generated)
    herd.corral()
def test_record_generation_from_fake_profiles(self):
    """Running ``gen_record`` over ``self.profiles`` should give 100 records."""
    # self.profiles is presumably prepared by this class's setUp, which is
    # not shown here.
    generated = list(map(gen_record, self.profiles))
    self.assertEqual(len(generated), 100)