def test_unify_fast_matching(self):
    """Check unify with the 'default' matcher when fast matching is on.

    Six unique identities are expected before the command and five
    after it; the merged identity and the command output are verified.
    """
    self.assertEqual(len(api.unique_identities(self.db)), 6)

    result = self.cmd.unify(matching='default', fast_matching=True)
    self.assertEqual(result, CMD_SUCCESS)

    merged = api.unique_identities(self.db)
    self.assertEqual(len(merged), 5)

    # jsmith identities with same email address
    jsmith = merged[0]
    self.assertEqual(jsmith.uuid, '178315df7941fc76a6ffb06fd5b00f6932ad9c41')

    ids = jsmith.identities
    ids.sort(key=lambda identity: identity.id)
    self.assertEqual(len(ids), 7)

    for position, source in ((1, 'mls'), (3, 'scm')):
        entry = ids[position]
        self.assertEqual(entry.email, '*****@*****.**')
        self.assertEqual(entry.source, source)

    self.assertEqual(sys.stdout.getvalue().strip(), UNIFY_DEFAULT_OUTPUT)
def test_unify_email_name_matcher_with_blacklist(self):
    """Check the 'email-name' unify when the blacklist has entries."""
    # Add some entries to the blacklist
    for excluded in ('Jane Rae Doe', '*****@*****.**'):
        api.add_to_matching_blacklist(self.db, excluded)

    self.assertEqual(len(api.unique_identities(self.db)), 6)

    result = self.cmd.unify(matching='email-name')
    self.assertEqual(result, CMD_SUCCESS)

    merged = api.unique_identities(self.db)
    self.assertEqual(len(merged), 5)

    # Only two identities were merged due to the blacklist
    expected = (
        ('178315df7941fc76a6ffb06fd5b00f6932ad9c41', 4),
        ('400fdfaab5918d1b7e0e0efba4797abdc378bd7d', 6),
    )
    for position, (uuid, total) in enumerate(expected):
        jsmith = merged[position]
        self.assertEqual(jsmith.uuid, uuid)
        self.assertEqual(len(jsmith.identities), total)
def test_unify(self):
    """Check unify with the 'default' matcher.

    Six unique identities are expected before the command and five
    after it; the merged identity and the command output are verified.
    """
    self.assertEqual(len(api.unique_identities(self.db)), 6)

    result = self.cmd.unify(matching='default')
    self.assertEqual(result, CMD_SUCCESS)

    merged = api.unique_identities(self.db)
    self.assertEqual(len(merged), 5)

    # jsmith identities with same email address
    jsmith = merged[1]
    self.assertEqual(jsmith.uuid, '72ae225d363c83456d788da14eeb0718efe7a0fc')

    ids = jsmith.identities
    ids.sort(key=lambda identity: identity.id)
    self.assertEqual(len(ids), 7)

    for position, source in ((0, 'mls'), (2, 'scm')):
        entry = ids[position]
        self.assertEqual(entry.email, '*****@*****.**')
        self.assertEqual(entry.source, source)

    self.assertEqual(sys.stdout.getvalue().strip(), UNIFY_DEFAULT_OUTPUT)
def test_unify_email_name_matcher(self):
    """Check unify with the 'email-name' matcher.

    Six unique identities are expected before the command and three
    after it; the command output is compared with the expected text.
    """
    self.assertEqual(len(api.unique_identities(self.db)), 6)

    result = self.cmd.unify(matching='email-name')
    self.assertEqual(result, CMD_SUCCESS)

    self.assertEqual(len(api.unique_identities(self.db)), 3)

    printed = sys.stdout.getvalue().strip()
    self.assertEqual(printed, UNIFY_EMAIL_NAME_OUTPUT)
def test_unify_no_strict(self):
    """Check unify with the 'email-name' matcher and strict mode disabled.

    Six unique identities are expected before the command and two
    after it; the command output is compared with the expected text.
    """
    self.assertEqual(len(api.unique_identities(self.db)), 6)

    result = self.cmd.unify(matching='email-name', no_strict_matching=True)
    self.assertEqual(result, CMD_SUCCESS)

    self.assertEqual(len(api.unique_identities(self.db)), 2)

    printed = sys.stdout.getvalue().strip()
    self.assertEqual(printed, UNIFY_NO_STRICT_OUTPUT)
def test_no_error_email_pattern_for_name_profile(self):
    """Check that autocomplete copes with empty name and username values."""
    # None values for name and username
    owner = api.add_identity(self.db, 'mls', '*****@*****.**', None, None)
    for username in ('', None):
        api.add_identity(self.db, 'mls', '*****@*****.**', None, username,
                         uuid=owner)

    # Empty values on name and username fields do not crash
    # when the email pattern is checked
    self.cmd.autocomplete(['mls', 'its'])

    found = api.unique_identities(self.db, uuid=owner)[0]
    self.assertEqual(found.uuid, owner)
    self.assertEqual(found.profile.name, None)
    self.assertEqual(found.profile.email, '*****@*****.**')
def test_dates_out_of_bounds(self):
    """Check the enrollment dates imported from out-of-bounds values.

    Where the file has 1800-01-01 or 2200-01-01, the stored dates are
    expected to be 1900-01-01 and 2100-01-01 respectively.
    """
    lower_bound = datetime.datetime(1900, 1, 1, 0, 0)
    upper_bound = datetime.datetime(2100, 1, 1, 0, 0)

    path = datadir('sortinghat_ids_dates_out_of_bounds.json')
    parser = self.get_parser(path)

    # This command returns a success value even when some data is wrong
    result = self.cmd.import_identities(parser)
    self.assertEqual(result, CMD_SUCCESS)

    # Check the contents of the registry
    uids = api.unique_identities(self.db)
    self.assertEqual(len(uids), 1)

    # Jane Roe
    jroe = uids[0]
    self.assertEqual(jroe.uuid, '17ab00ed3825ec2f50483e33c88df223264182ba')

    enrollments = api.enrollments(self.db, jroe.uuid)
    self.assertEqual(len(enrollments), 2)

    first = enrollments[0]
    self.assertEqual(first.organization.name, 'Bitergia')
    self.assertEqual(first.start, datetime.datetime(1999, 1, 1, 0, 0))
    # The json file has 2200-01-01T00:00:00
    self.assertEqual(first.end, upper_bound)

    second = enrollments[1]
    self.assertEqual(second.organization.name, 'Example')
    # The json file has 1800-01-01T00:00:00
    self.assertEqual(second.start, lower_bound)
    self.assertEqual(second.end, upper_bound)
def test_dates_out_of_bounds(self):
    """Check the enrollment dates imported from out-of-bounds values.

    Where the file has 1800-01-01 or 2200-01-01, the stored dates are
    expected to be 1900-01-01 and 2100-01-01 respectively.
    """
    lower_bound = datetime.datetime(1900, 1, 1, 0, 0)
    upper_bound = datetime.datetime(2100, 1, 1, 0, 0)

    path = datadir('sortinghat_ids_dates_out_of_bounds.json')
    parser = self.get_parser(path)

    # This command returns a success value even when some data is wrong
    result = self.cmd.import_identities(parser)
    self.assertEqual(result, CMD_SUCCESS)

    # Check the contents of the registry
    uids = api.unique_identities(self.db)
    self.assertEqual(len(uids), 1)

    # Jane Roe
    jroe = uids[0]
    self.assertEqual(jroe.uuid, '17ab00ed3825ec2f50483e33c88df223264182ba')

    enrollments = api.enrollments(self.db, jroe.uuid)
    self.assertEqual(len(enrollments), 2)

    first = enrollments[0]
    self.assertEqual(first.organization.name, 'Bitergia')
    self.assertEqual(first.start, datetime.datetime(1999, 1, 1, 0, 0))
    # The json file has 2200-01-01T00:00:00
    self.assertEqual(first.end, upper_bound)

    second = enrollments[1]
    self.assertEqual(second.organization.name, 'Example')
    # The json file has 1800-01-01T00:00:00
    self.assertEqual(second.start, lower_bound)
    self.assertEqual(second.end, upper_bound)
def test_unify_with_blacklist(self):
    """Check the 'default' unify when the blacklist has entries.

    The number of unique identities is expected to stay at six.
    """
    # Add some entries to the blacklist
    for excluded in ('Jane Rae Doe', '*****@*****.**'):
        api.add_to_matching_blacklist(self.db, excluded)

    self.assertEqual(len(api.unique_identities(self.db)), 6)

    result = self.cmd.unify(matching='default')
    self.assertEqual(result, CMD_SUCCESS)

    # No match was found
    self.assertEqual(len(api.unique_identities(self.db)), 6)
def test_autocomplete_profiles(self):
    """Check whether it autocompletes the profiles based on a priority list.

    The command is run with the sources 'mls' and 'its' and the profiles
    of the first three unique identities are verified.
    """
    # The return value is not checked by this test, so it is not kept
    self.cmd.autocomplete(['mls', 'its'])

    uids = api.unique_identities(self.db)

    # It mixes the information of the identities with
    # maximum priority, using the longest name value
    self.assertEqual(uids[0].uuid, '02f161840469eb5348dec798166a171b34f0bc8a')
    self.assertEqual(uids[0].profile.name, 'John Smith')
    self.assertEqual(uids[0].profile.email, '*****@*****.**')

    # Unique identities without identities from
    # the given sources are not updated
    self.assertEqual(uids[1].uuid, '52e0aa0a14826627e633fd15332988686b730ab3')
    self.assertEqual(uids[1].profile, None)

    # Only one source available
    self.assertEqual(uids[2].uuid, '65fa77134a2d0bb4ed9252b853d9074e4d4c2eb4')
    self.assertEqual(uids[2].profile.name, 'jdoe')
    self.assertEqual(uids[2].profile.email, None)
def test_autocomplete_profiles(self):
    """Check whether it autocompletes the profiles based on a priority list.

    The command is run with the sources 'mls' and 'its' and the profiles
    of the first three unique identities are verified.
    """
    # The return value is not checked by this test, so it is not kept
    self.cmd.autocomplete(['mls', 'its'])

    uids = api.unique_identities(self.db)

    # Unique identities without identities from
    # the given sources are not updated
    self.assertEqual(uids[0].uuid, '17ab00ed3825ec2f50483e33c88df223264182ba')
    self.assertEqual(uids[0].profile, None)

    # Only one source available
    self.assertEqual(uids[1].uuid, 'eb10fb9519d69d75a6cdcd76707943a513685c09')
    self.assertEqual(uids[1].profile.name, 'jdoe')
    self.assertEqual(uids[1].profile.email, None)

    # It mixes the information of the identities with
    # maximum priority, using the longest name value
    self.assertEqual(uids[2].uuid, 'ffefc2e3f2a255e9450ac9e2d36f37c28f51bd73')
    self.assertEqual(uids[2].profile.name, 'John Smith')
    self.assertEqual(uids[2].profile.email, '*****@*****.**')
def test_unify_with_sources_list(self):
    """Check the 'email-name' unify restricted to a list of sources."""
    allowed_sources = ['mls', 'alt']

    self.assertEqual(len(api.unique_identities(self.db)), 6)

    result = self.cmd.unify(matching='email-name', sources=allowed_sources)
    self.assertEqual(result, CMD_SUCCESS)

    # Only jrae identities are merged
    self.assertEqual(len(api.unique_identities(self.db)), 5)

    printed = sys.stdout.getvalue().strip()
    self.assertEqual(printed, UNIFY_SOURCES_OUTPUT)
def test_valid_identities_already_exist(self):
    """Check the import when an identity is already stored under another UUID."""
    # The identity already exists but with a different UUID
    existing = api.add_identity(self.db, 'unknown', email='*****@*****.**')
    api.add_identity(self.db, source='scm', email='*****@*****.**',
                     name='John Smith', username='******', uuid=existing)
    api.edit_profile(self.db, existing, name='John Smith', is_bot=False,
                     country_code='US')

    parser = self.get_parser(datadir('sortinghat_valid.json'))
    result = self.cmd.import_identities(parser)
    self.assertEqual(result, CMD_SUCCESS)

    # Check the contents of the registry
    uids = api.unique_identities(self.db)
    self.assertEqual(len(uids), 2)

    # John Smith
    jsmith = uids[1]
    self.assertEqual(jsmith.uuid, '2371a34a0ac65fbd9d631464ee41d583ec0e1e88')

    # The profile is updated because a new one was given
    profile = jsmith.profile
    self.assertEqual(profile.uuid, '2371a34a0ac65fbd9d631464ee41d583ec0e1e88')
    self.assertEqual(profile.name, None)
    self.assertEqual(profile.email, '*****@*****.**')
    self.assertEqual(profile.gender, 'male')
    self.assertEqual(profile.gender_acc, 100)
    self.assertEqual(profile.is_bot, True)
    self.assertEqual(profile.country, None)

    ids = self.sort_identities(jsmith.identities)
    self.assertEqual(len(ids), 3)

    # (id, name, username, source) expected at each position
    expected = (
        ('2371a34a0ac65fbd9d631464ee41d583ec0e1e88', None, None, 'unknown'),
        ('880b3dfcb3a08712e5831bddc3dfe81fc5d7b331', 'John Smith', None, 'scm'),
        ('a9b403e150dd4af8953a52a4bb841051e4b705d9', 'John Smith', 'jsmith', 'scm'),
    )
    for position, (id_, name, username, source) in enumerate(expected):
        identity = ids[position]
        self.assertEqual(identity.id, id_)
        self.assertEqual(identity.name, name)
        self.assertEqual(identity.email, '*****@*****.**')
        self.assertEqual(identity.username, username)
        self.assertEqual(identity.source, source)
def test_valid_identities_already_exist(self):
    """Check the import when an identity is already stored under another UUID."""
    # The identity already exists but with a different UUID
    existing = api.add_identity(self.db, 'unknown', email='*****@*****.**')
    api.add_identity(self.db, source='scm', email='*****@*****.**',
                     name='John Smith', username='******', uuid=existing)
    api.edit_profile(self.db, existing, name='John Smith', is_bot=False,
                     country_code='US')

    parser = self.get_parser('data/sortinghat_valid.json')
    result = self.cmd.import_identities(parser)
    self.assertEqual(result, CMD_SUCCESS)

    # Check the contents of the registry
    uids = api.unique_identities(self.db)
    self.assertEqual(len(uids), 2)

    # John Smith
    jsmith = uids[0]
    self.assertEqual(jsmith.uuid, '23fe3a011190a27a7c5cf6f8925de38ff0994d8d')

    # The profile was not updated because it was already available
    profile = jsmith.profile
    self.assertEqual(profile.uuid, '23fe3a011190a27a7c5cf6f8925de38ff0994d8d')
    self.assertEqual(profile.name, 'John Smith')
    self.assertEqual(profile.email, None)
    self.assertEqual(profile.is_bot, False)
    self.assertEqual(profile.country_code, 'US')
    self.assertEqual(profile.country.code, 'US')
    self.assertEqual(profile.country.name, 'United States of America')

    ids = self.sort_identities(jsmith.identities)
    self.assertEqual(len(ids), 3)

    # (id, name, username, source) expected at each position
    expected = (
        ('03e12d00e37fd45593c49a5a5a1652deca4cf302', 'John Smith', 'jsmith', 'scm'),
        ('23fe3a011190a27a7c5cf6f8925de38ff0994d8d', None, None, 'unknown'),
        ('75d95d6c8492fd36d24a18bd45d62161e05fbc97', 'John Smith', None, 'scm'),
    )
    for position, (id_, name, username, source) in enumerate(expected):
        identity = ids[position]
        self.assertEqual(identity.id, id_)
        self.assertEqual(identity.name, name)
        self.assertEqual(identity.email, '*****@*****.**')
        self.assertEqual(identity.username, username)
        self.assertEqual(identity.source, source)
def test_identities_load_file(self):
    """Check that the identities load task stores four unique identities."""
    setup_http_server()

    loader = TaskIdentitiesLoad(Config(CONF_FILE))
    loader.execute()

    # Check the number of identities loaded from local and remote files
    loaded = api.unique_identities(self.sh_db)
    self.assertEqual(len(loaded), 4)
def test_unify_success_no_recovery_mode(self):
    """Check unify when a recovery file exists but recovery mode is off.

    The recovery file is expected to exist both before and after the
    command runs.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    shutil.copyfile(os.path.join(here, 'data/unify_matches.log'),
                    self.recovery_path)

    target = 'sortinghat.cmd.unify.RecoveryFile.location'
    with unittest.mock.patch(target) as mock_location:
        mock_location.return_value = self.recovery_path

        self.assertEqual(len(api.unique_identities(self.db)), 6)
        self.assertTrue(os.path.exists(self.recovery_path))

        result = self.cmd.unify(matching='default')
        self.assertEqual(result, CMD_SUCCESS)

        merged = api.unique_identities(self.db)
        self.assertEqual(len(merged), 5)

        # jsmith identities with same email address
        jsmith = merged[0]
        self.assertEqual(jsmith.uuid, '178315df7941fc76a6ffb06fd5b00f6932ad9c41')

        ids = jsmith.identities
        ids.sort(key=lambda identity: identity.id)
        self.assertEqual(len(ids), 7)

        for position, source in ((1, 'mls'), (3, 'scm')):
            entry = ids[position]
            self.assertEqual(entry.email, '*****@*****.**')
            self.assertEqual(entry.source, source)

        self.assertEqual(sys.stdout.getvalue().strip(), UNIFY_DEFAULT_OUTPUT)
        self.assertTrue(os.path.exists(self.recovery_path))
def test_unify_email_name_matcher_with_blacklist(self):
    """Check the 'email-name' unify when the blacklist has entries."""
    # Add some entries to the blacklist
    for excluded in ('Jane Rae Doe', '*****@*****.**'):
        api.add_to_matching_blacklist(self.db, excluded)

    self.assertEqual(len(api.unique_identities(self.db)), 6)

    result = self.cmd.unify(matching='email-name')
    self.assertEqual(result, CMD_SUCCESS)

    merged = api.unique_identities(self.db)
    self.assertEqual(len(merged), 5)

    # Only two identities were merged due to the blacklist
    jsmith = merged[2]
    self.assertEqual(jsmith.uuid, '75d95d6c8492fd36d24a18bd45d62161e05fbc97')
    self.assertEqual(len(jsmith.identities), 6)
def test_create_profile_from_identities(self):
    """Check that profiles are built from the data of the identities."""
    path = datadir('sortinghat_identities_profiles.json')
    parser = self.get_parser(path)

    result = self.cmd.import_identities(parser)
    self.assertEqual(result, CMD_SUCCESS)

    # Check the contents of the registry
    uids = api.unique_identities(self.db)
    self.assertEqual(len(uids), 3)

    # (uuid, profile name) expected at each position:
    # Jane Rae, John Smith and John Doe
    expected = (
        ('17ab00ed3825ec2f50483e33c88df223264182ba', 'Jane Roe'),
        ('a9b403e150dd4af8953a52a4bb841051e4b705d9', 'John Smith'),
        ('c2f8c3d7b49cdbfb0af9fc9db2ca098ec6c06c2f', 'jdoe'),
    )
    for position, (uuid, name) in enumerate(expected):
        uid = uids[position]
        self.assertEqual(uid.uuid, uuid)

        profile = uid.profile
        self.assertEqual(profile.uuid, uuid)
        self.assertEqual(profile.name, name)
        self.assertEqual(profile.email, '*****@*****.**')
        self.assertEqual(profile.gender, None)
        self.assertEqual(profile.gender_acc, None)
        self.assertEqual(profile.is_bot, False)
        self.assertEqual(profile.country_code, None)
        self.assertEqual(profile.country, None)
def test_valid_identities_already_exist(self):
    """Check the import when an identity is already stored under another UUID."""
    # The identity already exists but with a different UUID
    existing = api.add_identity(self.db, 'unknown', email='*****@*****.**')
    api.add_identity(self.db, source='scm', email='*****@*****.**',
                     name='John Smith', username='******', uuid=existing)
    api.edit_profile(self.db, existing, name='John Smith', is_bot=False,
                     country_code='US')

    parser = self.get_parser('data/sortinghat_valid.json')
    result = self.cmd.import_identities(parser)
    self.assertEqual(result, CMD_SUCCESS)

    # Check the contents of the registry
    uids = api.unique_identities(self.db)
    self.assertEqual(len(uids), 2)

    # John Smith
    jsmith = uids[1]
    self.assertEqual(jsmith.uuid, '2371a34a0ac65fbd9d631464ee41d583ec0e1e88')

    # The profile is updated because a new one was given
    profile = jsmith.profile
    self.assertEqual(profile.uuid, '2371a34a0ac65fbd9d631464ee41d583ec0e1e88')
    self.assertEqual(profile.name, None)
    self.assertEqual(profile.email, '*****@*****.**')
    self.assertEqual(profile.is_bot, True)
    self.assertEqual(profile.country, None)

    ids = self.sort_identities(jsmith.identities)
    self.assertEqual(len(ids), 3)

    # (id, name, username, source) expected at each position
    expected = (
        ('2371a34a0ac65fbd9d631464ee41d583ec0e1e88', None, None, 'unknown'),
        ('880b3dfcb3a08712e5831bddc3dfe81fc5d7b331', 'John Smith', None, 'scm'),
        ('a9b403e150dd4af8953a52a4bb841051e4b705d9', 'John Smith', 'jsmith', 'scm'),
    )
    for position, (id_, name, username, source) in enumerate(expected):
        identity = ids[position]
        self.assertEqual(identity.id, id_)
        self.assertEqual(identity.name, name)
        self.assertEqual(identity.email, '*****@*****.**')
        self.assertEqual(identity.username, username)
        self.assertEqual(identity.source, source)
def test_create_profile_from_identities(self):
    """Check that profiles are built from the data of the identities."""
    path = datadir('sortinghat_identities_profiles.json')
    parser = self.get_parser(path)

    result = self.cmd.import_identities(parser)
    self.assertEqual(result, CMD_SUCCESS)

    # Check the contents of the registry
    uids = api.unique_identities(self.db)
    self.assertEqual(len(uids), 3)

    # (uuid, profile name) expected at each position:
    # Jane Rae, John Smith and John Doe
    expected = (
        ('17ab00ed3825ec2f50483e33c88df223264182ba', 'Jane Roe'),
        ('a9b403e150dd4af8953a52a4bb841051e4b705d9', 'John Smith'),
        ('c2f8c3d7b49cdbfb0af9fc9db2ca098ec6c06c2f', 'jdoe'),
    )
    for position, (uuid, name) in enumerate(expected):
        uid = uids[position]
        self.assertEqual(uid.uuid, uuid)

        profile = uid.profile
        self.assertEqual(profile.uuid, uuid)
        self.assertEqual(profile.name, name)
        self.assertEqual(profile.email, '*****@*****.**')
        self.assertEqual(profile.gender, None)
        self.assertEqual(profile.gender_acc, None)
        self.assertEqual(profile.is_bot, False)
        self.assertEqual(profile.country_code, None)
        self.assertEqual(profile.country, None)
def unique_identities(cls, sh_db):
    """Yield the unique identities available in SortingHat, one by one.

    Any exception raised while fetching or yielding them is logged at
    debug level and ends the iteration instead of propagating.

    :param sh_db: SortingHat database
    """
    try:
        for uidentity in api.unique_identities(sh_db):
            yield uidentity
    except Exception as error:
        logger.debug("Unique identities not returned from SortingHat due to %s",
                     str(error))
def unique_identities(cls, sh_db):
    """Yield the unique identities available in SortingHat, one by one.

    Any exception raised while fetching or yielding them is logged at
    debug level and ends the iteration instead of propagating.

    :param sh_db: SortingHat database
    """
    try:
        for unique_identity in api.unique_identities(sh_db):
            yield unique_identity
    except Exception as e:
        # Pass the exception as a logging argument so the message is
        # only formatted when the debug level is enabled
        logger.debug("[sortinghat] Unique identities not returned due to %s", e)
def test_unify_success_no_recovery_mode(self):
    """Check unify when a recovery file exists but recovery mode is off.

    The recovery file is expected to exist both before and after the
    command runs.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    shutil.copyfile(os.path.join(here, 'data/unify_matches.log'),
                    self.recovery_path)

    target = 'sortinghat.cmd.unify.RecoveryFile.location'
    with unittest.mock.patch(target) as mock_location:
        mock_location.return_value = self.recovery_path

        self.assertEqual(len(api.unique_identities(self.db)), 6)
        self.assertTrue(os.path.exists(self.recovery_path))

        result = self.cmd.unify(matching='default')
        self.assertEqual(result, CMD_SUCCESS)

        merged = api.unique_identities(self.db)
        self.assertEqual(len(merged), 5)

        # jsmith identities with same email address
        jsmith = merged[0]
        self.assertEqual(jsmith.uuid, '178315df7941fc76a6ffb06fd5b00f6932ad9c41')

        ids = jsmith.identities
        ids.sort(key=lambda identity: identity.id)
        self.assertEqual(len(ids), 7)

        for position, source in ((1, 'mls'), (3, 'scm')):
            entry = ids[position]
            self.assertEqual(entry.email, '*****@*****.**')
            self.assertEqual(entry.source, source)

        self.assertEqual(sys.stdout.getvalue().strip(), UNIFY_DEFAULT_OUTPUT)
        self.assertTrue(os.path.exists(self.recovery_path))
def test_load(self):
    """Check that identities and organizations are loaded from a file."""
    result = self.cmd.run(datadir('sortinghat_valid.json'), '--verbose')
    self.assertEqual(result, CMD_SUCCESS)

    self.assertEqual(len(api.unique_identities(self.db)), 2)
    self.assertEqual(len(api.registry(self.db)), 3)

    printed = sys.stdout.getvalue().strip()
    self.assertEqual(printed, LOAD_OUTPUT)
def test_autogender(self):
    """Test whether autogender SH command is executed.

    The 'gender' option is checked to be off by default, then enabled;
    identities are loaded, autogender is run and the genders stored in
    the profiles are compared with the expected ones.
    """
    config = Config(CONF_FILE)

    # Test default value
    self.assertEqual(config.get_conf()['sortinghat']['gender'], False)
    config.get_conf()['sortinghat']['gender'] = True

    # Load some identities
    task = TaskIdentitiesLoad(config)
    task.execute()

    task = TaskIdentitiesMerge(config)
    self.assertIsNone(task.do_autogender())

    # Query the registry once, after autogender has run; the earlier
    # query result was never used
    uids = api.unique_identities(self.sh_db)

    found_genders = [uid.profile.gender for uid in uids]
    expected_genders = ['male', 'female', 'male', 'male']

    self.assertEqual(found_genders, expected_genders)
def test_autocomplete_no_sources(self):
    """Check that nothing is done when no identity comes from the sources."""
    result = self.cmd.run('source1', 'source2')
    self.assertEqual(result, CMD_SUCCESS)

    printed = sys.stdout.getvalue().strip()
    self.assertEqual(printed, "")

    # Unique identities remain without profiles
    uids = api.unique_identities(self.db)
    for position in range(3):
        self.assertEqual(uids[position].profile, None)
def test_create_profile_from_identities(self):
    """Check that profiles are built from the data of the identities."""
    parser = self.get_parser('data/sortinghat_identities_profiles.json')

    result = self.cmd.import_identities(parser)
    self.assertEqual(result, CMD_SUCCESS)

    # Check the contents of the registry
    uids = api.unique_identities(self.db)
    self.assertEqual(len(uids), 3)

    # (uuid, profile name) expected at each position:
    # John Smith, John Doe and Jane Rae
    expected = (
        ('03e12d00e37fd45593c49a5a5a1652deca4cf302', 'John Smith'),
        ('3c3c71c67952135eb92a9cace538ffbe6cb39d88', 'jdoe'),
        ('52e0aa0a14826627e633fd15332988686b730ab3', 'Jane Roe'),
    )
    for position, (uuid, name) in enumerate(expected):
        uid = uids[position]
        self.assertEqual(uid.uuid, uuid)

        profile = uid.profile
        self.assertEqual(profile.uuid, uuid)
        self.assertEqual(profile.name, name)
        self.assertEqual(profile.email, '*****@*****.**')
        self.assertEqual(profile.is_bot, False)
        self.assertEqual(profile.country_code, None)
        self.assertEqual(profile.country, None)
def test_load_organizations(self):
    """Check that only organizations are loaded when '--orgs' is given."""
    result = self.cmd.run('--orgs', datadir('sortinghat_orgs_valid.json'))
    self.assertEqual(result, CMD_SUCCESS)

    self.assertEqual(len(api.unique_identities(self.db)), 0)
    self.assertEqual(len(api.registry(self.db)), 3)

    printed = sys.stdout.getvalue().strip()
    self.assertEqual(printed, LOAD_SH_ORGS_OUTPUT)

    warnings = sys.stderr.getvalue().strip()
    self.assertEqual(warnings, LOAD_ORGS_OUTPUT_WARNING)
def test_autogender(self):
    """Test autogender method.

    After running autogender with an API token, the gender and its
    accuracy are checked on four profiles, and the recorded HTTP
    requests are compared with the expected query strings.
    """
    http_requests = setup_genderize_server()

    self.cmd.autogender(api_token='abcdefghi')

    uids = api.unique_identities(self.db)

    prf = uids[0].profile
    self.assertEqual(prf.uuid, '2a9ec221b8dd5d5a85ae0e3276b8b2c3618ee15e')
    self.assertEqual(prf.gender, 'female')
    self.assertEqual(prf.gender_acc, 100)

    # Jane Rae gender is not updated because it was already set
    prf = uids[1].profile
    self.assertEqual(prf.uuid, '3e1eccdb1e52ea56225f419d3e532fe9133c7821')
    self.assertEqual(prf.gender, 'unknown')
    self.assertEqual(prf.gender_acc, 100)

    prf = uids[2].profile
    self.assertEqual(prf.uuid, '539acca35c2e8502951a97d2d5af8b0857440b50')
    self.assertEqual(prf.gender, 'male')
    self.assertEqual(prf.gender_acc, 99)

    prf = uids[3].profile
    self.assertEqual(prf.uuid, 'a39ac334be9f17bfc7f9f21bbb25f389388f8e18')
    self.assertEqual(prf.gender, 'male')
    self.assertEqual(prf.gender_acc, 99)

    # Check requests
    expected = [
        {
            'name': ['jane'],
            'apikey': ['abcdefghi']
        },
        {
            'name': ['john'],
            'apikey': ['abcdefghi']
        },
    ]

    self.assertEqual(len(http_requests), len(expected))

    # Lengths were just asserted equal, so pairing with zip checks
    # every request against its expected query string
    for request, query in zip(http_requests, expected):
        self.assertDictEqual(request.querystring, query)
def test_matching_no_strict(self):
    """Check that identities are merged when strict matching is disabled."""
    # First, insert the identity that will match with one
    # from the file
    api.add_identity(self.db, 'unknown', email='jsmith@example')

    path = datadir('sortinghat_no_strict_valid.json')
    parser = self.get_parser(path)

    result = self.cmd.import_identities(parser, matching='default',
                                        no_strict_matching=True)
    self.assertEqual(result, CMD_SUCCESS)

    # Check whether identities were merged
    uids = api.unique_identities(self.db)
    self.assertEqual(len(uids), 1)
    self.assertEqual(len(uids[0].identities), 2)
def test_autocomplete_no_sources(self):
    """Check that nothing is done when no identity comes from the sources."""
    result = self.cmd.run('source1', 'source2')
    self.assertEqual(result, CMD_SUCCESS)

    printed = sys.stdout.getvalue().strip()
    self.assertEqual(printed, "")

    # Unique identities remain without profiles
    uids = api.unique_identities(self.db)
    for position in range(3):
        self.assertEqual(uids[position].profile.name, None)
        self.assertEqual(uids[position].profile.email, None)
def __get_sh_ids_cache(self, identity_tuple, backend_name):
    """Look up the SortingHat id and uuid of an identity.

    The identity is added to SortingHat; the ids are only filled in
    when that call reports the identity already exists. In every other
    case, including a successful insertion, both values stay None.

    :param identity_tuple: iterable of (field, value) pairs describing
        the identity; 'email', 'name' and 'username' are the fields used
    :param backend_name: name of the backend, used as identity source

    :returns: a dict with the keys 'id' and 'uuid'

    :raises RuntimeError: when Sorting Hat is not active
    """
    # Convert tuple to the original dict
    identity = dict(identity_tuple)

    if not self.sortinghat:
        raise RuntimeError("Sorting Hat not active during enrich")

    # Missing fields default to None
    iden = {field: identity.get(field)
            for field in ('email', 'name', 'username')}
    sh_ids = {"id": None, "uuid": None}

    try:
        # Find the uuid for a given id. A bit hacky in SH yet
        api.add_identity(self.sh_db, backend_name,
                         iden['email'], iden['name'],
                         iden['username'])
    except AlreadyExistsError as ex:
        # NOTE(review): errors raised inside this handler are not caught
        # by the handlers below; they propagate to the caller.
        uuid = ex.uuid
        u = api.unique_identities(self.sh_db, uuid)[0]
        sh_ids['id'] = utils.uuid(backend_name, email=iden['email'],
                                  name=iden['name'], username=iden['username'])
        sh_ids['uuid'] = u.uuid
    except WrappedValueError:
        logger.warning("None Identity found %s", backend_name)
        logger.warning(identity)
    except NotFoundError:
        logger.error("Identity not found in Sorting Hat %s", backend_name)
        logger.error(identity)
    except UnicodeEncodeError:
        logger.error("UnicodeEncodeError %s", backend_name)
        logger.error(identity)
    except Exception as ex:
        logger.error("Unknown error adding sorting hat identity %s %s", ex, backend_name)
        logger.error(identity)
        logger.error(ex)

    return sh_ids
def test_load_identities(self):
    """Check that only identities are loaded when '--identities' is given."""
    result = self.cmd.run('--identities', 'data/sortinghat_valid.json')
    self.assertEqual(result, CMD_SUCCESS)

    self.assertEqual(len(api.unique_identities(self.db)), 2)

    # This has imported the organizations from the enrollments,
    # not those from the organizations section
    self.assertEqual(len(api.registry(self.db)), 2)

    printed = sys.stdout.getvalue().strip()
    self.assertEqual(printed, LOAD_IDENTITIES_OUTPUT)

    errors = sys.stderr.getvalue().strip()
    self.assertEqual(errors, LOAD_IDENTITIES_OUTPUT_ERROR)
def __get_sh_ids_cache(self, identity_tuple, backend_name):
    """Look up the SortingHat id and uuid of an identity.

    The identity is added to SortingHat; the ids are only filled in
    when that call reports the identity already exists. In every other
    case, including a successful insertion, both values stay None.

    :param identity_tuple: iterable of (field, value) pairs describing
        the identity; 'email', 'name' and 'username' are the fields used
    :param backend_name: name of the backend, used as identity source

    :returns: a dict with the keys 'id' and 'uuid'

    :raises RuntimeError: when Sorting Hat is not active
    """
    # Convert tuple to the original dict
    identity = dict(identity_tuple)

    if not self.sortinghat:
        raise RuntimeError("Sorting Hat not active during enrich")

    # Missing fields default to None
    iden = {field: identity.get(field)
            for field in ('email', 'name', 'username')}
    sh_ids = {"id": None, "uuid": None}

    try:
        # Find the uuid for a given id. A bit hacky in SH yet
        api.add_identity(self.sh_db, backend_name,
                         iden['email'], iden['name'],
                         iden['username'])
    except AlreadyExistsError as ex:
        # NOTE(review): errors raised inside this handler are not caught
        # by the handlers below; they propagate to the caller.
        uuid = ex.uuid
        u = api.unique_identities(self.sh_db, uuid)[0]
        sh_ids['id'] = utils.uuid(backend_name, email=iden['email'],
                                  name=iden['name'], username=iden['username'])
        sh_ids['uuid'] = u.uuid
    except WrappedValueError:
        logger.error("None Identity found")
        logger.error(identity)
    except NotFoundError:
        logger.error("Identity not found in Sorting Hat")
        logger.error(identity)
    except UnicodeEncodeError:
        logger.error("UnicodeEncodeError")
        logger.error(identity)
    except Exception as ex:
        logger.error("Unknown error adding sorting hat identity %s", ex)
        logger.error(identity)
        logger.error(ex)

    return sh_ids
def __get_uuid_cache(self, identity_tuple, backend_name):
    """Look up the SortingHat uuid of an identity.

    The identity is added to SortingHat; the uuid is only resolved when
    that call reports the identity already exists. In every other case,
    including a successful insertion, None is returned.

    :param identity_tuple: iterable of (field, value) pairs describing
        the identity; 'email', 'name' and 'username' are the fields used
    :param backend_name: name of the backend, used as identity source

    :returns: the uuid of the unique identity, or None

    :raises RuntimeError: when Sorting Hat is not active
    """
    # Convert tuple to the original dict
    identity = dict(identity_tuple)

    if not self.sortinghat:
        raise RuntimeError("Sorting Hat not active during enrich")

    # Missing fields default to None
    iden = {field: identity.get(field)
            for field in ('email', 'name', 'username')}
    uuid = None

    try:
        # Find the uuid for a given id. A bit hacky in SH yet
        api.add_identity(self.sh_db, backend_name,
                         iden['email'], iden['name'],
                         iden['username'])
    except AlreadyExistsError as ex:
        # NOTE(review): errors raised inside this handler are not caught
        # by the handlers below; they propagate to the caller.
        uuid = ex.uuid
        u = api.unique_identities(self.sh_db, uuid)[0]
        uuid = u.uuid
    except WrappedValueError:
        logger.error("None Identity found")
        logger.error("%s %s", identity, uuid)
        uuid = None
    except NotFoundError:
        logger.error("Identity found in Sorting Hat which is not unique")
        logger.error("%s %s", identity, uuid)
        uuid = None
    except UnicodeEncodeError:
        logger.error("UnicodeEncodeError")
        logger.error("%s %s", identity, uuid)
        uuid = None
    except Exception:
        logger.error("Unknown error adding sorting hat identity.")
        logger.error("%s %s", identity, uuid)
        uuid = None

    return uuid
def test_no_email_on_name_field(self):
    """Check that an email address never ends up as the profile name"""

    # First identity: the email-like value is stored as the username
    parent_uuid = api.add_identity(self.db, 'mls', None, None,
                                   '*****@*****.**')
    # Second identity: the email-like value is stored as the name
    api.add_identity(self.db, 'mls', None, '*****@*****.**', None,
                     uuid=parent_uuid)
    # Third identity: a regular username
    api.add_identity(self.db, 'mls', None, None, 'jrae',
                     uuid=parent_uuid)

    self.cmd.autocomplete(['mls', 'its'])

    uid = api.unique_identities(self.db, uuid=parent_uuid)[0]

    # The profile is built skipping the email-like values found on
    # the name and username fields
    self.assertEqual(uid.uuid, parent_uuid)
    self.assertEqual(uid.profile.name, 'jrae')
    self.assertEqual(uid.profile.email, None)
def get_item_sh(self, item):
    """ Build the Sorting Hat enrichment fields of an item

    The sender found in the "From" field of item['data'] is resolved to
    its uuid, name, bot flag, first enrollment organization and email
    domain. Each value is stored under a "from_*"/"domain" key and
    mirrored under the matching "author_*" key.

    :param item: dict whose 'data' entry holds the raw item

    :returns: a dict with the enrichment fields; empty when the raw
        item has no "From" field
    """
    enriched = {}
    data = item['data']

    # Without a sender there is nothing to enrich
    if "From" not in data:
        return enriched

    identity = self.get_sh_identity(data["From"])
    from_uuid = self.get_uuid(identity, self.get_connector_name())
    enriched["from_uuid"] = from_uuid
    enriched["from_name"] = identity['name']

    # Bot flag: identities without a profile are not bots by default
    # NOTE(review): if get_uuid can return None, the lookup below runs
    # with a None uuid - confirm it cannot pick an unrelated identity.
    unique_identity = api.unique_identities(self.sh_db, from_uuid)[0]
    enriched["from_bot"] = (unique_identity.profile.is_bot
                            if unique_identity.profile else False)

    # Only the first enrollment is used for the organization
    enrollments = self.get_enrollments(from_uuid)
    enriched["from_org_name"] = (enrollments[0].organization.name
                                 if len(enrollments) > 0 else None)

    # The domain is whatever follows the first "@" of the email
    domain = None
    email = identity['email']
    if email:
        try:
            domain = email.split("@")[1]
        except IndexError:
            logging.warning("Bad email format: %s" % (email))
    enriched["domain"] = domain

    # Unify fields name
    aliases = (("author_uuid", "from_uuid"),
               ("author_name", "from_name"),
               ("author_org_name", "from_org_name"),
               ("author_domain", "domain"))
    for author_field, source_field in aliases:
        enriched[author_field] = enriched[source_field]

    return enriched
def test_autocomplete_profiles(self):
    """Check that profiles are autocompleted following a priority list"""

    self.cmd.autocomplete(['mls', 'its'])

    uids = api.unique_identities(self.db)

    # Expected (uuid, profile name, profile email) per position
    expected_profiles = [
        # Unique identities without identities from
        # the given sources are not updated
        ('17ab00ed3825ec2f50483e33c88df223264182ba', None, None),
        # Only one source available
        ('eb10fb9519d69d75a6cdcd76707943a513685c09', 'jdoe', None),
        # It mixes the information of the identities with
        # maximum priority, using the longest name value
        ('ffefc2e3f2a255e9450ac9e2d36f37c28f51bd73', 'John Smith',
         '*****@*****.**'),
    ]

    for position, (uuid, name, email) in enumerate(expected_profiles):
        uid = uids[position]
        self.assertEqual(uid.uuid, uuid)
        self.assertEqual(uid.profile.name, name)
        self.assertEqual(uid.profile.email, email)
def test_identities_load_file(self):
    """ Check that identities files are loaded once and skipped when unchanged """

    setup_http_server()

    task = TaskIdentitiesLoad(Config(CONF_FILE))

    # Log lines expected when the files are loaded for the first time
    first_run_logs = [
        'INFO:sirmordred.task_identities:[sortinghat] '
        'Loading orgs from file data/orgs_sortinghat.json',
        'INFO:sirmordred.task_identities:[sortinghat] 20 organizations loaded',
        'INFO:sirmordred.task_identities:[sortinghat] '
        'Loading identities from file data/perceval_identities_sortinghat.json',
    ]

    # Log lines expected when the same files are processed again
    second_run_logs = [
        'INFO:sirmordred.task_identities:[sortinghat] No changes in '
        'file data/orgs_sortinghat.json, organizations won\'t be loaded',
        'INFO:sirmordred.task_identities:[sortinghat] No changes in '
        'file data/perceval_identities_sortinghat.json, identities won\'t be loaded',
    ]

    with self.assertLogs(logger, level='INFO') as cm:
        task.execute()
        for position, expected_line in enumerate(first_run_logs):
            self.assertEqual(cm.output[position], expected_line)

    # Check the number of identities loaded from local and remote files
    nuids = len(api.unique_identities(self.sh_db))
    self.assertEqual(nuids, 4)

    with self.assertLogs(logger, level='INFO') as cm:
        task.execute()
        for position, expected_line in enumerate(second_run_logs):
            self.assertEqual(cm.output[position], expected_line)
def add_identities(cls, db, identities, backend):
    """ Load identities list from backend in Sorting Hat

    Every identity is added to Sorting Hat and its profile is set with
    its name (or its username when the name is empty) and its email.
    When the identity carries a company, the organization is registered
    and the identity is enrolled in it. Identities that already exist,
    or that fail to be added, are skipped.

    :param db: Sorting Hat database handler
    :param identities: list of dicts with the keys 'email', 'name' and
        'username' and, optionally, 'company'
    :param backend: source of the identities

    :returns: list of uuids affected by a merge; always empty while the
        merge is disabled
    """
    # Merging while loading is too slow in large projects
    merge_identities = False

    logger.info("Adding the identities to SortingHat")
    if not merge_identities:
        logger.info("Not doing identities merge")

    total = 0
    lidentities = len(identities)
    merged_identities = []  # old identities merged into new ones
    matcher = None

    if merge_identities:
        blacklist = api.blacklist(db)
        matching = 'email-name'  # Not active
        matcher = create_identity_matcher(matching, blacklist)

    for identity in identities:
        try:
            uuid = api.add_identity(db, backend, identity['email'],
                                    identity['name'], identity['username'])

            logger.debug("New sortinghat identity %s %s,%s,%s (%i/%i)",
                         uuid, identity['username'], identity['name'],
                         identity['email'], total, lidentities)

            profile = {"name": identity['name'] or identity['username'],
                       "email": identity['email']}
            api.edit_profile(db, uuid, **profile)

            total += 1

            # Only the merge is optional; skipping it must not skip
            # the enrollment done after this try block.
            if merge_identities:
                matches = api.match_identities(db, uuid, matcher)

                if len(matches) > 1:
                    u = api.unique_identities(db, uuid)[0]
                    for m in matches:
                        # First add the old uuid to the list of
                        # changed by merge uuids
                        if m.uuid not in merged_identities:
                            merged_identities.append(m.uuid)
                        if m.uuid == uuid:
                            continue
                        # Merge matched identity into added identity
                        api.merge_unique_identities(db, m.uuid, u.uuid)
        except AlreadyExistsError:
            # Identities already registered are left untouched
            continue
        except WrappedValueError:
            logging.warning("Trying to add a None identity. Ignoring it.")
            continue
        except UnicodeEncodeError:
            logging.warning("UnicodeEncodeError. Ignoring it. %s %s %s",
                            identity['email'], identity['name'],
                            identity['username'])
            continue
        except Exception:
            logging.warning("Unknown exception adding identity. "
                            "Ignoring it. %s %s %s",
                            identity['email'], identity['name'],
                            identity['username'])
            traceback.print_exc()
            continue

        if 'company' in identity and identity['company'] is not None:
            # An organization shared by several identities already
            # exists after the first one; that must not prevent the
            # enrollment of the following ones.
            try:
                api.add_organization(db, identity['company'])
            except AlreadyExistsError:
                pass

            try:
                api.add_enrollment(db, uuid, identity['company'],
                                   datetime(1900, 1, 1),
                                   datetime(2100, 1, 1))
            except AlreadyExistsError:
                pass

    logger.info("Total NEW identities: %i", total)

    if merge_identities:
        logger.info("Total NEW identities merged: %i",
                    len(merged_identities))

    return merged_identities
def get_unique_identities(self, uuid):
    """ Return the result of querying Sorting Hat unique identities for uuid

    :param uuid: value forwarded to api.unique_identities together with
        the Sorting Hat database handler of this object
    """
    sh_db = self.sh_db
    return api.unique_identities(sh_db, uuid)