def test_filter_identities_with_blacklist(self):
    """Check what EmailNameMatcher.filter() returns when a blacklist is set.

    Two unique identities are filtered by a matcher whose blacklist
    holds a name and an email term. The test pins the number of
    filtered identities returned for each unique identity, their type
    (EmailNameIdentity) and their uuid, name and email attributes.
    """
    # Fixtures: unique identities mixing names, emails and usernames
    smith_uid = UniqueIdentity(uuid='jsmith')
    smith_uid.identities = [
        Identity(name='John Smith', email='*****@*****.**',
                 source='scm', uuid='jsmith'),
        Identity(name='John Smith', source='scm', uuid='jsmith'),
        Identity(name='John Smith JR', source='scm', uuid='jsmith'),
        Identity(username='******', source='scm', uuid='jsmith'),
        Identity(email='', source='scm', uuid='jsmith'),
    ]

    rae_uid = UniqueIdentity(uuid='jrae')
    rae_uid.identities = [
        Identity(name='Jane Rae', source='scm', uuid='jrae'),
        Identity(name='Jane Rae Doe', email='*****@*****.**',
                 source='mls', uuid='jrae'),
        Identity(name='jrae', source='scm', uuid='jrae'),
        Identity(email='*****@*****.**', source='scm', uuid='jrae'),
    ]

    excluded_terms = [
        MatchingBlacklist(excluded='John Smith'),
        MatchingBlacklist(excluded='*****@*****.**'),
    ]
    matcher = EmailNameMatcher(blacklist=excluded_terms)

    # Each scenario: unique identity, expected uuid and the expected
    # (name, email) pair of every filtered identity, in result order.
    scenarios = [
        (smith_uid, 'jsmith', [('john smith jr', None)]),
        (rae_uid, 'jrae', [('jane rae', None),
                           ('jane rae doe', '*****@*****.**')]),
    ]

    for uid, expected_uuid, expected_fields in scenarios:
        filtered = matcher.filter(uid)
        self.assertEqual(len(filtered), len(expected_fields))

        for position, (name, email) in enumerate(expected_fields):
            fid = filtered[position]
            self.assertIsInstance(fid, EmailNameIdentity)
            self.assertEqual(fid.uuid, expected_uuid)
            self.assertEqual(fid.name, name)
            self.assertEqual(fid.email, email)
def test_match_with_blacklist(self):
    """Check EmailNameMatcher.match() when the blacklist has entries.

    Three unique identities are compared pairwise, in both directions,
    using a matcher whose blacklist contains the name 'John Smith' and
    an email term. Only the 'js' / 'J. Smith' pair is expected to match.
    """
    smith_uid = UniqueIdentity(uuid='jsmith')
    smith_uid.identities = [
        Identity(name='John Smith', email='*****@*****.**', source='scm'),
        Identity(name='John Smith', source='scm'),
        Identity(username='******', source='scm'),
        Identity(email='', source='scm'),
    ]

    js_uid = UniqueIdentity(uuid='js')
    js_uid.identities = [
        Identity(name='John Smith JR', username='******', source='scm'),
        Identity(username='******', source='scm'),
        Identity(name='Smith. J', source='mls'),
        Identity(name='Smith. J', email='*****@*****.**', source='mls'),
    ]

    alt_uid = UniqueIdentity(uuid='J. Smith')
    alt_uid.identities = [
        Identity(name='John Smith JR', username='******', source='alt'),
        Identity(name='John Smith', username='******', source='alt'),
        Identity(email='', source='alt'),
        Identity(email='jsmith', source='alt'),
    ]

    excluded_terms = [
        MatchingBlacklist(excluded='John Smith'),
        MatchingBlacklist(excluded='*****@*****.**'),
    ]
    matcher = EmailNameMatcher(blacklist=excluded_terms)

    # (left, right, expected outcome), evaluated in this order
    expectations = [
        (smith_uid, js_uid, False),
        (js_uid, smith_uid, False),
        # John Smith is blacklisted, so no match
        (smith_uid, alt_uid, False),
        (alt_uid, smith_uid, False),
        (js_uid, alt_uid, True),
        (alt_uid, js_uid, True),
    ]

    for left, right, expected in expectations:
        outcome = matcher.match(left, right)
        self.assertEqual(outcome, expected)
def test_unique_excluded(self):
    """Check whether the excluded term is in fact unique.

    Two MatchingBlacklist entries with the same 'excluded' term are
    added to the session and committed. The block is expected to raise
    IntegrityError with a message matching DUP_CHECK_ERROR.
    """
    # 'assertRaisesRegexp' is a deprecated alias that was removed from
    # unittest in Python 3.12. Use 'assertRaisesRegex' when available
    # and only fall back to the old name on interpreters that lack it.
    assert_raises_regex = (getattr(self, 'assertRaisesRegex', None)
                           or self.assertRaisesRegexp)

    with assert_raises_regex(IntegrityError, DUP_CHECK_ERROR):
        mb1 = MatchingBlacklist(excluded='John Smith')
        mb2 = MatchingBlacklist(excluded='John Smith')

        self.session.add(mb1)
        self.session.add(mb2)
        self.session.commit()
def test_match_filtered_identities_with_blacklist(self):
    """Check GitHubMatcher.match_filtered_identities() with a blacklist.

    Every filtered identity uses 'jsmith' as its third constructor
    argument, and 'jsmith' is the blacklisted term. The only pair
    expected to match is the one flagged below as sharing a UUID.
    """
    no_uuid = GitHubUsernameIdentity('1', None, 'jsmith', 'github-commits')
    with_uuid = GitHubUsernameIdentity('2', 'jsmith', 'jsmith', 'github')
    same_uuid = GitHubUsernameIdentity('3', 'jsmith', 'jsmith', 'GitHub-API')
    no_uuid_alt = GitHubUsernameIdentity('4', None, 'jsmith', 'github-issues')

    excluded_terms = [MatchingBlacklist(excluded='jsmith')]
    matcher = GitHubMatcher(blacklist=excluded_terms)

    # (left, right, expected outcome), evaluated in this order
    expectations = [
        (no_uuid, with_uuid, False),
        (no_uuid, same_uuid, False),
        (with_uuid, no_uuid, False),
        # Same UUID
        (with_uuid, same_uuid, True),
        (same_uuid, no_uuid, False),
        # Although the UUID is equal to None in both, these two do not match
        (no_uuid, no_uuid_alt, False),
    ]

    for left, right, expected in expectations:
        outcome = matcher.match_filtered_identities(left, right)
        self.assertEqual(outcome, expected)
def test_identity_matcher_instance_with_blacklist(self):
    """Check that create_identity_matcher() passes the blacklist on.

    Without a blacklist argument the 'default' matcher is expected to
    have an empty blacklist; with five entries given, its blacklist is
    expected to have length five.
    """
    # No blacklist given: the matcher's blacklist is empty
    plain_matcher = create_identity_matcher('default')
    self.assertIsInstance(plain_matcher, IdentityMatcher)
    self.assertEqual(len(plain_matcher.blacklist), 0)

    # Create a matcher with a blacklist
    terms = [
        '*****@*****.**',
        '*****@*****.**',
        '*****@*****.**',
        'John Smith',
        'root',
    ]
    entries = [MatchingBlacklist(excluded=term) for term in terms]

    listed_matcher = create_identity_matcher('default', blacklist=entries)
    self.assertIsInstance(listed_matcher, IdentityMatcher)
    self.assertEqual(len(listed_matcher.blacklist), 5)
def test_blacklist(self):
    """Check the contents of IdentityMatcher.blacklist.

    The attribute is expected to be an empty list both when no
    blacklist is given and when an empty one is given. For a list of
    MatchingBlacklist entries, it is compared against a list of plain
    strings in which 'John Smith' appears lower-cased and first.
    """
    # No blacklist argument
    matcher = IdentityMatcher()
    self.assertListEqual(matcher.blacklist, [])

    # Explicit empty blacklist
    matcher = IdentityMatcher(blacklist=[])
    self.assertListEqual(matcher.blacklist, [])

    # Populated blacklist
    terms = [
        '*****@*****.**',
        '*****@*****.**',
        '*****@*****.**',
        'John Smith',
        'root',
    ]
    entries = [MatchingBlacklist(excluded=term) for term in terms]
    matcher = IdentityMatcher(blacklist=entries)

    expected = [
        'john smith',
        '*****@*****.**',
        '*****@*****.**',
        '*****@*****.**',
        'root',
    ]
    self.assertListEqual(matcher.blacklist, expected)
def test_filter_identities_with_blacklist(self):
    """Check what GitHubMatcher.filter() returns when a blacklist is set.

    The blacklist holds the single term 'jrae'. Each unique identity is
    expected to produce exactly one GitHubUsernameIdentity, whose uuid,
    username and source are pinned below.
    """
    def check_single(uid, uuid, username, source):
        # Filter 'uid' and verify the only filtered identity returned
        filtered = matcher.filter(uid)
        self.assertEqual(len(filtered), 1)

        fid = filtered[0]
        self.assertIsInstance(fid, GitHubUsernameIdentity)
        self.assertEqual(fid.uuid, uuid)
        self.assertEqual(fid.username, username)
        self.assertEqual(fid.source, source)

    # Fixtures: unique identities with entries from several sources
    smith_uid = UniqueIdentity(uuid='jsmith')
    smith_uid.identities = [
        Identity(name='John Smith', email='*****@*****.**',
                 source='scm', uuid='jsmith'),
        Identity(name='John Smith', source='scm', uuid='jsmith'),
        Identity(username='******', source='github', uuid='jsmith'),
        Identity(email='', source='scm', uuid='jsmith'),
    ]

    rae_uid = UniqueIdentity(uuid='jrae')
    rae_uid.identities = [
        Identity(username='******', source='GitHub-API', uuid='jrae'),
        Identity(name='Jane Rae Doe', email='*****@*****.**',
                 source='mls', uuid='jrae'),
        Identity(username='******', source='github', uuid='jrae'),
        Identity(email='*****@*****.**', source='scm', uuid='jrae'),
    ]

    excluded_terms = [MatchingBlacklist(excluded='jrae')]
    matcher = GitHubMatcher(blacklist=excluded_terms)

    check_single(smith_uid, 'jsmith', 'jsmith', 'github')
    check_single(rae_uid, 'jrae', 'janerae', 'GitHub-API')
def test_filter_identities_with_blacklist_not_strict(self):
    """Check EmailMatcher.filter() with a blacklist and strict=False.

    The emails in the fixtures use '_at_' in place of '@'. The
    blacklist holds 'jrae_at_example.net', while the fixture spells
    that address in upper case. Each unique identity is expected to
    produce exactly one EmailIdentity with the email pinned below.
    """
    # Fixtures
    smith_uid = UniqueIdentity(uuid='jsmith')
    smith_uid.identities = [
        Identity(name='John Smith', email='jsmith_at_example.com',
                 source='scm', uuid='jsmith'),
        Identity(name='John Smith', source='scm', uuid='jsmith'),
        Identity(username='******', source='scm', uuid='jsmith'),
        Identity(email='', source='scm', uuid='jsmith'),
    ]

    rae_uid = UniqueIdentity(uuid='jrae')
    rae_uid.identities = [
        Identity(name='Jane Rae', source='scm', uuid='jrae'),
        Identity(name='Jane Rae Doe', email='jane.rae_at_example.net',
                 source='mls', uuid='jrae'),
        Identity(name='jrae', source='scm', uuid='jrae'),
        Identity(email='JRAE_at_example.net', source='scm', uuid='jrae'),
    ]

    excluded_terms = [MatchingBlacklist(excluded='jrae_at_example.net')]
    matcher = EmailMatcher(blacklist=excluded_terms, strict=False)

    # (unique identity, expected uuid, expected email)
    scenarios = [
        (smith_uid, 'jsmith', 'jsmith_at_example.com'),
        (rae_uid, 'jrae', 'jane.rae_at_example.net'),
    ]

    for uid, expected_uuid, expected_email in scenarios:
        filtered = matcher.filter(uid)
        self.assertEqual(len(filtered), 1)

        fid = filtered[0]
        self.assertIsInstance(fid, EmailIdentity)
        self.assertEqual(fid.uuid, expected_uuid)
        self.assertEqual(fid.email, expected_email)
def test_match_filtered_identities_with_blacklist(self):
    """Check UsernameMatcher.match_filtered_identities() with a blacklist.

    The blacklist holds the upper-case term 'JSMITH'. Pairs whose only
    shared value is 'jsmith' in the third constructor argument are
    expected not to match. Pairs flagged as sharing a UUID, or sharing
    'john.smith' in the third argument, are expected to match.
    """
    jsmith = UsernameIdentity('1', None, 'jsmith')
    jsmith_alt = UsernameIdentity('2', 'jsmith', 'jsmith')
    jsmith_uuid = UsernameIdentity('3', 'jsmith', 'john.smith')
    john_alt = UsernameIdentity('4', None, 'john.smith')
    jsmith_none = UsernameIdentity('4', 'john.smith', None)
    jdoe_none = UsernameIdentity('4', 'jdoe', None)

    excluded_terms = [MatchingBlacklist(excluded='JSMITH')]
    matcher = UsernameMatcher(blacklist=excluded_terms)

    # (left, right, expected outcome), evaluated in this order
    expectations = [
        (jsmith, jsmith_alt, False),
        (jsmith, jsmith_uuid, False),
        (jsmith_alt, jsmith, False),
        # Same UUID
        (jsmith_alt, jsmith_uuid, True),
        (jsmith_uuid, jsmith, False),
        # Same UUID
        (jsmith_uuid, jsmith_alt, True),
        (jsmith_uuid, john_alt, True),
        (john_alt, jsmith_uuid, True),
        # Both carry None as third argument (presumably the username),
        # yet these two do not match
        (jsmith_none, jdoe_none, False),
    ]

    for left, right, expected in expectations:
        outcome = matcher.match_filtered_identities(left, right)
        self.assertEqual(outcome, expected)
def test_match_with_blacklist(self):
    """Check UsernameMatcher.match() when the blacklist has entries.

    The same two pairs of unique identities are matched, in both
    directions, in three stages: with no blacklist (all match), with
    'john_smith' excluded (only the second pair matches) and with
    'John_Smith' and 'Jane.rae' excluded (nothing matches).
    """
    def check_pairs(matcher, smith_expected, rae_expected):
        # Match both pairs in both directions against the given outcome
        checks = [
            (jsmith, john_smith, smith_expected),
            (john_smith, jsmith, smith_expected),
            (jrae, jane_rae, rae_expected),
            (jane_rae, jrae, rae_expected),
        ]
        for left, right, expected in checks:
            outcome = matcher.match(left, right)
            self.assertEqual(outcome, expected)

    # Fixtures: unique identities used by every stage
    jsmith = UniqueIdentity(uuid='jsmith')
    jsmith.identities = [
        Identity(name='John Smith', username='******', source='scm'),
        Identity(name='John Smith', source='scm'),
        Identity(username='******', source='scm'),
        Identity(email='', source='scm'),
    ]

    john_smith = UniqueIdentity(uuid='js')
    john_smith.identities = [
        Identity(name='J. Smith', email='*****@*****.**', source='scm'),
        Identity(username='******', source='scm'),
        Identity(name='Smith. J', source='mls'),
        Identity(name='Smith. J', username='******', source='mls'),
    ]

    jrae = UniqueIdentity(uuid='jrae')
    jrae.identities = [
        Identity(name='Jane Rae', source='scm', uuid='jrae'),
        Identity(name='Jane Rae Doe', username='******',
                 email='*****@*****.**', source='mls', uuid='jrae'),
        Identity(name='jrae', source='scm', uuid='jrae'),
        Identity(email='*****@*****.**', source='scm', uuid='jrae'),
    ]

    jane_rae = UniqueIdentity(uuid='Jane Rae')
    jane_rae.identities = [
        Identity(name='Jane Rae', source='scm', uuid='Jane Rae'),
        Identity(email='*****@*****.**', username='******',
                 source='mls', uuid='Jane Rae'),
    ]

    # No blacklist: both pairs of unique identities must match
    check_pairs(UsernameMatcher(), True, True)

    # Add a blacklist: only the second pair keeps matching
    excluded_terms = [MatchingBlacklist(excluded='john_smith')]
    check_pairs(UsernameMatcher(blacklist=excluded_terms), False, True)

    # In this case, no match will be found
    excluded_terms = [
        MatchingBlacklist(excluded='John_Smith'),
        MatchingBlacklist(excluded='Jane.rae'),
    ]
    check_pairs(UsernameMatcher(blacklist=excluded_terms), False, False)