def test_filter_identities_with_sources_list(self):
    """Check filter() when the matcher is limited to a list of sources.

    The matcher is built with sources 'mls' and 'alt'. The test expects
    no filtered identity for the first unique identity and exactly one
    for the second.
    """
    smith = UniqueIdentity(uuid='jsmith')
    smith.identities = [
        Identity(name='John Smith', email='*****@*****.**', source='scm', uuid='jsmith'),
        Identity(name='John Smith', source='scm', uuid='jsmith'),
        Identity(name='John Smith JR', source='scm', uuid='jsmith'),
        Identity(username='******', source='mls', uuid='jsmith'),
        Identity(email='', source='scm', uuid='jsmith'),
    ]

    rae = UniqueIdentity(uuid='jrae')
    rae.identities = [
        Identity(name='Jane Rae', source='scm', uuid='jrae'),
        Identity(name='Jane Rae Doe', email='*****@*****.**', source='mls', uuid='jrae'),
        Identity(name='jrae', source='scm', uuid='jrae'),
        Identity(email='*****@*****.**', source='scm', uuid='jrae'),
    ]

    source_matcher = EmailMatcher(sources=['mls', 'alt'])

    # The only 'mls' entry of the first unique identity has a username
    # but no email; the test expects nothing to be kept for it.
    kept = source_matcher.filter(smith)
    self.assertEqual(len(kept), 0)

    kept = source_matcher.filter(rae)
    self.assertEqual(len(kept), 1)

    only = kept[0]
    self.assertIsInstance(only, EmailIdentity)
    self.assertEqual(only.uuid, 'jrae')
    self.assertEqual(only.email, '*****@*****.**')
def test_match_with_sources_list(self):
    """Check EmailNameMatcher.match() under different sources lists."""
    smith = UniqueIdentity(uuid='jsmith')
    smith.identities = [
        Identity(name='John Smith', email='*****@*****.**', source='scm'),
        Identity(name='John Smith', source='scm'),
        Identity(username='******', source='scm'),
        Identity(email='', source='scm'),
    ]

    smith_alt = UniqueIdentity(uuid='J. Smith')
    smith_alt.identities = [
        Identity(name='John Smith JR', username='******', source='alt'),
        Identity(name='John Smith', username='******', source='alt'),
        Identity(email='', source='alt'),
        Identity(email='jsmith', source='alt'),
    ]

    # (sources list, expected result): a match is only expected once
    # both 'scm' and 'alt' are part of the list.
    scenarios = (
        (['github'], False),
        (['scm'], False),
        (['scm', 'alt'], True),
    )

    for sources, expected in scenarios:
        matcher = EmailNameMatcher(sources=sources)
        self.assertEqual(matcher.match(smith, smith_alt), expected)
def test_match_strict(self):
    """Check EmailMatcher.match() with strict mode on and off."""
    smith_alt = UniqueIdentity(uuid='J. Smith')
    smith_alt.identities = [
        Identity(name='J. Smith', username='******', source='alt'),
        Identity(name='John Smith', username='******', source='alt'),
        Identity(email='', source='alt'),
        Identity(email='jsmith', source='alt'),
    ]

    smith_bad_email = UniqueIdentity(uuid='John Smith')
    smith_bad_email.identities = [Identity(email='jsmith', source='mls')]

    strict = EmailMatcher(strict=True)
    lenient = EmailMatcher(strict=False)

    # Both unique identities share the email 'jsmith'. The test expects
    # the strict matcher to reject it (it is not a well-formed address)
    # and the non-strict matcher to accept it.
    for matcher, expected in ((strict, False), (lenient, True)):
        outcome = matcher.match(smith_alt, smith_bad_email)
        self.assertEqual(outcome, expected)
def test_filter_identities_with_blacklist(self):
    """Check UsernameMatcher.filter() when a blacklist is supplied.

    The blacklist holds the single entry 'jrae'. One filtered identity
    is expected for each unique identity.
    """
    smith = UniqueIdentity(uuid='jsmith')
    smith.identities = [
        Identity(name='John Smith', username='******', source='scm', uuid='jsmith'),
        Identity(name='John Smith', source='scm', uuid='jsmith'),
        Identity(email='*****@*****.**', source='scm', uuid='jsmith'),
        Identity(email='', source='scm', uuid='jsmith'),
    ]

    rae = UniqueIdentity(uuid='jrae')
    rae.identities = [
        Identity(name='Jane Rae', source='scm', uuid='jrae'),
        Identity(name='Jane Rae Doe', username='******', source='mls', uuid='jrae'),
        Identity(name='jrae', source='scm', uuid='jrae'),
        Identity(username='******', source='scm', uuid='jrae'),
    ]

    excluded = [MatchingBlacklist(excluded='jrae')]
    matcher = UsernameMatcher(blacklist=excluded)

    # (unique identity, expected uuid, expected username)
    cases = (
        (smith, 'jsmith', 'jsmith'),
        (rae, 'jrae', 'jane.rae'),
    )

    for unique_identity, uuid, username in cases:
        filtered = matcher.filter(unique_identity)
        self.assertEqual(len(filtered), 1)

        entry = filtered[0]
        self.assertIsInstance(entry, UsernameIdentity)
        self.assertEqual(entry.uuid, uuid)
        self.assertEqual(entry.username, username)
def test_match_with_sources_list(self):
    """Check EmailMatcher.match() under different sources lists."""
    smith = UniqueIdentity(uuid='jsmith')
    smith.identities = [
        Identity(name='John Smith', email='*****@*****.**', source='scm'),
        Identity(name='John Smith', source='scm'),
        Identity(username='******', source='scm'),
        Identity(email='', source='scm'),
    ]

    smith_alt = UniqueIdentity(uuid='J. Smith')
    smith_alt.identities = [
        Identity(name='John Smith JR', email='*****@*****.**', source='alt'),
        Identity(name='John Smith', username='******', source='alt'),
        Identity(email='', source='alt'),
        Identity(email='jsmith', source='alt'),
    ]

    # Neither of these lists is expected to produce a match
    for sources in (['github'], ['scm']):
        restricted = EmailMatcher(sources=sources)
        self.assertEqual(restricted.match(smith, smith_alt), False)

    # A match is only expected when 'scm' and 'alt' are combined
    combined = EmailMatcher(sources=['scm', 'alt'])
    self.assertEqual(combined.match(smith, smith_alt), True)
def test_match_with_sources_list(self):
    """Check GitHubMatcher.match() under different sources lists."""
    smith = UniqueIdentity(uuid='jsmith')
    smith.identities = [
        Identity(name='John Smith', email='*****@*****.**', source='scm', uuid='jsmith'),
        Identity(name='John Smith', source='scm', uuid='jsmith'),
        Identity(username='******', source='github', uuid='jsmith'),
        Identity(email='', source='scm', uuid='jsmith'),
    ]

    smith_js = UniqueIdentity(uuid='js')
    smith_js.identities = [
        Identity(name='J. Smith', username='******', source='scm'),
        Identity(username='******', source='GitHub-API'),
        Identity(name='Smith. J', source='mls'),
        Identity(name='Smith. J', email='*****@*****.**', source='mls'),
    ]

    # (sources list, expected result): a match is only expected when
    # 'github-api' and 'github' are combined.
    scenarios = (
        (['scm'], False),
        (['github'], False),
        (['github-api', 'github'], True),
    )

    for sources, expected in scenarios:
        matcher = GitHubMatcher(sources=sources)
        outcome = matcher.match(smith, smith_js)
        self.assertEqual(outcome, expected)
def test_match_strict(self):
    """Check how the strict flag changes the result of EmailMatcher.match()."""
    alt_identities = [
        Identity(name='J. Smith', username='******', source='alt'),
        Identity(name='John Smith', username='******', source='alt'),
        Identity(email='', source='alt'),
        Identity(email='jsmith', source='alt'),
    ]
    smith_alt = UniqueIdentity(uuid='J. Smith')
    smith_alt.identities = alt_identities

    smith_bad_email = UniqueIdentity(uuid='John Smith')
    smith_bad_email.identities = [Identity(email='jsmith', source='mls')]

    strict = EmailMatcher(strict=True)
    lenient = EmailMatcher(strict=False)

    # Same email on both sides, but 'jsmith' is not a well-formed
    # address: no match is expected in strict mode.
    self.assertEqual(strict.match(smith_alt, smith_bad_email), False)

    # With strict mode disabled the two are expected to match
    self.assertEqual(lenient.match(smith_alt, smith_bad_email), True)
def test_match_strict(self):
    """Check EmailNameMatcher.match() with strict mode on and off.

    Two pairs are compared: one sharing the email 'jsmith' and one
    sharing the name 'jrae'. Each pair is expected not to match in
    strict mode and to match in non-strict mode.
    """
    smith_alt = UniqueIdentity(uuid='J. Smith')
    smith_alt.identities = [
        Identity(name='J. Smith', username='******', source='alt'),
        Identity(name='John Smith', username='******', source='alt'),
        Identity(email='', source='alt'),
        Identity(email='jsmith', source='alt'),
    ]

    smith_bad_email = UniqueIdentity(uuid='John Smith')
    smith_bad_email.identities = [Identity(email='jsmith', source='mls')]

    # NOTE: this unique identity is built but not used by any of the
    # assertions below.
    rae = UniqueIdentity(uuid='jrae')
    rae.identities = [
        Identity(name='Jane Rae', source='scm'),
        Identity(name='Jane Rae Doe', email='*****@*****.**', source='mls'),
    ]

    rae_doe = UniqueIdentity(uuid='jraedoe')
    rae_doe.identities = [
        Identity(name='Jane Rae Doe', email='*****@*****.**', source='mls'),
        Identity(name='jrae', source='scm'),
    ]

    rae_no_name = UniqueIdentity(uuid='Jane Rae')
    rae_no_name.identities = [Identity(name='jrae', source='scm')]

    strict = EmailNameMatcher(strict=True)
    lenient = EmailNameMatcher(strict=False)

    # First pair: shared email 'jsmith' (not a well-formed address).
    # Second pair: shared name 'jrae' (not shaped like
    # "firstname lastname").
    pairs = (
        (smith_alt, smith_bad_email),
        (rae_doe, rae_no_name),
    )

    for left, right in pairs:
        # Rejected in strict mode ...
        self.assertEqual(strict.match(left, right), False)
        # ... but accepted when strict mode is off
        self.assertEqual(lenient.match(left, right), True)
def test_match_with_blacklist(self):
    """Check EmailNameMatcher.match() when a blacklist is supplied."""
    smith = UniqueIdentity(uuid='jsmith')
    smith.identities = [
        Identity(name='John Smith', email='*****@*****.**', source='scm'),
        Identity(name='John Smith', source='scm'),
        Identity(username='******', source='scm'),
        Identity(email='', source='scm'),
    ]

    smith_js = UniqueIdentity(uuid='js')
    smith_js.identities = [
        Identity(name='John Smith JR', username='******', source='scm'),
        Identity(username='******', source='scm'),
        Identity(name='Smith. J', source='mls'),
        Identity(name='Smith. J', email='*****@*****.**', source='mls'),
    ]

    smith_alt = UniqueIdentity(uuid='J. Smith')
    smith_alt.identities = [
        Identity(name='John Smith JR', username='******', source='alt'),
        Identity(name='John Smith', username='******', source='alt'),
        Identity(email='', source='alt'),
        Identity(email='jsmith', source='alt'),
    ]

    excluded = [
        MatchingBlacklist(excluded='John Smith'),
        MatchingBlacklist(excluded='*****@*****.**'),
    ]
    matcher = EmailNameMatcher(blacklist=excluded)

    # (left, right, expected result), evaluated in this order
    checks = (
        (smith, smith_js, False),
        (smith_js, smith, False),
        # 'John Smith' is blacklisted, so no match is expected here
        (smith, smith_alt, False),
        (smith_alt, smith, False),
        (smith_js, smith_alt, True),
        (smith_alt, smith_js, True),
    )

    for left, right, expected in checks:
        self.assertEqual(matcher.match(left, right), expected)
def test_match(self):
    """Check GitHubMatcher.match() over several unique identities."""
    smith = UniqueIdentity(uuid='jsmith')
    smith.identities = [
        Identity(name='John Smith', email='*****@*****.**', source='scm'),
        Identity(name='John Smith', source='scm'),
        Identity(username='******', source='github'),
        Identity(email='', source='scm'),
    ]

    smith_js = UniqueIdentity(uuid='js')
    smith_js.identities = [
        Identity(name='J. Smith', username='******', source='scm'),
        Identity(username='******', source='scm'),
        Identity(name='Smith. J', source='mls'),
        Identity(name='Smith. J', email='*****@*****.**', source='mls'),
    ]

    smith_alt = UniqueIdentity(uuid='J. Smith')
    smith_alt.identities = [
        Identity(name='J. Smith', username='******', source='alt'),
        Identity(name='John Smith', username='******', source='GitHub-API'),
        Identity(email='', source='alt'),
        Identity(email='jsmith', source='alt'),
    ]

    smith_bad_email = UniqueIdentity(uuid='John Smith')
    smith_bad_email.identities = [Identity(email='jsmith', source='mls')]

    matcher = GitHubMatcher()

    # The first and third unique identities are expected to match,
    # whichever order they are given in.
    for left, right in ((smith, smith_alt), (smith_alt, smith)):
        self.assertEqual(matcher.match(left, right), True)

    # Comparing against the second and fourth is expected to fail
    unrelated = (
        (smith, smith_js),
        (smith, smith_bad_email),
        (smith_alt, smith_js),
        (smith_alt, smith_bad_email),
    )
    for left, right in unrelated:
        self.assertEqual(matcher.match(left, right), False)
def test_filter_identities_no_strict(self):
    """Check EmailMatcher.filter() when strict mode is disabled.

    Two filtered identities are expected for each unique identity,
    including the one whose email is 'jsmith@test'.
    """
    smith = UniqueIdentity(uuid='jsmith')
    smith.identities = [
        Identity(name='John Smith', email='*****@*****.**', source='scm', uuid='jsmith'),
        Identity(name='John Smith', source='scm', uuid='jsmith'),
        Identity(username='******', source='scm', uuid='jsmith'),
        Identity(email='jsmith@test', uuid='jsmith'),
        Identity(email='', source='scm', uuid='jsmith'),
    ]

    rae = UniqueIdentity(uuid='jrae')
    rae.identities = [
        Identity(name='Jane Rae', source='scm', uuid='jrae'),
        Identity(name='Jane Rae Doe', email='*****@*****.**', source='mls', uuid='jrae'),
        Identity(name='jrae', source='scm', uuid='jrae'),
        Identity(email='*****@*****.**', source='scm', uuid='jrae'),
    ]

    matcher = EmailMatcher(strict=False)

    # (unique identity, expected uuid, expected emails in result order)
    cases = (
        (smith, 'jsmith', ('*****@*****.**', 'jsmith@test')),
        (rae, 'jrae', ('*****@*****.**', '*****@*****.**')),
    )

    for unique_identity, uuid, emails in cases:
        filtered = matcher.filter(unique_identity)
        self.assertEqual(len(filtered), len(emails))

        for index, email in enumerate(emails):
            entry = filtered[index]
            self.assertIsInstance(entry, EmailIdentity)
            self.assertEqual(entry.uuid, uuid)
            self.assertEqual(entry.email, email)
def test_filter_identities(self):
    """Check the identities returned by EmailNameMatcher.filter().

    Two filtered identities are expected for the first unique identity
    and three for the second. Names are expected in lower case, and a
    missing name or email is expected to be None.
    """
    smith = UniqueIdentity(uuid='jsmith')
    smith.identities = [
        Identity(name='John Smith', email='*****@*****.**', source='scm', uuid='jsmith'),
        Identity(name='John Smith', source='scm', uuid='jsmith'),
        Identity(username='******', source='scm', uuid='jsmith'),
        Identity(email='', source='scm', uuid='jsmith'),
    ]

    rae = UniqueIdentity(uuid='jrae')
    rae.identities = [
        Identity(name='Jane Rae', source='scm', uuid='jrae'),
        Identity(name='Jane Rae Doe', email='*****@*****.**', source='mls', uuid='jrae'),
        Identity(name='jrae', source='scm', uuid='jrae'),
        Identity(email='*****@*****.**', source='scm', uuid='jrae'),
    ]

    matcher = EmailNameMatcher()

    # (unique identity, expected uuid, expected (name, email) pairs
    # in result order)
    cases = (
        (smith, 'jsmith', (
            ('john smith', '*****@*****.**'),
            ('john smith', None),
        )),
        (rae, 'jrae', (
            ('jane rae', None),
            ('jane rae doe', '*****@*****.**'),
            (None, '*****@*****.**'),
        )),
    )

    for unique_identity, uuid, expected in cases:
        filtered = matcher.filter(unique_identity)
        self.assertEqual(len(filtered), len(expected))

        for index, (name, email) in enumerate(expected):
            entry = filtered[index]
            self.assertIsInstance(entry, EmailNameIdentity)
            self.assertEqual(entry.uuid, uuid)
            self.assertEqual(entry.name, name)
            self.assertEqual(entry.email, email)
def test_match(self):
    """Check EmailMatcher.match() over several unique identities."""
    smith = UniqueIdentity(uuid='jsmith')
    smith.identities = [
        Identity(name='John Smith', email='*****@*****.**', source='scm'),
        Identity(name='John Smith', source='scm'),
        Identity(username='******', source='scm'),
        Identity(email='', source='scm'),
    ]

    smith_js = UniqueIdentity(uuid='js')
    smith_js.identities = [
        Identity(name='J. Smith', username='******', source='scm'),
        Identity(username='******', source='scm'),
        Identity(name='Smith. J', source='mls'),
        Identity(name='Smith. J', email='*****@*****.**', source='mls'),
    ]

    smith_alt = UniqueIdentity(uuid='J. Smith')
    smith_alt.identities = [
        Identity(name='J. Smith', username='******', source='alt'),
        Identity(name='John Smith', username='******', source='alt'),
        Identity(email='', source='alt'),
        Identity(email='jsmith', source='alt'),
    ]

    smith_bad_email = UniqueIdentity(uuid='John Smith')
    smith_bad_email.identities = [Identity(email='jsmith', source='mls')]

    matcher = EmailMatcher()

    # (left, right, expected result), evaluated in this order
    checks = (
        # The first two unique identities are expected to match
        (smith, smith_js, True),
        (smith_js, smith, True),
        # No match is expected against the third one
        (smith, smith_alt, False),
        (smith_alt, smith, False),
        (smith_js, smith_alt, False),
        (smith_alt, smith_js, False),
        # Same email on both sides, but 'jsmith' is not a well-formed
        # address, so no match is expected
        (smith_alt, smith_bad_email, False),
    )

    for left, right, expected in checks:
        outcome = matcher.match(left, right)
        self.assertEqual(outcome, expected)
def test_filter_identities(self):
    """Check the identities returned by GitHubMatcher.filter().

    The expected results only contain entries whose source is 'github'
    or 'GitHub-API'.
    """
    smith = UniqueIdentity(uuid='jsmith')
    smith.identities = [
        Identity(name='John Smith', email='*****@*****.**', source='scm', uuid='jsmith'),
        Identity(name='John Smith', source='scm', uuid='jsmith'),
        Identity(username='******', source='github', uuid='jsmith'),
        Identity(email='', source='scm', uuid='jsmith'),
    ]

    rae = UniqueIdentity(uuid='jrae')
    rae.identities = [
        Identity(username='******', source='GitHub-API', uuid='jrae'),
        Identity(name='Jane Rae Doe', email='*****@*****.**', source='mls', uuid='jrae'),
        Identity(username='******', source='github', uuid='jrae'),
        Identity(email='*****@*****.**', source='scm', uuid='jrae'),
    ]

    matcher = GitHubMatcher()

    # (unique identity, expected uuid, expected (username, source)
    # pairs in result order)
    cases = (
        (smith, 'jsmith', (
            ('jsmith', 'github'),
        )),
        (rae, 'jrae', (
            ('janerae', 'GitHub-API'),
            ('jrae', 'github'),
        )),
    )

    for unique_identity, uuid, expected in cases:
        filtered = matcher.filter(unique_identity)
        self.assertEqual(len(filtered), len(expected))

        for index, (username, source) in enumerate(expected):
            entry = filtered[index]
            self.assertIsInstance(entry, GitHubUsernameIdentity)
            self.assertEqual(entry.uuid, uuid)
            self.assertEqual(entry.username, username)
            self.assertEqual(entry.source, source)
def test_match_with_blacklist(self):
    """Check that blacklist entries affect EmailNameMatcher.match()."""
    smith = UniqueIdentity(uuid='jsmith')
    smith.identities = [
        Identity(name='John Smith', email='*****@*****.**', source='scm'),
        Identity(name='John Smith', source='scm'),
        Identity(username='******', source='scm'),
        Identity(email='', source='scm'),
    ]

    smith_js = UniqueIdentity(uuid='js')
    smith_js.identities = [
        Identity(name='John Smith JR', username='******', source='scm'),
        Identity(username='******', source='scm'),
        Identity(name='Smith. J', source='mls'),
        Identity(name='Smith. J', email='*****@*****.**', source='mls'),
    ]

    smith_alt = UniqueIdentity(uuid='J. Smith')
    smith_alt.identities = [
        Identity(name='John Smith JR', username='******', source='alt'),
        Identity(name='John Smith', username='******', source='alt'),
        Identity(email='', source='alt'),
        Identity(email='jsmith', source='alt'),
    ]

    excluded = [
        MatchingBlacklist(excluded='John Smith'),
        MatchingBlacklist(excluded='*****@*****.**'),
    ]
    matcher = EmailNameMatcher(blacklist=excluded)

    # Each pair is checked in both argument orders
    for left, right in ((smith, smith_js), (smith_js, smith)):
        self.assertEqual(matcher.match(left, right), False)

    # 'John Smith' is blacklisted, so no match is expected
    for left, right in ((smith, smith_alt), (smith_alt, smith)):
        self.assertEqual(matcher.match(left, right), False)

    for left, right in ((smith_js, smith_alt), (smith_alt, smith_js)):
        self.assertEqual(matcher.match(left, right), True)
def test_filter_identities_with_blacklist_not_strict(self):
    """Check EmailMatcher.filter() with a blacklist and strict mode off.

    The blacklist holds 'jrae_at_example.net'. One filtered identity is
    expected for each unique identity; the entry whose email is
    'JRAE_at_example.net' is not among the expected results.
    """
    smith = UniqueIdentity(uuid='jsmith')
    smith.identities = [
        Identity(name='John Smith', email='jsmith_at_example.com', source='scm', uuid='jsmith'),
        Identity(name='John Smith', source='scm', uuid='jsmith'),
        Identity(username='******', source='scm', uuid='jsmith'),
        Identity(email='', source='scm', uuid='jsmith'),
    ]

    rae = UniqueIdentity(uuid='jrae')
    rae.identities = [
        Identity(name='Jane Rae', source='scm', uuid='jrae'),
        Identity(name='Jane Rae Doe', email='jane.rae_at_example.net', source='mls', uuid='jrae'),
        Identity(name='jrae', source='scm', uuid='jrae'),
        Identity(email='JRAE_at_example.net', source='scm', uuid='jrae'),
    ]

    excluded = [MatchingBlacklist(excluded='jrae_at_example.net')]
    matcher = EmailMatcher(blacklist=excluded, strict=False)

    # (unique identity, expected uuid, expected email)
    cases = (
        (smith, 'jsmith', 'jsmith_at_example.com'),
        (rae, 'jrae', 'jane.rae_at_example.net'),
    )

    for unique_identity, uuid, email in cases:
        filtered = matcher.filter(unique_identity)
        self.assertEqual(len(filtered), 1)

        entry = filtered[0]
        self.assertIsInstance(entry, EmailIdentity)
        self.assertEqual(entry.uuid, uuid)
        self.assertEqual(entry.email, email)
def test_filter_identities_no_strict(self):
    """Check which identities EmailMatcher.filter() keeps in non-strict mode."""

    def check_entry(entry, uuid, email):
        # Assertions shared by every filtered identity in this test
        self.assertIsInstance(entry, EmailIdentity)
        self.assertEqual(entry.uuid, uuid)
        self.assertEqual(entry.email, email)

    smith = UniqueIdentity(uuid='jsmith')
    smith.identities = [
        Identity(name='John Smith', email='*****@*****.**', source='scm', uuid='jsmith'),
        Identity(name='John Smith', source='scm', uuid='jsmith'),
        Identity(username='******', source='scm', uuid='jsmith'),
        Identity(email='jsmith@test', uuid='jsmith'),
        Identity(email='', source='scm', uuid='jsmith'),
    ]

    rae = UniqueIdentity(uuid='jrae')
    rae.identities = [
        Identity(name='Jane Rae', source='scm', uuid='jrae'),
        Identity(name='Jane Rae Doe', email='*****@*****.**', source='mls', uuid='jrae'),
        Identity(name='jrae', source='scm', uuid='jrae'),
        Identity(email='*****@*****.**', source='scm', uuid='jrae'),
    ]

    matcher = EmailMatcher(strict=False)

    # 'jsmith@test' is expected to be kept in non-strict mode
    filtered = matcher.filter(smith)
    self.assertEqual(len(filtered), 2)
    check_entry(filtered[0], 'jsmith', '*****@*****.**')
    check_entry(filtered[1], 'jsmith', 'jsmith@test')

    filtered = matcher.filter(rae)
    self.assertEqual(len(filtered), 2)
    check_entry(filtered[0], 'jrae', '*****@*****.**')
    check_entry(filtered[1], 'jrae', '*****@*****.**')
def test_match(self):
    """Check UsernameMatcher.match() over several unique identities."""
    smith = UniqueIdentity(uuid='jsmith')
    smith.identities = [
        Identity(name='John Smith', email='*****@*****.**', source='scm'),
        Identity(name='John Smith', source='scm'),
        Identity(username='******', source='scm'),
        Identity(email='', source='scm'),
    ]

    smith_js = UniqueIdentity(uuid='js')
    smith_js.identities = [
        Identity(name='J. Smith', username='******', source='scm'),
        Identity(username='******', source='scm'),
        Identity(name='Smith. J', source='mls'),
        Identity(name='Smith. J', email='*****@*****.**', source='mls'),
    ]

    smith_alt = UniqueIdentity(uuid='J. Smith')
    smith_alt.identities = [
        Identity(name='J. Smith', username='******', source='alt'),
        Identity(name='John Smith', username='******', source='alt'),
        Identity(email='', source='alt'),
        Identity(email='jsmith', source='alt'),
    ]

    matcher = UsernameMatcher()

    # (left, right, expected result), evaluated in this order
    checks = (
        # The first two unique identities are not expected to match
        (smith, smith_js, False),
        (smith_js, smith, False),
        # The third one is expected to match both of them
        (smith, smith_alt, True),
        (smith_alt, smith, True),
        (smith_js, smith_alt, True),
        (smith_alt, smith_js, True),
    )

    for left, right, expected in checks:
        self.assertEqual(matcher.match(left, right), expected)
def test_match(self):
    """Test EmailNameMatcher.match() over several unique identities.

    Pairs that are expected to match are checked in both argument
    orders, so the test also covers the symmetry of match().
    """
    # Let's define some identities first
    jsmith = UniqueIdentity(uuid='jsmith')
    jsmith.identities = [
        Identity(name='John Smith', email='*****@*****.**', source='scm'),
        Identity(name='John Smith', source='scm'),
        Identity(username='******', source='scm'),
        Identity(email='', source='scm'),
    ]

    john_smith = UniqueIdentity(uuid='js')
    john_smith.identities = [
        Identity(name='J. Smith', username='******', source='scm'),
        Identity(username='******', source='scm'),
        Identity(name='Smith. J', source='mls'),
        Identity(name='Smith. J', email='*****@*****.**', source='mls'),
    ]

    jsmith_alt = UniqueIdentity(uuid='J. Smith')
    jsmith_alt.identities = [
        Identity(name='J. Smith', username='******', source='alt'),
        Identity(name='John Smith', username='******', source='alt'),
        Identity(email='', source='alt'),
        Identity(email='jsmith', source='alt'),
    ]

    jsmith_not_email = UniqueIdentity(uuid='John Smith')
    jsmith_not_email.identities = [Identity(email='jsmith', source='mls')]

    jrae = UniqueIdentity(uuid='jrae')
    jrae.identities = [
        Identity(name='Jane Rae', source='scm'),
        Identity(name='Jane Rae Doe', email='*****@*****.**', source='mls'),
    ]

    jrae_doe = UniqueIdentity(uuid='jraedoe')
    jrae_doe.identities = [
        Identity(name='Jane Rae Doe', email='*****@*****.**', source='mls'),
        Identity(name='jrae', source='scm'),
    ]

    jrae_no_name = UniqueIdentity(uuid='Jane Rae')
    jrae_no_name.identities = [Identity(name='jrae', source='scm')]

    # Tests
    matcher = EmailNameMatcher()

    # First two unique identities must match
    result = matcher.match(jsmith, john_smith)
    self.assertEqual(result, True)

    result = matcher.match(john_smith, jsmith)
    self.assertEqual(result, True)

    # Comparing with the third, only the first one
    # produces a match because of the "John Smith" name
    result = matcher.match(jsmith, jsmith_alt)
    self.assertEqual(result, True)

    result = matcher.match(jsmith_alt, jsmith)
    self.assertEqual(result, True)

    result = matcher.match(john_smith, jsmith_alt)
    self.assertEqual(result, False)

    result = matcher.match(jsmith_alt, john_smith)
    self.assertEqual(result, False)

    # Jane Rae matches Jane Rae Doe because they share
    # the same name "Jane Rae Doe"
    result = matcher.match(jrae, jrae_doe)
    self.assertEqual(result, True)

    # Check the reverse order too; the same call used to be
    # repeated here, leaving this direction untested
    result = matcher.match(jrae_doe, jrae)
    self.assertEqual(result, True)

    # No match with Jane Rae
    result = matcher.match(jsmith, jrae)
    self.assertEqual(result, False)

    result = matcher.match(jsmith, jrae_doe)
    self.assertEqual(result, False)

    result = matcher.match(john_smith, jrae)
    self.assertEqual(result, False)

    result = matcher.match(john_smith, jrae_doe)
    self.assertEqual(result, False)

    result = matcher.match(jsmith_alt, jrae)
    self.assertEqual(result, False)

    result = matcher.match(jsmith_alt, jrae_doe)
    self.assertEqual(result, False)

    # These two unique identities have the same email address
    # but, because 'jsmith' is not a valid email address, they
    # do not match
    result = matcher.match(jsmith_alt, jsmith_not_email)
    self.assertEqual(result, False)

    # These two do not match although they share the same name.
    # In this case the name is invalid because it is not formed
    # like "firstname lastname"
    result = matcher.match(jrae_doe, jrae_no_name)
    self.assertEqual(result, False)
def test_match(self):
    """Test EmailNameMatcher.match() over several unique identities.

    The expectations are listed as (left, right, expected) triples and
    evaluated in order. Pairs that are expected to match are listed in
    both argument orders, so the symmetry of match() is covered too.
    """
    # Let's define some identities first
    jsmith = UniqueIdentity(uuid='jsmith')
    jsmith.identities = [
        Identity(name='John Smith', email='*****@*****.**', source='scm'),
        Identity(name='John Smith', source='scm'),
        Identity(username='******', source='scm'),
        Identity(email='', source='scm'),
    ]

    john_smith = UniqueIdentity(uuid='js')
    john_smith.identities = [
        Identity(name='J. Smith', username='******', source='scm'),
        Identity(username='******', source='scm'),
        Identity(name='Smith. J', source='mls'),
        Identity(name='Smith. J', email='*****@*****.**', source='mls'),
    ]

    jsmith_alt = UniqueIdentity(uuid='J. Smith')
    jsmith_alt.identities = [
        Identity(name='J. Smith', username='******', source='alt'),
        Identity(name='John Smith', username='******', source='alt'),
        Identity(email='', source='alt'),
        Identity(email='jsmith', source='alt'),
    ]

    jsmith_not_email = UniqueIdentity(uuid='John Smith')
    jsmith_not_email.identities = [Identity(email='jsmith', source='mls')]

    jrae = UniqueIdentity(uuid='jrae')
    jrae.identities = [
        Identity(name='Jane Rae', source='scm'),
        Identity(name='Jane Rae Doe', email='*****@*****.**', source='mls'),
    ]

    jrae_doe = UniqueIdentity(uuid='jraedoe')
    jrae_doe.identities = [
        Identity(name='Jane Rae Doe', email='*****@*****.**', source='mls'),
        Identity(name='jrae', source='scm'),
    ]

    jrae_no_name = UniqueIdentity(uuid='Jane Rae')
    jrae_no_name.identities = [Identity(name='jrae', source='scm')]

    # Tests
    matcher = EmailNameMatcher()

    checks = (
        # First two unique identities must match
        (jsmith, john_smith, True),
        (john_smith, jsmith, True),
        # Comparing with the third, only the first one
        # produces a match because of the "John Smith" name
        (jsmith, jsmith_alt, True),
        (jsmith_alt, jsmith, True),
        (john_smith, jsmith_alt, False),
        (jsmith_alt, john_smith, False),
        # Jane Rae matches Jane Rae Doe because they share the same
        # name "Jane Rae Doe". The second entry checks the reverse
        # order; the first call used to be repeated instead, leaving
        # that direction untested.
        (jrae, jrae_doe, True),
        (jrae_doe, jrae, True),
        # No match with Jane Rae
        (jsmith, jrae, False),
        (jsmith, jrae_doe, False),
        (john_smith, jrae, False),
        (john_smith, jrae_doe, False),
        (jsmith_alt, jrae, False),
        (jsmith_alt, jrae_doe, False),
        # These two unique identities have the same email address
        # but, because 'jsmith' is not a valid email address, they
        # do not match
        (jsmith_alt, jsmith_not_email, False),
        # These two do not match although they share the same name.
        # In this case the name is invalid because it is not formed
        # like "firstname lastname"
        (jrae_doe, jrae_no_name, False),
    )

    for left, right, expected in checks:
        result = matcher.match(left, right)
        self.assertEqual(result, expected)
def test_match_with_blacklist(self):
    """Check UsernameMatcher.match() results with and without a blacklist.

    The same four unique identities are compared three times: with no
    blacklist, with a single excluded entry, and with two excluded
    entries.
    """
    def expect(current_matcher, pairs, outcome):
        # Assert that every (left, right) pair yields `outcome`,
        # evaluating the pairs in the order given.
        for left, right in pairs:
            self.assertEqual(current_matcher.match(left, right), outcome)

    # Fixtures: two "Smith" identities and two "Rae" identities
    jsmith = UniqueIdentity(uuid='jsmith')
    jsmith.identities = [
        Identity(name='John Smith', username='******', source='scm'),
        Identity(name='John Smith', source='scm'),
        Identity(username='******', source='scm'),
        Identity(email='', source='scm'),
    ]

    john_smith = UniqueIdentity(uuid='js')
    john_smith.identities = [
        Identity(name='J. Smith', email='*****@*****.**', source='scm'),
        Identity(username='******', source='scm'),
        Identity(name='Smith. J', source='mls'),
        Identity(name='Smith. J', username='******', source='mls'),
    ]

    jrae = UniqueIdentity(uuid='jrae')
    jrae.identities = [
        Identity(name='Jane Rae', source='scm', uuid='jrae'),
        Identity(name='Jane Rae Doe', username='******',
                 email='*****@*****.**', source='mls', uuid='jrae'),
        Identity(name='jrae', source='scm', uuid='jrae'),
        Identity(email='*****@*****.**', source='scm', uuid='jrae'),
    ]

    jane_rae = UniqueIdentity(uuid='Jane Rae')
    jane_rae.identities = [
        Identity(name='Jane Rae', source='scm', uuid='Jane Rae'),
        Identity(email='*****@*****.**', username='******',
                 source='mls', uuid='Jane Rae'),
    ]

    smith_pairs = ((jsmith, john_smith), (john_smith, jsmith))
    rae_pairs = ((jrae, jane_rae), (jane_rae, jrae))

    # Without a blacklist both couples are expected to match
    plain_matcher = UsernameMatcher()
    expect(plain_matcher, smith_pairs, True)
    expect(plain_matcher, rae_pairs, True)

    # With one excluded entry only the Smith couple stops matching
    single_entry = [MatchingBlacklist(excluded='john_smith')]
    filtered_matcher = UsernameMatcher(blacklist=single_entry)
    expect(filtered_matcher, smith_pairs, False)
    expect(filtered_matcher, rae_pairs, True)

    # With these two excluded entries no match is expected at all
    two_entries = [
        MatchingBlacklist(excluded='John_Smith'),
        MatchingBlacklist(excluded='Jane.rae'),
    ]
    strict_matcher = UsernameMatcher(blacklist=two_entries)
    expect(strict_matcher, smith_pairs, False)
    expect(strict_matcher, rae_pairs, False)
def test_match_with_blacklist(self):
    """Check EmailMatcher.match() results with and without a blacklist.

    Two couples of unique identities are compared in both argument
    orders: first with no blacklist, then with two blacklist entries,
    and finally with three blacklist entries.
    """
    def assert_both_ways(email_matcher, first, second, expected):
        # Compare (first, second) and then (second, first) against
        # the same expected outcome.
        self.assertEqual(email_matcher.match(first, second), expected)
        self.assertEqual(email_matcher.match(second, first), expected)

    # Fixtures
    jsmith = UniqueIdentity(uuid='jsmith')
    jsmith.identities = [
        Identity(name='John Smith', email='*****@*****.**', source='scm'),
        Identity(name='John Smith', source='scm'),
        Identity(username='******', source='scm'),
        Identity(email='', source='scm'),
    ]

    john_smith = UniqueIdentity(uuid='js')
    john_smith.identities = [
        Identity(name='J. Smith', username='******', source='scm'),
        Identity(username='******', source='scm'),
        Identity(name='Smith. J', source='mls'),
        Identity(name='Smith. J', email='*****@*****.**', source='mls'),
    ]

    jrae = UniqueIdentity(uuid='jrae')
    jrae.identities = [
        Identity(name='Jane Rae', source='scm', uuid='jrae'),
        Identity(name='Jane Rae Doe', email='*****@*****.**',
                 source='mls', uuid='jrae'),
        Identity(name='jrae', source='scm', uuid='jrae'),
        Identity(email='*****@*****.**', source='scm', uuid='jrae'),
    ]

    jane_rae = UniqueIdentity(uuid='Jane Rae')
    jane_rae.identities = [
        Identity(name='Jane Rae', source='scm', uuid='Jane Rae'),
        Identity(email='*****@*****.**', source='mls', uuid='Jane Rae'),
    ]

    # No blacklist: both couples are expected to match
    open_matcher = EmailMatcher()
    assert_both_ways(open_matcher, jsmith, john_smith, True)
    assert_both_ways(open_matcher, jrae, jane_rae, True)

    # Two excluded addresses: the Smith couple no longer matches,
    # the Rae couple still does
    excluded_two = [
        MatchingBlacklist(excluded='*****@*****.**'),
        MatchingBlacklist(excluded='*****@*****.**'),
    ]
    partial_matcher = EmailMatcher(blacklist=excluded_two)
    assert_both_ways(partial_matcher, jsmith, john_smith, False)
    assert_both_ways(partial_matcher, jrae, jane_rae, True)

    # Three excluded addresses: no match is expected for any couple
    excluded_three = [
        MatchingBlacklist(excluded='*****@*****.**'),
        MatchingBlacklist(excluded='*****@*****.**'),
        MatchingBlacklist(excluded='*****@*****.**'),
    ]
    closed_matcher = EmailMatcher(blacklist=excluded_three)
    assert_both_ways(closed_matcher, jsmith, john_smith, False)
    assert_both_ways(closed_matcher, jrae, jane_rae, False)