def test_not_null_source(self):
    """Check whether every identity has a source.

    An ``Identity`` created without any field is added to the session
    and committed; the commit is expected to raise an error whose
    message matches ``NULL_CHECK_ERROR``.
    """
    # Both the expected exception class and the name of the regex
    # assertion depend on the major Python version.
    # ``assertRaisesRegexp`` is a deprecated alias that was removed
    # in Python 3.12, so it is only used on Python 2, where
    # ``assertRaisesRegex`` is not available.
    if sys.version_info[0] >= 3:  # Python 3
        expected = IntegrityError
        assert_raises_regex = self.assertRaisesRegex
    else:  # Python 2
        expected = OperationalError
        assert_raises_regex = self.assertRaisesRegexp

    with assert_raises_regex(expected, NULL_CHECK_ERROR):
        id1 = Identity()
        self.session.add(id1)
        self.session.commit()
def test_unique_identities(self):
    """Check if there is only one tuple with the same values.

    Two identities that differ only in their ``id`` are committed
    together and the commit is expected to raise ``IntegrityError``
    matching ``DUP_CHECK_ERROR``. After a rollback, the source of the
    second identity is changed and the commit is expected to succeed.
    """
    id1 = Identity(id='A', name='John Smith', email='*****@*****.**',
                   username='******', source='scm')
    id2 = Identity(id='B', name='John Smith', email='*****@*****.**',
                   username='******', source='scm')

    # assertRaisesRegex is used instead of the deprecated
    # assertRaisesRegexp alias, which was removed in Python 3.12
    with self.assertRaisesRegex(IntegrityError, DUP_CHECK_ERROR):
        self.session.add(id1)
        self.session.add(id2)
        self.session.commit()

    # Discard the failed transaction before reusing the session
    self.session.rollback()

    # Changing a property should not raise any error
    id2.source = 'mls'
    self.session.add(id1)
    self.session.add(id2)
    self.session.commit()

    self.assertNotEqual(id1.id, id2.id)
def test_filter_identities_no_strict(self):
    """Check the output of ``EmailMatcher.filter`` with ``strict=False``.

    Two entries are expected back for each unique identity, every one
    of them being an ``EmailIdentity`` carrying the owner's uuid.
    """
    # Fixtures: two unique identities with assorted identities
    jsmith = UniqueIdentity(uuid='jsmith')
    jsmith.identities = [
        Identity(name='John Smith', email='*****@*****.**', source='scm', uuid='jsmith'),
        Identity(name='John Smith', source='scm', uuid='jsmith'),
        Identity(username='******', source='scm', uuid='jsmith'),
        Identity(email='jsmith@test', uuid='jsmith'),
        Identity(email='', source='scm', uuid='jsmith'),
    ]

    jrae = UniqueIdentity(uuid='jrae')
    jrae.identities = [
        Identity(name='Jane Rae', source='scm', uuid='jrae'),
        Identity(name='Jane Rae Doe', email='*****@*****.**', source='mls', uuid='jrae'),
        Identity(name='jrae', source='scm', uuid='jrae'),
        Identity(email='*****@*****.**', source='scm', uuid='jrae'),
    ]

    matcher = EmailMatcher(strict=False)

    # (unique identity, expected uuid, expected emails in order)
    scenarios = (
        (jsmith, 'jsmith', ('*****@*****.**', 'jsmith@test')),
        (jrae, 'jrae', ('*****@*****.**', '*****@*****.**')),
    )

    for unique_identity, uuid, emails in scenarios:
        filtered = matcher.filter(unique_identity)
        self.assertEqual(len(filtered), 2)

        for position, email in enumerate(emails):
            entry = filtered[position]
            self.assertIsInstance(entry, EmailIdentity)
            self.assertEqual(entry.uuid, uuid)
            self.assertEqual(entry.email, email)
def test_to_dict(self):
    """Check the dictionary returned by ``to_dict()`` on an identity."""
    owner = UniqueIdentity(uuid='John Smith')
    self.session.add(owner)

    identity = Identity(id='A', name='John Smith', email='*****@*****.**',
                        username='******', source='scm', uuid='John Smith')
    self.session.add(identity)

    self.session.commit()

    # The serialized form must be a dict holding these values
    as_dict = identity.to_dict()
    self.assertIsInstance(as_dict, dict)

    expected_fields = (
        ('id', 'A'),
        ('name', 'John Smith'),
        ('email', '*****@*****.**'),
        ('username', 'jsmith'),
        ('source', 'scm'),
        ('uuid', 'John Smith'),
    )
    for key, value in expected_fields:
        self.assertEqual(as_dict[key], value)
def test_unique_identities(self):
    """Verify that identities holding the same values are rejected.

    Committing two identities that share name, email, username and
    source is expected to raise ``IntegrityError`` matching
    ``DUP_CHECK_ERROR``. Once the source of one of them is changed,
    the commit is expected to pass and both keep different ids.
    """
    shared = dict(name='John Smith', email='*****@*****.**',
                  username='******', source='scm')
    first = Identity(id='A', **shared)
    second = Identity(id='B', **shared)

    with self.assertRaisesRegex(IntegrityError, DUP_CHECK_ERROR):
        for identity in (first, second):
            self.session.add(identity)
        self.session.commit()

    # Leave the failed transaction behind
    self.session.rollback()

    # With a different source the tuples are no longer equal
    second.source = 'mls'
    for identity in (first, second):
        self.session.add(identity)
    self.session.commit()

    self.assertNotEqual(first.id, second.id)
def test_filter_identities(self):
    """Check the output of ``UsernameMatcher.filter``.

    Every returned entry must be a ``UsernameIdentity`` carrying the
    uuid of the unique identity that was filtered.
    """
    # Fixtures
    jsmith = UniqueIdentity(uuid='jsmith')
    jsmith.identities = [
        Identity(name='John Smith', username='******', source='scm', uuid='jsmith'),
        Identity(name='John Smith', source='scm', uuid='jsmith'),
        Identity(email='*****@*****.**', source='scm', uuid='jsmith'),
        Identity(username='', source='scm', uuid='jsmith'),
    ]

    jrae = UniqueIdentity(uuid='jrae')
    jrae.identities = [
        Identity(name='Jane Rae', source='scm', uuid='jrae'),
        Identity(name='Jane Rae Doe', username='******', source='mls', uuid='jrae'),
        Identity(name='jrae', source='scm', uuid='jrae'),
        Identity(username='******', source='scm', uuid='jrae'),
    ]

    matcher = UsernameMatcher()

    # (unique identity, expected uuid, expected usernames in order)
    scenarios = (
        (jsmith, 'jsmith', ('jsmith',)),
        (jrae, 'jrae', ('jane.rae', 'jrae')),
    )

    for unique_identity, uuid, usernames in scenarios:
        filtered = matcher.filter(unique_identity)
        self.assertEqual(len(filtered), len(usernames))

        for position, username in enumerate(usernames):
            entry = filtered[position]
            self.assertIsInstance(entry, UsernameIdentity)
            self.assertEqual(entry.uuid, uuid)
            self.assertEqual(entry.username, username)
def test_filter_identities_with_blacklist(self):
    """Check ``GitHubMatcher.filter`` when a blacklist is supplied.

    A single ``GitHubUsernameIdentity`` is expected back for each of
    the two unique identities.
    """
    # Fixtures
    jsmith = UniqueIdentity(uuid='jsmith')
    jsmith.identities = [
        Identity(name='John Smith', email='*****@*****.**', source='scm', uuid='jsmith'),
        Identity(name='John Smith', source='scm', uuid='jsmith'),
        Identity(username='******', source='github', uuid='jsmith'),
        Identity(email='', source='scm', uuid='jsmith'),
    ]

    jrae = UniqueIdentity(uuid='jrae')
    jrae.identities = [
        Identity(username='******', source='GitHub-API', uuid='jrae'),
        Identity(name='Jane Rae Doe', email='*****@*****.**', source='mls', uuid='jrae'),
        Identity(username='******', source='github', uuid='jrae'),
        Identity(email='*****@*****.**', source='scm', uuid='jrae'),
    ]

    excluded = [MatchingBlacklist(excluded='jrae')]
    matcher = GitHubMatcher(blacklist=excluded)

    # (unique identity, expected uuid, expected username, expected source)
    scenarios = (
        (jsmith, 'jsmith', 'jsmith', 'github'),
        (jrae, 'jrae', 'janerae', 'GitHub-API'),
    )

    for unique_identity, uuid, username, source in scenarios:
        filtered = matcher.filter(unique_identity)
        self.assertEqual(len(filtered), 1)

        entry = filtered[0]
        self.assertIsInstance(entry, GitHubUsernameIdentity)
        self.assertEqual(entry.uuid, uuid)
        self.assertEqual(entry.username, username)
        self.assertEqual(entry.source, source)
def test_filter_identities_with_blacklist_not_strict(self):
    """Check ``EmailMatcher.filter`` with a blacklist and ``strict=False``.

    A single ``EmailIdentity`` is expected back for each of the two
    unique identities.
    """
    # Fixtures
    jsmith = UniqueIdentity(uuid='jsmith')
    jsmith.identities = [
        Identity(name='John Smith', email='jsmith_at_example.com', source='scm', uuid='jsmith'),
        Identity(name='John Smith', source='scm', uuid='jsmith'),
        Identity(username='******', source='scm', uuid='jsmith'),
        Identity(email='', source='scm', uuid='jsmith'),
    ]

    jrae = UniqueIdentity(uuid='jrae')
    jrae.identities = [
        Identity(name='Jane Rae', source='scm', uuid='jrae'),
        Identity(name='Jane Rae Doe', email='jane.rae_at_example.net', source='mls', uuid='jrae'),
        Identity(name='jrae', source='scm', uuid='jrae'),
        Identity(email='JRAE_at_example.net', source='scm', uuid='jrae'),
    ]

    excluded = [MatchingBlacklist(excluded='jrae_at_example.net')]
    matcher = EmailMatcher(blacklist=excluded, strict=False)

    # (unique identity, expected uuid, expected email)
    scenarios = (
        (jsmith, 'jsmith', 'jsmith_at_example.com'),
        (jrae, 'jrae', 'jane.rae_at_example.net'),
    )

    for unique_identity, uuid, email in scenarios:
        filtered = matcher.filter(unique_identity)
        self.assertEqual(len(filtered), 1)

        entry = filtered[0]
        self.assertIsInstance(entry, EmailIdentity)
        self.assertEqual(entry.uuid, uuid)
        self.assertEqual(entry.email, email)
def test_filter_identities_with_sources_list(self):
    """Check ``EmailMatcher.filter`` when a list of sources is supplied.

    With sources ``['mls', 'alt']`` nothing is expected back for the
    first unique identity and one ``EmailIdentity`` for the second.
    """
    # Fixtures
    jsmith = UniqueIdentity(uuid='jsmith')
    jsmith.identities = [
        Identity(name='John Smith', email='*****@*****.**', source='scm', uuid='jsmith'),
        Identity(name='John Smith', source='scm', uuid='jsmith'),
        Identity(name='John Smith JR', source='scm', uuid='jsmith'),
        Identity(username='******', source='mls', uuid='jsmith'),
        Identity(email='', source='scm', uuid='jsmith'),
    ]

    jrae = UniqueIdentity(uuid='jrae')
    jrae.identities = [
        Identity(name='Jane Rae', source='scm', uuid='jrae'),
        Identity(name='Jane Rae Doe', email='*****@*****.**', source='mls', uuid='jrae'),
        Identity(name='jrae', source='scm', uuid='jrae'),
        Identity(email='*****@*****.**', source='scm', uuid='jrae'),
    ]

    matcher = EmailMatcher(sources=['mls', 'alt'])

    # Nothing is expected for the first unique identity
    filtered = matcher.filter(jsmith)
    self.assertEqual(len(filtered), 0)

    # One entry is expected for the second one
    filtered = matcher.filter(jrae)
    self.assertEqual(len(filtered), 1)

    entry = filtered[0]
    self.assertIsInstance(entry, EmailIdentity)
    self.assertEqual(entry.uuid, 'jrae')
    self.assertEqual(entry.email, '*****@*****.**')
def test_match_with_sources_list(self):
    """Check ``UsernameMatcher.match`` for several lists of sources."""
    jsmith = UniqueIdentity(uuid='jsmith')
    jsmith.identities = [
        Identity(name='John Smith', email='*****@*****.**', source='scm'),
        Identity(name='John Smith', source='scm'),
        Identity(username='******', source='scm'),
        Identity(email='', source='scm'),
    ]

    jsmith_alt = UniqueIdentity(uuid='J. Smith')
    jsmith_alt.identities = [
        Identity(name='John Smith JR', email='*****@*****.**', source='alt'),
        Identity(name='John Smith', username='******', source='alt'),
        Identity(email='', source='alt'),
        Identity(email='jsmith', source='alt'),
    ]

    # The first two lists are expected to give no match; a match is
    # only expected when 'scm' and 'alt' are combined
    scenarios = (
        (['github'], False),
        (['scm'], False),
        (['scm', 'alt'], True),
    )

    for sources, expected in scenarios:
        matcher = UsernameMatcher(sources=sources)
        self.assertEqual(matcher.match(jsmith, jsmith_alt), expected)
def test_to_dict(self):
    """Check the dictionary returned by ``to_dict()`` on a unique identity.

    The output is inspected twice: first with two identities and no
    profile, then again after a profile has been stored.
    """
    country = Country(code='US', name='United States of America', alpha3='USA')
    self.session.add(country)

    uid = UniqueIdentity(uuid='John Smith')
    self.session.add(uid)

    full = Identity(id='A', name='John Smith', email='*****@*****.**',
                    username='******', source='scm', uuid='John Smith')
    partial = Identity(id='B', name=None, email='*****@*****.**',
                       username=None, source='scm', uuid='John Smith')
    for identity in (full, partial):
        self.session.add(identity)

    self.session.commit()

    # Without a profile
    as_dict = uid.to_dict()
    self.assertIsInstance(as_dict, dict)
    self.assertEqual(as_dict['uuid'], 'John Smith')
    self.assertEqual(as_dict['profile'], None)

    identities = as_dict['identities']
    self.assertEqual(len(identities), 2)

    # Expected (key, value) pairs for each identity, in order
    expected_identities = (
        (('id', 'A'), ('name', 'John Smith'), ('email', '*****@*****.**'),
         ('username', 'jsmith'), ('source', 'scm'), ('uuid', 'John Smith')),
        (('id', 'B'), ('name', None), ('email', '*****@*****.**'),
         ('username', None), ('source', 'scm'), ('uuid', 'John Smith')),
    )
    for position, fields in enumerate(expected_identities):
        entry = identities[position]
        for key, value in fields:
            self.assertEqual(entry[key], value)

    # Store a profile and serialize again
    profile = Profile(uuid='John Smith', name='Smith, J.',
                      email='*****@*****.**', is_bot=True,
                      country_code='US')
    self.session.add(profile)
    self.session.commit()

    profile_dict = uid.to_dict()['profile']
    self.assertEqual(profile_dict['uuid'], 'John Smith')
    self.assertEqual(profile_dict['name'], 'Smith, J.')
    self.assertEqual(profile_dict['email'], '*****@*****.**')
    self.assertEqual(profile_dict['is_bot'], True)
    self.assertEqual(profile_dict['country']['code'], 'US')
    self.assertEqual(profile_dict['country']['name'], 'United States of America')
def test_match_with_blacklist(self):
    """Check ``UsernameMatcher.match`` with and without a blacklist.

    The same four comparisons are run three times: with no blacklist,
    with one excluded entry and with two excluded entries.
    """
    # Fixtures
    jsmith = UniqueIdentity(uuid='jsmith')
    jsmith.identities = [
        Identity(name='John Smith', username='******', source='scm'),
        Identity(name='John Smith', source='scm'),
        Identity(username='******', source='scm'),
        Identity(email='', source='scm'),
    ]

    john_smith = UniqueIdentity(uuid='js')
    john_smith.identities = [
        Identity(name='J. Smith', email='*****@*****.**', source='scm'),
        Identity(username='******', source='scm'),
        Identity(name='Smith. J', source='mls'),
        Identity(name='Smith. J', username='******', source='mls'),
    ]

    jrae = UniqueIdentity(uuid='jrae')
    jrae.identities = [
        Identity(name='Jane Rae', source='scm', uuid='jrae'),
        Identity(name='Jane Rae Doe', username='******', email='*****@*****.**', source='mls', uuid='jrae'),
        Identity(name='jrae', source='scm', uuid='jrae'),
        Identity(email='*****@*****.**', source='scm', uuid='jrae'),
    ]

    jane_rae = UniqueIdentity(uuid='Jane Rae')
    jane_rae.identities = [
        Identity(name='Jane Rae', source='scm', uuid='Jane Rae'),
        Identity(email='*****@*****.**', username='******', source='mls', uuid='Jane Rae'),
    ]

    # Comparisons run in every round, in both directions
    pairs = (
        (jsmith, john_smith),
        (john_smith, jsmith),
        (jrae, jane_rae),
        (jane_rae, jrae),
    )

    # No blacklist: every pair is expected to match
    matcher = UsernameMatcher()
    for (left, right), expected in zip(pairs, (True, True, True, True)):
        self.assertEqual(matcher.match(left, right), expected)

    # One excluded entry: only the second couple is expected to match
    excluded = [MatchingBlacklist(excluded='john_smith')]
    matcher = UsernameMatcher(blacklist=excluded)
    for (left, right), expected in zip(pairs, (False, False, True, True)):
        self.assertEqual(matcher.match(left, right), expected)

    # Two excluded entries: no match is expected at all
    excluded = [
        MatchingBlacklist(excluded='John_Smith'),
        MatchingBlacklist(excluded='Jane.rae'),
    ]
    matcher = UsernameMatcher(blacklist=excluded)
    for (left, right), expected in zip(pairs, (False, False, False, False)):
        self.assertEqual(matcher.match(left, right), expected)
def test_match(self):
    """Check ``UsernameMatcher.match`` over three unique identities.

    Each couple is compared in both directions.
    """
    # Fixtures
    jsmith = UniqueIdentity(uuid='jsmith')
    jsmith.identities = [
        Identity(name='John Smith', email='*****@*****.**', source='scm'),
        Identity(name='John Smith', source='scm'),
        Identity(username='******', source='scm'),
        Identity(email='', source='scm'),
    ]

    john_smith = UniqueIdentity(uuid='js')
    john_smith.identities = [
        Identity(name='J. Smith', username='******', source='scm'),
        Identity(username='******', source='scm'),
        Identity(name='Smith. J', source='mls'),
        Identity(name='Smith. J', email='*****@*****.**', source='mls'),
    ]

    jsmith_alt = UniqueIdentity(uuid='J. Smith')
    jsmith_alt.identities = [
        Identity(name='J. Smith', username='******', source='alt'),
        Identity(name='John Smith', username='******', source='alt'),
        Identity(email='', source='alt'),
        Identity(email='jsmith', source='alt'),
    ]

    matcher = UsernameMatcher()

    # (left, right, expected result)
    comparisons = (
        # The first two unique identities are not expected to match
        (jsmith, john_smith, False),
        (john_smith, jsmith, False),
        # The third one is expected to match both of them
        (jsmith, jsmith_alt, True),
        (jsmith_alt, jsmith, True),
        (john_smith, jsmith_alt, True),
        (jsmith_alt, john_smith, True),
    )

    for left, right, expected in comparisons:
        self.assertEqual(matcher.match(left, right), expected)
def setUp(self):
    """Build the five unique identities used as fixtures.

    They are stored on the test case as ``john_smith``, ``jsmith``,
    ``jane_rae``, ``js_alt`` and ``jrae``.
    """
    john_smith = UniqueIdentity('John Smith')
    john_smith.identities = [
        Identity(email='*****@*****.**', name='John Smith', source='scm', uuid='John Smith'),
        Identity(name='John Smith', source='scm', uuid='John Smith'),
        Identity(username='******', source='scm', uuid='John Smith'),
    ]
    self.john_smith = john_smith

    jsmith = UniqueIdentity('J. Smith')
    jsmith.identities = [
        Identity(name='J. Smith', username='******', source='alt', uuid='J. Smith'),
        Identity(name='John Smith', username='******', source='alt', uuid='J. Smith'),
        Identity(email='jsmith', source='alt', uuid='J. Smith'),
    ]
    self.jsmith = jsmith

    jane_rae = UniqueIdentity('Jane Rae')
    jane_rae.identities = [
        Identity(name='Janer Rae', source='mls', uuid='Jane Rae'),
        Identity(email='*****@*****.**', name='Jane Rae Doe', source='mls', uuid='Jane Rae'),
    ]
    self.jane_rae = jane_rae

    js_alt = UniqueIdentity('john_smith')
    js_alt.identities = [
        Identity(name='J. Smith', username='******', source='scm', uuid='john_smith'),
        Identity(username='******', source='mls', uuid='john_smith'),
        Identity(username='******', source='mls', uuid='john_smith'),
        Identity(email='*****@*****.**', name='Smith. J', source='mls', uuid='john_smith'),
    ]
    self.js_alt = js_alt

    jrae = UniqueIdentity('jrae')
    jrae.identities = [
        Identity(email='*****@*****.**', name='Jane Rae Doe', source='mls', uuid='jrae'),
        Identity(name='jrae', source='mls', uuid='jrae'),
        Identity(name='jrae', source='scm', uuid='jrae'),
    ]
    self.jrae = jrae
def test_match(self):
    """Test match method.

    Runs ``EmailNameMatcher.match`` over several unique identities,
    checking symmetric couples in both directions.
    """
    # Let's define some identities first
    jsmith = UniqueIdentity(uuid='jsmith')
    jsmith.identities = [
        Identity(name='John Smith', email='*****@*****.**', source='scm'),
        Identity(name='John Smith', source='scm'),
        Identity(username='******', source='scm'),
        Identity(email='', source='scm'),
    ]

    john_smith = UniqueIdentity(uuid='js')
    john_smith.identities = [
        Identity(name='J. Smith', username='******', source='scm'),
        Identity(username='******', source='scm'),
        Identity(name='Smith. J', source='mls'),
        Identity(name='Smith. J', email='*****@*****.**', source='mls'),
    ]

    jsmith_alt = UniqueIdentity(uuid='J. Smith')
    jsmith_alt.identities = [
        Identity(name='J. Smith', username='******', source='alt'),
        Identity(name='John Smith', username='******', source='alt'),
        Identity(email='', source='alt'),
        Identity(email='jsmith', source='alt'),
    ]

    jsmith_not_email = UniqueIdentity(uuid='John Smith')
    jsmith_not_email.identities = [Identity(email='jsmith', source='mls')]

    jrae = UniqueIdentity(uuid='jrae')
    jrae.identities = [
        Identity(name='Jane Rae', source='scm'),
        Identity(name='Jane Rae Doe', email='*****@*****.**', source='mls'),
    ]

    jrae_doe = UniqueIdentity(uuid='jraedoe')
    jrae_doe.identities = [
        Identity(name='Jane Rae Doe', email='*****@*****.**', source='mls'),
        Identity(name='jrae', source='scm'),
    ]

    jrae_no_name = UniqueIdentity(uuid='Jane Rae')
    jrae_no_name.identities = [Identity(name='jrae', source='scm')]

    # Tests
    matcher = EmailNameMatcher()

    # First two unique identities must match
    result = matcher.match(jsmith, john_smith)
    self.assertEqual(result, True)

    result = matcher.match(john_smith, jsmith)
    self.assertEqual(result, True)

    # Comparing with the third only the first one
    # produces a match because of "John Smith" name
    result = matcher.match(jsmith, jsmith_alt)
    self.assertEqual(result, True)

    result = matcher.match(jsmith_alt, jsmith)
    self.assertEqual(result, True)

    result = matcher.match(john_smith, jsmith_alt)
    self.assertEqual(result, False)

    result = matcher.match(jsmith_alt, john_smith)
    self.assertEqual(result, False)

    # Jane Rae matches Jane Rae Doe because they share
    # the same name "Jane Rae Doe"
    result = matcher.match(jrae, jrae_doe)
    self.assertEqual(result, True)

    # The reverse direction is checked too, as it is done for
    # the other couples in this test
    result = matcher.match(jrae_doe, jrae)
    self.assertEqual(result, True)

    # No match with Jane Rae
    result = matcher.match(jsmith, jrae)
    self.assertEqual(result, False)

    result = matcher.match(jsmith, jrae_doe)
    self.assertEqual(result, False)

    result = matcher.match(john_smith, jrae)
    self.assertEqual(result, False)

    result = matcher.match(john_smith, jrae_doe)
    self.assertEqual(result, False)

    result = matcher.match(jsmith_alt, jrae)
    self.assertEqual(result, False)

    result = matcher.match(jsmith_alt, jrae_doe)
    self.assertEqual(result, False)

    # These two unique identities have the same email address
    # but since 'jsmith' is not a valid email address, they
    # do not match
    result = matcher.match(jsmith_alt, jsmith_not_email)
    self.assertEqual(result, False)

    # These two do not match although they share the same name.
    # In this case the name is invalid because it is not formed
    # like "firstname lastname"
    result = matcher.match(jrae_doe, jrae_no_name)
    self.assertEqual(result, False)
def test_match_with_blacklist(self):
    """Check ``EmailNameMatcher.match`` when a blacklist is supplied.

    Each couple is compared in both directions.
    """
    # Fixtures
    jsmith = UniqueIdentity(uuid='jsmith')
    jsmith.identities = [
        Identity(name='John Smith', email='*****@*****.**', source='scm'),
        Identity(name='John Smith', source='scm'),
        Identity(username='******', source='scm'),
        Identity(email='', source='scm'),
    ]

    john_smith = UniqueIdentity(uuid='js')
    john_smith.identities = [
        Identity(name='John Smith JR', username='******', source='scm'),
        Identity(username='******', source='scm'),
        Identity(name='Smith. J', source='mls'),
        Identity(name='Smith. J', email='*****@*****.**', source='mls'),
    ]

    jsmith_alt = UniqueIdentity(uuid='J. Smith')
    jsmith_alt.identities = [
        Identity(name='John Smith JR', username='******', source='alt'),
        Identity(name='John Smith', username='******', source='alt'),
        Identity(email='', source='alt'),
        Identity(email='jsmith', source='alt'),
    ]

    excluded = [
        MatchingBlacklist(excluded='John Smith'),
        MatchingBlacklist(excluded='*****@*****.**'),
    ]
    matcher = EmailNameMatcher(blacklist=excluded)

    # (left, right, expected result)
    comparisons = (
        (jsmith, john_smith, False),
        (john_smith, jsmith, False),
        # 'John Smith' is in the blacklist, so no match is expected
        (jsmith, jsmith_alt, False),
        (jsmith_alt, jsmith, False),
        (john_smith, jsmith_alt, True),
        (jsmith_alt, john_smith, True),
    )

    for left, right, expected in comparisons:
        self.assertEqual(matcher.match(left, right), expected)
def test_match(self):
    """Check ``EmailMatcher.match`` over several unique identities.

    Couples are compared in both directions, except for the last
    comparison, which involves an email that is not a valid address.
    """
    # Fixtures
    jsmith = UniqueIdentity(uuid='jsmith')
    jsmith.identities = [
        Identity(name='John Smith', email='*****@*****.**', source='scm'),
        Identity(name='John Smith', source='scm'),
        Identity(username='******', source='scm'),
        Identity(email='', source='scm'),
    ]

    john_smith = UniqueIdentity(uuid='js')
    john_smith.identities = [
        Identity(name='J. Smith', username='******', source='scm'),
        Identity(username='******', source='scm'),
        Identity(name='Smith. J', source='mls'),
        Identity(name='Smith. J', email='*****@*****.**', source='mls'),
    ]

    jsmith_alt = UniqueIdentity(uuid='J. Smith')
    jsmith_alt.identities = [
        Identity(name='J. Smith', username='******', source='alt'),
        Identity(name='John Smith', username='******', source='alt'),
        Identity(email='', source='alt'),
        Identity(email='jsmith', source='alt'),
    ]

    jsmith_not_email = UniqueIdentity(uuid='John Smith')
    jsmith_not_email.identities = [Identity(email='jsmith', source='mls')]

    matcher = EmailMatcher()

    # (left, right, expected result)
    comparisons = (
        # The first two unique identities are expected to match
        (jsmith, john_smith, True),
        (john_smith, jsmith, True),
        # No match is expected when comparing with the third one
        (jsmith, jsmith_alt, False),
        (jsmith_alt, jsmith, False),
        (john_smith, jsmith_alt, False),
        (jsmith_alt, john_smith, False),
        # Both share the email 'jsmith', but it is not a valid
        # email address, so no match is expected
        (jsmith_alt, jsmith_not_email, False),
    )

    for left, right, expected in comparisons:
        self.assertEqual(matcher.match(left, right), expected)