def test_not_valid_organizations_stream(self):
    """Check whether it raises an error when parsing invalid streams.

    Two malformed domain-to-employer streams are parsed; each one must
    raise ``InvalidFormatError`` with the message that reports the
    offending line number (10 for the invalid comments file, 8 for the
    invalid entries file).
    """
    # The stream is read outside the context manager so that only the
    # parser construction can satisfy the expected exception.
    stream = self.read_file('data/gitdm_orgs_invalid_comments.txt')

    # assertRaisesRegex replaces the deprecated assertRaisesRegexp
    # alias, which is no longer available in recent Python releases.
    with self.assertRaisesRegex(InvalidFormatError,
                                DOMAINS_INVALID_FORMAT_ERROR % {'line': '10'}):
        GitdmParser(domain_to_employer=stream)

    stream = self.read_file('data/gitdm_orgs_invalid_entries.txt')

    with self.assertRaisesRegex(InvalidFormatError,
                                DOMAINS_INVALID_FORMAT_ERROR % {'line': '8'}):
        GitdmParser(domain_to_employer=stream)
def test_supress_email_validation(self):
    """Check that malformed emails are kept when validation is disabled."""
    stream = self.read_file(
        datadir('gitdm_email_to_employer_invalid.txt'))
    parser = GitdmParser(email_to_employer=stream,
                         source='unknown',
                         email_validation=False)

    expected_emails = [
        'jsmith.example.com',
        'jdoe$example.com',
        'jsmith!example.com',
        'jrae-example-net',
        'john_doeexample'
    ]

    unique_identities = parser.identities
    self.assertEqual(len(unique_identities), 5)

    for unique_identity in unique_identities:
        # Every unique identity is expected to carry the raw email
        # both as its uuid and as the email of its first identity.
        identity = unique_identity.identities[0]

        self.assertIsInstance(unique_identity, UniqueIdentity)
        self.assertIsInstance(identity, Identity)
        self.assertIn(unique_identity.uuid, expected_emails)
        self.assertEqual(unique_identity.uuid, identity.email)
        self.assertEqual(identity.name, None)
        self.assertEqual(identity.username, None)
        self.assertEqual(identity.source, 'unknown')
        self.assertEqual(identity.uuid, None)
def test_email_validation(self):
    """Check that entries with invalid emails are skipped and logged."""
    aliases_stream = self.read_file(datadir('gitdm_email_aliases_valid.txt'))
    employers_stream = self.read_file(
        datadir('gitdm_email_to_employer_invalid.txt'))

    # One log message is expected per rejected line, in file order.
    expected_log = [
        "Skip: 'jsmith.example.com Example Company # John Smith' -> line 5: invalid email format: 'jsmith.example.com'",
        "Skip: 'jdoe$example.com Example Company # John Doe' -> line 6: invalid email format: 'jdoe$example.com'",
        "Skip: 'jsmith!example.com Bitergia < 2015-01-01 # John Smith - Bitergia' -> line 7: invalid email format: 'jsmith!example.com'",
        "Skip: 'jrae-example-net Bitergia' -> line 8: invalid email format: 'jrae-example-net'",
        "Skip: 'john_doeexample LibreSoft' -> line 9: invalid email format: 'john_doeexample'",
        "Skip: 'J < 2021-04-06' -> line 10: invalid email format: 'J'"
    ]

    with self.assertLogs() as captured:
        GitdmParser(aliases=aliases_stream,
                    email_to_employer=employers_stream,
                    source='unknown',
                    email_validation=True)

    records = captured.records
    self.assertEqual(len(records), 6)

    for position, expected_message in enumerate(expected_log):
        self.assertEqual(records[position].getMessage(), expected_message)
def test_not_valid_organizations_stream(self):
    """Check how invalid domain-to-employer streams are handled.

    The stream with invalid comments must raise ``InvalidFormatError``;
    the stream with invalid entries must produce a single log record
    reporting the skipped line.
    """
    error_pattern = DOMAINS_INVALID_FORMAT_ERROR % {'line': '10'}

    with self.assertRaisesRegex(InvalidFormatError, error_pattern):
        comments_stream = self.read_file(
            datadir('gitdm_orgs_invalid_comments.txt'))
        GitdmParser(domain_to_employer=comments_stream)

    expected_message = "Skip: 'example.org ' -> line 8: invalid organization format: ' '"

    with self.assertLogs() as captured:
        entries_stream = self.read_file(
            datadir('gitdm_orgs_invalid_entries.txt'))
        GitdmParser(domain_to_employer=entries_stream)

    records = captured.records
    self.assertEqual(len(records), 1)
    self.assertEqual(records[0].getMessage(), expected_message)
def test_email_validation(self):
    """Check that invalid emails raise an error when validation is on."""
    parser_kwargs = {
        'aliases': self.read_file(datadir('gitdm_email_aliases_valid.txt')),
        'email_to_employer': self.read_file(
            datadir('gitdm_email_to_employer_invalid.txt')),
        'source': 'unknown',
        'email_validation': True,
    }

    with self.assertRaises(InvalidFormatError):
        GitdmParser(**parser_kwargs)
def test_aliases_parser(self):
    """Check that a valid aliases stream is parsed into unique identities."""
    stream = self.read_file(datadir('gitdm_email_aliases_valid.txt'))
    parser = GitdmParser(aliases=stream)

    # Expected unique identities, in parser order:
    # (uuid, [(email, username) for each identity]).
    expected = [
        ('*****@*****.**', [('*****@*****.**', None),
                            ('*****@*****.**', None)]),
        ('*****@*****.**', [('*****@*****.**', None),
                            ('*****@*****.**', None)]),
        ('jrae@mylaptop', [(None, 'jrae@mylaptop'),
                           (None, 'jrae@laptop')]),
    ]

    unique_identities = parser.identities
    self.assertEqual(len(unique_identities), 3)

    for uid_index, (expected_uuid, expected_ids) in enumerate(expected):
        unique_identity = unique_identities[uid_index]
        self.assertIsInstance(unique_identity, UniqueIdentity)
        self.assertEqual(unique_identity.uuid, expected_uuid)

        identities = unique_identity.identities
        self.assertEqual(len(identities), 2)

        for id_index, (email, username) in enumerate(expected_ids):
            identity = identities[id_index]
            self.assertIsInstance(identity, Identity)
            self.assertEqual(identity.name, None)
            self.assertEqual(identity.email, email)
            self.assertEqual(identity.username, username)
            self.assertEqual(identity.source, 'gitdm')
            self.assertEqual(identity.uuid, None)

        # An aliases-only stream carries no enrollment data.
        self.assertEqual(len(unique_identity.enrollments), 0)
def test_organizations_parser(self):
    """Check whether it parses a valid organizations file"""
    content = self.read_file(datadir('gitdm_orgs_valid.txt'))
    parser = GitdmParser(domain_to_employer=content)

    # Expected organizations, in parser order, with their domains.
    expected = [
        ('Bitergia', ['bitergia.com', 'bitergia.net', 'example.com']),
        ('Example', ['example.com', 'example.org', 'example.net']),
        ('GSyC/LibreSoft', ['libresoft.es', 'gsyc.es']),
    ]

    # A domain-to-employer stream yields no unique identities.
    self.assertEqual(len(parser.identities), 0)

    organizations = parser.organizations
    self.assertEqual(len(organizations), 3)

    for org_index, (org_name, domain_names) in enumerate(expected):
        organization = organizations[org_index]
        self.assertIsInstance(organization, Organization)
        self.assertEqual(organization.name, org_name)

        domains = organization.domains
        self.assertEqual(len(domains), len(domain_names))

        for dom_index, domain_name in enumerate(domain_names):
            domain = domains[dom_index]
            self.assertIsInstance(domain, Domain)
            self.assertEqual(domain.domain, domain_name)
            self.assertEqual(domain.is_top_domain, False)
def test_enrollments_parser(self):
    """Check identities, enrollments and organizations from valid streams.

    Both an aliases stream and an email-to-employer stream are given to
    the parser, with 'unknown' as the source of the identities.
    """
    aliases_stream = self.read_file(datadir('gitdm_email_aliases_valid.txt'))
    employers_stream = self.read_file(
        datadir('gitdm_email_to_employer_valid.txt'))
    parser = GitdmParser(aliases=aliases_stream,
                         email_to_employer=employers_stream,
                         source='unknown')

    period_start = datetime.datetime(1900, 1, 1, 0, 0)
    period_end = datetime.datetime(2100, 1, 1, 0, 0)
    change_date = datetime.datetime(2015, 1, 1, 0, 0)

    def check_unique_identity(unique_identity, expected_uuid, expected_ids):
        # Verify the unique identity and each (email, username) identity.
        self.assertIsInstance(unique_identity, UniqueIdentity)
        self.assertEqual(unique_identity.uuid, expected_uuid)

        identities = unique_identity.identities
        self.assertEqual(len(identities), len(expected_ids))

        for position, (email, username) in enumerate(expected_ids):
            identity = identities[position]
            self.assertIsInstance(identity, Identity)
            self.assertEqual(identity.name, None)
            self.assertEqual(identity.email, email)
            self.assertEqual(identity.username, username)
            self.assertEqual(identity.source, 'unknown')
            self.assertEqual(identity.uuid, None)

    def check_enrollments(unique_identity, expected_periods):
        # Verify each (organization, start, end) enrollment in order.
        self.assertEqual(len(unique_identity.enrollments),
                         len(expected_periods))

        for position, (org_name, start, end) in enumerate(expected_periods):
            enrollment = unique_identity.enrollments[position]
            self.assertIsInstance(enrollment, Enrollment)
            self.assertEqual(enrollment.organization.name, org_name)
            self.assertEqual(enrollment.start, start)
            self.assertEqual(enrollment.end, end)

    unique_identities = parser.identities
    self.assertEqual(len(unique_identities), 4)

    # First unique identity: two email identities, two enrollments.
    first = unique_identities[0]
    check_unique_identity(first, '*****@*****.**',
                          [('*****@*****.**', None),
                           ('*****@*****.**', None)])
    # Only this identity has its enrollments sorted (in place, by
    # organization name) before they are compared.
    first_enrollments = first.enrollments
    first_enrollments.sort(key=lambda item: item.organization.name)
    check_enrollments(first,
                      [('Example Company', period_start, period_end),
                       ('LibreSoft', period_start, period_end)])

    # Second unique identity: two email identities, one enrollment.
    second = unique_identities[1]
    check_unique_identity(second, '*****@*****.**',
                          [('*****@*****.**', None),
                           ('*****@*****.**', None)])
    check_enrollments(second,
                      [('Bitergia', period_start, period_end)])

    # Third unique identity: two username identities, no enrollments.
    third = unique_identities[2]
    check_unique_identity(third, 'jrae@mylaptop',
                          [(None, 'jrae@mylaptop'),
                           (None, 'jrae@laptop')])
    check_enrollments(third, [])

    # Fourth unique identity: one email identity, two consecutive
    # enrollments split at 2015-01-01.
    fourth = unique_identities[3]
    check_unique_identity(fourth, '*****@*****.**',
                          [('*****@*****.**', None)])
    check_enrollments(fourth,
                      [('Bitergia', period_start, change_date),
                       ('Example Company', change_date, period_end)])

    # Parsed organizations
    organizations = parser.organizations
    self.assertEqual(len(organizations), 3)

    expected_names = ['Bitergia', 'Example Company', 'LibreSoft']
    for position, org_name in enumerate(expected_names):
        organization = organizations[position]
        self.assertIsInstance(organization, Organization)
        self.assertEqual(organization.name, org_name)