Beispiel #1
0
    def test_not_valid_organizations_stream(self):
        """Check whether it raises an error when parsing invalid streams"""

        # Fixtures are loaded outside the assertion contexts so that only the
        # parser call itself can satisfy the expected exception.
        stream = self.read_file('data/gitdm_orgs_invalid_comments.txt')

        # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
        # which is no longer available in recent Python releases.
        with self.assertRaisesRegex(InvalidFormatError,
                                    DOMAINS_INVALID_FORMAT_ERROR % {'line': '10'}):
            GitdmParser(domain_to_employer=stream)

        stream = self.read_file('data/gitdm_orgs_invalid_entries.txt')

        with self.assertRaisesRegex(InvalidFormatError,
                                    DOMAINS_INVALID_FORMAT_ERROR % {'line': '8'}):
            GitdmParser(domain_to_employer=stream)
Beispiel #2
0
    def test_supress_email_validation(self):
        """Check that malformed emails are kept when validation is disabled.

        The fixture holds entries whose email field is not a valid address.
        With ``email_validation=False`` the test expects five unique
        identities, each one keyed by (and carrying) the malformed value.
        """
        email_to_employer = self.read_file(
            datadir('gitdm_email_to_employer_invalid.txt'))

        parser = GitdmParser(email_to_employer=email_to_employer,
                             source='unknown',
                             email_validation=False)

        uids = parser.identities
        self.assertEqual(len(uids), 5)

        expected_emails = [
            'jsmith.example.com', 'jdoe$example.com', 'jsmith!example.com',
            'jrae-example-net', 'john_doeexample'
        ]

        for uid in uids:
            # Verify the container type before reaching into its identities
            self.assertIsInstance(uid, UniqueIdentity)

            # Named 'identity' rather than 'id' to avoid shadowing the builtin
            identity = uid.identities[0]
            self.assertIsInstance(identity, Identity)
            self.assertIn(uid.uuid, expected_emails)
            self.assertEqual(uid.uuid, identity.email)
            self.assertIsNone(identity.name)
            self.assertIsNone(identity.username)
            self.assertEqual(identity.source, 'unknown')
            self.assertIsNone(identity.uuid)
Beispiel #3
0
    def test_email_validation(self):
        """Check that entries with malformed emails are logged as skipped."""
        aliases_stream = self.read_file(datadir('gitdm_email_aliases_valid.txt'))
        enrollments_stream = self.read_file(
            datadir('gitdm_email_to_employer_invalid.txt'))

        # One message per rejected line, in the order they are expected
        skip_messages = (
            "Skip: 'jsmith.example.com	Example Company		# John Smith' -> line 5: invalid email format: 'jsmith.example.com'",
            "Skip: 'jdoe$example.com	Example Company		# John Doe' -> line 6: invalid email format: 'jdoe$example.com'",
            "Skip: 'jsmith!example.com	Bitergia < 2015-01-01	# John Smith - Bitergia' -> line 7: invalid email format: 'jsmith!example.com'",
            "Skip: 'jrae-example-net	Bitergia' -> line 8: invalid email format: 'jrae-example-net'",
            "Skip: 'john_doeexample	LibreSoft' -> line 9: invalid email format: 'john_doeexample'",
            "Skip: 'J < 2021-04-06' -> line 10: invalid email format: 'J'",
        )

        with self.assertLogs() as captured:
            GitdmParser(aliases=aliases_stream,
                        email_to_employer=enrollments_stream,
                        source='unknown',
                        email_validation=True)

            records = captured.records
            self.assertEqual(len(records), 6)

            # The count was checked above, so zip pairs every record
            for record, message in zip(records, skip_messages):
                self.assertEqual(record.getMessage(), message)
Beispiel #4
0
    def test_not_valid_organizations_stream(self):
        """Check how invalid organization streams are reported.

        The first fixture must make the parser raise an error that mentions
        line 10; the second one must produce a single 'Skip' log record.
        """
        comments_error = DOMAINS_INVALID_FORMAT_ERROR % {'line': '10'}

        with self.assertRaisesRegex(InvalidFormatError, comments_error):
            comments_stream = self.read_file(
                datadir('gitdm_orgs_invalid_comments.txt'))
            GitdmParser(domain_to_employer=comments_stream)

        skipped_entry = "Skip: 'example.org        ' -> line 8: invalid organization format: ' '"

        with self.assertLogs() as watcher:
            entries_stream = self.read_file(
                datadir('gitdm_orgs_invalid_entries.txt'))
            GitdmParser(domain_to_employer=entries_stream)

            records = watcher.records
            self.assertEqual(len(records), 1)
            self.assertEqual(records[0].getMessage(), skipped_entry)
Beispiel #5
0
    def test_email_validation(self):
        """Check that malformed emails raise an error when validation is on."""
        aliases_stream = self.read_file(datadir('gitdm_email_aliases_valid.txt'))
        enrollments_stream = self.read_file(
            datadir('gitdm_email_to_employer_invalid.txt'))

        # Callable form of assertRaises: the parser is built inside the check
        self.assertRaises(InvalidFormatError,
                          GitdmParser,
                          aliases=aliases_stream,
                          email_to_employer=enrollments_stream,
                          source='unknown',
                          email_validation=True)
Beispiel #6
0
    def test_aliases_parser(self):
        """Check the unique identities parsed from a valid aliases stream."""
        stream = self.read_file(datadir('gitdm_email_aliases_valid.txt'))
        parsed = GitdmParser(aliases=stream).identities

        # Each row: (uuid, [(email, username), ...]) in the expected order.
        # The last group is identified by usernames instead of emails.
        expected = [
            ('*****@*****.**', [('*****@*****.**', None),
                                ('*****@*****.**', None)]),
            ('*****@*****.**', [('*****@*****.**', None),
                                ('*****@*****.**', None)]),
            ('jrae@mylaptop', [(None, 'jrae@mylaptop'),
                               (None, 'jrae@laptop')]),
        ]

        self.assertEqual(len(parsed), len(expected))

        for uid, (uuid, members) in zip(parsed, expected):
            self.assertIsInstance(uid, UniqueIdentity)
            self.assertEqual(uid.uuid, uuid)

            identities = uid.identities
            self.assertEqual(len(identities), len(members))

            for identity, (email, username) in zip(identities, members):
                self.assertIsInstance(identity, Identity)
                self.assertEqual(identity.name, None)
                self.assertEqual(identity.email, email)
                self.assertEqual(identity.username, username)
                # No source is given to the parser; 'gitdm' is expected
                self.assertEqual(identity.source, 'gitdm')
                self.assertEqual(identity.uuid, None)

            # An aliases-only stream carries no enrollment data
            self.assertEqual(len(uid.enrollments), 0)
Beispiel #7
0
    def test_organizations_parser(self):
        """Check whether it parses a valid organizations file"""

        parser = GitdmParser(
            domain_to_employer=self.read_file(datadir('gitdm_orgs_valid.txt')))

        # No unique identities are expected from a domains stream
        self.assertEqual(len(parser.identities), 0)

        # Each row: (organization name, [domain, ...]) in the expected order
        expected = [
            ('Bitergia', ['bitergia.com', 'bitergia.net', 'example.com']),
            ('Example', ['example.com', 'example.org', 'example.net']),
            ('GSyC/LibreSoft', ['libresoft.es', 'gsyc.es']),
        ]

        organizations = parser.organizations
        self.assertEqual(len(organizations), len(expected))

        for org, (org_name, domain_names) in zip(organizations, expected):
            self.assertIsInstance(org, Organization)
            self.assertEqual(org.name, org_name)

            domains = org.domains
            self.assertEqual(len(domains), len(domain_names))

            for dom, domain_name in zip(domains, domain_names):
                self.assertIsInstance(dom, Domain)
                self.assertEqual(dom.domain, domain_name)
                self.assertEqual(dom.is_top_domain, False)
Beispiel #8
0
    def test_enrollments_parser(self):
        """Check identities, enrollments and organizations from valid streams."""
        period_start = datetime.datetime(1900, 1, 1, 0, 0)
        period_end = datetime.datetime(2100, 1, 1, 0, 0)
        switch_date = datetime.datetime(2015, 1, 1, 0, 0)

        def check_identity(identity, email, username):
            # Shared assertions for one identity of a unique identity
            self.assertIsInstance(identity, Identity)
            self.assertEqual(identity.name, None)
            self.assertEqual(identity.email, email)
            self.assertEqual(identity.username, username)
            self.assertEqual(identity.source, 'unknown')
            self.assertEqual(identity.uuid, None)

        def check_unique_identity(uid, uuid, members, periods,
                                  sort_enrollments=False):
            # members: [(email, username), ...]
            # periods: [(organization name, start, end), ...]
            self.assertIsInstance(uid, UniqueIdentity)
            self.assertEqual(uid.uuid, uuid)

            identities = uid.identities
            self.assertEqual(len(identities), len(members))
            for identity, (email, username) in zip(identities, members):
                check_identity(identity, email, username)

            if sort_enrollments:
                # In-place sort by organization name; the checks below read
                # uid.enrollments again and rely on seeing the sorted list
                uid.enrollments.sort(key=lambda rol: rol.organization.name)

            self.assertEqual(len(uid.enrollments), len(periods))
            for position, (org_name, start, end) in enumerate(periods):
                rol = uid.enrollments[position]
                self.assertIsInstance(rol, Enrollment)
                self.assertEqual(rol.organization.name, org_name)
                self.assertEqual(rol.start, start)
                self.assertEqual(rol.end, end)

        aliases = self.read_file(datadir('gitdm_email_aliases_valid.txt'))
        email_to_employer = self.read_file(
            datadir('gitdm_email_to_employer_valid.txt'))

        parser = GitdmParser(aliases=aliases,
                             email_to_employer=email_to_employer,
                             source='unknown')

        # Parsed unique identities
        uids = parser.identities
        self.assertEqual(len(uids), 4)

        # Two aliased emails, enrolled in two organizations
        check_unique_identity(
            uids[0], '*****@*****.**',
            [('*****@*****.**', None), ('*****@*****.**', None)],
            [('Example Company', period_start, period_end),
             ('LibreSoft', period_start, period_end)],
            sort_enrollments=True)

        # Two aliased emails, enrolled in one organization
        check_unique_identity(
            uids[1], '*****@*****.**',
            [('*****@*****.**', None), ('*****@*****.**', None)],
            [('Bitergia', period_start, period_end)])

        # jrae@laptop & jrae@mylaptop: usernames only, no enrollments
        check_unique_identity(
            uids[2], 'jrae@mylaptop',
            [(None, 'jrae@mylaptop'), (None, 'jrae@laptop')],
            [])

        # Single email with two consecutive enrollment periods
        check_unique_identity(
            uids[3], '*****@*****.**',
            [('*****@*****.**', None)],
            [('Bitergia', period_start, switch_date),
             ('Example Company', switch_date, period_end)])

        # Parsed organizations
        orgs = parser.organizations
        org_names = ['Bitergia', 'Example Company', 'LibreSoft']
        self.assertEqual(len(orgs), len(org_names))

        for org, org_name in zip(orgs, org_names):
            self.assertIsInstance(org, Organization)
            self.assertEqual(org.name, org_name)