def test_match_same_uuid(self):
        """Check that unique identities sharing a UUID match in both directions"""

        matcher = EmailNameMatcher()

        first = UniqueIdentity(uuid='John Smith')
        second = UniqueIdentity(uuid='John Smith')

        # The comparison is expected to be symmetric
        for left, right in ((first, second), (second, first)):
            self.assertEqual(matcher.match(left, right), True)

        # Two identities whose UUID is None must not be reported as a match
        matcher = EmailNameMatcher()

        first = UniqueIdentity(uuid=None)
        second = UniqueIdentity(uuid=None)

        for left, right in ((first, second), (second, first)):
            self.assertEqual(matcher.match(left, right), False)
    def test_match_with_sources_list(self):
        """Check matching results when the matcher is restricted to a list of sources"""

        scm_uid = UniqueIdentity(uuid='jsmith')
        scm_uid.identities = [
            Identity(name='John Smith', email='*****@*****.**', source='scm'),
            Identity(name='John Smith', source='scm'),
            Identity(username='******', source='scm'),
            Identity(email='', source='scm'),
        ]

        alt_uid = UniqueIdentity(uuid='J. Smith')
        alt_uid.identities = [
            Identity(name='John Smith JR', email='*****@*****.**', source='alt'),
            Identity(name='John Smith', username='******', source='alt'),
            Identity(email='', source='alt'),
            Identity(email='jsmith', source='alt'),
        ]

        # (sources, expected result): a match is only expected when both
        # 'scm' and 'alt' are part of the list
        cases = (
            (['github'], False),
            (['scm'], False),
            (['scm', 'alt'], True),
        )

        for sources, expected in cases:
            matcher = EmailMatcher(sources=sources)
            self.assertEqual(matcher.match(scm_uid, alt_uid), expected)
    def test_match_strict(self):
        """Check how the strict flag changes the result of the matching"""

        # Both unique identities carry the address 'jsmith', which is
        # not a valid email address
        alt_uid = UniqueIdentity(uuid='J. Smith')
        alt_uid.identities = [
            Identity(name='J. Smith', username='******', source='alt'),
            Identity(name='John Smith', username='******', source='alt'),
            Identity(email='', source='alt'),
            Identity(email='jsmith', source='alt'),
        ]

        mls_uid = UniqueIdentity(uuid='John Smith')
        mls_uid.identities = [Identity(email='jsmith', source='mls')]

        strict = EmailMatcher(strict=True)
        relaxed = EmailMatcher(strict=False)

        # In strict mode the shared address is not accepted, so
        # the identities do not match
        self.assertEqual(strict.match(alt_uid, mls_uid), False)

        # Without strict mode the very same pair matches
        self.assertEqual(relaxed.match(alt_uid, mls_uid), True)
Beispiel #4
0
    def setUp(self):
        """Build the unique identities used as fixtures by the tests"""

        john_smith = UniqueIdentity('John Smith')
        john_smith.identities = [
            Identity(email='*****@*****.**', name='John Smith',
                     source='scm', uuid='John Smith'),
            Identity(name='John Smith', source='scm', uuid='John Smith'),
            Identity(username='******', source='scm', uuid='John Smith'),
        ]
        self.john_smith = john_smith

        jsmith = UniqueIdentity('J. Smith')
        jsmith.identities = [
            Identity(name='J. Smith', username='******',
                     source='alt', uuid='J. Smith'),
            Identity(name='John Smith', username='******',
                     source='alt', uuid='J. Smith'),
            Identity(email='jsmith', source='alt', uuid='J. Smith'),
        ]
        self.jsmith = jsmith

        jane_rae = UniqueIdentity('Jane Rae')
        jane_rae.identities = [
            Identity(name='Janer Rae', source='mls', uuid='Jane Rae'),
            Identity(email='*****@*****.**', name='Jane Rae Doe',
                     source='mls', uuid='Jane Rae'),
        ]
        self.jane_rae = jane_rae

        js_alt = UniqueIdentity('john_smith')
        js_alt.identities = [
            Identity(name='J. Smith', username='******',
                     source='scm', uuid='john_smith'),
            Identity(username='******', source='mls', uuid='john_smith'),
            Identity(username='******', source='mls', uuid='john_smith'),
            Identity(email='*****@*****.**', name='Smith. J',
                     source='mls', uuid='john_smith'),
        ]
        self.js_alt = js_alt

        jrae = UniqueIdentity('jrae')
        jrae.identities = [
            Identity(email='*****@*****.**', name='Jane Rae Doe',
                     source='mls', uuid='jrae'),
            Identity(name='jrae', source='mls', uuid='jrae'),
            Identity(name='jrae', source='scm', uuid='jrae'),
        ]
        self.jrae = jrae
Beispiel #5
0
    def test_unique_uuid(self):
        """Check whether the uuid is in fact unique.

        Two unique identities with the same uuid are added to the
        session; committing them must raise an IntegrityError whose
        message matches DUP_CHECK_ERROR.
        """
        uid1 = UniqueIdentity(uuid='John Smith')
        uid2 = UniqueIdentity(uuid='John Smith')

        self.session.add(uid1)
        self.session.add(uid2)

        # Only the commit is expected to fail, so it is the only statement
        # kept inside the assertion. assertRaisesRegex replaces the
        # deprecated assertRaisesRegexp alias, removed in Python 3.12.
        with self.assertRaisesRegex(IntegrityError, DUP_CHECK_ERROR):
            self.session.commit()
    def test_match_with_blacklist(self):
        """Check matching results when the matcher is given a blacklist"""

        jsmith = UniqueIdentity(uuid='jsmith')
        jsmith.identities = [
            Identity(name='John Smith', email='*****@*****.**', source='scm'),
            Identity(name='John Smith', source='scm'),
            Identity(username='******', source='scm'),
            Identity(email='', source='scm'),
        ]

        john_smith = UniqueIdentity(uuid='js')
        john_smith.identities = [
            Identity(name='John Smith JR', username='******', source='scm'),
            Identity(username='******', source='scm'),
            Identity(name='Smith. J', source='mls'),
            Identity(name='Smith. J', email='*****@*****.**', source='mls'),
        ]

        jsmith_alt = UniqueIdentity(uuid='J. Smith')
        jsmith_alt.identities = [
            Identity(name='John Smith JR', username='******', source='alt'),
            Identity(name='John Smith', username='******', source='alt'),
            Identity(email='', source='alt'),
            Identity(email='jsmith', source='alt'),
        ]

        excluded_entries = [
            MatchingBlacklist(excluded='John Smith'),
            MatchingBlacklist(excluded='*****@*****.**'),
        ]
        matcher = EmailNameMatcher(blacklist=excluded_entries)

        # (left, right, expected); each pair is checked in both directions.
        # 'John Smith' is blacklisted, so jsmith and jsmith_alt do not match.
        cases = (
            (jsmith, john_smith, False),
            (jsmith, jsmith_alt, False),
            (john_smith, jsmith_alt, True),
        )

        for left, right, expected in cases:
            self.assertEqual(matcher.match(left, right), expected)
            self.assertEqual(matcher.match(right, left), expected)
Beispiel #7
0
    def test_to_dict(self):
        """Check the dict generated by to_dict() for an enrollment"""

        period_start = datetime.datetime(1999, 1, 1, 0, 0, 0)
        period_end = datetime.datetime(2001, 1, 1, 0, 0, 0)

        uid = UniqueIdentity(uuid='John Smith')
        self.session.add(uid)

        org = Organization(name='Example')
        self.session.add(org)

        enrollment = Enrollment(uidentity=uid, organization=org,
                                start=period_start, end=period_end)
        self.session.add(enrollment)
        self.session.commit()

        # Every field of the enrollment has to be present in the output
        d = enrollment.to_dict()

        self.assertIsInstance(d, dict)
        self.assertEqual(d['uuid'], 'John Smith')
        self.assertEqual(d['organization'], 'Example')
        self.assertEqual(d['start'], datetime.datetime(1999, 1, 1, 0, 0, 0))
        self.assertEqual(d['end'], datetime.datetime(2001, 1, 1, 0, 0, 0))
    def test_not_null_relationships(self):
        """Check that an enrollment needs both an organization and a unique identity"""

        # Neither unique identity nor organization
        with self.assertRaisesRegex(IntegrityError, NULL_CHECK_ERROR):
            bare = Enrollment()
            self.session.add(bare)
            self.session.commit()

        self.session.rollback()

        # Unique identity only
        with self.assertRaisesRegex(IntegrityError, NULL_CHECK_ERROR):
            uid = UniqueIdentity(uuid='John Smith')
            self.session.add(uid)

            without_org = Enrollment(uidentity=uid)
            self.session.add(without_org)
            self.session.commit()

        self.session.rollback()

        # Organization only
        with self.assertRaisesRegex(IntegrityError, NULL_CHECK_ERROR):
            org = Organization(name='Example')
            self.session.add(org)

            without_uid = Enrollment(organization=org)
            self.session.add(without_uid)
            self.session.commit()

        self.session.rollback()
Beispiel #9
0
    def test_to_dict(self):
        """Check the dict generated by to_dict() for an identity"""

        uid = UniqueIdentity(uuid='John Smith')
        self.session.add(uid)

        identity = Identity(id='A', name='John Smith',
                            email='*****@*****.**', username='******',
                            source='scm', uuid='John Smith')
        self.session.add(identity)
        self.session.commit()

        d = identity.to_dict()
        self.assertIsInstance(d, dict)

        # (key, expected value), checked in this order
        expected_fields = (
            ('id', 'A'),
            ('name', 'John Smith'),
            ('email', '*****@*****.**'),
            ('username', 'jsmith'),
            ('source', 'scm'),
            ('uuid', 'John Smith'),
        )

        for key, value in expected_fields:
            self.assertEqual(d[key], value)
Beispiel #10
0
    def test_not_null_relationships(self):
        """Check whether every enrollment is assigned organizations and unique identities.

        The expected exception class depends on the major Python version
        in use. The regex assertion method is selected in the same branch
        because assertRaisesRegexp is the only spelling available on
        Python 2, while on Python 3 it is a deprecated alias of
        assertRaisesRegex that was removed in Python 3.12.
        """
        if sys.version_info[0] >= 3:  # Python 3
            expected = IntegrityError
            assert_raises_regex = self.assertRaisesRegex
        else:  # Python 2
            expected = OperationalError
            assert_raises_regex = self.assertRaisesRegexp

        # Enrollment without unique identity and organization
        with assert_raises_regex(expected, NULL_CHECK_ERROR):
            rol1 = Enrollment()
            self.session.add(rol1)
            self.session.commit()

        self.session.rollback()

        # Enrollment without organization
        with assert_raises_regex(expected, NULL_CHECK_ERROR):
            uid = UniqueIdentity(uuid='John Smith')
            self.session.add(uid)

            rol2 = Enrollment(uidentity=uid)
            self.session.add(rol2)
            self.session.commit()

        self.session.rollback()

        # Enrollment without unique identity
        with assert_raises_regex(expected, NULL_CHECK_ERROR):
            org = Organization(name='Example')
            self.session.add(org)

            rol3 = Enrollment(organization=org)
            self.session.add(rol3)
            self.session.commit()

        self.session.rollback()
Beispiel #11
0
    def test_default_enrollment_period(self):
        """Check that missing period dates are replaced by the default ones"""

        default_start = datetime.datetime(1900, 1, 1, 0, 0, 0)
        default_end = datetime.datetime(2100, 1, 1, 0, 0, 0)

        uid = UniqueIdentity(uuid='John Smith')
        self.session.add(uid)

        org = Organization(name='Example')
        self.session.add(org)

        # No dates given at all
        no_dates = Enrollment(uidentity=uid, organization=org)
        self.session.add(no_dates)
        self.session.commit()

        self.assertEqual(no_dates.start, default_start)
        self.assertEqual(no_dates.end, default_end)

        # A start date explicitly set to None falls back to the default
        custom_end = datetime.datetime(2222, 1, 1, 0, 0, 0)
        no_start = Enrollment(uidentity=uid, organization=org,
                              start=None, end=custom_end)
        self.session.add(no_start)
        self.session.commit()

        self.assertEqual(no_start.start, default_start)
        self.assertEqual(no_start.end, datetime.datetime(2222, 1, 1, 0, 0, 0))

        # The same happens with an end date set to None
        custom_start = datetime.datetime(1999, 1, 1, 0, 0, 0)
        no_end = Enrollment(uidentity=uid, organization=org,
                            start=custom_start, end=None)
        self.session.add(no_end)
        self.session.commit()

        self.assertEqual(no_end.start, datetime.datetime(1999, 1, 1, 0, 0, 0))
        self.assertEqual(no_end.end, default_end)
Beispiel #12
0
    def test_to_dict(self):
        """Check the dict generated by to_dict() for a profile"""

        uid = UniqueIdentity(uuid='John Smith')
        self.session.add(uid)

        country = Country(code='US', name='United States of America', alpha3='USA')
        self.session.add(country)

        profile = Profile(uuid='John Smith', name='Smith, J.',
                          email='*****@*****.**', is_bot=True,
                          country_code='US')
        self.session.add(profile)
        self.session.commit()

        # Profile stored with a country
        d = profile.to_dict()
        self.assertIsInstance(d, dict)

        for key, value in (('uuid', 'John Smith'),
                           ('name', 'Smith, J.'),
                           ('email', '*****@*****.**'),
                           ('is_bot', True)):
            self.assertEqual(d[key], value)

        self.assertEqual(d['country']['code'], 'US')
        self.assertEqual(d['country']['name'], 'United States of America')

        # Profile without a country; this one is not added to the session
        profile = Profile(uuid='John Smith', name='Smith, J.',
                          email='*****@*****.**', is_bot=True,
                          country_code=None)

        d = profile.to_dict()
        self.assertEqual(d['country'], None)
    def test_match(self):
        """Check the results of match() using the username matcher"""

        jsmith = UniqueIdentity(uuid='jsmith')
        jsmith.identities = [
            Identity(name='John Smith', email='*****@*****.**', source='scm'),
            Identity(name='John Smith', source='scm'),
            Identity(username='******', source='scm'),
            Identity(email='', source='scm'),
        ]

        john_smith = UniqueIdentity(uuid='js')
        john_smith.identities = [
            Identity(name='J. Smith', username='******', source='scm'),
            Identity(username='******', source='scm'),
            Identity(name='Smith. J', source='mls'),
            Identity(name='Smith. J', email='*****@*****.**', source='mls'),
        ]

        jsmith_alt = UniqueIdentity(uuid='J. Smith')
        jsmith_alt.identities = [
            Identity(name='J. Smith', username='******', source='alt'),
            Identity(name='John Smith', username='******', source='alt'),
            Identity(email='', source='alt'),
            Identity(email='jsmith', source='alt'),
        ]

        matcher = UsernameMatcher()

        # (left, right, expected); each pair is checked in both directions.
        # The first two unique identities do not match each other, while
        # both of them match the third one.
        cases = (
            (jsmith, john_smith, False),
            (jsmith, jsmith_alt, True),
            (john_smith, jsmith_alt, True),
        )

        for left, right, expected in cases:
            self.assertEqual(matcher.match(left, right), expected)
            self.assertEqual(matcher.match(right, left), expected)
    def test_match_same_identity(self):
        """Check that a unique identity matches when compared with itself"""

        identity = UniqueIdentity(uuid='John Smith')

        # The very same object is given as both operands
        outcome = UsernameMatcher().match(identity, identity)

        self.assertEqual(outcome, True)
    def test_filter_identities_no_strict(self):
        """Check which identities are returned by filter() when strict mode is off"""

        jsmith = UniqueIdentity(uuid='jsmith')
        jsmith.identities = [
            Identity(name='John Smith', email='*****@*****.**', source='scm', uuid='jsmith'),
            Identity(name='John Smith', source='scm', uuid='jsmith'),
            Identity(username='******', source='scm', uuid='jsmith'),
            Identity(email='jsmith@test', uuid='jsmith'),
            Identity(email='', source='scm', uuid='jsmith'),
        ]

        jrae = UniqueIdentity(uuid='jrae')
        jrae.identities = [
            Identity(name='Jane Rae', source='scm', uuid='jrae'),
            Identity(name='Jane Rae Doe', email='*****@*****.**', source='mls', uuid='jrae'),
            Identity(name='jrae', source='scm', uuid='jrae'),
            Identity(email='*****@*****.**', source='scm', uuid='jrae'),
        ]

        matcher = EmailMatcher(strict=False)

        # (unique identity, expected uuid, expected emails in order)
        expectations = (
            (jsmith, 'jsmith', ('*****@*****.**', 'jsmith@test')),
            (jrae, 'jrae', ('*****@*****.**', '*****@*****.**')),
        )

        for uid, uuid, emails in expectations:
            filtered = matcher.filter(uid)
            self.assertEqual(len(filtered), 2)

            for position, email in enumerate(emails):
                fid = filtered[position]
                self.assertIsInstance(fid, EmailIdentity)
                self.assertEqual(fid.uuid, uuid)
                self.assertEqual(fid.email, email)
    def test_match_same_uuid(self):
        """Check that unique identities sharing a UUID match in both directions"""

        matcher = GitHubMatcher()

        first = UniqueIdentity(uuid='John Smith')
        second = UniqueIdentity(uuid='John Smith')

        for left, right in ((first, second), (second, first)):
            self.assertEqual(matcher.match(left, right), True)

        # Two identities whose UUID is None are not a match
        first = UniqueIdentity(uuid=None)
        second = UniqueIdentity(uuid=None)

        self.assertEqual(matcher.match(first, second), False)
    def test_match(self):
        """Check the results of match() using the email matcher"""

        jsmith = UniqueIdentity(uuid='jsmith')
        jsmith.identities = [
            Identity(name='John Smith', email='*****@*****.**', source='scm'),
            Identity(name='John Smith', source='scm'),
            Identity(username='******', source='scm'),
            Identity(email='', source='scm'),
        ]

        john_smith = UniqueIdentity(uuid='js')
        john_smith.identities = [
            Identity(name='J. Smith', username='******', source='scm'),
            Identity(username='******', source='scm'),
            Identity(name='Smith. J', source='mls'),
            Identity(name='Smith. J', email='*****@*****.**', source='mls'),
        ]

        jsmith_alt = UniqueIdentity(uuid='J. Smith')
        jsmith_alt.identities = [
            Identity(name='J. Smith', username='******', source='alt'),
            Identity(name='John Smith', username='******', source='alt'),
            Identity(email='', source='alt'),
            Identity(email='jsmith', source='alt'),
        ]

        jsmith_not_email = UniqueIdentity(uuid='John Smith')
        jsmith_not_email.identities = [Identity(email='jsmith', source='mls')]

        matcher = EmailMatcher()

        # (left, right, expected); each pair is checked in both directions.
        # Only the first two unique identities match each other.
        symmetric_cases = (
            (jsmith, john_smith, True),
            (jsmith, jsmith_alt, False),
            (john_smith, jsmith_alt, False),
        )

        for left, right, expected in symmetric_cases:
            self.assertEqual(matcher.match(left, right), expected)
            self.assertEqual(matcher.match(right, left), expected)

        # These two share the address 'jsmith', but as it is not a
        # valid email address they do not match
        self.assertEqual(matcher.match(jsmith_alt, jsmith_not_email), False)
Beispiel #18
0
    def test_is_bot_invalid_type(self):
        """Check invalid values on is_bot bool column.

        A profile whose is_bot value is the string 'True' is added to the
        session; committing it must raise a StatementError whose message
        matches INVALID_DATATYPE_ERROR.
        """
        uid = UniqueIdentity(uuid='John Smith')
        self.session.add(uid)

        prf = Profile(uuid='John Smith', name='John Smith', is_bot='True')
        self.session.add(prf)

        # Only the commit is expected to fail, so it is the only statement
        # kept inside the assertion. assertRaisesRegex replaces the
        # deprecated assertRaisesRegexp alias, removed in Python 3.12.
        with self.assertRaisesRegex(StatementError, INVALID_DATATYPE_ERROR):
            self.session.commit()
    def test_match_identities_instances(self):
        """Check that match() rejects operands that are not UniqueIdentity objects"""

        uid = UniqueIdentity(uuid='John Smith')
        matcher = UsernameMatcher()

        # Every pair contains at least one operand of the wrong type
        invalid_pairs = (
            ('John Smith', uid),
            (uid, 'John Smith'),
            (None, uid),
            (uid, None),
            ('John Smith', 'John Doe'),
        )

        for left, right in invalid_pairs:
            self.assertRaises(ValueError, matcher.match, left, right)
Beispiel #20
0
    def test_match_with_sources_list(self):
        """Check matching results when the matcher is restricted to a list of sources"""

        jsmith = UniqueIdentity(uuid='jsmith')
        jsmith.identities = [
            Identity(name='John Smith', email='*****@*****.**',
                     source='scm', uuid='jsmith'),
            Identity(name='John Smith', source='scm', uuid='jsmith'),
            Identity(username='******', source='github', uuid='jsmith'),
            Identity(email='', source='scm', uuid='jsmith'),
        ]

        john_smith = UniqueIdentity(uuid='js')
        john_smith.identities = [
            Identity(name='J. Smith', username='******', source='scm'),
            Identity(username='******', source='GitHub-API'),
            Identity(name='Smith. J', source='mls'),
            Identity(name='Smith. J', email='*****@*****.**', source='mls'),
        ]

        # (sources, expected result): a match is only expected when both
        # 'github-api' and 'github' are part of the list
        cases = (
            (['scm'], False),
            (['github'], False),
            (['github-api', 'github'], True),
        )

        for sources, expected in cases:
            matcher = GitHubMatcher(sources=sources)
            self.assertEqual(matcher.match(jsmith, john_smith), expected)
Beispiel #21
0
    def test_filter_identities_with_sources_list(self):
        """Check which identities are returned by filter() when a sources list is set"""

        jsmith = UniqueIdentity(uuid='jsmith')
        jsmith.identities = [
            Identity(name='John Smith', email='*****@*****.**', source='scm', uuid='jsmith'),
            Identity(name='John Smith', source='scm', uuid='jsmith'),
            Identity(name='John Smith JR', source='scm', uuid='jsmith'),
            Identity(username='******', source='mls', uuid='jsmith'),
            Identity(email='', source='scm', uuid='jsmith'),
        ]

        jrae = UniqueIdentity(uuid='jrae')
        jrae.identities = [
            Identity(name='Jane Rae', source='scm', uuid='jrae'),
            Identity(name='Jane Rae Doe', email='*****@*****.**', source='mls', uuid='jrae'),
            Identity(name='jrae', source='scm', uuid='jrae'),
            Identity(email='*****@*****.**', source='scm', uuid='jrae'),
        ]

        matcher = EmailMatcher(sources=['mls', 'alt'])

        # Nothing is expected back for jsmith
        self.assertEqual(len(matcher.filter(jsmith)), 0)

        # A single identity is expected back for jrae
        filtered = matcher.filter(jrae)
        self.assertEqual(len(filtered), 1)

        only = filtered[0]
        self.assertIsInstance(only, EmailIdentity)
        self.assertEqual(only.uuid, 'jrae')
        self.assertEqual(only.email, '*****@*****.**')
    def test_match_with_sources_list(self):
        """Check matching results when the matcher is restricted to a list of sources"""

        scm_uid = UniqueIdentity(uuid='jsmith')
        scm_uid.identities = [
            Identity(name='John Smith', email='*****@*****.**', source='scm'),
            Identity(name='John Smith', source='scm'),
            Identity(username='******', source='scm'),
            Identity(email='', source='scm'),
        ]

        alt_uid = UniqueIdentity(uuid='J. Smith')
        alt_uid.identities = [
            Identity(name='John Smith JR', username='******', source='alt'),
            Identity(name='John Smith', username='******', source='alt'),
            Identity(email='', source='alt'),
            Identity(email='jsmith', source='alt'),
        ]

        # (sources, expected result): a match is only expected when both
        # 'scm' and 'alt' are part of the list
        cases = (
            (['github'], False),
            (['scm'], False),
            (['scm', 'alt'], True),
        )

        for sources, expected in cases:
            matcher = EmailNameMatcher(sources=sources)
            self.assertEqual(matcher.match(scm_uid, alt_uid), expected)
    def test_match_strict(self):
        """Check how the strict flag changes the result of the matching"""

        with_alt_source = UniqueIdentity(uuid='J. Smith')
        with_alt_source.identities = [
            Identity(name='J. Smith', username='******', source='alt'),
            Identity(name='John Smith', username='******', source='alt'),
            Identity(email='', source='alt'),
            Identity(email='jsmith', source='alt'),
        ]

        with_mls_source = UniqueIdentity(uuid='John Smith')
        with_mls_source.identities = [Identity(email='jsmith', source='mls')]

        # Both unique identities have the address 'jsmith', which is not
        # a valid email address. Strict mode does not match them, while
        # non-strict mode does.
        matcher_strict = EmailMatcher(strict=True)
        matcher_no_strict = EmailMatcher(strict=False)

        outcome = matcher_strict.match(with_alt_source, with_mls_source)
        self.assertEqual(outcome, False)

        outcome = matcher_no_strict.match(with_alt_source, with_mls_source)
        self.assertEqual(outcome, True)
Beispiel #24
0
    def test_filter_identities_with_blacklist(self):
        """Check which identities are returned by filter() when a blacklist is set"""

        jsmith = UniqueIdentity(uuid='jsmith')
        jsmith.identities = [
            Identity(name='John Smith', username='******', source='scm', uuid='jsmith'),
            Identity(name='John Smith', source='scm', uuid='jsmith'),
            Identity(email='*****@*****.**', source='scm', uuid='jsmith'),
            Identity(email='', source='scm', uuid='jsmith'),
        ]

        jrae = UniqueIdentity(uuid='jrae')
        jrae.identities = [
            Identity(name='Jane Rae', source='scm', uuid='jrae'),
            Identity(name='Jane Rae Doe', username='******', source='mls', uuid='jrae'),
            Identity(name='jrae', source='scm', uuid='jrae'),
            Identity(username='******', source='scm', uuid='jrae'),
        ]

        matcher = UsernameMatcher(blacklist=[MatchingBlacklist(excluded='jrae')])

        # (unique identity, expected uuid, expected username); a single
        # identity is expected back for each of them
        expectations = (
            (jsmith, 'jsmith', 'jsmith'),
            (jrae, 'jrae', 'jane.rae'),
        )

        for uid, uuid, username in expectations:
            filtered = matcher.filter(uid)
            self.assertEqual(len(filtered), 1)

            fid = filtered[0]
            self.assertIsInstance(fid, UsernameIdentity)
            self.assertEqual(fid.uuid, uuid)
            self.assertEqual(fid.username, username)
Beispiel #25
0
    def test_get_profile_sh(self):
        """Check that get_profile_sh() exposes the fields of a sortinghat profile"""

        stored_profile = Profile()
        stored_profile.name = 'pepe'
        stored_profile.email = '*****@*****.**'
        stored_profile.gender = 'male'
        stored_profile.gender_acc = 100

        uidentity = UniqueIdentity()
        uidentity.profile = stored_profile

        # The lookup of unique identities is replaced by a mock that
        # serves them from this dict
        known = {'00000': uidentity}
        self._enrich.get_unique_identity = MagicMock(side_effect=lambda uuid: known[uuid])

        profile = self._enrich.get_profile_sh('00000')

        self.assertEqual(profile['name'], uidentity.profile.name)
        self.assertEqual(profile['email'], uidentity.profile.email)
        self.assertEqual(profile['gender'], uidentity.profile.gender)
        self.assertEqual(profile['gender_acc'], uidentity.profile.gender_acc)
Beispiel #26
0
    def test_unique_profile(self):
        """Check if there is only one profile for each unique identity.

        Two profiles with the same uuid are added to the session;
        committing them must raise an IntegrityError whose message
        matches DUP_CHECK_ERROR.
        """
        uid = UniqueIdentity(uuid='John Smith')
        self.session.add(uid)

        prf1 = Profile(uuid='John Smith', name='John Smith')
        prf2 = Profile(uuid='John Smith', name='Smith, J.')

        self.session.add(prf1)
        self.session.add(prf2)

        # Only the commit is expected to fail, so it is the only statement
        # kept inside the assertion. assertRaisesRegex replaces the
        # deprecated assertRaisesRegexp alias, removed in Python 3.12.
        with self.assertRaisesRegex(IntegrityError, DUP_CHECK_ERROR):
            self.session.commit()
Beispiel #27
0
    def test_unique_enrollments(self):
        """Check if there is only one tuple with the same values.

        Two enrollments built with the same unique identity and
        organization are added to the session; committing them must raise
        an IntegrityError whose message matches DUP_CHECK_ERROR.
        """
        uid = UniqueIdentity(uuid='John Smith')
        self.session.add(uid)

        org = Organization(name='Example')
        self.session.add(org)

        rol1 = Enrollment(uidentity=uid, organization=org)
        rol2 = Enrollment(uidentity=uid, organization=org)

        self.session.add(rol1)
        self.session.add(rol2)

        # Only the commit is expected to fail, so it is the only statement
        # kept inside the assertion. assertRaisesRegex replaces the
        # deprecated assertRaisesRegexp alias, removed in Python 3.12.
        with self.assertRaisesRegex(IntegrityError, DUP_CHECK_ERROR):
            self.session.commit()
    def test_filter_identities_no_strict(self):
        """Check which identities are returned by filter() when strict mode is off"""

        def check(fid, uuid, email):
            # Assertions shared by every filtered identity
            self.assertIsInstance(fid, EmailIdentity)
            self.assertEqual(fid.uuid, uuid)
            self.assertEqual(fid.email, email)

        jsmith = UniqueIdentity(uuid='jsmith')
        jsmith.identities = [
            Identity(name='John Smith', email='*****@*****.**',
                     source='scm', uuid='jsmith'),
            Identity(name='John Smith', source='scm', uuid='jsmith'),
            Identity(username='******', source='scm', uuid='jsmith'),
            Identity(email='jsmith@test', uuid='jsmith'),
            Identity(email='', source='scm', uuid='jsmith'),
        ]

        jrae = UniqueIdentity(uuid='jrae')
        jrae.identities = [
            Identity(name='Jane Rae', source='scm', uuid='jrae'),
            Identity(name='Jane Rae Doe', email='*****@*****.**',
                     source='mls', uuid='jrae'),
            Identity(name='jrae', source='scm', uuid='jrae'),
            Identity(email='*****@*****.**', source='scm', uuid='jrae'),
        ]

        matcher = EmailMatcher(strict=False)

        # Two identities are expected back for jsmith
        filtered = matcher.filter(jsmith)
        self.assertEqual(len(filtered), 2)
        check(filtered[0], 'jsmith', '*****@*****.**')
        check(filtered[1], 'jsmith', 'jsmith@test')

        # Two identities are expected back for jrae as well
        filtered = matcher.filter(jrae)
        self.assertEqual(len(filtered), 2)
        check(filtered[0], 'jrae', '*****@*****.**')
        check(filtered[1], 'jrae', '*****@*****.**')
    def test_filter_identities(self):
        """Check the identities returned by EmailNameMatcher.filter()"""

        # Fixtures: two unique identities and their identities
        jsmith = UniqueIdentity(uuid='jsmith')
        jsmith.identities = [
            Identity(name='John Smith', email='*****@*****.**', source='scm', uuid='jsmith'),
            Identity(name='John Smith', source='scm', uuid='jsmith'),
            Identity(username='******', source='scm', uuid='jsmith'),
            Identity(email='', source='scm', uuid='jsmith'),
        ]

        jrae = UniqueIdentity(uuid='jrae')
        jrae.identities = [
            Identity(name='Jane Rae', source='scm', uuid='jrae'),
            Identity(name='Jane Rae Doe', email='*****@*****.**', source='mls', uuid='jrae'),
            Identity(name='jrae', source='scm', uuid='jrae'),
            Identity(email='*****@*****.**', source='scm', uuid='jrae'),
        ]

        matcher = EmailNameMatcher()

        # Expected (name, email) of each filtered identity, in order;
        # the names are expected in lower case
        filtered = matcher.filter(jsmith)
        self.assertEqual(len(filtered), 2)

        jsmith_expected = (
            ('john smith', '*****@*****.**'),
            ('john smith', None),
        )

        for position, (name, email) in enumerate(jsmith_expected):
            entry = filtered[position]
            self.assertIsInstance(entry, EmailNameIdentity)
            self.assertEqual(entry.uuid, 'jsmith')
            self.assertEqual(entry.name, name)
            self.assertEqual(entry.email, email)

        filtered = matcher.filter(jrae)
        self.assertEqual(len(filtered), 3)

        jrae_expected = (
            ('jane rae', None),
            ('jane rae doe', '*****@*****.**'),
            (None, '*****@*****.**'),
        )

        for position, (name, email) in enumerate(jrae_expected):
            entry = filtered[position]
            self.assertIsInstance(entry, EmailNameIdentity)
            self.assertEqual(entry.uuid, 'jrae')
            self.assertEqual(entry.name, name)
            self.assertEqual(entry.email, email)
    def test_filter_identities(self):
        """Check the identities returned by GitHubMatcher.filter()"""

        # Fixtures: two unique identities and their identities
        jsmith = UniqueIdentity(uuid='jsmith')
        jsmith.identities = [Identity(name='John Smith', email='*****@*****.**', source='scm', uuid='jsmith'),
                             Identity(name='John Smith', source='scm', uuid='jsmith'),
                             Identity(username='******', source='github', uuid='jsmith'),
                             Identity(email='', source='scm', uuid='jsmith')]

        jrae = UniqueIdentity(uuid='jrae')
        jrae.identities = [Identity(username='******', source='GitHub-API', uuid='jrae'),
                           Identity(name='Jane Rae Doe', email='*****@*****.**', source='mls', uuid='jrae'),
                           Identity(username='******', source='github', uuid='jrae'),
                           Identity(email='*****@*****.**', source='scm', uuid='jrae')]

        matcher = GitHubMatcher()

        # A single identity is expected for jsmith
        filtered = matcher.filter(jsmith)
        self.assertEqual(len(filtered), 1)

        entry = filtered[0]
        self.assertIsInstance(entry, GitHubUsernameIdentity)
        self.assertEqual(entry.uuid, 'jsmith')
        self.assertEqual(entry.username, 'jsmith')
        self.assertEqual(entry.source, 'github')

        # Two identities are expected for jrae; the values below are
        # the expected (username, source) pairs, in order
        filtered = matcher.filter(jrae)
        self.assertEqual(len(filtered), 2)

        jrae_expected = (('janerae', 'GitHub-API'), ('jrae', 'github'))

        for position, (username, source) in enumerate(jrae_expected):
            entry = filtered[position]
            self.assertIsInstance(entry, GitHubUsernameIdentity)
            self.assertEqual(entry.uuid, 'jrae')
            self.assertEqual(entry.username, username)
            self.assertEqual(entry.source, source)
    def test_match_with_blacklist(self):
        """Check EmailNameMatcher.match() when a blacklist is given"""

        jsmith = UniqueIdentity(uuid='jsmith')
        jsmith.identities = [
            Identity(name='John Smith', email='*****@*****.**', source='scm'),
            Identity(name='John Smith', source='scm'),
            Identity(username='******', source='scm'),
            Identity(email='', source='scm'),
        ]

        john_smith = UniqueIdentity(uuid='js')
        john_smith.identities = [
            Identity(name='John Smith JR', username='******', source='scm'),
            Identity(username='******', source='scm'),
            Identity(name='Smith. J', source='mls'),
            Identity(name='Smith. J', email='*****@*****.**', source='mls'),
        ]

        jsmith_alt = UniqueIdentity(uuid='J. Smith')
        jsmith_alt.identities = [
            Identity(name='John Smith JR', username='******', source='alt'),
            Identity(name='John Smith', username='******', source='alt'),
            Identity(email='', source='alt'),
            Identity(email='jsmith', source='alt'),
        ]

        # Matcher configured with two blacklisted entries
        excluded = [MatchingBlacklist(excluded='John Smith'),
                    MatchingBlacklist(excluded='*****@*****.**')]

        matcher = EmailNameMatcher(blacklist=excluded)

        # (left, right, expected result); every pair is checked in
        # both directions
        checks = (
            (jsmith, john_smith, False),
            (john_smith, jsmith, False),
            # John Smith is blacklisted, so no match
            (jsmith, jsmith_alt, False),
            (jsmith_alt, jsmith, False),
            (john_smith, jsmith_alt, True),
            (jsmith_alt, john_smith, True),
        )

        for left, right, expected in checks:
            outcome = matcher.match(left, right)
            self.assertEqual(outcome, expected)
    def test_filter_identities_with_blacklist_not_strict(self):
        """Check filter() output with a blacklist and strict mode disabled"""

        # Fixtures: two unique identities and their identities
        jsmith = UniqueIdentity(uuid='jsmith')
        jsmith.identities = [
            Identity(name='John Smith', email='jsmith_at_example.com', source='scm', uuid='jsmith'),
            Identity(name='John Smith', source='scm', uuid='jsmith'),
            Identity(username='******', source='scm', uuid='jsmith'),
            Identity(email='', source='scm', uuid='jsmith'),
        ]

        jrae = UniqueIdentity(uuid='jrae')
        jrae.identities = [
            Identity(name='Jane Rae', source='scm', uuid='jrae'),
            Identity(name='Jane Rae Doe', email='jane.rae_at_example.net', source='mls', uuid='jrae'),
            Identity(name='jrae', source='scm', uuid='jrae'),
            Identity(email='JRAE_at_example.net', source='scm', uuid='jrae'),
        ]

        excluded = [MatchingBlacklist(excluded='jrae_at_example.net')]

        matcher = EmailMatcher(blacklist=excluded, strict=False)

        # Each case: unique identity to filter, expected uuid and expected
        # email of the single identity that should be returned. The
        # 'JRAE_at_example.net' identity is expected to be left out.
        cases = (
            (jsmith, 'jsmith', 'jsmith_at_example.com'),
            (jrae, 'jrae', 'jane.rae_at_example.net'),
        )

        for uidentity, uuid, email in cases:
            filtered = matcher.filter(uidentity)
            self.assertEqual(len(filtered), 1)

            entry = filtered[0]
            self.assertIsInstance(entry, EmailIdentity)
            self.assertEqual(entry.uuid, uuid)
            self.assertEqual(entry.email, email)
    def test_match(self):
        """Check the results of UsernameMatcher.match()"""

        # Fixtures: three unique identities and their identities
        jsmith = UniqueIdentity(uuid='jsmith')
        jsmith.identities = [
            Identity(name='John Smith', email='*****@*****.**', source='scm'),
            Identity(name='John Smith', source='scm'),
            Identity(username='******', source='scm'),
            Identity(email='', source='scm'),
        ]

        john_smith = UniqueIdentity(uuid='js')
        john_smith.identities = [
            Identity(name='J. Smith', username='******', source='scm'),
            Identity(username='******', source='scm'),
            Identity(name='Smith. J', source='mls'),
            Identity(name='Smith. J', email='*****@*****.**', source='mls'),
        ]

        jsmith_alt = UniqueIdentity(uuid='J. Smith')
        jsmith_alt.identities = [
            Identity(name='J. Smith', username='******', source='alt'),
            Identity(name='John Smith', username='******', source='alt'),
            Identity(email='', source='alt'),
            Identity(email='jsmith', source='alt'),
        ]

        matcher = UsernameMatcher()

        # (left, right, expected result); every pair is checked in
        # both directions
        checks = (
            # The first two unique identities are expected not to match
            (jsmith, john_smith, False),
            (john_smith, jsmith, False),
            # The third one is expected to match both of them
            (jsmith, jsmith_alt, True),
            (jsmith_alt, jsmith, True),
            (john_smith, jsmith_alt, True),
            (jsmith_alt, john_smith, True),
        )

        for left, right, expected in checks:
            outcome = matcher.match(left, right)
            self.assertEqual(outcome, expected)
    def test_match(self):
        """Test match method of EmailNameMatcher.

        Every pair that is expected to match is checked in both
        directions, so an asymmetric implementation is detected.
        """

        # Let's define some identities first
        jsmith = UniqueIdentity(uuid='jsmith')
        jsmith.identities = [Identity(name='John Smith', email='*****@*****.**', source='scm'),
                             Identity(name='John Smith', source='scm'),
                             Identity(username='******', source='scm'),
                             Identity(email='', source='scm')]

        john_smith = UniqueIdentity(uuid='js')
        john_smith.identities = [Identity(name='J. Smith', username='******', source='scm'),
                                 Identity(username='******', source='scm'),
                                 Identity(name='Smith. J', source='mls'),
                                 Identity(name='Smith. J', email='*****@*****.**', source='mls')]

        jsmith_alt = UniqueIdentity(uuid='J. Smith')
        jsmith_alt.identities = [Identity(name='J. Smith', username='******', source='alt'),
                                 Identity(name='John Smith', username='******', source='alt'),
                                 Identity(email='', source='alt'),
                                 Identity(email='jsmith', source='alt')]

        jsmith_not_email = UniqueIdentity(uuid='John Smith')
        jsmith_not_email.identities = [Identity(email='jsmith', source='mls')]

        jrae = UniqueIdentity(uuid='jrae')
        jrae.identities = [Identity(name='Jane Rae', source='scm'),
                           Identity(name='Jane Rae Doe', email='*****@*****.**', source='mls')]

        jrae_doe = UniqueIdentity(uuid='jraedoe')
        jrae_doe.identities = [Identity(name='Jane Rae Doe', email='*****@*****.**', source='mls'),
                               Identity(name='jrae', source='scm')]

        jrae_no_name = UniqueIdentity(uuid='Jane Rae')
        jrae_no_name.identities = [Identity(name='jrae', source='scm')]

        # Tests
        matcher = EmailNameMatcher()

        # First two unique identities must match
        result = matcher.match(jsmith, john_smith)
        self.assertEqual(result, True)

        result = matcher.match(john_smith, jsmith)
        self.assertEqual(result, True)

        # Compared with the third one, only the first unique identity
        # produces a match because of the "John Smith" name
        result = matcher.match(jsmith, jsmith_alt)
        self.assertEqual(result, True)

        result = matcher.match(jsmith_alt, jsmith)
        self.assertEqual(result, True)

        result = matcher.match(john_smith, jsmith_alt)
        self.assertEqual(result, False)

        result = matcher.match(jsmith_alt, john_smith)
        self.assertEqual(result, False)

        # Jane Rae matches Jane Rae Doe because they share
        # the same name "Jane Rae Doe"
        result = matcher.match(jrae, jrae_doe)
        self.assertEqual(result, True)

        # Reverse direction; the original repeated (jrae, jrae_doe) here
        result = matcher.match(jrae_doe, jrae)
        self.assertEqual(result, True)

        # No match with Jane Rae
        result = matcher.match(jsmith, jrae)
        self.assertEqual(result, False)

        result = matcher.match(jsmith, jrae_doe)
        self.assertEqual(result, False)

        result = matcher.match(john_smith, jrae)
        self.assertEqual(result, False)

        result = matcher.match(john_smith, jrae_doe)
        self.assertEqual(result, False)

        result = matcher.match(jsmith_alt, jrae)
        self.assertEqual(result, False)

        result = matcher.match(jsmith_alt, jrae_doe)
        self.assertEqual(result, False)

        # These two unique identities have the same email address
        # but, because 'jsmith' is not a valid email address, they
        # do not match
        result = matcher.match(jsmith_alt, jsmith_not_email)
        self.assertEqual(result, False)

        # These two do not match although they share the same name.
        # In this case the name is invalid because it is not formed
        # like "firstname lastname"
        result = matcher.match(jrae_doe, jrae_no_name)
        self.assertEqual(result, False)
    def test_match_with_blacklist(self):
        """Check EmailMatcher.match() without and with blacklisted entries"""

        # Fixtures: four unique identities and their identities
        jsmith = UniqueIdentity(uuid='jsmith')
        jsmith.identities = [
            Identity(name='John Smith', email='*****@*****.**', source='scm'),
            Identity(name='John Smith', source='scm'),
            Identity(username='******', source='scm'),
            Identity(email='', source='scm'),
        ]

        john_smith = UniqueIdentity(uuid='js')
        john_smith.identities = [
            Identity(name='J. Smith', username='******', source='scm'),
            Identity(username='******', source='scm'),
            Identity(name='Smith. J', source='mls'),
            Identity(name='Smith. J', email='*****@*****.**', source='mls'),
        ]

        jrae = UniqueIdentity(uuid='jrae')
        jrae.identities = [
            Identity(name='Jane Rae', source='scm', uuid='jrae'),
            Identity(name='Jane Rae Doe', email='*****@*****.**', source='mls', uuid='jrae'),
            Identity(name='jrae', source='scm', uuid='jrae'),
            Identity(email='*****@*****.**', source='scm', uuid='jrae'),
        ]

        jane_rae = UniqueIdentity(uuid='Jane Rae')
        jane_rae.identities = [
            Identity(name='Jane Rae', source='scm', uuid='Jane Rae'),
            Identity(email='*****@*****.**', source='mls', uuid='Jane Rae'),
        ]

        # The same four comparisons are repeated for every matcher
        pairs = (
            (jsmith, john_smith),
            (john_smith, jsmith),
            (jrae, jane_rae),
            (jane_rae, jrae),
        )

        # Without a blacklist, all the pairs are expected to match
        matcher = EmailMatcher()

        for (left, right), expected in zip(pairs, (True, True, True, True)):
            outcome = matcher.match(left, right)
            self.assertEqual(outcome, expected)

        # With this blacklist, only the Jane Rae pair is expected to match
        excluded = [MatchingBlacklist(excluded='*****@*****.**'),
                    MatchingBlacklist(excluded='*****@*****.**')]

        matcher = EmailMatcher(blacklist=excluded)

        for (left, right), expected in zip(pairs, (False, False, True, True)):
            outcome = matcher.match(left, right)
            self.assertEqual(outcome, expected)

        # With this blacklist, no match is expected at all
        excluded = [MatchingBlacklist(excluded='*****@*****.**'),
                    MatchingBlacklist(excluded='*****@*****.**'),
                    MatchingBlacklist(excluded='*****@*****.**')]

        matcher = EmailMatcher(blacklist=excluded)

        for (left, right), expected in zip(pairs, (False, False, False, False)):
            outcome = matcher.match(left, right)
            self.assertEqual(outcome, expected)
Beispiel #36
0
    def test_to_dict(self):
        """Check the dict returned by to_dict(), before and after adding a profile"""

        country = Country(code='US', name='United States of America', alpha3='USA')
        self.session.add(country)

        uid = UniqueIdentity(uuid='John Smith')
        self.session.add(uid)

        id1 = Identity(id='A', name='John Smith', email='*****@*****.**',
                       username='******', source='scm', uuid='John Smith')
        id2 = Identity(id='B', name=None, email='*****@*****.**',
                       username=None, source='scm', uuid='John Smith')

        self.session.add(id1)
        self.session.add(id2)
        self.session.commit()

        # No profile was stored yet
        d = uid.to_dict()

        self.assertIsInstance(d, dict)
        self.assertEqual(d['uuid'], 'John Smith')

        self.assertEqual(d['profile'], None)

        identities = d['identities']
        self.assertEqual(len(identities), 2)

        # Expected key/value pairs of each identity, in the order checked
        expected_identities = (
            (('id', 'A'),
             ('name', 'John Smith'),
             ('email', '*****@*****.**'),
             ('username', 'jsmith'),
             ('source', 'scm'),
             ('uuid', 'John Smith')),
            (('id', 'B'),
             ('name', None),
             ('email', '*****@*****.**'),
             ('username', None),
             ('source', 'scm'),
             ('uuid', 'John Smith')),
        )

        for position, fields in enumerate(expected_identities):
            entry = d['identities'][position]

            for key, value in fields:
                self.assertEqual(entry[key], value)

        # Store a profile and check that to_dict() includes it
        prf = Profile(uuid='John Smith', name='Smith, J.',
                      email='*****@*****.**', is_bot=True,
                      country_code='US')

        self.session.add(prf)
        self.session.commit()

        d = uid.to_dict()

        dp = d['profile']

        for key, value in (('uuid', 'John Smith'),
                           ('name', 'Smith, J.'),
                           ('email', '*****@*****.**'),
                           ('is_bot', True)):
            self.assertEqual(dp[key], value)

        self.assertEqual(dp['country']['code'], 'US')
        self.assertEqual(dp['country']['name'], 'United States of America')
    def test_match_with_blacklist(self):
        """Check UsernameMatcher.match() without and with blacklisted entries"""

        # Fixtures: four unique identities and their identities
        jsmith = UniqueIdentity(uuid='jsmith')
        jsmith.identities = [Identity(name='John Smith', username='******', source='scm'),
                             Identity(name='John Smith', source='scm'),
                             Identity(username='******', source='scm'),
                             Identity(email='', source='scm')]

        john_smith = UniqueIdentity(uuid='js')
        john_smith.identities = [Identity(name='J. Smith', email='*****@*****.**', source='scm'),
                                 Identity(username='******', source='scm'),
                                 Identity(name='Smith. J', source='mls'),
                                 Identity(name='Smith. J', username='******', source='mls')]

        jrae = UniqueIdentity(uuid='jrae')
        jrae.identities = [Identity(name='Jane Rae', source='scm', uuid='jrae'),
                           Identity(name='Jane Rae Doe', username='******', email='*****@*****.**',
                                    source='mls', uuid='jrae'),
                           Identity(name='jrae', source='scm', uuid='jrae'),
                           Identity(email='*****@*****.**', source='scm', uuid='jrae')]

        jane_rae = UniqueIdentity(uuid='Jane Rae')
        jane_rae.identities = [Identity(name='Jane Rae', source='scm', uuid='Jane Rae'),
                               Identity(email='*****@*****.**', username='******',
                                        source='mls', uuid='Jane Rae')]

        # The same four comparisons are repeated for every matcher
        pairs = (
            (jsmith, john_smith),
            (john_smith, jsmith),
            (jrae, jane_rae),
            (jane_rae, jrae),
        )

        # Without a blacklist, all the pairs are expected to match
        matcher = UsernameMatcher()

        for (left, right), expected in zip(pairs, (True, True, True, True)):
            outcome = matcher.match(left, right)
            self.assertEqual(outcome, expected)

        # With this blacklist, only the Jane Rae pair is expected to match
        excluded = [MatchingBlacklist(excluded='john_smith')]

        matcher = UsernameMatcher(blacklist=excluded)

        for (left, right), expected in zip(pairs, (False, False, True, True)):
            outcome = matcher.match(left, right)
            self.assertEqual(outcome, expected)

        # With this blacklist, no match is expected at all
        excluded = [MatchingBlacklist(excluded='John_Smith'),
                    MatchingBlacklist(excluded='Jane.rae')]

        matcher = UsernameMatcher(blacklist=excluded)

        for (left, right), expected in zip(pairs, (False, False, False, False)):
            outcome = matcher.match(left, right)
            self.assertEqual(outcome, expected)
    def test_match(self):
        """Test match method of EmailNameMatcher.

        Every pair that is expected to match is checked in both
        directions, so an asymmetric implementation is detected.
        """

        # Let's define some identities first
        jsmith = UniqueIdentity(uuid='jsmith')
        jsmith.identities = [
            Identity(name='John Smith',
                     email='*****@*****.**',
                     source='scm'),
            Identity(name='John Smith', source='scm'),
            Identity(username='******', source='scm'),
            Identity(email='', source='scm')
        ]

        john_smith = UniqueIdentity(uuid='js')
        john_smith.identities = [
            Identity(name='J. Smith', username='******', source='scm'),
            Identity(username='******', source='scm'),
            Identity(name='Smith. J', source='mls'),
            Identity(name='Smith. J', email='*****@*****.**', source='mls')
        ]

        jsmith_alt = UniqueIdentity(uuid='J. Smith')
        jsmith_alt.identities = [
            Identity(name='J. Smith', username='******', source='alt'),
            Identity(name='John Smith', username='******', source='alt'),
            Identity(email='', source='alt'),
            Identity(email='jsmith', source='alt')
        ]

        jsmith_not_email = UniqueIdentity(uuid='John Smith')
        jsmith_not_email.identities = [Identity(email='jsmith', source='mls')]

        jrae = UniqueIdentity(uuid='jrae')
        jrae.identities = [
            Identity(name='Jane Rae', source='scm'),
            Identity(name='Jane Rae Doe',
                     email='*****@*****.**',
                     source='mls')
        ]

        jrae_doe = UniqueIdentity(uuid='jraedoe')
        jrae_doe.identities = [
            Identity(name='Jane Rae Doe',
                     email='*****@*****.**',
                     source='mls'),
            Identity(name='jrae', source='scm')
        ]

        jrae_no_name = UniqueIdentity(uuid='Jane Rae')
        jrae_no_name.identities = [Identity(name='jrae', source='scm')]

        # Tests
        matcher = EmailNameMatcher()

        # First two unique identities must match
        result = matcher.match(jsmith, john_smith)
        self.assertEqual(result, True)

        result = matcher.match(john_smith, jsmith)
        self.assertEqual(result, True)

        # Compared with the third one, only the first unique identity
        # produces a match because of the "John Smith" name
        result = matcher.match(jsmith, jsmith_alt)
        self.assertEqual(result, True)

        result = matcher.match(jsmith_alt, jsmith)
        self.assertEqual(result, True)

        result = matcher.match(john_smith, jsmith_alt)
        self.assertEqual(result, False)

        result = matcher.match(jsmith_alt, john_smith)
        self.assertEqual(result, False)

        # Jane Rae matches Jane Rae Doe because they share
        # the same name "Jane Rae Doe"
        result = matcher.match(jrae, jrae_doe)
        self.assertEqual(result, True)

        # Reverse direction; the original repeated (jrae, jrae_doe) here
        result = matcher.match(jrae_doe, jrae)
        self.assertEqual(result, True)

        # No match with Jane Rae
        result = matcher.match(jsmith, jrae)
        self.assertEqual(result, False)

        result = matcher.match(jsmith, jrae_doe)
        self.assertEqual(result, False)

        result = matcher.match(john_smith, jrae)
        self.assertEqual(result, False)

        result = matcher.match(john_smith, jrae_doe)
        self.assertEqual(result, False)

        result = matcher.match(jsmith_alt, jrae)
        self.assertEqual(result, False)

        result = matcher.match(jsmith_alt, jrae_doe)
        self.assertEqual(result, False)

        # These two unique identities have the same email address
        # but, because 'jsmith' is not a valid email address, they
        # do not match
        result = matcher.match(jsmith_alt, jsmith_not_email)
        self.assertEqual(result, False)

        # These two do not match although they share the same name.
        # In this case the name is invalid because it is not formed
        # like "firstname lastname"
        result = matcher.match(jrae_doe, jrae_no_name)
        self.assertEqual(result, False)
Beispiel #39
0
    def test_to_dict(self):
        """Check the dict returned by to_dict(), before and after adding a profile"""

        country = Country(code='US', name='United States of America', alpha3='USA')
        self.session.add(country)

        uid = UniqueIdentity(uuid='John Smith')
        self.session.add(uid)

        id1 = Identity(id='A',
                       name='John Smith',
                       email='*****@*****.**',
                       username='******',
                       source='scm',
                       uuid='John Smith')
        id2 = Identity(id='B',
                       name=None,
                       email='*****@*****.**',
                       username=None,
                       source='scm',
                       uuid='John Smith')

        self.session.add(id1)
        self.session.add(id2)
        self.session.commit()

        # First pass: no profile was stored yet
        d = uid.to_dict()

        self.assertIsInstance(d, dict)
        self.assertEqual(d['uuid'], 'John Smith')

        self.assertEqual(d['profile'], None)

        identities = d['identities']
        self.assertEqual(len(identities), 2)

        # Keys are checked in this order for every identity
        keys = ('id', 'name', 'email', 'username', 'source', 'uuid')

        expected_values = (
            ('A', 'John Smith', '*****@*****.**', 'jsmith', 'scm', 'John Smith'),
            ('B', None, '*****@*****.**', None, 'scm', 'John Smith'),
        )

        for position, values in enumerate(expected_values):
            entry = d['identities'][position]

            for key, value in zip(keys, values):
                self.assertEqual(entry[key], value)

        # Second pass: store a profile and check that it is included
        prf = Profile(uuid='John Smith',
                      name='Smith, J.',
                      email='*****@*****.**',
                      is_bot=True,
                      country_code='US')

        self.session.add(prf)
        self.session.commit()

        d = uid.to_dict()

        dp = d['profile']
        self.assertEqual(dp['uuid'], 'John Smith')
        self.assertEqual(dp['name'], 'Smith, J.')
        self.assertEqual(dp['email'], '*****@*****.**')
        self.assertEqual(dp['is_bot'], True)

        country_info = dp['country']
        self.assertEqual(country_info['code'], 'US')

        country_info = dp['country']
        self.assertEqual(country_info['name'], 'United States of America')