def _load_test_dataset(self):
        """Populate the test registry with organizations, domains and identities.

        Registers the 'Example', 'Example Alt', 'Bitergia' and 'LibreSoft'
        organizations (the first three with domains), a John Smith
        identity group enrolled in 'Bitergia', a John Doe identity and a
        Jane Roe identity group.
        """
        db = self.db

        # Organizations and their domains
        api.add_organization(db, 'Example')
        api.add_domain(db, 'Example', 'example.com', is_top_domain=True)

        api.add_organization(db, 'Example Alt')
        api.add_domain(db, 'Example Alt', 'u.example.com', is_top_domain=True)
        for subdomain in ('es.u.example.com', 'en.u.example.com'):
            api.add_domain(db, 'Example Alt', subdomain)

        api.add_organization(db, 'Bitergia')
        for domain in ('bitergia.com', 'bitergia.org'):
            api.add_domain(db, 'Bitergia', domain)

        # LibreSoft is registered without any domain
        api.add_organization(db, 'LibreSoft')

        # John Smith: the value returned by the first call is passed as
        # `uuid` to the following ones
        smith = api.add_identity(db, 'scm', '*****@*****.**',
                                 'John Smith', 'jsmith')
        api.add_identity(db, 'scm', '*****@*****.**', 'John Smith',
                         uuid=smith)
        api.add_identity(db, 'scm', '*****@*****.**', 'John Smith', 'jsmith',
                         uuid=smith)
        api.add_enrollment(db, smith, 'Bitergia')

        # John Doe: a single identity without email
        api.add_identity(db, 'unknown', None, 'John Doe', 'jdoe')

        # Jane Roe: same scheme as John Smith, without enrollments
        roe = api.add_identity(db, 'scm', '*****@*****.**',
                               'Jane Roe', 'jroe')
        api.add_identity(db, 'scm', '*****@*****.**', uuid=roe)
        api.add_identity(db, 'unknown', '*****@*****.**', uuid=roe)
Beispiel #2
0
    def load_test_dataset(self):
        """Load a country, two unique identities and their enrollments.

        'John Smith' and 'John Doe' are used directly as the uuids of
        the unique identities.  Both are enrolled in 'Example' and
        'Bitergia'; 'LibreSoft' is registered without enrollments.
        """
        db = self.db
        smith, doe = 'John Smith', 'John Doe'

        # Country referenced by John Doe's profile
        with db.connect() as session:
            session.add(Country(code='US',
                                name='United States of America',
                                alpha3='USA'))

        # John Smith: two identities and a non-bot profile
        api.add_unique_identity(db, smith)
        api.add_identity(db, 'scm', '*****@*****.**', uuid=smith)
        api.add_identity(db, 'scm', '*****@*****.**', 'John Smith',
                         uuid=smith)
        api.edit_profile(db, smith, name='John Smith', is_bot=False)

        # John Doe: one identity and a bot profile located in the US
        api.add_unique_identity(db, doe)
        api.add_identity(db, 'scm', '*****@*****.**', uuid=doe)
        api.edit_profile(db, doe, email='*****@*****.**', is_bot=True,
                         country_code='US')

        # Both of them are enrolled in Example without explicit dates
        api.add_organization(db, 'Example')
        for person in (smith, doe):
            api.add_enrollment(db, person, 'Example')

        # In Bitergia only John Doe's enrollment is bounded in time
        api.add_organization(db, 'Bitergia')
        api.add_enrollment(db, smith, 'Bitergia')
        period_start = datetime.datetime(1999, 1, 1)
        period_end = datetime.datetime(2000, 1, 1)
        api.add_enrollment(db, doe, 'Bitergia', period_start, period_end)

        api.add_organization(db, 'LibreSoft')
    def _load_test_dataset(self):
        """Clear the database and load the full test dataset.

        The dataset contains a country, three organizations (two of them
        with domains), the John Smith and Jane Roe identity groups with
        their profiles, a unique identity without identities or
        enrollments, several enrollments and two matching blacklist
        entries.
        """
        import datetime

        db = self.db
        db.clear()

        # Country referenced by Jane Roe's profile
        with db.connect() as session:
            session.add(Country(code='US',
                                name='United States of America',
                                alpha3='USA'))

        # Organizations and their domains
        api.add_organization(db, 'Example')
        for domain in ('example.com', 'example.net'):
            api.add_domain(db, 'Example', domain, is_top_domain=True)

        api.add_organization(db, 'Bitergia')
        bitergia_domains = (
            ('bitergia.net', True),
            ('bitergia.com', True),
            ('api.bitergia.com', False),
            ('test.bitergia.com', False),
        )
        for domain, is_top in bitergia_domains:
            api.add_domain(db, 'Bitergia', domain, is_top_domain=is_top)

        api.add_organization(db, 'Unknown')

        # John Smith identities and profile
        smith = api.add_identity(db, 'scm', '*****@*****.**',
                                 'John Smith', 'jsmith')
        api.add_identity(db, 'scm', '*****@*****.**', 'John Smith',
                         uuid=smith)
        api.edit_profile(db, smith, email='*****@*****.**', is_bot=True)

        # Jane Roe identities and profile
        roe = api.add_identity(db, 'scm', '*****@*****.**',
                               'Jane Roe', 'jroe')
        api.add_identity(db, 'scm', '*****@*****.**', uuid=roe)
        api.add_identity(db, 'unknown', '*****@*****.**', uuid=roe)
        api.edit_profile(db, roe, name='Jane Roe', email='*****@*****.**',
                         is_bot=False, country_code='US')

        # Unique identity that has neither identities nor enrollments
        api.add_unique_identity(db,
                                '0000000000000000000000000000000000000000')

        # Enrollments
        api.add_enrollment(db, smith, 'Example')

        api.add_enrollment(db, roe, 'Example')
        for first_year, last_year in ((1999, 2000), (2006, 2008)):
            api.add_enrollment(db, roe, 'Bitergia',
                               datetime.datetime(first_year, 1, 1),
                               datetime.datetime(last_year, 1, 1))

        # Matching blacklist
        for entry in ('*****@*****.**', 'John Smith'):
            api.add_to_matching_blacklist(db, entry)
    def load_test_dataset(self):
        """Populate the test registry with organizations, domains and identities.

        Besides the John Smith, John Doe and Jane Roe identities, an
        identity whose email is not valid ('novalidemail@') is added
        under the 'test' source.
        """
        registry = self.db

        # Organizations and their domains
        api.add_organization(registry, 'Example')
        api.add_domain(registry, 'Example', 'example.com',
                       is_top_domain=True)

        api.add_organization(registry, 'Example Alt')
        api.add_domain(registry, 'Example Alt', 'u.example.com',
                       is_top_domain=True)
        for subdomain in ('es.u.example.com', 'en.u.example.com'):
            api.add_domain(registry, 'Example Alt', subdomain)

        api.add_organization(registry, 'Bitergia')
        for domain in ('bitergia.com', 'bitergia.org'):
            api.add_domain(registry, 'Bitergia', domain)

        # LibreSoft is registered without any domain
        api.add_organization(registry, 'LibreSoft')

        # John Smith: the value returned by the first call is passed as
        # `uuid` to the following ones
        john = api.add_identity(registry, 'scm', '*****@*****.**',
                                'John Smith', 'jsmith')
        api.add_identity(registry, 'scm', '*****@*****.**', 'John Smith',
                         uuid=john)
        api.add_identity(registry, 'scm', '*****@*****.**', 'John Smith',
                         'jsmith', uuid=john)
        api.add_enrollment(registry, john, 'Bitergia')

        # John Doe: a single identity without email
        api.add_identity(registry, 'unknown', None, 'John Doe', 'jdoe')

        # Jane Roe: same scheme as John Smith, without enrollments
        jane = api.add_identity(registry, 'scm', '*****@*****.**',
                                'Jane Roe', 'jroe')
        api.add_identity(registry, 'scm', '*****@*****.**', uuid=jane)
        api.add_identity(registry, 'unknown', '*****@*****.**', uuid=jane)

        # Identity with an email that is not valid
        api.add_identity(registry, 'test', 'novalidemail@')
Beispiel #5
0
    def add_identity(cls, db, identity, backend):
        """Add a single identity from `backend` to Sorting Hat.

        The identity is registered and its profile is filled in with the
        identity's name (or its username when the name is empty) and
        email.  When the identity carries a company, the organization is
        registered if needed and the identity is enrolled in it from
        1900-01-01 to 2100-01-01.

        Errors raised while registering the identity are logged and
        ignored.

        :param db: database handle passed through to the Sorting Hat API
        :param identity: mapping with 'email', 'name' and 'username' keys
            and an optional 'company' key
        :param backend: source under which the identity is registered

        :returns: the uuid of the identity, or None when it could not be
            registered
        """
        uuid = None

        try:
            uuid = api.add_identity(db, backend, identity['email'],
                                    identity['name'], identity['username'])

            logger.debug("New sortinghat identity %s %s,%s,%s ", uuid,
                         identity['username'], identity['name'],
                         identity['email'])

            profile = {
                "name":
                identity['name'] if identity['name'] else identity['username'],
                "email":
                identity['email']
            }

            api.edit_profile(db, uuid, **profile)

        except AlreadyExistsError as ex:
            # The identity is already stored: reuse the identifier
            # carried by the error
            uuid = ex.eid
        except InvalidValueError:
            logger.warning("Trying to add a None identity. Ignoring it.")
        except UnicodeEncodeError:
            logger.warning("UnicodeEncodeError. Ignoring it. %s %s %s",
                           identity['email'], identity['name'],
                           identity['username'])
        except Exception:
            logger.warning(
                "Unknown exception adding identity. Ignoring it. %s %s %s",
                identity['email'],
                identity['name'],
                identity['username'],
                exc_info=True)

        if 'company' in identity and identity['company'] is not None:
            company = identity['company']

            # The organization is registered on its own: when it already
            # exists, the error must not prevent the enrollment below.
            try:
                api.add_organization(db, company)
            except AlreadyExistsError:
                pass

            # Without a uuid the registration failed (already logged
            # above), so there is nothing to enroll.
            if uuid is not None:
                try:
                    api.add_enrollment(db, uuid, company,
                                       datetime(1900, 1, 1),
                                       datetime(2100, 1, 1))
                except AlreadyExistsError:
                    pass

        return uuid
Beispiel #6
0
    def load_test_dataset(self):
        """Clear the database and load two unique identities with enrollments.

        'John Smith' and 'John Doe' are used directly as uuids.  Both
        are enrolled in 'Example'; John Smith also gets three
        enrollments in 'Bitergia'.
        """
        self.db.clear()

        db = self.db
        smith, doe = 'John Smith', 'John Doe'

        for person in (smith, doe):
            api.add_unique_identity(db, person)

        for organization in ('Example', 'Bitergia'):
            api.add_organization(db, organization)

        for person in (smith, doe):
            api.add_enrollment(db, person, 'Example')

        # John Smith in Bitergia: one enrollment without dates followed
        # by two bounded periods
        api.add_enrollment(db, smith, 'Bitergia')
        for first_year, last_year in ((1999, 2000), (2006, 2008)):
            api.add_enrollment(db, smith, 'Bitergia',
                               datetime.datetime(first_year, 1, 1),
                               datetime.datetime(last_year, 1, 1))
def enroll_identities(sh_db, identities, insert_orgs=False):
    """Enroll every identity in the organizations listed for it.

    `identities` is indexed by uuid; each value holds an 'enrollments'
    sequence whose items are indexed as [0] organization name and
    [1], [2] the remaining arguments given to `api.add_enrollment`
    (presumably the start and end dates -- confirm against the caller).
    After each enrollment, the enrollments of that uuid in the
    organization are merged.

    When `insert_orgs` is true, the organization is registered before
    enrolling.  Not found and value errors are printed and skipped;
    enrollments that already exist are silently ignored.
    """
    def register_organization(name):
        # An organization that is already registered is not an error
        try:
            api.add_organization(sh_db, name)
        except AlreadyExistsError:
            pass

    for uuid in identities:
        for enrollment in identities[uuid]['enrollments']:
            try:
                if insert_orgs:
                    register_organization(enrollment[0])

                api.add_enrollment(sh_db, uuid, enrollment[0],
                                   enrollment[1], enrollment[2])
                api.merge_enrollments(sh_db, uuid, enrollment[0])
            except (NotFoundError, ValueError) as error:
                details = (unicode(error), uuid, enrollment[0],
                           enrollment[1], enrollment[2])
                report = "Error: %s - (%s, %s, %s, %s)" % details
                print(report.encode('UTF-8'))
            except AlreadyExistsError:
                pass
Beispiel #8
0
def enroll_identities(sh_db, identities, insert_orgs=False):
    """Enroll every identity in the organizations listed for it.

    `identities` is indexed by uuid; each value holds an 'enrollments'
    sequence whose items are indexed as [0] organization name and
    [1], [2] the remaining arguments given to `api.add_enrollment`
    (presumably the start and end dates -- confirm against the caller).
    After each enrollment, the enrollments of that uuid in the
    organization are merged.

    When `insert_orgs` is true, the organization is registered before
    enrolling.  Not found and value errors are printed and skipped;
    enrollments that already exist are silently ignored.
    """
    def register_organization(name):
        # An organization that is already registered is not an error
        try:
            api.add_organization(sh_db, name)
        except AlreadyExistsError:
            pass

    for uuid in identities:
        pending = identities[uuid]['enrollments']
        for enrollment in pending:
            try:
                if insert_orgs:
                    register_organization(enrollment[0])

                api.add_enrollment(sh_db, uuid, enrollment[0],
                                   enrollment[1], enrollment[2])
                api.merge_enrollments(sh_db, uuid, enrollment[0])
            except (NotFoundError, ValueError) as error:
                fields = (unicode(error), uuid, enrollment[0],
                          enrollment[1], enrollment[2])
                text = "Error: %s - (%s, %s, %s, %s)" % fields
                print(text.encode('UTF-8'))
            except AlreadyExistsError:
                pass
Beispiel #9
0
    def add_identity(cls, db, identity, backend):
        """Add a single identity from `backend` to Sorting Hat.

        The identity is registered and its profile is filled in with the
        identity's name (or its username when the name is empty) and
        email.  When the identity carries a company, the organization is
        registered if needed and the identity is enrolled in it from
        1900-01-01 to 2100-01-01.

        Errors raised while registering the identity are logged and
        ignored.

        :param db: database handle passed through to the Sorting Hat API
        :param identity: mapping with 'email', 'name' and 'username' keys
            and an optional 'company' key
        :param backend: source under which the identity is registered

        :returns: the uuid of the identity, or None when it could not be
            registered
        """
        uuid = None

        try:
            uuid = api.add_identity(db, backend, identity['email'],
                                    identity['name'], identity['username'])

            logger.debug("New sortinghat identity %s %s,%s,%s ",
                         uuid, identity['username'], identity['name'], identity['email'])

            profile = {"name": identity['name'] if identity['name'] else identity['username'],
                       "email": identity['email']}

            api.edit_profile(db, uuid, **profile)

        except AlreadyExistsError as ex:
            # The identity is already stored: reuse the identifier
            # carried by the error
            uuid = ex.uuid
        except WrappedValueError:
            logger.warning("Trying to add a None identity. Ignoring it.")
        except UnicodeEncodeError:
            logger.warning("UnicodeEncodeError. Ignoring it. %s %s %s",
                           identity['email'], identity['name'],
                           identity['username'])
        except Exception:
            logger.warning("Unknown exception adding identity. Ignoring it. %s %s %s",
                           identity['email'], identity['name'],
                           identity['username'])
            traceback.print_exc()

        if 'company' in identity and identity['company'] is not None:
            company = identity['company']

            # The organization is registered on its own: when it already
            # exists, the error must not prevent the enrollment below.
            try:
                api.add_organization(db, company)
            except AlreadyExistsError:
                pass

            # Without a uuid the registration failed (already logged
            # above), so there is nothing to enroll.
            if uuid is not None:
                try:
                    api.add_enrollment(db, uuid, company,
                                       datetime(1900, 1, 1),
                                       datetime(2100, 1, 1))
                except AlreadyExistsError:
                    pass

        return uuid
Beispiel #10
0
    def load_test_dataset(self):
        """Load two unique identities with several enrollments each.

        'John Smith' and 'John Doe' are used directly as uuids.  John
        Doe is enrolled in 'Bitergia' and 'Example'; John Smith gets
        four enrollments in 'Example'.
        """
        db = self.db
        smith, doe = 'John Smith', 'John Doe'

        for person in (smith, doe):
            api.add_unique_identity(db, person)

        for organization in ('Example', 'Bitergia'):
            api.add_organization(db, organization)

        # John Doe: Bitergia without dates, Example between 1999 and 2010
        api.add_enrollment(db, doe, 'Bitergia')
        api.add_enrollment(db, doe, 'Example',
                           datetime.datetime(1999, 1, 1),
                           datetime.datetime(2010, 1, 1))

        # John Smith: one enrollment in Example without dates followed
        # by three bounded periods
        api.add_enrollment(db, smith, 'Example')
        for first_year, last_year in ((1999, 2010), (1981, 1990), (1991, 1993)):
            api.add_enrollment(db, smith, 'Example',
                               datetime.datetime(first_year, 1, 1),
                               datetime.datetime(last_year, 1, 1))
Beispiel #11
0
    def test_reset(self):
        """Check if stored relationships and enrollments are removed before loading

        An identity group with an enrollment is stored first; after
        importing with `reset=True` the registry is expected to hold
        three unique identities.
        """

        def assert_identity(identity, id_, name, email, username, source):
            # Compare every field of a stored identity
            self.assertEqual(identity.id, id_)
            self.assertEqual(identity.name, name)
            self.assertEqual(identity.email, email)
            self.assertEqual(identity.username, username)
            self.assertEqual(identity.source, source)

        def assert_enrollments(unique_identity, expected):
            # Compare the number of enrollments of a unique identity
            found = api.enrollments(self.db, unique_identity.uuid)
            self.assertEqual(len(found), expected)

        # These identities will be split and enrollments removed
        stored = api.add_identity(self.db, 'unknown', email='*****@*****.**')
        api.add_identity(self.db, source='scm', email='*****@*****.**',
                         name='John Smith', username='******', uuid=stored)

        api.add_organization(self.db, 'LibreSoft')
        api.add_enrollment(self.db, stored, 'LibreSoft',
                           datetime.datetime(2000, 1, 1, 0, 0),
                           datetime.datetime(2100, 1, 1, 0, 0))

        parser = self.get_parser(datadir('sortinghat_valid.json'))

        result = self.cmd.import_identities(parser, reset=True)
        self.assertEqual(result, CMD_SUCCESS)

        # Check the contents of the registry
        uids = api.unique_identities(self.db)
        self.assertEqual(len(uids), 3)

        # Jane Roe
        jane = uids[0]
        self.assertEqual(jane.uuid, '17ab00ed3825ec2f50483e33c88df223264182ba')

        jane_ids = self.sort_identities(jane.identities)
        self.assertEqual(len(jane_ids), 3)

        assert_identity(jane_ids[0],
                        '17ab00ed3825ec2f50483e33c88df223264182ba',
                        'Jane Roe', '*****@*****.**', 'jroe', 'scm')
        assert_enrollments(jane, 3)

        # Identity that was stored before the import, now on its own
        split = uids[1]
        self.assertEqual(split.uuid, '2371a34a0ac65fbd9d631464ee41d583ec0e1e88')

        split_ids = self.sort_identities(split.identities)
        self.assertEqual(len(split_ids), 1)

        assert_identity(split_ids[0],
                        '2371a34a0ac65fbd9d631464ee41d583ec0e1e88',
                        None, '*****@*****.**', None, 'unknown')
        assert_enrollments(split, 0)

        # John Smith
        john = uids[2]
        self.assertEqual(john.uuid, 'a9b403e150dd4af8953a52a4bb841051e4b705d9')

        john_ids = self.sort_identities(john.identities)
        self.assertEqual(len(john_ids), 2)

        assert_identity(john_ids[0],
                        '880b3dfcb3a08712e5831bddc3dfe81fc5d7b331',
                        'John Smith', '*****@*****.**', None, 'scm')
        assert_identity(john_ids[1],
                        'a9b403e150dd4af8953a52a4bb841051e4b705d9',
                        'John Smith', '*****@*****.**', 'jsmith', 'scm')
        assert_enrollments(john, 1)
Beispiel #12
0
    def load_test_dataset(self):
        """Load two unique identities with several enrollments each.

        'John Smith' and 'John Doe' are used directly as uuids.  John
        Doe is enrolled in 'Bitergia' and 'Example'; John Smith gets
        four enrollments in 'Example'.
        """
        db = self.db
        smith, doe = 'John Smith', 'John Doe'

        for person in (smith, doe):
            api.add_unique_identity(db, person)

        for organization in ('Example', 'Bitergia'):
            api.add_organization(db, organization)

        # John Doe: Bitergia without dates, Example between 1999 and 2010
        api.add_enrollment(db, doe, 'Bitergia')
        api.add_enrollment(db, doe, 'Example',
                           datetime.datetime(1999, 1, 1),
                           datetime.datetime(2010, 1, 1))

        # John Smith: one enrollment in Example without dates followed
        # by three bounded periods
        api.add_enrollment(db, smith, 'Example')
        for first_year, last_year in ((1999, 2010), (1981, 1990), (1991, 1993)):
            api.add_enrollment(db, smith, 'Example',
                               datetime.datetime(first_year, 1, 1),
                               datetime.datetime(last_year, 1, 1))
Beispiel #13
0
    def add_identities(cls, db, identities, backend):
        """Load a list of identities from `backend` into Sorting Hat.

        Every identity is registered through the Sorting Hat API and its
        profile is filled in with the identity's name (or its username
        when the name is empty) and email.  Identities that cannot be
        added are logged and skipped.

        Merging of matching identities is switched off by the local
        `merge_identities` flag, so the method currently always returns
        an empty list.

        :param db: database handle passed through to the Sorting Hat API
        :param identities: sized iterable of mappings with 'email',
            'name' and 'username' keys and an optional 'company' key
        :param backend: source under which the identities are registered

        :returns: list with the uuids involved in merges when merging is
            enabled; an empty list otherwise
        """
        merge_identities = False

        logger.info("Adding the identities to SortingHat")
        if not merge_identities:
            logger.info("Not doing identities merge")

        total = 0
        lidentities = len(identities)

        if merge_identities:
            merged_identities = []  # old identities merged into new ones
            blacklist = api.blacklist(db)
            matching = 'email-name'  # Not active
            matcher = create_identity_matcher(matching, blacklist)

        for identity in identities:
            try:
                uuid = api.add_identity(db, backend, identity['email'],
                                        identity['name'], identity['username'])

                logger.debug("New sortinghat identity %s %s,%s,%s (%i/%i)",
                             uuid, identity['username'], identity['name'],
                             identity['email'], total, lidentities)

                profile = {"name": identity['name'] if identity['name'] else identity['username'],
                           "email": identity['email']}

                api.edit_profile(db, uuid, **profile)

                total += 1
                if not merge_identities:
                    # Don't do the merge here. Too slow in large projects.
                    # NOTE(review): this `continue` also skips the company
                    # enrollment at the end of the loop body -- confirm
                    # that this is intended.
                    continue

                # Time to merge
                matches = api.match_identities(db, uuid, matcher)

                if len(matches) > 1:
                    u = api.unique_identities(db, uuid)[0]
                    for m in matches:
                        # First add the old uuid to the list of uuids
                        # changed by the merge
                        if m.uuid not in merged_identities:
                            merged_identities.append(m.uuid)
                        if m.uuid == uuid:
                            continue
                        # Merge matched identity into added identity
                        api.merge_unique_identities(db, m.uuid, u.uuid)

            except AlreadyExistsError as ex:
                # NOTE(review): the uuid is recorded but never used
                # because the identity is skipped right away.
                uuid = ex.uuid
                continue
            except WrappedValueError:
                logger.warning("Trying to add a None identity. Ignoring it.")
                continue
            except UnicodeEncodeError:
                logger.warning("UnicodeEncodeError. Ignoring it. %s %s %s",
                               identity['email'], identity['name'],
                               identity['username'])
                continue
            except Exception:
                logger.warning("Unknown exception adding identity. Ignoring it. %s %s %s",
                               identity['email'], identity['name'],
                               identity['username'])
                traceback.print_exc()
                continue

            if 'company' in identity and identity['company'] is not None:
                company = identity['company']

                # The organization is registered on its own: when it
                # already exists, the error must not prevent the
                # enrollment below.
                try:
                    api.add_organization(db, company)
                except AlreadyExistsError:
                    pass

                try:
                    api.add_enrollment(db, uuid, company,
                                       datetime(1900, 1, 1),
                                       datetime(2100, 1, 1))
                except AlreadyExistsError:
                    pass

        logger.info("Total NEW identities: %i", total)

        if merge_identities:
            logger.info("Total NEW identities merged: %i",
                        len(merged_identities))
            return merged_identities

        return []
Beispiel #14
0
    def test_reset(self):
        """Check if stored relationships and enrollments are removed before loading

        An identity group with an enrollment is stored first; after
        importing with `reset=True` the registry is expected to hold
        three unique identities.
        """

        def assert_identity(identity, id_, name, email, username, source):
            # Compare every field of a stored identity
            self.assertEqual(identity.id, id_)
            self.assertEqual(identity.name, name)
            self.assertEqual(identity.email, email)
            self.assertEqual(identity.username, username)
            self.assertEqual(identity.source, source)

        def assert_enrollments(unique_identity, expected):
            # Compare the number of enrollments of a unique identity
            found = api.enrollments(self.db, unique_identity.uuid)
            self.assertEqual(len(found), expected)

        # These identities will be split and enrollments removed
        stored = api.add_identity(self.db, 'unknown', email='*****@*****.**')
        api.add_identity(self.db,
                         source='scm',
                         email='*****@*****.**',
                         name='John Smith',
                         username='******',
                         uuid=stored)

        api.add_organization(self.db, 'LibreSoft')
        api.add_enrollment(self.db, stored, 'LibreSoft',
                           datetime.datetime(2000, 1, 1, 0, 0),
                           datetime.datetime(2100, 1, 1, 0, 0))

        parser = self.get_parser(datadir('sortinghat_valid.json'))

        result = self.cmd.import_identities(parser, reset=True)
        self.assertEqual(result, CMD_SUCCESS)

        # Check the contents of the registry
        uids = api.unique_identities(self.db)
        self.assertEqual(len(uids), 3)

        # Jane Roe
        jane = uids[0]
        self.assertEqual(jane.uuid, '17ab00ed3825ec2f50483e33c88df223264182ba')

        jane_ids = self.sort_identities(jane.identities)
        self.assertEqual(len(jane_ids), 3)

        assert_identity(jane_ids[0],
                        '17ab00ed3825ec2f50483e33c88df223264182ba',
                        'Jane Roe', '*****@*****.**', 'jroe', 'scm')
        assert_enrollments(jane, 3)

        # Identity that was stored before the import, now on its own
        split = uids[1]
        self.assertEqual(split.uuid, '2371a34a0ac65fbd9d631464ee41d583ec0e1e88')

        split_ids = self.sort_identities(split.identities)
        self.assertEqual(len(split_ids), 1)

        assert_identity(split_ids[0],
                        '2371a34a0ac65fbd9d631464ee41d583ec0e1e88',
                        None, '*****@*****.**', None, 'unknown')
        assert_enrollments(split, 0)

        # John Smith
        john = uids[2]
        self.assertEqual(john.uuid, 'a9b403e150dd4af8953a52a4bb841051e4b705d9')

        john_ids = self.sort_identities(john.identities)
        self.assertEqual(len(john_ids), 2)

        assert_identity(john_ids[0],
                        '880b3dfcb3a08712e5831bddc3dfe81fc5d7b331',
                        'John Smith', '*****@*****.**', None, 'scm')
        assert_identity(john_ids[1],
                        'a9b403e150dd4af8953a52a4bb841051e4b705d9',
                        'John Smith', '*****@*****.**', 'jsmith', 'scm')
        assert_enrollments(john, 1)
Beispiel #15
0
    def test_valid_identities_with_default_matching(self):
        """Check insertion, matching and merging of valid data

        An identity with an enrollment and a profile is stored first;
        after importing with `matching='default'` the registry is
        expected to hold two unique identities.
        """

        def assert_identity(identity, id_, name, email, username, source):
            # Compare every field of a stored identity
            self.assertEqual(identity.id, id_)
            self.assertEqual(identity.name, name)
            self.assertEqual(identity.email, email)
            self.assertEqual(identity.username, username)
            self.assertEqual(identity.source, source)

        # First, insert the identity that will match with one
        # from the file
        api.add_organization(self.db, 'Example')
        stored = api.add_identity(self.db, 'unknown', email='*****@*****.**')
        api.add_enrollment(self.db, stored, 'Example',
                           datetime.datetime(2000, 1, 1, 0, 0),
                           datetime.datetime(2100, 1, 1, 0, 0))
        api.edit_profile(self.db, stored,
                         name='John Smith',
                         is_bot=False,
                         country_code='US')

        parser = self.get_parser(datadir('sortinghat_valid.json'))

        result = self.cmd.import_identities(parser, matching='default')
        self.assertEqual(result, CMD_SUCCESS)

        # Check the contents of the registry
        uids = api.unique_identities(self.db)
        self.assertEqual(len(uids), 2)

        # Jane Roe
        jane = uids[0]
        self.assertEqual(jane.uuid, '17ab00ed3825ec2f50483e33c88df223264182ba')

        profile = jane.profile
        self.assertEqual(profile.uuid, '17ab00ed3825ec2f50483e33c88df223264182ba')
        self.assertEqual(profile.name, 'Jane Roe')
        self.assertEqual(profile.email, '*****@*****.**')
        self.assertEqual(profile.gender, None)
        self.assertEqual(profile.gender_acc, None)
        self.assertEqual(profile.is_bot, False)
        self.assertEqual(profile.country_code, 'US')
        self.assertEqual(profile.country.alpha3, 'USA')
        self.assertEqual(profile.country.code, 'US')
        self.assertEqual(profile.country.name, 'United States of America')

        jane_ids = self.sort_identities(jane.identities)
        self.assertEqual(len(jane_ids), 3)

        assert_identity(jane_ids[0],
                        '17ab00ed3825ec2f50483e33c88df223264182ba',
                        'Jane Roe', '*****@*****.**', 'jroe', 'scm')
        assert_identity(jane_ids[1],
                        '22d1b20763c6f5822bdda8508957486c547bb9de',
                        None, '*****@*****.**', None, 'unknown')
        assert_identity(jane_ids[2],
                        '322397ed782a798ffd9d0bc7e293df4292fe075d',
                        None, '*****@*****.**', None, 'scm')

        jane_enrollments = api.enrollments(self.db, jane.uuid)
        self.assertEqual(len(jane_enrollments), 3)

        # John Smith
        john = uids[1]
        self.assertEqual(john.uuid, '2371a34a0ac65fbd9d631464ee41d583ec0e1e88')

        john_ids = self.sort_identities(john.identities)
        self.assertEqual(len(john_ids), 3)

        # The profile was merged
        profile = john.profile
        self.assertEqual(profile.uuid, '2371a34a0ac65fbd9d631464ee41d583ec0e1e88')
        self.assertEqual(profile.name, 'John Smith')
        self.assertEqual(profile.email, '*****@*****.**')
        self.assertEqual(profile.gender, 'male')
        self.assertEqual(profile.gender_acc, 100)
        self.assertEqual(profile.is_bot, True)
        self.assertEqual(profile.country_code, 'US')
        self.assertEqual(profile.country.code, 'US')
        self.assertEqual(profile.country.name, 'United States of America')

        assert_identity(john_ids[0],
                        '2371a34a0ac65fbd9d631464ee41d583ec0e1e88',
                        None, '*****@*****.**', None, 'unknown')
        assert_identity(john_ids[1],
                        '880b3dfcb3a08712e5831bddc3dfe81fc5d7b331',
                        'John Smith', '*****@*****.**', None, 'scm')
        assert_identity(john_ids[2],
                        'a9b403e150dd4af8953a52a4bb841051e4b705d9',
                        'John Smith', '*****@*****.**', 'jsmith', 'scm')

        # Enrollments were merged
        john_enrollments = api.enrollments(self.db, john.uuid)
        self.assertEqual(len(john_enrollments), 1)

        enrollment = john_enrollments[0]
        self.assertEqual(enrollment.organization.name, 'Example')
        self.assertEqual(enrollment.start, datetime.datetime(2000, 1, 1, 0, 0))
        self.assertEqual(enrollment.end, datetime.datetime(2100, 1, 1, 0, 0))
Beispiel #16
0
    def test_valid_identities_with_default_matching(self):
        """Import valid data with the default matcher and verify the merge"""

        def check_identities(found, expected):
            # Compare every identity, field by field, with its expected row
            for identity, row in zip(found, expected):
                exp_id, exp_name, exp_email, exp_username, exp_source = row
                self.assertEqual(identity.id, exp_id)
                self.assertEqual(identity.name, exp_name)
                self.assertEqual(identity.email, exp_email)
                self.assertEqual(identity.username, exp_username)
                self.assertEqual(identity.source, exp_source)

        enroll_from = datetime.datetime(2000, 1, 1, 0, 0)
        enroll_to = datetime.datetime(2100, 1, 1, 0, 0)

        # Seed the registry with the identity that has to match
        # one of those stored in the file
        api.add_organization(self.db, 'Example')
        seed_uuid = api.add_identity(self.db, 'unknown', email='*****@*****.**')
        api.add_enrollment(self.db, seed_uuid, 'Example', enroll_from, enroll_to)
        api.edit_profile(self.db, seed_uuid, name='John Smith',
                         is_bot=False, country_code='US')

        parser = self.get_parser('data/sortinghat_valid.json')

        result = self.cmd.import_identities(parser, matching='default')
        self.assertEqual(result, CMD_SUCCESS)

        # Two unique identities are expected in the registry
        registry = api.unique_identities(self.db)
        self.assertEqual(len(registry), 2)

        # John Smith
        jsmith = registry[0]
        jsmith_uuid = '23fe3a011190a27a7c5cf6f8925de38ff0994d8d'
        self.assertEqual(jsmith.uuid, jsmith_uuid)

        jsmith_ids = self.sort_identities(jsmith.identities)
        self.assertEqual(len(jsmith_ids), 3)

        # Profile data from the registry and the file was merged
        profile = jsmith.profile
        self.assertEqual(profile.uuid, jsmith_uuid)
        self.assertEqual(profile.name, 'John Smith')
        self.assertEqual(profile.email, '*****@*****.**')
        self.assertEqual(profile.is_bot, True)
        self.assertEqual(profile.country_code, 'US')
        self.assertEqual(profile.country.code, 'US')
        self.assertEqual(profile.country.name, 'United States of America')

        check_identities(jsmith_ids, [
            ('03e12d00e37fd45593c49a5a5a1652deca4cf302',
             'John Smith', '*****@*****.**', 'jsmith', 'scm'),
            ('23fe3a011190a27a7c5cf6f8925de38ff0994d8d',
             None, '*****@*****.**', None, 'unknown'),
            ('75d95d6c8492fd36d24a18bd45d62161e05fbc97',
             'John Smith', '*****@*****.**', None, 'scm'),
        ])

        # Only one enrollment remains after the merge
        jsmith_enrollments = api.enrollments(self.db, jsmith.uuid)
        self.assertEqual(len(jsmith_enrollments), 1)

        enrollment = jsmith_enrollments[0]
        self.assertEqual(enrollment.organization.name, 'Example')
        self.assertEqual(enrollment.start, enroll_from)
        self.assertEqual(enrollment.end, enroll_to)

        # Jane Roe
        jroe = registry[1]
        jroe_uuid = '52e0aa0a14826627e633fd15332988686b730ab3'
        self.assertEqual(jroe.uuid, jroe_uuid)

        profile = jroe.profile
        self.assertEqual(profile.uuid, jroe_uuid)
        self.assertEqual(profile.name, 'Jane Roe')
        self.assertEqual(profile.email, '*****@*****.**')
        self.assertEqual(profile.is_bot, False)
        self.assertEqual(profile.country_code, 'US')
        self.assertEqual(profile.country.alpha3, 'USA')
        self.assertEqual(profile.country.code, 'US')
        self.assertEqual(profile.country.name, 'United States of America')

        jroe_ids = self.sort_identities(jroe.identities)
        self.assertEqual(len(jroe_ids), 3)

        check_identities(jroe_ids, [
            ('52e0aa0a14826627e633fd15332988686b730ab3',
             'Jane Roe', '*****@*****.**', 'jroe', 'scm'),
            ('cbfb7bd31d556322c640f5bc7b31d58a12b15904',
             None, '*****@*****.**', None, 'unknown'),
            ('fef873c50a48cfc057f7aa19f423f81889a8907f',
             None, '*****@*****.**', None, 'scm'),
        ])

        jroe_enrollments = api.enrollments(self.db, jroe.uuid)
        self.assertEqual(len(jroe_enrollments), 3)
Beispiel #17
0
    def test_valid_identities_with_default_matching(self):
        """Import valid data with the default matcher and verify the merge"""

        period_start = datetime.datetime(2000, 1, 1, 0, 0)
        period_end = datetime.datetime(2100, 1, 1, 0, 0)

        # Register beforehand the identity that has to match
        # one of those stored in the file
        api.add_organization(self.db, 'Example')
        known_uuid = api.add_identity(self.db, 'unknown', email='*****@*****.**')
        api.add_enrollment(self.db, known_uuid, 'Example',
                           period_start, period_end)
        api.edit_profile(self.db, known_uuid, name='John Smith',
                         is_bot=False, country_code='US')

        parser = self.get_parser(datadir('sortinghat_valid.json'))

        outcome = self.cmd.import_identities(parser, matching='default')
        self.assertEqual(outcome, CMD_SUCCESS)

        # Two unique identities are expected in the registry
        registry = api.unique_identities(self.db)
        self.assertEqual(len(registry), 2)

        # Jane Roe
        jroe = registry[0]
        jroe_uuid = '17ab00ed3825ec2f50483e33c88df223264182ba'
        self.assertEqual(jroe.uuid, jroe_uuid)

        profile = jroe.profile
        self.assertEqual(profile.uuid, jroe_uuid)
        self.assertEqual(profile.name, 'Jane Roe')
        self.assertEqual(profile.email, '*****@*****.**')
        self.assertEqual(profile.gender, None)
        self.assertEqual(profile.gender_acc, None)
        self.assertEqual(profile.is_bot, False)
        self.assertEqual(profile.country_code, 'US')
        self.assertEqual(profile.country.alpha3, 'USA')
        self.assertEqual(profile.country.code, 'US')
        self.assertEqual(profile.country.name, 'United States of America')

        jroe_ids = self.sort_identities(jroe.identities)
        self.assertEqual(len(jroe_ids), 3)

        # Expected rows: (id, name, email, username, source)
        jroe_expected = [
            ('17ab00ed3825ec2f50483e33c88df223264182ba',
             'Jane Roe', '*****@*****.**', 'jroe', 'scm'),
            ('22d1b20763c6f5822bdda8508957486c547bb9de',
             None, '*****@*****.**', None, 'unknown'),
            ('322397ed782a798ffd9d0bc7e293df4292fe075d',
             None, '*****@*****.**', None, 'scm'),
        ]
        for found, (eid, name, email, username, source) in zip(jroe_ids,
                                                               jroe_expected):
            self.assertEqual(found.id, eid)
            self.assertEqual(found.name, name)
            self.assertEqual(found.email, email)
            self.assertEqual(found.username, username)
            self.assertEqual(found.source, source)

        jroe_enrollments = api.enrollments(self.db, jroe.uuid)
        self.assertEqual(len(jroe_enrollments), 3)

        # John Smith
        jsmith = registry[1]
        jsmith_uuid = '2371a34a0ac65fbd9d631464ee41d583ec0e1e88'
        self.assertEqual(jsmith.uuid, jsmith_uuid)

        jsmith_ids = self.sort_identities(jsmith.identities)
        self.assertEqual(len(jsmith_ids), 3)

        # Profile data from the registry and the file was merged
        profile = jsmith.profile
        self.assertEqual(profile.uuid, jsmith_uuid)
        self.assertEqual(profile.name, 'John Smith')
        self.assertEqual(profile.email, '*****@*****.**')
        self.assertEqual(profile.gender, 'male')
        self.assertEqual(profile.gender_acc, 100)
        self.assertEqual(profile.is_bot, True)
        self.assertEqual(profile.country_code, 'US')
        self.assertEqual(profile.country.code, 'US')
        self.assertEqual(profile.country.name, 'United States of America')

        jsmith_expected = [
            ('2371a34a0ac65fbd9d631464ee41d583ec0e1e88',
             None, '*****@*****.**', None, 'unknown'),
            ('880b3dfcb3a08712e5831bddc3dfe81fc5d7b331',
             'John Smith', '*****@*****.**', None, 'scm'),
            ('a9b403e150dd4af8953a52a4bb841051e4b705d9',
             'John Smith', '*****@*****.**', 'jsmith', 'scm'),
        ]
        for found, (eid, name, email, username, source) in zip(jsmith_ids,
                                                               jsmith_expected):
            self.assertEqual(found.id, eid)
            self.assertEqual(found.name, name)
            self.assertEqual(found.email, email)
            self.assertEqual(found.username, username)
            self.assertEqual(found.source, source)

        # Only one enrollment remains after the merge
        jsmith_enrollments = api.enrollments(self.db, jsmith.uuid)
        self.assertEqual(len(jsmith_enrollments), 1)

        enrollment = jsmith_enrollments[0]
        self.assertEqual(enrollment.organization.name, 'Example')
        self.assertEqual(enrollment.start, period_start)
        self.assertEqual(enrollment.end, period_end)
Beispiel #18
0
    def add_identities(cls, db, identities, backend):
        """Load a list of identities from a backend into SortingHat.

        Each identity is registered with `api.add_identity`. When it
        carries a 'company' value other than None, that organization is
        added to the registry and the identity is enrolled in it from
        1900-01-01 to 2100-01-01.

        Identities that fail to be added are skipped: the ones raising
        `AlreadyExistsError` silently, the rest after logging a warning.
        No enrollment is attempted for skipped identities.

        :param db: database manager handed over to the SortingHat API calls
        :param identities: list of dict-like objects with 'email', 'name'
            and 'username' keys and, optionally, a 'company' key
        :param backend: value passed to `api.add_identity` as the source
            of the identities

        :returns: list with the uuids involved in merges; it is always
            empty while merging is switched off inside this method
        """
        # Merging right after every insertion is too slow in large
        # projects, so it is switched off here
        merge_identities = False

        logger.info("Adding the identities to SortingHat")
        if not merge_identities:
            logger.info("Not doing identities merge")

        total = 0
        lidentities = len(identities)
        merged_identities = []  # old identities merged into new ones

        if merge_identities:
            blacklist = api.blacklist(db)
            matching = 'email-name'  # Not active
            matcher = create_identity_matcher(matching, blacklist)

        for identity in identities:
            try:
                uuid = api.add_identity(db, backend, identity['email'],
                                        identity['name'], identity['username'])

                # Count first, so the progress reads 1..N
                total += 1
                logger.debug("New sortinghat identity %s %s,%s,%s (%i/%i)",
                             uuid, identity['username'], identity['name'],
                             identity['email'], total, lidentities)

                if merge_identities:
                    matches = api.match_identities(db, uuid, matcher)

                    if len(matches) > 1:
                        u = api.unique_identities(db, uuid)[0]
                        for m in matches:
                            # Keep track of every uuid changed by the merge
                            if m.uuid not in merged_identities:
                                merged_identities.append(m.uuid)
                            if m.uuid == uuid:
                                continue
                            # Merge matched identity into added identity
                            api.merge_unique_identities(db, m.uuid, u.uuid)

            except AlreadyExistsError:
                # NOTE(review): identities already in the registry are
                # skipped, so they are not enrolled either -- confirm
                # this is the wanted behavior
                continue
            except WrappedValueError:
                logger.warning("Trying to add a None identity. Ignoring it.")
                continue
            except UnicodeEncodeError:
                logger.warning("UnicodeEncodeError. Ignoring it. %s %s %s",
                               identity['email'], identity['name'],
                               identity['username'])
                continue
            except Exception:
                # Best effort: one broken identity must not stop the load
                logger.warning("Unknown exception adding identity. Ignoring it. %s %s %s",
                               identity['email'], identity['name'],
                               identity['username'])
                continue

            if 'company' in identity and identity['company'] is not None:
                company = identity['company']

                # The organization may have been added for a previous
                # identity; that must not prevent this enrollment
                try:
                    api.add_organization(db, company)
                except AlreadyExistsError:
                    pass

                try:
                    api.add_enrollment(db, uuid, company,
                                       datetime(1900, 1, 1),
                                       datetime(2100, 1, 1))
                except AlreadyExistsError:
                    pass

        logger.info("Total NEW identities: %i", total)

        if merge_identities:
            logger.info("Total NEW identities merged: %i",
                        len(merged_identities))

        return merged_identities
    def test_valid_identities_with_default_matching(self):
        """Import valid data with the default matcher and verify the merge"""

        identity_fields = ('id', 'name', 'email', 'username', 'source')

        def verify_identities(found, expected):
            # Walk the identities and their expected values in parallel,
            # checking the fields in the order given by identity_fields
            for identity, values in zip(found, expected):
                for field, value in zip(identity_fields, values):
                    self.assertEqual(getattr(identity, field), value)

        first_day = datetime.datetime(2000, 1, 1, 0, 0)
        last_day = datetime.datetime(2100, 1, 1, 0, 0)

        # Store in advance the identity that has to match
        # one of those found in the file
        api.add_organization(self.db, 'Example')
        stored_uuid = api.add_identity(self.db, 'unknown', email='*****@*****.**')
        api.add_enrollment(self.db, stored_uuid, 'Example', first_day, last_day)
        api.edit_profile(self.db,
                         stored_uuid,
                         name='John Smith',
                         is_bot=False,
                         country_code='US')

        parser = self.get_parser('data/sortinghat_valid.json')

        status = self.cmd.import_identities(parser, matching='default')
        self.assertEqual(status, CMD_SUCCESS)

        # Two unique identities are expected in the registry
        stored = api.unique_identities(self.db)
        self.assertEqual(len(stored), 2)

        # John Smith
        smith = stored[0]
        smith_uuid = '23fe3a011190a27a7c5cf6f8925de38ff0994d8d'
        self.assertEqual(smith.uuid, smith_uuid)

        smith_ids = self.sort_identities(smith.identities)
        self.assertEqual(len(smith_ids), 3)

        # Profile data from the registry and the file was merged
        smith_prf = smith.profile
        self.assertEqual(smith_prf.uuid, smith_uuid)
        self.assertEqual(smith_prf.name, 'John Smith')
        self.assertEqual(smith_prf.email, '*****@*****.**')
        self.assertEqual(smith_prf.is_bot, True)
        self.assertEqual(smith_prf.country_code, 'US')
        self.assertEqual(smith_prf.country.code, 'US')
        self.assertEqual(smith_prf.country.name, 'United States of America')

        verify_identities(smith_ids, [
            ('03e12d00e37fd45593c49a5a5a1652deca4cf302',
             'John Smith', '*****@*****.**', 'jsmith', 'scm'),
            ('23fe3a011190a27a7c5cf6f8925de38ff0994d8d',
             None, '*****@*****.**', None, 'unknown'),
            ('75d95d6c8492fd36d24a18bd45d62161e05fbc97',
             'John Smith', '*****@*****.**', None, 'scm'),
        ])

        # Only one enrollment remains after the merge
        smith_rols = api.enrollments(self.db, smith.uuid)
        self.assertEqual(len(smith_rols), 1)

        merged_rol = smith_rols[0]
        self.assertEqual(merged_rol.organization.name, 'Example')
        self.assertEqual(merged_rol.start, first_day)
        self.assertEqual(merged_rol.end, last_day)

        # Jane Roe
        roe = stored[1]
        roe_uuid = '52e0aa0a14826627e633fd15332988686b730ab3'
        self.assertEqual(roe.uuid, roe_uuid)

        roe_prf = roe.profile
        self.assertEqual(roe_prf.uuid, roe_uuid)
        self.assertEqual(roe_prf.name, 'Jane Roe')
        self.assertEqual(roe_prf.email, '*****@*****.**')
        self.assertEqual(roe_prf.is_bot, False)
        self.assertEqual(roe_prf.country_code, 'US')
        self.assertEqual(roe_prf.country.alpha3, 'USA')
        self.assertEqual(roe_prf.country.code, 'US')
        self.assertEqual(roe_prf.country.name, 'United States of America')

        roe_ids = self.sort_identities(roe.identities)
        self.assertEqual(len(roe_ids), 3)

        verify_identities(roe_ids, [
            ('52e0aa0a14826627e633fd15332988686b730ab3',
             'Jane Roe', '*****@*****.**', 'jroe', 'scm'),
            ('cbfb7bd31d556322c640f5bc7b31d58a12b15904',
             None, '*****@*****.**', None, 'unknown'),
            ('fef873c50a48cfc057f7aa19f423f81889a8907f',
             None, '*****@*****.**', None, 'scm'),
        ])

        roe_rols = api.enrollments(self.db, roe.uuid)
        self.assertEqual(len(roe_rols), 3)