class TestBaseCase(unittest.TestCase):
    """Defines common setup and teardown methods on countries unit tests"""

    def setUp(self):
        # The tests read captured output, so streams must be buffered
        if not hasattr(sys.stdout, 'getvalue') and not hasattr(sys.stderr, 'getvalue'):
            self.fail('This test needs to be run in buffered mode')

        self.db = Database(DB_USER, DB_PASSWORD, DB_NAME, DB_HOST, DB_PORT)
        self._load_test_dataset()

        # Create command
        self.kwargs = {'user': DB_USER,
                       'password': DB_PASSWORD,
                       'database': DB_NAME,
                       'host': DB_HOST,
                       'port': DB_PORT}
        self.cmd = Countries(**self.kwargs)

    def tearDown(self):
        self.db.clear()

    def _load_test_dataset(self):
        # Seed the registry with the countries the tests expect;
        # insertion order (ES, US, GB) is kept from the original suite
        with self.db.connect() as session:
            for code, name, alpha3 in (('ES', 'Spain', 'ESP'),
                                       ('US', 'United States of America', 'USA'),
                                       ('GB', 'United Kingdom', 'GBR')):
                session.add(Country(code=code, name=name, alpha3=alpha3))
class TestBaseCase(unittest.TestCase):
    """Defines common setup and teardown methods on countries unit tests"""

    def setUp(self):
        # Buffered streams are required to inspect command output
        buffered = hasattr(sys.stdout, 'getvalue') or hasattr(sys.stderr, 'getvalue')
        if not buffered:
            self.fail('This test needs to be run in buffered mode')

        self.db = Database(DB_USER, DB_PASSWORD, DB_NAME, DB_HOST, DB_PORT)
        self._load_test_dataset()

        # Command under test, built from the connection parameters
        self.kwargs = dict(user=DB_USER, password=DB_PASSWORD,
                           database=DB_NAME, host=DB_HOST, port=DB_PORT)
        self.cmd = Countries(**self.kwargs)

    def tearDown(self):
        self.db.clear()

    def _load_test_dataset(self):
        # Countries the tests expect, added in the original order (ES, US, GB)
        countries = [Country(code='ES', name='Spain', alpha3='ESP'),
                     Country(code='US', name='United States of America', alpha3='USA'),
                     Country(code='GB', name='United Kingdom', alpha3='GBR')]
        with self.db.connect() as session:
            for country in countries:
                session.add(country)
class TestBaseCase(unittest.TestCase):
    """Defines common setup and teardown methods on profile unit tests"""

    def setUp(self):
        if not hasattr(sys.stdout, 'getvalue'):
            self.fail('This test needs to be run in buffered mode')

        # Connection used to inspect the registry contents
        self.db = Database(DB_USER, DB_PASSWORD, DB_NAME, DB_HOST, DB_PORT)

        # Import predefined dataset for testing
        self._load_test_dataset()

        # Command under test
        self.kwargs = {'user': DB_USER,
                       'password': DB_PASSWORD,
                       'database': DB_NAME,
                       'host': DB_HOST,
                       'port': DB_PORT}
        self.cmd = Profile(**self.kwargs)

    def tearDown(self):
        self.db.clear()

    def _load_test_dataset(self):
        # A country is required to test country-related profile updates
        with self.db.connect() as session:
            session.add(Country(code='US', name='United States of America',
                                alpha3='USA'))

        # Identity whose profile will be edited by the tests
        api.add_identity(self.db, 'scm', '*****@*****.**', 'Jane Roe', 'jroe')
class TestBaseCase(unittest.TestCase):
    """Defines common setup and teardown methods on merge unit tests"""

    def setUp(self):
        # Buffered streams are required to inspect command output
        if not hasattr(sys.stdout, 'getvalue') and not hasattr(sys.stderr, 'getvalue'):
            self.fail('This test needs to be run in buffered mode')

        # Create a connection to check the contents of the registry
        self.db = Database(DB_USER, DB_PASSWORD, DB_NAME, DB_HOST, DB_PORT)
        self.db.clear()
        self._load_test_dataset()

        # Command under test
        self.kwargs = {'user': DB_USER,
                       'password': DB_PASSWORD,
                       'database': DB_NAME,
                       'host': DB_HOST,
                       'port': DB_PORT}
        self.cmd = Merge(**self.kwargs)

    def tearDown(self):
        self.db.clear()

    def _load_test_dataset(self):
        # Country required by the 'US' country_code used below
        with self.db.connect() as session:
            session.add(Country(code='US', name='United States of America',
                                alpha3='USA'))

        # John Smith: two identities and a profile
        api.add_unique_identity(self.db, 'John Smith')
        api.add_identity(self.db, 'scm', '*****@*****.**', uuid='John Smith')
        api.add_identity(self.db, 'scm', '*****@*****.**', 'John Smith',
                         uuid='John Smith')
        api.edit_profile(self.db, 'John Smith', name='John Smith', is_bot=False)

        # John Doe: one identity and a profile linked to the country
        api.add_unique_identity(self.db, 'John Doe')
        api.add_identity(self.db, 'scm', '*****@*****.**', uuid='John Doe')
        api.edit_profile(self.db, 'John Doe', email='*****@*****.**',
                         is_bot=True, country_code='US')

        # Organizations and enrollments
        api.add_organization(self.db, 'Example')
        api.add_enrollment(self.db, 'John Smith', 'Example')
        api.add_enrollment(self.db, 'John Doe', 'Example')
        api.add_organization(self.db, 'Bitergia')
        api.add_enrollment(self.db, 'John Smith', 'Bitergia')
        api.add_enrollment(self.db, 'John Doe', 'Bitergia',
                           datetime.datetime(1999, 1, 1),
                           datetime.datetime(2000, 1, 1))
        api.add_organization(self.db, 'LibreSoft')
class TestBaseCase(unittest.TestCase):
    """Defines common setup and teardown methods on load unit tests"""

    def setUp(self):
        if not hasattr(sys.stdout, 'getvalue'):
            self.fail('This test needs to be run in buffered mode')

        # Connection used to inspect the registry contents
        self.db = Database(DB_USER, DB_PASSWORD, DB_NAME, DB_HOST, DB_PORT)
        self._load_test_dataset()

        # Command under test
        self.kwargs = {'user': DB_USER,
                       'password': DB_PASSWORD,
                       'database': DB_NAME,
                       'host': DB_HOST,
                       'port': DB_PORT}
        self.cmd = Load(**self.kwargs)

    def _load_test_dataset(self):
        # A single country is enough for these tests
        with self.db.connect() as session:
            session.add(Country(code='US', name='United States of America',
                                alpha3='USA'))

    def tearDown(self):
        self.db.clear()

    def get_parser(self, filename):
        """Read a file and return a SortingHatParser over its contents"""
        if sys.version_info[0] >= 3:  # Python 3
            with open(filename, 'r', encoding='UTF-8') as f:
                content = f.read()
        else:  # Python 2
            with open(filename, 'r') as f:
                content = f.read().decode('UTF-8')
        return SortingHatParser(content)

    def sort_identities(self, ids):
        """Return the given identities sorted by their id attribute"""
        return sorted(ids, key=lambda identity: identity.id)
class TestBaseCase(unittest.TestCase):
    """Defines common setup and teardown methods on load unit tests"""

    def setUp(self):
        # Buffered stdout is required to inspect command output
        if not hasattr(sys.stdout, 'getvalue'):
            self.fail('This test needs to be run in buffered mode')

        # Connection used to check the contents of the registry
        self.db = Database(DB_USER, DB_PASSWORD, DB_NAME, DB_HOST, DB_PORT)
        self._load_test_dataset()

        # Command under test, built from the connection parameters
        self.kwargs = dict(user=DB_USER, password=DB_PASSWORD,
                           database=DB_NAME, host=DB_HOST, port=DB_PORT)
        self.cmd = Load(**self.kwargs)

    def _load_test_dataset(self):
        # Only one country is needed by this suite
        us = Country(code='US', name='United States of America', alpha3='USA')
        with self.db.connect() as session:
            session.add(us)

    def tearDown(self):
        self.db.clear()

    def get_parser(self, filename):
        """Parse the given file and return a SortingHatParser instance"""
        if sys.version_info[0] >= 3:  # Python 3
            with open(filename, 'r', encoding='UTF-8') as stream:
                text = stream.read()
        else:  # Python 2
            with open(filename, 'r') as stream:
                text = stream.read().decode('UTF-8')
        return SortingHatParser(text)

    def sort_identities(self, ids):
        """Sort identities by id so comparisons are deterministic"""
        return sorted(ids, key=lambda entry: entry.id)
class TestBaseCase(unittest.TestCase):
    """Defines common setup and teardown methods on profile unit tests"""

    def setUp(self):
        # Buffered stdout is required to inspect command output
        if not hasattr(sys.stdout, 'getvalue'):
            self.fail('This test needs to be run in buffered mode')

        # Connection used to check the contents of the registry
        self.db = Database(DB_USER, DB_PASSWORD, DB_NAME, DB_HOST, DB_PORT)

        # Predefined dataset used by the tests
        self._load_test_dataset()

        # Command under test, built from the connection parameters
        self.kwargs = dict(user=DB_USER, password=DB_PASSWORD,
                           database=DB_NAME, host=DB_HOST, port=DB_PORT)
        self.cmd = Profile(**self.kwargs)

    def tearDown(self):
        self.db.clear()

    def _load_test_dataset(self):
        # Country needed for country-related profile edits
        us = Country(code='US', name='United States of America', alpha3='USA')
        with self.db.connect() as session:
            session.add(us)

        # Identity whose profile is edited by the tests
        api.add_identity(self.db, 'scm', '*****@*****.**', 'Jane Roe', 'jroe')
class TaskIdentitiesMerge(Task):
    """ Basic class shared by all Sorting Hat tasks """

    def __init__(self, conf, load_orgs=True, load_ids=True, unify=True,
                 autoprofile=True, affiliate=True, bots=True):
        super().__init__(conf)

        self.load_ids = load_ids  # Load identities from raw index
        self.unify = unify  # Unify identities
        self.autoprofile = autoprofile  # Execute autoprofile
        self.affiliate = affiliate  # Affiliate identities
        self.bots = bots  # Mark bots in SH
        self.sh_kwargs = {
            'user': self.db_user,
            'password': self.db_password,
            'database': self.db_sh,
            'host': self.db_host,
            'port': None
        }
        self.db = Database(**self.sh_kwargs)

    def is_backend_task(self):
        # This task processes identities, not a data-source backend
        return False

    def __get_uuids_from_profile_name(self, profile_name):
        """ Get the uuids of all profiles matching a profile name """
        uuids = []

        with self.db.connect() as session:
            query = session.query(Profile).\
                filter(Profile.name == profile_name)
            profiles = query.all()
            if profiles:
                for p in profiles:
                    uuids.append(p.uuid)
        return uuids

    def __build_sh_command(self):
        """Build the base sortinghat CLI invocation with the DB credentials."""
        cfg = self.config.get_conf()

        db_user = cfg['sortinghat']['user']
        db_password = cfg['sortinghat']['password']
        db_host = cfg['sortinghat']['host']
        db_name = cfg['sortinghat']['database']
        cmd = ['sortinghat', '-u', db_user, '-p', db_password,
               '--host', db_host, '-d', db_name]
        return cmd

    def __execute_sh_command(self, cmd):
        """Run a sortinghat command and return the uuids to be refreshed.

        Returns an empty list when the command exits with a non-zero code.
        """
        logger.debug("Executing %s", cmd)

        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        # stderr is not captured; only stdout carries the uuids to refresh
        outs, _ = proc.communicate()
        uuids = self.__get_uuids_to_refresh(outs.decode("utf8"))

        if proc.returncode != 0:
            logger.error("[sortinghat] Error in command %s", cmd)
            uuids = []

        return uuids

    def __get_uuids_to_refresh(self, data):
        """ Return the Sortinghat unique identifiers that must be refreshed
            after a unify and affiliate command

            Formats:
            Unique identity ab882b9c6f29837b263448aeb6eab1ec373d7688 merged on 75fc28ef4643de5323e89fb26e4e67c97b24f507
            Unique identity 12deb94aa946193e28c2a933cbee4b338a928042 (acs_at_bitergia.com) affiliated to Bitergia
        """
        if data is None:
            return None

        uuids = []
        for line in data.split("\n"):
            fields = line.split()
            if 'merged' in line:
                # "Unique identity <uuid> merged on <uuid>"
                # FIX: both uuids are now deduplicated; the original appended
                # fields[2] unconditionally while checking only fields[5]
                if fields[2] not in uuids:
                    uuids.append(fields[2])
                if fields[5] not in uuids:
                    uuids.append(fields[5])
            elif 'affiliated' in line:
                # "Unique identity <uuid> (...) affiliated to <org>"
                if fields[2] not in uuids:
                    uuids.append(fields[2])
        return uuids

    def do_affiliate(self):
        """Affiliate identities to organizations; return uuids to refresh."""
        cmd = self.__build_sh_command()
        cmd += ['affiliate']
        uuids = self.__execute_sh_command(cmd)
        return uuids

    def do_autoprofile(self, sources):
        """Autocomplete profiles from the given data sources."""
        cmd = self.__build_sh_command()
        cmd += ['autoprofile'] + sources
        self.__execute_sh_command(cmd)
        return None

    def do_unify(self, kwargs):
        """Unify identities with the given matching options; return uuids."""
        cmd = self.__build_sh_command()
        cmd += ['unify', '--fast-matching', '-m', kwargs['matching']]
        if not kwargs['strict_mapping']:
            cmd += ['--no-strict-matching']
        uuids = self.__execute_sh_command(cmd)
        return uuids

    def execute(self):
        # ** START SYNC LOGIC **
        # Check that enrichment tasks are not active before loading identities
        while True:
            time.sleep(10)  # check each 10 seconds if the identities load could start
            with TasksManager.IDENTITIES_TASKS_ON_LOCK:
                with TasksManager.NUMBER_ENRICH_TASKS_ON_LOCK:
                    enrich_tasks = TasksManager.NUMBER_ENRICH_TASKS_ON
                    logger.debug("Enrich tasks active: %i", enrich_tasks)
                    if enrich_tasks == 0:
                        # The load of identities can be started
                        TasksManager.IDENTITIES_TASKS_ON = True
                        break
        # ** END SYNC LOGIC **

        cfg = self.config.get_conf()

        uuids_refresh = []

        if self.unify:
            for algo in cfg['sortinghat']['matching']:
                if not algo:
                    # cfg['sortinghat']['matching'] is an empty list
                    logger.debug('Unify not executed because empty algorithm')
                    continue
                kwargs = {'matching': algo, 'fast_matching': True,
                          'strict_mapping': cfg['sortinghat']['strict_mapping']}
                logger.info("[sortinghat] Unifying identities using algorithm %s",
                            kwargs['matching'])
                uuids = self.do_unify(kwargs)
                uuids_refresh += uuids
                logger.debug("uuids to refresh from unify: %s", uuids)

        if self.affiliate:
            if not cfg['sortinghat']['affiliate']:
                logger.debug("Not doing affiliation")
            else:
                # Global enrollments using domains
                logger.info("[sortinghat] Executing affiliate")
                uuids = self.do_affiliate()
                uuids_refresh += uuids
                logger.debug("uuids to refresh from affiliate: %s", uuids)

        if self.autoprofile:
            # autoprofile = [] -> cfg['sortinghat']['autoprofile'][0] = ['']
            if ('autoprofile' not in cfg['sortinghat'] or
                    not cfg['sortinghat']['autoprofile'][0]):
                logger.info("[sortinghat] Autoprofile not configured. Skipping.")
            else:
                logger.info("[sortinghat] Executing autoprofile for sources: %s",
                            cfg['sortinghat']['autoprofile'])
                sources = cfg['sortinghat']['autoprofile']
                self.do_autoprofile(sources)

        # The uuids must be refreshed in all backends (data sources)
        # Give 5s so the queue is filled and if not, continue without it
        try:
            autorefresh_backends_uuids = TasksManager.UPDATED_UUIDS_QUEUE.get(timeout=5)
            for backend_section in autorefresh_backends_uuids:
                autorefresh_backends_uuids[backend_section] += uuids_refresh
            TasksManager.UPDATED_UUIDS_QUEUE.put(autorefresh_backends_uuids)
            logger.debug("Autorefresh uuids queue after processing identities: %s",
                         autorefresh_backends_uuids)
        except Empty:
            logger.warning("Autorefresh uuids not active because the queue for it is empty.")

        if self.bots:
            if 'bots_names' not in cfg['sortinghat']:
                logger.info("[sortinghat] Bots name list not configured. Skipping.")
            else:
                logger.info("[sortinghat] Marking bots: %s",
                            cfg['sortinghat']['bots_names'])
                for name in cfg['sortinghat']['bots_names']:
                    # First we need the uuids for the profile name
                    uuids = self.__get_uuids_from_profile_name(name)
                    # Then we can modify the profile setting bot flag
                    profile = {"is_bot": True}
                    for uuid in uuids:
                        api.edit_profile(self.db, uuid, **profile)
            # For quitting the bot flag - debug feature
            if 'no_bots_names' in cfg['sortinghat']:
                logger.info("[sortinghat] Removing Marking bots: %s",
                            cfg['sortinghat']['no_bots_names'])
                for name in cfg['sortinghat']['no_bots_names']:
                    uuids = self.__get_uuids_from_profile_name(name)
                    profile = {"is_bot": False}
                    for uuid in uuids:
                        api.edit_profile(self.db, uuid, **profile)

        # Autorefresh must be done once identities processing has finished
        # Give 5s so the queue is filled and if not, continue without it
        try:
            autorefresh_backends = TasksManager.AUTOREFRESH_QUEUE.get(timeout=5)
            for backend_section in autorefresh_backends:
                autorefresh_backends[backend_section] = True
            TasksManager.AUTOREFRESH_QUEUE.put(autorefresh_backends)
            logger.debug("Autorefresh queue after processing identities: %s",
                         autorefresh_backends)
        except Empty:
            logger.warning("Autorefresh not active because the queue for it is empty.")

        with TasksManager.IDENTITIES_TASKS_ON_LOCK:
            TasksManager.IDENTITIES_TASKS_ON = False
class TaskIdentitiesMerge(Task):
    """ Task for processing identities in SortingHat """

    def __init__(self, conf):
        super().__init__(conf)
        # self.sh_kwargs is expected to be initialized by the Task base class
        self.db = Database(**self.sh_kwargs)
        self.last_autorefresh = datetime.utcnow()  # Last autorefresh date

    def is_backend_task(self):
        # This task processes identities, not a data-source backend
        return False

    def __get_uuids_from_profile_name(self, profile_name):
        """ Get the uuids of all profiles matching a profile name """
        uuids = []

        with self.db.connect() as session:
            query = session.query(Profile).\
                filter(Profile.name == profile_name)
            profiles = query.all()
            if profiles:
                for p in profiles:
                    uuids.append(p.uuid)
        return uuids

    def __build_sh_command(self):
        """Build the base sortinghat CLI invocation with the DB credentials."""
        cfg = self.config.get_conf()

        db_user = cfg['sortinghat']['user']
        db_password = cfg['sortinghat']['password']
        db_host = cfg['sortinghat']['host']
        db_name = cfg['sortinghat']['database']
        cmd = ['sortinghat', '-u', db_user, '-p', db_password,
               '--host', db_host, '-d', db_name]
        return cmd

    def __execute_sh_command(self, cmd):
        """Run a sortinghat command and return its exit code."""
        logger.debug("Executing %s", cmd)

        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        # Output is drained to avoid blocking; only the exit code is used
        proc.communicate()
        return_code = proc.returncode
        if return_code != 0:
            logger.error("[sortinghat] Error in command %s", cmd)

        return return_code

    def do_affiliate(self):
        """Affiliate identities to organizations."""
        cmd = self.__build_sh_command()
        cmd += ['affiliate']
        self.__execute_sh_command(cmd)
        return

    def do_autogender(self):
        """Fill the gender attribute in profiles."""
        cmd = self.__build_sh_command()
        cmd += ['autogender']
        self.__execute_sh_command(cmd)
        return None

    def do_autoprofile(self, sources):
        """Autocomplete profiles from the given data sources."""
        cmd = self.__build_sh_command()
        cmd += ['autoprofile'] + sources
        self.__execute_sh_command(cmd)
        return None

    def do_unify(self, kwargs):
        """Unify identities using the given matching options."""
        cmd = self.__build_sh_command()
        cmd += ['unify', '--fast-matching', '-m', kwargs['matching']]
        if not kwargs['strict_mapping']:
            cmd += ['--no-strict-matching']
        self.__execute_sh_command(cmd)
        return

    def execute(self):
        # ** START SYNC LOGIC **
        # Check that enrichment tasks are not active before loading identities
        while True:
            time.sleep(1)  # check each second if the task could start
            with TasksManager.IDENTITIES_TASKS_ON_LOCK:
                with TasksManager.NUMBER_ENRICH_TASKS_ON_LOCK:
                    enrich_tasks = TasksManager.NUMBER_ENRICH_TASKS_ON
                    logger.debug("[unify] Enrich tasks active: %i", enrich_tasks)
                    if enrich_tasks == 0:
                        # The load of identities can be started
                        TasksManager.IDENTITIES_TASKS_ON = True
                        break
        # ** END SYNC LOGIC **

        cfg = self.config.get_conf()

        # FIX: dropped the dead 'uuids_refresh' accumulator; this version of
        # the task never consumed it
        for algo in cfg['sortinghat']['matching']:
            if not algo:
                # cfg['sortinghat']['matching'] is an empty list
                logger.debug('Unify not executed because empty algorithm')
                continue
            kwargs = {'matching': algo, 'fast_matching': True,
                      'strict_mapping': cfg['sortinghat']['strict_mapping']}
            logger.info("[sortinghat] Unifying identities using algorithm %s",
                        kwargs['matching'])
            self.do_unify(kwargs)

        if not cfg['sortinghat']['affiliate']:
            logger.debug("Not doing affiliation")
        else:
            # Global enrollments using domains
            logger.info("[sortinghat] Executing affiliate")
            self.do_affiliate()

        if 'autoprofile' not in cfg['sortinghat'] or \
           not cfg['sortinghat']['autoprofile'][0]:
            logger.info("[sortinghat] Autoprofile not configured. Skipping.")
        else:
            logger.info("[sortinghat] Executing autoprofile for sources: %s",
                        cfg['sortinghat']['autoprofile'])
            sources = cfg['sortinghat']['autoprofile']
            self.do_autoprofile(sources)

        if 'autogender' not in cfg['sortinghat'] or \
           not cfg['sortinghat']['autogender']:
            logger.info("[sortinghat] Autogender not configured. Skipping.")
        else:
            logger.info("[sortinghat] Executing autogender")
            self.do_autogender()

        if 'bots_names' not in cfg['sortinghat']:
            logger.info("[sortinghat] Bots name list not configured. Skipping.")
        else:
            logger.info("[sortinghat] Marking bots: %s",
                        cfg['sortinghat']['bots_names'])
            for name in cfg['sortinghat']['bots_names']:
                # First we need the uuids for the profile name
                uuids = self.__get_uuids_from_profile_name(name)
                # Then we can modify the profile setting bot flag
                profile = {"is_bot": True}
                for uuid in uuids:
                    api.edit_profile(self.db, uuid, **profile)

        # For quitting the bot flag - debug feature
        if 'no_bots_names' in cfg['sortinghat']:
            logger.info("[sortinghat] Removing Marking bots: %s",
                        cfg['sortinghat']['no_bots_names'])
            for name in cfg['sortinghat']['no_bots_names']:
                uuids = self.__get_uuids_from_profile_name(name)
                profile = {"is_bot": False}
                for uuid in uuids:
                    api.edit_profile(self.db, uuid, **profile)

        with TasksManager.IDENTITIES_TASKS_ON_LOCK:
            TasksManager.IDENTITIES_TASKS_ON = False
class TestBaseCase(unittest.TestCase):
    """Defines common setup and teardown methods on show unit tests"""

    def setUp(self):
        if not hasattr(sys.stdout, 'getvalue'):
            self.fail('This test needs to be run in buffered mode')

        # Connection used to inspect the registry contents
        self.db = Database(DB_USER, DB_PASSWORD, DB_NAME, DB_HOST, DB_PORT)

        # Import predefined dataset for testing
        self._load_test_dataset()

        # Command under test
        self.kwargs = {'user': DB_USER,
                       'password': DB_PASSWORD,
                       'database': DB_NAME,
                       'host': DB_HOST,
                       'port': DB_PORT}
        self.cmd = Show(**self.kwargs)

    def tearDown(self):
        self.db.clear()

    def _load_test_dataset(self):
        # Country referenced by Jane Roe's profile
        with self.db.connect() as session:
            session.add(Country(code='US', name='United States of America',
                                alpha3='USA'))

        # Organizations
        api.add_organization(self.db, 'Example')
        api.add_organization(self.db, 'Bitergia')

        # John Smith: two identities and a bot profile
        jsmith_uuid = api.add_identity(self.db, 'scm', '*****@*****.**',
                                       'John Smith', 'jsmith')
        api.add_identity(self.db, 'scm', '*****@*****.**', 'John Smith',
                         uuid=jsmith_uuid)
        api.edit_profile(self.db, jsmith_uuid, email='*****@*****.**',
                         is_bot=True)

        # Jane Roe: three identities and a profile linked to the country
        jroe_uuid = api.add_identity(self.db, 'scm', '*****@*****.**',
                                     'Jane Roe', 'jroe')
        api.add_identity(self.db, 'scm', '*****@*****.**', uuid=jroe_uuid)
        api.add_identity(self.db, 'unknown', '*****@*****.**', uuid=jroe_uuid)
        api.edit_profile(self.db, jroe_uuid, name='Jane Roe',
                         email='*****@*****.**', is_bot=False,
                         country_code='US')

        # Unique identity without identities nor enrollments
        api.add_unique_identity(self.db,
                                '0000000000000000000000000000000000000000')

        # Enrollments
        api.add_enrollment(self.db, jsmith_uuid, 'Example')
        api.add_enrollment(self.db, jroe_uuid, 'Example')
        api.add_enrollment(self.db, jroe_uuid, 'Bitergia',
                           datetime.datetime(1999, 1, 1),
                           datetime.datetime(2000, 1, 1))
        api.add_enrollment(self.db, jroe_uuid, 'Bitergia',
                           datetime.datetime(2006, 1, 1),
                           datetime.datetime(2008, 1, 1))
class TaskIdentitiesMerge(Task):
    """ Task for processing identities in SortingHat """

    def __init__(self, conf):
        super().__init__(conf)

        self.sh_kwargs = {'user': self.db_user, 'password': self.db_password,
                          'database': self.db_sh, 'host': self.db_host,
                          'port': None}
        self.db = Database(**self.sh_kwargs)
        self.last_autorefresh = datetime.utcnow()  # Last autorefresh date

    def is_backend_task(self):
        # This task processes identities, not a data-source backend
        return False

    def __get_uuids_from_profile_name(self, profile_name):
        """ Get the uuids of all profiles matching a profile name """
        uuids = []

        with self.db.connect() as session:
            query = session.query(Profile).\
                filter(Profile.name == profile_name)
            profiles = query.all()
            if profiles:
                for p in profiles:
                    uuids.append(p.uuid)
        return uuids

    def __build_sh_command(self):
        """Build the base sortinghat CLI invocation with the DB credentials."""
        cfg = self.config.get_conf()

        db_user = cfg['sortinghat']['user']
        db_password = cfg['sortinghat']['password']
        db_host = cfg['sortinghat']['host']
        db_name = cfg['sortinghat']['database']
        cmd = ['sortinghat', '-u', db_user, '-p', db_password,
               '--host', db_host, '-d', db_name]
        return cmd

    def __execute_sh_command(self, cmd):
        """Run a sortinghat command and return its exit code."""
        logger.debug("Executing %s", cmd)

        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        # Output is drained to avoid blocking; only the exit code is used
        proc.communicate()
        return_code = proc.returncode
        if return_code != 0:
            logger.error("[sortinghat] Error in command %s", cmd)

        return return_code

    def do_affiliate(self):
        """Affiliate identities to organizations."""
        cmd = self.__build_sh_command()
        cmd += ['affiliate']
        self.__execute_sh_command(cmd)
        return

    def do_autogender(self):
        """Fill the gender attribute in profiles."""
        cmd = self.__build_sh_command()
        cmd += ['autogender']
        self.__execute_sh_command(cmd)
        return None

    def do_autoprofile(self, sources):
        """Autocomplete profiles from the given data sources."""
        cmd = self.__build_sh_command()
        cmd += ['autoprofile'] + sources
        self.__execute_sh_command(cmd)
        return None

    def do_unify(self, kwargs):
        """Unify identities using the given matching options."""
        cmd = self.__build_sh_command()
        cmd += ['unify', '--fast-matching', '-m', kwargs['matching']]
        if not kwargs['strict_mapping']:
            cmd += ['--no-strict-matching']
        self.__execute_sh_command(cmd)
        return

    def execute(self):
        # ** START SYNC LOGIC **
        # Check that enrichment tasks are not active before loading identities
        while True:
            time.sleep(1)  # check each second if the task could start
            with TasksManager.IDENTITIES_TASKS_ON_LOCK:
                with TasksManager.NUMBER_ENRICH_TASKS_ON_LOCK:
                    enrich_tasks = TasksManager.NUMBER_ENRICH_TASKS_ON
                    logger.debug("[unify] Enrich tasks active: %i", enrich_tasks)
                    if enrich_tasks == 0:
                        # The load of identities can be started
                        TasksManager.IDENTITIES_TASKS_ON = True
                        break
        # ** END SYNC LOGIC **

        cfg = self.config.get_conf()

        # FIX: dropped the dead 'uuids_refresh' accumulator; this version of
        # the task never consumed it
        for algo in cfg['sortinghat']['matching']:
            if not algo:
                # cfg['sortinghat']['matching'] is an empty list
                logger.debug('Unify not executed because empty algorithm')
                continue
            kwargs = {'matching': algo, 'fast_matching': True,
                      'strict_mapping': cfg['sortinghat']['strict_mapping']}
            logger.info("[sortinghat] Unifying identities using algorithm %s",
                        kwargs['matching'])
            self.do_unify(kwargs)

        if not cfg['sortinghat']['affiliate']:
            logger.debug("Not doing affiliation")
        else:
            # Global enrollments using domains
            logger.info("[sortinghat] Executing affiliate")
            self.do_affiliate()

        if 'autoprofile' not in cfg['sortinghat'] or \
           not cfg['sortinghat']['autoprofile'][0]:
            logger.info("[sortinghat] Autoprofile not configured. Skipping.")
        else:
            logger.info("[sortinghat] Executing autoprofile for sources: %s",
                        cfg['sortinghat']['autoprofile'])
            sources = cfg['sortinghat']['autoprofile']
            self.do_autoprofile(sources)

        if 'autogender' not in cfg['sortinghat'] or \
           not cfg['sortinghat']['autogender']:
            logger.info("[sortinghat] Autogender not configured. Skipping.")
        else:
            logger.info("[sortinghat] Executing autogender")
            self.do_autogender()

        if 'bots_names' not in cfg['sortinghat']:
            logger.info("[sortinghat] Bots name list not configured. Skipping.")
        else:
            logger.info("[sortinghat] Marking bots: %s",
                        cfg['sortinghat']['bots_names'])
            for name in cfg['sortinghat']['bots_names']:
                # First we need the uuids for the profile name
                uuids = self.__get_uuids_from_profile_name(name)
                # Then we can modify the profile setting bot flag
                profile = {"is_bot": True}
                for uuid in uuids:
                    api.edit_profile(self.db, uuid, **profile)

        # For quitting the bot flag - debug feature
        if 'no_bots_names' in cfg['sortinghat']:
            logger.info("[sortinghat] Removing Marking bots: %s",
                        cfg['sortinghat']['no_bots_names'])
            for name in cfg['sortinghat']['no_bots_names']:
                uuids = self.__get_uuids_from_profile_name(name)
                profile = {"is_bot": False}
                for uuid in uuids:
                    api.edit_profile(self.db, uuid, **profile)

        with TasksManager.IDENTITIES_TASKS_ON_LOCK:
            TasksManager.IDENTITIES_TASKS_ON = False
class TaskIdentitiesMerge(Task):
    """ Basic class shared by all Sorting Hat tasks """

    def __init__(self, conf, load_orgs=True, load_ids=True, unify=True,
                 autoprofile=True, affiliate=True, bots=True):
        super().__init__(conf)

        self.load_ids = load_ids  # Load identities from raw index
        self.unify = unify  # Unify identities
        self.autoprofile = autoprofile  # Execute autoprofile
        self.affiliate = affiliate  # Affiliate identities
        self.bots = bots  # Mark bots in SH
        self.sh_kwargs = {
            'user': self.db_user,
            'password': self.db_password,
            'database': self.db_sh,
            'host': self.db_host,
            'port': None
        }
        self.db = Database(**self.sh_kwargs)

    def is_backend_task(self):
        # This task processes identities, not a data-source backend
        return False

    def __get_uuids_from_profile_name(self, profile_name):
        """ Get the uuids of all profiles matching a profile name """
        uuids = []

        with self.db.connect() as session:
            query = session.query(Profile).\
                filter(Profile.name == profile_name)
            profiles = query.all()
            if profiles:
                for p in profiles:
                    uuids.append(p.uuid)
        return uuids

    def run(self):
        if self.unify:
            for algo in self.conf['sh_matching']:
                kwargs = {'matching': algo, 'fast_matching': True}
                logger.info("[sortinghat] Unifying identities using algorithm %s",
                            kwargs['matching'])
                code = Unify(**self.sh_kwargs).unify(**kwargs)
                if code != CMD_SUCCESS:
                    logger.error("[sortinghat] Error in unify %s", kwargs)

        if self.affiliate:
            # Global enrollments using domains
            logger.info("[sortinghat] Executing affiliate")
            code = Affiliate(**self.sh_kwargs).affiliate()
            if code != CMD_SUCCESS:
                # FIX: the original logged the stale 'kwargs' from the unify
                # loop, which is unbound (NameError) when unify did not run
                logger.error("[sortinghat] Error in affiliate")

        if self.autoprofile:
            if 'sh_autoprofile' not in self.conf:
                logger.info("[sortinghat] Autoprofile not configured. Skipping.")
            else:
                logger.info("[sortinghat] Executing autoprofile: %s",
                            self.conf['sh_autoprofile'])
                sources = self.conf['sh_autoprofile']
                code = AutoProfile(**self.sh_kwargs).autocomplete(sources)
                if code != CMD_SUCCESS:
                    # FIX: log the sources that failed instead of the stale
                    # (possibly unbound) 'kwargs' from the unify loop
                    logger.error("Error in autoprofile %s", sources)

        if self.bots:
            if 'sh_bots_names' not in self.conf:
                logger.info("[sortinghat] Bots name list not configured. Skipping.")
            else:
                logger.info("[sortinghat] Marking bots: %s",
                            self.conf['sh_bots_names'])
                for name in self.conf['sh_bots_names']:
                    # First we need the uuids for the profile name
                    uuids = self.__get_uuids_from_profile_name(name)
                    # Then we can modify the profile setting bot flag
                    profile = {"is_bot": True}
                    for uuid in uuids:
                        api.edit_profile(self.db, uuid, **profile)
            # For quitting the bot flag - debug feature
            if 'sh_no_bots_names' in self.conf:
                logger.info("[sortinghat] Removing Marking bots: %s",
                            self.conf['sh_no_bots_names'])
                for name in self.conf['sh_no_bots_names']:
                    uuids = self.__get_uuids_from_profile_name(name)
                    profile = {"is_bot": False}
                    for uuid in uuids:
                        api.edit_profile(self.db, uuid, **profile)
class TestBaseCase(unittest.TestCase):
    """Defines common setup and teardown methods on export unit tests"""

    def setUp(self):
        import tempfile

        if not hasattr(sys.stdout, 'getvalue'):
            self.fail('This test needs to be run in buffered mode')

        # Connection used to inspect the registry contents
        self.db = Database(DB_USER, DB_PASSWORD, DB_NAME, DB_HOST, DB_PORT)

        # Import predefined dataset for testing
        self._load_test_dataset()

        # Temporary file for outputs
        self.tmpfile = tempfile.mkstemp()[1]

        # Command under test
        self.kwargs = {'user': DB_USER,
                       'password': DB_PASSWORD,
                       'database': DB_NAME,
                       'host': DB_HOST,
                       'port': DB_PORT}
        self.cmd = Export(**self.kwargs)

    def tearDown(self):
        import os

        self.db.clear()
        os.remove(self.tmpfile)

    def read_json(self, filename):
        """Load and return the JSON object stored in the given file"""
        if sys.version_info[0] >= 3:  # Python 3
            with open(filename, 'r', encoding='UTF-8') as f:
                content = f.read()
        else:  # Python 2
            with open(filename, 'r') as f:
                content = f.read().decode('UTF-8')
        return json.loads(content)

    def _load_test_dataset(self):
        import datetime

        self.db.clear()

        # Country referenced by Jane Roe's profile
        with self.db.connect() as session:
            session.add(Country(code='US', name='United States of America',
                                alpha3='USA'))

        # Organizations and their domains
        api.add_organization(self.db, 'Example')
        api.add_domain(self.db, 'Example', 'example.com', is_top_domain=True)
        api.add_domain(self.db, 'Example', 'example.net', is_top_domain=True)
        api.add_organization(self.db, 'Bitergia')
        api.add_domain(self.db, 'Bitergia', 'bitergia.net', is_top_domain=True)
        api.add_domain(self.db, 'Bitergia', 'bitergia.com', is_top_domain=True)
        api.add_domain(self.db, 'Bitergia', 'api.bitergia.com', is_top_domain=False)
        api.add_domain(self.db, 'Bitergia', 'test.bitergia.com', is_top_domain=False)
        api.add_organization(self.db, 'Unknown')

        # John Smith: two identities and a bot profile
        jsmith_uuid = api.add_identity(self.db, 'scm', '*****@*****.**',
                                       'John Smith', 'jsmith')
        api.add_identity(self.db, 'scm', '*****@*****.**', 'John Smith',
                         uuid=jsmith_uuid)
        api.edit_profile(self.db, jsmith_uuid, email='*****@*****.**',
                         is_bot=True)

        # Jane Roe: three identities and a profile linked to the country
        jroe_uuid = api.add_identity(self.db, 'scm', '*****@*****.**',
                                     'Jane Roe', 'jroe')
        api.add_identity(self.db, 'scm', '*****@*****.**', uuid=jroe_uuid)
        api.add_identity(self.db, 'unknown', '*****@*****.**', uuid=jroe_uuid)
        api.edit_profile(self.db, jroe_uuid, name='Jane Roe',
                         email='*****@*****.**', is_bot=False,
                         country_code='US')

        # Unique identity without identities nor enrollments
        api.add_unique_identity(self.db,
                                '0000000000000000000000000000000000000000')

        # Enrollments
        api.add_enrollment(self.db, jsmith_uuid, 'Example')
        api.add_enrollment(self.db, jroe_uuid, 'Example')
        api.add_enrollment(self.db, jroe_uuid, 'Bitergia',
                           datetime.datetime(1999, 1, 1),
                           datetime.datetime(2000, 1, 1))
        api.add_enrollment(self.db, jroe_uuid, 'Bitergia',
                           datetime.datetime(2006, 1, 1),
                           datetime.datetime(2008, 1, 1))

        # Blacklisted entries
        api.add_to_matching_blacklist(self.db, '*****@*****.**')
        api.add_to_matching_blacklist(self.db, 'John Smith')
class TestBaseCase(unittest.TestCase):
    """Defines common setup and teardown methods on show unit tests"""

    def setUp(self):
        if not hasattr(sys.stdout, 'getvalue'):
            self.fail('This test needs to be run in buffered mode')

        # Connection used to inspect the contents of the registry
        self.db = Database(DB_USER, DB_PASSWORD, DB_NAME, DB_HOST, DB_PORT)

        # Load the predefined dataset before each test
        self._load_test_dataset()

        # Build the command under test
        self.kwargs = {
            'user': DB_USER,
            'password': DB_PASSWORD,
            'database': DB_NAME,
            'host': DB_HOST,
            'port': DB_PORT
        }
        self.cmd = Show(**self.kwargs)

    def tearDown(self):
        self.db.clear()

    def _load_test_dataset(self):
        """Fill the registry with countries, organizations, identities,
        profiles and enrollments used by the tests."""
        # Add country
        with self.db.connect() as session:
            # Add a country
            country = Country(code='US',
                              name='United States of America',
                              alpha3='USA')
            session.add(country)

        # Add organizations
        for org_name in ('Example', 'Bitergia'):
            api.add_organization(self.db, org_name)

        # Add John Smith identity
        smith_id = api.add_identity(self.db, 'scm', '*****@*****.**',
                                    'John Smith', 'jsmith')
        api.add_identity(self.db, 'scm', '*****@*****.**',
                         'John Smith', uuid=smith_id)
        api.edit_profile(self.db, smith_id,
                         email='*****@*****.**', is_bot=True)

        # Add Joe Roe identity
        roe_id = api.add_identity(self.db, 'scm', '*****@*****.**',
                                  'Jane Roe', 'jroe')
        api.add_identity(self.db, 'scm', '*****@*****.**', uuid=roe_id)
        api.add_identity(self.db, 'unknown', '*****@*****.**', uuid=roe_id)
        api.edit_profile(self.db, roe_id, name='Jane Roe',
                         email='*****@*****.**', is_bot=False,
                         country_code='US')

        # Add unique identity, this one won't have neither identities
        # nor enrollments
        api.add_unique_identity(self.db,
                                '0000000000000000000000000000000000000000')

        # Add enrollments
        api.add_enrollment(self.db, smith_id, 'Example')
        api.add_enrollment(self.db, roe_id, 'Example')
        api.add_enrollment(self.db, roe_id, 'Bitergia',
                           datetime.datetime(1999, 1, 1),
                           datetime.datetime(2000, 1, 1))
        api.add_enrollment(self.db, roe_id, 'Bitergia',
                           datetime.datetime(2006, 1, 1),
                           datetime.datetime(2008, 1, 1))
class TestBaseCase(unittest.TestCase):
    """Defines common setup and teardown methods on merge unit tests"""
    # NOTE(review): the original docstring said "add unit tests", but the
    # command built in setUp() is Merge — fixed the copy-paste error.

    def setUp(self):
        if not hasattr(sys.stdout, 'getvalue') and not hasattr(
                sys.stderr, 'getvalue'):
            self.fail('This test needs to be run in buffered mode')

        # Create a connection to check the contents of the registry
        self.db = Database(DB_USER, DB_PASSWORD, DB_NAME, DB_HOST, DB_PORT)
        self.db.clear()
        self._load_test_dataset()

        # Create command
        self.kwargs = {
            'user': DB_USER,
            'password': DB_PASSWORD,
            'database': DB_NAME,
            'host': DB_HOST,
            'port': DB_PORT
        }
        self.cmd = Merge(**self.kwargs)

    def tearDown(self):
        self.db.clear()

    def _load_test_dataset(self):
        """Populate the registry with two unique identities ('John Smith'
        and 'John Doe'), their profiles, three organizations and a set of
        enrollments used by the merge tests."""
        # Add country
        with self.db.connect() as session:
            # Add a country
            us = Country(code='US', name='United States of America',
                         alpha3='USA')
            session.add(us)

        # 'John Smith' unique identity with two identities and a profile
        api.add_unique_identity(self.db, 'John Smith')
        api.add_identity(self.db, 'scm', '*****@*****.**',
                         uuid='John Smith')
        api.add_identity(self.db, 'scm', '*****@*****.**', 'John Smith',
                         uuid='John Smith')
        api.edit_profile(self.db, 'John Smith', name='John Smith',
                         is_bot=False)

        # 'John Doe' unique identity with one identity and a profile
        api.add_unique_identity(self.db, 'John Doe')
        api.add_identity(self.db, 'scm', '*****@*****.**', uuid='John Doe')
        api.edit_profile(self.db, 'John Doe', email='*****@*****.**',
                         is_bot=True, country_code='US')

        # Organizations and enrollments; LibreSoft has no enrollments
        api.add_organization(self.db, 'Example')
        api.add_enrollment(self.db, 'John Smith', 'Example')
        api.add_enrollment(self.db, 'John Doe', 'Example')

        api.add_organization(self.db, 'Bitergia')
        api.add_enrollment(self.db, 'John Smith', 'Bitergia')
        api.add_enrollment(self.db, 'John Doe', 'Bitergia',
                           datetime.datetime(1999, 1, 1),
                           datetime.datetime(2000, 1, 1))

        api.add_organization(self.db, 'LibreSoft')
def main():
    """Read emails and look for uuids.

    Reads database credentials from the ``.settings`` config file, builds a
    dict mapping e-mail addresses (from 'git' identities in SortingHat) to
    their uuids, matches the e-mails read from the input file against it and
    writes the (email, uuid) pairs found to the output CSV file.
    """
    # Parse args
    args = parse_args()

    # Read config file
    parser = configparser.ConfigParser()
    parser.read('.settings')
    section = parser['SortingHat']
    db_user = section['db_user']
    db_password = section['password']
    db_name = section['db_name']
    db_host = section['host']
    db_port = section['port']

    db = Database(db_user, db_password, db_name, db_host, db_port)

    # Get email blacklist from SH; a set gives O(1) membership tests in
    # the loop below (a list was O(n) per e-mail)
    print('Reading email blacklist from SH')
    blacklist = sortinghat.api.blacklist(db)
    email_blacklist = {identity.excluded for identity in blacklist}

    with db.connect() as session:
        print('Searching for E-Mails in SH...')
        query = session.query(UniqueIdentity)
        query = query.filter(Identity.source == 'git',
                             UniqueIdentity.uuid == Identity.uuid)
        uidentities = query.order_by(UniqueIdentity.uuid).all()
        print(len(uidentities), ' entities read from SH')

        print('Creating E-Mails dict...')
        email_dict = {}
        dups = 0
        for uidentity in uidentities:
            for identity in uidentity.identities:
                # Skip empty, placeholder or blacklisted addresses
                if identity.email is None or identity.email == 'none@none' \
                        or identity.email == '' \
                        or identity.email == 'unknown' \
                        or identity.email in email_blacklist:
                    continue
                # Count e-mails mapped to more than one distinct uuid;
                # last uuid seen wins
                if identity.email in email_dict:
                    if identity.uuid != email_dict[identity.email]:
                        dups += 1
                email_dict[identity.email] = identity.uuid

        print('Done! Entities in emails dict: ', len(email_dict),
              ' Dups: ', dups)

    email_list = read_emails(args.input)
    # BUG FIX: report the number of e-mails read from the input file;
    # the original printed len(email_dict) here
    print(len(email_list), ' emails read from file')

    # Find UUIDS
    matches = {}
    uuids = set()
    dups_in_csv = 0
    not_found_count = 0
    for email in email_list:
        if email in matches:
            dups_in_csv += 1
        elif email in email_dict:
            matches[email] = email_dict[email]
            uuids.add(email_dict[email])
        else:
            not_found_count += 1

    print('dups in csv:', dups_in_csv)
    print('Not found:', not_found_count)
    print('Found : ', len(matches))
    print('Found (unique): ', len(uuids))

    # Export results
    print('Writing results...')
    fieldnames = ['email', 'uuid']
    # newline='' is required by the csv module to avoid blank lines
    # on platforms with \r\n line endings
    with open(args.output, 'w', newline='') as csv_out:
        csvwriter = csv.DictWriter(csv_out, delimiter=',',
                                   fieldnames=fieldnames)
        csvwriter.writeheader()
        for email, uuid in matches.items():
            csvwriter.writerow({'email': email, 'uuid': uuid})

    print('Results wrote to file ', args.output)