def add_sh_github_identity(user, user_field, rol):
    """Add a new GitHub identity to SortingHat if it does not already exist.

    Looks up the commit author/committer (``rol``) in SortingHat by their
    git commit username; if not found, resolves their GitHub login and
    registers a new SortingHat identity combining the git name/email with
    the GitHub username.

    NOTE(review): this body reads the free names ``self``, ``item`` and
    ``commit_hash``, none of which are parameters — the signature is very
    likely missing ``self`` (a method torn out of its class) and the other
    two come from enclosing scope. TODO confirm against the full file.

    :param user: raw user string from the git commit (also reused below as
        the new identity dict — the name is shadowed intentionally)
    :param user_field: key inside ``item['data']`` holding the user value
    :param rol: role of the user in the commit (e.g. author/committer)
    """
    github_repo = None
    # Derive "<owner>/<repo>" from the origin URL when it is a GitHub repo
    if GITHUB in item['origin']:
        github_repo = item['origin'].replace(GITHUB,'')
        # NOTE(review): the '.' is unescaped, so this strips any char + "git"
        # at the end, not only a literal ".git" suffix — harmless for real
        # GitHub URLs but worth confirming; r'\.git$' would be exact.
        github_repo = re.sub('.git$', '', github_repo)
    if not github_repo:
        # Not a GitHub origin: nothing to do
        return

    # Try to get the identity from SH
    user_data = item['data'][user_field]
    sh_identity = SortingHat.get_github_commit_username(self.sh_db, user, SH_GIT_COMMIT)
    if not sh_identity:
        # Get the username from GitHub
        gh_username = self.get_github_login(user_data, rol, commit_hash, github_repo)
        # Create a new SH identity with name, email from git and username from github
        logging.debug("Adding new identity %s to SH %s: %s", gh_username, SH_GIT_COMMIT, user)
        user = self.get_sh_identity(user_data)
        user['username'] = gh_username
        SortingHat.add_identity(self.sh_db, user, SH_GIT_COMMIT)
    else:
        # Identity already known: cache the GitHub login for this raw user
        # NOTE(review): nesting of the debug call under the cache-miss branch
        # is a best-guess reconstruction — confirm against the full file.
        if user_data not in self.github_logins:
            self.github_logins[user_data] = sh_identity['username']
            logging.debug("GitHub-commit exists. username:%s user:%s",
                          sh_identity['username'], user_data)
def load_identities(ocean_backend, enrich_backend):
    """Collect the unique identities found in ocean items and load them into SortingHat.

    Iterates every item in ``ocean_backend``, extracts its identities via
    ``enrich_backend.get_identities`` and adds the deduplicated list to the
    SortingHat database in one batch.

    :param ocean_backend: iterable of raw (ocean) items
    :param enrich_backend: enrich backend providing ``get_identities``,
        ``sh_db`` and ``get_connector_name``
    :returns: number of items processed (0 when SortingHat is unavailable)
    """
    try:
        from grimoire.elk.sortinghat import SortingHat
    except ImportError:
        # Bug fix: the original only warned here and then crashed later with
        # a NameError when SortingHat was actually used. Bail out early.
        logger.warning("SortingHat not available.")
        return 0

    # First we add all new identities to SH
    items_count = 0
    new_identities = []

    for item in ocean_backend:
        items_count += 1
        # Get identities from new items to be added to SortingHat
        for identity in enrich_backend.get_identities(item):
            # Identities are typically dicts (unhashable), so dedup with an
            # order-preserving list membership test — TODO confirm type.
            if identity not in new_identities:
                new_identities.append(identity)
        if items_count % 100 == 0:
            logger.debug("Processed %i items identities (%i identities)",
                         items_count, len(new_identities))
    logger.debug("TOTAL ITEMS: %i", items_count)
    logger.info("Total new identities to be checked %i", len(new_identities))

    SortingHat.add_identities(enrich_backend.sh_db, new_identities,
                              enrich_backend.get_connector_name())
    return items_count
def enrich_sortinghat(ocean_backend, enrich_backend):
    """Add new identities from ocean items to SortingHat, then re-enrich the items affected by identity merges.

    :param ocean_backend: iterable of raw (ocean) items
    :param enrich_backend: enrich backend providing ``get_identities``,
        ``sh_db`` and ``get_connector_name``
    :returns: number of items re-enriched because their identity was merged
    """
    # First we add all new identities to SH
    item_count = 0
    new_identities = []

    for item in ocean_backend:
        item_count += 1
        # Get identities from new items to be added to SortingHat
        for identity in enrich_backend.get_identities(item):
            # Identities are typically dicts (unhashable): dedup via an
            # order-preserving list membership test — TODO confirm type.
            if identity not in new_identities:
                new_identities.append(identity)
        if item_count % 1000 == 0:
            # Fix: lazy %-args instead of eager string interpolation, so the
            # message is only formatted when DEBUG logging is enabled.
            logging.debug("Processed %i items identities (%i identities)",
                          item_count, len(new_identities))
    logging.debug("TOTAL ITEMS: %i", item_count)
    logging.info("Total new identities to be checked %i", len(new_identities))

    merged_identities = SortingHat.add_identities(
        enrich_backend.sh_db, new_identities,
        enrich_backend.get_connector_name())

    # Redo enrich for items whose unique identity changed after merging
    renrich_items = []
    for mid in merged_identities:
        renrich_items += get_items_from_uuid(mid, enrich_backend, ocean_backend)

    # Enrich items with merged identities
    enrich_count_merged = enrich_items(renrich_items, enrich_backend)
    return enrich_count_merged
def enrich_sortinghat(backend_name, ocean_backend, enrich_backend):
    """Add new identities from ocean items to SortingHat, then re-enrich the items affected by identity merges.

    :param backend_name: data-source name passed to SortingHat as identity source
    :param ocean_backend: iterable of raw (ocean) items
    :param enrich_backend: enrich backend providing ``get_identities`` and ``sh_db``
    :returns: number of items re-enriched because their identity was merged
    """
    # First we add all new identities to SH
    item_count = 0
    new_identities = []

    for item in ocean_backend:
        item_count += 1
        # Get identities from new items to be added to SortingHat
        for identity in enrich_backend.get_identities(item):
            # Identities are typically dicts (unhashable): dedup via an
            # order-preserving list membership test — TODO confirm type.
            if identity not in new_identities:
                new_identities.append(identity)
        if item_count % 1000 == 0:
            # Fix: lazy %-args instead of eager string interpolation, so the
            # message is only formatted when DEBUG logging is enabled.
            logging.debug("Processed %i items identities (%i identities)",
                          item_count, len(new_identities))
    logging.debug("TOTAL ITEMS: %i", item_count)
    logging.info("Total new identities to be checked %i", len(new_identities))

    merged_identities = SortingHat.add_identities(enrich_backend.sh_db,
                                                  new_identities, backend_name)

    # Redo enrich for items whose unique identity changed after merging
    renrich_items = []
    for mid in merged_identities:
        renrich_items += get_items_from_uuid(mid, enrich_backend, ocean_backend)

    # Enrich items with merged identities
    enrich_count_merged = enrich_items(renrich_items, enrich_backend)
    return enrich_count_merged
def get_uuid_from_id(self, sh_id): """ Get the SH identity uuid from the id """ return SortingHat.get_uuid_from_id(self.sh_db, sh_id)
# NOTE(review): incomplete fragment — the enclosing `try:` and the item
# loop (`for item in ...`) begin before this chunk, so the `except` below
# has no visible `try`; relative indentation is a best-effort
# reconstruction and must be confirmed against the full file.
enrich_backend.enrich_items(items)  # flush the full batch to the enrich index
items = []                          # start a fresh batch
items.append(item)
# Get identities from new items to be added to SortingHat
identities = ocean_backend.get_identities(item)
for identity in identities:
    if identity not in new_identities:
        new_identities.append(identity)
items_count += 1
# After the loop: flush the final, possibly partial batch
enrich_backend.enrich_items(items)
logging.info("Total items enriched %i " % items_count)
logging.info("Total new identities to be checked %i" % len(new_identities))
merged_identities = SortingHat.add_identities(new_identities, backend_name)
# Redo enrich for items with new merged identities
# NOTE(review): orphan `except` — its matching `try:` precedes this chunk
except KeyboardInterrupt:
    logging.info("\n\nReceived Ctrl-C or other break signal. Exiting.\n")
    logging.debug("Recovering cache")
    backend.cache.recover()
    sys.exit(0)
# Report total wall-clock runtime in minutes
total_time_min = (datetime.now() - app_init).total_seconds() / 60
logging.info("Finished in %.2f min" % (total_time_min))
# NOTE(review): incomplete fragment (duplicate of an earlier chunk) — the
# enclosing `try:` and the item loop (`for item in ...`) begin before this
# chunk, so the `except` below has no visible `try`; relative indentation
# is a best-effort reconstruction and must be confirmed against the full file.
enrich_backend.enrich_items(items)  # flush the full batch to the enrich index
items = []                          # start a fresh batch
items.append(item)
# Get identities from new items to be added to SortingHat
identities = ocean_backend.get_identities(item)
for identity in identities:
    if identity not in new_identities:
        new_identities.append(identity)
items_count += 1
# After the loop: flush the final, possibly partial batch
enrich_backend.enrich_items(items)
logging.info("Total items enriched %i " % items_count)
logging.info("Total new identities to be checked %i" % len(new_identities))
merged_identities = SortingHat.add_identities(new_identities, backend_name)
# Redo enrich for items with new merged identities
# NOTE(review): orphan `except` — its matching `try:` precedes this chunk
except KeyboardInterrupt:
    logging.info("\n\nReceived Ctrl-C or other break signal. Exiting.\n")
    logging.debug("Recovering cache")
    backend.cache.recover()
    sys.exit(0)
# Report total wall-clock runtime in minutes
total_time_min = (datetime.now()-app_init).total_seconds()/60
logging.info("Finished in %.2f min" % (total_time_min))
if __name__ == '__main__':
    # Script entry point: extract identities from an ocean (raw) index and
    # load them into SortingHat.
    app_init = datetime.now()

    args = get_params()
    config_logging(args.debug)

    if args.index is None:
        # Extract identities from all indexes — not implemented yet
        pass
    else:
        # Fix: lazy %-args instead of eager string interpolation
        logging.info("Extracting identities from: %s", args.index)

        perceval_params = get_perceval_params(args.elastic_url, args.index)
        backend_name = perceval_params['backend']
        connector = get_connector_from_name(backend_name)
        perceval_backend_class = connector[0]
        ocean_backend_class = connector[1]
        # Fix: removed the dead `perceval_backend = None` store that was
        # immediately overwritten (its "Don't use perceval" comment
        # contradicted the very next line).
        perceval_backend = perceval_backend_class(**perceval_params)
        # incremental=False: read the whole index, not only new items
        obackend = ocean_backend_class(perceval_backend, incremental=False)
        obackend.set_elastic(get_elastic(args.elastic_url, args.index))

        identities = get_identities(obackend)
        # Add the identities to Sorting Hat
        SortingHat.add_identities(identities, backend_name)

        print("Total identities processed: %i" % len(identities))