import logging
import sys
from datetime import datetime

from grimoire.elk.sortinghat import SortingHat

logger = logging.getLogger(__name__)


def load_identities(ocean_backend, enrich_backend):
    try:
        from grimoire.elk.sortinghat import SortingHat
    except ImportError:
        logger.warning("SortingHat not available.")
        return 0

    # First we add all new identities to SH
    items_count = 0
    new_identities = []

    for item in ocean_backend:
        items_count += 1
        # Get identities from new items to be added to SortingHat
        identities = enrich_backend.get_identities(item)
        for identity in identities:
            if identity not in new_identities:
                new_identities.append(identity)
        if items_count % 100 == 0:
            logger.debug("Processed %i items identities (%i identities)",
                         items_count, len(new_identities))
    logger.debug("TOTAL ITEMS: %i", items_count)

    logger.info("Total new identities to be checked %i", len(new_identities))

    SortingHat.add_identities(enrich_backend.sh_db, new_identities,
                              enrich_backend.get_connector_name())

    return items_count
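# The membership test above scans a list, which is O(n) per identity. A
# minimal sketch of a set-based variant, assuming each identity is a flat
# dict of hashable values (an assumption; the real identity shape may
# differ):
def unique_identities(items, get_identities):
    """Deduplicate identities via a set of sorted (field, value) tuples."""
    seen = set()
    unique = []
    for item in items:
        for identity in get_identities(item):
            key = tuple(sorted(identity.items()))
            if key not in seen:
                seen.add(key)
                unique.append(identity)
    return unique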
def enrich_sortinghat(ocean_backend, enrich_backend):
    # First we add all new identities to SH
    item_count = 0
    new_identities = []

    for item in ocean_backend:
        item_count += 1
        # Get identities from new items to be added to SortingHat
        identities = enrich_backend.get_identities(item)
        for identity in identities:
            if identity not in new_identities:
                new_identities.append(identity)
        if item_count % 1000 == 0:
            logging.debug("Processed %i items identities (%i identities)",
                          item_count, len(new_identities))
    logging.debug("TOTAL ITEMS: %i", item_count)

    logging.info("Total new identities to be checked %i", len(new_identities))

    merged_identities = SortingHat.add_identities(
        enrich_backend.sh_db, new_identities,
        enrich_backend.get_connector_name())

    # Redo enrich for items with new merged identities
    renrich_items = []
    # For testing
    # merged_identities = ['7e0bcf6ff46848403eaffa29ef46109f386fa24b']
    for mid in merged_identities:
        renrich_items += get_items_from_uuid(mid, enrich_backend, ocean_backend)

    # Enrich items with merged identities
    enrich_count_merged = enrich_items(renrich_items, enrich_backend)

    return enrich_count_merged
def enrich_sortinghat(backend_name, ocean_backend, enrich_backend):
    # First we add all new identities to SH
    item_count = 0
    new_identities = []

    for item in ocean_backend:
        item_count += 1
        # Get identities from new items to be added to SortingHat
        identities = enrich_backend.get_identities(item)
        for identity in identities:
            if identity not in new_identities:
                new_identities.append(identity)
        if item_count % 1000 == 0:
            logging.debug("Processed %i items identities (%i identities)",
                          item_count, len(new_identities))
    logging.debug("TOTAL ITEMS: %i", item_count)

    logging.info("Total new identities to be checked %i", len(new_identities))

    merged_identities = SortingHat.add_identities(enrich_backend.sh_db,
                                                  new_identities, backend_name)

    # Redo enrich for items with new merged identities
    renrich_items = []
    # For testing
    # merged_identities = ['7e0bcf6ff46848403eaffa29ef46109f386fa24b']
    for mid in merged_identities:
        renrich_items += get_items_from_uuid(mid, enrich_backend, ocean_backend)

    # Enrich items with merged identities
    enrich_count_merged = enrich_items(renrich_items, enrich_backend)

    return enrich_count_merged
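# A minimal usage sketch for enrich_sortinghat(), assuming the backends are
# already wired to their Elasticsearch indexes as in the __main__ block at
# the end of this file; the wrapper name is hypothetical. Note that
# get_items_from_uuid() and enrich_items() are used above but defined
# elsewhere in the project.
def resync_merged_identities(backend_name, ocean_backend, enrich_backend):
    """Run the identity pass and log how many items were re-enriched."""
    count = enrich_sortinghat(backend_name, ocean_backend, enrich_backend)
    logging.info("Items re-enriched after identity merges: %i", count)
    return count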
try:
    items = []
    new_identities = []
    items_count = 0

    # The enclosing loop header is missing from this fragment; it plausibly
    # iterates over the ocean backend, flushing the bulk buffer whenever it
    # fills up (the threshold name below is an assumption).
    for item in ocean_backend:
        if len(items) >= max_items_bulk:
            enrich_backend.enrich_items(items)
            items = []
        items.append(item)
        # Get identities from new items to be added to SortingHat
        identities = ocean_backend.get_identities(item)
        for identity in identities:
            if identity not in new_identities:
                new_identities.append(identity)
        items_count += 1
    enrich_backend.enrich_items(items)

    logging.info("Total items enriched %i", items_count)
    logging.info("Total new identities to be checked %i", len(new_identities))

    merged_identities = SortingHat.add_identities(new_identities, backend_name)
    # Redo enrich for items with new merged identities

except KeyboardInterrupt:
    logging.info("\n\nReceived Ctrl-C or other break signal. Exiting.\n")
    logging.debug("Recovering cache")
    backend.cache.recover()
    sys.exit(0)

total_time_min = (datetime.now() - app_init).total_seconds() / 60
logging.info("Finished in %.2f min", total_time_min)
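# The loop above streams items and flushes them to enrich_items() in bulk.
# A minimal standalone sketch of that chunking pattern (the chunk size is
# illustrative, not taken from the original):
def chunks(iterable, size=1000):
    """Yield successive lists of at most `size` elements from `iterable`."""
    chunk = []
    for element in iterable:
        chunk.append(element)
        if len(chunk) == size:
            yield chunk
            chunk = []
    if chunk:
        yield chunk

# Usage: for batch in chunks(ocean_backend): enrich_backend.enrich_items(batch)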
if __name__ == '__main__':
    app_init = datetime.now()

    args = get_params()

    config_logging(args.debug)

    if args.index is None:
        # Extract identities from all indexes
        pass
    else:
        logging.info("Extracting identities from: %s", args.index)
        perceval_params = get_perceval_params(args.elastic_url, args.index)
        backend_name = perceval_params['backend']

        connector = get_connector_from_name(backend_name)
        perceval_backend_class = connector[0]
        ocean_backend_class = connector[1]

        perceval_backend = perceval_backend_class(**perceval_params)
        obackend = ocean_backend_class(perceval_backend, incremental=False)
        obackend.set_elastic(get_elastic(args.elastic_url, args.index))

        identities = get_identities(obackend)
        # Add the identities to Sorting Hat
        SortingHat.add_identities(identities, backend_name)

        print("Total identities processed: %i" % len(identities))
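# get_identities() is used above but not defined in this file. A minimal
# sketch, assuming it mirrors the collection loop from load_identities()
# earlier; the real helper may differ. (Shown here for reference; in a real
# module it would be defined before the __main__ block.)
def get_identities(ocean_backend):
    """Collect the unique identities found across all items of a backend."""
    new_identities = []
    for item in ocean_backend:
        for identity in ocean_backend.get_identities(item):
            if identity not in new_identities:
                new_identities.append(identity)
    return new_identities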