def process(agent_name): """Method initializing the class. Args: agent_name: agent name Returns: None """ # Initialize key variables uid_metadata = defaultdict(lambda: defaultdict(dict)) # Configuration setup config = jm_configuration.Config() threads_in_pool = config.ingest_threads() # Make sure we have database connectivity if db.connectivity() is False: log_message = ( 'No connectivity to database. Check if running. ' 'Check database authentication parameters.' '') log.log2warn(1053, log_message) return # Get meta data on files uid_metadata = validate_cache_files() # Spawn processes only if we have files to process if bool(uid_metadata.keys()) is True: # Process lock file f_obj = hidden.File() lockfile = f_obj.lock(agent_name) if os.path.exists(lockfile) is True: # Return if lock file is present log_message = ( 'Ingest lock file %s exists. Multiple ingest daemons running ' 'or lots of cache files to ingest. Ingester may have died ' 'catastrophically in the past, in which case the lockfile ' 'should be deleted. Exiting ingest process. ' 'Will try again later.' '') % (lockfile) log.log2warn(1069, log_message) return else: # Create lockfile open(lockfile, 'a').close() # Spawn a pool of threads, and pass them queue instance # Only create the required number of threads up to the # threads_in_pool maximum for _ in range( min(threads_in_pool, len(uid_metadata))): update_thread = ProcessUID(THREAD_QUEUE) update_thread.daemon = True # Sometimes we exhaust the thread abilities of the OS # even with the "threads_in_pool" limit. This is because # there could be a backlog of files to cache files process # and we have overlapping ingests due to a deleted lockfile. # This code ensures we don't exceed the limits. try: update_thread.start() except RuntimeError: log_message = ( 'Too many threads created for cache ingest. ' 'Verify that ingest lock file is present.') # Remove the lockfile so we can restart later then die os.remove(lockfile) log.log2die(1067, log_message) except: log_message = ( 'Unknown error occurred when trying to ' 'create cache ingest threads') # Remove the lockfile so we can restart later then die os.remove(lockfile) log.log2die(1072, log_message) # Read each cache file for hosthash in uid_metadata.keys(): for uid in uid_metadata[hosthash].keys(): ############################################################## # # Define variables that will be required for the threading # We have to initialize the dict during every loop to prevent # data corruption # ############################################################## data_dict = {} data_dict['uid'] = uid data_dict['metadata'] = uid_metadata[hosthash][uid] data_dict['config'] = config THREAD_QUEUE.put(data_dict) # Wait on the queue until everything has been processed THREAD_QUEUE.join() # PYTHON BUG. Join can occur while threads are still shutting down. # This can create spurious "Exception in thread (most likely raised # during interpreter shutdown)" errors. # The "time.sleep(1)" adds a delay to make sure things really terminate # properly. This seems to be an issue on virtual machines in Dev only time.sleep(1) # Return if lock file is present if os.path.exists(lockfile) is True: os.remove(lockfile)
# Subprocess-based ingest variant using a multiprocessing pool.
# configuration, daemon, db and log are project modules;
# validate_cache_files and _wrapper_process are defined elsewhere in the
# same module.
import os
from collections import defaultdict
from multiprocessing import Pool


def process(ingester_agent_name):
    """Process cache data by adding it to the database using subprocesses.

    Args:
        ingester_agent_name: Ingester agent name

    Returns:
        None

    """
    # Initialize key variables
    argument_list = []
    id_agent_metadata = defaultdict(lambda: defaultdict(dict))

    # Configuration setup
    config = configuration.Config()
    configured_pool_size = config.ingest_pool_size()

    # Make sure we have database connectivity
    if not db.connectivity():
        log_message = (
            'No connectivity to database. Check if running. '
            'Check database authentication parameters.')
        log.log2warning(1053, log_message)
        return

    # Get meta data on files
    id_agent_metadata = validate_cache_files()

    # Spawn processes only if we have files to process
    if id_agent_metadata:
        # Return if the lock file is already present
        lockfile = daemon.lock_file(ingester_agent_name)
        if os.path.exists(lockfile):
            log_message = (
                'Ingest lock file %s exists. Multiple ingest daemons '
                'running or lots of cache files to ingest. Ingester may '
                'have died catastrophically in the past, in which case '
                'the lockfile should be deleted. Exiting ingest process. '
                'Will try again later.') % (lockfile)
            log.log2warning(1069, log_message)
            return
        else:
            # Create lockfile
            open(lockfile, 'a').close()

        # Create a list of arguments to process, one entry per cache file
        for devicehash in id_agent_metadata.keys():
            for id_agent in id_agent_metadata[devicehash].keys():
                argument_list.append(
                    (config,
                     id_agent_metadata[devicehash][id_agent],
                     ingester_agent_name))

        # Create a pool of sub process resources
        pool_size = int(min(configured_pool_size, len(id_agent_metadata)))
        with Pool(processes=pool_size) as pool:
            # Create sub processes from the pool. pool.map() blocks until
            # every sub process has ended, so no explicit pool.join() is
            # needed.
            pool.map(_wrapper_process, argument_list)

        # Remove the lock file if it is still present
        if os.path.exists(lockfile):
            os.remove(lockfile)
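# _wrapper_process is not shown in this excerpt. Because pool.map() passes
# exactly one argument to its worker function, the three values are bundled
# into a tuple above and must be unpacked inside the wrapper. A plausible
# sketch, assuming a hypothetical _process_agent_cache() helper that does
# the real ingest work:
def _wrapper_process(argument_tuple):
    """Unpack a bundled argument tuple and ingest one agent's cache data."""
    (config, metadata, ingester_agent_name) = argument_tuple
    _process_agent_cache(
        config, metadata, ingester_agent_name)    # Hypothetical helper


# Pool.starmap(), available since Python 3.3, unpacks argument tuples
# automatically and would make the wrapper unnecessary:
#     pool.starmap(_process_agent_cache, argument_list)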
def process(agent_name): """Method initializing the class. Args: agent_name: agent name Returns: None """ # Initialize key variables argument_list = [] uid_metadata = defaultdict(lambda: defaultdict(dict)) # Configuration setup config = jm_configuration.Config() threads_in_pool = config.ingest_threads() # Make sure we have database connectivity if db.connectivity() is False: log_message = ( 'No connectivity to database. Check if running. ' 'Check database authentication parameters.' '') log.log2warn(1053, log_message) return # Get meta data on files uid_metadata = validate_cache_files() # Spawn processes only if we have files to process if bool(uid_metadata.keys()) is True: # Process lock file f_obj = hidden.File() lockfile = f_obj.lock(agent_name) if os.path.exists(lockfile) is True: # Return if lock file is present log_message = ( 'Ingest lock file %s exists. Multiple ingest daemons running ' 'or lots of cache files to ingest. Ingester may have died ' 'catastrophically in the past, in which case the lockfile ' 'should be deleted. Exiting ingest process. ' 'Will try again later.' '') % (lockfile) log.log2warn(1069, log_message) return else: # Create lockfile open(lockfile, 'a').close() # Read each cache file for hosthash in uid_metadata.keys(): for uid in uid_metadata[hosthash].keys(): # Create a list of arguments to process argument_list.append( (config, uid_metadata[hosthash][uid]) ) # Create a pool of sub process resources with Pool(processes=threads_in_pool) as pool: # Create sub processes from the pool pool.map(_wrapper_process, argument_list) # Wait for all the processes to end # pool.join() # Return if lock file is present if os.path.exists(lockfile) is True: os.remove(lockfile)