Example #1
def process(agent_name):
    """Method initializing the class.

    Args:
        agent_name: agent name

    Returns:
        None

    """
    # Initialize key variables
    uid_metadata = defaultdict(lambda: defaultdict(dict))

    # Configuration setup
    config = jm_configuration.Config()
    threads_in_pool = config.ingest_threads()

    # Make sure we have database connectivity
    if not db.connectivity():
        log_message = (
            'No connectivity to database. Check if running. '
            'Check database authentication parameters.')
        log.log2warn(1053, log_message)
        return

    # Get metadata on files
    uid_metadata = validate_cache_files()

    # Spawn threads only if we have files to process
    if uid_metadata:
        # Process lock file
        f_obj = hidden.File()
        lockfile = f_obj.lock(agent_name)
        if os.path.exists(lockfile):
            # Return if lock file is present
            log_message = (
                'Ingest lock file %s exists. Multiple ingest daemons running '
                'or lots of cache files to ingest. Ingester may have died '
                'catastrophically in the past, in which case the lockfile '
                'should be deleted. Exiting ingest process. '
                'Will try again later.') % (lockfile)
            log.log2warn(1069, log_message)
            return
        else:
            # Create lockfile
            open(lockfile, 'a').close()

        # Spawn a pool of threads and pass them the queue instance.
        # Only create the required number of threads, up to the
        # threads_in_pool maximum
        for _ in range(min(threads_in_pool, len(uid_metadata))):
            update_thread = ProcessUID(THREAD_QUEUE)
            update_thread.daemon = True

            # Sometimes we exhaust the thread abilities of the OS
            # even with the "threads_in_pool" limit. This is because
            # there could be a backlog of cache files to process
            # and we have overlapping ingests due to a deleted lockfile.
            # This code ensures we don't exceed the limits.
            try:
                update_thread.start()
            except RuntimeError:
                log_message = (
                    'Too many threads created for cache ingest. '
                    'Verify that ingest lock file is present.')

                # Remove the lockfile so we can restart later then die
                os.remove(lockfile)
                log.log2die(1067, log_message)
            except Exception:
                log_message = (
                    'Unknown error occurred when trying to '
                    'create cache ingest threads')

                # Remove the lockfile so we can restart later then die
                os.remove(lockfile)
                log.log2die(1072, log_message)

        # Read each cache file
        for hosthash in uid_metadata:
            for uid in uid_metadata[hosthash]:
                # Build the job dict inside the loop; a fresh dict is
                # required on every iteration to prevent data corruption
                # between threads
                data_dict = {
                    'uid': uid,
                    'metadata': uid_metadata[hosthash][uid],
                    'config': config}
                THREAD_QUEUE.put(data_dict)

        # Wait on the queue until everything has been processed
        THREAD_QUEUE.join()

        # PYTHON BUG. Join can occur while threads are still shutting down.
        # This can create spurious "Exception in thread (most likely raised
        # during interpreter shutdown)" errors.
        # The "time.sleep(1)" adds a delay to make sure things really terminate
        # properly. This seems to be an issue on virtual machines in Dev only
        time.sleep(1)

        # Remove the lockfile if it is still present
        if os.path.exists(lockfile):
            os.remove(lockfile)
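
Example #1 references a ProcessUID class and a THREAD_QUEUE global that are not shown. Below is a minimal sketch consistent with how process() uses them: the thread is constructed with the queue, daemonized, and consumes dicts keyed by 'uid', 'metadata', and 'config'. The _ingest helper is a hypothetical stand-in, not part of the original code.

import queue
import threading

THREAD_QUEUE = queue.Queue()


def _ingest(data_dict):
    """Hypothetical stand-in for the real per-file database insert."""
    print('Ingesting UID', data_dict['uid'])


class ProcessUID(threading.Thread):
    """Consume cache-file jobs placed on the shared queue."""

    def __init__(self, thread_queue):
        threading.Thread.__init__(self)
        self.thread_queue = thread_queue

    def run(self):
        while True:
            # Block until process() puts a job dict on the queue
            data_dict = self.thread_queue.get()
            try:
                _ingest(data_dict)
            finally:
                # Mark the job done so THREAD_QUEUE.join() can unblock
                self.thread_queue.task_done()

The try/finally around task_done() matters: without it, a failed job would leave THREAD_QUEUE.join() blocked forever.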
Example #2
def process(ingester_agent_name):
    """Process cache data by adding it to the database using subprocesses.

    Args:
        ingester_agent_name: Ingester agent name

    Returns:
        None

    """
    # Initialize key variables
    argument_list = []
    id_agent_metadata = defaultdict(lambda: defaultdict(dict))

    # Configuration setup
    config = configuration.Config()
    configured_pool_size = config.ingest_pool_size()

    # Make sure we have database connectivity
    if not db.connectivity():
        log_message = ('No connectivity to database. Check if running. '
                       'Check database authentication parameters.')
        log.log2warning(1053, log_message)
        return

    # Get metadata on files
    id_agent_metadata = validate_cache_files()

    # Spawn processes only if we have files to process
    if id_agent_metadata:
        # Process lock file
        lockfile = daemon.lock_file(ingester_agent_name)
        if os.path.exists(lockfile):
            # Return if lock file is present
            log_message = (
                'Ingest lock file %s exists. Multiple ingest daemons running '
                'or lots of cache files to ingest. Ingester may have died '
                'catastrophically in the past, in which case the lockfile '
                'should be deleted. Exiting ingest process. '
                'Will try again later.') % (lockfile)
            log.log2warning(1069, log_message)
            return
        else:
            # Create lockfile
            open(lockfile, 'a').close()

        # Read each cache file
        for devicehash in id_agent_metadata:
            for id_agent in id_agent_metadata[devicehash]:
                # Create a list of arguments to process
                argument_list.append(
                    (config, id_agent_metadata[devicehash][id_agent],
                     ingester_agent_name))

        # Create a pool of sub process resources
        pool_size = int(min(configured_pool_size, len(id_agent_metadata)))
        with Pool(processes=pool_size) as pool:

            # Create sub processes from the pool
            pool.map(_wrapper_process, argument_list)

        # pool.map() blocks until every worker finishes, and the "with"
        # block shuts the pool down on exit, so no explicit join is needed

        # Remove the lockfile if it is still present
        if os.path.exists(lockfile):
            os.remove(lockfile)
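
In both subprocess variants, pool.map() hands each worker exactly one argument, so _wrapper_process must unpack the tuple assembled in argument_list. A minimal sketch for the three-element tuple used here, with _process_cache_file as a hypothetical stand-in for the real per-file ingest routine:

def _process_cache_file(config, metadata, ingester_agent_name):
    """Hypothetical stand-in for the real per-file ingest routine."""
    print(ingester_agent_name, metadata)


def _wrapper_process(argument_tuple):
    """Unpack a single pool.map() argument into its component parts."""
    (config, metadata, ingester_agent_name) = argument_tuple
    _process_cache_file(config, metadata, ingester_agent_name)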
Example #3
def process(agent_name):
    """Method initializing the class.

    Args:
        agent_name: agent name

    Returns:
        None

    """
    # Initialize key variables
    argument_list = []
    uid_metadata = defaultdict(lambda: defaultdict(dict))

    # Configuration setup
    config = jm_configuration.Config()
    threads_in_pool = config.ingest_threads()

    # Make sure we have database connectivity
    if not db.connectivity():
        log_message = (
            'No connectivity to database. Check if running. '
            'Check database authentication parameters.')
        log.log2warn(1053, log_message)
        return

    # Get metadata on files
    uid_metadata = validate_cache_files()

    # Spawn processes only if we have files to process
    if uid_metadata:
        # Process lock file
        f_obj = hidden.File()
        lockfile = f_obj.lock(agent_name)
        if os.path.exists(lockfile):
            # Return if lock file is present
            log_message = (
                'Ingest lock file %s exists. Multiple ingest daemons running '
                'or lots of cache files to ingest. Ingester may have died '
                'catastrophically in the past, in which case the lockfile '
                'should be deleted. Exiting ingest process. '
                'Will try again later.') % (lockfile)
            log.log2warn(1069, log_message)
            return
        else:
            # Create lockfile
            open(lockfile, 'a').close()

        # Read each cache file
        for hosthash in uid_metadata:
            for uid in uid_metadata[hosthash]:
                # Create a list of arguments to process
                argument_list.append(
                    (config, uid_metadata[hosthash][uid])
                )

        # Create a pool of sub process resources
        with Pool(processes=threads_in_pool) as pool:

            # Create sub processes from the pool
            pool.map(_wrapper_process, argument_list)

        # pool.map() blocks until every worker finishes, and the "with"
        # block shuts the pool down on exit, so no explicit join is needed

        # Remove the lockfile if it is still present
        if os.path.exists(lockfile):
            os.remove(lockfile)
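
Note that this earlier variant bundles only two elements, (config, uid_metadata[hosthash][uid]), into each tuple, so its _wrapper_process counterpart would unpack two fields rather than three. It also passes threads_in_pool straight to Pool() without the min(configured_pool_size, len(...)) clamp used in Example #2, so the pool can be larger than the number of files waiting to be processed.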