Example No. 1
    def __init__(self, pattoo_db_records_lists):
        """Initialize the class.

        Args:
            pattoo_db_records_lists: List of PattooDBrecord object lists
                grouped by source and sorted by timestamp. This data is
                obtained from PattooShared.converter.extract

        Returns:
            None

        """
        # Initialize key variables
        config = Config()

        # Setup the arguments for multiprocessing
        self._arguments = [(_, ) for _ in pattoo_db_records_lists
                           if bool(_) is True]
        self._multiprocess = config.multiprocessing()
        self._pool_size = cpu_count()
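The argument tuples built in this constructor are intended for a worker pool. Below is a minimal sketch, assuming the surrounding class (called Ingester here, a hypothetical name) exposes an ingest() method that hands self._arguments to a multiprocessing.Pool; the _process worker is also illustrative and is not part of the code above.

from multiprocessing import Pool, cpu_count


def _process(pattoo_db_records):
    """Hypothetical worker that ingests one group of records."""
    return len(pattoo_db_records)


class Ingester():
    """Hypothetical wrapper around the __init__ shown above."""

    def __init__(self, pattoo_db_records_lists):
        # Same argument preparation as in the example, minus the Config lookup
        self._arguments = [(_, ) for _ in pattoo_db_records_lists
                           if bool(_) is True]
        self._pool_size = cpu_count()

    def ingest(self):
        """Process each argument tuple in a separate worker process."""
        with Pool(processes=self._pool_size) as pool:
            results = pool.starmap(_process, self._arguments)
        return sum(results)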
Example No. 2
    def __init__(self, batch_size=500, age=0):
        """Initialize the class.

        Args:
            batch_size: Number of files to read
            age: Minimum age of files to be read per batch

        Returns:
            None

        """
        # Get cache directory
        config = Config()
        directory = config.agent_cache_directory(PATTOO_API_AGENT_NAME)
        self._batch_id = int(time.time() * 1000)

        # Read data from cache. Stop if there is no data found.
        self._data = files.read_json_files(directory,
                                           die=False,
                                           age=age,
                                           count=batch_size)

        # Save the number of files read
        self.files = len(self._data)
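Taking the class name Cache from Example No. 4 below, where this constructor is used, a typical read of one batch would look like the following sketch. The ingest() call also appears in Example No. 4, but its body is not shown on this page, so it is assumed here.

# Usage sketch based on Example No. 4: read one batch of cached JSON files
# that are at least 10 seconds old and process them if any were found.
# Cache.ingest() is assumed from its use in Example No. 4.
cache = Cache(batch_size=500, age=10)
if bool(cache.files) is True:
    records_processed = cache.ingest()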
Example No. 3
class TestConfigIngester(unittest.TestCase):
    """Checks all ConfigIngester methods."""

    ##########################################################################
    # Initialize class variables
    ##########################################################################
    config = ConfigIngester()

    def test___init__(self):
        """Testing function __init__."""
        pass

    def test_ingester_interval(self):
        """Testing function ingester_interval."""
        # Initialize key values
        expected = 45

        # Test
        result = self.config.ingester_interval()
        self.assertEqual(result, expected)

    def test_multiprocessing(self):
        """Testing function multiprocessing."""
        # Initialize key values
        expected = True

        # Test
        result = self.config.multiprocessing()
        self.assertEqual(result, expected)

    def test_batch_size(self):
        """Testing function batch_size."""
        # Initialize key values
        expected = 1503

        # Test
        result = self.config.batch_size()
        self.assertEqual(result, expected)

    def test_daemon_directory(self):
        """Test pattoo_shared.Config inherited method daemon_directory."""
        # Nothing should happen. Directory exists in testing.
        _ = self.config.daemon_directory()

    def test_log_directory(self):
        """Test pattoo_shared.Config inherited method log_directory."""
        # Nothing should happen. Directory exists in testing.
        _ = self.config.log_directory()

    def test_log_file(self):
        """Test pattoo_shared.Config inherited method log_file."""
        # Initialize key values
        expected = '{1}{0}pattoo.log'.format(os.sep,
                                             self.config.log_directory())

        # Test
        result = self.config.log_file()
        self.assertEqual(result, expected)

    def test_log_file_api(self):
        """Test pattoo_shared.Config inherited method log_file_api."""
        # Initialize key values
        expected = '{1}{0}pattoo-api.log'.format(os.sep,
                                                 self.config.log_directory())

        # Test
        result = self.config.log_file_api()
        self.assertEqual(result, expected)

    def test_log_level(self):
        """Test pattoo_shared.Config inherited method log_level."""
        # Initialize key values
        expected = 'debug'

        # Test
        result = self.config.log_level()
        self.assertEqual(result, expected)

    def test_log_file_daemon(self):
        """Test pattoo_shared.Config inherited method log_file_daemon."""
        # Initialize key values
        expected = '{1}{0}pattoo-daemon.log'.format(
            os.sep, self.config.log_directory())

        # Test
        result = self.config.log_file_daemon()
        self.assertEqual(result, expected)

    def test_cache_directory(self):
        """Test pattoo_shared.Config inherited method cache_directory."""
        # Nothing should happen. Directory exists in testing.
        _ = self.config.cache_directory()

    def test_agent_cache_directory(self):
        """Test pattoo_shared.Config inherited method agent_cache_directory."""
        # Initialize key values
        agent_id = 123
        expected = '{1}{0}{2}'.format(os.sep, self.config.cache_directory(),
                                      agent_id)

        # Test
        result = self.config.agent_cache_directory(agent_id)
        self.assertEqual(result, expected)
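To run these checks directly, the test module needs the usual unittest entry point. The sketch below assumes the module is executed as a script and that the test environment already provides a configuration producing the expected values above (interval 45, batch size 1503, multiprocessing enabled).

if __name__ == '__main__':
    # Run all TestConfigIngester checks. The test configuration that yields
    # the expected values must already be in place.
    unittest.main()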
Example No. 4
def process_cache(batch_size=500, max_duration=3600, fileage=10, script=False):
    """Ingest data.

    Args:
        batch_size: Number of files to process at a time
        max_duration: Maximum runtime duration in seconds
        fileage: Minimum age of files to be processed in seconds
        script: True if running as a standalone script. A lockfile is
            created in this case; the daemon has its own locking mechanism.

    Returns:
        success: True if successful

    Method:
        1) Read the files in the cache directory older than a threshold
        2) Process the data in the files
        3) Repeat if new files older than the threshold are found, unless
           we have been running too long.

        Batches of files are read to reduce the risk of overloading available
        memory, and to ensure we can exit if we have been running too long.

    """
    # Initialize key variables
    records = 0
    start = time.time()
    looptime = 0
    files_read = 0
    success = True

    # Get cache directory
    config = Config()
    directory = config.agent_cache_directory(PATTOO_API_AGENT_NAME)

    # Log what we are doing
    log_message = 'Processing ingest cache.'
    log.log2info(20085, log_message)

    # Get the number of files in the directory
    files_found = len(
        [_ for _ in os.listdir(directory) if _.endswith('.json')])

    # Create lockfile only if running as a script.
    # The daemon has its own locking mechanism
    if bool(script) is True:
        success = _lock()
        if bool(success) is False:
            return bool(success)

    # Process the files in batches to reduce the database connection count.
    # Too many simultaneous database connections can cause errors.
    while True:
        # Agents constantly update files. We don't want an infinite loop
        # situation where we always have files available that are newer than
        # the desired fileage.
        loopstart = time.time()
        fileage = fileage + looptime

        # Automatically stop if we are going on too long. (1 of 3)
        duration = loopstart - start
        if duration > max_duration:
            log_message = ('''\
Stopping ingester after exceeding the maximum runtime duration of {}s. \
This can be adjusted on the CLI.'''.format(max_duration))
            log.log2info(20022, log_message)
            break

        # Automatically stop if we are going on too long. (2 of 3)
        if files_read >= files_found:
            # No need to log. This is an expected outcome.
            break

        # Read data from cache. Stop if there is no data found.
        cache = Cache(batch_size=batch_size, age=fileage)
        count = cache.ingest()

        # Stop if no cache files were read. (3 of 3)
        if bool(cache.files) is False:
            # No need to log. This is an expected outcome.
            break

        # Get the records processed, looptime and files read
        records += count
        files_read += cache.files
        looptime = max(time.time() - loopstart, looptime)

    # Print result
    duration = time.time() - start
    if bool(records) is True and bool(duration) is True:
        log_message = ('''\
Agent cache ingest completed. {0} records processed in {1:.2f} seconds, \
{2:.2f} records / second. {3} files read. \
'''.format(records, duration, records / duration, files_read))
        log.log2info(20084, log_message)
    else:
        log_message = 'No files found to ingest'
        log.log2info(20021, log_message)

    # Delete lockfile only if running as a script.
    # The daemon has its own locking mechanism
    if bool(script) is True:
        success = _lock(delete=True)

    # Log what we are doing
    log_message = 'Finished processing ingest cache.'
    log.log2info(20020, log_message)

    return bool(success)
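When invoked as a standalone script, the lockfile branch is exercised by passing script=True. The snippet below is an illustrative wrapper, not an actual pattoo CLI entry point; the argument values simply repeat the function's defaults.

import sys

if __name__ == '__main__':
    # Illustrative standalone invocation. script=True makes process_cache()
    # create its own lockfile and delete it when done.
    SUCCESS = process_cache(
        batch_size=500, max_duration=3600, fileage=10, script=True)
    sys.exit(0 if SUCCESS else 1)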