def __init__(self, pattoo_db_records_lists):
    """Initialize the class.

    Args:
        pattoo_db_records_lists: List of PattooDBrecord object lists
            grouped by source and sorted by timestamp. This data is
            obtained from PattooShared.converter.extract

    Returns:
        None

    """
    # Initialize key variables
    config = Config()

    # Setup the arguments for multiprocessing
    self._arguments = [
        (_, ) for _ in pattoo_db_records_lists if bool(_) is True]
    self._multiprocess = config.multiprocessing()
    self._pool_size = cpu_count()
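# --- Hedged usage sketch (not part of the original file) ---
# Shows one way the attributes set up by __init__ above could be consumed.
# The worker function `_process` and the helper `ingest_sketch` are
# illustrative assumptions; only the (records,) argument tuples, the
# multiprocessing flag and the pool size come from the code above.
from multiprocessing import Pool


def _process(pattoo_db_records):
    """Hypothetical worker: handle one grouped list of records."""
    # Placeholder body; the real project would persist these records
    # to the database here.
    return len(pattoo_db_records)


def ingest_sketch(arguments, multiprocess, pool_size):
    """Fan the grouped record lists out to workers, or run them serially."""
    if bool(multiprocess) is True:
        # One worker per CPU core; each receives a single (records,) tuple
        with Pool(processes=pool_size) as pool:
            results = pool.starmap(_process, arguments)
    else:
        # Serial fallback when multiprocessing is disabled in the config
        results = [_process(records) for (records,) in arguments]
    return sum(results)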
def __init__(self, batch_size=500, age=0):
    """Initialize the class.

    Args:
        batch_size: Number of files to read
        age: Minimum age of files to be read per batch

    Returns:
        None

    """
    # Get cache directory
    config = Config()
    directory = config.agent_cache_directory(PATTOO_API_AGENT_NAME)
    self._batch_id = int(time.time() * 1000)

    # Read data from cache. Stop if there is no data found.
    self._data = files.read_json_files(
        directory, die=False, age=age, count=batch_size)

    # Save the number of files read
    self.files = len(self._data)
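# --- Hedged usage sketch (not part of the original file) ---
# Demonstrates how a caller might drain one batch from the cache. The
# Cache(...) constructor and the `files` attribute come from the __init__
# above; the ingest() call mirrors how process_cache() below uses the class,
# and the print statement is purely illustrative.
cache = Cache(batch_size=500, age=10)
if bool(cache.files) is True:
    count = cache.ingest()
    print('Ingested {} records from {} cache files'.format(count, cache.files))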
class TestConfigIngester(unittest.TestCase):
    """Checks all ConfigIngester methods."""

    ##########################################################################
    # Initialize variable class
    ##########################################################################

    config = ConfigIngester()

    def test___init__(self):
        """Testing function __init__."""
        pass

    def test_ingester_interval(self):
        """Testing function ingester_interval."""
        # Initialize key values
        expected = 45

        # Test
        result = self.config.ingester_interval()
        self.assertEqual(result, expected)

    def test_multiprocessing(self):
        """Testing function multiprocessing."""
        # Initialize key values
        expected = True

        # Test
        result = self.config.multiprocessing()
        self.assertEqual(result, expected)

    def test_batch_size(self):
        """Testing function batch_size."""
        # Initialize key values
        expected = 1503

        # Test
        result = self.config.batch_size()
        self.assertEqual(result, expected)

    def test_daemon_directory(self):
        """Test pattoo_shared.Config inherited method daemon_directory."""
        # Nothing should happen. Directory exists in testing.
        _ = self.config.daemon_directory()

    def test_log_directory(self):
        """Test pattoo_shared.Config inherited method log_directory."""
        # Nothing should happen. Directory exists in testing.
        _ = self.config.log_directory()

    def test_log_file(self):
        """Test pattoo_shared.Config inherited method log_file."""
        # Initialize key values
        expected = '{1}{0}pattoo.log'.format(
            os.sep, self.config.log_directory())

        # Test
        result = self.config.log_file()
        self.assertEqual(result, expected)

    def test_log_file_api(self):
        """Test pattoo_shared.Config inherited method log_file_api."""
        # Initialize key values
        expected = '{1}{0}pattoo-api.log'.format(
            os.sep, self.config.log_directory())

        # Test
        result = self.config.log_file_api()
        self.assertEqual(result, expected)

    def test_log_level(self):
        """Test pattoo_shared.Config inherited method log_level."""
        # Initialize key values
        expected = 'debug'

        # Test
        result = self.config.log_level()
        self.assertEqual(result, expected)

    def test_log_file_daemon(self):
        """Test pattoo_shared.Config inherited method log_file_daemon."""
        # Initialize key values
        expected = '{1}{0}pattoo-daemon.log'.format(
            os.sep, self.config.log_directory())

        # Test
        result = self.config.log_file_daemon()
        self.assertEqual(result, expected)

    def test_cache_directory(self):
        """Test pattoo_shared.Config inherited method cache_directory."""
        # Nothing should happen. Directory exists in testing.
        _ = self.config.cache_directory()

    def test_agent_cache_directory(self):
        """Test pattoo_shared.Config inherited method agent_cache_directory."""
        # Initialize key values
        agent_id = 123
        expected = '{1}{0}{2}'.format(
            os.sep, self.config.cache_directory(), agent_id)

        # Test
        result = self.config.agent_cache_directory(agent_id)
        self.assertEqual(result, expected)
def process_cache(batch_size=500, max_duration=3600, fileage=10, script=False):
    """Ingest data.

    Args:
        batch_size: Number of files to process at a time
        max_duration: Maximum duration in seconds for which to keep running
        fileage: Minimum age of files to be processed in seconds
        script: True if running as a standalone script rather than the daemon

    Returns:
        success: True if successful

    Method:
        1) Read the files in the cache directory older than a threshold
        2) Process the data in the files
        3) Repeat, if new files are found that are older than the threshold,
           or stop if we have been running too long.

        Batches of files are read to reduce the risk of overloading available
        memory, and to ensure we can exit if we are running too long.

    """
    # Initialize key variables
    records = 0
    start = time.time()
    looptime = 0
    files_read = 0
    success = True

    # Get cache directory
    config = Config()
    directory = config.agent_cache_directory(PATTOO_API_AGENT_NAME)

    # Log what we are doing
    log_message = 'Processing ingest cache.'
    log.log2info(20085, log_message)

    # Get the number of files in the directory
    files_found = len(
        [_ for _ in os.listdir(directory) if _.endswith('.json')])

    # Create lockfile only if running as a script.
    # The daemon has its own locking mechanism
    if bool(script) is True:
        success = _lock()
        if bool(success) is False:
            return bool(success)

    # Process the files in batches to reduce the database connection count,
    # which can otherwise cause errors
    while True:
        # Agents constantly update files. We don't want an infinite loop
        # situation where we always have files available that are newer than
        # the desired fileage.
        loopstart = time.time()
        fileage = fileage + looptime

        # Automatically stop if we are going on too long. (1 of 3)
        duration = loopstart - start
        if duration > max_duration:
            log_message = ('''\
Stopping ingester after exceeding the maximum runtime duration of {}s. \
This can be adjusted on the CLI.'''.format(max_duration))
            log.log2info(20022, log_message)
            break

        # Automatically stop if we are going on too long. (2 of 3)
        if files_read >= files_found:
            # No need to log. This is an expected outcome.
            break

        # Read data from cache. Stop if there is no data found.
        cache = Cache(batch_size=batch_size, age=fileage)
        count = cache.ingest()

        # Automatically stop if we are going on too long. (3 of 3)
        if bool(cache.files) is False:
            # No need to log. This is an expected outcome.
            break

        # Get the records processed, looptime and files read
        records += count
        files_read += cache.files
        looptime = max(time.time() - loopstart, looptime)

    # Print result
    duration = time.time() - start
    if bool(records) is True and bool(duration) is True:
        log_message = ('''\
Agent cache ingest completed. {0} records processed in {1:.2f} seconds, \
{2:.2f} records / second. {3} files read. \
'''.format(records, duration, records / duration, files_read))
        log.log2info(20084, log_message)
    else:
        log_message = 'No files found to ingest'
        log.log2info(20021, log_message)

    # Delete lockfile only if running as a script.
    # The daemon has its own locking mechanism
    if bool(script) is True:
        success = _lock(delete=True)

    # Log what we are doing
    log_message = 'Finished processing ingest cache.'
    log.log2info(20020, log_message)

    return bool(success)
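# --- Hedged invocation sketch (not part of the original file) ---
# One way process_cache() might be called from a standalone CLI script. The
# argparse flag names and the exit-code convention are assumptions; only the
# process_cache() signature and its `script=True` locking behaviour come from
# the function above.
import argparse
import sys


def main():
    """Parse CLI arguments and run a single ingest pass."""
    parser = argparse.ArgumentParser(
        description='Ingest pattoo agent cache files.')
    parser.add_argument('--batch_size', type=int, default=500)
    parser.add_argument('--max_duration', type=int, default=3600)
    args = parser.parse_args()

    # Run as a script so process_cache() creates and removes its lockfile
    success = process_cache(
        batch_size=args.batch_size,
        max_duration=args.max_duration,
        script=True)
    sys.exit(0 if success else 1)


if __name__ == '__main__':
    main()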