def __init__(self, data):
    """Process an in-memory cache data dict."""
    # Initialize key variables
    success = None
    agent_data = {
        'devicename': None,
        'id_agent': None,
        'sources': [],
        'timeseries': [],
        'timefixed': [],
        'max_timestamp': 0
    }

    # Read the data into a Drain object directly from memory
    ingest = drain.Drain(filename=None, data=data)

    # Make sure the data is OK
    if ingest.valid() is False:
        log_message = (
            'Cache data at timestamp %s is invalid. Ignoring.'
            '') % (ingest.timestamp())
        log.log2warning(1054, log_message)
    else:
        agent_data['timeseries'] = ingest.timeseries()
        agent_data['timefixed'] = ingest.timefixed()
        agent_data['sources'] = ingest.sources()
        agent_data['devicename'] = ingest.devicename()
        agent_data['id_agent'] = ingest.id_agent()
        agent_data['agent_name'] = ingest.agent()
        agent_data['max_timestamp'] = ingest.timestamp()

        # Update the database and note success
        (success, datapoints_processed) = self._do_update(agent_data)
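A minimal usage sketch follows, showing the two ways drain.Drain is constructed in this section: from a cache file on disk (as the file ingester does) and from an already-parsed dict (as the __init__ above does). The import path, file path, and payload are hypothetical placeholders.

import json

from infoset.cache import drain  # assumed module path

# From a cache file on disk (hypothetical path)
ingest = drain.Drain('/tmp/1234567890_id_agent_hash.json')

# From an in-memory dict holding the same JSON payload
with open('/tmp/1234567890_id_agent_hash.json') as f_handle:
    data = json.load(f_handle)
ingest = drain.Drain(filename=None, data=data)

# Accessors used throughout this section
if ingest.valid():
    print(ingest.devicename(), ingest.id_agent(), ingest.timestamp())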
def process(self):
    """Update the database using threads."""
    # Initialize key variables
    do_update = False
    success = None
    ingests = []
    agent_data = {
        'devicename': None,
        'id_agent': None,
        'sources': [],
        'timeseries': [],
        'timefixed': []
    }

    # Get the directory to which failed files will be moved
    failure_directory = self.config.ingest_failures_directory()

    # Initialize other values
    max_timestamp = 0

    # Get start time for activity
    start_ts = time.time()

    # Process the file for each timestamp, starting from the oldest file
    for data_dict in self.metadata:
        # Initialize key variables
        timestamp = data_dict['timestamp']
        filepath = data_dict['filepath']

        # Read in data
        ingest = drain.Drain(filepath)

        # Make sure the file is OK. If not, move it to a directory
        # for further analysis by administrators.
        if ingest.valid() is False:
            log_message = (
                'Cache ingest file %s is invalid. Moving.'
                '') % (filepath)
            log.log2warning(1054, log_message)
            shutil.copy(filepath, failure_directory)
            os.remove(filepath)
            continue

        # Append data
        agent_data['timeseries'].extend(ingest.timeseries())
        agent_data['timefixed'].extend(ingest.timefixed())
        agent_data['sources'].extend(ingest.sources())

        # Append the ingest object to a list for later processing
        ingests.append(ingest)

        # Get the max timestamp
        max_timestamp = max(timestamp, max_timestamp)

        # Update information that doesn't change
        if do_update is False:
            agent_data['devicename'] = ingest.devicename()
            agent_data['id_agent'] = ingest.id_agent()
            agent_data['agent_name'] = ingest.agent()

            # Get the PID file for the agent
            pid_file = daemon.pid_file(self.ingester_agent_name)

        # Update the PID file for the agent to ensure agentd.py
        # doesn't kill the ingest while processing a long stream
        # of files. If we are running this using __main__ = process()
        # then the PID file wouldn't have been created, hence the check.
        if os.path.isfile(pid_file) is True:
            daemon.update_pid(self.ingester_agent_name)

        # Update the update flag
        do_update = True

    # Process the rest
    if do_update is True:
        # Update remaining agent data
        agent_data['max_timestamp'] = max_timestamp

        # Add datapoints to the database
        db_prepare = _PrepareDatabase(agent_data)
        db_prepare.add_datapoints()

        # Get the latest datapoints
        datapoints = db_prepare.get_datapoints()

        # Get the assigned index values for the device and agent
        idx_device = db_prepare.idx_device()
        idx_agent = db_prepare.idx_agent()

        # Update the database with data
        db_update = _UpdateDB(agent_data, datapoints)
        success = db_update.update()

        # Update database table timestamps
        update_timestamps = _UpdateLastTimestamp(
            idx_device, idx_agent, max_timestamp)
        update_timestamps.agent()
        update_timestamps.deviceagent()
        update_timestamps.datapoint()

        # Purge source files. This is only done after complete success
        # of the database updates. Otherwise we could lose data in the
        # event of an ingester crash. The ingester would re-read the
        # files and process the non-duplicates, while deleting the
        # duplicates.
        for ingest in ingests:
            ingest.purge()

        # Log the duration of the activity
        duration = time.time() - start_ts
        if success is True:
            log_message = (
                'Agent %s was processed from %s cache files in %s '
                'seconds (%s seconds/file, %s seconds/datapoint)'
                '') % (
                    agent_data['id_agent'],
                    len(ingests),
                    round(duration, 4),
                    round(duration / len(ingests), 4),
                    round(duration / len(datapoints), 6))
            log.log2info(1007, log_message)
        else:
            log_message = (
                'Failed to process all cache files for agent %s. '
                'Investigate.') % (agent_data['id_agent'])
            log.log2info(1008, log_message)
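The self.metadata list consumed by this loop is not shown in this section. From the keys accessed above and the oldest-first comment, it presumably looks something like the following; the paths and values are hypothetical.

# Hypothetical shape of self.metadata, inferred from the keys used above;
# entries are sorted so the oldest timestamp is processed first.
metadata = [
    {'timestamp': 1234567890,
     'filepath': '/opt/infoset/cache/1234567890_id_hash.json'},
    {'timestamp': 1234568190,
     'filepath': '/opt/infoset/cache/1234568190_id_hash.json'},
]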
class TestDrain(unittest.TestCase):
    """Checks all functions and methods."""

    # Initialize key variables
    setup = unittest_setup.TestVariables()
    data = setup.cache_data()

    # Create valid file filled with valid data
    directory = tempfile.mkdtemp()
    filepath = ('%s/%s_%s_%s.json') % (
        directory,
        data['timestamp'],
        data['id_agent'],
        general.hashstring(data['devicename']))
    with open(filepath, 'w') as f_handle:
        json.dump(data, f_handle)

    # Create a valid Drain object
    ingest = drain.Drain(filepath)

    @classmethod
    def tearDownClass(cls):
        """Clean up when all over."""
        # Delete unnecessary files
        os.remove(cls.filepath)

    def test___init__(self):
        """Testing function __init__."""
        pass

    def test_valid(self):
        """Testing function valid."""
        # Test
        result = self.ingest.valid()
        self.assertEqual(result, True)

    def test_id_agent(self):
        """Testing function id_agent."""
        # Test
        result = self.ingest.id_agent()
        self.assertEqual(result, self.data['id_agent'])

    def test_timestamp(self):
        """Testing function timestamp."""
        # Test
        result = self.ingest.timestamp()
        self.assertEqual(result, self.data['timestamp'])

    def test_agent(self):
        """Testing function agent."""
        # Test
        result = self.ingest.agent()
        self.assertEqual(result, self.data['agent'])

    def test_devicename(self):
        """Testing function devicename."""
        # Test
        result = self.ingest.devicename()
        self.assertEqual(result, self.data['devicename'])

    def test_counter32(self):
        """Testing function counter32."""
        # Initialize key variables
        datapoints = _expected(self.data, 32)
        found = 0

        # Test
        results = self.ingest.counter32()
        for datapoint in datapoints:
            for result in results:
                if result['id_datapoint'] == datapoint['id_datapoint']:
                    self.assertEqual(
                        result['timestamp'], datapoint['timestamp'])
                    self.assertEqual(
                        result['value'], datapoint['value'])
                    self.assertEqual(
                        result['id_agent'], datapoint['id_agent'])

                    # Increment found
                    found += 1

        # Make sure that all are found
        self.assertEqual(len(results), len(datapoints))
        self.assertEqual(len(results), found)

    def test_counter64(self):
        """Testing function counter64."""
        # Initialize key variables
        datapoints = _expected(self.data, 64)
        found = 0

        # Test
        results = self.ingest.counter64()
        for datapoint in datapoints:
            for result in results:
                if result['id_datapoint'] == datapoint['id_datapoint']:
                    self.assertEqual(
                        result['timestamp'], datapoint['timestamp'])
                    self.assertEqual(
                        result['value'], datapoint['value'])
                    self.assertEqual(
                        result['id_agent'], datapoint['id_agent'])

                    # Increment found
                    found += 1

        # Make sure that all are found
        self.assertEqual(len(results), len(datapoints))
        self.assertEqual(len(results), found)

    def test_floating(self):
        """Testing function floating."""
        # Initialize key variables
        datapoints = _expected(self.data, 1)
        found = 0

        # Test
        results = self.ingest.floating()
        for datapoint in datapoints:
            for result in results:
                if result['id_datapoint'] == datapoint['id_datapoint']:
                    self.assertEqual(
                        result['timestamp'], datapoint['timestamp'])
                    self.assertEqual(
                        result['value'], datapoint['value'])
                    self.assertEqual(
                        result['id_agent'], datapoint['id_agent'])

                    # Increment found
                    found += 1

        # Make sure that all are found
        self.assertEqual(len(results), len(datapoints))
        self.assertEqual(len(results), found)

    def test_timeseries(self):
        """Testing function timeseries."""
        # Initialize key variables
        datapoints = []
        found = 0

        # Populate datapoints list
        datapoints.extend(_expected(self.data, 1))
        datapoints.extend(_expected(self.data, 32))
        datapoints.extend(_expected(self.data, 64))

        # Test
        results = self.ingest.timeseries()
        for datapoint in datapoints:
            for result in results:
                if result['id_datapoint'] == datapoint['id_datapoint']:
                    self.assertEqual(
                        result['timestamp'], datapoint['timestamp'])
                    self.assertEqual(
                        result['value'], datapoint['value'])
                    self.assertEqual(
                        result['id_agent'], datapoint['id_agent'])

                    # Increment found
                    found += 1

        # Make sure that all are found
        self.assertEqual(len(results), len(datapoints))
        self.assertEqual(len(results), found)

    def test_timefixed(self):
        """Testing function timefixed."""
        # Initialize key variables
        datapoints = _expected(self.data, None)
        found = 0

        # Test
        results = self.ingest.timefixed()
        for datapoint in datapoints:
            for result in results:
                if result['id_datapoint'] == datapoint['id_datapoint']:
                    self.assertEqual(
                        result['timestamp'], datapoint['timestamp'])
                    self.assertEqual(
                        result['value'], datapoint['value'])
                    self.assertEqual(
                        result['id_agent'], datapoint['id_agent'])

                    # Increment found
                    found += 1

        # Make sure that all are found
        self.assertEqual(len(results), len(datapoints))
        self.assertEqual(len(results), found)

    def test_sources(self):
        """Testing function sources."""
        # Initialize key variables
        sources = _sources(self.data)
        found = 0

        # Test
        results = self.ingest.sources()
        for source in sources:
            for result in results:
                if result['id_datapoint'] == source['id_datapoint']:
                    self.assertEqual(
                        result['id_agent'], source['id_agent'])
                    self.assertEqual(
                        result['agent_label'], source['agent_label'])
                    self.assertEqual(
                        result['agent_source'], source['agent_source'])
                    self.assertEqual(
                        result['description'], source['description'])
                    self.assertEqual(
                        result['base_type'], source['base_type'])

                    # Increment found
                    found += 1

        # Make sure that all are found
        self.assertEqual(len(results), len(sources))
        self.assertEqual(len(results), found)

    def test_purge(self):
        """Testing function purge."""
        directory = tempfile.mkdtemp()
        filepath = ('%s/%s_%s_%s.json') % (
            directory,
            self.data['timestamp'],
            self.data['id_agent'],
            general.hashstring(self.data['devicename']))
        with open(filepath, 'w') as f_handle:
            json.dump(self.data, f_handle)

        # Create a valid Drain object
        ingest = drain.Drain(filepath)

        # Test
        self.assertEqual(os.path.exists(filepath), True)
        self.assertEqual(os.path.isfile(filepath), True)
        ingest.purge()
        self.assertEqual(os.path.exists(filepath), False)
        self.assertEqual(os.path.isfile(filepath), False)
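Assuming the test module is run directly, the standard unittest entry point would execute the class above:

if __name__ == '__main__':
    unittest.main()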
def process(self):
    """Update the database using threads."""
    # Initialize key variables
    do_update = False
    success = None
    ingests = []
    agent_data = {
        'devicename': None,
        'id_agent': None,
        'sources': [],
        'timeseries': [],
        'timefixed': [],
        'max_timestamp': 0
    }

    # Get the directory to which failed files will be moved
    failure_directory = self.config.ingest_failures_directory()

    # Get start time for activity
    start_ts = time.time()

    # Process the file for each timestamp, starting from the oldest file
    for data_dict in self.metadata:
        # Initialize key variables
        timestamp = data_dict['timestamp']
        filepath = data_dict['filepath']

        # Read in data
        ingest = drain.Drain(filepath)

        # Make sure the file is OK. If not, move it to a directory
        # for further analysis by administrators.
        if ingest.valid() is False:
            log_message = (
                'Cache ingest file %s is invalid. Moving.'
                '') % (filepath)
            log.log2warning(1054, log_message)
            shutil.copy(filepath, failure_directory)
            os.remove(filepath)
            continue

        # Append data
        agent_data['timeseries'].extend(ingest.timeseries())
        agent_data['timefixed'].extend(ingest.timefixed())
        agent_data['sources'].extend(ingest.sources())

        # Append the ingest object to a list for later processing
        ingests.append(ingest)

        # Update information that doesn't change
        if do_update is False:
            agent_data['devicename'] = ingest.devicename()
            agent_data['id_agent'] = ingest.id_agent()
            agent_data['agent_name'] = ingest.agent()

            # Get the PID file for the agent
            pid_file = daemon.pid_file(self.ingester_agent_name)
        else:
            # Get the max timestamp
            agent_data['max_timestamp'] = max(
                timestamp, agent_data['max_timestamp'])

        # Update the PID file for the agent to ensure agentd.py
        # doesn't kill the ingest while processing a long stream
        # of files. If we are running this using __main__ = process()
        # then the PID file wouldn't have been created, hence the check.
        if os.path.isfile(pid_file) is True:
            daemon.update_pid(self.ingester_agent_name)

        # Update the update flag
        do_update = True

    # Process the rest
    if do_update is True:
        # Update the database and note success
        (success, datapoints_processed) = self._do_update(
            agent_data, ingests)

        # Log the duration of the activity
        duration = time.time() - start_ts
        if success is True:
            log_message = (
                'Agent %s was processed from %s cache files in %s '
                'seconds (%s seconds/file, %s seconds/datapoint)'
                '') % (
                    agent_data['id_agent'],
                    len(ingests),
                    round(duration, 4),
                    round(duration / len(ingests), 4),
                    round(duration / datapoints_processed, 6))
            log.log2info(1007, log_message)
        else:
            log_message = (
                'Failed to process all cache files for agent %s. '
                'Investigate.') % (agent_data['id_agent'])
            log.log2info(1008, log_message)
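_do_update itself is not shown in this section. Below is a minimal sketch of what it could look like, reconstructed from the inline database steps in the older version of process() above. The signature and the (success, datapoints_processed) return tuple are assumptions inferred from the call sites; note that the __init__ earlier calls a one-argument variant, so the real code may differ.

def _do_update(self, agent_data, ingests):
    """Sketch only: consolidates the database steps shown inline above."""
    # Add datapoints to the database
    db_prepare = _PrepareDatabase(agent_data)
    db_prepare.add_datapoints()

    # Get the latest datapoints
    datapoints = db_prepare.get_datapoints()

    # Get the assigned index values for the device and agent
    idx_device = db_prepare.idx_device()
    idx_agent = db_prepare.idx_agent()

    # Update the database with data
    db_update = _UpdateDB(agent_data, datapoints)
    success = db_update.update()

    # Update database table timestamps
    update_timestamps = _UpdateLastTimestamp(
        idx_device, idx_agent, agent_data['max_timestamp'])
    update_timestamps.agent()
    update_timestamps.deviceagent()
    update_timestamps.datapoint()

    # Purge source files only after the database updates succeed, so an
    # ingester crash cannot lose data (assumption: purge-on-success)
    if success is True:
        for ingest in ingests:
            ingest.purge()

    # Assumption: the datapoint count feeds the seconds/datapoint log line
    return (success, len(datapoints))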