def start(self):
    """Start the daemon.

    Reads the PID file to detect an already-running daemon, refuses to
    start twice, then daemonizes and enters the main loop.

    Args:
        None

    Returns:
        None

    """
    # Read any existing PID file; absence means no daemon is running
    try:
        with open(self.pidfile, 'r') as pid_handle:
            existing_pid = int(pid_handle.read().strip())
    except IOError:
        existing_pid = None

    # Refuse to start a second instance
    if existing_pid:
        log.log2die(1062, (
            'PID file: %s already exists. Daemon already running?'
            '') % (self.pidfile))

    # Detach from the controlling terminal
    self.daemonize()

    # Record successful startup
    log.log2info(1070, ('Daemon Started - PID file: %s') % (self.pidfile))

    # Enter the daemon's main loop
    self.run()
def query(self):
    """Query all remote devices for data.

    Args:
        None

    Returns:
        None

    """
    # Initialize key variables
    config = self.config

    # Warn about a leftover lock file from a previous run
    if os.path.exists(self.lockfile_parent) is True:
        log.log2see(1083, (
            'Lock file %s exists. Multiple API daemons running '
            'API may have died '
            'catastrophically in the past, in which case the lockfile '
            'should be deleted. '
            '') % (self.lockfile_parent))

    # Warn about a leftover PID file from a previous run
    if os.path.exists(self.pidfile_parent) is True:
        log.log2see(1084, (
            'PID file: %s already exists. Daemon already running? '
            'If not, it may have died catastrophically in the past '
            'in which case you should use --stop --force to fix.'
            '') % (self.pidfile_parent))

    ######################################################################
    #
    # Assign options in format that the Gunicorn WSGI will accept
    #
    # NOTE! to get a full set of valid options pprint(self.cfg.settings)
    # in the instantiation of StandaloneApplication. The option names
    # do not exactly match the CLI options found at
    # http://docs.gunicorn.org/en/stable/settings.html
    #
    ######################################################################
    logfile = config.web_log_file()
    gunicorn_options = {
        'bind': '%s:%s' % (config.listen_address(), config.bind_port()),
        'accesslog': logfile,
        'errorlog': logfile,
        'capture_output': True,
        'pidfile': self.pidfile_child,
        'loglevel': config.log_level(),
        'workers': _number_of_workers(),
    }

    # Log so that a user running the script from the CLI knows that
    # something is happening
    log.log2info(1022, (
        'Infoset API running on %s:%s and logging to file %s.'
        '') % (config.listen_address(), config.bind_port(), logfile))

    # Run
    StandaloneApplication(API, gunicorn_options).run()
def _check_when_disabled(config):
    """Stop a disabled agent if its process is still alive.

    Args:
        config: Agent configuration object

    Returns:
        None

    """
    # Get agent status variables
    name_of_agent = config.agent_name()
    pid_path = daemon.pid_file(name_of_agent)

    # Nothing to do if there is no PID file
    if os.path.isfile(pid_path) is False:
        return

    # Read the recorded PID
    with open(pid_path, 'r') as handle:
        recorded_pid = int(handle.readline().strip())

    # Shut the agent down if its process still exists
    if psutil.pid_exists(recorded_pid) is True:
        log.log2info(1032, (
            'Agent "%s" is alive, but should be disabled. '
            'Attempting to stop.'
            '') % (name_of_agent))
        _stop(config)
def stop(self): """Stop the daemon. Args: None Returns: """ # Get the pid from the pidfile try: with open(self.pidfile, 'r') as pf_handle: pid = int(pf_handle.read().strip()) except IOError: pid = None if not pid: log_message = ( 'PID file: %s does not exist. Daemon not running?' '') % (self.pidfile) log.log2warning(1063, log_message) # Not an error in a restart return # Try killing the daemon process try: while 1: # Sleep a while time.sleep(0.3) # Process lockfile state when trying to stop if self.lockfile is None: os.kill(pid, signal.SIGTERM) else: if os.path.exists(self.lockfile) is True: continue else: os.kill(pid, signal.SIGTERM) except OSError as err: error = str(err.args) if error.find("No such process") > 0: self.delpid() self.dellock() else: log_message = (str(err.args)) log_message = ( '%s - PID file: %s') % (log_message, self.pidfile) log.log2die(1068, log_message) except: log_message = ( 'Unknown daemon "stop" error for PID file: %s' '') % (self.pidfile) log.log2die(1066, log_message) # Log success self.delpid() self.dellock() log_message = ('Daemon Stopped - PID file: %s') % (self.pidfile) log.log2info(1071, log_message)
def purge(self):
    """Purge data from cache by posting to central server.

    Cache files matching this agent's ID are posted in timestamp order
    and deleted once the server accepts them.

    Args:
        None

    Returns:
        None

    """
    # Initialize key variables
    id_agent = self.data['id_agent']

    # Add files in cache directory to list only if they match the
    # cache suffix
    all_filenames = [
        filename for filename in os.listdir(self.cache_dir)
        if os.path.isfile(os.path.join(self.cache_dir, filename))
    ]
    filenames = [
        filename for filename in all_filenames
        if filename.endswith(self.cache_suffix)
    ]

    # Read cache files in sorted order.
    # NOTE: We must post data in timestamp sorted order.
    # BUGFIX: lists have no .sorted() method; the original
    # "filenames.sorted()" raised AttributeError. Use the sorted()
    # builtin instead.
    for filename in sorted(filenames):
        # Only post files for our own UID value
        if id_agent not in filename:
            continue

        # Get the full filepath for the cache file and post
        filepath = os.path.join(self.cache_dir, filename)
        with open(filepath, 'r') as f_handle:
            try:
                data = json.load(f_handle)
            except Exception:
                # Narrowed from a bare "except" so KeyboardInterrupt /
                # SystemExit are not swallowed. log2die terminates, so
                # a corrupt file stops the purge with a clear message.
                log_message = (
                    'Error reading previously cached agent data file %s '
                    'for agent %s. May be corrupted.'
                    '') % (filepath, self.name())
                log.log2die(1058, log_message)

        # Post file
        success = self.post(save=False, data=data)

        # Delete file if successful
        if success is True:
            os.remove(filepath)

            # Log removal
            log_message = ('Purging cache file %s after successfully '
                           'contacting server'
                           '') % (filepath)
            log.log2info(1110, log_message)
def post(self, save=True, data=None):
    """Post data to the central server.

    Args:
        save: When True, save data to the cache directory if posting fails
        data: Data to post. If None, then uses self.data

    Returns:
        success: True if successful

    """
    # Initialize key variables
    success = False
    response = False
    timestamp = self.data['timestamp']
    id_agent = self.data['id_agent']

    # Create data to post
    if data is None:
        data = self.data

    # Post data, save to cache if this fails.
    # BUGFIX: narrowed the bare "except" to "except Exception" so that
    # KeyboardInterrupt / SystemExit are not silently swallowed.
    try:
        result = requests.post(self.url, json=data)
        response = True
    except Exception:
        if save is True:
            # Create a unique, very long filename to reduce the risk of
            # cache-file collisions
            devicehash = general.hashstring(self.data['devicename'], sha=1)
            filename = ('%s/%s_%s_%s.json') % (
                self.cache_dir, timestamp, id_agent, devicehash)

            # Save data
            with open(filename, 'w') as f_handle:
                json.dump(data, f_handle)

    # Define success: only a 200 response counts
    if response is True:
        if result.status_code == 200:
            success = True

    # Log message
    if success is True:
        log_message = ('Agent "%s" successfully contacted server %s'
                       '') % (self.name(), self.url)
        log.log2info(1027, log_message)
    else:
        log_message = ('Agent "%s" failed to contact server %s'
                       '') % (self.name(), self.url)
        log.log2warning(1028, log_message)

    # Return
    return success
def post(self, save=True, data=None): """Post data to central server. Args: save: When True, save data to cache directory if postinf fails data: Data to post. If None, then uses self.data Returns: success: "True: if successful """ # Initialize key variables success = False timestamp = self.data['timestamp'] id_agent = self.data['id_agent'] # Create data to post if data is None: data = self.data # Post data save to cache if this fails uri = ('/receive/%s') % (id_agent) success = self._api.post(uri, data) # Log message if success is True: log_message = ('Agent "%s" successfully contacted server' '') % (self.name()) log.log2info(1012, log_message) else: # Save data if requested if save is True: # Create a unique very long filename to reduce risk of filename = ('%s/%s_%s.json') % (self.cache_dir, timestamp, self.cache_suffix) # Save data with open(filename, 'w') as f_handle: json.dump(data, f_handle) # Log message log_message = ('Agent "%s" failed to contact server' '') % (self.name()) log.log2warning(1013, log_message) # Return return success
def _start(config):
    """Start agent.

    Args:
        config: Agent configuration object

    Returns:
        None

    """
    # Initialize key variables
    name_of_agent = config.agent_name()
    executable = _agent_filepath(config)

    # Announce the startup attempt
    log.log2info(1077, ('Starting agent "%s".' '') % (name_of_agent))

    # Invoke the agent's own CLI to start it
    _execute(('%s --start') % (executable))
def _check_when_enabled(config):
    """Ensure an enabled agent is running; restart or start it as needed.

    Args:
        config: Agent configuration object

    Returns:
        None

    """
    # Initialize key variables
    name_of_agent = config.agent_name()
    executable = _agent_filepath(config)

    # Get agent status variables
    pid_path = daemon.pid_file(name_of_agent)
    lock_path = daemon.lock_file(name_of_agent)

    # Ignore agents that cannot be found
    if os.path.isfile(executable) is False:
        log.log2info(1075, (
            'Agent executable file %s listed in the '
            'configuration file '
            'of agent "%s" does not exist. Please fix.'
            '') % (executable, name_of_agent))
        return

    # No PID file: restart if a stale lock file remains, otherwise start
    if os.path.isfile(pid_path) is False:
        if os.path.isfile(lock_path) is True:
            _restart(config)
        else:
            _start(config)
        return

    # Read the recorded PID
    with open(pid_path, 'r') as handle:
        recorded_pid = int(handle.readline().strip())

    # Check if the service died catastrophically (PID file left behind)
    if psutil.pid_exists(recorded_pid) is False:
        log.log2info(1041, (
            'Agent "%s" is dead. Attempting to restart.'
            '') % (name_of_agent))

        # Remove PID file and restart
        os.remove(pid_path)
        _restart(config)
        return

    # Process is alive: optionally detect a hang via PID-file staleness
    if config.monitor_agent_pid() is True:
        try:
            mtime = os.path.getmtime(pid_path)
        except OSError:
            mtime = 0

        # A PID file untouched for over ten minutes means a hung agent
        if mtime < int(time.time()) - (60 * 10):
            log.log2info(1076, (
                'Agent "%s" is hung. Attempting to restart.'
                '') % (name_of_agent))
            _restart(config)
def process(self): """Update the database using threads.""" # Initialize key variables do_update = False success = None ingests = [] agent_data = { 'devicename': None, 'id_agent': None, 'sources': [], 'timeseries': [], 'timefixed': [] } # Get the directory to which failed files will be moved failure_directory = self.config.ingest_failures_directory() # Initialize other values max_timestamp = 0 # Get start time for activity start_ts = time.time() # Process file for each timestamp, starting from the oldes file for data_dict in self.metadata: # Initialize key variables timestamp = data_dict['timestamp'] filepath = data_dict['filepath'] # Read in data ingest = drain.Drain(filepath) # Make sure file is OK # Move it to a directory for further analysis # by administrators if ingest.valid() is False: log_message = ('Cache ingest file %s is invalid. Moving.' '') % (filepath) log.log2warning(1054, log_message) shutil.copy(filepath, failure_directory) os.remove(filepath) continue # Append data agent_data['timeseries'].extend(ingest.timeseries()) agent_data['timefixed'].extend(ingest.timefixed()) agent_data['sources'].extend(ingest.sources()) # Append ingest object to a list for later processing ingests.append(ingest) # Get the max timestamp max_timestamp = max(timestamp, max_timestamp) # Update information that doesn't change if do_update is False: agent_data['devicename'] = ingest.devicename() agent_data['id_agent'] = ingest.id_agent() agent_data['agent_name'] = ingest.agent() # Get the PID file for the agent pid_file = daemon.pid_file(self.ingester_agent_name) # Update the PID file for the agent to ensure agentd.py # doesn't kill the ingest while processing a long stream # of files. If we are running this using __main__ = process() # then the pid file wouldn't have been created, hence the logic. 
if os.path.isfile(pid_file) is True: daemon.update_pid(self.ingester_agent_name) # Update update flag do_update = True # Process the rest if do_update is True: # Update remaining agent data agent_data['max_timestamp'] = max_timestamp # Add datapoints to the database db_prepare = _PrepareDatabase(agent_data) db_prepare.add_datapoints() # Get the latest datapoints datapoints = db_prepare.get_datapoints() # Get the assigned index values for the device and agent idx_device = db_prepare.idx_device() idx_agent = db_prepare.idx_agent() # Update database with data db_update = _UpdateDB(agent_data, datapoints) success = db_update.update() # Update database table timestamps update_timestamps = _UpdateLastTimestamp(idx_device, idx_agent, max_timestamp) update_timestamps.agent() update_timestamps.deviceagent() update_timestamps.datapoint() # Purge source files. Only done after complete # success of database updates. If not we could lose data in the # event of an ingester crash. Ingester would re-read the files # and process the non-duplicates, while deleting the duplicates. for ingest in ingests: ingest.purge() # Log duration of activity duration = time.time() - start_ts if success is True: log_message = ( 'Agent %s was processed from %s cache files in %s ' 'seconds (%s seconds/file, %s seconds/datapoint)' '') % (agent_data['id_agent'], len(ingests), round(duration, 4), round( duration / len(ingests), 4), round(duration / len(datapoints), 6)) log.log2info(1007, log_message) else: log_message = ( 'Failed to process all cache files for agent %s. ' 'Investigate.') % (agent_data['id_agent']) log.log2info(1008, log_message)
def process(self):
    """Update the database using threads.

    Reads every cache file listed in self.metadata, accumulates the
    agent data they contain, and hands the result to self._do_update.

    Args:
        None

    Returns:
        None

    """
    # Initialize key variables.
    # do_update: set True once at least one valid file has been read.
    # success: None until a database update has been attempted.
    do_update = False
    success = None
    ingests = []
    agent_data = {
        'devicename': None,
        'id_agent': None,
        'sources': [],
        'timeseries': [],
        'timefixed': [],
        'max_timestamp': 0
    }

    # Get the directory to which failed files will be moved
    failure_directory = self.config.ingest_failures_directory()

    # Get start time for activity
    start_ts = time.time()

    # Process file for each timestamp, starting from the oldest file
    for data_dict in self.metadata:
        # Initialize key variables
        timestamp = data_dict['timestamp']
        filepath = data_dict['filepath']

        # Read in data
        ingest = drain.Drain(filepath)

        # Make sure file is OK
        # Move it to a directory for further analysis
        # by administrators
        if ingest.valid() is False:
            log_message = ('Cache ingest file %s is invalid. Moving.'
                           '') % (filepath)
            log.log2warning(1054, log_message)
            shutil.copy(filepath, failure_directory)
            os.remove(filepath)
            continue

        # Append data
        agent_data['timeseries'].extend(ingest.timeseries())
        agent_data['timefixed'].extend(ingest.timefixed())
        agent_data['sources'].extend(ingest.sources())

        # Append ingest object to a list for later processing
        ingests.append(ingest)

        # Update information that doesn't change (taken from the first
        # valid file only)
        if do_update is False:
            agent_data['devicename'] = ingest.devicename()
            agent_data['id_agent'] = ingest.id_agent()
            agent_data['agent_name'] = ingest.agent()

            # Get the PID file for the agent
            pid_file = daemon.pid_file(self.ingester_agent_name)

        # Get the max timestamp.
        # BUGFIX: this update previously lived in an "else:" branch and
        # so skipped the first file — a run with a single cache file
        # left agent_data['max_timestamp'] at 0. Every file's timestamp
        # must be folded in.
        agent_data['max_timestamp'] = max(
            timestamp, agent_data['max_timestamp'])

        # Update the PID file for the agent to ensure agentd.py
        # doesn't kill the ingest while processing a long stream
        # of files. If we are running this using __main__ = process()
        # then the pid file wouldn't have been created, hence the logic.
        if os.path.isfile(pid_file) is True:
            daemon.update_pid(self.ingester_agent_name)

        # Update update flag
        do_update = True

    # Process the rest
    if do_update is True:
        # Update and note success
        (success, datapoints_processed) = self._do_update(
            agent_data, ingests)

    # Log duration of activity
    duration = time.time() - start_ts
    if success is True:
        log_message = (
            'Agent %s was processed from %s cache files in %s '
            'seconds (%s seconds/file, %s seconds/datapoint)'
            '') % (agent_data['id_agent'], len(ingests),
                   round(duration, 4),
                   round(duration / len(ingests), 4),
                   round(duration / datapoints_processed, 6))
        log.log2info(1007, log_message)
    else:
        log_message = (
            'Failed to process all cache files for agent %s. '
            'Investigate.') % (agent_data['id_agent'])
        log.log2info(1008, log_message)