def main(config_file, std_err=False, verbose=True, clean_histdata=False):
    """
    Main function of the script.

    Configures logging, loads the script configuration, takes the script
    lock, records current memory/disk usage datapoints in the history file
    and compares observed growth ratios against planned ones, reporting the
    result to the monitoring system (NRPE).

    Args:
        config_file: file path of the config file to load
        std_err: whether to print logging output to stderr (otherwise syslog)
        verbose: whether to provide verbose (DEBUG-level) logging messages
        clean_histdata: if True, all historical data is cleared and the
            script reports an immediate 'unknown' status

    Exits:
        1 on a RecoverableException, 3 on any other unexpected exception.
        AssertionError is re-raised (required by the unit tests).
    """
    try:
        # Configure logging:
        fmt = logging.Formatter('%(filename)s[%(process)d] %(levelname)s: ' +
                                '%(message)s')
        logger = logging.getLogger()
        if verbose:
            logger.setLevel(logging.DEBUG)
        else:
            logger.setLevel(logging.INFO)
        if std_err:
            handler = logging.StreamHandler()
        else:
            handler = lh.SysLogHandler(address='/dev/log',
                                       facility=lh.SysLogHandler.LOG_USER)
        handler.setFormatter(fmt)
        logger.addHandler(handler)

        logger.debug("{0} is starting, ".format(os.path.basename(__file__)) +
                     "command line arguments: " +
                     "config_file={0}, ".format(config_file) +
                     "std_err={0}, ".format(std_err) +
                     "verbose={0}, ".format(verbose) +
                     "clean_histdata={0}".format(clean_histdata)
                     )

        # FIXME - Remember to correctly configure syslog, otherwise rsyslog
        # will discard messages
        ScriptConfiguration.load_config(config_file)

        logger.debug("Loaded configuration: " +
                     str(ScriptConfiguration.get_config())
                     )

        # Initialize reporting to monitoring system:
        ScriptStatus.init(nrpe_enable=True)

        # Make sure that we are the only ones running on the server.
        # NOTE(review): 'aqquire' (sic) is the method name as exposed by
        # ScriptLock - do not "fix" the spelling here without renaming it
        # in the ScriptLock class as well.
        ScriptLock.init(ScriptConfiguration.get_val('lockfile'))
        ScriptLock.aqquire()

        # Some basic sanity checking:
        verify_conf()

        # We are all set, lets do some real work:
        HistoryFile.init(location=ScriptConfiguration.get_val('history_file'),
                         max_averaging_window=ScriptConfiguration.get_val(
                             'max_averaging_window'),
                         min_averaging_window=ScriptConfiguration.get_val(
                             'min_averaging_window'))

        if clean_histdata:
            HistoryFile.clear_history()
            HistoryFile.save()
            ScriptStatus.notify_immediate('unknown',
                                          'History data has been cleared.')

        timeframe = ScriptConfiguration.get_val('timeframe')

        # FIXME: not sure how to refactor this, copypaste does not seem the
        # best solution :(
        def do_status_processing(prefix, current_growth, planned_growth,
                                 mountpoint=None, data_type=None):
            # Thresholds are a percentage reduction of the planned growth.
            # BUGFIX: divide by 100.0 (not 100) so the division stays in
            # float even under Python 2's integer division; with /100 an
            # integer config value would silently zero out the reduction.
            warn_tresh = 1 + (ScriptConfiguration.get_val(
                prefix + '_mon_warn_reduction')/100.0)
            crit_tresh = 1 + (ScriptConfiguration.get_val(
                prefix + '_mon_crit_reduction')/100.0)
            if prefix == 'disk' and data_type == 'inode':
                units = 'inodes/day'
            else:
                units = 'MB/day'
            # Build a human-readable resource name for the status message:
            if prefix == 'disk':
                rname = data_type + \
                    ' usage growth for mount {0}'.format(mountpoint)
            else:
                rname = '{0} usage growth'.format(prefix)
            rname = rname.capitalize()
            if current_growth > planned_growth * warn_tresh:
                msg = '{0} exceeds planned growth '.format(rname) + \
                      '- current: {0} {1}'.format(current_growth, units) + \
                      ', planned: {0} {1}.'.format(planned_growth, units)
                if current_growth > planned_growth * crit_tresh:
                    ScriptStatus.update('crit', msg)
                else:
                    ScriptStatus.update('warn', msg)
            else:
                ScriptStatus.update('ok', '{0} is OK ({1} {2}).'.format(
                    rname, current_growth, units))

        if ScriptConfiguration.get_val('memory_mon_enabled'):
            cur_usage, max_usage = fetch_memory_usage()
            HistoryFile.add_datapoint('memory', cur_usage)
            # verify_dataspan returns a negative number of missing days
            # when there is not yet enough history to compute a ratio:
            tmp = HistoryFile.verify_dataspan('memory')
            if tmp < 0:
                ScriptStatus.update('unknown', 'There is not enough data ' +
                                    'to calculate current memory ' +
                                    'usage growth: {0} '.format(abs(tmp)) +
                                    'days more is needed.')
            else:
                datapoints = HistoryFile.get_datapoints('memory')
                planned_growth = find_planned_grow_ratio(cur_usage,
                                                         max_usage,
                                                         timeframe)
                current_growth = find_current_grow_ratio(datapoints)
                logging.debug('memory -> ' +
                              'current_growth: {0}, '.format(current_growth) +
                              'planned_growth: {0}'.format(planned_growth))
                do_status_processing('memory', current_growth,
                                     planned_growth)

        if ScriptConfiguration.get_val('disk_mon_enabled'):
            mountpoints = ScriptConfiguration.get_val('disk_mountpoints')
            # Check both block-space and inode usage for every mountpoint:
            for dtype in ['space', 'inode']:
                for mountpoint in mountpoints:
                    if dtype == 'inode':
                        cur_usage, max_usage = fetch_inode_usage(mountpoint)
                    else:
                        cur_usage, max_usage = fetch_disk_usage(mountpoint)
                    HistoryFile.add_datapoint('disk', cur_usage,
                                              data_type=dtype,
                                              path=mountpoint)
                    tmp = HistoryFile.verify_dataspan('disk',
                                                      data_type=dtype,
                                                      path=mountpoint)
                    if tmp < 0:
                        ScriptStatus.update('unknown',
                                            'There is not enough data to ' +
                                            'calculate current disk ' +
                                            dtype +
                                            ' usage growth for mountpoint ' +
                                            '{0}: {1} '.format(
                                                mountpoint, abs(tmp)) +
                                            'days more is needed.')
                    else:
                        datapoints = HistoryFile.get_datapoints(
                            'disk', data_type=dtype, path=mountpoint)
                        planned_growth = find_planned_grow_ratio(
                            cur_usage, max_usage, timeframe)
                        current_growth = find_current_grow_ratio(datapoints)
                        # BUGFIX: added the missing ', ' separator between
                        # current_growth and planned_growth (the memory
                        # branch above already had it).
                        logging.debug('disk, ' +
                                      'mountpoint {0}, '.format(mountpoint) +
                                      'data_type {0}: '.format(dtype) +
                                      'current_growth: {0}, '.format(
                                          current_growth) +
                                      'planned_growth: {0}'.format(
                                          planned_growth))
                        do_status_processing('disk', current_growth,
                                             planned_growth,
                                             mountpoint=mountpoint,
                                             data_type=dtype)

        HistoryFile.save()
        # NOTE(review): 'agregated' (sic) is the method name as exposed by
        # ScriptStatus.
        ScriptStatus.notify_agregated()
        ScriptLock.release()

    except RecoverableException as e:
        msg = str(e)
        logging.critical(msg)
        ScriptStatus.notify_immediate('unknown', msg)
        sys.exit(1)
    except AssertionError:
        # Unittests require it:
        raise
    except Exception as e:
        msg = "Exception occured: {0}".format(e.__class__.__name__)
        logging.exception(msg)
        print(msg)
        # NOTE(review): original comment read "We can use notify immediate
        # here :(" - presumably it meant we canNOT (state may be broken at
        # this point); confirm before wiring notify_immediate in.
        sys.exit(3)