def main(args):
    """Run the resmon command-line tool.

    Either lists the resmon databases found in the working directory (``-l``)
    or starts a named monitoring session (``-n``) sampled at the requested
    period (``-p``), then idles until the process is interrupted.

    :param args: The command-line argument list
    :return: Exit code: 0 after listing, errno.EINVAL when no session name is
             given, errno.EINTR when the user interrupts a running session
    :rtype: int
    """
    cli = argparse.ArgumentParser(prog='resmon.py',
                                  description='Monitor the system resource utilisation, periodically')

    # Options that describe a monitoring session
    session_opts = cli.add_argument_group()
    session_opts.add_argument('-n', metavar='name', type=str,
                              help='Name of the resource monitoring session, artifacts will follow this name')
    session_opts.add_argument('-p', metavar='period', type=str, default='1m',
                              help='The sampling period of the monitoring process. E.g. 10s, 1m, 2h. Default is 1m')

    # Options that only query existing state
    listing_opts = cli.add_argument_group()
    listing_opts.add_argument('-l', action='count',
                              help='List all the resmon databases found in the working directory and return.')

    options = cli.parse_args(args)

    # Listing takes precedence over starting a session
    if options.l:
        print(execute_list_dbs())
        return 0

    if not options.n:
        logger.error('No name specified')
        return errno.EINVAL

    # An empty period string falls back to 60 seconds (1m)
    period = parse_timespec(options.p) if options.p else 60

    monitor = start_monitor(options.n, period)
    try:
        # Idle until stopped; the periodic wake-up currently does no work.
        while True:
            sleep(600)
    except KeyboardInterrupt:
        logger.info('User interrupted')
        return errno.EINTR
    finally:
        # Always stop the monitor, whatever ended the wait loop
        monitor.stop()
        logger.info('resmon terminated')

    # Only reached if the wait loop ends without raising
    return errno.EINVAL
def run(self):
    """Populate this task's RRD with one sample per step over the configured time range.

    The RRD is initialised one step before ``from_ts``. The Linux top data is
    re-read at every 100th point and reused for the points in between. The
    task stops early if no top data is available. Any exception raised while
    generating data is logged rather than propagated.

    :return: None
    :rtype: None
    """
    logger.info('{0} filling in {1} from {2} to {3} in steps of {4}'.format(
        repr(self), self.rrd_name, self.from_ts, self.to_ts, self.step))
    self.rrd_init(self.rrd_name, self.from_ts - self.step, self.step)

    snapshot = None
    try:
        timestamps = range(int(self.from_ts), int(self.to_ts), int(self.step))
        for index, ts in enumerate(timestamps):
            if index % 100 == 0:
                # Refresh the Linux top data and report progress at every 100 points
                snapshot = com.mitel.pyrate.resmon_util.execute_top()
                expected = (self.to_ts - self.from_ts) / self.step
                logger.info('{0} data generation progress = {1} of {2}, {3}%'.format(
                    repr(self), index, expected, 100.0 * (float(index) / float(expected))))

            if not snapshot:
                logger.error('{0} failed to acquire result from top, exit'.format(repr(self)))
                break

            # Record the current resource utilisation stats at this timestamp
            self.rrd_update(self.rrd_name, snapshot, ts)

        logger.info('{0} completed'.format(repr(self)))
    except BaseException as e:
        # Log and swallow: nothing is re-raised from this method
        logger.error('{0} encountered unexpected {1} at {2}'.format(
            repr(self), e, traceback.format_tb(sys.exc_info()[2])))
def _init_rrd(rrd_name, start, period, create_command):
    """Make sure an RRD with the given name exists, creating it when missing.

    The period is validated first. If a regular file with the given name
    already exists it is reused untouched; if nothing exists at that path the
    supplied command is invoked to create the RRD.

    :param rrd_name: The name of the RRD
    :type rrd_name: str
    :param start: The start of the new RRD, if needed. None means now.
    :type start: int
    :param period: The sampling period, in number of seconds (1-60)
    :type period: int
    :param create_command: Callable invoked as create_command(rrd_name, start, period) to create the RRD
    :type create_command: lambda expression
    :return: None
    :rtype: None
    :raise IOError: If the given RRD name references an existing path that is not a file.
    :raise ValueError: If the given period is outside the permissible range.
    :raise TypeError: If the given period is not an integer.
    """
    if not isinstance(period, int):
        raise TypeError('Expected integer period_sec value, given {0}'.format(period))

    if not 0 < period <= 60:
        raise ValueError('The given period_sec parameter, {0}, is outside the range 1-60'.format(period))

    # Nothing at that path yet: create a brand new RRD and finish
    if not os.path.exists(rrd_name):
        create_command(rrd_name, start, period)
        logger.info('Created new RRD {0}'.format(rrd_name))
        return

    # Something exists already; it must be a regular file to be usable
    if not os.path.isfile(rrd_name):
        raise IOError(rrd_name + ' is not a file, probably a directory, please choose a different name')

    logger.info("RRD {0} already existed, we'll use it".format(rrd_name))
def fill_in_the_rrds(cpu_rrd_name, mem_rrd_name, from_ts, to_ts, step):
    """Fill in the memory and CPU utilisation RRDs with some fluff.

    This fills in the resource RRDs (memory and CPU) for a specific time frame
    with Linux top data. One RrdFiller is created per RRD; both are started
    before either is waited on.

    :param cpu_rrd_name: Name of the CPU utilisation RRD
    :type cpu_rrd_name: str
    :param mem_rrd_name: Name of the memory utilisation RRD
    :type mem_rrd_name: str
    :param from_ts: The beginning of the period in which the data is being filled in
    :type from_ts: int
    :param to_ts: The end of the period in which the data is being filled in
    :type to_ts: int
    :param step: Number of seconds between samples
    :type step: int
    :return: None
    :rtype: None
    """
    logger.info('Data generation started from {0}, to {1}, in steps of {2}'.format(from_ts, to_ts, step))
    try:
        # Fill the RRDs the caller named, rather than fixed file names.
        mem_filler = RrdFiller(mem_rrd_name,
                               rrdutil.init_mem_stats_rrd,
                               rrdutil.update_mem_stats_from_top,
                               from_ts, to_ts, step)
        cpu_filler = RrdFiller(cpu_rrd_name,
                               rrdutil.init_cpu_stats_rrd,
                               rrdutil.update_cpu_stats_from_top,
                               from_ts, to_ts, step)

        # Start both fillers before waiting on either of them
        mem_filler.start()
        cpu_filler.start()

        mem_filler.wait_for_completion()  # Wait forever
        cpu_filler.wait_for_completion()  # Wait forever
    finally:
        # Logged whether or not the fillers succeeded
        logger.info('Data generation completed from {0}, to {1}, in steps of {2}'.format(from_ts, to_ts, step))
def main(args):
    """Main entry to program.

    Back-fills the test RRDs up to the current time, starts a resource monitor,
    polls and prints the most recent and the averaged CPU/memory data every
    10 seconds for up to an hour, then stops the monitor and renders CPU and
    memory graphs covering the last two hours. Exceptions are logged, not
    propagated.

    :param args: Parsed command-line args (not used by this function).
    :return: None; nothing is returned explicitly.
    """
    try:
        # Floor division keeps the timestamp an integer multiple of the step
        # on both Python 2 and Python 3 (plain '/' yields a float on Python 3).
        start_ts = (int(time.time()) // _step) * _step  # TS should be aligned with the period

        cpu_rrd = _base_name + 'cpu.rrd'
        mem_rrd = _base_name + 'mem.rrd'

        # Step 1: Start the monitoring
        if not (os.path.exists(cpu_rrd) and os.path.exists(mem_rrd)):
            # At least one RRD is missing: generate a full day of history
            fill_from = start_ts - (24 * SECONDS_IN_HOUR)
        else:
            # Both exist: continue from one step after the last CPU RRD update
            fill_from = int(rrdutil.get_last_update(cpu_rrd)) + _step
        fill_in_the_rrds(cpu_rrd, mem_rrd, fill_from, start_ts, _step)

        monitor = resmon_util.start_monitor(_base_name, _step)
        try:
            prev_ts = start_ts
            for loop in range(0, 360):  # Run for an hour (360 loops of 10 seconds).
                # Step 2: Wait for some time (10s) for the monitor to gather data
                logger.info('Waiting for data, loop #{0}'.format(loop))
                time.sleep(10)

                # Step 3: Observe the result
                last_cpu_data = rrdutil.fetch_last_data(monitor.get_cpu_rrd(), start_ts=prev_ts, resolution=1)
                last_mem_data = rrdutil.fetch_last_data(monitor.get_mem_rrd(), start_ts=prev_ts)

                # Print out the result
                print('\nLAST CPU data {0}, curr time = {1}'.format(last_cpu_data[0], int(time.time())))
                print(rrdutil.format_cpu_stats(last_cpu_data))
                print('\nLAST Memory data {0}'.format(last_mem_data[0]))
                print(rrdutil.format_mem_stats(last_mem_data))
                prev_ts = (int(time.time()) // _step) * _step  # TS should be aligned with the period

                # Averages are always taken from the start of this run
                avg_data = rrdutil.fetch_avg_data(monitor.get_cpu_rrd(), start_ts=start_ts, resolution=10)
                print('\nAVG CPU data {0}'.format(avg_data[0]))
                print(rrdutil.format_cpu_stats(avg_data))
                avg_data = rrdutil.fetch_avg_data(monitor.get_mem_rrd(), start_ts=start_ts, resolution=10)
                print('\nAVG Memory data {0}'.format(avg_data[0]))
                print(rrdutil.format_mem_stats(avg_data))
        finally:
            # Don't forget to stop the monitor when finish
            logger.info('Stopping {0}'.format(monitor))
            monitor.stop()

        # Generate a graph
        end_graf = time.time()
        start_graf = end_graf - (2 * SECONDS_IN_HOUR)
        gen_graph('cpu.png', rrd_name=monitor.get_cpu_rrd(), title='CPU Utilisation', cf='AVERAGE',
                  start_ts=start_graf, end_ts=end_graf,
                  dataset=[('idle', '%Idle'), ('wait', '%Wait'), ('load', 'Load avg')])
        gen_graph('mem.png', rrd_name=monitor.get_mem_rrd(), title='Memory Utilisation', cf='AVERAGE',
                  start_ts=start_graf, end_ts=end_graf,
                  dataset=[('memtotal', 'Total'), ('memfree', 'Free'), ('buffers', 'Buffers'),
                           ('cached', 'Cached')])
    except (TypeError, ValueError) as e:
        logger.warning('Encountered invalid monitor period {0}, at {1}'.format(e, format_tb(sys.exc_info()[2])))
    except KeyboardInterrupt:
        logger.warning('User interruption at {0}'.format(format_tb(sys.exc_info()[2])))
    except BaseException as e:
        logger.error('Encountered unexpected exception {0} at {1}'.format(repr(e), format_tb(sys.exc_info()[2])))
import os import platform import sys import time import traceback from traceback import format_tb import com.mitel.pyrate.resmon_util import rrdutil from com.mitel.pyrate import logger, resmon_util from com.mitel.pyrate.gengraph import gen_graph from com.mitel.pyrate.task import WorkerTask logger.info("Testing, sys.path={0}, platform={1}/{2}".format(sys.path, platform.uname(), platform.dist())) _step = 2 SECONDS_IN_HOUR = 3600 _base_name = 'test' def fill_in_the_rrds(cpu_rrd_name, mem_rrd_name, from_ts, to_ts, step): """Fill in the memory and CPU utilisation RRDs with some fluff. This fills in the resource RRDs (memory and CPU) for a specific time frame with some random Linux top data. :param cpu_rrd_name: Name of the CPU utilisation RRD :type cpu_rrd_name: str :param mem_rrd_name: Name of the memory utilisation RRD :type mem_rrd_name: str :param from_ts: The beginning of the period in which the data is being filled in :type from_ts: int