def process_frontends(self, data_frame): """Process statistics for frontends. Arguments: data_frame (obj): A pandas data_frame ready for processing. """ # Filtering for Pandas cnt_metrics = 1 log.debug('processing statistics for frontends') is_frontend = data_frame['svname_'] == 'FRONTEND' excluded_frontends = [] metrics = self.config.get('process', 'frontend-metrics', fallback=None) if metrics is not None: metrics = metrics.split(' ') else: metrics = FRONTEND_METRICS log.debug('metric names for frontends %s', metrics) exclude_frontends_file = self.config.get('process', 'exclude-frontends', fallback=None) if exclude_frontends_file is not None: excluded_frontends = load_file_content(exclude_frontends_file) log.info('excluding frontends %s', excluded_frontends) # replace dots in frontend names excluded_frontends[:] = [x.replace('.', '_') for x in excluded_frontends] filter_frontend = (~data_frame['pxname_'] .isin(excluded_frontends)) frontend_stats = (data_frame[is_frontend & filter_frontend] .loc[:, ['pxname_'] + metrics]) # Group by frontend name and sum values for each column frontend_aggr_stats = frontend_stats.groupby(['pxname_']).sum() cnt_metrics += frontend_aggr_stats.size for index, row in frontend_aggr_stats.iterrows(): paths = self.get_metric_paths('frontend', index) for i in row.iteritems(): datapoints = [ "{p}.frontend.{f}.{m} {v} {t}\n" .format(p=path, f=index, m=i[0], v=i[1], t=self.timestamp) for path in paths ] for datapoint in datapoints: dispatcher.signal('send', data=datapoint) data = ("{p}.haproxystats.MetricsFrontend {v} {t}\n" .format(p=self.graphite_path, v=cnt_metrics, t=self.timestamp)) dispatcher.signal('send', data=data) log.info('number of frontend metrics %s', cnt_metrics) log.debug('finished processing statistics for frontends')
def process_frontends(self, data_frame): """ Process statistics for frontends. Arguments: data_frame (obj): A pandas data_frame ready for processing. """ # Filtering for Pandas cnt_metrics = 1 log.debug('processing statistics for frontends') is_frontend = data_frame['svname'] == 'FRONTEND' filter_frontend = None metrics = self.config.get('process', 'frontend-metrics', fallback=None) if metrics is not None: metrics = metrics.split(' ') else: metrics = FRONTEND_METRICS log.debug('metric names for frontends %s', metrics) exclude_frontends_file = self.config.get('process', 'exclude-frontends', fallback=None) if exclude_frontends_file is not None: excluded_frontends = load_file_content(exclude_frontends_file) if excluded_frontends: # in case the file is empty log.info('excluding frontends %s', excluded_frontends) filter_frontend = (~data_frame['pxname'] .isin(excluded_frontends)) if filter_frontend is not None: frontend_stats = (data_frame[is_frontend & filter_frontend] .loc[:, ['pxname'] + metrics]) else: frontend_stats = (data_frame[is_frontend] .loc[:, ['pxname'] + metrics]) # Group by frontend name and sum values for each column frontend_aggr_stats = frontend_stats.groupby(['pxname']).sum() cnt_metrics += frontend_aggr_stats.size for index, row in frontend_aggr_stats.iterrows(): name = index.replace('.', '_') for i in row.iteritems(): data = ("{p}.frontend.{f}.{m} {v} {t}\n" .format(p=self.graphite_path, f=name, m=i[0], v=i[1], t=self.timestamp)) dispatcher.signal('send', data=data) data = ("{p}.haproxystats.MetricsFrontend {v} {t}\n" .format(p=self.graphite_path, v=cnt_metrics, t=self.timestamp)) dispatcher.signal('send', data=data) log.info('number of frontend metrics %s', cnt_metrics) log.debug('finished processing statistics for frontends')
def process_backends(self, data_frame, *, filter_backend=None):
    """Process statistics for backends.

    Arguments:
        data_frame (obj): A pandas data_frame ready for processing.
        filter_backend: A filter to apply on data_frame.
    """
    cnt_metrics = 1
    log.debug('processing statistics for backends')
    # Filtering for Pandas
    is_backend = data_frame['svname'] == 'BACKEND'
    metrics = self.config.get('process', 'backend-metrics', fallback=None)
    if metrics is not None:
        metrics = metrics.split(' ')
    else:
        metrics = BACKEND_METRICS
    log.debug('metric names for backends %s', metrics)

    # Get rows only for backends. For some metrics we need the sum and
    # for others the average, thus we split them.
    if filter_backend is not None:
        stats_sum = (data_frame[is_backend & filter_backend]
                     .loc[:, ['pxname'] + metrics])
        stats_avg = (data_frame[is_backend & filter_backend]
                     .loc[:, ['pxname'] + BACKEND_AVG_METRICS])
    else:
        stats_sum = data_frame[is_backend].loc[:, ['pxname'] + metrics]
        stats_avg = (data_frame[is_backend]
                     .loc[:, ['pxname'] + BACKEND_AVG_METRICS])

    aggr_sum = stats_sum.groupby(['pxname'], as_index=False).sum()
    aggr_avg = stats_avg.groupby(['pxname'], as_index=False).mean()
    merged_stats = pandas.merge(aggr_sum, aggr_avg, on='pxname')
    rows, columns = merged_stats.shape
    cnt_metrics += rows * (columns - 1)  # minus the index
    for _, row in merged_stats.iterrows():
        name = row[0].replace('.', '_')
        for i in row[1:].iteritems():
            data = ("{p}.backend.{b}.{m} {v} {t}\n"
                    .format(p=self.graphite_path, b=name, m=i[0], v=i[1],
                            t=self.timestamp))
            dispatcher.signal('send', data=data)

    data = ("{p}.haproxystats.MetricsBackend {v} {t}\n"
            .format(p=self.graphite_path, v=cnt_metrics, t=self.timestamp))
    dispatcher.signal('send', data=data)
    log.info('number of backend metrics %s', cnt_metrics)
    log.debug('finished processing statistics for backends')

def process_backends(self, data_frame, filter_backend):
    """Process statistics for backends.

    Arguments:
        data_frame (obj): A pandas data_frame ready for processing.
        filter_backend: A filter to apply on data_frame.
    """
    cnt_metrics = 1
    log.debug('processing statistics for backends')
    # Filtering for Pandas
    is_backend = data_frame['svname_'] == 'BACKEND'
    # For averages only consider entries with actual connections made
    got_traffic = data_frame['lbtot'] > 0
    metrics = self.config.get('process', 'backend-metrics', fallback=None)
    if metrics is not None:
        metrics = metrics.split(' ')
    else:
        metrics = BACKEND_METRICS
    log.debug('metric names for backends %s', metrics)

    # Get rows only for backends. For some metrics we need the sum and
    # for others the average, thus we split them.
    stats_sum = (data_frame[is_backend & filter_backend]
                 .loc[:, ['pxname_'] + metrics])
    stats_avg = (data_frame[is_backend & filter_backend & got_traffic]
                 .loc[:, ['pxname_'] + BACKEND_AVG_METRICS])
    aggr_sum = stats_sum.groupby(['pxname_'], as_index=False).sum()
    aggr_avg = stats_avg.groupby(['pxname_'], as_index=False).mean()
    merged_stats = pandas.merge(aggr_sum, aggr_avg, on='pxname_')
    rows, columns = merged_stats.shape
    cnt_metrics += rows * (columns - 1)  # minus the index
    for _, row in merged_stats.iterrows():
        backend = row[0]
        paths = self.get_metric_paths('backend', backend)
        for i in row[1:].iteritems():
            datapoints = ["{p}.backend.{b}.{m} {v} {t}\n"
                          .format(p=path, b=backend, m=i[0], v=i[1],
                                  t=self.timestamp)
                          for path in paths]
            for datapoint in datapoints:
                dispatcher.signal('send', data=datapoint)

    data = ("{p}.haproxystats.MetricsBackend {v} {t}\n"
            .format(p=self.graphite_path, v=cnt_metrics, t=self.timestamp))
    dispatcher.signal('send', data=data)
    log.info('number of backend metrics %s', cnt_metrics)
    log.debug('finished processing statistics for backends')

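# A minimal, hypothetical sketch (not part of this module) of the sum/mean
# split above: rows reported by each HAProxy process for the same backend are
# summed for counters and averaged for time-based metrics, then merged back
# on the backend name. Values below are illustrative only.
def _sketch_backend_aggregation():
    import pandas
    frame = pandas.DataFrame({'pxname_': ['app', 'app'],
                              'stot': [10, 20],   # session counter -> sum
                              'rtime': [4, 8]})   # response time  -> mean
    aggr_sum = frame.groupby(['pxname_'], as_index=False)['stot'].sum()
    aggr_avg = frame.groupby(['pxname_'], as_index=False)['rtime'].mean()
    return pandas.merge(aggr_sum, aggr_avg, on='pxname_')
    # -> one row: pxname_='app', stot=30, rtime=6.0
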
def run(self):
    """Consume item from queue and process it.

    It is the target function of the Process class. Consumes items from
    the queue, processes data which are pulled down by the
    haproxystats-pull program and uses Pandas to perform all computations
    of statistics.

    It exits when it receives STOP_SIGNAL as an item.

    To avoid orphan processes on the system, it must be robust against
    failures and try very hard to recover from failures.
    """
    if self.config.has_section('local-store'):
        self.local_store = self.config.get('local-store', 'dir')
        self.file_handler = FileHandler()
        dispatcher.register('open', self.file_handler.open)
        dispatcher.register('send', self.file_handler.send)
        dispatcher.register('flush', self.file_handler.flush)
        dispatcher.register('loop', self.file_handler.loop)

    timeout = self.config.getfloat('graphite', 'timeout')
    connect_timeout = self.config.getfloat('graphite', 'connect-timeout',
                                           fallback=timeout)
    write_timeout = self.config.getfloat('graphite', 'write-timeout',
                                         fallback=timeout)
    graphite = GraphiteHandler(
        server=self.config.get('graphite', 'server'),
        port=self.config.getint('graphite', 'port'),
        connect_timeout=connect_timeout,
        write_timeout=write_timeout,
        retries=self.config.getint('graphite', 'retries'),
        interval=self.config.getfloat('graphite', 'interval'),
        delay=self.config.getfloat('graphite', 'delay'),
        backoff=self.config.getfloat('graphite', 'backoff'),
        queue_size=self.config.getint('graphite', 'queue-size'))
    dispatcher.register('open', graphite.open)
    dispatcher.register('send', graphite.send)
    dispatcher.signal('open')

    try:
        while True:
            log.info('waiting for item from the queue')
            incoming_dir = self.tasks.get()
            log.info('received item %s', incoming_dir)
            if incoming_dir == STOP_SIGNAL:
                break
            start_time = time.time()

            # incoming_dir => /var/lib/haproxystats/incoming/1454016646
            # timestamp    => 1454016646
            self.timestamp = os.path.basename(incoming_dir)

            # update filename for file handler.
            # This *does not* error if a file handler is not registered.
            dispatcher.signal('loop', local_store=self.local_store,
                              timestamp=self.timestamp)

            self.process_stats(incoming_dir)

            # This flushes data to file
            dispatcher.signal('flush')

            # Remove directory as data have been successfully processed.
            log.debug('removing %s', incoming_dir)
            try:
                shutil.rmtree(incoming_dir)
            except (FileNotFoundError, PermissionError, OSError) as exc:
                log.critical('failed to remove directory %s with:%s. '
                             'This should not have happened as it means '
                             'another worker processed data from this '
                             'directory or something/someone removed the '
                             'directory!', incoming_dir, exc)
            elapsed_time = time.time() - start_time
            log.info('total wall clock time in seconds %.3f', elapsed_time)
            data = ("{p}.haproxystats.{m} {v} {t}\n"
                    .format(p=self.graphite_path,
                            m='TotalWallClockTime',
                            v="{t:.3f}".format(t=elapsed_time),
                            t=self.timestamp))
            dispatcher.signal('send', data=data)
            log.info('finished with %s', incoming_dir)
    except KeyboardInterrupt:
        log.critical('Ctrl-C received')

    return

def process_servers(self, data_frame, filter_backend):
    """Process statistics for servers.

    Arguments:
        data_frame (obj): A pandas data_frame ready for processing.
        filter_backend: A filter to apply on data_frame.
    """
    cnt_metrics = 1
    # A filter for rows with stats for servers
    is_server = data_frame['type'] == 2
    log.debug('processing statistics for servers')
    server_metrics = self.config.get('process', 'server-metrics',
                                     fallback=None)
    if server_metrics is not None:
        server_metrics = server_metrics.split(' ')
    else:
        server_metrics = SERVER_METRICS
    log.debug('metric names for servers %s', server_metrics)

    # Get rows only for servers. For some metrics we need the sum and
    # for others the average, thus we split them.
    stats_sum = (data_frame[is_server & filter_backend]
                 .loc[:, ['pxname_', 'svname_'] + server_metrics])
    stats_avg = (data_frame[is_server & filter_backend]
                 .loc[:, ['pxname_', 'svname_'] + SERVER_AVG_METRICS])
    servers = (data_frame[is_server & filter_backend]
               .loc[:, ['pxname_', 'svname_']])
    # Calculate the number of configured servers in a backend
    tot_servers = (servers
                   .groupby(['pxname_'])
                   .agg({'svname_': pandas.Series.nunique}))
    aggr_sum = (stats_sum
                .groupby(['pxname_', 'svname_'], as_index=False)
                .sum())
    aggr_avg = (stats_avg
                .groupby(['pxname_', 'svname_'], as_index=False)
                .mean())
    merged_stats = pandas.merge(aggr_sum, aggr_avg,
                                on=['svname_', 'pxname_'])
    rows, columns = merged_stats.shape
    cnt_metrics += rows * (columns - 2)

    for backend, row in tot_servers.iterrows():
        cnt_metrics += 1
        data = ("{p}.backend.{b}.{m} {v} {t}\n"
                .format(p=self.graphite_path, b=backend, m='TotalServers',
                        v=row[0], t=self.timestamp))
        dispatcher.signal('send', data=data)

    for _, row in merged_stats.iterrows():
        backend = row[0]
        server = row[1]
        for i in row[2:].iteritems():
            data = ("{p}.backend.{b}.server.{s}.{m} {v} {t}\n"
                    .format(p=self.graphite_path, b=backend, s=server,
                            m=i[0], v=i[1], t=self.timestamp))
            dispatcher.signal('send', data=data)

    if self.config.getboolean('process', 'aggr-server-metrics'):
        log.info('aggregate stats for servers across all backends')
        # Produce statistics for servers across all backends
        stats_sum = (data_frame[is_server]
                     .loc[:, ['svname_'] + SERVER_METRICS])
        stats_avg = (data_frame[is_server]
                     .loc[:, ['svname_'] + SERVER_AVG_METRICS])
        aggr_sum = (stats_sum
                    .groupby(['svname_'], as_index=False)
                    .sum())
        aggr_avg = (stats_avg
                    .groupby(['svname_'], as_index=False)
                    .mean())
        merged_stats = pandas.merge(aggr_sum, aggr_avg, on=['svname_'])
        rows, columns = merged_stats.shape
        cnt_metrics += rows * (columns - 1)  # minus the index
        for _, row in merged_stats.iterrows():
            server = row[0]
            for i in row[1:].iteritems():
                data = ("{p}.server.{s}.{m} {v} {t}\n"
                        .format(p=self.graphite_path, s=server, m=i[0],
                                v=i[1], t=self.timestamp))
                dispatcher.signal('send', data=data)

    data = ("{p}.haproxystats.MetricsServer {v} {t}\n"
            .format(p=self.graphite_path, v=cnt_metrics, t=self.timestamp))
    dispatcher.signal('send', data=data)
    log.info('number of server metrics %s', cnt_metrics)
    log.debug('finished processing statistics for servers')

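# A minimal, hypothetical sketch (not part of this module) of the
# TotalServers calculation above: with several HAProxy processes every server
# shows up once per process, so nunique() counts each server of a backend
# only once. Names below are illustrative only.
def _sketch_total_servers():
    import pandas
    servers = pandas.DataFrame({'pxname_': ['app', 'app', 'app', 'app'],
                                'svname_': ['srv1', 'srv2', 'srv1', 'srv2']})
    return (servers.groupby(['pxname_'])
            .agg({'svname_': pandas.Series.nunique}))
    # -> backend 'app': svname_ == 2, reported as TotalServers
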
def haproxy_stats(self, files):
    """Process statistics for HAProxy daemon.

    Arguments:
        files (list): A list of files which contain the output of
        'show info' command on the stats socket.
    """
    cnt_metrics = 1  # a metric counter
    log.info('processing statistics for HAProxy daemon')
    log.debug('processing files %s', ' '.join(files))
    raw_info_stats = defaultdict(list)
    # Parse raw data and build a data structure, input looks like:
    #     Name: HAProxy
    #     Version: 1.6.3-4d747c-52
    #     Release_date: 2016/02/25
    #     Nbproc: 4
    #     Uptime_sec: 59277
    #     SslFrontendSessionReuse_pct: 0
    #     ....
    with fileinput.input(files=files) as file_input:
        for line in file_input:
            if ': ' in line:
                key, value = line.split(': ', 1)
                try:
                    numeric_value = int(value)
                except ValueError:
                    pass
                else:
                    raw_info_stats[key].append(numeric_value)

    if not raw_info_stats:
        log.error('failed to parse daemon statistics')
        return
    else:
        # Here is where Pandas enters and starts its magic.
        try:
            dataframe = pandas.DataFrame(raw_info_stats)
        except ValueError as exc:
            log.error('failed to create Pandas object for daemon '
                      'statistics %s', exc)
            return
        sums = dataframe.loc[:, DAEMON_METRICS].sum()
        avgs = dataframe.loc[:, DAEMON_AVG_METRICS].mean()
        cnt_metrics += sums.size + avgs.size

        # Pandas did all the hard work, let's join above tables and
        # extract statistics
        for values in pandas.concat([sums, avgs], axis=0).items():
            data = ("{p}.daemon.{m} {v} {t}\n"
                    .format(p=self.graphite_path,
                            m=values[0].replace('.', '_'),
                            v=values[1],
                            t=self.timestamp))
            dispatcher.signal('send', data=data)

        dataframe['CpuUsagePct'] = (dataframe.loc[:, 'Idle_pct']
                                    .map(lambda x: (x * -1) + 100))
        if dataframe.loc[:, 'Idle_pct'].size > 1:
            log.info('calculating percentiles for CpuUsagePct')
            percentiles = (dataframe.loc[:, 'CpuUsagePct']
                           .quantile(q=[0.25, 0.50, 0.75, 0.95, 0.99],
                                     interpolation='nearest'))
            for per in percentiles.items():
                # per[0] = index => [0.25, 0.50, 0.75, 0.95, 0.99]
                # per[1] = percentile value
                cnt_metrics += 1
                data = ("{p}.daemon.{m} {v} {t}\n"
                        .format(p=self.graphite_path,
                                m=("{:.2f}PercentileCpuUsagePct"
                                   .format(per[0]).split('.')[1]),
                                v=per[1],
                                t=self.timestamp))
                dispatcher.signal('send', data=data)

            cnt_metrics += 1
            data = ("{p}.daemon.{m} {v} {t}\n"
                    .format(p=self.graphite_path,
                            m="StdCpuUsagePct",
                            v=dataframe.loc[:, 'CpuUsagePct'].std(),
                            t=self.timestamp))
            dispatcher.signal('send', data=data)

        if self.config.getboolean('process', 'calculate-percentages'):
            for metric in daemon_percentage_metrics():
                cnt_metrics += 1
                log.info('calculating percentage for %s', metric.name)
                try:
                    value = calculate_percentage_per_column(dataframe,
                                                            metric)
                except KeyError:
                    log.warning("metric %s doesn't exist", metric.name)
                else:
                    data = ("{p}.daemon.{m} {v} {t}\n"
                            .format(p=self.graphite_path,
                                    m=metric.title,
                                    v=value,
                                    t=self.timestamp))
                    dispatcher.signal('send', data=data)

        if self.config.getboolean('process', 'per-process-metrics'):
            log.info("processing statistics per daemon")
            indexed_by_worker = dataframe.set_index('Process_num')
            metrics_per_worker = (indexed_by_worker
                                  .loc[:, DAEMON_METRICS
                                       + ['CpuUsagePct']
                                       + DAEMON_AVG_METRICS])
            cnt_metrics += metrics_per_worker.size
            for worker, row in metrics_per_worker.iterrows():
                for values in row.iteritems():
                    data = ("{p}.daemon.process.{w}.{m} {v} {t}\n"
                            .format(p=self.graphite_path,
                                    w=worker,
                                    m=values[0].replace('.', '_'),
                                    v=values[1],
                                    t=self.timestamp))
                    dispatcher.signal('send', data=data)
            if self.config.getboolean('process', 'calculate-percentages'):
                for metric in daemon_percentage_metrics():
                    log.info('calculating percentage for %s per daemon',
                             metric.name)
                    _percentages = (metrics_per_worker
                                    .loc[:, [metric.limit, metric.name]]
                                    .apply(calculate_percentage_per_row,
                                           axis=1,
                                           args=(metric,)))
                    cnt_metrics += _percentages.size
                    for worker, row in _percentages.iterrows():
                        for values in row.iteritems():
                            data = ("{p}.daemon.process.{w}.{m} {v} {t}\n"
                                    .format(p=self.graphite_path,
                                            w=worker,
                                            m=values[0].replace('.', '_'),
                                            v=values[1],
                                            t=self.timestamp))
                            dispatcher.signal('send', data=data)

    data = ("{p}.haproxystats.MetricsHAProxy {v} {t}\n"
            .format(p=self.graphite_path, v=cnt_metrics, t=self.timestamp))
    dispatcher.signal('send', data=data)
    log.info('number of HAProxy metrics %s', cnt_metrics)
    log.info('finished processing statistics for HAProxy daemon')

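# A minimal, hypothetical sketch (not part of this module) of the CPU metrics
# above: HAProxy reports Idle_pct per process, so CPU usage is 100 - Idle_pct,
# and with more than one process percentiles and the standard deviation are
# taken across processes. Values below are illustrative only.
def _sketch_cpu_usage():
    import pandas
    idle = pandas.Series([90, 80, 70, 60], name='Idle_pct')  # one per process
    cpu = idle.map(lambda x: (x * -1) + 100)                 # 10, 20, 30, 40
    percentiles = cpu.quantile(q=[0.25, 0.50, 0.75, 0.95, 0.99],
                               interpolation='nearest')
    return percentiles, cpu.std()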