Example #1
    def process_frontends(self, data_frame):
        """Process statistics for frontends.

        Arguments:
            data_frame (obj): A pandas data_frame ready for processing.
        """
        # Filtering for Pandas
        cnt_metrics = 1
        log.debug('processing statistics for frontends')
        is_frontend = data_frame['svname_'] == 'FRONTEND'
        excluded_frontends = []
        metrics = self.config.get('process', 'frontend-metrics', fallback=None)

        if metrics is not None:
            metrics = metrics.split(' ')
        else:
            metrics = FRONTEND_METRICS
        log.debug('metric names for frontends %s', metrics)

        exclude_frontends_file = self.config.get('process',
                                                 'exclude-frontends',
                                                 fallback=None)
        if exclude_frontends_file is not None:
            excluded_frontends = load_file_content(exclude_frontends_file)
            log.info('excluding frontends %s', excluded_frontends)
            # replace dots in frontend names
            excluded_frontends[:] = [x.replace('.', '_')
                                     for x in excluded_frontends]
        filter_frontend = (~data_frame['pxname_']
                           .isin(excluded_frontends))

        frontend_stats = (data_frame[is_frontend & filter_frontend]
                          .loc[:, ['pxname_'] + metrics])

        # Group by frontend name and sum values for each column
        frontend_aggr_stats = frontend_stats.groupby(['pxname_']).sum()
        cnt_metrics += frontend_aggr_stats.size
        for index, row in frontend_aggr_stats.iterrows():
            paths = self.get_metric_paths('frontend', index)
            for i in row.iteritems():
                datapoints = [
                    "{p}.frontend.{f}.{m} {v} {t}\n"
                    .format(p=path,
                            f=index,
                            m=i[0],
                            v=i[1],
                            t=self.timestamp) for path in paths
                ]
                for datapoint in datapoints:
                    dispatcher.signal('send', data=datapoint)

        data = ("{p}.haproxystats.MetricsFrontend {v} {t}\n"
                .format(p=self.graphite_path,
                        v=cnt_metrics,
                        t=self.timestamp))
        dispatcher.signal('send', data=data)
        log.info('number of frontend metrics %s', cnt_metrics)

        log.debug('finished processing statistics for frontends')
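
A minimal, self-contained sketch of the same pandas pattern used above, with
synthetic data (the proxy names and metric columns are made up for
illustration): mask the FRONTEND rows, drop excluded proxies, then group by
proxy name and sum the metric columns.

    import pandas

    # Synthetic stand-in for the CSV data haproxystats reads
    df = pandas.DataFrame({
        'pxname_': ['www', 'www', 'api', 'internal'],
        'svname_': ['FRONTEND', 'FRONTEND', 'FRONTEND', 'FRONTEND'],
        'scur': [10, 5, 3, 7],
        'stot': [100, 50, 30, 70],
    })
    is_frontend = df['svname_'] == 'FRONTEND'
    filter_frontend = ~df['pxname_'].isin(['internal'])
    stats = (df[is_frontend & filter_frontend]
             .loc[:, ['pxname_', 'scur', 'stot']])
    # One row per frontend with its metrics summed across processes
    print(stats.groupby(['pxname_']).sum())  # api: 3/30, www: 15/150
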
Example #2
    def process_frontends(self, data_frame):
        """
        Process statistics for frontends.

        Arguments:
            data_frame (obj): A pandas data_frame ready for processing.
        """
        # Filtering for Pandas
        cnt_metrics = 1
        log.debug('processing statistics for frontends')
        is_frontend = data_frame['svname'] == 'FRONTEND'
        filter_frontend = None
        metrics = self.config.get('process', 'frontend-metrics', fallback=None)

        if metrics is not None:
            metrics = metrics.split(' ')
        else:
            metrics = FRONTEND_METRICS
        log.debug('metric names for frontends %s', metrics)

        exclude_frontends_file = self.config.get('process',
                                                 'exclude-frontends',
                                                 fallback=None)
        if exclude_frontends_file is not None:
            excluded_frontends = load_file_content(exclude_frontends_file)
            if excluded_frontends:  # in case the file is empty
                log.info('excluding frontends %s', excluded_frontends)
                filter_frontend = (~data_frame['pxname']
                                   .isin(excluded_frontends))
        if filter_frontend is not None:
            frontend_stats = (data_frame[is_frontend & filter_frontend]
                              .loc[:, ['pxname'] + metrics])
        else:
            frontend_stats = (data_frame[is_frontend]
                              .loc[:, ['pxname'] + metrics])

        # Group by frontend name and sum values for each column
        frontend_aggr_stats = frontend_stats.groupby(['pxname']).sum()
        cnt_metrics += frontend_aggr_stats.size
        for index, row in frontend_aggr_stats.iterrows():
            name = index.replace('.', '_')
            for i in row.iteritems():
                data = ("{p}.frontend.{f}.{m} {v} {t}\n"
                        .format(p=self.graphite_path,
                                f=name,
                                m=i[0],
                                v=i[1],
                                t=self.timestamp))
                dispatcher.signal('send', data=data)

        data = ("{p}.haproxystats.MetricsFrontend {v} {t}\n"
                .format(p=self.graphite_path,
                        v=cnt_metrics,
                        t=self.timestamp))
        dispatcher.signal('send', data=data)
        log.info('number of frontend metrics %s', cnt_metrics)

        log.debug('finished processing statistics for frontends')
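
Each string handed to dispatcher.signal('send', ...) above is one Graphite
plaintext datapoint of the form "<path> <value> <timestamp>\n". A tiny
illustration, assuming a hypothetical graphite_path prefix of 'hosts.lb01':

    # Hypothetical prefix and values; only the line format matters here.
    data = ("{p}.frontend.{f}.{m} {v} {t}\n"
            .format(p='hosts.lb01', f='www', m='scur', v=15, t=1454016646))
    print(repr(data))  # 'hosts.lb01.frontend.www.scur 15 1454016646\n'
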
Example #3
    def process_backends(self, data_frame, *, filter_backend=None):
        """
        Process statistics for backends.

        Arguments:
            data_frame (obj): A pandas data_frame ready for processing.
            filter_backend: A filter to apply to the data_frame.
        """
        cnt_metrics = 1
        log.debug('processing statistics for backends')
        # Filtering for Pandas
        is_backend = data_frame['svname'] == 'BACKEND'

        metrics = self.config.get('process', 'backend-metrics', fallback=None)
        if metrics is not None:
            metrics = metrics.split(' ')
        else:
            metrics = BACKEND_METRICS
        log.debug('metric names for backends %s', metrics)
        # Get rows only for backends. For some metrics we need the sum and
        # for others the average, thus we split them.
        if filter_backend is not None:
            stats_sum = (data_frame[is_backend & filter_backend]
                         .loc[:, ['pxname'] + metrics])
            stats_avg = (data_frame[is_backend & filter_backend]
                         .loc[:, ['pxname'] + BACKEND_AVG_METRICS])
        else:
            stats_sum = data_frame[is_backend].loc[:, ['pxname'] + metrics]
            stats_avg = (data_frame[is_backend]
                         .loc[:, ['pxname'] + BACKEND_AVG_METRICS])

        aggr_sum = stats_sum.groupby(['pxname'], as_index=False).sum()
        aggr_avg = stats_avg.groupby(['pxname'], as_index=False).mean()
        merged_stats = pandas.merge(aggr_sum, aggr_avg, on='pxname')

        rows, columns = merged_stats.shape
        cnt_metrics += rows * (columns - 1)  # minus the index

        for _, row in merged_stats.iterrows():
            name = row[0].replace('.', '_')
            for i in row[1:].iteritems():
                data = ("{p}.backend.{b}.{m} {v} {t}\n"
                        .format(p=self.graphite_path,
                                b=name,
                                m=i[0],
                                v=i[1],
                                t=self.timestamp))
                dispatcher.signal('send', data=data)

        data = ("{p}.haproxystats.MetricsBackend {v} {t}\n"
                .format(p=self.graphite_path,
                        v=cnt_metrics,
                        t=self.timestamp))
        dispatcher.signal('send', data=data)

        log.info('number of backend metrics %s', cnt_metrics)
        log.debug('finished processing statistics for backends')
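
The split into stats_sum and stats_avg exists because counters such as stot
add up across HAProxy processes, while timing-style metrics only make sense
averaged. A small sketch with fabricated numbers of the sum-plus-mean merge
performed above:

    import pandas

    df = pandas.DataFrame({
        'pxname': ['app', 'app', 'static'],
        'stot': [100, 140, 30],      # counter: sum it
        'rtime': [12.0, 18.0, 4.0],  # average-style metric: mean it
    })
    aggr_sum = (df.loc[:, ['pxname', 'stot']]
                .groupby(['pxname'], as_index=False).sum())
    aggr_avg = (df.loc[:, ['pxname', 'rtime']]
                .groupby(['pxname'], as_index=False).mean())
    merged = pandas.merge(aggr_sum, aggr_avg, on='pxname')
    print(merged)  # app: stot 240, rtime 15.0; static: stot 30, rtime 4.0
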
Example #4
    def process_backends(self, data_frame, filter_backend):
        """Process statistics for backends.

        Arguments:
            data_frame (obj): A pandas data_frame ready for processing.
            filter_backend: A filter to apply to the data_frame.
        """
        cnt_metrics = 1
        log.debug('processing statistics for backends')
        # Filtering for Pandas
        is_backend = data_frame['svname_'] == 'BACKEND'
        # For averages only consider entries with actual connections made
        got_traffic = data_frame['lbtot'] > 0

        metrics = self.config.get('process', 'backend-metrics', fallback=None)
        if metrics is not None:
            metrics = metrics.split(' ')
        else:
            metrics = BACKEND_METRICS
        log.debug('metric names for backends %s', metrics)
        # Get rows only for backends. For some metrics we need the sum and
        # for others the average, thus we split them.
        stats_sum = (data_frame[is_backend
                                & filter_backend].loc[:,
                                                      ['pxname_'] + metrics])
        stats_avg = (data_frame[is_backend & filter_backend
                                & got_traffic].loc[:, ['pxname_'] +
                                                   BACKEND_AVG_METRICS])

        aggr_sum = stats_sum.groupby(['pxname_'], as_index=False).sum()
        aggr_avg = stats_avg.groupby(['pxname_'], as_index=False).mean()
        merged_stats = pandas.merge(aggr_sum, aggr_avg, on='pxname_')

        rows, columns = merged_stats.shape
        cnt_metrics += rows * (columns - 1)  # minus the index

        for _, row in merged_stats.iterrows():
            backend = row[0]
            paths = self.get_metric_paths('backend', backend)
            for i in row[1:].iteritems():
                datapoints = [
                    "{p}.backend.{b}.{m} {v} {t}\n".format(p=path,
                                                           b=backend,
                                                           m=i[0],
                                                           v=i[1],
                                                           t=self.timestamp)
                    for path in paths
                ]
                for datapoint in datapoints:
                    dispatcher.signal('send', data=datapoint)

        data = ("{p}.haproxystats.MetricsBackend {v} {t}\n".format(
            p=self.graphite_path, v=cnt_metrics, t=self.timestamp))
        dispatcher.signal('send', data=data)

        log.info('number of backend metrics %s', cnt_metrics)
        log.debug('finished processing statistics for backends')
Example #5
    def run(self):
        """Consume item from queue and process it.

        It is the target function of the Process class. It consumes items
        from the queue, processes the data pulled down by the
        haproxystats-pull program and uses Pandas to perform all statistics
        computations.

        It exits when it receives STOP_SIGNAL as an item.

        To avoid orphan processes on the system, it must be robust against
        failures and try very hard to recover from them.
        """
        if self.config.has_section('local-store'):
            self.local_store = self.config.get('local-store', 'dir')
            self.file_handler = FileHandler()
            dispatcher.register('open', self.file_handler.open)
            dispatcher.register('send', self.file_handler.send)
            dispatcher.register('flush', self.file_handler.flush)
            dispatcher.register('loop', self.file_handler.loop)

        timeout = self.config.getfloat('graphite', 'timeout')
        connect_timeout = self.config.getfloat('graphite',
                                               'connect-timeout',
                                               fallback=timeout)
        write_timeout = self.config.getfloat('graphite',
                                             'write-timeout',
                                             fallback=timeout)
        graphite = GraphiteHandler(
            server=self.config.get('graphite', 'server'),
            port=self.config.getint('graphite', 'port'),
            connect_timeout=connect_timeout,
            write_timeout=write_timeout,
            retries=self.config.getint('graphite', 'retries'),
            interval=self.config.getfloat('graphite', 'interval'),
            delay=self.config.getfloat('graphite', 'delay'),
            backoff=self.config.getfloat('graphite', 'backoff'),
            queue_size=self.config.getint('graphite', 'queue-size'))
        dispatcher.register('open', graphite.open)
        dispatcher.register('send', graphite.send)

        dispatcher.signal('open')

        try:
            while True:
                log.info('waiting for item from the queue')
                incoming_dir = self.tasks.get()
                log.info('received item %s', incoming_dir)
                if incoming_dir == STOP_SIGNAL:
                    break
                start_time = time.time()

                # incoming_dir => /var/lib/haproxystats/incoming/1454016646
                # timestamp => 1454016646
                self.timestamp = os.path.basename(incoming_dir)

                # update filename for file handler.
                # This *does not* error if a file handler is not registered.
                dispatcher.signal('loop',
                                  local_store=self.local_store,
                                  timestamp=self.timestamp)

                self.process_stats(incoming_dir)

                # This flushes data to file
                dispatcher.signal('flush')

                # Remove directory as data have been successfully processed.
                log.debug('removing %s', incoming_dir)
                try:
                    shutil.rmtree(incoming_dir)
                except (FileNotFoundError, PermissionError, OSError) as exc:
                    log.critical(
                        'failed to remove directory %s with: %s. '
                        'This should not have happened as it means '
                        'another worker processed data from this '
                        'directory or something/someone removed the '
                        'directory!', incoming_dir, exc)
                elapsed_time = time.time() - start_time
                log.info('total wall clock time in seconds %.3f', elapsed_time)
                data = ("{p}.haproxystats.{m} {v} {t}\n".format(
                    p=self.graphite_path,
                    m='TotalWallClockTime',
                    v="{t:.3f}".format(t=elapsed_time),
                    t=self.timestamp))
                dispatcher.signal('send', data=data)
                log.info('finished with %s', incoming_dir)
        except KeyboardInterrupt:
            log.critical('Ctrl-C received')

        return
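
dispatcher.register/dispatcher.signal above is a simple publish/subscribe
mechanism: the 'send' signal can fan out to both the Graphite handler and the
optional file handler. The project's own dispatcher is not shown in these
examples; a minimal sketch, assuming only that every handler registered for a
name is called when that name is signalled:

    from collections import defaultdict

    # Sketch only: the real dispatcher in haproxystats may differ.
    class Dispatcher:
        """Minimal pub/sub: call every handler registered for a signal name."""

        def __init__(self):
            self._handlers = defaultdict(list)

        def register(self, name, handler):
            self._handlers[name].append(handler)

        def signal(self, name, **kwargs):
            for handler in self._handlers[name]:
                handler(**kwargs)

    dispatcher = Dispatcher()
    dispatcher.register('send', lambda data: print('graphite <-', data.strip()))
    dispatcher.register('send', lambda data: print('file     <-', data.strip()))
    dispatcher.signal('send',
                      data='prefix.haproxystats.MetricsFrontend 42 1454016646\n')
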
Example #6
    def process_servers(self, data_frame, filter_backend):
        """Process statistics for servers.

        Arguments:
            data_frame (obj): A pandas data_frame ready for processing.
            filter_backend: A filter to apply to the data_frame.
        """
        cnt_metrics = 1
        # A filter for rows with stats for servers
        is_server = data_frame['type'] == 2

        log.debug('processing statistics for servers')

        server_metrics = self.config.get('process',
                                         'server-metrics',
                                         fallback=None)
        if server_metrics is not None:
            server_metrics = server_metrics.split(' ')
        else:
            server_metrics = SERVER_METRICS
        log.debug('metric names for servers %s', server_metrics)
        # Get rows only for servers. For some metrics we need the sum and
        # for others the average, thus we split them.
        stats_sum = (data_frame[is_server
                                & filter_backend].loc[:,
                                                      ['pxname_', 'svname_'] +
                                                      server_metrics])
        stats_avg = (data_frame[is_server
                                & filter_backend].loc[:,
                                                      ['pxname_', 'svname_'] +
                                                      SERVER_AVG_METRICS])
        servers = (data_frame[is_server
                              & filter_backend].loc[:, ['pxname_', 'svname_']])

        # Calculate the number of configured servers in a backend
        tot_servers = (servers.groupby(['pxname_']).agg(
            {'svname_': pandas.Series.nunique}))
        aggr_sum = (stats_sum.groupby(['pxname_', 'svname_'],
                                      as_index=False).sum())
        aggr_avg = (stats_avg.groupby(['pxname_', 'svname_'],
                                      as_index=False).mean())
        merged_stats = pandas.merge(aggr_sum,
                                    aggr_avg,
                                    on=['svname_', 'pxname_'])
        rows, columns = merged_stats.shape
        cnt_metrics += rows * (columns - 2)
        for backend, row in tot_servers.iterrows():
            cnt_metrics += 1
            data = ("{p}.backend.{b}.{m} {v} {t}\n".format(
                p=self.graphite_path,
                b=backend,
                m='TotalServers',
                v=row[0],
                t=self.timestamp))
            dispatcher.signal('send', data=data)

        for _, row in merged_stats.iterrows():
            backend = row[0]
            server = row[1]
            for i in row[2:].iteritems():
                data = ("{p}.backend.{b}.server.{s}.{m} {v} {t}\n".format(
                    p=self.graphite_path,
                    b=backend,
                    s=server,
                    m=i[0],
                    v=i[1],
                    t=self.timestamp))
                dispatcher.signal('send', data=data)

        if self.config.getboolean('process', 'aggr-server-metrics'):
            log.info('aggregate stats for servers across all backends')
            # Produce statistics for servers across all backends
            stats_sum = (data_frame[is_server].loc[:, ['svname_'] +
                                                   SERVER_METRICS])
            stats_avg = (data_frame[is_server].loc[:, ['svname_'] +
                                                   SERVER_AVG_METRICS])
            aggr_sum = (stats_sum.groupby(['svname_'], as_index=False).sum())
            aggr_avg = (stats_avg.groupby(['svname_'], as_index=False).mean())
            merged_stats = pandas.merge(aggr_sum, aggr_avg, on=['svname_'])
            rows, columns = merged_stats.shape
            cnt_metrics += rows * (columns - 1)  # minus the index

            for _, row in merged_stats.iterrows():
                server = row[0]
                for i in row[1:].iteritems():
                    data = ("{p}.server.{s}.{m} {v} {t}\n".format(
                        p=self.graphite_path,
                        s=server,
                        m=i[0],
                        v=i[1],
                        t=self.timestamp))
                    dispatcher.signal('send', data=data)

        data = ("{p}.haproxystats.MetricsServer {v} {t}\n".format(
            p=self.graphite_path, v=cnt_metrics, t=self.timestamp))
        dispatcher.signal('send', data=data)

        log.info('number of server metrics %s', cnt_metrics)
        log.debug('finished processing statistics for servers')
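
The TotalServers figure comes from the nunique aggregation; with made-up
server rows, the counting step looks like this:

    import pandas

    servers = pandas.DataFrame({
        'pxname_': ['app', 'app', 'app', 'static'],
        'svname_': ['srv1', 'srv2', 'srv2', 'srv1'],
    })
    # Count distinct server names per backend
    tot_servers = (servers.groupby(['pxname_'])
                   .agg({'svname_': pandas.Series.nunique}))
    print(tot_servers)  # app: 2, static: 1
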
Example #7
    def haproxy_stats(self, files):
        """Process statistics for HAProxy daemon.

        Arguments:
            files (list): A list of files which contain the output of the
            'show info' command on the stats socket.
        """
        cnt_metrics = 1  # a metric counter
        log.info('processing statistics for HAProxy daemon')
        log.debug('processing files %s', ' '.join(files))
        raw_info_stats = defaultdict(list)
        # Parse raw data and build a data structure, input looks like:
        #     Name: HAProxy
        #     Version: 1.6.3-4d747c-52
        #     Release_date: 2016/02/25
        #     Nbproc: 4
        #     Uptime_sec: 59277
        #     SslFrontendSessionReuse_pct: 0
        #     ....
        with fileinput.input(files=files) as file_input:
            for line in file_input:
                if ': ' in line:
                    key, value = line.split(': ', 1)
                    try:
                        numeric_value = int(value)
                    except ValueError:
                        pass
                    else:
                        raw_info_stats[key].append(numeric_value)

        if not raw_info_stats:
            log.error('failed to parse daemon statistics')
            return
        else:
            # Here is where Pandas enters and starts its magic.
            try:
                dataframe = pandas.DataFrame(raw_info_stats)
            except ValueError as exc:
                log.error(
                    'failed to create Pandas object for daemon '
                    'statistics %s', exc)
                return

            sums = dataframe.loc[:, DAEMON_METRICS].sum()
            avgs = dataframe.loc[:, DAEMON_AVG_METRICS].mean()
            cnt_metrics += sums.size + avgs.size

            # Pandas did all the hard work; let's join the above tables and
            # extract the statistics
            for values in pandas.concat([sums, avgs], axis=0).items():
                data = ("{p}.daemon.{m} {v} {t}\n".format(p=self.graphite_path,
                                                          m=values[0].replace(
                                                              '.', '_'),
                                                          v=values[1],
                                                          t=self.timestamp))
                dispatcher.signal('send', data=data)

            dataframe['CpuUsagePct'] = (
                dataframe.loc[:, 'Idle_pct'].map(lambda x: (x * -1) + 100))
            if dataframe.loc[:, 'Idle_pct'].size > 1:
                log.info('calculating percentiles for CpuUsagePct')
                percentiles = (dataframe.loc[:, 'CpuUsagePct'].quantile(
                    q=[0.25, 0.50, 0.75, 0.95, 0.99], interpolation='nearest'))
                for per in percentiles.items():
                    # per[0] = index => [0.25, 0.50, 0.75, 0.95, 0.99]
                    # per[1] = percentile value
                    cnt_metrics += 1
                    data = ("{p}.daemon.{m} {v} {t}\n".format(
                        p=self.graphite_path,
                        m=("{:.2f}PercentileCpuUsagePct".format(
                            per[0]).split('.')[1]),
                        v=per[1],
                        t=self.timestamp))
                    dispatcher.signal('send', data=data)

                cnt_metrics += 1
                data = ("{p}.daemon.{m} {v} {t}\n".format(
                    p=self.graphite_path,
                    m="StdCpuUsagePct",
                    v=dataframe.loc[:, 'CpuUsagePct'].std(),
                    t=self.timestamp))
                dispatcher.signal('send', data=data)

            if self.config.getboolean('process', 'calculate-percentages'):
                for metric in daemon_percentage_metrics():
                    cnt_metrics += 1
                    log.info('calculating percentage for %s', metric.name)
                    value = calculate_percentage_per_column(dataframe, metric)
                    data = ("{p}.daemon.{m} {v} {t}\n".format(
                        p=self.graphite_path,
                        m=metric.title,
                        v=value,
                        t=self.timestamp))
                    dispatcher.signal('send', data=data)

            if self.config.getboolean('process', 'per-process-metrics'):
                log.info("processing statistics per daemon")
                indexed_by_worker = dataframe.set_index('Process_num')
                metrics_per_worker = (
                    indexed_by_worker.loc[:, DAEMON_METRICS + ['CpuUsagePct'] +
                                          DAEMON_AVG_METRICS])
                cnt_metrics += metrics_per_worker.size

                for worker, row in metrics_per_worker.iterrows():
                    for values in row.iteritems():
                        data = ("{p}.daemon.process.{w}.{m} {v} {t}\n".format(
                            p=self.graphite_path,
                            w=worker,
                            m=values[0].replace('.', '_'),
                            v=values[1],
                            t=self.timestamp))
                        dispatcher.signal('send', data=data)

                if self.config.getboolean('process', 'calculate-percentages'):
                    for metric in daemon_percentage_metrics():
                        log.info('calculating percentage for %s per daemon',
                                 metric.name)
                        _percentages = (metrics_per_worker
                                        .loc[:, [metric.limit, metric.name]]
                                        .apply(calculate_percentage_per_row,
                                               axis=1,
                                               args=(metric,)))

                        cnt_metrics += _percentages.size
                        for worker, row in _percentages.iterrows():
                            for values in row.iteritems():
                                data = ("{p}.daemon.process.{w}.{m} {v} {t}\n".
                                        format(p=self.graphite_path,
                                               w=worker,
                                               m=values[0].replace('.', '_'),
                                               v=values[1],
                                               t=self.timestamp))
                                dispatcher.signal('send', data=data)

            data = ("{p}.haproxystats.MetricsHAProxy {v} {t}\n".format(
                p=self.graphite_path, v=cnt_metrics, t=self.timestamp))
            dispatcher.signal('send', data=data)

            log.info('number of HAProxy metrics %s', cnt_metrics)
            log.info('finished processing statistics for HAProxy daemon')
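
A condensed sketch of the 'show info' handling above, using two fabricated
worker snippets: numeric "Key: value" pairs are collected per process,
CpuUsagePct is derived from Idle_pct (100 - Idle_pct), and percentiles are
taken with nearest interpolation:

    from collections import defaultdict

    import pandas

    lines = ['Process_num: 1', 'Idle_pct: 80', 'Process_num: 2', 'Idle_pct: 60']
    raw_info_stats = defaultdict(list)
    for line in lines:
        key, value = line.split(': ', 1)
        raw_info_stats[key].append(int(value))

    dataframe = pandas.DataFrame(raw_info_stats)
    dataframe['CpuUsagePct'] = dataframe['Idle_pct'].map(lambda x: 100 - x)
    # One value for each of the 25th/50th/75th/95th/99th percentiles
    print(dataframe['CpuUsagePct'].quantile(q=[0.25, 0.50, 0.75, 0.95, 0.99],
                                            interpolation='nearest'))
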
Example #8
    def run(self):
        """Consume item from queue and process it.

        It is the target function of the Process class. It consumes items
        from the queue, processes the data pulled down by the
        haproxystats-pull program and uses Pandas to perform all statistics
        computations.

        It exits when it receives STOP_SIGNAL as an item.

        To avoid orphan processes on the system, it must be robust against
        failures and try very hard to recover from them.
        """
        if self.config.has_section('local-store'):
            self.local_store = self.config.get('local-store', 'dir')
            self.file_handler = FileHandler()
            dispatcher.register('open', self.file_handler.open)
            dispatcher.register('send', self.file_handler.send)
            dispatcher.register('flush', self.file_handler.flush)
            dispatcher.register('loop', self.file_handler.loop)

        timeout = self.config.getfloat('graphite', 'timeout')
        connect_timeout = self.config.getfloat('graphite',
                                               'connect-timeout',
                                               fallback=timeout)
        write_timeout = self.config.getfloat('graphite',
                                             'write-timeout',
                                             fallback=timeout)
        graphite = GraphiteHandler(
            server=self.config.get('graphite', 'server'),
            port=self.config.getint('graphite', 'port'),
            connect_timeout=connect_timeout,
            write_timeout=write_timeout,
            retries=self.config.getint('graphite', 'retries'),
            interval=self.config.getfloat('graphite', 'interval'),
            delay=self.config.getfloat('graphite', 'delay'),
            backoff=self.config.getfloat('graphite', 'backoff'),
            queue_size=self.config.getint('graphite', 'queue-size')
        )
        dispatcher.register('open', graphite.open)
        dispatcher.register('send', graphite.send)

        dispatcher.signal('open')

        try:
            while True:
                log.info('waiting for item from the queue')
                incoming_dir = self.tasks.get()
                log.info('received item %s', incoming_dir)
                if incoming_dir == STOP_SIGNAL:
                    break
                start_time = time.time()

                # incoming_dir => /var/lib/haproxystats/incoming/1454016646
                # timestamp => 1454016646
                self.timestamp = os.path.basename(incoming_dir)

                # update filename for file handler.
                # This *does not* error if a file handler is not registered.
                dispatcher.signal('loop',
                                  local_store=self.local_store,
                                  timestamp=self.timestamp)

                self.process_stats(incoming_dir)

                # This flushes data to file
                dispatcher.signal('flush')

                # Remove directory as data have been successfully processed.
                log.debug('removing %s', incoming_dir)
                try:
                    shutil.rmtree(incoming_dir)
                except (FileNotFoundError, PermissionError, OSError) as exc:
                    log.critical('failed to remove directory %s with: %s. '
                                 'This should not have happened as it means '
                                 'another worker processed data from this '
                                 'directory or something/someone removed the '
                                 'directory!', incoming_dir, exc)
                elapsed_time = time.time() - start_time
                log.info('total wall clock time in seconds %.3f', elapsed_time)
                data = ("{p}.haproxystats.{m} {v} {t}\n"
                        .format(p=self.graphite_path,
                                m='TotalWallClockTime',
                                v="{t:.3f}".format(t=elapsed_time),
                                t=self.timestamp))
                dispatcher.signal('send', data=data)
                log.info('finished with %s', incoming_dir)
        except KeyboardInterrupt:
            log.critical('Ctrl-C received')

        return
Example #9
    def process_servers(self, data_frame, filter_backend):
        """Process statistics for servers.

        Arguments:
            data_frame (obj): A pandas data_frame ready for processing.
            filter_backend: A filter to apply to the data_frame.
        """
        cnt_metrics = 1
        # A filter for rows with stats for servers
        is_server = data_frame['type'] == 2

        log.debug('processing statistics for servers')

        server_metrics = self.config.get('process',
                                         'server-metrics',
                                         fallback=None)
        if server_metrics is not None:
            server_metrics = server_metrics.split(' ')
        else:
            server_metrics = SERVER_METRICS
        log.debug('metric names for servers %s', server_metrics)
        # Get rows only for servers. For some metrics we need the sum and
        # for others the average, thus we split them.
        stats_sum = (data_frame[is_server & filter_backend]
                     .loc[:, ['pxname_', 'svname_'] + server_metrics])
        stats_avg = (data_frame[is_server & filter_backend]
                     .loc[:, ['pxname_', 'svname_'] + SERVER_AVG_METRICS])
        servers = (data_frame[is_server & filter_backend]
                   .loc[:, ['pxname_', 'svname_']])

        # Calculate the number of configured servers in a backend
        tot_servers = (servers
                       .groupby(['pxname_'])
                       .agg({'svname_': pandas.Series.nunique}))
        aggr_sum = (stats_sum
                    .groupby(['pxname_', 'svname_'], as_index=False)
                    .sum())
        aggr_avg = (stats_avg
                    .groupby(['pxname_', 'svname_'], as_index=False)
                    .mean())
        merged_stats = pandas.merge(aggr_sum,
                                    aggr_avg,
                                    on=['svname_', 'pxname_'])
        rows, columns = merged_stats.shape
        cnt_metrics += rows * (columns - 2)
        for backend, row in tot_servers.iterrows():
            cnt_metrics += 1
            data = ("{p}.backend.{b}.{m} {v} {t}\n"
                    .format(p=self.graphite_path,
                            b=backend,
                            m='TotalServers',
                            v=row[0],
                            t=self.timestamp))
            dispatcher.signal('send', data=data)

        for _, row in merged_stats.iterrows():
            backend = row[0]
            server = row[1]
            for i in row[2:].iteritems():
                data = ("{p}.backend.{b}.server.{s}.{m} {v} {t}\n"
                        .format(p=self.graphite_path,
                                b=backend,
                                s=server,
                                m=i[0],
                                v=i[1],
                                t=self.timestamp))
                dispatcher.signal('send', data=data)

        if self.config.getboolean('process', 'aggr-server-metrics'):
            log.info('aggregate stats for servers across all backends')
            # Produce statistics for servers across all backends
            stats_sum = (data_frame[is_server]
                         .loc[:, ['svname_'] + SERVER_METRICS])
            stats_avg = (data_frame[is_server]
                         .loc[:, ['svname_'] + SERVER_AVG_METRICS])
            aggr_sum = (stats_sum
                        .groupby(['svname_'], as_index=False)
                        .sum())
            aggr_avg = (stats_avg
                        .groupby(['svname_'], as_index=False)
                        .mean())
            merged_stats = pandas.merge(aggr_sum, aggr_avg, on=['svname_'])
            rows, columns = merged_stats.shape
            cnt_metrics += rows * (columns - 1)  # minus the index

            for _, row in merged_stats.iterrows():
                server = row[0]
                for i in row[1:].iteritems():
                    data = ("{p}.server.{s}.{m} {v} {t}\n"
                            .format(p=self.graphite_path,
                                    s=server,
                                    m=i[0],
                                    v=i[1],
                                    t=self.timestamp))
                    dispatcher.signal('send', data=data)

        data = ("{p}.haproxystats.MetricsServer {v} {t}\n"
                .format(p=self.graphite_path,
                        v=cnt_metrics,
                        t=self.timestamp))
        dispatcher.signal('send', data=data)

        log.info('number of server metrics %s', cnt_metrics)
        log.debug('finished processing statistics for servers')
Example #10
    def haproxy_stats(self, files):
        """Process statistics for HAProxy daemon.

        Arguments:
            files (list): A list of files which contain the output of the
            'show info' command on the stats socket.
        """
        cnt_metrics = 1  # a metric counter
        log.info('processing statistics for HAProxy daemon')
        log.debug('processing files %s', ' '.join(files))
        raw_info_stats = defaultdict(list)
        # Parse raw data and build a data structure, input looks like:
        #     Name: HAProxy
        #     Version: 1.6.3-4d747c-52
        #     Release_date: 2016/02/25
        #     Nbproc: 4
        #     Uptime_sec: 59277
        #     SslFrontendSessionReuse_pct: 0
        #     ....
        with fileinput.input(files=files) as file_input:
            for line in file_input:
                if ': ' in line:
                    key, value = line.split(': ', 1)
                    try:
                        numeric_value = int(value)
                    except ValueError:
                        pass
                    else:
                        raw_info_stats[key].append(numeric_value)

        if not raw_info_stats:
            log.error('failed to parse daemon statistics')
            return
        else:
            # Here is where Pandas enters and starts its magic.
            try:
                dataframe = pandas.DataFrame(raw_info_stats)
            except ValueError as exc:
                log.error('failed to create Pandas object for daemon '
                          'statistics %s', exc)
                return

            sums = dataframe.loc[:, DAEMON_METRICS].sum()
            avgs = dataframe.loc[:, DAEMON_AVG_METRICS].mean()
            cnt_metrics += sums.size + avgs.size

            # Pandas did all the hard work; let's join the above tables and
            # extract the statistics
            for values in pandas.concat([sums, avgs], axis=0).items():
                data = ("{p}.daemon.{m} {v} {t}\n"
                        .format(p=self.graphite_path,
                                m=values[0].replace('.', '_'),
                                v=values[1],
                                t=self.timestamp))
                dispatcher.signal('send', data=data)

            dataframe['CpuUsagePct'] = (dataframe.loc[:, 'Idle_pct']
                                        .map(lambda x: (x * -1) + 100))
            if dataframe.loc[:, 'Idle_pct'].size > 1:
                log.info('calculating percentiles for CpuUsagePct')
                percentiles = (dataframe.loc[:, 'CpuUsagePct']
                               .quantile(q=[0.25, 0.50, 0.75, 0.95, 0.99],
                                         interpolation='nearest'))
                for per in percentiles.items():
                    # per[0] = index => [0.25, 0.50, 0.75, 0.95, 0.99]
                    # per[1] = percentile value
                    cnt_metrics += 1
                    data = ("{p}.daemon.{m} {v} {t}\n"
                            .format(p=self.graphite_path,
                                    m=("{:.2f}PercentileCpuUsagePct"
                                       .format(per[0]).split('.')[1]),
                                    v=per[1],
                                    t=self.timestamp))
                    dispatcher.signal('send', data=data)

                cnt_metrics += 1
                data = ("{p}.daemon.{m} {v} {t}\n"
                        .format(p=self.graphite_path,
                                m="StdCpuUsagePct",
                                v=dataframe.loc[:, 'CpuUsagePct'].std(),
                                t=self.timestamp))
                dispatcher.signal('send', data=data)

            if self.config.getboolean('process', 'calculate-percentages'):
                for metric in daemon_percentage_metrics():
                    cnt_metrics += 1
                    log.info('calculating percentage for %s', metric.name)
                    try:
                        value = calculate_percentage_per_column(dataframe,
                                                                metric)
                    except KeyError:
                        log.warning("metric %s doesn't exist", metric.name)
                    else:
                        data = ("{p}.daemon.{m} {v} {t}\n"
                                .format(p=self.graphite_path,
                                        m=metric.title,
                                        v=value,
                                        t=self.timestamp))
                        dispatcher.signal('send', data=data)

            if self.config.getboolean('process', 'per-process-metrics'):
                log.info("processing statistics per daemon")
                indexed_by_worker = dataframe.set_index('Process_num')
                metrics_per_worker = (indexed_by_worker
                                      .loc[:, DAEMON_METRICS
                                           + ['CpuUsagePct']
                                           + DAEMON_AVG_METRICS])
                cnt_metrics += metrics_per_worker.size

                for worker, row in metrics_per_worker.iterrows():
                    for values in row.iteritems():
                        data = ("{p}.daemon.process.{w}.{m} {v} {t}\n"
                                .format(p=self.graphite_path,
                                        w=worker,
                                        m=values[0].replace('.', '_'),
                                        v=values[1],
                                        t=self.timestamp))
                        dispatcher.signal('send', data=data)

                if self.config.getboolean('process', 'calculate-percentages'):
                    for metric in daemon_percentage_metrics():
                        log.info('calculating percentage for %s per daemon',
                                 metric.name)
                        _percentages = (metrics_per_worker
                                        .loc[:, [metric.limit, metric.name]]
                                        .apply(calculate_percentage_per_row,
                                               axis=1,
                                               args=(metric,)))

                        cnt_metrics += _percentages.size
                        for worker, row in _percentages.iterrows():
                            for values in row.iteritems():
                                data = ("{p}.daemon.process.{w}.{m} {v} {t}\n"
                                        .format(p=self.graphite_path,
                                                w=worker,
                                                m=values[0].replace('.', '_'),
                                                v=values[1],
                                                t=self.timestamp))
                                dispatcher.signal('send', data=data)

            data = ("{p}.haproxystats.MetricsHAProxy {v} {t}\n"
                    .format(p=self.graphite_path,
                            v=cnt_metrics,
                            t=self.timestamp))
            dispatcher.signal('send', data=data)

            log.info('number of HAProxy metrics %s', cnt_metrics)
            log.info('finished processing statistics for HAProxy daemon')
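
For reference, the percentile metric names emitted above are built by plain
string manipulation; for the 95th percentile, for example:

    name = "{:.2f}PercentileCpuUsagePct".format(0.95).split('.')[1]
    print(name)  # 95PercentileCpuUsagePct
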