Exemple #1
0
class KPISet(BetterDict):
    """
    Main entity in results, contains all KPIs for single label,
    capable of merging other KPISet's into it to compose cumulative results
    """
    # dictionary keys under which the individual KPIs are stored
    ERRORS = "errors"
    SAMPLE_COUNT = "throughput"
    CONCURRENCY = "concurrency"
    SUCCESSES = "succ"
    FAILURES = "fail"
    RESP_TIMES = "rt"
    AVG_RESP_TIME = "avg_rt"
    STDEV_RESP_TIME = "stdev_rt"
    AVG_LATENCY = "avg_lt"
    AVG_CONN_TIME = "avg_ct"
    PERCENTILES = "perc"
    RESP_CODES = "rc"
    # values for the "type" field of error items (see error_item_skel)
    ERRTYPE_ERROR = 0
    ERRTYPE_ASSERT = 1

    def __init__(self, perc_levels=()):
        """
        :param perc_levels: percentile levels (0..100) computed by recalculate()
        """
        super(KPISet, self).__init__()
        # running totals, turned into averages by recalculate()
        self.sum_rt = 0
        self.sum_lt = 0
        self.sum_cn = 0
        self.perc_levels = perc_levels
        # NOTE(review): the get(key, default) calls below are used for their
        # side effect; this presumes BetterDict.get() stores the default under
        # the key when it is missing - confirm against BetterDict
        # scalars
        self.get(self.SAMPLE_COUNT, 0)
        self.get(self.CONCURRENCY, 0)
        self.get(self.SUCCESSES, 0)
        self.get(self.FAILURES, 0)
        self.get(self.AVG_RESP_TIME, 0)
        self.get(self.STDEV_RESP_TIME, 0)
        self.get(self.AVG_LATENCY, 0)
        self.get(self.AVG_CONN_TIME, 0)
        # vectors
        self.get(self.ERRORS, [])
        self.get(self.RESP_TIMES, Counter())
        self.get(self.RESP_CODES, Counter())
        self.get(self.PERCENTILES)
        # concurrency per source (transaction name or merge source ID)
        self._concurrencies = BetterDict()

    def __deepcopy__(self, memo):
        """
        Copy the running sums and deep-copy every stored KPI value.
        The per-source concurrency map is not copied: the new instance
        starts with an empty one.

        :rtype: KPISet
        """
        mycopy = KPISet(self.perc_levels)
        mycopy.sum_rt = self.sum_rt
        mycopy.sum_lt = self.sum_lt
        mycopy.sum_cn = self.sum_cn
        for key, val in iteritems(self):
            mycopy[key] = copy.deepcopy(val, memo)
        return mycopy

    @staticmethod
    def error_item_skel(error, ret_c, cnt, errtype, urls):
        """
        Build the dict that describes one kind of error

        :type error: str
        :type ret_c: str
        :type cnt: int
        :type errtype: int
        :type urls: Counter
        :rtype: dict
        """
        return {
            "cnt": cnt,
            "msg": error,
            "rc": ret_c,
            "type": errtype,
            "urls": urls
        }

    def add_sample(self, sample):
        """
        Add sample, consisting of: cnc, rt, cn, lt, rc, error, trname

        Timings are summed only for samples that carry a response code,
        while every sample is counted in the response time distribution.

        :type sample: tuple
        """
        # TODO: introduce a flag to not count failed in resp times? or offer it always?
        cnc, r_time, con_time, latency, r_code, error, trname = sample
        self[self.SAMPLE_COUNT] = self.get(self.SAMPLE_COUNT, 0) + 1
        if cnc:
            self._concurrencies[trname] = cnc

        if r_code is not None:
            resp_codes = self.get(self.RESP_CODES)
            resp_codes[r_code] = resp_codes.get(r_code, 0) + 1

            # count times only if we have RCs
            if con_time:
                self.sum_cn += con_time
            self.sum_lt += latency
            self.sum_rt += r_time

        if error is not None:
            self[self.FAILURES] = self.get(self.FAILURES, 0) + 1

            item = self.error_item_skel(error, r_code, 1, KPISet.ERRTYPE_ERROR, Counter())
            self.inc_list(self.get(self.ERRORS), ("msg", error), item)
        else:
            self[self.SUCCESSES] = self.get(self.SUCCESSES, 0) + 1

        self.get(self.RESP_TIMES)[r_time] += 1
        # TODO: max/min rt? there is percentiles...
        # TODO: throughput if interval is not 1s

    @staticmethod
    def inc_list(values, selector, value):
        """
        Increment list item, based on selector criteria

        The first item whose field matches the selector gets 'cnt' and
        'urls' of value added to it; when nothing matches, a copy of value
        is appended to the list.

        :param values: list to update
        :type values: list
        :param selector: tuple of 2 values, field name and value to match
        :type selector: tuple
        :param value: dict to put into list
        :type value: dict
        """
        for item in values:
            if item[selector[0]] == selector[1]:
                item['cnt'] += value['cnt']
                item['urls'] += value['urls']
                return

        # store a private copy: appending the caller's dict would make later
        # increments on this list silently change the source object as well
        values.append(copy.deepcopy(value))

    def recalculate(self):
        """
        Recalculate averages, stdev and percentiles

        :return: self, to allow chaining
        """
        if self[self.SAMPLE_COUNT]:
            self[self.AVG_CONN_TIME] = self.sum_cn / self[self.SAMPLE_COUNT]
            self[self.AVG_LATENCY] = self.sum_lt / self[self.SAMPLE_COUNT]
            self[self.AVG_RESP_TIME] = self.sum_rt / self[self.SAMPLE_COUNT]

        # keep the stored concurrency when no per-source values are known
        if self._concurrencies:
            self[self.CONCURRENCY] = sum(self._concurrencies.values())

        perc, stdev = self.__perc_and_stdev(self[self.RESP_TIMES], self.perc_levels, self[self.AVG_RESP_TIME])
        for level, val in perc:
            self[self.PERCENTILES][str(float(level))] = val

        self[self.STDEV_RESP_TIME] = stdev

        return self

    def merge_kpis(self, src, sid=None):
        """
        Merge other instance into self

        src is recalculated first, so its stored concurrency is current.
        Averages, stdev and percentiles of self are not refreshed here,
        call recalculate() for that.

        :param sid: source ID to use when suming up concurrency
        :type src: KPISet
        :return:
        """
        src.recalculate()

        self.sum_cn += src.sum_cn
        self.sum_lt += src.sum_lt
        self.sum_rt += src.sum_rt

        self[self.SAMPLE_COUNT] += src[self.SAMPLE_COUNT]
        self[self.SUCCESSES] += src[self.SUCCESSES]
        self[self.FAILURES] += src[self.FAILURES]
        # NOTE: should it be average? mind the timestamp gaps
        self._concurrencies[sid] = src[self.CONCURRENCY]

        self[self.RESP_TIMES].update(src[self.RESP_TIMES])
        self[self.RESP_CODES].update(src[self.RESP_CODES])

        for src_item in src[self.ERRORS]:
            self.inc_list(self[self.ERRORS], ('msg', src_item['msg']), src_item)

    @staticmethod
    def from_dict(obj):
        """
        Restore instance from its plain dict form

        obj has to contain the sample count and the three averages, they are
        used to restore the running sums (KeyError otherwise).

        :type obj: dict
        :rtype: KPISet
        """
        inst = KPISet()
        for key, val in iteritems(obj):
            inst[key] = val
        inst.sum_cn = obj[inst.AVG_CONN_TIME] * obj[inst.SAMPLE_COUNT]
        inst.sum_lt = obj[inst.AVG_LATENCY] * obj[inst.SAMPLE_COUNT]
        inst.sum_rt = obj[inst.AVG_RESP_TIME] * obj[inst.SAMPLE_COUNT]
        inst.perc_levels = [float(x) for x in inst[inst.PERCENTILES].keys()]
        # restore Counter semantics: with plain dicts update() in merge_kpis
        # would replace counts instead of adding them, and add_sample would
        # fail on a response time that was not seen before
        inst[inst.RESP_TIMES] = Counter({float(level): count for level, count in iteritems(inst[inst.RESP_TIMES])})
        inst[inst.RESP_CODES] = Counter(inst[inst.RESP_CODES])
        for error in inst[KPISet.ERRORS]:
            error['urls'] = Counter(error['urls'])
        return inst

    @staticmethod
    def __perc_and_stdev(cnts_dict, percentiles_to_calc=(), avg=0):
        """
        from http://stackoverflow.com/questions/25070086/percentiles-from-counts-of-values
        Returns [(percentile, value)] with nearest rank percentiles.
        Percentile 0: <min_value>, 100: <max_value>.
        cnts_dict: { <value>: <count> }
        percentiles_to_calc: iterable for percentiles to calculate; 0 <= ~ <= 100

        Also returns the standard deviation of the values around avg, each
        value weighted by its count and normalized by the total count.

        :type percentiles_to_calc: list(float)
        :type cnts_dict: collections.Counter
        :type avg: float
        :return: tuple of percentiles list and stdev; ([], 0) for empty counts
        :rtype: tuple
        """
        assert all(0 <= percentile <= 100 for percentile in percentiles_to_calc)
        percentiles = []
        if not cnts_dict:
            return percentiles, 0

        num = sum(cnts_dict.values())
        cnts = sorted(cnts_dict.items())
        last_pos = len(cnts) - 1
        curr_cnts_pos = 0  # current position in cnts
        curr_pos = cnts[0][1]  # sum of freqs up to current_cnts_pos

        for percentile in sorted(percentiles_to_calc):
            if percentile < 100:
                percentile_pos = percentile / 100.0 * num
                # never step past the last bucket, even with odd (zero) counts
                while curr_pos <= percentile_pos and curr_cnts_pos < last_pos:
                    curr_cnts_pos += 1
                    curr_pos += cnts[curr_cnts_pos][1]

                percentiles.append((percentile, cnts[curr_cnts_pos][0]))
            else:
                percentiles.append((percentile, cnts[-1][0]))  # we could add a small value

        sqr_diffs = sum(count * math.pow(value - avg, 2) for value, count in cnts)
        # squared diffs are weighted by counts, so the divisor is the total
        # number of samples, not the number of distinct values
        stdev = math.sqrt(sqr_diffs / float(num)) if num else 0
        return percentiles, stdev
Exemple #2
0
class KPISet(BetterDict):
    """
    Holder of all KPIs collected for one label. Other KPISet objects
    can be merged into it, which is how cumulative results are composed.
    """
    # keys of the stored KPIs
    ERRORS = "errors"
    SAMPLE_COUNT = "throughput"
    CONCURRENCY = "concurrency"
    SUCCESSES = "succ"
    FAILURES = "fail"
    BYTE_COUNT = "bytes"
    RESP_TIMES = "rt"
    AVG_RESP_TIME = "avg_rt"
    STDEV_RESP_TIME = "stdev_rt"
    AVG_LATENCY = "avg_lt"
    AVG_CONN_TIME = "avg_ct"
    PERCENTILES = "perc"
    RESP_CODES = "rc"
    # kinds of error items
    ERRTYPE_ERROR = 0
    ERRTYPE_ASSERT = 1
    ERRTYPE_SUBSAMPLE = 2

    def __init__(self, perc_levels=(), rt_dist_maxlen=None):
        """
        :param perc_levels: percentile levels handed to the response times
            counter by recalculate()
        :param rt_dist_maxlen: kept as rtimes_len; not used otherwise
            by this class
        """
        super(KPISet, self).__init__()
        self.sum_rt = self.sum_lt = self.sum_cn = 0
        self.perc_levels = perc_levels
        self.rtimes_len = rt_dist_maxlen
        self._concurrencies = BetterDict()  # NOTE: shouldn't it be Counter?

        # scalars
        scalar_keys = (
            KPISet.SAMPLE_COUNT,
            KPISet.CONCURRENCY,
            KPISet.SUCCESSES,
            KPISet.FAILURES,
            KPISet.AVG_RESP_TIME,
            KPISet.STDEV_RESP_TIME,
            KPISet.AVG_LATENCY,
            KPISet.AVG_CONN_TIME,
            KPISet.BYTE_COUNT,
        )
        for scalar_key in scalar_keys:
            self[scalar_key] = 0

        # vectors
        self[KPISet.ERRORS] = []
        # is maximum value of 30 minutes enough?
        self[KPISet.RESP_TIMES] = RespTimesCounter(1, 60 * 30 * 1000, 3)
        self[KPISet.RESP_CODES] = Counter()
        self[KPISet.PERCENTILES] = BetterDict()

    def __deepcopy__(self, memo):
        """
        Produce a KPISet with the same sums and deep copies of all stored
        values; the per-source concurrency map of the copy starts empty.

        :rtype: KPISet
        """
        clone = KPISet(self.perc_levels, self.rtimes_len)
        clone.sum_rt, clone.sum_lt, clone.sum_cn = self.sum_rt, self.sum_lt, self.sum_cn
        for key, val in iteritems(self):
            clone[key] = copy.deepcopy(val, memo)
        return clone

    @staticmethod
    def error_item_skel(error, ret_c, cnt, errtype, urls, tag):
        """
        Compose the dict describing one kind of error

        :type error: str
        :type ret_c: str
        :type tag: str
        :type cnt: int
        :type errtype: int
        :type urls: collections.Counter
        :rtype: dict
        """
        assert isinstance(urls, collections.Counter)
        skeleton = {
            "cnt": cnt,
            "msg": error,
            "tag": tag,  # just one more string qualifier
            "rc": ret_c,
            "type": errtype,
            "urls": urls,
        }
        return skeleton

    def add_sample(self, sample):
        """
        Account one sample, given as tuple of:
        cnc, rt, cn, lt, rc, error, trname, byte_count

        :type sample: tuple
        """
        # TODO: introduce a flag to not count failed in resp times? or offer it always?
        cnc, r_time, con_time, latency, r_code, error, trname, byte_count = sample
        self[self.SAMPLE_COUNT] += 1
        if cnc:
            self._concurrencies[trname] = cnc

        if r_code is not None:
            self[self.RESP_CODES][r_code] += 1

            # count times only if we have RCs
            if con_time:
                self.sum_cn += con_time
            self.sum_lt += latency
            self.sum_rt += r_time

        if error is None:
            self[self.SUCCESSES] += 1
        else:
            self[self.FAILURES] += 1
            err_item = self.error_item_skel(error, r_code, 1, KPISet.ERRTYPE_ERROR, Counter(), None)
            self.inc_list(self[self.ERRORS], ("msg", error), err_item)

        # the distribution counter is fed with the time multiplied by 1000
        self[self.RESP_TIMES].add(round(r_time * 1000, 3), 1)

        if byte_count is not None:
            self[self.BYTE_COUNT] += byte_count
        # TODO: max/min rt? there is percentiles...
        # TODO: throughput if interval is not 1s

    @staticmethod
    def inc_list(values, selector, value):
        """
        Add 'cnt' and 'urls' of value to the first list item matched by
        selector, or append a deep copy of value if there is no match

        :param values: list to update
        :param selector: tuple of 2 values, field name and value to match
        :param value: dict to put into list
        :type values: list[dict]
        :type selector: tuple
        :type value: dict
        """
        for existing in values:
            if existing[selector[0]] != selector[1]:
                continue
            existing['cnt'] += value['cnt']
            existing['urls'] += value['urls']
            return

        values.append(copy.deepcopy(value))

    def recalculate(self):
        """
        Refresh averages, concurrency and percentiles from collected data.
        The stdev value is not touched by this method.

        :return: self
        """
        total = self[self.SAMPLE_COUNT]
        if total:
            self[self.AVG_CONN_TIME] = self.sum_cn / total
            self[self.AVG_LATENCY] = self.sum_lt / total
            self[self.AVG_RESP_TIME] = self.sum_rt / total

        if len(self._concurrencies) > 0:
            self[self.CONCURRENCY] = sum(self._concurrencies.values())

        resp_times = self[self.RESP_TIMES]
        if resp_times:
            percentiles = {}
            for perc, value in iteritems(resp_times.get_percentiles_dict(self.perc_levels)):
                # counter values are divided back by 1000, see add_sample
                percentiles[str(float(perc))] = value / 1000.0
            self[self.PERCENTILES] = percentiles

        return self

    def merge_kpis(self, src, sid=None):
        """
        Add the data of another KPISet to this one

        :param sid: source ID to use when suming up concurrency
        :type src: KPISet
        :return:
        """
        src.recalculate()  # TODO: could be not resource efficient strat

        self.sum_cn += src.sum_cn
        self.sum_lt += src.sum_lt
        self.sum_rt += src.sum_rt

        for counter_key in (self.SAMPLE_COUNT, self.SUCCESSES, self.FAILURES, self.BYTE_COUNT):
            self[counter_key] += src[counter_key]

        # NOTE: should it be average? mind the timestamp gaps
        src_concurrency = src[self.CONCURRENCY]
        if src_concurrency:
            self._concurrencies[sid] = src_concurrency

        src_times = src[self.RESP_TIMES]
        if src_times:
            self[self.RESP_TIMES].merge(src_times)
        elif not self[self.PERCENTILES]:
            # using existing percentiles
            # FIXME: it's not valid to overwrite, better take average
            self[self.PERCENTILES] = copy.deepcopy(src[self.PERCENTILES])

        self[self.RESP_CODES].update(src[self.RESP_CODES])

        own_errors = self[self.ERRORS]
        for src_item in src[self.ERRORS]:
            self.inc_list(own_errors, ('msg', src_item['msg']), src_item)

    @staticmethod
    def from_dict(obj):
        """
        Build a KPISet out of its plain dict representation

        :type obj: dict
        :rtype: KPISet
        """
        inst = KPISet()
        for key, val in iteritems(obj):
            if key != inst.RESP_TIMES:
                inst[key] = val
                continue

            # response times are fed into the existing counter, when given as dict
            if isinstance(val, dict):
                for value, count in iteritems(val):
                    inst[inst.RESP_TIMES].add(value, count)

        # sums are restored as average multiplied by sample count
        restored = (
            ("sum_cn", inst.AVG_CONN_TIME),
            ("sum_lt", inst.AVG_LATENCY),
            ("sum_rt", inst.AVG_RESP_TIME),
        )
        for attr_name, avg_key in restored:
            setattr(inst, attr_name, obj[avg_key] * obj[inst.SAMPLE_COUNT])

        inst.perc_levels = list(map(float, inst[inst.PERCENTILES].keys()))
        for err_item in inst[KPISet.ERRORS]:
            err_item['urls'] = Counter(err_item['urls'])
        return inst
Exemple #3
0
class KPISet(BetterDict):
    """
    Main entity in results, contains all KPIs for single label,
    capable of merging other KPISet's into it to compose cumulative results
    """
    # dictionary keys under which the individual KPIs are stored
    ERRORS = "errors"
    SAMPLE_COUNT = "throughput"
    CONCURRENCY = "concurrency"
    SUCCESSES = "succ"
    FAILURES = "fail"
    RESP_TIMES = "rt"
    AVG_RESP_TIME = "avg_rt"
    STDEV_RESP_TIME = "stdev_rt"
    AVG_LATENCY = "avg_lt"
    AVG_CONN_TIME = "avg_ct"
    PERCENTILES = "perc"
    RESP_CODES = "rc"
    # values for the "type" field of error items (see error_item_skel)
    ERRTYPE_ERROR = 0
    ERRTYPE_ASSERT = 1

    def __init__(self, perc_levels=()):
        """
        :param perc_levels: percentile levels (0..100) computed by recalculate()
        """
        super(KPISet, self).__init__()
        # running totals, turned into averages by recalculate()
        self.sum_rt = 0
        self.sum_lt = 0
        self.sum_cn = 0
        self.perc_levels = perc_levels
        # NOTE(review): the get(key, default) calls below are used for their
        # side effect; this presumes BetterDict.get() stores the default under
        # the key when it is missing - confirm against BetterDict
        # scalars
        self.get(self.SAMPLE_COUNT, 0)
        self.get(self.CONCURRENCY, 0)
        self.get(self.SUCCESSES, 0)
        self.get(self.FAILURES, 0)
        self.get(self.AVG_RESP_TIME, 0)
        self.get(self.STDEV_RESP_TIME, 0)
        self.get(self.AVG_LATENCY, 0)
        self.get(self.AVG_CONN_TIME, 0)
        # vectors
        self.get(self.ERRORS, [])
        self.get(self.RESP_TIMES, Counter())
        self.get(self.RESP_CODES, Counter())
        self.get(self.PERCENTILES)
        # concurrency per source (transaction name or merge source ID)
        self._concurrencies = BetterDict()  # NOTE: shouldn't it be Counter?

    def __deepcopy__(self, memo):
        """
        Copy the running sums and deep-copy every stored KPI value.
        The per-source concurrency map is not copied: the new instance
        starts with an empty one.

        :rtype: KPISet
        """
        mycopy = KPISet(self.perc_levels)
        mycopy.sum_rt = self.sum_rt
        mycopy.sum_lt = self.sum_lt
        mycopy.sum_cn = self.sum_cn
        for key, val in iteritems(self):
            mycopy[key] = copy.deepcopy(val, memo)
        return mycopy

    @staticmethod
    def error_item_skel(error, ret_c, cnt, errtype, urls):
        """
        Build the dict that describes one kind of error

        :type error: str
        :type ret_c: str
        :type cnt: int
        :type errtype: int
        :type urls: Counter
        :rtype: dict
        """
        return {
            "cnt": cnt,
            "msg": error,
            "rc": ret_c,
            "type": errtype,
            "urls": urls
        }

    def add_sample(self, sample):
        """
        Add sample, consisting of: cnc, rt, cn, lt, rc, error, trname

        Timings are summed only for samples that carry a response code,
        while every sample is counted in the response time distribution.

        :type sample: tuple
        """
        # TODO: introduce a flag to not count failed in resp times? or offer it always?
        cnc, r_time, con_time, latency, r_code, error, trname = sample
        self[self.SAMPLE_COUNT] = self.get(self.SAMPLE_COUNT, 0) + 1
        if cnc:
            self._concurrencies[trname] = cnc

        if r_code is not None:
            resp_codes = self.get(self.RESP_CODES)
            resp_codes[r_code] = resp_codes.get(r_code, 0) + 1

            # count times only if we have RCs
            if con_time:
                self.sum_cn += con_time
            self.sum_lt += latency
            self.sum_rt += r_time

        if error is not None:
            self[self.FAILURES] = self.get(self.FAILURES, 0) + 1

            item = self.error_item_skel(error, r_code, 1, KPISet.ERRTYPE_ERROR,
                                        Counter())
            self.inc_list(self.get(self.ERRORS), ("msg", error), item)
        else:
            self[self.SUCCESSES] = self.get(self.SUCCESSES, 0) + 1

        self.get(self.RESP_TIMES)[r_time] += 1
        # TODO: max/min rt? there is percentiles...
        # TODO: throughput if interval is not 1s

    @staticmethod
    def inc_list(values, selector, value):
        """
        Increment list item, based on selector criteria

        The first item whose field matches the selector gets 'cnt' and
        'urls' of value added to it; when nothing matches, a copy of value
        is appended to the list.

        :param values: list to update
        :type values: list
        :param selector: tuple of 2 values, field name and value to match
        :type selector: tuple
        :param value: dict to put into list
        :type value: dict
        """
        for item in values:
            if item[selector[0]] == selector[1]:
                item['cnt'] += value['cnt']
                item['urls'] += value['urls']
                return

        # store a private copy: appending the caller's dict would make later
        # increments on this list silently change the source object as well
        values.append(copy.deepcopy(value))

    def recalculate(self):
        """
        Recalculate averages, stdev and percentiles

        :return: self, to allow chaining
        """
        if self[self.SAMPLE_COUNT]:
            self[self.AVG_CONN_TIME] = self.sum_cn / self[self.SAMPLE_COUNT]
            self[self.AVG_LATENCY] = self.sum_lt / self[self.SAMPLE_COUNT]
            self[self.AVG_RESP_TIME] = self.sum_rt / self[self.SAMPLE_COUNT]

        # keep the stored concurrency when no per-source values are known
        if len(self._concurrencies):
            self[self.CONCURRENCY] = sum(self._concurrencies.values())

        perc, stdev = self.__perc_and_stdev(self[self.RESP_TIMES],
                                            self.perc_levels,
                                            self[self.AVG_RESP_TIME])
        for level, val in perc:
            self[self.PERCENTILES][str(float(level))] = val

        self[self.STDEV_RESP_TIME] = stdev

        return self

    def merge_kpis(self, src, sid=None):
        """
        Merge other instance into self

        src is recalculated first, so its stored concurrency is current.
        Averages, stdev and percentiles of self are not refreshed here,
        call recalculate() for that.

        :param sid: source ID to use when suming up concurrency
        :type src: KPISet
        :return:
        """
        src.recalculate()

        self.sum_cn += src.sum_cn
        self.sum_lt += src.sum_lt
        self.sum_rt += src.sum_rt

        self[self.SAMPLE_COUNT] += src[self.SAMPLE_COUNT]
        self[self.SUCCESSES] += src[self.SUCCESSES]
        self[self.FAILURES] += src[self.FAILURES]
        # NOTE: should it be average? mind the timestamp gaps
        if src[self.CONCURRENCY]:
            self._concurrencies[sid] = src[self.CONCURRENCY]

        self[self.RESP_TIMES].update(src[self.RESP_TIMES])
        self[self.RESP_CODES].update(src[self.RESP_CODES])

        for src_item in src[self.ERRORS]:
            self.inc_list(self[self.ERRORS], ('msg', src_item['msg']),
                          src_item)

    @staticmethod
    def from_dict(obj):
        """
        Restore instance from its plain dict form

        obj has to contain the sample count and the three averages, they are
        used to restore the running sums (KeyError otherwise).

        :type obj: dict
        :rtype: KPISet
        """
        inst = KPISet()
        for key, val in iteritems(obj):
            inst[key] = val
        inst.sum_cn = obj[inst.AVG_CONN_TIME] * obj[inst.SAMPLE_COUNT]
        inst.sum_lt = obj[inst.AVG_LATENCY] * obj[inst.SAMPLE_COUNT]
        inst.sum_rt = obj[inst.AVG_RESP_TIME] * obj[inst.SAMPLE_COUNT]
        inst.perc_levels = [float(x) for x in inst[inst.PERCENTILES].keys()]
        # restore Counter semantics: with plain dicts update() in merge_kpis
        # would replace counts instead of adding them, and add_sample would
        # fail on a response time that was not seen before
        inst[inst.RESP_TIMES] = Counter({
            float(level): count
            for level, count in iteritems(inst[inst.RESP_TIMES])
        })
        inst[inst.RESP_CODES] = Counter(inst[inst.RESP_CODES])
        for error in inst[KPISet.ERRORS]:
            error['urls'] = Counter(error['urls'])
        return inst

    @staticmethod
    def __perc_and_stdev(cnts_dict, percentiles_to_calc=(), avg=0):
        """
        from http://stackoverflow.com/questions/25070086/percentiles-from-counts-of-values
        Returns [(percentile, value)] with nearest rank percentiles.
        Percentile 0: <min_value>, 100: <max_value>.
        cnts_dict: { <value>: <count> }
        percentiles_to_calc: iterable for percentiles to calculate; 0 <= ~ <= 100

        Also returns the standard deviation of the values around avg, each
        value weighted by its count and normalized by the total count.

        :type percentiles_to_calc: list(float)
        :type cnts_dict: collections.Counter
        :type avg: float
        :return: tuple of percentiles list and stdev; ([], 0) for empty counts
        :rtype: tuple
        """
        assert all(0 <= percentile <= 100
                   for percentile in percentiles_to_calc)
        percentiles = []
        if not cnts_dict:
            return percentiles, 0

        num = sum(cnts_dict.values())
        cnts = sorted(cnts_dict.items())
        last_pos = len(cnts) - 1
        curr_cnts_pos = 0  # current position in cnts
        curr_pos = cnts[0][1]  # sum of freqs up to current_cnts_pos

        for percentile in sorted(percentiles_to_calc):
            if percentile < 100:
                percentile_pos = percentile / 100.0 * num
                # never step past the last bucket, even with odd (zero) counts
                while curr_pos <= percentile_pos and curr_cnts_pos < last_pos:
                    curr_cnts_pos += 1
                    curr_pos += cnts[curr_cnts_pos][1]

                percentiles.append((percentile, cnts[curr_cnts_pos][0]))
            else:
                percentiles.append(
                    (percentile, cnts[-1][0]))  # we could add a small value

        sqr_diffs = sum(count * math.pow(value - avg, 2)
                        for value, count in cnts)
        # squared diffs are weighted by counts, so the divisor is the total
        # number of samples, not the number of distinct values
        stdev = math.sqrt(sqr_diffs / float(num)) if num else 0
        return percentiles, stdev
Exemple #4
0
class KPISet(BetterDict):
    """
    Main entity in results, contains all KPIs for single label,
    capable of merging other KPISet's into it to compose cumulative results
    """
    # dictionary keys under which the individual KPIs are stored
    ERRORS = "errors"
    SAMPLE_COUNT = "throughput"
    CONCURRENCY = "concurrency"
    SUCCESSES = "succ"
    FAILURES = "fail"
    BYTE_COUNT = "bytes"
    RESP_TIMES = "rt"
    AVG_RESP_TIME = "avg_rt"
    STDEV_RESP_TIME = "stdev_rt"
    AVG_LATENCY = "avg_lt"
    AVG_CONN_TIME = "avg_ct"
    PERCENTILES = "perc"
    RESP_CODES = "rc"
    # values for the "type" field of error items (see error_item_skel)
    ERRTYPE_ERROR = 0
    ERRTYPE_ASSERT = 1

    def __init__(self, perc_levels=(), rt_dist_maxlen=None):
        """
        :param perc_levels: percentile levels (0..100) computed by recalculate()
        :param rt_dist_maxlen: maximum number of distinct response times kept
            by compact_times(); falsy value disables compaction
        """
        super(KPISet, self).__init__()
        # running totals, turned into averages by recalculate()
        self.sum_rt = 0
        self.sum_lt = 0
        self.sum_cn = 0
        self.perc_levels = perc_levels
        self.rtimes_len = rt_dist_maxlen
        # NOTE(review): the get(key, default) calls below are used for their
        # side effect; this presumes BetterDict.get() stores the default under
        # the key when it is missing - confirm against BetterDict
        # scalars
        self.get(self.SAMPLE_COUNT, 0)
        self.get(self.CONCURRENCY, 0)
        self.get(self.SUCCESSES, 0)
        self.get(self.FAILURES, 0)
        self.get(self.AVG_RESP_TIME, 0)
        self.get(self.STDEV_RESP_TIME, 0)
        self.get(self.AVG_LATENCY, 0)
        self.get(self.AVG_CONN_TIME, 0)
        self.get(self.BYTE_COUNT, 0)
        # vectors
        self.get(self.ERRORS, [])
        self.get(self.RESP_TIMES, Counter())
        self.get(self.RESP_CODES, Counter())
        self.get(self.PERCENTILES)
        # concurrency per source (transaction name or merge source ID)
        self._concurrencies = BetterDict()  # NOTE: shouldn't it be Counter?

    def __deepcopy__(self, memo):
        """
        Copy the running sums and settings, deep-copy every stored KPI value.
        The per-source concurrency map is not copied: the new instance
        starts with an empty one.

        :rtype: KPISet
        """
        mycopy = KPISet(self.perc_levels)
        mycopy.sum_rt = self.sum_rt
        mycopy.sum_lt = self.sum_lt
        mycopy.sum_cn = self.sum_cn
        mycopy.rtimes_len = self.rtimes_len
        for key, val in iteritems(self):
            mycopy[key] = copy.deepcopy(val, memo)
        return mycopy

    @staticmethod
    def error_item_skel(error, ret_c, cnt, errtype, urls):
        """
        Build the dict that describes one kind of error

        :type error: str
        :type ret_c: str
        :type cnt: int
        :type errtype: int
        :type urls: collections.Counter
        :rtype: dict
        """
        return {
            "cnt": cnt,
            "msg": error,
            "rc": ret_c,
            "type": errtype,
            "urls": urls
        }

    def add_sample(self, sample):
        """
        Add sample, consisting of: cnc, rt, cn, lt, rc, error, trname, byte_count

        Timings are summed only for samples that carry a response code,
        while every sample is counted in the response time distribution.

        :type sample: tuple
        """
        # TODO: introduce a flag to not count failed in resp times? or offer it always?
        cnc, r_time, con_time, latency, r_code, error, trname, byte_count = sample
        self[self.SAMPLE_COUNT] += 1
        if cnc:
            self._concurrencies[trname] = cnc

        if r_code is not None:
            self[self.RESP_CODES][r_code] += 1

            # count times only if we have RCs
            if con_time:
                self.sum_cn += con_time
            self.sum_lt += latency
            self.sum_rt += r_time

        if error is not None:
            self[self.FAILURES] += 1

            item = self.error_item_skel(error, r_code, 1, KPISet.ERRTYPE_ERROR, Counter())
            self.inc_list(self[self.ERRORS], ("msg", error), item)
        else:
            self[self.SUCCESSES] += 1

        self[self.RESP_TIMES][r_time] += 1

        if byte_count is not None:
            self[self.BYTE_COUNT] += byte_count
        # TODO: max/min rt? there is percentiles...
        # TODO: throughput if interval is not 1s

    @staticmethod
    def inc_list(values, selector, value):
        """
        Increment list item, based on selector criteria

        The first item whose field matches the selector gets 'cnt' and
        'urls' of value added to it; when nothing matches, a deep copy of
        value is appended to the list.

        :param values: list to update
        :param selector: tuple of 2 values, field name and value to match
        :param value: dict to put into list
        :type values: list[dict]
        :type selector: tuple
        :type value: dict
        """
        for item in values:
            if item[selector[0]] == selector[1]:
                item['cnt'] += value['cnt']
                item['urls'] += value['urls']
                return

        values.append(copy.deepcopy(value))

    def recalculate(self):
        """
        Recalculate averages, stdev and percentiles

        :return: self, to allow chaining
        """
        if self[self.SAMPLE_COUNT]:
            self[self.AVG_CONN_TIME] = self.sum_cn / self[self.SAMPLE_COUNT]
            self[self.AVG_LATENCY] = self.sum_lt / self[self.SAMPLE_COUNT]
            self[self.AVG_RESP_TIME] = self.sum_rt / self[self.SAMPLE_COUNT]

        # keep the stored concurrency when no per-source values are known
        if len(self._concurrencies):
            self[self.CONCURRENCY] = sum(self._concurrencies.values())

        perc, stdev = self.__perc_and_stdev(self[self.RESP_TIMES], self.perc_levels, self[self.AVG_RESP_TIME])
        for level, val in perc:
            self[self.PERCENTILES][str(float(level))] = val

        self[self.STDEV_RESP_TIME] = stdev

        return self

    def compact_times(self):
        """
        Shrink the response times distribution to at most rtimes_len distinct
        keys, by merging the closest neighbour keys into one key that is
        placed between them proportionally to their counts.
        Does nothing when rtimes_len is not set.
        """
        if not self.rtimes_len:
            return

        times = self[KPISet.RESP_TIMES]
        redundant_cnt = len(times) - self.rtimes_len
        if redundant_cnt > 0:
            logging.debug("Compacting %s response timing into %s", len(times), self.rtimes_len)

        # merging needs at least two keys; without this check a negative
        # rtimes_len would keep the loop spinning on a single remaining key
        while redundant_cnt > 0 and len(times) > 1:
            keys = sorted(times.keys())
            distances = [(lidx, keys[lidx + 1] - keys[lidx]) for lidx in range(len(keys) - 1)]
            distances.sort(key=operator.itemgetter(1))  # sort by distance

            # cast candidates for consolidation; the slice is taken once,
            # before redundant_cnt gets decreased inside of the loop
            for lidx, _ in distances[:redundant_cnt]:
                lkey = keys[lidx]
                rkey = keys[lidx + 1]
                if lkey in times and rkey in times:  # neighbours aren't changed
                    lval = times.pop(lkey)
                    rval = times.pop(rkey)

                    # shift key proportionally to values
                    idx_new = lkey + (rkey - lkey) * float(rval) / (lval + rval)

                    # keep precision the same
                    lprec = len(str(math.modf(lkey)[0])) - 2
                    rprec = len(str(math.modf(rkey)[0])) - 2
                    idx_new = round(idx_new, max(lprec, rprec))

                    times[idx_new] = lval + rval
                    redundant_cnt -= 1

    def merge_kpis(self, src, sid=None):
        """
        Merge other instance into self

        src is recalculated first, so its stored concurrency is current.
        Averages and stdev of self are not refreshed here, call
        recalculate() for that.

        :param sid: source ID to use when suming up concurrency
        :type src: KPISet
        :return:
        """
        src.recalculate()  # TODO: could be not resource efficient strat

        self.sum_cn += src.sum_cn
        self.sum_lt += src.sum_lt
        self.sum_rt += src.sum_rt

        self[self.SAMPLE_COUNT] += src[self.SAMPLE_COUNT]
        self[self.SUCCESSES] += src[self.SUCCESSES]
        self[self.FAILURES] += src[self.FAILURES]
        self[self.BYTE_COUNT] += src[self.BYTE_COUNT]
        # NOTE: should it be average? mind the timestamp gaps
        if src[self.CONCURRENCY]:
            self._concurrencies[sid] = src[self.CONCURRENCY]

        if src[self.RESP_TIMES]:
            # using raw times to calculate percentiles
            self[self.RESP_TIMES].update(src[self.RESP_TIMES])
            self.compact_times()
        elif not self[self.PERCENTILES]:
            # using existing percentiles
            # FIXME: it's not valid to overwrite, better take average
            self[self.PERCENTILES] = copy.deepcopy(src[self.PERCENTILES])

        self[self.RESP_CODES].update(src[self.RESP_CODES])

        for src_item in src[self.ERRORS]:
            self.inc_list(self[self.ERRORS], ('msg', src_item['msg']), src_item)

    @staticmethod
    def from_dict(obj):
        """
        Restore instance from its plain dict form

        obj has to contain the sample count and the three averages, they are
        used to restore the running sums (KeyError otherwise).

        :type obj: dict
        :rtype: KPISet
        """
        inst = KPISet()
        for key, val in iteritems(obj):
            inst[key] = val
        inst.sum_cn = obj[inst.AVG_CONN_TIME] * obj[inst.SAMPLE_COUNT]
        inst.sum_lt = obj[inst.AVG_LATENCY] * obj[inst.SAMPLE_COUNT]
        inst.sum_rt = obj[inst.AVG_RESP_TIME] * obj[inst.SAMPLE_COUNT]
        inst.perc_levels = [float(x) for x in inst[inst.PERCENTILES].keys()]
        # restore Counter semantics: with plain dicts update() in merge_kpis
        # would replace counts instead of adding them, and add_sample would
        # fail on a response time or code that was not seen before
        inst[inst.RESP_TIMES] = Counter({float(level): count for level, count in iteritems(inst[inst.RESP_TIMES])})
        inst[inst.RESP_CODES] = Counter(inst[inst.RESP_CODES])
        for error in inst[KPISet.ERRORS]:
            error['urls'] = Counter(error['urls'])
        return inst

    @staticmethod
    def __perc_and_stdev(cnts_dict, percentiles_to_calc=(), avg=0):
        """
        from http://stackoverflow.com/questions/25070086/percentiles-from-counts-of-values
        Returns [(percentile, value)] with nearest rank percentiles.
        Percentile 0: <min_value>, 100: <max_value>.
        cnts_dict: { <value>: <count> }
        percentiles_to_calc: iterable for percentiles to calculate; 0 <= ~ <= 100

        Also returns the standard deviation of the values around avg, each
        value weighted by its count and normalized by the total count.

        :type percentiles_to_calc: list(float)
        :type cnts_dict: collections.Counter
        :type avg: float
        :return: tuple of percentiles list and stdev; ([], 0) for empty counts
        :rtype: tuple
        """
        assert all(0 <= percentile <= 100 for percentile in percentiles_to_calc)
        percentiles = []
        if not cnts_dict:
            return percentiles, 0

        num = sum(cnts_dict.values())
        cnts = sorted(cnts_dict.items())
        last_pos = len(cnts) - 1
        curr_cnts_pos = 0  # current position in cnts
        curr_pos = cnts[0][1]  # sum of freqs up to current_cnts_pos

        for percentile in sorted(percentiles_to_calc):
            if percentile < 100:
                percentile_pos = percentile / 100.0 * num
                # never step past the last bucket, even with odd (zero) counts
                while curr_pos <= percentile_pos and curr_cnts_pos < last_pos:
                    curr_cnts_pos += 1
                    curr_pos += cnts[curr_cnts_pos][1]

                percentiles.append((percentile, cnts[curr_cnts_pos][0]))
            else:
                percentiles.append((percentile, cnts[-1][0]))  # we could add a small value

        sqr_diffs = sum(count * math.pow(value - avg, 2) for value, count in cnts)
        # squared diffs are weighted by counts, so the divisor is the total
        # number of samples, not the number of distinct values
        stdev = math.sqrt(sqr_diffs / float(num)) if num else 0
        return percentiles, stdev
Exemple #5
0
class KPISet(BetterDict):
    """
    Main entity in results, contains all KPIs for single label,
    capable of merging other KPISet's into it to compose cumulative results
    """
    # Keys under which the individual KPIs are stored in this dict
    ERRORS = "errors"  # list of error items, see error_item_skel()
    SAMPLE_COUNT = "throughput"  # number of samples added
    CONCURRENCY = "concurrency"  # sum of the per-source concurrency values
    SUCCESSES = "succ"  # number of samples without an error
    FAILURES = "fail"  # number of samples with an error
    BYTE_COUNT = "bytes"  # sum of the byte counts of the samples
    RESP_TIMES = "rt"  # response times distribution
    AVG_RESP_TIME = "avg_rt"  # sum of response times divided by sample count
    STDEV_RESP_TIME = "stdev_rt"  # initialised to 0
    AVG_LATENCY = "avg_lt"  # sum of latencies divided by sample count
    AVG_CONN_TIME = "avg_ct"  # sum of connect times divided by sample count
    PERCENTILES = "perc"  # mapping of percentile level to response time
    RESP_CODES = "rc"  # counter of response codes
    # Values for the "type" field of an error item
    ERRTYPE_ERROR = 0  # used for errors registered by add_sample()
    ERRTYPE_ASSERT = 1  # presumably marks assertion failures - TODO confirm

    def __init__(self, perc_levels=(), rt_dist_maxlen=None):
        """
        Create an empty KPI set with every known key pre-populated

        :param perc_levels: percentile levels that recalculate() asks the distribution for
        :param rt_dist_maxlen: stored as ``rtimes_len``, not used otherwise by the constructor
        """
        super(KPISet, self).__init__()
        # running totals, the averages are derived from them
        self.sum_rt = self.sum_lt = self.sum_cn = 0
        self.perc_levels = perc_levels
        self.rtimes_len = rt_dist_maxlen
        self._concurrencies = BetterDict()  # NOTE: shouldn't it be Counter?

        # scalars, all of them start from zero
        scalar_keys = (
            KPISet.SAMPLE_COUNT,
            KPISet.CONCURRENCY,
            KPISet.SUCCESSES,
            KPISet.FAILURES,
            KPISet.AVG_RESP_TIME,
            KPISet.STDEV_RESP_TIME,
            KPISet.AVG_LATENCY,
            KPISet.AVG_CONN_TIME,
            KPISet.BYTE_COUNT,
        )
        for scalar_key in scalar_keys:
            self[scalar_key] = 0

        # vectors
        self[KPISet.ERRORS] = []
        self[KPISet.RESP_TIMES] = RespTimesCounter(1, 60 * 30 * 1000, 3)  # is maximum value of 30 minutes enough?
        self[KPISet.RESP_CODES] = Counter()
        self[KPISet.PERCENTILES] = BetterDict()

    def __deepcopy__(self, memo):
        """
        Produce an independent copy of this KPI set for copy.deepcopy()

        Besides the dict items, the running sums and the per-source concurrency map
        are carried over, so that add_sample()/recalculate() on the copy keep
        working from the same state as the original.

        :param memo: memo dict supplied by copy.deepcopy()
        :rtype: KPISet
        """
        mycopy = KPISet(self.perc_levels)
        mycopy.sum_rt = self.sum_rt
        mycopy.sum_lt = self.sum_lt
        mycopy.sum_cn = self.sum_cn
        mycopy.rtimes_len = self.rtimes_len
        # without this the copy would compute concurrency from an incomplete map
        mycopy._concurrencies = copy.deepcopy(self._concurrencies, memo)
        for key, val in iteritems(self):
            mycopy[key] = copy.deepcopy(val, memo)
        return mycopy

    @staticmethod
    def error_item_skel(error, ret_c, cnt, errtype, urls):
        """

        :type error: str
        :type ret_c: str
        :type cnt: int
        :type errtype: int
        :type urls: collections.Counter
        :rtype: dict
        """
        return {
            "cnt": cnt,
            "msg": error,
            "rc": ret_c,
            "type": errtype,
            "urls": urls
        }

    def add_sample(self, sample):
        """
        Account for a single sample in this KPI set

        The sample is a tuple of 8 items, in this order: cnc, rt, cn, lt, rc,
        error, trname, byte_count

        :type sample: tuple
        """
        # TODO: introduce a flag to not count failed in resp times? or offer it always?
        concurrency, resp_time, conn_time, latency, resp_code, err_msg, tr_name, n_bytes = sample

        self[self.SAMPLE_COUNT] += 1
        if concurrency:
            # the last value seen for a transaction name wins
            self._concurrencies[tr_name] = concurrency

        if resp_code is not None:
            self[self.RESP_CODES][resp_code] += 1

            # count times only if we have RCs
            if conn_time:
                self.sum_cn += conn_time
            self.sum_lt += latency
            self.sum_rt += resp_time

        if err_msg is None:
            self[self.SUCCESSES] += 1
        else:
            self[self.FAILURES] += 1

            err_item = self.error_item_skel(err_msg, resp_code, 1, KPISet.ERRTYPE_ERROR, Counter())
            self.inc_list(self[self.ERRORS], ("msg", err_msg), err_item)

        # the distribution gets the response time multiplied by 1000, rounded to 3 digits
        self[self.RESP_TIMES].add(round(resp_time * 1000, 3), 1)

        if n_bytes is not None:
            self[self.BYTE_COUNT] += n_bytes
            # TODO: max/min rt? there is percentiles...
            # TODO: throughput if interval is not 1s

    @staticmethod
    def inc_list(values, selector, value):
        """
        Increment list item, based on selector criteria

        :param values: list to update
        :param selector: tuple of 2 values, field name and value to match
        :param value: dict to put into list
        :type values: list[dict]
        :type selector: tuple
        :type value: dict
        """
        found = False
        for item in values:
            if item[selector[0]] == selector[1]:
                item['cnt'] += value['cnt']
                item['urls'] += value['urls']
                found = True
                break

        if not found:
            values.append(copy.deepcopy(value))

    def recalculate(self):
        """
        Refresh the derived KPIs: averages, concurrency and percentiles

        Averages are the running sums divided by the sample count and are left
        untouched while there are no samples. Concurrency becomes the sum over all
        known sources. Percentiles are taken from the response times distribution
        for ``perc_levels`` and divided by 1000.
        NOTE: the stdev value is not updated by this method.

        :return: self
        """
        sample_count = self[self.SAMPLE_COUNT]
        if sample_count:
            self[self.AVG_CONN_TIME] = self.sum_cn / sample_count
            self[self.AVG_LATENCY] = self.sum_lt / sample_count
            self[self.AVG_RESP_TIME] = self.sum_rt / sample_count

        concurrencies = self._concurrencies
        if len(concurrencies):
            self[self.CONCURRENCY] = sum(concurrencies.values())

        distribution = self[self.RESP_TIMES]
        if distribution:
            # keys are the levels rendered as float strings
            percentiles = {}
            for level, rtime in iteritems(distribution.get_percentiles_dict(self.perc_levels)):
                percentiles[str(float(level))] = rtime / 1000.0
            self[self.PERCENTILES] = percentiles

        return self

    def merge_kpis(self, src, sid=None):
        """
        Fold another KPISet into this one

        The source is recalculated first. Then its running sums, counters,
        response codes and errors are added to this instance. Its concurrency,
        when non-zero, is remembered under the given source ID. Its response times
        distribution is merged in when it is non-empty; otherwise its percentiles
        are copied, but only if this instance has none yet.

        :param src: KPI set to take the values from
        :param sid: source ID to use when summing up concurrency
        :type src: KPISet
        :return:
        """
        src.recalculate()  # TODO: could be not resource efficient strat

        self.sum_cn += src.sum_cn
        self.sum_lt += src.sum_lt
        self.sum_rt += src.sum_rt

        for counter_key in (self.SAMPLE_COUNT, self.SUCCESSES, self.FAILURES, self.BYTE_COUNT):
            self[counter_key] += src[counter_key]

        # NOTE: should it be average? mind the timestamp gaps
        src_concurrency = src[self.CONCURRENCY]
        if src_concurrency:
            self._concurrencies[sid] = src_concurrency

        src_resp_times = src[self.RESP_TIMES]
        if src_resp_times:
            self[self.RESP_TIMES].merge(src_resp_times)
        elif not self[self.PERCENTILES]:
            # using existing percentiles
            # FIXME: it's not valid to overwrite, better take average
            self[self.PERCENTILES] = copy.deepcopy(src[self.PERCENTILES])

        self[self.RESP_CODES].update(src[self.RESP_CODES])

        own_errors = self[self.ERRORS]
        for src_error in src[self.ERRORS]:
            self.inc_list(own_errors, ('msg', src_error['msg']), src_error)

    @staticmethod
    def from_dict(obj):
        """
        Build a KPISet out of a plain dict

        Every key is stored as is, except the response times: those are added
        value by value into the distribution of the new instance, and only when
        they come as a dict. The running sums are restored as average multiplied
        by sample count, the percentile levels are taken from the percentiles keys,
        and the 'urls' of every error item are turned into a Counter.
        NOTE(review): the error items look to be shared with ``obj``, so their
        'urls' are replaced there as well - confirm against BetterDict.

        :type obj: dict
        :rtype: KPISet
        """
        inst = KPISet()
        for key, val in iteritems(obj):
            if key != inst.RESP_TIMES:
                inst[key] = val
                continue

            if isinstance(val, dict):
                for rtime, count in iteritems(val):
                    inst[inst.RESP_TIMES].add(rtime, count)

        # averages times the sample count give the sums back
        inst.sum_cn = obj[inst.AVG_CONN_TIME] * obj[inst.SAMPLE_COUNT]
        inst.sum_lt = obj[inst.AVG_LATENCY] * obj[inst.SAMPLE_COUNT]
        inst.sum_rt = obj[inst.AVG_RESP_TIME] * obj[inst.SAMPLE_COUNT]

        percentile_keys = inst[inst.PERCENTILES].keys()
        inst.perc_levels = [float(level) for level in percentile_keys]

        for err_item in inst[KPISet.ERRORS]:
            err_item['urls'] = Counter(err_item['urls'])
        return inst