class KPISet(BetterDict):
    """
    Main entity in results: contains all KPIs for a single label and is
    capable of merging other KPISet instances into itself to compose
    cumulative results.
    """
    ERRORS = "errors"
    SAMPLE_COUNT = "throughput"
    CONCURRENCY = "concurrency"
    SUCCESSES = "succ"
    FAILURES = "fail"
    RESP_TIMES = "rt"
    AVG_RESP_TIME = "avg_rt"
    STDEV_RESP_TIME = "stdev_rt"
    AVG_LATENCY = "avg_lt"
    AVG_CONN_TIME = "avg_ct"
    PERCENTILES = "perc"
    RESP_CODES = "rc"
    ERRTYPE_ERROR = 0
    ERRTYPE_ASSERT = 1

    def __init__(self, perc_levels=()):
        """
        :param perc_levels: percentile levels to compute in recalculate()
        :type perc_levels: tuple
        """
        super(KPISet, self).__init__()
        # running sums used to derive the averages in recalculate()
        self.sum_rt = 0
        self.sum_lt = 0
        self.sum_cn = 0
        self.perc_levels = perc_levels
        # NOTE(review): these get() calls rely on BetterDict.get() storing the
        # default under the key (unlike dict.get) -- confirm against BetterDict
        # scalars
        self.get(self.SAMPLE_COUNT, 0)
        self.get(self.CONCURRENCY, 0)
        self.get(self.SUCCESSES, 0)
        self.get(self.FAILURES, 0)
        self.get(self.AVG_RESP_TIME, 0)
        self.get(self.STDEV_RESP_TIME, 0)
        self.get(self.AVG_LATENCY, 0)
        self.get(self.AVG_CONN_TIME, 0)
        # vectors
        self.get(self.ERRORS, [])
        self.get(self.RESP_TIMES, Counter())
        self.get(self.RESP_CODES, Counter())
        self.get(self.PERCENTILES)
        # last concurrency reading per transaction name
        self._concurrencies = BetterDict()

    def __deepcopy__(self, memo):
        """Deep copy that also carries over the running sums."""
        mycopy = KPISet(self.perc_levels)
        mycopy.sum_rt = self.sum_rt
        mycopy.sum_lt = self.sum_lt
        mycopy.sum_cn = self.sum_cn
        for key, val in iteritems(self):
            mycopy[key] = copy.deepcopy(val, memo)
        return mycopy

    @staticmethod
    def error_item_skel(error, ret_c, cnt, errtype, urls):
        """
        Build the skeleton dict for one entry of the ERRORS list.

        :type error: str
        :type ret_c: str
        :type cnt: int
        :type errtype: int
        :type urls: Counter
        :rtype: dict
        """
        return {
            "cnt": cnt,
            "msg": error,
            "rc": ret_c,
            "type": errtype,
            "urls": urls
        }

    def add_sample(self, sample):
        """
        Add sample, consisting of: cnc, rt, cn, lt, rc, error, trname

        :type sample: tuple
        """
        # TODO: introduce a flag to not count failed in resp times? or offer it always?
        cnc, r_time, con_time, latency, r_code, error, trname = sample
        self[self.SAMPLE_COUNT] = self.get(self.SAMPLE_COUNT, 0) + 1
        if cnc:
            self._concurrencies[trname] = cnc

        if r_code is not None:
            resp_codes = self.get(self.RESP_CODES)
            resp_codes[r_code] = resp_codes.get(r_code, 0) + 1

            # count times only if we have RCs
            if con_time:
                self.sum_cn += con_time
            self.sum_lt += latency
            self.sum_rt += r_time

        if error is not None:
            self[self.FAILURES] = self.get(self.FAILURES, 0) + 1
            item = self.error_item_skel(error, r_code, 1, KPISet.ERRTYPE_ERROR, Counter())
            self.inc_list(self.get(self.ERRORS), ("msg", error), item)
        else:
            self[self.SUCCESSES] = self.get(self.SUCCESSES, 0) + 1

        self.get(self.RESP_TIMES)[r_time] += 1
        # TODO: max/min rt? there is percentiles...
        # TODO: throughput if interval is not 1s

    @staticmethod
    def inc_list(values, selector, value):
        """
        Increment the list item matching selector, or append a copy of value.

        :param values: list to update
        :type values: list
        :param selector: tuple of 2 values, field name and value to match
        :type selector: tuple
        :param value: dict to put into list
        :type value: dict
        """
        for item in values:
            if item[selector[0]] == selector[1]:
                item['cnt'] += value['cnt']
                item['urls'] += value['urls']
                return

        # FIX: deep-copy before appending; storing the caller's dict directly
        # let later increments mutate the source KPISet's error item as well
        values.append(copy.deepcopy(value))

    def recalculate(self):
        """
        Recalculate averages, stdev and percentiles.

        :return: self, for chaining
        """
        if self[self.SAMPLE_COUNT]:
            self[self.AVG_CONN_TIME] = self.sum_cn / self[self.SAMPLE_COUNT]
            self[self.AVG_LATENCY] = self.sum_lt / self[self.SAMPLE_COUNT]
            self[self.AVG_RESP_TIME] = self.sum_rt / self[self.SAMPLE_COUNT]

        if self._concurrencies:
            self[self.CONCURRENCY] = sum(self._concurrencies.values())

        perc, stdev = self.__perc_and_stdev(self[self.RESP_TIMES], self.perc_levels, self[self.AVG_RESP_TIME])
        for level, val in perc:
            self[self.PERCENTILES][str(float(level))] = val

        self[self.STDEV_RESP_TIME] = stdev

        return self

    def merge_kpis(self, src, sid=None):
        """
        Merge other instance into self.

        :param sid: source ID to use when summing up concurrency
        :type src: KPISet
        :return:
        """
        src.recalculate()

        self.sum_cn += src.sum_cn
        self.sum_lt += src.sum_lt
        self.sum_rt += src.sum_rt
        self[self.SAMPLE_COUNT] += src[self.SAMPLE_COUNT]
        self[self.SUCCESSES] += src[self.SUCCESSES]
        self[self.FAILURES] += src[self.FAILURES]
        # NOTE: should it be average? mind the timestamp gaps
        # FIX: don't clobber a known concurrency reading with zero
        if src[self.CONCURRENCY]:
            self._concurrencies[sid] = src[self.CONCURRENCY]

        self[self.RESP_TIMES].update(src[self.RESP_TIMES])
        self[self.RESP_CODES].update(src[self.RESP_CODES])

        for src_item in src[self.ERRORS]:
            self.inc_list(self[self.ERRORS], ('msg', src_item['msg']), src_item)

    @staticmethod
    def from_dict(obj):
        """
        Restore a KPISet from its plain-dict form.

        :type obj: dict
        :rtype: KPISet
        """
        inst = KPISet()
        for key, val in iteritems(obj):
            inst[key] = val
        # rebuild the running sums from the stored averages
        inst.sum_cn = obj[inst.AVG_CONN_TIME] * obj[inst.SAMPLE_COUNT]
        inst.sum_lt = obj[inst.AVG_LATENCY] * obj[inst.SAMPLE_COUNT]
        inst.sum_rt = obj[inst.AVG_RESP_TIME] * obj[inst.SAMPLE_COUNT]
        inst.perc_levels = [float(x) for x in inst[inst.PERCENTILES].keys()]
        # JSON round-trip turns numeric keys into strings; convert them back
        inst[inst.RESP_TIMES] = {float(level): inst[inst.RESP_TIMES][level]
                                 for level in inst[inst.RESP_TIMES].keys()}
        for error in inst[KPISet.ERRORS]:
            error['urls'] = Counter(error['urls'])
        return inst

    @staticmethod
    def __perc_and_stdev(cnts_dict, percentiles_to_calc=(), avg=0):
        """
        Single-pass nearest-rank percentiles plus stdev accumulation, adapted from
        http://stackoverflow.com/questions/25070086/percentiles-from-counts-of-values

        Returns [(percentile, value)] with nearest-rank percentiles;
        percentile 0 yields the min value, 100 the max value.

        :type percentiles_to_calc: list(float)
        :type cnts_dict: collections.Counter
        :param avg: mean of the distribution, used for the stdev accumulation
        """
        assert all(0 <= percentile <= 100 for percentile in percentiles_to_calc)
        percentiles = []
        if not cnts_dict:
            return percentiles, 0

        num = sum(cnts_dict.values())
        cnts = sorted(cnts_dict.items())
        curr_cnts_pos = 0  # current position in cnts
        curr_pos = cnts[0][1]  # sum of freqs up to current_cnts_pos

        sqr_diffs = 0
        for percentile in sorted(percentiles_to_calc):
            if percentile < 100:
                percentile_pos = percentile / 100.0 * num
                while curr_pos <= percentile_pos and curr_cnts_pos < len(cnts):
                    sqr_diffs += cnts[curr_cnts_pos][1] * math.pow(cnts[curr_cnts_pos][0] - avg, 2)
                    curr_cnts_pos += 1
                    curr_pos += cnts[curr_cnts_pos][1]
                percentiles.append((percentile, cnts[curr_cnts_pos][0]))
            else:
                percentiles.append((percentile, cnts[-1][0]))  # we could add a small value

        # consume the remaining buckets so stdev covers the whole distribution
        while curr_cnts_pos < len(cnts):
            sqr_diffs += cnts[curr_cnts_pos][1] * math.pow(cnts[curr_cnts_pos][0] - avg, 2)
            curr_cnts_pos += 1

        # NOTE(review): divides by the number of distinct values, not the total
        # sample count -- preserved as-is, but looks questionable; verify intent
        stdev = math.sqrt(sqr_diffs / len(cnts))

        return percentiles, stdev
class KPISet(BetterDict):
    """
    Main entity in results, contains all KPIs for single label,
    capable of merging other KPISet's into it to compose cumulative results
    """
    ERRORS = "errors"
    SAMPLE_COUNT = "throughput"
    CONCURRENCY = "concurrency"
    SUCCESSES = "succ"
    FAILURES = "fail"
    BYTE_COUNT = "bytes"
    RESP_TIMES = "rt"
    AVG_RESP_TIME = "avg_rt"
    STDEV_RESP_TIME = "stdev_rt"
    AVG_LATENCY = "avg_lt"
    AVG_CONN_TIME = "avg_ct"
    PERCENTILES = "perc"
    RESP_CODES = "rc"
    ERRTYPE_ERROR = 0
    ERRTYPE_ASSERT = 1
    ERRTYPE_SUBSAMPLE = 2

    def __init__(self, perc_levels=(), rt_dist_maxlen=None):
        """
        :param perc_levels: percentile levels to compute in recalculate()
        :param rt_dist_maxlen: max length of response-time distribution;
            stored on the instance but not referenced in this class body
        """
        super(KPISet, self).__init__()
        # running sums used to derive the averages in recalculate()
        self.sum_rt = 0
        self.sum_lt = 0
        self.sum_cn = 0
        self.perc_levels = perc_levels
        self.rtimes_len = rt_dist_maxlen
        self._concurrencies = BetterDict()  # NOTE: shouldn't it be Counter?
        # scalars
        self[KPISet.SAMPLE_COUNT] = 0
        self[KPISet.CONCURRENCY] = 0
        self[KPISet.SUCCESSES] = 0
        self[KPISet.FAILURES] = 0
        self[KPISet.AVG_RESP_TIME] = 0
        self[KPISet.STDEV_RESP_TIME] = 0
        self[KPISet.AVG_LATENCY] = 0
        self[KPISet.AVG_CONN_TIME] = 0
        self[KPISet.BYTE_COUNT] = 0
        # vectors
        self[KPISet.ERRORS] = []
        # presumably a histogram over 1..30min in milliseconds with 3 digits
        # of precision -- confirm against RespTimesCounter
        self[KPISet.RESP_TIMES] = RespTimesCounter(1, 60 * 30 * 1000, 3)  # is maximum value of 30 minutes enough?
        self[KPISet.RESP_CODES] = Counter()
        self[KPISet.PERCENTILES] = BetterDict()

    def __deepcopy__(self, memo):
        # carry over running sums and settings; stored items are deep-copied
        mycopy = KPISet(self.perc_levels)
        mycopy.sum_rt = self.sum_rt
        mycopy.sum_lt = self.sum_lt
        mycopy.sum_cn = self.sum_cn
        mycopy.rtimes_len = self.rtimes_len
        for key, val in iteritems(self):
            mycopy[key] = copy.deepcopy(val, memo)
        return mycopy

    @staticmethod
    def error_item_skel(error, ret_c, cnt, errtype, urls, tag):
        """
        Build the skeleton dict for one entry of the ERRORS list.

        :type error: str
        :type ret_c: str
        :type tag: str
        :type cnt: int
        :type errtype: int
        :type urls: collections.Counter
        :rtype: dict
        """
        assert isinstance(urls, collections.Counter)

        return {
            "cnt": cnt,
            "msg": error,
            "tag": tag,  # just one more string qualifier
            "rc": ret_c,
            "type": errtype,
            "urls": urls,
        }

    def add_sample(self, sample):
        """
        Add sample, consisting of: cnc, rt, cn, lt, rc, error, trname, byte_count

        :type sample: tuple
        """
        # TODO: introduce a flag to not count failed in resp times? or offer it always?
        cnc, r_time, con_time, latency, r_code, error, trname, byte_count = sample
        self[self.SAMPLE_COUNT] += 1
        if cnc:
            self._concurrencies[trname] = cnc

        if r_code is not None:
            self[self.RESP_CODES][r_code] += 1

            # count times only if we have RCs
            if con_time:
                self.sum_cn += con_time
            self.sum_lt += latency
            self.sum_rt += r_time

        if error is not None:
            self[self.FAILURES] += 1
            item = self.error_item_skel(error, r_code, 1, KPISet.ERRTYPE_ERROR, Counter(), None)
            self.inc_list(self[self.ERRORS], ("msg", error), item)
        else:
            self[self.SUCCESSES] += 1

        # histogram is kept in milliseconds; r_time is presumably in seconds
        rtime_s = round(r_time * 1000, 3)
        self[self.RESP_TIMES].add(rtime_s, 1)

        if byte_count is not None:
            self[self.BYTE_COUNT] += byte_count
        # TODO: max/min rt? there is percentiles...
        # TODO: throughput if interval is not 1s

    @staticmethod
    def inc_list(values, selector, value):
        """
        Increment list item, based on selector criteria

        :param values: list to update
        :param selector: tuple of 2 values, field name and value to match
        :param value: dict to put into list
        :type values: list[dict]
        :type selector: tuple
        :type value: dict
        """
        found = False
        for item in values:
            if item[selector[0]] == selector[1]:
                item['cnt'] += value['cnt']
                item['urls'] += value['urls']
                found = True
                break

        if not found:
            # deep copy so later increments don't mutate the caller's dict
            values.append(copy.deepcopy(value))

    def recalculate(self):
        """
        Recalculate averages, stdev and percentiles

        :return: self, for chaining
        """
        if self[self.SAMPLE_COUNT]:
            self[self.AVG_CONN_TIME] = self.sum_cn / self[self.SAMPLE_COUNT]
            self[self.AVG_LATENCY] = self.sum_lt / self[self.SAMPLE_COUNT]
            self[self.AVG_RESP_TIME] = self.sum_rt / self[self.SAMPLE_COUNT]

        if len(self._concurrencies):
            self[self.CONCURRENCY] = sum(self._concurrencies.values())

        # NOTE(review): STDEV_RESP_TIME is never updated in this variant --
        # it stays at its initial/merged value; confirm whether intentional
        resp_times = self[self.RESP_TIMES]
        if resp_times:
            # histogram stores milliseconds; convert percentiles back to seconds
            self[self.PERCENTILES] = {str(float(perc)): value / 1000.0
                                      for perc, value in iteritems(resp_times.get_percentiles_dict(self.perc_levels))}

        return self

    def merge_kpis(self, src, sid=None):
        """
        Merge other instance into self

        :param sid: source ID to use when suming up concurrency
        :type src: KPISet
        :return:
        """
        src.recalculate()  # TODO: could be not resource efficient strat

        self.sum_cn += src.sum_cn
        self.sum_lt += src.sum_lt
        self.sum_rt += src.sum_rt
        self[self.SAMPLE_COUNT] += src[self.SAMPLE_COUNT]
        self[self.SUCCESSES] += src[self.SUCCESSES]
        self[self.FAILURES] += src[self.FAILURES]
        self[self.BYTE_COUNT] += src[self.BYTE_COUNT]
        # NOTE: should it be average? mind the timestamp gaps
        if src[self.CONCURRENCY]:
            self._concurrencies[sid] = src[self.CONCURRENCY]

        if src[self.RESP_TIMES]:
            self[self.RESP_TIMES].merge(src[self.RESP_TIMES])
        elif not self[self.PERCENTILES]:
            # using existing percentiles
            # FIXME: it's not valid to overwrite, better take average
            self[self.PERCENTILES] = copy.deepcopy(src[self.PERCENTILES])

        self[self.RESP_CODES].update(src[self.RESP_CODES])

        for src_item in src[self.ERRORS]:
            self.inc_list(self[self.ERRORS], ('msg', src_item['msg']), src_item)

    @staticmethod
    def from_dict(obj):
        """
        Restore a KPISet from its plain-dict form.

        :type obj: dict
        :rtype: KPISet
        """
        inst = KPISet()
        for key, val in iteritems(obj):
            if key == inst.RESP_TIMES:
                # refill the histogram from the serialized value->count mapping
                if isinstance(val, dict):
                    for value, count in iteritems(val):
                        inst[inst.RESP_TIMES].add(value, count)
            else:
                inst[key] = val
        # rebuild the running sums from the stored averages
        inst.sum_cn = obj[inst.AVG_CONN_TIME] * obj[inst.SAMPLE_COUNT]
        inst.sum_lt = obj[inst.AVG_LATENCY] * obj[inst.SAMPLE_COUNT]
        inst.sum_rt = obj[inst.AVG_RESP_TIME] * obj[inst.SAMPLE_COUNT]
        inst.perc_levels = [float(x) for x in inst[inst.PERCENTILES].keys()]
        for error in inst[KPISet.ERRORS]:
            error['urls'] = Counter(error['urls'])
        return inst
class KPISet(BetterDict):
    """
    Main entity in results: contains all KPIs for a single label and is
    capable of merging other KPISet instances into itself to compose
    cumulative results.
    """
    ERRORS = "errors"
    SAMPLE_COUNT = "throughput"
    CONCURRENCY = "concurrency"
    SUCCESSES = "succ"
    FAILURES = "fail"
    RESP_TIMES = "rt"
    AVG_RESP_TIME = "avg_rt"
    STDEV_RESP_TIME = "stdev_rt"
    AVG_LATENCY = "avg_lt"
    AVG_CONN_TIME = "avg_ct"
    PERCENTILES = "perc"
    RESP_CODES = "rc"
    ERRTYPE_ERROR = 0
    ERRTYPE_ASSERT = 1

    def __init__(self, perc_levels=()):
        """
        :param perc_levels: percentile levels to compute in recalculate()
        :type perc_levels: tuple
        """
        super(KPISet, self).__init__()
        # running sums used to derive the averages in recalculate()
        self.sum_rt = 0
        self.sum_lt = 0
        self.sum_cn = 0
        self.perc_levels = perc_levels
        # NOTE(review): these get() calls rely on BetterDict.get() storing the
        # default under the key (unlike dict.get) -- confirm against BetterDict
        # scalars
        self.get(self.SAMPLE_COUNT, 0)
        self.get(self.CONCURRENCY, 0)
        self.get(self.SUCCESSES, 0)
        self.get(self.FAILURES, 0)
        self.get(self.AVG_RESP_TIME, 0)
        self.get(self.STDEV_RESP_TIME, 0)
        self.get(self.AVG_LATENCY, 0)
        self.get(self.AVG_CONN_TIME, 0)
        # vectors
        self.get(self.ERRORS, [])
        self.get(self.RESP_TIMES, Counter())
        self.get(self.RESP_CODES, Counter())
        self.get(self.PERCENTILES)
        self._concurrencies = BetterDict()  # NOTE: shouldn't it be Counter?

    def __deepcopy__(self, memo):
        """Deep copy that also carries over the running sums."""
        mycopy = KPISet(self.perc_levels)
        mycopy.sum_rt = self.sum_rt
        mycopy.sum_lt = self.sum_lt
        mycopy.sum_cn = self.sum_cn
        for key, val in iteritems(self):
            mycopy[key] = copy.deepcopy(val, memo)
        return mycopy

    @staticmethod
    def error_item_skel(error, ret_c, cnt, errtype, urls):
        """
        Build the skeleton dict for one entry of the ERRORS list.

        :type error: str
        :type ret_c: str
        :type cnt: int
        :type errtype: int
        :type urls: Counter
        :rtype: dict
        """
        return {
            "cnt": cnt,
            "msg": error,
            "rc": ret_c,
            "type": errtype,
            "urls": urls
        }

    def add_sample(self, sample):
        """
        Add sample, consisting of: cnc, rt, cn, lt, rc, error, trname

        :type sample: tuple
        """
        # TODO: introduce a flag to not count failed in resp times? or offer it always?
        cnc, r_time, con_time, latency, r_code, error, trname = sample
        self[self.SAMPLE_COUNT] = self.get(self.SAMPLE_COUNT, 0) + 1
        if cnc:
            self._concurrencies[trname] = cnc

        if r_code is not None:
            resp_codes = self.get(self.RESP_CODES)
            resp_codes[r_code] = resp_codes.get(r_code, 0) + 1

            # count times only if we have RCs
            if con_time:
                self.sum_cn += con_time
            self.sum_lt += latency
            self.sum_rt += r_time

        if error is not None:
            self[self.FAILURES] = self.get(self.FAILURES, 0) + 1
            item = self.error_item_skel(error, r_code, 1, KPISet.ERRTYPE_ERROR, Counter())
            self.inc_list(self.get(self.ERRORS), ("msg", error), item)
        else:
            self[self.SUCCESSES] = self.get(self.SUCCESSES, 0) + 1

        self.get(self.RESP_TIMES)[r_time] += 1
        # TODO: max/min rt? there is percentiles...
        # TODO: throughput if interval is not 1s

    @staticmethod
    def inc_list(values, selector, value):
        """
        Increment the list item matching selector, or append a copy of value.

        :param values: list to update
        :type values: list
        :param selector: tuple of 2 values, field name and value to match
        :type selector: tuple
        :param value: dict to put into list
        :type value: dict
        """
        for item in values:
            if item[selector[0]] == selector[1]:
                item['cnt'] += value['cnt']
                item['urls'] += value['urls']
                return

        # FIX: deep-copy before appending; storing the caller's dict directly
        # let later increments mutate the source KPISet's error item as well
        values.append(copy.deepcopy(value))

    def recalculate(self):
        """
        Recalculate averages, stdev and percentiles.

        :return: self, for chaining
        """
        if self[self.SAMPLE_COUNT]:
            self[self.AVG_CONN_TIME] = self.sum_cn / self[self.SAMPLE_COUNT]
            self[self.AVG_LATENCY] = self.sum_lt / self[self.SAMPLE_COUNT]
            self[self.AVG_RESP_TIME] = self.sum_rt / self[self.SAMPLE_COUNT]

        if len(self._concurrencies):
            self[self.CONCURRENCY] = sum(self._concurrencies.values())

        perc, stdev = self.__perc_and_stdev(self[self.RESP_TIMES], self.perc_levels, self[self.AVG_RESP_TIME])
        for level, val in perc:
            self[self.PERCENTILES][str(float(level))] = val

        self[self.STDEV_RESP_TIME] = stdev

        return self

    def merge_kpis(self, src, sid=None):
        """
        Merge other instance into self.

        :param sid: source ID to use when summing up concurrency
        :type src: KPISet
        :return:
        """
        src.recalculate()

        self.sum_cn += src.sum_cn
        self.sum_lt += src.sum_lt
        self.sum_rt += src.sum_rt
        self[self.SAMPLE_COUNT] += src[self.SAMPLE_COUNT]
        self[self.SUCCESSES] += src[self.SUCCESSES]
        self[self.FAILURES] += src[self.FAILURES]
        # NOTE: should it be average? mind the timestamp gaps
        if src[self.CONCURRENCY]:
            self._concurrencies[sid] = src[self.CONCURRENCY]

        self[self.RESP_TIMES].update(src[self.RESP_TIMES])
        self[self.RESP_CODES].update(src[self.RESP_CODES])

        for src_item in src[self.ERRORS]:
            self.inc_list(self[self.ERRORS], ('msg', src_item['msg']), src_item)

    @staticmethod
    def from_dict(obj):
        """
        Restore a KPISet from its plain-dict form.

        :type obj: dict
        :rtype: KPISet
        """
        inst = KPISet()
        for key, val in iteritems(obj):
            inst[key] = val
        # rebuild the running sums from the stored averages
        inst.sum_cn = obj[inst.AVG_CONN_TIME] * obj[inst.SAMPLE_COUNT]
        inst.sum_lt = obj[inst.AVG_LATENCY] * obj[inst.SAMPLE_COUNT]
        inst.sum_rt = obj[inst.AVG_RESP_TIME] * obj[inst.SAMPLE_COUNT]
        inst.perc_levels = [float(x) for x in inst[inst.PERCENTILES].keys()]
        # JSON round-trip turns numeric keys into strings; convert them back
        inst[inst.RESP_TIMES] = {float(level): inst[inst.RESP_TIMES][level]
                                 for level in inst[inst.RESP_TIMES].keys()}
        for error in inst[KPISet.ERRORS]:
            error['urls'] = Counter(error['urls'])
        return inst

    @staticmethod
    def __perc_and_stdev(cnts_dict, percentiles_to_calc=(), avg=0):
        """
        Single-pass nearest-rank percentiles plus stdev accumulation, adapted from
        http://stackoverflow.com/questions/25070086/percentiles-from-counts-of-values

        Returns [(percentile, value)] with nearest-rank percentiles;
        percentile 0 yields the min value, 100 the max value.

        :type percentiles_to_calc: list(float)
        :type cnts_dict: collections.Counter
        :param avg: mean of the distribution, used for the stdev accumulation
        """
        assert all(0 <= percentile <= 100 for percentile in percentiles_to_calc)
        percentiles = []
        if not cnts_dict:
            return percentiles, 0

        num = sum(cnts_dict.values())
        cnts = sorted(cnts_dict.items())
        curr_cnts_pos = 0  # current position in cnts
        curr_pos = cnts[0][1]  # sum of freqs up to current_cnts_pos

        sqr_diffs = 0
        for percentile in sorted(percentiles_to_calc):
            if percentile < 100:
                percentile_pos = percentile / 100.0 * num
                while curr_pos <= percentile_pos and curr_cnts_pos < len(cnts):
                    sqr_diffs += cnts[curr_cnts_pos][1] * math.pow(cnts[curr_cnts_pos][0] - avg, 2)
                    curr_cnts_pos += 1
                    curr_pos += cnts[curr_cnts_pos][1]
                percentiles.append((percentile, cnts[curr_cnts_pos][0]))
            else:
                percentiles.append((percentile, cnts[-1][0]))  # we could add a small value

        # consume the remaining buckets so stdev covers the whole distribution
        while curr_cnts_pos < len(cnts):
            sqr_diffs += cnts[curr_cnts_pos][1] * math.pow(cnts[curr_cnts_pos][0] - avg, 2)
            curr_cnts_pos += 1

        # NOTE(review): divides by the number of distinct values, not the total
        # sample count -- preserved as-is, but looks questionable; verify intent
        stdev = math.sqrt(sqr_diffs / len(cnts))

        return percentiles, stdev
class KPISet(BetterDict):
    """
    Main entity in results, contains all KPIs for single label,
    capable of merging other KPISet's into it to compose cumulative results
    """
    ERRORS = "errors"
    SAMPLE_COUNT = "throughput"
    CONCURRENCY = "concurrency"
    SUCCESSES = "succ"
    FAILURES = "fail"
    BYTE_COUNT = "bytes"
    RESP_TIMES = "rt"
    AVG_RESP_TIME = "avg_rt"
    STDEV_RESP_TIME = "stdev_rt"
    AVG_LATENCY = "avg_lt"
    AVG_CONN_TIME = "avg_ct"
    PERCENTILES = "perc"
    RESP_CODES = "rc"
    ERRTYPE_ERROR = 0
    ERRTYPE_ASSERT = 1

    def __init__(self, perc_levels=(), rt_dist_maxlen=None):
        """
        :param perc_levels: percentile levels to compute in recalculate()
        :param rt_dist_maxlen: max number of distinct response-time buckets
            kept; enforced by compact_times()
        """
        super(KPISet, self).__init__()
        # running sums used to derive the averages in recalculate()
        self.sum_rt = 0
        self.sum_lt = 0
        self.sum_cn = 0
        self.perc_levels = perc_levels
        self.rtimes_len = rt_dist_maxlen
        # NOTE(review): these get() calls rely on BetterDict.get() storing the
        # default under the key (unlike dict.get) -- confirm against BetterDict
        # scalars
        self.get(self.SAMPLE_COUNT, 0)
        self.get(self.CONCURRENCY, 0)
        self.get(self.SUCCESSES, 0)
        self.get(self.FAILURES, 0)
        self.get(self.AVG_RESP_TIME, 0)
        self.get(self.STDEV_RESP_TIME, 0)
        self.get(self.AVG_LATENCY, 0)
        self.get(self.AVG_CONN_TIME, 0)
        self.get(self.BYTE_COUNT, 0)
        # vectors
        self.get(self.ERRORS, [])
        self.get(self.RESP_TIMES, Counter())
        self.get(self.RESP_CODES, Counter())
        self.get(self.PERCENTILES)
        self._concurrencies = BetterDict()  # NOTE: shouldn't it be Counter?

    def __deepcopy__(self, memo):
        # carry over running sums and settings; stored items are deep-copied
        mycopy = KPISet(self.perc_levels)
        mycopy.sum_rt = self.sum_rt
        mycopy.sum_lt = self.sum_lt
        mycopy.sum_cn = self.sum_cn
        mycopy.rtimes_len = self.rtimes_len
        for key, val in iteritems(self):
            mycopy[key] = copy.deepcopy(val, memo)
        return mycopy

    @staticmethod
    def error_item_skel(error, ret_c, cnt, errtype, urls):
        """
        Build the skeleton dict for one entry of the ERRORS list.

        :type error: str
        :type ret_c: str
        :type cnt: int
        :type errtype: int
        :type urls: collections.Counter
        :rtype: dict
        """
        return {
            "cnt": cnt,
            "msg": error,
            "rc": ret_c,
            "type": errtype,
            "urls": urls
        }

    def add_sample(self, sample):
        """
        Add sample, consisting of: cnc, rt, cn, lt, rc, error, trname, byte_count

        :type sample: tuple
        """
        # TODO: introduce a flag to not count failed in resp times? or offer it always?
        cnc, r_time, con_time, latency, r_code, error, trname, byte_count = sample
        self[self.SAMPLE_COUNT] += 1
        if cnc:
            self._concurrencies[trname] = cnc

        if r_code is not None:
            self[self.RESP_CODES][r_code] += 1

            # count times only if we have RCs
            if con_time:
                self.sum_cn += con_time
            self.sum_lt += latency
            self.sum_rt += r_time

        if error is not None:
            self[self.FAILURES] += 1
            item = self.error_item_skel(error, r_code, 1, KPISet.ERRTYPE_ERROR, Counter())
            self.inc_list(self[self.ERRORS], ("msg", error), item)
        else:
            self[self.SUCCESSES] += 1

        self[self.RESP_TIMES][r_time] += 1

        if byte_count is not None:
            self[self.BYTE_COUNT] += byte_count
        # TODO: max/min rt? there is percentiles...
        # TODO: throughput if interval is not 1s

    @staticmethod
    def inc_list(values, selector, value):
        """
        Increment list item, based on selector criteria

        :param values: list to update
        :param selector: tuple of 2 values, field name and value to match
        :param value: dict to put into list
        :type values: list[dict]
        :type selector: tuple
        :type value: dict
        """
        found = False
        for item in values:
            if item[selector[0]] == selector[1]:
                item['cnt'] += value['cnt']
                item['urls'] += value['urls']
                found = True
                break

        if not found:
            # deep copy so later increments don't mutate the caller's dict
            values.append(copy.deepcopy(value))

    def recalculate(self):
        """
        Recalculate averages, stdev and percentiles

        :return: self, for chaining
        """
        if self[self.SAMPLE_COUNT]:
            self[self.AVG_CONN_TIME] = self.sum_cn / self[self.SAMPLE_COUNT]
            self[self.AVG_LATENCY] = self.sum_lt / self[self.SAMPLE_COUNT]
            self[self.AVG_RESP_TIME] = self.sum_rt / self[self.SAMPLE_COUNT]

        if len(self._concurrencies):
            self[self.CONCURRENCY] = sum(self._concurrencies.values())

        perc, stdev = self.__perc_and_stdev(self[self.RESP_TIMES], self.perc_levels, self[self.AVG_RESP_TIME])
        for level, val in perc:
            self[self.PERCENTILES][str(float(level))] = val

        self[self.STDEV_RESP_TIME] = stdev

        return self

    def compact_times(self):
        """
        Consolidate the response-time distribution down to at most
        self.rtimes_len buckets by repeatedly merging the two closest
        neighbouring buckets into one weighted-average bucket.
        """
        if not self.rtimes_len:
            return

        times = self[KPISet.RESP_TIMES]
        redundant_cnt = len(times) - self.rtimes_len

        if redundant_cnt > 0:
            logging.debug("Compacting %s response timing into %s", len(times), self.rtimes_len)

        while redundant_cnt > 0:
            keys = sorted(times.keys())
            # pair each key index with the gap to its right neighbour
            distances = [(lidx, keys[lidx + 1] - keys[lidx]) for lidx in range(len(keys) - 1)]
            distances.sort(key=operator.itemgetter(1))  # sort by distance

            # cast candidates for consolidation
            lkeys_indexes = [lidx for lidx, _ in distances[:redundant_cnt]]
            while lkeys_indexes:
                lidx = lkeys_indexes.pop(0)
                lkey = keys[lidx]
                rkey = keys[lidx + 1]
                if lkey in times and rkey in times:  # neighbours aren't changed
                    lval = times.pop(lkey)
                    rval = times.pop(rkey)
                    # shift key proportionally to values
                    idx_new = lkey + (rkey - lkey) * float(rval) / (lval + rval)
                    # keep precision the same
                    lprec = len(str(math.modf(lkey)[0])) - 2
                    rprec = len(str(math.modf(rkey)[0])) - 2
                    idx_new = round(idx_new, max(lprec, rprec))
                    times[idx_new] = lval + rval
                    redundant_cnt -= 1

    def merge_kpis(self, src, sid=None):
        """
        Merge other instance into self

        :param sid: source ID to use when suming up concurrency
        :type src: KPISet
        :return:
        """
        src.recalculate()  # TODO: could be not resource efficient strat

        self.sum_cn += src.sum_cn
        self.sum_lt += src.sum_lt
        self.sum_rt += src.sum_rt
        self[self.SAMPLE_COUNT] += src[self.SAMPLE_COUNT]
        self[self.SUCCESSES] += src[self.SUCCESSES]
        self[self.FAILURES] += src[self.FAILURES]
        self[self.BYTE_COUNT] += src[self.BYTE_COUNT]
        # NOTE: should it be average? mind the timestamp gaps
        if src[self.CONCURRENCY]:
            self._concurrencies[sid] = src[self.CONCURRENCY]

        if src[self.RESP_TIMES]:
            # using raw times to calculate percentiles
            self[self.RESP_TIMES].update(src[self.RESP_TIMES])
            self.compact_times()
        elif not self[self.PERCENTILES]:
            # using existing percentiles
            # FIXME: it's not valid to overwrite, better take average
            self[self.PERCENTILES] = copy.deepcopy(src[self.PERCENTILES])

        self[self.RESP_CODES].update(src[self.RESP_CODES])

        for src_item in src[self.ERRORS]:
            self.inc_list(self[self.ERRORS], ('msg', src_item['msg']), src_item)

    @staticmethod
    def from_dict(obj):
        """
        Restore a KPISet from its plain-dict form.

        :type obj: dict
        :rtype: KPISet
        """
        inst = KPISet()
        for key, val in iteritems(obj):
            inst[key] = val
        # rebuild the running sums from the stored averages
        inst.sum_cn = obj[inst.AVG_CONN_TIME] * obj[inst.SAMPLE_COUNT]
        inst.sum_lt = obj[inst.AVG_LATENCY] * obj[inst.SAMPLE_COUNT]
        inst.sum_rt = obj[inst.AVG_RESP_TIME] * obj[inst.SAMPLE_COUNT]
        inst.perc_levels = [float(x) for x in inst[inst.PERCENTILES].keys()]
        # JSON round-trip turns numeric keys into strings; convert them back
        inst[inst.RESP_TIMES] = {float(level): inst[inst.RESP_TIMES][level]
                                 for level in inst[inst.RESP_TIMES].keys()}
        for error in inst[KPISet.ERRORS]:
            error['urls'] = Counter(error['urls'])
        return inst

    @staticmethod
    def __perc_and_stdev(cnts_dict, percentiles_to_calc=(), avg=0):
        """
        from http://stackoverflow.com/questions/25070086/percentiles-from-counts-of-values
        Returns [(percentile, value)] with nearest rank percentiles.
        Percentile 0: <min_value>, 100: <max_value>.
        cnts_dict: { <value>: <count> }
        percentiles_to_calc: iterable for percentiles to calculate; 0 <= ~ <= 100

        upd: added stdev calc to have it in single-pass for means of efficiency

        :type percentiles_to_calc: list(float)
        :type cnts_dict: collections.Counter
        """
        assert all(0 <= percentile <= 100 for percentile in percentiles_to_calc)
        percentiles = []
        if not cnts_dict:
            return percentiles, 0

        num = sum(cnts_dict.values())
        cnts = sorted(cnts_dict.items())
        curr_cnts_pos = 0  # current position in cnts
        curr_pos = cnts[0][1]  # sum of freqs up to current_cnts_pos

        sqr_diffs = 0
        for percentile in sorted(percentiles_to_calc):
            if percentile < 100:
                percentile_pos = percentile / 100.0 * num
                while curr_pos <= percentile_pos and curr_cnts_pos < len(cnts):
                    sqr_diffs += cnts[curr_cnts_pos][1] * math.pow(cnts[curr_cnts_pos][0] - avg, 2)
                    curr_cnts_pos += 1
                    curr_pos += cnts[curr_cnts_pos][1]
                percentiles.append((percentile, cnts[curr_cnts_pos][0]))
            else:
                percentiles.append((percentile, cnts[-1][0]))  # we could add a small value

        # consume remaining buckets so stdev covers the whole distribution
        while curr_cnts_pos < len(cnts):
            sqr_diffs += cnts[curr_cnts_pos][1] * math.pow(cnts[curr_cnts_pos][0] - avg, 2)
            curr_cnts_pos += 1

        # NOTE(review): divides by number of distinct values, not by total
        # sample count -- looks questionable, verify intent
        stdev = math.sqrt(sqr_diffs / len(cnts))

        return percentiles, stdev
class KPISet(BetterDict):
    """
    Main entity in results, contains all KPIs for single label,
    capable of merging other KPISet's into it to compose cumulative results
    """
    ERRORS = "errors"
    SAMPLE_COUNT = "throughput"
    CONCURRENCY = "concurrency"
    SUCCESSES = "succ"
    FAILURES = "fail"
    BYTE_COUNT = "bytes"
    RESP_TIMES = "rt"
    AVG_RESP_TIME = "avg_rt"
    STDEV_RESP_TIME = "stdev_rt"
    AVG_LATENCY = "avg_lt"
    AVG_CONN_TIME = "avg_ct"
    PERCENTILES = "perc"
    RESP_CODES = "rc"
    ERRTYPE_ERROR = 0
    ERRTYPE_ASSERT = 1

    def __init__(self, perc_levels=(), rt_dist_maxlen=None):
        """
        :param perc_levels: percentile levels to compute in recalculate()
        :param rt_dist_maxlen: max length of response-time distribution;
            stored on the instance but not referenced in this class body
        """
        super(KPISet, self).__init__()
        # running sums used to derive the averages in recalculate()
        self.sum_rt = 0
        self.sum_lt = 0
        self.sum_cn = 0
        self.perc_levels = perc_levels
        self.rtimes_len = rt_dist_maxlen
        self._concurrencies = BetterDict()  # NOTE: shouldn't it be Counter?
        # scalars
        self[KPISet.SAMPLE_COUNT] = 0
        self[KPISet.CONCURRENCY] = 0
        self[KPISet.SUCCESSES] = 0
        self[KPISet.FAILURES] = 0
        self[KPISet.AVG_RESP_TIME] = 0
        self[KPISet.STDEV_RESP_TIME] = 0
        self[KPISet.AVG_LATENCY] = 0
        self[KPISet.AVG_CONN_TIME] = 0
        self[KPISet.BYTE_COUNT] = 0
        # vectors
        self[KPISet.ERRORS] = []
        # presumably a histogram over 1..30min in milliseconds with 3 digits
        # of precision -- confirm against RespTimesCounter
        self[KPISet.RESP_TIMES] = RespTimesCounter(1, 60 * 30 * 1000, 3)  # is maximum value of 30 minutes enough?
        self[KPISet.RESP_CODES] = Counter()
        self[KPISet.PERCENTILES] = BetterDict()

    def __deepcopy__(self, memo):
        # carry over running sums and settings; stored items are deep-copied
        mycopy = KPISet(self.perc_levels)
        mycopy.sum_rt = self.sum_rt
        mycopy.sum_lt = self.sum_lt
        mycopy.sum_cn = self.sum_cn
        mycopy.rtimes_len = self.rtimes_len
        for key, val in iteritems(self):
            mycopy[key] = copy.deepcopy(val, memo)
        return mycopy

    @staticmethod
    def error_item_skel(error, ret_c, cnt, errtype, urls):
        """
        Build the skeleton dict for one entry of the ERRORS list.

        :type error: str
        :type ret_c: str
        :type cnt: int
        :type errtype: int
        :type urls: collections.Counter
        :rtype: dict
        """
        return {
            "cnt": cnt,
            "msg": error,
            "rc": ret_c,
            "type": errtype,
            "urls": urls
        }

    def add_sample(self, sample):
        """
        Add sample, consisting of: cnc, rt, cn, lt, rc, error, trname, byte_count

        :type sample: tuple
        """
        # TODO: introduce a flag to not count failed in resp times? or offer it always?
        cnc, r_time, con_time, latency, r_code, error, trname, byte_count = sample
        self[self.SAMPLE_COUNT] += 1
        if cnc:
            self._concurrencies[trname] = cnc

        if r_code is not None:
            self[self.RESP_CODES][r_code] += 1

            # count times only if we have RCs
            if con_time:
                self.sum_cn += con_time
            self.sum_lt += latency
            self.sum_rt += r_time

        if error is not None:
            self[self.FAILURES] += 1
            item = self.error_item_skel(error, r_code, 1, KPISet.ERRTYPE_ERROR, Counter())
            self.inc_list(self[self.ERRORS], ("msg", error), item)
        else:
            self[self.SUCCESSES] += 1

        # histogram is kept in milliseconds; r_time is presumably in seconds
        rtime_s = round(r_time * 1000, 3)
        self[self.RESP_TIMES].add(rtime_s, 1)

        if byte_count is not None:
            self[self.BYTE_COUNT] += byte_count
        # TODO: max/min rt? there is percentiles...
        # TODO: throughput if interval is not 1s

    @staticmethod
    def inc_list(values, selector, value):
        """
        Increment list item, based on selector criteria

        :param values: list to update
        :param selector: tuple of 2 values, field name and value to match
        :param value: dict to put into list
        :type values: list[dict]
        :type selector: tuple
        :type value: dict
        """
        found = False
        for item in values:
            if item[selector[0]] == selector[1]:
                item['cnt'] += value['cnt']
                item['urls'] += value['urls']
                found = True
                break

        if not found:
            # deep copy so later increments don't mutate the caller's dict
            values.append(copy.deepcopy(value))

    def recalculate(self):
        """
        Recalculate averages, stdev and percentiles

        :return: self, for chaining
        """
        if self[self.SAMPLE_COUNT]:
            self[self.AVG_CONN_TIME] = self.sum_cn / self[self.SAMPLE_COUNT]
            self[self.AVG_LATENCY] = self.sum_lt / self[self.SAMPLE_COUNT]
            self[self.AVG_RESP_TIME] = self.sum_rt / self[self.SAMPLE_COUNT]

        if len(self._concurrencies):
            self[self.CONCURRENCY] = sum(self._concurrencies.values())

        # NOTE(review): STDEV_RESP_TIME is never updated in this variant --
        # it stays at its initial/merged value; confirm whether intentional
        resp_times = self[self.RESP_TIMES]
        if resp_times:
            # histogram stores milliseconds; convert percentiles back to seconds
            self[self.PERCENTILES] = {str(float(perc)): value / 1000.0
                                      for perc, value in iteritems(resp_times.get_percentiles_dict(self.perc_levels))}

        return self

    def merge_kpis(self, src, sid=None):
        """
        Merge other instance into self

        :param sid: source ID to use when suming up concurrency
        :type src: KPISet
        :return:
        """
        src.recalculate()  # TODO: could be not resource efficient strat

        self.sum_cn += src.sum_cn
        self.sum_lt += src.sum_lt
        self.sum_rt += src.sum_rt
        self[self.SAMPLE_COUNT] += src[self.SAMPLE_COUNT]
        self[self.SUCCESSES] += src[self.SUCCESSES]
        self[self.FAILURES] += src[self.FAILURES]
        self[self.BYTE_COUNT] += src[self.BYTE_COUNT]
        # NOTE: should it be average? mind the timestamp gaps
        if src[self.CONCURRENCY]:
            self._concurrencies[sid] = src[self.CONCURRENCY]

        if src[self.RESP_TIMES]:
            self[self.RESP_TIMES].merge(src[self.RESP_TIMES])
        elif not self[self.PERCENTILES]:
            # using existing percentiles
            # FIXME: it's not valid to overwrite, better take average
            self[self.PERCENTILES] = copy.deepcopy(src[self.PERCENTILES])

        self[self.RESP_CODES].update(src[self.RESP_CODES])

        for src_item in src[self.ERRORS]:
            self.inc_list(self[self.ERRORS], ('msg', src_item['msg']), src_item)

    @staticmethod
    def from_dict(obj):
        """
        Restore a KPISet from its plain-dict form.

        :type obj: dict
        :rtype: KPISet
        """
        inst = KPISet()
        for key, val in iteritems(obj):
            if key == inst.RESP_TIMES:
                # refill the histogram from the serialized value->count mapping
                if isinstance(val, dict):
                    for value, count in iteritems(val):
                        inst[inst.RESP_TIMES].add(value, count)
            else:
                inst[key] = val
        # rebuild the running sums from the stored averages
        inst.sum_cn = obj[inst.AVG_CONN_TIME] * obj[inst.SAMPLE_COUNT]
        inst.sum_lt = obj[inst.AVG_LATENCY] * obj[inst.SAMPLE_COUNT]
        inst.sum_rt = obj[inst.AVG_RESP_TIME] * obj[inst.SAMPLE_COUNT]
        inst.perc_levels = [float(x) for x in inst[inst.PERCENTILES].keys()]
        for error in inst[KPISet.ERRORS]:
            error['urls'] = Counter(error['urls'])
        return inst