def calculate_degradation_area(table, smooth, etalon_stats, etalon_threshold):
    """Locate degradation clusters and estimate their slowdown vs. the etalon.

    Error points are removed from ``table`` first. Clusters are then searched
    for in the smoothed durations, and for every cluster the raw (non-error)
    points lying strictly inside its time span are compared with the etalon
    using a difference-between-means estimate with a 95% confidence interval.

    :param table: iterable of points exposing ``error``, ``time`` and
        ``duration``
    :param smooth: sequence of smoothed points exposing ``time``,
        ``duration`` and ``var``
    :param etalon_stats: reference statistics exposing ``mean``, ``var`` and
        ``count``
    :param etalon_threshold: smoothed durations whose absolute value is below
        this threshold are mapped to 0 by the cluster filter, all others to 1
    :return: list of DegradationClusterStats; empty when the number of
        non-error points does not exceed WINDOW_SIZE
    """
    table = [p for p in table if not p.error]  # rm errors
    if len(table) <= WINDOW_SIZE:
        return []

    mean_times = [p.time for p in smooth]
    mean_durations = [p.duration for p in smooth]
    mean_vars = [p.var for p in smooth]

    # NOTE(review): presumably find_clusters groups runs of values that the
    # filter maps to 1 -- confirm against find_clusters.
    clusters = find_clusters(
        mean_durations,
        filter_fn=lambda y: 0 if abs(y) < etalon_threshold else 1)

    # calculate cluster duration
    degradation_cluster_stats = []
    for cluster in clusters:
        start_idx = int(cluster.inf)
        end_idx = int(cluster.sup)
        start_time = mean_times[start_idx]
        end_time = mean_times[end_idx]

        # point durations
        point_durations = [p.duration for p in table
                           if start_time < p.time < end_time]
        if not point_durations:
            # Mean and variance of an empty sample are NaN, and the sample
            # size is used as a divisor below; a cluster without any raw
            # point inside cannot be compared with the etalon, so skip it
            # instead of emitting NaN statistics.
            logging.debug('Skip cluster [%s, %s]: no points inside',
                          start_time, end_time)
            continue

        duration = end_time - start_time
        var = np.mean(mean_vars[start_idx: end_idx])

        # calculate difference between means
        # http://onlinestatbook.com/2/tests_of_means/difference_means.html
        anomaly_mean = np.mean(point_durations)
        anomaly_var = np.var(point_durations)
        se = math.sqrt(anomaly_var / len(point_durations) +
                       etalon_stats.var / etalon_stats.count)
        dof = etalon_stats.count + len(point_durations) - 2
        mean_diff = anomaly_mean - etalon_stats.mean
        conf_interval = stats.t.interval(0.95, dof, loc=mean_diff, scale=se)

        # Distances from the estimate to each end of the confidence interval;
        # their average is reported as the spread of the estimate.
        lower_width = mean_diff - conf_interval[0]
        upper_width = conf_interval[1] - mean_diff

        degradation = types.MeanVar(
            mean_diff, np.mean([lower_width, upper_width]))
        degradation_ratio = types.MeanVar(
            anomaly_mean / etalon_stats.mean,
            np.mean([lower_width / etalon_stats.mean,
                     upper_width / etalon_stats.mean]))

        logging.debug('Mean diff: %s', mean_diff)
        logging.debug('Conf int: %s', conf_interval)

        degradation_cluster_stats.append(types.DegradationClusterStats(
            start=start_time, end=end_time,
            duration=types.MeanVar(duration, var),
            degradation=degradation, degradation_ratio=degradation_ratio,
            count=len(point_durations)
        ))

    return degradation_cluster_stats
def indexed_interval_to_time_interval(table, src_interval):
    """Convert an interval of table indexes into a time interval.

    Each side of the interval is widened by half of the time gap to the
    neighbouring point outside the interval; a side that already touches
    the edge of the table is not widened.

    :param table: [DataRow] source data
    :param src_interval: interval of array indexes
    :return: ClusterStats
    """
    first = int(src_interval.inf)
    last = int(src_interval.sup)

    # Half of the gap to the neighbour just outside each end, if there is one.
    lead = (table[first].time - table[first - 1].time) / 2 if first > 0 else 0
    tail = ((table[last + 1].time - table[last].time) / 2
            if last < len(table) - 1 else 0)

    start_time = table[first].time - lead
    end_time = table[last].time + tail

    # Number of points whose time lies inside the widened interval.
    inside = sum(1 for p in table if start_time <= p.time <= end_time)

    # The total widening is reported as the spread of the duration.
    span = types.MeanVar(end_time - start_time, lead + tail)
    return types.ClusterStats(start=start_time, end=end_time, count=inside,
                              duration=span)
def _mean_with_standard_error(totals, variances):
    """Summarise per-run totals as a MeanVar, or None when there are none.

    The second MeanVar field is
    ``sqrt((sum(variances) + np.var(totals)) / len(totals))``.
    """
    if not totals:
        return None
    average = np.mean(totals)
    spread = math.sqrt((sum(variances) + np.var(totals)) / len(totals))
    return types.MeanVar(average, spread)


def process_all_runs(runs):
    """Process all runs from Rally raw data report

    Every run is processed individually and the per-run results are then
    combined into summary stats: downtime duration, MTTR and performance
    degradation (absolute and as a ratio). A summary value is None when no
    run contributed to it.

    :param runs: collection of Rally runs
    :return: SummaryResult
    """
    run_results = []
    downtime_totals, downtime_vars = [], []
    ttr_totals, ttr_vars = [], []
    slowdown_totals, slowdown_vars = [], []
    ratio_totals, ratio_vars = [], []

    for one_run in runs:
        outcome = process_one_run(one_run)
        run_results.append(outcome)

        # Only runs that actually have error areas contribute a downtime.
        error_area = outcome.error_area
        if error_area:
            downtime_totals.append(
                sum(area.duration.statistic for area in error_area))
            downtime_vars.extend(area.duration.var for area in error_area)

        # Likewise, only runs with degradation areas contribute to the
        # recovery-time and degradation figures.
        degradation_area = outcome.degradation_area
        if degradation_area:
            ttr_totals.append(
                sum(area.duration.statistic for area in degradation_area))
            ttr_vars.extend(area.duration.var for area in degradation_area)

            slowdown_totals.append(
                sum(area.degradation.statistic for area in degradation_area))
            slowdown_vars.extend(
                area.degradation.var for area in degradation_area)

            ratio_totals.append(
                sum(area.degradation_ratio.statistic
                    for area in degradation_area))
            ratio_vars.extend(
                area.degradation_ratio.var for area in degradation_area)

    slowdown = slowdown_ratio = None
    if slowdown_totals:
        slowdown = types.MeanVar(np.mean(slowdown_totals),
                                 np.mean(slowdown_vars))
        slowdown_ratio = types.MeanVar(np.mean(ratio_totals),
                                       np.mean(ratio_vars))

    return types.SummaryResult(
        run_results=run_results,
        mttr=_mean_with_standard_error(ttr_totals, ttr_vars),
        degradation=slowdown,
        degradation_ratio=slowdown_ratio,
        downtime=_mean_with_standard_error(downtime_totals, downtime_vars))