def filter_zone_changes(old, new, pct_lim, zone_lim, sum_lim, evt_lim):
    print((" - showing all zones with pct_lim={:,d}, zone_lim={:s}, "
           + "sum_lim={:s} and evt_lim={:,d}\n").format(
               pct_lim, fmt_time(zone_lim), fmt_time(sum_lim), evt_lim))
    out = []
    for (zone, new_data) in new.items():
        if zone in old.keys():
            old_data = old[zone]
            old_evt = len(old_data)
            new_evt = len(new_data)
            if old_evt > 2 and new_evt > 2:
                if old_evt < evt_lim and new_evt < evt_lim:
                    continue
                old_sum = sum(old_data)
                new_sum = sum(new_data)
                if old_sum < sum_lim and new_sum < sum_lim:
                    continue
                # Quantiles returns an n-1 cutpoint list qq, so median is
                # at qq[4] and p90 is at qq[8].
                old_qq = statistics.quantiles(old_data, n=10)
                new_qq = statistics.quantiles(new_data, n=10)
                m1 = chk_diff(old_qq[4], new_qq[4], pct_lim, zone_lim)
                m2 = chk_diff(old_qq[8], new_qq[8], pct_lim, zone_lim)
                m3 = chk_diff(old_sum, new_sum, pct_lim, sum_lim)
                m4 = chk_diff(old_evt, new_evt, pct_lim, evt_lim)
                if m1.flag or m2.flag or m3.flag or m4.flag:
                    ms = Measures(median=m1, p90=m2, sum=m3, events=m4)
                    z = "{} @ {}:{}".format(*zone)
                    out.append(Changes(zone=z, measures=ms))
    return out
def plot_iterations(score_history1, score_history2):
    """Plot iterations"""
    length_history = max(len(score_history1), len(score_history2))
    py_mean1 = mean(score_history1).item()
    py_mean2 = mean(score_history2).item()
    print(py_mean1, py_mean2)
    quart1 = quantiles(score_history1, n=4)
    quart2 = quantiles(score_history2, n=4)
    print(quart1)
    print(quart2)
    x = list(range(1, length_history + 1))
    x1 = list(range(1, len(score_history1) + 1))
    x2 = list(range(1, len(score_history2) + 1))
    average1 = [py_mean1] * length_history
    average2 = [py_mean2] * length_history
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # ax.scatter(x1, score_history1, color='red', marker='*', label='1st Line')
    # ax.scatter(x2, score_history2, color='blue', marker='x', label='2nd Line')
    ax.plot(x, average1, color='red', linewidth=2, label='1st Line Avg')
    ax.plot(x, average2, color='blue', linewidth=2, label='2nd Line Avg')
    ax.plot(x, [quart1[0]] * length_history, color='red', linewidth=3, dashes=[5, 2, 1, 2])
    ax.plot(x, [quart1[1]] * length_history, color='red', linewidth=1, dashes=[5, 2, 1, 2])
    ax.plot(x, [quart1[2]] * length_history, color='red', linewidth=1, dashes=[5, 2, 1, 2])
    ax.plot(x, [quart2[0]] * length_history, color='blue', linewidth=1, dashes=[5, 2, 1, 2])
    ax.plot(x, [quart2[1]] * length_history, color='blue', linewidth=1, dashes=[5, 2, 1, 2])
    ax.plot(x, [quart2[2]] * length_history, color='blue', linewidth=1, dashes=[5, 2, 1, 2])
    ax.legend()
    ax.grid(True)
    ax.set(title='Lineup Comparison', ylabel='Score', xlabel='iterations')
    figure_name = 'iteration_history.png'
    plt.savefig(figure_name)
    print(f'Plot Created and saved as {figure_name}')
def read_text_file(filepath):
    with open(filepath, 'r') as txt_file:
        counter = 0
        number = 1
        graph_list = []
        graph_matrix = []
        round_values = []
        for line in txt_file:
            counter += 1
            splitted = line.split(",")
            graph_list.append(int(splitted[2]))
            graph_matrix.append(int(splitted[3]))
            if counter == 100:
                g_list = [
                    round(q, 1) for q in statistics.quantiles(graph_list, n=10)
                ]
                g_matrix = [
                    round(q, 1) for q in statistics.quantiles(graph_matrix, n=10)
                ]
                g_list_mean = statistics.mean(g_list)
                g_matrix_mean = statistics.mean(g_matrix)
                round_values.append([
                    int(splitted[0]), int(splitted[1]), g_list_mean, g_matrix_mean
                ])
                if int(splitted[0]) % 20 == 0:
                    print("|" + str(number) + "|" + str(splitted[0]) + "|"
                          + str(splitted[1]) + "%|" + str(int(g_list_mean))
                          + "|" + str(int(g_matrix_mean)) + "|")
                    number += 1
                graph_list.clear()
                graph_matrix.clear()
                counter = 0
        results.append([filepath.split(".")[0], round_values])
def test_secint(self):
    secint = mpc.SecInt()
    y = [1, 3, -2, 3, 1, -2, -2, 4] * 5
    random.shuffle(y)
    x = list(map(secint, y))
    self.assertEqual(mpc.run(mpc.output(mean(x))), round(statistics.mean(y)))
    self.assertEqual(mpc.run(mpc.output(variance(x))), round(statistics.variance(y)))
    self.assertEqual(mpc.run(mpc.output(variance(x, mean(x)))), round(statistics.variance(y)))
    self.assertEqual(mpc.run(mpc.output(stdev(x))), round(statistics.stdev(y)))
    self.assertEqual(mpc.run(mpc.output(pvariance(x))), round(statistics.pvariance(y)))
    self.assertEqual(mpc.run(mpc.output(pstdev(x))), round(statistics.pstdev(y)))
    self.assertEqual(mpc.run(mpc.output(mode(x))), round(statistics.mode(y)))
    self.assertEqual(mpc.run(mpc.output(median(x))), round(statistics.median(y)))
    self.assertEqual(mpc.run(mpc.output(median_low(x))), round(statistics.median_low(y)))
    self.assertEqual(mpc.run(mpc.output(median_high(x))), round(statistics.median_high(y)))
    self.assertEqual(mpc.run(mpc.output(quantiles(x[:2], n=3))), statistics.quantiles(y[:2], n=3))
    self.assertEqual(mpc.run(mpc.output(quantiles(x, n=1))), statistics.quantiles(y, n=1))
    self.assertEqual(mpc.run(mpc.output(quantiles(x))), statistics.quantiles(y))
    x = list(range(16))
    y = list(reversed(x))
    self.assertAlmostEqual(covariance(x, y), -22.667, 3)
    x = list(map(secint, x))
    y = list(map(secint, y))
    self.assertEqual(mpc.run(mpc.output(covariance(x, y))), -23)
    self.assertRaises(ValueError, quantiles, x, method='wrong')
def test_secfxp(self):
    secfxp = mpc.SecFxp()
    x = [1, 1, 2, 2, 3, 4, 4, 4, 6] * 5
    random.shuffle(x)
    x = list(map(secfxp, x))
    self.assertAlmostEqual(mpc.run(mpc.output(mean(x))), 3, delta=1)
    self.assertAlmostEqual(mpc.run(mpc.output(median(x))), 3)
    self.assertAlmostEqual(mpc.run(mpc.output(mode(x))), 4)

    x = [1, 1, 1, 1, 2, 2, 3, 4, 4, 4, 4, 5, 6, 6, 6] * 100
    random.shuffle(x)
    x = list(map(lambda a: a * 2**-4, x))
    x = list(map(secfxp, x))
    self.assertAlmostEqual(mpc.run(mpc.output(mean(x))), (2**-4) * 10/3, delta=1)

    y = [1.75, 1.25, -0.25, 0.5, 1.25, -3.5] * 5
    random.shuffle(y)
    x = list(map(secfxp, y))
    self.assertAlmostEqual(mpc.run(mpc.output(mean(x))), statistics.mean(y), 4)
    self.assertAlmostEqual(mpc.run(mpc.output(variance(x))), statistics.variance(y), 2)
    self.assertAlmostEqual(mpc.run(mpc.output(stdev(x))), statistics.stdev(y), 3)
    self.assertAlmostEqual(mpc.run(mpc.output(pvariance(x))), statistics.pvariance(y), 2)
    self.assertAlmostEqual(mpc.run(mpc.output(pstdev(x))), statistics.pstdev(y), 3)
    self.assertAlmostEqual(mpc.run(mpc.output(median(x))), statistics.median(y), 4)
    self.assertAlmostEqual(mpc.run(mpc.output(quantiles(x))), statistics.quantiles(y), 4)
    self.assertAlmostEqual(mpc.run(mpc.output(quantiles(x, method='inclusive'))),
                           statistics.quantiles(y, method='inclusive'), 4)

    x = list(map(secfxp, [1.0]*10))
    self.assertAlmostEqual(mpc.run(mpc.output(mode(x))), 1)
    k = mpc.options.sec_param
    mpc.options.sec_param = 1  # force no privacy case
    self.assertAlmostEqual(mpc.run(mpc.output(mode(x))), 1)
    mpc.options.sec_param = k
    x[0] = secfxp(1.5)
    self.assertRaises(ValueError, mode, x)

    x = [1, 2, 3, 4, 5, 6, 7, 8, 9]
    y = [1, 2, 3, 1, 2, 3, 1, 2, 3]
    self.assertEqual(covariance(x, y), 0.75)
    self.assertEqual(correlation(x, x), 1.0)
    self.assertAlmostEqual(correlation(x, y), 0.316, 3)
    self.assertEqual(linear_regression(x, y)[1], 1.5)
    x = list(map(secfxp, x))
    y = list(map(secfxp, y))
    self.assertEqual(mpc.run(mpc.output(covariance(x, y))), 0.75)
    self.assertAlmostEqual(mpc.run(mpc.output(correlation(x, x))), 1.0, 2)
    self.assertAlmostEqual(mpc.run(mpc.output(correlation(x, y))), 0.32, 2)
    self.assertAlmostEqual(mpc.run(mpc.output(linear_regression(x, y)[1])), 1.5, 2)

    x = [1, 2, 3, 4, 5, 6, 7, 8, 9]
    y = [9, 8, 7, 6, 5, 4, 3, 2, 1]
    self.assertEqual(covariance(x, y), -7.5)
    self.assertEqual(correlation(x, y), -1.0)
    self.assertEqual(linear_regression(x, y)[1], 10.0)
    x = list(map(secfxp, x))
    y = list(map(secfxp, y))
    self.assertAlmostEqual(mpc.run(mpc.output(covariance(x, y))), -7.5, 2)
    self.assertAlmostEqual(mpc.run(mpc.output(correlation(x, y))), -1.0, 2)
    self.assertAlmostEqual(mpc.run(mpc.output(linear_regression(x, y)[1])), 10.0, 2)
def process(fqp, resultsfile):
    # gather the max per line of file of round 1
    prev_fqp = fqp.replace("Round2", "Round1")
    r1max = []
    with open(prev_fqp, "r") as csvfile:
        datareader = csv.reader(csvfile, delimiter=',')
        titles = next(datareader)
        total_pos = [_ for _, y in enumerate(titles) if y == "Total"]
        for row in datareader:
            r1max.append(max([float(row[_]) for _ in total_pos]))
    print(r1max)

    # parse file of round 2
    threads = -1
    category = -1
    senders = -1
    totals = []
    with open(fqp, "r") as csvfile:
        datareader = csv.reader(csvfile, delimiter=',')
        titles = next(datareader)
        total_pos = [_ for _, y in enumerate(titles) if y == "Total"]
        node_pos = [_ for _, y in enumerate(titles) if y.startswith("Node")]
        for row in datareader:
            if threads == -1:
                threads = int(row[1])
                category = row[0][0]
                senders = [row[_] for _ in node_pos].count("sending")
            prev_max = r1max.pop(0)
            totals.extend([float(row[_]) + prev_max for _ in total_pos])
    nodes = len(node_pos)

    ## calculate statistics
    mind = min(totals)
    q1 = quantiles(totals)[0]
    medi = median(totals)
    avrg = mean(totals)
    q3 = quantiles(totals)[2]
    maxd = max(totals)

    ## write results
    if not DEBUG:
        with open(resultsfile, "a") as f:
            f.write(
                f"{category},{nodes},{threads},{senders},{mind},{q1},{medi},{avrg},{q3},{maxd}\n"
            )
        with open(resultsfile.replace(".csv", "all_totals.csv"), "a") as f:
            f.write(f"{category},{nodes},{threads},{senders},"
                    + ",".join(map(str, totals)) + "\n")
    print(
        f"{category},{nodes},{threads},{senders},{mind},{q1},{medi},{avrg},{q3},{maxd}"
    )
def __init__(self, setOfNumbers=[1, 2, 3, 4, 5]):
    self.setOfNumbers = setOfNumbers
    self.mean = statistics.mean(self.setOfNumbers)
    self.mode = statistics.mode(self.setOfNumbers)
    self.median = statistics.median(self.setOfNumbers)
    self.standardDeviation = statistics.stdev(self.setOfNumbers)
    self.variance = statistics.variance(self.setOfNumbers)
    self.interquartileRange = statistics.quantiles(
        self.setOfNumbers)[2] - statistics.quantiles(self.setOfNumbers)[0]
def output_file(sample_id, results):
    out_dir = f'/data/scratch/DMP/UCEC/UROTRBIO/slise/ROSETREES/{patID}/HAP_BAF'
    if binType == 'by_snp':
        if n_snp_bin >= 1000:
            int_size_f = f"{n_snp_bin//1000:d}Ksnp"
        else:
            int_size_f = f"{n_snp_bin}snp"
    elif binType == 'by_bp':
        pass
    else:
        exit('Problems')
    out_file = f'{out_dir}/{sample_id}.{array}.{phasingMethod}.{int_size_f}.tsv'
    if covGL:
        out_file = f'{out_dir}/{sample_id}.{array}.GLIMPSE.{int_size_f}.tsv'
    try:
        fout = open(out_file, 'w')
    except:
        exit(f"Can not open: {out_file}")
    print('chrom', 'bin_start', 'bin_end', 'n_snp', 'int_size',
          'af_l', 'af_r', 'daf', sep="\t", file=fout)
    hap_bin_size = []
    daf_bin_list = []
    i_bin = 0
    for out in results:
        (chrom, pos_start, pos_end, n_snp, int_size, af_l, af_r, daf_bin) = out
        print(*out, sep="\t", file=fout)
        if (n_snp > n_snp_min) and (int_size < max_interval_size):
            hap_bin_size.append(int_size)
            daf_bin = float(daf_bin)
            daf_bin_list.append(daf_bin)
            # if daf_bin > 0.03 or daf_bin < -0.03:
            #     print(*[i_bin, *out], sep="\t")
        i_bin += 1
    fout.close()
    print(statistics.mean(hap_bin_size), statistics.stdev(hap_bin_size),
          max(hap_bin_size), min(hap_bin_size), len(hap_bin_size))
    print(statistics.quantiles(hap_bin_size, n=10))
    print(statistics.mean(daf_bin_list), statistics.stdev(daf_bin_list),
          max(daf_bin_list), min(daf_bin_list), len(daf_bin_list))
    print(statistics.quantiles(daf_bin_list, n=10))
def _calc_ms_mg_quantiles(mg_ms_it: Iterator[Tuple[str, float, float]],
                          n=100) -> Tuple[List[float], List[float],
                                          Iterator[Tuple[str, float, float]]]:
    """
    Compute and return quantile tables for the market-wide profitability growth (MG)
    and profitability stability (MS) indicators.

    :param mg_ms_it: iterator whose elements hold the company code as the first item,
                     MG as the second item and MS as the third item
    :return: a tuple whose first item is the quantile table of the profitability growth
             indicator (n-1 cut points), whose second item is the quantile table of the
             profitability stability indicator (n-1 cut points), and whose third item is
             a copy of the input iterator
    """
    it1, it2, it3 = tee(mg_ms_it, 3)
    return quantiles([value[1] for value in it1], n=n), \
        quantiles([value[2] for value in it2], n=n), \
        it3
def get_metrics(request_info: List[RequestInfo]) -> str:
    """
    Generates metrics of request times and returns a string.
    """
    if len(request_info) < 2:
        return "Two or more successful requests needed to generate metrics."

    response_times = [item.total_time for item in request_info]
    mean_response = mean(response_times)
    median_response = median(response_times)
    ninetieth_percentile = quantiles(response_times, n=10)[-1]

    rounding_factor = 3
    mean_millis = round(mean_response * 1000, rounding_factor)
    median_millis = round(median_response * 1000, rounding_factor)
    ninetieth_percentile_millis = round(ninetieth_percentile * 1000, rounding_factor)

    output_summary = textwrap.dedent(f"""\
        Mean response time = {mean_millis}ms
        Median response time = {median_millis}ms
        90th percentile of response times = {ninetieth_percentile_millis}ms"""  # noqa: E501
    )
    return output_summary
def print_metric_stats(self, users, key, f):
    self.print_line(f, [
        '\tmean:\t',
        statistics.mean([u.original_metrics[key] for u in users])
    ])
    self.print_line(f, [
        '\tmedian:\t',
        statistics.median([u.original_metrics[key] for u in users])
    ])
    self.print_line(f, [
        '\tpopulation std dev:\t',
        statistics.pstdev([u.original_metrics[key] for u in users])
    ])
    quantiles_n = 100
    quantiles = statistics.quantiles(
        [u.original_metrics[key] for u in users], n=quantiles_n)
    quantile_intervals = [(i + 1) * 100 / quantiles_n
                          for i in range(quantiles_n - 1)]
    self.print_line(f, [
        '\tquantile intervals:\t',
        quantile_intervals[int(quantiles_n / 10) - 1::int(quantiles_n / 10)]
    ])
    self.print_line(f, [
        '\tquantiles:\t',
        quantiles[int(quantiles_n / 10) - 1::int(quantiles_n / 10)]
    ])
    plt.bar(quantile_intervals, quantiles)
    plt.savefig(key + '_quantile_bar.png')
    plt.clf()
def participation_per_group():
    """
    Returns a dictionary with information for each group
    {
        group: {
            'participating': int,  # number of participating users
            'acc': int,            # no. users with at least one AC submission
            'all': int,            # total no. users
            'avg': float,          # avg. submissions per participating user
            'stdev': float,        # stdev of submissions per participating user
            'quantiles': str       # cut points 0%-25%-50%-75%-100% of submissions per participating user
        }
    }
    Users are considered "participating" if they have sent at least one submission,
    and only non-staff, active users are counted.
    """
    participating = dict()
    for group in Group.objects.all():
        users = group.user_set.filter(is_staff=False, is_active=True)
        participating_count = Submission.objects.filter(user__in=users).order_by('user').distinct('user').count()
        acc_count = (Submission.objects.filter(verdict_code=VerdictCode.AC, user__in=users).order_by('user')
                     .distinct('user').count())

        # Statistics of submissions per user
        subs_per_user = (Submission.objects.filter(user__in=users).values('user').annotate(count=Count('user')))
        list_num_subs = [entry['count'] for entry in subs_per_user]
        participating[group.name] = {
            'participating': participating_count,
            'all': users.count(),
            'acc': acc_count,
            'avg': mean(list_num_subs),
            'stdev': stdev(list_num_subs),
            'quantiles': ' - '.join(map(str, [min(list_num_subs)] + quantiles(list_num_subs) + [max(list_num_subs)])),
        }
    return participating
def is_overloaded(self):
    # check window time
    window_cutoff = time.time() - self.window_time
    i = 0
    for i, t in enumerate(self.times):
        if t >= window_cutoff:
            break
    if i > 0:
        logging.debug('routestats: removing %d entries due to age', i)
        self.data = deque((self.data[j] for j in range(i, len(self.data))),
                          maxlen=self.window_size)
        self.times = deque((self.times[j] for j in range(i, len(self.times))),
                           maxlen=self.window_size)
    # check if we have enough data to form stats
    if len(self.data) < 4:
        return False
    # now check stats
    median = 0
    try:
        stats = statistics.quantiles(self.data)
        logging.debug('routestats: %r', stats)
        if stats[1] >= self.timeout or stats[2] >= 2 * self.timeout:
            median = stats[1]
    except AttributeError:
        # statistics.quantiles() requires Python 3.8+; fall back to the
        # plain median on older interpreters
        med = statistics.median(self.data)
        logging.debug('routestats: %r', med)
        if med >= self.timeout:
            median = med
    return median > 0 and random.random() * median >= self.timeout
def reportSolutionStatistics(verdicts: typing.List[Const.Verdict],
                             dtDistribution: typing.List[float],
                             quantilesCount: int = 4) -> None:
    """
    Report statistics based on verdicts and dt distribution.
    """
    # Brief report first
    logger.info(
        "Verdict brief: %s",
        " / ".join(
            "%s %g%%" % (verdict.name, 1e2 * verdicts.count(verdict) / len(verdicts))
            for verdict in Const.Verdict))
    if len(dtDistribution) > 1:
        dtQuantiles = statistics.quantiles(dtDistribution, n=quantilesCount)
        dtQuantiles = [min(dtDistribution)] + \
            dtQuantiles + [max(dtDistribution)]
        logger.info(
            "DT brief (not precise): %s",
            " / ".join("Q%d %gs" % (i, dtQuantiles[i])
                       for i in range(len(dtQuantiles))))

    # Detail individuals
    logger.debug("Verdicts: [%s]", ", ".join(v.name for v in verdicts))
    logger.debug("DT distribution: [%s]",
                 ", ".join("%gs" % (dt, ) for dt in dtDistribution))
def display_stats_for_enemies_in_match(self, match_id: str) -> None:
    team_avg_kd, player_to_kd_dict = self.pull_stats_for_enemies_in_match(match_id)
    if not team_avg_kd or not player_to_kd_dict:
        return

    all_player_kd = player_to_kd_dict.values()
    min_kd_in_match = min(all_player_kd)
    max_kd_in_match = max(all_player_kd)
    best_player = max(player_to_kd_dict, key=lambda x: player_to_kd_dict[x])
    avg_kd_in_match = round(sum(all_player_kd) / len(all_player_kd), 2)
    std_dev_of_indiv_kd = round(pstdev(all_player_kd), 4)
    quantiles_breakdown = quantiles(all_player_kd, method="inclusive")
    avg_kd_of_teams = round(sum(team_avg_kd) / len(team_avg_kd), 2)
    kd_top_15 = team_avg_kd[:15]
    print(
        f"""
        Match {match_id} Player Stats

        There is data for {self.total_players_in_match - self.skipped_players}/{self.total_players_in_match} players
        The average K/D for all players is {avg_kd_in_match} and the standard deviation is {std_dev_of_indiv_kd}
        The lowest K/D is {min_kd_in_match}
        25% Percentile: {quantiles_breakdown[0]}
        50% Percentile: {quantiles_breakdown[1]}
        75% Percentile: {quantiles_breakdown[2]}
        The best K/D is {max_kd_in_match} ({best_player})

        There is data for {len(team_avg_kd)} teams (Expected {self.warzone_match_data.metadata.team_count} teams)
        The average K/D of all teams is {avg_kd_of_teams}
        """
    )
    for idx, top_15_team in enumerate(kd_top_15):
        print(f"Team #{idx + 1}'s K/D is {top_15_team}")
def makePondDict(self, variable, viablePonds, quantiles=2, n=0, breakpoint=None):
    pondDict = {}
    variableValues = [float(p.getPondStat(variable)) for p in viablePonds]
    if breakpoint:
        breakpoints = [breakpoint]
    else:
        breakpoints = statistics.quantiles(variableValues, n=quantiles)
    print(breakpoints)
    for i in viablePonds:
        if float(i.getPondStat(variable)) <= breakpoints[0]:
            pondDict[i.getPondName()] = [
                i.getHighestCov(n), "low",
                float(i.getPondStat(variable))
            ]
        elif float(i.getPondStat(variable)) >= breakpoints[-1]:
            pondDict[i.getPondName()] = [
                i.getHighestCov(n), "high",
                float(i.getPondStat(variable))
            ]
    return pondDict
def iqr(*data):
    "Interquartile range"
    if len(data) == 1 and isinstance(data[0], (list, tuple)):
        data = data[0]
    q = stat.quantiles(data)
    return q[2] - q[0]
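# Hypothetical usage sketch for iqr() above (not part of the original snippet),
# assuming the statistics module is imported as `stat` (import statistics as stat).
# With the default 'exclusive' method the quartiles of 1..8 are 2.25, 4.5 and 6.75,
# so the interquartile range is 4.5.
if __name__ == "__main__":
    print(iqr(1, 2, 3, 4, 5, 6, 7, 8))    # 4.5, values passed individually
    print(iqr([1, 2, 3, 4, 5, 6, 7, 8]))  # 4.5, same values as a single list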
def team_stats(team_members):
    from statistics import mean, quantiles

    mean_age = mean([member['age'] for member in team_members])
    print(f"Team members are {mean_age} years old, on average")

    scores = [member['score'] for member in team_members]
    q1, q2, q3 = quantiles(scores, n=4)
    print(
        f"Team's median score is {q2}, whereas the interquartile range is [{q1:.2f}, {q3:.2f}]"
    )

    # Option 1 for max_player
    # max_player = team_members[0]
    # for player in [member for member in team_members if member['age'] < 21]:
    #     if player['score'] > max_player['score']:
    #         max_player = player

    # Option 2 for max_player
    # max_player = max([member for member in team_members if member['age'] < 21], key=get_member_score)

    # Option 2.1 for max_player
    max_player = max([member for member in team_members if member['age'] < 21],
                     key=itemgetter('score'))
    print(
        f"{max_player['name']} is the best player among those under 21 years of age"
    )
def main():
    # get command-line args
    cmd_args = commandLineParser()

    # bring in station distances
    st_coords = fp.precipFileParser('./resources/precip_data/precip.1977', [4, 8], return_coords=True)
    raw_distances_list = fp.shapeFileParser(cmd_args.shapefile_path, st_coords, cmd_args, testing=cmd_args.testing)

    # drop the ones at the origin
    clust_nums = cp.interpretOriginLog('origin_log.csv')
    os.system('rm origin_log.csv')
    clust_indices = [clust_num - 1 for clust_num in clust_nums]
    distances_list = [raw_dist for index, raw_dist in enumerate(raw_distances_list) if index not in clust_indices]

    # sort each location
    for lst in distances_list:
        lst.sort()

    # create a list of the distance to have cmd_args.num_stations captured
    minimum_distances = [lst[cmd_args.num_stations - 1] for lst in distances_list]

    # output dataframe
    df = pd.DataFrame(minimum_distances, columns=['Distances'])
    df.to_csv('distances.csv', index=False)

    # terminal output
    quartiles = [round(i, 2) for i in statistics.quantiles(minimum_distances)]
    print(f'Q1: {quartiles[0]}\nMedian: {quartiles[1]}\nQ3: {quartiles[2]}')
    print(f'Mean: {round(statistics.mean(minimum_distances), 2)}')
    print(f'Max: {round(max(minimum_distances), 2)}')

    # histogram output
    bin_num = int(1 + 3.322 * math.log10(len(minimum_distances)))
    plt.hist(minimum_distances,
             weights=np.ones(len(minimum_distances)) / len(minimum_distances),
             bins=bin_num)
    plt.xlabel('Distance Value')
    plt.title('Determine Distance')
    plt.ylabel('Percentage of total DHS Clusters')
    plt.gca().yaxis.set_major_formatter(PercentFormatter(1))
    plt.show()
def percentiles(self, durations: list):
    if len(durations) == 0:
        return 0, 0
    if len(durations) == 1:
        return durations[0], durations[0]
    percentiles = statistics.quantiles(durations, n=100)
    return percentiles[80 - 2], percentiles[95 - 2]
def processRollResults(summonerLevels, numTests):
    objShell = win32com.client.Dispatch("WScript.Shell")
    myDocuments = objShell.SpecialFolders("MyDocuments")
    fileName = myDocuments + r"C:/Users/OQA597/OneDrive - SUEZ/Documents/testResults" + str(
        numTests) + "Tests.csv"
    with open(fileName, "w") as outputFile:
        outputFile.write(
            f"Results from {numTests} tests for each data point\n")
        outputFile.write(
            "Summoner Level,Champion Tier,Champions Wanted,Champions Taken,Champions Missing,Average,Median,Standard Deviation,Q1-Q2Division,Q2-Q3Division,Q3-Q4Division\n"
        )
        for (summonerLevel, championsWantedList) in enumerate(summonerLevels):
            for (championsWanted, champTierList) in enumerate(championsWantedList):
                for (championTier, champsTakenList) in enumerate(champTierList):
                    for (championsTaken, championsMissingList) in enumerate(champsTakenList):
                        for (championsMissingInTier, rollList) in enumerate(championsMissingList):
                            average = statistics.mean(rollList)
                            median = statistics.median(rollList)
                            stdev = statistics.stdev(rollList)
                            quantiles = statistics.quantiles(
                                rollList, method="inclusive")
                            outputFile.write(
                                f"{summonerLevel+1},{championTier+1},{championsWanted+1},{championsTaken},{championsMissingInTier},{average},{median},{stdev},{quantiles[0]},{quantiles[1]},{quantiles[2]}\n"
                            )
def test_plain(self):
    f = lambda: (i * j for i in range(-1, 2, 1) for j in range(2, -2, -1))
    self.assertEqual(mean(f()), statistics.mean(f()))
    self.assertEqual(variance(f()), statistics.variance(f()))
    self.assertEqual(stdev(f()), statistics.stdev(f()))
    self.assertEqual(pvariance(f()), statistics.pvariance(f()))
    self.assertEqual(pstdev(f()), statistics.pstdev(f()))
    self.assertEqual(mode(f()), statistics.mode(f()))
    self.assertEqual(median(f()), statistics.median(f()))
    self.assertEqual(quantiles(f()), statistics.quantiles(f()))
    self.assertEqual(quantiles(f(), n=6, method='inclusive'),
                     statistics.quantiles(f(), n=6, method='inclusive'))
    x = list(f())
    y = list(reversed(x))
    self.assertEqual(covariance(x, y), statistics.covariance(x, y))
    self.assertEqual(correlation(x, y), statistics.correlation(x, y))
    self.assertEqual(linear_regression(x, y), statistics.linear_regression(x, y))
def __init__(
    self,
    throughput_metrics: List[int],
    latency_metrics: List[int],
    execution_metrics: List[ProcessStats],
):
    self.average_throughput = mean(throughput_metrics)
    self.average_latency = mean(latency_metrics)
    self.max_throughput = max(throughput_metrics)
    self.max_latency = max(latency_metrics)
    self.min_throughput = min(throughput_metrics)
    self.min_latency = min(latency_metrics)
    self.ninety_fifth_percentile_throughput = quantiles(throughput_metrics, n=20)[-1]
    self.ninety_fifth_percentile_latency = quantiles(latency_metrics, n=20)[-1]
    self.number_of_threads_run = len(execution_metrics)
    self.execution_info = execution_metrics
async def log_latency(interval):
    while True:
        await asyncio.sleep(interval)
        p = statistics.quantiles(LATENCIES.values(), n=100)
        print(f"clients = {len(LATENCIES)}")
        print(f"p50 = {p[49] / 1e6:.1f}ms, "
              f"p95 = {p[94] / 1e6:.1f}ms, "
              f"p99 = {p[98] / 1e6:.1f}ms")
        print()
def percentile(values: List[float], percent: float):
    """
    Find the percentile of a list of values

    :param values: list of values
    :param percent: float value from 0.0 to 1.0
    :return: the percentile of the values
    """
    return statistics.quantiles(values, n=100, method="inclusive")[int(percent * 100 - 1)]
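# Hypothetical usage sketch for percentile() above (not part of the original snippet),
# using only the standard library. With the 'inclusive' method the cut points of
# 0..100 fall exactly on the integers, so the 90th percentile of range(101) is 90.0.
if __name__ == "__main__":
    data = list(range(101))
    print(percentile(data, 0.90))  # 90.0
    print(percentile(data, 0.50))  # 50.0 (the median)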
def analyse(results: dict):
    """Analyses sending data test results in form of a dictionary of lists."""
    for desc, results_list in results.items():  # desc = key, results_list = item
        mode = statistics.mode(results_list)
        print(f"{desc}: Mode = {mode}")
        average = statistics.mean(results_list)
        print(f"{desc}: Average = {average}")
        standard_deviation = statistics.stdev(results_list)
        print(f"{desc}: Standard deviation = {standard_deviation}")
        quartiles = statistics.quantiles(results_list, method='inclusive')
        print(f"{desc}: Quartiles = {quartiles}")
        iqr = quartiles[2] - quartiles[0]
        if standard_deviation:
            skewness_mode = (average - mode) / standard_deviation
            skewness_median = 3 * (average - quartiles[1]) / standard_deviation
            print(f"{desc}: Pearson skewness (mode) = {skewness_mode}")
            print(f"{desc}: Pearson skewness (median) = {skewness_median}")
        else:
            print("Skewness = 0")

        fig = plt.figure()
        grid = GridSpec(5, 1, figure=fig)
        ax_box = fig.add_subplot(grid[0, 0])
        ax_hist = fig.add_subplot(grid[1:, 0])
        ax_box.axis('off')
        ax_hist.grid()
        ax_box.get_shared_x_axes().join(ax_box, ax_hist)
        fig.suptitle(f"{desc}: transmissions")
        ax_hist.set_xlabel(f"Number of packets sent: {desc}")
        ax_hist.set_ylabel("Occurrences")

        q0 = quartiles[0] - 1.5 * iqr
        q4 = quartiles[2] + 1.5 * iqr

        # boxplot and histogram
        ax_box.boxplot([q0, quartiles[0], quartiles[1], quartiles[2], q4],
                       vert=False)
        hist_bins = np.arange(min(results_list), max(results_list) + 1, 1)
        if len(hist_bins) > 20:
            hist_bins = 20
        counts, bins, bars = ax_hist.hist(results_list, bins=hist_bins)

        x_data = []
        for i in range(len(bins) - 1):
            x_data.append((bins[i] + bins[i + 1]) / 2)
        y_data = counts
        try:
            params, params_cov = opt.curve_fit(
                gauss_function, x_data, y_data,
                p0=[max(y_data), quartiles[1], iqr / 1.349])
            print(f"{desc}: Gauss parameters (a, mu, sigma): {params}")
            ax_hist.plot(
                x_data, gauss_function(x_data, params[0], params[1], params[2]))
        except Exception as e:
            print("Couldn't estimate Gauss function")
        plt.waitforbuttonpress()
def read_text_file(filepath):
    with open(filepath, 'r') as txt_file:
        my_list = [tuple(map(float, i.split(','))) for i in txt_file]
        graph_name = txt_file.name.split("\\")[7].split(".")[0]
        list_quantity = []
        list_time = []
        for par in my_list:
            list_quantity.append(par[0])
            list_time.append(par[1])
        list_time = [
            round(q, 1) for q in statistics.quantiles(list_time, n=10)
        ]
        list_quantity = [
            round(q, 1) for q in statistics.quantiles(list_quantity, n=10)
        ]
        pars = []
        for i in range(0, 9):
            pars.append([list_quantity[i], list_time[i]])
        draw_graph([list_quantity, list_time], graph_name)
def floodlight_analysis(
    config, task: dict,
    rows: Generator[list[str, str, str, str, str, str, int], None, None]
) -> list[str, list[str, str, str, str, str, str, int, str]]:
    """ Perform outlier analysis and return last row by date with status indicator.

    Groups all floodlight data by activity, checking for outliers using the
    interquartile range rule. See: http://www.mathwords.com/o/outlier.htm

    Args:
      rows - A stream of rows, see FLOODLIGHT_* constants for definitions.

    Returns:
      A date string for the last date as well as the last row for each activity
      with status appended (LOW, HIGH, NORMAL). Possibly None, None if no rows.
    """

    outliers_today = []
    activities = {}

    for row in rows:
        activities.setdefault(row[FLOODLIGHT_ACTIVITY_ID], []).append(row)

    for activity in activities.values():
        data = sorted(activity, key=lambda k: k[FLOODLIGHT_IMPRESSIONS])

        quartile_1, quartile_median, quartile_3 = quantiles(map(
            lambda d: d[FLOODLIGHT_IMPRESSIONS], data), n=4)
        quartile_range = quartile_3 - quartile_1
        outlier_top = quartile_3 + (1.5 * quartile_range)
        outlier_bottom = quartile_1 - (1.5 * quartile_range)

        last_day = max(data, key=lambda k: k[FLOODLIGHT_DATE])

        if last_day[FLOODLIGHT_IMPRESSIONS] == 0 or last_day[
                FLOODLIGHT_IMPRESSIONS] < outlier_bottom:
            last_day.append('LOW')
        elif last_day[FLOODLIGHT_IMPRESSIONS] > outlier_top:
            last_day.append('HIGH')
        else:
            last_day.append('NORMAL')

        outliers_today.append((
            last_day[FLOODLIGHT_DATE],
            last_day[FLOODLIGHT_CONFIG_ID],
            last_day[FLOODLIGHT_ACTIVITY_ID],
            last_day[FLOODLIGHT_ACTIVITY],
            last_day[FLOODLIGHT_IMPRESSIONS],
            last_day[FLOODLIGHT_STATUS],
        ))

    if len(outliers_today) > 0:
        return outliers_today[0][FLOODLIGHT_DATE], outliers_today
    else:
        return None, None
def _append_reduction(self, fun_list):
    '''Reduce each column and append the result to the foot of the table.

    fun is the name, func is the callable. First see if this is the name of
    something in the statistics module or something from builtins that we
    like; if it is none of those, ignore it with a message.
    '''
    if not fun_list:
        fun_list = 'total'
    else:
        fun_list = fun_list.replace('summary', 'min q25 median mean q75 max')

    for fun in (f.lower() for f in fun_list.split()):
        if hasattr(statistics, fun):
            func = getattr(statistics, fun)
        elif fun == "q25" and hasattr(statistics, "quantiles"):
            func = lambda data: statistics.quantiles(data, n=4)[0]
        elif fun == "q75" and hasattr(statistics, "quantiles"):
            func = lambda data: statistics.quantiles(data, n=4)[2]
        elif fun == "q95" and hasattr(statistics, "quantiles"):
            func = lambda data: statistics.quantiles(data, n=20)[-1]
        elif fun in "min max all any sum".split():
            func = getattr(builtins, fun)
        elif fun == "prod" and hasattr(math, "prod"):
            func = math.prod
        elif fun == "total":
            func = builtins.sum
        else:
            self.messages.append(f'? {fun}')
            continue

        footer = []
        for c in range(self.cols):
            booleans, values = zip(*self.column(c))
            decimals = list(itertools.compress(values, booleans))
            if not any(booleans) or (c == 0 and looks_like_sequence(decimals)):
                footer.append(fun.title())
            else:
                footer.append(func(decimals))
        self.append(footer)
def _stat_quantiles(self) -> StateType:
    if len(self.states) > self._quantile_intervals:
        return str([
            round(quantile, self._precision)
            for quantile in statistics.quantiles(
                self.states,
                n=self._quantile_intervals,
                method=self._quantile_method,
            )
        ])
    return None