def get_timer_groups(merged_log):
    """Compute response-time statistics for each timer group.

    merged_log -- iterable of CSV lines where (0-based) field 5 is the HTTP
    response code, field 8 the elapsed time, and field 10 the timer-group
    name.

    Returns a dict mapping timer-group name -> [count, avg, stdev, min,
    50th, 80th, 90th, 95th, 99th percentile, max].
    """
    # Single pass over the log instead of re-scanning every line once per
    # unique group (the original was O(groups * lines)).  Every group seen
    # is seeded, even one with no valid responses, matching the original
    # behavior of building Stats on an empty list for such groups.
    grouped = {}
    for line in merged_log:
        fields = line.split(',')
        times = grouped.setdefault(fields[10].strip(), [])
        if fields[5] == '200':  # just concerned with valid responses
            # elapsed time kept as a string, as in the original; corestats
            # presumably converts -- TODO confirm against corestats.Stats
            times.append(fields[8])
    timer_group_stats = {}
    for timer_group, elapsed_times in grouped.items():
        stats = corestats.Stats(elapsed_times)
        timer_group_stats[timer_group] = [
            stats.count(), stats.avg(), stats.stdev(), stats.min(),
            stats.percentile(50), stats.percentile(80), stats.percentile(90),
            stats.percentile(95), stats.percentile(99), stats.max()
        ]
    return timer_group_stats
def find_outliers(vals):
    """Calculates the upper and lower bounds of a set of sample/case values

    Uses the Tukey fence rule: hinges at the 25th/75th percentiles, bounds
    at hinge +/- 1.5 * inter-hinge spread.

    >>> find_outliers([3.504, 5.234, 6.123, 7.234, 3.542, 5.341, 7.852, 4.555, 12.537])
    (11.252500000000001, 0.5364999999999993)
    >>>
    >>> find_outliers([9,12,15,17,31,50,7,5,6,8])
    (32.0, -8.0)

    If there are no vals, returns None for the upper and lower bounds,
    which code that calls it will have to deal with.
    >>> find_outliers([])
    (None, None)
    """
    # Fix: the original was logger.debug("xerxes vals is:", pf(vals)) -- a
    # positional argument with no %-placeholder (plus a stray "xerxes"
    # debug tag), which makes stdlib logging raise a string-formatting
    # error whenever DEBUG is enabled.  Use lazy %-style args instead.
    logger.debug("vals is: %s", pf(vals))
    if vals:
        stats = corestats.Stats(vals)
        low_hinge = stats.percentile(25)
        up_hinge = stats.percentile(75)
        hstep = 1.5 * (up_hinge - low_hinge)
        upper_bound = up_hinge + hstep
        lower_bound = low_hinge - hstep
    else:
        # no samples: callers must handle the (None, None) sentinel
        upper_bound = None
        lower_bound = None
    logger.debug(pf(locals()))
    return upper_bound, lower_bound
def trainForAllFeatures(self):
    # Translated from Chinese: this function is used to check the model's
    # performance after removing the low-gain features; before using it,
    # make sure fileName in main() is set to './features_nomain.csv'
    # (filtering based on the auxiliary eye).
    #
    # For each kernel, runs 2000 random 60-sample train / rest-test splits
    # of self.datafortrain with an SVR on all feature columns, and prints
    # the average PLCC / SROCC / RMSE for that kernel.
    w, h = self.datafortrain.shape  # w = number of samples, h = columns (col 0 is the MOS label)
    kernels = ['linear', 'poly', 'rbf']
    for j in range(3):  # one evaluation pass per kernel
        LCC_rbf = []; SROCC_rbf = []; RMSE_rbf = []
        for i in range(2000):
            print u"第%d次训练" % i
            # random split: 60 rows for training, the remainder for testing
            randlist = range(w)
            np.random.shuffle(randlist)
            trainindex = randlist[0:60]
            testindex = randlist[60:w]
            # if j not in basecolumn:
            #     basecolumn.append(j)
            # if j==0:
            #     basecolumn.remove(j)
            train_feature = self.datafortrain[trainindex, :]
            train_feature = train_feature[:, 1:]  # drop the label column
            train_label = self.datafortrain[trainindex, 0]
            test_feature = self.datafortrain[testindex, :]
            test_feature = test_feature[:, 1:]
            test_label = self.datafortrain[testindex, 0]
            '''============= train and predict by svr with rbf kernel ==============='''
            # NOTE(review): despite the comment above, the kernel actually
            # varies per outer pass (kernels[j]), not just rbf.
            clf = svm.SVR(kernel=kernels[j], C=1000, gamma=0.1)
            pred = clf.fit(train_feature, train_label).predict(test_feature)
            # correlation / error metrics for this split
            srocc, p1 = statstool.spearmanr(pred, test_label)
            plcc, p2 = statstool.pearsonr(pred, test_label)
            rmse = sqrt(mean_squared_error(pred, test_label))
            LCC_rbf.append(plcc); SROCC_rbf.append(srocc); RMSE_rbf.append(rmse)
        # average the metrics over the 2000 runs for this kernel
        sts_lcc = corestats.Stats(LCC_rbf)
        sts_srocc = corestats.Stats(SROCC_rbf)
        sts_rmse = corestats.Stats(RMSE_rbf)
        print "all_feature metrics %s:plcc:%f,srocc:%f,rmse:%f " % (
            kernels[j], sts_lcc.avg(), sts_srocc.avg(), sts_rmse.avg())
def main(): try: optlist, args = getopt.getopt(sys.argv[1:], 'd:h', ["deadline=", "help"]) except getopt.GetoptError as err: print str(err) sys.exit(2) deadline = 0 deadline_miss = 0 for opt, val in optlist: if opt in ("-h", "--help"): print args[0] + " [-d <deadline (ms)>]" elif opt in ("-d", "--deadline"): deadline = float(val) else: assert False, "unhandled option" print "deadline: ", deadline file1 = open(args[0], 'r') items = [] while (True): line = file1.readline() if not line: break tokens = line.split() # print tokens try: num = float(tokens[0]) except ValueError: break items[len(items):] = [num] if deadline > 0 and num > deadline: deadline_miss += 1 stats = corestats.Stats(items) print print "----[", args[0], "]---" print "count: ", stats.count() print "deadline miss: ", deadline_miss print "deadline miss ratio: (%f)" % (float(deadline_miss) / stats.count()) print "min: ", stats.min() print "avg: ", stats.avg() print "90pctile: ", stats.percentile(90) print "95pctile: ", stats.percentile(95) print "99pctile: ", stats.percentile(99) #print "median: ", stats.median() print "max: ", stats.max() print "stdev: ", stats.stdev() #avg min max 99pctile print "LINE(avg|min|max|99pct|stdev): ", stats.avg(), \ stats.min(), stats.max(), stats.percentile(99), stats.stdev()
def stats_per_second(self, *args):
    """Build a dictionary of descriptive statistics for each
    (game_log, matrix) pair and return a list of (game_log, stats)
    tuples, one per input pair.
    """
    results = []
    for game_log, samples in args:
        cstats = corestats.Stats()
        mode = cstats.mode(samples)
        mean = cstats.mean(samples)
        variance = cstats.variance(samples)
        stddev = variance**0.5
        three_sigma = 3 * stddev
        lo = min(samples)
        hi = max(samples)
        # value three standard deviations above the mean, rounded up
        perfect = math.ceil(three_sigma + mean)
        stats = {
            'mode': mode[0][0],
            'modenext': mode[1][0],
            'mean': mean,
            'median': cstats.median(samples),
            #'harmonicmean': mstats.harmonicmean(samples),
            'variance': variance,
            'stddeviation': stddev,
            '3sigma': three_sigma,
            'cumfreq': mstats.cumfreq(samples),
            # frequency of each item (each item being the count of the
            # occurrences for each number of lines per second)
            'itemfreq': mstats.itemfreq(samples),
            'min': lo,
            'max': hi,
            'samplespace': hi - lo,
            'count': len(samples),
            'kurtosis': mstats.kurtosis(samples),
            'perfectvalue': int(perfect),
            'perfectscore': cstats.percentileforvalue(samples, perfect),
        }
        stats['itemscore'] = [
            (pct, cstats.valueforpercentile(samples, pct))
            for pct in (10, 30, 50, 70, 80, 85, 90, 95, 99, 99.9, 99.99)
        ]
        # positive skew: more values below the mean than above it;
        # negative skew: the reverse.
        skew = mstats.skew(samples)
        stats['skew'] = skew
        if skew > 0:
            stats['skewmeaning'] = 'There exist more smaller values from the mean than higher'
        else:
            stats['skewmeaning'] = 'There exist more higher values from the mean than smaller'
        results.append((game_log, stats))
    return results
def generate_results(dir, test_name): print '\nGenerating Results...' try: merged_log = open(dir + '/agent_stats.csv', 'rb').readlines( ) # this log contains commingled results from all agents except IOError: sys.stderr.write('ERROR: Can not find your results log file\n') merged_error_log = merge_error_files(dir) if len(merged_log) == 0: fh = open(dir + '/results.html', 'w') fh.write( r'<html><body><p>None of the agents finished successfully. There is no data to report.</p></body></html>\n' ) fh.close() sys.stdout.write( 'ERROR: None of the agents finished successfully. There is no data to report.\n' ) return timings = list_timings(merged_log) best_times, worst_times = best_and_worst_requests(merged_log) timer_group_stats = get_timer_groups(merged_log) timing_secs = [int(x[0]) for x in timings] # grab just the secs (rounded-down) throughputs = calc_throughputs(timing_secs) # dict of secs and throughputs #save throughputs to file fh = open('%s/agent_throughputs.csv' % dir, 'w') fh.close() for q_tuple in throughputs: through = (q_tuple, throughputs[q_tuple]) f = open('%s/agent_throughputs.csv' % dir, 'a') f.write('%s,%f\n' % through) # log as csv f.flush() f.close() throughput_stats = corestats.Stats(throughputs.values()) resp_data_set = [x[1] for x in timings] # grab just the timings response_stats = corestats.Stats(resp_data_set) # calc the stats and load up a dictionary with the results stats_dict = get_stats(response_stats, throughput_stats) # get the pickled stats dictionaries we saved runtime_stats_dict, workload_dict = load_dat_detail(dir) # get the summary stats and load up a dictionary with the results summary_dict = {} summary_dict['cur_time'] = time.strftime('%m/%d/%Y %H:%M:%S', time.localtime()) summary_dict['duration'] = int(timings[-1][0] - timings[0][0]) + 1 # add 1 to round up summary_dict['num_agents'] = workload_dict['num_agents'] summary_dict['req_count'] = len(timing_secs) summary_dict['err_count'] = len(merged_error_log) 
summary_dict['bytes_received'] = calc_bytes(merged_log) # write html report fh = open(dir + '/results.html', 'w') reportwriter.write_head_html(fh) reportwriter.write_starting_content(fh, test_name) reportwriter.write_summary_results(fh, summary_dict, workload_dict) reportwriter.write_stats_tables(fh, stats_dict) reportwriter.write_images(fh) reportwriter.write_timer_group_stats(fh, timer_group_stats) reportwriter.write_agent_detail_table(fh, runtime_stats_dict) reportwriter.write_best_worst_requests(fh, best_times, worst_times) reportwriter.write_closing_html(fh) fh.close() try: # graphing only works on systems with Matplotlib installed print 'Generating Graphs...' import graph graph.resp_graph(timings, dir=dir + '/') graph.tp_graph(throughputs, dir=dir + '/') except: sys.stderr.write('ERROR: Unable to generate graphs with Matplotlib\n') print '\nDone generating results. You can view your test at:' print '%s/results.html\n' % dir
def lowgaininthebasicfeatures(self): w, h = self.datafortrain.shape kernels = ['linear', 'poly', 'rbf'] LCC = []; SROCC = []; RMSE = []; baseplcc = 0; basesrocc = 0; basermse = 0 for j in [0, 1, 2, 3, 4, 5, 7, 13, 14, 15]: print j LCC_rbf = []; SROCC_rbf = []; RMSE_rbf = [] for i in range(2000): randlist = range(w) np.random.shuffle(randlist) trainindex = randlist[0:60] testindex = randlist[60:w] basecolumn = [6, 7, 8, 9, 10, 11, 12, 13, 15, 14] if j not in basecolumn: basecolumn.append(j) if j == 0: basecolumn.remove(j) train_feature = self.datafortrain[trainindex, :] train_feature = train_feature[:, basecolumn] train_label = self.datafortrain[trainindex, 0] test_feature = self.datafortrain[testindex, :] test_feature = test_feature[:, basecolumn] test_label = self.datafortrain[testindex, 0] '''============= train and predict by svr with rbf kernel ===============''' # rbf kernel clf = svm.SVR(kernel='rbf', C=1000, gamma=1 / 77) pred = clf.fit(train_feature, train_label).predict(test_feature) # rbf metrics srocc, p1 = statstool.spearmanr(pred, test_label) plcc, p2 = statstool.pearsonr(pred, test_label) rmse = sqrt(mean_squared_error(pred, test_label)) LCC_rbf.append(plcc); SROCC_rbf.append(srocc); RMSE_rbf.append(rmse) sts_lcc = corestats.Stats(LCC_rbf) sts_srocc = corestats.Stats(SROCC_rbf) sts_rmse = corestats.Stats(RMSE_rbf) LCC.append(sts_lcc.avg()); SROCC.append(sts_srocc.avg()); RMSE.append(sts_rmse.avg()) if j == 0: baseplcc = sts_lcc.avg(); basesrocc = sts_srocc.avg(); basermse = sts_rmse.avg() else: basecolumn.remove(j) #print "all_feature metrics %s:plcc:%f,srocc:%f,rmse:%f " %(kernels[2],sts_lcc.avg(),sts_srocc.avg(),sts_rmse.avg()) # LCC_r = [(item-baseplcc)/baseplcc for item in LCC] # SROCC_r = [(item-basesrocc)/basesrocc for item in SROCC] # RMSE_r = [(item-basermse)/basermse for item in RMSE] N = 15 ind = np.arange(N) # the x locations for the groups width = 0.35 # the width of the bars fig, ax = plt.subplots() # add some ax.set_xlabel(u'特征序号', 
fontsize=18) ax.set_ylabel(u'相关系数和均方差', fontsize=18) # ax.set_title('The Correlation between features and mos') ax.set_xticks(ind) ax.set_xticklabels(('Orig', 'f1', 'f2', 'f3', 'f4', 'f5', 'f7', 'f13', 'f14', 'f15')) plt.axhline(y=baseplcc) plt.axhline(y=basesrocc) plt.axhline(y=basermse) plt.plot(LCC, 'b--*', label='PLCC') plt.plot(SROCC, 'g--+', label='SROCC') plt.plot(RMSE, 'r--o', label='RMSE') plt.legend(loc='center right') plt.show()