def combined_stats_over_time(label,
                             runs,
                             objective,
                             worst,
                             best,
                             ):
  """
  combine stats_over_time() vectors for multiple runs
  """
  extract_fn = _.result.time
  combine_fn = min
  no_data = 999

  by_run = [stats_over_time(session, run, extract_fn, combine_fn, no_data)
            for run, session in runs]
  max_len = max(list(map(len, by_run)))

  by_run_streams = [Stream() << x << repeat(x[-1], max_len - len(x))
                    for x in by_run]
  by_quanta = list(zip(*by_run_streams[:]))

  # TODO: Fix this, this variable should be configurable
  stats_quanta = 10

  def get_data(value_function):
    final_values = []
    for quanta, values in enumerate(by_quanta):
      sec = quanta * stats_quanta
      final_values.append([sec] + value_function(values))
    return final_values

  mean_values = get_data(lambda values: [mean(values), stddev(values)])

  def extract_percentiles(values):
    values = sorted(values)
    return ([values[int(round(p * (len(values) - 1)))] for p in PCTSTEPS]
            + [mean(values)])

  percentile_values = get_data(extract_percentiles)
  return mean_values, percentile_values
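# A minimal sketch (not part of the original module) of how the
# (mean_values, percentile_values) pair returned above could be plotted with
# matplotlib.  The helper name `plot_combined_stats` and the axis labels are
# illustrative assumptions only.
import matplotlib.pyplot as plt


def plot_combined_stats(label, mean_values, percentile_values):
  """Plot mean +/- stddev over time; each mean_values row is [sec, mean, stddev]."""
  seconds = [row[0] for row in mean_values]
  means = [row[1] for row in mean_values]
  stddevs = [row[2] for row in mean_values]
  plt.errorbar(seconds, means, yerr=stddevs, label=label)
  # percentile_values rows are [sec] + one value per entry in PCTSTEPS + [mean];
  # shade between the first and last percentile columns.
  lows = [row[1] for row in percentile_values]
  highs = [row[-2] for row in percentile_values]
  plt.fill_between(seconds, lows, highs, alpha=0.2)
  plt.xlabel('seconds')
  plt.ylabel('best result seen so far')
  plt.legend()
  plt.show()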
def combined_stats_over_time(self,
                             output_dir,
                             label,
                             runs,
                             objective,
                             worst,
                             best,
                             ):
  """
  combine stats_over_time() vectors for multiple runs
  """
  #extract_fn = lambda dr: objective.stats_quality_score(dr.result, worst, best)
  extract_fn = _.result.run_time
  combine_fn = min
  no_data = 999

  log.debug("writing stats for %s to %s", label, output_dir)
  by_run = [self.stats_over_time(session, run, extract_fn, combine_fn, no_data)
            for run, session in runs]
  max_len = max(map(len, by_run))

  # pad shorter runs with their final value so every run covers max_len quanta
  by_run_streams = [Stream() << x << repeat(x[-1], max_len - len(x))
                    for x in by_run]
  by_quanta = list(zip(*by_run_streams[:]))

  def data_file(suffix, headers, value_function):
    # data_file() prepends the '#sec' column header itself
    with open(os.path.join(output_dir, label + suffix), 'w') as fd:
      out = csv.writer(fd, delimiter=' ', lineterminator='\n')
      out.writerow(['#sec'] + headers)
      for quanta, values in enumerate(by_quanta):
        sec = quanta * self.args.stats_quanta
        out.writerow([sec] + value_function(values))

  #data_file('_details.dat',
  #          map(lambda x: 'run%d' % x, xrange(max_len)),
  #          list)
  #self.gnuplot_file(output_dir,
  #                  label+'_details',
  #                  [('"'+label+'_details.dat"'
  #                    ' using 1:%d' % i +
  #                    ' with lines'
  #                    ' title "Run %d"' % i)
  #                   for i in xrange(max_len)])

  data_file('_mean.dat',
            ['mean', 'stddev'],
            lambda values: [mean(values), stddev(values)])
  self.gnuplot_file(
      output_dir,
      label + '_mean',
      ['"' + label + '_mean.dat" using 1:2 with lines title "Mean"'])

  def extract_percentiles(values):
    values = sorted(values)
    return ([values[int(round(p * (len(values) - 1)))] for p in PCTSTEPS]
            + [mean(values)])

  data_file("_percentiles.dat", PCTSTEPS + ['mean'], extract_percentiles)
  self.gnuplot_file(
      output_dir,
      label + '_percentiles',
      reversed([
          '"' + label + '_percentiles.dat" using 1:2 with lines title "0%"',
          # '"" using 1:3 with lines title "5%"',
          '"" using 1:4 with lines title "10%"',
          # '"" using 1:5 with lines title "25%"',
          '"" using 1:6 with lines title "20%"',
          # '"" using 1:7 with lines title "35%"',
          '"" using 1:8 with lines title "30%"',
          # '"" using 1:9 with lines title "45%"',
          '"" using 1:10 with lines title "40%"',
          # '"" using 1:11 with lines title "55%"',
          '"" using 1:12 with lines title "50%"',
          # '"" using 1:13 with lines title "65%"',
          '"" using 1:14 with lines title "70%"',
          # '"" using 1:15 with lines title "75%"',
          '"" using 1:16 with lines title "80%"',
          # '"" using 1:17 with lines title "85%"',
          '"" using 1:18 with lines title "90%"',
          # '"" using 1:19 with lines title "95%"',
          '"' + label + '_percentiles.dat" using 1:20 with lines title "100%"',
      ]))
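# A minimal, dependency-free sketch (an assumption, not part of the original
# code) of what the Stream/repeat/zip pipeline above computes: each run's
# vector is padded with its final value to a common length, then values are
# grouped per time quantum so statistics can be taken across runs.
def pad_and_group(by_run):
  """Return a list of per-quantum tuples, one value per run."""
  max_len = max(len(x) for x in by_run)
  padded = [x + [x[-1]] * (max_len - len(x)) for x in by_run]
  return list(zip(*padded))


# Example: two runs of different lengths.
# pad_and_group([[9, 7, 5], [8, 6]]) == [(9, 8), (7, 6), (5, 6)]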