def filter(self, testname, filters): """ filter the results set; applies each of the filters in order to the results data filters should be callables that take a list the last filter should return a scalar (float or int) returns a list of [[data, page], ...] """ retval = [] for result in self.results: page = result['page'] data = result['runs'] remaining_filters = [] # ignore* functions return a filtered set of data for f in filters: if f.func.__name__.startswith('ignore'): data = f.apply(data) else: remaining_filters.append(f) # calculate common numbers with the raw data data_summary = { 'min': min(data), 'max': max(data), 'mean': filter.mean(data), 'median': filter.median(data), 'std': filter.stddev(data) } # apply the summarization filters for f in remaining_filters: if f.func.__name__ == "v8_subtest": # for v8_subtest we need to page for reference data data = filter.v8_subtest(data, page) else: data = f.apply(data) data_summary['filtered'] = data # special case for dromaeo_dom and v8_7 if testname == 'dromaeo_dom' or testname.startswith('v8_7'): data_summary['value'] = data retval.append([data_summary, page]) return retval
def construct_results(self, vals, testname): if "responsiveness" in testname: return filter.responsiveness_Metric([val for (val, page) in vals]) elif testname.startswith("v8_7"): return self.v8_Metric(vals) elif testname.startswith("kraken"): return self.JS_Metric(vals) elif testname.startswith("ares6"): return self.benchmark_score(vals) elif testname.startswith("jetstream"): return self.benchmark_score(vals) elif testname.startswith("speedometer"): return self.speedometer_score(vals) elif testname.startswith("stylebench"): return self.stylebench_score(vals) elif len(vals) > 1: return filter.geometric_mean([i for i, j in vals]) else: return filter.mean([i for i, j in vals])
def benchmark_score(cls, val_list): """ benchmark_score: ares6/jetstream self reported as 'geomean' """ results = [i for i, j in val_list if j == 'geomean'] return filter.mean(results)
def __call__(self): suites = [] test_results = { "framework": { "name": self.results.results[0].framework, }, "suites": suites, } for test in self.results.results: # serialize test results tsresult = None if not test.using_xperf: subtests = [] suite = { "name": test.name(), "extraOptions": self.results.extra_options or [], "subtests": subtests, "shouldAlert": test.test_config.get("suite_should_alert", True), } suites.append(suite) vals = [] replicates = {} # TODO: counters!!!! we don't have any, but they suffer the same for result in test.results: # XXX this will not work for manifests which list # the same page name twice. It also ignores cycles for page, val in result.raw_values(): if page == "NULL": page = test.name() if tsresult is None: tsresult = r = self.tsresult_class() r.results = [ {"index": 0, "page": test.name(), "runs": val} ] else: r = tsresult.results[0] if r["page"] == test.name(): r["runs"].extend(val) replicates.setdefault(page, []).extend(val) tresults = [tsresult] if tsresult else test.results # Merge results for the same page when using cycle > 1 merged_results = {} for result in tresults: results = [] for r in result.results: page = r["page"] if page in merged_results: merged_results[page]["runs"].extend(r["runs"]) else: merged_results[page] = r results.append(r) # override the list of page results for each run result.results = results for result in tresults: filtered_results = result.values( suite["name"], test.test_config["filters"] ) vals.extend([[i["value"], j] for i, j in filtered_results]) subtest_index = 0 for val, page in filtered_results: if page == "NULL": # no real subtests page = test.name() subtest = { "name": page, "value": val["filtered"], "replicates": replicates[page], } # if results are from a comparison test i.e. 
perf-reftest, it will also # contain replicates for 'base' and 'reference'; we wish to keep those # to reference; actual results were calculated as the difference of those base_runs = result.results[subtest_index].get("base_runs", None) ref_runs = result.results[subtest_index].get("ref_runs", None) if base_runs and ref_runs: subtest["base_replicates"] = base_runs subtest["ref_replicates"] = ref_runs subtests.append(subtest) subtest_index += 1 if test.test_config.get("lower_is_better") is not None: subtest["lowerIsBetter"] = test.test_config[ "lower_is_better" ] if test.test_config.get("alert_threshold") is not None: subtest["alertThreshold"] = test.test_config[ "alert_threshold" ] if test.test_config.get("subtest_alerts") is not None: subtest["shouldAlert"] = test.test_config["subtest_alerts"] if test.test_config.get("alert_threshold") is not None: subtest["alertThreshold"] = test.test_config[ "alert_threshold" ] if test.test_config.get("unit"): subtest["unit"] = test.test_config["unit"] # if there is only one subtest, carry alerting setting from the suite if len(subtests) == 1: subtests[0]["shouldAlert"] = suite["shouldAlert"] # if there is more than one subtest, calculate a summary result elif len(subtests) > 1: suite["value"] = self.construct_results(vals, testname=test.name()) if test.test_config.get("lower_is_better") is not None: suite["lowerIsBetter"] = test.test_config["lower_is_better"] if test.test_config.get("alert_threshold") is not None: suite["alertThreshold"] = test.test_config["alert_threshold"] # counters results_aux data counter_subtests = [] for cd in test.all_counter_results: for name, vals in cd.items(): # We want to add the xperf data as talos_counters # exclude counters whose values are tuples (bad for # graphserver) if len(vals) > 0 and isinstance(vals[0], list): continue # mainthread IO is a list of filenames and accesses, we do # not report this as a counter if "mainthreadio" in name: continue # responsiveness has it's own metric, not the mean # TODO: consider doing this for all counters if "responsiveness" == name: subtest = { "name": name, "value": filter.responsiveness_Metric(vals), } counter_subtests.append(subtest) continue subtest = { "name": name, "value": 0.0, } counter_subtests.append(subtest) if test.using_xperf: if len(vals) > 0: subtest["value"] = vals[0] else: # calculate mean value if len(vals) > 0: varray = [float(v) for v in vals] subtest["value"] = filter.mean(varray) if counter_subtests: suites.append( { "name": test.name(), "extraOptions": self.results.extra_options or [], "subtests": counter_subtests, "shouldAlert": test.test_config.get("suite_should_alert", True), } ) return test_results
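# Hedged example of the structure __call__ assembles (the key names come from
# the code above; the framework/suite names and numeric values are
# illustrative): a Perfherder-style blob with one suite per test, plus a
# separate counters suite when counter data is present.
example_output = {
    "framework": {"name": "talos"},
    "suites": [
        {
            "name": "tp5o",
            "extraOptions": [],
            "shouldAlert": True,
            "lowerIsBetter": True,
            "alertThreshold": 2.0,
            # summary computed by construct_results when there is more than one subtest
            "value": 273.54,
            "subtests": [
                {
                    "name": "example.com",
                    "value": 251.0,                       # the page's 'filtered' scalar
                    "replicates": [255.0, 249.0, 250.0],  # raw per-cycle values
                    "unit": "ms",
                },
            ],
        },
    ],
}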