def summarize(self):
    suites = []
    vals = []
    test_results = {
        'framework': {
            'name': 'raptor',
        },
        'suites': suites,
    }

    # check if we actually have any results
    if len(self.results) == 0:
        LOG.error("error: no raptor test results found!")
        return

    for test in self.results:
        subtests = []
        suite = {
            'name': test.name,
            'extraOptions': test.extra_options,
            'subtests': subtests
        }

        suites.append(suite)

        # each test can report multiple measurements per pageload
        # each measurement becomes a subtest inside the 'suite'
        for key, values in test.measurements.iteritems():
            new_subtest = {}
            new_subtest['name'] = test.name + "-" + key
            new_subtest['replicates'] = values
            new_subtest['lower_is_better'] = test.lower_is_better
            new_subtest['alert_threshold'] = float(test.alert_threshold)
            new_subtest['value'] = 0
            new_subtest['unit'] = test.unit

            filtered_values = filter.ignore_first(new_subtest['replicates'], 1)
            new_subtest['value'] = filter.median(filtered_values)
            vals.append(new_subtest['value'])

            subtests.append(new_subtest)

        # if there is more than one subtest, calculate a summary result
        if len(subtests) > 1:
            suite['value'] = self.construct_results(vals, testname=test.name)

    LOG.info("returning summarized test results:")
    LOG.info(test_results)

    self.summarized_results = test_results
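
# The version above leans on two helpers from the raptor `filter` module:
# filter.ignore_first() to drop warm-up noise and filter.median() to collapse
# the remaining replicates into a single subtest value. Below is a minimal
# sketch of that behaviour; the names mirror the calls above, but the bodies
# are assumptions for illustration, not the actual raptor filter module.

def ignore_first(values, number=1):
    # drop the first `number` replicates (initial pageloads are noisier)
    return values[number:]

def median(values):
    # middle value of the sorted replicates; mean of the two middle values
    # when the count is even
    ordered = sorted(values)
    mid = len(ordered) // 2
    if len(ordered) % 2 == 1:
        return ordered[mid]
    return (ordered[mid - 1] + ordered[mid]) / 2.0

# e.g. median(ignore_first([788, 315, 334, 286, 318], 1)) -> 316.5
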
def summarize(self):
    suites = []
    test_results = {
        'framework': {
            'name': 'raptor',
        },
        'suites': suites,
    }

    # check if we actually have any results
    if len(self.results) == 0:
        LOG.error("error: no raptor test results found!")
        return

    for test in self.results:
        vals = []
        subtests = []
        suite = {
            'name': test.name,
            'type': test.type,
            'extraOptions': test.extra_options,
            'subtests': subtests,
            'lowerIsBetter': test.lower_is_better,
            'alertThreshold': float(test.alert_threshold)
        }

        suites.append(suite)

        # process results for pageloader type of tests
        if test.type == "pageload":
            # each test can report multiple measurements per pageload
            # each measurement becomes a subtest inside the 'suite'

            # this is the format we receive the results in from the pageload test
            # i.e. one test (subtest) in raptor-firefox-tp6:

            # {u'name': u'raptor-firefox-tp6-amazon', u'type': u'pageload', u'measurements':
            # {u'fnbpaint': [788, 315, 334, 286, 318, 276, 296, 296, 292, 285, 268, 277, 274,
            # 328, 295, 290, 286, 270, 279, 280, 346, 303, 308, 398, 281]}, u'browser':
            # u'Firefox 62.0a1 20180528123052', u'lower_is_better': True, u'page':
            # u'https://www.amazon.com/s/url=search-alias%3Daps&field-keywords=laptop',
            # u'unit': u'ms', u'alert_threshold': 2}

            for measurement_name, replicates in test.measurements.iteritems():
                new_subtest = {}
                new_subtest['name'] = test.name + "-" + measurement_name
                new_subtest['replicates'] = replicates
                new_subtest['lowerIsBetter'] = test.lower_is_better
                new_subtest['alertThreshold'] = float(test.alert_threshold)
                new_subtest['value'] = 0
                new_subtest['unit'] = test.unit

                filtered_values = filter.ignore_first(new_subtest['replicates'], 1)
                new_subtest['value'] = filter.median(filtered_values)
                vals.append([new_subtest['value'], new_subtest['name']])

                subtests.append(new_subtest)

        elif test.type == "benchmark":
            if 'speedometer' in test.measurements:
                subtests, vals = self.parseSpeedometerOutput(test)
            elif 'motionmark' in test.measurements:
                subtests, vals = self.parseMotionmarkOutput(test)
            elif 'sunspider' in test.measurements:
                subtests, vals = self.parseSunspiderOutput(test)
            elif 'webaudio' in test.measurements:
                subtests, vals = self.parseWebaudioOutput(test)
            suite['subtests'] = subtests

        else:
            LOG.error("output.summarize received unsupported test results type")
            return

        # for pageload tests, if there are > 1 subtests here, that means there
        # were multiple measurements captured in each single pageload; we want
        # to get the mean of those values and report 1 overall 'suite' value
        # for the page; so that each test page/URL only has 1 line output
        # on treeherder/perfherder (all replicates available in the JSON)

        # for benchmarks there is generally more than one subtest in each cycle
        # and a benchmark-specific formula is needed to calculate the final score

        if len(subtests) > 1:
            suite['value'] = self.construct_summary(vals, testname=test.name)

    self.summarized_results = test_results
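
# This revision replaces the inline benchmark handling with per-benchmark
# helpers (parseSpeedometerOutput, parseMotionmarkOutput, ...). Those helpers
# are not part of this snippet; the sketch below is a hypothetical
# reconstruction of what parseSpeedometerOutput could return, based on the
# inline speedometer handling shown in a later revision in this file: one
# subtest per benchmark index, plus a vals list of [value, name] pairs.

def parseSpeedometerOutput(self, test):
    subtests = []
    vals = []
    replicates_by_name = {}

    # accumulate replicates for each benchmark index across page cycles
    for page_cycle in test.measurements['speedometer']:
        for sub, replicates in page_cycle[0].items():
            replicates_by_name.setdefault(sub, []).extend(replicates)

    for sub, replicates in replicates_by_name.items():
        new_subtest = {
            'name': sub,
            'replicates': replicates,
            'lowerIsBetter': test.lower_is_better,
            'alertThreshold': float(test.alert_threshold),
            'value': filter.median(replicates),
            'unit': test.unit,
        }
        subtests.append(new_subtest)
        vals.append([new_subtest['value'], sub])

    return subtests, vals
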
def summarize(self):
    suites = []
    test_results = {
        'framework': {
            'name': 'raptor',
        },
        'suites': suites,
    }

    # check if we actually have any results
    if len(self.results) == 0:
        LOG.error("error: no raptor test results found!")
        return

    for test in self.results:
        vals = []
        subtests = []
        suite = {
            'name': test.name,
            'type': test.type,
            'extraOptions': test.extra_options,
            'subtests': subtests,
            'lowerIsBetter': test.lower_is_better,
            'unit': test.unit,
            'alertThreshold': float(test.alert_threshold)
        }

        suites.append(suite)

        # process results for pageloader type of tests
        if test.type == "pageload":
            # each test can report multiple measurements per pageload
            # each measurement becomes a subtest inside the 'suite'

            # this is the format we receive the results in from the pageload test
            # i.e. one test (subtest) in raptor-firefox-tp6:

            # {u'name': u'raptor-firefox-tp6-amazon', u'type': u'pageload', u'measurements':
            # {u'fnbpaint': [788, 315, 334, 286, 318, 276, 296, 296, 292, 285, 268, 277, 274,
            # 328, 295, 290, 286, 270, 279, 280, 346, 303, 308, 398, 281]}, u'browser':
            # u'Firefox 62.0a1 20180528123052', u'lower_is_better': True, u'page':
            # u'https://www.amazon.com/s/url=search-alias%3Daps&field-keywords=laptop',
            # u'unit': u'ms', u'alert_threshold': 2}

            for measurement_name, replicates in test.measurements.iteritems():
                new_subtest = {}
                new_subtest['name'] = test.name + "-" + measurement_name
                new_subtest['replicates'] = replicates
                new_subtest['lowerIsBetter'] = test.subtest_lower_is_better
                new_subtest['alertThreshold'] = float(test.alert_threshold)
                new_subtest['value'] = 0
                new_subtest['unit'] = test.subtest_unit

                # ignore first value due to 1st pageload noise
                LOG.info("ignoring the first %s value due to initial pageload noise"
                         % measurement_name)
                filtered_values = filter.ignore_first(new_subtest['replicates'], 1)

                # for pageload tests that measure TTFI: TTFI is not guaranteed to be available
                # everytime; the raptor measure.js webext will substitute a '-1' value in the
                # cases where TTFI is not available, which is acceptable; however we don't want
                # to include those '-1' TTFI values in our final results calculations
                if measurement_name == "ttfi":
                    filtered_values = filter.ignore_negative(filtered_values)
                    # we've already removed the first pageload value; if there aren't any more
                    # valid TTFI values available for this pageload just remove it from results
                    if len(filtered_values) < 1:
                        continue

                new_subtest['value'] = filter.median(filtered_values)
                vals.append([new_subtest['value'], new_subtest['name']])

                subtests.append(new_subtest)

        elif test.type == "benchmark":
            if 'speedometer' in test.measurements:
                subtests, vals = self.parseSpeedometerOutput(test)
            elif 'motionmark' in test.measurements:
                subtests, vals = self.parseMotionmarkOutput(test)
            elif 'sunspider' in test.measurements:
                subtests, vals = self.parseSunspiderOutput(test)
            elif 'webaudio' in test.measurements:
                subtests, vals = self.parseWebaudioOutput(test)
            elif 'unity-webgl' in test.measurements:
                subtests, vals = self.parseUnityWebGLOutput(test)
            elif 'assorted-dom' in test.measurements:
                subtests, vals = self.parseAssortedDomOutput(test)
            elif 'wasm-misc' in test.measurements:
                subtests, vals = self.parseWASMMiscOutput(test)
            suite['subtests'] = subtests

        else:
            LOG.error("output.summarize received unsupported test results type")
            return

        # for pageload tests, if there are > 1 subtests here, that means there
        # were multiple measurements captured in each single pageload; we want
        # to get the mean of those values and report 1 overall 'suite' value
        # for the page; so that each test page/URL only has 1 line output
        # on treeherder/perfherder (all replicates available in the JSON)

        # for benchmarks there is generally more than one subtest in each cycle
        # and a benchmark-specific formula is needed to calculate the final score

        if len(subtests) > 1:
            suite['value'] = self.construct_summary(vals, testname=test.name)

    self.summarized_results = test_results
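
# The TTFI branch above filters out the '-1' placeholders with
# filter.ignore_negative() before taking the median. A minimal sketch of that
# helper, assuming it simply drops values below zero (illustrative, not the
# actual raptor filter module):

def ignore_negative(values):
    # keep only valid (non-negative) replicates; raptor's measure.js webext
    # reports -1 when TTFI was not available for a given pageload
    return [v for v in values if v >= 0]

# e.g. ignore_negative([1104, -1, 986, -1]) -> [1104, 986]
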
def summarize(self):
    suites = []
    vals = []
    test_results = {
        'framework': {
            'name': 'raptor',
        },
        'suites': suites,
    }

    # check if we actually have any results
    if len(self.results) == 0:
        LOG.error("error: no raptor test results found!")
        return

    for test in self.results:
        subtests = []
        suite = {
            'name': test.name,
            'type': test.type,
            'extraOptions': test.extra_options,
            'subtests': subtests,
            'lowerIsBetter': test.lower_is_better,
            'alertThreshold': float(test.alert_threshold)
        }

        suites.append(suite)

        # process results for pageloader type of tests
        if test.type == "pageload":
            # each test can report multiple measurements per pageload
            # each measurement becomes a subtest inside the 'suite'

            # this is the format we receive the results in from the pageload test
            # i.e. one test (subtest) in raptor-firefox-tp6:

            # {u'name': u'raptor-firefox-tp6-amazon', u'type': u'pageload', u'measurements':
            # {u'fnbpaint': [788, 315, 334, 286, 318, 276, 296, 296, 292, 285, 268, 277, 274,
            # 328, 295, 290, 286, 270, 279, 280, 346, 303, 308, 398, 281]}, u'browser':
            # u'Firefox 62.0a1 20180528123052', u'lower_is_better': True, u'page':
            # u'https://www.amazon.com/s/url=search-alias%3Daps&field-keywords=laptop',
            # u'unit': u'ms', u'alert_threshold': 2}

            for key, values in test.measurements.iteritems():
                new_subtest = {}
                new_subtest['name'] = test.name + "-" + key
                new_subtest['replicates'] = values
                new_subtest['lowerIsBetter'] = test.lower_is_better
                new_subtest['alertThreshold'] = float(test.alert_threshold)
                new_subtest['value'] = 0
                new_subtest['unit'] = test.unit

                filtered_values = filter.ignore_first(new_subtest['replicates'], 1)
                new_subtest['value'] = filter.median(filtered_values)
                vals.append(new_subtest['value'])

                subtests.append(new_subtest)

        elif test.type == "benchmark":
            # each benchmark 'index' becomes a subtest; each pagecycle / iteration
            # of the test has multiple values per index/subtest

            # this is the format we receive the results in from the benchmark
            # i.e. this is ONE pagecycle of speedometer:

            # {u'name': u'raptor-speedometer', u'type': u'benchmark', u'measurements':
            # {u'speedometer': [[{u'AngularJS-TodoMVC/DeletingAllItems': [147.3000000000011,
            # 149.95999999999913, 143.29999999999927, 150.34000000000378, 257.6999999999971],
            # u'Inferno-TodoMVC/CompletingAllItems/Sync': [88.03999999999996,
            # 85.60000000000036, 94.18000000000029, 95.19999999999709, 86.47999999999593],
            # u'AngularJS-TodoMVC': [518.2400000000016, 525.8199999999997, 610.5199999999968,
            # 532.8200000000215, 640.1800000000003], ...(repeated for each index/subtest)}]]},
            # u'browser': u'Firefox 62.0a1 20180528123052', u'lower_is_better': False, u'page':
            # u'http://localhost:55019/Speedometer/index.html?raptor', u'unit': u'score',
            # u'alert_threshold': 2}

            for page_cycle in test.measurements['speedometer']:
                page_cycle_results = page_cycle[0]

                for sub, replicates in page_cycle_results.iteritems():
                    # for each pagecycle, replicates are appended to each subtest
                    # so if it doesn't exist the first time create the subtest entry
                    existing = False
                    for existing_sub in subtests:
                        if existing_sub['name'] == sub:
                            # pagecycle, subtest already there, so append the replicates
                            existing_sub['replicates'].extend(replicates)
                            # update the value now that we have more replicates
                            existing_sub['value'] = filter.median(existing_sub['replicates'])
                            # now need to update our vals list too since have new subtest value
                            for existing_val in vals:
                                if existing_val[1] == sub:
                                    existing_val[0] = existing_sub['value']
                                    break
                            existing = True
                            break

                    if not existing:
                        # subtest not added yet, first pagecycle, so add new one
                        new_subtest = {}
                        new_subtest['name'] = sub
                        new_subtest['replicates'] = replicates
                        new_subtest['lowerIsBetter'] = test.lower_is_better
                        new_subtest['alertThreshold'] = float(test.alert_threshold)
                        new_subtest['value'] = filter.median(replicates)
                        new_subtest['unit'] = test.unit
                        subtests.append(new_subtest)
                        vals.append([new_subtest['value'], sub])

        else:
            LOG.error("output.summarize received unsupported test results type")
            return

        # if there is more than one subtest, calculate a summary result
        if len(subtests) > 1:
            suite['value'] = self.construct_results(vals, testname=test.name)

    self.summarized_results = test_results
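
# To make the merge logic above concrete: with two speedometer page cycles,
# the replicates for the same benchmark index are concatenated and the subtest
# value is recomputed from the combined list. A toy walk-through (numbers are
# shortened from the sample data above):
#
#   cycle 1: {u'AngularJS-TodoMVC': [518.24, 525.82]}
#            -> new subtest, value = median([518.24, 525.82])
#   cycle 2: {u'AngularJS-TodoMVC': [610.52, 532.82]}
#            -> replicates become [518.24, 525.82, 610.52, 532.82], the value is
#               recomputed as the median of all four, and the matching
#               [value, name] entry in vals is updated in place so the suite
#               summary uses the latest value.
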
def summarize(self):
    suites = []
    vals = []
    test_results = {
        'framework': {
            'name': 'raptor',
        },
        'suites': suites,
    }

    # check if we actually have any results
    if len(self.results) == 0:
        LOG.error("error: no raptor test results found!")
        return

    for test in self.results:
        subtests = []
        suite = {
            'name': test.name,
            'type': test.type,
            'extraOptions': test.extra_options,
            'subtests': subtests,
            'lowerIsBetter': test.lower_is_better,
            'alertThreshold': float(test.alert_threshold)
        }

        suites.append(suite)

        # process results for pageloader type of tests
        if test.type == "pageload":
            # each test can report multiple measurements per pageload
            # each measurement becomes a subtest inside the 'suite'

            # this is the format we receive the results in from the pageload test
            # i.e. one test (subtest) in raptor-firefox-tp6:

            # {u'name': u'raptor-firefox-tp6-amazon', u'type': u'pageload', u'measurements':
            # {u'fnbpaint': [788, 315, 334, 286, 318, 276, 296, 296, 292, 285, 268, 277, 274,
            # 328, 295, 290, 286, 270, 279, 280, 346, 303, 308, 398, 281]}, u'browser':
            # u'Firefox 62.0a1 20180528123052', u'lower_is_better': True, u'page':
            # u'https://www.amazon.com/s/url=search-alias%3Daps&field-keywords=laptop',
            # u'unit': u'ms', u'alert_threshold': 2}

            for key, values in test.measurements.iteritems():
                new_subtest = {}
                new_subtest['name'] = test.name + "-" + key
                new_subtest['replicates'] = values
                new_subtest['lowerIsBetter'] = test.lower_is_better
                new_subtest['alertThreshold'] = float(test.alert_threshold)
                new_subtest['value'] = 0
                new_subtest['unit'] = test.unit

                filtered_values = filter.ignore_first(new_subtest['replicates'], 1)
                new_subtest['value'] = filter.median(filtered_values)
                vals.append(new_subtest['value'])

                subtests.append(new_subtest)

        elif test.type == "benchmark":
            if 'speedometer' in test.measurements:
                subtests, vals = self.parseSpeedometerOutput(test)
            elif 'motionmark' in test.measurements:
                subtests, vals = self.parseMotionmarkOutput(test)
            suite['subtests'] = subtests

        else:
            LOG.error("output.summarize received unsupported test results type")
            return

        # if there is more than one subtest, calculate a summary result
        if len(subtests) > 1:
            suite['value'] = self.construct_results(vals, testname=test.name)

    self.summarized_results = test_results
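
# construct_results() / construct_summary() are not shown in this snippet.
# Based on the comments in the revisions above ("get the mean of those values"
# for pageload tests, a "benchmark-specific formula" otherwise), a hypothetical
# sketch of the suite-level summary might look like the following; the dispatch
# on testname and the exact formulas are assumptions for illustration only.

def construct_results(self, vals, testname):
    # vals holds one summary value per subtest, either as plain numbers or as
    # [value, name] pairs depending on the revision; reduce to a single score
    numbers = [v[0] if isinstance(v, list) else v for v in vals]
    if 'speedometer' in testname or 'motionmark' in testname:
        # benchmarks commonly combine subtest scores with a geometric mean
        product = 1.0
        for n in numbers:
            product *= n
        return product ** (1.0 / len(numbers))
    # pageload tests: report the mean of the per-measurement medians
    return sum(numbers) / float(len(numbers))
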