def _compute_values(self, values, nvalue, is_warmup=False,
                    calibrate_loops=False, start=0):
    """Run the task function repeatedly and append the timings to *values*.

    Each raw result of ``self.task_func(self, self.loops)`` is converted to
    float and divided by ``self.loops * inner_loops`` (``inner_loops`` falls
    back to 1 when ``self.inner_loops`` is falsy).

    Parameters:
        values: list extended in place. Warmup runs append
            ``(loops, value)`` tuples, other runs append the bare value.
        nvalue: number of values to compute; grows by one each time the
            number of loops is doubled during loop calibration.
        is_warmup: selects the tuple storage format and the 'Warmup' label.
        calibrate_loops: when true, ``self.loops`` is doubled whenever the
            raw timing is below ``args.min_time``, and zero values are
            tolerated.
        start: offset added to the index shown in verbose output.

    Raises:
        ValueError: if *nvalue* < 1, if ``self.loops`` <= 0, or if a value
            is zero while *calibrate_loops* is false.

    Exits the process with status 1 if doubling the loops would exceed
    MAX_LOOPS during calibration.
    """
    unit = self.metadata.get('unit')
    options = self.args
    if nvalue < 1:
        raise ValueError("nvalue must be >= 1")
    if self.loops <= 0:
        raise ValueError("loops must be >= 1")

    label = 'Warmup' if is_warmup else 'Value'
    inner = self.inner_loops or 1

    position = 1
    # nvalue may grow inside the loop, so a fixed range() cannot be used.
    while position <= nvalue:
        raw = float(self.task_func(self, self.loops))
        per_iteration = raw / (self.loops * inner)

        if not (per_iteration or calibrate_loops):
            raise ValueError("benchmark function returned zero")

        # Warmups remember the loop count that produced each value.
        entry = (self.loops, per_iteration) if is_warmup else per_iteration
        values.append(entry)

        if options.verbose:
            text = format_value(unit, per_iteration)
            if is_warmup:
                text = ('%s (loops: %s, raw: %s)'
                        % (text,
                           format_number(self.loops),
                           format_value(unit, raw)))
            print("%s %s: %s" % (label, start + position, text))

        if calibrate_loops and raw < options.min_time:
            if self.loops * 2 > MAX_LOOPS:
                print("ERROR: failed to calibrate the number of loops")
                print("Raw timing %s with %s is still smaller than "
                      "the minimum time of %s"
                      % (format_value(unit, raw),
                         format_number(self.loops, 'loop'),
                         format_timedelta(options.min_time)))
                sys.exit(1)
            self.loops *= 2
            # need more values for the calibration
            nvalue += 1

        position += 1
def calibrate_warmups(self):
    """Calibrate the number of warmups and record it in the metadata.

    Starting from ``args.warmups`` (when ``args.recalibrate_warmups`` is
    set) or from 1, warmup values are collected into ``self.warmups`` until
    ``test_calibrate_warmups()`` accepts the candidate count. The accepted
    count is stored under ``'recalibrate_warmups'`` or
    ``'calibrate_warmups'`` in ``self.metadata``.

    Raises:
        ValueError: if ``self.loops`` < 1.

    Exits the process with status 1 when MAX_WARMUP_VALUES warmup values
    have been collected without finding an acceptable count.
    """
    if self.loops < 1:
        raise ValueError("loops must be >= 1")

    candidate = self.args.warmups if self.args.recalibrate_warmups else 1
    unit = self.metadata.get('unit')
    offset = 0

    # test_calibrate_warmups() requires at least 2 values per sample
    while True:
        wanted = candidate + WARMUP_SAMPLE_SIZE * 2
        missing = wanted - len(self.warmups)
        if missing:
            self._compute_values(self.warmups, missing,
                                 is_warmup=True,
                                 start=offset)
            offset += missing

        if self.test_calibrate_warmups(candidate, unit):
            break

        if len(self.warmups) >= MAX_WARMUP_VALUES:
            print("ERROR: failed to calibrate the number of warmups")
            formatted = [format_value(unit, value)
                         for _, value in self.warmups]
            print("Values (%s): %s" % (len(formatted), ', '.join(formatted)))
            sys.exit(1)

        # Not stable yet: retry with one more warmup.
        candidate += 1

    if self.args.verbose:
        print("Calibration: use %s warmups" % format_number(candidate))
        print()

    if self.args.recalibrate_warmups:
        key = 'recalibrate_warmups'
    else:
        key = 'calibrate_warmups'
    self.metadata[key] = candidate
def test_calibrate_warmups(self, nwarmup, unit):
    """Check whether skipping *nwarmup* warmup values gives stable timings.

    The values of ``self.warmups`` after the first *nwarmup* entries are
    split into two consecutive samples, and the candidate is rejected when:

    * the first value of sample 1 is above ``Q3 + 1.5 * IQR`` of all the
      other values (only the upper bound is checked);
    * the relative mean difference is outside ``[-0.5, 0.10]``;
    * the relative MAD difference exceeds 10% in absolute value;
    * the relative Q1 or Q3 difference exceeds 5% in absolute value.

    Relative differences are computed against sample 2. A zero reference
    statistic no longer raises ZeroDivisionError: the difference is 0.0
    when both statistics are zero and +/-infinity otherwise, so a sample
    pair that differs from a zero reference is rejected.

    Parameters:
        nwarmup: number of leading warmup values to skip.
        unit: unit passed to the value formatters for verbose output.

    Returns:
        True if every check passes, False otherwise.

    The caller must provide enough values: sample 1 must be non-empty,
    and verbose mode computes a standard deviation on each sample.
    """
    def relative_diff(first, second):
        # Difference of first relative to second, safe for second == 0.
        if second:
            return (first - second) / float(second)
        if first == second:
            return 0.0
        return float('inf') if first > second else float('-inf')

    half = nwarmup + (len(self.warmups) - nwarmup) // 2
    sample1 = [value for loops, value in self.warmups[nwarmup:half]]
    sample2 = [value for loops, value in self.warmups[half:]]
    first_value = sample1[0]

    # test if the first value is an outlier
    values = sample1[1:] + sample2
    q1 = percentile(values, 0.25)
    q3 = percentile(values, 0.75)
    iqr = q3 - q1
    outlier_max = (q3 + 1.5 * iqr)
    # only check maximum, not minimum
    outlier = not (first_value <= outlier_max)

    mean1 = statistics.mean(sample1)
    mean2 = statistics.mean(sample2)
    mean_diff = relative_diff(mean1, mean2)

    s1_q1 = percentile(sample1, 0.25)
    s2_q1 = percentile(sample2, 0.25)
    s1_q3 = percentile(sample1, 0.75)
    s2_q3 = percentile(sample2, 0.75)
    q1_diff = relative_diff(s1_q1, s2_q1)
    q3_diff = relative_diff(s1_q3, s2_q3)

    mad1 = median_abs_dev(sample1)
    mad2 = median_abs_dev(sample2)
    mad_diff = relative_diff(mad1, mad2)

    if self.args.verbose:
        stdev1 = statistics.stdev(sample1)
        stdev2 = statistics.stdev(sample2)
        stdev_diff = relative_diff(stdev1, stdev2)

        sample1_str = format_values(unit,
                                    (s1_q1, mean1, s1_q3, stdev1, mad1))
        sample2_str = format_values(unit,
                                    (s2_q1, mean2, s2_q3, stdev2, mad2))
        print("Calibration: warmups=%s" % format_number(nwarmup))
        print(" first value: %s, outlier? %s (max: %s)"
              % (format_value(unit, first_value), outlier,
                 format_value(unit, outlier_max)))
        print(" sample1(%s): Q1=%s mean=%s Q3=%s stdev=%s MAD=%s"
              % (len(sample1),
                 sample1_str[0],
                 sample1_str[1],
                 sample1_str[2],
                 sample1_str[3],
                 sample1_str[4]))
        print(" sample2(%s): Q1=%s mean=%s Q3=%s stdev=%s MAD=%s"
              % (len(sample2),
                 sample2_str[0],
                 sample2_str[1],
                 sample2_str[2],
                 sample2_str[3],
                 sample2_str[4]))
        print(" diff: Q1=%+.0f%% mean=%+.0f%% Q3=%+.0f%% stdev=%+.0f%% MAD=%+.0f%%"
              % (q1_diff * 100,
                 mean_diff * 100,
                 q3_diff * 100,
                 stdev_diff * 100,
                 mad_diff * 100))

    if outlier:
        return False
    # The mean check is asymmetric: sample 1 may be up to 50% faster than
    # sample 2, but at most 10% slower.
    if not (-0.5 <= mean_diff <= 0.10):
        return False
    if abs(mad_diff) > 0.10:
        return False
    if abs(q1_diff) > 0.05:
        return False
    if abs(q3_diff) > 0.05:
        return False
    return True
def run_bench(self, nvalue, is_warmup=False, is_calibrate=False,
              calibrate=False):
    """Run the task function and return the list of computed values.

    Each raw result of ``self.task_func(self, self.loops)`` is converted to
    float and divided by ``self.loops * inner_loops`` (``inner_loops`` falls
    back to 1 when ``self.inner_loops`` is falsy).

    Parameters:
        nvalue: number of values to compute; grows by one each time the
            number of loops is doubled while *calibrate* is true.
        is_warmup: store ``(loops, value)`` tuples instead of bare values
            and tolerate zero values.
        is_calibrate: label the output as 'Calibration' and tolerate zero
            values.
        calibrate: double ``self.loops`` whenever the raw timing is below
            ``args.min_time``.

    Returns:
        The list of values (tuples for warmup runs).

    Raises:
        ValueError: if ``self.loops`` <= 0, if a value is zero outside of
            warmup/calibration runs, or if the doubled loop count exceeds
            MAX_LOOPS.
    """
    unit = self.metadata.get('unit')
    options = self.args
    if self.loops <= 0:
        raise ValueError("loops must be >= 1")

    if is_calibrate:
        label = 'Calibration'
    else:
        label = 'Warmup' if is_warmup else 'Value'

    results = []
    inner = self.inner_loops or 1
    position = 1
    # nvalue may grow inside the loop, so a fixed range() cannot be used.
    while position <= nvalue:
        raw = float(self.task_func(self, self.loops))
        per_iteration = raw / (self.loops * inner)

        if not (per_iteration or is_calibrate or is_warmup):
            raise ValueError("benchmark function returned zero")

        # Warmups remember the loop count that produced each value.
        entry = (self.loops, per_iteration) if is_warmup else per_iteration
        results.append(entry)

        if options.verbose:
            text = format_value(unit, per_iteration)
            if is_warmup or is_calibrate:
                text = ('%s (%s: %s)'
                        % (text,
                           format_number(self.loops, 'loop'),
                           format_value(unit, raw)))
            print("%s %s: %s" % (label, position, text))

        if calibrate and raw < options.min_time:
            self.loops *= 2
            if self.loops > MAX_LOOPS:
                raise ValueError("error in calibration, loops is "
                                 "too big: %s" % self.loops)
            # need more values for the calibration
            nvalue += 1

        position += 1

    if options.verbose:
        if is_calibrate:
            print("Calibration: use %s loops" % format_number(self.loops))
        # NOTE(review): the flattened source does not show whether this blank
        # line belongs to the calibration branch; it is emitted for every
        # verbose run here -- confirm against the original layout.
        print()

    return results