def _MergeHistogramSetByPath(hs):
  with TempFile() as temp:
    temp.write(json.dumps(hs.AsDicts()).encode('utf-8'))
    temp.close()

    return merge_histograms.MergeHistograms(temp.name,
                                            (reserved_infos.TEST_PATH.name,))
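# Illustrative usage sketch, not part of the original module: it builds a small
# HistogramSet whose histograms share a TEST_PATH diagnostic and merges it with
# the helper above. The histogram names and the 'some/test/path' value are
# placeholders; only APIs already used elsewhere in this section (Histogram,
# HistogramSet, AddSharedDiagnostic, GenericSet) are assumed.
def _ExampleMergeByTestPath():
  hs = histogram_set.HistogramSet([
      histogram.Histogram('metric', 'count'),
      histogram.Histogram('metric', 'count'),
  ])
  hs.AddSharedDiagnostic(reserved_infos.TEST_PATH.name,
                         generic_set.GenericSet(['some/test/path']))
  # Histograms sharing a name and TEST_PATH should collapse into a single
  # merged histogram in the returned dicts.
  return _MergeHistogramSetByPath(hs)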
def AddReservedDiagnostics(histogram_dicts, names_to_values):
  # We need to generate summary statistics for anything that had a story, so
  # filter out every histogram with no stories, then merge. If you keep the
  # histograms with no story, you end up with duplicates.
  hs_with_stories = _LoadHistogramSet(histogram_dicts)
  hs_with_stories.FilterHistograms(
      lambda h: not h.diagnostics.get(reserved_infos.STORIES.name, []))

  hs_with_no_stories = _LoadHistogramSet(histogram_dicts)
  hs_with_no_stories.FilterHistograms(
      lambda h: h.diagnostics.get(reserved_infos.STORIES.name, []))

  # TODO(#3987): Refactor recipes to call merge_histograms separately.
  with TempFile() as temp:
    temp.write(json.dumps(hs_with_stories.AsDicts()).encode('utf-8'))
    temp.close()

    # This call combines all repetitions of a metric for a given story into a
    # single histogram.
    dicts_across_repeats = merge_histograms.MergeHistograms(
        temp.name, ('name', 'stories'))
    # This call creates summary metrics across each set of stories.
    dicts_across_stories = merge_histograms.MergeHistograms(
        temp.name, ('name',))

  # Now load everything into one histogram set. First we load the summary
  # histograms, since we need to mark them with IS_SUMMARY.
  # After that we load the rest, and then apply all the diagnostics specified
  # on the command line. Finally, since we end up with a lot of diagnostics
  # that no histograms refer to, we make sure to prune those.
  histograms = histogram_set.HistogramSet()
  histograms.ImportDicts(dicts_across_stories)
  for h in histograms:
    h.diagnostics[
        reserved_infos.IS_SUMMARY.name] = generic_set.GenericSet([True])

  histograms.ImportDicts(dicts_across_repeats)
  histograms.ImportDicts(hs_with_no_stories.AsDicts())

  histograms.DeduplicateDiagnostics()

  for name, value in names_to_values.items():
    assert name in ALL_NAMES
    histograms.AddSharedDiagnostic(name, generic_set.GenericSet([value]))

  histograms.RemoveOrphanedDiagnostics()

  return json.dumps(histograms.AsDicts())
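# Illustrative usage sketch, not part of the original module: passes histogram
# dicts through AddReservedDiagnostics with a single command-line-style
# diagnostic. The 'benchmarks' reserved name and the 'my_benchmark' value are
# placeholders for the example; any name supplied this way must be present in
# this module's ALL_NAMES allowlist.
def _ExampleAddReservedDiagnostics(histogram_dicts):
  names_to_values = {reserved_infos.BENCHMARKS.name: 'my_benchmark'}
  # Returns a JSON string of the merged histogram set, with summary histograms
  # marked IS_SUMMARY and the shared diagnostics attached.
  return AddReservedDiagnostics(histogram_dicts, names_to_values)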
def testSingularHistogramsGetMergedFrom(self):
  hist0 = histogram.Histogram('foo', 'count')
  hist1 = histogram.Histogram('bar', 'count')
  histograms = histogram_set.HistogramSet([hist0, hist1])
  histograms_file = tempfile.NamedTemporaryFile(delete=False)
  histograms_file.write(json.dumps(histograms.AsDicts()).encode('utf-8'))
  histograms_file.close()

  merged_dicts = merge_histograms.MergeHistograms(histograms_file.name,
                                                  ('name',))
  merged_histograms = histogram_set.HistogramSet()
  merged_histograms.ImportDicts(merged_dicts)
  self.assertEqual(len(list(merged_histograms.shared_diagnostics)), 2)
  self.assertEqual(len(merged_histograms), 2)
  os.remove(histograms_file.name)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--isolated-script-test-output', type=str)
    parser.add_argument('--isolated-script-test-perf-output', type=str)
    parser.add_argument(
        '-f', '--filter', '--isolated-script-test-filter', type=str, help='Test filter.')
    parser.add_argument('--test-suite', help='Test suite to run.', default=DEFAULT_TEST_SUITE)
    parser.add_argument('--xvfb', help='Use xvfb.', action='store_true')
    parser.add_argument(
        '--shard-count',
        help='Number of shards for test splitting. Default is 1.',
        type=int,
        default=1)
    parser.add_argument(
        '--shard-index',
        help='Index of the current shard for test splitting. Default is 0.',
        type=int,
        default=0)
    parser.add_argument(
        '-l', '--log', help='Log output level. Default is %s.' % DEFAULT_LOG, default=DEFAULT_LOG)
    parser.add_argument(
        '-s',
        '--samples-per-test',
        help='Number of samples to run per test. Default is %d.' % DEFAULT_SAMPLES,
        type=int,
        default=DEFAULT_SAMPLES)
    parser.add_argument(
        '-t',
        '--trials-per-sample',
        help='Number of trials to run per sample. Default is %d.' % DEFAULT_TRIALS,
        type=int,
        default=DEFAULT_TRIALS)
    parser.add_argument(
        '--steps-per-trial', help='Fixed number of steps to run per trial.', type=int)
    parser.add_argument(
        '--max-errors',
        help='After this many errors, abort the run. Default is %d.' % DEFAULT_MAX_ERRORS,
        type=int,
        default=DEFAULT_MAX_ERRORS)
    parser.add_argument(
        '--smoke-test-mode', help='Do a quick run to validate correctness.', action='store_true')
    parser.add_argument(
        '--warmup-loops',
        help='Number of warmup loops to run in the perf test. Default is %d.' % DEFAULT_WARMUP_LOOPS,
        type=int,
        default=DEFAULT_WARMUP_LOOPS)
    parser.add_argument(
        '--calibration-time',
        help='Amount of time to spend each loop in calibration and warmup. Default is %d seconds.'
        % DEFAULT_CALIBRATION_TIME,
        type=int,
        default=DEFAULT_CALIBRATION_TIME)

    args, extra_flags = parser.parse_known_args()

    importlib.reload(logging)
    logging.basicConfig(level=args.log.upper())

    start_time = time.time()

    # Use fast execution for smoke test mode.
    if args.smoke_test_mode:
        args.steps_per_trial = 1
        args.trials_per_sample = 1
        args.samples_per_test = 1

    env = os.environ.copy()

    # Get sharding args
    if 'GTEST_TOTAL_SHARDS' in env and int(env['GTEST_TOTAL_SHARDS']) != 1:
        if 'GTEST_SHARD_INDEX' not in env:
            logging.error('Sharding params must be specified together.')
            sys.exit(1)
        args.shard_count = int(env.pop('GTEST_TOTAL_SHARDS'))
        args.shard_index = int(env.pop('GTEST_SHARD_INDEX'))

    # The harness currently uploads all traces in a batch, which is very slow.
    # TODO: Reduce lag from trace uploads and remove this. http://anglebug.com/6854
    env['DEVICE_TIMEOUT_MULTIPLIER'] = '20'

    # Get test list
    cmd = [get_binary_name(args.test_suite), '--list-tests', '--verbose']
    exit_code, lines = _run_and_get_output(args, cmd, env)
    if exit_code != EXIT_SUCCESS:
        logging.fatal('Could not find test list from test output:\n%s' % '\n'.join(lines))
    tests = _get_tests_from_output(lines)

    if args.filter:
        tests = _filter_tests(tests, args.filter)

    # Get tests for this shard (if using sharding args)
    tests = _shard_tests(tests, args.shard_count, args.shard_index)

    num_tests = len(tests)
    if num_tests == 0:
        logging.error('No tests to run.')
        return EXIT_FAILURE

    logging.info('Running %d test%s' % (num_tests, 's' if num_tests > 1 else ' '))

    # Run tests
    results = Results()

    histograms = histogram_set.HistogramSet()
    total_errors = 0

    for test_index in range(num_tests):
        test = tests[test_index]
        cmd = [
            get_binary_name(args.test_suite),
            '--gtest_filter=%s' % test,
            '--extract-test-list-from-filter',
            '--enable-device-cache',
            '--skip-clear-data',
            '--use-existing-test-data',
            '--verbose',
            '--calibration-time',
            str(args.calibration_time),
        ]
        if args.steps_per_trial:
            steps_per_trial = args.steps_per_trial
        else:
            cmd_calibrate = cmd + [
                '--calibration',
                '--warmup-loops',
                str(args.warmup_loops),
            ]
            exit_code, calibrate_output = _run_and_get_output(args, cmd_calibrate, env)
            if exit_code != EXIT_SUCCESS:
                logging.fatal('%s failed. Output:\n%s' %
                              (cmd_calibrate[0], '\n'.join(calibrate_output)))
                total_errors += 1
                results.result_fail(test)
                continue
            steps_per_trial = _get_results_from_output(calibrate_output, 'steps_to_run')
            if not steps_per_trial:
                logging.warning('Skipping test %s' % test)
                continue
            assert (len(steps_per_trial) == 1)
            steps_per_trial = int(steps_per_trial[0])
        logging.info('Test %d/%d: %s (samples=%d trials_per_sample=%d steps_per_trial=%d)' %
                     (test_index + 1, num_tests, test, args.samples_per_test,
                      args.trials_per_sample, steps_per_trial))
        wall_times = []
        test_histogram_set = histogram_set.HistogramSet()
        for sample in range(args.samples_per_test):
            if total_errors >= args.max_errors:
                logging.error('Error count exceeded max errors (%d). Aborting.' % args.max_errors)
                return EXIT_FAILURE

            cmd_run = cmd + [
                '--steps-per-trial',
                str(steps_per_trial),
                '--trials',
                str(args.trials_per_sample),
            ]
            if args.smoke_test_mode:
                cmd_run += ['--no-warmup']
            else:
                cmd_run += ['--warmup-loops', str(args.warmup_loops)]
            with common.temporary_file() as histogram_file_path:
                cmd_run += ['--isolated-script-test-perf-output=%s' % histogram_file_path]
                exit_code, output = _run_and_get_output(args, cmd_run, env)
                if exit_code != EXIT_SUCCESS:
                    logging.error('%s failed. Output:\n%s' % (cmd_run[0], '\n'.join(output)))
                    results.result_fail(test)
                    total_errors += 1
                    break

                sample_wall_times = _get_results_from_output(output, 'wall_time')
                if not sample_wall_times:
                    # This can be intentional for skipped tests. They are handled below.
                    logging.warning('Test %s failed to produce a sample output' % test)
                    break
                logging.info('Test %d/%d Sample %d/%d wall_times: %s' %
                             (test_index + 1, num_tests, sample + 1, args.samples_per_test,
                              str(sample_wall_times)))
                wall_times += sample_wall_times
                with open(histogram_file_path) as histogram_file:
                    sample_json = json.load(histogram_file)
                    sample_histogram = histogram_set.HistogramSet()
                    sample_histogram.ImportDicts(sample_json)
                    test_histogram_set.Merge(sample_histogram)

        if not results.has_result(test):
            if not wall_times:
                logging.warning('Skipping test %s. Assuming this is intentional.' % test)
                results.result_skip(test)
            elif len(wall_times) == (args.samples_per_test * args.trials_per_sample):
                if len(wall_times) > 7:
                    truncation_n = len(wall_times) >> 3
                    logging.debug(
                        'Truncation: Removing the %d highest and lowest times from wall_times.' %
                        truncation_n)
                    wall_times = _truncated_list(wall_times, truncation_n)

                if len(wall_times) > 1:
                    logging.info('Test %d/%d: %s: truncated mean wall_time = %.2f, cov = %.2f%%' %
                                 (test_index + 1, num_tests, test, _mean(wall_times),
                                  (_coefficient_of_variation(wall_times) * 100.0)))
                results.result_pass(test)

                # Merge the histogram set into one histogram
                with common.temporary_file() as merge_histogram_path:
                    logging.info('Writing merged histograms to %s.' % merge_histogram_path)
                    with open(merge_histogram_path, 'w') as merge_histogram_file:
                        json.dump(test_histogram_set.AsDicts(), merge_histogram_file)
                        merge_histogram_file.close()
                    merged_dicts = merge_histograms.MergeHistograms(
                        merge_histogram_path, groupby=['name'])
                    merged_histogram = histogram_set.HistogramSet()
                    merged_histogram.ImportDicts(merged_dicts)
                    histograms.Merge(merged_histogram)
            else:
                logging.error('Test %s failed to record some samples' % test)

    if args.isolated_script_test_output:
        results.save_to_output_file(args.test_suite, args.isolated_script_test_output)

        # Uses special output files to match the merge script.
        _save_extra_output_files(args, results, histograms)

    if args.isolated_script_test_perf_output:
        with open(args.isolated_script_test_perf_output, 'w') as out_file:
            out_file.write(json.dumps(histograms.AsDicts(), indent=2))

    end_time = time.time()
    logging.info('Elapsed time: %.2lf seconds.' % (end_time - start_time))

    return EXIT_SUCCESS
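# The run loop above relies on _truncated_list, _mean and _coefficient_of_variation,
# which are defined elsewhere in the script and not shown in this section. Below is
# a minimal sketch of plausible implementations over plain lists of floats; the
# original helpers may differ in detail.
def _truncated_list(data, n):
    """Returns a sorted copy of |data| with the |n| largest and smallest values removed."""
    if len(data) < n * 2:
        raise ValueError('list not large enough to truncate')
    return sorted(data)[n:len(data) - n]


def _mean(data):
    return float(sum(data)) / float(len(data))


def _coefficient_of_variation(data):
    # Population standard deviation divided by the mean.
    m = _mean(data)
    variance = sum((x - m)**2 for x in data) / len(data)
    return (variance**0.5) / m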
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--isolated-script-test-output', type=str)
    parser.add_argument('--isolated-script-test-perf-output', type=str)
    parser.add_argument(
        '-f', '--filter', '--isolated-script-test-filter', type=str, help='Test filter.')
    parser.add_argument('--test-suite', help='Test suite to run.', default=DEFAULT_TEST_SUITE)
    parser.add_argument('--xvfb', help='Use xvfb.', action='store_true')
    parser.add_argument(
        '--shard-count',
        help='Number of shards for test splitting. Default is 1.',
        type=int,
        default=1)
    parser.add_argument(
        '--shard-index',
        help='Index of the current shard for test splitting. Default is 0.',
        type=int,
        default=0)
    parser.add_argument(
        '-l', '--log', help='Log output level. Default is %s.' % DEFAULT_LOG, default=DEFAULT_LOG)
    parser.add_argument(
        '-s',
        '--samples-per-test',
        help='Number of samples to run per test. Default is %d.' % DEFAULT_SAMPLES,
        type=int,
        default=DEFAULT_SAMPLES)
    parser.add_argument(
        '-t',
        '--trials-per-sample',
        help='Number of trials to run per sample. Default is %d.' % DEFAULT_TRIALS,
        type=int,
        default=DEFAULT_TRIALS)
    parser.add_argument(
        '--steps-per-trial', help='Fixed number of steps to run per trial.', type=int)
    parser.add_argument(
        '--max-errors',
        help='After this many errors, abort the run. Default is %d.' % DEFAULT_MAX_ERRORS,
        type=int,
        default=DEFAULT_MAX_ERRORS)
    parser.add_argument(
        '--smoke-test-mode', help='Do a quick run to validate correctness.', action='store_true')
    parser.add_argument(
        '--warmup-loops',
        help='Number of warmup loops to run in the perf test. Default is %d.' % DEFAULT_WARMUP_LOOPS,
        type=int,
        default=DEFAULT_WARMUP_LOOPS)
    parser.add_argument(
        '--calibration-time',
        help='Amount of time to spend each loop in calibration and warmup. Default is %d seconds.'
        % DEFAULT_CALIBRATION_TIME,
        type=int,
        default=DEFAULT_CALIBRATION_TIME)

    args, extra_flags = parser.parse_known_args()

    logging.basicConfig(level=args.log.upper(), stream=sys.stdout)

    start_time = time.time()

    # Use fast execution for smoke test mode.
    if args.smoke_test_mode:
        args.steps_per_trial = 1
        args.trials_per_sample = 1
        args.samples_per_test = 1

    env = os.environ.copy()

    # Get sharding args
    if 'GTEST_TOTAL_SHARDS' in env and int(env['GTEST_TOTAL_SHARDS']) != 1:
        if 'GTEST_SHARD_INDEX' not in env:
            logging.error('Sharding params must be specified together.')
            sys.exit(1)
        args.shard_count = int(env.pop('GTEST_TOTAL_SHARDS'))
        args.shard_index = int(env.pop('GTEST_SHARD_INDEX'))

    # Get test list
    cmd = [get_binary_name(args.test_suite), '--list-tests', '--verbose']
    lines = _run_and_get_output(args, cmd, env)
    if not lines:
        raise Exception('Could not find test list from test output.')
    tests = _get_tests_from_output(lines)

    if args.filter:
        tests = _filter_tests(tests, args.filter)

    # Get tests for this shard (if using sharding args)
    tests = _shard_tests(tests, args.shard_count, args.shard_index)

    # Run tests
    results = {
        'tests': {},
        'interrupted': False,
        'seconds_since_epoch': time.time(),
        'path_delimiter': '.',
        'version': 3,
        'num_failures_by_type': {
            FAIL: 0,
            PASS: 0,
            SKIP: 0,
        },
    }

    test_results = {}

    histograms = histogram_set.HistogramSet()
    total_errors = 0

    for test in tests:
        cmd = [
            get_binary_name(args.test_suite),
            '--gtest_filter=%s' % test,
            '--extract-test-list-from-filter',
            '--enable-device-cache',
            '--skip-clear-data',
            '--use-existing-test-data',
            '--verbose',
            '--calibration-time',
            str(args.calibration_time),
        ]
        if args.steps_per_trial:
            steps_per_trial = args.steps_per_trial
        else:
            cmd_calibrate = cmd + [
                '--calibration',
                '--warmup-loops',
                str(args.warmup_loops),
            ]
            calibrate_output = _run_and_get_output(args, cmd_calibrate, env)
            if not calibrate_output:
                logging.error('Failed to get calibration output')
                test_results[test] = {'expected': PASS, 'actual': FAIL, 'is_unexpected': True}
                results['num_failures_by_type'][FAIL] += 1
                total_errors += 1
                continue
            steps_per_trial = _get_results_from_output(calibrate_output, 'steps_to_run')
            if not steps_per_trial:
                logging.warning('Skipping test %s' % test)
                continue
            assert (len(steps_per_trial) == 1)
            steps_per_trial = int(steps_per_trial[0])
        logging.info('Running %s %d times with %d trials and %d steps per trial.' %
                     (test, args.samples_per_test, args.trials_per_sample, steps_per_trial))
        wall_times = []
        test_histogram_set = histogram_set.HistogramSet()
        for sample in range(args.samples_per_test):
            if total_errors >= args.max_errors:
                logging.error('Error count exceeded max errors (%d). Aborting.' % args.max_errors)
                return 1

            cmd_run = cmd + [
                '--steps-per-trial',
                str(steps_per_trial),
                '--trials',
                str(args.trials_per_sample),
            ]
            if args.smoke_test_mode:
                cmd_run += ['--no-warmup']
            else:
                cmd_run += ['--warmup-loops', str(args.warmup_loops)]
            with common.temporary_file() as histogram_file_path:
                cmd_run += ['--isolated-script-test-perf-output=%s' % histogram_file_path]
                output = _run_and_get_output(args, cmd_run, env)
                if output:
                    sample_wall_times = _get_results_from_output(output, 'wall_time')
                    if not sample_wall_times:
                        logging.warning('Test %s failed to produce a sample output' % test)
                        break
                    logging.info('Sample %d wall_time results: %s' %
                                 (sample, str(sample_wall_times)))
                    wall_times += sample_wall_times
                    with open(histogram_file_path) as histogram_file:
                        sample_json = json.load(histogram_file)
                        sample_histogram = histogram_set.HistogramSet()
                        sample_histogram.ImportDicts(sample_json)
                        test_histogram_set.Merge(sample_histogram)
                else:
                    logging.error('Failed to get sample for test %s' % test)
                    total_errors += 1

        if not wall_times:
            logging.warning('Skipping test %s. Assuming this is intentional.' % test)
            test_results[test] = {'expected': SKIP, 'actual': SKIP}
            results['num_failures_by_type'][SKIP] += 1
        elif len(wall_times) == (args.samples_per_test * args.trials_per_sample):
            if len(wall_times) > 7:
                truncation_n = len(wall_times) >> 3
                logging.info(
                    'Truncation: Removing the %d highest and lowest times from wall_times.' %
                    truncation_n)
                wall_times = _truncated_list(wall_times, truncation_n)

            if len(wall_times) > 1:
                logging.info(
                    'Mean wall_time for %s is %.2f, with coefficient of variation %.2f%%' %
                    (test, _mean(wall_times), (_coefficient_of_variation(wall_times) * 100.0)))
            test_results[test] = {'expected': PASS, 'actual': PASS}
            results['num_failures_by_type'][PASS] += 1

            # Merge the histogram set into one histogram
            with common.temporary_file() as merge_histogram_path:
                logging.info('Writing merged histograms to %s.' % merge_histogram_path)
                with open(merge_histogram_path, 'w') as merge_histogram_file:
                    json.dump(test_histogram_set.AsDicts(), merge_histogram_file)
                    merge_histogram_file.close()
                merged_dicts = merge_histograms.MergeHistograms(
                    merge_histogram_path, groupby=['name'])
                merged_histogram = histogram_set.HistogramSet()
                merged_histogram.ImportDicts(merged_dicts)
                histograms.Merge(merged_histogram)
        else:
            logging.error('Test %s failed to record some samples' % test)
            test_results[test] = {'expected': PASS, 'actual': FAIL, 'is_unexpected': True}
            results['num_failures_by_type'][FAIL] += 1

    if test_results:
        results['tests'][args.test_suite] = test_results

    if args.isolated_script_test_output:
        with open(args.isolated_script_test_output, 'w') as out_file:
            out_file.write(json.dumps(results, indent=2))

        # Uses special output files to match the merge script.
        _save_extra_output_files(args, results, histograms)

    if args.isolated_script_test_perf_output:
        with open(args.isolated_script_test_perf_output, 'w') as out_file:
            out_file.write(json.dumps(histograms.AsDicts(), indent=2))

    end_time = time.time()
    logging.info('Elapsed time: %.2lf seconds.' % (end_time - start_time))

    return 0
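# Both versions of main() above shard the test list with _shard_tests, which is
# defined elsewhere in the script. Below is a plausible sketch using GTEST-style
# round-robin sharding (every shard_count-th test starting at shard_index); the
# original helper may differ.
def _shard_tests(tests, shard_count, shard_index):
    return [tests[index] for index in range(shard_index, len(tests), shard_count)]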