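# Both helpers below bail out by raising one of two exception types and, on
# failure, format a header that clusterfuzz parses. The real definitions live
# elsewhere in this module; the following is a minimal sketch inferred from
# the call sites below. The exact template text is an assumption; only the
# three format keys are implied by the code.

class PassException(Exception):
  """Early abort that still counts the overall run as passing."""


class FailException(Exception):
  """Early abort that counts the overall run as failing."""


# Hypothetical header text; only the %(configs)s, %(source_key)s and
# %(suppression)s keys are actually required by the call sites below.
FAILURE_HEADER_TEMPLATE = """#
# V8 correctness failure
# V8 correctness configs: %(configs)s
# V8 correctness sources: %(source_key)s
# V8 correctness suppression: %(suppression)s"""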
def fail_bailout(output, ignore_by_output_fun):
  """Raises a FailException if ignore_by_output_fun matches the output."""
  bug = (ignore_by_output_fun(output.stdout) or '').strip()
  if bug:
    raise FailException(FAILURE_HEADER_TEMPLATE % dict(
        configs='', source_key='', suppression=bug))
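# run_comparisons below relies on a small surface of the suppression helper:
# diff() to compare two outputs modulo known differences, and
# ignore_by_output() to match output against known failures. This is a
# hypothetical stub illustrating the assumed interface; the real helper
# applies textual suppressions and is defined elsewhere.

class SuppressionStub(object):
  """Illustrative stand-in for the textual-suppressions helper."""

  def diff(self, baseline_output, comparison_output):
    # Returns a pair (difference, source): a string describing the first
    # unsuppressed difference (or None if the outputs agree) and the source
    # it is attributed to (or None).
    if baseline_output.stdout != comparison_output.stdout:
      return 'unsuppressed output difference', None
    return None, None

  def ignore_by_output(self, stdout):
    # Returns a bug label if stdout matches a known failure, else None.
    return None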
def run_comparisons(suppress, execution_configs, test_case, timeout,
                    verbose=True, ignore_crashes=True, source_key=None):
  """Runs different configurations and bails out on output difference.

  Args:
    suppress: The helper object for textual suppressions.
    execution_configs: Two or more configurations to run. The first one
        will be used as baseline to compare all others to.
    test_case: The test case to run.
    timeout: Timeout in seconds for one run.
    verbose: Prints the executed commands.
    ignore_crashes: Typically we ignore crashes during fuzzing as they are
        frequent. However, when running smoke tests we should not crash
        and immediately flag crashes as a failure.
    source_key: A fixed source key. If not given, it will be inferred
        from the output.
  """
  runner = RepeatedRuns(test_case, timeout, verbose)

  # Run the baseline configuration.
  baseline_config = execution_configs[0]
  baseline_output = runner.run(baseline_config)

  # Iterate over the remaining configurations, run and compare.
  for comparison_config in execution_configs[1:]:
    comparison_output = runner.run(comparison_config)
    difference, source = suppress.diff(baseline_output, comparison_output)

    if difference:
      # Only bail out due to suppressed output if there was a difference. If
      # a suppression doesn't show up anymore in the statistics, we might
      # want to remove it.
      fail_bailout(baseline_output, suppress.ignore_by_output)
      fail_bailout(comparison_output, suppress.ignore_by_output)

      # Check if a difference also occurs with the fallback configuration
      # and give it precedence. E.g. we always prefer x64 differences.
      if comparison_config.fallback:
        fallback_output = runner.run(comparison_config.fallback)
        fallback_difference, fallback_source = suppress.diff(
            baseline_output, fallback_output)
        if fallback_difference:
          fail_bailout(fallback_output, suppress.ignore_by_output)
          source = fallback_source
          comparison_config = comparison_config.fallback
          comparison_output = fallback_output
          difference = fallback_difference

      raise FailException(format_difference(
          baseline_config, comparison_config, baseline_output,
          comparison_output, difference, source_key, source))

  if runner.has_crashed:
    if ignore_crashes:
      # Show if a crash has happened in one of the runs and no difference
      # was detected. This is only for the statistics during experiments.
      raise PassException('# V8 correctness - C-R-A-S-H')
    else:
      # Subsume simulated and unexpected crashes (e.g. during smoke tests)
      # with one failure state.
      raise FailException(FAILURE_HEADER_TEMPLATE % dict(
          configs='', source_key='', suppression=runner.crash_state))
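# run_comparisons drives all executions through a RepeatedRuns helper defined
# elsewhere in this module. This is a minimal sketch inferred from its usage
# above (runner.run(config), runner.has_crashed, runner.crash_state); the
# crash-state label is a placeholder, not the real implementation's string.

class RepeatedRuns(object):
  """Runs the test case against several configs and accumulates crash state."""

  def __init__(self, test_case, timeout, verbose=True):
    self.test_case = test_case
    self.timeout = timeout
    self.verbose = verbose
    self.crash_state = ''

  def run(self, config):
    # Each config carries a command object able to run the test case.
    output = config.command.run(
        self.test_case, timeout=self.timeout, verbose=self.verbose)
    if output.HasCrashed():
      # Remember that at least one run crashed; the label is a placeholder.
      self.crash_state = self.crash_state or 'unexpected crash'
    return output

  @property
  def has_crashed(self):
    return bool(self.crash_state)


# A caller would typically treat the two exception types differently, e.g.:
#
#   try:
#     run_comparisons(suppress, execution_configs, test_case, timeout)
#   except PassException as e:
#     print(e.args[0])   # counted as a pass (e.g. an ignored crash)
#   except FailException as e:
#     print(e.args[0])   # counted as a failure; header parsed by clusterfuzz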