def run(self, round_number):
    """Execute the testcase a single time and wrap the outcome.

    The first round runs with the WARMUP_TIMEOUT value; every later round uses
    the runner's own test timeout. Black-box runs go through
    |process_handler.run_process|, everything else goes through
    |engine_reproduce| and gets a log header prepended to its output.

    Args:
      round_number: 1-based index of the current attempt; also used in the
        "no crash" log message.

    Returns:
      The |CrashResult| built from the return code, crash time and output of
      this run.
    """
    working_directory = environment.get_value('APP_DIR')
    first_round_timeout = environment.get_value('WARMUP_TIMEOUT')
    if round_number == 1:
        timeout = first_round_timeout
    else:
        timeout = self._test_timeout

    if not self._is_black_box:
        engine_result = engine_reproduce(
            self._engine_impl, self._fuzz_target.binary, self._testcase_path,
            self._arguments, timeout)
        return_code = engine_result.return_code
        crash_time = engine_result.time_executed

        # Prefix the engine output with a header describing the command run.
        header = engine_common.get_log_header(
            engine_result.command,
            environment.get_value('BOT_NAME'),
            engine_result.time_executed)
        output = header + '\n' + engine_result.output
    else:
        return_code, crash_time, output = process_handler.run_process(
            self._command,
            timeout=timeout,
            gestures=self._gestures,
            current_working_directory=working_directory)

    # Make sure nothing from this run is left behind for the next round.
    process_handler.terminate_stale_application_instances()

    outcome = CrashResult(return_code, crash_time, output)
    if outcome.is_crash():
        return outcome

    logs.log(
        'No crash occurred (round {round_number}).'.format(
            round_number=round_number),
        output=output)
    return outcome
def check_for_bad_build(job_type, crash_revision):
    """Report whether the current build crashes on startup.

    The application is launched once without a testcase. A crash that is not
    ignorable and is not a leak marks the build as bad. The verdict is recorded
    through |data_handler.add_build_metadata| when the build state is still
    UNMARKED.

    Args:
      job_type: Job type used for tool matching, logging and build metadata.
      crash_revision: Revision of the build under test.

    Returns:
      True if the build is considered bad, False otherwise (including when the
      BAD_BUILD_CHECK value is not set).
    """
    # The whole check is opt-in.
    if not environment.get_value('BAD_BUILD_CHECK'):
        return False

    # Blank command line: no file to run and no http.
    command = get_command_line_for_application(file_to_run='', needs_http=False)

    # Use the default window size here. A custom size can itself cause a
    # startup crash, which would wrongly flag the build as bad.
    window_argument = environment.get_value('WINDOW_ARG', '')
    if window_argument:
        command = command.replace(' %s' % window_argument, '')

    # TSAN is slow and boots slowly on first startup, so it gets the full
    # warmup timeout instead of the fast one.
    if environment.tool_matches('TSAN', job_type):
        timeout_key = 'WARMUP_TIMEOUT'
    else:
        timeout_key = 'FAST_WARMUP_TIMEOUT'
    startup_timeout = environment.get_value(timeout_key)

    bad_build = False
    console_output = ''
    working_directory = environment.get_value('APP_DIR')

    # Start from a clean slate, then launch the application once.
    process_handler.terminate_stale_application_instances()
    return_code, crash_time, output = process_handler.run_process(
        command,
        timeout=startup_timeout,
        current_working_directory=working_directory)
    crash_result = CrashResult(return_code, crash_time, output)

    # 1. Startup crashes may have no crash state (e.g. failed to load a shared
    #    library), so the state is ignored when deciding if this is a crash.
    # 2. Leaks are skipped: they do not block a build from reporting regular
    #    crashes and do not impact regression range calculations.
    if (crash_result.is_crash(ignore_state=True) and
            not crash_result.should_ignore() and
            crash_result.get_type() not in ['Direct-leak', 'Indirect-leak']):
        bad_build = True
        console_output = utils.get_crash_stacktrace_output(
            command,
            crash_result.get_stacktrace(symbolized=True),
            crash_result.get_stacktrace(symbolized=False))
        logs.log(
            'Bad build for %s detected at r%d.' % (job_type, crash_revision),
            output=console_output)

    # Exit all running instances again after the probe run.
    process_handler.terminate_stale_application_instances()

    # Output matching one of the bad-state hints means the bot, not the build,
    # is at fault. In that case, just exit.
    build_state = data_handler.get_build_state(job_type, crash_revision)
    if bad_build and utils.sub_string_exists_in(BAD_STATE_HINTS, output):
        logs.log_fatal_and_exit(
            'Bad bot environment detected, exiting.',
            output=console_output,
            snapshot=process_handler.get_runtime_snapshot())

    # Record our verdict only if no other bot has added information about
    # this build yet.
    if (build_state == data_types.BuildState.UNMARKED and
            not crash_result.should_ignore()):
        data_handler.add_build_metadata(job_type, crash_revision, bad_build,
                                        console_output)

    return bad_build
def run_testcase_and_return_result_in_queue(crash_queue,
                                            thread_index,
                                            file_path,
                                            gestures,
                                            env_copy,
                                            upload_output=False):
    """Run one testcase and report a crash, if any, through |crash_queue|.

    Args:
      crash_queue: Queue that receives a |Crash| entry when the run crashed.
      thread_index: Forwarded to |run_testcase|.
      file_path: Path of the testcase to run.
      gestures: Gestures forwarded to |run_testcase| and stored on the crash.
      env_copy: Environment copy forwarded to |run_testcase|.
      upload_output: When true, the log (and the testcase, if it crashed) is
        uploaded using the timestamp from |_get_testcase_time|.

    Any exception raised while doing so is logged and not propagated.
    """
    # This runs in its own process, so the log handler must be initialized
    # again. This is needed for Windows where instances are not shared across
    # child processes. See:
    # https://stackoverflow.com/questions/34724643/python-logging-with-multiprocessing-root-logger-different-in-windows
    logs.configure('run_testcase', {
        'testcase_path': file_path,
    })

    try:
        # Run the testcase; whether it crashed is decided further below.
        return_code, crash_time, output = run_testcase(
            thread_index, file_path, gestures, env_copy)

        # Pull testcase directory to host to get any stats files.
        if environment.is_trusted_host():
            from bot.untrusted_runner import file_host
            file_host.pull_testcases_from_worker()

        # Analyze the crash portion of the output.
        crash_output = _get_crash_output(output)
        crash_result = CrashResult(return_code, crash_time, crash_output)

        # To keep stats and logs consistent, uploads use the timestamp taken
        # from the stats. It is only needed when uploading.
        log_time = _get_testcase_time(file_path) if upload_output else None

        if crash_result.is_crash():
            # The testcase itself is always the first resource.
            resources = [file_path]
            resources.extend(get_resource_paths(crash_output))

            # Store the crash stack in the crash stacktrace directory, named
            # after the hash of the testcase path.
            stack_file_path = os.path.join(
                environment.get_value('CRASH_STACKTRACES_DIR'),
                utils.string_hash(file_path))
            utils.write_data_to_file(crash_output, stack_file_path)

            crash_queue.put(
                Crash(
                    file_path=file_path,
                    crash_time=crash_time,
                    return_code=return_code,
                    resource_list=resources,
                    gestures=gestures,
                    stack_file_path=stack_file_path))

            # Only crashing testcases are uploaded, and only when there is a
            # log to correlate them with.
            if upload_output:
                upload_testcase(file_path, log_time)

        if upload_output:
            # Uploaded logs carry the full output (crash output, merge output,
            # etc), not just the crash portion.
            full_result = CrashResult(return_code, crash_time, output)
            log = prepare_log_for_upload(full_result.get_stacktrace(),
                                         return_code)
            upload_log(log, log_time)
    except Exception:
        logs.log_error('Exception occurred while running '
                       'run_testcase_and_return_result_in_queue.')
def execute_task(testcase_id, job_type):
    """Execute a symbolize command.

    Sets up a build for the testcase (trunk when its stacktrace is "Pending",
    otherwise its crash revision). For ASAN security crashes it probes
    decreasing redzone sizes to look for a better crash state. It then re-runs
    the testcase on the symbolized builds and stores the resulting crash
    parameters and stacktrace on the testcase.

    Args:
      testcase_id: Id used to look the testcase up through |data_handler|.
      job_type: Job type used for testcase setup, tool matching and for
        rescheduling the task when build setup fails.

    Returns:
      None. The function returns early when no symbolized builds exist, when
      testcase setup yields no files, or when build setup fails (in which case
      the "symbolize" task is re-queued after FAIL_WAIT).
    """
    # Locate the testcase associated with the id.
    testcase = data_handler.get_testcase_by_id(testcase_id)

    # We should at least have a symbolized debug or release build.
    if not build_manager.has_symbolized_builds():
        return

    data_handler.update_testcase_comment(testcase,
                                         data_types.TaskState.STARTED)

    # Setup testcase and its dependencies.
    file_list, _, testcase_file_path = setup.setup_testcase(testcase, job_type)
    if not file_list:
        return

    # Initialize variables.
    build_fail_wait = environment.get_value("FAIL_WAIT")

    old_crash_stacktrace = data_handler.get_stacktrace(testcase)
    sym_crash_type = testcase.crash_type
    sym_crash_address = testcase.crash_address
    sym_crash_state = testcase.crash_state
    sym_redzone = DEFAULT_REDZONE
    warmup_timeout = environment.get_value("WARMUP_TIMEOUT")

    # Decide which build revision to use.
    if testcase.crash_stacktrace == "Pending":
        # This usually happens when someone clicked the 'Update stacktrace from
        # trunk' button on the testcase details page. In this case, we are
        # forced to use trunk. No revision -> trunk build.
        build_revision = None
    else:
        build_revision = testcase.crash_revision

    # Set up a custom or regular build based on revision.
    build_manager.setup_build(build_revision)

    # Get crash revision used in setting up build.
    crash_revision = environment.get_value("APP_REVISION")

    if not build_manager.check_app_path():
        testcase = data_handler.get_testcase_by_id(testcase_id)
        data_handler.update_testcase_comment(
            testcase, data_types.TaskState.ERROR, "Build setup failed")
        tasks.add_task(
            "symbolize", testcase_id, job_type, wait_time=build_fail_wait)
        return

    # ASAN tool settings (if the tool is used).
    # See if we can get better stacks with higher redzone sizes.
    # A UAF might actually turn out to be OOB read/write with a bigger redzone.
    if environment.tool_matches("ASAN", job_type) and testcase.security_flag:
        redzone = MAX_REDZONE
        while redzone >= MIN_REDZONE:
            # Apply the candidate redzone of this iteration. Using the
            # testcase's stored redzone here would run the same configuration
            # on every pass and record a |sym_redzone| that was never tried.
            environment.reset_current_memory_tool_options(
                redzone_size=redzone, disable_ubsan=testcase.disable_ubsan)

            process_handler.terminate_stale_application_instances()
            command = testcase_manager.get_command_line_for_application(
                testcase_file_path, needs_http=testcase.http_flag)
            return_code, crash_time, output = process_handler.run_process(
                command, timeout=warmup_timeout, gestures=testcase.gestures)
            crash_result = CrashResult(return_code, crash_time, output)

            if crash_result.is_crash() and "AddressSanitizer" in output:
                state = crash_result.get_symbolized_data()
                security_flag = crash_result.is_security_issue()

                # Accept the new parameters only if the crash is not ignored,
                # keeps the testcase's security flag and crash type, and
                # actually differs from what we currently have.
                if (not crash_analyzer.ignore_stacktrace(
                        state.crash_stacktrace) and
                        security_flag == testcase.security_flag and
                        state.crash_type == testcase.crash_type and
                        (state.crash_type != sym_crash_type or
                         state.crash_state != sym_crash_state)):
                    logs.log(
                        "Changing crash parameters.\nOld : %s, %s, %s" %
                        (sym_crash_type, sym_crash_address, sym_crash_state))

                    sym_crash_type = state.crash_type
                    sym_crash_address = state.crash_address
                    sym_crash_state = state.crash_state
                    sym_redzone = redzone
                    old_crash_stacktrace = state.crash_stacktrace

                    logs.log(
                        "\nNew : %s, %s, %s" %
                        (sym_crash_type, sym_crash_address, sym_crash_state))
                    break

            # Floor division keeps the redzone an integer under true division.
            redzone //= 2

    # We should have at least a symbolized debug or a release build.
    symbolized_builds = build_manager.setup_symbolized_builds(crash_revision)
    if not symbolized_builds or (
            not build_manager.check_app_path() and
            not build_manager.check_app_path("APP_PATH_DEBUG")):
        testcase = data_handler.get_testcase_by_id(testcase_id)
        data_handler.update_testcase_comment(
            testcase, data_types.TaskState.ERROR, "Build setup failed")
        tasks.add_task(
            "symbolize", testcase_id, job_type, wait_time=build_fail_wait)
        return

    # Increase malloc_context_size to get all stack frames. Default is 30.
    environment.reset_current_memory_tool_options(
        redzone_size=sym_redzone,
        malloc_context_size=STACK_FRAME_COUNT,
        symbolize_inline_frames=True,
        disable_ubsan=testcase.disable_ubsan,
    )

    # TSAN tool settings (if the tool is used).
    if environment.tool_matches("TSAN", job_type):
        environment.set_tsan_max_history_size()

    # Do the symbolization if supported by this application.
    result, sym_crash_stacktrace = get_symbolized_stacktraces(
        testcase_file_path, testcase, old_crash_stacktrace, sym_crash_state)

    # Update crash parameters on a freshly loaded copy of the testcase.
    testcase = data_handler.get_testcase_by_id(testcase_id)
    testcase.crash_type = sym_crash_type
    testcase.crash_address = sym_crash_address
    testcase.crash_state = sym_crash_state
    testcase.crash_stacktrace = data_handler.filter_stacktrace(
        sym_crash_stacktrace)

    if not result:
        data_handler.update_testcase_comment(
            testcase,
            data_types.TaskState.ERROR,
            "Unable to reproduce crash, skipping "
            "stacktrace update",
        )
    else:
        # Switch build url to use the less-optimized symbolized build with
        # better stacktrace.
        build_url = environment.get_value("BUILD_URL")
        if build_url:
            testcase.set_metadata(
                "build_url", build_url, update_testcase=False)

        data_handler.update_testcase_comment(testcase,
                                             data_types.TaskState.FINISHED)

    testcase.symbolized = True
    testcase.crash_revision = crash_revision
    testcase.put()

    # We might have updated the crash state. See if we need to mark this as a
    # duplicate based on other testcases.
    data_handler.handle_duplicate_entry(testcase)

    task_creation.create_blame_task_if_needed(testcase)

    # Switch current directory before builds cleanup.
    root_directory = environment.get_value("ROOT_DIR")
    os.chdir(root_directory)

    # Cleanup symbolized builds which are space-heavy.
    symbolized_builds.delete()
def _symbolize_on_build(testcase_file_path, testcase, build_app_path,
                        build_type, timeout, attempts, expected_state,
                        require_match):
    """Try up to |attempts| times to get a usable stacktrace from one build.

    When |require_match| is true, a crash is only accepted if its crash state
    equals |expected_state| and its security flag equals the testcase's.

    Returns a (found, stacktrace_output) pair; |stacktrace_output| is None
    when no attempt produced an acceptable crash.
    """
    for _ in range(attempts):
        process_handler.terminate_stale_application_instances()
        command = testcase_manager.get_command_line_for_application(
            testcase_file_path,
            app_path=build_app_path,
            needs_http=testcase.http_flag)
        return_code, crash_time, output = process_handler.run_process(
            command, timeout=timeout, gestures=testcase.gestures)
        crash_result = CrashResult(return_code, crash_time, output)
        if not crash_result.is_crash():
            continue

        state = crash_result.get_symbolized_data()
        if crash_analyzer.ignore_stacktrace(state.crash_stacktrace):
            continue

        if require_match:
            if state.crash_state != expected_state:
                continue

            # The stack's security flag has to match the testcase's flag.
            if crash_result.is_security_issue() != testcase.security_flag:
                continue

        raw_stacktrace = crash_result.get_stacktrace(symbolized=False)
        formatted = utils.get_crash_stacktrace_output(
            command,
            state.crash_stacktrace,
            raw_stacktrace,
            build_type=build_type)
        return True, formatted

    return False, None


def get_symbolized_stacktraces(testcase_file_path, testcase,
                               old_crash_stacktrace, expected_state):
    """Use the symbolized builds to generate an updated stacktrace.

    Args:
      testcase_file_path: Path of the testcase to run.
      testcase: Testcase supplying the http flag, gestures and security flag.
      old_crash_stacktrace: Stacktrace kept as the release part when the
        release build does not yield an acceptable crash.
      expected_state: Crash state a release build crash must have.

    Returns:
      A (symbolized, stacktrace) tuple. |symbolized| is True when either build
      produced a stacktrace. |stacktrace| is the release stacktrace followed,
      if non-empty, by the debug stacktrace after a blank line.
    """
    release_app_path = environment.get_value("APP_PATH")
    debug_app_path = environment.get_value("APP_PATH_DEBUG")
    timeout = environment.get_value("WARMUP_TIMEOUT")
    attempts = environment.get_value("FAIL_RETRIES")

    symbolized = False
    debug_part = ""
    stacktrace = old_crash_stacktrace

    # The debug build is run first, but its stacktrace is appended after the
    # more important release build stacktrace.
    if debug_app_path:
        found, text = _symbolize_on_build(
            testcase_file_path, testcase, debug_app_path, "debug", timeout,
            attempts, expected_state, False)
        if found:
            debug_part = text
            symbolized = True

    # The release build must reproduce the expected state and security flag.
    if release_app_path:
        found, text = _symbolize_on_build(
            testcase_file_path, testcase, release_app_path, "release", timeout,
            attempts, expected_state, True)
        if found:
            stacktrace = text
            symbolized = True

    if debug_part:
        stacktrace += "\n\n" + debug_part

    return symbolized, stacktrace
def test_for_reproducibility(testcase_path, expected_state,
                             expected_security_flag, test_timeout, http_flag,
                             gestures):
    """Test to see if a crash is fully reproducible or is a one-time crasher.

    The testcase is run up to CRASH_RETRIES times. A run counts when it
    crashes with the expected security flag and a crash state similar to the
    expected one.

    Args:
      testcase_path: Path of the testcase to run.
      expected_state: Crash state to compare against. When empty, the state of
        the first observed crash is used instead.
      expected_security_flag: Security flag a crash must have to be counted.
      test_timeout: Timeout for every round after the first; the first round
        uses WARMUP_TIMEOUT.
      http_flag: Whether the command line needs http.
      gestures: Gestures passed to the process runner.

    Returns:
      True once the number of counted crashes reaches
      CRASH_RETRIES * REPRODUCIBILITY_FACTOR, False otherwise.
    """
    # Cleanup any existing application instances and user profile directories.
    # Cleaning up temp clears user profile directories and should be done
    # before calling |get_command_line_for_application| since that creates
    # dependencies in the profile folder.
    process_handler.terminate_stale_application_instances()
    shell.clear_temp_directory()

    app_directory = environment.get_value('APP_DIR')
    command = get_command_line_for_application(
        testcase_path, needs_http=http_flag)
    crash_count = 0
    crash_retries = environment.get_value('CRASH_RETRIES')
    reproducible_crash_target_count = crash_retries * REPRODUCIBILITY_FACTOR
    warmup_timeout = environment.get_value('WARMUP_TIMEOUT')

    logs.log('Testing for crash (command="%s").' % command)

    # Pre-set so the final log line works even if the loop never runs.
    round_number = 0
    # |range| rather than the Python 2-only |xrange|: identical iteration for
    # this small count, and it also works on Python 3.
    for round_number in range(1, crash_retries + 1):
        # Bail out early if there is no hope of finding a reproducible crash:
        # even if every remaining round (including this one) crashed, the
        # target count could not be reached.
        if (crash_retries - round_number + crash_count + 1 <
                reproducible_crash_target_count):
            break

        run_timeout = warmup_timeout if round_number == 1 else test_timeout
        return_code, crash_time, output = process_handler.run_process(
            command,
            timeout=run_timeout,
            gestures=gestures,
            current_working_directory=app_directory)
        process_handler.terminate_stale_application_instances()

        crash_result = CrashResult(return_code, crash_time, output)
        if not crash_result.is_crash():
            continue

        state = crash_result.get_symbolized_data()
        crash_state = state.crash_state
        security_flag = crash_result.is_security_issue()

        # If we don't have an expected crash state, set it to the one from
        # the initial crash.
        if not expected_state:
            expected_state = crash_state

        if security_flag != expected_security_flag:
            logs.log('Detected a crash without the correct security flag.')
            continue

        crash_comparer = CrashComparer(crash_state, expected_state)
        if not crash_comparer.is_similar():
            logs.log('Detected a crash with an unrelated state: '
                     'Expected(%s), Found(%s).' % (expected_state,
                                                   crash_state))
            continue

        crash_count += 1
        if crash_count >= reproducible_crash_target_count:
            logs.log('Crash is reproducible.')
            return True

    logs.log('Crash is not reproducible. Crash count: %d/%d.' %
             (crash_count, round_number))
    return False
def test_for_crash_with_retries(testcase,
                                testcase_path,
                                test_timeout,
                                http_flag=False,
                                compare_crash=True):
    """Test for a crash and return crash parameters like crash type, crash
    state, crash stacktrace, etc.

    The testcase is run up to CRASH_RETRIES times until a crash is accepted.

    Args:
      testcase: Testcase supplying gestures, the flaky-stack marker, the
        original crash state and the security flag.
      testcase_path: Path of the testcase to run.
      test_timeout: Timeout for every round after the first; the first round
        uses WARMUP_TIMEOUT.
      http_flag: Whether the command line needs http.
      compare_crash: When false, the first crash is returned without comparing
        it to the testcase's original crash.

    Returns:
      The |CrashResult| of the first accepted crash. If no round produced an
      accepted crash, a |CrashResult| with return code 0, crash time 0 and the
      output of the last run (empty if nothing was run).
    """
    # Cleanup any existing application instances and user profile directories.
    # Cleaning up temp clears user profile directories and should be done
    # before calling |get_command_line_for_application| since that creates
    # dependencies in the profile folder.
    process_handler.terminate_stale_application_instances()
    shell.clear_temp_directory()

    app_directory = environment.get_value('APP_DIR')
    command = get_command_line_for_application(
        testcase_path, needs_http=http_flag)
    crash_retries = environment.get_value('CRASH_RETRIES')
    flaky_stacktrace = testcase.flaky_stack
    warmup_timeout = environment.get_value('WARMUP_TIMEOUT')

    logs.log('Testing for crash (command="%s").' % command)

    # Pre-set so the fallback result below can still be built when the loop
    # body never runs (CRASH_RETRIES of 0); otherwise |output| would be
    # unbound there.
    output = ''
    # |range| rather than the Python 2-only |xrange|: identical iteration for
    # this small count, and it also works on Python 3.
    for round_number in range(1, crash_retries + 1):
        run_timeout = warmup_timeout if round_number == 1 else test_timeout
        return_code, crash_time, output = process_handler.run_process(
            command,
            timeout=run_timeout,
            gestures=testcase.gestures,
            current_working_directory=app_directory)
        process_handler.terminate_stale_application_instances()

        crash_result = CrashResult(return_code, crash_time, output)
        if not crash_result.is_crash():
            continue

        state = crash_result.get_symbolized_data()
        logs.log('Crash occurred in %d seconds (round %d). State:\n%s' %
                 (crash_time, round_number, state.crash_state))

        # Without a comparison request or an original state there is nothing
        # to compare against, so any crash is accepted.
        if not compare_crash or not testcase.crash_state:
            logs.log('Crash stacktrace comparison skipped.')
            return crash_result

        if flaky_stacktrace:
            logs.log('Crash stacktrace is marked flaky, skipping comparison.')
            return crash_result

        if crash_result.should_ignore():
            logs.log('Crash stacktrace matched ignore signatures, ignored.')
            continue

        if crash_result.is_security_issue() != testcase.security_flag:
            logs.log('Crash security flag does not match, ignored.')
            continue

        crash_comparer = CrashComparer(state.crash_state, testcase.crash_state)
        if crash_comparer.is_similar():
            logs.log('Crash stacktrace is similar to original stacktrace.')
            return crash_result

        logs.log('Crash stacktrace does not match original stacktrace.')

    logs.log("Didn't crash at all.")
    crash_result = CrashResult(return_code=0, crash_time=0, output=output)
    return crash_result
# Command-line options describing one captured run of the crashing program.
parser.add_argument(
    "--outputfile", type=str, help="crashing program output", required=True)
parser.add_argument(
    "--exitcode", type=int, help="crashing program exit code", required=True)
parser.add_argument(
    "--time",
    type=int,
    help="crash collection time, Unix timestamp",
    default=0)
args = parser.parse_args()

# Read the captured output inside a context manager so the file handle is
# closed deterministically instead of being left to garbage collection.
with open(args.outputfile) as output_file:
    output = output_file.read()

cr = CrashResult(args.exitcode, args.time, output)

# Summarize the analysis and print it as indented JSON.
result = {
    'type': cr.get_type(),
    'is_crash': cr.is_crash(),
    'is_security_issue': cr.is_security_issue(),
    'should_ignore': cr.should_ignore(),
    'stacktrace': cr.get_stacktrace(),
    'output': cr.output,
    'return_code': cr.return_code,
}
print(json.dumps(result, indent=4))