def _pre_run_cleanup(self):
    """Reset leftover process and temp state ahead of a testcase run."""
    # Kill application instances left over from earlier runs, then wipe the
    # temp directory holding temporary user profiles. This has to happen
    # before |get_command_line_for_application| is called, because that call
    # creates dependencies inside the profile folder.
    process_handler.terminate_stale_application_instances()
    shell.clear_temp_directory()
def beat(previous_state, log_filename):
    """Perform one heartbeat cycle and report the log file's current state.

    When the current task has run past its end time (plus the completion
    buffer), every process whose command line contains the |run_bot| script
    path is terminated together with its children, stale application
    instances and temp/testcase directories are cleaned up, and the task end
    is recorded. Afterwards the heartbeat is refreshed, but only if the log
    file's modification time differs from |previous_state|.

    Args:
      previous_state: Log state string returned by the previous cycle.
      log_filename: Path of the log file whose modification time is tracked.

    Returns:
      The log file's modification time as a string, or None when it could
      not be read or when the heartbeat update failed.
    """
    deadline = tasks.get_task_end_time()
    bot_is_stuck = (
        psutil and deadline and dates.time_has_expired(
            deadline, seconds=tasks.TASK_COMPLETION_BUFFER))

    if bot_is_stuck:
        # The absolute path of the |run_bot| script is what identifies bot
        # instances running on this host.
        bot_script_path = os.path.join(
            environment.get_startup_scripts_directory(), 'run_bot')

        for proc in psutil.process_iter():
            try:
                cmdline = ' '.join(proc.cmdline())
            except (psutil.AccessDenied, psutil.NoSuchProcess, OSError):
                # The command line could not be read; skip this process.
                continue

            # Only processes running the main bot script are of interest.
            if bot_script_path in cmdline:
                pid = proc.pid
                logs.log(
                    'Killing stale bot (pid %d) which seems to have stuck.' %
                    pid)
                try:
                    process_handler.terminate_root_and_child_processes(pid)
                except Exception:
                    logs.log_error('Failed to terminate stale bot processes.')

        # Light cleanup so a restarted bot does not run into disk space
        # problems.
        process_handler.terminate_stale_application_instances()
        shell.clear_temp_directory()
        shell.clear_testcase_directories()

        # Stale processes have been dealt with; drop the stale task.
        tasks.track_task_end()

    # The log file's modification time serves as the heartbeat state.
    try:
        current_state = str(os.path.getmtime(log_filename))
    except Exception:
        current_state = None

    # Touch the heartbeat only when the log changed since the last cycle. A
    # failed update is reported as None so the caller can wait and retry.
    if (current_state and current_state != previous_state and
            not data_handler.update_heartbeat()):
        return None

    return current_state
def run(self, round_number: int) -> CrashResult:
    """Execute the testcase a single time.

    Args:
      round_number: 1-based index of this attempt. The first round runs with
        the warmup timeout; later rounds use the configured test timeout.

    Returns:
      A CrashResult built from the run's return code, crash time and output.
      An engine reproduction that times out yields a non-crashing result.
    """
    app_directory = environment.get_value('APP_DIR')
    warmup_timeout = environment.get_value('WARMUP_TIMEOUT')
    if round_number == 1:
        run_timeout = warmup_timeout
    else:
        run_timeout = self._test_timeout

    if not self._is_black_box:
        try:
            result = engine_reproduce(
                self._engine_impl,
                self._fuzz_target.binary,
                self._testcase_path,
                self._arguments,
                run_timeout,
            )
        except TimeoutError:
            # A reproduction timeout counts as "did not crash".
            return CrashResult(0, run_timeout, '')

        return_code = result.return_code
        crash_time = result.time_executed
        # Prefix the engine output with a header describing the command.
        header = engine_common.get_log_header(result.command,
                                              result.time_executed)
        output = header + '\n' + result.output
    else:
        return_code, crash_time, output = process_handler.run_process(
            self._command,
            timeout=run_timeout,
            gestures=self._gestures,
            current_working_directory=app_directory)

    process_handler.terminate_stale_application_instances()

    crash_result = CrashResult(return_code, crash_time, output)
    if crash_result.is_crash():
        return crash_result

    logs.log(
        f'No crash occurred (round {round_number}).',
        output=output,
    )
    return crash_result
def check_for_bad_build(job_type, crash_revision):
    """Report whether the build is bad, i.e. crashes on startup.

    The application is launched with an empty command line (no file, no
    http) and the result is inspected for a crash. Leaks and ignorable
    crashes do not count as a bad build.

    Args:
      job_type: Job type the build belongs to.
      crash_revision: Revision of the build being checked.

    Returns:
      True if the build crashed on startup, False otherwise (including when
      the BAD_BUILD_CHECK flag is not set).
    """
    # The check is opt-in through the environment.
    if not environment.get_value('BAD_BUILD_CHECK'):
        return False

    # Blank command line: nothing to run and no http server.
    command = get_command_line_for_application(file_to_run='', needs_http=False)

    # Drop the window size argument so the default size is used. A custom
    # size can itself trigger a startup crash and get a good build
    # misclassified as bad.
    window_argument = environment.get_value('WINDOW_ARG', '')
    if window_argument:
        command = command.replace(' %s' % window_argument, '')

    # TSAN boots slowly on first startup, so it gets the longer warmup
    # timeout instead of the fast one.
    if environment.tool_matches('TSAN', job_type):
        timeout_key = 'WARMUP_TIMEOUT'
    else:
        timeout_key = 'FAST_WARMUP_TIMEOUT'
    startup_timeout = environment.get_value(timeout_key)

    app_directory = environment.get_value('APP_DIR')

    # Start from a clean slate, then launch the application once.
    process_handler.terminate_stale_application_instances()
    return_code, crash_time, output = process_handler.run_process(
        command,
        timeout=startup_timeout,
        current_working_directory=app_directory)
    crash_result = CrashResult(return_code, crash_time, output)

    # 1. Startup crashes may have no crash state (e.g. a shared library that
    #    failed to load), so the state is ignored when testing for a crash.
    # 2. Leaks neither block a build from reporting regular crashes nor
    #    affect regression range calculations, so they are not counted.
    is_bad_build = bool(
        crash_result.is_crash(ignore_state=True) and
        not crash_result.should_ignore() and
        crash_result.get_type() not in ('Direct-leak', 'Indirect-leak'))

    build_run_console_output = ''
    if is_bad_build:
        build_run_console_output = utils.get_crash_stacktrace_output(
            command,
            crash_result.get_stacktrace(symbolized=True),
            crash_result.get_stacktrace(symbolized=False))
        logs.log(
            'Bad build for %s detected at r%d.' % (job_type, crash_revision),
            output=build_run_console_output)

    # Make sure nothing is left running.
    process_handler.terminate_stale_application_instances()

    # A bad-state hint in the output means the bot environment, not the
    # build, is at fault. In that case just exit.
    build_state = data_handler.get_build_state(job_type, crash_revision)
    if is_bad_build and utils.sub_string_exists_in(BAD_STATE_HINTS, output):
        logs.log_fatal_and_exit(
            'Bad bot environment detected, exiting.',
            output=build_run_console_output,
            snapshot=process_handler.get_runtime_snapshot())

    # Record the verdict only if no other bot has done so already.
    build_is_unmarked = build_state == data_types.BuildState.UNMARKED
    if build_is_unmarked and not crash_result.should_ignore():
        data_handler.add_build_metadata(job_type, crash_revision, is_bad_build,
                                        build_run_console_output)

    return is_bad_build
def execute_task(testcase_id, job_type):
    """Execute a symbolize command for a testcase.

    Sets up the build for the testcase's crash revision (trunk when the
    stored stacktrace is 'Pending'). For ASAN security testcases it probes
    redzone sizes from MAX_REDZONE down to MIN_REDZONE looking for a different
    crash state. It then sets up the symbolized builds, regenerates the
    stacktrace and stores the updated crash parameters on the testcase. On
    build setup failure the task is re-queued after FAIL_WAIT.

    Args:
      testcase_id: Id of the testcase to symbolize.
      job_type: Job type the testcase runs under.
    """
    # Locate the testcase associated with the id.
    testcase = data_handler.get_testcase_by_id(testcase_id)

    # We should at least have a symbolized debug or release build.
    if not build_manager.has_symbolized_builds():
        return

    data_handler.update_testcase_comment(testcase, data_types.TaskState.STARTED)

    # Setup testcase and its dependencies.
    file_list, _, testcase_file_path = setup.setup_testcase(testcase, job_type)
    if not file_list:
        return

    # Initialize variables.
    build_fail_wait = environment.get_value('FAIL_WAIT')

    old_crash_stacktrace = data_handler.get_stacktrace(testcase)
    sym_crash_type = testcase.crash_type
    sym_crash_address = testcase.crash_address
    sym_crash_state = testcase.crash_state
    sym_redzone = DEFAULT_REDZONE
    warmup_timeout = environment.get_value('WARMUP_TIMEOUT')

    # Decide which build revision to use.
    if testcase.crash_stacktrace == 'Pending':
        # This usually happens when someone clicked the 'Update stacktrace
        # from trunk' button on the testcase details page. In this case, we
        # are forced to use trunk. No revision -> trunk build.
        build_revision = None
    else:
        build_revision = testcase.crash_revision

    # Set up a custom or regular build based on revision.
    build_manager.setup_build(build_revision)

    # Get crash revision used in setting up build.
    crash_revision = environment.get_value('APP_REVISION')

    if not build_manager.check_app_path():
        testcase = data_handler.get_testcase_by_id(testcase_id)
        data_handler.update_testcase_comment(
            testcase, data_types.TaskState.ERROR, 'Build setup failed')
        tasks.add_task(
            'symbolize', testcase_id, job_type, wait_time=build_fail_wait)
        return

    # ASAN tool settings (if the tool is used).
    # See if we can get better stacks with higher redzone sizes.
    # A UAF might actually turn out to be OOB read/write with a bigger redzone.
    if environment.tool_matches('ASAN', job_type) and testcase.security_flag:
        redzone = MAX_REDZONE
        while redzone >= MIN_REDZONE:
            # Run with the candidate redzone of this iteration. Passing the
            # testcase's stored redzone here would repeat the very same run
            # on every pass and make |sym_redzone| below meaningless.
            environment.reset_current_memory_tool_options(
                redzone_size=redzone, disable_ubsan=testcase.disable_ubsan)

            process_handler.terminate_stale_application_instances()
            command = testcase_manager.get_command_line_for_application(
                testcase_file_path, needs_http=testcase.http_flag)
            return_code, crash_time, output = (
                process_handler.run_process(
                    command,
                    timeout=warmup_timeout,
                    gestures=testcase.gestures))
            crash_result = CrashResult(return_code, crash_time, output)

            if crash_result.is_crash() and 'AddressSanitizer' in output:
                state = crash_result.get_symbolized_data()
                security_flag = crash_result.is_security_issue()

                # Accept the new state only if it is a usable stack, keeps
                # the security flag and crash type of the testcase, and
                # actually differs from what we already have.
                if (not crash_analyzer.ignore_stacktrace(
                        state.crash_stacktrace) and
                        security_flag == testcase.security_flag and
                        state.crash_type == testcase.crash_type and
                        (state.crash_type != sym_crash_type or
                         state.crash_state != sym_crash_state)):
                    logs.log(
                        'Changing crash parameters.\nOld : %s, %s, %s' %
                        (sym_crash_type, sym_crash_address, sym_crash_state))

                    sym_crash_type = state.crash_type
                    sym_crash_address = state.crash_address
                    sym_crash_state = state.crash_state
                    sym_redzone = redzone
                    old_crash_stacktrace = state.crash_stacktrace

                    logs.log(
                        '\nNew : %s, %s, %s' %
                        (sym_crash_type, sym_crash_address, sym_crash_state))
                    break

            # Floor division keeps the redzone an int; true division would
            # turn it into a float (e.g. 512.0) on Python 3.
            redzone //= 2

    # We should have at least a symbolized debug or a release build.
    symbolized_builds = build_manager.setup_symbolized_builds(crash_revision)
    if (not symbolized_builds or
            (not build_manager.check_app_path() and
             not build_manager.check_app_path('APP_PATH_DEBUG'))):
        testcase = data_handler.get_testcase_by_id(testcase_id)
        data_handler.update_testcase_comment(
            testcase, data_types.TaskState.ERROR, 'Build setup failed')
        tasks.add_task(
            'symbolize', testcase_id, job_type, wait_time=build_fail_wait)
        return

    # Increase malloc_context_size to get all stack frames. Default is 30.
    environment.reset_current_memory_tool_options(
        redzone_size=sym_redzone,
        malloc_context_size=STACK_FRAME_COUNT,
        symbolize_inline_frames=True,
        disable_ubsan=testcase.disable_ubsan)

    # TSAN tool settings (if the tool is used).
    if environment.tool_matches('TSAN', job_type):
        environment.set_tsan_max_history_size()

    # Do the symbolization if supported by this application.
    result, sym_crash_stacktrace = (
        get_symbolized_stacktraces(testcase_file_path, testcase,
                                   old_crash_stacktrace, sym_crash_state))

    # Update crash parameters. The testcase is re-fetched so the writes below
    # apply to a fresh copy of the entity.
    testcase = data_handler.get_testcase_by_id(testcase_id)
    testcase.crash_type = sym_crash_type
    testcase.crash_address = sym_crash_address
    testcase.crash_state = sym_crash_state
    testcase.crash_stacktrace = (
        data_handler.filter_stacktrace(sym_crash_stacktrace))

    if not result:
        data_handler.update_testcase_comment(
            testcase, data_types.TaskState.ERROR,
            'Unable to reproduce crash, skipping '
            'stacktrace update')
    else:
        # Switch build url to use the less-optimized symbolized build with
        # better stacktrace.
        build_url = environment.get_value('BUILD_URL')
        if build_url:
            testcase.set_metadata('build_url', build_url, update_testcase=False)

        data_handler.update_testcase_comment(testcase,
                                             data_types.TaskState.FINISHED)

    testcase.symbolized = True
    testcase.crash_revision = crash_revision
    testcase.put()

    # We might have updated the crash state. See if we need to mark it as a
    # duplicate based on other testcases.
    data_handler.handle_duplicate_entry(testcase)

    task_creation.create_blame_task_if_needed(testcase)

    # Switch current directory before builds cleanup.
    root_directory = environment.get_value('ROOT_DIR')
    os.chdir(root_directory)

    # Cleanup symbolized builds which are space-heavy.
    symbolized_builds.delete()
def get_symbolized_stacktraces(testcase_file_path, testcase,
                               old_crash_stacktrace, expected_state):
    """Regenerate the testcase's stacktrace using the symbolized builds.

    The testcase is run against the debug build (APP_PATH_DEBUG) and then the
    release build (APP_PATH), each up to FAIL_RETRIES times, until a usable
    crash is seen. A release crash is only accepted when its crash state
    equals |expected_state| and its security flag matches the testcase.

    Args:
      testcase_file_path: Path of the testcase file to run.
      testcase: Testcase entity supplying http flag, gestures and security
        flag.
      old_crash_stacktrace: Stacktrace used for the release part when the
        release build does not yield an acceptable crash.
      expected_state: Crash state the release build crash must match.

    Returns:
      A (symbolized, stacktrace) tuple. |symbolized| is True when either
      build produced an accepted crash. |stacktrace| is the release
      stacktrace followed, if available, by the debug stacktrace.
    """
    release_path = environment.get_value('APP_PATH')
    debug_path = environment.get_value('APP_PATH_DEBUG')
    run_timeout = environment.get_value('WARMUP_TIMEOUT')
    max_attempts = environment.get_value('FAIL_RETRIES')

    def reproduce(binary_path):
        """Run the testcase once on |binary_path|; return (command, result)."""
        process_handler.terminate_stale_application_instances()
        command = testcase_manager.get_command_line_for_application(
            testcase_file_path,
            app_path=binary_path,
            needs_http=testcase.http_flag)
        return_code, crash_time, output = process_handler.run_process(
            command, timeout=run_timeout, gestures=testcase.gestures)
        return command, CrashResult(return_code, crash_time, output)

    symbolized = False
    debug_build_stacktrace = ''
    release_build_stacktrace = old_crash_stacktrace

    # The debug build goes first, although its stacktrace is placed after
    # the more important release build stacktrace in the final output.
    if debug_path:
        for _ in range(max_attempts):
            command, crash_result = reproduce(debug_path)
            if not crash_result.is_crash():
                continue

            state = crash_result.get_symbolized_data()
            if crash_analyzer.ignore_stacktrace(state.crash_stacktrace):
                continue

            raw_stacktrace = crash_result.get_stacktrace(symbolized=False)
            debug_build_stacktrace = utils.get_crash_stacktrace_output(
                command,
                state.crash_stacktrace,
                raw_stacktrace,
                build_type='debug')
            symbolized = True
            break

    # Now the release build, with stricter acceptance rules.
    if release_path:
        for _ in range(max_attempts):
            command, crash_result = reproduce(release_path)
            if not crash_result.is_crash():
                continue

            state = crash_result.get_symbolized_data()
            if crash_analyzer.ignore_stacktrace(state.crash_stacktrace):
                continue

            # The crash state must be the expected one, and the security
            # flag of this stack has to match the testcase's.
            if (state.crash_state != expected_state or
                    crash_result.is_security_issue() != testcase.security_flag):
                continue

            raw_stacktrace = crash_result.get_stacktrace(symbolized=False)
            release_build_stacktrace = utils.get_crash_stacktrace_output(
                command,
                state.crash_stacktrace,
                raw_stacktrace,
                build_type='release')
            symbolized = True
            break

    if not debug_build_stacktrace:
        return symbolized, release_build_stacktrace

    combined = release_build_stacktrace
    combined += '\n\n' + debug_build_stacktrace
    return symbolized, combined
def test_terminate_stale_application_instances(self):
    """Check that stale application instances can be terminated."""
    # Currently only verifies that the call completes without raising.
    # TODO(ochang): Improve this test once we use Docker.
    process_handler.terminate_stale_application_instances()
def TerminateStaleApplicationInstances(self, request, context):  # pylint: disable=unused-argument
    """Terminate stale application instances and return an empty response."""
    # Neither |request| nor |context| is consulted; the call takes no input.
    process_handler.terminate_stale_application_instances()
    response = untrusted_runner_pb2.TerminateStaleApplicationInstancesResponse()
    return response