def process_stacktrace(self, unsymbolized_crash_stacktrace):
  self.frame_no = 0
  symbolized_crash_stacktrace = u''
  for line in unsymbolized_crash_stacktrace.splitlines():
    self.current_line = utils.decode_to_unicode(line.rstrip())

    # 0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
    stack_trace_line_format = (
        r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\(([^+]*)\+(0x[0-9a-f]+)\)')
    match = re.match(stack_trace_line_format, line)
    if not match:
      symbolized_crash_stacktrace += u'%s\n' % self.current_line
      continue

    _, frameno_str, addr, binary, offset = match.groups()

    arch = ''
    # Arch can be embedded in the filename, e.g.: "libabc.dylib:x86_64h".
    colon_pos = binary.rfind(':')
    if colon_pos != -1:
      maybe_arch = binary[colon_pos + 1:]
      if is_valid_arch(maybe_arch):
        arch = maybe_arch
        binary = binary[0:colon_pos]
    if arch == '':
      arch = guess_arch(addr)

    if frameno_str == '0':
      # Assume that frame #0 is the first frame of new stack trace.
      self.frame_no = 0

    original_binary = binary
    if self.binary_path_filter:
      binary = self.binary_path_filter(binary)

    symbolized_line = self.symbolize_address(addr, binary, offset, arch)
    if not symbolized_line:
      if original_binary != binary:
        symbolized_line = self.symbolize_address(addr, original_binary,
                                                 offset, arch)

    if not symbolized_line:
      symbolized_crash_stacktrace += u'%s\n' % self.current_line
    else:
      for symbolized_frame in symbolized_line:
        symbolized_crash_stacktrace += u'%s\n' % (
            '    #' + str(self.frame_no) + ' ' + symbolized_frame.rstrip())
        self.frame_no += 1

  # Close any left-over open pipes.
  for pipe in pipes:
    pipe.stdin.close()
    pipe.stdout.close()
    pipe.kill()

  return symbolized_crash_stacktrace
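# Illustrative sketch (not part of the original module): how the frame regex
# above decomposes a sanitizer stack frame line, including the optional
# ":arch" suffix handling. Uses only the standard library; the sample line is
# hypothetical.
import re

_STACK_TRACE_LINE_FORMAT = (
    r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\(([^+]*)\+(0x[0-9a-f]+)\)')

_match = re.match(_STACK_TRACE_LINE_FORMAT,
                  '    #0 0x7f6e35cf2e45 (/blah/foo.so:x86_64h+0x11fe45)')
_, _frameno_str, _addr, _binary, _offset = _match.groups()
assert (_frameno_str, _addr, _offset) == ('0', '0x7f6e35cf2e45', '0x11fe45')

# The arch suffix, when present, is split off the binary path.
_colon_pos = _binary.rfind(':')
assert _binary[_colon_pos + 1:] == 'x86_64h'
assert _binary[:_colon_pos] == '/blah/foo.so'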
def process_stacktrace(self, unsymbolized_crash_stacktrace):
  self.frame_no = 0
  symbolized_crash_stacktrace = u''
  unsymbolized_crash_stacktrace_lines = (
      unsymbolized_crash_stacktrace.splitlines())
  if lkl.is_lkl_stack_trace(unsymbolized_crash_stacktrace):
    line_parser = self._lkl_line_parser
    self.lkl_binary_name = lkl.get_lkl_binary_name(
        unsymbolized_crash_stacktrace_lines)
    # This should never happen, but if it does, let's just return the
    # unsymbolized stack, since we can't symbolize anything anyway.
    if not self.lkl_binary_name:
      return unsymbolized_crash_stacktrace
  else:
    line_parser = self._line_parser

  for line in unsymbolized_crash_stacktrace_lines:
    self.current_line = utils.decode_to_unicode(line.rstrip())
    frameno_str, addr, binary, offset, arch = line_parser(line)
    if not binary or not offset:
      symbolized_crash_stacktrace += u'%s\n' % self.current_line
      continue

    if frameno_str == '0':
      # Assume that frame #0 is the first frame of new stack trace.
      self.frame_no = 0

    original_binary = binary
    if self.binary_path_filter:
      binary = self.binary_path_filter(binary)

    symbolized_line = self.symbolize_address(addr, binary, offset, arch)
    if not symbolized_line:
      if original_binary != binary:
        symbolized_line = self.symbolize_address(addr, original_binary,
                                                 offset, arch)

    if not symbolized_line:
      symbolized_crash_stacktrace += u'%s\n' % self.current_line
    else:
      for symbolized_frame in symbolized_line:
        symbolized_crash_stacktrace += u'%s\n' % (
            '    #' + str(self.frame_no) + ' ' + symbolized_frame.rstrip())
        self.frame_no += 1

  # Close any left-over open pipes.
  for pipe in pipes:
    pipe.stdin.close()
    pipe.stdout.close()
    pipe.kill()

  return symbolized_crash_stacktrace
def minimize_corpus(self, target_path, arguments, input_dirs, output_dir,
                    reproducers_dir, max_time):
  """Optional (but recommended): run corpus minimization.

  Args:
    target_path: Path to the target.
    arguments: Additional arguments needed for corpus minimization.
    input_dirs: Input corpora.
    output_dir: Output directory to place minimized corpus.
    reproducers_dir: The directory to put reproducers in when crashes are
        found.
    max_time: Maximum allowed time for the minimization.

  Returns:
    A Result object.

  Raises:
    TimeoutError: If the corpus minimization exceeds max_time.
    Error: If the merge failed in some other way.
  """
  runner = libfuzzer.get_runner(target_path)
  libfuzzer.set_sanitizer_options(target_path)
  merge_tmp_dir = self._create_temp_corpus_dir('merge-workdir')

  result = runner.merge(
      [output_dir] + input_dirs,
      merge_timeout=max_time,
      tmp_dir=merge_tmp_dir,
      additional_args=arguments,
      artifact_prefix=reproducers_dir,
      merge_control_file=getattr(self, '_merge_control_file', None))

  if result.timed_out:
    raise engine.TimeoutError('Merging new testcases timed out\n' +
                              result.output)

  if result.return_code != 0:
    raise MergeError('Merging new testcases failed: ' + result.output)

  merge_output = utils.decode_to_unicode(result.output)
  merge_stats = stats.parse_stats_from_merge_log(merge_output.splitlines())

  # TODO(ochang): Get crashes found during merge.
  return engine.FuzzResult(merge_output, result.command, [], merge_stats,
                           result.time_executed)
def _get_stats_from_log(log_path,
                        strategies=None,
                        arguments=None,
                        stats_overrides=None):
  """Calculate stats for the given log the same way as the engine does."""
  if strategies is None:
    strategies = []
  if arguments is None:
    arguments = []

  log_lines = utils.decode_to_unicode(
      utils.read_data_from_file(log_path, eval_data=False)).splitlines()
  stats = libfuzzer.parse_log_stats(log_lines)
  stats.update(
      performance_stats.parse_performance_features(log_lines, strategies,
                                                   arguments))
  if stats_overrides:
    stats.update(stats_overrides)

  return stats
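# Hypothetical usage sketch for _get_stats_from_log (the log path, strategy
# name and override key below are made up for illustration; they are not
# prescribed by the original code).
stats_for_log = _get_stats_from_log(
    '/tmp/fuzz-0.log',
    strategies=['value_profile'],
    arguments=['-timeout=25'],
    stats_overrides={'actual_duration': 300})
# Overrides are applied last, so they win over anything parsed from the log.
assert stats_for_log['actual_duration'] == 300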
def get_crash_info_and_stacktrace(application_command_line, crash_stacktrace,
                                  gestures):
  """Return crash minidump location and updated crash stacktrace."""
  app_name_lower = environment.get_value('APP_NAME').lower()
  platform = environment.platform()
  retry_limit = environment.get_value('FAIL_RETRIES')
  using_android = platform == 'ANDROID'
  using_chrome = 'chrome' in app_name_lower or 'chromium' in app_name_lower
  warmup_timeout = environment.get_value('WARMUP_TIMEOUT', 90)

  # Minidump generation is only applicable to the Chrome application.
  # FIXME: Support minidump generation on platforms other than Android.
  if not using_chrome or not using_android:
    return None, crash_stacktrace

  # Get the crash info from stacktrace.
  crash_info = get_crash_info(crash_stacktrace)

  # If we lost the minidump file, we need to recreate it.
  # Note that because of the way crash_info is generated now, if we have a
  # non-None crash_info, we should also have its minidump path; we insert
  # the check to safeguard against possibly constructing the crash_info in
  # other ways in the future that might potentially lose the minidump path.
  if not crash_info or not crash_info.minidump_info.path:
    for _ in range(retry_limit):
      _, _, output = process_handler.run_process(
          application_command_line, timeout=warmup_timeout, gestures=gestures)
      crash_info = get_crash_info(output)
      if crash_info and crash_info.minidump_info.path:
        crash_stacktrace = utils.decode_to_unicode(output)
        break

    if not crash_info or not crash_info.minidump_info.path:
      # We could not regenerate a minidump for this crash.
      logs.log('Unable to regenerate a minidump for this crash.')

  return crash_info, crash_stacktrace
def process_stacktrace(self, unsymbolized_crash_stacktrace):
  self.frame_no = 0
  symbolized_crash_stacktrace = u''
  for line in unsymbolized_crash_stacktrace.splitlines():
    self.current_line = utils.decode_to_unicode(line.rstrip())

    # 0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
    stack_trace_line_format = (
        r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\(([^+]*)\+(0x[0-9a-f]+)\)')
    match = re.match(stack_trace_line_format, line)
    if not match:
      symbolized_crash_stacktrace += u'%s\n' % self.current_line
      continue

    _, frameno_str, addr, binary, offset = match.groups()

    if frameno_str == '0':
      # Assume that frame #0 is the first frame of new stack trace.
      self.frame_no = 0

    original_binary = binary
    if self.binary_path_filter:
      binary = self.binary_path_filter(binary)

    symbolized_line = self.symbolize_address(addr, binary, offset)
    if not symbolized_line:
      if original_binary != binary:
        # Retry with the unfiltered path; retrying with the same filtered
        # binary would just fail again.
        symbolized_line = self.symbolize_address(addr, original_binary,
                                                 offset)

    if not symbolized_line:
      symbolized_crash_stacktrace += u'%s\n' % self.current_line
    else:
      for symbolized_frame in symbolized_line:
        symbolized_crash_stacktrace += u'%s\n' % (
            '    #' + str(self.frame_no) + ' ' + symbolized_frame.rstrip())
        self.frame_no += 1

  # Close any left-over open pipes.
  for pipe in pipes:
    pipe.stdin.close()
    pipe.stdout.close()
    pipe.kill()

  return symbolized_crash_stacktrace
def filter_stacktrace(stacktrace):
  """Filters stacktrace and returns content appropriate for storage as an
  appengine entity."""
  unicode_stacktrace = utils.decode_to_unicode(stacktrace)
  if len(unicode_stacktrace) <= data_types.STACKTRACE_LENGTH_LIMIT:
    return unicode_stacktrace

  tmpdir = environment.get_value('BOT_TMPDIR')
  tmp_stacktrace_file = os.path.join(tmpdir, 'stacktrace.tmp')

  try:
    with open(tmp_stacktrace_file, 'w') as handle:
      handle.write(stacktrace)
    with open(tmp_stacktrace_file, 'r') as handle:
      key = blobs.write_blob(handle)
  except Exception:
    logs.log_error('Unable to write crash stacktrace to temporary file.')
    shell.remove_file(tmp_stacktrace_file)
    return unicode_stacktrace[(-1 * data_types.STACKTRACE_LENGTH_LIMIT):]

  shell.remove_file(tmp_stacktrace_file)
  return '%s%s' % (data_types.BLOBSTORE_STACK_PREFIX, key)
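# Minimal self-contained sketch (with an assumed tiny limit, for illustration
# only) of the truncation fallback above: when the blob write fails, only the
# last STACKTRACE_LENGTH_LIMIT characters are kept, i.e. the tail of the
# output, presumably because the crash report appears near the end.
_STACKTRACE_LENGTH_LIMIT = 10  # the real limit lives in data_types
_trace = u'0123456789ABCDEF'
assert _trace[(-1 * _STACKTRACE_LENGTH_LIMIT):] == u'6789ABCDEF'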
def reproduce(self, target_path, input_path, arguments, max_time):
  """Reproduce a crash given an input.

  Args:
    target_path: Path to the target.
    input_path: Path to the reproducer input.
    arguments: Additional arguments needed for reproduction.
    max_time: Maximum allowed time for the reproduction.

  Returns:
    A ReproduceResult.

  Raises:
    TimeoutError: If the reproduction exceeds max_time.
  """
  runner = libfuzzer.get_runner(target_path)
  libfuzzer.set_sanitizer_options(target_path)

  # Remove fuzzing specific arguments. This is only really needed for legacy
  # testcases, and can be removed in the distant future.
  arguments = arguments[:]
  libfuzzer.remove_fuzzing_arguments(arguments)

  runs_argument = constants.RUNS_FLAG + str(constants.RUNS_TO_REPRODUCE)
  arguments.append(runs_argument)

  result = runner.run_single_testcase(
      input_path, timeout=max_time, additional_args=arguments)

  if result.timed_out:
    raise engine.TimeoutError('Reproducing timed out\n' + result.output)

  return engine.ReproduceResult(result.command, result.return_code,
                                result.time_executed,
                                utils.decode_to_unicode(result.output))
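# Hypothetical call-site sketch for reproduce() (the engine class name, paths
# and flag below are all made up for illustration).
engine_impl = LibFuzzerEngine()  # assumed engine class exposing reproduce()
reproduce_result = engine_impl.reproduce(
    '/build/out/asan/target_fuzzer',
    '/tmp/testcase',
    arguments=['-rss_limit_mb=2560'],
    max_time=30)
print(reproduce_result.return_code, reproduce_result.time_executed)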
def store_testcase(crash, fuzzed_keys, minimized_keys, regression, fixed,
                   one_time_crasher_flag, crash_revision, comment,
                   absolute_path, fuzzer_name, fully_qualified_fuzzer_name,
                   job_type, archived, archive_filename, binary_flag,
                   http_flag, gestures, redzone, minidump_keys,
                   window_argument, timeout_multiplier, minimized_arguments):
  """Create a testcase and store it in the datastore using remote api."""
  # Initialize variable to prevent invalid values.
  if archived:
    archive_state = data_types.ArchiveStatus.FUZZED
  else:
    archive_state = 0
  if not gestures:
    gestures = []
  if not redzone:
    redzone = 128

  # Create the testcase.
  testcase = data_types.Testcase()
  testcase.crash_type = crash.crash_type
  testcase.crash_address = crash.crash_address
  testcase.crash_state = utils.decode_to_unicode(crash.crash_state)
  testcase.crash_stacktrace = filter_stacktrace(crash.crash_stacktrace)
  testcase.fuzzed_keys = fuzzed_keys
  testcase.minimized_keys = minimized_keys
  testcase.bug_information = ''
  testcase.regression = regression
  testcase.fixed = fixed
  testcase.security_flag = crash.security_flag
  testcase.security_severity = _get_security_severity(crash, job_type,
                                                      gestures)
  testcase.one_time_crasher_flag = one_time_crasher_flag
  testcase.crash_revision = crash_revision
  testcase.original_absolute_path = absolute_path
  testcase.absolute_path = absolute_path
  testcase.fuzzer_name = fuzzer_name
  testcase.overridden_fuzzer_name = fully_qualified_fuzzer_name or fuzzer_name
  testcase.job_type = job_type
  testcase.queue = tasks.default_queue()
  testcase.archive_state = archive_state
  testcase.archive_filename = archive_filename
  testcase.binary_flag = binary_flag
  testcase.http_flag = http_flag
  testcase.timestamp = datetime.datetime.utcnow()
  testcase.gestures = gestures
  testcase.redzone = redzone
  testcase.minidump_keys = minidump_keys
  testcase.window_argument = window_argument
  testcase.timeout_multiplier = float(timeout_multiplier)
  testcase.minimized_arguments = minimized_arguments
  testcase.project_name = get_project_name(job_type)

  # Set metadata fields (e.g. build url, build key, platform string, etc).
  set_initial_testcase_metadata(testcase)

  # Update the comment and save testcase.
  update_testcase_comment(testcase, data_types.TaskState.NA, comment)

  # Get testcase id from newly created testcase.
  testcase_id = testcase.key.id()
  logs.log(
      ('Created new testcase %d (reproducible:%s, security:%s, binary:%s).\n'
       'crash_type: %s\ncrash_state:\n%s\n') %
      (testcase_id, not testcase.one_time_crasher_flag, testcase.security_flag,
       testcase.binary_flag, testcase.crash_type, testcase.crash_state))

  # Update global blacklist to avoid finding this leak again (if needed).
  is_lsan_enabled = environment.get_value('LSAN')
  if is_lsan_enabled:
    from fuzzing import leak_blacklist
    leak_blacklist.add_crash_to_global_blacklist_if_needed(testcase)

  return testcase_id
def run_process(cmdline,
                current_working_directory=None,
                timeout=DEFAULT_TEST_TIMEOUT,
                need_shell=False,
                gestures=None,
                env_copy=None,
                testcase_run=True,
                ignore_children=True):
  """Executes a process with a given command line and other parameters."""
  if environment.is_trusted_host() and testcase_run:
    from bot.untrusted_runner import remote_process_host
    return remote_process_host.run_process(
        cmdline, current_working_directory, timeout, need_shell, gestures,
        env_copy, testcase_run, ignore_children)

  if gestures is None:
    gestures = []

  if env_copy:
    os.environ.update(env_copy)

  # FIXME(mbarbella): Using LAUNCHER_PATH here is error prone. It forces us to
  # do certain operations before fuzzer setup (e.g. bad build check).
  launcher = environment.get_value('LAUNCHER_PATH')
  # This is used when running scripts on native linux OS and not on the device.
  # E.g. running a fuzzer to generate testcases or launcher script.
  plt = environment.platform()
  if plt in ['ANDROID', 'FUCHSIA'] and (not testcase_run or launcher):
    plt = 'LINUX'
  elif plt == 'IOS' and (not testcase_run or launcher):
    plt = 'MAC'

  # Lower the testcase timeout slightly to account for time needed for crash
  # analysis.
  timeout -= CRASH_ANALYSIS_TIME

  # LeakSanitizer hack - give time for stdout/stderr processing.
  lsan = environment.get_value('LSAN', False)
  if lsan:
    timeout -= LSAN_ANALYSIS_TIME

  # Initialize variables.
  adb_output = None
  process_output = ''
  process_status = None
  return_code = 0
  process_poll_interval = environment.get_value('PROCESS_POLL_INTERVAL', 0.5)
  start_time = time.time()
  watch_for_process_exit = (
      environment.get_value('WATCH_FOR_PROCESS_EXIT')
      if plt == 'ANDROID' else True)
  window_list = []

  # Get gesture start time from last element in gesture list.
  gestures = copy.deepcopy(gestures)
  if gestures and gestures[-1].startswith('Trigger'):
    gesture_start_time = int(gestures[-1].split(':')[1])
    gestures.pop()
  else:
    gesture_start_time = timeout // 2

  if plt == 'ANDROID':
    # Clear the log upfront.
    android.logger.clear_log()

    # Run the app.
    adb_output = android.adb.run_command(cmdline, timeout=timeout)
  else:
    cmd, args = shell.get_command_and_arguments(cmdline)

    process_output = mozprocess.processhandler.StoreOutput()
    process_status = ProcessStatus()
    try:
      process_handle = mozprocess.ProcessHandlerMixin(
          cmd,
          args,
          cwd=current_working_directory,
          shell=need_shell,
          processOutputLine=[process_output],
          onFinish=[process_status],
          ignore_children=ignore_children)
      start_process(process_handle)
    except:
      logs.log_error('Exception occurred when running command: %s.' % cmdline)
      return None, None, ''

  while True:
    time.sleep(process_poll_interval)

    # Run the gestures at gesture_start_time, or in case we didn't find
    # windows in the last try.
    if (gestures and time.time() - start_time >= gesture_start_time and
        not window_list):
      # In case we don't find any windows, we increment the gesture start time
      # so that the next check is after 1 second.
      gesture_start_time += 1

      if plt == 'LINUX':
        linux.gestures.run_gestures(gestures, process_handle.pid,
                                    process_status, start_time, timeout,
                                    window_list)
      elif plt == 'WINDOWS':
        windows.gestures.run_gestures(gestures, process_handle.pid,
                                      process_status, start_time, timeout,
                                      window_list)
      elif plt == 'ANDROID':
        android.gestures.run_gestures(gestures, start_time, timeout)

        # TODO(mbarbella): We add a fake window here to prevent gestures on
        # Android from getting executed more than once.
        window_list = ['FAKE']

    if time.time() - start_time >= timeout:
      break

    # Collect the process output.
    output = (
        android.logger.log_output()
        if plt == 'ANDROID' else b'\n'.join(process_output.output))
    output = utils.decode_to_unicode(output)
    if crash_analyzer.is_memory_tool_crash(output):
      break

    # Check if we need to bail out on process exit.
    if watch_for_process_exit:
      # If |watch_for_process_exit| is set, then we already completed running
      # our app launch command. So, we can bail out.
      if plt == 'ANDROID':
        break

      # On desktop, we bail out as soon as the process finishes.
      if process_status and process_status.finished:
        # Wait for process shutdown and set return code.
        process_handle.wait(timeout=PROCESS_CLEANUP_WAIT_TIME)
        break

  # Process output based on platform.
  if plt == 'ANDROID':
    # Get current log output. If device is in reboot mode, logcat
    # automatically waits for device to be online.
    time.sleep(ANDROID_CRASH_LOGCAT_WAIT_TIME)
    output = android.logger.log_output()

    if android.constants.LOW_MEMORY_REGEX.search(output):
      # If the device is low on memory, we should force reboot and bail out to
      # prevent device from getting in a frozen state.
      logs.log('Device is low on memory, rebooting.', output=output)
      android.adb.hard_reset()
      android.adb.wait_for_device()

    elif android.adb.time_since_last_reboot() < time.time() - start_time:
      # A reboot has happened; append the log output from before the reboot
      # and the kernel log contents to the output.
      log_before_last_reboot = android.logger.log_output_before_last_reboot()
      kernel_log = android.adb.get_kernel_log_content()
      output = '%s%s%s%s%s' % (
          log_before_last_reboot,
          utils.get_line_seperator('Device rebooted'), output,
          utils.get_line_seperator('Kernel Log'), kernel_log)
      # Make sure to reset SE Linux Permissive Mode. This can be done cheaply
      # in ~0.15 sec and is needed especially between runs for kernel crashes.
      android.adb.run_as_root()
      android.settings.change_se_linux_to_permissive_mode()
      return_code = 1

    # Add output from adb to the front.
    if adb_output:
      output = '%s\n\n%s' % (adb_output, output)

    # Kill the application if it is still running. We do this at the end to
    # prevent this from adding noise to the logcat output.
    task_name = environment.get_value('TASK_NAME')
    child_process_termination_pattern = environment.get_value(
        'CHILD_PROCESS_TERMINATION_PATTERN')
    if task_name == 'fuzz' and child_process_termination_pattern:
      # In some cases, we do not want to terminate the application after each
      # run to avoid long startup times (e.g. for chrome). Terminate processes
      # matching a particular pattern for light cleanup in this case.
      android.adb.kill_processes_and_children_matching_name(
          child_process_termination_pattern)
    else:
      # There is no special termination behavior. Simply stop the application.
      android.app.stop()
  else:
    # Get the return code in case the process has finished already.
    # If the process hasn't finished, return_code will be None which is what
    # callers expect unless the output indicates a crash.
    return_code = process_handle.poll()

    # If the process is still running, then terminate it.
    if not process_status.finished:
      launcher_with_interpreter = (
          shell.get_execute_command(launcher, is_blackbox_fuzzer=True)
          if launcher else None)
      if (launcher_with_interpreter and
          cmdline.startswith(launcher_with_interpreter)):
        # If this was a launcher script, we KILL all child processes created
        # except for APP_NAME.
        # It is expected that, if the launcher script terminated normally, it
        # cleans up all the child processes it created itself.
        terminate_root_and_child_processes(process_handle.pid)
      else:
        try:
          # kill() here actually sends SIGTERM on posix.
          process_handle.kill()
        except:
          pass

    if lsan:
      time.sleep(LSAN_ANALYSIS_TIME)

    output = b'\n'.join(process_output.output)
    output = utils.decode_to_unicode(output)

  # X Server hack when max client reached.
  if ('Maximum number of clients reached' in output or
      'Unable to get connection to X server' in output):
    logs.log_error('Unable to connect to X server, exiting.')
    os.system('sudo killall -9 Xvfb blackbox >/dev/null 2>&1')
    sys.exit(0)

  if testcase_run and (crash_analyzer.is_memory_tool_crash(output) or
                       crash_analyzer.is_check_failure_crash(output)):
    return_code = 1

  # If a crash is found, then we add the memory state as well.
  if return_code and plt == 'ANDROID':
    ps_output = android.adb.get_ps_output()
    if ps_output:
      output += utils.get_line_seperator('Memory Statistics')
      output += ps_output

  if return_code:
    logs.log_warn(
        'Process (%s) ended with exit code (%s).' %
        (repr(cmdline), str(return_code)),
        output=output)

  return return_code, round(time.time() - start_time, 1), output
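# Sketch of the gesture trigger-time convention handled above (the gesture
# strings are made up): a trailing 'Trigger:<seconds>' element overrides when
# gesture playback starts; otherwise playback starts at half the timeout.
_timeout = 120
_gestures = ['key,space', 'Trigger:30']
if _gestures and _gestures[-1].startswith('Trigger'):
  _gesture_start_time = int(_gestures[-1].split(':')[1])
  _gestures.pop()
else:
  _gesture_start_time = _timeout // 2
assert _gesture_start_time == 30 and _gestures == ['key,space']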
def check_for_bad_build(job_type, crash_revision):
  """Return true if the build is bad, i.e. crashes on startup."""
  # Check the bad build check flag to see if we want to do this.
  if not environment.get_value('BAD_BUILD_CHECK'):
    return False

  # Do not detect leaks while checking for bad builds.
  environment.reset_current_memory_tool_options(leaks=False)

  # Create a blank command line with no file to run and no http.
  command = get_command_line_for_application(file_to_run='', needs_http=False)

  # When checking for bad builds, we use the default window size.
  # We don't want to pick a custom size since it can potentially cause a
  # startup crash and cause a build to be detected incorrectly as bad.
  default_window_argument = environment.get_value('WINDOW_ARG', '')
  if default_window_argument:
    command = command.replace(' %s' % default_window_argument, '')

  # Warmup timeout.
  fast_warmup_timeout = environment.get_value('FAST_WARMUP_TIMEOUT')

  # TSAN is slow, and boots slow on first startup. Increase the warmup
  # timeout for this case.
  if environment.tool_matches('TSAN', job_type):
    fast_warmup_timeout = environment.get_value('WARMUP_TIMEOUT')

  # Initialize helper variables.
  is_bad_build = False
  build_run_console_output = ''
  output = ''
  app_directory = environment.get_value('APP_DIR')

  # Check if the build is bad.
  process_handler.terminate_stale_application_instances()
  exit_code, _, output = process_handler.run_process(
      command,
      timeout=fast_warmup_timeout,
      current_working_directory=app_directory)
  output = utils.decode_to_unicode(output)
  if crash_analyzer.is_crash(exit_code, output):
    is_bad_build = True
    build_run_console_output = '%s\n\n%s\n\n%s' % (
        command, stack_symbolizer.symbolize_stacktrace(output), output)
    logs.log(
        'Bad build for %s detected at r%d.' % (job_type, crash_revision),
        output=build_run_console_output)

  # Exit all running instances.
  process_handler.terminate_stale_application_instances()

  # Any of the conditions below indicate that the bot is in a bad state and it
  # is not caused by the build itself. In that case, just exit.
  build_state = data_handler.get_build_state(job_type, crash_revision)
  if is_bad_build and ('cannot open display' in output or
                       'logging service has stopped' in output or
                       'Maximum number of clients reached' in output):
    logs.log_fatal_and_exit(
        'Bad bot environment detected, exiting.',
        output=build_run_console_output)

  # If none of the other bots have added information about this build,
  # then add it now.
  if build_state == data_types.BuildState.UNMARKED:
    data_handler.add_build_metadata(job_type, crash_revision, is_bad_build,
                                    build_run_console_output)

  # Reset memory tool options.
  environment.reset_current_memory_tool_options()

  return is_bad_build
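# Tiny sketch of the window-argument stripping above (the command line and
# flag value are made up): the configured window argument is removed together
# with its leading space, so the application starts with its default window
# size.
_command = 'app --no-sandbox --window-size=1024,768 http://test'
_default_window_argument = '--window-size=1024,768'
_command = _command.replace(' %s' % _default_window_argument, '')
assert _command == 'app --no-sandbox http://test'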
def fuzz(self, target_path, options, reproducers_dir, max_time):
  """Run a fuzz session.

  Args:
    target_path: Path to the target.
    options: The FuzzOptions object returned by prepare().
    reproducers_dir: The directory to put reproducers in when crashes are
        found.
    max_time: Maximum allowed time for the fuzzing to run.

  Returns:
    A FuzzResult object.
  """
  profiler.start_if_needed('libfuzzer_fuzz')

  runner = libfuzzer.get_runner(target_path)
  launcher.set_sanitizer_options(target_path)

  # Directory to place new units.
  new_corpus_dir = self._create_temp_corpus_dir('new')

  corpus_directories = [new_corpus_dir] + options.fuzz_corpus_dirs
  fuzz_timeout = launcher.get_fuzz_timeout(
      options.is_mutations_run, total_timeout=max_time)
  fuzz_result = runner.fuzz(
      corpus_directories,
      fuzz_timeout=fuzz_timeout,
      additional_args=options.arguments,
      artifact_prefix=reproducers_dir,
      extra_env=options.extra_env)

  log_lines = utils.decode_to_unicode(fuzz_result.output).splitlines()
  # Output can be large, so save some memory by removing reference to the
  # original output which is no longer needed.
  fuzz_result.output = None

  # Check if we crashed, and get the crash testcase path.
  crash_testcase_file_path = runner.get_testcase_path(log_lines)

  # Parse stats information based on libFuzzer output.
  parsed_stats = launcher.parse_log_stats(log_lines)

  # Extend parsed stats by additional performance features.
  parsed_stats.update(
      stats.parse_performance_features(
          log_lines,
          options.strategies,
          options.arguments,
          include_strategies=False))

  # Set some initial stat overrides.
  timeout_limit = fuzzer_utils.extract_argument(
      options.arguments, constants.TIMEOUT_FLAG, remove=False)

  expected_duration = runner.get_max_total_time(fuzz_timeout)
  actual_duration = int(fuzz_result.time_executed)
  fuzzing_time_percent = 100 * actual_duration / float(expected_duration)
  parsed_stats.update({
      'timeout_limit': int(timeout_limit),
      'expected_duration': expected_duration,
      'actual_duration': actual_duration,
      'fuzzing_time_percent': fuzzing_time_percent,
  })

  # Remove fuzzing arguments before merge and dictionary analysis step.
  arguments = options.arguments[:]
  launcher.remove_fuzzing_arguments(arguments)
  self._merge_new_units(target_path, options.corpus_dir, new_corpus_dir,
                        options.fuzz_corpus_dirs, arguments, parsed_stats)

  fuzz_logs = '\n'.join(log_lines)
  crashes = []
  if crash_testcase_file_path:
    # Write the new testcase.
    # Copy crash testcase contents into the main testcase path.
    crashes.append(
        engine.Crash(crash_testcase_file_path, fuzz_logs, arguments,
                     actual_duration))

  project_qualified_fuzzer_name = (
      data_types.fuzz_target_project_qualified_name(
          utils.current_project(), os.path.basename(target_path)))
  launcher.analyze_and_update_recommended_dictionary(
      runner, project_qualified_fuzzer_name, log_lines, options.corpus_dir,
      arguments)

  return engine.FuzzResult(fuzz_logs, fuzz_result.command, crashes,
                           parsed_stats, fuzz_result.time_executed)
def parse_mime_to_crash_report_info(local_minidump_mime_path):
  """Read the (local) minidump MIME file into a CrashReportInfo object."""
  # Get the minidump name and path.
  minidump_path_match = re.match(r'(.*)\.mime', local_minidump_mime_path)
  if minidump_path_match is None:
    logs.log_error('Minidump filename in unexpected format: \'%s\'.' %
                   local_minidump_mime_path)
    return None
  minidump_path = '%s.dmp' % minidump_path_match.group(1).strip()

  # Reformat the minidump MIME to include the boundary.
  with open(local_minidump_mime_path, 'rb') as minidump_mime_file_content:
    # The boundary is the first line after the first two dashes.
    boundary = minidump_mime_file_content.readline().strip()[2:]
    minidump_mime_string = (
        'Content-Type: multipart/form-data; boundary=\"%s\"\r\n--%s\r\n' %
        (boundary, boundary))
    minidump_mime_string += minidump_mime_file_content.read()
  minidump_mime_contents = email.message_from_string(minidump_mime_string)

  # Parse the MIME contents, extracting the parameters needed for upload.
  mime_key_values = {}
  for mime_part in minidump_mime_contents.get_payload():
    if isinstance(mime_part, str):
      mime_part = utils.decode_to_unicode(mime_part)
      logs.log_error('Unexpected str mime_part from mime path %s: %s' %
                     (local_minidump_mime_path, mime_part))
      continue
    part_descriptor = list(mime_part.values())
    key_tokens = part_descriptor[0].split('; ')
    key_match = re.match(r'name="(.*)".*', key_tokens[1])

    # Extract from the MIME part the key-value pairs used by report uploading.
    if key_match is not None:
      report_key = key_match.group(1)
      report_value = mime_part.get_payload()
      if report_key == MINIDUMP_FILE_KEY:
        utils.write_data_to_file(report_value, minidump_path)
      else:
        # Take care of aliases.
        if report_key == 'prod' or report_key == 'buildTargetId':
          report_key = PRODUCT_KEY
        elif report_key == 'ver':
          report_key = VERSION_KEY

        # Save the key-value pair.
        mime_key_values[report_key] = report_value

  # Pull out product and version explicitly since these are required
  # for upload.
  product, version = None, None
  if PRODUCT_KEY in mime_key_values:
    product = mime_key_values.pop(PRODUCT_KEY)
  else:
    logs.log_error('Could not find \'%s\' or alias in mime_key_values key.' %
                   PRODUCT_KEY)
  if VERSION_KEY in mime_key_values:
    version = mime_key_values.pop(VERSION_KEY)
  else:
    logs.log_error('Could not find \'%s\' or alias in mime_key_values key.' %
                   VERSION_KEY)

  # If missing, return None and log keys that do exist; otherwise, construct
  # CrashReportInfo and return.
  if product is None or version is None:
    logs.log_error('mime_key_values dict keys:\n%s' %
                   str(list(mime_key_values.keys())))
    return None

  return CrashReportInfo(
      minidump_path=minidump_path,
      product=product,
      version=version,
      optional_params=mime_key_values)
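# Standard-library-only sketch (the sample content is made up) of the boundary
# reconstruction done above: a .mime file begins with '--<boundary>', and
# prepending a matching Content-Type header lets the email module parse the
# multipart payload.
import email

_sample = ('--myboundary\r\n'
           'Content-Disposition: form-data; name="prod"\r\n'
           '\r\n'
           'Chrome_Android\r\n'
           '--myboundary--\r\n')
_boundary = _sample.splitlines()[0].strip()[2:]
_mime_string = ('Content-Type: multipart/form-data; boundary="%s"\r\n--%s\r\n'
                % (_boundary, _boundary))
# Skip the file's first '--boundary' line; the prepended header re-opens the
# first part, mirroring the readline() above that consumes that line.
_mime_string += _sample.split('\r\n', 1)[1]

_message = email.message_from_string(_mime_string)
_part = _message.get_payload()[0]
print(list(_part.values())[0])  # -> form-data; name="prod"
print(_part.get_payload())      # -> Chrome_Android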
def fuzz(self, target_path, options, reproducers_dir, max_time):
  """Run a fuzz session.

  Args:
    target_path: Path to the target.
    options: The FuzzOptions object returned by prepare().
    reproducers_dir: The directory to put reproducers in when crashes are
        found.
    max_time: Maximum allowed time for the fuzzing to run.

  Returns:
    A FuzzResult object.
  """
  profiler.start_if_needed('libfuzzer_fuzz')

  runner = libfuzzer.get_runner(target_path)
  libfuzzer.set_sanitizer_options(target_path)

  # Directory to place new units.
  new_corpus_dir = self._create_temp_corpus_dir('new')

  corpus_directories = [new_corpus_dir] + options.fuzz_corpus_dirs
  fuzz_timeout = libfuzzer.get_fuzz_timeout(
      options.is_mutations_run, total_timeout=max_time)
  fuzz_result = runner.fuzz(
      corpus_directories,
      fuzz_timeout=fuzz_timeout,
      additional_args=options.arguments,
      artifact_prefix=reproducers_dir,
      extra_env=options.extra_env)

  project_qualified_fuzzer_name = _project_qualified_fuzzer_name(target_path)
  dict_error_match = DICT_PARSING_FAILED_REGEX.search(fuzz_result.output)
  if dict_error_match:
    logs.log_error(
        'Dictionary parsing failed (target={target}, line={line}).'.format(
            target=project_qualified_fuzzer_name,
            line=dict_error_match.group(1)),
        engine_output=fuzz_result.output)
  elif (not environment.get_value('USE_MINIJAIL') and
        fuzz_result.return_code == constants.LIBFUZZER_ERROR_EXITCODE):
    # Minijail returns 1 if the exit code is nonzero.
    # Otherwise: we can assume that a return code of 1 means that libFuzzer
    # itself ran into an error.
    logs.log_error(
        ENGINE_ERROR_MESSAGE +
        ' (target={target}).'.format(target=project_qualified_fuzzer_name),
        engine_output=fuzz_result.output)

  log_lines = utils.decode_to_unicode(fuzz_result.output).splitlines()
  # Output can be large, so save some memory by removing reference to the
  # original output which is no longer needed.
  fuzz_result.output = None

  # Check if we crashed, and get the crash testcase path.
  crash_testcase_file_path = runner.get_testcase_path(log_lines)

  # If we exited with a non-zero return code with no crash file in output from
  # libFuzzer, this is most likely a startup crash. Use an empty testcase to
  # store it as a crash.
  if not crash_testcase_file_path and fuzz_result.return_code:
    crash_testcase_file_path = self._create_empty_testcase_file()

  # Parse stats information based on libFuzzer output.
  parsed_stats = libfuzzer.parse_log_stats(log_lines)

  # Extend parsed stats by additional performance features.
  parsed_stats.update(
      stats.parse_performance_features(
          log_lines,
          options.strategies,
          options.arguments,
          include_strategies=False))

  # Set some initial stat overrides.
  timeout_limit = fuzzer_utils.extract_argument(
      options.arguments, constants.TIMEOUT_FLAG, remove=False)

  expected_duration = runner.get_max_total_time(fuzz_timeout)
  actual_duration = int(fuzz_result.time_executed)
  fuzzing_time_percent = 100 * actual_duration / float(expected_duration)
  parsed_stats.update({
      'timeout_limit': int(timeout_limit),
      'expected_duration': expected_duration,
      'actual_duration': actual_duration,
      'fuzzing_time_percent': fuzzing_time_percent,
  })

  # Remove fuzzing arguments before merge and dictionary analysis step.
  arguments = options.arguments[:]
  libfuzzer.remove_fuzzing_arguments(arguments)
  self._merge_new_units(target_path, options.corpus_dir, new_corpus_dir,
                        options.fuzz_corpus_dirs, arguments, parsed_stats)

  fuzz_logs = '\n'.join(log_lines)
  crashes = []
  if crash_testcase_file_path:
    # Use higher timeout for reproduction.
    reproduce_arguments = arguments[:]
    libfuzzer.fix_timeout_argument_for_reproduction(reproduce_arguments)

    # Write the new testcase.
    # Copy crash testcase contents into the main testcase path.
    crashes.append(
        engine.Crash(crash_testcase_file_path, fuzz_logs, reproduce_arguments,
                     actual_duration))

  libfuzzer.analyze_and_update_recommended_dictionary(
      runner, project_qualified_fuzzer_name, log_lines, options.corpus_dir,
      arguments)

  return engine.FuzzResult(fuzz_logs, fuzz_result.command, crashes,
                           parsed_stats, fuzz_result.time_executed)
def execute_task(full_fuzzer_name, job_type):
  """Execute the ML RNN training task.

  By default, this task trains an RNN model. If more models are developed,
  arguments can be modified to specify which model to use.

  Args:
    full_fuzzer_name: Name of fuzzer, e.g. libpng_read_fuzzer.
    job_type: Job type, e.g. libfuzzer_chrome_asan.
  """
  del job_type

  # Sets up fuzzer binary build.
  fuzz_target = data_handler.get_fuzz_target(full_fuzzer_name)
  if not fuzz_target:
    logs.log_warn(f'Fuzzer not found: {full_fuzzer_name}, skip RNN training.')
    return

  fuzzer_name = fuzz_target.project_qualified_name()

  # Directory to place training files, such as logs, models, corpus.
  # Use |FUZZ_INPUTS_DISK| since it is not size constrained.
  temp_directory = environment.get_value('FUZZ_INPUTS_DISK')

  # Get corpus.
  corpus_directory = get_corpus_directory(temp_directory, fuzzer_name)
  shell.remove_directory(corpus_directory, recreate=True)

  logs.log('Downloading corpus backup for %s.' % fuzzer_name)

  if not ml_train_utils.get_corpus(corpus_directory, fuzzer_name):
    logs.log_error('Failed to download corpus backup for %s.' % fuzzer_name)
    return

  # Get the directory to save models.
  model_directory = get_model_files_directory(temp_directory, fuzzer_name)
  shell.remove_directory(model_directory, recreate=True)

  # Get the directory to save training logs.
  log_directory = get_model_log_directory(temp_directory, fuzzer_name)
  shell.remove_directory(log_directory, recreate=True)

  result = train_rnn(corpus_directory, model_directory, log_directory)

  # The training process exited abnormally but not due to a timeout, meaning
  # an error occurred during execution.
  if result.return_code and not result.timed_out:
    if result.return_code == constants.ExitCode.CORPUS_TOO_SMALL:
      logs.log_warn(
          'ML RNN training task for fuzzer %s aborted due to small corpus.' %
          fuzzer_name)
    else:
      logs.log_error(
          'ML RNN training task for fuzzer %s failed with ExitCode = %d.' %
          (fuzzer_name, result.return_code),
          output=utils.decode_to_unicode(result.output))
    return

  # Timing out may be caused by a large training corpus, but intermediate
  # models are frequently saved and can still be uploaded.
  if result.timed_out:
    logs.log_warn('ML RNN training task for %s timed out.' % fuzzer_name)

  upload_model_to_gcs(model_directory, fuzzer_name)
def execute(input_directory, output_directory, fuzzer_name,
            generation_timeout):
  """Execute the ML RNN generator to produce new inputs.

  This method should be called inside the launcher to generate a number of
  new inputs based on the ML RNN model. It fetches the ML model from the GCS
  bucket specified in the environment variable `CORPUS_BUCKET`. The script to
  run the model resides in the folder `tools/fuzzers/ml/rnn`.

  Args:
    input_directory: Seed corpus path. The directory should not be empty.
    output_directory: The directory to place generated inputs.
    fuzzer_name: Name of the fuzzer, e.g. libpng_read_fuzzer. It indicates
        the subdirectory in the GCS bucket where models are stored.
    generation_timeout: Time in seconds for the generator to run. Normally it
        takes <1s to generate an input, assuming the input length is <4KB.
  """
  if environment.platform() != 'LINUX':
    logs.log('Unsupported platform for ML RNN generation, skipping.')
    return

  # Validate corpus folder.
  file_count = shell.get_directory_file_count(input_directory)
  if not file_count:
    logs.log('Corpus is empty. Skipping generation.')
    return

  # Number of existing new inputs. They were possibly generated by other
  # generators.
  old_corpus_units = shell.get_directory_file_count(output_directory)
  old_corpus_bytes = shell.get_directory_size(output_directory)

  # Get model path.
  model_path = prepare_model_directory(fuzzer_name)
  if not model_path:
    return

  result = run(input_directory, output_directory, model_path,
               generation_timeout)

  # The generation process exited abnormally but not due to a timeout, meaning
  # an error occurred during execution.
  if result.return_code and not result.timed_out:
    if result.return_code == constants.ExitCode.CORPUS_TOO_SMALL:
      logs.log_warn(
          'ML RNN generation for fuzzer %s aborted due to small corpus.' %
          fuzzer_name)
    else:
      logs.log_error(
          'ML RNN generation for fuzzer %s failed with ExitCode = %d.' %
          (fuzzer_name, result.return_code),
          output=utils.decode_to_unicode(result.output))
    return

  # A timeout is not an error if we have new units generated.
  if result.timed_out:
    logs.log_warn('ML RNN generation for fuzzer %s timed out.' % fuzzer_name)

  new_corpus_units = (
      shell.get_directory_file_count(output_directory) - old_corpus_units)
  new_corpus_bytes = (
      shell.get_directory_size(output_directory) - old_corpus_bytes)
  if new_corpus_units:
    logs.log('Added %d new inputs (%d bytes) using ML RNN generator for %s.' %
             (new_corpus_units, new_corpus_bytes, fuzzer_name))
  else:
    logs.log_error(
        'ML RNN generator did not produce any inputs for %s.' % fuzzer_name,
        output=utils.decode_to_unicode(result.output))
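# Illustrative arithmetic for the corpus-delta bookkeeping above (the counts
# are made up): new units/bytes are post-run totals minus pre-run totals, so
# inputs that other generators placed in output_directory beforehand are not
# attributed to the RNN generator.
_old_corpus_units, _old_corpus_bytes = 120, 48000
_post_run_units, _post_run_bytes = 150, 61500
assert _post_run_units - _old_corpus_units == 30
assert _post_run_bytes - _old_corpus_bytes == 13500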