def cleanse_testcase(runner, testcase_file_path, cleanse_to, cleanse_timeout,
                     arguments, use_minijail):
  """Run libFuzzer's cleanse mode on a crash testcase.

  Strips fuzzing-only flags from |arguments|, points the artifact prefix at a
  scratch location for in-progress cleanse outputs, then invokes the runner
  and echoes the command and its output.
  """
  remove_fuzzing_arguments(arguments)

  # In-progress cleanse testcases go to a temp location: the minijail tmp
  # artifact prefix inside the chroot, or a scratch dir otherwise.
  if use_minijail:
    arguments.append(constants.TMP_ARTIFACT_PREFIX_ARGUMENT)
  else:
    scratch_dir = os.path.join(fuzzer_utils.get_temp_dir(), 'cleanse_temp')
    engine_common.recreate_directory(scratch_dir)
    arguments.append('%s%s/' % (constants.ARTIFACT_PREFIX_FLAG, scratch_dir))

  # Ask the fuzzer binary to cleanse the testcase.
  result = runner.cleanse_crash(
      testcase_file_path,
      cleanse_to,
      cleanse_timeout,
      additional_args=arguments)

  print(
      'Running command:',
      get_printable_command(result.command, runner.executable_path,
                            use_minijail))
  print(result.output)
def get_config():
  """Generate the Syzkaller JSON config and return matching CLI arguments.

  Writes a config file under the fuzzer temp directory and returns the
  '-config <path>' argument pair pointing at it.
  """
  serial = environment.get_value('ANDROID_SERIAL')
  syzkaller_binary = os.path.join(
      environment.get_value('BUILD_DIR'), 'syzkaller')
  json_config_path = os.path.join(fuzzer_utils.get_temp_dir(), 'config.json')

  # Fall back to the vmlinux staged under /tmp/<serial> when VMLINUX_PATH is
  # not set explicitly.
  vmlinux_path = environment.get_value(
      'VMLINUX_PATH', os.path.join('/tmp', serial, 'vmlinux'))

  config.generate(
      serial=serial,
      work_dir_path=get_work_dir(),
      binary_path=syzkaller_binary,
      vmlinux_path=vmlinux_path,
      config_path=json_config_path,
      kcov=True,
      reproduce=False,
      syzhub_address=environment.get_value('SYZHUB_ADDRESS'),
      syzhub_client=environment.get_value('SYZHUB_CLIENT'),
      syzhub_key=environment.get_value('SYZHUB_KEY'))
  return ['-config', json_config_path]
def analyze_and_update_recommended_dictionary(runner, fuzzer_name, log_lines,
                                              corpus_directory, arguments):
  """Extract and analyze recommended dictionary from fuzzer output, then
  update the corresponding dictionary stored in GCS if needed.

  Args:
    runner: Fuzzer runner used to execute the '-analyze_dict=1' run.
    fuzzer_name: Fully qualified fuzz target name.
    log_lines: Output lines of the fuzzing run to parse recommendations from.
    corpus_directory: Corpus directory used for the dictionary analysis run.
    arguments: Additional arguments passed through to the analysis run.

  Returns:
    Set of useful recommended dictionary elements, or None when there was
    nothing to recommend or the analysis did not complete successfully.
  """
  logs.log('Extracting and analyzing recommended dictionary for %s.' %
           fuzzer_name)

  # Extract recommended dictionary elements from the log.
  dict_manager = dictionary_manager.DictionaryManager(fuzzer_name)
  recommended_dictionary = (
      dict_manager.parse_recommended_dictionary_from_log_lines(log_lines))
  if not recommended_dictionary:
    logs.log('No recommended dictionary in output from %s.' % fuzzer_name)
    return None

  # Write recommended dictionary into a file and run '-analyze_dict=1'.
  temp_dictionary_filename = (
      fuzzer_name + dictionary_manager.DICTIONARY_FILE_EXTENSION + '.tmp')
  temp_dictionary_path = os.path.join(fuzzer_utils.get_temp_dir(),
                                      temp_dictionary_filename)
  with open(temp_dictionary_path, 'w') as file_handle:
    file_handle.write('\n'.join(recommended_dictionary))

  dictionary_analysis = runner.analyze_dictionary(
      temp_dictionary_path,
      corpus_directory,
      analyze_timeout=get_dictionary_analysis_timeout(),
      additional_args=arguments)

  # Bail out (returning None) if the analysis run did not finish cleanly; in
  # that case we cannot tell useful elements from useless ones.
  if dictionary_analysis.timed_out:
    logs.log_warn('Recommended dictionary analysis for %s timed out.' %
                  fuzzer_name)
    return None

  if dictionary_analysis.return_code != 0:
    logs.log_warn('Recommended dictionary analysis for %s failed: %d.' %
                  (fuzzer_name, dictionary_analysis.return_code))
    return None

  # Extract dictionary elements considered useless, calculate the result.
  useless_dictionary = dict_manager.parse_useless_dictionary_from_data(
      dictionary_analysis.output)

  logs.log('%d out of %d recommended dictionary elements for %s are useless.'
           % (len(useless_dictionary), len(recommended_dictionary),
              fuzzer_name))

  # Keep only the elements the analysis did not flag as useless.
  recommended_dictionary = set(recommended_dictionary) - set(
      useless_dictionary)
  if not recommended_dictionary:
    return None

  new_elements_added = dict_manager.update_recommended_dictionary(
      recommended_dictionary)
  logs.log('Added %d new elements to the recommended dictionary for %s.' %
           (new_elements_added, fuzzer_name))
  return recommended_dictionary
def test_exit_failure_logged(self):
  """Test that we log when libFuzzer's exit code indicates it ran into an
  error."""
  test_helpers.patch(self, ['metrics.logs.log_error'])

  def assert_engine_error_logged(*args, **kwargs):  # pylint: disable=unused-argument
    self.assertIn(engine.ENGINE_ERROR_MESSAGE, args[0])

  self.mock.log_error.side_effect = assert_engine_error_logged
  _, corpus_path = setup_testcase_and_corpus('empty', 'corpus_with_some_files')

  target_path = engine_common.find_fuzzer_path(DATA_DIR, 'exit_fuzzer')
  engine_impl = engine.LibFuzzerEngine()
  options = engine_impl.prepare(corpus_path, target_path, DATA_DIR)
  # Make the target exit with code 1, which the engine reports as its own
  # error rather than a target crash.
  options.extra_env['EXIT_FUZZER_CODE'] = '1'

  results = engine_impl.fuzz(target_path, options, TEMP_DIR, 10)

  self.assertEqual(1, self.mock.log_error.call_count)
  self.assertEqual(1, len(results.crashes))
  self.assertEqual(fuzzer_utils.get_temp_dir(),
                   os.path.dirname(results.crashes[0].input_path))
  self.assertEqual(0, os.path.getsize(results.crashes[0].input_path))
def _test_merge_reductions(self, temp_subdir):
  """Tests that reduced testcases are merged back into the original corpus
  without deleting the larger version.

  Args:
    temp_subdir: Subdirectory of the fuzzer temp dir to use as the parent of
        the mocked merge directory, so each caller gets an isolated path.
  """
  testcase_path = setup_testcase_and_corpus('empty', 'empty_corpus', fuzz=True)
  fuzz_target_name = 'analyze_dict_fuzzer'
  test_helpers.patch(self, [
      'bot.fuzzers.libFuzzer.launcher.create_merge_directory',
      'bot.fuzzers.libFuzzer.launcher.get_merge_directory',
      'bot.fuzzers.libFuzzer.launcher.parse_log_stats',
  ])

  # Force 'new_units_added' to be nonzero so the launcher actually performs
  # the merge step.
  log_stats = collections.defaultdict(int)
  log_stats['new_units_added'] = 1
  self.mock.parse_log_stats.side_effect = lambda logs: log_stats

  self.mock.get_merge_directory.side_effect = lambda: os.path.join(
      fuzzer_utils.get_temp_dir(), temp_subdir, launcher.MERGE_DIRECTORY_NAME)

  # Fixture units: the "nonminimal" unit is the "minimal" unit plus one byte.
  # Filenames are fixed digests — presumably content hashes used as corpus
  # unit names; TODO confirm against the corpus-naming convention.
  minimal_unit_contents = 'APPLE'
  minimal_unit_hash = '569bea285d70dda2218f89ef5454ea69fb5111ef'
  nonminimal_unit_contents = 'APPLEO'
  nonminimal_unit_hash = '540d9ba6239483d60cd7448a3202b96c90409186'

  def mocked_create_merge_directory():
    """A mocked version of create_merge_directory that adds some interesting
    files to the merge corpus and initial corpus."""
    merge_directory_path = launcher.get_merge_directory()
    shell.create_directory(
        merge_directory_path, create_intermediates=True, recreate=True)

    # Write the minimal unit to the merge directory.
    minimal_unit_path = os.path.join(merge_directory_path, minimal_unit_hash)
    with open(minimal_unit_path, 'w+') as file_handle:
      file_handle.write(minimal_unit_contents)

    # Write the nonminimal unit to the corpus directory.
    corpus_directory = os.getenv('FUZZ_CORPUS_DIR')
    nonminimal_unit_path = os.path.join(corpus_directory,
                                        nonminimal_unit_hash)
    with open(nonminimal_unit_path, 'w+') as file_handle:
      file_handle.write(nonminimal_unit_contents)

    return merge_directory_path

  self.mock.create_merge_directory.side_effect = mocked_create_merge_directory

  run_launcher(testcase_path, fuzz_target_name, '-runs=10')
  corpus_directory = os.getenv('FUZZ_CORPUS_DIR')
  # Verify that both the newly found minimal testcase and the nonminimal
  # testcase are in the corpus.
  self.assertIn(minimal_unit_hash, os.listdir(corpus_directory))
  self.assertIn(nonminimal_unit_hash, os.listdir(corpus_directory))
def __init__(self, target_path, config, testcase_file_path,
             input_directory, afl_tools_path=None):
  """Inits the AflRunner.

  Args:
    target_path: Path to the fuzz target.
    config: AflConfig object.
    testcase_file_path: File to write crashes to.
    input_directory: Corpus directory passed to afl-fuzz.
    afl_tools_path: Path that is used to locate afl-* tools. Defaults to the
        directory containing |target_path|.
  """
  self.target_path = target_path
  self.config = config
  self.testcase_file_path = testcase_file_path
  self._input_directory = input_directory

  if afl_tools_path is None:
    afl_tools_path = os.path.dirname(target_path)

  # Set paths to afl tools.
  self.afl_fuzz_path = os.path.join(afl_tools_path, 'afl-fuzz')
  self.afl_showmap_path = os.path.join(afl_tools_path, 'afl-showmap')

  self._afl_input = None
  self._afl_output = None

  self.strategies = FuzzingStrategies()

  # Set this to None so we can tell if it has never been set or if it's just
  # empty.
  self._fuzzer_stderr = None

  self.initial_max_total_time = 0

  # Use items() rather than the Python 2-only iteritems() so this also runs
  # under Python 3; behavior is identical for this iteration.
  for env_var, value in config.additional_env_vars.items():
    environment.set_value(env_var, value)

  self.showmap_output_path = os.path.join(fuzzer_utils.get_temp_dir(),
                                          self.SHOWMAP_FILENAME)
  self.merge_timeout = engine_common.get_merge_timeout(DEFAULT_MERGE_TIMEOUT)
  self.showmap_no_output_logged = False
def add_recommended_dictionary(arguments, fuzzer_name, fuzzer_path):
  """Add recommended dictionary from GCS to existing .dict file or create
  a new one and update the arguments as needed.
  This function modifies |arguments| list in some cases.

  Returns:
    False if the recommended dictionary could not be downloaded.
  """
  recommended_dictionary_path = os.path.join(
      fuzzer_utils.get_temp_dir(),
      dictionary_manager.RECOMMENDED_DICTIONARY_FILENAME)

  dict_manager = dictionary_manager.DictionaryManager(fuzzer_name)

  try:
    # Bail out if cannot download recommended dictionary from GCS.
    if not dict_manager.download_recommended_dictionary_from_gcs(
        recommended_dictionary_path):
      return False
  # Use 'as' binding instead of the Python 2-only 'except Exception, ex'
  # syntax (a SyntaxError under Python 3); consistent with the other copy of
  # this function in the file.
  except Exception as ex:
    logs.log_error('Exception downloading recommended dictionary:\n%s.' %
                   str(ex))
    return False
def test_exit_target_bug_not_logged(self, exit_code):
  """Test that we don't log when exit code indicates bug found in target."""
  test_helpers.patch(self, ['metrics.logs.log_error'])

  def assert_no_engine_error(*args, **kwargs):  # pylint: disable=unused-argument
    self.assertNotIn(engine.ENGINE_ERROR_MESSAGE, args[0])

  self.mock.log_error.side_effect = assert_no_engine_error

  _, corpus_path = setup_testcase_and_corpus('empty', 'corpus_with_some_files')
  target_path = engine_common.find_fuzzer_path(DATA_DIR, 'exit_fuzzer')
  engine_impl = engine.LibFuzzerEngine()

  options = engine_impl.prepare(corpus_path, target_path, DATA_DIR)
  options.extra_env['EXIT_FUZZER_CODE'] = exit_code
  results = engine_impl.fuzz(target_path, options, TEMP_DIR, 10)

  self.assertEqual(1, len(results.crashes))
  crash_input_path = results.crashes[0].input_path
  self.assertEqual(fuzzer_utils.get_temp_dir(),
                   os.path.dirname(crash_input_path))
  self.assertEqual(0, os.path.getsize(crash_input_path))
def add_recommended_dictionary(arguments, fuzzer_name, fuzzer_path):
  """Merge the GCS recommended dictionary into the dictionary for this run.

  Downloads the recommended dictionary for |fuzzer_name|, merges it with the
  dictionary already referenced in |arguments| (or the target's default one),
  and appends a dict flag pointing at the merged file.
  This function modifies |arguments| list in some cases.

  Returns:
    True if a merged dictionary was added to |arguments|, False otherwise.
  """
  download_path = os.path.join(
      fuzzer_utils.get_temp_dir(),
      dictionary_manager.RECOMMENDED_DICTIONARY_FILENAME)
  dict_manager = dictionary_manager.DictionaryManager(fuzzer_name)

  try:
    # Nothing to merge when GCS has no recommended dictionary for the target.
    if not dict_manager.download_recommended_dictionary_from_gcs(
        download_path):
      return False
  except Exception as ex:
    logs.log_error('Exception downloading recommended dictionary:\n%s.' %
                   str(ex))
    return False

  # An empty download is useless; keep the original arguments untouched.
  if not os.path.getsize(download_path):
    return False

  # Base the merge on the dictionary already passed via arguments if present,
  # otherwise on the target's default dictionary path.
  original_dictionary_path = fuzzer_utils.extract_argument(
      arguments, constants.DICT_FLAG)
  base_dictionary_path = (
      original_dictionary_path or
      dictionary_manager.get_default_dictionary_path(fuzzer_path))
  merged_dictionary_path = base_dictionary_path + MERGED_DICT_SUFFIX

  dictionary_manager.merge_dictionary_files(
      original_dictionary_path, download_path, merged_dictionary_path)
  arguments.append(constants.DICT_FLAG + merged_dictionary_path)
  return True
def create_corpus_directory(name):
  """Create (or recreate) a corpus directory called |name| in the temp dir.

  Returns:
    Full path of the freshly created corpus directory.
  """
  corpus_dir = os.path.join(fuzzer_utils.get_temp_dir(), name)
  engine_common.recreate_directory(corpus_dir)
  return corpus_dir
def _create_empty_testcase_file(self):
  """Create an empty testcase file in temporary directory.

  Returns the path only; NOTE: the open file descriptor returned by mkstemp
  is discarded without being closed here.
  """
  return tempfile.mkstemp(dir=fuzzer_utils.get_temp_dir())[1]
def get_work_dir():
  """Return work directory for Syzkaller."""
  return os.path.join(fuzzer_utils.get_temp_dir(), 'syzkaller')
def _create_temp_corpus_dir(self, name):
  """Create (wiping any previous contents) a temp corpus dir named |name|."""
  path = os.path.join(fuzzer_utils.get_temp_dir(), name)
  engine_common.recreate_directory(path)
  return path
def __init__(self):
  """Set up a clean afl output directory under the fuzzer temp dir."""
  output_dir = os.path.join(fuzzer_utils.get_temp_dir(), 'afl_output_dir')
  engine_common.recreate_directory(output_dir)
  self.output_directory = output_dir
def main(argv):
  """Run libFuzzer as specified by argv.

  argv layout: argv[1] is the testcase file path; an optional legacy fuzz
  target name may follow and is discarded. The fuzz target itself comes from
  the FUZZ_TARGET environment variable. With FUZZ_CORPUS_DIR set this
  performs a fuzzing run (fuzz, merge new units, record stats); without it,
  it only reproduces an existing testcase.
  """
  atexit.register(fuzzer_utils.cleanup)

  # Initialize variables.
  arguments = argv[1:]
  testcase_file_path = arguments.pop(0)
  target_name = environment.get_value('FUZZ_TARGET')
  if arguments and arguments[0] == target_name:
    # Pop legacy fuzz target argument.
    arguments.pop(0)

  fuzzer_name = data_types.fuzz_target_project_qualified_name(
      utils.current_project(), target_name)

  # Initialize log handler.
  logs.configure(
      'run_fuzzer', {
          'fuzzer': fuzzer_name,
          'engine': 'libFuzzer',
          'job_name': environment.get_value('JOB_NAME')
      })

  profiler.start_if_needed('libfuzzer_launcher')

  # Make sure that the fuzzer binary exists.
  build_directory = environment.get_value('BUILD_DIR')
  fuzzer_path = engine_common.find_fuzzer_path(build_directory, target_name)
  if not fuzzer_path:
    return

  # Install signal handler.
  signal.signal(signal.SIGTERM, engine_common.signal_term_handler)

  # Set up temp dir.
  engine_common.recreate_directory(fuzzer_utils.get_temp_dir())

  # Setup minijail if needed.
  use_minijail = environment.get_value('USE_MINIJAIL')
  runner = libfuzzer.get_runner(
      fuzzer_path, temp_dir=fuzzer_utils.get_temp_dir())

  if use_minijail:
    minijail_chroot = runner.chroot
  else:
    minijail_chroot = None

  # Get corpus directory.
  corpus_directory = environment.get_value('FUZZ_CORPUS_DIR')

  # Add common arguments which are necessary to be used for every run.
  arguments = expand_with_common_arguments(arguments)

  # Add sanitizer options to environment that were specified in the .options
  # file and options that this script requires.
  set_sanitizer_options(fuzzer_path)

  # If we don't have a corpus, then that means this is not a fuzzing run.
  if not corpus_directory:
    load_testcase_if_exists(runner, testcase_file_path, fuzzer_name,
                            use_minijail, arguments)
    return

  # We don't have a crash testcase, fuzz.

  # Check dict argument to make sure that it's valid. An invalid dict path is
  # logged and the flag is dropped (extract_argument with default remove=True
  # removes it from |arguments|).
  dict_argument = fuzzer_utils.extract_argument(
      arguments, constants.DICT_FLAG, remove=False)
  if dict_argument and not os.path.exists(dict_argument):
    logs.log_error('Invalid dict %s for %s.' % (dict_argument, fuzzer_name))
    fuzzer_utils.extract_argument(arguments, constants.DICT_FLAG)

  # If there's no dict argument, check for %target_binary_name%.dict file.
  if (not fuzzer_utils.extract_argument(
      arguments, constants.DICT_FLAG, remove=False)):
    default_dict_path = dictionary_manager.get_default_dictionary_path(
        fuzzer_path)
    if os.path.exists(default_dict_path):
      arguments.append(constants.DICT_FLAG + default_dict_path)

  # Set up scratch directory for writing new units.
  new_testcases_directory = create_corpus_directory('new')

  # Strategy pool is the list of strategies that we attempt to enable, whereas
  # fuzzing strategies is the list of strategies that are enabled. (e.g. if
  # mutator is selected in the pool, but not available for a given target, it
  # would not be added to fuzzing strategies.)
  strategy_pool = strategy_selection.generate_weighted_strategy_pool(
      strategy_list=strategy.LIBFUZZER_STRATEGY_LIST,
      use_generator=True,
      engine_name='libFuzzer')
  strategy_info = pick_strategies(
      strategy_pool,
      fuzzer_path,
      corpus_directory,
      arguments,
      minijail_chroot=minijail_chroot)
  arguments.extend(strategy_info.arguments)

  # Timeout for fuzzer run.
  fuzz_timeout = get_fuzz_timeout(strategy_info.is_mutations_run)

  # Get list of corpus directories.
  # TODO(flowerhack): Implement this to handle corpus sync'ing.
  if environment.platform() == 'FUCHSIA':
    corpus_directories = []
  else:
    corpus_directories = get_corpus_directories(
        corpus_directory,
        new_testcases_directory,
        fuzzer_path,
        strategy_info.fuzzing_strategies,
        strategy_pool,
        minijail_chroot=minijail_chroot,
        allow_corpus_subset=not strategy_info.use_dataflow_tracing)

  corpus_directories.extend(strategy_info.additional_corpus_dirs)

  # Crash artifacts are written next to the testcase file.
  artifact_prefix = os.path.abspath(os.path.dirname(testcase_file_path))

  # Execute the fuzzer binary with original arguments.
  fuzz_result = runner.fuzz(
      corpus_directories,
      fuzz_timeout=fuzz_timeout,
      artifact_prefix=artifact_prefix,
      additional_args=arguments,
      extra_env=strategy_info.extra_env)

  if (not use_minijail and
      fuzz_result.return_code == constants.LIBFUZZER_ERROR_EXITCODE):
    # Minijail returns 1 if the exit code is nonzero.
    # Otherwise: we can assume that a return code of 1 means that libFuzzer
    # itself ran into an error.
    logs.log_error(ENGINE_ERROR_MESSAGE, engine_output=fuzz_result.output)

  log_lines = fuzz_result.output.splitlines()
  # Output can be large, so save some memory by removing reference to the
  # original output which is no longer needed.
  fuzz_result.output = None

  # Check if we crashed, and get the crash testcase path.
  crash_testcase_file_path = runner.get_testcase_path(log_lines)
  if crash_testcase_file_path:
    # Copy crash testcase contents into the main testcase path.
    shutil.move(crash_testcase_file_path, testcase_file_path)

  # Print the command output.
  bot_name = environment.get_value('BOT_NAME', '')
  command = fuzz_result.command
  if use_minijail:
    # Remove minijail prefix.
    command = engine_common.strip_minijail_command(command, fuzzer_path)
  print(
      engine_common.get_log_header(command, bot_name,
                                   fuzz_result.time_executed))

  # Parse stats information based on libFuzzer output.
  parsed_stats = parse_log_stats(log_lines)

  # Extend parsed stats by additional performance features.
  parsed_stats.update(
      stats.parse_performance_features(
          log_lines, strategy_info.fuzzing_strategies, arguments))

  # Set some initial stat overrides.
  # NOTE(review): int(timeout_limit) assumes a timeout flag is always present
  # after expand_with_common_arguments; int(None) would raise otherwise —
  # TODO confirm.
  timeout_limit = fuzzer_utils.extract_argument(
      arguments, constants.TIMEOUT_FLAG, remove=False)
  expected_duration = runner.get_max_total_time(fuzz_timeout)
  actual_duration = int(fuzz_result.time_executed)
  fuzzing_time_percent = 100 * actual_duration / float(expected_duration)
  stat_overrides = {
      'timeout_limit': int(timeout_limit),
      'expected_duration': expected_duration,
      'actual_duration': actual_duration,
      'fuzzing_time_percent': fuzzing_time_percent,
  }

  # Remove fuzzing arguments before merge and dictionary analysis step.
  remove_fuzzing_arguments(arguments)

  # Make a decision on whether merge step is needed at all. If there are no
  # new units added by libFuzzer run, then no need to do merge at all.
  new_units_added = shell.get_directory_file_count(new_testcases_directory)
  merge_error = None
  if new_units_added:
    # Merge the new units with the initial corpus.
    if corpus_directory not in corpus_directories:
      corpus_directories.append(corpus_directory)

    # If this times out, it's possible that we will miss some units. However,
    # if we're taking >10 minutes to load/merge the corpus something is going
    # very wrong and we probably don't want to make things worse by adding
    # units anyway.
    merge_tmp_dir = None
    if not use_minijail:
      merge_tmp_dir = os.path.join(fuzzer_utils.get_temp_dir(),
                                   'merge_workdir')
      engine_common.recreate_directory(merge_tmp_dir)

    old_corpus_len = shell.get_directory_file_count(corpus_directory)
    merge_directory = create_merge_directory()
    corpus_directories.insert(0, merge_directory)
    if use_minijail:
      bind_corpus_dirs(minijail_chroot, [merge_directory])

    merge_result = runner.merge(
        corpus_directories,
        merge_timeout=engine_common.get_merge_timeout(DEFAULT_MERGE_TIMEOUT),
        tmp_dir=merge_tmp_dir,
        additional_args=arguments)

    move_mergeable_units(merge_directory, corpus_directory)
    new_corpus_len = shell.get_directory_file_count(corpus_directory)
    new_units_added = 0

    merge_error = None
    if merge_result.timed_out:
      merge_error = 'Merging new testcases timed out:'
    elif merge_result.return_code != 0:
      merge_error = 'Merging new testcases failed:'
    else:
      # Only a clean merge updates the count of genuinely new units.
      new_units_added = new_corpus_len - old_corpus_len

    stat_overrides['new_units_added'] = new_units_added

    if merge_result.output:
      stat_overrides.update(
          stats.parse_stats_from_merge_log(merge_result.output.splitlines()))
  else:
    stat_overrides['new_units_added'] = 0
    logs.log('Skipped corpus merge since no new units added by fuzzing.')

  # Get corpus size after merge. This removes the duplicate units that were
  # created during this fuzzing session.
  # TODO(flowerhack): Remove this workaround once we can handle corpus sync.
  if environment.platform() != 'FUCHSIA':
    stat_overrides['corpus_size'] = shell.get_directory_file_count(
        corpus_directory)

  # Delete all corpus directories except for the main one. These were
  # temporary directories to store new testcase mutations and have already
  # been merged to main corpus directory.
  if corpus_directory in corpus_directories:
    corpus_directories.remove(corpus_directory)
  for directory in corpus_directories:
    shutil.rmtree(directory, ignore_errors=True)

  if use_minijail:
    unbind_corpus_dirs(minijail_chroot, corpus_directories)

  # Apply overridden stats to the parsed stats prior to dumping.
  parsed_stats.update(stat_overrides)

  # Dump stats data for further uploading to BigQuery.
  engine_common.dump_big_query_data(parsed_stats, testcase_file_path, command)

  # Add custom crash state based on fuzzer name (if needed).
  add_custom_crash_state_if_needed(fuzzer_name, log_lines, parsed_stats)
  for line in log_lines:
    print(line)

  # Add fuzzing strategies used.
  print(
      engine_common.format_fuzzing_strategies(
          strategy_info.fuzzing_strategies))

  # Add merge error (if any).
  if merge_error:
    print(data_types.CRASH_STACKTRACE_END_MARKER)
    print(merge_error)
    print('Command:',
          get_printable_command(merge_result.command, fuzzer_path,
                                use_minijail))
    print(merge_result.output)

  analyze_and_update_recommended_dictionary(runner, fuzzer_name, log_lines,
                                            corpus_directory, arguments)

  # Close minijail chroot.
  if use_minijail:
    minijail_chroot.close()

  # Record the stats to make them easily searchable in stackdriver.
  if new_units_added:
    logs.log(
        'New units added to corpus: %d.' % new_units_added,
        stats=parsed_stats)
  else:
    logs.log('No new units found.', stats=parsed_stats)
def mock_get_directory_file_count(dir_path):
  """Mocked version, always return 1 for new testcases directory."""
  new_testcases_dir = os.path.join(fuzzer_utils.get_temp_dir(), 'new')
  if dir_path == new_testcases_dir:
    return 1
  return _get_directory_file_count_orig(dir_path)
def main(argv):
  """Run afl as specified by argv.

  argv layout: argv[1] is the testcase file path, argv[2] the fuzz target
  name. With FUZZ_CORPUS_DIR set this performs a fuzzing run and records
  stats; without it, it only reproduces an existing testcase.

  NOTE(review): this function uses Python 2 print statements
  (`print fuzz_result.output`), and `print(...).format(...)` below relies on
  Python 2 print-statement parsing — it would raise under Python 3.
  """
  atexit.register(fuzzer_utils.cleanup)

  # Initialize variables.
  _, testcase_file_path, target_name = argv[:3]
  input_directory = environment.get_value('FUZZ_CORPUS_DIR')
  fuzzer_name = data_types.fuzz_target_project_qualified_name(
      utils.current_project(), target_name)

  # Initialize log handler.
  logs.configure(
      'run_fuzzer', {
          'fuzzer': fuzzer_name,
          'engine': 'afl',
          'job_name': environment.get_value('JOB_NAME')
      })

  build_directory = environment.get_value('BUILD_DIR')
  fuzzer_path = engine_common.find_fuzzer_path(build_directory, target_name)
  if not fuzzer_path:
    # This is an expected case when doing regression testing with old builds
    # that do not have that fuzz target. It can also happen when a host sends
    # a message to an untrusted worker that just restarted and lost
    # information on build directory.
    logs.log_warn('Could not find fuzz target %s.' % target_name)
    return

  # Install signal handler.
  signal.signal(signal.SIGTERM, engine_common.signal_term_handler)

  # Set up temp dir.
  engine_common.recreate_directory(fuzzer_utils.get_temp_dir())

  config = AflConfig.from_target_path(fuzzer_path)

  runner = AflRunner(fuzzer_path, config, testcase_file_path, input_directory)

  # Add *SAN_OPTIONS overrides from .options file.
  engine_common.process_sanitizer_options_overrides(fuzzer_path)

  # If we don't have a corpus, then that means this is not a fuzzing run.
  if not input_directory:
    load_testcase_if_exists(runner, testcase_file_path)
    return

  # Make sure afl won't exit because of bad sanitizer options.
  set_additional_sanitizer_options_for_afl_fuzz()

  # Execute afl-fuzz on the fuzzing target.
  fuzz_result = runner.fuzz()

  # Print info for the fuzzer logs.
  command = fuzz_result.command
  print('Command: {0}\n'
        'Bot: {1}\n'
        'Time ran: {2}\n').format(engine_common.get_command_quoted(command),
                                  BOT_NAME, fuzz_result.time_executed)
  print fuzz_result.output
  runner.strategies.print_strategies()

  if fuzz_result.return_code:
    # If AFL returned a non-zero return code quit now without getting stats,
    # since they would be meaningless.
    print runner.fuzzer_stderr
    return

  stats_getter = stats.StatsGetter(runner.afl_output.stats_path,
                                   config.dict_path)
  try:
    new_units_generated, new_units_added, corpus_size = (
        runner.libfuzzerize_corpus())
    stats_getter.set_stats(fuzz_result.time_executed, new_units_generated,
                           new_units_added, corpus_size, runner.strategies,
                           runner.fuzzer_stderr, fuzz_result.output)
    engine_common.dump_big_query_data(stats_getter.stats, testcase_file_path,
                                      AFL_PREFIX, fuzzer_name, command)
  finally:
    # Always surface the target's stderr, even if stats collection failed.
    print runner.fuzzer_stderr

  # Whenever new units are added to corpus, record the stats to make them
  # easily searchable in stackdriver.
  if new_units_added:
    logs.log('New units added to corpus: %d.' % new_units_added,
             stats=stats_getter.stats)
def get_merge_directory():
  """Returns the path of the directory we can use for merging."""
  return os.path.join(fuzzer_utils.get_temp_dir(), MERGE_DIRECTORY_NAME)
def _minimize_corpus_two_step(self, target_path, arguments,
                              existing_corpus_dirs, new_corpus_dir,
                              output_corpus_dir, reproducers_dir, max_time):
  """Optional (but recommended): run corpus minimization.

  Args:
    target_path: Path to the target.
    arguments: Additional arguments needed for corpus minimization.
    existing_corpus_dirs: Input corpora that existed before the fuzzing run.
    new_corpus_dir: Input corpus that was generated during the fuzzing run.
        Must have at least one new file.
    output_corpus_dir: Output directory to place minimized corpus.
    reproducers_dir: The directory to put reproducers in when crashes are
        found.
    max_time: Maximum allowed time for the minimization.

  Returns:
    A Result object.

  Raises:
    engine.TimeoutError: If the first merge step consumed the entire time
        budget.
  """
  if not _is_multistep_merge_supported(target_path):
    # Fallback to the old single step merge. It does not support incremental
    # stats and provides only `edge_coverage` and `feature_coverage` stats.
    logs.log('Old version of libFuzzer is used. Using single step merge.')
    return self.minimize_corpus(target_path, arguments,
                                existing_corpus_dirs + [new_corpus_dir],
                                output_corpus_dir, reproducers_dir, max_time)

  # The dir where merge control file is located must persist for both merge
  # steps. The second step re-uses the MCF produced during the first step.
  self._merge_control_file = os.path.join(fuzzer_utils.get_temp_dir(), 'MCF')

  # Two step merge process to obtain accurate stats for the new corpus units.
  # See https://reviews.llvm.org/D66107 for a more detailed description.
  merge_stats = {}

  # Step 1. Use only existing corpus and collect "initial" stats.
  result_1 = self.minimize_corpus(target_path, arguments,
                                  existing_corpus_dirs, output_corpus_dir,
                                  reproducers_dir, max_time)
  merge_stats['initial_edge_coverage'] = result_1.stats['edge_coverage']
  merge_stats['initial_feature_coverage'] = result_1.stats[
      'feature_coverage']

  # Clear the output dir as it does not have any new units at this point.
  engine_common.recreate_directory(output_corpus_dir)

  # Adjust the time limit for the time we spent on the first merge step.
  max_time -= result_1.time_executed
  if max_time <= 0:
    raise engine.TimeoutError('Merging new testcases timed out\n' +
                              result_1.logs)

  # Step 2. Process the new corpus units as well.
  result_2 = self.minimize_corpus(
      target_path, arguments, existing_corpus_dirs + [new_corpus_dir],
      output_corpus_dir, reproducers_dir, max_time)
  merge_stats['edge_coverage'] = result_2.stats['edge_coverage']
  merge_stats['feature_coverage'] = result_2.stats['feature_coverage']

  # Diff the stats to obtain accurate values for the new corpus units.
  merge_stats['new_edges'] = (
      merge_stats['edge_coverage'] - merge_stats['initial_edge_coverage'])
  merge_stats['new_features'] = (
      merge_stats['feature_coverage'] -
      merge_stats['initial_feature_coverage'])

  output = result_1.logs + '\n\n' + result_2.logs
  # Negative diffs should be impossible; log and clamp to zero rather than
  # reporting nonsense stats.
  if (merge_stats['new_edges'] < 0 or merge_stats['new_features'] < 0):
    logs.log_error(
        'Two step merge failed.', merge_stats=merge_stats, output=output)
    merge_stats['new_edges'] = 0
    merge_stats['new_features'] = 0

  self._merge_control_file = None

  # TODO(ochang): Get crashes found during merge.
  return engine.FuzzResult(output, result_2.command, [], merge_stats,
                           result_1.time_executed + result_2.time_executed)
def create_new_if_needed(self):
  """Checks if any inputs are too large for AFL. If not then does nothing.
  Otherwise creates a temporary input directory and copies the non-oversized
  inputs.

  Returns:
    Path of the newly created temporary input directory, or None if the full
    original corpus can be used as-is.
  """
  # TODO(metzman): Get rid of this approach where a new corpus is created.
  # Instead use an approach that modifies the input corpus permanently so
  # that it doesn't have to be fixed every time by AFL.

  # TODO(metzman): Copy testcases in subdirectories so AFL can use them, even
  # when there are no oversized files.
  corpus_file_paths = list_full_file_paths_recursive(self.input_directory)
  # Keep only files below AFL's size limit, remembering each size so the
  # copy loop below can enforce the total-size cap without re-statting.
  usable_files_and_sizes = [
      (path, os.path.getsize(path))
      for path in corpus_file_paths
      if os.path.getsize(path) < constants.MAX_FILE_BYTES
  ]

  num_files = len(usable_files_and_sizes)
  if self.strategies.use_corpus_subset:
    num_files = min(num_files, self.strategies.corpus_subset_size)
    # Only report the subset strategy as used if the subset size was actually
    # the limiting factor.
    self.strategies.use_corpus_subset = (
        self.strategies.corpus_subset_size == num_files)

  if num_files == len(corpus_file_paths):
    # Nothing to do here: using full corpus and all files are appropriately
    # sized.
    return None

  # Save the original input directory.
  self.original_input_directory = self.input_directory

  # Make a new directory that we can modify.
  self.input_directory = os.path.join(fuzzer_utils.get_temp_dir(),
                                      'afl_input_dir')

  engine_common.recreate_directory(self.input_directory)
  copied_size = 0
  for src_path, src_size in usable_files_and_sizes:
    if not num_files:
      # Beyond our maximum number of files (possibly because of subsetting).
      break
    num_files -= 1

    copied_size += src_size
    if copied_size > self.MAX_COPIED_CORPUS_SIZE:
      # Stop copying once the cumulative size cap is exceeded.
      break

    filename = os.path.basename(src_path)
    dst_path = os.path.join(self.input_directory, filename)

    # TODO(metzman): Ask Michal to allow skipping of oversized inputs
    # automatically. Just copy the small enough, files, we can't use soft
    # links because of AFL and we can't use hard links because they do not
    # work across devices.
    shutil.copy(src_path, dst_path)

  num_files = len(os.listdir(self.input_directory))
  num_files_original = len(os.listdir(self.original_input_directory))
  logs.log(
      ('Temporary input directory contains %d files. Original contains %d.' %
       (num_files, num_files_original)))

  return self.input_directory
def stderr_file_path(self):
  """Returns the file for afl to output stack traces."""
  temp_dir = fuzzer_utils.get_temp_dir()
  return os.path.join(temp_dir, STDERR_FILENAME)
def main(argv):
  """Run libFuzzer as specified by argv.

  Expected layout: argv[1] is the testcase file path, argv[2] is the fuzz
  target name, and any remaining entries are extra fuzzer arguments.
  Depending on which special arguments are present, this either minimizes or
  cleanses a testcase, reproduces a crash, or runs a full fuzzing session
  (including corpus merge, stats parsing and dictionary analysis).
  """
  atexit.register(fuzzer_utils.cleanup)

  # Initialize variables.
  arguments = argv[1:]
  testcase_file_path = arguments.pop(0)
  target_name = arguments.pop(0)
  fuzzer_name = data_types.fuzz_target_project_qualified_name(
      utils.current_project(), target_name)

  # Initialize log handler.
  logs.configure(
      'run_fuzzer', {
          'fuzzer': fuzzer_name,
          'engine': 'libFuzzer',
          'job_name': environment.get_value('JOB_NAME')
      })

  profiler.start_if_needed('libfuzzer_launcher')

  # Make sure that the fuzzer binary exists.
  build_directory = environment.get_value('BUILD_DIR')
  fuzzer_path = engine_common.find_fuzzer_path(build_directory, target_name)
  if not fuzzer_path:
    # This is an expected case when doing regression testing with old builds
    # that do not have that fuzz target. It can also happen when a host sends a
    # message to an untrusted worker that just restarted and lost information
    # on build directory.
    logs.log_warn('Could not find fuzz target %s.' % target_name)
    return

  # Install signal handler.
  signal.signal(signal.SIGTERM, engine_common.signal_term_handler)

  # Set up temp dir.
  engine_common.recreate_directory(fuzzer_utils.get_temp_dir())

  # Setup minijail if needed.
  use_minijail = environment.get_value('USE_MINIJAIL')
  runner = libfuzzer.get_runner(
      fuzzer_path, temp_dir=fuzzer_utils.get_temp_dir())

  if use_minijail:
    minijail_chroot = runner.chroot
  else:
    minijail_chroot = None

  # Get corpus directory.
  corpus_directory = environment.get_value('FUZZ_CORPUS_DIR')

  # Add common arguments which are necessary to be used for every run.
  arguments = expand_with_common_arguments(arguments)

  # Add sanitizer options to environment that were specified in the .options
  # file and options that this script requires.
  set_sanitizer_options(fuzzer_path)

  # Minimize test argument. If present, this run is a minimization task, not
  # a fuzzing session.
  minimize_to = fuzzer_utils.extract_argument(arguments, MINIMIZE_TO_ARGUMENT)
  minimize_timeout = fuzzer_utils.extract_argument(
      arguments, MINIMIZE_TIMEOUT_ARGUMENT)

  if minimize_to and minimize_timeout:
    minimize_testcase(runner, testcase_file_path, minimize_to,
                      int(minimize_timeout), arguments, use_minijail)
    return

  # Cleanse argument. If present, this run is a cleanse task.
  cleanse_to = fuzzer_utils.extract_argument(arguments, CLEANSE_TO_ARGUMENT)
  cleanse_timeout = fuzzer_utils.extract_argument(arguments,
                                                  CLEANSE_TIMEOUT_ARGUMENT)

  if cleanse_to and cleanse_timeout:
    cleanse_testcase(runner, testcase_file_path, cleanse_to,
                     int(cleanse_timeout), arguments, use_minijail)
    return

  # If we don't have a corpus, then that means this is not a fuzzing run.
  if not corpus_directory:
    load_testcase_if_exists(runner, testcase_file_path, fuzzer_name,
                            use_minijail, arguments)
    return

  # We don't have a crash testcase, fuzz.

  # Check dict argument to make sure that it's valid. An invalid dict path is
  # logged and stripped rather than failing the run.
  dict_argument = fuzzer_utils.extract_argument(
      arguments, constants.DICT_FLAG, remove=False)
  if dict_argument and not os.path.exists(dict_argument):
    logs.log_error('Invalid dict %s for %s.' % (dict_argument, fuzzer_name))
    fuzzer_utils.extract_argument(arguments, constants.DICT_FLAG)

  # If there's no dict argument, check for %target_binary_name%.dict file.
  if (not fuzzer_utils.extract_argument(
      arguments, constants.DICT_FLAG, remove=False)):
    default_dict_path = dictionary_manager.get_default_dictionary_path(
        fuzzer_path)
    if os.path.exists(default_dict_path):
      arguments.append(constants.DICT_FLAG + default_dict_path)

  # Collects names of the randomly-selected strategies applied to this run so
  # they can be reported with the stats later.
  fuzzing_strategies = []

  # Select a generator to use for existing testcase mutations.
  generator = _select_generator()
  is_mutations_run = generator != Generator.NONE

  # Timeout for fuzzer run.
  fuzz_timeout = get_fuzz_timeout(is_mutations_run)

  # Set up scratch directory for writing new units.
  new_testcases_directory = create_corpus_directory('new')

  # Get list of corpus directories.
  corpus_directories = get_corpus_directories(corpus_directory,
                                              new_testcases_directory,
                                              fuzzer_path, fuzzing_strategies,
                                              minijail_chroot)

  # Pick where libFuzzer should write crash artifacts: a chroot-relative root
  # under minijail, the testcase's directory otherwise.
  if use_minijail:
    artifact_prefix = constants.ARTIFACT_PREFIX_FLAG + '/'
  else:
    artifact_prefix = '%s%s/' % (constants.ARTIFACT_PREFIX_FLAG,
                                 os.path.abspath(
                                     os.path.dirname(testcase_file_path)))

  # Generate new testcase mutations using radamsa, etc.
  if is_mutations_run:
    new_testcase_mutations_directory = generate_new_testcase_mutations(
        corpus_directory, fuzzer_name, generator, fuzzing_strategies)
    corpus_directories.append(new_testcase_mutations_directory)
    if use_minijail:
      bind_corpus_dirs(minijail_chroot, [new_testcase_mutations_directory])

  # Randomized strategy: cap -max_len unless the caller already set one.
  max_len_argument = fuzzer_utils.extract_argument(
      arguments, constants.MAX_LEN_FLAG, remove=False)
  if not max_len_argument and do_random_max_length():
    max_length = random.SystemRandom().randint(1, MAX_VALUE_FOR_MAX_LENGTH)
    arguments.append('%s%d' % (constants.MAX_LEN_FLAG, max_length))
    fuzzing_strategies.append(strategy.RANDOM_MAX_LENGTH_STRATEGY)

  # Randomized strategy: add the GCS-stored recommended dictionary.
  if do_recommended_dictionary():
    if add_recommended_dictionary(arguments, fuzzer_name, fuzzer_path):
      fuzzing_strategies.append(strategy.RECOMMENDED_DICTIONARY_STRATEGY)

  # Randomized strategy: value-profile-guided fuzzing.
  if do_value_profile():
    arguments.append(constants.VALUE_PROFILE_ARGUMENT)
    fuzzing_strategies.append(strategy.VALUE_PROFILE_STRATEGY)

  # Randomized strategy: fork mode, sized to the CPUs left over after other
  # fuzzer threads on this bot are accounted for.
  if do_fork():
    max_fuzz_threads = environment.get_value('MAX_FUZZ_THREADS', 1)
    num_fuzz_processes = max(
        1, multiprocessing.cpu_count() // max_fuzz_threads)
    arguments.append('%s%d' % (constants.FORK_FLAG, num_fuzz_processes))
    fuzzing_strategies.append('%s_%d' % (strategy.FORK_STRATEGY,
                                         num_fuzz_processes))

  extra_env = {}
  if do_mutator_plugin():
    if use_mutator_plugin(target_name, extra_env, minijail_chroot):
      fuzzing_strategies.append(strategy.MUTATOR_PLUGIN_STRATEGY)

  # Execute the fuzzer binary with original arguments.
  fuzz_result = runner.fuzz(
      corpus_directories,
      fuzz_timeout=fuzz_timeout,
      additional_args=arguments + [artifact_prefix],
      extra_env=extra_env)

  if (not use_minijail and
      fuzz_result.return_code == constants.LIBFUZZER_ERROR_EXITCODE):
    # Minijail returns 1 if the exit code is nonzero.
    # Otherwise: we can assume that a return code of 1 means that libFuzzer
    # itself ran into an error.
    logs.log_error(ENGINE_ERROR_MESSAGE, engine_output=fuzz_result.output)

  log_lines = fuzz_result.output.splitlines()
  # Output can be large, so save some memory by removing reference to the
  # original output which is no longer needed.
  fuzz_result.output = None

  # Check if we crashed, and get the crash testcase path.
  crash_testcase_file_path = None
  for line in log_lines:
    match = re.match(CRASH_TESTCASE_REGEX, line)
    if match:
      crash_testcase_file_path = match.group(1)
      break

  if crash_testcase_file_path:
    # Write the new testcase.
    if use_minijail:
      # Convert chroot relative path to host path. Remove the leading '/'
      # before joining.
      crash_testcase_file_path = os.path.join(
          minijail_chroot.directory, crash_testcase_file_path[1:])

    # Copy crash testcase contents into the main testcase path.
    shutil.move(crash_testcase_file_path, testcase_file_path)

  # Print the command output.
  log_header_format = ('Command: %s\n'
                       'Bot: %s\n'
                       'Time ran: %f\n')
  bot_name = environment.get_value('BOT_NAME', '')
  command = fuzz_result.command
  if use_minijail:
    # Remove minijail prefix.
    command = engine_common.strip_minijail_command(command, fuzzer_path)
  print(log_header_format % (engine_common.get_command_quoted(command),
                             bot_name, fuzz_result.time_executed))

  # Parse stats information based on libFuzzer output.
  parsed_stats = parse_log_stats(log_lines)

  # Extend parsed stats by additional performance features.
  parsed_stats.update(
      stats.parse_performance_features(log_lines, fuzzing_strategies,
                                       arguments))

  # Set some initial stat overrides.
  timeout_limit = fuzzer_utils.extract_argument(
      arguments, constants.TIMEOUT_FLAG, remove=False)

  expected_duration = runner.get_max_total_time(fuzz_timeout)
  actual_duration = int(fuzz_result.time_executed)
  fuzzing_time_percent = 100 * actual_duration / float(expected_duration)
  # NOTE(review): int(timeout_limit) assumes a -timeout= flag is always
  # present (presumably added by expand_with_common_arguments); it would
  # raise TypeError on None — confirm.
  stat_overrides = {
      'timeout_limit': int(timeout_limit),
      'expected_duration': expected_duration,
      'actual_duration': actual_duration,
      'fuzzing_time_percent': fuzzing_time_percent,
  }

  # Remove fuzzing arguments before merge and dictionary analysis step.
  remove_fuzzing_arguments(arguments)

  # Make a decision on whether merge step is needed at all. If there are no
  # new units added by libFuzzer run, then no need to do merge at all.
  new_units_added = shell.get_directory_file_count(new_testcases_directory)
  merge_error = None
  if new_units_added:
    # Merge the new units with the initial corpus.
    if corpus_directory not in corpus_directories:
      corpus_directories.append(corpus_directory)

    # If this times out, it's possible that we will miss some units. However,
    # if we're taking >10 minutes to load/merge the corpus something is going
    # very wrong and we probably don't want to make things worse by adding
    # units anyway.
    merge_tmp_dir = None
    if not use_minijail:
      merge_tmp_dir = os.path.join(fuzzer_utils.get_temp_dir(),
                                   'merge_workdir')
      engine_common.recreate_directory(merge_tmp_dir)

    old_corpus_len = shell.get_directory_file_count(corpus_directory)
    merge_directory = create_merge_directory()
    # Merge output dir goes first so libFuzzer writes the merged units there.
    corpus_directories.insert(0, merge_directory)
    if use_minijail:
      bind_corpus_dirs(minijail_chroot, [merge_directory])

    merge_result = runner.merge(
        corpus_directories,
        merge_timeout=engine_common.get_merge_timeout(DEFAULT_MERGE_TIMEOUT),
        tmp_dir=merge_tmp_dir,
        additional_args=arguments)

    move_mergeable_units(merge_directory, corpus_directory)
    new_corpus_len = shell.get_directory_file_count(corpus_directory)
    new_units_added = 0

    merge_error = None
    if merge_result.timed_out:
      merge_error = 'Merging new testcases timed out:'
    elif merge_result.return_code != 0:
      merge_error = 'Merging new testcases failed:'
    else:
      new_units_added = new_corpus_len - old_corpus_len

    stat_overrides['new_units_added'] = new_units_added

    if merge_result.output:
      stat_overrides.update(
          stats.parse_stats_from_merge_log(merge_result.output.splitlines()))
  else:
    stat_overrides['new_units_added'] = 0
    logs.log('Skipped corpus merge since no new units added by fuzzing.')

  # Get corpus size after merge. This removes the duplicate units that were
  # created during this fuzzing session.
  stat_overrides['corpus_size'] = shell.get_directory_file_count(
      corpus_directory)

  # Delete all corpus directories except for the main one. These were
  # temporary directories to store new testcase mutations and have already
  # been merged to main corpus directory.
  if corpus_directory in corpus_directories:
    corpus_directories.remove(corpus_directory)

  for directory in corpus_directories:
    shutil.rmtree(directory, ignore_errors=True)

  if use_minijail:
    unbind_corpus_dirs(minijail_chroot, corpus_directories)

  # Apply overridden stats to the parsed stats prior to dumping.
  parsed_stats.update(stat_overrides)

  # Dump stats data for further uploading to BigQuery.
  engine_common.dump_big_query_data(parsed_stats, testcase_file_path,
                                    LIBFUZZER_PREFIX, fuzzer_name, command)

  # Add custom crash state based on fuzzer name (if needed).
  add_custom_crash_state_if_needed(fuzzer_name, log_lines, parsed_stats)
  for line in log_lines:
    print(line)

  # Add fuzzing strategies used.
  engine_common.print_fuzzing_strategies(fuzzing_strategies)

  # Add merge error (if any). merge_result is only referenced here when
  # merge_error is set, i.e. when the merge branch above actually ran.
  if merge_error:
    print(data_types.CRASH_STACKTRACE_END_MARKER)
    print(merge_error)
    print(
        'Command:',
        get_printable_command(merge_result.command, fuzzer_path,
                              use_minijail))
    print(merge_result.output)

  analyze_and_update_recommended_dictionary(runner, fuzzer_name, log_lines,
                                            corpus_directory, arguments)

  # Close minijail chroot.
  if use_minijail:
    minijail_chroot.close()

  # Record the stats to make them easily searchable in stackdriver.
  if new_units_added:
    logs.log('New units added to corpus: %d.' % new_units_added,
             stats=parsed_stats)
  else:
    logs.log('No new units found.', stats=parsed_stats)
def get_runner(fuzzer_path, temp_dir=None, use_minijail=None):
  """Get a libfuzzer runner.

  Args:
    fuzzer_path: Path to the fuzz target binary; chmod'd executable on
        non-Fuchsia platforms.
    temp_dir: Base directory for the minijail chroot; defaults to
        fuzzer_utils.get_temp_dir().
    use_minijail: Whether to sandbox the run in minijail; defaults to the
        USE_MINIJAIL environment value.

  Returns:
    A MinijailLibFuzzerRunner, FuchsiaQemuLibFuzzerRunner or LibFuzzerRunner
    depending on the platform and sandbox settings.
  """
  if use_minijail is None:
    use_minijail = environment.get_value('USE_MINIJAIL')

  if use_minijail is False:
    # If minijail is explicitly disabled, set the environment variable as
    # well.
    environment.set_value('USE_MINIJAIL', False)

  if temp_dir is None:
    temp_dir = fuzzer_utils.get_temp_dir()

  build_dir = environment.get_value('BUILD_DIR')
  dataflow_build_dir = environment.get_value('DATAFLOW_BUILD_DIR')
  is_fuchsia = environment.platform() == 'FUCHSIA'
  if not is_fuchsia:
    # To ensure that we can run the fuzz target.
    os.chmod(fuzzer_path, 0o755)

  is_chromeos_system_job = environment.is_chromeos_system_job()
  # minijail_chroot is only bound in these two branches; the combined guard
  # below mirrors them, so it is never read when undefined.
  if is_chromeos_system_job:
    minijail_chroot = minijail.ChromeOSChroot(build_dir)
  elif use_minijail:
    minijail_chroot = minijail.MinijailChroot(base_dir=temp_dir)

  if use_minijail or is_chromeos_system_job:
    # While it's possible for dynamic binaries to run without this, they need
    # to be accessible for symbolization etc. For simplicity we bind BUILD_DIR
    # to the same location within the chroot, which leaks the directory
    # structure of CF but this shouldn't be a big deal.
    minijail_chroot.add_binding(
        minijail.ChrootBinding(build_dir, build_dir, writeable=False))

    if dataflow_build_dir:
      minijail_chroot.add_binding(
          minijail.ChrootBinding(dataflow_build_dir, dataflow_build_dir,
                                 writeable=False))

    # Also bind the build dir to /out to make it easier to hardcode references
    # to data files.
    minijail_chroot.add_binding(
        minijail.ChrootBinding(build_dir, '/out', writeable=False))

    minijail_bin = os.path.join(minijail_chroot.directory, 'bin')
    shell.create_directory(minijail_bin)

    # Set up /bin with llvm-symbolizer to allow symbolized stacktraces.
    # Don't copy if it already exists (e.g. ChromeOS chroot jail).
    llvm_symbolizer_source_path = environment.get_llvm_symbolizer_path()
    llvm_symbolizer_destination_path = os.path.join(minijail_bin,
                                                    'llvm-symbolizer')
    if not os.path.exists(llvm_symbolizer_destination_path):
      shutil.copy(llvm_symbolizer_source_path,
                  llvm_symbolizer_destination_path)

    # copy /bin/sh, necessary for system().
    if not environment.is_chromeos_system_job():
      # The chroot has its own shell we don't need to copy (and probably
      # shouldn't because of library differences).
      # realpath resolves /bin/sh when it is a symlink (e.g. to dash).
      shutil.copy(os.path.realpath('/bin/sh'),
                  os.path.join(minijail_bin, 'sh'))

    runner = MinijailLibFuzzerRunner(fuzzer_path, minijail_chroot)
  elif is_fuchsia:
    runner = FuchsiaQemuLibFuzzerRunner(fuzzer_path)
  else:
    runner = LibFuzzerRunner(fuzzer_path)

  return runner