Example #1
    def test(self):
        """Test get_directory_file_count."""
        self.fs.CreateFile('/test/aa/bb.txt', contents='abc')
        self.fs.CreateFile('/test/aa/cc.txt', contents='def')
        self.fs.CreateFile('/test/aa/aa/aa.txt', contents='ghi')
        self.fs.CreateFile('/test/aa/aa/dd.txt', contents='t')

        self.assertEqual(shell.get_directory_file_count('/test/aa'), 4)
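All of these examples exercise shell.get_directory_file_count, which counts
files recursively under a directory. A minimal sketch of such a helper,
assuming plain os.walk semantics (the actual implementation may differ):

import os


def get_directory_file_count(directory_path):
    """Return the number of files found recursively under |directory_path|."""
    return sum(len(files) for _, _, files in os.walk(directory_path))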
Example #2
def generate_new_testcase_mutations_using_radamsa(
        corpus_directory, new_testcase_mutations_directory,
        generation_timeout):
    """Generate new testcase mutations based on Radamsa."""
    radamsa_path = get_radamsa_path()
    if not radamsa_path:
        # Mutations using radamsa are not supported on the current platform; bail out.
        return

    radamsa_runner = new_process.ProcessRunner(radamsa_path)
    files_list = shell.get_files_list(corpus_directory)
    filtered_files_list = [
        f for f in files_list
        if os.path.getsize(f) <= RADAMSA_INPUT_FILE_SIZE_LIMIT
    ]
    if not filtered_files_list:
        # No mutations to do on an empty corpus or one with very large files.
        return

    old_corpus_size = shell.get_directory_file_count(
        new_testcase_mutations_directory)
    expected_completion_time = time.time() + generation_timeout

    for i in range(RADAMSA_MUTATIONS):
        original_file_path = random_choice(filtered_files_list)
        original_filename = os.path.basename(original_file_path)
        output_path = os.path.join(
            new_testcase_mutations_directory,
            get_radamsa_output_filename(original_filename, i),
        )

        result = radamsa_runner.run_and_wait(
            ["-o", output_path, original_file_path], timeout=RADAMSA_TIMEOUT)
        if result.return_code or result.timed_out:
            logs.log_error("Radamsa failed to mutate or timed out.",
                           output=result.output)

        # If we have exceeded our timeout, stop doing mutations and break.
        if time.time() > expected_completion_time:
            break

    new_corpus_size = shell.get_directory_file_count(
        new_testcase_mutations_directory)
    logs.log("Added %d tests using Radamsa mutations." %
             (new_corpus_size - old_corpus_size))
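Example #2 factors the output filename into a get_radamsa_output_filename
helper, while Example #6 below inlines the same pattern. A sketch of that
helper, assuming the 'radamsa-%08d-%s' naming scheme shown in Example #6:

def get_radamsa_output_filename(original_filename, index):
    """Return the output filename for the |index|-th Radamsa mutation."""
    # Assumes the naming scheme used in Example #6; the real helper may differ.
    return 'radamsa-%08d-%s' % (index + 1, original_filename)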
Example #3
    def run(self, initial_corpus_path, minimized_corpus_path, bad_units_path):
        """Run corpus pruning. Output result to directory."""
        if not shell.get_directory_file_count(initial_corpus_path):
            # Empty corpus, nothing to do.
            return

        # Set memory tool options and fuzzer arguments.
        engine_common.unpack_seed_corpus_if_needed(self.runner.target_path,
                                                   initial_corpus_path,
                                                   force_unpack=True)

        environment.reset_current_memory_tool_options(redzone_size=MIN_REDZONE,
                                                      leaks=True)
        self.runner.process_sanitizer_options()
        additional_args = self.runner.get_libfuzzer_flags()

        # Execute fuzzer with arguments for corpus pruning.
        logs.log("Running merge...")
        try:
            result = self.runner.minimize_corpus(
                additional_args,
                [initial_corpus_path],
                minimized_corpus_path,
                bad_units_path,
                CORPUS_PRUNING_TIMEOUT,
            )
        except engine.TimeoutError as e:
            raise CorpusPruningException(
                "Corpus pruning timed out while minimizing corpus\n" +
                e.message)
        except engine.Error as e:
            raise CorpusPruningException(
                "Corpus pruning failed to minimize corpus\n" + e.message)

        symbolized_output = stack_symbolizer.symbolize_stacktrace(result.logs)

        # Sanity check that there are files in minimized corpus after merging.
        if not shell.get_directory_file_count(minimized_corpus_path):
            raise CorpusPruningException(
                "Corpus pruning failed to minimize corpus\n" +
                symbolized_output)

        logs.log("Corpus merge finished successfully.",
                 output=symbolized_output)
Example #4
    def run(self, initial_corpus_path, minimized_corpus_path, bad_units_path):
        """Run corpus pruning. Output result to directory."""
        if not shell.get_directory_file_count(initial_corpus_path):
            # Empty corpus, nothing to do.
            return

        # Set memory tool options and fuzzer arguments.
        engine_common.unpack_seed_corpus_if_needed(self.runner.fuzzer_path,
                                                   initial_corpus_path,
                                                   force_unpack=True)

        environment.reset_current_memory_tool_options(redzone_size=MIN_REDZONE,
                                                      leaks=True)
        self.runner.process_sanitizer_options()
        additional_args = self.runner.get_libfuzzer_flags()

        # Execute fuzzer with arguments for corpus pruning.
        logs.log('Running merge...')
        result = self.runner.merge(
            [minimized_corpus_path, initial_corpus_path],
            CORPUS_PRUNING_TIMEOUT,
            artifact_prefix=bad_units_path,
            tmp_dir=self.context.merge_tmp_dir,
            additional_args=additional_args)

        # Sanity check that we didn't time out.
        symbolized_output = stack_symbolizer.symbolize_stacktrace(
            result.output)
        if result.timed_out:
            raise CorpusPruningException(
                'Corpus pruning timed out while merging corpus: %s.' %
                symbolized_output)
        # Sanity check that we didn't error out and there are files in minimized
        # corpus after merging.
        if (result.return_code
                or not shell.get_directory_file_count(minimized_corpus_path)):
            raise CorpusPruningException(
                'Corpus pruning failed to merge corpus: %s.' %
                symbolized_output)
        logs.log('Corpus merge finished successfully.',
                 output=symbolized_output)
Example #5
    def test(self):
        """Test clear_system_temp_directory works as expected."""
        self.fs.CreateFile('/tmp/aa/bb.txt', contents='abc')
        self.fs.CreateFile('/tmp/cc/dd/ee.txt', contents='def')
        self.fs.CreateDirectory('/tmp/ff/gg')
        self.fs.CreateDirectory('/tmp/hh')
        self.fs.CreateDirectory('/unrelated')
        self.fs.CreateFile('/unrelated/zz.txt', contents='zzz')
        os.symlink('/unrelated/zz.txt', '/tmp/hh/gg.txt')
        os.symlink('/unrelated', '/tmp/ii')

        shell.clear_system_temp_directory()

        self.assertTrue(os.path.exists('/tmp'))
        self.assertTrue(os.path.exists('/unrelated'))
        self.assertEqual(shell.get_directory_file_count('/tmp'), 0)
        self.assertEqual(shell.get_directory_file_count('/unrelated'), 1)
        self.assertFalse(os.path.exists('/tmp/aa/bb.txt'))
        self.assertFalse(os.path.exists('/tmp/cc/dd/ee.txt'))
        self.assertFalse(os.path.exists('/tmp/ff/gg'))
        self.assertFalse(os.path.exists('/tmp/hh'))
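The test above pins down the contract of shell.clear_system_temp_directory:
everything under the temp directory is removed, the directory itself is kept,
and symlinks are unlinked rather than followed, so targets outside the temp
directory survive. A minimal sketch under those assumptions (the real
implementation may differ):

import os
import shutil
import tempfile


def clear_system_temp_directory():
    """Remove all contents of the system temp directory (sketch)."""
    temp_dir = tempfile.gettempdir()
    for entry in os.listdir(temp_dir):
        full_path = os.path.join(temp_dir, entry)
        if os.path.islink(full_path) or os.path.isfile(full_path):
            # Unlink files and symlinks without following them.
            os.remove(full_path)
        else:
            # shutil.rmtree does not follow symlinks inside the tree.
            shutil.rmtree(full_path, ignore_errors=True)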
Example #6
def generate_new_testcase_mutations_using_radamsa(
        corpus_directory, new_testcase_mutations_directory,
        expected_completion_time):
    """Generate new testcase mutations based on Radamsa."""
    radamsa_path = get_radamsa_path()
    if not radamsa_path:
        # Mutations using radamsa are not supported on the current platform; bail out.
        return

    radamsa_runner = new_process.ProcessRunner(radamsa_path)
    files_list = shell.get_files_list(corpus_directory)
    if not files_list:
        # No mutations to do on an empty corpus, bail out.
        return

    old_corpus_size = shell.get_directory_file_count(
        new_testcase_mutations_directory)

    for i in range(RADAMSA_MUTATIONS):
        original_file_path = engine_common.random_choice(files_list)
        original_filename = os.path.basename(original_file_path)
        output_path = os.path.join(
            new_testcase_mutations_directory,
            'radamsa-%08d-%s' % (i + 1, original_filename))

        result = radamsa_runner.run_and_wait(
            ['-o', output_path, original_file_path], timeout=RADAMSA_TIMEOUT)
        if result.return_code or result.timed_out:
            logs.log_error('Radamsa failed to mutate or timed out.',
                           output=result.output)

        # If we have exceeded our timeout, stop doing mutations and break.
        if time.time() > expected_completion_time:
            break

    new_corpus_size = shell.get_directory_file_count(
        new_testcase_mutations_directory)
    logs.log('Added %d tests using Radamsa mutations.' %
             (new_corpus_size - old_corpus_size))
Example #7
    def _cross_pollinate_other_fuzzer_corpuses(self):
        """Add other fuzzer corpuses to shared corpus path for cross-pollination."""
        corpus_backup_date = utils.utcnow().date() - datetime.timedelta(
            days=data_types.CORPUS_BACKUP_PUBLIC_LOOKBACK_DAYS)

        for cross_pollinate_fuzzer in self.cross_pollinate_fuzzers:
            project_qualified_name = (
                cross_pollinate_fuzzer.fuzz_target.project_qualified_name())
            backup_bucket_name = cross_pollinate_fuzzer.backup_bucket_name
            corpus_engine_name = cross_pollinate_fuzzer.corpus_engine_name

            corpus_backup_url = corpus_manager.gcs_url_for_backup_file(
                backup_bucket_name, corpus_engine_name, project_qualified_name,
                corpus_backup_date)
            corpus_backup_local_filename = '%s-%s' % (
                project_qualified_name, os.path.basename(corpus_backup_url))
            corpus_backup_local_path = os.path.join(
                self.shared_corpus_path, corpus_backup_local_filename)

            if not storage.exists(corpus_backup_url, ignore_errors=True):
                # This can happen when a new fuzz target is checked in, or when
                # a backup was not captured for a particular day (for OSS-Fuzz,
                # this will result in a 403 instead of a 404, since that GCS
                # path belongs to another project). So, just log a warning for
                # debugging purposes.
                logs.log_warn('Corpus backup does not exist, ignoring: %s.' %
                              corpus_backup_url)
                continue

            if not storage.copy_file_from(corpus_backup_url,
                                          corpus_backup_local_path):
                continue

            corpus_backup_output_directory = os.path.join(
                self.shared_corpus_path, project_qualified_name)
            shell.create_directory_if_needed(corpus_backup_output_directory)
            archive.unpack(corpus_backup_local_path,
                           corpus_backup_output_directory)
            shell.remove_file(corpus_backup_local_path)

            if not shell.get_directory_file_count(
                    corpus_backup_output_directory):
                logs.log_error('Failed to unpack corpus backup from url %s.' %
                               corpus_backup_url)
            else:
                logs.log(
                    'Corpus backup url %s successfully unpacked into shared corpus.'
                    % corpus_backup_url)
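For context, a sketch of what corpus_manager.gcs_url_for_backup_file might
build. The path layout here is hypothetical, but Example #7 implies the result
is a dated archive that os.path.basename and archive.unpack can handle:

def gcs_url_for_backup_file(backup_bucket_name, fuzzer_name,
                            project_qualified_name, date):
    """Return the GCS URL for a corpus backup file (hypothetical layout)."""
    return 'gs://%s/corpus/%s/%s/%s.zip' % (
        backup_bucket_name, fuzzer_name, project_qualified_name, date)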
Example #8
def get_corpus_directories(main_corpus_directory,
                           new_testcases_directory,
                           fuzzer_path,
                           fuzzing_strategies,
                           strategy_pool,
                           minijail_chroot=None,
                           allow_corpus_subset=True):
    """Return a list of corpus directories to be passed to the fuzzer binary for
  fuzzing."""
    corpus_directories = []

    corpus_directories.append(new_testcases_directory)

    # Check for seed corpus and add it into corpus directory.
    engine_common.unpack_seed_corpus_if_needed(fuzzer_path,
                                               main_corpus_directory)

    # Pick a few testcases from our corpus to use as the initial corpus.
    subset_size = engine_common.random_choice(
        engine_common.CORPUS_SUBSET_NUM_TESTCASES)

    if (allow_corpus_subset
            and strategy_pool.do_strategy(strategy.CORPUS_SUBSET_STRATEGY)
            and shell.get_directory_file_count(main_corpus_directory) >
            subset_size):
        # Copy |subset_size| testcases into 'subset' directory.
        corpus_subset_directory = create_corpus_directory('subset')
        copy_from_corpus(corpus_subset_directory, main_corpus_directory,
                         subset_size)
        corpus_directories.append(corpus_subset_directory)
        fuzzing_strategies.append(strategy.CORPUS_SUBSET_STRATEGY.name + '_' +
                                  str(subset_size))
        if minijail_chroot:
            bind_corpus_dirs(minijail_chroot, [main_corpus_directory])
    else:
        # Regular fuzzing with the full main corpus directory.
        corpus_directories.append(main_corpus_directory)

    if minijail_chroot:
        bind_corpus_dirs(minijail_chroot, corpus_directories)

    return corpus_directories
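The subset logic above implies that engine_common.CORPUS_SUBSET_NUM_TESTCASES
is a sequence of candidate subset sizes, one of which is picked at random. A
hypothetical definition, for illustration only:

# Hypothetical candidate sizes; the actual values live in engine_common.
CORPUS_SUBSET_NUM_TESTCASES = [10, 20, 50, 75, 100]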
Example #9
    def test_generate(self):
        """Test generate specified number of inputs."""
        # Set a large timeout value and a small count value to avoid timeout.
        timeout = 20
        expected_count = 2

        result = generator.run(self.input_directory,
                               self.output_directory,
                               self.model_path,
                               timeout,
                               generation_count=expected_count,
                               hidden_state_size=MODEL_STATE_SIZE,
                               hidden_layer_size=MODEL_LAYER_SIZE)

        # Process exits normally and no timeout.
        self.assertEqual(result.return_code, constants.ExitCode.SUCCESS)
        self.assertFalse(result.timed_out)

        actual_count = shell.get_directory_file_count(self.output_directory)
        self.assertEqual(expected_count, actual_count)
Example #10
    def test_empty_corpus(self):
        """Test generation should abort for empty corpus."""
        # Set a large timeout value and a small count value to avoid timeout.
        timeout = 20
        expected_count = 2

        result = generator.run(self.empty_directory,
                               self.output_directory,
                               self.model_path,
                               timeout,
                               generation_count=expected_count,
                               hidden_state_size=MODEL_STATE_SIZE,
                               hidden_layer_size=MODEL_LAYER_SIZE)

        self.assertEqual(result.return_code,
                         constants.ExitCode.CORPUS_TOO_SMALL)
        self.assertFalse(result.timed_out)

        # No new units.
        actual_count = shell.get_directory_file_count(self.output_directory)
        self.assertEqual(actual_count, 0)
Example #11
    def test_timeout(self):
        """Test timeout case in generation."""
        # Set a small timeout value and a large count value to trigger timeout.
        # Note that the timeout cannot be set too small, since it takes time
        # to start the generator. If this test fails, increase the timeout.
        timeout = 10

        result = generator.run(self.input_directory,
                               self.output_directory,
                               self.model_path,
                               timeout,
                               generation_count=sys.maxsize,
                               hidden_state_size=MODEL_STATE_SIZE,
                               hidden_layer_size=MODEL_LAYER_SIZE)

        # Process timed out.
        self.assertNotEqual(result.return_code, constants.ExitCode.SUCCESS)
        self.assertTrue(result.timed_out)

        actual_count = shell.get_directory_file_count(self.output_directory)
        self.assertGreater(actual_count, 0)
Example #12
    def get_libfuzzer_flags(self):
        """Get default libFuzzer options."""
        rss_limit = RSS_LIMIT
        max_len = engine_common.CORPUS_INPUT_SIZE_LIMIT
        detect_leaks = 1
        arguments = [TIMEOUT_FLAG]

        if self.fuzzer_options:
            # Default values from above can be customized for a given fuzz target.
            libfuzzer_arguments = self.fuzzer_options.get_engine_arguments(
                'libfuzzer')

            custom_rss_limit = libfuzzer_arguments.get('rss_limit_mb',
                                                       constructor=int)
            if custom_rss_limit and custom_rss_limit < rss_limit:
                rss_limit = custom_rss_limit

            custom_max_len = libfuzzer_arguments.get('max_len',
                                                     constructor=int)
            if custom_max_len and custom_max_len < max_len:
                max_len = custom_max_len

            # Some targets might falsely report leaks all the time, so allow this to
            # be disabled.
            custom_detect_leaks = libfuzzer_arguments.get('detect_leaks',
                                                          constructor=int)
            if custom_detect_leaks is not None:
                detect_leaks = custom_detect_leaks

        arguments.append(RSS_LIMIT_MB_FLAG % rss_limit)
        arguments.append(MAX_LEN_FLAG % max_len)
        arguments.append(DETECT_LEAKS_FLAG % detect_leaks)

        corpus_size = shell.get_directory_file_count(
            self.context.initial_corpus_path)
        use_value_profile = int(
            corpus_size <= CORPUS_SIZE_LIMIT_FOR_VALUE_PROFILE)
        arguments.append(USE_VALUE_PROFILE_FLAG % use_value_profile)

        return arguments
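The flag constants used above are format strings defined elsewhere in the
module. Plausible definitions, shown for illustration only (the numeric limits
are hypothetical, but the flag names are standard libFuzzer options):

# Hypothetical values; the real constants live in the launcher module.
TIMEOUT_FLAG = '-timeout=25'
RSS_LIMIT_MB_FLAG = '-rss_limit_mb=%d'
MAX_LEN_FLAG = '-max_len=%d'
DETECT_LEAKS_FLAG = '-detect_leaks=%d'
USE_VALUE_PROFILE_FLAG = '-use_value_profile=%d'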
Example #13
    def libfuzzerize_corpus(self):
        """Make corpus directories libFuzzer compatible, merge new testcases
    if needed and return the number of new testcases added to corpus.
    """
        self.afl_input.restore_if_needed()
        # Number of new units created during fuzzing.
        new_units_generated = self.afl_output.count_new_units(
            self.afl_output.queue)

        # Number of new units we add to the corpus after merging.
        new_units_added = new_units_generated

        if new_units_generated:
            new_units_added = self.merge_corpus()
            logs.log('Merge completed successfully.')

        # Get the corpus size after the merge, which removed the duplicate
        # units that were created during this fuzzing session.
        corpus_size = shell.get_directory_file_count(
            self.afl_input.input_directory)

        return new_units_generated, new_units_added, corpus_size
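A sketch of how a caller might consume the returned tuple (the session object
name is hypothetical):

# Hypothetical call site; the actual caller lives in the AFL launcher.
new_units_generated, new_units_added, corpus_size = (
    session.libfuzzerize_corpus())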
Example #14
    def run(self, timeout):
        """Merge testcases from corpus from other fuzz targets."""
        if not shell.get_directory_file_count(self.context.shared_corpus_path):
            logs.log('No files found in shared corpus, skip merge.')
            return None

        # Run pruning on the shared corpus and log the result in case of error.
        logs.log('Merging shared corpus...')
        environment.reset_current_memory_tool_options(
            redzone_size=DEFAULT_REDZONE)
        self.runner.process_sanitizer_options()

        additional_args = self.runner.get_libfuzzer_flags()

        try:
            result = self.runner.minimize_corpus(
                additional_args, [self.context.shared_corpus_path],
                self.context.minimized_corpus_path,
                self.context.bad_units_path, timeout)
            symbolized_output = stack_symbolizer.symbolize_stacktrace(
                result.logs)
            logs.log('Shared corpus merge finished successfully.',
                     output=symbolized_output)
        except engine.TimeoutError as e:
            # Other cross-pollinated fuzzer corpuses can have unexpected test
            # cases that time us out. This is expected, so bail out.
            logs.log_warn(
                'Corpus pruning timed out while merging shared corpus\n' +
                repr(e))
            return None
        except engine.Error as e:
            # Other cross-pollinated fuzzer corpuses can be large, so we can
            # run out of disk space and raise an exception. This is expected,
            # so bail out.
            logs.log_warn('Corpus pruning failed to merge shared corpus\n' +
                          repr(e))
            return None

        return result.stats
Example #15
    def test_invalid_model(self):
        """Test TensorFlow should throw exception if model does not match."""
        # Set a large timeout value and a small count value to avoid timeout.
        timeout = 20
        expected_count = 2

        # Change model parameters to make demo model invalid.
        invalid_state_size = 8

        result = generator.run(self.input_directory,
                               self.output_directory,
                               self.model_path,
                               timeout,
                               generation_count=expected_count,
                               hidden_state_size=invalid_state_size,
                               hidden_layer_size=MODEL_LAYER_SIZE)

        self.assertEqual(result.return_code,
                         constants.ExitCode.TENSORFLOW_ERROR)
        self.assertFalse(result.timed_out)

        # No new units.
        actual_count = shell.get_directory_file_count(self.output_directory)
        self.assertEqual(actual_count, 0)
Example #16
def main(argv):
    """Run libFuzzer as specified by argv."""
    atexit.register(fuzzer_utils.cleanup)

    # Initialize variables.
    arguments = argv[1:]
    testcase_file_path = arguments.pop(0)
    target_name = arguments.pop(0)
    fuzzer_name = data_types.fuzz_target_project_qualified_name(
        utils.current_project(), target_name)

    # Initialize log handler.
    logs.configure(
        'run_fuzzer', {
            'fuzzer': fuzzer_name,
            'engine': 'libFuzzer',
            'job_name': environment.get_value('JOB_NAME')
        })

    profiler.start_if_needed('libfuzzer_launcher')

    # Make sure that the fuzzer binary exists.
    build_directory = environment.get_value('BUILD_DIR')
    fuzzer_path = engine_common.find_fuzzer_path(build_directory, target_name)
    if not fuzzer_path:
        # This is an expected case when doing regression testing with old
        # builds that do not have that fuzz target. It can also happen when a
        # host sends a message to an untrusted worker that just restarted and
        # lost information about the build directory.
        logs.log_warn('Could not find fuzz target %s.' % target_name)
        return

    # Install signal handler.
    signal.signal(signal.SIGTERM, engine_common.signal_term_handler)

    # Set up temp dir.
    engine_common.recreate_directory(fuzzer_utils.get_temp_dir())

    # Setup minijail if needed.
    use_minijail = environment.get_value('USE_MINIJAIL')
    runner = libfuzzer.get_runner(fuzzer_path,
                                  temp_dir=fuzzer_utils.get_temp_dir())

    if use_minijail:
        minijail_chroot = runner.chroot
    else:
        minijail_chroot = None

    # Get corpus directory.
    corpus_directory = environment.get_value('FUZZ_CORPUS_DIR')

    # Add common arguments which are necessary to be used for every run.
    arguments = expand_with_common_arguments(arguments)

    # Add sanitizer options to environment that were specified in the .options
    # file and options that this script requires.
    set_sanitizer_options(fuzzer_path)

    # Minimize test argument.
    minimize_to = fuzzer_utils.extract_argument(arguments,
                                                MINIMIZE_TO_ARGUMENT)
    minimize_timeout = fuzzer_utils.extract_argument(
        arguments, MINIMIZE_TIMEOUT_ARGUMENT)

    if minimize_to and minimize_timeout:
        minimize_testcase(runner, testcase_file_path, minimize_to,
                          int(minimize_timeout), arguments, use_minijail)
        return

    # Cleanse argument.
    cleanse_to = fuzzer_utils.extract_argument(arguments, CLEANSE_TO_ARGUMENT)
    cleanse_timeout = fuzzer_utils.extract_argument(arguments,
                                                    CLEANSE_TIMEOUT_ARGUMENT)

    if cleanse_to and cleanse_timeout:
        cleanse_testcase(runner, testcase_file_path, cleanse_to,
                         int(cleanse_timeout), arguments, use_minijail)
        return

    # If we don't have a corpus, then that means this is not a fuzzing run.
    if not corpus_directory:
        load_testcase_if_exists(runner, testcase_file_path, fuzzer_name,
                                use_minijail, arguments)
        return

    # We don't have a crash testcase, fuzz.

    # Check dict argument to make sure that it's valid.
    dict_argument = fuzzer_utils.extract_argument(arguments,
                                                  constants.DICT_FLAG,
                                                  remove=False)
    if dict_argument and not os.path.exists(dict_argument):
        logs.log_error('Invalid dict %s for %s.' %
                       (dict_argument, fuzzer_name))
        fuzzer_utils.extract_argument(arguments, constants.DICT_FLAG)

    # If there's no dict argument, check for %target_binary_name%.dict file.
    if (not fuzzer_utils.extract_argument(
            arguments, constants.DICT_FLAG, remove=False)):
        default_dict_path = dictionary_manager.get_default_dictionary_path(
            fuzzer_path)
        if os.path.exists(default_dict_path):
            arguments.append(constants.DICT_FLAG + default_dict_path)

    fuzzing_strategies = []

    # Select a generator to use for existing testcase mutations.
    generator = _select_generator()
    is_mutations_run = generator != Generator.NONE

    # Timeout for fuzzer run.
    fuzz_timeout = get_fuzz_timeout(is_mutations_run)

    # Set up scratch directory for writing new units.
    new_testcases_directory = create_corpus_directory('new')

    # Get list of corpus directories.
    corpus_directories = get_corpus_directories(corpus_directory,
                                                new_testcases_directory,
                                                fuzzer_path,
                                                fuzzing_strategies,
                                                minijail_chroot)

    # Set the artifact prefix path (chroot-relative when using minijail).
    if use_minijail:
        artifact_prefix = constants.ARTIFACT_PREFIX_FLAG + '/'
    else:
        artifact_prefix = '%s%s/' % (constants.ARTIFACT_PREFIX_FLAG,
                                     os.path.abspath(
                                         os.path.dirname(testcase_file_path)))

    # Generate new testcase mutations using radamsa, etc.
    if is_mutations_run:
        new_testcase_mutations_directory = generate_new_testcase_mutations(
            corpus_directory, fuzzer_name, generator, fuzzing_strategies)
        corpus_directories.append(new_testcase_mutations_directory)
        if use_minijail:
            bind_corpus_dirs(minijail_chroot,
                             [new_testcase_mutations_directory])

    max_len_argument = fuzzer_utils.extract_argument(arguments,
                                                     constants.MAX_LEN_FLAG,
                                                     remove=False)
    if not max_len_argument and do_random_max_length():
        max_length = random.SystemRandom().randint(1, MAX_VALUE_FOR_MAX_LENGTH)
        arguments.append('%s%d' % (constants.MAX_LEN_FLAG, max_length))
        fuzzing_strategies.append(strategy.RANDOM_MAX_LENGTH_STRATEGY)

    if do_recommended_dictionary():
        if add_recommended_dictionary(arguments, fuzzer_name, fuzzer_path):
            fuzzing_strategies.append(strategy.RECOMMENDED_DICTIONARY_STRATEGY)

    if do_value_profile():
        arguments.append(constants.VALUE_PROFILE_ARGUMENT)
        fuzzing_strategies.append(strategy.VALUE_PROFILE_STRATEGY)

    if do_fork():
        max_fuzz_threads = environment.get_value('MAX_FUZZ_THREADS', 1)
        num_fuzz_processes = max(
            1,
            multiprocessing.cpu_count() // max_fuzz_threads)
        arguments.append('%s%d' % (constants.FORK_FLAG, num_fuzz_processes))
        fuzzing_strategies.append('%s_%d' %
                                  (strategy.FORK_STRATEGY, num_fuzz_processes))

    extra_env = {}
    if do_mutator_plugin():
        if use_mutator_plugin(target_name, extra_env, minijail_chroot):
            fuzzing_strategies.append(strategy.MUTATOR_PLUGIN_STRATEGY)

    # Execute the fuzzer binary with original arguments.
    fuzz_result = runner.fuzz(corpus_directories,
                              fuzz_timeout=fuzz_timeout,
                              additional_args=arguments + [artifact_prefix],
                              extra_env=extra_env)

    if (not use_minijail
            and fuzz_result.return_code == constants.LIBFUZZER_ERROR_EXITCODE):
        # Minijail returns 1 if the exit code is nonzero. Without minijail, we
        # can assume that a return code of 1 means that libFuzzer itself ran
        # into an error.
        logs.log_error(ENGINE_ERROR_MESSAGE, engine_output=fuzz_result.output)

    log_lines = fuzz_result.output.splitlines()
    # Output can be large, so save some memory by removing reference to the
    # original output which is no longer needed.
    fuzz_result.output = None

    # Check if we crashed, and get the crash testcase path.
    crash_testcase_file_path = None
    for line in log_lines:
        match = re.match(CRASH_TESTCASE_REGEX, line)
        if match:
            crash_testcase_file_path = match.group(1)
            break

    if crash_testcase_file_path:
        # Write the new testcase.
        if use_minijail:
            # Convert chroot relative path to host path. Remove the leading '/' before
            # joining.
            crash_testcase_file_path = os.path.join(
                minijail_chroot.directory, crash_testcase_file_path[1:])

        # Copy crash testcase contents into the main testcase path.
        shutil.move(crash_testcase_file_path, testcase_file_path)

    # Print the command output.
    log_header_format = ('Command: %s\n' 'Bot: %s\n' 'Time ran: %f\n')
    bot_name = environment.get_value('BOT_NAME', '')
    command = fuzz_result.command
    if use_minijail:
        # Remove minijail prefix.
        command = engine_common.strip_minijail_command(command, fuzzer_path)
    print(log_header_format % (engine_common.get_command_quoted(command),
                               bot_name, fuzz_result.time_executed))

    # Parse stats information based on libFuzzer output.
    parsed_stats = parse_log_stats(log_lines)

    # Extend parsed stats by additional performance features.
    parsed_stats.update(
        stats.parse_performance_features(log_lines, fuzzing_strategies,
                                         arguments))

    # Set some initial stat overrides.
    timeout_limit = fuzzer_utils.extract_argument(arguments,
                                                  constants.TIMEOUT_FLAG,
                                                  remove=False)

    expected_duration = runner.get_max_total_time(fuzz_timeout)
    actual_duration = int(fuzz_result.time_executed)
    fuzzing_time_percent = 100 * actual_duration / float(expected_duration)
    stat_overrides = {
        'timeout_limit': int(timeout_limit),
        'expected_duration': expected_duration,
        'actual_duration': actual_duration,
        'fuzzing_time_percent': fuzzing_time_percent,
    }

    # Remove fuzzing arguments before merge and dictionary analysis step.
    remove_fuzzing_arguments(arguments)

    # Decide whether the merge step is needed at all. If the libFuzzer run
    # added no new units, there is no need to merge.
    new_units_added = shell.get_directory_file_count(new_testcases_directory)
    merge_error = None
    if new_units_added:
        # Merge the new units with the initial corpus.
        if corpus_directory not in corpus_directories:
            corpus_directories.append(corpus_directory)

        # If this times out, it's possible that we will miss some units.
        # However, if we're taking >10 minutes to load/merge the corpus,
        # something is going very wrong and we probably don't want to make
        # things worse by adding units anyway.

        merge_tmp_dir = None
        if not use_minijail:
            merge_tmp_dir = os.path.join(fuzzer_utils.get_temp_dir(),
                                         'merge_workdir')
            engine_common.recreate_directory(merge_tmp_dir)

        old_corpus_len = shell.get_directory_file_count(corpus_directory)
        merge_directory = create_merge_directory()
        corpus_directories.insert(0, merge_directory)

        if use_minijail:
            bind_corpus_dirs(minijail_chroot, [merge_directory])

        merge_result = runner.merge(
            corpus_directories,
            merge_timeout=engine_common.get_merge_timeout(
                DEFAULT_MERGE_TIMEOUT),
            tmp_dir=merge_tmp_dir,
            additional_args=arguments)

        move_mergeable_units(merge_directory, corpus_directory)
        new_corpus_len = shell.get_directory_file_count(corpus_directory)
        new_units_added = 0

        merge_error = None
        if merge_result.timed_out:
            merge_error = 'Merging new testcases timed out:'
        elif merge_result.return_code != 0:
            merge_error = 'Merging new testcases failed:'
        else:
            new_units_added = new_corpus_len - old_corpus_len

        stat_overrides['new_units_added'] = new_units_added

        if merge_result.output:
            stat_overrides.update(
                stats.parse_stats_from_merge_log(
                    merge_result.output.splitlines()))
    else:
        stat_overrides['new_units_added'] = 0
        logs.log('Skipped corpus merge since no new units added by fuzzing.')

    # Get the corpus size after the merge, which removed the duplicate units
    # that were created during this fuzzing session.
    stat_overrides['corpus_size'] = shell.get_directory_file_count(
        corpus_directory)

    # Delete all corpus directories except for the main one. These were
    # temporary directories used to store new testcase mutations, and they
    # have already been merged into the main corpus directory.
    if corpus_directory in corpus_directories:
        corpus_directories.remove(corpus_directory)
    for directory in corpus_directories:
        shutil.rmtree(directory, ignore_errors=True)

    if use_minijail:
        unbind_corpus_dirs(minijail_chroot, corpus_directories)

    # Apply overridden stats to the parsed stats prior to dumping.
    parsed_stats.update(stat_overrides)

    # Dump stats data for further uploading to BigQuery.
    engine_common.dump_big_query_data(parsed_stats, testcase_file_path,
                                      LIBFUZZER_PREFIX, fuzzer_name, command)

    # Add custom crash state based on fuzzer name (if needed).
    add_custom_crash_state_if_needed(fuzzer_name, log_lines, parsed_stats)
    for line in log_lines:
        print(line)

    # Add fuzzing strategies used.
    engine_common.print_fuzzing_strategies(fuzzing_strategies)

    # Add merge error (if any).
    if merge_error:
        print(data_types.CRASH_STACKTRACE_END_MARKER)
        print(merge_error)
        print(
            'Command:',
            get_printable_command(merge_result.command, fuzzer_path,
                                  use_minijail))
        print(merge_result.output)

    analyze_and_update_recommended_dictionary(runner, fuzzer_name, log_lines,
                                              corpus_directory, arguments)

    # Close minijail chroot.
    if use_minijail:
        minijail_chroot.close()

    # Record the stats to make them easily searchable in stackdriver.
    if new_units_added:
        logs.log('New units added to corpus: %d.' % new_units_added,
                 stats=parsed_stats)
    else:
        logs.log('No new units found.', stats=parsed_stats)
Example #17
    def _merge_new_units(
        self,
        target_path,
        corpus_dir,
        new_corpus_dir,
        fuzz_corpus_dirs,
        arguments,
        stat_overrides,
    ):
        """Merge new units."""
        # Decide whether the merge step is needed at all. If the libFuzzer run
        # added no new units, there is no need to merge.
        new_units_added = shell.get_directory_file_count(new_corpus_dir)
        if not new_units_added:
            stat_overrides["new_units_added"] = 0
            logs.log(
                "Skipped corpus merge since no new units added by fuzzing.")
            return

        # If this times out, it's possible that we will miss some units.
        # However, if we're taking >10 minutes to load/merge the corpus,
        # something is going very wrong and we probably don't want to make
        # things worse by adding units anyway.
        merge_corpus = self._create_merge_corpus_dir()

        merge_dirs = fuzz_corpus_dirs[:]

        # Merge the new units with the initial corpus.
        if corpus_dir not in merge_dirs:
            merge_dirs.append(corpus_dir)

        old_corpus_len = shell.get_directory_file_count(corpus_dir)

        new_units_added = 0
        try:
            result = self._minimize_corpus_two_step(
                target_path=target_path,
                arguments=arguments,
                existing_corpus_dirs=merge_dirs,
                new_corpus_dir=new_corpus_dir,
                output_corpus_dir=merge_corpus,
                reproducers_dir=None,
                max_time=engine_common.get_merge_timeout(
                    libfuzzer.DEFAULT_MERGE_TIMEOUT),
            )

            libfuzzer.move_mergeable_units(merge_corpus, corpus_dir)
            new_corpus_len = shell.get_directory_file_count(corpus_dir)
            new_units_added = new_corpus_len - old_corpus_len

            stat_overrides.update(result.stats)
        except (MergeError, engine.TimeoutError) as e:
            logs.log_warn("Merge failed.", error=e.message)

        stat_overrides["new_units_added"] = new_units_added

        # Record the stats to make them easily searchable in stackdriver.
        logs.log("Stats calculated.", stats=stat_overrides)
        if new_units_added:
            logs.log("New units added to corpus: %d." % new_units_added)
        else:
            logs.log("No new units found.")
Example #18
def do_corpus_pruning(context, last_execution_failed, revision):
    """Run corpus pruning."""
    # Set |FUZZ_TARGET| environment variable to help with unarchiving only the
    # fuzz target and its related files.
    environment.set_value("FUZZ_TARGET", context.fuzz_target.binary)

    if environment.is_trusted_host():
        from bot.untrusted_runner import tasks_host

        return tasks_host.do_corpus_pruning(context, last_execution_failed,
                                            revision)

    build_manager.setup_build(revision=revision)
    build_directory = environment.get_value("BUILD_DIR")
    if not build_directory:
        raise CorpusPruningException("Failed to setup build.")

    start_time = datetime.datetime.utcnow()
    runner = Runner(build_directory, context)
    pruner = CorpusPruner(runner)
    fuzzer_binary_name = os.path.basename(runner.target_path)

    # If our last execution failed, shrink to a randomized corpus of usable
    # size to prevent the corpus from growing unbounded and to avoid recurring
    # failures when trying to minimize it.
    if last_execution_failed:
        for corpus_url in [
                context.corpus.get_gcs_url(),
                context.quarantine_corpus.get_gcs_url(),
        ]:
            _limit_corpus_size(corpus_url, CORPUS_SIZE_LIMIT_FOR_FAILURES)

    # Get initial corpus to process from GCS.
    context.sync_to_disk()
    initial_corpus_size = shell.get_directory_file_count(
        context.initial_corpus_path)

    # Restore a small batch of quarantined units back to corpus.
    context.restore_quarantined_units()

    # Shrink to a minimized corpus using corpus merge.
    pruner.run(
        context.initial_corpus_path,
        context.minimized_corpus_path,
        context.bad_units_path,
    )

    # Sync minimized corpus back to GCS.
    context.sync_to_gcs()

    # Create corpus backup.
    backup_bucket = environment.get_value("BACKUP_BUCKET")
    corpus_backup_url = corpus_manager.backup_corpus(
        backup_bucket, context.corpus, context.minimized_corpus_path)

    minimized_corpus_size_units = shell.get_directory_file_count(
        context.minimized_corpus_path)
    minimized_corpus_size_bytes = shell.get_directory_size(
        context.minimized_corpus_path)

    logs.log("Corpus pruned from %d to %d units." %
             (initial_corpus_size, minimized_corpus_size_units))

    # Process bad units found during merge.
    # Mapping of crash state -> CorpusCrash
    crashes = {}
    pruner.process_bad_units(context.bad_units_path,
                             context.quarantine_corpus_path, crashes)
    context.quarantine_corpus.rsync_from_disk(context.quarantine_corpus_path)

    # Store corpus stats into CoverageInformation entity.
    project_qualified_name = context.fuzz_target.project_qualified_name()
    today = datetime.datetime.utcnow().date()
    coverage_info = data_types.CoverageInformation(
        fuzzer=project_qualified_name, date=today)

    quarantine_corpus_size = shell.get_directory_file_count(
        context.quarantine_corpus_path)
    quarantine_corpus_dir_size = shell.get_directory_size(
        context.quarantine_corpus_path)

    # Populate coverage stats.
    coverage_info.corpus_size_units = minimized_corpus_size_units
    coverage_info.corpus_size_bytes = minimized_corpus_size_bytes
    coverage_info.quarantine_size_units = quarantine_corpus_size
    coverage_info.quarantine_size_bytes = quarantine_corpus_dir_size
    coverage_info.corpus_backup_location = corpus_backup_url
    coverage_info.corpus_location = context.corpus.get_gcs_url()
    coverage_info.quarantine_location = context.quarantine_corpus.get_gcs_url()

    # Calculate remaining time to use for shared corpus merging.
    time_remaining = _get_time_remaining(start_time)
    if time_remaining <= 0:
        logs.log_warn("Not enough time for shared corpus merging.")
        return None

    cross_pollinator = CrossPollinator(runner)
    cross_pollinator.run(time_remaining)

    context.sync_to_gcs()

    # Update corpus size stats.
    minimized_corpus_size_units = shell.get_directory_file_count(
        context.minimized_corpus_path)
    minimized_corpus_size_bytes = shell.get_directory_size(
        context.minimized_corpus_path)
    coverage_info.corpus_size_units = minimized_corpus_size_units
    coverage_info.corpus_size_bytes = minimized_corpus_size_bytes

    logs.log("Finished.")

    result = CorpusPruningResult(
        coverage_info=coverage_info,
        crashes=list(crashes.values()),
        fuzzer_binary_name=fuzzer_binary_name,
        revision=environment.get_value("APP_REVISION"),
    )

    return result
Example #19
    def prepare(self, corpus_dir, target_path, _):
        """Prepare for a fuzzing session, by generating options. Returns a
    FuzzOptions object.

    Args:
      corpus_dir: The main corpus directory.
      target_path: Path to the target.
      build_dir: Path to the build directory.

    Returns:
      A FuzzOptions object.
    """
        arguments = fuzzer.get_arguments(target_path)
        strategy_pool = strategy_selection.generate_weighted_strategy_pool(
            strategy_list=strategy.LIBFUZZER_STRATEGY_LIST,
            use_generator=True,
            engine_name=self.name)
        strategy_info = launcher.pick_strategies(strategy_pool, target_path,
                                                 corpus_dir, arguments)

        arguments.extend(strategy_info.arguments)

        # Check for seed corpus and add it into corpus directory.
        engine_common.unpack_seed_corpus_if_needed(target_path, corpus_dir)

        # Pick a few testcases from our corpus to use as the initial corpus.
        subset_size = engine_common.random_choice(
            engine_common.CORPUS_SUBSET_NUM_TESTCASES)

        if (not strategy_info.use_dataflow_tracing
                and strategy_pool.do_strategy(strategy.CORPUS_SUBSET_STRATEGY)
                and shell.get_directory_file_count(corpus_dir) > subset_size):
            # Copy |subset_size| testcases into 'subset' directory.
            corpus_subset_dir = self._create_temp_corpus_dir('subset')
            launcher.copy_from_corpus(corpus_subset_dir, corpus_dir,
                                      subset_size)
            strategy_info.fuzzing_strategies.append(
                strategy.CORPUS_SUBSET_STRATEGY.name + '_' + str(subset_size))
            strategy_info.additional_corpus_dirs.append(corpus_subset_dir)
        else:
            strategy_info.additional_corpus_dirs.append(corpus_dir)

        # Check dict argument to make sure that it's valid.
        dict_argument = fuzzer_utils.extract_argument(arguments,
                                                      constants.DICT_FLAG,
                                                      remove=False)
        if dict_argument and not os.path.exists(dict_argument):
            logs.log_error('Invalid dict %s for %s.' %
                           (dict_argument, target_path))
            fuzzer_utils.extract_argument(arguments, constants.DICT_FLAG)

        # If there's no dict argument, check for %target_binary_name%.dict file.
        if (not fuzzer_utils.extract_argument(
                arguments, constants.DICT_FLAG, remove=False)):
            default_dict_path = dictionary_manager.get_default_dictionary_path(
                target_path)
            if os.path.exists(default_dict_path):
                arguments.append(constants.DICT_FLAG + default_dict_path)

        return LibFuzzerOptions(corpus_dir, arguments,
                                strategy_info.fuzzing_strategies,
                                strategy_info.additional_corpus_dirs,
                                strategy_info.extra_env,
                                strategy_info.use_dataflow_tracing,
                                strategy_info.is_mutations_run)
Example #20
def do_corpus_pruning(context, last_execution_failed, revision):
    """Run corpus pruning."""
    # Set |FUZZ_TARGET| environment variable to help with unarchiving only the
    # fuzz target and its related files.
    environment.set_value('FUZZ_TARGET', context.fuzz_target.binary)

    if environment.is_trusted_host():
        from bot.untrusted_runner import tasks_host
        return tasks_host.do_corpus_pruning(context, last_execution_failed,
                                            revision)

    if not build_manager.setup_build(revision=revision):
        raise CorpusPruningException('Failed to setup build.')

    build_directory = environment.get_value('BUILD_DIR')
    start_time = datetime.datetime.utcnow()
    runner = Runner(build_directory, context)
    pruner = CorpusPruner(runner)
    fuzzer_binary_name = os.path.basename(runner.target_path)

    # If our last execution failed, shrink to a randomized corpus of usable
    # size to prevent the corpus from growing unbounded and to avoid recurring
    # failures when trying to minimize it.
    if last_execution_failed:
        for corpus_url in [
                context.corpus.get_gcs_url(),
                context.quarantine_corpus.get_gcs_url()
        ]:
            _limit_corpus_size(corpus_url)

    # Get initial corpus to process from GCS.
    context.sync_to_disk()
    initial_corpus_size = shell.get_directory_file_count(
        context.initial_corpus_path)

    # Restore a small batch of quarantined units back to corpus.
    context.restore_quarantined_units()

    # Shrink to a minimized corpus using corpus merge.
    pruner_stats = pruner.run(context.initial_corpus_path,
                              context.minimized_corpus_path,
                              context.bad_units_path)

    # Sync minimized corpus back to GCS.
    context.sync_to_gcs()

    # Create corpus backup.
    # Temporarily copy the past crash regressions folder into the minimized
    # corpus so that the corpus backup archive includes both.
    regressions_input_dir = os.path.join(context.initial_corpus_path,
                                         'regressions')
    regressions_output_dir = os.path.join(context.minimized_corpus_path,
                                          'regressions')
    if shell.get_directory_file_count(regressions_input_dir):
        shutil.copytree(regressions_input_dir, regressions_output_dir)
    backup_bucket = environment.get_value('BACKUP_BUCKET')
    corpus_backup_url = corpus_manager.backup_corpus(
        backup_bucket, context.corpus, context.minimized_corpus_path)
    shell.remove_directory(regressions_output_dir)

    minimized_corpus_size_units = shell.get_directory_file_count(
        context.minimized_corpus_path)
    minimized_corpus_size_bytes = shell.get_directory_size(
        context.minimized_corpus_path)

    logs.log('Corpus pruned from %d to %d units.' %
             (initial_corpus_size, minimized_corpus_size_units))

    # Process bad units found during merge.
    # Mapping of crash state -> CorpusCrash
    crashes = {}
    pruner.process_bad_units(context.bad_units_path,
                             context.quarantine_corpus_path, crashes)
    context.quarantine_corpus.rsync_from_disk(context.quarantine_corpus_path)

    # Store corpus stats into CoverageInformation entity.
    project_qualified_name = context.fuzz_target.project_qualified_name()
    today = datetime.datetime.utcnow().date()
    coverage_info = data_types.CoverageInformation(
        fuzzer=project_qualified_name, date=today)

    quarantine_corpus_size = shell.get_directory_file_count(
        context.quarantine_corpus_path)
    quarantine_corpus_dir_size = shell.get_directory_size(
        context.quarantine_corpus_path)

    # Save the minimized corpus size before cross pollination to put in BigQuery.
    pre_pollination_corpus_size = minimized_corpus_size_units

    # Populate coverage stats.
    coverage_info.corpus_size_units = minimized_corpus_size_units
    coverage_info.corpus_size_bytes = minimized_corpus_size_bytes
    coverage_info.quarantine_size_units = quarantine_corpus_size
    coverage_info.quarantine_size_bytes = quarantine_corpus_dir_size
    coverage_info.corpus_backup_location = corpus_backup_url
    coverage_info.corpus_location = context.corpus.get_gcs_url()
    coverage_info.quarantine_location = context.quarantine_corpus.get_gcs_url()

    # Calculate remaining time to use for shared corpus merging.
    time_remaining = _get_time_remaining(start_time)
    if time_remaining <= 0:
        logs.log_warn('Not enough time for shared corpus merging.')
        return None

    cross_pollinator = CrossPollinator(runner)
    pollinator_stats = cross_pollinator.run(time_remaining)

    context.sync_to_gcs()

    # Update corpus size stats.
    minimized_corpus_size_units = shell.get_directory_file_count(
        context.minimized_corpus_path)
    minimized_corpus_size_bytes = shell.get_directory_size(
        context.minimized_corpus_path)
    coverage_info.corpus_size_units = minimized_corpus_size_units
    coverage_info.corpus_size_bytes = minimized_corpus_size_bytes

    logs.log('Finished.')

    sources = ','.join([
        fuzzer.fuzz_target.project_qualified_name()
        for fuzzer in context.cross_pollinate_fuzzers
    ])

    cross_pollination_stats = None
    if pruner_stats and pollinator_stats:
        cross_pollination_stats = CrossPollinationStats(
            project_qualified_name, context.cross_pollination_method, sources,
            context.tag, initial_corpus_size, pre_pollination_corpus_size,
            pruner_stats['edge_coverage'], pollinator_stats['edge_coverage'],
            pruner_stats['feature_coverage'],
            pollinator_stats['feature_coverage'])

    return CorpusPruningResult(coverage_info=coverage_info,
                               crashes=list(crashes.values()),
                               fuzzer_binary_name=fuzzer_binary_name,
                               revision=environment.get_value('APP_REVISION'),
                               cross_pollination_stats=cross_pollination_stats)
Example #21
def _count_corpus_files(directory):
    """Count the number of corpus files."""
    return shell.get_directory_file_count(directory)
Example #22
def execute(input_directory, output_directory, fuzzer_name,
            generation_timeout):
    """Execute ML RNN generator to produce new inputs.

  This method should be called inside the launcher to generate a number of
  new inputs based on the ML RNN model.

  It fetches the ML model from the GCS bucket specified in the environment
  variable `CORPUS_BUCKET`. The script that runs the model resides in the
  folder `tools/fuzzers/ml/rnn`.

  Args:
    input_directory: Seed corpus path. The directory should not be empty.
    output_directory: The directory to place generated inputs.
    fuzzer_name: Name of the fuzzer, e.g. libpng_read_fuzzer. It indicates the
        subdirectory in the GCS bucket where models are stored.
    generation_timeout: Time in seconds for the generator to run. Normally it
        takes <1s to generate an input, assuming the input length is <4KB.
  """
    if environment.platform() != 'LINUX':
        logs.log('Unsupported platform for ML RNN generation, skipping.')
        return

    # Validate corpus folder.
    file_count = shell.get_directory_file_count(input_directory)
    if not file_count:
        logs.log('Corpus is empty. Skip generation.')
        return

    # Number of existing new inputs. They may have been generated by other
    # generators.
    old_corpus_units = shell.get_directory_file_count(output_directory)
    old_corpus_bytes = shell.get_directory_size(output_directory)

    # Get model path.
    model_path = prepare_model_directory(fuzzer_name)
    if not model_path:
        return

    result = run(input_directory, output_directory, model_path,
                 generation_timeout)

    # The generation process exited abnormally, but not due to a timeout,
    # meaning an error occurred during execution.
    if result.return_code and not result.timed_out:
        if result.return_code == constants.ExitCode.CORPUS_TOO_SMALL:
            logs.log_warn(
                'ML RNN generation for fuzzer %s aborted due to small corpus.'
                % fuzzer_name)
        else:
            logs.log_error(
                'ML RNN generation for fuzzer %s failed with ExitCode = %d.' %
                (fuzzer_name, result.return_code),
                output=result.output)
        return

    # A timeout is not an error if new units were generated.
    if result.timed_out:
        logs.log_warn('ML RNN generation for fuzzer %s timed out.' %
                      fuzzer_name)

    new_corpus_units = (shell.get_directory_file_count(output_directory) -
                        old_corpus_units)
    new_corpus_bytes = (shell.get_directory_size(output_directory) -
                        old_corpus_bytes)
    if new_corpus_units:
        logs.log(
            'Added %d new inputs (%d bytes) using ML RNN generator for %s.' %
            (new_corpus_units, new_corpus_bytes, fuzzer_name))
    else:
        logs.log_error('ML RNN generator did not produce any inputs for %s' %
                       fuzzer_name,
                       output=result.output)
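A usage sketch for the entry point above, with hypothetical paths and fuzzer
name:

# Hypothetical directories and fuzzer name, for illustration only.
execute(input_directory='/corpus/seed',
        output_directory='/corpus/generated',
        fuzzer_name='libpng_read_fuzzer',
        generation_timeout=60)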
Example #23
def main(argv):
  """Run libFuzzer as specified by argv."""
  atexit.register(fuzzer_utils.cleanup)

  # Initialize variables.
  arguments = argv[1:]
  testcase_file_path = arguments.pop(0)

  target_name = environment.get_value('FUZZ_TARGET')
  if arguments and arguments[0] == target_name:
    # Pop legacy fuzz target argument.
    arguments.pop(0)

  fuzzer_name = data_types.fuzz_target_project_qualified_name(
      utils.current_project(), target_name)

  # Initialize log handler.
  logs.configure(
      'run_fuzzer', {
          'fuzzer': fuzzer_name,
          'engine': 'libFuzzer',
          'job_name': environment.get_value('JOB_NAME')
      })

  profiler.start_if_needed('libfuzzer_launcher')

  # Make sure that the fuzzer binary exists.
  build_directory = environment.get_value('BUILD_DIR')
  fuzzer_path = engine_common.find_fuzzer_path(build_directory, target_name)
  if not fuzzer_path:
    return

  # Install signal handler.
  signal.signal(signal.SIGTERM, engine_common.signal_term_handler)

  # Set up temp dir.
  engine_common.recreate_directory(fuzzer_utils.get_temp_dir())

  # Set up minijail if needed.
  use_minijail = environment.get_value('USE_MINIJAIL')
  runner = libfuzzer.get_runner(
      fuzzer_path, temp_dir=fuzzer_utils.get_temp_dir())

  if use_minijail:
    minijail_chroot = runner.chroot
  else:
    minijail_chroot = None

  # Get corpus directory.
  corpus_directory = environment.get_value('FUZZ_CORPUS_DIR')

  # Add common arguments which are necessary to be used for every run.
  arguments = expand_with_common_arguments(arguments)

  # Add sanitizer options to environment that were specified in the .options
  # file and options that this script requires.
  set_sanitizer_options(fuzzer_path)

  # If we don't have a corpus, then that means this is not a fuzzing run.
  if not corpus_directory:
    load_testcase_if_exists(runner, testcase_file_path, fuzzer_name,
                            use_minijail, arguments)
    return

  # We don't have a crash testcase; start fuzzing.

  # Check dict argument to make sure that it's valid.
  dict_argument = fuzzer_utils.extract_argument(
      arguments, constants.DICT_FLAG, remove=False)
  if dict_argument and not os.path.exists(dict_argument):
    logs.log_error('Invalid dict %s for %s.' % (dict_argument, fuzzer_name))
    fuzzer_utils.extract_argument(arguments, constants.DICT_FLAG)

  # If there's no dict argument, check for %target_binary_name%.dict file.
  if (not fuzzer_utils.extract_argument(
      arguments, constants.DICT_FLAG, remove=False)):
    default_dict_path = dictionary_manager.get_default_dictionary_path(
        fuzzer_path)
    if os.path.exists(default_dict_path):
      arguments.append(constants.DICT_FLAG + default_dict_path)

  # Set up scratch directory for writing new units.
  new_testcases_directory = create_corpus_directory('new')

  # The strategy pool is the list of strategies we attempt to enable, whereas
  # fuzzing strategies is the list of strategies that actually get enabled
  # (e.g. if mutator is selected in the pool but is unavailable for a given
  # target, it is not added to fuzzing strategies). See the sketch after this
  # example.
  strategy_pool = strategy_selection.generate_weighted_strategy_pool(
      strategy_list=strategy.LIBFUZZER_STRATEGY_LIST,
      use_generator=True,
      engine_name='libFuzzer')
  strategy_info = pick_strategies(
      strategy_pool,
      fuzzer_path,
      corpus_directory,
      arguments,
      minijail_chroot=minijail_chroot)
  arguments.extend(strategy_info.arguments)

  # Timeout for fuzzer run.
  fuzz_timeout = get_fuzz_timeout(strategy_info.is_mutations_run)

  # Get list of corpus directories.
  # TODO(flowerhack): Implement this to handle corpus sync'ing.
  if environment.platform() == 'FUCHSIA':
    corpus_directories = []
  else:
    corpus_directories = get_corpus_directories(
        corpus_directory,
        new_testcases_directory,
        fuzzer_path,
        strategy_info.fuzzing_strategies,
        strategy_pool,
        minijail_chroot=minijail_chroot,
        allow_corpus_subset=not strategy_info.use_dataflow_tracing)

  corpus_directories.extend(strategy_info.additional_corpus_dirs)

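  # libFuzzer writes crash artifacts (crash-*, oom-*, timeout-* files) under
  # artifact_prefix, so point it at the testcase file's directory.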
  artifact_prefix = os.path.abspath(os.path.dirname(testcase_file_path))
  # Execute the fuzzer binary with original arguments.
  fuzz_result = runner.fuzz(
      corpus_directories,
      fuzz_timeout=fuzz_timeout,
      artifact_prefix=artifact_prefix,
      additional_args=arguments,
      extra_env=strategy_info.extra_env)

  if (not use_minijail and
      fuzz_result.return_code == constants.LIBFUZZER_ERROR_EXITCODE):
    # Minijail itself returns 1 whenever the wrapped process exits with a
    # nonzero code, so a return code of 1 can only be attributed to a
    # libFuzzer error when minijail is not in use.
    logs.log_error(ENGINE_ERROR_MESSAGE, engine_output=fuzz_result.output)

  log_lines = fuzz_result.output.splitlines()
  # Output can be large, so save some memory by removing reference to the
  # original output which is no longer needed.
  fuzz_result.output = None

  # Check if we crashed, and get the crash testcase path.
  crash_testcase_file_path = runner.get_testcase_path(log_lines)
  if crash_testcase_file_path:
    # Copy crash testcase contents into the main testcase path.
    shutil.move(crash_testcase_file_path, testcase_file_path)

  # Print the command output.
  bot_name = environment.get_value('BOT_NAME', '')
  command = fuzz_result.command
  if use_minijail:
    # Remove minijail prefix.
    command = engine_common.strip_minijail_command(command, fuzzer_path)
  print(engine_common.get_log_header(command, bot_name,
                                     fuzz_result.time_executed))

  # Parse stats information based on libFuzzer output.
  parsed_stats = parse_log_stats(log_lines)

  # Extend parsed stats by additional performance features.
  parsed_stats.update(
      stats.parse_performance_features(
          log_lines, strategy_info.fuzzing_strategies, arguments))

  # Set some initial stat overrides.
  timeout_limit = fuzzer_utils.extract_argument(
      arguments, constants.TIMEOUT_FLAG, remove=False)

  expected_duration = runner.get_max_total_time(fuzz_timeout)
  actual_duration = int(fuzz_result.time_executed)
  fuzzing_time_percent = 100 * actual_duration / float(expected_duration)
  stat_overrides = {
      'timeout_limit': int(timeout_limit),
      'expected_duration': expected_duration,
      'actual_duration': actual_duration,
      'fuzzing_time_percent': fuzzing_time_percent,
  }

  # Remove fuzzing arguments before merge and dictionary analysis step.
  remove_fuzzing_arguments(arguments)

  # Make a decision on whether merge step is needed at all. If there are no
  # new units added by libFuzzer run, then no need to do merge at all.
  new_units_added = shell.get_directory_file_count(new_testcases_directory)
  merge_error = None
  if new_units_added:
    # Merge the new units with the initial corpus.
    if corpus_directory not in corpus_directories:
      corpus_directories.append(corpus_directory)

    # If this times out, it's possible that we will miss some units. However,
    # if we're taking >10 minutes to load/merge the corpus, something is going
    # very wrong and we probably don't want to make things worse by adding
    # units anyway.

    merge_tmp_dir = None
    if not use_minijail:
      merge_tmp_dir = os.path.join(fuzzer_utils.get_temp_dir(), 'merge_workdir')
      engine_common.recreate_directory(merge_tmp_dir)

    old_corpus_len = shell.get_directory_file_count(corpus_directory)
    merge_directory = create_merge_directory()
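    # libFuzzer's merge writes its output into the first corpus directory it
    # is given, so the fresh merge directory must go first.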
    corpus_directories.insert(0, merge_directory)

    if use_minijail:
      bind_corpus_dirs(minijail_chroot, [merge_directory])

    merge_result = runner.merge(
        corpus_directories,
        merge_timeout=engine_common.get_merge_timeout(DEFAULT_MERGE_TIMEOUT),
        tmp_dir=merge_tmp_dir,
        additional_args=arguments)

    move_mergeable_units(merge_directory, corpus_directory)
    new_corpus_len = shell.get_directory_file_count(corpus_directory)
    new_units_added = 0

    if merge_result.timed_out:
      merge_error = 'Merging new testcases timed out:'
    elif merge_result.return_code != 0:
      merge_error = 'Merging new testcases failed:'
    else:
      new_units_added = new_corpus_len - old_corpus_len

    stat_overrides['new_units_added'] = new_units_added

    if merge_result.output:
      stat_overrides.update(
          stats.parse_stats_from_merge_log(merge_result.output.splitlines()))
  else:
    stat_overrides['new_units_added'] = 0
    logs.log('Skipped corpus merge since no new units added by fuzzing.')

  # Get corpus size after merge. This removes the duplicate units that were
  # created during this fuzzing session.
  # TODO(flowerhack): Remove this workaround once we can handle corpus sync.
  if environment.platform() != 'FUCHSIA':
    stat_overrides['corpus_size'] = shell.get_directory_file_count(
        corpus_directory)

  # Delete all corpus directories except for the main one. These were temporary
  # directories to store new testcase mutations and have already been merged to
  # main corpus directory.
  if corpus_directory in corpus_directories:
    corpus_directories.remove(corpus_directory)
  for directory in corpus_directories:
    shutil.rmtree(directory, ignore_errors=True)

  if use_minijail:
    unbind_corpus_dirs(minijail_chroot, corpus_directories)

  # Apply overridden stats to the parsed stats prior to dumping.
  parsed_stats.update(stat_overrides)

  # Dump stats data for further uploading to BigQuery.
  engine_common.dump_big_query_data(parsed_stats, testcase_file_path, command)

  # Add custom crash state based on fuzzer name (if needed).
  add_custom_crash_state_if_needed(fuzzer_name, log_lines, parsed_stats)
  for line in log_lines:
    print(line)

  # Add fuzzing strategies used.
  print(engine_common.format_fuzzing_strategies(
      strategy_info.fuzzing_strategies))

  # Add merge error (if any).
  if merge_error:
    print(data_types.CRASH_STACKTRACE_END_MARKER)
    print(merge_error)
    print('Command:',
          get_printable_command(merge_result.command, fuzzer_path,
                                use_minijail))
    print(merge_result.output)

  analyze_and_update_recommended_dictionary(runner, fuzzer_name, log_lines,
                                            corpus_directory, arguments)

  # Close minijail chroot.
  if use_minijail:
    minijail_chroot.close()

  # Record the stats to make them easily searchable in stackdriver.
  if new_units_added:
    logs.log(
        'New units added to corpus: %d.' % new_units_added, stats=parsed_stats)
  else:
    logs.log('No new units found.', stats=parsed_stats)
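
The strategy-pool comment in the example above distinguishes the strategies we
attempt to enable from those that end up enabled. A minimal sketch of that
two-stage selection, using hypothetical strategy names and weights (the real
logic lives in strategy_selection.generate_weighted_strategy_pool and
pick_strategies, as called above):

import random

# Hypothetical weights: the probability of each strategy entering the pool.
STRATEGY_WEIGHTS = {
    'corpus_subset': 0.5,
    'value_profile': 0.3,
    'mutator': 0.2,
}


def generate_weighted_pool():
    """Stage 1: pool every strategy that wins its own weighted coin flip."""
    return [name for name, prob in STRATEGY_WEIGHTS.items()
            if random.random() < prob]


def pick_supported_strategies(pool, supported_by_target):
    """Stage 2: enable only the pooled strategies the target supports."""
    return [name for name in pool if name in supported_by_target]


pool = generate_weighted_pool()
fuzzing_strategies = pick_supported_strategies(pool, {'corpus_subset'})
print('Pool: %s, enabled: %s' % (pool, fuzzing_strategies))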