Example #1
    def record_stats(self):
        """Use fuzzer.get_stats if it is offered, validate the stats and then
        save them to a file so that they will be synced to the filestore."""
        # TODO(metzman): Make this more resilient so we don't wait forever and
        # so that breakages in stats parsing don't break the runner.

        fuzzer_module = get_fuzzer_module(self.fuzzer)

        fuzzer_module_get_stats = getattr(fuzzer_module, 'get_stats', None)
        if fuzzer_module_get_stats is None:
            # Stats support is optional.
            return

        try:
            output_corpus = environment.get('OUTPUT_CORPUS_DIR')
            stats_json_str = fuzzer_module_get_stats(output_corpus,
                                                     self.log_file)

        except Exception:  # pylint: disable=broad-except
            logs.error('Call to %s failed.', fuzzer_module_get_stats)
            return

        try:
            fuzzer_stats.validate_fuzzer_stats(stats_json_str)
        except (ValueError, json.decoder.JSONDecodeError):
            logs.error('Stats are invalid.')
            return

        stats_filename = experiment_utils.get_stats_filename(self.cycle)
        stats_path = os.path.join(self.results_dir, stats_filename)
        with open(stats_path, 'w') as stats_file_handle:
            stats_file_handle.write(stats_json_str)
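
The get_stats hook this method looks up is defined per fuzzer module and is not shown here. A minimal, purely hypothetical sketch (the key name and log format are assumptions) only has to return a JSON string that fuzzer_stats.validate_fuzzer_stats accepts:

import json
import re

def get_stats(output_corpus, log_filename):  # pylint: disable=unused-argument
    """Hypothetical get_stats hook: scrape an execs/s figure from the fuzzer
    log and return the stats as a JSON string."""
    stats = {'execs_per_sec': 0.0}
    with open(log_filename) as log_file:
        for line in log_file:
            match = re.search(r'exec/s: (\d+)', line)
            if match:
                stats['execs_per_sec'] = float(match.group(1))
    return json.dumps(stats)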
Example #2
def _clean_seed_corpus(seed_corpus_dir):
    """Prepares |seed_corpus_dir| for the trial. This ensures that it can be
    used by AFL which is picky about the seed corpus. Moves seed corpus files
    from sub-directories into the corpus directory root. Also, deletes any files
    that exceed the 1 MB limit. If the NO_SEEDS env var is specified, then the
    seed corpus files are deleted."""
    if not os.path.exists(seed_corpus_dir):
        return

    if environment.get('NO_SEEDS'):
        logs.info('NO_SEEDS specified, deleting seed corpus files.')
        shutil.rmtree(seed_corpus_dir)
        os.mkdir(seed_corpus_dir)
        return

    failed_to_move_files = []
    for root, _, files in os.walk(seed_corpus_dir):
        for filename in files:
            file_path = os.path.join(root, filename)

            if os.path.getsize(file_path) > CORPUS_ELEMENT_BYTES_LIMIT:
                os.remove(file_path)
                logs.warning('Removed seed file %s as it exceeds 1 MB limit.',
                             file_path)
                continue

            sha1sum = utils.file_hash(file_path)
            new_file_path = os.path.join(seed_corpus_dir, sha1sum)
            try:
                shutil.move(file_path, new_file_path)
            except OSError:
                failed_to_move_files.append((file_path, new_file_path))

    if failed_to_move_files:
        logs.error('Failed to move seed corpus files: %s', failed_to_move_files)
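
As a quick illustration of what _clean_seed_corpus does (assuming the surrounding module with utils.file_hash and CORPUS_ELEMENT_BYTES_LIMIT is importable), a nested seed file ends up at the corpus root, renamed to its content hash:

import os
import tempfile

seed_dir = tempfile.mkdtemp()
os.makedirs(os.path.join(seed_dir, 'nested'))
with open(os.path.join(seed_dir, 'nested', 'seed1'), 'wb') as seed_file:
    seed_file.write(b'hello')

_clean_seed_corpus(seed_dir)
print(os.listdir(seed_dir))  # One file, named after its hash.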
Example #3
def copy_oss_fuzz_files(project, commit_date, benchmark_dir):
    """Checkout the right files from OSS-Fuzz to build the benchmark based on
    |project| and |commit_date|. Then copy them to |benchmark_dir|."""
    if not os.path.exists(os.path.join(OSS_FUZZ_DIR, '.git')):
        logs.error(
            '%s is not a git repo. Try running git submodule update --init',
            OSS_FUZZ_DIR)
        raise RuntimeError('%s is not a git repo.' % OSS_FUZZ_DIR)
    oss_fuzz_repo_manager = GitRepoManager(OSS_FUZZ_DIR)
    projects_dir = os.path.join(OSS_FUZZ_DIR, 'projects', project)
    try:
        # Find an OSS-Fuzz commit that can be used to build the benchmark.
        _, oss_fuzz_commit, _ = oss_fuzz_repo_manager.git([
            'log', '--before=' + commit_date.isoformat(), '-n1', '--format=%H',
            projects_dir
        ])
        oss_fuzz_commit = oss_fuzz_commit.strip()
        if not oss_fuzz_commit:
            logs.warning('No suitable earlier OSS-Fuzz commit found.')
            return False
        oss_fuzz_repo_manager.git(['checkout', oss_fuzz_commit, projects_dir])
        dir_util.copy_tree(projects_dir, benchmark_dir)
        os.remove(os.path.join(benchmark_dir, 'project.yaml'))
        return True
    finally:
        oss_fuzz_repo_manager.git(['reset', '--hard'])
Example #4
def _mirror_output(process: subprocess.Popen, output_files: List) -> str:
    """Mirror output from |process|'s stdout to |output_files| and return the
    output."""
    lines = []
    out_queue, thread = _start_enqueue_thread(process)

    while True:
        # See if we can get a line from the queue.
        try:
            # TODO(metzman): Handle cases where the process does not have utf-8
            # encoded output.
            line = out_queue.get_nowait().decode('utf-8', errors='ignore')
        except queue.Empty:
            if not thread.is_alive():
                break
            continue
        if not line:
            if not thread.is_alive():
                break
            continue
        # If we did get a line, add it to our list and write it to the
        # output_files.
        lines.append(line)
        for output_file in output_files[:]:
            try:
                output_file.write(line)
                output_file.flush()
            except ValueError:
                logs.error('Could not write to output_file: %s.', output_file)
                output_files.remove(output_file)
    thread.join()
    return ''.join(lines)
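
_start_enqueue_thread is not part of this snippet. A plausible sketch, and only an assumption about that helper, is a daemon thread that pushes raw stdout lines onto a queue, which is what the get_nowait()/decode() calls above imply:

import queue
import threading

def _start_enqueue_thread(process):
    """Hypothetical sketch: forward raw byte lines from |process|'s stdout
    onto a queue so the caller can poll without blocking."""
    out_queue = queue.Queue()

    def _enqueue():
        for line in iter(process.stdout.readline, b''):
            out_queue.put(line)
        process.stdout.close()

    thread = threading.Thread(target=_enqueue, daemon=True)
    thread.start()
    return out_queue, thread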
Example #5
def _filter_fuzzers_with_few_samples(
        benchmark_snapshot_df, threshold=_DEFAULT_FUZZER_SAMPLE_NUM_THRESHOLD):
    """Takes a benchmark snapshot and filters out fuzzers that have a sample
    size that is smaller than 80% of the largest sample size.

    This is especially useful after finding a good snapshot time near the end
    of the experiment, when most of the trials are still running. If a fuzzer
    has far fewer trials at that snapshot time than the others, it is better
    to drop it entirely.

    Returns the filtered benchmark snapshot.
    """
    samples_per_fuzzer = benchmark_snapshot_df.fuzzer.value_counts()
    max_samples = samples_per_fuzzer.max()
    criteria = samples_per_fuzzer > threshold * max_samples
    ok_fuzzers = samples_per_fuzzer[criteria].index

    # Log the list of bad fuzzers.
    bad_fuzzers = sorted(
        set(benchmark_snapshot_df.fuzzer.unique().tolist()) -
        set(ok_fuzzers.tolist()))
    if bad_fuzzers:
        benchmark_name = benchmark_snapshot_df.benchmark.unique()[0]
        logs.error(
            'Filtered bad fuzzers from {benchmark_name}: {bad_fuzzers}'.format(
                bad_fuzzers=bad_fuzzers, benchmark_name=benchmark_name))

    return benchmark_snapshot_df[benchmark_snapshot_df.fuzzer.isin(ok_fuzzers)]
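
An illustrative call on a toy pandas DataFrame (column names taken from the code above, coverage numbers made up): fuzzer_b has 2 of the maximum 5 samples, which is below 0.8 * 5, so it is dropped.

import pandas as pd

snapshot = pd.DataFrame({
    'benchmark': ['libpng'] * 7,
    'fuzzer': ['fuzzer_a'] * 5 + ['fuzzer_b'] * 2,
    'edges_covered': [100, 110, 105, 102, 108, 90, 95],
})

filtered = _filter_fuzzers_with_few_samples(snapshot, threshold=0.8)
print(filtered.fuzzer.unique())  # ['fuzzer_a']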
Example #6
def retry_build_loop(build_func: Callable, inputs: List[Tuple],
                     num_concurrent_builds: int) -> List:
    """Calls |build_func| in parallel on |inputs|. Repeat on failures up to
    |NUM_BUILD_RETRIES| times. Returns the list of inputs that |build_func| was
    called successfully on."""
    successes = []
    logs.info('Concurrent builds: %d.', num_concurrent_builds)
    with mp_pool.ThreadPool(num_concurrent_builds) as pool:
        for _ in range(NUM_BUILD_RETRIES):
            logs.info('Building using (%s): %s', build_func, inputs)
            results = pool.starmap(build_func, inputs)
            curr_successes, curr_failures = split_successes_and_failures(
                inputs, results)

            logs.info('Build successes: %s', curr_successes)
            successes.extend(curr_successes)
            if not curr_failures:
                break

            logs.error('Build failures: %s', curr_failures)
            inputs = curr_failures
            sleep_interval = random.uniform(1, BUILD_FAIL_WAIT)
            logs.info('Sleeping for %d secs before retrying.', sleep_interval)
            time.sleep(sleep_interval)

    return successes
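
split_successes_and_failures is defined elsewhere. A plausible sketch, assuming build_func returns a truthy value on success, pairs each input with its result:

def split_successes_and_failures(inputs, results):
    """Hypothetical sketch: split |inputs| into (successes, failures) based on
    the truthiness of the corresponding entry in |results|."""
    successes = [inp for inp, result in zip(inputs, results) if result]
    failures = [inp for inp, result in zip(inputs, results) if not result]
    return successes, failures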
Example #7
def pytype(paths: List[Path]) -> bool:
    """Run pytype on |path| if it is a python file. Return False if it fails
    type checking."""
    # Pytype isn't supported on Python3.8+. See
    # https://github.com/google/pytype/issues/440.
    assert sys.version_info.major == 3, "Need Python3."
    if sys.version_info.minor > 7:
        logs.error(
            'Python version is: "%s". You should be using 3.7. '
            'Not running pytype.', sys.version)
        return True

    paths = [path for path in paths if is_python(path)]
    if not paths:
        return True

    base_command = ['python3', '-m', 'pytype']
    success = True

    # TODO(metzman): Change this to the parallel pytype when the path issue is
    # solved.
    for path in paths:
        command = base_command[:]
        command.append(path)
        returncode = subprocess.run(command, check=False).returncode
        if returncode != 0:
            success = False
    return success
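
is_python is not defined in this snippet; a minimal sketch consistent with how it is used above (an assumption, not the project's actual helper):

from pathlib import Path

def is_python(path) -> bool:
    """Hypothetical sketch: treat a file as Python source if it ends in .py."""
    return Path(path).suffix == '.py'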
Example #8
def execute(  # pylint: disable=too-many-locals,too-many-branches
        command: List[str],
        *args,
        expect_zero: bool = True,
        timeout: int = None,
        write_to_stdout=False,
        # If not set, will default to PIPE.
        output_file=None,
        # Not True by default because we can't always set group on processes.
        kill_children: bool = False,
        **kwargs) -> ProcessResult:
    """Execute |command| and return the returncode and the output"""
    if write_to_stdout:
        # Don't set stdout; its default value (None) makes the child's output
        # go to stdout.
        assert output_file is None
    elif not output_file:
        output_file = subprocess.PIPE

    kwargs['stdout'] = output_file
    kwargs['stderr'] = subprocess.STDOUT
    if kill_children:
        kwargs['preexec_fn'] = os.setsid

    process = subprocess.Popen(command, *args, **kwargs)
    process_group_id = os.getpgid(process.pid)

    wrapped_process = WrappedPopen(process)
    if timeout is not None:
        kill_thread = _start_kill_thread(wrapped_process, kill_children,
                                         timeout)
    output, _ = process.communicate()

    if timeout is not None:
        kill_thread.cancel()
    elif kill_children:
        # elif because the kill_thread will kill children if needed.
        _kill_process_group(process_group_id)

    retcode = process.returncode

    command_log_str = ' '.join(command)[:LOG_LIMIT_FIELD]
    log_message = 'Executed command: "%s" returned: %d.'

    if output is not None:
        output = output.decode('utf-8', errors='ignore')
        output_for_log = output[-LOG_LIMIT_FIELD:]
        log_extras = {'output': output_for_log}
    else:
        log_extras = None

    if expect_zero and retcode != 0 and not wrapped_process.timed_out:
        logs.error(log_message, command_log_str, retcode, extras=log_extras)
        raise subprocess.CalledProcessError(retcode, command)

    logs.debug(log_message, command_log_str, retcode, extras=log_extras)
    return ProcessResult(retcode, output, wrapped_process.timed_out)
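
An illustrative call, assuming ProcessResult exposes the fields in the order they are constructed above (retcode, output, timed_out):

# Capture output via the default PIPE and enforce a timeout.
result = execute(['echo', 'hello'], timeout=10)
print(result.retcode, result.output, result.timed_out)

# With expect_zero=True (the default), a nonzero exit status raises
# subprocess.CalledProcessError instead of returning a ProcessResult.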
Example #9
def main():
    """Run an experiment in the cloud."""
    logs.initialize()

    parser = argparse.ArgumentParser(
        description='Begin an experiment that evaluates fuzzers on one or '
        'more benchmarks.')

    all_benchmarks = benchmark_utils.get_all_benchmarks()
    all_fuzzers = fuzzer_utils.get_fuzzer_names()

    parser.add_argument('-b',
                        '--benchmarks',
                        help='Benchmark names. All of them by default.',
                        nargs='+',
                        required=False,
                        default=all_benchmarks,
                        choices=all_benchmarks)
    parser.add_argument('-c',
                        '--experiment-config',
                        help='Path to the experiment configuration yaml file.',
                        required=True)
    parser.add_argument('-e',
                        '--experiment-name',
                        help='Experiment name.',
                        required=True)
    fuzzers_group = parser.add_mutually_exclusive_group()
    fuzzers_group.add_argument('-f',
                               '--fuzzers',
                               help='Fuzzers to use.',
                               nargs='+',
                               required=False,
                               default=None,
                               choices=all_fuzzers)
    fuzzers_group.add_argument('-cf',
                               '--changed-fuzzers',
                               help=('Use fuzzers that have changed since the '
                                     'last experiment. The last experiment is '
                                     'determined by the database your '
                                     'experiment uses, not necessarily the '
                                     'fuzzbench service.'),
                               action='store_true',
                               required=False)

    args = parser.parse_args()

    if args.changed_fuzzers:
        fuzzers = experiment_changes.get_fuzzers_changed_since_last()
        if not fuzzers:
            logs.error('No fuzzers changed since last experiment. Exiting.')
            return 1
    else:
        fuzzers = args.fuzzers or all_fuzzers

    start_experiment(args.experiment_name, args.experiment_config,
                     args.benchmarks, fuzzers)
    return 0
Example #10
def execute(  # pylint: disable=too-many-locals,too-many-branches
        command: List[str],
        *args,
        expect_zero: bool = True,
        output_files=None,
        timeout: int = None,
        write_to_stdout: bool = True,
        # Not True by default because we can't always set group on processes.
        kill_children: bool = False,
        **kwargs) -> ProcessResult:
    """Execute |command| and return the returncode and the output"""
    if output_files is None:
        output_files = []
    else:
        output_files = output_files[:]
    if write_to_stdout:
        output_files.append(sys.stdout)
    if output_files:
        kwargs['bufsize'] = 1
        kwargs['close_fds'] = 'posix' in sys.builtin_module_names

    kwargs['stdout'] = subprocess.PIPE
    kwargs['stderr'] = subprocess.STDOUT
    if kill_children:
        kwargs['preexec_fn'] = os.setsid

    process = subprocess.Popen(command, *args, **kwargs)
    process_group_id = os.getpgid(process.pid)

    kill_thread = None
    wrapped_process = WrappedPopen(process)
    if timeout is not None:
        kill_thread = _start_kill_thread(wrapped_process, kill_children,
                                         timeout)
    if output_files:
        output = _mirror_output(process, output_files)
    else:
        output, _ = process.communicate()
        output = output.decode('utf-8', errors='ignore')
    process.wait()
    if kill_thread:
        kill_thread.cancel()
    elif kill_children:
        _kill_process_group(process_group_id)
    retcode = process.returncode

    log_message = ('Executed command: "{command}" returned: {retcode}.'.format(
        command=(' '.join(command))[:LOG_LIMIT_FIELD], retcode=retcode))
    output_for_log = output[-LOG_LIMIT_FIELD:]
    log_extras = {'output': output_for_log}

    if expect_zero and retcode != 0 and not wrapped_process.timed_out:
        logs.error(log_message, extras=log_extras)
        raise subprocess.CalledProcessError(retcode, command)

    logs.debug(log_message, extras=log_extras)
    return ProcessResult(retcode, output, wrapped_process.timed_out)
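
Illustrative usage of this variant, which mirrors output to every file in |output_files| and, because write_to_stdout defaults to True, to stdout as well:

with open('/tmp/build.log', 'w') as log_file:
    result = execute(['echo', 'building'], output_files=[log_file])
print(result.retcode)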
Example #11
def validate(benchmark):
    """Return True if |benchmark| is a valid fuzzbench fuzzer."""
    if VALID_BENCHMARK_REGEX.match(benchmark) is None:
        logs.error('%s does not conform to %s pattern.', benchmark,
                   VALID_BENCHMARK_REGEX.pattern)
        return False
    if benchmark in get_all_benchmarks():
        return True
    logs.error('%s must have a build.sh or oss-fuzz.yaml.', benchmark)
    return False
Example #12
def run_fuzzer(max_total_time, log_filename):
    """Runs the fuzzer using its script. Logs stdout and stderr of the fuzzer
    script to |log_filename| if provided."""
    input_corpus = environment.get('SEED_CORPUS_DIR')
    output_corpus = environment.get('OUTPUT_CORPUS_DIR')
    fuzz_target_name = environment.get('FUZZ_TARGET')
    target_binary = fuzzer_utils.get_fuzz_target_binary(
        FUZZ_TARGET_DIR, fuzz_target_name)
    if not target_binary:
        logs.error('Fuzz target binary not found.')
        return

    _unpack_clusterfuzz_seed_corpus(target_binary, input_corpus)
    _clean_seed_corpus(input_corpus)

    if max_total_time is None:
        logs.warning('max_total_time is None. Fuzzing indefinitely.')

    runner_niceness = environment.get('RUNNER_NICENESS', 0)

    try:
        # Because the runner is launched at a higher priority,
        # set it back to the default(0) for fuzzing processes.
        command = [
            'nice', '-n',
            str(0 - runner_niceness), 'python3', '-u', '-c',
            ('import fuzzer; '
             'fuzzer.fuzz('
             "'{input_corpus}', '{output_corpus}', '{target_binary}')").format(
                 input_corpus=shlex.quote(input_corpus),
                 output_corpus=shlex.quote(output_corpus),
                 target_binary=shlex.quote(target_binary))
        ]

        fuzzer_environment = _get_fuzzer_environment()
        # Write output to stdout if user is fuzzing from command line.
        # Otherwise, write output to the log file.
        if environment.get('FUZZ_OUTSIDE_EXPERIMENT'):
            new_process.execute(command,
                                timeout=max_total_time,
                                write_to_stdout=True,
                                kill_children=True,
                                env=fuzzer_environment)
        else:
            with open(log_filename, 'wb') as log_file:
                new_process.execute(command,
                                    timeout=max_total_time,
                                    output_file=log_file,
                                    kill_children=True,
                                    env=fuzzer_environment)
    except subprocess.CalledProcessError:
        global fuzzer_errored_out  # pylint:disable=invalid-name
        fuzzer_errored_out = True
        logs.error('Fuzz process returned nonzero.')
Example #13
def validate_name(fuzzer):
    """Return True if |fuzzer| is a valid fuzzbench fuzzer name."""
    # Although importing probably allows a subset of what the regex allows, use
    # the regex anyway to be safe. The regex is enforcing that the fuzzer is a
    # valid path for GCS or a linux system.
    if VALID_FUZZER_REGEX.match(fuzzer) is None:
        logs.error('Fuzzer: %s does not conform to pattern: %s.', fuzzer,
                   VALID_FUZZER_REGEX.pattern)
        return False

    return True
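
VALID_FUZZER_REGEX is defined elsewhere in the module. A purely hypothetical definition, shown only to make the check concrete (the project's actual pattern may differ), restricts names to characters that are safe in GCS object names and Linux paths:

import re

VALID_FUZZER_REGEX = re.compile(r'^[a-z][a-z0-9_]*$')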
Example #14
def main():
    """Creates or gets an already created service account key and saves it to
    the provided path."""
    logs.initialize()
    try:
        keyfile = sys.argv[1]
        get_or_create_key(sys.argv[2], keyfile)
        logs.info('Saved key to %s.', keyfile)
    except Exception:  # pylint: disable=broad-except
        logs.error('Failed to get or create key.')
        return 1
    return 0
Example #15
def validate(fuzzer):
    """Return True if |fuzzer| is a valid fuzzbench fuzzer."""
    if not validate_name(fuzzer):
        return False

    # Try importing the fuzzer module.
    module_name = 'fuzzers.{}.fuzzer'.format(fuzzer)
    try:
        importlib.import_module(module_name)
        return True
    except Exception as error:  # pylint: disable=broad-except
        logs.error('Encountered "%s" while trying to import %s.', error,
                   module_name)
        return False
Example #16
def validate(benchmark):
    """Return True if |benchmark| is a valid fuzzbench benchmark."""
    if VALID_BENCHMARK_REGEX.match(benchmark) is None:
        logs.error('%s does not conform to %s pattern.', benchmark,
                   VALID_BENCHMARK_REGEX.pattern)
        return False
    benchmark_dir = os.path.join(utils.ROOT_DIR, 'benchmarks', benchmark)
    build_sh = os.path.join(benchmark_dir, 'build.sh')
    oss_fuzz_config = os.path.join(benchmark_dir, 'oss-fuzz.yaml')
    valid = os.path.exists(build_sh) or os.path.exists(oss_fuzz_config)
    if valid:
        return True
    logs.error('%s must have a build.sh or oss-fuzz.yaml.', benchmark)
    return False
Example #17
    def do_sync(self, final_sync=False):
        """Save corpus archives and results to GCS."""
        try:
            if not final_sync and self.is_corpus_dir_same():
                logs.debug('Cycle: %d unchanged.', self.cycle)
                filesystem.append(self.unchanged_cycles_path, str(self.cycle))
            else:
                logs.debug('Cycle: %d changed.', self.cycle)
                self.archive_and_save_corpus()

            self.save_results()
            logs.debug('Finished sync.')
        except Exception:  # pylint: disable=broad-except
            logs.error('Failed to sync cycle: %d.', self.cycle)
Example #18
        def handle_retry(num_try, exception=None):
            """Handle retry."""

            if (exception is None or
                    isinstance(exception, exception_type)) and num_try < tries:
                logs.info('Retrying on %s failed with %s. Retrying again.',
                          function_with_type,
                          sys.exc_info()[1])
                sleep(get_delay(num_try, delay, backoff))
                return True

            logs.error('Retrying on %s failed with %s. Raise.',
                       function_with_type,
                       sys.exc_info()[1])
            return False
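
get_delay is not shown here. A plausible sketch (an assumption about that helper) is simple exponential backoff:

def get_delay(num_try, delay, backoff):
    """Hypothetical sketch: base |delay| multiplied by |backoff| raised to the
    number of previous attempts."""
    return delay * (backoff ** (num_try - 1))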
Example #19
def run_requested_experiment(dry_run):
    """Run the oldest requested experiment that hasn't been run yet in
    experiment-requests.yaml."""
    requested_experiments = _get_requested_experiments()

    # TODO(metzman): Look into supporting benchmarks as an optional parameter so
    # that people can add fuzzers that don't support everything.

    if PAUSE_SERVICE_KEYWORD in requested_experiments:
        # Check if automated experiment service is paused.
        logs.warning('Pause service requested, not running experiment.')
        return None

    requested_experiment = None
    for experiment_config in reversed(requested_experiments):
        experiment_name = _get_experiment_name(experiment_config)
        with db_utils.session_scope() as session:
            is_new_experiment = session.query(models.Experiment).filter(
                models.Experiment.name == experiment_name).first() is None
        if is_new_experiment:
            requested_experiment = experiment_config
            break

    if requested_experiment is None:
        logs.info('No new experiment to run. Exiting.')
        return None

    experiment_name = _get_experiment_name(requested_experiment)
    if not validate_experiment_requests([requested_experiment]):
        logs.error('Requested experiment: %s in %s is not valid.',
                   requested_experiment, REQUESTED_EXPERIMENTS_PATH)
        return None
    fuzzers = requested_experiment['fuzzers']

    benchmark_type = requested_experiment.get('type')
    if benchmark_type == benchmark_utils.BenchmarkType.BUG.value:
        benchmarks = benchmark_utils.get_bug_benchmarks()
    else:
        benchmarks = (benchmark_utils.get_oss_fuzz_coverage_benchmarks() +
                      benchmark_utils.get_standard_coverage_benchmarks())

    logs.info('Running experiment: %s with fuzzers: %s.', experiment_name,
              ' '.join(fuzzers))
    description = _get_description(requested_experiment)
    oss_fuzz_corpus = _use_oss_fuzz_corpus(requested_experiment)
    return _run_experiment(experiment_name, fuzzers, benchmarks, description,
                           oss_fuzz_corpus, dry_run)
Example #20
def run_fuzzer(max_total_time, log_filename):
    """Runs the fuzzer using its script. Logs stdout and stderr of the fuzzer
    script to |log_filename| if provided."""
    input_corpus = environment.get('SEED_CORPUS_DIR')
    output_corpus = environment.get('OUTPUT_CORPUS_DIR')
    fuzz_target_name = environment.get('FUZZ_TARGET')
    target_binary = fuzzer_utils.get_fuzz_target_binary(
        FUZZ_TARGET_DIR, fuzz_target_name)
    if not target_binary:
        logs.error('Fuzz target binary not found.')
        return

    _unpack_clusterfuzz_seed_corpus(target_binary, input_corpus)
    _clean_seed_corpus(input_corpus)

    if max_total_time is None:
        logs.warning('max_total_time is None. Fuzzing indefinitely.')

    runner_niceness = environment.get('RUNNER_NICENESS', 0)

    try:
        with open(log_filename, 'w') as log_file:
            # Because the runner is launched at a higher priority,
            # set it back to the default(0) for fuzzing processes.
            new_process.execute([
                'nice', '-n',
                str(0 - runner_niceness), 'python3', '-u', '-c',
                ('import fuzzer; '
                 'fuzzer.fuzz('
                 "'{input_corpus}', '{output_corpus}', '{target_binary}')"
                 ).format(input_corpus=shlex.quote(input_corpus),
                          output_corpus=shlex.quote(output_corpus),
                          target_binary=shlex.quote(target_binary))
            ],
                                timeout=max_total_time,
                                output_files=[log_file],
                                kill_children=True,
                                env=_get_fuzzer_environment())
    except subprocess.CalledProcessError:
        logs.error('Fuzz process returned nonzero.')
Example #21
def main():
    """Do the experiment and report results."""
    logs.initialize(default_extras={
        'component': 'dispatcher',
    })

    try:
        dispatcher_main()
    except Exception as error:
        logs.error('Error conducting experiment.')
        raise error

    if experiment_utils.is_local_experiment():
        return 0

    experiment_config_file_path = _get_config_file_path()

    if stop_experiment.stop_experiment(experiment_utils.get_experiment_name(),
                                       experiment_config_file_path):
        return 0

    return 1