コード例 #1
0
    def get_preempted_trials(self) -> List[models.Trial]:
        """Returns trials newly discovered to be preempted.

        Queries for preempted instances, records any trial not already in
        |self.preempted_trials|, and returns only those newly discovered
        trials. In a nonpreemptible experiment this is always empty.
        """
        if not self.experiment_config.get('preemptible_runners'):
            # A nonpreemptible experiment can never have preempted trials.
            assert not self.preempted_trials
            return []

        started_instances = self._get_started_unfinished_instances()
        query_time = datetime_now()

        newly_preempted = []
        for instance in list(self._query_preempted_instances()):
            trial = started_instances.get(instance)
            if trial is None:
                # Most likely this preemption was already handled earlier.
                logs.warning('Instance: %s is preempted but is not running.',
                             instance)
                continue
            if trial.id in self.preempted_trials:
                # Already recorded as preempted, nothing new to report.
                continue
            self.preempted_trials[trial.id] = trial
            newly_preempted.append(trial)

        # Advance the query watermark only after the results were processed
        # successfully. Updating it too late merely causes redundant work,
        # whereas updating it too early could lose preemptions.
        self.last_preemptible_query = query_time

        return newly_preempted
コード例 #2
0
def get_fuzzers_changed_since_last():
    """Returns fuzzers that changed since the most recent experiment in the
    database whose commit is reachable from the current branch."""
    # TODO(metzman): Figure out a way of skipping experiments that were stopped
    # early.

    # Fetch experiments newest-first; some may reference commits that are
    # not on the current branch, so we may need to try several.
    with db_utils.session_scope() as session:
        experiments = list(
            session.query(models.Experiment).order_by(
                models.Experiment.time_created.desc()))
    if not experiments:
        raise Exception('No experiments found. Cannot find changed fuzzers.')

    changed_files = None
    for experiment in experiments:
        try:
            changed_files = diff_utils.get_changed_files(experiment.git_hash)
        except diff_utils.DiffError:
            logs.warning('Skipping %s. Commit is not in branch.',
                         experiment.git_hash)
            continue
        break

    if changed_files is None:
        raise Exception('No in-branch experiments. '
                        'Cannot find changed fuzzers.')
    return change_utils.get_changed_fuzzers(changed_files)
コード例 #3
0
    def can_start_nonpreemptible(self, nonpreemptible_starts: int,
                                 trials_to_run: int) -> bool:
        """Returns True if we can start a nonpreemptible trial.

        Args:
            nonpreemptible_starts: How many nonpreemptibles were started so
                far.
            trials_to_run: How many trials still need to run.
        """
        if not self.experiment_config.get('preemptible_runners'):
            # This branch shouldn't be reached in a nonpreemptible
            # experiment, but if it is, starting a nonpreemptible trial
            # there is always OK.
            return True

        # Once replacing preemptions was judged futile, it stays futile.
        if self.preemptible_starts_futile:
            return False

        # Never exceed the nonpreemptible budget.
        if nonpreemptible_starts >= self.max_nonpreemptibles:
            return False

        budget_insufficient = (
            trials_to_run * self.MAX_FRACTION_FOR_NONPREEMPTIBLES >
            self.max_nonpreemptibles)
        if budget_insufficient:
            # If we can't create enough nonpreemptibles to replace at least
            # MAX_FRACTION_FOR_NONPREEMPTIBLES of the remaining trials,
            # don't create any at all: the experiment can't be salvaged
            # cheaply.
            # TODO(metzman): This policy can be bypassed if instances are
            # preempted one at a time. Fix this or get rid of the policy.
            self.preemptible_starts_futile = True
            logs.warning('Futile to replace preempted with nonpreemptibles.')
            return False

        return True
コード例 #4
0
def copy_oss_fuzz_files(project, commit_date, benchmark_dir):
    """Checkout the right files from OSS-Fuzz to build the benchmark based on
    |project| and |commit_date|. Then copy them to |benchmark_dir|.

    Returns True on success, False when no OSS-Fuzz commit touching the
    project exists at or before |commit_date|. The OSS-Fuzz checkout is
    restored to a clean state in all cases.

    Raises:
        RuntimeError: If OSS_FUZZ_DIR is not a git repository.
    """
    # Local import: shutil replaces the distutils dir_util module, which was
    # removed along with distutils in Python 3.12.
    import shutil

    if not os.path.exists(os.path.join(OSS_FUZZ_DIR, '.git')):
        logs.error(
            '%s is not a git repo. Try running git submodule update --init',
            OSS_FUZZ_DIR)
        raise RuntimeError('%s is not a git repo.' % OSS_FUZZ_DIR)
    oss_fuzz_repo_manager = GitRepoManager(OSS_FUZZ_DIR)
    projects_dir = os.path.join(OSS_FUZZ_DIR, 'projects', project)
    try:
        # Find the newest OSS-Fuzz commit touching the project that is not
        # newer than |commit_date|.
        _, oss_fuzz_commit, _ = oss_fuzz_repo_manager.git([
            'log', '--before=' + commit_date.isoformat(), '-n1', '--format=%H',
            projects_dir
        ])
        oss_fuzz_commit = oss_fuzz_commit.strip()
        if not oss_fuzz_commit:
            logs.warning('No suitable earlier OSS-Fuzz commit found.')
            return False
        oss_fuzz_repo_manager.git(['checkout', oss_fuzz_commit, projects_dir])
        # dirs_exist_ok=True merges into an existing |benchmark_dir|,
        # matching the old dir_util.copy_tree behavior.
        shutil.copytree(projects_dir, benchmark_dir, dirs_exist_ok=True)
        os.remove(os.path.join(benchmark_dir, 'project.yaml'))
        return True
    finally:
        # Always undo the partial checkout so the submodule stays clean.
        oss_fuzz_repo_manager.git(['reset', '--hard'])
コード例 #5
0
def _clean_seed_corpus(seed_corpus_dir):
    """Flattens |seed_corpus_dir|: files in sub-directories are moved into
    the directory root under their hash name, and any file larger than the
    1 MB limit is deleted."""
    if not os.path.exists(seed_corpus_dir):
        return

    move_failures = []
    for dirpath, _, filenames in os.walk(seed_corpus_dir):
        for name in filenames:
            src_path = os.path.join(dirpath, name)

            if os.path.getsize(src_path) > CORPUS_ELEMENT_BYTES_LIMIT:
                os.remove(src_path)
                logs.warning('Removed seed file %s as it exceeds 1 Mb limit.',
                             src_path)
                continue

            # Name the moved file after its hash.
            dest_path = os.path.join(seed_corpus_dir,
                                     utils.file_hash(src_path))
            try:
                shutil.move(src_path, dest_path)
            except OSError:
                move_failures.append((src_path, dest_path))

    if move_failures:
        logs.error('Failed to move seed corpus files: %s',
                   move_failures)
コード例 #6
0
ファイル: runner.py プロジェクト: yaoshuyin/fuzzbench
def _clean_seed_corpus(seed_corpus_dir):
    """Prepares |seed_corpus_dir| for the trial so it can be used by AFL,
    which is picky about the seed corpus: flattens sub-directories into the
    directory root (files renamed to their hash), deletes any file larger
    than the 1 MB limit, and empties the corpus entirely when the NO_SEEDS
    env var is specified."""
    if not os.path.exists(seed_corpus_dir):
        return

    if environment.get('NO_SEEDS'):
        # Recreate the directory empty instead of deleting files one by one.
        logs.info('NO_SEEDS specified, deleting seed corpus files.')
        shutil.rmtree(seed_corpus_dir)
        os.mkdir(seed_corpus_dir)
        return

    move_failures = []
    for dirpath, _, filenames in os.walk(seed_corpus_dir):
        for name in filenames:
            src_path = os.path.join(dirpath, name)

            if os.path.getsize(src_path) > CORPUS_ELEMENT_BYTES_LIMIT:
                os.remove(src_path)
                logs.warning('Removed seed file %s as it exceeds 1 Mb limit.',
                             src_path)
                continue

            # Name the moved file after its hash.
            dest_path = os.path.join(seed_corpus_dir,
                                     utils.file_hash(src_path))
            try:
                shutil.move(src_path, dest_path)
            except OSError:
                move_failures.append((src_path, dest_path))

    if move_failures:
        logs.error('Failed to move seed corpus files: %s', move_failures)
コード例 #7
0
def _load_base_builder_docker_repo():
    """Loads digests for the OSS-Fuzz base-builder image via gcloud.

    Returns a BaseBuilderDockerRepo populated with (timestamp, digest)
    pairs (timestamps normalized to UTC), or None when the gcloud binary
    cannot be found.
    """
    gcloud_path = spawn.find_executable('gcloud')
    if not gcloud_path:
        logs.warning('gcloud not found in PATH.')
        return None

    command = [
        gcloud_path,
        'container',
        'images',
        'list-tags',
        'gcr.io/oss-fuzz-base/base-builder',
        '--format=json',
        '--sort-by=timestamp',
    ]
    _, output, _ = new_process.execute(command)
    images = json.loads(output)

    repo = BaseBuilderDockerRepo()
    for image in images:
        utc_timestamp = datetime.datetime.fromisoformat(
            image['timestamp']['datetime']).astimezone(datetime.timezone.utc)
        repo.add_digest(utc_timestamp, image['digest'])

    return repo
コード例 #8
0
def output_report(experiment_config: dict, in_progress=False):
    """Generates the HTML report for the current experiment and syncs it to
    the report filestore.

    Args:
        experiment_config: Experiment configuration; must contain
            'report_filestore'.
        in_progress: True while the experiment is still running.
    """
    experiment_name = experiment_utils.get_experiment_name()
    reports_dir = get_reports_dir()

    web_filestore_path = posixpath.join(experiment_config['report_filestore'],
                                        experiment_name)

    # Merging with nonprivate experiments is deferred until the very end:
    # doing it while the experiment is in progress would produce unusable
    # realtime results.
    merge_with_nonprivate = (not in_progress and experiment_config.get(
        'merge_with_nonprivate', False))

    try:
        logger.debug('Generating report.')
        filesystem.recreate_directory(reports_dir)
        generate_report.generate_report(
            [experiment_name],
            str(reports_dir),
            in_progress=in_progress,
            merge_with_clobber_nonprivate=merge_with_nonprivate)
        # Disable caching so viewers always fetch the latest report.
        cache_headers = [
            '-h',
            'Cache-Control:public,max-age=0,no-transform'
        ]
        filestore_utils.rsync(str(reports_dir),
                              web_filestore_path,
                              gsutil_options=cache_headers)
        logger.debug('Done generating report.')
    except data_utils.EmptyDataError:
        logs.warning('No snapshot data.')
    except Exception:  # pylint: disable=broad-except
        logger.error('Error generating HTML report.')
コード例 #9
0
ファイル: runner.py プロジェクト: realwatch/fuzzbench
def run_fuzzer(max_total_time, log_filename):
    """Runs the fuzzer using its script. Logs stdout and stderr of the fuzzer
    script to |log_filename| if provided.

    Args:
        max_total_time: Timeout in seconds for the fuzz process, or None to
            fuzz indefinitely.
        log_filename: Path the fuzzer output is written to when not fuzzing
            from the command line (FUZZ_OUTSIDE_EXPERIMENT unset).

    Side effect: sets the module-level |fuzzer_errored_out| flag when the
    fuzz process exits nonzero.
    """
    input_corpus = environment.get('SEED_CORPUS_DIR')
    output_corpus = environment.get('OUTPUT_CORPUS_DIR')
    fuzz_target_name = environment.get('FUZZ_TARGET')
    target_binary = fuzzer_utils.get_fuzz_target_binary(
        FUZZ_TARGET_DIR, fuzz_target_name)
    if not target_binary:
        logs.error('Fuzz target binary not found.')
        return

    # Prepare the seed corpus: unpack any ClusterFuzz-format seed corpus,
    # then normalize it (see _clean_seed_corpus).
    _unpack_clusterfuzz_seed_corpus(target_binary, input_corpus)
    _clean_seed_corpus(input_corpus)

    if max_total_time is None:
        logs.warning('max_total_time is None. Fuzzing indefinitely.')

    runner_niceness = environment.get('RUNNER_NICENESS', 0)

    try:
        # Because the runner is launched at a higher priority,
        # set it back to the default(0) for fuzzing processes.
        # The corpus and binary paths are shell-quoted because they are
        # embedded in the python -c source string.
        command = [
            'nice', '-n',
            str(0 - runner_niceness), 'python3', '-u', '-c',
            ('import fuzzer; '
             'fuzzer.fuzz('
             "'{input_corpus}', '{output_corpus}', '{target_binary}')").format(
                 input_corpus=shlex.quote(input_corpus),
                 output_corpus=shlex.quote(output_corpus),
                 target_binary=shlex.quote(target_binary))
        ]

        fuzzer_environment = _get_fuzzer_environment()
        # Write output to stdout if user is fuzzing from command line.
        # Otherwise, write output to the log file.
        if environment.get('FUZZ_OUTSIDE_EXPERIMENT'):
            new_process.execute(command,
                                timeout=max_total_time,
                                write_to_stdout=True,
                                kill_children=True,
                                env=fuzzer_environment)
        else:
            with open(log_filename, 'wb') as log_file:
                new_process.execute(command,
                                    timeout=max_total_time,
                                    output_file=log_file,
                                    kill_children=True,
                                    env=fuzzer_environment)
    except subprocess.CalledProcessError:
        # Record the failure in a module-level flag so callers can report it.
        global fuzzer_errored_out  # pylint:disable=invalid-name
        fuzzer_errored_out = True
        logs.error('Fuzz process returned nonzero.')
コード例 #10
0
def validate_benchmarks(benchmarks: List[str]):
    """Parses and validates list of benchmarks.

    Raises:
        Exception: If a benchmark has no directory under BENCHMARKS_DIR or
            appears more than once in |benchmarks|.
    """
    benchmark_directories = get_directories(BENCHMARKS_DIR)
    if not os.path.exists(OSS_FUZZ_PROJECTS_DIR):
        # Fixed message: the two implicitly-concatenated literals previously
        # lacked a separating space ("...checked out.skipping...").
        logs.warning('OSS-Fuzz repository is not checked out. '
                     'Skipping OSS-Fuzz benchmarks.')

    for benchmark in benchmarks:
        if benchmark not in benchmark_directories:
            raise Exception('Benchmark "%s" does not exist.' % benchmark)

    for benchmark in set(benchmarks):
        if benchmarks.count(benchmark) > 1:
            raise Exception('Benchmark "%s" is included more than once.' %
                            benchmark)
コード例 #11
0
ファイル: reporter.py プロジェクト: 14isnot40/fuzzbench
def output_report(experiment_config: dict,
                  in_progress=False,
                  coverage_report=False):
    """Generate the HTML report and write it to |web_bucket|.

    Args:
        experiment_config: Experiment configuration; must contain 'fuzzers'
            and 'report_filestore'.
        in_progress: True while the experiment is still running; disables
            merging with nonprivate experiments.
        coverage_report: Forwarded to generate_report.
    """
    experiment_name = experiment_utils.get_experiment_name()
    reports_dir = get_reports_dir()

    core_fuzzers = set(get_core_fuzzers())
    experiment_fuzzers = set(experiment_config['fuzzers'])
    fuzzers = experiment_fuzzers.union(core_fuzzers)

    # Calculate path to store report files in filestore. Use posixpath
    # consistently (previously os.path.join was used for one component):
    # filestore paths are POSIX-style regardless of the host OS.
    web_filestore_path = experiment_config['report_filestore']
    if not fuzzers.issubset(core_fuzzers):
        # This means that we are running an experimental report with fuzzers
        # not in the core list. So, store these in |experimental| sub-directory.
        web_filestore_path = posixpath.join(web_filestore_path, 'experimental')
    web_filestore_path = posixpath.join(web_filestore_path, experiment_name)

    # Don't merge with nonprivate experiments until the very end as doing it
    # while the experiment is in progress will produce unusable realtime
    # results.
    merge_with_nonprivate = (not in_progress and experiment_config.get(
        'merge_with_nonprivate', False))

    try:
        logger.debug('Generating report.')
        filesystem.recreate_directory(reports_dir)
        generate_report.generate_report(
            [experiment_name],
            str(reports_dir),
            report_name=experiment_name,
            fuzzers=fuzzers,
            in_progress=in_progress,
            merge_with_clobber_nonprivate=merge_with_nonprivate,
            coverage_report=coverage_report)
        filestore_utils.rsync(
            str(reports_dir),
            web_filestore_path,
            delete=False,  # Don't remove existing coverage jsons.
            gsutil_options=[
                '-h', 'Cache-Control:public,max-age=0,no-transform'
            ])
        logger.debug('Done generating report.')
    except data_utils.EmptyDataError:
        logs.warning('No snapshot data.')
    except Exception:  # pylint: disable=broad-except
        logger.error('Error generating HTML report.')
コード例 #12
0
def run_requested_experiment(dry_run):
    """Run the oldest requested experiment that hasn't been run yet in
    experiment-requests.yaml."""
    requested_experiments = _get_requested_experiments()

    # TODO(metzman): Look into supporting benchmarks as an optional parameter so
    # that people can add fuzzers that don't support everything.

    # Check if automated experiment service is paused.
    if PAUSE_SERVICE_KEYWORD in requested_experiments:
        logs.warning('Pause service requested, not running experiment.')
        return None

    # Search in reverse order for a request whose experiment does not exist
    # in the database yet.
    requested_experiment = None
    for candidate in reversed(requested_experiments):
        candidate_name = _get_experiment_name(candidate)
        with db_utils.session_scope() as session:
            already_exists = session.query(models.Experiment).filter(
                models.Experiment.name == candidate_name).first() is not None
        if not already_exists:
            requested_experiment = candidate
            break

    if requested_experiment is None:
        logs.info('No new experiment to run. Exiting.')
        return None

    experiment_name = _get_experiment_name(requested_experiment)
    if not validate_experiment_requests([requested_experiment]):
        logs.error('Requested experiment: %s in %s is not valid.',
                   requested_experiment, REQUESTED_EXPERIMENTS_PATH)
        return None

    fuzzers = requested_experiment['fuzzers']

    # Pick the benchmark set matching the requested experiment type.
    benchmark_type = requested_experiment.get('type')
    if benchmark_type == benchmark_utils.BenchmarkType.BUG.value:
        benchmarks = benchmark_utils.get_bug_benchmarks()
    else:
        benchmarks = (benchmark_utils.get_oss_fuzz_coverage_benchmarks() +
                      benchmark_utils.get_standard_coverage_benchmarks())

    logs.info('Running experiment: %s with fuzzers: %s.', experiment_name,
              ' '.join(fuzzers))
    description = _get_description(requested_experiment)
    oss_fuzz_corpus = _use_oss_fuzz_corpus(requested_experiment)
    return _run_experiment(experiment_name, fuzzers, benchmarks, description,
                           oss_fuzz_corpus, dry_run)
コード例 #13
0
 def sleep_until_next_sync(self):
     """Sleep until it is time to do the next sync.

     Aims to keep syncs roughly SNAPSHOT_PERIOD seconds apart by
     subtracting the time consumed since the previous sync from the
     sleep duration.
     """
     if self.last_sync_time is not None:
         next_sync_time = (self.last_sync_time +
                           experiment_utils.SNAPSHOT_PERIOD)
         sleep_time = next_sync_time - time.time()
         if sleep_time < 0:
             # Log error if a sync has taken longer than SNAPSHOT_PERIOD and
             # messed up our time synchronization.
             logs.warning('Sleep time on cycle %d is %d', self.cycle,
                          sleep_time)
             sleep_time = 0
     else:
         # First cycle: no previous sync to anchor to, sleep a full period.
         sleep_time = experiment_utils.SNAPSHOT_PERIOD
     logs.debug('Sleeping for %d seconds.', sleep_time)
     time.sleep(sleep_time)
     # last_sync_time is recorded before the sync so that each sync happens
     # roughly SNAPSHOT_PERIOD after each other.
     self.last_sync_time = time.time()
コード例 #14
0
def run_fuzzer(max_total_time, log_filename):
    """Runs the fuzzer using its script. Logs stdout and stderr of the fuzzer
    script to |log_filename| if provided.

    Args:
        max_total_time: Timeout in seconds for the fuzz process, or None to
            fuzz indefinitely.
        log_filename: Path the fuzzer's output is written to.

    NOTE(review): despite "if provided" in the summary, |log_filename| is
    always opened below, so passing None raises TypeError — confirm callers
    always pass a path. The log is opened in text mode ('w') while another
    version of this function uses 'wb' — verify new_process.execute's
    output_files handles text-mode files.
    """
    input_corpus = environment.get('SEED_CORPUS_DIR')
    output_corpus = environment.get('OUTPUT_CORPUS_DIR')
    fuzz_target_name = environment.get('FUZZ_TARGET')
    target_binary = fuzzer_utils.get_fuzz_target_binary(
        FUZZ_TARGET_DIR, fuzz_target_name)
    if not target_binary:
        logs.error('Fuzz target binary not found.')
        return

    # Prepare the seed corpus: unpack any ClusterFuzz-format seed corpus,
    # then normalize it (see _clean_seed_corpus).
    _unpack_clusterfuzz_seed_corpus(target_binary, input_corpus)
    _clean_seed_corpus(input_corpus)

    if max_total_time is None:
        logs.warning('max_total_time is None. Fuzzing indefinitely.')

    runner_niceness = environment.get('RUNNER_NICENESS', 0)

    try:
        with open(log_filename, 'w') as log_file:
            # Because the runner is launched at a higher priority,
            # set it back to the default(0) for fuzzing processes.
            # The corpus and binary paths are shell-quoted because they are
            # embedded in the python -c source string.
            new_process.execute([
                'nice', '-n',
                str(0 - runner_niceness), 'python3', '-u', '-c',
                ('import fuzzer; '
                 'fuzzer.fuzz('
                 "'{input_corpus}', '{output_corpus}', '{target_binary}')"
                 ).format(input_corpus=shlex.quote(input_corpus),
                          output_corpus=shlex.quote(output_corpus),
                          target_binary=shlex.quote(target_binary))
            ],
                                timeout=max_total_time,
                                output_files=[log_file],
                                kill_children=True,
                                env=_get_fuzzer_environment())
    except subprocess.CalledProcessError:
        # A nonzero exit from the fuzz process is logged, not re-raised.
        logs.error('Fuzz process returned nonzero.')
コード例 #15
0
def output_report(web_bucket, in_progress=False):
    """Generate the HTML report and write it to |web_bucket|."""
    experiment_name = experiment_utils.get_experiment_name()
    reports_dir = get_reports_dir()

    try:
        logger.debug('Generating report.')
        filesystem.recreate_directory(reports_dir)
        generate_report.generate_report([experiment_name],
                                        str(reports_dir),
                                        in_progress=in_progress)
        # Disable caching so viewers always fetch the latest report.
        cache_headers = [
            '-h',
            'Cache-Control:public,max-age=0,no-transform'
        ]
        filestore_utils.rsync(str(reports_dir),
                              web_bucket,
                              gsutil_options=cache_headers)
        logger.debug('Done generating report.')
    except data_utils.EmptyDataError:
        logs.warning('No snapshot data.')
    except Exception:  # pylint: disable=broad-except
        logger.error('Error generating HTML report.')
コード例 #16
0
def run_requested_experiment(dry_run):
    """Run the oldest requested experiment that hasn't been run yet in
    experiment-requests.yaml."""
    requested_experiments = _get_requested_experiments()

    # TODO(metzman): Look into supporting benchmarks as an optional parameter so
    # that people can add fuzzers that don't support everything.

    # Check if automated experiment service is paused.
    if PAUSE_SERVICE_KEYWORD in requested_experiments:
        logs.warning('Pause service requested, not running experiment.')
        return None

    # Search in reverse order for a request whose experiment does not exist
    # in the database yet.
    requested_experiment = None
    for candidate in reversed(requested_experiments):
        candidate_name = _get_experiment_name(candidate)
        existing = db_utils.query(models.Experiment).filter(
            models.Experiment.name == candidate_name).first()
        if existing is None:
            requested_experiment = candidate
            break

    if requested_experiment is None:
        logs.info('No new experiment to run. Exiting.')
        return None

    experiment_name = _get_experiment_name(requested_experiment)
    if not validate_experiment_requests([requested_experiment]):
        logs.error('Requested experiment: %s in %s is not valid.',
                   requested_experiment, REQUESTED_EXPERIMENTS_PATH)
        return None

    fuzzers = requested_experiment['fuzzers']
    logs.info('Running experiment: %s with fuzzers: %s.', experiment_name,
              ' '.join(fuzzers))
    fuzzer_configs = fuzzer_utils.get_fuzzer_configs(fuzzers=fuzzers)
    return _run_experiment(experiment_name, fuzzer_configs, dry_run)