Example #1
def build_measurer(benchmark: str) -> bool:
    """Do a coverage build for a benchmark."""
    try:
        logger.info('Building measurer for benchmark: %s.', benchmark)
        buildlib.build_coverage(benchmark)
        docker_name = benchmark_utils.get_docker_name(benchmark)
        archive_name = 'coverage-build-%s.tar.gz' % docker_name

        coverage_binaries_dir = build_utils.get_coverage_binaries_dir()
        benchmark_coverage_binary_dir = coverage_binaries_dir / benchmark
        os.mkdir(benchmark_coverage_binary_dir)
        cloud_bucket_archive_path = exp_path.gcs(coverage_binaries_dir /
                                                 archive_name)
        gsutil.cp(cloud_bucket_archive_path,
                  str(benchmark_coverage_binary_dir),
                  parallel=False,
                  write_to_stdout=False)

        archive_path = benchmark_coverage_binary_dir / archive_name
        with tarfile.open(archive_path, 'r:gz') as tar:
            tar.extractall(benchmark_coverage_binary_dir)
        os.remove(archive_path)
        logger.info('Done building measurer for benchmark: %s.', benchmark)
        return True
    except Exception:  # pylint: disable=broad-except
        logger.error('Failed to build measurer for %s.', benchmark)
        return False
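The same download-and-extract sequence reappears in Example #6 below. As a usage sketch, a caller might fan coverage builds out across benchmarks and keep only the successes; the `benchmarks` parameter and the use of a multiprocessing pool here are assumptions, not part of the original:

from multiprocessing import Pool

def build_all_measurers(benchmarks):
    """Hypothetical helper: build measurers for every benchmark in
    |benchmarks| and return the ones that succeeded."""
    with Pool() as pool:
        results = pool.map(build_measurer, benchmarks)
    return [b for b, ok in zip(benchmarks, results) if ok]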
Example #2
def store_build_logs(build_config, build_result):
    """Save build results in the build logs bucket."""
    build_output = ('Command returned {retcode}.\nOutput: {output}'.format(
        retcode=build_result.retcode, output=build_result.output))
    with tempfile.NamedTemporaryFile(mode='w') as tmp:
        tmp.write(build_output)
        tmp.flush()

        build_log_filename = build_config + '.txt'
        gsutil.cp(tmp.name,
                  exp_path.gcs(get_build_logs_dir() / build_log_filename),
                  parallel=False)
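A minimal usage sketch, assuming only what the function reads: a config name and a result object exposing `retcode` and `output`. The `BuildResult` namedtuple and the config string are illustrative, not from the original:

import collections

BuildResult = collections.namedtuple('BuildResult', ['retcode', 'output'])

store_build_logs('coverage-zlib', BuildResult(retcode=0, output='Build OK.'))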
Example #3
    def save_corpus_archive(self, archive):
        """Save corpus |archive| to GCS and delete when done."""
        if not self.gcs_sync_dir:
            return

        basename = os.path.basename(archive)
        gcs_path = posixpath.join(self.gcs_sync_dir, self.corpus_dir, basename)

        # Don't use parallel to avoid stability issues.
        gsutil.cp(archive, gcs_path, parallel=False)

        # Delete corpus archive so disk doesn't fill up.
        os.remove(archive)
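A hypothetical caller for the method above: create the cycle's corpus archive locally, then hand it off for upload and cleanup. The function name, the `corpus_archives_dir` parameter, and the archive filename are all assumptions:

import os
import tarfile

def archive_and_save_corpus(runner, corpus_archives_dir):
    """Hypothetical driver: archive the runner's corpus directory and
    upload it via save_corpus_archive, which also deletes the archive."""
    archive = os.path.join(corpus_archives_dir, 'corpus-archive-0001.tar.gz')
    with tarfile.open(archive, 'w:gz') as tar:
        tar.add(runner.corpus_dir, arcname=os.path.basename(runner.corpus_dir))
    runner.save_corpus_archive(archive)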
Example #4
def add_oss_fuzz_corpus(benchmark, oss_fuzz_corpora_dir):
    """Add latest public corpus from OSS-Fuzz as the seed corpus for various
    fuzz targets."""
    project = benchmark_utils.get_project(benchmark)
    fuzz_target = benchmark_utils.get_fuzz_target(benchmark)

    if not fuzz_target.startswith(project):
        full_fuzz_target = '%s_%s' % (project, fuzz_target)
    else:
        full_fuzz_target = fuzz_target

    src_corpus_url = _OSS_FUZZ_CORPUS_BACKUP_URL_FORMAT.format(
        project=project, fuzz_target=full_fuzz_target)
    dest_corpus_url = os.path.join(oss_fuzz_corpora_dir, f'{benchmark}.zip')
    gsutil.cp(src_corpus_url, dest_corpus_url, parallel=True, expect_zero=False)
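Here `expect_zero=False` presumably lets the copy fail quietly for projects without a public corpus backup. The backup URL constant is defined elsewhere in the codebase; a plausible shape, shown strictly as an assumption, follows the public OSS-Fuzz corpus-backup layout:

_OSS_FUZZ_CORPUS_BACKUP_URL_FORMAT = (
    'gs://{project}-backup.clusterfuzz-external.appspot.com/corpus/'
    'libFuzzer/{fuzz_target}/public.zip')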
Example #5
    def archive_crashes(self, cycle):
        """Archive this cycle's crashes into cloud bucket."""
        if not os.listdir(self.crashes_dir):
            logs.info('No crashes found for cycle %d.', cycle)
            return

        logs.info('Archiving crashes for cycle %d.', cycle)
        crashes_archive_name = experiment_utils.get_crashes_archive_name(cycle)
        archive = os.path.join(os.path.dirname(self.crashes_dir),
                               crashes_archive_name)
        with tarfile.open(archive, 'w:gz') as tar:
            tar.add(self.crashes_dir,
                    arcname=os.path.basename(self.crashes_dir))
        gcs_path = exp_path.gcs(
            posixpath.join(self.trial_dir, 'crashes', crashes_archive_name))
        gsutil.cp(archive, gcs_path)
        os.remove(archive)
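A minimal sketch of the naming helper used above; the real implementation lives in experiment_utils, and the zero-padded scheme here is an assumption:

def get_crashes_archive_name(cycle: int) -> str:
    """Hypothetical stand-in for experiment_utils.get_crashes_archive_name."""
    return 'crashes-%04d.tar.gz' % cycle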
Example #6
def set_up_coverage_binary(benchmark):
    """Set up coverage binaries for |benchmark|."""
    initialize_logs()
    coverage_binaries_dir = build_utils.get_coverage_binaries_dir()
    benchmark_coverage_binary_dir = coverage_binaries_dir / benchmark
    if not os.path.exists(benchmark_coverage_binary_dir):
        os.mkdir(benchmark_coverage_binary_dir)
    archive_name = 'coverage-build-%s.tar.gz' % benchmark
    cloud_bucket_archive_path = exp_path.gcs(coverage_binaries_dir /
                                             archive_name)
    gsutil.cp(cloud_bucket_archive_path,
              str(benchmark_coverage_binary_dir),
              write_to_stdout=False)
    archive_path = benchmark_coverage_binary_dir / archive_name
    with tarfile.open(archive_path, 'r:gz') as tar:
        tar.extractall(benchmark_coverage_binary_dir)
    os.remove(archive_path)
Example #7
def copy_resources_to_bucket(config_dir: str, config: Dict):
    """Copy resources the dispatcher will need for the experiment to the
    cloud_experiment_bucket."""
    def filter_file(tar_info):
        """Filter out unnecessary directories."""
        if FILTER_SOURCE_REGEX.match(tar_info.name):
            return None
        return tar_info

    cloud_experiment_path = os.path.join(config['cloud_experiment_bucket'],
                                         config['experiment'])
    base_destination = os.path.join(cloud_experiment_path, 'input')

    # Send the local source repository to the cloud for use by dispatcher.
    # Local changes to any file will propagate.
    source_archive = 'src.tar.gz'
    with tarfile.open(source_archive, 'w:gz') as tar:
        tar.add(utils.ROOT_DIR, arcname='', recursive=True, filter=filter_file)
    gsutil.cp(source_archive, base_destination + '/', parallel=True)
    os.remove(source_archive)

    # Send config files.
    destination = os.path.join(base_destination, 'config')
    gsutil.rsync(config_dir, destination, parallel=True)
Example #8
def copy_coverage_binaries(benchmark):
    """Copy coverage binaries in a local experiment."""
    shared_coverage_binaries_dir = get_shared_coverage_binaries_dir()
    mount_arg = '{0}:{0}'.format(shared_coverage_binaries_dir)
    builder_image_url = benchmark_utils.get_builder_image_url(
        benchmark, 'coverage', environment.get('CLOUD_PROJECT'))
    coverage_build_archive = 'coverage-build-{}.tar.gz'.format(benchmark)
    coverage_build_archive_shared_dir_path = os.path.join(
        shared_coverage_binaries_dir, coverage_build_archive)
    command = 'cd /out; tar -czvf {} *'.format(
        coverage_build_archive_shared_dir_path)
    new_process.execute([
        'docker', 'run', '-v', mount_arg, builder_image_url, '/bin/bash', '-c',
        command
    ])
    coverage_binaries_dir = build_utils.get_coverage_binaries_dir()
    coverage_build_archive_gcs_path = posixpath.join(
        exp_path.gcs(coverage_binaries_dir), coverage_build_archive)

    return gsutil.cp(coverage_build_archive_shared_dir_path,
                     coverage_build_archive_gcs_path)
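The archive uploaded here, coverage-build-{benchmark}.tar.gz, is the same one Examples #1 and #6 later download and extract. Because the gsutil.cp result is returned, a caller can surface upload failures; this sketch assumes a result with a `retcode` field as in Example #10 (Example #9 instead indexes the result, so the exact return shape appears to vary), and the benchmark name is illustrative:

result = copy_coverage_binaries('libpng-1.2.56')
if result.retcode != 0:
    logs.error('Failed to upload coverage build for libpng-1.2.56.')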
Example #9
def measure_snapshot_coverage(fuzzer: str, benchmark: str, trial_num: int,
                              cycle: int) -> models.Snapshot:
    """Measure coverage of the snapshot for |cycle| for |trial_num| of |fuzzer|
    and |benchmark|."""
    snapshot_logger = logs.Logger('measurer',
                                  default_extras={
                                      'fuzzer': fuzzer,
                                      'benchmark': benchmark,
                                      'trial_id': str(trial_num),
                                      'cycle': str(cycle),
                                  })
    snapshot_measurer = SnapshotMeasurer(fuzzer, benchmark, trial_num,
                                         snapshot_logger)

    measuring_start_time = time.time()
    snapshot_logger.info('Measuring cycle: %d.', cycle)
    this_time = cycle * experiment_utils.get_snapshot_seconds()
    if snapshot_measurer.is_cycle_unchanged(cycle):
        snapshot_logger.info('Cycle: %d is unchanged.', cycle)
        current_pcs = snapshot_measurer.get_current_pcs()
        return models.Snapshot(time=this_time,
                               trial_id=trial_num,
                               edges_covered=len(current_pcs))

    corpus_archive_dst = os.path.join(
        snapshot_measurer.trial_dir, 'corpus',
        experiment_utils.get_corpus_archive_name(cycle))
    corpus_archive_src = exp_path.gcs(corpus_archive_dst)

    corpus_archive_dir = os.path.dirname(corpus_archive_dst)
    if not os.path.exists(corpus_archive_dir):
        os.makedirs(corpus_archive_dir)
    if gsutil.cp(corpus_archive_src,
                 corpus_archive_dst,
                 expect_zero=False,
                 parallel=False,
                 write_to_stdout=False)[0] != 0:
        snapshot_logger.warning('Corpus not found for cycle: %d.', cycle)
        return None

    snapshot_measurer.initialize_measurement_dirs()
    snapshot_measurer.extract_corpus(corpus_archive_dst)
    # Don't keep corpus archives around longer than they need to be.
    os.remove(corpus_archive_dst)

    # Get the coverage of the new corpus units.
    snapshot_measurer.run_cov_new_units()
    all_pcs = snapshot_measurer.merge_new_pcs()
    snapshot = models.Snapshot(time=this_time,
                               trial_id=trial_num,
                               edges_covered=len(all_pcs))

    # Record the new corpus files.
    snapshot_measurer.update_measured_files()

    # Archive crashes directory.
    snapshot_measurer.archive_crashes(cycle)

    measuring_time = round(time.time() - measuring_start_time, 2)
    snapshot_logger.info('Measured cycle: %d in %.2f seconds.', cycle,
                         measuring_time)
    return snapshot
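A hypothetical driver loop over a trial's cycles, stopping once a cycle's corpus archive is missing; the fuzzer, benchmark, and trial number are illustrative:

snapshots = []
cycle = 1
while True:
    snapshot = measure_snapshot_coverage('libfuzzer', 'zlib', 1, cycle)
    if snapshot is None:
        break  # No corpus archive was found for this cycle.
    snapshots.append(snapshot)
    cycle += 1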
Example #10
    def copy_unchanged_cycles_file():
        """Copy the unchanged-cycles file from GCS, returning True on
        success."""
        result = gsutil.cp(exp_path.gcs(self.unchanged_cycles_path),
                           self.unchanged_cycles_path)
        return result.retcode == 0
Example #11
    def test_integration_runner(self, mocked_error, tmp_path, environ):
        """Test that runner can run libFuzzer and saves snapshots to GCS."""
        # Switch cwd so that fuzzers don't create tons of files in the repo.
        os.chdir(tmp_path)

        # Set env variables that would be set by the Dockerfile.
        file_directory = pathlib.Path(__file__).parent

        root_dir = file_directory.parent
        os.environ['ROOT_DIR'] = str(root_dir)

        seed_corpus_dir = tmp_path / 'seeds'
        os.mkdir(seed_corpus_dir)
        os.environ['SEED_CORPUS_DIR'] = str(seed_corpus_dir)

        output_corpus_dir = tmp_path / 'corpus'
        os.mkdir(output_corpus_dir)
        os.environ['OUTPUT_CORPUS_DIR'] = str(output_corpus_dir)

        fuzzer = 'libfuzzer'
        fuzzer_variant = fuzzer + '_variant'
        fuzzer_parent_path = root_dir / 'fuzzers' / fuzzer

        benchmark = 'MultipleConstraintsOnSmallInputTest'
        test_experiment_bucket = os.environ['TEST_CLOUD_EXPERIMENT_BUCKET']
        experiment = 'integration-test-experiment'
        gcs_directory = posixpath.join(test_experiment_bucket, experiment,
                                       'experiment-folders',
                                       '%s-%s' % (benchmark, fuzzer_variant),
                                       'trial-1')
        gsutil.rm(gcs_directory, force=True)
        # Add fuzzer directory to make it easy to run fuzzer.py in local
        # configuration.
        os.environ['PYTHONPATH'] = ':'.join(
            [str(root_dir), str(fuzzer_parent_path)])

        # Set env variables that would be set by the scheduler.
        os.environ['FUZZER'] = fuzzer
        os.environ['FUZZER_VARIANT_NAME'] = fuzzer_variant
        os.environ['BENCHMARK'] = benchmark
        os.environ['CLOUD_EXPERIMENT_BUCKET'] = test_experiment_bucket
        os.environ['EXPERIMENT'] = experiment

        os.environ['TRIAL_ID'] = str(TRIAL_NUM)

        max_total_time = 10
        os.environ['MAX_TOTAL_TIME'] = str(max_total_time)

        target_binary_path = (file_directory / 'test_data' / 'test_runner' /
                              benchmark)
        with mock.patch('common.fuzzer_utils.get_fuzz_target_binary',
                        return_value=str(target_binary_path)):
            with mock.patch('common.experiment_utils.SNAPSHOT_PERIOD',
                            max_total_time / 10):
                runner.main()

        gcs_corpus_directory = posixpath.join(gcs_directory, 'corpus')
        returncode, snapshots = gsutil.ls(gcs_corpus_directory)

        # Ensure the listing itself succeeded so the snapshot checks below
        # are meaningful.
        assert returncode == 0, 'gsutil ls %s failed.' % gcs_corpus_directory

        assert len(snapshots) >= 2

        # Check that the archives are deleted after being copied to GCS.
        assert not os.path.exists(
            tmp_path / 'corpus-archives' / 'corpus-archive-0001.tar.gz')

        local_gcs_corpus_dir_copy = tmp_path / 'gcs_corpus_dir'
        os.mkdir(local_gcs_corpus_dir_copy)
        gsutil.cp('-r', posixpath.join(gcs_corpus_directory, '*'),
                  str(local_gcs_corpus_dir_copy))
        archive_size = os.path.getsize(local_gcs_corpus_dir_copy /
                                       'corpus-archive-0001.tar.gz')

        assert archive_size > 500

        assert len(os.listdir(output_corpus_dir)) > 5
        mocked_error.assert_not_called()