Esempio n. 1
0
 def csv_filestore_helper(file_name, df):
     """Write |df| as a csv file locally, then copy that file to filestore.

     The local file is |file_name| inside the 'data' subdirectory of the
     coverage info directory. The filestore destination is whatever
     exp_path.filestore() returns for that local path.
     """
     data_dir = os.path.join(coverage_utils.get_coverage_info_dir(), 'data')
     local_csv_path = os.path.join(data_dir, file_name)
     filestore_csv_path = exp_path.filestore(local_csv_path)
     # The row index is not written; compression='infer' is handed to to_csv().
     df.to_csv(local_csv_path, index=False, compression='infer')
     filestore_utils.cp(local_csv_path, filestore_csv_path)
Esempio n. 2
0
def coverage_steps(benchmark):
    """Build the list of GCB run steps for a coverage build of |benchmark|.

    Two steps are returned, in order:
      1. A DOCKER_IMAGE step that runs the benchmark's coverage builder image
         and tars the contents of /out together with /src and /work into
         /host-out (mounted from /workspace/out).
      2. A gsutil step that copies the resulting archive into the coverage
         binaries directory in the filestore.
    """
    binaries_filestore_dir = exp_path.filestore(
        build_utils.get_coverage_binaries_dir())
    archive_name = 'coverage-build-' + benchmark + '.tar.gz'

    # TODO(metzman): Get rid of this and use one source of truth
    # for tags.
    builder_image = posixpath.join(get_docker_registry(), 'builders',
                                   'coverage', benchmark)
    tagged_image = builder_image + ':' + experiment_utils.get_experiment_name()

    archive_step = {
        'name': DOCKER_IMAGE,
        'args': [
            'run',
            '-v',
            '/workspace/out:/host-out',
            tagged_image,
            '/bin/bash',
            '-c',
            'cd /out; tar -czvf /host-out/' + archive_name + ' * /src /work',
        ],
    }
    upload_step = {
        'name': 'gcr.io/cloud-builders/gsutil',
        'args': [
            '-m',
            'cp',
            '/workspace/out/' + archive_name,
            binaries_filestore_dir + '/',
        ],
    }
    return [archive_step, upload_step]
Esempio n. 3
0
 def copy_unchanged_cycles_file():
     """Copy the unchanged-cycles file from the filestore to its local path.

     Returns True if the copy finished with return code 0, False otherwise.
     """
     local_path = self.unchanged_cycles_path
     remote_path = exp_path.filestore(local_path)
     # The return code is inspected here, hence expect_zero=False.
     cp_result = filestore_utils.cp(remote_path, local_path, expect_zero=False)
     return cp_result.retcode == 0
Esempio n. 4
0
 def copy_unchanged_cycles_file():
     """Copy the unchanged-cycles file from the filestore to its local path.

     Returns True when the copy succeeds and False when the copy raises
     subprocess.CalledProcessError.
     """
     local_path = self.unchanged_cycles_path
     remote_path = exp_path.filestore(local_path)
     try:
         filestore_utils.cp(remote_path, local_path)
     except subprocess.CalledProcessError:
         return False
     return True
Esempio n. 5
0
 def get_fuzzer_stats(self, cycle):
     """Return the fuzzer stats recorded for |cycle|.

     The stats file is looked up under this trial's directory in the
     filestore. If loading it raises ValueError or JSONDecodeError, an error
     is logged and None is returned.
     """
     filename = experiment_utils.get_stats_filename(cycle)
     local_stats_path = os.path.join(self.trial_dir, filename)
     filestore_stats_path = exp_path.filestore(local_stats_path)
     try:
         # This resolves to the module-level get_fuzzer_stats(), not the
         # method being defined here.
         stats = get_fuzzer_stats(filestore_stats_path)
     except (ValueError, json.decoder.JSONDecodeError):
         logger.error('Stats are invalid.')
         stats = None
     return stats
Esempio n. 6
0
def _build_benchmark_coverage(benchmark: str) -> Tuple[int, str]:
    """Run the GCB coverage build for |benchmark| and return _build()'s result.

    The build uses the 'coverage.yaml' config and receives the filestore
    location of the coverage binaries directory and the benchmark name as
    substitutions.
    """
    binaries_dir = build_utils.get_coverage_binaries_dir()
    substitutions = {
        '_GCS_COVERAGE_BINARIES_DIR': exp_path.filestore(binaries_dir),
        '_BENCHMARK': benchmark,
    }
    return _build(
        get_build_config_file('coverage.yaml'),
        'benchmark-{benchmark}-coverage'.format(benchmark=benchmark),
        substitutions)
Esempio n. 7
0
def build_coverage(benchmark):
    """Run the GCB coverage image build for |benchmark|.

    Uses the 'coverage.yaml' build config. The value returned by _build() is
    discarded, so this function returns None.
    """
    filestore_binaries_dir = exp_path.filestore(
        build_utils.get_coverage_binaries_dir())
    substitutions = dict(
        _GCS_COVERAGE_BINARIES_DIR=filestore_binaries_dir,
        _BENCHMARK=benchmark,
    )
    build_config_path = get_build_config_file('coverage.yaml')
    _build(build_config_path,
           'benchmark-{benchmark}-coverage'.format(benchmark=benchmark),
           substitutions)
Esempio n. 8
0
def store_build_logs(build_config, build_result):
    """Write the result of a build to '<build_config>.txt' in the logs dir.

    The text records |build_result|'s return code and output. It is staged in
    a temporary file, which is then copied to the filestore path of the build
    logs directory.
    """
    log_text = 'Command returned {retcode}.\nOutput: {output}'.format(
        retcode=build_result.retcode, output=build_result.output)
    with tempfile.NamedTemporaryFile(mode='w') as log_file:
        log_file.write(log_text)
        # Flush before copying: the copy reads the file by name while this
        # handle is still open.
        log_file.flush()

        destination = exp_path.filestore(get_build_logs_dir() /
                                         (build_config + '.txt'))
        filestore_utils.cp(log_file.name, destination)
0
 def generate_coverage_regions_json(self):
     """Dump the covered regions to 'covered_regions.json' and upload it.

     The regions are extracted from the merged summary json file, written
     into the data directory (created if needed) and then copied to the
     corresponding filestore path.
     """
     regions = extract_covered_regions_from_summary_json(
         self.merged_summary_json_file)
     local_json_path = os.path.join(self.data_dir, 'covered_regions.json')
     filestore_json_path = exp_path.filestore(local_json_path)
     filesystem.create_directory(self.data_dir)
     with open(local_json_path, 'w') as json_file:
         json.dump(regions, json_file)
     # expect_zero=False is passed and the copy result is not inspected.
     filestore_utils.cp(local_json_path,
                        filestore_json_path,
                        expect_zero=False)
Esempio n. 10
0
def store_coverage_data(experiment_config: dict):
    """Collect the covered regions of the experiment and upload them as json.

    The regions are gathered with a multiprocessing pool and a manager queue,
    written to 'covered_regions.json' in the experiment folders directory and
    copied to the matching filestore path.
    """
    logger.info('Start storing coverage data')
    with multiprocessing.Pool() as pool:
        with multiprocessing.Manager() as manager:
            queue = manager.Queue()  # pytype: disable=attribute-error
            regions = get_all_covered_regions(experiment_config, pool, queue)
            local_json_path = os.path.join(get_experiment_folders_dir(),
                                           'covered_regions.json')
            with open(local_json_path, 'w') as json_file:
                json.dump(regions, json_file)
            filestore_utils.cp(local_json_path,
                               exp_path.filestore(local_json_path))
    logger.info('Finished storing coverage data')
Esempio n. 11
0
 def save_crash_files(self, cycle):
     """Archive the crashes directory for |cycle| and upload the archive.

     A gzipped tarball of the crashes directory is created next to it,
     copied into the trial's 'crashes' directory in the filestore, and then
     removed locally.
     """
     archive_name = experiment_utils.get_crashes_archive_name(cycle)
     local_archive = os.path.join(os.path.dirname(self.crashes_dir),
                                  archive_name)
     with tarfile.open(local_archive, 'w:gz') as archive:
         # Store entries under the directory's base name rather than its
         # full local path.
         archive.add(self.crashes_dir,
                     arcname=os.path.basename(self.crashes_dir))
     filestore_archive = exp_path.filestore(
         posixpath.join(self.trial_dir, 'crashes', archive_name))
     filestore_utils.cp(local_archive, filestore_archive)
     os.remove(local_archive)
Esempio n. 12
0
def set_up_coverage_binary(benchmark):
    """Set up coverage binaries for |benchmark|.

    Copies the 'coverage-build-<benchmark>.tar.gz' archive from the filestore
    into the benchmark's subdirectory of the coverage binaries directory,
    extracts it there and deletes the local archive afterwards.
    """
    initialize_logs()
    coverage_binaries_dir = build_utils.get_coverage_binaries_dir()
    benchmark_coverage_binary_dir = coverage_binaries_dir / benchmark
    filesystem.create_directory(benchmark_coverage_binary_dir)
    archive_name = 'coverage-build-%s.tar.gz' % benchmark
    archive_filestore_path = exp_path.filestore(coverage_binaries_dir /
                                                archive_name)
    filestore_utils.cp(archive_filestore_path,
                       str(benchmark_coverage_binary_dir))
    archive_path = benchmark_coverage_binary_dir / archive_name
    # Open the archive in a context manager so its file handle is closed
    # (even if extraction fails) before the archive is removed.
    with tarfile.open(archive_path, 'r:gz') as tar:
        tar.extractall(benchmark_coverage_binary_dir)
    os.remove(archive_path)
Esempio n. 13
0
    def archive_crashes(self, cycle):
        """Upload a gzipped tarball of this cycle's crashes to the filestore.

        Does nothing except log when the crashes directory is empty.
        Otherwise the directory is archived next to itself, the archive is
        copied into the trial's 'crashes' directory in the filestore, and the
        local archive is deleted.
        """
        crash_entries = os.listdir(self.crashes_dir)
        if not crash_entries:
            logs.info('No crashes found for cycle %d.', cycle)
            return

        logs.info('Archiving crashes for cycle %d.', cycle)
        archive_name = experiment_utils.get_crashes_archive_name(cycle)
        local_archive = os.path.join(os.path.dirname(self.crashes_dir),
                                     archive_name)
        with tarfile.open(local_archive, 'w:gz') as archive:
            # Store entries under the directory's base name rather than its
            # full local path.
            archive.add(self.crashes_dir,
                        arcname=os.path.basename(self.crashes_dir))
        filestore_archive = exp_path.filestore(
            posixpath.join(self.trial_dir, 'crashes', archive_name))
        filestore_utils.cp(local_archive, filestore_archive)
        os.remove(local_archive)
Esempio n. 14
0
def _build_oss_fuzz_project_coverage(benchmark: str) -> Tuple[int, str]:
    """Run the GCB coverage build for OSS-Fuzz-based |benchmark|.

    Uses the 'oss-fuzz-coverage.yaml' build config. The build config name is
    derived from the benchmark's OSS-Fuzz project and builder hash. Returns
    whatever _build() returns.
    """
    oss_fuzz_project = benchmark_utils.get_project(benchmark)
    builder_hash = benchmark_utils.get_oss_fuzz_builder_hash(benchmark)
    binaries_dir = build_utils.get_coverage_binaries_dir()
    substitutions = {
        '_GCS_COVERAGE_BINARIES_DIR': exp_path.filestore(binaries_dir),
        '_BENCHMARK': benchmark,
        '_OSS_FUZZ_PROJECT': oss_fuzz_project,
        '_OSS_FUZZ_BUILDER_HASH': builder_hash,
    }
    build_config_path = get_build_config_file('oss-fuzz-coverage.yaml')
    build_config_name = 'oss-fuzz-{project}-coverage-hash-{hash}'.format(
        project=oss_fuzz_project, hash=builder_hash)
    return _build(build_config_path, build_config_name, substitutions)
Esempio n. 15
0
def copy_coverage_binaries(benchmark):
    """Archive |benchmark|'s coverage binaries and copy them to the filestore.

    Runs the benchmark's coverage builder image with the shared coverage
    binaries directory mounted at the same path inside the container, tars
    the contents of /out into that directory, then copies the archive to the
    coverage binaries directory in the filestore. Returns the result of that
    copy.
    """
    shared_dir = get_shared_coverage_binaries_dir()
    archive_name = 'coverage-build-{}.tar.gz'.format(benchmark)
    shared_archive_path = os.path.join(shared_dir, archive_name)
    image_url = benchmark_utils.get_builder_image_url(
        benchmark, 'coverage', environment.get('CLOUD_PROJECT'))

    docker_command = [
        'docker',
        'run',
        '-v',
        '{0}:{0}'.format(shared_dir),
        image_url,
        '/bin/bash',
        '-c',
        'cd /out; tar -czvf {} *'.format(shared_archive_path),
    ]
    new_process.execute(docker_command)

    filestore_binaries_dir = exp_path.filestore(
        build_utils.get_coverage_binaries_dir())
    filestore_archive_path = posixpath.join(filestore_binaries_dir,
                                            archive_name)
    return filestore_utils.cp(shared_archive_path, filestore_archive_path)
Esempio n. 16
0
    def generate_coverage_report(self):
        """Produce the HTML coverage report and copy it to the filestore.

        Runs 'llvm-cov show' on the binary with the merged profdata file,
        writing the report into the report directory. If the command returns
        a non-zero code, an error is logged and nothing is uploaded.
        Otherwise the report directory is copied recursively to its filestore
        path.
        """
        path_equivalence_arg = '-path-equivalence=/,{prefix}'.format(
            prefix=self.source_files_dir)
        output_dir_arg = '-output-dir={dst_dir}'.format(
            dst_dir=self.report_dir)
        profdata_arg = '-instr-profile={profdata}'.format(
            profdata=self.merged_profdata_file)
        llvm_cov_command = [
            'llvm-cov', 'show', '-format=html', path_equivalence_arg,
            output_dir_arg, '-Xdemangler', 'c++filt', '-Xdemangler', '-n',
            self.binary_file, profdata_arg
        ]
        report_result = new_process.execute(llvm_cov_command,
                                            expect_zero=False)
        if report_result.retcode == 0:
            local_report_dir = self.report_dir
            filestore_report_dir = exp_path.filestore(self.report_dir)
            filestore_utils.cp(local_report_dir,
                               filestore_report_dir,
                               recursive=True,
                               parallel=True)
            return

        failure_message = ('Coverage report generation failed for '
                           'fuzzer: {fuzzer},benchmark: {benchmark}.'.format(
                               fuzzer=self.fuzzer, benchmark=self.benchmark))
        logger.error(failure_message)
Esempio n. 17
0
def exists_in_experiment_filestore(path: pathlib.Path) -> bool:
    """Report whether |path| is present in the experiment filestore.

    The check lists the filestore location of |path| and returns True only
    when that listing finishes with return code 0.
    """
    filestore_path = exp_path.filestore(path)
    listing = filestore_utils.ls(filestore_path, must_exist=False)
    return listing.retcode == 0
Esempio n. 18
0
def measure_snapshot_coverage(fuzzer: str, benchmark: str, trial_num: int,
                              cycle: int) -> models.Snapshot:
    """Measure coverage of the snapshot for |cycle| for |trial_num| of |fuzzer|
    and |benchmark|.

    Returns a models.Snapshot holding the snapshot time, the trial id, the
    covered regions and the fuzzer stats. If the cycle is reported as
    unchanged, the snapshot is built from the current coverage without
    fetching a corpus archive. Returns None (despite the annotation) when the
    corpus archive for |cycle| cannot be copied from the filestore."""
    snapshot_logger = logs.Logger('measurer',
                                  default_extras={
                                      'fuzzer': fuzzer,
                                      'benchmark': benchmark,
                                      'trial_id': str(trial_num),
                                      'cycle': str(cycle),
                                  })
    snapshot_measurer = SnapshotMeasurer(fuzzer, benchmark, trial_num,
                                         snapshot_logger)

    measuring_start_time = time.time()
    snapshot_logger.info('Measuring cycle: %d.', cycle)
    # Time recorded on the snapshot: the cycle number times the snapshot
    # period.
    this_time = cycle * experiment_utils.get_snapshot_seconds()
    if snapshot_measurer.is_cycle_unchanged(cycle):
        # Unchanged cycle: report the current coverage without downloading or
        # processing a corpus archive.
        snapshot_logger.info('Cycle: %d is unchanged.', cycle)
        regions_covered = snapshot_measurer.get_current_coverage()
        fuzzer_stats_data = snapshot_measurer.get_fuzzer_stats(cycle)
        return models.Snapshot(time=this_time,
                               trial_id=trial_num,
                               edges_covered=regions_covered,
                               fuzzer_stats=fuzzer_stats_data)

    # Local destination of the corpus archive; the filestore source path is
    # derived from this same local path.
    corpus_archive_dst = os.path.join(
        snapshot_measurer.trial_dir, 'corpus',
        experiment_utils.get_corpus_archive_name(cycle))
    corpus_archive_src = exp_path.filestore(corpus_archive_dst)

    corpus_archive_dir = os.path.dirname(corpus_archive_dst)
    if not os.path.exists(corpus_archive_dir):
        os.makedirs(corpus_archive_dir)

    # A non-zero return code from the copy is treated as a missing corpus.
    if filestore_utils.cp(corpus_archive_src,
                          corpus_archive_dst,
                          expect_zero=False).retcode:
        snapshot_logger.warning('Corpus not found for cycle: %d.', cycle)
        return None

    snapshot_measurer.initialize_measurement_dirs()
    snapshot_measurer.extract_corpus(corpus_archive_dst)
    # Don't keep corpus archives around longer than they need to be.
    os.remove(corpus_archive_dst)

    # Run coverage on the new corpus units.
    snapshot_measurer.run_cov_new_units()

    # Generate profdata and transform it into json form.
    snapshot_measurer.generate_coverage_information(cycle)

    # Get the coverage of the new corpus units.
    regions_covered = snapshot_measurer.get_current_coverage()
    fuzzer_stats_data = snapshot_measurer.get_fuzzer_stats(cycle)
    snapshot = models.Snapshot(time=this_time,
                               trial_id=trial_num,
                               edges_covered=regions_covered,
                               fuzzer_stats=fuzzer_stats_data)

    # Record the new corpus files.
    snapshot_measurer.update_measured_files()

    # Archive crashes directory.
    snapshot_measurer.archive_crashes(cycle)
    # Wall-clock duration of this measurement, rounded to two decimals.
    measuring_time = round(time.time() - measuring_start_time, 2)
    snapshot_logger.info('Measured cycle: %d in %f seconds.', cycle,
                         measuring_time)
    return snapshot
Esempio n. 19
0
def measure_snapshot_coverage(fuzzer: str, benchmark: str, trial_num: int,
                              cycle: int) -> models.Snapshot:
    """Measure coverage of the snapshot for |cycle| for |trial_num| of |fuzzer|
    and |benchmark|.

    Returns a models.Snapshot holding the snapshot time, the trial id and the
    number of covered PCs. If the cycle is reported as unchanged, the
    snapshot is built from the current PCs without fetching a corpus archive.
    Returns None (despite the annotation) when copying the corpus archive for
    |cycle| from the filestore raises subprocess.CalledProcessError."""
    snapshot_logger = logs.Logger('measurer',
                                  default_extras={
                                      'fuzzer': fuzzer,
                                      'benchmark': benchmark,
                                      'trial_id': str(trial_num),
                                      'cycle': str(cycle),
                                  })
    snapshot_measurer = SnapshotMeasurer(fuzzer, benchmark, trial_num,
                                         snapshot_logger)

    measuring_start_time = time.time()
    snapshot_logger.info('Measuring cycle: %d.', cycle)
    # Time recorded on the snapshot: the cycle number times the snapshot
    # period.
    this_time = cycle * experiment_utils.get_snapshot_seconds()
    if snapshot_measurer.is_cycle_unchanged(cycle):
        # Unchanged cycle: report the current PCs without downloading or
        # processing a corpus archive.
        snapshot_logger.info('Cycle: %d is unchanged.', cycle)
        current_pcs = snapshot_measurer.get_current_pcs()
        return models.Snapshot(time=this_time,
                               trial_id=trial_num,
                               edges_covered=len(current_pcs))

    # Local destination of the corpus archive; the filestore source path is
    # derived from this same local path.
    corpus_archive_dst = os.path.join(
        snapshot_measurer.trial_dir, 'corpus',
        experiment_utils.get_corpus_archive_name(cycle))
    corpus_archive_src = exp_path.filestore(corpus_archive_dst)

    corpus_archive_dir = os.path.dirname(corpus_archive_dst)
    if not os.path.exists(corpus_archive_dir):
        os.makedirs(corpus_archive_dir)

    # A failed copy is treated as a missing corpus for this cycle.
    try:
        filestore_utils.cp(corpus_archive_src, corpus_archive_dst)
    except subprocess.CalledProcessError:
        snapshot_logger.warning('Corpus not found for cycle: %d.', cycle)
        return None

    snapshot_measurer.initialize_measurement_dirs()
    snapshot_measurer.extract_corpus(corpus_archive_dst)
    # Don't keep corpus archives around longer than they need to be.
    os.remove(corpus_archive_dst)

    # Get the coverage of the new corpus units.
    snapshot_measurer.run_cov_new_units()
    all_pcs = snapshot_measurer.merge_new_pcs()
    snapshot = models.Snapshot(time=this_time,
                               trial_id=trial_num,
                               edges_covered=len(all_pcs))

    # Record the new corpus files.
    snapshot_measurer.update_measured_files()

    # Archive crashes directory.
    snapshot_measurer.archive_crashes(cycle)

    # measuring_time is a float rounded to two decimals, so log it with %f;
    # %d would silently drop the fractional seconds.
    measuring_time = round(time.time() - measuring_start_time, 2)
    snapshot_logger.info('Measured cycle: %d in %f seconds.', cycle,
                         measuring_time)
    return snapshot