Beispiel #1
0
    def archive_corpus(self):
        """Archive the corpus of the current cycle and return the archive path.

        The archive path is |self.corpus_archives_dir| joined with the name
        experiment_utils.get_corpus_archive_name gives for |self.cycle|.
        |self.corpus_dir| is always archived; on cycle 1 the value of the
        SEED_CORPUS_DIR environment setting is archived alongside it.

        Returns:
            The path that was handed to archive_directories.
        """
        archives_dir = self.corpus_archives_dir
        archive_name = experiment_utils.get_corpus_archive_name(self.cycle)
        archive_path = os.path.join(archives_dir, archive_name)

        dirs_to_archive = [self.corpus_dir]
        is_first_cycle = self.cycle == 1
        if is_first_cycle:
            # Some fuzzers like eclipser and LibFuzzer don't actually copy the
            # seed/input corpus to the output corpus (which AFL does do).
            # Without the seed corpus in the first archive their coverage
            # would be undercounted.
            dirs_to_archive.append(environment.get('SEED_CORPUS_DIR'))

        archive_directories(dirs_to_archive, archive_path)
        return archive_path
Beispiel #2
0
    def extract_cycle_corpus(self, cycle: int) -> bool:
        """Unpack the corpus archive of |cycle| into |self.corpus_dir|.

        The archive is looked up in the 'corpus' directory under
        |self.trial_dir|. Entries listed in |self.prev_corpus_dir| and the
        units in UNIT_BLACKLIST for |self.benchmark| are passed to
        extract_corpus as the blacklist.

        Returns:
            True if the archive existed and extract_corpus was called, False
            if there is no archive for |cycle|.
        """
        trial_corpus_dir = os.path.join(self.trial_dir, 'corpus')
        archive_path = os.path.join(
            trial_corpus_dir, experiment_utils.get_corpus_archive_name(cycle))

        if os.path.exists(archive_path):
            self.logger.debug('Corpus found for cycle: %d.', cycle)

            # Start from the units measured in earlier cycles, then add the
            # benchmark's blacklisted units on top.
            skipped_units = set(os.listdir(self.prev_corpus_dir))
            skipped_units.update(self.UNIT_BLACKLIST[self.benchmark])

            extract_corpus(archive_path, skipped_units, self.corpus_dir)
            return True

        self.logger.warning('Corpus not found for cycle: %d.', cycle)
        return False
Beispiel #3
0
def measure_snapshot_coverage(fuzzer: str, benchmark: str, trial_num: int,
                              cycle: int) -> models.Snapshot:
    """Measure coverage of the snapshot for |cycle| for |trial_num| of |fuzzer|
    and |benchmark|.

    If the measurer reports the cycle as unchanged, the snapshot is built from
    the current coverage without fetching a corpus archive. Otherwise the
    cycle's corpus archive is copied from the filestore, extracted, and
    coverage is run on the new units before the snapshot is built.

    Args:
        fuzzer: Name of the fuzzer, passed to SnapshotMeasurer and the logger.
        benchmark: Name of the benchmark, passed to SnapshotMeasurer and the
            logger.
        trial_num: Trial identifier; stored as |trial_id| on the snapshot.
        cycle: Cycle to measure. The snapshot time is |cycle| multiplied by
            experiment_utils.get_snapshot_seconds().

    Returns:
        A models.Snapshot for the cycle, or None if copying the corpus archive
        from the filestore returned a nonzero code.
    """
    snapshot_logger = logs.Logger('measurer',
                                  default_extras={
                                      'fuzzer': fuzzer,
                                      'benchmark': benchmark,
                                      'trial_id': str(trial_num),
                                      'cycle': str(cycle),
                                  })
    snapshot_measurer = SnapshotMeasurer(fuzzer, benchmark, trial_num,
                                         snapshot_logger)

    measuring_start_time = time.time()
    snapshot_logger.info('Measuring cycle: %d.', cycle)
    this_time = cycle * experiment_utils.get_snapshot_seconds()
    if snapshot_measurer.is_cycle_unchanged(cycle):
        # Nothing new to measure: report the coverage we already have.
        snapshot_logger.info('Cycle: %d is unchanged.', cycle)
        regions_covered = snapshot_measurer.get_current_coverage()
        fuzzer_stats_data = snapshot_measurer.get_fuzzer_stats(cycle)
        return models.Snapshot(time=this_time,
                               trial_id=trial_num,
                               edges_covered=regions_covered,
                               fuzzer_stats=fuzzer_stats_data)

    corpus_archive_dst = os.path.join(
        snapshot_measurer.trial_dir, 'corpus',
        experiment_utils.get_corpus_archive_name(cycle))
    corpus_archive_src = exp_path.filestore(corpus_archive_dst)

    # exist_ok avoids the check-then-create race when another process creates
    # the directory between an existence check and makedirs.
    corpus_archive_dir = os.path.dirname(corpus_archive_dst)
    os.makedirs(corpus_archive_dir, exist_ok=True)

    # expect_zero=False: a failed copy is reported through the return code and
    # handled here as "corpus not found" for this cycle.
    if filestore_utils.cp(corpus_archive_src,
                          corpus_archive_dst,
                          expect_zero=False).retcode:
        snapshot_logger.warning('Corpus not found for cycle: %d.', cycle)
        return None

    snapshot_measurer.initialize_measurement_dirs()
    snapshot_measurer.extract_corpus(corpus_archive_dst)
    # Don't keep corpus archives around longer than they need to be.
    os.remove(corpus_archive_dst)

    # Run coverage on the new corpus units.
    snapshot_measurer.run_cov_new_units()

    # Generate profdata and transform it into json form.
    snapshot_measurer.generate_coverage_information(cycle)

    # Get the coverage of the new corpus units.
    regions_covered = snapshot_measurer.get_current_coverage()
    fuzzer_stats_data = snapshot_measurer.get_fuzzer_stats(cycle)
    snapshot = models.Snapshot(time=this_time,
                               trial_id=trial_num,
                               edges_covered=regions_covered,
                               fuzzer_stats=fuzzer_stats_data)

    # Record the new corpus files.
    snapshot_measurer.update_measured_files()

    # Archive crashes directory.
    snapshot_measurer.archive_crashes(cycle)
    measuring_time = round(time.time() - measuring_start_time, 2)
    snapshot_logger.info('Measured cycle: %d in %f seconds.', cycle,
                         measuring_time)
    return snapshot
Beispiel #4
0
def measure_snapshot_coverage(fuzzer: str, benchmark: str, trial_num: int,
                              cycle: int) -> models.Snapshot:
    """Measure coverage of the snapshot for |cycle| for |trial_num| of |fuzzer|
    and |benchmark|.

    If the measurer reports the cycle as unchanged, the snapshot is built from
    the current PCs without fetching a corpus archive. Otherwise the cycle's
    corpus archive is copied with gsutil, extracted, and coverage is run on
    the new units before the snapshot is built.

    Args:
        fuzzer: Name of the fuzzer, passed to SnapshotMeasurer and the logger.
        benchmark: Name of the benchmark, passed to SnapshotMeasurer and the
            logger.
        trial_num: Trial identifier; stored as |trial_id| on the snapshot.
        cycle: Cycle to measure. The snapshot time is |cycle| multiplied by
            experiment_utils.get_snapshot_seconds().

    Returns:
        A models.Snapshot for the cycle, or None if the gsutil copy of the
        corpus archive returned a nonzero code.
    """
    snapshot_logger = logs.Logger('measurer',
                                  default_extras={
                                      'fuzzer': fuzzer,
                                      'benchmark': benchmark,
                                      'trial_id': str(trial_num),
                                      'cycle': str(cycle),
                                  })
    snapshot_measurer = SnapshotMeasurer(fuzzer, benchmark, trial_num,
                                         snapshot_logger)

    measuring_start_time = time.time()
    snapshot_logger.info('Measuring cycle: %d.', cycle)
    this_time = cycle * experiment_utils.get_snapshot_seconds()
    if snapshot_measurer.is_cycle_unchanged(cycle):
        # Nothing new to measure: report the PCs we already have.
        snapshot_logger.info('Cycle: %d is unchanged.', cycle)
        current_pcs = snapshot_measurer.get_current_pcs()
        return models.Snapshot(time=this_time,
                               trial_id=trial_num,
                               edges_covered=len(current_pcs))

    corpus_archive_dst = os.path.join(
        snapshot_measurer.trial_dir, 'corpus',
        experiment_utils.get_corpus_archive_name(cycle))
    corpus_archive_src = exp_path.gcs(corpus_archive_dst)

    # exist_ok avoids the check-then-create race when another process creates
    # the directory between an existence check and makedirs.
    corpus_archive_dir = os.path.dirname(corpus_archive_dst)
    os.makedirs(corpus_archive_dir, exist_ok=True)

    # expect_zero=False: a failed copy is reported through the first element
    # of the result and handled here as "corpus not found" for this cycle.
    if gsutil.cp(corpus_archive_src,
                 corpus_archive_dst,
                 expect_zero=False,
                 parallel=False,
                 write_to_stdout=False)[0] != 0:
        snapshot_logger.warning('Corpus not found for cycle: %d.', cycle)
        return None

    snapshot_measurer.initialize_measurement_dirs()
    snapshot_measurer.extract_corpus(corpus_archive_dst)
    # Don't keep corpus archives around longer than they need to be.
    os.remove(corpus_archive_dst)

    # Get the coverage of the new corpus units.
    snapshot_measurer.run_cov_new_units()
    all_pcs = snapshot_measurer.merge_new_pcs()
    snapshot = models.Snapshot(time=this_time,
                               trial_id=trial_num,
                               edges_covered=len(all_pcs))

    # Record the new corpus files.
    snapshot_measurer.update_measured_files()

    # Archive crashes directory.
    snapshot_measurer.archive_crashes(cycle)

    # measuring_time is a float rounded to two decimals, so it is logged with
    # %f; %d would drop the fractional part.
    measuring_time = round(time.time() - measuring_start_time, 2)
    snapshot_logger.info('Measured cycle: %d in %f seconds.', cycle,
                         measuring_time)
    return snapshot
def test_get_corpus_archive_name():
    """Checks the archive name that get_corpus_archive_name gives for cycle 9."""
    expected_name = 'corpus-archive-0009.tar.gz'
    actual_name = experiment_utils.get_corpus_archive_name(9)
    assert actual_name == expected_name