def archive_corpus(self):
    """Archive the corpus for the current cycle and return the archive path.

    The archive is placed in |self.corpus_archives_dir| under the name
    produced by experiment_utils.get_corpus_archive_name for |self.cycle|.
    On cycle 1 the value of environment.get('SEED_CORPUS_DIR') is archived
    together with |self.corpus_dir|.
    """
    archive_name = experiment_utils.get_corpus_archive_name(self.cycle)
    archive_path = os.path.join(self.corpus_archives_dir, archive_name)

    sources = [self.corpus_dir]
    is_first_cycle = self.cycle == 1
    if is_first_cycle:
        # Fuzzers such as eclipser and LibFuzzer (unlike AFL) do not copy the
        # seed/input corpus into their output corpus. Archiving the seeds
        # alongside the first cycle keeps their coverage from being
        # undercounted.
        sources.append(environment.get('SEED_CORPUS_DIR'))

    archive_directories(sources, archive_path)
    return archive_path
def extract_cycle_corpus(self, cycle: int) -> bool:
    """Extract the corpus archive of |cycle| into |self.corpus_dir|.

    Returns False (after logging a warning) when the archive file does not
    exist under the trial's 'corpus' directory, and True after
    extract_corpus has been called on it.
    """
    archive_name = experiment_utils.get_corpus_archive_name(cycle)
    archive_path = os.path.join(self.trial_dir, 'corpus', archive_name)

    if os.path.exists(archive_path):
        self.logger.debug('Corpus found for cycle: %d.', cycle)
        # The blacklist handed to extract_corpus combines the entries
        # already present in the previous corpus directory with the units
        # blacklisted for this benchmark.
        units_to_skip = set(os.listdir(self.prev_corpus_dir)).union(
            self.UNIT_BLACKLIST[self.benchmark])
        extract_corpus(archive_path, units_to_skip, self.corpus_dir)
        return True

    self.logger.warning('Corpus not found for cycle: %d.', cycle)
    return False
def measure_snapshot_coverage(fuzzer: str, benchmark: str, trial_num: int,
                              cycle: int) -> models.Snapshot:
    """Measure the coverage snapshot of |cycle| for trial |trial_num| of
    |fuzzer| on |benchmark|.

    Returns the resulting models.Snapshot. Returns None when the corpus
    archive for |cycle| could not be copied from the filestore.
    """
    log_extras = {
        'fuzzer': fuzzer,
        'benchmark': benchmark,
        'trial_id': str(trial_num),
        'cycle': str(cycle),
    }
    snapshot_logger = logs.Logger('measurer', default_extras=log_extras)
    snapshot_measurer = SnapshotMeasurer(fuzzer, benchmark, trial_num,
                                         snapshot_logger)

    measuring_start_time = time.time()
    snapshot_logger.info('Measuring cycle: %d.', cycle)
    this_time = cycle * experiment_utils.get_snapshot_seconds()

    def build_snapshot():
        """Create a Snapshot from the measurer's current coverage and the
        fuzzer stats of this cycle."""
        covered = snapshot_measurer.get_current_coverage()
        stats = snapshot_measurer.get_fuzzer_stats(cycle)
        return models.Snapshot(time=this_time,
                               trial_id=trial_num,
                               edges_covered=covered,
                               fuzzer_stats=stats)

    # Nothing new to measure: report the coverage already known.
    if snapshot_measurer.is_cycle_unchanged(cycle):
        snapshot_logger.info('Cycle: %d is unchanged.', cycle)
        return build_snapshot()

    archive_name = experiment_utils.get_corpus_archive_name(cycle)
    local_archive = os.path.join(snapshot_measurer.trial_dir, 'corpus',
                                 archive_name)
    remote_archive = exp_path.filestore(local_archive)

    # Make sure the local destination directory exists before copying.
    local_archive_dir = os.path.dirname(local_archive)
    if not os.path.exists(local_archive_dir):
        os.makedirs(local_archive_dir)

    copy_result = filestore_utils.cp(remote_archive,
                                     local_archive,
                                     expect_zero=False)
    if copy_result.retcode:
        snapshot_logger.warning('Corpus not found for cycle: %d.', cycle)
        return None

    snapshot_measurer.initialize_measurement_dirs()
    snapshot_measurer.extract_corpus(local_archive)
    # Corpus archives should not stay on disk longer than necessary.
    os.remove(local_archive)

    # Run coverage on the new corpus units.
    snapshot_measurer.run_cov_new_units()

    # Generate profdata and transform it into json form.
    snapshot_measurer.generate_coverage_information(cycle)

    # Capture the coverage of the new corpus units.
    snapshot = build_snapshot()

    # Record the new corpus files.
    snapshot_measurer.update_measured_files()

    # Archive crashes directory.
    snapshot_measurer.archive_crashes(cycle)

    elapsed_seconds = round(time.time() - measuring_start_time, 2)
    snapshot_logger.info('Measured cycle: %d in %f seconds.', cycle,
                         elapsed_seconds)
    return snapshot
def measure_snapshot_coverage(fuzzer: str, benchmark: str, trial_num: int,
                              cycle: int) -> models.Snapshot:
    """Measure coverage of the snapshot for |cycle| for |trial_num| of
    |fuzzer| and |benchmark|.

    Returns a models.Snapshot whose |edges_covered| is the number of PCs
    reported by the measurer. Returns None (despite the annotation) when
    the corpus archive for |cycle| could not be copied, i.e. when the first
    element of gsutil.cp's result is non-zero.
    """
    snapshot_logger = logs.Logger('measurer',
                                  default_extras={
                                      'fuzzer': fuzzer,
                                      'benchmark': benchmark,
                                      'trial_id': str(trial_num),
                                      'cycle': str(cycle),
                                  })
    snapshot_measurer = SnapshotMeasurer(fuzzer, benchmark, trial_num,
                                         snapshot_logger)

    measuring_start_time = time.time()
    snapshot_logger.info('Measuring cycle: %d.', cycle)
    this_time = cycle * experiment_utils.get_snapshot_seconds()

    # Unchanged cycle: reuse the PCs that are already known instead of
    # fetching and measuring the corpus again.
    if snapshot_measurer.is_cycle_unchanged(cycle):
        snapshot_logger.info('Cycle: %d is unchanged.', cycle)
        current_pcs = snapshot_measurer.get_current_pcs()
        return models.Snapshot(time=this_time,
                               trial_id=trial_num,
                               edges_covered=len(current_pcs))

    corpus_archive_dst = os.path.join(
        snapshot_measurer.trial_dir, 'corpus',
        experiment_utils.get_corpus_archive_name(cycle))
    corpus_archive_src = exp_path.gcs(corpus_archive_dst)

    # The local destination directory must exist before the copy.
    corpus_archive_dir = os.path.dirname(corpus_archive_dst)
    if not os.path.exists(corpus_archive_dir):
        os.makedirs(corpus_archive_dir)

    # expect_zero=False: a failed copy is reported through the result rather
    # than treated as fatal, so a missing archive is handled here.
    if gsutil.cp(corpus_archive_src,
                 corpus_archive_dst,
                 expect_zero=False,
                 parallel=False,
                 write_to_stdout=False)[0] != 0:
        snapshot_logger.warning('Corpus not found for cycle: %d.', cycle)
        return None

    snapshot_measurer.initialize_measurement_dirs()
    snapshot_measurer.extract_corpus(corpus_archive_dst)
    # Don't keep corpus archives around longer than they need to be.
    os.remove(corpus_archive_dst)

    # Get the coverage of the new corpus units.
    snapshot_measurer.run_cov_new_units()
    all_pcs = snapshot_measurer.merge_new_pcs()
    snapshot = models.Snapshot(time=this_time,
                               trial_id=trial_num,
                               edges_covered=len(all_pcs))

    # Record the new corpus files.
    snapshot_measurer.update_measured_files()

    # Archive crashes directory.
    snapshot_measurer.archive_crashes(cycle)

    measuring_time = round(time.time() - measuring_start_time, 2)
    # measuring_time is a float rounded to two decimals; '%f' keeps the
    # fractional part that '%d' would silently truncate.
    snapshot_logger.info('Measured cycle: %d in %f seconds.', cycle,
                         measuring_time)
    return snapshot
def test_get_corpus_archive_name():
    """Checks that cycle 9 yields the archive name
    'corpus-archive-0009.tar.gz'."""
    expected_name = 'corpus-archive-0009.tar.gz'
    actual_name = experiment_utils.get_corpus_archive_name(9)
    assert actual_name == expected_name