def copy_resources_to_bucket(config_dir: str, config: Dict):
    """Copy resources the dispatcher will need for the experiment to the
    experiment_filestore."""

    def filter_file(tar_info):
        """Filter out unnecessary directories."""
        if FILTER_SOURCE_REGEX.match(tar_info.name):
            return None
        return tar_info

    # Set environment variables to use corresponding filestore_utils.
    os.environ['EXPERIMENT_FILESTORE'] = config['experiment_filestore']
    os.environ['EXPERIMENT'] = config['experiment']
    experiment_filestore_path = experiment_utils.get_experiment_filestore_path()
    base_destination = os.path.join(experiment_filestore_path, 'input')

    # Send the local source repository to the cloud for use by dispatcher.
    # Local changes to any file will propagate.
    source_archive = 'src.tar.gz'
    with tarfile.open(source_archive, 'w:gz') as tar:
        tar.add(utils.ROOT_DIR, arcname='', recursive=True, filter=filter_file)
    filestore_utils.cp(source_archive, base_destination + '/', parallel=True)
    os.remove(source_archive)

    # Send config files.
    destination = os.path.join(base_destination, 'config')
    filestore_utils.rsync(config_dir, destination, parallel=True)
def csv_filestore_helper(file_name, df):
    """Helper method for storing csv files in filestore."""
    src = os.path.join(coverage_utils.get_coverage_info_dir(), 'data',
                       file_name)
    dst = exp_path.filestore(src)
    df.to_csv(src, index=False, compression='infer')
    filestore_utils.cp(src, dst)
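# Usage sketch for csv_filestore_helper above. Hedged: the DataFrame contents
# and file name are hypothetical; pandas and the local coverage data
# directory are assumed to be available.
import pandas as pd

summary_df = pd.DataFrame({
    'fuzzer': ['afl', 'libfuzzer'],
    'benchmark': ['zlib_zlib_uncompress_fuzzer'] * 2,
    'covered_regions': [1234, 1460],
})
csv_filestore_helper('summary.csv', summary_df)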
def test_parallel_take_no_effects_locally(fs, use_local_filestore):  # pylint: disable=invalid-name,unused-argument
    """Tests that the `parallel` argument has no effect when running locally,
    whether it is True or False."""
    fs.create_dir(LOCAL_DIR)
    fs.create_dir(LOCAL_DIR_2)

    with mock.patch('common.new_process.execute') as mocked_execute:
        filestore_utils.rsync(LOCAL_DIR, LOCAL_DIR_2, parallel=True)
        filestore_utils.rsync(LOCAL_DIR, LOCAL_DIR_2, parallel=False)
        call_args_list = mocked_execute.call_args_list
        assert call_args_list[0] == call_args_list[1]

    with mock.patch('common.new_process.execute') as mocked_execute:
        filestore_utils.cp(LOCAL_DIR, LOCAL_DIR_2, recursive=True,
                           parallel=True)
        filestore_utils.cp(LOCAL_DIR, LOCAL_DIR_2, recursive=True,
                           parallel=False)
        call_args_list = mocked_execute.call_args_list
        assert call_args_list[0] == call_args_list[1]

    with mock.patch('common.new_process.execute') as mocked_execute:
        filestore_utils.rm(LOCAL_DIR, recursive=True, parallel=True)
        filestore_utils.rm(LOCAL_DIR, recursive=True, parallel=False)
        call_args_list = mocked_execute.call_args_list
        assert call_args_list[0] == call_args_list[1]
def copy_unchanged_cycles_file():
    """Copies the unchanged-cycles file from the filestore. Returns True on
    success."""
    unchanged_cycles_filestore_path = exp_path.filestore(
        self.unchanged_cycles_path)
    try:
        filestore_utils.cp(unchanged_cycles_filestore_path,
                           self.unchanged_cycles_path)
        return True
    except subprocess.CalledProcessError:
        return False
def store_build_logs(build_config, build_result):
    """Save build results in the build logs bucket."""
    build_output = ('Command returned {retcode}.\nOutput: {output}'.format(
        retcode=build_result.retcode, output=build_result.output))
    with tempfile.NamedTemporaryFile(mode='w') as tmp:
        tmp.write(build_output)
        # Flush so the full log is on disk before the copy reads tmp.name.
        tmp.flush()
        build_log_filename = build_config + '.txt'
        filestore_utils.cp(
            tmp.name,
            exp_path.filestore(get_build_logs_dir() / build_log_filename))
def generate_coverage_regions_json(self):
    """Stores the coverage data in a json file."""
    covered_regions = extract_covered_regions_from_summary_json(
        self.merged_summary_json_file)
    coverage_json_src = os.path.join(self.data_dir, 'covered_regions.json')
    coverage_json_dst = exp_path.filestore(coverage_json_src)
    filesystem.create_directory(self.data_dir)
    with open(coverage_json_src, 'w') as file_handle:
        json.dump(covered_regions, file_handle)
    filestore_utils.cp(coverage_json_src, coverage_json_dst,
                       expect_zero=False)
def save_corpus_archive(self, archive):
    """Save corpus |archive| to GCS and delete when done."""
    if not self.gcs_sync_dir:
        return

    basename = os.path.basename(archive)
    gcs_path = posixpath.join(self.gcs_sync_dir, self.corpus_dir, basename)

    # Don't use parallel to avoid stability issues.
    filestore_utils.cp(archive, gcs_path)

    # Delete corpus archive so disk doesn't fill up.
    os.remove(archive)
def store_coverage_data(experiment_config: dict):
    """Generate the specific coverage data and store in cloud bucket."""
    logger.info('Start storing coverage data')
    with multiprocessing.Pool() as pool, multiprocessing.Manager() as manager:
        q = manager.Queue()  # pytype: disable=attribute-error
        covered_regions = get_all_covered_regions(experiment_config, pool, q)
        json_src_dir = get_experiment_folders_dir()
        json_src = os.path.join(json_src_dir, 'covered_regions.json')
        with open(json_src, 'w') as src_file:
            json.dump(covered_regions, src_file)
        json_dst = exp_path.filestore(json_src)
        filestore_utils.cp(json_src, json_dst)
    logger.info('Finished storing coverage data')
def save_crash_files(self, cycle):
    """Save crashes in per-cycle crash archive."""
    crashes_archive_name = experiment_utils.get_crashes_archive_name(cycle)
    archive_path = os.path.join(os.path.dirname(self.crashes_dir),
                                crashes_archive_name)
    with tarfile.open(archive_path, 'w:gz') as tar:
        tar.add(self.crashes_dir,
                arcname=os.path.basename(self.crashes_dir))
    trial_crashes_dir = posixpath.join(self.trial_dir, 'crashes')
    archive_filestore_path = exp_path.filestore(
        posixpath.join(trial_crashes_dir, crashes_archive_name))
    filestore_utils.cp(archive_path, archive_filestore_path)
    os.remove(archive_path)
def set_up_coverage_binary(benchmark):
    """Set up coverage binaries for |benchmark|."""
    initialize_logs()
    coverage_binaries_dir = build_utils.get_coverage_binaries_dir()
    benchmark_coverage_binary_dir = coverage_binaries_dir / benchmark
    filesystem.create_directory(benchmark_coverage_binary_dir)
    archive_name = 'coverage-build-%s.tar.gz' % benchmark
    archive_filestore_path = exp_path.filestore(coverage_binaries_dir /
                                                archive_name)
    filestore_utils.cp(archive_filestore_path,
                       str(benchmark_coverage_binary_dir))
    archive_path = benchmark_coverage_binary_dir / archive_name
    # Use a context manager so the archive is closed before it is removed.
    with tarfile.open(archive_path, 'r:gz') as tar:
        tar.extractall(benchmark_coverage_binary_dir)
    os.remove(archive_path)
def get_fuzzer_covered_regions(benchmark_df, benchmark, fuzzer):
    """Gets the covered regions for |fuzzer| in |benchmark_df| from the json
    file in the bucket."""
    with tempfile.TemporaryDirectory() as temp_dir:
        dst_file = os.path.join(temp_dir, 'tmp.json')
        src_filestore_path = get_fuzzer_filestore_path(benchmark_df, fuzzer)
        src_file = posixpath.join(src_filestore_path, 'coverage', 'data',
                                  benchmark, fuzzer, 'covered_regions.json')
        if filestore_utils.ls(src_file, must_exist=False).retcode:
            # An error occurred; the coverage file does not exist. Bail out.
            return {}

        filestore_utils.cp(src_file, dst_file)
        with open(dst_file) as json_file:
            return json.load(json_file)
def copy_unchanged_cycles_file():
    """Variant of the helper above: copies the unchanged-cycles file and
    reports success via the copy's retcode instead of an exception."""
    unchanged_cycles_filestore_path = exp_path.filestore(
        self.unchanged_cycles_path)
    result = filestore_utils.cp(unchanged_cycles_filestore_path,
                                self.unchanged_cycles_path,
                                expect_zero=False)
    return result.retcode == 0
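# The two copy_unchanged_cycles_file variants above express the same contract
# with different error handling: by default filestore_utils.cp raises
# subprocess.CalledProcessError on failure, while expect_zero=False returns a
# result whose retcode can be checked instead. A minimal side-by-side sketch
# (the function names and arguments are hypothetical; subprocess and
# filestore_utils are assumed imported as in the snippets above):
def copy_if_present_exception_style(src, dst):
    """Returns True if |src| was copied, treating failure as an exception."""
    try:
        filestore_utils.cp(src, dst)
        return True
    except subprocess.CalledProcessError:
        return False


def copy_if_present_retcode_style(src, dst):
    """Same contract, but inspects the retcode instead of catching."""
    result = filestore_utils.cp(src, dst, expect_zero=False)
    return result.retcode == 0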
def archive_crashes(self, cycle):
    """Archive this cycle's crashes into filestore."""
    if not os.listdir(self.crashes_dir):
        logs.info('No crashes found for cycle %d.', cycle)
        return

    logs.info('Archiving crashes for cycle %d.', cycle)
    crashes_archive_name = experiment_utils.get_crashes_archive_name(cycle)
    archive_path = os.path.join(os.path.dirname(self.crashes_dir),
                                crashes_archive_name)
    with tarfile.open(archive_path, 'w:gz') as tar:
        tar.add(self.crashes_dir,
                arcname=os.path.basename(self.crashes_dir))
    archive_filestore_path = exp_path.filestore(
        posixpath.join(self.trial_dir, 'crashes', crashes_archive_name))
    filestore_utils.cp(archive_path, archive_filestore_path)
    os.remove(archive_path)
def test_using_gsutil(use_gsutil):  # pylint: disable=unused-argument
    """Tests that gsutil is used in Google Cloud running settings."""
    with mock.patch('common.new_process.execute') as mocked_execute:
        filestore_utils.cp(GCS_DIR, GCS_DIR_2, recursive=True)
        assert 'gsutil' in mocked_execute.call_args_list[0][0][0]

    with mock.patch('common.new_process.execute') as mocked_execute:
        filestore_utils.ls(GCS_DIR)
        assert 'gsutil' in mocked_execute.call_args_list[0][0][0]

    with mock.patch('common.new_process.execute') as mocked_execute:
        filestore_utils.rm(GCS_DIR, recursive=True)
        assert 'gsutil' in mocked_execute.call_args_list[0][0][0]

    with mock.patch('common.new_process.execute') as mocked_execute:
        filestore_utils.rsync(GCS_DIR, GCS_DIR_2, recursive=True)
        assert 'gsutil' in mocked_execute.call_args_list[0][0][0]
def test_keyword_args(use_gsutil):  # pylint: disable=unused-argument
    """Tests that keyword args, in particular 'parallel', are handled
    correctly."""
    with mock.patch('common.new_process.execute') as mocked_execute:
        filestore_utils.rm(GCS_DIR_2, recursive=True, parallel=True)
        mocked_execute.assert_called_with(
            ['gsutil', '-m', 'rm', '-r', GCS_DIR_2], expect_zero=True)

    with mock.patch('common.new_process.execute') as mocked_execute:
        mocked_execute.return_value = new_process.ProcessResult(0, '', '')
        filestore_utils.ls(GCS_DIR_2)
        mocked_execute.assert_called_with(['gsutil', 'ls', GCS_DIR_2],
                                          expect_zero=True)

    with mock.patch('common.new_process.execute') as mocked_execute:
        filestore_utils.cp(GCS_DIR, GCS_DIR_2, parallel=True)
        mocked_execute.assert_called_with(
            ['gsutil', '-m', 'cp', GCS_DIR, GCS_DIR_2], expect_zero=True)
def _unpack_clusterfuzz_seed_corpus(fuzz_target_path, corpus_directory):
    """If a clusterfuzz seed corpus archive is available, unpack it into the
    corpus directory. Copied from unpack_seed_corpus in engine_common.py in
    ClusterFuzz."""
    oss_fuzz_corpus = environment.get('OSS_FUZZ_CORPUS')
    if oss_fuzz_corpus:
        benchmark = environment.get('BENCHMARK')
        corpus_archive_filename = f'{benchmark}.zip'
        oss_fuzz_corpus_archive_path = posixpath.join(
            experiment_utils.get_oss_fuzz_corpora_filestore_path(),
            corpus_archive_filename)
        seed_corpus_archive_path = posixpath.join(FUZZ_TARGET_DIR,
                                                  corpus_archive_filename)
        filestore_utils.cp(oss_fuzz_corpus_archive_path,
                           seed_corpus_archive_path)
    else:
        seed_corpus_archive_path = get_clusterfuzz_seed_corpus_path(
            fuzz_target_path)

    if not seed_corpus_archive_path:
        return

    with zipfile.ZipFile(seed_corpus_archive_path) as zip_file:
        # Unpack seed corpus recursively into the root of the main corpus
        # directory.
        idx = 0
        for seed_corpus_file in zip_file.infolist():
            if seed_corpus_file.filename.endswith('/'):
                # Ignore directories.
                continue

            # Allow callers to opt-out of unpacking large files.
            if seed_corpus_file.file_size > CORPUS_ELEMENT_BYTES_LIMIT:
                continue

            output_filename = '%016d' % idx
            output_file_path = os.path.join(corpus_directory, output_filename)
            zip_file.extract(seed_corpus_file, output_file_path)
            idx += 1

    logs.info('Unarchived %d files from seed corpus %s.', idx,
              seed_corpus_archive_path)
def get_fuzzer_stats(stats_filestore_path):
    """Reads, validates and returns the stats in |stats_filestore_path|."""
    with tempfile.NamedTemporaryFile() as temp_file:
        result = filestore_utils.cp(stats_filestore_path,
                                    temp_file.name,
                                    expect_zero=False)
        if result.retcode != 0:
            return None
        stats_str = temp_file.read()
    fuzzer_stats.validate_fuzzer_stats(stats_str)
    return json.loads(stats_str)
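# Usage sketch for get_fuzzer_stats above. Hedged: the path below is
# hypothetical; real stats paths are derived from the experiment's trial
# directory layout in the filestore.
stats_path = 'gs://example-bucket/experiment/trial-1/stats.json'  # hypothetical
stats = get_fuzzer_stats(stats_path)
if stats is None:
    print('No stats reported for this snapshot.')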
def test_using_local_filestore(fs, use_local_filestore):  # pylint: disable=invalid-name,unused-argument
    """Tests that local_filestore is used in local running settings."""
    fs.create_dir(LOCAL_DIR)
    fs.create_dir(LOCAL_DIR_2)

    with mock.patch('common.new_process.execute') as mocked_execute:
        filestore_utils.cp(LOCAL_DIR, LOCAL_DIR_2, recursive=True)
        assert 'gsutil' not in mocked_execute.call_args_list[0][0][0]

    with mock.patch('common.new_process.execute') as mocked_execute:
        filestore_utils.ls(LOCAL_DIR)
        assert 'gsutil' not in mocked_execute.call_args_list[0][0][0]

    with mock.patch('common.new_process.execute') as mocked_execute:
        filestore_utils.rm(LOCAL_DIR, recursive=True)
        assert 'gsutil' not in mocked_execute.call_args_list[0][0][0]

    with mock.patch('common.new_process.execute') as mocked_execute:
        filestore_utils.rsync(LOCAL_DIR, LOCAL_DIR_2, recursive=True)
        assert 'gsutil' not in mocked_execute.call_args_list[0][0][0]
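# The tests above pin down the filestore_utils dispatch contract: gs:// paths
# go through gsutil (with '-m' prepended when parallel=True), while local
# paths use plain commands and silently ignore 'parallel'. A minimal sketch of
# such a dispatcher, assuming a hypothetical helper name; this is not the
# actual filestore_utils implementation:
def _sketch_cp(source, destination, recursive=False, parallel=False):
    """Dispatches a copy to gsutil or a local cp based on the path scheme."""
    if source.startswith('gs://') or destination.startswith('gs://'):
        command = ['gsutil']
        if parallel:
            command.append('-m')
        command.append('cp')
        if recursive:
            command.append('-r')
        return new_process.execute(command + [source, destination],
                                   expect_zero=True)
    # Local filestore: 'parallel' intentionally has no effect here.
    command = ['cp'] + (['-r'] if recursive else [])
    return new_process.execute(command + [source, destination],
                               expect_zero=True)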
def generate_coverage_report(self):
    """Generates the coverage report and stores it in the bucket."""
    command = [
        'llvm-cov', 'show', '-format=html',
        '-path-equivalence=/,{prefix}'.format(prefix=self.source_files_dir),
        '-output-dir={dst_dir}'.format(dst_dir=self.report_dir),
        '-Xdemangler', 'c++filt', '-Xdemangler', '-n', self.binary_file,
        '-instr-profile={profdata}'.format(profdata=self.merged_profdata_file)
    ]
    result = new_process.execute(command, expect_zero=False)
    if result.retcode != 0:
        logger.error('Coverage report generation failed for '
                     'fuzzer: {fuzzer}, benchmark: {benchmark}.'.format(
                         fuzzer=self.fuzzer, benchmark=self.benchmark))
        return

    src_dir = self.report_dir
    dst_dir = exp_path.filestore(self.report_dir)
    filestore_utils.cp(src_dir, dst_dir, recursive=True, parallel=True)
def get_fuzzer_covered_regions(fuzzer: str, benchmark: str, filestore: str):
    """Returns the covered regions dict for |fuzzer| from the json file in the
    filestore."""
    src_file = get_fuzzer_benchmark_covered_regions_filestore_path(
        fuzzer, benchmark, filestore)
    with tempfile.NamedTemporaryFile() as dst_file:
        if filestore_utils.cp(src_file, dst_file.name,
                              expect_zero=False).retcode:
            logger.warning(
                'covered_regions.json file: %s could not be copied.', src_file)
            return {}
        with open(dst_file.name) as json_file:
            return json.load(json_file)
def copy_coverage_binaries(benchmark):
    """Copy coverage binaries in a local experiment."""
    shared_coverage_binaries_dir = get_shared_coverage_binaries_dir()
    mount_arg = '{0}:{0}'.format(shared_coverage_binaries_dir)
    builder_image_url = benchmark_utils.get_builder_image_url(
        benchmark, 'coverage', environment.get('CLOUD_PROJECT'))
    coverage_build_archive = 'coverage-build-{}.tar.gz'.format(benchmark)
    coverage_build_archive_shared_dir_path = os.path.join(
        shared_coverage_binaries_dir, coverage_build_archive)
    command = 'cd /out; tar -czvf {} *'.format(
        coverage_build_archive_shared_dir_path)
    new_process.execute([
        'docker', 'run', '-v', mount_arg, builder_image_url, '/bin/bash', '-c',
        command
    ])
    coverage_binaries_dir = build_utils.get_coverage_binaries_dir()
    coverage_build_archive_gcs_path = posixpath.join(
        exp_path.filestore(coverage_binaries_dir), coverage_build_archive)
    return filestore_utils.cp(coverage_build_archive_shared_dir_path,
                              coverage_build_archive_gcs_path)
def measure_snapshot_coverage(fuzzer: str, benchmark: str, trial_num: int,
                              cycle: int) -> models.Snapshot:
    """Measure coverage of the snapshot for |cycle| for |trial_num| of |fuzzer|
    and |benchmark|."""
    snapshot_logger = logs.Logger('measurer',
                                  default_extras={
                                      'fuzzer': fuzzer,
                                      'benchmark': benchmark,
                                      'trial_id': str(trial_num),
                                      'cycle': str(cycle),
                                  })
    snapshot_measurer = SnapshotMeasurer(fuzzer, benchmark, trial_num,
                                         snapshot_logger)

    measuring_start_time = time.time()
    snapshot_logger.info('Measuring cycle: %d.', cycle)
    this_time = cycle * experiment_utils.get_snapshot_seconds()
    if snapshot_measurer.is_cycle_unchanged(cycle):
        snapshot_logger.info('Cycle: %d is unchanged.', cycle)
        current_pcs = snapshot_measurer.get_current_pcs()
        return models.Snapshot(time=this_time,
                               trial_id=trial_num,
                               edges_covered=len(current_pcs))

    corpus_archive_dst = os.path.join(
        snapshot_measurer.trial_dir, 'corpus',
        experiment_utils.get_corpus_archive_name(cycle))
    corpus_archive_src = exp_path.filestore(corpus_archive_dst)

    corpus_archive_dir = os.path.dirname(corpus_archive_dst)
    if not os.path.exists(corpus_archive_dir):
        os.makedirs(corpus_archive_dir)
    try:
        filestore_utils.cp(corpus_archive_src, corpus_archive_dst)
    except subprocess.CalledProcessError:
        snapshot_logger.warning('Corpus not found for cycle: %d.', cycle)
        return None

    snapshot_measurer.initialize_measurement_dirs()
    snapshot_measurer.extract_corpus(corpus_archive_dst)
    # Don't keep corpus archives around longer than they need to be.
    os.remove(corpus_archive_dst)

    # Get the coverage of the new corpus units.
    snapshot_measurer.run_cov_new_units()
    all_pcs = snapshot_measurer.merge_new_pcs()
    snapshot = models.Snapshot(time=this_time,
                               trial_id=trial_num,
                               edges_covered=len(all_pcs))

    # Record the new corpus files.
    snapshot_measurer.update_measured_files()

    # Archive crashes directory.
    snapshot_measurer.archive_crashes(cycle)

    measuring_time = round(time.time() - measuring_start_time, 2)
    snapshot_logger.info('Measured cycle: %d in %d seconds.', cycle,
                         measuring_time)
    return snapshot
def measure_snapshot_coverage(fuzzer: str, benchmark: str, trial_num: int,
                              cycle: int) -> models.Snapshot:
    """Measure coverage of the snapshot for |cycle| for |trial_num| of |fuzzer|
    and |benchmark|."""
    snapshot_logger = logs.Logger('measurer',
                                  default_extras={
                                      'fuzzer': fuzzer,
                                      'benchmark': benchmark,
                                      'trial_id': str(trial_num),
                                      'cycle': str(cycle),
                                  })
    snapshot_measurer = SnapshotMeasurer(fuzzer, benchmark, trial_num,
                                         snapshot_logger)

    measuring_start_time = time.time()
    snapshot_logger.info('Measuring cycle: %d.', cycle)
    this_time = cycle * experiment_utils.get_snapshot_seconds()
    if snapshot_measurer.is_cycle_unchanged(cycle):
        snapshot_logger.info('Cycle: %d is unchanged.', cycle)
        regions_covered = snapshot_measurer.get_current_coverage()
        fuzzer_stats_data = snapshot_measurer.get_fuzzer_stats(cycle)
        return models.Snapshot(time=this_time,
                               trial_id=trial_num,
                               edges_covered=regions_covered,
                               fuzzer_stats=fuzzer_stats_data)

    corpus_archive_dst = os.path.join(
        snapshot_measurer.trial_dir, 'corpus',
        experiment_utils.get_corpus_archive_name(cycle))
    corpus_archive_src = exp_path.filestore(corpus_archive_dst)

    corpus_archive_dir = os.path.dirname(corpus_archive_dst)
    if not os.path.exists(corpus_archive_dir):
        os.makedirs(corpus_archive_dir)

    if filestore_utils.cp(corpus_archive_src,
                          corpus_archive_dst,
                          expect_zero=False).retcode:
        snapshot_logger.warning('Corpus not found for cycle: %d.', cycle)
        return None

    snapshot_measurer.initialize_measurement_dirs()
    snapshot_measurer.extract_corpus(corpus_archive_dst)
    # Don't keep corpus archives around longer than they need to be.
    os.remove(corpus_archive_dst)

    # Run coverage on the new corpus units.
    snapshot_measurer.run_cov_new_units()

    # Generate profdata and transform it into json form.
    snapshot_measurer.generate_coverage_information(cycle)

    # Get the coverage of the new corpus units.
    regions_covered = snapshot_measurer.get_current_coverage()
    fuzzer_stats_data = snapshot_measurer.get_fuzzer_stats(cycle)
    snapshot = models.Snapshot(time=this_time,
                               trial_id=trial_num,
                               edges_covered=regions_covered,
                               fuzzer_stats=fuzzer_stats_data)

    # Record the new corpus files.
    snapshot_measurer.update_measured_files()

    # Archive crashes directory.
    snapshot_measurer.archive_crashes(cycle)

    measuring_time = round(time.time() - measuring_start_time, 2)
    snapshot_logger.info('Measured cycle: %d in %f seconds.', cycle,
                         measuring_time)
    return snapshot
def test_integration_runner(self, mocked_error, tmp_path, environ):
    """Test that runner can run libFuzzer and saves snapshots to GCS."""
    # Switch cwd so that fuzzers don't create tons of files in the repo.
    os.chdir(tmp_path)

    # Set env variables that would be set by the Dockerfile.
    file_directory = pathlib.Path(__file__).parent
    root_dir = file_directory.parent
    os.environ['ROOT_DIR'] = str(root_dir)

    seed_corpus_dir = tmp_path / 'seeds'
    os.mkdir(seed_corpus_dir)
    os.environ['SEED_CORPUS_DIR'] = str(seed_corpus_dir)

    output_corpus_dir = tmp_path / 'corpus'
    os.mkdir(output_corpus_dir)
    os.environ['OUTPUT_CORPUS_DIR'] = str(output_corpus_dir)

    fuzzer = 'libfuzzer'
    fuzzer_parent_path = root_dir / 'fuzzers' / fuzzer

    benchmark = 'MultipleConstraintsOnSmallInputTest'
    test_experiment_bucket = os.environ['TEST_EXPERIMENT_FILESTORE']
    experiment = 'integration-test-experiment'
    gcs_directory = posixpath.join(test_experiment_bucket, experiment,
                                   'experiment-folders',
                                   '%s-%s' % (benchmark, fuzzer), 'trial-1')
    filestore_utils.rm(gcs_directory, force=True)
    # Add fuzzer directory to make it easy to run fuzzer.py in local
    # configuration.
    os.environ['PYTHONPATH'] = ':'.join(
        [str(root_dir), str(fuzzer_parent_path)])

    # Set env variables that would be set by the scheduler.
    os.environ['FUZZER'] = fuzzer
    os.environ['BENCHMARK'] = benchmark
    os.environ['EXPERIMENT_FILESTORE'] = test_experiment_bucket
    os.environ['EXPERIMENT'] = experiment
    os.environ['TRIAL_ID'] = str(TRIAL_NUM)
    max_total_time = 10
    os.environ['MAX_TOTAL_TIME'] = str(max_total_time)

    target_binary_path = (file_directory / 'test_data' / 'test_runner' /
                          benchmark)
    with mock.patch('common.fuzzer_utils.get_fuzz_target_binary',
                    return_value=str(target_binary_path)):
        with mock.patch('common.experiment_utils.get_snapshot_seconds',
                        return_value=max_total_time / 10):
            runner.main()

    gcs_corpus_directory = posixpath.join(gcs_directory, 'corpus')
    snapshots = filestore_utils.ls(gcs_corpus_directory)
    assert len(snapshots) >= 2

    # Check that the archives are deleted after being copied to GCS.
    assert not os.path.exists(tmp_path / 'corpus-archives' /
                              'corpus-archive-0001.tar.gz')

    local_gcs_corpus_dir_copy = tmp_path / 'gcs_corpus_dir'
    os.mkdir(local_gcs_corpus_dir_copy)
    filestore_utils.cp(posixpath.join(gcs_corpus_directory, '*'),
                       str(local_gcs_corpus_dir_copy),
                       recursive=True,
                       parallel=True)
    archive_size = os.path.getsize(local_gcs_corpus_dir_copy /
                                   'corpus-archive-0001.tar.gz')
    assert archive_size > 500

    assert len(os.listdir(output_corpus_dir)) > 5

    mocked_error.assert_not_called()