def build_measurer(benchmark: str) -> bool:
    """Do a coverage build for |benchmark| and pull the coverage binaries
    into the local coverage binaries directory."""
    try:
        logger.info('Building measurer for benchmark: %s.', benchmark)
        buildlib.build_coverage(benchmark)
        docker_name = benchmark_utils.get_docker_name(benchmark)
        archive_name = 'coverage-build-%s.tar.gz' % docker_name

        coverage_binaries_dir = build_utils.get_coverage_binaries_dir()
        benchmark_coverage_binary_dir = coverage_binaries_dir / benchmark
        os.mkdir(benchmark_coverage_binary_dir)
        cloud_bucket_archive_path = exp_path.gcs(coverage_binaries_dir /
                                                 archive_name)
        gsutil.cp(cloud_bucket_archive_path,
                  str(benchmark_coverage_binary_dir),
                  parallel=False,
                  write_to_stdout=False)

        archive_path = benchmark_coverage_binary_dir / archive_name
        # Use a context manager so the archive is closed before removal.
        with tarfile.open(archive_path, 'r:gz') as tar:
            tar.extractall(benchmark_coverage_binary_dir)
        os.remove(archive_path)
        logger.info('Done building measurer for benchmark: %s.', benchmark)
        return True
    except Exception:  # pylint: disable=broad-except
        logger.error('Failed to build measurer for %s.', benchmark)
        return False
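# Usage sketch (hypothetical driver, not part of this module): build measurers
# for several benchmarks in parallel and keep only the ones that succeeded.
# The benchmark names are placeholders.
#
#     from multiprocessing import pool as mp_pool
#
#     benchmarks = ['benchmark-1', 'benchmark-2']
#     with mp_pool.ThreadPool() as pool:
#         results = pool.map(build_measurer, benchmarks)
#     built = [b for b, ok in zip(benchmarks, results) if ok]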
def gcb_build_benchmark_coverage(benchmark: str) -> Tuple[int, str]:
    """Build a coverage build of |benchmark| on GCB."""
    substitutions = {
        '_GCS_COVERAGE_BINARIES_DIR':
            exp_path.gcs(get_coverage_binaries_dir()),
        '_BENCHMARK': benchmark,
    }
    config_file = get_build_config_file('coverage.yaml')
    config_name = 'benchmark-{benchmark}-coverage'.format(benchmark=benchmark)
    return gcb_build(config_file, config_name, substitutions)
def store_build_logs(build_config, build_result):
    """Save build results in the build logs bucket."""
    build_output = ('Command returned {retcode}.\nOutput: {output}'.format(
        retcode=build_result.retcode, output=build_result.output))
    with tempfile.NamedTemporaryFile(mode='w') as tmp:
        tmp.write(build_output)
        tmp.flush()
        build_log_filename = build_config + '.txt'
        gsutil.cp(tmp.name,
                  exp_path.gcs(get_build_logs_dir() / build_log_filename),
                  parallel=False)
def gcb_build_oss_fuzz_project_coverage(benchmark: str) -> Tuple[int, str]:
    """Build a coverage build of OSS-Fuzz-based benchmark |benchmark| on
    GCB."""
    project = benchmark_utils.get_project(benchmark)
    oss_fuzz_builder_hash = benchmark_utils.get_oss_fuzz_builder_hash(
        benchmark)
    substitutions = {
        '_GCS_COVERAGE_BINARIES_DIR':
            exp_path.gcs(get_coverage_binaries_dir()),
        '_OSS_FUZZ_PROJECT': project,
        '_OSS_FUZZ_BUILDER_HASH': oss_fuzz_builder_hash,
    }
    config_file = get_build_config_file('oss-fuzz-coverage.yaml')
    config_name = 'oss-fuzz-{project}-coverage-hash-{hash}'.format(
        project=project, hash=oss_fuzz_builder_hash)
    return gcb_build(config_file, config_name, substitutions)
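# Sketch of how the GCB builders might pair with store_build_logs
# (hypothetical glue code; `BuildResult` is a stand-in adapting the
# (retcode, output) tuple to the .retcode/.output attributes that
# store_build_logs reads):
#
#     import collections
#
#     BuildResult = collections.namedtuple('BuildResult',
#                                          ['retcode', 'output'])
#
#     def do_coverage_build_and_log(benchmark):
#         retcode, output = gcb_build_benchmark_coverage(benchmark)
#         store_build_logs('benchmark-{}-coverage'.format(benchmark),
#                          BuildResult(retcode=retcode, output=output))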
def archive_crashes(self, cycle):
    """Archive this cycle's crashes into the cloud bucket."""
    if not os.listdir(self.crashes_dir):
        logs.info('No crashes found for cycle %d.', cycle)
        return

    logs.info('Archiving crashes for cycle %d.', cycle)
    crashes_archive_name = experiment_utils.get_crashes_archive_name(cycle)
    archive = os.path.join(os.path.dirname(self.crashes_dir),
                           crashes_archive_name)
    with tarfile.open(archive, 'w:gz') as tar:
        tar.add(self.crashes_dir,
                arcname=os.path.basename(self.crashes_dir))
    gcs_path = exp_path.gcs(
        posixpath.join(self.trial_dir, 'crashes', crashes_archive_name))
    gsutil.cp(archive, gcs_path)
    os.remove(archive)
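# Illustrative result: for cycle 3 of a trial, the archive is uploaded under
# the trial's directory in the experiment bucket (the exact archive name
# depends on experiment_utils.get_crashes_archive_name), e.g.:
#
#     gs://<experiment-bucket>/<...>/<trial-dir>/crashes/crashes-3.tar.gz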
def set_up_coverage_binary(benchmark):
    """Set up coverage binaries for |benchmark|."""
    initialize_logs()
    coverage_binaries_dir = build_utils.get_coverage_binaries_dir()
    benchmark_coverage_binary_dir = coverage_binaries_dir / benchmark
    if not os.path.exists(benchmark_coverage_binary_dir):
        os.mkdir(benchmark_coverage_binary_dir)
    archive_name = 'coverage-build-%s.tar.gz' % benchmark
    cloud_bucket_archive_path = exp_path.gcs(coverage_binaries_dir /
                                             archive_name)
    gsutil.cp(cloud_bucket_archive_path,
              str(benchmark_coverage_binary_dir),
              write_to_stdout=False)
    archive_path = benchmark_coverage_binary_dir / archive_name
    # Use a context manager so the archive is closed before removal.
    with tarfile.open(archive_path, 'r:gz') as tar:
        tar.extractall(benchmark_coverage_binary_dir)
    os.remove(archive_path)
def copy_coverage_binaries(benchmark):
    """Copy coverage binaries in a local experiment."""
    shared_coverage_binaries_dir = get_shared_coverage_binaries_dir()
    mount_arg = '{0}:{0}'.format(shared_coverage_binaries_dir)
    builder_image_url = benchmark_utils.get_builder_image_url(
        benchmark, 'coverage', environment.get('CLOUD_PROJECT'))
    coverage_build_archive = 'coverage-build-{}.tar.gz'.format(benchmark)
    coverage_build_archive_shared_dir_path = os.path.join(
        shared_coverage_binaries_dir, coverage_build_archive)
    command = 'cd /out; tar -czvf {} *'.format(
        coverage_build_archive_shared_dir_path)
    new_process.execute([
        'docker', 'run', '-v', mount_arg, builder_image_url, '/bin/bash', '-c',
        command
    ])
    coverage_binaries_dir = build_utils.get_coverage_binaries_dir()
    coverage_build_archive_gcs_path = posixpath.join(
        exp_path.gcs(coverage_binaries_dir), coverage_build_archive)
    return gsutil.cp(coverage_build_archive_shared_dir_path,
                     coverage_build_archive_gcs_path)
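# Hypothetical dispatch illustrating where the two code paths diverge: local
# experiments package binaries out of the builder image and upload them, while
# cloud experiments download prebuilt archives. The LOCAL_EXPERIMENT key is an
# assumption for illustration:
#
#     if environment.get('LOCAL_EXPERIMENT'):
#         copy_coverage_binaries(benchmark)
#     else:
#         set_up_coverage_binary(benchmark)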
def measure_all_trials(experiment: str, max_total_time: int, pool, q) -> bool:  # pylint: disable=invalid-name
    """Get coverage data (with coverage runs) for all active trials. Note that
    this should not be called unless multiprocessing.set_start_method('spawn')
    was called first. Otherwise it will use fork, which breaks logging."""
    logger.info('Measuring all trials.')

    experiment_folders_dir = get_experiment_folders_dir()
    if not remote_dir_exists(experiment_folders_dir):
        return True

    try:
        gsutil.rsync(exp_path.gcs(experiment_folders_dir),
                     str(experiment_folders_dir))
    except subprocess.CalledProcessError:
        logger.error('Rsyncing experiment folders failed.')
        return True

    max_cycle = _time_to_cycle(max_total_time)
    unmeasured_snapshots = get_unmeasured_snapshots(experiment, max_cycle)
    if not unmeasured_snapshots:
        return False

    measure_trial_coverage_args = [
        (unmeasured_snapshot, max_cycle, q)
        for unmeasured_snapshot in unmeasured_snapshots
    ]
    result = pool.starmap_async(measure_trial_coverage,
                                measure_trial_coverage_args)

    # Poll the queue for snapshots and save them in batches until the pool is
    # done processing each unmeasured snapshot. Then save any remaining
    # snapshots.
    snapshots = []
    snapshots_measured = False

    def save_snapshots():
        """Saves measured snapshots if there were any, resets |snapshots| to
        an empty list and records the fact that snapshots have been
        measured."""
        if not snapshots:
            return
        db_utils.bulk_save(snapshots)
        snapshots.clear()
        nonlocal snapshots_measured
        snapshots_measured = True

    while True:
        try:
            snapshot = q.get(timeout=SNAPSHOT_QUEUE_GET_TIMEOUT)
            snapshots.append(snapshot)
        except queue.Empty:
            if result.ready():
                # If "ready" that means the pool has finished calling on each
                # unmeasured_snapshot. Since it is finished and the queue is
                # empty, we can stop checking the queue for more snapshots.
                break

            if len(snapshots) >= SNAPSHOTS_BATCH_SAVE_SIZE * .75:
                # Save a smaller batch size if we can make an educated guess
                # that we will have to wait for the next snapshot.
                save_snapshots()
                continue

        if len(snapshots) >= SNAPSHOTS_BATCH_SAVE_SIZE and not result.ready():
            save_snapshots()

    # If we have any snapshots left, save them now.
    save_snapshots()

    return snapshots_measured
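# Driver sketch for measure_all_trials (hypothetical loop; the sleep interval
# is arbitrary). 'spawn' must be set before creating the pool, per the
# docstring, and the queue must come from a Manager so pool workers can put
# snapshots on it:
#
#     import multiprocessing
#     import time
#
#     multiprocessing.set_start_method('spawn')
#     q = multiprocessing.Manager().Queue()
#     with multiprocessing.Pool() as pool:
#         # Keep measuring until there are no unmeasured snapshots left.
#         while measure_all_trials(experiment, max_total_time, pool, q):
#             time.sleep(60)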
def remote_dir_exists(directory: pathlib.Path) -> bool:
    """Does |directory| exist in the CLOUD_EXPERIMENT_BUCKET?"""
    return gsutil.ls(exp_path.gcs(directory), must_exist=False)[0] == 0
def measure_snapshot_coverage(fuzzer: str, benchmark: str, trial_num: int,
                              cycle: int) -> models.Snapshot:
    """Measure coverage of the snapshot for |cycle| for |trial_num| of
    |fuzzer| and |benchmark|."""
    snapshot_logger = logs.Logger('measurer',
                                  default_extras={
                                      'fuzzer': fuzzer,
                                      'benchmark': benchmark,
                                      'trial_id': str(trial_num),
                                      'cycle': str(cycle),
                                  })
    snapshot_measurer = SnapshotMeasurer(fuzzer, benchmark, trial_num,
                                         snapshot_logger)

    measuring_start_time = time.time()
    snapshot_logger.info('Measuring cycle: %d.', cycle)
    this_time = cycle * experiment_utils.get_snapshot_seconds()
    if snapshot_measurer.is_cycle_unchanged(cycle):
        snapshot_logger.info('Cycle: %d is unchanged.', cycle)
        current_pcs = snapshot_measurer.get_current_pcs()
        return models.Snapshot(time=this_time,
                               trial_id=trial_num,
                               edges_covered=len(current_pcs))

    corpus_archive_dst = os.path.join(
        snapshot_measurer.trial_dir, 'corpus',
        experiment_utils.get_corpus_archive_name(cycle))
    corpus_archive_src = exp_path.gcs(corpus_archive_dst)

    corpus_archive_dir = os.path.dirname(corpus_archive_dst)
    if not os.path.exists(corpus_archive_dir):
        os.makedirs(corpus_archive_dir)
    if gsutil.cp(corpus_archive_src,
                 corpus_archive_dst,
                 expect_zero=False,
                 parallel=False,
                 write_to_stdout=False)[0] != 0:
        snapshot_logger.warning('Corpus not found for cycle: %d.', cycle)
        return None

    snapshot_measurer.initialize_measurement_dirs()
    snapshot_measurer.extract_corpus(corpus_archive_dst)
    # Don't keep corpus archives around longer than they need to be.
    os.remove(corpus_archive_dst)

    # Get the coverage of the new corpus units.
    snapshot_measurer.run_cov_new_units()
    all_pcs = snapshot_measurer.merge_new_pcs()
    snapshot = models.Snapshot(time=this_time,
                               trial_id=trial_num,
                               edges_covered=len(all_pcs))

    # Record the new corpus files.
    snapshot_measurer.update_measured_files()

    # Archive the crashes directory.
    snapshot_measurer.archive_crashes(cycle)

    measuring_time = round(time.time() - measuring_start_time, 2)
    # Use %.2f so the rounded fractional seconds are not truncated.
    snapshot_logger.info('Measured cycle: %d in %.2f seconds.', cycle,
                         measuring_time)
    return snapshot
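# Hypothetical per-trial wrapper showing how measure_snapshot_coverage could
# feed the queue polled by measure_all_trials (the attribute names on
# |unmeasured_snapshot| are assumptions for illustration, not the real
# measure_trial_coverage implementation):
#
#     def measure_trial_coverage(unmeasured_snapshot, max_cycle, q):
#         for cycle in range(unmeasured_snapshot.next_cycle, max_cycle + 1):
#             snapshot = measure_snapshot_coverage(
#                 unmeasured_snapshot.fuzzer, unmeasured_snapshot.benchmark,
#                 unmeasured_snapshot.trial_id, cycle)
#             if snapshot is None:
#                 # Corpus archive missing; later cycles may not exist yet.
#                 break
#             q.put(snapshot)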
def copy_unchanged_cycles_file():
    """Copies the unchanged cycles file from the cloud bucket. Returns
    True on success."""
    result = gsutil.cp(exp_path.gcs(self.unchanged_cycles_path),
                       self.unchanged_cycles_path)
    return result.retcode == 0