def get_preempted_trials(self) -> List[models.Trial]:
    """Returns a list of trials that were preempted.

    Returns only trials newly discovered as preempted by this call; trials
    already recorded in |self.preempted_trials| are skipped. As a side
    effect, updates |self.preempted_trials| and
    |self.last_preemptible_query|.
    """
    if not self.experiment_config.get('preemptible_runners'):
        # No preempted trials in a nonpreemptible experiment.
        assert not self.preempted_trials
        return []
    started_instances = self._get_started_unfinished_instances()
    # Record the query time *before* querying so the next query cannot miss
    # preemptions that happen while this one is being processed.
    query_time = datetime_now()
    preempted_instances = list(self._query_preempted_instances())
    trials = []
    for instance in preempted_instances:
        trial = started_instances.get(instance)
        if trial is None:
            # Preemption for this trial was probably handled already.
            logs.warning('Instance: %s is preempted but is not running.',
                         instance)
            continue
        if trial.id in self.preempted_trials:
            # We already know this instance was preempted.
            continue
        self.preempted_trials[trial.id] = trial
        trials.append(trial)

    # Update this now that we know we have succeeded processing the query.
    # It's far worse to update the timestamp too early than to leave it
    # stale at this point (stale only results in redundant work).
    self.last_preemptible_query = query_time

    # Return only the preempted trials discovered by this query; trials we
    # knew about beforehand were filtered out above.
    return trials
def get_fuzzers_changed_since_last():
    """Returns the list of fuzzers that have changed since the most recent
    experiment in the database whose commit is in the current branch."""
    # TODO(metzman): Figure out a way of skipping experiments that were
    # stopped early.

    # Walk experiments newest-first; some may reference commits that are not
    # in the current branch, so keep trying until a diff succeeds.
    with db_utils.session_scope() as session:
        past_experiments = list(
            session.query(models.Experiment).order_by(
                models.Experiment.time_created.desc()))
    if not past_experiments:
        raise Exception('No experiments found. Cannot find changed fuzzers.')

    for past_experiment in past_experiments:
        try:
            diffed_files = diff_utils.get_changed_files(
                past_experiment.git_hash)
            break
        except diff_utils.DiffError:
            logs.warning('Skipping %s. Commit is not in branch.',
                         past_experiment.git_hash)
    else:
        # No experiment had an in-branch commit we could diff against.
        raise Exception('No in-branch experiments. '
                        'Cannot find changed fuzzers.')

    return change_utils.get_changed_fuzzers(diffed_files)
def can_start_nonpreemptible(self, nonpreemptible_starts: int,
                             trials_to_run: int) -> bool:
    """Returns True if we can start a nonpreemptible trial."""
    if not self.experiment_config.get('preemptible_runners'):
        # This method isn't expected to run in a nonpreemptible experiment,
        # but if it does, starting a nonpreemptible trial there is always OK.
        return True

    if self.preemptible_starts_futile:
        # We previously decided replacements are pointless; stay stopped.
        return False

    # Never exceed the maximum number of nonpreemptibles.
    if nonpreemptible_starts >= self.max_nonpreemptibles:
        return False

    required_replacements = (trials_to_run *
                             self.MAX_FRACTION_FOR_NONPREEMPTIBLES)
    if required_replacements > self.max_nonpreemptibles:
        # When trials remain that can't be run on preemptibles, don't
        # naively create nonpreemptibles until we hit the limit. If we
        # can't create enough of them to replace at least this fraction of
        # the remaining trials, don't create any at all — the experiment
        # can't be salvaged cheaply.
        # TODO(metzman): This policy can be bypassed if instances are
        # preempted one at a time. Fix this or get rid of the policy.
        self.preemptible_starts_futile = True
        logs.warning('Futile to replace preempted with nonpreemptibles.')
        return False

    # Supplementing with nonpreemptibles keeps the results usable.
    return True
def copy_oss_fuzz_files(project, commit_date, benchmark_dir):
    """Checkout the right files from OSS-Fuzz to build the benchmark based
    on |project| and |commit_date|. Then copy them to |benchmark_dir|.

    Args:
        project: Name of the OSS-Fuzz project.
        commit_date: datetime used to pick the newest OSS-Fuzz commit at or
            before that date.
        benchmark_dir: Destination directory for the project files.

    Returns:
        True on success, False if no suitable OSS-Fuzz commit was found.

    Raises:
        RuntimeError: If OSS_FUZZ_DIR is not a git repository.
    """
    # Local import: replaces the deprecated distutils dir_util (PEP 632).
    import shutil

    if not os.path.exists(os.path.join(OSS_FUZZ_DIR, '.git')):
        logs.error(
            '%s is not a git repo. Try running git submodule update --init',
            OSS_FUZZ_DIR)
        raise RuntimeError('%s is not a git repo.' % OSS_FUZZ_DIR)
    oss_fuzz_repo_manager = GitRepoManager(OSS_FUZZ_DIR)
    projects_dir = os.path.join(OSS_FUZZ_DIR, 'projects', project)
    try:
        # Find an OSS-Fuzz commit that can be used to build the benchmark.
        _, oss_fuzz_commit, _ = oss_fuzz_repo_manager.git([
            'log', '--before=' + commit_date.isoformat(), '-n1', '--format=%H',
            projects_dir
        ])
        oss_fuzz_commit = oss_fuzz_commit.strip()
        if not oss_fuzz_commit:
            logs.warning('No suitable earlier OSS-Fuzz commit found.')
            return False
        oss_fuzz_repo_manager.git(['checkout', oss_fuzz_commit, projects_dir])
        # shutil.copytree with dirs_exist_ok=True merges into an existing
        # destination, matching the old dir_util.copy_tree behavior while
        # avoiding distutils (deprecated in 3.10, removed in 3.12).
        shutil.copytree(projects_dir, benchmark_dir, dirs_exist_ok=True)
        os.remove(os.path.join(benchmark_dir, 'project.yaml'))
        return True
    finally:
        # Always restore the OSS-Fuzz checkout to a clean state.
        oss_fuzz_repo_manager.git(['reset', '--hard'])
def _clean_seed_corpus(seed_corpus_dir): """Moves seed corpus files from sub-directories into the corpus directory root. Also, deletes any files that exceed the 1 MB limit.""" if not os.path.exists(seed_corpus_dir): return failed_to_move_files = [] for root, _, files in os.walk(seed_corpus_dir): for filename in files: file_path = os.path.join(root, filename) if os.path.getsize(file_path) > CORPUS_ELEMENT_BYTES_LIMIT: os.remove(file_path) logs.warning('Removed seed file %s as it exceeds 1 Mb limit.', file_path) continue sha1sum = utils.file_hash(file_path) new_file_path = os.path.join(seed_corpus_dir, sha1sum) try: shutil.move(file_path, new_file_path) except OSError: failed_to_move_files.append((file_path, new_file_path)) if failed_to_move_files: logs.error('Failed to move seed corpus files: %s', failed_to_move_files)
def _clean_seed_corpus(seed_corpus_dir): """Prepares |seed_corpus_dir| for the trial. This ensures that it can be used by AFL which is picky about the seed corpus. Moves seed corpus files from sub-directories into the corpus directory root. Also, deletes any files that exceed the 1 MB limit. If the NO_SEEDS env var is specified than the seed corpus files are deleted.""" if not os.path.exists(seed_corpus_dir): return if environment.get('NO_SEEDS'): logs.info('NO_SEEDS specified, deleting seed corpus files.') shutil.rmtree(seed_corpus_dir) os.mkdir(seed_corpus_dir) return failed_to_move_files = [] for root, _, files in os.walk(seed_corpus_dir): for filename in files: file_path = os.path.join(root, filename) if os.path.getsize(file_path) > CORPUS_ELEMENT_BYTES_LIMIT: os.remove(file_path) logs.warning('Removed seed file %s as it exceeds 1 Mb limit.', file_path) continue sha1sum = utils.file_hash(file_path) new_file_path = os.path.join(seed_corpus_dir, sha1sum) try: shutil.move(file_path, new_file_path) except OSError: failed_to_move_files.append((file_path, new_file_path)) if failed_to_move_files: logs.error('Failed to move seed corpus files: %s', failed_to_move_files)
def _load_base_builder_docker_repo():
    """Gets base-image digests. Returns the docker repo, or None if the
    gcloud executable cannot be found."""
    # Local import: shutil.which replaces distutils.spawn.find_executable,
    # which is deprecated (distutils removed in Python 3.12, PEP 632).
    import shutil

    gcloud_path = shutil.which('gcloud')
    if not gcloud_path:
        logs.warning('gcloud not found in PATH.')
        return None

    _, result, _ = new_process.execute([
        gcloud_path,
        'container',
        'images',
        'list-tags',
        'gcr.io/oss-fuzz-base/base-builder',
        '--format=json',
        '--sort-by=timestamp',
    ])
    result = json.loads(result)
    repo = BaseBuilderDockerRepo()
    for image in result:
        # Normalize each image timestamp to UTC before recording its digest.
        timestamp = datetime.datetime.fromisoformat(
            image['timestamp']['datetime']).astimezone(datetime.timezone.utc)
        repo.add_digest(timestamp, image['digest'])
    return repo
def output_report(experiment_config: dict, in_progress=False):
    """Generate the HTML report and write it to |web_bucket|."""
    report_name = experiment_utils.get_experiment_name()
    destination = posixpath.join(experiment_config['report_filestore'],
                                 report_name)
    local_reports_dir = get_reports_dir()

    # Merging with nonprivate experiments while the experiment is still in
    # progress would produce unusable realtime results, so only merge at the
    # very end.
    merge_with_nonprivate = (not in_progress and experiment_config.get(
        'merge_with_nonprivate', False))

    cache_headers = ['-h', 'Cache-Control:public,max-age=0,no-transform']
    try:
        logger.debug('Generating report.')
        filesystem.recreate_directory(local_reports_dir)
        generate_report.generate_report(
            [report_name],
            str(local_reports_dir),
            in_progress=in_progress,
            merge_with_clobber_nonprivate=merge_with_nonprivate)
        filestore_utils.rsync(str(local_reports_dir),
                              destination,
                              gsutil_options=cache_headers)
        logger.debug('Done generating report.')
    except data_utils.EmptyDataError:
        logs.warning('No snapshot data.')
    except Exception:  # pylint: disable=broad-except
        logger.error('Error generating HTML report.')
def run_fuzzer(max_total_time, log_filename):
    """Runs the fuzzer using its script. Logs stdout and stderr of the fuzzer
    script to |log_filename| if provided."""
    input_corpus = environment.get('SEED_CORPUS_DIR')
    output_corpus = environment.get('OUTPUT_CORPUS_DIR')
    fuzz_target_name = environment.get('FUZZ_TARGET')
    target_binary = fuzzer_utils.get_fuzz_target_binary(
        FUZZ_TARGET_DIR, fuzz_target_name)
    if not target_binary:
        logs.error('Fuzz target binary not found.')
        return

    _unpack_clusterfuzz_seed_corpus(target_binary, input_corpus)
    _clean_seed_corpus(input_corpus)

    if max_total_time is None:
        logs.warning('max_total_time is None. Fuzzing indefinitely.')

    runner_niceness = environment.get('RUNNER_NICENESS', 0)

    try:
        # The runner itself is launched at a higher priority; drop the
        # fuzzing processes back to the default niceness (0).
        fuzz_statement = ('import fuzzer; '
                          'fuzzer.fuzz('
                          "'{input_corpus}', '{output_corpus}', "
                          "'{target_binary}')")
        command = [
            'nice', '-n',
            str(0 - runner_niceness), 'python3', '-u', '-c',
            fuzz_statement.format(input_corpus=shlex.quote(input_corpus),
                                  output_corpus=shlex.quote(output_corpus),
                                  target_binary=shlex.quote(target_binary))
        ]
        fuzzer_environment = _get_fuzzer_environment()

        if environment.get('FUZZ_OUTSIDE_EXPERIMENT'):
            # Fuzzing from the command line: stream output to stdout.
            new_process.execute(command,
                                timeout=max_total_time,
                                write_to_stdout=True,
                                kill_children=True,
                                env=fuzzer_environment)
        else:
            # In an experiment: capture output in the log file.
            with open(log_filename, 'wb') as log_file:
                new_process.execute(command,
                                    timeout=max_total_time,
                                    output_file=log_file,
                                    kill_children=True,
                                    env=fuzzer_environment)
    except subprocess.CalledProcessError:
        global fuzzer_errored_out  # pylint:disable=invalid-name
        fuzzer_errored_out = True
        logs.error('Fuzz process returned nonzero.')
def validate_benchmarks(benchmarks: List[str]):
    """Parses and validates list of benchmarks.

    Raises:
        Exception: If a benchmark does not exist in BENCHMARKS_DIR or is
            listed more than once.
    """
    benchmark_directories = get_directories(BENCHMARKS_DIR)
    if not os.path.exists(OSS_FUZZ_PROJECTS_DIR):
        # Bug fix: the two implicitly-concatenated literals had no separating
        # space, logging "...not checked out.skipping...".
        logs.warning('OSS-Fuzz repository is not checked out. '
                     'Skipping OSS-Fuzz benchmarks.')
    for benchmark in benchmarks:
        if benchmark not in benchmark_directories:
            raise Exception('Benchmark "%s" does not exist.' % benchmark)
    # Each benchmark may only be requested once.
    for benchmark in set(benchmarks):
        if benchmarks.count(benchmark) > 1:
            raise Exception('Benchmark "%s" is included more than once.' %
                            benchmark)
def output_report(experiment_config: dict,
                  in_progress=False,
                  coverage_report=False):
    """Generate the HTML report and write it to |web_bucket|."""
    report_name = experiment_utils.get_experiment_name()
    local_reports_dir = get_reports_dir()

    core_fuzzers = set(get_core_fuzzers())
    report_fuzzers = set(experiment_config['fuzzers']) | core_fuzzers

    # Calculate path to store report files in filestore.
    destination = experiment_config['report_filestore']
    if not report_fuzzers.issubset(core_fuzzers):
        # This experiment runs fuzzers outside the core list, so it is
        # experimental; keep its report under the |experimental|
        # sub-directory.
        destination = os.path.join(destination, 'experimental')
    destination = posixpath.join(destination, report_name)

    # Merging with nonprivate experiments while the experiment is still in
    # progress would produce unusable realtime results, so only merge at the
    # very end.
    merge_with_nonprivate = (not in_progress and experiment_config.get(
        'merge_with_nonprivate', False))

    cache_headers = ['-h', 'Cache-Control:public,max-age=0,no-transform']
    try:
        logger.debug('Generating report.')
        filesystem.recreate_directory(local_reports_dir)
        generate_report.generate_report(
            [report_name],
            str(local_reports_dir),
            report_name=report_name,
            fuzzers=report_fuzzers,
            in_progress=in_progress,
            merge_with_clobber_nonprivate=merge_with_nonprivate,
            coverage_report=coverage_report)
        filestore_utils.rsync(
            str(local_reports_dir),
            destination,
            delete=False,  # Don't remove existing coverage jsons.
            gsutil_options=cache_headers)
        logger.debug('Done generating report.')
    except data_utils.EmptyDataError:
        logs.warning('No snapshot data.')
    except Exception:  # pylint: disable=broad-except
        logger.error('Error generating HTML report.')
def run_requested_experiment(dry_run):
    """Run the oldest requested experiment that hasn't been run yet in
    experiment-requests.yaml."""
    requested_experiments = _get_requested_experiments()

    # TODO(metzman): Look into supporting benchmarks as an optional parameter
    # so that people can add fuzzers that don't support everything.

    if PAUSE_SERVICE_KEYWORD in requested_experiments:
        # The automated experiment service is paused; do nothing.
        logs.warning('Pause service requested, not running experiment.')
        return None

    # Requests are listed newest-first; scan oldest-first for the first
    # experiment whose name is not already in the database.
    requested_experiment = None
    for candidate_config in reversed(requested_experiments):
        candidate_name = _get_experiment_name(candidate_config)
        with db_utils.session_scope() as session:
            already_ran = session.query(models.Experiment).filter(
                models.Experiment.name == candidate_name).first() is not None
        if not already_ran:
            requested_experiment = candidate_config
            break

    if requested_experiment is None:
        logs.info('No new experiment to run. Exiting.')
        return None

    experiment_name = _get_experiment_name(requested_experiment)
    if not validate_experiment_requests([requested_experiment]):
        logs.error('Requested experiment: %s in %s is not valid.',
                   requested_experiment, REQUESTED_EXPERIMENTS_PATH)
        return None

    fuzzers = requested_experiment['fuzzers']

    # Bug experiments use the bug benchmark set; everything else runs the
    # full coverage benchmark set.
    if (requested_experiment.get('type') ==
            benchmark_utils.BenchmarkType.BUG.value):
        benchmarks = benchmark_utils.get_bug_benchmarks()
    else:
        benchmarks = (benchmark_utils.get_oss_fuzz_coverage_benchmarks() +
                      benchmark_utils.get_standard_coverage_benchmarks())

    logs.info('Running experiment: %s with fuzzers: %s.', experiment_name,
              ' '.join(fuzzers))
    description = _get_description(requested_experiment)
    oss_fuzz_corpus = _use_oss_fuzz_corpus(requested_experiment)
    return _run_experiment(experiment_name, fuzzers, benchmarks, description,
                           oss_fuzz_corpus, dry_run)
def sleep_until_next_sync(self):
    """Sleep until it is time to do the next sync."""
    if self.last_sync_time is None:
        # First cycle: no previous sync to anchor on.
        sleep_time = experiment_utils.SNAPSHOT_PERIOD
    else:
        next_sync_time = (self.last_sync_time +
                          experiment_utils.SNAPSHOT_PERIOD)
        sleep_time = next_sync_time - time.time()
        if sleep_time < 0:
            # A sync took longer than SNAPSHOT_PERIOD and messed up our time
            # synchronization; log it and catch up without sleeping.
            logs.warning('Sleep time on cycle %d is %d', self.cycle,
                         sleep_time)
            sleep_time = 0
    logs.debug('Sleeping for %d seconds.', sleep_time)
    time.sleep(sleep_time)
    # Record last_sync_time before the sync starts so consecutive syncs stay
    # roughly SNAPSHOT_PERIOD apart.
    self.last_sync_time = time.time()
def run_fuzzer(max_total_time, log_filename):
    """Runs the fuzzer using its script. Logs stdout and stderr of the fuzzer
    script to |log_filename| if provided.

    Args:
        max_total_time: Seconds to fuzz for, or None to fuzz indefinitely.
        log_filename: Path of the file fuzzer output is written to.
    """
    # NOTE(review): despite the docstring's "if provided", |log_filename| is
    # opened unconditionally below, so passing None would raise — confirm
    # callers always supply it.
    input_corpus = environment.get('SEED_CORPUS_DIR')
    output_corpus = environment.get('OUTPUT_CORPUS_DIR')
    fuzz_target_name = environment.get('FUZZ_TARGET')
    target_binary = fuzzer_utils.get_fuzz_target_binary(
        FUZZ_TARGET_DIR, fuzz_target_name)
    if not target_binary:
        # Nothing to fuzz without a target binary.
        logs.error('Fuzz target binary not found.')
        return
    _unpack_clusterfuzz_seed_corpus(target_binary, input_corpus)
    _clean_seed_corpus(input_corpus)
    if max_total_time is None:
        logs.warning('max_total_time is None. Fuzzing indefinitely.')
    runner_niceness = environment.get('RUNNER_NICENESS', 0)
    try:
        with open(log_filename, 'w') as log_file:
            # Because the runner is launched at a higher priority,
            # set it back to the default(0) for fuzzing processes.
            new_process.execute([
                'nice', '-n',
                str(0 - runner_niceness), 'python3', '-u', '-c',
                ('import fuzzer; '
                 'fuzzer.fuzz('
                 "'{input_corpus}', '{output_corpus}', '{target_binary}')"
                ).format(input_corpus=shlex.quote(input_corpus),
                         output_corpus=shlex.quote(output_corpus),
                         target_binary=shlex.quote(target_binary))
            ],
                                timeout=max_total_time,
                                output_files=[log_file],
                                kill_children=True,
                                env=_get_fuzzer_environment())
    except subprocess.CalledProcessError:
        # Best-effort: a failed fuzz run is logged, not raised.
        logs.error('Fuzz process returned nonzero.')
def output_report(web_bucket, in_progress=False):
    """Generate the HTML report and write it to |web_bucket|."""
    report_name = experiment_utils.get_experiment_name()
    local_reports_dir = get_reports_dir()
    cache_headers = ['-h', 'Cache-Control:public,max-age=0,no-transform']
    try:
        logger.debug('Generating report.')
        filesystem.recreate_directory(local_reports_dir)
        generate_report.generate_report([report_name],
                                        str(local_reports_dir),
                                        in_progress=in_progress)
        filestore_utils.rsync(str(local_reports_dir),
                              web_bucket,
                              gsutil_options=cache_headers)
        logger.debug('Done generating report.')
    except data_utils.EmptyDataError:
        logs.warning('No snapshot data.')
    except Exception:  # pylint: disable=broad-except
        logger.error('Error generating HTML report.')
def run_requested_experiment(dry_run):
    """Run the oldest requested experiment that hasn't been run yet in
    experiment-requests.yaml."""
    requested_experiments = _get_requested_experiments()

    # TODO(metzman): Look into supporting benchmarks as an optional parameter
    # so that people can add fuzzers that don't support everything.

    if PAUSE_SERVICE_KEYWORD in requested_experiments:
        # The automated experiment service is paused; do nothing.
        logs.warning('Pause service requested, not running experiment.')
        return None

    # Requests are listed newest-first; scan oldest-first for the first
    # experiment whose name is not already in the database.
    requested_experiment = None
    for candidate_config in reversed(requested_experiments):
        candidate_name = _get_experiment_name(candidate_config)
        already_ran = db_utils.query(models.Experiment).filter(
            models.Experiment.name == candidate_name).first() is not None
        if not already_ran:
            requested_experiment = candidate_config
            break

    if requested_experiment is None:
        logs.info('No new experiment to run. Exiting.')
        return None

    experiment_name = _get_experiment_name(requested_experiment)
    if not validate_experiment_requests([requested_experiment]):
        logs.error('Requested experiment: %s in %s is not valid.',
                   requested_experiment, REQUESTED_EXPERIMENTS_PATH)
        return None

    fuzzers = requested_experiment['fuzzers']
    logs.info('Running experiment: %s with fuzzers: %s.', experiment_name,
              ' '.join(fuzzers))
    fuzzer_configs = fuzzer_utils.get_fuzzer_configs(fuzzers=fuzzers)
    return _run_experiment(experiment_name, fuzzer_configs, dry_run)