def record_stats(self):
    """Save the fuzzer's self-reported stats for the current cycle, if any.

    Looks up the optional |get_stats| function on the fuzzer module, calls it
    with the output corpus directory and the log file, validates the returned
    JSON string and writes it to the per-cycle stats file in
    |self.results_dir| so that it will be synced to the filestore.

    Returns without writing anything when the fuzzer offers no |get_stats|,
    when the call raises, or when the returned stats fail validation.
    """
    # TODO(metzman): Make this more resilient so we don't wait forever and
    # so that breakages in stats parsing doesn't break runner.
    fuzzer_module = get_fuzzer_module(self.fuzzer)

    fuzzer_module_get_stats = getattr(fuzzer_module, 'get_stats', None)
    if fuzzer_module_get_stats is None:
        # Stats support is optional.
        return

    try:
        output_corpus = environment.get('OUTPUT_CORPUS_DIR')
        stats_json_str = fuzzer_module_get_stats(output_corpus, self.log_file)
    except Exception:  # pylint: disable=broad-except
        # Use %s: the argument is a function object, which %d cannot format
        # (the log call itself would fail instead of reporting the error).
        logs.error('Call to %s failed.', fuzzer_module_get_stats)
        return

    try:
        fuzzer_stats.validate_fuzzer_stats(stats_json_str)
    except (ValueError, json.decoder.JSONDecodeError):
        logs.error('Stats are invalid.')
        return

    stats_filename = experiment_utils.get_stats_filename(self.cycle)
    stats_path = os.path.join(self.results_dir, stats_filename)
    with open(stats_path, 'w') as stats_file_handle:
        stats_file_handle.write(stats_json_str)
def _clean_seed_corpus(seed_corpus_dir): """Prepares |seed_corpus_dir| for the trial. This ensures that it can be used by AFL which is picky about the seed corpus. Moves seed corpus files from sub-directories into the corpus directory root. Also, deletes any files that exceed the 1 MB limit. If the NO_SEEDS env var is specified than the seed corpus files are deleted.""" if not os.path.exists(seed_corpus_dir): return if environment.get('NO_SEEDS'): logs.info('NO_SEEDS specified, deleting seed corpus files.') shutil.rmtree(seed_corpus_dir) os.mkdir(seed_corpus_dir) return failed_to_move_files = [] for root, _, files in os.walk(seed_corpus_dir): for filename in files: file_path = os.path.join(root, filename) if os.path.getsize(file_path) > CORPUS_ELEMENT_BYTES_LIMIT: os.remove(file_path) logs.warning('Removed seed file %s as it exceeds 1 Mb limit.', file_path) continue sha1sum = utils.file_hash(file_path) new_file_path = os.path.join(seed_corpus_dir, sha1sum) try: shutil.move(file_path, new_file_path) except OSError: failed_to_move_files.append((file_path, new_file_path)) if failed_to_move_files: logs.error('Failed to move seed corpus files: %s', failed_to_move_files)
def copy_oss_fuzz_files(project, commit_date, benchmark_dir):
    """Copies the OSS-Fuzz files for |project|, as of |commit_date|, into
    |benchmark_dir|.

    Finds the most recent OSS-Fuzz commit before |commit_date| that touched
    the project's directory, checks that directory out at that commit, copies
    it to |benchmark_dir| and removes the copied project.yaml. Returns True on
    success and False if no suitable commit exists. Raises RuntimeError if
    OSS_FUZZ_DIR is not a git repo. The OSS-Fuzz checkout is always reset
    (git reset --hard) before returning.
    """
    git_metadata_path = os.path.join(OSS_FUZZ_DIR, '.git')
    if not os.path.exists(git_metadata_path):
        logs.error(
            '%s is not a git repo. Try running git submodule update --init',
            OSS_FUZZ_DIR)
        raise RuntimeError('%s is not a git repo.' % OSS_FUZZ_DIR)

    repo = GitRepoManager(OSS_FUZZ_DIR)
    projects_dir = os.path.join(OSS_FUZZ_DIR, 'projects', project)
    try:
        # Find an OSS-Fuzz commit that can be used to build the benchmark.
        log_command = [
            'log',
            '--before=' + commit_date.isoformat(),
            '-n1',
            '--format=%H',
            projects_dir,
        ]
        _, log_output, _ = repo.git(log_command)
        commit_hash = log_output.strip()
        if not commit_hash:
            logs.warning('No suitable earlier OSS-Fuzz commit found.')
            return False

        repo.git(['checkout', commit_hash, projects_dir])
        dir_util.copy_tree(projects_dir, benchmark_dir)
        copied_project_yaml = os.path.join(benchmark_dir, 'project.yaml')
        os.remove(copied_project_yaml)
        return True
    finally:
        # Undo the partial checkout regardless of the outcome.
        repo.git(['reset', '--hard'])
def _mirror_output(process: subprocess.Popen, output_files: List) -> str:
    """Mirror output from |process|'s stdout to |output_files| and return the
    output.

    Lines are taken from the queue filled by the thread that
    _start_enqueue_thread starts, decoded as UTF-8 (undecodable bytes are
    ignored), written to and flushed on every file in |output_files|, and
    accumulated into the returned string. A file whose write raises
    ValueError is logged and removed from |output_files| (the list is
    mutated in place). Returns once the reader thread has exited and the
    queue is drained.
    """
    # How long to block waiting for a line while the reader is still running.
    # Blocking (instead of spinning on get_nowait) keeps this loop from
    # burning a CPU core while the process is quiet.
    poll_interval_seconds = 0.1

    lines = []
    out_queue, thread = _start_enqueue_thread(process)

    while True:
        # Sample liveness BEFORE polling the queue. If the reader had already
        # exited at this point, everything it produced is already enqueued,
        # so an empty queue really means there is no more output. Checking
        # after the poll could drop a line enqueued right before the thread
        # exited.
        reader_done = not thread.is_alive()
        try:
            if reader_done:
                raw_line = out_queue.get_nowait()
            else:
                raw_line = out_queue.get(timeout=poll_interval_seconds)
        except queue.Empty:
            if reader_done:
                break
            continue

        # TODO(metzman): Handle cases where the process does not have utf-8
        # encoded output.
        line = raw_line.decode('utf-8', errors='ignore')
        if not line:
            # Nothing to mirror; termination is decided by the drained-queue
            # check above.
            continue

        # If we did get a line, add it to our list and write it to the
        # output_files.
        lines.append(line)
        # Iterate over a copy since broken files are removed while looping.
        for output_file in output_files[:]:
            try:
                output_file.write(line)
                output_file.flush()
            except ValueError:
                logs.error('Could not write to output_file: %s.', output_file)
                output_files.remove(output_file)

    thread.join()
    return ''.join(lines)
def _filter_fuzzers_with_few_samples(
        benchmark_snapshot_df,
        threshold=_DEFAULT_FUZZER_SAMPLE_NUM_THRESHOLD):
    """Drops fuzzers with too few samples from a benchmark snapshot.

    A fuzzer is kept only if its number of rows in |benchmark_snapshot_df| is
    strictly greater than |threshold| times the largest per-fuzzer row count.
    (The default threshold is a module constant; the previous docstring
    described it as 80% - confirm against its definition.)

    This is especially useful after picking a snapshot time near the end of
    the experiment, where a fuzzer with far fewer trials than the others is
    better dropped entirely. The names of dropped fuzzers are logged as an
    error.

    Returns the filtered benchmark snapshot.
    """
    sample_counts = benchmark_snapshot_df.fuzzer.value_counts()
    cutoff = threshold * sample_counts.max()
    has_enough_samples = sample_counts > cutoff
    ok_fuzzers = sample_counts[has_enough_samples].index

    # Log the list of bad fuzzers.
    every_fuzzer = set(benchmark_snapshot_df.fuzzer.unique().tolist())
    bad_fuzzers = sorted(every_fuzzer - set(ok_fuzzers.tolist()))
    if bad_fuzzers:
        benchmark_name = benchmark_snapshot_df.benchmark.unique()[0]
        message = 'Filtered bad fuzzers from {benchmark_name}: {bad_fuzzers}'
        logs.error(
            message.format(bad_fuzzers=bad_fuzzers,
                           benchmark_name=benchmark_name))

    rows_to_keep = benchmark_snapshot_df.fuzzer.isin(ok_fuzzers)
    return benchmark_snapshot_df[rows_to_keep]
def retry_build_loop(build_func: Callable, inputs: List[Tuple],
                     num_concurrent_builds: int) -> List:
    """Calls |build_func| in parallel on |inputs|. Repeat on failures up to
    |NUM_BUILD_RETRIES| times. Returns the list of inputs that |build_func| was
    called successfully on.

    Each element of |inputs| is unpacked as the positional arguments of one
    |build_func| call. Between attempts the loop sleeps for a random interval
    between 1 and |BUILD_FAIL_WAIT| seconds; no sleep happens after the last
    attempt since nothing is retried afterwards.
    """
    successes = []
    logs.info('Concurrent builds: %d.', num_concurrent_builds)
    with mp_pool.ThreadPool(num_concurrent_builds) as pool:
        for attempt in range(NUM_BUILD_RETRIES):
            logs.info('Building using (%s): %s', build_func, inputs)
            results = pool.starmap(build_func, inputs)
            curr_successes, curr_failures = split_successes_and_failures(
                inputs, results)

            logs.info('Build successes: %s', curr_successes)
            successes.extend(curr_successes)
            if not curr_failures:
                break

            logs.error('Build failures: %s', curr_failures)
            # Only the failed inputs are retried on the next attempt.
            inputs = curr_failures

            if attempt == NUM_BUILD_RETRIES - 1:
                # Out of attempts: sleeping here would only delay the return.
                break

            sleep_interval = random.uniform(1, BUILD_FAIL_WAIT)
            logs.info('Sleeping for %d secs before retrying.', sleep_interval)
            time.sleep(sleep_interval)

    return successes
def pytype(paths: List[Path]) -> bool:
    """Runs pytype on each of |paths| that is a python file.

    Returns False if any file fails type checking and True otherwise,
    including when pytype is skipped because the interpreter is newer than
    Python 3.7 or when there is no python file to check. Every python file is
    checked even after an earlier one failed.
    """
    # Pytype isn't supported on Python3.8+. See
    # https://github.com/google/pytype/issues/440.
    assert sys.version_info.major == 3, "Need Python3."

    if sys.version_info.minor > 7:
        logs.error(
            'Python version is: "%s". You should be using 3.7. '
            'Not running pytype.', sys.version)
        return True

    python_paths = list(filter(is_python, paths))
    if not python_paths:
        return True

    # TODO(metzman): Change this to the parallel pytype when the path issue is
    # solved.
    all_passed = True
    for python_path in python_paths:
        completed = subprocess.run(['python3', '-m', 'pytype', python_path],
                                   check=False)
        if completed.returncode != 0:
            all_passed = False
    return all_passed
def execute(  # pylint: disable=too-many-locals,too-many-branches
        command: List[str],
        *args,
        expect_zero: bool = True,
        timeout: int = None,
        write_to_stdout=False,
        # If not set, will default to PIPE.
        output_file=None,
        # Not True by default because we can't always set group on processes.
        kill_children: bool = False,
        **kwargs) -> ProcessResult:
    """Execute |command| and return the returncode and the output.

    Args:
        command: The argv list passed to subprocess.Popen.
        *args: Extra positional arguments forwarded to subprocess.Popen.
        expect_zero: If True, a nonzero return code (without a timeout having
            been recorded) is logged as an error and raised.
        timeout: If not None, a kill thread is started with this value via
            _start_kill_thread and cancelled once the process finished.
        write_to_stdout: If True, the child's stdout is left unset (None) so
            it goes to this process's stdout. |output_file| must then be
            None.
        output_file: Where the child's stdout is sent. Defaults to
            subprocess.PIPE when not writing to stdout.
        kill_children: If True, the child is started with os.setsid as
            preexec_fn and, when no timeout was given, its process group is
            killed after it finished.
        **kwargs: Extra keyword arguments forwarded to subprocess.Popen.
            'stdout' and 'stderr' are always overwritten; 'preexec_fn' is
            overwritten when |kill_children| is set.

    Returns:
        A ProcessResult built from the return code, the decoded output (None
        when communicate() captured nothing, i.e. stdout was not a pipe) and
        |wrapped_process.timed_out|.

    Raises:
        subprocess.CalledProcessError: if |expect_zero| is set, the return
            code is nonzero and the process is not marked as timed out.
    """
    if write_to_stdout:
        # Don't set stdout, it's default value None, causes it to be set to
        # stdout.
        assert output_file is None
    elif not output_file:
        output_file = subprocess.PIPE

    kwargs['stdout'] = output_file
    # stderr is always merged into stdout.
    kwargs['stderr'] = subprocess.STDOUT
    if kill_children:
        # Run the child in its own session so that its whole process group
        # can be killed later.
        kwargs['preexec_fn'] = os.setsid

    process = subprocess.Popen(command, *args, **kwargs)
    # Look up the group id now, while the process is known to exist.
    process_group_id = os.getpgid(process.pid)

    wrapped_process = WrappedPopen(process)
    if timeout is not None:
        kill_thread = _start_kill_thread(wrapped_process, kill_children,
                                         timeout)
    # Blocks until the process exits; |output| is None unless stdout is a
    # pipe.
    output, _ = process.communicate()

    if timeout is not None:
        kill_thread.cancel()
    elif kill_children:
        # elif because the kill_thread will kill children if needed.
        _kill_process_group(process_group_id)

    retcode = process.returncode

    # Both the command and the output are truncated for logging.
    command_log_str = ' '.join(command)[:LOG_LIMIT_FIELD]
    log_message = 'Executed command: "%s" returned: %d.'

    if output is not None:
        output = output.decode('utf-8', errors='ignore')
        # Keep the tail of the output, where failures usually show up.
        output_for_log = output[-LOG_LIMIT_FIELD:]
        log_extras = {'output': output_for_log}
    else:
        log_extras = None

    if expect_zero and retcode != 0 and not wrapped_process.timed_out:
        logs.error(log_message, command_log_str, retcode, extras=log_extras)
        raise subprocess.CalledProcessError(retcode, command)

    logs.debug(log_message, command_log_str, retcode, extras=log_extras)
    return ProcessResult(retcode, output, wrapped_process.timed_out)
def main():
    """Parses the command line and starts an experiment in the cloud.

    Returns 0 after starting the experiment, or 1 when --changed-fuzzers was
    requested but no fuzzer changed since the last experiment.
    """
    logs.initialize()

    parser = argparse.ArgumentParser(
        description='Begin an experiment that evaluates fuzzers on one or '
        'more benchmarks.')

    all_benchmarks = benchmark_utils.get_all_benchmarks()
    all_fuzzers = fuzzer_utils.get_fuzzer_names()

    parser.add_argument('-b',
                        '--benchmarks',
                        help='Benchmark names. All of them by default.',
                        nargs='+',
                        required=False,
                        default=all_benchmarks,
                        choices=all_benchmarks)
    parser.add_argument('-c',
                        '--experiment-config',
                        help='Path to the experiment configuration yaml file.',
                        required=True)
    parser.add_argument('-e',
                        '--experiment-name',
                        help='Experiment name.',
                        required=True)

    # --fuzzers and --changed-fuzzers cannot be combined.
    changed_fuzzers_help = ('Use fuzzers that have changed since the '
                            'last experiment. The last experiment is '
                            'determined by the database your '
                            'experiment uses, not necessarily the '
                            'fuzzbench service')
    fuzzers_group = parser.add_mutually_exclusive_group()
    fuzzers_group.add_argument('-f',
                               '--fuzzers',
                               help='Fuzzers to use.',
                               nargs='+',
                               required=False,
                               default=None,
                               choices=all_fuzzers)
    fuzzers_group.add_argument('-cf',
                               '--changed-fuzzers',
                               help=changed_fuzzers_help,
                               action='store_true',
                               required=False)
    args = parser.parse_args()

    if not args.changed_fuzzers:
        # No explicit selection means every fuzzer.
        fuzzers = args.fuzzers or all_fuzzers
    else:
        fuzzers = experiment_changes.get_fuzzers_changed_since_last()
        if not fuzzers:
            logs.error('No fuzzers changed since last experiment. Exiting.')
            return 1

    start_experiment(args.experiment_name, args.experiment_config,
                     args.benchmarks, fuzzers)
    return 0
def execute(  # pylint: disable=too-many-locals,too-many-branches
        command: List[str],
        *args,
        expect_zero: bool = True,
        output_files=None,
        timeout: int = None,
        write_to_stdout: bool = True,
        # Not True by default because we can't always set group on processes.
        kill_children: bool = False,
        **kwargs) -> ProcessResult:
    """Execute |command| and return the returncode and the output.

    Args:
        command: The argv list passed to subprocess.Popen.
        *args: Extra positional arguments forwarded to subprocess.Popen.
        expect_zero: If True, a nonzero return code (without a timeout having
            been recorded) is logged as an error and raised.
        output_files: Optional list of file objects that the output is
            mirrored to. The list is copied, so the caller's list is not
            modified here.
        timeout: If not None, a kill thread is started with this value via
            _start_kill_thread and cancelled once the process finished.
        write_to_stdout: If True, sys.stdout is added to the files the output
            is mirrored to.
        kill_children: If True, the child is started with os.setsid as
            preexec_fn and, when no kill thread was started, its process
            group is killed after it finished.
        **kwargs: Extra keyword arguments forwarded to subprocess.Popen.
            'stdout' and 'stderr' are always overwritten; 'bufsize',
            'close_fds' and 'preexec_fn' are overwritten in the cases below.

    Returns:
        A ProcessResult built from the return code, the decoded output and
        |wrapped_process.timed_out|.

    Raises:
        subprocess.CalledProcessError: if |expect_zero| is set, the return
            code is nonzero and the process is not marked as timed out.
    """
    if output_files is None:
        output_files = []
    else:
        # Work on a copy so the caller's list is left untouched.
        output_files = output_files[:]
    if write_to_stdout:
        output_files.append(sys.stdout)
    if output_files:
        # NOTE(review): bufsize=1 presumably aims at line-by-line mirroring;
        # confirm it has the intended effect on a binary-mode pipe.
        kwargs['bufsize'] = 1
        kwargs['close_fds'] = 'posix' in sys.builtin_module_names

    # Output is always captured through a pipe, with stderr merged in.
    kwargs['stdout'] = subprocess.PIPE
    kwargs['stderr'] = subprocess.STDOUT
    if kill_children:
        # Run the child in its own session so that its whole process group
        # can be killed later.
        kwargs['preexec_fn'] = os.setsid

    process = subprocess.Popen(command, *args, **kwargs)
    # Look up the group id now, while the process is known to exist.
    process_group_id = os.getpgid(process.pid)

    kill_thread = None
    wrapped_process = WrappedPopen(process)
    if timeout is not None:
        kill_thread = _start_kill_thread(wrapped_process, kill_children,
                                         timeout)
    if output_files:
        # _mirror_output returns the already decoded output.
        output = _mirror_output(process, output_files)
    else:
        output, _ = process.communicate()
        output = output.decode('utf-8', errors='ignore')
    # Make sure the return code is available before it is read below.
    process.wait()

    if kill_thread:
        kill_thread.cancel()
    elif kill_children:
        # Only reached when no kill thread was started.
        _kill_process_group(process_group_id)

    retcode = process.returncode

    # Both the command and the output are truncated for logging.
    log_message = ('Executed command: "{command}" returned: {retcode}.'.format(
        command=(' '.join(command))[:LOG_LIMIT_FIELD], retcode=retcode))
    # Keep the tail of the output, where failures usually show up.
    output_for_log = output[-LOG_LIMIT_FIELD:]
    log_extras = {'output': output_for_log}

    if expect_zero and retcode != 0 and not wrapped_process.timed_out:
        logs.error(log_message, extras=log_extras)
        raise subprocess.CalledProcessError(retcode, command)

    logs.debug(log_message, extras=log_extras)
    return ProcessResult(retcode, output, wrapped_process.timed_out)
def validate(benchmark):
    """Return True if |benchmark| is a valid fuzzbench benchmark.

    The name must match VALID_BENCHMARK_REGEX and the benchmark must be one of
    those returned by get_all_benchmarks. The reason for a rejection is logged
    as an error.
    """
    name_is_well_formed = VALID_BENCHMARK_REGEX.match(benchmark) is not None
    if not name_is_well_formed:
        logs.error('%s does not conform to %s pattern.', benchmark,
                   VALID_BENCHMARK_REGEX.pattern)
        return False

    if benchmark not in get_all_benchmarks():
        logs.error('%s must have a build.sh or oss-fuzz.yaml.', benchmark)
        return False

    return True
def run_fuzzer(max_total_time, log_filename):
    """Runs the fuzzer using its script.

    The seed corpus is unpacked and cleaned first. The fuzzer is then started
    through `nice` as `python3 -u -c "import fuzzer; fuzzer.fuzz(...)"`, with
    |max_total_time| as the timeout (None means no timeout). Its stdout and
    stderr go to this process's stdout when the FUZZ_OUTSIDE_EXPERIMENT env
    var is set, and to |log_filename| otherwise.

    Returns None in all cases. If the fuzz target binary cannot be found an
    error is logged and nothing is run. If the fuzz process exits with a
    nonzero code the module-level |fuzzer_errored_out| flag is set to True and
    an error is logged.
    """
    input_corpus = environment.get('SEED_CORPUS_DIR')
    output_corpus = environment.get('OUTPUT_CORPUS_DIR')
    fuzz_target_name = environment.get('FUZZ_TARGET')
    target_binary = fuzzer_utils.get_fuzz_target_binary(FUZZ_TARGET_DIR,
                                                        fuzz_target_name)
    if not target_binary:
        logs.error('Fuzz target binary not found.')
        return

    _unpack_clusterfuzz_seed_corpus(target_binary, input_corpus)
    _clean_seed_corpus(input_corpus)

    if max_total_time is None:
        logs.warning('max_total_time is None. Fuzzing indefinitely.')

    runner_niceness = environment.get('RUNNER_NICENESS', 0)

    try:
        # The paths are embedded in Python source that is handed to
        # `python3 -c` as a single argv element (no shell is involved), so
        # they need Python string-literal quoting (repr), not shell quoting:
        # shlex.quote inside a '...' literal yields invalid Python for any
        # path that needs quoting. For simple paths the result is unchanged.
        fuzz_call = ('import fuzzer; '
                     'fuzzer.fuzz('
                     '{input_corpus!r}, {output_corpus!r}, {target_binary!r})'
                    ).format(input_corpus=input_corpus,
                             output_corpus=output_corpus,
                             target_binary=target_binary)

        # Because the runner is launched at a higher priority,
        # set it back to the default(0) for fuzzing processes.
        command = [
            'nice', '-n',
            str(0 - runner_niceness), 'python3', '-u', '-c', fuzz_call
        ]

        fuzzer_environment = _get_fuzzer_environment()
        # Write output to stdout if user is fuzzing from command line.
        # Otherwise, write output to the log file.
        if environment.get('FUZZ_OUTSIDE_EXPERIMENT'):
            new_process.execute(command,
                                timeout=max_total_time,
                                write_to_stdout=True,
                                kill_children=True,
                                env=fuzzer_environment)
        else:
            with open(log_filename, 'wb') as log_file:
                new_process.execute(command,
                                    timeout=max_total_time,
                                    output_file=log_file,
                                    kill_children=True,
                                    env=fuzzer_environment)
    except subprocess.CalledProcessError:
        global fuzzer_errored_out  # pylint:disable=invalid-name
        fuzzer_errored_out = True
        logs.error('Fuzz process returned nonzero.')
def validate_name(fuzzer):
    """Return True if |fuzzer| is a valid fuzzbench fuzzer name, i.e. it
    matches VALID_FUZZER_REGEX. A non-matching name is logged as an error."""
    # Although importing probably allows a subset of what the regex allows, use
    # the regex anyway to be safe. The regex is enforcing that the fuzzer is a
    # valid path for GCS or a linux system.
    if VALID_FUZZER_REGEX.match(fuzzer) is not None:
        return True

    logs.error('Fuzzer: %s does not conform to pattern: %s.', fuzzer,
               VALID_FUZZER_REGEX.pattern)
    return False
def main():
    """Creates or gets an already created service account key and saves it to
    the provided path.

    The key file path is read from the first command line argument and the
    second argument is passed first to get_or_create_key. Returns 0 on
    success and 1 if anything raised, including missing arguments.
    """
    logs.initialize()

    exit_code = 0
    try:
        key_path = sys.argv[1]
        get_or_create_key(sys.argv[2], key_path)
        logs.info('Saved key to %s.', key_path)
    except Exception:  # pylint: disable=broad-except
        logs.error('Failed to get or create key.')
        exit_code = 1
    return exit_code
def validate(fuzzer):
    """Return True if |fuzzer| is a valid fuzzbench fuzzer: its name passes
    validate_name and its module fuzzers.<fuzzer>.fuzzer can be imported.
    An import failure is logged as an error."""
    if not validate_name(fuzzer):
        return False

    # Try importing the fuzzer module.
    module_name = 'fuzzers.{}.fuzzer'.format(fuzzer)
    try:
        importlib.import_module(module_name)
    except Exception as error:  # pylint: disable=broad-except
        logs.error('Encountered "%s" while trying to import %s.', error,
                   module_name)
        return False
    return True
def validate(benchmark):
    """Return True if |benchmark| is a valid fuzzbench benchmark.

    The name must match VALID_BENCHMARK_REGEX and the benchmark's directory
    under <ROOT_DIR>/benchmarks must contain a build.sh or an oss-fuzz.yaml.
    The reason for a rejection is logged as an error.
    """
    if VALID_BENCHMARK_REGEX.match(benchmark) is None:
        logs.error('%s does not conform to %s pattern.', benchmark,
                   VALID_BENCHMARK_REGEX.pattern)
        return False

    benchmark_dir = os.path.join(utils.ROOT_DIR, 'benchmarks', benchmark)
    # Either file is enough to make the benchmark buildable.
    accepted_files = ('build.sh', 'oss-fuzz.yaml')
    if any(
            os.path.exists(os.path.join(benchmark_dir, filename))
            for filename in accepted_files):
        return True

    logs.error('%s must have a build.sh or oss-fuzz.yaml.', benchmark)
    return False
def do_sync(self, final_sync=False):
    """Save corpus archives and results to GCS.

    The corpus is archived and saved when |final_sync| is set or when the
    corpus directory changed; otherwise the cycle number is appended to the
    unchanged-cycles file. Results are saved in both cases. Any exception is
    caught and logged as an error rather than propagated.
    """
    try:
        # On the final sync the corpus is saved without checking for changes.
        corpus_needs_saving = final_sync or not self.is_corpus_dir_same()
        if corpus_needs_saving:
            logs.debug('Cycle: %d changed.', self.cycle)
            self.archive_and_save_corpus()
        else:
            logs.debug('Cycle: %d unchanged.', self.cycle)
            filesystem.append(self.unchanged_cycles_path, str(self.cycle))

        self.save_results()
        logs.debug('Finished sync.')
    except Exception:  # pylint: disable=broad-except
        logs.error('Failed to sync cycle: %d.', self.cycle)
def handle_retry(num_try, exception=None):
    """Decides whether another attempt should be made.

    Returns True, after logging and sleeping for get_delay(num_try, delay,
    backoff), when |exception| is None or an instance of |exception_type| and
    |num_try| is still below |tries|. Otherwise logs an error and returns
    False.

    NOTE(review): |exception_type|, |tries|, |delay|, |backoff| and
    |function_with_type| are not defined here; presumably they come from an
    enclosing retry wrapper - confirm.
    """
    is_retryable = (exception is None or
                    isinstance(exception, exception_type))
    if not (is_retryable and num_try < tries):
        logs.error('Retrying on %s failed with %s. Raise.', function_with_type,
                   sys.exc_info()[1])
        return False

    logs.info('Retrying on %s failed with %s. Retrying again.',
              function_with_type,
              sys.exc_info()[1])
    sleep(get_delay(num_try, delay, backoff))
    return True
def run_requested_experiment(dry_run):
    """Run the oldest requested experiment that hasn't been run yet in
    experiment-requests.yaml.

    The requests are scanned in reverse order and the first one whose name
    has no Experiment row in the database is selected. Returns None without
    running anything when the service is paused, when there is no new request
    or when the selected request is invalid. Otherwise returns the result of
    _run_experiment.
    """
    requested_experiments = _get_requested_experiments()

    # TODO(metzman): Look into supporting benchmarks as an optional parameter so
    # that people can add fuzzers that don't support everything.

    if PAUSE_SERVICE_KEYWORD in requested_experiments:
        # Check if automated experiment service is paused.
        logs.warning('Pause service requested, not running experiment.')
        return None

    def _has_not_run(experiment_config):
        """Returns True if no experiment with this request's name is in the
        database."""
        name = _get_experiment_name(experiment_config)
        with db_utils.session_scope() as session:
            return session.query(models.Experiment).filter(
                models.Experiment.name == name).first() is None

    # The generator is lazy, so the scan stops at the first new request.
    requested_experiment = next(
        (config for config in reversed(requested_experiments)
         if _has_not_run(config)), None)

    if requested_experiment is None:
        logs.info('No new experiment to run. Exiting.')
        return None

    experiment_name = _get_experiment_name(requested_experiment)
    if not validate_experiment_requests([requested_experiment]):
        logs.error('Requested experiment: %s in %s is not valid.',
                   requested_experiment, REQUESTED_EXPERIMENTS_PATH)
        return None

    fuzzers = requested_experiment['fuzzers']

    # Bug experiments use the bug benchmarks; everything else uses all the
    # coverage benchmarks.
    is_bug_experiment = (requested_experiment.get('type') ==
                         benchmark_utils.BenchmarkType.BUG.value)
    if is_bug_experiment:
        benchmarks = benchmark_utils.get_bug_benchmarks()
    else:
        benchmarks = (benchmark_utils.get_oss_fuzz_coverage_benchmarks() +
                      benchmark_utils.get_standard_coverage_benchmarks())

    logs.info('Running experiment: %s with fuzzers: %s.', experiment_name,
              ' '.join(fuzzers))
    description = _get_description(requested_experiment)
    oss_fuzz_corpus = _use_oss_fuzz_corpus(requested_experiment)
    return _run_experiment(experiment_name, fuzzers, benchmarks, description,
                           oss_fuzz_corpus, dry_run)
def run_fuzzer(max_total_time, log_filename):
    """Runs the fuzzer using its script. Logs stdout and stderr of the fuzzer
    script to |log_filename|.

    The seed corpus is unpacked and cleaned first. The fuzzer is then started
    through `nice` as `python3 -u -c "import fuzzer; fuzzer.fuzz(...)"`, with
    |max_total_time| as the timeout (None means no timeout).

    Returns None in all cases. If the fuzz target binary cannot be found, or
    if the fuzz process exits with a nonzero code, an error is logged.
    """
    input_corpus = environment.get('SEED_CORPUS_DIR')
    output_corpus = environment.get('OUTPUT_CORPUS_DIR')
    fuzz_target_name = environment.get('FUZZ_TARGET')
    target_binary = fuzzer_utils.get_fuzz_target_binary(FUZZ_TARGET_DIR,
                                                        fuzz_target_name)
    if not target_binary:
        logs.error('Fuzz target binary not found.')
        return

    _unpack_clusterfuzz_seed_corpus(target_binary, input_corpus)
    _clean_seed_corpus(input_corpus)

    if max_total_time is None:
        logs.warning('max_total_time is None. Fuzzing indefinitely.')

    runner_niceness = environment.get('RUNNER_NICENESS', 0)

    try:
        # The paths are embedded in Python source that is handed to
        # `python3 -c` as a single argv element (no shell is involved), so
        # they need Python string-literal quoting (repr), not shell quoting:
        # shlex.quote inside a '...' literal yields invalid Python for any
        # path that needs quoting. For simple paths the result is unchanged.
        fuzz_call = ('import fuzzer; '
                     'fuzzer.fuzz('
                     '{input_corpus!r}, {output_corpus!r}, {target_binary!r})'
                    ).format(input_corpus=input_corpus,
                             output_corpus=output_corpus,
                             target_binary=target_binary)

        # Because the runner is launched at a higher priority,
        # set it back to the default(0) for fuzzing processes.
        command = [
            'nice', '-n',
            str(0 - runner_niceness), 'python3', '-u', '-c', fuzz_call
        ]

        with open(log_filename, 'w') as log_file:
            new_process.execute(command,
                                timeout=max_total_time,
                                output_files=[log_file],
                                kill_children=True,
                                env=_get_fuzzer_environment())
    except subprocess.CalledProcessError:
        logs.error('Fuzz process returned nonzero.')
def main():
    """Conducts the experiment as the dispatcher and reports the outcome.

    Any exception raised by dispatcher_main is logged and re-raised. Returns 0
    for a local experiment, or when stop_experiment reports success for a
    non-local one; returns 1 otherwise.
    """
    logs.initialize(default_extras={
        'component': 'dispatcher',
    })

    try:
        dispatcher_main()
    except Exception as error:
        logs.error('Error conducting experiment.')
        raise error

    # Local experiments have nothing to stop.
    if experiment_utils.is_local_experiment():
        return 0

    config_path = _get_config_file_path()
    experiment_name = experiment_utils.get_experiment_name()
    stopped = stop_experiment.stop_experiment(experiment_name, config_path)
    return 0 if stopped else 1