def get_fuzzer_or_engine_name(fuzzer_name):
  """Return the engine name for |fuzzer_name| if it maps to a known fuzz
  target; otherwise return |fuzzer_name| unchanged."""
  target = data_handler.get_fuzz_target(fuzzer_name)
  return target.engine if target else fuzzer_name
def __init__(self, fuzzer_name, testcase_path, test_timeout, gestures,
             needs_http=False):
  """Set up runner state for reproducing |testcase_path|.

  Engine-based fuzz targets get their arguments resolved from the flags
  file next to the testcase; anything else falls back to the black-box
  application command line.
  """
  self._testcase_path = testcase_path
  self._test_timeout = test_timeout
  self._gestures = gestures
  self._needs_http = needs_http

  fuzz_target = data_handler.get_fuzz_target(fuzzer_name)
  engine_impl = engine.get(fuzz_target.engine) if fuzz_target else None

  # TODO(ochang): Make this hard fail once migration to new fuzzing pipeline
  # is complete.
  if fuzz_target and engine_impl:
    self._is_black_box = False
    self._engine_impl = engine_impl

    # Read target_name + args from flags file.
    flags = get_additional_command_line_flags(testcase_path)
    flags = data_handler.filter_arguments(flags, fuzz_target.binary)
    self._arguments = flags.split()

    self._fuzz_target = fuzz_target
  else:
    self._is_black_box = True
    self._command = get_command_line_for_application(
        testcase_path, needs_http=needs_http)
def _store_testcase_for_regression_testing(testcase, testcase_file_path):
  """Upload a reproducer to the regressions corpus consumed by the corpus
  pruning task.

  Only applies to fixed crashes that have an associated bug and belong to an
  engine fuzzer; everything else is skipped silently.
  """
  if testcase.open:
    # Store testcase only after the crash is fixed.
    return

  if not testcase.bug_information:
    # Only store crashes with bugs associated with them.
    return

  fuzz_target = data_handler.get_fuzz_target(testcase.overridden_fuzzer_name)
  if not fuzz_target:
    # No work to do, only applicable for engine fuzzers.
    return

  corpus = corpus_manager.FuzzTargetCorpus(
      fuzz_target.engine, fuzz_target.project_qualified_name())
  # Content-addressed name so duplicate uploads of the same file collapse.
  regression_testcase_url = os.path.join(
      corpus.get_regressions_corpus_gcs_url(),
      utils.file_hash(testcase_file_path))

  copied = storage.copy_file_to(testcase_file_path, regression_testcase_url)
  if copied:
    logs.log('Successfully stored testcase for regression testing: ' +
             regression_testcase_url)
  else:
    logs.log_error('Failed to store testcase for regression testing: ' +
                   regression_testcase_url)
def test_for_reproducibility(fuzzer_name, full_fuzzer_name, testcase_path,
                             expected_state, expected_security_flag,
                             test_timeout, http_flag, gestures,
                             arguments=None):
  """Test to see if a crash is fully reproducible or is a one-time crasher."""
  try:
    fuzz_target = data_handler.get_fuzz_target(full_fuzzer_name)
    # An engine fuzzer whose target record is missing cannot be run.
    if engine.get(fuzzer_name) and not fuzz_target:
      raise TargetNotFoundError

    runner = TestcaseRunner(
        fuzz_target,
        testcase_path,
        test_timeout,
        gestures,
        http_flag,
        arguments=arguments)

    retry_count = environment.get_value('CRASH_RETRIES')
    return runner.test_reproduce_reliability(retry_count, expected_state,
                                             expected_security_flag)
  except TargetNotFoundError:
    # If a target isn't found, treat it as not crashing.
    return False
def _get_fuzzer_or_engine(name):
  """Return fuzzer entity, or engine this target is part of."""
  fuzz_target = data_handler.get_fuzz_target(name)
  lookup_name = fuzz_target.engine if fuzz_target else name
  query = data_types.Fuzzer.query(data_types.Fuzzer.name == lookup_name)
  return query.get()
def execute_task(fuzzer_name_and_revision, job_type):
  """Execute corpus pruning task."""
  # TODO(ochang): Remove this once remaining jobs in queue are all processed.
  if '@' in fuzzer_name_and_revision:
    full_fuzzer_name, revision_str = fuzzer_name_and_revision.split('@')
    revision = revisions.convert_revision_to_integer(revision_str)
  else:
    full_fuzzer_name = fuzzer_name_and_revision
    revision = 0

  fuzz_target = data_handler.get_fuzz_target(full_fuzzer_name)
  task_name = 'corpus_pruning_%s_%s' % (full_fuzzer_name, job_type)

  # Get status of last execution.
  last_execution_metadata = data_handler.get_task_status(task_name)
  last_execution_failed = (
      last_execution_metadata and
      last_execution_metadata.status == data_types.TaskState.ERROR)

  # Make sure we're the only instance running for the given fuzzer and
  # job_type.
  if not data_handler.update_task_status(task_name,
                                         data_types.TaskState.STARTED):
    logs.log('A previous corpus pruning task is still running, exiting.')
    return

  # Setup fuzzer and data bundle.
  if not setup.update_fuzzer_and_data_bundles(fuzz_target.engine):
    raise CorpusPruningException(
        'Failed to set up fuzzer %s.' % fuzz_target.engine)

  use_minijail = environment.get_value('USE_MINIJAIL')

  # TODO(unassigned): Use coverage information for better selection here.
  cross_pollinate_fuzzers = _get_cross_pollinate_fuzzers(
      fuzz_target.engine, full_fuzzer_name)

  context = Context(fuzz_target, cross_pollinate_fuzzers, use_minijail)

  # Copy global blacklist into local suppressions file if LSan is enabled.
  if environment.get_value('LSAN'):
    # TODO(ochang): Copy this to untrusted worker.
    leak_blacklist.copy_global_to_local_blacklist()

  try:
    result = do_corpus_pruning(context, last_execution_failed, revision)
    _save_coverage_information(context, result)
    _process_corpus_crashes(context, result)
  except CorpusPruningException as e:
    # Mark the task errored and bail; cleanup still runs via finally.
    logs.log_error('Corpus pruning failed: %s.' % str(e))
    data_handler.update_task_status(task_name, data_types.TaskState.ERROR)
    return
  finally:
    context.cleanup()

  data_handler.update_task_status(task_name, data_types.TaskState.FINISHED)
def find_fuzz_target(engine, target_name, job_name):
  """Find a fuzz target given the engine, target name (which may or may not be
  prefixed with project), and job.

  Returns a (fully_qualified_name, binary) pair, or (None, None) when no
  matching target record exists.
  """
  project_name = data_handler.get_project_name(job_name)
  candidate_name = data_types.fuzz_target_fully_qualified_name(
      engine, project_name, target_name)

  target = data_handler.get_fuzz_target(candidate_name)
  if not target:
    return None, None

  return target.fully_qualified_name(), target.binary
def execute_task(full_fuzzer_name, job_type):
  """Execute corpus pruning task."""
  fuzz_target = data_handler.get_fuzz_target(full_fuzzer_name)
  task_name = 'corpus_pruning_%s_%s' % (full_fuzzer_name, job_type)
  revision = 0  # Trunk revision

  # Get status of last execution.
  previous_execution = data_handler.get_task_status(task_name)
  last_execution_failed = (
      previous_execution and
      previous_execution.status == data_types.TaskState.ERROR)

  # Make sure we're the only instance running for the given fuzzer and
  # job_type.
  if not data_handler.update_task_status(task_name,
                                         data_types.TaskState.STARTED):
    logs.log('A previous corpus pruning task is still running, exiting.')
    return

  # Setup fuzzer and data bundle.
  if not setup.update_fuzzer_and_data_bundles(fuzz_target.engine):
    raise CorpusPruningException(
        'Failed to set up fuzzer %s.' % fuzz_target.engine)

  cross_pollination_method, tag = choose_cross_pollination_strategy(
      full_fuzzer_name)

  # TODO(unassigned): Use coverage information for better selection here.
  cross_pollinate_fuzzers = _get_cross_pollinate_fuzzers(
      fuzz_target.engine, full_fuzzer_name, cross_pollination_method, tag)

  context = Context(fuzz_target, cross_pollinate_fuzzers,
                    cross_pollination_method, tag)

  # Copy global blacklist into local suppressions file if LSan is enabled.
  if environment.get_value('LSAN'):
    # TODO(ochang): Copy this to untrusted worker.
    leak_blacklist.copy_global_to_local_blacklist()

  try:
    result = do_corpus_pruning(context, last_execution_failed, revision)
    _record_cross_pollination_stats(result.cross_pollination_stats)
    _save_coverage_information(context, result)
    _process_corpus_crashes(context, result)
  except Exception:
    # Mark the task errored and bail; cleanup still runs via finally.
    logs.log_error('Corpus pruning failed.')
    data_handler.update_task_status(task_name, data_types.TaskState.ERROR)
    return
  finally:
    context.cleanup()

  data_handler.update_task_status(task_name, data_types.TaskState.FINISHED)
def find_fuzz_target(engine, target_name, job_name):
  """Return fuzz target values given the engine, target name (which may or
  may not be prefixed with project), and job.

  Raises helpers.EarlyExitException (HTTP 400) when the target is unknown.
  """
  candidate_name = data_types.fuzz_target_fully_qualified_name(
      engine, data_handler.get_project_name(job_name), target_name)

  target = data_handler.get_fuzz_target(candidate_name)
  if not target:
    raise helpers.EarlyExitException('Fuzz target does not exist.', 400)

  return target.fully_qualified_name(), target.binary
def get_coverage_info(self, fuzzer, date=None):
  """Return coverage info of child fuzzers."""
  if fuzzer in data_types.BUILTIN_FUZZERS:
    # Get coverage info for a job (i.e. a project).
    project = data_handler.get_project_name(self.single_job_or_none())
    return get_coverage_info(project, date)

  fuzz_target = data_handler.get_fuzz_target(fuzzer)
  lookup_name = (
      fuzz_target.project_qualified_name() if fuzz_target else fuzzer)
  return get_coverage_info(lookup_name, date)
def _prepare_testcase_dict(testcase):
  """Prepare a dictionary containing all information needed by the tool."""
  # By calling _to_dict directly here we prevent the need to modify this as
  # the testcase and other models changes over time.
  # pylint: disable=protected-access
  testcase_dict = testcase._to_dict()
  fuzz_target = data_handler.get_fuzz_target(testcase.actual_fuzzer_name())
  fuzz_target_dict = fuzz_target._to_dict() if fuzz_target else None
  # pylint: enable=protected-access

  # Several nonstandard bits of information are required for the tool to run.
  # Append these to the test case dict and serialize them as well.
  job = data_types.Job.query(data_types.Job.name == testcase.job_type).get()
  testcase_dict['job_definition'] = job.get_environment_string()
  testcase_dict['serialized_fuzz_target'] = fuzz_target_dict

  return testcase_dict
def test_get_libfuzzer_flags(self):
  """Test get_libfuzzer_flags logic."""
  fuzz_target = data_handler.get_fuzz_target('libFuzzer_test_fuzzer')
  context = corpus_pruning_task.Context(
      fuzz_target, [], corpus_pruning_task.Pollination.RANDOM, None)
  runner = corpus_pruning_task.Runner(self.build_dir, context)

  # Default flags apply when no .options file has been loaded.
  six.assertCountEqual(self, runner.get_libfuzzer_flags(), [
      '-timeout=5', '-rss_limit_mb=2560', '-max_len=5242880',
      '-detect_leaks=1', '-use_value_profile=1'
  ])

  # Values from the .options file override the corresponding defaults.
  runner.fuzzer_options = options.FuzzerOptions(
      os.path.join(self.build_dir, 'test_get_libfuzzer_flags.options'))
  six.assertCountEqual(self, runner.get_libfuzzer_flags(), [
      '-timeout=5', '-rss_limit_mb=2560', '-max_len=1337',
      '-detect_leaks=0', '-use_value_profile=1'
  ])
def execute_task(full_fuzzer_name, job_type):
  """Execute ML RNN training task.

  The task is training RNN model by default. If more models are developed,
  arguments can be modified to specify which model to use.

  Args:
    full_fuzzer_name: Fully qualified fuzz target name, e.g.
        libFuzzer_libpng_read_fuzzer.
    job_type: Job type, e.g. libfuzzer_chrome_asan. Unused.
  """
  del job_type

  # Sets up fuzzer binary build.
  fuzz_target = data_handler.get_fuzz_target(full_fuzzer_name)
  if not fuzz_target:
    logs.log_warn(f'Fuzzer not found: {full_fuzzer_name}, skip RNN training.')
    return
  fuzzer_name = fuzz_target.project_qualified_name()

  # Directory to place training files, such as logs, models, corpus.
  # Use |FUZZ_INPUTS_DISK| since it is not size constrained.
  temp_directory = environment.get_value('FUZZ_INPUTS_DISK')

  # Get corpus.
  corpus_directory = get_corpus_directory(temp_directory, fuzzer_name)
  shell.remove_directory(corpus_directory, recreate=True)

  logs.log('Downloading corpus backup for %s.' % fuzzer_name)
  if not ml_train_utils.get_corpus(corpus_directory, fuzzer_name):
    logs.log_error('Failed to download corpus backup for %s.' % fuzzer_name)
    return

  # Get the directory to save models.
  model_directory = get_model_files_directory(temp_directory, fuzzer_name)
  shell.remove_directory(model_directory, recreate=True)

  # Get the directory to save training logs.
  log_directory = get_model_log_directory(temp_directory, fuzzer_name)
  shell.remove_directory(log_directory, recreate=True)

  result = train_rnn(corpus_directory, model_directory, log_directory)

  # Training process exited abnormally but not caused by timeout, meaning
  # error occurred during execution.
  if result.return_code and not result.timed_out:
    if result.return_code == constants.ExitCode.CORPUS_TOO_SMALL:
      logs.log_warn(
          'ML RNN training task for fuzzer %s aborted due to small corpus.' %
          fuzzer_name)
    else:
      logs.log_error(
          'ML RNN training task for fuzzer %s failed with ExitCode = %d.' %
          (fuzzer_name, result.return_code),
          output=utils.decode_to_unicode(result.output))

    # Skip uploading if training failed.
    return

  # Timing out may be caused by large training corpus, but intermediate models
  # are frequently saved and can be uploaded.
  if result.timed_out:
    logs.log_warn('ML RNN training task for %s timed out.' % fuzzer_name)

  upload_model_to_gcs(model_directory, fuzzer_name)