def setUp(self):
  helpers.patch_environ(self)
  self.today = datetime.datetime.utcnow().date()
  self.today_minus_2 = self.today - datetime.timedelta(days=2)

  job_info = data_types.Job(
      name='job1', environment_string='PROJECT_NAME = xyz_name')
  job_info.put()

  cov_info = data_types.CoverageInformation(
      fuzzer='xyz_name', date=self.today_minus_2)
  cov_info.html_report_url = 'https://report_for_xyz/20161019/index.html'
  cov_info.put()

  cov_info = data_types.CoverageInformation(
      fuzzer='xyz_name', date=self.today)
  cov_info.html_report_url = 'https://report_for_xyz/20161021/index.html'
  cov_info.put()
def get_coverage_information(fuzzer_name, date, create_if_needed=False):
  """Get coverage information, or create if it doesn't exist."""
  coverage_info = ndb.Key(
      data_types.CoverageInformation,
      data_types.coverage_information_key(fuzzer_name, date)).get()

  if not coverage_info and create_if_needed:
    coverage_info = data_types.CoverageInformation(
        fuzzer=fuzzer_name, date=date)

  return coverage_info
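# Hedged usage sketch, not part of the original source: shows how a caller
# might combine get_coverage_information() with the corpus stats fields
# populated elsewhere in this file. The function name
# save_corpus_stats_example and its parameters are illustrative assumptions.
def save_corpus_stats_example(fuzzer_name, corpus_size_units,
                              corpus_size_bytes):
  """Example only: fetch or create today's CoverageInformation, then update."""
  today = datetime.datetime.utcnow().date()
  coverage_info = get_coverage_information(
      fuzzer_name, today, create_if_needed=True)
  # Corpus stats come from corpus pruning; edge and function coverage values
  # are filled in separately by the fuzzer coverage cron task.
  coverage_info.corpus_size_units = corpus_size_units
  coverage_info.corpus_size_bytes = corpus_size_bytes
  coverage_info.put()
  return coverage_info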
def do_corpus_pruning(context, last_execution_failed, revision):
  """Do corpus pruning on untrusted worker."""
  cross_pollinate_fuzzers = [
      untrusted_runner_pb2.CrossPollinateFuzzer(
          fuzz_target=_fuzz_target_to_proto(cpf.fuzz_target),
          backup_bucket_name=cpf.backup_bucket_name,
          corpus_engine_name=cpf.corpus_engine_name,
      ) for cpf in context.cross_pollinate_fuzzers
  ]

  request = untrusted_runner_pb2.PruneCorpusRequest(
      fuzz_target=_fuzz_target_to_proto(context.fuzz_target),
      cross_pollinate_fuzzers=cross_pollinate_fuzzers,
      last_execution_failed=last_execution_failed,
      revision=revision)

  response = host.stub().PruneCorpus(request)

  project_qualified_name = context.fuzz_target.project_qualified_name()
  today_date = datetime.datetime.utcnow().date()
  coverage_info = data_types.CoverageInformation(
      fuzzer=project_qualified_name, date=today_date)

  # Intentionally skip edge and function coverage values as those would come
  # from the fuzzer coverage cron task (see src/go/server/cron/coverage.go).
  coverage_info.corpus_size_units = response.coverage_info.corpus_size_units
  coverage_info.corpus_size_bytes = response.coverage_info.corpus_size_bytes
  coverage_info.corpus_location = response.coverage_info.corpus_location
  coverage_info.corpus_backup_location = (
      response.coverage_info.corpus_backup_location)
  coverage_info.quarantine_size_units = (
      response.coverage_info.quarantine_size_units)
  coverage_info.quarantine_size_bytes = (
      response.coverage_info.quarantine_size_bytes)
  coverage_info.quarantine_location = response.coverage_info.quarantine_location

  crashes = [
      corpus_pruning_task.CorpusCrash(
          crash_state=crash.crash_state,
          crash_type=crash.crash_type,
          crash_address=crash.crash_address,
          crash_stacktrace=utils.decode_to_unicode(crash.crash_stacktrace),
          unit_path=crash.unit_path,
          security_flag=crash.security_flag,
      ) for crash in response.crashes
  ]

  return corpus_pruning_task.CorpusPruningResult(
      coverage_info=coverage_info,
      crashes=crashes,
      fuzzer_binary_name=response.fuzzer_binary_name,
      revision=response.revision)
def do_corpus_pruning(context, last_execution_failed, revision):
  """Run corpus pruning."""
  # Set |FUZZ_TARGET| environment variable to help with unarchiving only fuzz
  # target and its related files.
  environment.set_value("FUZZ_TARGET", context.fuzz_target.binary)

  if environment.is_trusted_host():
    from bot.untrusted_runner import tasks_host
    return tasks_host.do_corpus_pruning(context, last_execution_failed,
                                        revision)

  build_manager.setup_build(revision=revision)
  build_directory = environment.get_value("BUILD_DIR")
  if not build_directory:
    raise CorpusPruningException("Failed to setup build.")

  start_time = datetime.datetime.utcnow()
  runner = Runner(build_directory, context)
  pruner = CorpusPruner(runner)
  fuzzer_binary_name = os.path.basename(runner.target_path)

  # If our last execution failed, shrink to a randomized corpus of usable size
  # to prevent corpus from growing unbounded and recurring failures when trying
  # to minimize it.
  if last_execution_failed:
    for corpus_url in [
        context.corpus.get_gcs_url(),
        context.quarantine_corpus.get_gcs_url(),
    ]:
      _limit_corpus_size(corpus_url, CORPUS_SIZE_LIMIT_FOR_FAILURES)

  # Get initial corpus to process from GCS.
  context.sync_to_disk()
  initial_corpus_size = shell.get_directory_file_count(
      context.initial_corpus_path)

  # Restore a small batch of quarantined units back to corpus.
  context.restore_quarantined_units()

  # Shrink to a minimized corpus using corpus merge.
  pruner.run(
      context.initial_corpus_path,
      context.minimized_corpus_path,
      context.bad_units_path,
  )

  # Sync minimized corpus back to GCS.
  context.sync_to_gcs()

  # Create corpus backup.
  backup_bucket = environment.get_value("BACKUP_BUCKET")
  corpus_backup_url = corpus_manager.backup_corpus(
      backup_bucket, context.corpus, context.minimized_corpus_path)

  minimized_corpus_size_units = shell.get_directory_file_count(
      context.minimized_corpus_path)
  minimized_corpus_size_bytes = shell.get_directory_size(
      context.minimized_corpus_path)

  logs.log("Corpus pruned from %d to %d units." %
           (initial_corpus_size, minimized_corpus_size_units))

  # Process bad units found during merge.
  # Mapping of crash state -> CorpusCrash
  crashes = {}
  pruner.process_bad_units(context.bad_units_path,
                           context.quarantine_corpus_path, crashes)
  context.quarantine_corpus.rsync_from_disk(context.quarantine_corpus_path)

  # Store corpus stats into CoverageInformation entity.
  project_qualified_name = context.fuzz_target.project_qualified_name()
  today = datetime.datetime.utcnow().date()
  coverage_info = data_types.CoverageInformation(
      fuzzer=project_qualified_name, date=today)

  quarantine_corpus_size = shell.get_directory_file_count(
      context.quarantine_corpus_path)
  quarantine_corpus_dir_size = shell.get_directory_size(
      context.quarantine_corpus_path)

  # Populate coverage stats.
  coverage_info.corpus_size_units = minimized_corpus_size_units
  coverage_info.corpus_size_bytes = minimized_corpus_size_bytes
  coverage_info.quarantine_size_units = quarantine_corpus_size
  coverage_info.quarantine_size_bytes = quarantine_corpus_dir_size
  coverage_info.corpus_backup_location = corpus_backup_url
  coverage_info.corpus_location = context.corpus.get_gcs_url()
  coverage_info.quarantine_location = context.quarantine_corpus.get_gcs_url()

  # Calculate remaining time to use for shared corpus merging.
  time_remaining = _get_time_remaining(start_time)
  if time_remaining <= 0:
    logs.log_warn("Not enough time for shared corpus merging.")
    return None

  cross_pollinator = CrossPollinator(runner)
  cross_pollinator.run(time_remaining)

  context.sync_to_gcs()

  # Update corpus size stats.
  minimized_corpus_size_units = shell.get_directory_file_count(
      context.minimized_corpus_path)
  minimized_corpus_size_bytes = shell.get_directory_size(
      context.minimized_corpus_path)
  coverage_info.corpus_size_units = minimized_corpus_size_units
  coverage_info.corpus_size_bytes = minimized_corpus_size_bytes

  logs.log("Finished.")

  result = CorpusPruningResult(
      coverage_info=coverage_info,
      crashes=list(crashes.values()),
      fuzzer_binary_name=fuzzer_binary_name,
      revision=environment.get_value("APP_REVISION"),
  )

  return result
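# Hedged sketch, not from the original source: a minimal version of the
# _get_time_remaining() helper referenced by do_corpus_pruning() above. It
# assumes a fixed CORPUS_PRUNING_TIMEOUT budget in seconds; the real helper
# may derive the budget differently (e.g. from the task lease time).
CORPUS_PRUNING_TIMEOUT = 22 * 60 * 60  # Assumed 22-hour budget, illustrative.


def _get_time_remaining(start_time):
  """Example only: seconds left in the pruning budget since |start_time|."""
  time_used = (datetime.datetime.utcnow() - start_time).total_seconds()
  return CORPUS_PRUNING_TIMEOUT - time_used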
def setUp(self):
  test_helpers.patch_environ(self)
  self.maxDiff = None  # pylint: disable=invalid-name

  data_types.Fuzzer(
      name='testFuzzer',
      stats_columns=('sum(t.blah) as blah, custom(j.new_crashes) '
                     'as new_crashes, _EDGE_COV as edge_coverage, '
                     '_FUNC_COV as func_coverage, '
                     '_CORPUS_SIZE as corpus_size, '
                     '_CORPUS_BACKUP as corpus_backup, '
                     '_QUARANTINE_SIZE as quarantine_size, '
                     '_COV_REPORT as coverage_report, '
                     '_FUZZER_RUN_LOGS as fuzzer_logs,'
                     '_PERFORMANCE_REPORT as performance_report'),
      stats_column_descriptions=(
          'blah: "blah description"\n'
          'func_coverage: "func coverage description"\n')).put()

  data_types.Fuzzer(
      name='testFuzzer2',
      stats_columns=('sum(t.blah) as blah, custom(j.new_crashes) '
                     'as new_crashes, _EDGE_COV as edge_coverage, '
                     '_FUNC_COV as func_coverage, '
                     '_CORPUS_SIZE as corpus_size, '
                     '_CORPUS_BACKUP as corpus_backup, '
                     '_QUARANTINE_SIZE as quarantine_size, '
                     '_COV_REPORT as coverage_report, '
                     '_FUZZER_RUN_LOGS as fuzzer_logs,'
                     '_PERFORMANCE_REPORT as performance_report'),
      stats_column_descriptions=(
          'blah: "blah description"\n'
          'func_coverage: "func coverage description"\n')).put()

  data_types.Job(
      name='job', environment_string='FUZZ_LOGS_BUCKET = bucket').put()

  now = datetime.datetime.utcnow()
  data_types.FuzzTarget(
      engine='testFuzzer', project='test-project', binary='1_fuzzer').put()
  data_types.FuzzTargetJob(
      fuzz_target_name='testFuzzer_1_fuzzer', job='job', last_run=now).put()

  data_types.FuzzTarget(
      engine='testFuzzer', project='test-project', binary='2_fuzzer').put()
  data_types.FuzzTargetJob(
      fuzz_target_name='testFuzzer_2_fuzzer', job='job', last_run=now).put()

  data_types.FuzzTarget(
      engine='testFuzzer', project='test-project', binary='3_fuzzer').put()
  data_types.FuzzTargetJob(
      fuzz_target_name='testFuzzer_3_fuzzer', job='job', last_run=now).put()

  data_types.FuzzTarget(
      engine='testFuzzer2', project='test-project', binary='1_fuzzer').put()
  data_types.FuzzTargetJob(
      fuzz_target_name='testFuzzer2_1_fuzzer', job='job', last_run=now).put()

  cov_info = data_types.CoverageInformation(
      fuzzer='2_fuzzer', date=datetime.date(2016, 10, 19))
  cov_info.edges_covered = 11
  cov_info.edges_total = 30
  cov_info.functions_covered = 10
  cov_info.functions_total = 15
  cov_info.html_report_url = 'https://report_for_2_fuzzer/20161019'
  cov_info.corpus_size_units = 20
  cov_info.corpus_size_bytes = 200
  cov_info.quarantine_size_units = 0
  cov_info.quarantine_size_bytes = 0
  cov_info.corpus_location = 'gs://corpus'
  cov_info.corpus_backup_location = 'gs://corpus-backup/file.zip'
  cov_info.quarantine_location = 'gs://quarantine'
  cov_info.put()

  cov_info = data_types.CoverageInformation(
      fuzzer='2_fuzzer', date=datetime.date(2016, 10, 21))
  cov_info.edges_covered = 15
  cov_info.edges_total = 30
  cov_info.functions_covered = 11
  cov_info.functions_total = 15
  cov_info.html_report_url = 'https://report_for_2_fuzzer/20161021'
  cov_info.corpus_size_units = 40
  cov_info.corpus_size_bytes = 400
  cov_info.quarantine_size_units = 8
  cov_info.quarantine_size_bytes = 80
  cov_info.corpus_location = 'gs://corpus'
  cov_info.corpus_backup_location = 'gs://corpus-backup/file.zip'
  cov_info.quarantine_location = 'gs://quarantine'
  cov_info.put()

  cov_info = data_types.CoverageInformation(
      fuzzer='1_fuzzer', date=datetime.date(2016, 10, 20))
  cov_info.edges_covered = 17
  cov_info.edges_total = 38
  cov_info.functions_covered = 12
  cov_info.functions_total = 19
  cov_info.html_report_url = 'https://report_for_1_fuzzer/20161020'
  cov_info.corpus_size_units = 47
  cov_info.corpus_size_bytes = 480
  cov_info.quarantine_size_units = 3
  cov_info.quarantine_size_bytes = 8
  cov_info.corpus_location = 'gs://corpus'
  cov_info.corpus_backup_location = 'gs://corpus-backup/file.zip'
  cov_info.quarantine_location = 'gs://quarantine'
  cov_info.put()

  self.client = mock.Mock(spec_set=big_query.Client)
  test_helpers.patch(self, [
      'google_cloud_utils.big_query.Client',
  ])
  self.mock.Client.return_value = self.client
def do_corpus_pruning(context, last_execution_failed, revision):
  """Run corpus pruning."""
  # Set |FUZZ_TARGET| environment variable to help with unarchiving only fuzz
  # target and its related files.
  environment.set_value('FUZZ_TARGET', context.fuzz_target.binary)

  if environment.is_trusted_host():
    from bot.untrusted_runner import tasks_host
    return tasks_host.do_corpus_pruning(context, last_execution_failed,
                                        revision)

  if not build_manager.setup_build(revision=revision):
    raise CorpusPruningException('Failed to setup build.')

  build_directory = environment.get_value('BUILD_DIR')
  start_time = datetime.datetime.utcnow()
  runner = Runner(build_directory, context)
  pruner = CorpusPruner(runner)
  fuzzer_binary_name = os.path.basename(runner.target_path)

  # If our last execution failed, shrink to a randomized corpus of usable size
  # to prevent corpus from growing unbounded and recurring failures when trying
  # to minimize it.
  if last_execution_failed:
    for corpus_url in [
        context.corpus.get_gcs_url(),
        context.quarantine_corpus.get_gcs_url()
    ]:
      _limit_corpus_size(corpus_url)

  # Get initial corpus to process from GCS.
  context.sync_to_disk()
  initial_corpus_size = shell.get_directory_file_count(
      context.initial_corpus_path)

  # Restore a small batch of quarantined units back to corpus.
  context.restore_quarantined_units()

  # Shrink to a minimized corpus using corpus merge.
  pruner_stats = pruner.run(context.initial_corpus_path,
                            context.minimized_corpus_path,
                            context.bad_units_path)

  # Sync minimized corpus back to GCS.
  context.sync_to_gcs()

  # Create corpus backup.
  # Temporarily copy the past crash regressions folder into the minimized
  # corpus so that the corpus backup archive can include both.
  regressions_input_dir = os.path.join(context.initial_corpus_path,
                                       'regressions')
  regressions_output_dir = os.path.join(context.minimized_corpus_path,
                                        'regressions')
  if shell.get_directory_file_count(regressions_input_dir):
    shutil.copytree(regressions_input_dir, regressions_output_dir)
  backup_bucket = environment.get_value('BACKUP_BUCKET')
  corpus_backup_url = corpus_manager.backup_corpus(
      backup_bucket, context.corpus, context.minimized_corpus_path)
  shell.remove_directory(regressions_output_dir)

  minimized_corpus_size_units = shell.get_directory_file_count(
      context.minimized_corpus_path)
  minimized_corpus_size_bytes = shell.get_directory_size(
      context.minimized_corpus_path)

  logs.log('Corpus pruned from %d to %d units.' %
           (initial_corpus_size, minimized_corpus_size_units))

  # Process bad units found during merge.
  # Mapping of crash state -> CorpusCrash
  crashes = {}
  pruner.process_bad_units(context.bad_units_path,
                           context.quarantine_corpus_path, crashes)
  context.quarantine_corpus.rsync_from_disk(context.quarantine_corpus_path)

  # Store corpus stats into CoverageInformation entity.
  project_qualified_name = context.fuzz_target.project_qualified_name()
  today = datetime.datetime.utcnow().date()
  coverage_info = data_types.CoverageInformation(
      fuzzer=project_qualified_name, date=today)

  quarantine_corpus_size = shell.get_directory_file_count(
      context.quarantine_corpus_path)
  quarantine_corpus_dir_size = shell.get_directory_size(
      context.quarantine_corpus_path)

  # Save the minimized corpus size before cross pollination to put in BigQuery.
  pre_pollination_corpus_size = minimized_corpus_size_units

  # Populate coverage stats.
  coverage_info.corpus_size_units = minimized_corpus_size_units
  coverage_info.corpus_size_bytes = minimized_corpus_size_bytes
  coverage_info.quarantine_size_units = quarantine_corpus_size
  coverage_info.quarantine_size_bytes = quarantine_corpus_dir_size
  coverage_info.corpus_backup_location = corpus_backup_url
  coverage_info.corpus_location = context.corpus.get_gcs_url()
  coverage_info.quarantine_location = context.quarantine_corpus.get_gcs_url()

  # Calculate remaining time to use for shared corpus merging.
  time_remaining = _get_time_remaining(start_time)
  if time_remaining <= 0:
    logs.log_warn('Not enough time for shared corpus merging.')
    return None

  cross_pollinator = CrossPollinator(runner)
  pollinator_stats = cross_pollinator.run(time_remaining)

  context.sync_to_gcs()

  # Update corpus size stats.
  minimized_corpus_size_units = shell.get_directory_file_count(
      context.minimized_corpus_path)
  minimized_corpus_size_bytes = shell.get_directory_size(
      context.minimized_corpus_path)
  coverage_info.corpus_size_units = minimized_corpus_size_units
  coverage_info.corpus_size_bytes = minimized_corpus_size_bytes

  logs.log('Finished.')

  sources = ','.join([
      fuzzer.fuzz_target.project_qualified_name()
      for fuzzer in context.cross_pollinate_fuzzers
  ])

  cross_pollination_stats = None
  if pruner_stats and pollinator_stats:
    cross_pollination_stats = CrossPollinationStats(
        project_qualified_name, context.cross_pollination_method, sources,
        context.tag, initial_corpus_size, pre_pollination_corpus_size,
        pruner_stats['edge_coverage'], pollinator_stats['edge_coverage'],
        pruner_stats['feature_coverage'], pollinator_stats['feature_coverage'])

  return CorpusPruningResult(
      coverage_info=coverage_info,
      crashes=list(crashes.values()),
      fuzzer_binary_name=fuzzer_binary_name,
      revision=environment.get_value('APP_REVISION'),
      cross_pollination_stats=cross_pollination_stats)
def test_fuzzer_coverage(self):
  """Test fuzzer coverage cron implementation."""
  # An old CoverageInformation for a fuzzer that should NOT be overwritten.
  cov_info_old = data_types.CoverageInformation(
      date=datetime.date(2018, 9, 1),
      fuzzer='boringssl_privkey',
      functions_covered=123,
      functions_total=555,
      edges_covered=1337,
      edges_total=31337,
      html_report_url=(
          'https://storage.googleapis.com/oss-fuzz-coverage/boringssl/'
          'reports/20180905/linux/index.html'))
  cov_info_old.put()

  # A recent CoverageInformation for a fuzzer that should be overwritten.
  cov_info_recent = data_types.CoverageInformation(
      date=datetime.date(2018, 9, 7),
      fuzzer='base64_decode_fuzzer',
      functions_covered=1,
      functions_total=5,
      edges_covered=3,
      edges_total=20,
      html_report_url='intentionally junk URL that must be overwritten')
  cov_info_recent.put()

  # A recent CoverageInformation for a project that should be overwritten.
  cov_info_project = data_types.CoverageInformation(
      date=datetime.date(2018, 9, 7),
      fuzzer='zlib',
      functions_covered=1,
      functions_total=2,
      edges_covered=3,
      edges_total=4,
      html_report_url='intentionally junk URL that must be overwritten')
  cov_info_project.put()

  fuzzer_coverage.collect_fuzzer_coverage(INTEGRATION_TEST_BUCKET)
  query = data_types.CoverageInformation.query()

  entities = {}
  for cov_info in query.fetch():
    entities[cov_info.key] = cov_info

  # Assert and delete entities one by one to make sure we verify each of them.
  key = ndb.Key('CoverageInformation', 'boringssl_bn_div-20180905')
  expected_entity = data_types.CoverageInformation(
      date=datetime.date(2018, 9, 5),
      fuzzer='boringssl_bn_div',
      functions_covered=82,
      functions_total=1079,
      edges_covered=1059,
      edges_total=12384,
      html_report_url=(
          'https://storage.googleapis.com/oss-fuzz-coverage/boringssl/'
          'reports/20180905/linux/index.html'))
  self.assertCoverageInformation(entities[key], expected_entity)
  del entities[key]

  # This is the "old" entity that should not be updated (|cov_info_old|).
  key = ndb.Key('CoverageInformation', 'boringssl_privkey-20180901')
  expected_entity = data_types.CoverageInformation(
      date=datetime.date(2018, 9, 1),
      fuzzer='boringssl_privkey',
      functions_covered=123,
      functions_total=555,
      edges_covered=1337,
      edges_total=31337,
      html_report_url=(
          'https://storage.googleapis.com/oss-fuzz-coverage/boringssl/'
          'reports/20180905/linux/index.html'))
  self.assertCoverageInformation(entities[key], expected_entity)
  del entities[key]

  key = ndb.Key('CoverageInformation', 'boringssl_privkey-20180905')
  expected_entity = data_types.CoverageInformation(
      date=datetime.date(2018, 9, 5),
      fuzzer='boringssl_privkey',
      functions_covered=374,
      functions_total=1510,
      edges_covered=3535,
      edges_total=16926,
      html_report_url=(
          'https://storage.googleapis.com/oss-fuzz-coverage/boringssl/'
          'reports/20180905/linux/index.html'))
  self.assertCoverageInformation(entities[key], expected_entity)
  del entities[key]

  # Entity for boringssl project, not for a single fuzz target.
  key = ndb.Key('CoverageInformation', 'boringssl-20180905')
  expected_entity = data_types.CoverageInformation(
      date=datetime.date(2018, 9, 5),
      fuzzer='boringssl',
      functions_covered=1872,
      functions_total=4137,
      edges_covered=21303,
      edges_total=51251,
      html_report_url=(
          'https://storage.googleapis.com/oss-fuzz-coverage/boringssl/'
          'reports/20180905/linux/index.html'))
  self.assertCoverageInformation(entities[key], expected_entity)
  del entities[key]

  # The "recent" entity that should be updated (|cov_info_recent|).
  key = ndb.Key('CoverageInformation', 'base64_decode_fuzzer-20180907')
  expected_entity = data_types.CoverageInformation(
      date=datetime.date(2018, 9, 7),
      fuzzer='base64_decode_fuzzer',
      functions_covered=252,
      functions_total=5646,
      edges_covered=1111,
      edges_total=38748,
      html_report_url=(
          'https://chromium-coverage.appspot.com/reports/589371_fuzzers_only/'
          'linux/index.html'))
  self.assertCoverageInformation(entities[key], expected_entity)
  del entities[key]

  key = ndb.Key('CoverageInformation', 'zucchini_raw_gen_fuzzer-20180907')
  expected_entity = data_types.CoverageInformation(
      date=datetime.date(2018, 9, 7),
      fuzzer='zucchini_raw_gen_fuzzer',
      functions_covered=440,
      functions_total=6439,
      edges_covered=1791,
      edges_total=45121,
      html_report_url=(
          'https://chromium-coverage.appspot.com/reports/589371_fuzzers_only/'
          'linux/index.html'))
  self.assertCoverageInformation(entities[key], expected_entity)
  del entities[key]

  # Entity for chromium project.
  key = ndb.Key('CoverageInformation', 'chromium-20180907')
  expected_entity = data_types.CoverageInformation(
      date=datetime.date(2018, 9, 7),
      fuzzer='chromium',
      functions_covered=79960,
      functions_total=467023,
      edges_covered=682323,
      edges_total=3953229,
      html_report_url=(
          'https://chromium-coverage.appspot.com/reports/589371_fuzzers_only/'
          'linux/index.html'))
  self.assertCoverageInformation(entities[key], expected_entity)
  del entities[key]

  key = ndb.Key('CoverageInformation', 'zlib_uncompress_fuzzer-20180907')
  expected_entity = data_types.CoverageInformation(
      date=datetime.date(2018, 9, 7),
      fuzzer='zlib_uncompress_fuzzer',
      functions_covered=19,
      functions_total=47,
      edges_covered=987,
      edges_total=1687,
      html_report_url=(
          'https://storage.googleapis.com/oss-fuzz-coverage/zlib/reports/'
          '20180907/linux/index.html'))
  self.assertCoverageInformation(entities[key], expected_entity)
  del entities[key]

  # The recent project entity that should be updated (|cov_info_project|).
  key = ndb.Key('CoverageInformation', 'zlib-20180907')
  expected_entity = data_types.CoverageInformation(
      date=datetime.date(2018, 9, 7),
      fuzzer='zlib',
      functions_covered=19,
      functions_total=47,
      edges_covered=987,
      edges_total=1687,
      html_report_url=(
          'https://storage.googleapis.com/oss-fuzz-coverage/zlib/reports/'
          '20180907/linux/index.html'))
  self.assertCoverageInformation(entities[key], expected_entity)
  del entities[key]

  # Should not have any entities left unverified. This ensures the collection
  # logic does not create duplicate entities when an existing one is present.
  # In practice, an existing entity could be created either by an earlier
  # execution of the cron task, or by the corpus pruning task.
  self.assertEqual(len(entities), 0)