def initialize_resources_dir():
  """Download Fuchsia QEMU resources from GCS bucket."""
  # This module depends on multiprocessing, which is not available in
  # appengine, and since appengine *imports* this file (but does not run this
  # function!), we import it here.
  from google_cloud_utils import gsutil
  resources_dir = environment.get_value('RESOURCES_DIR')
  if not resources_dir:
    raise errors.FuchsiaConfigError('Could not find RESOURCES_DIR')
  fuchsia_resources_dir = os.path.join(resources_dir, 'fuchsia')

  shell.create_directory(
      fuchsia_resources_dir, create_intermediates=True, recreate=True)

  # Bucket for QEMU resources.
  fuchsia_resources_url = environment.get_value('FUCHSIA_RESOURCES_URL')
  if not fuchsia_resources_url:
    raise errors.FuchsiaConfigError(
        'Could not find path for remote '
        'Fuchsia resources bucket (FUCHSIA_RESOURCES_URL)')

  gsutil_command_arguments = [
      '-m', 'cp', '-r', fuchsia_resources_url, fuchsia_resources_dir
  ]
  logs.log('Beginning Fuchsia SDK download.')
  result = gsutil.GSUtilRunner().run_gsutil(gsutil_command_arguments)
  if result.return_code or result.timed_out:
    raise errors.FuchsiaSdkError('Failed to download Fuchsia '
                                 'resources: ' + result.output)
  logs.log('Fuchsia SDK download complete.')

  # Bucket for build resources. Necessary for fuzzer selection.
  logs.log('Fetching Fuchsia build.')
  fuchsia_build_url = environment.get_value('FUCHSIA_BUILD_URL')
  if not fuchsia_build_url:
    raise errors.FuchsiaConfigError(
        'Could not find path for remote '
        'Fuchsia build bucket (FUCHSIA_BUILD_URL)')

  gsutil_command_arguments = [
      '-m', 'cp', '-r', fuchsia_build_url, fuchsia_resources_dir
  ]
  logs.log('Beginning Fuchsia build download.')
  result = gsutil.GSUtilRunner().run_gsutil(gsutil_command_arguments)
  if result.return_code or result.timed_out:
    raise errors.FuchsiaSdkError('Failed to download Fuchsia '
                                 'resources: ' + result.output)
  return fuchsia_resources_dir
def initialize_resources_dir():
  """Download Fuchsia QEMU resources from GCS bucket."""
  resources_dir = environment.get_value('RESOURCES_DIR')
  if not resources_dir:
    raise errors.FuchsiaConfigError('Could not find RESOURCES_DIR')
  fuchsia_resources_dir = os.path.join(resources_dir, 'fuchsia')

  shell.create_directory(fuchsia_resources_dir, recreate=True)

  fuchsia_resources_url = environment.get_value('FUCHSIA_RESOURCES_URL')
  if not fuchsia_resources_url:
    raise errors.FuchsiaConfigError(
        'Could not find path for remote '
        'Fuchsia resources bucket (FUCHSIA_RESOURCES_URL)')

  gsutil_command_arguments = [
      '-m', 'cp', '-r', fuchsia_resources_url, fuchsia_resources_dir
  ]
  logs.log('Beginning Fuchsia SDK download.')
  result = gsutil.GSUtilRunner().run_gsutil(gsutil_command_arguments)
  if result.return_code or result.timed_out:
    raise errors.FuchsiaSdkError('Failed to download Fuchsia '
                                 'resources: ' + result.output)
  logs.log('Fuchsia SDK download complete.')
  return fuchsia_resources_dir
def upload_testcases_if_needed(fuzzer_name, testcase_list, testcase_directory):
  """Upload test cases from the list to a cloud storage bucket."""
  bucket_name = local_config.ProjectConfig().get(
      'coverage.fuzzer-testcases.bucket')
  if not bucket_name:
    return

  # Only consider test cases in the output directory. We might upload too much
  # if we search the data directory as well, or have missing resources.
  # TODO(mbarbella): Support resources in data bundles.
  testcase_list = [
      os.path.relpath(testcase, testcase_directory)
      for testcase in testcase_list
      if testcase.startswith(testcase_directory)
  ]
  if not testcase_list:
    return

  # Bail out if this batch of test cases is too large.
  directory_size = shell.get_directory_size(testcase_directory)
  if directory_size >= MAX_TESTCASE_DIRECTORY_SIZE:
    return

  formatted_date = str(utils.utcnow().date())
  gcs_base_url = 'gs://{bucket_name}/{date}/{fuzzer_name}/'.format(
      bucket_name=bucket_name, date=formatted_date, fuzzer_name=fuzzer_name)

  runner = gsutil.GSUtilRunner()
  batch_directory_blobs = storage.list_blobs(gcs_base_url)
  total_testcases = 0
  for blob in batch_directory_blobs:
    if not blob.endswith(LIST_FILE_BASENAME):
      continue

    list_gcs_url = 'gs://{bucket}/{blob}'.format(bucket=bucket_name, blob=blob)
    data = storage.read_data(list_gcs_url)
    if not data:
      logs.log_error('Read no data from test case list at {gcs_url}'.format(
          gcs_url=list_gcs_url))
      continue

    total_testcases += len(data.splitlines())

  # If we've already uploaded enough test cases for this fuzzer today, return.
  if total_testcases >= TESTCASES_PER_DAY:
    return

  # Upload each batch of tests to its own unique sub-bucket.
  identifier = environment.get_value('BOT_NAME') + str(utils.utcnow())
  gcs_base_url += utils.string_hash(identifier)

  list_gcs_url = gcs_base_url + '/' + LIST_FILE_BASENAME
  if not storage.write_data('\n'.join(testcase_list), list_gcs_url):
    return

  runner.rsync(testcase_directory, gcs_base_url)
  logs.log('Synced {count} test cases to {gcs_url}'.format(
      count=len(testcase_list), gcs_url=gcs_base_url))
def setUp(self):
  test_helpers.patch_environ(self)
  test_helpers.patch(self, ["system.new_process.ProcessRunner.run_and_wait"])
  test_utils.set_up_pyfakefs(self)
  self.gsutil_runner_obj = gsutil.GSUtilRunner()
def initialize_resources_dir():
  """Download Fuchsia QEMU resources from GCS bucket."""
  # This module depends on multiprocessing, which is not available in
  # appengine, and since appengine *imports* this file (but does not run this
  # function!), we import it here.
  from google_cloud_utils import gsutil
  resources_dir = environment.get_value('RESOURCES_DIR')
  if not resources_dir:
    raise errors.FuchsiaConfigError('Could not find RESOURCES_DIR')
  fuchsia_resources_dir = os.path.join(resources_dir, 'fuchsia')

  shell.create_directory(
      fuchsia_resources_dir, create_intermediates=True, recreate=True)

  # Bucket for QEMU resources.
  fuchsia_resources_url = environment.get_value('FUCHSIA_BUILD_URL')
  if not fuchsia_resources_url:
    raise errors.FuchsiaConfigError(
        'Could not find path for remote '
        'Fuchsia resources bucket (FUCHSIA_BUILD_URL)')

  gsutil_command_arguments = [
      '-m', 'cp', '-r', fuchsia_resources_url, fuchsia_resources_dir
  ]
  logs.log('Fetching Fuchsia build.')
  result = gsutil.GSUtilRunner().run_gsutil(gsutil_command_arguments)
  if result.return_code or result.timed_out:
    raise errors.FuchsiaSdkError('Failed to download Fuchsia '
                                 'resources: ' + result.output)

  # Chmod the symbolizers so they can be used easily.
  symbolizer_path = os.path.join(fuchsia_resources_dir, 'build', 'zircon',
                                 'prebuilt', 'downloads', 'symbolize',
                                 'linux-x64', 'symbolize')
  llvm_symbolizer_path = os.path.join(fuchsia_resources_dir, 'build',
                                      'buildtools', 'linux-x64', 'clang',
                                      'bin', 'llvm-symbolizer')
  os.chmod(symbolizer_path, 0o111)
  os.chmod(llvm_symbolizer_path, 0o111)

  logs.log('Fuchsia build download complete.')
  return fuchsia_resources_dir
def update_data_bundle(fuzzer, data_bundle):
  """Updates a data bundle to the latest version."""
  # This module can't be in the global imports due to appengine issues
  # with multiprocessing and psutil imports.
  from google_cloud_utils import gsutil

  # If we are using a data bundle on NFS, it is expected that our testcases
  # will usually be large enough that we would fill up our tmpfs directory
  # pretty quickly. So, change it to use an on-disk directory.
  if not data_bundle.is_local:
    testcase_disk_directory = environment.get_value('FUZZ_INPUTS_DISK')
    environment.set_value('FUZZ_INPUTS', testcase_disk_directory)

  data_bundle_directory = get_data_bundle_directory(fuzzer.name)
  if not data_bundle_directory:
    logs.log_error('Failed to setup data bundle %s.' % data_bundle.name)
    return False

  if not shell.create_directory(
      data_bundle_directory, create_intermediates=True):
    logs.log_error(
        'Failed to create data bundle %s directory.' % data_bundle.name)
    return False

  # Check if data bundle is up to date. If yes, skip the update.
  if _is_data_bundle_up_to_date(data_bundle, data_bundle_directory):
    logs.log('Data bundle was recently synced, skip.')
    return True

  # Fetch lock for this data bundle.
  if not _fetch_lock_for_data_bundle_update(data_bundle):
    logs.log_error('Failed to lock data bundle %s.' % data_bundle.name)
    return False

  # Re-check if another bot did the sync already. If yes, skip.
  if _is_data_bundle_up_to_date(data_bundle, data_bundle_directory):
    logs.log('Another bot finished the sync, skip.')
    _release_lock_for_data_bundle_update(data_bundle)
    return True

  time_before_sync_start = time.time()

  # No need to sync anything if this is a search index data bundle. In that
  # case, the fuzzer will generate testcases from a gcs bucket periodically.
  if not _is_search_index_data_bundle(data_bundle.name):
    bucket_url = data_handler.get_data_bundle_bucket_url(data_bundle.name)

    if environment.is_trusted_host() and data_bundle.sync_to_worker:
      from bot.untrusted_runner import corpus_manager
      from bot.untrusted_runner import file_host
      worker_data_bundle_directory = file_host.rebase_to_worker_root(
          data_bundle_directory)
      file_host.create_directory(
          worker_data_bundle_directory, create_intermediates=True)
      result = corpus_manager.RemoteGSUtilRunner().rsync(
          bucket_url, worker_data_bundle_directory, delete=False)
    else:
      result = gsutil.GSUtilRunner().rsync(
          bucket_url, data_bundle_directory, delete=False)

    if result.return_code != 0:
      logs.log_error('Failed to sync data bundle %s: %s.' %
                     (data_bundle.name, result.output))
      _release_lock_for_data_bundle_update(data_bundle)
      return False

  # Update the testcase list file.
  testcase_manager.create_testcase_list_file(data_bundle_directory)

  # Write last synced time in the sync file.
  sync_file_path = _get_data_bundle_sync_file_path(data_bundle_directory)
  utils.write_data_to_file(time_before_sync_start, sync_file_path)
  if environment.is_trusted_host() and data_bundle.sync_to_worker:
    from bot.untrusted_runner import file_host
    worker_sync_file_path = file_host.rebase_to_worker_root(sync_file_path)
    file_host.copy_file_to_worker(sync_file_path, worker_sync_file_path)

  # Release acquired lock.
  _release_lock_for_data_bundle_update(data_bundle)
  return True
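# The check/lock/re-check flow in update_data_bundle() is a double-checked
# locking pattern. Below is a minimal, self-contained sketch of that pattern
# for illustration only; the real implementation uses a datastore-backed lock
# (_fetch_lock_for_data_bundle_update) shared across bots, for which
# threading.Lock here is only a stand-in.
import threading
import time

_lock = threading.Lock()
_last_sync_time = 0.0
SYNC_INTERVAL_SECONDS = 60.0  # Hypothetical freshness window.


def _is_up_to_date():
  """Returns True if a sync finished within the freshness window."""
  return time.time() - _last_sync_time < SYNC_INTERVAL_SECONDS


def sync_if_needed(do_sync):
  """Runs |do_sync| unless this or another worker synced recently."""
  global _last_sync_time
  if _is_up_to_date():
    return True  # Recently synced, skip.
  if not _lock.acquire(timeout=10):
    return False  # Could not get the lock.
  try:
    if _is_up_to_date():
      return True  # Another worker finished the sync while we waited.
    start = time.time()
    do_sync()
    # Record the time the sync *started*, so changes made during the sync
    # are not mistakenly treated as already covered.
    _last_sync_time = start
    return True
  finally:
    _lock.release()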
def setUp(self):
  """Set up."""
  super(CorpusPruningTestUntrusted, self).setUp()
  environment.set_value('JOB_NAME', 'libfuzzer_asan_job')

  helpers.patch(self, [
      'bot.fuzzers.engine.get',
      'bot.tasks.setup.get_fuzzer_directory',
      'base.tasks.add_task',
      'bot.tasks.corpus_pruning_task._record_cross_pollination_stats',
  ])
  self.mock.get.return_value = libFuzzer_engine.LibFuzzerEngine()
  self.mock.get_fuzzer_directory.return_value = os.path.join(
      environment.get_value('ROOT_DIR'), 'src', 'python', 'bot', 'fuzzers',
      'libFuzzer')
  self.corpus_bucket = os.environ['CORPUS_BUCKET']
  self.quarantine_bucket = os.environ['QUARANTINE_BUCKET']
  self.backup_bucket = os.environ['BACKUP_BUCKET']

  job = data_types.Job(
      name='libfuzzer_asan_job',
      environment_string=(
          'APP_NAME = test_fuzzer\n'
          'CORPUS_BUCKET = {corpus_bucket}\n'
          'QUARANTINE_BUCKET = {quarantine_bucket}\n'
          'BACKUP_BUCKET={backup_bucket}\n'
          'RELEASE_BUILD_BUCKET_PATH = '
          'gs://clusterfuzz-test-data/test_libfuzzer_builds/'
          'test-libfuzzer-build-([0-9]+).zip\n'
          'REVISION_VARS_URL = gs://clusterfuzz-test-data/'
          'test_libfuzzer_builds/'
          'test-libfuzzer-build-%s.srcmap.json\n'.format(
              corpus_bucket=self.corpus_bucket,
              quarantine_bucket=self.quarantine_bucket,
              backup_bucket=self.backup_bucket)))
  job.put()

  job = data_types.Job(
      name='libfuzzer_asan_job2',
      environment_string=(
          'APP_NAME = test2_fuzzer\n'
          'BACKUP_BUCKET = {backup_bucket}\n'
          'CORPUS_FUZZER_NAME_OVERRIDE = libfuzzer\n'.format(
              backup_bucket=self.backup_bucket)))
  job.put()

  os.environ['PROJECT_NAME'] = 'oss-fuzz'
  data_types.FuzzTarget(
      engine='libFuzzer', project='test', binary='test_fuzzer').put()
  data_types.FuzzTargetJob(
      fuzz_target_name='libFuzzer_test_fuzzer',
      engine='libFuzzer',
      job='libfuzzer_asan_job',
      last_run=datetime.datetime.now()).put()

  data_types.FuzzTarget(
      engine='libFuzzer', project='test2', binary='fuzzer').put()
  data_types.FuzzTargetJob(
      fuzz_target_name='libFuzzer_test2_fuzzer',
      engine='libFuzzer',
      job='libfuzzer_asan_job2',
      last_run=datetime.datetime.now()).put()

  environment.set_value('USE_MINIJAIL', True)
  environment.set_value('SHARED_CORPUS_BUCKET', TEST_SHARED_BUCKET)

  # Set up remote corpora.
  self.corpus = corpus_manager.FuzzTargetCorpus('libFuzzer', 'test_fuzzer')
  self.corpus.rsync_from_disk(os.path.join(TEST_DIR, 'corpus'), delete=True)

  self.quarantine_corpus = corpus_manager.FuzzTargetCorpus(
      'libFuzzer', 'test_fuzzer', quarantine=True)
  self.quarantine_corpus.rsync_from_disk(
      os.path.join(TEST_DIR, 'quarantine'), delete=True)

  self.mock.get_data_bundle_bucket_name.return_value = TEST_GLOBAL_BUCKET
  data_types.DataBundle(
      name='bundle', is_local=True, sync_to_worker=True).put()

  data_types.Fuzzer(
      revision=1,
      file_size='builtin',
      source='builtin',
      name='libFuzzer',
      max_testcases=4,
      builtin=True,
      data_bundle_name='bundle').put()

  self.temp_dir = tempfile.mkdtemp()

  # Copy corpus backup in the older date format.
  corpus_backup_date = (
      datetime.datetime.utcnow().date() -
      datetime.timedelta(days=data_types.CORPUS_BACKUP_PUBLIC_LOOKBACK_DAYS))
  corpus_backup_dir = 'gs://{bucket}/corpus/libfuzzer/test2_fuzzer/'
  gsutil.GSUtilRunner().run_gsutil([
      'cp',
      (corpus_backup_dir + 'backup.zip').format(bucket=TEST2_BACKUP_BUCKET),
      (corpus_backup_dir +
       '%s.zip' % corpus_backup_date).format(bucket=self.backup_bucket)
  ])
def upload_testcases_if_needed(fuzzer_name, testcase_list, testcase_directory,
                               data_directory):
  """Upload test cases from the list to a cloud storage bucket."""
  # Since builtin fuzzers have a coverage minimized corpus, no need to upload
  # test case samples for them.
  if fuzzer_name in builtin_fuzzers.BUILTIN_FUZZERS:
    return

  bucket_name = local_config.ProjectConfig().get(
      'coverage.fuzzer-testcases.bucket')
  if not bucket_name:
    return

  files_list = []
  has_testcases_in_testcase_directory = False
  has_testcases_in_data_directory = False
  for testcase_path in testcase_list:
    if testcase_path.startswith(testcase_directory):
      files_list.append(os.path.relpath(testcase_path, testcase_directory))
      has_testcases_in_testcase_directory = True
    elif testcase_path.startswith(data_directory):
      files_list.append(os.path.relpath(testcase_path, data_directory))
      has_testcases_in_data_directory = True
  if not files_list:
    return

  formatted_date = str(utils.utcnow().date())
  gcs_base_url = 'gs://{bucket_name}/{date}/{fuzzer_name}/'.format(
      bucket_name=bucket_name, date=formatted_date, fuzzer_name=fuzzer_name)

  runner = gsutil.GSUtilRunner()
  batch_directory_blobs = storage.list_blobs(gcs_base_url)
  total_testcases = 0
  for blob in batch_directory_blobs:
    if not blob.endswith(LIST_FILE_BASENAME):
      continue

    list_gcs_url = 'gs://{bucket}/{blob}'.format(bucket=bucket_name, blob=blob)
    data = storage.read_data(list_gcs_url)
    if not data:
      logs.log_error('Read no data from test case list at {gcs_url}'.format(
          gcs_url=list_gcs_url))
      continue

    total_testcases += len(data.splitlines())

  # If we've already uploaded enough test cases for this fuzzer today, return.
  if total_testcases >= TESTCASES_PER_DAY:
    return

  # Cap the number of files.
  testcases_limit = min(len(files_list), TESTCASES_PER_DAY - total_testcases)
  files_list = files_list[:testcases_limit]

  # Upload each batch of tests to its own unique sub-bucket.
  identifier = environment.get_value('BOT_NAME') + str(utils.utcnow())
  gcs_base_url += utils.string_hash(identifier)

  list_gcs_url = gcs_base_url + '/' + LIST_FILE_BASENAME
  if not storage.write_data('\n'.join(files_list), list_gcs_url):
    return

  if has_testcases_in_testcase_directory:
    # Sync everything in |testcase_directory| since it is fuzzer-generated.
    runner.rsync(testcase_directory, gcs_base_url)

  if has_testcases_in_data_directory:
    # Sync all fuzzer-generated testcases in the data bundle directory.
    runner.rsync(
        data_directory,
        gcs_base_url,
        exclusion_pattern=('(?!.*{fuzz_prefix})'.format(
            fuzz_prefix=testcase_manager.FUZZ_PREFIX)))

    # Sync all possible resource dependencies as a best effort. It matches
    # |resources-| prefix that a fuzzer can use to indicate resources. Also, it
    # matches resources directory that Chromium web_tests use for dependencies.
    runner.rsync(
        data_directory, gcs_base_url, exclusion_pattern='(?!.*resource)')

  logs.log('Synced {count} test cases to {gcs_url}.'.format(
      count=len(files_list), gcs_url=gcs_base_url))
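# An illustrative, standalone sketch (not part of the module) of two building
# blocks used by upload_testcases_if_needed() above: deriving a unique
# per-batch GCS prefix and enforcing the daily cap. string_hash() below is a
# hypothetical stand-in for utils.string_hash, and the constant mirrors the
# TESTCASES_PER_DAY used above.
import datetime
import hashlib

TESTCASES_PER_DAY = 5000  # Assumed daily cap for illustration.


def string_hash(value):
  """Stand-in for utils.string_hash: a stable hex digest of |value|."""
  return hashlib.sha1(value.encode('utf-8')).hexdigest()


def batch_gcs_prefix(bucket_name, fuzzer_name, bot_name):
  """Returns a unique GCS prefix for one upload batch."""
  date = str(datetime.datetime.utcnow().date())
  base = 'gs://{bucket}/{date}/{fuzzer}/'.format(
      bucket=bucket_name, date=date, fuzzer=fuzzer_name)
  # Hashing bot name + current time gives each batch its own sub-path, so
  # concurrent bots never clobber each other's uploads.
  return base + string_hash(bot_name + str(datetime.datetime.utcnow()))


def cap_files(files_list, already_uploaded):
  """Trims |files_list| so the daily total stays under TESTCASES_PER_DAY."""
  remaining = max(0, TESTCASES_PER_DAY - already_uploaded)
  return files_list[:remaining]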
def setUp(self):
  """Set up."""
  super(CorpusPruningTestUntrusted, self).setUp()
  environment.set_value("JOB_NAME", "libfuzzer_asan_job")

  helpers.patch(
      self,
      [
          "bot.fuzzers.engine.get",
          "bot.fuzzers.libFuzzer.fuzzer.LibFuzzer.fuzzer_directory",
          "base.tasks.add_task",
          "datastore.data_handler.get_data_bundle_bucket_name",
      ],
  )
  self.mock.get.return_value = libFuzzer_engine.LibFuzzerEngine()
  self.mock.fuzzer_directory.return_value = os.path.join(
      environment.get_value("ROOT_DIR"),
      "src",
      "python",
      "bot",
      "fuzzers",
      "libFuzzer",
  )
  self.corpus_bucket = os.environ["CORPUS_BUCKET"]
  self.quarantine_bucket = os.environ["QUARANTINE_BUCKET"]
  self.backup_bucket = os.environ["BACKUP_BUCKET"]

  job = data_types.Job(
      name="libfuzzer_asan_job",
      environment_string=("APP_NAME = test_fuzzer\n"
                          "CORPUS_BUCKET = {corpus_bucket}\n"
                          "QUARANTINE_BUCKET = {quarantine_bucket}\n"
                          "BACKUP_BUCKET={backup_bucket}\n"
                          "RELEASE_BUILD_BUCKET_PATH = "
                          "gs://clusterfuzz-test-data/test_libfuzzer_builds/"
                          "test-libfuzzer-build-([0-9]+).zip\n"
                          "REVISION_VARS_URL = gs://clusterfuzz-test-data/"
                          "test_libfuzzer_builds/"
                          "test-libfuzzer-build-%s.srcmap.json\n".format(
                              corpus_bucket=self.corpus_bucket,
                              quarantine_bucket=self.quarantine_bucket,
                              backup_bucket=self.backup_bucket,
                          )),
  )
  job.put()

  job = data_types.Job(
      name="libfuzzer_asan_job2",
      environment_string=("APP_NAME = test2_fuzzer\n"
                          "BACKUP_BUCKET = {backup_bucket}\n"
                          "CORPUS_FUZZER_NAME_OVERRIDE = libfuzzer\n".format(
                              backup_bucket=self.backup_bucket)),
  )
  job.put()

  os.environ["PROJECT_NAME"] = "oss-fuzz"
  data_types.FuzzTarget(
      engine="libFuzzer", project="test", binary="test_fuzzer").put()
  data_types.FuzzTargetJob(
      fuzz_target_name="libFuzzer_test_fuzzer",
      engine="libFuzzer",
      job="libfuzzer_asan_job",
      last_run=datetime.datetime.now(),
  ).put()

  data_types.FuzzTarget(
      engine="libFuzzer", project="test2", binary="fuzzer").put()
  data_types.FuzzTargetJob(
      fuzz_target_name="libFuzzer_test2_fuzzer",
      engine="libFuzzer",
      job="libfuzzer_asan_job2",
      last_run=datetime.datetime.now(),
  ).put()

  environment.set_value("USE_MINIJAIL", True)
  environment.set_value("SHARED_CORPUS_BUCKET", TEST_SHARED_BUCKET)

  # Set up remote corpora.
  self.corpus = corpus_manager.FuzzTargetCorpus("libFuzzer", "test_fuzzer")
  self.corpus.rsync_from_disk(os.path.join(TEST_DIR, "corpus"), delete=True)

  self.quarantine_corpus = corpus_manager.FuzzTargetCorpus(
      "libFuzzer", "test_fuzzer", quarantine=True)
  self.quarantine_corpus.rsync_from_disk(
      os.path.join(TEST_DIR, "quarantine"), delete=True)

  self.mock.get_data_bundle_bucket_name.return_value = TEST_GLOBAL_BUCKET
  data_types.DataBundle(
      name="bundle", is_local=True, sync_to_worker=True).put()

  data_types.Fuzzer(
      revision=1,
      file_size="builtin",
      source="builtin",
      name="libFuzzer",
      max_testcases=4,
      builtin=True,
      data_bundle_name="bundle",
  ).put()

  self.temp_dir = tempfile.mkdtemp()

  # Copy corpus backup in the older date format.
  corpus_backup_date = datetime.datetime.utcnow().date() - datetime.timedelta(
      days=data_types.CORPUS_BACKUP_PUBLIC_LOOKBACK_DAYS)
  corpus_backup_dir = "gs://{bucket}/corpus/libfuzzer/test2_fuzzer/"
  gsutil.GSUtilRunner().run_gsutil([
      "cp",
      (corpus_backup_dir + "backup.zip").format(bucket=TEST2_BACKUP_BUCKET),
      (corpus_backup_dir +
       "%s.zip" % corpus_backup_date).format(bucket=self.backup_bucket),
  ])