def get_corpus(corpus_directory, fuzzer_name):
  """Get corpus directory.

  This function will download the latest corpus backup file from GCS, unzip
  the file into the corpus directory and delete the downloaded archive.

  Args:
    corpus_directory: The directory to place corpus.
    fuzzer_name: Fuzzer name, e.g. libpng_read_fuzzer, xml_parser_fuzzer, etc.

  Returns:
    True if the corpus can be acquired and False otherwise.
  """
  backup_bucket_name = environment.get_value('BACKUP_BUCKET')
  corpus_fuzzer_name = environment.get_value('CORPUS_FUZZER_NAME_OVERRIDE')

  # Get GCS backup path.
  gcs_backup_path = corpus_manager.gcs_url_for_backup_file(
      backup_bucket_name, corpus_fuzzer_name, fuzzer_name,
      corpus_manager.LATEST_BACKUP_TIMESTAMP)

  # Get local backup path.
  local_backup_name = os.path.basename(gcs_backup_path)
  local_backup_path = os.path.join(corpus_directory, local_backup_name)

  # Download latest backup.
  if not storage.copy_file_from(gcs_backup_path, local_backup_path):
    logs.log_error('Failed to download corpus from GCS bucket {}.'.format(
        gcs_backup_path))
    return False

  # Extract corpus from zip file. Check the unpack result so a corrupt or
  # truncated archive is reported instead of silently returning success
  # (matches how archive.unpack results are checked elsewhere in this file).
  result = archive.unpack(local_backup_path, corpus_directory)

  # Remove the downloaded archive regardless of the unpack outcome so it
  # does not pollute the corpus directory.
  shell.remove_file(local_backup_path)

  if not result:
    logs.log_error(
        'Failed to unpack corpus backup from {}.'.format(gcs_backup_path))
    return False

  return True
def _make_corpus_backup_public(target, corpus_fuzzer_name_override,
                               corpus_backup_bucket_name):
  """Identifies old corpus backups and makes them public."""
  backup_age = datetime.timedelta(
      days=data_types.CORPUS_BACKUP_PUBLIC_LOOKBACK_DAYS)
  backup_date = utils.utcnow().date() - backup_age

  fuzzer_name = corpus_fuzzer_name_override or target.engine
  backup_url = corpus_manager.gcs_url_for_backup_file(
      corpus_backup_bucket_name, fuzzer_name, target.project_qualified_name(),
      backup_date)

  # Nothing to do if the backup for that date was never created.
  if not storage.get(backup_url):
    logs.log_warn('Failed to find corpus backup %s.' % backup_url)
    return

  if not _set_public_acl_if_needed(backup_url):
    return

  # Publish a copy under the well-known "public" filename next to the
  # dated backup, then mark that copy public as well.
  public_filename = os.extsep.join(
      (corpus_manager.PUBLIC_BACKUP_TIMESTAMP,
       corpus_manager.BACKUP_ARCHIVE_FORMAT))
  public_url = os.path.join(os.path.dirname(backup_url), public_filename)

  if not storage.copy_blob(backup_url, public_url):
    logs.log_error(
        'Failed to overwrite %s with the latest public corpus backup.' %
        public_url)
    return

  if not _set_public_acl_if_needed(public_url):
    return

  logs.log('Corpus backup %s is now marked public.' % backup_url)
def _cross_pollinate_other_fuzzer_corpuses(self):
  """Add other fuzzer corpuses to shared corpus path for cross-pollination."""
  backup_date = utils.utcnow().date() - datetime.timedelta(
      days=data_types.CORPUS_BACKUP_PUBLIC_LOOKBACK_DAYS)

  for fuzzer in self.cross_pollinate_fuzzers:
    qualified_name = fuzzer.fuzz_target.project_qualified_name()
    backup_url = corpus_manager.gcs_url_for_backup_file(
        fuzzer.backup_bucket_name, fuzzer.corpus_engine_name, qualified_name,
        backup_date)

    # Prefix with the project-qualified name so archives from different
    # targets do not collide in the shared directory.
    local_filename = "%s-%s" % (qualified_name, os.path.basename(backup_url))
    local_path = os.path.join(self.shared_corpus_path, local_filename)

    if not storage.exists(backup_url, ignore_errors=True):
      # This can happen in cases when a new fuzz target is checked in or if
      # missed to capture a backup for a particular day (for OSS-Fuzz, this
      # will result in a 403 instead of 404 since that GCS path belongs to
      # other project). So, just log a warning for debugging purposes only.
      logs.log_warn("Corpus backup does not exist, ignoring: %s." % backup_url)
      continue

    if not storage.copy_file_from(backup_url, local_path):
      continue

    output_directory = os.path.join(self.shared_corpus_path, qualified_name)
    shell.create_directory(output_directory)

    unpacked = archive.unpack(local_path, output_directory)
    shell.remove_file(local_path)

    if unpacked:
      logs.log(
          "Corpus backup url %s successfully unpacked into shared corpus." %
          backup_url)
    else:
      logs.log_error(
          "Failed to unpack corpus backup from url %s." % backup_url)
def _make_corpus_backup_public(target, corpus_fuzzer_name_override,
                               corpus_backup_bucket_name):
  """Identifies old corpus backups and makes them public.

  Args:
    target: Fuzz target whose dated corpus backup should be made public.
    corpus_fuzzer_name_override: Optional fuzzer name override; falls back
        to target.engine when falsy.
    corpus_backup_bucket_name: Name of the GCS bucket holding corpus backups.
  """
  corpus_backup_date = utils.utcnow().date() - datetime.timedelta(
      days=data_types.CORPUS_BACKUP_PUBLIC_LOOKBACK_DAYS)

  corpus_backup_url = corpus_manager.gcs_url_for_backup_file(
      corpus_backup_bucket_name,
      corpus_fuzzer_name_override or target.engine,
      target.project_qualified_name(),
      corpus_backup_date,
  )

  # Storage calls below are best-effort: a failure is treated the same as a
  # "not found"/falsy result. Catch Exception (never a bare except, which
  # would also swallow KeyboardInterrupt/SystemExit) so task shutdown still
  # propagates.
  try:
    result = storage.get(corpus_backup_url)
  except Exception:
    result = None

  if not result:
    logs.log_warn("Failed to find corpus backup %s." % corpus_backup_url)
    return

  try:
    result = storage.get_acl(corpus_backup_url, "allUsers")
  except Exception:
    result = None

  if result:
    # Backup is already marked public. Skip.
    logs.log("Corpus backup %s is already marked public, skipping." %
             corpus_backup_url)
    return

  try:
    result = storage.set_acl(corpus_backup_url, "allUsers")
  except Exception:
    result = None

  if not result:
    logs.log_error("Failed to mark corpus backup %s public." %
                   corpus_backup_url)
    return

  logs.log("Corpus backup %s is now marked public." % corpus_backup_url)