def get_corpus(corpus_directory, fuzzer_name):
  """Get corpus directory.

  This function downloads the latest corpus backup file from GCS, unpacks it,
  and places its contents in the corpus directory.

  Args:
    corpus_directory: The directory to place the corpus in.
    fuzzer_name: Fuzzer name, e.g. libpng_read_fuzzer, xml_parser_fuzzer, etc.

  Returns:
    True if the corpus can be acquired and False otherwise.
  """
  backup_bucket_name = environment.get_value('BACKUP_BUCKET')
  corpus_fuzzer_name = environment.get_value('CORPUS_FUZZER_NAME_OVERRIDE')

  # Get GCS backup path.
  gcs_backup_path = corpus_manager.gcs_url_for_backup_file(
      backup_bucket_name, corpus_fuzzer_name, fuzzer_name,
      corpus_manager.LATEST_BACKUP_TIMESTAMP)

  # Get local backup path.
  local_backup_name = os.path.basename(gcs_backup_path)
  local_backup_path = os.path.join(corpus_directory, local_backup_name)

  # Download latest backup.
  if not storage.copy_file_from(gcs_backup_path, local_backup_path):
    logs.log_error('Failed to download corpus from GCS bucket {}.'.format(
        gcs_backup_path))
    return False

  # Extract corpus from zip file.
  archive.unpack(local_backup_path, corpus_directory)
  shell.remove_file(local_backup_path)

  return True

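# Hedged usage sketch (not part of the original source): shows how get_corpus
# might be driven once BACKUP_BUCKET and CORPUS_FUZZER_NAME_OVERRIDE are
# configured. The directory layout and the fuzzer name 'libpng_read_fuzzer'
# are illustrative assumptions, not values taken from this module.
def _example_fetch_corpus():
  """Illustrative only: fetch the latest corpus backup for one fuzzer."""
  corpus_directory = os.path.join(
      environment.get_value('FUZZ_INPUTS'), 'libpng_read_fuzzer_corpus')
  shell.create_directory(corpus_directory)
  if not get_corpus(corpus_directory, 'libpng_read_fuzzer'):
    logs.log_error('Could not set up corpus for libpng_read_fuzzer.')
    return None
  return corpus_directory
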
def _unpack_mutator_plugin(mutator_plugin_archive_path):
  """Unpacks |mutator_plugin_archive_path| in the unpacked plugins directory
  and returns the path it was unpacked into."""
  mutator_plugin_name = os.path.basename(
      os.path.splitext(mutator_plugin_archive_path)[0])
  unpacked_plugin_dir = os.path.join(_get_mutator_plugins_unpacked_dir(),
                                     mutator_plugin_name)
  archive.unpack(mutator_plugin_archive_path, unpacked_plugin_dir)
  return unpacked_plugin_dir

def test_unpack_file_with_cwd_prefix(self):
  """Test unpack with trusted=False passes with file having './' prefix."""
  tgz_path = os.path.join(TESTDATA_PATH, 'cwd-prefix.tgz')
  output_directory = tempfile.mkdtemp(prefix='cwd-prefix')
  archive.unpack(tgz_path, output_directory, trusted=False)

  test_file_path = os.path.join(output_directory, 'test')
  self.assertTrue(os.path.exists(test_file_path))
  self.assertEqual(open(test_file_path).read(), 'abc\n')

  shell.remove_directory(output_directory)

def update_tests_if_needed():
  """Updates layout tests every day."""
  data_directory = environment.get_value('FUZZ_DATA')
  error_occurred = False
  expected_task_duration = 60 * 60  # 1 hour.
  retry_limit = environment.get_value('FAIL_RETRIES')
  temp_archive = os.path.join(data_directory, 'temp.zip')
  tests_url = environment.get_value('WEB_TESTS_URL')

  # Check if we have a valid tests url.
  if not tests_url:
    return

  # Layout test updates are usually disabled to speed up local testing.
  if environment.get_value('LOCAL_DEVELOPMENT'):
    return

  # |UPDATE_WEB_TESTS| env variable can be used to control our update behavior.
  if not environment.get_value('UPDATE_WEB_TESTS'):
    return

  last_modified_time = persistent_cache.get_value(
      TESTS_LAST_UPDATE_KEY, constructor=datetime.datetime.utcfromtimestamp)
  if (last_modified_time is not None and not dates.time_has_expired(
      last_modified_time, days=TESTS_UPDATE_INTERVAL_DAYS)):
    return

  logs.log('Updating layout tests.')
  tasks.track_task_start(
      tasks.Task('update_tests', '', ''), expected_task_duration)

  # Download and unpack the tests archive.
  for _ in range(retry_limit):
    try:
      shell.remove_directory(data_directory, recreate=True)
      storage.copy_file_from(tests_url, temp_archive)
      archive.unpack(temp_archive, data_directory, trusted=True)
      shell.remove_file(temp_archive)
      error_occurred = False
      break
    except:
      logs.log_error(
          'Could not retrieve and unpack layout tests archive. Retrying.')
      error_occurred = True

  if not error_occurred:
    persistent_cache.set_value(
        TESTS_LAST_UPDATE_KEY, time.time(), persist_across_reboots=True)

  tasks.track_task_end()

def download_system_symbols_if_needed(symbols_directory):
  """Download system libraries from |SYMBOLS_URL| and cache locally."""
  if not should_download_symbols():
    return

  # Get the build fingerprint parameters.
  build_params = settings.get_build_parameters()
  if not build_params:
    logs.log_error('Unable to determine build parameters.')
    return

  build_params_check_path = os.path.join(symbols_directory,
                                         '.cached_build_params')
  if check_symbols_cached(build_params_check_path, build_params):
    return

  build_id = build_params.get('build_id')
  target = build_params.get('target')
  build_type = build_params.get('type')
  if not build_id or not target or not build_type:
    logs.log_error('Null build parameters found, exiting.')
    return

  symbols_archive_filename = f'{target}-symbols-{build_id}.zip'
  artifact_file_name = symbols_archive_filename
  output_filename_override = None

  # Include type and sanitizer information in the target.
  tool_suffix = environment.get_value('SANITIZER_TOOL_NAME')
  target_with_type_and_san = f'{target}-{build_type}'
  if tool_suffix and tool_suffix not in target_with_type_and_san:
    target_with_type_and_san += f'_{tool_suffix}'

  targets_with_type_and_san = [target_with_type_and_san]

  symbols_archive_path = os.path.join(symbols_directory,
                                      symbols_archive_filename)
  download_artifact_if_needed(build_id, symbols_directory,
                              symbols_archive_path, targets_with_type_and_san,
                              artifact_file_name, output_filename_override)
  if not os.path.exists(symbols_archive_path):
    logs.log_error(
        'Unable to locate symbols archive %s.' % symbols_archive_path)
    return

  # Store the artifact for later use or for use by other bots.
  storage.store_file_in_cache(symbols_archive_path)

  archive.unpack(symbols_archive_path, symbols_directory, trusted=True)
  shell.remove_file(symbols_archive_path)
  utils.write_data_to_file(build_params, build_params_check_path)

def unpack_testcase(testcase):
  """Unpack a testcase and return all files it is composed of."""
  # Figure out where the testcase file should be stored.
  input_directory, testcase_file_path = _get_testcase_file_and_path(testcase)

  minimized = testcase.minimized_keys and testcase.minimized_keys != 'NA'
  if minimized:
    key = testcase.minimized_keys
    archived = bool(testcase.archive_state &
                    data_types.ArchiveStatus.MINIMIZED)
  else:
    key = testcase.fuzzed_keys
    archived = bool(testcase.archive_state & data_types.ArchiveStatus.FUZZED)

  if archived:
    if minimized:
      temp_filename = (
          os.path.join(input_directory,
                       str(testcase.key.id()) + _TESTCASE_ARCHIVE_EXTENSION))
    else:
      temp_filename = os.path.join(input_directory, testcase.archive_filename)
  else:
    temp_filename = testcase_file_path

  if not blobs.read_blob_to_disk(key, temp_filename):
    return None, input_directory, testcase_file_path

  file_list = []
  if archived:
    archive.unpack(temp_filename, input_directory)
    file_list = archive.get_file_list(temp_filename)

    shell.remove_file(temp_filename)

    file_exists = False
    for file_name in file_list:
      if os.path.basename(file_name) == os.path.basename(testcase_file_path):
        file_exists = True
        break

    if not file_exists:
      logs.log_error(
          'Expected file to run %s is not in archive. Base directory is %s '
          'and files in archive are [%s].' %
          (testcase_file_path, input_directory, ','.join(file_list)))
      return None, input_directory, testcase_file_path
  else:
    file_list.append(testcase_file_path)

  return file_list, input_directory, testcase_file_path

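# Hedged usage sketch (not from the original source): illustrates how a caller
# might consume the (file_list, input_directory, testcase_file_path) triple
# returned by unpack_testcase(). The wrapper function itself is hypothetical.
def _example_setup_testcase_files(testcase):
  """Illustrative only: unpack a testcase and report what was extracted."""
  file_list, input_directory, testcase_file_path = unpack_testcase(testcase)
  if not file_list:
    # unpack_testcase() returns None for file_list on any failure.
    logs.log_error('Could not unpack testcase %s.' % testcase_file_path)
    return None
  logs.log('Unpacked %d file(s) into %s.' % (len(file_list), input_directory))
  return testcase_file_path
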
def _download_testcase(testcase_id, testcase, configuration):
  """Download the test case and return its path."""
  print('Downloading testcase...')
  testcase_download_url = '{url}?id={id}'.format(
      url=configuration.get('testcase_download_url'), id=testcase_id)
  response, content = http_utils.request(
      testcase_download_url,
      method=http_utils.GET_METHOD,
      configuration=configuration)

  if response.status != 200:
    raise errors.ReproduceToolUnrecoverableError(
        'Unable to download test case.')

  bot_absolute_filename = response[FILENAME_RESPONSE_HEADER]

  # Store the test case in the config directory for debuggability.
  testcase_directory = os.path.join(CONFIG_DIRECTORY, 'current-testcase')
  shell.remove_directory(testcase_directory, recreate=True)
  environment.set_value('FUZZ_INPUTS', testcase_directory)
  testcase_path = os.path.join(testcase_directory,
                               os.path.basename(bot_absolute_filename))

  utils.write_data_to_file(content, testcase_path)

  # Unpack the test case if it's archived.
  # TODO(mbarbella): Rewrite setup.unpack_testcase and share this code.
  if testcase.minimized_keys and testcase.minimized_keys != 'NA':
    mask = data_types.ArchiveStatus.MINIMIZED
  else:
    mask = data_types.ArchiveStatus.FUZZED

  if testcase.archive_state & mask:
    archive.unpack(testcase_path, testcase_directory)
    file_list = archive.get_file_list(testcase_path)

    testcase_path = None
    for file_name in file_list:
      if os.path.basename(file_name) == os.path.basename(
          testcase.absolute_path):
        testcase_path = os.path.join(testcase_directory, file_name)
        break

    if not testcase_path:
      raise errors.ReproduceToolUnrecoverableError(
          'Test case file was not found in archive.\n'
          'Original filename: {absolute_path}.\n'
          'Archive contents: {file_list}'.format(
              absolute_path=testcase.absolute_path, file_list=file_list))

  return testcase_path

def setup_user_profile_directory_if_needed(user_profile_directory):
  """Set user profile directory if it does not exist."""
  if os.path.exists(user_profile_directory):
    # User profile directory already exists. Bail out.
    return

  shell.create_directory(user_profile_directory)

  # Create a file in user profile directory based on format:
  # filename;base64 encoded zlib compressed file contents.
  user_profile_file = environment.get_value('USER_PROFILE_FILE')
  if user_profile_file and ';' in user_profile_file:
    user_profile_filename, encoded_file_contents = (
        user_profile_file.split(';', 1))
    user_profile_file_contents = zlib.decompress(
        base64.b64decode(encoded_file_contents))
    user_profile_file_path = os.path.join(user_profile_directory,
                                          user_profile_filename)
    utils.write_data_to_file(user_profile_file_contents,
                             user_profile_file_path)

  # For Firefox, we need to install a special fuzzPriv extension that exposes
  # special functions to javascript, e.g. gc(), etc.
  app_name = environment.get_value('APP_NAME')
  if app_name.startswith('firefox'):
    # Create extensions directory.
    extensions_directory = os.path.join(user_profile_directory, 'extensions')
    shell.create_directory(extensions_directory)

    # Unpack the fuzzPriv extension.
    extension_archive = os.path.join(environment.get_resources_directory(),
                                     'firefox', 'fuzzPriv-extension.zip')
    archive.unpack(extension_archive, extensions_directory)

    # Add this extension in the extensions configuration file.
    extension_config_file_path = os.path.join(user_profile_directory,
                                              'extensions.ini')
    fuzz_extension_directory = os.path.join(extensions_directory,
                                            '*****@*****.**')
    extension_config_file_contents = (
        '[ExtensionDirs]\r\n'
        'Extension0=%s\r\n'
        '\r\n'
        '[ThemeDirs]\r\n' % fuzz_extension_directory)
    utils.write_data_to_file(extension_config_file_contents,
                             extension_config_file_path)

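# Hedged sketch (not part of the original source): shows how a
# USER_PROFILE_FILE value in the 'filename;base64(zlib(contents))' format
# parsed above could be produced. The filename 'prefs.js' and its contents are
# illustrative assumptions.
def _example_encode_user_profile_file(filename='prefs.js',
                                      contents=b'user_pref("foo", true);\n'):
  """Illustrative only: build a USER_PROFILE_FILE-style value."""
  # Mirror of the decode path above: zlib-compress, then base64-encode.
  encoded_contents = base64.b64encode(zlib.compress(contents)).decode('ascii')
  return '%s;%s' % (filename, encoded_contents)
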
def download_latest_build(build_info, image_regexes, image_directory):
  """Download the latest build artifact for the given branch and target."""
  # Check if our local build matches the latest build. If not, we will
  # download it.
  build_id = build_info['bid']
  target = build_info['target']
  last_build_info = persistent_cache.get_value(constants.LAST_FLASH_BUILD_KEY)
  if last_build_info and last_build_info['bid'] == build_id:
    return

  # Clean up the images directory first.
  shell.remove_directory(image_directory, recreate=True)

  for image_regex in image_regexes:
    image_file_path = fetch_artifact.get(build_id, target, image_regex,
                                         image_directory)
    if not image_file_path:
      logs.log_error('Failed to download artifact %s for '
                     'branch %s and target %s.' %
                     (image_regex, build_info['branch'], target))
      return

    if (image_file_path.endswith('.zip') or
        image_file_path.endswith('.tar.gz')):
      archive.unpack(image_file_path, image_directory)

def _cross_pollinate_other_fuzzer_corpuses(self):
  """Add other fuzzer corpuses to shared corpus path for cross-pollination."""
  corpus_backup_date = utils.utcnow().date() - datetime.timedelta(
      days=data_types.CORPUS_BACKUP_PUBLIC_LOOKBACK_DAYS)

  for cross_pollinate_fuzzer in self.cross_pollinate_fuzzers:
    project_qualified_name = (
        cross_pollinate_fuzzer.fuzz_target.project_qualified_name())
    backup_bucket_name = cross_pollinate_fuzzer.backup_bucket_name
    corpus_engine_name = cross_pollinate_fuzzer.corpus_engine_name

    corpus_backup_url = corpus_manager.gcs_url_for_backup_file(
        backup_bucket_name, corpus_engine_name, project_qualified_name,
        corpus_backup_date)
    corpus_backup_local_filename = '%s-%s' % (
        project_qualified_name, os.path.basename(corpus_backup_url))
    corpus_backup_local_path = os.path.join(self.shared_corpus_path,
                                            corpus_backup_local_filename)

    if not storage.exists(corpus_backup_url, ignore_errors=True):
      # This can happen when a new fuzz target is checked in, or when we
      # missed capturing a backup for a particular day (for OSS-Fuzz, this
      # will result in a 403 instead of a 404 since that GCS path belongs to
      # another project). So, just log a warning for debugging purposes only.
      logs.log_warn(
          'Corpus backup does not exist, ignoring: %s.' % corpus_backup_url)
      continue

    if not storage.copy_file_from(corpus_backup_url,
                                  corpus_backup_local_path):
      continue

    corpus_backup_output_directory = os.path.join(self.shared_corpus_path,
                                                  project_qualified_name)
    shell.create_directory(corpus_backup_output_directory)
    result = archive.unpack(corpus_backup_local_path,
                            corpus_backup_output_directory)
    shell.remove_file(corpus_backup_local_path)

    if result:
      logs.log(
          'Corpus backup url %s successfully unpacked into shared corpus.' %
          corpus_backup_url)
    else:
      logs.log_error(
          'Failed to unpack corpus backup from url %s.' % corpus_backup_url)

def update_fuzzer_and_data_bundles(fuzzer_name):
  """Update the fuzzer with a given name if necessary."""
  fuzzer = data_types.Fuzzer.query(
      data_types.Fuzzer.name == fuzzer_name).get()
  if not fuzzer:
    logs.log_error('No fuzzer exists with name %s.' % fuzzer_name)
    raise errors.InvalidFuzzerError

  # Set some helper environment variables.
  fuzzer_directory = get_fuzzer_directory(fuzzer_name)
  environment.set_value('FUZZER_DIR', fuzzer_directory)
  environment.set_value('UNTRUSTED_CONTENT', fuzzer.untrusted_content)

  # If the fuzzer generates large testcases or a large number of small ones
  # that don't fit on tmpfs, then use the larger disk directory.
  if fuzzer.has_large_testcases:
    testcase_disk_directory = environment.get_value('FUZZ_INPUTS_DISK')
    environment.set_value('FUZZ_INPUTS', testcase_disk_directory)

  # Adjust the test timeout, if user has provided one.
  if fuzzer.timeout:
    environment.set_value('TEST_TIMEOUT', fuzzer.timeout)

    # Increase fuzz test timeout if the fuzzer timeout is higher than its
    # current value.
    fuzz_test_timeout = environment.get_value('FUZZ_TEST_TIMEOUT')
    if fuzz_test_timeout and fuzz_test_timeout < fuzzer.timeout:
      environment.set_value('FUZZ_TEST_TIMEOUT', fuzzer.timeout)

  # Adjust the max testcases if this fuzzer has specified a lower limit.
  max_testcases = environment.get_value('MAX_TESTCASES')
  if fuzzer.max_testcases and fuzzer.max_testcases < max_testcases:
    environment.set_value('MAX_TESTCASES', fuzzer.max_testcases)

  # Check for updates to this fuzzer.
  version_file = os.path.join(fuzzer_directory, '.%s_version' % fuzzer_name)
  if (not fuzzer.builtin and
      revisions.needs_update(version_file, fuzzer.revision)):
    logs.log('Fuzzer update was found, updating.')

    # Clear the old fuzzer directory if it exists.
    if not shell.remove_directory(fuzzer_directory, recreate=True):
      logs.log_error('Failed to clear fuzzer directory.')
      return None

    # Copy the archive to local disk and unpack it.
    archive_path = os.path.join(fuzzer_directory, fuzzer.filename)
    if not blobs.read_blob_to_disk(fuzzer.blobstore_key, archive_path):
      logs.log_error('Failed to copy fuzzer archive.')
      return None

    try:
      archive.unpack(archive_path, fuzzer_directory)
    except Exception:
      error_message = (
          'Failed to unpack fuzzer archive %s '
          '(bad archive or unsupported format).') % fuzzer.filename
      logs.log_error(error_message)
      fuzzer_logs.upload_script_log(
          'Fatal error: ' + error_message, fuzzer_name=fuzzer_name)
      return None

    fuzzer_path = os.path.join(fuzzer_directory, fuzzer.executable_path)
    if not os.path.exists(fuzzer_path):
      error_message = (
          'Fuzzer executable %s not found. '
          'Check fuzzer configuration.') % fuzzer.executable_path
      logs.log_error(error_message)
      fuzzer_logs.upload_script_log(
          'Fatal error: ' + error_message, fuzzer_name=fuzzer_name)
      return None

    # Make fuzzer executable.
    os.chmod(fuzzer_path, 0o750)

    # Cleanup unneeded archive.
    shell.remove_file(archive_path)

    # Save the current revision of this fuzzer in a file for later checks.
    revisions.write_revision_to_revision_file(version_file, fuzzer.revision)
    logs.log('Updated fuzzer to revision %d.' % fuzzer.revision)

  _clear_old_data_bundles_if_needed()

  # Setup data bundles associated with this fuzzer.
  data_bundles = ndb_utils.get_all_from_query(
      data_types.DataBundle.query(
          data_types.DataBundle.name == fuzzer.data_bundle_name))
  for data_bundle in data_bundles:
    if not update_data_bundle(fuzzer, data_bundle):
      return None

  # Setup environment variable for launcher script path.
  if fuzzer.launcher_script:
    fuzzer_launcher_path = os.path.join(fuzzer_directory,
                                        fuzzer.launcher_script)
    environment.set_value('LAUNCHER_PATH', fuzzer_launcher_path)

    # For the launcher script use case, we need the entire fuzzer directory
    # on the worker.
    if environment.is_trusted_host():
      from clusterfuzz._internal.bot.untrusted_runner import file_host
      worker_fuzzer_directory = file_host.rebase_to_worker_root(
          fuzzer_directory)
      file_host.copy_directory_to_worker(
          fuzzer_directory, worker_fuzzer_directory, replace=True)

  return fuzzer

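# Hedged usage sketch (not from the original source): update_fuzzer_and_data_bundles
# returns the Fuzzer entity on success, returns None on setup failures, and
# raises errors.InvalidFuzzerError when no fuzzer with that name exists, so
# callers typically bail out on a falsy result. The fuzzer name below and the
# wrapper function are illustrative assumptions.
def _example_prepare_fuzzer(fuzzer_name='libFuzzer'):
  """Illustrative only: set up a fuzzer and read back its directory."""
  fuzzer = update_fuzzer_and_data_bundles(fuzzer_name)
  if not fuzzer:
    logs.log_error('Fuzzer %s setup failed.' % fuzzer_name)
    return None
  return environment.get_value('FUZZER_DIR')
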
def execute_task(metadata_id, job_type):
  """Unpack a bundled testcase archive and create analyze jobs for each
  item."""
  metadata = ndb.Key(data_types.BundledArchiveMetadata,
                     int(metadata_id)).get()
  if not metadata:
    logs.log_error('Invalid bundle metadata id %s.' % metadata_id)
    return

  bot_name = environment.get_value('BOT_NAME')
  upload_metadata = data_types.TestcaseUploadMetadata.query(
      data_types.TestcaseUploadMetadata.blobstore_key ==
      metadata.blobstore_key).get()
  if not upload_metadata:
    logs.log_error('Invalid upload metadata key %s.' % metadata.blobstore_key)
    return

  job = data_types.Job.query(data_types.Job.name == metadata.job_type).get()
  if not job:
    logs.log_error('Invalid job_type %s.' % metadata.job_type)
    return

  # Update the upload metadata with this bot name.
  upload_metadata.bot_name = bot_name
  upload_metadata.put()

  # We can't use FUZZ_INPUTS directory since it is constrained by tmpfs
  # limits.
  testcases_directory = environment.get_value('FUZZ_INPUTS_DISK')

  # Retrieve multi-testcase archive.
  archive_path = os.path.join(testcases_directory, metadata.archive_filename)
  if not blobs.read_blob_to_disk(metadata.blobstore_key, archive_path):
    logs.log_error('Could not retrieve archive for bundle %d.' % metadata_id)
    tasks.add_task('unpack', metadata_id, job_type)
    return

  try:
    archive.unpack(archive_path, testcases_directory)
  except:
    logs.log_error('Could not unpack archive for bundle %d.' % metadata_id)
    tasks.add_task('unpack', metadata_id, job_type)
    return

  # Get additional testcase metadata (if any).
  additional_metadata = None
  if upload_metadata.additional_metadata_string:
    additional_metadata = json.loads(
        upload_metadata.additional_metadata_string)

  archive_state = data_types.ArchiveStatus.NONE
  bundled = True

  file_list = archive.get_file_list(archive_path)
  for file_path in file_list:
    absolute_file_path = os.path.join(testcases_directory, file_path)
    filename = os.path.basename(absolute_file_path)

    # Only files are actual testcases. Skip directories.
    if not os.path.isfile(absolute_file_path):
      continue

    try:
      file_handle = open(absolute_file_path, 'rb')
      blob_key = blobs.write_blob(file_handle)
      file_handle.close()
    except:
      blob_key = None

    if not blob_key:
      logs.log_error(
          'Could not write testcase %s to blobstore.' % absolute_file_path)
      continue

    data_handler.create_user_uploaded_testcase(
        blob_key, metadata.blobstore_key, archive_state,
        metadata.archive_filename, filename, metadata.timeout, job,
        metadata.job_queue, metadata.http_flag, metadata.gestures,
        metadata.additional_arguments, metadata.bug_information,
        metadata.crash_revision, metadata.uploader_email,
        metadata.platform_id, metadata.app_launch_command,
        metadata.fuzzer_name, metadata.overridden_fuzzer_name,
        metadata.fuzzer_binary_name, bundled, upload_metadata.retries,
        upload_metadata.bug_summary_update_flag, upload_metadata.quiet_flag,
        additional_metadata)

  # The upload metadata for the archive is not needed anymore since we
  # created one for each testcase.
  upload_metadata.key.delete()

  shell.clear_testcase_directories()