def create(self, work_dir):
  """Configures an emulator process which can subsequently be `run`."""
  # Download emulator image.
  if not environment.get_value('ANDROID_EMULATOR_BUCKET_PATH'):
    logs.log_error('ANDROID_EMULATOR_BUCKET_PATH is not set.')
    return

  archive_src_path = environment.get_value('ANDROID_EMULATOR_BUCKET_PATH')
  archive_dst_path = os.path.join(work_dir, 'emulator_bundle.zip')
  storage.copy_file_from(archive_src_path, archive_dst_path)

  # Extract emulator image.
  self.emulator_path = os.path.join(work_dir, 'emulator')
  shell.remove_directory(self.emulator_path)
  archive.unpack(archive_dst_path, self.emulator_path)
  shell.remove_file(archive_dst_path)

  # Stop any stale emulator instances.
  stop_script_path = os.path.join(self.emulator_path, 'stop')
  stop_proc = new_process.ProcessRunner(stop_script_path)
  stop_proc.run_and_wait()

  # Run emulator.
  run_script_path = os.path.join(self.emulator_path, 'run')
  self.process_runner = new_process.ProcessRunner(run_script_path)

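
# A minimal companion sketch (an assumption, not from the original class):
# `run` is presumed to start the process configured by `create` via the
# stored ProcessRunner, with new_process.ProcessRunner.run() taken to spawn
# the process without blocking.
def run(self):
  """Starts the emulator process prepared by `create`."""
  assert self.process_runner, '`create` must be called before `run`.'
  self.process = self.process_runner.run()
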
def update_tests_if_needed():
  """Updates layout tests every day."""
  data_directory = environment.get_value('FUZZ_DATA')
  error_occurred = False
  expected_task_duration = 60 * 60  # 1 hour.
  retry_limit = environment.get_value('FAIL_RETRIES')
  temp_archive = os.path.join(data_directory, 'temp.zip')
  tests_url = environment.get_value('WEB_TESTS_URL')

  # Check if we have a valid tests url.
  if not tests_url:
    return

  # Layout test updates are usually disabled to speed up local testing.
  if environment.get_value('LOCAL_DEVELOPMENT'):
    return

  # |UPDATE_WEB_TESTS| env variable can be used to control our update behavior.
  if not environment.get_value('UPDATE_WEB_TESTS'):
    return

  last_modified_time = persistent_cache.get_value(
      LAYOUT_TEST_LAST_UPDATE_KEY,
      constructor=datetime.datetime.utcfromtimestamp)
  if (last_modified_time is not None and not dates.time_has_expired(
      last_modified_time, days=LAYOUT_TEST_UPDATE_INTERVAL_DAYS)):
    return

  logs.log('Updating layout tests.')
  tasks.track_task_start(
      tasks.Task('update_tests', '', ''), expected_task_duration)

  # Download and unpack the tests archive.
  for _ in range(retry_limit):
    try:
      shell.remove_directory(data_directory, recreate=True)
      storage.copy_file_from(tests_url, temp_archive)
      archive.unpack(temp_archive, data_directory, trusted=True)
      shell.remove_file(temp_archive)
      error_occurred = False
      break
    except:
      logs.log_error(
          'Could not retrieve and unpack layout tests archive. Retrying.')
      error_occurred = True

  if not error_occurred:
    persistent_cache.set_value(
        LAYOUT_TEST_LAST_UPDATE_KEY, time.time(), persist_across_reboots=True)

  tasks.track_task_end()

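
# Hedged illustration of the expiry gate above, assuming
# dates.time_has_expired compares the stored timestamp against utcnow();
# this standalone reimplementation and the default interval are assumptions
# for illustration only.
import datetime


def _layout_tests_are_stale(last_modified_time, days=1):
  """Returns True once |days| have elapsed since |last_modified_time|."""
  return (last_modified_time is None or
          datetime.datetime.utcnow() - last_modified_time >
          datetime.timedelta(days=days))
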
def download_model_from_gcs(local_model_directory, fuzzer_name):
  """Pull model files from the GCS bucket and put them in the specified
  model directory."""
  # ML model is stored in corpus bucket.
  gcs_corpus_bucket = environment.get_value('CORPUS_BUCKET')
  if not gcs_corpus_bucket:
    logs.log('Corpus bucket is not set. Skip generation.')
    return False

  # Get cloud storage path.
  # e.g. gs://clusterfuzz-corpus/rnn/libpng_read_fuzzer
  gcs_model_directory = 'gs://%s/%s/%s' % (
      gcs_corpus_bucket, constants.RNN_MODEL_NAME, fuzzer_name)

  logs.log('GCS model directory for fuzzer %s is %s.' % (fuzzer_name,
                                                         gcs_model_directory))

  # RNN model consists of three files.
  meta_filename = constants.RNN_MODEL_NAME + constants.MODEL_META_SUFFIX
  data_filename = constants.RNN_MODEL_NAME + constants.MODEL_DATA_SUFFIX
  index_filename = constants.RNN_MODEL_NAME + constants.MODEL_INDEX_SUFFIX

  # Cloud file paths.
  gcs_meta_path = '%s/%s' % (gcs_model_directory, meta_filename)
  gcs_data_path = '%s/%s' % (gcs_model_directory, data_filename)
  gcs_index_path = '%s/%s' % (gcs_model_directory, index_filename)

  # Check if model exists.
  if not (storage.exists(gcs_meta_path) and storage.exists(gcs_data_path) and
          storage.exists(gcs_index_path)):
    logs.log('ML RNN model for fuzzer %s does not exist. Skip generation.' %
             fuzzer_name)
    return False

  # Local file paths.
  local_meta_path = os.path.join(local_model_directory, meta_filename)
  local_data_path = os.path.join(local_model_directory, data_filename)
  local_index_path = os.path.join(local_model_directory, index_filename)

  # Download model files.
  result = (
      storage.copy_file_from(gcs_meta_path, local_meta_path) and
      storage.copy_file_from(gcs_data_path, local_data_path) and
      storage.copy_file_from(gcs_index_path, local_index_path))

  if not result:
    logs.log('Failed to download RNN model for fuzzer %s. Skip generation.' %
             fuzzer_name)
    return False

  return True

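
# Hypothetical usage sketch: stage the model locally before ML-based input
# generation. The staging directory and fuzzer name are illustrative only.
model_directory = os.path.join(environment.get_value('BOT_TMPDIR'), 'model')
shell.create_directory(model_directory, create_intermediates=True)
if download_model_from_gcs(model_directory, 'libpng_read_fuzzer'):
  logs.log('RNN model staged in %s.' % model_directory)
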
def get_corpus(corpus_directory, fuzzer_name):
  """Get corpus directory.

  This function downloads the latest corpus backup file from GCS, unzips it
  and puts the contents in the corpus directory.

  Args:
    corpus_directory: The directory to place the corpus in.
    fuzzer_name: Fuzzer name, e.g. libpng_read_fuzzer, xml_parser_fuzzer, etc.

  Returns:
    True if the corpus can be acquired and False otherwise.
  """
  backup_bucket_name = environment.get_value('BACKUP_BUCKET')
  corpus_fuzzer_name = environment.get_value('CORPUS_FUZZER_NAME_OVERRIDE')

  # Get GCS backup path.
  gcs_backup_path = corpus_manager.gcs_url_for_backup_file(
      backup_bucket_name, corpus_fuzzer_name, fuzzer_name,
      corpus_manager.LATEST_BACKUP_TIMESTAMP)

  # Get local backup path.
  local_backup_name = os.path.basename(gcs_backup_path)
  local_backup_path = os.path.join(corpus_directory, local_backup_name)

  # Download latest backup.
  if not storage.copy_file_from(gcs_backup_path, local_backup_path):
    logs.log_error('Failed to download corpus from GCS bucket {}.'.format(
        gcs_backup_path))
    return False

  # Extract corpus from zip file.
  archive.unpack(local_backup_path, corpus_directory)
  shell.remove_file(local_backup_path)

  return True

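
# Hypothetical usage sketch: pull the latest backup into a fresh corpus
# directory before a fuzzing session; the path and target name are
# illustrative assumptions.
corpus_directory = os.path.join(environment.get_value('BOT_TMPDIR'), 'corpus')
shell.create_directory(corpus_directory, recreate=True)
if not get_corpus(corpus_directory, 'libpng_read_fuzzer'):
  logs.log_error('No corpus backup available; starting empty.')
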
def create(self):
  """Configures an emulator process which can subsequently be `run`."""
  # Download emulator image.
  if not environment.get_value('ANDROID_EMULATOR_BUCKET_PATH'):
    logs.log_error('ANDROID_EMULATOR_BUCKET_PATH is not set.')
    return

  temp_directory = environment.get_value('BOT_TMPDIR')
  archive_src_path = environment.get_value('ANDROID_EMULATOR_BUCKET_PATH')
  archive_dst_path = os.path.join(temp_directory, 'emulator_bundle.zip')
  storage.copy_file_from(archive_src_path, archive_dst_path)

  # Extract emulator image.
  self.emulator_path = os.path.join(temp_directory, 'emulator')
  archive.unpack(archive_dst_path, self.emulator_path)
  shell.remove_file(archive_dst_path)

  # Run emulator.
  script_path = os.path.join(self.emulator_path, 'run')
  self.process_runner = new_process.ProcessRunner(script_path)

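
# A small hardening sketch (an assumption, not part of the original):
# clearing any stale extraction directory before unpacking prevents files
# from a previous emulator bundle lingering alongside the new image.
def _unpack_fresh(archive_path, output_directory):
  """Unpacks |archive_path| into a freshly recreated |output_directory|."""
  shell.remove_directory(output_directory, recreate=True)
  archive.unpack(archive_path, output_directory)
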
def _download_mutator_plugin_archive(mutator_plugin_archive):
  """Downloads the |mutator_plugin_archive| from the mutator plugin storage
  bucket to the plugin archives directory. Returns the path that the archive
  was downloaded to."""
  file_path = os.path.join(_get_mutator_plugins_archives_dir(),
                           mutator_plugin_archive)
  url = '%s/%s' % (_get_mutator_plugins_bucket_url(), mutator_plugin_archive)
  if not storage.copy_file_from(url, file_path):
    logs.log_error(
        'Failed to copy plugin archive from %s to %s' % (url, file_path))
    return None

  return file_path

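
# Hedged sketches of the two helpers assumed above; the env variable names
# and on-disk layout are illustrative assumptions, not the originals.
def _get_mutator_plugins_bucket_url():
  """Returns the GCS URL of the mutator plugins bucket."""
  return 'gs://%s' % environment.get_value('MUTATOR_PLUGINS_BUCKET')


def _get_mutator_plugins_archives_dir():
  """Returns the local directory where plugin archives are downloaded."""
  return os.path.join(environment.get_value('MUTATOR_PLUGINS_DIR'),
                      'plugin_archives')
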
def _cross_pollinate_other_fuzzer_corpuses(self):
  """Add other fuzzers' corpora to the shared corpus path for
  cross-pollination."""
  corpus_backup_date = utils.utcnow().date() - datetime.timedelta(
      days=data_types.CORPUS_BACKUP_PUBLIC_LOOKBACK_DAYS)

  for cross_pollinate_fuzzer in self.cross_pollinate_fuzzers:
    project_qualified_name = (
        cross_pollinate_fuzzer.fuzz_target.project_qualified_name())
    backup_bucket_name = cross_pollinate_fuzzer.backup_bucket_name
    corpus_engine_name = cross_pollinate_fuzzer.corpus_engine_name

    corpus_backup_url = corpus_manager.gcs_url_for_backup_file(
        backup_bucket_name,
        corpus_engine_name,
        project_qualified_name,
        corpus_backup_date,
    )
    corpus_backup_local_filename = "%s-%s" % (
        project_qualified_name,
        os.path.basename(corpus_backup_url),
    )
    corpus_backup_local_path = os.path.join(self.shared_corpus_path,
                                            corpus_backup_local_filename)

    if not storage.exists(corpus_backup_url, ignore_errors=True):
      # This can happen when a new fuzz target is checked in, or when we
      # missed capturing a backup for a particular day (for OSS-Fuzz, this
      # will result in a 403 instead of 404 since that GCS path belongs to
      # another project). So, just log a warning for debugging purposes only.
      logs.log_warn("Corpus backup does not exist, ignoring: %s." %
                    corpus_backup_url)
      continue

    if not storage.copy_file_from(corpus_backup_url,
                                  corpus_backup_local_path):
      continue

    corpus_backup_output_directory = os.path.join(self.shared_corpus_path,
                                                  project_qualified_name)
    shell.create_directory(corpus_backup_output_directory)
    result = archive.unpack(corpus_backup_local_path,
                            corpus_backup_output_directory)
    shell.remove_file(corpus_backup_local_path)

    if result:
      logs.log(
          "Corpus backup url %s successfully unpacked into shared corpus." %
          corpus_backup_url)
    else:
      logs.log_error(
          "Failed to unpack corpus backup from url %s." % corpus_backup_url)

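
# Hedged sketch of the per-fuzzer record iterated above; the field names
# mirror the attribute accesses in the loop, but the namedtuple itself is
# an illustrative assumption.
import collections

CrossPollinateFuzzer = collections.namedtuple(
    'CrossPollinateFuzzer',
    ['fuzz_target', 'backup_bucket_name', 'corpus_engine_name'])
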
def download_recommended_dictionary_from_gcs(self, local_dict_path):
  """Download recommended dictionary from GCS to the given location.

  Args:
    local_dict_path: Path to a dictionary file on the disk.

  Returns:
    A boolean indicating whether downloading succeeded or not.
  """
  # When the fuzz target is initially created or when it has no new
  # coverage or dictionary recommendations, then we won't have a
  # recommended dictionary in GCS.
  if not storage.exists(self.gcs_path):
    return False

  if storage.copy_file_from(self.gcs_path, local_dict_path):
    return True

  logs.log('Downloading %s failed.' % self.gcs_path)
  return False

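
# Hypothetical usage sketch: fetch the recommended dictionary alongside the
# main one; the manager object and local path are illustrative assumptions.
recommended_dict_path = '/tmp/libpng_read_fuzzer.recommended.dict'
if dict_manager.download_recommended_dictionary_from_gcs(
    recommended_dict_path):
  logs.log('Recommended dictionary written to %s.' % recommended_dict_path)
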
def _unpack_build(base_build_dir, build_dir, build_url, target_weights=None):
  """Unpacks a build from a build url into the build directory."""
  # Track time taken to unpack builds so that it doesn't silently regress.
  start_time = time.time()

  # Free up memory.
  utils.python_gc()

  # Remove the current build.
  logs.log('Removing build directory %s.' % build_dir)
  if not shell.remove_directory(build_dir, recreate=True):
    logs.log_error('Unable to clear build directory %s.' % build_dir)
    _handle_unrecoverable_error_on_windows()
    return False

  # Decide whether to use cached build archives or not.
  use_cache = environment.get_value('CACHE_STORE', False)

  # Download build archive locally.
  build_local_archive = os.path.join(build_dir, os.path.basename(build_url))

  # Make the disk space necessary for the archive available.
  archive_size = storage.get_download_file_size(
      build_url, build_local_archive, use_cache=True)
  if archive_size is not None and not _make_space(archive_size,
                                                  base_build_dir):
    shell.clear_data_directories()
    logs.log_fatal_and_exit(
        'Failed to make space for download. '
        'Cleared all data directories to free up space, exiting.')

  logs.log('Downloading build from url %s.' % build_url)
  try:
    storage.copy_file_from(build_url, build_local_archive,
                           use_cache=use_cache)
  except:
    logs.log_error('Unable to download build url %s.' % build_url)
    return False

  unpack_everything = environment.get_value(
      'UNPACK_ALL_FUZZ_TARGETS_AND_FILES')
  if not unpack_everything:
    # For fuzzing, pick a random fuzz target so that we only un-archive that
    # particular fuzz target and its dependencies and save disk space.
    # If we are going to unpack everything in the archive based on
    # |UNPACK_ALL_FUZZ_TARGETS_AND_FILES| in the job definition, then don't
    # set a random fuzz target before we've unpacked the build. It won't
    # actually save us anything in this case and can be really expensive for
    # large builds (such as Chrome OS). Defer setting it until after the
    # build has been unpacked.
    _set_random_fuzz_target_for_fuzzing_if_needed(
        _get_fuzz_targets_from_archive(build_local_archive), target_weights)

  # Actual list of files to unpack can be smaller if we are only unarchiving
  # a particular fuzz target.
  file_match_callback = _get_file_match_callback()
  assert not (unpack_everything and file_match_callback is not None)

  if not _make_space_for_build(build_local_archive, base_build_dir,
                               file_match_callback):
    shell.clear_data_directories()
    logs.log_fatal_and_exit(
        'Failed to make space for build. '
        'Cleared all data directories to free up space, exiting.')

  # Unpack the local build archive.
  logs.log('Unpacking build archive %s.' % build_local_archive)
  trusted = not utils.is_oss_fuzz()
  try:
    archive.unpack(
        build_local_archive,
        build_dir,
        trusted=trusted,
        file_match_callback=file_match_callback)
  except:
    logs.log_error('Unable to unpack build archive %s.' % build_local_archive)
    return False

  if unpack_everything:
    # Set a random fuzz target now that the build has been unpacked, if we
    # didn't set one earlier.
    _set_random_fuzz_target_for_fuzzing_if_needed(
        _get_fuzz_targets_from_dir(build_dir), target_weights)

  # If this is a partial build due to selected build files, then mark it as
  # such so that it is not re-used.
  if file_match_callback:
    partial_build_file_path = os.path.join(build_dir, PARTIAL_BUILD_FILE)
    utils.write_data_to_file('', partial_build_file_path)

  # No point in keeping the archive around.
  shell.remove_file(build_local_archive)

  end_time = time.time()
  elapsed_time = end_time - start_time
  log_func = logs.log_warn if elapsed_time > UNPACK_TIME_LIMIT else logs.log
  log_func('Build took %0.02f minutes to unpack.' % (elapsed_time / 60.))

  return True

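
# Hedged sketch of the file-match callback assumed above: when a single
# fuzz target has been selected, restrict extraction to that target's
# files. The env variable and substring matching rule are simplified
# assumptions, not the original implementation.
def _get_file_match_callback():
  """Returns a callback that filters archive entries, or None."""
  fuzz_target = environment.get_value('FUZZ_TARGET')
  if not fuzz_target:
    return None
  return lambda filepath: fuzz_target in os.path.basename(filepath)
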
def update_source_code():
  """Updates source code files with latest version from appengine."""
  process_handler.cleanup_stale_processes()
  shell.clear_temp_directory()

  root_directory = environment.get_value('ROOT_DIR')
  temp_directory = environment.get_value('BOT_TMPDIR')
  temp_archive = os.path.join(temp_directory, 'clusterfuzz-source.zip')
  try:
    storage.copy_file_from(get_source_url(), temp_archive)
  except Exception:
    logs.log_error('Could not retrieve source code archive from url.')
    return

  try:
    file_list = archive.get_file_list(temp_archive)
    zip_archive = zipfile.ZipFile(temp_archive, 'r')
  except Exception:
    logs.log_error('Bad zip file.')
    return

  src_directory = os.path.join(root_directory, 'src')
  output_directory = os.path.dirname(root_directory)
  error_occurred = False
  normalized_file_set = set()
  for filepath in file_list:
    filename = os.path.basename(filepath)

    # This file cannot be updated on the fly since it is running as a server.
    if filename == 'adb':
      continue

    absolute_filepath = os.path.join(output_directory, filepath)
    if os.path.altsep:
      absolute_filepath = absolute_filepath.replace(os.path.altsep,
                                                    os.path.sep)

    if os.path.realpath(absolute_filepath) != absolute_filepath:
      continue

    normalized_file_set.add(absolute_filepath)
    try:
      file_extension = os.path.splitext(filename)[1]

      # Remove any .so files first before overwriting, as they can be loaded
      # in the memory of existing processes. Overwriting them directly causes
      # segfaults in existing processes (e.g. run.py).
      if file_extension == '.so' and os.path.exists(absolute_filepath):
        os.remove(absolute_filepath)

      # On Windows, to update DLLs (and native .pyd extensions), we rename
      # them first so that we can install the new version.
      if (environment.platform() == 'WINDOWS' and
          file_extension in ['.dll', '.pyd'] and
          os.path.exists(absolute_filepath)):
        _rename_dll_for_update(absolute_filepath)
    except Exception:
      logs.log_error('Failed to remove or move %s before extracting new '
                     'version.' % absolute_filepath)

    try:
      extracted_path = zip_archive.extract(filepath, output_directory)
      external_attr = zip_archive.getinfo(filepath).external_attr
      mode = (external_attr >> 16) & 0o777
      mode |= 0o440
      os.chmod(extracted_path, mode)
    except:
      error_occurred = True
      logs.log_error(
          'Failed to extract file %s from source archive.' % filepath)

  zip_archive.close()

  if error_occurred:
    return

  clear_pyc_files(src_directory)
  clear_old_files(src_directory, normalized_file_set)

  local_manifest_path = os.path.join(root_directory,
                                     utils.LOCAL_SOURCE_MANIFEST)
  source_version = utils.read_data_from_file(
      local_manifest_path, eval_data=False)
  logs.log('Source code updated to %s.' % source_version)

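
# Worked illustration of the permission handling above: a zip entry's Unix
# mode lives in the top 16 bits of external_attr, and OR-ing in 0o440 keeps
# the extracted file at least owner/group readable. Values here are
# examples only.
external_attr = 0o100755 << 16  # Regular file, rwxr-xr-x.
mode = (external_attr >> 16) & 0o777  # -> 0o755
mode |= 0o440  # Still 0o755; a write-only 0o200 entry would become 0o640.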