def upload_model_to_gcs(model_directory, fuzzer_name):
  """Upload the latest model to GCS bucket.

  Training may checkpoint several intermediate models; only the newest
  valid one is pushed to the GCS bucket.

  Args:
    model_directory: The directory to save intermediate models during training.
    fuzzer_name: The fuzzer the model is trained for.
  """
  # Locate the newest valid model checkpoint.
  latest_model = get_last_saved_model(model_directory)
  if not latest_model:
    logs.log_error('No valid RNN model is saved during training.')
    return

  # Resolve the destination directory in GCS.
  gcs_model_directory = get_gcs_model_directory(fuzzer_name)
  if not gcs_model_directory:
    logs.log_error('Failed to upload model: cannot get GCS model bucket.')
    return

  # Canonical basenames for the three checkpoint files.
  file_names = {
      key: constants.RNN_MODEL_NAME + suffix for key, suffix in (
          ('meta', constants.MODEL_META_SUFFIX),
          ('data', constants.MODEL_DATA_SUFFIX),
          ('index', constants.MODEL_INDEX_SUFFIX),
      )
  }

  logs.log('Uploading the model for %s: %s, %s, %s.' %
           (fuzzer_name, file_names['meta'], file_names['data'],
            file_names['index']))

  # Upload each file in turn; `all` over a generator short-circuits on the
  # first failure, matching the original chained-`and` behavior.
  succeeded = all(
      storage.copy_file_to(latest_model[key],
                           '%s/%s' % (gcs_model_directory, file_names[key]))
      for key in ('meta', 'data', 'index'))

  if succeeded:
    logs.log('Uploaded ML RNN model for fuzzer %s.' % fuzzer_name)
  else:
    logs.log_error('Failed to upload ML RNN model for fuzzer %s.' % fuzzer_name)
def _store_testcase_for_regression_testing(testcase, testcase_file_path):
  """Stores reproduction testcase for future regression testing in corpus
  pruning task."""
  # Only store testcases that are fixed (closed), have an associated bug,
  # and belong to an engine fuzz target.
  if testcase.open:
    return

  if not testcase.bug_information:
    return

  fuzz_target = data_handler.get_fuzz_target(testcase.overridden_fuzzer_name)
  if not fuzz_target:
    return

  target_corpus = corpus_manager.FuzzTargetCorpus(
      fuzz_target.engine, fuzz_target.project_qualified_name())

  # Content-addressed object name inside the regressions corpus.
  destination_url = os.path.join(
      target_corpus.get_regressions_corpus_gcs_url(),
      utils.file_hash(testcase_file_path))

  copied = storage.copy_file_to(testcase_file_path, destination_url)
  if copied:
    logs.log('Successfully stored testcase for regression testing: ' +
             destination_url)
  else:
    logs.log_error('Failed to store testcase for regression testing: ' +
                   destination_url)
def backup_corpus(backup_bucket_name, corpus, directory):
  """Archive and store corpus as a backup.

  Args:
    backup_bucket_name: Backup bucket.
    corpus: The FuzzTargetCorpus.
    directory: Path to directory to be archived and backuped.

  Returns:
    The backup GCS url, or None on failure.
  """
  if not backup_bucket_name:
    logs.log('No backup bucket provided, skipping corpus backup.')
    return None

  dated_backup_url = None
  timestamp = str(utils.utcnow().date())

  # The archive path for shutil.make_archive should be without an extension.
  backup_archive_path = os.path.join(
      os.path.dirname(os.path.normpath(directory)), timestamp)
  try:
    backup_archive_path = shutil.make_archive(backup_archive_path,
                                              BACKUP_ARCHIVE_FORMAT, directory)
    logs.log(
        'Created corpus backup file.',
        backup_archive_path=backup_archive_path,
        directory=directory,
        size=os.path.getsize(backup_archive_path))

    dated_backup_url = gcs_url_for_backup_file(
        backup_bucket_name, corpus.engine,
        corpus.project_qualified_target_name, timestamp)

    if not storage.copy_file_to(backup_archive_path, dated_backup_url):
      # Fix: previously this failure path returned silently; log it so the
      # behavior is consistent with the latest-backup failure path below.
      logs.log_error(
          'Failed to upload corpus backup to "%s"' % dated_backup_url)
      return None

    # Keep a stable "latest" alias pointing at the most recent dated backup.
    latest_backup_url = gcs_url_for_backup_file(
        backup_bucket_name, corpus.engine,
        corpus.project_qualified_target_name, LATEST_BACKUP_TIMESTAMP)

    if not storage.copy_blob(dated_backup_url, latest_backup_url):
      logs.log_error(
          'Failed to update latest corpus backup at "%s"' % latest_backup_url)
  except Exception as ex:
    logs.log_error(
        'backup_corpus failed: %s\n' % str(ex),
        backup_bucket_name=backup_bucket_name,
        directory=directory,
        backup_archive_path=backup_archive_path)
  finally:
    # Remove local backup archive regardless of upload outcome.
    shell.remove_file(backup_archive_path)

  return dated_backup_url
def _upload_kernel_coverage_data(kcov_path, kernel_bid):
  """Upload kcov data to a cloud storage bucket.

  Args:
    kcov_path: Local path of the kcov data file to upload.
    kernel_bid: Kernel build id, used as a path component in the bucket.
  """
  bucket_name = local_config.ProjectConfig().get('coverage.reports.bucket')
  if not bucket_name:
    # No coverage bucket configured; nothing to do.
    return

  # <date>/<kernel build id>/<bot name + timestamp> keeps each upload unique
  # per bot and per run. isoformat() already returns str, so no str() needed.
  formatted_date = utils.utcnow().date().isoformat()
  identifier = environment.get_value('BOT_NAME') + utils.utcnow().isoformat()
  gcs_url = (f'gs://{bucket_name}/syzkaller/{formatted_date}/{kernel_bid}/'
             f'{identifier}')
  if storage.copy_file_to(kcov_path, gcs_url):
    logs.log(f'Copied kcov data to {gcs_url}.')
  else:
    # Fix: a failed upload was previously ignored silently.
    logs.log_error(f'Failed to copy kcov data to {gcs_url}.')
def upload_model_to_gcs(model_directory, fuzzer_name):
  """Upload the entire model directory to GCS bucket.

  Note that metadata stored within the model directory is needed
  for the generation scripts.

  Args:
    model_directory (str): models/[architecture]/[run-name].
    fuzzer_name (str): The fuzzer the model is trained for.
  """
  # (Docstring fix: the previous "Returns: True if corpus can be acquired"
  # was copy-pasted from elsewhere; this function returns None.)

  # Get GCS model path.
  gcs_model_directory = ml_train_utils.get_gcs_model_directory(
      run_constants.GRADIENTFUZZ_DIR, fuzzer_name)
  if not gcs_model_directory:
    logs.log_error('Failed to upload model: cannot get GCS model bucket.')
    return

  # Zip entire model directory and upload.
  model_dir_name = os.path.basename(model_directory)
  zipped_dir = shutil.make_archive(model_dir_name, 'zip', model_directory)

  # Fix: `zipped_dir` is a local filesystem path returned by make_archive;
  # only its basename belongs in the GCS object path.
  gcs_model_path = f'{gcs_model_directory}/{os.path.basename(zipped_dir)}'

  # Fix: the previous messages were missing spaces at f-string joins
  # ("run{model_dir_name}", "fuzzer{fuzzer_name}").
  logs.log(f'Uploading the model for fuzzer {fuzzer_name} and run '
           f'{model_dir_name} to {gcs_model_path}.')

  # Upload files to GCS.
  result = storage.copy_file_to(zipped_dir, gcs_model_path)

  if result:
    logs.log(f'Uploaded GradientFuzz model {model_dir_name} for fuzzer '
             f'{fuzzer_name}.')
  else:
    logs.log_error(f'Failed to upload GradientFuzz model {model_dir_name} '
                   f'for fuzzer {fuzzer_name}.')