def post(self): """Handle a post request.""" name = self.request.get('name') if not name: raise helpers.EarlyExitException('Please give this corpus a name!', 400) if not data_types.DataBundle.VALID_NAME_REGEX.match(name): raise helpers.EarlyExitException( 'Name can only contain letters, numbers, dashes and underscores.', 400) user_email = helpers.get_user_email() bucket_name = data_handler.get_data_bundle_bucket_name(name) bucket_url = data_handler.get_data_bundle_bucket_url(name) is_local = not self.request.get('nfs', False) if not data_handler.create_data_bundle_bucket_and_iams( name, [user_email]): raise helpers.EarlyExitException( 'Failed to create bucket %s.' % bucket_name, 400) data_bundle = data_types.DataBundle.query( data_types.DataBundle.name == name).get() if not data_bundle: data_bundle = data_types.DataBundle() data_bundle.name = name data_bundle.bucket_name = bucket_name data_bundle.is_local = is_local data_bundle.put() template_values = { 'title': 'Success', 'message': ('Upload data to the corpus using: ' 'gsutil -d -m rsync -r <local_corpus_directory> %s' % bucket_url), } self.render('message.html', template_values)
def _is_data_bundle_up_to_date(data_bundle, data_bundle_directory): """Return true if the data bundle is up to date, false otherwise.""" sync_file_path = _get_data_bundle_sync_file_path(data_bundle_directory) if environment.is_trusted_host() and data_bundle.sync_to_worker: from bot.untrusted_runner import file_host worker_sync_file_path = file_host.rebase_to_worker_root(sync_file_path) shell.remove_file(sync_file_path) file_host.copy_file_from_worker(worker_sync_file_path, sync_file_path) if not os.path.exists(sync_file_path): return False last_sync_time = datetime.datetime.utcfromtimestamp( utils.read_data_from_file(sync_file_path)) # Check if we recently synced. if not dates.time_has_expired( last_sync_time, seconds=_DATA_BUNDLE_SYNC_INTERVAL_IN_SECONDS): return True # For search index data bundle, we don't sync them from bucket. Instead, we # rely on the fuzzer to generate testcases periodically. if _is_search_index_data_bundle(data_bundle.name): return False # Check when the bucket url had last updates. If no new updates, no need to # update directory. bucket_url = data_handler.get_data_bundle_bucket_url(data_bundle.name) last_updated_time = storage.last_updated(bucket_url) if last_updated_time and last_sync_time > last_updated_time: logs.log("Data bundle %s has no new content from last sync." % data_bundle.name) return True return False
def update_data_bundle(fuzzer, data_bundle): """Updates a data bundle to the latest version.""" # This module can't be in the global imports due to appengine issues # with multiprocessing and psutil imports. from google_cloud_utils import gsutil # If we are using a data bundle on NFS, it is expected that our testcases # will usually be large enough that we would fill up our tmpfs directory # pretty quickly. So, change it to use an on-disk directory. if not data_bundle.is_local: testcase_disk_directory = environment.get_value('FUZZ_INPUTS_DISK') environment.set_value('FUZZ_INPUTS', testcase_disk_directory) data_bundle_directory = get_data_bundle_directory(fuzzer.name) if not data_bundle_directory: logs.log_error('Failed to setup data bundle %s.' % data_bundle.name) return False if not shell.create_directory(data_bundle_directory, create_intermediates=True): logs.log_error('Failed to create data bundle %s directory.' % data_bundle.name) return False # Check if data bundle is up to date. If yes, skip the update. if _is_data_bundle_up_to_date(data_bundle, data_bundle_directory): logs.log('Data bundle was recently synced, skip.') return True # Fetch lock for this data bundle. if not _fetch_lock_for_data_bundle_update(data_bundle): logs.log_error('Failed to lock data bundle %s.' % data_bundle.name) return False # Re-check if another bot did the sync already. If yes, skip. if _is_data_bundle_up_to_date(data_bundle, data_bundle_directory): logs.log('Another bot finished the sync, skip.') _release_lock_for_data_bundle_update(data_bundle) return True time_before_sync_start = time.time() # No need to sync anything if this is a search index data bundle. In that # case, the fuzzer will generate testcases from a gcs bucket periodically. if not _is_search_index_data_bundle(data_bundle.name): bucket_url = data_handler.get_data_bundle_bucket_url(data_bundle.name) if environment.is_trusted_host() and data_bundle.sync_to_worker: from bot.untrusted_runner import corpus_manager from bot.untrusted_runner import file_host worker_data_bundle_directory = file_host.rebase_to_worker_root( data_bundle_directory) file_host.create_directory(worker_data_bundle_directory, create_intermediates=True) result = corpus_manager.RemoteGSUtilRunner().rsync( bucket_url, worker_data_bundle_directory, delete=False) else: result = gsutil.GSUtilRunner().rsync(bucket_url, data_bundle_directory, delete=False) if result.return_code != 0: logs.log_error('Failed to sync data bundle %s: %s.' % (data_bundle.name, result.output)) _release_lock_for_data_bundle_update(data_bundle) return False # Update the testcase list file. testcase_manager.create_testcase_list_file(data_bundle_directory) # Write last synced time in the sync file. sync_file_path = _get_data_bundle_sync_file_path(data_bundle_directory) utils.write_data_to_file(time_before_sync_start, sync_file_path) if environment.is_trusted_host() and data_bundle.sync_to_worker: from bot.untrusted_runner import file_host worker_sync_file_path = file_host.rebase_to_worker_root(sync_file_path) file_host.copy_file_to_worker(sync_file_path, worker_sync_file_path) # Release acquired lock. _release_lock_for_data_bundle_update(data_bundle) return True