def get(self):
  """Handle a get request.

  Redirects the caller to the storage host serving the given GCS path,
  raising an early-exit 400 error when no path was supplied.
  """
  gcs_path = request.args.get('path', '')
  if not gcs_path:
    raise helpers.EarlyExitException('No path provided.', 400)

  # Objects and directories are served from different storage hosts.
  host_url = storage.OBJECT_URL if storage.get(gcs_path) else storage.DIRECTORY_URL

  bucket_name, object_path = storage.get_bucket_name_and_path(gcs_path)
  return self.redirect('/'.join([host_url, bucket_name, object_path]))
def _limit_corpus_size(corpus_url, size_limit):
  """Limit number of files in a corpus url."""
  blobs = list(storage.list_blobs(corpus_url))
  current_count = len(blobs)
  if current_count <= size_limit:
    # Corpus is already within the limit; nothing to trim.
    return

  logs.log(
      'Limit corpus at {corpus_url} from {corpus_size} to {size_limit}.'.format(
          corpus_url=corpus_url,
          corpus_size=current_count,
          size_limit=size_limit))

  bucket, _ = storage.get_bucket_name_and_path(corpus_url)

  # Delete a uniformly random subset so the surviving corpus stays unbiased.
  for blob_to_delete in random.sample(blobs, current_count - size_limit):
    storage.delete(storage.get_cloud_storage_file_path(bucket, blob_to_delete))
def _sort_build_urls_by_revision(build_urls, bucket_path, reverse):
  """Return a sorted list of build url by revision.

  Args:
    build_urls: Iterable of build urls (bucket-relative object paths).
    bucket_path: Bucket path whose basename is a regex pattern; the first
        capturing group inside that pattern matches the revision component.
    reverse: If True, sort in descending revision order.

  Returns:
    List of build urls sorted by revision.

  Raises:
    errors.BadStateError: If two build urls map to the same revision
        (indicates a malformed bucket path for the job).
  """
  base_url = os.path.dirname(bucket_path)
  file_pattern = os.path.basename(bucket_path)
  filename_by_revision_dict = {}

  _, base_path = storage.get_bucket_name_and_path(base_url)
  base_path_with_separator = base_path + '/' if base_path else ''
  for build_url in build_urls:
    match_pattern = '{base_path_with_separator}({file_pattern})'.format(
        base_path_with_separator=base_path_with_separator,
        file_pattern=file_pattern)
    match = re.match(match_pattern, build_url)
    if match:
      # Group 1 is the whole filename; group 2 is the first capturing group
      # inside |file_pattern|, which is the revision.
      filename = match.group(1)
      revision = match.group(2)

      # Ensure that there are no duplicate revisions.
      if revision in filename_by_revision_dict:
        job_name = environment.get_value('JOB_NAME')
        raise errors.BadStateError(
            'Found duplicate revision %s when processing bucket. '
            'Bucket path is probably malformed for job %s.' %
            (revision, job_name))

      filename_by_revision_dict[revision] = filename

  try:
    # Sort numerically by dotted-version components. The map object must be
    # materialized into a tuple: on Python 3, bare map objects are not
    # orderable, which previously raised TypeError and silently forced the
    # string-sort fallback for every input.
    sorted_revisions = sorted(
        filename_by_revision_dict,
        reverse=reverse,
        key=lambda x: tuple(map(int, x.split('.'))))
  except (ValueError, TypeError):
    # ValueError: a revision component is not an integer.
    # TypeError: defensive, in case keys are otherwise incomparable.
    logs.log_warn(
        'Revision pattern is not an integer, falling back to string sort.')
    sorted_revisions = sorted(filename_by_revision_dict, reverse=reverse)

  sorted_build_urls = []
  for revision in sorted_revisions:
    filename = filename_by_revision_dict[revision]
    sorted_build_urls.append('%s/%s' % (base_url, filename))

  return sorted_build_urls
def _limit_corpus_size(corpus_url):
  """Limit number of files and size of a corpus."""
  bucket, _ = storage.get_bucket_name_and_path(corpus_url)

  total_files = 0
  total_bytes = 0
  num_deleted = 0
  for blob in storage.get_blobs(corpus_url):
    total_files += 1
    total_bytes += blob['size']

    # Once either running total crosses its limit, every subsequent blob
    # (this one included) is deleted.
    within_limits = (total_files <= CORPUS_FILES_LIMIT_FOR_FAILURES and
                     total_bytes <= CORPUS_SIZE_LIMIT_FOR_FAILURES)
    if within_limits:
      continue

    storage.delete(storage.get_cloud_storage_file_path(bucket, blob['name']))
    num_deleted += 1

  if num_deleted:
    logs.log('Removed %d files from oversized corpus: %s.' %
             (num_deleted, corpus_url))