def get_blob_key(create_file_name):
    """Resolve the blob key that belongs to a blobstore file name.

    Args:
      create_file_name: Blobstore file name as returned by create(). It must
        start with _BLOBSTORE_DIRECTORY. The file is expected to be finalized.

    Returns:
      A blobstore.BlobKey for the blob behind the file name, or None when no
      matching BlobInfo entity can be found (for example because the file has
      not been finalized yet).

    Raises:
      google.appengine.api.files.InvalidArgumentError: if the file name is
        empty or is not a string.
      google.appengine.api.files.InvalidFileNameError: if the file name does
        not start with the blobstore directory prefix.
    """
    if not create_file_name:
        raise files.InvalidArgumentError('Empty file name')
    if not isinstance(create_file_name, six.string_types):
        raise files.InvalidArgumentError('Expected string for file name')
    if not create_file_name.startswith(_BLOBSTORE_DIRECTORY):
        raise files.InvalidFileNameError(
            'Filename %s passed to get_blob_key doesn\'t have prefix %s' %
            (create_file_name, _BLOBSTORE_DIRECTORY))

    handle = create_file_name[len(_BLOBSTORE_DIRECTORY):]

    # Anything that is not a creation handle is used as the blob key directly.
    if not handle.startswith(files._CREATION_HANDLE_PREFIX):
        return blobstore.BlobKey(handle)

    # First choice: the blob-file index entity maps the handle to a blob key.
    index_key = datastore.Key.from_path(
        _BLOB_FILE_INDEX_KIND,
        _get_blob_file_index_key_name(handle),
        namespace='')
    index_entity = datastore.Get([index_key])[0]

    if index_entity:
        key_name = index_entity[_BLOB_KEY_PROPERTY_NAME]
        info_key = datastore.Key.from_path(
            blobstore.BLOB_INFO_KIND, key_name, namespace='')
        # The index entry only counts if the BlobInfo entity exists as well.
        if datastore.Get([info_key])[0] is None:
            return None
        return blobstore.BlobKey(key_name)

    # Without an index entry, fall back to querying BlobInfo by creation
    # handle; handles at or above the property size limit are not queried.
    if len(handle) >= _DATASTORE_MAX_PROPERTY_SIZE:
        return None

    lookup = datastore.Query(blobstore.BLOB_INFO_KIND,
                             {'creation_handle =': handle},
                             keys_only=True,
                             namespace='')
    matches = lookup.Get(1)
    if not matches:
        return None
    return blobstore.BlobKey(matches[0].name())
def parseGlob(filename):
    r"""Parse a Gs filename or a filename pattern.

    Handles escaping of '*' and '\': a backslash makes the character that
    follows it literal. A trailing, unpaired backslash is kept as-is.

    Args:
      filename: a filename or filename pattern.
        filename must be a valid gs filepath in the format of
        '/gs/bucket/filename'.
        filename pattern has format '/gs/bucket/prefix*'.
        filename pattern represents filenames with the given prefix in the
        bucket.
        Please escape '*' and '\' with '\' if your filename contains them. We
        recommend using Python raw string to simplify escape expressions.

    Returns:
      A (string, string) tuple if filename is a pattern, or if it names only
      the bucket (nothing, or just '/', after the bucket name). The first
      string is the bucket path, the second is the prefix or '' if there is
      no prefix.
      The properly unescaped filename (a single string) if filename is not a
      pattern.

      example
        '/gs/bucket1/file1' => '/gs/bucket1/file1'
        '/gs/bucket2/*' => ('/gs/bucket2', '') all files under bucket2
        '/gs/bucket3/p*' => ('/gs/bucket3', 'p') files under bucket3 with
            a prefix 'p' in its name
        r'/gs/bucket/file\*' => '/gs/bucket/file*'
        r'/gs/bucket/file\\*' => ('/gs/bucket', r'file\') all files under
            bucket with prefix r'file\'
        r'/gs/bucket/file\\\*' => '/gs/bucket/file\*'
        r'/gs/bucket/file\**' => ('/gs/bucket', 'file*') all files under
            bucket with prefix 'file*'

    Raises:
      google.appengine.api.files.InvalidFileNameError if filename is illegal:
      empty, not a string, not starting with a bucket path, missing the '/'
      after the bucket name, or containing an unescaped '*' anywhere but at
      the very end.
    """
    if not filename:
        raise files.InvalidFileNameError('filename is None.')
    if not isinstance(filename, six.string_types):
        # Wrap in a tuple so a tuple-valued filename cannot break formatting.
        raise files.InvalidFileNameError(
            'filename %s should be of type string' % (filename,))

    match = _GS_FILEPATH_REGEX.match(filename)
    if not match:
        raise files.InvalidFileNameError(
            'filename %s should start with /gs/bucketname' % filename)

    bucketname = match.group(0)
    rest = filename[len(bucketname):]

    # Only the bucket was given: treat it as "everything in the bucket".
    if rest in ('', '/'):
        return bucketname, ''

    if not rest.startswith('/'):
        raise files.InvalidFileNameError(
            'Expect / to separate bucketname and filename in %s' % filename)

    is_pattern = False
    pieces = []
    last = len(rest) - 1
    i = 1  # Skip the '/' that separates the bucket from the object name.
    while i <= last:
        char = rest[i]
        if char == '\\':
            if i == last:
                # Nothing left to escape; keep the backslash itself.
                pieces.append(char)
            else:
                # Take the escaped character literally and step over it.
                i += 1
                pieces.append(rest[i])
        elif char == '*':
            if i != last:
                raise files.InvalidFileNameError(
                    '* as a wildcard is not the last.')
            is_pattern = True
        else:
            pieces.append(char)
        i += 1

    processed = ''.join(pieces)
    if is_pattern:
        return bucketname, processed
    return bucketname + '/' + processed
def listdir(path, kwargs=None):
    """Return a list of filenames (matching a pattern) in the given path.

    Ordering is performed by Google Cloud Storage itself; this function does
    not re-sort the results. When SERVER_SOFTWARE is unset or starts with
    'Development', the call is delegated to _listdir_local instead of the
    Google Cloud Storage REST API.

    Args:
      path: a Google Cloud Storage path of "/gs/bucketname" form.
      kwargs: other keyword arguments to be relayed to Google Cloud Storage.
        This can be used to select certain files with names matching a
        pattern. The dict passed in is not modified.
        Supported keywords:
        marker: a string after which (exclusive) to start listing.
        max_keys: the maximum number of filenames to return (sent to the
          service as 'max-keys').
        prefix: limits the returned filenames to those with this prefix.
          no regex.
        See Google Cloud Storage documentation for more details and examples.
        https://developers.google.com/storage/docs/reference-methods#getbucket

    Returns:
      a list containing filenames (matching a pattern) from the given path,
      each of the form '<path>/<key>'. The last filename can be used as a
      marker for another request for more files.

    Raises:
      google.appengine.api.files.InvalidFileNameError: if path is empty, not
        a string, not of the form /gs/bucketname, or the bucket does not
        exist (HTTP 404).
      google.appengine.api.files.PermissionDeniedError: if the service
        answers with HTTP 401.
      google.appengine.api.files.InvalidParameterError: if the response body
        contains exactly one Error element.
    """
    if not path:
        raise files.InvalidFileNameError('Empty path')
    if not isinstance(path, six.string_types):
        # Wrap in a tuple so a tuple-valued path cannot break formatting.
        raise files.InvalidFileNameError(
            'Expected string for path %s' % (path,))
    if not _GS_BUCKETPATH_REGEX.match(path):
        raise files.InvalidFileNameError(
            'Google storage path must have the form /gs/bucketname')

    if kwargs is not None:
        # Work on a copy so the caller's dict is left untouched.
        kwargs = dict(kwargs)
        if 'max_keys' in kwargs:
            # The REST API spells this parameter with a hyphen.
            kwargs['max-keys'] = kwargs.pop('max_keys')

    if ('SERVER_SOFTWARE' not in os.environ or
            os.environ['SERVER_SOFTWARE'].startswith('Development')):
        return _listdir_local(path, kwargs)

    bucketname = path[len(_GS_PREFIX):]
    access_token = app_identity.get_access_token(
        _GS_RESTFUL_SCOPE_READ_ONLY)[0]
    request_headers = {
        'Authorization': 'OAuth %s' % access_token,
        'x-goog-api-version': _GS_RESTFUL_API_VERSION,
    }

    url = 'https://%s/%s' % (_GS_RESTFUL_URL, bucketname)
    if kwargs:
        url += '/?' + urlencode(kwargs)

    response = urlfetch.fetch(url=url, headers=request_headers, deadline=60)
    if response.status_code == 404:
        raise files.InvalidFileNameError(
            'Bucket %s does not exist.' % bucketname)
    elif response.status_code == 401:
        raise files.PermissionDeniedError(
            'Permission denied to read bucket %s.' % bucketname)

    dom = minidom.parseString(response.content)

    def _text_value(node):
        # An empty element has no child node; report it as an empty string
        # rather than failing with AttributeError.
        child = node.firstChild
        return child.nodeValue if child is not None else ''

    error = dom.getElementsByTagName('Error')
    if len(error) == 1:
        details = error[0].getElementsByTagName('Details')
        if len(details) == 1:
            raise files.InvalidParameterError(_text_value(details[0]))
        code = _text_value(error[0].getElementsByTagName('Code')[0])
        msg = _text_value(error[0].getElementsByTagName('Message')[0])
        raise files.InvalidParameterError('%s: %s' % (code, msg))

    return ['/'.join([path, _text_value(key)])
            for key in dom.getElementsByTagName('Key')]