def test_filesystem(self):
    """Check exists(), rename() and delete() on Hfilesystem.

    A file is created at `path` first, so the existence check on it is
    expected to succeed while a sibling path that was never written is
    expected to be reported as missing.
    """
    # Open for writing and close straight away; only the file's presence
    # matters for the assertions below.
    Hfile(hostname, port, path, mode='w').close()

    filesystem = Hfilesystem(hostname, port)
    missing_path = path + 'doesnotexist'
    renamed_path = path + 'renamed'

    self.assertTrue(filesystem.exists(path))
    self.assertFalse(filesystem.exists(missing_path))

    # Move the file, then remove it under its new name.
    self.assertTrue(filesystem.rename(path, renamed_path))
    self.assertTrue(filesystem.delete(renamed_path))

    # Nothing is left at the original location, so deleting it must fail.
    self.assertFalse(filesystem.delete(path))
class HdfsBlobStorage(blobstore_stub.BlobStorage):
    """Storage mechanism for storing blob data on HDFS."""

    # Number of bytes requested from the source stream per read in StoreBlob.
    _READ_CHUNK_SIZE = 1 << 20

    def __init__(self, server, port, storage_directory, app_id):
        """Constructor.

        Args:
          server: HDFS server address.
          port: HDFS listen port.
          storage_directory: Directory within which to store blobs.
          app_id: App id to store blobs on behalf of.
        """
        self._server = server
        self._port = port
        self._storage_directory = storage_directory
        self._app_id = app_id
        # One filesystem handle is shared by every exists/mkdir/delete call.
        self._fs = Hfilesystem(self._server, self._port)

    @classmethod
    def _BlobKey(cls, blob_key):
        """Normalize to instance of BlobKey.

        Args:
          blob_key: A blobstore.BlobKey, or any value whose unicode() form
            is the key.

        Returns:
          The argument itself when it is already a blobstore.BlobKey,
          otherwise a new blobstore.BlobKey built from unicode(blob_key).
        """
        if isinstance(blob_key, blobstore.BlobKey):
            return blob_key
        return blobstore.BlobKey(unicode(blob_key))

    def _DirectoryForBlob(self, blob_key):
        """Determine the directory in which a blob is stored.

        Each blob lives in a directory below the storage directory that is
        named after the app id and the character at index 1 of the blob
        key's string form (the character at index 0 is skipped).

        So, for app id "myapp", blobs with blob-keys:

          _ACFDEDG
          _MNOPQRS
          _RSTUVWX

        Are stored in:

          <storage-dir>/myapp/A
          <storage-dir>/myapp/M
          <storage-dir>/myapp/R

        Args:
          blob_key: Blob key to determine directory for.

        Returns:
          Path, starting with this object's storage directory, of the
          directory where the blob is stored or should be stored.

        Raises:
          IndexError: If the string form of the key is shorter than two
            characters.
        """
        blob_key = self._BlobKey(blob_key)
        return os.path.join(self._storage_directory,
                            self._app_id,
                            str(blob_key)[1])

    def _FileForBlob(self, blob_key):
        """Calculate full filename to store blob contents in.

        The file name is the string form of the key with its leading
        character removed, placed inside the directory returned by
        _DirectoryForBlob. This method does not check to see if the file
        actually exists.

        Args:
          blob_key: Blob key of blob to calculate file for.

        Returns:
          Complete path for file used for storing blob.
        """
        blob_key = self._BlobKey(blob_key)
        return os.path.join(self._DirectoryForBlob(blob_key),
                            str(blob_key)[1:])

    def StoreBlob(self, blob_key, blob_stream):
        """Store blob stream to HDFS.

        The blob's directory is created first when it does not exist yet.
        The stream is then copied in chunks of _READ_CHUNK_SIZE bytes until
        a read returns an empty result.

        Args:
          blob_key: Blob key of blob to store.
          blob_stream: Stream or stream-like object that will generate blob
            content.
        """
        # Pass the key as a logging argument so the message is only
        # formatted when debug logging is enabled.
        logging.debug('storing blob with key %s', blob_key)
        blob_key = self._BlobKey(blob_key)
        blob_directory = self._DirectoryForBlob(blob_key)
        if not self._fs.exists(blob_directory):
            self._fs.mkdir(blob_directory)
        blob_file = self._FileForBlob(blob_key)
        hdfs_file = Hfile(self._server, self._port, blob_file, mode='w')
        try:
            while True:
                block = blob_stream.read(self._READ_CHUNK_SIZE)
                if not block:
                    break
                hdfs_file.write(block)
        finally:
            # Always release the HDFS handle, even if a read or write fails.
            hdfs_file.close()

    def OpenBlob(self, blob_key):
        """Open blob file for streaming.

        Args:
          blob_key: Blob-key of existing blob to open for reading.

        Returns:
          Hfile opened in read mode on the blob's file. The caller is
          responsible for closing it.
        """
        logging.debug('reading blob with key %s', blob_key)
        return Hfile(self._server, self._port,
                     self._FileForBlob(blob_key), mode='r')

    def DeleteBlob(self, blob_key):
        """Delete blob data from HDFS.

        The result of the filesystem delete call is discarded, so a blob
        that does not exist is not reported to the caller.
        NOTE(review): this assumes Hfilesystem.delete signals a missing
        path through its return value rather than by raising; confirm.

        Args:
          blob_key: Blob-key of existing blob to delete.
        """
        logging.debug('deleting blob with key %s', blob_key)
        self._fs.delete(self._FileForBlob(blob_key))
class HdfsBlobStorage(blobstore_stub.BlobStorage):
    """Storage mechanism for storing blob data on HDFS."""

    # Number of bytes requested from the source stream per read in StoreBlob.
    _READ_CHUNK_SIZE = 1 << 20

    def __init__(self, server, port, storage_directory, app_id):
        """Constructor.

        Args:
          server: HDFS server address.
          port: HDFS listen port.
          storage_directory: Directory within which to store blobs.
          app_id: App id to store blobs on behalf of.
        """
        self._server = server
        self._port = port
        self._storage_directory = storage_directory
        self._app_id = app_id
        # One filesystem handle is shared by every exists/mkdir/delete call.
        self._fs = Hfilesystem(self._server, self._port)

    @classmethod
    def _BlobKey(cls, blob_key):
        """Normalize to instance of BlobKey.

        Args:
          blob_key: A blobstore.BlobKey, or any value whose unicode() form
            is the key.

        Returns:
          The argument itself when it is already a blobstore.BlobKey,
          otherwise a new blobstore.BlobKey built from unicode(blob_key).
        """
        if isinstance(blob_key, blobstore.BlobKey):
            return blob_key
        return blobstore.BlobKey(unicode(blob_key))

    def _DirectoryForBlob(self, blob_key):
        """Determine the directory in which a blob is stored.

        Each blob lives in a directory below the storage directory that is
        named after the app id and the character at index 1 of the blob
        key's string form (the character at index 0 is skipped).

        So, for app id "myapp", blobs with blob-keys:

          _ACFDEDG
          _MNOPQRS
          _RSTUVWX

        Are stored in:

          <storage-dir>/myapp/A
          <storage-dir>/myapp/M
          <storage-dir>/myapp/R

        Args:
          blob_key: Blob key to determine directory for.

        Returns:
          Path, starting with this object's storage directory, of the
          directory where the blob is stored or should be stored.

        Raises:
          IndexError: If the string form of the key is shorter than two
            characters.
        """
        blob_key = self._BlobKey(blob_key)
        return os.path.join(self._storage_directory,
                            self._app_id,
                            str(blob_key)[1])

    def _FileForBlob(self, blob_key):
        """Calculate full filename to store blob contents in.

        The file name is the string form of the key with its leading
        character removed, placed inside the directory returned by
        _DirectoryForBlob. This method does not check to see if the file
        actually exists.

        Args:
          blob_key: Blob key of blob to calculate file for.

        Returns:
          Complete path for file used for storing blob.
        """
        blob_key = self._BlobKey(blob_key)
        return os.path.join(self._DirectoryForBlob(blob_key),
                            str(blob_key)[1:])

    def StoreBlob(self, blob_key, blob_stream):
        """Store blob stream to HDFS.

        The blob's directory is created first when it does not exist yet.
        The stream is then copied in chunks of _READ_CHUNK_SIZE bytes until
        a read returns an empty result.

        Args:
          blob_key: Blob key of blob to store.
          blob_stream: Stream or stream-like object that will generate blob
            content.
        """
        # Pass the key as a logging argument so the message is only
        # formatted when debug logging is enabled.
        logging.debug('storing blob with key %s', blob_key)
        blob_key = self._BlobKey(blob_key)
        blob_directory = self._DirectoryForBlob(blob_key)
        if not self._fs.exists(blob_directory):
            self._fs.mkdir(blob_directory)
        blob_file = self._FileForBlob(blob_key)
        hdfs_file = Hfile(self._server, self._port, blob_file, mode='w')
        try:
            while True:
                block = blob_stream.read(self._READ_CHUNK_SIZE)
                if not block:
                    break
                hdfs_file.write(block)
        finally:
            # Always release the HDFS handle, even if a read or write fails.
            hdfs_file.close()

    def OpenBlob(self, blob_key):
        """Open blob file for streaming.

        Args:
          blob_key: Blob-key of existing blob to open for reading.

        Returns:
          Hfile opened in read mode on the blob's file. The caller is
          responsible for closing it.
        """
        logging.debug('reading blob with key %s', blob_key)
        return Hfile(self._server, self._port,
                     self._FileForBlob(blob_key), mode='r')

    def DeleteBlob(self, blob_key):
        """Delete blob data from HDFS.

        The result of the filesystem delete call is discarded, so a blob
        that does not exist is not reported to the caller.
        NOTE(review): this assumes Hfilesystem.delete signals a missing
        path through its return value rather than by raising; confirm.

        Args:
          blob_key: Blob-key of existing blob to delete.
        """
        logging.debug('deleting blob with key %s', blob_key)
        self._fs.delete(self._FileForBlob(blob_key))