class ThreadedIOBase(IOBase):

    def __init__(self, *, config: Config, name: str, module_configuration: ConfigDict, url: str,
                 block_size: int) -> None:
        super().__init__(config=config,
                         name=name,
                         module_configuration=module_configuration,
                         url=url,
                         block_size=block_size)

        self._simultaneous_reads = config.get_from_dict(module_configuration, 'simultaneousReads', types=int)
        self._simultaneous_writes = config.get_from_dict(module_configuration, 'simultaneousWrites', types=int)
        self._read_executor: Optional[JobExecutor] = None
        self._write_executor: Optional[JobExecutor] = None

    def open_r(self) -> None:
        self._read_executor = JobExecutor(name='IO-Read', workers=self._simultaneous_reads, blocking_submit=False)

    @abstractmethod
    def _read(self, block: DereferencedBlock) -> Tuple[DereferencedBlock, bytes]:
        raise NotImplementedError()

    def read(self, block: Union[DereferencedBlock, Block]) -> None:
        block_deref = block.deref() if isinstance(block, Block) else block

        def job():
            return self._read(block_deref)

        assert self._read_executor is not None
        self._read_executor.submit(job)

    def read_sync(self, block: Union[DereferencedBlock, Block]) -> bytes:
        block_deref = block.deref() if isinstance(block, Block) else block
        return self._read(block_deref)[1]

    def read_get_completed(self,
                           timeout: Optional[int] = None
                          ) -> Iterator[Union[Tuple[DereferencedBlock, bytes], BaseException]]:
        assert self._read_executor is not None
        return self._read_executor.get_completed(timeout=timeout)

    def open_w(self, size: int, force: bool = False, sparse: bool = False) -> None:
        self._write_executor = JobExecutor(name='IO-Write', workers=self._simultaneous_writes, blocking_submit=True)

    def write(self, block: DereferencedBlock, data: bytes) -> None:

        def job():
            return self._write(block, data)

        assert self._write_executor is not None
        self._write_executor.submit(job)

    def write_sync(self, block: DereferencedBlock, data: bytes) -> None:
        self._write(block, data)

    def write_get_completed(self, timeout: Optional[int] = None) -> Iterator[Union[DereferencedBlock, BaseException]]:
        assert self._write_executor is not None
        return self._write_executor.get_completed(timeout=timeout)

    @abstractmethod
    def _write(self, block: DereferencedBlock, data: bytes) -> DereferencedBlock:
        raise NotImplementedError()

    def close(self) -> None:
        if self._read_executor:
            self._read_executor.shutdown()
        if self._write_executor:
            self._write_executor.shutdown()
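
# Illustrative sketch, not part of the original module: it shows how a hypothetical subclass could
# plug into ThreadedIOBase by implementing just _read() and _write(), and how a caller drives the
# asynchronous read path (submit via read(), then drain results via read_get_completed()). The
# class NullIO and the helper drain_reads() are assumptions made up for this example; only
# ThreadedIOBase, JobExecutor, DereferencedBlock and Block come from the code above.

class NullIO(ThreadedIOBase):
    """Hypothetical backend that returns zero-filled blocks and discards writes."""

    def _read(self, block: DereferencedBlock) -> Tuple[DereferencedBlock, bytes]:
        # A real backend would seek to block.id * self.block_size and read block.size bytes.
        return block, b'\0' * block.size

    def _write(self, block: DereferencedBlock, data: bytes) -> DereferencedBlock:
        # A real backend would write data at block.id * self.block_size.
        return block


def drain_reads(io: ThreadedIOBase, blocks: Iterable[Block]) -> None:
    # Assumed usage pattern: open the read executor, submit all reads (blocking_submit=False for
    # the read executor, as set in open_r() above), then collect the completed jobs.
    io.open_r()
    for block in blocks:
        io.read(block)
    # Per the annotated return type, get_completed() yields either (block, data) tuples or the
    # exception raised inside the job.
    for result in io.read_get_completed():
        if isinstance(result, BaseException):
            raise result
        completed_block, data = result
    io.close()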

class StorageBase(ReprMixIn, metaclass=ABCMeta):

    _CHECKSUM_KEY = 'checksum'
    _CREATED_KEY = 'created'
    _MODIFIED_KEY = 'modified'
    _HMAC_KEY = 'hmac'
    _METADATA_VERSION_KEY = 'metadata_version'
    _OBJECT_SIZE_KEY = 'object_size'
    _SIZE_KEY = 'size'
    _TRANSFORMS_KEY = 'transforms'

    _META_SUFFIX = '.meta'

    def __init__(self, *, config: Config, name: str, storage_id: int, module_configuration: ConfigDict) -> None:
        self._name = name
        self._storage_id = storage_id
        self._active_transforms: List[TransformBase] = []

        active_transforms = Config.get_from_dict(module_configuration, 'activeTransforms', None, types=list)
        if active_transforms is not None:
            for transform in active_transforms:
                self._active_transforms.append(TransformFactory.get_by_name(transform))
            logger.info('Active transforms for storage {}: {}.'.format(
                name, ', '.join(
                    ['{} ({})'.format(transform.name, transform.module) for transform in self._active_transforms])))

        simultaneous_writes = Config.get_from_dict(module_configuration, 'simultaneousWrites', types=int)
        simultaneous_reads = Config.get_from_dict(module_configuration, 'simultaneousReads', types=int)
        simultaneous_removals = Config.get_from_dict(module_configuration, 'simultaneousRemovals', types=int)
        bandwidth_read = Config.get_from_dict(module_configuration, 'bandwidthRead', types=int)
        bandwidth_write = Config.get_from_dict(module_configuration, 'bandwidthWrite', types=int)

        self._consistency_check_writes = Config.get_from_dict(module_configuration,
                                                              'consistencyCheckWrites',
                                                              False,
                                                              types=bool)

        hmac_key_encoded = Config.get_from_dict(module_configuration, 'hmac.key', None, types=str)
        hmac_key: Optional[bytes] = None
        if hmac_key_encoded is None:
            hmac_password = Config.get_from_dict(module_configuration, 'hmac.password', None, types=str)
            if hmac_password is not None:
                hmac_kdf_salt = base64.b64decode(Config.get_from_dict(module_configuration, 'hmac.kdfSalt', types=str))
                hmac_kdf_iterations = Config.get_from_dict(module_configuration, 'hmac.kdfIterations', types=int)
                hmac_key = derive_key(salt=hmac_kdf_salt,
                                      iterations=hmac_kdf_iterations,
                                      key_length=32,
                                      password=hmac_password)
        else:
            hmac_key = base64.b64decode(hmac_key_encoded)

        self._dict_hmac: Optional[DictHMAC] = None
        if hmac_key is not None:
            logger.info('Enabling HMAC object metadata integrity protection for storage {}.'.format(name))
            self._dict_hmac = DictHMAC(hmac_key=self._HMAC_KEY, secret_key=hmac_key)

        self.read_throttling = TokenBucket()
        self.read_throttling.set_rate(bandwidth_read)  # 0 disables throttling
        self.write_throttling = TokenBucket()
        self.write_throttling.set_rate(bandwidth_write)  # 0 disables throttling

        self._read_executor = JobExecutor(name='Storage-Read', workers=simultaneous_reads, blocking_submit=False)
        self._write_executor = JobExecutor(name='Storage-Write', workers=simultaneous_writes, blocking_submit=True)
        self._remove_executor = JobExecutor(name='Storage-Remove', workers=simultaneous_removals, blocking_submit=True)

    @property
    def name(self) -> str:
        return self._name

    @property
    def storage_id(self) -> int:
        return self._storage_id

    def _build_metadata(self,
                        *,
                        size: int,
                        object_size: int,
                        transforms_metadata: List[Dict] = None,
                        checksum: str = None) -> Tuple[Dict, bytes]:

        timestamp = datetime.datetime.utcnow().isoformat(timespec='microseconds')
        metadata: Dict = {
            self._CREATED_KEY: timestamp,
            self._METADATA_VERSION_KEY: str(VERSIONS.object_metadata.current),
            self._MODIFIED_KEY: timestamp,
            self._OBJECT_SIZE_KEY: object_size,
            self._SIZE_KEY: size,
        }

        if checksum:
            metadata[self._CHECKSUM_KEY] = checksum

        if transforms_metadata:
            metadata[self._TRANSFORMS_KEY] = transforms_metadata

        if self._dict_hmac:
            self._dict_hmac.add_digest(metadata)

        return metadata, json.dumps(metadata, separators=(',', ':')).encode('utf-8')

    def _decode_metadata(self, *, metadata_json: bytes, key: str, data_length: int) -> Dict:
        metadata = json.loads(metadata_json.decode('utf-8'))

        if self._dict_hmac:
            self._dict_hmac.verify_digest(metadata)

        # We currently support only one object metadata version
        if self._METADATA_VERSION_KEY not in metadata:
            raise KeyError('Required object metadata key {} is missing for object {}.'.format(
                self._METADATA_VERSION_KEY, key))
        version_obj = semantic_version.Version(metadata[self._METADATA_VERSION_KEY])
        if version_obj not in VERSIONS.object_metadata.supported:
            raise ValueError('Unsupported object metadata version: "{}".'.format(str(version_obj)))

        for required_key in [self._CREATED_KEY, self._MODIFIED_KEY, self._OBJECT_SIZE_KEY, self._SIZE_KEY]:
            if required_key not in metadata:
                raise KeyError('Required object metadata key {} is missing for object {}.'.format(required_key, key))

        if data_length != metadata[self._OBJECT_SIZE_KEY]:
            raise ValueError('Length mismatch for object {}. Expected: {}, got: {}.'.format(
                key, metadata[self._OBJECT_SIZE_KEY], data_length))

        return metadata

    def _check_write(self, *, key: str, metadata_key: str, data_expected: bytes) -> None:
        data_actual = self._read_object(key)
        metadata_actual_json = self._read_object(metadata_key)

        # Return value is ignored
        self._decode_metadata(metadata_json=metadata_actual_json, key=key, data_length=len(data_actual))

        # Comparing encapsulated data here
        if data_expected != data_actual:
            raise ValueError('Written and read data of {} differ.'.format(key))

    def _write(self, block: DereferencedBlock, data: bytes) -> DereferencedBlock:
        data, transforms_metadata = self._encapsulate(data)

        metadata, metadata_json = self._build_metadata(size=block.size,
                                                       object_size=len(data),
                                                       checksum=block.checksum,
                                                       transforms_metadata=transforms_metadata)

        key = block.uid.storage_object_to_path()
        metadata_key = key + self._META_SUFFIX

        time.sleep(self.write_throttling.consume(len(data) + len(metadata_json)))
        t1 = time.time()
        try:
            self._write_object(key, data)
            self._write_object(metadata_key, metadata_json)
        except:
            try:
                self._rm_object(key)
                self._rm_object(metadata_key)
            except FileNotFoundError:
                pass
            raise
        t2 = time.time()

        logger.debug('{} wrote data of uid {} in {:.2f}s'.format(threading.current_thread().name, block.uid, t2 - t1))

        if self._consistency_check_writes:
            try:
                self._check_write(key=key, metadata_key=metadata_key, data_expected=data)
            except (KeyError, ValueError) as exception:
                raise InvalidBlockException('Check write of block {} (UID {}) failed.'.format(block.id, block.uid),
                                            block) from exception

        return block

    def write_block_async(self, block: Union[DereferencedBlock, Block], data: bytes) -> None:
        block_deref = block.deref() if isinstance(block, Block) else block

        def job():
            return self._write(block_deref, data)

        self._write_executor.submit(job)

    def write_block(self, block: Union[DereferencedBlock, Block], data: bytes) -> None:
        block_deref = block.deref() if isinstance(block, Block) else block
        self._write(block_deref, data)

    def write_get_completed(self, timeout: int = None) -> Iterator[Union[DereferencedBlock, BaseException]]:
        return self._write_executor.get_completed(timeout=timeout)

    def _read(self, block: DereferencedBlock, metadata_only: bool) -> Tuple[DereferencedBlock, Optional[bytes], Dict]:
        key = block.uid.storage_object_to_path()
        metadata_key = key + self._META_SUFFIX
        data: Optional[bytes] = None
        try:
            t1 = time.time()
            if not metadata_only:
                data = self._read_object(key)
                data_length = len(data)
            else:
                data_length = self._read_object_length(key)
            metadata_json = self._read_object(metadata_key)
            time.sleep(self.read_throttling.consume((len(data) if data else 0) + len(metadata_json)))
            t2 = time.time()
        except FileNotFoundError as exception:
            raise InvalidBlockException(
                'Object metadata or data of block {} (UID {}) not found.'.format(block.id, block.uid),
                block) from exception

        try:
            metadata = self._decode_metadata(metadata_json=metadata_json, key=key, data_length=data_length)
        except (KeyError, ValueError) as exception:
            raise InvalidBlockException('Object metadata of block {} (UID {}) is invalid.'.format(block.id, block.uid),
                                        block) from exception

        if self._CHECKSUM_KEY not in metadata:
            raise InvalidBlockException(
                'Required object metadata key {} is missing for block {} (UID {}).'.format(
                    self._CHECKSUM_KEY, block.id, block.uid), block)

        if not metadata_only and self._TRANSFORMS_KEY in metadata:
            data = self._decapsulate(data, metadata[self._TRANSFORMS_KEY])  # type: ignore

        logger.debug('{} read data of uid {} in {:.2f}s{}'.format(threading.current_thread().name, block.uid, t2 - t1,
                                                                  ' (metadata only)' if metadata_only else ''))

        return block, data, metadata

    def read_block_async(self, block: Block, metadata_only: bool = False) -> None:

        def job():
            return self._read(block.deref(), metadata_only)

        self._read_executor.submit(job)

    def read_block(self, block: Block, metadata_only: bool = False) -> Optional[bytes]:
        return self._read(block.deref(), metadata_only)[1]

    def read_get_completed(self,
                           timeout: int = None
                          ) -> Iterator[Union[Tuple[DereferencedBlock, bytes, Dict], BaseException]]:
        return self._read_executor.get_completed(timeout=timeout)

    def check_block_metadata(self, *, block: DereferencedBlock, data_length: Optional[int], metadata: Dict) -> None:
        # Existence of keys has already been checked in _decode_metadata() and _read()
        if metadata[self._SIZE_KEY] != block.size:
            raise ValueError(
                'Mismatch between recorded block size and data length in object metadata for block {} (UID {}). '
                'Expected: {}, got: {}.'.format(block.id, block.uid, block.size, metadata[self._SIZE_KEY]))

        if data_length and data_length != block.size:
            raise ValueError('Mismatch between recorded block size and actual data length for block {} (UID {}). '
                             'Expected: {}, got: {}.'.format(block.id, block.uid, block.size, data_length))

        if block.checksum != metadata[self._CHECKSUM_KEY]:
            raise ValueError(
                'Mismatch between recorded block checksum and checksum in object metadata for block {} (UID {}). '
                'Expected: {}, got: {}.'.format(
                    block.id,
                    block.uid,
                    cast(str, block.checksum)[:16],  # We know that block.checksum is set
                    metadata[self._CHECKSUM_KEY][:16]))

    def _rm_block(self, uid: BlockUid) -> BlockUid:
        key = uid.storage_object_to_path()
        metadata_key = key + self._META_SUFFIX
        try:
            self._rm_object(key)
        except FileNotFoundError as exception:
            raise BlockNotFoundError('Block UID {} not found on storage.'.format(str(uid)), uid) from exception
        finally:
            try:
                self._rm_object(metadata_key)
            except FileNotFoundError:
                pass
        return uid

    def rm_block_async(self, uid: BlockUid) -> None:

        def job():
            return self._rm_block(uid)

        self._remove_executor.submit(job)

    def rm_block(self, uid: BlockUid) -> None:
        self._rm_block(uid)

    def rm_get_completed(self, timeout: int = None) -> Iterator[Union[BlockUid, BaseException]]:
        return self._remove_executor.get_completed(timeout=timeout)

    def wait_rms_finished(self):
        self._remove_executor.wait_for_all()

    # def rm_many_blocks(self, uids: Union[Sequence[BlockUid], AbstractSet[BlockUid]]) -> List[BlockUid]:
    #     keys = [uid.storage_object_to_path() for uid in uids]
    #     metadata_keys = [key + self._META_SUFFIX for key in keys]
    #
    #     errors = self._rm_many_objects(keys)
    #     self._rm_many_objects(metadata_keys)
    #     return [cast(BlockUid, BlockUid.storage_path_to_object(error)) for error in errors]

    def list_blocks(self) -> Iterable[BlockUid]:
        keys = self._list_objects(BlockUid.storage_prefix())
        for key in keys:
            assert isinstance(key, str)
            if key.endswith(self._META_SUFFIX):
                continue
            try:
                yield cast(BlockUid, BlockUid.storage_path_to_object(key))
            except (RuntimeError, ValueError):
                # Ignore any keys which don't match our pattern to account for stray objects/files
                pass

    def list_versions(self) -> Iterable[VersionUid]:
        keys = self._list_objects(VersionUid.storage_prefix())
        for key in keys:
            assert isinstance(key, str)
            if key.endswith(self._META_SUFFIX):
                continue
            try:
                yield cast(VersionUid, VersionUid.storage_path_to_object(key))
            except (RuntimeError, ValueError):
                # Ignore any keys which don't match our pattern to account for stray objects/files
                pass

    def read_version(self, version_uid: VersionUid) -> str:
        key = version_uid.storage_object_to_path()
        metadata_key = key + self._META_SUFFIX
        data = self._read_object(key)
        metadata_json = self._read_object(metadata_key)

        metadata = self._decode_metadata(metadata_json=metadata_json, key=key, data_length=len(data))

        if self._TRANSFORMS_KEY in metadata:
            data = self._decapsulate(data, metadata[self._TRANSFORMS_KEY])

        if len(data) != metadata[self._SIZE_KEY]:
            raise ValueError('Length mismatch of original data for object {}. Expected: {}, got: {}.'.format(
                key, metadata[self._SIZE_KEY], len(data)))

        return data.decode('utf-8')

    def write_version(self, version_uid: VersionUid, data: str, overwrite: Optional[bool] = False) -> None:
        key = version_uid.storage_object_to_path()
        metadata_key = key + self._META_SUFFIX

        if not overwrite:
            try:
                self._read_object(key)
            except FileNotFoundError:
                pass
            else:
                raise FileExistsError('Version {} already exists in storage.'.format(version_uid.v_string))

        data_bytes = data.encode('utf-8')
        size = len(data_bytes)

        data_bytes, transforms_metadata = self._encapsulate(data_bytes)
        metadata, metadata_json = self._build_metadata(size=size,
                                                       object_size=len(data_bytes),
                                                       transforms_metadata=transforms_metadata)

        try:
            self._write_object(key, data_bytes)
            self._write_object(metadata_key, metadata_json)
        except:
            try:
                self._rm_object(key)
                self._rm_object(metadata_key)
            except FileNotFoundError:
                pass
            raise

        if self._consistency_check_writes:
            self._check_write(key=key, metadata_key=metadata_key, data_expected=data_bytes)

    def rm_version(self, version_uid: VersionUid) -> None:
        key = version_uid.storage_object_to_path()
        metadata_key = key + self._META_SUFFIX
        try:
            self._rm_object(key)
        finally:
            try:
                self._rm_object(metadata_key)
            except FileNotFoundError:
                pass

    def storage_stats(self) -> Tuple[int, int]:
        objects_count = 0
        objects_size = 0
        for key, size in cast(Iterable[Tuple[str, int]], self._list_objects(include_size=True)):
            objects_count += 1
            objects_size += size
        return objects_count, objects_size

    def _encapsulate(self, data: bytes) -> Tuple[bytes, List]:
        if self._active_transforms is not None:
            transforms_metadata = []
            for transform in self._active_transforms:
                data_encapsulated, materials = transform.encapsulate(data=data)
                if data_encapsulated:
                    transforms_metadata.append({
                        'name': transform.name,
                        'module': transform.module,
                        'materials': materials,
                    })
                    data = data_encapsulated
            return data, transforms_metadata
        else:
            return data, []

    def _decapsulate(self, data: bytes, transforms_metadata: Sequence[Dict]) -> bytes:
        for element in reversed(transforms_metadata):
            name = element['name']
            module = element['module']
            transform = TransformFactory.get_by_name(name)
            if transform:
                if module != transform.module:
                    raise ConfigurationError('Mismatch between object transform module and configured module for ' +
                                             '{} ({} != {})'.format(name, module, transform.module))

                data = transform.decapsulate(data=data, materials=element['materials'])
            else:
                raise IOError('Unknown transform {} in object metadata.'.format(name))
        return data

    def wait_writes_finished(self) -> None:
        self._write_executor.wait_for_all()

    def use_read_cache(self, enable: bool) -> bool:
        return False

    def close(self) -> None:
        self._read_executor.shutdown()
        self._write_executor.shutdown()
        self._remove_executor.shutdown()

    @abstractmethod
    def _write_object(self, key: str, data: bytes):
        raise NotImplementedError

    @abstractmethod
    def _read_object(self, key: str) -> bytes:
        raise NotImplementedError

    @abstractmethod
    def _read_object_length(self, key: str) -> int:
        raise NotImplementedError

    @abstractmethod
    def _rm_object(self, key: str) -> None:
        raise NotImplementedError

    @abstractmethod
    def _list_objects(self,
                      prefix: str = None,
                      include_size: bool = False) -> Union[Iterable[str], Iterable[Tuple[str, int]]]:
        raise NotImplementedError
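
# Illustrative sketch, not part of the original module: a minimal in-memory storage built on
# StorageBase to show which primitives a concrete backend has to provide (_write_object,
# _read_object, _read_object_length, _rm_object, _list_objects) and how the data object and its
# '.meta' companion travel together. The class InMemoryStorage is an assumption made up for this
# example; the constructor arguments mirror StorageBase.__init__ as shown above. Missing objects
# must surface as FileNotFoundError because _read(), _rm_block() and the write rollback path all
# catch exactly that exception.

class InMemoryStorage(StorageBase):

    def __init__(self, *, config: Config, name: str, storage_id: int, module_configuration: ConfigDict) -> None:
        super().__init__(config=config, name=name, storage_id=storage_id, module_configuration=module_configuration)
        self._objects: Dict[str, bytes] = {}

    def _write_object(self, key: str, data: bytes):
        self._objects[key] = data

    def _read_object(self, key: str) -> bytes:
        try:
            return self._objects[key]
        except KeyError:
            raise FileNotFoundError('Object {} not found.'.format(key)) from None

    def _read_object_length(self, key: str) -> int:
        return len(self._read_object(key))

    def _rm_object(self, key: str) -> None:
        try:
            del self._objects[key]
        except KeyError:
            raise FileNotFoundError('Object {} not found.'.format(key)) from None

    def _list_objects(self,
                      prefix: str = None,
                      include_size: bool = False) -> Union[Iterable[str], Iterable[Tuple[str, int]]]:
        keys = [key for key in self._objects if prefix is None or key.startswith(prefix)]
        return [(key, len(self._objects[key])) for key in keys] if include_size else keys

# Callers would then submit blocks via write_block_async(), collect results or exceptions via
# write_get_completed(), and call wait_writes_finished() before close(); every data object gets a
# JSON metadata companion stored under key + '.meta'.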

class IO(IOBase):
    _pool_name: Optional[str]
    _image_name: Optional[str]
    _snapshot_name: Optional[str]

    def __init__(self, *, config: Config, name: str, module_configuration: ConfigDict, url: str,
                 block_size: int) -> None:
        super().__init__(config=config,
                         name=name,
                         module_configuration=module_configuration,
                         url=url,
                         block_size=block_size)

        if self.parsed_url.username or self.parsed_url.password or self.parsed_url.hostname or self.parsed_url.port \
                or self.parsed_url.params or self.parsed_url.fragment or self.parsed_url.query:
            raise UsageError('The supplied URL {} is invalid.'.format(self.url))

        ceph_config_file = config.get_from_dict(module_configuration, 'cephConfigFile', types=str)
        client_identifier = config.get_from_dict(module_configuration, 'clientIdentifier', types=str)
        self._cluster = rados.Rados(conffile=ceph_config_file, rados_id=client_identifier)
        self._cluster.connect()
        # create a bitwise or'd list of the configured features
        self._new_image_features = 0
        for feature in config.get_from_dict(module_configuration, 'newImageFeatures', types=list):
            try:
                self._new_image_features = self._new_image_features | getattr(rbd, feature)
            except AttributeError:
                raise ConfigurationError('{}: Unknown image feature {}.'.format(module_configuration.full_name,
                                                                                feature))

        self._pool_name = None
        self._image_name = None
        self._snapshot_name = None

        self._simultaneous_reads = config.get_from_dict(module_configuration, 'simultaneousReads', types=int)
        self._simultaneous_writes = config.get_from_dict(module_configuration, 'simultaneousWrites', types=int)
        self._read_executor: Optional[JobExecutor] = None
        self._write_executor: Optional[JobExecutor] = None

    def open_r(self) -> None:
        self._read_executor = JobExecutor(name='IO-Read', workers=self._simultaneous_reads, blocking_submit=False)

        re_match = re.match('^([^/]+)/([^@]+)(?:@(.+))?$', self.parsed_url.path)
        if not re_match:
            raise UsageError(
                'URL {} is invalid. Need {}:<pool>/<imagename> or {}:<pool>/<imagename>@<snapshotname>.'.format(
                    self.url, self.name, self.name))
        self._pool_name, self._image_name, self._snapshot_name = re_match.groups()

        # try opening it and quit if that's not possible.
        try:
            ioctx = self._cluster.open_ioctx(self._pool_name)
        except rados.ObjectNotFound:
            raise FileNotFoundError('Ceph pool {} not found.'.format(self._pool_name)) from None

        try:
            rbd.Image(ioctx, self._image_name, self._snapshot_name, read_only=True)
        except rbd.ImageNotFound:
            raise FileNotFoundError('RBD image or snapshot {} not found.'.format(self.url)) from None

    def open_w(self, size: int, force: bool = False, sparse: bool = False) -> None:
        self._write_executor = JobExecutor(name='IO-Write', workers=self._simultaneous_writes, blocking_submit=True)

        re_match = re.match('^([^/]+)/([^@]+)$', self.parsed_url.path)
        if not re_match:
            raise UsageError('URL {} is invalid. Need {}:<pool>/<imagename>.'.format(self.url, self.name))
        self._pool_name, self._image_name = re_match.groups()

        # try opening it and quit if that's not possible.
        try:
            ioctx = self._cluster.open_ioctx(self._pool_name)
        except rados.ObjectNotFound:
            raise FileNotFoundError('Ceph pool {} not found.'.format(self._pool_name)) from None

        try:
            image = rbd.Image(ioctx, self._image_name)
        except rbd.ImageNotFound:
            rbd.RBD().create(ioctx, self._image_name, size, old_format=False, features=self._new_image_features)
            rbd.Image(ioctx, self._image_name)
        else:
            try:
                if not force:
                    raise FileExistsError(
                        'RBD image {} already exists. Force the restore if you want to overwrite it.'.format(self.url))
                else:
                    image_size = image.size()
                    if size > image_size:
                        raise IOError(
                            'RBD image {} is too small. Its size is {} bytes, but we need {} bytes for the restore.'.format(
                                self.url, image_size, size))

                    # If this is an existing image and sparse is true discard all objects from this image
                    # RBD discard only supports a maximum region length of 0x7fffffff.
                    if sparse:
                        logger.debug('Discarding all objects of RBD image {}.'.format(self.url))
                        region_start = 0
                        bytes_to_end = image_size
                        while bytes_to_end > 0:
                            region_length = min(0x7fffffff, bytes_to_end)
                            image.discard(region_start, region_length)
                            region_start += region_length
                            bytes_to_end -= region_length
            finally:
                image.close()

    def close(self) -> None:
        if self._read_executor:
            self._read_executor.shutdown()
        if self._write_executor:
            self._write_executor.shutdown()

    def size(self) -> int:
        assert self._pool_name is not None and self._image_name is not None
        ioctx = self._cluster.open_ioctx(self._pool_name)
        with rbd.Image(ioctx, self._image_name, self._snapshot_name, read_only=True) as image:
            size = image.size()
        return size

    def _read(self, block: DereferencedBlock) -> Tuple[DereferencedBlock, bytes]:
        offset = block.id * self.block_size
        t1 = time.time()
        ioctx = self._cluster.open_ioctx(self._pool_name)
        with rbd.Image(ioctx, self._image_name, self._snapshot_name, read_only=True) as image:
            data = image.read(offset, block.size, rados.LIBRADOS_OP_FLAG_FADVISE_DONTNEED)
        t2 = time.time()

        if not data:
            raise EOFError('End of file reached on {} when there should be data.'.format(self.url))

        logger.debug('{} read block {} in {:.3f}s'.format(
            threading.current_thread().name,
            block.id,
            t2 - t1,
        ))

        return block, data

    def read(self, block: Union[DereferencedBlock, Block]) -> None:
        block_deref = block.deref() if isinstance(block, Block) else block

        def job():
            return self._read(block_deref)

        assert self._read_executor is not None
        self._read_executor.submit(job)

    def read_sync(self, block: Union[DereferencedBlock, Block]) -> bytes:
        block_deref = block.deref() if isinstance(block, Block) else block
        return self._read(block_deref)[1]

    def read_get_completed(self,
                           timeout: Optional[int] = None
                          ) -> Iterator[Union[Tuple[DereferencedBlock, bytes], BaseException]]:
        assert self._read_executor is not None
        return self._read_executor.get_completed(timeout=timeout)

    def _write(self, block: DereferencedBlock, data: bytes) -> DereferencedBlock:
        offset = block.id * self.block_size
        t1 = time.time()
        ioctx = self._cluster.open_ioctx(self._pool_name)
        with rbd.Image(ioctx, self._image_name, self._snapshot_name) as image:
            written = image.write(data, offset, rados.LIBRADOS_OP_FLAG_FADVISE_DONTNEED)
        t2 = time.time()

        logger.debug('{} wrote block {} in {:.3f}s'.format(
            threading.current_thread().name,
            block.id,
            t2 - t1,
        ))

        assert written == len(data)

        return block

    def write(self, block: DereferencedBlock, data: bytes) -> None:

        def job():
            return self._write(block, data)

        assert self._write_executor is not None
        self._write_executor.submit(job)

    def write_sync(self, block: DereferencedBlock, data: bytes) -> None:
        self._write(block, data)

    def write_get_completed(self, timeout: Optional[int] = None) -> Iterator[Union[DereferencedBlock, BaseException]]:
        assert self._write_executor is not None
        return self._write_executor.get_completed(timeout=timeout)
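
# Illustrative sketch, not part of the original module: open_r() above accepts a path of the form
# '<pool>/<imagename>' with an optional '@<snapshotname>' suffix. The helper below just re-runs
# that regular expression so the accepted path shapes are easy to see; parse_rbd_path and the
# example paths are assumptions made up for this example.

def parse_rbd_path(path: str) -> Tuple[str, str, Optional[str]]:
    re_match = re.match('^([^/]+)/([^@]+)(?:@(.+))?$', path)
    if not re_match:
        raise ValueError('Path {} does not match <pool>/<imagename>[@<snapshotname>].'.format(path))
    pool_name, image_name, snapshot_name = re_match.groups()
    return pool_name, image_name, snapshot_name

# parse_rbd_path('rbd/vm-100-disk-1')             -> ('rbd', 'vm-100-disk-1', None)
# parse_rbd_path('rbd/vm-100-disk-1@backup-snap') -> ('rbd', 'vm-100-disk-1', 'backup-snap')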

class IO(IOBase):

    def __init__(self, *, config: Config, name: str, module_configuration: ConfigDict, url: str,
                 block_size: int) -> None:
        super().__init__(config=config,
                         name=name,
                         module_configuration=module_configuration,
                         url=url,
                         block_size=block_size)

        if self.parsed_url.username or self.parsed_url.password or self.parsed_url.hostname or self.parsed_url.port \
                or self.parsed_url.params or self.parsed_url.fragment or self.parsed_url.query:
            raise UsageError('The supplied URL {} is invalid.'.format(self.url))

        self._simultaneous_reads = config.get_from_dict(module_configuration, 'simultaneousReads', types=int)
        self._simultaneous_writes = config.get_from_dict(module_configuration, 'simultaneousWrites', types=int)
        self._read_executor: Optional[JobExecutor] = None
        self._write_executor: Optional[JobExecutor] = None

    def open_r(self) -> None:
        self._read_executor = JobExecutor(name='IO-Read', workers=self._simultaneous_reads, blocking_submit=False)

    def open_w(self, size: int, force: bool = False, sparse: bool = False) -> None:
        self._write_executor = JobExecutor(name='IO-Write', workers=self._simultaneous_writes, blocking_submit=True)

        if os.path.exists(self.parsed_url.path):
            if not force:
                raise FileExistsError('{} already exists. Force the restore if you want to overwrite it.'.format(
                    self.url))
            else:
                if size > self.size():
                    raise IOError('{} is too small. Its size is {} bytes, but we need {} bytes for the restore.'.format(
                        self.url, self.size(), size))
        else:
            with open(self.parsed_url.path, 'wb') as f:
                f.seek(size - 1)
                f.write(b'\0')

    def close(self) -> None:
        if self._read_executor:
            self._read_executor.shutdown()
        if self._write_executor:
            self._write_executor.shutdown()

    def size(self) -> int:
        with open(self.parsed_url.path, 'rb') as f:
            f.seek(0, 2)  # to the end
            size = f.tell()
        return size

    def _read(self, block: DereferencedBlock) -> Tuple[DereferencedBlock, bytes]:
        offset = block.id * self.block_size
        t1 = time.time()
        with open(self.parsed_url.path, 'rb') as f:
            f.seek(offset)
            data = f.read(block.size)
            os.posix_fadvise(f.fileno(), offset, block.size, os.POSIX_FADV_DONTNEED)
        t2 = time.time()

        if not data:
            raise EOFError('End of file reached on {} when there should be data.'.format(self.url))

        logger.debug('{} read block {} in {:.3f}s'.format(
            threading.current_thread().name,
            block.id,
            t2 - t1,
        ))

        return block, data

    def read(self, block: Union[DereferencedBlock, Block]) -> None:
        block_deref = block.deref() if isinstance(block, Block) else block

        def job():
            return self._read(block_deref)

        assert self._read_executor is not None
        self._read_executor.submit(job)

    def read_sync(self, block: Union[DereferencedBlock, Block]) -> bytes:
        block_deref = block.deref() if isinstance(block, Block) else block
        return self._read(block_deref)[1]

    def read_get_completed(self,
                           timeout: Optional[int] = None
                          ) -> Iterator[Union[Tuple[DereferencedBlock, bytes], BaseException]]:
        assert self._read_executor is not None
        return self._read_executor.get_completed(timeout=timeout)

    def _write(self, block: DereferencedBlock, data: bytes) -> DereferencedBlock:
        offset = block.id * self.block_size
        t1 = time.time()
        with open(self.parsed_url.path, 'rb+') as f:
            f.seek(offset)
            written = f.write(data)
            os.posix_fadvise(f.fileno(), offset, len(data), os.POSIX_FADV_DONTNEED)
        t2 = time.time()

        logger.debug('{} wrote block {} in {:.3f}s'.format(
            threading.current_thread().name,
            block.id,
            t2 - t1,
        ))

        assert written == len(data)

        return block

    def write(self, block: DereferencedBlock, data: bytes) -> None:

        def job():
            return self._write(block, data)

        assert self._write_executor is not None
        self._write_executor.submit(job)

    def write_sync(self, block: DereferencedBlock, data: bytes) -> None:
        self._write(block, data)

    def write_get_completed(self,
                            timeout: Optional[int] = None) -> Iterator[Union[DereferencedBlock, BaseException]]:
        assert self._write_executor is not None
        return self._write_executor.get_completed(timeout=timeout)
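
# Illustrative sketch, not part of the original module: open_w() above creates a new target file
# of the requested size by seeking to size - 1 and writing a single NUL byte, which yields a
# sparse file on filesystems that support it. The helper below isolates that trick;
# create_sparse_file and the example path/size are assumptions made up for this example.

def create_sparse_file(path: str, size: int) -> None:
    with open(path, 'wb') as f:
        f.seek(size - 1)  # no data is written for the skipped range
        f.write(b'\0')    # a single byte at the end fixes the apparent file size

# create_sparse_file('/tmp/restore.img', 10 * 1024 * 1024 * 1024)
# -> 10 GiB apparent size, almost no blocks actually allocated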

class IO(IOBase):
    _pool_name: Optional[str]
    _namespace_name: Optional[str]
    _image_name: Optional[str]
    _snapshot_name: Optional[str]

    def __init__(self, *, config: Config, name: str, module_configuration: ConfigDict, url: str,
                 block_size: int) -> None:
        super().__init__(config=config,
                         name=name,
                         module_configuration=module_configuration,
                         url=url,
                         block_size=block_size)

        if self.parsed_url.username or self.parsed_url.password or self.parsed_url.hostname or self.parsed_url.port \
                or self.parsed_url.params or self.parsed_url.fragment:
            raise UsageError('The supplied URL {} is invalid.'.format(self.url))
        if self.parsed_url.query:
            try:
                extra_ceph_conf = parse_qs(self.parsed_url.query,
                                           keep_blank_values=True,
                                           strict_parsing=True,
                                           errors='strict')
            except (ValueError, UnicodeError) as exception:
                raise UsageError('The supplied URL {} is invalid.'.format(self.url)) from exception

            # parse_qs returns the values as lists, only consider the first appearance of each key in the query string.
            extra_ceph_conf = {key: value[0] for key, value in extra_ceph_conf.items()}
        else:
            extra_ceph_conf = {}

        ceph_config_file = config.get_from_dict(module_configuration, 'cephConfigFile', types=str)
        if 'client_identifier' in extra_ceph_conf:
            client_identifier = extra_ceph_conf['client_identifier']
            del extra_ceph_conf['client_identifier']
        else:
            client_identifier = config.get_from_dict(module_configuration, 'clientIdentifier', types=str)
        self._cluster = rados.Rados(conffile=ceph_config_file, rados_id=client_identifier, conf=extra_ceph_conf)
        self._cluster.connect()
        # create a bitwise or'd list of the configured features
        self._new_image_features = 0
        for feature in config.get_from_dict(module_configuration, 'newImageFeatures', types=list):
            try:
                self._new_image_features = self._new_image_features | getattr(rbd, feature)
            except AttributeError:
                raise ConfigurationError('{}: Unknown image feature {}.'.format(module_configuration.full_name,
                                                                                feature))

        self._pool_name = None
        self._image_name = None
        self._snapshot_name = None

        self._simultaneous_reads = config.get_from_dict(module_configuration, 'simultaneousReads', types=int)
        self._simultaneous_writes = config.get_from_dict(module_configuration, 'simultaneousWrites', types=int)
        self._read_executor: Optional[JobExecutor] = None
        self._write_executor: Optional[JobExecutor] = None

    def open_r(self) -> None:
        self._read_executor = JobExecutor(name='IO-Read', workers=self._simultaneous_reads, blocking_submit=False)

        re_match = re.match('^([^/]+)/(?:([^/]*)/)?([^@]+)(?:@(.+))?$', self.parsed_url.path)
        if not re_match:
            raise UsageError('URL {} is invalid. Need {}:<pool>[/<namespace>]/<imagename>[@<snapshotname>].'.format(
                self.url, self.name))
        self._pool_name, self._namespace_name, self._image_name, self._snapshot_name = re_match.groups()

        # try opening it and quit if that's not possible.
        try:
            ioctx = self._cluster.open_ioctx(self._pool_name)
            if self._namespace_name is not None and len(self._namespace_name) > 0:
                logger.debug(f'Configuring io context to use namespace {self._namespace_name}.')
                ioctx.set_namespace(self._namespace_name)
        except rados.ObjectNotFound:
            raise FileNotFoundError('Ceph pool {} not found.'.format(self._pool_name)) from None

        try:
            rbd.Image(ioctx, self._image_name, self._snapshot_name, read_only=True)
        except rbd.ImageNotFound:
            raise FileNotFoundError('RBD image or snapshot {} not found.'.format(self.url)) from None

    def open_w(self, size: int, force: bool = False, sparse: bool = False) -> None:
        self._write_executor = JobExecutor(name='IO-Write', workers=self._simultaneous_writes, blocking_submit=True)

        re_match = re.match('^([^/]+)/(?:([^/]*)/)?([^@]+)$', self.parsed_url.path)
        if not re_match:
            raise UsageError('URL {} is invalid. Need {}:<pool>[/<namespace>]/<imagename>[@<snapshotname>].'.format(
                self.url, self.name))
        self._pool_name, self._namespace_name, self._image_name = re_match.groups()

        # try opening it and quit if that's not possible.
        try:
            ioctx = self._cluster.open_ioctx(self._pool_name)
            if self._namespace_name is not None and len(self._namespace_name) > 0:
                logger.debug(f'Configuring io context to use namespace {self._namespace_name}.')
                ioctx.set_namespace(self._namespace_name)
        except rados.ObjectNotFound:
            raise FileNotFoundError('Ceph pool {} not found.'.format(self._pool_name)) from None

        try:
            image = rbd.Image(ioctx, self._image_name)
        except rbd.ImageNotFound:
            rbd.RBD().create(ioctx, self._image_name, size, old_format=False, features=self._new_image_features)
            rbd.Image(ioctx, self._image_name)
        else:
            try:
                if not force:
                    raise FileExistsError(
                        'RBD image {} already exists. Force the restore if you want to overwrite it.'.format(self.url))
                else:
                    image_size = image.size()
                    if size > image_size:
                        raise IOError(
                            'RBD image {} is too small. Its size is {} bytes, but we need {} bytes for the restore.'.format(
                                self.url, image_size, size))

                    # If this is an existing image and sparse is true discard all objects from this image
                    # RBD discard only supports a maximum region length of 0x7fffffff.
                    if sparse:
                        logger.debug('Discarding all objects of RBD image {}.'.format(self.url))
                        region_start = 0
                        bytes_to_end = image_size
                        while bytes_to_end > 0:
                            region_length = min(0x7fffffff, bytes_to_end)
                            image.discard(region_start, region_length)
                            region_start += region_length
                            bytes_to_end -= region_length
            finally:
                image.close()

    def close(self) -> None:
        if self._read_executor:
            self._read_executor.shutdown()
        if self._write_executor:
            self._write_executor.shutdown()

    def size(self) -> int:
        assert self._pool_name is not None and self._image_name is not None
        ioctx = self._cluster.open_ioctx(self._pool_name)
        with rbd.Image(ioctx, self._image_name, self._snapshot_name, read_only=True) as image:
            size = image.size()
        return size

    def _read(self, block: DereferencedBlock) -> Tuple[DereferencedBlock, bytes]:
        offset = block.idx * self.block_size
        t1 = time.time()
        ioctx = self._cluster.open_ioctx(self._pool_name)
        with rbd.Image(ioctx, self._image_name, self._snapshot_name, read_only=True) as image:
            # LIBRADOS_OP_FLAG_FADVISE_DONTNEED: Indicates read data will not be accessed in the near future (by anyone)
            # LIBRADOS_OP_FLAG_FADVISE_NOCACHE: Indicates read data will not be accessed again (by *this* client)
            data = image.read(offset, block.size, rados.LIBRADOS_OP_FLAG_FADVISE_NOCACHE)
        t2 = time.time()

        if not data:
            raise EOFError('End of file reached on {} when there should be data.'.format(self.url))

        logger.debug('{} read block {} in {:.3f}s'.format(
            threading.current_thread().name,
            block.idx,
            t2 - t1,
        ))

        return block, data

    def read(self, block: Union[DereferencedBlock, Block]) -> None:
        # We do need to dereference the block outside of the closure otherwise a reference to the block will be held
        # inside of the closure leading to database troubles.
        # See https://github.com/elemental-lf/benji/issues/61.
        block_deref = block.deref()

        def job():
            return self._read(block_deref)

        assert self._read_executor is not None
        self._read_executor.submit(job)

    def read_sync(self, block: Union[DereferencedBlock, Block]) -> bytes:
        return self._read(block.deref())[1]

    def read_get_completed(self,
                           timeout: Optional[int] = None
                          ) -> Iterator[Union[Tuple[DereferencedBlock, bytes], BaseException]]:
        assert self._read_executor is not None
        return self._read_executor.get_completed(timeout=timeout)

    def _write(self, block: DereferencedBlock, data: bytes) -> DereferencedBlock:
        offset = block.idx * self.block_size
        t1 = time.time()
        ioctx = self._cluster.open_ioctx(self._pool_name)
        with rbd.Image(ioctx, self._image_name, self._snapshot_name) as image:
            written = image.write(data, offset, rados.LIBRADOS_OP_FLAG_FADVISE_DONTNEED)
        t2 = time.time()

        logger.debug('{} wrote block {} in {:.3f}s'.format(
            threading.current_thread().name,
            block.idx,
            t2 - t1,
        ))

        assert written == len(data)

        return block

    def write(self, block: Union[DereferencedBlock, Block], data: bytes) -> None:
        # We do need to dereference the block outside of the closure otherwise a reference to the block will be held
        # inside of the closure leading to database troubles.
        # See https://github.com/elemental-lf/benji/issues/61.
        block_deref = block.deref()

        def job():
            return self._write(block_deref, data)

        assert self._write_executor is not None
        self._write_executor.submit(job)

    def write_sync(self, block: Union[DereferencedBlock, Block], data: bytes) -> None:
        self._write(block.deref(), data)

    def write_get_completed(self,
                            timeout: Optional[int] = None) -> Iterator[Union[DereferencedBlock, BaseException]]:
        assert self._write_executor is not None
        return self._write_executor.get_completed(timeout=timeout)
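
# Illustrative sketch, not part of the original module: this variant of the RBD IO module accepts
# extra Ceph configuration options appended to the URL as a query string. parse_qs() turns the
# query into a dict of lists, only the first value of each key is kept, and 'client_identifier'
# is split off before the remaining options are handed to rados.Rados(conf=...). The example
# query string and option values below are assumptions made up for this example.

from urllib.parse import parse_qs

query = 'client_identifier=backup&keyring=/etc/ceph/ceph.client.backup.keyring'
extra_ceph_conf = {key: value[0] for key, value in parse_qs(query, keep_blank_values=True).items()}
client_identifier = extra_ceph_conf.pop('client_identifier', None)
# client_identifier == 'backup'
# extra_ceph_conf == {'keyring': '/etc/ceph/ceph.client.backup.keyring'}
# -> would be passed as conf= to rados.Rados() alongside rados_id=client_identifier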