Example #1
    def _read(self,
              block: DereferencedBlock) -> Tuple[DereferencedBlock, bytes]:
        offset = block.idx * self.block_size
        t1 = time.time()
        ioctx = self._cluster.open_ioctx(self._pool_name)
        with rbd.Image(ioctx,
                       self._image_name,
                       self._snapshot_name,
                       read_only=True) as image:
            # LIBRADOS_OP_FLAG_FADVISE_DONTNEED: Indicates read data will not be accessed in the near future (by anyone)
            # LIBRADOS_OP_FLAG_FADVISE_NOCACHE: Indicates read data will not be accessed again (by *this* client)
            data = image.read(offset, block.size,
                              rados.LIBRADOS_OP_FLAG_FADVISE_NOCACHE)
        t2 = time.time()

        if not data:
            raise EOFError(
                'End of file reached on {} when there should be data.'.format(
                    self.url))

        logger.debug('{} read block {} in {:.3f}s'.format(
            threading.current_thread().name,
            block.idx,
            t2 - t1,
        ))

        return block, data
Example #2
    def _read(self,
              block: DereferencedBlock) -> Tuple[DereferencedBlock, bytes]:
        assert block.size == self.block_size
        lba = (block.id * self.block_size) // self._iscsi_block_size
        num_blocks = self.block_size // self._iscsi_block_size

        if lba >= self._iscsi_num_blocks:
            raise RuntimeError(
                'Attempt to read outside of the device. Requested LBA is {}, but device has only {} blocks. (1)'
                .format(lba, self._iscsi_num_blocks))

        if lba + num_blocks > self._iscsi_num_blocks:
            raise RuntimeError(
                'Attempt to read outside of the device. Requested LBA is {}, but device has only {} blocks. (2)'
                .format(lba + num_blocks, self._iscsi_num_blocks))

        t1 = time.time()
        task = self._iscsi_execute_sync('READ(16)', libiscsi.iscsi_read16_sync,
                                        self._iscsi_context, self._iscsi_lun,
                                        lba, self.block_size,
                                        self._iscsi_block_size, 0, 0, 0, 0, 0)

        data = task.datain
        assert len(data) == self.block_size
        t2 = time.time()

        logger.debug('{} read block {} in {:.3f}s'.format(
            threading.current_thread().name,
            block.id,
            t2 - t1,
        ))

        return block, data
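The iSCSI examples above convert a backup block index into a logical block address (LBA) plus a block count on the target and guard against access past the end of the device. A minimal standalone sketch of that arithmetic; the function name and the sample sizes are illustrative, not part of the code above:

def block_to_lba(block_id: int, block_size: int, iscsi_block_size: int, iscsi_num_blocks: int):
    # A backup block is usually much larger than the 512 or 4096 byte iSCSI block,
    # so one backup block maps to a contiguous run of iSCSI blocks.
    assert block_size % iscsi_block_size == 0
    lba = (block_id * block_size) // iscsi_block_size
    num_blocks = block_size // iscsi_block_size
    # Reject any access that would start or end beyond the device.
    if lba >= iscsi_num_blocks or lba + num_blocks > iscsi_num_blocks:
        raise RuntimeError('Attempt to access outside of the device.')
    return lba, num_blocks

# Example: 4 MiB backup blocks on a target with 512 byte blocks.
print(block_to_lba(3, 4 * 1024 * 1024, 512, 100 * 1024 * 1024))  # (24576, 8192)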
Example #3
    def _write(self, block: DereferencedBlock,
               data: bytes) -> DereferencedBlock:
        assert block.size == self.block_size
        lba = (block.id * self.block_size) // self._iscsi_block_size
        num_blocks = self.block_size // self._iscsi_block_size

        if lba >= self._iscsi_num_blocks:
            raise RuntimeError(
                'Attempt to write outside of the device. Requested LBA is {}, but device has only {} blocks. (1)'
                .format(lba, self._iscsi_num_blocks))

        if lba + num_blocks > self._iscsi_num_blocks:
            raise RuntimeError(
                'Attempt to write outside of the device. Requested LBA is {}, but device has only {} blocks. (2)'
                .format(lba + num_blocks, self._iscsi_num_blocks))

        t1 = time.time()
        self._iscsi_execute_sync('WRITE(16)', libiscsi.iscsi_write16_sync,
                                 self._iscsi_context, self._iscsi_lun, lba,
                                 data, self._iscsi_block_size, 0, 0, 0, 0, 0)
        t2 = time.time()

        logger.debug('{} wrote block {} in {:.3f}s'.format(
            threading.current_thread().name,
            block.id,
            t2 - t1,
        ))

        return block
Example #4
    def open_r(self) -> None:
        re_match = re.match('^([^/]+)/(?:([^/]*)/)?([^@]+)(?:@(.+))?$',
                            self.parsed_url.path)
        if not re_match:
            raise UsageError(
                'URL {} is invalid. Need {}:<pool>[/<namespace>]/<imagename>[@<snapshotname>].'
                .format(self.url, self.name))
        self._pool_name, self._namespace_name, self._image_name, self._snapshot_name = re_match.groups()

        # try opening it and quit if that's not possible.
        try:
            ioctx = self._cluster.open_ioctx(self._pool_name)
            if self._namespace_name is not None and len(
                    self._namespace_name) > 0:
                logger.debug(
                    f'Configuring io context to use namespace {self._namespace_name}.'
                )
                ioctx.set_namespace(self._namespace_name)
        except rados.ObjectNotFound:
            raise FileNotFoundError('Ceph pool {} not found.'.format(
                self._pool_name)) from None

        try:
            self._rbd_image = rbd.Image(ioctx,
                                        self._image_name,
                                        self._snapshot_name,
                                        read_only=True)
        except rbd.ImageNotFound:
            raise FileNotFoundError(
                'RBD image or snapshot {} not found.'.format(
                    self.url)) from None
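The regular expression in open_r() splits the RBD URL path into pool, optional namespace, image, and optional snapshot. A quick standalone check of how the groups come out; the sample paths are invented for illustration:

import re

RBD_PATH_RE = re.compile('^([^/]+)/(?:([^/]*)/)?([^@]+)(?:@(.+))?$')

for path in ('rbd/vm-100-disk-0@backup-snap', 'rbd/my-namespace/vm-100-disk-0'):
    match = RBD_PATH_RE.match(path)
    # groups() -> (pool, namespace or None, image, snapshot or None)
    print(path, match.groups())
# rbd/vm-100-disk-0@backup-snap   -> ('rbd', None, 'vm-100-disk-0', 'backup-snap')
# rbd/my-namespace/vm-100-disk-0  -> ('rbd', 'my-namespace', 'vm-100-disk-0', None)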
Example #5
    def _read(self,
              block: DereferencedBlock) -> Tuple[DereferencedBlock, bytes]:
        offset = block.id * self._block_size
        t1 = time.time()
        ioctx = self._cluster.open_ioctx(self._pool_name)
        with rbd.Image(ioctx,
                       self._image_name,
                       self._snapshot_name,
                       read_only=True) as image:
            data = image.read(offset, block.size,
                              rados.LIBRADOS_OP_FLAG_FADVISE_DONTNEED)
        t2 = time.time()

        if not data:
            raise EOFError(
                'End of file reached on {} when there should be data.'.format(
                    self.url))

        logger.debug('{} read block {} in {:.2f}s'.format(
            threading.current_thread().name,
            block.id,
            t2 - t1,
        ))

        return block, data
Example #6
    def _write(self, block: DereferencedBlock, data: bytes) -> DereferencedBlock:
        data, transforms_metadata = self._encapsulate(data)

        metadata, metadata_json = self._build_metadata(
            size=block.size, object_size=len(data), checksum=block.checksum, transforms_metadata=transforms_metadata)

        key = block.uid.storage_object_to_path()
        metadata_key = key + self._META_SUFFIX

        time.sleep(self.write_throttling.consume(len(data) + len(metadata_json)))
        t1 = time.time()
        try:
            self._write_object(key, data)
            self._write_object(metadata_key, metadata_json)
        except:
            try:
                self._rm_object(key)
                self._rm_object(metadata_key)
            except FileNotFoundError:
                pass
            raise
        t2 = time.time()

        logger.debug('{} wrote data of uid {} in {:.2f}s'.format(threading.current_thread().name, block.uid, t2 - t1))

        if self._consistency_check_writes:
            try:
                self._check_write(key=key, metadata_key=metadata_key, data_expected=data)
            except (KeyError, ValueError) as exception:
                raise InvalidBlockException('Check write of block {} (UID {}) failed.'.format(block.id, block.uid),
                                            block) from exception

        return block
Example #7
 def __init__(self, rules_spec, reference_time=None):
     self.reference_time = time.time() if reference_time is None else reference_time
     self.rules = self._parse_rules(rules_spec)
     logger.debug(
         'Retention filter set up with reference time {} and rules {}'.
         format(self.reference_time, self.rules))
Example #8
    def __init__(self, *, config: Config, name: str, storage_id: int, module_configuration: ConfigDict) -> None:
        read_cache_directory = Config.get_from_dict(module_configuration, 'readCache.directory', None, types=str)
        read_cache_maximum_size = Config.get_from_dict(module_configuration, 'readCache.maximumSize', None, types=int)
        read_cache_shards = Config.get_from_dict(module_configuration, 'readCache.shards', None, types=int)

        if read_cache_directory and read_cache_maximum_size:
            os.makedirs(read_cache_directory, exist_ok=True)
            try:
                self._read_cache = FanoutCache(
                    read_cache_directory,
                    size_limit=read_cache_maximum_size,
                    shards=read_cache_shards,
                    eviction_policy='least-frequently-used',
                    statistics=1,
                )
            except Exception:
                logger.warning('Unable to enable disk based read caching. Continuing without it.')
                self._read_cache = None
            else:
                logger.debug('Disk based read caching instantiated (cache size {}, shards {}).'.format(
                    read_cache_maximum_size, read_cache_shards))
        else:
            self._read_cache = None
        self._use_read_cache = True

        # Start reader and writer threads after the disk cache is created, so that they see it.
        super().__init__(config=config, name=name, storage_id=storage_id, module_configuration=module_configuration)
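For reference, a small sketch of how such a diskcache.FanoutCache behaves in isolation, using the same settings as the constructor above; the cache directory, key, and payload here are made up for illustration:

from diskcache import FanoutCache

# Assumed location and sizes for illustration only.
cache = FanoutCache('/tmp/benji-read-cache', shards=4, size_limit=64 * 1024 * 1024,
                    eviction_policy='least-frequently-used', statistics=1)

key = 'block-uid-1-100'
if cache.get(key) is None:        # miss on first access
    cache.set(key, b'\0' * 4096)  # populate from the slow backend
data = cache.get(key)             # hit on subsequent accesses

hits, misses = cache.stats()      # counters since cache creation, as used in close() below
cache.close()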
Example #9
    def test_storage_stats(self):
        NUM_BLOBS = 15
        BLOB_SIZE = 4096

        saved_uids = list(self.storage.list_blocks())
        self.assertEqual(0, len(saved_uids))

        blocks = [
            Block(uid=BlockUid(i + 1, i + 100),
                  size=BLOB_SIZE,
                  checksum='0000000000000000') for i in range(NUM_BLOBS)
        ]
        for block in blocks:
            data = self.random_bytes(BLOB_SIZE)
            self.assertEqual(BLOB_SIZE, len(data))
            self.storage.write_block(block, data)

        objects_count, objects_size = self.storage.storage_stats()

        logger.debug(
            f'Storage stats: {objects_count} objects using {objects_size} bytes.'
        )

        self.assertEqual(NUM_BLOBS * 2,
                         objects_count)  # Also counts the metadata objects
        self.assertGreater(objects_size, 0)

        for block in blocks:
            self.storage.rm_block(block.uid)
Example #10
    def __init__(self, hash_function_config: str) -> None:
        hash_args: Optional[str] = None
        try:
            hash_name, hash_args = hash_function_config.split(',', 1)
        except ValueError:
            hash_name = hash_function_config

        try:
            hash_module: Any = import_module('{}.{}'.format(self._CRYPTO_PACKAGE, hash_name))
        except ImportError as exception:
            raise ConfigurationError('Unsupported block hash {}.'.format(hash_name)) from exception

        hash_kwargs: Dict[str, Any] = {}
        if hash_args is not None:
            hash_kwargs = {k: literal_eval(v) for k, v in (pair.split('=') for pair in hash_args.split(','))}

        try:
            hash = hash_module.new(**hash_kwargs)
        except (TypeError, ValueError) as exception:
            raise ConfigurationError('Unsupported or invalid block hash arguments: {}.'.format(hash_kwargs)) from exception

        from benji.database import Block
        if len(hash.digest()) > Block.MAXIMUM_CHECKSUM_LENGTH:
            raise ConfigurationError('Specified block hash {} exceeds maximum digest length of {} bytes.'.format(
                hash_name, Block.MAXIMUM_CHECKSUM_LENGTH))

        logger.debug('Using block hash {} with kwargs {}.'.format(hash_name, hash_kwargs))

        self._hash_module = hash_module
        self._hash_kwargs = hash_kwargs
Example #11
 def close(self) -> None:
     super().close()
     if self._read_cache is not None:
         (cache_hits, cache_misses) = self._read_cache.stats()
         logger.debug('Disk based cache statistics (since cache creation): {} hits, {} misses.'.format(
             cache_hits, cache_misses))
         self._read_cache.close()
Example #12
    def test_version_filter_issue_9_slowness(self):
        version_uids = set()
        for i in range(3):
            version = self.database_backend.create_version(
                version_name='backup-name',
                snapshot_name='snapshot-name.{}'.format(i),
                size=16 * 1024 * 4096,
                storage_id=1,
                block_size=4 * 1024 * 4096)
            self.assertNotIn(version.uid, version_uids)
            version_uids.add(version.uid)

        t1 = timeit.timeit(
            lambda: self.database_backend.get_versions_with_filter(
                'snapshot_name == "snapshot-name.2" and name == "backup-name"'
            ),
            number=1)
        t2 = timeit.timeit(
            lambda: self.database_backend.get_versions_with_filter(
                '(snapshot_name == "snapshot-name.2" and name == "backup-name")'
            ),
            number=1)
        logger.debug(
            'test_version_filter_issue_9_slowness: t1 {}, t2 {}'.format(
                t1, t2))
        self.assertLess(t1 - t2, 5)
Example #13
def parametrized_hash_function(config_hash_function):
    hash_name = None
    hash_args = None
    try:
        hash_name, hash_args = config_hash_function.split(',', 1)
    except ValueError:
        hash_name = config_hash_function
    hash_function = getattr(hashlib, hash_name, None)
    if hash_function is None:
        raise ConfigurationError(
            'Unsupported hash function {}.'.format(hash_name))
    kwargs = {}
    if hash_args is not None:
        kwargs = dict(
            (k, literal_eval(v))
            for k, v in (pair.split('=') for pair in hash_args.split(',')))
    logger.debug('Using hash function {} with kwargs {}'.format(
        hash_name, kwargs))
    hash_function_w_kwargs = hash_function(**kwargs)

    from benji.metadata import Block
    if len(hash_function_w_kwargs.digest()) > Block.MAXIMUM_CHECKSUM_LENGTH:
        raise ConfigurationError(
            'Specified hash function exceeds maximum digest length of {}.'.
            format(Block.MAXIMUM_CHECKSUM_LENGTH))

    return hash_function_w_kwargs
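Both hash-related examples (#10 and #13) accept a configuration string of the form 'name' or 'name,key=value,...'. A standalone sketch of how such a string can be resolved against hashlib; the function name and the sample config value are illustrative:

import hashlib
from ast import literal_eval

def hash_from_config(config: str):
    name, _, args = config.partition(',')
    kwargs = {}
    if args:
        # Each argument is key=value; literal_eval turns '32' into 32, 'True' into True, etc.
        kwargs = {k: literal_eval(v) for k, v in (pair.split('=') for pair in args.split(','))}
    constructor = getattr(hashlib, name, None)
    if constructor is None:
        raise ValueError('Unsupported hash function {}.'.format(name))
    return constructor(**kwargs)

digest = hash_from_config('blake2b,digest_size=32').digest()
assert len(digest) == 32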
Example #14
    def write_get_completed(self, timeout: Optional[int] = None) -> Iterator[Union[DereferencedBlock, BaseException]]:
        try:
            while not self._writes_finished():
                logger.debug('Write queue length, outstanding writes, completion queue length: {}, {}, {}.'.format(
                    len(self._write_queue), self._outstanding_aio_writes, self._write_completion_queue.qsize()))
                self._submit_aio_writes()

                completion, t1, t2, block = self._write_completion_queue.get(block=(timeout is None or timeout != 0),
                                                                             timeout=timeout)
                assert self._outstanding_aio_writes > 0
                self._outstanding_aio_writes -= 1

                try:
                    completion.wait_for_complete_and_cb()
                except Exception as exception:
                    yield exception
                else:
                    write_return_value = completion.get_return_value()
                    if write_return_value != 0:
                        raise IOError('Write of block {} failed.'.format(block.idx))

                    logger.debug('Wrote block {} in {:.3f}s'.format(block.idx, t2 - t1))

                    yield block

                self._write_completion_queue.task_done()
        except queue.Empty:
            return
        else:
            return
Example #15
    def open_w(self,
               size: int,
               force: bool = False,
               sparse: bool = False) -> None:
        self._write_executor = JobExecutor(name='IO-Write',
                                           workers=self._simultaneous_writes,
                                           blocking_submit=True)

        re_match = re.match('^([^/]+)/([^@]+)$', self.parsed_url.path)
        if not re_match:
            raise UsageError(
                'URL {} is invalid. Need {}:<pool>/<imagename>.'.format(
                    self.url, self.name))
        self._pool_name, self._image_name = re_match.groups()

        # try opening it and quit if that's not possible.
        try:
            ioctx = self._cluster.open_ioctx(self._pool_name)
        except rados.ObjectNotFound:
            raise FileNotFoundError('Ceph pool {} not found.'.format(
                self._pool_name)) from None

        try:
            image = rbd.Image(ioctx, self._image_name)
        except rbd.ImageNotFound:
            rbd.RBD().create(ioctx,
                             self._image_name,
                             size,
                             old_format=False,
                             features=self._new_image_features)
            rbd.Image(ioctx, self._image_name)
        else:
            try:
                if not force:
                    raise FileExistsError(
                        'RBD image {} already exists. Force the restore if you want to overwrite it.'
                        .format(self.url))
                else:
                    image_size = image.size()
                    if size > image_size:
                        raise IOError(
                            'RBD image {} is too small. Its size is {} bytes, but we need {} bytes for the restore.'
                            .format(self.url, image_size, size))

                    # If this is an existing image and sparse is true, discard all objects from this image.
                    # RBD discard only supports a maximum region length of 0x7fffffff.
                    if sparse:
                        logger.debug(
                            'Discarding all objects of RBD image {}.'.format(
                                self.url))
                        region_start = 0
                        bytes_to_end = image_size
                        while bytes_to_end > 0:
                            region_length = min(0x7fffffff, bytes_to_end)
                            image.discard(region_start, region_length)
                            region_start += region_length
                            bytes_to_end -= region_length
            finally:
                image.close()
Example #16
 def _init_connection(self) -> None:
     if not hasattr(self._local, 'session'):
         logger.debug('Initializing S3 session and resource for {}'.format(threading.current_thread().name))
         self._local.session = boto3.session.Session()
         if self._disable_encoding_type:
             self._local.session.events.unregister('before-parameter-build.s3.ListObjects',
                                                   set_list_objects_encoding_type_url)
         self._local.resource = self._local.session.resource('s3', **self._resource_config)
         self._local.bucket = self._local.resource.Bucket(self._bucket_name)
Example #17
    def write_sync(self, block: Union[DereferencedBlock, Block], data: bytes) -> None:
        assert self._rbd_image is not None
        offset = block.idx * self.block_size
        t1 = time.time()
        written = self._rbd_image.write(data, offset, rados.LIBRADOS_OP_FLAG_FADVISE_DONTNEED)
        t2 = time.time()

        logger.debug('Wrote block {} in {:.3f}s'.format(block.idx, t2 - t1))

        assert written == block.size
Example #18
    def __init__(self,
                 ad_hoc_config: str = None,
                 sources: Sequence[str] = None) -> None:
        if ad_hoc_config is None:
            if not sources:
                sources = self._get_sources()

            config = None
            for source in sources:
                if os.path.isfile(source):
                    try:
                        with open(source, 'r') as f:
                            config = ruamel.yaml.load(
                                f, Loader=ruamel.yaml.SafeLoader)
                    except Exception as exception:
                        raise ConfigurationError(
                            'Configuration file {} is invalid.'.format(
                                source)) from exception
                    if config is None:
                        raise ConfigurationError(
                            'Configuration file {} is empty.'.format(source))
                    break

            if not config:
                raise ConfigurationError(
                    'No configuration file found in the default places ({}).'.
                    format(', '.join(sources)))
        else:
            config = ruamel.yaml.load(ad_hoc_config,
                                      Loader=ruamel.yaml.SafeLoader)
            if config is None:
                raise ConfigurationError('Configuration string is empty.')

        if self._CONFIGURATION_VERSION_KEY not in config:
            raise ConfigurationError(
                'Configuration is missing required key "{}".'.format(
                    self._CONFIGURATION_VERSION_KEY))

        version = str(config[self._CONFIGURATION_VERSION_KEY])
        if not re.fullmatch(self._CONFIGURATION_VERSION_REGEX, version):
            raise ConfigurationError(
                'Configuration has invalid version of "{}".'.format(version))

        version_obj = semantic_version.Version.coerce(version)
        if version_obj not in VERSIONS.configuration.supported:
            raise ConfigurationError(
                'Configuration has unsupported version of "{}".'.format(
                    version))

        self._config_version = version_obj
        self._config = ConfigDict(self.validate(module=__name__,
                                                config=config))
        logger.debug('Loaded configuration.')
Example #19
 def shutdown(self) -> None:
     if len(self._futures) > 0:
         logger.warning('Job executor "{}" is being shutdown with {} outstanding jobs, cancelling them.'.format(
             self._name, len(self._futures)))
         for future in self._futures:
             future.cancel()
         logger.debug('Job executor "{}" cancelled all outstanding jobs.'.format(self._name))
         if not self._blocking_submit:
             # Get all jobs so that the semaphore gets released and still waiting jobs can complete
             for _ in self.get_completed():
                 pass
             logger.debug('Job executor "{}" read results for all outstanding jobs.'.format(self._name))
     self._executor.shutdown()
Example #20
    def read_sync(self, block: Union[DereferencedBlock, Block]) -> bytes:
        assert self._rbd_image is not None
        offset = block.idx * self.block_size
        t1 = time.time()
        data = self._rbd_image.read(offset, block.size, rados.LIBRADOS_OP_FLAG_FADVISE_DONTNEED)
        t2 = time.time()

        if not data:
            raise EOFError('End of file reached on {} when there should be data.'.format(self.url))

        logger.debug('Read block {} in {:.3f}s'.format(block.idx, t2 - t1))

        return data
Example #21
    def read_get_completed(
        self,
        timeout: Optional[int] = None
    ) -> Iterator[Union[Tuple[DereferencedBlock, bytes], BaseException]]:
        try:
            while not self._reads_finished():
                logger.debug(
                    'Read queue length, outstanding reads, completion queue length: {}, {}, {}.'
                    .format(len(self._read_queue), self._outstanding_aio_reads,
                            self._read_completion_queue.qsize()))
                self._submit_aio_reads()

                completion, t1, t2, block, data = self._read_completion_queue.get(
                    block=(timeout is None or timeout != 0),
                    timeout=timeout)
                assert self._outstanding_aio_reads > 0
                self._outstanding_aio_reads -= 1

                try:
                    completion.wait_for_complete_and_cb()
                except Exception as exception:
                    yield exception
                else:
                    read_return_value = completion.get_return_value()

                    if read_return_value < 0:
                        raise IOError('Read of block {} failed.'.format(
                            block.id))

                    if read_return_value != block.size:
                        raise IOError(
                            'Short read of block {}. Wanted {} bytes but got {}.'
                            .format(block.id, block.size, read_return_value))

                    if not data:
                        # We shouldn't get here because a failed read should be caught by the "read_return_value < 0"
                        # check above. See: https://github.com/ceph/ceph/blob/880468b4bf6f0a1995de5bd98c09007a00222cbf/src/pybind/rbd/rbd.pyx#L4145.
                        raise IOError('Read of block {} failed.'.format(
                            block.id))

                    logger.debug('Read block {} in {:.3f}s'.format(
                        block.id, t2 - t1))

                    yield block, data

                self._read_completion_queue.task_done()
        except queue.Empty:
            return
        else:
            return
Example #22
    def filter(self, versions: Sequence[Version]) -> List[Version]:
        # Category labels without latest
        categories = [
            category for category in self.rules.keys() if category != 'latest'
        ]

        for category in categories:
            setattr(self, '_{}_dict'.format(category), defaultdict(list))

        # Make our own copy
        versions = list(versions)
        # Sort from youngest to oldest
        versions.sort(key=lambda version: version.date.timestamp(),
                      reverse=True)

        # Remove latest versions from consideration if configured
        if 'latest' in self.rules:
            logger.debug('Keeping {} latest versions.'.format(
                self.rules['latest']))
            del versions[:self.rules['latest']]

        dismissed_versions = []
        for version in versions:
            try:
                td = _Timedelta(version.date.timestamp(), self.reference_time)
            except _TimedeltaError as exception:
                # Err on the safe side, ignore this version (i.e. it won't be dismissed).
                logger.warning('Version {}: {}'.format(version.uid.v_string,
                                                       exception))
                continue

            logger.debug(
                'Time and time delta for version {} are {} and {}.'.format(
                    version.uid.v_string, version.date, td))

            for category in categories:
                timecount = getattr(td, category)
                if timecount <= self.rules[category]:
                    logger.debug(
                        'Found matching category {}, timecount {}.'.format(
                            category, timecount))
                    getattr(
                        self,
                        '_{}_dict'.format(category))[timecount].append(version)
                    break
            else:
                # For loop did not break: The item doesn't fit into any category,
                # it's too old
                dismissed_versions.append(version)
                logger.debug(
                    'Dismissing version, it doesn\'t fit into any category.')

        for category in categories:
            category_dict = getattr(self, '_{}_dict'.format(category))
            for timecount in category_dict:
                # Keep the oldest of each category, reject the rest
                dismissed_versions.extend(category_dict[timecount][:-1])

        return dismissed_versions
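The filter above buckets versions per retention category and keeps only the oldest version in each bucket, dismissing the rest. The core of that "keep one per bucket" step, reduced to plain Python; the version names and timecounts here are invented for illustration:

from collections import defaultdict

# Pretend these are (version, timecount-within-category) pairs, sorted youngest first.
versions = [('V5', 0), ('V4', 0), ('V3', 1), ('V2', 1), ('V1', 2)]

buckets = defaultdict(list)
for version, timecount in versions:
    buckets[timecount].append(version)

dismissed = []
for timecount in buckets:
    # The last entry per bucket is the oldest (input was sorted youngest first); keep it.
    dismissed.extend(buckets[timecount][:-1])

print(dismissed)  # ['V5', 'V3'] -- V4, V2 and V1 survive as the oldest of their buckets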
Example #23
 def _resolve_schema(self, *, name: str) -> Dict:
     try:
         child = self._schema_registry[name]
     except KeyError:
         raise InternalError('Schema for module {} is missing.'.format(name))
     result: Dict = {}
     if self._PARENTS_KEY in child:
         parent_names = child[self._PARENTS_KEY]
         for parent_name in parent_names:
             parent = self._resolve_schema(name=parent_name)
             self._merge_dicts(result, parent)
     result = self._merge_dicts(result, child)
     if self._PARENTS_KEY in result:
         del result[self._PARENTS_KEY]
     logger.debug('Resolved schema for {}: {}.'.format(name, result))
     return result
Example #24
    def _write(self, block: DereferencedBlock, data: bytes) -> DereferencedBlock:
        offset = block.id * self.block_size
        t1 = time.time()
        ioctx = self._cluster.open_ioctx(self._pool_name)
        with rbd.Image(ioctx, self._image_name, self._snapshot_name) as image:
            written = image.write(data, offset, rados.LIBRADOS_OP_FLAG_FADVISE_DONTNEED)
        t2 = time.time()

        logger.debug('{} wrote block {} in {:.3f}s'.format(
            threading.current_thread().name,
            block.id,
            t2 - t1,
        ))

        assert written == len(data)
        return block
Example #25
    def backup(self, version_uid: str, volume: str, snapshot: str, source: str,
               rbd_hints: str, base_version_uid: str, block_size: int,
               labels: List[str], storage: str) -> None:
        if version_uid is None:
            version_uid = '{}-{}'.format(volume[:248], random_string(6))
        version_uid_obj = VersionUid(version_uid)
        base_version_uid_obj = VersionUid(
            base_version_uid) if base_version_uid else None

        if labels:
            label_add, label_remove = InputValidation.parse_and_validate_labels(
                labels)
        with Benji(self.config) as benji_obj:
            hints = None
            if rbd_hints:
                logger.debug(f'Loading RBD hints from file {rbd_hints}.')
                with open(rbd_hints, 'r') as f:
                    hints = hints_from_rbd_diff(f.read())
            backup_version = benji_obj.backup(
                version_uid=version_uid_obj,
                volume=volume,
                snapshot=snapshot,
                source=source,
                hints=hints,
                base_version_uid=base_version_uid_obj,
                storage_name=storage,
                block_size=block_size)

            if labels:
                for key, value in label_add:
                    benji_obj.add_label(backup_version.uid, key, value)
                for key in label_remove:
                    benji_obj.rm_label(backup_version.uid, key)
                if label_add:
                    logger.info('Added label(s) to version {}: {}.'.format(
                        backup_version.uid,
                        ', '.join('{}={}'.format(name, value)
                                  for name, value in label_add)))
                if label_remove:
                    logger.info('Removed label(s) from version {}: {}.'.format(
                        backup_version.uid, ', '.join(label_remove)))

            if self.machine_output:
                benji_obj.export_any({'versions': [backup_version]},
                                     sys.stdout,
                                     ignore_relationships=(((Version, ),
                                                            ('blocks', )), ))
Example #26
 def close(self):
     """ Close the io
     """
     if self._read_executor:
         if len(self._read_futures) > 0:
             logger.warning(
                 'IO backend closed with {} outstanding read jobs, cancelling them.'
                 .format(len(self._read_futures)))
             for future in self._read_futures:
                 future.cancel()
             logger.debug('IO backend cancelled all outstanding read jobs.')
             # Get all jobs so that the semaphore gets released and still waiting jobs can complete
             for future in self.read_get_completed():
                 pass
             logger.debug(
                 'IO backend read results from all outstanding read jobs.')
         self._read_executor.shutdown()
Example #27
    def _write(self, block: DereferencedBlock, data: bytes) -> DereferencedBlock:
        offset = block.id * self._block_size
        t1 = time.time()
        with open(self._path, 'rb+') as f:
            f.seek(offset)
            written = f.write(data)
            os.posix_fadvise(f.fileno(), offset, len(data), os.POSIX_FADV_DONTNEED)
        t2 = time.time()

        logger.debug('{} wrote block {} in {:.2f}s'.format(
            threading.current_thread().name,
            block.id,
            t2 - t1,
        ))

        assert written == len(data)
        return block
Example #28
    def _read(
        self, block: DereferencedBlock, metadata_only: bool
    ) -> Tuple[DereferencedBlock, Optional[bytes], Dict]:
        key = block.uid.storage_object_to_path()
        metadata_key = key + self._META_SUFFIX
        data: Optional[bytes] = None
        try:
            t1 = time.time()
            if not metadata_only:
                data = self._read_object(key)
                data_length = len(data)
            else:
                data_length = self._read_object_length(key)
            metadata_json = self._read_object(metadata_key)
            time.sleep(
                self.read_throttling.consume(
                    (len(data) if data else 0) + len(metadata_json)))
            t2 = time.time()
        except FileNotFoundError as exception:
            raise InvalidBlockException(
                'Object metadata or data of block {} (UID {}) not found.'.
                format(block.id, block.uid), block) from exception

        try:
            metadata = self._decode_metadata(metadata_json=metadata_json,
                                             key=key,
                                             data_length=data_length)
        except (KeyError, ValueError) as exception:
            raise InvalidBlockException(
                'Object metadata of block {} (UID {}) is invalid.'.format(
                    block.id, block.uid), block) from exception

        if self._CHECKSUM_KEY not in metadata:
            raise InvalidBlockException(
                'Required object metadata key {} is missing for block {} (UID {}).'
                .format(self._CHECKSUM_KEY, block.id, block.uid), block)

        if not metadata_only and self._TRANSFORMS_KEY in metadata:
            data = self._decapsulate(
                data, metadata[self._TRANSFORMS_KEY])  # type: ignore

        logger.debug('{} read data of uid {} in {:.2f}s{}'.format(
            threading.current_thread().name, block.uid, t2 - t1,
            ' (metadata only)' if metadata_only else ''))

        return block, data, metadata
Example #29
    def __init__(self, cfg=None, sources=None, merge_defaults=True):
        yaml = YAML(typ='safe', pure=True)
        default_config = yaml.load(self.DEFAULT_CONFIG)

        if cfg is None:
            if not sources:
                sources = self._get_sources()

            config = None
            for source in sources:
                if os.path.isfile(source):
                    try:
                        config = yaml.load(Path(source))
                    except Exception as exception:
                        raise ConfigurationError('Configuration file {} is invalid.'.format(source)) from exception
                    if config is None:
                        raise ConfigurationError('Configuration file {} is empty.'.format(source))
                    break

            if not config:
                raise ConfigurationError('No configuration file found in the default places ({}).'.format(
                    ', '.join(sources)))
        else:
            config = yaml.load(cfg)
            if config is None:
                raise ConfigurationError('Configuration string is empty.')

        if 'configurationVersion' not in config or type(config['configurationVersion']) is not str:
            raise ConfigurationError('Configuration version is missing or not a string.')

        if config['configurationVersion'] != self.CONFIG_VERSION:
            raise ConfigurationError('Unknown configuration version {}.'.format(config['configurationVersion']))

        if merge_defaults:
            self._merge_dicts(config, default_config)

        with StringIO() as redacted_config_string:
            redacted_config = yaml.load(self.REDACT)
            self._merge_dicts(redacted_config, config)
            yaml.dump(redacted_config, redacted_config_string)
            logger.debug('Loaded configuration: {}'.format(redacted_config_string.getvalue()))

        self.config = config
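Example #29 relies on a _merge_dicts helper (not shown) to overlay the loaded configuration onto the defaults and onto the redaction template used for logging. A plausible minimal sketch of such a merge, assuming that keys already present in the first dictionary take precedence and missing keys are filled in from the second; the helper and the sample configuration keys below are illustrative only:

def merge_dicts(target: dict, defaults: dict) -> dict:
    # Hypothetical stand-in for _merge_dicts: existing keys in target win,
    # missing keys are copied from defaults, nested dicts are merged recursively.
    for key, value in defaults.items():
        if key in target and isinstance(target[key], dict) and isinstance(value, dict):
            merge_dicts(target[key], value)
        elif key not in target:
            target[key] = value
    return target

config = {'logFile': '/var/log/benji.log', 'ios': {'file': {'simultaneousReads': 5}}}
defaults = {'blockSize': 4194304, 'ios': {'file': {'simultaneousReads': 1, 'simultaneousWrites': 1}}}
print(merge_dicts(config, defaults))
# simultaneousReads stays at 5, simultaneousWrites and blockSize are filled from the defaults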
Example #30
    def _read(self, block: DereferencedBlock) -> Tuple[DereferencedBlock, bytes]:
        offset = block.id * self._block_size
        t1 = time.time()
        with open(self._path, 'rb') as f:
            f.seek(offset)
            data = f.read(block.size)
            os.posix_fadvise(f.fileno(), offset, block.size, os.POSIX_FADV_DONTNEED)
        t2 = time.time()

        if not data:
            raise EOFError('End of file reached on {} when there should be data.'.format(self.url))

        logger.debug('{} read block {} in {:.2f}s'.format(
            threading.current_thread().name,
            block.id,
            t2 - t1,
        ))

        return block, data
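Examples #27 and #30 implement the file I/O module: seek to the block offset, read or write exactly one block, and advise the kernel that the pages will not be needed again. The same pattern as a tiny standalone script; the file path and block size are made up for illustration:

import os

BLOCK_SIZE = 4096
path = '/tmp/benji-io-demo.img'

# Create a sparse 10-block test file.
with open(path, 'wb') as f:
    f.truncate(10 * BLOCK_SIZE)

# Write one block at block index 2, then read it back.
offset = 2 * BLOCK_SIZE
with open(path, 'rb+') as f:
    f.seek(offset)
    written = f.write(b'\xab' * BLOCK_SIZE)
    # Drop the just-written pages from the page cache; a backup touches them only once.
    os.posix_fadvise(f.fileno(), offset, BLOCK_SIZE, os.POSIX_FADV_DONTNEED)
assert written == BLOCK_SIZE

with open(path, 'rb') as f:
    f.seek(offset)
    data = f.read(BLOCK_SIZE)
    os.posix_fadvise(f.fileno(), offset, BLOCK_SIZE, os.POSIX_FADV_DONTNEED)
assert data == b'\xab' * BLOCK_SIZE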