Example #1
0
    def __init__(self, *, config: Config, name: str,
                 module_configuration: ConfigDict) -> None:
        """Set up the 32-byte master key, taken verbatim from the configuration or derived via KDF."""
        super().__init__(config=config,
                         name=name,
                         module_configuration=module_configuration)

        encoded_key: Optional[str] = Config.get_from_dict(
            module_configuration, 'masterKey', None, types=str)
        if encoded_key is None:
            # No explicit key configured: derive one from the password with the
            # configured salt and iteration count.
            salt: bytes = base64.b64decode(
                Config.get_from_dict(module_configuration, 'kdfSalt', types=str))
            iterations: int = Config.get_from_dict(module_configuration,
                                                   'kdfIterations',
                                                   types=int)
            password: str = Config.get_from_dict(module_configuration,
                                                 'password',
                                                 types=str)
            self._master_key = derive_key(salt=salt,
                                          iterations=iterations,
                                          key_length=32,
                                          password=password)
        else:
            decoded_key = base64.b64decode(encoded_key)
            if len(decoded_key) != 32:
                raise ValueError(
                    'Key masterKey has the wrong length. It must be 32 bytes long and encoded as BASE64.'
                )
            self._master_key = decoded_key
Example #2
0
    def __init__(self, *, config: Config, name: str, storage_id: int, module_configuration: ConfigDict) -> None:
        """Optionally instantiate a disk based read cache, then initialize the parent storage."""
        cache_directory = Config.get_from_dict(module_configuration, 'readCache.directory', None, types=str)
        cache_maximum_size = Config.get_from_dict(module_configuration, 'readCache.maximumSize', None, types=int)
        cache_shards = Config.get_from_dict(module_configuration, 'readCache.shards', None, types=int)

        self._read_cache = None
        if cache_directory and cache_maximum_size:
            os.makedirs(cache_directory, exist_ok=True)
            try:
                cache = FanoutCache(
                    cache_directory,
                    size_limit=cache_maximum_size,
                    shards=cache_shards,
                    eviction_policy='least-frequently-used',
                    statistics=1,
                )
            except Exception:
                # Caching is best-effort: continue uncached rather than fail.
                logger.warning('Unable to enable disk based read caching. Continuing without it.')
            else:
                self._read_cache = cache
                logger.debug('Disk based read caching instantiated (cache size {}, shards {}).'.format(
                    cache_maximum_size, cache_shards))
        self._use_read_cache = True

        # Start reader and write threads after the disk cached is created, so that they see it.
        super().__init__(config=config, name=name, storage_id=storage_id, module_configuration=module_configuration)
Example #3
0
    def __init__(self, *, identifier, materials):
        """Set up the 32-byte master key from the materials, deriving it via KDF when absent."""
        master_key = Config.get_from_dict(materials, 'masterKey', None, types=bytes)
        if master_key is None:
            # Derive the key from the password with the configured KDF parameters.
            salt = Config.get_from_dict(materials, 'kdfSalt', types=bytes)
            iterations = Config.get_from_dict(materials, 'kdfIterations', types=int)
            password = Config.get_from_dict(materials, 'password', types=str)
            self._master_key = derive_key(salt=salt,
                                          iterations=iterations,
                                          key_length=32,
                                          password=password)
        else:
            if len(master_key) != 32:
                raise ValueError(
                    'Key masterKey has the wrong length. It must be 32 bytes long.'
                )
            self._master_key = master_key

        self._identifier = identifier
Example #4
0
 def test_lists(self):
     """List options must come back as a list and reject a wrong type request."""
     config = Config(cfg=self.CONFIG, merge_defaults=False)
     self.assertTrue(type(config.get('io.rbd.newImageFeatures')) is list)
     # assertRaises needs a callable; the original passed the *result* of the
     # call (evaluated eagerly), so the type check was never exercised. Wrap it
     # in a lambda and request a genuinely wrong type, as the companion
     # Config.get_from_dict test does.
     self.assertRaises(TypeError,
                       lambda: config.get('io.rbd.newImageFeatures', types=int))
     self.assertEqual('RBD_FEATURE_EXCLUSIVE_LOCK',
                      config.get('io.rbd.newImageFeatures')[1])
Example #5
0
    def __init__(self, *, config: Config, name: str,
                 module_configuration: ConfigDict, path: str,
                 block_size: int) -> None:
        """Connect to the Ceph cluster and collect the configured RBD image features."""
        super().__init__(config=config,
                         name=name,
                         module_configuration=module_configuration,
                         path=path,
                         block_size=block_size)

        conffile = config.get_from_dict(module_configuration,
                                        'cephConfigFile',
                                        types=str)
        rados_id = config.get_from_dict(module_configuration,
                                        'clientIdentifier',
                                        types=str)
        self._cluster = rados.Rados(conffile=conffile, rados_id=rados_id)
        self._cluster.connect()

        # Build a bitmask of the configured image features.
        self._new_image_features = 0
        for feature in config.get_from_dict(module_configuration,
                                            'newImageFeatures',
                                            types=list):
            try:
                self._new_image_features |= getattr(rbd, feature)
            except AttributeError:
                raise ConfigurationError(
                    '{}: Unknown image feature {}.'.format(
                        module_configuration.full_name, feature))

        self._pool_name = None
        self._image_name = None
        self._snapshot_name = None
Example #6
0
    def __init__(self, *, config: Config, name: str,
                 module_configuration: ConfigDict) -> None:
        """Load and validate the ECC key, then initialize the AES parent with a placeholder key."""
        ecc_key_der: str = Config.get_from_dict(module_configuration,
                                                'eccKey',
                                                types=str)
        ecc_curve: Optional[str] = Config.get_from_dict(module_configuration,
                                                        'eccCurve',
                                                        'NIST P-384',
                                                        types=str)

        ecc_key = self._unpack_envelope_key(base64.b64decode(ecc_key_der))
        if ecc_key.curve != ecc_curve:
            raise ValueError(
                f'Key eccKey does not match the eccCurve setting (found: {ecc_key.curve}, expected: {ecc_curve}).'
            )

        self._ecc_key = ecc_key
        self._ecc_curve = ecc_key.curve

        # The derived AES key is taken from point Q, so Q must be at least as long.
        point_q_len = ecc_key.pointQ.size_in_bytes()
        if point_q_len < self.AES_KEY_LEN:
            raise ValueError(
                f'Size of point Q is smaller than the AES key length, which reduces security ({point_q_len} < {self.AES_KEY_LEN}).'
            )

        # Note: We don't actually have a "master" aes key, because the key is derived from the ECC key
        # and set before calling the parent's encapsulate/decapsulate method.
        aes_config = module_configuration.copy()
        aes_config['masterKey'] = base64.b64encode(b'\x00' * self.AES_KEY_LEN).decode('ascii')
        super().__init__(config=config, name=name, module_configuration=aes_config)
Example #7
0
File: zstd.py Project: wech71/benji
    def __init__(self, *, config: Config, name: str,
                 module_configuration: ConfigDict) -> None:
        """Read the compression level and optional dictionary; prepare per-thread state."""
        super().__init__(config=config,
                         name=name,
                         module_configuration=module_configuration)

        # Fixed wrong annotation: the option is read with types=int, so this is
        # an int, not a str.
        self.level: int = Config.get_from_dict(
            module_configuration,
            'level',
            types=int,
            check_func=lambda v: v >= 1 and v <= zstandard.
            MAX_COMPRESSION_LEVEL,
            check_message='Option level must be between 1 and {} (inclusive)'.
            format(zstandard.MAX_COMPRESSION_LEVEL))

        dict_data_file: str = Config.get_from_dict(module_configuration,
                                                   'dictDataFile',
                                                   None,
                                                   types=str)
        if dict_data_file:
            with open(dict_data_file, 'rb') as f:
                dict_data_content = f.read()
            self._dict_data = zstandard.ZstdCompressionDict(
                dict_data_content, dict_type=zstandard.DICT_TYPE_FULLDICT)
            # Precompute for the configured level so per-thread compressors are cheap.
            self._dict_data.precompute_compress(self.level)
        else:
            self._dict_data = None

        # Compressor/decompressor objects are not thread-safe; keep them per thread.
        self._local = threading.local()
Example #8
0
    def _import_modules(cls, config: Config, modules: ConfigList) -> None:
        """Import and validate every configured IO module, registering it by name."""
        for index, module_dict in enumerate(modules):
            module_name = Config.get_from_dict(module_dict,
                                               'module',
                                               types=str,
                                               full_name_override=modules.full_name,
                                               index=index)
            name = Config.get_from_dict(module_dict,
                                        'name',
                                        types=str,
                                        full_name_override=modules.full_name,
                                        index=index)
            configuration = Config.get_from_dict(module_dict,
                                                 'configuration',
                                                 None,
                                                 types=dict,
                                                 full_name_override=modules.full_name,
                                                 index=index)

            if name in cls._modules:
                raise ConfigurationError('Duplicate name "{}" in list {}.'.format(name, modules.full_name))

            module = importlib.import_module('{}.{}'.format(__package__, module_name))
            try:
                configuration = config.validate(module=module.__name__, config=configuration)
            except ConfigurationError as exception:
                raise ConfigurationError('Configuration for IO {} is invalid.'.format(name)) from exception
            cls._modules[name] = _IOFactoryModule(module=module,
                                                  arguments={
                                                      'config': config,
                                                      'name': name,
                                                      'module_configuration': configuration
                                                  })
Example #9
0
    def __init__(self, *, config: Config, name: str, module_configuration: ConfigDict, url: str,
                 block_size: int) -> None:
        """Validate the URL, connect to Ceph and prepare the read/write executors."""
        super().__init__(config=config,
                         name=name,
                         module_configuration=module_configuration,
                         url=url,
                         block_size=block_size)

        parsed = self.parsed_url
        if (parsed.username or parsed.password or parsed.hostname or parsed.port or parsed.params
                or parsed.fragment or parsed.query):
            raise UsageError('The supplied URL {} is invalid.'.format(self.url))

        conffile = config.get_from_dict(module_configuration, 'cephConfigFile', types=str)
        rados_id = config.get_from_dict(module_configuration, 'clientIdentifier', types=str)
        self._cluster = rados.Rados(conffile=conffile, rados_id=rados_id)
        self._cluster.connect()

        # Build a bitmask of the configured image features.
        self._new_image_features = 0
        for feature in config.get_from_dict(module_configuration, 'newImageFeatures', types=list):
            try:
                self._new_image_features |= getattr(rbd, feature)
            except AttributeError:
                raise ConfigurationError('{}: Unknown image feature {}.'.format(module_configuration.full_name, feature))

        self._pool_name = None
        self._image_name = None
        self._snapshot_name = None

        self._simultaneous_reads = config.get_from_dict(module_configuration, 'simultaneousReads', types=int)
        self._simultaneous_writes = config.get_from_dict(module_configuration, 'simultaneousWrites', types=int)
        self._read_executor: Optional[JobExecutor] = None
        self._write_executor: Optional[JobExecutor] = None
Example #10
0
def upgrade():
    """Create the storages table, seed it from the Benji configuration and wire up foreign keys."""
    op.create_table(
        'storages',
        sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
        sa.Column('name', sa.String(length=255), nullable=False),
        sa.PrimaryKeyConstraint('id', name=op.f('pk_storages')),
        sa.UniqueConstraint('name', name=op.f('uq_storages_name')))

    # The live configuration is optionally passed in via the migration context.
    benji_config = op.get_context().config.attributes.get('benji_config', None)
    if benji_config is not None:
        storages = sa.Table('storages', sa.MetaData(), autoload_with=op.get_bind())
        for index, storage in enumerate(benji_config.get('storages', types=list)):
            name = Config.get_from_dict(storage, 'name', types=str, index=index)
            storage_id = Config.get_from_dict(storage, 'storageId', None, types=int, index=index)
            op.execute(storages.insert().values(name=name, id=storage_id))

    for table_name, fk_name in (('versions', 'fk_versions_storage_id_storages'),
                                ('deleted_blocks', 'fk_deleted_blocks_storage_id_storages')):
        with op.batch_alter_table(table_name, schema=None) as batch_op:
            batch_op.create_foreign_key(batch_op.f(fk_name), 'storages',
                                        ['storage_id'], ['id'])
Example #11
0
File: rbdaio.py Project: q3k/benji
    def __init__(self, *, config: Config, name: str,
                 module_configuration: ConfigDict, url: str,
                 block_size: int) -> None:
        """Validate the URL, connect to Ceph and set up the asynchronous IO queues."""
        super().__init__(config=config,
                         name=name,
                         module_configuration=module_configuration,
                         url=url,
                         block_size=block_size)

        parsed = self.parsed_url
        if (parsed.username or parsed.password or parsed.hostname or parsed.port or parsed.params
                or parsed.fragment or parsed.query):
            raise UsageError('The supplied URL {} is invalid.'.format(self.url))

        conffile = config.get_from_dict(module_configuration,
                                        'cephConfigFile',
                                        types=str)
        rados_id = config.get_from_dict(module_configuration,
                                        'clientIdentifier',
                                        types=str)
        self._cluster = rados.Rados(conffile=conffile, rados_id=rados_id)
        self._cluster.connect()

        # Build a bitmask of the configured image features.
        self._new_image_features = 0
        for feature in config.get_from_dict(module_configuration,
                                            'newImageFeatures',
                                            types=list):
            try:
                self._new_image_features |= getattr(rbd, feature)
            except AttributeError:
                raise ConfigurationError(
                    '{}: Unknown image feature {}.'.format(
                        module_configuration.full_name, feature))

        self._pool_name = None
        self._image_name = None
        self._snapshot_name = None
        self._rbd_image = None

        self._simultaneous_reads = config.get_from_dict(module_configuration,
                                                        'simultaneousReads',
                                                        types=int)
        self._simultaneous_writes = config.get_from_dict(module_configuration,
                                                         'simultaneousWrites',
                                                         types=int)

        # AIO bookkeeping: pending work, in-flight counters and completion queues.
        self._read_queue: Deque[DereferencedBlock] = deque()
        self._write_queue: Deque[Tuple[DereferencedBlock, bytes]] = deque()
        self._outstanding_aio_reads = 0
        self._outstanding_aio_writes = 0
        # Bound the number of writes submitted to the cluster at once.
        self._submitted_aio_writes = threading.BoundedSemaphore(self._simultaneous_writes)
        self._read_completion_queue: queue.Queue[Tuple[rbd.Completion, float, float,
                                                       DereferencedBlock, bytes]] = queue.Queue()
        self._write_completion_queue: queue.Queue[Tuple[rbd.Completion, float, float,
                                                        DereferencedBlock]] = queue.Queue()
Example #12
0
 def __init__(self, *, config: Config, name: str, module_configuration: ConfigDict, url: str,
              block_size: int) -> None:
     """Initialize the parent IO module and read the parallelism settings."""
     super().__init__(config=config,
                      name=name,
                      module_configuration=module_configuration,
                      url=url,
                      block_size=block_size)
     self._read_executor: Optional[JobExecutor] = None
     self._write_executor: Optional[JobExecutor] = None
     self._simultaneous_reads = config.get_from_dict(module_configuration, 'simultaneousReads', types=int)
     self._simultaneous_writes = config.get_from_dict(module_configuration, 'simultaneousWrites', types=int)
Example #13
0
 def test_default_overwrite(self):
     """A value set in the supplied configuration overrides the shipped default; others keep theirs."""
     cfg_text = """
     configurationVersion: '{}'
     dataBackend:
       simultaneousReads: 12345678
     """.format(Config.CONFIG_VERSION)
     config = Config(cfg=cfg_text, merge_defaults=True)
     self.assertEqual(12345678, config.get('dataBackend.simultaneousReads'))
     self.assertEqual(1, config.get('io.rbd.simultaneousReads'))
Example #14
0
    def _import_modules(cls, config: Config, modules: ConfigList) -> None:
        """Import, validate and register every configured storage module by id and name."""
        for index, module_dict in enumerate(modules):
            module_name = Config.get_from_dict(module_dict,
                                               'module',
                                               types=str,
                                               full_name_override=modules.full_name,
                                               index=index)
            name = Config.get_from_dict(module_dict,
                                        'name',
                                        types=str,
                                        full_name_override=modules.full_name,
                                        index=index)
            storage_id = Config.get_from_dict(module_dict,
                                              'storageId',
                                              types=int,
                                              full_name_override=modules.full_name,
                                              index=index)
            configuration = Config.get_from_dict(module_dict,
                                                 'configuration',
                                                 None,
                                                 types=dict,
                                                 full_name_override=modules.full_name,
                                                 index=index)

            if name in cls._name_to_storage_id:
                raise ConfigurationError('Duplicate name "{}" in list {}.'.format(name, modules.full_name))
            if storage_id in cls._storage_id_to_name:
                raise ConfigurationError('Duplicate id {} in list {}.'.format(storage_id, modules.full_name))

            module = importlib.import_module('{}.{}.{}'.format(__package__, cls._MODULE, module_name))
            try:
                configuration = config.validate(module=module.__name__, config=configuration)
            except ConfigurationError as exception:
                raise ConfigurationError('Configuration for storage {} is invalid.'.format(name)) from exception

            cls._modules[storage_id] = _StorageFactoryModule(module=module,
                                                             arguments={
                                                                 'config': config,
                                                                 'name': name,
                                                                 'storage_id': storage_id,
                                                                 'module_configuration': configuration
                                                             })
            # Maintain the bidirectional name <-> id mapping.
            cls._name_to_storage_id[name] = storage_id
            cls._storage_id_to_name[storage_id] = name
Example #15
0
 def __init__(self, *, config: Config, name: str,
              module_configuration: ConfigDict, path: str,
              block_size: int) -> None:
     """Store the basic IO parameters and read the parallelism settings."""
     self._name = name
     self._path = path
     self._block_size = block_size
     self._read_executor: Optional[JobExecutor] = None
     self._write_executor: Optional[JobExecutor] = None
     self._simultaneous_reads = config.get_from_dict(module_configuration, 'simultaneousReads', types=int)
     self._simultaneous_writes = config.get_from_dict(module_configuration, 'simultaneousWrites', types=int)
Example #16
0
 def setUp(self):
     """Create a scratch path, configure console logging and load the test configuration."""
     self.testpath = _TestPath()
     # UNITTEST_QUIET suppresses debug output on the console.
     quiet = os.environ.get('UNITTEST_QUIET', False)
     init_logging(console_level=logging.WARN if quiet else logging.DEBUG,
                  console_formatter='console-plain')
     self.config = Config(ad_hoc_config=self.CONFIG.format(testpath=self.testpath.path))
Example #17
0
    def setUp(self):
        """Prepare a scratch path, debug logging and a non-merged test configuration."""
        self.testpath = self.TestPath()
        init_logging(None, logging.DEBUG)
        self.config = Config(cfg=self.CONFIG.format(testpath=self.testpath.path),
                             merge_defaults=False)
Example #18
0
File: file.py Project: wech71/benji
    def __init__(self, *, config: Config, name: str, module_configuration: ConfigDict, url: str,
                 block_size: int) -> None:
        """Validate the URL and read the parallelism settings for file-based IO."""
        super().__init__(config=config,
                         name=name,
                         module_configuration=module_configuration,
                         url=url,
                         block_size=block_size)

        parsed = self.parsed_url
        if (parsed.username or parsed.password or parsed.hostname or parsed.port or parsed.params
                or parsed.fragment or parsed.query):
            raise UsageError('The supplied URL {} is invalid.'.format(self.url))

        self._read_executor: Optional[JobExecutor] = None
        self._write_executor: Optional[JobExecutor] = None
        self._simultaneous_reads = config.get_from_dict(module_configuration, 'simultaneousReads', types=int)
        self._simultaneous_writes = config.get_from_dict(module_configuration, 'simultaneousWrites', types=int)
Example #19
0
 def test_validation(self):
     """Module validation fills in defaults and rejects malformed configurations."""
     config = Config(ad_hoc_config=self.CONFIG)

     validated = config.validate(module='benji.storage.file',
                                 config={'path': '/var/tmp'})
     self.assertEqual(
         {
             'bandwidthRead': 0,
             'bandwidthWrite': 0,
             'consistencyCheckWrites': False,
             'path': '/var/tmp',
             'simultaneousReads': 3,
             'simultaneousWrites': 3,
             'simultaneousRemovals': 5,
         }, validated)

     # Each of these configurations is invalid for a different reason:
     # unknown key, missing required key, out-of-range value, wrong type.
     for bad_configuration in ({'asdasdas': 'dasdasd'},
                               {},
                               {'path': '/var/tmp', 'bandwidthRead': -1},
                               {'path': [1, 2, 3]}):
         with self.assertRaises(ConfigurationError):
             config.validate(module='benji.storage.file',
                             config=bad_configuration)
Example #20
0
    def __init__(self, *, materials):
        """Read the compression level and prepare per-level compressor/decompressor caches."""
        self.level = Config.get_from_dict(
            materials,
            'level',
            self.DEFAULT_LEVEL,
            types=int,
            check_func=lambda v: 1 <= v <= zstandard.MAX_COMPRESSION_LEVEL,
            check_message='Option level must be between 1 and {} (inclusive)'.format(
                zstandard.MAX_COMPRESSION_LEVEL))

        # Lazily-populated caches keyed by compression level.
        self.compressors = {}
        self.decompressors = {}
Example #21
0
 def test_validation_io_rbd(self):
     """The rbd IO configuration validates cleanly and bad image features are rejected."""
     config = Config(ad_hoc_config=self.CONFIG)
     module_configuration = config.get('ios')[0]['configuration']
     expected = {
         'cephConfigFile': '/etc/ceph/ceph.conf',
         'clientIdentifier': 'admin',
         'newImageFeatures': ['RBD_FEATURE_LAYERING', 'RBD_FEATURE_EXCLUSIVE_LOCK'],
         'simultaneousReads': 10,
         'simultaneousWrites': 10,
     }
     self.assertEqual(expected,
                      config.validate(module='benji.io.rbd',
                                      config=module_configuration))
     # Unknown feature names must fail validation.
     module_configuration['newImageFeatures'] = ['ASASA', 'DDASAD']
     with self.assertRaises(ConfigurationError):
         config.validate(module='benji.io.rbd', config=module_configuration)
Example #22
0
    def setUp(self):
        """Configure logging, silence known boto3 resource warnings and load the configuration."""
        self.testpath = self.TestPath()
        quiet = os.environ.get('UNITTEST_QUIET', False)
        init_logging(None, logging.WARN if quiet else logging.DEBUG)
        # This disables ResourceWarnings from boto3 which are normal
        # See: https://github.com/boto/boto3/issues/454
        warnings.filterwarnings(
            "ignore",
            category=ResourceWarning,
            message=r'unclosed.*<(?:ssl.SSLSocket|socket\.socket).*>')
        self.config = Config(ad_hoc_config=self.CONFIG.format(testpath=self.testpath.path))
Example #23
0
File: iscsi.py Project: q3k/benji
    def __init__(self, *, config: Config, name: str,
                 module_configuration: ConfigDict, url: str,
                 block_size: int) -> None:
        """Validate the URL and read all iSCSI connection settings from the configuration."""
        super().__init__(config=config,
                         name=name,
                         module_configuration=module_configuration,
                         url=url,
                         block_size=block_size)

        if self.parsed_url.params or self.parsed_url.fragment:
            raise UsageError('The supplied URL {} is invalid.'.format(
                self.url))

        self._read_queue: List[DereferencedBlock] = []
        self._outstanding_write: Optional[Tuple[DereferencedBlock,
                                                bytes]] = None

        # All four credentials are optional.
        def _optional_str(key: str):
            return config.get_from_dict(module_configuration, key, None, types=str)

        self._username = _optional_str('username')
        self._password = _optional_str('password')
        self._target_username = _optional_str('targetUsername')
        self._target_password = _optional_str('targetPassword')

        # Map the configured digest name onto the libiscsi constant.
        header_digest = config.get_from_dict(module_configuration,
                                             'headerDigest',
                                             types=str)
        digest_attr_name = 'ISCSI_HEADER_DIGEST_{}'.format(header_digest)
        if not hasattr(libiscsi, digest_attr_name):
            raise ConfigurationError(
                'Unknown header digest setting {}.'.format(header_digest))
        self._header_digest = getattr(libiscsi, digest_attr_name)

        self._initiator_name = config.get_from_dict(module_configuration,
                                                    'initiatorName',
                                                    types=str)
        self._timeout = config.get_from_dict(module_configuration,
                                             'timeout',
                                             None,
                                             types=int)

        self._iscsi_context: Any = None
Example #24
0
    def __init__(self, *, config: Config, name: str,
                 module_configuration: ConfigDict):
        """Read the base path and normalize it to end with the path separator."""
        super().__init__(config=config,
                         name=name,
                         module_configuration=module_configuration)

        if os.sep != '/':
            raise RuntimeError(
                'This module only works with / as a path separator.')

        self.path = Config.get_from_dict(module_configuration, 'path', types=str)

        # Ensure that self.path ends in os.path.sep
        if not self.path.endswith(os.path.sep):
            self.path = os.path.join(self.path, '')
Example #25
0
 def test_lists(self):
     """List options come back as ConfigList and a wrong type request raises."""
     config = Config(ad_hoc_config=self.CONFIG)
     ios = config.get('ios', types=list)
     features = Config.get_from_dict(ios[0], 'configuration.newImageFeatures')
     self.assertIsInstance(features, ConfigList)
     self.assertRaises(
         TypeError, lambda: Config.get_from_dict(
             ios[0], 'configuration.newImageFeatures', types=int))
     self.assertEqual('RBD_FEATURE_EXCLUSIVE_LOCK', features[1])
Example #26
0
    def __init__(self, *, config: Config, name: str, storage_id: int,
                 module_configuration: ConfigDict):
        """Read the storage base path and normalize it to end with a slash."""
        super().__init__(config=config,
                         name=name,
                         storage_id=storage_id,
                         module_configuration=module_configuration)

        if os.sep != '/':
            raise RuntimeError(
                'This module only works with / as a path separator.')

        path = Config.get_from_dict(module_configuration, 'path', types=str)
        # Ensure that self.path ends in a slash
        self.path = path if path.endswith('/') else path + '/'
Example #27
0
def main():
    """Command-line entry point.

    Builds the argument parser with one subcommand per Benji operation,
    loads the configuration, initializes logging, dispatches to the
    matching method on :class:`benji.commands.Commands`, and maps raised
    exceptions to conventional ``os.EX_*`` exit codes.
    """
    if sys.hexversion < 0x030600F0:
        raise InternalError('Benji only supports Python 3.6 or above.')

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        allow_abbrev=False)

    parser.add_argument('-c',
                        '--config-file',
                        default=None,
                        type=str,
                        help='Specify a non-default configuration file')
    parser.add_argument('-m',
                        '--machine-output',
                        action='store_true',
                        default=False,
                        help='Enable machine-readable JSON output')
    parser.add_argument(
        '--log-level',
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'],
        default='INFO',
        help='Only log messages of this level or above on the console')
    parser.add_argument('--no-color',
                        action='store_true',
                        default=False,
                        help='Disable colorization of console logging')

    subparsers_root = parser.add_subparsers(title='commands')

    # BACKUP
    p = subparsers_root.add_parser('backup', help='Perform a backup')
    p.add_argument('-s',
                   '--snapshot-name',
                   default='',
                   help='Snapshot name (e.g. the name of the RBD snapshot)')
    p.add_argument('-r',
                   '--rbd-hints',
                   default=None,
                   help='Hints in rbd diff JSON format')
    p.add_argument('-f',
                   '--base-version',
                   dest='base_version_uid',
                   default=None,
                   help='Base version UID')
    p.add_argument('-b',
                   '--block-size',
                   type=int,
                   default=None,
                   help='Block size in bytes')
    p.add_argument('-l',
                   '--label',
                   action='append',
                   dest='labels',
                   metavar='label',
                   default=None,
                   help='Labels for this version (can be repeated)')
    p.add_argument(
        '-S',
        '--storage',
        default='',
        help='Destination storage (if unspecified the default is used)')
    p.add_argument('source', help='Source URL')
    p.add_argument('version_name',
                   help='Backup version name (e.g. the hostname)')
    p.set_defaults(func='backup')

    # BATCH-DEEP-SCRUB
    p = subparsers_root.add_parser(
        'batch-deep-scrub',
        help='Check data and metadata integrity of multiple versions at once',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    p.add_argument('-p',
                   '--block-percentage',
                   type=partial(integer_range, 1, 100),
                   default=100,
                   help='Check only a certain percentage of blocks')
    p.add_argument('-P',
                   '--version-percentage',
                   type=partial(integer_range, 1, 100),
                   default=100,
                   help='Check only a certain percentage of versions')
    p.add_argument('-g',
                   '--group_label',
                   default=None,
                   help='Label to find related versions')
    p.add_argument('filter_expression',
                   nargs='?',
                   default=None,
                   help='Version filter expression')
    p.set_defaults(func='batch_deep_scrub')

    # BATCH-SCRUB
    p = subparsers_root.add_parser(
        'batch-scrub',
        help=
        'Check block existence and metadata integrity of multiple versions at once',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    p.add_argument('-p',
                   '--block-percentage',
                   type=partial(integer_range, 1, 100),
                   default=100,
                   help='Check only a certain percentage of blocks')
    p.add_argument('-P',
                   '--version-percentage',
                   type=partial(integer_range, 1, 100),
                   default=100,
                   help='Check only a certain percentage of versions')
    p.add_argument('-g',
                   '--group_label',
                   default=None,
                   help='Label to find related versions')
    p.add_argument('filter_expression',
                   nargs='?',
                   default=None,
                   help='Version filter expression')
    p.set_defaults(func='batch_scrub')

    # CLEANUP
    p = subparsers_root.add_parser('cleanup',
                                   help='Cleanup no longer referenced blocks')
    p.add_argument('--override-lock',
                   action='store_true',
                   help='Override and release any held lock (dangerous)')
    p.set_defaults(func='cleanup')

    # COMPLETION
    p = subparsers_root.add_parser('completion',
                                   help='Emit autocompletion script')
    p.add_argument('shell', choices=['bash', 'tcsh'], help='Shell')
    p.set_defaults(func='completion')

    # DATABASE-INIT
    p = subparsers_root.add_parser(
        'database-init',
        help='Initialize the database (will not delete existing tables or data)'
    )
    p.set_defaults(func='database_init')

    # DATABASE-MIGRATE
    p = subparsers_root.add_parser(
        'database-migrate',
        help='Migrate an existing database to a new schema revision')
    p.set_defaults(func='database_migrate')

    # DEEP-SCRUB
    p = subparsers_root.add_parser(
        'deep-scrub',
        help='Check a version\'s data and metadata integrity',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    p.add_argument('-s',
                   '--source',
                   default=None,
                   help='Additionally compare version against source URL')
    p.add_argument('-p',
                   '--block-percentage',
                   type=partial(integer_range, 1, 100),
                   default=100,
                   help='Check only a certain percentage of blocks')
    p.add_argument('version_uid', help='Version UID')
    p.set_defaults(func='deep_scrub')

    # ENFORCE
    p = subparsers_root.add_parser('enforce',
                                   help="Enforce a retention policy ")
    p.add_argument('--dry-run',
                   action='store_true',
                   help='Only show which versions would be removed')
    p.add_argument('-k',
                   '--keep-metadata-backup',
                   action='store_true',
                   help='Keep version metadata backup')
    p.add_argument('-g',
                   '--group_label',
                   default=None,
                   help='Label to find related versions to remove')
    p.add_argument('rules_spec', help='Retention rules specification')
    p.add_argument('filter_expression',
                   nargs='?',
                   default=None,
                   help='Version filter expression')
    p.set_defaults(func='enforce_retention_policy')

    # LABEL
    p = subparsers_root.add_parser('label', help='Add labels to a version')
    p.add_argument('version_uid')
    p.add_argument('labels', nargs='+')
    p.set_defaults(func='label')

    # LS
    p = subparsers_root.add_parser('ls', help='List versions')
    p.add_argument('filter_expression',
                   nargs='?',
                   default=None,
                   help='Version filter expression')
    p.add_argument('-l',
                   '--include-labels',
                   action='store_true',
                   help='Include labels in output')
    p.add_argument('-s',
                   '--include-stats',
                   action='store_true',
                   help='Include statistics in output')
    p.set_defaults(func='ls')

    # METADATA-BACKUP
    p = subparsers_root.add_parser(
        'metadata-backup', help='Back up the metadata of one or more versions')
    p.add_argument('filter_expression', help="Version filter expression")
    p.add_argument('-f',
                   '--force',
                   action='store_true',
                   help='Overwrite existing metadata backups')
    p.set_defaults(func='metadata_backup')

    # METADATA EXPORT
    p = subparsers_root.add_parser(
        'metadata-export',
        help=
        'Export the metadata of one or more versions to a file or standard output'
    )
    p.add_argument('filter_expression',
                   nargs='?',
                   default=None,
                   help="Version filter expression")
    p.add_argument('-f',
                   '--force',
                   action='store_true',
                   help='Overwrite an existing output file')
    p.add_argument('-o',
                   '--output-file',
                   default=None,
                   help='Output file (standard output if missing)')
    p.set_defaults(func='metadata_export')

    # METADATA-IMPORT
    p = subparsers_root.add_parser(
        'metadata-import',
        help=
        'Import the metadata of one or more versions from a file or standard input'
    )
    p.add_argument('-i',
                   '--input-file',
                   default=None,
                   help='Input file (standard input if missing)')
    p.set_defaults(func='metadata_import')

    # METADATA-LS
    p = subparsers_root.add_parser('metadata-ls',
                                   help='List the version metadata backup')
    p.add_argument('-S',
                   '--storage',
                   default=None,
                   help='Source storage (if unspecified the default is used)')
    p.set_defaults(func='metadata_ls')

    # METADATA-RESTORE
    p = subparsers_root.add_parser(
        'metadata-restore',
        help='Restore the metadata of one ore more versions')
    p.add_argument('-S',
                   '--storage',
                   default=None,
                   help='Source storage (if unspecified the default is used)')
    p.add_argument('version_uids',
                   metavar='VERSION_UID',
                   nargs='+',
                   help="Version UID")
    p.set_defaults(func='metadata_restore')

    # NBD
    p = subparsers_root.add_parser(
        'nbd',
        help='Start an NBD server',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    p.add_argument('-a',
                   '--bind-address',
                   default='127.0.0.1',
                   help='Bind to the specified IP address')
    p.add_argument('-p',
                   '--bind-port',
                   default=10809,
                   help='Bind to the specified port')
    p.add_argument('-r',
                   '--read-only',
                   action='store_true',
                   default=False,
                   help='NBD device is read-only')
    p.set_defaults(func='nbd')

    # PROTECT
    p = subparsers_root.add_parser('protect',
                                   help='Protect one or more versions')
    p.add_argument('version_uids',
                   metavar='version_uid',
                   nargs='+',
                   help="Version UID")
    p.set_defaults(func='protect')

    # RESTORE
    p = subparsers_root.add_parser('restore', help='Restore a backup')
    p.add_argument('-s',
                   '--sparse',
                   action='store_true',
                   help='Restore only existing blocks')
    p.add_argument('-f',
                   '--force',
                   action='store_true',
                   help='Overwrite an existing file, device or image')
    p.add_argument('-d',
                   '--database-backend-less',
                   action='store_true',
                   help='Restore without requiring the database backend')
    p.add_argument('version_uid', help='Version UID to restore')
    p.add_argument('destination', help='Destination URL')
    p.set_defaults(func='restore')

    # RM
    p = subparsers_root.add_parser('rm', help='Remove one or more versions')
    p.add_argument(
        '-f',
        '--force',
        action='store_true',
        help='Force removal (overrides protection of recent versions)')
    p.add_argument('-k',
                   '--keep-metadata-backup',
                   action='store_true',
                   help='Keep version metadata backup')
    p.add_argument('--override-lock',
                   action='store_true',
                   help='Override and release any held locks (dangerous)')
    p.add_argument('version_uids',
                   metavar='version_uid',
                   nargs='+',
                   help='Version UID')
    p.set_defaults(func='rm')

    # SCRUB
    p = subparsers_root.add_parser(
        'scrub',
        help='Check a version\'s block existence and metadata integrity',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    p.add_argument('-p',
                   '--block-percentage',
                   type=partial(integer_range, 1, 100),
                   default=100,
                   help='Check only a certain percentage of blocks')
    p.add_argument('version_uid', help='Version UID')
    p.set_defaults(func='scrub')

    # STORAGE-STATS
    p = subparsers_root.add_parser('storage-stats',
                                   help='Show storage statistics')
    p.add_argument('storage_name', nargs='?', default=None, help='Storage')
    p.set_defaults(func='storage_stats')

    # UNPROTECT
    p = subparsers_root.add_parser('unprotect',
                                   help='Unprotect one or more versions')
    p.add_argument('version_uids',
                   metavar='version_uid',
                   nargs='+',
                   help='Version UID')
    p.set_defaults(func='unprotect')

    # VERSION-INFO
    p = subparsers_root.add_parser('version-info',
                                   help='Program version information')
    p.set_defaults(func='version_info')

    argcomplete.autocomplete(parser)
    args = parser.parse_args()

    # No subcommand given: show usage and exit.
    if not hasattr(args, 'func'):
        parser.print_usage()
        sys.exit(os.EX_USAGE)

    # "completion" needs neither configuration nor logging.
    if args.func == 'completion':
        completion(args.shell)
        sys.exit(os.EX_OK)

    from benji.config import Config
    from benji.logging import logger, init_logging
    if args.config_file is not None and args.config_file != '':
        try:
            # Use a context manager so the file handle is closed promptly
            # instead of leaking until garbage collection.
            with open(args.config_file, 'r', encoding='utf-8') as f:
                cfg = f.read()
        except FileNotFoundError:
            logger.error('File {} not found.'.format(args.config_file))
            sys.exit(os.EX_USAGE)
        config = Config(ad_hoc_config=cfg)
    else:
        config = Config()

    init_logging(config.get('logFile', types=(str, type(None))),
                 console_level=args.log_level,
                 console_formatter='console-plain'
                 if args.no_color else 'console-colored')

    if sys.hexversion < 0x030604F0:
        logger.warning(
            'The installed Python version will use excessive amounts of memory when used with Benji. Upgrade Python to at least 3.6.4.'
        )

    import benji.commands
    commands = benji.commands.Commands(args.machine_output, config)
    func = getattr(commands, args.func)

    # Pass over to function; strip global options the command doesn't take.
    func_args = dict(args._get_kwargs())
    del func_args['config_file']
    del func_args['func']
    del func_args['log_level']
    del func_args['machine_output']
    del func_args['no_color']

    # From most specific to least specific
    exception_mappings = [
        _ExceptionMapping(exception=benji.exception.UsageError,
                          exit_code=os.EX_USAGE),
        _ExceptionMapping(exception=benji.exception.AlreadyLocked,
                          exit_code=os.EX_NOPERM),
        _ExceptionMapping(exception=benji.exception.InternalError,
                          exit_code=os.EX_SOFTWARE),
        _ExceptionMapping(exception=benji.exception.ConfigurationError,
                          exit_code=os.EX_CONFIG),
        _ExceptionMapping(exception=benji.exception.InputDataError,
                          exit_code=os.EX_DATAERR),
        _ExceptionMapping(exception=benji.exception.ScrubbingError,
                          exit_code=os.EX_DATAERR),
        _ExceptionMapping(exception=PermissionError, exit_code=os.EX_NOPERM),
        _ExceptionMapping(exception=FileExistsError,
                          exit_code=os.EX_CANTCREAT),
        _ExceptionMapping(exception=FileNotFoundError,
                          exit_code=os.EX_NOINPUT),
        _ExceptionMapping(exception=EOFError, exit_code=os.EX_IOERR),
        _ExceptionMapping(exception=IOError, exit_code=os.EX_IOERR),
        _ExceptionMapping(exception=OSError, exit_code=os.EX_OSERR),
        _ExceptionMapping(exception=ConnectionError, exit_code=os.EX_IOERR),
        _ExceptionMapping(exception=LookupError, exit_code=os.EX_NOINPUT),
        _ExceptionMapping(exception=KeyboardInterrupt,
                          exit_code=os.EX_NOINPUT),
        _ExceptionMapping(exception=BaseException, exit_code=os.EX_SOFTWARE),
    ]

    try:
        logger.debug('commands.{0}(**{1!r})'.format(args.func, func_args))
        func(**func_args)
        sys.exit(os.EX_OK)
    except SystemExit:
        raise
    except BaseException as exception:
        # The BaseException entry above guarantees a match.
        for case in exception_mappings:
            if isinstance(exception, case.exception):
                message = str(exception)
                if message:
                    message = '{}: {}'.format(exception.__class__.__name__,
                                              message)
                else:
                    message = '{} exception occurred.'.format(
                        exception.__class__.__name__)
                logger.debug(message, exc_info=True)
                logger.error(message)
                sys.exit(case.exit_code)
Example #28
0
    def __init__(self, *, config: Config, name: str,
                 module_configuration: ConfigDict):
        """Initialize the B2 storage module.

        Reads the account credentials (directly or from files), sets up
        the B2 account info backend (SQLite file or in-memory), authorizes
        the account when necessary, and opens the configured bucket.
        """
        super().__init__(config=config,
                         name=name,
                         module_configuration=module_configuration)

        account_id = Config.get_from_dict(module_configuration,
                                          'accountId',
                                          None,
                                          types=str)
        if account_id is None:
            account_id_file = Config.get_from_dict(module_configuration,
                                                   'accountIdFile',
                                                   types=str)
            with open(account_id_file, 'r') as f:
                account_id = f.read().rstrip()
        application_key = Config.get_from_dict(module_configuration,
                                               'applicationKey',
                                               None,
                                               types=str)
        if application_key is None:
            application_key_file = Config.get_from_dict(module_configuration,
                                                        'applicationKeyFile',
                                                        types=str)
            with open(application_key_file, 'r') as f:
                application_key = f.read().rstrip()

        bucket_name = Config.get_from_dict(module_configuration,
                                           'bucketName',
                                           types=str)

        account_info_file = Config.get_from_dict(module_configuration,
                                                 'accountInfoFile',
                                                 None,
                                                 types=str)
        if account_info_file is not None:
            account_info = SqliteAccountInfo(file_name=account_info_file)
        else:
            account_info = InMemoryAccountInfo()

        b2.bucket.Bucket.MAX_UPLOAD_ATTEMPTS = Config.get_from_dict(
            module_configuration, 'uploadAttempts', types=int)

        self._write_object_attempts = Config.get_from_dict(
            module_configuration, 'writeObjectAttempts', types=int)

        self._read_object_attempts = Config.get_from_dict(module_configuration,
                                                          'readObjectAttempts',
                                                          types=int)

        self.service = b2.api.B2Api(account_info)
        if account_info_file is not None:
            try:
                # This temporarily disables all logging as the b2 library does
                # some very verbose logging of the exception we're trying to
                # catch here... Use try/finally so logging is re-enabled even
                # when the exception is raised (the original code left logging
                # disabled on that path).
                logging.disable(logging.ERROR)
                try:
                    _ = self.service.get_account_id()
                finally:
                    logging.disable(logging.NOTSET)
            except MissingAccountData:
                self.service.authorize_account('production', account_id,
                                               application_key)
        else:
            self.service.authorize_account('production', account_id,
                                           application_key)

        self.bucket = self.service.get_bucket_by_name(bucket_name)
Example #29
0
    def __init__(self, *, config: Config, name: str, storage_id: int, module_configuration: ConfigDict):
        """Initialize the S3 storage module.

        Reads the AWS credentials (directly or from files) and optional
        connection settings, builds the boto3 resource configuration, and
        opens a connection plus bucket handle for the current thread.
        """
        aws_access_key_id = Config.get_from_dict(module_configuration, 'awsAccessKeyId', None, types=str)
        if aws_access_key_id is None:
            aws_access_key_id_file = Config.get_from_dict(module_configuration, 'awsAccessKeyIdFile', types=str)
            with open(aws_access_key_id_file, 'r') as f:
                aws_access_key_id = f.read().rstrip()
        aws_secret_access_key = Config.get_from_dict(module_configuration, 'awsSecretAccessKey', None, types=str)
        if aws_secret_access_key is None:
            aws_secret_access_key_file = Config.get_from_dict(module_configuration, 'awsSecretAccessKeyFile', types=str)
            with open(aws_secret_access_key_file, 'r') as f:
                aws_secret_access_key = f.read().rstrip()
        region_name = Config.get_from_dict(module_configuration, 'regionName', None, types=str)
        endpoint_url = Config.get_from_dict(module_configuration, 'endpointUrl', None, types=str)
        use_ssl = Config.get_from_dict(module_configuration, 'useSsl', None, types=bool)
        addressing_style = Config.get_from_dict(module_configuration, 'addressingStyle', None, types=str)
        signature_version = Config.get_from_dict(module_configuration, 'signatureVersion', None, types=str)

        self._bucket_name = Config.get_from_dict(module_configuration, 'bucketName', types=str)
        self._disable_encoding_type = Config.get_from_dict(module_configuration, 'disableEncodingType', types=bool)

        self._resource_config = {
            'aws_access_key_id': aws_access_key_id,
            'aws_secret_access_key': aws_secret_access_key,
        }

        if region_name:
            self._resource_config['region_name'] = region_name

        if endpoint_url:
            self._resource_config['endpoint_url'] = endpoint_url

        # Check against None, not truthiness: an explicit "useSsl: false"
        # must be passed through, otherwise boto silently falls back to its
        # default of using SSL.
        if use_ssl is not None:
            self._resource_config['use_ssl'] = use_ssl

        # Arguments for the botocore client Config object (distinct from
        # the resource configuration above).
        client_config_args = {}
        if addressing_style:
            client_config_args['s3'] = {'addressing_style': addressing_style}

        if signature_version:
            client_config_args['signature_version'] = signature_version

        self._resource_config['config'] = BotoCoreClientConfig(**client_config_args)
        # Connections and bucket handles are kept per thread.
        # NOTE(review): only the constructing thread gets a bucket here;
        # presumably other threads call _init_connection themselves — verify.
        self._local = threading.local()
        self._init_connection()
        self._local.bucket = self._local.resource.Bucket(self._bucket_name)

        super().__init__(config=config, name=name, storage_id=storage_id, module_configuration=module_configuration)
Example #30
0
    def initialize(cls, config: Config) -> None:
        """Reset the module registry and import all configured storage modules.

        Also initializes the transform factory first, since storages may
        depend on transforms.
        """
        TransformFactory.initialize(config)

        cls._modules = {}
        configured_storages: ConfigList = config.get('storages', types=list)
        cls._import_modules(config, configured_storages)