def get_metadata(storagerouter):
        """
        Fetch the metadata of a Storage Router through the API and validate its layout
        The 'get_metadata' action is posted for the Storage Router and waited upon (timeout of 300)
        The returned structure must contain exactly the keys 'ipaddresses', 'partitions',
        'scrub_available' and 'writecache_size'

        Example return value:
            {'ipaddresses': ['10.100.174.254', '172.22.1.100', '192.168.122.1'],
             'partitions': {'BACKEND': [{'available': 1000202043392,
                                         'guid': '9ec473ad-5c3f-4fdb-a4ef-c99bb4449025',
                                         'in_use': False,
                                         'mountpoint': u'/mnt/alba-asd/hiu8WiD7sCfVF2IKRa5U1VZLOBS3H75W',
                                         'size': 1000202043392,
                                         'ssd': False,
                                         'storagerouter_guid': u'f5155bc2-b238-4a94-b6ce-b5600e65607a'}],
                            'DB': [{'available': 425200713728,
                                    'guid': 'c0064548-c0be-474d-a66b-da65639831f8',
                                    'in_use': False,
                                    'mountpoint': '/mnt/storage',
                                    'size': 425200713728,
                                    'ssd': False,
                                    'storagerouter_guid': u'f5155bc2-b238-4a94-b6ce-b5600e65607a'}],
                            'SCRUB': [{'available': 340160570983,
                                       'guid': 'c0064548-c0be-474d-a66b-da65639831f8',
                                       'in_use': False,
                                       'mountpoint': '/mnt/storage',
                                       'size': 425200713728,
                                       'ssd': False,
                                       'storagerouter_guid': u'f5155bc2-b238-4a94-b6ce-b5600e65607a'}],
                            'WRITE': [{'available': 60016295936,
                                       'guid': '0d167ced-5a5f-47aa-b890-45b923b686c4',
                                       'in_use': False,
                                       'mountpoint': u'/mnt/ssd2',
                                       'size': 60016295936,
                                       'ssd': True,
                                       'storagerouter_guid': u'f5155bc2-b238-4a94-b6ce-b5600e65607a'}]},
             'scrub_available': True,
             'writecache_size': 60016295936}

        :param storagerouter: Storage Router to retrieve metadata for
        :return: Metadata as returned by the API
        """
        succeeded, sr_metadata = GeneralStorageRouter.api.execute_post_action(
            component='storagerouters',
            guid=storagerouter.guid,
            action='get_metadata',
            data={},
            wait=True,
            timeout=300
        )
        assert succeeded is True, 'Retrieving metadata failed for Storage Router {0}'.format(storagerouter.name)

        # exact_match=True: keys outside this schema are not allowed in the metadata
        Toolbox.verify_required_params(required_params={'ipaddresses': (list, Toolbox.regex_ip),
                                                        'partitions': (dict, None),
                                                        'scrub_available': (bool, None),
                                                        'writecache_size': (int, {'min': 0})},
                                       actual_params=sr_metadata,
                                       exact_match=True)
        return sr_metadata
    def validate_alba_backend_removal(alba_backend_info):
        """
        Assert that nothing is left behind after an ALBA backend has been removed
        Checked are: the backend in the model, the AlbaManager and NamespaceManager services in the model,
        the configuration entries below /ovs/alba/backends and /ovs/arakoon and the services deployed
        on every Storage Router

        alba_backend_info must be a dictionary containing exactly:
            - guid
            - name
            - maintenance_service_names
        :param alba_backend_info: Information about the backend which has been removed
        :return: None
        """
        expected_layout = {'name': (str, None),
                           'guid': (str, Toolbox.regex_guid),
                           'maintenance_service_names': (list, None)}
        Toolbox.verify_required_params(actual_params=alba_backend_info,
                                       required_params=expected_layout,
                                       exact_match=True)

        backend_guid = alba_backend_info['guid']
        backend_name = alba_backend_info['name']

        # The backend itself must be gone from the model
        assert GeneralBackend.get_by_name(backend_name) is None,\
            'Still found a backend in the model with name {0}'.format(backend_name)

        # The modelled manager services must be gone too
        abm_model_name = '{0}-abm'.format(backend_name)
        nsm_model_prefix = '{0}-nsm_'.format(backend_name)
        for abm_service in GeneralService.get_services_by_name(ServiceType.SERVICE_TYPES.ALBA_MGR):
            assert abm_service.name != abm_model_name,\
                'An AlbaManager service has been found with name {0}'.format(backend_name)
        for nsm_service in GeneralService.get_services_by_name(ServiceType.SERVICE_TYPES.NS_MGR):
            assert not nsm_service.name.startswith(nsm_model_prefix),\
                'An NamespaceManager service has been found with name {0}'.format(backend_name)

        # No ALBA backend entry may remain in the configuration
        backends_root = '/ovs/alba/backends'
        remaining_backend_keys = list(Configuration.list(backends_root))
        assert backend_guid not in remaining_backend_keys,\
            'Configuration still contains an entry in {0} with guid {1}'.format(backends_root, backend_guid)

        # No Arakoon cluster configuration starting with the backend name may remain
        leftover_clusters = []
        for cluster_key in Configuration.list('/ovs/arakoon'):
            if cluster_key.startswith(backend_name):
                leftover_clusters.append(cluster_key)
        assert len(leftover_clusters) == 0,\
            'Configuration still contains configurations for clusters: {0}'.format(', '.join(leftover_clusters))

        # None of the backend related services may still be deployed on any Storage Router
        services_to_check = ['ovs-arakoon-{0}-abm'.format(backend_name),
                             'ovs-arakoon-{0}-nsm_0'.format(backend_name)]
        services_to_check = services_to_check + alba_backend_info['maintenance_service_names']
        for storagerouter in GeneralStorageRouter.get_storage_routers():
            ssh_client = SSHClient(endpoint=storagerouter, username='******')
            for deployed_name in services_to_check:
                assert GeneralService.has_service(name=deployed_name, client=ssh_client) is False,\
                    'Service {0} still deployed on Storage Router {1}'.format(deployed_name, storagerouter.name)
    def validate_vpool_sanity(expected_settings):
        """
        Check if all requirements are met for a healthy vPool
        The validation covers:
            - Basic vPool attributes in the model (name, backend type, status, RDMA, Storage Routers)
            - The vPool configuration versus the 'config_params' used during creation
            - The Storage Driver configuration stored in etcd for every Storage Driver
            - The services which should be running on every Storage Router
            - Directories, files and partitions related to every Storage Driver
            - Writability of the vPool: on every Storage Driver a volume is created, written to and deleted again

        :param expected_settings: Parameters used to create a vPool, which will be verified
                                  The keys are compared against the Storage Routers of the vPool's Storage Drivers
                                  The first value is used as the generic settings and must contain 'vpool_name', 'type' and
                                  'config_params' ('distributed_mountpoint' is read too for backend type 'distributed')
        :type expected_settings: dict

        :raises ValueError: When no settings are passed or when a configuration, service, directory, file or partition check fails
        :raises AssertionError: When one of the asserted model or etcd checks fails

        :return: None
        """
        if not isinstance(expected_settings, dict) or len(expected_settings) == 0:
            raise ValueError('Cannot validate vpool when no settings are passed')

        # Settings of the first Storage Router are used as reference for all vPool wide settings
        generic_settings = expected_settings.values()[0]
        vpool_name = generic_settings['vpool_name']
        mountpoint = '/mnt/{0}'.format(vpool_name)
        backend_type = generic_settings['type']
        rdma_enabled = generic_settings['config_params']['dtl_transport'] == StorageDriverClient.FRAMEWORK_DTL_TRANSPORT_RSOCKET

        vpool = GeneralVPool.get_vpool_by_name(vpool_name=vpool_name)
        assert vpool is not None, 'Could not find vPool with name {0}'.format(vpool_name)
        vpool_config = GeneralVPool.get_configuration(vpool)

        # Verify some basic vPool attributes
        assert vpool.name == vpool_name, 'Expected name {0} for vPool'.format(vpool_name)
        assert vpool.backend_type.code == backend_type, 'Expected backend type {0}'.format(backend_type)
        assert vpool.status == VPool.STATUSES.RUNNING, 'vPool does not have RUNNING status'
        assert vpool.rdma_enabled == rdma_enabled, 'RDMA enabled setting is incorrect'
        assert set(expected_settings.keys()) == set([sd.storagerouter for sd in vpool.storagedrivers]), "vPool storagerouters don't match the expected Storage Routers"

        # Verify vPool Storage Driver configuration
        # Every verified key is popped from the copy, so whatever remains afterwards is missing in the actual configuration
        expected_vpool_config = copy.deepcopy(generic_settings['config_params'])
        for key, value in vpool_config.iteritems():
            if key == 'dtl_enabled' or key == 'tlog_multiplier':
                continue
            if key not in expected_vpool_config:
                raise ValueError('Expected settings does not contain key {0}'.format(key))

            if value != expected_vpool_config[key]:
                raise ValueError('vPool does not have expected configuration {0} for key {1}'.format(expected_vpool_config[key], key))
            expected_vpool_config.pop(key)

        if len(expected_vpool_config) > 0:
            raise ValueError('Actual vPool configuration does not contain keys: {0}'.format(', '.join(expected_vpool_config.keys())))

        # Prepare some fields to check
        config = generic_settings['config_params']
        dtl_mode = config['dtl_mode']
        sco_size = config['sco_size']
        dedupe_mode = config['dedupe_mode']
        cluster_size = config['cluster_size']
        write_buffer = config['write_buffer']
        dtl_transport = config['dtl_transport']
        cache_strategy = config['cache_strategy']
        # A value of None means: the key must be known, but its value is not verified
        # @TODO: Add more validations for other expected settings (instead of None)
        expected_config = {'backend_connection_manager': {'backend_interface_retries_on_error': 5,
                                                          'backend_interface_retry_interval_secs': 1,
                                                          'backend_interface_retry_backoff_multiplier': 2.0},
                           'content_addressed_cache': {'clustercache_mount_points': None,
                                                       'read_cache_serialization_path': u'/var/rsp/{0}'.format(vpool.name)},
                           'distributed_lock_store': {'dls_arakoon_cluster_id': None,
                                                      'dls_arakoon_cluster_nodes': None,
                                                      'dls_type': u'Arakoon'},
                           'distributed_transaction_log': {'dtl_path': None,
                                                           'dtl_transport': dtl_transport.upper()},
                           'event_publisher': {'events_amqp_routing_key': u'volumerouter',
                                               'events_amqp_uris': None},
                           'file_driver': {'fd_cache_path': None,
                                           'fd_extent_cache_capacity': u'1024',
                                           'fd_namespace': None},
                           'filesystem': {'fs_dtl_config_mode': u'Automatic',
                                          'fs_dtl_mode': u'{0}'.format(StorageDriverClient.VPOOL_DTL_MODE_MAP[dtl_mode]),
                                          'fs_enable_shm_interface': 1,
                                          'fs_file_event_rules': None,
                                          'fs_metadata_backend_arakoon_cluster_nodes': None,
                                          'fs_metadata_backend_mds_nodes': None,
                                          'fs_metadata_backend_type': u'MDS',
                                          'fs_raw_disk_suffix': None,
                                          'fs_virtual_disk_format': None},
                           'metadata_server': {'mds_nodes': None},
                           'scocache': {'backoff_gap': u'2GB',
                                        'scocache_mount_points': None,
                                        'trigger_gap': u'1GB'},
                           'threadpool_component': {'num_threads': 16},
                           'volume_manager': {'clean_interval': 1,
                                              'default_cluster_size': 1024 * cluster_size,
                                              'dtl_throttle_usecs': 4000,
                                              'metadata_path': None,
                                              'non_disposable_scos_factor': float(write_buffer) / StorageDriverClient.TLOG_MULTIPLIER_MAP[sco_size] / sco_size,
                                              'number_of_scos_in_tlog': StorageDriverClient.TLOG_MULTIPLIER_MAP[sco_size],
                                              'read_cache_default_behaviour': StorageDriverClient.VPOOL_CACHE_MAP[cache_strategy],
                                              'read_cache_default_mode': StorageDriverClient.VPOOL_DEDUPE_MAP[dedupe_mode],
                                              'tlog_path': None},
                           'volume_registry': {'vregistry_arakoon_cluster_id': u'voldrv',
                                               'vregistry_arakoon_cluster_nodes': None},
                           'volume_router': {'vrouter_backend_sync_timeout_ms': 5000,
                                             'vrouter_file_read_threshold': 1024,
                                             'vrouter_file_write_threshold': 1024,
                                             'vrouter_id': None,
                                             'vrouter_max_workers': 16,
                                             'vrouter_migrate_timeout_ms': 5000,
                                             'vrouter_min_workers': 4,
                                             'vrouter_redirect_timeout_ms': u'5000',
                                             'vrouter_routing_retries': 10,
                                             'vrouter_sco_multiplier': 1024,
                                             'vrouter_volume_read_threshold': 1024,
                                             'vrouter_volume_write_threshold': 1024},
                           'volume_router_cluster': {'vrouter_cluster_id': None}}
        vpool_services = {'all': ['ovs-watcher-volumedriver',
                                  'ovs-dtl_{0}'.format(vpool.name),
                                  'ovs-volumedriver_{0}'.format(vpool.name),
                                  'ovs-volumerouter-consumer'],
                          'extra': [],
                          'master': ['ovs-arakoon-voldrv']}
        # Role / sub-role combinations which must be found on at least 1 Storage Driver
        # A partition without sub-role is tracked by means of the string 'None'
        sd_partitions = {'DB': ['MD', 'MDS', 'TLOG'],
                         'READ': ['None'],
                         'WRITE': ['FD', 'DTL', 'SCO'],
                         'SCRUB': ['None']}

        if backend_type == 'alba':
            backend_metadata = {'name': (str, None),
                                'preset': (str, Toolbox.regex_preset),
                                'backend_guid': (str, Toolbox.regex_guid),
                                'arakoon_config': (dict, None),
                                'connection': (dict, {'host': (str, Toolbox.regex_ip, False),
                                                      'port': (int, {'min': 1, 'max': 65535}),
                                                      'client_id': (str, Toolbox.regex_guid),
                                                      'client_secret': (str, None),
                                                      'local': (bool, None)}),
                                'backend_info': (dict, {'policies': (list, None),
                                                        'sco_size': (float, None),
                                                        'frag_size': (float, None),
                                                        'total_size': (float, None),
                                                        'nsm_partition_guids': (list, Toolbox.regex_guid)})}
            required = {'backend': (dict, backend_metadata),
                        'backend_aa': (dict, backend_metadata, False)}
            Toolbox.verify_required_params(required_params=required,
                                           actual_params=vpool.metadata)
            vpool_services['all'].append("ovs-albaproxy_{0}".format(vpool.name))
            sd_partitions['WRITE'].append('FCACHE')
            expected_config['backend_connection_manager'].update({'alba_connection_host': None,
                                                                  'alba_connection_port': None,
                                                                  'alba_connection_preset': None,
                                                                  'alba_connection_timeout': 15,
                                                                  'backend_type': u'{0}'.format(vpool.backend_type.code.upper())})
        elif backend_type == 'distributed':
            expected_config['backend_connection_manager'].update({'backend_type': u'LOCAL',
                                                                  'local_connection_path': u'{0}'.format(generic_settings['distributed_mountpoint'])})

        assert EtcdConfiguration.exists('/ovs/arakoon/voldrv/config', raw=True), 'Volumedriver arakoon does not exist'

        # Do some verifications for all SDs
        storage_ip = None
        voldrv_config = GeneralArakoon.get_config('voldrv')
        all_files = GeneralVPool.get_related_files(vpool=vpool)
        all_directories = GeneralVPool.get_related_directories(vpool=vpool)

        for storagedriver in vpool.storagedrivers:
            storagerouter = storagedriver.storagerouter
            root_client = SSHClient(storagerouter, username='******')

            assert EtcdConfiguration.exists('/ovs/vpools/{0}/hosts/{1}/config'.format(vpool.guid, storagedriver.storagedriver_id), raw=True), 'vPool config not found in etcd'
            current_config_sections = set(EtcdConfiguration.list('/ovs/vpools/{0}/hosts/{1}/config'.format(vpool.guid, storagedriver.storagedriver_id)))
            assert not current_config_sections.difference(set(expected_config.keys())), 'New section appeared in the storage driver config in etcd'
            assert not set(expected_config.keys()).difference(current_config_sections), 'Config section expected for storage driver, but not found in etcd'

            for key, values in expected_config.iteritems():
                current_config = EtcdConfiguration.get('/ovs/vpools/{0}/hosts/{1}/config/{2}'.format(vpool.guid, storagedriver.storagedriver_id, key))
                # The union only equals the expected keys when every actual key is an expected key
                assert set(current_config.keys()).union(set(values.keys())) == set(values.keys()), 'Not all expected keys match for key "{0}" on Storage Driver {1}'.format(key, storagedriver.name)

                for sub_key, value in current_config.iteritems():
                    expected_value = values[sub_key]
                    if expected_value is None:
                        continue
                    assert value == expected_value, 'Key: {0} - Sub key: {1} - Value: {2} - Expected value: {3}'.format(key, sub_key, value, expected_value)

            # Check services
            if storagerouter.node_type == 'MASTER':
                for service_name in vpool_services['all'] + vpool_services['master']:
                    # The voldrv arakoon service is only verified when the Storage Driver has the DB role
                    if service_name == 'ovs-arakoon-voldrv' and GeneralStorageDriver.has_role(storagedriver, 'DB') is False:
                        continue
                    if ServiceManager.get_service_status(name=service_name,
                                                         client=root_client) is not True:
                        raise ValueError('Service {0} is not running on node {1}'.format(service_name, storagerouter.ip))
            else:
                for service_name in vpool_services['all'] + vpool_services['extra']:
                    if ServiceManager.get_service_status(name=service_name,
                                                         client=root_client) is not True:
                        raise ValueError('Service {0} is not running on node {1}'.format(service_name, storagerouter.ip))

            # Check arakoon config
            if not voldrv_config.has_section(storagerouter.machine_id):
                raise ValueError('Voldrv arakoon cluster does not have section {0}'.format(storagerouter.machine_id))

            # Basic SD checks
            assert storagedriver.cluster_ip == storagerouter.ip, 'Incorrect cluster IP. Expected: {0}  -  Actual: {1}'.format(storagerouter.ip, storagedriver.cluster_ip)
            assert storagedriver.mountpoint == '/mnt/{0}'.format(vpool.name), 'Incorrect mountpoint. Expected: {0}  -  Actual: {1}'.format(mountpoint, storagedriver.mountpoint)
            # All Storage Drivers are expected to have the same storage IP as the previously verified one
            if storage_ip is not None:
                assert storagedriver.storage_ip == storage_ip, 'Incorrect storage IP. Expected: {0}  -  Actual: {1}'.format(storage_ip, storagedriver.storage_ip)
            storage_ip = storagedriver.storage_ip

            # Check required directories and files
            if storagerouter.guid not in all_directories:
                raise ValueError('Could not find directory information for Storage Router {0}'.format(storagerouter.ip))
            if storagerouter.guid not in all_files:
                raise ValueError('Could not find file information for Storage Router {0}'.format(storagerouter.ip))

            for directory in all_directories[storagerouter.guid]:
                if root_client.dir_exists(directory) is False:
                    raise ValueError('Directory {0} does not exist on Storage Router {1}'.format(directory, storagerouter.ip))
            for file_name in all_files[storagerouter.guid]:
                if root_client.file_exists(file_name) is False:
                    raise ValueError('File {0} does not exist on Storage Router {1}'.format(file_name, storagerouter.ip))

            # Tick off every role / sub-role combination found on this Storage Driver
            # sd_partitions is shared by all Storage Drivers, so a combination may already have been ticked off before
            # Only remove an entry which is still present, list.remove would raise a ValueError otherwise
            for partition in storagedriver.partitions:
                if partition.role not in sd_partitions:
                    continue
                found_sub_role = 'None' if partition.sub_role is None else partition.sub_role
                if found_sub_role in sd_partitions[partition.role]:
                    sd_partitions[partition.role].remove(found_sub_role)

            # Verify vPool writeable
            if storagerouter.pmachine.hvtype == 'VMWARE':
                GeneralVPool.mount_vpool(vpool=vpool,
                                         root_client=root_client)

            vdisk = GeneralVDisk.create_volume(size=10,
                                               vpool=vpool,
                                               root_client=root_client)
            GeneralVDisk.write_to_volume(vdisk=vdisk,
                                         vpool=vpool,
                                         root_client=root_client,
                                         count=10,
                                         bs='1M',
                                         input_type='random')
            GeneralVDisk.delete_volume(vdisk=vdisk,
                                       vpool=vpool,
                                       root_client=root_client)

        # Anything left in sd_partitions has not been found on any Storage Driver, the first leftover is reported
        for role, sub_roles in sd_partitions.iteritems():
            for sub_role in sub_roles:
                raise ValueError('Not a single Storage Driver found with partition role {0} and sub-role {1}'.format(role, sub_role))
Esempio n. 4
0
    def dtl_checkup(vpool_guid=None, vdisk_guid=None, storagerouters_to_exclude=None):
        """
        Verify the DTL configuration of virtual disks and reconfigure it where required
        Without arguments all virtual disks are checked, otherwise only the disks of the specified vPool
        or the specified virtual disk
        Possible DTL targets are first searched in the secondary failure domain of the disk's Storage Router
        and, when none are found, in its primary failure domain (the Storage Router itself excluded)

        :param vpool_guid:                vPool to check the DTL configuration of all its disks
        :type vpool_guid:                 String

        :param vdisk_guid:                Virtual Disk to check its DTL configuration
        :type vdisk_guid:                 String

        :param storagerouters_to_exclude: Guids of the Storage Routers to exclude from possible targets
        :type storagerouters_to_exclude:  List

        :raises ValueError:               When both a vPool and a virtual disk are specified or when no
                                          Storage Driver can be found for the chosen DTL target
        :raises Exception:                When retrieving the DTL configuration failed for at least 1 disk

        :return:                          None
        """
        if vpool_guid is not None and vdisk_guid is not None:
            raise ValueError('vpool and vdisk are mutually exclusive')
        if storagerouters_to_exclude is None:
            storagerouters_to_exclude = []

        from ovs.lib.vpool import VPoolController

        logger.info('DTL checkup started')
        required_params = {'dtl_mode': (str, StorageDriverClient.VPOOL_DTL_MODE_MAP.keys()),
                           'dtl_enabled': (bool, None)}
        requested_vdisk = VDisk(vdisk_guid) if vdisk_guid else None
        requested_vpool = VPool(vpool_guid) if vpool_guid else None
        errors_found = False
        root_client_map = {}  # SSH clients are re-used for all virtual disks
        vpool_dtl_config_cache = {}

        def _collect_dtl_targets(candidates, target_vpool, checked_vdisk, router_to_skip=None):
            """
            Filter the candidates down to the Storage Routers serving the vPool, which are reachable
            and on which the DTL service of the vPool is running
            """
            targets = []
            for candidate in candidates:
                if target_vpool.guid not in candidate.vpools_guids:
                    continue
                if router_to_skip is not None and candidate == router_to_skip:
                    continue
                if candidate in root_client_map:
                    ssh_client = root_client_map[candidate]
                else:
                    try:
                        ssh_client = SSHClient(candidate, username='******')
                    except UnableToConnectException:
                        logger.warning('    Storage Router with IP {0} of vDisk {1} is not reachable'.format(candidate.ip, checked_vdisk.name))
                        continue
                    root_client_map[candidate] = ssh_client
                if ServiceManager.get_service_status('dtl_{0}'.format(target_vpool.name), client=ssh_client) is True:
                    targets.append(candidate)
            return targets

        if requested_vdisk is None and requested_vpool is None:
            vdisks = VDiskList.get_vdisks()
        elif requested_vpool is not None:
            vdisks = requested_vpool.vdisks
        else:
            vdisks = [requested_vdisk]

        for vdisk in vdisks:
            logger.info('    Verifying vDisk {0} with guid {1}'.format(vdisk.name, vdisk.guid))
            vdisk.invalidate_dynamics(['storagedriver_client', 'storagerouter_guid'])
            if vdisk.storagedriver_client is None:
                continue

            vpool = vdisk.vpool
            if vpool.guid not in vpool_dtl_config_cache:
                # Config on vPool is permanent for DTL settings, so it is retrieved only once per vPool
                fetched_config = VPoolController.get_configuration(vpool.guid)
                vpool_dtl_config_cache[vpool.guid] = fetched_config
                Toolbox.verify_required_params(required_params, fetched_config)

            volume_id = str(vdisk.volume_id)
            vpool_config = vpool_dtl_config_cache[vpool.guid]
            dtl_vpool_enabled = vpool_config['dtl_enabled']
            try:
                current_dtl_config = vdisk.storagedriver_client.get_dtl_config(volume_id)
                current_dtl_config_mode = vdisk.storagedriver_client.get_dtl_config_mode(volume_id)
            except RuntimeError as rte:
                # Can occur when a volume has not been stolen yet from a dead node
                logger.error('Retrieving DTL configuration from storage driver failed with error: {0}'.format(rte))
                errors_found = True
                continue

            # Nothing to verify when no DTL target is configured and DTL is disabled on vPool or disk level
            no_target_configured = current_dtl_config is None or current_dtl_config.host == 'null'
            if no_target_configured:
                if dtl_vpool_enabled is False:
                    logger.info('    DTL is globally disabled for vPool {0} with guid {1}'.format(vpool.name, vpool.guid))
                    vdisk.storagedriver_client.set_manual_dtl_config(volume_id, None)
                    continue
                if current_dtl_config_mode == DTLConfigMode.MANUAL:
                    logger.info('    DTL is disabled for virtual disk {0} with guid {1}'.format(vdisk.name, vdisk.guid))
                    continue

            storage_router = StorageRouter(vdisk.storagerouter_guid)
            # 1. Check available storage routers in the backup failure domain
            if storage_router.secondary_failure_domain is not None:
                available_storagerouters = _collect_dtl_targets(storage_router.secondary_failure_domain.primary_storagerouters,
                                                                vpool,
                                                                vdisk)
            else:
                available_storagerouters = []
            # 2. Check available storage routers in the same failure domain as current storage router
            if len(available_storagerouters) == 0:
                available_storagerouters = _collect_dtl_targets(storage_router.primary_failure_domain.primary_storagerouters,
                                                                vpool,
                                                                vdisk,
                                                                router_to_skip=storage_router)

            # Remove storage routers to exclude
            for excluded_guid in storagerouters_to_exclude:
                excluded_router = StorageRouter(excluded_guid)
                if excluded_router in available_storagerouters:
                    available_storagerouters.remove(excluded_router)

            if len(available_storagerouters) == 0:
                logger.info('    No Storage Routers could be found as valid DTL target')
                vdisk.storagedriver_client.set_manual_dtl_config(volume_id, None)
                continue

            # Check whether reconfiguration is required
            reconfigure_required = False
            if current_dtl_config is None:
                logger.info('        No DTL configuration found, but there are Storage Routers available')
                reconfigure_required = True
            elif current_dtl_config_mode == DTLConfigMode.AUTOMATIC:
                logger.info('        DTL configuration set to AUTOMATIC, switching to manual')
                reconfigure_required = True
            else:
                dtl_host = current_dtl_config.host
                dtl_port = current_dtl_config.port
                host_drivers = [sd for sd in vpool.storagedrivers if sd.storagerouter.ip == dtl_host]

                logger.info('        DTL host: {0}'.format(dtl_host or '-'))
                logger.info('        DTL port: {0}'.format(dtl_port or '-'))
                if dtl_host not in [sr.ip for sr in available_storagerouters]:
                    logger.info('        Host not in available Storage Routers')
                    reconfigure_required = True
                else:
                    expected_port = host_drivers[0].ports[2]
                    if dtl_port != expected_port:
                        logger.info('        Configured port does not match expected port ({0} vs {1})'.format(dtl_port, expected_port))
                        reconfigure_required = True

            if reconfigure_required is not True:
                continue

            # Perform the reconfiguration towards a randomly picked target
            logger.info('        Reconfigure required')
            dtl_target = available_storagerouters[random.randint(0, len(available_storagerouters) - 1)]
            target_drivers = [sd for sd in vpool.storagedrivers if sd.storagerouter == dtl_target]
            if len(target_drivers) == 0:
                raise ValueError('Could not retrieve related storagedriver')

            port = target_drivers[0].ports[2]
            vpool_dtl_mode = vpool_config.get('dtl_mode', StorageDriverClient.FRAMEWORK_DTL_ASYNC)
            logger.info('        DTL config that will be set -->  Host: {0}, Port: {1}, Mode: {2}'.format(dtl_target.ip, port, vpool_dtl_mode))
            dtl_config = DTLConfig(str(dtl_target.ip), port, StorageDriverClient.VDISK_DTL_MODE_MAP[vpool_dtl_mode])
            vdisk.storagedriver_client.set_manual_dtl_config(volume_id, dtl_config)

        if errors_found is True:
            logger.error('DTL checkup ended with errors')
            raise Exception('DTL checkup failed with errors. Please check /var/log/ovs/lib.log for more information')
        logger.info('DTL checkup ended')
Esempio n. 5
0
    def set_config_params(vdisk_guid, new_config_params):
        """
        Sets configuration parameters for a given vdisk.
        The parameters are applied in 3 steps: first the SCO size, then the DTL configuration and finally all remaining properties.
        Failures while updating the SCO size or the remaining properties are logged and reported at the end by a single exception.
        :param vdisk_guid: Guid of the virtual disk to set the configuration parameters for
        :param new_config_params: New configuration parameters
        :raises Exception: If a DTL mode other than 'no_sync' is requested without a 'dtl_target',
                           or if at least 1 of the properties could not be updated
        """
        required_params = {'dtl_mode': (str, StorageDriverClient.VDISK_DTL_MODE_MAP.keys()),
                           'sco_size': (int, StorageDriverClient.TLOG_MULTIPLIER_MAP.keys()),
                           'dedupe_mode': (str, StorageDriverClient.VDISK_DEDUPE_MAP.keys()),
                           'write_buffer': (int, {'min': 128, 'max': 10 * 1024}),
                           'cache_strategy': (str, StorageDriverClient.VDISK_CACHE_MAP.keys()),
                           'readcache_limit': (int, {'min': 1, 'max': 10 * 1024}, False)}

        # The DTL target is only validated when the caller actually provided one
        if new_config_params.get('dtl_target') is not None:
            required_params.update({'dtl_target': (str, Toolbox.regex_ip)})

        Toolbox.verify_required_params(required_params, new_config_params)

        if new_config_params['dtl_mode'] != 'no_sync' and new_config_params.get('dtl_target') is None:
            raise Exception('If DTL mode is Asynchronous or Synchronous, a target IP should always be specified')

        errors = False
        vdisk = VDisk(vdisk_guid)
        volume_id = str(vdisk.volume_id)
        old_config_params = VDiskController.get_config_params(vdisk.guid)

        # 1st update SCO size, because this impacts TLOG multiplier which on its turn impacts write buffer
        new_sco_size = new_config_params['sco_size']
        old_sco_size = old_config_params['sco_size']
        if new_sco_size != old_sco_size:
            write_buffer = float(new_config_params['write_buffer'])
            tlog_multiplier = StorageDriverClient.TLOG_MULTIPLIER_MAP[new_sco_size]
            sco_factor = write_buffer / tlog_multiplier / new_sco_size
            try:
                logger.info('Updating property sco_size on vDisk {0} to {1}'.format(vdisk_guid, new_sco_size))
                vdisk.storagedriver_client.set_sco_multiplier(volume_id, new_sco_size / 4 * 1024)
                vdisk.storagedriver_client.set_tlog_multiplier(volume_id, tlog_multiplier)
                vdisk.storagedriver_client.set_sco_cache_max_non_disposable_factor(volume_id, sco_factor)
                logger.info('Updated property sco_size')
            except Exception as ex:
                logger.error('Error updating "sco_size": {0}'.format(ex))
                errors = True

        # 2nd Check for DTL changes
        new_dtl_mode = new_config_params['dtl_mode']
        old_dtl_mode = old_config_params['dtl_mode']
        new_dtl_target = new_config_params.get('dtl_target')
        old_dtl_target = old_config_params['dtl_target']
        if old_dtl_mode != new_dtl_mode or new_dtl_target != old_dtl_target:
            if old_dtl_mode != new_dtl_mode and new_dtl_mode == 'no_sync':
                logger.info('Disabling DTL for vDisk {0}'.format(vdisk_guid))
                vdisk.storagedriver_client.set_manual_dtl_config(volume_id, None)
            elif (new_dtl_target is not None and new_dtl_target != old_dtl_target or old_dtl_mode != new_dtl_mode) and new_dtl_mode != 'no_sync':
                logger.info('Changing DTL to use global values for vDisk {0}'.format(vdisk_guid))
                sr_target = StorageRouterList.get_by_ip(new_dtl_target)
                if sr_target is None:
                    # Without a Storage Router there are no Storage Drivers to inspect, so only flag the error
                    logger.error('Failed to retrieve Storage Router with IP {0}'.format(new_dtl_target))
                    errors = True
                else:
                    for sd in sr_target.storagedrivers:
                        if sd.vpool == vdisk.vpool:
                            dtl_config = DTLConfig(str(new_dtl_target), sd.ports[2], StorageDriverClient.VDISK_DTL_MODE_MAP[new_dtl_mode])
                            vdisk.storagedriver_client.set_manual_dtl_config(volume_id, dtl_config)
                            break
                    else:
                        # No Storage Driver of the target Storage Router serves the vPool of this vDisk
                        logger.error('Failed to retrieve Storage Driver with IP {0}'.format(new_dtl_target))
                        errors = True

        # 3rd update rest
        for key in required_params:
            # SCO size and DTL settings have been handled above
            if key in ['sco_size', 'dtl_mode', 'dtl_target']:
                continue
            try:
                new_value = new_config_params[key]
                old_value = old_config_params[key]
                if new_value != old_value:
                    logger.info('Updating property {0} on vDisk {1} from {2} to {3}'.format(key, vdisk_guid, old_value, new_value))
                    if key == 'dedupe_mode':
                        vdisk.storagedriver_client.set_readcache_mode(volume_id, StorageDriverClient.VDISK_DEDUPE_MAP[new_value])
                    elif key == 'write_buffer':
                        # Fall back to the multiplier matching the requested SCO size when the client reports none
                        tlog_multiplier = vdisk.storagedriver_client.get_tlog_multiplier(volume_id) or StorageDriverClient.TLOG_MULTIPLIER_MAP[new_sco_size]
                        sco_factor = float(new_value) / tlog_multiplier / new_sco_size
                        vdisk.storagedriver_client.set_sco_cache_max_non_disposable_factor(volume_id, sco_factor)
                    elif key == 'cache_strategy':
                        vdisk.storagedriver_client.set_readcache_behaviour(volume_id, StorageDriverClient.VDISK_CACHE_MAP[new_value])
                    elif key == 'readcache_limit':
                        vol_info = vdisk.storagedriver_client.info_volume(volume_id)
                        block_size = vol_info.lba_size * vol_info.cluster_multiplier or 4096
                        # The new value is multiplied by 1024^3 and divided by the block size; a falsy value clears the limit
                        limit = new_value * 1024 * 1024 * 1024 / block_size if new_value else None
                        vdisk.storagedriver_client.set_readcache_limit(volume_id, limit)
                    else:
                        raise KeyError('Unsupported property provided: "{0}"'.format(key))
                    logger.info('Updated property {0}'.format(key))
            except Exception as ex:
                logger.error('Error updating "{0}": {1}'.format(key, ex))
                errors = True
        if errors is True:
            raise Exception('Failed to update the values for vDisk {0}'.format(vdisk.name))
Esempio n. 6
0
    def set_config_params(vdisk_guid, new_config_params, old_config_params):
        """
        Applies the configuration parameters which changed for a given vdisk.
        Both the new and the old parameters are verified against the same set of required parameters.
        The SCO size is handled first, afterwards every other non-DTL key of the old configuration
        whose value differs in the new configuration is passed to the storagedriver client of the vdisk.
        :param vdisk_guid: Guid of the virtual disk to set the configuration parameters for
        :param new_config_params: New configuration parameters
        :param old_config_params: Configuration parameters to compare the new ones against
        :raises Exception: If at least 1 of the properties could not be updated
        """
        expected_params = {
                           # 'dtl_mode': (str, StorageDriverClient.VDISK_DTL_MODE_MAP.keys()),
                           'sco_size': (int, StorageDriverClient.TLOG_MULTIPLIER_MAP.keys()),
                           'dedupe_mode': (str, StorageDriverClient.VDISK_DEDUPE_MAP.keys()),
                           'dtl_enabled': (bool, None),
                           # 'dtl_location': (str, None),
                           'write_buffer': (int, {'min': 128, 'max': 10 * 1024}),
                           'cache_strategy': (str, StorageDriverClient.VDISK_CACHE_MAP.keys()),
                           'readcache_limit': (int, {'min': 1, 'max': 10 * 1024}, False)}

        # Verify the new parameters first, then the old ones
        for params in (new_config_params, old_config_params):
            Toolbox.verify_required_params(expected_params, params)

        failed = False
        vdisk = VDisk(vdisk_guid)
        volume_id = str(vdisk.volume_id)
        old_sco_size = old_config_params['sco_size']
        new_sco_size = new_config_params['sco_size']

        # Step 1: the SCO size goes first, because it impacts the TLOG multiplier which in turn impacts the write buffer
        if new_sco_size != old_sco_size:
            requested_buffer = float(new_config_params['write_buffer'])
            multiplier = StorageDriverClient.TLOG_MULTIPLIER_MAP[new_sco_size]
            non_disposable_factor = requested_buffer / multiplier / new_sco_size
            try:
                logger.info('Updating property sco_size on vDisk {0} from {1} to {2}'.format(vdisk_guid, old_sco_size, new_sco_size))
                vdisk.storagedriver_client.set_sco_multiplier(volume_id, new_sco_size / 4 * 1024)
                vdisk.storagedriver_client.set_tlog_multiplier(volume_id, multiplier)
                vdisk.storagedriver_client.set_sco_cache_max_non_disposable_factor(volume_id, non_disposable_factor)
                logger.info('Updated property sco_size')
            except Exception as ex:
                logger.error('Error updating "sco_size": {0}'.format(ex))
                failed = True

        # Step 2: all remaining properties
        for name, previous in old_config_params.iteritems():
            # SCO size has been handled above, DTL related keys are not handled by this method
            if name == 'sco_size' or name.startswith('dtl'):
                continue
            requested = new_config_params[name]
            if requested == previous:
                continue
            try:
                logger.info('Updating property {0} on vDisk {1} from {2} to {3}'.format(name, vdisk_guid, previous, requested))
                if name == 'write_buffer':
                    # Use the multiplier matching the new SCO size when the client does not report one
                    multiplier = vdisk.storagedriver_client.get_tlog_multiplier(volume_id)
                    if not multiplier:
                        multiplier = StorageDriverClient.TLOG_MULTIPLIER_MAP[new_sco_size]
                    non_disposable_factor = float(requested) / multiplier / new_sco_size
                    vdisk.storagedriver_client.set_sco_cache_max_non_disposable_factor(volume_id, non_disposable_factor)
                elif name == 'readcache_limit':
                    info = vdisk.storagedriver_client.info_volume(volume_id)
                    block_size = (info.lba_size * info.cluster_multiplier) or 4096
                    # A falsy value results in None being passed as limit
                    if requested:
                        limit = requested * 1024 * 1024 * 1024 / block_size
                    else:
                        limit = None
                    vdisk.storagedriver_client.set_readcache_limit(volume_id, limit)
                elif name == 'cache_strategy':
                    behaviour = StorageDriverClient.VDISK_CACHE_MAP[requested]
                    vdisk.storagedriver_client.set_readcache_behaviour(volume_id, behaviour)
                elif name == 'dedupe_mode':
                    mode = StorageDriverClient.VDISK_DEDUPE_MAP[requested]
                    vdisk.storagedriver_client.set_readcache_mode(volume_id, mode)
                else:
                    raise KeyError('Unsupported property provided: "{0}"'.format(name))
                logger.info('Updated property {0}'.format(name))
            except Exception as ex:
                logger.error('Error updating "{0}": {1}'.format(name, ex))
                failed = True

        if failed:
            raise Exception('Failed to update the values for vDisk {0}'.format(vdisk.name))
Esempio n. 7
0
    def execute_update(components):
        """
        Update the specified components on all StorageRouters
        This is called upon by 'at'

        Sequence of the update:
            * Acquire the 'system_update' file mutex and open an SSH connection to every StorageRouter
            * Create the lock files on every node
            * Verify the update information of the requested components
            * Stop the services, install the packages and remove the update file
            * Run the extensions code migration and the DAL migration (only for component 'framework')
            * Execute the post-update hooks and start the services again

        Exceptions raised during the update are caught and logged, after which the stopped services are started again.
        The file mutex is released and the lock files are removed in all cases.
        :param components: Names of the components to update (e.g. 'framework', 'storagedriver')
        :return: None
        """
        filemutex = file_mutex('system_update', wait=2)
        ssh_clients = []
        services_stop_start = set()
        try:
            filemutex.acquire()
            UpdateController._logger.debug('+++ Starting update +++')

            from ovs.dal.lists.storagerouterlist import StorageRouterList

            # Create SSHClients to all nodes
            UpdateController._logger.debug('Generating SSH client connections for each storage router')
            storage_routers = StorageRouterList.get_storagerouters()
            master_ips = []
            extra_ips = []
            for sr in storage_routers:
                try:
                    ssh_clients.append(SSHClient(sr.ip, username='******'))
                    if sr.node_type == 'MASTER':
                        master_ips.append(sr.ip)
                    elif sr.node_type == 'EXTRA':
                        extra_ips.append(sr.ip)
                except UnableToConnectException:
                    raise Exception('Update is only allowed on systems where all nodes are online and fully functional')

            # Create locks
            for client in ssh_clients:
                UpdateController._logger.debug('{0}: Creating lock files'.format(client.ip))
                client.run(['touch', UpdateController._update_file])  # Prevents manual install or update individual packages
                client.run(['touch', UpdateController._update_ongoing_file])

            # Check requirements
            packages_to_update = {}
            services_post_update = set()
            update_information = UpdateController.get_update_information_all()
            for component, component_info in update_information.iteritems():
                if component in components:
                    UpdateController._logger.debug('Verifying update information for component: {0}'.format(component.upper()))
                    Toolbox.verify_required_params(actual_params=component_info,
                                                   required_params={'downtime': (list, None),
                                                                    'packages': (dict, None),
                                                                    'prerequisites': (list, None),
                                                                    'services_stop_start': (set, None),
                                                                    'services_post_update': (set, None)})
                    if len(component_info['prerequisites']) > 0:
                        raise Exception('Update is only allowed when all prerequisites have been met')

                    packages_to_update.update(component_info['packages'])
                    services_stop_start.update(component_info['services_stop_start'])
                    services_post_update.update(component_info['services_post_update'])
            if len(packages_to_update) > 0:
                UpdateController._logger.debug('Packages to be updated: {0}'.format(', '.join(sorted(packages_to_update.keys()))))
            if len(services_stop_start) > 0:
                UpdateController._logger.debug('Services to stop before package update: {0}'.format(', '.join(sorted(services_stop_start))))
            if len(services_post_update) > 0:
                UpdateController._logger.debug('Services which will be restarted after update: {0}'.format(', '.join(sorted(services_post_update))))

            # Stop services
            if UpdateController.change_services_state(services=services_stop_start,
                                                      ssh_clients=ssh_clients,
                                                      action='stop') is False:
                raise Exception('Stopping all services on every node failed, cannot continue')

            # Install packages
            # First install packages on all StorageRouters individually
            if packages_to_update:
                failures = False
                for client in ssh_clients:
                    UpdateController._logger.debug('{0}: Installing packages'.format(client.ip))
                    for function in Toolbox.fetch_hooks('update', 'package_install_multi'):
                        try:
                            function(client=client, package_info=packages_to_update, components=components)
                        except Exception as ex:
                            UpdateController._logger.error('{0}: Package installation hook {1} failed with error: {2}'.format(client.ip, function.__name__, ex))
                            failures = True

                # Only relevant when something else than 'framework' and 'storagedriver' has been requested
                if set(components).difference({'framework', 'storagedriver'}):
                    # Second install packages on all ALBA nodes
                    for function in Toolbox.fetch_hooks('update', 'package_install_single'):
                        try:
                            function(package_info=packages_to_update, components=components)
                        except Exception as ex:
                            UpdateController._logger.exception('Package installation hook {0} failed with error: {1}'.format(function.__name__, ex))
                            failures = True

                if failures is True:
                    raise Exception('Installing the packages failed on 1 or more nodes')

            # Remove update file
            for client in ssh_clients:
                client.file_delete(UpdateController._update_file)

            # Migrate code
            if 'framework' in components:
                failures = []
                for client in ssh_clients:
                    UpdateController._logger.debug('{0}: Verifying extensions code migration is required'.format(client.ip))
                    try:
                        key = '/ovs/framework/hosts/{0}/versions'.format(System.get_my_machine_id(client=client))
                        old_versions = Configuration.get(key) if Configuration.exists(key) else {}
                        try:
                            with remote(client.ip, [Migrator]) as rem:
                                rem.Migrator.migrate(master_ips, extra_ips)
                        except EOFError as eof:
                            # An EOFError is retried exactly once
                            UpdateController._logger.warning('{0}: EOFError during code migration, retrying {1}'.format(client.ip, eof))
                            with remote(client.ip, [Migrator]) as rem:
                                rem.Migrator.migrate(master_ips, extra_ips)
                        new_versions = Configuration.get(key) if Configuration.exists(key) else {}
                        if old_versions != new_versions:
                            UpdateController._logger.debug('{0}: Finished extensions code migration. Old versions: {1} --> New versions: {2}'.format(client.ip, old_versions, new_versions))
                    except Exception as ex:
                        failures.append('{0}: {1}'.format(client.ip, str(ex)))
                if len(failures) > 0:
                    raise Exception('Failed to run the extensions migrate code on all nodes. Errors found:\n\n{0}'.format('\n\n'.join(failures)))

            # Start memcached
            if 'memcached' in services_stop_start:
                # Removed from the set, so it is not started a 2nd time together with the other services
                services_stop_start.remove('memcached')
                UpdateController._logger.debug('Starting memcached')
                UpdateController.change_services_state(services=['memcached'],
                                                       ssh_clients=ssh_clients,
                                                       action='start')

            # Migrate model
            if 'framework' in components:
                UpdateController._logger.debug('Verifying DAL code migration is required')
                old_versions = PersistentFactory.get_client().get('ovs_model_version') if PersistentFactory.get_client().exists('ovs_model_version') else {}

                from ovs.dal.helpers import Migration
                # The model migration is executed through the first SSH client only
                with remote(ssh_clients[0].ip, [Migration]) as rem:
                    rem.Migration.migrate()

                new_versions = PersistentFactory.get_client().get('ovs_model_version') if PersistentFactory.get_client().exists('ovs_model_version') else {}
                if old_versions != new_versions:
                    UpdateController._logger.debug('Finished DAL code migration. Old versions: {0} --> New versions: {1}'.format(old_versions, new_versions))

            # Post update actions (failing hooks are logged, but do not abort the update)
            for client in ssh_clients:
                UpdateController._logger.debug('{0}: Executing post-update actions'.format(client.ip))
                for function in Toolbox.fetch_hooks('update', 'post_update_multi'):
                    try:
                        function(client=client, components=components)
                    except Exception as ex:
                        UpdateController._logger.exception('{0}: Post update hook {1} failed with error: {2}'.format(client.ip, function.__name__, ex))

            for function in Toolbox.fetch_hooks('update', 'post_update_single'):
                try:
                    function(components=components)
                except Exception as ex:
                    UpdateController._logger.exception('Post update hook {0} failed with error: {1}'.format(function.__name__, ex))

            # Start services
            UpdateController.change_services_state(services=services_stop_start,
                                                   ssh_clients=ssh_clients,
                                                   action='start')

            UpdateController._refresh_package_information()
            UpdateController._logger.debug('+++ Finished updating +++')
        except NoLockAvailableException:
            UpdateController._logger.debug('Another update is currently in progress!')
        except Exception as ex:
            UpdateController._logger.exception('Error during update: {0}'.format(ex))
            if len(ssh_clients) > 0:
                # Start the services which have been collected for stopping again
                UpdateController.change_services_state(services=services_stop_start,
                                                       ssh_clients=ssh_clients,
                                                       action='start')
                UpdateController._refresh_package_information()
                UpdateController._logger.error('Failed to update. Please check all the logs for more information')
        finally:
            filemutex.release()
            # Best-effort removal of the lock files: a failure is logged and the cleanup continues with the next file
            for ssh_client in ssh_clients:
                for file_name in [UpdateController._update_file, UpdateController._update_ongoing_file]:
                    try:
                        if ssh_client.file_exists(file_name):
                            ssh_client.file_delete(file_name)
                    except Exception:
                        UpdateController._logger.warning('{0}: Failed to remove lock file {1}'.format(ssh_client.ip, file_name))
Esempio n. 8
0
    def check_if_proxies_work(result_handler):
        """
        Checks if all Alba Proxies work on a local machine, it creates a namespace and tries to put and object
        :param result_handler: logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        :return: None
        :rtype: NoneType
        """
        namespace_params = {
            'bucket_count': (list, None),
            'logical': (int, None),
            'storage': (int, None),
            'storage_per_osd': (list, None)
        }

        result_handler.info('Checking the ALBA proxies.', add_to_result=False)

        amount_of_presets_not_working = []
        # ignore possible subprocess output
        fnull = open(os.devnull, 'w')
        # try put/get/verify on all available proxies on the local node
        local_proxies = ServiceHelper.get_local_proxy_services()
        if len(local_proxies) == 0:
            result_handler.info('Found no proxies.', add_to_result=False)
            return amount_of_presets_not_working
        for service in local_proxies:
            try:
                result_handler.info('Checking ALBA proxy {0}.'.format(
                    service.name),
                                    add_to_result=False)
                ip = service.alba_proxy.storagedriver.storage_ip
                # Encapsulating try to determine test output
                try:
                    # Determine what to what backend the proxy is connected
                    proxy_client_cfg = AlbaCLI.run(command='proxy-client-cfg',
                                                   named_params={
                                                       'host': ip,
                                                       'port': service.ports[0]
                                                   })
                except AlbaException:
                    result_handler.failure(
                        'Fetching proxy info has failed. Please verify if {0}:{1} is the correct address for proxy {2}.'
                        .format(ip, service.ports[0], service.name))
                    continue
                # Fetch arakoon information
                abm_name = proxy_client_cfg.get('cluster_id')
                # Check if proxy config is correctly setup
                if abm_name is None:
                    raise ConfigNotMatchedException(
                        'Proxy config for proxy {0} does not have the correct format on node {1} with port {2}.'
                        .format(service.name, ip, service.ports[0]))
                abm_config = Configuration.get_configuration_path(
                    '/ovs/vpools/{0}/proxies/{1}/config/abm'.format(
                        service.alba_proxy.storagedriver.vpool.guid,
                        service.alba_proxy.guid))

                # Determine presets / backend
                try:
                    presets = AlbaCLI.run(command='list-presets',
                                          config=abm_config)
                except AlbaException:
                    result_handler.failure(
                        'Listing the presets has failed. Please check the arakoon config path. We used {0}'
                        .format(abm_config))
                    continue

                for preset in presets:
                    # If preset is not in use, test will fail so add a skip
                    if preset['in_use'] is False:
                        result_handler.skip(
                            'Preset {0} is not in use and will not be checked'.
                            format(preset['name']))
                        continue
                    preset_name = preset['name']
                    # Encapsulation try for cleanup
                    try:
                        # Generate new namespace name using the preset
                        namespace_key_prefix = 'ovs-healthcheck-ns-{0}-{1}'.format(
                            preset_name, AlbaHealthCheck.LOCAL_ID)
                        namespace_key = '{0}_{1}'.format(
                            namespace_key_prefix, uuid.uuid4())
                        object_key = 'ovs-healthcheck-obj-{0}'.format(
                            str(uuid.uuid4()))
                        # Create namespace
                        AlbaCLI.run(command='proxy-create-namespace',
                                    named_params={
                                        'host': ip,
                                        'port': service.ports[0]
                                    },
                                    extra_params=[namespace_key, preset_name])
                        # Wait until fully created
                        namespace_start_time = time.time()
                        for index in xrange(2):
                            # Running twice because the first one could give a false positive as the osds will alert the nsm
                            # and the nsm would respond with got messages but these were not the ones we are after
                            AlbaCLI.run(command='deliver-messages',
                                        config=abm_config)
                        while True:
                            if time.time(
                            ) - namespace_start_time > AlbaHealthCheck.NAMESPACE_TIMEOUT:
                                raise RuntimeError(
                                    'Creation namespace has timed out after {0}s'
                                    .format(time.time() -
                                            namespace_start_time))
                            list_ns_osds_output = AlbaCLI.run(
                                command='list-ns-osds',
                                config=abm_config,
                                extra_params=[namespace_key])
                            # Example output: [[0, [u'Active']], [3, [u'Active']]]
                            namespace_ready = True
                            for osd_info in list_ns_osds_output:  # If there are no osd_info records, uploading will fail so covered by HC
                                osd_state = osd_info[1][0]
                                if osd_state != 'Active':
                                    namespace_ready = False
                            if namespace_ready is True:
                                break
                        result_handler.success(
                            'Namespace successfully created on proxy {0} with preset {1}!'
                            .format(service.name, preset_name))
                        namespace_info = AlbaCLI.run(
                            command='show-namespace',
                            config=abm_config,
                            extra_params=[namespace_key])
                        Toolbox.verify_required_params(
                            required_params=namespace_params,
                            actual_params=namespace_info)
                        result_handler.success(
                            'Namespace successfully fetched on proxy {0} with preset {1}!'
                            .format(service.name, preset_name))

                        # Put test object to given dir
                        with open(AlbaHealthCheck.TEMP_FILE_LOC,
                                  'wb') as output_file:
                            output_file.write(
                                os.urandom(AlbaHealthCheck.TEMP_FILE_SIZE))
                        AlbaCLI.run(command='proxy-upload-object',
                                    named_params={
                                        'host': ip,
                                        'port': service.ports[0]
                                    },
                                    extra_params=[
                                        namespace_key,
                                        AlbaHealthCheck.TEMP_FILE_LOC,
                                        object_key
                                    ])
                        result_handler.success(
                            'Successfully uploaded the object to namespace {0}'
                            .format(namespace_key))
                        # download object
                        AlbaCLI.run(command='proxy-download-object',
                                    named_params={
                                        'host': ip,
                                        'port': service.ports[0]
                                    },
                                    extra_params=[
                                        namespace_key, object_key,
                                        AlbaHealthCheck.TEMP_FILE_FETCHED_LOC
                                    ])
                        result_handler.success(
                            'Successfully downloaded the object to namespace {0}'
                            .format(namespace_key))
                        # check if files exists - issue #57
                        if not (os.path.isfile(
                                AlbaHealthCheck.TEMP_FILE_FETCHED_LOC) and
                                os.path.isfile(AlbaHealthCheck.TEMP_FILE_LOC)):
                            # creation of object failed
                            raise ObjectNotFoundException(
                                ValueError('Creation of object has failed'))
                        hash_original = hashlib.md5(
                            open(AlbaHealthCheck.TEMP_FILE_LOC,
                                 'rb').read()).hexdigest()
                        hash_fetched = hashlib.md5(
                            open(AlbaHealthCheck.TEMP_FILE_FETCHED_LOC,
                                 'rb').read()).hexdigest()

                        if hash_original == hash_fetched:
                            result_handler.success(
                                'Fetched object {0} from namespace {1} on proxy {2} with preset {3} matches the created object!'
                                .format(object_key, namespace_key,
                                        service.name, preset_name))
                        else:
                            result_handler.failure(
                                'Fetched object {0} from namespace {1} on proxy {2} with preset {3} does not match the created object!'
                                .format(object_key, namespace_key,
                                        service.name, preset_name))

                    except ObjectNotFoundException as ex:
                        amount_of_presets_not_working.append(preset_name)
                        result_handler.failure(
                            'Failed to put object on namespace {0} failed on proxy {1}with preset {2} With error {3}'
                            .format(namespace_key, service.name, preset_name,
                                    ex))
                    except AlbaException as ex:
                        if ex.alba_command == 'proxy-create-namespace':
                            result_handler.failure(
                                'Create namespace has failed with {0} on namespace {1} with proxy {2} with preset {3}'
                                .format(str(ex), namespace_key, service.name,
                                        preset_name))
                        elif ex.alba_command == 'show-namespace':
                            result_handler.failure(
                                'Show namespace has failed with {0} on namespace {1} with proxy {2} with preset {3}'
                                .format(str(ex), namespace_key, service.name,
                                        preset_name))
                        elif ex.alba_command == 'proxy-upload-object':
                            result_handler.failure(
                                'Uploading the object has failed with {0} on namespace {1} with proxy {2} with preset {3}'
                                .format(str(ex), namespace_key, service.name,
                                        preset_name))
                        elif ex.alba_command == 'proxy-download-object':
                            result_handler.failure(
                                'Downloading the object has failed with {0} on namespace {1} with proxy {2} with preset {3}'
                                .format(str(ex), namespace_key, service.name,
                                        preset_name))
                    finally:
                        # Delete the created namespace and preset
                        subprocess.call(
                            ['rm', str(AlbaHealthCheck.TEMP_FILE_LOC)],
                            stdout=fnull,
                            stderr=subprocess.STDOUT)
                        subprocess.call(
                            ['rm',
                             str(AlbaHealthCheck.TEMP_FILE_FETCHED_LOC)],
                            stdout=fnull,
                            stderr=subprocess.STDOUT)
                        namespaces = AlbaCLI.run(command='list-namespaces',
                                                 config=abm_config)
                        namespaces_to_remove = []
                        proxy_named_params = {
                            'host': ip,
                            'port': service.ports[0]
                        }
                        for namespace in namespaces:
                            if namespace['name'].startswith(
                                    namespace_key_prefix):
                                namespaces_to_remove.append(namespace['name'])
                        for namespace_name in namespaces_to_remove:
                            if namespace_name == namespace_key:
                                result_handler.info(
                                    'Deleting namespace {0}.'.format(
                                        namespace_name))
                            else:
                                result_handler.warning(
                                    'Deleting namespace {0} which was leftover from a previous run.'
                                    .format(namespace_name))

                            AlbaCLI.run(command='proxy-delete-namespace',
                                        named_params=proxy_named_params,
                                        extra_params=[namespace_name])

                            namespace_delete_start = time.time()
                            while True:
                                try:
                                    AlbaCLI.run(
                                        command='show-namespace',
                                        config=abm_config,
                                        extra_params=[namespace_name]
                                    )  # Will fail if the namespace does not exist
                                except AlbaException:
                                    result_handler.success(
                                        'Namespace {0} successfully removed.'.
                                        format(namespace_name))
                                    break
                                if time.time(
                                ) - namespace_delete_start > AlbaHealthCheck.NAMESPACE_TIMEOUT:
                                    raise RuntimeError(
                                        'Delete namespace has timed out after {0}s'
                                        .format(time.time() -
                                                namespace_start_time))

                            # be tidy, and make the proxy forget the namespace
                            try:
                                AlbaCLI.run(
                                    command='proxy-statistics',
                                    named_params=proxy_named_params,
                                    extra_params=['--forget', namespace_name])
                            except:
                                result_handler.warning(
                                    'Failed to make proxy forget namespace {0}.'
                                    .format(namespace_name))

            except subprocess.CalledProcessError as ex:
                # this should stay for the deletion of the remaining files
                amount_of_presets_not_working.append(service.name)
                result_handler.failure(
                    'Proxy {0} has some problems. Got {1} as error'.format(
                        service.name, ex))

            except ConfigNotMatchedException as ex:
                amount_of_presets_not_working.append(service.name)
                result_handler.failure(
                    'Proxy {0} has some problems. Got {1} as error'.format(
                        service.name, ex))
    def validate_alba_backend_sanity_without_claimed_disks(alba_backend):
        """
        Validate whether the ALBA backend is configured correctly

        Runs a sequence of assertions, in this order, against:
            * the attributes of the ALBA backend (availability, presets, backend type, status, metadata information)
            * the amount of ABM and NSM services, compared to the amount of MASTER Storage Routers with the DB role
            * the Etcd trees '/ovs/alba/backends' and '/ovs/alba/asdnodes' and the Arakoon config keys of the first ABM and NSM service
            * the amount of maintenance services reported by the ALBA nodes
            * the Arakoon services on every MASTER Storage Router with the DB role

        The first failing check aborts the validation.

        :param alba_backend: ALBA backend
        :return: None
        :raises AssertionError: When one of the asserted conditions does not hold
        """
        # Attribute validation
        assert alba_backend.available is True, 'ALBA backend {0} is not available'.format(alba_backend.backend.name)
        assert len(alba_backend.presets) >= 1, 'No preset found for ALBA backend {0}'.format(alba_backend.backend.name)
        # Exactly 1 preset must be flagged as default (0 or multiple defaults both fail)
        assert len([default for default in alba_backend.presets if default['is_default'] is True]) == 1, 'Could not find default preset for backend {0}'.format(alba_backend.backend.name)
        assert alba_backend.backend.backend_type.code == 'alba', 'Backend type for ALBA backend is {0}'.format(alba_backend.backend.backend_type.code)
        assert alba_backend.backend.status == 'RUNNING', 'Status for ALBA backend is {0}'.format(alba_backend.backend.status)
        assert isinstance(alba_backend.metadata_information, dict) is True, 'ALBA backend {0} metadata information is not a dictionary'.format(alba_backend.backend.name)
        # NOTE(review): presumably raises when 'nsm_partition_guids' is missing / malformed and, because of
        # exact_match=True, when additional keys are present - confirm against Toolbox.verify_required_params
        Toolbox.verify_required_params(actual_params=alba_backend.metadata_information,
                                       required_params={'nsm_partition_guids': (list, Toolbox.regex_guid)},
                                       exact_match=True)

        # Validate ABM and NSM services
        storagerouters = GeneralStorageRouter.get_storage_routers()
        # Only MASTER nodes which have the DB role are taken into account
        storagerouters_with_db_role = [sr for sr in storagerouters if GeneralStorageRouter.has_roles(storagerouter=sr, roles='DB') is True and sr.node_type == 'MASTER']

        # NOTE(review): these are equality checks, so having too many services fails as well,
        # even though the message states 'Not enough'
        assert len(alba_backend.abm_services) == len(storagerouters_with_db_role), 'Not enough ABM services found'
        assert len(alba_backend.nsm_services) == len(storagerouters_with_db_role), 'Not enough NSM services found'

        # Validate ALBA backend ETCD structure
        alba_backend_key = '/ovs/alba/backends'
        assert EtcdConfiguration.exists(key=alba_backend_key, raw=True) is True, 'Etcd does not contain key {0}'.format(alba_backend_key)

        actual_etcd_keys = [key for key in EtcdConfiguration.list(alba_backend_key)]
        expected_etcd_keys = ['verification_schedule', 'global_gui_error_interval', alba_backend.guid,
                              'default_nsm_hosts']
        optional_etcd_keys = ['verification_factor']

        # expected_keys_amount = amount of non-guid keys which should be present in the tree:
        # every optional key which is actually present + every required key not matching the guid regex
        expected_keys_amount = 0
        for optional_key in optional_etcd_keys:
            if optional_key in actual_etcd_keys:
                expected_keys_amount += 1

        for expected_key in expected_etcd_keys:
            # Keys matching the guid regex (presumably alba_backend.guid) are required to be present, but are not counted
            if not re.match(Toolbox.regex_guid, expected_key):
                expected_keys_amount += 1
            assert expected_key in actual_etcd_keys, 'Key {0} was not found in tree {1}'.format(expected_key, alba_backend_key)

        # Iterate over a copy, because the original list is modified inside the loop
        for actual_key in list(actual_etcd_keys):
            if re.match(Toolbox.regex_guid, actual_key):
                actual_etcd_keys.remove(actual_key)  # Remove all alba backend keys
        # Whatever remains after removing the guid keys must match the counted amount of non-guid keys
        assert len(actual_etcd_keys) == expected_keys_amount, 'Another key was added to the {0} tree'.format(alba_backend_key)

        this_alba_backend_key = '{0}/{1}'.format(alba_backend_key, alba_backend.guid)
        actual_keys = [key for key in EtcdConfiguration.list(this_alba_backend_key)]
        expected_keys = ['maintenance']
        # List comparison: 'maintenance' must be the one and only key below this ALBA backend
        assert actual_keys == expected_keys, 'Actual keys: {0} - Expected keys: {1}'.format(actual_keys, expected_keys)

        maintenance_key = '{0}/maintenance'.format(this_alba_backend_key)
        actual_keys = [key for key in EtcdConfiguration.list(maintenance_key)]
        expected_keys = ['nr_of_agents', 'config']
        # Set comparison: the order in which Etcd lists the keys is irrelevant
        assert set(actual_keys) == set(expected_keys), 'Actual keys: {0} - Expected keys: {1}'.format(actual_keys, expected_keys)
        # @TODO: Add validation for config values

        # Validate ASD node ETCD structure
        alba_nodes = GeneralAlba.get_alba_nodes()
        assert len(alba_nodes) > 0, 'Could not find any ALBA nodes in the model'
        alba_node_key = '/ovs/alba/asdnodes'
        actual_keys = [key for key in EtcdConfiguration.list(alba_node_key)]
        assert len(alba_nodes) == len(actual_keys), 'Amount of ALBA nodes in model does not match amount of ALBA nodes in ETCD. In model: {0} - In Etcd: {1}'.format(len(alba_nodes), len(actual_keys))
        for alba_node in alba_nodes:
            assert alba_node.node_id in actual_keys, 'ALBA node with ID {0} not present in ETCD'.format(alba_node.node_id)

            # Every ASD node must have exactly 1 sub key: 'config'
            actual_asdnode_keys = [key for key in EtcdConfiguration.list('{0}/{1}'.format(alba_node_key, alba_node.node_id))]
            expected_asdnode_keys = ['config']
            assert actual_asdnode_keys == expected_asdnode_keys, 'Actual keys: {0} - Expected keys: {1}'.format(actual_asdnode_keys, expected_asdnode_keys)

            # The 'config' key must contain 'main' and 'network', in any order
            actual_config_keys = [key for key in EtcdConfiguration.list('{0}/{1}/config'.format(alba_node_key, alba_node.node_id))]
            expected_config_keys = ['main', 'network']
            assert set(actual_config_keys) == set(expected_config_keys), 'Actual keys: {0} - Expected keys: {1}'.format(actual_config_keys, expected_config_keys)
            # @TODO: Add validation for main and network values

        # Validate Arakoon ETCD structure
        # Only the config key of the first ABM and the first NSM service is verified
        arakoon_abm_key = '/ovs/arakoon/{0}/config'.format(alba_backend.abm_services[0].service.name)
        arakoon_nsm_key = '/ovs/arakoon/{0}/config'.format(alba_backend.nsm_services[0].service.name)
        assert EtcdConfiguration.exists(key=arakoon_abm_key, raw=True) is True, 'Etcd key {0} does not exists'.format(arakoon_abm_key)
        assert EtcdConfiguration.exists(key=arakoon_nsm_key, raw=True) is True, 'Etcd key {0} does not exists'.format(arakoon_nsm_key)
        # @TODO: Add validation for config values

        # Validate maintenance agents
        # Flatten the maintenance services listed by each ALBA node and count them
        actual_amount_agents = len([service for node_services in [alba_node.client.list_maintenance_services() for alba_node in alba_nodes] for service in node_services])
        # NOTE(review): assumes the Etcd value is returned as an integer, otherwise the comparison below always fails - confirm
        expected_amount_agents = EtcdConfiguration.get('/ovs/alba/backends/{0}/maintenance/nr_of_agents'.format(alba_backend.guid))
        assert actual_amount_agents == expected_amount_agents, 'Amount of maintenance agents is incorrect. Found {0} - Expected {1}'.format(actual_amount_agents, expected_amount_agents)

        # Validate arakoon services
        machine_ids = [sr.machine_id for sr in storagerouters_with_db_role]
        # The service names are derived from the first ABM and the first NSM service only
        abm_service_name = alba_backend.abm_services[0].service.name
        nsm_service_name = alba_backend.nsm_services[0].service.name
        for storagerouter in storagerouters_with_db_role:
            root_client = SSHClient(endpoint=storagerouter,
                                    username='******')
            abm_arakoon_service_name = 'ovs-arakoon-{0}'.format(abm_service_name)
            nsm_arakoon_service_name = 'ovs-arakoon-{0}'.format(nsm_service_name)
            for service_name in [abm_arakoon_service_name, nsm_arakoon_service_name]:
                # Both Arakoon services must be deployed and running on this Storage Router
                assert GeneralService.has_service(name=service_name,
                                                  client=root_client) is True, 'Service {0} not deployed on Storage Router {1}'.format(service_name, storagerouter.name)
                assert GeneralService.get_service_status(name=service_name,
                                                         client=root_client) is True, 'Service {0} not running on Storage Router {1}'.format(service_name, storagerouter.name)
                # NOTE(review): the master lookup below always uses the ABM config, regardless of 'service_name',
                # and 'err' is not used - verify whether this is intended
                out, err, _ = General.execute_command('arakoon --who-master -config {0}'.format(GeneralArakoon.ETCD_CONFIG_PATH.format(abm_service_name)))
                # The reported Arakoon master must be 1 of the MASTER Storage Routers with the DB role
                assert out.strip() in machine_ids, 'Arakoon master is {0}, but should be 1 of "{1}"'.format(out.strip(), ', '.join(machine_ids))