def _configure_arakoon_to_volumedriver():
    """
    Point every storagedriver configuration found in etcd to the 'voldrv' arakoon cluster

    The nodes of the 'voldrv' cluster are loaded once. For each storagedriver listed under
    '/ovs/vpools/<vpool_guid>/hosts' both the volume registry and the distributed lock store are
    configured with these nodes, after which the configuration is saved with reload_config=True.
    Nothing is configured when '/ovs/vpools' does not exist in etcd.
    :return: None
    """
    message = 'Update existing vPools'
    print(message)
    logger.info(message)

    voldrv_config = ArakoonClusterConfig('voldrv')
    voldrv_config.load_config()
    cluster_nodes = [{'host': member.ip,
                      'port': member.client_port,
                      'node_id': member.name} for member in voldrv_config.nodes]

    if not EtcdConfiguration.dir_exists('/ovs/vpools'):
        return

    for vpool_guid in EtcdConfiguration.list('/ovs/vpools'):
        hosts_key = '/ovs/vpools/{0}/hosts'.format(vpool_guid)
        for storagedriver_id in EtcdConfiguration.list(hosts_key):
            sd_config = StorageDriverConfiguration('storagedriver', vpool_guid, storagedriver_id)
            sd_config.load()
            sd_config.configure_volume_registry(vregistry_arakoon_cluster_id='voldrv',
                                                vregistry_arakoon_cluster_nodes=cluster_nodes)
            sd_config.configure_distributed_lock_store(dls_type='Arakoon',
                                                       dls_arakoon_cluster_id='voldrv',
                                                       dls_arakoon_cluster_nodes=cluster_nodes)
            sd_config.save(reload_config=True)
def get_unused_arakoon_metadata_and_claim(cluster_type, locked=True):
    """
    Claim the first arakoon cluster of the requested type which is neither in use nor internal
    :param cluster_type: Type of arakoon cluster (See ServiceType.ARAKOON_CLUSTER_TYPES), upper-cased before comparison
    :type cluster_type: str
    :param locked: Execute this in a locked context
    :type locked: bool
    :return: Metadata of the cluster that has been claimed, None when '/ovs/arakoon' does not exist or no cluster qualifies
    :rtype: ArakoonClusterMetadata
    :raises ValueError: When the cluster type is not part of ServiceType.ARAKOON_CLUSTER_TYPES
    """
    requested_type = cluster_type.upper()
    if requested_type not in ServiceType.ARAKOON_CLUSTER_TYPES:
        raise ValueError('Unsupported arakoon cluster type provided. Please choose from {0}'.format(', '.join(ServiceType.ARAKOON_CLUSTER_TYPES)))
    if not EtcdConfiguration.dir_exists('/ovs/arakoon'):
        return None

    use_lock = locked is True
    mutex = volatile_mutex('claim_arakoon_metadata', wait=10)
    try:
        if use_lock:
            mutex.acquire()
        for cluster_name in EtcdConfiguration.list('/ovs/arakoon'):
            candidate = ArakoonClusterMetadata(cluster_id=cluster_name)
            candidate.load_metadata()
            if candidate.cluster_type != requested_type:
                continue
            if candidate.in_use is not False or candidate.internal is not False:
                continue
            # First cluster matching all criteria wins
            candidate.claim()
            return candidate
    finally:
        if use_lock:
            mutex.release()
    return None
def _get_free_ports(client):
    """
    Request free arakoon ports for the node behind the client, excluding ports of clusters already on that node

    All arakoon clusters found under ArakoonInstaller.ETCD_CONFIG_ROOT are inspected. For each cluster
    that has a node named after this machine, its client and messaging port are excluded.
    Clusters whose configuration cannot be loaded are logged and skipped (best effort).
    :param client: Client passed to System.get_my_machine_id and System.get_free_ports
    :return: Result of System.get_free_ports (called with an amount of 2 - presumably 2 ports, verify against System)
    """
    node_name = System.get_my_machine_id(client)
    clusters = []
    exclude_ports = []
    if EtcdConfiguration.dir_exists(ArakoonInstaller.ETCD_CONFIG_ROOT):
        for cluster_name in EtcdConfiguration.list(ArakoonInstaller.ETCD_CONFIG_ROOT):
            try:
                config = ArakoonClusterConfig(cluster_name)
                config.load_config()
                for node in config.nodes:
                    if node.name == node_name:
                        clusters.append(cluster_name)
                        exclude_ports.append(node.client_port)
                        exclude_ports.append(node.messaging_port)
            except Exception as ex:
                # Only swallow regular errors: KeyboardInterrupt and SystemExit must still propagate
                logger.error(' Could not load port information of cluster {0}: {1}'.format(cluster_name, ex))
    ports = System.get_free_ports(EtcdConfiguration.get('/ovs/framework/hosts/{0}/ports|arakoon'.format(node_name)),
                                  exclude_ports, 2, client)
    logger.debug(' Loaded free ports {0} based on existing clusters {1}'.format(ports, clusters))
    return ports
def list(self, discover=False, ip=None, node_id=None):
    """
    Lists ALBA Nodes, either the modeled ones or the ones discovered through etcd
    :param discover: If False, return the modeled ALBA nodes. If True and IP + node ID are provided, return a list
                     containing that single (volatile) ALBA node. If True without IP and node ID, return the ALBA
                     nodes registered in etcd which are not modeled yet
    :param ip: IP of ALBA node to retrieve
    :param node_id: ID of the ALBA node
    """
    ip_given = ip is not None
    node_id_given = node_id is not None
    if discover is False and (ip_given or node_id_given):
        raise RuntimeError('Discover is mutually exclusive with IP and nodeID')
    if ip_given != node_id_given:
        raise RuntimeError('Both IP and nodeID need to be specified')

    if discover is False:
        return AlbaNodeList.get_albanodes()

    if ip_given:
        # Discover 1 specific node and verify that it answers with the expected identifier
        single = AlbaNode(volatile=True)
        single.ip = ip
        single.type = 'ASD'
        single.node_id = node_id
        single.port = EtcdConfiguration.get('/ovs/alba/asdnodes/{0}/config/main|port'.format(node_id))
        single.username = EtcdConfiguration.get('/ovs/alba/asdnodes/{0}/config/main|username'.format(node_id))
        single.password = EtcdConfiguration.get('/ovs/alba/asdnodes/{0}/config/main|password'.format(node_id))
        metadata = single.client.get_metadata()
        if metadata['_success'] is False and metadata['_error'] == 'Invalid credentials':
            raise RuntimeError('Invalid credentials')
        if metadata['node_id'] != node_id:
            raise RuntimeError('Unexpected node identifier. {0} vs {1}'.format(metadata['node_id'], node_id))
        result = DataList(AlbaNode, {})
        result._executed = True
        result._guids = [single.guid]
        result._objects = {single.guid: single}
        result._data = {single.guid: {'guid': single.guid, 'data': single._data}}
        return result

    # Discover all nodes registered in etcd which are not part of the model yet
    modeled_ids = [modeled.node_id for modeled in AlbaNodeList.get_albanodes()]
    discovered = {}
    seen_ids = []
    etcd_ids = []
    if EtcdConfiguration.dir_exists('/ovs/alba/asdnodes'):
        etcd_ids = EtcdConfiguration.list('/ovs/alba/asdnodes')
    for etcd_id in etcd_ids:
        candidate = AlbaNode(volatile=True)
        candidate.type = 'ASD'
        candidate.node_id = etcd_id
        candidate.ip = EtcdConfiguration.get('/ovs/alba/asdnodes/{0}/config/main|ip'.format(etcd_id))
        candidate.port = EtcdConfiguration.get('/ovs/alba/asdnodes/{0}/config/main|port'.format(etcd_id))
        candidate.username = EtcdConfiguration.get('/ovs/alba/asdnodes/{0}/config/main|username'.format(etcd_id))
        candidate.password = EtcdConfiguration.get('/ovs/alba/asdnodes/{0}/config/main|password'.format(etcd_id))
        if candidate.node_id in modeled_ids or candidate.node_id in seen_ids:
            continue
        discovered[candidate.guid] = candidate
        seen_ids.append(candidate.node_id)

    result = DataList(AlbaNode, {})
    result._executed = True
    result._guids = discovered.keys()
    result._objects = discovered
    result._data = dict((entry.guid, {'guid': entry.guid, 'data': entry._data}) for entry in discovered.values())
    return result
def validate_alba_backend_removal(alba_backend_info):
    """
    Verify that no traces of a removed ALBA backend are left in the model, in etcd and on the Storage Routers
    alba_backend_info should be a dictionary containing:
        - guid
        - name
        - maintenance_service_names
    :param alba_backend_info: Information about the backend
    :return: None
    """
    required_params = {'name': (str, None),
                       'guid': (str, Toolbox.regex_guid),
                       'maintenance_service_names': (list, None)}
    Toolbox.verify_required_params(actual_params=alba_backend_info,
                                   required_params=required_params,
                                   exact_match=True)

    backend_guid = alba_backend_info['guid']
    backend_name = alba_backend_info['name']
    assert GeneralBackend.get_by_name(backend_name) is None, 'Still found a backend in the model with name {0}'.format(backend_name)

    # Model: neither the ABM nor any NSM service of this backend may remain
    for abm_service in GeneralService.get_services_by_name(ServiceType.SERVICE_TYPES.ALBA_MGR):
        assert abm_service.name != '{0}-abm'.format(backend_name), 'An AlbaManager service has been found with name {0}'.format(backend_name)
    for nsm_service in GeneralService.get_services_by_name(ServiceType.SERVICE_TYPES.NS_MGR):
        assert nsm_service.name.startswith('{0}-nsm_'.format(backend_name)) is False, 'An NamespaceManager service has been found with name {0}'.format(backend_name)

    # Etcd: ALBA backend entry must be gone
    backends_key = '/ovs/alba/backends'
    remaining_backend_keys = [entry for entry in EtcdConfiguration.list(backends_key)]
    assert backend_guid not in remaining_backend_keys, 'Etcd still contains an entry in {0} with guid {1}'.format(backends_key, backend_guid)

    # Etcd: arakoon cluster configurations of this backend must be gone
    leftover_clusters = [entry for entry in EtcdConfiguration.list('/ovs/arakoon') if entry.startswith(backend_name)]
    assert len(leftover_clusters) == 0, 'Etcd still contains configurations for clusters: {0}'.format(', '.join(leftover_clusters))

    # Storage Routers: arakoon and maintenance services must have been removed
    for storagerouter in GeneralStorageRouter.get_storage_routers():
        root_client = SSHClient(endpoint=storagerouter, username='******')
        service_names = ['ovs-arakoon-{0}-abm'.format(backend_name),
                         'ovs-arakoon-{0}-nsm_0'.format(backend_name)]
        service_names = service_names + alba_backend_info['maintenance_service_names']
        for service_name in service_names:
            assert GeneralService.has_service(name=service_name, client=root_client) is False, 'Service {0} still deployed on Storage Router {1}'.format(service_name, storagerouter.name)
def _configure_arakoon_to_volumedriver(cluster_name):
    """
    Point every storagedriver configuration found in etcd to the specified arakoon cluster

    The nodes of the cluster are loaded once. For each storagedriver listed under
    '/ovs/vpools/<vpool_guid>/hosts' both the volume registry and the distributed lock store are
    configured with these nodes, after which the configuration is saved with reload_config=True.
    Nothing is configured when '/ovs/vpools' does not exist in etcd.
    :param cluster_name: Name of the arakoon cluster the storagedrivers should use
    :type cluster_name: str
    :return: None
    """
    message = 'Update existing vPools'
    print(message)
    StorageDriverController._logger.info(message)

    cluster_config = ArakoonClusterConfig(cluster_name)
    cluster_config.load_config()
    cluster_nodes = [{'host': member.ip,
                      'port': member.client_port,
                      'node_id': member.name} for member in cluster_config.nodes]

    if not EtcdConfiguration.dir_exists('/ovs/vpools'):
        return

    for vpool_guid in EtcdConfiguration.list('/ovs/vpools'):
        hosts_key = '/ovs/vpools/{0}/hosts'.format(vpool_guid)
        for storagedriver_id in EtcdConfiguration.list(hosts_key):
            sd_config = StorageDriverConfiguration('storagedriver', vpool_guid, storagedriver_id)
            sd_config.load()
            sd_config.configure_volume_registry(vregistry_arakoon_cluster_id=cluster_name,
                                                vregistry_arakoon_cluster_nodes=cluster_nodes)
            sd_config.configure_distributed_lock_store(dls_type='Arakoon',
                                                       dls_arakoon_cluster_id=cluster_name,
                                                       dls_arakoon_cluster_nodes=cluster_nodes)
            sd_config.save(reload_config=True)
def get_arakoon_metadata_by_cluster_name(cluster_name):
    """
    Load the metadata of the arakoon cluster registered in etcd under the given name
    :param cluster_name: Name of the arakoon cluster
    :type cluster_name: str
    :return: Arakoon cluster metadata information
    :rtype: ArakoonClusterMetadata
    :raises ValueError: When '/ovs/arakoon' does not exist in etcd or holds no cluster with the given name
    """
    if not EtcdConfiguration.exists('/ovs/arakoon', raw=True):
        raise ValueError('Etcd key "/ovs/arakoon" not found')

    cluster_known = any(cluster == cluster_name for cluster in EtcdConfiguration.list('/ovs/arakoon'))
    if not cluster_known:
        raise ValueError('No arakoon cluster found with name "{0}"'.format(cluster_name))

    metadata = ArakoonClusterMetadata(cluster_id=cluster_name)
    metadata.load_metadata()
    return metadata
def _get_free_ports(client):
    """
    Request free arakoon ports for the node behind the client, excluding ports of clusters already on that node

    All arakoon clusters found under ArakoonInstaller.ETCD_CONFIG_ROOT are inspected. For each cluster
    that has a node named after this machine, its client and messaging port are excluded.
    Clusters whose configuration cannot be loaded are logged and skipped (best effort).
    :param client: Client passed to System.get_my_machine_id and System.get_free_ports
    :return: Result of System.get_free_ports (called with an amount of 2 - presumably 2 ports, verify against System)
    """
    node_name = System.get_my_machine_id(client)
    clusters = []
    exclude_ports = []
    if EtcdConfiguration.dir_exists(ArakoonInstaller.ETCD_CONFIG_ROOT):
        for cluster_name in EtcdConfiguration.list(ArakoonInstaller.ETCD_CONFIG_ROOT):
            try:
                config = ArakoonClusterConfig(cluster_name)
                config.load_config()
                for node in config.nodes:
                    if node.name == node_name:
                        clusters.append(cluster_name)
                        exclude_ports.append(node.client_port)
                        exclude_ports.append(node.messaging_port)
            except Exception as ex:
                # Only swallow regular errors: KeyboardInterrupt and SystemExit must still propagate
                logger.error(' Could not load port information of cluster {0}: {1}'.format(cluster_name, ex))
    ports = System.get_free_ports(EtcdConfiguration.get('/ovs/framework/hosts/{0}/ports|arakoon'.format(node_name)),
                                  exclude_ports, 2, client)
    logger.debug(' Loaded free ports {0} based on existing clusters {1}'.format(ports, clusters))
    return ports
def model_local_albanode(**kwargs):
    """
    Create or refresh a modeled ALBA node for every node registered in etcd under '/ovs/alba/asdnodes'
    :param kwargs: Kwargs containing information regarding the node (unused)
    :type kwargs: dict
    :return: None
    """
    _ = kwargs
    if not EtcdConfiguration.dir_exists('/ovs/alba/asdnodes'):
        return

    for node_id in EtcdConfiguration.list('/ovs/alba/asdnodes'):
        alba_node = AlbaNodeList.get_albanode_by_node_id(node_id)
        if alba_node is None:
            alba_node = AlbaNode()
        settings = EtcdConfiguration.get('/ovs/alba/asdnodes/{0}/config/main'.format(node_id))
        alba_node.type = 'ASD'
        alba_node.node_id = node_id
        # Connection details are copied 1-on-1 from the main configuration stored in etcd
        for field in ('ip', 'port', 'username', 'password'):
            setattr(alba_node, field, settings[field])
        alba_node.storagerouter = StorageRouterList.get_by_ip(settings['ip'])
        alba_node.save()
def check_vpool_cleanup(vpool_info, storagerouters=None):
    """
    Verify that all traces of a vPool have been removed from the Storage Routers provided
    vpool_info should be a dictionary containing:
        - type
        - guid
        - files
        - directories
        - name (optional)
        - vpool (optional)

        'vpool' and 'name' are mutually exclusive, but one of both is required
        If vpool is provided:
            - The storagedrivers still modeled for this vPool are compared with the hosts registered in etcd
            - storagerouters need to be provided, because on these Storage Routers, we check whether the vPool has been cleaned up
        If name is provided:
            - No vPool with this name may exist anymore and its configuration must be gone from etcd
            - If storagerouters is NOT provided, all Storage Routers will be checked for a correct vPool removal
            - If storagerouters is provided, only these Storage Routers will be checked for a correct vPool removal

    :param vpool_info: Information about the vPool
    :param storagerouters: Storage Routers to check if vPool has been cleaned up
    :return: None
    """
    # Validate the structure of the information passed
    if any(required_key not in vpool_info for required_key in ('type', 'guid', 'files', 'directories')):
        raise ValueError('Incorrect vpool_info provided')
    has_vpool = 'vpool' in vpool_info
    has_name = 'name' in vpool_info
    if has_vpool and has_name:
        raise ValueError('vpool and name are mutually exclusive')
    if not has_vpool and not has_name:
        raise ValueError('Either vpool or vpool_name needs to be provided')

    vpool = vpool_info.get('vpool')
    vpool_name = vpool_info.get('name')
    vpool_guid = vpool_info['guid']
    vpool_type = vpool_info['type']
    files = vpool_info['files']
    directories = vpool_info['directories']

    valid_types = GeneralBackend.get_valid_backendtypes()
    if vpool_type not in valid_types:
        raise ValueError('Unsupported Backend Type provided. Please choose from: {0}'.format(', '.join(valid_types)))
    if storagerouters is None:
        storagerouters = GeneralStorageRouter.get_storage_routers()

    if vpool_name is not None:
        assert GeneralVPool.get_vpool_by_name(vpool_name=vpool_name) is None, 'A vPool with name {0} still exists'.format(vpool_name)

    # Prepare some fields to check
    if vpool:
        vpool_name = vpool.name
    vpool_services = ['ovs-dtl_{0}'.format(vpool_name),
                      'ovs-volumedriver_{0}'.format(vpool_name)]
    if vpool_type == 'alba':
        vpool_services.append('ovs-albaproxy_{0}'.format(vpool_name))

    # Check etcd
    if vpool is None:
        assert EtcdConfiguration.exists('/ovs/vpools/{0}'.format(vpool_guid), raw=True) is False, 'vPool config still found in etcd'
    else:
        modeled_sd_ids = set(storagedriver.storagedriver_id for storagedriver in vpool.storagedrivers)
        etcd_sd_ids = set(EtcdConfiguration.list('/ovs/vpools/{0}/hosts'.format(vpool_guid)))
        assert not modeled_sd_ids - etcd_sd_ids, 'There are more storagedrivers modelled than present in etcd'
        assert not etcd_sd_ids - modeled_sd_ids, 'There are more storagedrivers in etcd than present in model'

    # Perform checks on all storagerouters where vpool was removed
    for storagerouter in storagerouters:
        # Check management center
        if GeneralManagementCenter.get_mgmt_center(pmachine=storagerouter.pmachine) is not None:
            assert GeneralManagementCenter.is_host_configured(pmachine=storagerouter.pmachine) is False, 'Management Center is still configured on Storage Router {0}'.format(storagerouter.ip)

        # Check MDS services
        mds_services = GeneralService.get_services_by_name(ServiceType.SERVICE_TYPES.MD_SERVER)
        router_mds_services = [mds for mds in mds_services if mds.storagerouter_guid == storagerouter.guid]
        assert len(router_mds_services) == 0, 'There are still MDS services present for Storage Router {0}'.format(storagerouter.ip)

        # Check services
        root_client = SSHClient(storagerouter, username='******')
        for service_name in vpool_services:
            if ServiceManager.has_service(service_name, client=root_client):
                raise RuntimeError('Service {0} is still configured on Storage Router {1}'.format(service_name, storagerouter.ip))

        # Check KVM vpool
        if storagerouter.pmachine.hvtype == 'KVM':
            pool_lines = root_client.run('virsh pool-list --all').splitlines()
            # The first 2 lines of the overview are dropped before looking for pool names
            pool_lines.pop(1)
            pool_lines.pop(0)
            for pool_line in pool_lines:
                if vpool_name == pool_line.split()[0].strip():
                    raise ValueError('vPool {0} is still defined on Storage Router {1}'.format(vpool_name, storagerouter.ip))

        # Check file and directory existence
        if storagerouter.guid not in directories:
            raise ValueError('Could not find directory information for Storage Router {0}'.format(storagerouter.ip))
        if storagerouter.guid not in files:
            raise ValueError('Could not find file information for Storage Router {0}'.format(storagerouter.ip))

        for directory in directories[storagerouter.guid]:
            assert root_client.dir_exists(directory) is False, 'Directory {0} still exists on Storage Router {1}'.format(directory, storagerouter.ip)
        for file_name in files[storagerouter.guid]:
            assert root_client.file_exists(file_name) is False, 'File {0} still exists on Storage Router {1}'.format(file_name, storagerouter.ip)

        # Look for errors in storagedriver log: 'error' entries are only printed, 'fatal' entries abort the check
        for error_type in ['error', 'fatal']:
            cmd = "cat -vet /var/log/ovs/volumedriver/{0}.log | tail -1000 | grep ' {1} '; echo true > /dev/null".format(vpool_name, error_type)
            errors = [line for line in root_client.run(cmd).splitlines() if "HierarchicalArakoon" not in line]
            if not errors:
                continue
            if error_type == 'error':
                print('Volumedriver log file contains errors on Storage Router {0}\n - {1}'.format(storagerouter.ip, '\n - '.join(errors)))
            else:
                raise RuntimeError('Fatal errors found in volumedriver log file on Storage Router {0}\n - {1}'.format(storagerouter.ip, '\n - '.join(errors)))
def validate_vpool_sanity(expected_settings):
    """
    Check if all requirements are met for a healthy vPool

    Verifies the modeled vPool attributes, the vPool configuration, the storagedriver configuration in etcd,
    the services, the voldrv arakoon configuration, the required files and directories and the partition roles.
    A volume is created, written to and deleted again for every storagedriver.
    :param expected_settings: Parameters used to create a vPool, which will be verified.
                              Keys are compared with the Storage Routers of the vPool its storagedrivers,
                              the first value is used as the generic vPool settings
    :type expected_settings: dict
    :return: None
    :raises ValueError: When no settings are passed or when a configuration, service, file, directory or partition check fails
    :raises AssertionError: When a model or etcd check fails
    """
    if not isinstance(expected_settings, dict) or len(expected_settings) == 0:
        raise ValueError('Cannot validate vpool when no settings are passed')

    generic_settings = expected_settings.values()[0]
    vpool_name = generic_settings['vpool_name']
    mountpoint = '/mnt/{0}'.format(vpool_name)
    backend_type = generic_settings['type']
    rdma_enabled = generic_settings['config_params']['dtl_transport'] == StorageDriverClient.FRAMEWORK_DTL_TRANSPORT_RSOCKET

    vpool = GeneralVPool.get_vpool_by_name(vpool_name=vpool_name)
    assert vpool is not None, 'Could not find vPool with name {0}'.format(vpool_name)
    vpool_config = GeneralVPool.get_configuration(vpool)

    # Verify some basic vPool attributes
    assert vpool.name == vpool_name, 'Expected name {0} for vPool'.format(vpool_name)
    assert vpool.backend_type.code == backend_type, 'Expected backend type {0}'.format(backend_type)
    assert vpool.status == VPool.STATUSES.RUNNING, 'vPool does not have RUNNING status'
    assert vpool.rdma_enabled == rdma_enabled, 'RDMA enabled setting is incorrect'
    assert set(expected_settings.keys()) == set([sd.storagerouter for sd in vpool.storagedrivers]), "vPool storagerouters don't match the expected Storage Routers"

    # Verify vPool Storage Driver configuration
    # Every verified key is popped from the copy, whatever remains afterwards is missing in the actual configuration
    expected_vpool_config = copy.deepcopy(generic_settings['config_params'])
    for key, value in vpool_config.iteritems():
        if key == 'dtl_enabled' or key == 'tlog_multiplier':
            continue
        if key not in expected_vpool_config:
            raise ValueError('Expected settings does not contain key {0}'.format(key))

        if value != expected_vpool_config[key]:
            raise ValueError('vPool does not have expected configuration {0} for key {1}'.format(expected_vpool_config[key], key))
        expected_vpool_config.pop(key)

    if len(expected_vpool_config) > 0:
        raise ValueError('Actual vPool configuration does not contain keys: {0}'.format(', '.join(expected_vpool_config.keys())))

    # Prepare some fields to check
    config = generic_settings['config_params']
    dtl_mode = config['dtl_mode']
    sco_size = config['sco_size']
    dedupe_mode = config['dedupe_mode']
    cluster_size = config['cluster_size']
    write_buffer = config['write_buffer']
    dtl_transport = config['dtl_transport']
    cache_strategy = config['cache_strategy']
    # @TODO: Add more validations for other expected settings (instead of None)
    # A value of None means: the key must be known, but its value is not verified
    expected_config = {'backend_connection_manager': {'backend_interface_retries_on_error': 5,
                                                      'backend_interface_retry_interval_secs': 1,
                                                      'backend_interface_retry_backoff_multiplier': 2.0},
                       'content_addressed_cache': {'clustercache_mount_points': None,
                                                   'read_cache_serialization_path': u'/var/rsp/{0}'.format(vpool.name)},
                       'distributed_lock_store': {'dls_arakoon_cluster_id': None,
                                                  'dls_arakoon_cluster_nodes': None,
                                                  'dls_type': u'Arakoon'},
                       'distributed_transaction_log': {'dtl_path': None,
                                                       'dtl_transport': dtl_transport.upper()},
                       'event_publisher': {'events_amqp_routing_key': u'volumerouter',
                                           'events_amqp_uris': None},
                       'file_driver': {'fd_cache_path': None,
                                       'fd_extent_cache_capacity': u'1024',
                                       'fd_namespace': None},
                       'filesystem': {'fs_dtl_config_mode': u'Automatic',
                                      'fs_dtl_mode': u'{0}'.format(StorageDriverClient.VPOOL_DTL_MODE_MAP[dtl_mode]),
                                      'fs_enable_shm_interface': 1,
                                      'fs_file_event_rules': None,
                                      'fs_metadata_backend_arakoon_cluster_nodes': None,
                                      'fs_metadata_backend_mds_nodes': None,
                                      'fs_metadata_backend_type': u'MDS',
                                      'fs_raw_disk_suffix': None,
                                      'fs_virtual_disk_format': None},
                       'metadata_server': {'mds_nodes': None},
                       'scocache': {'backoff_gap': u'2GB',
                                    'scocache_mount_points': None,
                                    'trigger_gap': u'1GB'},
                       'threadpool_component': {'num_threads': 16},
                       'volume_manager': {'clean_interval': 1,
                                          'default_cluster_size': 1024 * cluster_size,
                                          'dtl_throttle_usecs': 4000,
                                          'metadata_path': None,
                                          'non_disposable_scos_factor': float(write_buffer) / StorageDriverClient.TLOG_MULTIPLIER_MAP[sco_size] / sco_size,
                                          'number_of_scos_in_tlog': StorageDriverClient.TLOG_MULTIPLIER_MAP[sco_size],
                                          'read_cache_default_behaviour': StorageDriverClient.VPOOL_CACHE_MAP[cache_strategy],
                                          'read_cache_default_mode': StorageDriverClient.VPOOL_DEDUPE_MAP[dedupe_mode],
                                          'tlog_path': None},
                       'volume_registry': {'vregistry_arakoon_cluster_id': u'voldrv',
                                           'vregistry_arakoon_cluster_nodes': None},
                       'volume_router': {'vrouter_backend_sync_timeout_ms': 5000,
                                         'vrouter_file_read_threshold': 1024,
                                         'vrouter_file_write_threshold': 1024,
                                         'vrouter_id': None,
                                         'vrouter_max_workers': 16,
                                         'vrouter_migrate_timeout_ms': 5000,
                                         'vrouter_min_workers': 4,
                                         'vrouter_redirect_timeout_ms': u'5000',
                                         'vrouter_routing_retries': 10,
                                         'vrouter_sco_multiplier': 1024,
                                         'vrouter_volume_read_threshold': 1024,
                                         'vrouter_volume_write_threshold': 1024},
                       'volume_router_cluster': {'vrouter_cluster_id': None}}
    vpool_services = {'all': ['ovs-watcher-volumedriver',
                              'ovs-dtl_{0}'.format(vpool.name),
                              'ovs-volumedriver_{0}'.format(vpool.name),
                              'ovs-volumerouter-consumer'],
                      'extra': [],
                      'master': ['ovs-arakoon-voldrv']}
    # Partition sub-roles which at least 1 storagedriver must provide, entries are removed once found
    sd_partitions = {'DB': ['MD', 'MDS', 'TLOG'],
                     'READ': ['None'],
                     'WRITE': ['FD', 'DTL', 'SCO'],
                     'SCRUB': ['None']}

    if backend_type == 'alba':
        backend_metadata = {'name': (str, None),
                            'preset': (str, Toolbox.regex_preset),
                            'backend_guid': (str, Toolbox.regex_guid),
                            'arakoon_config': (dict, None),
                            'connection': (dict, {'host': (str, Toolbox.regex_ip, False),
                                                  'port': (int, {'min': 1, 'max': 65535}),
                                                  'client_id': (str, Toolbox.regex_guid),
                                                  'client_secret': (str, None),
                                                  'local': (bool, None)}),
                            'backend_info': (dict, {'policies': (list, None),
                                                    'sco_size': (float, None),
                                                    'frag_size': (float, None),
                                                    'total_size': (float, None),
                                                    'nsm_partition_guids': (list, Toolbox.regex_guid)})}
        required = {'backend': (dict, backend_metadata),
                    'backend_aa': (dict, backend_metadata, False)}
        Toolbox.verify_required_params(required_params=required,
                                       actual_params=vpool.metadata)
        vpool_services['all'].append("ovs-albaproxy_{0}".format(vpool.name))
        sd_partitions['WRITE'].append('FCACHE')
        expected_config['backend_connection_manager'].update({'alba_connection_host': None,
                                                              'alba_connection_port': None,
                                                              'alba_connection_preset': None,
                                                              'alba_connection_timeout': 15,
                                                              'backend_type': u'{0}'.format(vpool.backend_type.code.upper())})
    elif backend_type == 'distributed':
        expected_config['backend_connection_manager'].update({'backend_type': u'LOCAL',
                                                              'local_connection_path': u'{0}'.format(generic_settings['distributed_mountpoint'])})

    assert EtcdConfiguration.exists('/ovs/arakoon/voldrv/config', raw=True), 'Volumedriver arakoon does not exist'

    # Do some verifications for all SDs
    storage_ip = None
    voldrv_config = GeneralArakoon.get_config('voldrv')
    all_files = GeneralVPool.get_related_files(vpool=vpool)
    all_directories = GeneralVPool.get_related_directories(vpool=vpool)

    for storagedriver in vpool.storagedrivers:
        storagerouter = storagedriver.storagerouter
        root_client = SSHClient(storagerouter, username='******')

        assert EtcdConfiguration.exists('/ovs/vpools/{0}/hosts/{1}/config'.format(vpool.guid, storagedriver.storagedriver_id), raw=True), 'vPool config not found in etcd'
        current_config_sections = set([item for item in EtcdConfiguration.list('/ovs/vpools/{0}/hosts/{1}/config'.format(vpool.guid, storagedriver.storagedriver_id))])
        assert not current_config_sections.difference(set(expected_config.keys())), 'New section appeared in the storage driver config in etcd'
        assert not set(expected_config.keys()).difference(current_config_sections), 'Config section expected for storage driver, but not found in etcd'

        for key, values in expected_config.iteritems():
            current_config = EtcdConfiguration.get('/ovs/vpools/{0}/hosts/{1}/config/{2}'.format(vpool.guid, storagedriver.storagedriver_id, key))
            # The actual section may not contain keys which are not expected
            assert set(current_config.keys()).union(set(values.keys())) == set(values.keys()), 'Not all expected keys match for key "{0}" on Storage Driver {1}'.format(key, storagedriver.name)

            for sub_key, value in current_config.iteritems():
                expected_value = values[sub_key]
                if expected_value is None:
                    continue
                assert value == expected_value, 'Key: {0} - Sub key: {1} - Value: {2} - Expected value: {3}'.format(key, sub_key, value, expected_value)

        # Check services
        if storagerouter.node_type == 'MASTER':
            for service_name in vpool_services['all'] + vpool_services['master']:
                if service_name == 'ovs-arakoon-voldrv' and GeneralStorageDriver.has_role(storagedriver, 'DB') is False:
                    continue
                if ServiceManager.get_service_status(name=service_name,
                                                     client=root_client) is not True:
                    raise ValueError('Service {0} is not running on node {1}'.format(service_name, storagerouter.ip))
        else:
            for service_name in vpool_services['all'] + vpool_services['extra']:
                if ServiceManager.get_service_status(name=service_name,
                                                     client=root_client) is not True:
                    raise ValueError('Service {0} is not running on node {1}'.format(service_name, storagerouter.ip))

        # Check arakoon config
        if not voldrv_config.has_section(storagerouter.machine_id):
            raise ValueError('Voldrv arakoon cluster does not have section {0}'.format(storagerouter.machine_id))

        # Basic SD checks
        assert storagedriver.cluster_ip == storagerouter.ip, 'Incorrect cluster IP. Expected: {0} - Actual: {1}'.format(storagerouter.ip, storagedriver.cluster_ip)
        assert storagedriver.mountpoint == '/mnt/{0}'.format(vpool.name), 'Incorrect mountpoint. Expected: {0} - Actual: {1}'.format(mountpoint, storagedriver.mountpoint)
        if storage_ip is not None:
            # All storagedrivers are expected to use the storage IP of the first storagedriver checked
            assert storagedriver.storage_ip == storage_ip, 'Incorrect storage IP. Expected: {0} - Actual: {1}'.format(storage_ip, storagedriver.storage_ip)
        storage_ip = storagedriver.storage_ip

        # Check required directories and files
        if storagerouter.guid not in all_directories:
            raise ValueError('Could not find directory information for Storage Router {0}'.format(storagerouter.ip))
        if storagerouter.guid not in all_files:
            raise ValueError('Could not find file information for Storage Router {0}'.format(storagerouter.ip))

        for directory in all_directories[storagerouter.guid]:
            if root_client.dir_exists(directory) is False:
                raise ValueError('Directory {0} does not exist on Storage Router {1}'.format(directory, storagerouter.ip))
        for file_name in all_files[storagerouter.guid]:
            if root_client.file_exists(file_name) is False:
                raise ValueError('File {0} does not exist on Storage Router {1}'.format(file_name, storagerouter.ip))

        for partition in storagedriver.partitions:
            remaining_sub_roles = sd_partitions.get(partition.role)
            if remaining_sub_roles is None:
                continue
            # Partitions without sub-role are tracked with the string 'None'
            sub_role = 'None' if partition.sub_role is None else partition.sub_role
            # Only remove what is still pending: the same (role, sub-role) combination can occur more than once
            # (e.g. on multiple storagedrivers) and an unguarded list.remove would raise a ValueError
            if sub_role in remaining_sub_roles:
                remaining_sub_roles.remove(sub_role)

        # Verify vPool writeable
        if storagerouter.pmachine.hvtype == 'VMWARE':
            GeneralVPool.mount_vpool(vpool=vpool,
                                     root_client=root_client)

        vdisk = GeneralVDisk.create_volume(size=10,
                                           vpool=vpool,
                                           root_client=root_client)
        GeneralVDisk.write_to_volume(vdisk=vdisk,
                                     vpool=vpool,
                                     root_client=root_client,
                                     count=10,
                                     bs='1M',
                                     input_type='random')
        GeneralVDisk.delete_volume(vdisk=vdisk,
                                   vpool=vpool,
                                   root_client=root_client)

    # Whatever is left in sd_partitions has not been found on any storagedriver
    for role, sub_roles in sd_partitions.iteritems():
        for sub_role in sub_roles:
            raise ValueError('Not a single Storage Driver found with partition role {0} and sub-role {1}'.format(role, sub_role))
def validate_alba_backend_sanity_without_claimed_disks(alba_backend):
    """
    Verify model attributes, etcd structure, maintenance agents and arakoon services of an ALBA backend
    :param alba_backend: ALBA backend
    :return: None
    """
    backend = alba_backend.backend

    # Attribute validation
    assert alba_backend.available is True, 'ALBA backend {0} is not available'.format(alba_backend.backend.name)
    assert len(alba_backend.presets) >= 1, 'No preset found for ALBA backend {0}'.format(alba_backend.backend.name)
    default_presets = [preset for preset in alba_backend.presets if preset['is_default'] is True]
    assert len(default_presets) == 1, 'Could not find default preset for backend {0}'.format(alba_backend.backend.name)
    assert backend.backend_type.code == 'alba', 'Backend type for ALBA backend is {0}'.format(alba_backend.backend.backend_type.code)
    assert backend.status == 'RUNNING', 'Status for ALBA backend is {0}'.format(alba_backend.backend.status)
    assert isinstance(alba_backend.metadata_information, dict) is True, 'ALBA backend {0} metadata information is not a dictionary'.format(alba_backend.backend.name)
    Toolbox.verify_required_params(actual_params=alba_backend.metadata_information,
                                   required_params={'nsm_partition_guids': (list, Toolbox.regex_guid)},
                                   exact_match=True)

    # Validate ABM and NSM services: 1 of each is expected per MASTER Storage Router with a DB role
    db_masters = [router for router in GeneralStorageRouter.get_storage_routers()
                  if GeneralStorageRouter.has_roles(storagerouter=router, roles='DB') is True and router.node_type == 'MASTER']
    assert len(alba_backend.abm_services) == len(db_masters), 'Not enough ABM services found'
    assert len(alba_backend.nsm_services) == len(db_masters), 'Not enough NSM services found'

    # Validate ALBA backend ETCD structure
    alba_backend_key = '/ovs/alba/backends'
    assert EtcdConfiguration.exists(key=alba_backend_key, raw=True) is True, 'Etcd does not contain key {0}'.format(alba_backend_key)

    etcd_backend_keys = [entry for entry in EtcdConfiguration.list(alba_backend_key)]
    mandatory_keys = ['verification_schedule', 'global_gui_error_interval', alba_backend.guid, 'default_nsm_hosts']
    optional_keys = ['verification_factor']

    # Count the non-guid keys which are allowed to be present
    allowed_amount = 0
    for optional_key in optional_keys:
        if optional_key in etcd_backend_keys:
            allowed_amount += 1
    for mandatory_key in mandatory_keys:
        if not re.match(Toolbox.regex_guid, mandatory_key):
            allowed_amount += 1
        assert mandatory_key in etcd_backend_keys, 'Key {0} was not found in tree {1}'.format(mandatory_key, alba_backend_key)

    # Guid keys represent ALBA backends and are left out of the count
    non_guid_keys = [entry for entry in etcd_backend_keys if not re.match(Toolbox.regex_guid, entry)]
    assert len(non_guid_keys) == allowed_amount, 'Another key was added to the {0} tree'.format(alba_backend_key)

    this_alba_backend_key = '{0}/{1}'.format(alba_backend_key, alba_backend.guid)
    found_keys = [entry for entry in EtcdConfiguration.list(this_alba_backend_key)]
    wanted_keys = ['maintenance']
    assert found_keys == wanted_keys, 'Actual keys: {0} - Expected keys: {1}'.format(found_keys, wanted_keys)

    maintenance_key = '{0}/maintenance'.format(this_alba_backend_key)
    found_keys = [entry for entry in EtcdConfiguration.list(maintenance_key)]
    wanted_keys = ['nr_of_agents', 'config']
    assert set(found_keys) == set(wanted_keys), 'Actual keys: {0} - Expected keys: {1}'.format(found_keys, wanted_keys)
    # @TODO: Add validation for config values

    # Validate ASD node ETCD structure
    alba_nodes = GeneralAlba.get_alba_nodes()
    assert len(alba_nodes) > 0, 'Could not find any ALBA nodes in the model'
    alba_node_key = '/ovs/alba/asdnodes'
    etcd_node_ids = [entry for entry in EtcdConfiguration.list(alba_node_key)]
    assert len(alba_nodes) == len(etcd_node_ids), 'Amount of ALBA nodes in model does not match amount of ALBA nodes in ETCD. In model: {0} - In Etcd: {1}'.format(len(alba_nodes), len(etcd_node_ids))
    for alba_node in alba_nodes:
        assert alba_node.node_id in etcd_node_ids, 'ALBA node with ID {0} not present in ETCD'.format(alba_node.node_id)

        node_keys = [entry for entry in EtcdConfiguration.list('{0}/{1}'.format(alba_node_key, alba_node.node_id))]
        wanted_node_keys = ['config']
        assert node_keys == wanted_node_keys, 'Actual keys: {0} - Expected keys: {1}'.format(node_keys, wanted_node_keys)

        config_keys = [entry for entry in EtcdConfiguration.list('{0}/{1}/config'.format(alba_node_key, alba_node.node_id))]
        wanted_config_keys = ['main', 'network']
        assert set(config_keys) == set(wanted_config_keys), 'Actual keys: {0} - Expected keys: {1}'.format(config_keys, wanted_config_keys)
        # @TODO: Add validation for main and network values

    # Validate Arakoon ETCD structure
    abm_service_name = alba_backend.abm_services[0].service.name
    nsm_service_name = alba_backend.nsm_services[0].service.name
    arakoon_abm_key = '/ovs/arakoon/{0}/config'.format(abm_service_name)
    arakoon_nsm_key = '/ovs/arakoon/{0}/config'.format(nsm_service_name)
    assert EtcdConfiguration.exists(key=arakoon_abm_key, raw=True) is True, 'Etcd key {0} does not exists'.format(arakoon_abm_key)
    assert EtcdConfiguration.exists(key=arakoon_nsm_key, raw=True) is True, 'Etcd key {0} does not exists'.format(arakoon_nsm_key)
    # @TODO: Add validation for config values

    # Validate maintenance agents: sum of the maintenance services reported by all ALBA nodes
    services_per_node = [alba_node.client.list_maintenance_services() for alba_node in alba_nodes]
    actual_amount_agents = sum(1 for node_services in services_per_node for _ in node_services)
    expected_amount_agents = EtcdConfiguration.get('/ovs/alba/backends/{0}/maintenance/nr_of_agents'.format(alba_backend.guid))
    assert actual_amount_agents == expected_amount_agents, 'Amount of maintenance agents is incorrect. Found {0} - Expected {1}'.format(actual_amount_agents, expected_amount_agents)

    # Validate arakoon services
    machine_ids = [router.machine_id for router in db_masters]
    abm_service_name = alba_backend.abm_services[0].service.name
    nsm_service_name = alba_backend.nsm_services[0].service.name
    for storagerouter in db_masters:
        root_client = SSHClient(endpoint=storagerouter, username='******')
        arakoon_service_names = ['ovs-arakoon-{0}'.format(abm_service_name),
                                 'ovs-arakoon-{0}'.format(nsm_service_name)]
        for service_name in arakoon_service_names:
            assert GeneralService.has_service(name=service_name, client=root_client) is True, 'Service {0} not deployed on Storage Router {1}'.format(service_name, storagerouter.name)
            assert GeneralService.get_service_status(name=service_name, client=root_client) is True, 'Service {0} not running on Storage Router {1}'.format(service_name, storagerouter.name)
        # The arakoon master must be 1 of the MASTER Storage Routers with a DB role
        out, err, _ = General.execute_command('arakoon --who-master -config {0}'.format(GeneralArakoon.ETCD_CONFIG_PATH.format(abm_service_name)))
        assert out.strip() in machine_ids, 'Arakoon master is {0}, but should be 1 of "{1}"'.format(out.strip(), ', '.join(machine_ids))