def validate_vpool_sanity(expected_settings):
    """
    Check if all requirements are met for a healthy vPool

    The vPool-wide settings are read from the first value of ``expected_settings``; the keys are
    compared against the Storage Routers of the vPool's Storage Drivers. The settings entry is read
    for the keys 'vpool_name', 'type', 'config_params' and (for a 'distributed' backend)
    'distributed_mountpoint'.

    :param expected_settings: Parameters used to create a vPool, which will be verified
    :type expected_settings: dict
    :raises ValueError: When no settings are passed, or when a configuration key, service, arakoon section,
                        directory, file or partition role/sub-role is missing or does not match
    :raises AssertionError: When one of the asserted vPool / Storage Driver properties does not match
    :return: None
    """
    if not isinstance(expected_settings, dict) or not expected_settings:
        raise ValueError('Cannot validate vpool when no settings are passed')

    # Only the first entry is used as source for the vPool-wide settings
    generic_settings = next(iter(expected_settings.values()))
    vpool_name = generic_settings['vpool_name']
    mountpoint = '/mnt/{0}'.format(vpool_name)
    backend_type = generic_settings['type']
    config = generic_settings['config_params']
    rdma_enabled = config['dtl_transport'] == StorageDriverClient.FRAMEWORK_DTL_TRANSPORT_RSOCKET

    vpool = GeneralVPool.get_vpool_by_name(vpool_name=vpool_name)
    assert vpool is not None, 'Could not find vPool with name {0}'.format(vpool_name)
    vpool_config = GeneralVPool.get_configuration(vpool)

    # Verify some basic vPool attributes
    assert vpool.name == vpool_name, 'Expected name {0} for vPool'.format(vpool_name)
    assert vpool.backend_type.code == backend_type, 'Expected backend type {0}'.format(backend_type)
    assert vpool.status == VPool.STATUSES.RUNNING, 'vPool does not have RUNNING status'
    assert vpool.rdma_enabled == rdma_enabled, 'RDMA enabled setting is incorrect'
    assert set(expected_settings.keys()) == set(sd.storagerouter for sd in vpool.storagedrivers), \
        "vPool storagerouters don't match the expected Storage Routers"

    # Verify vPool Storage Driver configuration
    # Every matched key is popped from the copy, so whatever remains afterwards was expected but not present
    expected_vpool_config = copy.deepcopy(config)
    for key, value in vpool_config.items():
        if key in ('dtl_enabled', 'tlog_multiplier'):
            continue
        if key not in expected_vpool_config:
            raise ValueError('Expected settings does not contain key {0}'.format(key))
        if value != expected_vpool_config[key]:
            raise ValueError('vPool does not have expected configuration {0} for key {1}'.format(expected_vpool_config[key], key))
        expected_vpool_config.pop(key)
    if expected_vpool_config:
        raise ValueError('Actual vPool configuration does not contain keys: {0}'.format(', '.join(expected_vpool_config.keys())))

    # Prepare some fields to check
    dtl_mode = config['dtl_mode']
    sco_size = config['sco_size']
    dedupe_mode = config['dedupe_mode']
    cluster_size = config['cluster_size']
    write_buffer = config['write_buffer']
    dtl_transport = config['dtl_transport']
    cache_strategy = config['cache_strategy']
    # @TODO: Add more validations for other expected settings (instead of None)
    # A value of None means: the key must be known, but its value is not compared
    expected_config = {
        'backend_connection_manager': {'backend_interface_retries_on_error': 5,
                                       'backend_interface_retry_interval_secs': 1,
                                       'backend_interface_retry_backoff_multiplier': 2.0},
        'content_addressed_cache': {'clustercache_mount_points': None,
                                    'read_cache_serialization_path': u'/var/rsp/{0}'.format(vpool.name)},
        'distributed_lock_store': {'dls_arakoon_cluster_id': None,
                                   'dls_arakoon_cluster_nodes': None,
                                   'dls_type': u'Arakoon'},
        'distributed_transaction_log': {'dtl_path': None,
                                        'dtl_transport': dtl_transport.upper()},
        'event_publisher': {'events_amqp_routing_key': u'volumerouter',
                            'events_amqp_uris': None},
        'file_driver': {'fd_cache_path': None,
                        'fd_extent_cache_capacity': u'1024',
                        'fd_namespace': None},
        'filesystem': {'fs_dtl_config_mode': u'Automatic',
                       'fs_dtl_mode': u'{0}'.format(StorageDriverClient.VPOOL_DTL_MODE_MAP[dtl_mode]),
                       'fs_enable_shm_interface': 1,
                       'fs_file_event_rules': None,
                       'fs_metadata_backend_arakoon_cluster_nodes': None,
                       'fs_metadata_backend_mds_nodes': None,
                       'fs_metadata_backend_type': u'MDS',
                       'fs_raw_disk_suffix': None,
                       'fs_virtual_disk_format': None},
        'metadata_server': {'mds_nodes': None},
        'scocache': {'backoff_gap': u'2GB',
                     'scocache_mount_points': None,
                     'trigger_gap': u'1GB'},
        'threadpool_component': {'num_threads': 16},
        'volume_manager': {'clean_interval': 1,
                           'default_cluster_size': 1024 * cluster_size,
                           'dtl_throttle_usecs': 4000,
                           'metadata_path': None,
                           'non_disposable_scos_factor': float(write_buffer) / StorageDriverClient.TLOG_MULTIPLIER_MAP[sco_size] / sco_size,
                           'number_of_scos_in_tlog': StorageDriverClient.TLOG_MULTIPLIER_MAP[sco_size],
                           'read_cache_default_behaviour': StorageDriverClient.VPOOL_CACHE_MAP[cache_strategy],
                           'read_cache_default_mode': StorageDriverClient.VPOOL_DEDUPE_MAP[dedupe_mode],
                           'tlog_path': None},
        'volume_registry': {'vregistry_arakoon_cluster_id': u'voldrv',
                            'vregistry_arakoon_cluster_nodes': None},
        'volume_router': {'vrouter_backend_sync_timeout_ms': 5000,
                          'vrouter_file_read_threshold': 1024,
                          'vrouter_file_write_threshold': 1024,
                          'vrouter_id': None,
                          'vrouter_max_workers': 16,
                          'vrouter_migrate_timeout_ms': 5000,
                          'vrouter_min_workers': 4,
                          'vrouter_redirect_timeout_ms': u'5000',
                          'vrouter_routing_retries': 10,
                          'vrouter_sco_multiplier': 1024,
                          'vrouter_volume_read_threshold': 1024,
                          'vrouter_volume_write_threshold': 1024},
        'volume_router_cluster': {'vrouter_cluster_id': None}
    }
    vpool_services = {'all': ['ovs-watcher-volumedriver',
                              'ovs-dtl_{0}'.format(vpool.name),
                              'ovs-volumedriver_{0}'.format(vpool.name),
                              'ovs-volumerouter-consumer'],
                      'extra': [],
                      'master': ['ovs-arakoon-voldrv']}
    # Role -> sub-roles which still have to be found on at least one Storage Driver.
    # The string 'None' stands for a partition of that role without sub-role.
    sd_partitions = {'DB': ['MD', 'MDS', 'TLOG'],
                     'READ': ['None'],
                     'WRITE': ['FD', 'DTL', 'SCO'],
                     'SCRUB': ['None']}

    if backend_type == 'alba':
        backend_metadata = {'name': (str, None),
                            'preset': (str, Toolbox.regex_preset),
                            'backend_guid': (str, Toolbox.regex_guid),
                            'arakoon_config': (dict, None),
                            'connection': (dict, {'host': (str, Toolbox.regex_ip, False),
                                                  'port': (int, {'min': 1, 'max': 65535}),
                                                  'client_id': (str, Toolbox.regex_guid),
                                                  'client_secret': (str, None),
                                                  'local': (bool, None)}),
                            'backend_info': (dict, {'policies': (list, None),
                                                    'sco_size': (float, None),
                                                    'frag_size': (float, None),
                                                    'total_size': (float, None),
                                                    'nsm_partition_guids': (list, Toolbox.regex_guid)})}
        required = {'backend': (dict, backend_metadata),
                    'backend_aa': (dict, backend_metadata, False)}
        Toolbox.verify_required_params(required_params=required,
                                       actual_params=vpool.metadata)
        vpool_services['all'].append("ovs-albaproxy_{0}".format(vpool.name))
        sd_partitions['WRITE'].append('FCACHE')
        expected_config['backend_connection_manager'].update({'alba_connection_host': None,
                                                              'alba_connection_port': None,
                                                              'alba_connection_preset': None,
                                                              'alba_connection_timeout': 15,
                                                              'backend_type': u'{0}'.format(vpool.backend_type.code.upper())})
    elif backend_type == 'distributed':
        expected_config['backend_connection_manager'].update({'backend_type': u'LOCAL',
                                                              'local_connection_path': u'{0}'.format(generic_settings['distributed_mountpoint'])})

    assert EtcdConfiguration.exists('/ovs/arakoon/voldrv/config', raw=True), 'Volumedriver arakoon does not exist'

    # Do some verifications for all SDs
    storage_ip = None
    voldrv_config = GeneralArakoon.get_config('voldrv')
    all_files = GeneralVPool.get_related_files(vpool=vpool)
    all_directories = GeneralVPool.get_related_directories(vpool=vpool)
    expected_sections = set(expected_config.keys())

    for storagedriver in vpool.storagedrivers:
        storagerouter = storagedriver.storagerouter
        root_client = SSHClient(storagerouter, username='******')

        config_path = '/ovs/vpools/{0}/hosts/{1}/config'.format(vpool.guid, storagedriver.storagedriver_id)
        assert EtcdConfiguration.exists(config_path, raw=True), 'vPool config not found in etcd'
        current_config_sections = set(EtcdConfiguration.list(config_path))
        assert not current_config_sections.difference(expected_sections), 'New section appeared in the storage driver config in etcd'
        assert not expected_sections.difference(current_config_sections), 'Config section expected for storage driver, but not found in etcd'

        for key, values in expected_config.items():
            current_config = EtcdConfiguration.get('{0}/{1}'.format(config_path, key))
            # The actual section is not allowed to contain keys which are unknown to the expected section
            assert set(current_config.keys()).union(set(values.keys())) == set(values.keys()), 'Not all expected keys match for key "{0}" on Storage Driver {1}'.format(key, storagedriver.name)

            for sub_key, value in current_config.items():
                expected_value = values[sub_key]
                if expected_value is None:
                    continue
                assert value == expected_value, 'Key: {0} - Sub key: {1} - Value: {2} - Expected value: {3}'.format(key, sub_key, value, expected_value)

        # Check services
        if storagerouter.node_type == 'MASTER':
            services_to_check = vpool_services['all'] + vpool_services['master']
        else:
            services_to_check = vpool_services['all'] + vpool_services['extra']
        for service_name in services_to_check:
            # The voldrv arakoon service is skipped for Storage Drivers without DB role
            if service_name == 'ovs-arakoon-voldrv' and GeneralStorageDriver.has_role(storagedriver, 'DB') is False:
                continue
            if ServiceManager.get_service_status(name=service_name, client=root_client) is not True:
                raise ValueError('Service {0} is not running on node {1}'.format(service_name, storagerouter.ip))

        # Check arakoon config
        if not voldrv_config.has_section(storagerouter.machine_id):
            raise ValueError('Voldrv arakoon cluster does not have section {0}'.format(storagerouter.machine_id))

        # Basic SD checks
        assert storagedriver.cluster_ip == storagerouter.ip, 'Incorrect cluster IP. Expected: {0} - Actual: {1}'.format(storagerouter.ip, storagedriver.cluster_ip)
        assert storagedriver.mountpoint == mountpoint, 'Incorrect mountpoint. Expected: {0} - Actual: {1}'.format(mountpoint, storagedriver.mountpoint)
        # Every Storage Driver is compared with the storage IP of the previously checked one
        if storage_ip is not None:
            assert storagedriver.storage_ip == storage_ip, 'Incorrect storage IP. Expected: {0} - Actual: {1}'.format(storage_ip, storagedriver.storage_ip)
        storage_ip = storagedriver.storage_ip

        # Check required directories and files
        if storagerouter.guid not in all_directories:
            raise ValueError('Could not find directory information for Storage Router {0}'.format(storagerouter.ip))
        if storagerouter.guid not in all_files:
            raise ValueError('Could not find file information for Storage Router {0}'.format(storagerouter.ip))

        for directory in all_directories[storagerouter.guid]:
            if root_client.dir_exists(directory) is False:
                raise ValueError('Directory {0} does not exist on Storage Router {1}'.format(directory, storagerouter.ip))
        for file_name in all_files[storagerouter.guid]:
            if root_client.file_exists(file_name) is False:
                raise ValueError('File {0} does not exist on Storage Router {1}'.format(file_name, storagerouter.ip))

        # Tick off every role / sub-role combination which is found on this Storage Driver.
        # Membership is verified before removing: a combination can already have been ticked off by an earlier
        # Storage Driver, and a partition without sub-role can belong to a role which does not list 'None'.
        # An unconditional list.remove() would raise an unrelated ValueError in both situations.
        for partition in storagedriver.partitions:
            outstanding_sub_roles = sd_partitions.get(partition.role)
            if outstanding_sub_roles is None:
                continue
            sub_role = 'None' if partition.sub_role is None else partition.sub_role
            if sub_role in outstanding_sub_roles:
                outstanding_sub_roles.remove(sub_role)

        # Verify vPool writeable
        # NOTE(review): only the mount is assumed to be VMWARE specific, the write test runs for every node - confirm
        if storagerouter.pmachine.hvtype == 'VMWARE':
            GeneralVPool.mount_vpool(vpool=vpool,
                                     root_client=root_client)

        vdisk = GeneralVDisk.create_volume(size=10,
                                           vpool=vpool,
                                           root_client=root_client)
        GeneralVDisk.write_to_volume(vdisk=vdisk,
                                     vpool=vpool,
                                     root_client=root_client,
                                     count=10,
                                     bs='1M',
                                     input_type='random')
        GeneralVDisk.delete_volume(vdisk=vdisk,
                                   vpool=vpool,
                                   root_client=root_client)

    # Anything still listed has not been found on any of the Storage Drivers; report the first one
    for role, sub_roles in sd_partitions.items():
        for sub_role in sub_roles:
            raise ValueError('Not a single Storage Driver found with partition role {0} and sub-role {1}'.format(role, sub_role))
def validate_vpool_sanity(expected_settings):
    """
    Check if all requirements are met for a healthy vPool

    The vPool-wide settings are read from the first value of ``expected_settings``; the keys are
    compared against the Storage Routers of the vPool's Storage Drivers. The settings entry is read
    for the keys "vpool_name", "type" and "config_params".

    :param expected_settings: Parameters used to create a vPool, which will be verified
    :type expected_settings: dict
    :raises ValueError: When no settings are passed, or when a configuration key, service, arakoon section,
                        directory, file or partition role/sub-role is missing or does not match
    :raises AssertionError: When one of the asserted vPool / Storage Driver properties does not match
    :return: None
    """
    if not isinstance(expected_settings, dict) or not expected_settings:
        raise ValueError("Cannot validate vpool when no settings are passed")

    # Only the first entry is used as source for the vPool-wide settings
    generic_settings = next(iter(expected_settings.values()))
    vpool_name = generic_settings["vpool_name"]
    mountpoint = "/mnt/{0}".format(vpool_name)
    # Not used further below; the lookup still fails with a KeyError when "type" is missing
    backend_type = generic_settings["type"]
    config = generic_settings["config_params"]
    rdma_enabled = config["dtl_transport"] == StorageDriverClient.FRAMEWORK_DTL_TRANSPORT_RSOCKET

    vpool = GeneralVPool.get_vpool_by_name(vpool_name=vpool_name)
    assert vpool is not None, "Could not find vPool with name {0}".format(vpool_name)
    vpool_config = GeneralVPool.get_configuration(vpool)

    # Verify some basic vPool attributes
    assert vpool.name == vpool_name, "Expected name {0} for vPool".format(vpool_name)
    assert vpool.status == VPool.STATUSES.RUNNING, "vPool does not have RUNNING status"
    assert vpool.rdma_enabled == rdma_enabled, "RDMA enabled setting is incorrect"
    assert set(expected_settings.keys()) == set(
        sd.storagerouter for sd in vpool.storagedrivers
    ), "vPool storagerouters don't match the expected Storage Routers"

    # Verify vPool Storage Driver configuration
    # Every matched key is popped from the copy, so whatever remains afterwards was expected but not present
    expected_vpool_config = copy.deepcopy(config)
    for key, value in vpool_config.items():
        if key in ("dtl_enabled", "tlog_multiplier", "dtl_config_mode"):
            continue
        if key not in expected_vpool_config:
            raise ValueError("Expected settings does not contain key {0}".format(key))
        if value != expected_vpool_config[key]:
            raise ValueError(
                "vPool does not have expected configuration {0} for key {1}".format(expected_vpool_config[key], key)
            )
        expected_vpool_config.pop(key)
    if expected_vpool_config:
        raise ValueError(
            "Actual vPool configuration does not contain keys: {0}".format(", ".join(expected_vpool_config.keys()))
        )

    # Prepare some fields to check
    dtl_mode = config["dtl_mode"]
    sco_size = config["sco_size"]
    cluster_size = config["cluster_size"]
    write_buffer = config["write_buffer"]
    dtl_transport = config["dtl_transport"]
    # @TODO: Add more validations for other expected settings (instead of None)
    # NOTE: this dict is currently only consumed by the disabled per-section check in the loop below
    expected_config = {
        "backend_connection_manager": {
            "backend_interface_retries_on_error": 5,
            "backend_interface_retry_interval_secs": 1,
            "backend_interface_retry_backoff_multiplier": 2.0,
        },
        "content_addressed_cache": {
            "clustercache_mount_points": None,
            "read_cache_serialization_path": u"/var/rsp/{0}".format(vpool.name),
        },
        "distributed_lock_store": {
            "dls_arakoon_cluster_id": None,
            "dls_arakoon_cluster_nodes": None,
            "dls_type": u"Arakoon",
        },
        "distributed_transaction_log": {"dtl_path": None, "dtl_transport": dtl_transport.upper()},
        "event_publisher": {"events_amqp_routing_key": u"volumerouter", "events_amqp_uris": None},
        "file_driver": {"fd_cache_path": None, "fd_extent_cache_capacity": u"1024", "fd_namespace": None},
        "filesystem": {
            "fs_dtl_config_mode": u"Automatic",
            "fs_dtl_mode": u"{0}".format(StorageDriverClient.VPOOL_DTL_MODE_MAP[dtl_mode]),
            "fs_enable_shm_interface": 1,
            "fs_file_event_rules": None,
            "fs_metadata_backend_arakoon_cluster_nodes": None,
            "fs_metadata_backend_mds_nodes": None,
            "fs_metadata_backend_type": u"MDS",
            "fs_raw_disk_suffix": None,
            "fs_virtual_disk_format": None,
        },
        "metadata_server": {"mds_nodes": None},
        "scocache": {"backoff_gap": u"2GB", "scocache_mount_points": None, "trigger_gap": u"1GB"},
        "threadpool_component": {"num_threads": 16},
        "volume_manager": {
            "clean_interval": 1,
            "default_cluster_size": 1024 * cluster_size,
            "dtl_throttle_usecs": 4000,
            "metadata_path": None,
            "non_disposable_scos_factor": float(write_buffer)
            / StorageDriverClient.TLOG_MULTIPLIER_MAP[sco_size]
            / sco_size,
            "number_of_scos_in_tlog": StorageDriverClient.TLOG_MULTIPLIER_MAP[sco_size],
            "tlog_path": None,
        },
        "volume_registry": {"vregistry_arakoon_cluster_id": u"voldrv", "vregistry_arakoon_cluster_nodes": None},
        "volume_router": {
            "vrouter_backend_sync_timeout_ms": 5000,
            "vrouter_file_read_threshold": 1024,
            "vrouter_file_write_threshold": 1024,
            "vrouter_id": None,
            "vrouter_max_workers": 16,
            "vrouter_migrate_timeout_ms": 5000,
            "vrouter_min_workers": 4,
            "vrouter_redirect_timeout_ms": u"5000",
            "vrouter_routing_retries": 10,
            "vrouter_sco_multiplier": 1024,
            "vrouter_volume_read_threshold": 1024,
            "vrouter_volume_write_threshold": 1024,
        },
        "volume_router_cluster": {"vrouter_cluster_id": None},
    }
    vpool_services = {
        "all": [
            "ovs-watcher-volumedriver",
            "ovs-dtl_{0}".format(vpool.name),
            "ovs-volumedriver_{0}".format(vpool.name),
            "ovs-volumerouter-consumer",
        ],
        "extra": [],
        "master": ["ovs-arakoon-voldrv"],
    }
    # Role -> sub-roles which still have to be found on at least one Storage Driver
    sd_partitions = {"DB": ["MD", "MDS", "TLOG"], "WRITE": ["FD", "DTL", "SCO"]}

    assert Configuration.exists("/ovs/arakoon/voldrv/config", raw=True), "Volumedriver arakoon does not exist"

    # Do some verifications for all SDs
    storage_ip = None
    voldrv_config = GeneralArakoon.get_config("voldrv")
    all_files = GeneralVPool.get_related_files(vpool=vpool)
    all_directories = GeneralVPool.get_related_directories(vpool=vpool)

    for storagedriver in vpool.storagedrivers:
        storagerouter = storagedriver.storagerouter
        root_client = SSHClient(storagerouter, username="******")

        assert Configuration.exists(
            "/ovs/vpools/{0}/hosts/{1}/config".format(vpool.guid, storagedriver.storagedriver_id), raw=True
        ), "vPool config not found in configuration"

        # @todo: replace next lines with implementation defined in: http://jira.openvstorage.com/browse/OVS-4577
        # current_config_sections = set([item for item in Configuration.list('/ovs/vpools/{0}/hosts/{1}/config'.format(vpool.guid, storagedriver.storagedriver_id))])
        # assert not current_config_sections.difference(set(expected_config.keys())), 'New section appeared in the storage driver config in configuration'
        # assert not set(expected_config.keys()).difference(current_config_sections), 'Config section expected for storage driver, but not found in configuration'
        #
        # for key, values in expected_config.iteritems():
        #     current_config = Configuration.get('/ovs/vpools/{0}/hosts/{1}/config/{2}'.format(vpool.guid, storagedriver.storagedriver_id, key))
        #     assert set(current_config.keys()).union(set(values.keys())) == set(values.keys()), 'Not all expected keys match for key "{0}" on Storage Driver {1}'.format(key, storagedriver.name)
        #
        #     for sub_key, value in current_config.iteritems():
        #         expected_value = values[sub_key]
        #         if expected_value is None:
        #             continue
        #         assert value == expected_value, 'Key: {0} - Sub key: {1} - Value: {2} - Expected value: {3}'.format(key, sub_key, value, expected_value)

        # Check services
        if storagerouter.node_type == "MASTER":
            services_to_check = vpool_services["all"] + vpool_services["master"]
        else:
            services_to_check = vpool_services["all"] + vpool_services["extra"]
        for service_name in services_to_check:
            # The voldrv arakoon service is skipped for Storage Drivers without DB role
            if service_name == "ovs-arakoon-voldrv" and GeneralStorageDriver.has_role(storagedriver, "DB") is False:
                continue
            exitcode, output = ServiceManager.get_service_status(name=service_name, client=root_client)
            if exitcode is not True:
                raise ValueError(
                    "Service {0} is not running on node {1} - {2}".format(service_name, storagerouter.ip, output)
                )

        # Check arakoon config
        if not voldrv_config.has_section(storagerouter.machine_id):
            raise ValueError("Voldrv arakoon cluster does not have section {0}".format(storagerouter.machine_id))

        # Basic SD checks
        assert (
            storagedriver.cluster_ip == storagerouter.ip
        ), "Incorrect cluster IP. Expected: {0} - Actual: {1}".format(storagerouter.ip, storagedriver.cluster_ip)
        assert (
            storagedriver.mountpoint == mountpoint
        ), "Incorrect mountpoint. Expected: {0} - Actual: {1}".format(mountpoint, storagedriver.mountpoint)
        # Every Storage Driver is compared with the storage IP of the previously checked one
        if storage_ip is not None:
            assert (
                storagedriver.storage_ip == storage_ip
            ), "Incorrect storage IP. Expected: {0} - Actual: {1}".format(storage_ip, storagedriver.storage_ip)
        storage_ip = storagedriver.storage_ip

        # Check required directories and files
        if storagerouter.guid not in all_directories:
            raise ValueError("Could not find directory information for Storage Router {0}".format(storagerouter.ip))
        if storagerouter.guid not in all_files:
            raise ValueError("Could not find file information for Storage Router {0}".format(storagerouter.ip))

        for directory in all_directories[storagerouter.guid]:
            if root_client.dir_exists(directory) is False:
                raise ValueError(
                    "Directory {0} does not exist on Storage Router {1}".format(directory, storagerouter.ip)
                )
        for file_name in all_files[storagerouter.guid]:
            if root_client.file_exists(file_name) is False:
                raise ValueError("File {0} does not exist on Storage Router {1}".format(file_name, storagerouter.ip))

        # @TODO: check roles and sub_roles for all storagedrivers and not just once
        # Tick off every role / sub-role combination which is found on this Storage Driver.
        # Membership is verified before removing: none of the lists above contains "None", so the former
        # unconditional remove("None") for a partition without sub-role raised an unrelated ValueError.
        for partition in storagedriver.partitions:
            outstanding_sub_roles = sd_partitions.get(partition.role)
            if outstanding_sub_roles is None:
                continue
            sub_role = "None" if partition.sub_role is None else partition.sub_role
            if sub_role in outstanding_sub_roles:
                outstanding_sub_roles.remove(sub_role)

        # Verify vPool writeable
        # NOTE(review): only the mount is assumed to be VMWARE specific, the write test runs for every node - confirm
        if GeneralHypervisor.get_hypervisor_type() == "VMWARE":
            GeneralVPool.mount_vpool(vpool=vpool, root_client=root_client)

        vdisk = GeneralVDisk.create_volume(size=10, vpool=vpool, root_client=root_client)
        GeneralVDisk.write_to_volume(
            vdisk=vdisk, vpool=vpool, root_client=root_client, count=10, bs="1M", input_type="random"
        )
        GeneralVDisk.delete_volume(vdisk=vdisk, vpool=vpool, root_client=root_client)

    # Anything still listed has not been found on any of the Storage Drivers; report the first one
    for role, sub_roles in sd_partitions.items():
        for sub_role in sub_roles:
            raise ValueError(
                "Not a single Storage Driver found with partition role {0} and sub-role {1}".format(role, sub_role)
            )