class ServiceType(DataObject):
    """
    A ServiceType represents some kind of service that needs to be managed by the framework.
    """
    # Known framework-managed service types, mapping enum key to human-readable name.
    SERVICE_TYPES = DataObject.enumerator('Service_type', {'NS_MGR': 'NamespaceManager',
                                                           'ARAKOON': 'Arakoon',
                                                           'ALBA_MGR': 'AlbaManager',
                                                           'MD_SERVER': 'MetadataServer',
                                                           'ALBA_PROXY': 'AlbaProxy',
                                                           'ALBA_S3_TRANSACTION': 'AlbaS3Transaction'})
    # Flavours of Arakoon clusters managed by the framework.
    ARAKOON_CLUSTER_TYPES = DataObject.enumerator('Arakoon_cluster_type', ['ABM', 'FWK', 'NSM', 'SD', 'CFG'])
    __properties = [Property('name', str, unique=True, indexed=True, doc='Name of the ServiceType.')]
    __relations = []
    __dynamics = []
def __init__(self, *args, **kwargs):
    """
    Initializes a vDisk, setting up its additional helpers
    """
    DataObject.__init__(self, *args, **kwargs)
    # Unfreeze temporarily so a non-model attribute can be attached to the instance.
    self._frozen = False
    self._storagedriver_client = None  # Lazily initialized elsewhere; None until first use
    self._frozen = True
def __init__(self, *args, **kwargs):
    """
    Initializes an AlbaNode, setting up its additional helpers
    """
    DataObject.__init__(self, *args, **kwargs)
    # Unfreeze temporarily so a non-model attribute can be attached to the instance.
    self._frozen = False
    self.client = ASDManagerClient(self)  # Client used to talk to the ASD manager on this node
    self._frozen = True
def __init__(self, *args, **kwargs):
    """
    Initializes a MDSService, setting up its additional helpers
    """
    DataObject.__init__(self, *args, **kwargs)
    # Unfreeze temporarily so a non-model attribute can be attached to the instance.
    self._frozen = False
    self.metadataserver_client = None  # Populated by reload_client() below
    self._frozen = True
    # Establish the metadata server client right away.
    self.reload_client()
def __init__(self, *args, **kwargs):
    """
    Initializes an AlbaNode, setting up its additional helpers
    """
    DataObject.__init__(self, *args, **kwargs)
    # Unfreeze temporarily so a non-model attribute can be attached to the instance.
    self._frozen = False
    self.client = None
    if os.environ.get('RUNNING_UNITTESTS') == 'True':
        # Unit tests talk to a mocked manager client instead of a real node.
        self.client = ManagerClientMockup(self)
    elif self.type in self.CLIENTS:
        # Pick the concrete client implementation matching this node's type.
        self.client = self.CLIENTS[self.type](self)
    else:
        raise NotImplementedError('Type {0} is not implemented'.format(self.type))
    self._frozen = True
class StorageDriverPartition(DataObject):
    """
    The StorageDriverPartition class represents the junction table between StorageDriver and Partitions.
    Examples:
    * my_storagedriver.partitions[0].partition
    * my_partition.storagedrivers[0].storagedriver
    """
    SUBROLE = DataObject.enumerator('Role', ['FCACHE', 'FD', 'MD', 'MDS', 'SCO', 'TLOG'])
    __properties = [Property('number', int, doc='Number of the service in case there is more than one'),
                    Property('size', long, mandatory=False, doc='Size in bytes configured for use'),
                    Property('role', DiskPartition.ROLES.keys(), doc='Role of the partition'),
                    Property('sub_role', SUBROLE.keys(), mandatory=False, doc='Sub-role of this StorageDriverPartition')]
    __relations = [Relation('partition', DiskPartition, 'storagedrivers'),
                   Relation('storagedriver', StorageDriver, 'partitions'),
                   Relation('mds_service', MDSService, 'storagedriver_partitions', mandatory=False)]
    __dynamics = [Dynamic('folder', str, 3600),
                  Dynamic('path', str, 3600)]

    def _folder(self):
        """
        Folder on the mountpoint
        """
        # Folder name is '<vpool>_<role>[_<sub_role>]_<number>'; the sub-role
        # segment is only present when a sub-role has been set.
        segments = [self.storagedriver.vpool.name, self.role.lower()]
        if self.sub_role:
            segments.append(self.sub_role.lower())
        segments.append('{0}'.format(self.number))
        return '_'.join(segments)

    def _path(self):
        """
        Actual path on filesystem, including mountpoint
        """
        return '/'.join([self.partition.folder, self.folder])
class DiskPartition(DataObject):
    """
    The DiskPartition class represents a partition on a physical Disk
    """
    ROLES = DataObject.enumerator('Role', ['DB', 'READ', 'SCRUB', 'WRITE', 'BACKEND'])
    VIRTUAL_STORAGE_LOCATION = '/mnt/storage'
    __properties = [Property('id', str, doc='The partition identifier'),
                    Property('filesystem', str, mandatory=False, doc='The filesystem used on the partition'),
                    Property('state', ['OK', 'FAILURE', 'MISSING'], doc='State of the partition'),
                    Property('inode', int, mandatory=False, doc='The partitions inode'),
                    Property('offset', int, doc='Offset of the partition'),
                    Property('size', int, doc='Size of the partition'),
                    Property('mountpoint', str, mandatory=False, doc='Mountpoint of the partition, None if not mounted'),
                    Property('path', str, doc='The partition path'),
                    Property('roles', list, default=[], doc='A list of claimed roles')]
    __relations = [Relation('disk', Disk, 'partitions')]
    __dynamics = [Dynamic('usage', list, 120),
                  Dynamic('folder', str, 3600)]

    def _usage(self):
        """
        A dict representing this partition's usage in a more user-friendly form
        """
        # One entry per StorageDriver junction claiming space on this partition.
        return [{'type': 'storagedriver',
                 'role': junction.role,
                 'size': junction.size,
                 'relation': junction.storagedriver_guid,
                 'folder': junction.folder} for junction in self.storagedrivers]

    def _folder(self):
        """
        Corrected mountpoint
        """
        # The root mountpoint is remapped to the virtual storage location.
        if self.mountpoint == '/':
            return DiskPartition.VIRTUAL_STORAGE_LOCATION
        return self.mountpoint
class Disk(DataObject):
    """
    The Disk class represents physical disks that are available to a storagerouter (technically, they can be
    virtual disks, but from the OS (and framework) point of view, they're considered physical)
    """
    STATES = DataObject.enumerator('state', ['OK', 'FAILURE', 'MISSING'])
    __properties = [Property('aliases', list, doc='The device aliases'),
                    Property('model', str, mandatory=False, doc='The disks model'),
                    Property('state', STATES.keys(), doc='The state of the disk'),
                    Property('name', str, doc='Name of the disk (e.g. sda)'),
                    Property('size', int, doc='Size of the disk, in bytes'),
                    Property('is_ssd', bool, doc='The type of the disk')]
    __relations = [Relation('storagerouter', StorageRouter, 'disks')]
    __dynamics = []
class AlbaNodeCluster(DataObject):
    """
    The AlbaNodeCluster represents a group of AlbaNodes which will function as one
    The idea behind the cluster is that when one AlbaNode would fail, another can take over
    The information within the AlbaNodes would be the same (same stack information)
    This cluster contains the same information as an AlbaNode for representation purposes
    """
    CLUSTER_TYPES = DataObject.enumerator('ClusterType', ['ASD', 'GENERIC', 'MIXED'])
    _logger = Logger('hybrids')
    __properties = [Property('name', str, mandatory=False, doc='Optional name for the AlbaNode')]
    __dynamics = [Dynamic('type', CLUSTER_TYPES.keys(), 3600),
                  Dynamic('ips', list, 3600),
                  Dynamic('cluster_metadata', dict, 3600),
                  Dynamic('local_summary', dict, 60),
                  Dynamic('stack', dict, 15, locked=True),
                  Dynamic('maintenance_services', dict, 30, locked=True),
                  Dynamic('supported_osd_types', list, 3600),
                  Dynamic('read_only_mode', bool, 60)]

    def _type(self):
        """
        Retrieve the type of the cluster
        :return: Type of the cluster
        :rtype: str
        """
        node_type = None
        for alba_node in self.alba_nodes:
            if node_type is None:
                node_type = alba_node.type
                continue
            if alba_node.type != node_type:
                # Should be blocked by the API. This type is currently not supported
                node_type = self.CLUSTER_TYPES.MIXED
                break
        return node_type or self.CLUSTER_TYPES.ASD  # Default to ASD

    def _cluster_metadata(self):
        """
        Returns a set of metadata hinting on how the cluster should be used
        The GUI/API can adapt based on this information
        """
        cluster_metadata = {'fill': False,  # Prepare Slot for future usage
                            'fill_add': False,  # OSDs will added and claimed right away
                            'clear': False}  # Indicates whether OSDs can be removed from ALBA Node / Slot
        if self.type == self.CLUSTER_TYPES.ASD:
            cluster_metadata.update({'fill': True,
                                     'fill_metadata': {'count': 'integer'},
                                     'clear': True})
        elif self.type == self.CLUSTER_TYPES.GENERIC:
            cluster_metadata.update({'fill_add': True,
                                     'fill_add_metadata': {'osd_type': 'osd_type',
                                                           'ips': 'list_of_ip',
                                                           'port': 'port'},
                                     'clear': True})
        # Do nothing in when the type is mixed as nothing is supported
        return cluster_metadata

    def _ips(self):
        """
        Returns the IPs of the nodes
        :return: List of lists with IPs of all linked Nodes
        :rtype: list[list[str]]
        """
        return [alba_node.ips for alba_node in self.alba_nodes]

    def _maintenance_services(self):
        """
        Returns all maintenance services on this node, grouped by backend name
        """
        services = {}
        for alba_node in self.alba_nodes:
            services[alba_node.node_id] = alba_node.maintenance_services
        # Fix: the original built the dict but never returned it, so this
        # dynamic always evaluated to None instead of the services mapping.
        return services

    def _stack(self):
        """
        Returns an overview of this node's storage stack
        """
        stack = {}
        for alba_node in self.alba_nodes:
            stack[alba_node.node_id] = alba_node.stack
        # @Todo collapse information together based on active/passive
        # @todo Do not collapse on device both rother on slot id (which is an alias that should match)
        return stack

    def _supported_osd_types(self):
        """
        Returns a list of all supported OSD types
        """
        from ovs.dal.hybrids.albaosd import AlbaOSD
        if self.type == self.CLUSTER_TYPES.GENERIC:
            return [AlbaOSD.OSD_TYPES.ASD, AlbaOSD.OSD_TYPES.AD]
        # Fix: was 'self.CLUSTER_TYPES.NODE_TYPES.ASD' - the CLUSTER_TYPES
        # enumerator has no 'NODE_TYPES' attribute, which raised AttributeError.
        if self.type == self.CLUSTER_TYPES.ASD:
            return [AlbaOSD.OSD_TYPES.ASD]
        else:  # Mixed type
            return [AlbaOSD.OSD_TYPES.ASD, AlbaOSD.OSD_TYPES.AD]

    def _read_only_mode(self):
        """
        Indicates whether the ALBA Node can be used for OSD manipulation
        If the version on the ALBA Node is lower than a specific version required by the framework,
        the ALBA Node becomes read only, this means, that actions such as creating, restarting, deleting OSDs
        becomes impossible until the node's software has been updated
        :return: True if the ALBA Node should be read only, False otherwise
        :rtype: bool
        """
        # The whole cluster should be read-only as not all actions can be mirrored
        return any(alba_node.read_only_mode for alba_node in self.alba_nodes)

    def _local_summary(self):
        """
        Return a summary of the OSDs based on their state
        * Ok -> green
        * WARNING -> orange
        * ERROR -> red
        * UNKNOWN -> gray
        The summary will contain a list of dicts with guid, osd_id and claimed_by
        eg: {'red': [{osd_id: 1, claimed_by: alba_backend_guid1}], 'green': [{osd_id: 2, claimed_by: None}], ...}
        :return: Summary of the OSDs filtered by status (which are represented by color)
        """
        local_summary = {}
        for alba_node in self.alba_nodes:
            local_summary[alba_node.node_id] = alba_node.local_summary
        return local_summary
class AlbaOSD(DataObject):
    """
    The AlbaOSD represents a claimed ASD or an AlbaBackend
    """
    OSD_TYPES = DataObject.enumerator('OSDType', ['ASD', 'ALBA_BACKEND', 'AD', 'S3'])
    __properties = [Property('osd_id', str, unique=True, doc='OSD identifier'),
                    Property('osd_type', OSD_TYPES.keys(), doc='Type of OSD (ASD, ALBA_BACKEND)'),
                    Property('ips', list, mandatory=False, doc='List of IP addresses on which the OSD is exposed'),
                    Property('port', int, mandatory=False, doc='Port on which the OSD process is listening'),
                    Property('metadata', dict, mandatory=False, doc='Additional information about this OSD, such as connection information (if OSD is an ALBA backend'),
                    Property('slot_id', str, indexed=True, mandatory=False, doc='A pointer towards a certain slot. Will be used to map OSDs into container')]
    __relations = [Relation('alba_backend', AlbaBackend, 'osds', doc='The AlbaBackend that claimed the OSD'),
                   Relation('alba_node', AlbaNode, 'osds', mandatory=False, doc='The Alba Node to which the OSD belongs'),
                   Relation('domain', Domain, 'osds', mandatory=False, doc='The Domain in which the OSD resides')]
    __dynamics = [Dynamic('statistics', dict, 5, locked=True),
                  Dynamic('stack_info', dict, 5)]

    def _statistics(self, dynamic):
        """
        Loads statistics from the ASD

        Aggregates raw per-operation counters (keyed per source, e.g. 'Apply'/'Apply2')
        into per-category totals and computes a per-second rate ('n_ps') by diffing
        against the previously cached snapshot stored in the volatile store.
        """
        # Maps each reported statistic category to the raw source keys that feed it.
        data_keys = {'apply': ['Apply', 'Apply2'],
                     'multi_get': ['MultiGet', 'MultiGet2'],
                     'range': ['Range'],
                     'range_entries': ['RangeEntries'],
                     'statistics': ['Statistics']}
        volatile = VolatileFactory.get_client()
        # Previous snapshot is cached per object so the delta/rate can be computed.
        prev_key = '{0}_{1}'.format(self._key, 'statistics_previous')
        previous_stats = volatile.get(prev_key, default={})
        try:
            all_statistics = self.alba_backend.osd_statistics
            if self.osd_id not in all_statistics:
                return {}
            data = all_statistics[self.osd_id]
            statistics = {'timestamp': time.time()}
            # Seconds elapsed since the previous snapshot; 0 when no snapshot exists.
            delta = statistics['timestamp'] - previous_stats.get('timestamp', statistics['timestamp'])
            for key, sources in data_keys.iteritems():
                if key not in statistics:
                    statistics[key] = {'n': 0, 'max': [], 'min': [], 'avg': []}
                # Accumulate counters from all raw sources feeding this category.
                for source in sources:
                    if source in data:
                        statistics[key]['n'] += data[source]['n']
                        statistics[key]['max'].append(data[source]['max'])
                        statistics[key]['min'].append(data[source]['min'])
                        # Weight each source's average by its call count for a correct combined average.
                        statistics[key]['avg'].append(data[source]['avg'] * data[source]['n'])
                statistics[key]['max'] = max(statistics[key]['max']) if len(statistics[key]['max']) > 0 else 0
                statistics[key]['min'] = min(statistics[key]['min']) if len(statistics[key]['min']) > 0 else 0
                if statistics[key]['n'] > 0:
                    statistics[key]['avg'] = sum(statistics[key]['avg']) / float(statistics[key]['n'])
                else:
                    statistics[key]['avg'] = 0
                if key in previous_stats:
                    if delta < 0:
                        # Clock went backwards; a rate cannot be computed.
                        statistics[key]['n_ps'] = 0
                    elif delta == 0:
                        # No time elapsed; carry over the previous rate.
                        statistics[key]['n_ps'] = previous_stats[key].get('n_ps', 0)
                    else:
                        # Rate is clamped at 0 in case counters were reset.
                        statistics[key]['n_ps'] = max(0, (statistics[key]['n'] - previous_stats[key]['n']) / delta)
                else:
                    statistics[key]['n_ps'] = 0
            # Cache this snapshot longer than the dynamic's timeout so the next evaluation can diff against it.
            volatile.set(prev_key, statistics, dynamic.timeout * 10)
            return statistics
        except Exception:
            # This might fail every now and then, e.g. on disk removal. Let's ignore for now.
            return {}

    def _stack_info(self):
        """
        Returns summarized properties for adding to the storage stacks
        """
        return {'osd_id': self.osd_id,
                'type': self.osd_type,
                'ips': self.ips,
                'port': self.port,
                'metadata': self.metadata,
                'claimed_by': self.alba_backend_guid}
class Backend(DataObject):
    """
    A Backend represents an instance of the supported backend types that has been setup with the OVS GUI
    """
    STATUSES = DataObject.enumerator('Status', ['INSTALLING', 'RUNNING', 'FAILURE', 'WARNING', 'DELETING'])
    __properties = [Property('name', str, unique=True, doc='Name of the Backend.'),
                    Property('status', STATUSES.keys(), default='INSTALLING', doc='State of the backend')]
    __relations = [Relation('backend_type', BackendType, 'backends', doc='Type of the backend.')]
    __dynamics = [Dynamic('linked_guid', str, 3600),
                  Dynamic('available', bool, 60),
                  Dynamic('regular_domains', list, 60),
                  Dynamic('access_rights', dict, 3600),
                  Dynamic('live_status', str, 30)]

    def _linked_guid(self):
        """
        Returns the GUID of the detail object that's linked to this particular backend. This depends on the backend type.
        This requires that the backlink from that object to this object is named <backend_type>_backend and is a
        one-to-one relation
        """
        if self.backend_type.has_plugin is False:
            return None
        # Backlink attribute is derived from the plugin's code (e.g. 'alba_backend_guid').
        return getattr(self, '{0}_backend_guid'.format(self.backend_type.code))

    def _available(self):
        """
        Returns True if the backend can be used
        """
        if self.backend_type.has_plugin is False:
            return False
        linked_backend = getattr(self, '{0}_backend'.format(self.backend_type.code))
        # Delegate availability to the plugin-specific backend object, if any.
        return linked_backend.available if linked_backend is not None else False

    def _regular_domains(self):
        """
        Returns a list of domain guids
        :return: List of domain guids
        """
        return [relation.domain_guid for relation in self.domains]

    def _access_rights(self):
        """
        A condensed extract from the user_rights and client_rights
        :return: dict
        """
        # Map each user/client guid onto its grant flag.
        return {'users': dict((right.user_guid, right.grant) for right in self.user_rights),
                'clients': dict((right.client_guid, right.grant) for right in self.client_rights)}

    def _live_status(self):
        """
        Retrieve the actual status from the Backend
        :return: Status reported by the plugin
        """
        if self.backend_type.has_plugin is False:
            return 'running'
        linked_backend = getattr(self, '{0}_backend'.format(self.backend_type.code))
        # Without a plugin-specific backend object, report the generic 'running' state.
        return linked_backend.live_status if linked_backend is not None else 'running'
class VolumedriverHealthCheck(object):
    """
    A healthcheck for the volumedriver components
    """
    MODULE = 'volumedriver'
    LOCAL_ID = System.get_my_machine_id()
    LOCAL_SR = System.get_my_storagerouter()
    VDISK_CHECK_SIZE = 1024 ** 3  # 1GB in bytes
    VDISK_HALTED_STATES = DataObject.enumerator('Halted_status', ['HALTED', 'FENCED'])
    VDISK_TIMEOUT_BEFORE_DELETE = 0.5
    # Only used to check status of a fenced volume. This should not be used to link a status of a non-halted/fenced volume
    FENCED_HALTED_STATUS_MAP = {'max_redirect': {'status': VDisk.STATUSES.NON_RUNNING,
                                                 'severity': 'failure',
                                                 'halted': ('These volumes are not running: {0}', ErrorCodes.volume_max_redirect),
                                                 'fenced': ('These volumes are fenced but not running on another node: {0}', ErrorCodes.volume_fenced_max_redirect)},
                                'halted': {'status': VDisk.STATUSES.HALTED,
                                           'severity': 'failure',
                                           'halted': ('These volumes are halted: {0}', ErrorCodes.volume_halted),
                                           'fenced': ('These volumes are fenced and but halted on another node: {0}', ErrorCodes.volume_fenced_halted)},
                                'connection_fail': {'status': 'UNKNOWN',
                                                    'severity': 'failure',
                                                    'halted': ('These volumes experienced a connectivity/timeout problem: {0}', ErrorCodes.voldrv_connection_problem),
                                                    'fenced': ('These volumes are fenced but experienced a connectivity/timeout problem on another node: {0}', ErrorCodes.voldrv_connection_problem)},
                                'ok': {'status': VDisk.STATUSES.RUNNING,
                                       'severity': 'failure',
                                       'halted': ('These volumes are running: {0}', ErrorCodes.volume_ok),
                                       'fenced': ('These volumes are fenced but running on another node: {0}', ErrorCodes.volume_fenced_ok)},
                                'not_found': {'status': 'NOT_FOUND',
                                              'severity': 'warning',
                                              'halted': ('These volumes could not be queried for information: {0}', ErrorCodes.volume_not_found),
                                              'fenced': ('These volumes are fenced but could not be queried for information on another node: {0}', ErrorCodes.volume_fenced_not_found)}}
    logger = Logger('healthcheck-ovs_volumedriver')

    @staticmethod
    @expose_to_cli(MODULE, 'dtl-test', HealthCheckCLI.ADDON_TYPE,
                   help='Verify that all VDisks their DTL is properly running',
                   short_help='Test if DTL is properly running')
    def check_dtl(result_handler):
        """
        Checks the dtl for all vdisks on the local node
        :param result_handler: logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        :return: None
        :rtype: NoneType
        """
        # Fetch vdisks hosted on this machine
        local_sr = System.get_my_storagerouter()
        if len(local_sr.vdisks_guids) == 0:
            return result_handler.skip('No VDisks present in cluster.')
        for vdisk_guid in local_sr.vdisks_guids:
            vdisk = VDisk(vdisk_guid)
            # Force a fresh read of the DTL status instead of a cached value.
            vdisk.invalidate_dynamics(['dtl_status', 'info'])
            if vdisk.dtl_status == 'ok_standalone' or vdisk.dtl_status == 'disabled':
                result_handler.success('VDisk {0}s DTL is disabled'.format(vdisk.name), code=ErrorCodes.volume_dtl_standalone)
            elif vdisk.dtl_status == 'ok_sync':
                result_handler.success('VDisk {0}s DTL is enabled and running.'.format(vdisk.name), code=ErrorCodes.volume_dtl_ok)
            elif vdisk.dtl_status == 'degraded':
                result_handler.warning('VDisk {0}s DTL is degraded.'.format(vdisk.name), code=ErrorCodes.volume_dtl_degraded)
            elif vdisk.dtl_status == 'checkup_required':
                result_handler.warning('VDisk {0}s DTL should be configured.'.format(vdisk.name), code=ErrorCodes.volume_dtl_checkup_required)
            elif vdisk.dtl_status == 'catch_up':
                result_handler.warning('VDisk {0}s DTL is enabled but still syncing.'.format(vdisk.name), code=ErrorCodes.volume_dtl_catch_up)
            else:
                result_handler.warning('VDisk {0}s DTL has an unknown status: {1}.'.format(vdisk.name, vdisk.dtl_status), code=ErrorCodes.volume_dtl_unknown)

    @staticmethod
    @timeout_decorator.timeout(30)
    def _check_volumedriver(vdisk_name, storagedriver_guid, logger, vdisk_size=VDISK_CHECK_SIZE):
        """
        Checks if the volumedriver can create a new vdisk
        :param vdisk_name: name of a vdisk (e.g. test.raw)
        :type vdisk_name: str
        :param storagedriver_guid: guid of a storagedriver
        :type storagedriver_guid: str
        :param vdisk_size: size of the volume in bytes (e.g. 10737418240 is 10GB in bytes)
        :type vdisk_size: int
        :param logger: logger instance
        :type logger: ovs.extensions.healthcheck.result.HCResults
        :return: True if succeeds
        :rtype: bool
        """
        try:
            VDiskController.create_new(vdisk_name, vdisk_size, storagedriver_guid)
        except FileExistsException:
            # can be ignored until fixed in framework
            # https://github.com/openvstorage/framework/issues/1247
            return True
        except Exception as ex:
            logger.failure('Creation of the vdisk failed. Got {0}'.format(str(ex)))
            return False
        return True

    @staticmethod
    @timeout_decorator.timeout(30)
    def _check_volumedriver_remove(vpool_name, vdisk_name, present=True):
        """
        Remove a vdisk from a vpool
        :param vdisk_name: name of a vdisk (e.g. test.raw)
        :type vdisk_name: str
        :param vpool_name: name of a vpool
        :type vpool_name: str
        :param present: should the disk be present?
        :type present: bool
        :return: True if disk is not present anymore
        :rtype: bool
        """
        try:
            vdisk = VDiskHelper.get_vdisk_by_name(vdisk_name=vdisk_name, vpool_name=vpool_name)
            VDiskController.delete(vdisk.guid)
            return True
        except VDiskNotFoundError:
            # not found, if it should be present, re-raise the exception
            if present:
                raise
            else:
                return True

    @staticmethod
    # @expose_to_cli(MODULE, 'volumedrivers-test', HealthCheckCLI.ADDON_TYPE,
    #                help='Verify that the Volumedrivers are responding to events',
    #                short_help='Test if Volumedrivers are responding to events')
    def check_volumedrivers(result_handler):
        """
        Checks if the VOLUMEDRIVERS work on a local machine (compatible with multiple vPools)
        :param result_handler: logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        :return: None
        :rtype: NoneType
        """
        result_handler.info('Checking volumedrivers.', add_to_result=False)
        vpools = VPoolList.get_vpools()
        if len(vpools) == 0:
            result_handler.skip('No vPools found!')
            return
        for vp in vpools:
            name = 'ovs-healthcheck-test-{0}.raw'.format(VolumedriverHealthCheck.LOCAL_ID)
            if vp.guid not in VolumedriverHealthCheck.LOCAL_SR.vpools_guids:
                result_handler.skip('Skipping vPool {0} because it is not living here.'.format(vp.name))
                continue
            try:
                # delete if previous vdisk with this name exists
                storagedriver_guid = next((storagedriver.guid for storagedriver in vp.storagedrivers
                                           if storagedriver.storagedriver_id == vp.name + VolumedriverHealthCheck.LOCAL_ID))
                # create a new one
                volume = VolumedriverHealthCheck._check_volumedriver(name, storagedriver_guid, result_handler)
                if volume is True:
                    # delete the recently created
                    try:
                        VolumedriverHealthCheck._check_volumedriver_remove(vpool_name=vp.name, vdisk_name=name)
                    except Exception as ex:
                        raise RuntimeError('Could not delete the created volume. Got {0}'.format(str(ex)))
                    # Working at this point
                    result_handler.success('Volumedriver of vPool {0} is working fine!'.format(vp.name))
                else:
                    # not working
                    result_handler.failure('Something went wrong during vdisk creation on vpool {0}.'.format(vp.name))
            except TimeoutError:
                # timeout occurred, action took too long
                result_handler.warning('Volumedriver of vPool {0} seems to timeout.'.format(vp.name))
            except IOError as ex:
                # can be input/output error by volumedriver
                result_handler.failure('Volumedriver of vPool {0} seems to have IO problems. Got `{1}` while executing.'.format(vp.name, ex.message))
            except RuntimeError as ex:
                result_handler.failure('Volumedriver of vPool {0} seems to have problems. Got `{1}` while executing.'.format(vp.name, ex))
            except VDiskNotFoundError:
                result_handler.warning('Volume on vPool {0} was not found, please retry again'.format(vp.name))
            except Exception as ex:
                result_handler.failure('Uncaught exception for Volumedriver of vPool {0}.Got {1} while executing.'.format(vp.name, ex))
            finally:
                # Attempt to delete the created vdisk
                try:
                    VolumedriverHealthCheck._check_volumedriver_remove(vpool_name=vp.name, vdisk_name=name, present=False)
                except:
                    pass

    @classmethod
    def _is_volumedriver_timeout(cls, exception):
        """
        Validates whether a certain exception is a timeout exception (RuntimeError, prior to NodeNotReachable in voldriver 6.17)
        :param exception: Exception object to check
        :return: True if it is a timeout or False if it's not
        :rtype: bool
        """
        return isinstance(exception, ClusterNotReachableException) or isinstance(exception, RuntimeError) and 'failed to send XMLRPC request' in str(exception)

    @classmethod
    @expose_to_cli(MODULE, 'halted-volumes-test', HealthCheckCLI.ADDON_TYPE,
                   help='Verify that there are no halted/fenced volumes within the cluster',
                   short_help='Test if there are no halted/fenced volumes')
    def check_for_halted_volumes(cls, result_handler):
        """
        Checks for halted volumes on a single or multiple vPools
        This will only check the volume states on the current node. If any other volumedriver would be down,
        only the HA'd volumes would pop-up as they could appear halted here (should be verified by the volumedriver team)
        :param result_handler: logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        :return: None
        :rtype: NoneType
        """
        vpools = VPoolList.get_vpools()
        local_sr = System.get_my_storagerouter()
        if len(vpools) == 0:
            result_handler.skip('No vPools found!'.format(len(vpools)), code=ErrorCodes.vpools_none)
            return
        for vpool in vpools:
            log_start = 'Halted volumes test vPool {0}'.format(vpool.name)
            if vpool.guid not in local_sr.vpools_guids:
                result_handler.skip('{0} - Skipping vPool {1} because it is not living here.'.format(log_start, vpool.name),
                                    code=ErrorCodes.vpool_not_local, add_to_result=False)
                continue
            result_handler.info('{0} - Retrieving all information'.format(log_start), add_to_result=False)
            # Find the StorageDriver of this vPool living on this StorageRouter.
            storagedriver = None
            for std in vpool.storagedrivers:
                if std.storagerouter_guid == local_sr.guid:
                    storagedriver = std
                    break
            if storagedriver is None:
                result_handler.failure('{0} - Could not associate a StorageDriver with this StorageRouter'.format(log_start),
                                       code=ErrorCodes.std_no_str)
                continue
            volume_fenced_states = dict((key, []) for key in cls.FENCED_HALTED_STATUS_MAP.keys())
            volume_lists = {cls.VDISK_HALTED_STATES.HALTED: [], cls.VDISK_HALTED_STATES.FENCED: []}
            volume_states = {cls.VDISK_HALTED_STATES.HALTED: {cls.VDISK_HALTED_STATES.HALTED: volume_lists[cls.VDISK_HALTED_STATES.HALTED]},
                             cls.VDISK_HALTED_STATES.FENCED: volume_fenced_states}  # Less loops to write
            result_handler.info('{0} - Scanning for halted volumes'.format(log_start), add_to_result=False)
            try:
                voldrv_client = vpool.storagedriver_client
                objectregistry_client = vpool.objectregistry_client
            except Exception:
                cls.logger.exception('{0} - Unable to instantiate the required clients'.format(log_start))
                result_handler.exception('{0} - Unable to load the Volumedriver clients'.format(log_start),
                                         code=ErrorCodes.voldr_unknown_problem)
                continue
            try:
                # Listing all halted volumes with the volumedriver client as it detects stolen volumes too (fenced instances)
                volumes = voldrv_client.list_halted_volumes(str(storagedriver.storagedriver_id))
            except Exception as ex:
                cls.logger.exception('{0} - Exception occurred when listing volumes'.format(log_start))
                if cls._is_volumedriver_timeout(ex) is False:
                    # Unhandled exception at this point
                    result_handler.exception('{0} - Unable to list the Volumes due to an unidentified problem. Please check the logging'.format(log_start),
                                             code=ErrorCodes.voldr_unknown_problem)
                else:
                    result_handler.failure('{0} - Could not list the volumes for due to a connection problem.'.format(log_start),
                                           code=ErrorCodes.voldrv_connection_problem)
                continue
            # Retrieve the parent of the current volume. If this id would not be identical to the one we fetched for, that would mean it is fenced
            # Object registry goes to Arakoon
            # Capturing any possible that would occur to provide a clearer vision of what went wrong
            for volume in volumes:
                try:
                    registry_entry = objectregistry_client.find(volume)
                    if registry_entry.node_id() == storagedriver.storagedriver_id:
                        volume_lists[cls.VDISK_HALTED_STATES.HALTED].append(volume)
                    else:
                        # Fenced
                        volume_lists[cls.VDISK_HALTED_STATES.FENCED].append(volume)
                except Exception:
                    msg = '{0} - Unable to consult the object registry client for volume \'{1}\''.format(log_start, volume)
                    cls.logger.exception(msg)
                    result_handler.exception(msg, code=ErrorCodes.voldr_unknown_problem)
            # Include fenced - OTHER state combo
            for volume in volume_lists[cls.VDISK_HALTED_STATES.FENCED]:
                try:
                    _, state = cls._get_volume_issue(voldrv_client, volume, log_start)
                    volume_fenced_states[state].append(volume)
                except Exception:
                    # Only unhandled at this point
                    result_handler.exception('{0} - Unable to the volume info for volume {1} due to an unidentified problem. Please check the logging'.format(log_start, volume),
                                             code=ErrorCodes.voldr_unknown_problem)
            for halted_state, volume_state_info in volume_states.iteritems():
                for state, volumes in volume_state_info.iteritems():
                    if len(volumes) == 0:
                        continue  # Skip OK/empty lists
                    map_value = cls.FENCED_HALTED_STATUS_MAP[state.lower()]
                    log_func = getattr(result_handler, map_value['severity'])
                    message, code = map_value[halted_state.lower()]
                    log_func('{0} - {1}'.format(log_start, message.format(', '.join(volumes))), code=code)
            # Call success in case nothing is wrong
            if all(len(l) == 0 for l in volume_lists.values()):
                result_handler.success('{0} - No volumes found in halted/fenced state'.format(log_start))

    @classmethod
    def _get_volume_issue(cls, voldrv_client, volume_id, log_start):
        """
        Maps all possible exceptions to a state. These states can be mapped to a status using the
        FENCED_HALTED_STATUS_MAP because the volumedriver does not return a state itself
        :param voldrv_client: Storagedriver client
        :param volume_id: Id of the volume
        :raises: The unhandled exception when such an exception could occur (we try to identify all problems but one could slip past us)
        :return: The volume_id and state
        :rtype: tuple(str, str)
        """
        state = 'ok'
        try:
            # Check if the information can be retrieved about the volume
            vol_info = voldrv_client.info_volume(volume_id, req_timeout_secs=5)
            if vol_info.halted is True:
                state = 'halted'
        except Exception as ex:
            cls.logger.exception('{0} - Exception occurred when fetching the info for volume \'{1}\''.format(log_start, volume_id))
            if isinstance(ex, ObjectNotFoundException):
                # Ignore ovsdb invalid entrees as model consistency will handle it.
                state = 'not_found'
            elif isinstance(ex, MaxRedirectsExceededException):
                # This means the volume is not halted but detached or unreachable for the Volumedriver
                state = 'max_redirect'
            # @todo replace RuntimeError with NodeNotReachableException
            elif any(isinstance(ex, exception) for exception in [ClusterNotReachableException, RuntimeError]):
                if cls._is_volumedriver_timeout(ex) is False:
                    # Unhandled exception at this point
                    raise
                # Timeout / connection problems
                state = 'connection_fail'
            else:
                # Something to be looked at
                raise
        return volume_id, state

    @staticmethod
    @timeout_decorator.timeout(5)
    def _check_filedriver(vp_name, test_name):
        """
        Async method to checks if a FILEDRIVER `touch` works on a vpool
        Always try to check if the file exists after performing this method
        :param vp_name: name of the vpool
        :type vp_name: str
        :param test_name: name of the test file (e.g. `ovs-healthcheck-LOCAL_ID`)
        :type test_name: str
        :return: True if succeeded, False if failed
        :rtype: bool
        """
        return subprocess.check_output('touch /mnt/{0}/{1}.xml'.format(vp_name, test_name), stderr=subprocess.STDOUT, shell=True)

    @staticmethod
    @timeout_decorator.timeout(5)
    def _check_filedriver_remove(vp_name):
        """
        Async method to checks if a FILEDRIVER `remove` works on a vpool
        Always try to check if the file exists after performing this method
        :param vp_name: name of the vpool
        :type vp_name: str
        :return: True if succeeded, False if failed
        :rtype: bool
        """
        subprocess.check_output('rm -f /mnt/{0}/ovs-healthcheck-test-*.xml'.format(vp_name), stderr=subprocess.STDOUT, shell=True)
        return not os.path.exists('/mnt/{0}/ovs-healthcheck-test-*.xml'.format(vp_name))

    @staticmethod
    # @expose_to_cli(MODULE, 'filedrivers-test', HealthCheckCLI.ADDON_TYPE,
    #                help='Verify that all Volumedrivers are accessible through FUSE',
    #                short_help='Test if that the FUSE layer is responding')
    # @todo replace fuse test with edge test
    def check_filedrivers(result_handler):
        """
        Checks if the file drivers work on a local machine (compatible with multiple vPools)
        :param result_handler: logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        """
        result_handler.info('Checking file drivers.', add_to_result=False)
        vpools = VPoolList.get_vpools()
        # perform tests
        if len(vpools) == 0:
            result_handler.skip('No vPools found!')
            return
        for vp in vpools:
            name = 'ovs-healthcheck-test-{0}'.format(VolumedriverHealthCheck.LOCAL_ID)
            if vp.guid not in VolumedriverHealthCheck.LOCAL_SR.vpools_guids:
                result_handler.skip('Skipping vPool {0} because it is not living here.'.format(vp.name))
                continue
            try:
                VolumedriverHealthCheck._check_filedriver(vp.name, name)
                if os.path.exists('/mnt/{0}/{1}.xml'.format(vp.name, name)):
                    # working
                    VolumedriverHealthCheck._check_filedriver_remove(vp.name)
                    result_handler.success('Filedriver for vPool {0} is working fine!'.format(vp.name))
                else:
                    # not working
                    result_handler.failure('Filedriver for vPool {0} seems to have problems!'.format(vp.name))
            except TimeoutError:
                # timeout occurred, action took too long
                result_handler.warning('Filedriver of vPool {0} seems to have `timeout` problems'.format(vp.name))
            except subprocess.CalledProcessError:
                # can be input/output error by filedriver
                result_handler.failure('Filedriver of vPool {0} seems to have `input/output` problems'.format(vp.name))

    @staticmethod
    @expose_to_cli(MODULE, 'volume-potential-test', HealthCheckCLI.ADDON_TYPE,
                   help='Verify that the Volumedrivers have enough VDisk potential left',
                   short_help='Test if the Volumedrivers can create enough VDisks')
    @expose_to_cli.option('--critical-vol-number', '-c', type=int, default=25,
                          help='Minimum number of volumes left to create')
    def check_volume_potential(result_handler, critical_vol_number=25):
        """
        Checks all local storage drivers from a volume driver. Results in a success if enough volumes are available,
        a warning if the number of volumes is lower then a threshold value (critical_volume_number) and a failure if the nr of volumes ==0)
        :param result_handler: logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        :param critical_vol_number: Mimimal number of volumes that can be made before throwing a warning
        :type critical_vol_number: int
        """
        result_handler.info('Checking volume potential of storagedrivers')
        if not isinstance(critical_vol_number, int) or critical_vol_number < 0:
            raise ValueError('Critical volume number should be a positive integer')
        for std in VolumedriverHealthCheck.LOCAL_SR.storagedrivers:
            try:
                std_config = StorageDriverConfiguration(std.vpool_guid, std.storagedriver_id)
                client = LocalStorageRouterClient(std_config.remote_path)
                vol_potential = client.volume_potential(str(std.storagedriver_id))
                # Severity is derived from how much headroom is left relative to the threshold.
                if vol_potential >= critical_vol_number:
                    log_level = 'success'
                elif critical_vol_number > vol_potential > 0:
                    log_level = 'warning'
                else:
                    log_level = 'failure'
                getattr(result_handler, log_level)('Volume potential of local storage driver: {0}: {1} (potential at: {2})'.format(std.storagedriver_id, log_level.upper(), vol_potential))
            except RuntimeError:
                result_handler.exception('Unable to retrieve configuration for storagedriver {0}'.format(std.storagedriver_id))

    @staticmethod
    @expose_to_cli(MODULE, 'sco-cache-mountpoint-test', HealthCheckCLI.ADDON_TYPE,
                   help='Verify that sco-cache mountpoints are up and running',
                   short_help='Test if sco-cache mountpoints are up and running')
    def check_sco_cache_mountpoints(result_handler):
        """
        Iterates over StorageDrivers of a local StorageRouter and will check all its sco cache mount points.
        Will result in a warning log if the sco is in offline state
        :param result_handler: logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        """
        result_handler.info('Checking sco cache mount points on all local storagedrivers')
        for std in VolumedriverHealthCheck.LOCAL_SR.storagedrivers:
            try:
                std_config = StorageDriverConfiguration(std.vpool_guid, std.storagedriver_id)
                client = LocalStorageRouterClient(std_config.remote_path)
                for std_info in client.sco_cache_mount_point_info(str(std.storagedriver_id)):
                    if std_info.offlined is True:
                        result_handler.warning('Mountpoint at location {0} of storagedriver {1} is in offline state'.format(std_info.path, std.storagedriver_id))
                    else:
                        result_handler.success('Mountpoint at location {0} of storagedriver {1} is in online state'.format(std_info.path, std.storagedriver_id))
            except RuntimeError:
                result_handler.exception('Unable to check sco cache mountpoint of storagedriver {0}'.format(std.storagedriver_id))
class VPool(DataObject):
    """
    The VPool class represents a vPool. A vPool is a Virtual Storage Pool, a Filesystem, used to
    deploy vDisks. a vPool can span multiple Storage Drivers and connects to a single Storage BackendType.
    """
    STATUSES = DataObject.enumerator('Status', ['DELETING', 'EXTENDING', 'FAILURE', 'INSTALLING', 'RUNNING', 'SHRINKING'])

    __properties = [Property('name', str, unique=True, doc='Name of the vPool'),
                    Property('description', str, mandatory=False, doc='Description of the vPool'),
                    Property('size', int, mandatory=False, doc='Size of the vPool expressed in Bytes. Set to zero if not applicable.'),
                    Property('login', str, mandatory=False, doc='Login/Username for the Storage BackendType.'),
                    Property('password', str, mandatory=False, doc='Password for the Storage BackendType.'),
                    Property('connection', str, mandatory=False, doc='Connection (IP, URL, Domain name, Zone, ...) for the Storage BackendType.'),
                    Property('metadata', dict, mandatory=False, doc='Metadata for the backends, as used by the Storage Drivers.'),
                    Property('rdma_enabled', bool, default=False, doc='Has the vpool been configured to use RDMA for DTL transport, which is only possible if all storagerouters are RDMA capable'),
                    Property('status', STATUSES.keys(), doc='Status of the vPool')]
    __relations = []
    __dynamics = [Dynamic('configuration', dict, 3600),
                  Dynamic('statistics', dict, 4),
                  Dynamic('identifier', str, 120)]
    # These attributes are lazily-loaded helper clients, not model properties
    _fixed_properties = ['storagedriver_client', 'objectregistry_client']

    def __init__(self, *args, **kwargs):
        """
        Initializes a vPool, setting up its additional helpers
        """
        DataObject.__init__(self, *args, **kwargs)
        # Temporarily unfreeze the DataObject so non-model attributes can be attached
        self._frozen = False
        self._storagedriver_client = None
        self._objectregistry_client = None
        self._frozen = True

    @property
    def storagedriver_client(self):
        """
        Client used for communication between Storage Driver and framework
        Lazily loaded on first access via reload_client.
        :return: StorageDriverClient
        """
        if self._storagedriver_client is None:
            self.reload_client('storagedriver')
        return self._storagedriver_client

    @property
    def objectregistry_client(self):
        """
        Client used for communication between Storage Driver OR and framework
        Lazily loaded on first access via reload_client.
        :return: ObjectRegistryClient
        """
        if self._objectregistry_client is None:
            self.reload_client('objectregistry')
        return self._objectregistry_client

    def _configuration(self):
        """
        VPool configuration, read from the configuration of the first StorageDriver serving this vPool.
        Returns {} when no StorageDriver (with a StorageRouter) is available yet.
        """
        if not self.storagedrivers or not self.storagedrivers[0].storagerouter:
            return {}
        storagedriver_config = StorageDriverConfiguration('storagedriver', self.guid, self.storagedrivers[0].storagedriver_id)
        storagedriver_config.load()
        dtl = storagedriver_config.configuration['distributed_transaction_log']
        file_system = storagedriver_config.configuration['filesystem']
        volume_router = storagedriver_config.configuration['volume_router']
        volume_manager = storagedriver_config.configuration['volume_manager']
        dtl_host = file_system['fs_dtl_host']
        dtl_mode = file_system.get('fs_dtl_mode', StorageDriverClient.VOLDRV_DTL_ASYNC)
        cluster_size = volume_manager['default_cluster_size'] / 1024  # Bytes -> KiB
        dtl_transport = dtl['dtl_transport']
        sco_multiplier = volume_router['vrouter_sco_multiplier']
        dtl_config_mode = file_system['fs_dtl_config_mode']
        tlog_multiplier = volume_manager['number_of_scos_in_tlog']
        non_disposable_sco_factor = volume_manager['non_disposable_scos_factor']
        sco_size = sco_multiplier * cluster_size / 1024  # SCO size is in MiB ==> SCO multiplier * cluster size (4 KiB by default)
        write_buffer = tlog_multiplier * sco_size * non_disposable_sco_factor
        # DTL counts as disabled only when manually configured with an empty host
        dtl_enabled = not (dtl_config_mode == StorageDriverClient.VOLDRV_DTL_MANUAL_MODE and dtl_host == '')
        return {'sco_size': sco_size,
                'dtl_mode': StorageDriverClient.REVERSE_DTL_MODE_MAP[dtl_mode] if dtl_enabled is True else 'no_sync',
                'dtl_enabled': dtl_enabled,
                'cluster_size': cluster_size,
                'write_buffer': write_buffer,
                'dtl_transport': StorageDriverClient.REVERSE_DTL_TRANSPORT_MAP[dtl_transport],
                'dtl_config_mode': dtl_config_mode,
                'tlog_multiplier': tlog_multiplier}

    def _statistics(self, dynamic):
        """
        Aggregates the Statistics (IOPS, Bandwidth, ...) of each vDisk served by the vPool.
        """
        from ovs.dal.hybrids.vdisk import VDisk  # Local import to avoid a circular dependency
        statistics = {}
        # Seed all counters (absolute and per-second variants) with zero
        for key in StorageDriverClient.STAT_KEYS:
            statistics[key] = 0
            statistics['{0}_ps'.format(key)] = 0
        for storagedriver in self.storagedrivers:
            for key, value in storagedriver.fetch_statistics().iteritems():
                statistics[key] += value
        statistics['timestamp'] = time.time()
        # Fills in the *_ps keys based on the previously-cached sample
        VDisk.calculate_delta(self._key, dynamic, statistics)
        return statistics

    def _identifier(self):
        """
        An identifier of this vPool in its current configuration state
        (changes whenever the set of StorageDrivers changes).
        """
        return '{0}_{1}'.format(self.guid, '_'.join(self.storagedrivers_guids))

    def reload_client(self, client):
        """
        Reloads the StorageDriverClient or ObjectRegistryClient
        :param client: Which client to reload: 'storagedriver' or 'objectregistry'
        """
        self._frozen = False
        if client == 'storagedriver':
            self._storagedriver_client = StorageDriverClient.load(self)
        elif client == 'objectregistry':
            self._objectregistry_client = ObjectRegistryClient.load(self)
        self._frozen = True
class VPool(DataObject):
    """
    The VPool class represents a vPool. A vPool is a Virtual Storage Pool, a Filesystem, used to
    deploy vMachines. a vPool can span multiple Storage Drivers and connects to a single Storage BackendType.
    """
    STATUSES = DataObject.enumerator('Status', ['DELETING', 'EXTENDING', 'FAILURE', 'INSTALLING', 'RUNNING', 'SHRINKING'])

    __properties = [Property('name', str, doc='Name of the vPool'),
                    Property('description', str, mandatory=False, doc='Description of the vPool'),
                    Property('size', int, mandatory=False, doc='Size of the vPool expressed in Bytes. Set to zero if not applicable.'),
                    Property('login', str, mandatory=False, doc='Login/Username for the Storage BackendType.'),
                    Property('password', str, mandatory=False, doc='Password for the Storage BackendType.'),
                    Property('connection', str, mandatory=False, doc='Connection (IP, URL, Domain name, Zone, ...) for the Storage BackendType.'),
                    Property('metadata', dict, mandatory=False, doc='Metadata for the backends, as used by the Storage Drivers.'),
                    Property('rdma_enabled', bool, default=False, doc='Has the vpool been configured to use RDMA for DTL transport, which is only possible if all storagerouters are RDMA capable'),
                    Property('status', STATUSES.keys(), doc='Status of the vPool')]
    __relations = [Relation('backend_type', BackendType, 'vpools', doc='Type of storage backend.')]
    __dynamics = [Dynamic('statistics', dict, 4),
                  Dynamic('identifier', str, 120),
                  Dynamic('stored_data', int, 60)]
    # Lazily-loaded helper client, not a model property
    _fixed_properties = ['storagedriver_client']

    def __init__(self, *args, **kwargs):
        """
        Initializes a vPool, setting up its additional helpers
        """
        DataObject.__init__(self, *args, **kwargs)
        # Temporarily unfreeze the DataObject so non-model attributes can be attached
        self._frozen = False
        self._storagedriver_client = None
        self._frozen = True

    @property
    def storagedriver_client(self):
        """
        Client used for communication between Storage Driver and framework
        Lazily loaded on first access via reload_client.
        :return: StorageDriverClient
        """
        if self._storagedriver_client is None:
            self.reload_client()
        return self._storagedriver_client

    def _statistics(self, dynamic):
        """
        Aggregates the Statistics (IOPS, Bandwidth, ...) of each vDisk served by the vPool.
        """
        from ovs.dal.hybrids.vdisk import VDisk  # Local import to avoid a circular dependency
        statistics = {}
        # Seed all counters (absolute and per-second variants) with zero
        for key in StorageDriverClient.STAT_KEYS:
            statistics[key] = 0
            statistics['{0}_ps'.format(key)] = 0
        for storagedriver in self.storagedrivers:
            for key, value in storagedriver.fetch_statistics().iteritems():
                statistics[key] += value
        statistics['timestamp'] = time.time()
        # Fills in the *_ps keys based on the previously-cached sample
        VDisk.calculate_delta(self._key, dynamic, statistics)
        return statistics

    def _stored_data(self):
        """
        Aggregates the Stored Data of each vDisk served by the vPool.
        """
        return self.statistics['stored']

    def _identifier(self):
        """
        An identifier of this vPool in its current configuration state
        (changes whenever the set of StorageDrivers changes).
        """
        return '{0}_{1}'.format(self.guid, '_'.join(self.storagedrivers_guids))

    def reload_client(self):
        """
        Reloads the StorageDriver Client
        """
        self._frozen = False
        self._storagedriver_client = StorageDriverClient.load(self)
        self._frozen = True
class StorageDriver(DataObject):
    """
    The StorageDriver class represents a Storage Driver. A Storage Driver is an application
    on a Storage Router to which the vDisks connect. The Storage Driver is the gateway to the Storage Backend.
    """
    # Distance weights used to build the node_distance_map of the cluster node config
    DISTANCES = DataObject.enumerator('Distance', {'NEAR': 0, 'FAR': 10000, 'INFINITE': 20000})
    _logger = Logger('hybrids')

    __properties = [Property('name', str, doc='Name of the Storage Driver.'),
                    Property('description', str, mandatory=False, doc='Description of the Storage Driver.'),
                    Property('ports', dict, doc='Ports on which the Storage Driver is listening (management, xmlrpc, dtl, edge).'),
                    Property('cluster_ip', str, doc='IP address on which the Storage Driver is listening.'),
                    Property('storage_ip', str, doc='IP address on which the vpool is shared to hypervisor'),
                    Property('storagedriver_id', str, unique=True, indexed=True, doc='ID of the Storage Driver as known by the Storage Drivers.'),
                    Property('mountpoint', str, doc='Mountpoint from which the Storage Driver serves data'),
                    Property('startup_counter', int, default=0, doc='StorageDriver startup counter')]
    __relations = [Relation('vpool', VPool, 'storagedrivers'),
                   Relation('storagerouter', StorageRouter, 'storagedrivers')]
    __dynamics = [Dynamic('status', str, 30),
                  Dynamic('statistics', dict, 4),
                  Dynamic('edge_clients', list, 30),
                  Dynamic('vdisks_guids', list, 15),
                  Dynamic('proxy_summary', dict, 15),
                  Dynamic('vpool_backend_info', dict, 60),
                  Dynamic('cluster_node_config', dict, 3600),
                  Dynamic('global_write_buffer', int, 60)]

    def _status(self):
        """
        Fetches the Status of the Storage Driver.
        NOTE(review): currently a stub that always returns None.
        """
        _ = self
        return None

    def _statistics(self, dynamic):
        """
        Aggregates the Statistics (IOPS, Bandwidth, ...) of the vDisks connected to the Storage Driver.
        """
        from ovs.dal.hybrids.vdisk import VDisk  # Local import to avoid a circular dependency
        statistics = {}
        for key, value in self.fetch_statistics().iteritems():
            statistics[key] = value
        statistics['timestamp'] = time.time()
        # Fills in per-second deltas based on the previously-cached sample
        VDisk.calculate_delta(self._key, dynamic, statistics)
        return statistics

    def _edge_clients(self):
        """
        Retrieves all edge clients connected to this StorageDriver.
        Returns an empty/partial list (and logs) when the volumedriver cannot be reached.
        """
        clients = []
        try:
            for item in self.vpool.storagedriver_client.list_client_connections(str(self.storagedriver_id), req_timeout_secs=2):
                clients.append({'key': '{0}:{1}'.format(item.ip, item.port),
                                'object_id': item.object_id,
                                'ip': item.ip,
                                'port': item.port,
                                'server_ip': self.storage_ip,
                                'server_port': self.ports['edge']})
        except Exception:
            StorageDriver._logger.exception('Error loading edge clients from {0}'.format(self.storagedriver_id))
        # Deterministic ordering for consumers (e.g. the GUI)
        clients.sort(key=lambda e: (e['ip'], e['port']))
        return clients

    def _vdisks_guids(self):
        """
        Gets the vDisk guids served by this StorageDriver.
        """
        from ovs.dal.lists.vdisklist import VDiskList  # Local import to avoid a circular dependency
        volume_ids = []
        # The object registry knows which node currently serves each volume
        for entry in self.vpool.objectregistry_client.get_all_registrations():
            if entry.node_id() == self.storagedriver_id:
                volume_ids.append(entry.object_id())
        return VDiskList.get_in_volume_ids(volume_ids).guids

    def fetch_statistics(self):
        """
        Loads statistics from this vDisk - returns unprocessed data
        Falls back to empty statistics when the volumedriver cannot be reached.
        """
        # Load data from volumedriver
        sdstats = StorageDriverClient.EMPTY_STATISTICS()
        if self.storagedriver_id and self.vpool:
            try:
                sdstats = self.vpool.storagedriver_client.statistics_node(str(self.storagedriver_id), req_timeout_secs=2)
            except Exception as ex:
                StorageDriver._logger.error('Error loading statistics_node from {0}: {1}'.format(self.storagedriver_id, ex))
        # Load volumedriver data in dictionary
        return VDisk.extract_statistics(sdstats, None if len(self.vpool.vdisks) == 0 else self.vpool.vdisks[0])

    def _vpool_backend_info(self):
        """
        Retrieve some additional information about the vPool to be shown in the GUI
        Size of the global write buffer for this Storage Driver, the accelerated backend info, connection info and caching strategy
        :return: Information about vPool and accelerated Backend
        :rtype: dict
        """
        vpool_backend_info = {'backend': copy.deepcopy(self.vpool.metadata['backend']),
                              'caching_info': {StorageDriverConfiguration.CACHE_BLOCK: {'read': False,
                                                                                        'write': False,
                                                                                        'quota': None,
                                                                                        'backend_info': None},  # Will contain connection info if it wouldn't be None
                                               StorageDriverConfiguration.CACHE_FRAGMENT: {'read': False,
                                                                                           'write': False,
                                                                                           'quota': None,
                                                                                           'backend_info': None}}}
        if 'caching_info' not in self.vpool.metadata:
            # Older metadata layout: return the defaults rather than failing
            self._logger.critical('Metadata structure has not been updated yet')
            return vpool_backend_info
        if self.storagerouter_guid not in self.vpool.metadata['caching_info']:
            # No caching configured
            return vpool_backend_info
        for cache_type, cache_data in vpool_backend_info['caching_info'].iteritems():
            caching_info = self.vpool.metadata['caching_info'][self.storagerouter_guid][cache_type]
            # Update the cache data matching the keys currently specified in cache_data
            cache_data.update((k, caching_info[k]) for k in cache_data.viewkeys() & caching_info.viewkeys())
            # Possible set backend_info to None to match this view
            if caching_info['is_backend'] is False:
                cache_data['backend_info'] = None
        # Add global write buffer
        vpool_backend_info['global_write_buffer'] = self.global_write_buffer
        return vpool_backend_info

    def _cluster_node_config(self):
        """
        Prepares a ClusterNodeConfig dict for the StorageDriver process
        (addresses, ports and a distance map towards the other StorageDrivers of the vPool).
        """
        from ovs.extensions.generic.configuration import Configuration, NotFoundException
        rdma = Configuration.get('/ovs/framework/rdma')
        distance_map = {}
        primary_domains = []
        secondary_domains = []
        # Split this StorageRouter's domains into primary (failure) and secondary (backup) domains
        for junction in self.storagerouter.domains:
            if junction.backup is False:
                primary_domains.append(junction.domain_guid)
            else:
                secondary_domains.append(junction.domain_guid)
        # @todo implement more race-conditions guarantees. Current guarantee is the single update invalidating the value
        # through cluster_registry_checkup
        try:
            storagerouters_marked_for_update = list(Configuration.list(VPOOL_UPDATE_KEY))
        except NotFoundException:
            storagerouters_marked_for_update = []
        for sd in self.vpool.storagedrivers:
            if sd.guid == self.guid:
                continue  # No distance to ourselves
            if sd.storagerouter_guid in storagerouters_marked_for_update:
                # Nodes being updated are pushed far away so they are avoided
                distance_map[str(sd.storagedriver_id)] = StorageDriver.DISTANCES.FAR
            elif len(primary_domains) == 0:
                # Without domain configuration every peer is considered near
                distance_map[str(sd.storagedriver_id)] = StorageDriver.DISTANCES.NEAR
            else:
                distance = StorageDriver.DISTANCES.INFINITE
                for junction in sd.storagerouter.domains:
                    if junction.backup is False:
                        if junction.domain_guid in primary_domains:
                            distance = min(distance, StorageDriver.DISTANCES.NEAR)
                            break  # We can break here since we reached the minimum distance
                        elif junction.domain_guid in secondary_domains:
                            distance = min(distance, StorageDriver.DISTANCES.FAR)
                distance_map[str(sd.storagedriver_id)] = distance
        return {'vrouter_id': self.storagedriver_id,
                'host': self.storage_ip,
                'message_port': self.ports['management'],
                'xmlrpc_host': self.cluster_ip,
                'xmlrpc_port': self.ports['xmlrpc'],
                'failovercache_host': self.storage_ip,
                'failovercache_port': self.ports['dtl'],
                'network_server_uri': '{0}://{1}:{2}'.format('rdma' if rdma else 'tcp',
                                                             self.storage_ip,
                                                             self.ports['edge']),
                'node_distance_map': distance_map}

    def _proxy_summary(self):
        """
        Returns a summary of the proxies of this StorageDriver
        :return: summary of the proxies
        :rtype: dict
        """
        proxy_info = {'red': 0, 'orange': 0, 'green': 0}
        summary = {'proxies': proxy_info}
        try:
            service_manager = ServiceFactory.get_manager()
            client = SSHClient(self.storagerouter)
        except Exception:
            self._logger.exception('Unable to retrieve necessary clients')
        else:
            for alba_proxy in self.alba_proxies:
                try:
                    service_status = service_manager.get_service_status(alba_proxy.service.name, client)
                except Exception:
                    # A ValueError can occur when the services are still being deployed (the model will be updated before the actual deployment)
                    self._logger.exception('Unable to retrieve the service status for service {0} of StorageDriver {1}'.format(alba_proxy.service.name, self.guid))
                    proxy_info['red'] += 1
                    continue
                if service_status == 'active':
                    proxy_info['green'] += 1
                elif service_status == 'inactive':
                    proxy_info['orange'] += 1
                else:
                    proxy_info['red'] += 1
        finally:
            # NOTE(review): returning from 'finally' swallows any in-flight exception,
            # including ones not handled above. Presumably intentional best-effort
            # behaviour for a GUI summary — confirm before changing.
            return summary

    def _global_write_buffer(self):
        """
        Return the global write buffer for available for a StorageDriver
        (sum of the sizes of all WRITE/SCO partitions of this StorageDriver)
        :return: Calculated global write buffer
        :rtype: int
        """
        # Avoid circular import
        from ovs.dal.hybrids.j_storagedriverpartition import StorageDriverPartition
        global_write_buffer = 0
        for partition in self.partitions:
            if partition.role == DiskPartition.ROLES.WRITE and partition.sub_role == StorageDriverPartition.SUBROLE.SCO:
                global_write_buffer += partition.size
        return global_write_buffer
class AlbaNode(DataObject):
    """
    The AlbaNode contains information about nodes (containing OSDs)
    """
    NODE_TYPES = DataObject.enumerator('NodeType', ['ASD', 'GENERIC', 'S3'])
    OSD_STATUSES = DataObject.enumerator('OSDStatus', {'ERROR': 'error',
                                                       'MISSING': 'missing',
                                                       'OK': 'ok',
                                                       'UNAVAILABLE': 'unavailable',
                                                       'UNKNOWN': 'unknown',
                                                       'WARNING': 'warning'})
    OSD_STATUS_DETAILS = DataObject.enumerator('OSDStatusDetail', {'ACTIVATING': 'service_activating',
                                                                   'ALBAERROR': 'albaerror',
                                                                   'DECOMMISSIONED': 'decommissioned',
                                                                   'ERROR': 'recenterrors',
                                                                   'NODEDOWN': 'nodedown',
                                                                   'UNREACHABLE': 'unreachable'})
    SLOT_STATUSES = DataObject.enumerator('SlotStatus', {'OK': 'ok',
                                                         'WARNING': 'warning',
                                                         'MISSING': 'missing',
                                                         'UNAVAILABLE': 'unavailable',
                                                         'UNKNOWN': 'unknown',
                                                         'EMPTY': 'empty'})
    # Maps node type onto the manager-client class used to talk to it
    CLIENTS = DataObject.enumerator('AlbaNodeClients', {NODE_TYPES.ASD: ASDManagerClient,
                                                        NODE_TYPES.GENERIC: GenericManagerClient,
                                                        NODE_TYPES.S3: S3ManagerClient})
    # Maps node type onto its configuration path template (formatted with node_id)
    CONFIG_LOCATIONS = DataObject.enumerator('AlbaNodeConfigLocations', {NODE_TYPES.ASD: ASD_NODE_CONFIG_PATH,
                                                                         NODE_TYPES.GENERIC: '',
                                                                         NODE_TYPES.S3: S3_NODE_CONFIG_PATH})
    _logger = Logger('hybrids')

    __properties = [Property('ip', str, indexed=True, mandatory=False, doc='IP Address'),
                    Property('port', int, mandatory=False, doc='Port'),
                    Property('node_id', str, unique=True, indexed=True, doc='Alba node_id identifier'),
                    Property('name', str, mandatory=False, doc='Optional name for the AlbaNode'),
                    Property('username', str, mandatory=False, doc='Username of the AlbaNode'),
                    Property('password', str, mandatory=False, doc='Password of the AlbaNode'),
                    Property('type', NODE_TYPES.keys(), default=NODE_TYPES.ASD, doc='The type of the AlbaNode'),
                    Property('package_information', dict, mandatory=False, default={}, doc='Information about installed packages and potential available new versions')]
    __relations = [Relation('storagerouter', StorageRouter, 'alba_node', onetoone=True, mandatory=False, doc='StorageRouter hosting the Alba Node'),
                   Relation('alba_node_cluster', AlbaNodeCluster, 'alba_nodes', mandatory=False, doc='The Alba Node Cluster to which the Alba Node belongs')]
    __dynamics = [Dynamic('stack', dict, 15, locked=True),
                  Dynamic('ips', list, 3600),
                  Dynamic('maintenance_services', dict, 30, locked=True),
                  Dynamic('node_metadata', dict, 3600),
                  Dynamic('supported_osd_types', list, 3600),
                  Dynamic('read_only_mode', bool, 60),
                  Dynamic('local_summary', dict, 60),
                  Dynamic('ipmi_info', dict, 3600)]

    def __init__(self, *args, **kwargs):
        """
        Initializes an AlbaNode, setting up its additional helpers
        """
        DataObject.__init__(self, *args, **kwargs)
        # Temporarily unfreeze the DataObject so the non-model client helper can be attached
        self._frozen = False
        self.client = None
        if os.environ.get('RUNNING_UNITTESTS') == 'True':
            self.client = ManagerClientMockup(self)
        else:
            if self.type not in self.CLIENTS:
                raise NotImplementedError('Type {0} is not implemented'.format(self.type))
            self.client = self.CLIENTS[self.type](self)
        self._frozen = True

    def _ips(self):
        """
        Returns the IPs of the node, as registered in the configuration store.
        """
        return Configuration.get(os.path.join(self.CONFIG_LOCATIONS[self.type], 'network|ips').format(self.node_id))

    def _maintenance_services(self):
        """
        Returns all maintenance services on this node, grouped by backend name
        Best-effort: returns what was collected so far when listing fails.
        """
        services = {}
        try:
            for service_name in self.client.list_maintenance_services():
                # Service names look like 'alba-maintenance_<backend>-<16 char suffix>'
                match = re.match('^alba-maintenance_(.*)-[a-zA-Z0-9]{16}$', service_name)
                if match is not None:
                    service_status = self.client.get_service_status(name=service_name)
                    backend_name = match.groups()[0]
                    if backend_name not in services:
                        services[backend_name] = []
                    services[backend_name].append([service_name, service_status])
        except Exception:
            self._logger.exception('Unable to list the maintenance services')
        return services

    def _stack(self):
        """
        Returns an overview of this node's storage stack:
        the remote manager's view of the slots/OSDs, reconciled with the model
        and annotated with claim and usage information from Alba.
        """
        from ovs.dal.hybrids.albabackend import AlbaBackend
        from ovs.dal.lists.albabackendlist import AlbaBackendList

        def _move(info):
            # Renames legacy 'state'/'state_detail' keys to 'status'/'status_detail'
            for move in [('state', 'status'), ('state_detail', 'status_detail')]:
                if move[0] in info:
                    info[move[1]] = info[move[0]]
                    del info[move[0]]

        stack = {}
        node_down = False
        # Fetch stack from asd-manager
        try:
            remote_stack = self.client.get_stack()
            for slot_id, slot_data in remote_stack.iteritems():
                stack[slot_id] = {'status': 'ok'}
                stack[slot_id].update(slot_data)
                # Migrate state > status
                _move(stack[slot_id])
                for osd_data in slot_data.get('osds', {}).itervalues():
                    _move(osd_data)
        except (requests.ConnectionError, requests.Timeout, InvalidCredentialsError):
            self._logger.warning('Error during stack retrieval. Assuming that the node is down')
            node_down = True

        model_osds = {}
        found_osds = {}
        # Apply own model to fetched stack
        for osd in self.osds:
            model_osds[osd.osd_id] = osd  # Initially set the info
            if osd.slot_id not in stack:
                stack[osd.slot_id] = {'status': self.OSD_STATUSES.UNKNOWN if node_down is True else self.OSD_STATUSES.MISSING,
                                      'status_detail': self.OSD_STATUS_DETAILS.NODEDOWN if node_down is True else '',
                                      'osds': {}}
            osd_data = stack[osd.slot_id]['osds'].get(osd.osd_id, {})
            stack[osd.slot_id]['osds'][osd.osd_id] = osd_data  # Initially set the info in the stack
            osd_data.update(osd.stack_info)
            if node_down is True:
                osd_data['status'] = self.OSD_STATUSES.UNKNOWN
                osd_data['status_detail'] = self.OSD_STATUS_DETAILS.NODEDOWN
            elif osd.alba_backend_guid is not None:  # Osds has been claimed
                # Load information from alba
                if osd.alba_backend_guid not in found_osds:
                    found_osds[osd.alba_backend_guid] = {}
                    if osd.alba_backend.abm_cluster is not None:
                        config = Configuration.get_configuration_path(osd.alba_backend.abm_cluster.config_location)
                        try:
                            for found_osd in AlbaCLI.run(command='list-all-osds', config=config):
                                found_osds[osd.alba_backend_guid][found_osd['long_id']] = found_osd
                        except (AlbaError, RuntimeError):
                            self._logger.exception('Listing all osds has failed')
                            osd_data['status'] = self.OSD_STATUSES.UNKNOWN
                            osd_data['status_detail'] = self.OSD_STATUS_DETAILS.ALBAERROR
                            continue
                if osd.osd_id not in found_osds[osd.alba_backend_guid]:
                    # Not claimed by any backend thus not in use
                    continue
                found_osd = found_osds[osd.alba_backend_guid][osd.osd_id]
                if found_osd['decommissioned'] is True:
                    osd_data['status'] = self.OSD_STATUSES.UNAVAILABLE
                    osd_data['status_detail'] = self.OSD_STATUS_DETAILS.DECOMMISSIONED
                    continue
                backend_interval_key = '/ovs/alba/backends/{0}/gui_error_interval'.format(osd.alba_backend_guid)
                if Configuration.exists(backend_interval_key):
                    interval = Configuration.get(backend_interval_key)
                else:
                    interval = Configuration.get('/ovs/alba/backends/global_gui_error_interval')
                read = found_osd['read'] or [0]
                write = found_osd['write'] or [0]
                errors = found_osd['errors']
                # Default to WARNING; upgrade to OK when no (recent) errors were seen
                osd_data['status'] = self.OSD_STATUSES.WARNING
                osd_data['status_detail'] = self.OSD_STATUS_DETAILS.ERROR
                if len(errors) == 0 or (len(read + write) > 0 and max(min(read), min(write)) > max(error[0] for error in errors) + interval):
                    osd_data['status'] = self.OSD_STATUSES.OK
                    osd_data['status_detail'] = ''
        statistics = {}
        for slot_info in stack.itervalues():
            for osd_id, osd in slot_info['osds'].iteritems():
                if osd.get('status_detail') == self.OSD_STATUS_DETAILS.ACTIVATING:
                    osd['claimed_by'] = 'unknown'  # We won't be able to connect to it just yet
                    continue
                if osd_id not in model_osds:
                    # The osd is known by the remote node but not in the model
                    # In that case, let's connect to the OSD to see whether we get some info from it
                    try:
                        ips = osd['hosts'] if 'hosts' in osd and len(osd['hosts']) > 0 else osd.get('ips', [])
                        port = osd['port']
                        claimed_by = 'unknown'
                        for ip in ips:
                            try:
                                # Output will be None if it is not claimed
                                claimed_by = AlbaCLI.run('get-osd-claimed-by', named_params={'host': ip, 'port': port})
                                break
                            except (AlbaError, RuntimeError):
                                self._logger.warning('get-osd-claimed-by failed for IP:port {0}:{1}'.format(ip, port))
                        alba_backend = AlbaBackendList.get_by_alba_id(claimed_by)
                        osd['claimed_by'] = alba_backend.guid if alba_backend is not None else claimed_by
                    except KeyError:
                        osd['claimed_by'] = 'unknown'
                    except:
                        self._logger.exception('Could not load OSD info: {0}'.format(osd_id))
                        osd['claimed_by'] = 'unknown'
                        if osd.get('status') not in ['error', 'warning']:
                            osd['status'] = self.OSD_STATUSES.ERROR
                            osd['status_detail'] = self.OSD_STATUS_DETAILS.UNREACHABLE
                claimed_by = osd.get('claimed_by', 'unknown')
                if claimed_by == 'unknown':
                    continue
                try:
                    alba_backend = AlbaBackend(claimed_by)
                except ObjectNotFoundException:
                    continue
                # Add usage information
                if alba_backend not in statistics:
                    statistics[alba_backend] = alba_backend.osd_statistics
                osd_statistics = statistics[alba_backend]
                if osd_id not in osd_statistics:
                    continue
                stats = osd_statistics[osd_id]
                osd['usage'] = {'size': int(stats['capacity']),
                                'used': int(stats['disk_usage']),
                                'available': int(stats['capacity'] - stats['disk_usage'])}
        return stack

    def _node_metadata(self):
        """
        Returns a set of metadata hinting on how the Node should be used
        """
        slots_metadata = {'fill': False,  # Prepare Slot for future usage
                          'fill_add': False,  # OSDs will added and claimed right away
                          'clear': False}  # Indicates whether OSDs can be removed from ALBA Node / Slot
        if self.type == AlbaNode.NODE_TYPES.ASD:
            slots_metadata.update({'fill': True,
                                   'fill_metadata': {'count': 'integer'},
                                   'clear': True})
        elif self.type == AlbaNode.NODE_TYPES.GENERIC:
            slots_metadata.update({'fill_add': True,
                                   'fill_add_metadata': {'osd_type': 'osd_type',
                                                         'ips': 'list_of_ip',
                                                         'port': 'port'},
                                   'clear': True})
        elif self.type == AlbaNode.NODE_TYPES.S3:
            slots_metadata.update({'fill_add': True,
                                   'fill_add_metadata': {'count': 'integer',
                                                         'osd_type': 'osd_type',
                                                         'buckets': 'list_of_string'},
                                   'clear': True})
        return slots_metadata

    def _supported_osd_types(self):
        """
        Returns a list of all supported OSD types for this node type.
        """
        from ovs.dal.hybrids.albaosd import AlbaOSD  # Local import to avoid a circular dependency
        if self.type == AlbaNode.NODE_TYPES.GENERIC:
            return [AlbaOSD.OSD_TYPES.ASD, AlbaOSD.OSD_TYPES.AD]
        elif self.type == AlbaNode.NODE_TYPES.ASD:
            return [AlbaOSD.OSD_TYPES.ASD]
        elif self.type == AlbaNode.NODE_TYPES.S3:
            return []
        return []

    def _read_only_mode(self):
        """
        Indicates whether the ALBA Node can be used for OSD manipulation
        If the version on the ALBA Node is lower than a specific version required by the framework,
        the ALBA Node becomes read only, this means, that actions such as creating, restarting, deleting OSDs
        becomes impossible until the node's software has been updated
        :return: True if the ALBA Node should be read only, False otherwise
        :rtype: bool
        """
        read_only = False
        if self.type in [AlbaNode.NODE_TYPES.GENERIC, AlbaNode.NODE_TYPES.ASD]:
            try:
                # Version 3 was introduced when Slots for Active Drives have been introduced
                read_only = self.client.get_metadata()['_version'] < 3
            except (requests.ConnectionError, requests.Timeout, InvalidCredentialsError):
                # When down, nothing can be edited.
                self._logger.warning('Error during stack retrieval. Assuming that the node is down and disabling read_only because nothing can be done')
        return read_only

    def _local_summary(self):
        """
        Return a summary of the OSDs based on their state
        * Ok -> green
        * WARNING -> orange
        * ERROR -> red
        * UNKNOWN -> gray
        The summary will contain a list of dicts with guid, osd_id and claimed_by
        eg: {'red': [{osd_id: 1, claimed_by: alba_backend_guid1}], 'green': [{osd_id: 2, claimed_by: None}], ...}
        :return: Summary of the OSDs filtered by status (which are represented by color)
        """
        device_info = {'red': [], 'green': [], 'orange': [], 'gray': []}
        local_summary = {'devices': device_info}  # For future additions?
        state_map = {self.OSD_STATUSES.OK: 'green',
                     self.OSD_STATUSES.WARNING: 'orange',
                     self.OSD_STATUSES.ERROR: 'red',
                     self.OSD_STATUSES.UNKNOWN: 'gray'}
        for slot_id, slot_data in self.stack.iteritems():
            for osd_id, osd_data in slot_data.get('osds', {}).iteritems():
                status = osd_data.get('status', self.OSD_STATUSES.UNKNOWN)
                osd_info = {'claimed_by': osd_data.get('claimed_by'),
                            'osd_id': osd_data.get('osd_id')}
                if status in state_map:  # Can never be too sure
                    device_info[state_map[status]].append(osd_info)
        return local_summary

    def _ipmi_info(self):
        """
        Retrieve the IPMI information of the AlbaNode
        :return: Dict with ipmi information
        :rtype: dict
        """
        try:
            return Configuration.get(os.path.join(self.CONFIG_LOCATIONS[self.type], 'ipmi').format(self.node_id))
        except NotFoundException:
            # Could be that the ASDManager does not yet have the IPMI info stored
            self._logger.warning('No IPMI config path found')
            return {'ip': None, 'username': None, 'password': None}
class VDisk(DataObject):
    """
    The VDisk class represents a vDisk. A vDisk is a Virtual Disk served by Open vStorage.
    """
    STATUSES = DataObject.enumerator('Status', ['HALTED', 'NON_RUNNING', 'RUNNING', 'UNKNOWN'])
    VDISK_NAME_REGEX = '^[0-9a-zA-Z][\-_a-zA-Z0-9]+[a-zA-Z0-9]$'

    _logger = Logger('hybrids')
    __properties = [Property('name', str, mandatory=False, doc='Name of the vDisk.'),
                    Property('description', str, mandatory=False, doc='Description of the vDisk.'),
                    Property('size', int, doc='Size of the vDisk in Bytes.'),
                    Property('devicename', str, doc='The name of the container file (e.g. the VMDK-file) describing the vDisk.'),
                    Property('volume_id', str, mandatory=False, indexed=True, doc='ID of the vDisk in the Open vStorage Volume Driver.'),
                    Property('parentsnapshot', str, mandatory=False, doc='Points to a parent storage driver parent ID. None if there is no parent Snapshot'),
                    Property('cinder_id', str, mandatory=False, doc='Cinder Volume ID, for volumes managed through Cinder'),
                    Property('has_manual_dtl', bool, default=False, doc='Indicates whether the default DTL location has been overruled by customer'),
                    Property('pagecache_ratio', float, default=1.0, doc='Ratio of the volume\'s metadata pages that needs to be cached'),
                    Property('metadata', dict, default=dict(), doc='Contains fixed metadata about the volume (e.g. lba_size, ...)'),
                    Property('cache_quota', dict, mandatory=False, doc='Maximum caching space(s) this volume can consume (in Bytes) per cache type. If not None, the caching(s) for this volume has been set manually')]
    __relations = [Relation('vpool', VPool, 'vdisks'),
                   Relation('parent_vdisk', None, 'child_vdisks', mandatory=False)]
    __dynamics = [Dynamic('dtl_status', str, 60),
                  Dynamic('snapshots', list, 30),
                  Dynamic('snapshot_ids', list, 30),
                  Dynamic('info', dict, 60),
                  Dynamic('statistics', dict, 4),
                  Dynamic('storagedriver_id', str, 60),
                  Dynamic('storagerouter_guid', str, 15),
                  Dynamic('is_vtemplate', bool, 60),
                  Dynamic('edge_clients', list, 30)]
    # Client attributes that must survive the DataObject freeze mechanism
    _fixed_properties = ['storagedriver_client', 'objectregistry_client', 'fsmetadata_client']

    def __init__(self, *args, **kwargs):
        """
        Initializes a vDisk, setting up its additional helpers
        """
        DataObject.__init__(self, *args, **kwargs)
        # Temporarily unfreeze to attach lazily-loaded client attributes
        self._frozen = False
        self._storagedriver_client = None
        self._objectregistry_client = None
        self._fsmetadata_client = None
        self._frozen = True

    @property
    def storagedriver_client(self):
        """
        Client used for communication between StorageDriver and framework
        Lazily loaded on first access via reload_client
        :return: StorageDriverClient
        """
        if self._storagedriver_client is None:
            self.reload_client('storagedriver')
        return self._storagedriver_client

    @property
    def objectregistry_client(self):
        """
        Client used for communication between StorageDriver OR and framework
        Lazily loaded on first access via reload_client
        :return: ObjectRegistryClient
        """
        if self._objectregistry_client is None:
            self.reload_client('objectregistry')
        return self._objectregistry_client

    @property
    def fsmetadata_client(self):
        """
        Client used for communications between StorageDriver FS metadata and framework
        Lazily loaded on first access via reload_client
        :return: FileSystemMetaDataClient
        """
        if self._fsmetadata_client is None:
            self.reload_client('filesystem_metadata')
        return self._fsmetadata_client

    def _dtl_status(self):
        """
        Retrieve the DTL status for a vDisk
        :return: One of the voldrv failover modes (lower-cased), 'disabled' or 'checkup_required'
        """
        sd_status = self._info().get('failover_mode', 'UNKNOWN').lower()
        if sd_status == '':
            sd_status = 'unknown'
        if sd_status not in ['ok_sync', 'ok_standalone']:  # ok_sync or ok_standalone according to voldrv, can still mean incorrect deployment
            return sd_status

        # Verify whether 'ok_standalone' or 'ok_sync' is the correct status for this vDisk
        vpool_dtl = self.vpool.configuration['dtl_enabled']
        if (self.has_manual_dtl is False and vpool_dtl is False) or (self.has_manual_dtl is True and vpool_dtl is True and len(self.domains_dtl_guids) == 0):
            return 'disabled'

        storagerouter_guid = self._storagerouter_guid()
        if storagerouter_guid is None:
            # vDisk is not hosted anywhere right now, so no DTL verdict possible
            return 'checkup_required'

        this_sr = StorageRouter(storagerouter_guid)
        # All StorageRouters serving this vPool, except the one hosting this vDisk
        other_storagerouters = set([sd.storagerouter for sd in self.vpool.storagedrivers if sd.storagerouter_guid != storagerouter_guid])

        # Retrieve all StorageRouters linked to the Recovery Domains (primary) and Regular Domains (secondary) for the StorageRouter hosting this vDisk
        primary = set()
        secondary = set()
        for junction in this_sr.domains:
            if junction.backup is True:
                primary.update(set(StorageRouterList.get_primary_storagerouters_for_domain(junction.domain)))
            else:
                secondary.update(set(StorageRouterList.get_primary_storagerouters_for_domain(junction.domain)))
        # Restrict both sets to routers on this vPool; secondary must not overlap primary
        primary = primary.intersection(other_storagerouters)
        secondary = secondary.difference(primary)
        secondary = secondary.intersection(other_storagerouters)

        try:
            config = self.storagedriver_client.get_dtl_config(str(self.volume_id))
        except:
            # Any client failure means the DTL state cannot be verified
            return 'checkup_required'

        if self.has_manual_dtl is False:
            # No DTL targets --> Check for Storage Routers linked to current vPool (priority for StorageRouters in recovery domain of current StorageRouter)
            possible_storagerouters = list(primary) if len(primary) > 0 else list(secondary) if len(secondary) > 0 else list(other_storagerouters)
            if len(possible_storagerouters) > 0 and config is not None:
                # DTL must point at a storage IP of one of the eligible routers
                if config.host not in [sd.storage_ip for sr in possible_storagerouters for sd in sr.storagedrivers if sd.vpool_guid == self.vpool_guid]:
                    return 'checkup_required'
        else:
            if len(self.domains_dtl) > 0:
                # Manual DTL: eligible routers are those in the explicitly chosen domains
                chosen_storagerouters = set()
                for junction in self.domains_dtl:
                    chosen_storagerouters.update(set(StorageRouterList.get_primary_storagerouters_for_domain(junction.domain)))
                possible_storagerouters = chosen_storagerouters.intersection(other_storagerouters)
            else:
                possible_storagerouters = other_storagerouters

            if config is None:
                # No DTL configured: only acceptable when no targets exist and voldrv agrees
                if len(possible_storagerouters) == 0:
                    if sd_status == 'ok_standalone':
                        return sd_status
                return 'checkup_required'
            else:
                if len(possible_storagerouters) > 0:
                    if config.host in [sd.storage_ip for sr in possible_storagerouters for sd in sr.storagedrivers if sd.vpool_guid == self.vpool_guid]:
                        return sd_status
                    return 'checkup_required'
                return 'checkup_required'
        return sd_status

    def _snapshot_ids(self):
        """
        Fetches the snapshot IDs for this vDisk
        :return: List of snapshot IDs, empty when the vDisk is not (yet) backed by a volume
        """
        if not self.volume_id or not self.vpool:
            return []

        # Local import to avoid a circular dependency between DAL and lib layers
        from ovs.lib.vdisk import VDiskController
        try:
            return VDiskController.list_snapshot_ids(vdisk=self)
        except:
            # Best-effort: an unreachable volume yields an empty list instead of an error
            return []

    def _snapshots(self):
        """
        Fetches the information of all snapshots for this vDisk
        :return: List of dicts describing each snapshot
        """
        snapshots = []
        self.invalidate_dynamics('snapshot_ids')
        for snap_id in self.snapshot_ids:
            try:
                snapshot = self.storagedriver_client.info_snapshot(str(self.volume_id), snap_id, req_timeout_secs=2)
            except SnapshotNotFoundException:
                # Snapshot disappeared between listing and info call; skip it
                continue
            if snapshot.metadata:
                # Framework-created snapshots carry pickled metadata with labels/flags
                metadata = pickle.loads(snapshot.metadata)
                if isinstance(metadata, dict):
                    snapshots.append({'guid': snap_id,
                                      'timestamp': metadata['timestamp'],
                                      'label': metadata['label'],
                                      'is_consistent': metadata['is_consistent'],
                                      'is_automatic': metadata.get('is_automatic', True),
                                      'is_sticky': metadata.get('is_sticky', False),
                                      'in_backend': snapshot.in_backend,
                                      'stored': int(snapshot.stored)})
            else:
                # Snapshot created outside the framework: derive timestamp from voldrv's '%c' string
                snapshots.append({'guid': snap_id,
                                  'timestamp': time.mktime(datetime.strptime(snapshot.timestamp.strip(), '%c').timetuple()),
                                  'label': snap_id,
                                  'is_consistent': False,
                                  'is_automatic': False,
                                  'is_sticky': False,
                                  'in_backend': snapshot.in_backend,
                                  'stored': int(snapshot.stored)})
        return snapshots

    def _info(self):
        """
        Fetches the info (see Volume Driver API) for the vDisk.
        :return: Dict of volume driver info properties, extended with 'live_status'
        """
        vdiskinfo = StorageDriverClient.EMPTY_INFO()
        vdisk_state = VDisk.STATUSES.RUNNING
        if self.volume_id and self.vpool:
            try:
                try:
                    vdiskinfo = self.storagedriver_client.info_volume(str(self.volume_id), req_timeout_secs=2)
                except VolumeRestartInProgressException:
                    # Volume is restarting: wait briefly and retry once
                    time.sleep(0.5)
                    vdiskinfo = self.storagedriver_client.info_volume(str(self.volume_id), req_timeout_secs=2)
            except MaxRedirectsExceededException:
                vdisk_state = VDisk.STATUSES.NON_RUNNING
            # @todo replace RuntimeError with NodeNotReachableException
            except (ClusterNotReachableException, RuntimeError) as exception:
                # RuntimeError is only treated as 'node down' when it matches the XMLRPC failure message
                if isinstance(exception, ClusterNotReachableException) or (isinstance(exception, RuntimeError) and 'failed to send XMLRPC request' in str(exception)):
                    self._logger.debug('VDisk {0} status has been set to UNKNOWN'.format(self.name))
                    vdisk_state = VDisk.STATUSES.UNKNOWN
            except Exception as ex:
                self._logger.debug('Uncaught exception occurred when requesting the volume info for vDisk {0}: {1}'.format(self.name, ex))

        # Convert the info object's properties into a plain dict
        vdiskinfodict = {}
        for key, value in vdiskinfo.__class__.__dict__.items():
            if type(value) is property:
                objectvalue = getattr(vdiskinfo, key)
                if key == 'object_type':
                    vdiskinfodict[key] = str(objectvalue)
                elif key == 'metadata_backend_config':
                    vdiskinfodict[key] = {}
                    if hasattr(objectvalue, 'node_configs') and callable(objectvalue.node_configs):
                        vdiskinfodict[key] = []
                        for nodeconfig in objectvalue.node_configs():
                            vdiskinfodict[key].append({'ip': nodeconfig.address(),
                                                       'port': nodeconfig.port()})
                elif key == 'halted' and objectvalue is True:
                    self._logger.debug('VDisk {0} status has been set to HALTED'.format(self.name))
                    vdisk_state = VDisk.STATUSES.HALTED
                else:
                    vdiskinfodict[key] = objectvalue
        vdiskinfodict['live_status'] = vdisk_state
        return vdiskinfodict

    def _statistics(self, dynamic):
        """
        Fetches the Statistics for the vDisk.
        :param dynamic: The Dynamic descriptor triggering this call (used for cache timeout)
        :return: Dict of statistics including per-second deltas and a timestamp
        """
        statistics = {}
        for key, value in self.fetch_statistics().iteritems():
            statistics[key] = value
        statistics['timestamp'] = time.time()
        VDisk.calculate_delta(self._key, dynamic, statistics)
        return statistics

    def _storagedriver_id(self):
        """
        Returns the Volume Storage Driver ID to which the vDisk is connected.
        :return: StorageDriver node ID or None when the volume is not registered
        """
        vdisk_object = self.objectregistry_client.find(str(self.volume_id))
        if vdisk_object is not None:
            return vdisk_object.node_id()
        return None

    def _storagerouter_guid(self):
        """
        Loads the vDisks StorageRouter guid
        :return: Guid of the hosting StorageRouter or None when it cannot be resolved
        """
        storagedriver_id = self._storagedriver_id()
        if not storagedriver_id:
            return None
        # Local import to avoid a circular dependency
        from ovs.dal.hybrids.storagedriver import StorageDriver
        sds = DataList(StorageDriver, {'type': DataList.where_operator.AND,
                                       'items': [('storagedriver_id', DataList.operator.EQUALS, storagedriver_id)]})
        # Only trust the lookup when it resolves to exactly one StorageDriver
        if len(sds) == 1:
            return sds[0].storagerouter_guid
        return None

    def _is_vtemplate(self):
        """
        Returns whether the vdisk is a template
        :return: True when the object registry marks this volume as TEMPLATE
        """
        vdisk_object = self.objectregistry_client.find(str(self.volume_id))
        if vdisk_object is not None:
            return str(vdisk_object.object_type()) == 'TEMPLATE'
        return False

    def _edge_clients(self):
        """
        Retrieves all edge clients
        :return: List of edge client dicts connected to this vDisk (deduplicated by key)
        """
        clients = {}
        for storagedriver in self.vpool.storagedrivers:
            for client in storagedriver.edge_clients:
                if client['object_id'] == self.volume_id:
                    clients[client['key']] = client
        return clients.values()

    def fetch_statistics(self):
        """
        Loads statistics from this vDisk - returns unprocessed data
        :return: Dict of raw statistics extracted from the volume driver
        """
        # Load data from volumedriver
        vdiskstats = StorageDriverClient.EMPTY_STATISTICS()
        if self.volume_id and self.vpool:
            try:
                vdiskstats = self.storagedriver_client.statistics_volume(str(self.volume_id), req_timeout_secs=2)
            except Exception as ex:
                # Best-effort: fall back to the empty statistics object on failure
                VDisk._logger.error('Error loading statistics_volume from {0}: {1}'.format(self.volume_id, ex))
        # Load volumedriver data in dictionary
        return VDisk.extract_statistics(vdiskstats, self)

    @staticmethod
    def extract_statistics(stats, vdisk):
        """
        Extract the statistics useful for the framework from all statistics passed in by StorageDriver
        :param stats: Raw statistics object as returned by the StorageDriver client
        :param vdisk: VDisk whose metadata provides the block size, or None (defaults to 4096)
        :return: Dict of framework statistics
        """
        statsdict = {}
        try:
            pc = stats.performance_counters
            # Map each voldrv performance counter to framework statistic names per accessor method
            for counter, info in {'backend_read_request_size': {'sum': 'backend_data_read',
                                                                'events': 'backend_read_operations',
                                                                'distribution': 'backend_read_operations_distribution'},
                                  'backend_read_request_usecs': {'sum': 'backend_read_latency',
                                                                 'distribution': 'backend_read_latency_distribution'},
                                  'backend_write_request_size': {'sum': 'backend_data_written',
                                                                 'events': 'backend_write_operations',
                                                                 'distribution': 'backend_write_operations_distribution'},
                                  'backend_write_request_usecs': {'sum': 'backend_write_latency',
                                                                  'distribution': 'backend_write_latency_distribution'},
                                  'sync_request_usecs': {'sum': 'sync_latency',
                                                         'distribution': 'sync_latency_distribution'},
                                  'read_request_size': {'sum': 'data_read',
                                                        'events': 'read_operations',
                                                        'distribution': 'read_operations_distribution'},
                                  'read_request_usecs': {'sum': 'read_latency',
                                                         'distribution': 'read_latency_distribution'},
                                  'write_request_size': {'sum': 'data_written',
                                                         'events': 'write_operations',
                                                         'distribution': 'write_operations_distribution'},
                                  'write_request_usecs': {'sum': 'write_latency',
                                                          'distribution': 'write_latency_distribution'},
                                  'unaligned_read_request_size': {'sum': 'unaligned_data_read',
                                                                  'events': 'unaligned_read_operations',
                                                                  'distribution': 'unaligned_read_operations_distribution'},
                                  'unaligned_read_request_usecs': {'sum': 'unaligned_read_latency',
                                                                   'distribution': 'unaligned_read_latency_distribution'},
                                  'unaligned_write_request_size': {'sum': 'unaligned_data_written',
                                                                   'events': 'unaligned_write_operations',
                                                                   'distribution': 'unaligned_write_operations_distribution'},
                                  'unaligned_write_request_usecs': {'sum': 'unaligned_write_latency',
                                                                    'distribution': 'unaligned_write_latency_distribution'}}.iteritems():
                if hasattr(pc, counter):
                    counter_object = getattr(pc, counter)
                    for method, target in info.iteritems():
                        if hasattr(counter_object, method):
                            statsdict[target] = getattr(counter_object, method)()

            # Plain attributes copied over as-is
            for key in ['cluster_cache_hits', 'cluster_cache_misses', 'metadata_store_hits',
                        'metadata_store_misses', 'sco_cache_hits', 'sco_cache_misses',
                        'stored', 'partial_read_fast', 'partial_read_slow']:
                if hasattr(stats, key):
                    statsdict[key] = getattr(stats, key)

            # Do some more manual calculations
            block_size = 0
            if vdisk is not None:
                block_size = vdisk.metadata.get('lba_size', 0) * vdisk.metadata.get('cluster_multiplier', 0)
            if block_size == 0:
                block_size = 4096
            # Express data volumes as 4k-equivalent operation counts
            for key, source in {'4k_read_operations': 'data_read',
                                '4k_write_operations': 'data_written',
                                '4k_unaligned_read_operations': 'unaligned_data_read',
                                '4k_unaligned_write_operations': 'unaligned_data_written'}.iteritems():
                statsdict[key] = statsdict.get(source, 0) / block_size

            # Pre-calculate sums
            for key, items in StorageDriverClient.STAT_SUMS.iteritems():
                statsdict[key] = 0
                for item in items:
                    statsdict[key] += statsdict[item]
        except:
            # NOTE(review): deliberate best-effort — any extraction failure returns a partial dict
            pass
        return statsdict

    @staticmethod
    def calculate_delta(key, dynamic, current_stats):
        """
        Calculate statistics deltas
        :param key: Key to retrieve from volatile factory
        :param dynamic: Dynamic descriptor whose timeout bounds the cache lifetime
        :param current_stats: Current statistics to compare with (mutated in-place with *_ps keys)
        :return: None
        """
        volatile = VolatileFactory.get_client()
        prev_key = '{0}_{1}'.format(key, 'statistics_previous')
        previous_stats = volatile.get(prev_key, default={})
        # NOTE(review): the loop variable below shadows the 'key' parameter; prev_key was
        # built before the loop, so behavior is unaffected
        for key in current_stats.keys():
            if key == 'timestamp' or '_latency' in key or '_distribution' in key:
                continue
            delta = current_stats['timestamp'] - previous_stats.get('timestamp', current_stats['timestamp'])
            if delta == 0:
                # No time elapsed (or no previous sample): carry over the previous rate
                current_stats['{0}_ps'.format(key)] = previous_stats.get('{0}_ps'.format(key), 0)
            elif delta > 0 and key in previous_stats:
                # Clamp at 0 so counter resets don't yield negative rates
                current_stats['{0}_ps'.format(key)] = max(0, (current_stats[key] - previous_stats[key]) / delta)
            else:
                current_stats['{0}_ps'.format(key)] = 0
        volatile.set(prev_key, current_stats, dynamic.timeout * 10)

    def reload_client(self, client):
        """
        Reloads the StorageDriverClient or ObjectRegistryClient
        :param client: One of 'storagedriver', 'objectregistry' or 'filesystem_metadata'
        """
        if self.vpool_guid:
            # Unfreeze so the cached client attribute can be replaced
            self._frozen = False
            if client == 'storagedriver':
                self._storagedriver_client = StorageDriverClient.load(self.vpool)
            elif client == 'objectregistry':
                self._objectregistry_client = ObjectRegistryClient.load(self.vpool)
            elif client == 'filesystem_metadata':
                self._fsmetadata_client = FSMetaDataClient.load(self.vpool)
            self._frozen = True
class VPool(DataObject):
    """
    The VPool class represents a vPool. A vPool is a Virtual Storage Pool, a Filesystem, used to
    deploy vDisks. a vPool can span multiple Storage Drivers and connects to a single Storage BackendType.
    """
    _logger = Logger('hybrids')
    STATUSES = DataObject.enumerator('Status', ['DELETING', 'EXTENDING', 'FAILURE', 'INSTALLING', 'RUNNING', 'SHRINKING'])
    CACHES = DataObject.enumerator('Cache', {'BLOCK': 'block',
                                             'FRAGMENT': 'fragment'})

    __properties = [Property('name', str, unique=True, indexed=True, doc='Name of the vPool'),
                    Property('description', str, mandatory=False, doc='Description of the vPool'),
                    Property('login', str, mandatory=False, doc='Login/Username for the Storage BackendType.'),
                    Property('password', str, mandatory=False, doc='Password for the Storage BackendType.'),
                    Property('connection', str, mandatory=False, doc='Connection (IP, URL, Domain name, Zone, ...) for the Storage BackendType.'),
                    Property('metadata', dict, mandatory=False, doc='Metadata for the backends, as used by the Storage Drivers.'),
                    Property('rdma_enabled', bool, default=False, doc='Has the vpool been configured to use RDMA for DTL transport, which is only possible if all storagerouters are RDMA capable'),
                    Property('status', STATUSES.keys(), doc='Status of the vPool'),
                    Property('metadata_store_bits', int, mandatory=False, doc='StorageDrivers deployed for this vPool will make use of this amount of metadata store bits')]
    __relations = []
    __dynamics = [Dynamic('configuration', dict, 3600),
                  Dynamic('statistics', dict, 4),
                  Dynamic('identifier', str, 120),
                  Dynamic('extensible', tuple, 60),
                  Dynamic('volume_potentials', dict, 60)]
    # Client attributes that must survive the DataObject freeze mechanism
    _fixed_properties = ['storagedriver_client', 'objectregistry_client', 'clusterregistry_client']

    def __init__(self, *args, **kwargs):
        """
        Initializes a vPool, setting up its additional helpers
        """
        DataObject.__init__(self, *args, **kwargs)
        # Temporarily unfreeze to attach lazily-loaded client attributes
        self._frozen = False
        self._storagedriver_client = None
        self._objectregistry_client = None
        self._clusterregistry_client = None
        self._frozen = True

    @property
    def storagedriver_client(self):
        """
        Client used for communication between Storage Driver and framework
        Lazily loaded on first access via reload_client
        :return: StorageDriverClient
        """
        if self._storagedriver_client is None:
            self.reload_client('storagedriver')
        return self._storagedriver_client

    @property
    def objectregistry_client(self):
        """
        Client used for communication between Storage Driver OR and framework
        Lazily loaded on first access via reload_client
        :return: ObjectRegistryClient
        """
        if self._objectregistry_client is None:
            self.reload_client('objectregistry')
        return self._objectregistry_client

    @property
    def clusterregistry_client(self):
        """
        Client used for making changes to the StorageDriver's Cluster Registry
        Lazily loaded on first access via reload_client
        :return: ClusterRegistry client
        """
        if self._clusterregistry_client is None:
            self.reload_client('clusterregistry')
        return self._clusterregistry_client

    def reload_client(self, client):
        """
        Reloads the StorageDriverClient, ObjectRegistryClient or ClusterRegistry client
        :param client: One of 'storagedriver', 'objectregistry' or 'clusterregistry'
        """
        # Unfreeze so the cached client attribute can be replaced
        self._frozen = False
        if client == 'storagedriver':
            self._storagedriver_client = StorageDriverClient.load(self)
        elif client == 'objectregistry':
            self._objectregistry_client = ObjectRegistryClient.load(self)
        elif client == 'clusterregistry':
            self._clusterregistry_client = ClusterRegistryClient.load(self)
        self._frozen = True

    def _configuration(self):
        """
        VPool configuration
        :return: Dict with the vPool's voldrv-derived configuration, or {} when unavailable
        """
        if not self.storagedrivers or not self.storagedrivers[0].storagerouter:
            return {}

        # The configuration of the first StorageDriver is taken as representative for the vPool
        storagedriver_config = StorageDriverConfiguration(self.guid, self.storagedrivers[0].storagedriver_id)
        for expected_key in ['distributed_transaction_log', 'filesystem', 'volume_router', 'volume_manager']:
            if expected_key not in storagedriver_config.configuration:
                return {}

        dtl = storagedriver_config.configuration['distributed_transaction_log']
        file_system = storagedriver_config.configuration['filesystem']
        volume_router = storagedriver_config.configuration['volume_router']
        volume_manager = storagedriver_config.configuration['volume_manager']

        dtl_host = file_system['fs_dtl_host']
        dtl_mode = file_system.get('fs_dtl_mode', StorageDriverClient.VOLDRV_DTL_ASYNC)
        cluster_size = volume_manager['default_cluster_size'] / 1024  # Bytes -> KiB
        dtl_transport = dtl['dtl_transport']
        sco_multiplier = volume_router['vrouter_sco_multiplier']
        dtl_config_mode = file_system['fs_dtl_config_mode']
        tlog_multiplier = volume_manager['number_of_scos_in_tlog']
        non_disposable_sco_factor = volume_manager['non_disposable_scos_factor']

        sco_size = sco_multiplier * cluster_size / 1024  # SCO size is in MiB ==> SCO multiplier * cluster size (4 KiB by default)
        write_buffer = tlog_multiplier * sco_size * non_disposable_sco_factor
        # DTL counts as disabled only when configured manually with an empty host
        dtl_enabled = not (dtl_config_mode == StorageDriverClient.VOLDRV_DTL_MANUAL_MODE and dtl_host == '')

        try:
            mds_config = Configuration.get('/ovs/vpools/{0}/mds_config'.format(self.guid))
        except NotFoundException:
            mds_config = {}

        return {'sco_size': sco_size,
                'dtl_mode': StorageDriverClient.REVERSE_DTL_MODE_MAP[dtl_mode] if dtl_enabled is True else 'no_sync',
                'mds_config': mds_config,
                'dtl_enabled': dtl_enabled,
                'cluster_size': cluster_size,
                'write_buffer': write_buffer,
                'dtl_transport': StorageDriverClient.REVERSE_DTL_TRANSPORT_MAP[dtl_transport],
                'dtl_config_mode': dtl_config_mode,
                'tlog_multiplier': tlog_multiplier}

    def _statistics(self, dynamic):
        """
        Aggregates the Statistics (IOPS, Bandwidth, ...) of each vDisk served by the vPool.
        :param dynamic: The Dynamic descriptor triggering this call (used for cache timeout)
        :return: Dict with summed statistics of all StorageDrivers plus per-second deltas
        """
        # Local import to avoid a circular dependency
        from ovs.dal.hybrids.vdisk import VDisk
        statistics = {}
        for storagedriver in self.storagedrivers:
            for key, value in storagedriver.fetch_statistics().iteritems():
                if isinstance(value, dict):
                    # Nested statistics (e.g. distributions) are summed per sub-key
                    if key not in statistics:
                        statistics[key] = {}
                    for subkey, subvalue in value.iteritems():
                        if subkey not in statistics[key]:
                            statistics[key][subkey] = 0
                        statistics[key][subkey] += subvalue
                else:
                    if key not in statistics:
                        statistics[key] = 0
                    statistics[key] += value
        statistics['timestamp'] = time.time()
        VDisk.calculate_delta(self._key, dynamic, statistics)
        return statistics

    def _identifier(self):
        """
        An identifier of this vPool in its current configuration state
        :return: String that changes whenever the set of StorageDrivers changes
        """
        return '{0}_{1}'.format(self.guid, '_'.join(self.storagedrivers_guids))

    def _extensible(self):
        """
        Verifies whether this vPool can be extended or not
        :return: Tuple of (extensible, list of reasons why not)
        """
        reasons = []
        if self.status != VPool.STATUSES.RUNNING:
            reasons.append('non_running')
        if self.metadata_store_bits is None:
            reasons.append('voldrv_missing_info')
        return len(reasons) == 0, reasons

    def _volume_potentials(self):
        # type: () -> Dict[str, int]
        """
        Get an overview of all volume potentials for every Storagedriver in this vpool
        A possible -1 can be returned for the volume potential which indicates that the potential could not be retrieved
        :return: The overview with the volume potential
        :rtype: dict
        """
        volume_potentials = {}
        for storagedriver in self.storagedrivers:
            volume_potential = -1  # Sentinel meaning 'could not be retrieved'
            try:
                std_config = StorageDriverConfiguration(storagedriver.vpool_guid, storagedriver.storagedriver_id)
                client = LocalStorageRouterClient(std_config.remote_path)
                volume_potential = client.volume_potential(str(storagedriver.storagedriver_id))
            except Exception:
                self._logger.exception('Unable to retrieve configuration for storagedriver {0}'.format(storagedriver.storagedriver_id))
            volume_potentials[storagedriver.storagerouter.guid] = volume_potential
        return volume_potentials
class StorageDriver(DataObject):
    """
    The StorageDriver class represents a Storage Driver. A Storage Driver is an application
    on a Storage Router to which the vDisks connect. The Storage Driver is the gateway to the Storage Backend.
    """
    DISTANCES = DataObject.enumerator('Distance', {'NEAR': 0,
                                                   'FAR': 10000,
                                                   'INFINITE': 20000})
    # NOTE(review): other hybrids in this file use Logger('hybrids'); this class still uses
    # the older LogHandler API — possibly intentional, verify before unifying
    _logger = LogHandler.get('dal', name='hybrid')

    __properties = [Property('name', str, doc='Name of the Storage Driver.'),
                    Property('description', str, mandatory=False, doc='Description of the Storage Driver.'),
                    Property('ports', dict, doc='Ports on which the Storage Driver is listening (management, xmlrpc, dtl, edge).'),
                    Property('cluster_ip', str, doc='IP address on which the Storage Driver is listening.'),
                    Property('storage_ip', str, doc='IP address on which the vpool is shared to hypervisor'),
                    Property('storagedriver_id', str, unique=True, indexed=True, doc='ID of the Storage Driver as known by the Storage Drivers.'),
                    Property('mountpoint', str, doc='Mountpoint from which the Storage Driver serves data'),
                    Property('startup_counter', int, default=0, doc='StorageDriver startup counter')]
    __relations = [Relation('vpool', VPool, 'storagedrivers'),
                   Relation('storagerouter', StorageRouter, 'storagedrivers')]
    __dynamics = [Dynamic('status', str, 30),
                  Dynamic('statistics', dict, 4),
                  Dynamic('edge_clients', list, 30),
                  Dynamic('vdisks_guids', list, 15),
                  Dynamic('vpool_backend_info', dict, 60),
                  Dynamic('cluster_node_config', dict, 3600)]

    def _status(self):
        """
        Fetches the Status of the Storage Driver.
        :return: None (not implemented yet)
        """
        _ = self
        return None

    def _statistics(self, dynamic):
        """
        Aggregates the Statistics (IOPS, Bandwidth, ...) of the vDisks connected to the Storage Driver.
        :param dynamic: The Dynamic descriptor triggering this call (used for cache timeout)
        :return: Dict of statistics including per-second deltas and a timestamp
        """
        # Local import to avoid a circular dependency
        from ovs.dal.hybrids.vdisk import VDisk
        statistics = {}
        for key, value in self.fetch_statistics().iteritems():
            statistics[key] = value
        statistics['timestamp'] = time.time()
        VDisk.calculate_delta(self._key, dynamic, statistics)
        return statistics

    def _edge_clients(self):
        """
        Retrieves all edge clients
        :return: List of edge client dicts, sorted by (ip, port); empty on failure
        """
        clients = []
        try:
            for item in self.vpool.storagedriver_client.list_client_connections(str(self.storagedriver_id), req_timeout_secs=2):
                clients.append({'key': '{0}:{1}'.format(item.ip, item.port),
                                'object_id': item.object_id,
                                'ip': item.ip,
                                'port': item.port,
                                'server_ip': self.storage_ip,
                                'server_port': self.ports['edge']})
        except Exception:
            # Best-effort: a failing driver simply yields an empty client list
            StorageDriver._logger.exception('Error loading edge clients from {0}'.format(self.storagedriver_id))
        clients.sort(key=lambda e: (e['ip'], e['port']))
        return clients

    def _vdisks_guids(self):
        """
        Gets the vDisk guids served by this StorageDriver.
        :return: List of vDisk guids
        """
        # Local import to avoid a circular dependency
        from ovs.dal.lists.vdisklist import VDiskList
        volume_ids = []
        for entry in self.vpool.objectregistry_client.get_all_registrations():
            if entry.node_id() == self.storagedriver_id:
                volume_ids.append(entry.object_id())
        return VDiskList.get_in_volume_ids(volume_ids).guids

    def fetch_statistics(self):
        """
        Loads statistics from this vDisk - returns unprocessed data
        :return: Dict of raw statistics extracted from the volume driver
        """
        # Load data from volumedriver
        sdstats = StorageDriverClient.EMPTY_STATISTICS()
        if self.storagedriver_id and self.vpool:
            try:
                sdstats = self.vpool.storagedriver_client.statistics_node(str(self.storagedriver_id), req_timeout_secs=2)
            except Exception as ex:
                # Best-effort: fall back to the empty statistics object on failure
                StorageDriver._logger.error('Error loading statistics_node from {0}: {1}'.format(self.storagedriver_id, ex))
        # Load volumedriver data in dictionary
        # Any vDisk of the vPool works to supply the block size for conversion
        return VDisk.extract_statistics(sdstats, None if len(self.vpool.vdisks) == 0 else self.vpool.vdisks[0])

    def _vpool_backend_info(self):
        """
        Retrieve some additional information about the vPool to be shown in the GUI
        Size of the global write buffer for this Storage Driver, the accelerated backend info,
        connection info and caching strategy
        :return: Information about vPool and accelerated Backend
        :rtype: dict
        """
        # Local imports to avoid circular dependencies
        from ovs.dal.hybrids.diskpartition import DiskPartition
        from ovs.dal.hybrids.j_storagedriverpartition import StorageDriverPartition
        # Sum the sizes of all WRITE/SCO partitions to get the global write buffer
        global_write_buffer = 0
        for partition in self.partitions:
            if partition.role == DiskPartition.ROLES.WRITE and partition.sub_role == StorageDriverPartition.SUBROLE.SCO:
                global_write_buffer += partition.size

        cache_read = None
        cache_write = None
        cache_quota_fc = None
        cache_quota_bc = None
        backend_info = None
        connection_info = None
        block_cache_read = None
        block_cache_write = None
        block_cache_backend_info = None
        block_cache_connection_info = None
        # 'backend_aa_<storagerouter>' holds the accelerated (fragment cache) backend info
        metadata_key = 'backend_aa_{0}'.format(self.storagerouter_guid)
        if metadata_key in self.vpool.metadata:
            metadata = self.vpool.metadata[metadata_key]
            backend_info = metadata['backend_info']
            connection_info = metadata['connection_info']
        # 'backend_bc_<storagerouter>' holds the block cache backend info
        metadata_key = 'backend_bc_{0}'.format(self.storagerouter_guid)
        if metadata_key in self.vpool.metadata:
            metadata = self.vpool.metadata[metadata_key]
            block_cache_backend_info = metadata['backend_info']
            block_cache_connection_info = metadata['connection_info']
        if self.storagerouter_guid in self.vpool.metadata['backend']['caching_info']:
            caching_info = self.vpool.metadata['backend']['caching_info'][self.storagerouter_guid]
            cache_read = caching_info['fragment_cache_on_read']
            cache_write = caching_info['fragment_cache_on_write']
            cache_quota_fc = caching_info.get('quota_fc')
            cache_quota_bc = caching_info.get('quota_bc')
            block_cache_read = caching_info.get('block_cache_on_read')
            block_cache_write = caching_info.get('block_cache_on_write')
        return {'cache_read': cache_read,
                'cache_write': cache_write,
                'cache_quota_fc': cache_quota_fc,
                'cache_quota_bc': cache_quota_bc,
                'backend_info': backend_info,
                'connection_info': connection_info,
                'block_cache_read': block_cache_read,
                'block_cache_write': block_cache_write,
                'block_cache_backend_info': block_cache_backend_info,
                'block_cache_connection_info': block_cache_connection_info,
                'global_write_buffer': global_write_buffer}

    def _cluster_node_config(self):
        """
        Prepares a ClusterNodeConfig dict for the StorageDriver process
        :return: Dict with network endpoints and the node distance map for this StorageDriver
        """
        # Local import to avoid a circular dependency
        from ovs.extensions.generic.configuration import Configuration
        rdma = Configuration.get('/ovs/framework/rdma')
        distance_map = {}
        primary_domains = []
        secondary_domains = []
        # backup=False junctions are this router's primary domains, backup=True its secondary ones
        for junction in self.storagerouter.domains:
            if junction.backup is False:
                primary_domains.append(junction.domain_guid)
            else:
                secondary_domains.append(junction.domain_guid)
        for sd in self.vpool.storagedrivers:
            if sd.guid == self.guid:
                continue
            if len(primary_domains) == 0:
                # Without primary domains every other StorageDriver is considered NEAR
                distance_map[str(sd.storagedriver_id)] = StorageDriver.DISTANCES.NEAR
            else:
                distance = StorageDriver.DISTANCES.INFINITE
                for junction in sd.storagerouter.domains:
                    if junction.backup is False:
                        if junction.domain_guid in primary_domains:
                            distance = min(distance, StorageDriver.DISTANCES.NEAR)
                            break  # We can break here since we reached the minimum distance
                        elif junction.domain_guid in secondary_domains:
                            distance = min(distance, StorageDriver.DISTANCES.FAR)
                distance_map[str(sd.storagedriver_id)] = distance
        return {'vrouter_id': self.storagedriver_id,
                'host': self.storage_ip,
                'message_port': self.ports['management'],
                'xmlrpc_host': self.cluster_ip,
                'xmlrpc_port': self.ports['xmlrpc'],
                'failovercache_host': self.storage_ip,
                'failovercache_port': self.ports['dtl'],
                'network_server_uri': '{0}://{1}:{2}'.format('rdma' if rdma else 'tcp',
                                                             self.storage_ip,
                                                             self.ports['edge']),
                'node_distance_map': distance_map}
class AlbaBackend(DataObject):
    """
    The AlbaBackend provides ALBA specific information
    (hybrid DataObject: static properties/relations plus cached 'dynamic' properties,
    each recomputed after its timeout in seconds expires)
    """
    SCALINGS = DataObject.enumerator('Scaling', ['GLOBAL', 'LOCAL'])
    STATUSES = DataObject.enumerator('Status', {'UNKNOWN': 'unknown',
                                                'FAILURE': 'failure',
                                                'WARNING': 'warning',
                                                'RUNNING': 'running'})  # lower-case values for backwards compatibility

    _logger = Logger('hybrids')
    __properties = [Property('alba_id', str, mandatory=False, indexed=True, doc='ALBA internal identifier'),
                    Property('scaling', SCALINGS.keys(), doc='Scaling for an ALBA Backend can be {0}'.format(' or '.join(SCALINGS.keys())))]
    __relations = [Relation('backend', Backend, 'alba_backend', onetoone=True, doc='Linked generic Backend')]
    # Dynamic(name, return type, cache timeout in seconds, locked=serialize concurrent recomputes)
    __dynamics = [Dynamic('local_stack', dict, 15, locked=True),
                  Dynamic('statistics', dict, 5, locked=True),
                  Dynamic('ns_data', list, 60, locked=True),
                  Dynamic('usages', dict, 60, locked=True),
                  Dynamic('presets', list, 60, locked=True),
                  Dynamic('available', bool, 60),
                  Dynamic('name', str, 3600),
                  Dynamic('osd_statistics', dict, 5, locked=True),
                  Dynamic('linked_backend_guids', set, 30, locked=True),
                  Dynamic('remote_stack', dict, 60, locked=True),
                  Dynamic('local_summary', dict, 15, locked=True),
                  Dynamic('live_status', str, 30, locked=True)]

    def _local_stack(self):
        """
        Returns a live list of all disks known to this AlbaBackend
        :return: {node_id: {slot_id: {'osds': ..., 'name': ..., 'status': ..., 'status_detail': ...}}}
        :rtype: dict
        """
        if self.abm_cluster is None:
            return {}  # No ABM cluster yet, so backend not fully installed yet

        # Load information from node
        osd_statistics = self.osd_statistics

        def _load_live_info(_node, _storage_map):
            # Runs in a worker thread; each thread writes only its own node_id key,
            # so concurrent writers never touch the same slot of _storage_map.
            node_id = _node.node_id
            _storage_map[node_id] = {}
            for slot_id, _slot_data in _node.stack.iteritems():
                # Pre-fill some info
                _storage_map[node_id][slot_id] = {'osds': {},
                                                  'name': slot_id,
                                                  'status': 'error',
                                                  'status_detail': 'unknown'}
                # Extend the OSD info with the usage information
                for osd_id, osd_data in _slot_data.get('osds', {}).iteritems():
                    if osd_id in osd_statistics:
                        stats = osd_statistics[osd_id]
                        osd_data['usage'] = {'size': int(stats['capacity']),
                                             'used': int(stats['disk_usage']),
                                             'available': int(stats['capacity'] - stats['disk_usage'])}
                _storage_map[node_id][slot_id].update(_slot_data)

        # Query all ALBA nodes in parallel (one thread per node) and wait for all of them
        threads = []
        storage_map = {}
        for node in AlbaNodeList.get_albanodes():
            thread = Thread(target=_load_live_info, args=(node, storage_map))
            thread.start()
            threads.append(thread)
        for thread in threads:
            thread.join()
        return storage_map

    def _statistics(self):
        """
        Returns statistics for all its asds
        Aggregates per-operation counters over all OSDs: 'n' and 'n_ps' are summed,
        'max'/'min' are the extremes over OSDs, 'avg' is the mean of per-OSD averages.
        :return: {data_key: {'n': int, 'n_ps': int, 'avg': number, 'max': number, 'min': number}, 'creation': float}
        :rtype: dict
        """
        data_keys = ['apply', 'multi_get', 'range', 'range_entries', 'statistics']
        statistics = {}
        for key in data_keys:
            # avg/max/min start as lists of per-OSD values; collapsed to scalars below
            statistics[key] = {'n': 0,
                               'n_ps': 0,
                               'avg': [],
                               'max': [],
                               'min': []}
        for asd in self.osds:
            asd_stats = asd.statistics
            if not asd_stats:
                continue  # OSD without statistics is skipped entirely
            for key in data_keys:
                statistics[key]['n'] += asd_stats[key]['n']
                statistics[key]['n_ps'] += asd_stats[key]['n_ps']
                statistics[key]['avg'].append(asd_stats[key]['avg'])
                statistics[key]['max'].append(asd_stats[key]['max'])
                statistics[key]['min'].append(asd_stats[key]['min'])
        for key in data_keys:
            # Collapse the collected lists; 0 when no OSD contributed data
            statistics[key]['max'] = max(statistics[key]['max']) if len(statistics[key]['max']) > 0 else 0
            statistics[key]['min'] = min(statistics[key]['min']) if len(statistics[key]['min']) > 0 else 0
            if len(statistics[key]['avg']) > 0:
                statistics[key]['avg'] = sum(statistics[key]['avg']) / len(statistics[key]['avg'])
            else:
                statistics[key]['avg'] = 0
        statistics['creation'] = time.time()  # timestamp of this aggregation
        return statistics

    def _ns_data(self):
        """
        Loads namespace data
        :return: Namespace entries as reported by 'alba show-namespaces' (element [1] of the CLI result)
        :rtype: list
        """
        if self.abm_cluster is None:
            return []  # No ABM cluster yet, so backend not fully installed yet
        config = Configuration.get_configuration_path(self.abm_cluster.config_location)
        # 'max': -1 means no limit on the number of namespaces returned
        return AlbaCLI.run(command='show-namespaces', config=config, named_params={'max': -1})[1]

    def _usages(self):
        """
        Returns an overview of free space, total space and used space
        :return: {'free': float, 'size': float, 'used': float} in bytes, all zero when unavailable
        :rtype: dict
        """
        # Collect total usage
        usages = {'free': 0.0, 'size': 0.0, 'used': 0.0}
        if self.abm_cluster is None:
            return usages
        config = Configuration.get_configuration_path(self.abm_cluster.config_location)
        try:
            osds_stats = AlbaCLI.run(command='list-osds', config=config)
        except AlbaError:
            # Best-effort: log and return the zeroed overview instead of propagating
            self._logger.exception('Unable to fetch OSD information')
            return usages
        for osd_stats in osds_stats:
            usages['size'] += osd_stats['total']
            usages['used'] += osd_stats['used']
        usages['free'] = usages['size'] - usages['used']
        return usages

    def _presets(self):
        """
        Returns the policies active on the node
        Each preset is annotated with 'is_available', 'in_use', 'is_default' and per-policy
        'policy_metadata' ({'is_active', 'in_use', 'is_available'}); policy tuples are
        stringified at the end for serialization.
        :return: List of preset dicts as reported by 'alba list-presets', enriched
        :rtype: list
        """
        if self.abm_cluster is None:
            return []  # No ABM cluster yet, so backend not fully installed yet

        # Count per node the claimed OSDs in usable state (only relevant for LOCAL scaling)
        osds = {}
        if self.scaling != AlbaBackend.SCALINGS.GLOBAL:
            for node_id, slots in self.local_stack.iteritems():
                osds[node_id] = 0
                for slot_id, slot_data in slots.iteritems():
                    for osd_id, osd_data in slot_data['osds'].iteritems():
                        if osd_data['status'] in [AlbaNode.OSD_STATUSES.OK, AlbaNode.OSD_STATUSES.WARNING] and osd_data.get('claimed_by') == self.guid:
                            osds[node_id] += 1
        config = Configuration.get_configuration_path(self.abm_cluster.config_location)
        presets = AlbaCLI.run(command='list-presets', config=config)
        preset_dict = {}
        for preset in presets:
            preset_dict[preset['name']] = preset
            if 'in_use' not in preset:
                preset['in_use'] = True
            if 'is_default' not in preset:
                preset['is_default'] = False
            preset['is_available'] = False
            preset['policies'] = [tuple(policy) for policy in preset['policies']]  # tuples are hashable dict keys
            preset['policy_metadata'] = {}
            active_policy = None
            for policy in preset['policies']:
                is_available = False
                available_disks = 0
                # Policy layout is assumed (k, m, c, x): policy[2] = minimum required disks,
                # policy[3] = max disks per node -- TODO confirm against ALBA preset docs
                if self.scaling == AlbaBackend.SCALINGS.GLOBAL:
                    available_disks += sum(self.local_summary['devices'].values())
                if self.scaling == AlbaBackend.SCALINGS.LOCAL:
                    available_disks += sum(min(osds[node], policy[3]) for node in osds)
                if available_disks >= policy[2]:
                    if active_policy is None:
                        active_policy = policy  # first satisfiable policy is the active one
                    is_available = True
                preset['policy_metadata'][policy] = {'is_active': False, 'in_use': False, 'is_available': is_available}
                preset['is_available'] |= is_available
            if active_policy is not None:
                preset['policy_metadata'][active_policy]['is_active'] = True
        # Mark policies that active namespaces actually use
        for namespace in self.ns_data:
            if namespace['namespace']['state'] != 'active':
                continue
            policy_usage = namespace['statistics']['bucket_count']
            preset = preset_dict[namespace['namespace']['preset_name']]
            for usage in policy_usage:
                used_policy = tuple(usage[0])  # Policy as reported to be "in use"
                for configured_policy in preset['policies']:  # All configured policies
                    if used_policy[0] == configured_policy[0] and used_policy[1] == configured_policy[1] and used_policy[3] <= configured_policy[3]:
                        preset['policy_metadata'][configured_policy]['in_use'] = True
                        break
        # Convert policy tuples (and the tuple keys of policy_metadata) to strings;
        # .keys() returns a list in Python 2, so deleting while iterating is safe here
        for preset in presets:
            preset['policies'] = [str(policy) for policy in preset['policies']]
            for key in preset['policy_metadata'].keys():
                preset['policy_metadata'][str(key)] = preset['policy_metadata'][key]
                del preset['policy_metadata'][key]
        return presets

    def _available(self):
        """
        Returns True if the Backend can be used
        """
        return self.backend.status == 'RUNNING'

    def _name(self):
        """
        Returns the Backend's name
        """
        return self.backend.name

    def _osd_statistics(self):
        """
        Loads statistics from all it's asds in one call
        :return: {osd_id: statistics-dict} for every OSD whose query succeeded
        :rtype: dict
        """
        from ovs.dal.hybrids.albaosd import AlbaOSD

        statistics = {}
        if self.abm_cluster is None:
            return statistics  # No ABM cluster yet, so backend not fully installed yet
        # Only ASD/AD type OSDs are queried here
        osd_ids = [osd.osd_id for osd in self.osds if osd.osd_type in [AlbaOSD.OSD_TYPES.ASD, AlbaOSD.OSD_TYPES.AD]]
        if len(osd_ids) == 0:
            return statistics
        try:
            config = Configuration.get_configuration_path(self.abm_cluster.config_location)
            # TODO: This will need to be changed to osd-multistatistics, see openvstorage/alba#749
            raw_statistics = AlbaCLI.run(command='asd-multistatistics', config=config, named_params={'long-id': ','.join(osd_ids)})
        except RuntimeError:
            return statistics  # CLI failure: return what we have (empty) rather than raising
        if raw_statistics:
            for osd_id, stats in raw_statistics.iteritems():
                if stats['success'] is True:
                    statistics[osd_id] = stats['result']
        return statistics

    def _linked_backend_guids(self):
        """
        Returns a list (recursively) of all ALBA backends linked to this ALBA Backend based on the linked AlbaOSDs
        :return: List of ALBA Backend guids
        :rtype: set
        """
        # Import here to prevent from circular references
        from ovs.dal.hybrids.albaosd import AlbaOSD

        def _load_backend_info(_connection_info, _alba_backend_guid, _exceptions):
            # '_exceptions' must be an immutable object to be usable outside the Thread functionality
            client = OVSClient.get_instance(connection_info=_connection_info,
                                            cache_store=VolatileFactory.get_client())
            try:
                new_guids = client.get('/alba/backends/{0}/'.format(_alba_backend_guid),
                                       params={'contents': 'linked_backend_guids'})['linked_backend_guids']
                with lock:
                    guids.update(new_guids)
            except HttpNotFoundException:
                pass  # ALBA Backend has been deleted, we don't care we can't find the linked guids
            except HttpForbiddenException as fe:
                AlbaBackend._logger.exception('Collecting remote ALBA Backend information failed due to permission issues. {0}'.format(fe))
                _exceptions.append('not_allowed')
            except Exception as ex:
                AlbaBackend._logger.exception('Collecting remote ALBA Backend information failed with error: {0}'.format(ex))
                _exceptions.append('unknown')

        lock = Lock()
        guids = {self.guid}  # always includes this backend itself
        threads = []
        exceptions = []
        # Fan out one thread per remotely linked (ALBA_BACKEND type) OSD
        for osd in self.osds:
            if osd.osd_type == AlbaOSD.OSD_TYPES.ALBA_BACKEND and osd.metadata is not None:
                connection_info = osd.metadata['backend_connection_info']
                alba_backend_guid = osd.metadata['backend_info']['linked_guid']
                thread = Thread(target=_load_backend_info, args=(connection_info, alba_backend_guid, exceptions))
                thread.start()
                threads.append(thread)
        for thread in threads:
            thread.join()
        if len(exceptions) > 0:
            return None  # This causes the 'Link Backend' button in the GUI to become disabled
        return guids

    def _remote_stack(self):
        """
        Live list of information about remote linked OSDs of type ALBA Backend
        :return: Information about all linked OSDs
        :rtype: dict
        """
        # Import here to prevent from circular references
        from ovs.dal.hybrids.albaosd import AlbaOSD

        def _load_backend_info(_connection_info, _alba_backend_guid):
            # Runs in a worker thread; fills in live status/summary for one linked backend
            client = OVSClient.get_instance(connection_info=_connection_info,
                                            cache_store=VolatileFactory.get_client())
            return_value[_alba_backend_guid]['live_status'] = AlbaBackend.STATUSES.UNKNOWN
            try:
                info = client.get('/alba/backends/{0}/'.format(_alba_backend_guid),
                                  params={'contents': 'local_summary,live_status'})
                with lock:
                    return_value[_alba_backend_guid].update(info['local_summary'])
                    return_value[_alba_backend_guid]['live_status'] = info['live_status']
            except HttpNotFoundException as ex:
                return_value[_alba_backend_guid]['error'] = 'backend_deleted'
                self._logger.warning('AlbaBackend {0} STATUS set as FAILURE due to HttpNotFoundException: {1}'.format(self.name, ex))
                return_value[_alba_backend_guid]['live_status'] = AlbaBackend.STATUSES.FAILURE
            except HttpForbiddenException:
                return_value[_alba_backend_guid]['error'] = 'not_allowed'
            except Exception as ex:
                return_value[_alba_backend_guid]['error'] = 'unknown'
                AlbaBackend._logger.exception('Collecting remote ALBA Backend information failed with error: {0}'.format(ex))

        # Retrieve local summaries of all related OSDs of type ALBA_BACKEND
        lock = Lock()
        threads = []
        return_value = {}
        cluster_ips = [sr.ip for sr in StorageRouterList.get_storagerouters()]
        for osd in self.osds:
            if osd.osd_type == AlbaOSD.OSD_TYPES.ALBA_BACKEND and osd.metadata is not None:
                backend_info = osd.metadata['backend_info']
                connection_info = osd.metadata['backend_connection_info']
                connection_host = connection_info['host']
                alba_backend_guid = backend_info['linked_guid']
                # Pre-fill the entry before the thread starts so partial failures still return a record
                return_value[alba_backend_guid] = {'name': backend_info['linked_name'],
                                                   'error': '',
                                                   'domain': None if osd.domain is None else {'guid': osd.domain_guid,
                                                                                              'name': osd.domain.name},
                                                   'preset': backend_info['linked_preset'],
                                                   'osd_id': backend_info['linked_alba_id'],
                                                   'local_ip': connection_host in cluster_ips,
                                                   'remote_host': connection_host}
                thread = Thread(target=_load_backend_info, args=(connection_info, alba_backend_guid))
                thread.start()
                threads.append(thread)
        for thread in threads:
            thread.join()
        return return_value

    def _local_summary(self):
        """
        A local summary for an ALBA Backend containing information used to show in the GLOBAL ALBA Backend detail page
        :return: Information about used size, devices, name, scaling
        :rtype: dict
        """
        usage_info = {'size': 0, 'used': 0}
        device_info = {'red': 0, 'green': 0, 'orange': 0, 'gray': 0}
        return_value = {'name': self.name,
                        'sizes': usage_info,
                        'devices': device_info,
                        'scaling': self.scaling,
                        'domain_info': dict((backend_domain.domain_guid, backend_domain.domain.name) for backend_domain in self.backend.domains),
                        'backend_guid': self.backend.guid}
        # Calculate device information: classify every OSD claimed by this backend by status colour
        if self.scaling != AlbaBackend.SCALINGS.GLOBAL:
            for node_values in self.local_stack.itervalues():
                for slot_values in node_values.itervalues():
                    for osd_info in slot_values.get('osds', {}).itervalues():
                        if self.guid == osd_info.get('claimed_by'):
                            status = osd_info.get('status', 'unknown')
                            if status == AlbaNode.OSD_STATUSES.OK:
                                device_info['green'] += 1
                            elif status == AlbaNode.OSD_STATUSES.WARNING:
                                device_info['orange'] += 1
                            elif status == AlbaNode.OSD_STATUSES.ERROR:
                                device_info['red'] += 1
                            elif status == AlbaNode.OSD_STATUSES.UNKNOWN:
                                device_info['gray'] += 1

        # Calculate used and total size
        # NOTE(review): this rebinds the local name 'usage_info' to self.usages, but
        # return_value['sizes'] still references the original zeroed dict created above,
        # so the remote-stack size additions below never reach the returned 'sizes' --
        # looks unintentional; confirm against upstream before relying on 'sizes'.
        usage_info = self.usages

        if self.scaling != AlbaBackend.SCALINGS.LOCAL:
            for backend_values in self.remote_stack.itervalues():
                for key, value in backend_values.get('sizes', {}).iteritems():
                    usage_info[key] += value
                devices = backend_values.get('devices')
                if devices is None:
                    continue
                # A whole linked backend counts as a single device in the worst colour it reports
                if devices['red'] > 0:
                    device_info['red'] += 1
                elif devices['orange'] > 0:
                    device_info['orange'] += 1
                elif devices['green'] > 0:
                    device_info['green'] += 1
                elif devices.get('gray', 0) > 0:
                    device_info['gray'] += 1
        return return_value

    def _live_status(self):
        """
        Retrieve the live status of the ALBA Backend to be displayed in the 'Backends' page in the GUI based on:
        - Maintenance agents presence
        - Maintenance agents status
        - Disk statuses
        :return: Status as reported by the plugin
        :rtype: str
        """
        if self.backend.status == Backend.STATUSES.INSTALLING:
            return 'installing'

        if self.backend.status == Backend.STATUSES.DELETING:
            return 'deleting'

        # Verify failed disks
        devices = self.local_summary['devices']
        if devices['red'] > 0:
            self._logger.warning('AlbaBackend {0} STATUS set to FAILURE due to {1} failed disks'.format(self.name, devices['red']))
            return AlbaBackend.STATUSES.FAILURE

        # Verify remote OSDs
        remote_errors = False
        linked_backend_warning = False
        for remote_info in self.remote_stack.itervalues():
            if remote_info['error'] == 'unknown' or remote_info['live_status'] == AlbaBackend.STATUSES.FAILURE:
                message = None
                if remote_info['error'] == 'unknown':
                    message = 'unknown remote error info'
                elif remote_info['live_status'] == AlbaBackend.STATUSES.FAILURE:
                    message = 'FAILURE in live_status'
                self._logger.warning('AlbaBackend {0} STATUS set to FAILURE due to OSD {1}: {2} '.format(self.name, remote_info['name'], message))
                return AlbaBackend.STATUSES.FAILURE
            if remote_info['error'] == 'not_allowed':
                remote_errors = True
            if remote_info['live_status'] == AlbaBackend.STATUSES.WARNING:
                linked_backend_warning = True

        # Retrieve ASD and maintenance service information
        def _get_node_information(_node):
            # Runs in a worker thread per node: records which nodes host OSDs claimed by
            # this backend, and collects this backend's maintenance services per node
            if _node not in nodes_used_by_this_backend:
                for slot_info in _node.stack.itervalues():
                    for osd_info in slot_info['osds'].itervalues():
                        if osd_info['claimed_by'] == self.guid:
                            nodes_used_by_this_backend.add(_node)
                            break
                    if _node in nodes_used_by_this_backend:
                        break

            try:
                services = _node.maintenance_services
                if self.name in services:
                    for _service_name, _service_status in services[self.name]:
                        services_for_this_backend[_service_name] = _node
                        service_states[_service_name] = _service_status
                        if _node.node_id not in services_per_node:
                            services_per_node[_node.node_id] = 0
                        services_per_node[_node.node_id] += 1
            except Exception:
                pass  # unreachable node: best-effort, treat as having no services

        services_for_this_backend = {}
        services_per_node = {}
        service_states = {}
        nodes_used_by_this_backend = set()
        threads = []
        all_nodes = AlbaNodeList.get_albanodes()
        for node in all_nodes:
            thread = Thread(target=_get_node_information, args=(node, ))
            thread.start()
            threads.append(thread)
        for thread in threads:
            thread.join()

        zero_services = False
        if len(services_for_this_backend) == 0:
            if len(all_nodes) > 0:
                # Nodes exist but none runs a maintenance service for this backend
                AlbaBackend._logger.error('AlbaBackend {0} STATUS set to FAILURE due to no maintenance services'.format(self.name))
                return AlbaBackend.STATUSES.FAILURE
            zero_services = True  # no nodes at all: downgraded to WARNING at the end

        # Verify maintenance agents status
        for service_name, node in services_for_this_backend.iteritems():
            try:
                service_status = service_states.get(service_name)
                if service_status is None or service_status != 'active':
                    AlbaBackend._logger.error('AlbaBackend {0} STATUS set to FAILURE due to non-running maintenance service(s): {1}'.format(self.name, service_name))
                    return AlbaBackend.STATUSES.FAILURE
            except Exception:
                pass

        # Verify maintenance agents presence
        layout_key = '/ovs/alba/backends/{0}/maintenance/agents_layout'.format(self.guid)
        layout = None
        if Configuration.exists(layout_key):
            layout = Configuration.get(layout_key)
            # Discard a configured layout that is not a list or references no known node
            if not isinstance(layout, list) or not any(node.node_id for node in all_nodes if node.node_id in layout):
                layout = None
        if layout is None:
            # No (valid) explicit layout: expect a configured number of agents (default 3),
            # capped by the number of nodes actually used, with a minimum of 1
            config_key = '/ovs/alba/backends/{0}/maintenance/nr_of_agents'.format(self.guid)
            expected_services = 3
            if Configuration.exists(config_key):
                expected_services = Configuration.get(config_key)
            expected_services = min(expected_services, len(nodes_used_by_this_backend)) or 1
            if len(services_for_this_backend) < expected_services:
                AlbaBackend._logger.warning('Live status for backend {0} is "warning": insufficient maintenance services'.format(self.name))
                return AlbaBackend.STATUSES.WARNING
        else:
            # Explicit layout: every listed node must run at least one maintenance service
            for node_id in layout:
                if node_id not in services_per_node:
                    AlbaBackend._logger.warning('Live status for backend {0} is "warning": invalid maintenance service layout'.format(self.name))
                    return AlbaBackend.STATUSES.WARNING

        # Verify local and remote OSDs
        if devices['orange'] > 0:
            AlbaBackend._logger.warning('Live status for backend {0} is "warning": one or more OSDs in warning'.format(self.name))
            return AlbaBackend.STATUSES.WARNING

        if remote_errors is True or linked_backend_warning is True:
            AlbaBackend._logger.warning('Live status for backend {0} is "warning": errors/warnings on remote stack'.format(self.name))
            return AlbaBackend.STATUSES.WARNING
        if zero_services is True:
            AlbaBackend._logger.warning('Live status for backend {0} is "warning": no maintenance services'.format(self.name))
            return AlbaBackend.STATUSES.WARNING

        return AlbaBackend.STATUSES.RUNNING
class StorageRouter(DataObject):
    """
    A StorageRouter represents the Open vStorage software stack, any (v)machine on which it is installed
    """
    _logger = Logger('hybrids')
    __properties = [Property('name', str, unique=True, doc='Name of the Storage Router.'),
                    Property('description', str, mandatory=False, doc='Description of the Storage Router.'),
                    Property('machine_id', str, unique=True, mandatory=False, indexed=True, doc='The hardware identifier of the Storage Router'),
                    Property('ip', str, unique=True, indexed=True, doc='IP Address of the Storage Router, if available'),
                    Property('heartbeats', dict, default={}, doc='Heartbeat information of various monitors'),
                    Property('node_type', ['MASTER', 'EXTRA'], default='EXTRA', doc='Indicates the node\'s type'),
                    Property('rdma_capable', bool, doc='Is this Storage Router RDMA capable'),
                    Property('last_heartbeat', float, mandatory=False, doc='When was the last (external) heartbeat send/received'),
                    Property('package_information', dict, mandatory=False, default={}, doc='Information about installed packages and potential available new versions')]
    __relations = []
    # Dynamic(name, return type, cache timeout in seconds)
    __dynamics = [Dynamic('statistics', dict, 4),
                  Dynamic('vpools_guids', list, 15),
                  Dynamic('vdisks_guids', list, 15),
                  Dynamic('status', str, 10),
                  Dynamic('partition_config', dict, 3600),
                  Dynamic('regular_domains', list, 60),
                  Dynamic('recovery_domains', list, 60),
                  Dynamic('features', dict, 3600)]
    ALBA_FEATURES = DataObject.enumerator('Alba_features', {'CACHE_QUOTA': 'cache-quota',
                                                            'BLOCK_CACHE': 'block-cache',
                                                            'AUTO_CLEANUP': 'auto-cleanup-deleted-namespaces'})
    STORAGEDRIVER_FEATURES = DataObject.enumerator('Storagedriver_features', {'DIRECTORY_UNLINK': 'directory_unlink'})

    def _statistics(self, dynamic):
        """
        Aggregates the Statistics (IOPS, Bandwidth, ...) of each vDisk.
        Scalar values are summed across StorageDrivers; dict values are summed per sub-key.
        :param dynamic: The Dynamic descriptor triggering this computation (used for delta bookkeeping)
        :return: Aggregated statistics including a 'timestamp' key
        :rtype: dict
        """
        from ovs.dal.hybrids.vdisk import VDisk
        statistics = {}
        for storagedriver in self.storagedrivers:
            for key, value in storagedriver.fetch_statistics().iteritems():
                if isinstance(value, dict):
                    if key not in statistics:
                        statistics[key] = {}
                    for subkey, subvalue in value.iteritems():
                        if subkey not in statistics[key]:
                            statistics[key][subkey] = 0
                        statistics[key][subkey] += subvalue
                else:
                    if key not in statistics:
                        statistics[key] = 0
                    statistics[key] += value
        statistics['timestamp'] = time.time()
        # Side effect: records deltas for rate calculations between consecutive invocations
        VDisk.calculate_delta(self._key, dynamic, statistics)
        return statistics

    def _vdisks_guids(self):
        """
        Gets the vDisk guids served by this StorageRouter.
        Matches object-registry registrations of this router's StorageDrivers to vDisks.
        :return: List of vDisk guids
        """
        from ovs.dal.lists.vdisklist import VDiskList
        volume_ids = []
        vpools = set()
        storagedriver_ids = []
        for storagedriver in self.storagedrivers:
            vpools.add(storagedriver.vpool)
            storagedriver_ids.append(storagedriver.storagedriver_id)
        for vpool in vpools:
            for entry in vpool.objectregistry_client.get_all_registrations():
                if entry.node_id() in storagedriver_ids:
                    volume_ids.append(entry.object_id())
        return VDiskList.get_in_volume_ids(volume_ids).guids

    def _vpools_guids(self):
        """
        Gets the vPool guids linked to this StorageRouter (trough StorageDriver)
        :return: List of unique vPool guids
        """
        vpool_guids = set()
        for storagedriver in self.storagedrivers:
            vpool_guids.add(storagedriver.vpool_guid)
        return list(vpool_guids)

    def _status(self):
        """
        Calculates the current Storage Router status based on various heartbeats
        'pointer' indexes into statusses and is only ever raised (worst status wins).
        :return: 'OK', 'WARNING' or 'FAILURE'
        """
        pointer = 0
        statusses = ['OK', 'WARNING', 'FAILURE']
        current_time = time.time()
        if self.heartbeats is not None:
            # Missing heartbeat keys default to 0, which yields a huge delay -> FAILURE
            process_delay = abs(self.heartbeats.get('process', 0) - current_time)
            if process_delay > 60 * 5:
                pointer = max(pointer, 2)
            else:
                delay = abs(self.heartbeats.get('celery', 0) - current_time)
                if delay > 60 * 5:
                    pointer = max(pointer, 2)
                elif delay > 60 * 2:
                    pointer = max(pointer, 1)
        # Any missing disk or partition forces FAILURE
        for disk in self.disks:
            if disk.state == 'MISSING':
                pointer = max(pointer, 2)
            for partition in disk.partitions:
                if partition.state == 'MISSING':
                    pointer = max(pointer, 2)
        return statusses[pointer]

    def _partition_config(self):
        """
        Returns a dict with all partition information of a given storagerouter
        :return: {role: [partition guid, ...]} for every DiskPartition role (a guid is
                 appended once per role it carries)
        """
        from ovs.dal.hybrids.diskpartition import DiskPartition
        dataset = dict((role, []) for role in DiskPartition.ROLES)
        for disk in self.disks:
            for partition in disk.partitions:
                for role in partition.roles:
                    dataset[role].append(partition.guid)
        return dataset

    def _regular_domains(self):
        """
        Returns a list of domain guids with backup flag False
        :return: List of domain guids
        """
        return [junction.domain_guid for junction in self.domains if junction.backup is False]

    def _recovery_domains(self):
        """
        Returns a list of domain guids with backup flag True
        :return: List of domain guids
        """
        return [junction.domain_guid for junction in self.domains if junction.backup is True]

    def _features(self):
        """
        Returns information about installed/available features
        Queries the installed volumedriver and ALBA package versions over SSH; an 'ee-'
        version prefix marks the enterprise edition. A feature is reported when the
        installed version is at least the feature's minimum version and the edition
        requirement (None = any) matches.
        :return: Dictionary containing edition and available features per component
        """
        try:
            client = SSHClient(self, username='******')
            # Group 'edition' captures an optional 'ee-' prefix; 'version' the remainder
            enterprise_regex = re.compile('^(?P<edition>ee-)?(?P<version>.*)$')
            version = client.run(command=PackageFactory.VERSION_CMD_SD, allow_insecure=True, allow_nonzero=True)
            volumedriver_version = enterprise_regex.match(version).groupdict()
            volumedriver_edition = PackageFactory.EDITION_ENTERPRISE if volumedriver_version['edition'] == 'ee-' else PackageFactory.EDITION_COMMUNITY
            volumedriver_version_lv = LooseVersion(volumedriver_version['version'])
            # Feature map: feature -> (minimum version, required edition or None)
            volumedriver_features = [feature for feature, version in {self.STORAGEDRIVER_FEATURES.DIRECTORY_UNLINK: ('6.15.0', None)}.iteritems()
                                     if volumedriver_version_lv >= LooseVersion(version[0]) and (version[1] is None or version[1] == volumedriver_edition)]
            version = client.run(command=PackageFactory.VERSION_CMD_ALBA, allow_insecure=True, allow_nonzero=True)
            alba_version = enterprise_regex.match(version).groupdict()
            alba_edition = PackageFactory.EDITION_ENTERPRISE if alba_version['edition'] == 'ee-' else PackageFactory.EDITION_COMMUNITY
            alba_version_lv = LooseVersion(alba_version['version'])
            alba_features = [feature for feature, version in {self.ALBA_FEATURES.CACHE_QUOTA: ('1.4.4', PackageFactory.EDITION_ENTERPRISE),
                                                              self.ALBA_FEATURES.BLOCK_CACHE: ('1.4.0', PackageFactory.EDITION_ENTERPRISE),
                                                              self.ALBA_FEATURES.AUTO_CLEANUP: ('1.5.27', PackageFactory.EDITION_ENTERPRISE)}.iteritems()
                             if alba_version_lv >= LooseVersion(version[0]) and (version[1] is None or version[1] == alba_edition)]
            return {'volumedriver': {'edition': volumedriver_edition, 'features': volumedriver_features},
                    'alba': {'edition': alba_edition, 'features': alba_features}}
        except UnableToConnectException:
            pass  # node unreachable: fall through to empty result
        except Exception:
            StorageRouter._logger.exception('Could not load feature information')
        return {}