Exemple #1
0
class ServiceType(DataObject):
    """
    Hybrid describing one kind of service that has to be managed by the framework.
    * SERVICE_TYPES: enumerator built from a key -> display name mapping of the known service types
    * ARAKOON_CLUSTER_TYPES: enumerator listing the known Arakoon cluster type identifiers
    """
    SERVICE_TYPES = DataObject.enumerator('Service_type', {'NS_MGR': 'NamespaceManager',
                                                           'ARAKOON': 'Arakoon',
                                                           'ALBA_MGR': 'AlbaManager',
                                                           'MD_SERVER': 'MetadataServer',
                                                           'ALBA_PROXY': 'AlbaProxy',
                                                           'ALBA_S3_TRANSACTION': 'AlbaS3Transaction'})
    ARAKOON_CLUSTER_TYPES = DataObject.enumerator('Arakoon_cluster_type', ['ABM', 'FWK', 'NSM', 'SD', 'CFG'])

    # A ServiceType only declares a unique, indexed name; it has no relations and no dynamics
    __properties = [Property('name', str, unique=True, indexed=True, doc='Name of the ServiceType.')]
    __relations = []
    __dynamics = []
Exemple #2
0
 def __init__(self, *args, **kwargs):
     """
     Initializes a vDisk, setting up its additional helpers

     All positional and keyword arguments are forwarded unchanged to DataObject.__init__.
     """
     DataObject.__init__(self, *args, **kwargs)
     # NOTE(review): presumably DataObject refuses new attributes while _frozen is True, hence the
     # temporary unfreeze around the helper assignment - confirm against DataObject
     self._frozen = False
     self._storagedriver_client = None  # Starts out empty; this method does not create a client
     self._frozen = True
Exemple #3
0
 def __init__(self, *args, **kwargs):
     """
     Initializes a vDisk, setting up its additional helpers

     All positional and keyword arguments are forwarded unchanged to DataObject.__init__.
     """
     DataObject.__init__(self, *args, **kwargs)
     # NOTE(review): presumably DataObject refuses new attributes while _frozen is True, hence the
     # temporary unfreeze around the helper assignment - confirm against DataObject
     self._frozen = False
     self._storagedriver_client = None  # Starts out empty; this method does not create a client
     self._frozen = True
 def __init__(self, *args, **kwargs):
     """
     Initializes an AlbaNode, setting up its additional helpers

     All positional and keyword arguments are forwarded unchanged to DataObject.__init__.
     """
     DataObject.__init__(self, *args, **kwargs)
     # NOTE(review): presumably DataObject refuses new attributes while _frozen is True, hence the
     # temporary unfreeze around the helper assignment - confirm against DataObject
     self._frozen = False
     self.client = ASDManagerClient(self)  # The client receives this node instance itself
     self._frozen = True
 def __init__(self, *args, **kwargs):
     """
     Initializes a MDSService, setting up its additional helpers

     All positional and keyword arguments are forwarded unchanged to DataObject.__init__.
     """
     DataObject.__init__(self, *args, **kwargs)
     # NOTE(review): presumably DataObject refuses new attributes while _frozen is True, hence the
     # temporary unfreeze around the helper assignment - confirm against DataObject
     self._frozen = False
     self.metadataserver_client = None
     self._frozen = True
     # Called after the object has been frozen again; reload_client is defined outside this snippet
     self.reload_client()
 def __init__(self, *args, **kwargs):
     """
     Initializes an AlbaNode and attaches the manager client matching its type

     All positional and keyword arguments are forwarded unchanged to DataObject.__init__.
     When the environment variable RUNNING_UNITTESTS equals 'True', a ManagerClientMockup is used instead
     of a real client.
     :raises NotImplementedError: When not running unittests and the node's type has no entry in CLIENTS
     """
     DataObject.__init__(self, *args, **kwargs)
     self._frozen = False
     self.client = None
     if os.environ.get('RUNNING_UNITTESTS') == 'True':
         self.client = ManagerClientMockup(self)
     elif self.type in self.CLIENTS:
         self.client = self.CLIENTS[self.type](self)
     else:
         # Note that the object is left unfrozen when this is raised
         raise NotImplementedError('Type {0} is not implemented'.format(self.type))
     self._frozen = True
class StorageDriverPartition(DataObject):
    """
    Junction hybrid linking a StorageDriver to a DiskPartition (optionally also to an MDSService).
    Navigation examples:
    * my_storagedriver.partitions[0].partition
    * my_partition.storagedrivers[0].storagedriver
    """
    SUBROLE = DataObject.enumerator('Role', ['FCACHE', 'FD', 'MD', 'MDS', 'SCO', 'TLOG'])

    __properties = [
        Property('number', int, doc='Number of the service in case there is more than one'),
        Property('size', long, mandatory=False, doc='Size in bytes configured for use'),
        Property('role', DiskPartition.ROLES.keys(), doc='Role of the partition'),
        Property('sub_role', SUBROLE.keys(), mandatory=False, doc='Sub-role of this StorageDriverPartition')
    ]
    __relations = [
        Relation('partition', DiskPartition, 'storagedrivers'),
        Relation('storagedriver', StorageDriver, 'partitions'),
        Relation('mds_service', MDSService, 'storagedriver_partitions', mandatory=False)
    ]
    __dynamics = [
        Dynamic('folder', str, 3600),
        Dynamic('path', str, 3600)
    ]

    def _folder(self):
        """
        Name of the folder on the mountpoint
        Built as <vpool name>_<role>_<number>, or <vpool name>_<role>_<sub_role>_<number> when a sub_role
        is set. Role and sub_role are lower-cased.
        """
        if not self.sub_role:
            return '{0}_{1}_{2}'.format(self.storagedriver.vpool.name, self.role.lower(), self.number)
        return '{0}_{1}_{2}_{3}'.format(self.storagedriver.vpool.name, self.role.lower(), self.sub_role.lower(), self.number)

    def _path(self):
        """
        Location on the filesystem: the folder of the related partition joined with this junction's folder
        """
        base_folder = self.partition.folder
        return '{0}/{1}'.format(base_folder, self.folder)
Exemple #8
0
class DiskPartition(DataObject):
    """
    Hybrid representing a single partition on a physical Disk
    """
    ROLES = DataObject.enumerator('Role', ['DB', 'READ', 'SCRUB', 'WRITE', 'BACKEND'])
    VIRTUAL_STORAGE_LOCATION = '/mnt/storage'

    __properties = [Property('id', str, doc='The partition identifier'),
                    Property('filesystem', str, mandatory=False, doc='The filesystem used on the partition'),
                    Property('state', ['OK', 'FAILURE', 'MISSING'], doc='State of the partition'),
                    Property('inode', int, mandatory=False, doc='The partitions inode'),
                    Property('offset', int, doc='Offset of the partition'),
                    Property('size', int, doc='Size of the partition'),
                    Property('mountpoint', str, mandatory=False, doc='Mountpoint of the partition, None if not mounted'),
                    Property('path', str, doc='The partition path'),
                    Property('roles', list, default=[], doc='A list of claimed roles')]
    __relations = [Relation('disk', Disk, 'partitions')]
    __dynamics = [Dynamic('usage', list, 120),
                  Dynamic('folder', str, 3600)]

    def _usage(self):
        """
        A list with one dict per linked StorageDriver junction, describing this partition's usage
        Every dict holds the keys 'type', 'role', 'size', 'relation' and 'folder'
        """
        return [{'type': 'storagedriver',
                 'role': junction.role,
                 'size': junction.size,
                 'relation': junction.storagedriver_guid,
                 'folder': junction.folder} for junction in self.storagedrivers]

    def _folder(self):
        """
        Corrected mountpoint: the root mountpoint '/' is replaced by VIRTUAL_STORAGE_LOCATION,
        every other mountpoint is returned untouched
        """
        if self.mountpoint == '/':
            return DiskPartition.VIRTUAL_STORAGE_LOCATION
        return self.mountpoint
Exemple #9
0
class Disk(DataObject):
    """
    Hybrid for the physical disks available to a storagerouter. Technically these can be virtual disks,
    but from the OS (and framework) point of view they are considered physical.
    """
    STATES = DataObject.enumerator('state', ['OK', 'FAILURE', 'MISSING'])

    __properties = [
        Property('aliases', list, doc='The device aliases'),
        Property('model', str, mandatory=False, doc='The disks model'),
        Property('state', STATES.keys(), doc='The state of the disk'),
        Property('name', str, doc='Name of the disk (e.g. sda)'),
        Property('size', int, doc='Size of the disk, in bytes'),
        Property('is_ssd', bool, doc='The type of the disk')
    ]
    # Every disk belongs to one storagerouter, which exposes its disks as 'disks'
    __relations = [
        Relation('storagerouter', StorageRouter, 'disks')
    ]
    __dynamics = []
Exemple #10
0
class AlbaNodeCluster(DataObject):
    """
    The AlbaNodeCluster represents a group of AlbaNodes which will function as one
    The idea behind the cluster is that when one AlbaNode would fail, another can take over
    The information within the AlbaNodes would be the same (same stack information)
    This cluster contains the same information as an AlbaNode for representation purposes
    All dynamics below aggregate the corresponding value of every AlbaNode in self.alba_nodes
    """
    CLUSTER_TYPES = DataObject.enumerator('ClusterType',
                                          ['ASD', 'GENERIC', 'MIXED'])

    _logger = Logger('hybrids')
    __properties = [
        Property('name',
                 str,
                 mandatory=False,
                 doc='Optional name for the AlbaNode')
    ]
    __dynamics = [
        Dynamic('type', CLUSTER_TYPES.keys(), 3600),
        Dynamic('ips', list, 3600),
        Dynamic('cluster_metadata', dict, 3600),
        Dynamic('local_summary', dict, 60),
        Dynamic('stack', dict, 15, locked=True),
        Dynamic('maintenance_services', dict, 30, locked=True),
        Dynamic('supported_osd_types', list, 3600),
        Dynamic('read_only_mode', bool, 60)
    ]

    def _type(self):
        """
        Retrieve the type of the cluster
        The type is the type shared by all linked AlbaNodes, MIXED as soon as two nodes differ
        and ASD when there are no nodes (or no node type could be determined)
        :return: Type of the cluster
        :rtype: str
        """
        node_type = None
        for alba_node in self.alba_nodes:
            if node_type is None:
                node_type = alba_node.type
                continue
            if alba_node.type != node_type:  # Should be blocked by the API. This type is currently not supported
                node_type = self.CLUSTER_TYPES.MIXED
                break
        return node_type or self.CLUSTER_TYPES.ASD  # Default to ASD

    def _cluster_metadata(self):
        """
        Returns a set of metadata hinting on how the cluster should be used
        The GUI/API can adapt based on this information
        :return: Dict with the flags 'fill', 'fill_add' and 'clear', extended with 'fill_metadata' (ASD)
                 or 'fill_add_metadata' (GENERIC) describing the expected input
        :rtype: dict
        """
        cluster_metadata = {
            'fill': False,  # Prepare Slot for future usage
            'fill_add': False,  # OSDs will be added and claimed right away
            'clear': False  # Indicates whether OSDs can be removed from ALBA Node / Slot
        }
        cluster_type = self.type
        if cluster_type == self.CLUSTER_TYPES.ASD:
            cluster_metadata.update({
                'fill': True,
                'fill_metadata': {
                    'count': 'integer'
                },
                'clear': True
            })
        elif cluster_type == self.CLUSTER_TYPES.GENERIC:
            cluster_metadata.update({
                'fill_add': True,
                'fill_add_metadata': {
                    'osd_type': 'osd_type',
                    'ips': 'list_of_ip',
                    'port': 'port'
                },
                'clear': True
            })
        # Nothing is enabled when the type is mixed, as nothing is supported for it
        return cluster_metadata

    def _ips(self):
        """
        Returns the IPs of the nodes
        :return: List of lists with IPs of all linked Nodes
        :rtype: list[list[str]]
        """
        return [alba_node.ips for alba_node in self.alba_nodes]

    def _maintenance_services(self):
        """
        Returns the maintenance services of every linked node
        :return: The maintenance_services of each AlbaNode, keyed by the node's node_id
        :rtype: dict
        """
        services = {}
        for alba_node in self.alba_nodes:
            services[alba_node.node_id] = alba_node.maintenance_services
        # The collected dict must be handed back, otherwise the dynamic always evaluates to None
        return services

    def _stack(self):
        """
        Returns an overview of the storage stack of every linked node
        :return: The stack of each AlbaNode, keyed by the node's node_id
        :rtype: dict
        """
        stack = {}
        for alba_node in self.alba_nodes:
            stack[alba_node.node_id] = alba_node.stack
        # @Todo collapse information together based on active/passive
        # @todo Do not collapse on device but rather on slot id (which is an alias that should match)
        return stack

    def _supported_osd_types(self):
        """
        Returns a list of all supported OSD types
        :return: Only the ASD OSD type for an ASD cluster, the ASD and AD OSD types for GENERIC and MIXED clusters
        :rtype: list
        """
        from ovs.dal.hybrids.albaosd import AlbaOSD  # Imported locally, as in the original implementation
        # CLUSTER_TYPES only knows ASD, GENERIC and MIXED; there is no nested NODE_TYPES enumerator on it
        if self.type == self.CLUSTER_TYPES.ASD:
            return [AlbaOSD.OSD_TYPES.ASD]
        # GENERIC and MIXED clusters support the same set
        return [AlbaOSD.OSD_TYPES.ASD, AlbaOSD.OSD_TYPES.AD]

    def _read_only_mode(self):
        """
        Indicates whether the ALBA Node can be used for OSD manipulation
        If the version on the ALBA Node is lower than a specific version required by the framework, the ALBA Node becomes read only,
        this means, that actions such as creating, restarting, deleting OSDs becomes impossible until the node's software has been updated
        :return: True if the ALBA Node should be read only, False otherwise
        :rtype: bool
        """
        # The whole cluster should be read-only as not all actions can be mirrored
        return any(alba_node.read_only_mode for alba_node in self.alba_nodes)

    def _local_summary(self):
        """
        Return a summary of the OSDs based on their state
        * Ok -> green
        * WARNING -> orange
        * ERROR -> red
        * UNKNOWN -> gray
        The summary of a single node will contain a list of dicts with guid, osd_id and claimed_by
        eg:
        {'red': [{osd_id: 1, claimed_by: alba_backend_guid1}],
         'green': [{osd_id: 2, claimed_by: None}],
          ...}
        :return: The local_summary of each AlbaNode, keyed by the node's node_id
        :rtype: dict
        """
        local_summary = {}
        for alba_node in self.alba_nodes:
            local_summary[alba_node.node_id] = alba_node.local_summary
        return local_summary
class AlbaOSD(DataObject):
    """
    Hybrid for a claimed OSD: either a claimed ASD or an AlbaBackend
    """
    OSD_TYPES = DataObject.enumerator('OSDType', ['ASD', 'ALBA_BACKEND', 'AD', 'S3'])

    __properties = [
        Property('osd_id', str, unique=True, doc='OSD identifier'),
        Property('osd_type', OSD_TYPES.keys(), doc='Type of OSD (ASD, ALBA_BACKEND)'),
        Property('ips', list, mandatory=False, doc='List of IP addresses on which the OSD is exposed'),
        Property('port', int, mandatory=False, doc='Port on which the OSD process is listening'),
        Property('metadata', dict, mandatory=False, doc='Additional information about this OSD, such as connection information (if OSD is an ALBA backend'),
        Property('slot_id', str, indexed=True, mandatory=False, doc='A pointer towards a certain slot. Will be used to map OSDs into container')
    ]
    __relations = [
        Relation('alba_backend', AlbaBackend, 'osds', doc='The AlbaBackend that claimed the OSD'),
        Relation('alba_node', AlbaNode, 'osds', mandatory=False, doc='The Alba Node to which the OSD belongs'),
        Relation('domain', Domain, 'osds', mandatory=False, doc='The Domain in which the OSD resides')
    ]
    __dynamics = [
        Dynamic('statistics', dict, 5, locked=True),
        Dynamic('stack_info', dict, 5)
    ]

    def _statistics(self, dynamic):
        """
        Loads the statistics of this OSD out of the osd_statistics of its AlbaBackend
        Per category (apply, multi_get, range, range_entries, statistics) the available sources are merged into
        n (sum), max, min, a weighted avg and n_ps, the change of n per second since the previously stored
        statistics. The result is stored in the volatile store for 10 times the timeout of the dynamic.
        :param dynamic: The Dynamic definition being computed; only its timeout is used
        :return: The computed statistics, or an empty dict when the OSD is unknown or anything fails
        :rtype: dict
        """
        data_keys = {'apply': ['Apply', 'Apply2'],
                     'multi_get': ['MultiGet', 'MultiGet2'],
                     'range': ['Range'],
                     'range_entries': ['RangeEntries'],
                     'statistics': ['Statistics']}
        volatile = VolatileFactory.get_client()
        prev_key = '{0}_{1}'.format(self._key, 'statistics_previous')
        previous_stats = volatile.get(prev_key, default={})
        try:
            all_statistics = self.alba_backend.osd_statistics
            if self.osd_id not in all_statistics:
                return {}
            data = all_statistics[self.osd_id]
            now = time.time()
            statistics = {'timestamp': now}
            # Without a previous timestamp the delta is 0
            delta = now - previous_stats.get('timestamp', now)
            for key, sources in data_keys.iteritems():
                count = 0
                maxima, minima, weighted = [], [], []
                for source in sources:
                    if source not in data:
                        continue
                    entry = data[source]
                    count += entry['n']
                    maxima.append(entry['max'])
                    minima.append(entry['min'])
                    weighted.append(entry['avg'] * entry['n'])  # Weigh every average by its sample count
                summary = {'n': count,
                           'max': max(maxima) if len(maxima) > 0 else 0,
                           'min': min(minima) if len(minima) > 0 else 0,
                           'avg': sum(weighted) / float(count) if count > 0 else 0}
                if key not in previous_stats or delta < 0:
                    summary['n_ps'] = 0
                elif delta == 0:
                    # No time elapsed: re-use the previously calculated rate
                    summary['n_ps'] = previous_stats[key].get('n_ps', 0)
                else:
                    summary['n_ps'] = max(0, (count - previous_stats[key]['n']) / delta)
                statistics[key] = summary
            volatile.set(prev_key, statistics, dynamic.timeout * 10)
            return statistics
        except Exception:
            # This might fail every now and then, e.g. on disk removal. Let's ignore for now.
            return {}

    def _stack_info(self):
        """
        Returns the summarized properties of this OSD for adding to the storage stacks
        """
        info = {'osd_id': self.osd_id,
                'type': self.osd_type,
                'ips': self.ips,
                'port': self.port,
                'metadata': self.metadata,
                'claimed_by': self.alba_backend_guid}
        return info
Exemple #12
0
class Backend(DataObject):
    """
    Hybrid for an instance of one of the supported backend types, set up with the OVS GUI
    """
    STATUSES = DataObject.enumerator('Status', ['INSTALLING', 'RUNNING', 'FAILURE', 'WARNING', 'DELETING'])

    __properties = [Property('name', str, unique=True, doc='Name of the Backend.'),
                    Property('status', STATUSES.keys(), default='INSTALLING', doc='State of the backend')]
    __relations = [Relation('backend_type', BackendType, 'backends', doc='Type of the backend.')]
    __dynamics = [Dynamic('linked_guid', str, 3600),
                  Dynamic('available', bool, 60),
                  Dynamic('regular_domains', list, 60),
                  Dynamic('access_rights', dict, 3600),
                  Dynamic('live_status', str, 30)]

    def _linked_guid(self):
        """
        Returns the GUID of the detail object linked to this backend, which depends on the backend type.
        The backlink from that object to this one must be named <backend_type code>_backend and be a
        one-to-one relation.
        :return: The linked guid, None when the backend type has no plugin
        """
        if self.backend_type.has_plugin is False:
            return None
        attribute_name = '{0}_backend_guid'.format(self.backend_type.code)
        return getattr(self, attribute_name)

    def _available(self):
        """
        Returns True if the backend can be used
        :return: The 'available' value of the linked detail object, False without plugin or linked object
        """
        if self.backend_type.has_plugin is False:
            return False
        linked_backend = getattr(self, '{0}_backend'.format(self.backend_type.code))
        return False if linked_backend is None else linked_backend.available

    def _regular_domains(self):
        """
        Returns the guids of the domains linked through the 'domains' junctions
        :return: List of domain guids
        """
        domain_guids = []
        for junction in self.domains:
            domain_guids.append(junction.domain_guid)
        return domain_guids

    def _access_rights(self):
        """
        A condensed extract from the user_rights and client_rights
        :return: dict with 'users' and 'clients', each mapping a guid to its grant
        """
        user_grants = dict((user_right.user_guid, user_right.grant) for user_right in self.user_rights)
        client_grants = dict((client_right.client_guid, client_right.grant) for client_right in self.client_rights)
        return {'users': user_grants, 'clients': client_grants}

    def _live_status(self):
        """
        Retrieve the actual status from the Backend
        :return: Status reported by the linked detail object, 'running' without plugin or linked object
        """
        if self.backend_type.has_plugin is False:
            return 'running'

        linked_backend = getattr(self, '{0}_backend'.format(self.backend_type.code))
        if linked_backend is None:
            return 'running'
        return linked_backend.live_status
class VolumedriverHealthCheck(object):
    """
    A healthcheck for the volumedriver components
    """
    # Module name under which the checks of this class are exposed to the CLI (see the expose_to_cli decorators)
    MODULE = 'volumedriver'
    # Both lookups below are class attributes and are therefore evaluated once, when the class body is executed
    LOCAL_ID = System.get_my_machine_id()
    LOCAL_SR = System.get_my_storagerouter()
    # Default size of the test vdisk created by _check_volumedriver
    VDISK_CHECK_SIZE = 1024 ** 3  # 1GB in bytes
    VDISK_HALTED_STATES = DataObject.enumerator('Halted_status', ['HALTED', 'FENCED'])
    VDISK_TIMEOUT_BEFORE_DELETE = 0.5  # NOTE(review): unit is not visible here, presumably seconds - confirm
    # Only used to check status of a fenced volume. This should not be used to link a status of a non-halted/fenced volume
    # Every entry holds a 'status', a 'severity' and a (message template, error code) tuple for both the
    # 'halted' and the 'fenced' case; the templates take the affected volumes as {0}
    # NOTE(review): the 'ok' entry carries severity 'failure' - confirm that this is intended
    FENCED_HALTED_STATUS_MAP = {'max_redirect': {'status': VDisk.STATUSES.NON_RUNNING,
                                                 'severity': 'failure',
                                                 'halted': ('These volumes are not running: {0}', ErrorCodes.volume_max_redirect),
                                                 'fenced': ('These volumes are fenced but not running on another node: {0}', ErrorCodes.volume_fenced_max_redirect)},
                                'halted': {'status': VDisk.STATUSES.HALTED,
                                           'severity': 'failure',
                                           'halted': ('These volumes are halted: {0}', ErrorCodes.volume_halted),
                                           'fenced': ('These volumes are fenced and but halted on another node: {0}', ErrorCodes.volume_fenced_halted)},
                                'connection_fail': {'status': 'UNKNOWN',
                                                    'severity': 'failure',
                                                    'halted': ('These volumes experienced a connectivity/timeout problem: {0}', ErrorCodes.voldrv_connection_problem),
                                                    'fenced': ('These volumes are fenced but experienced a connectivity/timeout problem on another node: {0}', ErrorCodes.voldrv_connection_problem)},
                                'ok': {'status': VDisk.STATUSES.RUNNING,
                                       'severity': 'failure',
                                       'halted': ('These volumes are running: {0}', ErrorCodes.volume_ok),
                                       'fenced': ('These volumes are fenced but running on another node: {0}', ErrorCodes.volume_fenced_ok)},
                                'not_found': {'status': 'NOT_FOUND',
                                              'severity': 'warning',
                                              'halted': ('These volumes could not be queried for information: {0}', ErrorCodes.volume_not_found),
                                              'fenced': ('These volumes are fenced but could not be queried for information on another node: {0}', ErrorCodes.volume_fenced_not_found)}}

    # Logger shared by all checks of this class
    logger = Logger('healthcheck-ovs_volumedriver')

    @staticmethod
    @expose_to_cli(MODULE, 'dtl-test', HealthCheckCLI.ADDON_TYPE,
                   help='Verify that all VDisks their DTL is properly running',
                   short_help='Test if DTL is properly running')
    def check_dtl(result_handler):
        """
        Checks the dtl for all vdisks on the local node
        Every vDisk gets its 'dtl_status' and 'info' dynamics invalidated, after which its DTL status is
        reported as success (disabled/standalone or in sync) or as warning (every other status)
        :param result_handler: logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        :return: None, or whatever result_handler.skip returns when the local StorageRouter has no vDisks
        :rtype: NoneType
        """
        # Fetch vdisks hosted on this machine
        local_sr = System.get_my_storagerouter()
        vdisk_guids = local_sr.vdisks_guids
        if len(vdisk_guids) == 0:
            return result_handler.skip('No VDisks present in cluster.')
        for vdisk_guid in vdisk_guids:
            vdisk = VDisk(vdisk_guid)
            vdisk.invalidate_dynamics(['dtl_status', 'info'])
            # Read the status exactly once, so the status that gets reported is the one that was classified
            dtl_status = vdisk.dtl_status
            vdisk_name = vdisk.name
            if dtl_status in ('ok_standalone', 'disabled'):
                result_handler.success('VDisk {0}s DTL is disabled'.format(vdisk_name), code=ErrorCodes.volume_dtl_standalone)
            elif dtl_status == 'ok_sync':
                result_handler.success('VDisk {0}s DTL is enabled and running.'.format(vdisk_name), code=ErrorCodes.volume_dtl_ok)
            elif dtl_status == 'degraded':
                result_handler.warning('VDisk {0}s DTL is degraded.'.format(vdisk_name), code=ErrorCodes.volume_dtl_degraded)
            elif dtl_status == 'checkup_required':
                result_handler.warning('VDisk {0}s DTL should be configured.'.format(vdisk_name), code=ErrorCodes.volume_dtl_checkup_required)
            elif dtl_status == 'catch_up':
                result_handler.warning('VDisk {0}s DTL is enabled but still syncing.'.format(vdisk_name), code=ErrorCodes.volume_dtl_catch_up)
            else:
                result_handler.warning('VDisk {0}s DTL has an unknown status: {1}.'.format(vdisk_name, dtl_status), code=ErrorCodes.volume_dtl_unknown)

    @staticmethod
    @timeout_decorator.timeout(30)
    def _check_volumedriver(vdisk_name, storagedriver_guid, logger, vdisk_size=VDISK_CHECK_SIZE):
        """
        Verifies that the volumedriver is able to create a new vdisk
        The call is guarded by timeout_decorator.timeout(30)
        :param vdisk_name: name of a vdisk (e.g. test.raw)
        :type vdisk_name: str
        :param storagedriver_guid: guid of a storagedriver
        :type storagedriver_guid: str
        :param logger: logger instance, its failure method receives the creation error
        :type logger: ovs.extensions.healthcheck.result.HCResults
        :param vdisk_size: size of the volume in bytes (e.g. 10737418240 is 10GB in bytes)
        :type vdisk_size: int
        :return: True when the vdisk got created or already existed, False when the creation failed
        :rtype: bool
        """
        try:
            VDiskController.create_new(vdisk_name, vdisk_size, storagedriver_guid)
        except FileExistsException:
            # An already existing file counts as success; can be ignored until fixed in framework
            # https://github.com/openvstorage/framework/issues/1247
            pass
        except Exception as creation_error:
            logger.failure('Creation of the vdisk failed. Got {0}'.format(str(creation_error)))
            return False
        return True

    @staticmethod
    @timeout_decorator.timeout(30)
    def _check_volumedriver_remove(vpool_name, vdisk_name, present=True):
        """
        Looks up a vdisk by name on a vpool and deletes it
        The call is guarded by timeout_decorator.timeout(30)
        :param vpool_name: name of a vpool
        :type vpool_name: str
        :param vdisk_name: name of a vdisk (e.g. test.raw)
        :type vdisk_name: str
        :param present: should the disk be present?
        :type present: bool
        :return: True if disk is not present anymore
        :rtype: bool
        :raises VDiskNotFoundError: When the vdisk could not be found while it was expected to be present
        """
        try:
            found_vdisk = VDiskHelper.get_vdisk_by_name(vdisk_name=vdisk_name, vpool_name=vpool_name)
            VDiskController.delete(found_vdisk.guid)
        except VDiskNotFoundError:
            # A missing vdisk is only a problem when the caller expected it to be there
            if present:
                raise
        return True

    @staticmethod
    # @expose_to_cli(MODULE, 'volumedrivers-test', HealthCheckCLI.ADDON_TYPE,
    #                help='Verify that the Volumedrivers are responding to events',
    #                short_help='Test if Volumedrivers are responding to events')
    def check_volumedrivers(result_handler):
        """
        Checks if the VOLUMEDRIVERS work on a local machine (compatible with multiple vPools)
        For every vPool living on the local StorageRouter a test vdisk is created on the local StorageDriver
        and removed again. The outcome is reported per vPool through the result_handler.
        :param result_handler: logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        :return: None
        :rtype: NoneType
        """
        result_handler.info('Checking volumedrivers.', add_to_result=False)
        vpools = VPoolList.get_vpools()
        if len(vpools) == 0:
            result_handler.skip('No vPools found!')
            return
        # The name of the test vdisk only depends on the local machine id, so it is identical for every vPool
        name = 'ovs-healthcheck-test-{0}.raw'.format(VolumedriverHealthCheck.LOCAL_ID)
        for vp in vpools:
            if vp.guid not in VolumedriverHealthCheck.LOCAL_SR.vpools_guids:
                result_handler.skip('Skipping vPool {0} because it is not living here.'.format(vp.name))
                continue
            try:
                # Find the StorageDriver of this vPool on the local machine (id = vPool name + machine id)
                local_storagedriver_id = vp.name + VolumedriverHealthCheck.LOCAL_ID
                storagedriver_guid = next((storagedriver.guid for storagedriver in vp.storagedrivers
                                           if storagedriver.storagedriver_id == local_storagedriver_id), None)
                if storagedriver_guid is None:
                    # Reported through the RuntimeError handler below instead of an empty StopIteration message
                    raise RuntimeError('No StorageDriver with id {0} could be found'.format(local_storagedriver_id))
                # create a new one
                volume = VolumedriverHealthCheck._check_volumedriver(name, storagedriver_guid, result_handler)

                if volume is True:
                    # delete the recently created
                    try:
                        VolumedriverHealthCheck._check_volumedriver_remove(vpool_name=vp.name, vdisk_name=name)
                    except Exception as ex:
                        raise RuntimeError('Could not delete the created volume. Got {0}'.format(str(ex)))
                    # Working at this point
                    result_handler.success('Volumedriver of vPool {0} is working fine!'.format(vp.name))
                else:
                    # not working
                    result_handler.failure('Something went wrong during vdisk creation on vpool {0}.'.format(vp.name))

            except TimeoutError:
                # timeout occurred, action took too long
                result_handler.warning('Volumedriver of vPool {0} seems to timeout.'.format(vp.name))
            except IOError as ex:
                # can be input/output error by volumedriver
                # The exception itself is formatted: its 'message' attribute is empty for errno based IOErrors
                result_handler.failure('Volumedriver of vPool {0} seems to have IO problems. Got `{1}` while executing.'.format(vp.name, ex))
            except RuntimeError as ex:
                result_handler.failure('Volumedriver of vPool {0} seems to have problems. Got `{1}` while executing.'.format(vp.name, ex))
            except VDiskNotFoundError:
                result_handler.warning('Volume on vPool {0} was not found, please retry again'.format(vp.name))
            except Exception as ex:
                result_handler.failure('Uncaught exception for Volumedriver of vPool {0}.Got {1} while executing.'.format(vp.name, ex))
            finally:
                # Best-effort attempt to delete the created vdisk; a failure here must not break the check,
                # but KeyboardInterrupt/SystemExit are no longer swallowed
                try:
                    VolumedriverHealthCheck._check_volumedriver_remove(vpool_name=vp.name, vdisk_name=name, present=False)
                except Exception:
                    VolumedriverHealthCheck.logger.exception('Cleanup of test volume {0} on vPool {1} failed'.format(name, vp.name))

    @classmethod
    def _is_volumedriver_timeout(cls, exception):
        """
        Tells whether an exception represents a volumedriver timeout: either a ClusterNotReachableException,
        or a RuntimeError mentioning 'failed to send XMLRPC request' (RuntimeError, prior to NodeNotReachable in voldriver 6.17)
        :param exception: Exception object to check
        :return: True if it is a timeout or False if it's not
        :rtype: bool
        """
        if isinstance(exception, ClusterNotReachableException):
            return True
        if not isinstance(exception, RuntimeError):
            return False
        return 'failed to send XMLRPC request' in str(exception)

    @classmethod
    @expose_to_cli(MODULE, 'halted-volumes-test', HealthCheckCLI.ADDON_TYPE,
                   help='Verify that there are no halted/fenced volumes within the cluster',
                   short_help='Test if there  are no halted/fenced volumes')
    def check_for_halted_volumes(cls, result_handler):
        """
        Reports the halted and fenced volumes of every vPool that lives on the local StorageRouter
        Only the volume states known to the local StorageDriver are inspected. If any other volumedriver would be down,
        only the HA'd volumes would pop-up as they could appear halted here (should be verified by the volumedriver team)
        :param result_handler: logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        :return: None
        :rtype: NoneType
        """
        all_vpools = VPoolList.get_vpools()
        my_storagerouter = System.get_my_storagerouter()

        if len(all_vpools) == 0:
            result_handler.skip('No vPools found!', code=ErrorCodes.vpools_none)
            return
        for vpool in all_vpools:
            log_start = 'Halted volumes test vPool {0}'.format(vpool.name)
            if vpool.guid not in my_storagerouter.vpools_guids:
                result_handler.skip('{0} - Skipping vPool {1} because it is not living here.'.format(log_start, vpool.name),
                                    code=ErrorCodes.vpool_not_local, add_to_result=False)
                continue

            result_handler.info('{0} - Retrieving all information'.format(log_start), add_to_result=False)
            # The StorageDriver of this vPool that runs on the local StorageRouter (first match wins)
            local_storagedriver = next((candidate for candidate in vpool.storagedrivers
                                        if candidate.storagerouter_guid == my_storagerouter.guid), None)
            if local_storagedriver is None:
                result_handler.failure('{0} - Could not associate a StorageDriver with this StorageRouter'.format(log_start),
                                       code=ErrorCodes.std_no_str)
                continue

            fenced_by_state = dict((key, []) for key in cls.FENCED_HALTED_STATUS_MAP.keys())
            halted_key = cls.VDISK_HALTED_STATES.HALTED
            fenced_key = cls.VDISK_HALTED_STATES.FENCED
            halted_volumes = []
            fenced_volumes = []
            # Same shape for both entries (halted state -> {state: volumes}) so a single loop can report them
            volume_states = {halted_key: {halted_key: halted_volumes},
                             fenced_key: fenced_by_state}
            result_handler.info('{0} - Scanning for halted volumes'.format(log_start), add_to_result=False)
            try:
                voldrv_client = vpool.storagedriver_client
                objectregistry_client = vpool.objectregistry_client
            except Exception:
                cls.logger.exception('{0} - Unable to instantiate the required clients'.format(log_start))
                result_handler.exception('{0} - Unable to load the Volumedriver clients'.format(log_start),
                                         code=ErrorCodes.voldr_unknown_problem)
                continue
            try:
                # The volumedriver client is used for the listing as it detects stolen volumes too (fenced instances)
                reported_volumes = voldrv_client.list_halted_volumes(str(local_storagedriver.storagedriver_id))
            except Exception as ex:
                cls.logger.exception('{0} - Exception occurred when listing volumes'.format(log_start))
                if cls._is_volumedriver_timeout(ex):
                    result_handler.failure('{0} - Could not list the volumes for due to a connection problem.'.format(log_start),
                                           code=ErrorCodes.voldrv_connection_problem)
                else:
                    # Unhandled exception at this point
                    result_handler.exception('{0} - Unable to list the Volumes due to an unidentified problem. Please check the logging'.format(log_start),
                                             code=ErrorCodes.voldr_unknown_problem)
                continue
            # The object registry (backed by Arakoon) knows which node owns a volume
            # A volume owned by this StorageDriver is halted, a volume owned by another one is fenced
            # Every lookup failure is reported on its own to make clear what went wrong
            for volume_id in reported_volumes:
                try:
                    owner_id = objectregistry_client.find(volume_id).node_id()
                    if owner_id != local_storagedriver.storagedriver_id:
                        # Fenced
                        fenced_volumes.append(volume_id)
                    else:
                        halted_volumes.append(volume_id)
                except Exception:
                    msg = '{0} - Unable to consult the object registry client for volume \'{1}\''.format(log_start, volume_id)
                    cls.logger.exception(msg)
                    result_handler.exception(msg, code=ErrorCodes.voldr_unknown_problem)
            # Include fenced - OTHER state combo
            for volume_id in fenced_volumes:
                try:
                    fenced_state = cls._get_volume_issue(voldrv_client, volume_id, log_start)[1]
                    fenced_by_state[fenced_state].append(volume_id)
                except Exception:
                    # Only unhandled at this point
                    result_handler.exception('{0} - Unable to the volume info for volume {1} due to an unidentified problem. Please check the logging'.format(log_start, volume_id),
                                             code=ErrorCodes.voldr_unknown_problem)
            for halted_state, volumes_per_state in volume_states.iteritems():
                for state, affected_volumes in volumes_per_state.iteritems():
                    if not affected_volumes:
                        continue  # Skip OK/empty lists
                    status_entry = cls.FENCED_HALTED_STATUS_MAP[state.lower()]
                    report = getattr(result_handler, status_entry['severity'])
                    message, code = status_entry[halted_state.lower()]
                    report('{0} - {1}'.format(log_start, message.format(', '.join(affected_volumes))), code=code)
            # Call success in case nothing is wrong
            if not halted_volumes and not fenced_volumes:
                result_handler.success('{0} - No volumes found in halted/fenced state'.format(log_start))

    @classmethod
    def _get_volume_issue(cls, voldrv_client, volume_id, log_start):
        """
        Translates the outcome of fetching the volume info into a state. These states can be mapped to a status using
        the FENCED_HALTED_STATUS_MAP because the volumedriver does not return a state itself
        :param voldrv_client: Storagedriver client
        :param volume_id: Id of the volume
        :param log_start: Prefix put in front of the message that is logged when fetching the info fails
        :raises: The unhandled exception when such an exception could occur (we try to identify all problems but one could slip past us)
        :return: The volume_id and state
        :rtype: tuple(str, str)
        """
        try:
            # Check if the information can be retrieved about the volume
            state = 'halted' if voldrv_client.info_volume(volume_id, req_timeout_secs=5).halted is True else 'ok'
        except Exception as ex:
            cls.logger.exception('{0} - Exception occurred when fetching the info for volume \'{1}\''.format(log_start, volume_id))
            if isinstance(ex, ObjectNotFoundException):
                # Ignore ovsdb invalid entrees as model consistency will handle it.
                state = 'not_found'
            elif isinstance(ex, MaxRedirectsExceededException):
                # This means the volume is not halted but detached or unreachable for the Volumedriver
                state = 'max_redirect'
            # @todo replace RuntimeError with NodeNotReachableException
            elif isinstance(ex, (ClusterNotReachableException, RuntimeError)) and cls._is_volumedriver_timeout(ex):
                # Timeout / connection problems
                state = 'connection_fail'
            else:
                # Unhandled exception at this point: something to be looked at
                raise
        return volume_id, state

    @staticmethod
    @timeout_decorator.timeout(5)
    def _check_filedriver(vp_name, test_name):
        """
        Checks if a FILEDRIVER `touch` works on a vpool by creating `/mnt/<vp_name>/<test_name>.xml`
        Always try to check if the file exists after performing this method
        The call is wrapped in a 5 second timeout decorator
        :param vp_name: name of the vpool
        :type vp_name: str
        :param test_name: name of the test file (e.g. `ovs-healthcheck-LOCAL_ID`)
        :type test_name: str
        :raises subprocess.CalledProcessError: when `touch` exits with a non-zero return code
        :return: The combined stdout/stderr output of the `touch` command
        :rtype: str
        """
        test_file = '/mnt/{0}/{1}.xml'.format(vp_name, test_name)
        # Pass an argument list instead of a shell string so the names are never interpreted by a shell
        return subprocess.check_output(['touch', test_file], stderr=subprocess.STDOUT)

    @staticmethod
    @timeout_decorator.timeout(5)
    def _check_filedriver_remove(vp_name):
        """
        Checks if a FILEDRIVER `remove` works on a vpool by removing all healthcheck test files under `/mnt/<vp_name>`
        The call is wrapped in a 5 second timeout decorator
        :param vp_name: name of the vpool
        :type vp_name: str
        :raises subprocess.CalledProcessError: when `rm` exits with a non-zero return code
        :return: True if no test file is left behind, False if at least one still exists
        :rtype: bool
        """
        import glob  # Imported locally so this method does not depend on the file-level imports

        pattern = '/mnt/{0}/ovs-healthcheck-test-*.xml'.format(vp_name)
        # The shell is required here: it is the shell that expands the wildcard for `rm`
        subprocess.check_output('rm -f {0}'.format(pattern), stderr=subprocess.STDOUT, shell=True)
        # os.path.exists does not expand wildcards, so the leftovers have to be looked up with glob
        return not glob.glob(pattern)

    @staticmethod
    # @expose_to_cli(MODULE, 'filedrivers-test', HealthCheckCLI.ADDON_TYPE,
    #                help='Verify that all Volumedrivers are accessible through FUSE',
    #                short_help='Test if that the FUSE layer is responding')
    # @todo replace fuse test with edge test
    def check_filedrivers(result_handler):
        """
        Verifies for every local vPool that a test file can be created on (and removed from) its mountpoint
        :param result_handler: logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        """
        result_handler.info('Checking file drivers.', add_to_result=False)
        all_vpools = VPoolList.get_vpools()
        if len(all_vpools) == 0:
            result_handler.skip('No vPools found!')
            return
        # perform tests
        for vpool in all_vpools:
            test_file_name = 'ovs-healthcheck-test-{0}'.format(VolumedriverHealthCheck.LOCAL_ID)
            if vpool.guid not in VolumedriverHealthCheck.LOCAL_SR.vpools_guids:
                result_handler.skip('Skipping vPool {0} because it is not living here.'.format(vpool.name))
                continue
            try:
                VolumedriverHealthCheck._check_filedriver(vpool.name, test_file_name)
                if not os.path.exists('/mnt/{0}/{1}.xml'.format(vpool.name, test_file_name)):
                    # not working: the touch did not result in a file
                    result_handler.failure('Filedriver for vPool {0} seems to have problems!'.format(vpool.name))
                    continue
                # working: clean up the test file(s) again
                VolumedriverHealthCheck._check_filedriver_remove(vpool.name)
                result_handler.success('Filedriver for vPool {0} is working fine!'.format(vpool.name))
            except TimeoutError:
                # timeout occurred, action took too long
                result_handler.warning('Filedriver of vPool {0} seems to have `timeout` problems'.format(vpool.name))
            except subprocess.CalledProcessError:
                # can be input/output error by filedriver
                result_handler.failure('Filedriver of vPool {0} seems to have `input/output` problems'.format(vpool.name))

    @staticmethod
    @expose_to_cli(MODULE, 'volume-potential-test', HealthCheckCLI.ADDON_TYPE,
                   help='Verify that the Volumedrivers have enough VDisk potential left',
                   short_help='Test if the Volumedrivers can create enough VDisks')
    @expose_to_cli.option('--critical-vol-number', '-c', type=int, default=25, help='Minimum number of volumes left to create')
    def check_volume_potential(result_handler, critical_vol_number=25):
        """
        Checks the volume potential of every storage driver of the local storage router
        Reports a success when the potential is at least the threshold (critical_vol_number), a warning when it is
        below the threshold but still above zero and a failure otherwise
        :param result_handler: logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        :param critical_vol_number: Minimal number of volumes that can be made before throwing a warning
        :type critical_vol_number: int
        :raises ValueError: when critical_vol_number is not an integer or is negative
        """
        result_handler.info('Checking volume potential of storagedrivers')

        if not (isinstance(critical_vol_number, int) and critical_vol_number >= 0):
            raise ValueError('Critical volume number should be a positive integer')

        for std in VolumedriverHealthCheck.LOCAL_SR.storagedrivers:
            try:
                std_config = StorageDriverConfiguration(std.vpool_guid, std.storagedriver_id)
                vol_potential = LocalStorageRouterClient(std_config.remote_path).volume_potential(str(std.storagedriver_id))
                if vol_potential >= critical_vol_number:
                    outcome = 'success'
                elif 0 < vol_potential < critical_vol_number:
                    outcome = 'warning'
                else:
                    outcome = 'failure'
                # The outcome doubles as the name of the result handler method to report with
                report = getattr(result_handler, outcome)
                report('Volume potential of local storage driver: {0}: {1} (potential at: {2})'.format(std.storagedriver_id, outcome.upper(), vol_potential))
            except RuntimeError:
                result_handler.exception('Unable to retrieve configuration for storagedriver {0}'.format(std.storagedriver_id))

    @staticmethod
    @expose_to_cli(MODULE, 'sco-cache-mountpoint-test', HealthCheckCLI.ADDON_TYPE,
                   help='Verify that sco-cache mountpoints are up and running',
                   short_help='Test if sco-cache mountpoints are up and running')
    def check_sco_cache_mountpoints(result_handler):
        """
        Walks over the StorageDrivers of the local StorageRouter and inspects each of their sco cache mount points.
        An offlined mount point is reported as a warning, any other mount point as a success
        :param result_handler: logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        """
        result_handler.info('Checking sco cache mount points on all local storagedrivers')
        for std in VolumedriverHealthCheck.LOCAL_SR.storagedrivers:
            try:
                std_config = StorageDriverConfiguration(std.vpool_guid, std.storagedriver_id)
                mountpoints = LocalStorageRouterClient(std_config.remote_path).sco_cache_mount_point_info(str(std.storagedriver_id))
                for mountpoint in mountpoints:
                    if mountpoint.offlined is not True:
                        result_handler.success('Mountpoint at location {0} of storagedriver {1} is in online state'.format(mountpoint.path, std.storagedriver_id))
                        continue
                    result_handler.warning('Mountpoint at location {0} of storagedriver {1} is in offline state'.format(mountpoint.path, std.storagedriver_id))
            except RuntimeError:
                result_handler.exception('Unable to check sco cache mountpoint of storagedriver {0}'.format(std.storagedriver_id))
Exemple #14
0
class VPool(DataObject):
    """
    The VPool class represents a vPool. A vPool is a Virtual Storage Pool, a Filesystem, used to
    deploy vDisks. a vPool can span multiple Storage Drivers and connects to a single Storage BackendType.
    """
    STATUSES = DataObject.enumerator('Status', [
        'DELETING', 'EXTENDING', 'FAILURE', 'INSTALLING', 'RUNNING',
        'SHRINKING'
    ])

    __properties = [
        Property('name', str, unique=True, doc='Name of the vPool'),
        Property('description',
                 str,
                 mandatory=False,
                 doc='Description of the vPool'),
        Property(
            'size',
            int,
            mandatory=False,
            doc=
            'Size of the vPool expressed in Bytes. Set to zero if not applicable.'
        ),
        Property('login',
                 str,
                 mandatory=False,
                 doc='Login/Username for the Storage BackendType.'),
        Property('password',
                 str,
                 mandatory=False,
                 doc='Password for the Storage BackendType.'),
        Property(
            'connection',
            str,
            mandatory=False,
            doc=
            'Connection (IP, URL, Domain name, Zone, ...) for the Storage BackendType.'
        ),
        Property(
            'metadata',
            dict,
            mandatory=False,
            doc='Metadata for the backends, as used by the Storage Drivers.'),
        Property(
            'rdma_enabled',
            bool,
            default=False,
            doc=
            'Has the vpool been configured to use RDMA for DTL transport, which is only possible if all storagerouters are RDMA capable'
        ),
        Property('status', STATUSES.keys(), doc='Status of the vPool')
    ]
    __relations = []
    __dynamics = [
        Dynamic('configuration', dict, 3600),
        Dynamic('statistics', dict, 4),
        Dynamic('identifier', str, 120)
    ]
    _fixed_properties = ['storagedriver_client', 'objectregistry_client']

    def __init__(self, *args, **kwargs):
        """
        Initializes a vPool, setting up its additional helpers
        Both clients start out as None and are loaded on first access of their property
        """
        DataObject.__init__(self, *args, **kwargs)
        # NOTE(review): _frozen presumably guards attribute assignment in DataObject - confirm
        self._frozen = False
        self._storagedriver_client = None
        self._objectregistry_client = None
        self._frozen = True

    @property
    def storagedriver_client(self):
        """
        Client used for communication between Storage Driver and framework
        Loaded on first access and cached on the instance afterwards
        :return: StorageDriverClient
        """
        if self._storagedriver_client is None:
            self.reload_client('storagedriver')
        return self._storagedriver_client

    @property
    def objectregistry_client(self):
        """
        Client used for communication between Storage Driver OR and framework
        Loaded on first access and cached on the instance afterwards
        :return: ObjectRegistryClient
        """
        if self._objectregistry_client is None:
            self.reload_client('objectregistry')
        return self._objectregistry_client

    def _configuration(self):
        """
        VPool configuration, read from the configuration of the first StorageDriver of this vPool
        :return: The configuration, or an empty dict when there is no StorageDriver (or it has no StorageRouter)
        :rtype: dict
        """
        if not self.storagedrivers or not self.storagedrivers[0].storagerouter:
            return {}

        storagedriver_config = StorageDriverConfiguration(
            'storagedriver', self.guid,
            self.storagedrivers[0].storagedriver_id)
        storagedriver_config.load()

        dtl = storagedriver_config.configuration['distributed_transaction_log']
        file_system = storagedriver_config.configuration['filesystem']
        volume_router = storagedriver_config.configuration['volume_router']
        volume_manager = storagedriver_config.configuration['volume_manager']

        dtl_host = file_system['fs_dtl_host']
        # The DTL mode is the only optional key that is read here
        dtl_mode = file_system.get('fs_dtl_mode',
                                   StorageDriverClient.VOLDRV_DTL_ASYNC)
        cluster_size = volume_manager['default_cluster_size'] / 1024
        dtl_transport = dtl['dtl_transport']
        sco_multiplier = volume_router['vrouter_sco_multiplier']
        dtl_config_mode = file_system['fs_dtl_config_mode']
        tlog_multiplier = volume_manager['number_of_scos_in_tlog']
        non_disposable_sco_factor = volume_manager[
            'non_disposable_scos_factor']

        sco_size = sco_multiplier * cluster_size / 1024  # SCO size is in MiB ==> SCO multiplier * cluster size (4 KiB by default)
        write_buffer = tlog_multiplier * sco_size * non_disposable_sco_factor
        # The DTL only counts as disabled when it is configured manually without a host
        dtl_enabled = not (dtl_config_mode
                           == StorageDriverClient.VOLDRV_DTL_MANUAL_MODE
                           and dtl_host == '')

        return {
            'sco_size':
            sco_size,
            'dtl_mode':
            StorageDriverClient.REVERSE_DTL_MODE_MAP[dtl_mode]
            if dtl_enabled is True else 'no_sync',
            'dtl_enabled':
            dtl_enabled,
            'cluster_size':
            cluster_size,
            'write_buffer':
            write_buffer,
            'dtl_transport':
            StorageDriverClient.REVERSE_DTL_TRANSPORT_MAP[dtl_transport],
            'dtl_config_mode':
            dtl_config_mode,
            'tlog_multiplier':
            tlog_multiplier
        }

    def _statistics(self, dynamic):
        """
        Aggregates the Statistics (IOPS, Bandwidth, ...) of each vDisk served by the vPool.
        :param dynamic: Passed through to VDisk.calculate_delta together with the summed statistics
        :return: The summed statistics of all StorageDrivers, including a 'timestamp' entry
        :rtype: dict
        """
        from ovs.dal.hybrids.vdisk import VDisk
        statistics = {}
        # Start every known key (and its '_ps' counterpart) at zero so the sums below have a base
        for key in StorageDriverClient.STAT_KEYS:
            statistics[key] = 0
            statistics['{0}_ps'.format(key)] = 0
        for storagedriver in self.storagedrivers:
            for key, value in storagedriver.fetch_statistics().iteritems():
                statistics[key] += value
        statistics['timestamp'] = time.time()
        VDisk.calculate_delta(self._key, dynamic, statistics)
        return statistics

    def _identifier(self):
        """
        An identifier of this vPool in its current configuration state
        :return: The vPool guid followed by the guids of its StorageDrivers, joined by underscores
        :rtype: str
        """
        return '{0}_{1}'.format(self.guid, '_'.join(self.storagedrivers_guids))

    def reload_client(self, client):
        """
        Reloads the StorageDriverClient or ObjectRegistryClient
        :param client: Which client to reload: 'storagedriver' or 'objectregistry'. Any other value reloads nothing
        :type client: str
        """
        self._frozen = False
        try:
            if client == 'storagedriver':
                self._storagedriver_client = StorageDriverClient.load(self)
            elif client == 'objectregistry':
                self._objectregistry_client = ObjectRegistryClient.load(self)
        finally:
            # Restore the frozen state even when loading the client raised
            self._frozen = True
Exemple #15
0
class VPool(DataObject):
    """
    The VPool class represents a vPool. A vPool is a Virtual Storage Pool, a Filesystem, used to
    deploy vMachines. a vPool can span multiple Storage Drivers and connects to a single Storage BackendType.
    """
    STATUSES = DataObject.enumerator('Status', [
        'DELETING', 'EXTENDING', 'FAILURE', 'INSTALLING', 'RUNNING',
        'SHRINKING'
    ])

    __properties = [
        Property('name', str, doc='Name of the vPool'),
        Property('description',
                 str,
                 mandatory=False,
                 doc='Description of the vPool'),
        Property(
            'size',
            int,
            mandatory=False,
            doc=
            'Size of the vPool expressed in Bytes. Set to zero if not applicable.'
        ),
        Property('login',
                 str,
                 mandatory=False,
                 doc='Login/Username for the Storage BackendType.'),
        Property('password',
                 str,
                 mandatory=False,
                 doc='Password for the Storage BackendType.'),
        Property(
            'connection',
            str,
            mandatory=False,
            doc=
            'Connection (IP, URL, Domain name, Zone, ...) for the Storage BackendType.'
        ),
        Property(
            'metadata',
            dict,
            mandatory=False,
            doc='Metadata for the backends, as used by the Storage Drivers.'),
        Property(
            'rdma_enabled',
            bool,
            default=False,
            doc=
            'Has the vpool been configured to use RDMA for DTL transport, which is only possible if all storagerouters are RDMA capable'
        ),
        Property('status', STATUSES.keys(), doc='Status of the vPool')
    ]
    __relations = [
        Relation('backend_type',
                 BackendType,
                 'vpools',
                 doc='Type of storage backend.')
    ]
    __dynamics = [
        Dynamic('statistics', dict, 4),
        Dynamic('identifier', str, 120),
        Dynamic('stored_data', int, 60)
    ]
    _fixed_properties = ['storagedriver_client']

    def __init__(self, *args, **kwargs):
        """
        Initializes a vPool, setting up its additional helpers
        The client starts out as None and is loaded on first access of its property
        """
        DataObject.__init__(self, *args, **kwargs)
        # NOTE(review): _frozen presumably guards attribute assignment in DataObject - confirm
        self._frozen = False
        self._storagedriver_client = None
        self._frozen = True

    @property
    def storagedriver_client(self):
        """
        Client used for communication between Storage Driver and framework
        Loaded on first access and cached on the instance afterwards
        :return: StorageDriverClient
        """
        if self._storagedriver_client is None:
            self.reload_client()
        return self._storagedriver_client

    def _statistics(self, dynamic):
        """
        Aggregates the Statistics (IOPS, Bandwidth, ...) of each vDisk served by the vPool.
        :param dynamic: Passed through to VDisk.calculate_delta together with the summed statistics
        :return: The summed statistics of all StorageDrivers, including a 'timestamp' entry
        :rtype: dict
        """
        from ovs.dal.hybrids.vdisk import VDisk
        statistics = {}
        # Start every known key (and its '_ps' counterpart) at zero so the sums below have a base
        for key in StorageDriverClient.STAT_KEYS:
            statistics[key] = 0
            statistics['{0}_ps'.format(key)] = 0
        for storagedriver in self.storagedrivers:
            for key, value in storagedriver.fetch_statistics().iteritems():
                statistics[key] += value
        statistics['timestamp'] = time.time()
        VDisk.calculate_delta(self._key, dynamic, statistics)
        return statistics

    def _stored_data(self):
        """
        Aggregates the Stored Data of each vDisk served by the vPool.
        :return: The 'stored' entry of the statistics of this vPool
        """
        return self.statistics['stored']

    def _identifier(self):
        """
        An identifier of this vPool in its current configuration state
        :return: The vPool guid followed by the guids of its StorageDrivers, joined by underscores
        :rtype: str
        """
        return '{0}_{1}'.format(self.guid, '_'.join(self.storagedrivers_guids))

    def reload_client(self):
        """
        Reloads the StorageDriver Client
        """
        self._frozen = False
        try:
            self._storagedriver_client = StorageDriverClient.load(self)
        finally:
            # Restore the frozen state even when loading the client raised
            self._frozen = True
Exemple #16
0
class StorageDriver(DataObject):
    """
    The StorageDriver class represents a Storage Driver. A Storage Driver is an application
    on a Storage Router to which the vDisks connect. The Storage Driver is the gateway to the Storage Backend.
    """
    DISTANCES = DataObject.enumerator('Distance', {
        'NEAR': 0,
        'FAR': 10000,
        'INFINITE': 20000
    })

    _logger = Logger('hybrids')
    __properties = [
        Property('name', str, doc='Name of the Storage Driver.'),
        Property('description',
                 str,
                 mandatory=False,
                 doc='Description of the Storage Driver.'),
        Property(
            'ports',
            dict,
            doc=
            'Ports on which the Storage Driver is listening (management, xmlrpc, dtl, edge).'
        ),
        Property('cluster_ip',
                 str,
                 doc='IP address on which the Storage Driver is listening.'),
        Property('storage_ip',
                 str,
                 doc='IP address on which the vpool is shared to hypervisor'),
        Property(
            'storagedriver_id',
            str,
            unique=True,
            indexed=True,
            doc='ID of the Storage Driver as known by the Storage Drivers.'),
        Property('mountpoint',
                 str,
                 doc='Mountpoint from which the Storage Driver serves data'),
        Property('startup_counter',
                 int,
                 default=0,
                 doc='StorageDriver startup counter')
    ]
    __relations = [
        Relation('vpool', VPool, 'storagedrivers'),
        Relation('storagerouter', StorageRouter, 'storagedrivers')
    ]
    __dynamics = [
        Dynamic('status', str, 30),
        Dynamic('statistics', dict, 4),
        Dynamic('edge_clients', list, 30),
        Dynamic('vdisks_guids', list, 15),
        Dynamic('proxy_summary', dict, 15),
        Dynamic('vpool_backend_info', dict, 60),
        Dynamic('cluster_node_config', dict, 3600),
        Dynamic('global_write_buffer', int, 60)
    ]

    def _status(self):
        """
        Fetches the Status of the Storage Driver.
        """
        _ = self
        return None

    def _statistics(self, dynamic):
        """
        Aggregates the Statistics (IOPS, Bandwidth, ...) of the vDisks connected to the Storage Driver.
        :param dynamic: Passed through to VDisk.calculate_delta together with the fetched statistics
        :return: A copy of the fetched statistics, extended with a 'timestamp' entry
        :rtype: dict
        """
        from ovs.dal.hybrids.vdisk import VDisk
        # Copy the fetched values into a fresh dict before the timestamp is added to it
        stats = dict((name, number) for name, number in self.fetch_statistics().iteritems())
        stats['timestamp'] = time.time()
        VDisk.calculate_delta(self._key, dynamic, stats)
        return stats

    def _edge_clients(self):
        """
        Retrieves all edge clients
        """
        clients = []
        try:
            for item in self.vpool.storagedriver_client.list_client_connections(
                    str(self.storagedriver_id), req_timeout_secs=2):
                clients.append({
                    'key': '{0}:{1}'.format(item.ip, item.port),
                    'object_id': item.object_id,
                    'ip': item.ip,
                    'port': item.port,
                    'server_ip': self.storage_ip,
                    'server_port': self.ports['edge']
                })
        except Exception:
            StorageDriver._logger.exception(
                'Error loading edge clients from {0}'.format(
                    self.storagedriver_id))
        clients.sort(key=lambda e: (e['ip'], e['port']))
        return clients

    def _vdisks_guids(self):
        """
        Gets the vDisk guids served by this StorageDriver.
        The object registry is consulted for all registrations, of which only those owned by this StorageDriver are kept
        :return: The guids of the vDisks matching the kept volume ids
        """
        from ovs.dal.lists.vdisklist import VDiskList
        registrations = self.vpool.objectregistry_client.get_all_registrations()
        served_volume_ids = [registration.object_id() for registration in registrations
                             if registration.node_id() == self.storagedriver_id]
        return VDiskList.get_in_volume_ids(served_volume_ids).guids

    def fetch_statistics(self):
        """
        Loads statistics from this StorageDriver - returns unprocessed data
        Empty statistics are used when the StorageDriver has no id or vPool, or when the volumedriver call fails
        (the failure is logged, not raised)
        :return: The statistics as extracted by VDisk.extract_statistics
        """
        # Load data from volumedriver
        sdstats = StorageDriverClient.EMPTY_STATISTICS()
        vpool = self.vpool
        if self.storagedriver_id and vpool:
            try:
                sdstats = vpool.storagedriver_client.statistics_node(
                    str(self.storagedriver_id), req_timeout_secs=2)
            except Exception as ex:
                StorageDriver._logger.error(
                    'Error loading statistics_node from {0}: {1}'.format(
                        self.storagedriver_id, ex))
        # The first vDisk of the vPool (if any) is handed to the extraction
        # A missing vPool is treated like a vPool without vDisks instead of failing on the attribute access
        reference_vdisk = None
        if vpool:
            vdisks = vpool.vdisks
            if len(vdisks) != 0:
                reference_vdisk = vdisks[0]
        # Load volumedriver data in dictionary
        return VDisk.extract_statistics(sdstats, reference_vdisk)

    def _vpool_backend_info(self):
        """
        Gathers additional information about the vPool to be shown in the GUI:
        the backend info, the caching info per cache type and the global write buffer of this Storage Driver
        The global write buffer is only added when caching is configured for the StorageRouter of this Storage Driver
        :return: Information about vPool and accelerated Backend
        :rtype: dict
        """
        def _default_cache_entry():
            # 'backend_info' will contain connection info if it wouldn't be None
            return {'read': False, 'write': False, 'quota': None, 'backend_info': None}

        backend_info = {
            'backend': copy.deepcopy(self.vpool.metadata['backend']),
            'caching_info': {
                StorageDriverConfiguration.CACHE_BLOCK: _default_cache_entry(),
                StorageDriverConfiguration.CACHE_FRAGMENT: _default_cache_entry()
            }
        }
        if 'caching_info' not in self.vpool.metadata:
            self._logger.critical(
                'Metadata structure has not been updated yet')
            return backend_info
        if self.storagerouter_guid not in self.vpool.metadata['caching_info']:
            # No caching configured
            return backend_info
        for cache_type, cache_data in backend_info['caching_info'].iteritems():
            configured = self.vpool.metadata['caching_info'][self.storagerouter_guid][cache_type]
            # Only take over the values of the keys that are already present in the defaults
            for shared_key in cache_data.viewkeys() & configured.viewkeys():
                cache_data[shared_key] = configured[shared_key]
            # Possible set backend_info to None to match this view
            if configured['is_backend'] is False:
                cache_data['backend_info'] = None
        # Add global write buffer
        backend_info['global_write_buffer'] = self.global_write_buffer
        return backend_info

    def _cluster_node_config(self):
        """
        Builds the ClusterNodeConfig dict for the StorageDriver process
        :return: Node configuration: identifiers, hosts, ports, the edge URI and the distance towards every other
                 StorageDriver of the vPool
        :rtype: dict
        """
        from ovs.extensions.generic.configuration import Configuration, NotFoundException
        use_rdma = Configuration.get('/ovs/framework/rdma')

        # Domains of the own StorageRouter: junctions explicitly flagged as non-backup are primary, all others secondary
        own_primary = []
        own_secondary = []
        for own_junction in self.storagerouter.domains:
            bucket = own_primary if own_junction.backup is False else own_secondary
            bucket.append(own_junction.domain_guid)

        # @todo implement more race-conditions guarantees. Current guarantee is the single update invalidating the value
        # through cluster_registry_checkup
        try:
            marked_for_update = list(Configuration.list(VPOOL_UPDATE_KEY))
        except NotFoundException:
            marked_for_update = []

        node_distance_map = {}
        for other_sd in self.vpool.storagedrivers:
            if other_sd.guid == self.guid:
                continue  # No distance towards ourselves
            if other_sd.storagerouter_guid in marked_for_update:
                node_distance = StorageDriver.DISTANCES.FAR
            elif len(own_primary) == 0:
                node_distance = StorageDriver.DISTANCES.NEAR
            else:
                node_distance = StorageDriver.DISTANCES.INFINITE
                for other_junction in other_sd.storagerouter.domains:
                    if other_junction.backup is not False:
                        continue  # Only the non-backup domains of the other StorageRouter are taken into account
                    if other_junction.domain_guid in own_primary:
                        node_distance = min(node_distance, StorageDriver.DISTANCES.NEAR)
                        break  # We can stop here since we reached the minimum distance
                    if other_junction.domain_guid in own_secondary:
                        node_distance = min(node_distance, StorageDriver.DISTANCES.FAR)
            node_distance_map[str(other_sd.storagedriver_id)] = node_distance

        protocol = 'rdma' if use_rdma else 'tcp'
        return {'vrouter_id': self.storagedriver_id,
                'host': self.storage_ip,
                'message_port': self.ports['management'],
                'xmlrpc_host': self.cluster_ip,
                'xmlrpc_port': self.ports['xmlrpc'],
                'failovercache_host': self.storage_ip,
                'failovercache_port': self.ports['dtl'],
                'network_server_uri': '{0}://{1}:{2}'.format(protocol, self.storage_ip, self.ports['edge']),
                'node_distance_map': node_distance_map}

    def _proxy_summary(self):
        """
        Returns a summary of the proxies of this StorageDriver
        Every ALBA proxy is counted under a colour: 'green' when its service status is 'active', 'orange' when it is
        'inactive' and 'red' for any other status or when the status could not be retrieved
        The summary is best-effort: regular errors are logged and the counts gathered so far are returned
        :return: summary of the proxies
        :rtype: dict
        """
        proxy_info = {'red': 0, 'orange': 0, 'green': 0}
        summary = {'proxies': proxy_info}

        try:
            service_manager = ServiceFactory.get_manager()
            client = SSHClient(self.storagerouter)
        except Exception:
            self._logger.exception('Unable to retrieve necessary clients')
            return summary

        # No 'return' inside a 'finally' clause: that would also discard KeyboardInterrupt / SystemExit.
        # Only regular exceptions are caught here so the summary stays best-effort without hiding interpreter exits
        try:
            for alba_proxy in self.alba_proxies:
                try:
                    service_status = service_manager.get_service_status(alba_proxy.service.name, client)
                except Exception:
                    # A ValueError can occur when the services are still being deployed (the model will be updated before the actual deployment)
                    self._logger.exception(
                        'Unable to retrieve the service status for service {0} of StorageDriver {1}'
                        .format(alba_proxy.service.name, self.guid))
                    proxy_info['red'] += 1
                    continue
                if service_status == 'active':
                    proxy_info['green'] += 1
                elif service_status == 'inactive':
                    proxy_info['orange'] += 1
                else:
                    proxy_info['red'] += 1
        except Exception:
            self._logger.exception('Unable to complete the proxy summary')
        return summary

    def _global_write_buffer(self):
        """
        Calculates the global write buffer available for a StorageDriver
        The buffer is the summed size of all partitions of this StorageDriver with role WRITE and sub-role SCO
        :return: Calculated global write buffer
        :rtype: int
        """
        # Imported locally to avoid a circular import
        from ovs.dal.hybrids.j_storagedriverpartition import StorageDriverPartition

        return sum(partition.size
                   for partition in self.partitions
                   if partition.role == DiskPartition.ROLES.WRITE and partition.sub_role == StorageDriverPartition.SUBROLE.SCO)
class AlbaNode(DataObject):
    """
    The AlbaNode contains information about nodes (containing OSDs)
    Besides the modelled properties and relations, a manager client matching the node type is attached
    to every instance (see __init__) and a set of dynamic properties exposes the state of the node
    """
    # Supported node types; also used as keys of CLIENTS and CONFIG_LOCATIONS
    NODE_TYPES = DataObject.enumerator('NodeType', ['ASD', 'GENERIC', 'S3'])
    # Statuses assigned to OSDs (and to slots only known in the model) while building the stack
    OSD_STATUSES = DataObject.enumerator(
        'OSDStatus', {
            'ERROR': 'error',
            'MISSING': 'missing',
            'OK': 'ok',
            'UNAVAILABLE': 'unavailable',
            'UNKNOWN': 'unknown',
            'WARNING': 'warning'
        })
    # Additional detail accompanying a status
    OSD_STATUS_DETAILS = DataObject.enumerator(
        'OSDStatusDetail', {
            'ACTIVATING': 'service_activating',
            'ALBAERROR': 'albaerror',
            'DECOMMISSIONED': 'decommissioned',
            'ERROR': 'recenterrors',
            'NODEDOWN': 'nodedown',
            'UNREACHABLE': 'unreachable'
        })
    # NOTE(review): presumably the statuses a slot can have - not referenced by the methods of this class
    SLOT_STATUSES = DataObject.enumerator(
        'SlotStatus', {
            'OK': 'ok',
            'WARNING': 'warning',
            'MISSING': 'missing',
            'UNAVAILABLE': 'unavailable',
            'UNKNOWN': 'unknown',
            'EMPTY': 'empty'
        })
    # Manager client class per node type; __init__ instantiates the one matching self.type
    CLIENTS = DataObject.enumerator(
        'AlbaNodeClients', {
            NODE_TYPES.ASD: ASDManagerClient,
            NODE_TYPES.GENERIC: GenericManagerClient,
            NODE_TYPES.S3: S3ManagerClient
        })
    # Configuration path template per node type; _ips and _ipmi_info format it with the node_id
    CONFIG_LOCATIONS = DataObject.enumerator(
        'AlbaNodeConfigLocations', {
            NODE_TYPES.ASD: ASD_NODE_CONFIG_PATH,
            NODE_TYPES.GENERIC: '',
            NODE_TYPES.S3: S3_NODE_CONFIG_PATH
        })

    _logger = Logger('hybrids')
    __properties = [
        Property('ip', str, indexed=True, mandatory=False, doc='IP Address'),
        Property('port', int, mandatory=False, doc='Port'),
        Property('node_id',
                 str,
                 unique=True,
                 indexed=True,
                 doc='Alba node_id identifier'),
        Property('name',
                 str,
                 mandatory=False,
                 doc='Optional name for the AlbaNode'),
        Property('username',
                 str,
                 mandatory=False,
                 doc='Username of the AlbaNode'),
        Property('password',
                 str,
                 mandatory=False,
                 doc='Password of the AlbaNode'),
        Property('type',
                 NODE_TYPES.keys(),
                 default=NODE_TYPES.ASD,
                 doc='The type of the AlbaNode'),
        Property(
            'package_information',
            dict,
            mandatory=False,
            default={},
            doc=
            'Information about installed packages and potential available new versions'
        )
    ]
    __relations = [
        Relation('storagerouter',
                 StorageRouter,
                 'alba_node',
                 onetoone=True,
                 mandatory=False,
                 doc='StorageRouter hosting the Alba Node'),
        Relation('alba_node_cluster',
                 AlbaNodeCluster,
                 'alba_nodes',
                 mandatory=False,
                 doc='The Alba Node Cluster to which the Alba Node belongs')
    ]
    # Every dynamic is computed by the method carrying the same name prefixed with an underscore (e.g. 'stack' by _stack)
    # NOTE(review): the third argument looks like the cache timeout in seconds - confirm against Dynamic
    __dynamics = [
        Dynamic('stack', dict, 15, locked=True),
        Dynamic('ips', list, 3600),
        Dynamic('maintenance_services', dict, 30, locked=True),
        Dynamic('node_metadata', dict, 3600),
        Dynamic('supported_osd_types', list, 3600),
        Dynamic('read_only_mode', bool, 60),
        Dynamic('local_summary', dict, 60),
        Dynamic('ipmi_info', dict, 3600)
    ]

    def __init__(self, *args, **kwargs):
        """
        Initializes an AlbaNode and attaches the manager client matching its type
        When the RUNNING_UNITTESTS environment variable equals 'True', a mocked client is attached instead
        :raises NotImplementedError: When no client is registered for the type of this node
        """
        DataObject.__init__(self, *args, **kwargs)
        self._frozen = False
        self.client = None
        running_unittests = os.environ.get('RUNNING_UNITTESTS') == 'True'
        if running_unittests:
            self.client = ManagerClientMockup(self)
        elif self.type in self.CLIENTS:
            self.client = self.CLIENTS[self.type](self)
        else:
            raise NotImplementedError('Type {0} is not implemented'.format(self.type))
        self._frozen = True

    def _ips(self):
        """
        Returns the IPs of the node
        The value is read from the 'network|ips' entry below the configuration location of this node's type
        """
        location_template = self.CONFIG_LOCATIONS[self.type]
        key_template = os.path.join(location_template, 'network|ips')
        return Configuration.get(key_template.format(self.node_id))

    def _maintenance_services(self):
        """
        Returns all maintenance services on this node, grouped by backend name
        :return: Per backend name a list of [service name, service status] pairs; services which could be
                 processed before an error occurred are still returned
        :rtype: dict
        """
        name_pattern = '^alba-maintenance_(.*)-[a-zA-Z0-9]{16}$'
        grouped = {}
        try:
            for service_name in self.client.list_maintenance_services():
                matched = re.match(name_pattern, service_name)
                if matched is None:
                    continue  # Not a maintenance service
                status = self.client.get_service_status(name=service_name)
                grouped.setdefault(matched.group(1), []).append([service_name, status])
        except Exception:
            self._logger.exception('Unable to list the maintenance services')
        return grouped

    def _stack(self):
        """
        Returns an overview of this node's storage stack
        The stack reported by the node's manager client is merged with the OSDs known in the model, after which
        status, claim and usage information is added per OSD
        When the manager cannot be reached, the node is assumed to be down and the modelled OSDs are reported as unknown
        :return: Stack information keyed by slot ID; a slot holds its status and, when present, an 'osds' dict
                 keyed by OSD ID
        :rtype: dict
        """
        from ovs.dal.hybrids.albabackend import AlbaBackend
        from ovs.dal.lists.albabackendlist import AlbaBackendList

        def _move(info):
            # Rename the 'state' keys to the 'status' keys used in the rest of this method
            for move in [('state', 'status'),
                         ('state_detail', 'status_detail')]:
                if move[0] in info:
                    info[move[1]] = info[move[0]]
                    del info[move[0]]

        stack = {}
        node_down = False
        # Fetch stack from asd-manager
        try:
            remote_stack = self.client.get_stack()
            for slot_id, slot_data in remote_stack.iteritems():
                stack[slot_id] = {'status': 'ok'}
                stack[slot_id].update(slot_data)
                # Migrate state > status
                _move(stack[slot_id])
                for osd_data in slot_data.get('osds', {}).itervalues():
                    _move(osd_data)
        except (requests.ConnectionError, requests.Timeout,
                InvalidCredentialsError):
            self._logger.warning(
                'Error during stack retrieval. Assuming that the node is down')
            node_down = True

        model_osds = {}
        found_osds = {}  # Per ALBA Backend guid: the OSDs listed by ALBA, keyed by their long_id
        # Apply own model to fetched stack
        for osd in self.osds:
            model_osds[osd.osd_id] = osd  # Initially set the info
            if osd.slot_id not in stack:
                stack[osd.slot_id] = {
                    'status':
                    self.OSD_STATUSES.UNKNOWN
                    if node_down is True else self.OSD_STATUSES.MISSING,
                    'status_detail':
                    self.OSD_STATUS_DETAILS.NODEDOWN
                    if node_down is True else '',
                    'osds': {}
                }
            # A slot reported by the manager is not guaranteed to carry an 'osds' key, so create it when missing
            slot_osds = stack[osd.slot_id].setdefault('osds', {})
            osd_data = slot_osds.setdefault(osd.osd_id, {})  # Initially set the info in the stack
            osd_data.update(osd.stack_info)
            if node_down is True:
                osd_data['status'] = self.OSD_STATUSES.UNKNOWN
                osd_data['status_detail'] = self.OSD_STATUS_DETAILS.NODEDOWN
            elif osd.alba_backend_guid is not None:  # Osds has been claimed
                # Load information from alba
                if osd.alba_backend_guid not in found_osds:
                    found_osds[osd.alba_backend_guid] = {}
                    if osd.alba_backend.abm_cluster is not None:
                        config = Configuration.get_configuration_path(
                            osd.alba_backend.abm_cluster.config_location)
                        try:
                            for found_osd in AlbaCLI.run(
                                    command='list-all-osds', config=config):
                                found_osds[osd.alba_backend_guid][
                                    found_osd['long_id']] = found_osd
                        except (AlbaError, RuntimeError):
                            # NOTE(review): only the OSD that triggered the listing gets the ALBAERROR detail; the other
                            # OSDs of this Backend find an empty listing and are skipped below - confirm this is intended
                            self._logger.exception(
                                'Listing all osds has failed')
                            osd_data['status'] = self.OSD_STATUSES.UNKNOWN
                            osd_data[
                                'status_detail'] = self.OSD_STATUS_DETAILS.ALBAERROR
                            continue

                if osd.osd_id not in found_osds[osd.alba_backend_guid]:
                    # Not claimed by any backend thus not in use
                    continue
                found_osd = found_osds[osd.alba_backend_guid][osd.osd_id]
                if found_osd['decommissioned'] is True:
                    osd_data['status'] = self.OSD_STATUSES.UNAVAILABLE
                    osd_data[
                        'status_detail'] = self.OSD_STATUS_DETAILS.DECOMMISSIONED
                    continue

                # A Backend specific error interval takes precedence over the global one
                backend_interval_key = '/ovs/alba/backends/{0}/gui_error_interval'.format(
                    osd.alba_backend_guid)
                if Configuration.exists(backend_interval_key):
                    interval = Configuration.get(backend_interval_key)
                else:
                    interval = Configuration.get(
                        '/ovs/alba/backends/global_gui_error_interval')
                read = found_osd['read'] or [0]
                write = found_osd['write'] or [0]
                errors = found_osd['errors']
                # Assume a warning, unless there are no errors or the read/write values exceed the latest error plus the interval
                osd_data['status'] = self.OSD_STATUSES.WARNING
                osd_data['status_detail'] = self.OSD_STATUS_DETAILS.ERROR
                if len(errors) == 0 or (len(read + write) > 0
                                        and max(min(read), min(write)) >
                                        max(error[0]
                                            for error in errors) + interval):
                    osd_data['status'] = self.OSD_STATUSES.OK
                    osd_data['status_detail'] = ''

        statistics = {}  # OSD statistics per ALBA Backend, loaded once per Backend
        for slot_info in stack.itervalues():
            # Slots without an 'osds' key have nothing to enrich
            for osd_id, osd in slot_info.get('osds', {}).iteritems():
                if osd.get(
                        'status_detail') == self.OSD_STATUS_DETAILS.ACTIVATING:
                    osd['claimed_by'] = 'unknown'  # We won't be able to connect to it just yet
                    continue
                if osd_id not in model_osds:
                    # The osd is known by the remote node but not in the model
                    # In that case, let's connect to the OSD to see whether we get some info from it
                    try:
                        ips = osd['hosts'] if 'hosts' in osd and len(
                            osd['hosts']) > 0 else osd.get('ips', [])
                        port = osd['port']
                        claimed_by = 'unknown'
                        for ip in ips:
                            try:
                                # Output will be None if it is not claimed
                                claimed_by = AlbaCLI.run('get-osd-claimed-by',
                                                         named_params={
                                                             'host': ip,
                                                             'port': port
                                                         })
                                break
                            except (AlbaError, RuntimeError):
                                self._logger.warning(
                                    'get-osd-claimed-by failed for IP:port {0}:{1}'
                                    .format(ip, port))
                        alba_backend = AlbaBackendList.get_by_alba_id(
                            claimed_by)
                        osd['claimed_by'] = alba_backend.guid if alba_backend is not None else claimed_by
                    except KeyError:
                        osd['claimed_by'] = 'unknown'
                    except Exception:
                        # Deliberately broad to keep the overview best-effort, but no longer a bare 'except' so
                        # KeyboardInterrupt / SystemExit are not swallowed
                        self._logger.exception(
                            'Could not load OSD info: {0}'.format(osd_id))
                        osd['claimed_by'] = 'unknown'
                        if osd.get('status') not in ['error', 'warning']:
                            osd['status'] = self.OSD_STATUSES.ERROR
                            osd['status_detail'] = self.OSD_STATUS_DETAILS.UNREACHABLE
                claimed_by = osd.get('claimed_by', 'unknown')
                if claimed_by == 'unknown':
                    continue
                try:
                    alba_backend = AlbaBackend(claimed_by)
                except ObjectNotFoundException:
                    continue
                # Add usage information
                if alba_backend not in statistics:
                    statistics[alba_backend] = alba_backend.osd_statistics
                osd_statistics = statistics[alba_backend]
                if osd_id not in osd_statistics:
                    continue
                stats = osd_statistics[osd_id]
                osd['usage'] = {
                    'size': int(stats['capacity']),
                    'used': int(stats['disk_usage']),
                    'available': int(stats['capacity'] - stats['disk_usage'])
                }
        return stack

    def _node_metadata(self):
        """
        Returns a set of metadata hinting on how the Node should be used
        :return: Flags describing which slot actions are possible for this type of node, with accompanying metadata
        :rtype: dict
        """
        overrides_per_type = {
            AlbaNode.NODE_TYPES.ASD: {'fill': True,
                                      'fill_metadata': {'count': 'integer'},
                                      'clear': True},
            AlbaNode.NODE_TYPES.GENERIC: {'fill_add': True,
                                          'fill_add_metadata': {'osd_type': 'osd_type',
                                                                'ips': 'list_of_ip',
                                                                'port': 'port'},
                                          'clear': True},
            AlbaNode.NODE_TYPES.S3: {'fill_add': True,
                                     'fill_add_metadata': {'count': 'integer',
                                                           'osd_type': 'osd_type',
                                                           'buckets': 'list_of_string'},
                                     'clear': True}
        }
        # Defaults: nothing is allowed
        # - fill: Prepare Slot for future usage
        # - fill_add: OSDs will added and claimed right away
        # - clear: Indicates whether OSDs can be removed from ALBA Node / Slot
        slots_metadata = {'fill': False, 'fill_add': False, 'clear': False}
        slots_metadata.update(overrides_per_type.get(self.type, {}))
        return slots_metadata

    def _supported_osd_types(self):
        """
        Returns a list of all supported OSD types
        :return: ASD and AD for GENERIC nodes, ASD for ASD nodes, nothing for any other type of node
        :rtype: list
        """
        from ovs.dal.hybrids.albaosd import AlbaOSD
        node_type = self.type
        if node_type == AlbaNode.NODE_TYPES.GENERIC:
            return [AlbaOSD.OSD_TYPES.ASD, AlbaOSD.OSD_TYPES.AD]
        if node_type == AlbaNode.NODE_TYPES.ASD:
            return [AlbaOSD.OSD_TYPES.ASD]
        # S3 nodes, as well as any other type, do not support OSD types
        return []

    def _read_only_mode(self):
        """
        Indicates whether the ALBA Node can be used for OSD manipulation
        If the version on the ALBA Node is lower than a specific version required by the framework, the ALBA Node becomes read only,
        this means, that actions such as creating, restarting, deleting OSDs becomes impossible until the node's software has been updated
        Only GENERIC and ASD nodes are checked, every other type of node is never read only
        :return: True if the ALBA Node should be read only, False otherwise
        :rtype: bool
        """
        if self.type not in [AlbaNode.NODE_TYPES.GENERIC, AlbaNode.NODE_TYPES.ASD]:
            return False
        try:
            # Version 3 was introduced when Slots for Active Drives have been introduced
            return self.client.get_metadata()['_version'] < 3
        except (requests.ConnectionError, requests.Timeout, InvalidCredentialsError):
            # When down, nothing can be edited.
            self._logger.warning(
                'Error during stack retrieval. Assuming that the node is down and disabling read_only because nothing can be done'
            )
            return False

    def _local_summary(self):
        """
        Return a summary of the OSDs based on their state
        * Ok -> green
        * WARNING -> orange
        * ERROR -> red
        * UNKNOWN -> gray
        OSDs with any other status are left out of the summary
        The summary will contain a list of dicts with osd_id and claimed_by
        eg:
        {'red': [{osd_id: 1, claimed_by: alba_backend_guid1}],
         'green': [{osd_id: 2, claimed_by: None}],
          ...}
        :return: Summary of the OSDs filtered by status (which are represented by color)
        """
        colour_by_status = {self.OSD_STATUSES.OK: 'green',
                            self.OSD_STATUSES.WARNING: 'orange',
                            self.OSD_STATUSES.ERROR: 'red',
                            self.OSD_STATUSES.UNKNOWN: 'gray'}
        device_info = dict((colour, []) for colour in ('red', 'green', 'orange', 'gray'))
        for slot_data in self.stack.itervalues():
            for osd_data in slot_data.get('osds', {}).itervalues():
                # OSDs without a status are treated as unknown
                colour = colour_by_status.get(osd_data.get('status', self.OSD_STATUSES.UNKNOWN))
                if colour is None:  # Can never be too sure
                    continue
                device_info[colour].append({'claimed_by': osd_data.get('claimed_by'),
                                            'osd_id': osd_data.get('osd_id')})
        return {'devices': device_info}  # For future additions?

    def _ipmi_info(self):
        """
        Retrieve the IPMI information of the AlbaNode
        The value is read from the 'ipmi' entry below the configuration location of this node's type
        :return: Dict with ipmi information; all values are None when nothing is stored
        :rtype: dict
        """
        key_template = os.path.join(self.CONFIG_LOCATIONS[self.type], 'ipmi')
        ipmi_key = key_template.format(self.node_id)
        try:
            ipmi_info = Configuration.get(ipmi_key)
        except NotFoundException:  # Could be that the ASDManager does not yet have the IPMI info stored
            self._logger.warning('No IPMI config path found')
            ipmi_info = {'ip': None, 'username': None, 'password': None}
        return ipmi_info
Exemple #18
0
class VDisk(DataObject):
    """
    The VDisk class represents a vDisk. A vDisk is a Virtual Disk served by Open vStorage.
    """
    STATUSES = DataObject.enumerator(
        'Status', ['HALTED', 'NON_RUNNING', 'RUNNING', 'UNKNOWN'])

    VDISK_NAME_REGEX = '^[0-9a-zA-Z][\-_a-zA-Z0-9]+[a-zA-Z0-9]$'

    _logger = Logger('hybrids')
    __properties = [
        Property('name', str, mandatory=False, doc='Name of the vDisk.'),
        Property('description',
                 str,
                 mandatory=False,
                 doc='Description of the vDisk.'),
        Property('size', int, doc='Size of the vDisk in Bytes.'),
        Property(
            'devicename',
            str,
            doc=
            'The name of the container file (e.g. the VMDK-file) describing the vDisk.'
        ),
        Property('volume_id',
                 str,
                 mandatory=False,
                 indexed=True,
                 doc='ID of the vDisk in the Open vStorage Volume Driver.'),
        Property(
            'parentsnapshot',
            str,
            mandatory=False,
            doc=
            'Points to a parent storage driver parent ID. None if there is no parent Snapshot'
        ),
        Property('cinder_id',
                 str,
                 mandatory=False,
                 doc='Cinder Volume ID, for volumes managed through Cinder'),
        Property(
            'has_manual_dtl',
            bool,
            default=False,
            doc=
            'Indicates whether the default DTL location has been overruled by customer'
        ),
        Property(
            'pagecache_ratio',
            float,
            default=1.0,
            doc='Ratio of the volume\'s metadata pages that needs to be cached'
        ),
        Property(
            'metadata',
            dict,
            default=dict(),
            doc='Contains fixed metadata about the volume (e.g. lba_size, ...)'
        ),
        Property(
            'cache_quota',
            dict,
            mandatory=False,
            doc=
            'Maximum caching space(s) this volume can consume (in Bytes) per cache type. If not None, the caching(s) for this volume has been set manually'
        )
    ]
    __relations = [
        Relation('vpool', VPool, 'vdisks'),
        Relation('parent_vdisk', None, 'child_vdisks', mandatory=False)
    ]
    __dynamics = [
        Dynamic('dtl_status', str, 60),
        Dynamic('snapshots', list, 30),
        Dynamic('snapshot_ids', list, 30),
        Dynamic('info', dict, 60),
        Dynamic('statistics', dict, 4),
        Dynamic('storagedriver_id', str, 60),
        Dynamic('storagerouter_guid', str, 15),
        Dynamic('is_vtemplate', bool, 60),
        Dynamic('edge_clients', list, 30)
    ]
    _fixed_properties = [
        'storagedriver_client', 'objectregistry_client', 'fsmetadata_client'
    ]

    def __init__(self, *args, **kwargs):
        """
        Initializes a vDisk and resets its client helpers
        The clients themselves are only created when first requested through their properties
        """
        DataObject.__init__(self, *args, **kwargs)
        self._frozen = False
        for client_attribute in ('_storagedriver_client',
                                 '_objectregistry_client',
                                 '_fsmetadata_client'):
            setattr(self, client_attribute, None)
        self._frozen = True

    @property
    def storagedriver_client(self):
        """
        Client used for communication between StorageDriver and framework
        Loaded through reload_client('storagedriver') the first time it is requested
        :return: StorageDriverClient
        """
        client = self._storagedriver_client
        if client is None:
            self.reload_client('storagedriver')
            client = self._storagedriver_client
        return client

    @property
    def objectregistry_client(self):
        """
        Client used for communication between StorageDriver OR and framework
        Loaded through reload_client('objectregistry') the first time it is requested
        :return: ObjectRegistryClient
        """
        client = self._objectregistry_client
        if client is None:
            self.reload_client('objectregistry')
            client = self._objectregistry_client
        return client

    @property
    def fsmetadata_client(self):
        """
        Client used for communications between StorageDriver FS metadata and framework
        Loaded through reload_client('filesystem_metadata') the first time it is requested
        :return: FileSystemMetaDataClient
        """
        client = self._fsmetadata_client
        if client is None:
            self.reload_client('filesystem_metadata')
            client = self._fsmetadata_client
        return client

    def _dtl_status(self):
        """
        Retrieve the DTL status for a vDisk
        The failover mode reported for the volume is returned as is (lowercased), unless it is 'ok_sync' or
        'ok_standalone'. In that case the status is verified against the DTL settings in the model:
        * 'disabled' when DTL is turned off for this vDisk
        * 'checkup_required' when the hosting StorageRouter or the DTL configuration cannot be retrieved, or when
          the configured DTL target does not match the StorageRouters the model allows as target
        :return: The DTL status
        :rtype: str
        """
        def _target_ips(storagerouters):
            # Storage IPs of the StorageDrivers of this vDisk's vPool on the given StorageRouters
            return [sd.storage_ip for sr in storagerouters
                    for sd in sr.storagedrivers
                    if sd.vpool_guid == self.vpool_guid]

        sd_status = self._info().get('failover_mode', 'UNKNOWN').lower()
        if sd_status == '':
            sd_status = 'unknown'
        if sd_status not in ['ok_sync', 'ok_standalone']:  # ok_sync or ok_standalone according to voldrv, can still mean incorrect deployment
            return sd_status

        # Verify whether 'ok_standalone' or 'ok_sync' is the correct status for this vDisk
        vpool_dtl = self.vpool.configuration['dtl_enabled']
        if (self.has_manual_dtl is False and vpool_dtl is False) or (
                self.has_manual_dtl is True and vpool_dtl is True
                and len(self.domains_dtl_guids) == 0):
            return 'disabled'

        storagerouter_guid = self._storagerouter_guid()
        if storagerouter_guid is None:
            return 'checkup_required'

        this_sr = StorageRouter(storagerouter_guid)
        other_storagerouters = set([
            sd.storagerouter for sd in self.vpool.storagedrivers
            if sd.storagerouter_guid != storagerouter_guid
        ])

        # Retrieve all StorageRouters linked to the Recovery Domains (primary) and Regular Domains (secondary) for the StorageRouter hosting this vDisk
        primary = set()
        secondary = set()
        for junction in this_sr.domains:
            target = primary if junction.backup is True else secondary
            target.update(StorageRouterList.get_primary_storagerouters_for_domain(junction.domain))
        primary = primary.intersection(other_storagerouters)
        secondary = secondary.difference(primary)
        secondary = secondary.intersection(other_storagerouters)

        try:
            config = self.storagedriver_client.get_dtl_config(
                str(self.volume_id))
        except Exception:
            # Any regular failure to fetch the configuration requires a checkup. No bare 'except', so
            # KeyboardInterrupt / SystemExit are not swallowed
            return 'checkup_required'

        if self.has_manual_dtl is False:  # No DTL targets --> Check for Storage Routers linked to current vPool (priority for StorageRouters in recovery domain of current StorageRouter)
            if len(primary) > 0:
                possible_storagerouters = list(primary)
            elif len(secondary) > 0:
                possible_storagerouters = list(secondary)
            else:
                possible_storagerouters = list(other_storagerouters)
            if len(possible_storagerouters) > 0 and config is not None:
                if config.host not in _target_ips(possible_storagerouters):
                    return 'checkup_required'
            return sd_status

        # Manually configured DTL: the target must be located in one of the chosen domains, if any were chosen
        if len(self.domains_dtl) > 0:
            chosen_storagerouters = set()
            for junction in self.domains_dtl:
                chosen_storagerouters.update(
                    StorageRouterList.get_primary_storagerouters_for_domain(junction.domain))
            possible_storagerouters = chosen_storagerouters.intersection(other_storagerouters)
        else:
            possible_storagerouters = other_storagerouters

        if config is None:
            # Running without DTL target is only fine when there is no possible target at all
            if len(possible_storagerouters) == 0 and sd_status == 'ok_standalone':
                return sd_status
            return 'checkup_required'
        if len(possible_storagerouters) > 0 and config.host in _target_ips(possible_storagerouters):
            return sd_status
        return 'checkup_required'

    def _snapshot_ids(self):
        """
        Fetches the snapshot IDs for this vDisk
        :return: The snapshot IDs; an empty list when the vDisk has no volume_id or vPool, or when the IDs could
                 not be listed
        :rtype: list
        """
        if not self.volume_id or not self.vpool:
            return []

        # Imported locally, as in the original code
        from ovs.lib.vdisk import VDiskController
        try:
            return VDiskController.list_snapshot_ids(vdisk=self)
        except Exception:
            # Best effort: a failure to list results in no snapshots. No bare 'except', so
            # KeyboardInterrupt / SystemExit are not swallowed
            return []

    def _snapshots(self):
        """
        Fetches the information of all snapshots for this vDisk
        """
        snapshots = []
        self.invalidate_dynamics('snapshot_ids')
        for snap_id in self.snapshot_ids:
            try:
                snapshot = self.storagedriver_client.info_snapshot(
                    str(self.volume_id), snap_id, req_timeout_secs=2)
            except SnapshotNotFoundException:
                continue
            if snapshot.metadata:
                metadata = pickle.loads(snapshot.metadata)
                if isinstance(metadata, dict):
                    snapshots.append({
                        'guid':
                        snap_id,
                        'timestamp':
                        metadata['timestamp'],
                        'label':
                        metadata['label'],
                        'is_consistent':
                        metadata['is_consistent'],
                        'is_automatic':
                        metadata.get('is_automatic', True),
                        'is_sticky':
                        metadata.get('is_sticky', False),
                        'in_backend':
                        snapshot.in_backend,
                        'stored':
                        int(snapshot.stored)
                    })
            else:
                snapshots.append({
                    'guid':
                    snap_id,
                    'timestamp':
                    time.mktime(
                        datetime.strptime(snapshot.timestamp.strip(),
                                          '%c').timetuple()),
                    'label':
                    snap_id,
                    'is_consistent':
                    False,
                    'is_automatic':
                    False,
                    'is_sticky':
                    False,
                    'in_backend':
                    snapshot.in_backend,
                    'stored':
                    int(snapshot.stored)
                })
        return snapshots

    def _info(self):
        """
        Fetches the info (see Volume Driver API) for the vDisk.
        Every property exposed by the info object is copied into a dict; 'object_type' is stringified and
        'metadata_backend_config' is flattened to a list of {'ip', 'port'} dicts when node configs are available.
        :return: The volume info, extended with a 'live_status' key holding one of VDisk.STATUSES
        :rtype: dict
        """
        vdiskinfo = StorageDriverClient.EMPTY_INFO()
        vdisk_state = VDisk.STATUSES.RUNNING
        if self.volume_id and self.vpool:
            try:
                try:
                    vdiskinfo = self.storagedriver_client.info_volume(
                        str(self.volume_id), req_timeout_secs=2)
                except VolumeRestartInProgressException:
                    # Give the restarting volume a short moment and retry exactly once
                    time.sleep(0.5)
                    vdiskinfo = self.storagedriver_client.info_volume(
                        str(self.volume_id), req_timeout_secs=2)
            except MaxRedirectsExceededException:
                vdisk_state = VDisk.STATUSES.NON_RUNNING
            # @todo replace RuntimeError with NodeNotReachableException
            except (ClusterNotReachableException, RuntimeError) as exception:
                if isinstance(exception, ClusterNotReachableException) or (
                        isinstance(exception, RuntimeError)
                        and 'failed to send XMLRPC request' in str(exception)):
                    self._logger.debug(
                        'VDisk {0} status has been set to UNKNOWN'.format(
                            self.name))
                    vdisk_state = VDisk.STATUSES.UNKNOWN
                else:
                    # Any other RuntimeError used to vanish without a trace; report it like other unexpected errors
                    self._logger.debug(
                        'Uncaught exception occurred when requesting the volume info for vDisk {0}: {1}'
                        .format(self.name, exception))
            except Exception as ex:
                self._logger.debug(
                    'Uncaught exception occurred when requesting the volume info for vDisk {0}: {1}'
                    .format(self.name, ex))

        vdiskinfodict = {}
        for key, value in vdiskinfo.__class__.__dict__.items():
            if type(value) is not property:
                continue
            objectvalue = getattr(vdiskinfo, key)
            if key == 'object_type':
                vdiskinfodict[key] = str(objectvalue)
            elif key == 'metadata_backend_config':
                vdiskinfodict[key] = {}
                if hasattr(objectvalue, 'node_configs') and callable(
                        objectvalue.node_configs):
                    vdiskinfodict[key] = [{
                        'ip': nodeconfig.address(),
                        'port': nodeconfig.port()
                    } for nodeconfig in objectvalue.node_configs()]
            else:
                # The HALTED check used to hang off the 'metadata_backend_config' branch, where 'key' can never be
                # 'halted', so it was dead code. The value itself is still copied, as it always was.
                if key == 'halted' and objectvalue is True:
                    self._logger.debug(
                        'VDisk {0} status has been set to HALTED'.format(
                            self.name))
                    vdisk_state = VDisk.STATUSES.HALTED
                vdiskinfodict[key] = objectvalue
        vdiskinfodict['live_status'] = vdisk_state
        return vdiskinfodict

    def _statistics(self, dynamic):
        """
        Builds the statistics of this vDisk: the values of fetch_statistics, a 'timestamp' and the entries which
        VDisk.calculate_delta adds to the dict it is handed.
        :param dynamic: Dynamic definition, handed over to VDisk.calculate_delta
        :return: The statistics
        :rtype: dict
        """
        # Copy the fetched values into a dict of our own before extending it
        statistics = dict(self.fetch_statistics().iteritems())
        statistics['timestamp'] = time.time()
        VDisk.calculate_delta(self._key, dynamic, statistics)
        return statistics

    def _storagedriver_id(self):
        """
        Returns the Volume Storage Driver ID to which the vDisk is connected.
        """
        vdisk_object = self.objectregistry_client.find(str(self.volume_id))
        if vdisk_object is not None:
            return vdisk_object.node_id()
        return None

    def _storagerouter_guid(self):
        """
        Loads the vDisks StorageRouter guid
        """
        storagedriver_id = self._storagedriver_id()
        if not storagedriver_id:
            return None
        from ovs.dal.hybrids.storagedriver import StorageDriver
        sds = DataList(
            StorageDriver, {
                'type':
                DataList.where_operator.AND,
                'items': [('storagedriver_id', DataList.operator.EQUALS,
                           storagedriver_id)]
            })
        if len(sds) == 1:
            return sds[0].storagerouter_guid
        return None

    def _is_vtemplate(self):
        """
        Returns whether the vdisk is a template
        """
        vdisk_object = self.objectregistry_client.find(str(self.volume_id))
        if vdisk_object is not None:
            return str(vdisk_object.object_type()) == 'TEMPLATE'
        return False

    def _edge_clients(self):
        """
        Retrieves all edge clients
        """
        clients = {}
        for storagedriver in self.vpool.storagedrivers:
            for client in storagedriver.edge_clients:
                if client['object_id'] == self.volume_id:
                    clients[client['key']] = client
        return clients.values()

    def fetch_statistics(self):
        """
        Loads the raw volume statistics of this vDisk and runs them through VDisk.extract_statistics.
        Without volume_id / vPool, or when the request fails (the failure is logged), the empty statistics object is
        used as input instead.
        :return: The outcome of VDisk.extract_statistics
        """
        raw_statistics = StorageDriverClient.EMPTY_STATISTICS()
        if not (self.volume_id and self.vpool):
            return VDisk.extract_statistics(raw_statistics, self)

        # Ask the volumedriver, falling back on the empty statistics if that does not work out
        try:
            raw_statistics = self.storagedriver_client.statistics_volume(
                str(self.volume_id), req_timeout_secs=2)
        except Exception as ex:
            VDisk._logger.error(
                'Error loading statistics_volume from {0}: {1}'.format(
                    self.volume_id, ex))
        return VDisk.extract_statistics(raw_statistics, self)

    @staticmethod
    def extract_statistics(stats, vdisk):
        """
        Extract the statistics useful for the framework from all statistics passed in by StorageDriver
        The extraction is best effort: when something goes wrong halfway, whatever was gathered so far is returned.
        :param stats: Statistics object; its 'performance_counters' attribute and a fixed set of plain attributes
                      are read
        :param vdisk: Object whose 'metadata' dict provides 'lba_size' and 'cluster_multiplier' to derive the block
                      size, or None to fall back on a block size of 4096
        :return: The extracted statistics
        :rtype: dict
        """
        # performance counter name -> {method to call on that counter: key to store the outcome under}
        counter_map = {
            'backend_read_request_size': {
                'sum': 'backend_data_read',
                'events': 'backend_read_operations',
                'distribution': 'backend_read_operations_distribution'
            },
            'backend_read_request_usecs': {
                'sum': 'backend_read_latency',
                'distribution': 'backend_read_latency_distribution'
            },
            'backend_write_request_size': {
                'sum': 'backend_data_written',
                'events': 'backend_write_operations',
                'distribution': 'backend_write_operations_distribution'
            },
            'backend_write_request_usecs': {
                'sum': 'backend_write_latency',
                'distribution': 'backend_write_latency_distribution'
            },
            'sync_request_usecs': {
                'sum': 'sync_latency',
                'distribution': 'sync_latency_distribution'
            },
            'read_request_size': {
                'sum': 'data_read',
                'events': 'read_operations',
                'distribution': 'read_operations_distribution'
            },
            'read_request_usecs': {
                'sum': 'read_latency',
                'distribution': 'read_latency_distribution'
            },
            'write_request_size': {
                'sum': 'data_written',
                'events': 'write_operations',
                'distribution': 'write_operations_distribution'
            },
            'write_request_usecs': {
                'sum': 'write_latency',
                'distribution': 'write_latency_distribution'
            },
            'unaligned_read_request_size': {
                'sum': 'unaligned_data_read',
                'events': 'unaligned_read_operations',
                'distribution': 'unaligned_read_operations_distribution'
            },
            'unaligned_read_request_usecs': {
                'sum': 'unaligned_read_latency',
                'distribution': 'unaligned_read_latency_distribution'
            },
            'unaligned_write_request_size': {
                'sum': 'unaligned_data_written',
                'events': 'unaligned_write_operations',
                'distribution': 'unaligned_write_operations_distribution'
            },
            'unaligned_write_request_usecs': {
                'sum': 'unaligned_write_latency',
                'distribution': 'unaligned_write_latency_distribution'
            }
        }
        statsdict = {}
        try:
            pc = stats.performance_counters
            for counter, info in counter_map.iteritems():
                if hasattr(pc, counter):
                    counter_object = getattr(pc, counter)
                    for method, target in info.iteritems():
                        if hasattr(counter_object, method):
                            statsdict[target] = getattr(
                                counter_object, method)()

            for key in [
                    'cluster_cache_hits', 'cluster_cache_misses',
                    'metadata_store_hits', 'metadata_store_misses',
                    'sco_cache_hits', 'sco_cache_misses', 'stored',
                    'partial_read_fast', 'partial_read_slow'
            ]:
                if hasattr(stats, key):
                    statsdict[key] = getattr(stats, key)
            # Do some more manual calculations
            block_size = 0
            if vdisk is not None:
                block_size = vdisk.metadata.get(
                    'lba_size', 0) * vdisk.metadata.get('cluster_multiplier', 0)
            if block_size == 0:
                block_size = 4096
            for key, source in {
                    '4k_read_operations': 'data_read',
                    '4k_write_operations': 'data_written',
                    '4k_unaligned_read_operations': 'unaligned_data_read',
                    '4k_unaligned_write_operations': 'unaligned_data_written'
            }.iteritems():
                statsdict[key] = statsdict.get(source, 0) / block_size
            # Pre-calculate sums
            for key, items in StorageDriverClient.STAT_SUMS.iteritems():
                statsdict[key] = 0
                for item in items:
                    statsdict[key] += statsdict[item]
        except Exception:
            # Deliberately best effort, but a bare except would also eat SystemExit / KeyboardInterrupt
            pass
        return statsdict

    @staticmethod
    def calculate_delta(key, dynamic, current_stats):
        """
        Calculate statistics deltas
        For every statistic (except 'timestamp' and the '_latency' / '_distribution' entries) a '<name>_ps' entry is
        added to current_stats, based on the previously stored statistics. Afterwards current_stats is stored as
        the new previous statistics.
        :param key: Key to retrieve from volatile factory
        :param dynamic: Object providing 'timeout'; 10 times that value is passed along when storing the statistics
                        (presumably as expiry - TODO confirm)
        :param current_stats: Current statistics to compare with; updated in place
        :return: None
        """
        volatile = VolatileFactory.get_client()
        prev_key = '{0}_{1}'.format(key, 'statistics_previous')
        previous_stats = volatile.get(prev_key, default={})
        # Walk over a snapshot of the names: entries are added to current_stats while looping. A dedicated loop
        # variable also keeps the 'key' parameter intact.
        for stat_key in list(current_stats):
            if stat_key == 'timestamp' or '_latency' in stat_key or '_distribution' in stat_key:
                continue
            rate_key = '{0}_ps'.format(stat_key)
            delta = current_stats['timestamp'] - previous_stats.get(
                'timestamp', current_stats['timestamp'])
            if delta == 0:
                # No time passed (or no previous sample): stick to the previously calculated rate
                current_stats[rate_key] = previous_stats.get(rate_key, 0)
            elif delta > 0 and stat_key in previous_stats:
                # Counters that went backwards result in 0 rather than in a negative rate
                current_stats[rate_key] = max(
                    0, (current_stats[stat_key] - previous_stats[stat_key]) / delta)
            else:
                current_stats[rate_key] = 0
        volatile.set(prev_key, current_stats, dynamic.timeout * 10)

    def reload_client(self, client):
        """
        Reloads the StorageDriverClient, ObjectRegistryClient or FSMetaDataClient
        Nothing happens when the vDisk has no vpool_guid; an unknown client name is ignored.
        :param client: Client to reload: 'storagedriver', 'objectregistry' or 'filesystem_metadata'
        :return: None
        """
        if not self.vpool_guid:
            return
        self._frozen = False
        try:
            if client == 'storagedriver':
                self._storagedriver_client = StorageDriverClient.load(
                    self.vpool)
            elif client == 'objectregistry':
                self._objectregistry_client = ObjectRegistryClient.load(
                    self.vpool)
            elif client == 'filesystem_metadata':
                self._fsmetadata_client = FSMetaDataClient.load(self.vpool)
        finally:
            # Freeze again even when loading the client raised, the object must not stay unfrozen
            self._frozen = True
Exemple #19
0
class VPool(DataObject):
    """
    The VPool class represents a vPool. A vPool is a Virtual Storage Pool, a Filesystem, used to
    deploy vDisks. a vPool can span multiple Storage Drivers and connects to a single Storage BackendType.
    """
    _logger = Logger('hybrids')

    STATUSES = DataObject.enumerator('Status', [
        'DELETING', 'EXTENDING', 'FAILURE', 'INSTALLING', 'RUNNING',
        'SHRINKING'
    ])
    CACHES = DataObject.enumerator('Cache', {
        'BLOCK': 'block',
        'FRAGMENT': 'fragment'
    })

    __properties = [
        Property('name',
                 str,
                 unique=True,
                 indexed=True,
                 doc='Name of the vPool'),
        Property('description',
                 str,
                 mandatory=False,
                 doc='Description of the vPool'),
        Property('login',
                 str,
                 mandatory=False,
                 doc='Login/Username for the Storage BackendType.'),
        Property('password',
                 str,
                 mandatory=False,
                 doc='Password for the Storage BackendType.'),
        Property(
            'connection',
            str,
            mandatory=False,
            doc=
            'Connection (IP, URL, Domain name, Zone, ...) for the Storage BackendType.'
        ),
        Property(
            'metadata',
            dict,
            mandatory=False,
            doc='Metadata for the backends, as used by the Storage Drivers.'),
        Property(
            'rdma_enabled',
            bool,
            default=False,
            doc=
            'Has the vpool been configured to use RDMA for DTL transport, which is only possible if all storagerouters are RDMA capable'
        ),
        Property('status', STATUSES.keys(), doc='Status of the vPool'),
        Property(
            'metadata_store_bits',
            int,
            mandatory=False,
            doc=
            'StorageDrivers deployed for this vPool will make use of this amount of metadata store bits'
        )
    ]
    __relations = []
    __dynamics = [
        Dynamic('configuration', dict, 3600),
        Dynamic('statistics', dict, 4),
        Dynamic('identifier', str, 120),
        Dynamic('extensible', tuple, 60),
        Dynamic('volume_potentials', dict, 60)
    ]
    _fixed_properties = [
        'storagedriver_client', 'objectregistry_client',
        'clusterregistry_client'
    ]

    def __init__(self, *args, **kwargs):
        """
        Creates the vPool object and prepares the placeholders for its clients, which are only loaded on first use
        """
        DataObject.__init__(self, *args, **kwargs)
        self._frozen = False
        # A chained assignment binds its targets from left to right
        self._storagedriver_client = self._objectregistry_client = self._clusterregistry_client = None
        self._frozen = True

    @property
    def storagedriver_client(self):
        """
        Client for the communication between the Storage Driver and the framework, loaded on first access
        :return: StorageDriverClient
        """
        loaded = self._storagedriver_client
        if loaded is None:
            self.reload_client('storagedriver')
            loaded = self._storagedriver_client
        return loaded

    @property
    def objectregistry_client(self):
        """
        Client for the communication between the Storage Driver's object registry and the framework, loaded on
        first access
        :return: ObjectRegistryClient
        """
        loaded = self._objectregistry_client
        if loaded is None:
            self.reload_client('objectregistry')
            loaded = self._objectregistry_client
        return loaded

    @property
    def clusterregistry_client(self):
        """
        Client for making changes to the StorageDriver's Cluster Registry, loaded on first access
        :return: ClusterRegistry client
        """
        loaded = self._clusterregistry_client
        if loaded is None:
            self.reload_client('clusterregistry')
            loaded = self._clusterregistry_client
        return loaded

    def reload_client(self, client):
        """
        Reloads the StorageDriverClient, ObjectRegistryClient or ClusterRegistry client
        An unknown client name is ignored.
        :param client: Client to reload: 'storagedriver', 'objectregistry' or 'clusterregistry'
        :return: None
        """
        self._frozen = False
        try:
            if client == 'storagedriver':
                self._storagedriver_client = StorageDriverClient.load(self)
            elif client == 'objectregistry':
                self._objectregistry_client = ObjectRegistryClient.load(self)
            elif client == 'clusterregistry':
                self._clusterregistry_client = ClusterRegistryClient.load(self)
        finally:
            # Freeze again even when loading the client raised, the object must not stay unfrozen
            self._frozen = True

    def _configuration(self):
        """
        VPool configuration
        """
        if not self.storagedrivers or not self.storagedrivers[0].storagerouter:
            return {}

        storagedriver_config = StorageDriverConfiguration(
            self.guid, self.storagedrivers[0].storagedriver_id)
        for expected_key in [
                'distributed_transaction_log', 'filesystem', 'volume_router',
                'volume_manager'
        ]:
            if expected_key not in storagedriver_config.configuration:
                return {}

        dtl = storagedriver_config.configuration['distributed_transaction_log']
        file_system = storagedriver_config.configuration['filesystem']
        volume_router = storagedriver_config.configuration['volume_router']
        volume_manager = storagedriver_config.configuration['volume_manager']

        dtl_host = file_system['fs_dtl_host']
        dtl_mode = file_system.get('fs_dtl_mode',
                                   StorageDriverClient.VOLDRV_DTL_ASYNC)
        cluster_size = volume_manager['default_cluster_size'] / 1024
        dtl_transport = dtl['dtl_transport']
        sco_multiplier = volume_router['vrouter_sco_multiplier']
        dtl_config_mode = file_system['fs_dtl_config_mode']
        tlog_multiplier = volume_manager['number_of_scos_in_tlog']
        non_disposable_sco_factor = volume_manager[
            'non_disposable_scos_factor']

        sco_size = sco_multiplier * cluster_size / 1024  # SCO size is in MiB ==> SCO multiplier * cluster size (4 KiB by default)
        write_buffer = tlog_multiplier * sco_size * non_disposable_sco_factor
        dtl_enabled = not (dtl_config_mode
                           == StorageDriverClient.VOLDRV_DTL_MANUAL_MODE
                           and dtl_host == '')

        try:
            mds_config = Configuration.get('/ovs/vpools/{0}/mds_config'.format(
                self.guid))
        except NotFoundException:
            mds_config = {}

        return {
            'sco_size':
            sco_size,
            'dtl_mode':
            StorageDriverClient.REVERSE_DTL_MODE_MAP[dtl_mode]
            if dtl_enabled is True else 'no_sync',
            'mds_config':
            mds_config,
            'dtl_enabled':
            dtl_enabled,
            'cluster_size':
            cluster_size,
            'write_buffer':
            write_buffer,
            'dtl_transport':
            StorageDriverClient.REVERSE_DTL_TRANSPORT_MAP[dtl_transport],
            'dtl_config_mode':
            dtl_config_mode,
            'tlog_multiplier':
            tlog_multiplier
        }

    def _statistics(self, dynamic):
        """
        Aggregates the Statistics (IOPS, Bandwidth, ...) of each StorageDriver of the vPool.
        Plain values are summed per key, dict values are summed per sub-key.
        :param dynamic: Dynamic definition, handed over to VDisk.calculate_delta
        :return: The aggregated statistics, extended with a 'timestamp' and the entries added by
                 VDisk.calculate_delta
        :rtype: dict
        """
        from ovs.dal.hybrids.vdisk import VDisk
        statistics = {}
        for storagedriver in self.storagedrivers:
            for key, value in storagedriver.fetch_statistics().iteritems():
                if isinstance(value, dict):
                    # The summing loop used to run only when the key was first seen, which silently dropped the
                    # dict-valued statistics of every StorageDriver but the first one
                    bucket = statistics.setdefault(key, {})
                    for subkey, subvalue in value.iteritems():
                        bucket[subkey] = bucket.get(subkey, 0) + subvalue
                else:
                    statistics[key] = statistics.get(key, 0) + value
        statistics['timestamp'] = time.time()
        VDisk.calculate_delta(self._key, dynamic, statistics)
        return statistics

    def _identifier(self):
        """
        An identifier of this vPool in its current configuration state
        """
        return '{0}_{1}'.format(self.guid, '_'.join(self.storagedrivers_guids))

    def _extensible(self):
        """
        Checks whether this vPool can be extended
        :return: A (extensible, reasons) pair; reasons lists why not: 'non_running' when the status is not RUNNING,
                 'voldrv_missing_info' when metadata_store_bits is not set
        :rtype: tuple
        """
        checks = ((self.status != VPool.STATUSES.RUNNING, 'non_running'),
                  (self.metadata_store_bits is None, 'voldrv_missing_info'))
        reasons = [reason for blocked, reason in checks if blocked]
        return len(reasons) == 0, reasons

    def _volume_potentials(self):
        # type: () -> Dict[str, int]
        """
        Get an overview of all volume potentials for every Storagedriver in this vpool
        A possible -1 can be returned for the volume potential which indicates that the potential could not be retrieved
        :return: The overview with the volume potential
        :rtype: dict
        """
        volume_potentials = {}
        for storagedriver in self.storagedrivers:
            volume_potential = -1
            try:
                std_config = StorageDriverConfiguration(
                    storagedriver.vpool_guid, storagedriver.storagedriver_id)
                client = LocalStorageRouterClient(std_config.remote_path)
                volume_potential = client.volume_potential(
                    str(storagedriver.storagedriver_id))
            except Exception:
                self._logger.exception(
                    'Unable to retrieve configuration for storagedriver {0}'.
                    format(storagedriver.storagedriver_id))
            volume_potentials[
                storagedriver.storagerouter.guid] = volume_potential
        return volume_potentials
Exemple #20
0
class StorageDriver(DataObject):
    """
    The StorageDriver class represents a Storage Driver. A Storage Driver is an application
    on a Storage Router to which the vDisks connect. The Storage Driver is the gateway to the Storage Backend.
    """
    DISTANCES = DataObject.enumerator('Distance', {
        'NEAR': 0,
        'FAR': 10000,
        'INFINITE': 20000
    })

    _logger = LogHandler.get('dal', name='hybrid')

    __properties = [
        Property('name', str, doc='Name of the Storage Driver.'),
        Property('description',
                 str,
                 mandatory=False,
                 doc='Description of the Storage Driver.'),
        Property(
            'ports',
            dict,
            doc=
            'Ports on which the Storage Driver is listening (management, xmlrpc, dtl, edge).'
        ),
        Property('cluster_ip',
                 str,
                 doc='IP address on which the Storage Driver is listening.'),
        Property('storage_ip',
                 str,
                 doc='IP address on which the vpool is shared to hypervisor'),
        Property(
            'storagedriver_id',
            str,
            unique=True,
            indexed=True,
            doc='ID of the Storage Driver as known by the Storage Drivers.'),
        Property('mountpoint',
                 str,
                 doc='Mountpoint from which the Storage Driver serves data'),
        Property('startup_counter',
                 int,
                 default=0,
                 doc='StorageDriver startup counter')
    ]
    __relations = [
        Relation('vpool', VPool, 'storagedrivers'),
        Relation('storagerouter', StorageRouter, 'storagedrivers')
    ]
    __dynamics = [
        Dynamic('status', str, 30),
        Dynamic('statistics', dict, 4),
        Dynamic('edge_clients', list, 30),
        Dynamic('vdisks_guids', list, 15),
        Dynamic('vpool_backend_info', dict, 60),
        Dynamic('cluster_node_config', dict, 3600)
    ]

    def _status(self):
        """
        Fetches the Status of the Storage Driver.
        """
        _ = self
        return None

    def _statistics(self, dynamic):
        """
        Builds the statistics of this Storage Driver: the values of fetch_statistics, a 'timestamp' and the entries
        which VDisk.calculate_delta adds to the dict it is handed.
        :param dynamic: Dynamic definition, handed over to VDisk.calculate_delta
        :return: The statistics
        :rtype: dict
        """
        from ovs.dal.hybrids.vdisk import VDisk
        # Copy the fetched values into a dict of our own before extending it
        statistics = dict(self.fetch_statistics().iteritems())
        statistics['timestamp'] = time.time()
        VDisk.calculate_delta(self._key, dynamic, statistics)
        return statistics

    def _edge_clients(self):
        """
        Retrieves all edge clients
        """
        clients = []
        try:
            for item in self.vpool.storagedriver_client.list_client_connections(
                    str(self.storagedriver_id), req_timeout_secs=2):
                clients.append({
                    'key': '{0}:{1}'.format(item.ip, item.port),
                    'object_id': item.object_id,
                    'ip': item.ip,
                    'port': item.port,
                    'server_ip': self.storage_ip,
                    'server_port': self.ports['edge']
                })
        except Exception:
            StorageDriver._logger.exception(
                'Error loading edge clients from {0}'.format(
                    self.storagedriver_id))
        clients.sort(key=lambda e: (e['ip'], e['port']))
        return clients

    def _vdisks_guids(self):
        """
        Determines the guids of the vDisks served by this StorageDriver: the vDisks for the volumes which the
        vPool's object registry has registered on this StorageDriver's ID.
        """
        from ovs.dal.lists.vdisklist import VDiskList
        served_volume_ids = [
            registration.object_id() for registration in
            self.vpool.objectregistry_client.get_all_registrations()
            if registration.node_id() == self.storagedriver_id
        ]
        return VDiskList.get_in_volume_ids(served_volume_ids).guids

    def fetch_statistics(self):
        """
        Loads statistics from this StorageDriver - returns unprocessed data
        Without storagedriver_id / vPool, or when the request fails (the failure is logged), the empty statistics
        object is used as input for the extraction instead.
        :return: The outcome of VDisk.extract_statistics
        """
        # Imported at call time, exactly like _statistics does, so VDisk does not have to exist at module level
        from ovs.dal.hybrids.vdisk import VDisk

        # Load data from volumedriver
        sdstats = StorageDriverClient.EMPTY_STATISTICS()
        reference_vdisk = None
        if self.vpool:
            if self.storagedriver_id:
                try:
                    sdstats = self.vpool.storagedriver_client.statistics_node(
                        str(self.storagedriver_id), req_timeout_secs=2)
                except Exception as ex:
                    StorageDriver._logger.error(
                        'Error loading statistics_node from {0}: {1}'.format(
                            self.storagedriver_id, ex))
            # The first vDisk of the vPool (if any) is handed to the extraction as source for the block size.
            # This used to dereference self.vpool outside of the guard above.
            vdisks = self.vpool.vdisks
            if len(vdisks) > 0:
                reference_vdisk = vdisks[0]
        # Load volumedriver data in dictionary
        return VDisk.extract_statistics(sdstats, reference_vdisk)

    def _vpool_backend_info(self):
        """
        Gathers additional information about the vPool, to be shown in the GUI:
        the size of the global write buffer of this Storage Driver, the info and connection info of the accelerated
        and block cache Backends and the caching settings
        Everything which is not configured for this Storage Driver's StorageRouter is reported as None
        :return: Information about vPool and accelerated Backend
        :rtype: dict
        """
        from ovs.dal.hybrids.diskpartition import DiskPartition
        from ovs.dal.hybrids.j_storagedriverpartition import StorageDriverPartition

        # The global write buffer is the total size of the WRITE partitions with SCO sub role
        info = {
            'cache_read': None,
            'cache_write': None,
            'cache_quota_fc': None,
            'cache_quota_bc': None,
            'backend_info': None,
            'connection_info': None,
            'block_cache_read': None,
            'block_cache_write': None,
            'block_cache_backend_info': None,
            'block_cache_connection_info': None,
            'global_write_buffer': sum(
                partition.size for partition in self.partitions
                if partition.role == DiskPartition.ROLES.WRITE
                and partition.sub_role == StorageDriverPartition.SUBROLE.SCO)
        }

        # Backend details are stored in the vPool metadata, under a key per StorageRouter
        for key_template, backend_target, connection_target in (
                ('backend_aa_{0}', 'backend_info', 'connection_info'),
                ('backend_bc_{0}', 'block_cache_backend_info',
                 'block_cache_connection_info')):
            metadata_key = key_template.format(self.storagerouter_guid)
            if metadata_key in self.vpool.metadata:
                metadata = self.vpool.metadata[metadata_key]
                info[backend_target] = metadata['backend_info']
                info[connection_target] = metadata['connection_info']

        caching_per_router = self.vpool.metadata['backend']['caching_info']
        if self.storagerouter_guid in caching_per_router:
            caching_info = caching_per_router[self.storagerouter_guid]
            info['cache_read'] = caching_info['fragment_cache_on_read']
            info['cache_write'] = caching_info['fragment_cache_on_write']
            info['cache_quota_fc'] = caching_info.get('quota_fc')
            info['cache_quota_bc'] = caching_info.get('quota_bc')
            info['block_cache_read'] = caching_info.get('block_cache_on_read')
            info['block_cache_write'] = caching_info.get('block_cache_on_write')
        return info

    def _cluster_node_config(self):
        """
        Builds the ClusterNodeConfig dict for the StorageDriver process
        The node distance map holds a distance for every other StorageDriver of the vPool: NEAR when this
        StorageDriver's StorageRouter has no primary (non-backup) domains or shares one of them with the other
        StorageRouter's primary domains, FAR when a primary domain of the other StorageRouter is one of our backup
        domains, INFINITE otherwise
        :rtype: dict
        """
        from ovs.extensions.generic.configuration import Configuration
        rdma = Configuration.get('/ovs/framework/rdma')

        # Split our own domains in primary (non-backup) and secondary (backup) ones
        primary_domains, secondary_domains = [], []
        for junction in self.storagerouter.domains:
            target = primary_domains if junction.backup is False else secondary_domains
            target.append(junction.domain_guid)

        distance_map = {}
        for sd in self.vpool.storagedrivers:
            if sd.guid == self.guid:
                continue
            distance = StorageDriver.DISTANCES.NEAR
            if len(primary_domains) > 0:
                distance = StorageDriver.DISTANCES.INFINITE
                for junction in sd.storagerouter.domains:
                    if junction.backup is not False:
                        continue
                    if junction.domain_guid in primary_domains:
                        distance = min(distance, StorageDriver.DISTANCES.NEAR)
                        break  # We can break here since we reached the minimum distance
                    if junction.domain_guid in secondary_domains:
                        distance = min(distance, StorageDriver.DISTANCES.FAR)
            distance_map[str(sd.storagedriver_id)] = distance

        scheme = 'rdma' if rdma else 'tcp'
        return {
            'vrouter_id': self.storagedriver_id,
            'host': self.storage_ip,
            'message_port': self.ports['management'],
            'xmlrpc_host': self.cluster_ip,
            'xmlrpc_port': self.ports['xmlrpc'],
            'failovercache_host': self.storage_ip,
            'failovercache_port': self.ports['dtl'],
            'network_server_uri': '{0}://{1}:{2}'.format(
                scheme, self.storage_ip, self.ports['edge']),
            'node_distance_map': distance_map
        }
class AlbaBackend(DataObject):
    """
    The AlbaBackend provides ALBA specific information on top of the generic Backend it is linked to (one-to-one)
    Every name listed in the dynamics below has a matching '_<name>' method in this class
    """
    # Scaling types an ALBA Backend can have; compared against 'self.scaling' throughout this class
    SCALINGS = DataObject.enumerator('Scaling', ['GLOBAL', 'LOCAL'])
    # Statuses returned by the 'live_status' dynamic and exchanged with remote ALBA Backends
    STATUSES = DataObject.enumerator(
        'Status', {
            'UNKNOWN': 'unknown',
            'FAILURE': 'failure',
            'WARNING': 'warning',
            'RUNNING': 'running'
        })  # lower-case values for backwards compatibility

    # Logger shared by all instances, used both as 'self._logger' and 'AlbaBackend._logger'
    _logger = Logger('hybrids')
    # Stored properties
    __properties = [
        Property('alba_id',
                 str,
                 mandatory=False,
                 indexed=True,
                 doc='ALBA internal identifier'),
        Property('scaling',
                 SCALINGS.keys(),
                 doc='Scaling for an ALBA Backend can be {0}'.format(
                     ' or '.join(SCALINGS.keys())))
    ]
    # Relations towards other hybrids: exactly 1 generic Backend, reachable from the Backend as 'alba_backend'
    __relations = [
        Relation('backend',
                 Backend,
                 'alba_backend',
                 onetoone=True,
                 doc='Linked generic Backend')
    ]
    # Dynamic (calculated) properties
    # NOTE(review): arguments are presumably (name, return type, cache timeout in seconds) and 'locked' presumably
    # serializes the calculation - confirm against the Dynamic helper, which is not part of this file
    __dynamics = [
        Dynamic('local_stack', dict, 15, locked=True),
        Dynamic('statistics', dict, 5, locked=True),
        Dynamic('ns_data', list, 60, locked=True),
        Dynamic('usages', dict, 60, locked=True),
        Dynamic('presets', list, 60, locked=True),
        Dynamic('available', bool, 60),
        Dynamic('name', str, 3600),
        Dynamic('osd_statistics', dict, 5, locked=True),
        Dynamic('linked_backend_guids', set, 30, locked=True),  # '_linked_backend_guids' can also return None
        Dynamic('remote_stack', dict, 60, locked=True),
        Dynamic('local_summary', dict, 15, locked=True),
        Dynamic('live_status', str, 30, locked=True)
    ]

    def _local_stack(self):
        """
        Builds a live overview of the slots (and the OSDs they hold) of every known ALBA node
        Every node is queried in its own thread
        :return: Slot information per node: {<node_id>: {<slot_id>: {'osds': {..}, 'name': .., 'status': .., 'status_detail': .., ..}}}
                 Empty when no ABM cluster is linked yet
        :rtype: dict
        """
        if self.abm_cluster is None:
            return {}  # No ABM cluster yet, so backend not fully installed yet

        # Retrieved once up front, the worker threads only read from it
        usage_per_osd = self.osd_statistics

        def _collect_node(_alba_node, _overview):
            slots = {}
            _overview[_alba_node.node_id] = slots
            for _slot_id, _slot_info in _alba_node.stack.iteritems():
                # Start from defaults, whatever the node reports overrules them at the end
                slot_entry = {'osds': {},
                              'name': _slot_id,
                              'status': 'error',
                              'status_detail': 'unknown'}
                slots[_slot_id] = slot_entry
                # Decorate every OSD for which statistics are available with its usage
                for _osd_id, _osd_info in _slot_info.get('osds', {}).iteritems():
                    if _osd_id not in usage_per_osd:
                        continue
                    osd_usage = usage_per_osd[_osd_id]
                    capacity = osd_usage['capacity']
                    consumed = osd_usage['disk_usage']
                    _osd_info['usage'] = {'size': int(capacity),
                                          'used': int(consumed),
                                          'available': int(capacity - consumed)}
                slot_entry.update(_slot_info)

        overview = {}
        workers = []
        for alba_node in AlbaNodeList.get_albanodes():
            worker = Thread(target=_collect_node, args=(alba_node, overview))
            worker.start()
            workers.append(worker)
        for worker in workers:
            worker.join()

        return overview

    def _statistics(self):
        """
        Aggregates the statistics of all OSDs linked to this ALBA Backend
        OSDs without statistics are ignored
        :return: Per operation the summed 'n' and 'n_ps', the mean of all 'avg' values and the overall 'max' and 'min'
                 (0 when no OSD reported anything), completed with a 'creation' timestamp
        :rtype: dict
        """
        operations = ('apply', 'multi_get', 'range', 'range_entries', 'statistics')
        aggregated = dict((operation, {'n': 0, 'n_ps': 0, 'avg': [], 'max': [], 'min': []})
                          for operation in operations)

        # Phase 1: Sum the counters and gather the individual avg/max/min values
        for osd in self.osds:
            osd_stats = osd.statistics
            if osd_stats:
                for operation in operations:
                    reported = osd_stats[operation]
                    target = aggregated[operation]
                    target['n'] += reported['n']
                    target['n_ps'] += reported['n_ps']
                    for metric in ('avg', 'max', 'min'):
                        target[metric].append(reported[metric])

        # Phase 2: Collapse the gathered values into a single number
        for operation in operations:
            target = aggregated[operation]
            maxima, minima, averages = target['max'], target['min'], target['avg']
            target['max'] = max(maxima) if maxima else 0
            target['min'] = min(minima) if minima else 0
            target['avg'] = sum(averages) / len(averages) if averages else 0

        aggregated['creation'] = time.time()
        return aggregated

    def _ns_data(self):
        """
        Retrieves the namespace information of this ALBA Backend through the ALBA 'show-namespaces' command
        :return: Second element of the command output, empty list when no ABM cluster is linked yet
        :rtype: list
        """
        if self.abm_cluster is None:
            # Backend is not fully installed yet
            return []

        config_path = Configuration.get_configuration_path(self.abm_cluster.config_location)
        # NOTE(review): output is presumably (<amount>, <namespaces>), only index 1 is used - confirm against AlbaCLI
        output = AlbaCLI.run(command='show-namespaces',
                             config=config_path,
                             named_params={'max': -1})
        return output[1]

    def _usages(self):
        """
        Sums the total and used space of every OSD reported by the ALBA 'list-osds' command
        :return: Overview with keys 'size', 'used' and 'free' (size - used)
                 All values are 0.0 when no ABM cluster is linked yet or when the OSDs could not be listed
        :rtype: dict
        """
        if self.abm_cluster is None:
            return {'free': 0.0, 'size': 0.0, 'used': 0.0}

        config_path = Configuration.get_configuration_path(self.abm_cluster.config_location)
        try:
            listed_osds = AlbaCLI.run(command='list-osds', config=config_path)
        except AlbaError:
            self._logger.exception('Unable to fetch OSD information')
            return {'free': 0.0, 'size': 0.0, 'used': 0.0}

        # Start from floats so the result is always expressed in floats
        total = 0.0
        consumed = 0.0
        for listed_osd in listed_osds:
            total += listed_osd['total']
            consumed += listed_osd['used']
        return {'free': total - consumed, 'size': total, 'used': consumed}

    def _presets(self):
        """
        Lists the presets reported by the ALBA 'list-presets' command, decorated with availability and usage information
        Every preset is extended with:
            - in_use / is_default: Defaulted to True / False when not reported
            - is_available: True when at least 1 of its policies can be satisfied
            - policy_metadata: Per policy the flags 'is_active' (first policy which can be satisfied),
                               'in_use' (matched by a policy reported in use by an active namespace) and 'is_available'
        A policy can be satisfied when the amount of available disks reaches its 3rd element. For LOCAL scaling, a node
        never contributes more disks than the 4th element of the policy
        :return: Decorated presets, the policies and the keys of policy_metadata are converted to strings
        :rtype: list
        """
        if self.abm_cluster is None:
            return []  # No ABM cluster yet, so backend not fully installed yet

        # Amount of OSDs in state OK or WARNING which are claimed by this ALBA Backend, counted per node
        usable_osds_per_node = {}
        if self.scaling != AlbaBackend.SCALINGS.GLOBAL:
            for node_id, slots in self.local_stack.iteritems():
                usable_osds = 0
                for slot_info in slots.itervalues():
                    for osd_info in slot_info['osds'].itervalues():
                        healthy = osd_info['status'] in [AlbaNode.OSD_STATUSES.OK, AlbaNode.OSD_STATUSES.WARNING]
                        if healthy and osd_info.get('claimed_by') == self.guid:
                            usable_osds += 1
                usable_osds_per_node[node_id] = usable_osds

        config = Configuration.get_configuration_path(self.abm_cluster.config_location)
        presets = AlbaCLI.run(command='list-presets', config=config)

        # Decorate the presets and determine which of their policies can be satisfied
        presets_by_name = {}
        for preset in presets:
            presets_by_name[preset['name']] = preset
            preset.setdefault('in_use', True)
            preset.setdefault('is_default', False)
            preset['is_available'] = False
            # Tuples are hashable, which allows the policies to be used as keys of the metadata
            preset['policies'] = [tuple(policy) for policy in preset['policies']]
            metadata = {}
            preset['policy_metadata'] = metadata
            first_satisfiable = None
            for policy in preset['policies']:
                disk_count = 0
                if self.scaling == AlbaBackend.SCALINGS.GLOBAL:
                    disk_count += sum(self.local_summary['devices'].values())
                if self.scaling == AlbaBackend.SCALINGS.LOCAL:
                    disk_count += sum(min(usable_osds, policy[3])
                                      for usable_osds in usable_osds_per_node.itervalues())
                satisfiable = disk_count >= policy[2]
                if satisfiable and first_satisfiable is None:
                    first_satisfiable = policy
                metadata[policy] = {'is_active': False,
                                    'in_use': False,
                                    'is_available': satisfiable}
                if satisfiable:
                    preset['is_available'] = True
            if first_satisfiable is not None:
                metadata[first_satisfiable]['is_active'] = True

        # Flag the configured policies which are used by the active namespaces
        for namespace in self.ns_data:
            namespace_info = namespace['namespace']
            if namespace_info['state'] != 'active':
                continue
            bucket_count = namespace['statistics']['bucket_count']
            preset = presets_by_name[namespace_info['preset_name']]
            for bucket in bucket_count:
                used = tuple(bucket[0])  # Policy as reported to be "in use"
                for configured in preset['policies']:  # All configured policies
                    same_start = used[0] == configured[0] and used[1] == configured[1]
                    if same_start and used[3] <= configured[3]:
                        preset['policy_metadata'][configured]['in_use'] = True
                        break

        # Convert the tuples to strings, both in the policy list and in the keys of the metadata
        for preset in presets:
            preset['policies'] = [str(policy) for policy in preset['policies']]
            metadata = preset['policy_metadata']
            for policy in list(metadata):
                metadata[str(policy)] = metadata.pop(policy)
        return presets

    def _available(self):
        """
        Indicates whether the Backend can be used, which is the case when the linked Backend has status 'RUNNING'
        :return: True when usable, False otherwise
        :rtype: bool
        """
        backend_status = self.backend.status
        return backend_status == 'RUNNING'

    def _name(self):
        """
        Exposes the name of the linked Backend as the name of this ALBA Backend
        :return: Name of the linked Backend
        """
        linked_backend = self.backend
        return linked_backend.name

    def _osd_statistics(self):
        """
        Loads the statistics of all OSDs of type ASD and AD linked to this ALBA Backend in a single ALBA call
        :return: Statistics per OSD ID, only containing the OSDs for which the statistics were retrieved successfully
                 Empty when no ABM cluster is linked yet, when there are no such OSDs or when the ALBA call failed
        :rtype: dict
        """
        from ovs.dal.hybrids.albaosd import AlbaOSD

        if self.abm_cluster is None:
            return {}  # No ABM cluster yet, so backend not fully installed yet

        supported_types = [AlbaOSD.OSD_TYPES.ASD, AlbaOSD.OSD_TYPES.AD]
        osd_ids = []
        for osd in self.osds:
            if osd.osd_type in supported_types:
                osd_ids.append(osd.osd_id)
        if not osd_ids:
            return {}

        try:
            config = Configuration.get_configuration_path(self.abm_cluster.config_location)
            # TODO: This will need to be changed to osd-multistatistics, see openvstorage/alba#749
            raw_statistics = AlbaCLI.run(command='asd-multistatistics',
                                         config=config,
                                         named_params={'long-id': ','.join(osd_ids)})
        except RuntimeError:
            return {}
        if not raw_statistics:
            return {}

        # Only keep the results of the OSDs which answered successfully
        return dict((osd_id, outcome['result'])
                    for osd_id, outcome in raw_statistics.iteritems()
                    if outcome['success'] is True)

    def _linked_backend_guids(self):
        """
        Returns the guids of all ALBA Backends linked (recursively) to this ALBA Backend based on the linked AlbaOSDs
        Every OSD of type ALBA_BACKEND is asked for its own 'linked_backend_guids', each in a separate thread
        The guid of this ALBA Backend itself is always part of the result
        :return: ALBA Backend guids, or None when the information of at least 1 linked ALBA Backend could not be retrieved
        :rtype: set or None
        """
        # Import here to prevent from circular references
        from ovs.dal.hybrids.albaosd import AlbaOSD

        def _load_backend_info(_connection_info, _alba_backend_guid,
                               _exceptions):
            # '_exceptions' must be a mutable object (list): what gets appended here is how the caller learns about
            # failures, since an exception raised inside a Thread never reaches the code which joins that Thread
            try:
                # The client is created inside the 'try' as well. A failure at this point would otherwise only
                # terminate the Thread and an incomplete set of guids would be returned as if it were complete
                client = OVSClient.get_instance(
                    connection_info=_connection_info,
                    cache_store=VolatileFactory.get_client())
                new_guids = client.get(
                    '/alba/backends/{0}/'.format(_alba_backend_guid),
                    params={'contents':
                            'linked_backend_guids'})['linked_backend_guids']
                with lock:
                    guids.update(new_guids)
            except HttpNotFoundException:
                pass  # ALBA Backend has been deleted, we don't care we can't find the linked guids
            except HttpForbiddenException as fe:
                AlbaBackend._logger.exception(
                    'Collecting remote ALBA Backend information failed due to permission issues. {0}'
                    .format(fe))
                _exceptions.append('not_allowed')
            except Exception as ex:
                AlbaBackend._logger.exception(
                    'Collecting remote ALBA Backend information failed with error: {0}'
                    .format(ex))
                _exceptions.append('unknown')

        lock = Lock()  # Guards 'guids', which is updated by all threads
        guids = {self.guid}
        threads = []
        exceptions = []
        for osd in self.osds:
            if osd.osd_type == AlbaOSD.OSD_TYPES.ALBA_BACKEND and osd.metadata is not None:
                connection_info = osd.metadata['backend_connection_info']
                alba_backend_guid = osd.metadata['backend_info']['linked_guid']
                thread = Thread(target=_load_backend_info,
                                args=(connection_info, alba_backend_guid,
                                      exceptions))
                thread.start()
                threads.append(thread)
        for thread in threads:
            thread.join()

        if len(exceptions) > 0:
            return None  # This causes the 'Link Backend' button in the GUI to become disabled
        return guids

    def _remote_stack(self):
        """
        Live list of information about remote linked OSDs of type ALBA Backend
        Every linked ALBA Backend is asked for its 'local_summary' and 'live_status', each in a separate thread
        Every entry always contains the keys 'name', 'error', 'domain', 'preset', 'osd_id', 'local_ip', 'remote_host'
        and 'live_status'. The content of the remote local summary is merged in when it could be retrieved
        'error' is either empty or one of 'backend_deleted', 'not_allowed', 'unknown'
        :return: Information about all linked OSDs, keyed by the guid of the linked ALBA Backend
        :rtype: dict
        """
        # Import here to prevent from circular references
        from ovs.dal.hybrids.albaosd import AlbaOSD

        def _load_backend_info(_connection_info, _alba_backend_guid):
            try:
                # The client is created inside the 'try' as well, so a failure at this point gets reported as
                # 'unknown' error instead of silently terminating the Thread
                client = OVSClient.get_instance(
                    connection_info=_connection_info,
                    cache_store=VolatileFactory.get_client())
                info = client.get(
                    '/alba/backends/{0}/'.format(_alba_backend_guid),
                    params={'contents': 'local_summary,live_status'})
                with lock:
                    return_value[_alba_backend_guid].update(
                        info['local_summary'])
                    return_value[_alba_backend_guid]['live_status'] = info[
                        'live_status']
            except HttpNotFoundException as ex:
                return_value[_alba_backend_guid]['error'] = 'backend_deleted'
                self._logger.warning(
                    'AlbaBackend {0} STATUS set as FAILURE due to HttpNotFoundException: {1}'
                    .format(self.name, ex))
                return_value[_alba_backend_guid][
                    'live_status'] = AlbaBackend.STATUSES.FAILURE
            except HttpForbiddenException:
                return_value[_alba_backend_guid]['error'] = 'not_allowed'
            except Exception as ex:
                return_value[_alba_backend_guid]['error'] = 'unknown'
                AlbaBackend._logger.exception(
                    'Collecting remote ALBA Backend information failed with error: {0}'
                    .format(ex))

        # Retrieve local summaries of all related OSDs of type ALBA_BACKEND
        lock = Lock()
        threads = []
        return_value = {}
        cluster_ips = [sr.ip for sr in StorageRouterList.get_storagerouters()]
        for osd in self.osds:
            if osd.osd_type == AlbaOSD.OSD_TYPES.ALBA_BACKEND and osd.metadata is not None:
                backend_info = osd.metadata['backend_info']
                connection_info = osd.metadata['backend_connection_info']
                connection_host = connection_info['host']
                alba_backend_guid = backend_info['linked_guid']
                return_value[alba_backend_guid] = {
                    'name': backend_info['linked_name'],
                    'error': '',
                    'domain': None if osd.domain is None else {
                        'guid': osd.domain_guid,
                        'name': osd.domain.name
                    },
                    'preset': backend_info['linked_preset'],
                    'osd_id': backend_info['linked_alba_id'],
                    'local_ip': connection_host in cluster_ips,
                    'remote_host': connection_host,
                    # Pre-filled before the Thread starts: consumers index 'live_status' unconditionally, so the
                    # key has to be present regardless of what happens inside the Thread
                    'live_status': AlbaBackend.STATUSES.UNKNOWN
                }
                thread = Thread(target=_load_backend_info,
                                args=(connection_info, alba_backend_guid))
                thread.start()
                threads.append(thread)

        for thread in threads:
            thread.join()
        return return_value

    def _local_summary(self):
        """
        A local summary for an ALBA Backend containing information used to show in the GLOBAL ALBA Backend detail page
            - Scaling other than GLOBAL: The devices are the OSDs claimed by this ALBA Backend, counted per status
              (OK: green, WARNING: orange, ERROR: red, UNKNOWN: gray). The sizes are taken from 'usages'
            - Scaling other than LOCAL: Every linked ALBA Backend counts as 1 device, colored by the worst state found
              in its own devices (red, then orange, then green, then gray) and its sizes are added to the sizes
        :return: Information about used size, devices, name, scaling, domains and the guid of the linked Backend
        :rtype: dict
        """
        # Both dicts are referenced by 'return_value', so they must be updated in place (never rebound to a new
        # object) for the changes to end up in the returned summary
        usage_info = {'size': 0, 'used': 0}
        device_info = {'red': 0, 'green': 0, 'orange': 0, 'gray': 0}
        return_value = {
            'name':
            self.name,
            'sizes':
            usage_info,
            'devices':
            device_info,
            'scaling':
            self.scaling,
            'domain_info':
            dict((backend_domain.domain_guid, backend_domain.domain.name)
                 for backend_domain in self.backend.domains),
            'backend_guid':
            self.backend.guid
        }

        # Calculate device information
        if self.scaling != AlbaBackend.SCALINGS.GLOBAL:
            for node_values in self.local_stack.itervalues():
                for slot_values in node_values.itervalues():
                    for osd_info in slot_values.get('osds', {}).itervalues():
                        if self.guid == osd_info.get('claimed_by'):
                            status = osd_info.get('status', 'unknown')
                            if status == AlbaNode.OSD_STATUSES.OK:
                                device_info['green'] += 1
                            elif status == AlbaNode.OSD_STATUSES.WARNING:
                                device_info['orange'] += 1
                            elif status == AlbaNode.OSD_STATUSES.ERROR:
                                device_info['red'] += 1
                            elif status == AlbaNode.OSD_STATUSES.UNKNOWN:
                                device_info['gray'] += 1

            # Calculate used and total size
            # The values are copied rather than assigning 'self.usages' to 'usage_info': rebinding the name would
            # leave the returned 'sizes' at 0 and would make the additions below modify the dict of 'usages' itself
            usages = self.usages
            usage_info['size'] = usages['size']
            usage_info['used'] = usages['used']

        if self.scaling != AlbaBackend.SCALINGS.LOCAL:
            for backend_values in self.remote_stack.itervalues():
                for key, value in backend_values.get('sizes', {}).iteritems():
                    usage_info[key] += value

                # Entries for which the remote summary could not be retrieved have no device information
                devices = backend_values.get('devices')
                if devices is None:
                    continue

                if devices['red'] > 0:
                    device_info['red'] += 1
                elif devices['orange'] > 0:
                    device_info['orange'] += 1
                elif devices['green'] > 0:
                    device_info['green'] += 1
                elif devices.get('gray', 0) > 0:
                    device_info['gray'] += 1

        return return_value

    def _live_status(self):
        """
        Retrieve the live status of the ALBA Backend to be displayed in the 'Backends' page in the GUI based on:
            - Maintenance agents presence
            - Maintenance agents status
            - Disk statuses
        The checks are evaluated in this order, the first one which matches determines the status:
            - 'installing' / 'deleting': The linked Backend is being installed / deleted
            - FAILURE: Failed (red) devices, a linked ALBA Backend with an unknown error or in FAILURE, no maintenance
                       services at all while ALBA nodes are known, or a maintenance service which is not 'active'
            - WARNING: Fewer maintenance services than expected (or a node of the configured layout without service),
                       devices in warning (orange), a linked ALBA Backend which is not accessible or in WARNING,
                       or no maintenance services
            - RUNNING: None of the above
        :return: Status as reported by the plugin
        :rtype: str
        """
        if self.backend.status == Backend.STATUSES.INSTALLING:
            return 'installing'

        if self.backend.status == Backend.STATUSES.DELETING:
            return 'deleting'

        # Verify failed disks
        devices = self.local_summary['devices']
        if devices['red'] > 0:
            self._logger.warning(
                'AlbaBackend {0} STATUS set to FAILURE due to {1} failed disks'
                .format(self.name, devices['red']))
            return AlbaBackend.STATUSES.FAILURE

        # Verify remote OSDs
        remote_errors = False
        linked_backend_warning = False
        for remote_info in self.remote_stack.itervalues():
            if remote_info['error'] == 'unknown' or remote_info[
                    'live_status'] == AlbaBackend.STATUSES.FAILURE:
                message = None
                if remote_info['error'] == 'unknown':
                    message = 'unknown remote error info'
                elif remote_info[
                        'live_status'] == AlbaBackend.STATUSES.FAILURE:
                    message = 'FAILURE in live_status'
                self._logger.warning(
                    'AlbaBackend {0} STATUS set to FAILURE due to OSD {1}: {2} '
                    .format(self.name, remote_info['name'], message))
                return AlbaBackend.STATUSES.FAILURE
            if remote_info['error'] == 'not_allowed':
                remote_errors = True
            if remote_info['live_status'] == AlbaBackend.STATUSES.WARNING:
                linked_backend_warning = True

        # Retrieve ASD and maintenance service information
        def _get_node_information(_node):
            # A node is used by this backend as soon as 1 of its OSDs is claimed by it
            # 'osds' and 'claimed_by' are looked up with '.get', just like 'local_summary' does: a missing key
            # would terminate this Thread before the maintenance services of the node are collected
            if _node not in nodes_used_by_this_backend:
                if any(osd_info.get('claimed_by') == self.guid
                       for slot_info in _node.stack.itervalues()
                       for osd_info in slot_info.get('osds', {}).itervalues()):
                    nodes_used_by_this_backend.add(_node)

            try:
                services = _node.maintenance_services
                if self.name in services:
                    for _service_name, _service_status in services[self.name]:
                        services_for_this_backend[_service_name] = _node
                        service_states[_service_name] = _service_status
                        if _node.node_id not in services_per_node:
                            services_per_node[_node.node_id] = 0
                        services_per_node[_node.node_id] += 1
            except Exception:
                # Best effort: a node whose services cannot be retrieved simply contributes none
                pass

        services_for_this_backend = {}
        services_per_node = {}
        service_states = {}
        nodes_used_by_this_backend = set()
        threads = []
        all_nodes = AlbaNodeList.get_albanodes()
        for node in all_nodes:
            thread = Thread(target=_get_node_information, args=(node, ))
            thread.start()
            threads.append(thread)
        for thread in threads:
            thread.join()

        zero_services = False
        if len(services_for_this_backend) == 0:
            if len(all_nodes) > 0:
                AlbaBackend._logger.error(
                    'AlbaBackend {0} STATUS set to FAILURE due to no maintenance services'
                    .format(self.name))
                return AlbaBackend.STATUSES.FAILURE
            zero_services = True

        # Verify maintenance agents status
        for service_name in services_for_this_backend:
            # A missing state yields None, which differs from 'active' as well
            if service_states.get(service_name) != 'active':
                AlbaBackend._logger.error(
                    'AlbaBackend {0} STATUS set to FAILURE due to non-running maintenance service(s): {1}'
                    .format(self.name, service_name))
                return AlbaBackend.STATUSES.FAILURE

        # Verify maintenance agents presence
        layout_key = '/ovs/alba/backends/{0}/maintenance/agents_layout'.format(
            self.guid)
        layout = None
        if Configuration.exists(layout_key):
            layout = Configuration.get(layout_key)
            # A layout is only taken into account when it is a list which refers to at least 1 known node
            if not isinstance(layout, list) or not any(
                    node.node_id
                    for node in all_nodes if node.node_id in layout):
                layout = None

        if layout is None:
            # No usable layout: Expect the configured amount of agents (3 when not configured), limited to the
            # amount of nodes used by this backend, but always at least 1
            config_key = '/ovs/alba/backends/{0}/maintenance/nr_of_agents'.format(
                self.guid)
            expected_services = 3
            if Configuration.exists(config_key):
                expected_services = Configuration.get(config_key)
            expected_services = min(expected_services,
                                    len(nodes_used_by_this_backend)) or 1
            if len(services_for_this_backend) < expected_services:
                AlbaBackend._logger.warning(
                    'Live status for backend {0} is "warning": insufficient maintenance services'
                    .format(self.name))
                return AlbaBackend.STATUSES.WARNING
        else:
            # Every node mentioned in the layout must run at least 1 maintenance service for this backend
            for node_id in layout:
                if node_id not in services_per_node:
                    AlbaBackend._logger.warning(
                        'Live status for backend {0} is "warning": invalid maintenance service layout'
                        .format(self.name))
                    return AlbaBackend.STATUSES.WARNING

        # Verify local and remote OSDs
        if devices['orange'] > 0:
            AlbaBackend._logger.warning(
                'Live status for backend {0} is "warning": one or more OSDs in warning'
                .format(self.name))
            return AlbaBackend.STATUSES.WARNING

        if remote_errors is True or linked_backend_warning is True:
            AlbaBackend._logger.warning(
                'Live status for backend {0} is "warning": errors/warnings on remote stack'
                .format(self.name))
            return AlbaBackend.STATUSES.WARNING
        if zero_services is True:
            AlbaBackend._logger.warning(
                'Live status for backend {0} is "warning": no maintenance services'
                .format(self.name))
            return AlbaBackend.STATUSES.WARNING

        return AlbaBackend.STATUSES.RUNNING
Exemple #22
0
class StorageRouter(DataObject):
    """
    A StorageRouter represents the Open vStorage software stack, any (v)machine on which it is installed
    """
    _logger = Logger('hybrids')
    # Persisted properties of a StorageRouter
    __properties = [Property('name', str, unique=True, doc='Name of the Storage Router.'),
                    Property('description', str, mandatory=False, doc='Description of the Storage Router.'),
                    Property('machine_id', str, unique=True, mandatory=False, indexed=True, doc='The hardware identifier of the Storage Router'),
                    Property('ip', str, unique=True, indexed=True, doc='IP Address of the Storage Router, if available'),
                    Property('heartbeats', dict, default={}, doc='Heartbeat information of various monitors'),
                    Property('node_type', ['MASTER', 'EXTRA'], default='EXTRA', doc='Indicates the node\'s type'),
                    Property('rdma_capable', bool, doc='Is this Storage Router RDMA capable'),
                    Property('last_heartbeat', float, mandatory=False, doc='When was the last (external) heartbeat send/received'),
                    Property('package_information', dict, mandatory=False, default={}, doc='Information about installed packages and potential available new versions')]
    __relations = []
    # Every dynamic listed here has a matching '_<name>' method below.
    # NOTE(review): the third argument is presumably the cache timeout in seconds - confirm against Dynamic
    __dynamics = [Dynamic('statistics', dict, 4),
                  Dynamic('vpools_guids', list, 15),
                  Dynamic('vdisks_guids', list, 15),
                  Dynamic('status', str, 10),
                  Dynamic('partition_config', dict, 3600),
                  Dynamic('regular_domains', list, 60),
                  Dynamic('recovery_domains', list, 60),
                  Dynamic('features', dict, 3600)]

    # Feature names as reported by the '_features' dynamic
    ALBA_FEATURES = DataObject.enumerator('Alba_features', {'CACHE_QUOTA': 'cache-quota',
                                                            'BLOCK_CACHE': 'block-cache',
                                                            'AUTO_CLEANUP': 'auto-cleanup-deleted-namespaces'})
    STORAGEDRIVER_FEATURES = DataObject.enumerator('Storagedriver_features', {'DIRECTORY_UNLINK': 'directory_unlink'})

    def _statistics(self, dynamic):
        """
        Aggregates the statistics (IOPS, Bandwidth, ...) reported by each StorageDriver of this StorageRouter.
        Plain values are summed per key, dictionary values are summed per key and subkey.
        :param dynamic: Dynamic definition, forwarded as-is to VDisk.calculate_delta
        :return: Dictionary with the summed statistics and a 'timestamp' entry (time.time())
        """
        from ovs.dal.hybrids.vdisk import VDisk
        statistics = {}
        for storagedriver in self.storagedrivers:
            for key, value in storagedriver.fetch_statistics().iteritems():
                if isinstance(value, dict):
                    # Nested counters have to be accumulated for every StorageDriver,
                    # not only for the first one that reports this key
                    substatistics = statistics.setdefault(key, {})
                    for subkey, subvalue in value.iteritems():
                        substatistics[subkey] = substatistics.get(subkey, 0) + subvalue
                else:
                    statistics[key] = statistics.get(key, 0) + value
        statistics['timestamp'] = time.time()
        VDisk.calculate_delta(self._key, dynamic, statistics)
        return statistics

    def _vdisks_guids(self):
        """
        Gets the vDisk guids served by this StorageRouter.
        The object registry of every vPool linked to this StorageRouter is scanned for registrations
        whose node id matches one of this StorageRouter's StorageDrivers.
        :return: Guids of the vDisks matching the collected volume ids
        """
        from ovs.dal.lists.vdisklist import VDiskList
        volume_ids = []
        vpools = set()
        storagedriver_ids = []
        for storagedriver in self.storagedrivers:
            vpools.add(storagedriver.vpool)
            storagedriver_ids.append(storagedriver.storagedriver_id)
        for vpool in vpools:
            for entry in vpool.objectregistry_client.get_all_registrations():
                if entry.node_id() in storagedriver_ids:
                    volume_ids.append(entry.object_id())
        return VDiskList.get_in_volume_ids(volume_ids).guids

    def _vpools_guids(self):
        """
        Gets the vPool guids linked to this StorageRouter (through StorageDriver)
        :return: List of unique vPool guids
        """
        vpool_guids = set()
        for storagedriver in self.storagedrivers:
            vpool_guids.add(storagedriver.vpool_guid)
        return list(vpool_guids)

    def _status(self):
        """
        Calculates the current Storage Router status based on various heartbeats and the state of its disks
        * 'process' heartbeat older than 5 minutes: FAILURE
        * 'celery' heartbeat older than 5 minutes: FAILURE, older than 2 minutes: WARNING
        * Any disk or partition in state 'MISSING': FAILURE
        A heartbeat that was never registered counts as timestamp 0
        :return: 'OK', 'WARNING' or 'FAILURE'
        """
        pointer = 0  # Index in the statuses list, only ever raised to the worst status found
        statuses = ['OK', 'WARNING', 'FAILURE']
        current_time = time.time()
        if self.heartbeats is not None:
            process_delay = abs(self.heartbeats.get('process', 0) - current_time)
            if process_delay > 60 * 5:
                pointer = max(pointer, 2)
            else:
                # The celery heartbeat is only inspected when the process heartbeat is recent enough
                delay = abs(self.heartbeats.get('celery', 0) - current_time)
                if delay > 60 * 5:
                    pointer = max(pointer, 2)
                elif delay > 60 * 2:
                    pointer = max(pointer, 1)
        for disk in self.disks:
            if disk.state == 'MISSING':
                pointer = max(pointer, 2)
            for partition in disk.partitions:
                if partition.state == 'MISSING':
                    pointer = max(pointer, 2)
        return statuses[pointer]

    def _partition_config(self):
        """
        Returns a dict with all partition information of a given storagerouter
        :return: Dictionary with every DiskPartition role as key and a list of partition guids having that role as value
        """
        from ovs.dal.hybrids.diskpartition import DiskPartition
        dataset = dict((role, []) for role in DiskPartition.ROLES)
        for disk in self.disks:
            for partition in disk.partitions:
                for role in partition.roles:
                    dataset[role].append(partition.guid)
        return dataset

    def _regular_domains(self):
        """
        Returns a list of domain guids with backup flag False
        :return: List of domain guids
        """
        return [junction.domain_guid for junction in self.domains if junction.backup is False]

    def _recovery_domains(self):
        """
        Returns a list of domain guids with backup flag True
        :return: List of domain guids
        """
        return [junction.domain_guid for junction in self.domains if junction.backup is True]

    def _features(self):
        """
        Returns information about installed/available features
        The installed volumedriver and alba versions are requested over SSH. A feature is reported when the
        installed version is at least the feature's minimum version and, if the feature is bound to an
        edition, the installed edition matches.
        :return: Dictionary containing edition and available features per component,
                 or an empty dictionary when the information could not be loaded
        """
        try:
            client = SSHClient(self, username='******')
            # An 'ee-' prefix on the reported version indicates the enterprise edition
            enterprise_regex = re.compile('^(?P<edition>ee-)?(?P<version>.*)$')

            version = client.run(command=PackageFactory.VERSION_CMD_SD, allow_insecure=True, allow_nonzero=True)
            volumedriver_version = enterprise_regex.match(version).groupdict()
            volumedriver_edition = PackageFactory.EDITION_ENTERPRISE if volumedriver_version['edition'] == 'ee-' else PackageFactory.EDITION_COMMUNITY
            volumedriver_version_lv = LooseVersion(volumedriver_version['version'])
            # Feature -> (minimum version, required edition or None when available in every edition)
            volumedriver_requirements = {self.STORAGEDRIVER_FEATURES.DIRECTORY_UNLINK: ('6.15.0', None)}
            volumedriver_features = [feature for feature, (minimum_version, edition)
                                     in volumedriver_requirements.iteritems()
                                     if volumedriver_version_lv >= LooseVersion(minimum_version)
                                     and (edition is None or edition == volumedriver_edition)]

            version = client.run(command=PackageFactory.VERSION_CMD_ALBA, allow_insecure=True, allow_nonzero=True)
            alba_version = enterprise_regex.match(version).groupdict()
            alba_edition = PackageFactory.EDITION_ENTERPRISE if alba_version['edition'] == 'ee-' else PackageFactory.EDITION_COMMUNITY
            alba_version_lv = LooseVersion(alba_version['version'])
            # Feature -> (minimum version, required edition or None when available in every edition)
            alba_requirements = {self.ALBA_FEATURES.CACHE_QUOTA: ('1.4.4', PackageFactory.EDITION_ENTERPRISE),
                                 self.ALBA_FEATURES.BLOCK_CACHE: ('1.4.0', PackageFactory.EDITION_ENTERPRISE),
                                 self.ALBA_FEATURES.AUTO_CLEANUP: ('1.5.27', PackageFactory.EDITION_ENTERPRISE)}
            alba_features = [feature for feature, (minimum_version, edition)
                             in alba_requirements.iteritems()
                             if alba_version_lv >= LooseVersion(minimum_version)
                             and (edition is None or edition == alba_edition)]

            return {'volumedriver': {'edition': volumedriver_edition,
                                     'features': volumedriver_features},
                    'alba': {'edition': alba_edition,
                             'features': alba_features}}
        except UnableToConnectException:
            # Deliberately silent: no feature information is reported in this case
            # NOTE(review): presumably raised when the StorageRouter cannot be reached over SSH - confirm
            pass
        except Exception:
            StorageRouter._logger.exception('Could not load feature information')
        return {}