Example #1
0
    def __init__(self, vpool_guid, storagedriver_id):
        """
        Configures the logging of the storagerouterclient and loads the stored configuration, if there is one
        :param vpool_guid: Guid of the vPool, used to build the configuration key
        :param storagedriver_id: ID of the StorageDriver, used to build the configuration key
        """
        # Translate the effective level of the 'extensions' logger for the storagerouterclient
        effective_level = OVSLogger('extensions').getEffectiveLevel()
        client_log_level = LOG_LEVEL_MAPPING[effective_level]
        client_log_path = OVSLogger.load_path('storagerouterclient')
        # noinspection PyCallByClass,PyTypeChecker
        storagerouterclient.Logger.setupLogging(client_log_path,
                                                client_log_level)
        # noinspection PyArgumentList
        storagerouterclient.Logger.enableLogging()

        config_key = '/ovs/vpools/{0}/hosts/{1}/config'.format(
            vpool_guid, storagedriver_id)
        self._key = config_key
        self._logger = OVSLogger('extensions')
        self._dirty_entries = []

        configuration_path = Configuration.get_configuration_path(config_key)
        self.remote_path = configuration_path.strip('/')

        # Without a stored configuration, start from an empty one and flag it as missing
        if not Configuration.exists(config_key):
            self.configuration = {}
            self.config_missing = True
            self._logger.debug(
                'Could not find config {0}, a new one will be created'.format(
                    config_key))
            return

        self.configuration = Configuration.get(config_key)
        self.config_missing = False
Example #2
0
class MetadataServerClient(object):
    """
    Hands out MDSClient instances, which are cached per Service guid
    """
    _logger = OVSLogger('extensions')
    _log_level = LOG_LEVEL_MAPPING[_logger.getEffectiveLevel()]
    # Configured once, when this class body is executed
    # noinspection PyCallByClass,PyTypeChecker
    storagerouterclient.Logger.setupLogging(
        OVSLogger.load_path('storagerouterclient'), _log_level)
    # noinspection PyArgumentList
    storagerouterclient.Logger.enableLogging()

    # Small namespace class exposing the Role values under MASTER / SLAVE
    MDS_ROLE = type('MDSRole', (), dict(MASTER=Role.Master, SLAVE=Role.Slave))

    def __init__(self):
        """
        Dummy init method
        """
        pass

    @staticmethod
    def load(service, timeout=20):
        """
        Returns the MDSClient for a Service, creating (and caching) it when required
        A new client is created when none has been cached for the Service yet or when the cached client was created
        with a different timeout
        :param service: Service for which the MDSClient needs to be loaded
        :type service: ovs.dal.hybrids.service.Service
        :param timeout: All calls performed by this MDSClient instance will time out after this period (in seconds)
        :type timeout: int
        :raises ValueError: When the Service has no StorageRouter linked to it
        :return: An MDSClient instance for the specified Service, None when creating the client raised a RuntimeError
        :rtype: MDSClient
        """
        if service.storagerouter is None:
            raise ValueError(
                'Service {0} does not have a StorageRouter linked to it'.
                format(service.name))

        key = service.guid
        # Cache hit: a client exists for this Service and it was created with the requested timeout
        if key in mdsclient_service_cache and timeout == mdsclient_service_cache[key]['timeout']:
            return mdsclient_service_cache[key]['client']

        try:
            node_config = MDSNodeConfig(address=str(service.storagerouter.ip),
                                        port=service.ports[0])
            # noinspection PyArgumentList
            client = MDSClient(timeout_secs=timeout,
                               mds_node_config=node_config)
        except RuntimeError:
            MetadataServerClient._logger.exception(
                'Error loading MDSClient on {0}'.format(
                    service.storagerouter.ip))
            return None

        mdsclient_service_cache[key] = {'client': client, 'timeout': timeout}
        return client
Example #3
0
class MDSServiceController(MDSShared):
    """
    Contains all BLL related to MDSServices
    Exposes the removal of MDS services, the scheduled checkup and catchup tasks, the ensure safety task and helpers
    to calculate the MDS configuration of a vPool
    """
    # Class-level logger named 'lib', used by the static methods of this controller
    _logger = Logger('lib')
    # Effective level of that logger, translated through LOG_LEVEL_MAPPING for the storagerouterclient logging
    _log_level = LOG_LEVEL_MAPPING[_logger.getEffectiveLevel()]

    # The logging of the storagerouterclient is configured once, when this class body is executed
    # noinspection PyCallByClass,PyTypeChecker
    storagerouterclient.Logger.setupLogging(
        Logger.load_path('storagerouterclient'), _log_level)
    # noinspection PyArgumentList
    storagerouterclient.Logger.enableLogging()

    @staticmethod
    def remove_mds_service(mds_service, reconfigure, allow_offline=False):
        """
        Removes an MDS service in 3 steps: reconfigure the StorageDriver (optional), clean the model, clean the file system
        The steps which need an SSH connection are skipped when the StorageRouter is unreachable and allow_offline is True
        :param mds_service: The MDS service to remove
        :type mds_service: ovs.dal.hybrids.j_mdsservice.MDSService
        :param reconfigure: Indicates whether reconfiguration is required
        :type reconfigure: bool
        :param allow_offline: Indicates whether it's OK that the node for which mds services are cleaned is offline
        :type allow_offline: bool
        :raises RuntimeError: When vDisks present on the MDSService to be removed
                              No StorageDriver is linked to the MDSService to be removed
        :raises UnableToConnectException: When StorageRouter on which the MDSService resides is unreachable and allow_offline flag is False
        :return: None
        :rtype: NoneType
        """
        if len(mds_service.vdisks_guids) > 0 and allow_offline is False:
            raise RuntimeError(
                'Cannot remove MDSService that is still serving disks')

        linked_partitions = mds_service.storagedriver_partitions
        if len(linked_partitions) == 0 or linked_partitions[0].storagedriver is None:
            raise RuntimeError(
                'Failed to retrieve the linked StorageDriver to this MDS Service {0}'
                .format(mds_service.service.name))

        vpool = mds_service.vpool
        storagerouter = mds_service.service.storagerouter
        storagedriver = linked_partitions[0].storagedriver
        MDSServiceController._logger.info(
            'StorageRouter {0} - vPool {1}: Removing MDS junction service for port {2}'
            .format(storagerouter.name, vpool.name,
                    mds_service.service.ports[0]))

        # Stays None when the StorageRouter is unreachable and that has been allowed by the caller
        root_client = None
        try:
            root_client = SSHClient(endpoint=storagerouter, username='******')
            MDSServiceController._logger.debug(
                'StorageRouter {0} - vPool {1}: Established SSH connection'.
                format(storagerouter.name, vpool.name))
        except UnableToConnectException:
            if allow_offline is not True:
                MDSServiceController._logger.exception(
                    'StorageRouter {0} - vPool {1}: Failed to connect to StorageRouter'
                    .format(storagerouter.name, vpool.name))
                raise
            MDSServiceController._logger.warning(
                'StorageRouter {0} - vPool {1}: Allowed offline node during MDS service removal'
                .format(storagerouter.name, vpool.name))

        # Reconfigure StorageDriver
        if reconfigure is True and root_client is not None:
            remaining_mds_nodes = []
            for partition in storagedriver.partitions:
                # Only the MDS partitions of the other MDS services remain part of the configuration
                is_other_mds_partition = (
                    partition.role == DiskPartition.ROLES.DB
                    and partition.sub_role == StorageDriverPartition.SUBROLE.MDS
                    and partition.mds_service != mds_service)
                if not is_other_mds_partition:
                    continue
                other_service = partition.mds_service.service
                remaining_mds_nodes.append({
                    'host': other_service.storagerouter.ip,
                    'port': other_service.ports[0],
                    'db_directory': '{0}/db'.format(partition.path),
                    'scratch_directory': '{0}/scratch'.format(partition.path)
                })

            # Generate the correct section in the StorageDriver's configuration
            MDSServiceController._logger.info(
                'StorageRouter {0} - vPool {1}: Configuring StorageDriver with MDS nodes: {2}'
                .format(storagerouter.name, vpool.name, remaining_mds_nodes))
            storagedriver_config = StorageDriverConfiguration(
                vpool.guid, storagedriver.storagedriver_id)
            storagedriver_config.configure_metadata_server(
                mds_nodes=remaining_mds_nodes)
            storagedriver_config.save(root_client)

        # Clean up model
        MDSServiceController._logger.info(
            'StorageRouter {0} - vPool {1}: Cleaning model'.format(
                storagerouter.name, vpool.name))
        directories_to_clean = []
        for partition in mds_service.storagedriver_partitions:
            # Remember the path before the partition is removed from the model
            directories_to_clean.append(partition.path)
            partition.delete()

        # Certain vDisks might still be attached to this offline MDS service --> Delete relations
        if allow_offline is True:
            for junction in mds_service.vdisks:
                junction.delete()

        mds_service.delete()
        mds_service.service.delete()

        # Clean up file system, which requires a connection
        if root_client is None:
            return

        MDSServiceController._logger.info(
            'StorageRouter {0} - vPool {1}: Deleting directories from file system: {2}'
            .format(storagerouter.name, vpool.name, directories_to_clean))
        # 5 attempts in total, every failed attempt is followed by a pause of 5 seconds
        for attempts_left in reversed(range(5)):
            try:
                root_client.dir_delete(directories=directories_to_clean,
                                       follow_symlinks=True)
                for dir_name in directories_to_clean:
                    MDSServiceController._logger.debug(
                        'StorageRouter {0} - vPool {1}: Recursively removed directory: {2}'
                        .format(storagerouter.name, vpool.name, dir_name))
                break
            except Exception:
                MDSServiceController._logger.warning(
                    'StorageRouter {0} - vPool {1}: Waiting for the MDS service to go down...'
                    .format(storagerouter.name, vpool.name))
                time.sleep(5)
                if attempts_left == 0:
                    MDSServiceController._logger.exception(
                        'StorageRouter {0} - vPool {1}: Deleting directories failed'
                        .format(storagerouter.name, vpool.name))
                    raise

    @staticmethod
    @ovs_task(name='ovs.mds.mds_checkup',
              schedule=Schedule(minute='30', hour='0,4,8,12,16,20'),
              ensure_single_info={'mode': 'CHAINED'})
    def mds_checkup():
        """
        Validates the current MDS setup/configuration and takes actions where required
        Actions:
            * Verify which StorageRouters are available
            * Make mapping between vPools and its StorageRouters
            * For each vPool remove the MDS services which have been marked for removal (capacity 0 and no vDisks)
            * For each vPool add MDS services on the StorageRouters on which the total load reached the allowed maximum
            * For each vPool retrieve the optimal configuration and store it for each StorageDriver
            * For each vPool run an ensure safety for all vDisks
        A StorageRouter which has no MDS services (left) for a vPool is reported with an average load of 0%
        :raises RuntimeError: When ensure safety fails for any vDisk
        :return: None
        :rtype: NoneType
        """
        MDSServiceController._logger.info('Started')

        # Verify StorageRouter availability
        root_client_cache = {}
        storagerouters = StorageRouterList.get_storagerouters()
        storagerouters.sort(key=lambda _sr: ExtensionsToolbox.advanced_sort(
            element=_sr.ip, separator='.'))
        offline_nodes = []
        for storagerouter in storagerouters:
            try:
                root_client = SSHClient(endpoint=storagerouter,
                                        username='******')
                MDSServiceController._logger.debug(
                    'StorageRouter {0} - ONLINE'.format(storagerouter.name))
            except UnableToConnectException:
                # Offline StorageRouters are cached with a None client
                root_client = None
                offline_nodes.append(storagerouter)
                MDSServiceController._logger.error(
                    'StorageRouter {0} - OFFLINE'.format(storagerouter.name))
            root_client_cache[storagerouter] = root_client

        # Create mapping per vPool and its StorageRouters
        mds_dict = collections.OrderedDict()
        for vpool in sorted(VPoolList.get_vpools(), key=lambda k: k.name):
            MDSServiceController._logger.info('vPool {0}'.format(vpool.name))
            mds_dict[vpool] = {}

            # Loop all StorageDrivers and add StorageDriver to mapping
            for storagedriver in vpool.storagedrivers:
                storagerouter = storagedriver.storagerouter
                if storagerouter not in mds_dict[vpool]:
                    mds_dict[vpool][storagerouter] = {
                        'client': root_client_cache.get(storagerouter),
                        'services': [],
                        'storagedriver': storagedriver
                    }

            # Loop all MDS Services and append services to appropriate vPool / StorageRouter combo
            mds_services = vpool.mds_services
            mds_services.sort(
                key=lambda _mds_service: ExtensionsToolbox.advanced_sort(
                    element=_mds_service.service.storagerouter.ip,
                    separator='.'))
            for mds_service in mds_services:
                service = mds_service.service
                storagerouter = service.storagerouter
                if storagerouter not in mds_dict[vpool]:
                    # MDS service on a StorageRouter without StorageDriver for this vPool
                    mds_dict[vpool][storagerouter] = {
                        'client': root_client_cache.get(storagerouter),
                        'services': [],
                        'storagedriver': None
                    }
                MDSServiceController._logger.debug(
                    'vPool {0} - StorageRouter {1} - Service on port {2}'.
                    format(vpool.name, storagerouter.name, service.ports[0]))
                mds_dict[vpool][storagerouter]['services'].append(mds_service)

        failures = []
        for vpool, storagerouter_info in mds_dict.iteritems():
            # Make sure there's at least 1 MDS on every StorageRouter that's not overloaded
            # Remove all MDS Services which have been manually marked for removal (by setting its capacity to 0)
            max_load = Configuration.get(
                '/ovs/vpools/{0}/mds_config|mds_maxload'.format(vpool.guid))
            for storagerouter in sorted(storagerouter_info,
                                        key=lambda k: k.ip):
                total_load = 0.0
                root_client = mds_dict[vpool][storagerouter]['client']
                mds_services = mds_dict[vpool][storagerouter]['services']

                # sorted() returns a new list, so removing entries from mds_services while looping is safe
                for mds_service in sorted(mds_services,
                                          key=lambda k: k.number):
                    port = mds_service.service.ports[0]
                    number = mds_service.number
                    # Manual intervention required here in order for the MDS to be cleaned up
                    # @TODO: Remove this and make a dynamic calculation to check which MDSes to remove
                    if mds_service.capacity == 0 and len(
                            mds_service.vdisks_guids) == 0:
                        MDSServiceController._logger.warning(
                            'vPool {0} - StorageRouter {1} - MDS Service {2} on port {3}: Removing'
                            .format(vpool.name, storagerouter.name, number,
                                    port))
                        try:
                            MDSServiceController.remove_mds_service(
                                mds_service=mds_service,
                                reconfigure=True,
                                allow_offline=root_client is None)
                        except Exception:
                            MDSServiceController._logger.exception(
                                'vPool {0} - StorageRouter {1} - MDS Service {2} on port {3}: Failed to remove'
                                .format(vpool.name, storagerouter.name, number,
                                        port))
                        # The service is left out of the load calculation, even when its removal failed
                        mds_services.remove(mds_service)
                    else:
                        _, next_load = MDSServiceController.get_mds_load(
                            mds_service=mds_service)
                        if next_load == float('inf'):
                            total_load = sys.maxint * -1  # Cast to lowest possible value if any MDS service capacity is set to infinity
                        else:
                            total_load += next_load

                        if next_load < max_load:
                            MDSServiceController._logger.debug(
                                'vPool {0} - StorageRouter {1} - MDS Service {2} on port {3}: Capacity available - Load at {4}%'
                                .format(vpool.name, storagerouter.name, number,
                                        port, next_load))
                        else:
                            MDSServiceController._logger.debug(
                                'vPool {0} - StorageRouter {1} - MDS Service {2} on port {3}: No capacity available - Load at {4}%'
                                .format(vpool.name, storagerouter.name, number,
                                        port, next_load))

                service_count = len(mds_services)
                if total_load >= max_load * service_count:
                    mds_services_to_add = int(
                        math.ceil((total_load - max_load * service_count) /
                                  max_load))
                    # Without any MDS service (left) this branch is reached with a load of 0.0 for 0 services.
                    # Dividing by that count would raise a ZeroDivisionError and abort the checkup of all vPools
                    # NOTE(review): no MDS service gets added in that situation (the calculation above yields 0),
                    #               confirm whether a new MDS service should be created instead
                    if service_count > 0:
                        average_load = total_load / service_count
                    else:
                        average_load = 0.0
                    MDSServiceController._logger.info(
                        'vPool {0} - StorageRouter {1} - Average load per service {2:.2f}% - Max load per service {3:.2f}% - {4} MDS service{5} will be added'
                        .format(vpool.name, storagerouter.name,
                                average_load, max_load,
                                mds_services_to_add,
                                '' if mds_services_to_add == 1 else 's'))

                    for _ in range(mds_services_to_add):
                        MDSServiceController._logger.info(
                            'vPool {0} - StorageRouter {1} - Adding new MDS Service'
                            .format(vpool.name, storagerouter.name))
                        try:
                            mds_services.append(
                                MDSServiceController.prepare_mds_service(
                                    storagerouter=storagerouter, vpool=vpool))
                        except Exception:
                            MDSServiceController._logger.exception(
                                'vPool {0} - StorageRouter {1} - Failed to create new MDS Service'
                                .format(vpool.name, storagerouter.name))

            # After potentially having added new MDSes, retrieve the optimal configuration
            # An empty set makes every StorageRouter of this vPool get skipped in the loop below
            mds_config_set = {}
            try:
                mds_config_set = MDSServiceController.get_mds_storagedriver_config_set(
                    vpool=vpool, offline_nodes=offline_nodes)
                MDSServiceController._logger.debug(
                    'vPool {0} - Optimal configuration {1}'.format(
                        vpool.name, mds_config_set))
            except (NotFoundException, RuntimeError):
                MDSServiceController._logger.exception(
                    'vPool {0} - Failed to retrieve the optimal configuration'.
                    format(vpool.name))

            # Apply the optimal MDS configuration per StorageDriver
            for storagerouter in sorted(storagerouter_info,
                                        key=lambda k: k.ip):
                root_client = mds_dict[vpool][storagerouter]['client']
                storagedriver = mds_dict[vpool][storagerouter]['storagedriver']

                if storagedriver is None:
                    MDSServiceController._logger.critical(
                        'vPool {0} - StorageRouter {1} - No matching StorageDriver found'
                        .format(vpool.name, storagerouter.name))
                    continue
                if storagerouter.guid not in mds_config_set:
                    MDSServiceController._logger.critical(
                        'vPool {0} - StorageRouter {1} - Not marked as offline, but could not retrieve an optimal MDS config'
                        .format(vpool.name, storagerouter.name))
                    continue
                if root_client is None:
                    MDSServiceController._logger.debug(
                        'vPool {0} - StorageRouter {1} - Marked as offline, not setting optimal MDS configuration'
                        .format(vpool.name, storagerouter.name))
                    continue

                storagedriver_config = StorageDriverConfiguration(
                    vpool_guid=vpool.guid,
                    storagedriver_id=storagedriver.storagedriver_id)
                if storagedriver_config.config_missing is False:
                    optimal_mds_config = mds_config_set[storagerouter.guid]
                    MDSServiceController._logger.debug(
                        'vPool {0} - StorageRouter {1} - Storing optimal MDS configuration: {2}'
                        .format(vpool.name, storagerouter.name,
                                optimal_mds_config))
                    # Filesystem section in StorageDriver configuration are all parameters used for vDisks created directly on the filesystem
                    # So when a vDisk gets created on the filesystem, these MDSes will be assigned to them
                    storagedriver_config.configure_filesystem(
                        fs_metadata_backend_mds_nodes=optimal_mds_config)
                    storagedriver_config.save(root_client)

            # Execute a safety check, making sure the master/slave configuration is optimal.
            MDSServiceController._logger.info(
                'vPool {0} - Ensuring safety for all vDisks'.format(
                    vpool.name))
            for vdisk in vpool.vdisks:
                try:
                    MDSServiceController.ensure_safety(vdisk_guid=vdisk.guid)
                except Exception:
                    # Collect the failure and continue, all failures are raised together at the end
                    message = 'Ensure safety for vDisk {0} with guid {1} failed'.format(
                        vdisk.name, vdisk.guid)
                    MDSServiceController._logger.exception(message)
                    failures.append(message)
        if len(failures) > 0:
            raise RuntimeError('\n - ' + '\n - '.join(failures))
        MDSServiceController._logger.info('Finished')

    # noinspection PyUnresolvedReferences
    @staticmethod
    @ovs_task(name='ovs.mds.ensure_safety',
              ensure_single_info={'mode': 'CHAINED'})
    def ensure_safety(vdisk_guid, excluded_storagerouter_guids=None):
        """
        Ensures (or tries to ensure) the safety of a given vDisk, the actual work is delegated to a SafetyEnsurer
        Assumptions:
            * A local overloaded master is better than a non-local non-overloaded master
            * Prefer master/slaves to be on different hosts, a subsequent slave on the same node doesn't add safety
            * Don't actively overload services (e.g. configure an MDS as slave causing it to get overloaded)
            * Too much safety is not wanted (it adds loads to nodes while not required)
            * Order of slaves is:
                * All slaves on StorageRouters in primary Domain of vDisk host
                * All slaves on StorageRouters in secondary Domain of vDisk host
                * Eg: Safety of 2 (1 master + 1 slave)
                    mds config = [local master in primary, slave in secondary]
                * Eg: Safety of 3 (1 master + 2 slaves)
                    mds config = [local master in primary, slave in primary, slave in secondary]
                * Eg: Safety of 4 (1 master + 3 slaves)
                    mds config = [local master in primary, slave in primary, slave in secondary, slave in secondary]
        :param vdisk_guid: vDisk GUID to calculate a new safety for
        :type vdisk_guid: str
        :param excluded_storagerouter_guids: GUIDs of StorageRouters to leave out of calculation (Eg: When 1 is down or unavailable)
                                             Defaults to an empty list when not specified
        :type excluded_storagerouter_guids: list[str]
        :raises RuntimeError: If host of vDisk is part of the excluded StorageRouters
                              If host of vDisk is not part of the StorageRouters in the primary domain
                              If catchup command fails for a slave
                              If MDS client cannot be created for any of the current or new MDS services
                              If updateMetadataBackendConfig would fail for whatever reason
        :raises SRCObjectNotFoundException: If vDisk does not have a StorageRouter GUID
        :return: None
        :rtype: NoneType
        """
        excluded_guids = [] if excluded_storagerouter_guids is None else excluded_storagerouter_guids
        SafetyEnsurer(vdisk_guid, excluded_guids).ensure_safety()

    @staticmethod
    def get_preferred_mds(storagerouter, vpool):
        """
        Picks the least loaded MDS service of a vPool which is hosted on the specified StorageRouter
        When several MDS services have the same lowest load, the first one encountered is returned
        :param storagerouter: StorageRouter to retrieve the best MDS service for
        :type storagerouter: ovs.dal.hybrids.storagerouter.StorageRouter
        :param vpool: vPool to retrieve the best MDS service for
        :type vpool: ovs.dal.hybrids.vpool.VPool
        :return: Preferred MDS service (least loaded), current load on that MDS service
                 (None, infinity) when the StorageRouter hosts no MDS service for the vPool
        :rtype: tuple(ovs.dal.hybrids.j_mdsservice.MDSService, float)
        """
        candidates = [
            (candidate,
             MDSServiceController.get_mds_load(mds_service=candidate)[0])
            for candidate in vpool.mds_services
            if candidate.service.storagerouter_guid == storagerouter.guid
        ]
        if len(candidates) == 0:
            return None, float('inf')
        # min() only replaces its current pick by a strictly lower load, so the first of equal loads wins
        return min(candidates, key=lambda candidate_info: candidate_info[1])

    @staticmethod
    def get_mds_storagedriver_config_set(vpool, offline_nodes=None):
        """
        Calculates the MDS node configuration for every reachable StorageRouter of a vPool, aiming for:
            * Primary MDS is the local one
            * All slaves are on different hosts
            * Maximum `mds_safety` nodes are returned
        The configuration returned is the default configuration used by the volumedriver of which in normal use-cases
        only the 1st entry is used, because at volume creation time, the volumedriver needs to create 1 master MDS
        During ensure_safety, we actually create/set the MDS slaves for each volume

        Additional nodes are picked from the StorageRouters found through the primary domains first and through the
        secondary domains next, lowest load first and in random order for StorageRouters with an identical load

        :param vpool: vPool to get StorageDriver configuration for
        :type vpool: ovs.dal.hybrids.vpool.VPool
        :param offline_nodes: Nodes which are currently unreachable via the SSHClient functionality
        :type offline_nodes: list
        :raises RuntimeError: When no MDS Service can be found for a specific vPool/StorageRouter combo
        :raises NotFoundException: When configuration management is unavailable
        :return: MDS configuration for a vPool (StorageRouter guid as key, list of host/port dicts as value)
        :rtype: dict[list]
        """
        unreachable = [] if offline_nodes is None else offline_nodes

        # Collect the preferred MDS service per StorageRouter and group the StorageRouters by the load of that service
        node_per_storagerouter = {}
        storagerouters_per_load = {}
        for storagedriver in vpool.storagedrivers:
            storagerouter = storagedriver.storagerouter
            if storagerouter in unreachable:
                continue
            mds_service, load = MDSServiceController.get_preferred_mds(
                storagerouter, vpool)
            if mds_service is None:
                raise RuntimeError('Could not find an MDS service')
            node_per_storagerouter[storagerouter] = {
                'host': storagerouter.ip,
                'port': mds_service.service.ports[0]
            }
            storagerouters_per_load.setdefault(load, []).append(storagerouter)

        safety = Configuration.get(
            '/ovs/vpools/{0}/mds_config|mds_safety'.format(vpool.guid))
        config_set = {}
        for storagerouter, local_node in node_per_storagerouter.iteritems():
            # The local MDS always comes first
            nodes = [local_node]
            config_set[storagerouter.guid] = nodes
            # 1st pass uses the primary (non-backup) domains, 2nd pass the secondary (backup) domains
            for backup_domains in (False, True):
                eligible_storagerouters = set()
                for junction in storagerouter.domains:
                    if junction.backup is backup_domains:
                        eligible_storagerouters.update(
                            StorageRouterList.
                            get_primary_storagerouters_for_domain(
                                junction.domain))

                for load in sorted(storagerouters_per_load):
                    if len(nodes) >= safety:
                        break
                    equally_loaded = storagerouters_per_load[load]
                    random.shuffle(equally_loaded)
                    for candidate in equally_loaded:
                        if len(nodes) >= safety:
                            break
                        if candidate != storagerouter and candidate in eligible_storagerouters:
                            nodes.append(node_per_storagerouter[candidate])
        return config_set

    @staticmethod
    def monitor_mds_layout():
        """
        Prints the current MDS layout every second, until interrupted by the keyboard (Ctrl-C)
        Per StorageRouter and vPool, every MDS service is listed with its port, amount of masters/slaves, capacity and load
        :return: None
        :rtype: NoneType
        """
        try:
            while True:
                lines = [
                    '', 'Open vStorage - MDS debug information',
                    '=====================================',
                    'timestamp: {0}'.format(datetime.datetime.now()), ''
                ]
                found_vpools = False
                all_storagerouters = StorageRouterList.get_storagerouters()
                for storagerouter in sorted(all_storagerouters,
                                            key=lambda k: k.name):
                    vpools = set(sd.vpool
                                 for sd in storagerouter.storagedrivers)
                    if len(vpools) == 0:
                        continue  # Nothing to report for this StorageRouter

                    found_vpools = True
                    lines.append('+ {0} ({1})'.format(storagerouter.name,
                                                      storagerouter.ip))
                    for vpool in sorted(vpools, key=lambda k: k.name):
                        lines.append('  + {0}'.format(vpool.name))
                        for mds_service in sorted(vpool.mds_services,
                                                  key=lambda k: k.number):
                            if mds_service.service.storagerouter_guid != storagerouter.guid:
                                continue  # Hosted on another StorageRouter

                            master_flags = [
                                bool(junction.is_master)
                                for junction in mds_service.vdisks
                            ]
                            masters = master_flags.count(True)
                            slaves = len(master_flags) - masters

                            capacity = mds_service.capacity
                            if capacity == -1:
                                capacity = 'infinite'

                            load = MDSServiceController.get_mds_load(
                                mds_service)[0]
                            if load == float('inf'):
                                load_text = 'infinite'
                            else:
                                load_text = '{0}%'.format(round(load, 2))

                            lines.append(
                                '    + {0} - port {1} - {2} master(s), {3} slave(s) - capacity: {4}, load: {5}'
                                .format(mds_service.number,
                                        mds_service.service.ports[0],
                                        masters, slaves, capacity, load_text))
                if found_vpools is False:
                    lines.append('No vPools deployed')
                # The escape sequences clear the terminal and move the cursor to the top left
                print('\x1b[2J\x1b[H' + '\n'.join(lines))
                time.sleep(1)
        except KeyboardInterrupt:
            pass

    @staticmethod
    @ovs_task(name='ovs.mds.mds_catchup',
              schedule=Schedule(minute='30', hour='*/2'),
              ensure_single_info={'mode': 'DEFAULT'})
    def mds_catchup():
        """
        Looks to catch up all MDS slaves which are too far behind
        Only one catch for every storagedriver is invoked
        """

        # Only for caching purposes
        def storagedriver_worker(queue, error_list):
            # type: (Queue.Queue, List[str]) -> None
            while not queue.empty():
                mds_catch_up = queue.get()  # type: MDSCatchUp
                try:
                    mds_catch_up.catch_up(async=False)
                except Exception as ex:
                    MDSServiceController._logger.exception(
                        'Exceptions while catching for vDisk {0}'.format(
                            mds_catch_up.vdisk.guid))
                    error_list.append(str(ex))
                finally:
                    queue.task_done()

        storagedriver_queues = {}
        for vdisk in VDiskList.get_vdisks():
            if vdisk.storagedriver_id not in storagedriver_queues:
                storagedriver_queues[vdisk.storagedriver_id] = Queue.Queue()
            # Putting it in the Queue ensures that the reference is still there so the caching is used optimally
            catch_up = MDSCatchUp(vdisk.guid)
            storagedriver_queues[vdisk.storagedriver_id].put(catch_up)

        errors = []
        threads = []
        for storadriver_id, storagedriver_queue in storagedriver_queues.iteritems(
        ):
            thread = Thread(target=storagedriver_worker,
                            args=(
                                storagedriver_queue,
                                errors,
                            ))
            thread.start()
            threads.append(thread)
        for thread in threads:
            thread.join()

        if len(errors) > 0:
            raise RuntimeError(
                'Exception occurred while catching up: \n - {0}'.format(
                    '\n - '.join(errors)))
Example #4
0
class StorageRouterController(object):
    """
    Contains all BLL related to StorageRouter
    The class-level statements below are executed once, when the class body itself is executed
    """
    # Logger shared by the methods of this controller
    _logger = Logger('lib')
    # Effective level of the logger above, translated through LOG_LEVEL_MAPPING for the storagerouterclient logging setup
    _log_level = LOG_LEVEL_MAPPING[_logger.getEffectiveLevel()]
    # Managers obtained from their respective factories
    _os_manager = OSFactory.get_manager()
    _service_manager = ServiceFactory.get_manager()

    # Part of the class body: set up the storagerouterclient logging with the level determined above and enable it
    # noinspection PyCallByClass,PyTypeChecker
    storagerouterclient.Logger.setupLogging(
        Logger.load_path('storagerouterclient'), _log_level)
    # noinspection PyArgumentList
    storagerouterclient.Logger.enableLogging()

    @staticmethod
    @ovs_task(name='ovs.storagerouter.ping')
    def ping(storagerouter_guid, timestamp):
        """
        Register a celery heartbeat on a StorageRouter
        The stored heartbeat is only overwritten when the given timestamp is more recent than the stored one
        :param storagerouter_guid: Guid of the StorageRouter to update
        :type storagerouter_guid: str
        :param timestamp: Timestamp of the heartbeat to register
        :type timestamp: float
        :return: None
        :rtype: NoneType
        """
        mutex_name = 'storagerouter_heartbeat_{0}'.format(storagerouter_guid)
        # The compare-and-update below happens while holding the mutex of this StorageRouter
        with volatile_mutex(mutex_name):
            storagerouter = StorageRouter(storagerouter_guid)
            last_heartbeat = storagerouter.heartbeats.get('celery', 0)
            if not timestamp > last_heartbeat:
                # Stored heartbeat is at least as recent, nothing to save
                return
            storagerouter.heartbeats['celery'] = timestamp
            storagerouter.save()

    @staticmethod
    @ovs_task(name='ovs.storagerouter.get_metadata')
    def get_metadata(storagerouter_guid):
        """
        Collects physical information about the specified StorageRouter
        The result combines the partition information, the IP addresses and the presence of a scrub partition
        :param storagerouter_guid: Guid of the StorageRouter to collect the metadata for
        :type storagerouter_guid: str
        :return: Dict with the keys 'partitions', 'ipaddresses' and 'scrub_available'
        :rtype: dict
        """
        partition_info = StorageRouterController.get_partition_info(storagerouter_guid)
        ip_addresses = StorageRouterController.get_ip_addresses(storagerouter_guid)
        scrub_available = StorageRouterController.check_scrub_partition_present()
        return {'partitions': partition_info,
                'ipaddresses': ip_addresses,
                'scrub_available': scrub_available}

    @staticmethod
    def get_ip_addresses(storagerouter_guid):
        """
        Retrieves the IP addresses of a StorageRouter
        The addresses are requested from the OS manager through an SSH client connected to the StorageRouter
        :param storagerouter_guid: Guid of the StorageRouter
        :type storagerouter_guid: str
        :return: List of IP addresses
        :rtype: list
        """
        storagerouter = StorageRouter(storagerouter_guid)
        ssh_client = SSHClient(endpoint=storagerouter)
        return StorageRouterController._os_manager.get_ip_addresses(client=ssh_client)

    @staticmethod
    def get_partition_info(storagerouter_guid):
        """
        Retrieves information about the partitions of a StorageRouter, grouped per role
        For every role of every partition an entry is built containing the size, the space still available,
        whether the role is in use and whether the partition is usable
        :param storagerouter_guid: Guid of the StorageRouter
        :type storagerouter_guid: str
        :return: Dict with every DiskPartition role as key and a list of partition information dicts as value
        :rtype: dict
        """
        logger = StorageRouterController._logger
        storagerouter = StorageRouter(storagerouter_guid)
        client = SSHClient(endpoint=storagerouter)
        services_mds = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.MD_SERVER).services
        all_arakoon_services = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON).services
        services_arakoon = [service for service in all_arakoon_services
                            if service.name != 'arakoon-ovsdb' and service.is_internal is True]

        partitions = {}
        for known_role in DiskPartition.ROLES:
            partitions[known_role] = []

        for disk in storagerouter.disks:
            for disk_partition in disk.partitions:
                claimed_by_framework = 0
                used_on_filesystem = 0
                available_on_filesystem = 0

                # Sum up what has been claimed for this partition and what is effectively in use in the claimed directories
                for sd_partition in disk_partition.storagedrivers:
                    if sd_partition.size is not None:
                        claimed_by_framework += sd_partition.size
                    if not client.dir_exists(sd_partition.path):
                        continue
                    try:
                        du_output = client.run(['du', '-B', '1', '-d', '0', sd_partition.path], timeout=5)
                        used_on_filesystem += int(du_output.split('\t')[0])
                    except Exception as ex:
                        logger.warning('Failed to get directory usage for {0}. {1}'.format(sd_partition.path, ex))

                # Ask df for the available space, the first alias yielding a result is used
                if disk_partition.mountpoint is not None:
                    for alias in disk_partition.aliases:
                        logger.info('Verifying disk partition usage by checking path {0}'.format(alias))
                        device = client.file_read_link(path=alias)
                        try:
                            df_output = client.run(['df', '-B', '1', '--output=avail', device], timeout=5)
                            available_on_filesystem = int(df_output.splitlines()[-1])
                        except Exception as ex:
                            logger.warning('Failed to get partition usage for {0}. {1}'.format(disk_partition.mountpoint, ex))
                        else:
                            break

                for role in disk_partition.roles:
                    size = disk_partition.size if disk_partition.size is not None else 0
                    if available_on_filesystem > 0:
                        # Start from what df reports and add back the space used by the roles, so that only space not managed by the framework counts as used
                        available = available_on_filesystem + used_on_filesystem - claimed_by_framework
                    else:
                        # No df information: subtract what has already been claimed (but not necessarily fully used) from the partition size
                        available = size - claimed_by_framework

                    in_use = any(junction for junction in disk_partition.storagedrivers if junction.role == role)
                    if role == DiskPartition.ROLES.DB:
                        # The DB role is also in use when an internal Arakoon (except 'arakoon-ovsdb') or an MDS service lives on this StorageRouter
                        if any(service.storagerouter_guid == storagerouter_guid for service in services_arakoon):
                            in_use = True
                        if any(service.storagerouter_guid == storagerouter_guid for service in services_mds):
                            in_use = True

                    partitions[role].append({'ssd': disk.is_ssd,
                                             'guid': disk_partition.guid,
                                             'size': size,
                                             'in_use': in_use,
                                             'usable': True,  # Possibly overruled below for WRITE partitions
                                             'available': max(0, available),
                                             'mountpoint': disk_partition.folder,  # Equals to mount point unless mount point is root ('/'), then we pre-pend mount point with '/mnt/storage'
                                             'storagerouter_guid': storagerouter_guid})

        # Flag WRITE partitions as un-usable when smaller than 5% of the largest WRITE partition or smaller than 1GiB
        write_partitions = partitions[DiskPartition.ROLES.WRITE]
        writecache_sizes = [partition_info['available'] for partition_info in write_partitions]
        largest_write_cache = max(writecache_sizes) if len(writecache_sizes) > 0 else 0
        relative_threshold = largest_write_cache * 5 / 100
        for partition_info, available_size in zip(write_partitions, writecache_sizes):
            if available_size < relative_threshold or available_size < 1024 ** 3:
                partition_info['usable'] = False

        return partitions

    @staticmethod
    @ovs_task(name='ovs.storagerouter.get_version_info')
    def get_version_info(storagerouter_guid):
        """
        Returns the versions of the installed packages on a given StorageRouter
        :param storagerouter_guid: Guid of the StorageRouter to get the version information for
        :type storagerouter_guid: str
        :return: Dict with the StorageRouter guid and a dict mapping each package name to its version as a string
        :rtype: dict
        """
        package_manager = PackageFactory.get_manager()
        client = SSHClient(StorageRouter(storagerouter_guid))
        versions = {}
        for pkg_name, version in package_manager.get_installed_versions(client).iteritems():
            versions[pkg_name] = str(version)
        return {'storagerouter_guid': storagerouter_guid,
                'versions': versions}

    @staticmethod
    @ovs_task(name='ovs.storagerouter.get_support_info')
    def get_support_info():
        """
        Returns the support related settings stored in the configuration
        The stats monkey is reported as enabled as soon as one of its 2 scheduled tasks has a value different from None in the celery scheduling
        :return: Dict with the keys 'cluster_id', 'stats_monkey', 'support_agent', 'remote_access' and 'stats_monkey_config'
        :rtype: dict
        """
        celery_scheduling = Configuration.get(key='/ovs/framework/scheduling/celery', default={})
        stats_monkey_enabled = False
        for task_name in ['ovs.stats_monkey.run_all', 'alba.stats_monkey.run_all']:
            if celery_scheduling.get(task_name) is not None:
                stats_monkey_enabled = True
                break

        cluster_id = Configuration.get(key='/ovs/framework/cluster_id')
        support_agent = Configuration.get(key='/ovs/framework/support|support_agent')
        # NOTE(review): the 2 keys below lack the leading '/' the other keys have - confirm Configuration handles both forms identically
        remote_access = Configuration.get(key='ovs/framework/support|remote_access')
        stats_monkey_config = Configuration.get(key='ovs/framework/monitoring/stats_monkey', default={})
        return {'cluster_id': cluster_id,
                'stats_monkey': stats_monkey_enabled,
                'support_agent': support_agent,
                'remote_access': remote_access,
                'stats_monkey_config': stats_monkey_config}

    @staticmethod
    @ovs_task(name='ovs.storagerouter.get_support_metadata')
    def get_support_metadata():
        """
        Returns the heartbeat data gathered by the support agent. This should be a routed task!
        :return: Heartbeat data as returned by the SupportAgent
        :rtype: dict
        """
        support_agent = SupportAgent()
        return support_agent.get_heartbeat_data()

    @staticmethod
    @ovs_task(name='ovs.storagerouter.get_logfiles')
    def get_logfiles(local_storagerouter_guid):
        """
        Collects the logs on the StorageRouter executing this code, uploads the resulting archive to the downloads
        directory of the web frontend on the specified StorageRouter and returns the name of the archive
        :param local_storagerouter_guid: Guid of the StorageRouter to which the collected logs are uploaded
        :type local_storagerouter_guid: str
        :return: Name of tgz containing the logs
        :rtype: str
        """
        webpath = '/opt/OpenvStorage/webapps/frontend/downloads'

        # Collect the logs, the command output is used as the path of the generated archive
        collecting_client = SSHClient(System.get_my_storagerouter(), username='******')
        logfile = collecting_client.run(['ovs', 'collect', 'logs']).strip()
        logfilename = logfile.rsplit('/', 1)[-1]

        # Publish the archive in the downloads directory
        target_client = SSHClient(StorageRouter(local_storagerouter_guid), username='******')
        target_client.dir_create(webpath)
        target_client.file_upload('{0}/{1}'.format(webpath, logfilename), logfile)
        target_client.run(['chmod', '666', '{0}/{1}'.format(webpath, logfilename)])
        return logfilename

    @staticmethod
    @ovs_task(name='ovs.storagerouter.get_proxy_config')
    def get_proxy_config(vpool_guid, storagerouter_guid):
        """
        Gets the configuration of the first ALBA proxy for a given StorageRouter and vPool
        :param storagerouter_guid: Guid of the StorageRouter on which the ALBA proxy is configured
        :type storagerouter_guid: str
        :param vpool_guid: Guid of the vPool for which the proxy is configured
        :type vpool_guid: str
        :raises ValueError: When the vPool has no StorageDriver on the StorageRouter or when that StorageDriver has no ALBA proxies
        :return: The ALBA proxy configuration
        :rtype: dict
        """
        vpool = VPool(vpool_guid)
        storagerouter = StorageRouter(storagerouter_guid)
        for storagedriver in vpool.storagedrivers:
            if storagedriver.storagerouter_guid != storagerouter.guid:
                continue
            # First StorageDriver on the requested StorageRouter decides the outcome
            alba_proxies = storagedriver.alba_proxies
            if len(alba_proxies) == 0:
                raise ValueError('No ALBA proxies configured for vPool {0} on StorageRouter {1}'.format(vpool.name, storagerouter.name))
            config_key = '/ovs/vpools/{0}/proxies/{1}/config/main'.format(vpool.guid, alba_proxies[0].guid)
            return Configuration.get(config_key)
        raise ValueError('vPool {0} has not been extended to StorageRouter {1}'.format(vpool.name, storagerouter.name))

    @staticmethod
    @ovs_task(name='ovs.storagerouter.configure_support')
    def configure_support(support_info):
        """
        Configures support on all StorageRouters
        Every setting is optional: only settings which are specified and differ from the currently stored value are applied
        :param support_info: Information about which components should be configured
            {'stats_monkey': True,  # Enable/disable the stats monkey scheduled task
             'support_agent': True,  # Responsible for enabling the ovs-support-agent service, which collects heart beat data
             'remote_access': False,  # Cannot be True when support agent is False. Is responsible for opening an OpenVPN tunnel to allow for remote access
             'stats_monkey_config': {}}  # Dict with information on how to configure the stats monkey (Only required when enabling the stats monkey)
        :type support_info: dict
        :raises RuntimeError: When remote access is requested while the support agent is (or will be) disabled
                              When not all StorageRouters are reachable
        :return: None
        :rtype: NoneType
        """
        logger = StorageRouterController._logger
        service_manager = StorageRouterController._service_manager

        ExtensionsToolbox.verify_required_params(actual_params=support_info,
                                                 required_params={'stats_monkey': (bool, None, False),
                                                                  'remote_access': (bool, None, False),
                                                                  'support_agent': (bool, None, False),
                                                                  'stats_monkey_config': (dict, None, False)})
        # All settings are optional, so if nothing is specified, no need to change anything
        if len(support_info) == 0:
            logger.warning('Configure support called without any specific settings. Doing nothing')
            return

        # Collect information
        support_agent_key = '/ovs/framework/support|support_agent'
        support_agent_new = support_info.get('support_agent')
        support_agent_old = Configuration.get(key=support_agent_key)
        support_agent_change = support_agent_new is not None and support_agent_old != support_agent_new

        remote_access_key = '/ovs/framework/support|remote_access'
        remote_access_new = support_info.get('remote_access')
        remote_access_old = Configuration.get(key=remote_access_key)
        remote_access_change = remote_access_new is not None and remote_access_old != remote_access_new

        stats_monkey_celery_key = '/ovs/framework/scheduling/celery'
        stats_monkey_config_key = '/ovs/framework/monitoring/stats_monkey'
        stats_monkey_new_config = support_info.get('stats_monkey_config')
        stats_monkey_old_config = Configuration.get(key=stats_monkey_config_key, default={})
        stats_monkey_celery_config = Configuration.get(key=stats_monkey_celery_key, default={})
        stats_monkey_new = support_info.get('stats_monkey')
        # The stats monkey counts as enabled when at least one of its scheduled tasks has a value different from None
        stats_monkey_old = stats_monkey_celery_config.get('ovs.stats_monkey.run_all') is not None or stats_monkey_celery_config.get('alba.stats_monkey.run_all') is not None
        stats_monkey_change = stats_monkey_new is not None and (stats_monkey_old != stats_monkey_new or stats_monkey_new_config != stats_monkey_old_config)

        # Make sure support agent is enabled when trying to enable remote access
        if remote_access_new is True:
            if support_agent_new is False or (support_agent_new is None and support_agent_old is False):
                raise RuntimeError('Remote access cannot be enabled without the heart beat enabled')

        # Collect root_client information
        root_clients = {}
        for storagerouter in StorageRouterList.get_storagerouters():
            try:
                root_clients[storagerouter] = SSHClient(endpoint=storagerouter, username='******')
            except UnableToConnectException:
                raise RuntimeError('Not all StorageRouters are reachable')

        # Validate the stats monkey configuration before anything gets changed
        if stats_monkey_new is True:
            ExtensionsToolbox.verify_required_params(actual_params=stats_monkey_new_config,
                                                     required_params={'host': (str, ExtensionsToolbox.regex_ip),
                                                                      'port': (int, {'min': 1, 'max': 65535}),
                                                                      'database': (str, None),
                                                                      'interval': (int, {'min': 1, 'max': 86400}),
                                                                      'transport': (str, ['influxdb', 'redis', 'graphite']),
                                                                      'environment': (str, None)})
            transport = stats_monkey_new_config['transport']
            # Both influxdb and redis require a password. The transport name must match the allowed value 'redis' above,
            # a misspelled name here would silently skip the password validation for redis
            if transport in ['influxdb', 'redis']:
                ExtensionsToolbox.verify_required_params(actual_params=stats_monkey_new_config,
                                                         required_params={'password': (str, None)})

            if transport == 'influxdb':
                ExtensionsToolbox.verify_required_params(actual_params=stats_monkey_new_config,
                                                         required_params={'username': (str, None)})

        # Configure remote access
        if remote_access_change is True:
            Configuration.set(key=remote_access_key, value=remote_access_new)
            cid = Configuration.get('/ovs/framework/cluster_id').replace(r"'", r"'\''")
            for storagerouter, root_client in root_clients.iteritems():
                # Only disabling requires actions on the StorageRouters: stop the OpenVPN service and remove its configuration files
                if remote_access_new is False:
                    logger.info('Un-configuring remote access on StorageRouter {0}'.format(root_client.ip))
                    nid = storagerouter.machine_id.replace(r"'", r"'\''")
                    service_name = 'openvpn@ovs_{0}-{1}'.format(cid, nid)
                    if service_manager.has_service(name=service_name, client=root_client):
                        service_manager.stop_service(name=service_name, client=root_client)
                    root_client.file_delete(filenames=['/etc/openvpn/ovs_*'])

        # Configure support agent
        if support_agent_change is True:
            service_name = 'support-agent'
            Configuration.set(key=support_agent_key, value=support_agent_new)
            for root_client in root_clients.itervalues():
                if support_agent_new is True:
                    logger.info('Configuring support agent on StorageRouter {0}'.format(root_client.ip))
                    if service_manager.has_service(name=service_name, client=root_client) is False:
                        service_manager.add_service(name=service_name, client=root_client)
                    service_manager.restart_service(name=service_name, client=root_client)
                else:
                    logger.info('Un-configuring support agent on StorageRouter {0}'.format(root_client.ip))
                    if service_manager.has_service(name=service_name, client=root_client):
                        service_manager.stop_service(name=service_name, client=root_client)
                        service_manager.remove_service(name=service_name, client=root_client)

        # Configure stats monkey
        if stats_monkey_change is True:
            # 2 keys matter here:
            #    - /ovs/framework/scheduling/celery --> used to check whether the stats monkey is disabled or not
            #    - /ovs/framework/monitoring/stats_monkey --> contains the actual configuration parameters when enabling the stats monkey, such as host, port, username, ...
            service_name = 'scheduled-tasks'
            if stats_monkey_new is True:  # Enable the scheduled tasks by storing a schedule (or by removing the keys when no interval is given)
                logger.info('Configuring stats monkey')
                interval = stats_monkey_new_config['interval']
                # The scheduled task cannot be configured to run more than once a minute, so for intervals < 60, the stats monkey task handles this itself
                logger.debug('Requested interval to run at: {0}'.format(interval))
                Configuration.set(key=stats_monkey_config_key, value=stats_monkey_new_config)
                if interval > 0:
                    days, hours, minutes, _ = ExtensionsToolbox.convert_to_days_hours_minutes_seconds(seconds=interval)
                    if days == 1:  # Max interval is 24 * 60 * 60, so once every day at 3 AM
                        schedule = {'hour': '3'}
                    elif hours > 0:
                        schedule = {'hour': '*/{0}'.format(hours)}
                    else:
                        schedule = {'minute': '*/{0}'.format(minutes)}
                    stats_monkey_celery_config['ovs.stats_monkey.run_all'] = schedule
                    stats_monkey_celery_config['alba.stats_monkey.run_all'] = schedule
                    logger.debug('Configured schedule is: {0}'.format(schedule))
                else:
                    stats_monkey_celery_config.pop('ovs.stats_monkey.run_all', None)
                    stats_monkey_celery_config.pop('alba.stats_monkey.run_all', None)
            else:  # Disable the scheduled task by setting the values for the celery tasks to None
                logger.info('Un-configuring stats monkey')
                stats_monkey_celery_config['ovs.stats_monkey.run_all'] = None
                stats_monkey_celery_config['alba.stats_monkey.run_all'] = None

            Configuration.set(key=stats_monkey_celery_key, value=stats_monkey_celery_config)
            # The scheduled-tasks service is restarted on the master nodes so the new scheduling gets applied
            for storagerouter in StorageRouterList.get_masters():
                root_client = root_clients[storagerouter]
                logger.debug('Restarting ovs-scheduled-tasks service on node with IP {0}'.format(root_client.ip))
                service_manager.restart_service(name=service_name, client=root_client)

    @staticmethod
    @ovs_task(name='ovs.storagerouter.mountpoint_exists')
    def mountpoint_exists(name, storagerouter_guid):
        """
        Checks whether the directory '/mnt/<name>' exists on a StorageRouter
        :param name: Name of the mount point to check
        :type name: str
        :param storagerouter_guid: Guid of the StorageRouter on which to check for mount point existence
        :type storagerouter_guid: str
        :return: True if the directory exists on the StorageRouter, False otherwise
        :rtype: bool
        """
        storagerouter = StorageRouter(storagerouter_guid)
        ssh_client = SSHClient(storagerouter)
        mountpoint = '/mnt/{0}'.format(name)
        return ssh_client.dir_exists(directory=mountpoint)

    @staticmethod
    @ovs_task(name='ovs.storagerouter.refresh_hardware')
    def refresh_hardware(storagerouter_guid):
        """
        Refreshes the hardware related information of a StorageRouter
        First the RDMA capability is re-evaluated, afterwards the disks are synced through the DiskController
        :param storagerouter_guid: Guid of the StorageRouter to refresh the hardware on
        :type storagerouter_guid: str
        :return: None
        :rtype: NoneType
        """
        # Step 1: RDMA capability
        StorageRouterController.set_rdma_capability(storagerouter_guid)
        # Step 2: Disks and partitions
        DiskController.sync_with_reality(storagerouter_guid)

    @staticmethod
    def set_rdma_capability(storagerouter_guid):
        """
        Determines whether the StorageRouter is RDMA capable and stores the outcome on the StorageRouter
        The StorageRouter is flagged RDMA capable when the 'state' file of at least 1 port of a device
        under '/sys/class/infiniband' contains 'ACTIVE'
        :param storagerouter_guid: Guid of the StorageRouter to check and set
        :type storagerouter_guid: str
        :return: None
        :rtype: NoneType
        """
        storagerouter = StorageRouter(storagerouter_guid)
        client = SSHClient(storagerouter, username='******')
        rdma_capable = False
        with remote(client.ip, [os], username='******') as rem:
            for root, device_dirs, _ in rem.os.walk('/sys/class/infiniband'):
                for device_dir in device_dirs:
                    ports_dir = '/'.join([root, device_dir, 'ports'])
                    if not rem.os.path.exists(ports_dir):
                        continue
                    for port_root, port_dirs, _ in rem.os.walk(ports_dir):
                        # Only the direct sub directories of the ports directory are of interest
                        if port_root == ports_dir:
                            for port_dir in port_dirs:
                                state_file = '/'.join([port_root, port_dir, 'state'])
                                if rem.os.path.exists(state_file) and 'ACTIVE' in client.run(['cat', state_file]):
                                    rdma_capable = True
        storagerouter.rdma_capable = rdma_capable
        storagerouter.save()

    @staticmethod
    @ovs_task(name='ovs.storagerouter.configure_disk',
              ensure_single_info={
                  'mode': 'CHAINED',
                  'global_timeout': 1800
              })
    def configure_disk(storagerouter_guid, disk_guid, partition_guid, offset,
                       size, roles):
        """
        Configures a partition: creates it when no partition guid is given, makes sure it has a filesystem
        and a mount point and finally stores the requested roles on it
        :param storagerouter_guid: Guid of the StorageRouter to configure a disk on
        :type storagerouter_guid: str
        :param disk_guid: Guid of the disk to configure
        :type disk_guid: str
        :param partition_guid: Guid of the partition on the disk, None to create a new partition
        :type partition_guid: str
        :param offset: Offset for the partition
        :type offset: int
        :param size: Size of the partition
        :type size: int
        :param roles: Roles assigned to the partition
        :type roles: list
        :raises RuntimeError: When a validation fails or when the outcome of a step is not the expected one
        :raises ValueError: When the disk or the partition does not have any aliases
        :raises RoleDuplicationException: When the DB or DTL role is requested while already present on another disk
        :return: None
        :rtype: NoneType
        """
        logger = StorageRouterController._logger
        unique_roles = [DiskPartition.ROLES.DB, DiskPartition.ROLES.DTL]

        # Validations
        storagerouter = StorageRouter(storagerouter_guid)
        for requested_role in roles:
            if requested_role not in DiskPartition.ROLES or requested_role == DiskPartition.ROLES.BACKEND:
                raise RuntimeError('Invalid role specified: {0}'.format(requested_role))
        disk = Disk(disk_guid)
        if disk.storagerouter_guid != storagerouter_guid:
            raise RuntimeError('The given Disk is not on the given StorageRouter')
        for existing_partition in disk.partitions:
            if DiskPartition.ROLES.BACKEND in existing_partition.roles:
                raise RuntimeError('The given Disk is in use by a Backend')

        if len(set(roles).intersection(set(unique_roles))) > 0:
            roles_on_sr = StorageRouterController._get_roles_on_storagerouter(storagerouter.ip)
            for unique_role in unique_roles:
                # DB and DTL roles still have to be unassignable
                if unique_role in roles_on_sr and unique_role in roles and roles_on_sr[unique_role][0] != disk.name:
                    raise RoleDuplicationException('Disk {0} cannot have the {1} role due to presence on disk {2}'.format(disk.name, unique_role, roles_on_sr[unique_role][0]))

        if partition_guid is None:
            # Create partition
            logger.debug('Creating new partition - Offset: {0} bytes - Size: {1} bytes - Roles: {2}'.format(offset, size, roles))
            with remote(storagerouter.ip, [DiskTools], username='******') as rem:
                if len(disk.aliases) == 0:
                    raise ValueError('Disk {0} does not have any aliases'.format(disk.name))
                rem.DiskTools.create_partition(disk_alias=disk.aliases[0],
                                               disk_size=disk.size,
                                               partition_start=offset,
                                               partition_size=size)
            DiskController.sync_with_reality(storagerouter_guid)
            disk = Disk(disk_guid)
            end_point = offset + size
            # The new partition is the first one overlapping with the requested range
            for candidate in disk.partitions:
                if offset < candidate.offset + candidate.size and end_point > candidate.offset:
                    partition = candidate
                    break
            else:
                raise RuntimeError('No new partition detected on disk {0} after having created 1'.format(disk.name))
            logger.debug('Partition created')
        else:
            # Re-use the partition, but roles which are in use cannot be taken away from it
            logger.debug('Using existing partition')
            partition = DiskPartition(partition_guid)
            if partition.disk_guid != disk_guid:
                raise RuntimeError('The given DiskPartition is not on the given Disk')
            if partition.filesystem in ['swap', 'linux_raid_member', 'LVM2_member']:
                raise RuntimeError("It is not allowed to assign roles on partitions of type: ['swap', 'linux_raid_member', 'LVM2_member']")
            partition_info = StorageRouterController.get_metadata(storagerouter_guid)['partitions']
            removed_roles = set(partition.roles) - set(roles)
            used_roles = [removed_role
                          for removed_role in removed_roles
                          for info in partition_info[removed_role]
                          if info['in_use'] and info['guid'] == partition.guid]
            if len(used_roles) > 0:
                raise RuntimeError('Roles in use cannot be removed. Used roles: {0}'.format(', '.join(used_roles)))

        # Add filesystem
        if partition.filesystem is None or partition_guid is None:
            logger.debug('Creating filesystem')
            if len(partition.aliases) == 0:
                raise ValueError('Partition with offset {0} does not have any aliases'.format(partition.offset))
            with remote(storagerouter.ip, [DiskTools], username='******') as rem:
                rem.DiskTools.make_fs(partition_alias=partition.aliases[0])
            DiskController.sync_with_reality(storagerouter_guid)
            partition = DiskPartition(partition.guid)  # Reload to pick up the synced information
            if partition.filesystem not in ['ext4', 'xfs']:
                raise RuntimeError('Unexpected filesystem')
            logger.debug('Filesystem created')

        # Mount the partition and add to FSTab
        if partition.mountpoint is None:
            logger.debug('Configuring mount point')
            with remote(storagerouter.ip, [DiskTools], username='******') as rem:
                # Use the first '/mnt/ssd<n>' or '/mnt/hdd<n>' which does not exist yet
                disk_type = 'ssd' if disk.is_ssd else 'hdd'
                counter = 1
                mountpoint = '/mnt/{0}{1}'.format(disk_type, counter)
                while rem.DiskTools.mountpoint_exists(mountpoint):
                    counter += 1
                    mountpoint = '/mnt/{0}{1}'.format(disk_type, counter)
                logger.debug('Found mount point: {0}'.format(mountpoint))
                rem.DiskTools.add_fstab(partition_aliases=partition.aliases,
                                        mountpoint=mountpoint,
                                        filesystem=partition.filesystem)
                rem.DiskTools.mount(mountpoint)
            DiskController.sync_with_reality(storagerouter_guid)
            partition = DiskPartition(partition.guid)  # Reload to pick up the synced information
            if partition.mountpoint != mountpoint:
                raise RuntimeError('Unexpected mount point')
            logger.debug('Mount point configured')

        partition.roles = roles
        partition.save()
        StorageRouterController._logger.debug('Partition configured')

    @staticmethod
    def check_scrub_partition_present():
        """
        Tell whether the SCRUB role is assigned to a partition anywhere in the cluster
        All disks of all StorageRouters are inspected; the search stops at the first partition carrying the role
        :return: True as soon as one partition with the SCRUB role is found, False when there is none
        :rtype: bool
        """
        return any(DiskPartition.ROLES.SCRUB in partition.roles
                   for storage_router in StorageRouterList.get_storagerouters()
                   for disk in storage_router.disks
                   for partition in disk.partitions)

    @staticmethod
    def get_mountpoints(client):
        """
        List the mount points reported by 'mount -v' on the given client, leaving out system mounts
        :param client: SSHClient on which the mount points are looked up
        :return: Mount points in the order they are reported, without '/' and without any path starting
                 with /dev, /proc, /sys, /run or /mnt/alba-asd
        :rtype: list[str]
        """
        ignored_prefixes = ('/dev', '/proc', '/sys', '/run', '/mnt/alba-asd')
        found = []
        for line in client.run(['mount', '-v']).strip().splitlines():
            fields = line.split(' ')
            # The mount point is the third space-separated field; shorter lines carry none
            if len(fields) <= 2:
                continue
            target = fields[2]
            if not target or target == '/' or target.startswith(ignored_prefixes):
                continue
            found.append(target)
        return found

    @staticmethod
    def _retrieve_alba_arakoon_config(alba_backend_guid, ovs_client):
        """
        Fetch the Arakoon configuration metadata of an ALBA Backend through the API
        The API call returns a task; this method waits up to 300 seconds for that task to finish
        :param alba_backend_guid: Guid of the ALBA Backend
        :type alba_backend_guid: str
        :param ovs_client: OVS client object used to issue the call and to wait for the task
        :type ovs_client: OVSClient
        :raises RuntimeError: When the task reports it was not successful
        :return: Arakoon configuration information
        :rtype: dict
        """
        url = '/alba/backends/{0}/get_config_metadata'.format(alba_backend_guid)
        task = ovs_client.get(url)
        outcome = ovs_client.wait_for_task(task, timeout=300)
        successful, arakoon_config = outcome
        # Only an explicit False counts as a failure, any other value is treated as success
        if successful is not False:
            return arakoon_config
        raise RuntimeError(
            'Could not load metadata from environment {0}'.format(
                ovs_client.ip))

    @staticmethod
    def _revert_vpool_status(vpool,
                             status=VPool.STATUSES.RUNNING,
                             storagedriver=None,
                             client=None,
                             dirs_created=None):
        """
        Remove the vPool being created or revert the vPool being extended
        The requested status is always stored on the vPool; the clean-up that follows only runs when that
        status is VPool.STATUSES.RUNNING
        :param vpool: vPool to update and, when it has no StorageDrivers left, to delete
        :param status: Status to store on the vPool
        :param storagedriver: StorageDriver to delete together with its partitions and ALBA proxies, None to skip
        :param client: Client offering dir_delete(directories=...), used to remove dirs_created
        :param dirs_created: Directories to remove again, None or empty to skip
        :type dirs_created: list or None
        :return: None
        :rtype: NoneType
        """
        vpool.status = status
        vpool.save()

        if status == VPool.STATUSES.RUNNING:
            # dirs_created defaults to None, so test its truthiness instead of calling len() on it
            if dirs_created:
                try:
                    client.dir_delete(directories=dirs_created)
                except Exception:
                    # Best effort: a failed directory clean-up must not abort the rest of the revert
                    StorageRouterController._logger.warning(
                        'Failed to clean up following directories: {0}'.format(
                            ', '.join(dirs_created)))

            if storagedriver is not None:
                for sdp in storagedriver.partitions:
                    sdp.delete()
                for proxy in storagedriver.alba_proxies:
                    proxy.delete()
                storagedriver.delete()
            # Without any StorageDriver left, drop the vPool and its configuration tree
            if len(vpool.storagedrivers) == 0:
                vpool.delete()
                config_key = '/ovs/vpools/{0}'.format(vpool.guid)
                if Configuration.dir_exists(key=config_key):
                    Configuration.delete(key=config_key)

    @staticmethod
    def _get_roles_on_storagerouter(ip):
        """
        Collect the partition roles present on the StorageRouter with the given IP
        :param ip: string with ip of the storagerouter
        :return: Dict mapping each role to a list with the names of the disks holding a partition with that
                 role; a disk name is listed once per partition carrying the role
        :rtype: dict
        """
        disks_per_role = {}
        for disk in StorageRouterList.get_by_ip(ip).disks:
            for disk_partition in disk.partitions:
                for role in disk_partition.roles:
                    disks_per_role.setdefault(role, []).append(disk.name)
        return disks_per_role
Example #5
0
class StorageDriverClient(object):
    """
    Client to access storagedriver client
    Besides the client loader, this class holds the DTL related constants and the lookup tables that
    translate between them. Logging of the storagerouterclient library is configured as a side effect
    of evaluating this class body.
    """
    # The statements below run once, when the class body is evaluated: the level of the 'extensions'
    # logger is translated through LOG_LEVEL_MAPPING and handed to the storagerouterclient Logger,
    # together with the target returned by OVSLogger.load_path
    _log_level = LOG_LEVEL_MAPPING[OVSLogger('extensions').getEffectiveLevel()]
    # noinspection PyCallByClass,PyTypeChecker
    storagerouterclient.Logger.setupLogging(
        OVSLogger.load_path('storagerouterclient'), _log_level)
    # noinspection PyArgumentList
    storagerouterclient.Logger.enableLogging()

    # DTL mode and transport names in the VOLDRV_* spelling
    # NOTE(review): presumably the values the volumedriver itself uses - confirm
    VOLDRV_DTL_SYNC = 'Synchronous'
    VOLDRV_DTL_ASYNC = 'Asynchronous'
    VOLDRV_DTL_MANUAL_MODE = 'Manual'
    VOLDRV_DTL_AUTOMATIC_MODE = 'Automatic'
    VOLDRV_DTL_TRANSPORT_TCP = 'TCP'
    VOLDRV_DTL_TRANSPORT_RSOCKET = 'RSocket'

    # DTL mode and transport names in the FRAMEWORK_* spelling; these are the keys of the *_MAP tables below
    FRAMEWORK_DTL_SYNC = 'sync'
    FRAMEWORK_DTL_ASYNC = 'a_sync'
    FRAMEWORK_DTL_NO_SYNC = 'no_sync'
    FRAMEWORK_DTL_TRANSPORT_TCP = 'tcp'
    FRAMEWORK_DTL_TRANSPORT_RSOCKET = 'rdma'

    # Framework DTL mode -> DTLMode constant; 'no_sync' has no counterpart and maps to None
    VDISK_DTL_MODE_MAP = {
        FRAMEWORK_DTL_SYNC: DTLMode.SYNCHRONOUS,
        FRAMEWORK_DTL_ASYNC: DTLMode.ASYNCHRONOUS,
        FRAMEWORK_DTL_NO_SYNC: None
    }
    # Framework DTL mode -> VOLDRV_* mode string; 'no_sync' maps to None
    VPOOL_DTL_MODE_MAP = {
        FRAMEWORK_DTL_SYNC: VOLDRV_DTL_SYNC,
        FRAMEWORK_DTL_ASYNC: VOLDRV_DTL_ASYNC,
        FRAMEWORK_DTL_NO_SYNC: None
    }
    # Framework DTL transport -> VOLDRV_* transport string
    VPOOL_DTL_TRANSPORT_MAP = {
        FRAMEWORK_DTL_TRANSPORT_TCP: VOLDRV_DTL_TRANSPORT_TCP,
        FRAMEWORK_DTL_TRANSPORT_RSOCKET: VOLDRV_DTL_TRANSPORT_RSOCKET
    }
    # Reverse lookup: both the VOLDRV_* mode strings and the DTLMode constants -> framework DTL mode
    REVERSE_DTL_MODE_MAP = {
        VOLDRV_DTL_SYNC: FRAMEWORK_DTL_SYNC,
        VOLDRV_DTL_ASYNC: FRAMEWORK_DTL_ASYNC,
        DTLMode.SYNCHRONOUS: FRAMEWORK_DTL_SYNC,
        DTLMode.ASYNCHRONOUS: FRAMEWORK_DTL_ASYNC
    }
    # Reverse lookup: VOLDRV_* transport string -> framework DTL transport
    REVERSE_DTL_TRANSPORT_MAP = {
        VOLDRV_DTL_TRANSPORT_TCP: FRAMEWORK_DTL_TRANSPORT_TCP,
        VOLDRV_DTL_TRANSPORT_RSOCKET: FRAMEWORK_DTL_TRANSPORT_RSOCKET
    }
    # NOTE(review): units are not visible here, presumably KiB - confirm
    CLUSTER_SIZES = [4, 8, 16, 32, 64]
    # Appears to be keyed by cluster size
    # NOTE(review): contains the key 128, which is not listed in CLUSTER_SIZES - confirm whether intentional
    TLOG_MULTIPLIER_MAP = {4: 16, 8: 8, 16: 4, 32: 2, 64: 1, 128: 1}

    # DTL state name -> numeric value; 'catch_up' and 'checkup_required' share the same value
    # NOTE(review): presumably a severity ranking, higher being worse - confirm
    DTL_STATUS = {
        '': 0,
        'ok_sync': 10,
        'ok_standalone': 20,
        'catch_up': 30,
        'checkup_required': 30,
        'degraded': 40
    }
    # Factories wrapped in staticmethod so they are not bound as methods; every call builds a new
    # Statistics / VolumeInfo object
    EMPTY_STATISTICS = staticmethod(lambda: Statistics())
    EMPTY_INFO = staticmethod(lambda: VolumeInfo())
    # Name of a combined statistic -> names of the individual counters it consists of
    # NOTE(review): presumably the counters are summed by the consumer - confirm
    STAT_SUMS = {
        'operations': ['write_operations', 'read_operations'],
        'cache_hits': ['sco_cache_hits', 'cluster_cache_hits'],
        'cache_misses': ['sco_cache_misses'],
        '4k_operations': ['4k_read_operations', '4k_write_operations'],
        '4k_unaligned_operations':
        ['4k_unaligned_read_operations', '4k_unaligned_write_operations'],
        'data_transferred': ['data_written', 'data_read']
    }

    def __init__(self):
        """
        Intentionally empty: creating an instance performs no work
        """

    @staticmethod
    def load(vpool, excluded_storagedrivers=None):
        """
        Return the StorageRouterClient of a vPool, creating and caching it on first use
        Only the first 3 StorageDrivers of the vPool are considered as cluster contacts; excluded ones
        among them are skipped, not replaced by others. The cache is keyed on the vPool identifier only,
        so the exclusion list has no effect once a client has been cached for that vPool.
        :param vpool: vPool for which the StorageRouterClient needs to be loaded
        :type vpool: vPool
        :param excluded_storagedrivers: A list of storagedrivers that cannot be used as a client
        :type excluded_storagedrivers: list or None
        :return: The cached client for the vPool
        """
        cache_key = vpool.identifier
        if cache_key in client_vpool_cache:
            return client_vpool_cache[cache_key]

        excluded = [] if excluded_storagedrivers is None else excluded_storagedrivers
        contacts = [
            ClusterContact(str(candidate.cluster_ip), candidate.ports['xmlrpc'])
            for candidate in vpool.storagedrivers[:3]
            if candidate not in excluded
        ]
        client_vpool_cache[cache_key] = StorageRouterClient(str(vpool.guid), contacts)
        return client_vpool_cache[cache_key]