Code example #1
0
 def reload_client(self):
     """
     Reload the metadata-server client for the linked service.

     The object is briefly unfrozen so the client attribute can be
     rebound, then frozen again. Does nothing when no service is set.
     """
     if not self.service:
         return
     self._frozen = False
     self.metadataserver_client = MetadataServerClient.load(self.service)
     self._frozen = True
Code example #2
0
    def ensure_safety(vdisk, excluded_storagerouters=None):
        """
        Ensures (or tries to ensure) the safety of a given vdisk (except hypervisor).
        Assumptions:
        * A local overloaded master is better than a non-local non-overloaded master
        * Prefer master/services to be on different hosts, a subsequent slave on the same node doesn't add safety
        * Don't actively overload services (e.g. configure an MDS as slave causing it to get overloaded)
        * Too much safety is not wanted (it adds loads to nodes while not required)
        :param vdisk:                   vDisk to calculate a new safety for
        :param excluded_storagerouters: Storagerouters to leave out of calculation (Eg: When 1 is down or unavailable)
        :raises ValueError: When the vDisk is not attached to any Storage Router
        """
        def add_suitable_nodes(local_failure_domain, local_safety):
            """
            Adds nodes which are suited to serve the MDS
            :param local_failure_domain: Failure domain to take into account
            :param local_safety: Safety which needs to be met
            :return: Nodes which can be used, MDS services to use
            """
            if len(nodes) < local_safety:
                # Walk candidate services from least to most loaded; add at most one service per node
                for local_load in sorted(
                        failure_domain_load_dict[local_failure_domain]):
                    for local_service in failure_domain_load_dict[
                            local_failure_domain][local_load]:
                        if len(
                                nodes
                        ) < local_safety and local_service.storagerouter.ip not in nodes:
                            try:
                                # Probe reachability before selecting this node
                                SSHClient(local_service.storagerouter)
                                new_services.append(local_service)
                                nodes.add(local_service.storagerouter.ip)
                            except UnableToConnectException:
                                # BUGFIX: log the unreachable candidate itself ('local_service');
                                # the previous code formatted the outer-scope 'service' variable,
                                # which points at an unrelated service at this point
                                logger.debug(
                                    'MDS safety: vDisk {0}: Skipping storagerouter with IP {1} as it is unreachable'
                                    .format(vdisk.guid,
                                            local_service.storagerouter.ip))
            return nodes, new_services

        ######################
        # GATHER INFORMATION #
        ######################
        logger.debug(
            'MDS safety: vDisk {0}: Start checkup for virtual disk {1}'.format(
                vdisk.guid, vdisk.name))
        vdisk.reload_client()
        vdisk.invalidate_dynamics(
            ['info', 'storagedriver_id', 'storagerouter_guid'])
        if vdisk.storagerouter_guid is None:
            raise ValueError(
                'Cannot ensure MDS safety for vDisk {0} with guid {1} because vDisk is not attached to any Storage Router'
                .format(vdisk.name, vdisk.guid))

        if excluded_storagerouters is None:
            excluded_storagerouters = []

        services = [
            mds_service.service for mds_service in vdisk.vpool.mds_services
            if mds_service.service.storagerouter not in excluded_storagerouters
        ]
        nodes = set(service.storagerouter.ip for service in services)

        vdisk_storagerouter = StorageRouter(vdisk.storagerouter_guid)
        primary_failure_domain = vdisk_storagerouter.primary_failure_domain
        # The vDisk's own secondary failure domain takes precedence over the Storage Router's
        if vdisk.secondary_failure_domain is not None:
            secondary_failure_domain = vdisk.secondary_failure_domain
        else:
            secondary_failure_domain = vdisk_storagerouter.secondary_failure_domain

        # Per failure domain: load -> [services], services currently in use, services available in the model
        failure_domain_load_dict = {primary_failure_domain: {}}
        failure_domain_used_services_dict = {primary_failure_domain: []}
        failure_domain_available_services_dict = {primary_failure_domain: []}
        storage_router_failure_domain_dict = dict(
            (storage_router, primary_failure_domain) for storage_router in
            primary_failure_domain.primary_storagerouters)

        if secondary_failure_domain is not None:
            failure_domain_load_dict[secondary_failure_domain] = {}
            failure_domain_used_services_dict[secondary_failure_domain] = []
            failure_domain_available_services_dict[
                secondary_failure_domain] = []
            storage_router_failure_domain_dict.update(
                dict((storage_router, secondary_failure_domain)
                     for storage_router in
                     secondary_failure_domain.primary_storagerouters))

        services_load = {}
        service_per_key = {}
        for service in services:
            services_load[service] = MDSServiceController.get_mds_load(
                service.mds_service)
            service_per_key['{0}:{1}'.format(service.storagerouter.ip,
                                             service.ports[0])] = service

        configs = vdisk.info[
            'metadata_backend_config']  # Ordered MASTER, SLAVE (backup failure domain of master)
        for config in configs:
            config['key'] = '{0}:{1}'.format(config['ip'], config['port'])

        ###################################
        # VERIFY RECONFIGURATION REQUIRED #
        ###################################
        master_service = None
        reconfigure_reasons = []
        if len(configs) > 0:
            config = configs.pop(0)
            if config['key'] in service_per_key:
                master_service = service_per_key.get(config['key'])
            else:
                reconfigure_reasons.append(
                    'Master ({0}:{1}) cannot be used anymore'.format(
                        config['ip'], config['port']))
        slave_services = []
        for config in configs:
            if config['key'] in service_per_key:
                slave_services.append(service_per_key[config['key']])
            else:
                reconfigure_reasons.append(
                    'Slave ({0}:{1}) cannot be used anymore'.format(
                        config['ip'], config['port']))

        # If MDS already in use, take current load, else take next load
        tlogs = EtcdConfiguration.get('/ovs/framework/storagedriver|mds_tlogs')
        safety = EtcdConfiguration.get(
            '/ovs/framework/storagedriver|mds_safety')
        max_load = EtcdConfiguration.get(
            '/ovs/framework/storagedriver|mds_maxload')
        for service in services:
            if service == master_service or service in slave_services:
                load = services_load[service][0]
                if service.storagerouter in storage_router_failure_domain_dict:  # Services in use per failure domain
                    failure_domain_used_services_dict[
                        storage_router_failure_domain_dict[
                            service.storagerouter]].append(service)
                else:
                    reconfigure_reasons.append(
                        'Service {0} cannot be used anymore because storagerouter with IP {1} is not part of the failure domains'
                        .format(service.name, service.storagerouter.ip))
            else:
                load = services_load[service][1]
            services_load[service] = load
            if service.storagerouter in storage_router_failure_domain_dict:  # All services available in model per failure domain
                failure_domain = storage_router_failure_domain_dict[
                    service.storagerouter]
                failure_domain_available_services_dict[failure_domain].append(
                    service)
                if load <= max_load:
                    if load not in failure_domain_load_dict[failure_domain]:
                        failure_domain_load_dict[failure_domain][load] = []
                    failure_domain_load_dict[failure_domain][load].append(
                        service)

        service_nodes = []
        if master_service is not None:
            service_nodes.append(master_service.storagerouter.ip)
        for service in slave_services:
            ip = service.storagerouter.ip
            if ip in service_nodes:
                reconfigure_reasons.append(
                    'Multiple MDS services on the same node')
            else:
                service_nodes.append(ip)

        if len(service_nodes) > safety:
            reconfigure_reasons.append('Too much safety')
        if len(service_nodes) < safety and len(service_nodes) < len(nodes):
            reconfigure_reasons.append('Not enough safety')
        if master_service is not None and services_load[
                master_service] > max_load:
            reconfigure_reasons.append('Master overloaded')
        if master_service is not None and master_service.storagerouter_guid != vdisk.storagerouter_guid:
            reconfigure_reasons.append('Master is not local')
        if any(service for service in slave_services
               if services_load[service] > max_load):
            reconfigure_reasons.append('One or more slaves overloaded')

        # Check reconfigure required based upon failure domains
        # With a secondary failure domain, roughly half (rounded up) of the services go primary
        recommended_primary = math.ceil(
            safety / 2.0) if secondary_failure_domain is not None else safety
        recommended_secondary = safety - recommended_primary

        if master_service is not None and master_service not in failure_domain_used_services_dict[
                primary_failure_domain]:
            # Master service not present in primary failure domain
            reconfigure_reasons.append(
                'Master service not in primary failure domain')

        primary_services_used = len(
            failure_domain_used_services_dict[primary_failure_domain])
        primary_services_available = len(
            failure_domain_available_services_dict[primary_failure_domain])
        if primary_services_used < recommended_primary and primary_services_used < primary_services_available:
            # More services can be used in primary failure domain
            reconfigure_reasons.append(
                'Not enough services in use in primary failure domain')

        if secondary_failure_domain is not None:
            # More services can be used in secondary failure domain
            secondary_services_used = len(
                failure_domain_used_services_dict[secondary_failure_domain])
            secondary_services_available = len(
                failure_domain_available_services_dict[
                    secondary_failure_domain])
            if secondary_services_used < recommended_secondary and secondary_services_used < secondary_services_available:
                reconfigure_reasons.append(
                    'Not enough services in use in secondary failure domain')

            # If secondary failure domain present, check order in which the slave services are configured
            secondary = False
            for slave_service in slave_services:
                if secondary is True and slave_service in failure_domain_used_services_dict[
                        primary_failure_domain]:
                    reconfigure_reasons.append(
                        'A slave in secondary failure domain has priority over a slave in primary failure domain'
                    )
                    break
                if slave_service in failure_domain_used_services_dict[
                        secondary_failure_domain]:
                    secondary = True

        if not reconfigure_reasons:
            logger.debug(
                'MDS safety: vDisk {0}: No reconfiguration required'.format(
                    vdisk.guid))
            MDSServiceController.sync_vdisk_to_reality(vdisk)
            return

        logger.debug(
            'MDS safety: vDisk {0}: Reconfiguration required. Reasons:'.format(
                vdisk.guid))
        for reason in reconfigure_reasons:
            logger.debug('MDS safety: vDisk {0}:    * {1}'.format(
                vdisk.guid, reason))

        # Check whether the master (if available) is non-local to the vdisk and/or is overloaded
        new_services = []
        master_ok = master_service is not None
        if master_ok is True:
            master_ok = master_service.storagerouter_guid == vdisk.storagerouter_guid and services_load[
                master_service] <= max_load

        ############################
        # CREATE NEW CONFIGURATION #
        ############################
        if master_ok:
            # Add this master to the fresh configuration
            new_services.append(master_service)
        else:
            # Try to find the best non-overloaded LOCAL MDS slave to make master
            candidate_master_service = None
            candidate_master_load = 0
            local_mds = None
            local_mds_load = 0
            for service in failure_domain_available_services_dict[
                    primary_failure_domain]:
                load = services_load[service]
                if load <= max_load and service.storagerouter_guid == vdisk.storagerouter_guid:
                    if local_mds is None or local_mds_load > load:
                        # This service is a non-overloaded local MDS
                        local_mds = service
                        local_mds_load = load
                    if service in slave_services:
                        if candidate_master_service is None or candidate_master_load > load:
                            # This service is a non-overloaded local slave
                            candidate_master_service = service
                            candidate_master_load = load
            if candidate_master_service is not None:
                # A non-overloaded local slave was found.
                client = MetadataServerClient.load(candidate_master_service)
                try:
                    amount_of_tlogs = client.catch_up(str(vdisk.volume_id),
                                                      True)
                except RuntimeError as ex:
                    # First catch-up on a fresh slave: create the namespace, then retry once
                    if 'Namespace does not exist' in ex.message:
                        client.create_namespace(str(vdisk.volume_id))
                        amount_of_tlogs = client.catch_up(
                            str(vdisk.volume_id), True)
                    else:
                        raise
                if amount_of_tlogs < tlogs:
                    # Almost there. Catching up right now, and continue as soon as it's up-to-date
                    start = time.time()
                    client.catch_up(str(vdisk.volume_id), False)
                    logger.debug(
                        'MDS safety: vDisk {0}: Catchup took {1}s'.format(
                            vdisk.guid, round(time.time() - start, 2)))
                    # It's up to date, so add it as a new master
                    new_services.append(candidate_master_service)
                    if master_service is not None:
                        # The current master (if available) is now candidate to become one of the slaves
                        slave_services.append(master_service)
                else:
                    # It's not up to date, keep the previous master (if available) and give the local slave
                    # some more time to catch up
                    if master_service is not None:
                        new_services.append(master_service)
                    new_services.append(candidate_master_service)
                if candidate_master_service in slave_services:
                    slave_services.remove(candidate_master_service)
            else:
                # There's no non-overloaded local slave found. Keep the current master (if available) and add
                # a local MDS (if available) as slave
                if master_service is not None:
                    new_services.append(master_service)
                if local_mds is not None:
                    new_services.append(local_mds)
                    if local_mds in slave_services:
                        slave_services.remove(local_mds)

        # At this point, there might (or might not) be a (new) master, and a (catching up) slave. The rest of the non-local
        # MDS nodes must now be added to the configuration until the safety is reached. There's always one extra
        # slave recycled to make sure there's always an (almost) up-to-date slave ready for failover
        nodes = set(service.storagerouter.ip for service in new_services)

        # Recycle slave for faster failover
        secondary_node_count = 0
        service_to_recycle = None
        if len(nodes) < safety:
            if recommended_primary > 1:  # If primary is 1, we only have master in primary
                # Try to recycle slave which is in primary failure domain
                for load in sorted(
                        failure_domain_load_dict[primary_failure_domain]):
                    for service in failure_domain_load_dict[
                            primary_failure_domain][load]:
                        if service_to_recycle is None and service in slave_services and service.storagerouter.ip not in nodes:
                            try:
                                SSHClient(service.storagerouter)
                                service_to_recycle = service
                            except UnableToConnectException:
                                logger.debug(
                                    'MDS safety: vDisk {0}: Skipping storagerouter with IP {1} as it is unreachable'
                                    .format(vdisk.guid,
                                            service.storagerouter.ip))
            # Try to recycle slave which is in secondary failure domain if none found in primary
            if service_to_recycle is None and secondary_failure_domain is not None:
                for load in sorted(
                        failure_domain_load_dict[secondary_failure_domain]):
                    for service in failure_domain_load_dict[
                            secondary_failure_domain][load]:
                        if service_to_recycle is None and service in slave_services and service.storagerouter.ip not in nodes:
                            try:
                                SSHClient(service.storagerouter)
                                service_to_recycle = service
                                secondary_node_count = 1  # We do not want to configure the secondary slave BEFORE the primary slaves
                            except UnableToConnectException:
                                logger.debug(
                                    'MDS safety: vDisk {0}: Skipping storagerouter with IP {1} as it is unreachable'
                                    .format(vdisk.guid,
                                            service.storagerouter.ip))
        if service_to_recycle is not None:
            slave_services.remove(service_to_recycle)
            if secondary_node_count == 0:  # Add service to recycle because its in primary failure domain
                new_services.append(service_to_recycle)
                nodes.add(service_to_recycle.storagerouter.ip)

        # Add extra (new) slaves until primary safety reached
        nodes, new_services = add_suitable_nodes(
            local_failure_domain=primary_failure_domain,
            local_safety=recommended_primary)

        # Add recycled secondary slave after primary slaves have been added
        if secondary_node_count == 1:
            new_services.append(service_to_recycle)
            nodes.add(service_to_recycle.storagerouter.ip)

        # Add extra (new) slaves until secondary safety reached
        if secondary_failure_domain is not None:
            nodes, new_services = add_suitable_nodes(
                local_failure_domain=secondary_failure_domain,
                local_safety=safety)
            # Add extra slaves from primary failure domain in case no suitable nodes found in secondary failure domain
            if len(nodes) < safety:
                nodes, new_services = add_suitable_nodes(
                    local_failure_domain=primary_failure_domain,
                    local_safety=safety)

        # Build the new configuration and update the vdisk
        configs = []
        for service in new_services:
            client = MetadataServerClient.load(service)
            client.create_namespace(str(vdisk.volume_id))
            configs.append(
                MDSNodeConfig(address=str(service.storagerouter.ip),
                              port=service.ports[0]))
        vdisk.storagedriver_client.update_metadata_backend_config(
            volume_id=str(vdisk.volume_id),
            metadata_backend_config=MDSMetaDataBackendConfig(configs))
        MDSServiceController.sync_vdisk_to_reality(vdisk)
        logger.debug('MDS safety: vDisk {0}: Completed'.format(vdisk.guid))
Code example #3
0
File: mdsservice.py  Project: winglq/framework
    def ensure_safety(vdisk, excluded_storagerouters=None):
        """
        Ensures (or tries to ensure) the safety of a given vdisk (except hypervisor).
        Assumptions:
        * A local overloaded master is better than a non-local non-overloaded master
        * Prefer master/services to be on different hosts, a subsequent slave on the same node doesn't add safety
        * Don't actively overload services (e.g. configure an MDS as slave causing it to get overloaded)
        * Too much safety is not wanted (it adds loads to nodes while not required)
        :param vdisk: vDisk to calculate a new safety for
        :type vdisk: VDisk

        :param excluded_storagerouters: Storagerouters to leave out of calculation (Eg: When 1 is down or unavailable)
        :type excluded_storagerouters: list

        :return: None
        """
        def _add_suitable_nodes(_importance, _safety):
            if len(nodes) < _safety:
                for local_load in sorted(all_info_dict[_importance]['loads']):
                    for local_service in all_info_dict[_importance]['loads'][
                            local_load]:
                        if len(
                                nodes
                        ) < _safety and local_service.storagerouter.ip not in nodes:
                            try:
                                SSHClient(local_service.storagerouter)
                                new_services.append(local_service)
                                nodes.add(local_service.storagerouter.ip)
                            except UnableToConnectException:
                                MDSServiceController._logger.debug(
                                    'MDS safety: vDisk {0}: Skipping storagerouter with IP {1} as it is unreachable'
                                    .format(vdisk.guid,
                                            service.storagerouter.ip))
            return nodes, new_services

        MDSServiceController._logger.debug(
            'MDS safety: vDisk {0}: Start checkup for virtual disk {1}'.format(
                vdisk.guid, vdisk.name))
        tlogs = Configuration.get('/ovs/framework/storagedriver|mds_tlogs')
        safety = Configuration.get('/ovs/framework/storagedriver|mds_safety')
        max_load = Configuration.get(
            '/ovs/framework/storagedriver|mds_maxload')

        ######################
        # GATHER INFORMATION #
        ######################
        vdisk.reload_client('storagedriver')
        vdisk.reload_client('objectregistry')

        vdisk.invalidate_dynamics(['storagedriver_id', 'storagerouter_guid'])
        if vdisk.storagerouter_guid is None:
            raise SRCObjectNotFoundException(
                'Cannot ensure MDS safety for vDisk {0} with guid {1} because vDisk is not attached to any Storage Router'
                .format(vdisk.name, vdisk.guid))

        if excluded_storagerouters is None:
            excluded_storagerouters = []

        # Sorted was added merely for unittests, because they rely on specific order of services and their ports
        # Default sorting behavior for relations used to be based on order in which relations were added
        # Now sorting is based on guid (DAL speedup changes)
        nodes = set()
        services = sorted([
            mds_service.service for mds_service in vdisk.vpool.mds_services
            if mds_service.service.storagerouter not in excluded_storagerouters
        ],
                          key=lambda k: k.ports)
        service_per_key = {}
        for service in services:
            nodes.add(service.storagerouter.ip)
            service_per_key['{0}:{1}'.format(service.storagerouter.ip,
                                             service.ports[0])] = service

        # Create a pool of StorageRouters being a part of the primary and secondary domains of this Storage Router
        vdisk_storagerouter = StorageRouter(vdisk.storagerouter_guid)
        primary_domains = [
            junction.domain for junction in vdisk_storagerouter.domains
            if junction.backup is False
        ]
        secondary_domains = [
            junction.domain for junction in vdisk_storagerouter.domains
            if junction.backup is True
        ]
        primary_storagerouters = set()
        secondary_storagerouters = set()
        for domain in primary_domains:
            primary_storagerouters.update(
                StorageRouterList.get_primary_storagerouters_for_domain(
                    domain))
        for domain in secondary_domains:
            secondary_storagerouters.update(
                StorageRouterList.get_primary_storagerouters_for_domain(
                    domain))

        # In case no domains have been configured
        if len(primary_storagerouters) == 0:
            primary_storagerouters = set(
                StorageRouterList.get_storagerouters())

        if vdisk_storagerouter not in primary_storagerouters or vdisk_storagerouter in secondary_storagerouters:
            raise ValueError(
                'StorageRouter {0} for vDisk {1} should be part of the primary domains and NOT be part of the secondary domains'
                .format(vdisk_storagerouter.name, vdisk.name))

        # Remove all storagerouters from secondary which are present in primary
        secondary_storagerouters = secondary_storagerouters.difference(
            primary_storagerouters)

        ###################################
        # VERIFY RECONFIGURATION REQUIRED #
        ###################################
        vdisk.invalidate_dynamics(['info'])
        configs = vdisk.info[
            'metadata_backend_config']  # Ordered MASTER, SLAVE (secondary domain of master)
        master_service = None
        reconfigure_reasons = []
        if len(configs) > 0:
            config = configs.pop(0)
            config_key = '{0}:{1}'.format(config['ip'], config['port'])
            master_service = service_per_key.get(config_key)
            if master_service is None:
                reconfigure_reasons.append(
                    'Master ({0}:{1}) cannot be used anymore'.format(
                        config['ip'], config['port']))
        slave_services = []
        for config in configs:
            config_key = '{0}:{1}'.format(config['ip'], config['port'])
            if config_key in service_per_key:
                slave_services.append(service_per_key[config_key])
            else:
                reconfigure_reasons.append(
                    'Slave ({0}:{1}) cannot be used anymore'.format(
                        config['ip'], config['port']))

        # If MDS already in use, take current load, else take next load
        all_info_dict = {
            'primary': {
                'used': [],
                'loads': {},
                'available': []
            },
            'secondary': {
                'used': [],
                'loads': {},
                'available': []
            }
        }
        services_load = {}
        for service in services:
            importance = None
            if service.storagerouter in primary_storagerouters:
                importance = 'primary'
            elif service.storagerouter in secondary_storagerouters:
                importance = 'secondary'

            loads = MDSServiceController.get_mds_load(service.mds_service)
            if service == master_service or service in slave_services:  # Service is still in use
                load = loads[0]
                if importance is not None:
                    all_info_dict[importance]['used'].append(service)
                else:
                    reconfigure_reasons.append(
                        'Service {0} cannot be used anymore because storagerouter with IP {1} is not part of the domains'
                        .format(service.name, service.storagerouter.ip))
            else:  # Service is not in use, but available
                load = loads[1]
            services_load[service] = load

            if importance is not None:
                all_info_dict[importance]['available'].append(service)
                if load <= max_load:
                    if load not in all_info_dict[importance]['loads']:
                        all_info_dict[importance]['loads'][load] = []
                    all_info_dict[importance]['loads'][load].append(service)

        service_nodes = []
        if master_service is not None:
            service_nodes.append(master_service.storagerouter.ip)
        for service in slave_services:
            ip = service.storagerouter.ip
            if ip in service_nodes:
                reconfigure_reasons.append(
                    'Multiple MDS services on the same node')
            else:
                service_nodes.append(ip)

        if len(service_nodes) > safety:
            reconfigure_reasons.append('Too much safety')
        if len(service_nodes) < safety and len(service_nodes) < len(nodes):
            reconfigure_reasons.append('Not enough safety')
        if master_service is not None and services_load[
                master_service] > max_load:
            reconfigure_reasons.append('Master overloaded')
        if master_service is not None and master_service.storagerouter_guid != vdisk.storagerouter_guid:
            reconfigure_reasons.append('Master is not local')
        if any(service for service in slave_services
               if services_load[service] > max_load):
            reconfigure_reasons.append('One or more slaves overloaded')

        # Check reconfigure required based upon domains
        recommended_primary = math.ceil(
            safety / 2.0) if len(secondary_storagerouters) > 0 else safety
        recommended_secondary = safety - recommended_primary

        if master_service is not None and master_service not in all_info_dict[
                'primary']['used']:
            # Master service not present in primary domain
            reconfigure_reasons.append('Master service not in primary domain')

        primary_services_used = len(all_info_dict['primary']['used'])
        primary_services_available = len(all_info_dict['primary']['available'])
        if primary_services_used < recommended_primary and primary_services_used < primary_services_available:
            # More services can be used in primary domain
            reconfigure_reasons.append(
                'Not enough services in use in primary domain')
        if primary_services_used > recommended_primary:
            # Too many services in primary domain
            reconfigure_reasons.append(
                'Too many services in use in primary domain')

        # More services can be used in secondary domain
        secondary_services_used = len(all_info_dict['secondary']['used'])
        secondary_services_available = len(
            all_info_dict['secondary']['available'])
        if secondary_services_used < recommended_secondary and secondary_services_used < secondary_services_available:
            reconfigure_reasons.append(
                'Not enough services in use in secondary domain')
        if secondary_services_used > recommended_secondary:
            # Too many services in secondary domain
            reconfigure_reasons.append(
                'Too many services in use in secondary domain')

        # If secondary domain present, check order in which the slave services are configured
        secondary = False
        for slave_service in slave_services:
            if secondary is True and slave_service in all_info_dict['primary'][
                    'used']:
                reconfigure_reasons.append(
                    'A slave in secondary domain has priority over a slave in primary domain'
                )
                break
            if slave_service in all_info_dict['secondary']['used']:
                secondary = True

        if not reconfigure_reasons:
            MDSServiceController._logger.debug(
                'MDS safety: vDisk {0}: No reconfiguration required'.format(
                    vdisk.guid))
            MDSServiceController.sync_vdisk_to_reality(vdisk)
            return

        MDSServiceController._logger.debug(
            'MDS safety: vDisk {0}: Reconfiguration required. Reasons:'.format(
                vdisk.guid))
        for reason in reconfigure_reasons:
            MDSServiceController._logger.debug(
                'MDS safety: vDisk {0}:    * {1}'.format(vdisk.guid, reason))

        ############################
        # CREATE NEW CONFIGURATION #
        ############################

        # Check whether the master (if available) is non-local to the vdisk and/or is overloaded
        new_services = []
        master_ok = master_service is not None
        if master_ok is True:
            master_ok = master_service.storagerouter_guid == vdisk.storagerouter_guid and services_load[
                master_service] <= max_load

        previous_master = None
        if master_ok:
            # Add this master to the fresh configuration
            new_services.append(master_service)
        else:
            # Try to find the best non-overloaded LOCAL MDS slave to make master
            candidate_master_service = None
            candidate_master_load = 0
            local_mds = None
            local_mds_load = 0
            for service in all_info_dict['primary']['available']:
                load = services_load[service]
                if load <= max_load and service.storagerouter_guid == vdisk.storagerouter_guid:
                    if local_mds is None or local_mds_load > load:
                        # This service is a non-overloaded local MDS
                        local_mds = service
                        local_mds_load = load
                    if service in slave_services:
                        if candidate_master_service is None or candidate_master_load > load:
                            # This service is a non-overloaded local slave
                            candidate_master_service = service
                            candidate_master_load = load
            if candidate_master_service is not None:
                # A non-overloaded local slave was found.
                client = MetadataServerClient.load(candidate_master_service)
                try:
                    amount_of_tlogs = client.catch_up(str(vdisk.volume_id),
                                                      True)
                except RuntimeError as ex:
                    if 'Namespace does not exist' in ex.message:
                        client.create_namespace(str(vdisk.volume_id))
                        amount_of_tlogs = client.catch_up(
                            str(vdisk.volume_id), True)
                    else:
                        raise
                if amount_of_tlogs < tlogs:
                    # Almost there. Catching up right now, and continue as soon as it's up-to-date
                    start = time.time()
                    client.catch_up(str(vdisk.volume_id), False)
                    MDSServiceController._logger.debug(
                        'MDS safety: vDisk {0}: Catchup took {1}s'.format(
                            vdisk.guid, round(time.time() - start, 2)))
                    # It's up to date, so add it as a new master
                    new_services.append(candidate_master_service)
                    if master_service is not None:
                        # The current master (if available) is now candidate to become one of the slaves
                        slave_services.append(master_service)
                        previous_master = master_service
                else:
                    # It's not up to date, keep the previous master (if available) and give the local slave some more time to catch up
                    if master_service is not None:
                        new_services.append(master_service)
                    new_services.append(candidate_master_service)
                if candidate_master_service in slave_services:
                    slave_services.remove(candidate_master_service)
            else:
                # There's no non-overloaded local slave found. Keep the current master (if available) and add a local MDS (if available) as slave
                if master_service is not None:
                    new_services.append(master_service)
                if local_mds is not None:
                    new_services.append(local_mds)
                    if local_mds in slave_services:
                        slave_services.remove(local_mds)

        # At this point, there might (or might not) be a (new) master, and a (catching up) slave. The rest of the non-local
        # MDS nodes must now be added to the configuration until the safety is reached. There's always one extra
        # slave recycled to make sure there's always an (almost) up-to-date slave ready for failover
        nodes = set(service.storagerouter.ip for service in new_services)

        # Recycle slave for faster failover
        secondary_node_count = 0
        service_to_recycle = None
        if len(nodes) < safety:
            if recommended_primary > 1:  # If primary is 1, we only have master in primary
                # Try to recycle slave which is in primary domain
                for load in sorted(all_info_dict['primary']['loads']):
                    for service in all_info_dict['primary']['loads'][load]:
                        if service_to_recycle is None and service in slave_services and service.storagerouter.ip not in nodes:
                            try:
                                SSHClient(service.storagerouter)
                                service_to_recycle = service
                            except UnableToConnectException:
                                MDSServiceController._logger.debug(
                                    'MDS safety: vDisk {0}: Skipping storagerouter with IP {1} as it is unreachable'
                                    .format(vdisk.guid,
                                            service.storagerouter.ip))
            # Try to recycle slave which is in secondary domain if none found in primary
            if service_to_recycle is None and len(
                    secondary_storagerouters) > 0:
                for load in sorted(all_info_dict['secondary']['loads']):
                    for service in all_info_dict['secondary']['loads'][load]:
                        if service_to_recycle is None and service in slave_services and service.storagerouter.ip not in nodes:
                            try:
                                SSHClient(service.storagerouter)
                                service_to_recycle = service
                                secondary_node_count = 1  # We do not want to configure the secondary slave BEFORE the primary slaves
                            except UnableToConnectException:
                                MDSServiceController._logger.debug(
                                    'MDS safety: vDisk {0}: Skipping storagerouter with IP {1} as it is unreachable'
                                    .format(vdisk.guid,
                                            service.storagerouter.ip))
        if service_to_recycle is not None:
            slave_services.remove(service_to_recycle)
            if secondary_node_count == 0:  # Add service to recycle because its in primary domain
                new_services.append(service_to_recycle)
                nodes.add(service_to_recycle.storagerouter.ip)

        # Add extra (new) slaves until primary safety reached
        nodes, new_services = _add_suitable_nodes(_importance='primary',
                                                  _safety=recommended_primary)

        # Add recycled secondary slave after primary slaves have been added
        if secondary_node_count == 1:
            new_services.append(service_to_recycle)
            nodes.add(service_to_recycle.storagerouter.ip)

        # Add extra (new) slaves until secondary safety reached
        if len(secondary_storagerouters) > 0:
            nodes, new_services = _add_suitable_nodes(_importance='secondary',
                                                      _safety=safety)
            # Add extra slaves from primary domain in case no suitable nodes found in secondary domain
            if len(nodes) < safety:
                nodes, new_services = _add_suitable_nodes(
                    _importance='primary', _safety=safety)

        # Build the new configuration and update the vdisk
        configs_no_ex_master = []
        configs_all = []
        for service in new_services:
            client = MetadataServerClient.load(service)
            client.create_namespace(str(vdisk.volume_id))
            # noinspection PyArgumentList
            config = MDSNodeConfig(address=str(service.storagerouter.ip),
                                   port=service.ports[0])
            if previous_master != service:
                configs_no_ex_master.append(config)
            configs_all.append(config)
        try:
            if len(configs_no_ex_master) != len(configs_all):
                vdisk.storagedriver_client.update_metadata_backend_config(
                    volume_id=str(vdisk.volume_id),
                    metadata_backend_config=MDSMetaDataBackendConfig(
                        configs_no_ex_master))
            vdisk.storagedriver_client.update_metadata_backend_config(
                volume_id=str(vdisk.volume_id),
                metadata_backend_config=MDSMetaDataBackendConfig(configs_all))
        except Exception:
            MDSServiceController._logger.exception(
                'MDS safety: vDisk {0}: Failed to update the metadata backend configuration'
                .format(vdisk.guid))
            raise Exception(
                'MDS configuration for volume {0} with guid {1} could not be changed'
                .format(vdisk.name, vdisk.guid))

        for service in new_services[1:]:
            client = MetadataServerClient.load(service)
            client.set_role(str(vdisk.volume_id),
                            MetadataServerClient.MDS_ROLE.SLAVE)

        MDSServiceController.sync_vdisk_to_reality(vdisk)
        MDSServiceController._logger.debug(
            'MDS safety: vDisk {0}: Completed'.format(vdisk.guid))
コード例 #4
0
ファイル: mdsservice.py プロジェクト: grimpy/openvstorage
    def ensure_safety(vdisk, excluded_storagerouters=None):
        """
        Ensures (or tries to ensure) the safety of a given vdisk (except hypervisor).
        Assumptions:
        * A local overloaded master is better than a non-local non-overloaded master
        * Prefer master/services to be on different hosts, a subsequent slave on the same node doesn't add safety
        * Don't actively overload services (e.g. configure an MDS as slave causing it to get overloaded)
        * Too much safety is not wanted (it adds loads to nodes while not required)
        :param vdisk: vDisk to calculate a new safety for
        :type vdisk: VDisk

        :param excluded_storagerouters: Storagerouters to leave out of calculation (Eg: When 1 is down or unavailable)
        :type excluded_storagerouters: list

        :return: None
        """

        def _add_suitable_nodes(_importance, _safety):
            """
            Add reachable, non-overloaded services of the given domain importance ('primary' or 'secondary')
            to the new configuration until ``_safety`` distinct nodes are in use.
            Reads and mutates the enclosing scope's ``nodes`` and ``new_services``; returns both for clarity.
            """
            if len(nodes) < _safety:
                for local_load in sorted(all_info_dict[_importance]["loads"]):
                    for local_service in all_info_dict[_importance]["loads"][local_load]:
                        if len(nodes) < _safety and local_service.storagerouter.ip not in nodes:
                            try:
                                SSHClient(local_service.storagerouter)
                                new_services.append(local_service)
                                nodes.add(local_service.storagerouter.ip)
                            except UnableToConnectException:
                                MDSServiceController._logger.debug(
                                    "MDS safety: vDisk {0}: Skipping storagerouter with IP {1} as it is unreachable".format(
                                        # Fixed: previously logged 'service.storagerouter.ip' - a stale
                                        # variable from the enclosing method's earlier loops - instead of
                                        # the service actually being skipped here.
                                        vdisk.guid, local_service.storagerouter.ip
                                    )
                                )
            return nodes, new_services

        MDSServiceController._logger.debug(
            "MDS safety: vDisk {0}: Start checkup for virtual disk {1}".format(vdisk.guid, vdisk.name)
        )
        tlogs = Configuration.get("/ovs/framework/storagedriver|mds_tlogs")
        safety = Configuration.get("/ovs/framework/storagedriver|mds_safety")
        max_load = Configuration.get("/ovs/framework/storagedriver|mds_maxload")

        ######################
        # GATHER INFORMATION #
        ######################
        vdisk.reload_client("storagedriver")
        vdisk.reload_client("objectregistry")

        vdisk.invalidate_dynamics(["storagedriver_id", "storagerouter_guid"])
        if vdisk.storagerouter_guid is None:
            raise SRCObjectNotFoundException(
                "Cannot ensure MDS safety for vDisk {0} with guid {1} because vDisk is not attached to any Storage Router".format(
                    vdisk.name, vdisk.guid
                )
            )

        if excluded_storagerouters is None:
            excluded_storagerouters = []

        # Sorted was added merely for unittests, because they rely on specific order of services and their ports
        # Default sorting behavior for relations used to be based on order in which relations were added
        # Now sorting is based on guid (DAL speedup changes)
        nodes = set()
        services = sorted(
            [
                mds_service.service
                for mds_service in vdisk.vpool.mds_services
                if mds_service.service.storagerouter not in excluded_storagerouters
            ],
            key=lambda k: k.ports,
        )
        service_per_key = {}
        for service in services:
            nodes.add(service.storagerouter.ip)
            service_per_key["{0}:{1}".format(service.storagerouter.ip, service.ports[0])] = service

        # Create a pool of StorageRouters being a part of the primary and secondary domains of this Storage Router
        vdisk_storagerouter = StorageRouter(vdisk.storagerouter_guid)
        primary_domains = [junction.domain for junction in vdisk_storagerouter.domains if junction.backup is False]
        secondary_domains = [junction.domain for junction in vdisk_storagerouter.domains if junction.backup is True]
        primary_storagerouters = set()
        secondary_storagerouters = set()
        for domain in primary_domains:
            primary_storagerouters.update(StorageRouterList.get_primary_storagerouters_for_domain(domain))
        for domain in secondary_domains:
            secondary_storagerouters.update(StorageRouterList.get_primary_storagerouters_for_domain(domain))

        # In case no domains have been configured
        if len(primary_storagerouters) == 0:
            primary_storagerouters = set(StorageRouterList.get_storagerouters())

        if vdisk_storagerouter not in primary_storagerouters or vdisk_storagerouter in secondary_storagerouters:
            raise ValueError(
                "StorageRouter {0} for vDisk {1} should be part of the primary domains and NOT be part of the secondary domains".format(
                    vdisk_storagerouter.name, vdisk.name
                )
            )

        # Remove all storagerouters from secondary which are present in primary
        secondary_storagerouters = secondary_storagerouters.difference(primary_storagerouters)

        ###################################
        # VERIFY RECONFIGURATION REQUIRED #
        ###################################
        vdisk.invalidate_dynamics(["info"])
        configs = vdisk.info["metadata_backend_config"]  # Ordered MASTER, SLAVE (secondary domain of master)
        master_service = None
        reconfigure_reasons = []
        if len(configs) > 0:
            config = configs.pop(0)  # First entry in the ordered config is the current master
            config_key = "{0}:{1}".format(config["ip"], config["port"])
            master_service = service_per_key.get(config_key)
            if master_service is None:
                reconfigure_reasons.append(
                    "Master ({0}:{1}) cannot be used anymore".format(config["ip"], config["port"])
                )
        slave_services = []
        for config in configs:
            config_key = "{0}:{1}".format(config["ip"], config["port"])
            if config_key in service_per_key:
                slave_services.append(service_per_key[config_key])
            else:
                reconfigure_reasons.append(
                    "Slave ({0}:{1}) cannot be used anymore".format(config["ip"], config["port"])
                )

        # If MDS already in use, take current load, else take next load
        all_info_dict = {
            "primary": {"used": [], "loads": {}, "available": []},
            "secondary": {"used": [], "loads": {}, "available": []},
        }
        services_load = {}
        for service in services:
            importance = None
            if service.storagerouter in primary_storagerouters:
                importance = "primary"
            elif service.storagerouter in secondary_storagerouters:
                importance = "secondary"

            loads = MDSServiceController.get_mds_load(service.mds_service)
            if service == master_service or service in slave_services:  # Service is still in use
                load = loads[0]
                if importance is not None:
                    all_info_dict[importance]["used"].append(service)
                else:
                    reconfigure_reasons.append(
                        "Service {0} cannot be used anymore because storagerouter with IP {1} is not part of the domains".format(
                            service.name, service.storagerouter.ip
                        )
                    )
            else:  # Service is not in use, but available
                load = loads[1]
            services_load[service] = load

            if importance is not None:
                all_info_dict[importance]["available"].append(service)
                if load <= max_load:
                    if load not in all_info_dict[importance]["loads"]:
                        all_info_dict[importance]["loads"][load] = []
                    all_info_dict[importance]["loads"][load].append(service)

        service_nodes = []
        if master_service is not None:
            service_nodes.append(master_service.storagerouter.ip)
        for service in slave_services:
            ip = service.storagerouter.ip
            if ip in service_nodes:
                reconfigure_reasons.append("Multiple MDS services on the same node")
            else:
                service_nodes.append(ip)

        if len(service_nodes) > safety:
            reconfigure_reasons.append("Too much safety")
        if len(service_nodes) < safety and len(service_nodes) < len(nodes):
            reconfigure_reasons.append("Not enough safety")
        if master_service is not None and services_load[master_service] > max_load:
            reconfigure_reasons.append("Master overloaded")
        if master_service is not None and master_service.storagerouter_guid != vdisk.storagerouter_guid:
            reconfigure_reasons.append("Master is not local")
        if any(service for service in slave_services if services_load[service] > max_load):
            reconfigure_reasons.append("One or more slaves overloaded")

        # Check reconfigure required based upon domains
        recommended_primary = math.ceil(safety / 2.0) if len(secondary_storagerouters) > 0 else safety
        recommended_secondary = safety - recommended_primary

        if master_service is not None and master_service not in all_info_dict["primary"]["used"]:
            # Master service not present in primary domain
            reconfigure_reasons.append("Master service not in primary domain")

        primary_services_used = len(all_info_dict["primary"]["used"])
        primary_services_available = len(all_info_dict["primary"]["available"])
        if primary_services_used < recommended_primary and primary_services_used < primary_services_available:
            # More services can be used in primary domain
            reconfigure_reasons.append("Not enough services in use in primary domain")
        if primary_services_used > recommended_primary:
            # Too many services in primary domain
            reconfigure_reasons.append("Too many services in use in primary domain")

        # More services can be used in secondary domain
        secondary_services_used = len(all_info_dict["secondary"]["used"])
        secondary_services_available = len(all_info_dict["secondary"]["available"])
        if secondary_services_used < recommended_secondary and secondary_services_used < secondary_services_available:
            reconfigure_reasons.append("Not enough services in use in secondary domain")
        if secondary_services_used > recommended_secondary:
            # Too many services in secondary domain
            reconfigure_reasons.append("Too many services in use in secondary domain")

        # If secondary domain present, check order in which the slave services are configured
        secondary = False
        for slave_service in slave_services:
            if secondary is True and slave_service in all_info_dict["primary"]["used"]:
                reconfigure_reasons.append("A slave in secondary domain has priority over a slave in primary domain")
                break
            if slave_service in all_info_dict["secondary"]["used"]:
                secondary = True

        if not reconfigure_reasons:
            MDSServiceController._logger.debug("MDS safety: vDisk {0}: No reconfiguration required".format(vdisk.guid))
            MDSServiceController.sync_vdisk_to_reality(vdisk)
            return

        MDSServiceController._logger.debug(
            "MDS safety: vDisk {0}: Reconfiguration required. Reasons:".format(vdisk.guid)
        )
        for reason in reconfigure_reasons:
            MDSServiceController._logger.debug("MDS safety: vDisk {0}:    * {1}".format(vdisk.guid, reason))

        ############################
        # CREATE NEW CONFIGURATION #
        ############################

        # Check whether the master (if available) is non-local to the vdisk and/or is overloaded
        new_services = []
        master_ok = master_service is not None
        if master_ok is True:
            master_ok = (
                master_service.storagerouter_guid == vdisk.storagerouter_guid
                and services_load[master_service] <= max_load
            )

        previous_master = None
        if master_ok:
            # Add this master to the fresh configuration
            new_services.append(master_service)
        else:
            # Try to find the best non-overloaded LOCAL MDS slave to make master
            candidate_master_service = None
            candidate_master_load = 0
            local_mds = None
            local_mds_load = 0
            for service in all_info_dict["primary"]["available"]:
                load = services_load[service]
                if load <= max_load and service.storagerouter_guid == vdisk.storagerouter_guid:
                    if local_mds is None or local_mds_load > load:
                        # This service is a non-overloaded local MDS
                        local_mds = service
                        local_mds_load = load
                    if service in slave_services:
                        if candidate_master_service is None or candidate_master_load > load:
                            # This service is a non-overloaded local slave
                            candidate_master_service = service
                            candidate_master_load = load
            if candidate_master_service is not None:
                # A non-overloaded local slave was found.
                client = MetadataServerClient.load(candidate_master_service)
                try:
                    amount_of_tlogs = client.catch_up(str(vdisk.volume_id), True)
                except RuntimeError as ex:
                    # str(ex) instead of ex.message: 'message' is a Python 2-only attribute and
                    # raises AttributeError on Python 3; str() contains the same text on both.
                    if "Namespace does not exist" in str(ex):
                        client.create_namespace(str(vdisk.volume_id))
                        amount_of_tlogs = client.catch_up(str(vdisk.volume_id), True)
                    else:
                        raise
                if amount_of_tlogs < tlogs:
                    # Almost there. Catching up right now, and continue as soon as it's up-to-date
                    start = time.time()
                    client.catch_up(str(vdisk.volume_id), False)
                    MDSServiceController._logger.debug(
                        "MDS safety: vDisk {0}: Catchup took {1}s".format(vdisk.guid, round(time.time() - start, 2))
                    )
                    # It's up to date, so add it as a new master
                    new_services.append(candidate_master_service)
                    if master_service is not None:
                        # The current master (if available) is now candidate to become one of the slaves
                        slave_services.append(master_service)
                        previous_master = master_service
                else:
                    # It's not up to date, keep the previous master (if available) and give the local slave some more time to catch up
                    if master_service is not None:
                        new_services.append(master_service)
                    new_services.append(candidate_master_service)
                if candidate_master_service in slave_services:
                    slave_services.remove(candidate_master_service)
            else:
                # There's no non-overloaded local slave found. Keep the current master (if available) and add a local MDS (if available) as slave
                if master_service is not None:
                    new_services.append(master_service)
                if local_mds is not None:
                    new_services.append(local_mds)
                    if local_mds in slave_services:
                        slave_services.remove(local_mds)

        # At this point, there might (or might not) be a (new) master, and a (catching up) slave. The rest of the non-local
        # MDS nodes must now be added to the configuration until the safety is reached. There's always one extra
        # slave recycled to make sure there's always an (almost) up-to-date slave ready for failover
        nodes = set(service.storagerouter.ip for service in new_services)

        # Recycle slave for faster failover
        secondary_node_count = 0
        service_to_recycle = None
        if len(nodes) < safety:
            if recommended_primary > 1:  # If primary is 1, we only have master in primary
                # Try to recycle slave which is in primary domain
                for load in sorted(all_info_dict["primary"]["loads"]):
                    for service in all_info_dict["primary"]["loads"][load]:
                        if (
                            service_to_recycle is None
                            and service in slave_services
                            and service.storagerouter.ip not in nodes
                        ):
                            try:
                                SSHClient(service.storagerouter)
                                service_to_recycle = service
                            except UnableToConnectException:
                                MDSServiceController._logger.debug(
                                    "MDS safety: vDisk {0}: Skipping storagerouter with IP {1} as it is unreachable".format(
                                        vdisk.guid, service.storagerouter.ip
                                    )
                                )
            # Try to recycle slave which is in secondary domain if none found in primary
            if service_to_recycle is None and len(secondary_storagerouters) > 0:
                for load in sorted(all_info_dict["secondary"]["loads"]):
                    for service in all_info_dict["secondary"]["loads"][load]:
                        if (
                            service_to_recycle is None
                            and service in slave_services
                            and service.storagerouter.ip not in nodes
                        ):
                            try:
                                SSHClient(service.storagerouter)
                                service_to_recycle = service
                                secondary_node_count = (
                                    1
                                )  # We do not want to configure the secondary slave BEFORE the primary slaves
                            except UnableToConnectException:
                                MDSServiceController._logger.debug(
                                    "MDS safety: vDisk {0}: Skipping storagerouter with IP {1} as it is unreachable".format(
                                        vdisk.guid, service.storagerouter.ip
                                    )
                                )
        if service_to_recycle is not None:
            slave_services.remove(service_to_recycle)
            if secondary_node_count == 0:  # Add service to recycle because its in primary domain
                new_services.append(service_to_recycle)
                nodes.add(service_to_recycle.storagerouter.ip)

        # Add extra (new) slaves until primary safety reached
        nodes, new_services = _add_suitable_nodes(_importance="primary", _safety=recommended_primary)

        # Add recycled secondary slave after primary slaves have been added
        if secondary_node_count == 1:
            new_services.append(service_to_recycle)
            nodes.add(service_to_recycle.storagerouter.ip)

        # Add extra (new) slaves until secondary safety reached
        if len(secondary_storagerouters) > 0:
            nodes, new_services = _add_suitable_nodes(_importance="secondary", _safety=safety)
            # Add extra slaves from primary domain in case no suitable nodes found in secondary domain
            if len(nodes) < safety:
                nodes, new_services = _add_suitable_nodes(_importance="primary", _safety=safety)

        # Build the new configuration and update the vdisk
        configs_no_ex_master = []
        configs_all = []
        for service in new_services:
            client = MetadataServerClient.load(service)
            client.create_namespace(str(vdisk.volume_id))
            # noinspection PyArgumentList
            config = MDSNodeConfig(address=str(service.storagerouter.ip), port=service.ports[0])
            if previous_master != service:
                configs_no_ex_master.append(config)
            configs_all.append(config)
        try:
            # When the demoted ex-master is part of the new config, first push a config without it
            # so the storagedriver detaches from it cleanly before the full config is applied
            if len(configs_no_ex_master) != len(configs_all):
                vdisk.storagedriver_client.update_metadata_backend_config(
                    volume_id=str(vdisk.volume_id),
                    metadata_backend_config=MDSMetaDataBackendConfig(configs_no_ex_master),
                )
            vdisk.storagedriver_client.update_metadata_backend_config(
                volume_id=str(vdisk.volume_id), metadata_backend_config=MDSMetaDataBackendConfig(configs_all)
            )
        except Exception:
            MDSServiceController._logger.exception(
                "MDS safety: vDisk {0}: Failed to update the metadata backend configuration".format(vdisk.guid)
            )
            raise Exception(
                "MDS configuration for volume {0} with guid {1} could not be changed".format(vdisk.name, vdisk.guid)
            )

        # First entry of new_services is the master; everything after it is explicitly demoted to slave
        for service in new_services[1:]:
            client = MetadataServerClient.load(service)
            client.set_role(str(vdisk.volume_id), MetadataServerClient.MDS_ROLE.SLAVE)

        MDSServiceController.sync_vdisk_to_reality(vdisk)
        MDSServiceController._logger.debug("MDS safety: vDisk {0}: Completed".format(vdisk.guid))
コード例 #5
0
ファイル: mdsservice.py プロジェクト: JasperLue/openvstorage
    def ensure_safety(vdisk, excluded_storagerouters=None):
        """
        Ensures (or tries to ensure) the safety of a given vdisk (except hypervisor).
        Assumptions:
        * A local overloaded master is better than a non-local non-overloaded master
        * Prefer master/services to be on different hosts, a subsequent slave on the same node doesn't add safety
        * Don't actively overload services (e.g. configure an MDS as slave causing it to get overloaded)
        * Too much safety is not wanted (it adds loads to nodes while not required)
        :param vdisk:                   vDisk to calculate a new safety for
        :param excluded_storagerouters: Storagerouters to leave out of calculation (Eg: When 1 is down or unavailable)
        """

        logger.debug('Ensuring MDS safety for vDisk {0} with guid {1}'.format(vdisk.name, vdisk.guid))
        # Refresh the storagedriver client and the dynamic properties this calculation depends on
        vdisk.reload_client()
        vdisk.invalidate_dynamics(['info', 'storagedriver_id', 'storagerouter_guid'])
        if excluded_storagerouters is None:
            excluded_storagerouters = []

        # All MDS services of the vPool, minus those on explicitly excluded storagerouters
        services = [mds_service.service for mds_service in vdisk.vpool.mds_services
                    if mds_service.service.storagerouter not in excluded_storagerouters]
        nodes = set(service.storagerouter.ip for service in services)

        # Resolve the primary and (optional) secondary failure domain for this vDisk.
        # A secondary failure domain set on the vDisk itself overrules the storagerouter's one.
        vdisk_storagerouter = StorageRouter(vdisk.storagerouter_guid)
        primary_failure_domain = vdisk_storagerouter.primary_failure_domain
        if vdisk.secondary_failure_domain is not None:
            secondary_failure_domain = vdisk.secondary_failure_domain
        else:
            secondary_failure_domain = vdisk_storagerouter.secondary_failure_domain

        # Per failure domain bookkeeping:
        # * load dict: {load: [services]} - only non-overloaded services (filled in further below)
        # * used services: services currently part of this vDisk's MDS configuration
        # * available services: all modelled services in that domain
        # * storage_router_failure_domain_dict maps each storagerouter to the domain it belongs to
        failure_domain_load_dict = {primary_failure_domain: {}}
        failure_domain_used_services_dict = {primary_failure_domain: []}
        failure_domain_available_services_dict = {primary_failure_domain: []}
        storage_router_failure_domain_dict = dict((storage_router, primary_failure_domain) for storage_router in primary_failure_domain.primary_storagerouters)

        if secondary_failure_domain is not None:
            failure_domain_load_dict[secondary_failure_domain] = {}
            failure_domain_used_services_dict[secondary_failure_domain] = []
            failure_domain_available_services_dict[secondary_failure_domain] = []
            storage_router_failure_domain_dict.update(dict((storage_router, secondary_failure_domain) for storage_router in secondary_failure_domain.primary_storagerouters))

        services_load = {}
        service_per_key = {}
        for service in services:
            # get_mds_load returns a 2-tuple; index 0 is used below for services already in this
            # vDisk's configuration, index 1 for services that would gain this vDisk as extra load
            services_load[service] = MDSServiceController.get_mds_load(service.mds_service)
            service_per_key['{0}:{1}'.format(service.storagerouter.ip, service.ports[0])] = service

        # List current configuration and filter out excluded services
        reconfigure_reasons = []
        configs = vdisk.info['metadata_backend_config']  # Ordered MASTER, SLAVE1 (same failure domain as master), SLAVE2 (backup failure domain of master)
        for config in configs:
            config['key'] = '{0}:{1}'.format(config['ip'], config['port'])
        master_service = None
        if len(configs) > 0:
            # First entry in the backend config is the current master
            config = configs.pop(0)
            if config['key'] in service_per_key:
                master_service = service_per_key.get(config['key'])
            else:
                reconfigure_reasons.append('Master ({0}:{1}) cannot be used anymore'.format(config['ip'], config['port']))
        slave_services = []
        for config in configs:
            if config['key'] in service_per_key:
                slave_services.append(service_per_key[config['key']])
            else:
                reconfigure_reasons.append('Slave ({0}:{1}) cannot be used anymore'.format(config['ip'], config['port']))

        # Fix services_load: collapse the 2-tuple into the single relevant load value per service
        tlogs = Configuration.get('ovs.storagedriver.mds.tlogs')
        safety = Configuration.get('ovs.storagedriver.mds.safety')
        max_load = Configuration.get('ovs.storagedriver.mds.maxload')
        for service in services:
            if service == master_service or service in slave_services:
                load = services_load[service][0]
                if service.storagerouter in storage_router_failure_domain_dict:  # Services in use per failure domain
                    failure_domain_used_services_dict[storage_router_failure_domain_dict[service.storagerouter]].append(service)
                else:
                    reconfigure_reasons.append('Service {0} cannot be used anymore because storagerouter with IP {1} is not part of the failure domains'.format(service.name, service.storagerouter.ip))
            else:
                load = services_load[service][1]
            services_load[service] = load
            if service.storagerouter in storage_router_failure_domain_dict:  # All services available in model per failure domain
                failure_domain = storage_router_failure_domain_dict[service.storagerouter]
                failure_domain_available_services_dict[failure_domain].append(service)
                if load <= max_load:
                    # Only non-overloaded services are candidates for (re)configuration
                    if load not in failure_domain_load_dict[failure_domain]:
                        failure_domain_load_dict[failure_domain][load] = []
                    failure_domain_load_dict[failure_domain][load].append(service)

        # Further checks if a reconfiguration is required.
        service_nodes = []
        if master_service is not None:
            service_nodes.append(master_service.storagerouter.ip)
        for service in slave_services:
            ip = service.storagerouter.ip
            if ip in service_nodes:
                # Multiple services on the same node don't add safety
                reconfigure_reasons.append('Multiple MDS services on the same node')
            else:
                service_nodes.append(ip)

        if len(service_nodes) > safety:
            # Too much safety
            reconfigure_reasons.append('Too much safety')
        if len(service_nodes) < safety and len(service_nodes) < len(nodes):
            # Insufficient MDS services configured while there should be sufficient nodes available
            reconfigure_reasons.append('Not enough safety')
        if master_service is not None and services_load[master_service] > max_load:
            # The master service is overloaded
            reconfigure_reasons.append('Master overloaded')
        if master_service is not None and master_service.storagerouter_guid != vdisk.storagerouter_guid:
            # The master is not local
            reconfigure_reasons.append('Master is not local')
        if any(service for service in slave_services if services_load[service] > max_load):
            # There's a slave service overloaded
            reconfigure_reasons.append('One or more slaves overloaded')

        # Check reconfigure required based upon failure domains
        # At least half of the safety (rounded up) should live in the primary failure domain;
        # without a secondary domain the full safety must come from the primary one
        recommended_primary = math.ceil(safety / 2.0) if secondary_failure_domain is not None else safety
        recommended_secondary = safety - recommended_primary

        if master_service is not None and master_service not in failure_domain_used_services_dict[primary_failure_domain]:
            # Master service not present in primary failure domain
            reconfigure_reasons.append('Master service not in primary failure domain')

        primary_services_used = len(failure_domain_used_services_dict[primary_failure_domain])
        primary_services_available = len(failure_domain_available_services_dict[primary_failure_domain])
        if primary_services_used < recommended_primary and primary_services_used < primary_services_available:
            # More services can be used in primary failure domain
            reconfigure_reasons.append('Not enough services in use in primary failure domain')

        if secondary_failure_domain is not None:
            # More services can be used in secondary failure domain
            secondary_services_used = len(failure_domain_used_services_dict[secondary_failure_domain])
            secondary_services_available = len(failure_domain_available_services_dict[secondary_failure_domain])
            if secondary_services_used < recommended_secondary and secondary_services_used < secondary_services_available:
                reconfigure_reasons.append('Not enough services in use in secondary failure domain')

            # If secondary failure domain present, check order in which the slave services are configured:
            # all primary-domain slaves must come before any secondary-domain slave
            secondary = False
            for slave_service in slave_services:
                if secondary is True and slave_service in failure_domain_used_services_dict[primary_failure_domain]:
                    reconfigure_reasons.append('A slave in secondary failure domain has priority over a slave in primary failure domain')
                    break
                if slave_service in failure_domain_used_services_dict[secondary_failure_domain]:
                    secondary = True

        if not reconfigure_reasons:
            logger.debug('No reconfiguration required for vdisk {0} with guid {1}'.format(vdisk.name, vdisk.guid))
            MDSServiceController.sync_vdisk_to_reality(vdisk)
            return

        logger.debug('Reconfiguration required for vdisk {0} with guid {1}'.format(vdisk.name, vdisk.guid))
        for reason in reconfigure_reasons:
            logger.debug('Reason: {0} - vdisk {1} with guid {2}'.format(reason, vdisk.name, vdisk.guid))
        # Prepare fresh configuration
        new_services = []

        # Check whether the master (if available) is non-local to the vdisk and/or is overloaded
        master_ok = master_service is not None
        if master_ok is True:
            master_ok = master_service.storagerouter_guid == vdisk.storagerouter_guid and services_load[master_service] <= max_load

        if master_ok:
            # Add this master to the fresh configuration
            new_services.append(master_service)
        else:
            # Try to find the best non-overloaded local MDS slave to make master
            candidate_master_service = None
            candidate_master_load = 0
            local_mds = None
            local_mds_load = 0
            for service in failure_domain_available_services_dict[primary_failure_domain]:
                load = services_load[service]
                if load <= max_load and service.storagerouter_guid == vdisk.storagerouter_guid:
                    if local_mds is None or local_mds_load > load:
                        # This service is a non-overloaded local MDS
                        local_mds = service
                        local_mds_load = load
                    if service in slave_services:
                        if candidate_master_service is None or candidate_master_load > load:
                            # This service is a non-overloaded local slave
                            candidate_master_service = service
                            candidate_master_load = load
            if candidate_master_service is not None:
                # A non-overloaded local slave was found.
                client = MetadataServerClient.load(candidate_master_service)
                try:
                    # Dry-run catch up: returns the amount of tlogs this slave is behind
                    amount_of_tlogs = client.catch_up(str(vdisk.volume_id), True)
                except RuntimeError as ex:
                    if 'Namespace does not exist' in ex.message:
                        client.create_namespace(str(vdisk.volume_id))
                        amount_of_tlogs = client.catch_up(str(vdisk.volume_id), True)
                    else:
                        raise
                if amount_of_tlogs < tlogs:
                    # Almost there. Catching up right now, and continue as soon as it's up-to-date
                    start = time.time()
                    client.catch_up(str(vdisk.volume_id), False)
                    logger.debug('MDS catch up for vdisk {0} took {1}s'.format(vdisk.guid, round(time.time() - start, 2)))
                    # It's up to date, so add it as a new master
                    new_services.append(candidate_master_service)
                    if master_service is not None:
                        # The current master (if available) is now candidate to become one of the slaves
                        slave_services.append(master_service)
                else:
                    # It's not up to date, keep the previous master (if available) and give the local slave
                    # some more time to catch up
                    if master_service is not None:
                        new_services.append(master_service)
                    new_services.append(candidate_master_service)
                if candidate_master_service in slave_services:
                    slave_services.remove(candidate_master_service)
            else:
                # There's no non-overloaded local slave found. Keep the current master (if available) and add
                # a local MDS (if available) as slave
                if master_service is not None:
                    new_services.append(master_service)
                if local_mds is not None:
                    new_services.append(local_mds)
                    if local_mds in slave_services:
                        slave_services.remove(local_mds)

        # At this point, there might (or might not) be a (new) master, and a (catching up) slave. The rest of the non-local
        # MDS nodes must now be added to the configuration until the safety is reached. There's always one extra
        # slave recycled to make sure there's always an (almost) up-to-date slave ready for failover
        nodes = set(service.storagerouter.ip for service in new_services)

        # Recycle slave for faster failover
        secondary_node_count = 0
        service_to_recycle = None
        if len(nodes) < safety:
            # Try to recycle slave which is in primary failure domain
            for load in sorted(failure_domain_load_dict[primary_failure_domain]):
                for service in failure_domain_load_dict[primary_failure_domain][load]:
                    if service_to_recycle is None and service in slave_services and service.storagerouter.ip not in nodes:
                        try:
                            # Only recycle a slave whose storagerouter is actually reachable
                            SSHClient(service.storagerouter)
                            service_to_recycle = service
                        except UnableToConnectException:
                            logger.debug('Skip {0} as it is unreachable'.format(service.storagerouter.ip))
            # Try to recycle slave which is in secondary failure domain if none found in primary
            if service_to_recycle is None and secondary_failure_domain is not None:
                for load in sorted(failure_domain_load_dict[secondary_failure_domain]):
                    for service in failure_domain_load_dict[secondary_failure_domain][load]:
                        if service_to_recycle is None and service in slave_services and service.storagerouter.ip not in nodes:
                            try:
                                SSHClient(service.storagerouter)
                                service_to_recycle = service
                                secondary_node_count = 1  # We do not want to configure the secondary slave BEFORE the primary slaves
                            except UnableToConnectException:
                                logger.debug('Skip {0} as it is unreachable'.format(service.storagerouter.ip))
        if service_to_recycle is not None:
            slave_services.remove(service_to_recycle)
            if secondary_node_count == 0:  # Add service to recycle because it's in the primary failure domain
                new_services.append(service_to_recycle)
                nodes.add(service_to_recycle.storagerouter.ip)

        # Add extra (new) slaves until primary safety reached
        if len(nodes) < recommended_primary:
            for load in sorted(failure_domain_load_dict[primary_failure_domain]):
                for service in failure_domain_load_dict[primary_failure_domain][load]:
                    if len(nodes) < recommended_primary and service.storagerouter.ip not in nodes:
                        try:
                            SSHClient(service.storagerouter)
                            new_services.append(service)
                            nodes.add(service.storagerouter.ip)
                        except UnableToConnectException:
                            logger.debug('Skip {0} as it is unreachable'.format(service.storagerouter.ip))
        # Add recycled secondary slave after primary slaves have been added
        if secondary_node_count == 1:
            new_services.append(service_to_recycle)
            nodes.add(service_to_recycle.storagerouter.ip)

        # Add extra (new) slaves until secondary safety reached
        if len(nodes) < safety and secondary_failure_domain is not None:
            for load in sorted(failure_domain_load_dict[secondary_failure_domain]):
                for service in failure_domain_load_dict[secondary_failure_domain][load]:
                    if len(nodes) < safety and service.storagerouter.ip not in nodes:
                        try:
                            SSHClient(service.storagerouter)
                            new_services.append(service)
                            nodes.add(service.storagerouter.ip)
                        except UnableToConnectException:
                            logger.debug('Skip {0} as it is unreachable'.format(service.storagerouter.ip))

        # Build the new configuration and update the vdisk
        configs = []
        for service in new_services:
            client = MetadataServerClient.load(service)
            # Namespace creation is idempotent from this code path's perspective (no error branch here)
            client.create_namespace(str(vdisk.volume_id))
            configs.append(MDSNodeConfig(address=str(service.storagerouter.ip),
                                         port=service.ports[0]))
        vdisk.storagedriver_client.update_metadata_backend_config(volume_id=str(vdisk.volume_id),
                                                                  metadata_backend_config=MDSMetaDataBackendConfig(configs))
        MDSServiceController.sync_vdisk_to_reality(vdisk)
        logger.debug('Ensuring MDS safety for vdisk {0} completed'.format(vdisk.guid))
コード例 #6
0
    def ensure_safety(vdisk, excluded_storagerouters=None):
        """
        Ensures (or tries to ensure) the safety of a given vdisk (except hypervisor).
        Assumptions:
        * A local overloaded master is better than a non-local non-overloaded master
        * Prefer master/services to be on different hosts, a subsequent slave on the same node doesn't add safety
        * Don't actively overload services (e.g. configure an MDS as slave causing it to get overloaded)
        * Too much safety is not wanted (it adds loads to nodes while not required)
        :param vdisk:                   vDisk to calculate a new safety for
        :param excluded_storagerouters: Storagerouters to leave out of calculation (Eg: When 1 is down or unavailable)
        """

        logger.debug('Ensuring MDS safety for vdisk {0}'.format(vdisk.guid))
        # Refresh the storagedriver client before interacting with the volumedriver
        vdisk.reload_client()
        if excluded_storagerouters is None:
            excluded_storagerouters = []
        maxload = Configuration.get('ovs.storagedriver.mds.maxload')
        safety = Configuration.get('ovs.storagedriver.mds.safety')
        tlogs = Configuration.get('ovs.storagedriver.mds.tlogs')
        # All MDS services of the vPool, minus those on explicitly excluded storagerouters
        services = [mds_service.service for mds_service in vdisk.vpool.mds_services
                    if mds_service.service.storagerouter not in excluded_storagerouters]
        nodes = set(service.storagerouter.ip for service in services)
        services_load = {}
        service_per_key = {}
        for service in services:
            # load: current load of the service; load_plus: load if one more item is assigned to it
            load, load_plus = MDSServiceController.get_mds_load(service.mds_service)
            services_load[service.guid] = load, load_plus
            service_per_key['{0}:{1}'.format(service.storagerouter.ip, service.ports[0])] = service

        # List current configuration and filter out excluded services
        reconfigure_required = False
        reconfigure_reasons = []
        vdisk.invalidate_dynamics(['info', 'storagedriver_id', 'storagerouter_guid'])
        configs = vdisk.info['metadata_backend_config']
        for config in configs:
            config['key'] = '{0}:{1}'.format(config['ip'], config['port'])
        master_service = None
        if len(configs) > 0:
            # First entry in the backend config is the current master
            config = configs[0]
            if config['key'] in service_per_key:
                master_service = service_per_key.get(config['key'])
                configs.remove(config)
            else:
                reconfigure_required = True
                reconfigure_reasons.append('Master ({0}:{1}) cannot be used anymore'.format(config['ip'], config['port']))
        slave_services = []
        for config in configs:
            if config['key'] in service_per_key:
                slave_services.append(service_per_key[config['key']])
            else:
                reconfigure_required = True
                reconfigure_reasons.append('Slave ({0}:{1}) cannot be used anymore'.format(config['ip'], config['port']))

        # Fix services_load: collapse the (load, load_plus) tuple into the single relevant value
        services_per_load = {}
        for service in services:
            if service == master_service or service in slave_services:
                load = services_load[service.guid][0]
            else:
                load = services_load[service.guid][1]
            services_load[service.guid] = load
            if load not in services_per_load:
                services_per_load[load] = []
            services_per_load[load].append(service)

        # Further checks if a reconfiguration is required.
        service_nodes = []
        if master_service is not None:
            service_nodes.append(master_service.storagerouter.ip)
        for service in slave_services:
            ip = service.storagerouter.ip
            if ip in service_nodes:
                # Multiple services on the same node don't add safety
                reconfigure_required = True
                reconfigure_reasons.append('Multiple MDS services on the same node')
            else:
                service_nodes.append(ip)
        if len(service_nodes) > safety:
            # Too much safety
            reconfigure_required = True
            reconfigure_reasons.append('Too much safety')
        if len(service_nodes) < safety and len(service_nodes) < len(nodes):
            # Insufficient MDS services configured while there should be sufficient nodes available
            reconfigure_required = True
            reconfigure_reasons.append('Not enough safety')
        if master_service is not None and services_load[master_service.guid] > maxload:
            # The master service is overloaded
            reconfigure_required = True
            reconfigure_reasons.append('Master overloaded')
        if master_service is not None and master_service.storagerouter_guid != vdisk.storagerouter_guid:
            # The master is not local
            reconfigure_required = True
            reconfigure_reasons.append('Master is not local')
        if any(service for service in slave_services if services_load[service.guid] > maxload):
            # There's a slave service overloaded
            reconfigure_required = True
            reconfigure_reasons.append('One or more slaves overloaded')

        if reconfigure_required is False:
            logger.debug('No reconfiguration required for vdisk {0}'.format(vdisk.guid))
            MDSServiceController.sync_vdisk_to_reality(vdisk)
            return

        logger.debug('Reconfiguration required for vdisk {0}:'.format(vdisk.guid))
        for reason in reconfigure_reasons:
            logger.debug('Reason: {0} - vdisk {1}'.format(reason, vdisk.guid))
        # Prepare fresh configuration
        new_services = []

        # Check whether the master (if available) is non-local to the vdisk and/or is overloaded
        master_ok = master_service is not None
        if master_ok is True:
            master_ok = master_service.storagerouter_guid == vdisk.storagerouter_guid and services_load[master_service.guid] <= maxload

        if master_ok:
            # Add this master to the fresh configuration
            new_services.append(master_service)
        else:
            # Try to find the best non-overloaded local MDS (slave)
            candidate_master = None
            candidate_master_load = 0
            local_mds = None
            local_mds_load = 0
            for service in services:
                load = services_load[service.guid]
                if load <= maxload and service.storagerouter_guid == vdisk.storagerouter_guid:
                    if local_mds is None or local_mds_load > load:
                        # This service is a non-overloaded local MDS
                        local_mds = service
                        local_mds_load = load
                    if service in slave_services:
                        if candidate_master is None or candidate_master_load > load:
                            # This service is a non-overloaded local slave
                            candidate_master = service
                            candidate_master_load = load
            if candidate_master is not None:
                # A non-overloaded local slave was found.
                client = MetadataServerClient.load(candidate_master)
                try:
                    # Dry-run catch up: returns the amount of tlogs this slave is behind
                    amount_of_tlogs = client.catch_up(str(vdisk.volume_id), True)
                except RuntimeError as ex:
                    if 'Namespace does not exist' in ex.message:
                        client.create_namespace(str(vdisk.volume_id))
                        amount_of_tlogs = client.catch_up(str(vdisk.volume_id), True)
                    else:
                        raise
                if amount_of_tlogs < tlogs:
                    # Almost there. Catching up right now, and continue as soon as it's up-to-date
                    start = time.time()
                    client.catch_up(str(vdisk.volume_id), False)
                    logger.debug('MDS catch up for vdisk {0} took {1}s'.format(vdisk.guid, round(time.time() - start, 2)))
                    # It's up to date, so add it as a new master
                    new_services.append(candidate_master)
                    if master_service is not None:
                        # The current master (if available) is now a candidate to become one of the slaves
                        slave_services.append(master_service)
                else:
                    # It's not up to date, keep the previous master (if available) and give the local slave
                    # some more time to catch up
                    if master_service is not None:
                        new_services.append(master_service)
                    new_services.append(candidate_master)
                if candidate_master in slave_services:
                    slave_services.remove(candidate_master)
            else:
                # There's no non-overloaded local slave found. Keep the current master (if available) and add
                # a local MDS (if available) as slave
                if master_service is not None:
                    new_services.append(master_service)
                if local_mds is not None:
                    new_services.append(local_mds)
                    if local_mds in slave_services:
                        slave_services.remove(local_mds)

        # At this point, there might (or might not) be a (new) master, and a (catching up) slave. The rest of the non-local
        # MDS nodes must now be added to the configuration until the safety is reached. There's always one extra
        # slave recycled to make sure there's always an (almost) up-to-date slave ready for failover
        loads = sorted(load for load in services_per_load.keys() if load <= maxload)
        nodes = set(service.storagerouter.ip for service in new_services)
        slave_added = False
        if len(nodes) < safety:
            # Recycle at most one existing reachable slave (lowest load first) for faster failover
            for load in loads:
                for service in services_per_load[load]:
                    if slave_added is False and service in slave_services and service.storagerouter.ip not in nodes:
                        try:
                            SSHClient(service.storagerouter)
                            new_services.append(service)
                            slave_services.remove(service)
                            nodes.add(service.storagerouter.ip)
                            slave_added = True
                        except UnableToConnectException:
                            logger.debug('Skip {0} as it is unreachable'.format(service.storagerouter.ip))
        if len(nodes) < safety:
            # Fill up to the requested safety with fresh, reachable services (lowest load first)
            for load in loads:
                for service in services_per_load[load]:
                    if len(nodes) < safety and service.storagerouter.ip not in nodes:
                        try:
                            SSHClient(service.storagerouter)
                            new_services.append(service)
                            nodes.add(service.storagerouter.ip)
                        except UnableToConnectException:
                            logger.debug('Skip {0} as it is unreachable'.format(service.storagerouter.ip))

        # Build the new configuration and update the vdisk
        configs = []
        for service in new_services:
            client = MetadataServerClient.load(service)
            client.create_namespace(str(vdisk.volume_id))
            configs.append(MDSNodeConfig(address=str(service.storagerouter.ip),
                                         port=service.ports[0]))
        vdisk.storagedriver_client.update_metadata_backend_config(
            volume_id=str(vdisk.volume_id),
            metadata_backend_config=MDSMetaDataBackendConfig(configs)
        )
        MDSServiceController.sync_vdisk_to_reality(vdisk)
        logger.debug('Ensuring MDS safety for vdisk {0} completed'.format(vdisk.guid))
コード例 #7
0
ファイル: catchup.py プロジェクト: yongshengma/framework
class MDSCatchUp(MDSShared):
    """
    Class responsible for catching up MDSes asynchronously
    - Registers metadata in Arakoon to ensure that only one catchup happens
    - Offloads the catchup to a new thread: if the worker process would get killed:
      the catchup would still happen by the MDSClient so a re-locking will be happening and it will wait for the original
      catchup to finish
    """
    # Extra caching
    # Class-level caches shared by all instances; cleared when an instance is
    # garbage collected (see __del__ / reset_cache) so SSH connections can close
    _volumedriver_contexts_cache = {}  # Volumedriver PID/start time, keyed per vPool then per StorageRouter
    _worker_contexts_cache = {}  # ovs-workers PID/start time, keyed per StorageRouter
    _clients_cache = {}  # SSHClient per StorageRouter (avoids re-connecting for every instance)

    _logger = Logger('lib')

    _CATCH_UP_NAME_SPACE = 'ovs_jobs_catchup'
    _CATCH_UP_VDISK_KEY = '{0}_{{0}}'.format(
        _CATCH_UP_NAME_SPACE)  # Second format should be the vdisk guid

    def __init__(self, vdisk_guid):
        # type: (str) -> None
        """
        Initializes a new MDSCatchUp
        An instance populates some caches. These caches are cleared once the instance is garbage collected.
        When running MDSCatchup in bulk: add them to a list to speed up the process
        :param vdisk_guid: Guid of the vDisk to catch up for
        :type vdisk_guid: str
        """
        self.id = str(uuid.uuid4())  # Unique identifier for this catchup run (used in log lines)
        self.vdisk = VDisk(vdisk_guid)
        # Persistent-store key under which catchup registrations for this vDisk are kept
        self.mds_key = self._CATCH_UP_VDISK_KEY.format(self.vdisk.guid)
        # Number of tlogs a slave may be behind master before a catchup is triggered
        self.tlog_threshold = Configuration.get(
            'ovs/volumedriver/mds|tlogs_behind', default=100)
        self.volumedriver_service_name = 'ovs-volumedriver_{0}'.format(
            self.vdisk.vpool.name)
        # Timeout (seconds) used when building MDS clients for this vPool
        self.mds_client_timeout = Configuration.get(
            'ovs/vpools/{0}/mds_config|mds_client_connection_timeout'.format(
                self.vdisk.vpool_guid),
            default=120)
        self.mds_clients = {}
        self.dry_run = False  # When True: catch_up only reports how far behind services are
        self.catch_up_threads = []  # Threads spawned by catch_up(async=True)
        self.errors = []  # sys.exc_info() tuples collected by threaded catchups

        self._service_manager = ServiceFactory.get_manager()
        self._persistent = PersistentFactory.get_client()
        self._log = 'MDS catchup {0} - vDisk {1} (volume id: {2})'.format(
            self.id, self.vdisk.guid, self.vdisk.volume_id)

        # Note: order matters below - contexts are built using self._clients
        self._clients = self.build_clients()
        self._volumedriver_contexts = self.get_volumedriver_contexts()
        self._worker_contexts = self.get_worker_contexts()
        self._worker_context = self._worker_contexts[
            System.get_my_storagerouter()]
        self._relevant_contexts = self._get_all_relevant_contexts(
        )  # All possible contexts (by mixing volumedriver ones with workers)

    def __del__(self):
        """
        Destructor
        Clears the class-level caches populated by this instance so cached SSH clients can close their connection
        """
        # All caching should be removed
        self.reset_cache()

    def get_volumedriver_contexts(self):
        # type: () -> Dict[Service, Dict[str, str]]
        """
        Return the volumedriver context (PID + service start time) for every MDS service of the vDisk
        Contexts are cached on class level per vPool/StorageRouter; only missing entries are fetched.
        Services whose volumedriver is down or unreachable are left out of the result.
        :return: Information about the associated volumedrivers, mapped by MDS service
        :rtype: dict
        """
        contexts = {}
        # setdefault keeps the class-level cache entry for this vPool (creating it on first use)
        vpool_cache = self._volumedriver_contexts_cache.setdefault(self.vdisk.vpool, {})
        for service in self.map_mds_services_by_socket_for_vdisk(self.vdisk).itervalues():
            storagerouter = service.storagerouter
            if storagerouter not in vpool_cache or 'volumedriver_pid' not in vpool_cache[storagerouter]:
                try:
                    if storagerouter not in self._clients:
                        client = self.build_ssh_client(storagerouter)
                        if client is None:
                            # StorageRouter unreachable: no context can be built for this service
                            continue
                        self._clients[storagerouter] = client
                    client = self._clients[storagerouter]
                    volumedriver_pid = self._service_manager.get_service_pid(
                        name=self.volumedriver_service_name, client=client)
                    if volumedriver_pid == 0:
                        # PID 0 means the volumedriver service is not running
                        self._logger.warning(
                            self._format_message(
                                'Volumedriver {0} is down on StorageRouter {1}. Won\'t be able to catchup service {2}'
                                .format(self.volumedriver_service_name,
                                        storagerouter.ip,
                                        service.name)))
                        continue
                    volumedriver_start = self._service_manager.get_service_start_time(
                        name=self.volumedriver_service_name, client=client)
                    vpool_cache[storagerouter] = {'volumedriver_pid': volumedriver_pid,
                                                  'volumedriver_start': volumedriver_start}
                except Exception:
                    # Was a bare 'except:', which would also swallow SystemExit/KeyboardInterrupt
                    self._logger.exception(
                        self._format_message(
                            'Exception while retrieving context for service {0}'
                            .format(service.name)))
                    continue
            contexts[service] = vpool_cache[storagerouter]
        return contexts

    def build_clients(self):
        # type: () -> Dict[StorageRouter, SSHClient]
        """
        Build an SSHClient towards every StorageRouter in the cluster
        StorageRouters that could not be reached are left out of the result
        :return: SSHClient mapped by storagerouter
        :rtype: dict((storagerouter, sshclient))
        """
        candidates = ((storagerouter, self.build_ssh_client(storagerouter))
                      for storagerouter in StorageRouterList.get_storagerouters())
        return dict((storagerouter, ssh_client)
                    for storagerouter, ssh_client in candidates
                    if ssh_client is not None)

    def get_worker_contexts(self):
        # type: () -> dict
        """
        Retrieves information about the all workers (where it is executed and under what PID)
        This information is later used to check which data can be discarded (because of interrupted workers)
        :return: Information about the current workers
        :rtype: dict
        :raises ValueError: When no worker context could be built for the local StorageRouter
        """
        workers_context = {}
        for storagerouter, client in self._clients.iteritems():
            if storagerouter not in self._worker_contexts_cache:
                # Defaults used when the workers are down or the lookup fails
                worker_pid = 0
                worker_start = None
                try:
                    # Retrieve the current start time of the process (used to create a unique key)
                    # Output of the command:
                    #                  STARTED   PID
                    # Mon Jan 22 11:49:04 2018 22287
                    worker_pid = self._service_manager.get_service_pid(
                        name='ovs-workers', client=client)
                    if worker_pid == 0:
                        self._logger.warning(
                            'The workers are down on StorageRouter {0}'.format(
                                storagerouter.guid))
                    else:
                        worker_start = self._service_manager.get_service_start_time(
                            name='ovs-workers', client=client)
                except Exception:
                    self._logger.exception(
                        self._format_message(
                            'Unable to retrieve information about the worker'))
                # Even partial/default info is cached to avoid re-querying for every instance
                self._worker_contexts_cache[storagerouter] = {
                    'storagerouter_guid': storagerouter.guid,
                    'worker_pid': worker_pid,
                    'worker_start': worker_start
                }
            workers_context[storagerouter] = self._worker_contexts_cache[
                storagerouter]
        if System.get_my_storagerouter() not in workers_context:
            raise ValueError(
                self._format_message(
                    'The context about the workers on this machine should be known'
                ))
        return workers_context

    def build_ssh_client(self, storagerouter, max_retries=5):
        # type: (StorageRouter, int) -> SSHClient
        """
        Build an sshclient with retries for a certain endpoint
        :param storagerouter: Point to connect too
        :type storagerouter: StorageRouter
        :param max_retries: Maximum number of connection attempts
        :return: The built sshclient (None when the StorageRouter could not be reached)
        :rtype: SSHClient
        """
        cached_client = self._clients_cache.get(storagerouter)
        if cached_client is not None:
            return cached_client
        for attempt in range(1, max_retries + 1):
            try:
                fresh_client = SSHClient(storagerouter, username='******', timeout=30)
            except Exception:
                self._logger.exception(
                    self._format_message(
                        'Unable to connect to StorageRouter {0} - Retrying {1} more times before assuming it is down'
                        .format(storagerouter.ip, max_retries - attempt)))
            else:
                # Avoids re-connecting for every client
                # Cache needs to be cleared once this object is no longer so all clients can close their connection
                self._clients_cache[storagerouter] = fresh_client
                return fresh_client
        # Every attempt failed: give up on this StorageRouter (implicitly returns None)
        self._logger.error(
            self._format_message(
                'Assuming StorageRouter {0} is dead. Unable to checkup there'
                .format(storagerouter.ip)))

    def _catch_up(self, mds_client, service, threaded):
        # type: (MDSClient, Service, bool) -> None
        """
        Perform a catchup for the service
        :param mds_client: MDSClient
        :type mds_client: volumedriver.storagerouter.storagerouterclient.MDSClient
        :param service: Associated service
        :type service: Service
        :param threaded: Working in a threaded context
        When True: exceptions are appended to self.errors instead of raised, so the spawning caller can inspect them
        :type threaded: bool
        :return: None
        """
        log_identifier = 'MDS Service {0} at {1}:{2}'.format(
            service.name, service.storagerouter.ip, service.ports[0])
        try:
            registered_catchup = self.register_catch_up(service)
            # do_finally guards the unregister step: it must NOT run when the worker was killed mid-catchup
            do_finally = True
            # Set when the catchup itself fails (the volumedriver may have died); forces a cache reset afterwards
            reset_volumedriver_cache = False
            try:
                self._logger.info(
                    self._format_message(
                        '{0} catch up registrations: {1}'.format(
                            log_identifier, registered_catchup)))
                if len(registered_catchup) > 1:
                    # Another worker already registered a catchup for this service; avoid doing it twice
                    self._logger.info(
                        self._format_message(
                            '{0} is already being caught up'.format(
                                log_identifier)))
                    return
                mds_client.catch_up(str(self.vdisk.volume_id),
                                    dry_run=self.dry_run)
            except WorkerLossException:  # Thrown during unittests to simulate a worker getting killed at this stage
                do_finally = False
                raise
            except Exception:
                self._logger.exception(
                    'Exception occurred while going to/doing catch up')
                # The volumedriver might have been killed. Invalidate the cache for this instance
                reset_volumedriver_cache = True
                raise
            finally:
                if do_finally:
                    try:
                        self.unregister_catch_up(service)
                    except Exception:
                        self._logger.exception(
                            self._format_message(
                                '{0} - Failed to unregister catchup'.format(
                                    log_identifier)))
                    finally:
                        if reset_volumedriver_cache:
                            self.reset_volumedriver_cache_for_service(service)
        except Exception:
            msg = '{0} - Exception occurred while catching up'.format(
                log_identifier)
            if threaded:
                # In a thread there is nobody to catch the exception: store it for the caller
                self._logger.exception(
                    self._format_message('{0} in thread'.format(msg)))
                self.errors.append(sys.exc_info())
            else:
                self._logger.exception(self._format_message('{0}'.format(msg)))
                raise

    def catch_up(self, async=True):
        # type: (bool) -> List[Tuple[Tuple[Service, int, bool]]]
        """
        Catch up all MDS services
        :param async: Perform catchups asynchronously (offload to a thread)
        When set to True (default): results can be waited for using `wait`
        :return: List with information which mdses were behind and how much
        :rtype: list
        """
        self.errors = []
        self.catch_up_threads = []
        behind = []
        for service in self._volumedriver_contexts.iterkeys():
            caught_up = False
            service_identifier = '{0} ({1}:{2})'.format(
                service.name, service.storagerouter.ip, service.ports[0])
            client = MetadataServerClient.load(service=service,
                                               timeout=self.mds_client_timeout)
            if client is None:
                self._logger.error(
                    self._format_message(
                        'Cannot establish a MDS client connection for service {0}'
                        .format(service_identifier)))
                continue
            try:
                # Verify how much the Service is behind (No catchup action is invoked)
                tlogs_behind_master = client.catch_up(str(
                    self.vdisk.volume_id),
                                                      dry_run=True)
            except RuntimeError:
                self._logger.exception(
                    self._format_message(
                        'Unable to fetch the tlogs behind master for service {0}'
                        .format(service_identifier)))
                continue
            if tlogs_behind_master >= self.tlog_threshold:
                self._logger.warning(
                    self._format_message(
                        'Service {0} is {1} tlogs behind master. Catching up because threshold was reached ({1}/{2})'
                        .format(service_identifier, tlogs_behind_master,
                                self.tlog_threshold)))
                # @todo offload to a thread
                if async:
                    thread = Thread(target=self._catch_up,
                                    args=(
                                        client,
                                        service,
                                        async,
                                    ))
                    thread.start()
                    self.catch_up_threads.append(thread)
                else:
                    self._catch_up(client, service, async)
                caught_up = True
Code example #8
0
    def apply_reconfigurations(self, new_services, previous_master_service):
        # type: (List[Service], Service) -> None
        """
        Applies all calculated reconfigurations
        - Deploys the services
        - Notifies the Storagerouter
        :param new_services: List of new services to be used in the reconfiguration (Master and slaves)
        Note the order matters here! First the master, then slaves in primary domain, then slaves in secondary domain
        :type new_services: List[Service]
        :param previous_master_service: Previous master service incase the master should be switched around (None if no previous master)
        :type previous_master_service: Service
        :return: None
        :rtype: NoneType
        :raises RuntimeError: When no MDS client can be built for a relevant service, or when the StorageDriver update fails
        """
        # Verify an MDSClient can be created for all relevant services
        services_to_check = new_services + self.slave_services
        if self.master_service is not None:
            services_to_check.append(self.master_service)
        for service in services_to_check:
            if service not in self.mds_client_cache:
                client = MetadataServerClient.load(
                    service=service, timeout=self.mds_client_timeout)
                if client is None:
                    raise RuntimeError(
                        'Cannot establish a MDS client connection for service {0}:{1}'
                        .format(service.storagerouter.ip, service.ports[0]))
                self.mds_client_cache[service] = client

        configs_all = []
        new_namespace_services = []  # Namespaces created in this run; removed again on failure to avoid storage leaks
        configs_without_replaced_master = []
        log_start = 'vDisk {0}'.format(self.vdisk.guid)
        # Create the vDisk namespace on every service of the new configuration
        for service in new_services:
            client = self.mds_client_cache[service]
            try:
                if str(self.vdisk.volume_id) not in client.list_namespaces():
                    client.create_namespace(
                        str(self.vdisk.volume_id)
                    )  # StorageDriver does not throw error if already existing or does not create a duplicate namespace
                    new_namespace_services.append(service)
            except Exception:
                self._logger.exception(
                    '{0} - Creating new namespace {1} failed for Service {2}:{3}'
                    .format(log_start, self.vdisk.volume_id,
                            service.storagerouter.ip, service.ports[0]))
                # Clean up newly created namespaces
                for new_namespace_service in new_namespace_services:
                    client = self.mds_client_cache[new_namespace_service]
                    try:
                        self._logger.warning(
                            '{0}: Deleting newly created namespace {1} for service {2}:{3}'
                            .format(log_start, self.vdisk.volume_id,
                                    new_namespace_service.storagerouter.ip,
                                    new_namespace_service.ports[0]))
                        client.remove_namespace(str(self.vdisk.volume_id))
                    except RuntimeError:
                        pass  # If somehow the namespace would not exist, we don't care.
                raise  # Currently nothing has been changed on StorageDriver level, so we can completely abort

            # noinspection PyArgumentList
            config = MDSNodeConfig(address=str(service.storagerouter.ip),
                                   port=service.ports[0])
            if previous_master_service != service:  # This only occurs when a slave has caught up with master and old master gets replaced with new master
                configs_without_replaced_master.append(config)
            configs_all.append(config)

        start = time.time()
        update_failure = False
        try:
            self._logger.debug(
                '{0} - Updating MDS configuration'.format(log_start))
            if len(configs_without_replaced_master) != len(
                    configs_all
            ):  # First update without previous master to avoid race conditions (required by voldrv)
                self._logger.debug(
                    '{0} - Without previous master: {1}:{2}'.format(
                        log_start, previous_master_service.storagerouter.ip,
                        previous_master_service.ports[0]))
                self.vdisk.storagedriver_client.update_metadata_backend_config(
                    volume_id=str(self.vdisk.volume_id),
                    metadata_backend_config=MDSMetaDataBackendConfig(
                        configs_without_replaced_master),
                    req_timeout_secs=self.sr_client_timeout)
                self._logger.debug(
                    '{0} - Updating MDS configuration without previous master took {1}s'
                    .format(log_start,
                            time.time() - start))
            self.vdisk.storagedriver_client.update_metadata_backend_config(
                volume_id=str(self.vdisk.volume_id),
                metadata_backend_config=MDSMetaDataBackendConfig(configs_all),
                req_timeout_secs=self.sr_client_timeout)
            # Verify the configuration - chosen by the framework - passed to the StorageDriver is effectively the correct configuration
            self.vdisk.invalidate_dynamics('info')
            self._logger.debug('{0} - Configuration after update: {1}'.format(
                self.vdisk.guid, self.vdisk.info['metadata_backend_config']))

            duration = time.time() - start
            if duration > 5:
                self._logger.critical(
                    '{0} - Updating MDS configuration took {1}s'.format(
                        log_start, duration))
        except RuntimeError:
            # @TODO: Timeout throws RuntimeError for now. Replace this once https://github.com/openvstorage/volumedriver/issues/349 is fixed
            if time.time(
            ) - start >= self.sr_client_timeout:  # Timeout reached, clean up must be done manually once server side finished
                self._logger.critical(
                    '{0} - Updating MDS configuration timed out'.format(
                        log_start))
                # Log the manual actions an operator must perform, since the server side may still finish the update
                for service in [
                        svc for svc in services_to_check
                        if svc not in new_services
                ]:
                    self._logger.critical(
                        '{0} - Manual remove namespace action required for MDS {1}:{2} and namespace {3}'
                        .format(log_start, service.storagerouter.ip,
                                service.ports[0], self.vdisk.volume_id))
                for service in new_services[1:]:
                    self._logger.critical(
                        '{0} - Manual set SLAVE role action required for MDS {1}:{2} and namespace {3}'
                        .format(log_start, service.storagerouter.ip,
                                service.ports[0], self.vdisk.volume_id))
                self._logger.critical(
                    '{0} - Sync vDisk to reality action required'.format(
                        log_start))
            else:
                self._logger.exception(
                    '{0}: Failed to update the metadata backend configuration'.
                    format(log_start))
                update_failure = True  # No need to clean new namespaces if time out would have occurred
            # Always raise
            #     * In case of a timeout, the manual actions are logged and user knows the ensure_safety has failed
            #     * In any other case, the newly created namespaces are deleted
            raise
        except Exception:
            self._logger.exception(
                '{0}: Failed to update the metadata backend configuration'.
                format(log_start))
            update_failure = True
            raise
        finally:
            if update_failure is True:
                # Remove newly created namespaces when updating would go wrong to avoid storage leaks
                for new_namespace_service in new_namespace_services:
                    client = self.mds_client_cache[new_namespace_service]
                    try:
                        self._logger.warning(
                            '{0}: Deleting newly created namespace {1} for service {2}:{3}'
                            .format(log_start, self.vdisk.volume_id,
                                    new_namespace_service.storagerouter.ip,
                                    new_namespace_service.ports[0]))
                        client.remove_namespace(str(self.vdisk.volume_id))
                    except RuntimeError:
                        pass  # If somehow the namespace would not exist, we don't care.

        # Update succeeded: align the model with reality and drop namespaces on services no longer used
        self._sync_vdisk_to_reality(self.vdisk)
        for service in services_to_check:
            if service not in new_services:
                self._logger.debug(
                    '{0} - Deleting namespace for vDisk on service {1}:{2}'.
                    format(log_start, service.storagerouter.ip,
                           service.ports[0]))
                client = self.mds_client_cache[service]
                try:
                    client.remove_namespace(str(self.vdisk.volume_id))
                except RuntimeError:
                    pass  # If somehow the namespace would not exist, we don't care.

        # Every service except the first (the master) must carry the SLAVE role
        for service in new_services[1:]:
            client = self.mds_client_cache[service]
            try:
                if client.get_role(nspace=str(self.vdisk.volume_id)
                                   ) != MetadataServerClient.MDS_ROLE.SLAVE:
                    self._logger.debug(
                        '{0} - Demoting service {1}:{2} to SLAVE'.format(
                            log_start, service.storagerouter.ip,
                            service.ports[0]))
                    start = time.time()
                    client.set_role(nspace=str(self.vdisk.volume_id),
                                    role=MetadataServerClient.MDS_ROLE.SLAVE)
                    duration = time.time() - start
                    if duration > 5:
                        self._logger.critical(
                            '{0} - Demoting service {1}:{2} to SLAVE took {3}s'
                            .format(log_start, service.storagerouter.ip,
                                    service.ports[0], duration))
            except Exception:
                self._logger.critical(
                    '{0} - Failed to demote service {1}:{2} to SLAVE'.format(
                        log_start, service.storagerouter.ip, service.ports[0]))
                raise
Code example #9
0
    def create_new_master(self):
        # type: () -> Tuple[List[Service], Service]
        """
        Check and create a new MDS master if necessary
        Master configured according to StorageDriver must be modelled
        Master must be local
        Master cannot be overloaded
        Master must be in primary domain (if no domains available, this check is irrelevant because all StorageRouters will match)
        :return: The newly created services and the previous master (if a master switch happened)
        :rtype: Tuple[List[Service], Service]
        """
        new_services = []
        previous_master = None
        log_start = 'vDisk {0}'.format(self.vdisk.guid)

        # Current master is acceptable when it is local, not overloaded and inside the primary domain
        if self.master_service is not None and self.master_service.storagerouter_guid == self.vdisk.storagerouter_guid and self.services_load[
                self.
                master_service] <= self.max_load and self.master_service in self.mds_layout[
                    'primary']['used']:
            new_services.append(
                self.master_service
            )  # Master is OK, so add as 1st element to new configuration. Reconfiguration is now based purely on slave misconfiguration
            self._logger.debug(
                '{0} - Master is still OK, re-calculating slaves'.format(
                    log_start))
        else:
            # Master is not OK --> try to find the best non-overloaded LOCAL MDS slave in the primary domain to make master
            self._logger.debug(
                '{0} - Master is not OK, re-calculating master'.format(
                    log_start))
            current_load = 0  # Load of the best candidate found so far
            new_local_master_service = None
            re_used_local_slave_service = None
            for service in self.mds_layout['primary']['available']:
                if service == self.master_service:
                    # Make sure the current master_service is not re-used as master for whatever reason
                    continue
                next_load = self.services_load[
                    service]  # This load indicates the load it would become if a vDisk would be moved to this Service
                if next_load <= self.max_load and service.storagerouter_guid == self.vdisk.storagerouter_guid:
                    # Pick the least loaded local candidate (the 'or' clause accepts the first candidate found)
                    if current_load > next_load or (
                            re_used_local_slave_service is None
                            and new_local_master_service is None):
                        current_load = next_load  # Load for least loaded service
                        new_local_master_service = service  # If no local slave is found to re-use, this new_local_master_service is used
                        if service in self.slave_services:
                            self._logger.debug(
                                '{0} - Slave service {1}:{2} will be recycled'.
                                format(log_start, service.storagerouter.ip,
                                       service.ports[0]))
                            re_used_local_slave_service = service  # A slave service is found to re-use as new master
                            self.slave_services.remove(service)

            if re_used_local_slave_service is None:
                # There's no non-overloaded local slave found. Keep the current master (if available) and add a local MDS (if available) as slave.
                # Next iteration, the newly added slave will be checked if it has caught up already
                # If amount of tlogs to catchup is < configured amount of tlogs --> we wait for catchup, so master can be removed and slave can be promoted
                if self.master_service is not None:
                    self._logger.debug(
                        '{0} - Keeping current master service'.format(
                            log_start))
                    new_services.append(self.master_service)
                if new_local_master_service is not None:
                    self._logger.debug(
                        '{0} - Adding new slave service {1}:{2} to catch up'.
                        format(log_start,
                               new_local_master_service.storagerouter.ip,
                               new_local_master_service.ports[0]))
                    new_services.append(new_local_master_service)
            else:
                # A non-overloaded local slave was found
                # We verify how many tlogs the slave is behind and do 1 of the following:
                #     1. tlogs_behind_master < tlogs configured --> Invoke the catchup action and wait for it
                #     2. tlogs_behind_master >= tlogs configured --> Add current master service as 1st in list, append non-overloaded local slave as 2nd in list and let StorageDriver do the catchup (next iteration we check again)
                # noinspection PyTypeChecker
                client = MetadataServerClient.load(
                    service=re_used_local_slave_service,
                    timeout=self.mds_client_timeout)
                if client is None:
                    raise RuntimeError(
                        'Cannot establish a MDS client connection for service {0}:{1}'
                        .format(re_used_local_slave_service.storagerouter.ip,
                                re_used_local_slave_service.ports[0]))
                self.mds_client_cache[re_used_local_slave_service] = client
                try:
                    tlogs_behind_master = client.catch_up(
                        str(self.vdisk.volume_id), dry_run=True
                    )  # Verify how much tlogs local slave Service is behind (No catchup action is invoked)
                except RuntimeError as ex:
                    if 'Namespace does not exist' in ex.message:
                        # Namespace was never deployed on this slave: create it and re-check (0 tlogs behind)
                        client.create_namespace(str(self.vdisk.volume_id))
                        tlogs_behind_master = client.catch_up(str(
                            self.vdisk.volume_id),
                                                              dry_run=True)
                    else:
                        raise

                self._logger.debug(
                    '{0} - Recycled slave is {1} tlogs behind'.format(
                        log_start, tlogs_behind_master))
                if tlogs_behind_master < self.tlogs:
                    start = time.time()
                    try:
                        client.catch_up(str(self.vdisk.volume_id),
                                        dry_run=False)
                        self._logger.debug('{0} - Catchup took {1}s'.format(
                            log_start, round(time.time() - start, 2)))
                    except Exception:
                        self._logger.exception(
                            '{0} - Catching up failed'.format(log_start))
                        raise  # Catchup failed, so we don't know whether the new slave can be promoted to master yet

                    # It's up to date, so add it as a new master
                    new_services.append(re_used_local_slave_service)
                    if self.master_service is not None:
                        # The current master (if available) is now candidate to become one of the slaves (Determined below during slave calculation)
                        # The current master can potentially be on a different node, thus might become slave
                        self.slave_services.insert(0, self.master_service)
                        previous_master = self.master_service
                else:
                    # It's not up to date, keep the previous master (if available) and give the local slave some more time to catch up
                    if self.master_service is not None:
                        new_services.append(self.master_service)
                    new_services.append(re_used_local_slave_service)

        service_string = ', '.join([
            "{{'ip': '{0}', 'port': {1}}}".format(service.storagerouter.ip,
                                                  service.ports[0])
            for service in new_services
        ])
        self._logger.debug(
            'vDisk {0} - Configuration after MASTER calculation: [{1}]'.format(
                self.vdisk.guid, service_string))

        return new_services, previous_master
コード例 #10
0
ファイル: mdsservice.py プロジェクト: DarumasLegs/framework
    def ensure_safety(vdisk, excluded_storagerouters=None):
        """
        Ensures (or tries to ensure) the safety of a given vdisk (except hypervisor).
        Assumptions:
        * A local overloaded master is better than a non-local non-overloaded master
        * Prefer master/services to be on different hosts, a subsequent slave on the same node doesn't add safety
        * Don't actively overload services (e.g. configure an MDS as slave causing it to get overloaded)
        * Too much safety is not wanted (it adds loads to nodes while not required)
        :param vdisk: vDisk to calculate a new safety for
        :type vdisk: VDisk

        :param excluded_storagerouters: Storagerouters to leave out of calculation (Eg: When 1 is down or unavailable)
        :type excluded_storagerouters: list

        :return: None
        """
        def _add_suitable_nodes(local_failure_domain, local_safety):
            """
            Append the least-loaded, reachable MDS services of the given failure domain to the
            new configuration until 'local_safety' distinct nodes are in use.
            Mutates the enclosing 'nodes' set and 'new_services' list (also returned for clarity).
            :param local_failure_domain: Failure domain to take services from
            :param local_safety: Amount of distinct nodes that needs to be reached
            :return: Updated nodes set and new_services list
            """
            if len(nodes) < local_safety:
                for local_load in sorted(failure_domain_load_dict[local_failure_domain]):
                    for local_service in failure_domain_load_dict[local_failure_domain][local_load]:
                        if len(nodes) < local_safety and local_service.storagerouter.ip not in nodes:
                            try:
                                # Only configure services whose node is actually reachable
                                SSHClient(local_service.storagerouter)
                                new_services.append(local_service)
                                nodes.add(local_service.storagerouter.ip)
                            except UnableToConnectException:
                                # Fixed: previously logged 'service.storagerouter.ip' - a stale variable leaked from
                                # the enclosing loops - which reported the wrong IP for the unreachable node
                                MDSServiceController._logger.debug('MDS safety: vDisk {0}: Skipping storagerouter with IP {1} as it is unreachable'.format(vdisk.guid, local_service.storagerouter.ip))
            return nodes, new_services

        ######################
        # GATHER INFORMATION #
        ######################
        MDSServiceController._logger.debug('MDS safety: vDisk {0}: Start checkup for virtual disk {1}'.format(vdisk.guid, vdisk.name))
        vdisk.reload_client()
        vdisk.invalidate_dynamics(['info', 'storagedriver_id', 'storagerouter_guid'])
        if vdisk.storagerouter_guid is None:
            raise ValueError('Cannot ensure MDS safety for vDisk {0} with guid {1} because vDisk is not attached to any Storage Router'.format(vdisk.name, vdisk.guid))

        if excluded_storagerouters is None:
            excluded_storagerouters = []

        # Sorted was added merely for unittests, because they rely on specific order of services and their ports
        # Default sorting behavior for relations used to be based on order in which relations were added
        # Now sorting is based on guid (DAL speedup changes)
        services = sorted([mds_service.service for mds_service in vdisk.vpool.mds_services
                           if mds_service.service.storagerouter not in excluded_storagerouters], key=lambda k: k.ports)
        nodes = set(service.storagerouter.ip for service in services)

        vdisk_storagerouter = StorageRouter(vdisk.storagerouter_guid)
        primary_failure_domain = vdisk_storagerouter.primary_failure_domain
        # vDisk-level secondary failure domain takes precedence over the storagerouter's
        if vdisk.secondary_failure_domain is not None:
            secondary_failure_domain = vdisk.secondary_failure_domain
        else:
            secondary_failure_domain = vdisk_storagerouter.secondary_failure_domain

        # Per failure domain: load -> services mapping, services currently in the vDisk's config, and all usable services
        failure_domain_load_dict = {primary_failure_domain: {}}
        failure_domain_used_services_dict = {primary_failure_domain: []}
        failure_domain_available_services_dict = {primary_failure_domain: []}
        storage_router_failure_domain_dict = dict((storage_router, primary_failure_domain) for storage_router in primary_failure_domain.primary_storagerouters)

        if secondary_failure_domain is not None:
            failure_domain_load_dict[secondary_failure_domain] = {}
            failure_domain_used_services_dict[secondary_failure_domain] = []
            failure_domain_available_services_dict[secondary_failure_domain] = []
            storage_router_failure_domain_dict.update(dict((storage_router, secondary_failure_domain) for storage_router in secondary_failure_domain.primary_storagerouters))

        services_load = {}
        service_per_key = {}
        for service in services:
            services_load[service] = MDSServiceController.get_mds_load(service.mds_service)
            service_per_key['{0}:{1}'.format(service.storagerouter.ip, service.ports[0])] = service

        configs = vdisk.info['metadata_backend_config']  # Ordered MASTER, SLAVE (backup failure domain of master)
        for config in configs:
            config['key'] = '{0}:{1}'.format(config['ip'], config['port'])

        ###################################
        # VERIFY RECONFIGURATION REQUIRED #
        ###################################
        master_service = None
        reconfigure_reasons = []
        if len(configs) > 0:
            config = configs.pop(0)  # First entry in the backend config is the master
            if config['key'] in service_per_key:
                master_service = service_per_key.get(config['key'])
            else:
                reconfigure_reasons.append('Master ({0}:{1}) cannot be used anymore'.format(config['ip'], config['port']))
        slave_services = []
        for config in configs:
            if config['key'] in service_per_key:
                slave_services.append(service_per_key[config['key']])
            else:
                reconfigure_reasons.append('Slave ({0}:{1}) cannot be used anymore'.format(config['ip'], config['port']))

        # If MDS already in use, take current load, else take next load
        tlogs = EtcdConfiguration.get('/ovs/framework/storagedriver|mds_tlogs')
        safety = EtcdConfiguration.get('/ovs/framework/storagedriver|mds_safety')
        max_load = EtcdConfiguration.get('/ovs/framework/storagedriver|mds_maxload')
        for service in services:
            if service == master_service or service in slave_services:
                load = services_load[service][0]
                if service.storagerouter in storage_router_failure_domain_dict:  # Services in use per failure domain
                    failure_domain_used_services_dict[storage_router_failure_domain_dict[service.storagerouter]].append(service)
                else:
                    reconfigure_reasons.append('Service {0} cannot be used anymore because storagerouter with IP {1} is not part of the failure domains'.format(service.name, service.storagerouter.ip))
            else:
                load = services_load[service][1]
            services_load[service] = load
            if service.storagerouter in storage_router_failure_domain_dict:  # All services available in model per failure domain
                failure_domain = storage_router_failure_domain_dict[service.storagerouter]
                failure_domain_available_services_dict[failure_domain].append(service)
                if load <= max_load:
                    if load not in failure_domain_load_dict[failure_domain]:
                        failure_domain_load_dict[failure_domain][load] = []
                    failure_domain_load_dict[failure_domain][load].append(service)

        service_nodes = []
        if master_service is not None:
            service_nodes.append(master_service.storagerouter.ip)
        for service in slave_services:
            ip = service.storagerouter.ip
            if ip in service_nodes:
                reconfigure_reasons.append('Multiple MDS services on the same node')
            else:
                service_nodes.append(ip)

        if len(service_nodes) > safety:
            reconfigure_reasons.append('Too much safety')
        if len(service_nodes) < safety and len(service_nodes) < len(nodes):
            reconfigure_reasons.append('Not enough safety')
        if master_service is not None and services_load[master_service] > max_load:
            reconfigure_reasons.append('Master overloaded')
        if master_service is not None and master_service.storagerouter_guid != vdisk.storagerouter_guid:
            reconfigure_reasons.append('Master is not local')
        if any(service for service in slave_services if services_load[service] > max_load):
            reconfigure_reasons.append('One or more slaves overloaded')

        # Check reconfigure required based upon failure domains
        recommended_primary = math.ceil(safety / 2.0) if secondary_failure_domain is not None else safety
        recommended_secondary = safety - recommended_primary

        if master_service is not None and master_service not in failure_domain_used_services_dict[primary_failure_domain]:
            # Master service not present in primary failure domain
            reconfigure_reasons.append('Master service not in primary failure domain')

        primary_services_used = len(failure_domain_used_services_dict[primary_failure_domain])
        primary_services_available = len(failure_domain_available_services_dict[primary_failure_domain])
        if primary_services_used < recommended_primary and primary_services_used < primary_services_available:
            # More services can be used in primary failure domain
            reconfigure_reasons.append('Not enough services in use in primary failure domain')

        if secondary_failure_domain is not None:
            # More services can be used in secondary failure domain
            secondary_services_used = len(failure_domain_used_services_dict[secondary_failure_domain])
            secondary_services_available = len(failure_domain_available_services_dict[secondary_failure_domain])
            if secondary_services_used < recommended_secondary and secondary_services_used < secondary_services_available:
                reconfigure_reasons.append('Not enough services in use in secondary failure domain')

            # If secondary failure domain present, check order in which the slave services are configured
            # Primary-domain slaves must come before secondary-domain slaves in the config
            secondary = False
            for slave_service in slave_services:
                if secondary is True and slave_service in failure_domain_used_services_dict[primary_failure_domain]:
                    reconfigure_reasons.append('A slave in secondary failure domain has priority over a slave in primary failure domain')
                    break
                if slave_service in failure_domain_used_services_dict[secondary_failure_domain]:
                    secondary = True

        if not reconfigure_reasons:
            MDSServiceController._logger.debug('MDS safety: vDisk {0}: No reconfiguration required'.format(vdisk.guid))
            MDSServiceController.sync_vdisk_to_reality(vdisk)
            return

        MDSServiceController._logger.debug('MDS safety: vDisk {0}: Reconfiguration required. Reasons:'.format(vdisk.guid))
        for reason in reconfigure_reasons:
            MDSServiceController._logger.debug('MDS safety: vDisk {0}:    * {1}'.format(vdisk.guid, reason))

        # Check whether the master (if available) is non-local to the vdisk and/or is overloaded
        new_services = []
        master_ok = master_service is not None
        if master_ok is True:
            master_ok = master_service.storagerouter_guid == vdisk.storagerouter_guid and services_load[master_service] <= max_load

        ############################
        # CREATE NEW CONFIGURATION #
        ############################
        if master_ok:
            # Add this master to the fresh configuration
            new_services.append(master_service)
        else:
            # Try to find the best non-overloaded LOCAL MDS slave to make master
            candidate_master_service = None
            candidate_master_load = 0
            local_mds = None
            local_mds_load = 0
            for service in failure_domain_available_services_dict[primary_failure_domain]:
                load = services_load[service]
                if load <= max_load and service.storagerouter_guid == vdisk.storagerouter_guid:
                    if local_mds is None or local_mds_load > load:
                        # This service is a non-overloaded local MDS
                        local_mds = service
                        local_mds_load = load
                    if service in slave_services:
                        if candidate_master_service is None or candidate_master_load > load:
                            # This service is a non-overloaded local slave
                            candidate_master_service = service
                            candidate_master_load = load
            if candidate_master_service is not None:
                # A non-overloaded local slave was found.
                client = MetadataServerClient.load(candidate_master_service)
                try:
                    # Dry run: verify how many tlogs the local slave is behind the master
                    amount_of_tlogs = client.catch_up(str(vdisk.volume_id), True)
                except RuntimeError as ex:
                    if 'Namespace does not exist' in ex.message:
                        client.create_namespace(str(vdisk.volume_id))
                        amount_of_tlogs = client.catch_up(str(vdisk.volume_id), True)
                    else:
                        raise
                if amount_of_tlogs < tlogs:
                    # Almost there. Catching up right now, and continue as soon as it's up-to-date
                    start = time.time()
                    client.catch_up(str(vdisk.volume_id), False)
                    MDSServiceController._logger.debug('MDS safety: vDisk {0}: Catchup took {1}s'.format(vdisk.guid, round(time.time() - start, 2)))
                    # It's up to date, so add it as a new master
                    new_services.append(candidate_master_service)
                    if master_service is not None:
                        # The current master (if available) is now candidate to become one of the slaves
                        slave_services.append(master_service)
                else:
                    # It's not up to date, keep the previous master (if available) and give the local slave
                    # some more time to catch up
                    if master_service is not None:
                        new_services.append(master_service)
                    new_services.append(candidate_master_service)
                if candidate_master_service in slave_services:
                    slave_services.remove(candidate_master_service)
            else:
                # There's no non-overloaded local slave found. Keep the current master (if available) and add
                # a local MDS (if available) as slave
                if master_service is not None:
                    new_services.append(master_service)
                if local_mds is not None:
                    new_services.append(local_mds)
                    if local_mds in slave_services:
                        slave_services.remove(local_mds)

        # At this point, there might (or might not) be a (new) master, and a (catching up) slave. The rest of the non-local
        # MDS nodes must now be added to the configuration until the safety is reached. There's always one extra
        # slave recycled to make sure there's always an (almost) up-to-date slave ready for failover
        nodes = set(service.storagerouter.ip for service in new_services)

        # Recycle slave for faster failover
        secondary_node_count = 0
        service_to_recycle = None
        if len(nodes) < safety:
            if recommended_primary > 1:  # If primary is 1, we only have master in primary
                # Try to recycle slave which is in primary failure domain
                for load in sorted(failure_domain_load_dict[primary_failure_domain]):
                    for service in failure_domain_load_dict[primary_failure_domain][load]:
                        if service_to_recycle is None and service in slave_services and service.storagerouter.ip not in nodes:
                            try:
                                SSHClient(service.storagerouter)
                                service_to_recycle = service
                            except UnableToConnectException:
                                MDSServiceController._logger.debug('MDS safety: vDisk {0}: Skipping storagerouter with IP {1} as it is unreachable'.format(vdisk.guid, service.storagerouter.ip))
            # Try to recycle slave which is in secondary failure domain if none found in primary
            if service_to_recycle is None and secondary_failure_domain is not None:
                for load in sorted(failure_domain_load_dict[secondary_failure_domain]):
                    for service in failure_domain_load_dict[secondary_failure_domain][load]:
                        if service_to_recycle is None and service in slave_services and service.storagerouter.ip not in nodes:
                            try:
                                SSHClient(service.storagerouter)
                                service_to_recycle = service
                                secondary_node_count = 1  # We do not want to configure the secondary slave BEFORE the primary slaves
                            except UnableToConnectException:
                                MDSServiceController._logger.debug('MDS safety: vDisk {0}: Skipping storagerouter with IP {1} as it is unreachable'.format(vdisk.guid, service.storagerouter.ip))
        if service_to_recycle is not None:
            slave_services.remove(service_to_recycle)
            if secondary_node_count == 0:  # Add service to recycle because its in primary failure domain
                new_services.append(service_to_recycle)
                nodes.add(service_to_recycle.storagerouter.ip)

        # Add extra (new) slaves until primary safety reached
        nodes, new_services = _add_suitable_nodes(local_failure_domain=primary_failure_domain,
                                                  local_safety=recommended_primary)

        # Add recycled secondary slave after primary slaves have been added
        if secondary_node_count == 1:
            new_services.append(service_to_recycle)
            nodes.add(service_to_recycle.storagerouter.ip)

        # Add extra (new) slaves until secondary safety reached
        if secondary_failure_domain is not None:
            nodes, new_services = _add_suitable_nodes(local_failure_domain=secondary_failure_domain,
                                                      local_safety=safety)
            # Add extra slaves from primary failure domain in case no suitable nodes found in secondary failure domain
            if len(nodes) < safety:
                nodes, new_services = _add_suitable_nodes(local_failure_domain=primary_failure_domain,
                                                          local_safety=safety)

        # Build the new configuration and update the vdisk
        configs = []
        for service in new_services:
            client = MetadataServerClient.load(service)
            client.create_namespace(str(vdisk.volume_id))
            # noinspection PyArgumentList
            configs.append(MDSNodeConfig(address=str(service.storagerouter.ip),
                                         port=service.ports[0]))
        vdisk.storagedriver_client.update_metadata_backend_config(volume_id=str(vdisk.volume_id),
                                                                  metadata_backend_config=MDSMetaDataBackendConfig(configs))
        MDSServiceController.sync_vdisk_to_reality(vdisk)
        MDSServiceController._logger.debug('MDS safety: vDisk {0}: Completed'.format(vdisk.guid))