def list(self, discover=False, ip=None, node_id=None):
        """
        Lists ALBA Nodes, either the modeled ones or the ones found in the configuration store

        Behaviour per argument combination:
          * discover=False: return the modeled ALBA nodes (ip and node_id must both be omitted)
          * discover=True with ip and node_id: contact that single node, validate it and return a list holding only that node
          * discover=True without ip and node_id: return every ASD node found under '/ovs/alba/asdnodes' which is not modeled yet
        :param discover: Whether to discover nodes instead of returning the modeled ones
        :param ip: IP of the ALBA node to retrieve (only together with node_id)
        :param node_id: ID of the ALBA node to retrieve (only together with ip)
        :return: Result of AlbaNodeList.get_albanodes() when not discovering, else a DataList filled with volatile AlbaNode objects
        :raises RuntimeError: On an invalid argument combination, when the node reports invalid credentials or when it reports another node ID
        """
        def _to_data_list(volatile_nodes):
            # Wraps volatile nodes (mapped by guid) in a DataList whose internals are pre-filled
            # NOTE(review): presumably '_executed = True' prevents the DataList from querying the model - confirm
            data_list = DataList(AlbaNode, {})
            data_list._executed = True
            data_list._guids = volatile_nodes.keys()
            data_list._objects = volatile_nodes
            data_list._data = dict((volatile_node.guid, {'guid': volatile_node.guid, 'data': volatile_node._data})
                                   for volatile_node in volatile_nodes.values())
            return data_list

        if discover is False and (ip is not None or node_id is not None):
            raise RuntimeError('Discover is mutually exclusive with IP and nodeID')
        if (ip is None) != (node_id is None):  # Exactly one of both was passed
            raise RuntimeError('Both IP and nodeID need to be specified')

        if discover is False:
            return AlbaNodeList.get_albanodes()

        config_key = '/ovs/alba/asdnodes/{0}/config/main|{1}'
        if ip is not None:
            node = AlbaNode(volatile=True)
            node.ip = ip
            node.type = 'ASD'
            node.node_id = node_id
            node.port = EtcdConfiguration.get(config_key.format(node_id, 'port'))
            node.username = EtcdConfiguration.get(config_key.format(node_id, 'username'))
            node.password = EtcdConfiguration.get(config_key.format(node_id, 'password'))
            # Verify that the node reachable on the given IP really is the requested node
            data = node.client.get_metadata()
            if data['_success'] is False and data['_error'] == 'Invalid credentials':
                raise RuntimeError('Invalid credentials')
            if data['node_id'] != node_id:
                raise RuntimeError('Unexpected node identifier. {0} vs {1}'.format(data['node_id'], node_id))
            return _to_data_list({node.guid: node})

        nodes = {}
        # Sets give constant-time membership checks while filtering the configured IDs
        model_node_ids = set(node.node_id for node in AlbaNodeList.get_albanodes())
        found_node_ids = set()
        asd_node_ids = []
        if EtcdConfiguration.dir_exists('/ovs/alba/asdnodes'):
            asd_node_ids = EtcdConfiguration.list('/ovs/alba/asdnodes')

        for node_id in asd_node_ids:
            node = AlbaNode(volatile=True)
            node.type = 'ASD'
            node.node_id = node_id
            node.ip = EtcdConfiguration.get(config_key.format(node_id, 'ip'))
            node.port = EtcdConfiguration.get(config_key.format(node_id, 'port'))
            node.username = EtcdConfiguration.get(config_key.format(node_id, 'username'))
            node.password = EtcdConfiguration.get(config_key.format(node_id, 'password'))
            # Only report nodes which are neither modeled nor already listed
            if node.node_id not in model_node_ids and node.node_id not in found_node_ids:
                nodes[node.guid] = node
                found_node_ids.add(node.node_id)
        return _to_data_list(nodes)
예제 #2
0
 def get_node_by_id(node_id):
     """
     Look up the ALBA node carrying the given node ID
     :param node_id: Node ID to search for
     :return: Whatever AlbaNodeList.get_albanode_by_node_id returns for this ID
     """
     alba_node = AlbaNodeList.get_albanode_by_node_id(node_id=node_id)
     return alba_node
예제 #3
0
    def discover_nodes(cls):
        # type: () -> Dict[str, AlbaNode]
        """
        Look in the config mgmt for ASD and S3 nodes which are not part of the model yet
        Every such node ID is turned into a volatile node through cls.model_volatile_node
        :return: The discovered nodes, mapped by their guid
        :rtype: Dict[str, AlbaNode]
        """
        # IDs to skip: starts with the modeled nodes and grows with every node discovered below
        known_node_ids = set(modeled_node.node_id for modeled_node in AlbaNodeList.get_albanodes())
        base_paths = {AlbaNode.NODE_TYPES.ASD: ASD_NODE_BASE_PATH,
                      AlbaNode.NODE_TYPES.S3: S3_NODE_BASE_PATH}

        # Collect the configured node IDs per node type first
        configured_ids = {}
        for node_type, base_path in base_paths.iteritems():
            if not Configuration.dir_exists(base_path):
                continue
            configured_ids[node_type] = Configuration.list(base_path)

        discovered = {}
        for node_type, node_ids in configured_ids.iteritems():
            for node_id in node_ids:
                if node_id in known_node_ids:
                    continue
                volatile_node = cls.model_volatile_node(node_id, node_type)
                discovered[volatile_node.guid] = volatile_node
                known_node_ids.add(volatile_node.node_id)
        return discovered
예제 #4
0
 def model_albanodes(**kwargs):
     """
     Create or refresh a model entry for every ASD node listed under '/ovs/alba/asdnodes'
     An existing node (matched on node ID) is updated, an unknown one is created
     :param kwargs: Kwargs containing information regarding the node (not used)
     :type kwargs: dict
     :return: None
     :rtype: NoneType
     """
     _ = kwargs
     base_path = '/ovs/alba/asdnodes'
     if not Configuration.dir_exists(base_path):
         return
     for node_id in Configuration.list(base_path):
         alba_node = AlbaNodeList.get_albanode_by_node_id(node_id)
         if alba_node is None:
             alba_node = AlbaNode()
         main_config = Configuration.get('{0}/{1}/config/main'.format(base_path, node_id))
         alba_node.type = 'ASD'
         alba_node.node_id = node_id
         # Connection details are taken over from the node's main configuration
         for attribute in ('ip', 'port', 'username', 'password'):
             setattr(alba_node, attribute, main_config[attribute])
         alba_node.storagerouter = StorageRouterList.get_by_ip(main_config['ip'])
         alba_node.save()
예제 #5
0
 def get_albanode_by_node_id(alba_node_id):
     """
     Look up the ALBA node object carrying the given node ID
     :param alba_node_id: Node ID of the ALBA node to search for
     :return: Whatever AlbaNodeList.get_albanode_by_node_id returns for this ID
     """
     alba_node = AlbaNodeList.get_albanode_by_node_id(alba_node_id)
     return alba_node
예제 #6
0
 def _merge_downtime_information_alba(cls):
     """
     Called when the 'Update' button in the GUI is pressed
     Combines, per component, the 'downtime' and 'prerequisites' entries found in the 'package_information'
     property of every ALBA Node DAL object. Each entry is listed only once per component
     :return: Information about prerequisites not met and downtime issues
     :rtype: dict
     """
     cls._logger.debug('Retrieving downtime and prerequisite information for ALBA plugin')
     merged = {}
     for node in AlbaNodeList.get_albanodes():
         for component, info in node.package_information.iteritems():
             component_summary = merged.setdefault(component, {'downtime': [],
                                                               'prerequisites': []})
             for category in ('downtime', 'prerequisites'):
                 collected = component_summary[category]
                 for entry in info[category]:
                     # Entries reported by several nodes are kept only once
                     if entry not in collected:
                         collected.append(entry)
     cls._logger.debug('Retrieved downtime and prerequisite information for ALBA plugin: {0}'.format(merged))
     return merged
    def package_install_sdm(package_info, components):
        """
        Ask every ALBA node to update the SDM packages
        Nothing happens when 'alba' is not among the selected components or when none of the given packages is an SDM package
        :param package_info: Information about the packages (installed, candidate)
        :type package_info: dict
        :param components: Components which have been selected for update
        :type components: list
        :return: None
        """
        if 'alba' not in components:
            return

        sdm_updates = dict((name, info) for name, info in package_info.iteritems()
                           if name in AlbaUpdateController.sdm_packages)
        if len(sdm_updates) == 0:
            return

        hook_name = inspect.currentframe().f_code.co_name
        AlbaUpdateController._logger.debug('Executing hook {0}'.format(hook_name))
        for name, info in sdm_updates.iteritems():
            for node in AlbaNodeList.get_albanodes():
                AlbaUpdateController._logger.debug('{0}: Updating SDM package {1} ({2} --> {3})'.format(node.ip, name, info['installed'], info['candidate']))
                try:
                    node.client.execute_update(name)
                except requests.ConnectionError as ce:
                    # An aborted connection is expected: the post-update code of the SDM package restarts the asd-manager service
                    connection_aborted = 'Connection aborted.' in ce.message
                    if not connection_aborted:
                        raise
                AlbaUpdateController._logger.debug('{0}: Updated SDM package {1}'.format(node.ip, name))
        AlbaUpdateController._logger.debug('Executed hook {0}'.format(hook_name))
    def register(node_id):
        """
        Adds a Node with a given node_id to the model
        A node which is not modeled yet is created from its main configuration in Etcd. The node is then contacted
        and only saved when it reports the expected node ID. Afterwards the stored number of maintenance agents
        of every ALBA backend is raised by one and the maintenance agents checkup is executed
        :param node_id: ID of the ALBA node
        :type node_id: str

        :return: None
        :raises RuntimeError: When the node reports invalid credentials or another node ID
        """
        node = AlbaNodeList.get_albanode_by_node_id(node_id)
        if node is None:
            main_config = EtcdConfiguration.get('/ovs/alba/asdnodes/{0}/config/main'.format(node_id))
            node = AlbaNode()
            node.ip = main_config['ip']
            node.port = main_config['port']
            node.username = main_config['username']
            node.password = main_config['password']
            node.storagerouter = StorageRouterList.get_by_ip(main_config['ip'])
        # Validate the node before persisting anything
        data = node.client.get_metadata()
        if data['_success'] is False and data['_error'] == 'Invalid credentials':
            raise RuntimeError('Invalid credentials')
        if data['node_id'] != node_id:
            AlbaNodeController._logger.error('Unexpected node_id: {0} vs {1}'.format(data['node_id'], node_id))
            raise RuntimeError('Unexpected node identifier')
        node.node_id = node_id
        node.type = 'ASD'
        node.save()

        # Raise the stored number of maintenance agents of every ALBA backend by 1 (or initialise it to 1)
        for backend in AlbaBackendList.get_albabackends():
            nr_of_agents_key = AlbaNodeController.NR_OF_AGENTS_ETCD_TEMPLATE.format(backend.guid)
            if EtcdConfiguration.exists(nr_of_agents_key):
                # Convert the stored value first and increment afterwards, adding 1 to a string value would raise a TypeError
                EtcdConfiguration.set(nr_of_agents_key, int(EtcdConfiguration.get(nr_of_agents_key)) + 1)
            else:
                EtcdConfiguration.set(nr_of_agents_key, 1)
        AlbaNodeController.checkup_maintenance_agents()
    def _presets(self):
        """
        Returns the policies active on the node
        """
        if len(self.abm_services) == 0:
            return []  # No ABM services yet, so backend not fully installed yet

        asds = {}
        if self.scaling != AlbaBackend.SCALINGS.GLOBAL:
            for node in AlbaNodeList.get_albanodes():
                asds[node.node_id] = 0
                for disk in self.local_stack[node.node_id].values():
                    for asd_info in disk['asds'].values():
                        if asd_info['status'] in ['claimed', 'warning']:
                            asds[node.node_id] += 1
        config = Configuration.get_configuration_path('/ovs/arakoon/{0}-abm/config'.format(self.name))
        presets = AlbaCLI.run(command='list-presets', config=config)
        preset_dict = {}
        for preset in presets:
            preset_dict[preset['name']] = preset
            if 'in_use' not in preset:
                preset['in_use'] = True
            if 'is_default' not in preset:
                preset['is_default'] = False
            preset['is_available'] = False
            preset['policies'] = [tuple(policy) for policy in preset['policies']]
            preset['policy_metadata'] = {}
            active_policy = None
            for policy in preset['policies']:
                is_available = False
                available_disks = 0
                if self.scaling != AlbaBackend.SCALINGS.GLOBAL:
                    available_disks += sum(min(asds[node], policy[3]) for node in asds)
                if self.scaling != AlbaBackend.SCALINGS.LOCAL:
                    available_disks += sum(self.local_summary['devices'].values())
                if available_disks >= policy[2]:
                    if active_policy is None:
                        active_policy = policy
                    is_available = True
                preset['policy_metadata'][policy] = {'is_active': False, 'in_use': False, 'is_available': is_available}
                preset['is_available'] |= is_available
            if active_policy is not None:
                preset['policy_metadata'][active_policy]['is_active'] = True
        for namespace in self.ns_data:
            if namespace['namespace']['state'] != 'active':
                continue
            policy_usage = namespace['statistics']['bucket_count']
            preset = preset_dict[namespace['namespace']['preset_name']]
            for usage in policy_usage:
                upolicy = tuple(usage[0])  # Policy as reported to be "in use"
                for cpolicy in preset['policies']:  # All configured policies
                    if upolicy[0] == cpolicy[0] and upolicy[1] == cpolicy[1] and upolicy[3] <= cpolicy[3]:
                        preset['policy_metadata'][cpolicy]['in_use'] = True
                        break
        for preset in presets:
            preset['policies'] = [str(policy) for policy in preset['policies']]
            for key in preset['policy_metadata'].keys():
                preset['policy_metadata'][str(key)] = preset['policy_metadata'][key]
                del preset['policy_metadata'][key]
        return presets
예제 #10
0
 def register(node_id):
     """
     Adds a Node with a given node_id to the model
     A node which is not modeled yet is created from its main configuration. The node is contacted and only saved
     when it reports the expected node ID. Afterwards the maintenance agents checkup is scheduled
     :param node_id: ID of the ALBA node
     :type node_id: str
     :return: None
     """
     alba_node = AlbaNodeList.get_albanode_by_node_id(node_id)
     if alba_node is None:
         main_config = Configuration.get('/ovs/alba/asdnodes/{0}/config/main'.format(node_id))
         alba_node = AlbaNode()
         for attribute in ('ip', 'port', 'username', 'password'):
             setattr(alba_node, attribute, main_config[attribute])
         alba_node.storagerouter = StorageRouterList.get_by_ip(main_config['ip'])

     # Validate the node before persisting anything
     metadata = alba_node.client.get_metadata()
     invalid_credentials = metadata['_success'] is False and metadata['_error'] == 'Invalid credentials'
     if invalid_credentials:
         raise RuntimeError('Invalid credentials')
     reported_node_id = metadata['node_id']
     if reported_node_id != node_id:
         AlbaNodeController._logger.error('Unexpected node_id: {0} vs {1}'.format(reported_node_id, node_id))
         raise RuntimeError('Unexpected node identifier')

     alba_node.node_id = node_id
     alba_node.type = 'ASD'
     alba_node.save()
     AlbaController.checkup_maintenance_agents.delay()
예제 #11
0
    def initialise_disks(alba_backend, nr_of_disks, disk_type):
        """
        Make sure the requested amount of disks is initialized
        Disks which are already initialized count towards the requested amount, only the shortage gets initialized
        :param alba_backend: ALBA backend
        :param nr_of_disks: Amount of disks to initialize
        :param disk_type: Type of disks
        :return: True when enough disks were initialized already, None otherwise
        """
        # Assume no disks are claimed by a remote environment
        alba_backend.invalidate_dynamics(['storage_stack'])
        storage_stack = alba_backend.storage_stack

        already_initialised = 0
        candidate_disk_ids = []
        for node_disks in storage_stack.values():
            for disk_id, disk in node_disks.iteritems():
                status = disk['status']
                if status == 'initialized':
                    already_initialised += 1
                elif status == 'uninitialized':
                    candidate_disk_ids.append(disk_id)

        shortage = nr_of_disks - already_initialised
        if shortage <= 0:
            return True

        assert len(candidate_disk_ids) >= shortage, "Not enough disks to initialize!"

        disks_to_init = GeneralAlba.filter_disks(candidate_disk_ids, shortage, disk_type)
        assert len(disks_to_init) >= shortage, "Not enough disks to initialize!"

        # The disks are initialized through the ALBA node running on the grid IP
        grid_ip = General.get_config().get('main', 'grid_ip')
        alba_node = AlbaNodeList.get_albanode_by_ip(grid_ip)
        disk_map = dict((disk_id, 1) for disk_id in disks_to_init)
        failures = AlbaNodeController.initialize_disks(alba_node.guid, disk_map)
        assert not failures, 'Alba disk initialization failed for (some) disks: {0}'.format(failures)
예제 #12
0
 def register_node(node_cluster_guid, node_id=None, node_ids=None):
     # type: (str, str, List[str]) -> None
     """
     Link one or more AlbaNodes to an AlbaNodeCluster
     A node is refused when one of its OSDs is claimed or when the claim state of an OSD is unknown
     Problems are collected per node, the remaining nodes are still processed
     :param node_cluster_guid: Guid of the AlbaNodeCluster to add the node to
     :type node_cluster_guid: basestring
     :param node_id: ID of the ALBA node to register (only used when node_ids is not given)
     :type node_id: basestring
     :param node_ids: List of IDs of AlbaNodes to register
     :type node_ids: list[str]
     :return: None
     :rtype: NoneType
     :raises ValueError: When neither node_id nor node_ids is given, or when registering failed for at least one node
     """
     if node_id is None and node_ids is None:
         raise ValueError('Either node_id or node_ids must be given')
     if node_ids is None:
         node_ids = [node_id]
     cluster = AlbaNodeCluster(node_cluster_guid)
     errors = []
     for node_id in node_ids:
         try:
             alba_node = AlbaNodeList.get_albanode_by_node_id(node_id)
             if alba_node is None:
                 errors.append('No AlbaNode found with ID {0}'.format(node_id))
                 continue
             # Validation: none of the OSDs may be claimed
             for _, slot_info in alba_node.stack.iteritems():
                 for osd_id, osd_info in slot_info['osds'].iteritems():
                     claimed_by = osd_info.get('claimed_by')
                     if claimed_by is None:
                         continue
                     # Either UNKNOWN or a GUID
                     if claimed_by == AlbaNode.OSD_STATUSES.UNKNOWN:
                         raise RuntimeError('Unable to link AlbaNode {0}. No information could be retrieved about OSD {1}'.format(node_id, osd_id))
                     raise RuntimeError('Unable to link AlbaNode {0} because it already has OSDs which are claimed'.format(node_id))
             try:
                 AlbaNodeClusterController.register_node_to_cluster(cluster.guid, alba_node.node_id)
             except Exception:
                 error = 'Unable to register the node under cluster'
                 AlbaNodeClusterController._logger.exception(error)
                 errors.append(error)
             else:
                 alba_node.alba_node_cluster = cluster
                 alba_node.save()
         except Exception:
             # Also reached for the validation errors raised above
             error = 'Unhandled Exception occurred during the registering of AlbaNode with id {0} under AlbaNodeCluster {1}'.format(node_id, node_cluster_guid)
             errors.append(error)
             AlbaNodeClusterController._logger.exception(error)
     if errors:
         raise ValueError('Errors occurred while registering AlbaNodes with IDs {0}:\n - {1}'.format(node_ids, '\n - '.join(errors)))
예제 #13
0
    def get_albanode_by_ip(ip):
        """
        Look up the ALBA node object which has the given IP

        :param ip: IP of the ALBA node to search for
        :type ip: str
        :return: Whatever AlbaNodeList.get_albanode_by_ip returns for this IP
        """
        alba_node = AlbaNodeList.get_albanode_by_ip(ip)
        return alba_node
예제 #14
0
    def _get_update_information_plugin_alba(cls, error_information):
        """
        Called by GenericController.refresh_package_information() every hour
        Asks every non-GENERIC AlbaNode for its package information and stores it on the node
        Components without packages are left out. Failures are logged and appended to error_information under the IP of the node
        :param error_information: Dict passed in by the thread to collect all errors
        :type error_information: dict
        :return: None
        :rtype: NoneType
        """
        cls._logger.info('Refreshing ALBA plugin update information')

        failures = 0
        for alba_node in AlbaNodeList.get_albanodes():
            if alba_node.type == AlbaNode.NODE_TYPES.GENERIC:
                continue

            node_ip = alba_node.ip
            cls._logger.debug('ALBA Node {0}: Refreshing update information'.format(node_ip))
            error_information.setdefault(node_ip, [])

            try:
                update_info = alba_node.client.get_package_information()
                # Loop over a copy, since components are removed from update_info meanwhile
                snapshot = copy.deepcopy(update_info)
                cls._logger.debug('ALBA Node {0}: Update information: {1}'.format(node_ip, update_info))
                for component, info in snapshot.iteritems():
                    if len(info['packages']) == 0:
                        update_info.pop(component)

                cls._logger.debug('ALBA Node {0}: Storing update information: {1}'.format(node_ip, update_info))
                alba_node.package_information = update_info
                alba_node.save()
                cls._logger.debug('ALBA Node {0}: Refreshed update information'.format(node_ip))
            except (requests.ConnectionError, requests.Timeout):
                failures += 1
                cls._logger.warning('ALBA Node {0}: Update information could not be updated'.format(node_ip))
                error_information[node_ip].append('Connection timed out or connection refused on {0}'.format(node_ip))
            except Exception as ex:
                failures += 1
                cls._logger.exception('ALBA Node {0}: Update information could not be updated'.format(node_ip))
                error_information[node_ip].append(ex)
        if failures == 0:
            cls._logger.info('Refreshed ALBA plugin update information')
예제 #15
0
 def _presets(self):
     """
     Returns the presets of the backend, enriched with availability and usage information per policy
     Every preset gets 'is_available', 'policies' (as strings) and 'policy_metadata' (keyed by the policy string,
     holding 'is_active', 'in_use' and 'is_available')
     :return: The presets as reported by 'list-presets'
     """
     all_disks = self.all_disks
     # Amount of usable (claimed or warning) disks per node
     usable_disks = {}
     for node in AlbaNodeList.get_albanodes():
         usable_disks[node.node_id] = sum(1 for disk in all_disks
                                          if disk['node_id'] == node.node_id and disk['status'] in ['claimed', 'warning'])

     config = 'etcd://127.0.0.1:2379/ovs/arakoon/{0}-abm/config'.format(self.backend.name)
     presets = AlbaCLI.run('list-presets', config=config, as_json=True)
     presets_by_name = {}
     for preset in presets:
         presets_by_name[preset['name']] = preset
         preset.setdefault('in_use', True)
         preset.setdefault('is_default', False)
         preset['is_available'] = False
         preset['policies'] = [tuple(policy) for policy in preset['policies']]
         metadata = {}
         preset['policy_metadata'] = metadata
         active_policy = None
         for policy in preset['policies']:
             # A node never contributes more than policy[3] disks
             capacity = sum(min(amount, policy[3]) for amount in usable_disks.values())
             satisfiable = capacity >= policy[2]
             if satisfiable and active_policy is None:
                 # The first satisfiable policy becomes the active one
                 active_policy = policy
             metadata[policy] = {'is_active': False, 'in_use': False, 'is_available': satisfiable}
             preset['is_available'] |= satisfiable
         if active_policy is not None:
             metadata[active_policy]['is_active'] = True

     # Flag the configured policies which are reported as used by an active namespace
     for namespace in self.ns_data:
         if namespace['namespace']['state'] != 'active':
             continue
         policy_usage = namespace['statistics']['bucket_count']
         preset = presets_by_name[namespace['namespace']['preset_name']]
         for usage in policy_usage:
             used = tuple(usage[0])  # Policy as reported to be "in use"
             for configured in preset['policies']:  # All configured policies
                 if used[0] == configured[0] and used[1] == configured[1] and used[3] <= configured[3]:
                     preset['policy_metadata'][configured]['in_use'] = True
                     break

     # Policies were handled as tuples above, they are handed out as strings
     for preset in presets:
         preset['policies'] = [str(policy) for policy in preset['policies']]
         preset['policy_metadata'] = dict((str(policy), info)
                                          for policy, info in preset['policy_metadata'].items())
     return presets
예제 #16
0
 def ipmi_check(cls, result_handler):
     """
     Report the IPMI power status of every AlbaNode
     Nodes without IPMI configuration are skipped. Nodes for which no IPMIController can be built are reported
     as failure. For the other nodes the power status is reported as success (POWER ON) or warning (POWER OFF)
     and problems while requesting the status are reported as failure or exception
     :param result_handler: logging object
     :type result_handler: ovs.extensions.healthcheck.result.HCResults
     :return: None
     """
     for albanode in AlbaNodeList.get_albanodes():
         node_id = albanode.node_id
         ipmi_config_loc = '/ovs/alba/asdnodes/{0}/config/ipmi'.format(
             node_id)
         if not Configuration.exists(ipmi_config_loc):
             result_handler.skip(
                 'No IPMI info found on AlbaNode with ID {0}'.format(
                     node_id))
             continue
         ipmi_config = Configuration.get(ipmi_config_loc)
         ip = ipmi_config.get('ip')
         try:
             controller = IPMIController(
                 ip=ip,
                 username=ipmi_config.get('username'),
                 password=ipmi_config.get('password'),
                 client=SSHClient(System.get_my_storagerouter()))
         except Exception:
             # Deliberately not a bare except: SystemExit and KeyboardInterrupt must keep propagating
             result_handler.failure(
                 'IPMI settings are not valid for AlbaNode with ID {0}'.
                 format(node_id))
             continue
         try:
             # The status is looked up under the IPMI IP of the node
             status = controller.status_node().get(ip)
             if status == IPMIController.IPMI_POWER_ON:
                 result_handler.success(
                     'IPMI AlbaNode with ID {0} status is POWER ON'.format(
                         node_id))
             elif status == IPMIController.IPMI_POWER_OFF:
                 result_handler.warning(
                     'IPMI AlbaNode with ID {0} status is POWER OFF'.format(
                         node_id))
             # NOTE(review): any other status is not reported at all - confirm this is intended
         except IPMITimeOutException as ex:
             result_handler.failure(
                 "IPMI AlbaNode with ID {0} timed out: '{1}'".format(
                     node_id, ex))
         except IPMICallException as ex:
             result_handler.failure(
                 "IPMI AlbaNode with ID {0} call failed: '{1}'".format(
                     node_id, ex))
         except Exception:
             msg = 'Could not retrieve info through IPMI for AlbaNode with ID {0}'.format(
                 node_id)
             cls.logger.exception(msg)
             result_handler.exception(msg)
예제 #17
0
    def filter_disks(disk_names, amount, disk_type):
        """
        Pick up to 'amount' uninitialised disks of the given type, spread round-robin over the nodes
        Per node the physical disks are retrieved over SSH and only those of the requested type which are
        listed in disk_names for that node are considered
        :param disk_names: Dict with node IDs as keys and a list of uninitialised disk names as values, e.g.
                           {u'InA44YDJTKxFGvIKqD3CxYMlK7XxryZ0': [u'ata-TOSHIBA_MK2002TSKB_52Q2KSOTF',
                                                                  u'ata-TOSHIBA_MK2002TSKB_52Q3KR6TF']}
        :param amount: Total amount of disks to retrieve
        :param disk_type: Type of disk, 'SATA' or 'SSD' (any other value selects no disks)
        :return: Dict with node IDs as keys and a list of '/dev/disk/by-id/<name>' paths as values.
                 Fewer than 'amount' disks are returned when not enough disks are available
        """
        node_ids = []
        available_disks = {}
        filtered_disks = {}
        for node_id in disk_names:
            node_ids.append(node_id)
            filtered_disks[node_id] = []
            alba_node = AlbaNodeList.get_albanode_by_node_id(node_id)
            storagerouter = GeneralStorageRouter.get_storage_router_by_ip(ip=alba_node.ip)
            # NOTE(review): the username literal looks masked in this source - confirm the intended account
            root_client = SSHClient(storagerouter, username='******')
            hdds, ssds = GeneralDisk.get_physical_disks(client=root_client)
            if disk_type == 'SATA':
                candidates = hdds.values()
            elif disk_type == 'SSD':
                candidates = ssds.values()
            else:
                candidates = []
            # Only keep the disks which are found in the uninitialised list for that node
            uninitialised_names = disk_names[node_id]
            available_disks[node_id] = [disk for disk in candidates if disk['name'] in uninitialised_names]

        # All disks might be on a single node, so up to 'amount' rounds can be needed,
        # but never more rounds than the longest list of available disks
        longest = max(len(disks) for disks in available_disks.values()) if available_disks else 0
        count = 0
        for disk_index in range(min(amount, longest)):
            if count >= amount:
                break
            # Every round takes at most 1 disk per node
            for node_id in node_ids:
                if count >= amount:
                    break
                if disk_index < len(available_disks[node_id]):
                    filtered_disks[node_id].append('/dev/disk/by-id/' + available_disks[node_id][disk_index]['name'])
                    count += 1
        return filtered_disks
예제 #18
0
 def _merge_package_information_alba(cls):
     """
     Collect the 'package_information' property of every non-GENERIC ALBA Node DAL object, keyed by the IP of the node
     The property is returned as stored, including downtime info, prerequisites, services, ...
     The caller of this function will strip out and merge the relevant package information
     :return: Update information for all ALBA Nodes
     :rtype: dict
     """
     cls._logger.debug('Retrieving package information for ALBA plugin')
     package_information = dict((node.ip, node.package_information)
                                for node in AlbaNodeList.get_albanodes()
                                if node.type != AlbaNode.NODE_TYPES.GENERIC)
     cls._logger.debug('Retrieved package information for ALBA plugin')
     return package_information
예제 #19
0
 def register(node_id=None, node_type=None, name=None):
     """
     Adds a Node to the model
     A GENERIC node is created on the spot with a random node ID of 32 letters and digits. For the other types
     the node is looked up in the model or among the discovered nodes, contacted for validation and then saved
     as a non-volatile node. In both cases the maintenance agents checkup is scheduled afterwards
     :param node_id: ID of the ALBA node (required unless the type is GENERIC)
     :type node_id: str
     :param node_type: Type of the node to create
     :type node_type: str
     :param name: Optional name of the node (only used for the GENERIC type)
     :type name: str
     :return: None
     :rtype: NoneType
     """
     if node_type == AlbaNode.NODE_TYPES.GENERIC:
         # Generic is a special case. Nothing is registered within config mgmt
         alphabet = string.ascii_letters + string.digits
         alba_node = AlbaNode()
         alba_node.name = name
         alba_node.node_id = ''.join(random.choice(alphabet) for _ in range(32))
         alba_node.type = AlbaNode.NODE_TYPES.GENERIC
         alba_node.save()
     else:
         # Both S3 and ASD type can be added now
         if node_id is None:
             raise RuntimeError('A node_id must be given for type ASD/S3')
         alba_node = AlbaNodeList.get_albanode_by_node_id(node_id)
         if not alba_node:
             alba_node = AlbaNodeController.get_discovered_node(node_id)
         if not alba_node:
             # Neither the model nor the discovered nodes know this ID, the user might have passed the ID
             # of a node that does not exist
             raise RuntimeError('No node with node_id {0} was found'.format(node_id))

         metadata = alba_node.client.get_metadata()
         invalid_credentials = metadata['_success'] is False and metadata['_error'] == 'Invalid credentials'
         if invalid_credentials:
             raise RuntimeError('Invalid credentials')
         reported_node_id = metadata['node_id']
         if reported_node_id != node_id:
             AlbaNodeController._logger.error('Unexpected node_id: {0} vs {1}'.format(reported_node_id, node_id))
             raise RuntimeError('Unexpected node identifier')
         if alba_node.type == AlbaNode.NODE_TYPES.S3:
             # The transaction Arakoon is needed. This will check deployment & extend
             AlbaArakoonController.configure_s3_transaction_cluster()
         alba_node.volatile = False
         alba_node.save()
     AlbaController.checkup_maintenance_agents.delay()
예제 #20
0
 def add_units(self, albabackend, osds):
     """
     Add storage units to the backend and register with alba nsm
     DEPRECATED API call - Use 'add_osds' instead
     The old call data ({osd_id: disk_id}) is translated to the slot based data expected by 'add_osds'
     :param albabackend: ALBA backend to add units to
     :type albabackend: AlbaBackend
     :param osds: Dict of osd_id as key, disk_id as value
     :type osds: Dict
     :raises HttpNotAcceptableException: When a slot is not part of the stack used for this call
     :raises HttpNotFoundException: When an OSD cannot be found on its slot
     :return: Asynchronous result of a CeleryTask
     :rtype: celery.result.AsyncResult
     """
     # Currently backwards compatible, should be removed at some point
     osd_type = 'ASD'
     osd_info = []
     stack = None
     for osd_id, disk_alias in osds.iteritems():
         # The slot ID is the last part of the disk alias
         slot_id = disk_alias.split('/')[-1]
         # Add units is pushed for a single ALBA Node so the stack is only fetched once
         if stack is None:
             for alba_node in AlbaNodeList.get_albanodes():
                 _stack = alba_node.stack
                 if slot_id in _stack:
                     stack = _stack
                     break
         # The stack was resolved using the first slot only. A later slot which is not part of that stack
         # must result in the same error instead of a KeyError on the lookup below
         if stack is None or slot_id not in stack:
             raise HttpNotAcceptableException(
                 error='stack_not_found',
                 error_description='Could not find the matching stack for slot with ID {0}'.format(slot_id))
         _osd = stack[slot_id]['osds'].get(osd_id)
         if _osd is None:
             raise HttpNotFoundException(
                 error='osd_not_found',
                 error_description='Could not find OSD {0} on Slot {1}'.format(osd_id, slot_id))
         osd_info.append({'slot_id': slot_id,
                          'osd_type': osd_type,
                          'ips': _osd['ips'],
                          'port': _osd['port']})
     return AlbaController.add_osds.s(albabackend.guid, osd_info).apply_async(queue='ovs_masters')
    def _local_stack(self):
        """
        Build a live overview of the slots of every modeled ALBA Node
        Every slot entry starts from a set of defaults ('osds', 'name', 'status', 'status_detail') which are
        overruled by the slot data reported in the stack of the node. OSDs for which statistics are known
        are extended with a 'usage' section (size, used, available)
        The stack of every node is loaded in its own thread
        :return: Slot information per slot ID, per node ID. Empty when no ABM cluster is known yet
        :rtype: dict
        """
        if self.abm_cluster is None:
            return {}  # No ABM cluster yet, so backend not fully installed yet

        statistics = self.osd_statistics

        def _collect_node_info(_alba_node, _result):
            # Register the node before its stack is requested
            slots = {}
            _result[_alba_node.node_id] = slots
            for slot_id, slot_info in _alba_node.stack.iteritems():
                # The defaults are registered first and get overruled by the reported slot data afterwards
                slot_entry = {'osds': {},
                              'name': slot_id,
                              'status': 'error',
                              'status_detail': 'unknown'}
                slots[slot_id] = slot_entry
                # The usage is added to the reported OSD data itself, so it ends up in the entry through the update
                for osd_id, osd_info in slot_info.get('osds', {}).iteritems():
                    if osd_id not in statistics:
                        continue
                    osd_stats = statistics[osd_id]
                    capacity = osd_stats['capacity']
                    disk_usage = osd_stats['disk_usage']
                    osd_info['usage'] = {'size': int(capacity),
                                         'used': int(disk_usage),
                                         'available': int(capacity - disk_usage)}
                slot_entry.update(slot_info)

        storage_map = {}
        workers = []
        for alba_node in AlbaNodeList.get_albanodes():
            worker = Thread(target=_collect_node_info, args=(alba_node, storage_map))
            worker.start()
            workers.append(worker)
        # Wait until every node has been processed
        for worker in workers:
            worker.join()

        return storage_map
예제 #22
0
 def unregister_node(node_cluster_guid, node_id):
     # type: (str, str) -> None
     """
     Detach an AlbaNode from its AlbaNodeCluster
     The relation towards the cluster is cleared and saved. Afterwards a NotImplementedError is raised,
     as the actions which should follow the removal of the relation are not implemented yet
     :param node_cluster_guid: Guid of the AlbaNodeCluster (currently unused)
     :type node_cluster_guid: basestring
     :param node_id: ID of the ALBA node to unregister
     :type node_id: basestring
     :raises NotImplementedError: Always, after the relation has been removed
     :return: None
     :rtype: NoneType
     """
     _ = node_cluster_guid
     alba_node = AlbaNodeList.get_albanode_by_node_id(node_id)
     alba_node.alba_node_cluster = None
     alba_node.save()
     raise NotImplementedError('Actions after removing the relation has not yet been implemented')
예제 #23
0
    def initialise_disks(alba_backend, nr_of_disks, disk_type):
        """
        Make sure the requested amount of disks is initialized
        The disks which already have status 'initialized' in the local stack count towards the requested amount,
        only the remainder is initialized using disks which currently have status 'uninitialized'
        :param alba_backend: ALBA backend
        :param nr_of_disks: Total amount of disks which should be initialized
        :param disk_type: Type of disks, passed on to GeneralAlba.filter_disks
        :return: True when enough disks were initialized already, None otherwise
        """
        # Assume no disks are claimed by a remote environment
        alba_backend.invalidate_dynamics(['local_stack'])
        stack_per_node = alba_backend.local_stack

        initialised_count = 0
        uninitialised_count = 0
        uninitialised_per_node = {}
        for node_disks in stack_per_node.values():
            for disk_id, disk in node_disks.iteritems():
                disk_status = disk['status']
                if disk_status == 'initialized':
                    initialised_count += 1
                elif disk_status == 'uninitialized':
                    uninitialised_count += 1
                    uninitialised_per_node.setdefault(disk['node_id'], []).append(disk_id)

        still_needed = nr_of_disks - initialised_count
        if still_needed <= 0:
            return True

        assert uninitialised_count >= still_needed, "Not enough disks to initialize!"

        # Narrow the candidates down to the requested disk type
        selection = GeneralAlba.filter_disks(uninitialised_per_node, still_needed, disk_type)
        selected_count = sum(len(disk_ids) for disk_ids in selection.values())
        assert selected_count >= still_needed, "Not enough disks to initialize!"

        for node_id, disk_ids in selection.iteritems():
            node_guid = AlbaNodeList.get_albanode_by_node_id(node_id).guid
            disk_map = dict(('/dev/disk/by-id/' + disk_id, 1) for disk_id in disk_ids)
            failures = AlbaNodeController.initialize_disks(node_guid, disk_map)
            assert not failures,\
                'Alba disk initialization failed for (some) disks: {0}'.format(failures)
예제 #24
0
 def _wait_for_asd_count_with_status(_alba_backend, _nr_of_asds, status):
     """
     Poll the storage stack of the ALBA backend until enough ASDs with the given status are found
     Only the ALBA Node with the configured 'grid_ip' is taken into account
     The amount of polls is ALBA_TIMER / ALBA_TIMER_STEP, with ALBA_TIMER_STEP seconds of sleep in between
     :param _alba_backend: ALBA backend of which the storage stack is inspected
     :param _nr_of_asds: Minimum amount of ASDs which must have the status
     :param status: Status to look for
     :raises AssertionError: When not enough ASDs with the status were found after the last poll
     :return: The 'guid' entry of the disk for each matching ASD, keyed by ASD ID
     """
     grid_ip = General.get_config().get('main', 'grid_ip')
     alba_node = AlbaNodeList.get_albanode_by_ip(grid_ip)
     attempts_left = GeneralAlba.ALBA_TIMER / GeneralAlba.ALBA_TIMER_STEP
     matching_asds = {}
     while attempts_left > 0:
         GeneralAlba.logger.info('counter: {0}'.format(attempts_left))
         # Force a reload of the storage stack on every poll
         _alba_backend.invalidate_dynamics(['storage_stack'])
         if alba_node.node_id in _alba_backend.storage_stack:
             for _disk in _alba_backend.storage_stack[alba_node.node_id].values():
                 matching_asds.update((_asd_id, _disk.get('guid'))
                                      for _asd_id, _asd in _disk['asds'].iteritems()
                                      if _asd['status'] == status)
         GeneralAlba.logger.info('looking for {0} asds with status {1}: {2}'.format(_nr_of_asds, status, matching_asds))
         if len(matching_asds) >= _nr_of_asds:
             break
         attempts_left -= 1
         time.sleep(GeneralAlba.ALBA_TIMER_STEP)
     found_count = len(matching_asds)
     assert found_count >= _nr_of_asds,\
         "Unable to find {0} asds, only found {1} asds with status: {2}.\n".format(_nr_of_asds, found_count, status)
     return matching_asds
    def get_package_information_alba_plugin_storage_nodes(information):
        """
        Retrieve and store the package information for all AlbaNodes
        According to the original documentation this is called by GenericController.refresh_package_information() every hour

        The package information reported by each node is saved on the node itself
        Failures are not raised, they are added to the 'errors' list of the node inside 'information'
        :param information: Dict keyed by IP in which an 'errors' list is maintained for every AlbaNode (updated in place)
        :return: None
        """
        for alba_node in AlbaNodeList.get_albanodes():
            node_ip = alba_node.ip
            # Make sure an 'errors' list exists for this node without touching anything already present
            errors = information.setdefault(node_ip, {}).setdefault('errors', [])

            try:
                alba_node.package_information = alba_node.client.get_package_information()
                alba_node.save()
            except (requests.ConnectionError, requests.Timeout):
                AlbaUpdateController._logger.warning('Update information for Alba Node with IP {0} could not be updated'.format(node_ip))
                errors.append('Connection timed out or connection refused on {0}'.format(node_ip))
            except Exception as ex:
                # Any other failure is reported as the exception object itself
                errors.append(ex)
예제 #26
0
 def model_albanodes(**kwargs):
     """
     Synchronize the model with the ASD nodes registered under '/ovs/alba/asdnodes' in the config platform
     Nodes which are not modeled yet are created, the already modeled ones are updated with the configured values
     :param kwargs: Kwargs containing information regarding the node (unused)
     :type kwargs: dict
     :return: None
     """
     _ = kwargs
     if not Configuration.dir_exists('/ovs/alba/asdnodes'):
         return
     for node_id in Configuration.list('/ovs/alba/asdnodes'):
         node = AlbaNodeList.get_albanode_by_node_id(node_id)
         if node is None:
             node = AlbaNode()
         main_config = Configuration.get('/ovs/alba/asdnodes/{0}/config/main'.format(node_id))
         node.type = 'ASD'
         node.node_id = node_id
         # These settings are copied 1-on-1 from the main configuration of the node
         for setting in ('ip', 'port', 'username', 'password'):
             setattr(node, setting, main_config[setting])
         node.storagerouter = StorageRouterList.get_by_ip(main_config['ip'])
         node.save()
예제 #27
0
    def list(self, discover=False, ip=None, node_id=None):
        """
        Lists the ALBA Nodes, either the modeled ones or the ones returned by the discovery
        :param discover: When False the modeled ALBA nodes are returned, otherwise the nodes are discovered (optionally narrowed down by IP and node ID)
        :type discover: bool
        :param ip: IP of ALBA node to retrieve (only together with node_id and discover)
        :type ip: str
        :param node_id: ID of the ALBA node (only together with ip and discover)
        :type node_id: str
        :raises HttpNotAcceptableException: When ip or node_id is passed without discover, or when only one of both is passed
        :return: A list of ALBA nodes
        :rtype: ovs.dal.datalist.DataList
        """
        if discover is False:
            # The modeled nodes cannot be filtered
            if ip is not None or node_id is not None:
                raise HttpNotAcceptableException(error='invalid_data',
                                                 error_description='Discover is mutually exclusive with IP and nodeID')
            return AlbaNodeList.get_albanodes()

        # IP and node ID go together: either both are given or none of them
        if (ip is None) != (node_id is None):
            raise HttpNotAcceptableException(error='invalid_data',
                                             error_description='Both IP and nodeID need to be specified')

        # Discover nodes
        discovered = self._discover_nodes(ip=ip, node_id=node_id)
        # Build the DataList by hand, marked as executed with the discovered nodes as its content
        data = {}
        for node in discovered.values():
            data[node.guid] = {'guid': node.guid,
                               'data': node._data}
        node_list = DataList(AlbaNode)
        node_list._executed = True
        node_list._guids = discovered.keys()
        node_list._objects = discovered
        node_list._data = data
        return node_list
예제 #28
0
 def ipmi_check(cls, result_handler):
     """
     Report the IPMI power status of every modeled AlbaNode
     * Nodes without IPMI configuration are skipped
     * Nodes for which no IPMI controller can be built are reported as a failure
     * POWER ON is reported as success, POWER OFF as a warning
     * Time-outs and failing IPMI calls are reported as a failure, any other error as an exception
     :param result_handler: logging object
     :type result_handler: ovs.extensions.healthcheck.result.HCResults
     :return: None
     :rtype: NoneType
     """
     for albanode in AlbaNodeList.get_albanodes():
         node_id = albanode.node_id
         ipmi_config_loc = '/ovs/alba/asdnodes/{0}/config/ipmi'.format(node_id)
         if not Configuration.exists(ipmi_config_loc):
             result_handler.skip('No IPMI info found on AlbaNode with ID {0}'.format(node_id))
             continue
         ipmi_config = Configuration.get(ipmi_config_loc)
         ip = ipmi_config.get('ip')
         try:
             controller = IPMIController(ip=ip,
                                         username=ipmi_config.get('username'),
                                         password=ipmi_config.get('password'),
                                         client=SSHClient(System.get_my_storagerouter()))
         except Exception:
             # Only regular errors are treated as invalid settings
             # KeyboardInterrupt and SystemExit must not be swallowed by the check
             result_handler.failure('IPMI settings are not valid for AlbaNode with ID {0}'.format(node_id))
             continue
         try:
             # The status is looked up by the IPMI IP of the node
             status = controller.status_node().get(ip)
             if status == IPMIController.IPMI_POWER_ON:
                 result_handler.success('IPMI AlbaNode with ID {0} status is POWER ON'.format(node_id))
             elif status == IPMIController.IPMI_POWER_OFF:
                 result_handler.warning('IPMI AlbaNode with ID {0} status is POWER OFF'.format(node_id))
             # NOTE(review): any other status (including a missing one) is not reported at all - confirm this is intended
         except IPMITimeOutException as ex:
             result_handler.failure("IPMI AlbaNode with ID {0} timed out: '{1}'".format(node_id, ex))
         except IPMICallException as ex:
             result_handler.failure("IPMI AlbaNode with ID {0} call failed: '{1}'".format(node_id, ex))
         except Exception:
             msg = 'Could not retrieve info through IPMI for AlbaNode with ID {0}'.format(node_id)
             cls.logger.exception(msg)
             result_handler.exception(msg)
    def post_update_alba_plugin_alba(components):
        """
        Execute some functionality after the ALBA plugin packages have been updated
        Nothing is done unless 'alba' is part of the updated components. When it is:
            * The services are restarted on every ALBA node which reports package information
            * A checkup of the maintenance agents is scheduled
        :param components: Update components which have been executed
        :type components: list
        :return: None
        """
        if 'alba' not in components:
            return

        hook_name = inspect.currentframe().f_code.co_name
        logger = AlbaUpdateController._logger

        # Update ALBA nodes
        logger.debug('Executing hook {0}'.format(hook_name))
        for node in AlbaNodeList.get_albanodes():
            if not node.client.get_package_information():
                continue
            logger.debug('{0}: Restarting services'.format(node.ip))
            node.client.restart_services()

        # Renew maintenance services
        logger.debug('Checkup maintenance agents')
        AlbaController.checkup_maintenance_agents.delay()
        logger.debug('Executed hook {0}'.format(hook_name))
예제 #30
0
    def migrate_sdm():
        """
        Executes async migrations for ALBA SDM node. It doesn't matter too much when they are executed, as long as they get eventually executed.
        This code will typically contain:
        * "dangerous" migration code (it needs certain running services)
        * Migration code depending on a cluster-wide state
        * ...
        The migration code is requested on every modeled ALBA Node. A failure on one node is logged and does not prevent the other nodes from being processed
        :return: None
        """
        from ovs.dal.lists.albanodelist import AlbaNodeList

        logger = AlbaMigrationController._logger
        logger.info('Preparing out of band migrations for SDM...')
        for node in AlbaNodeList.get_albanodes():
            try:
                logger.info('Executing post-update migration code for ALBA Node {0}'.format(node.node_id))
                node.client.update_execute_migration_code()
            except Exception:
                # Best effort: log the failure and carry on with the next node
                logger.exception('Executing post-update migration code for ALBA Node {0} failed'.format(node.node_id))
        logger.info('Finished out of band migrations for SDM')
    def checkup_maintenance_agents():
        """
        Check if requested nr of maintenance agents / backend is actually present
        Add / remove as necessary
        * The requested amount per backend is read from the configuration and defaults to the amount of ALBA nodes
        * Missing agents are added one by one, each time on the node which runs the least agents for the backend
        * Surplus agents are removed one by one, each time from the node which runs the most agents for the backend
        :return: None
        """
        service_template_key = 'alba-maintenance_{0}-{1}'
        maintenance_agents_map = {}
        asd_nodes = AlbaNodeList.get_albanodes()
        nr_of_storage_nodes = len(asd_nodes)

        def _get_node_load(backend_name):
            """
            Count the maintenance agents of a backend on every node
            :return: Dict with the highest loaded node, the lowest loaded node (both None without nodes) and the total amount of agents
            """
            # Every maintenance service of the backend contains this prefix (template with an empty hash)
            service_prefix = service_template_key.format(backend_name, '')
            highest_load = 0
            lowest_load = sys.maxint
            agent_load = {'high_load_node': asd_nodes[0] if asd_nodes else None,
                          'low_load_node': asd_nodes[0] if asd_nodes else None,
                          'total_load': 0}
            for asd_node in asd_nodes:
                actual_nr_of_agents = 0
                for service_name in asd_node.client.list_maintenance_services():
                    if service_prefix in service_name:
                        actual_nr_of_agents += 1
                if actual_nr_of_agents > highest_load:
                    agent_load['high_load_node'] = asd_node
                    highest_load = actual_nr_of_agents
                if actual_nr_of_agents < lowest_load:
                    agent_load['low_load_node'] = asd_node
                    lowest_load = actual_nr_of_agents
                agent_load['total_load'] += actual_nr_of_agents

            return agent_load

        alba_backends = AlbaBackendList.get_albabackends()
        for alba_backend in alba_backends:
            nr_of_agents_key = AlbaNodeController.NR_OF_AGENTS_ETCD_TEMPLATE.format(alba_backend.guid)
            name = alba_backend.backend.name
            # Default the requested amount of agents to 1 per storage node
            if not EtcdConfiguration.exists(nr_of_agents_key):
                EtcdConfiguration.set(nr_of_agents_key, nr_of_storage_nodes)
            required_nr = EtcdConfiguration.get(nr_of_agents_key)
            maintenance_agents_map[name] = {'required': required_nr,
                                            'actual': _get_node_load(name)['total_load'],
                                            'backend': alba_backend.backend}

        for name, values in maintenance_agents_map.iteritems():
            AlbaNodeController._logger.info('Checking backend: {0}'.format(name))
            to_process = values['required'] - values['actual']

            if to_process == 0:
                AlbaNodeController._logger.info('No action required for: {0}'.format(name))
            elif to_process > 0:
                AlbaNodeController._logger.info('Adding {0} maintenance agent(s) for {1}'.format(to_process, name))
                for _ in xrange(to_process):
                    unique_hash = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(16))
                    # The load is recalculated for every agent, as the previously added agent changed it
                    node = _get_node_load(name)['low_load_node']
                    AlbaNodeController._logger.info('Service to add: ' + service_template_key.format(name, unique_hash))
                    if node and node.client:
                        node.client.add_maintenance_service(service_template_key.format(name, unique_hash),
                                                            values['backend'].alba_backend.guid,
                                                            AlbaController.get_abm_service_name(values['backend']))
                        AlbaNodeController._logger.info('Service added')
            else:
                to_process = abs(to_process)
                AlbaNodeController._logger.info('Removing {0} maintenance agent(s) for {1}'.format(to_process, name))
                # Match on the same prefix which is used to count the agents, so services of a backend
                # whose name merely starts with this name are left alone
                service_prefix = service_template_key.format(name, '')
                for _ in xrange(to_process):
                    node = _get_node_load(name)['high_load_node']
                    # Without ALBA nodes there is no node to remove a service from: validate before using the client
                    if not node or not node.client:
                        continue
                    services = node.client.list_maintenance_services()
                    if services:
                        # Remove a single service per iteration
                        for service in services:
                            if service_prefix in service:
                                node.client.remove_maintenance_service(service)
                                break
예제 #32
0
    def _get_update_information_cluster_alba(cls, client, update_info,
                                             package_info):
        """
        In this function the services for each component / package combination are defined
        This service information consists out of:
            * Services to stop (before update) and start (after update of packages) -> 'services_stop_start'
            * Services to restart after update (post-update logic)                  -> 'services_post_update'
            * Down-times which will be caused due to service restarts               -> 'downtime'
            * Prerequisites that have not been met                                  -> 'prerequisites'

        Verify whether all relevant services have the correct binary active
        Whether a service has the correct binary version in use, we use the ServiceFactory.get_service_update_versions functionality
        When a service has an older binary version running, we add this information to the 'update_info'

        This combined information is then stored in the 'package_information' of the StorageRouter DAL object

        :param client: SSHClient on which to retrieve the service information required for an update
        :type client: ovs.extensions.generic.sshclient.SSHClient
        :param update_info: Dictionary passed in by the thread calling this function used to store all update information
        :type update_info: dict
        :param package_info: Dictionary containing the components and packages which have an update available for current SSHClient
        :type package_info: dict
        :return: None
        :rtype: NoneType
        """
        cls._logger.info(
            'StorageRouter {0}: Refreshing ALBA update information'.format(
                client.ip))
        try:
            binaries = cls._package_manager.get_binary_versions(client=client)
            storagerouter = StorageRouterList.get_by_ip(ip=client.ip)
            cls._logger.debug('StorageRouter {0}: Binary versions: {1}'.format(
                client.ip, binaries))

            # Retrieve Arakoon information
            arakoon_info = {}
            for service in storagerouter.services:
                if service.type.name not in [
                        ServiceType.SERVICE_TYPES.ALBA_MGR,
                        ServiceType.SERVICE_TYPES.NS_MGR
                ]:
                    continue

                if service.type.name == ServiceType.SERVICE_TYPES.ALBA_MGR:
                    cluster_name = service.abm_service.abm_cluster.name
                    alba_backend_name = service.abm_service.abm_cluster.alba_backend.name
                else:
                    cluster_name = service.nsm_service.nsm_cluster.name
                    alba_backend_name = service.nsm_service.nsm_cluster.alba_backend.name

                cls._logger.debug(
                    'StorageRouter {0}: Retrieving update information for Arakoon cluster {1}'
                    .format(client.ip, cluster_name))
                arakoon_update_info = ArakoonInstaller.get_arakoon_update_info(
                    cluster_name=cluster_name)
                cls._logger.debug(
                    'StorageRouter {0}: Arakoon update information for cluster {1}: {2}'
                    .format(client.ip, cluster_name, arakoon_update_info))
                if arakoon_update_info['internal'] is True:
                    arakoon_info[arakoon_update_info['service_name']] = [
                        'backend', alba_backend_name
                    ] if arakoon_update_info['downtime'] is True else None

            for component, package_names in PackageFactory.get_package_info(
            )['names'].iteritems():
                package_names = sorted(package_names)
                cls._logger.debug(
                    'StorageRouter {0}: Validating component {1} and related packages: {2}'
                    .format(client.ip, component, package_names))

                if component not in update_info[client.ip]:
                    update_info[client.ip][component] = copy.deepcopy(
                        ServiceFactory.DEFAULT_UPDATE_ENTRY)
                svc_component_info = update_info[client.ip][component]
                pkg_component_info = package_info.get(component, {})

                for package_name in package_names:
                    cls._logger.debug(
                        'StorageRouter {0}: Validating ALBA plugin related package {1}'
                        .format(client.ip, package_name))
                    if package_name == PackageFactory.PKG_OVS_BACKEND and package_name in pkg_component_info:
                        if ['gui', None] not in svc_component_info['downtime']:
                            svc_component_info['downtime'].append(
                                ['gui', None])
                        if ['api', None] not in svc_component_info['downtime']:
                            svc_component_info['downtime'].append(
                                ['api', None])
                        svc_component_info['services_stop_start'][10].append(
                            'ovs-watcher-framework')
                        svc_component_info['services_stop_start'][20].append(
                            'memcached')
                        cls._logger.debug(
                            'StorageRouter {0}: Added services "ovs-watcher-framework" and "memcached" to stop-start services'
                            .format(client.ip))
                        cls._logger.debug(
                            'StorageRouter {0}: Added GUI and API to downtime'.
                            format(client.ip))

                    elif package_name in [
                            PackageFactory.PKG_ALBA, PackageFactory.PKG_ALBA_EE
                    ]:
                        # Retrieve proxy service information
                        for service in storagerouter.services:
                            if service.type.name != ServiceType.SERVICE_TYPES.ALBA_PROXY or service.alba_proxy is None:
                                continue

                            service_version = None
                            if package_name not in pkg_component_info:
                                service_version = ServiceFactory.get_service_update_versions(
                                    client=client,
                                    service_name=service.name,
                                    binary_versions=binaries)

                            cls._logger.debug(
                                'StorageRouter {0}: Service {1} is running version {2}'
                                .format(client.ip, service.name,
                                        service_version))
                            if package_name in pkg_component_info or service_version is not None:
                                if service_version is not None and package_name not in svc_component_info[
                                        'packages']:
                                    svc_component_info['packages'][
                                        package_name] = service_version
                                svc_component_info['services_post_update'][
                                    10].append('ovs-{0}'.format(service.name))
                                cls._logger.debug(
                                    'StorageRouter {0}: Added service {1} to post-update services'
                                    .format(client.ip,
                                            'ovs-{0}'.format(service.name)))

                                downtime = [
                                    'proxy',
                                    service.alba_proxy.storagedriver.vpool.name
                                ]
                                if downtime not in svc_component_info[
                                        'downtime']:
                                    svc_component_info['downtime'].append(
                                        downtime)
                                    cls._logger.debug(
                                        'StorageRouter {0}: Added ALBA proxy downtime for vPool {1} to downtime'
                                        .format(
                                            client.ip, service.alba_proxy.
                                            storagedriver.vpool.name))

                    if package_name in [
                            PackageFactory.PKG_ALBA,
                            PackageFactory.PKG_ALBA_EE,
                            PackageFactory.PKG_ARAKOON
                    ]:
                        for service_name, downtime in arakoon_info.iteritems():
                            service_version = ServiceFactory.get_service_update_versions(
                                client=client,
                                service_name=service_name,
                                binary_versions=binaries,
                                package_name=package_name)
                            cls._logger.debug(
                                'StorageRouter {0}: Arakoon service {1} information: {2}'
                                .format(client.ip, service_name,
                                        service_version))

                            if package_name in pkg_component_info or service_version is not None:
                                svc_component_info['services_post_update'][
                                    10].append('ovs-{0}'.format(service_name))
                                cls._logger.debug(
                                    'StorageRouter {0}: Added service {1} to post-update services'
                                    .format(client.ip,
                                            'ovs-{0}'.format(service_name)))
                                if service_version is not None and package_name not in svc_component_info[
                                        'packages']:
                                    svc_component_info['packages'][
                                        package_name] = service_version
                                if downtime is not None and downtime not in svc_component_info[
                                        'downtime']:
                                    svc_component_info['downtime'].append(
                                        downtime)
                                    cls._logger.debug(
                                        'StorageRouter {0}: Added Arakoon cluster for ALBA Backend {1} to downtime'
                                        .format(client.ip, downtime[1]))

                    # Extend the service information with the package information related to this repository for current StorageRouter
                    if package_name in pkg_component_info and package_name not in svc_component_info[
                            'packages']:
                        cls._logger.debug(
                            'StorageRouter {0}: Adding package {1} because it has an update available'
                            .format(client.ip, package_name))
                        svc_component_info['packages'][
                            package_name] = pkg_component_info[package_name]

                if component == PackageFactory.COMP_ALBA:
                    for alba_node in AlbaNodeList.get_albanodes():
                        try:
                            alba_node.client.get_metadata()
                        except:
                            svc_component_info['prerequisites'].append(
                                ['alba_node_unresponsive', alba_node.ip])
                            cls._logger.debug(
                                'StorageRouter {0}: Added unresponsive ALBA Node {1} to prerequisites'
                                .format(client.ip, alba_node.ip))

                # Verify whether migration (DAL and extension) code needs to be executed (only if no packages have an update available so far)
                elif component == PackageFactory.COMP_FWK and PackageFactory.PKG_OVS_BACKEND not in svc_component_info[
                        'packages']:
                    cls._logger.debug(
                        'StorageRouter {0}: No updates detected, checking for required migrations'
                        .format(client.ip))
                    # Extension migration check
                    key = '/ovs/framework/hosts/{0}/versions'.format(
                        System.get_my_machine_id(client=client))
                    old_version = Configuration.get(key, default={}).get(
                        PackageFactory.COMP_MIGRATION_ALBA)
                    installed_version = str(
                        cls._package_manager.get_installed_versions(
                            client=client,
                            package_names=[PackageFactory.PKG_OVS_BACKEND
                                           ])[PackageFactory.PKG_OVS_BACKEND])
                    migrations_detected = False
                    if old_version is not None:
                        cls._logger.debug(
                            'StorageRouter {0}: Current running version for {1} extension migrations: {2}'
                            .format(client.ip, PackageFactory.COMP_ALBA,
                                    old_version))
                        with remote(client.ip, [ExtensionMigrator]) as rem:
                            cls._logger.debug(
                                'StorageRouter {0}: Available version for {1} extension migrations: {2}'
                                .format(client.ip, PackageFactory.COMP_ALBA,
                                        rem.ExtensionMigrator.THIS_VERSION))
                            if rem.ExtensionMigrator.THIS_VERSION > old_version:
                                migrations_detected = True
                                svc_component_info['packages'][
                                    PackageFactory.PKG_OVS_BACKEND] = {
                                        'installed': 'migrations',
                                        'candidate': installed_version
                                    }

                    # DAL migration check
                    if migrations_detected is False:
                        persistent_client = PersistentFactory.get_client()
                        old_version = persistent_client.get(
                            'ovs_model_version').get(
                                PackageFactory.COMP_MIGRATION_ALBA
                            ) if persistent_client.exists(
                                'ovs_model_version') else None
                        if old_version is not None:
                            cls._logger.debug(
                                'StorageRouter {0}: Current running version for {1} DAL migrations: {2}'
                                .format(client.ip, PackageFactory.COMP_ALBA,
                                        old_version))
                            with remote(client.ip, [DALMigrator]) as rem:
                                cls._logger.debug(
                                    'StorageRouter {0}: Available version for {1} DAL migrations: {2}'
                                    .format(client.ip,
                                            PackageFactory.COMP_ALBA,
                                            rem.DALMigrator.THIS_VERSION))
                                if rem.DALMigrator.THIS_VERSION > old_version:
                                    svc_component_info['packages'][
                                        PackageFactory.PKG_OVS_BACKEND] = {
                                            'installed': 'migrations',
                                            'candidate': installed_version
                                        }

            cls._logger.info(
                'StorageRouter {0}: Refreshed ALBA update information'.format(
                    client.ip))
        except Exception as ex:
            cls._logger.exception(
                'StorageRouter {0}: Refreshing ALBA update information failed'.
                format(client.ip))
            if 'errors' not in update_info[client.ip]:
                update_info[client.ip]['errors'] = []
            update_info[client.ip]['errors'].append(ex)
    def get_update_information_alba_plugin(information):
        """
        Called when the 'Update' button in the GUI is pressed
        This call collects additional information about the packages which can be updated
        Eg:
            * Downtime for Arakoons
            * Downtime for StorageDrivers
            * Prerequisites that haven't been met
            * Services which will be stopped during update
            * Services which will be restarted after update

        :param information: Dict keyed by component ('framework', 'alba'). Missing components are created here,
                            existing entries are extended in place
        :type information: dict
        :return: The same 'information' dict, holding per component the keys 'packages', 'downtime', 'prerequisites',
                 'services_stop_start' and 'services_post_update'
        :rtype: dict
        """
        # Verify arakoon info
        # 'down' is only ever set for the 'ovsdb' cluster (see below), for 'cacc' it always stays False
        arakoon_ovs_info = {'down': False,
                            'name': None,
                            'internal': False}
        arakoon_cacc_info = {'down': False,
                             'name': None,
                             'internal': False}
        for cluster in ['cacc', 'ovsdb']:
            cluster_name = ArakoonClusterConfig.get_cluster_name(cluster)
            if cluster_name is None:
                continue

            # The 'cacc' cluster is looked up with filesystem=True and the IP of the local StorageRouter,
            # the 'ovsdb' cluster with the default arguments
            if cluster == 'cacc':
                arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name, filesystem=True, ip=System.get_my_storagerouter().ip)
            else:
                arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name)

            # Only clusters flagged as 'internal' in their metadata are taken into account
            if arakoon_metadata['internal'] is True:
                config = ArakoonClusterConfig(cluster_id=cluster_name, filesystem=(cluster == 'cacc'))
                config.load_config(System.get_my_storagerouter().ip if cluster == 'cacc' else None)
                if cluster == 'ovsdb':
                    # A cluster with less than 3 nodes is flagged as 'down'
                    # NOTE(review): presumably because restarting a node then makes the cluster unavailable - confirm
                    arakoon_ovs_info['down'] = len(config.nodes) < 3
                    arakoon_ovs_info['name'] = arakoon_metadata['cluster_name']
                    arakoon_ovs_info['internal'] = True
                else:
                    arakoon_cacc_info['name'] = arakoon_metadata['cluster_name']
                    arakoon_cacc_info['internal'] = True

        # Verify StorageRouter downtime
        # The SSHClient instance itself is discarded, only a failure to construct it is recorded as prerequisite
        fwk_prerequisites = []
        all_storagerouters = StorageRouterList.get_storagerouters()
        for storagerouter in all_storagerouters:
            try:
                SSHClient(endpoint=storagerouter, username='******')
            except UnableToConnectException:
                fwk_prerequisites.append(['node_down', storagerouter.name])

        # Verify ALBA node responsiveness
        # Any exception raised while fetching the metadata marks the node as unresponsive
        alba_prerequisites = []
        for alba_node in AlbaNodeList.get_albanodes():
            try:
                alba_node.client.get_metadata()
            except Exception:
                alba_prerequisites.append(['alba_node_unresponsive', alba_node.ip])

        for key in ['framework', 'alba']:
            # The prerequisites gathered above are only applied when the component entry does not exist yet
            if key not in information:
                information[key] = {'packages': {},
                                    'downtime': [],
                                    'prerequisites': fwk_prerequisites if key == 'framework' else alba_prerequisites,
                                    'services_stop_start': set(),
                                    'services_post_update': set()}

            # The StorageRouters are queried again for every component instead of re-using 'all_storagerouters'
            for storagerouter in StorageRouterList.get_storagerouters():
                if key not in storagerouter.package_information:
                    continue

                # Retrieve Arakoon issues
                # Both lists are rebuilt per StorageRouter and only cover its ALBA manager and namespace manager services
                arakoon_downtime = []
                arakoon_services = []
                for service in storagerouter.services:
                    if service.type.name not in [ServiceType.SERVICE_TYPES.ALBA_MGR, ServiceType.SERVICE_TYPES.NS_MGR]:
                        continue

                    if service.type.name == ServiceType.SERVICE_TYPES.ALBA_MGR:
                        cluster_name = AlbaController.get_abm_cluster_name(alba_backend=service.abm_service.alba_backend)
                    else:
                        cluster_name = AlbaController.get_nsm_cluster_name(alba_backend=service.nsm_service.alba_backend, number=service.nsm_service.number)
                    # Skip clusters for which no Arakoon configuration is stored
                    if Configuration.exists('/ovs/arakoon/{0}/config'.format(cluster_name), raw=True) is False:
                        continue
                    arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name)
                    if arakoon_metadata['internal'] is True:
                        arakoon_services.append('ovs-{0}'.format(service.name))
                        config = ArakoonClusterConfig(cluster_id=cluster_name, filesystem=False)
                        config.load_config()
                        # Same threshold as for the 'ovsdb' cluster: less than 3 nodes is reported as downtime for the backend
                        if len(config.nodes) < 3:
                            if service.type.name == ServiceType.SERVICE_TYPES.NS_MGR:
                                arakoon_downtime.append(['backend', service.nsm_service.alba_backend.name])
                            else:
                                arakoon_downtime.append(['backend', service.abm_service.alba_backend.name])

                for package_name, package_info in storagerouter.package_information[key].iteritems():
                    if package_name not in AlbaUpdateController.alba_plugin_packages:
                        continue  # Only gather information for the core packages

                    # 'services_to_restart' is popped (KeyError when absent) so it does not end up in the 'packages' entry below
                    information[key]['services_post_update'].update(package_info.pop('services_to_restart'))
                    if package_name not in information[key]['packages']:
                        information[key]['packages'][package_name] = {}
                    information[key]['packages'][package_name].update(package_info)

                    if package_name == 'openvstorage-backend':
                        # Updating this package is reported as GUI and API downtime
                        if ['gui', None] not in information[key]['downtime']:
                            information[key]['downtime'].append(['gui', None])
                        if ['api', None] not in information[key]['downtime']:
                            information[key]['downtime'].append(['api', None])
                        information[key]['services_stop_start'].update({'watcher-framework', 'memcached'})
                    elif package_name == 'alba':
                        # Report the backend Arakoon downtime and services gathered for this StorageRouter
                        for down in arakoon_downtime:
                            if down not in information[key]['downtime']:
                                information[key]['downtime'].append(down)
                        information[key]['services_post_update'].update(arakoon_services)
                    elif package_name == 'arakoon':
                        if key == 'framework':
                            # For the framework component only the internal 'ovsdb' and 'cacc' clusters are relevant
                            framework_arakoons = set()
                            if arakoon_ovs_info['internal'] is True:
                                framework_arakoons.add('ovs-arakoon-{0}'.format(arakoon_ovs_info['name']))
                            if arakoon_cacc_info['internal'] is True:
                                framework_arakoons.add('ovs-arakoon-{0}'.format(arakoon_cacc_info['name']))

                            information[key]['services_post_update'].update(framework_arakoons)
                            if arakoon_ovs_info['down'] is True and ['ovsdb', None] not in information[key]['downtime']:
                                information[key]['downtime'].append(['ovsdb', None])
                        else:
                            # For the other component the handling is identical to the 'alba' package
                            for down in arakoon_downtime:
                                if down not in information[key]['downtime']:
                                    information[key]['downtime'].append(down)
                            information[key]['services_post_update'].update(arakoon_services)

            # The ALBA nodes are queried again for every component
            for alba_node in AlbaNodeList.get_albanodes():
                for package_name, package_info in alba_node.package_information.get(key, {}).iteritems():
                    if package_name not in AlbaUpdateController.sdm_packages:
                        continue  # Only gather information for the SDM packages

                    # Same handling as for the StorageRouter packages: 'services_to_restart' is popped before merging
                    information[key]['services_post_update'].update(package_info.pop('services_to_restart'))
                    if package_name not in information[key]['packages']:
                        information[key]['packages'][package_name] = {}
                    information[key]['packages'][package_name].update(package_info)
        return information
예제 #34
0
 def get_alba_nodes():
     """
     Fetch every ALBA node by delegating to AlbaNodeList.get_albanodes()
     :return: The result of AlbaNodeList.get_albanodes(), unmodified
     """
     alba_nodes = AlbaNodeList.get_albanodes()
     return alba_nodes
    def migrate(previous_version):
        """
        Migrate the ALBA model from 'previous_version' towards ALBAMigrator.THIS_VERSION.
        Version 0 seeds the initial model entries, any other version lower than ALBAMigrator.THIS_VERSION runs the
        upgrade steps. Versions equal to or higher than ALBAMigrator.THIS_VERSION are left untouched.
        :param previous_version: The previous version from which to start the migration
        :type previous_version: float
        :return: ALBAMigrator.THIS_VERSION
        """

        working_version = previous_version

        if working_version == 0:
            # Initial version: seed the basic model entries

            # Backend types are looked up by code first, an existing entry is re-used and overwritten
            for type_name, type_code in [('ALBA', 'alba')]:
                backend_type = BackendTypeList.get_backend_type_by_code(type_code)
                if backend_type is None:
                    backend_type = BackendType()
                backend_type.name = type_name
                backend_type.code = type_code
                backend_type.save()

            # Service types are always created as new entries
            for service_type_name in [ServiceType.SERVICE_TYPES.NS_MGR, ServiceType.SERVICE_TYPES.ALBA_MGR]:
                new_service_type = ServiceType()
                new_service_type.name = service_type_name
                new_service_type.save()

        # From here on, all actual migration should happen to get to the expected state for THIS RELEASE
        elif working_version < ALBAMigrator.THIS_VERSION:

            import hashlib
            from ovs.dal.helpers import HybridRunner, Descriptor
            from ovs.extensions.storage.persistentfactory import PersistentFactory

            # Step 1: store the unique-constraint keys for unique properties which do not have any yet
            persistent = PersistentFactory.get_client()
            for descriptor in HybridRunner.get_hybrids().values():
                hybrid = Descriptor().load(descriptor).get_object()
                hybrid_name = hybrid.__name__.lower()
                unique_template = 'ovs_unique_{0}_{{0}}_'.format(hybrid_name)
                # noinspection PyProtectedMember
                missing = [prop.name for prop in hybrid._properties
                           if prop.unique is True and len(list(persistent.prefix(unique_template.format(prop.name)))) == 0]
                if not missing:
                    continue
                # Every stored object of this class gets 1 key per unique property, pointing back to the object's data key
                for data_key in persistent.prefix('ovs_data_{0}_'.format(hybrid_name)):
                    record = persistent.get(data_key)
                    for property_name in missing:
                        digest = hashlib.sha1(str(record[property_name])).hexdigest()
                        persistent.set('{0}{1}'.format(unique_template.format(property_name), digest), data_key)

            # Step 2: changes on AlbaNodes & AlbaDisks
            from ovs.dal.lists.albanodelist import AlbaNodeList
            linked_storagerouter_guids = []
            for alba_node in AlbaNodeList.get_albanodes():
                # StorageRouter - AlbaNode relation went from 1-to-many to 1-to-1:
                # the first node found keeps its StorageRouter, every next node pointing to the same one is unlinked
                storagerouter_guid = alba_node.storagerouter_guid
                if storagerouter_guid is not None:
                    if storagerouter_guid not in linked_storagerouter_guids:
                        linked_storagerouter_guids.append(storagerouter_guid)
                    else:
                        alba_node.storagerouter = None
                        alba_node.save()
                # Complete rework of the way we detect devices to assign roles or use as ASD
                # Allow loop-, raid-, nvme-, ??-devices and logical volumes as ASD
                # More info: https://github.com/openvstorage/framework/issues/792
                for alba_disk in alba_node.disks:
                    # Only disks without aliases which still carry the old 'name' field are converted
                    if alba_disk.aliases is None and 'name' in alba_disk._data:
                        alba_disk.aliases = ['/dev/disk/by-id/{0}'.format(alba_disk._data['name'])]
                        alba_disk.save()

        return ALBAMigrator.THIS_VERSION
예제 #36
0
    def _package_install_plugin_alba(cls, components=None):
        """
        Install the candidate package versions on every ALBA node of type ASD
        Nodes are handled in order of their IP, packages alphabetically with the extensions package first
        :param components: Components which have been selected for update (defaults to the ALBA component only)
        :type components: list
        :return: True when at least 1 package failed to be updated, False otherwise
        :rtype: bool
        """
        cls._logger.info('Updating packages for ALBA plugin')
        if components is None:
            components = [PackageFactory.COMP_ALBA]

        def _by_ip(node):
            return ExtensionsToolbox.advanced_sort(element=node.ip, separator='.')

        def _extensions_first(name):
            # False sorts before True, so the extensions package ends up in front of the alphabetical remainder
            return name != PackageFactory.PKG_OVS_EXTENSIONS, name

        failure_detected = False
        asd_nodes = AlbaNodeList.get_albanodes_by_type(AlbaNode.NODE_TYPES.ASD)
        for alba_node in sorted(asd_nodes, key=_by_ip):
            cls._logger.debug('ALBA Node {0}: Verifying packages'.format(alba_node.ip))
            for component in components:
                component_info = alba_node.package_information.get(component, {})
                packages = component_info.get('packages', {})
                # Always install the extensions package first
                ordered_names = sorted(packages, key=_extensions_first)

                if ordered_names:
                    cls._logger.debug('ALBA Node {0}: Packages for component {1}: {2}'.format(alba_node.ip, component, ordered_names))
                for package_name in ordered_names:
                    try:
                        versions = packages[package_name]
                        installed = versions['installed']
                        candidate = versions['candidate']

                        current = alba_node.client.update_installed_version_package(package_name=package_name)
                        if candidate == current:
                            # Package has already been installed by another hook
                            continue

                        cls._logger.debug('ALBA Node {0}: Updating package {1} ({2} --> {3})'.format(alba_node.ip, package_name, installed, candidate))
                        alba_node.client.execute_update(package_name)
                        cls._logger.debug('ALBA Node {0}: Updated package {1}'.format(alba_node.ip, package_name))
                    except Exception as ex:
                        # A 'Connection aborted.' ConnectionError is thrown due the post-update code of the SDM package
                        # which restarts the asd-manager service, so that specific error is not treated as a failure
                        expected_abort = isinstance(ex, requests.ConnectionError) and 'Connection aborted.' in ex.message
                        if not expected_abort:
                            cls._logger.exception('ALBA Node {0}: Failed to update package {1}'.format(alba_node.ip, package_name))
                            failure_detected = True

        if failure_detected is False:
            cls._logger.info('Updated packages for ALBA plugin')
        return failure_detected
    def get_backend_stats():
        """
        Send backend stats for each backend to InfluxDB

        Per ALBA backend 1 'backend_stats' point is built, holding the get/put counters and the amount of ASDs per status.
        :return: The list of points which has been sent, or None when no ALBA manager service exists,
                 when the decommissioning OSDs could not be listed or when no point could be built
        :rtype: list or NoneType
        """
        logger = StatsmonkeyScheduledTaskController._logger

        abm_names = list(set(service.name
                             for service in ServiceList.get_services()
                             if service.type.name == ServiceType.SERVICE_TYPES.ALBA_MGR))
        if not abm_names:
            # Without ALBA manager there is no Arakoon config to query (indexing the empty list used to raise an IndexError)
            logger.info("No ALBA manager services found")
            return None

        # A set gives O(1) membership tests and makes sure an OSD is counted only once
        node_ids = set(alba_node.node_id for alba_node in AlbaNodeList.get_albanodes())

        config = "etcd://127.0.0.1:2379/ovs/arakoon/{}/config".format(abm_names[0])
        try:
            decommissioning_osds = AlbaCLI.run('list-decommissioning-osds', config=config, to_json=True)
        except Exception as ex:
            logger.error('{0}'.format(ex.message))
            return None

        # NOTE(review): this count is based on the first ALBA manager only and is reported for every backend - confirm intent
        decommissioning_count = sum(1 for osd in decommissioning_osds if osd['node_id'] in node_ids)

        points = []
        for alba_backend in AlbaBackendList.get_albabackends():
            # A failure for 1 backend is logged and does not prevent the other backends from being reported
            try:
                stat = {
                    'measurement': 'backend_stats',
                    'tags': {
                        'backend_name': alba_backend.name
                    },
                    'fields': {
                        'gets': alba_backend.statistics['multi_get']['n'],
                        'puts': alba_backend.statistics['apply']['n']
                    }
                }
                stat_asd = {
                    'decommissioning': decommissioning_count,
                    'decommissioned': 0,
                    'claimed': 0,
                    'warning': 0,
                    'failure': 0,
                    'error': 0
                }

                for disks in alba_backend.local_stack.values():
                    for disk in disks.values():
                        for asd in disk['asds'].values():
                            if asd['alba_backend_guid'] != alba_backend.guid:
                                continue  # ASD is not related to this backend
                            status = asd['status']
                            if asd['status_detail'] == 'decommissioned':
                                status = 'decommissioned'
                            stat_asd[status] = stat_asd.get(status, 0) + 1

                stat['fields'].update(stat_asd)
                points.append(stat)
            except Exception as ex:
                logger.error(ex.message)

        if len(points) == 0:
            logger.info("No statistics found")
            return None

        StatsmonkeyScheduledTaskController._send_stats(points)
        return points
예제 #38
0
    def _post_update_alba_plugin_alba(cls, components):
        """
        Execute some functionality after the ALBA plugin packages have been updated for the ASD manager nodes
        Steps:
            * Run the AlbaMigrationController migrations ('migrate' and 'migrate_sdm'), failures are logged only
            * Restart the post-update services on every ALBA node of type ASD, in order of the node's IP
            * Schedule a checkup of the maintenance agents
        Nothing is executed when the ALBA component is not part of 'components'
        :param components: Update components which have been executed
        :type components: list
        :return: None
        :rtype: NoneType
        """
        if PackageFactory.COMP_ALBA not in components:
            return

        # First run post-update migrations to update services, config mgmt, ... and restart services afterwards
        for method_name in ['migrate', 'migrate_sdm']:
            try:
                # noinspection PyUnresolvedReferences
                from ovs.lib.albamigration import AlbaMigrationController
                cls._logger.debug('Executing migration code: AlbaMigrationController.{0}()'.format(method_name))
                getattr(AlbaMigrationController, method_name)()
            except ImportError:
                cls._logger.error('Could not import AlbaMigrationController')
            except Exception:
                cls._logger.exception('Migration code for the ALBA plugin failed to be executed')

        # Update ALBA nodes
        method_name = inspect.currentframe().f_code.co_name  # Name of this hook, used for logging only
        cls._logger.info('Executing hook {0}'.format(method_name))
        alba_nodes = sorted(AlbaNodeList.get_albanodes_by_type(AlbaNode.NODE_TYPES.ASD),
                            key=lambda an: ExtensionsToolbox.advanced_sort(element=an.ip, separator='.'))
        for alba_node in alba_nodes:
            services_to_restart = []  # List instead of set, because the restart order must be preserved
            for component in components:
                if component not in alba_node.package_information:
                    continue

                component_info = alba_node.package_information[component]
                if 'services_post_update' not in component_info:
                    # Package_information still has the old format, so refresh update information
                    # This can occur when updating from earlier than 2.11.0 to 2.11.0 and older
                    try:
                        GenericController.refresh_package_information()
                    except Exception:
                        # Best effort: only regular errors are logged and ignored, SystemExit and KeyboardInterrupt
                        # are no longer swallowed
                        cls._logger.exception('{0}: Refreshing package information failed'.format(alba_node.ip))
                    # Drop the cached object state so the refreshed package information gets loaded
                    alba_node.discard()
                    component_info = alba_node.package_information.get(component, {})

                # The restart orders are converted to integers to have them sorted numerically
                services_post_update = {int(key): value
                                        for key, value in component_info.get('services_post_update', {}).iteritems()}
                for restart_order in sorted(services_post_update):
                    for service_name in sorted(services_post_update[restart_order]):
                        if service_name not in services_to_restart:
                            services_to_restart.append(service_name)

            if len(services_to_restart) > 0:
                alba_node.client.restart_services(service_names=services_to_restart)

        # Renew maintenance services
        cls._logger.info('Checkup maintenance agents')
        AlbaController.checkup_maintenance_agents.delay()

        cls._logger.info('Executed hook {0}'.format(method_name))
예제 #39
0
    def _all_disks(self):
        """
        Returns a live list of all disks known to this AlbaBackend

        The OSDs reported by 'list-all-osds' for this backend are matched with the disks reported by every modeled
        ALBA node, after which a status is calculated for each disk. Entries for which only an OSD (and no disk) was
        found are not returned.
        :return: List of disk dicts as reported by the ALBA nodes, each extended with the keys 'status', 'status_detail'
                 and 'alba_backend_guid'. The disks are processed by multiple threads, so the order can vary
        :rtype: list
        """
        from ovs.dal.lists.albanodelist import AlbaNodeList
        from ovs.dal.lists.albabackendlist import AlbaBackendList

        alba_backend_map = {}
        for a_backend in AlbaBackendList.get_albabackends():
            alba_backend_map[a_backend.alba_id] = a_backend
        node_disk_map = {}
        alba_nodes = AlbaNodeList.get_albanodes()
        for node in alba_nodes:
            node_disk_map[node.node_id] = []

        # Load OSDs, only the ones located on a modeled ALBA node are kept
        config = 'etcd://127.0.0.1:2379/ovs/arakoon/{0}-abm/config'.format(self.backend.name)
        for found_osd in AlbaCLI.run('list-all-osds', config=config, as_json=True):
            node_id = found_osd['node_id']
            if node_id in node_disk_map:
                node_disk_map[node_id].append({'osd': found_osd})

        # Load all_disk information
        def _load_disks(_node, _list):
            # Attach every disk of the node to the OSD container with a matching ASD ID, or add it as a container of its own
            for _disk in _node.all_disks:
                found = False
                for container in _list:
                    if 'osd' in container and container['osd']['long_id'] == _disk.get('asd_id'):
                        container['disk'] = _disk
                        found = True
                        break
                if found is False:
                    _list.append({'disk': _disk})
        # 1 thread per node, each thread only touches the list of its own node
        threads = []
        for node in alba_nodes:
            thread = Thread(target=_load_disks, args=(node, node_disk_map[node.node_id]))
            thread.start()
            threads.append(thread)
        for thread in threads:
            thread.join()

        # Make mapping between node IDs and the relevant OSDs and disks
        def _process_disk(_info, _disks, _node):
            # Calculate the status of a single disk and append the disk to '_disks'
            disk = _info.get('disk')
            if disk is None:
                return  # OSD without a matching disk
            disk_status = 'uninitialized'
            disk_status_detail = ''
            disk_alba_backend_guid = ''
            if disk['available'] is False:
                osd = _info.get('osd')  # None when no OSD matches this disk
                disk_alba_state = disk['state']['state']
                if disk_alba_state == 'ok':
                    if osd is None:
                        disk_status = 'initialized'
                    elif osd['id'] is None:
                        alba_id = osd['alba_id']
                        if alba_id is None:
                            disk_status = 'available'
                        else:
                            disk_status = 'unavailable'
                            alba_backend = alba_backend_map.get(alba_id)
                            if alba_backend is not None:
                                disk_alba_backend_guid = alba_backend.guid
                    else:
                        # The OSD has an ID: start from the worst case and upgrade the status when information is available
                        disk_status = 'error'
                        disk_status_detail = 'communicationerror'
                        disk_alba_backend_guid = self.guid

                        for asd in _node.asds:
                            if asd.asd_id == disk['asd_id'] and asd.statistics != {}:
                                disk_status = 'warning'
                                disk_status_detail = 'recenterrors'

                                read = osd['read'] or [0]
                                write = osd['write'] or [0]
                                errors = osd['errors']
                                # A backend specific interval takes precedence over the global one
                                global_interval_key = '/ovs/alba/backends/global_gui_error_interval'
                                backend_interval_key = '/ovs/alba/backends/{0}/gui_error_interval'.format(self.guid)
                                interval = EtcdConfiguration.get(global_interval_key)
                                if EtcdConfiguration.exists(backend_interval_key):
                                    interval = EtcdConfiguration.get(backend_interval_key)
                                # Claimed when there are no errors, or when a read or write value is more than
                                # 'interval' higher than the highest first element of the error entries
                                if len(errors) == 0 or (len(read + write) > 0 and max(min(read), min(write)) > max(error[0] for error in errors) + interval):
                                    disk_status = 'claimed'
                                    disk_status_detail = ''
                elif disk_alba_state == 'decommissioned':
                    disk_status = 'unavailable'
                    disk_status_detail = 'decommissioned'
                else:
                    disk_status = 'error'
                    disk_status_detail = disk['state']['detail']
                    # A disk in error state does not necessarily have a matching OSD. Without this check an
                    # AttributeError was raised, which killed the worker thread and dropped the disk from the result
                    if osd is not None:
                        alba_backend = alba_backend_map.get(osd.get('alba_id'))
                        if alba_backend is not None:
                            disk_alba_backend_guid = alba_backend.guid
            disk['status'] = disk_status
            disk['status_detail'] = disk_status_detail
            disk['alba_backend_guid'] = disk_alba_backend_guid
            _disks.append(disk)

        def _worker(_queue, _disks):
            # Process queued items until the queue is empty
            while True:
                try:
                    item = _queue.get(False)
                    _process_disk(item['info'], _disks, item['node'])
                except Empty:
                    return

        # The queue is completely filled before the workers are started, so an empty queue means all work is handed out
        queue = Queue()
        for node in alba_nodes:
            for info in node_disk_map[node.node_id]:
                queue.put({'info': info,
                           'node': node})
        disks = []
        threads = []
        for _ in range(5):
            thread = Thread(target=_worker, args=(queue, disks))
            thread.start()
            threads.append(thread)
        for thread in threads:
            thread.join()
        return disks
    def _local_stack(self):
        """
        Builds a live overview of all disks (and the ASDs on them) known to this AlbaBackend
        The overview is assembled in 4 steps:
            - A skeleton based on the modeled ALBA nodes, disks and OSDs
            - The storage stack as reported by every ALBA node (fetched in parallel)
            - The usage derived from the ASD statistics of this ALBA Backend
            - The claim/error state derived from the OSDs listed through the ALBA CLI
        :return: Disk information per disk ID, grouped per node ID. An empty dict when no ABM services are present
        """
        from ovs.dal.lists.albanodelist import AlbaNodeList
        from ovs.dal.lists.albabackendlist import AlbaBackendList

        if len(self.abm_services) == 0:
            return {}  # No ABM services yet, so backend not fully installed yet

        alba_backend_map = dict((backend.alba_id, backend) for backend in AlbaBackendList.get_albabackends())

        # Step 1: skeleton based on the model, everything starts out as 'error' / 'unknown'
        asd_map = {}
        storage_map = {}
        alba_nodes = AlbaNodeList.get_albanodes()
        for alba_node in alba_nodes:
            modeled_disks = {}
            storage_map[alba_node.node_id] = modeled_disks
            for modeled_disk in alba_node.disks:
                disk_name = modeled_disk.aliases[0].split('/')[-1]
                modeled_asds = {}
                modeled_disks[disk_name] = {'asds': modeled_asds,
                                            'name': disk_name,
                                            'guid': modeled_disk.guid,
                                            'status': 'error',
                                            'aliases': modeled_disk.aliases,
                                            'status_detail': 'unknown'}
                for modeled_osd in modeled_disk.osds:
                    osd_id = modeled_osd.osd_id
                    asd_entry = {'asd_id': osd_id,
                                 'guid': modeled_osd.guid,
                                 'status': 'error',
                                 'status_detail': 'unknown',
                                 'alba_backend_guid': modeled_osd.alba_backend_guid}
                    # The same dict is referenced from both maps, so later updates show up in both
                    asd_map[osd_id] = asd_entry
                    modeled_asds[osd_id] = asd_entry

        # Step 2: merge the information reported by the nodes themselves
        def _load_live_info(_node, _node_data):
            live_stack = _node.storage_stack
            stack_status = live_stack['status']
            if stack_status != 'ok':
                # The node could not report its stack: propagate its status to every disk and ASD
                for known_disk in _node_data.values():
                    known_disk['status_detail'] = stack_status
                    for known_asd in known_disk.get('asds', {}).values():
                        known_asd['status_detail'] = stack_status
                return

            for live_disk_id, live_disk in live_stack['stack'].iteritems():
                disk_entry = _node_data.setdefault(live_disk_id, {'asds': {}})
                # Merge everything except the ASDs, these are merged one by one below
                disk_only_info = copy.deepcopy(live_disk)
                del disk_only_info['asds']
                disk_entry.update(disk_only_info)
                known_asds = disk_entry['asds']
                for live_asd_id, live_asd in live_disk['asds'].iteritems():
                    if live_asd_id in known_asds:
                        known_asds[live_asd_id].update(live_asd)
                    else:
                        known_asds[live_asd_id] = live_asd

        loaders = []
        for alba_node in alba_nodes:
            loader = Thread(target=_load_live_info, args=(alba_node, storage_map[alba_node.node_id]))
            loader.start()
            loaders.append(loader)
        for loader in loaders:
            loader.join()

        # Step 3: mix in usage information
        for stats_asd_id, asd_stats in self.asd_statistics.iteritems():
            if stats_asd_id not in asd_map:
                continue
            asd_map[stats_asd_id]['usage'] = {'size': int(asd_stats['capacity']),
                                              'used': int(asd_stats['disk_usage']),
                                              'available': int(asd_stats['capacity'] - asd_stats['disk_usage'])}

        # Step 4: load information from alba
        # A backend specific error interval takes precedence over the global one
        interval_key = '/ovs/alba/backends/{0}/gui_error_interval'.format(self.guid)
        if not Configuration.exists(interval_key):
            interval_key = '/ovs/alba/backends/global_gui_error_interval'
        interval = Configuration.get(interval_key)
        config = Configuration.get_configuration_path('/ovs/arakoon/{0}-abm/config'.format(self.name))
        alba_osds = dict((listed_osd['long_id'], listed_osd)
                         for listed_osd in AlbaCLI.run(command='list-all-osds', config=config))
        for node_disks in storage_map.values():
            for disk_data in node_disks.values():
                for asd_id, asd_data in disk_data['asds'].iteritems():
                    # Only ASDs known by ALBA for which the node reported a state can be classified
                    if asd_id not in alba_osds or 'state' not in asd_data:
                        continue
                    found_osd = alba_osds[asd_id]
                    if found_osd.get('decommissioned') is True:
                        asd_data['status'] = 'unavailable'
                        asd_data['status_detail'] = 'decommissioned'
                        continue

                    if asd_data['state'] != 'ok':
                        asd_data['status'] = 'error'
                        asd_data['status_detail'] = asd_data.get('state_detail', '')
                        owner = alba_backend_map.get(found_osd.get('alba_id'))
                        if owner is not None:
                            asd_data['alba_backend_guid'] = owner.guid
                        continue

                    if found_osd['id'] is None:
                        # Not claimed by this ALBA Backend
                        alba_id = found_osd['alba_id']
                        if alba_id is None:
                            asd_data['status'] = 'available'
                        else:
                            asd_data['status'] = 'unavailable'
                            owner = alba_backend_map.get(alba_id)
                            if owner is not None:
                                asd_data['alba_backend_guid'] = owner.guid
                        continue

                    # Claimed by this ALBA Backend: assume recent errors until proven otherwise
                    asd_data['alba_backend_guid'] = self.guid
                    asd_data['status'] = 'warning'
                    asd_data['status_detail'] = 'recenterrors'

                    reads = found_osd['read'] or [0]
                    writes = found_osd['write'] or [0]
                    errors = found_osd['errors']
                    recent_errors = len(errors) > 0 and not (len(reads + writes) > 0 and max(min(reads), min(writes)) > max(error[0] for error in errors) + interval)
                    if not recent_errors:
                        asd_data['status'] = 'claimed'
                        asd_data['status_detail'] = ''
        return storage_map
 def merge_package_information_alba_plugin():
     """
     Collect the package information of every ALBA node, keyed by the IP of the node, so the core code can merge it
     with the information of the other components
     :return: Package information for ALBA nodes
     """
     package_info = {}
     for alba_node in AlbaNodeList.get_albanodes():
         node_ip = alba_node.ip
         package_info[node_ip] = alba_node.package_information
     return package_info
    def _live_status(self):
        """
        Retrieve the live status of the ALBA Backend to be displayed in the 'Backends' page in the GUI based on:
            - Maintenance agents presence
            - Maintenance agents status
            - Disk statuses
        The checks are executed from most to least severe, the first failing check determines the returned status:
            - 'installing' / 'deleting' while the linked Backend is in that state
            - FAILURE for failed disks, failing remote OSDs, missing or non-running maintenance services
            - WARNING for insufficient maintenance services, an invalid layout, OSDs in warning or remote warnings
            - RUNNING otherwise
        :return: Status as reported by the plugin
        :rtype: str
        """
        if self.backend.status == Backend.STATUSES.INSTALLING:
            return 'installing'

        if self.backend.status == Backend.STATUSES.DELETING:
            return 'deleting'

        # Verify failed disks
        devices = self.local_summary['devices']
        if devices['red'] > 0:
            self._logger.warning(
                'AlbaBackend {0} STATUS set to FAILURE due to {1} failed disks'
                .format(self.name, devices['red']))
            return AlbaBackend.STATUSES.FAILURE

        # Verify remote OSDs
        remote_errors = False
        linked_backend_warning = False
        for remote_info in self.remote_stack.itervalues():
            if remote_info['error'] == 'unknown' or remote_info['live_status'] == AlbaBackend.STATUSES.FAILURE:
                message = None
                if remote_info['error'] == 'unknown':
                    message = 'unknown remote error info'
                elif remote_info['live_status'] == AlbaBackend.STATUSES.FAILURE:
                    message = 'FAILURE in live_status'
                self._logger.warning(
                    'AlbaBackend {0} STATUS set to FAILURE due to OSD {1}: {2} '
                    .format(self.name, remote_info['name'], message))
                return AlbaBackend.STATUSES.FAILURE
            if remote_info['error'] == 'not_allowed':
                remote_errors = True
            if remote_info['live_status'] == AlbaBackend.STATUSES.WARNING:
                linked_backend_warning = True

        # Retrieve ASD and maintenance service information
        def _get_node_information(_node):
            # A node is 'used' as soon as a single OSD in its stack is claimed by this ALBA Backend
            if _node not in nodes_used_by_this_backend:
                for slot_info in _node.stack.itervalues():
                    for osd_info in slot_info['osds'].itervalues():
                        if osd_info['claimed_by'] == self.guid:
                            nodes_used_by_this_backend.add(_node)
                            break
                    if _node in nodes_used_by_this_backend:
                        break

            try:
                services = _node.maintenance_services
                if self.name in services:
                    for _service_name, _service_status in services[self.name]:
                        services_for_this_backend[_service_name] = _node
                        service_states[_service_name] = _service_status
                        if _node.node_id not in services_per_node:
                            services_per_node[_node.node_id] = 0
                        services_per_node[_node.node_id] += 1
            except Exception as ex:
                # Best effort: a node failing to report its services must not break the status calculation,
                # but it is logged since the missing services influence the checks below
                AlbaBackend._logger.warning(
                    'Failed to retrieve the maintenance services of ALBA Node {0}: {1}'
                    .format(_node.node_id, ex))

        services_for_this_backend = {}
        services_per_node = {}
        service_states = {}
        nodes_used_by_this_backend = set()
        threads = []
        all_nodes = AlbaNodeList.get_albanodes()
        for node in all_nodes:
            thread = Thread(target=_get_node_information, args=(node, ))
            thread.start()
            threads.append(thread)
        for thread in threads:
            thread.join()

        zero_services = False
        if len(services_for_this_backend) == 0:
            if len(all_nodes) > 0:
                AlbaBackend._logger.error(
                    'AlbaBackend {0} STATUS set to FAILURE due to no maintenance services'
                    .format(self.name))
                return AlbaBackend.STATUSES.FAILURE
            # No nodes at all: only reported as a warning at the very end
            zero_services = True

        # Verify maintenance agents status
        for service_name in services_for_this_backend:
            if service_states.get(service_name) != 'active':
                AlbaBackend._logger.error(
                    'AlbaBackend {0} STATUS set to FAILURE due to non-running maintenance service(s): {1}'
                    .format(self.name, service_name))
                return AlbaBackend.STATUSES.FAILURE

        # Verify maintenance agents presence
        layout_key = '/ovs/alba/backends/{0}/maintenance/agents_layout'.format(self.guid)
        layout = None
        if Configuration.exists(layout_key):
            layout = Configuration.get(layout_key)
            # A layout is only taken into account when it is a list referring to at least 1 known node
            if not isinstance(layout, list) or not any(node.node_id for node in all_nodes if node.node_id in layout):
                layout = None

        if layout is None:
            config_key = '/ovs/alba/backends/{0}/maintenance/nr_of_agents'.format(self.guid)
            expected_services = 3
            if Configuration.exists(config_key):
                expected_services = Configuration.get(config_key)
            # Never expect more services than nodes in use, but always expect at least 1
            expected_services = min(expected_services, len(nodes_used_by_this_backend)) or 1
            if len(services_for_this_backend) < expected_services:
                AlbaBackend._logger.warning(
                    'Live status for backend {0} is "warning": insufficient maintenance services'
                    .format(self.name))
                return AlbaBackend.STATUSES.WARNING
        else:
            for node_id in layout:
                if node_id not in services_per_node:
                    AlbaBackend._logger.warning(
                        'Live status for backend {0} is "warning": invalid maintenance service layout'
                        .format(self.name))
                    return AlbaBackend.STATUSES.WARNING

        # Verify local and remote OSDs
        if devices['orange'] > 0:
            AlbaBackend._logger.warning(
                'Live status for backend {0} is "warning": one or more OSDs in warning'
                .format(self.name))
            return AlbaBackend.STATUSES.WARNING

        if remote_errors is True or linked_backend_warning is True:
            AlbaBackend._logger.warning(
                'Live status for backend {0} is "warning": errors/warnings on remote stack'
                .format(self.name))
            return AlbaBackend.STATUSES.WARNING
        if zero_services is True:
            AlbaBackend._logger.warning(
                'Live status for backend {0} is "warning": no maintenance services'
                .format(self.name))
            return AlbaBackend.STATUSES.WARNING

        return AlbaBackend.STATUSES.RUNNING
예제 #43
0
    def migrate():
        """
        Executes async migrations. It doesn't matter too much when they are executed, as long as they get eventually
        executed. This code will typically contain:
        * "dangerous" migration code (it needs certain running services)
        * Migration code depending on a cluster-wide state
        * ...
        Every migration step is best effort: failures are logged and, for the steps guarded by a flag in the
        configuration management, the flag is only set when the step succeeded so it is retried on the next run
        """
        AlbaMigrationController._logger.info('Preparing out of band migrations...')

        from ovs.dal.hybrids.diskpartition import DiskPartition
        from ovs.dal.lists.albabackendlist import AlbaBackendList
        from ovs.dal.lists.albanodelist import AlbaNodeList
        from ovs.dal.lists.albaosdlist import AlbaOSDList
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.extensions.generic.configuration import Configuration
        from ovs.extensions.generic.sshclient import SSHClient, UnableToConnectException
        from ovs.extensions.migration.migration.albamigrator import ExtensionMigrator
        from ovs.extensions.packages.albapackagefactory import PackageFactory
        from ovs.extensions.services.albaservicefactory import ServiceFactory
        from ovs.extensions.plugins.albacli import AlbaCLI, AlbaError
        from ovs.lib.alba import AlbaController
        from ovs.lib.disk import DiskController

        AlbaMigrationController._logger.info('Start out of band migrations...')

        #############################################
        # Introduction of IP:port combination on OSDs
        osd_info_map = {}
        alba_backends = AlbaBackendList.get_albabackends()
        for alba_backend in alba_backends:
            AlbaMigrationController._logger.info('Verifying ALBA Backend {0}'.format(alba_backend.name))
            if alba_backend.abm_cluster is None:
                AlbaMigrationController._logger.warning('ALBA Backend {0} does not have an ABM cluster registered'.format(alba_backend.name))
                continue

            AlbaMigrationController._logger.debug('Retrieving configuration path for ALBA Backend {0}'.format(alba_backend.name))
            try:
                config = Configuration.get_configuration_path(alba_backend.abm_cluster.config_location)
            except Exception:  # Not a bare except: SystemExit and KeyboardInterrupt must not be swallowed
                AlbaMigrationController._logger.exception('Failed to retrieve the configuration path for ALBA Backend {0}'.format(alba_backend.name))
                continue

            AlbaMigrationController._logger.info('Retrieving OSD information for ALBA Backend {0}'.format(alba_backend.name))
            try:
                osd_infos = AlbaCLI.run(command='list-all-osds', config=config)
            except (AlbaError, RuntimeError):
                AlbaMigrationController._logger.exception('Failed to retrieve OSD information for ALBA Backend {0}'.format(alba_backend.name))
                continue

            for osd_data in osd_infos:
                if osd_data.get('long_id'):
                    osd_info_map[osd_data['long_id']] = {'ips': osd_data.get('ips', []),
                                                         'port': osd_data.get('port')}

        for osd in AlbaOSDList.get_albaosds():
            if osd.osd_id not in osd_info_map:
                AlbaMigrationController._logger.warning('OSD with ID {0} is modelled but could not be found through ALBA'.format(osd.osd_id))
                continue

            ips = osd_info_map[osd.osd_id]['ips']
            port = osd_info_map[osd.osd_id]['port']
            changes = False
            # Only fill in what is missing, never overwrite modeled values
            if osd.ips is None:
                changes = True
                osd.ips = ips
            if osd.port is None:
                changes = True
                osd.port = port
            if changes is True:
                AlbaMigrationController._logger.info('Updating OSD with ID {0} with IPS {1} and port {2}'.format(osd.osd_id, ips, port))
                osd.save()

        ###################################################
        # Read preference for GLOBAL ALBA Backends (1.10.3)  (https://github.com/openvstorage/framework-alba-plugin/issues/452)
        if Configuration.get(key='/ovs/framework/migration|read_preference', default=False) is False:
            try:
                name_backend_map = dict((alba_backend.name, alba_backend) for alba_backend in alba_backends)
                for alba_node in AlbaNodeList.get_albanodes():
                    AlbaMigrationController._logger.info('Processing maintenance services running on ALBA Node {0} with ID {1}'.format(alba_node.ip, alba_node.node_id))
                    alba_node.invalidate_dynamics('maintenance_services')
                    for alba_backend_name, services in alba_node.maintenance_services.iteritems():
                        if alba_backend_name not in name_backend_map:
                            AlbaMigrationController._logger.error('ALBA Node {0} has services for an ALBA Backend {1} which is not modelled'.format(alba_node.ip, alba_backend_name))
                            continue

                        alba_backend = name_backend_map[alba_backend_name]
                        AlbaMigrationController._logger.info('Processing {0} ALBA Backend {1} with GUID {2}'.format(alba_backend.scaling, alba_backend.name, alba_backend.guid))
                        if alba_backend.scaling == alba_backend.SCALINGS.LOCAL:
                            read_preferences = [alba_node.node_id]
                        else:
                            read_preferences = AlbaController.get_read_preferences_for_global_backend(alba_backend=alba_backend,
                                                                                                      alba_node_id=alba_node.node_id,
                                                                                                      read_preferences=[])

                        # The single backend wide config is copied to a config per service
                        old_config_key = '/ovs/alba/backends/{0}/maintenance/config'.format(alba_backend.guid)
                        for service_name, _ in services:
                            AlbaMigrationController._logger.info('Processing service {0}'.format(service_name))
                            new_config_key = '/ovs/alba/backends/{0}/maintenance/{1}/config'.format(alba_backend.guid, service_name)
                            if Configuration.exists(key=old_config_key):
                                new_config = Configuration.get(key=old_config_key)
                                new_config['read_preference'] = read_preferences
                                Configuration.set(key=new_config_key, value=new_config)
                for alba_backend in alba_backends:
                    Configuration.delete(key='/ovs/alba/backends/{0}/maintenance/config'.format(alba_backend.guid))
                AlbaController.checkup_maintenance_agents.delay()

                Configuration.set(key='/ovs/framework/migration|read_preference', value=True)
            except Exception:
                AlbaMigrationController._logger.exception('Updating read preferences for ALBA Backends failed')

        #######################################################
        # Storing actual package name in version files (1.11.0) (https://github.com/openvstorage/framework/issues/1876)
        changed_clients = set()
        storagerouters = StorageRouterList.get_storagerouters()
        if Configuration.get(key='/ovs/framework/migration|actual_package_name_in_version_file_alba', default=False) is False:
            try:
                service_manager = ServiceFactory.get_manager()
                alba_pkg_name, alba_version_cmd = PackageFactory.get_package_and_version_cmd_for(component=PackageFactory.COMP_ALBA)
                for storagerouter in storagerouters:
                    try:
                        # Use '.ip' instead of StorageRouter object because this code is executed during post-update at which point the heartbeat has not been updated for some time
                        root_client = SSHClient(endpoint=storagerouter.ip, username='root')
                    except UnableToConnectException:
                        AlbaMigrationController._logger.exception('Updating actual package name for version files failed on StorageRouter {0}'.format(storagerouter.ip))
                        continue

                    for file_name in root_client.file_list(directory=ServiceFactory.RUN_FILE_DIR):
                        if not file_name.endswith('.version'):
                            continue
                        file_path = '{0}/{1}'.format(ServiceFactory.RUN_FILE_DIR, file_name)
                        contents = root_client.file_read(filename=file_path)
                        if alba_pkg_name == PackageFactory.PKG_ALBA_EE and '{0}='.format(PackageFactory.PKG_ALBA) in contents:
                            # Rewrite the version file in the RUN_FILE_DIR
                            contents = contents.replace(PackageFactory.PKG_ALBA, PackageFactory.PKG_ALBA_EE)
                            root_client.file_write(filename=file_path, contents=contents)

                            # Regenerate the service and update the EXTRA_VERSION_CMD in the configuration management
                            service_name = file_name.split('.')[0]
                            service_config_key = ServiceFactory.SERVICE_CONFIG_KEY.format(storagerouter.machine_id, service_name)
                            if Configuration.exists(key=service_config_key):
                                service_config = Configuration.get(key=service_config_key)
                                if 'EXTRA_VERSION_CMD' in service_config:
                                    service_config['EXTRA_VERSION_CMD'] = '{0}=`{1}`'.format(alba_pkg_name, alba_version_cmd)
                                    Configuration.set(key=service_config_key, value=service_config)
                                    service_manager.regenerate_service(name='ovs-arakoon',
                                                                       client=root_client,
                                                                       target_name='ovs-{0}'.format(service_name))  # Leave out .version
                                    changed_clients.add(root_client)
                Configuration.set(key='/ovs/framework/migration|actual_package_name_in_version_file_alba', value=True)
            except Exception:
                AlbaMigrationController._logger.exception('Updating actual package name for version files failed')

        # Regenerated services only become active after systemd reloaded its unit files
        for root_client in changed_clients:
            try:
                root_client.run(['systemctl', 'daemon-reload'])
            except Exception:
                AlbaMigrationController._logger.exception('Executing command "systemctl daemon-reload" failed')

        ####################################
        # Fix for migration version (1.11.0)
        # Previous code could potentially store a higher version number in the config management than the actual version number
        if Configuration.get(key='/ovs/framework/migration|alba_migration_version_fix', default=False) is False:
            try:
                for storagerouter in storagerouters:
                    config_key = '/ovs/framework/hosts/{0}/versions'.format(storagerouter.machine_id)
                    if Configuration.exists(key=config_key):
                        versions = Configuration.get(key=config_key)
                        if versions.get(PackageFactory.COMP_MIGRATION_ALBA, 0) > ExtensionMigrator.THIS_VERSION:
                            versions[PackageFactory.COMP_MIGRATION_ALBA] = ExtensionMigrator.THIS_VERSION
                            Configuration.set(key=config_key, value=versions)
                Configuration.set(key='/ovs/framework/migration|alba_migration_version_fix', value=True)
            except Exception:
                AlbaMigrationController._logger.exception('Updating migration version failed')

        ####################################
        # Enable auto-cleanup
        migration_auto_cleanup_key = '/ovs/framework/migration|alba_auto_cleanup'
        if Configuration.get(key=migration_auto_cleanup_key, default=False) is False:
            try:
                for storagerouter in StorageRouterList.get_storagerouters():
                    storagerouter.invalidate_dynamics('features')  # New feature was added
                errors = []
                for alba_backend in AlbaBackendList.get_albabackends():
                    try:
                        AlbaController.set_auto_cleanup(alba_backend.guid)
                    except Exception as ex:
                        AlbaMigrationController._logger.exception('Failed to set the auto-cleanup for ALBA Backend {0}'.format(alba_backend.name))
                        errors.append(ex)
                # Only flag the migration as done when every ALBA Backend was updated
                if len(errors) == 0:
                    Configuration.set(key=migration_auto_cleanup_key, value=True)
            except Exception:
                AlbaMigrationController._logger.exception('Updating auto cleanup failed')

        ####################################
        # Change cache eviction
        migration_random_eviction_key = '/ovs/framework/migration|alba_cache_eviction_random'
        if Configuration.get(key=migration_random_eviction_key, default=False) is False:
            try:
                errors = []
                for alba_backend in AlbaBackendList.get_albabackends():
                    try:
                        AlbaController.set_cache_eviction(alba_backend.guid)
                    except Exception as ex:
                        AlbaMigrationController._logger.exception('Failed to set the cache eviction for ALBA Backend {0}'.format(alba_backend.name))
                        errors.append(ex)
                # Only flag the migration as done when every ALBA Backend was updated
                if len(errors) == 0:
                    Configuration.set(key=migration_random_eviction_key, value=True)
            except Exception:
                AlbaMigrationController._logger.exception('Updating cache eviction failed')

        ###################################################
        # Sync all disks and apply the backend role. Backend role was removed with the AD (since 1.10)
        albanode_backend_role_sync_key = '/ovs/framework/migration|albanode_backend_role_sync'
        if not Configuration.get(key=albanode_backend_role_sync_key, default=False):
            try:
                errors = []
                for alba_node in AlbaNodeList.get_albanodes():
                    try:
                        if not alba_node.storagerouter:
                            continue
                        stack = alba_node.client.get_stack()  # type: dict
                        for slot_id, slot_information in stack.iteritems():
                            osds = slot_information.get('osds', {})  # type: dict
                            slot_aliases = slot_information.get('aliases', [])  # type: list
                            if not osds:  # No osds means no partition was made
                                continue
                            # Sync to add all potential partitions that will need a backend role
                            DiskController.sync_with_reality(storagerouter_guid=alba_node.storagerouter_guid)
                            for disk in alba_node.storagerouter.disks:
                                if set(disk.aliases).intersection(set(slot_aliases)):
                                    partition = disk.partitions[0]
                                    if DiskPartition.ROLES.BACKEND not in partition.roles:
                                        partition.roles.append(DiskPartition.ROLES.BACKEND)
                                        partition.save()
                    except Exception as ex:
                        AlbaMigrationController._logger.exception('Syncing for storagerouter/albanode {0} failed'.format(alba_node.storagerouter.ip))
                        errors.append(ex)
                if not errors:
                    Configuration.set(key=albanode_backend_role_sync_key, value=True)
            except Exception:
                AlbaMigrationController._logger.exception('Syncing up the disks for backend roles failed')

        AlbaMigrationController._logger.info('Finished out of band migrations')
예제 #44
0
    def migrate(previous_version):
        """
        Migrates from a given version to the current version. It uses 'previous_version' to be smart
        wherever possible, but the code should be able to migrate any version towards the expected version.
        When this is not possible, the code can set a minimum version and raise when it is not met.

        Two mutually exclusive branches exist:
        * Version 0: only seeds the ALBA backend type and the ALBA related service types
        * Any other version below DALMigrator.THIS_VERSION: rebuilds missing unique/index keys, introduces the
          ABMCluster/NSMCluster model objects and converts the AlbaDisk based model to the slot based model
        :param previous_version: The previous version from which to start the migration
        :type previous_version: float
        :return: DALMigrator.THIS_VERSION, regardless of which branch (if any) was executed
        """

        working_version = previous_version

        if working_version == 0:
            from ovs.dal.hybrids.servicetype import ServiceType
            # Initial version:
            # * Add any basic configuration or model entries

            # Add backends
            # An existing backend type with the same code is re-used (and its name/code re-saved), otherwise a new one is created
            for backend_type_info in [('ALBA', 'alba')]:
                code = backend_type_info[1]
                backend_type = BackendTypeList.get_backend_type_by_code(code)
                if backend_type is None:
                    backend_type = BackendType()
                backend_type.name = backend_type_info[0]
                backend_type.code = code
                backend_type.save()

            # Add service types
            # Unlike the backend types above, no lookup for an already existing service type is done here
            for service_type_info in [
                    ServiceType.SERVICE_TYPES.NS_MGR,
                    ServiceType.SERVICE_TYPES.ALBA_MGR,
                    ServiceType.SERVICE_TYPES.ALBA_S3_TRANSACTION
            ]:
                service_type = ServiceType()
                service_type.name = service_type_info
                service_type.save()

        # From here on, all actual migration should happen to get to the expected state for THIS RELEASE
        # Because this is an 'elif', nothing below is executed when migrating from version 0
        elif working_version < DALMigrator.THIS_VERSION:
            import hashlib
            from ovs.dal.exceptions import ObjectNotFoundException
            from ovs.dal.helpers import HybridRunner, Descriptor
            from ovs.dal.hybrids.albaabmcluster import ABMCluster
            from ovs.dal.hybrids.albaosd import AlbaOSD
            from ovs.dal.hybrids.albansmcluster import NSMCluster
            from ovs.dal.hybrids.j_abmservice import ABMService
            from ovs.dal.hybrids.j_nsmservice import NSMService
            from ovs.dal.hybrids.service import Service
            from ovs.dal.hybrids.servicetype import ServiceType
            from ovs.dal.lists.albabackendlist import AlbaBackendList
            from ovs.dal.lists.albanodelist import AlbaNodeList
            from ovs.dal.lists.servicetypelist import ServiceTypeList
            from ovs.dal.lists.storagerouterlist import StorageRouterList
            from ovs.extensions.db.arakooninstaller import ArakoonClusterConfig, ArakoonInstaller
            from ovs.extensions.generic.configuration import Configuration, NotFoundException
            from ovs_extensions.generic.toolbox import ExtensionsToolbox
            from ovs.extensions.plugins.albacli import AlbaCLI
            from ovs.extensions.storage.persistentfactory import PersistentFactory

            # Migrate unique constraints & indexes
            client = PersistentFactory.get_client()
            hybrid_structure = HybridRunner.get_hybrids()
            for class_descriptor in hybrid_structure.values():
                cls = Descriptor().load(class_descriptor).get_object()
                classname = cls.__name__.lower()
                # The class name is filled in right away, the escaped '{{0}}'/'{{1}}' placeholders are kept for
                # the property name (and, for 'index_key', the hashed property value) which are filled in later
                unique_key = 'ovs_unique_{0}_{{0}}_'.format(classname)
                index_prefix = 'ovs_index_{0}|{{0}}|'.format(classname)
                index_key = 'ovs_index_{0}|{{0}}|{{1}}'.format(classname)
                uniques = []
                indexes = []
                # Only properties for which not a single key exists yet under their prefix are (re)built
                # noinspection PyProtectedMember
                for prop in cls._properties:
                    if prop.unique is True and len([
                            k for k in client.prefix(
                                unique_key.format(prop.name))
                    ]) == 0:
                        uniques.append(prop.name)
                    if prop.indexed is True and len([
                            k for k in client.prefix(
                                index_prefix.format(prop.name))
                    ]) == 0:
                        indexes.append(prop.name)
                if len(uniques) > 0 or len(indexes) > 0:
                    # Walk over the raw stored entries of this class and derive the missing keys from them
                    prefix = 'ovs_data_{0}_'.format(classname)
                    for key, data in client.prefix_entries(prefix):
                        # Unique key: prefix + sha1 of the stringified property value, pointing to the data key
                        # NOTE(review): unlike the index loop below, a property missing from 'data' raises a KeyError here
                        for property_name in uniques:
                            ukey = '{0}{1}'.format(
                                unique_key.format(property_name),
                                hashlib.sha1(str(
                                    data[property_name])).hexdigest())
                            client.set(ukey, key)
                        for property_name in indexes:
                            if property_name not in data:
                                continue  # This is the case when there's a new indexed property added.
                            ikey = index_key.format(
                                property_name,
                                hashlib.sha1(str(
                                    data[property_name])).hexdigest())
                            index = list(
                                client.get_multi([ikey], must_exist=False))[0]
                            # The value read above is asserted inside the transaction before the new index is written
                            # (presumably so a concurrent change makes the transaction fail - confirm against the client)
                            # When the data key is already part of the index, an empty transaction is applied
                            transaction = client.begin_transaction()
                            if index is None:
                                client.assert_value(ikey,
                                                    None,
                                                    transaction=transaction)
                                client.set(ikey, [key],
                                           transaction=transaction)
                            elif key not in index:
                                client.assert_value(ikey,
                                                    index[:],
                                                    transaction=transaction)
                                client.set(ikey,
                                           index + [key],
                                           transaction=transaction)
                            client.apply_transaction(transaction)

            #############################################
            # Introduction of ABMCluster and NSMCluster #
            #############################################
            # Verify presence of unchanged ALBA Backends
            # A backend needs migrating as soon as it has no ABM cluster or no NSM clusters modeled
            alba_backends = AlbaBackendList.get_albabackends()
            changes_required = False
            for alba_backend in alba_backends:
                if alba_backend.abm_cluster is None or len(
                        alba_backend.nsm_clusters) == 0:
                    changes_required = True
                    break

            if changes_required:
                # Retrieve ABM and NSM clusters
                # Clusters for which the metadata lookup raises NotFoundException are skipped,
                # clusters of any other type than ABM or NSM are ignored
                abm_cluster_info = []
                nsm_cluster_info = []
                for cluster_name in Configuration.list('/ovs/arakoon'):
                    try:
                        metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(
                            cluster_name=cluster_name)
                        if metadata[
                                'cluster_type'] == ServiceType.ARAKOON_CLUSTER_TYPES.ABM:
                            abm_cluster_info.append(metadata)
                        elif metadata[
                                'cluster_type'] == ServiceType.ARAKOON_CLUSTER_TYPES.NSM:
                            nsm_cluster_info.append(metadata)
                    except NotFoundException:
                        continue

                # Retrieve NSM Arakoon cluster information
                # Despite the comment above, this maps both the ABM and the NSM cluster names to their exported configuration
                cluster_arakoon_map = {}
                for cluster_info in abm_cluster_info + nsm_cluster_info:
                    cluster_name = cluster_info['cluster_name']
                    arakoon_config = ArakoonClusterConfig(
                        cluster_id=cluster_name)
                    cluster_arakoon_map[
                        cluster_name] = arakoon_config.export_dict()

                # Lookup tables: StorageRouters by machine ID and ALBA Backends by their ALBA ID
                storagerouter_map = dict(
                    (storagerouter.machine_id, storagerouter) for storagerouter
                    in StorageRouterList.get_storagerouters())
                alba_backend_id_map = dict((alba_backend.alba_id, alba_backend)
                                           for alba_backend in alba_backends)
                for cluster_info in abm_cluster_info:
                    internal = cluster_info['internal']
                    cluster_name = cluster_info['cluster_name']
                    config_location = Configuration.get_configuration_path(
                        key=ArakoonClusterConfig.CONFIG_KEY.format(
                            cluster_name))
                    # Ask ALBA which backend this ABM cluster belongs to and which NSM hosts it knows
                    # An ABM cluster for which either call raises a RuntimeError is skipped entirely
                    try:
                        alba_id = AlbaCLI.run(command='get-alba-id',
                                              config=config_location,
                                              named_params={'attempts':
                                                            3})['id']
                        nsm_hosts = AlbaCLI.run(command='list-nsm-hosts',
                                                config=config_location,
                                                named_params={'attempts': 3})
                    except RuntimeError:
                        continue

                    alba_backend = alba_backend_id_map.get(alba_id)
                    if alba_backend is None:  # ALBA Backend with ID not found in model
                        continue
                    if alba_backend.abm_cluster is not None and len(
                            alba_backend.nsm_clusters
                    ) > 0:  # Clusters already exist
                        continue

                    # Create ABM Cluster
                    if alba_backend.abm_cluster is None:
                        abm_cluster = ABMCluster()
                        abm_cluster.name = cluster_name
                        abm_cluster.alba_backend = alba_backend
                        abm_cluster.config_location = ArakoonClusterConfig.CONFIG_KEY.format(
                            cluster_name)
                        abm_cluster.save()
                    else:
                        abm_cluster = alba_backend.abm_cluster

                    # Create ABM Services
                    # Dropping the 'global' section leaves the per node sections only. This mutates the dict stored in cluster_arakoon_map
                    abm_arakoon_config = cluster_arakoon_map[cluster_name]
                    abm_arakoon_config.pop('global')
                    arakoon_nodes = abm_arakoon_config.keys()
                    # External cluster: a single service without StorageRouter or ports
                    # Internal cluster: one service per Arakoon node, provided every node name is a known StorageRouter machine ID
                    if internal is False:
                        services_to_create = 1
                    else:
                        if set(arakoon_nodes).difference(
                                set(storagerouter_map.keys())):
                            # Skips the rest of this ABM cluster (services and NSM clusters), the ABMCluster saved above is kept
                            continue
                        services_to_create = len(arakoon_nodes)
                    for index in range(services_to_create):
                        service = Service()
                        service.name = 'arakoon-{0}-abm'.format(
                            alba_backend.name)
                        service.type = ServiceTypeList.get_by_name(
                            ServiceType.SERVICE_TYPES.ALBA_MGR)
                        if internal is True:
                            arakoon_node_config = abm_arakoon_config[
                                arakoon_nodes[index]]
                            service.ports = [
                                arakoon_node_config['client_port'],
                                arakoon_node_config['messaging_port']
                            ]
                            service.storagerouter = storagerouter_map[
                                arakoon_nodes[index]]
                        else:
                            service.ports = []
                            service.storagerouter = None
                        service.save()

                        # Junction object linking the service to the ABM cluster
                        abm_service = ABMService()
                        abm_service.service = service
                        abm_service.abm_cluster = abm_cluster
                        abm_service.save()

                    # Create NSM Clusters
                    # NSM hosts are processed in the order given by ExtensionsToolbox.advanced_sort on their cluster ID
                    for cluster_index, nsm_host in enumerate(
                            sorted(nsm_hosts,
                                   key=lambda host: ExtensionsToolbox.
                                   advanced_sort(host['cluster_id'], '_'))):
                        nsm_cluster_name = nsm_host['cluster_id']
                        nsm_arakoon_config = cluster_arakoon_map.get(
                            nsm_cluster_name)
                        if nsm_arakoon_config is None:
                            continue  # No Arakoon configuration was collected for this NSM host

                        # External clusters are numbered by their position in the sorted list,
                        # internal clusters by the integer after the last underscore in their name
                        number = cluster_index if internal is False else int(
                            nsm_cluster_name.split('_')[-1])
                        nsm_cluster = NSMCluster()
                        nsm_cluster.name = nsm_cluster_name
                        nsm_cluster.number = number
                        nsm_cluster.alba_backend = alba_backend
                        nsm_cluster.config_location = ArakoonClusterConfig.CONFIG_KEY.format(
                            nsm_cluster_name)
                        nsm_cluster.save()

                        # Create NSM Services
                        # Same approach as for the ABM services above, including the in-place removal of the 'global' section
                        nsm_arakoon_config.pop('global')
                        arakoon_nodes = nsm_arakoon_config.keys()
                        if internal is False:
                            services_to_create = 1
                        else:
                            if set(arakoon_nodes).difference(
                                    set(storagerouter_map.keys())):
                                # Only the services of this NSM cluster are skipped, the NSMCluster saved above is kept
                                continue
                            services_to_create = len(arakoon_nodes)
                        for service_index in range(services_to_create):
                            service = Service()
                            service.name = 'arakoon-{0}-nsm_{1}'.format(
                                alba_backend.name, number)
                            service.type = ServiceTypeList.get_by_name(
                                ServiceType.SERVICE_TYPES.NS_MGR)
                            if internal is True:
                                arakoon_node_config = nsm_arakoon_config[
                                    arakoon_nodes[service_index]]
                                service.ports = [
                                    arakoon_node_config['client_port'],
                                    arakoon_node_config['messaging_port']
                                ]
                                service.storagerouter = storagerouter_map[
                                    arakoon_nodes[service_index]]
                            else:
                                service.ports = []
                                service.storagerouter = None
                            service.save()

                            # Junction object linking the service to the NSM cluster
                            nsm_service = NSMService()
                            nsm_service.service = service
                            nsm_service.nsm_cluster = nsm_cluster
                            nsm_service.save()

            # Clean up all junction services no longer linked to an ALBA Backend
            # Both lists are fully collected before anything is deleted; this runs whether or not 'changes_required' was set
            all_nsm_services = [
                service.nsm_service for service in ServiceTypeList.get_by_name(
                    ServiceType.SERVICE_TYPES.NS_MGR).services
                if service.nsm_service.nsm_cluster is None
            ]
            all_abm_services = [
                service.abm_service for service in ServiceTypeList.get_by_name(
                    ServiceType.SERVICE_TYPES.ALBA_MGR).services
                if service.abm_service.abm_cluster is None
            ]
            # The junction object is deleted first, followed by the service it pointed to
            for abm_service in all_abm_services:
                abm_service.delete()
                abm_service.service.delete()
            for nsm_service in all_nsm_services:
                nsm_service.delete()
                nsm_service.service.delete()

            ################################
            # Introduction of Active Drive #
            ################################
            # Update slot_id and Alba Node relation for all OSDs
            # Step 1: remember per AlbaDisk guid which OSD guids referred to it, based on the raw stored OSD data
            client = PersistentFactory.get_client()
            disk_osd_map = {}
            for key, data in client.prefix_entries('ovs_data_albaosd_'):
                alba_disk_guid = data.get('alba_disk', {}).get('guid')
                if alba_disk_guid is not None:
                    if alba_disk_guid not in disk_osd_map:
                        disk_osd_map[alba_disk_guid] = []
                    disk_osd_map[alba_disk_guid].append(
                        key.replace('ovs_data_albaosd_', ''))
                # Best effort removal of the obsolete 'alba_disk' relation from the stored OSD data
                try:
                    value = client.get(key)
                    value.pop('alba_disk', None)
                    client.set(key=key, value=value)
                except Exception:
                    pass  # We don't care if we would have any leftover AlbaDisk information in _data, but its cleaner not to

            # Step 2: for every stored AlbaDisk, move its node relation and slot ID onto the OSDs collected above
            alba_guid_node_map = dict(
                (an.guid, an) for an in AlbaNodeList.get_albanodes())
            for key, data in client.prefix_entries('ovs_data_albadisk_'):
                alba_disk_guid = key.replace('ovs_data_albadisk_', '')
                alba_node_guid = data.get('alba_node', {}).get('guid')
                if alba_disk_guid in disk_osd_map and alba_node_guid in alba_guid_node_map and len(
                        data.get('aliases', [])) > 0:
                    # The slot ID is the last path component of the first alias of the disk
                    slot_id = data['aliases'][0].split('/')[-1]
                    for osd_guid in disk_osd_map[alba_disk_guid]:
                        try:
                            osd = AlbaOSD(osd_guid)
                        except ObjectNotFoundException:
                            continue
                        osd.slot_id = slot_id
                        osd.alba_node = alba_guid_node_map[alba_node_guid]
                        osd.save()
                # The stored AlbaDisk entry is removed whether or not its OSDs could be updated
                client.delete(key=key, must_exist=False)

            # Remove unique constraints for AlbaNode IP
            for key in client.prefix('ovs_unique_albanode_ip_'):
                client.delete(key=key, must_exist=False)

            # Remove relation for all Alba Disks
            for key in client.prefix('ovs_reverseindex_albadisk_'):
                client.delete(key=key, must_exist=False)

            # Remove the relation between AlbaNode and AlbaDisk
            # Only the reverse index keys containing '|disks|' are removed, other AlbaNode reverse indexes are kept
            for key in client.prefix('ovs_reverseindex_albanode_'):
                if '|disks|' in key:
                    client.delete(key=key, must_exist=False)

        return DALMigrator.THIS_VERSION
    def _storage_stack(self):
        """
        Builds a live overview of the disks and ASDs of all modeled ALBA nodes, as seen from this AlbaBackend
        The overview starts from the model, is enriched with the live disk and ASD state reported by every node
        (each node is queried in its own thread), then with the ASD usage statistics and finally with the OSD
        information reported by ALBA itself
        :return: Dictionary keyed by node ID, holding per disk ID the disk information including an 'asds' dictionary.
                 An empty dictionary is returned when this AlbaBackend has no ABM services yet
        """
        from ovs.dal.lists.albanodelist import AlbaNodeList
        from ovs.dal.lists.albabackendlist import AlbaBackendList

        if len(self.abm_services) < 1:
            return {}  # Backend is not fully installed as long as there are no ABM services

        stack = {}  # node ID -> disk ID -> disk information
        asds_by_id = {}  # ASD ID -> ASD information, shares its values with the 'asds' dictionaries inside 'stack'
        backends_by_alba_id = dict((backend.alba_id, backend) for backend in AlbaBackendList.get_albabackends())

        # Start from what is modeled, everything is in error/unknown until proven otherwise
        modeled_nodes = AlbaNodeList.get_albanodes()
        for modeled_node in modeled_nodes:
            node_disks = {}
            stack[modeled_node.node_id] = node_disks
            for modeled_disk in modeled_node.disks:
                disk_name = modeled_disk.name
                disk_asds = {}
                node_disks[disk_name] = {'name': disk_name,
                                         'guid': modeled_disk.guid,
                                         'status': 'error',
                                         'status_detail': 'unknown',
                                         'asds': disk_asds}
                for modeled_asd in modeled_disk.asds:
                    asd_key = modeled_asd.asd_id
                    asd_entry = {'asd_id': asd_key,
                                 'guid': modeled_asd.guid,
                                 'status': 'error',
                                 'status_detail': 'unknown',
                                 'alba_backend_guid': modeled_asd.alba_backend_guid}
                    asds_by_id[asd_key] = asd_entry
                    disk_asds[asd_key] = asd_entry

        def _load_live_info(_node, _node_data):
            """
            Merges the live disk and ASD information of a single node into its part of the overview
            """
            # Disks as currently reported by the node
            try:
                live_disks = _node.client.get_disks()
            except (requests.ConnectionError, requests.Timeout):
                for known_disk in _node_data.values():
                    known_disk['status_detail'] = 'nodedown'
                live_disks = {}
            for live_disk_id, live_disk in live_disks.iteritems():
                if live_disk_id not in _node_data:
                    # Disk is not modeled, add a placeholder for it
                    _node_data[live_disk_id] = {'name': live_disk_id,
                                                'status': 'unknown',
                                                'status_detail': '',
                                                'asds': {}}
                known_disk = _node_data[live_disk_id]
                known_disk.update(live_disk)
                if live_disk['state'] != 'ok':
                    known_disk['status'] = live_disk['state']
                    known_disk['status_detail'] = live_disk.get('state_detail', '')
                    continue
                if live_disk['available'] is True:
                    known_disk['status'] = 'uninitialized'
                else:
                    known_disk['status'] = 'initialized'
                known_disk['status_detail'] = ''

            # ASDs as currently reported by the node
            try:
                live_asds = _node.client.get_asds()
            except (requests.ConnectionError, requests.Timeout):
                for known_disk in _node_data.values():
                    for known_asd in known_disk['asds'].values():
                        known_asd['status_detail'] = 'nodedown'
                live_asds = {}
            for live_disk_id, asds_on_disk in live_asds.iteritems():
                if live_disk_id not in _node_data:
                    continue
                for live_asd_id, live_asd in asds_on_disk.iteritems():
                    live_entry = {'asd_id': live_asd_id,
                                  'status': 'error' if live_asd['state'] == 'error' else 'initialized',
                                  'status_detail': live_asd.get('state_detail', ''),
                                  'state': live_asd['state'],
                                  'state_detail': live_asd.get('state_detail', '')}
                    known_asds = _node_data[live_disk_id]['asds']
                    if live_asd_id in known_asds:
                        known_asds[live_asd_id].update(live_entry)
                    else:
                        known_asds[live_asd_id] = live_entry
                        asds_by_id[live_asd_id] = live_entry

        # Query every node in its own thread and wait for all of them
        workers = []
        for modeled_node in modeled_nodes:
            worker = Thread(target=_load_live_info, args=(modeled_node, stack[modeled_node.node_id]))
            worker.start()
            workers.append(worker)
        for worker in workers:
            worker.join()

        # Add the usage for every ASD that is part of the overview
        for stats_asd_id, stats in self.asd_statistics.iteritems():
            if stats_asd_id not in asds_by_id:
                continue
            asds_by_id[stats_asd_id]['usage'] = {'size': int(stats['capacity']),
                                                 'used': int(stats['disk_usage']),
                                                 'available': int(stats['capacity'] - stats['disk_usage'])}

        # The backend specific error interval takes precedence over the global one
        interval_key = '/ovs/alba/backends/{0}/gui_error_interval'.format(self.guid)
        if not EtcdConfiguration.exists(interval_key):
            interval_key = '/ovs/alba/backends/global_gui_error_interval'
        interval = EtcdConfiguration.get(interval_key)

        # Determine the final status of every ASD based on what ALBA knows about it
        config = 'etcd://127.0.0.1:2379/ovs/arakoon/{0}/config'.format(self.abm_services[0].service.name)
        for osd in AlbaCLI.run('list-all-osds', config=config, as_json=True):
            osd_node_id = osd['node_id']
            osd_long_id = osd['long_id']
            for disk_info in stack.get(osd_node_id, {}).values():
                asd_data = disk_info['asds'].get(osd_long_id, {})
                if 'state' not in asd_data:
                    continue  # ASD is not on this disk, or no live state was retrieved for it
                if osd.get('decommissioned') is True:
                    asd_data['status'] = 'unavailable'
                    asd_data['status_detail'] = 'decommissioned'
                    continue

                if asd_data['state'] != 'ok':
                    asd_data['status'] = 'error'
                    asd_data['status_detail'] = asd_data.get('state_detail', '')
                    owner = backends_by_alba_id.get(osd.get('alba_id'))
                    if owner is not None:
                        asd_data['alba_backend_guid'] = owner.guid
                    continue

                if osd['id'] is not None:
                    # OSD has an ID within this backend: report a warning unless there are no (recent) errors
                    asd_data['alba_backend_guid'] = self.guid
                    asd_data['status'] = 'warning'
                    asd_data['status_detail'] = 'recenterrors'

                    read = osd['read'] or [0]
                    write = osd['write'] or [0]
                    errors = osd['errors']
                    healthy = len(errors) == 0
                    if not healthy and len(read + write) > 0:
                        latest_io = max(min(read), min(write))
                        latest_error = max(error[0] for error in errors)
                        healthy = latest_io > latest_error + interval
                    if healthy:
                        asd_data['status'] = 'claimed'
                        asd_data['status_detail'] = ''
                    continue

                # OSD has no ID within this backend: either free, or in use by a backend identified by its ALBA ID
                alba_id = osd['alba_id']
                if alba_id is None:
                    asd_data['status'] = 'available'
                    continue
                asd_data['status'] = 'unavailable'
                owner = backends_by_alba_id.get(alba_id)
                if owner is not None:
                    asd_data['alba_backend_guid'] = owner.guid
        return stack