def remove_slot(node_guid, slot_id):
    """
    Removes a slot from an ALBA node
    The slot is cleared through the node's client. When the node is linked to a StorageRouter, the BACKEND role is
    stripped from the first partition of every modeled disk sharing an alias with the slot, after which the disks
    of that StorageRouter are synced
    :param node_guid: Guid of the node to remove a slot from
    :type node_guid: str
    :param slot_id: Slot ID
    :type slot_id: str
    :raises RuntimeError: When one or more OSDs of the node are still modeled on the slot
    :return: None
    :rtype: NoneType
    """
    node = AlbaNode(node_guid)
    if any(osd.slot_id == slot_id for osd in node.osds):
        raise RuntimeError('A slot with claimed OSDs can\'t be removed')

    node.client.clear_slot(slot_id)
    node.invalidate_dynamics()

    # Sync model
    if node.storagerouter is not None:
        stack = node.client.get_stack()  # type: dict
        # Build the alias set once instead of once per disk
        slot_aliases = set(stack.get(slot_id, {}).get('aliases', []))
        for disk in node.storagerouter.disks:
            if not slot_aliases.intersection(disk.aliases):
                continue
            if len(disk.partitions) == 0:
                # A matching disk without partitions has no role to strip; indexing [0] would raise IndexError
                continue
            partition = disk.partitions[0]
            if DiskPartition.ROLES.BACKEND in partition.roles:
                partition.roles.remove(DiskPartition.ROLES.BACKEND)
                partition.save()
        DiskController.sync_with_reality(storagerouter_guid=node.storagerouter_guid)
def remove_disk(node_guid, device_alias):
    """
    Removes a disk
    When the node cannot be reached, the removal through the ASD Manager Client is skipped and only the model
    is cleaned
    :param node_guid: Guid of the node to remove a disk from
    :type node_guid: str
    :param device_alias: Alias of the device to remove (eg: /dev/disk/by-path/pci-0000:03:00.0-sas-0x5000c29f4cf04566-lun-0)
    :type device_alias: str
    :raises RuntimeError: When the disk still has ASDs blocking the removal or when the client reports a failure
    :return: None
    """
    asds = {}
    node = AlbaNode(node_guid)
    node_id = node.node_id
    device_id = device_alias.split('/')[-1]
    offline_node = False

    # Verify client connectivity
    try:
        node.client.get_disks()
    except (requests.ConnectionError, requests.Timeout, InvalidCredentialsError):
        AlbaNodeController._logger.warning('Could not connect to node {0} to validate disks'.format(node.guid))
        offline_node = True

    # Retrieve ASD information for the ALBA Disk
    for backend in AlbaBackendList.get_albabackends():
        local_stack = backend.local_stack
        if node_id in local_stack and device_id in local_stack[node_id]:
            asds.update(local_stack[node_id][device_id]['asds'])
    for asd_info in asds.values():
        if (offline_node is False and asd_info.get('status') != 'available') or (offline_node is True and asd_info.get('status_detail') == 'nodedown'):
            AlbaNodeController._logger.error('Disk {0} has still non-available ASDs on node {1}'.format(device_alias, node.ip))
            raise RuntimeError('Disk {0} on ALBA node {1} has still some non-available ASDs'.format(device_alias, node_id))

    # Retrieve the Disk from the framework model matching the ALBA Disk
    disk_to_clear = None
    for disk in DiskList.get_disks():
        if device_alias in disk.aliases:
            disk_to_clear = disk
            break

    # Remove the ALBA Disk making use of the ASD Manager Client
    if offline_node is False:
        # The framework model might not know the disk, in which case there are no partition aliases to pass along
        partition_aliases = []
        if disk_to_clear is not None and len(disk_to_clear.partitions) > 0:
            partition_aliases = disk_to_clear.partitions[0].aliases
        result = node.client.remove_disk(disk_id=device_id, partition_aliases=partition_aliases)
        if result['_success'] is False:
            raise RuntimeError('Error removing disk {0}: {1}'.format(device_alias, result['_error']))

    # Clean the model
    for model_disk in node.disks:
        if device_alias in model_disk.aliases:
            for osd in model_disk.osds:
                osd.delete()
            model_disk.delete()
    if disk_to_clear is not None:
        for partition in disk_to_clear.partitions:
            partition.roles = []
            partition.mountpoint = None
            partition.save()
    node.invalidate_dynamics()
    if node.storagerouter is not None:
        DiskController.sync_with_reality(storagerouter_guid=node.storagerouter_guid)
def reset_osd(node_cluster_guid, node_guid, osd_id, expected_safety):
    # type: (str, str, str, Dict[str, int]) -> None
    """
    Removes and re-adds an OSD to a Disk
    The OSD is removed and its slot is filled again through the active node, after which the stack of the
    active node is synced towards every other node of the cluster
    :param node_cluster_guid: Guid of the AlbaNodeCluster
    :type node_cluster_guid: str
    :param node_guid: Guid of the node to reset an OSD of
    :type node_guid: str
    :param osd_id: OSD to reset
    :type osd_id: str
    :param expected_safety: Expected safety after having reset the disk
    :type expected_safety: dict
    :raises ValueError: When the requested node is not part of the AlbaNodeCluster
    :raises RuntimeError: When the slot of the OSD could not be found while re-filling it
    :return: None
    :rtype: NoneType
    """
    node_cluster = AlbaNodeCluster(node_cluster_guid)
    active_node = AlbaNode(node_guid)
    if active_node not in node_cluster.alba_nodes:
        raise ValueError('The requested active AlbaNode is not part of AlbaNodeCluster {0}'.format(node_cluster.guid))

    osd = AlbaOSDList.get_by_osd_id(osd_id)
    # The slot parameters have to be built before the OSD gets removed
    fill_slot_extra = active_node.client.build_slot_params(osd)

    disk_aliases = AlbaNodeClusterController.remove_osd(node_guid=node_guid, osd_id=osd_id, expected_safety=expected_safety)
    if len(disk_aliases) == 0:
        return

    try:
        active_node.client.fill_slot(osd.slot_id, fill_slot_extra)
    except (requests.ConnectionError, requests.Timeout):
        AlbaNodeClusterController._logger.warning('Could not connect to node {0} to (re)configure ASD'.format(active_node.guid))
        return
    except NotFoundError:
        # Can occur when the slot id could not be matched with an existing slot on the alba-asd manager
        # This error can be anticipated when the status of the osd would be 'missing' in the nodes stack but that would be too much overhead
        message = 'Could not add a new OSD. The requested slot {0} could not be found'.format(osd.slot_id)
        AlbaNodeClusterController._logger.warning(message)
        raise RuntimeError('{0}. Slot {1} might no longer be present on Alba node {2}'.format(message, osd.slot_id, node_guid))

    # Invalidate the stack and sync towards all passive sides
    active_node.invalidate_dynamics('stack')
    for node in node_cluster.alba_nodes:
        if node != active_node:
            try:
                node.client.sync_stack(active_node.stack)
            except Exception:
                # Best effort: a failing passive side is logged and must not abort the others.
                # Deliberately not a bare except so KeyboardInterrupt/SystemExit still propagate
                AlbaNodeClusterController._logger.exception('Error while syncing stacks to the passive side')
def remove_node(node_guid):
    """
    Removes an ALBA node
    For nodes of type ASD, the modeled OSDs are removed without safety check, unavailable slots are removed and
    the maintenance services of the node are removed through its client before the node itself gets deleted
    :param node_guid: Guid of the ALBA node to remove
    :type node_guid: str
    :return: None
    :rtype: NoneType
    """
    node = AlbaNode(node_guid)
    if node.type == AlbaNode.NODE_TYPES.ASD:
        for slot_id, slot_info in node.stack.iteritems():
            for osd_id in slot_info['osds'].iterkeys():
                if AlbaOSDList.get_by_osd_id(osd_id=osd_id) is None:
                    continue
                AlbaNodeController.remove_osd(node_guid=node.guid, osd_id=osd_id, expected_safety=None)
            if slot_info['available'] is False:
                AlbaNodeController.remove_slot(node_guid=node.guid, slot_id=slot_id)

        backend_guid_by_name = {}
        for backend in AlbaBackendList.get_albabackends():
            backend_guid_by_name[backend.name] = backend.guid
        try:
            # This loop will delete the services AND their configuration from the configuration management
            node.invalidate_dynamics('maintenance_services')
            for backend_name, services in node.maintenance_services.iteritems():
                backend_guid = backend_guid_by_name.get(backend_name)
                for service_name, _status in services:
                    node.client.remove_maintenance_service(name=service_name, alba_backend_guid=backend_guid)
        except (requests.ConnectionError, requests.Timeout):
            AlbaNodeController._logger.exception('Could not connect to node {0} to retrieve the maintenance services'.format(node.guid))
        except InvalidCredentialsError:
            AlbaNodeController._logger.warning('Failed to retrieve the maintenance services for ALBA node {0}'.format(node.node_id))

    node.delete()
    # The removed node no longer contributes to the backends, so their live status has to be recalculated
    live_status = ['live_status']
    for backend in AlbaBackendList.get_albabackends():
        backend.invalidate_dynamics(live_status)
        backend.backend.invalidate_dynamics(live_status)
    AlbaController.checkup_maintenance_agents.delay()
def remove_disk(node_guid, disk):
    """
    Removes a disk
    When the node cannot be reached, the removal through the node's client is skipped and only the model is cleaned
    :param node_guid: Guid of the node to remove a disk from
    :type node_guid: str
    :param disk: Disk name to remove
    :type disk: str
    :raises RuntimeError: When the disk is unknown on the node, still has ASDs blocking the removal or the
                          client reports a failure
    :return: None
    """
    node = AlbaNode(node_guid)

    # Validate the disk against the node, tolerating an unreachable node
    try:
        known_disks = node.client.get_disks()
    except (requests.ConnectionError, requests.Timeout):
        AlbaNodeController._logger.warning('Could not connect to node {0} to validate disks'.format(node.guid))
        offline_node = True
    else:
        offline_node = False
        if disk not in known_disks:
            raise RuntimeError('Disk {0} not available on node {1}'.format(disk, node.guid))

    # Collect the ASDs of the disk over all ALBA Backends
    node_id = node.node_id
    asds = {}
    for backend in AlbaBackendList.get_albabackends():
        storage_stack = backend.storage_stack
        if node_id not in storage_stack or disk not in storage_stack[node_id]:
            continue
        asds.update(storage_stack[node_id][disk]['asds'])

    for asd_info in asds.values():
        if offline_node is True:
            blocking = asd_info['status_detail'] == 'nodedown'
        else:
            blocking = asd_info['status'] != 'available'
        if blocking:
            AlbaNodeController._logger.error('Disk {0} has still non-available ASDs on node {1}'.format(disk, node.ip))
            raise RuntimeError('Disk {0} has still some non-available ASDs'.format(disk))

    if offline_node is False:
        removal = node.client.remove_disk(disk)
        if removal['_success'] is False:
            raise RuntimeError('Error removing disk {0}: {1}'.format(disk, removal['_error']))

    # Clean the model
    for model_disk in node.disks:
        if model_disk.name != disk:
            continue
        for asd in model_disk.asds:
            asd.delete()
        model_disk.delete()
    node.invalidate_dynamics()
    if node.storagerouter is not None:
        DiskController.sync_with_reality(node.storagerouter_guid)
def reset_osd(node_guid, osd_id, expected_safety):
    """
    Removes and re-adds an OSD to a Disk
    :param node_guid: Guid of the node to reset an OSD of
    :type node_guid: str
    :param osd_id: OSD to reset
    :type osd_id: str
    :param expected_safety: Expected safety after having reset the disk
    :type expected_safety: dict
    :raises RuntimeError: When the slot of the OSD could not be found while re-filling it
    :return: None
    :rtype: NoneType
    """
    node = AlbaNode(node_guid)
    osd = AlbaOSDList.get_by_osd_id(osd_id)
    # The slot parameters are built up front, before the OSD gets removed
    slot_params = node.client.build_slot_params(osd)

    removed_aliases = AlbaNodeController.remove_osd(node_guid=node_guid, osd_id=osd_id, expected_safety=expected_safety)
    if len(removed_aliases) == 0:
        return

    try:
        AlbaNodeController._fill_slot(node, osd.slot_id, slot_params)
    except (requests.ConnectionError, requests.Timeout):
        # An unreachable node is only logged, the stack still gets invalidated below
        warning = 'Could not connect to node {0} to (re)configure ASD'.format(node.guid)
        AlbaNodeController._logger.warning(warning)
    except NotFoundError:
        # Can occur when the slot id could not be matched with an existing slot on the alba-asd manager
        # This error can be anticipated when the status of the osd would be 'missing' in the nodes stack but that would be too much overhead
        message = 'Could not add a new OSD. The requested slot {0} could not be found'.format(osd.slot_id)
        AlbaNodeController._logger.warning(message)
        error = '{0}. Slot {1} might no longer be present on Alba node {2}'.format(message, osd.slot_id, node_guid)
        raise RuntimeError(error)
    node.invalidate_dynamics('stack')
def remove_slot(node_cluster_guid, node_guid, slot_id):
    # type: (str, str, str) -> None
    """
    Removes a slot
    The slot is cleared through the active node, after which the stack of the active node is synced towards
    every other node of the cluster
    :param node_cluster_guid: Guid of the node cluster to remove a disk from
    :type node_cluster_guid: str
    :param node_guid: Guid of the AlbaNode to act as the 'active' side
    :type node_guid: basestring
    :param slot_id: Slot ID
    :type slot_id: str
    :raises ValueError: When the requested node is not part of the AlbaNodeCluster
    :raises RuntimeError: When one or more OSDs of the active node are still modeled on the slot
    :return: None
    :rtype: NoneType
    """
    node_cluster = AlbaNodeCluster(node_cluster_guid)
    active_node = AlbaNode(node_guid)
    if active_node not in node_cluster.alba_nodes:
        raise ValueError('The requested active AlbaNode is not part of AlbaNodeCluster {0}'.format(node_cluster.guid))

    osds = [osd for osd in active_node.osds if osd.slot_id == slot_id]
    if len(osds) > 0:
        raise RuntimeError('A slot with claimed OSDs can\'t be removed')

    active_node.client.clear_slot(slot_id)
    active_node.invalidate_dynamics()

    # Invalidate the stack and sync towards all passive sides
    for node in node_cluster.alba_nodes:
        if node != active_node:
            try:
                node.client.sync_stack(active_node.stack)
            except Exception:
                # Best effort: a failing passive side is logged and must not abort the others.
                # Deliberately not a bare except so KeyboardInterrupt/SystemExit still propagate
                AlbaNodeClusterController._logger.exception('Error while syncing stacks to the passive side')
    if active_node.storagerouter is not None:
        DiskController.sync_with_reality(storagerouter_guid=active_node.storagerouter_guid)
def remove_osd(node_guid, osd_id, expected_safety):
    """
    Removes an OSD
    Unless expected_safety is None, the safety resulting from the removal is calculated first and compared
    against the expected safety
    :param node_guid: Guid of the node to remove an OSD from
    :type node_guid: str
    :param osd_id: ID of the OSD to remove
    :type osd_id: str
    :param expected_safety: Expected safety after having removed the OSD
    :type expected_safety: dict or None
    :raises RuntimeError: When the calculated safety is not as expected or the node's client fails to delete the OSD
    :return: Aliases of the disk on which the OSD was removed
    :rtype: list
    """
    logger = AlbaNodeController._logger

    # Retrieve corresponding OSD in model
    node = AlbaNode(node_guid)
    logger.debug('Removing OSD {0} at node {1}'.format(osd_id, node.ip))
    osd = AlbaOSDList.get_by_osd_id(osd_id)
    alba_backend = osd.alba_backend

    if expected_safety is not None:
        final_safety = AlbaController.calculate_safety(alba_backend_guid=alba_backend.guid, removal_osd_ids=[osd_id])
        safety_lost = final_safety['lost']
        safety_crit = final_safety['critical']
        safety_affected = safety_crit != 0 or safety_lost != 0
        # The expected safety is only consulted when the removal actually affects the safety
        if safety_affected and not (safety_crit == expected_safety['critical'] and safety_lost == expected_safety['lost']):
            raise RuntimeError('Cannot remove OSD {0} as the current safety is not as expected ({1} vs {2})'.format(osd_id, final_safety, expected_safety))
        logger.debug('Safety OK for OSD {0} on backend {1}'.format(osd_id, alba_backend.guid))
    else:
        logger.warning('Skipping safety check for OSD {0} on backend {1} - this is dangerous'.format(osd_id, alba_backend.guid))

    logger.debug('Purging OSD {0} on backend {1}'.format(osd_id, alba_backend.guid))
    AlbaController.remove_units(alba_backend_guid=alba_backend.guid, osd_ids=[osd_id])

    # Delete the OSD
    deletion = node.client.delete_osd(slot_id=osd.slot_id, osd_id=osd_id)
    if deletion['_success'] is False:
        raise RuntimeError('Error removing OSD: {0}'.format(deletion['_error']))

    # Clean configuration management and model - Well, just try it at least
    config_key = ASD_CONFIG.format(osd_id)
    if Configuration.exists(config_key, raw=True):
        Configuration.delete(ASD_CONFIG_DIR.format(osd_id), raw=True)

    osd.delete()
    node.invalidate_dynamics()
    if alba_backend is not None:
        alba_backend.invalidate_dynamics()
        alba_backend.backend.invalidate_dynamics()
    if node.storagerouter is not None:
        try:
            DiskController.sync_with_reality(storagerouter_guid=node.storagerouter_guid)
        except UnableToConnectException:
            logger.warning('Skipping disk sync since StorageRouter {0} is offline'.format(node.storagerouter.name))

    return [osd.slot_id]
def fill_slots(node_guid, osd_information, metadata=None):
    """
    Creates 1 or more new OSDs
    The parameters required for every entry of osd_information are derived from the node's metadata for the
    'fill' and 'fill_add' flows and all entries are validated before any slot gets filled
    :param node_guid: Guid of the node to which the disks belong
    :type node_guid: str
    :param osd_information: Information about the amount of OSDs to add to each Slot
    :type osd_information: list
    :param metadata: Metadata to add to the OSD (connection information for remote Backend, general Backend information)
    :type metadata: dict
    :raises ValueError: When the node supports neither flow or when an entry fails the parameter validation
    :return: None
    :rtype: NoneType
    """
    # Maps the metadata types advertised by the node onto (type, constraint) validation tuples
    metadata_type_validation = {'integer': (int, None),
                                'osd_type': (str, AlbaOSD.OSD_TYPES.keys()),
                                'ip': (str, ExtensionsToolbox.regex_ip),
                                'port': (int, {'min': 1, 'max': 65535})}
    node = AlbaNode(node_guid)

    # Build up the required parameters based on the flows the node supports
    required_params = {'slot_id': (str, None)}
    can_be_filled = False
    for flow in ['fill', 'fill_add']:
        if node.node_metadata[flow] is False:
            continue
        can_be_filled = True
        if flow == 'fill_add':
            required_params['alba_backend_guid'] = (str, None)
        flow_metadata = node.node_metadata['{0}_metadata'.format(flow)]
        for key, mtype in flow_metadata.iteritems():
            if mtype in metadata_type_validation:
                required_params[key] = metadata_type_validation[mtype]
    if can_be_filled is False:
        raise ValueError('The given node does not support filling slots')

    # Validate every entry first so all problems are reported at once
    validation_reasons = []
    for osd_info in osd_information:  # type: dict
        try:
            ExtensionsToolbox.verify_required_params(required_params=required_params, actual_params=osd_info)
        except RuntimeError as ex:
            validation_reasons.append(str(ex))
    if len(validation_reasons) > 0:
        raise ValueError('Missing required parameter:\n *{0}'.format('\n* '.join(validation_reasons)))

    for osd_info in osd_information:
        if node.node_metadata['fill'] is True:
            # Only filling is required
            fill_extra = dict((key, osd_info[key]) for key in node.node_metadata['fill_metadata'])
            AlbaNodeController._fill_slot(node, osd_info['slot_id'], fill_extra)
        elif node.node_metadata['fill_add'] is True:
            # Fill the slot
            fill_add_extra = dict((key, osd_info[key]) for key in node.node_metadata['fill_add_metadata'])
            created_osds = AlbaNodeController._fill_slot(node, osd_info['slot_id'], fill_add_extra)
            # And add/claim the OSD
            if node.type != AlbaNode.NODE_TYPES.S3:
                AlbaController.add_osds(alba_backend_guid=osd_info['alba_backend_guid'],
                                        osds=[osd_info],
                                        alba_node_guid=node_guid,
                                        metadata=metadata)
            else:
                # The S3 manager returns the information about the osd when filling it
                for created_osd_info in created_osds:
                    osd_info.update(created_osd_info)  # Add additional information about the osd
                    AlbaController.add_osds(alba_backend_guid=osd_info['alba_backend_guid'],
                                            osds=[osd_info],
                                            alba_node_guid=node_guid,
                                            metadata=metadata)
    node.invalidate_dynamics('stack')
def fill_slots(node_cluster_guid, node_guid, osd_information, metadata=None):
    # type: (str, str, List[Dict[str, Any]], Optional[Dict[str, Any]]) -> None
    """
    Creates 1 or more new OSDs
    The slots are filled through the active node, after which the stack of the active node is synced towards
    every other node of the cluster
    :param node_cluster_guid: Guid of the node cluster to which the disks belong
    :type node_cluster_guid: basestring
    :param node_guid: Guid of the AlbaNode to act as the 'active' side
    :type node_guid: basestring
    :param osd_information: Information about the amount of OSDs to add to each Slot
    :type osd_information: list
    :param metadata: Metadata to add to the OSD (connection information for remote Backend, general Backend information)
    :type metadata: dict
    :raises ValueError: When the requested node is not part of the AlbaNodeCluster, when the cluster supports
                        neither flow or when an entry fails the parameter validation
    :return: None
    :rtype: NoneType
    """
    # Maps the metadata types advertised by the cluster onto (type, constraint) validation tuples
    metadata_type_validation = {'integer': (int, None),
                                'osd_type': (str, AlbaOSD.OSD_TYPES.keys()),
                                'ip': (str, ExtensionsToolbox.regex_ip),
                                'port': (int, {'min': 1, 'max': 65535})}
    node_cluster = AlbaNodeCluster(node_cluster_guid)
    # Check for the active side if it's part of the cluster
    active_node = AlbaNode(node_guid)
    if active_node not in node_cluster.alba_nodes:
        raise ValueError('The requested active AlbaNode is not part of AlbaNodeCluster {0}'.format(node_cluster.guid))

    required_params = {'slot_id': (str, None)}
    can_be_filled = False
    for flow in ['fill', 'fill_add']:
        if node_cluster.cluster_metadata[flow] is False:
            continue
        can_be_filled = True
        if flow == 'fill_add':
            required_params['alba_backend_guid'] = (str, None)
        for key, mtype in node_cluster.cluster_metadata['{0}_metadata'.format(flow)].iteritems():
            if mtype in metadata_type_validation:
                required_params[key] = metadata_type_validation[mtype]
    if can_be_filled is False:
        raise ValueError('The given node cluster does not support filling slots')

    # Validate every entry first so all problems are reported at once
    validation_reasons = []
    for slot_info in osd_information:
        try:
            ExtensionsToolbox.verify_required_params(required_params=required_params, actual_params=slot_info)
        except RuntimeError as ex:
            validation_reasons.append(str(ex))
    if len(validation_reasons) > 0:
        raise ValueError('Missing required parameter:\n *{0}'.format('\n* '.join(validation_reasons)))

    for slot_info in osd_information:
        if node_cluster.cluster_metadata['fill'] is True:
            # Only filling is required
            active_node.client.fill_slot(slot_id=slot_info['slot_id'],
                                         extra=dict((key, slot_info[key]) for key in node_cluster.cluster_metadata['fill_metadata']))
        elif node_cluster.cluster_metadata['fill_add'] is True:
            # Fill the slot
            active_node.client.fill_slot(slot_id=slot_info['slot_id'],
                                         extra=dict((key, slot_info[key]) for key in node_cluster.cluster_metadata['fill_add_metadata']))
            # And add/claim the OSD
            AlbaController.add_osds(alba_backend_guid=slot_info['alba_backend_guid'],
                                    osds=[slot_info],
                                    alba_node_guid=node_guid,
                                    metadata=metadata)

    # Invalidate the stack and sync towards all passive sides
    active_node.invalidate_dynamics('stack')
    for node in node_cluster.alba_nodes:
        if node != active_node:
            try:
                node.client.sync_stack(active_node.stack)
            except Exception:
                # Best effort: a failing passive side is logged and must not abort the others.
                # Deliberately not a bare except so KeyboardInterrupt/SystemExit still propagate
                AlbaNodeClusterController._logger.exception('Error while syncing stacks to the passive side')
    node_cluster.invalidate_dynamics('stack')