def remove_slot(node_guid, slot_id):
    """
    Removes a slot from an ALBA node and syncs the disk model of the linked StorageRouter
    :param node_guid: Guid of the node to remove a disk from
    :type node_guid: str
    :param slot_id: Slot ID
    :type slot_id: str
    :raises RuntimeError: When OSDs of the node still reference the slot
    :return: None
    :rtype: NoneType
    """
    node = AlbaNode(node_guid)
    if any(osd.slot_id == slot_id for osd in node.osds):
        raise RuntimeError('A slot with claimed OSDs can\'t be removed')

    node.client.clear_slot(slot_id)
    node.invalidate_dynamics()

    # Sync model
    if node.storagerouter is not None:
        stack = node.client.get_stack()  # type: dict
        # Build the alias set once instead of once per disk
        slot_aliases = set(stack.get(slot_id, {}).get('aliases', []))
        for disk in node.storagerouter.disks:
            if not slot_aliases.intersection(disk.aliases):
                continue
            # A matching disk without partitions has no role to drop; skipping it keeps the
            # disk sync below reachable instead of failing on an IndexError
            if len(disk.partitions) == 0:
                continue
            partition = disk.partitions[0]
            if DiskPartition.ROLES.BACKEND in partition.roles:
                partition.roles.remove(DiskPartition.ROLES.BACKEND)
                partition.save()
        DiskController.sync_with_reality(storagerouter_guid=node.storagerouter_guid)
def model_alba_node(node_id, node_type, ip=None):
    # type: (str, str, Optional[str]) -> AlbaNode
    """
    Builds an (unsaved) AlbaNode object based on the settings stored in the configuration management
    :param node_id: ID of the node
    :type node_id: str
    :param node_type: Type of the node, used to look up the configuration location
    :type node_type: str
    :param ip: IP of the node. When omitted (or empty), the IP is read from the configuration
    :type ip: str
    :return: The modeled node (not saved by this function)
    :rtype: AlbaNode
    """
    node = AlbaNode()
    node.type = node_type
    node.node_id = node_id

    config_root = AlbaNode.CONFIG_LOCATIONS[node_type].format(node_id)  # type: str

    def _read_setting(key):
        """ Reads a single key below the configuration root of this node """
        return Configuration.get(os.path.join(config_root, key))

    node.ip = ip or _read_setting('main|ip')
    node.port = _read_setting('main|port')
    node.username = _read_setting('main|username')
    node.password = _read_setting('main|password')
    node.storagerouter = StorageRouterList.get_by_ip(node.ip)
    return node
def restart_slot(node_guid, slot_id):
    """
    Restarts a slot
    :param node_guid: Guid of the ALBA Node to restart a slot on
    :type node_guid: str
    :param slot_id: ID of the slot (eg. pci-0000:03:00.0-sas-0x5000c29f4cf04566-lun-0)
    :type slot_id: str
    :raises RuntimeError: When the slot is not part of the node's stack or when the restart reports a failure
    :raises requests.ConnectionError: When the node could not be reached to validate the slot
    :raises requests.Timeout: When the node did not answer in time while validating the slot
    :return: None
    :rtype: NoneType
    """
    node = AlbaNode(node_guid)
    AlbaNodeController._logger.debug('Restarting slot {0} on node {1}'.format(slot_id, node.ip))
    # Only the remote call can raise the connection errors handled here, so keep the try block minimal
    try:
        stack = node.client.get_stack()
    except (requests.ConnectionError, requests.Timeout):
        AlbaNodeController._logger.warning('Could not connect to node {0} to validate slot'.format(node.guid))
        raise
    if slot_id not in stack:
        # No exception is being handled at this point, so log a plain error instead of
        # logger.exception (which would attach a meaningless empty traceback)
        AlbaNodeController._logger.error('Slot {0} not available for restart on ALBA Node {1}'.format(slot_id, node.ip))
        raise RuntimeError('Could not find slot')

    result = node.client.restart_slot(slot_id=slot_id)
    if result['_success'] is False:
        raise RuntimeError('Error restarting slot: {0}'.format(result['_error']))
    for backend in AlbaBackendList.get_albabackends():
        backend.invalidate_dynamics()
def restart_osd(node_guid, osd_id):
    """
    Restarts an OSD on a given Node
    :param node_guid: Guid of the node to restart an OSD on
    :type node_guid: str
    :param osd_id: ID of the OSD to restart
    :type osd_id: str
    :raises RuntimeError: When the OSD is unknown, belongs to another node or when the restart reports a failure
    :raises requests.ConnectionError: When the node could not be reached
    :raises requests.Timeout: When the node did not answer in time
    :return: None
    :rtype: NoneType
    """
    node = AlbaNode(node_guid)
    osd = AlbaOSDList.get_by_osd_id(osd_id)
    # The lookup may return None for an unknown OSD ID; report that like an OSD living on another node
    # instead of failing with an AttributeError
    if osd is None or osd.alba_node_guid != node.guid:
        raise RuntimeError('Could not locate OSD {0} on node {1}'.format(osd_id, node_guid))

    try:
        result = node.client.restart_osd(osd.slot_id, osd.osd_id)
    except (requests.ConnectionError, requests.Timeout):
        AlbaNodeController._logger.warning('Could not connect to node {0} to restart OSD'.format(node.guid))
        raise
    if result['_success'] is False:
        AlbaNodeController._logger.error('Error restarting OSD: {0}'.format(result['_error']))
        raise RuntimeError(result['_error'])
def model_albanodes(**kwargs):
    """
    Models every ALBA ASD node registered below '/ovs/alba/asdnodes' in the configuration management
    Nodes which are not modelled yet are created, the properties of each node are (re)set from its main config
    :param kwargs: Kwargs containing information regarding the node (unused)
    :type kwargs: dict
    :return: None
    :rtype: NoneType
    """
    _ = kwargs
    asd_nodes_root = '/ovs/alba/asdnodes'
    if not Configuration.dir_exists(asd_nodes_root):
        return

    for node_id in Configuration.list(asd_nodes_root):
        node = AlbaNodeList.get_albanode_by_node_id(node_id)
        if node is None:
            node = AlbaNode()
        main_config = Configuration.get('/ovs/alba/asdnodes/{0}/config/main'.format(node_id))
        node.type = 'ASD'
        node.node_id = node_id
        # Connection details are copied one-on-one from the main config
        for attribute in ('ip', 'port', 'username', 'password'):
            setattr(node, attribute, main_config[attribute])
        node.storagerouter = StorageRouterList.get_by_ip(main_config['ip'])
        node.save()
def get_logfiles(albanode_guid, local_storagerouter_guid):
    """
    Asks the ALBA node to collect its logs, downloads the resulting archive to the downloads
    directory on the given StorageRouter and returns the archive's filename
    :param albanode_guid: Alba Node guid to retrieve log files on
    :type albanode_guid: str
    :param local_storagerouter_guid: Guid of the StorageRouter on which the collect logs was initiated, eg: through the GUI
    :type local_storagerouter_guid: str
    :return: Name of tgz containing the logs
    :rtype: str
    """
    downloads_dir = '/opt/OpenvStorage/webapps/frontend/downloads'
    alba_node = AlbaNode(albanode_guid)
    archive_name = alba_node.client.get_logs()['filename']
    # The node's credentials are embedded in the URL handed to wget
    archive_url = 'https://{0}:{1}@{2}:{3}/downloads/{4}'.format(alba_node.username,
                                                                 alba_node.password,
                                                                 alba_node.ip,
                                                                 alba_node.port,
                                                                 archive_name)
    local_router = StorageRouter(local_storagerouter_guid)
    ssh_client = SSHClient(endpoint=local_router, username='******')
    ssh_client.dir_create(downloads_dir)

    fetch_command = ['wget', archive_url, '--directory-prefix', downloads_dir, '--no-check-certificate']
    ssh_client.run(fetch_command)
    chmod_command = ['chmod', '666', '{0}/{1}'.format(downloads_dir, archive_name)]
    ssh_client.run(chmod_command)
    return archive_name
def get_albanode(guid):
    """
    Loads the AlbaNode identified by the given guid
    :param guid: Guid of the AlbaNode
    :type guid: str
    :return: The AlbaNode object constructed from the guid
    :rtype: ovs.dal.hybrids.albanode.AlbaNode
    """
    alba_node = AlbaNode(guid)
    return alba_node
def move_slot(node_guid, slot_id, destination_node_guid):
    """
    Move a slot from one node to another. If the same disk can be accessed, all ASD ownership is moved
    This is a Dual Controller feature
    The implementation is incomplete: after stopping the slot and updating the OSDs it always raises NotImplementedError
    :param node_guid: Guid of the owner node
    :type node_guid: str
    :param slot_id: Identifier of the slot
    :type slot_id: str
    :param destination_node_guid: Guid of the destination node
    :type destination_node_guid: str
    :raises ValueError: When the nodes do not share a cluster or the slot is missing from one of the stacks
    :raises NotImplementedError: Always, once validation and the stop of the slot succeeded
    :return: None
    :rtype: NoneType
    """
    origin_node = AlbaNode(node_guid)
    destination_node = AlbaNode(destination_node_guid)

    # Validation
    origin_cluster = origin_node.alba_node_cluster
    if origin_cluster is None:
        raise ValueError('Node with guid {0} is not part of a cluster'.format(node_guid))
    if origin_cluster != destination_node.alba_node_cluster:
        raise ValueError('The nodes are not part of the same cluster')
    if slot_id not in origin_node.stack:
        raise ValueError('Slot with ID {0} is not available in the origin node with guid {1}'.format(slot_id, node_guid))
    if slot_id not in destination_node.stack:
        raise ValueError('Slot with ID {0} is not available in the destination node with guid {1}'.format(slot_id, destination_node_guid))

    # Stop the OSDs on the origin; any failure is logged and propagated untouched
    try:
        origin_node.client.stop_slot(slot_id)
    except:
        AlbaNodeClusterController._logger.exception('Unable to stop the slot ')
        raise

    # Update all references in Alba
    AlbaController.update_osds()
    raise NotImplementedError()
def reset_osd(node_cluster_guid, node_guid, osd_id, expected_safety):
    # type: (str, str, str, Dict[str, int]) -> None
    """
    Removes and re-adds an OSD to a Disk
    Afterwards the stack of the active node is synced to all other nodes of the cluster (best effort)
    :param node_cluster_guid: Guid of the AlbaNodeCluster
    :type node_cluster_guid: str
    :param node_guid: Guid of the node to reset an OSD of
    :type node_guid: str
    :param osd_id: OSD to reset
    :type osd_id: str
    :param expected_safety: Expected safety after having reset the disk
    :type expected_safety: dict
    :raises ValueError: When the given node is not part of the given cluster
    :raises RuntimeError: When the slot of the OSD could not be found while re-adding the OSD
    :return: None
    :rtype: NoneType
    """
    node_cluster = AlbaNodeCluster(node_cluster_guid)
    active_node = AlbaNode(node_guid)
    if active_node not in node_cluster.alba_nodes:
        raise ValueError('The requested active AlbaNode is not part of AlbaNodeCluster {0}'.format(node_cluster.guid))

    osd = AlbaOSDList.get_by_osd_id(osd_id)
    # The fill parameters are built before the removal, the OSD is re-added with them afterwards
    fill_slot_extra = active_node.client.build_slot_params(osd)
    disk_aliases = AlbaNodeClusterController.remove_osd(node_guid=node_guid, osd_id=osd_id, expected_safety=expected_safety)
    if len(disk_aliases) == 0:
        return

    try:
        active_node.client.fill_slot(osd.slot_id, fill_slot_extra)
    except (requests.ConnectionError, requests.Timeout):
        AlbaNodeClusterController._logger.warning('Could not connect to node {0} to (re)configure ASD'.format(active_node.guid))
        return
    except NotFoundError:
        # Can occur when the slot id could not be matched with an existing slot on the alba-asd manager
        # This error can be anticipated when the status of the osd would be 'missing' in the nodes stack but that would be too much overhead
        message = 'Could not add a new OSD. The requested slot {0} could not be found'.format(osd.slot_id)
        AlbaNodeClusterController._logger.warning(message)
        raise RuntimeError('{0}. Slot {1} might no longer be present on Alba node {2}'.format(message, osd.slot_id, node_guid))

    # Invalidate the stack and sync towards all passive sides
    active_node.invalidate_dynamics('stack')
    for node in node_cluster.alba_nodes:
        if node != active_node:
            try:
                node.client.sync_stack(active_node.stack)
            except Exception:
                # Best effort: a failing passive side must not fail the reset. Only Exception is caught
                # so KeyboardInterrupt/SystemExit are no longer swallowed
                AlbaNodeClusterController._logger.exception('Error while syncing stacks to the passive side')
def generate_empty_slot(alba_node_guid):
    """
    Generates the information of a new, empty slot for the given AlbaNode
    Only nodes of type GENERIC or S3 are accepted
    :param alba_node_guid: Guid of the AlbaNode to generate a slot on
    :type alba_node_guid: str
    :raises RuntimeError: When the node has another type than GENERIC or S3
    :return: Slot information: a freshly generated UUID mapped to a dict holding the EMPTY status
    :rtype: dict
    """
    alba_node = AlbaNode(alba_node_guid)
    supported_types = (AlbaNode.NODE_TYPES.GENERIC, AlbaNode.NODE_TYPES.S3)
    if alba_node.type not in supported_types:
        raise RuntimeError('An empty slot can only be generated for a generic node')

    slot_id = str(uuid.uuid4())
    slot_information = {'status': alba_node.SLOT_STATUSES.EMPTY}
    return {slot_id: slot_information}
def register(node_id=None, node_type=None, name=None):
    """
    Adds a Node with a given node_id to the model
    A GENERIC node is created with a random 32 character node_id; for any other type the node is looked up
    in the model or among the discovered nodes and validated against the metadata its client returns
    :param node_id: ID of the ALBA node (required for every type except GENERIC)
    :type node_id: str
    :param node_type: Type of the node to create
    :type node_type: str
    :param name: Optional name of the node (only used for GENERIC nodes)
    :type name: str
    :raises RuntimeError: When no node_id is given, the node is unknown, the credentials are invalid or
                          the node reports another node_id
    :return: None
    :rtype: NoneType
    """
    if node_type == AlbaNode.NODE_TYPES.GENERIC:
        # Generic is a special case. Nothing is registered within config mgmt
        alphabet = string.ascii_letters + string.digits
        node = AlbaNode()
        node.name = name
        node.node_id = ''.join(random.choice(alphabet) for _ in range(32))
        node.type = AlbaNode.NODE_TYPES.GENERIC
        node.save()
    else:
        # Both S3 and ASD type can be added now
        if node_id is None:
            raise RuntimeError('A node_id must be given for type ASD/S3')

        node = AlbaNodeList.get_albanode_by_node_id(node_id) or AlbaNodeController.get_discovered_node(node_id)
        if not node:
            # Neither the model nor the discovered nodes know this ID. The user might have specified the ID
            # of a node that does not exist
            raise RuntimeError('No node with node_id {0} was found'.format(node_id))

        metadata = node.client.get_metadata()
        if metadata['_success'] is False and metadata['_error'] == 'Invalid credentials':
            raise RuntimeError('Invalid credentials')
        reported_node_id = metadata['node_id']
        if reported_node_id != node_id:
            AlbaNodeController._logger.error('Unexpected node_id: {0} vs {1}'.format(reported_node_id, node_id))
            raise RuntimeError('Unexpected node identifier')

        if node.type == AlbaNode.NODE_TYPES.S3:
            # The transaction Arakoon is needed. This will check deployment & extend
            AlbaArakoonController.configure_s3_transaction_cluster()
        node.volatile = False
        node.save()

    AlbaController.checkup_maintenance_agents.delay()
def remove_node(node_guid):
    """
    Removes an ALBA node
    For ASD nodes, the modelled OSDs and unavailable slots are removed first and the maintenance services
    are removed through the node's client. Afterwards the node is deleted from the model
    :param node_guid: Guid of the ALBA node to remove
    :type node_guid: str
    :return: None
    :rtype: NoneType
    """
    node = AlbaNode(node_guid)
    if node.type == AlbaNode.NODE_TYPES.ASD:
        for slot_id, slot_info in node.stack.iteritems():
            for osd_id in slot_info['osds']:
                # Only OSDs which are known in the model can be removed
                if AlbaOSDList.get_by_osd_id(osd_id=osd_id) is None:
                    continue
                AlbaNodeController.remove_osd(node_guid=node.guid, osd_id=osd_id, expected_safety=None)
            if slot_info['available'] is False:
                AlbaNodeController.remove_slot(node_guid=node.guid, slot_id=slot_id)

        backend_guid_by_name = {}
        for alba_backend in AlbaBackendList.get_albabackends():
            backend_guid_by_name[alba_backend.name] = alba_backend.guid

        try:
            # This loop will delete the services AND their configuration from the configuration management
            node.invalidate_dynamics('maintenance_services')
            for alba_backend_name, service_info in node.maintenance_services.iteritems():
                backend_guid = backend_guid_by_name.get(alba_backend_name)
                for service_name, _ in service_info:
                    node.client.remove_maintenance_service(name=service_name, alba_backend_guid=backend_guid)
        except (requests.ConnectionError, requests.Timeout):
            AlbaNodeController._logger.exception('Could not connect to node {0} to retrieve the maintenance services'.format(node.guid))
        except InvalidCredentialsError:
            AlbaNodeController._logger.warning('Failed to retrieve the maintenance services for ALBA node {0}'.format(node.node_id))

    node.delete()
    for alba_backend in AlbaBackendList.get_albabackends():
        alba_backend.invalidate_dynamics(['live_status'])
        alba_backend.backend.invalidate_dynamics(['live_status'])
    AlbaController.checkup_maintenance_agents.delay()
def reset_osd(node_guid, osd_id, expected_safety):
    """
    Removes an OSD and fills its slot again with the parameters the OSD had before the removal
    :param node_guid: Guid of the node to reset an OSD of
    :type node_guid: str
    :param osd_id: OSD to reset
    :type osd_id: str
    :param expected_safety: Expected safety after having reset the disk
    :type expected_safety: dict
    :raises RuntimeError: When the slot of the OSD could not be found while re-adding the OSD
    :return: None
    :rtype: NoneType
    """
    node = AlbaNode(node_guid)
    osd = AlbaOSDList.get_by_osd_id(osd_id)
    slot_params = node.client.build_slot_params(osd)
    removed_aliases = AlbaNodeController.remove_osd(node_guid=node_guid,
                                                    osd_id=osd_id,
                                                    expected_safety=expected_safety)
    if not removed_aliases:
        return

    try:
        AlbaNodeController._fill_slot(node, osd.slot_id, slot_params)
    except (requests.ConnectionError, requests.Timeout):
        # Not fatal: the stack is still invalidated below
        AlbaNodeController._logger.warning('Could not connect to node {0} to (re)configure ASD'.format(node.guid))
    except NotFoundError:
        # Can occur when the slot id could not be matched with an existing slot on the alba-asd manager
        # This error can be anticipated when the status of the osd would be 'missing' in the nodes stack but that would be too much overhead
        message = 'Could not add a new OSD. The requested slot {0} could not be found'.format(osd.slot_id)
        AlbaNodeController._logger.warning(message)
        raise RuntimeError('{0}. Slot {1} might no longer be present on Alba node {2}'.format(message, osd.slot_id, node_guid))
    node.invalidate_dynamics('stack')
def remove_slot(node_cluster_guid, node_guid, slot_id):
    # type: (str, str, str) -> None
    """
    Removes a slot through the active node of a cluster and syncs the stack to the other nodes (best effort)
    :param node_cluster_guid: Guid of the node cluster to remove a disk from
    :type node_cluster_guid: str
    :param node_guid: Guid of the AlbaNode to act as the 'active' side
    :type node_guid: basestring
    :param slot_id: Slot ID
    :type slot_id: str
    :raises ValueError: When the given node is not part of the given cluster
    :raises RuntimeError: When OSDs of the active node still reference the slot
    :return: None
    :rtype: NoneType
    """
    node_cluster = AlbaNodeCluster(node_cluster_guid)
    active_node = AlbaNode(node_guid)
    if active_node not in node_cluster.alba_nodes:
        raise ValueError('The requested active AlbaNode is not part of AlbaNodeCluster {0}'.format(node_cluster.guid))
    if any(osd.slot_id == slot_id for osd in active_node.osds):
        raise RuntimeError('A slot with claimed OSDs can\'t be removed')

    active_node.client.clear_slot(slot_id)
    active_node.invalidate_dynamics()

    # Invalidate the stack and sync towards all passive sides
    for node in node_cluster.alba_nodes:
        if node != active_node:
            try:
                node.client.sync_stack(active_node.stack)
            except Exception:
                # Best effort: a failing passive side must not fail the removal. Only Exception is caught
                # so KeyboardInterrupt/SystemExit are no longer swallowed
                AlbaNodeClusterController._logger.exception('Error while syncing stacks to the passive side')

    if active_node.storagerouter is not None:
        DiskController.sync_with_reality(storagerouter_guid=active_node.storagerouter_guid)
def fill_slots(node_guid, osd_information, metadata=None):
    """
    Creates 1 or more new OSDs
    Every entry of osd_information is validated against the parameters required by the flows ('fill' and/or
    'fill_add') the node supports, before any slot is filled
    :param node_guid: Guid of the node to which the disks belong
    :type node_guid: str
    :param osd_information: Information about the amount of OSDs to add to each Slot
    :type osd_information: list
    :param metadata: Metadata to add to the OSD (connection information for remote Backend, general Backend information)
    :type metadata: dict
    :raises ValueError: When the node supports no fill flow or when an entry misses required parameters
    :return: None
    :rtype: NoneType
    """
    type_validators = {'integer': (int, None),
                       'osd_type': (str, AlbaOSD.OSD_TYPES.keys()),
                       'ip': (str, ExtensionsToolbox.regex_ip),
                       'port': (int, {'min': 1, 'max': 65535})}
    node = AlbaNode(node_guid)

    # Collect the parameters required by every flow the node supports
    required_params = {'slot_id': (str, None)}
    can_be_filled = False
    for flow in ('fill', 'fill_add'):
        if node.node_metadata[flow] is not False:
            can_be_filled = True
            if flow == 'fill_add':
                required_params['alba_backend_guid'] = (str, None)
            for key, mtype in node.node_metadata['{0}_metadata'.format(flow)].iteritems():
                if mtype in type_validators:
                    required_params[key] = type_validators[mtype]
    if can_be_filled is False:
        raise ValueError('The given node does not support filling slots')

    # Validate all entries up front so every problem is reported at once
    validation_reasons = []
    for osd_info in osd_information:  # type: dict
        try:
            ExtensionsToolbox.verify_required_params(required_params=required_params, actual_params=osd_info)
        except RuntimeError as ex:
            validation_reasons.append(str(ex))
    if validation_reasons:
        raise ValueError('Missing required parameter:\n *{0}'.format('\n* '.join(validation_reasons)))

    def _claim(info):
        """ Adds/claims the OSD described by the given information on its ALBA Backend """
        AlbaController.add_osds(alba_backend_guid=info['alba_backend_guid'],
                                osds=[info],
                                alba_node_guid=node_guid,
                                metadata=metadata)

    for osd_info in osd_information:
        if node.node_metadata['fill'] is True:
            # Only filling is required
            fill_extra = {key: osd_info[key] for key in node.node_metadata['fill_metadata']}
            AlbaNodeController._fill_slot(node, osd_info['slot_id'], fill_extra)
        elif node.node_metadata['fill_add'] is True:
            # Fill the slot
            fill_extra = {key: osd_info[key] for key in node.node_metadata['fill_add_metadata']}
            created_osds = AlbaNodeController._fill_slot(node, osd_info['slot_id'], fill_extra)
            # And add/claim the OSD
            if node.type == AlbaNode.NODE_TYPES.S3:
                # The S3 manager returns the information about the osd when filling it
                for created_osd_info in created_osds:
                    osd_info.update(created_osd_info)  # Add additional information about the osd
                    _claim(osd_info)
            else:
                _claim(osd_info)
    node.invalidate_dynamics('stack')
def fill_slots(node_cluster_guid, node_guid, osd_information, metadata=None):
    # type: (str, str, List[Dict[str, Any]], Optional[Dict[str, Any]]) -> None
    """
    Creates 1 or more new OSDs through the active node of a cluster
    Afterwards the stack of the active node is synced to all other nodes of the cluster (best effort)
    :param node_cluster_guid: Guid of the node cluster to which the disks belong
    :type node_cluster_guid: basestring
    :param node_guid: Guid of the AlbaNode to act as the 'active' side
    :type node_guid: basestring
    :param osd_information: Information about the amount of OSDs to add to each Slot
    :type osd_information: list
    :param metadata: Metadata to add to the OSD (connection information for remote Backend, general Backend information)
    :type metadata: dict
    :raises ValueError: When the node is not part of the cluster, the cluster supports no fill flow or
                        when an entry misses required parameters
    :return: None
    :rtype: NoneType
    """
    metadata_type_validation = {'integer': (int, None),
                                'osd_type': (str, AlbaOSD.OSD_TYPES.keys()),
                                'ip': (str, ExtensionsToolbox.regex_ip),
                                'port': (int, {'min': 1, 'max': 65535})}
    node_cluster = AlbaNodeCluster(node_cluster_guid)
    # Check for the active side if it's part of the cluster
    active_node = AlbaNode(node_guid)
    if active_node not in node_cluster.alba_nodes:
        raise ValueError('The requested active AlbaNode is not part of AlbaNodeCluster {0}'.format(node_cluster.guid))

    # Collect the parameters required by every flow the cluster supports
    required_params = {'slot_id': (str, None)}
    can_be_filled = False
    for flow in ['fill', 'fill_add']:
        if node_cluster.cluster_metadata[flow] is False:
            continue
        can_be_filled = True
        if flow == 'fill_add':
            required_params['alba_backend_guid'] = (str, None)
        for key, mtype in node_cluster.cluster_metadata['{0}_metadata'.format(flow)].iteritems():
            if mtype in metadata_type_validation:
                required_params[key] = metadata_type_validation[mtype]
    if can_be_filled is False:
        raise ValueError('The given node cluster does not support filling slots')

    # Validate all entries up front so every problem is reported at once
    validation_reasons = []
    for slot_info in osd_information:
        try:
            ExtensionsToolbox.verify_required_params(required_params=required_params, actual_params=slot_info)
        except RuntimeError as ex:
            validation_reasons.append(str(ex))
    if len(validation_reasons) > 0:
        raise ValueError('Missing required parameter:\n *{0}'.format('\n* '.join(validation_reasons)))

    for slot_info in osd_information:
        if node_cluster.cluster_metadata['fill'] is True:
            # Only filling is required
            active_node.client.fill_slot(slot_id=slot_info['slot_id'],
                                         extra=dict((key, slot_info[key]) for key in node_cluster.cluster_metadata['fill_metadata']))
        elif node_cluster.cluster_metadata['fill_add'] is True:
            # Fill the slot
            active_node.client.fill_slot(slot_id=slot_info['slot_id'],
                                         extra=dict((key, slot_info[key]) for key in node_cluster.cluster_metadata['fill_add_metadata']))
            # And add/claim the OSD
            AlbaController.add_osds(alba_backend_guid=slot_info['alba_backend_guid'],
                                    osds=[slot_info],
                                    alba_node_guid=node_guid,
                                    metadata=metadata)

    # Invalidate the stack and sync towards all passive sides
    active_node.invalidate_dynamics('stack')
    for node in node_cluster.alba_nodes:
        if node != active_node:
            try:
                node.client.sync_stack(active_node.stack)
            except Exception:
                # Best effort: a failing passive side must not fail the fill. Only Exception is caught
                # so KeyboardInterrupt/SystemExit are no longer swallowed
                AlbaNodeClusterController._logger.exception('Error while syncing stacks to the passive side')
    node_cluster.invalidate_dynamics('stack')
def remove_osd(node_guid, osd_id, expected_safety):
    """
    Removes an OSD: purges it from its ALBA Backend, deletes it on the node and cleans up configuration and model
    :param node_guid: Guid of the node to remove an OSD from
    :type node_guid: str
    :param osd_id: ID of the OSD to remove
    :type osd_id: str
    :param expected_safety: Expected safety after having removed the OSD. Passing None skips the safety check
    :type expected_safety: dict or None
    :raises RuntimeError: When the calculated safety differs from the expected safety or when the node
                          reports a failure while deleting the OSD
    :return: List holding the slot ID of the removed OSD
    :rtype: list
    """
    # Retrieve corresponding OSD in model
    node = AlbaNode(node_guid)
    logger = AlbaNodeController._logger
    logger.debug('Removing OSD {0} at node {1}'.format(osd_id, node.ip))
    osd = AlbaOSDList.get_by_osd_id(osd_id)
    alba_backend = osd.alba_backend

    if expected_safety is not None:
        final_safety = AlbaController.calculate_safety(alba_backend_guid=alba_backend.guid,
                                                       removal_osd_ids=[osd_id])
        safety_lost = final_safety['lost']
        safety_crit = final_safety['critical']
        # A safety without lost/critical items is always fine, otherwise it must match the expectation
        if safety_crit != 0 or safety_lost != 0:
            if safety_crit != expected_safety['critical'] or safety_lost != expected_safety['lost']:
                raise RuntimeError('Cannot remove OSD {0} as the current safety is not as expected ({1} vs {2})'.format(osd_id, final_safety, expected_safety))
        logger.debug('Safety OK for OSD {0} on backend {1}'.format(osd_id, alba_backend.guid))
    else:
        logger.warning('Skipping safety check for OSD {0} on backend {1} - this is dangerous'.format(osd_id, alba_backend.guid))

    logger.debug('Purging OSD {0} on backend {1}'.format(osd_id, alba_backend.guid))
    AlbaController.remove_units(alba_backend_guid=alba_backend.guid, osd_ids=[osd_id])

    # Delete the OSD
    deletion = node.client.delete_osd(slot_id=osd.slot_id, osd_id=osd_id)
    if deletion['_success'] is False:
        raise RuntimeError('Error removing OSD: {0}'.format(deletion['_error']))

    # Clean configuration management and model - Well, just try it at least
    if Configuration.exists(ASD_CONFIG.format(osd_id), raw=True):
        Configuration.delete(ASD_CONFIG_DIR.format(osd_id), raw=True)

    osd.delete()
    node.invalidate_dynamics()
    if alba_backend is not None:
        alba_backend.invalidate_dynamics()
        alba_backend.backend.invalidate_dynamics()

    if node.storagerouter is not None:
        try:
            DiskController.sync_with_reality(storagerouter_guid=node.storagerouter_guid)
        except UnableToConnectException:
            logger.warning('Skipping disk sync since StorageRouter {0} is offline'.format(node.storagerouter.name))

    return [osd.slot_id]
def initiate_failover(cls, node_guid):
    # type: (basestring) -> None
    """
    Initiate an OSD failover for a particular AlbaNode
    This AlbaNode has to be part of an AlbaNodeCluster with multiple AlbaNodes
    The other nodes of the cluster are tried in random order: a candidate is considered responsive when its
    metadata can be retrieved within 3 attempts, after which the given node is powered off through IPMI
    :param node_guid: Guid of the AlbaNode
    :type node_guid: basestring
    :raises ValueError: When the node has no cluster or the cluster has no other nodes
    :raises RuntimeError: When all candidates have been processed
    :return: None
    :rtype: NoneType
    """
    with Configuration.lock('albanode_{0}_failover'.format(node_guid), wait=5, expiration=60):
        node = AlbaNode(node_guid)
        node_cluster = node.albanode_cluster
        if node_cluster is None:
            raise ValueError('Unable to failover Node with guid {0} as it has no relation to a cluster'.format(node_guid))
        candidate_guids = [guid for guid in node_cluster.albanode_guids if guid != node_guid]
        if not candidate_guids:
            raise ValueError('Unable to failover Node with guid {0} as there are no failover candidates'.format(node_guid))

        while candidate_guids:
            # Select random failover node from the pool
            random_index = random.randrange(len(candidate_guids))
            failover_node = AlbaNode(candidate_guids.pop(random_index))
            cls._logger.info('Checking if Node with guid {0} is responsive so a failover can happen'.format(failover_node.guid))

            responsive = False
            for _ in range(3):
                try:
                    failover_node.client.get_metadata()
                    responsive = True
                    break  # Avoid sleep
                except:
                    cls._logger.exception('Node with guid {0} is not responsive'.format(failover_node.guid))
                time.sleep(5)
            else:
                cls._logger.error('Node with guid {0} is not responsive. Looking for another node'.format(failover_node.guid))
            if not responsive:
                # Another node must be selected
                continue

            # Kill current node through IPMI
            ipmi_info = node.ipmi_info
            try:
                ipmi_controller = IPMIController(client=cls._client, **ipmi_info)
                ipmi_controller.power_off_node()
            except:
                cls._logger.exception('Unable to control node with guid {0} through IPMI'.format(node_guid))
                raise
            # NOTE(review): nothing leaves the loop after a successful power-off, so the remaining candidates
            # are processed as well and the RuntimeError below is always raised - confirm whether a
            # return/break is intended here
        raise RuntimeError('No failover happened. Exhausted all options')
def build_dal_structure(structure, previous_structure=None):
    """
    Builds (or extends) a DAL structure with ALBA related objects
    Keys read from 'structure' (all optional):
        * alba_backends: iterable of (backend ID, name of a scaling in AlbaBackend.SCALINGS) pairs
        * alba_abm_clusters: iterable of backend IDs for which an ABM cluster + service is created
        * alba_nsm_clusters: iterable of (backend ID, amount of NSM clusters) pairs
        * alba_nodes: iterable of node IDs
        * alba_osds: iterable of (OSD ID, backend ID, node ID, slot ID) tuples
    Objects whose ID is already present in 'previous_structure' are not created again
    :param structure: Description of the objects to create
    :type structure: dict
    :param previous_structure: Result of an earlier call, which gets extended
    :type previous_structure: dict
    :raises ValueError: When a cluster is requested for a backend ID which is not known
    :return: Dict with the created (and previously known) objects, grouped by kind and keyed by ID
    :rtype: dict
    """
    known = {} if previous_structure is None else previous_structure
    alba_osds = known.get('alba_osds', {})
    alba_nodes = known.get('alba_nodes', {})
    backend_types = known.get('backend_types', {})
    service_types = known.get('service_types', {})
    alba_backends = known.get('alba_backends', {})
    alba_abm_clusters = known.get('alba_abm_clusters', {})
    alba_nsm_clusters = known.get('alba_nsm_clusters', {})

    def _ensure_service_type(type_name):
        """ Registers the ServiceType with the given name, creating it when it cannot be found by name """
        if type_name in service_types:
            return
        service_type = ServiceTypeList.get_by_name(type_name)
        if service_type is None:
            service_type = ServiceType()
            service_type.name = type_name
            service_type.save()
        service_types[type_name] = service_type

    def _create_arakoon_service(service_name, service_type):
        """ Creates and saves a Service without ports and without StorageRouter """
        service = Service()
        service.name = service_name
        service.type = service_type
        service.ports = []
        service.storagerouter = None
        service.save()
        return service

    if 1 not in backend_types:
        backend_type = BackendType()
        backend_type.code = 'alba'
        backend_type.name = 'ALBA'
        backend_type.save()
        backend_types[1] = backend_type
    _ensure_service_type('AlbaManager')
    _ensure_service_type('NamespaceManager')

    # ALBA Backends
    for ab_id, scaling in structure.get('alba_backends', ()):
        if ab_id in alba_backends:
            continue
        backend = Backend()
        backend.name = 'backend_{0}'.format(ab_id)
        backend.backend_type = backend_types[1]
        backend.save()
        alba_backend = AlbaBackend()
        alba_backend.backend = backend
        alba_backend.scaling = getattr(AlbaBackend.SCALINGS, scaling)
        alba_backend.alba_id = str(ab_id)
        alba_backend.save()
        alba_backends[ab_id] = alba_backend

    # ABM clusters
    for ab_id in structure.get('alba_abm_clusters', ()):
        if ab_id in alba_abm_clusters:
            continue
        if ab_id not in alba_backends:
            raise ValueError('Non-existing ALBA Backend ID provided')
        alba_backend = alba_backends[ab_id]
        abm_cluster = ABMCluster()
        abm_cluster.name = '{0}-abm'.format(alba_backend.name)
        abm_cluster.alba_backend = alba_backend
        abm_cluster.config_location = '/ovs/arakoon/{0}-abm/config'.format(alba_backend.name)
        abm_cluster.save()
        abm_service = _create_arakoon_service('arakoon-{0}-abm'.format(alba_backend.name),
                                              service_types['AlbaManager'])
        abm_junction_service = ABMService()
        abm_junction_service.service = abm_service
        abm_junction_service.abm_cluster = abm_cluster
        abm_junction_service.save()
        alba_abm_clusters[ab_id] = abm_cluster

    # NSM clusters
    for ab_id, amount in structure.get('alba_nsm_clusters', ()):
        if ab_id in alba_nsm_clusters and amount == len(alba_nsm_clusters[ab_id]):
            continue
        if ab_id not in alba_backends:
            raise ValueError('Non-existing ALBA Backend ID provided')
        alba_backend = alba_backends[ab_id]
        alba_nsm_clusters[ab_id] = []
        # NSM clusters already modelled for this backend are reused, based on their number
        modelled_clusters = {}
        for modelled_cluster in alba_backend.nsm_clusters:
            modelled_clusters[modelled_cluster.number] = modelled_cluster
        for number in range(amount):
            if number in modelled_clusters:
                alba_nsm_clusters[ab_id].append(modelled_clusters[number])
                continue
            nsm_cluster = NSMCluster()
            nsm_cluster.name = '{0}-nsm_{1}'.format(alba_backend.name, number)
            nsm_cluster.number = number
            nsm_cluster.alba_backend = alba_backend
            nsm_cluster.config_location = '/ovs/arakoon/{0}-nsm_{1}/config'.format(alba_backend.name, number)
            nsm_cluster.save()
            nsm_service = _create_arakoon_service('arakoon-{0}-nsm_{1}'.format(alba_backend.name, number),
                                                  service_types['NamespaceManager'])
            nsm_junction_service = NSMService()
            nsm_junction_service.service = nsm_service
            nsm_junction_service.nsm_cluster = nsm_cluster
            nsm_junction_service.save()
            alba_nsm_clusters[ab_id].append(nsm_cluster)

    # ALBA nodes
    for an_id in structure.get('alba_nodes', []):
        if an_id in alba_nodes:
            continue
        alba_node = AlbaNode()
        alba_node.ip = '10.1.0.{0}'.format(an_id)
        alba_node.port = 8500
        alba_node.username = str(an_id)
        alba_node.password = str(an_id)
        alba_node.node_id = 'node_{0}'.format(an_id)
        alba_node.save()
        alba_nodes[an_id] = alba_node
        # Register the metadata result the mocked manager client returns for this node
        mocked_metadata = {'get_metadata': {'_version': 3}}
        if alba_node in ManagerClientMockup.test_results:
            ManagerClientMockup.test_results[alba_node].update(mocked_metadata)
        else:
            ManagerClientMockup.test_results[alba_node] = mocked_metadata

    # ALBA OSDs
    for ao_id, ab_id, an_id, slot_id in structure.get('alba_osds', ()):
        if ao_id in alba_osds:
            continue
        osd = AlbaOSD()
        osd.osd_id = 'alba_osd_{0}'.format(ao_id)
        osd.osd_type = AlbaOSD.OSD_TYPES.ASD
        osd.alba_backend = alba_backends[ab_id]
        osd.alba_node = alba_nodes[an_id]
        osd.slot_id = 'alba_slot_{0}'.format(slot_id)
        osd.ips = ['127.0.0.{0}'.format(ao_id)]
        osd.port = 35000 + ao_id
        osd.save()
        alba_osds[ao_id] = osd

    return {'alba_osds': alba_osds,
            'alba_nodes': alba_nodes,
            'backend_types': backend_types,
            'service_types': service_types,
            'alba_backends': alba_backends,
            'alba_abm_clusters': alba_abm_clusters,
            'alba_nsm_clusters': alba_nsm_clusters}