def update_preset(alba_backend_guid, name, policies): """ Updates the policies of an existing preset on the ALBA Backend :param alba_backend_guid: Guid of the ALBA backend :type alba_backend_guid: str :param name: Name of the preset :type name: str :param policies: New policy list to be sent to ALBA :type policies: list :return: None """ # VALIDATIONS AlbaPresetController._validate_policies_param(policies=policies) alba_backend = AlbaBackend(alba_backend_guid) if name not in [preset['name'] for preset in alba_backend.presets]: raise RuntimeError('Could not find a preset with name {0} for ALBA Backend {1}'.format(name, alba_backend.name)) # UPDATE PRESET AlbaPresetController._logger.debug('Updating preset {0} with policies {1}'.format(name, policies)) config = Configuration.get_configuration_path(ArakoonInstaller.CONFIG_KEY.format(AlbaController.get_abm_cluster_name(alba_backend=alba_backend))) temp_config_file = tempfile.mktemp() with open(temp_config_file, 'wb') as data_file: data_file.write(json.dumps({'policies': policies})) data_file.flush() AlbaCLI.run(command='update-preset', config=config, named_params={'input-url': temp_config_file}, extra_params=[name]) alba_backend.invalidate_dynamics() os.remove(temp_config_file)
def delete_preset(alba_backend_guid, name): """ Deletes a preset from the Alba backend :param alba_backend_guid: Guid of the ALBA backend :type alba_backend_guid: str :param name: Name of the preset :type name: str :return: None """ # VALIDATIONS alba_backend = AlbaBackend(alba_backend_guid) preset_default_map = dict((preset['name'], preset['is_default']) for preset in alba_backend.presets) if name not in preset_default_map: AlbaPresetController._logger.warning( 'Preset with name {0} for ALBA Backend {1} could not be found, so not deleting' .format(name, alba_backend.name)) return if preset_default_map[name] is True: raise RuntimeError('Cannot delete the default preset') # DELETE PRESET AlbaPresetController._logger.debug('Deleting preset {0}'.format(name)) config = Configuration.get_configuration_path( alba_backend.abm_cluster.config_location) AlbaCLI.run(command='delete-preset', config=config, extra_params=[name]) alba_backend.invalidate_dynamics()
def update_preset(alba_backend_guid, name, policies): """ Updates the policies of an existing preset on the ALBA Backend :param alba_backend_guid: Guid of the ALBA backend :type alba_backend_guid: str :param name: Name of the preset :type name: str :param policies: New policy list to be sent to ALBA :type policies: list :return: None """ # VALIDATIONS AlbaPresetController._validate_policies_param(policies=policies) alba_backend = AlbaBackend(alba_backend_guid) if name not in [preset['name'] for preset in alba_backend.presets]: raise RuntimeError( 'Could not find a preset with name {0} for ALBA Backend {1}'.format(name, alba_backend.name)) # UPDATE PRESET AlbaPresetController._logger.debug( 'Updating preset {0} with policies {1}'.format(name, policies)) config = Configuration.get_configuration_path( alba_backend.abm_cluster.config_location) temp_config_file = tempfile.mktemp() with open(temp_config_file, 'wb') as data_file: data_file.write(json.dumps({'policies': policies})) data_file.flush() AlbaCLI.run(command='update-preset', config=config, named_params={'input-url': temp_config_file}, extra_params=[name]) alba_backend.invalidate_dynamics() os.remove(temp_config_file)
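A minimal usage sketch for the two preset calls above. The backend GUID, preset names and policy values are hypothetical and only illustrate the expected shapes; each policy is a list of four integers [k, m, c, x], consistent with the availability checks in _presets further down (policy[2] compared against the disk count capped per node by policy[3]).
# Hypothetical values, for illustration only.
backend_guid = '00000000-0000-0000-0000-000000000000'  # assumed ALBA Backend GUID
AlbaPresetController.update_preset(alba_backend_guid=backend_guid,
                                   name='default',
                                   policies=[[5, 4, 8, 3], [2, 2, 3, 4]])
AlbaPresetController.delete_preset(alba_backend_guid=backend_guid, name='obsolete-preset')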
def get_stats_nsms(cls): """ Retrieve the number of NSMs deployed and their statistics """ if cls._config is None: cls.validate_and_retrieve_config() stats = [] errors = False environment = cls._config['environment'] for alba_backend in AlbaBackendList.get_albabackends(): for nsm in alba_backend.nsm_clusters: stats.append({ 'tags': { 'nsm_number': nsm.number, 'environment': environment, 'backend_name': alba_backend.name, 'abm_service_name': alba_backend.abm_cluster.name }, 'fields': { 'load': float(AlbaArakoonController.get_load(nsm)) }, 'measurement': 'nsm' }) config_path = Configuration.get_configuration_path( alba_backend.abm_cluster.config_location) try: nsm_host_ids = [ nsm_host['id'] for nsm_host in AlbaCLI.run(command='list-nsm-hosts', config=config_path) ] nsm_hosts_statistics = AlbaCLI.run( command='nsm-hosts-statistics', config=config_path, named_params={'nsm-hosts': ','.join(nsm_host_ids)}) for nsm_host_id, statistics in nsm_hosts_statistics.iteritems(): stats.append({ 'tags': { 'nsm_name': nsm_host_id, 'environment': environment, 'backend_name': alba_backend.name }, 'fields': cls._convert_to_float_values(statistics['statistics']), 'measurement': 'nsm_statistic' }) except Exception: errors = True cls._logger.exception( 'Retrieving NSM statistics for ALBA Backend {0} failed'.format(alba_backend.name)) return errors, stats
def test_debug_flag(self): with_debug_output = AlbaCLI.run('list-namespaces', config='/opt/OpenvStorage/config/arakoon/alba-abm/alba-abm.cfg', as_json=True, debug=True, client=self._get_SSHClient(self._get_my_ip(), 'root')) with_debug_length = len(with_debug_output[0]) + len(with_debug_output[1]) without_debug_output = AlbaCLI.run('list-namespaces', config='/opt/OpenvStorage/config/arakoon/alba-abm/alba-abm.cfg', as_json=True, debug=False, client=self._get_SSHClient(self._get_my_ip(), 'root')) without_debug_length = len(without_debug_output) assert with_debug_length > without_debug_length,\ "additional logging expected with debug=True:\n {0}\n{1}".format(with_debug_output, without_debug_output)
def get_stats_proxies(cls): """ Retrieve statistics for all ALBA proxies """ if cls._config is None: cls.validate_and_retrieve_config() stats = [] errors = False environment = cls._config['environment'] vpool_namespace_cache = {} for storagedriver in StorageDriverList.get_storagedrivers(): for alba_proxy_service in storagedriver.alba_proxies: ip = storagedriver.storage_ip port = alba_proxy_service.service.ports[0] try: vpool = storagedriver.vpool if vpool.guid not in vpool_namespace_cache: vpool_namespace_cache[ vpool. guid] = vpool.storagedriver_client.list_volumes( req_timeout_secs=5) active_namespaces = vpool_namespace_cache[vpool.guid] for namespace_stats in AlbaCLI.run( command='proxy-statistics', named_params={ 'host': ip, 'port': port })['ns_stats']: namespace = namespace_stats[0] if namespace not in active_namespaces: continue stats.append({ 'tags': { 'server': storagedriver.storagerouter.name, 'namespace': namespace, 'vpool_name': vpool.name, 'environment': environment, 'backend_name': vpool.metadata['backend']['backend_info'] ['name'], 'service_name': alba_proxy_service.service.name }, 'fields': cls._convert_to_float_values(namespace_stats[1]), 'measurement': 'proxyperformance_namespace' }) except Exception: errors = True cls._logger.exception( "Failed to retrieve proxy statistics for proxy service running at {0}:{1}" .format(ip, port)) return errors, stats
def _osd_statistics(self): """ Loads statistics from all of its OSDs (ASD and AD types) in a single call """ from ovs.dal.hybrids.albaosd import AlbaOSD statistics = {} if self.abm_cluster is None: return statistics # No ABM cluster yet, so backend not fully installed yet osd_ids = [ osd.osd_id for osd in self.osds if osd.osd_type in [AlbaOSD.OSD_TYPES.ASD, AlbaOSD.OSD_TYPES.AD] ] if len(osd_ids) == 0: return statistics try: config = Configuration.get_configuration_path( self.abm_cluster.config_location) # TODO: This will need to be changed to osd-multistatistics, see openvstorage/alba#749 raw_statistics = AlbaCLI.run( command='asd-multistatistics', config=config, named_params={'long-id': ','.join(osd_ids)}) except RuntimeError: return statistics if raw_statistics: for osd_id, stats in raw_statistics.iteritems(): if stats['success'] is True: statistics[osd_id] = stats['result'] return statistics
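A sketch of the result shape the success/result handling above relies on. The OSD ids and the failure entry are assumptions; 'capacity' and 'disk_usage' are the fields consumed by the usage calculations elsewhere in these snippets.
raw_statistics = {
    'osd-long-id-1': {'success': True, 'result': {'capacity': 1995218165760, 'disk_usage': 8192}},
    'osd-long-id-2': {'success': False, 'error': 'connection refused'},  # failure shape is an assumption
}
statistics = dict((osd_id, stats['result']) for osd_id, stats in raw_statistics.iteritems() if stats['success'] is True)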
def _all_disks(self): """ Returns a live list of all disks on this node """ try: disks = self.client.get_disks(reraise=True) except (requests.ConnectionError, requests.Timeout): from ovs.dal.lists.albabackendlist import AlbaBackendList disks = [] asd_ids = [] for backend in AlbaBackendList.get_albabackends(): # All backends of this node config = 'etcd://127.0.0.1:2379/ovs/arakoon/{0}-abm/config'.format(backend.name) osds = AlbaCLI.run('list-all-osds', config=config, as_json=True) for osd in osds: node_id = osd.get('node_id') asd_id = osd.get('long_id') decommissioned = osd.get('decommissioned') if node_id == self.node_id and asd_id not in asd_ids and decommissioned is False: asd_ids.append(asd_id) disks.append({'asd_id': asd_id, 'node_id': osd.get('node_id'), 'port': osd.get('port'), 'ips': osd.get('ips'), 'available': False, 'state': {'state': 'error', 'detail': 'nodedown'}, 'log_level': 'info', 'device': asd_id, 'home': asd_id, 'mountpoint': asd_id, 'name': asd_id, 'usage': {'available': 0, 'size': 0, 'used': 0}}) return disks
def _presets(self): """ Returns the presets defined on this ALBA Backend, annotated per policy with availability, in-use and active state """ if len(self.abm_services) == 0: return [] # No ABM services yet, so backend not fully installed yet asds = {} if self.scaling != AlbaBackend.SCALINGS.GLOBAL: for node in AlbaNodeList.get_albanodes(): asds[node.node_id] = 0 for disk in self.local_stack[node.node_id].values(): for asd_info in disk['asds'].values(): if asd_info['status'] in ['claimed', 'warning']: asds[node.node_id] += 1 config = Configuration.get_configuration_path('/ovs/arakoon/{0}-abm/config'.format(self.name)) presets = AlbaCLI.run(command='list-presets', config=config) preset_dict = {} for preset in presets: preset_dict[preset['name']] = preset if 'in_use' not in preset: preset['in_use'] = True if 'is_default' not in preset: preset['is_default'] = False preset['is_available'] = False preset['policies'] = [tuple(policy) for policy in preset['policies']] preset['policy_metadata'] = {} active_policy = None for policy in preset['policies']: is_available = False available_disks = 0 if self.scaling != AlbaBackend.SCALINGS.GLOBAL: available_disks += sum(min(asds[node], policy[3]) for node in asds) if self.scaling != AlbaBackend.SCALINGS.LOCAL: available_disks += sum(self.local_summary['devices'].values()) if available_disks >= policy[2]: if active_policy is None: active_policy = policy is_available = True preset['policy_metadata'][policy] = {'is_active': False, 'in_use': False, 'is_available': is_available} preset['is_available'] |= is_available if active_policy is not None: preset['policy_metadata'][active_policy]['is_active'] = True for namespace in self.ns_data: if namespace['namespace']['state'] != 'active': continue policy_usage = namespace['statistics']['bucket_count'] preset = preset_dict[namespace['namespace']['preset_name']] for usage in policy_usage: upolicy = tuple(usage[0]) # Policy as reported to be "in use" for cpolicy in preset['policies']: # All configured policies if upolicy[0] == cpolicy[0] and upolicy[1] == cpolicy[1] and upolicy[3] <= cpolicy[3]: preset['policy_metadata'][cpolicy]['in_use'] = True break for preset in presets: preset['policies'] = [str(policy) for policy in preset['policies']] for key in preset['policy_metadata'].keys(): preset['policy_metadata'][str(key)] = preset['policy_metadata'][key] del preset['policy_metadata'][key] return presets
def get_load(nsm_cluster): """ Calculates the load of an NSM cluster, returned as a float percentage :param nsm_cluster: NSM cluster to retrieve the load for :type nsm_cluster: ovs.dal.hybrids.albansmcluster.NSMCluster :return: Load of the NSM cluster :rtype: float """ service_capacity = float(nsm_cluster.capacity) if service_capacity < 0: return 50.0 if service_capacity == 0: return float('inf') config = Configuration.get_configuration_path( key=nsm_cluster.alba_backend.abm_cluster.config_location) hosts_data = AlbaCLI.run(command='list-nsm-hosts', config=config) try: host = [ host for host in hosts_data if host['id'] == nsm_cluster.name ][0] except IndexError: raise ValueError( 'No host data could be retrieved from Alba for NSM cluster {0}' .format(nsm_cluster.name)) usage = host['namespaces_count'] return round(usage / service_capacity * 100.0, 5)
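A short worked example of the load formula above, using hypothetical numbers:
# Hypothetical NSM cluster: capacity 50, currently hosting 13 namespaces.
service_capacity = float(50)
namespaces_count = 13
load = round(namespaces_count / service_capacity * 100.0, 5)  # 26.0 (percent)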
def _ns_data(self): """ Loads namespace data """ if len(self.abm_services) == 0: return [] # No ABM services yet, so backend not fully installed yet config = 'etcd://127.0.0.1:2379/ovs/arakoon/{0}/config'.format(self.abm_services[0].service.name) return AlbaCLI.run('show-namespaces', config=config, extra_params=['--max=-1'], as_json=True)[1]
def _ns_data(self): """ Loads namespace data """ if len(self.abm_services) == 0: return [] # No ABM services yet, so backend not fully installed yet config = Configuration.get_configuration_path('/ovs/arakoon/{0}-abm/config'.format(self.name)) return AlbaCLI.run(command='show-namespaces', config=config, named_params={'max': -1})[1]
def verify_namespaces(): """ Verify namespaces for all backends """ logger.info('verify namespace task scheduling started') job_factor = 10 job_factor_key = '/ovs/alba/backends/job_factor' if EtcdConfiguration.exists(job_factor_key): job_factor = EtcdConfiguration.get(job_factor_key) else: EtcdConfiguration.set(job_factor_key, job_factor) for albabackend in AlbaBackendList.get_albabackends(): config = 'etcd://127.0.0.1:2379/ovs/arakoon/{0}-abm/config'.format(albabackend.backend.name) namespaces = AlbaCLI.run('list-namespaces', config=config, as_json=True) for namespace in namespaces: logger.info('verifying namespace: {0} scheduled ...'.format(namespace['name'])) AlbaCLI.run('verify-namespace {0} --factor={1}'.format(namespace['name'], job_factor)) logger.info('verify namespace task scheduling finished')
def _ns_data(self): """ Loads namespace data """ if self.abm_cluster is None: return [] # No ABM cluster yet, so backend not fully installed yet config = Configuration.get_configuration_path( self.abm_cluster.config_location) return AlbaCLI.run(command='show-namespaces', config=config, named_params={'max': -1})[1]
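The _ns_data variants above show the same call going through the older and the newer AlbaCLI wrapper; assuming named_params is rendered as --key=value flags, both variants end up issuing an equivalent command:
# Older wrapper style: the flag is passed verbatim and JSON output is requested explicitly.
AlbaCLI.run('show-namespaces', config=config, extra_params=['--max=-1'], as_json=True)
# Newer wrapper style: the same flag expressed as a named parameter (assumed to expand to '--max=-1').
AlbaCLI.run(command='show-namespaces', config=config, named_params={'max': -1})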
def _presets(self): """ Returns the presets defined on this ALBA Backend, annotated per policy with availability, in-use and active state """ all_disks = self.all_disks disks = {} for node in AlbaNodeList.get_albanodes(): disks[node.node_id] = 0 for disk in all_disks: if disk['node_id'] == node.node_id and disk['status'] in ['claimed', 'warning']: disks[node.node_id] += 1 config = 'etcd://127.0.0.1:2379/ovs/arakoon/{0}-abm/config'.format(self.backend.name) presets = AlbaCLI.run('list-presets', config=config, as_json=True) preset_dict = {} for preset in presets: preset_dict[preset['name']] = preset if 'in_use' not in preset: preset['in_use'] = True if 'is_default' not in preset: preset['is_default'] = False preset['is_available'] = False preset['policies'] = [tuple(policy) for policy in preset['policies']] preset['policy_metadata'] = {} active_policy = None for policy in preset['policies']: is_available = False available_disks = sum(min(disks[node], policy[3]) for node in disks) if available_disks >= policy[2]: if active_policy is None: active_policy = policy is_available = True preset['policy_metadata'][policy] = {'is_active': False, 'in_use': False, 'is_available': is_available} preset['is_available'] |= is_available if active_policy is not None: preset['policy_metadata'][active_policy]['is_active'] = True for namespace in self.ns_data: if namespace['namespace']['state'] != 'active': continue policy_usage = namespace['statistics']['bucket_count'] preset = preset_dict[namespace['namespace']['preset_name']] for usage in policy_usage: upolicy = tuple(usage[0]) # Policy as reported to be "in use" for cpolicy in preset['policies']: # All configured policies if upolicy[0] == cpolicy[0] and upolicy[1] == cpolicy[1] and upolicy[3] <= cpolicy[3]: preset['policy_metadata'][cpolicy]['in_use'] = True break for preset in presets: preset['policies'] = [str(policy) for policy in preset['policies']] for key in preset['policy_metadata'].keys(): preset['policy_metadata'][str(key)] = preset['policy_metadata'][key] del preset['policy_metadata'][key] return presets
def verify_namespaces(): """ Verify namespaces for all backends """ AlbaScheduledTaskController._logger.info('verify namespace task scheduling started') verification_factor = 10 verification_factor_key = '/ovs/alba/backends/verification_factor' if EtcdConfiguration.exists(verification_factor_key): verification_factor = EtcdConfiguration.get(verification_factor_key) else: EtcdConfiguration.set(verification_factor_key, verification_factor) for albabackend in AlbaBackendList.get_albabackends(): backend_name = albabackend.abm_services[0].service.name if albabackend.abm_services else albabackend.name + '-abm' config = 'etcd://127.0.0.1:2379/ovs/arakoon/{0}/config'.format(backend_name) namespaces = AlbaCLI.run('list-namespaces', config=config, as_json=True) for namespace in namespaces: AlbaScheduledTaskController._logger.info('verifying namespace: {0} scheduled ...'.format(namespace['name'])) AlbaCLI.run('verify-namespace {0} --factor={1}'.format(namespace['name'], verification_factor)) AlbaScheduledTaskController._logger.info('verify namespace task scheduling finished')
def delete_preset(alba_backend_guid, name): """ Deletes a preset from the Alba backend :param alba_backend_guid: Guid of the ALBA backend :type alba_backend_guid: str :param name: Name of the preset :type name: str :return: None """ # VALIDATIONS alba_backend = AlbaBackend(alba_backend_guid) preset_default_map = dict((preset['name'], preset['is_default']) for preset in alba_backend.presets) if name not in preset_default_map: AlbaPresetController._logger.warning('Preset with name {0} for ALBA Backend {1} could not be found, so not deleting'.format(name, alba_backend.name)) return if preset_default_map[name] is True: raise RuntimeError('Cannot delete the default preset') # DELETE PRESET AlbaPresetController._logger.debug('Deleting preset {0}'.format(name)) config = Configuration.get_configuration_path(ArakoonInstaller.CONFIG_KEY.format(AlbaController.get_abm_cluster_name(alba_backend=alba_backend))) AlbaCLI.run(command='delete-preset', config=config, extra_params=[name]) alba_backend.invalidate_dynamics()
def get_stats_alba_backends(cls): """ Retrieve statistics about all ALBA Backends and their maintenance work """ if cls._config is None: cls.validate_and_retrieve_config() stats = [] errors = False environment = cls._config['environment'] for alba_backend in AlbaBackendList.get_albabackends(): try: local_summary = alba_backend.local_summary sizes = local_summary['sizes'] devices = local_summary['devices'] stats.append({ 'tags': { 'environment': environment, 'backend_name': alba_backend.name }, 'fields': { 'red': int(devices['red']), 'free': float(sizes['size'] - sizes['used']), 'used': float(sizes['used']), 'green': int(devices['green']), 'orange': int(devices['orange']), 'maintenance_work': int( AlbaCLI.run( command='list-work', config=Configuration.get_configuration_path( alba_backend.abm_cluster.config_location)) ['count']) }, 'measurement': 'backend' }) except Exception: errors = True cls._logger.exception( 'Retrieving statistics for ALBA Backend {0} failed'.format( alba_backend.name)) return errors, stats
def _asd_statistics(self): """ Loads statistics from all of its ASDs in a single call """ config = 'etcd://127.0.0.1:2379/ovs/arakoon/{0}-abm/config'.format(self.backend.name) statistics = {} if len(self.asds) == 0: return statistics asd_ids = [asd.asd_id for asd in self.asds] try: raw_statistics = AlbaCLI.run('asd-multistatistics', long_id=','.join(asd_ids), config=config, as_json=True) except RuntimeError: return statistics for asd_id, stats in raw_statistics.iteritems(): if stats['success'] is True: statistics[asd_id] = stats['result'] return statistics
def _asd_statistics(self): """ Loads statistics from all of its ASDs in a single call """ from ovs.dal.hybrids.albaosd import AlbaOSD statistics = {} if len(self.abm_services) == 0: return statistics # No ABM services yet, so backend not fully installed yet asd_ids = [osd.osd_id for osd in self.osds if osd.osd_type == AlbaOSD.OSD_TYPES.ASD] if len(asd_ids) == 0: return statistics try: config = Configuration.get_configuration_path('/ovs/arakoon/{0}-abm/config'.format(self.name)) raw_statistics = AlbaCLI.run(command='asd-multistatistics', config=config, named_params={'long-id': ','.join(asd_ids)}) except RuntimeError: return statistics for asd_id, stats in raw_statistics.iteritems(): if stats['success'] is True: statistics[asd_id] = stats['result'] return statistics
def _usages(self): """ Returns an overview of free space, total space and used space """ # Collect total usage usages = {'free': 0.0, 'size': 0.0, 'used': 0.0} if self.abm_cluster is None: return usages config = Configuration.get_configuration_path( self.abm_cluster.config_location) try: osds_stats = AlbaCLI.run(command='list-osds', config=config) except AlbaError: self._logger.exception('Unable to fetch OSD information') return usages for osd_stats in osds_stats: usages['size'] += osd_stats['total'] usages['used'] += osd_stats['used'] usages['free'] = usages['size'] - usages['used'] return usages
def add_preset(alba_backend_guid, name, compression, policies, encryption, fragment_size=None): """ Adds a preset to Alba :param alba_backend_guid: Guid of the ALBA backend :type alba_backend_guid: str :param name: Name of the preset :type name: str :param compression: Compression type for the preset (none | snappy | bz2) :type compression: str :param policies: Policies for the preset :type policies: list :param encryption: Encryption for the preset (none | aes-cbc-256 | aes-ctr-256) :type encryption: str :param fragment_size: Size of a fragment in bytes (e.g. 1048576) :type fragment_size: int :return: None """ # VALIDATIONS if not re.match(Toolbox.regex_preset, name): raise ValueError('Invalid preset name specified') compression_options = ['snappy', 'bz2', 'none'] if compression not in compression_options: raise ValueError( 'Invalid compression format specified, please choose from: "{0}"' .format('", "'.join(compression_options))) encryption_options = ['aes-cbc-256', 'aes-ctr-256', 'none'] if encryption not in encryption_options: raise ValueError( 'Invalid encryption format specified, please choose from: "{0}"' .format('", "'.join(encryption_options))) if fragment_size is not None and (not isinstance(fragment_size, int) or not 16 <= fragment_size <= 1024**3): raise ValueError( 'Fragment size should be a positive integer smaller than 1 GiB' ) AlbaPresetController._validate_policies_param(policies=policies) alba_backend = AlbaBackend(alba_backend_guid) if name in [preset['name'] for preset in alba_backend.presets]: raise RuntimeError( 'Preset with name {0} already exists'.format(name)) # ADD PRESET preset = { 'compression': compression, 'object_checksum': { 'default': ['crc-32c'], 'verify_upload': True, 'allowed': [['none'], ['sha-1'], ['crc-32c']] }, 'osds': ['all'], 'fragment_size': 16 * 1024**2 if fragment_size is None else int(fragment_size), 'policies': policies, 'fragment_checksum': ['crc-32c'], 'fragment_encryption': ['none'], 'in_use': False, 'name': name } # Generate encryption key temp_key_file = None if encryption != 'none': encryption_key = ''.join( random.choice(chr(random.randint(32, 126))) for _ in range(32)) temp_key_file = tempfile.mktemp() with open(temp_key_file, 'wb') as temp_file: temp_file.write(encryption_key) temp_file.flush() preset['fragment_encryption'] = [ '{0}'.format(encryption), '{0}'.format(temp_key_file) ] # Dump preset content on filesystem config = Configuration.get_configuration_path( alba_backend.abm_cluster.config_location) temp_config_file = tempfile.mktemp() with open(temp_config_file, 'wb') as data_file: data_file.write(json.dumps(preset)) data_file.flush() # Create preset AlbaPresetController._logger.debug( 'Adding preset {0} with compression {1} and policies {2}'.format( name, compression, policies)) AlbaCLI.run(command='create-preset', config=config, named_params={'input-url': temp_config_file}, extra_params=[name]) # Cleanup alba_backend.invalidate_dynamics() for filename in [temp_key_file, temp_config_file]: if filename and os.path.exists(filename) and os.path.isfile( filename): os.remove(filename)
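A hedged invocation sketch for add_preset; all values are hypothetical and merely satisfy the validations above (a name matching Toolbox.regex_preset, compression and encryption from the allowed lists, and a fragment size between 16 bytes and 1 GiB).
AlbaPresetController.add_preset(alba_backend_guid='00000000-0000-0000-0000-000000000000',  # assumed GUID
                                name='archive',
                                compression='snappy',
                                policies=[[8, 3, 9, 4]],
                                encryption='aes-cbc-256',
                                fragment_size=4 * 1024 ** 2)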
def _ns_data(self): """ Loads namespace data """ config = 'etcd://127.0.0.1:2379/ovs/arakoon/{0}-abm/config'.format(self.backend.name) return AlbaCLI.run('show-namespaces', config=config, extra_params=['--max=-1'], as_json=True)[1]
def _all_disks(self): """ Returns a live list of all disks known to this AlbaBackend """ from ovs.dal.lists.albanodelist import AlbaNodeList from ovs.dal.lists.albabackendlist import AlbaBackendList alba_backend_map = {} for a_backend in AlbaBackendList.get_albabackends(): alba_backend_map[a_backend.alba_id] = a_backend node_disk_map = {} alba_nodes = AlbaNodeList.get_albanodes() for node in alba_nodes: node_disk_map[node.node_id] = [] # Load OSDs config = 'etcd://127.0.0.1:2379/ovs/arakoon/{0}-abm/config'.format(self.backend.name) for found_osd in AlbaCLI.run('list-all-osds', config=config, as_json=True): node_id = found_osd['node_id'] if node_id in node_disk_map: node_disk_map[node_id].append({'osd': found_osd}) # Load all_disk information def _load_disks(_node, _list): for _disk in _node.all_disks: found = False for container in _list: if 'osd' in container and container['osd']['long_id'] == _disk.get('asd_id'): container['disk'] = _disk found = True break if found is False: _list.append({'disk': _disk}) threads = [] for node in alba_nodes: thread = Thread(target=_load_disks, args=(node, node_disk_map[node.node_id])) thread.start() threads.append(thread) for thread in threads: thread.join() # Make mapping between node IDs and the relevant OSDs and disks def _process_disk(_info, _disks, _node): disk = _info.get('disk') if disk is None: return disk_status = 'uninitialized' disk_status_detail = '' disk_alba_backend_guid = '' if disk['available'] is False: osd = _info.get('osd') disk_alba_state = disk['state']['state'] if disk_alba_state == 'ok': if osd is None: disk_status = 'initialized' elif osd['id'] is None: alba_id = osd['alba_id'] if alba_id is None: disk_status = 'available' else: disk_status = 'unavailable' alba_backend = alba_backend_map.get(alba_id) if alba_backend is not None: disk_alba_backend_guid = alba_backend.guid else: disk_status = 'error' disk_status_detail = 'communicationerror' disk_alba_backend_guid = self.guid for asd in _node.asds: if asd.asd_id == disk['asd_id'] and asd.statistics != {}: disk_status = 'warning' disk_status_detail = 'recenterrors' read = osd['read'] or [0] write = osd['write'] or [0] errors = osd['errors'] global_interval_key = '/ovs/alba/backends/global_gui_error_interval' backend_interval_key = '/ovs/alba/backends/{0}/gui_error_interval'.format(self.guid) interval = EtcdConfiguration.get(global_interval_key) if EtcdConfiguration.exists(backend_interval_key): interval = EtcdConfiguration.get(backend_interval_key) if len(errors) == 0 or (len(read + write) > 0 and max(min(read), min(write)) > max(error[0] for error in errors) + interval): disk_status = 'claimed' disk_status_detail = '' elif disk_alba_state == 'decommissioned': disk_status = 'unavailable' disk_status_detail = 'decommissioned' else: disk_status = 'error' disk_status_detail = disk['state']['detail'] alba_backend = alba_backend_map.get(osd.get('alba_id')) if alba_backend is not None: disk_alba_backend_guid = alba_backend.guid disk['status'] = disk_status disk['status_detail'] = disk_status_detail disk['alba_backend_guid'] = disk_alba_backend_guid _disks.append(disk) def _worker(_queue, _disks): while True: try: item = _queue.get(False) _process_disk(item['info'], _disks, item['node']) except Empty: return queue = Queue() for node in alba_nodes: for info in node_disk_map[node.node_id]: queue.put({'info': info, 'node': node}) disks = [] threads = [] for i in range(5): thread = Thread(target=_worker, args=(queue, disks)) thread.start() threads.append(thread) for thread in threads: 
thread.join() return disks
def test_alba_cli(self): output = AlbaCLI.run('list-namespaces', config='/opt/OpenvStorage/config/arakoon/alba-abm/alba-abm.cfg', as_json=True, debug=True, client=self._get_SSHClient(self._get_my_ip(), 'root')) logging.log(1, "alba_cli output: {0}".format(output))
def migrate(): """ Executes async migrations. It doesn't matter too much when they are executed, as long as they get eventually executed. This code will typically contain: * "dangerous" migration code (it needs certain running services) * Migration code depending on a cluster-wide state * ... """ AlbaMigrationController._logger.info( 'Preparing out of band migrations...') from ovs.dal.hybrids.diskpartition import DiskPartition from ovs.dal.lists.albabackendlist import AlbaBackendList from ovs.dal.lists.albanodelist import AlbaNodeList from ovs.dal.lists.albaosdlist import AlbaOSDList from ovs.dal.lists.storagerouterlist import StorageRouterList from ovs.extensions.generic.configuration import Configuration from ovs.extensions.generic.sshclient import SSHClient, UnableToConnectException from ovs.extensions.migration.migration.albamigrator import ExtensionMigrator from ovs.extensions.packages.albapackagefactory import PackageFactory from ovs.extensions.services.albaservicefactory import ServiceFactory from ovs.extensions.plugins.albacli import AlbaCLI, AlbaError from ovs.lib.alba import AlbaController from ovs.lib.disk import DiskController AlbaMigrationController._logger.info('Start out of band migrations...') ############################################# # Introduction of IP:port combination on OSDs osd_info_map = {} alba_backends = AlbaBackendList.get_albabackends() for alba_backend in alba_backends: AlbaMigrationController._logger.info( 'Verifying ALBA Backend {0}'.format(alba_backend.name)) if alba_backend.abm_cluster is None: AlbaMigrationController._logger.warning( 'ALBA Backend {0} does not have an ABM cluster registered'. format(alba_backend.name)) continue AlbaMigrationController._logger.debug( 'Retrieving configuration path for ALBA Backend {0}'.format( alba_backend.name)) try: config = Configuration.get_configuration_path( alba_backend.abm_cluster.config_location) except: AlbaMigrationController._logger.exception( 'Failed to retrieve the configuration path for ALBA Backend {0}' .format(alba_backend.name)) continue AlbaMigrationController._logger.info( 'Retrieving OSD information for ALBA Backend {0}'.format( alba_backend.name)) try: osd_info = AlbaCLI.run(command='list-all-osds', config=config) except (AlbaError, RuntimeError): AlbaMigrationController._logger.exception( 'Failed to retrieve OSD information for ALBA Backend {0}'. format(alba_backend.name)) continue for osd_info in osd_info: if osd_info.get('long_id'): osd_info_map[osd_info['long_id']] = { 'ips': osd_info.get('ips', []), 'port': osd_info.get('port') } for osd in AlbaOSDList.get_albaosds(): if osd.osd_id not in osd_info_map: AlbaMigrationController._logger.warning( 'OSD with ID {0} is modelled but could not be found through ALBA' .format(osd.osd_id)) continue ips = osd_info_map[osd.osd_id]['ips'] port = osd_info_map[osd.osd_id]['port'] changes = False if osd.ips is None: changes = True osd.ips = ips if osd.port is None: changes = True osd.port = port if changes is True: AlbaMigrationController._logger.info( 'Updating OSD with ID {0} with IPS {1} and port {2}'. 
format(osd.osd_id, ips, port)) osd.save() ################################################### # Read preference for GLOBAL ALBA Backends (1.10.3) (https://github.com/openvstorage/framework-alba-plugin/issues/452) if Configuration.get(key='/ovs/framework/migration|read_preference', default=False) is False: try: name_backend_map = dict((alba_backend.name, alba_backend) for alba_backend in alba_backends) for alba_node in AlbaNodeList.get_albanodes(): AlbaMigrationController._logger.info( 'Processing maintenance services running on ALBA Node {0} with ID {1}' .format(alba_node.ip, alba_node.node_id)) alba_node.invalidate_dynamics('maintenance_services') for alba_backend_name, services in alba_node.maintenance_services.iteritems( ): if alba_backend_name not in name_backend_map: AlbaMigrationController._logger.error( 'ALBA Node {0} has services for an ALBA Backend {1} which is not modelled' .format(alba_node.ip, alba_backend_name)) continue alba_backend = name_backend_map[alba_backend_name] AlbaMigrationController._logger.info( 'Processing {0} ALBA Backend {1} with GUID {2}'. format(alba_backend.scaling, alba_backend.name, alba_backend.guid)) if alba_backend.scaling == alba_backend.SCALINGS.LOCAL: read_preferences = [alba_node.node_id] else: read_preferences = AlbaController.get_read_preferences_for_global_backend( alba_backend=alba_backend, alba_node_id=alba_node.node_id, read_preferences=[]) for service_name, _ in services: AlbaMigrationController._logger.info( 'Processing service {0}'.format(service_name)) old_config_key = '/ovs/alba/backends/{0}/maintenance/config'.format( alba_backend.guid) new_config_key = '/ovs/alba/backends/{0}/maintenance/{1}/config'.format( alba_backend.guid, service_name) if Configuration.exists(key=old_config_key): new_config = Configuration.get( key=old_config_key) new_config[ 'read_preference'] = read_preferences Configuration.set(key=new_config_key, value=new_config) for alba_backend in alba_backends: Configuration.delete( key='/ovs/alba/backends/{0}/maintenance/config'.format( alba_backend.guid)) AlbaController.checkup_maintenance_agents.delay() Configuration.set( key='/ovs/framework/migration|read_preference', value=True) except Exception: AlbaMigrationController._logger.exception( 'Updating read preferences for ALBA Backends failed') ####################################################### # Storing actual package name in version files (1.11.0) (https://github.com/openvstorage/framework/issues/1876) changed_clients = set() storagerouters = StorageRouterList.get_storagerouters() if Configuration.get( key= '/ovs/framework/migration|actual_package_name_in_version_file_alba', default=False) is False: try: service_manager = ServiceFactory.get_manager() alba_pkg_name, alba_version_cmd = PackageFactory.get_package_and_version_cmd_for( component=PackageFactory.COMP_ALBA) for storagerouter in storagerouters: try: root_client = SSHClient( endpoint=storagerouter.ip, username='******' ) # Use '.ip' instead of StorageRouter object because this code is executed during post-update at which point the heartbeat has not been updated for some time except UnableToConnectException: AlbaMigrationController._logger.exception( 'Updating actual package name for version files failed on StorageRouter {0}' .format(storagerouter.ip)) continue for file_name in root_client.file_list( directory=ServiceFactory.RUN_FILE_DIR): if not file_name.endswith('.version'): continue file_path = '{0}/{1}'.format( ServiceFactory.RUN_FILE_DIR, file_name) contents = root_client.file_read(filename=file_path) if 
alba_pkg_name == PackageFactory.PKG_ALBA_EE and '{0}='.format( PackageFactory.PKG_ALBA) in contents: # Rewrite the version file in the RUN_FILE_DIR contents = contents.replace( PackageFactory.PKG_ALBA, PackageFactory.PKG_ALBA_EE) root_client.file_write(filename=file_path, contents=contents) # Regenerate the service and update the EXTRA_VERSION_CMD in the configuration management service_name = file_name.split('.')[0] service_config_key = ServiceFactory.SERVICE_CONFIG_KEY.format( storagerouter.machine_id, service_name) if Configuration.exists(key=service_config_key): service_config = Configuration.get( key=service_config_key) if 'EXTRA_VERSION_CMD' in service_config: service_config[ 'EXTRA_VERSION_CMD'] = '{0}=`{1}`'.format( alba_pkg_name, alba_version_cmd) Configuration.set(key=service_config_key, value=service_config) service_manager.regenerate_service( name='ovs-arakoon', client=root_client, target_name='ovs-{0}'.format( service_name) ) # Leave out .version changed_clients.add(root_client) Configuration.set( key= '/ovs/framework/migration|actual_package_name_in_version_file_alba', value=True) except Exception: AlbaMigrationController._logger.exception( 'Updating actual package name for version files failed') for root_client in changed_clients: try: root_client.run(['systemctl', 'daemon-reload']) except Exception: AlbaMigrationController._logger.exception( 'Executing command "systemctl daemon-reload" failed') #################################### # Fix for migration version (1.11.0) # Previous code could potentially store a higher version number in the config management than the actual version number if Configuration.get( key='/ovs/framework/migration|alba_migration_version_fix', default=False) is False: try: for storagerouter in storagerouters: config_key = '/ovs/framework/hosts/{0}/versions'.format( storagerouter.machine_id) if Configuration.exists(key=config_key): versions = Configuration.get(key=config_key) if versions.get(PackageFactory.COMP_MIGRATION_ALBA, 0) > ExtensionMigrator.THIS_VERSION: versions[ PackageFactory. 
COMP_MIGRATION_ALBA] = ExtensionMigrator.THIS_VERSION Configuration.set(key=config_key, value=versions) Configuration.set( key='/ovs/framework/migration|alba_migration_version_fix', value=True) except Exception: AlbaMigrationController._logger.exception( 'Updating migration version failed') #################################### # Enable auto-cleanup migration_auto_cleanup_key = '/ovs/framework/migration|alba_auto_cleanup' if Configuration.get(key=migration_auto_cleanup_key, default=False) is False: try: for storagerouter in StorageRouterList.get_storagerouters(): storagerouter.invalidate_dynamics( 'features') # New feature was added errors = [] for alba_backend in AlbaBackendList.get_albabackends(): try: AlbaController.set_auto_cleanup(alba_backend.guid) except Exception as ex: AlbaMigrationController._logger.exception( 'Failed to set the auto-cleanup for ALBA Backend {0}' .format(alba_backend.name)) errors.append(ex) if len(errors) == 0: Configuration.set(key=migration_auto_cleanup_key, value=True) except Exception: AlbaMigrationController._logger.exception( 'Updating auto cleanup failed') #################################### # Change cache eviction migration_random_eviction_key = '/ovs/framework/migration|alba_cache_eviction_random' if Configuration.get(key=migration_random_eviction_key, default=False) is False: try: errors = [] for alba_backend in AlbaBackendList.get_albabackends(): try: AlbaController.set_cache_eviction(alba_backend.guid) except Exception as ex: AlbaMigrationController._logger.exception( 'Failed to set the auto-cleanup for ALBA Backend {0}' .format(alba_backend.name)) errors.append(ex) if len(errors) == 0: Configuration.set(key=migration_random_eviction_key, value=True) except Exception: AlbaMigrationController._logger.exception( 'Updating auto cleanup failed') ################################################### # Sync all disks and apply the backend role. Backend role was removed with the AD (since 1.10) albanode_backend_role_sync_key = '/ovs/framework/migration|albanode_backend_role_sync' if not Configuration.get(key=albanode_backend_role_sync_key, default=False): try: errors = [] for alba_node in AlbaNodeList.get_albanodes(): try: if not alba_node.storagerouter: continue stack = alba_node.client.get_stack() # type: dict for slot_id, slot_information in stack.iteritems(): osds = slot_information.get('osds', {}) # type: dict slot_aliases = slot_information.get( 'aliases', []) # type: list if not osds: # No osds means no partition was made continue # Sync to add all potential partitions that will need a backend role DiskController.sync_with_reality( storagerouter_guid=alba_node.storagerouter_guid ) for disk in alba_node.storagerouter.disks: if set(disk.aliases).intersection( set(slot_aliases)): partition = disk.partitions[0] if DiskPartition.ROLES.BACKEND not in partition.roles: partition.roles.append( DiskPartition.ROLES.BACKEND) partition.save() except Exception as ex: AlbaMigrationController._logger.exception( 'Syncing for storagerouter/albanode {0} failed'. format(alba_node.storagerouter.ip)) errors.append(ex) if not errors: Configuration.set(key=albanode_backend_role_sync_key, value=True) except Exception: AlbaMigrationController._logger.exception( 'Syncing up the disks for backend roles failed') AlbaMigrationController._logger.info('Finished out of band migrations')
def _stack(self): """ Returns an overview of this node's storage stack """ from ovs.dal.hybrids.albabackend import AlbaBackend from ovs.dal.lists.albabackendlist import AlbaBackendList def _move(info): for move in [('state', 'status'), ('state_detail', 'status_detail')]: if move[0] in info: info[move[1]] = info[move[0]] del info[move[0]] stack = {} node_down = False # Fetch stack from asd-manager try: remote_stack = self.client.get_stack() for slot_id, slot_data in remote_stack.iteritems(): stack[slot_id] = {'status': 'ok'} stack[slot_id].update(slot_data) # Migrate state > status _move(stack[slot_id]) for osd_data in slot_data.get('osds', {}).itervalues(): _move(osd_data) except (requests.ConnectionError, requests.Timeout, InvalidCredentialsError): self._logger.warning( 'Error during stack retrieval. Assuming that the node is down') node_down = True model_osds = {} found_osds = {} # Apply own model to fetched stack for osd in self.osds: model_osds[osd.osd_id] = osd # Initially set the info if osd.slot_id not in stack: stack[osd.slot_id] = { 'status': self.OSD_STATUSES.UNKNOWN if node_down is True else self.OSD_STATUSES.MISSING, 'status_detail': self.OSD_STATUS_DETAILS.NODEDOWN if node_down is True else '', 'osds': {} } osd_data = stack[osd.slot_id]['osds'].get(osd.osd_id, {}) stack[osd.slot_id]['osds'][ osd.osd_id] = osd_data # Initially set the info in the stack osd_data.update(osd.stack_info) if node_down is True: osd_data['status'] = self.OSD_STATUSES.UNKNOWN osd_data['status_detail'] = self.OSD_STATUS_DETAILS.NODEDOWN elif osd.alba_backend_guid is not None: # Osds has been claimed # Load information from alba if osd.alba_backend_guid not in found_osds: found_osds[osd.alba_backend_guid] = {} if osd.alba_backend.abm_cluster is not None: config = Configuration.get_configuration_path( osd.alba_backend.abm_cluster.config_location) try: for found_osd in AlbaCLI.run( command='list-all-osds', config=config): found_osds[osd.alba_backend_guid][ found_osd['long_id']] = found_osd except (AlbaError, RuntimeError): self._logger.exception( 'Listing all osds has failed') osd_data['status'] = self.OSD_STATUSES.UNKNOWN osd_data[ 'status_detail'] = self.OSD_STATUS_DETAILS.ALBAERROR continue if osd.osd_id not in found_osds[osd.alba_backend_guid]: # Not claimed by any backend thus not in use continue found_osd = found_osds[osd.alba_backend_guid][osd.osd_id] if found_osd['decommissioned'] is True: osd_data['status'] = self.OSD_STATUSES.UNAVAILABLE osd_data[ 'status_detail'] = self.OSD_STATUS_DETAILS.DECOMMISSIONED continue backend_interval_key = '/ovs/alba/backends/{0}/gui_error_interval'.format( osd.alba_backend_guid) if Configuration.exists(backend_interval_key): interval = Configuration.get(backend_interval_key) else: interval = Configuration.get( '/ovs/alba/backends/global_gui_error_interval') read = found_osd['read'] or [0] write = found_osd['write'] or [0] errors = found_osd['errors'] osd_data['status'] = self.OSD_STATUSES.WARNING osd_data['status_detail'] = self.OSD_STATUS_DETAILS.ERROR if len(errors) == 0 or (len(read + write) > 0 and max(min(read), min(write)) > max(error[0] for error in errors) + interval): osd_data['status'] = self.OSD_STATUSES.OK osd_data['status_detail'] = '' statistics = {} for slot_info in stack.itervalues(): for osd_id, osd in slot_info['osds'].iteritems(): if osd.get( 'status_detail') == self.OSD_STATUS_DETAILS.ACTIVATING: osd['claimed_by'] = 'unknown' # We won't be able to connect to it just yet continue if osd_id not in model_osds: # The osd is known by the remote node 
but not in the model # In that case, let's connect to the OSD to see whether we get some info from it try: ips = osd['hosts'] if 'hosts' in osd and len( osd['hosts']) > 0 else osd.get('ips', []) port = osd['port'] claimed_by = 'unknown' for ip in ips: try: # Output will be None if it is not claimed claimed_by = AlbaCLI.run('get-osd-claimed-by', named_params={ 'host': ip, 'port': port }) break except (AlbaError, RuntimeError): self._logger.warning( 'get-osd-claimed-by failed for IP:port {0}:{1}' .format(ip, port)) alba_backend = AlbaBackendList.get_by_alba_id( claimed_by) osd['claimed_by'] = alba_backend.guid if alba_backend is not None else claimed_by except KeyError: osd['claimed_by'] = 'unknown' except: self._logger.exception( 'Could not load OSD info: {0}'.format(osd_id)) osd['claimed_by'] = 'unknown' if osd.get('status') not in ['error', 'warning']: osd['status'] = self.OSD_STATUSES.ERROR osd['status_detail'] = self.OSD_STATUS_DETAILS.UNREACHABLE claimed_by = osd.get('claimed_by', 'unknown') if claimed_by == 'unknown': continue try: alba_backend = AlbaBackend(claimed_by) except ObjectNotFoundException: continue # Add usage information if alba_backend not in statistics: statistics[alba_backend] = alba_backend.osd_statistics osd_statistics = statistics[alba_backend] if osd_id not in osd_statistics: continue stats = osd_statistics[osd_id] osd['usage'] = { 'size': int(stats['capacity']), 'used': int(stats['disk_usage']), 'available': int(stats['capacity'] - stats['disk_usage']) } return stack
def _storage_stack(self): """ Returns a live list of all disks known to this AlbaBackend """ from ovs.dal.lists.albanodelist import AlbaNodeList from ovs.dal.lists.albabackendlist import AlbaBackendList if len(self.abm_services) == 0: return {} # No ABM services yet, so backend not fully installed yet storage_map = {} asd_map = {} alba_backend_map = {} for alba_backend in AlbaBackendList.get_albabackends(): alba_backend_map[alba_backend.alba_id] = alba_backend # Load information based on the model alba_nodes = AlbaNodeList.get_albanodes() for node in alba_nodes: node_id = node.node_id storage_map[node_id] = {} for disk in node.disks: disk_id = disk.name storage_map[node_id][disk_id] = {'name': disk_id, 'guid': disk.guid, 'status': 'error', 'status_detail': 'unknown', 'asds': {}} for asd in disk.asds: asd_id = asd.asd_id data = {'asd_id': asd_id, 'guid': asd.guid, 'status': 'error', 'status_detail': 'unknown', 'alba_backend_guid': asd.alba_backend_guid} asd_map[asd_id] = data storage_map[node_id][disk_id]['asds'][asd_id] = data # Load information from node def _load_live_info(_node, _node_data): # Live disk information try: disk_data = _node.client.get_disks() except (requests.ConnectionError, requests.Timeout): for entry in _node_data.values(): entry['status_detail'] = 'nodedown' disk_data = {} for _disk_id, disk_info in disk_data.iteritems(): if _disk_id in _node_data: entry = _node_data[_disk_id] else: entry = {'name': _disk_id, 'status': 'unknown', 'status_detail': '', 'asds': {}} _node_data[_disk_id] = entry entry.update(disk_info) if disk_info['state'] == 'ok': entry['status'] = 'uninitialized' if disk_info['available'] is True else 'initialized' entry['status_detail'] = '' else: entry['status'] = disk_info['state'] entry['status_detail'] = disk_info.get('state_detail', '') # Live ASD information try: _asd_data = _node.client.get_asds() except (requests.ConnectionError, requests.Timeout): for disk_entry in _node_data.values(): for entry in disk_entry['asds'].values(): entry['status_detail'] = 'nodedown' _asd_data = {} for _disk_id, asds in _asd_data.iteritems(): if _disk_id not in _node_data: continue for _asd_id, asd_info in asds.iteritems(): entry = {'asd_id': _asd_id, 'status': 'error' if asd_info['state'] == 'error' else 'initialized', 'status_detail': asd_info.get('state_detail', ''), 'state': asd_info['state'], 'state_detail': asd_info.get('state_detail', '')} if _asd_id not in _node_data[_disk_id]['asds']: _node_data[_disk_id]['asds'][_asd_id] = entry asd_map[_asd_id] = entry else: _node_data[_disk_id]['asds'][_asd_id].update(entry) threads = [] for node in alba_nodes: thread = Thread(target=_load_live_info, args=(node, storage_map[node.node_id])) thread.start() threads.append(thread) for thread in threads: thread.join() # Mix in usage information for asd_id, stats in self.asd_statistics.iteritems(): if asd_id in asd_map: asd_map[asd_id]['usage'] = {'size': int(stats['capacity']), 'used': int(stats['disk_usage']), 'available': int(stats['capacity'] - stats['disk_usage'])} # Load information from alba backend_interval_key = '/ovs/alba/backends/{0}/gui_error_interval'.format(self.guid) if EtcdConfiguration.exists(backend_interval_key): interval = EtcdConfiguration.get(backend_interval_key) else: interval = EtcdConfiguration.get('/ovs/alba/backends/global_gui_error_interval') config = 'etcd://127.0.0.1:2379/ovs/arakoon/{0}/config'.format(self.abm_services[0].service.name) for found_osd in AlbaCLI.run('list-all-osds', config=config, as_json=True): node_id = found_osd['node_id'] 
asd_id = found_osd['long_id'] for _disk in storage_map.get(node_id, {}).values(): asd_data = _disk['asds'].get(asd_id, {}) if 'state' not in asd_data: continue if found_osd.get('decommissioned') is True: asd_data['status'] = 'unavailable' asd_data['status_detail'] = 'decommissioned' continue state = asd_data['state'] if state == 'ok': if found_osd['id'] is None: alba_id = found_osd['alba_id'] if alba_id is None: asd_data['status'] = 'available' else: asd_data['status'] = 'unavailable' alba_backend = alba_backend_map.get(alba_id) if alba_backend is not None: asd_data['alba_backend_guid'] = alba_backend.guid else: asd_data['alba_backend_guid'] = self.guid asd_data['status'] = 'warning' asd_data['status_detail'] = 'recenterrors' read = found_osd['read'] or [0] write = found_osd['write'] or [0] errors = found_osd['errors'] if len(errors) == 0 or (len(read + write) > 0 and max(min(read), min(write)) > max(error[0] for error in errors) + interval): asd_data['status'] = 'claimed' asd_data['status_detail'] = '' else: asd_data['status'] = 'error' asd_data['status_detail'] = asd_data.get('state_detail', '') alba_backend = alba_backend_map.get(found_osd.get('alba_id')) if alba_backend is not None: asd_data['alba_backend_guid'] = alba_backend.guid return storage_map
def add_preset(alba_backend_guid, name, compression, policies, encryption, fragment_size=None): """ Adds a preset to Alba :param alba_backend_guid: Guid of the ALBA backend :type alba_backend_guid: str :param name: Name of the preset :type name: str :param compression: Compression type for the preset (none | snappy | bz2) :type compression: str :param policies: Policies for the preset :type policies: list :param encryption: Encryption for the preset (none | aes-cbc-256 | aes-ctr-256) :type encryption: str :param fragment_size: Size of a fragment in bytes (e.g. 1048576) :type fragment_size: int :return: None """ # VALIDATIONS if not re.match(Toolbox.regex_preset, name): raise ValueError('Invalid preset name specified') compression_options = ['snappy', 'bz2', 'none'] if compression not in compression_options: raise ValueError('Invalid compression format specified, please choose from: "{0}"'.format('", "'.join(compression_options))) encryption_options = ['aes-cbc-256', 'aes-ctr-256', 'none'] if encryption not in encryption_options: raise ValueError('Invalid encryption format specified, please choose from: "{0}"'.format('", "'.join(encryption_options))) if fragment_size is not None and (not isinstance(fragment_size, int) or not 16 <= fragment_size <= 1024 ** 3): raise ValueError('Fragment size should be a positive integer smaller than 1 GiB') AlbaPresetController._validate_policies_param(policies=policies) alba_backend = AlbaBackend(alba_backend_guid) if name in [preset['name'] for preset in alba_backend.presets]: raise RuntimeError('Preset with name {0} already exists'.format(name)) # ADD PRESET preset = {'compression': compression, 'object_checksum': {'default': ['crc-32c'], 'verify_upload': True, 'allowed': [['none'], ['sha-1'], ['crc-32c']]}, 'osds': ['all'], 'fragment_size': 16 * 1024 ** 2 if fragment_size is None else int(fragment_size), 'policies': policies, 'fragment_checksum': ['crc-32c'], 'fragment_encryption': ['none'], 'in_use': False, 'name': name} # Generate encryption key temp_key_file = None if encryption != 'none': encryption_key = ''.join(random.choice(chr(random.randint(32, 126))) for _ in range(32)) temp_key_file = tempfile.mktemp() with open(temp_key_file, 'wb') as temp_file: temp_file.write(encryption_key) temp_file.flush() preset['fragment_encryption'] = ['{0}'.format(encryption), '{0}'.format(temp_key_file)] # Dump preset content on filesystem config = Configuration.get_configuration_path(ArakoonInstaller.CONFIG_KEY.format(AlbaController.get_abm_cluster_name(alba_backend=alba_backend))) temp_config_file = tempfile.mktemp() with open(temp_config_file, 'wb') as data_file: data_file.write(json.dumps(preset)) data_file.flush() # Create preset AlbaPresetController._logger.debug('Adding preset {0} with compression {1} and policies {2}'.format(name, compression, policies)) AlbaCLI.run(command='create-preset', config=config, named_params={'input-url': temp_config_file}, extra_params=[name]) # Cleanup alba_backend.invalidate_dynamics() for filename in [temp_key_file, temp_config_file]: if filename and os.path.exists(filename) and os.path.isfile(filename): os.remove(filename)
def get_stats_vdisks(cls): """ Retrieve statistics about all vDisks on the system. Check the safety, storage amount on the Backend, fail-over status and others """ if cls._config is None: cls.validate_and_retrieve_config() stats = [] errors = False environment = cls._config['environment'] alba_backend_info = {} for alba_backend in AlbaBackendList.get_albabackends(): config_path = Configuration.get_configuration_path( alba_backend.abm_cluster.config_location) disk_safety = {} namespace_usage = {} # Retrieve namespace, preset and disk safety information try: preset_info = AlbaCLI.run( command='list-presets', config=config_path ) # Not using alba_backend.presets, because it takes a whole lot longer to retrieve all_namespace_info = AlbaCLI.run(command='show-namespaces', config=config_path, extra_params=['--max=-1'])[1] all_disk_safety_info = AlbaCLI.run(command='get-disk-safety', config=config_path) except Exception: errors = True cls._logger.exception( 'Retrieving information for ALBA Backend {0} failed'. format(alba_backend.name)) continue alba_backend_info[alba_backend.guid] = { 'disk_safety': disk_safety, 'namespace_usage': namespace_usage } # Parse namespace information for namespace_info in all_namespace_info: namespace_usage[namespace_info['name']] = float( namespace_info['statistics']['storage']) # Parse preset information policies = [] preset_name = None for preset in preset_info: if preset['in_use'] is not True: continue preset_name = preset['name'] policies.extend(preset['policies']) if preset_name is None: continue # Parse disk safety information total_objects = 0 max_lost_disks = 0 max_disk_safety = 0 bucket_overview = {} disk_lost_overview = {} disk_safety_overview = {} for disk_safety_info in all_disk_safety_info: safety = disk_safety_info['safety'] volume_id = disk_safety_info['namespace'] disk_safety[volume_id] = float( safety) if safety is not None else safety for bucket_safety in disk_safety_info['bucket_safety']: bucket = bucket_safety['bucket'] objects = bucket_safety['count'] remaining_safety = bucket_safety['remaining_safety'] if bucket[1] > max_lost_disks: max_lost_disks = bucket[1] if remaining_safety > max_disk_safety: max_disk_safety = remaining_safety for policy in policies: k = policy[0] == bucket[0] m = policy[1] == bucket[1] c = policy[2] <= bucket[2] x = policy[3] >= bucket[3] if k and m and c and x: if preset_name not in bucket_overview: bucket_overview[preset_name] = { 'policy': str(policy), 'presets': {} } bucket[2] -= bucket_safety['applicable_dead_osds'] if str(bucket ) not in bucket_overview[preset_name]['presets']: bucket_overview[preset_name]['presets'][str( bucket)] = { 'objects': 0, 'disk_safety': 0 } disk_lost = bucket[0] + bucket[1] - bucket[ 2] # Data fragments + parity fragments - amount of fragments to write + dead osds if disk_lost not in disk_lost_overview: disk_lost_overview[disk_lost] = 0 if remaining_safety not in disk_safety_overview: disk_safety_overview[remaining_safety] = 0 total_objects += objects disk_lost_overview[disk_lost] += objects disk_safety_overview[remaining_safety] += objects bucket_overview[preset_name]['presets'][str( bucket)]['objects'] += objects bucket_overview[preset_name]['presets'][str( bucket)]['disk_safety'] = remaining_safety # Create statistics regarding disk safety for disk_lost_number in xrange(max_lost_disks + 1): stats.append({ 'tags': { 'disk_lost': disk_lost_number, 'environment': environment, 'backend_name': alba_backend.name }, 'fields': { 'objects': disk_lost_overview.get(disk_lost_number, 0), 
'total_objects': total_objects }, 'measurement': 'disk_lost' }) for disk_safety_number in xrange(max_disk_safety + 1): stats.append({ 'tags': { 'disk_safety': disk_safety_number, 'environment': environment, 'backend_name': alba_backend.name }, 'fields': { 'objects': disk_safety_overview.get(disk_safety_number, 0), 'total_objects': total_objects }, 'measurement': 'disk_safety' }) for preset_name, result in bucket_overview.iteritems(): for bucket_count, bucket_result in result['presets'].iteritems( ): stats.append({ 'tags': { 'bucket': bucket_count, 'policy': result['policy'], 'preset_name': preset_name, 'environment': environment, 'disk_safety': bucket_result['disk_safety'], 'backend_name': alba_backend.name }, 'fields': { 'objects': bucket_result['objects'], 'total_objects': total_objects }, 'measurement': 'bucket' }) # Integrate namespace and disk safety information in vPool stats for vpool in VPoolList.get_vpools(): alba_backend_guid = vpool.metadata['backend']['backend_info'][ 'alba_backend_guid'] for vdisk in vpool.vdisks: try: metrics = cls._convert_to_float_values( cls._pop_realtime_info(vdisk.statistics)) metrics['failover_mode'] = vdisk.dtl_status metrics['frontend_size'] = float(vdisk.size) metrics['failover_mode_status'] = cls._FAILOVER_MAP.get( vdisk.dtl_status, 3) if alba_backend_guid in alba_backend_info: metrics['disk_safety'] = alba_backend_info[ alba_backend_guid]['disk_safety'].get( vdisk.volume_id) metrics['backend_stored'] = alba_backend_info[ alba_backend_guid]['namespace_usage'].get( vdisk.volume_id) stats.append({ 'tags': { 'disk_name': vdisk.name, 'volume_id': vdisk.volume_id, 'vpool_name': vdisk.vpool.name, 'environment': environment, 'storagerouter_name': StorageRouter(vdisk.storagerouter_guid).name }, 'fields': metrics, 'measurement': 'vdisk' }) except Exception: errors = True cls._logger.exception( 'Retrieving statistics for vDisk {0} with guid {1} failed' .format(vdisk.name, vdisk.guid)) return errors, stats
def _local_stack(self): """ Returns a live list of all disks known to this AlbaBackend """ from ovs.dal.lists.albanodelist import AlbaNodeList from ovs.dal.lists.albabackendlist import AlbaBackendList if len(self.abm_services) == 0: return {} # No ABM services yet, so backend not fully installed yet alba_backend_map = {} for alba_backend in AlbaBackendList.get_albabackends(): alba_backend_map[alba_backend.alba_id] = alba_backend # Load information based on the model asd_map = {} storage_map = {} alba_nodes = AlbaNodeList.get_albanodes() for node in alba_nodes: node_id = node.node_id storage_map[node_id] = {} for disk in node.disks: disk_id = disk.aliases[0].split('/')[-1] storage_map[node_id][disk_id] = {'asds': {}, 'name': disk_id, 'guid': disk.guid, 'status': 'error', 'aliases': disk.aliases, 'status_detail': 'unknown'} for osd in disk.osds: osd_id = osd.osd_id data = {'asd_id': osd_id, 'guid': osd.guid, 'status': 'error', 'status_detail': 'unknown', 'alba_backend_guid': osd.alba_backend_guid} asd_map[osd_id] = data storage_map[node_id][disk_id]['asds'][osd_id] = data # Load information from node def _load_live_info(_node, _node_data): _data = _node.storage_stack if _data['status'] != 'ok': for disk_entry in _node_data.values(): disk_entry['status_detail'] = _data['status'] for entry in disk_entry.get('asds', {}).values(): entry['status_detail'] = _data['status'] else: for _disk_id, disk_asd_info in _data['stack'].iteritems(): if _disk_id not in _node_data: _node_data[_disk_id] = {'asds': {}} entry = _node_data[_disk_id] disk_info = copy.deepcopy(disk_asd_info) del disk_info['asds'] entry.update(disk_info) asds_info = disk_asd_info['asds'] for _asd_id, asd_info in asds_info.iteritems(): if _asd_id not in _node_data[_disk_id]['asds']: _node_data[_disk_id]['asds'][_asd_id] = asd_info else: _node_data[_disk_id]['asds'][_asd_id].update(asd_info) threads = [] for node in alba_nodes: thread = Thread(target=_load_live_info, args=(node, storage_map[node.node_id])) thread.start() threads.append(thread) for thread in threads: thread.join() # Mix in usage information for asd_id, stats in self.asd_statistics.iteritems(): if asd_id in asd_map: asd_map[asd_id]['usage'] = {'size': int(stats['capacity']), 'used': int(stats['disk_usage']), 'available': int(stats['capacity'] - stats['disk_usage'])} # Load information from alba backend_interval_key = '/ovs/alba/backends/{0}/gui_error_interval'.format(self.guid) if Configuration.exists(backend_interval_key): interval = Configuration.get(backend_interval_key) else: interval = Configuration.get('/ovs/alba/backends/global_gui_error_interval') config = Configuration.get_configuration_path('/ovs/arakoon/{0}-abm/config'.format(self.name)) asds = {} for found_osd in AlbaCLI.run(command='list-all-osds', config=config): asds[found_osd['long_id']] = found_osd for node_data in storage_map.values(): for _disk in node_data.values(): for asd_id, asd_data in _disk['asds'].iteritems(): if asd_id not in asds: continue found_osd = asds[asd_id] if 'state' not in asd_data: continue if found_osd.get('decommissioned') is True: asd_data['status'] = 'unavailable' asd_data['status_detail'] = 'decommissioned' continue state = asd_data['state'] if state == 'ok': if found_osd['id'] is None: alba_id = found_osd['alba_id'] if alba_id is None: asd_data['status'] = 'available' else: asd_data['status'] = 'unavailable' alba_backend = alba_backend_map.get(alba_id) if alba_backend is not None: asd_data['alba_backend_guid'] = alba_backend.guid else: asd_data['alba_backend_guid'] = self.guid 
asd_data['status'] = 'warning' asd_data['status_detail'] = 'recenterrors' read = found_osd['read'] or [0] write = found_osd['write'] or [0] errors = found_osd['errors'] if len(errors) == 0 or (len(read + write) > 0 and max(min(read), min(write)) > max(error[0] for error in errors) + interval): asd_data['status'] = 'claimed' asd_data['status_detail'] = '' else: asd_data['status'] = 'error' asd_data['status_detail'] = asd_data.get('state_detail', '') alba_backend = alba_backend_map.get(found_osd.get('alba_id')) if alba_backend is not None: asd_data['alba_backend_guid'] = alba_backend.guid return storage_map
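# Illustration only (not part of the original code): the structure returned by
# _local_stack() maps node_id -> disk_id -> disk info, with one 'asds' dict per
# disk. All identifiers and sizes below are made-up example values showing what a
# single claimed ASD entry can look like once live info and usage are mixed in.
_example_local_stack = {
    'node-1a2b3c': {
        'ata-SAMSUNG_SSD_1': {
            'name': 'ata-SAMSUNG_SSD_1',
            'guid': 'disk-guid-0001',
            'status': 'ok',
            'status_detail': '',
            'aliases': ['/dev/disk/by-id/ata-SAMSUNG_SSD_1'],
            'asds': {
                'asd-0001': {
                    'asd_id': 'asd-0001',
                    'guid': 'osd-guid-0001',
                    'status': 'claimed',
                    'status_detail': '',
                    'alba_backend_guid': 'backend-guid-0001',
                    'usage': {'size': 960000000000,
                              'used': 400000000000,
                              'available': 560000000000}
                }
            }
        }
    }
}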
def get_disk_safety():
    """
    Send disk safety for each ALBA Backend and the number of namespaces at the lowest disk safety
    """
    points = []
    abms = []
    for service in ServiceList.get_services():
        if service.type.name == ServiceType.SERVICE_TYPES.ALBA_MGR:
            abms.append(service.name)
    abms = list(set(abms))
    abl = AlbaBackendList.get_albabackends()
    for ab in abl:
        service_name = Service(ab.abm_services[0].service_guid).name
        if service_name not in abms:
            continue
        config = "etcd://127.0.0.1:2379/ovs/arakoon/{}/config".format(service_name)
        try:
            disk_safety = AlbaCLI.run('get-disk-safety', config=config, to_json=True)
        except Exception as ex:
            StatsmonkeyScheduledTaskController._logger.error('{0}: {1}'.format(service_name, ex.message))
            continue
        presets = ab.presets
        used_preset = None
        for preset in presets:
            try:
                policies = preset['policy_metadata']
                for policy in policies:
                    if policies[policy]['is_active'] and policies[policy]['in_use']:
                        used_preset = policy
                if used_preset is not None:
                    used_preset = json.loads(used_preset.replace('(', '[').replace(')', ']'))
                    max_disk_safety = used_preset[1]
                    safety = {
                        'measurement': 'disk_safety',
                        'tags': {
                            'backend_name': ab.name,
                            'max_disk_safety': max_disk_safety,
                            'min_disk_safety': max_disk_safety
                        },
                        'fields': {
                            'amount_max_disk_safety': 0,
                            'amount_between_disk_safety': 0,
                            'amount_min_disk_safety': 0
                        }
                    }
                    stats = {}
                    for disk in disk_safety:
                        if disk['safety'] is not None:
                            if disk['safety'] not in stats:
                                stats[disk['safety']] = 0
                            stats[disk['safety']] += 1
                    min_disk_safety = min(stats.keys())
                    safety['tags']['min_disk_safety'] = min_disk_safety
                    for stat in stats:
                        if stat == max_disk_safety:
                            safety['fields']['amount_max_disk_safety'] = stats[stat]
                        elif stat == min_disk_safety:
                            safety['fields']['amount_min_disk_safety'] = stats[stat]
                        else:
                            safety['fields']['amount_between_disk_safety'] += stats[stat]
                    points.append(safety)
            except Exception as ex:
                StatsmonkeyScheduledTaskController._logger.error(ex.message)
    if len(points) == 0:
        StatsmonkeyScheduledTaskController._logger.info("No statistics found")
        return
    StatsmonkeyScheduledTaskController._send_stats(points)
    return points
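# Hedged illustration of the policy-string handling used above: _presets() reports
# policies as stringified tuples '(k, m, c, x)', json.loads() on the bracketed form
# turns such a string into a list, and index 1 (m, the number of parity fragments)
# is what get_disk_safety() treats as the maximum disk safety. The value below is
# an example, not data from a live backend.
import json

_example_policy_string = '(5, 4, 8, 3)'
_example_policy_list = json.loads(_example_policy_string.replace('(', '[').replace(')', ']'))
assert _example_policy_list == [5, 4, 8, 3]
_example_max_disk_safety = _example_policy_list[1]  # 4: up to 4 fragments/disks may be lost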
def _presets(self): """ Returns the policies active on the node """ if self.abm_cluster is None: return [] # No ABM cluster yet, so backend not fully installed yet osds = {} if self.scaling != AlbaBackend.SCALINGS.GLOBAL: for node_id, slots in self.local_stack.iteritems(): osds[node_id] = 0 for slot_id, slot_data in slots.iteritems(): for osd_id, osd_data in slot_data['osds'].iteritems(): if osd_data['status'] in [ AlbaNode.OSD_STATUSES.OK, AlbaNode.OSD_STATUSES.WARNING ] and osd_data.get('claimed_by') == self.guid: osds[node_id] += 1 config = Configuration.get_configuration_path( self.abm_cluster.config_location) presets = AlbaCLI.run(command='list-presets', config=config) preset_dict = {} for preset in presets: preset_dict[preset['name']] = preset if 'in_use' not in preset: preset['in_use'] = True if 'is_default' not in preset: preset['is_default'] = False preset['is_available'] = False preset['policies'] = [ tuple(policy) for policy in preset['policies'] ] preset['policy_metadata'] = {} active_policy = None for policy in preset['policies']: is_available = False available_disks = 0 if self.scaling == AlbaBackend.SCALINGS.GLOBAL: available_disks += sum( self.local_summary['devices'].values()) if self.scaling == AlbaBackend.SCALINGS.LOCAL: available_disks += sum( min(osds[node], policy[3]) for node in osds) if available_disks >= policy[2]: if active_policy is None: active_policy = policy is_available = True preset['policy_metadata'][policy] = { 'is_active': False, 'in_use': False, 'is_available': is_available } preset['is_available'] |= is_available if active_policy is not None: preset['policy_metadata'][active_policy]['is_active'] = True for namespace in self.ns_data: if namespace['namespace']['state'] != 'active': continue policy_usage = namespace['statistics']['bucket_count'] preset = preset_dict[namespace['namespace']['preset_name']] for usage in policy_usage: used_policy = tuple( usage[0]) # Policy as reported to be "in use" for configured_policy in preset[ 'policies']: # All configured policies if used_policy[0] == configured_policy[0] and used_policy[ 1] == configured_policy[ 1] and used_policy[3] <= configured_policy[3]: preset['policy_metadata'][configured_policy][ 'in_use'] = True break for preset in presets: preset['policies'] = [str(policy) for policy in preset['policies']] for key in preset['policy_metadata'].keys(): preset['policy_metadata'][str( key)] = preset['policy_metadata'][key] del preset['policy_metadata'][key] return presets
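# Hedged worked example for the availability rule in _presets(): for a LOCAL-scaled
# backend, a policy (k, m, c, x) may use at most x claimed OSDs per node and becomes
# available once the capped total reaches c. All numbers here are illustrative.
_example_local_policy = (5, 4, 8, 3)  # k=5 data fragments, m=4 parity, c=8 minimum, x=3 per node
_example_claimed_osds = {'node-a': 4, 'node-b': 2, 'node-c': 5}
_example_available = sum(min(count, _example_local_policy[3])
                         for count in _example_claimed_osds.values())
assert _example_available == 8  # node-a and node-c are capped at x=3, node-b contributes 2
assert _example_available >= _example_local_policy[2]  # 8 >= 8, so this policy is available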
def migrate(previous_version): """ Migrates from a given version to the current version. It uses 'previous_version' to be smart wherever possible, but the code should be able to migrate any version towards the expected version. When this is not possible, the code can set a minimum version and raise when it is not met. :param previous_version: The previous version from which to start the migration :type previous_version: float """ working_version = previous_version if working_version == 0: from ovs.dal.hybrids.servicetype import ServiceType # Initial version: # * Add any basic configuration or model entries # Add backends for backend_type_info in [('ALBA', 'alba')]: code = backend_type_info[1] backend_type = BackendTypeList.get_backend_type_by_code(code) if backend_type is None: backend_type = BackendType() backend_type.name = backend_type_info[0] backend_type.code = code backend_type.save() # Add service types for service_type_info in [ ServiceType.SERVICE_TYPES.NS_MGR, ServiceType.SERVICE_TYPES.ALBA_MGR, ServiceType.SERVICE_TYPES.ALBA_S3_TRANSACTION ]: service_type = ServiceType() service_type.name = service_type_info service_type.save() # From here on, all actual migration should happen to get to the expected state for THIS RELEASE elif working_version < DALMigrator.THIS_VERSION: import hashlib from ovs.dal.exceptions import ObjectNotFoundException from ovs.dal.helpers import HybridRunner, Descriptor from ovs.dal.hybrids.albaabmcluster import ABMCluster from ovs.dal.hybrids.albaosd import AlbaOSD from ovs.dal.hybrids.albansmcluster import NSMCluster from ovs.dal.hybrids.j_abmservice import ABMService from ovs.dal.hybrids.j_nsmservice import NSMService from ovs.dal.hybrids.service import Service from ovs.dal.hybrids.servicetype import ServiceType from ovs.dal.lists.albabackendlist import AlbaBackendList from ovs.dal.lists.albanodelist import AlbaNodeList from ovs.dal.lists.servicetypelist import ServiceTypeList from ovs.dal.lists.storagerouterlist import StorageRouterList from ovs.extensions.db.arakooninstaller import ArakoonClusterConfig, ArakoonInstaller from ovs.extensions.generic.configuration import Configuration, NotFoundException from ovs_extensions.generic.toolbox import ExtensionsToolbox from ovs.extensions.plugins.albacli import AlbaCLI from ovs.extensions.storage.persistentfactory import PersistentFactory # Migrate unique constraints & indexes client = PersistentFactory.get_client() hybrid_structure = HybridRunner.get_hybrids() for class_descriptor in hybrid_structure.values(): cls = Descriptor().load(class_descriptor).get_object() classname = cls.__name__.lower() unique_key = 'ovs_unique_{0}_{{0}}_'.format(classname) index_prefix = 'ovs_index_{0}|{{0}}|'.format(classname) index_key = 'ovs_index_{0}|{{0}}|{{1}}'.format(classname) uniques = [] indexes = [] # noinspection PyProtectedMember for prop in cls._properties: if prop.unique is True and len([ k for k in client.prefix( unique_key.format(prop.name)) ]) == 0: uniques.append(prop.name) if prop.indexed is True and len([ k for k in client.prefix( index_prefix.format(prop.name)) ]) == 0: indexes.append(prop.name) if len(uniques) > 0 or len(indexes) > 0: prefix = 'ovs_data_{0}_'.format(classname) for key, data in client.prefix_entries(prefix): for property_name in uniques: ukey = '{0}{1}'.format( unique_key.format(property_name), hashlib.sha1(str( data[property_name])).hexdigest()) client.set(ukey, key) for property_name in indexes: if property_name not in data: continue # This is the case when there's a new indexed property 
added. ikey = index_key.format( property_name, hashlib.sha1(str( data[property_name])).hexdigest()) index = list( client.get_multi([ikey], must_exist=False))[0] transaction = client.begin_transaction() if index is None: client.assert_value(ikey, None, transaction=transaction) client.set(ikey, [key], transaction=transaction) elif key not in index: client.assert_value(ikey, index[:], transaction=transaction) client.set(ikey, index + [key], transaction=transaction) client.apply_transaction(transaction) ############################################# # Introduction of ABMCluster and NSMCluster # ############################################# # Verify presence of unchanged ALBA Backends alba_backends = AlbaBackendList.get_albabackends() changes_required = False for alba_backend in alba_backends: if alba_backend.abm_cluster is None or len( alba_backend.nsm_clusters) == 0: changes_required = True break if changes_required: # Retrieve ABM and NSM clusters abm_cluster_info = [] nsm_cluster_info = [] for cluster_name in Configuration.list('/ovs/arakoon'): try: metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name( cluster_name=cluster_name) if metadata[ 'cluster_type'] == ServiceType.ARAKOON_CLUSTER_TYPES.ABM: abm_cluster_info.append(metadata) elif metadata[ 'cluster_type'] == ServiceType.ARAKOON_CLUSTER_TYPES.NSM: nsm_cluster_info.append(metadata) except NotFoundException: continue # Retrieve NSM Arakoon cluster information cluster_arakoon_map = {} for cluster_info in abm_cluster_info + nsm_cluster_info: cluster_name = cluster_info['cluster_name'] arakoon_config = ArakoonClusterConfig( cluster_id=cluster_name) cluster_arakoon_map[ cluster_name] = arakoon_config.export_dict() storagerouter_map = dict( (storagerouter.machine_id, storagerouter) for storagerouter in StorageRouterList.get_storagerouters()) alba_backend_id_map = dict((alba_backend.alba_id, alba_backend) for alba_backend in alba_backends) for cluster_info in abm_cluster_info: internal = cluster_info['internal'] cluster_name = cluster_info['cluster_name'] config_location = Configuration.get_configuration_path( key=ArakoonClusterConfig.CONFIG_KEY.format( cluster_name)) try: alba_id = AlbaCLI.run(command='get-alba-id', config=config_location, named_params={'attempts': 3})['id'] nsm_hosts = AlbaCLI.run(command='list-nsm-hosts', config=config_location, named_params={'attempts': 3}) except RuntimeError: continue alba_backend = alba_backend_id_map.get(alba_id) if alba_backend is None: # ALBA Backend with ID not found in model continue if alba_backend.abm_cluster is not None and len( alba_backend.nsm_clusters ) > 0: # Clusters already exist continue # Create ABM Cluster if alba_backend.abm_cluster is None: abm_cluster = ABMCluster() abm_cluster.name = cluster_name abm_cluster.alba_backend = alba_backend abm_cluster.config_location = ArakoonClusterConfig.CONFIG_KEY.format( cluster_name) abm_cluster.save() else: abm_cluster = alba_backend.abm_cluster # Create ABM Services abm_arakoon_config = cluster_arakoon_map[cluster_name] abm_arakoon_config.pop('global') arakoon_nodes = abm_arakoon_config.keys() if internal is False: services_to_create = 1 else: if set(arakoon_nodes).difference( set(storagerouter_map.keys())): continue services_to_create = len(arakoon_nodes) for index in range(services_to_create): service = Service() service.name = 'arakoon-{0}-abm'.format( alba_backend.name) service.type = ServiceTypeList.get_by_name( ServiceType.SERVICE_TYPES.ALBA_MGR) if internal is True: arakoon_node_config = abm_arakoon_config[ 
arakoon_nodes[index]] service.ports = [ arakoon_node_config['client_port'], arakoon_node_config['messaging_port'] ] service.storagerouter = storagerouter_map[ arakoon_nodes[index]] else: service.ports = [] service.storagerouter = None service.save() abm_service = ABMService() abm_service.service = service abm_service.abm_cluster = abm_cluster abm_service.save() # Create NSM Clusters for cluster_index, nsm_host in enumerate( sorted(nsm_hosts, key=lambda host: ExtensionsToolbox. advanced_sort(host['cluster_id'], '_'))): nsm_cluster_name = nsm_host['cluster_id'] nsm_arakoon_config = cluster_arakoon_map.get( nsm_cluster_name) if nsm_arakoon_config is None: continue number = cluster_index if internal is False else int( nsm_cluster_name.split('_')[-1]) nsm_cluster = NSMCluster() nsm_cluster.name = nsm_cluster_name nsm_cluster.number = number nsm_cluster.alba_backend = alba_backend nsm_cluster.config_location = ArakoonClusterConfig.CONFIG_KEY.format( nsm_cluster_name) nsm_cluster.save() # Create NSM Services nsm_arakoon_config.pop('global') arakoon_nodes = nsm_arakoon_config.keys() if internal is False: services_to_create = 1 else: if set(arakoon_nodes).difference( set(storagerouter_map.keys())): continue services_to_create = len(arakoon_nodes) for service_index in range(services_to_create): service = Service() service.name = 'arakoon-{0}-nsm_{1}'.format( alba_backend.name, number) service.type = ServiceTypeList.get_by_name( ServiceType.SERVICE_TYPES.NS_MGR) if internal is True: arakoon_node_config = nsm_arakoon_config[ arakoon_nodes[service_index]] service.ports = [ arakoon_node_config['client_port'], arakoon_node_config['messaging_port'] ] service.storagerouter = storagerouter_map[ arakoon_nodes[service_index]] else: service.ports = [] service.storagerouter = None service.save() nsm_service = NSMService() nsm_service.service = service nsm_service.nsm_cluster = nsm_cluster nsm_service.save() # Clean up all junction services no longer linked to an ALBA Backend all_nsm_services = [ service.nsm_service for service in ServiceTypeList.get_by_name( ServiceType.SERVICE_TYPES.NS_MGR).services if service.nsm_service.nsm_cluster is None ] all_abm_services = [ service.abm_service for service in ServiceTypeList.get_by_name( ServiceType.SERVICE_TYPES.ALBA_MGR).services if service.abm_service.abm_cluster is None ] for abm_service in all_abm_services: abm_service.delete() abm_service.service.delete() for nsm_service in all_nsm_services: nsm_service.delete() nsm_service.service.delete() ################################ # Introduction of Active Drive # ################################ # Update slot_id and Alba Node relation for all OSDs client = PersistentFactory.get_client() disk_osd_map = {} for key, data in client.prefix_entries('ovs_data_albaosd_'): alba_disk_guid = data.get('alba_disk', {}).get('guid') if alba_disk_guid is not None: if alba_disk_guid not in disk_osd_map: disk_osd_map[alba_disk_guid] = [] disk_osd_map[alba_disk_guid].append( key.replace('ovs_data_albaosd_', '')) try: value = client.get(key) value.pop('alba_disk', None) client.set(key=key, value=value) except Exception: pass # We don't care if we would have any leftover AlbaDisk information in _data, but its cleaner not to alba_guid_node_map = dict( (an.guid, an) for an in AlbaNodeList.get_albanodes()) for key, data in client.prefix_entries('ovs_data_albadisk_'): alba_disk_guid = key.replace('ovs_data_albadisk_', '') alba_node_guid = data.get('alba_node', {}).get('guid') if alba_disk_guid in disk_osd_map and alba_node_guid in 
alba_guid_node_map and len( data.get('aliases', [])) > 0: slot_id = data['aliases'][0].split('/')[-1] for osd_guid in disk_osd_map[alba_disk_guid]: try: osd = AlbaOSD(osd_guid) except ObjectNotFoundException: continue osd.slot_id = slot_id osd.alba_node = alba_guid_node_map[alba_node_guid] osd.save() client.delete(key=key, must_exist=False) # Remove unique constraints for AlbaNode IP for key in client.prefix('ovs_unique_albanode_ip_'): client.delete(key=key, must_exist=False) # Remove relation for all Alba Disks for key in client.prefix('ovs_reverseindex_albadisk_'): client.delete(key=key, must_exist=False) # Remove the relation between AlbaNode and AlbaDisk for key in client.prefix('ovs_reverseindex_albanode_'): if '|disks|' in key: client.delete(key=key, must_exist=False) return DALMigrator.THIS_VERSION
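# Hedged sketch (not part of the migration itself) of the key layout the migration
# above builds for unique constraints and indexes: per hybrid class and property it
# stores a pointer from the sha1 of the property value to the data key. The class,
# property and value below are made up for illustration; 'client' stands for the
# persistent store client used above.
import hashlib

_example_classname = 'albanode'
_example_property = 'ip'
_example_value = '10.100.1.2'
_example_data_key = 'ovs_data_albanode_1234'
_example_unique_key = 'ovs_unique_{0}_{1}_{2}'.format(
    _example_classname, _example_property, hashlib.sha1(str(_example_value)).hexdigest())
_example_index_key = 'ovs_index_{0}|{1}|{2}'.format(
    _example_classname, _example_property, hashlib.sha1(str(_example_value)).hexdigest())
# client.set(_example_unique_key, _example_data_key)   # unique: hashed value -> one data key
# client.set(_example_index_key, [_example_data_key])  # index: hashed value -> list of data keys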
def get_backend_stats():
    """
    Send backend stats for each backend to InfluxDB
    """
    points = []
    abms = []
    abs = []
    for service in ServiceList.get_services():
        if service.type.name == ServiceType.SERVICE_TYPES.ALBA_MGR:
            abms.append(service.name)
    for ab in AlbaNodeList.get_albanodes():
        abs.append(ab.node_id)
    abms = list(set(abms))
    config = "etcd://127.0.0.1:2379/ovs/arakoon/{}/config".format(abms[0])
    try:
        decommissioning_osds = AlbaCLI.run('list-decommissioning-osds', config=config, to_json=True)
    except Exception as ex:
        StatsmonkeyScheduledTaskController._logger.error('{0}'.format(ex.message))
        return None
    filtered_osds = []
    for ab in abs:
        filtered_osds += [osd for osd in decommissioning_osds if osd['node_id'] == ab]
    abl = AlbaBackendList.get_albabackends()
    for ab in abl:
        try:
            stat = {
                'measurement': 'backend_stats',
                'tags': {
                    'backend_name': ab.name
                },
                'fields': {
                    'gets': ab.statistics['multi_get']['n'],
                    'puts': ab.statistics['apply']['n']
                }
            }
            stat_asd = {
                'decommissioning': len(filtered_osds),
                'decommissioned': 0,
                'claimed': 0,
                'warning': 0,
                'failure': 0,
                'error': 0
            }
            for disks in ab.local_stack.values():
                for disk in disks.values():
                    for asd in disk['asds'].values():
                        if asd['alba_backend_guid'] == ab.guid:
                            status = asd['status']
                            status_detail = asd['status_detail']
                            if status_detail == 'decommissioned':
                                status = status_detail
                            if status not in stat_asd:
                                stat_asd[status] = 0
                            stat_asd[status] += 1
            for status in stat_asd:
                stat['fields'][status] = stat_asd[status]
            points.append(stat)
        except Exception as ex:
            StatsmonkeyScheduledTaskController._logger.error(ex.message)
    if len(points) == 0:
        StatsmonkeyScheduledTaskController._logger.info("No statistics found")
        return None
    StatsmonkeyScheduledTaskController._send_stats(points)
    return points
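# Hedged example (made-up values) of a single point produced by get_backend_stats():
# the per-status ASD counts are merged into the gets/puts counters before the point
# is handed to _send_stats().
_example_backend_point = {
    'measurement': 'backend_stats',
    'tags': {'backend_name': 'mybackend'},
    'fields': {
        'gets': 1024,
        'puts': 512,
        'claimed': 12,
        'warning': 1,
        'error': 0,
        'failure': 0,
        'decommissioned': 0,
        'decommissioning': 2
    }
}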