def register(node_id):
    """
    Adds a Node with a given node_id to the model
    :param node_id: ID of the ALBA node
    :type node_id: str
    :return: None
    """
    node = AlbaNodeList.get_albanode_by_node_id(node_id)
    if node is None:
        # Node not modeled yet: bootstrap it from its etcd 'main' config section
        main_config = EtcdConfiguration.get('/ovs/alba/asdnodes/{0}/config/main'.format(node_id))
        node = AlbaNode()
        node.ip = main_config['ip']
        node.port = main_config['port']
        node.username = main_config['username']
        node.password = main_config['password']
        node.storagerouter = StorageRouterList.get_by_ip(main_config['ip'])
    # Validate that the node is reachable and actually is the node we expect
    data = node.client.get_metadata()
    if data['_success'] is False and data['_error'] == 'Invalid credentials':
        raise RuntimeError('Invalid credentials')
    if data['node_id'] != node_id:
        AlbaNodeController._logger.error('Unexpected node_id: {0} vs {1}'.format(data['node_id'], node_id))
        raise RuntimeError('Unexpected node identifier')
    node.node_id = node_id
    node.type = 'ASD'
    node.save()

    # increase maintenance agents count for all nodes by 1
    for backend in AlbaBackendList.get_albabackends():
        nr_of_agents_key = AlbaNodeController.NR_OF_AGENTS_ETCD_TEMPLATE.format(backend.guid)
        if EtcdConfiguration.exists(nr_of_agents_key):
            EtcdConfiguration.set(nr_of_agents_key, int(EtcdConfiguration.get(nr_of_agents_key) + 1))
        else:
            # First agent for this backend
            EtcdConfiguration.set(nr_of_agents_key, 1)
    # Re-balance/deploy maintenance agents to match the new counts
    AlbaNodeController.checkup_maintenance_agents()
def migrate(master_ips=None, extra_ips=None):
    """
    Executes all migrations. It keeps track of an internal "migration version" which is always increasing by one
    :param master_ips: IP addresses of the MASTER nodes
    :param extra_ips: IP addresses of the EXTRA nodes
    """
    versions_key = '/ovs/framework/versions'
    data = {}
    if EtcdConfiguration.exists(versions_key):
        data = EtcdConfiguration.get(versions_key)

    # Discover every migrator class in the 'migration' sub-package
    migrators = []
    migration_path = os.path.join(os.path.dirname(__file__), 'migration')
    for entry in os.listdir(migration_path):
        entry_path = os.path.join(migration_path, entry)
        if not (os.path.isfile(entry_path) and entry.endswith('.py')):
            continue
        module_name = entry.replace('.py', '')
        module = imp.load_source(module_name, entry_path)
        for _, candidate in inspect.getmembers(module):
            if not inspect.isclass(candidate) or candidate.__module__ != module_name:
                continue
            if 'object' in [base.__name__ for base in candidate.__bases__]:
                migrators.append((candidate.identifier, candidate.migrate))

    # Run every migrator from its last known version, tracking the highest version reached
    end_version = 0
    for identifier, migrate_method in migrators:
        current_version = data[identifier] if identifier in data else 0
        reached_version = migrate_method(current_version, master_ips, extra_ips)
        if reached_version > end_version:
            end_version = reached_version
        data[identifier] = end_version
    EtcdConfiguration.set(versions_key, data)
def teardown():
    """
    Teardown for Arakoon package, will be executed when all started tests in this package have ended
    Removal actions of possible things left over after the test-run
    :return: None
    """
    autotest_config = General.get_config()
    backend_name = autotest_config.get('backend', 'name')
    backend = GeneralBackend.get_by_name(backend_name)
    if backend is not None:
        # Remove the ALBA backend that was created for the tests
        GeneralAlba.remove_alba_backend(backend.alba_backend)
    for storagerouter in GeneralStorageRouter.get_masters():
        root_client = SSHClient(storagerouter, username='******')
        # Make sure the scheduled-tasks service is running again on every master
        if GeneralService.get_service_status(name='ovs-scheduled-tasks', client=root_client) is False:
            GeneralService.start_service(name='ovs-scheduled-tasks', client=root_client)
        # Remove temporary files/directories left behind by the tests
        for location in TEST_CLEANUP:
            root_client.run('rm -rf {0}'.format(location))
    # Remove leftover etcd keys under the arakoon config root
    for key in KEY_CLEANUP:
        if EtcdConfiguration.exists('{0}/{1}'.format(GeneralArakoon.ETCD_CONFIG_ROOT, key), raw = True):
            EtcdConfiguration.delete('{0}/{1}'.format(GeneralArakoon.ETCD_CONFIG_ROOT, key))
def delete_config(self):
    """
    Deletes a configuration file
    """
    config_key = ArakoonClusterConfig.ETCD_CONFIG_KEY.format(self.cluster_id)
    if not EtcdConfiguration.exists(config_key, raw=True):
        return
    EtcdConfiguration.delete(config_key, raw=True)
def load_metadata(self):
    """
    Reads the metadata for an arakoon cluster from reality
    :return: None
    """
    etcd_key = ArakoonClusterMetadata.ETCD_METADATA_KEY.format(self.cluster_id)
    if not EtcdConfiguration.exists(etcd_key):
        return

    metadata = EtcdConfiguration.get(etcd_key)
    if not isinstance(metadata, dict):
        raise ValueError('Metadata should be a dictionary')

    # Validate and apply each mandatory field
    for field in ['in_use', 'internal', 'type']:
        if field not in metadata:
            raise ValueError('Not all required metadata keys are present for arakoon cluster {0}'.format(self.cluster_id))
        value = metadata[field]
        if field == 'in_use':
            if not isinstance(value, bool):
                raise ValueError('"in_use" should be of type "bool"')
            self.in_use = value
        elif field == 'internal':
            if not isinstance(value, bool):
                raise ValueError('"internal" should be of type "bool"')
            self.internal = value
        else:  # field == 'type'
            if value not in ServiceType.ARAKOON_CLUSTER_TYPES:
                raise ValueError('Unsupported arakoon cluster type {0} found\nPlease choose from {1}'.format(value, ', '.join(ServiceType.ARAKOON_CLUSTER_TYPES)))
            self.cluster_type = value
def migrate(master_ips=None, extra_ips=None):
    """
    Executes all migrations. It keeps track of an internal "migration version" which is always increasing by one
    :param master_ips: IP addresses of the MASTER nodes
    :param extra_ips: IP addresses of the EXTRA nodes
    """
    key = '/ovs/framework/versions'
    data = EtcdConfiguration.get(key) if EtcdConfiguration.exists(key) else {}

    # Collect (identifier, migrate) pairs from every migrator module
    migrators = []
    directory = os.path.join(os.path.dirname(__file__), 'migration')
    for entry in os.listdir(directory):
        location = os.path.join(directory, entry)
        if os.path.isfile(location) and entry.endswith('.py'):
            mod_name = entry.replace('.py', '')
            loaded = imp.load_source(mod_name, location)
            for _, obj in inspect.getmembers(loaded):
                is_local_class = inspect.isclass(obj) and obj.__module__ == mod_name
                if is_local_class and 'object' in [base.__name__ for base in obj.__bases__]:
                    migrators.append((obj.identifier, obj.migrate))

    # Execute the migrators; each entry is bumped to the highest version reached so far
    highest = 0
    for identifier, migrate_method in migrators:
        start_from = data[identifier] if identifier in data else 0
        reached = migrate_method(start_from, master_ips, extra_ips)
        if reached > highest:
            highest = reached
        data[identifier] = highest
    EtcdConfiguration.set(key, data)
def config_files_check_test():
    """
    Verify some configuration files
    """
    issues_found = ''

    # Keys that must be present in etcd
    etcd_keys = {
        "/ovs/framework/memcache",
        "/ovs/arakoon/ovsdb/config"
    }
    for key_to_check in etcd_keys:
        if not EtcdConfiguration.exists(key_to_check, raw = True):
            issues_found += "Couldn't find {0}\n".format(key_to_check)

    # Configuration files that must exist on the node's filesystem
    config_files = {
        "rabbitmq.config": "/etc/rabbitmq/rabbitmq.config",
    }
    grid_ip = General.get_config().get('main', 'grid_ip')
    ssh_pass = General.get_config().get('mgmtcenter', 'password')
    client = SSHClient(grid_ip, username='******', password=ssh_pass)
    for config_name, config_path in config_files.items():
        if not client.file_exists(config_path):
            issues_found += "Couldn't find {0}\n".format(config_name)

    assert issues_found == '', "Found the following issues while checking for the config files:{0}\n".format(issues_found)
def delete_etcd_config(cluster_name):
    """
    Remove the etcd entry for arakoon cluster_name
    :param cluster_name: Name of the arakoon cluster
    :return: None
    """
    etcd_key = GeneralArakoon.ETCD_CONFIG_KEY.format(cluster_name)
    if EtcdConfiguration.exists(etcd_key, raw=True):
        # NOTE(review): existence is checked on the config key itself, but the
        # delete targets its parent directory (os.path.dirname), i.e. the whole
        # cluster tree in etcd - presumably intentional, confirm against callers
        EtcdConfiguration.delete(os.path.dirname(etcd_key))
def _process_disk(_info, _disks, _node):
    """
    Enrich one disk dictionary (from an ASD node's stack info) with a status,
    status detail and owning backend guid, then append it to _disks.
    NOTE(review): closure - relies on 'self' (the alba backend) and
    'alba_backend_map' (alba_id -> backend) from the enclosing scope.
    :param _info: Stack entry containing 'disk' and optionally 'osd' data
    :param _disks: List the processed disk dict is appended to (mutated)
    :param _node: The AlbaNode owning the disk
    """
    disk = _info.get('disk')
    if disk is None:
        return

    disk_status = 'uninitialized'
    disk_status_detail = ''
    disk_alba_backend_guid = ''
    if disk['available'] is False:
        osd = _info.get('osd')
        disk_alba_state = disk['state']['state']
        if disk_alba_state == 'ok':
            if osd is None:
                disk_status = 'initialized'
            elif osd['id'] is None:
                # OSD exists but has no id: availability depends on which alba owns it
                alba_id = osd['alba_id']
                if alba_id is None:
                    disk_status = 'available'
                else:
                    disk_status = 'unavailable'
                    alba_backend = alba_backend_map.get(alba_id)
                    if alba_backend is not None:
                        disk_alba_backend_guid = alba_backend.guid
            else:
                # OSD has an id: claimed by this backend; start pessimistic and
                # upgrade the status based on recent error statistics
                disk_status = 'error'
                disk_status_detail = 'communicationerror'
                disk_alba_backend_guid = self.guid
                for asd in _node.asds:
                    if asd.asd_id == disk['asd_id'] and asd.statistics != {}:
                        disk_status = 'warning'
                        disk_status_detail = 'recenterrors'

                        read = osd['read'] or [0]
                        write = osd['write'] or [0]
                        errors = osd['errors']
                        # Per-backend error interval overrides the global one
                        global_interval_key = '/ovs/alba/backends/global_gui_error_interval'
                        backend_interval_key = '/ovs/alba/backends/{0}/gui_error_interval'.format(self.guid)
                        interval = EtcdConfiguration.get(global_interval_key)
                        if EtcdConfiguration.exists(backend_interval_key):
                            interval = EtcdConfiguration.get(backend_interval_key)
                        # No errors, or last successful I/O is newer than the last
                        # error plus the grace interval -> consider the disk healthy
                        if len(errors) == 0 or (len(read + write) > 0 and max(min(read), min(write)) > max(error[0] for error in errors) + interval):
                            disk_status = 'claimed'
                            disk_status_detail = ''
        elif disk_alba_state == 'decommissioned':
            disk_status = 'unavailable'
            disk_status_detail = 'decommissioned'
        else:
            disk_status = 'error'
            disk_status_detail = disk['state']['detail']
            alba_backend = alba_backend_map.get(osd.get('alba_id'))
            if alba_backend is not None:
                disk_alba_backend_guid = alba_backend.guid

    disk['status'] = disk_status
    disk['status_detail'] = disk_status_detail
    disk['alba_backend_guid'] = disk_alba_backend_guid
    _disks.append(disk)
def load(self):
    """
    Loads the configuration from a given file, optionally a remote one
    :param client: If provided, load remote configuration
    """
    if EtcdConfiguration.exists(self.path):
        raw_contents = EtcdConfiguration.get(self.path, raw=True)
        self.is_new = False
    else:
        raw_contents = '{}'
        logger.debug('Could not find config {0}, a new one will be created'.format(self.path))
    self.dirty_entries = []
    self.configuration = json.loads(raw_contents)
def load(self):
    """
    Loads the configuration from a given file, optionally a remote one
    """
    self.configuration = {}
    root_path = self.path.format('')
    if not EtcdConfiguration.dir_exists(root_path):
        self._logger.debug('Could not find config {0}, a new one will be created'.format(root_path))
    else:
        self.is_new = False
        # Only parameters that actually exist in etcd end up in the configuration
        for param in self.params[self.config_type]:
            param_path = self.path.format(param)
            if EtcdConfiguration.exists(param_path):
                self.configuration[param] = json.loads(EtcdConfiguration.get(param_path, raw=True))
    self.dirty_entries = []
def get_config(cluster_name):
    """
    Retrieve the configuration for given cluster
    :param cluster_name: Name of the cluster
    :return: RawConfigParser object
    """
    config_key = GeneralArakoon.ETCD_CONFIG_KEY.format(cluster_name)
    if not EtcdConfiguration.exists(config_key, raw=True):
        raise ValueError('Unknown arakoon cluster_name {0} provided'.format(cluster_name))

    # Parse the raw ini-style contents stored in etcd
    raw_config = EtcdConfiguration.get(config_key, raw=True)
    parser = RawConfigParser()
    parser.readfp(StringIO(raw_config))
    return parser
def load(self):
    """
    Loads the configuration from a given file, optionally a remote one
    """
    base_path = self.path.format('')
    self.configuration = {}
    if EtcdConfiguration.dir_exists(base_path):
        self.is_new = False
        # Load each known parameter that is present in etcd
        present = [k for k in self.params[self.config_type]
                   if EtcdConfiguration.exists(self.path.format(k))]
        for k in present:
            self.configuration[k] = json.loads(EtcdConfiguration.get(self.path.format(k), raw=True))
    else:
        self._logger.debug('Could not find config {0}, a new one will be created'.format(base_path))
    self.dirty_entries = []
def get_path(binary_name):
    """
    Retrieve the absolute path for binary
    :param binary_name: Binary to get path for
    :return: Path
    """
    machine_id = System.get_my_machine_id()
    key = '/ovs/framework/hosts/{0}/paths|{1}'.format(machine_id, binary_name)
    # Cached in etcd per host; resolve via 'which' on first use
    if EtcdConfiguration.exists(key):
        return EtcdConfiguration.get(key)
    try:
        binary_path = check_output('which {0}'.format(binary_name), shell=True).strip()
    except CalledProcessError:
        return None
    EtcdConfiguration.set(key, binary_path)
    return binary_path
def _create_unit(fleet_name, template_file):
    """
    Create a fleet unit, retrying for up to 60 seconds when fleet reports an
    internal error (HTTP 500), which is usually caused by a transient etcd issue.
    :param fleet_name: Name of the fleet unit to create
    :param template_file: Unit template contents
    :return: The created fleet unit
    :raises RuntimeError: When the unit could not be created within the timeout
    """
    from ovs.extensions.db.etcd.configuration import EtcdConfiguration
    start = time.time()
    while time.time() - start < 60:
        try:
            unit = FLEET_CLIENT.create_unit(fleet_name, fleet.Unit(from_string=template_file))
            return unit
        except fleet.APIError as ae:
            if ae.code == 500:
                FleetCtl._logger.warning('API Error in fleet, most likely caused by etcd, retrying. {0}'.format(ae))
                # Remove the half-created job object so the retry starts clean
                key = '/_coreos.com/fleet/job/{0}/object'.format(fleet_name)
                if EtcdConfiguration.exists(key):
                    EtcdConfiguration.delete(key)
                time.sleep(1)
            else:
                raise
    # Bug fix: the original message was truncated ('Failed to create ') - include the unit name
    raise RuntimeError('Failed to create {0}'.format(fleet_name))
def get_arakoon_metadata_by_cluster_name(cluster_name):
    """
    Retrieve arakoon cluster information based on its name
    :param cluster_name: Name of the arakoon cluster
    :type cluster_name: str
    :return: Arakoon cluster metadata information
    :rtype: ArakoonClusterMetadata
    """
    if not EtcdConfiguration.exists('/ovs/arakoon', raw=True):
        raise ValueError('Etcd key "/ovs/arakoon" not found')
    for candidate in EtcdConfiguration.list('/ovs/arakoon'):
        if candidate != cluster_name:
            continue
        metadata = ArakoonClusterMetadata(cluster_id=cluster_name)
        metadata.load_metadata()
        return metadata
    raise ValueError('No arakoon cluster found with name "{0}"'.format(cluster_name))
def migrate(master_ips=None, extra_ips=None):
    """
    Executes all migrations. It keeps track of an internal "migration version" which is always increasing by one
    :param master_ips: IP addresses of the MASTER nodes
    :param extra_ips: IP addresses of the EXTRA nodes
    """
    machine_id = System.get_my_machine_id()
    versions_key = '/ovs/framework/hosts/{0}/versions'.format(machine_id)
    try:
        data = EtcdConfiguration.get(versions_key) if EtcdConfiguration.exists(versions_key) else {}
    except EtcdConnectionFailed:
        # Most likely a 2.6 to 2.7 migration: fall back to the legacy json config
        import json
        data = {}
        legacy_config = '/opt/OpenvStorage/config/ovs.json'
        if os.path.exists(legacy_config):
            with open(legacy_config) as config_file:
                data = json.load(config_file).get('core', {}).get('versions', {})

    # Locate all migrator classes in the 'migration' directory
    migrators = []
    migration_dir = '/'.join([os.path.dirname(__file__), 'migration'])
    for entry in os.listdir(migration_dir):
        entry_path = '/'.join([migration_dir, entry])
        if os.path.isfile(entry_path) and entry.endswith('.py'):
            mod_name = entry.replace('.py', '')
            module = imp.load_source(mod_name, entry_path)
            for _, obj in inspect.getmembers(module):
                if inspect.isclass(obj) and obj.__module__ == mod_name and 'object' in [base.__name__ for base in obj.__bases__]:
                    migrators.append((obj.identifier, obj.migrate))

    end_version = 0
    for identifier, migrate_method in migrators:
        base_version = data[identifier] if identifier in data else 0
        version = migrate_method(base_version, master_ips, extra_ips)
        if version > end_version:
            end_version = version
        data[identifier] = end_version
    EtcdConfiguration.set(versions_key, data)
def get_path(binary_name):
    """
    Retrieve the absolute path for binary
    :param binary_name: Binary to get path for
    :return: Path
    """
    config_location = '/ovs/framework/hosts/{0}/paths|{1}'.format(System.get_my_machine_id(), binary_name)
    if not EtcdConfiguration.exists(config_location):
        # Not cached yet for this host: resolve with 'which' and store the result
        try:
            resolved = check_output('which {0}'.format(binary_name), shell=True).strip()
        except CalledProcessError:
            return None
        EtcdConfiguration.set(config_location, resolved)
        return resolved
    return EtcdConfiguration.get(config_location)
def verify_namespaces():
    """
    Verify namespaces for all backends
    """
    logger.info('verify namespace task scheduling started')

    # The verification factor is stored in etcd; seed the default when absent
    job_factor_key = '/ovs/alba/backends/job_factor'
    if EtcdConfiguration.exists(job_factor_key):
        job_factor = EtcdConfiguration.get(job_factor_key)
    else:
        job_factor = 10
        EtcdConfiguration.set(job_factor_key, job_factor)

    for albabackend in AlbaBackendList.get_albabackends():
        config = 'etcd://127.0.0.1:2379/ovs/arakoon/{0}-abm/config'.format(albabackend.backend.name)
        for namespace in AlbaCLI.run('list-namespaces', config=config, as_json=True):
            logger.info('verifying namespace: {0} scheduled ...'.format(namespace['name']))
            AlbaCLI.run('verify-namespace {0} --factor={1}'.format(namespace['name'], job_factor))

    logger.info('verify namespace task scheduling finished')
def _create_unit(fleet_name, template_file):
    """
    Create a fleet unit, retrying for up to 60 seconds on fleet internal errors
    (HTTP 500), which are usually caused by transient etcd problems.
    :param fleet_name: Name of the fleet unit to create
    :param template_file: Unit template contents
    :return: The created fleet unit
    :raises RuntimeError: When the unit could not be created within the timeout
    """
    from ovs.extensions.db.etcd.configuration import EtcdConfiguration
    start = time.time()
    while time.time() - start < 60:
        try:
            unit = FLEET_CLIENT.create_unit(fleet_name, fleet.Unit(from_string=template_file))
            return unit
        except fleet.APIError as ae:
            if ae.code == 500:
                FleetCtl._logger.warning('API Error in fleet, most likely caused by etcd, retrying. {0}'.format(ae))
                # Clean up the partially created job object before retrying
                key = '/_coreos.com/fleet/job/{0}/object'.format(fleet_name)
                if EtcdConfiguration.exists(key):
                    EtcdConfiguration.delete(key)
                time.sleep(1)
            else:
                raise
    # Bug fix: the original message was truncated ('Failed to create ') - include the unit name
    raise RuntimeError('Failed to create {0}'.format(fleet_name))
def verify_namespaces():
    """
    Verify namespaces for all backends
    """
    AlbaScheduledTaskController._logger.info('verify namespace task scheduling started')

    # Verification factor lives in etcd; seed the default when it is missing
    factor_key = '/ovs/alba/backends/verification_factor'
    if EtcdConfiguration.exists(factor_key):
        verification_factor = EtcdConfiguration.get(factor_key)
    else:
        verification_factor = 10
        EtcdConfiguration.set(factor_key, verification_factor)

    for albabackend in AlbaBackendList.get_albabackends():
        if albabackend.abm_services:
            backend_name = albabackend.abm_services[0].service.name
        else:
            backend_name = albabackend.name + '-abm'
        config = 'etcd://127.0.0.1:2379/ovs/arakoon/{0}/config'.format(backend_name)
        for namespace in AlbaCLI.run('list-namespaces', config=config, as_json=True):
            AlbaScheduledTaskController._logger.info('verifying namespace: {0} scheduled ...'.format(namespace['name']))
            AlbaCLI.run('verify-namespace {0} --factor={1}'.format(namespace['name'], verification_factor))

    AlbaScheduledTaskController._logger.info('verify namespace task scheduling finished')
def _send_stats(points):
    """
    Push a batch of statistic points to the configured stats database.
    The database settings are read from etcd ('/ops/db'); supported
    transports are 'influxdb' and 'redis'. Errors are logged, not raised.
    :param points: Statistic points to send
    :return: None
    """
    db_key = '/ops/db'
    if not EtcdConfiguration.exists(db_key):
        StatsmonkeyScheduledTaskController._logger.error('{} config not found'.format(db_key))
        return
    db_config = EtcdConfiguration.get(db_key)
    transport = db_config.get('transport')
    host = db_config.get('host')
    port = db_config.get('port')
    password = db_config.get('password')
    database = db_config.get('database')
    if transport == 'influxdb':
        try:
            user = db_config.get('username')
            client = InfluxDBClient(host=host, port=port, username=user, password=password, database=database)
            StatsmonkeyScheduledTaskController._logger.info(points)
            client.write_points(points)
        # Specific influx errors first; broad Exception as a deliberate catch-all
        # so a stats failure never breaks the scheduled task
        except InfluxDBClientError as c:
            StatsmonkeyScheduledTaskController._logger.error(c.message)
        except InfluxDBServerError as s:
            StatsmonkeyScheduledTaskController._logger.error(s.message)
        except Exception as ex:
            StatsmonkeyScheduledTaskController._logger.error(ex.message)
    elif transport == 'redis':
        try:
            client = redis.Redis(host=host, port=port, password=password)
            StatsmonkeyScheduledTaskController._logger.info(points)
            client.lpush(database, points)
        except RedisError as ex:
            StatsmonkeyScheduledTaskController._logger.error(ex.message)
        except Exception as ex:
            StatsmonkeyScheduledTaskController._logger.error(ex.message)
    else:
        StatsmonkeyScheduledTaskController._logger.error("transport {0} not supported.".format(transport))
def migrate(master_ips=None, extra_ips=None):
    """
    Executes all migrations. It keeps track of an internal "migration version" which is always increasing by one
    :param master_ips: IP addresses of the MASTER nodes
    :param extra_ips: IP addresses of the EXTRA nodes
    """
    key = '/ovs/framework/hosts/{0}/versions'.format(System.get_my_machine_id())
    try:
        if EtcdConfiguration.exists(key):
            data = EtcdConfiguration.get(key)
        else:
            data = {}
    except EtcdConnectionFailed:
        import json  # Most likely 2.6 to 2.7 migration
        data = {}
        filename = '/opt/OpenvStorage/config/ovs.json'
        if os.path.exists(filename):
            with open(filename) as config_file:
                data = json.load(config_file).get('core', {}).get('versions', {})

    # Gather all migrator classes from the migration directory
    migrators = []
    migration_dir = os.path.join(os.path.dirname(__file__), 'migration')
    for candidate in os.listdir(migration_dir):
        candidate_path = os.path.join(migration_dir, candidate)
        if not candidate.endswith('.py') or not os.path.isfile(candidate_path):
            continue
        module_name = candidate.replace('.py', '')
        module = imp.load_source(module_name, candidate_path)
        for _, cls in inspect.getmembers(module, inspect.isclass):
            if cls.__module__ == module_name and 'object' in [base.__name__ for base in cls.__bases__]:
                migrators.append((cls.identifier, cls.migrate))

    end_version = 0
    for identifier, migrator in migrators:
        previous = data[identifier] if identifier in data else 0
        new_version = migrator(previous, master_ips, extra_ips)
        if new_version > end_version:
            end_version = new_version
        data[identifier] = end_version
    EtcdConfiguration.set(key, data)
def migrate(previous_version, master_ips=None, extra_ips=None):
    """
    Migrates from any version to any version, running all migrations required
    If previous_version is for example 0 and this script is at verison 3 it will execute two steps:
        - 1 > 2
        - 2 > 3
    :param previous_version: The previous version from which to start the migration.
    :param master_ips: IP addresses of the MASTER nodes
    :param extra_ips: IP addresses of the EXTRA nodes
    """
    logger = LogHandler.get('extensions', name='migration')
    working_version = previous_version

    # Version 1 introduced:
    # - Flexible SSD layout
    if working_version < 1:
        try:
            from ovs.extensions.generic.configuration import Configuration
            if Configuration.exists('ovs.arakoon'):
                Configuration.delete('ovs.arakoon', remove_root=True)
            Configuration.set('ovs.core.ovsdb', '/opt/OpenvStorage/db')
        except:
            # Deliberate bare except: a failing step is logged but must not block later migrations
            logger.exception('Error migrating to version 1')
        working_version = 1

    # Version 2 introduced:
    # - Registration
    if working_version < 2:
        try:
            import time
            from ovs.extensions.generic.configuration import Configuration
            if not Configuration.exists('ovs.core.registered'):
                Configuration.set('ovs.core.registered', False)
                Configuration.set('ovs.core.install_time', time.time())
        except:
            logger.exception('Error migrating to version 2')
        working_version = 2

    # Version 3 introduced:
    # - New arakoon clients
    if working_version < 3:
        try:
            from ovs.extensions.db.arakoon import ArakoonInstaller
            reload(ArakoonInstaller)
            from ovs.extensions.db.arakoon.ArakoonInstaller import ArakoonInstaller
            from ovs.extensions.generic.sshclient import SSHClient
            from ovs.extensions.generic.configuration import Configuration
            if master_ips is not None:
                # Re-deploy every known arakoon cluster on each master
                for ip in master_ips:
                    client = SSHClient(ip)
                    if client.dir_exists(ArakoonInstaller.ARAKOON_CONFIG_DIR):
                        for cluster_name in client.dir_list(ArakoonInstaller.ARAKOON_CONFIG_DIR):
                            try:
                                ArakoonInstaller.deploy_cluster(cluster_name, ip)
                            except:
                                # Best effort per cluster; failures are ignored on purpose
                                pass
            if Configuration.exists('ovs.core.storage.persistent'):
                Configuration.set('ovs.core.storage.persistent', 'pyrakoon')
        except:
            logger.exception('Error migrating to version 3')
        working_version = 3

    # Version 4 introduced:
    # - Etcd
    if working_version < 4:
        try:
            import os
            import json
            from ConfigParser import RawConfigParser
            from ovs.extensions.db.etcd import installer
            reload(installer)
            from ovs.extensions.db.etcd.installer import EtcdInstaller
            from ovs.extensions.db.etcd.configuration import EtcdConfiguration
            from ovs.extensions.generic.system import System
            host_id = System.get_my_machine_id()
            etcd_migrate = False
            if EtcdInstaller.has_cluster('127.0.0.1', 'config'):
                etcd_migrate = True
            else:
                if master_ips is not None and extra_ips is not None:
                    # Find an existing 'config' etcd cluster anywhere in the grid
                    cluster_ip = None
                    for ip in master_ips + extra_ips:
                        if EtcdInstaller.has_cluster(ip, 'config'):
                            cluster_ip = ip
                            break
                    # This node's own IP comes from the legacy json config
                    node_ip = None
                    path = '/opt/OpenvStorage/config/ovs.json'
                    if os.path.exists(path):
                        with open(path) as config_file:
                            config = json.load(config_file)
                            node_ip = config['grid']['ip']
                    if node_ip is not None:
                        if cluster_ip is None:
                            # No cluster yet: create one on this node
                            EtcdInstaller.create_cluster('config', node_ip)
                            EtcdConfiguration.initialize()
                            EtcdConfiguration.initialize_host(host_id)
                        else:
                            # Join the existing cluster
                            EtcdInstaller.extend_cluster(cluster_ip, node_ip, 'config')
                            EtcdConfiguration.initialize_host(host_id)
                        etcd_migrate = True
            if etcd_migrate is True:
                # Migrating configuration files
                path = '/opt/OpenvStorage/config/ovs.json'
                if os.path.exists(path):
                    with open(path) as config_file:
                        config = json.load(config_file)
                        # Copy the legacy json settings into the etcd tree
                        EtcdConfiguration.set('/ovs/framework/cluster_id', config['support']['cid'])
                        if not EtcdConfiguration.exists('/ovs/framework/install_time'):
                            EtcdConfiguration.set('/ovs/framework/install_time', config['core']['install_time'])
                        else:
                            # Keep the oldest known install time
                            EtcdConfiguration.set('/ovs/framework/install_time', min(EtcdConfiguration.get('/ovs/framework/install_time'), config['core']['install_time']))
                        EtcdConfiguration.set('/ovs/framework/registered', config['core']['registered'])
                        EtcdConfiguration.set('/ovs/framework/plugins/installed', config['plugins'])
                        EtcdConfiguration.set('/ovs/framework/stores', config['core']['storage'])
                        EtcdConfiguration.set('/ovs/framework/paths', {'cfgdir': config['core']['cfgdir'],
                                                                       'basedir': config['core']['basedir'],
                                                                       'ovsdb': config['core']['ovsdb']})
                        EtcdConfiguration.set('/ovs/framework/support', {'enablesupport': config['support']['enablesupport'],
                                                                         'enabled': config['support']['enabled'],
                                                                         'interval': config['support']['interval']})
                        EtcdConfiguration.set('/ovs/framework/storagedriver', {'mds_safety': config['storagedriver']['mds']['safety'],
                                                                               'mds_tlogs': config['storagedriver']['mds']['tlogs'],
                                                                               'mds_maxload': config['storagedriver']['mds']['maxload']})
                        EtcdConfiguration.set('/ovs/framework/webapps', {'html_endpoint': config['webapps']['html_endpoint'],
                                                                         'oauth2': config['webapps']['oauth2']})
                        EtcdConfiguration.set('/ovs/framework/messagequeue', {'endpoints': [],
                                                                              'protocol': config['core']['broker']['protocol'],
                                                                              'user': config['core']['broker']['login'],
                                                                              'port': config['core']['broker']['port'],
                                                                              'password': config['core']['broker']['password'],
                                                                              'queues': config['core']['broker']['queues']})
                        # Per-host settings under /ovs/framework/hosts/<host_id>/...
                        host_key = '/ovs/framework/hosts/{0}{{0}}'.format(host_id)
                        EtcdConfiguration.set(host_key.format('/storagedriver'), {'rsp': config['storagedriver']['rsp'],
                                                                                  'vmware_mode': config['storagedriver']['vmware_mode']})
                        EtcdConfiguration.set(host_key.format('/ports'), config['ports'])
                        EtcdConfiguration.set(host_key.format('/setupcompleted'), config['core']['setupcompleted'])
                        EtcdConfiguration.set(host_key.format('/versions'), config['core'].get('versions', {}))
                        EtcdConfiguration.set(host_key.format('/type'), config['core']['nodetype'])
                        EtcdConfiguration.set(host_key.format('/ip'), config['grid']['ip'])
                # Migrate the memcache client config file into etcd, then remove it
                path = '{0}/memcacheclient.cfg'.format(EtcdConfiguration.get('/ovs/framework/paths|cfgdir'))
                if os.path.exists(path):
                    config = RawConfigParser()
                    config.read(path)
                    nodes = [config.get(node.strip(), 'location').strip()
                             for node in config.get('main', 'nodes').split(',')]
                    EtcdConfiguration.set('/ovs/framework/memcache|endpoints', nodes)
                    os.remove(path)
                # Migrate the rabbitmq client config file into etcd, then remove it
                path = '{0}/rabbitmqclient.cfg'.format(EtcdConfiguration.get('/ovs/framework/paths|cfgdir'))
                if os.path.exists(path):
                    config = RawConfigParser()
                    config.read(path)
                    nodes = [config.get(node.strip(), 'location').strip()
                             for node in config.get('main', 'nodes').split(',')]
                    EtcdConfiguration.set('/ovs/framework/messagequeue|endpoints', nodes)
                    os.remove(path)

                # Migrate arakoon configuration files
                from ovs.extensions.db.arakoon import ArakoonInstaller
                reload(ArakoonInstaller)
                from ovs.extensions.db.arakoon.ArakoonInstaller import ArakoonInstaller, ArakoonClusterConfig
                from ovs.extensions.generic.sshclient import SSHClient
                if master_ips is not None:
                    config_dir = '/opt/OpenvStorage/config/arakoon/'
                    for ip in master_ips:
                        client = SSHClient(ip)
                        if client.dir_exists(config_dir):
                            for cluster_name in client.dir_list(config_dir):
                                try:
                                    # Copy the on-disk cluster config into etcd and re-deploy
                                    with open('{0}/{1}/{1}.cfg'.format(config_dir, cluster_name)) as config_file:
                                        EtcdConfiguration.set(ArakoonClusterConfig.ETCD_CONFIG_KEY.format(cluster_name),
                                                              config_file.read(),
                                                              raw=True)
                                        ArakoonInstaller.deploy_cluster(cluster_name, ip)
                                except:
                                    logger.exception('Error migrating {0} on {1}'.format(cluster_name, ip))
                            client.dir_delete(config_dir)
        except:
            logger.exception('Error migrating to version 4')
        working_version = 4

    return working_version
def validate_alba_backend_sanity_without_claimed_disks(alba_backend):
    """
    Validate whether the ALBA backend is configured correctly
    :param alba_backend: ALBA backend
    :return: None
    """
    # Attribute validation
    assert alba_backend.available is True, 'ALBA backend {0} is not available'.format(alba_backend.backend.name)
    assert len(alba_backend.presets) >= 1, 'No preset found for ALBA backend {0}'.format(alba_backend.backend.name)
    assert len([default for default in alba_backend.presets if default['is_default'] is True]) == 1, 'Could not find default preset for backend {0}'.format(alba_backend.backend.name)
    assert alba_backend.backend.backend_type.code == 'alba', 'Backend type for ALBA backend is {0}'.format(alba_backend.backend.backend_type.code)
    assert alba_backend.backend.status == 'RUNNING', 'Status for ALBA backend is {0}'.format(alba_backend.backend.status)
    assert isinstance(alba_backend.metadata_information, dict) is True, 'ALBA backend {0} metadata information is not a dictionary'.format(alba_backend.backend.name)
    Toolbox.verify_required_params(actual_params=alba_backend.metadata_information,
                                   required_params={'nsm_partition_guids': (list, Toolbox.regex_guid)},
                                   exact_match=True)

    # Validate ABM and NSM services: one per master storagerouter with a DB role
    storagerouters = GeneralStorageRouter.get_storage_routers()
    storagerouters_with_db_role = [sr for sr in storagerouters if GeneralStorageRouter.has_roles(storagerouter=sr, roles='DB') is True and sr.node_type == 'MASTER']
    assert len(alba_backend.abm_services) == len(storagerouters_with_db_role), 'Not enough ABM services found'
    assert len(alba_backend.nsm_services) == len(storagerouters_with_db_role), 'Not enough NSM services found'

    # Validate ALBA backend ETCD structure
    alba_backend_key = '/ovs/alba/backends'
    assert EtcdConfiguration.exists(key=alba_backend_key, raw=True) is True, 'Etcd does not contain key {0}'.format(alba_backend_key)

    actual_etcd_keys = [key for key in EtcdConfiguration.list(alba_backend_key)]
    expected_etcd_keys = ['verification_schedule', 'global_gui_error_interval', alba_backend.guid, 'default_nsm_hosts']
    optional_etcd_keys = ['verification_factor']

    # Count only the keys that are expected to be present (optional ones only if they exist)
    expected_keys_amount = 0
    for optional_key in optional_etcd_keys:
        if optional_key in actual_etcd_keys:
            expected_keys_amount += 1

    for expected_key in expected_etcd_keys:
        if not re.match(Toolbox.regex_guid, expected_key):
            expected_keys_amount += 1
        assert expected_key in actual_etcd_keys, 'Key {0} was not found in tree {1}'.format(expected_key, alba_backend_key)

    for actual_key in list(actual_etcd_keys):
        if re.match(Toolbox.regex_guid, actual_key):
            actual_etcd_keys.remove(actual_key)  # Remove all alba backend keys
    assert len(actual_etcd_keys) == expected_keys_amount, 'Another key was added to the {0} tree'.format(alba_backend_key)

    this_alba_backend_key = '{0}/{1}'.format(alba_backend_key, alba_backend.guid)
    actual_keys = [key for key in EtcdConfiguration.list(this_alba_backend_key)]
    expected_keys = ['maintenance']
    assert actual_keys == expected_keys, 'Actual keys: {0} - Expected keys: {1}'.format(actual_keys, expected_keys)

    maintenance_key = '{0}/maintenance'.format(this_alba_backend_key)
    actual_keys = [key for key in EtcdConfiguration.list(maintenance_key)]
    expected_keys = ['nr_of_agents', 'config']
    assert set(actual_keys) == set(expected_keys), 'Actual keys: {0} - Expected keys: {1}'.format(actual_keys, expected_keys)
    # @TODO: Add validation for config values

    # Validate ASD node ETCD structure
    alba_nodes = GeneralAlba.get_alba_nodes()
    assert len(alba_nodes) > 0, 'Could not find any ALBA nodes in the model'
    alba_node_key = '/ovs/alba/asdnodes'
    actual_keys = [key for key in EtcdConfiguration.list(alba_node_key)]
    assert len(alba_nodes) == len(actual_keys), 'Amount of ALBA nodes in model does not match amount of ALBA nodes in ETCD. In model: {0} - In Etcd: {1}'.format(len(alba_nodes), len(actual_keys))
    for alba_node in alba_nodes:
        assert alba_node.node_id in actual_keys, 'ALBA node with ID {0} not present in ETCD'.format(alba_node.node_id)

        actual_asdnode_keys = [key for key in EtcdConfiguration.list('{0}/{1}'.format(alba_node_key, alba_node.node_id))]
        expected_asdnode_keys = ['config']
        assert actual_asdnode_keys == expected_asdnode_keys, 'Actual keys: {0} - Expected keys: {1}'.format(actual_asdnode_keys, expected_asdnode_keys)

        actual_config_keys = [key for key in EtcdConfiguration.list('{0}/{1}/config'.format(alba_node_key, alba_node.node_id))]
        expected_config_keys = ['main', 'network']
        assert set(actual_config_keys) == set(expected_config_keys), 'Actual keys: {0} - Expected keys: {1}'.format(actual_config_keys, expected_config_keys)
        # @TODO: Add validation for main and network values

    # Validate Arakoon ETCD structure
    arakoon_abm_key = '/ovs/arakoon/{0}/config'.format(alba_backend.abm_services[0].service.name)
    arakoon_nsm_key = '/ovs/arakoon/{0}/config'.format(alba_backend.nsm_services[0].service.name)
    assert EtcdConfiguration.exists(key=arakoon_abm_key, raw=True) is True, 'Etcd key {0} does not exists'.format(arakoon_abm_key)
    assert EtcdConfiguration.exists(key=arakoon_nsm_key, raw=True) is True, 'Etcd key {0} does not exists'.format(arakoon_nsm_key)
    # @TODO: Add validation for config values

    # Validate maintenance agents
    actual_amount_agents = len([service for node_services in [alba_node.client.list_maintenance_services() for alba_node in alba_nodes] for service in node_services])
    expected_amount_agents = EtcdConfiguration.get('/ovs/alba/backends/{0}/maintenance/nr_of_agents'.format(alba_backend.guid))
    assert actual_amount_agents == expected_amount_agents, 'Amount of maintenance agents is incorrect. Found {0} - Expected {1}'.format(actual_amount_agents, expected_amount_agents)

    # Validate arakoon services: deployed and running on every DB-role master
    machine_ids = [sr.machine_id for sr in storagerouters_with_db_role]
    abm_service_name = alba_backend.abm_services[0].service.name
    nsm_service_name = alba_backend.nsm_services[0].service.name
    for storagerouter in storagerouters_with_db_role:
        root_client = SSHClient(endpoint=storagerouter, username='******')
        abm_arakoon_service_name = 'ovs-arakoon-{0}'.format(abm_service_name)
        nsm_arakoon_service_name = 'ovs-arakoon-{0}'.format(nsm_service_name)
        for service_name in [abm_arakoon_service_name, nsm_arakoon_service_name]:
            assert GeneralService.has_service(name=service_name, client=root_client) is True, 'Service {0} not deployed on Storage Router {1}'.format(service_name, storagerouter.name)
            assert GeneralService.get_service_status(name=service_name, client=root_client) is True, 'Service {0} not running on Storage Router {1}'.format(service_name, storagerouter.name)

    # The arakoon master must be one of the DB-role masters
    out, err, _ = General.execute_command('arakoon --who-master -config {0}'.format(GeneralArakoon.ETCD_CONFIG_PATH.format(abm_service_name)))
    assert out.strip() in machine_ids, 'Arakoon master is {0}, but should be 1 of "{1}"'.format(out.strip(), ', '.join(machine_ids))
def remove_asd(node_guid, asd_id, expected_safety):
    """
    Removes an ASD: decommissions the OSD in ALBA, deletes the ASD on the node when the
    node is reachable, removes its Etcd configuration and deletes it from the model
    :param node_guid: Guid of the node to remove a disk from
    :type node_guid: str
    :param asd_id: ASD to remove
    :type asd_id: str
    :param expected_safety: Expected safety after having removed the disk
    :type expected_safety: dict
    :return: ID of the disk the ASD was located on, or None when the node was unreachable
             and the disk could not be determined
    """
    node = AlbaNode(node_guid)
    AlbaNodeController._logger.debug('Removing ASD {0} at node {1}'.format(asd_id, node.ip))
    # Locate the ASD in the DAL model; without a model entry there is nothing to clean up
    model_asd = None
    for disk in node.disks:
        for asd in disk.asds:
            if asd.asd_id == asd_id:
                model_asd = asd
                break
        if model_asd is not None:
            break
    if model_asd is None:
        raise RuntimeError('Could not locate asd {0} in the model'.format(asd_id))
    alba_backend = model_asd.alba_backend
    # Best-effort retrieval of the actual ASDs on the node; the node may be down, in which
    # case 'asds' stays empty and the removal proceeds without safety validation below
    asds = {}
    try:
        asds = node.client.get_asds()
    except (requests.ConnectionError, requests.Timeout):
        AlbaNodeController._logger.warning('Could not connect to node {0} to validate asd'.format(node.guid))
    # Find which physical disk hosts the ASD (None when the node could not be queried)
    disk_id = None
    for _disk_id in asds:
        if asd_id in asds[_disk_id]:
            disk_id = _disk_id
            break
    # Decommission the OSD in ALBA regardless of node reachability
    AlbaController.remove_units(alba_backend.guid, [asd_id], absorb_exception=True)
    if disk_id is not None:
        # Node reachable: verify the post-removal safety matches what the caller expects
        # before actually deleting the ASD from the node
        final_safety = AlbaController.calculate_safety(alba_backend.guid, [asd_id])
        safety_lost = final_safety['lost']
        safety_crit = final_safety['critical']
        # Only abort when safety is impacted AND differs from the caller-approved expectation
        if (safety_crit != 0 or safety_lost != 0) and (safety_crit != expected_safety['critical'] or safety_lost != expected_safety['lost']):
            raise RuntimeError('Cannot remove ASD {0} as the current safety is not as expected ({1} vs {2})'.format(asd_id, final_safety, expected_safety))
        result = node.client.delete_asd(disk_id, asd_id)
        if result['_success'] is False:
            raise RuntimeError('Error removing ASD: {0}'.format(result['_error']))
    else:
        AlbaNodeController._logger.warning('Alba decommission osd {0} without safety validations (node down)'.format(asd_id))
    # Remove the ASD configuration directory from Etcd, if present
    if EtcdConfiguration.exists(AlbaNodeController.ASD_CONFIG.format(asd_id), raw=True):
        EtcdConfiguration.delete(AlbaNodeController.ASD_CONFIG_DIR.format(asd_id), raw=True)
    # Clean up the model and refresh cached (dynamic) properties
    model_asd.delete()
    alba_backend.invalidate_dynamics()
    alba_backend.backend.invalidate_dynamics()
    if node.storagerouter is not None:
        # Resync the Storage Router's disk layout now that an ASD disappeared
        DiskController.sync_with_reality(node.storagerouter_guid)
    return disk_id
def checkup_maintenance_agents():
    """
    Check if requested nr of maintenance agents / backend is actually present
    Add / remove as necessary: for each ALBA backend the required number of agents is read
    from Etcd (defaulting to the number of ASD nodes) and compared against the number of
    deployed 'alba-maintenance_<backend>-<hash>' services; agents are then added on the
    least loaded node or removed from the most loaded node until the counts match
    :return: None
    """
    service_template_key = 'alba-maintenance_{0}-{1}'
    maintenance_agents_map = {}
    asd_nodes = AlbaNodeList.get_albanodes()
    nr_of_storage_nodes = len(asd_nodes)

    def _get_node_load(backend_name):
        """
        Count deployed maintenance agents for the given backend on every ASD node and
        return the most/least loaded node plus the total agent count
        """
        highest_load = 0
        lowest_load = sys.maxint
        # When no ASD nodes are registered both node entries stay None; callers must guard
        agent_load = {'high_load_node': asd_nodes[0] if asd_nodes else None,
                      'low_load_node': asd_nodes[0] if asd_nodes else None,
                      'total_load': 0}
        for asd_node in asd_nodes:
            actual_nr_of_agents = 0
            maint_services = asd_node.client.list_maintenance_services()
            for service_name in maint_services:
                # Matching on 'alba-maintenance_<backend>-' (hash part left empty)
                if service_template_key.format(backend_name, '') in service_name:
                    actual_nr_of_agents += 1
            if actual_nr_of_agents > highest_load:
                agent_load['high_load_node'] = asd_node
                highest_load = actual_nr_of_agents
            if actual_nr_of_agents < lowest_load:
                agent_load['low_load_node'] = asd_node
                lowest_load = actual_nr_of_agents
            agent_load['total_load'] += actual_nr_of_agents
        return agent_load

    # Determine required vs actual number of agents per backend
    alba_backends = AlbaBackendList.get_albabackends()
    for alba_backend in alba_backends:
        nr_of_agents_key = AlbaNodeController.NR_OF_AGENTS_ETCD_TEMPLATE.format(alba_backend.guid)
        name = alba_backend.backend.name
        if not EtcdConfiguration.exists(nr_of_agents_key):
            # Default: one agent per storage node
            EtcdConfiguration.set(nr_of_agents_key, nr_of_storage_nodes)
        required_nr = EtcdConfiguration.get(nr_of_agents_key)
        maintenance_agents_map[name] = {'required': required_nr,
                                        'actual': _get_node_load(name)['total_load'],
                                        'backend': alba_backend.backend}

    for name, values in maintenance_agents_map.iteritems():
        AlbaNodeController._logger.info('Checking backend: {0}'.format(name))
        to_process = values['required'] - values['actual']

        if to_process == 0:
            AlbaNodeController._logger.info('No action required for: {0}'.format(name))
        elif to_process > 0:
            # Deficit: spawn agents one at a time, re-evaluating the least loaded node each pass
            AlbaNodeController._logger.info('Adding {0} maintenance agent(s) for {1}'.format(to_process, name))
            for _ in xrange(to_process):
                unique_hash = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(16))
                node = _get_node_load(name)['low_load_node']
                AlbaNodeController._logger.info('Service to add: ' + service_template_key.format(name, unique_hash))
                if node and node.client:
                    node.client.add_maintenance_service(service_template_key.format(name, unique_hash),
                                                        values['backend'].alba_backend.guid,
                                                        AlbaController.get_abm_service_name(values['backend']))
                    AlbaNodeController._logger.info('Service added')
        else:
            # Surplus: remove agents one at a time from the most loaded node
            to_process = abs(to_process)
            AlbaNodeController._logger.info('Removing {0} maintenance agent(s) for {1}'.format(to_process, name))
            for _ in xrange(to_process):
                node = _get_node_load(name)['high_load_node']
                # BUGFIX: guard BEFORE dereferencing. The previous code called
                # node.client.list_maintenance_services() first and only afterwards checked
                # 'node and node.client', raising AttributeError when no ASD nodes exist
                # (high_load_node is None)
                if not (node and node.client):
                    continue
                for service in node.client.list_maintenance_services():
                    # Remove one matching agent per surplus unit
                    if 'alba-maintenance_' + name in service:
                        node.client.remove_maintenance_service(service)
                        break
def post_upgrade(client):
    """
    Upgrade actions after the new packages have actually been installed:
    - removes the legacy /opt/OpenvStorage/config/ovs.json once Etcd holds the cluster config
    - migrates volumedriver, albaproxy and ganesha configuration files from the local
      configuration directory into Etcd / StorageDriverConfiguration, re-registering the
      accompanying services, and deletes the migrated files
    :param client: SSHClient object
    :return: None
    """
    # If we can reach Etcd with a valid config, and there's still an old config file present, delete it
    from ovs.extensions.db.etcd.configuration import EtcdConfiguration
    path = '/opt/OpenvStorage/config/ovs.json'
    if EtcdConfiguration.exists('/ovs/framework/cluster_id') and client.file_exists(path):
        client.file_delete(path)
    # Migrate volumedriver & albaproxy configuration files
    import uuid
    from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration
    from ovs.dal.lists.storagedriverlist import StorageDriverList
    from ovs.extensions.generic.system import System
    # All file operations run on the remote host via 'rem'; only Etcd writes happen locally
    with remote(client.ip, [StorageDriverConfiguration, os, open, json, System], username='******') as rem:
        configuration_dir = '{0}/storagedriver/storagedriver'.format(EtcdConfiguration.get('/ovs/framework/paths|cfgdir'))
        host_id = rem.System.get_my_machine_id()
        if rem.os.path.exists(configuration_dir):
            for storagedriver in StorageDriverList.get_storagedrivers_by_storagerouter(rem.System.get_my_storagerouter().guid):
                vpool = storagedriver.vpool
                if storagedriver.alba_proxy is not None:
                    # Etcd template with '{0}' left as a placeholder for the leaf name (abm/main)
                    config_tree = '/ovs/vpools/{0}/proxies/{1}/config/{{0}}'.format(vpool.guid, storagedriver.alba_proxy.guid)
                    # ABM config: move the raw .cfg contents into Etcd, then delete the file
                    abm_config = '{0}/{1}_alba.cfg'.format(configuration_dir, vpool.name)
                    if rem.os.path.exists(abm_config):
                        with rem.open(abm_config) as config_file:
                            EtcdConfiguration.set(config_tree.format('abm'), config_file.read(), raw=True)
                        rem.os.remove(abm_config)
                    # Albaproxy config: rewrite the file-based ABM reference to an etcd:// URL
                    alba_config = '{0}/{1}_alba.json'.format(configuration_dir, vpool.name)
                    if rem.os.path.exists(alba_config):
                        with rem.open(alba_config) as config_file:
                            config = rem.json.load(config_file)
                            del config['albamgr_cfg_file']
                            config['albamgr_cfg_url'] = 'etcd://127.0.0.1:2379{0}'.format(config_tree.format('abm'))
                            EtcdConfiguration.set(config_tree.format('main'), json.dumps(config, indent=4), raw=True)
                        # Re-register the proxy service against the migrated configuration
                        params = {'VPOOL_NAME': vpool.name,
                                  'VPOOL_GUID': vpool.guid,
                                  'PROXY_ID': storagedriver.alba_proxy.guid}
                        alba_proxy_service = 'ovs-albaproxy_{0}'.format(vpool.name)
                        ServiceManager.add_service(name='ovs-albaproxy', params=params, client=client, target_name=alba_proxy_service)
                        rem.os.remove(alba_config)
                # Volumedriver config: rename the 'failovercache' section to
                # 'distributed_transaction_log' and persist through StorageDriverConfiguration
                current_file = '{0}/{1}.json'.format(configuration_dir, vpool.name)
                if rem.os.path.exists(current_file):
                    readcache_size = 0
                    with rem.open(current_file) as config_file:
                        config = rem.json.load(config_file)
                        config['distributed_transaction_log'] = {}
                        config['distributed_transaction_log']['dtl_transport'] = config['failovercache']['failovercache_transport']
                        config['distributed_transaction_log']['dtl_path'] = config['failovercache']['failovercache_path']
                        config['volume_manager']['dtl_throttle_usecs'] = config['volume_manager']['foc_throttle_usecs']
                        del config['failovercache']
                        del config['volume_manager']['foc_throttle_usecs']
                        sdc = rem.StorageDriverConfiguration('storagedriver', vpool.guid, storagedriver.storagedriver_id)
                        sdc.configuration = config
                        sdc.save(reload_config=False)
                        # Sum read cache sizes (values stored as '<n>KiB' strings)
                        for mountpoint in config['content_addressed_cache']['clustercache_mount_points']:
                            readcache_size += int(mountpoint['size'].replace('KiB', ''))
                    # KILL_TIMEOUT scales with the read cache size to allow a clean shutdown
                    params = {'VPOOL_MOUNTPOINT': storagedriver.mountpoint,
                              'HYPERVISOR_TYPE': storagedriver.storagerouter.pmachine.hvtype,
                              'VPOOL_NAME': vpool.name,
                              'CONFIG_PATH': sdc.remote_path,
                              'UUID': str(uuid.uuid4()),
                              'OVS_UID': client.run('id -u ovs').strip(),
                              'OVS_GID': client.run('id -g ovs').strip(),
                              'KILL_TIMEOUT': str(int(readcache_size / 1024.0 / 1024.0 / 6.0 + 30))}
                    vmware_mode = EtcdConfiguration.get('/ovs/framework/hosts/{0}/storagedriver|vmware_mode'.format(host_id))
                    dtl_service = 'ovs-dtl_{0}'.format(vpool.name)
                    ServiceManager.add_service(name='ovs-dtl', params=params, client=client, target_name=dtl_service)
                    # ALBA-backed vPools make the volumedriver depend on its proxy service
                    if vpool.backend_type.code == 'alba':
                        alba_proxy_service = 'ovs-albaproxy_{0}'.format(vpool.name)
                        dependencies = [alba_proxy_service]
                    else:
                        dependencies = None
                    if vmware_mode == 'ganesha':
                        template_name = 'ovs-ganesha'
                    else:
                        template_name = 'ovs-volumedriver'
                    voldrv_service = 'ovs-volumedriver_{0}'.format(vpool.name)
                    ServiceManager.add_service(name=template_name, params=params, client=client, target_name=voldrv_service, additional_dependencies=dependencies)
                    rem.os.remove(current_file)
                # Ganesha config, if available: regenerate from templates, substituting <KEY> markers
                current_file = '{0}/{1}_ganesha.conf'.format(configuration_dir, vpool.name)
                if rem.os.path.exists(current_file):
                    sdc = rem.StorageDriverConfiguration('storagedriver', vpool.guid, storagedriver.storagedriver_id)
                    contents = ''
                    for template in ['ganesha-core', 'ganesha-export']:
                        contents += client.file_read('/opt/OpenvStorage/config/templates/{0}.conf'.format(template))
                    params = {'VPOOL_NAME': vpool.name,
                              'VPOOL_MOUNTPOINT': '/mnt/{0}'.format(vpool.name),
                              'CONFIG_PATH': sdc.remote_path,
                              'NFS_FILESYSTEM_ID': storagedriver.storagerouter.ip.split('.', 2)[-1]}
                    for key, value in params.iteritems():
                        contents = contents.replace('<{0}>'.format(key), value)
                    client.file_write(current_file, contents)
def get(self, request, *args, **kwargs):
    """
    Fetches metadata: cluster identification, installed plugins, authentication metadata
    and — when a valid Bearer token is supplied — the authenticated user's identity and roles.
    NOTE(review): every exit returns the tuple (HttpResponse, dict) rather than an actual
    response object — presumably unpacked by a response-building decorator; confirm against
    the view framework before changing
    """
    _ = args, kwargs
    # Baseline payload returned on every path; authentication fields get overridden below
    data = {'authenticated': False,
            'authentication_state': None,
            'authentication_metadata': {},
            'username': None,
            'userguid': None,
            'roles': [],
            'identification': {},
            'storagerouter_ips': [sr.ip for sr in StorageRouterList.get_storagerouters()],
            'versions': list(settings.VERSION),
            'plugins': {}}
    try:
        # Gather plugin metadata
        plugins = {}
        # - Backends. BackendType plugins must set the has_plugin flag on True
        for backend_type in BackendTypeList.get_backend_types():
            if backend_type.has_plugin is True:
                if backend_type.code not in plugins:
                    plugins[backend_type.code] = []
                plugins[backend_type.code] += ['backend', 'gui']
        # - Generic plugins, as added to the configuration file(s)
        generic_plugins = EtcdConfiguration.get('/ovs/framework/plugins/installed|generic')
        for plugin_name in generic_plugins:
            if plugin_name not in plugins:
                plugins[plugin_name] = []
            plugins[plugin_name] += ['gui']
        data['plugins'] = plugins
        # Fill identification
        data['identification'] = {'cluster_id': EtcdConfiguration.get('/ovs/framework/cluster_id')}
        # Get authentication metadata (optional oauth2 settings are only added when present)
        authentication_metadata = {'ip': System.get_my_storagerouter().ip}
        for key in ['mode', 'authorize_uri', 'client_id', 'scope']:
            if EtcdConfiguration.exists('/ovs/framework/webapps|oauth2.{0}'.format(key)):
                authentication_metadata[key] = EtcdConfiguration.get('/ovs/framework/webapps|oauth2.{0}'.format(key))
        data['authentication_metadata'] = authentication_metadata
        # Gather authorization metadata
        # (dict(a.items() + b.items()) is the Python 2 idiom for merging, b winning)
        if 'HTTP_AUTHORIZATION' not in request.META:
            return HttpResponse, dict(data.items() + {'authentication_state': 'unauthenticated'}.items())
        authorization_type, access_token = request.META['HTTP_AUTHORIZATION'].split(' ')
        if authorization_type != 'Bearer':
            return HttpResponse, dict(data.items() + {'authentication_state': 'invalid_authorization_type'}.items())
        tokens = BearerTokenList.get_by_access_token(access_token)
        if len(tokens) != 1:
            return HttpResponse, dict(data.items() + {'authentication_state': 'invalid_token'}.items())
        token = tokens[0]
        if token.expiration < time.time():
            # Expired token: purge the token and its role junctions before reporting
            for junction in token.roles.itersafe():
                junction.delete()
            token.delete()
            return HttpResponse, dict(data.items() + {'authentication_state': 'token_expired'}.items())
        # Gather user metadata
        user = token.client.user
        if not user.is_active:
            return HttpResponse, dict(data.items() + {'authentication_state': 'inactive_user'}.items())
        roles = [j.role.code for j in token.roles]
        return HttpResponse, dict(data.items() + {'authenticated': True,
                                                  'authentication_state': 'authenticated',
                                                  'username': user.username,
                                                  'userguid': user.guid,
                                                  'roles': roles,
                                                  'plugins': plugins}.items())
    except Exception as ex:
        # Boundary handler: never let the metadata endpoint raise; report the failure state
        MetadataView._logger.exception('Unexpected exception: {0}'.format(ex))
        return HttpResponse, dict(data.items() + {'authentication_state': 'unexpected_exception'}.items())
def validate_vpool_sanity(expected_settings):
    """
    Check if all requirements are met for a healthy vPool: model attributes, storage driver
    configuration in Etcd, running services, Arakoon config, partition roles, required files
    and directories, and finally that the vPool is actually writeable
    :param expected_settings: Parameters used to create a vPool, which will be verified;
                              a dict mapping Storage Router -> creation settings
    :type expected_settings: dict
    :return: None
    :raises AssertionError / ValueError: on the first failed verification
    """
    if not isinstance(expected_settings, dict) or len(expected_settings) == 0:
        raise ValueError('Cannot validate vpool when no settings are passed')

    # Settings common to all Storage Routers are taken from an arbitrary entry
    generic_settings = expected_settings.values()[0]
    vpool_name = generic_settings['vpool_name']
    mountpoint = '/mnt/{0}'.format(vpool_name)
    backend_type = generic_settings['type']
    rdma_enabled = generic_settings['config_params']['dtl_transport'] == StorageDriverClient.FRAMEWORK_DTL_TRANSPORT_RSOCKET

    vpool = GeneralVPool.get_vpool_by_name(vpool_name=vpool_name)
    assert vpool is not None, 'Could not find vPool with name {0}'.format(vpool_name)
    vpool_config = GeneralVPool.get_configuration(vpool)

    # Verify some basic vPool attributes
    assert vpool.name == vpool_name, 'Expected name {0} for vPool'.format(vpool_name)
    assert vpool.backend_type.code == backend_type, 'Expected backend type {0}'.format(backend_type)
    assert vpool.status == VPool.STATUSES.RUNNING, 'vPool does not have RUNNING status'
    assert vpool.rdma_enabled == rdma_enabled, 'RDMA enabled setting is incorrect'
    assert set(expected_settings.keys()) == set([sd.storagerouter for sd in vpool.storagedrivers]), "vPool storagerouters don't match the expected Storage Routers"

    # Verify vPool Storage Driver configuration: actual and expected config_params must
    # match key-for-key ('dtl_enabled' and 'tlog_multiplier' excluded), with no leftovers
    expected_vpool_config = copy.deepcopy(generic_settings['config_params'])
    for key, value in vpool_config.iteritems():
        if key == 'dtl_enabled' or key == 'tlog_multiplier':
            continue
        if key not in expected_vpool_config:
            raise ValueError('Expected settings does not contain key {0}'.format(key))
        if value != expected_vpool_config[key]:
            raise ValueError('vPool does not have expected configuration {0} for key {1}'.format(expected_vpool_config[key], key))
        expected_vpool_config.pop(key)
    if len(expected_vpool_config) > 0:
        raise ValueError('Actual vPool configuration does not contain keys: {0}'.format(', '.join(expected_vpool_config.keys())))

    # Prepare some fields to check
    config = generic_settings['config_params']
    dtl_mode = config['dtl_mode']
    sco_size = config['sco_size']
    dedupe_mode = config['dedupe_mode']
    cluster_size = config['cluster_size']
    write_buffer = config['write_buffer']
    dtl_transport = config['dtl_transport']
    cache_strategy = config['cache_strategy']
    # @TODO: Add more validations for other expected settings (instead of None)
    # Expected storage driver config per Etcd section; a value of None means
    # 'key must exist but its value is not validated' (see the sub_key loop below)
    expected_config = {'backend_connection_manager': {'backend_interface_retries_on_error': 5,
                                                      'backend_interface_retry_interval_secs': 1,
                                                      'backend_interface_retry_backoff_multiplier': 2.0},
                       'content_addressed_cache': {'clustercache_mount_points': None,
                                                   'read_cache_serialization_path': u'/var/rsp/{0}'.format(vpool.name)},
                       'distributed_lock_store': {'dls_arakoon_cluster_id': None,
                                                  'dls_arakoon_cluster_nodes': None,
                                                  'dls_type': u'Arakoon'},
                       'distributed_transaction_log': {'dtl_path': None,
                                                       'dtl_transport': dtl_transport.upper()},
                       'event_publisher': {'events_amqp_routing_key': u'volumerouter',
                                           'events_amqp_uris': None},
                       'file_driver': {'fd_cache_path': None,
                                       'fd_extent_cache_capacity': u'1024',
                                       'fd_namespace': None},
                       'filesystem': {'fs_dtl_config_mode': u'Automatic',
                                      'fs_dtl_mode': u'{0}'.format(StorageDriverClient.VPOOL_DTL_MODE_MAP[dtl_mode]),
                                      'fs_enable_shm_interface': 1,
                                      'fs_file_event_rules': None,
                                      'fs_metadata_backend_arakoon_cluster_nodes': None,
                                      'fs_metadata_backend_mds_nodes': None,
                                      'fs_metadata_backend_type': u'MDS',
                                      'fs_raw_disk_suffix': None,
                                      'fs_virtual_disk_format': None},
                       'metadata_server': {'mds_nodes': None},
                       'scocache': {'backoff_gap': u'2GB',
                                    'scocache_mount_points': None,
                                    'trigger_gap': u'1GB'},
                       'threadpool_component': {'num_threads': 16},
                       'volume_manager': {'clean_interval': 1,
                                          'default_cluster_size': 1024 * cluster_size,
                                          'dtl_throttle_usecs': 4000,
                                          'metadata_path': None,
                                          'non_disposable_scos_factor': float(write_buffer) / StorageDriverClient.TLOG_MULTIPLIER_MAP[sco_size] / sco_size,
                                          'number_of_scos_in_tlog': StorageDriverClient.TLOG_MULTIPLIER_MAP[sco_size],
                                          'read_cache_default_behaviour': StorageDriverClient.VPOOL_CACHE_MAP[cache_strategy],
                                          'read_cache_default_mode': StorageDriverClient.VPOOL_DEDUPE_MAP[dedupe_mode],
                                          'tlog_path': None},
                       'volume_registry': {'vregistry_arakoon_cluster_id': u'voldrv',
                                           'vregistry_arakoon_cluster_nodes': None},
                       'volume_router': {'vrouter_backend_sync_timeout_ms': 5000,
                                         'vrouter_file_read_threshold': 1024,
                                         'vrouter_file_write_threshold': 1024,
                                         'vrouter_id': None,
                                         'vrouter_max_workers': 16,
                                         'vrouter_migrate_timeout_ms': 5000,
                                         'vrouter_min_workers': 4,
                                         'vrouter_redirect_timeout_ms': u'5000',
                                         'vrouter_routing_retries': 10,
                                         'vrouter_sco_multiplier': 1024,
                                         'vrouter_volume_read_threshold': 1024,
                                         'vrouter_volume_write_threshold': 1024},
                       'volume_router_cluster': {'vrouter_cluster_id': None}}
    # Services expected per node role; 'master' ones only on MASTER nodes, 'extra' on the rest
    vpool_services = {'all': ['ovs-watcher-volumedriver',
                              'ovs-dtl_{0}'.format(vpool.name),
                              'ovs-volumedriver_{0}'.format(vpool.name),
                              'ovs-volumerouter-consumer'],
                      'extra': [],
                      'master': ['ovs-arakoon-voldrv']}
    # Partition role -> sub-roles that must each be claimed by at least one Storage Driver;
    # entries are removed as they are found, leftovers raise at the very end
    sd_partitions = {'DB': ['MD', 'MDS', 'TLOG'],
                     'READ': ['None'],
                     'WRITE': ['FD', 'DTL', 'SCO'],
                     'SCRUB': ['None']}
    if backend_type == 'alba':
        # ALBA backend: vpool.metadata must match this schema (type, regex/range, optional flag)
        backend_metadata = {'name': (str, None),
                            'preset': (str, Toolbox.regex_preset),
                            'backend_guid': (str, Toolbox.regex_guid),
                            'arakoon_config': (dict, None),
                            'connection': (dict, {'host': (str, Toolbox.regex_ip, False),
                                                  'port': (int, {'min': 1, 'max': 65535}),
                                                  'client_id': (str, Toolbox.regex_guid),
                                                  'client_secret': (str, None),
                                                  'local': (bool, None)}),
                            'backend_info': (dict, {'policies': (list, None),
                                                    'sco_size': (float, None),
                                                    'frag_size': (float, None),
                                                    'total_size': (float, None),
                                                    'nsm_partition_guids': (list, Toolbox.regex_guid)})}
        required = {'backend': (dict, backend_metadata),
                    'backend_aa': (dict, backend_metadata, False)}
        Toolbox.verify_required_params(required_params=required, actual_params=vpool.metadata)
        vpool_services['all'].append("ovs-albaproxy_{0}".format(vpool.name))
        sd_partitions['WRITE'].append('FCACHE')
        expected_config['backend_connection_manager'].update({'alba_connection_host': None,
                                                              'alba_connection_port': None,
                                                              'alba_connection_preset': None,
                                                              'alba_connection_timeout': 15,
                                                              'backend_type': u'{0}'.format(vpool.backend_type.code.upper())})
    elif backend_type == 'distributed':
        expected_config['backend_connection_manager'].update({'backend_type': u'LOCAL',
                                                              'local_connection_path': u'{0}'.format(generic_settings['distributed_mountpoint'])})

    assert EtcdConfiguration.exists('/ovs/arakoon/voldrv/config', raw=True), 'Volumedriver arakoon does not exist'

    # Do some verifications for all SDs
    storage_ip = None
    voldrv_config = GeneralArakoon.get_config('voldrv')
    all_files = GeneralVPool.get_related_files(vpool=vpool)
    all_directories = GeneralVPool.get_related_directories(vpool=vpool)
    for storagedriver in vpool.storagedrivers:
        storagerouter = storagedriver.storagerouter
        root_client = SSHClient(storagerouter, username='******')

        # Etcd sections must match expected_config exactly (no extra, no missing sections)
        assert EtcdConfiguration.exists('/ovs/vpools/{0}/hosts/{1}/config'.format(vpool.guid, storagedriver.storagedriver_id), raw=True), 'vPool config not found in etcd'
        current_config_sections = set([item for item in EtcdConfiguration.list('/ovs/vpools/{0}/hosts/{1}/config'.format(vpool.guid, storagedriver.storagedriver_id))])
        assert not current_config_sections.difference(set(expected_config.keys())), 'New section appeared in the storage driver config in etcd'
        assert not set(expected_config.keys()).difference(current_config_sections), 'Config section expected for storage driver, but not found in etcd'
        for key, values in expected_config.iteritems():
            current_config = EtcdConfiguration.get('/ovs/vpools/{0}/hosts/{1}/config/{2}'.format(vpool.guid, storagedriver.storagedriver_id, key))
            assert set(current_config.keys()).union(set(values.keys())) == set(values.keys()), 'Not all expected keys match for key "{0}" on Storage Driver {1}'.format(key, storagedriver.name)
            for sub_key, value in current_config.iteritems():
                expected_value = values[sub_key]
                if expected_value is None:
                    # None acts as a wildcard: presence was checked, value is not validated
                    continue
                assert value == expected_value, 'Key: {0} - Sub key: {1} - Value: {2} - Expected value: {3}'.format(key, sub_key, value, expected_value)

        # Check services
        if storagerouter.node_type == 'MASTER':
            for service_name in vpool_services['all'] + vpool_services['master']:
                # The voldrv arakoon only runs on masters carrying the DB role
                if service_name == 'ovs-arakoon-voldrv' and GeneralStorageDriver.has_role(storagedriver, 'DB') is False:
                    continue
                if ServiceManager.get_service_status(name=service_name, client=root_client) is not True:
                    raise ValueError('Service {0} is not running on node {1}'.format(service_name, storagerouter.ip))
        else:
            for service_name in vpool_services['all'] + vpool_services['extra']:
                if ServiceManager.get_service_status(name=service_name, client=root_client) is not True:
                    raise ValueError('Service {0} is not running on node {1}'.format(service_name, storagerouter.ip))

        # Check arakoon config
        if not voldrv_config.has_section(storagerouter.machine_id):
            raise ValueError('Voldrv arakoon cluster does not have section {0}'.format(storagerouter.machine_id))

        # Basic SD checks; all Storage Drivers of the vPool must share the same storage IP
        assert storagedriver.cluster_ip == storagerouter.ip, 'Incorrect cluster IP. Expected: {0} - Actual: {1}'.format(storagerouter.ip, storagedriver.cluster_ip)
        assert storagedriver.mountpoint == '/mnt/{0}'.format(vpool.name), 'Incorrect mountpoint. Expected: {0} - Actual: {1}'.format(mountpoint, storagedriver.mountpoint)
        if storage_ip is not None:
            assert storagedriver.storage_ip == storage_ip, 'Incorrect storage IP. Expected: {0} - Actual: {1}'.format(storage_ip, storagedriver.storage_ip)
        storage_ip = storagedriver.storage_ip

        # Check required directories and files
        if storagerouter.guid not in all_directories:
            raise ValueError('Could not find directory information for Storage Router {0}'.format(storagerouter.ip))
        if storagerouter.guid not in all_files:
            raise ValueError('Could not find file information for Storage Router {0}'.format(storagerouter.ip))

        for directory in all_directories[storagerouter.guid]:
            if root_client.dir_exists(directory) is False:
                raise ValueError('Directory {0} does not exist on Storage Router {1}'.format(directory, storagerouter.ip))
        for file_name in all_files[storagerouter.guid]:
            if root_client.file_exists(file_name) is False:
                raise ValueError('File {0} does not exist on Storage Router {1}'.format(file_name, storagerouter.ip))

        # Tick off the partition (sub-)roles claimed by this Storage Driver
        for partition in storagedriver.partitions:
            if partition.role in sd_partitions and partition.sub_role in sd_partitions[partition.role]:
                sd_partitions[partition.role].remove(partition.sub_role)
            elif partition.role in sd_partitions and partition.sub_role is None:
                sd_partitions[partition.role].remove('None')

        # Verify vPool writeable: create, write to and delete a test volume
        if storagerouter.pmachine.hvtype == 'VMWARE':
            GeneralVPool.mount_vpool(vpool=vpool, root_client=root_client)
        vdisk = GeneralVDisk.create_volume(size=10, vpool=vpool, root_client=root_client)
        GeneralVDisk.write_to_volume(vdisk=vdisk, vpool=vpool, root_client=root_client, count=10, bs='1M', input_type='random')
        GeneralVDisk.delete_volume(vdisk=vdisk, vpool=vpool, root_client=root_client)

    # Any partition (sub-)role left unclaimed by every Storage Driver is an error
    for role, sub_roles in sd_partitions.iteritems():
        for sub_role in sub_roles:
            raise ValueError('Not a single Storage Driver found with partition role {0} and sub-role {1}'.format(role, sub_role))
def post_upgrade(client):
    """
    Upgrade actions after the new packages have actually been installed:
    - removes the legacy /opt/OpenvStorage/config/ovs.json once Etcd holds the cluster config
    - migrates volumedriver, albaproxy and ganesha configuration files from the local
      configuration directory into Etcd / StorageDriverConfiguration, re-registering the
      accompanying services, and deletes the migrated files
    :param client: SSHClient object
    :return: None
    """
    # If we can reach Etcd with a valid config, and there's still an old config file present, delete it
    from ovs.extensions.db.etcd.configuration import EtcdConfiguration
    path = '/opt/OpenvStorage/config/ovs.json'
    if EtcdConfiguration.exists('/ovs/framework/cluster_id') and client.file_exists(path):
        client.file_delete(path)
    # Migrate volumedriver & albaproxy configuration files
    import uuid
    from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration
    from ovs.dal.lists.storagedriverlist import StorageDriverList
    from ovs.extensions.generic.system import System
    # All file operations run on the remote host via 'rem'; only Etcd writes happen locally
    with remote(client.ip, [StorageDriverConfiguration, os, open, json, System], username='******') as rem:
        configuration_dir = '{0}/storagedriver/storagedriver'.format(EtcdConfiguration.get('/ovs/framework/paths|cfgdir'))
        host_id = rem.System.get_my_machine_id()
        if rem.os.path.exists(configuration_dir):
            for storagedriver in StorageDriverList.get_storagedrivers_by_storagerouter(rem.System.get_my_storagerouter().guid):
                vpool = storagedriver.vpool
                if storagedriver.alba_proxy is not None:
                    # Etcd template with '{0}' left as a placeholder for the leaf name (abm/main)
                    config_tree = '/ovs/vpools/{0}/proxies/{1}/config/{{0}}'.format(vpool.guid, storagedriver.alba_proxy.guid)
                    # ABM config: move the raw .cfg contents into Etcd, then delete the file
                    abm_config = '{0}/{1}_alba.cfg'.format(configuration_dir, vpool.name)
                    if rem.os.path.exists(abm_config):
                        with rem.open(abm_config) as config_file:
                            EtcdConfiguration.set(config_tree.format('abm'), config_file.read(), raw=True)
                        rem.os.remove(abm_config)
                    # Albaproxy config: rewrite the file-based ABM reference to an etcd:// URL
                    alba_config = '{0}/{1}_alba.json'.format(configuration_dir, vpool.name)
                    if rem.os.path.exists(alba_config):
                        with rem.open(alba_config) as config_file:
                            config = rem.json.load(config_file)
                            del config['albamgr_cfg_file']
                            config['albamgr_cfg_url'] = 'etcd://127.0.0.1:2379{0}'.format(config_tree.format('abm'))
                            EtcdConfiguration.set(config_tree.format('main'), json.dumps(config, indent=4), raw=True)
                        # Re-register the proxy service against the migrated configuration
                        params = {'VPOOL_NAME': vpool.name,
                                  'VPOOL_GUID': vpool.guid,
                                  'PROXY_ID': storagedriver.alba_proxy.guid}
                        alba_proxy_service = 'ovs-albaproxy_{0}'.format(vpool.name)
                        ServiceManager.add_service(name='ovs-albaproxy', params=params, client=client, target_name=alba_proxy_service)
                        rem.os.remove(alba_config)
                # Volumedriver config: rename the 'failovercache' section to
                # 'distributed_transaction_log' and persist through StorageDriverConfiguration
                current_file = '{0}/{1}.json'.format(configuration_dir, vpool.name)
                if rem.os.path.exists(current_file):
                    readcache_size = 0
                    with rem.open(current_file) as config_file:
                        config = rem.json.load(config_file)
                        config['distributed_transaction_log'] = {}
                        config['distributed_transaction_log']['dtl_transport'] = config['failovercache']['failovercache_transport']
                        config['distributed_transaction_log']['dtl_path'] = config['failovercache']['failovercache_path']
                        config['volume_manager']['dtl_throttle_usecs'] = config['volume_manager']['foc_throttle_usecs']
                        del config['failovercache']
                        del config['volume_manager']['foc_throttle_usecs']
                        sdc = rem.StorageDriverConfiguration('storagedriver', vpool.guid, storagedriver.storagedriver_id)
                        sdc.configuration = config
                        sdc.save(reload_config=False)
                        # Sum read cache sizes (values stored as '<n>KiB' strings)
                        for mountpoint in config['content_addressed_cache']['clustercache_mount_points']:
                            readcache_size += int(mountpoint['size'].replace('KiB', ''))
                    # KILL_TIMEOUT scales with the read cache size to allow a clean shutdown
                    params = {'VPOOL_MOUNTPOINT': storagedriver.mountpoint,
                              'HYPERVISOR_TYPE': storagedriver.storagerouter.pmachine.hvtype,
                              'VPOOL_NAME': vpool.name,
                              'CONFIG_PATH': sdc.remote_path,
                              'UUID': str(uuid.uuid4()),
                              'OVS_UID': client.run('id -u ovs').strip(),
                              'OVS_GID': client.run('id -g ovs').strip(),
                              'KILL_TIMEOUT': str(int(readcache_size / 1024.0 / 1024.0 / 6.0 + 30))}
                    vmware_mode = EtcdConfiguration.get('/ovs/framework/hosts/{0}/storagedriver|vmware_mode'.format(host_id))
                    dtl_service = 'ovs-dtl_{0}'.format(vpool.name)
                    ServiceManager.add_service(name='ovs-dtl', params=params, client=client, target_name=dtl_service)
                    # ALBA-backed vPools make the volumedriver depend on its proxy service
                    if vpool.backend_type.code == 'alba':
                        alba_proxy_service = 'ovs-albaproxy_{0}'.format(vpool.name)
                        dependencies = [alba_proxy_service]
                    else:
                        dependencies = None
                    if vmware_mode == 'ganesha':
                        template_name = 'ovs-ganesha'
                    else:
                        template_name = 'ovs-volumedriver'
                    voldrv_service = 'ovs-volumedriver_{0}'.format(vpool.name)
                    ServiceManager.add_service(name=template_name, params=params, client=client, target_name=voldrv_service, additional_dependencies=dependencies)
                    rem.os.remove(current_file)
                # Ganesha config, if available: regenerate from templates, substituting <KEY> markers
                current_file = '{0}/{1}_ganesha.conf'.format(configuration_dir, vpool.name)
                if rem.os.path.exists(current_file):
                    sdc = rem.StorageDriverConfiguration('storagedriver', vpool.guid, storagedriver.storagedriver_id)
                    contents = ''
                    for template in ['ganesha-core', 'ganesha-export']:
                        contents += client.file_read('/opt/OpenvStorage/config/templates/{0}.conf'.format(template))
                    params = {'VPOOL_NAME': vpool.name,
                              'VPOOL_MOUNTPOINT': '/mnt/{0}'.format(vpool.name),
                              'CONFIG_PATH': sdc.remote_path,
                              'NFS_FILESYSTEM_ID': storagedriver.storagerouter.ip.split('.', 2)[-1]}
                    for key, value in params.iteritems():
                        contents = contents.replace('<{0}>'.format(key), value)
                    client.file_write(current_file, contents)
def check_vpool_cleanup(vpool_info, storagerouters=None):
    """
    Check if everything related to a vPool has been cleaned up on the storagerouters provided
    vpool_info should be a dictionary containing:
        - type
        - guid
        - files
        - directories
        - name (optional)
        - vpool (optional)
        If vpool is provided:
            - storagerouters need to be provided, because on these Storage Routers, we check whether the vPool has been cleaned up
        If name is provided:
            - If storagerouters is NOT provided, all Storage Routers will be checked for a correct vPool removal
            - If storagerouters is provided, only these Storage Routers will be checked for a correct vPool removal
    :param vpool_info: Information about the vPool
    :param storagerouters: Storage Routers to check if vPool has been cleaned up
    :return: None
    :raises AssertionError / ValueError / RuntimeError: when a leftover of the vPool is found
    """
    # Validate the vpool_info contract: required keys present, 'vpool' xor 'name' supplied
    for required_param in ['type', 'guid', 'files', 'directories']:
        if required_param not in vpool_info:
            raise ValueError('Incorrect vpool_info provided')
    if 'vpool' in vpool_info and 'name' in vpool_info:
        raise ValueError('vpool and name are mutually exclusive')
    if 'vpool' not in vpool_info and 'name' not in vpool_info:
        raise ValueError('Either vpool or vpool_name needs to be provided')

    vpool = vpool_info.get('vpool')
    vpool_name = vpool_info.get('name')
    vpool_guid = vpool_info['guid']
    vpool_type = vpool_info['type']
    files = vpool_info['files']
    directories = vpool_info['directories']

    supported_backend_types = GeneralBackend.get_valid_backendtypes()
    if vpool_type not in supported_backend_types:
        raise ValueError('Unsupported Backend Type provided. Please choose from: {0}'.format(', '.join(supported_backend_types)))
    if storagerouters is None:
        storagerouters = GeneralStorageRouter.get_storage_routers()

    if vpool_name is not None:
        assert GeneralVPool.get_vpool_by_name(vpool_name=vpool_name) is None, 'A vPool with name {0} still exists'.format(vpool_name)

    # Prepare some fields to check
    vpool_name = vpool.name if vpool else vpool_name
    # vPool-specific services that must no longer be configured anywhere
    vpool_services = ['ovs-dtl_{0}'.format(vpool_name),
                      'ovs-volumedriver_{0}'.format(vpool_name)]
    if vpool_type == 'alba':
        vpool_services.append('ovs-albaproxy_{0}'.format(vpool_name))

    # Check etcd
    if vpool is None:
        # Fully removed vPool: its entire Etcd subtree must be gone
        assert EtcdConfiguration.exists('/ovs/vpools/{0}'.format(vpool_guid), raw=True) is False, 'vPool config still found in etcd'
    else:
        # Partially removed vPool (shrunk): the remaining Storage Drivers in the model
        # and the host entries in Etcd must match exactly
        remaining_sd_ids = set([storagedriver.storagedriver_id for storagedriver in vpool.storagedrivers])
        current_sd_ids = set([item for item in EtcdConfiguration.list('/ovs/vpools/{0}/hosts'.format(vpool_guid))])
        assert not remaining_sd_ids.difference(current_sd_ids), 'There are more storagedrivers modelled than present in etcd'
        assert not current_sd_ids.difference(remaining_sd_ids), 'There are more storagedrivers in etcd than present in model'

    # Perform checks on all storagerouters where vpool was removed
    for storagerouter in storagerouters:
        # Check management center
        mgmt_center = GeneralManagementCenter.get_mgmt_center(pmachine=storagerouter.pmachine)
        if mgmt_center is not None:
            assert GeneralManagementCenter.is_host_configured(pmachine=storagerouter.pmachine) is False, 'Management Center is still configured on Storage Router {0}'.format(storagerouter.ip)

        # Check MDS services
        mds_services = GeneralService.get_services_by_name(ServiceType.SERVICE_TYPES.MD_SERVER)
        assert len([mds_service for mds_service in mds_services if mds_service.storagerouter_guid == storagerouter.guid]) == 0, 'There are still MDS services present for Storage Router {0}'.format(storagerouter.ip)

        # Check services
        root_client = SSHClient(storagerouter, username='******')
        for service in vpool_services:
            if ServiceManager.has_service(service, client=root_client):
                raise RuntimeError('Service {0} is still configured on Storage Router {1}'.format(service, storagerouter.ip))

        # Check KVM vpool: the libvirt storage pool with the vPool's name must be gone
        if storagerouter.pmachine.hvtype == 'KVM':
            vpool_overview = root_client.run('virsh pool-list --all').splitlines()
            # Drop the two header lines of the virsh table output
            vpool_overview.pop(1)
            vpool_overview.pop(0)
            # NOTE(review): 'vpool_info' is rebound here as the loop variable, shadowing the
            # parameter; the parameter is no longer needed at this point, but rename with care
            for vpool_info in vpool_overview:
                kvm_vpool_name = vpool_info.split()[0].strip()
                if vpool_name == kvm_vpool_name:
                    raise ValueError('vPool {0} is still defined on Storage Router {1}'.format(vpool_name, storagerouter.ip))

        # Check file and directory existence
        if storagerouter.guid not in directories:
            raise ValueError('Could not find directory information for Storage Router {0}'.format(storagerouter.ip))
        if storagerouter.guid not in files:
            raise ValueError('Could not find file information for Storage Router {0}'.format(storagerouter.ip))

        for directory in directories[storagerouter.guid]:
            assert root_client.dir_exists(directory) is False, 'Directory {0} still exists on Storage Router {1}'.format(directory, storagerouter.ip)
        for file_name in files[storagerouter.guid]:
            assert root_client.file_exists(file_name) is False, 'File {0} still exists on Storage Router {1}'.format(file_name, storagerouter.ip)

        # Look for errors in storagedriver log: plain errors are only printed,
        # fatal errors fail the check
        for error_type in ['error', 'fatal']:
            cmd = "cat -vet /var/log/ovs/volumedriver/{0}.log | tail -1000 | grep ' {1} '; echo true > /dev/null".format(vpool_name, error_type)
            errors = []
            for line in root_client.run(cmd).splitlines():
                # Known benign noise; ignore
                if "HierarchicalArakoon" in line:
                    continue
                errors.append(line)
            if len(errors) > 0:
                if error_type == 'error':
                    print 'Volumedriver log file contains errors on Storage Router {0}\n - {1}'.format(storagerouter.ip, '\n - '.join(errors))
                else:
                    raise RuntimeError('Fatal errors found in volumedriver log file on Storage Router {0}\n - {1}'.format(storagerouter.ip, '\n - '.join(errors)))
def migrate(previous_version, master_ips=None, extra_ips=None):
    """
    Migrates from any version to any version, running all migrations required
    If previous_version is for example 0 and this script is at version 3 it will execute two steps:
      - 1 > 2
      - 2 > 3
    :param previous_version: The previous version from which to start the migration.
    :param master_ips: IP addresses of the MASTER nodes
    :param extra_ips: IP addresses of the EXTRA nodes
    :return: The version this migrator ended up at (int)
    """
    working_version = previous_version

    # Version 1 introduced:
    # - Flexible SSD layout
    if working_version < 1:
        # NOTE: each version block swallows all errors via logger.exception and still
        # bumps working_version - migrations are best-effort and must never block setup
        try:
            from ovs.extensions.generic.configuration import Configuration
            if Configuration.exists('ovs.arakoon'):
                Configuration.delete('ovs.arakoon', remove_root=True)
            Configuration.set('ovs.core.ovsdb', '/opt/OpenvStorage/db')
        except:
            logger.exception('Error migrating to version 1')
        working_version = 1

    # Version 2 introduced:
    # - Registration
    if working_version < 2:
        try:
            import time
            from ovs.extensions.generic.configuration import Configuration
            # Only seed registration defaults once; never overwrite an existing flag
            if not Configuration.exists('ovs.core.registered'):
                Configuration.set('ovs.core.registered', False)
                Configuration.set('ovs.core.install_time', time.time())
        except:
            logger.exception('Error migrating to version 2')
        working_version = 2

    # Version 3 introduced:
    # - New arakoon clients
    if working_version < 3:
        try:
            # reload() ensures the freshly installed module code is used instead of a
            # stale in-process copy from before the package upgrade
            from ovs.extensions.db.arakoon import ArakoonInstaller
            reload(ArakoonInstaller)
            from ovs.extensions.db.arakoon.ArakoonInstaller import ArakoonInstaller
            from ovs.extensions.generic.sshclient import SSHClient
            from ovs.extensions.generic.configuration import Configuration
            if master_ips is not None:
                for ip in master_ips:
                    client = SSHClient(ip)
                    if client.dir_exists(ArakoonInstaller.ARAKOON_CONFIG_DIR):
                        for cluster_name in client.dir_list(ArakoonInstaller.ARAKOON_CONFIG_DIR):
                            try:
                                # Best effort per cluster; a single failing cluster
                                # must not abort redeploying the others
                                ArakoonInstaller.deploy_cluster(cluster_name, ip)
                            except:
                                pass
            if Configuration.exists('ovs.core.storage.persistent'):
                Configuration.set('ovs.core.storage.persistent', 'pyrakoon')
        except:
            logger.exception('Error migrating to version 3')
        working_version = 3

    # Version 4 introduced:
    # - Etcd
    if working_version < 4:
        try:
            import os
            import json
            from ConfigParser import RawConfigParser
            from ovs.extensions.db.etcd import installer
            reload(installer)
            from ovs.extensions.db.etcd.installer import EtcdInstaller
            from ovs.extensions.db.etcd.configuration import EtcdConfiguration
            from ovs.extensions.generic.system import System
            host_id = System.get_my_machine_id()
            etcd_migrate = False
            if EtcdInstaller.has_cluster('127.0.0.1', 'config'):
                # A local etcd 'config' cluster already exists: only the file
                # migration below is still required
                etcd_migrate = True
            else:
                if master_ips is not None and extra_ips is not None:
                    # Look for an existing 'config' cluster anywhere in the grid;
                    # join it if found, otherwise bootstrap a new one on this node
                    cluster_ip = None
                    for ip in master_ips + extra_ips:
                        if EtcdInstaller.has_cluster(ip, 'config'):
                            cluster_ip = ip
                            break
                    node_ip = None
                    path = '/opt/OpenvStorage/config/ovs.json'
                    if os.path.exists(path):
                        with open(path) as config_file:
                            config = json.load(config_file)
                            node_ip = config['grid']['ip']
                    if node_ip is not None:
                        if cluster_ip is None:
                            EtcdInstaller.create_cluster('config', node_ip)
                            EtcdConfiguration.initialize()
                            EtcdConfiguration.initialize_host(host_id)
                        else:
                            EtcdInstaller.extend_cluster(cluster_ip, node_ip, 'config')
                            EtcdConfiguration.initialize_host(host_id)
                        etcd_migrate = True
            if etcd_migrate is True:
                # Migrating configuration files
                path = '/opt/OpenvStorage/config/ovs.json'
                if os.path.exists(path):
                    with open(path) as config_file:
                        config = json.load(config_file)
                        EtcdConfiguration.set('/ovs/framework/cluster_id', config['support']['cid'])
                        # Keep the oldest known install time across all nodes
                        if not EtcdConfiguration.exists('/ovs/framework/install_time'):
                            EtcdConfiguration.set('/ovs/framework/install_time', config['core']['install_time'])
                        else:
                            EtcdConfiguration.set('/ovs/framework/install_time', min(EtcdConfiguration.get('/ovs/framework/install_time'), config['core']['install_time']))
                        EtcdConfiguration.set('/ovs/framework/registered', config['core']['registered'])
                        EtcdConfiguration.set('/ovs/framework/plugins/installed', config['plugins'])
                        EtcdConfiguration.set('/ovs/framework/stores', config['core']['storage'])
                        EtcdConfiguration.set('/ovs/framework/paths', {'cfgdir': config['core']['cfgdir'],
                                                                       'basedir': config['core']['basedir'],
                                                                       'ovsdb': config['core']['ovsdb']})
                        EtcdConfiguration.set('/ovs/framework/support', {'enablesupport': config['support']['enablesupport'],
                                                                        'enabled': config['support']['enabled'],
                                                                        'interval': config['support']['interval']})
                        EtcdConfiguration.set('/ovs/framework/storagedriver', {'mds_safety': config['storagedriver']['mds']['safety'],
                                                                              'mds_tlogs': config['storagedriver']['mds']['tlogs'],
                                                                              'mds_maxload': config['storagedriver']['mds']['maxload']})
                        EtcdConfiguration.set('/ovs/framework/webapps', {'html_endpoint': config['webapps']['html_endpoint'],
                                                                        'oauth2': config['webapps']['oauth2']})
                        EtcdConfiguration.set('/ovs/framework/messagequeue', {'endpoints': [],
                                                                             'protocol': config['core']['broker']['protocol'],
                                                                             'user': config['core']['broker']['login'],
                                                                             'port': config['core']['broker']['port'],
                                                                             'password': config['core']['broker']['password'],
                                                                             'queues': config['core']['broker']['queues']})
                        # '{{0}}' survives the first .format() as a literal '{0}' so the
                        # per-host sub-key can be filled in below
                        host_key = '/ovs/framework/hosts/{0}{{0}}'.format(host_id)
                        EtcdConfiguration.set(host_key.format('/storagedriver'), {'rsp': config['storagedriver']['rsp'],
                                                                                 'vmware_mode': config['storagedriver']['vmware_mode']})
                        EtcdConfiguration.set(host_key.format('/ports'), config['ports'])
                        EtcdConfiguration.set(host_key.format('/setupcompleted'), config['core']['setupcompleted'])
                        EtcdConfiguration.set(host_key.format('/versions'), config['core'].get('versions', {}))
                        EtcdConfiguration.set(host_key.format('/type'), config['core']['nodetype'])
                        EtcdConfiguration.set(host_key.format('/ip'), config['grid']['ip'])
                # Move the memcache client config into etcd and delete the file
                path = '{0}/memcacheclient.cfg'.format(EtcdConfiguration.get('/ovs/framework/paths|cfgdir'))
                if os.path.exists(path):
                    config = RawConfigParser()
                    config.read(path)
                    nodes = [config.get(node.strip(), 'location').strip() for node in config.get('main', 'nodes').split(',')]
                    EtcdConfiguration.set('/ovs/framework/memcache|endpoints', nodes)
                    os.remove(path)
                # Move the rabbitmq client config into etcd and delete the file
                path = '{0}/rabbitmqclient.cfg'.format(EtcdConfiguration.get('/ovs/framework/paths|cfgdir'))
                if os.path.exists(path):
                    config = RawConfigParser()
                    config.read(path)
                    nodes = [config.get(node.strip(), 'location').strip() for node in config.get('main', 'nodes').split(',')]
                    EtcdConfiguration.set('/ovs/framework/messagequeue|endpoints', nodes)
                    os.remove(path)
                # Migrate arakoon configuration files
                from ovs.extensions.db.arakoon import ArakoonInstaller
                reload(ArakoonInstaller)
                from ovs.extensions.db.arakoon.ArakoonInstaller import ArakoonInstaller, ArakoonClusterConfig
                from ovs.extensions.generic.sshclient import SSHClient
                if master_ips is not None:
                    config_dir = '/opt/OpenvStorage/config/arakoon/'
                    for ip in master_ips:
                        client = SSHClient(ip)
                        if client.dir_exists(config_dir):
                            for cluster_name in client.dir_list(config_dir):
                                try:
                                    # Push the on-disk cluster config into etcd,
                                    # then redeploy the cluster from etcd
                                    with open('{0}/{1}/{1}.cfg'.format(config_dir, cluster_name)) as config_file:
                                        EtcdConfiguration.set(ArakoonClusterConfig.ETCD_CONFIG_KEY.format(cluster_name), config_file.read(), raw=True)
                                    ArakoonInstaller.deploy_cluster(cluster_name, ip)
                                except:
                                    logger.exception('Error migrating {0} on {1}'.format(cluster_name, ip))
                            client.dir_delete(config_dir)
        except:
            logger.exception('Error migrating to version 4')
        working_version = 4

    return working_version
def get(self, request, *args, **kwargs):
    """
    Fetches metadata
    """
    _ = args, kwargs
    data = {'authenticated': False,
            'authentication_state': None,
            'authentication_metadata': {},
            'username': None,
            'userguid': None,
            'roles': [],
            'identification': {},
            'storagerouter_ips': [sr.ip for sr in StorageRouterList.get_storagerouters()],
            'versions': list(settings.VERSION),
            'plugins': {},
            'registration': {'registered': False, 'remaining': None}}

    def respond(updates):
        # Merge the metadata gathered so far with the final state fields
        return HttpResponse, dict(data.items() + updates.items())

    try:
        # Gather plugin metadata
        plugin_map = {}
        # - Backends. BackendType plugins must set the has_plugin flag on True
        for backend_type in BackendTypeList.get_backend_types():
            if backend_type.has_plugin is True:
                plugin_map.setdefault(backend_type.code, []).extend(['backend', 'gui'])
        # - Generic plugins, as added to the configuration file(s)
        for plugin_name in EtcdConfiguration.get('/ovs/framework/plugins/installed|generic'):
            plugin_map.setdefault(plugin_name, []).append('gui')
        data['plugins'] = plugin_map

        # Fill identification
        data['identification'] = {'cluster_id': EtcdConfiguration.get('/ovs/framework/cluster_id')}

        # Registration data
        registered = EtcdConfiguration.get('/ovs/framework/registered')
        data['registration']['registered'] = registered
        if registered is False:
            install_time = EtcdConfiguration.get('/ovs/framework/install_time')
            if install_time is not None:
                # 30-day grace period expressed in seconds; 'remaining' is reported in days
                grace_period = 30 * 24 * 60 * 60
                data['registration']['remaining'] = (grace_period - time.time() + install_time) / 24 / 60 / 60

        # Get authentication metadata
        auth_metadata = {'ip': System.get_my_storagerouter().ip}
        for field in ['mode', 'authorize_uri', 'client_id', 'scope']:
            oauth2_key = '/ovs/framework/webapps|oauth2.{0}'.format(field)
            if EtcdConfiguration.exists(oauth2_key):
                auth_metadata[field] = EtcdConfiguration.get(oauth2_key)
        data['authentication_metadata'] = auth_metadata

        # Gather authorization metadata
        if 'HTTP_AUTHORIZATION' not in request.META:
            return respond({'authentication_state': 'unauthenticated'})
        # A malformed header (not exactly two space-separated parts) raises and is
        # reported as 'unexpected_exception' by the handler below
        authorization_type, access_token = request.META['HTTP_AUTHORIZATION'].split(' ')
        if authorization_type != 'Bearer':
            return respond({'authentication_state': 'invalid_authorization_type'})
        token_matches = BearerTokenList.get_by_access_token(access_token)
        if len(token_matches) != 1:
            return respond({'authentication_state': 'invalid_token'})
        bearer_token = token_matches[0]
        if bearer_token.expiration < time.time():
            # Expired: remove the token and its role junctions before rejecting
            for role_junction in bearer_token.roles.itersafe():
                role_junction.delete()
            bearer_token.delete()
            return respond({'authentication_state': 'token_expired'})

        # Gather user metadata
        account = bearer_token.client.user
        if not account.is_active:
            return respond({'authentication_state': 'inactive_user'})
        role_codes = [junction.role.code for junction in bearer_token.roles]
        return respond({'authenticated': True,
                        'authentication_state': 'authenticated',
                        'username': account.username,
                        'userguid': account.guid,
                        'roles': role_codes,
                        'plugins': plugin_map})
    except Exception as ex:
        logger.exception('Unexpected exception: {0}'.format(ex))
        return respond({'authentication_state': 'unexpected_exception'})
def _storage_stack(self):
    """
    Returns a live list of all disks known to this AlbaBackend

    Builds a nested dict {node_id: {disk_name: disk_info}} where each disk_info
    carries an 'asds' dict, merging three information layers in order:
      1. the model (DAL objects),
      2. live data fetched from each ALBA node (in parallel threads),
      3. OSD claim/error state from the 'alba' CLI.
    """
    from ovs.dal.lists.albanodelist import AlbaNodeList
    from ovs.dal.lists.albabackendlist import AlbaBackendList

    if len(self.abm_services) == 0:
        return {}  # No ABM services yet, so backend not fully installed yet

    # storage_map: node_id -> disk_id -> disk entry; asd_map: flat asd_id -> asd entry
    # (both maps share the same asd entry dicts, so updates are visible in both)
    storage_map = {}
    asd_map = {}

    # alba_id -> AlbaBackend, used to resolve which backend an OSD belongs to
    alba_backend_map = {}
    for alba_backend in AlbaBackendList.get_albabackends():
        alba_backend_map[alba_backend.alba_id] = alba_backend

    # Load information based on the model
    # Entries start as 'error'/'unknown' and are upgraded by the live data below
    alba_nodes = AlbaNodeList.get_albanodes()
    for node in alba_nodes:
        node_id = node.node_id
        storage_map[node_id] = {}
        for disk in node.disks:
            disk_id = disk.name
            storage_map[node_id][disk_id] = {'name': disk_id,
                                             'guid': disk.guid,
                                             'status': 'error',
                                             'status_detail': 'unknown',
                                             'asds': {}}
            for asd in disk.asds:
                asd_id = asd.asd_id
                data = {'asd_id': asd_id,
                        'guid': asd.guid,
                        'status': 'error',
                        'status_detail': 'unknown',
                        'alba_backend_guid': asd.alba_backend_guid}
                asd_map[asd_id] = data
                storage_map[node_id][disk_id]['asds'][asd_id] = data

    # Load information from node
    # Runs in a thread per node; each thread mutates only its own node's sub-dict
    # of storage_map (plus asd_map entries for newly discovered ASDs)
    def _load_live_info(_node, _node_data):
        # Live disk information
        try:
            disk_data = _node.client.get_disks()
        except (requests.ConnectionError, requests.Timeout):
            # Node unreachable: mark every modelled disk, continue with empty live data
            for entry in _node_data.values():
                entry['status_detail'] = 'nodedown'
            disk_data = {}
        for _disk_id, disk_info in disk_data.iteritems():
            if _disk_id in _node_data:
                entry = _node_data[_disk_id]
            else:
                # Disk known to the node but not (yet) modelled
                entry = {'name': _disk_id,
                         'status': 'unknown',
                         'status_detail': '',
                         'asds': {}}
                _node_data[_disk_id] = entry
            entry.update(disk_info)
            if disk_info['state'] == 'ok':
                entry['status'] = 'uninitialized' if disk_info['available'] is True else 'initialized'
                entry['status_detail'] = ''
            else:
                entry['status'] = disk_info['state']
                entry['status_detail'] = disk_info.get('state_detail', '')
        # Live ASD information
        try:
            _asd_data = _node.client.get_asds()
        except (requests.ConnectionError, requests.Timeout):
            for disk_entry in _node_data.values():
                for entry in disk_entry['asds'].values():
                    entry['status_detail'] = 'nodedown'
            _asd_data = {}
        for _disk_id, asds in _asd_data.iteritems():
            if _disk_id not in _node_data:
                continue  # ASD reported for a disk we know nothing about
            for _asd_id, asd_info in asds.iteritems():
                entry = {'asd_id': _asd_id,
                         'status': 'error' if asd_info['state'] == 'error' else 'initialized',
                         'status_detail': asd_info.get('state_detail', ''),
                         'state': asd_info['state'],
                         'state_detail': asd_info.get('state_detail', '')}
                if _asd_id not in _node_data[_disk_id]['asds']:
                    _node_data[_disk_id]['asds'][_asd_id] = entry
                    asd_map[_asd_id] = entry
                else:
                    _node_data[_disk_id]['asds'][_asd_id].update(entry)

    # Query all nodes in parallel, then wait for every thread to finish
    threads = []
    for node in alba_nodes:
        thread = Thread(target=_load_live_info, args=(node, storage_map[node.node_id]))
        thread.start()
        threads.append(thread)
    for thread in threads:
        thread.join()

    # Mix in usage information
    # NOTE(review): assumes 'capacity' and 'disk_usage' are byte counts - TODO confirm
    for asd_id, stats in self.asd_statistics.iteritems():
        if asd_id in asd_map:
            asd_map[asd_id]['usage'] = {'size': int(stats['capacity']),
                                        'used': int(stats['disk_usage']),
                                        'available': int(stats['capacity'] - stats['disk_usage'])}

    # Load information from alba
    # Per-backend error interval overrides the global one when present
    backend_interval_key = '/ovs/alba/backends/{0}/gui_error_interval'.format(self.guid)
    if EtcdConfiguration.exists(backend_interval_key):
        interval = EtcdConfiguration.get(backend_interval_key)
    else:
        interval = EtcdConfiguration.get('/ovs/alba/backends/global_gui_error_interval')
    config = 'etcd://127.0.0.1:2379/ovs/arakoon/{0}/config'.format(self.abm_services[0].service.name)
    for found_osd in AlbaCLI.run('list-all-osds', config=config, as_json=True):
        node_id = found_osd['node_id']
        asd_id = found_osd['long_id']
        for _disk in storage_map.get(node_id, {}).values():
            asd_data = _disk['asds'].get(asd_id, {})
            if 'state' not in asd_data:
                continue  # 'state' is only set for ASDs seen live; skip the rest
            if found_osd.get('decommissioned') is True:
                asd_data['status'] = 'unavailable'
                asd_data['status_detail'] = 'decommissioned'
                continue
            state = asd_data['state']
            if state == 'ok':
                if found_osd['id'] is None:
                    # OSD not claimed by this backend; alba_id tells us whether
                    # another backend owns it
                    alba_id = found_osd['alba_id']
                    if alba_id is None:
                        asd_data['status'] = 'available'
                    else:
                        asd_data['status'] = 'unavailable'
                        alba_backend = alba_backend_map.get(alba_id)
                        if alba_backend is not None:
                            asd_data['alba_backend_guid'] = alba_backend.guid
                else:
                    # Claimed by this backend; downgrade 'claimed' to 'warning'
                    # when errors are more recent than the last read/write + interval
                    asd_data['alba_backend_guid'] = self.guid
                    asd_data['status'] = 'warning'
                    asd_data['status_detail'] = 'recenterrors'

                    read = found_osd['read'] or [0]
                    write = found_osd['write'] or [0]
                    errors = found_osd['errors']
                    if len(errors) == 0 or (len(read + write) > 0 and max(min(read), min(write)) > max(error[0] for error in errors) + interval):
                        asd_data['status'] = 'claimed'
                        asd_data['status_detail'] = ''
            else:
                asd_data['status'] = 'error'
                asd_data['status_detail'] = asd_data.get('state_detail', '')
                alba_backend = alba_backend_map.get(found_osd.get('alba_id'))
                if alba_backend is not None:
                    asd_data['alba_backend_guid'] = alba_backend.guid
    return storage_map