def __init__(self, source, name=None):
    """
    Initializes the logger
    """
    parent_invoker = inspect.stack()[1]
    if not __file__.startswith(parent_invoker[1]) or parent_invoker[3] != "get":
        raise RuntimeError("Cannot invoke instance from outside this class. Please use LogHandler.get(source, name=None) instead")
    if name is None:
        name = Configuration.get("ovs.logging.default_name")
    log_filename = LogHandler.load_path(source)
    formatter = logging.Formatter("%(asctime)s - [%(process)s] - [%(levelname)s] - [{0}] - [%(name)s] - %(message)s".format(source))
    handler = logging.FileHandler(log_filename)
    handler.setFormatter(formatter)
    self.logger = logging.getLogger(name)
    self.logger.propagate = True
    self.logger.setLevel(getattr(logging, Configuration.get("ovs.logging.level")))
    self.logger.addHandler(handler)
def __init__(self):
    """
    Initializes the client
    """
    self._enable_support = Configuration.get('ovs.support.enablesupport')
    self.interval = int(Configuration.get('ovs.support.interval'))
    self._url = 'https://monitoring.openvstorage.com/api/support/heartbeat/'
def get_client(client_type=None):
    """
    Returns a volatile storage client
    """
    if not hasattr(VolatileFactory, 'store') or VolatileFactory.store is None:
        if client_type is None:
            client_type = Configuration.get('ovs.core.storage.volatile')
        VolatileFactory.store = None
        if client_type == 'memcache':
            from ovs.extensions.storage.volatile.memcachestore import MemcacheStore
            memcache_servers = list()
            memcache_config = RawConfigParser()
            memcache_config.read(os.path.join(Configuration.get('ovs.core.cfgdir'), 'memcacheclient.cfg'))
            nodes = [node.strip() for node in memcache_config.get('main', 'nodes').split(',')]
            nodes.sort()
            for node in nodes:
                location = memcache_config.get(node, 'location')
                memcache_servers.append(location)
            VolatileFactory.store = MemcacheStore(memcache_servers)
        if client_type == 'default':
            from ovs.extensions.storage.volatile.dummystore import DummyVolatileStore
            VolatileFactory.store = DummyVolatileStore()
        if VolatileFactory.store is None:
            raise RuntimeError('Invalid client_type specified')
    return VolatileFactory.store
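# Illustrative usage sketch (assumes a configured OVS environment; the cache
# key used here is made up). Whichever store the configuration selects
# exposes the same set()/get()/delete() interface relied upon elsewhere in
# this code (e.g. the watcher's volatile store test).
from ovs.extensions.storage.volatilefactory import VolatileFactory

cache = VolatileFactory.get_client()  # resolves 'ovs.core.storage.volatile'
cache.set('ovs_demo_key', 'demo_value')
assert cache.get('ovs_demo_key') == 'demo_value'
cache.delete('ovs_demo_key')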
def is_service_internally_managed(service):
    """
    Validate whether the service is internally or externally managed
    :param service: Service to verify
    :type service: str
    :return: True if internally managed, False otherwise
    :rtype: bool
    """
    if service not in ['memcached', 'rabbitmq']:
        raise ValueError('Can only check memcached or rabbitmq')

    service_name_map = {'memcached': 'memcache',
                        'rabbitmq': 'messagequeue'}[service]
    config_key = '/ovs/framework/{0}'.format(service_name_map)
    if not Configuration.exists(key=config_key):
        return True

    if not Configuration.exists(key='{0}|metadata'.format(config_key)):
        raise ValueError('Not all required keys ({0}) for {1} are present in the configuration management'.format(config_key, service))
    metadata = Configuration.get('{0}|metadata'.format(config_key))
    if 'internal' not in metadata:
        raise ValueError('Internal flag not present in metadata for {0}.\nPlease provide a key: {1} and value "metadata": {{"internal": True/False}}'.format(service, config_key))

    internal = metadata['internal']
    if internal is False:
        if not Configuration.exists(key='{0}|endpoints'.format(config_key)):
            raise ValueError('Externally managed {0} cluster must have "endpoints" information\nPlease provide a key: {1} and value "endpoints": [<ip:port>]'.format(service, config_key))
        endpoints = Configuration.get(key='{0}|endpoints'.format(config_key))
        if not isinstance(endpoints, list) or len(endpoints) == 0:
            raise ValueError('The endpoints for {0} cannot be empty and must be a list'.format(service))
    return internal
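# Illustrative configuration for an externally managed RabbitMQ cluster
# (keys taken from the checks above, values made up). With these entries in
# place, is_service_internally_managed('rabbitmq') returns False.
Configuration.set('/ovs/framework/messagequeue|metadata', {'internal': False})
Configuration.set('/ovs/framework/messagequeue|endpoints', ['10.100.1.10:5672', '10.100.1.11:5672'])
assert Toolbox.is_service_internally_managed('rabbitmq') is False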
def load_path(source):
    log_filename = '{0}/{1}.log'.format(
        Configuration.get('ovs.logging.path'),
        LogHandler.targets[source] if source in LogHandler.targets else Configuration.get('ovs.logging.default_file')
    )
    if not os.path.exists(log_filename):
        open(log_filename, 'a').close()
        os.chmod(log_filename, 0o666)
    return log_filename
def get_heartbeat_data():
    """
    Returns heartbeat data
    """
    data = {'cid': Configuration.get('ovs.support.cid'),
            'nid': Configuration.get('ovs.support.nid'),
            'metadata': {},
            'errors': []}
    try:
        # Versions
        data['metadata']['versions'] = PackageManager.get_versions()
    except Exception, ex:
        data['errors'].append(str(ex))
def on_demote(cluster_ip, master_ip, offline_node_ips=None):
    """
    Handles the demote for the StorageDrivers
    :param cluster_ip: IP of the node to demote
    :type cluster_ip: str
    :param master_ip: IP of the master node
    :type master_ip: str
    :param offline_node_ips: IPs of nodes which are offline
    :type offline_node_ips: list
    :return: None
    """
    _ = master_ip
    if offline_node_ips is None:
        offline_node_ips = []
    servicetype = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON)
    current_service = None
    remaining_ips = []
    for service in servicetype.services:
        if service.name == 'arakoon-voldrv' and service.is_internal is True:  # Externally managed arakoon cluster service does not have storage router
            if service.storagerouter.ip == cluster_ip:
                current_service = service
            elif service.storagerouter.ip not in offline_node_ips:
                remaining_ips.append(service.storagerouter.ip)
    if current_service is not None:
        if len(remaining_ips) == 0:
            raise RuntimeError('Could not find any remaining arakoon nodes for the voldrv cluster')
        StorageDriverController._logger.debug('* Shrink StorageDriver cluster')
        cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|voldrv'))
        ArakoonInstaller.shrink_cluster(deleted_node_ip=cluster_ip,
                                        remaining_node_ips=remaining_ips,
                                        cluster_name=cluster_name,
                                        offline_nodes=offline_node_ips)
        current_service.delete()
        StorageDriverController._configure_arakoon_to_volumedriver(cluster_name=cluster_name)
def __init__(self, config_type, vpool_name, number=None):
    """
    Initializes the class
    """

    def make_configure(sct):
        """
        section closure
        """
        return lambda **kwargs: self._add(sct, **kwargs)

    if config_type not in ["storagedriver", "metadataserver"]:
        raise RuntimeError("Invalid configuration type. Allowed: storagedriver, metadataserver")
    self.config_type = config_type
    self.vpool_name = vpool_name
    self.configuration = {}
    self.is_new = True
    self.dirty_entries = []
    self.number = number
    self.params = copy.deepcopy(StorageDriverConfiguration.parameters)  # Never use parameters directly
    self.base_path = "{0}/storagedriver/{1}".format(Configuration.get("ovs.core.cfgdir"), self.config_type)
    if self.number is None:
        self.path = "{0}/{1}.json".format(self.base_path, self.vpool_name)
    else:
        self.path = "{0}/{1}_{2}.json".format(self.base_path, self.vpool_name, self.number)
    # Fix some manual "I know what I'm doing" overrides
    backend_connection_manager = "backend_connection_manager"
    self.params[self.config_type][backend_connection_manager]["optional"].append("s3_connection_strict_consistency")
    # Generate configure_* methods
    for section in self.params[self.config_type]:
        setattr(self, "configure_{0}".format(section), make_configure(section))
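# Why the make_configure closure above is needed: a bare lambda in the loop
# would capture the loop variable late, so every generated configure_* method
# would write to the last section. A minimal standalone illustration of the
# pattern (section names made up):
sections = ['volume_router', 'event_publisher']
late = dict((s, lambda **kw: (s, kw)) for s in sections)
frozen = dict((s, (lambda sct: (lambda **kw: (sct, kw)))(s)) for s in sections)
print late['volume_router']()    # ('event_publisher', {}) - wrong section
print frozen['volume_router']()  # ('volume_router', {})   - bound per iteration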
def migrate(master_ips=None, extra_ips=None):
    """
    Executes all migrations. It keeps track of an internal "migration version" which is always increasing by one
    :param master_ips: IP addresses of the MASTER nodes
    :param extra_ips: IP addresses of the EXTRA nodes
    """
    machine_id = System.get_my_machine_id()
    key = '/ovs/framework/hosts/{0}/versions'.format(machine_id)
    data = Configuration.get(key) if Configuration.exists(key) else {}

    migrators = []
    path = '/'.join([os.path.dirname(__file__), 'migration'])
    for filename in os.listdir(path):
        if os.path.isfile('/'.join([path, filename])) and filename.endswith('.py'):
            name = filename.replace('.py', '')
            module = imp.load_source(name, '/'.join([path, filename]))
            for member in inspect.getmembers(module):
                if inspect.isclass(member[1]) and member[1].__module__ == name and 'object' in [base.__name__ for base in member[1].__bases__]:
                    migrators.append((member[1].identifier, member[1].migrate))

    end_version = 0
    for identifier, method in migrators:
        base_version = data[identifier] if identifier in data else 0
        version = method(base_version, master_ips, extra_ips)
        if version > end_version:
            end_version = version
        data[identifier] = end_version

    Configuration.set(key, data)
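# Hypothetical skeleton of a module under migration/ that the discovery loop
# above would pick up: a direct subclass of object exposing an 'identifier'
# attribute and a migrate() that returns the version it migrated up to.
class DemoMigrator(object):
    identifier = 'demo'  # key stored under /ovs/framework/hosts/<machine_id>/versions

    @staticmethod
    def migrate(previous_version, master_ips=None, extra_ips=None):
        working_version = previous_version
        if working_version < 1:
            # ... apply the version-1 migration steps here ...
            working_version = 1
        return working_version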
def load_target_definition(source, allow_override=False):
    """
    Load the logger target
    :param source: Source
    :param allow_override: Allow override
    :return: Target definition
    """
    logging_target = {'type': 'console'}
    try:
        from ovs.extensions.generic.configuration import Configuration
        logging_target = Configuration.get('/ovs/framework/logging')
    except:
        pass
    target_type = logging_target.get('type', 'console')
    if allow_override is True and 'OVS_LOGTYPE_OVERRIDE' in os.environ:
        target_type = os.environ['OVS_LOGTYPE_OVERRIDE']
    if target_type == 'redis':
        queue = logging_target.get('queue', '/ovs/logging')
        if '{0}' in queue:
            queue = queue.format(source)
        return {'type': 'redis',
                'queue': '/{0}'.format(queue.lstrip('/')),
                'host': logging_target.get('host', 'localhost'),
                'port': logging_target.get('port', 6379)}
    if target_type == 'file':
        return {'type': 'file',
                'filename': LogHandler.load_path(source)}
    return {'type': 'console'}
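# Illustrative override (assumes load_target_definition is reachable on
# LogHandler, like load_path is): with allow_override=True the environment
# variable wins over whatever /ovs/framework/logging configures, which is
# handy for ad-hoc debugging of a single process.
import os
os.environ['OVS_LOGTYPE_OVERRIDE'] = 'console'
print LogHandler.load_target_definition('lib', allow_override=True)
# {'type': 'console'}, regardless of any configured redis/file target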
def get_mds_storagedriver_config_set(vpool):
    """
    Builds a configuration for all StorageRouters from a given VPool with following goals:
    * Primary MDS is the local one
    * All slaves are on different hosts
    * Maximum `mds.safety` nodes are returned
    """
    mds_per_storagerouter = {}
    mds_per_load = {}
    for storagedriver in vpool.storagedrivers:
        storagerouter = storagedriver.storagerouter
        mds_service, load = MDSServiceController.get_preferred_mds(storagerouter, vpool, include_load=True)
        mds_per_storagerouter[storagerouter.guid] = {'host': storagerouter.ip, 'port': mds_service.service.ports[0]}
        if load not in mds_per_load:
            mds_per_load[load] = []
        mds_per_load[load].append(storagerouter.guid)
    safety = Configuration.get('ovs.storagedriver.mds.safety')
    config_set = {}
    for storagerouter_guid in mds_per_storagerouter:
        config_set[storagerouter_guid] = [mds_per_storagerouter[storagerouter_guid]]
        for load in sorted(mds_per_load.keys()):
            if len(config_set[storagerouter_guid]) >= safety:
                break
            sr_guids = mds_per_load[load]
            random.shuffle(sr_guids)
            for sr_guid in sr_guids:
                if len(config_set[storagerouter_guid]) >= safety:
                    break
                if sr_guid != storagerouter_guid:
                    config_set[storagerouter_guid].append(mds_per_storagerouter[sr_guid])
    return config_set
def register(node_id):
    """
    Adds a Node with a given node_id to the model
    :param node_id: ID of the ALBA node
    :type node_id: str
    :return: None
    """
    node = AlbaNodeList.get_albanode_by_node_id(node_id)
    if node is None:
        main_config = Configuration.get('/ovs/alba/asdnodes/{0}/config/main'.format(node_id))
        node = AlbaNode()
        node.ip = main_config['ip']
        node.port = main_config['port']
        node.username = main_config['username']
        node.password = main_config['password']
        node.storagerouter = StorageRouterList.get_by_ip(main_config['ip'])
    data = node.client.get_metadata()
    if data['_success'] is False and data['_error'] == 'Invalid credentials':
        raise RuntimeError('Invalid credentials')
    if data['node_id'] != node_id:
        AlbaNodeController._logger.error('Unexpected node_id: {0} vs {1}'.format(data['node_id'], node_id))
        raise RuntimeError('Unexpected node identifier')
    node.node_id = node_id
    node.type = 'ASD'
    node.save()
    AlbaController.checkup_maintenance_agents.delay()
def test_multi_node(self):
    base_port = Configuration.get('ovs.ports.arakoon')[0]
    cluster = 'one'
    # Keep the sorted node names and the per-node clients separate; rebinding
    # 'nodes' to a dict broke the ordered slicing below (dicts cannot be sliced)
    node_names = sorted(TestArakoonInstaller.nodes.keys())
    clients = dict((node, SSHClient(node)) for node in node_names)
    first_node = node_names[0]
    ArakoonInstaller.create_cluster(cluster, first_node, [])
    for node in node_names[1:]:
        ArakoonInstaller.extend_cluster(first_node, node, cluster, [])
    expected = TestArakoonInstaller.expected_global.format(cluster, ','.join(TestArakoonInstaller.nodes[node] for node in node_names))
    for node in node_names:
        expected += TestArakoonInstaller.expected_base.format(TestArakoonInstaller.nodes[node], node, base_port, base_port + 1)
    expected = expected.strip()
    for node, client in clients.iteritems():
        contents = client.file_read(self._get_config_path(cluster))
        self.assertEqual(contents.strip(), expected.strip())
    ArakoonInstaller.shrink_cluster(node_names[1], first_node, cluster)
    expected = TestArakoonInstaller.expected_global.format(cluster, ','.join(TestArakoonInstaller.nodes[node] for node in node_names[1:]))
    for node in node_names[1:]:
        expected += TestArakoonInstaller.expected_base.format(TestArakoonInstaller.nodes[node], node, base_port, base_port + 1)
    expected = expected.strip()
    for node, client in clients.iteritems():
        if node == first_node:
            continue
        contents = client.file_read(self._get_config_path(cluster))
        self.assertEqual(contents.strip(), expected.strip())
def configure_avahi(client, node_name, node_type, logger):
    """
    Configure Avahi
    :param client: Client on which to configure avahi
    :type client: ovs.extensions.generic.sshclient.SSHClient
    :param node_name: Name of the node to set in Avahi
    :type node_name: str
    :param node_type: Type of the node ('master' or 'extra')
    :type node_type: str
    :param logger: Logger object used for logging
    :type logger: ovs.log.log_handler.LogHandler
    :return: None
    """
    cluster_name = Configuration.get('/ovs/framework/cluster_name')
    Toolbox.log(logger=logger, messages='Announcing service')
    client.file_write(NodeTypeController.avahi_filename, """<?xml version="1.0" standalone='no'?>
<!--*-nxml-*-->
<!DOCTYPE service-group SYSTEM "avahi-service.dtd">
<!-- $Id$ -->
<service-group>
    <name replace-wildcards="yes">ovs_cluster_{0}_{1}_{3}</name>
    <service>
        <type>_ovs_{2}_node._tcp</type>
        <port>443</port>
    </service>
</service-group>""".format(cluster_name, node_name, node_type, client.ip.replace('.', '_')))
    Toolbox.change_service_state(client, 'avahi-daemon', 'restart', NodeTypeController._logger)
def _has_plugin(self):
    """
    Checks whether this BackendType has a plugin installed
    """
    try:
        return self.code in Configuration.get('ovs.plugins.backends')
    except:
        return False
def _get_memcache_nodes():
    """
    Get the memcache nodes
    """
    memcache_ini = RawConfigParser()
    memcache_ini.read(os.path.join(Configuration.get('ovs.core.cfgdir'), 'memcacheclient.cfg'))
    nodes = [node.strip() for node in memcache_ini.get('main', 'nodes').split(',')]
    return [memcache_ini.get(node, 'location') for node in nodes]
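# Illustrative memcacheclient.cfg layout implied by the parsing above
# (section/option names come from the code, the values are made up):
#
#   [main]
#   nodes = node1, node2
#
#   [node1]
#   location = 10.100.1.1:11211
#
#   [node2]
#   location = 10.100.1.2:11211
#
# _get_memcache_nodes() would then return ['10.100.1.1:11211', '10.100.1.2:11211'].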
def run_event_consumer():
    """
    Check whether to run the event consumer
    """
    rmq_config = RawConfigParser()
    rmq_config.read(os.path.join(Configuration.get('ovs.core.cfgdir'), 'rabbitmqclient.cfg'))
    machine_id = System.get_my_machine_id()
    return rmq_config.has_section(machine_id)
def get_mds_storagedriver_config_set(vpool, check_online=False):
    """
    Builds a configuration for all StorageRouters from a given VPool with following goals:
    * Primary MDS is the local one
    * All slaves are on different hosts
    * Maximum `mds_safety` nodes are returned
    The configuration returned is the default configuration used by the volumedriver of which in normal use-cases
    only the 1st entry is used, because at volume creation time, the volumedriver needs to create 1 master MDS
    During ensure_safety, we actually create/set the MDS slaves for each volume

    :param vpool: vPool to get storagedriver configuration for
    :type vpool: VPool
    :param check_online: Check whether the storage routers are actually responsive
    :type check_online: bool
    :return: MDS configuration for a vPool
    :rtype: dict
    """
    mds_per_storagerouter = {}
    mds_per_load = {}
    for storagedriver in vpool.storagedrivers:
        storagerouter = storagedriver.storagerouter
        if check_online is True:
            try:
                SSHClient(storagerouter)
            except UnableToConnectException:
                continue
        mds_service, load = MDSServiceController.get_preferred_mds(storagerouter, vpool)
        if mds_service is None:
            raise RuntimeError('Could not find an MDS service')
        mds_per_storagerouter[storagerouter] = {'host': storagerouter.ip, 'port': mds_service.service.ports[0]}
        if load not in mds_per_load:
            mds_per_load[load] = []
        mds_per_load[load].append(storagerouter)

    safety = Configuration.get('/ovs/framework/storagedriver|mds_safety')
    config_set = {}
    for storagerouter, ip_info in mds_per_storagerouter.iteritems():
        config_set[storagerouter.guid] = [ip_info]
        for importance in ['primary', 'secondary']:
            domains = [junction.domain for junction in storagerouter.domains if junction.backup is (importance == 'secondary')]
            possible_storagerouters = set()
            for domain in domains:
                possible_storagerouters.update(StorageRouterList.get_primary_storagerouters_for_domain(domain))
            for load in sorted(mds_per_load):
                if len(config_set[storagerouter.guid]) >= safety:
                    break
                other_storagerouters = mds_per_load[load]
                random.shuffle(other_storagerouters)
                for other_storagerouter in other_storagerouters:
                    if len(config_set[storagerouter.guid]) >= safety:
                        break
                    if other_storagerouter != storagerouter and other_storagerouter in possible_storagerouters:
                        config_set[storagerouter.guid].append(mds_per_storagerouter[other_storagerouter])
    return config_set
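# Shape of the returned config_set (guids and endpoints made up): per
# StorageRouter the first entry is its own preferred MDS, followed by up to
# mds_safety - 1 slave candidates on other hosts within its failure domains.
#
# {
#     '9d6d9f3e-...': [{'host': '10.100.1.1', 'port': 26300},   # local master MDS
#                      {'host': '10.100.1.2', 'port': 26300}],  # slave candidate
#     '2c3f1a7b-...': [{'host': '10.100.1.2', 'port': 26300},
#                      {'host': '10.100.1.1', 'port': 26300}],
# }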
def test_single_node(self):
    base_port = Configuration.get('ovs.ports.arakoon')[0]
    cluster = 'one'
    node = sorted(TestArakoonInstaller.nodes.keys())[0]
    ArakoonInstaller.create_cluster(cluster, node, [])
    contents = SSHClient(node).file_read(self._get_config_path(cluster))
    expected = TestArakoonInstaller.expected_global.format(cluster, TestArakoonInstaller.nodes[node])
    expected += TestArakoonInstaller.expected_base.format(TestArakoonInstaller.nodes[node], node, base_port, base_port + 1)
    self.assertEqual(contents.strip(), expected.strip())
def _get_nsm_state(abm):
    state = {}
    data = VirtualAlbaBackend.data[abm]
    for nsm in data['nsms']:
        contents = Configuration.get(nsm['_config_key'], raw=True)
        config = ArakoonClusterConfig(nsm['id'], filesystem=False)
        config.read_config(contents)
        state[nsm['id']] = [node.name for node in config.nodes]
    return state
def _configure_amqp_to_volumedriver():
    Toolbox.log(logger=NodeTypeController._logger, messages='Update existing vPools')
    login = Configuration.get('/ovs/framework/messagequeue|user')
    password = Configuration.get('/ovs/framework/messagequeue|password')
    protocol = Configuration.get('/ovs/framework/messagequeue|protocol')

    uris = []
    for endpoint in Configuration.get('/ovs/framework/messagequeue|endpoints'):
        uris.append({'amqp_uri': '{0}://{1}:{2}@{3}'.format(protocol, login, password, endpoint)})

    if Configuration.dir_exists('/ovs/vpools'):
        for vpool_guid in Configuration.list('/ovs/vpools'):
            for storagedriver_id in Configuration.list('/ovs/vpools/{0}/hosts'.format(vpool_guid)):
                storagedriver_config = StorageDriverConfiguration('storagedriver', vpool_guid, storagedriver_id)
                storagedriver_config.load()
                storagedriver_config.configure_event_publisher(events_amqp_routing_key=Configuration.get('/ovs/framework/messagequeue|queues.storagedriver'),
                                                               events_amqp_uris=uris)
                storagedriver_config.save()
def load_config(self, ip=None):
    """
    Reads a configuration from reality
    """
    if self.filesystem is False:
        contents = Configuration.get(self.config_path, raw=True)
    else:
        client = self._load_client(ip)
        contents = client.file_read(self.config_path)
    self.read_config(contents)
def get_path(binary_name):
    config_location = 'ovs.path.{0}'.format(binary_name)
    path = Configuration.get(config_location)
    if not path:
        try:
            path = check_output('which {0}'.format(binary_name), shell=True).strip()
            Configuration.set(config_location, path)
        except CalledProcessError:
            return None
    return path
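# Illustrative usage (binary names made up): the first lookup shells out to
# `which` and caches the result under ovs.path.<binary_name>; later lookups
# are served straight from the configuration store.
print get_path('arakoon')         # e.g. '/usr/bin/arakoon', now cached
print get_path('no-such-binary')  # None: `which` fails, CalledProcessError is caught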
def __init__(self):
    """
    Initializes the client
    """
    self._enable_support = Configuration.get('/ovs/framework/support|enablesupport')
    self.interval = Configuration.get('/ovs/framework/support|interval')
    self._url = 'https://monitoring.openvstorage.com/api/support/heartbeat/'

    init_info = check_output('cat /proc/1/comm', shell=True)
    # All service classes used in below code should share the exact same interface!
    if 'init' in init_info:
        version_info = check_output('init --version', shell=True)
        if 'upstart' in version_info:
            self.servicemanager = 'upstart'
        else:
            # The original lacked the 'raise', silently discarding the error
            raise RuntimeError('There was no known service manager detected in /proc/1/comm')
    elif 'systemd' in init_info:
        self.servicemanager = 'systemd'
    else:
        raise RuntimeError('There was no known service manager detected in /proc/1/comm')
def check_memcached_ports(result_handler):
    """
    Checks the connection of this node to all Memcached endpoints
    :param result_handler: logging object
    :type result_handler: ovs.extensions.healthcheck.result.HCResults
    :return: None
    :rtype: NoneType
    """
    memcached_ips = [endpoint.rsplit(':')[0] for endpoint in Configuration.get('ovs/framework/memcache|endpoints', default=[])]
    return OpenvStorageHealthCheck._check_extra_ports(result_handler, 'memcached', ips=memcached_ips)
def _get_nsm_max_capacity_before_overload(alba_backend, max_load=None):
    """
    Get the maximum amount of namespaces that the nsm clusters can hold for a backend before being marked as overloaded
    :param alba_backend: AlbaBackend object to use
    :type alba_backend: ovs.dal.hybrids.albabackend.AlbaBackend
    :return: The max amount of namespaces
    :rtype: int
    """
    max_load = max_load or Configuration.get('ovs/framework/plugins/alba/config|nsm.maxload')
    return sum(int(math.ceil(nsm.capacity * (float(max_load) / 100.0))) for nsm in alba_backend.nsm_clusters)
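# Worked example of the formula above (numbers made up): two NSM clusters
# with capacity 50 at a max load of 75% each tolerate ceil(50 * 0.75) = 38
# namespaces, so the backend overloads once 76 namespaces exist in total.
import math
capacities = [50, 50]
max_load = 75
print sum(int(math.ceil(c * (float(max_load) / 100.0))) for c in capacities)  # 76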
def load(self):
    """
    Loads the configuration from a given file, optionally a remote one
    """
    self.configuration = {}
    if Configuration.exists(self.key):
        self.is_new = False
        self.configuration = json.loads(Configuration.get(self.key, raw=True))
    else:
        self._logger.debug('Could not find config {0}, a new one will be created'.format(self.key))
    self.dirty_entries = []
def __init__(self, cluster):
    """
    Initializes the client
    """
    contents = Configuration.get(PyrakoonStore.CONFIG_KEY.format(cluster), raw=True)
    parser = RawConfigParser()
    parser.readfp(StringIO(contents))
    nodes = {}
    for node in parser.get("global", "cluster").split(","):
        node = node.strip()
        nodes[node] = ([parser.get(node, "ip")], parser.get(node, "client_port"))
    self._client = PyrakoonClient(cluster, nodes)
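# Illustrative Arakoon client configuration as parsed above (section/option
# names come from the code, values made up):
#
#   [global]
#   cluster = node_1, node_2
#
#   [node_1]
#   ip = 10.100.1.1
#   client_port = 26400
#
#   [node_2]
#   ip = 10.100.1.2
#   client_port = 26400
#
# yielding nodes = {'node_1': (['10.100.1.1'], '26400'),
#                   'node_2': (['10.100.1.2'], '26400')}
# Note the port stays a string: RawConfigParser.get() does not cast.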
def config_read(self, key):
    if self.is_local is True:
        from ovs.extensions.generic.configuration import Configuration
        return Configuration.get(key)
    else:
        read = """
import sys, json
sys.path.append('/opt/OpenvStorage')
from ovs.extensions.generic.configuration import Configuration
print json.dumps(Configuration.get('{0}'))
""".format(key)
        return json.loads(self.run('python -c """{0}"""'.format(read)))
def _validate_local_memcache_servers(ip_client_map):
    """
    Reads the memcache client configuration file from one of the given nodes, and validates whether it can
    reach all nodes to handle a possible future memcache restart
    """
    if len(ip_client_map) <= 1:
        return True
    ips = [endpoint.split(':')[0] for endpoint in Configuration.get('/ovs/framework/memcache|endpoints')]
    for ip in ips:
        if ip not in ip_client_map:
            return False
    return True
def services_running(self, target):
    """
    Check all services are running
    :param target: Target to check
    :return: Boolean
    """
    try:
        key = 'ovs-watcher-{0}'.format(str(uuid.uuid4()))
        value = str(time.time())

        if target in ['config', 'framework']:
            self.log_message(target, 'Testing configuration store...', 0)
            from ovs.extensions.generic.configuration import Configuration
            try:
                Configuration.list('/')
            except Exception as ex:
                self.log_message(target, ' Error during configuration store test: {0}'.format(ex), 2)
                return False

            from ovs.extensions.db.arakooninstaller import ArakoonInstaller, ArakoonClusterConfig
            from ovs_extensions.db.arakoon.pyrakoon.pyrakoon.compat import NoGuarantee
            from ovs.extensions.generic.configuration import Configuration
            with open(Configuration.CACC_LOCATION) as config_file:
                contents = config_file.read()
            config = ArakoonClusterConfig(cluster_id=Configuration.ARAKOON_NAME, load_config=False)
            config.read_config(contents=contents)
            client = ArakoonInstaller.build_client(config)
            contents = client.get(ArakoonInstaller.INTERNAL_CONFIG_KEY, consistency=NoGuarantee())
            if Watcher.LOG_CONTENTS != contents:
                try:
                    config.read_config(contents=contents)  # Validate whether the contents are not corrupt
                except Exception as ex:
                    self.log_message(target, ' Configuration stored in configuration store seems to be corrupt: {0}'.format(ex), 2)
                    return False
                temp_filename = '{0}~'.format(Configuration.CACC_LOCATION)
                with open(temp_filename, 'w') as config_file:
                    config_file.write(contents)
                    config_file.flush()
                    os.fsync(config_file)
                os.rename(temp_filename, Configuration.CACC_LOCATION)
                Watcher.LOG_CONTENTS = contents
            self.log_message(target, ' Configuration store OK', 0)

        if target == 'framework':
            # Volatile
            self.log_message(target, 'Testing volatile store...', 0)
            max_tries = 5
            tries = 0
            while tries < max_tries:
                try:
                    try:
                        logging.disable(logging.WARNING)
                        from ovs.extensions.storage.volatilefactory import VolatileFactory
                        VolatileFactory.store = None
                        volatile = VolatileFactory.get_client()
                        volatile.set(key, value)
                        if volatile.get(key) == value:
                            volatile.delete(key)
                            break
                        volatile.delete(key)
                    finally:
                        logging.disable(logging.NOTSET)
                except Exception as message:
                    self.log_message(target, ' Error during volatile store test: {0}'.format(message), 2)
                key = 'ovs-watcher-{0}'.format(str(uuid.uuid4()))  # Get another key
                time.sleep(1)
                tries += 1
            if tries == max_tries:
                self.log_message(target, ' Volatile store not working correctly', 2)
                return False
            self.log_message(target, ' Volatile store OK after {0} tries'.format(tries), 0)

            # Persistent
            self.log_message(target, 'Testing persistent store...', 0)
            max_tries = 5
            tries = 0
            while tries < max_tries:
                try:
                    try:
                        logging.disable(logging.WARNING)
                        persistent = PersistentFactory.get_client()
                        persistent.nop()
                        break
                    finally:
                        logging.disable(logging.NOTSET)
                except Exception as message:
                    self.log_message(target, ' Error during persistent store test: {0}'.format(message), 2)
                time.sleep(1)
                tries += 1
            if tries == max_tries:
                self.log_message(target, ' Persistent store not working correctly', 2)
                return False
            self.log_message(target, ' Persistent store OK after {0} tries'.format(tries), 0)

        if target == 'volumedriver':
            # Arakoon, voldrv cluster
            self.log_message(target, 'Testing arakoon (voldrv)...', 0)
            max_tries = 5
            tries = 0
            while tries < max_tries:
                try:
                    from ovs.extensions.generic.configuration import Configuration
                    from ovs_extensions.storage.persistent.pyrakoonstore import PyrakoonStore
                    cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|voldrv'))
                    configuration = Configuration.get('/ovs/arakoon/{0}/config'.format(cluster_name), raw=True)
                    client = PyrakoonStore(cluster=cluster_name, configuration=configuration)
                    client.nop()
                    break
                except Exception as message:
                    self.log_message(target, ' Error during arakoon (voldrv) test: {0}'.format(message), 2)
                time.sleep(1)
                tries += 1
            if tries == max_tries:
                self.log_message(target, ' Arakoon (voldrv) not working correctly', 2)
                return False
            self.log_message(target, ' Arakoon (voldrv) OK', 0)

        if target in ['framework', 'volumedriver']:
            # RabbitMQ
            self.log_message(target, 'Test rabbitMQ...', 0)
            import pika
            from ovs.extensions.generic.configuration import Configuration
            messagequeue = Configuration.get('/ovs/framework/messagequeue')
            rmq_servers = messagequeue['endpoints']
            good_node = False
            for server in rmq_servers:
                try:
                    connection_string = '{0}://{1}:{2}@{3}/%2F'.format(messagequeue['protocol'],
                                                                       messagequeue['user'],
                                                                       messagequeue['password'],
                                                                       server)
                    connection = pika.BlockingConnection(pika.URLParameters(connection_string))
                    channel = connection.channel()
                    channel.basic_publish('', 'ovs-watcher', str(time.time()),
                                          pika.BasicProperties(content_type='text/plain', delivery_mode=1))
                    connection.close()
                    good_node = True
                except Exception as message:
                    self.log_message(target, ' Error during rabbitMQ test on node {0}: {1}'.format(server, message), 2)
            if good_node is False:
                self.log_message(target, ' No working rabbitMQ node could be found', 2)
                return False
            self.log_message(target, ' RabbitMQ test OK', 0)

        self.log_message(target, 'All tests OK', 0)
        return True
    except Exception as ex:
        self.log_message(target, 'Unexpected exception: {0}'.format(ex), 2)
        return False
def _get_store_info(cls):
    return {'nodes': Configuration.get('/ovs/framework/memcache|endpoints')}
def mds_checkup():
    """
    Validates the current MDS setup/configuration and takes actions where required
    Actions:
        * Verify which StorageRouters are available
        * Make mapping between vPools and its StorageRouters
        * For each vPool make sure every StorageRouter has at least 1 MDS service with capacity available
        * For each vPool retrieve the optimal configuration and store it for each StorageDriver
        * For each vPool run an ensure safety for all vDisks
    :raises RuntimeError: When ensure safety fails for any vDisk
    :return: None
    :rtype: NoneType
    """
    MDSServiceController._logger.info('Started')

    # Verify StorageRouter availability
    root_client_cache = {}
    storagerouters = StorageRouterList.get_storagerouters()
    storagerouters.sort(key=lambda _sr: ExtensionsToolbox.advanced_sort(element=_sr.ip, separator='.'))
    offline_nodes = []
    for storagerouter in storagerouters:
        try:
            root_client = SSHClient(endpoint=storagerouter, username='******')
            MDSServiceController._logger.debug('StorageRouter {0} - ONLINE'.format(storagerouter.name))
        except UnableToConnectException:
            root_client = None
            offline_nodes.append(storagerouter)
            MDSServiceController._logger.error('StorageRouter {0} - OFFLINE'.format(storagerouter.name))
        root_client_cache[storagerouter] = root_client

    # Create mapping per vPool and its StorageRouters
    mds_dict = collections.OrderedDict()
    for vpool in sorted(VPoolList.get_vpools(), key=lambda k: k.name):
        MDSServiceController._logger.info('vPool {0}'.format(vpool.name))
        mds_dict[vpool] = {}

        # Loop all StorageDrivers and add StorageDriver to mapping
        for storagedriver in vpool.storagedrivers:
            storagerouter = storagedriver.storagerouter
            if storagerouter not in mds_dict[vpool]:
                mds_dict[vpool][storagerouter] = {'client': root_client_cache.get(storagerouter),
                                                  'services': [],
                                                  'storagedriver': storagedriver}

        # Loop all MDS Services and append services to appropriate vPool / StorageRouter combo
        mds_services = vpool.mds_services
        mds_services.sort(key=lambda _mds_service: ExtensionsToolbox.advanced_sort(element=_mds_service.service.storagerouter.ip, separator='.'))
        for mds_service in mds_services:
            service = mds_service.service
            storagerouter = service.storagerouter
            if storagerouter not in mds_dict[vpool]:
                mds_dict[vpool][storagerouter] = {'client': root_client_cache.get(storagerouter),
                                                  'services': [],
                                                  'storagedriver': None}
            MDSServiceController._logger.debug('vPool {0} - StorageRouter {1} - Service on port {2}'.format(vpool.name, storagerouter.name, service.ports[0]))
            mds_dict[vpool][storagerouter]['services'].append(mds_service)

    failures = []
    for vpool, storagerouter_info in mds_dict.iteritems():
        # Make sure there's at least 1 MDS on every StorageRouter that's not overloaded
        # Remove all MDS Services which have been manually marked for removal (by setting its capacity to 0)
        max_load = Configuration.get('/ovs/vpools/{0}/mds_config|mds_maxload'.format(vpool.guid))
        for storagerouter in sorted(storagerouter_info, key=lambda k: k.ip):
            total_load = 0.0
            root_client = mds_dict[vpool][storagerouter]['client']
            mds_services = mds_dict[vpool][storagerouter]['services']

            for mds_service in list(sorted(mds_services, key=lambda k: k.number)):
                port = mds_service.service.ports[0]
                number = mds_service.number
                # Manual intervention required here in order for the MDS to be cleaned up
                # @TODO: Remove this and make a dynamic calculation to check which MDSes to remove
                if mds_service.capacity == 0 and len(mds_service.vdisks_guids) == 0:
                    MDSServiceController._logger.warning('vPool {0} - StorageRouter {1} - MDS Service {2} on port {3}: Removing'.format(vpool.name, storagerouter.name, number, port))
                    try:
                        MDSServiceController.remove_mds_service(mds_service=mds_service, reconfigure=True, allow_offline=root_client is None)
                    except Exception:
                        MDSServiceController._logger.exception('vPool {0} - StorageRouter {1} - MDS Service {2} on port {3}: Failed to remove'.format(vpool.name, storagerouter.name, number, port))
                    mds_services.remove(mds_service)
                else:
                    _, next_load = MDSServiceController.get_mds_load(mds_service=mds_service)
                    if next_load == float('inf'):
                        total_load = sys.maxint * -1  # Cast to lowest possible value if any MDS service capacity is set to infinity
                    else:
                        total_load += next_load

                    if next_load < max_load:
                        MDSServiceController._logger.debug('vPool {0} - StorageRouter {1} - MDS Service {2} on port {3}: Capacity available - Load at {4}%'.format(vpool.name, storagerouter.name, number, port, next_load))
                    else:
                        MDSServiceController._logger.debug('vPool {0} - StorageRouter {1} - MDS Service {2} on port {3}: No capacity available - Load at {4}%'.format(vpool.name, storagerouter.name, number, port, next_load))

            if total_load >= max_load * len(mds_services):
                mds_services_to_add = int(math.ceil((total_load - max_load * len(mds_services)) / max_load))
                MDSServiceController._logger.info('vPool {0} - StorageRouter {1} - Average load per service {2:.2f}% - Max load per service {3:.2f}% - {4} MDS service{5} will be added'.format(vpool.name, storagerouter.name, total_load / len(mds_services), max_load, mds_services_to_add, '' if mds_services_to_add == 1 else 's'))
                for _ in range(mds_services_to_add):
                    MDSServiceController._logger.info('vPool {0} - StorageRouter {1} - Adding new MDS Service'.format(vpool.name, storagerouter.name))
                    try:
                        mds_services.append(MDSServiceController.prepare_mds_service(storagerouter=storagerouter, vpool=vpool))
                    except Exception:
                        MDSServiceController._logger.exception('vPool {0} - StorageRouter {1} - Failed to create new MDS Service'.format(vpool.name, storagerouter.name))

        # After potentially having added new MDSes, retrieve the optimal configuration
        mds_config_set = {}
        try:
            mds_config_set = MDSServiceController.get_mds_storagedriver_config_set(vpool=vpool, offline_nodes=offline_nodes)
            MDSServiceController._logger.debug('vPool {0} - Optimal configuration {1}'.format(vpool.name, mds_config_set))
        except (NotFoundException, RuntimeError):
            MDSServiceController._logger.exception('vPool {0} - Failed to retrieve the optimal configuration'.format(vpool.name))

        # Apply the optimal MDS configuration per StorageDriver
        for storagerouter in sorted(storagerouter_info, key=lambda k: k.ip):
            root_client = mds_dict[vpool][storagerouter]['client']
            storagedriver = mds_dict[vpool][storagerouter]['storagedriver']

            if storagedriver is None:
                MDSServiceController._logger.critical('vPool {0} - StorageRouter {1} - No matching StorageDriver found'.format(vpool.name, storagerouter.name))
                continue
            if storagerouter.guid not in mds_config_set:
                MDSServiceController._logger.critical('vPool {0} - StorageRouter {1} - Not marked as offline, but could not retrieve an optimal MDS config'.format(vpool.name, storagerouter.name))
                continue
            if root_client is None:
                MDSServiceController._logger.debug('vPool {0} - StorageRouter {1} - Marked as offline, not setting optimal MDS configuration'.format(vpool.name, storagerouter.name))
                continue

            storagedriver_config = StorageDriverConfiguration(vpool_guid=vpool.guid, storagedriver_id=storagedriver.storagedriver_id)
            if storagedriver_config.config_missing is False:
                optimal_mds_config = mds_config_set[storagerouter.guid]
                MDSServiceController._logger.debug('vPool {0} - StorageRouter {1} - Storing optimal MDS configuration: {2}'.format(vpool.name, storagerouter.name, optimal_mds_config))
                # Filesystem section in StorageDriver configuration are all parameters used for vDisks created directly on the filesystem
                # So when a vDisk gets created on the filesystem, these MDSes will be assigned to them
                storagedriver_config.configure_filesystem(fs_metadata_backend_mds_nodes=optimal_mds_config)
                storagedriver_config.save(root_client)

        # Execute a safety check, making sure the master/slave configuration is optimal.
        MDSServiceController._logger.info('vPool {0} - Ensuring safety for all vDisks'.format(vpool.name))
        for vdisk in vpool.vdisks:
            try:
                MDSServiceController.ensure_safety(vdisk_guid=vdisk.guid)
            except Exception:
                message = 'Ensure safety for vDisk {0} with guid {1} failed'.format(vdisk.name, vdisk.guid)
                MDSServiceController._logger.exception(message)
                failures.append(message)
    if len(failures) > 0:
        raise RuntimeError('\n - ' + '\n - '.join(failures))
    MDSServiceController._logger.info('Finished')
def promote_or_demote_node(node_action, cluster_ip=None, execute_rollback=False):
    """
    Promotes or demotes the local node
    :param node_action: Demote or promote
    :type node_action: str
    :param cluster_ip: IP of node to promote or demote
    :type cluster_ip: str
    :param execute_rollback: In case of failure revert the changes made
    :type execute_rollback: bool
    :return: None
    """
    if node_action not in ('promote', 'demote'):
        raise ValueError('Nodes can only be promoted or demoted')

    Toolbox.log(logger=NodeTypeController._logger, messages='Open vStorage Setup - {0}'.format(node_action.capitalize()), boxed=True)
    try:
        Toolbox.log(logger=NodeTypeController._logger, messages='Collecting information', title=True)

        machine_id = System.get_my_machine_id()
        if Configuration.get('/ovs/framework/hosts/{0}/setupcompleted'.format(machine_id)) is False:
            raise RuntimeError('No local OVS setup found.')

        if cluster_ip and not re.match(Toolbox.regex_ip, cluster_ip):
            raise RuntimeError('Incorrect IP provided ({0})'.format(cluster_ip))

        if cluster_ip:
            client = SSHClient(endpoint=cluster_ip)
            machine_id = System.get_my_machine_id(client)

        node_type = Configuration.get('/ovs/framework/hosts/{0}/type'.format(machine_id))
        if node_action == 'promote' and node_type == 'MASTER':
            raise RuntimeError('This node is already master.')
        elif node_action == 'demote' and node_type == 'EXTRA':
            raise RuntimeError('This node should be a master.')
        elif node_type not in ['MASTER', 'EXTRA']:
            raise RuntimeError('This node is not correctly configured.')

        master_ip = None
        offline_nodes = []

        online = True
        target_client = None
        if node_action == 'demote' and cluster_ip:  # Demote an offline node
            from ovs.dal.lists.storagerouterlist import StorageRouterList
            from ovs.lib.storagedriver import StorageDriverController

            ip = cluster_ip
            unique_id = None
            ip_client_map = {}
            for storage_router in StorageRouterList.get_storagerouters():
                try:
                    client = SSHClient(storage_router.ip, username='******')
                    if storage_router.node_type == 'MASTER':
                        master_ip = storage_router.ip
                    ip_client_map[storage_router.ip] = client
                except UnableToConnectException:
                    if storage_router.ip == cluster_ip:
                        online = False
                        unique_id = storage_router.machine_id
                        StorageDriverController.mark_offline(storagerouter_guid=storage_router.guid)
                    offline_nodes.append(storage_router)
            if online is True:
                raise RuntimeError("If the node is online, please use 'ovs setup demote' executed on the node you wish to demote")
            if master_ip is None:
                raise RuntimeError('Failed to retrieve another responsive MASTER node')
        else:
            target_password = Toolbox.ask_validate_password(ip='127.0.0.1', logger=NodeTypeController._logger)
            target_client = SSHClient('127.0.0.1', username='******', password=target_password)

            unique_id = System.get_my_machine_id(target_client)
            ip = Configuration.get('/ovs/framework/hosts/{0}/ip'.format(unique_id))

            storagerouter_info = NodeTypeController.retrieve_storagerouter_info_via_host(ip=target_client.ip, password=target_password)
            node_ips = [sr_info['ip'] for sr_info in storagerouter_info.itervalues()]
            master_node_ips = [sr_info['ip'] for sr_info in storagerouter_info.itervalues() if sr_info['type'] == 'master' and sr_info['ip'] != ip]
            if len(master_node_ips) == 0:
                if node_action == 'promote':
                    raise RuntimeError('No master node could be found')
                else:
                    raise RuntimeError('It is not possible to remove the only master')

            master_ip = master_node_ips[0]
            ip_client_map = dict((node_ip, SSHClient(node_ip, username='******')) for node_ip in node_ips)

        if node_action == 'demote':
            for cluster_name in Configuration.list('/ovs/arakoon'):
                config = ArakoonClusterConfig(cluster_id=cluster_name)
                arakoon_client = ArakoonInstaller.build_client(config)
                metadata = json.loads(arakoon_client.get(ArakoonInstaller.METADATA_KEY))
                if len(config.nodes) == 1 and config.nodes[0].ip == ip and metadata.get('internal') is True:
                    raise RuntimeError('Demote is not supported when single node Arakoon cluster(s) are present on the node to be demoted.')

        configure_rabbitmq = Toolbox.is_service_internally_managed(service='rabbitmq')
        configure_memcached = Toolbox.is_service_internally_managed(service='memcached')
        if node_action == 'promote':
            try:
                NodeTypeController.promote_node(cluster_ip=ip,
                                                master_ip=master_ip,
                                                ip_client_map=ip_client_map,
                                                unique_id=unique_id,
                                                configure_memcached=configure_memcached,
                                                configure_rabbitmq=configure_rabbitmq)
            except Exception:
                if execute_rollback is True:
                    NodeTypeController.demote_node(cluster_ip=ip,
                                                   master_ip=master_ip,
                                                   ip_client_map=ip_client_map,
                                                   unique_id=unique_id,
                                                   unconfigure_memcached=configure_memcached,
                                                   unconfigure_rabbitmq=configure_rabbitmq,
                                                   offline_nodes=offline_nodes)
                elif target_client is not None:
                    target_client.file_write('/tmp/ovs_rollback', 'demote')
                raise
        else:
            try:
                NodeTypeController.demote_node(cluster_ip=ip,
                                               master_ip=master_ip,
                                               ip_client_map=ip_client_map,
                                               unique_id=unique_id,
                                               unconfigure_memcached=configure_memcached,
                                               unconfigure_rabbitmq=configure_rabbitmq,
                                               offline_nodes=offline_nodes)
            except Exception:
                if execute_rollback is True:
                    NodeTypeController.promote_node(cluster_ip=ip,
                                                    master_ip=master_ip,
                                                    ip_client_map=ip_client_map,
                                                    unique_id=unique_id,
                                                    configure_memcached=configure_memcached,
                                                    configure_rabbitmq=configure_rabbitmq)
                elif target_client is not None:
                    target_client.file_write('/tmp/ovs_rollback', 'promote')
                raise

        Toolbox.log(logger=NodeTypeController._logger, messages='\n')
        Toolbox.log(logger=NodeTypeController._logger, messages='{0} complete.'.format(node_action.capitalize()), boxed=True)
    except Exception as exception:
        Toolbox.log(logger=NodeTypeController._logger, messages='\n')
        Toolbox.log(logger=NodeTypeController._logger, messages=['An unexpected error occurred:', str(exception)], boxed=True, loglevel='exception')
        sys.exit(1)
    except KeyboardInterrupt:
        Toolbox.log(logger=NodeTypeController._logger, messages='\n')
        Toolbox.log(logger=NodeTypeController._logger, messages='This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.', boxed=True, loglevel='error')
        sys.exit(1)
def get(self, request, *args, **kwargs):
    """
    Fetches metadata
    """
    _ = args, kwargs
    data = {'authenticated': False,
            'authentication_state': None,
            'authentication_metadata': {},
            'username': None,
            'userguid': None,
            'roles': [],
            'identification': {},
            'storagerouter_ips': [sr.ip for sr in StorageRouterList.get_storagerouters()],
            'versions': list(settings.VERSION),
            'plugins': {}}
    try:
        # Gather plugin metadata
        plugins = {}
        # - Backends. BackendType plugins must set the has_plugin flag on True
        for backend_type in BackendTypeList.get_backend_types():
            if backend_type.has_plugin is True:
                if backend_type.code not in plugins:
                    plugins[backend_type.code] = []
                plugins[backend_type.code] += ['backend', 'gui']
        # - Generic plugins, as added to the configuration file(s)
        generic_plugins = Configuration.get('ovs.plugins.generic')
        for plugin_name in generic_plugins:
            if plugin_name not in plugins:
                plugins[plugin_name] = []
            plugins[plugin_name] += ['gui']
        data['plugins'] = plugins

        # Fill identification
        data['identification'] = {'cluster_id': Configuration.get('ovs.support.cid')}

        # Get authentication metadata
        authentication_metadata = {'ip': System.get_my_storagerouter().ip}
        for key in ['mode', 'authorize_uri', 'client_id', 'scope']:
            if Configuration.exists('ovs.webapps.oauth2.{0}'.format(key)):
                authentication_metadata[key] = Configuration.get('ovs.webapps.oauth2.{0}'.format(key))
        data['authentication_metadata'] = authentication_metadata

        # Gather authorization metadata
        if 'HTTP_AUTHORIZATION' not in request.META:
            return HttpResponse, dict(data.items() + {'authentication_state': 'unauthenticated'}.items())
        authorization_type, access_token = request.META['HTTP_AUTHORIZATION'].split(' ')
        if authorization_type != 'Bearer':
            return HttpResponse, dict(data.items() + {'authentication_state': 'invalid_authorization_type'}.items())
        tokens = BearerTokenList.get_by_access_token(access_token)
        if len(tokens) != 1:
            return HttpResponse, dict(data.items() + {'authentication_state': 'invalid_token'}.items())
        token = tokens[0]
        if token.expiration < time.time():
            for junction in token.roles.itersafe():
                junction.delete()
            token.delete()
            return HttpResponse, dict(data.items() + {'authentication_state': 'token_expired'}.items())

        # Gather user metadata
        user = token.client.user
        if not user.is_active:
            return HttpResponse, dict(data.items() + {'authentication_state': 'inactive_user'}.items())
        roles = [j.role.code for j in token.roles]

        return HttpResponse, dict(data.items() + {'authenticated': True,
                                                  'authentication_state': 'authenticated',
                                                  'username': user.username,
                                                  'userguid': user.guid,
                                                  'roles': roles,
                                                  'plugins': plugins}.items())
    except Exception as ex:
        logger.exception('Unexpected exception: {0}'.format(ex))
        return HttpResponse, dict(data.items() + {'authentication_state': 'unexpected_exception'}.items())
def _stack(self):
    """
    Returns an overview of this node's storage stack
    """
    from ovs.dal.hybrids.albabackend import AlbaBackend
    from ovs.dal.lists.albabackendlist import AlbaBackendList

    def _move(info):
        for move in [('state', 'status'),
                     ('state_detail', 'status_detail')]:
            if move[0] in info:
                info[move[1]] = info[move[0]]
                del info[move[0]]

    stack = {}
    node_down = False
    # Fetch stack from asd-manager
    try:
        remote_stack = self.client.get_stack()
        for slot_id, slot_data in remote_stack.iteritems():
            stack[slot_id] = {'status': 'ok'}
            stack[slot_id].update(slot_data)
            # Migrate state > status
            _move(stack[slot_id])
            for osd_data in slot_data.get('osds', {}).itervalues():
                _move(osd_data)
    except (requests.ConnectionError, requests.Timeout, InvalidCredentialsError):
        self._logger.warning('Error during stack retrieval. Assuming that the node is down')
        node_down = True

    model_osds = {}
    found_osds = {}
    # Apply own model to fetched stack
    for osd in self.osds:
        model_osds[osd.osd_id] = osd  # Initially set the info
        if osd.slot_id not in stack:
            stack[osd.slot_id] = {'status': self.OSD_STATUSES.UNKNOWN if node_down is True else self.OSD_STATUSES.MISSING,
                                  'status_detail': self.OSD_STATUS_DETAILS.NODEDOWN if node_down is True else '',
                                  'osds': {}}
        osd_data = stack[osd.slot_id]['osds'].get(osd.osd_id, {})
        stack[osd.slot_id]['osds'][osd.osd_id] = osd_data  # Initially set the info in the stack
        osd_data.update(osd.stack_info)
        if node_down is True:
            osd_data['status'] = self.OSD_STATUSES.UNKNOWN
            osd_data['status_detail'] = self.OSD_STATUS_DETAILS.NODEDOWN
        elif osd.alba_backend_guid is not None:  # Osds has been claimed
            # Load information from alba
            if osd.alba_backend_guid not in found_osds:
                found_osds[osd.alba_backend_guid] = {}
                if osd.alba_backend.abm_cluster is not None:
                    config = Configuration.get_configuration_path(osd.alba_backend.abm_cluster.config_location)
                    try:
                        for found_osd in AlbaCLI.run(command='list-all-osds', config=config):
                            found_osds[osd.alba_backend_guid][found_osd['long_id']] = found_osd
                    except (AlbaError, RuntimeError):
                        self._logger.exception('Listing all osds has failed')
                        osd_data['status'] = self.OSD_STATUSES.UNKNOWN
                        osd_data['status_detail'] = self.OSD_STATUS_DETAILS.ALBAERROR
                        continue

            if osd.osd_id not in found_osds[osd.alba_backend_guid]:
                # Not claimed by any backend thus not in use
                continue
            found_osd = found_osds[osd.alba_backend_guid][osd.osd_id]
            if found_osd['decommissioned'] is True:
                osd_data['status'] = self.OSD_STATUSES.UNAVAILABLE
                osd_data['status_detail'] = self.OSD_STATUS_DETAILS.DECOMMISSIONED
                continue

            backend_interval_key = '/ovs/alba/backends/{0}/gui_error_interval'.format(osd.alba_backend_guid)
            if Configuration.exists(backend_interval_key):
                interval = Configuration.get(backend_interval_key)
            else:
                interval = Configuration.get('/ovs/alba/backends/global_gui_error_interval')
            read = found_osd['read'] or [0]
            write = found_osd['write'] or [0]
            errors = found_osd['errors']
            osd_data['status'] = self.OSD_STATUSES.WARNING
            osd_data['status_detail'] = self.OSD_STATUS_DETAILS.ERROR
            if len(errors) == 0 or (len(read + write) > 0 and max(min(read), min(write)) > max(error[0] for error in errors) + interval):
                osd_data['status'] = self.OSD_STATUSES.OK
                osd_data['status_detail'] = ''

    statistics = {}
    for slot_info in stack.itervalues():
        for osd_id, osd in slot_info['osds'].iteritems():
            if osd.get('status_detail') == self.OSD_STATUS_DETAILS.ACTIVATING:
                osd['claimed_by'] = 'unknown'  # We won't be able to connect to it just yet
                continue
            if osd_id not in model_osds:
                # The osd is known by the remote node but not in the model
                # In that case, let's connect to the OSD to see whether we get some info from it
                try:
                    ips = osd['hosts'] if 'hosts' in osd and len(osd['hosts']) > 0 else osd.get('ips', [])
                    port = osd['port']
                    claimed_by = 'unknown'
                    for ip in ips:
                        try:
                            # Output will be None if it is not claimed
                            claimed_by = AlbaCLI.run('get-osd-claimed-by', named_params={'host': ip, 'port': port})
                            break
                        except (AlbaError, RuntimeError):
                            self._logger.warning('get-osd-claimed-by failed for IP:port {0}:{1}'.format(ip, port))
                    alba_backend = AlbaBackendList.get_by_alba_id(claimed_by)
                    osd['claimed_by'] = alba_backend.guid if alba_backend is not None else claimed_by
                except KeyError:
                    osd['claimed_by'] = 'unknown'
                except:
                    self._logger.exception('Could not load OSD info: {0}'.format(osd_id))
                    osd['claimed_by'] = 'unknown'
                if osd.get('status') not in ['error', 'warning']:
                    osd['status'] = self.OSD_STATUSES.ERROR
                    osd['status_detail'] = self.OSD_STATUS_DETAILS.UNREACHABLE

            claimed_by = osd.get('claimed_by', 'unknown')
            if claimed_by == 'unknown':
                continue
            try:
                alba_backend = AlbaBackend(claimed_by)
            except ObjectNotFoundException:
                continue
            # Add usage information
            if alba_backend not in statistics:
                statistics[alba_backend] = alba_backend.osd_statistics
            osd_statistics = statistics[alba_backend]
            if osd_id not in osd_statistics:
                continue
            stats = osd_statistics[osd_id]
            osd['usage'] = {'size': int(stats['capacity']),
                            'used': int(stats['disk_usage']),
                            'available': int(stats['capacity'] - stats['disk_usage'])}
    return stack
    # Fragment from the SupportAgent run loop; the 'try' was missing in front
    # of the dangling 'except' and has been restored, and a duplicated
    # unconditional 'self.interval = return_data['interval']' was dropped
    try:
        for task in return_data['tasks']:
            self._process_task(task['code'], task['metadata'])
    except Exception, ex:
        logger.exception('Unexpected error processing tasks: {0}'.format(ex))
        raise
    if 'interval' in return_data:
        interval = return_data['interval']
        if interval != self.interval:
            self.interval = interval
            self._update_config('interval', str(interval))


if __name__ == '__main__':
    try:
        if Configuration.get('ovs.support.enabled') is False:
            print 'Support not enabled'
            sys.exit(0)
        logger.info('Starting up')
        client = SupportAgent()
        while True:
            try:
                client.run()
                time.sleep(client.interval)
            except KeyboardInterrupt:
                raise
            except Exception, exception:
                logger.exception('Unexpected error during run: {0}'.format(exception))
                time.sleep(10)
    except KeyboardInterrupt:
        print 'Aborting...'
def promote_node(cluster_ip, master_ip, ip_client_map, unique_id, configure_memcached, configure_rabbitmq):
    """
    Promotes a given node
    """
    from ovs.dal.lists.storagerouterlist import StorageRouterList
    from ovs.dal.lists.servicetypelist import ServiceTypeList
    from ovs.dal.lists.servicelist import ServiceList
    from ovs.dal.hybrids.service import Service

    Toolbox.log(logger=NodeTypeController._logger, messages='Promoting node', title=True)
    service_manager = ServiceFactory.get_manager()
    if configure_memcached is True:
        if NodeTypeController._validate_local_memcache_servers(ip_client_map) is False:
            raise RuntimeError('Not all memcache nodes can be reached which is required for promoting a node.')

    target_client = ip_client_map[cluster_ip]
    machine_id = System.get_my_machine_id(target_client)
    node_name, _ = target_client.get_hostname()
    master_client = ip_client_map[master_ip]

    storagerouter = StorageRouterList.get_by_machine_id(unique_id)
    storagerouter.node_type = 'MASTER'
    storagerouter.save()

    external_config = Configuration.get('/ovs/framework/external_config')
    if external_config is None:
        Toolbox.log(logger=NodeTypeController._logger, messages='Joining Arakoon configuration cluster')
        arakoon_installer = ArakoonInstaller(cluster_name='config')
        arakoon_installer.load(ip=master_ip)
        arakoon_installer.extend_cluster(new_ip=cluster_ip,
                                         base_dir=Configuration.get('/ovs/framework/paths|ovsdb'))
        arakoon_installer.restart_cluster_after_extending(new_ip=cluster_ip)
        service_manager.register_service(node_name=machine_id,
                                         service_metadata=arakoon_installer.service_metadata[cluster_ip])

    # Find other (arakoon) master nodes
    arakoon_cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|ovsdb'))
    arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=arakoon_cluster_name)
    config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name)
    master_node_ips = [node.ip for node in config.nodes]
    if cluster_ip in master_node_ips:
        master_node_ips.remove(cluster_ip)
    if len(master_node_ips) == 0:
        raise RuntimeError('There should be at least one other master node')

    arakoon_ports = []
    if arakoon_metadata['internal'] is True:
        Toolbox.log(logger=NodeTypeController._logger, messages='Joining Arakoon OVS DB cluster')
        arakoon_installer = ArakoonInstaller(cluster_name=arakoon_cluster_name)
        arakoon_installer.load()
        arakoon_installer.extend_cluster(new_ip=cluster_ip,
                                         base_dir=Configuration.get('/ovs/framework/paths|ovsdb'))
        arakoon_installer.restart_cluster_after_extending(new_ip=cluster_ip)
        arakoon_ports = arakoon_installer.ports[cluster_ip]

    if configure_memcached is True:
        NodeTypeController.configure_memcached(client=target_client, logger=NodeTypeController._logger)
    NodeTypeController.add_services(client=target_client, node_type='master', logger=NodeTypeController._logger)

    Toolbox.log(logger=NodeTypeController._logger, messages='Update configurations')
    if configure_memcached is True:
        endpoints = Configuration.get('/ovs/framework/memcache|endpoints')
        endpoint = '{0}:11211'.format(cluster_ip)
        if endpoint not in endpoints:
            endpoints.append(endpoint)
            Configuration.set('/ovs/framework/memcache|endpoints', endpoints)
    if configure_rabbitmq is True:
        endpoints = Configuration.get('/ovs/framework/messagequeue|endpoints')
        endpoint = '{0}:5672'.format(cluster_ip)
        if endpoint not in endpoints:
            endpoints.append(endpoint)
            Configuration.set('/ovs/framework/messagequeue|endpoints', endpoints)

    if arakoon_metadata['internal'] is True:
        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting master node services')
        PersistentFactory.store = None
        VolatileFactory.store = None

        if 'arakoon-ovsdb' not in [s.name for s in ServiceList.get_services() if s.is_internal is False or s.storagerouter.ip == cluster_ip]:
            service = Service()
            service.name = 'arakoon-ovsdb'
            service.type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON)
            service.ports = arakoon_ports
            service.storagerouter = storagerouter
            service.save()

    if configure_rabbitmq is True:
        NodeTypeController.configure_rabbitmq(client=target_client, logger=NodeTypeController._logger)
        # Copy rabbitmq cookie
        rabbitmq_cookie_file = '/var/lib/rabbitmq/.erlang.cookie'
        Toolbox.log(logger=NodeTypeController._logger, messages='Copying RabbitMQ cookie')
        contents = master_client.file_read(rabbitmq_cookie_file)
        master_hostname, _ = master_client.get_hostname()
        target_client.dir_create(os.path.dirname(rabbitmq_cookie_file))
        target_client.file_write(rabbitmq_cookie_file, contents)
        target_client.file_chmod(rabbitmq_cookie_file, mode=0400)
        target_client.run(['rabbitmq-server', '-detached'])
        time.sleep(5)
        target_client.run(['rabbitmqctl', 'stop_app'])
        time.sleep(5)
        target_client.run(['rabbitmqctl', 'join_cluster', 'rabbit@{0}'.format(master_hostname)])
        time.sleep(5)
        target_client.run(['rabbitmqctl', 'stop'])
        time.sleep(5)

        # Enable HA for the rabbitMQ queues
        ServiceFactory.change_service_state(target_client, 'rabbitmq-server', 'start', NodeTypeController._logger)
        NodeTypeController.check_rabbitmq_and_enable_ha_mode(client=target_client, logger=NodeTypeController._logger)
    NodeTypeController._configure_amqp_to_volumedriver()

    Toolbox.log(logger=NodeTypeController._logger, messages='Starting services')
    services = ['memcached', 'arakoon-ovsdb', 'rabbitmq-server']
    if arakoon_metadata['internal'] is True:
        services.remove('arakoon-ovsdb')
    for service in services:
        if service_manager.has_service(service, client=target_client):
            ServiceFactory.change_service_state(target_client, service, 'start', NodeTypeController._logger)

    Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
    NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger)

    if Toolbox.run_hooks(component='nodetype',
                         sub_component='promote',
                         logger=NodeTypeController._logger,
                         cluster_ip=cluster_ip,
                         master_ip=master_ip):
        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
        NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger)

    if NodeTypeController.avahi_installed(client=target_client, logger=NodeTypeController._logger) is True:
        NodeTypeController.configure_avahi(client=target_client, node_name=node_name, node_type='master', logger=NodeTypeController._logger)
    Configuration.set('/ovs/framework/hosts/{0}/type'.format(machine_id), 'MASTER')
    target_client.run(['chown', '-R', 'ovs:ovs', '/opt/OpenvStorage/config'])
    Configuration.set('/ovs/framework/hosts/{0}/promotecompleted'.format(machine_id), True)

    if target_client.file_exists('/tmp/ovs_rollback'):
        target_client.file_delete('/tmp/ovs_rollback')

    Toolbox.log(logger=NodeTypeController._logger, messages='Promote complete')
def migrate(): """ Executes async migrations. It doesn't matter too much when they are executed, as long as they get eventually executed. This code will typically contain: * "dangerous" migration code (it needs certain running services) * Migration code depending on a cluster-wide state * ... * Successfully finishing a piece of migration code, should create an entry in /ovs/framework/migration in case it should not be executed again * Eg: /ovs/framework/migration|stats_monkey_integration: True """ MigrationController._logger.info('Preparing out of band migrations...') from ovs.dal.lists.servicetypelist import ServiceTypeList from ovs.dal.lists.storagedriverlist import StorageDriverList from ovs.dal.lists.storagerouterlist import StorageRouterList from ovs.dal.lists.vpoollist import VPoolList from ovs.extensions.db.arakooninstaller import ArakoonInstaller from ovs.extensions.generic.configuration import Configuration from ovs.extensions.generic.sshclient import SSHClient from ovs.extensions.generic.system import System from ovs_extensions.generic.toolbox import ExtensionsToolbox from ovs.extensions.migration.migration.ovsmigrator import ExtensionMigrator from ovs.extensions.packages.packagefactory import PackageFactory from ovs_extensions.services.interfaces.systemd import Systemd from ovs.extensions.services.servicefactory import ServiceFactory from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration from ovs.lib.helpers.storagedriver.installer import StorageDriverInstaller MigrationController._logger.info('Start out of band migrations...') service_manager = ServiceFactory.get_manager() sr_client_map = {} for storagerouter in StorageRouterList.get_storagerouters(): sr_client_map[storagerouter.guid] = SSHClient( endpoint=storagerouter. 
ip, # Is triggered during post-update code too during which the ovs-watcher-framework service is still down and thus not refreshing the heartbeat --> use IP i/o StorageRouter username='******') ######################################################### # Addition of 'ExecReload' for AlbaProxy SystemD services if ServiceFactory.get_service_type() == 'systemd': changed_clients = set() for storagedriver in StorageDriverList.get_storagedrivers(): root_client = sr_client_map[storagedriver.storagerouter_guid] for alba_proxy in storagedriver.alba_proxies: service = alba_proxy.service service_name = 'ovs-{0}'.format(service.name) if not service_manager.has_service(name=service_name, client=root_client): continue if 'ExecReload=' in root_client.file_read( filename='/lib/systemd/system/{0}.service'.format( service_name)): continue try: service_manager.regenerate_service( name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY, client=root_client, target_name=service_name) changed_clients.add(root_client) except: MigrationController._logger.exception( 'Error rebuilding service {0}'.format( service_name)) for root_client in changed_clients: root_client.run(['systemctl', 'daemon-reload']) ################################################################## # Adjustment of open file descriptors for Arakoon services to 8192 changed_clients = set() for storagerouter in StorageRouterList.get_storagerouters(): root_client = sr_client_map[storagerouter.guid] for service_name in service_manager.list_services( client=root_client): if not service_name.startswith('ovs-arakoon-'): continue if ServiceFactory.get_service_type() == 'systemd': path = '/lib/systemd/system/{0}.service'.format( service_name) check = 'LimitNOFILE=8192' else: path = '/etc/init/{0}.conf'.format(service_name) check = 'limit nofile 8192 8192' if not root_client.file_exists(path): continue if check in root_client.file_read(path): continue try: service_manager.regenerate_service( name='ovs-arakoon', client=root_client, target_name=service_name) changed_clients.add(root_client) ExtensionsToolbox.edit_version_file( client=root_client, package_name='arakoon', old_run_file='{0}/{1}.version'.format( ServiceFactory.RUN_FILE_DIR, service_name)) except: MigrationController._logger.exception( 'Error rebuilding service {0}'.format(service_name)) for root_client in changed_clients: root_client.run(['systemctl', 'daemon-reload']) ############################# # Migrate to multiple proxies for storagedriver in StorageDriverList.get_storagedrivers(): vpool = storagedriver.vpool root_client = sr_client_map[storagedriver.storagerouter_guid] for alba_proxy in storagedriver.alba_proxies: # Rename alba_proxy service in model service = alba_proxy.service old_service_name = 'albaproxy_{0}'.format(vpool.name) new_service_name = 'albaproxy_{0}_0'.format(vpool.name) if old_service_name != service.name: continue service.name = new_service_name service.save() if not service_manager.has_service(name=old_service_name, client=root_client): continue old_configuration_key = '/ovs/framework/hosts/{0}/services/{1}'.format( storagedriver.storagerouter.machine_id, old_service_name) if not Configuration.exists(key=old_configuration_key): continue # Add '-reboot' to alba_proxy services (because of newly created services and removal of old service) ExtensionsToolbox.edit_version_file( client=root_client, package_name='alba', old_run_file='{0}/{1}.version'.format( ServiceFactory.RUN_FILE_DIR, old_service_name), new_run_file='{0}/{1}.version'.format( ServiceFactory.RUN_FILE_DIR, 
new_service_name)) # Register new service and remove old service service_manager.add_service( name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY, client=root_client, params=Configuration.get(old_configuration_key), target_name='ovs-{0}'.format(new_service_name)) # Update scrub proxy config proxy_config_key = '/ovs/vpools/{0}/proxies/{1}/config/main'.format( vpool.guid, alba_proxy.guid) proxy_config = None if Configuration.exists( key=proxy_config_key) is False else Configuration.get( proxy_config_key) if proxy_config is not None: fragment_cache = proxy_config.get( StorageDriverConfiguration.CACHE_FRAGMENT, ['none', {}]) if fragment_cache[0] == 'alba' and fragment_cache[1].get( 'cache_on_write' ) is True: # Accelerated ALBA configured fragment_cache_scrub_info = copy.deepcopy( fragment_cache) fragment_cache_scrub_info[1]['cache_on_read'] = False proxy_scrub_config_key = '/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format( vpool.guid) proxy_scrub_config = None if Configuration.exists( key=proxy_scrub_config_key ) is False else Configuration.get( proxy_scrub_config_key) if proxy_scrub_config is not None and proxy_scrub_config[ StorageDriverConfiguration.CACHE_FRAGMENT] == [ 'none' ]: proxy_scrub_config[ StorageDriverConfiguration. CACHE_FRAGMENT] = fragment_cache_scrub_info Configuration.set(key=proxy_scrub_config_key, value=proxy_scrub_config) # Update 'backend_connection_manager' section changes = False storagedriver_config = StorageDriverConfiguration( vpool.guid, storagedriver.storagedriver_id) if 'backend_connection_manager' not in storagedriver_config.configuration: continue current_config = storagedriver_config.configuration[ 'backend_connection_manager'] if current_config.get('backend_type') != 'MULTI': changes = True backend_connection_manager = {'backend_type': 'MULTI'} for index, proxy in enumerate( sorted(storagedriver.alba_proxies, key=lambda pr: pr.service.ports[0])): backend_connection_manager[str(index)] = copy.deepcopy( current_config) # noinspection PyUnresolvedReferences backend_connection_manager[str( index)]['alba_connection_use_rora'] = True # noinspection PyUnresolvedReferences backend_connection_manager[str(index)][ 'alba_connection_rora_manifest_cache_capacity'] = 5000 # noinspection PyUnresolvedReferences for key, value in backend_connection_manager[str( index)].items(): if key.startswith('backend_interface'): backend_connection_manager[key] = value # noinspection PyUnresolvedReferences del backend_connection_manager[str(index)][key] for key, value in { 'backend_interface_retries_on_error': 5, 'backend_interface_retry_interval_secs': 1, 'backend_interface_retry_backoff_multiplier': 2.0 }.iteritems(): if key not in backend_connection_manager: backend_connection_manager[key] = value else: backend_connection_manager = current_config for value in backend_connection_manager.values(): if isinstance(value, dict): for key, val in value.items(): if key.startswith('backend_interface'): backend_connection_manager[key] = val changes = True del value[key] for key, value in { 'backend_interface_retries_on_error': 5, 'backend_interface_retry_interval_secs': 1, 'backend_interface_retry_backoff_multiplier': 2.0 }.iteritems(): if key not in backend_connection_manager: changes = True backend_connection_manager[key] = value if changes is True: storagedriver_config.clear_backend_connection_manager() storagedriver_config.configure_backend_connection_manager( **backend_connection_manager) storagedriver_config.save(root_client) # Add '-reboot' to volumedriver services (because of updated 
'backend_connection_manager' section) ExtensionsToolbox.edit_version_file( client=root_client, package_name='volumedriver', old_run_file='{0}/{1}.version'.format( ServiceFactory.RUN_FILE_DIR, 'volumedriver_{0}'.format(vpool.name))) if service_manager.__class__ == Systemd: root_client.run(['systemctl', 'daemon-reload']) ######################################## # Update metadata_store_bits information vpools = VPoolList.get_vpools() for vpool in vpools: bits = None for storagedriver in vpool.storagedrivers: key = '/ovs/framework/hosts/{0}/services/volumedriver_{1}'.format( storagedriver.storagerouter.machine_id, vpool.name) if Configuration.exists( key=key ) and 'METADATASTORE_BITS' not in Configuration.get(key=key): if bits is None: entries = service_manager.extract_from_service_file( name='ovs-volumedriver_{0}'.format(vpool.name), client=sr_client_map[ storagedriver.storagerouter_guid], entries=['METADATASTORE_BITS=']) if len(entries) == 1: bits = entries[0].split('=')[-1] bits = int(bits) if bits.isdigit() else 5 if bits is not None: try: content = Configuration.get(key=key) content['METADATASTORE_BITS'] = bits Configuration.set(key=key, value=content) except: MigrationController._logger.exception( 'Error updating volumedriver info for vPool {0} on StorageRouter {1}' .format(vpool.name, storagedriver.storagerouter.name)) if bits is not None: vpool.metadata_store_bits = bits vpool.save() ##################################### # Update the vPool metadata structure def _update_metadata_structure(metadata): metadata = copy.deepcopy(metadata) cache_structure = { 'read': False, 'write': False, 'is_backend': False, 'quota': None, 'backend_info': { 'name': None, # Will be filled in when is_backend is true 'backend_guid': None, 'alba_backend_guid': None, 'policies': None, 'preset': None, 'arakoon_config': None, 'connection_info': { 'client_id': None, 'client_secret': None, 'host': None, 'port': None, 'local': None } } } structure_map = { StorageDriverConfiguration.CACHE_BLOCK: { 'read': 'block_cache_on_read', 'write': 'block_cache_on_write', 'quota': 'quota_bc', 'backend_prefix': 'backend_bc_{0}' }, StorageDriverConfiguration.CACHE_FRAGMENT: { 'read': 'fragment_cache_on_read', 'write': 'fragment_cache_on_write', 'quota': 'quota_fc', 'backend_prefix': 'backend_aa_{0}' } } if 'arakoon_config' in metadata[ 'backend']: # Arakoon config should be placed under the backend info metadata['backend']['backend_info'][ 'arakoon_config'] = metadata['backend'].pop( 'arakoon_config') if 'connection_info' in metadata[ 'backend']: # Connection info should be placed under the backend info metadata['backend']['backend_info'][ 'connection_info'] = metadata['backend'].pop( 'connection_info') if 'caching_info' not in metadata: # Caching info is the new key would_be_caching_info = {} metadata['caching_info'] = would_be_caching_info # Extract all caching data for every storagerouter current_caching_info = metadata['backend'].pop( 'caching_info') # Pop to mutate metadata for storagerouter_guid in current_caching_info.iterkeys(): current_cache_data = current_caching_info[ storagerouter_guid] storagerouter_caching_info = {} would_be_caching_info[ storagerouter_guid] = storagerouter_caching_info for cache_type, cache_type_mapping in structure_map.iteritems( ): new_cache_structure = copy.deepcopy(cache_structure) storagerouter_caching_info[ cache_type] = new_cache_structure for new_structure_key, old_structure_key in cache_type_mapping.iteritems( ): if new_structure_key == 'backend_prefix': # Get possible backend related info
metadata_key = old_structure_key.format( storagerouter_guid) if metadata_key not in metadata: continue backend_data = metadata.pop( metadata_key) # Pop to mutate metadata new_cache_structure['is_backend'] = True # Copy over the old data new_cache_structure['backend_info'][ 'arakoon_config'] = backend_data[ 'arakoon_config'] new_cache_structure['backend_info'].update( backend_data['backend_info']) new_cache_structure['backend_info'][ 'connection_info'].update( backend_data['connection_info']) else: new_cache_structure[ new_structure_key] = current_cache_data.get( old_structure_key) return metadata vpools = VPoolList.get_vpools() for vpool in vpools: try: new_metadata = _update_metadata_structure(vpool.metadata) vpool.metadata = new_metadata vpool.save() except KeyError: MigrationController._logger.exception( 'Exceptions occurred when updating the metadata for vPool {0}' .format(vpool.name)) ############################################## # Always use indent=4 during Configuration set def _resave_all_config_entries(config_path='/ovs'): """ Recursive function which checks, for every config management key, whether it is a directory or not. If it is not a directory, we retrieve the config and just save it again using the new indentation logic """ for item in Configuration.list(config_path): new_path = config_path + '/' + item MigrationController._logger.debug(new_path) if Configuration.dir_exists(new_path) is True: _resave_all_config_entries(config_path=new_path) else: try: _config = Configuration.get(new_path) Configuration.set(new_path, _config) except: _config = Configuration.get(new_path, raw=True) Configuration.set(new_path, _config, raw=True) if ExtensionMigrator.THIS_VERSION <= 13: # There is no way of checking whether this new indentation logic has been applied, so we only perform this for version 13 and lower MigrationController._logger.info( 'Re-saving every configuration setting with new indentation rules' ) _resave_all_config_entries() ############################ # Update some default values def _update_manifest_cache_size(_proxy_config_key): updated = False manifest_cache_size = 500 * 1024 * 1024 if Configuration.exists(key=_proxy_config_key): _proxy_config = Configuration.get(key=_proxy_config_key) for cache_type in [ StorageDriverConfiguration.CACHE_BLOCK, StorageDriverConfiguration.CACHE_FRAGMENT ]: if cache_type in _proxy_config and _proxy_config[ cache_type][0] == 'alba': if _proxy_config[cache_type][1][ 'manifest_cache_size'] != manifest_cache_size: updated = True _proxy_config[cache_type][1][ 'manifest_cache_size'] = manifest_cache_size if _proxy_config['manifest_cache_size'] != manifest_cache_size: updated = True _proxy_config['manifest_cache_size'] = manifest_cache_size if updated is True: Configuration.set(key=_proxy_config_key, value=_proxy_config) return updated for storagedriver in StorageDriverList.get_storagedrivers(): try: vpool = storagedriver.vpool root_client = sr_client_map[storagedriver.storagerouter_guid] _update_manifest_cache_size( '/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format( vpool.guid) ) # Generic scrub proxy is deployed every time scrubbing kicks in, so no need to restart these services for alba_proxy in storagedriver.alba_proxies: if _update_manifest_cache_size( '/ovs/vpools/{0}/proxies/{1}/config/main'.format( vpool.guid, alba_proxy.guid)) is True: # Add '-reboot' to alba_proxy services (because of newly created services and removal of old service) ExtensionsToolbox.edit_version_file( client=root_client, package_name='alba', old_run_file='{0}/{1}.version'.format(
ServiceFactory.RUN_FILE_DIR, alba_proxy.service.name)) # Update 'backend_connection_manager' section changes = False storagedriver_config = StorageDriverConfiguration( vpool.guid, storagedriver.storagedriver_id) if 'backend_connection_manager' not in storagedriver_config.configuration: continue current_config = storagedriver_config.configuration[ 'backend_connection_manager'] for key, value in current_config.iteritems(): if key.isdigit() is True: if value.get( 'alba_connection_asd_connection_pool_capacity' ) != 10: changes = True value[ 'alba_connection_asd_connection_pool_capacity'] = 10 if value.get('alba_connection_timeout') != 30: changes = True value['alba_connection_timeout'] = 30 if value.get( 'alba_connection_rora_manifest_cache_capacity' ) != 25000: changes = True value[ 'alba_connection_rora_manifest_cache_capacity'] = 25000 if changes is True: storagedriver_config.clear_backend_connection_manager() storagedriver_config.configure_backend_connection_manager( **current_config) storagedriver_config.save(root_client) # Add '-reboot' to volumedriver services (because of updated 'backend_connection_manager' section) ExtensionsToolbox.edit_version_file( client=root_client, package_name='volumedriver', old_run_file='{0}/{1}.version'.format( ServiceFactory.RUN_FILE_DIR, 'volumedriver_{0}'.format(vpool.name))) except Exception: MigrationController._logger.exception( 'Updating default configuration values failed for StorageDriver {0}' .format(storagedriver.storagedriver_id)) #################################################### # Remove proxy fail fast as env variable for proxies changed_clients = set() for storagerouter in StorageRouterList.get_storagerouters(): root_client = sr_client_map[storagerouter.guid] for service_name in service_manager.list_services( client=root_client): if not service_name.startswith('ovs-albaproxy_'): continue if ServiceFactory.get_service_type() == 'systemd': path = '/lib/systemd/system/{0}.service'.format( service_name) check = 'Environment=ALBA_FAIL_FAST=true' else: path = '/etc/init/{0}.conf'.format(service_name) check = 'env ALBA_FAIL_FAST=true' if not root_client.file_exists(path): continue if check not in root_client.file_read(path): continue try: service_manager.regenerate_service( name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY, client=root_client, target_name=service_name) changed_clients.add(root_client) ExtensionsToolbox.edit_version_file( client=root_client, package_name='alba', old_run_file='{0}/{1}.version'.format( ServiceFactory.RUN_FILE_DIR, service_name)) except: MigrationController._logger.exception( 'Error rebuilding service {0}'.format(service_name)) for root_client in changed_clients: root_client.run(['systemctl', 'daemon-reload']) ###################################### # Integration of stats monkey (2.10.2) if Configuration.get( key='/ovs/framework/migration|stats_monkey_integration', default=False) is False: try: # Get content of old key into new key old_stats_monkey_key = '/statsmonkey/statsmonkey' if Configuration.exists(key=old_stats_monkey_key) is True: Configuration.set( key='/ovs/framework/monitoring/stats_monkey', value=Configuration.get(key=old_stats_monkey_key)) Configuration.delete(key=old_stats_monkey_key) # Make sure to disable the stats monkey by default or take over the current schedule if it was configured manually before celery_key = '/ovs/framework/scheduling/celery' current_value = None scheduling_config = Configuration.get(key=celery_key, default={}) if 'statsmonkey.run_all_stats' in scheduling_config: # Old celery 
task name of the stats monkey current_value = scheduling_config.pop( 'statsmonkey.run_all_stats') scheduling_config['ovs.stats_monkey.run_all'] = current_value scheduling_config['alba.stats_monkey.run_all'] = current_value Configuration.set(key=celery_key, value=scheduling_config) support_key = '/ovs/framework/support' support_config = Configuration.get(key=support_key) support_config['support_agent'] = support_config.pop( 'enabled', True) support_config['remote_access'] = support_config.pop( 'enablesupport', False) Configuration.set(key=support_key, value=support_config) # Make sure once this finished, it never runs again by setting this key to True Configuration.set( key='/ovs/framework/migration|stats_monkey_integration', value=True) except Exception: MigrationController._logger.exception( 'Integration of stats monkey failed') ###################################################### # Write away cluster ID to a file for back-up purposes try: cluster_id = Configuration.get(key='/ovs/framework/cluster_id', default=None) with open(CONFIG_STORE_LOCATION, 'r') as config_file: config = json.load(config_file) if cluster_id is not None and config.get('cluster_id', None) is None: config['cluster_id'] = cluster_id with open(CONFIG_STORE_LOCATION, 'w') as config_file: json.dump(config, config_file, indent=4) except Exception: MigrationController._logger.exception( 'Writing cluster id to a file failed.') ######################################################### # Additional string formatting in Arakoon services (2.11) try: if Configuration.get( key='/ovs/framework/migration|arakoon_service_update', default=False) is False: arakoon_service_names = [ ArakoonInstaller.get_service_name_for_cluster( cluster_name=cluster_name) for cluster_name in Configuration.list(key='ovs/arakoon') ] for storagerouter in StorageRouterList.get_masters(): for service_name in arakoon_service_names: config_key = ServiceFactory.SERVICE_CONFIG_KEY.format( storagerouter.machine_id, service_name) if Configuration.exists(key=config_key): config = Configuration.get(key=config_key) config[ 'RUN_FILE_DIR'] = ServiceFactory.RUN_FILE_DIR config[ 'ARAKOON_PKG_NAME'] = PackageFactory.PKG_ARAKOON config[ 'ARAKOON_VERSION_CMD'] = PackageFactory.VERSION_CMD_ARAKOON Configuration.set(key=config_key, value=config) # Make sure once this finished, it never runs again by setting this key to True Configuration.set( key='/ovs/framework/migration|arakoon_service_update', value=True) except Exception: MigrationController._logger.exception( 'Updating the string formatting for the Arakoon services failed' ) ############################################################ # Additional string formatting in ALBA proxy services (2.11) changed_clients = set() try: if Configuration.get( key='/ovs/framework/migration|alba_proxy_service_update', default=False) is False: alba_pkg_name, alba_version_cmd = PackageFactory.get_package_and_version_cmd_for( component=PackageFactory.COMP_ALBA) for service in ServiceTypeList.get_by_name( 'AlbaProxy').services: root_client = sr_client_map[service.storagerouter_guid] config_key = ServiceFactory.SERVICE_CONFIG_KEY.format( service.storagerouter.machine_id, service.name) if Configuration.exists(key=config_key): config = Configuration.get(key=config_key) config['RUN_FILE_DIR'] = ServiceFactory.RUN_FILE_DIR config['ALBA_PKG_NAME'] = alba_pkg_name config['ALBA_VERSION_CMD'] = alba_version_cmd Configuration.set(key=config_key, value=config) service_manager.regenerate_service( name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY, 
client=root_client, target_name='ovs-{0}'.format(service.name)) changed_clients.add(root_client) # Make sure once this finished, it never runs again by setting this key to True Configuration.set( key='/ovs/framework/migration|alba_proxy_service_update', value=True) except Exception: MigrationController._logger.exception( 'Updating the string formatting for the ALBA proxy services failed' ) ############################################################ # Additional string formatting in DTL/VOLDRV services (2.11) try: if Configuration.get( key='/ovs/framework/migration|voldrv_service_update', default=False) is False: sd_pkg_name, sd_version_cmd = PackageFactory.get_package_and_version_cmd_for( component=PackageFactory.COMP_SD) for vpool in VPoolList.get_vpools(): for storagedriver in vpool.storagedrivers: root_client = sr_client_map[ storagedriver.storagerouter_guid] for entry in ['dtl', 'volumedriver']: service_name = '{0}_{1}'.format(entry, vpool.name) service_template = StorageDriverInstaller.SERVICE_TEMPLATE_DTL if entry == 'dtl' else StorageDriverInstaller.SERVICE_TEMPLATE_SD config_key = ServiceFactory.SERVICE_CONFIG_KEY.format( storagedriver.storagerouter.machine_id, service_name) if Configuration.exists(key=config_key): config = Configuration.get(key=config_key) config[ 'RUN_FILE_DIR'] = ServiceFactory.RUN_FILE_DIR config['VOLDRV_PKG_NAME'] = sd_pkg_name config['VOLDRV_VERSION_CMD'] = sd_version_cmd Configuration.set(key=config_key, value=config) service_manager.regenerate_service( name=service_template, client=root_client, target_name='ovs-{0}'.format(service_name)) changed_clients.add(root_client) # Make sure once this finished, it never runs again by setting this key to True Configuration.set( key='/ovs/framework/migration|voldrv_service_update', value=True) except Exception: MigrationController._logger.exception( 'Updating the string formatting for the DTL/VOLDRV services failed' ) ####################################################### # Storing actual package name in version files (2.11.0) (https://github.com/openvstorage/framework/issues/1876) if Configuration.get( key= '/ovs/framework/migration|actual_package_name_in_version_file', default=False) is False: try: voldrv_pkg_name, _ = PackageFactory.get_package_and_version_cmd_for( component=PackageFactory.COMP_SD) for storagerouter in StorageRouterList.get_storagerouters(): root_client = sr_client_map.get(storagerouter.guid) if root_client is None: continue for file_name in root_client.file_list( directory=ServiceFactory.RUN_FILE_DIR): if not file_name.endswith('.version'): continue file_path = '{0}/{1}'.format( ServiceFactory.RUN_FILE_DIR, file_name) contents = root_client.file_read(filename=file_path) regenerate = False if voldrv_pkg_name == PackageFactory.PKG_VOLDRV_SERVER: if 'volumedriver-server' in contents: regenerate = True contents = contents.replace( 'volumedriver-server', PackageFactory.PKG_VOLDRV_SERVER) root_client.file_write(filename=file_path, contents=contents) elif voldrv_pkg_name == PackageFactory.PKG_VOLDRV_SERVER_EE: if 'volumedriver-server' in contents or PackageFactory.PKG_VOLDRV_SERVER in contents: regenerate = True contents = contents.replace( 'volumedriver-server', PackageFactory.PKG_VOLDRV_SERVER_EE) contents = contents.replace( PackageFactory.PKG_VOLDRV_SERVER, PackageFactory.PKG_VOLDRV_SERVER_EE) root_client.file_write(filename=file_path, contents=contents) if regenerate is True: service_manager.regenerate_service( name=StorageDriverInstaller.
SERVICE_TEMPLATE_DTL if file_name.startswith('dtl') else StorageDriverInstaller.SERVICE_TEMPLATE_SD, client=root_client, target_name='ovs-{0}'.format( file_name.split('.') [0])) # Leave out .version changed_clients.add(root_client) Configuration.set( key= '/ovs/framework/migration|actual_package_name_in_version_file', value=True) except Exception: MigrationController._logger.exception( 'Updating actual package name for version files failed') for root_client in changed_clients: try: root_client.run(['systemctl', 'daemon-reload']) except Exception: MigrationController._logger.exception( 'Executing command "systemctl daemon-reload" failed') ######################################################### # Addition of 'Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50' for AlbaProxy SystemD services if ServiceFactory.get_service_type() == 'systemd': changed_clients = set() for storagedriver in StorageDriverList.get_storagedrivers(): root_client = sr_client_map[storagedriver.storagerouter_guid] for alba_proxy in storagedriver.alba_proxies: service = alba_proxy.service service_name = 'ovs-{0}'.format(service.name) if not service_manager.has_service(name=service_name, client=root_client): continue if "Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50" in root_client.file_read( filename='/lib/systemd/system/{0}.service'.format( service_name)): continue try: service_manager.regenerate_service( name='ovs-albaproxy', client=root_client, target_name=service_name) changed_clients.add(root_client) except: MigrationController._logger.exception( 'Error rebuilding service {0}'.format( service_name)) for root_client in changed_clients: root_client.run(['systemctl', 'daemon-reload']) ######################################################### # Addition of 'Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50' for Arakoon SystemD services if ServiceFactory.get_service_type() == 'systemd': changed_clients = set() for storagerouter in StorageRouterList.get_storagerouters(): root_client = sr_client_map[storagerouter.guid] for service_name in service_manager.list_services( client=root_client): if not service_name.startswith('ovs-arakoon-'): continue if not service_manager.has_service(name=service_name, client=root_client): continue if "Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50" in root_client.file_read( filename='/lib/systemd/system/{0}.service'.format( service_name)): continue try: service_manager.regenerate_service( name='ovs-arakoon', client=root_client, target_name=service_name) changed_clients.add(root_client) except: MigrationController._logger.exception( 'Error rebuilding service {0}'.format( service_name)) for root_client in changed_clients: root_client.run(['systemctl', 'daemon-reload']) ################################################### # Disable MDS checkup job by default mds_catch_up_migration_key = '/ovs/framework/migration|mds_catch_up' if Configuration.get(key=mds_catch_up_migration_key, default=False) is False: try: celery_key = '/ovs/framework/scheduling/celery' catch_up_key = 'ovs.mds.mds_catchup' scheduling_config = Configuration.get(key=celery_key, default={}) if catch_up_key not in scheduling_config: scheduling_config[catch_up_key] = None # Disable Configuration.set(key=celery_key, value=scheduling_config) Configuration.set(mds_catch_up_migration_key, True) except: MigrationController._logger.exception( 'Integration of mds_catch_up failed') ################################################### # The components need to register themselves to avoid throwing the configuration away for storagerouter in 
StorageRouterList.get_storagerouters(): registration_key = Configuration.generate_registration_key( storagerouter.machine_id) if System.get_component_identifier() not in Configuration.get( registration_key, default=[]): Configuration.register_usage(System.get_component_identifier(), registration_key) MigrationController._logger.info('Finished out of band migrations')
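Each migration block above guards itself with a boolean key under /ovs/framework/migration so that it runs at most once cluster-wide. Reduced to its essence, the pattern looks as follows (a sketch with a plain dict standing in for the configuration management; run_once is a hypothetical helper):

def run_once(flags, flag_name, migration):
    """Run 'migration' only when its flag is still unset, then mark it done."""
    if flags.get(flag_name, False) is False:
        migration()
        flags[flag_name] = True

flags = {}  # Stand-in for the /ovs/framework/migration keys
run_once(flags, 'stats_monkey_integration', lambda: None)  # Executes
run_once(flags, 'stats_monkey_integration', lambda: None)  # Skipped, flag is True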
def get(self, request, *args, **kwargs): """ Handles the OAuth2 redirect and exchanges the authorization code for an access token """ _ = args, kwargs html_endpoint = Configuration.get( '/ovs/framework/webapps|html_endpoint') if 'code' not in request.GET: OAuth2RedirectView._logger.error( 'Got OAuth2 redirection request without code') return HttpResponseRedirect(html_endpoint) code = request.GET['code'] if 'state' not in request.GET: OAuth2RedirectView._logger.error( 'Got OAuth2 redirection request without state') return HttpResponseRedirect(html_endpoint) state = request.GET['state'] if 'error' in request.GET: error = request.GET['error'] description = request.GET[ 'error_description'] if 'error_description' in request.GET else '' OAuth2RedirectView._logger.error( 'Error {0} during OAuth2 redirection request: {1}'.format( error, description)) return HttpResponseRedirect(html_endpoint) base_url = Configuration.get('/ovs/framework/webapps|oauth2.token_uri') client_id = Configuration.get( '/ovs/framework/webapps|oauth2.client_id') client_secret = Configuration.get( '/ovs/framework/webapps|oauth2.client_secret') parameters = { 'grant_type': 'authorization_code', 'redirect_uri': 'https://{0}/api/oauth2/redirect/'.format( System.get_my_storagerouter().ip), 'client_id': client_id, 'code': code } url = '{0}?{1}'.format(base_url, urllib.urlencode(parameters)) headers = { 'Accept': 'application/json', 'Authorization': 'Basic {0}'.format( base64.b64encode('{0}:{1}'.format(client_id, client_secret)).strip()) } raw_response = requests.post(url=url, headers=headers, verify=False) response = raw_response.json() if 'error' in response: error = response['error'] description = response[ 'error_description'] if 'error_description' in response else '' OAuth2RedirectView._logger.error( 'Error {0} during OAuth2 redirection access token: {1}'.format( error, description)) return HttpResponseRedirect(html_endpoint) token = response['access_token'] expires_in = response['expires_in'] clients = ClientList.get_by_types('INTERNAL', 'CLIENT_CREDENTIALS') client = None for current_client in clients: if current_client.user.group.name == 'administrators': client = current_client break if client is None: OAuth2RedirectView._logger.error( 'Could not find INTERNAL CLIENT_CREDENTIALS client in administrator group.' ) return HttpResponseRedirect(html_endpoint) roles = RoleList.get_roles_by_codes(['read', 'write', 'manage']) access_token, _ = Toolbox.generate_tokens(client, generate_access=True, scopes=roles) access_token.expiration = int(time.time() + expires_in) access_token.access_token = token access_token.save() expires = datetime.datetime.now() + datetime.timedelta(minutes=2) response = HttpResponseRedirect(html_endpoint) response.set_cookie('state', state, expires=expires, secure=True) response.set_cookie('accesstoken', token, expires=expires, secure=True) return response
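The request construction in the middle of this view is a standard OAuth2 authorization-code exchange: the received code is posted to the token URI, authenticated with an HTTP Basic header built from the client id and secret. A condensed, self-contained sketch of just that exchange (Python 2, matching the surrounding code; exchange_code_for_token is a hypothetical helper and the argument values are illustrative):

import base64
import urllib

import requests

def exchange_code_for_token(token_uri, client_id, client_secret, code, redirect_uri):
    """Trade an OAuth2 authorization code for a token response (dict)."""
    parameters = {'grant_type': 'authorization_code',
                  'redirect_uri': redirect_uri,
                  'client_id': client_id,
                  'code': code}
    credentials = base64.b64encode('{0}:{1}'.format(client_id, client_secret))
    headers = {'Accept': 'application/json',
               'Authorization': 'Basic {0}'.format(credentials)}
    url = '{0}?{1}'.format(token_uri, urllib.urlencode(parameters))
    # On success the response contains 'access_token' and 'expires_in'
    return requests.post(url=url, headers=headers, verify=False).json()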
def migrate(previous_version): """ Migrates from a given version to the current version. It uses 'previous_version' to be smart wherever possible, but the code should be able to migrate any version towards the expected version. When this is not possible, the code can set a minimum version and raise when it is not met. :param previous_version: The previous version from which to start the migration :type previous_version: float """ working_version = previous_version if working_version == 0: # Initial version: # * Set the version to THIS RELEASE version from ovs.dal.hybrids.user import User from ovs.dal.hybrids.group import Group from ovs.dal.hybrids.role import Role from ovs.dal.hybrids.client import Client from ovs.dal.hybrids.j_rolegroup import RoleGroup from ovs.dal.hybrids.j_roleclient import RoleClient from ovs.dal.hybrids.servicetype import ServiceType from ovs.dal.hybrids.branding import Branding # Create groups admin_group = Group() admin_group.name = 'administrators' admin_group.description = 'Administrators' admin_group.save() viewers_group = Group() viewers_group.name = 'viewers' viewers_group.description = 'Viewers' viewers_group.save() # Create users admin = User() admin.username = '******' admin.password = hashlib.sha256('admin').hexdigest() admin.is_active = True admin.group = admin_group admin.save() # Create internal OAuth 2 clients admin_pw_client = Client() admin_pw_client.ovs_type = 'INTERNAL' admin_pw_client.grant_type = 'PASSWORD' admin_pw_client.user = admin admin_pw_client.save() admin_cc_client = Client() admin_cc_client.ovs_type = 'INTERNAL' admin_cc_client.grant_type = 'CLIENT_CREDENTIALS' admin_cc_client.client_secret = ''.join( random.choice(string.ascii_letters + string.digits + '|_=+*#@!/-[]{}<>.?,\'";:~') for _ in range(128)) admin_cc_client.user = admin admin_cc_client.save() # Create roles read_role = Role() read_role.code = 'read' read_role.name = 'Read' read_role.description = 'Can read objects' read_role.save() write_role = Role() write_role.code = 'write' write_role.name = 'Write' write_role.description = 'Can write objects' write_role.save() manage_role = Role() manage_role.code = 'manage' manage_role.name = 'Manage' manage_role.description = 'Can manage the system' manage_role.save() # Attach groups to roles mapping = [(admin_group, [read_role, write_role, manage_role]), (viewers_group, [read_role])] for setting in mapping: for role in setting[1]: rolegroup = RoleGroup() rolegroup.group = setting[0] rolegroup.role = role rolegroup.save() for user in setting[0].users: for role in setting[1]: for client in user.clients: roleclient = RoleClient() roleclient.client = client roleclient.role = role roleclient.save() # Add service types for service_type_info in [ ServiceType.SERVICE_TYPES.MD_SERVER, ServiceType.SERVICE_TYPES.ALBA_PROXY, ServiceType.SERVICE_TYPES.ARAKOON ]: service_type = ServiceType() service_type.name = service_type_info service_type.save() # Branding branding = Branding() branding.name = 'Default' branding.description = 'Default bootstrap theme' branding.css = 'bootstrap-default.min.css' branding.productname = 'Open vStorage' branding.is_default = True branding.save() slate = Branding() slate.name = 'Slate' slate.description = 'Dark bootstrap theme' slate.css = 'bootstrap-slate.min.css' slate.productname = 'Open vStorage' slate.is_default = False slate.save() # From here on, all actual migration should happen to get to the expected state for THIS RELEASE elif working_version < DALMigrator.THIS_VERSION: from ovs.dal.datalist import DataList 
from ovs.dal.helpers import HybridRunner, Descriptor from ovs.dal.hybrids.diskpartition import DiskPartition from ovs.dal.hybrids.j_storagedriverpartition import StorageDriverPartition from ovs.dal.lists.vpoollist import VPoolList from ovs.extensions.generic.configuration import Configuration from ovs.extensions.storage.persistentfactory import PersistentFactory persistent_client = PersistentFactory.get_client() if working_version < 16: # The list caching keys were changed to class|field|list_id instead of class|list_id|field persistent_client.delete_prefix( DataList.generate_persistent_cache_key()) # Migrate unique constraints & indexes hybrid_structure = HybridRunner.get_hybrids() for class_descriptor in hybrid_structure.values(): cls = Descriptor().load(class_descriptor).get_object() classname = cls.__name__.lower() unique_key = 'ovs_unique_{0}_{{0}}_'.format(classname) index_prefix = 'ovs_index_{0}|{{0}}|'.format(classname) index_key = 'ovs_index_{0}|{{0}}|{{1}}'.format(classname) uniques = [] indexes = [] # noinspection PyProtectedMember for prop in cls._properties: if prop.unique is True and len([ k for k in persistent_client.prefix( unique_key.format(prop.name)) ]) == 0: uniques.append(prop.name) if prop.indexed is True and len([ k for k in persistent_client.prefix( index_prefix.format(prop.name)) ]) == 0: indexes.append(prop.name) if len(uniques) > 0 or len(indexes) > 0: prefix = 'ovs_data_{0}_'.format(classname) for key, data in persistent_client.prefix_entries(prefix): for property_name in uniques: ukey = '{0}{1}'.format( unique_key.format(property_name), hashlib.sha1(str( data[property_name])).hexdigest()) persistent_client.set(ukey, key) for property_name in indexes: if property_name not in data: continue # This is the case when there's a new indexed property added. 
ikey = index_key.format( property_name, hashlib.sha1(str( data[property_name])).hexdigest()) index = list( persistent_client.get_multi( [ikey], must_exist=False))[0] transaction = persistent_client.begin_transaction() if index is None: persistent_client.assert_value( ikey, None, transaction=transaction) persistent_client.set(ikey, [key], transaction=transaction) elif key not in index: persistent_client.assert_value( ikey, index[:], transaction=transaction) persistent_client.set(ikey, index + [key], transaction=transaction) persistent_client.apply_transaction(transaction) # Clean up - removal of obsolete 'cfgdir' paths = Configuration.get(key='/ovs/framework/paths') if 'cfgdir' in paths: paths.pop('cfgdir') Configuration.set(key='/ovs/framework/paths', value=paths) # Rewrite indices 'alba_proxy' --> 'alba_proxies' changes = False transaction = persistent_client.begin_transaction() for old_key in persistent_client.prefix( 'ovs_reverseindex_storagedriver'): if '|alba_proxy|' in old_key: changes = True new_key = old_key.replace('|alba_proxy|', '|alba_proxies|') persistent_client.set(key=new_key, value=0, transaction=transaction) persistent_client.delete(key=old_key, transaction=transaction) if changes is True: persistent_client.apply_transaction(transaction=transaction) # Introduction of DTL role (Replaces DTL sub_role) for vpool in VPoolList.get_vpools(): for storagedriver in vpool.storagedrivers: for junction_partition_guid in storagedriver.partitions_guids: junction_partition = StorageDriverPartition( junction_partition_guid) if junction_partition.role == DiskPartition.ROLES.WRITE and junction_partition.sub_role == 'DTL': junction_partition.role = DiskPartition.ROLES.DTL junction_partition.sub_role = None junction_partition.save() if DiskPartition.ROLES.DTL not in junction_partition.partition.roles: junction_partition.partition.roles.append( DiskPartition.ROLES.DTL) junction_partition.partition.save() return DALMigrator.THIS_VERSION
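The unique-constraint part of this migration materializes, for every object, a reverse-lookup key of the form ovs_unique_<class>_<field>_<sha1-of-value> whose value is the object's data key, so uniqueness can be asserted with a single key lookup. A small sketch of that key scheme (Python 2 string semantics, as in the migration; unique_key is a hypothetical helper):

import hashlib

def unique_key(classname, property_name, value):
    """Build the reverse-lookup key that enforces a unique constraint."""
    return 'ovs_unique_{0}_{1}_{2}'.format(
        classname, property_name, hashlib.sha1(str(value)).hexdigest())

# A User stored under 'ovs_data_user_<guid>' with username 'admin' gets a
# companion key whose value points back at that data key:
print(unique_key('user', 'username', 'admin'))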
if os.environ.get('RUNNING_UNITTESTS') == 'True': inspect = InspectMockup celery = CeleryMockup() else: # Update the BACKEND_ALIASES when this item is loaded in (to support the Arakoon backend) BACKEND_ALIASES.update({ 'arakoon': 'ovs.extensions.celery.arakoonresult:ArakoonResultBackend' }) # Register the YAML encoder register('ovsyaml', YamlExtender.ordered_dump, yaml.safe_load, content_type='application/x-yaml', content_encoding='utf-8') memcache_servers = Configuration.get('/ovs/framework/memcache|endpoints') rmq_servers = Configuration.get('/ovs/framework/messagequeue|endpoints') unique_id = System.get_my_machine_id() include = [] path = '/'.join([os.path.dirname(__file__), 'lib']) for filename in os.listdir(path): if os.path.isfile('/'.join([ path, filename ])) and filename.endswith('.py') and filename != '__init__.py': name = filename.replace('.py', '') include.append('ovs.lib.{0}'.format(name)) celery = Celery('ovs', include=include)
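The include list above is built by scanning ovs/lib for importable modules, so every task module is registered with Celery without maintaining a hard-coded list. The same discovery, factored into a reusable sketch (discover_task_modules is a hypothetical helper):

import os

def discover_task_modules(lib_path, package='ovs.lib'):
    """Return dotted module names for every .py file in lib_path, mirroring
    how the Celery 'include' list is assembled above."""
    modules = []
    for filename in os.listdir(lib_path):
        full_path = os.path.join(lib_path, filename)
        if os.path.isfile(full_path) and filename.endswith('.py') and filename != '__init__.py':
            modules.append('{0}.{1}'.format(package, filename[:-3]))
    return modules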
def demote_node(cluster_ip, master_ip, ip_client_map, unique_id, unconfigure_memcached, unconfigure_rabbitmq, offline_nodes=None): """ Demotes a given node """ from ovs.dal.lists.storagerouterlist import StorageRouterList Toolbox.log(logger=NodeTypeController._logger, messages='Demoting node', title=True) service_manager = ServiceFactory.get_manager() if offline_nodes is None: offline_nodes = [] if unconfigure_memcached is True and len(offline_nodes) == 0: if NodeTypeController._validate_local_memcache_servers( ip_client_map) is False: raise RuntimeError( 'Not all memcache nodes can be reached which is required for demoting a node.' ) # Find other (arakoon) master nodes arakoon_cluster_name = str( Configuration.get('/ovs/framework/arakoon_clusters|ovsdb')) arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name( cluster_name=arakoon_cluster_name) config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name) master_node_ips = [node.ip for node in config.nodes] shrink = False if cluster_ip in master_node_ips: shrink = True master_node_ips.remove(cluster_ip) if len(master_node_ips) == 0: raise RuntimeError( 'There should be at least one other master node') storagerouter = StorageRouterList.get_by_machine_id(unique_id) storagerouter.node_type = 'EXTRA' storagerouter.save() offline_node_ips = [node.ip for node in offline_nodes] if arakoon_metadata['internal'] is True and shrink is True: Toolbox.log(logger=NodeTypeController._logger, messages='Leaving Arakoon {0} cluster'.format( arakoon_cluster_name)) arakoon_installer = ArakoonInstaller( cluster_name=arakoon_cluster_name) arakoon_installer.load() arakoon_installer.shrink_cluster(removal_ip=cluster_ip, offline_nodes=offline_node_ips) arakoon_installer.restart_cluster_after_shrinking() try: external_config = Configuration.get( '/ovs/framework/external_config') if external_config is None and shrink is True: Toolbox.log(logger=NodeTypeController._logger, messages='Leaving Arakoon config cluster') arakoon_installer = ArakoonInstaller(cluster_name='config') arakoon_installer.load(ip=master_node_ips[0]) arakoon_installer.shrink_cluster( removal_ip=cluster_ip, offline_nodes=offline_node_ips) arakoon_installer.restart_cluster_after_shrinking() except Exception as ex: Toolbox.log( logger=NodeTypeController._logger, messages=['\nFailed to leave configuration cluster', ex], loglevel='exception') Toolbox.log(logger=NodeTypeController._logger, messages='Update configurations') try: if unconfigure_memcached is True: endpoints = Configuration.get( '/ovs/framework/memcache|endpoints') endpoint = '{0}:{1}'.format(cluster_ip, 11211) if endpoint in endpoints: endpoints.remove(endpoint) Configuration.set('/ovs/framework/memcache|endpoints', endpoints) if unconfigure_rabbitmq is True: endpoints = Configuration.get( '/ovs/framework/messagequeue|endpoints') endpoint = '{0}:{1}'.format(cluster_ip, 5672) if endpoint in endpoints: endpoints.remove(endpoint) Configuration.set('/ovs/framework/messagequeue|endpoints', endpoints) except Exception as ex: Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to update configurations', ex], loglevel='exception') if arakoon_metadata['internal'] is True: Toolbox.log(logger=NodeTypeController._logger, messages='Restarting master node services') remaining_nodes = ip_client_map.keys()[:] if cluster_ip in remaining_nodes: remaining_nodes.remove(cluster_ip) PersistentFactory.store = None VolatileFactory.store = None for service in storagerouter.services: if service.name == 'arakoon-ovsdb': 
service.delete() target_client = None if storagerouter in offline_nodes: if unconfigure_rabbitmq is True: Toolbox.log( logger=NodeTypeController._logger, messages='Removing/unconfiguring offline RabbitMQ node') client = ip_client_map[master_ip] try: client.run([ 'rabbitmqctl', 'forget_cluster_node', 'rabbit@{0}'.format(storagerouter.name) ]) except Exception as ex: Toolbox.log(logger=NodeTypeController._logger, messages=[ '\nFailed to forget RabbitMQ cluster node', ex ], loglevel='exception') else: target_client = ip_client_map[cluster_ip] if unconfigure_rabbitmq is True: Toolbox.log(logger=NodeTypeController._logger, messages='Removing/unconfiguring RabbitMQ') try: if service_manager.has_service('rabbitmq-server', client=target_client): ServiceFactory.change_service_state( target_client, 'rabbitmq-server', 'stop', NodeTypeController._logger) target_client.run(['rabbitmq-server', '-detached']) time.sleep(5) target_client.run(['rabbitmqctl', 'stop_app']) time.sleep(5) target_client.run(['rabbitmqctl', 'reset']) time.sleep(5) target_client.run(['rabbitmqctl', 'stop']) time.sleep(5) target_client.file_unlink( "/var/lib/rabbitmq/.erlang.cookie") ServiceFactory.change_service_state( target_client, 'rabbitmq-server', 'stop', NodeTypeController._logger) # To be sure except Exception as ex: Toolbox.log(logger=NodeTypeController._logger, messages=[ '\nFailed to remove/unconfigure RabbitMQ', ex ], loglevel='exception') Toolbox.log(logger=NodeTypeController._logger, messages='Stopping services') services = ['memcached', 'rabbitmq-server'] if unconfigure_rabbitmq is False: services.remove('rabbitmq-server') if unconfigure_memcached is False: services.remove('memcached') for service in services: if service_manager.has_service(service, client=target_client): Toolbox.log( logger=NodeTypeController._logger, messages='Stopping service {0}'.format(service)) try: ServiceFactory.change_service_state( target_client, service, 'stop', NodeTypeController._logger) except Exception as ex: Toolbox.log( logger=NodeTypeController._logger, messages=[ '\nFailed to stop service {0}'.format(service), ex ], loglevel='exception') Toolbox.log(logger=NodeTypeController._logger, messages='Removing services') services = [ 'scheduled-tasks', 'webapp-api', 'volumerouter-consumer' ] for service in services: if service_manager.has_service(service, client=target_client): Toolbox.log( logger=NodeTypeController._logger, messages='Removing service {0}'.format(service)) try: ServiceFactory.change_service_state( target_client, service, 'stop', NodeTypeController._logger) service_manager.remove_service(service, client=target_client) except Exception as ex: Toolbox.log( logger=NodeTypeController._logger, messages=[ '\nFailed to remove service {0}'.format(service), ex ], loglevel='exception') if service_manager.has_service('workers', client=target_client): service_manager.add_service( name='workers', client=target_client, params={'WORKER_QUEUE': '{0}'.format(unique_id)}) try: NodeTypeController._configure_amqp_to_volumedriver() except Exception as ex: Toolbox.log( logger=NodeTypeController._logger, messages=['\nFailed to configure AMQP to Storage Driver', ex], loglevel='exception') Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services') NodeTypeController.restart_framework_and_memcache_services( clients=ip_client_map, logger=NodeTypeController._logger, offline_node_ips=offline_node_ips) if Toolbox.run_hooks(component='nodetype', sub_component='demote', logger=NodeTypeController._logger, cluster_ip=cluster_ip,
master_ip=master_ip, offline_node_ips=offline_node_ips): Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services') NodeTypeController.restart_framework_and_memcache_services( clients=ip_client_map, logger=NodeTypeController._logger, offline_node_ips=offline_node_ips) if storagerouter not in offline_nodes: target_client = ip_client_map[cluster_ip] node_name, _ = target_client.get_hostname() if NodeTypeController.avahi_installed( client=target_client, logger=NodeTypeController._logger) is True: NodeTypeController.configure_avahi( client=target_client, node_name=node_name, node_type='extra', logger=NodeTypeController._logger) Configuration.set( '/ovs/framework/hosts/{0}/type'.format(storagerouter.machine_id), 'EXTRA') if target_client is not None and target_client.file_exists( '/tmp/ovs_rollback'): target_client.file_write('/tmp/ovs_rollback', 'rollback') Toolbox.log(logger=NodeTypeController._logger, messages='Demote complete', title=True)
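Both promote_node and demote_node apply the same safety guard before touching an Arakoon cluster: drop the affected node from the list of master IPs and refuse to continue when no other master remains. A simplified sketch of that guard (remaining_masters is a hypothetical helper):

def remaining_masters(master_node_ips, cluster_ip):
    """Return the master IPs left once cluster_ip is removed, refusing to
    leave the cluster without any other master node."""
    remaining = [ip for ip in master_node_ips if ip != cluster_ip]
    if len(remaining) == 0:
        raise RuntimeError('There should be at least one other master node')
    return remaining

assert remaining_masters(['10.0.0.1', '10.0.0.2'], '10.0.0.2') == ['10.0.0.1']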
def mds_checkup_single(vpool_guid, mds_dict=None, offline_nodes=None): # type: (str, collections.OrderedDict, List[StorageRouter]) -> None """ Validates the current MDS setup/configuration and takes actions where required Actions: * Verify which StorageRouters are available * Make mapping between vPools and their StorageRouters * For each vPool make sure every StorageRouter has at least 1 MDS service with capacity available * For each vPool retrieve the optimal configuration and store it for each StorageDriver * For each vPool run an ensure safety for all vDisks :param vpool_guid: Guid of the VPool to do the checkup for :type vpool_guid: str :param mds_dict: OrderedDict containing all mds related information :type mds_dict: collections.OrderedDict :param offline_nodes: Nodes that are marked as unreachable :type offline_nodes: List[StorageRouter] :return: None :rtype: NoneType :raises MDSCheckupEnsureSafetyFailures: When ensure safety has failed for any vDisk """ params_to_verify = [mds_dict, offline_nodes] vpool = VPool(vpool_guid) if any(p is not None for p in params_to_verify) and not all( p is not None for p in params_to_verify): raise ValueError( 'mds_dict and offline_nodes must either both be provided or both be omitted' ) if not mds_dict: mds_dict, offline_nodes = MDSServiceController._get_mds_information( [vpool]) ensure_safety_failures = [] storagerouter_info = mds_dict[vpool] # Make sure there's at least 1 MDS on every StorageRouter that's not overloaded # Remove all MDS Services which have been manually marked for removal (by setting their capacity to 0) max_load = Configuration.get( '/ovs/vpools/{0}/mds_config|mds_maxload'.format(vpool.guid)) for storagerouter in sorted(storagerouter_info, key=lambda k: k.ip): total_load = 0.0 root_client = mds_dict[vpool][storagerouter]['client'] mds_services = mds_dict[vpool][storagerouter]['services'] for mds_service in list( sorted(mds_services, key=lambda k: k.number)): port = mds_service.service.ports[0] number = mds_service.number # Manual intervention required here in order for the MDS to be cleaned up # @TODO: Remove this and make a dynamic calculation to check which MDSes to remove if mds_service.capacity == 0 and len( mds_service.vdisks_guids) == 0: MDSServiceController._logger.warning( 'vPool {0} - StorageRouter {1} - MDS Service {2} on port {3}: Removing' .format(vpool.name, storagerouter.name, number, port)) try: MDSServiceController.remove_mds_service( mds_service=mds_service, reconfigure=True, allow_offline=root_client is None) except Exception: MDSServiceController._logger.exception( 'vPool {0} - StorageRouter {1} - MDS Service {2} on port {3}: Failed to remove' .format(vpool.name, storagerouter.name, number, port)) mds_services.remove(mds_service) else: _, next_load = MDSServiceController.get_mds_load( mds_service=mds_service) if next_load == float('inf'): total_load = sys.maxint * -1 # Cast to lowest possible value if any MDS service capacity is set to infinity else: total_load += next_load if next_load < max_load: MDSServiceController._logger.debug( 'vPool {0} - StorageRouter {1} - MDS Service {2} on port {3}: Capacity available - Load at {4}%' .format(vpool.name, storagerouter.name, number, port, next_load)) else: MDSServiceController._logger.debug( 'vPool {0} - StorageRouter {1} - MDS Service {2} on port {3}: No capacity available - Load at {4}%' .format(vpool.name, storagerouter.name, number, port, next_load)) if total_load >= max_load * len(mds_services):
mds_services_to_add = int( math.ceil((total_load - max_load * len(mds_services)) / max_load)) MDSServiceController._logger.info( 'vPool {0} - StorageRouter {1} - Average load per service {2:.2f}% - Max load per service {3:.2f}% - {4} MDS service{5} will be added' .format(vpool.name, storagerouter.name, total_load / len(mds_services), max_load, mds_services_to_add, '' if mds_services_to_add == 1 else 's')) for _ in range(mds_services_to_add): MDSServiceController._logger.info( 'vPool {0} - StorageRouter {1} - Adding new MDS Service' .format(vpool.name, storagerouter.name)) try: mds_services.append( MDSServiceController.prepare_mds_service( storagerouter=storagerouter, vpool=vpool)) except Exception: MDSServiceController._logger.exception( 'vPool {0} - StorageRouter {1} - Failed to create new MDS Service' .format(vpool.name, storagerouter.name)) # After potentially having added new MDSes, retrieve the optimal configuration mds_config_set = {} try: mds_config_set = MDSServiceController.get_mds_storagedriver_config_set( vpool=vpool, offline_nodes=offline_nodes) MDSServiceController._logger.debug( 'vPool {0} - Optimal configuration {1}'.format( vpool.name, mds_config_set)) except (NotFoundException, RuntimeError): MDSServiceController._logger.exception( 'vPool {0} - Failed to retrieve the optimal configuration'. format(vpool.name)) # Apply the optimal MDS configuration per StorageDriver for storagerouter in sorted(storagerouter_info, key=lambda k: k.ip): root_client = mds_dict[vpool][storagerouter]['client'] storagedriver = mds_dict[vpool][storagerouter]['storagedriver'] if storagedriver is None: MDSServiceController._logger.critical( 'vPool {0} - StorageRouter {1} - No matching StorageDriver found' .format(vpool.name, storagerouter.name)) continue if storagerouter.guid not in mds_config_set: MDSServiceController._logger.critical( 'vPool {0} - StorageRouter {1} - Not marked as offline, but could not retrieve an optimal MDS config' .format(vpool.name, storagerouter.name)) continue if root_client is None: MDSServiceController._logger.debug( 'vPool {0} - StorageRouter {1} - Marked as offline, not setting optimal MDS configuration' .format(vpool.name, storagerouter.name)) continue storagedriver_config = StorageDriverConfiguration( vpool_guid=vpool.guid, storagedriver_id=storagedriver.storagedriver_id) if storagedriver_config.config_missing is False: optimal_mds_config = mds_config_set[storagerouter.guid] MDSServiceController._logger.debug( 'vPool {0} - StorageRouter {1} - Storing optimal MDS configuration: {2}' .format(vpool.name, storagerouter.name, optimal_mds_config)) # Filesystem section in StorageDriver configuration are all parameters used for vDisks created directly on the filesystem # So when a vDisk gets created on the filesystem, these MDSes will be assigned to them storagedriver_config.configure_filesystem( fs_metadata_backend_mds_nodes=optimal_mds_config) storagedriver_config.save(root_client) # Execute a safety check, making sure the master/slave configuration is optimal. MDSServiceController._logger.info( 'vPool {0} - Ensuring safety for all vDisks'.format(vpool.name)) for vdisk in vpool.vdisks: try: MDSServiceController.ensure_safety(vdisk_guid=vdisk.guid) except Exception: message = 'Ensure safety for vDisk {0} with guid {1} failed'.format( vdisk.name, vdisk.guid) MDSServiceController._logger.exception(message) ensure_safety_failures.append(message) if ensure_safety_failures: raise MDSCheckupEnsureSafetyFailures( '\n - ' + '\n - '.join(ensure_safety_failures))
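The scale-out decision in this checkup reduces to a single formula: once the summed load reaches max_load times the number of services, add ceil((total_load - max_load * n) / max_load) new MDS services. A worked sketch of that calculation (the numbers are illustrative):

import math

def mds_services_to_add(total_load, max_load, service_count):
    """Extra MDS services needed to push the average load back under max_load."""
    if total_load < max_load * service_count:
        return 0
    return int(math.ceil((total_load - max_load * service_count) / max_load))

# Two services with a combined load of 190% against a 75% ceiling: one extra
# service lowers the average to roughly 63%.
assert mds_services_to_add(total_load=190.0, max_load=75.0, service_count=2) == 1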
def get_mds_storagedriver_config_set(vpool, offline_nodes=None): """ Builds a configuration for all StorageRouters from a given vPool with following goals: * Primary MDS is the local one * All slaves are on different hosts * Maximum `mds_safety` nodes are returned The configuration returned is the default configuration used by the volumedriver of which in normal use-cases only the 1st entry is used, because at volume creation time, the volumedriver needs to create 1 master MDS During ensure_safety, we actually create/set the MDS slaves for each volume :param vpool: vPool to get StorageDriver configuration for :type vpool: ovs.dal.hybrids.vpool.VPool :param offline_nodes: Nodes which are currently unreachable via the SSHClient functionality :type offline_nodes: list :raises RuntimeError: When no MDS Service can be found for a specific vPool/StorageRouter combo :raises NotFoundException: When configuration management is unavailable :return: MDS configuration for a vPool :rtype: dict[list] """ if offline_nodes is None: offline_nodes = [] mds_per_storagerouter = {} mds_per_load = {} for storagedriver in vpool.storagedrivers: storagerouter = storagedriver.storagerouter if storagerouter in offline_nodes: continue mds_service, load = MDSServiceController.get_preferred_mds( storagerouter, vpool) if mds_service is None: raise RuntimeError('Could not find an MDS service') mds_per_storagerouter[storagerouter] = { 'host': storagerouter.ip, 'port': mds_service.service.ports[0] } if load not in mds_per_load: mds_per_load[load] = [] mds_per_load[load].append(storagerouter) safety = Configuration.get( '/ovs/vpools/{0}/mds_config|mds_safety'.format(vpool.guid)) config_set = {} for storagerouter, ip_info in mds_per_storagerouter.iteritems(): config_set[storagerouter.guid] = [ip_info] for importance in ['primary', 'secondary']: domains = [ junction.domain for junction in storagerouter.domains if junction.backup is (importance == 'secondary') ] possible_storagerouters = set() for domain in domains: possible_storagerouters.update( StorageRouterList. get_primary_storagerouters_for_domain(domain)) for load in sorted(mds_per_load): if len(config_set[storagerouter.guid]) >= safety: break other_storagerouters = mds_per_load[load] random.shuffle(other_storagerouters) for other_storagerouter in other_storagerouters: if len(config_set[storagerouter.guid]) >= safety: break if other_storagerouter != storagerouter and other_storagerouter in possible_storagerouters: config_set[storagerouter.guid].append( mds_per_storagerouter[other_storagerouter]) return config_set
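Stripped of the primary/secondary domain filtering, the slave selection above is a greedy pick: walk the load buckets from least to most loaded, shuffle within a bucket so equally loaded hosts are used evenly, and stop once mds_safety entries are collected. A reduced sketch under those assumptions (build_config_set is hypothetical and deliberately omits the domain checks):

import random

def build_config_set(local_host, hosts_by_load, safety):
    """Greedily pick MDS hosts for local_host, lowest-loaded buckets first."""
    config = [local_host]  # The local MDS is always the primary
    for load in sorted(hosts_by_load):
        if len(config) >= safety:
            break
        candidates = hosts_by_load[load][:]
        random.shuffle(candidates)  # Spread slaves across equally loaded hosts
        for host in candidates:
            if len(config) >= safety:
                break
            if host != local_host and host not in config:
                config.append(host)
    return config

# With safety 3, node 'a' is paired with the two least-loaded other nodes
print(build_config_set('a', {10.0: ['b'], 20.0: ['a', 'c'], 30.0: ['d']}, 3))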
def migrate(): """ Executes async migrations. It doesn't matter too much when they are executed, as long as they get eventually executed. This code will typically contain: * "dangerous" migration code (it needs certain running services) * Migration code depending on a cluster-wide state * ... """ MigrationController._logger.info('Preparing out of band migrations...') from ovs.dal.lists.storagedriverlist import StorageDriverList from ovs.dal.lists.storagerouterlist import StorageRouterList from ovs.dal.lists.vpoollist import VPoolList from ovs.extensions.generic.configuration import Configuration from ovs.extensions.generic.sshclient import SSHClient from ovs_extensions.generic.toolbox import ExtensionsToolbox from ovs_extensions.services.interfaces.systemd import Systemd from ovs.extensions.services.servicefactory import ServiceFactory from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration from ovs.lib.generic import GenericController MigrationController._logger.info('Start out of band migrations...') service_manager = ServiceFactory.get_manager() sr_client_map = {} for storagerouter in StorageRouterList.get_storagerouters(): sr_client_map[storagerouter.guid] = SSHClient(endpoint=storagerouter, username='******') ######################################################### # Addition of 'ExecReload' for AlbaProxy SystemD services if ServiceFactory.get_service_type() == 'systemd': changed_clients = set() for storagedriver in StorageDriverList.get_storagedrivers(): root_client = sr_client_map[storagedriver.storagerouter_guid] for alba_proxy in storagedriver.alba_proxies: service = alba_proxy.service service_name = 'ovs-{0}'.format(service.name) if not service_manager.has_service(name=service_name, client=root_client): continue if 'ExecReload=' in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)): continue try: service_manager.regenerate_service(name='ovs-albaproxy', client=root_client, target_name=service_name) changed_clients.add(root_client) except: MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name)) for root_client in changed_clients: root_client.run(['systemctl', 'daemon-reload']) ################################################################## # Adjustment of open file descriptors for Arakoon services to 8192 changed_clients = set() for storagerouter in StorageRouterList.get_storagerouters(): root_client = sr_client_map[storagerouter.guid] for service_name in service_manager.list_services(client=root_client): if not service_name.startswith('ovs-arakoon-'): continue if ServiceFactory.get_service_type() == 'systemd': path = '/lib/systemd/system/{0}.service'.format(service_name) check = 'LimitNOFILE=8192' else: path = '/etc/init/{0}.conf'.format(service_name) check = 'limit nofile 8192 8192' if not root_client.file_exists(path): continue if check in root_client.file_read(path): continue try: service_manager.regenerate_service(name='ovs-arakoon', client=root_client, target_name=service_name) changed_clients.add(root_client) ExtensionsToolbox.edit_version_file(client=root_client, package_name='arakoon', old_service_name=service_name) except: MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name)) for root_client in changed_clients: root_client.run(['systemctl', 'daemon-reload']) ############################# # Migrate to multiple proxies for storagedriver in StorageDriverList.get_storagedrivers(): vpool = storagedriver.vpool root_client = 
sr_client_map[storagedriver.storagerouter_guid] for alba_proxy in storagedriver.alba_proxies: # Rename alba_proxy service in model service = alba_proxy.service old_service_name = 'albaproxy_{0}'.format(vpool.name) new_service_name = 'albaproxy_{0}_0'.format(vpool.name) if old_service_name != service.name: continue service.name = new_service_name service.save() if not service_manager.has_service(name=old_service_name, client=root_client): continue old_configuration_key = '/ovs/framework/hosts/{0}/services/{1}'.format(storagedriver.storagerouter.machine_id, old_service_name) if not Configuration.exists(key=old_configuration_key): continue # Add '-reboot' to alba_proxy services (because of newly created services and removal of old service) ExtensionsToolbox.edit_version_file(client=root_client, package_name='alba', old_service_name=old_service_name, new_service_name=new_service_name) # Register new service and remove old service service_manager.add_service(name='ovs-albaproxy', client=root_client, params=Configuration.get(old_configuration_key), target_name='ovs-{0}'.format(new_service_name)) # Update scrub proxy config proxy_config_key = '/ovs/vpools/{0}/proxies/{1}/config/main'.format(vpool.guid, alba_proxy.guid) proxy_config = None if Configuration.exists(key=proxy_config_key) is False else Configuration.get(proxy_config_key) if proxy_config is not None: fragment_cache = proxy_config.get('fragment_cache', ['none', {}]) if fragment_cache[0] == 'alba' and fragment_cache[1].get('cache_on_write') is True: # Accelerated ALBA configured fragment_cache_scrub_info = copy.deepcopy(fragment_cache) fragment_cache_scrub_info[1]['cache_on_read'] = False proxy_scrub_config_key = '/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid) proxy_scrub_config = None if Configuration.exists(key=proxy_scrub_config_key) is False else Configuration.get(proxy_scrub_config_key) if proxy_scrub_config is not None and proxy_scrub_config['fragment_cache'] == ['none']: proxy_scrub_config['fragment_cache'] = fragment_cache_scrub_info Configuration.set(proxy_scrub_config_key, json.dumps(proxy_scrub_config, indent=4), raw=True) # Update 'backend_connection_manager' section changes = False storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.guid, storagedriver.storagedriver_id) storagedriver_config.load() if 'backend_connection_manager' not in storagedriver_config.configuration: continue current_config = storagedriver_config.configuration['backend_connection_manager'] if current_config.get('backend_type') != 'MULTI': changes = True backend_connection_manager = {'backend_type': 'MULTI'} for index, proxy in enumerate(sorted(storagedriver.alba_proxies, key=lambda pr: pr.service.ports[0])): backend_connection_manager[str(index)] = copy.deepcopy(current_config) # noinspection PyUnresolvedReferences backend_connection_manager[str(index)]['alba_connection_use_rora'] = True # noinspection PyUnresolvedReferences backend_connection_manager[str(index)]['alba_connection_rora_manifest_cache_capacity'] = 5000 # noinspection PyUnresolvedReferences for key, value in backend_connection_manager[str(index)].items(): if key.startswith('backend_interface'): backend_connection_manager[key] = value # noinspection PyUnresolvedReferences del backend_connection_manager[str(index)][key] for key, value in {'backend_interface_retries_on_error': 5, 'backend_interface_retry_interval_secs': 1, 'backend_interface_retry_backoff_multiplier': 2.0}.iteritems(): if key not in backend_connection_manager: 
backend_connection_manager[key] = value else: backend_connection_manager = current_config for value in backend_connection_manager.values(): if isinstance(value, dict): for key, val in value.items(): if key.startswith('backend_interface'): backend_connection_manager[key] = val changes = True del value[key] for key, value in {'backend_interface_retries_on_error': 5, 'backend_interface_retry_interval_secs': 1, 'backend_interface_retry_backoff_multiplier': 2.0}.iteritems(): if key not in backend_connection_manager: changes = True backend_connection_manager[key] = value if changes is True: storagedriver_config.clear_backend_connection_manager() storagedriver_config.configure_backend_connection_manager(**backend_connection_manager) storagedriver_config.save(root_client) # Add '-reboot' to volumedriver services (because of updated 'backend_connection_manager' section) ExtensionsToolbox.edit_version_file(client=root_client, package_name='volumedriver', old_service_name='volumedriver_{0}'.format(vpool.name)) if service_manager.ImplementationClass == Systemd: root_client.run(['systemctl', 'daemon-reload']) ######################################## # Update metadata_store_bits information for vpool in VPoolList.get_vpools(): bits = None for storagedriver in vpool.storagedrivers: key = '/ovs/framework/hosts/{0}/services/volumedriver_{1}'.format(storagedriver.storagerouter.machine_id, vpool.name) if Configuration.exists(key=key) and 'METADATASTORE_BITS' not in Configuration.get(key=key): if bits is None: entries = service_manager.extract_from_service_file(name='ovs-volumedriver_{0}'.format(vpool.name), client=sr_client_map[storagedriver.storagerouter_guid], entries=['METADATASTORE_BITS=']) if len(entries) == 1: bits = entries[0].split('=')[-1] bits = int(bits) if bits.isdigit() else 5 if bits is not None: try: content = Configuration.get(key=key) content['METADATASTORE_BITS'] = bits Configuration.set(key=key, value=content) except: MigrationController._logger.exception('Error updating volumedriver info for vPool {0} on StorageRouter {1}'.format(vpool.name, storagedriver.storagerouter.name)) if bits is not None: vpool.metadata_store_bits = bits vpool.save() MigrationController._logger.info('Finished out of band migrations') GenericController.refresh_package_information()
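The 'backend_connection_manager' migration above repeatedly hoists backend_interface_* settings out of the numbered per-proxy sections to the top level. The same transformation isolated as a sketch over a plain dict:
def hoist_backend_interface_keys(backend_connection_manager):
    # Move all 'backend_interface_*' keys from the numbered per-proxy
    # sub-sections up to the top level of the section (last section wins).
    for section in list(backend_connection_manager.values()):
        if not isinstance(section, dict):
            continue
        for key in [k for k in section if k.startswith('backend_interface')]:
            backend_connection_manager[key] = section.pop(key)
    return backend_connection_manager

example = {'backend_type': 'MULTI',
           '0': {'alba_connection_host': '127.0.0.1',
                 'backend_interface_retries_on_error': 5}}
assert hoist_backend_interface_keys(example) == {
    'backend_type': 'MULTI',
    '0': {'alba_connection_host': '127.0.0.1'},
    'backend_interface_retries_on_error': 5}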
def validate_cluster(cluster_name='ovsdb'): """ Validate if the chosen cluster is * deployed on all required nodes * running on all required nodes * working correctly on all required nodes :param cluster_name: name of an existing arakoon cluster (DEFAULT=ovsdb) :type cluster_name: str :return: """ ArakoonValidation.LOGGER.info("Started validating arakoon cluster") master_storagerouters = [ storagerouter.ip for storagerouter in StorageRouterList.get_masters() ] assert len(master_storagerouters) >= 2, 'Environment has only `{0}` node(s)'.format( len(master_storagerouters)) master_storagerouters.sort() arakoon_service_name = "ovs-arakoon-{0}".format(cluster_name) service_manager = ServiceFactory.get_manager() for storagerouter_ip in master_storagerouters: client = SSHClient(storagerouter_ip, username='******') # check if service file is available ArakoonValidation.LOGGER.info( "Validating if cluster service `{0}` is available on node `{1}`" .format(cluster_name, storagerouter_ip)) assert service_manager.has_service(arakoon_service_name, client), "Service file of `{0}` does not exist on storagerouter `{1}`"\ .format(cluster_name, storagerouter_ip) # check if service is running on system ArakoonValidation.LOGGER.info( "Validating if cluster service `{0}` is running on node `{1}`". format(cluster_name, storagerouter_ip)) assert service_manager.get_service_status(arakoon_service_name, client) == 'active', \ "Service of `{0}` is not running on storagerouter `{1}`".format(cluster_name, storagerouter_ip) # perform nop, set and get on cluster key = 'integration-tests-{0}'.format(str(uuid.uuid4())) value = str(time.time()) ArakoonValidation.LOGGER.info( "Validating if cluster `{0}` works".format(cluster_name)) # determine if there is a healthy cluster configuration = Configuration.get( '/ovs/arakoon/{0}/config'.format(cluster_name), raw=True) client = PyrakoonStore(cluster_name, configuration) client.nop() # perform set, get & compare client.set(key, value) get_value = client.get(key) assert get_value == value, "Value mismatch on cluster `{0}`, get value `{1}`, " \ "expected value `{2}` on key `{3}`".format(cluster_name, get_value, value, key) # perform delete client.delete(key) try: assert not client.get(key), "Key `{0}` still exists on cluster `{1}` after deleting it"\ .format(key, cluster_name) except KeyNotFoundException: # key not found so test has passed assert True ArakoonValidation.LOGGER.info("Finished validating arakoon cluster")
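Stripped of the service checks and logging, the functional part of the validation is a key/value round-trip. A minimal sketch, with the store client and its not-found exception passed in explicitly (PyrakoonStore and KeyNotFoundException above) so the snippet stays self-contained:
import time
import uuid

def roundtrip_check(store, not_found_exc):
    # nop -> cluster reachable and a master elected; then set/get/delete.
    key = 'integration-tests-{0}'.format(uuid.uuid4())
    value = str(time.time())
    store.nop()
    store.set(key, value)
    assert store.get(key) == value  # read-your-writes
    store.delete(key)
    try:
        store.get(key)
        raise AssertionError('key {0} still exists after delete'.format(key))
    except not_found_exc:
        pass  # key is gone, as expected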
def _get_int(key): return int(Configuration.get(key))
ch.basic_ack(delivery_tag=method.delivery_tag) import argparse parser = argparse.ArgumentParser(description='Rabbitmq Event Processor for OVS', formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument('rabbitmq_queue', type=str, help='Rabbitmq queue name') parser.add_argument('--durable', dest='queue_durable', action='store_const', default=False, const=True, help='Declare queue as durable') logger = Logger('extensions-rabbitmq') args = parser.parse_args() try: run_event_consumer = False my_ip = Configuration.get('/ovs/framework/hosts/{0}/ip'.format(System.get_my_machine_id())) for endpoint in Configuration.get('/ovs/framework/messagequeue|endpoints'): if endpoint.startswith(my_ip): run_event_consumer = True if run_event_consumer is True: # Load mapping mapping = {} path = '/'.join([os.path.dirname(__file__), 'mappings']) for filename in os.listdir(path): if os.path.isfile('/'.join([path, filename])) and filename.endswith('.py'): name = filename.replace('.py', '') mod = imp.load_source(name, '/'.join([path, filename])) for member in inspect.getmembers(mod, predicate=inspect.isclass): if member[1].__module__ == name and 'object' in [base.__name__ for base in member[1].__bases__]: this_mapping = member[1].mapping
def configure_support(support_info): """ Configures support on all StorageRouters :param support_info: Information about which components should be configured {'stats_monkey': True, # Enable/disable the stats monkey scheduled task 'support_agent': True, # Responsible for enabling the ovs-support-agent service, which collects heart beat data 'remote_access': False, # Cannot be True when support agent is False. Is responsible for opening an OpenVPN tunnel to allow for remote access 'stats_monkey_config': {}} # Dict with information on how to configure the stats monkey (only required when enabling the stats monkey) :type support_info: dict :return: None :rtype: NoneType """ ExtensionsToolbox.verify_required_params(actual_params=support_info, required_params={ 'stats_monkey': (bool, None, False), 'remote_access': (bool, None, False), 'support_agent': (bool, None, False), 'stats_monkey_config': (dict, None, False) }) # All settings are optional, so if nothing is specified, no need to change anything if len(support_info) == 0: StorageRouterController._logger.warning( 'Configure support called without any specific settings. Doing nothing' ) return # Collect information support_agent_key = '/ovs/framework/support|support_agent' support_agent_new = support_info.get('support_agent') support_agent_old = Configuration.get(key=support_agent_key) support_agent_change = support_agent_new is not None and support_agent_old != support_agent_new remote_access_key = '/ovs/framework/support|remote_access' remote_access_new = support_info.get('remote_access') remote_access_old = Configuration.get(key=remote_access_key) remote_access_change = remote_access_new is not None and remote_access_old != remote_access_new stats_monkey_celery_key = '/ovs/framework/scheduling/celery' stats_monkey_config_key = '/ovs/framework/monitoring/stats_monkey' stats_monkey_new_config = support_info.get('stats_monkey_config') stats_monkey_old_config = Configuration.get( key=stats_monkey_config_key, default={}) stats_monkey_celery_config = Configuration.get( key=stats_monkey_celery_key, default={}) stats_monkey_new = support_info.get('stats_monkey') stats_monkey_old = stats_monkey_celery_config.get( 'ovs.stats_monkey.run_all' ) is not None or stats_monkey_celery_config.get( 'alba.stats_monkey.run_all') is not None stats_monkey_change = stats_monkey_new is not None and ( stats_monkey_old != stats_monkey_new or stats_monkey_new_config != stats_monkey_old_config) # Make sure support agent is enabled when trying to enable remote access if remote_access_new is True: if support_agent_new is False or (support_agent_new is None and support_agent_old is False): raise RuntimeError( 'Remote access cannot be enabled without the heart beat enabled' ) # Collect root_client information root_clients = {} for storagerouter in StorageRouterList.get_storagerouters(): try: root_clients[storagerouter] = SSHClient(endpoint=storagerouter, username='******') except UnableToConnectException: raise RuntimeError('Not all StorageRouters are reachable') if stats_monkey_new is True: ExtensionsToolbox.verify_required_params( actual_params=stats_monkey_new_config, required_params={ 'host': (str, ExtensionsToolbox.regex_ip), 'port': (int, { 'min': 1, 'max': 65535 }), 'database': (str, None), 'interval': (int, { 'min': 1, 'max': 86400 }), 'transport': (str, ['influxdb', 'redis', 'graphite']), 'environment': (str, None) }) if stats_monkey_new_config['transport'] in ['influxdb', 'redis']: ExtensionsToolbox.verify_required_params( actual_params=stats_monkey_new_config, 
required_params={'password': (str, None)}) if stats_monkey_new_config['transport'] == 'influxdb': ExtensionsToolbox.verify_required_params( actual_params=stats_monkey_new_config, required_params={'username': (str, None)}) # Configure remote access if remote_access_change is True: Configuration.set(key=remote_access_key, value=remote_access_new) cid = Configuration.get('/ovs/framework/cluster_id').replace( r"'", r"'\''") for storagerouter, root_client in root_clients.iteritems(): if remote_access_new is False: StorageRouterController._logger.info( 'Un-configuring remote access on StorageRouter {0}'. format(root_client.ip)) nid = storagerouter.machine_id.replace(r"'", r"'\''") service_name = 'openvpn@ovs_{0}-{1}'.format(cid, nid) if StorageRouterController._service_manager.has_service( name=service_name, client=root_client): StorageRouterController._service_manager.stop_service( name=service_name, client=root_client) root_client.file_delete(filenames=['/etc/openvpn/ovs_*']) # Configure support agent if support_agent_change is True: service_name = 'support-agent' Configuration.set(key=support_agent_key, value=support_agent_new) for root_client in root_clients.itervalues(): if support_agent_new is True: StorageRouterController._logger.info( 'Configuring support agent on StorageRouter {0}'. format(root_client.ip)) if StorageRouterController._service_manager.has_service( name=service_name, client=root_client) is False: StorageRouterController._service_manager.add_service( name=service_name, client=root_client) StorageRouterController._service_manager.restart_service( name=service_name, client=root_client) else: StorageRouterController._logger.info( 'Un-configuring support agent on StorageRouter {0}'. format(root_client.ip)) if StorageRouterController._service_manager.has_service( name=service_name, client=root_client): StorageRouterController._service_manager.stop_service( name=service_name, client=root_client) StorageRouterController._service_manager.remove_service( name=service_name, client=root_client) # Configure stats monkey if stats_monkey_change is True: # 2 keys matter here: # - /ovs/framework/scheduling/celery --> used to check whether the stats monkey is disabled or not # - /ovs/framework/monitoring/stats_monkey --> contains the actual configuration parameters when enabling the stats monkey, such as host, port, username, ... 
service_name = 'scheduled-tasks' if stats_monkey_new is True: # Enable the scheduled task by removing the key StorageRouterController._logger.info( 'Configuring stats monkey') interval = stats_monkey_new_config['interval'] # The scheduled task cannot be configured to run more than once a minute, so for intervals < 60, the stats monkey task handles this itself StorageRouterController._logger.debug( 'Requested interval to run at: {0}'.format(interval)) Configuration.set(key=stats_monkey_config_key, value=stats_monkey_new_config) if interval > 0: days, hours, minutes, _ = ExtensionsToolbox.convert_to_days_hours_minutes_seconds( seconds=interval) if days == 1: # Max interval is 24 * 60 * 60, so once every day at 3 AM schedule = {'hour': '3'} elif hours > 0: schedule = {'hour': '*/{0}'.format(hours)} else: schedule = {'minute': '*/{0}'.format(minutes)} stats_monkey_celery_config[ 'ovs.stats_monkey.run_all'] = schedule stats_monkey_celery_config[ 'alba.stats_monkey.run_all'] = schedule StorageRouterController._logger.debug( 'Configured schedule is: {0}'.format(schedule)) else: stats_monkey_celery_config.pop('ovs.stats_monkey.run_all', None) stats_monkey_celery_config.pop('alba.stats_monkey.run_all', None) else: # Disable the scheduled task by setting the values for the celery tasks to None StorageRouterController._logger.info( 'Un-configuring stats monkey') stats_monkey_celery_config['ovs.stats_monkey.run_all'] = None stats_monkey_celery_config['alba.stats_monkey.run_all'] = None Configuration.set(key=stats_monkey_celery_key, value=stats_monkey_celery_config) for storagerouter in StorageRouterList.get_masters(): root_client = root_clients[storagerouter] StorageRouterController._logger.debug( 'Restarting ovs-scheduled-tasks service on node with IP {0}' .format(root_client.ip)) StorageRouterController._service_manager.restart_service( name=service_name, client=root_client)
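The interval-to-schedule translation above never schedules more than once per minute and collapses day-sized intervals into a fixed 3 AM run. The same logic isolated, with divmod standing in for ExtensionsToolbox.convert_to_days_hours_minutes_seconds:
def interval_to_schedule(interval):
    # Translate an interval in seconds (1..86400) into a celery crontab-style dict.
    minutes, _ = divmod(interval, 60)
    hours, minutes = divmod(minutes, 60)
    days, hours = divmod(hours, 24)
    if days >= 1:
        return {'hour': '3'}                    # once a day, at 3 AM
    if hours > 0:
        return {'hour': '*/{0}'.format(hours)}  # every N hours
    return {'minute': '*/{0}'.format(minutes)}  # every N minutes

assert interval_to_schedule(86400) == {'hour': '3'}
assert interval_to_schedule(7200) == {'hour': '*/2'}
assert interval_to_schedule(300) == {'minute': '*/5'}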
def create_hprm_config_files(vpool_guid, local_storagerouter_guid, parameters): """ Create the required configuration files to be able to make use of HPRM (aka PRACC). This configuration will be zipped and made available for download :param vpool_guid: The guid of the VPool for which an HPRM manager needs to be deployed :type vpool_guid: str :param local_storagerouter_guid: The guid of the StorageRouter the API was requested on :type local_storagerouter_guid: str :param parameters: Additional information required for the HPRM configuration files :type parameters: dict :return: Name of the zipfile containing the configuration files :rtype: str """ # Validations required_params = {'port': (int, {'min': 1, 'max': 65535}), 'identifier': (str, ExtensionsToolbox.regex_vpool)} ExtensionsToolbox.verify_required_params(actual_params=parameters, required_params=required_params) vpool = VPool(vpool_guid) identifier = parameters['identifier'] config_path = None local_storagerouter = StorageRouter(local_storagerouter_guid) for sd in vpool.storagedrivers: if len(sd.alba_proxies) == 0: raise ValueError('No ALBA proxies configured for vPool {0} on StorageRouter {1}'.format(vpool.name, sd.storagerouter.name)) config_path = '/ovs/vpools/{0}/proxies/{1}/config/{{0}}'.format(vpool.guid, sd.alba_proxies[0].guid) if config_path is None: raise ValueError('vPool {0} has not been extended to any StorageRouter'.format(vpool.name)) proxy_cfg = Configuration.get(key=config_path.format('main')) cache_info = {} arakoons = {} cache_types = VPool.CACHES.values() if not any(ctype in parameters for ctype in cache_types): raise ValueError('At least one cache type should be passed: {0}'.format(', '.join(cache_types))) for ctype in cache_types: if ctype not in parameters: continue required_dict = {'read': (bool, None), 'write': (bool, None)} required_params.update({ctype: (dict, required_dict)}) ExtensionsToolbox.verify_required_params(actual_params=parameters, required_params=required_params) read = parameters[ctype]['read'] write = parameters[ctype]['write'] if read is False and write is False: cache_info[ctype] = ['none'] continue path = parameters[ctype].get('path') if path is not None: path = path.strip() if not path or path.endswith('/.') or '..' 
in path or '/./' in path: raise ValueError('Invalid path specified') required_dict.update({'path': (str, None), 'size': (int, {'min': 1, 'max': 10 * 1024})}) ExtensionsToolbox.verify_required_params(actual_params=parameters, required_params=required_params) while '//' in path: path = path.replace('//', '/') cache_info[ctype] = ['local', {'path': path, 'max_size': parameters[ctype]['size'] * 1024 ** 3, 'cache_on_read': read, 'cache_on_write': write}] else: required_dict.update({'backend_info': (dict, {'preset': (str, ExtensionsToolbox.regex_preset), 'alba_backend_guid': (str, ExtensionsToolbox.regex_guid), 'alba_backend_name': (str, ExtensionsToolbox.regex_backend)}), 'connection_info': (dict, {'host': (str, ExtensionsToolbox.regex_ip, False), 'port': (int, {'min': 1, 'max': 65535}, False), 'client_id': (str, ExtensionsToolbox.regex_guid, False), 'client_secret': (str, None, False)})}) ExtensionsToolbox.verify_required_params(actual_params=parameters, required_params=required_params) connection_info = parameters[ctype]['connection_info'] if connection_info['host']: # Remote Backend for accelerated Backend alba_backend_guid = parameters[ctype]['backend_info']['alba_backend_guid'] ovs_client = OVSClient.get_instance(connection_info=connection_info) arakoon_config = VPoolShared.retrieve_alba_arakoon_config(alba_backend_guid=alba_backend_guid, ovs_client=ovs_client) arakoons[ctype] = ArakoonClusterConfig.convert_config_to(arakoon_config, return_type='INI') else: # Local Backend for accelerated Backend alba_backend_name = parameters[ctype]['backend_info']['alba_backend_name'] if Configuration.exists(key='/ovs/arakoon/{0}-abm/config'.format(alba_backend_name), raw=True) is False: raise ValueError('Arakoon cluster for ALBA Backend {0} could not be retrieved'.format(alba_backend_name)) arakoons[ctype] = Configuration.get(key='/ovs/arakoon/{0}-abm/config'.format(alba_backend_name), raw=True) cache_info[ctype] = ['alba', {'albamgr_cfg_url': '/etc/hprm/{0}/{1}_cache_arakoon.ini'.format(identifier, ctype), 'bucket_strategy': ['1-to-1', {'prefix': vpool.guid, 'preset': parameters[ctype]['backend_info']['preset']}], 'manifest_cache_size': proxy_cfg['manifest_cache_size'], 'cache_on_read': read, 'cache_on_write': write}] tgz_name = 'hprm_config_files_{0}_{1}.tgz'.format(identifier, vpool.name) config = {'ips': ['127.0.0.1'], 'port': parameters['port'], 'pracc': {'uds_path': '/var/run/hprm/{0}/uds_path'.format(identifier), 'max_clients': 1000, 'max_read_buf_size': 64 * 1024, # Buffer size for incoming requests (in bytes) 'thread_pool_size': 64}, # Amount of threads 'transport': 'tcp', 'log_level': 'info', 'read_preference': proxy_cfg['read_preference'], 'albamgr_cfg_url': '/etc/hprm/{0}/arakoon.ini'.format(identifier), 'manifest_cache_size': proxy_cfg['manifest_cache_size']} file_contents_map = {} for ctype in cache_types: if ctype in cache_info: config['{0}_cache'.format(ctype)] = cache_info[ctype] if ctype in arakoons: file_contents_map['/opt/OpenvStorage/config/{0}/{1}_cache_arakoon.ini'.format(identifier, ctype)] = arakoons[ctype] file_contents_map.update({'/opt/OpenvStorage/config/{0}/config.json'.format(identifier): json.dumps(config, indent=4), '/opt/OpenvStorage/config/{0}/arakoon.ini'.format(identifier): Configuration.get(key=config_path.format('abm'), raw=True)}) local_client = SSHClient(endpoint=local_storagerouter) local_client.dir_create(directories='/opt/OpenvStorage/config/{0}'.format(identifier)) local_client.dir_create(directories='/opt/OpenvStorage/webapps/frontend/downloads') for 
file_name, contents in file_contents_map.iteritems(): local_client.file_write(contents=contents, filename=file_name) local_client.run(command=['tar', '--transform', 's#^config/{0}#{0}#'.format(identifier), '-czf', '/opt/OpenvStorage/webapps/frontend/downloads/{0}'.format(tgz_name), 'config/{0}'.format(identifier)]) local_client.dir_delete(directories='/opt/OpenvStorage/config/{0}'.format(identifier)) return tgz_name
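The cache path handling above combines rejection of traversal-like patterns with slash collapsing; isolated as a sketch:
def sanitize_cache_path(path):
    # Mirror the checks above: refuse empty or traversal-style paths,
    # then collapse duplicate slashes.
    path = path.strip()
    if not path or path.endswith('/.') or '..' in path or '/./' in path:
        raise ValueError('Invalid path specified')
    while '//' in path:
        path = path.replace('//', '/')
    return path

assert sanitize_cache_path(' /mnt//hprm/cache ') == '/mnt/hprm/cache'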
def _configuration(self): """ VPool configuration """ if not self.storagedrivers or not self.storagedrivers[0].storagerouter: return {} storagedriver_config = StorageDriverConfiguration( self.guid, self.storagedrivers[0].storagedriver_id) for expected_key in [ 'distributed_transaction_log', 'filesystem', 'volume_router', 'volume_manager' ]: if expected_key not in storagedriver_config.configuration: return {} dtl = storagedriver_config.configuration['distributed_transaction_log'] file_system = storagedriver_config.configuration['filesystem'] volume_router = storagedriver_config.configuration['volume_router'] volume_manager = storagedriver_config.configuration['volume_manager'] dtl_host = file_system['fs_dtl_host'] dtl_mode = file_system.get('fs_dtl_mode', StorageDriverClient.VOLDRV_DTL_ASYNC) cluster_size = volume_manager['default_cluster_size'] / 1024 dtl_transport = dtl['dtl_transport'] sco_multiplier = volume_router['vrouter_sco_multiplier'] dtl_config_mode = file_system['fs_dtl_config_mode'] tlog_multiplier = volume_manager['number_of_scos_in_tlog'] non_disposable_sco_factor = volume_manager[ 'non_disposable_scos_factor'] sco_size = sco_multiplier * cluster_size / 1024 # SCO size is in MiB ==> SCO multiplier * cluster size (4 KiB by default) write_buffer = tlog_multiplier * sco_size * non_disposable_sco_factor dtl_enabled = not (dtl_config_mode == StorageDriverClient.VOLDRV_DTL_MANUAL_MODE and dtl_host == '') try: mds_config = Configuration.get('/ovs/vpools/{0}/mds_config'.format( self.guid)) except NotFoundException: mds_config = {} return { 'sco_size': sco_size, 'dtl_mode': StorageDriverClient.REVERSE_DTL_MODE_MAP[dtl_mode] if dtl_enabled is True else 'no_sync', 'mds_config': mds_config, 'dtl_enabled': dtl_enabled, 'cluster_size': cluster_size, 'write_buffer': write_buffer, 'dtl_transport': StorageDriverClient.REVERSE_DTL_TRANSPORT_MAP[dtl_transport], 'dtl_config_mode': dtl_config_mode, 'tlog_multiplier': tlog_multiplier }
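A worked example of the derived values above, using hypothetical but plausible volume manager defaults (4 KiB clusters, SCO multiplier 1024, tlog multiplier 20, non-disposable factor 12):
default_cluster_size = 4096                 # bytes, as stored by the volume manager
cluster_size = default_cluster_size / 1024  # -> 4 KiB
sco_size = 1024 * cluster_size / 1024       # sco_multiplier * cluster_size -> 4 MiB per SCO
write_buffer = 20 * sco_size * 12           # tlog_multiplier * sco_size * non_disposable_sco_factor
assert write_buffer == 960                  # MiB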
def get(self): """ Returns OpenAPI specs """ version = settings.VERSION[-1] data = {'swagger': '2.0', 'info': {'title': 'Open vStorage', 'description': 'The Open vStorage API.', 'version': str(version)}, 'basePath': '/api', 'schemes': ['https'], 'consumes': ['application/json'], 'produces': ['application/json; version={0}'.format(version)], 'paths': {'/': {'get': {'summary': 'Retrieve API metadata', 'operationId': 'api', 'responses': {'200': {'description': 'API metadata', 'schema': {'type': 'object', 'title': 'APIMetadata', 'properties': {'authenticated': {'type': 'boolean', 'description': 'Indicates whether the client is authenticated.'}, 'authentication_state': {'type': 'string', 'description': 'Provides more information on the "authenticated" state of a client.', 'enum': ['unauthenticated', 'invalid_authorization_type', 'invalid_token', 'token_expired', 'inactive_user', 'authenticated', 'unexpected_exception']}, 'authentication_metadata': {'type': 'object', 'title': 'AuthenticationMetadata', 'description': 'Contains information on the usage of an optional 3rd party OAuth2.0 authentication service.', 'properties': {'ip': {'type': 'string', 'description': 'The IP address of the current node.'}, 'mode': {'type': 'string', 'description': 'Indicates whether the "local" or a "remote" authentication endpoint should be used.', 'enum': ['local', 'remote']}, 'authorize_uri': {'type': 'string', 'description': 'The URI to which the user has to be redirected to authenticate.'}, 'client_id': {'type': 'string', 'description': 'The client identifier to be used when authenticating.'}, 'scope': {'type': 'string', 'description': 'The scope that has to be requested from the authentication endpoint.'}}, 'required': []}, 'username': {'type': 'string', 'description': 'The username of the client or null if not available.'}, 'userguid': {'type': 'string', 'description': 'The GUID (primary key) of the client\'s user or null if not available.'}, 'roles': {'type': 'array', 'description': 'An array of the scopes that were granted to the client.', 'items': {'type': 'string'}}, 'identification': {'type': 'object', 'title': 'APIIdentification', 'description': 'Contains identification information about the API/environment.', 'properties': {'cluster_id': {'type': 'string', 'description': 'Environment identification string.'}}, 'required': ['cluster_id']}, 'storagerouter_ips': {'type': 'array', 'description': 'An array containing the IP addresses of all StorageRouters in the environment.', 'items': {'type': 'string'}}, 'versions': {'type': 'array', 'description': 'An array of all versions that this instance of the API supports.', 'items': {'type': 'integer'}}, 'plugins': {}}, 'required': ['authenticated', 'authentication_state', 'authentication_metadata', 'username', 'userguid', 'roles', 'identification', 'storagerouter_ips', 'versions', 'plugins']}}}}}}, 'definitions': {'APIError': {'type': 'object', 'properties': {'error': {'type': 'string', 'description': 'An error code'}, 'error_description': {'type': 'string', 'description': 'Descriptive error message'}}, 'required': ['error', 'error_description']}}, 'securityDefinitions': {'oauth2': {'type': 'oauth2', 'flow': 'password', 'tokenUrl': 'oauth2/token', 'scopes': {'read': 'Read access', 'write': 'Write access', 'manage': 'Management access'}}}, 'security': [{'oauth2': ['read', 'write', 'manage']}]} # Plugin information plugins = {} for backend_type in BackendTypeList.get_backend_types(): if backend_type.has_plugin is True: if backend_type.code not in plugins: 
plugins[backend_type.code] = [] plugins[backend_type.code] += ['backend', 'gui'] generic_plugins = Configuration.get('/ovs/framework/plugins/installed|generic') for plugin_name in generic_plugins: if plugin_name not in plugins: plugins[plugin_name] = [] plugins[plugin_name] += ['gui'] data['paths']['/']['get']['responses']['200']['schema']['properties']['plugins'] = { 'type': 'object', 'title': 'PluginMetadata', 'description': 'Contains information about plugins active in the system. Each property represents a plugin and the area where they provide functionality.', 'properties': {plugin: {'type': 'array', 'description': 'An array of all areas the plugin provides functionality.', 'items': {'type': 'string'}} for (plugin, info) in plugins.iteritems()}, 'required': [] } # API paths def load_parameters(_fun): # Parameters by @load decorators parameter_info = [] mandatory_args = _fun.ovs_metadata['load']['mandatory'] optional_args = _fun.ovs_metadata['load']['optional'] object_type = _fun.ovs_metadata['load']['object_type'] entries = ['version', 'request', 'local_storagerouter', 'pk', 'contents'] if object_type is not None: object_arg = object_type.__name__.lower() if object_arg in mandatory_args or object_arg in optional_args: parameter_info.append({'name': 'guid', 'in': 'path', 'description': 'Identifier of the object on which to call is applied.', 'required': True, 'type': 'string'}) entries.append(object_arg) for entry in entries: if entry in mandatory_args: mandatory_args.remove(entry) if entry in optional_args: optional_args.remove(entry) docs = _fun.__doc__ doc_info = {} if docs is not None: for match in re.finditer(':(param|type) (.*?): (.*)', docs, re.MULTILINE): entries = match.groups() if entries[1] not in doc_info: doc_info[entries[1]] = {} doc_info[entries[1]][entries[0]] = entries[2] for argument in mandatory_args + optional_args: info = {'name': argument, 'in': 'query', 'required': argument in mandatory_args, 'type': 'string'} if argument in doc_info: description = doc_info[argument].get('param') if description: info['description'] = description type_info = doc_info[argument].get('type') if type_info: if type_info in ['int', 'long']: info['type'] = 'integer' elif type_info in ['float']: info['type'] = 'number' elif type_info in ['bool']: info['type'] = 'boolean' elif type_info in ['str', 'basestring', 'unicode']: info['type'] = 'string' elif type_info in ['dict']: info['type'] = 'object' parameter_info.append(info) # Parameters by @returns_* decorators return_info = _fun.ovs_metadata.get('returns', None) if return_info is not None: # Extra parameters params = return_info['parameters'] fields = [] if 'contents' in params or 'sorting' in params: _cls = return_info['object_type'] fields = [prop.name for prop in _cls._properties] + \ ['{0}_guid'.format(rel.name) for rel in _cls._relations] + \ [dynamic.name for dynamic in _cls._dynamics] relation_info = RelationMapper.load_foreign_relations(_cls) if relation_info is not None: fields += [('{0}_guid' if rel_info['list'] is False else '{0}_guids').format(key) for key, rel_info in relation_info.iteritems()] fields = fields + ['-{0}'.format(field) for field in fields] for parameter in params: if parameter == 'contents': parameter_info.append({'name': 'contents', 'in': 'query', 'description': 'Specify the returned contents.', 'required': True, 'collectionFormat': 'csv', 'type': 'array', 'enum': ['_dynamics', '_relations', 'guid'] + fields, 'items': {'type': 'string'}}) elif parameter == 'paging': parameter_info.append({'name': 'page', 
'in': 'query', 'description': 'Specifies the page to be returned.', 'required': False, 'type': 'integer'}) parameter_info.append({'name': 'page_size', 'in': 'query', 'description': 'Specifies the size of a page. Supported values: 10, 25, 50 and 100. Requires "page" to be set.', 'required': False, 'type': 'integer'}) elif parameter == 'sorting': parameter_info.append({'name': 'sort', 'in': 'query', 'description': 'Specifies the sorting of the list.', 'required': False, 'default': params[parameter], 'enum': ['guid', '-guid'] + fields, 'type': 'array', 'items': {'type': 'string'}}) return parameter_info def load_response(_fun): response_code = '200' response_schema = None return_info = _fun.ovs_metadata.get('returns', None) if return_info is not None: return_type, _return_code = return_info['returns'] if _return_code is not None: response_code = _return_code if return_type == 'object': _cls = return_info['object_type'] response_schema = {'$ref': '#/definitions/{0}'.format(_cls.__name__)} elif return_type == 'list': _cls = return_info['object_type'] class_schema = {'$ref': '#/definitions/{0}'.format(_cls.__name__)} fields = [prop.name for prop in _cls._properties] + \ ['{0}_guid'.format(rel.name) for rel in _cls._relations] + \ [dynamic.name for dynamic in _cls._dynamics] relation_info = RelationMapper.load_foreign_relations(_cls) if relation_info is not None: fields += [('{0}_guid' if rel_info['list'] is False else '{0}_guids').format(key) for key, rel_info in relation_info.iteritems()] fields = fields + ['-{0}'.format(field) for field in fields] response_schema = {'type': 'object', 'title': 'DataList', 'properties': {'_contents': {'type': 'array', 'description': 'Requested contents.', 'items': {'type': 'string'}, 'required': True, 'collectionFormat': 'csv', 'enum': ['_dynamics', '_relations', 'guid'] + fields}, '_paging': {'type': 'object', 'title': 'PagingMetadata', 'properties': {'total_items': {'type': 'integer', 'description': 'Total items available.'}, 'max_page': {'type': 'integer', 'description': 'Last page available.'}, 'end_number': {'type': 'integer', 'description': '1-based index of the last item in the current page.'}, 'current_page': {'type': 'integer', 'description': 'Current page number.'}, 'page_size': {'type': 'integer', 'description': 'Number of items in the current page.'}, 'start_number': {'type': 'integer', 'description': '1-based index of the first item in the current page'}}, 'required': ['total_items', 'max_page', 'end_number', 'current_page', 'page_size', 'start_number']}, '_sorting': {'type': 'array', 'description': 'Applied sorting', 'items': {'type': 'string'}, 'required': True, 'collectionFormat': 'csv', 'enum': ['-guid', 'guid'] + fields}, 'data': {'type': 'array', 'description': 'List of serialized {0}s.'.format(_cls.__name__), 'required': True, 'items': class_schema}}, 'required': ['_contents', '_paging', '_sorting', 'data']} else: docs = _fun.__doc__ doc_info = {} if docs is not None: for match in re.finditer(':(return|rtype): (.*)', docs, re.MULTILINE): entries = match.groups() doc_info[entries[0]] = entries[1] if return_type == 'task': task_return = '' if 'return' in doc_info: task_return = ' The task returns: {0}'.format(doc_info['return']) response_schema = {'type': 'string', 'description': 'A task identifier.{0}'.format(task_return)} elif return_type is None: response_schema = {'type': 'string'} if 'return' in doc_info: response_schema['description'] = doc_info['return'] if 'rtype' in doc_info: type_info = doc_info['rtype'] if type_info in ['int', 
'long']: response_schema['type'] = 'integer' elif type_info in ['float']: response_schema['type'] = 'number' elif type_info in ['bool']: response_schema['type'] = 'boolean' elif type_info in ['str', 'basestring', 'unicode']: response_schema['type'] = 'string' elif type_info in ['dict']: response_schema['type'] = 'object' elif type_info in ['None']: response_schema = None response_code = '204' return response_code, response_schema paths = data['paths'] path = '/'.join([os.path.dirname(__file__), 'backend', 'views']) for filename in os.listdir(path): if os.path.isfile('/'.join([path, filename])) and filename.endswith('.py'): name = filename.replace('.py', '') module = imp.load_source(name, '/'.join([path, filename])) for member in inspect.getmembers(module): if inspect.isclass(member[1]) \ and member[1].__module__ == name \ and 'ViewSet' in [base.__name__ for base in member[1].__bases__]: cls = member[1] if hasattr(cls, 'skip_spec') and cls.skip_spec is True: continue base_calls = {'list': ['get', '/{0}/'], 'retrieve': ['get', '/{0}/{{guid}}/'], 'create': ['post', '/{0}/'], 'destroy': ['delete', '/{0}/{{guid}}/'], 'partial_update': ['patch', '/{0}/{{guid}}/']} for call, route_data in base_calls.iteritems(): if hasattr(cls, call): fun = getattr(cls, call) docstring = fun.__doc__.strip().split('\n')[0] parameters = load_parameters(fun) return_code, schema = load_response(fun) route = {route_data[0]: {'summary': docstring, 'operationId': '{0}.{1}'.format(member[1].prefix, call), 'responses': {return_code: {'description': docstring}, 'default': {'description': 'Error payload', 'schema': {'$ref': '#/definitions/APIError'}}}, 'parameters': parameters}} if schema is not None: route[route_data[0]]['responses'][return_code]['schema'] = schema current_path = route_data[1].format(member[1].prefix) if current_path not in paths: paths[current_path] = {} paths[current_path].update(route) funs = [fun[1] for fun in inspect.getmembers(cls, predicate=inspect.ismethod) if fun[0] not in base_calls.keys()] for fun in funs: if hasattr(fun, 'bind_to_methods'): routes = {} docstring = fun.__doc__.strip().split('\n')[0] parameters = load_parameters(fun) return_code, schema = load_response(fun) name = fun.__name__ for verb in fun.bind_to_methods: routes[verb] = {'summary': docstring, 'operationId': '{0}.{1}_{2}'.format(member[1].prefix, verb, name), 'responses': {return_code: {'description': docstring}, 'default': {'description': 'Error payload', 'schema': {'$ref': '#/definitions/APIError'}}}, 'parameters': parameters} if schema is not None: routes[verb]['responses'][return_code]['schema'] = schema paths['/{0}/{{guid}}/{1}/'.format(member[1].prefix, name)] = routes # DataObject / hybrids def build_property(prop): _docstring = prop.docstring or prop.name _docstring = _docstring.replace('None', 'null').replace('True', 'true').replace('False', 'false') info = {'description': _docstring} if prop.default is not None: info['default'] = prop.default if prop.property_type == int: info['type'] = 'integer' elif prop.property_type == float: info['type'] = 'number' elif prop.property_type == long: info['type'] = 'integer' elif prop.property_type == str: info['type'] = 'string' elif prop.property_type == bool: info['type'] = 'boolean' elif prop.property_type == list: info['type'] = 'array' elif prop.property_type == dict: info['type'] = 'object' elif prop.property_type == set: info['type'] = 'array' elif isinstance(prop.property_type, list): # enumerator info['type'] = 'string' info['enum'] = prop.property_type return info def 
build_relation(_cls, relation): itemtype = relation.foreign_type.__name__ if relation.foreign_type is not None else _cls.__name__ _docstring = '{1} instance identifier{3}. One-to-{0} relation with {1}.{2}.'.format( 'one' if relation.onetoone is True else 'many', itemtype, ('{0}_guid' if relation.onetoone is True else '{0}_guids').format(relation.foreign_key), '' if relation.mandatory is True else ', null if relation is not set' ) info = {'description': _docstring, 'type': 'string'} return '{0}_guid'.format(relation.name), info def build_dynamic(_cls, dynamic): _docstring = dynamic.name if hasattr(_cls, '_{0}'.format(dynamic.name)): docs = getattr(_cls, '_{0}'.format(dynamic.name)).__doc__ if docs is not None: _docstring = docs.strip().split('\n')[0] _docstring = _docstring.replace('None', 'null').replace('True', 'true').replace('False', 'false') _docstring = '{0} (dynamic property, cache timeout: {1}s)'.format(_docstring, dynamic.timeout) info = {'description': _docstring, 'readOnly': True} if dynamic.return_type == int: info['type'] = 'integer' elif dynamic.return_type == float: info['type'] = 'number' elif dynamic.return_type == long: info['type'] = 'integer' elif dynamic.return_type == str: info['type'] = 'string' elif dynamic.return_type == bool: info['type'] = 'boolean' elif dynamic.return_type == list: info['type'] = 'array' elif dynamic.return_type == dict: info['type'] = 'object' elif dynamic.return_type == set: info['type'] = 'array' elif isinstance(dynamic.return_type, list): # enumerator info['type'] = 'string' info['enum'] = dynamic.return_type return info def build_remote_relation(relation): key, relation_info = relation remote_cls = Descriptor().load(relation_info['class']).get_object() _docstring = '{1} instance identifier{3}. One-to-{0} relation with {1}.{2}.'.format( 'one' if relation_info['list'] is False else 'many', remote_cls.__name__, '{0}_guid'.format(relation_info['key']), '' if relation_info['list'] is False else 's' ) info = {'description': _docstring, 'readOnly': True} if relation_info['list'] is True: info['type'] = 'array' info['items'] = {'type': 'string'} _name = '{0}_guids'.format(key) else: info['type'] = 'string' _name = '{0}_guid'.format(key) return _name, info def get_properties(_cls): properties = {} properties.update({prop.name: build_property(prop) for prop in _cls._properties}) properties.update(dict(build_relation(_cls, relation) for relation in _cls._relations)) properties.update({dynamic.name: build_dynamic(_cls, dynamic) for dynamic in _cls._dynamics}) relation_info = RelationMapper.load_foreign_relations(_cls) if relation_info is not None: properties.update(dict(build_remote_relation(relation) for relation in relation_info.iteritems())) return properties def get_required_properties(_cls): required = [] for prop in _cls._properties: if prop.mandatory is True: required.append(prop.name) for relation in _cls._relations: if relation.mandatory is True: required.append('{0}_guid'.format(relation.name)) return required definitions = data['definitions'] definitions['DataObject'] = {'type': 'object', 'title': 'DataObject', 'description': 'Root object inherited by all hybrid objects. 
Shall not be used directly.', 'properties': {'guid': {'type': 'string', 'description': 'Identifier of the object.'}}, 'required': ['guid']} hybrid_structure = HybridRunner.get_hybrids() for class_descriptor in hybrid_structure.values(): cls = Descriptor().load(class_descriptor).get_object() definitions[cls.__name__] = {'description': cls.__doc__.strip().split('\n')[0], 'allOf': [{'$ref': '#/definitions/DataObject'}, {'type': 'object', 'properties': get_properties(cls), 'required': get_required_properties(cls)}]} return data
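load_parameters and load_response above repeat the same translation from docstring type hints to Swagger 2.0 primitives; factored out as a sketch:
def swagger_type(type_hint):
    # Map a ':type:' / ':rtype:' entry onto a Swagger 2.0 primitive;
    # anything unknown falls back to 'string', as in the code above.
    return {'int': 'integer', 'long': 'integer',
            'float': 'number',
            'bool': 'boolean',
            'str': 'string', 'basestring': 'string', 'unicode': 'string',
            'dict': 'object'}.get(type_hint, 'string')

assert swagger_type('long') == 'integer'
assert swagger_type('dict') == 'object'
assert swagger_type('something_else') == 'string'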
import os from ConfigParser import RawConfigParser from kombu import Queue from celery import Celery from celery.signals import task_postrun, worker_process_init from ovs.lib.messaging import MessageController from ovs.log.logHandler import LogHandler from ovs.extensions.storage.volatilefactory import VolatileFactory from ovs.extensions.storage.persistentfactory import PersistentFactory from ovs.extensions.generic.system import System from ovs.extensions.generic.configuration import Configuration memcache_ini = RawConfigParser() memcache_ini.read( os.path.join(Configuration.get('ovs.core.cfgdir'), 'memcacheclient.cfg')) memcache_nodes = [ node.strip() for node in memcache_ini.get('main', 'nodes').split(',') ] memcache_servers = map(lambda n: memcache_ini.get(n, 'location'), memcache_nodes) rmq_ini = RawConfigParser() rmq_ini.read( os.path.join(Configuration.get('ovs.core.cfgdir'), 'rabbitmqclient.cfg')) rmq_nodes = [node.strip() for node in rmq_ini.get('main', 'nodes').split(',')] rmq_servers = map(lambda n: rmq_ini.get(n, 'location'), rmq_nodes) unique_id = System.get_my_machine_id() include = []
def _live_status(self): """ Retrieve the live status of the ALBA Backend to be displayed in the 'Backends' page in the GUI based on: - Maintenance agents presence - Maintenance agents status - Disk statuses :return: Status as reported by the plugin :rtype: str """ if self.backend.status == Backend.STATUSES.INSTALLING: return 'installing' if self.backend.status == Backend.STATUSES.DELETING: return 'deleting' # Verify failed disks devices = self.local_summary['devices'] if devices['red'] > 0: self._logger.warning( 'AlbaBackend {0} STATUS set to FAILURE due to {1} failed disks' .format(self.name, devices['red'])) return AlbaBackend.STATUSES.FAILURE # Verify remote OSDs remote_errors = False linked_backend_warning = False for remote_info in self.remote_stack.itervalues(): if remote_info['error'] == 'unknown' or remote_info[ 'live_status'] == AlbaBackend.STATUSES.FAILURE: message = None if remote_info['error'] == 'unknown': message = 'unknown remote error info' elif remote_info[ 'live_status'] == AlbaBackend.STATUSES.FAILURE: message = 'FAILURE in live_status' self._logger.warning( 'AlbaBackend {0} STATUS set to FAILURE due to OSD {1}: {2} ' .format(self.name, remote_info['name'], message)) return AlbaBackend.STATUSES.FAILURE if remote_info['error'] == 'not_allowed': remote_errors = True if remote_info['live_status'] == AlbaBackend.STATUSES.WARNING: linked_backend_warning = True # Retrieve ASD and maintenance service information def _get_node_information(_node): if _node not in nodes_used_by_this_backend: for slot_info in _node.stack.itervalues(): for osd_info in slot_info['osds'].itervalues(): if osd_info['claimed_by'] == self.guid: nodes_used_by_this_backend.add(_node) break if _node in nodes_used_by_this_backend: break try: services = _node.maintenance_services if self.name in services: for _service_name, _service_status in services[self.name]: services_for_this_backend[_service_name] = _node service_states[_service_name] = _service_status if _node.node_id not in services_per_node: services_per_node[_node.node_id] = 0 services_per_node[_node.node_id] += 1 except Exception: pass services_for_this_backend = {} services_per_node = {} service_states = {} nodes_used_by_this_backend = set() threads = [] all_nodes = AlbaNodeList.get_albanodes() for node in all_nodes: thread = Thread(target=_get_node_information, args=(node, )) thread.start() threads.append(thread) for thread in threads: thread.join() zero_services = False if len(services_for_this_backend) == 0: if len(all_nodes) > 0: AlbaBackend._logger.error( 'AlbaBackend {0} STATUS set to FAILURE due to no maintenance services' .format(self.name)) return AlbaBackend.STATUSES.FAILURE zero_services = True # Verify maintenance agents status for service_name, node in services_for_this_backend.iteritems(): try: service_status = service_states.get(service_name) if service_status is None or service_status != 'active': AlbaBackend._logger.error( 'AlbaBackend {0} STATUS set to FAILURE due to non-running maintenance service(s): {1}' .format(self.name, service_name)) return AlbaBackend.STATUSES.FAILURE except Exception: pass # Verify maintenance agents presence layout_key = '/ovs/alba/backends/{0}/maintenance/agents_layout'.format( self.guid) layout = None if Configuration.exists(layout_key): layout = Configuration.get(layout_key) if not isinstance(layout, list) or not any( node.node_id for node in all_nodes if node.node_id in layout): layout = None if layout is None: config_key = '/ovs/alba/backends/{0}/maintenance/nr_of_agents'.format( self.guid) 
expected_services = 3 if Configuration.exists(config_key): expected_services = Configuration.get(config_key) expected_services = min(expected_services, len(nodes_used_by_this_backend)) or 1 if len(services_for_this_backend) < expected_services: AlbaBackend._logger.warning( 'Live status for backend {0} is "warning": insufficient maintenance services' .format(self.name)) return AlbaBackend.STATUSES.WARNING else: for node_id in layout: if node_id not in services_per_node: AlbaBackend._logger.warning( 'Live status for backend {0} is "warning": invalid maintenance service layout' .format(self.name)) return AlbaBackend.STATUSES.WARNING # Verify local and remote OSDs if devices['orange'] > 0: AlbaBackend._logger.warning( 'Live status for backend {0} is "warning": one or more OSDs in warning' .format(self.name)) return AlbaBackend.STATUSES.WARNING if remote_errors is True or linked_backend_warning is True: AlbaBackend._logger.warning( 'Live status for backend {0} is "warning": errors/warnings on remote stack' .format(self.name)) return AlbaBackend.STATUSES.WARNING if zero_services is True: AlbaBackend._logger.warning( 'Live status for backend {0} is "warning": no maintenance services' .format(self.name)) return AlbaBackend.STATUSES.WARNING return AlbaBackend.STATUSES.RUNNING
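Without an explicit agents layout, the number of required maintenance services is clamped to the nodes the backend actually uses, with a floor of one; in isolation:
def expected_maintenance_services(configured, nodes_in_use):
    # min() clamps to the node count; 'or 1' turns a zero into the minimum of one.
    return min(configured, nodes_in_use) or 1

assert expected_maintenance_services(3, 10) == 3
assert expected_maintenance_services(3, 2) == 2
assert expected_maintenance_services(3, 0) == 1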
def migrate(previous_version, master_ips=None, extra_ips=None): """ Migrates from a given version to the current version. It uses 'previous_version' to be smart wherever possible, but the code should be able to migrate any version towards the expected version. When this is not possible, the code can set a minimum version and raise when it is not met. :param previous_version: The previous version from which to start the migration :type previous_version: float :param master_ips: IP addresses of the MASTER nodes :type master_ips: list or None :param extra_ips: IP addresses of the EXTRA nodes :type extra_ips: list or None """ _ = master_ips, extra_ips working_version = previous_version # From here on, all actual migration should happen to get to the expected state for THIS RELEASE if working_version < ExtensionMigrator.THIS_VERSION: try: from ovs.dal.lists.storagerouterlist import StorageRouterList from ovs.dal.lists.vpoollist import VPoolList from ovs.extensions.generic.configuration import Configuration from ovs.extensions.services.servicefactory import ServiceFactory from ovs.extensions.generic.sshclient import SSHClient from ovs.extensions.generic.system import System local_machine_id = System.get_my_machine_id() local_ip = Configuration.get( '/ovs/framework/hosts/{0}/ip'.format(local_machine_id)) local_client = SSHClient(endpoint=local_ip, username='******') # Multiple Proxies if local_client.dir_exists( directory= '/opt/OpenvStorage/config/storagedriver/storagedriver' ): local_client.dir_delete(directories=[ '/opt/OpenvStorage/config/storagedriver/storagedriver' ]) # MDS safety granularity on vPool level mds_safety_key = '/ovs/framework/storagedriver' if Configuration.exists(key=mds_safety_key): current_mds_settings = Configuration.get( key=mds_safety_key) for vpool in VPoolList.get_vpools(): vpool_key = '/ovs/vpools/{0}'.format(vpool.guid) if Configuration.dir_exists(key=vpool_key): Configuration.set( key='{0}/mds_config'.format(vpool_key), value=current_mds_settings) Configuration.delete(key=mds_safety_key) # Introduction of edition key if Configuration.get(key=Configuration.EDITION_KEY, default=None) not in [ PackageFactory.EDITION_COMMUNITY, PackageFactory.EDITION_ENTERPRISE ]: for storagerouter in StorageRouterList.get_storagerouters( ): try: Configuration.set( key=Configuration.EDITION_KEY, value=storagerouter.features['alba'] ['edition']) break except: continue except: ExtensionMigrator._logger.exception( 'Error occurred while executing the migration code') # Don't update migration version with latest version, resulting in next migration trying again to execute this code return ExtensionMigrator.THIS_VERSION - 1 return ExtensionMigrator.THIS_VERSION
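The return value is what the caller persists as the reached migration version: THIS_VERSION on success, THIS_VERSION - 1 on failure so the next run retries the same block. A minimal sketch of that contract (names hypothetical):
def run_migration(previous_version, this_version, steps):
    # 'steps' is a callable holding the actual migration work.
    if previous_version >= this_version:
        return this_version      # already migrated, nothing to do
    try:
        steps()
    except Exception:
        return this_version - 1  # signal the caller to retry on the next run
    return this_version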
def _get_client_type(cls): return Configuration.get('/ovs/framework/stores|volatile')
def _get_client_type(cls): return Configuration.get('/ovs/framework/stores|persistent')
def _voldrv_arakoon_checkup(create_cluster): def _add_service(service_storagerouter, arakoon_ports, service_name): """ Add a service to the storage router """ new_service = Service() new_service.name = service_name new_service.type = service_type new_service.ports = arakoon_ports new_service.storagerouter = service_storagerouter new_service.save() return new_service current_ips = [] current_services = [] service_type = ServiceTypeList.get_by_name( ServiceType.SERVICE_TYPES.ARAKOON) cluster_name = Configuration.get( '/ovs/framework/arakoon_clusters').get('voldrv') if cluster_name is not None: arakoon_service_name = ArakoonInstaller.get_service_name_for_cluster( cluster_name=cluster_name) for service in service_type.services: if service.name == arakoon_service_name: current_services.append(service) if service.is_internal is True: current_ips.append(service.storagerouter.ip) all_sr_ips = [ storagerouter.ip for storagerouter in StorageRouterList.get_slaves() ] available_storagerouters = {} for storagerouter in StorageRouterList.get_masters(): storagerouter.invalidate_dynamics(['partition_config']) if len(storagerouter.partition_config[DiskPartition.ROLES.DB]) > 0: available_storagerouters[storagerouter] = DiskPartition( storagerouter.partition_config[DiskPartition.ROLES.DB][0]) all_sr_ips.append(storagerouter.ip) if create_cluster is True and len( current_services) == 0: # Create new cluster metadata = ArakoonInstaller.get_unused_arakoon_metadata_and_claim( cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.SD) if metadata is None: # No externally managed cluster found, we create 1 ourselves if not available_storagerouters: raise RuntimeError( 'Could not find any Storage Router with a DB role') storagerouter, partition = available_storagerouters.items()[0] arakoon_voldrv_cluster = 'voldrv' arakoon_installer = ArakoonInstaller( cluster_name=arakoon_voldrv_cluster) arakoon_installer.create_cluster( cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.SD, ip=storagerouter.ip, base_dir=partition.folder, log_sinks=LogHandler.get_sink_path( 'arakoon-server_{0}'.format(arakoon_voldrv_cluster)), crash_log_sinks=LogHandler.get_sink_path( 'arakoon-server-crash_{0}'.format( arakoon_voldrv_cluster))) arakoon_installer.start_cluster() ports = arakoon_installer.ports[storagerouter.ip] metadata = arakoon_installer.metadata current_ips.append(storagerouter.ip) else: ports = [] storagerouter = None cluster_name = metadata['cluster_name'] Configuration.set('/ovs/framework/arakoon_clusters|voldrv', cluster_name) StorageDriverController._logger.info( 'Claiming {0} managed arakoon cluster: {1}'.format( 'externally' if storagerouter is None else 'internally', cluster_name)) StorageDriverController._configure_arakoon_to_volumedriver( cluster_name=cluster_name) current_services.append( _add_service( service_storagerouter=storagerouter, arakoon_ports=ports, service_name=ArakoonInstaller.get_service_name_for_cluster( cluster_name=cluster_name))) cluster_name = Configuration.get( '/ovs/framework/arakoon_clusters').get('voldrv') if cluster_name is None: return metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name( cluster_name=cluster_name) if 0 < len(current_services) < len( available_storagerouters) and metadata['internal'] is True: for storagerouter, partition in available_storagerouters.iteritems( ): if storagerouter.ip in current_ips: continue arakoon_installer = ArakoonInstaller(cluster_name=cluster_name) arakoon_installer.load() arakoon_installer.extend_cluster( new_ip=storagerouter.ip, base_dir=partition.folder, 
log_sinks=LogHandler.get_sink_path( 'arakoon-server_{0}'.format(cluster_name)), crash_log_sinks=LogHandler.get_sink_path( 'arakoon-server-crash_{0}'.format(cluster_name))) _add_service( service_storagerouter=storagerouter, arakoon_ports=arakoon_installer.ports[storagerouter.ip], service_name=ArakoonInstaller.get_service_name_for_cluster( cluster_name=cluster_name)) current_ips.append(storagerouter.ip) arakoon_installer.restart_cluster_after_extending( new_ip=storagerouter.ip) StorageDriverController._configure_arakoon_to_volumedriver( cluster_name=cluster_name)
def configure_rabbitmq(client, logger): """ Configure RabbitMQ :param client: Client on which to configure RabbitMQ :type client: ovs_extensions.generic.sshclient.SSHClient :param logger: Logger object used for logging :type logger: ovs.extensions.generic.logger.Logger :return: None """ Toolbox.log(logger=logger, messages='Setting up RabbitMQ') service_manager = ServiceFactory.get_manager() rabbitmq_port = Configuration.get( '/ovs/framework/messagequeue|endpoints')[0].split(':')[1] rabbitmq_login = Configuration.get('/ovs/framework/messagequeue|user') rabbitmq_password = Configuration.get( '/ovs/framework/messagequeue|password') client.file_write( '/etc/rabbitmq/rabbitmq.config', """[ {{rabbit, [{{tcp_listeners, [{0}]}}, {{default_user, <<"{1}">>}}, {{default_pass, <<"{2}">>}}, {{cluster_partition_handling, autoheal}}, {{log_levels, [{{connection, warning}}]}}, {{vm_memory_high_watermark, 0.2}}]}} ].""".format(rabbitmq_port, rabbitmq_login, rabbitmq_password)) rabbitmq_running, same_process = service_manager.is_rabbitmq_running( client=client) if rabbitmq_running is True: # Example output of 'list_users' command # Listing users ... # guest [administrator] # ovs [] # ... done. users = [ user.split('\t')[0] for user in client.run( ['rabbitmqctl', 'list_users']).splitlines() if '\t' in user and '[' in user and ']' in user ] if 'ovs' in users: Toolbox.log(logger=logger, messages='Already configured RabbitMQ') return ServiceFactory.change_service_state(client, 'rabbitmq-server', 'stop', logger) client.run(['rabbitmq-server', '-detached']) time.sleep(5) # Sometimes/At random the rabbitmq server takes longer than 5 seconds to start, # and the next command fails so the best solution is to retry several times # Also retry the add_user/set_permissions, and validate the result retry = 0 while retry < 10: users = Toolbox.retry_client_run( client=client, command=['rabbitmqctl', 'list_users'], logger=logger).splitlines() users = [ usr.split('\t')[0] for usr in users if '\t' in usr and '[' in usr and ']' in usr ] logger.debug('Rabbitmq users {0}'.format(users)) if 'ovs' in users: logger.debug('User ovs configured in rabbitmq') break logger.debug( Toolbox.retry_client_run(client=client, command=[ 'rabbitmqctl', 'add_user', rabbitmq_login, rabbitmq_password ], logger=logger)) logger.debug( Toolbox.retry_client_run(client=client, command=[ 'rabbitmqctl', 'set_permissions', rabbitmq_login, '.*', '.*', '.*' ], logger=logger)) retry += 1 time.sleep(1) client.run(['rabbitmqctl', 'stop']) time.sleep(5)
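Detecting the 'ovs' user above depends on the tab-separated output of 'rabbitmqctl list_users'; the parsing isolated as a sketch against the example output quoted in the comment:
def parse_rabbitmq_users(list_users_output):
    # User lines look like '<name>\t[<tags>]'; header/footer lines carry no tab.
    return [line.split('\t')[0]
            for line in list_users_output.splitlines()
            if '\t' in line and '[' in line and ']' in line]

example = 'Listing users ...\nguest\t[administrator]\novs\t[]\n...done.'
assert parse_rabbitmq_users(example) == ['guest', 'ovs']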