def _configure_arakoon_to_volumedriver(offline_node_ips=None):
    """
    Re-point every vPool's StorageDriver configuration to the 'voldrv' Arakoon cluster.

    Walks all StorageRouters, and on each one rewrites the volume registry and
    distributed-lock-store sections of every StorageDriver json config found in
    the framework configuration directory.

    :param offline_node_ips: IPs of offline nodes; accepted for backward
                             compatibility but currently unused by this function
    :type offline_node_ips: list
    :return: None
    """
    print('Update existing vPools')  # print() form works identically on Python 2 and 3
    logger.info('Update existing vPools')
    if offline_node_ips is None:
        offline_node_ips = []  # NOTE(review): normalized but never used below — dead parameter, kept for interface compatibility
    # The 'voldrv' cluster config is node-independent; load it once instead of
    # once per StorageRouter as before (pure hoist, same data every iteration).
    config = ArakoonClusterConfig('voldrv')
    config.load_config()
    arakoon_nodes = [{'host': node.ip,
                      'port': node.client_port,
                      'node_id': node.name} for node in config.nodes]
    for storagerouter in StorageRouterList.get_storagerouters():
        with Remote(storagerouter.ip, [os, RawConfigParser, EtcdConfiguration, StorageDriverConfiguration], 'ovs') as remote:
            configuration_dir = '{0}/storagedriver/storagedriver'.format(EtcdConfiguration.get('/ovs/framework/paths|cfgdir'))
            if not remote.os.path.exists(configuration_dir):
                remote.os.makedirs(configuration_dir)
            for json_file in remote.os.listdir(configuration_dir):
                if not json_file.endswith('.json'):
                    continue  # Skip non-json entries up front (avoids pointless name mangling)
                vpool_name = json_file.replace('.json', '')
                if remote.os.path.exists('{0}/{1}.cfg'.format(configuration_dir, vpool_name)):
                    continue  # There's also a .cfg file, so this is an alba_proxy configuration file
                storagedriver_config = remote.StorageDriverConfiguration('storagedriver', vpool_name)
                storagedriver_config.load()
                storagedriver_config.configure_volume_registry(vregistry_arakoon_cluster_id='voldrv',
                                                               vregistry_arakoon_cluster_nodes=arakoon_nodes)
                storagedriver_config.configure_distributed_lock_store(dls_type='Arakoon',
                                                                      dls_arakoon_cluster_id='voldrv',
                                                                      dls_arakoon_cluster_nodes=arakoon_nodes)
                storagedriver_config.save(reload_config=True)
def _configure_arakoon_to_volumedriver(): print 'Update existing vPools' logger.info('Update existing vPools') config = ArakoonClusterConfig('voldrv') config.load_config() arakoon_nodes = [] for node in config.nodes: arakoon_nodes.append({ 'host': node.ip, 'port': node.client_port, 'node_id': node.name }) if EtcdConfiguration.dir_exists('/ovs/vpools'): for vpool_guid in EtcdConfiguration.list('/ovs/vpools'): for storagedriver_id in EtcdConfiguration.list( '/ovs/vpools/{0}/hosts'.format(vpool_guid)): storagedriver_config = StorageDriverConfiguration( 'storagedriver', vpool_guid, storagedriver_id) storagedriver_config.load() storagedriver_config.configure_volume_registry( vregistry_arakoon_cluster_id='voldrv', vregistry_arakoon_cluster_nodes=arakoon_nodes) storagedriver_config.configure_distributed_lock_store( dls_type='Arakoon', dls_arakoon_cluster_id='voldrv', dls_arakoon_cluster_nodes=arakoon_nodes) storagedriver_config.save(reload_config=True)
def _configure_arakoon_to_volumedriver(): print "Update existing vPools" logger.info("Update existing vPools") for storagerouter in StorageRouterList.get_storagerouters(): config = ArakoonClusterConfig("voldrv") config.load_config() arakoon_nodes = [] for node in config.nodes: arakoon_nodes.append({"host": node.ip, "port": node.client_port, "node_id": node.name}) with Remote( storagerouter.ip, [os, RawConfigParser, EtcdConfiguration, StorageDriverConfiguration], "ovs" ) as remote: configuration_dir = "{0}/storagedriver/storagedriver".format( EtcdConfiguration.get("/ovs/framework/paths|cfgdir") ) if not remote.os.path.exists(configuration_dir): remote.os.makedirs(configuration_dir) for json_file in remote.os.listdir(configuration_dir): vpool_name = json_file.replace(".json", "") if json_file.endswith(".json"): if remote.os.path.exists("{0}/{1}.cfg".format(configuration_dir, vpool_name)): continue # There's also a .cfg file, so this is an alba_proxy configuration file storagedriver_config = remote.StorageDriverConfiguration("storagedriver", vpool_name) storagedriver_config.load() storagedriver_config.configure_volume_registry( vregistry_arakoon_cluster_id="voldrv", vregistry_arakoon_cluster_nodes=arakoon_nodes ) storagedriver_config.configure_distributed_lock_store( dls_type="Arakoon", dls_arakoon_cluster_id="voldrv", dls_arakoon_cluster_nodes=arakoon_nodes ) storagedriver_config.save(reload_config=True)
def collapse_arakoon():
    """
    Collapse Arakoon's Tlogs

    Gathers every internal Arakoon cluster (config cluster 'cacc' plus all
    internal ARAKOON/NS_MGR/ALBA_MGR service clusters), groups the work per
    node IP, then runs 'arakoon --collapse-local' for each cluster on each
    reachable StorageRouter. Failures are logged per cluster; an unreachable
    node skips all of its clusters.
    :return: None
    """
    ScheduledTaskController._logger.info('Starting arakoon collapse')
    storagerouters = StorageRouterList.get_storagerouters()
    # (cluster_name, storagerouter owning the config, config-on-filesystem?)
    cluster_info = [('cacc', storagerouters[0], True)]
    cluster_names = []
    for service in ServiceList.get_services():
        if service.is_internal is True and service.type.name in (ServiceType.SERVICE_TYPES.ARAKOON,
                                                                 ServiceType.SERVICE_TYPES.NS_MGR,
                                                                 ServiceType.SERVICE_TYPES.ALBA_MGR):
            cluster = service.name.replace('arakoon-', '')
            if cluster in cluster_names:
                continue  # Same cluster can back multiple services; collapse it once
            cluster_names.append(cluster)
            cluster_info.append((cluster, service.storagerouter, False))
    # Map node IP -> node id + list of (cluster, filesystem) pairs to collapse there
    workload = {}
    for cluster, storagerouter, filesystem in cluster_info:
        ScheduledTaskController._logger.debug(' Collecting info for cluster {0}'.format(cluster))
        config = ArakoonClusterConfig(cluster, filesystem=filesystem)
        config.load_config(storagerouter.ip)
        for node in config.nodes:
            if node.ip not in workload:
                workload[node.ip] = {'node_id': node.name,
                                     'clusters': []}
            workload[node.ip]['clusters'].append((cluster, filesystem))
    for storagerouter in storagerouters:
        try:
            if storagerouter.ip not in workload:
                continue
            node_workload = workload[storagerouter.ip]
            client = SSHClient(storagerouter)
            for cluster, filesystem in node_workload['clusters']:
                try:
                    ScheduledTaskController._logger.debug(' Collapsing cluster {0} on {1}'.format(cluster, storagerouter.ip))
                    if filesystem is True:
                        config_path = ArakoonClusterConfig.CONFIG_FILE.format(cluster)
                    else:
                        config_path = Configuration.get_configuration_path(ArakoonClusterConfig.CONFIG_KEY.format(cluster))
                    client.run(['arakoon', '--collapse-local', node_workload['node_id'], '2', '-config', config_path])
                    ScheduledTaskController._logger.info(' Collapsing cluster {0} on {1} completed'.format(cluster, storagerouter.ip))
                except Exception:  # Was a bare 'except:', which also swallowed SystemExit/KeyboardInterrupt
                    ScheduledTaskController._logger.exception(' Collapsing cluster {0} on {1} failed'.format(cluster, storagerouter.ip))
        except UnableToConnectException:
            ScheduledTaskController._logger.error(' Could not collapse any cluster on {0} (not reachable)'.format(storagerouter.name))
    ScheduledTaskController._logger.info('Arakoon collapse finished')
def load(vpool):
    """
    Initializes the wrapper for a given vpool
    :param vpool: vPool for which the ClusterRegistryClient needs to be loaded
    """
    # Unit tests get a stub client without any real Arakoon wiring
    if os.environ.get('RUNNING_UNITTESTS') == 'True':
        return ClusterRegistry(str(vpool.guid), None, None)
    cache_key = vpool.identifier
    if cache_key in crclient_vpool_cache:
        return crclient_vpool_cache[cache_key]
    # Cache miss: resolve the voldrv Arakoon cluster and build a fresh client
    arakoon_cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|voldrv'))
    cluster_config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name, filesystem=False)
    cluster_config.load_config()
    arakoon_node_configs = [ArakoonNodeConfig(str(node.name), str(node.ip), node.client_port)
                            for node in cluster_config.nodes]
    crclient_vpool_cache[cache_key] = ClusterRegistry(str(vpool.guid), arakoon_cluster_name, arakoon_node_configs)
    return crclient_vpool_cache[cache_key]
def _configure_arakoon_to_volumedriver(cluster_name):
    """
    Configure every registered StorageDriver to use the given Arakoon cluster
    for both its volume registry and its distributed lock store.
    :param cluster_name: Name of the Arakoon cluster to point the drivers at
    """
    StorageDriverController._logger.info('Update existing vPools')
    arakoon_config = ArakoonClusterConfig(cluster_id=cluster_name, filesystem=False)
    arakoon_config.load_config()
    arakoon_nodes = [{'host': n.ip, 'port': n.client_port, 'node_id': n.name}
                     for n in arakoon_config.nodes]
    if not Configuration.dir_exists('/ovs/vpools'):
        return  # No vPools registered, nothing to update
    for vpool_guid in Configuration.list('/ovs/vpools'):
        for storagedriver_id in Configuration.list('/ovs/vpools/{0}/hosts'.format(vpool_guid)):
            sd_config = StorageDriverConfiguration('storagedriver', vpool_guid, storagedriver_id)
            sd_config.load()
            sd_config.configure_volume_registry(vregistry_arakoon_cluster_id=cluster_name,
                                                vregistry_arakoon_cluster_nodes=arakoon_nodes)
            sd_config.configure_distributed_lock_store(dls_type='Arakoon',
                                                       dls_arakoon_cluster_id=cluster_name,
                                                       dls_arakoon_cluster_nodes=arakoon_nodes)
            sd_config.save(reload_config=True)
def load(vpool):
    """
    Initializes the wrapper for a given vpool
    :param vpool: vPool for which the ObjectRegistryClient needs to be loaded
    """
    # During unit tests, hand back a stub client with no Arakoon backing
    if os.environ.get('RUNNING_UNITTESTS') == 'True':
        return ORClient(str(vpool.guid), None, None)
    cache_key = vpool.identifier
    if cache_key in oclient_vpool_cache:
        return oclient_vpool_cache[cache_key]
    # Cache miss: look up the voldrv Arakoon cluster and create the client
    arakoon_cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|voldrv'))
    cluster_config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name, filesystem=False)
    cluster_config.load_config()
    arakoon_node_configs = [ArakoonNodeConfig(str(node.name), str(node.ip), node.client_port)
                            for node in cluster_config.nodes]
    oclient_vpool_cache[cache_key] = ORClient(str(vpool.guid), str(arakoon_cluster_name), arakoon_node_configs)
    return oclient_vpool_cache[cache_key]
def test_cluster_maintenance(self):
    """
    Validates whether a cluster can be correctly created

    Exercises the full Arakoon cluster lifecycle against the mocked
    Configuration/SSHClient infrastructure: create -> start -> claim ->
    extend -> shrink, asserting on-disk layout and stored config at each step.
    """
    # Reserve Arakoon port ranges for the two mocked hosts
    Configuration.set('/ovs/framework/hosts/1/ports', {'arakoon': [10000, 10100]})
    Configuration.set('/ovs/framework/hosts/2/ports', {'arakoon': [20000, 20100]})
    structure = Helper.build_service_structure(
        {'storagerouters': [1, 2]}
    )
    storagerouters = structure['storagerouters']
    System._machine_id = {storagerouters[1].ip: '1',
                          storagerouters[2].ip: '2'}
    # Create new cluster
    mountpoint = storagerouters[1].disks[0].partitions[0].mountpoint
    if os.path.exists(mountpoint) and mountpoint != '/':
        shutil.rmtree(mountpoint)  # start from a clean directory (never wipe '/')
    base_dir = mountpoint + '/test_create_cluster'
    info = ArakoonInstaller.create_cluster('test', ServiceType.ARAKOON_CLUSTER_TYPES.FWK, storagerouters[1].ip, base_dir)
    # Creation must lay out tlogs/ and db/ directories for the cluster
    reality = Helper.extract_dir_structure(base_dir)
    expected = {'dirs': {'arakoon': {'dirs': {'test': {'dirs': {'tlogs': {'dirs': {}, 'files': []},
                                                                'db': {'dirs': {}, 'files': []}},
                                                       'files': []}},
                                     'files': []}},
                'files': []}
    self.assertDictEqual(reality, expected)
    expected = '{0}\n\n{1}\n\n'.format(ArakoonInstallerTester.EXPECTED_CLUSTER_CONFIG.format('1', 'test', ''),
                                       ArakoonInstallerTester.EXPECTED_NODE_CONFIG.format(
                                           '1', storagerouters[1].ip, 10000, base_dir, '1', 10001
                                       ))
    self.assertEqual(Configuration.get(ArakoonInstaller.CONFIG_KEY.format('test'), raw=True), expected)
    # @TODO: assert service availability here. It should be stopped
    ArakoonInstaller.start_cluster('test', storagerouters[1].ip, filesystem=False)
    # @TODO: assert the service is running
    config = ArakoonClusterConfig('test', filesystem=False)
    config.load_config(storagerouters[1].ip)
    client = ArakoonInstaller.build_client(config)
    reality = client.get(ArakoonInstaller.INTERNAL_CONFIG_KEY)
    self.assertEqual(reality, expected)
    # Cluster is not claimed yet, so no metadata key may exist
    self.assertFalse(client.exists(ArakoonInstaller.METADATA_KEY))
    ArakoonInstaller.claim_cluster('test', storagerouters[1].ip, filesystem=False, metadata=info['metadata'])
    reality = json.loads(client.get(ArakoonInstaller.METADATA_KEY))
    expected = {'cluster_name': 'test',
                'cluster_type': 'FWK',
                'in_use': True,
                'internal': True}
    self.assertDictEqual(reality, expected)
    # Extending cluster
    mountpoint = storagerouters[2].disks[0].partitions[0].mountpoint
    if os.path.exists(mountpoint) and mountpoint != '/':
        shutil.rmtree(mountpoint)
    base_dir2 = mountpoint + '/test_extend_cluster'
    ArakoonInstaller.extend_cluster(storagerouters[1].ip, storagerouters[2].ip, 'test', base_dir2)
    # Node 1's directory structure must be untouched by the extend
    reality = Helper.extract_dir_structure(base_dir)
    expected = {'dirs': {'arakoon': {'dirs': {'test': {'dirs': {'tlogs': {'dirs': {}, 'files': []},
                                                                'db': {'dirs': {}, 'files': []}},
                                                       'files': []}},
                                     'files': []}},
                'files': []}
    self.assertDictEqual(reality, expected)
    # Stored config must now list both nodes
    expected = '{0}\n\n{1}\n\n{2}\n\n'.format(ArakoonInstallerTester.EXPECTED_CLUSTER_CONFIG.format('1,2', 'test', ''),
                                              ArakoonInstallerTester.EXPECTED_NODE_CONFIG.format(
                                                  '1', storagerouters[1].ip, 10000, base_dir, '1', 10001
                                              ),
                                              ArakoonInstallerTester.EXPECTED_NODE_CONFIG.format(
                                                  '2', storagerouters[2].ip, 20000, base_dir2, '2', 20001
                                              ))
    self.assertEqual(Configuration.get(ArakoonInstaller.CONFIG_KEY.format('test'), raw=True), expected)
    # @TODO: assert service availability here. It should be stopped
    # Restarting the extended cluster must issue a catchup for the new node
    catchup_command = 'arakoon --node 2 -config file://opt/OpenvStorage/config/framework.json?key=/ovs/arakoon/test/config -catchup-only'
    SSHClient._run_returns[catchup_command] = None
    SSHClient._run_recordings = []
    ArakoonInstaller.restart_cluster_add('test', [storagerouters[1].ip], storagerouters[2].ip, filesystem=False)
    self.assertIn(catchup_command, SSHClient._run_recordings)
    # @TODO: assert the service is running
    config = ArakoonClusterConfig('test', filesystem=False)
    config.load_config(storagerouters[2].ip)
    client = ArakoonInstaller.build_client(config)
    reality = client.get(ArakoonInstaller.INTERNAL_CONFIG_KEY)
    self.assertEqual(reality, expected)
    reality = json.loads(client.get(ArakoonInstaller.METADATA_KEY))
    expected = {'cluster_name': 'test',
                'cluster_type': 'FWK',
                'in_use': True,
                'internal': True}
    self.assertDictEqual(reality, expected)
    # Shrinking cluster
    ArakoonInstaller.shrink_cluster(storagerouters[1].ip, storagerouters[2].ip, 'test')
    # Node 1's data directories must have been removed, leaving an empty cluster dir
    reality = Helper.extract_dir_structure(base_dir)
    expected = {'dirs': {'arakoon': {'dirs': {'test': {'dirs': {},
                                                       'files': []}},
                                     'files': []}},
                'files': []}
    self.assertDictEqual(reality, expected)
    # Stored config must now only list node 2
    expected = '{0}\n\n{1}\n\n'.format(ArakoonInstallerTester.EXPECTED_CLUSTER_CONFIG.format('2', 'test', ''),
                                       ArakoonInstallerTester.EXPECTED_NODE_CONFIG.format(
                                           '2', storagerouters[2].ip, 20000, base_dir2, '2', 20001
                                       ))
    self.assertEqual(Configuration.get(ArakoonInstaller.CONFIG_KEY.format('test'), raw=True), expected)
    # @TODO: assert service availability here. It should be stopped
    ArakoonInstaller.restart_cluster_remove('test', [storagerouters[2].ip], filesystem=False)
    # @TODO: assert the service is running
    config = ArakoonClusterConfig('test', filesystem=False)
    config.load_config(storagerouters[2].ip)
    client = ArakoonInstaller.build_client(config)
    reality = client.get(ArakoonInstaller.INTERNAL_CONFIG_KEY)
    self.assertEqual(reality, expected)
    reality = json.loads(client.get(ArakoonInstaller.METADATA_KEY))
    expected = {'cluster_name': 'test',
                'cluster_type': 'FWK',
                'in_use': True,
                'internal': True}
    self.assertDictEqual(reality, expected)
def get_update_information_core(information):
    """
    Called when the 'Update' button in the GUI is pressed
    This call collects additional information about the packages which can be updated
    Eg:
        * Downtime for Arakoons
        * Downtime for StorageDrivers
        * Prerequisites that haven't been met
        * Services which will be stopped during update
        * Services which will be restarted after update

    :param information: Dict being accumulated across update-information hooks;
                        this function fills/extends the 'framework' and
                        'storagedriver' keys and returns the same dict
    :return: The (mutated) information dict
    """
    # Verify arakoon info
    arakoon_ovs_info = {'down': False, 'name': None, 'internal': False}
    arakoon_cacc_info = {'down': False, 'name': None, 'internal': False}
    arakoon_voldrv_info = {'down': False, 'name': None, 'internal': False}
    for cluster in ['cacc', 'ovsdb', 'voldrv']:
        cluster_name = ArakoonClusterConfig.get_cluster_name(cluster)
        if cluster_name is None:
            continue
        # 'cacc' (config cluster) lives on the filesystem and must be read via the local node's IP
        if cluster == 'cacc':
            arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name, filesystem=True, ip=System.get_my_storagerouter().ip)
        else:
            arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name)
        if arakoon_metadata['internal'] is True:
            config = ArakoonClusterConfig(cluster_id=cluster_name, filesystem=(cluster == 'cacc'))
            config.load_config(System.get_my_storagerouter().ip if cluster == 'cacc' else None)
            if cluster == 'ovsdb':
                # Fewer than 3 nodes means no quorum survives a rolling restart -> downtime
                arakoon_ovs_info['down'] = len(config.nodes) < 3
                arakoon_ovs_info['name'] = arakoon_metadata['cluster_name']
                arakoon_ovs_info['internal'] = True
            elif cluster == 'voldrv':
                arakoon_voldrv_info['down'] = len(config.nodes) < 3
                arakoon_voldrv_info['name'] = arakoon_metadata['cluster_name']
                arakoon_voldrv_info['internal'] = True
            else:
                arakoon_cacc_info['name'] = arakoon_metadata['cluster_name']
                arakoon_cacc_info['internal'] = True
    # Verify StorageRouter downtime
    prerequisites = []
    all_storagerouters = StorageRouterList.get_storagerouters()
    for storagerouter in all_storagerouters:
        try:
            SSHClient(endpoint=storagerouter, username='******')
        except UnableToConnectException:
            prerequisites.append(['node_down', storagerouter.name])
    for key in ['framework', 'storagedriver']:
        if key not in information:
            information[key] = {'packages': {},
                                'downtime': [],
                                'prerequisites': prerequisites,
                                'services_stop_start': set(),
                                'services_post_update': set()}
        for storagerouter in all_storagerouters:
            if key not in storagerouter.package_information:
                continue
            # Retrieve ALBA proxy issues
            alba_services = []
            alba_downtime = []
            for service in storagerouter.services:
                if service.type.name != ServiceType.SERVICE_TYPES.ALBA_PROXY or service.alba_proxy is None:
                    continue
                alba_services.append(service.name)
                alba_downtime.append(['proxy', service.alba_proxy.storagedriver.vpool.name])
            # Retrieve StorageDriver issues
            storagedriver_downtime = []
            storagedriver_services = []
            for sd in storagerouter.storagedrivers:
                # Order of services is important, first we want to stop all volume-drivers, then DTLs
                storagedriver_services.append('ovs-volumedriver_{0}'.format(sd.vpool.name))
            for sd in storagerouter.storagedrivers:
                storagedriver_services.append('ovs-dtl_{0}'.format(sd.vpool.name))
                if len(sd.vdisks_guids) > 0:
                    # Only vPools that actually host vDisks incur downtime
                    storagedriver_downtime.append(['voldrv', sd.vpool.name])
            # Retrieve the actual update information
            for package_name, package_info in storagerouter.package_information[key].iteritems():
                if package_name not in UpdateController.all_core_packages:
                    continue  # Only gather information for the core packages
                information[key]['services_post_update'].update(package_info.pop('services_to_restart'))
                if package_name not in information[key]['packages']:
                    information[key]['packages'][package_name] = {}
                information[key]['packages'][package_name].update(package_info)
                if package_name == 'openvstorage':
                    # Framework update takes down GUI + API; dedupe entries across StorageRouters
                    if ['gui', None] not in information[key]['downtime']:
                        information[key]['downtime'].append(['gui', None])
                    if ['api', None] not in information[key]['downtime']:
                        information[key]['downtime'].append(['api', None])
                    information[key]['services_stop_start'].update({'watcher-framework', 'memcached'})
                elif package_name == 'alba':
                    for down in alba_downtime:
                        if down not in information[key]['downtime']:
                            information[key]['downtime'].append(down)
                    information[key]['services_post_update'].update(alba_services)
                elif package_name == 'volumedriver-no-dedup-base':
                    for down in storagedriver_downtime:
                        if down not in information[key]['downtime']:
                            information[key]['downtime'].append(down)
                    information[key]['services_post_update'].update(storagedriver_services)
                elif package_name == 'volumedriver-no-dedup-server':
                    for down in storagedriver_downtime:
                        if down not in information[key]['downtime']:
                            information[key]['downtime'].append(down)
                    information[key]['services_post_update'].update(storagedriver_services)
                elif package_name == 'arakoon':
                    if key == 'framework':
                        # Framework owns the ovsdb and config (cacc) clusters
                        framework_arakoons = set()
                        if arakoon_ovs_info['internal'] is True:
                            framework_arakoons.add('ovs-arakoon-{0}'.format(arakoon_ovs_info['name']))
                        if arakoon_cacc_info['internal'] is True:
                            framework_arakoons.add('ovs-arakoon-{0}'.format(arakoon_cacc_info['name']))
                        information[key]['services_post_update'].update(framework_arakoons)
                        if arakoon_ovs_info['down'] is True and ['ovsdb', None] not in information[key]['downtime']:
                            information[key]['downtime'].append(['ovsdb', None])
                    elif arakoon_voldrv_info['internal'] is True:
                        # Non-framework key: only the voldrv cluster is relevant
                        information[key]['services_post_update'].update({'ovs-arakoon-{0}'.format(arakoon_voldrv_info['name'])})
                        if arakoon_voldrv_info['down'] is True and ['voldrv', None] not in information[key]['downtime']:
                            information[key]['downtime'].append(['voldrv', None])
    return information
def get_update_information_alba_plugin(information):
    """
    Called when the 'Update' button in the GUI is pressed
    This call collects additional information about the packages which can be updated
    Eg:
        * Downtime for Arakoons
        * Downtime for StorageDrivers
        * Prerequisites that haven't been met
        * Services which will be stopped during update
        * Services which will be restarted after update

    :param information: Dict being accumulated across update-information hooks;
                        this function fills/extends the 'framework' and 'alba'
                        keys and returns the same dict
    :return: The (mutated) information dict
    """
    # Verify arakoon info
    arakoon_ovs_info = {'down': False, 'name': None, 'internal': False}
    arakoon_cacc_info = {'down': False, 'name': None, 'internal': False}
    for cluster in ['cacc', 'ovsdb']:
        cluster_name = ArakoonClusterConfig.get_cluster_name(cluster)
        if cluster_name is None:
            continue
        # 'cacc' (config cluster) lives on the filesystem and is read via the local node's IP
        if cluster == 'cacc':
            arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name, filesystem=True, ip=System.get_my_storagerouter().ip)
        else:
            arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name)
        if arakoon_metadata['internal'] is True:
            config = ArakoonClusterConfig(cluster_id=cluster_name, filesystem=(cluster == 'cacc'))
            config.load_config(System.get_my_storagerouter().ip if cluster == 'cacc' else None)
            if cluster == 'ovsdb':
                # Fewer than 3 nodes -> no quorum survives a rolling restart -> downtime
                arakoon_ovs_info['down'] = len(config.nodes) < 3
                arakoon_ovs_info['name'] = arakoon_metadata['cluster_name']
                arakoon_ovs_info['internal'] = True
            else:
                arakoon_cacc_info['name'] = arakoon_metadata['cluster_name']
                arakoon_cacc_info['internal'] = True
    # Verify StorageRouter downtime
    fwk_prerequisites = []
    all_storagerouters = StorageRouterList.get_storagerouters()
    for storagerouter in all_storagerouters:
        try:
            SSHClient(endpoint=storagerouter, username='******')
        except UnableToConnectException:
            fwk_prerequisites.append(['node_down', storagerouter.name])
    # Verify ALBA node responsiveness
    alba_prerequisites = []
    for alba_node in AlbaNodeList.get_albanodes():
        try:
            alba_node.client.get_metadata()
        except Exception:
            alba_prerequisites.append(['alba_node_unresponsive', alba_node.ip])
    for key in ['framework', 'alba']:
        if key not in information:
            information[key] = {'packages': {},
                                'downtime': [],
                                'prerequisites': fwk_prerequisites if key == 'framework' else alba_prerequisites,
                                'services_stop_start': set(),
                                'services_post_update': set()}
        for storagerouter in StorageRouterList.get_storagerouters():
            if key not in storagerouter.package_information:
                continue
            # Retrieve Arakoon issues
            arakoon_downtime = []
            arakoon_services = []
            for service in storagerouter.services:
                if service.type.name not in [ServiceType.SERVICE_TYPES.ALBA_MGR, ServiceType.SERVICE_TYPES.NS_MGR]:
                    continue
                if service.type.name == ServiceType.SERVICE_TYPES.ALBA_MGR:
                    cluster_name = AlbaController.get_abm_cluster_name(alba_backend=service.abm_service.alba_backend)
                else:
                    cluster_name = AlbaController.get_nsm_cluster_name(alba_backend=service.nsm_service.alba_backend, number=service.nsm_service.number)
                if Configuration.exists('/ovs/arakoon/{0}/config'.format(cluster_name), raw=True) is False:
                    continue  # Cluster has no stored config (yet); nothing to report
                arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name)
                if arakoon_metadata['internal'] is True:
                    arakoon_services.append('ovs-{0}'.format(service.name))
                    config = ArakoonClusterConfig(cluster_id=cluster_name, filesystem=False)
                    config.load_config()
                    if len(config.nodes) < 3:
                        # Backend loses availability while its small Arakoon restarts
                        if service.type.name == ServiceType.SERVICE_TYPES.NS_MGR:
                            arakoon_downtime.append(['backend', service.nsm_service.alba_backend.name])
                        else:
                            arakoon_downtime.append(['backend', service.abm_service.alba_backend.name])
            for package_name, package_info in storagerouter.package_information[key].iteritems():
                if package_name not in AlbaUpdateController.alba_plugin_packages:
                    continue  # Only gather information for the core packages
                information[key]['services_post_update'].update(package_info.pop('services_to_restart'))
                if package_name not in information[key]['packages']:
                    information[key]['packages'][package_name] = {}
                information[key]['packages'][package_name].update(package_info)
                if package_name == 'openvstorage-backend':
                    # Backend plugin update takes down GUI + API; dedupe across StorageRouters
                    if ['gui', None] not in information[key]['downtime']:
                        information[key]['downtime'].append(['gui', None])
                    if ['api', None] not in information[key]['downtime']:
                        information[key]['downtime'].append(['api', None])
                    information[key]['services_stop_start'].update({'watcher-framework', 'memcached'})
                elif package_name == 'alba':
                    for down in arakoon_downtime:
                        if down not in information[key]['downtime']:
                            information[key]['downtime'].append(down)
                    information[key]['services_post_update'].update(arakoon_services)
                elif package_name == 'arakoon':
                    if key == 'framework':
                        # Framework owns the ovsdb and config (cacc) clusters
                        framework_arakoons = set()
                        if arakoon_ovs_info['internal'] is True:
                            framework_arakoons.add('ovs-arakoon-{0}'.format(arakoon_ovs_info['name']))
                        if arakoon_cacc_info['internal'] is True:
                            framework_arakoons.add('ovs-arakoon-{0}'.format(arakoon_cacc_info['name']))
                        information[key]['services_post_update'].update(framework_arakoons)
                        if arakoon_ovs_info['down'] is True and ['ovsdb', None] not in information[key]['downtime']:
                            information[key]['downtime'].append(['ovsdb', None])
                    else:
                        for down in arakoon_downtime:
                            if down not in information[key]['downtime']:
                                information[key]['downtime'].append(down)
                        information[key]['services_post_update'].update(arakoon_services)
        # ALBA (SDM) nodes report their own package information separately
        for alba_node in AlbaNodeList.get_albanodes():
            for package_name, package_info in alba_node.package_information.get(key, {}).iteritems():
                if package_name not in AlbaUpdateController.sdm_packages:
                    continue  # Only gather information for the SDM packages
                information[key]['services_post_update'].update(package_info.pop('services_to_restart'))
                if package_name not in information[key]['packages']:
                    information[key]['packages'][package_name] = {}
                information[key]['packages'][package_name].update(package_info)
    return information
def promote_or_demote_node(node_action, cluster_ip=None, execute_rollback=False):
    """
    Promotes or demotes the local node
    :param node_action: Demote or promote
    :type node_action: str
    :param cluster_ip: IP of node to promote or demote
    :type cluster_ip: str
    :param execute_rollback: In case of failure revert the changes made
    :type execute_rollback: bool
    :return: None
    """
    if node_action not in ('promote', 'demote'):
        raise ValueError('Nodes can only be promoted or demoted')
    Toolbox.log(logger=NodeTypeController._logger, messages='Open vStorage Setup - {0}'.format(node_action.capitalize()), boxed=True)
    try:
        Toolbox.log(logger=NodeTypeController._logger, messages='Collecting information', title=True)
        machine_id = System.get_my_machine_id()
        if Configuration.get('/ovs/framework/hosts/{0}/setupcompleted'.format(machine_id)) is False:
            raise RuntimeError('No local OVS setup found.')
        if cluster_ip and not re.match(Toolbox.regex_ip, cluster_ip):
            raise RuntimeError('Incorrect IP provided ({0})'.format(cluster_ip))
        # When a remote cluster IP is given, operate on that node's machine id instead of the local one
        if cluster_ip:
            client = SSHClient(endpoint=cluster_ip)
            machine_id = System.get_my_machine_id(client)
        node_type = Configuration.get('/ovs/framework/hosts/{0}/type'.format(machine_id))
        if node_action == 'promote' and node_type == 'MASTER':
            raise RuntimeError('This node is already master.')
        elif node_action == 'demote' and node_type == 'EXTRA':
            raise RuntimeError('This node should be a master.')
        elif node_type not in ['MASTER', 'EXTRA']:
            raise RuntimeError('This node is not correctly configured.')
        master_ip = None
        offline_nodes = []
        online = True
        target_client = None
        if node_action == 'demote' and cluster_ip:  # Demote an offline node
            from ovs.dal.lists.storagerouterlist import StorageRouterList
            from ovs.lib.storagedriver import StorageDriverController
            ip = cluster_ip
            unique_id = None
            ip_client_map = {}
            for storage_router in StorageRouterList.get_storagerouters():
                try:
                    client = SSHClient(storage_router.ip, username='******')
                    if storage_router.node_type == 'MASTER':
                        master_ip = storage_router.ip
                    ip_client_map[storage_router.ip] = client
                except UnableToConnectException:
                    # Unreachable node: if it is the demote target, mark it offline in the model
                    if storage_router.ip == cluster_ip:
                        online = False
                        unique_id = storage_router.machine_id
                        StorageDriverController.mark_offline(storagerouter_guid=storage_router.guid)
                    offline_nodes.append(storage_router)
            if online is True:
                raise RuntimeError("If the node is online, please use 'ovs setup demote' executed on the node you wish to demote")
            if master_ip is None:
                raise RuntimeError('Failed to retrieve another responsive MASTER node')
        else:
            # Local promote/demote: authenticate locally and discover the cluster layout
            target_password = Toolbox.ask_validate_password(ip='127.0.0.1', logger=NodeTypeController._logger)
            target_client = SSHClient('127.0.0.1', username='******', password=target_password)
            unique_id = System.get_my_machine_id(target_client)
            ip = Configuration.get('/ovs/framework/hosts/{0}/ip'.format(unique_id))
            storagerouter_info = NodeTypeController.retrieve_storagerouter_info_via_host(ip=target_client.ip, password=target_password)
            node_ips = [sr_info['ip'] for sr_info in storagerouter_info.itervalues()]
            master_node_ips = [sr_info['ip'] for sr_info in storagerouter_info.itervalues() if sr_info['type'] == 'master' and sr_info['ip'] != ip]
            if len(master_node_ips) == 0:
                if node_action == 'promote':
                    raise RuntimeError('No master node could be found')
                else:
                    raise RuntimeError('It is not possible to remove the only master')
            master_ip = master_node_ips[0]
            ip_client_map = dict((node_ip, SSHClient(node_ip, username='******')) for node_ip in node_ips)
        if node_action == 'demote':
            # Guard: a single-node internal Arakoon living on the demote target would lose all its data
            for cluster_name in Configuration.list('/ovs/arakoon'):
                config = ArakoonClusterConfig(cluster_name, False)
                config.load_config()
                arakoon_client = ArakoonInstaller.build_client(config)
                metadata = json.loads(arakoon_client.get(ArakoonInstaller.METADATA_KEY))
                if len(config.nodes) == 1 and config.nodes[0].ip == ip and metadata.get('internal') is True:
                    raise RuntimeError('Demote is not supported when single node Arakoon cluster(s) are present on the node to be demoted.')
        configure_rabbitmq = Toolbox.is_service_internally_managed(service='rabbitmq')
        configure_memcached = Toolbox.is_service_internally_managed(service='memcached')
        if node_action == 'promote':
            try:
                NodeTypeController.promote_node(cluster_ip=ip,
                                                master_ip=master_ip,
                                                ip_client_map=ip_client_map,
                                                unique_id=unique_id,
                                                configure_memcached=configure_memcached,
                                                configure_rabbitmq=configure_rabbitmq)
            except Exception:
                # On failure: either undo by demoting, or leave a rollback marker for later
                if execute_rollback is True:
                    NodeTypeController.demote_node(cluster_ip=ip,
                                                   master_ip=master_ip,
                                                   ip_client_map=ip_client_map,
                                                   unique_id=unique_id,
                                                   unconfigure_memcached=configure_memcached,
                                                   unconfigure_rabbitmq=configure_rabbitmq,
                                                   offline_nodes=offline_nodes)
                elif target_client is not None:
                    target_client.file_write('/tmp/ovs_rollback', 'demote')
                raise
        else:
            try:
                NodeTypeController.demote_node(cluster_ip=ip,
                                               master_ip=master_ip,
                                               ip_client_map=ip_client_map,
                                               unique_id=unique_id,
                                               unconfigure_memcached=configure_memcached,
                                               unconfigure_rabbitmq=configure_rabbitmq,
                                               offline_nodes=offline_nodes)
            except Exception:
                # On failure: either undo by promoting again, or leave a rollback marker
                if execute_rollback is True:
                    NodeTypeController.promote_node(cluster_ip=ip,
                                                    master_ip=master_ip,
                                                    ip_client_map=ip_client_map,
                                                    unique_id=unique_id,
                                                    configure_memcached=configure_memcached,
                                                    configure_rabbitmq=configure_rabbitmq)
                elif target_client is not None:
                    target_client.file_write('/tmp/ovs_rollback', 'promote')
                raise
        Toolbox.log(logger=NodeTypeController._logger, messages='\n')
        Toolbox.log(logger=NodeTypeController._logger, messages='{0} complete.'.format(node_action.capitalize()), boxed=True)
    except Exception as exception:
        Toolbox.log(logger=NodeTypeController._logger, messages='\n')
        Toolbox.log(logger=NodeTypeController._logger, messages=['An unexpected error occurred:', str(exception)], boxed=True, loglevel='exception')
        sys.exit(1)
    except KeyboardInterrupt:
        Toolbox.log(logger=NodeTypeController._logger, messages='\n')
        Toolbox.log(logger=NodeTypeController._logger, messages='This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.', boxed=True, loglevel='error')
        sys.exit(1)
def demote_node(cluster_ip, master_ip, ip_client_map, unique_id, unconfigure_memcached, unconfigure_rabbitmq, offline_nodes=None):
    """
    Demotes a given node: leaves the Arakoon/configuration clusters, strips the
    master-only services from the node and re-registers it as an 'EXTRA' node.

    :param cluster_ip: IP of the node to demote
    :param master_ip: IP of a node that remains master (used to reach RabbitMQ/Etcd)
    :param ip_client_map: Mapping of node IP -> SSHClient for all reachable nodes
    :param unique_id: Machine id of the node to demote
    :param unconfigure_memcached: Whether memcached must be unconfigured on the node
    :param unconfigure_rabbitmq: Whether RabbitMQ must be unconfigured on the node
    :param offline_nodes: StorageRouters which are no longer reachable (best-effort cleanup only)
    """
    from ovs.dal.lists.storagerouterlist import StorageRouterList
    Toolbox.log(logger=NodeTypeController._logger, messages='Demoting node', title=True)
    if offline_nodes is None:
        offline_nodes = []
    # Demoting requires all memcache nodes to be reachable, unless we are cleaning up offline nodes
    if unconfigure_memcached is True and len(offline_nodes) == 0:
        if NodeTypeController._validate_local_memcache_servers(ip_client_map) is False:
            raise RuntimeError('Not all memcache nodes can be reached which is required for demoting a node.')

    # Find other (arakoon) master nodes
    arakoon_cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|ovsdb'))
    arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=arakoon_cluster_name)
    config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name, filesystem=False)
    config.load_config()
    master_node_ips = [node.ip for node in config.nodes]
    if cluster_ip in master_node_ips:
        master_node_ips.remove(cluster_ip)
    if len(master_node_ips) == 0:
        raise RuntimeError('There should be at least one other master node')

    storagerouter = StorageRouterList.get_by_machine_id(unique_id)
    storagerouter.node_type = 'EXTRA'
    storagerouter.save()

    offline_node_ips = [node.ip for node in offline_nodes]
    # Only internally-managed Arakoon clusters are shrunk by the framework
    if arakoon_metadata['internal'] is True:
        Toolbox.log(logger=NodeTypeController._logger, messages='Leaving Arakoon {0} cluster'.format(arakoon_cluster_name))
        ArakoonInstaller.shrink_cluster(deleted_node_ip=cluster_ip, remaining_node_ips=master_node_ips, cluster_name=arakoon_cluster_name, offline_nodes=offline_node_ips)
    # Leave the configuration store cluster (Arakoon 'config' or Etcd) - best effort
    try:
        external_config = Configuration.get('/ovs/framework/external_config')
        if external_config is None:
            config_store = Configuration.get_store()
            if config_store == 'arakoon':
                Toolbox.log(logger=NodeTypeController._logger, messages='Leaving Arakoon config cluster')
                ArakoonInstaller.shrink_cluster(deleted_node_ip=cluster_ip, remaining_node_ips=master_node_ips, cluster_name='config', offline_nodes=offline_node_ips, filesystem=True)
            else:
                from ovs.extensions.db.etcd.installer import EtcdInstaller
                Toolbox.log(logger=NodeTypeController._logger, messages='Leaving Etcd cluster')
                EtcdInstaller.shrink_cluster(master_ip, cluster_ip, 'config', offline_node_ips)
    except Exception as ex:
        Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to leave configuration cluster', ex], loglevel='exception')

    # Remove this node's endpoints from the shared memcache/messagequeue configuration - best effort
    Toolbox.log(logger=NodeTypeController._logger, messages='Update configurations')
    try:
        if unconfigure_memcached is True:
            endpoints = Configuration.get('/ovs/framework/memcache|endpoints')
            endpoint = '{0}:{1}'.format(cluster_ip, 11211)
            if endpoint in endpoints:
                endpoints.remove(endpoint)
            Configuration.set('/ovs/framework/memcache|endpoints', endpoints)
        if unconfigure_rabbitmq is True:
            endpoints = Configuration.get('/ovs/framework/messagequeue|endpoints')
            endpoint = '{0}:{1}'.format(cluster_ip, 5672)
            if endpoint in endpoints:
                endpoints.remove(endpoint)
            Configuration.set('/ovs/framework/messagequeue|endpoints', endpoints)
    except Exception as ex:
        Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to update configurations', ex], loglevel='exception')

    if arakoon_metadata['internal'] is True:
        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting master node services')
        # Reset the factory stores so fresh connections are made after the cluster change
        PersistentFactory.store = None
        VolatileFactory.store = None

        for service in storagerouter.services:
            if service.name == 'arakoon-ovsdb':
                service.delete()

    target_client = None
    if storagerouter in offline_nodes:
        # The node itself is unreachable: only its RabbitMQ membership can be cleaned up, via a master
        if unconfigure_rabbitmq is True:
            Toolbox.log(logger=NodeTypeController._logger, messages='Removing/unconfiguring offline RabbitMQ node')
            client = ip_client_map[master_ip]
            try:
                client.run(['rabbitmqctl', 'forget_cluster_node', 'rabbit@{0}'.format(storagerouter.name)])
            except Exception as ex:
                Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to forget RabbitMQ cluster node', ex], loglevel='exception')
    else:
        target_client = ip_client_map[cluster_ip]
        if unconfigure_rabbitmq is True:
            Toolbox.log(logger=NodeTypeController._logger, messages='Removing/unconfiguring RabbitMQ')
            try:
                if ServiceManager.has_service('rabbitmq-server', client=target_client):
                    # Reset the local RabbitMQ node; the sleeps give rabbitmqctl time to settle between steps
                    Toolbox.change_service_state(target_client, 'rabbitmq-server', 'stop', NodeTypeController._logger)
                    target_client.run(['rabbitmq-server', '-detached'])
                    time.sleep(5)
                    target_client.run(['rabbitmqctl', 'stop_app'])
                    time.sleep(5)
                    target_client.run(['rabbitmqctl', 'reset'])
                    time.sleep(5)
                    target_client.run(['rabbitmqctl', 'stop'])
                    time.sleep(5)
                    target_client.file_unlink("/var/lib/rabbitmq/.erlang.cookie")
                    Toolbox.change_service_state(target_client, 'rabbitmq-server', 'stop', NodeTypeController._logger)  # To be sure
            except Exception as ex:
                Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to remove/unconfigure RabbitMQ', ex], loglevel='exception')

        Toolbox.log(logger=NodeTypeController._logger, messages='Stopping services')
        services = ['memcached', 'rabbitmq-server']
        if unconfigure_rabbitmq is False:
            services.remove('rabbitmq-server')
        if unconfigure_memcached is False:
            services.remove('memcached')
        for service in services:
            if ServiceManager.has_service(service, client=target_client):
                Toolbox.log(logger=NodeTypeController._logger, messages='Stopping service {0}'.format(service))
                try:
                    Toolbox.change_service_state(target_client, service, 'stop', NodeTypeController._logger)
                except Exception as ex:
                    # Fixed: message previously lacked the {0} placeholder, dropping the service name
                    Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to stop service {0}'.format(service), ex], loglevel='exception')

        Toolbox.log(logger=NodeTypeController._logger, messages='Removing services')
        services = ['scheduled-tasks', 'webapp-api', 'volumerouter-consumer']
        for service in services:
            if ServiceManager.has_service(service, client=target_client):
                Toolbox.log(logger=NodeTypeController._logger, messages='Removing service {0}'.format(service))
                try:
                    Toolbox.change_service_state(target_client, service, 'stop', NodeTypeController._logger)
                    ServiceManager.remove_service(service, client=target_client)
                except Exception as ex:
                    # Fixed: message previously lacked the {0} placeholder, dropping the service name
                    Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to remove service {0}'.format(service), ex], loglevel='exception')

        # Re-register 'workers' so it only consumes this node's own queue
        if ServiceManager.has_service('workers', client=target_client):
            ServiceManager.add_service(name='workers', client=target_client, params={'WORKER_QUEUE': '{0}'.format(unique_id)})
    try:
        NodeTypeController._configure_amqp_to_volumedriver()
    except Exception as ex:
        Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to configure AMQP to Storage Driver', ex], loglevel='exception')

    Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
    NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger, offline_node_ips=offline_node_ips)

    # Hooks may change framework state; restart again if any hook ran
    if Toolbox.run_hooks(component='nodetype', sub_component='demote', logger=NodeTypeController._logger, cluster_ip=cluster_ip, master_ip=master_ip, offline_node_ips=offline_node_ips):
        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
        NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger, offline_node_ips=offline_node_ips)

    if storagerouter not in offline_nodes:
        target_client = ip_client_map[cluster_ip]
        node_name, _ = target_client.get_hostname()
        if NodeTypeController.avahi_installed(client=target_client, logger=NodeTypeController._logger) is True:
            NodeTypeController.configure_avahi(client=target_client, node_name=node_name, node_type='extra', logger=NodeTypeController._logger)
    Configuration.set('/ovs/framework/hosts/{0}/type'.format(storagerouter.machine_id), 'EXTRA')
    if target_client is not None and target_client.file_exists('/tmp/ovs_rollback'):
        target_client.file_write('/tmp/ovs_rollback', 'rollback')
    Toolbox.log(logger=NodeTypeController._logger, messages='Demote complete', title=True)
def promote_node(cluster_ip, master_ip, ip_client_map, unique_id, configure_memcached, configure_rabbitmq):
    """
    Promotes a given node: joins the configuration and Arakoon clusters, configures
    the master-only services (memcached, RabbitMQ, ...) and marks the node 'MASTER'.

    :param cluster_ip: IP of the node to promote
    :param master_ip: IP of an existing master node (source for Arakoon extend and RabbitMQ cookie)
    :param ip_client_map: Mapping of node IP -> SSHClient for all nodes
    :param unique_id: Machine id of the node to promote
    :param configure_memcached: Whether memcached must be configured on the node
    :param configure_rabbitmq: Whether RabbitMQ must be configured on the node
    """
    from ovs.dal.lists.storagerouterlist import StorageRouterList
    from ovs.dal.lists.servicetypelist import ServiceTypeList
    from ovs.dal.lists.servicelist import ServiceList
    from ovs.dal.hybrids.service import Service
    Toolbox.log(logger=NodeTypeController._logger, messages='Promoting node', title=True)
    # Promoting requires all memcache nodes to be reachable
    if configure_memcached is True:
        if NodeTypeController._validate_local_memcache_servers(ip_client_map) is False:
            raise RuntimeError('Not all memcache nodes can be reached which is required for promoting a node.')
    target_client = ip_client_map[cluster_ip]
    machine_id = System.get_my_machine_id(target_client)
    node_name, _ = target_client.get_hostname()
    master_client = ip_client_map[master_ip]
    storagerouter = StorageRouterList.get_by_machine_id(unique_id)
    storagerouter.node_type = 'MASTER'
    storagerouter.save()
    # Join the configuration store cluster first (internally managed only, i.e. no external config)
    external_config = Configuration.get('/ovs/framework/external_config')
    if external_config is None:
        config_store = Configuration.get_store()
        if config_store == 'arakoon':
            Toolbox.log(logger=NodeTypeController._logger, messages='Joining Arakoon configuration cluster')
            metadata = ArakoonInstaller.extend_cluster(master_ip=master_ip, new_ip=cluster_ip, cluster_name='config', base_dir=Configuration.get('/ovs/framework/paths|ovsdb'), ports=[26400, 26401], filesystem=True)
            ArakoonInstaller.restart_cluster_add(cluster_name='config', current_ips=metadata['ips'], new_ip=cluster_ip, filesystem=True)
            ServiceManager.register_service(node_name=machine_id, service_metadata=metadata['service_metadata'])
        else:
            from ovs.extensions.db.etcd.installer import EtcdInstaller
            Toolbox.log(logger=NodeTypeController._logger, messages='Joining Etcd cluster')
            EtcdInstaller.extend_cluster(master_ip, cluster_ip, 'config')
    # Find other (arakoon) master nodes
    arakoon_cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|ovsdb'))
    arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=arakoon_cluster_name)
    config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name, filesystem=False)
    config.load_config()
    master_node_ips = [node.ip for node in config.nodes]
    if cluster_ip in master_node_ips:
        master_node_ips.remove(cluster_ip)
    if len(master_node_ips) == 0:
        raise RuntimeError('There should be at least one other master node')
    # Extend the internally-managed OVSDB Arakoon cluster with this node
    arakoon_ports = []
    if arakoon_metadata['internal'] is True:
        Toolbox.log(logger=NodeTypeController._logger, messages='Joining Arakoon OVS DB cluster')
        result = ArakoonInstaller.extend_cluster(master_ip=master_ip, new_ip=cluster_ip, cluster_name=arakoon_cluster_name, base_dir=Configuration.get('/ovs/framework/paths|ovsdb'))
        ArakoonInstaller.restart_cluster_add(cluster_name=arakoon_cluster_name, current_ips=result['ips'], new_ip=cluster_ip, filesystem=False)
        arakoon_ports = [result['client_port'], result['messaging_port']]
    if configure_memcached is True:
        NodeTypeController.configure_memcached(client=target_client, logger=NodeTypeController._logger)
    NodeTypeController.add_services(client=target_client, node_type='master', logger=NodeTypeController._logger)
    # Register this node's endpoints in the shared memcache/messagequeue configuration
    Toolbox.log(logger=NodeTypeController._logger, messages='Update configurations')
    if configure_memcached is True:
        endpoints = Configuration.get('/ovs/framework/memcache|endpoints')
        endpoint = '{0}:11211'.format(cluster_ip)
        if endpoint not in endpoints:
            endpoints.append(endpoint)
        Configuration.set('/ovs/framework/memcache|endpoints', endpoints)
    if configure_rabbitmq is True:
        endpoints = Configuration.get('/ovs/framework/messagequeue|endpoints')
        endpoint = '{0}:5672'.format(cluster_ip)
        if endpoint not in endpoints:
            endpoints.append(endpoint)
        Configuration.set('/ovs/framework/messagequeue|endpoints', endpoints)
    if arakoon_metadata['internal'] is True:
        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting master node services')
        ArakoonInstaller.restart_cluster_add(cluster_name=arakoon_cluster_name, current_ips=master_node_ips, new_ip=cluster_ip, filesystem=False)
        # Reset the factory stores so fresh connections are made after the cluster change
        PersistentFactory.store = None
        VolatileFactory.store = None
        # Register the arakoon-ovsdb service in the model if not yet known for this node
        if 'arakoon-ovsdb' not in [s.name for s in ServiceList.get_services() if s.is_internal is False or s.storagerouter.ip == cluster_ip]:
            service = Service()
            service.name = 'arakoon-ovsdb'
            service.type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON)
            service.ports = arakoon_ports
            service.storagerouter = storagerouter
            service.save()
    if configure_rabbitmq is True:
        NodeTypeController.configure_rabbitmq(client=target_client, logger=NodeTypeController._logger)
        # Copy rabbitmq cookie: nodes must share the Erlang cookie to join one RabbitMQ cluster
        rabbitmq_cookie_file = '/var/lib/rabbitmq/.erlang.cookie'
        Toolbox.log(logger=NodeTypeController._logger, messages='Copying Rabbit MQ cookie')
        contents = master_client.file_read(rabbitmq_cookie_file)
        master_hostname, _ = master_client.get_hostname()
        target_client.dir_create(os.path.dirname(rabbitmq_cookie_file))
        target_client.file_write(rabbitmq_cookie_file, contents)
        # NOTE(review): mode=400 is passed as a plain int - confirm file_chmod expects octal-style permissions here
        target_client.file_chmod(rabbitmq_cookie_file, mode=400)
        # Join the RabbitMQ cluster; the sleeps give rabbitmqctl time to settle between steps
        target_client.run(['rabbitmq-server', '-detached'])
        time.sleep(5)
        target_client.run(['rabbitmqctl', 'stop_app'])
        time.sleep(5)
        target_client.run(['rabbitmqctl', 'join_cluster', 'rabbit@{0}'.format(master_hostname)])
        time.sleep(5)
        target_client.run(['rabbitmqctl', 'stop'])
        time.sleep(5)
        # Enable HA for the rabbitMQ queues
        Toolbox.change_service_state(target_client, 'rabbitmq-server', 'start', NodeTypeController._logger)
        NodeTypeController.check_rabbitmq_and_enable_ha_mode(client=target_client, logger=NodeTypeController._logger)
    NodeTypeController._configure_amqp_to_volumedriver()
    Toolbox.log(logger=NodeTypeController._logger, messages='Starting services')
    services = ['memcached', 'arakoon-ovsdb', 'rabbitmq-server', 'etcd-config']
    # arakoon-ovsdb was already (re)started by restart_cluster_add when internally managed
    if arakoon_metadata['internal'] is True:
        services.remove('arakoon-ovsdb')
    for service in services:
        if ServiceManager.has_service(service, client=target_client):
            Toolbox.change_service_state(target_client, service, 'start', NodeTypeController._logger)
    Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
    NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger)
    # Hooks may change framework state; restart again if any hook ran
    if Toolbox.run_hooks(component='nodetype', sub_component='promote', logger=NodeTypeController._logger, cluster_ip=cluster_ip, master_ip=master_ip):
        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
        NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger)
    if NodeTypeController.avahi_installed(client=target_client, logger=NodeTypeController._logger) is True:
        NodeTypeController.configure_avahi(client=target_client, node_name=node_name, node_type='master', logger=NodeTypeController._logger)
    Configuration.set('/ovs/framework/hosts/{0}/type'.format(machine_id), 'MASTER')
    target_client.run(['chown', '-R', 'ovs:ovs', '/opt/OpenvStorage/config'])
    Configuration.set('/ovs/framework/hosts/{0}/promotecompleted'.format(machine_id), True)
    # Promote succeeded: drop any pending rollback marker
    if target_client.file_exists('/tmp/ovs_rollback'):
        target_client.file_delete('/tmp/ovs_rollback')
    Toolbox.log(logger=NodeTypeController._logger, messages='Promote complete')
def test_cluster_maintenance(self):
    """
    Validates whether a cluster can be correctly created, extended and shrunk,
    asserting the on-disk directory structure and the generated Arakoon
    configuration after each step (against mocked Configuration/SSHClient).
    """
    # Pre-seed the port ranges the installer will pick from per (mocked) host
    Configuration.set('/ovs/framework/hosts/1/ports', {'arakoon': [10000, 10100]})
    Configuration.set('/ovs/framework/hosts/2/ports', {'arakoon': [20000, 20100]})
    structure = Helper.build_service_structure({'storagerouters': [1, 2]})
    storagerouters = structure['storagerouters']
    System._machine_id = {storagerouters[1].ip: '1',
                          storagerouters[2].ip: '2'}

    # Create new cluster
    mountpoint = storagerouters[1].disks[0].partitions[0].mountpoint
    # Clean leftovers from a previous run (guard against wiping '/')
    if os.path.exists(mountpoint) and mountpoint != '/':
        shutil.rmtree(mountpoint)
    base_dir = mountpoint + '/test_create_cluster'
    info = ArakoonInstaller.create_cluster('test', ServiceType.ARAKOON_CLUSTER_TYPES.FWK, storagerouters[1].ip, base_dir)

    # The cluster dir must contain empty 'tlogs' and 'db' directories
    reality = Helper.extract_dir_structure(base_dir)
    expected = {'dirs': {'arakoon': {'dirs': {'test': {'dirs': {'tlogs': {'dirs': {}, 'files': []},
                                                                'db': {'dirs': {}, 'files': []}},
                                                       'files': []}},
                                     'files': []}},
                'files': []}
    self.assertDictEqual(reality, expected)
    # Generated config must contain the cluster section plus one node section
    expected = '{0}\n\n{1}\n\n'.format(ArakoonInstallerTester.EXPECTED_CLUSTER_CONFIG.format('1', 'test', ''),
                                       ArakoonInstallerTester.EXPECTED_NODE_CONFIG.format('1', storagerouters[1].ip, 10000, base_dir, '1', 10001))
    self.assertEqual(Configuration.get(ArakoonInstaller.CONFIG_KEY.format('test'), raw=True), expected)
    # @TODO: assert service availability here. It should be stopped
    ArakoonInstaller.start_cluster('test', storagerouters[1].ip, filesystem=False)
    # @TODO: assert the service is running
    config = ArakoonClusterConfig('test', filesystem=False)
    config.load_config(storagerouters[1].ip)
    client = ArakoonInstaller.build_client(config)
    # The config is mirrored inside Arakoon itself; metadata only appears after claiming
    reality = client.get(ArakoonInstaller.INTERNAL_CONFIG_KEY)
    self.assertEqual(reality, expected)
    self.assertFalse(client.exists(ArakoonInstaller.METADATA_KEY))
    ArakoonInstaller.claim_cluster('test', storagerouters[1].ip, filesystem=False, metadata=info['metadata'])
    reality = json.loads(client.get(ArakoonInstaller.METADATA_KEY))
    expected = {'cluster_name': 'test',
                'cluster_type': 'FWK',
                'in_use': True,
                'internal': True}
    self.assertDictEqual(reality, expected)

    # Extending cluster
    mountpoint = storagerouters[2].disks[0].partitions[0].mountpoint
    if os.path.exists(mountpoint) and mountpoint != '/':
        shutil.rmtree(mountpoint)
    base_dir2 = mountpoint + '/test_extend_cluster'
    ArakoonInstaller.extend_cluster(storagerouters[1].ip, storagerouters[2].ip, 'test', base_dir2)
    # Directory structure on node 1 must be unchanged by the extend
    reality = Helper.extract_dir_structure(base_dir)
    expected = {'dirs': {'arakoon': {'dirs': {'test': {'dirs': {'tlogs': {'dirs': {}, 'files': []},
                                                                'db': {'dirs': {}, 'files': []}},
                                                       'files': []}},
                                     'files': []}},
                'files': []}
    self.assertDictEqual(reality, expected)
    # Config must now list both nodes
    expected = '{0}\n\n{1}\n\n{2}\n\n'.format(ArakoonInstallerTester.EXPECTED_CLUSTER_CONFIG.format('1,2', 'test', ''),
                                              ArakoonInstallerTester.EXPECTED_NODE_CONFIG.format('1', storagerouters[1].ip, 10000, base_dir, '1', 10001),
                                              ArakoonInstallerTester.EXPECTED_NODE_CONFIG.format('2', storagerouters[2].ip, 20000, base_dir2, '2', 20001))
    self.assertEqual(Configuration.get(ArakoonInstaller.CONFIG_KEY.format('test'), raw=True), expected)
    # @TODO: assert service availability here. It should be stopped
    # The new node must catch up before joining; stub the command and record invocations
    catchup_command = 'arakoon --node 2 -config file://opt/OpenvStorage/config/framework.json?key=/ovs/arakoon/test/config -catchup-only'
    SSHClient._run_returns[catchup_command] = None
    SSHClient._run_recordings = []
    ArakoonInstaller.restart_cluster_add('test', [storagerouters[1].ip], storagerouters[2].ip, filesystem=False)
    self.assertIn(catchup_command, SSHClient._run_recordings)
    # @TODO: assert the service is running
    config = ArakoonClusterConfig('test', filesystem=False)
    config.load_config(storagerouters[2].ip)
    client = ArakoonInstaller.build_client(config)
    reality = client.get(ArakoonInstaller.INTERNAL_CONFIG_KEY)
    self.assertEqual(reality, expected)
    reality = json.loads(client.get(ArakoonInstaller.METADATA_KEY))
    expected = {'cluster_name': 'test',
                'cluster_type': 'FWK',
                'in_use': True,
                'internal': True}
    self.assertDictEqual(reality, expected)

    # Shrinking cluster
    ArakoonInstaller.shrink_cluster(storagerouters[1].ip, storagerouters[2].ip, 'test')
    # Node 1's data directories are removed; only the empty cluster dir remains
    reality = Helper.extract_dir_structure(base_dir)
    expected = {'dirs': {'arakoon': {'dirs': {'test': {'dirs': {}, 'files': []}},
                                     'files': []}},
                'files': []}
    self.assertDictEqual(reality, expected)
    # Config must only list the remaining node
    expected = '{0}\n\n{1}\n\n'.format(ArakoonInstallerTester.EXPECTED_CLUSTER_CONFIG.format('2', 'test', ''),
                                       ArakoonInstallerTester.EXPECTED_NODE_CONFIG.format('2', storagerouters[2].ip, 20000, base_dir2, '2', 20001))
    self.assertEqual(Configuration.get(ArakoonInstaller.CONFIG_KEY.format('test'), raw=True), expected)
    # @TODO: assert service availability here. It should be stopped
    ArakoonInstaller.restart_cluster_remove('test', [storagerouters[2].ip], filesystem=False)
    # @TODO: assert the service is running
    config = ArakoonClusterConfig('test', filesystem=False)
    config.load_config(storagerouters[2].ip)
    client = ArakoonInstaller.build_client(config)
    reality = client.get(ArakoonInstaller.INTERNAL_CONFIG_KEY)
    self.assertEqual(reality, expected)
    reality = json.loads(client.get(ArakoonInstaller.METADATA_KEY))
    expected = {'cluster_name': 'test',
                'cluster_type': 'FWK',
                'in_use': True,
                'internal': True}
    self.assertDictEqual(reality, expected)
def collapse_arakoon():
    """
    Collapse Arakoon's Tlogs: for every internally-managed Arakoon cluster
    (plus the 'cacc' config cluster), run 'arakoon --collapse-local' on each
    node hosting that cluster. Failures are logged and skipped so one broken
    cluster or unreachable node does not block the others.
    :return: None
    """
    ScheduledTaskController._logger.info('Starting arakoon collapse')
    storagerouters = StorageRouterList.get_storagerouters()
    # The config cluster ('cacc') always participates and lives on the filesystem
    cluster_info = [('cacc', storagerouters[0], True)]
    cluster_names = []
    for service in ServiceList.get_services():
        if service.is_internal is True and service.type.name in (ServiceType.SERVICE_TYPES.ARAKOON,
                                                                 ServiceType.SERVICE_TYPES.NS_MGR,
                                                                 ServiceType.SERVICE_TYPES.ALBA_MGR):
            cluster = service.name.replace('arakoon-', '')
            if cluster in cluster_names:
                continue  # Multiple services can belong to the same cluster
            cluster_names.append(cluster)
            cluster_info.append((cluster, service.storagerouter, False))
    # Build per-node workload: which clusters must be collapsed on which IP
    workload = {}
    for cluster, storagerouter, filesystem in cluster_info:
        ScheduledTaskController._logger.debug(' Collecting info for cluster {0}'.format(cluster))
        config = ArakoonClusterConfig(cluster, filesystem=filesystem)
        config.load_config(storagerouter.ip)
        for node in config.nodes:
            if node.ip not in workload:
                workload[node.ip] = {'node_id': node.name,
                                     'clusters': []}
            workload[node.ip]['clusters'].append((cluster, filesystem))
    for storagerouter in storagerouters:
        try:
            if storagerouter.ip not in workload:
                continue
            node_workload = workload[storagerouter.ip]
            client = SSHClient(storagerouter)
            for cluster, filesystem in node_workload['clusters']:
                try:
                    ScheduledTaskController._logger.debug(' Collapsing cluster {0} on {1}'.format(cluster, storagerouter.ip))
                    # Filesystem-based clusters have a plain config file; others live in the config store
                    if filesystem is True:
                        config_path = ArakoonClusterConfig.CONFIG_FILE.format(cluster)
                    else:
                        config_path = Configuration.get_configuration_path(ArakoonClusterConfig.CONFIG_KEY.format(cluster))
                    client.run(['arakoon', '--collapse-local', node_workload['node_id'], '2', '-config', config_path])
                    ScheduledTaskController._logger.info(' Collapsing cluster {0} on {1} completed'.format(cluster, storagerouter.ip))
                except Exception:
                    # Fixed: was a bare 'except:' which also swallowed SystemExit/KeyboardInterrupt.
                    # Best-effort by design: log and continue with the next cluster.
                    ScheduledTaskController._logger.exception(' Collapsing cluster {0} on {1} failed'.format(cluster, storagerouter.ip))
        except UnableToConnectException:
            ScheduledTaskController._logger.error(' Could not collapse any cluster on {0} (not reachable)'.format(storagerouter.name))
    ScheduledTaskController._logger.info('Arakoon collapse finished')