def on_demote(cluster_ip, master_ip, offline_node_ips=None):
    """
    Handles the demote for the StorageDrivers
    :param cluster_ip: IP of the node to demote
    :param master_ip: IP of the master node
    :param offline_node_ips: IPs of nodes which are offline
    """
    if offline_node_ips is None:
        offline_node_ips = []
    client = SSHClient(cluster_ip, username='******') if cluster_ip not in offline_node_ips else None
    servicetype = ServiceTypeList.get_by_name('Arakoon')
    current_service = None
    remaining_ips = []
    for service in servicetype.services:
        if service.name == 'arakoon-voldrv':
            if service.storagerouter.ip == cluster_ip:
                current_service = service
            elif service.storagerouter.ip not in offline_node_ips:
                remaining_ips.append(service.storagerouter.ip)
    if current_service is not None:
        print '* Shrink StorageDriver cluster'
        ArakoonInstaller.shrink_cluster(master_ip, cluster_ip, 'voldrv', offline_node_ips)
        if client is not None and ServiceManager.has_service(current_service.name, client=client) is True:
            ServiceManager.stop_service(current_service.name, client=client)
            ServiceManager.remove_service(current_service.name, client=client)
        ArakoonInstaller.restart_cluster_remove('voldrv', remaining_ips)
        current_service.delete()
        StorageDriverController._configure_arakoon_to_volumedriver(offline_node_ips)
def on_demote(cluster_ip, master_ip):
    """
    Handles the demote for the StorageDrivers
    :param cluster_ip: IP of the node to demote
    :param master_ip: IP of the master node
    """
    client = SSHClient(cluster_ip, username='******')
    servicetype = ServiceTypeList.get_by_name('Arakoon')
    current_service = None
    remaining_ips = []
    for service in servicetype.services:
        if service.name == 'arakoon-voldrv':
            if service.storagerouter.ip == cluster_ip:
                current_service = service
            else:
                remaining_ips.append(service.storagerouter.ip)
    if current_service is not None:
        print '* Shrink StorageDriver cluster'
        ArakoonInstaller.shrink_cluster(master_ip, cluster_ip, 'voldrv')
        if ServiceManager.has_service(current_service.name, client=client) is True:
            ServiceManager.stop_service(current_service.name, client=client)
            ServiceManager.remove_service(current_service.name, client=client)
        ArakoonInstaller.restart_cluster_remove('voldrv', remaining_ips)
        current_service.delete()
        for storagerouter in StorageRouterList.get_storagerouters():
            ArakoonInstaller.deploy_to_slave(master_ip, storagerouter.ip, 'voldrv')
        StorageDriverController._configure_arakoon_to_volumedriver()
def on_demote(cluster_ip, master_ip, offline_node_ips=None):
    """
    Handles the demote for the StorageDrivers
    :param cluster_ip: IP of the node to demote
    :param master_ip: IP of the master node
    :param offline_node_ips: IPs of nodes which are offline
    """
    _ = master_ip
    if offline_node_ips is None:
        offline_node_ips = []
    client = SSHClient(cluster_ip, username='******') if cluster_ip not in offline_node_ips else None
    servicetype = ServiceTypeList.get_by_name('Arakoon')
    current_service = None
    remaining_ips = []
    for service in servicetype.services:
        if service.name == 'arakoon-voldrv':
            if service.storagerouter.ip == cluster_ip:
                current_service = service
            elif service.storagerouter.ip not in offline_node_ips:
                remaining_ips.append(service.storagerouter.ip)
    if current_service is not None:
        print '* Shrink StorageDriver cluster'
        ArakoonInstaller.shrink_cluster(cluster_ip, 'voldrv', offline_node_ips)
        if client is not None and ServiceManager.has_service(current_service.name, client=client) is True:
            ServiceManager.stop_service(current_service.name, client=client)
            ServiceManager.remove_service(current_service.name, client=client)
        ArakoonInstaller.restart_cluster_remove('voldrv', remaining_ips)
        current_service.delete()
        StorageDriverController._configure_arakoon_to_volumedriver()
def remove_services(client, node_type, logger):
    """
    Remove all services managed by OVS
    :param client: Client on which to remove the services
    :type client: ovs.extensions.generic.sshclient.SSHClient
    :param node_type: Type of node, can be 'master' or 'extra'
    :type node_type: str
    :param logger: Logger object used for logging
    :type logger: ovs.log.log_handler.LogHandler
    :return: None
    """
    Toolbox.log(logger=logger, messages="Removing services")
    stop_only = ["rabbitmq-server", "memcached"]
    services = ["workers", "support-agent", "watcher-framework"]
    if node_type == "master":
        services += ["scheduled-tasks", "webapp-api", "volumerouter-consumer"]
        if Toolbox.is_service_internally_managed(service="rabbitmq") is True:
            services.append("rabbitmq-server")
        if Toolbox.is_service_internally_managed(service="memcached") is True:
            services.append("memcached")

    for service in services:
        if ServiceManager.has_service(service, client=client):
            Toolbox.log(logger=logger,
                        messages="{0} service {1}".format("Removing" if service not in stop_only else "Stopping", service))
            ServiceManager.stop_service(service, client=client)
            if service not in stop_only:
                ServiceManager.remove_service(service, client=client)
def remove(cluster_name, client):
    """
    Removes an etcd service
    :param client: Client on which to remove the service
    :param cluster_name: The name of the cluster service to remove
    """
    if ServiceManager.has_service('etcd-{0}'.format(cluster_name), client=client) is True:
        ServiceManager.remove_service('etcd-{0}'.format(cluster_name), client=client)
def remove(cluster_name, client):
    """
    Removes an arakoon service
    """
    if ServiceManager.has_service('arakoon-{0}'.format(cluster_name), client=client) is True:
        ServiceManager.remove_service('arakoon-{0}'.format(cluster_name), client=client)
def remove(cluster_name, client):
    """
    Removes an arakoon service
    :param cluster_name: The name of the cluster service to remove
    :type cluster_name: str
    :param client: Client on which to remove the service
    :type client: SSHClient
    :return: None
    """
    if ServiceManager.has_service('arakoon-{0}'.format(cluster_name), client=client) is True:
        ServiceManager.remove_service('arakoon-{0}'.format(cluster_name), client=client)
def remove(cluster_name, client, delay_unregistration=False):
    """
    Removes an arakoon service
    :param cluster_name: The name of the cluster service to remove
    :type cluster_name: str
    :param client: Client on which to remove the service
    :type client: SSHClient
    :param delay_unregistration: Un-register the service right away or not
    :type delay_unregistration: bool
    :return: None
    """
    service_name = ArakoonInstaller.get_service_name_for_cluster(cluster_name=cluster_name)
    if ServiceManager.has_service(name=service_name, client=client) is True:
        ServiceManager.remove_service(name=service_name, client=client, delay_unregistration=delay_unregistration)
def on_demote(cluster_ip, master_ip, offline_node_ips=None):
    """
    Handles the demote for the StorageDrivers
    :param cluster_ip: IP of the node to demote
    :type cluster_ip: str
    :param master_ip: IP of the master node
    :type master_ip: str
    :param offline_node_ips: IPs of nodes which are offline
    :type offline_node_ips: list
    :return: None
    """
    _ = master_ip
    if offline_node_ips is None:
        offline_node_ips = []
    client = SSHClient(cluster_ip, username='******') if cluster_ip not in offline_node_ips else None
    servicetype = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON)
    current_service = None
    remaining_ips = []
    for service in servicetype.services:
        if service.name == 'arakoon-voldrv' and service.is_internal is True:  # Externally managed arakoon cluster service does not have storage router
            if service.storagerouter.ip == cluster_ip:
                current_service = service
            elif service.storagerouter.ip not in offline_node_ips:
                remaining_ips.append(service.storagerouter.ip)
    if current_service is not None:
        StorageDriverController._logger.debug('* Shrink StorageDriver cluster')
        cluster_name = str(EtcdConfiguration.get('/ovs/framework/arakoon_clusters|voldrv'))
        ArakoonInstaller.shrink_cluster(deleted_node_ip=cluster_ip,
                                        cluster_name=cluster_name,
                                        offline_nodes=offline_node_ips)
        if client is not None and ServiceManager.has_service(current_service.name, client=client) is True:
            ServiceManager.stop_service(current_service.name, client=client)
            ServiceManager.remove_service(current_service.name, client=client)
        ArakoonInstaller.restart_cluster_remove(cluster_name, remaining_ips)
        current_service.delete()
        StorageDriverController._configure_arakoon_to_volumedriver(cluster_name=cluster_name)
def on_remove(cluster_ip, complete_removal):
    """
    Handles the StorageDriver removal part of a node
    :param cluster_ip: IP of the node which is being removed from the cluster
    :type cluster_ip: str
    :param complete_removal: Unused for StorageDriver, used for AlbaController
    :type complete_removal: bool
    :return: None
    """
    _ = complete_removal
    service_name = 'watcher-volumedriver'
    try:
        client = SSHClient(endpoint=cluster_ip, username='******')
        if ServiceManager.has_service(name=service_name, client=client):
            ServiceManager.stop_service(name=service_name, client=client)
            ServiceManager.remove_service(name=service_name, client=client)
    except UnableToConnectException:
        pass
def on_demote(cluster_ip, master_ip, offline_node_ips=None):
    """
    Handles the demote for the StorageDrivers
    :param cluster_ip: IP of the node to demote
    :type cluster_ip: str
    :param master_ip: IP of the master node
    :type master_ip: str
    :param offline_node_ips: IPs of nodes which are offline
    :type offline_node_ips: list
    :return: None
    """
    _ = master_ip
    if offline_node_ips is None:
        offline_node_ips = []
    client = SSHClient(cluster_ip, username='******') if cluster_ip not in offline_node_ips else None
    servicetype = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON)
    current_service = None
    remaining_ips = []
    for service in servicetype.services:
        if service.name == 'arakoon-voldrv' and service.is_internal is True:  # Externally managed arakoon cluster service does not have storage router
            if service.storagerouter.ip == cluster_ip:
                current_service = service
            elif service.storagerouter.ip not in offline_node_ips:
                remaining_ips.append(service.storagerouter.ip)
    if current_service is not None:
        if len(remaining_ips) == 0:
            raise RuntimeError('Could not find any remaining arakoon nodes for the voldrv cluster')
        StorageDriverController._logger.debug('* Shrink StorageDriver cluster')
        cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|voldrv'))
        ArakoonInstaller.shrink_cluster(deleted_node_ip=cluster_ip,
                                        remaining_node_ip=remaining_ips[0],
                                        cluster_name=cluster_name,
                                        offline_nodes=offline_node_ips)
        if client is not None and ServiceManager.has_service(current_service.name, client=client) is True:
            ServiceManager.stop_service(current_service.name, client=client)
            ServiceManager.remove_service(current_service.name, client=client)
        ArakoonInstaller.restart_cluster_remove(cluster_name, remaining_ips, filesystem=False)
        current_service.delete()
        StorageDriverController._configure_arakoon_to_volumedriver(cluster_name=cluster_name)
def unconfigure_host(self, ip):
    if self._is_devstack is False and self._is_openstack is False or self._cinder_installed is False or self._nova_installed is False:
        self._logger.warning('Unconfigure host: No OpenStack nor DevStack installation detected or Cinder and Nova plugins are not installed')
        return

    # 1. Remove driver code
    self._logger.info('*** Unconfiguring host with IP {0} ***'.format(ip))
    self._logger.info(' Removing driver code')
    if self._is_devstack is True:
        self.client.file_delete(self._devstack_driver)
    else:
        self.client.file_delete('{0}/cinder/volume/drivers/openvstorage.py'.format(self._driver_location))

    # 2. Removing users from group
    self._logger.info(' Removing users from group ovs')
    for user in ['libvirt-qemu', 'stack'] if self._is_devstack is True else self._openstack_users:
        self.client.run('deluser {0} ovs'.format(user))

    # 3. Revert patches
    self._logger.info(' Reverting patches')
    nova_base_path = self._get_base_path('nova')
    cinder_base_path = self._get_base_path('cinder')
    if self._is_devstack is True:
        nova_volume_file = '{0}/virt/libvirt/volume.py'.format(nova_base_path)
        nova_driver_file = '{0}/virt/libvirt/driver.py'.format(nova_base_path)
        cinder_brick_initiator_file = '{0}/brick/initiator/connector.py'.format(cinder_base_path)
    else:
        nova_volume_file = '{0}/nova/virt/libvirt/volume.py'.format(self._driver_location)
        nova_driver_file = '{0}/nova/virt/libvirt/driver.py'.format(self._driver_location)
        cinder_brick_initiator_file = '{0}/cinder/brick/initiator/connector.py'.format(self._driver_location)

    self._logger.info(' Reverting patched file: {0}'.format(nova_volume_file))
    new_contents = []
    skip_class = False
    for line in self.client.file_read(nova_volume_file).splitlines():
        if line.startswith('class LibvirtFileVolumeDriver(LibvirtBaseVolumeDriver):'):
            skip_class = True
            continue
        if line.startswith('class'):
            skip_class = False
        if skip_class is False:
            new_contents.append(line)
    self.client.file_write(nova_volume_file, "".join(new_contents))

    self._logger.info(' Reverting patched file: {0}'.format(nova_driver_file))
    new_contents = []
    for line in self.client.file_read(nova_driver_file).splitlines():
        stripped_line = line.strip()
        if stripped_line.startswith("'file=nova.virt.libvirt.volume.LibvirtFileVolumeDriver'"):
            continue
        new_contents.append(line)
    self.client.file_write(nova_driver_file, "".join(new_contents))

    if os.path.exists(cinder_brick_initiator_file):
        self._logger.info(' Reverting patched file: {0}'.format(cinder_brick_initiator_file))
        self.client.run("""sed -i 's/elif protocol in ["LOCAL", "FILE"]:/elif protocol == "LOCAL":/g' {0}""".format(cinder_brick_initiator_file))

    # 4. Unconfigure messaging driver
    self._logger.info(' Unconfiguring messaging driver')
    nova_messaging_driver = 'nova.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging'
    cinder_messaging_driver = 'cinder.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging'
    with remote(ip, [RawConfigParser, open], 'root') as rem:
        for config_file, driver in {self._NOVA_CONF: nova_messaging_driver,
                                    self._CINDER_CONF: cinder_messaging_driver}.iteritems():
            cfg = rem.RawConfigParser()
            cfg.read([config_file])
            if cfg.has_option("DEFAULT", "notification_driver"):
                cfg.remove_option("DEFAULT", "notification_driver")
            if cfg.has_option("DEFAULT", "notification_topics"):
                notification_topics = cfg.get("DEFAULT", "notification_topics").split(",")
                if "notifications" in notification_topics:
                    notification_topics.remove("notifications")
                    cfg.set("DEFAULT", "notification_topics", ",".join(notification_topics))
            if config_file == self._NOVA_CONF:
                for param, value in {'notify_on_any_change': 'True',
                                     'notify_on_state_change': 'vm_and_task_state'}.iteritems():
                    if cfg.has_option("DEFAULT", param):
                        cfg.remove_option("DEFAULT", param)
            with rem.open(config_file, "w") as fp:
                cfg.write(fp)

    # 5. Disable events consumer
    self._logger.info(' Disabling events consumer')
    service_name = 'ovs-openstack-events-consumer'
    if ServiceManager.has_service(service_name, self.client):
        ServiceManager.stop_service(service_name, self.client)
        ServiceManager.disable_service(service_name, self.client)
        ServiceManager.remove_service(service_name, self.client)
def demote_node(cluster_ip, master_ip, ip_client_map, unique_id, unconfigure_memcached, unconfigure_rabbitmq, offline_nodes=None):
    """
    Demotes a given node
    """
    from ovs.dal.lists.storagerouterlist import StorageRouterList

    Toolbox.log(logger=NodeTypeController._logger, messages='Demoting node', title=True)
    if offline_nodes is None:
        offline_nodes = []

    if unconfigure_memcached is True and len(offline_nodes) == 0:
        if NodeTypeController._validate_local_memcache_servers(ip_client_map) is False:
            raise RuntimeError('Not all memcache nodes can be reached which is required for demoting a node.')

    # Find other (arakoon) master nodes
    arakoon_cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|ovsdb'))
    arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=arakoon_cluster_name)
    config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name, filesystem=False)
    config.load_config()
    master_node_ips = [node.ip for node in config.nodes]
    if cluster_ip in master_node_ips:
        master_node_ips.remove(cluster_ip)
    if len(master_node_ips) == 0:
        raise RuntimeError('There should be at least one other master node')

    storagerouter = StorageRouterList.get_by_machine_id(unique_id)
    storagerouter.node_type = 'EXTRA'
    storagerouter.save()

    offline_node_ips = [node.ip for node in offline_nodes]
    if arakoon_metadata['internal'] is True:
        Toolbox.log(logger=NodeTypeController._logger, messages='Leaving Arakoon {0} cluster'.format(arakoon_cluster_name))
        ArakoonInstaller.shrink_cluster(deleted_node_ip=cluster_ip,
                                        remaining_node_ips=master_node_ips,
                                        cluster_name=arakoon_cluster_name,
                                        offline_nodes=offline_node_ips)

    try:
        external_config = Configuration.get('/ovs/framework/external_config')
        if external_config is None:
            config_store = Configuration.get_store()
            if config_store == 'arakoon':
                Toolbox.log(logger=NodeTypeController._logger, messages='Leaving Arakoon config cluster')
                ArakoonInstaller.shrink_cluster(deleted_node_ip=cluster_ip,
                                                remaining_node_ips=master_node_ips,
                                                cluster_name='config',
                                                offline_nodes=offline_node_ips,
                                                filesystem=True)
            else:
                from ovs.extensions.db.etcd.installer import EtcdInstaller
                Toolbox.log(logger=NodeTypeController._logger, messages='Leaving Etcd cluster')
                EtcdInstaller.shrink_cluster(master_ip, cluster_ip, 'config', offline_node_ips)
    except Exception as ex:
        Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to leave configuration cluster', ex], loglevel='exception')

    Toolbox.log(logger=NodeTypeController._logger, messages='Update configurations')
    try:
        if unconfigure_memcached is True:
            endpoints = Configuration.get('/ovs/framework/memcache|endpoints')
            endpoint = '{0}:{1}'.format(cluster_ip, 11211)
            if endpoint in endpoints:
                endpoints.remove(endpoint)
            Configuration.set('/ovs/framework/memcache|endpoints', endpoints)
        if unconfigure_rabbitmq is True:
            endpoints = Configuration.get('/ovs/framework/messagequeue|endpoints')
            endpoint = '{0}:{1}'.format(cluster_ip, 5672)
            if endpoint in endpoints:
                endpoints.remove(endpoint)
            Configuration.set('/ovs/framework/messagequeue|endpoints', endpoints)
    except Exception as ex:
        Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to update configurations', ex], loglevel='exception')

    if arakoon_metadata['internal'] is True:
        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting master node services')
        remaining_nodes = ip_client_map.keys()[:]
        if cluster_ip in remaining_nodes:
            remaining_nodes.remove(cluster_ip)

        PersistentFactory.store = None
        VolatileFactory.store = None

        for service in storagerouter.services:
            if service.name == 'arakoon-ovsdb':
                service.delete()

    target_client = None
    if storagerouter in offline_nodes:
        if unconfigure_rabbitmq is True:
            Toolbox.log(logger=NodeTypeController._logger, messages='Removing/unconfiguring offline RabbitMQ node')
            client = ip_client_map[master_ip]
            try:
                client.run(['rabbitmqctl', 'forget_cluster_node', 'rabbit@{0}'.format(storagerouter.name)])
            except Exception as ex:
                Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to forget RabbitMQ cluster node', ex], loglevel='exception')
    else:
        target_client = ip_client_map[cluster_ip]
        if unconfigure_rabbitmq is True:
            Toolbox.log(logger=NodeTypeController._logger, messages='Removing/unconfiguring RabbitMQ')
            try:
                if ServiceManager.has_service('rabbitmq-server', client=target_client):
                    Toolbox.change_service_state(target_client, 'rabbitmq-server', 'stop', NodeTypeController._logger)
                    target_client.run(['rabbitmq-server', '-detached'])
                    time.sleep(5)
                    target_client.run(['rabbitmqctl', 'stop_app'])
                    time.sleep(5)
                    target_client.run(['rabbitmqctl', 'reset'])
                    time.sleep(5)
                    target_client.run(['rabbitmqctl', 'stop'])
                    time.sleep(5)
                    target_client.file_unlink("/var/lib/rabbitmq/.erlang.cookie")
                    Toolbox.change_service_state(target_client, 'rabbitmq-server', 'stop', NodeTypeController._logger)  # To be sure
            except Exception as ex:
                Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to remove/unconfigure RabbitMQ', ex], loglevel='exception')

        Toolbox.log(logger=NodeTypeController._logger, messages='Stopping services')
        services = ['memcached', 'rabbitmq-server']
        if unconfigure_rabbitmq is False:
            services.remove('rabbitmq-server')
        if unconfigure_memcached is False:
            services.remove('memcached')
        for service in services:
            if ServiceManager.has_service(service, client=target_client):
                Toolbox.log(logger=NodeTypeController._logger, messages='Stopping service {0}'.format(service))
                try:
                    Toolbox.change_service_state(target_client, service, 'stop', NodeTypeController._logger)
                except Exception as ex:
                    Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to stop service {0}'.format(service), ex], loglevel='exception')

        Toolbox.log(logger=NodeTypeController._logger, messages='Removing services')
        services = ['scheduled-tasks', 'webapp-api', 'volumerouter-consumer']
        for service in services:
            if ServiceManager.has_service(service, client=target_client):
                Toolbox.log(logger=NodeTypeController._logger, messages='Removing service {0}'.format(service))
                try:
                    Toolbox.change_service_state(target_client, service, 'stop', NodeTypeController._logger)
                    ServiceManager.remove_service(service, client=target_client)
                except Exception as ex:
                    Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to remove service {0}'.format(service), ex], loglevel='exception')

        if ServiceManager.has_service('workers', client=target_client):
            ServiceManager.add_service(name='workers',
                                       client=target_client,
                                       params={'WORKER_QUEUE': '{0}'.format(unique_id)})

    try:
        NodeTypeController._configure_amqp_to_volumedriver()
    except Exception as ex:
        Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to configure AMQP to Storage Driver', ex], loglevel='exception')

    Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
    NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger, offline_node_ips=offline_node_ips)

    if Toolbox.run_hooks(component='nodetype',
                         sub_component='demote',
                         logger=NodeTypeController._logger,
                         cluster_ip=cluster_ip,
                         master_ip=master_ip,
                         offline_node_ips=offline_node_ips):
        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
        NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger, offline_node_ips=offline_node_ips)

    if storagerouter not in offline_nodes:
        target_client = ip_client_map[cluster_ip]
        node_name, _ = target_client.get_hostname()
        if NodeTypeController.avahi_installed(client=target_client, logger=NodeTypeController._logger) is True:
            NodeTypeController.configure_avahi(client=target_client, node_name=node_name, node_type='extra', logger=NodeTypeController._logger)
    Configuration.set('/ovs/framework/hosts/{0}/type'.format(storagerouter.machine_id), 'EXTRA')

    if target_client is not None and target_client.file_exists('/tmp/ovs_rollback'):
        target_client.file_write('/tmp/ovs_rollback', 'rollback')

    Toolbox.log(logger=NodeTypeController._logger, messages='Demote complete', title=True)
def execute_scrub_work(queue, vpool, scrub_info, error_messages):
    """
    Executes scrub work for a given vDisk queue and vPool, based on scrub_info
    :param queue: a Queue with vDisk guids that need to be scrubbed (they should only be member of a single vPool)
    :type queue: Queue
    :param vpool: the vPool object of the vDisks
    :type vpool: VPool
    :param scrub_info: A dict containing scrub information: `scrub_path` with the path where to scrub and `storage_router` with the StorageRouter that needs to do the work
    :type scrub_info: dict
    :param error_messages: A list of error messages to be filled
    :type error_messages: list
    :return: a list of error messages
    :rtype: list
    """

    def _verify_mds_config(current_vdisk):
        current_vdisk.invalidate_dynamics('info')
        vdisk_configs = current_vdisk.info['metadata_backend_config']
        if len(vdisk_configs) == 0:
            raise RuntimeError('Could not load MDS configuration')
        return vdisk_configs

    client = None
    lock_time = 5 * 60
    storagerouter = scrub_info['storage_router']
    scrub_directory = '{0}/scrub_work_{1}_{2}'.format(scrub_info['scrub_path'], vpool.name, storagerouter.name)
    scrub_config_key = 'ovs/vpools/{0}/proxies/scrub/scrub_config_{1}'.format(vpool.guid, storagerouter.guid)
    backend_config_key = 'ovs/vpools/{0}/proxies/scrub/backend_config_{1}'.format(vpool.guid, storagerouter.guid)
    alba_proxy_service = 'ovs-albaproxy_{0}_{1}_scrub'.format(vpool.name, storagerouter.name)

    # Deploy a proxy
    try:
        with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Deploying ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
            client = SSHClient(storagerouter, 'root')
            client.dir_create(scrub_directory)
            client.dir_chmod(scrub_directory, 0777)  # Celery task executed by 'ovs' user and should be able to write in it
            if ServiceManager.has_service(name=alba_proxy_service, client=client) is True and ServiceManager.get_service_status(name=alba_proxy_service, client=client) is True:
                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Re-using existing proxy service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
                scrub_config = Configuration.get(scrub_config_key)
            else:
                machine_id = System.get_my_machine_id(client)
                port_range = Configuration.get('/ovs/framework/hosts/{0}/ports|storagedriver'.format(machine_id))
                port = System.get_free_ports(selected_range=port_range, nr=1, client=client)[0]
                # Scrub config
                # {u'albamgr_cfg_url': u'arakoon://config/ovs/vpools/71e2f717-f270-4a41-bbb0-d4c8c084d43e/proxies/64759516-3471-4321-b912-fb424568fc5b/config/abm?ini=%2Fopt%2FOpenvStorage%2Fconfig%2Farakoon_cacc.ini',
                #  u'fragment_cache': [u'none'],
                #  u'ips': [u'127.0.0.1'],
                #  u'log_level': u'info',
                #  u'manifest_cache_size': 17179869184,
                #  u'port': 0,
                #  u'transport': u'tcp'}
                # Backend config
                # {u'alba_connection_host': u'10.100.193.155',
                #  u'alba_connection_port': 26204,
                #  u'alba_connection_preset': u'preset',
                #  u'alba_connection_timeout': 15,
                #  u'alba_connection_transport': u'TCP',
                #  u'backend_interface_retries_on_error': 5,
                #  u'backend_interface_retry_backoff_multiplier': 2.0,
                #  u'backend_interface_retry_interval_secs': 1,
                #  u'backend_type': u'ALBA'}
                scrub_config = Configuration.get('ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid))
                scrub_config['port'] = port
                scrub_config['transport'] = 'tcp'
                Configuration.set(scrub_config_key, json.dumps(scrub_config, indent=4), raw=True)

                params = {'VPOOL_NAME': vpool.name,
                          'LOG_SINK': LogHandler.get_sink_path('alba_proxy'),
                          'CONFIG_PATH': Configuration.get_configuration_path(scrub_config_key)}
                ServiceManager.add_service(name='ovs-albaproxy', params=params, client=client, target_name=alba_proxy_service)
                ServiceManager.start_service(name=alba_proxy_service, client=client)
                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Deployed ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))

            backend_config = Configuration.get('ovs/vpools/{0}/hosts/{1}/config'.format(vpool.guid, vpool.storagedrivers[0].storagedriver_id))['backend_connection_manager']
            backend_config['alba_connection_host'] = '127.0.0.1'
            backend_config['alba_connection_port'] = scrub_config['port']
            Configuration.set(backend_config_key, json.dumps({"backend_connection_manager": backend_config}, indent=4), raw=True)
    except Exception:
        message = 'Scrubber - vPool {0} - StorageRouter {1} - An error occurred deploying ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service)
        error_messages.append(message)
        ScheduledTaskController._logger.exception(message)
        if client is not None and ServiceManager.has_service(name=alba_proxy_service, client=client) is True:
            if ServiceManager.get_service_status(name=alba_proxy_service, client=client) is True:
                ServiceManager.stop_service(name=alba_proxy_service, client=client)
            ServiceManager.remove_service(name=alba_proxy_service, client=client)
        if Configuration.exists(scrub_config_key):
            Configuration.delete(scrub_config_key)

    try:
        # Empty the queue with vDisks to scrub
        with remote(storagerouter.ip, [VDisk]) as rem:
            while True:
                vdisk = None
                vdisk_guid = queue.get(False)
                try:
                    # Check MDS master is local. Trigger MDS handover if necessary
                    vdisk = rem.VDisk(vdisk_guid)
                    ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Started scrubbing at location {3}'.format(vpool.name, storagerouter.name, vdisk.name, scrub_directory))
                    configs = _verify_mds_config(current_vdisk=vdisk)
                    storagedriver = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
                    if configs[0].get('ip') != storagedriver.storagerouter.ip:
                        ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - MDS master is not local, trigger handover'.format(vpool.name, storagerouter.name, vdisk.name))
                        MDSServiceController.ensure_safety(VDisk(vdisk_guid))  # Do not use a remote VDisk instance here
                        configs = _verify_mds_config(current_vdisk=vdisk)
                        if configs[0].get('ip') != storagedriver.storagerouter.ip:
                            ScheduledTaskController._logger.warning('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Skipping because master MDS still not local'.format(vpool.name, storagerouter.name, vdisk.name))
                            continue

                    # Do the actual scrubbing
                    with vdisk.storagedriver_client.make_locked_client(str(vdisk.volume_id)) as locked_client:
                        ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Retrieve and apply scrub work'.format(vpool.name, storagerouter.name, vdisk.name))
                        work_units = locked_client.get_scrubbing_workunits()
                        for work_unit in work_units:
                            res = locked_client.scrub(work_unit=work_unit,
                                                      scratch_dir=scrub_directory,
                                                      log_sinks=[LogHandler.get_sink_path('scrubber', allow_override=True)],
                                                      backend_config=Configuration.get_configuration_path(backend_config_key))
                            locked_client.apply_scrubbing_result(scrubbing_work_result=res)
                        if work_units:
                            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - {3} work units successfully applied'.format(vpool.name, storagerouter.name, vdisk.name, len(work_units)))
                        else:
                            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - No scrubbing required'.format(vpool.name, storagerouter.name, vdisk.name))
                except Exception:
                    if vdisk is None:
                        message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk with guid {2} could not be found'.format(vpool.name, storagerouter.name, vdisk_guid)
                    else:
                        message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Scrubbing failed'.format(vpool.name, storagerouter.name, vdisk.name)
                    error_messages.append(message)
                    ScheduledTaskController._logger.exception(message)
    except Empty:  # Raised when all items have been fetched from the queue
        ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Queue completely processed'.format(vpool.name, storagerouter.name))
    except Exception:
        message = 'Scrubber - vPool {0} - StorageRouter {1} - Scrubbing failed'.format(vpool.name, storagerouter.name)
        error_messages.append(message)
        ScheduledTaskController._logger.exception(message)

    # Delete the proxy again
    try:
        with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Removing service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
            client = SSHClient(storagerouter, 'root')
            client.dir_delete(scrub_directory)
            if ServiceManager.has_service(alba_proxy_service, client=client):
                ServiceManager.stop_service(alba_proxy_service, client=client)
                ServiceManager.remove_service(alba_proxy_service, client=client)
            if Configuration.exists(scrub_config_key):
                Configuration.delete(scrub_config_key)
            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Removed service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
    except Exception:
        message = 'Scrubber - vPool {0} - StorageRouter {1} - Removing service {2} failed'.format(vpool.name, storagerouter.name, alba_proxy_service)
        error_messages.append(message)
        ScheduledTaskController._logger.exception(message)
def remove_node(node_ip, silent=None):
    """
    Remove the node with specified IP from the cluster
    :param node_ip: IP of the node to remove
    :type node_ip: str
    :param silent: If silent == '--force-yes' no question will be asked to confirm the removal
    :type silent: str
    :return: None
    """
    from ovs.lib.storagedriver import StorageDriverController
    from ovs.lib.storagerouter import StorageRouterController
    from ovs.dal.lists.storagerouterlist import StorageRouterList

    Toolbox.log(logger=NodeRemovalController._logger, messages="Remove node", boxed=True)
    Toolbox.log(logger=NodeRemovalController._logger,
                messages="WARNING: Some of these steps may take a very long time, please check the logs for more information\n\n")

    ###############
    # VALIDATIONS #
    ###############
    try:
        node_ip = node_ip.strip()
        if not isinstance(node_ip, str):
            raise ValueError("Node IP must be a string")
        if not re.match(SSHClient.IP_REGEX, node_ip):
            raise ValueError("Invalid IP {0} specified".format(node_ip))

        storage_router_all = StorageRouterList.get_storagerouters()
        storage_router_masters = StorageRouterList.get_masters()
        storage_router_all_ips = set([storage_router.ip for storage_router in storage_router_all])
        storage_router_master_ips = set([storage_router.ip for storage_router in storage_router_masters])
        storage_router_to_remove = StorageRouterList.get_by_ip(node_ip)

        if node_ip not in storage_router_all_ips:
            raise ValueError("Unknown IP specified\nKnown in model:\n - {0}\nSpecified for removal:\n - {1}".format("\n - ".join(storage_router_all_ips), node_ip))
        if len(storage_router_all_ips) == 1:
            raise RuntimeError("Removing the only node is not possible")
        if node_ip in storage_router_master_ips and len(storage_router_master_ips) == 1:
            raise RuntimeError("Removing the only master node is not possible")
        if System.get_my_storagerouter() == storage_router_to_remove:
            raise RuntimeError("The node to be removed cannot be identical to the node on which the removal is initiated")

        Toolbox.log(logger=NodeRemovalController._logger, messages="Creating SSH connections to remaining master nodes")
        master_ip = None
        ip_client_map = {}
        storage_routers_offline = []
        storage_router_to_remove_online = True
        for storage_router in storage_router_all:
            try:
                client = SSHClient(storage_router, username="******")
                if client.run(["pwd"]):
                    Toolbox.log(logger=NodeRemovalController._logger,
                                messages=" Node with IP {0:<15} successfully connected to".format(storage_router.ip))
                    ip_client_map[storage_router.ip] = client
                    if storage_router != storage_router_to_remove and storage_router.node_type == "MASTER":
                        master_ip = storage_router.ip
            except UnableToConnectException:
                Toolbox.log(logger=NodeRemovalController._logger,
                            messages=" Node with IP {0:<15} is unreachable".format(storage_router.ip))
                storage_routers_offline.append(storage_router)
                if storage_router == storage_router_to_remove:
                    storage_router_to_remove_online = False

        if len(ip_client_map) == 0 or master_ip is None:
            raise RuntimeError("Could not connect to any master node in the cluster")

        storage_router_to_remove.invalidate_dynamics("vdisks_guids")
        if len(storage_router_to_remove.vdisks_guids) > 0:  # vDisks are supposed to be moved away manually before removing a node
            raise RuntimeError("Still vDisks attached to Storage Router {0}".format(storage_router_to_remove.name))

        internal_memcached = Toolbox.is_service_internally_managed(service="memcached")
        internal_rabbit_mq = Toolbox.is_service_internally_managed(service="rabbitmq")
        memcached_endpoints = Configuration.get(key="/ovs/framework/memcache|endpoints")
        rabbit_mq_endpoints = Configuration.get(key="/ovs/framework/messagequeue|endpoints")
        copy_memcached_endpoints = list(memcached_endpoints)
        copy_rabbit_mq_endpoints = list(rabbit_mq_endpoints)
        for endpoint in memcached_endpoints:
            if endpoint.startswith(storage_router_to_remove.ip):
                copy_memcached_endpoints.remove(endpoint)
        for endpoint in rabbit_mq_endpoints:
            if endpoint.startswith(storage_router_to_remove.ip):
                copy_rabbit_mq_endpoints.remove(endpoint)
        if len(copy_memcached_endpoints) == 0 and internal_memcached is True:
            raise RuntimeError("Removal of provided nodes will result in a complete removal of the memcached service")
        if len(copy_rabbit_mq_endpoints) == 0 and internal_rabbit_mq is True:
            raise RuntimeError("Removal of provided nodes will result in a complete removal of the messagequeue service")
    except Exception as exception:
        Toolbox.log(logger=NodeRemovalController._logger, messages=[str(exception)], boxed=True, loglevel="exception")
        sys.exit(1)

    #################
    # CONFIRMATIONS #
    #################
    interactive = silent != "--force-yes"
    remove_asd_manager = not interactive  # Remove ASD manager if non-interactive else ask
    if interactive is True:
        proceed = Interactive.ask_yesno(message="Are you sure you want to remove node {0}?".format(storage_router_to_remove.name),
                                        default_value=False)
        if proceed is False:
            Toolbox.log(logger=NodeRemovalController._logger, messages="Abort removal", title=True)
            sys.exit(1)

        if storage_router_to_remove_online is True:
            client = SSHClient(endpoint=storage_router_to_remove, username="******")
            if ServiceManager.has_service(name="asd-manager", client=client):
                remove_asd_manager = Interactive.ask_yesno(message="Do you also want to remove the ASD manager and related ASDs?",
                                                           default_value=False)

    if remove_asd_manager is True or storage_router_to_remove_online is False:
        for function in Toolbox.fetch_hooks("setup", "validate_asd_removal"):
            validation_output = function(storage_router_to_remove.ip)
            if validation_output["confirm"] is True:
                if Interactive.ask_yesno(message=validation_output["question"], default_value=False) is False:
                    remove_asd_manager = False
                    break

    ###########
    # REMOVAL #
    ###########
    try:
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages="Starting removal of node {0} - {1}".format(storage_router_to_remove.name, storage_router_to_remove.ip))
        if storage_router_to_remove_online is False:
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages=" Marking all Storage Drivers served by Storage Router {0} as offline".format(storage_router_to_remove.ip))
            StorageDriverController.mark_offline(storagerouter_guid=storage_router_to_remove.guid)

        # Remove vPools
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages=" Removing vPools from node".format(storage_router_to_remove.ip))
        storage_routers_offline_guids = [sr.guid for sr in storage_routers_offline if sr.guid != storage_router_to_remove.guid]
        for storage_driver in storage_router_to_remove.storagedrivers:
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages=" Removing vPool {0} from node".format(storage_driver.vpool.name))
            StorageRouterController.remove_storagedriver(storagedriver_guid=storage_driver.guid,
                                                         offline_storage_router_guids=storage_routers_offline_guids)

        # Demote if MASTER
        if storage_router_to_remove.node_type == "MASTER":
            NodeTypeController.demote_node(cluster_ip=storage_router_to_remove.ip,
                                           master_ip=master_ip,
                                           ip_client_map=ip_client_map,
                                           unique_id=storage_router_to_remove.machine_id,
                                           unconfigure_memcached=internal_memcached,
                                           unconfigure_rabbitmq=internal_rabbit_mq,
                                           offline_nodes=storage_routers_offline)

        # Stop / remove services
        Toolbox.log(logger=NodeRemovalController._logger, messages="Stopping and removing services")
        config_store = Configuration.get_store()
        if storage_router_to_remove_online is True:
            client = SSHClient(endpoint=storage_router_to_remove, username="******")
            NodeRemovalController.remove_services(client=client,
                                                  node_type=storage_router_to_remove.node_type.lower(),
                                                  logger=NodeRemovalController._logger)
            service = "watcher-config"
            if ServiceManager.has_service(service, client=client):
                Toolbox.log(logger=NodeRemovalController._logger, messages="Removing service {0}".format(service))
                ServiceManager.stop_service(service, client=client)
                ServiceManager.remove_service(service, client=client)

            if config_store == "etcd":
                from ovs.extensions.db.etcd.installer import EtcdInstaller

                if Configuration.get(key="/ovs/framework/external_config") is None:
                    Toolbox.log(logger=NodeRemovalController._logger, messages=" Removing Etcd cluster")
                    try:
                        EtcdInstaller.stop("config", client)
                        EtcdInstaller.remove("config", client)
                    except Exception as ex:
                        Toolbox.log(logger=NodeRemovalController._logger,
                                    messages=["\nFailed to unconfigure Etcd", ex],
                                    loglevel="exception")

                Toolbox.log(logger=NodeRemovalController._logger, messages="Removing Etcd proxy")
                EtcdInstaller.remove_proxy("config", client.ip)

        Toolbox.run_hooks(component="noderemoval",
                          sub_component="remove",
                          logger=NodeRemovalController._logger,
                          cluster_ip=storage_router_to_remove.ip,
                          complete_removal=remove_asd_manager)

        # Clean up model
        Toolbox.log(logger=NodeRemovalController._logger, messages="Removing node from model")
        for service in storage_router_to_remove.services:
            service.delete()
        for disk in storage_router_to_remove.disks:
            for partition in disk.partitions:
                partition.delete()
            disk.delete()
        for j_domain in storage_router_to_remove.domains:
            j_domain.delete()
        Configuration.delete("/ovs/framework/hosts/{0}".format(storage_router_to_remove.machine_id))

        NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map,
                                                                   offline_node_ips=[node.ip for node in storage_routers_offline],
                                                                   logger=NodeRemovalController._logger)

        if storage_router_to_remove_online is True:
            client = SSHClient(endpoint=storage_router_to_remove, username="******")
            if config_store == "arakoon":
                client.file_delete(filenames=[ArakoonConfiguration.CACC_LOCATION])
            client.file_delete(filenames=[Configuration.BOOTSTRAP_CONFIG_LOCATION])
        storage_router_to_remove.delete()
        Toolbox.log(logger=NodeRemovalController._logger, messages="Successfully removed node\n")
    except Exception as exception:
        Toolbox.log(logger=NodeRemovalController._logger, messages="\n")
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages=["An unexpected error occurred:", str(exception)],
                    boxed=True,
                    loglevel="exception")
        sys.exit(1)
    except KeyboardInterrupt:
        Toolbox.log(logger=NodeRemovalController._logger, messages="\n")
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages="This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.",
                    boxed=True,
                    loglevel="error")
        sys.exit(1)

    if remove_asd_manager is True:
        Toolbox.log(logger=NodeRemovalController._logger, messages="\nRemoving ASD Manager")
        with remote(storage_router_to_remove.ip, [os]) as rem:
            rem.os.system("asd-manager remove --force-yes")
    Toolbox.log(logger=NodeRemovalController._logger, messages="Remove nodes finished", title=True)
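All of the snippets above share the same idempotent teardown pattern: probe for the service with ServiceManager.has_service, stop it, then unregister it with ServiceManager.remove_service. The sketch below distills that pattern into a minimal, self-contained form; FakeServiceManager and the service names are hypothetical stand-ins used only so the example runs without the Open vStorage framework, where the real calls go through ServiceManager together with an SSHClient.

# Minimal sketch of the check-stop-remove pattern used throughout the snippets above.
# 'FakeServiceManager' is a stand-in for illustration only, not part of the OVS API.
class FakeServiceManager(object):
    """In-memory stand-in for the real ServiceManager (illustration only)."""
    def __init__(self):
        self._services = {'arakoon-voldrv': 'running'}

    def has_service(self, name, client=None):
        return name in self._services

    def stop_service(self, name, client=None):
        self._services[name] = 'stopped'

    def remove_service(self, name, client=None):
        self._services.pop(name, None)


def remove_cluster_service(service_manager, cluster_name, client=None):
    """Stop and remove 'arakoon-<cluster_name>' only if it is registered (idempotent)."""
    service_name = 'arakoon-{0}'.format(cluster_name)
    if service_manager.has_service(service_name, client=client):
        service_manager.stop_service(service_name, client=client)
        service_manager.remove_service(service_name, client=client)


if __name__ == '__main__':
    manager = FakeServiceManager()
    remove_cluster_service(manager, 'voldrv')  # stops and removes the registered service
    remove_cluster_service(manager, 'voldrv')  # second call is a harmless no-op
    print manager._services                    # {} -> nothing left to remove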