def on_demote(cluster_ip, master_ip, offline_node_ips=None):
     """
     Handles the demote for the StorageDrivers
     :param cluster_ip: IP of the node to demote
     :param master_ip: IP of the master node
     :param offline_node_ips: IPs of nodes which are offline
     """
     if offline_node_ips is None:
         offline_node_ips = []
     client = SSHClient(cluster_ip, username='******') if cluster_ip not in offline_node_ips else None
     servicetype = ServiceTypeList.get_by_name('Arakoon')
     current_service = None
     remaining_ips = []
     for service in servicetype.services:
         if service.name == 'arakoon-voldrv':
             if service.storagerouter.ip == cluster_ip:
                 current_service = service
             elif service.storagerouter.ip not in offline_node_ips:
                 remaining_ips.append(service.storagerouter.ip)
     if current_service is not None:
         print '* Shrink StorageDriver cluster'
         ArakoonInstaller.shrink_cluster(master_ip, cluster_ip, 'voldrv', offline_node_ips)
         if client is not None and ServiceManager.has_service(current_service.name, client=client) is True:
             ServiceManager.stop_service(current_service.name, client=client)
             ServiceManager.remove_service(current_service.name, client=client)
         ArakoonInstaller.restart_cluster_remove('voldrv', remaining_ips)
         current_service.delete()
         StorageDriverController._configure_arakoon_to_volumedriver(offline_node_ips)
 def on_demote(cluster_ip, master_ip):
     """
     Handles the demote for the StorageDrivers
     :param cluster_ip: IP of the node to demote
     :param master_ip: IP of the master node
     """
     client = SSHClient(cluster_ip, username='******')
     servicetype = ServiceTypeList.get_by_name('Arakoon')
     current_service = None
     remaining_ips = []
     for service in servicetype.services:
         if service.name == 'arakoon-voldrv':
             if service.storagerouter.ip == cluster_ip:
                 current_service = service
             else:
                 remaining_ips.append(service.storagerouter.ip)
     if current_service is not None:
         print '* Shrink StorageDriver cluster'
         ArakoonInstaller.shrink_cluster(master_ip, cluster_ip, 'voldrv')
         if ServiceManager.has_service(current_service.name, client=client) is True:
             ServiceManager.stop_service(current_service.name, client=client)
             ServiceManager.remove_service(current_service.name, client=client)
         ArakoonInstaller.restart_cluster_remove('voldrv', remaining_ips)
         current_service.delete()
         for storagerouter in StorageRouterList.get_storagerouters():
             ArakoonInstaller.deploy_to_slave(master_ip, storagerouter.ip, 'voldrv')
         StorageDriverController._configure_arakoon_to_volumedriver()
 def on_demote(cluster_ip, master_ip, offline_node_ips=None):
     """
     Handles the demote for the StorageDrivers
     :param cluster_ip: IP of the node to demote
     :param master_ip: IP of the master node
     :param offline_node_ips: IPs of nodes which are offline
     """
     _ = master_ip
     if offline_node_ips is None:
         offline_node_ips = []
     client = SSHClient(
         cluster_ip,
         username='******') if cluster_ip not in offline_node_ips else None
     servicetype = ServiceTypeList.get_by_name('Arakoon')
     current_service = None
     remaining_ips = []
     for service in servicetype.services:
         if service.name == 'arakoon-voldrv':
             if service.storagerouter.ip == cluster_ip:
                 current_service = service
             elif service.storagerouter.ip not in offline_node_ips:
                 remaining_ips.append(service.storagerouter.ip)
     if current_service is not None:
         print '* Shrink StorageDriver cluster'
         ArakoonInstaller.shrink_cluster(cluster_ip, 'voldrv',
                                         offline_node_ips)
         if client is not None and ServiceManager.has_service(
                 current_service.name, client=client) is True:
             ServiceManager.stop_service(current_service.name,
                                         client=client)
             ServiceManager.remove_service(current_service.name,
                                           client=client)
         ArakoonInstaller.restart_cluster_remove('voldrv', remaining_ips)
         current_service.delete()
         StorageDriverController._configure_arakoon_to_volumedriver()
    def remove_services(client, node_type, logger):
        """
        Remove all services managed by OVS
        :param client: Client on which to remove the services
        :type client: ovs.extensions.generic.sshclient.SSHClient
        :param node_type: Type of node, can be 'master' or 'extra'
        :type node_type: str
        :param logger: Logger object used for logging
        :type logger: ovs.log.log_handler.LogHandler
        :return: None
        """
        Toolbox.log(logger=logger, messages="Removing services")
        stop_only = ["rabbitmq-server", "memcached"]
        services = ["workers", "support-agent", "watcher-framework"]
        if node_type == "master":
            services += ["scheduled-tasks", "webapp-api", "volumerouter-consumer"]
            if Toolbox.is_service_internally_managed(service="rabbitmq") is True:
                services.append("rabbitmq-server")
            if Toolbox.is_service_internally_managed(service="memcached") is True:
                services.append("memcached")

        for service in services:
            if ServiceManager.has_service(service, client=client):
                Toolbox.log(
                    logger=logger,
                    messages="{0} service {1}".format("Removing" if service not in stop_only else "Stopping", service),
                )
                ServiceManager.stop_service(service, client=client)
                if service not in stop_only:
                    ServiceManager.remove_service(service, client=client)
Exemple #5
0
 def remove(cluster_name, client):
     """
     Removes an etcd service
     :param client: Client on which to remove the service
     :param cluster_name: The name of the cluster service to remove
     """
     if ServiceManager.has_service('etcd-{0}'.format(cluster_name), client=client) is True:
         ServiceManager.remove_service('etcd-{0}'.format(cluster_name), client=client)
 def remove(cluster_name, client):
     """
     Removes an arakoon service
     """
     if ServiceManager.has_service('arakoon-{0}'.format(cluster_name),
                                   client=client) is True:
         ServiceManager.remove_service('arakoon-{0}'.format(cluster_name),
                                       client=client)
Exemple #7
0
 def remove(cluster_name, client):
     """
     Removes an etcd service
     :param client: Client on which to remove the service
     :param cluster_name: The name of the cluster service to remove
     """
     if ServiceManager.has_service('etcd-{0}'.format(cluster_name),
                                   client=client) is True:
         ServiceManager.remove_service('etcd-{0}'.format(cluster_name),
                                       client=client)
Exemple #8
0
    def remove(cluster_name, client):
        """
        Removes an arakoon service
        :param cluster_name: The name of the cluster service to remove
        :type cluster_name: str

        :param client: Client on which to remove the service
        :type client: SSHClient

        :return: None
        """
        if ServiceManager.has_service('arakoon-{0}'.format(cluster_name), client=client) is True:
            ServiceManager.remove_service('arakoon-{0}'.format(cluster_name), client=client)
    def remove(cluster_name, client):
        """
        Removes an arakoon service
        :param cluster_name: The name of the cluster service to remove
        :type cluster_name: str

        :param client: Client on which to remove the service
        :type client: SSHClient

        :return: None
        """
        if ServiceManager.has_service('arakoon-{0}'.format(cluster_name), client=client) is True:
            ServiceManager.remove_service('arakoon-{0}'.format(cluster_name), client=client)
 def remove(cluster_name, client, delay_unregistration=False):
     """
     Removes an arakoon service
     :param cluster_name: The name of the cluster service to remove
     :type cluster_name: str
     :param client: Client on which to remove the service
     :type client: SSHClient
     :param delay_unregistration: Un-register the service right away or not
     :type delay_unregistration: bool
     :return: None
     """
     service_name = ArakoonInstaller.get_service_name_for_cluster(cluster_name=cluster_name)
     if ServiceManager.has_service(name=service_name, client=client) is True:
         ServiceManager.remove_service(name=service_name, client=client, delay_unregistration=delay_unregistration)
    def on_demote(cluster_ip, master_ip, offline_node_ips=None):
        """
        Handles the demote for the StorageDrivers
        :param cluster_ip: IP of the node to demote
        :type cluster_ip: str

        :param master_ip: IP of the master node
        :type master_ip: str

        :param offline_node_ips: IPs of nodes which are offline
        :type offline_node_ips: list

        :return: None
        """
        _ = master_ip
        if offline_node_ips is None:
            offline_node_ips = []
        client = SSHClient(
            cluster_ip,
            username='******') if cluster_ip not in offline_node_ips else None
        servicetype = ServiceTypeList.get_by_name(
            ServiceType.SERVICE_TYPES.ARAKOON)
        current_service = None
        remaining_ips = []
        for service in servicetype.services:
            if service.name == 'arakoon-voldrv' and service.is_internal is True:  # Externally managed arakoon cluster service does not have storage router
                if service.storagerouter.ip == cluster_ip:
                    current_service = service
                elif service.storagerouter.ip not in offline_node_ips:
                    remaining_ips.append(service.storagerouter.ip)
        if current_service is not None:
            StorageDriverController._logger.debug(
                '* Shrink StorageDriver cluster')
            cluster_name = str(
                EtcdConfiguration.get(
                    '/ovs/framework/arakoon_clusters|voldrv'))
            ArakoonInstaller.shrink_cluster(deleted_node_ip=cluster_ip,
                                            cluster_name=cluster_name,
                                            offline_nodes=offline_node_ips)
            if client is not None and ServiceManager.has_service(
                    current_service.name, client=client) is True:
                ServiceManager.stop_service(current_service.name,
                                            client=client)
                ServiceManager.remove_service(current_service.name,
                                              client=client)
            ArakoonInstaller.restart_cluster_remove(cluster_name,
                                                    remaining_ips)
            current_service.delete()
            StorageDriverController._configure_arakoon_to_volumedriver(
                cluster_name=cluster_name)
Exemple #12
0
 def on_remove(cluster_ip, complete_removal):
     """
     Handles the StorageDriver removal part of a node
     :param cluster_ip: IP of the node which is being removed from the cluster
     :type cluster_ip: str
     :param complete_removal: Unused for StorageDriver, used for AlbaController
     :type complete_removal: bool
     :return: None
     """
     _ = complete_removal
     service_name = 'watcher-volumedriver'
     try:
         client = SSHClient(endpoint=cluster_ip, username='******')
         if ServiceManager.has_service(name=service_name, client=client):
             ServiceManager.stop_service(name=service_name, client=client)
             ServiceManager.remove_service(name=service_name, client=client)
     except UnableToConnectException:
         pass
Exemple #13
0
 def on_demote(cluster_ip, master_ip, offline_node_ips=None):
     """
     Handles the demote for the StorageDrivers
     :param cluster_ip: IP of the node to demote
     :type cluster_ip: str
     :param master_ip: IP of the master node
     :type master_ip: str
     :param offline_node_ips: IPs of nodes which are offline
     :type offline_node_ips: list
     :return: None
     """
     _ = master_ip
     if offline_node_ips is None:
         offline_node_ips = []
     client = SSHClient(cluster_ip, username='******') if cluster_ip not in offline_node_ips else None
     servicetype = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON)
     current_service = None
     remaining_ips = []
     for service in servicetype.services:
         if service.name == 'arakoon-voldrv' and service.is_internal is True:  # Externally managed arakoon cluster service does not have storage router
             if service.storagerouter.ip == cluster_ip:
                 current_service = service
             elif service.storagerouter.ip not in offline_node_ips:
                 remaining_ips.append(service.storagerouter.ip)
     if current_service is not None:
         if len(remaining_ips) == 0:
             raise RuntimeError('Could not find any remaining arakoon nodes for the voldrv cluster')
         StorageDriverController._logger.debug('* Shrink StorageDriver cluster')
         cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|voldrv'))
         ArakoonInstaller.shrink_cluster(deleted_node_ip=cluster_ip,
                                         remaining_node_ip=remaining_ips[0],
                                         cluster_name=cluster_name,
                                         offline_nodes=offline_node_ips)
         if client is not None and ServiceManager.has_service(current_service.name, client=client) is True:
             ServiceManager.stop_service(current_service.name, client=client)
             ServiceManager.remove_service(current_service.name, client=client)
         ArakoonInstaller.restart_cluster_remove(cluster_name, remaining_ips, filesystem=False)
         current_service.delete()
         StorageDriverController._configure_arakoon_to_volumedriver(cluster_name=cluster_name)
    def unconfigure_host(self, ip):
        if self._is_devstack is False and self._is_openstack is False or self._cinder_installed is False or self._nova_installed is False:
            self._logger.warning('Unconfigure host: No OpenStack nor DevStack installation detected or Cinder and Nova plugins are not installed')
            return

        # 1. Remove driver code
        self._logger.info('*** Unconfiguring host with IP {0} ***'.format(ip))
        self._logger.info(' Removing driver code')
        if self._is_devstack is True:
            self.client.file_delete(self._devstack_driver)
        else:
            self.client.file_delete('{0}/cinder/volume/drivers/openvstorage.py'.format(self._driver_location))

        # 2. Removing users from group
        self._logger.info('  Removing users from group ovs')
        for user in ['libvirt-qemu', 'stack'] if self._is_devstack is True else self._openstack_users:
            self.client.run('deluser {0} ovs'.format(user))

        # 3. Revert patches
        self._logger.info('  Reverting patches')
        nova_base_path = self._get_base_path('nova')
        cinder_base_path = self._get_base_path('cinder')
        if self._is_devstack is True:
            nova_volume_file = '{0}/virt/libvirt/volume.py'.format(nova_base_path)
            nova_driver_file = '{0}/virt/libvirt/driver.py'.format(nova_base_path)
            cinder_brick_initiator_file = '{0}/brick/initiator/connector.py'.format(cinder_base_path)
        else:
            nova_volume_file = '{0}/nova/virt/libvirt/volume.py'.format(self._driver_location)
            nova_driver_file = '{0}/nova/virt/libvirt/driver.py'.format(self._driver_location)
            cinder_brick_initiator_file = '{0}/cinder/brick/initiator/connector.py'.format(self._driver_location)

        self._logger.info('    Reverting patched file: {0}'.format(nova_volume_file))
        new_contents = []

        skip_class = False
        for line in self.client.file_read(nova_volume_file).splitlines():
            if line.startswith('class LibvirtFileVolumeDriver(LibvirtBaseVolumeDriver):'):
                skip_class = True
                continue
            if line.startswith('class'):
                skip_class = False
            if skip_class is False:
                new_contents.append(line)
        self.client.file_write(nova_volume_file, "".join(new_contents))

        self._logger.info('    Reverting patched file: {0}'.format(nova_driver_file))
        new_contents = []

        for line in self.client.file_read(nova_driver_file).splitlines():
            stripped_line = line.strip()
            if stripped_line.startswith("'file=nova.virt.libvirt.volume.LibvirtFileVolumeDriver'"):
                continue
            new_contents.append(line)
        self.client.file_write(nova_driver_file, "".join(new_contents))

        if os.path.exists(cinder_brick_initiator_file):
            self._logger.info('    Reverting patched file: {0}'.format(cinder_brick_initiator_file))
            self.client.run("""sed -i 's/elif protocol in ["LOCAL", "FILE"]:/elif protocol == "LOCAL":/g' {0}""".format(cinder_brick_initiator_file))

        # 4. Unconfigure messaging driver
        self._logger.info('  Unconfiguring messaging driver')
        nova_messaging_driver = 'nova.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging'
        cinder_messaging_driver = 'cinder.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging'

        with remote(ip, [RawConfigParser, open], 'root') as rem:
            for config_file, driver in {self._NOVA_CONF: nova_messaging_driver,
                                        self._CINDER_CONF: cinder_messaging_driver}.iteritems():
                cfg = rem.RawConfigParser()
                cfg.read([config_file])
                if cfg.has_option("DEFAULT", "notification_driver"):
                    cfg.remove_option("DEFAULT", "notification_driver")
                if cfg.has_option("DEFAULT", "notification_topics"):
                    notification_topics = cfg.get("DEFAULT", "notification_topics").split(",")
                    if "notifications" in notification_topics:
                        notification_topics.remove("notifications")
                        cfg.set("DEFAULT", "notification_topics", ",".join(notification_topics))

                if config_file == self._NOVA_CONF:
                    for param, value in {'notify_on_any_change': 'True',
                                         'notify_on_state_change': 'vm_and_task_state'}.iteritems():
                        if cfg.has_option("DEFAULT", param):
                            cfg.remove_option("DEFAULT", param)

                with rem.open(config_file, "w") as fp:
                    cfg.write(fp)

        # 5. Disable events consumer
        self._logger.info('  Disabling events consumer')
        service_name = 'ovs-openstack-events-consumer'
        if ServiceManager.has_service(service_name, self.client):
            ServiceManager.stop_service(service_name, self.client)
            ServiceManager.disable_service(service_name, self.client)
            ServiceManager.remove_service(service_name, self.client)
Exemple #15
0
    def demote_node(cluster_ip, master_ip, ip_client_map, unique_id, unconfigure_memcached, unconfigure_rabbitmq, offline_nodes=None):
        """
        Demotes a given node
        """
        from ovs.dal.lists.storagerouterlist import StorageRouterList

        Toolbox.log(logger=NodeTypeController._logger, messages='Demoting node', title=True)
        if offline_nodes is None:
            offline_nodes = []

        if unconfigure_memcached is True and len(offline_nodes) == 0:
            if NodeTypeController._validate_local_memcache_servers(ip_client_map) is False:
                raise RuntimeError('Not all memcache nodes can be reached which is required for demoting a node.')

        # Find other (arakoon) master nodes
        arakoon_cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|ovsdb'))
        arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=arakoon_cluster_name)
        config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name, filesystem=False)
        config.load_config()
        master_node_ips = [node.ip for node in config.nodes]
        if cluster_ip in master_node_ips:
            master_node_ips.remove(cluster_ip)
        if len(master_node_ips) == 0:
            raise RuntimeError('There should be at least one other master node')

        storagerouter = StorageRouterList.get_by_machine_id(unique_id)
        storagerouter.node_type = 'EXTRA'
        storagerouter.save()

        offline_node_ips = [node.ip for node in offline_nodes]
        if arakoon_metadata['internal'] is True:
            Toolbox.log(logger=NodeTypeController._logger, messages='Leaving Arakoon {0} cluster'.format(arakoon_cluster_name))
            ArakoonInstaller.shrink_cluster(deleted_node_ip=cluster_ip,
                                            remaining_node_ips=master_node_ips,
                                            cluster_name=arakoon_cluster_name,
                                            offline_nodes=offline_node_ips)

        try:
            external_config = Configuration.get('/ovs/framework/external_config')
            if external_config is None:
                config_store = Configuration.get_store()
                if config_store == 'arakoon':
                    Toolbox.log(logger=NodeTypeController._logger, messages='Leaving Arakoon config cluster')
                    ArakoonInstaller.shrink_cluster(deleted_node_ip=cluster_ip,
                                                    remaining_node_ips=master_node_ips,
                                                    cluster_name='config',
                                                    offline_nodes=offline_node_ips,
                                                    filesystem=True)
                else:
                    from ovs.extensions.db.etcd.installer import EtcdInstaller
                    Toolbox.log(logger=NodeTypeController._logger, messages='Leaving Etcd cluster')
                    EtcdInstaller.shrink_cluster(master_ip, cluster_ip, 'config', offline_node_ips)
        except Exception as ex:
            Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to leave configuration cluster', ex], loglevel='exception')

        Toolbox.log(logger=NodeTypeController._logger, messages='Update configurations')
        try:
            if unconfigure_memcached is True:
                endpoints = Configuration.get('/ovs/framework/memcache|endpoints')
                endpoint = '{0}:{1}'.format(cluster_ip, 11211)
                if endpoint in endpoints:
                    endpoints.remove(endpoint)
                Configuration.set('/ovs/framework/memcache|endpoints', endpoints)
            if unconfigure_rabbitmq is True:
                endpoints = Configuration.get('/ovs/framework/messagequeue|endpoints')
                endpoint = '{0}:{1}'.format(cluster_ip, 5672)
                if endpoint in endpoints:
                    endpoints.remove(endpoint)
                Configuration.set('/ovs/framework/messagequeue|endpoints', endpoints)
        except Exception as ex:
            Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to update configurations', ex], loglevel='exception')

        if arakoon_metadata['internal'] is True:
            Toolbox.log(logger=NodeTypeController._logger, messages='Restarting master node services')
            remaining_nodes = ip_client_map.keys()[:]
            if cluster_ip in remaining_nodes:
                remaining_nodes.remove(cluster_ip)

            PersistentFactory.store = None
            VolatileFactory.store = None

            for service in storagerouter.services:
                if service.name == 'arakoon-ovsdb':
                    service.delete()

        target_client = None
        if storagerouter in offline_nodes:
            if unconfigure_rabbitmq is True:
                Toolbox.log(logger=NodeTypeController._logger, messages='Removing/unconfiguring offline RabbitMQ node')
                client = ip_client_map[master_ip]
                try:
                    client.run(['rabbitmqctl', 'forget_cluster_node', 'rabbit@{0}'.format(storagerouter.name)])
                except Exception as ex:
                    Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to forget RabbitMQ cluster node', ex], loglevel='exception')
        else:
            target_client = ip_client_map[cluster_ip]
            if unconfigure_rabbitmq is True:
                Toolbox.log(logger=NodeTypeController._logger, messages='Removing/unconfiguring RabbitMQ')
                try:
                    if ServiceManager.has_service('rabbitmq-server', client=target_client):
                        Toolbox.change_service_state(target_client, 'rabbitmq-server', 'stop', NodeTypeController._logger)
                        target_client.run(['rabbitmq-server', '-detached'])
                        time.sleep(5)
                        target_client.run(['rabbitmqctl', 'stop_app'])
                        time.sleep(5)
                        target_client.run(['rabbitmqctl', 'reset'])
                        time.sleep(5)
                        target_client.run(['rabbitmqctl', 'stop'])
                        time.sleep(5)
                        target_client.file_unlink("/var/lib/rabbitmq/.erlang.cookie")
                        Toolbox.change_service_state(target_client, 'rabbitmq-server', 'stop', NodeTypeController._logger)  # To be sure
                except Exception as ex:
                    Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to remove/unconfigure RabbitMQ', ex], loglevel='exception')

            Toolbox.log(logger=NodeTypeController._logger, messages='Stopping services')
            services = ['memcached', 'rabbitmq-server']
            if unconfigure_rabbitmq is False:
                services.remove('rabbitmq-server')
            if unconfigure_memcached is False:
                services.remove('memcached')
            for service in services:
                if ServiceManager.has_service(service, client=target_client):
                    Toolbox.log(logger=NodeTypeController._logger, messages='Stopping service {0}'.format(service))
                    try:
                        Toolbox.change_service_state(target_client, service, 'stop', NodeTypeController._logger)
                    except Exception as ex:
                        Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to stop service'.format(service), ex], loglevel='exception')

            Toolbox.log(logger=NodeTypeController._logger, messages='Removing services')
            services = ['scheduled-tasks', 'webapp-api', 'volumerouter-consumer']
            for service in services:
                if ServiceManager.has_service(service, client=target_client):
                    Toolbox.log(logger=NodeTypeController._logger, messages='Removing service {0}'.format(service))
                    try:
                        Toolbox.change_service_state(target_client, service, 'stop', NodeTypeController._logger)
                        ServiceManager.remove_service(service, client=target_client)
                    except Exception as ex:
                        Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to remove service'.format(service), ex], loglevel='exception')

            if ServiceManager.has_service('workers', client=target_client):
                ServiceManager.add_service(name='workers',
                                           client=target_client,
                                           params={'WORKER_QUEUE': '{0}'.format(unique_id)})
        try:
            NodeTypeController._configure_amqp_to_volumedriver()
        except Exception as ex:
            Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to configure AMQP to Storage Driver', ex], loglevel='exception')

        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
        NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger, offline_node_ips=offline_node_ips)

        if Toolbox.run_hooks(component='nodetype',
                             sub_component='demote',
                             logger=NodeTypeController._logger,
                             cluster_ip=cluster_ip,
                             master_ip=master_ip,
                             offline_node_ips=offline_node_ips):
            Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
            NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger, offline_node_ips=offline_node_ips)

        if storagerouter not in offline_nodes:
            target_client = ip_client_map[cluster_ip]
            node_name, _ = target_client.get_hostname()
            if NodeTypeController.avahi_installed(client=target_client, logger=NodeTypeController._logger) is True:
                NodeTypeController.configure_avahi(client=target_client, node_name=node_name, node_type='extra', logger=NodeTypeController._logger)
        Configuration.set('/ovs/framework/hosts/{0}/type'.format(storagerouter.machine_id), 'EXTRA')

        if target_client is not None and target_client.file_exists('/tmp/ovs_rollback'):
            target_client.file_write('/tmp/ovs_rollback', 'rollback')

        Toolbox.log(logger=NodeTypeController._logger, messages='Demote complete', title=True)
Exemple #16
0
    def execute_scrub_work(queue, vpool, scrub_info, error_messages):
        """
        Executes scrub work for a given vDisk queue and vPool, based on scrub_info
        :param queue: a Queue with vDisk guids that need to be scrubbed (they should only be member of a single vPool)
        :type queue: Queue
        :param vpool: the vPool object of the vDisks
        :type vpool: VPool
        :param scrub_info: A dict containing scrub information: `scrub_path` with the path where to scrub and `storage_router` with the StorageRouter
                           that needs to do the work
        :type scrub_info: dict
        :param error_messages: A list of error messages to be filled
        :type error_messages: list
        :return: a list of error messages
        :rtype: list
        """

        def _verify_mds_config(current_vdisk):
            current_vdisk.invalidate_dynamics('info')
            vdisk_configs = current_vdisk.info['metadata_backend_config']
            if len(vdisk_configs) == 0:
                raise RuntimeError('Could not load MDS configuration')
            return vdisk_configs

        client = None
        lock_time = 5 * 60
        storagerouter = scrub_info['storage_router']
        scrub_directory = '{0}/scrub_work_{1}_{2}'.format(scrub_info['scrub_path'], vpool.name, storagerouter.name)
        scrub_config_key = 'ovs/vpools/{0}/proxies/scrub/scrub_config_{1}'.format(vpool.guid, storagerouter.guid)
        backend_config_key = 'ovs/vpools/{0}/proxies/scrub/backend_config_{1}'.format(vpool.guid, storagerouter.guid)
        alba_proxy_service = 'ovs-albaproxy_{0}_{1}_scrub'.format(vpool.name, storagerouter.name)

        # Deploy a proxy
        try:
            with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Deploying ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
                client = SSHClient(storagerouter, 'root')
                client.dir_create(scrub_directory)
                client.dir_chmod(scrub_directory, 0777)  # Celery task executed by 'ovs' user and should be able to write in it
                if ServiceManager.has_service(name=alba_proxy_service, client=client) is True and ServiceManager.get_service_status(name=alba_proxy_service, client=client) is True:
                    ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Re-using existing proxy service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
                    scrub_config = Configuration.get(scrub_config_key)
                else:
                    machine_id = System.get_my_machine_id(client)
                    port_range = Configuration.get('/ovs/framework/hosts/{0}/ports|storagedriver'.format(machine_id))
                    port = System.get_free_ports(selected_range=port_range, nr=1, client=client)[0]
                    # Scrub config
                    # {u'albamgr_cfg_url': u'arakoon://config/ovs/vpools/71e2f717-f270-4a41-bbb0-d4c8c084d43e/proxies/64759516-3471-4321-b912-fb424568fc5b/config/abm?ini=%2Fopt%2FOpenvStorage%2Fconfig%2Farakoon_cacc.ini',
                    #  u'fragment_cache': [u'none'],
                    #  u'ips': [u'127.0.0.1'],
                    #  u'log_level': u'info',
                    #  u'manifest_cache_size': 17179869184,
                    #  u'port': 0,
                    #  u'transport': u'tcp'}

                    # Backend config
                    # {u'alba_connection_host': u'10.100.193.155',
                    #  u'alba_connection_port': 26204,
                    #  u'alba_connection_preset': u'preset',
                    #  u'alba_connection_timeout': 15,
                    #  u'alba_connection_transport': u'TCP',
                    #  u'backend_interface_retries_on_error': 5,
                    #  u'backend_interface_retry_backoff_multiplier': 2.0,
                    #  u'backend_interface_retry_interval_secs': 1,
                    #  u'backend_type': u'ALBA'}
                    scrub_config = Configuration.get('ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid))
                    scrub_config['port'] = port
                    scrub_config['transport'] = 'tcp'
                    Configuration.set(scrub_config_key, json.dumps(scrub_config, indent=4), raw=True)

                    params = {'VPOOL_NAME': vpool.name,
                              'LOG_SINK': LogHandler.get_sink_path('alba_proxy'),
                              'CONFIG_PATH': Configuration.get_configuration_path(scrub_config_key)}
                    ServiceManager.add_service(name='ovs-albaproxy', params=params, client=client, target_name=alba_proxy_service)
                    ServiceManager.start_service(name=alba_proxy_service, client=client)
                    ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Deployed ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))

                backend_config = Configuration.get('ovs/vpools/{0}/hosts/{1}/config'.format(vpool.guid, vpool.storagedrivers[0].storagedriver_id))['backend_connection_manager']
                backend_config['alba_connection_host'] = '127.0.0.1'
                backend_config['alba_connection_port'] = scrub_config['port']
                Configuration.set(backend_config_key, json.dumps({"backend_connection_manager": backend_config}, indent=4), raw=True)
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - An error occurred deploying ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service)
            error_messages.append(message)
            ScheduledTaskController._logger.exception(message)
            if client is not None and ServiceManager.has_service(name=alba_proxy_service, client=client) is True:
                if ServiceManager.get_service_status(name=alba_proxy_service, client=client) is True:
                    ServiceManager.stop_service(name=alba_proxy_service, client=client)
                ServiceManager.remove_service(name=alba_proxy_service, client=client)
            if Configuration.exists(scrub_config_key):
                Configuration.delete(scrub_config_key)

        try:
            # Empty the queue with vDisks to scrub
            with remote(storagerouter.ip, [VDisk]) as rem:
                while True:
                    vdisk = None
                    vdisk_guid = queue.get(False)
                    try:
                        # Check MDS master is local. Trigger MDS handover if necessary
                        vdisk = rem.VDisk(vdisk_guid)
                        ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Started scrubbing at location {3}'.format(vpool.name, storagerouter.name, vdisk.name, scrub_directory))
                        configs = _verify_mds_config(current_vdisk=vdisk)
                        storagedriver = StorageDriverList.get_by_storagedriver_id(vdisk.storagedriver_id)
                        if configs[0].get('ip') != storagedriver.storagerouter.ip:
                            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - MDS master is not local, trigger handover'.format(vpool.name, storagerouter.name, vdisk.name))
                            MDSServiceController.ensure_safety(VDisk(vdisk_guid))  # Do not use a remote VDisk instance here
                            configs = _verify_mds_config(current_vdisk=vdisk)
                            if configs[0].get('ip') != storagedriver.storagerouter.ip:
                                ScheduledTaskController._logger.warning('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Skipping because master MDS still not local'.format(vpool.name, storagerouter.name, vdisk.name))
                                continue

                        # Do the actual scrubbing
                        with vdisk.storagedriver_client.make_locked_client(str(vdisk.volume_id)) as locked_client:
                            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Retrieve and apply scrub work'.format(vpool.name, storagerouter.name, vdisk.name))
                            work_units = locked_client.get_scrubbing_workunits()
                            for work_unit in work_units:
                                res = locked_client.scrub(work_unit=work_unit,
                                                          scratch_dir=scrub_directory,
                                                          log_sinks=[LogHandler.get_sink_path('scrubber', allow_override=True)],
                                                          backend_config=Configuration.get_configuration_path(backend_config_key))
                                locked_client.apply_scrubbing_result(scrubbing_work_result=res)
                            if work_units:
                                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - {3} work units successfully applied'.format(vpool.name, storagerouter.name, vdisk.name, len(work_units)))
                            else:
                                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - No scrubbing required'.format(vpool.name, storagerouter.name, vdisk.name))
                    except Exception:
                        if vdisk is None:
                            message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk with guid {2} could not be found'.format(vpool.name, storagerouter.name, vdisk_guid)
                        else:
                            message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Scrubbing failed'.format(vpool.name, storagerouter.name, vdisk.name)
                        error_messages.append(message)
                        ScheduledTaskController._logger.exception(message)

        except Empty:  # Raised when all items have been fetched from the queue
            ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Queue completely processed'.format(vpool.name, storagerouter.name))
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - Scrubbing failed'.format(vpool.name, storagerouter.name)
            error_messages.append(message)
            ScheduledTaskController._logger.exception(message)

        # Delete the proxy again
        try:
            with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Removing service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
                client = SSHClient(storagerouter, 'root')
                client.dir_delete(scrub_directory)
                if ServiceManager.has_service(alba_proxy_service, client=client):
                    ServiceManager.stop_service(alba_proxy_service, client=client)
                    ServiceManager.remove_service(alba_proxy_service, client=client)
                if Configuration.exists(scrub_config_key):
                    Configuration.delete(scrub_config_key)
                ScheduledTaskController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Removed service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - Removing service {2} failed'.format(vpool.name, storagerouter.name, alba_proxy_service)
            error_messages.append(message)
            ScheduledTaskController._logger.exception(message)
 def remove(cluster_name, client):
     """
     Removes an arakoon service
     """
     if ServiceManager.has_service("arakoon-{0}".format(cluster_name), client=client) is True:
         ServiceManager.remove_service("arakoon-{0}".format(cluster_name), client=client)
Exemple #18
0
    def unconfigure_host(self, ip):
        if self._is_devstack is False and self._is_openstack is False or self._cinder_installed is False or self._nova_installed is False:
            self._logger.warning(
                'Unconfigure host: No OpenStack nor DevStack installation detected or Cinder and Nova plugins are not installed'
            )
            return

        # 1. Remove driver code
        self._logger.info('*** Unconfiguring host with IP {0} ***'.format(ip))
        self._logger.info(' Removing driver code')
        if self._is_devstack is True:
            self.client.file_delete(self._devstack_driver)
        else:
            self.client.file_delete(
                '{0}/cinder/volume/drivers/openvstorage.py'.format(
                    self._driver_location))

        # 2. Removing users from group
        self._logger.info('  Removing users from group ovs')
        for user in ['libvirt-qemu', 'stack'
                     ] if self._is_devstack is True else self._openstack_users:
            self.client.run('deluser {0} ovs'.format(user))

        # 3. Revert patches
        self._logger.info('  Reverting patches')
        nova_base_path = self._get_base_path('nova')
        cinder_base_path = self._get_base_path('cinder')
        if self._is_devstack is True:
            nova_volume_file = '{0}/virt/libvirt/volume.py'.format(
                nova_base_path)
            nova_driver_file = '{0}/virt/libvirt/driver.py'.format(
                nova_base_path)
            cinder_brick_initiator_file = '{0}/brick/initiator/connector.py'.format(
                cinder_base_path)
        else:
            nova_volume_file = '{0}/nova/virt/libvirt/volume.py'.format(
                self._driver_location)
            nova_driver_file = '{0}/nova/virt/libvirt/driver.py'.format(
                self._driver_location)
            cinder_brick_initiator_file = '{0}/cinder/brick/initiator/connector.py'.format(
                self._driver_location)

        self._logger.info(
            '    Reverting patched file: {0}'.format(nova_volume_file))
        new_contents = []

        skip_class = False
        for line in self.client.file_read(nova_volume_file).splitlines():
            if line.startswith(
                    'class LibvirtFileVolumeDriver(LibvirtBaseVolumeDriver):'):
                skip_class = True
                continue
            if line.startswith('class'):
                skip_class = False
            if skip_class is False:
                new_contents.append(line)
        self.client.file_write(nova_volume_file, "".join(new_contents))

        self._logger.info(
            '    Reverting patched file: {0}'.format(nova_driver_file))
        new_contents = []

        for line in self.client.file_read(nova_driver_file).splitlines():
            stripped_line = line.strip()
            if stripped_line.startswith(
                    "'file=nova.virt.libvirt.volume.LibvirtFileVolumeDriver'"):
                continue
            new_contents.append(line)
        self.client.file_write(nova_driver_file, "".join(new_contents))

        if os.path.exists(cinder_brick_initiator_file):
            self._logger.info('    Reverting patched file: {0}'.format(
                cinder_brick_initiator_file))
            self.client.run(
                """sed -i 's/elif protocol in ["LOCAL", "FILE"]:/elif protocol == "LOCAL":/g' {0}"""
                .format(cinder_brick_initiator_file))

        # 4. Unconfigure messaging driver
        self._logger.info('  Unconfiguring messaging driver')
        nova_messaging_driver = 'nova.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging'
        cinder_messaging_driver = 'cinder.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging'

        with remote(ip, [RawConfigParser, open], 'root') as rem:
            for config_file, driver in {
                    self._NOVA_CONF: nova_messaging_driver,
                    self._CINDER_CONF: cinder_messaging_driver
            }.iteritems():
                cfg = rem.RawConfigParser()
                cfg.read([config_file])
                if cfg.has_option("DEFAULT", "notification_driver"):
                    cfg.remove_option("DEFAULT", "notification_driver")
                if cfg.has_option("DEFAULT", "notification_topics"):
                    notification_topics = cfg.get(
                        "DEFAULT", "notification_topics").split(",")
                    if "notifications" in notification_topics:
                        notification_topics.remove("notifications")
                        cfg.set("DEFAULT", "notification_topics",
                                ",".join(notification_topics))

                if config_file == self._NOVA_CONF:
                    for param, value in {
                            'notify_on_any_change': 'True',
                            'notify_on_state_change': 'vm_and_task_state'
                    }.iteritems():
                        if cfg.has_option("DEFAULT", param):
                            cfg.remove_option("DEFAULT", param)

                with rem.open(config_file, "w") as fp:
                    cfg.write(fp)

        # 5. Disable events consumer
        self._logger.info('  Disabling events consumer')
        service_name = 'ovs-openstack-events-consumer'
        if ServiceManager.has_service(service_name, self.client):
            ServiceManager.stop_service(service_name, self.client)
            ServiceManager.disable_service(service_name, self.client)
            ServiceManager.remove_service(service_name, self.client)
Exemple #19
0
    def remove_node(node_ip, silent=None):
        """
        Remove the node with specified IP from the cluster
        :param node_ip: IP of the node to remove
        :type node_ip: str
        :param silent: If silent == '--force-yes' no question will be asked to confirm the removal
        :type silent: str
        :return: None
        """
        from ovs.lib.storagedriver import StorageDriverController
        from ovs.lib.storagerouter import StorageRouterController
        from ovs.dal.lists.storagerouterlist import StorageRouterList

        Toolbox.log(logger=NodeRemovalController._logger, messages="Remove node", boxed=True)
        Toolbox.log(
            logger=NodeRemovalController._logger,
            messages="WARNING: Some of these steps may take a very long time, please check the logs for more information\n\n",
        )

        ###############
        # VALIDATIONS #
        ###############
        try:
            node_ip = node_ip.strip()
            if not isinstance(node_ip, str):
                raise ValueError("Node IP must be a string")
            if not re.match(SSHClient.IP_REGEX, node_ip):
                raise ValueError("Invalid IP {0} specified".format(node_ip))

            storage_router_all = StorageRouterList.get_storagerouters()
            storage_router_masters = StorageRouterList.get_masters()
            storage_router_all_ips = set([storage_router.ip for storage_router in storage_router_all])
            storage_router_master_ips = set([storage_router.ip for storage_router in storage_router_masters])
            storage_router_to_remove = StorageRouterList.get_by_ip(node_ip)

            if node_ip not in storage_router_all_ips:
                raise ValueError(
                    "Unknown IP specified\nKnown in model:\n - {0}\nSpecified for removal:\n - {1}".format(
                        "\n - ".join(storage_router_all_ips), node_ip
                    )
                )

            if len(storage_router_all_ips) == 1:
                raise RuntimeError("Removing the only node is not possible")

            if node_ip in storage_router_master_ips and len(storage_router_master_ips) == 1:
                raise RuntimeError("Removing the only master node is not possible")

            if System.get_my_storagerouter() == storage_router_to_remove:
                raise RuntimeError(
                    "The node to be removed cannot be identical to the node on which the removal is initiated"
                )

            Toolbox.log(
                logger=NodeRemovalController._logger, messages="Creating SSH connections to remaining master nodes"
            )
            master_ip = None
            ip_client_map = {}
            storage_routers_offline = []
            storage_router_to_remove_online = True
            for storage_router in storage_router_all:
                try:
                    client = SSHClient(storage_router, username="******")
                    if client.run(["pwd"]):
                        Toolbox.log(
                            logger=NodeRemovalController._logger,
                            messages="  Node with IP {0:<15} successfully connected to".format(storage_router.ip),
                        )
                        ip_client_map[storage_router.ip] = client
                        if storage_router != storage_router_to_remove and storage_router.node_type == "MASTER":
                            master_ip = storage_router.ip
                except UnableToConnectException:
                    Toolbox.log(
                        logger=NodeRemovalController._logger,
                        messages="  Node with IP {0:<15} is unreachable".format(storage_router.ip),
                    )
                    storage_routers_offline.append(storage_router)
                    if storage_router == storage_router_to_remove:
                        storage_router_to_remove_online = False

            if len(ip_client_map) == 0 or master_ip is None:
                raise RuntimeError("Could not connect to any master node in the cluster")

            storage_router_to_remove.invalidate_dynamics("vdisks_guids")
            if (
                len(storage_router_to_remove.vdisks_guids) > 0
            ):  # vDisks are supposed to be moved away manually before removing a node
                raise RuntimeError("Still vDisks attached to Storage Router {0}".format(storage_router_to_remove.name))

            internal_memcached = Toolbox.is_service_internally_managed(service="memcached")
            internal_rabbit_mq = Toolbox.is_service_internally_managed(service="rabbitmq")
            memcached_endpoints = Configuration.get(key="/ovs/framework/memcache|endpoints")
            rabbit_mq_endpoints = Configuration.get(key="/ovs/framework/messagequeue|endpoints")
            copy_memcached_endpoints = list(memcached_endpoints)
            copy_rabbit_mq_endpoints = list(rabbit_mq_endpoints)
            for endpoint in memcached_endpoints:
                if endpoint.startswith(storage_router_to_remove.ip):
                    copy_memcached_endpoints.remove(endpoint)
            for endpoint in rabbit_mq_endpoints:
                if endpoint.startswith(storage_router_to_remove.ip):
                    copy_rabbit_mq_endpoints.remove(endpoint)
            if len(copy_memcached_endpoints) == 0 and internal_memcached is True:
                raise RuntimeError(
                    "Removal of provided nodes will result in a complete removal of the memcached service"
                )
            if len(copy_rabbit_mq_endpoints) == 0 and internal_rabbit_mq is True:
                raise RuntimeError(
                    "Removal of provided nodes will result in a complete removal of the messagequeue service"
                )
        except Exception as exception:
            Toolbox.log(
                logger=NodeRemovalController._logger, messages=[str(exception)], boxed=True, loglevel="exception"
            )
            sys.exit(1)

        #################
        # CONFIRMATIONS #
        #################
        interactive = silent != "--force-yes"
        remove_asd_manager = not interactive  # Remove ASD manager if non-interactive else ask
        if interactive is True:
            proceed = Interactive.ask_yesno(
                message="Are you sure you want to remove node {0}?".format(storage_router_to_remove.name),
                default_value=False,
            )
            if proceed is False:
                Toolbox.log(logger=NodeRemovalController._logger, messages="Abort removal", title=True)
                sys.exit(1)

            if storage_router_to_remove_online is True:
                client = SSHClient(endpoint=storage_router_to_remove, username="******")
                if ServiceManager.has_service(name="asd-manager", client=client):
                    remove_asd_manager = Interactive.ask_yesno(
                        message="Do you also want to remove the ASD manager and related ASDs?", default_value=False
                    )

            if remove_asd_manager is True or storage_router_to_remove_online is False:
                for function in Toolbox.fetch_hooks("setup", "validate_asd_removal"):
                    validation_output = function(storage_router_to_remove.ip)
                    if validation_output["confirm"] is True:
                        if Interactive.ask_yesno(message=validation_output["question"], default_value=False) is False:
                            remove_asd_manager = False
                            break

        ###########
        # REMOVAL #
        ###########
        try:
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages="Starting removal of node {0} - {1}".format(
                    storage_router_to_remove.name, storage_router_to_remove.ip
                ),
            )
            if storage_router_to_remove_online is False:
                Toolbox.log(
                    logger=NodeRemovalController._logger,
                    messages="  Marking all Storage Drivers served by Storage Router {0} as offline".format(
                        storage_router_to_remove.ip
                    ),
                )
                StorageDriverController.mark_offline(storagerouter_guid=storage_router_to_remove.guid)

            # Remove vPools
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages="  Removing vPools from node".format(storage_router_to_remove.ip),
            )
            storage_routers_offline_guids = [
                sr.guid for sr in storage_routers_offline if sr.guid != storage_router_to_remove.guid
            ]
            for storage_driver in storage_router_to_remove.storagedrivers:
                Toolbox.log(
                    logger=NodeRemovalController._logger,
                    messages="    Removing vPool {0} from node".format(storage_driver.vpool.name),
                )
                StorageRouterController.remove_storagedriver(
                    storagedriver_guid=storage_driver.guid, offline_storage_router_guids=storage_routers_offline_guids
                )

            # Demote if MASTER
            if storage_router_to_remove.node_type == "MASTER":
                NodeTypeController.demote_node(
                    cluster_ip=storage_router_to_remove.ip,
                    master_ip=master_ip,
                    ip_client_map=ip_client_map,
                    unique_id=storage_router_to_remove.machine_id,
                    unconfigure_memcached=internal_memcached,
                    unconfigure_rabbitmq=internal_rabbit_mq,
                    offline_nodes=storage_routers_offline,
                )

            # Stop / remove services
            Toolbox.log(logger=NodeRemovalController._logger, messages="Stopping and removing services")
            config_store = Configuration.get_store()
            if storage_router_to_remove_online is True:
                client = SSHClient(endpoint=storage_router_to_remove, username="******")
                NodeRemovalController.remove_services(
                    client=client,
                    node_type=storage_router_to_remove.node_type.lower(),
                    logger=NodeRemovalController._logger,
                )
                service = "watcher-config"
                if ServiceManager.has_service(service, client=client):
                    Toolbox.log(logger=NodeRemovalController._logger, messages="Removing service {0}".format(service))
                    ServiceManager.stop_service(service, client=client)
                    ServiceManager.remove_service(service, client=client)

                if config_store == "etcd":
                    from ovs.extensions.db.etcd.installer import EtcdInstaller

                    if Configuration.get(key="/ovs/framework/external_config") is None:
                        Toolbox.log(logger=NodeRemovalController._logger, messages="      Removing Etcd cluster")
                        try:
                            EtcdInstaller.stop("config", client)
                            EtcdInstaller.remove("config", client)
                        except Exception as ex:
                            Toolbox.log(
                                logger=NodeRemovalController._logger,
                                messages=["\nFailed to unconfigure Etcd", ex],
                                loglevel="exception",
                            )

                    Toolbox.log(logger=NodeRemovalController._logger, messages="Removing Etcd proxy")
                    EtcdInstaller.remove_proxy("config", client.ip)

            Toolbox.run_hooks(
                component="noderemoval",
                sub_component="remove",
                logger=NodeRemovalController._logger,
                cluster_ip=storage_router_to_remove.ip,
                complete_removal=remove_asd_manager,
            )

            # Clean up model
            Toolbox.log(logger=NodeRemovalController._logger, messages="Removing node from model")
            for service in storage_router_to_remove.services:
                service.delete()
            for disk in storage_router_to_remove.disks:
                for partition in disk.partitions:
                    partition.delete()
                disk.delete()
            for j_domain in storage_router_to_remove.domains:
                j_domain.delete()
            Configuration.delete("/ovs/framework/hosts/{0}".format(storage_router_to_remove.machine_id))

            NodeTypeController.restart_framework_and_memcache_services(
                clients=ip_client_map,
                offline_node_ips=[node.ip for node in storage_routers_offline],
                logger=NodeRemovalController._logger,
            )

            if storage_router_to_remove_online is True:
                client = SSHClient(endpoint=storage_router_to_remove, username="******")
                if config_store == "arakoon":
                    client.file_delete(filenames=[ArakoonConfiguration.CACC_LOCATION])
                client.file_delete(filenames=[Configuration.BOOTSTRAP_CONFIG_LOCATION])
            storage_router_to_remove.delete()
            Toolbox.log(logger=NodeRemovalController._logger, messages="Successfully removed node\n")
        except Exception as exception:
            Toolbox.log(logger=NodeRemovalController._logger, messages="\n")
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=["An unexpected error occurred:", str(exception)],
                boxed=True,
                loglevel="exception",
            )
            sys.exit(1)
        except KeyboardInterrupt:
            Toolbox.log(logger=NodeRemovalController._logger, messages="\n")
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages="This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.",
                boxed=True,
                loglevel="error",
            )
            sys.exit(1)

        if remove_asd_manager is True:
            Toolbox.log(logger=NodeRemovalController._logger, messages="\nRemoving ASD Manager")
            with remote(storage_router_to_remove.ip, [os]) as rem:
                rem.os.system("asd-manager remove --force-yes")
        Toolbox.log(logger=NodeRemovalController._logger, messages="Remove nodes finished", title=True)
Exemple #20
0
    def execute_scrub_work(queue, vpool, scrub_info, error_messages):
        """
        Executes scrub work for a given vDisk queue and vPool, based on scrub_info
        :param queue: a Queue with vDisk guids that need to be scrubbed (they should only be member of a single vPool)
        :type queue: Queue
        :param vpool: the vPool object of the vDisks
        :type vpool: VPool
        :param scrub_info: A dict containing scrub information: `scrub_path` with the path where to scrub and `storage_router` with the StorageRouter
                           that needs to do the work
        :type scrub_info: dict
        :param error_messages: A list of error messages to be filled
        :type error_messages: list
        :return: a list of error messages
        :rtype: list
        """
        def _verify_mds_config(current_vdisk):
            current_vdisk.invalidate_dynamics('info')
            vdisk_configs = current_vdisk.info['metadata_backend_config']
            if len(vdisk_configs) == 0:
                raise RuntimeError('Could not load MDS configuration')
            return vdisk_configs

        client = None
        lock_time = 5 * 60
        storagerouter = scrub_info['storage_router']
        scrub_directory = '{0}/scrub_work_{1}_{2}'.format(
            scrub_info['scrub_path'], vpool.name, storagerouter.name)
        scrub_config_key = 'ovs/vpools/{0}/proxies/scrub/scrub_config_{1}'.format(
            vpool.guid, storagerouter.guid)
        backend_config_key = 'ovs/vpools/{0}/proxies/scrub/backend_config_{1}'.format(
            vpool.guid, storagerouter.guid)
        alba_proxy_service = 'ovs-albaproxy_{0}_{1}_scrub'.format(
            vpool.name, storagerouter.name)

        # Deploy a proxy
        try:
            with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
                ScheduledTaskController._logger.info(
                    'Scrubber - vPool {0} - StorageRouter {1} - Deploying ALBA proxy {2}'
                    .format(vpool.name, storagerouter.name,
                            alba_proxy_service))
                client = SSHClient(storagerouter, 'root')
                client.dir_create(scrub_directory)
                client.dir_chmod(
                    scrub_directory, 0777
                )  # Celery task executed by 'ovs' user and should be able to write in it
                if ServiceManager.has_service(
                        name=alba_proxy_service, client=client
                ) is True and ServiceManager.get_service_status(
                        name=alba_proxy_service, client=client) is True:
                    ScheduledTaskController._logger.info(
                        'Scrubber - vPool {0} - StorageRouter {1} - Re-using existing proxy service {2}'
                        .format(vpool.name, storagerouter.name,
                                alba_proxy_service))
                    scrub_config = Configuration.get(scrub_config_key)
                else:
                    machine_id = System.get_my_machine_id(client)
                    port_range = Configuration.get(
                        '/ovs/framework/hosts/{0}/ports|storagedriver'.format(
                            machine_id))
                    port = System.get_free_ports(selected_range=port_range,
                                                 nr=1,
                                                 client=client)[0]
                    # Scrub config
                    # {u'albamgr_cfg_url': u'arakoon://config/ovs/vpools/71e2f717-f270-4a41-bbb0-d4c8c084d43e/proxies/64759516-3471-4321-b912-fb424568fc5b/config/abm?ini=%2Fopt%2FOpenvStorage%2Fconfig%2Farakoon_cacc.ini',
                    #  u'fragment_cache': [u'none'],
                    #  u'ips': [u'127.0.0.1'],
                    #  u'log_level': u'info',
                    #  u'manifest_cache_size': 17179869184,
                    #  u'port': 0,
                    #  u'transport': u'tcp'}

                    # Backend config
                    # {u'alba_connection_host': u'10.100.193.155',
                    #  u'alba_connection_port': 26204,
                    #  u'alba_connection_preset': u'preset',
                    #  u'alba_connection_timeout': 15,
                    #  u'alba_connection_transport': u'TCP',
                    #  u'backend_interface_retries_on_error': 5,
                    #  u'backend_interface_retry_backoff_multiplier': 2.0,
                    #  u'backend_interface_retry_interval_secs': 1,
                    #  u'backend_type': u'ALBA'}
                    scrub_config = Configuration.get(
                        'ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(
                            vpool.guid))
                    scrub_config['port'] = port
                    scrub_config['transport'] = 'tcp'
                    Configuration.set(scrub_config_key,
                                      json.dumps(scrub_config, indent=4),
                                      raw=True)

                    params = {
                        'VPOOL_NAME':
                        vpool.name,
                        'LOG_SINK':
                        LogHandler.get_sink_path('alba_proxy'),
                        'CONFIG_PATH':
                        Configuration.get_configuration_path(scrub_config_key)
                    }
                    ServiceManager.add_service(name='ovs-albaproxy',
                                               params=params,
                                               client=client,
                                               target_name=alba_proxy_service)
                    ServiceManager.start_service(name=alba_proxy_service,
                                                 client=client)
                    ScheduledTaskController._logger.info(
                        'Scrubber - vPool {0} - StorageRouter {1} - Deployed ALBA proxy {2}'
                        .format(vpool.name, storagerouter.name,
                                alba_proxy_service))

                backend_config = Configuration.get(
                    'ovs/vpools/{0}/hosts/{1}/config'.format(
                        vpool.guid, vpool.storagedrivers[0].storagedriver_id
                    ))['backend_connection_manager']
                backend_config['alba_connection_host'] = '127.0.0.1'
                backend_config['alba_connection_port'] = scrub_config['port']
                Configuration.set(
                    backend_config_key,
                    json.dumps({"backend_connection_manager": backend_config},
                               indent=4),
                    raw=True)
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - An error occurred deploying ALBA proxy {2}'.format(
                vpool.name, storagerouter.name, alba_proxy_service)
            error_messages.append(message)
            ScheduledTaskController._logger.exception(message)
            if client is not None and ServiceManager.has_service(
                    name=alba_proxy_service, client=client) is True:
                if ServiceManager.get_service_status(name=alba_proxy_service,
                                                     client=client) is True:
                    ServiceManager.stop_service(name=alba_proxy_service,
                                                client=client)
                ServiceManager.remove_service(name=alba_proxy_service,
                                              client=client)
            if Configuration.exists(scrub_config_key):
                Configuration.delete(scrub_config_key)

        try:
            # Empty the queue with vDisks to scrub
            with remote(storagerouter.ip, [VDisk]) as rem:
                while True:
                    vdisk = None
                    vdisk_guid = queue.get(False)
                    try:
                        # Check MDS master is local. Trigger MDS handover if necessary
                        vdisk = rem.VDisk(vdisk_guid)
                        ScheduledTaskController._logger.info(
                            'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Started scrubbing at location {3}'
                            .format(vpool.name, storagerouter.name, vdisk.name,
                                    scrub_directory))
                        configs = _verify_mds_config(current_vdisk=vdisk)
                        storagedriver = StorageDriverList.get_by_storagedriver_id(
                            vdisk.storagedriver_id)
                        if configs[0].get(
                                'ip') != storagedriver.storagerouter.ip:
                            ScheduledTaskController._logger.info(
                                'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - MDS master is not local, trigger handover'
                                .format(vpool.name, storagerouter.name,
                                        vdisk.name))
                            MDSServiceController.ensure_safety(
                                VDisk(vdisk_guid)
                            )  # Do not use a remote VDisk instance here
                            configs = _verify_mds_config(current_vdisk=vdisk)
                            if configs[0].get(
                                    'ip') != storagedriver.storagerouter.ip:
                                ScheduledTaskController._logger.warning(
                                    'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Skipping because master MDS still not local'
                                    .format(vpool.name, storagerouter.name,
                                            vdisk.name))
                                continue

                        # Do the actual scrubbing
                        with vdisk.storagedriver_client.make_locked_client(
                                str(vdisk.volume_id)) as locked_client:
                            ScheduledTaskController._logger.info(
                                'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Retrieve and apply scrub work'
                                .format(vpool.name, storagerouter.name,
                                        vdisk.name))
                            work_units = locked_client.get_scrubbing_workunits(
                            )
                            for work_unit in work_units:
                                res = locked_client.scrub(
                                    work_unit=work_unit,
                                    scratch_dir=scrub_directory,
                                    log_sinks=[
                                        LogHandler.get_sink_path(
                                            'scrubber', allow_override=True)
                                    ],
                                    backend_config=Configuration.
                                    get_configuration_path(backend_config_key))
                                locked_client.apply_scrubbing_result(
                                    scrubbing_work_result=res)
                            if work_units:
                                ScheduledTaskController._logger.info(
                                    'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - {3} work units successfully applied'
                                    .format(vpool.name, storagerouter.name,
                                            vdisk.name, len(work_units)))
                            else:
                                ScheduledTaskController._logger.info(
                                    'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - No scrubbing required'
                                    .format(vpool.name, storagerouter.name,
                                            vdisk.name))
                    except Exception:
                        if vdisk is None:
                            message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk with guid {2} could not be found'.format(
                                vpool.name, storagerouter.name, vdisk_guid)
                        else:
                            message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Scrubbing failed'.format(
                                vpool.name, storagerouter.name, vdisk.name)
                        error_messages.append(message)
                        ScheduledTaskController._logger.exception(message)

        except Empty:  # Raised when all items have been fetched from the queue
            ScheduledTaskController._logger.info(
                'Scrubber - vPool {0} - StorageRouter {1} - Queue completely processed'
                .format(vpool.name, storagerouter.name))
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - Scrubbing failed'.format(
                vpool.name, storagerouter.name)
            error_messages.append(message)
            ScheduledTaskController._logger.exception(message)

        # Delete the proxy again
        try:
            with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
                ScheduledTaskController._logger.info(
                    'Scrubber - vPool {0} - StorageRouter {1} - Removing service {2}'
                    .format(vpool.name, storagerouter.name,
                            alba_proxy_service))
                client = SSHClient(storagerouter, 'root')
                client.dir_delete(scrub_directory)
                if ServiceManager.has_service(alba_proxy_service,
                                              client=client):
                    ServiceManager.stop_service(alba_proxy_service,
                                                client=client)
                    ServiceManager.remove_service(alba_proxy_service,
                                                  client=client)
                if Configuration.exists(scrub_config_key):
                    Configuration.delete(scrub_config_key)
                ScheduledTaskController._logger.info(
                    'Scrubber - vPool {0} - StorageRouter {1} - Removed service {2}'
                    .format(vpool.name, storagerouter.name,
                            alba_proxy_service))
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - Removing service {2} failed'.format(
                vpool.name, storagerouter.name, alba_proxy_service)
            error_messages.append(message)
            ScheduledTaskController._logger.exception(message)