Example #1
    def add_services(client, node_type, logger):
        """
        Add the services required by the OVS cluster
        :param client: Client on which to add the services
        :type client: ovs.extensions.generic.sshclient.SSHClient
        :param node_type: Type of node ('master' or 'extra')
        :type node_type: str
        :param logger: Logger object used for logging
        :type logger: ovs.log.log_handler.LogHandler
        :return: None
        """
        Toolbox.log(logger=logger, messages='Adding services')
        services = {}
        worker_queue = System.get_my_machine_id(client=client)
        if node_type == 'master':
            worker_queue += ',ovs_masters'
            services.update({'memcached': {'MEMCACHE_NODE_IP': client.ip, 'WORKER_QUEUE': worker_queue},
                             'rabbitmq-server': {'MEMCACHE_NODE_IP': client.ip, 'WORKER_QUEUE': worker_queue},
                             'scheduled-tasks': {},
                             'webapp-api': {},
                             'volumerouter-consumer': {}})
        services.update({'workers': {'WORKER_QUEUE': worker_queue},
                         'watcher-framework': {}})

        for service_name, params in services.iteritems():
            if not ServiceManager.has_service(service_name, client):
                Toolbox.log(logger=logger, messages='Adding service {0}'.format(service_name))
                ServiceManager.add_service(name=service_name, params=params, client=client)
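
A minimal call sketch for the helper above, assuming it is in scope (in the framework it lives on a controller class not shown here). The import paths come from the docstring types; the IP address, username and the LogHandler.get factory call are assumptions:

    from ovs.extensions.generic.sshclient import SSHClient
    from ovs.log.log_handler import LogHandler

    client = SSHClient('10.100.1.10', username='root')  # placeholder IP and username
    logger = LogHandler.get('lib', name='setup')         # assumed factory; any LogHandler instance works
    add_services(client=client, node_type='master', logger=logger)
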
Example #2
    def restart_framework_and_memcache_services(clients, logger, offline_node_ips=None):
        """
        Restart framework and Memcached services
        :param clients: Clients on which to restart these services
        :type clients: dict
        :param logger: Logger object used for logging
        :type logger: ovs.log.log_handler.LogHandler
        :param offline_node_ips: IP addresses of offline nodes in the cluster
        :type offline_node_ips: list
        :return: None
        """
        from ovs.dal.lists.storagerouterlist import StorageRouterList

        master_ips = [sr.ip for sr in StorageRouterList.get_masters()]
        slave_ips = [sr.ip for sr in StorageRouterList.get_slaves()]
        if offline_node_ips is None:
            offline_node_ips = []
        memcached = 'memcached'
        watcher = 'watcher-framework'
        support_agent = 'support-agent'
        for ip in master_ips + slave_ips:
            if ip not in offline_node_ips:
                if ServiceManager.has_service(watcher, clients[ip]):
                    Toolbox.change_service_state(clients[ip], watcher, 'stop', logger)
        for ip in master_ips:
            if ip not in offline_node_ips:
                Toolbox.change_service_state(clients[ip], memcached, 'restart', logger)
        for ip in master_ips + slave_ips:
            if ip not in offline_node_ips:
                if ServiceManager.has_service(watcher, clients[ip]):
                    Toolbox.change_service_state(clients[ip], watcher, 'start', logger)
                if ServiceManager.has_service(support_agent, clients[ip]):
                    Toolbox.change_service_state(clients[ip], support_agent, 'restart', logger)
        VolatileFactory.store = None
    def remove_services(client, node_type, logger):
        """
        Remove all services managed by OVS
        :param client: Client on which to remove the services
        :type client: ovs.extensions.generic.sshclient.SSHClient
        :param node_type: Type of node, can be 'master' or 'extra'
        :type node_type: str
        :param logger: Logger object used for logging
        :type logger: ovs.log.log_handler.LogHandler
        :return: None
        """
        Toolbox.log(logger=logger, messages="Removing services")
        stop_only = ["rabbitmq-server", "memcached"]
        services = ["workers", "support-agent", "watcher-framework"]
        if node_type == "master":
            services += ["scheduled-tasks", "webapp-api", "volumerouter-consumer"]
            if Toolbox.is_service_internally_managed(service="rabbitmq") is True:
                services.append("rabbitmq-server")
            if Toolbox.is_service_internally_managed(service="memcached") is True:
                services.append("memcached")

        for service in services:
            if ServiceManager.has_service(service, client=client):
                Toolbox.log(
                    logger=logger,
                    messages="{0} service {1}".format("Removing" if service not in stop_only else "Stopping", service),
                )
                ServiceManager.stop_service(service, client=client)
                if service not in stop_only:
                    ServiceManager.remove_service(service, client=client)
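
Reusing the placeholder client and logger from the add_services sketch earlier, a call for an 'extra' node only touches the base services (memcached and rabbitmq-server are master-only):

    remove_services(client=client, node_type='extra', logger=logger)
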
 def on_demote(cluster_ip, master_ip, offline_node_ips=None):
     """
     Handles the demote for the StorageDrivers
     :param cluster_ip: IP of the node to demote
     :param master_ip: IP of the master node
     :param offline_node_ips: IPs of nodes which are offline
     """
     if offline_node_ips is None:
         offline_node_ips = []
     client = SSHClient(cluster_ip, username='******') if cluster_ip not in offline_node_ips else None
     servicetype = ServiceTypeList.get_by_name('Arakoon')
     current_service = None
     remaining_ips = []
     for service in servicetype.services:
         if service.name == 'arakoon-voldrv':
             if service.storagerouter.ip == cluster_ip:
                 current_service = service
             elif service.storagerouter.ip not in offline_node_ips:
                 remaining_ips.append(service.storagerouter.ip)
     if current_service is not None:
         print '* Shrink StorageDriver cluster'
         ArakoonInstaller.shrink_cluster(master_ip, cluster_ip, 'voldrv', offline_node_ips)
         if client is not None and ServiceManager.has_service(current_service.name, client=client) is True:
             ServiceManager.stop_service(current_service.name, client=client)
             ServiceManager.remove_service(current_service.name, client=client)
         ArakoonInstaller.restart_cluster_remove('voldrv', remaining_ips)
         current_service.delete()
         StorageDriverController._configure_arakoon_to_volumedriver(offline_node_ips)
    def _deploy(config, offline_nodes=None):
        """
        Deploys a complete cluster: Distributing the configuration files, creating directories and services
        """
        ArakoonInstaller._logger.debug('Deploying cluster {0}'.format(config.cluster_id))
        if offline_nodes is None:
            offline_nodes = []
        for node in config.nodes:
            if node.ip in offline_nodes:
                continue
            ArakoonInstaller._logger.debug('  Deploying cluster {0} on {1}'.format(config.cluster_id, node.ip))
            root_client = SSHClient(node.ip, username='******')

            # Distributes a configuration file to all its nodes
            config.write_config()

            # Create dirs as root because mountpoint /mnt/cache1 is typically owned by root
            abs_paths = [node.log_dir, node.tlog_dir, node.home]
            if not root_client.dir_exists(abs_paths):
                root_client.dir_create(abs_paths)
                root_client.dir_chmod(abs_paths, 0755, recursive=True)
                root_client.dir_chown(abs_paths, 'ovs', 'ovs', recursive=True)

            # Creates services for/on all nodes in the config
            base_name = 'ovs-arakoon'
            target_name = 'ovs-arakoon-{0}'.format(config.cluster_id)
            ServiceManager.add_service(base_name, root_client,
                                       params={'CLUSTER': config.cluster_id,
                                               'NODE_ID': node.name,
                                               'CONFIG_PATH': ArakoonInstaller.ETCD_CONFIG_PATH.format(config.cluster_id)},
                                       target_name=target_name)
            ArakoonInstaller._logger.debug('  Deploying cluster {0} on {1} completed'.format(config.cluster_id, node.ip))
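
A hedged invocation sketch; the ArakoonClusterConfig constructor shape is borrowed from the delete_cluster example further down, and the cluster name and IPs are placeholders:

    config = ArakoonClusterConfig('ovsdb', False)    # (cluster_name, filesystem), as in delete_cluster below
    config.load_config('10.100.1.10')                # placeholder IP of an existing node
    ArakoonInstaller._deploy(config, offline_nodes=['10.100.1.13'])
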
 def stop(cluster_name, client):
     """
     Stops an arakoon service
     """
     if ServiceManager.has_service('arakoon-{0}'.format(cluster_name), client=client) is True and \
             ServiceManager.get_service_status('arakoon-{0}'.format(cluster_name), client=client) is True:
         ServiceManager.stop_service('arakoon-{0}'.format(cluster_name), client=client)
 def on_demote(cluster_ip, master_ip):
     """
     Handles the demote for the StorageDrivers
     :param cluster_ip: IP of the node to demote
     :param master_ip: IP of the master node
     """
     client = SSHClient(cluster_ip, username='******')
     servicetype = ServiceTypeList.get_by_name('Arakoon')
     current_service = None
     remaining_ips = []
     for service in servicetype.services:
         if service.name == 'arakoon-voldrv':
             if service.storagerouter.ip == cluster_ip:
                 current_service = service
             else:
                 remaining_ips.append(service.storagerouter.ip)
     if current_service is not None:
         print '* Shrink StorageDriver cluster'
         ArakoonInstaller.shrink_cluster(master_ip, cluster_ip, 'voldrv')
         if ServiceManager.has_service(current_service.name, client=client) is True:
             ServiceManager.stop_service(current_service.name, client=client)
             ServiceManager.remove_service(current_service.name, client=client)
         ArakoonInstaller.restart_cluster_remove('voldrv', remaining_ips)
         current_service.delete()
         for storagerouter in StorageRouterList.get_storagerouters():
             ArakoonInstaller.deploy_to_slave(master_ip, storagerouter.ip, 'voldrv')
         StorageDriverController._configure_arakoon_to_volumedriver()
    def _deploy(config):
        """
        Deploys a complete cluster: Distributing the configuration files, creating directories and services
        """
        logger.debug("Deploying cluster {0}".format(config.cluster_id))
        for node in config.nodes:
            logger.debug("  Deploying cluster {0} on {1}".format(config.cluster_id, node.ip))
            ovs_client = SSHClient(node.ip)
            root_client = SSHClient(node.ip, username="******")

            # Distributes a configuration file to all its nodes
            config.write_config(ovs_client)

            # Create dirs as root because mountpoint /mnt/cache1 is typically owned by root
            abs_paths = [node.log_dir, node.tlog_dir, node.home]
            root_client.dir_create(abs_paths)
            root_client.dir_chmod(abs_paths, 0755, recursive=True)
            root_client.dir_chown(abs_paths, "ovs", "ovs", recursive=True)

            # Creates services for/on all nodes in the config
            base_name = "ovs-arakoon"
            target_name = "ovs-arakoon-{0}".format(config.cluster_id)
            ServiceManager.prepare_template(base_name, target_name, ovs_client)
            ServiceManager.add_service(target_name, root_client, params={"CLUSTER": config.cluster_id})
            logger.debug("  Deploying cluster {0} on {1} completed".format(config.cluster_id, node.ip))
Example #9
    def _setup_proxy(initial_cluster, slave_client, cluster_name, force=False):
        base_name = 'ovs-etcd-proxy'
        target_name = 'ovs-etcd-{0}'.format(cluster_name)
        if force is False and ServiceManager.has_service(target_name, slave_client) and \
            ServiceManager.get_service_status(target_name, slave_client) is True:
            logger.info('Service {0} already configured and running'.format(target_name))
            return
        EtcdInstaller.stop(cluster_name, slave_client)

        data_dir = EtcdInstaller.DATA_DIR.format(EtcdInstaller.DB_DIR, cluster_name)
        wal_dir = EtcdInstaller.WAL_DIR.format(EtcdInstaller.DB_DIR, cluster_name)
        abs_paths = [data_dir, wal_dir]
        slave_client.dir_delete(abs_paths)
        slave_client.dir_create(data_dir)
        slave_client.dir_chmod(data_dir, 0755, recursive=True)
        slave_client.dir_chown(data_dir, 'ovs', 'ovs', recursive=True)

        ServiceManager.add_service(base_name, slave_client,
                                   params={'CLUSTER': cluster_name,
                                           'DATA_DIR': data_dir,
                                           'LOCAL_CLIENT_URL': EtcdInstaller.CLIENT_URL.format('127.0.0.1'),
                                           'INITIAL_CLUSTER': initial_cluster},
                                   target_name=target_name)
        EtcdInstaller.start(cluster_name, slave_client)
        EtcdInstaller.wait_for_cluster(cluster_name, slave_client)
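
A hedged call sketch; the 'name=peer_url' shape of initial_cluster mirrors the INITIAL_CLUSTER strings built in the extend_cluster examples, and the port, node name and cluster name are placeholders:

    initial_cluster = 'node1=http://10.100.1.10:2380'  # placeholder peer entry
    EtcdInstaller._setup_proxy(initial_cluster, slave_client, cluster_name='config', force=True)
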
    def delete_cluster(cluster_name, ip, filesystem=False):
        """
        Deletes a complete cluster
        :param cluster_name: Name of the cluster to remove
        :type cluster_name: str
        :param ip: IP address of the last node of a cluster
        :type ip: str
        :param filesystem: Indicates whether the configuration should be on the filesystem or in a configuration cluster
        :type filesystem: bool
        :return: None
        """
        ArakoonInstaller._logger.debug('Deleting cluster {0} on {1}'.format(cluster_name, ip))
        config = ArakoonClusterConfig(cluster_name, filesystem)
        config.load_config(ip)
        cluster_type = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=config.cluster_id, filesystem=filesystem, ip=ip)['cluster_type']

        service_name = ArakoonInstaller.get_service_name_for_cluster(cluster_name=config.cluster_id)
        for node in config.nodes:
            try:
                ServiceManager.unregister_service(service_name=service_name, node_name=node.name)
            except:
                ArakoonInstaller._logger.exception('Un-registering service {0} on {1} failed'.format(service_name, ip))

        # Cleans up a complete cluster (remove services, directories and configuration files)
        for node in config.nodes:
            ArakoonInstaller._destroy_node(config, node, delay_unregistration=cluster_type == ServiceType.ARAKOON_CLUSTER_TYPES.CFG)
            config.delete_config(ip)

        ArakoonInstaller._logger.debug('Deleting cluster {0} on {1} completed'.format(cluster_name, ip))
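
A minimal call sketch with a hypothetical cluster name and node IP; filesystem=False keeps the configuration in the configuration cluster rather than on the local filesystem:

    ArakoonInstaller.delete_cluster(cluster_name='ovsdb', ip='10.100.1.10', filesystem=False)
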
 def start(cluster_name, client):
     """
     Starts an arakoon cluster
     :param client: Client on which to start the service
     :param cluster_name: The name of the cluster service to start
     """
     if ServiceManager.has_service('arakoon-{0}'.format(cluster_name), client=client) is True:
         ServiceManager.start_service('arakoon-{0}'.format(cluster_name), client=client)
 def start_service(name, client):
     """
     Start a service
     :param name: Name of the service
     :param client: SSHClient object
     :return: None
     """
     ServiceManager.start_service(name, client)
Example #13
 def stop(cluster_name, client):
     """
     Stops an etcd service
     :param client: Client on which to stop the service
     :param cluster_name: The name of the cluster service to stop
     """
     if ServiceManager.has_service('etcd-{0}'.format(cluster_name), client=client) is True:
         ServiceManager.stop_service('etcd-{0}'.format(cluster_name), client=client)
Example #14
def restart_required_services():
    """
    Checks if the ASD MANAGER setup was executed correctly

    :return: Whether all services were successfully restarted
    :rtype: bool
    """
    ServiceManager.restart_service('avahi-daemon', root_client)
Example #15
 def remove(cluster_name, client):
     """
     Removes an etcd service
     :param client: Client on which to remove the service
     :param cluster_name: The name of the cluster service to remove
     """
     if ServiceManager.has_service('etcd-{0}'.format(cluster_name), client=client) is True:
         ServiceManager.remove_service('etcd-{0}'.format(cluster_name), client=client)
Example #16
    def extend_cluster(master_ip, new_ip, cluster_name):
        """
        Extends a cluster to a given new node
        :param cluster_name: Name of the cluster to be extended
        :param new_ip: IP address of the node to be added
        :param master_ip: IP of one of the already existing nodes
        """
        logger.debug('Extending cluster "{0}" from {1} to {2}'.format(cluster_name, master_ip, new_ip))

        client = SSHClient(master_ip, username='******')
        if not EtcdInstaller._is_healty(cluster_name, client):
            raise RuntimeError('Cluster "{0}" unhealthy, aborting extend'.format(cluster_name))

        cluster_members = client.run('etcdctl member list').splitlines()
        for cluster_member in cluster_members:
            if EtcdInstaller.SERVER_URL.format(new_ip) in cluster_member:
                logger.info('Node {0} already member of etcd cluster'.format(new_ip))
                return

        current_cluster = []
        for item in client.run('etcdctl member list').splitlines():
            info = re.search(EtcdInstaller.MEMBER_REGEX, item).groupdict()
            current_cluster.append('{0}={1}'.format(info['name'], info['peer']))

        client = SSHClient(new_ip, username='******')
        node_name = System.get_my_machine_id(client)
        current_cluster.append('{0}={1}'.format(node_name, EtcdInstaller.SERVER_URL.format(new_ip)))

        data_dir = EtcdInstaller.DATA_DIR.format(EtcdInstaller.DB_DIR, cluster_name)
        wal_dir = EtcdInstaller.WAL_DIR.format(EtcdInstaller.DB_DIR, cluster_name)
        abs_paths = [data_dir, wal_dir]
        client.dir_delete(abs_paths)
        client.dir_create(abs_paths)
        client.dir_chmod(abs_paths, 0755, recursive=True)
        client.dir_chown(abs_paths, 'ovs', 'ovs', recursive=True)

        base_name = 'ovs-etcd'
        target_name = 'ovs-etcd-{0}'.format(cluster_name)
        EtcdInstaller.stop(cluster_name, client)  # Stop a possible proxy service
        ServiceManager.add_service(base_name, client,
                                   params={'CLUSTER': cluster_name,
                                           'NODE_ID': node_name,
                                           'DATA_DIR': data_dir,
                                           'WAL_DIR': wal_dir,
                                           'SERVER_URL': EtcdInstaller.SERVER_URL.format(new_ip),
                                           'CLIENT_URL': EtcdInstaller.CLIENT_URL.format(new_ip),
                                           'LOCAL_CLIENT_URL': EtcdInstaller.CLIENT_URL.format('127.0.0.1'),
                                           'INITIAL_CLUSTER': ','.join(current_cluster),
                                           'INITIAL_STATE': 'existing',
                                           'INITIAL_PEERS': ''},
                                   target_name=target_name)

        master_client = SSHClient(master_ip, username='******')
        master_client.run('etcdctl member add {0} {1}'.format(node_name, EtcdInstaller.SERVER_URL.format(new_ip)))
        EtcdInstaller.start(cluster_name, client)
        EtcdInstaller.wait_for_cluster(cluster_name, client)

        logger.debug('Extending cluster "{0}" from {1} to {2} completed'.format(cluster_name, master_ip, new_ip))
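
A minimal call sketch with placeholder IPs and cluster name; master_ip must point at a node that already runs a healthy member of the cluster:

    EtcdInstaller.extend_cluster(master_ip='10.100.1.10', new_ip='10.100.1.11', cluster_name='config')
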
 def start(cluster_name, client):
     """
     Starts an etcd cluster
     :param client: Client on which to start the service
     :param cluster_name: The name of the cluster service to start
     """
     if ServiceManager.has_service('etcd-{0}'.format(cluster_name), client=client) is True and \
             ServiceManager.get_service_status('etcd-{0}'.format(cluster_name), client=client) is False:
         ServiceManager.start_service('etcd-{0}'.format(cluster_name), client=client)
 def stop(cluster_name, client):
     """
     Stops an arakoon service
     """
     if (
         ServiceManager.has_service("arakoon-{0}".format(cluster_name), client=client) is True
         and ServiceManager.get_service_status("arakoon-{0}".format(cluster_name), client=client) is True
     ):
         ServiceManager.stop_service("arakoon-{0}".format(cluster_name), client=client)
 def is_running(cluster_name, client):
     """
     Checks if arakoon service is running
     :param client: Client on which to stop the service
     :param cluster_name: The name of the cluster service to stop
     """
     if ServiceManager.has_service('arakoon-{0}'.format(cluster_name), client=client):
         return ServiceManager.get_service_status('arakoon-{0}'.format(cluster_name), client=client)
     return False
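
Because the helper above returns a plain boolean, it lends itself to simple polling; a sketch with a placeholder cluster name, assuming is_running and client are in scope:

    import time

    while not is_running('voldrv', client):
        time.sleep(1)  # wait for the arakoon service to come up
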
 def stop(cluster_name, client):
     """
     Stops an arakoon service
     :param client: Client on which to stop the service
     :param cluster_name: The name of the cluster service to stop
     """
     if ServiceManager.has_service('arakoon-{0}'.format(cluster_name), client=client) is True and \
             ServiceManager.get_service_status('arakoon-{0}'.format(cluster_name), client=client) is True:
         ServiceManager.stop_service('arakoon-{0}'.format(cluster_name), client=client)
 def tearDown(self):
     """
     Clean up the unittest
     """
     # Cleaning storage
     self.volatile.clean()
     self.persistent.clean()
     Upstart.clean()
     ServiceManager.clean()
     StorageRouterClient.clean()
 def setUp(self):
     """
     (Re)Sets the stores on every test
     """
     # Cleaning storage
     self.volatile.clean()
     self.persistent.clean()
     Upstart.clean()
     ServiceManager.clean()
     StorageRouterClient.clean()
 def _roll_out_dtl_services(vpool, storagerouters):
     """
     Deploy and start the DTL service on all storagerouters
      :param vpool: vPool for which to deploy and start the DTL services
      :param storagerouters: StorageRouters to deploy and start a DTL service on
     :return: None
     """
     service_name = 'dtl_{0}'.format(vpool.name)
     for sr in storagerouters.values():
         client = SSHClient(sr, 'root')
         ServiceManager.add_service(name=service_name, client=client)
         ServiceManager.start_service(name=service_name, client=client)
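
A hedged call sketch; StorageRouterList.get_storagerouters is used in the demote example above, while the vPool lookup helper and vPool name are assumptions. The function only iterates over storagerouters.values(), so the dict keys are arbitrary:

    from ovs.dal.lists.vpoollist import VPoolList
    from ovs.dal.lists.storagerouterlist import StorageRouterList

    vpool = VPoolList.get_vpool_by_name('vpool01')  # assumed lookup helper, hypothetical vPool name
    storagerouters = dict((sr.name, sr) for sr in StorageRouterList.get_storagerouters())
    _roll_out_dtl_services(vpool=vpool, storagerouters=storagerouters)
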
Example #24
 def start(cluster_name, client):
     """
     Starts an etcd cluster
     :param cluster_name: The name of the cluster service to start
     :type cluster_name: str
     :param client: Client on which to start the service
     :type client: SSHClient
     :return: None
     """
     if ServiceManager.has_service('etcd-{0}'.format(cluster_name), client=client) is True:
         ServiceManager.start_service('etcd-{0}'.format(cluster_name), client=client)
 def stop(cluster_name, client):
     """
     Stops an arakoon service
     :param cluster_name: The name of the cluster service to stop
     :type cluster_name: str
     :param client: Client on which to stop the service
     :type client: SSHClient
     :return: None
     """
     service_name = ArakoonInstaller.get_service_name_for_cluster(cluster_name=cluster_name)
     if ServiceManager.has_service(name=service_name, client=client) is True:
         ServiceManager.stop_service(name=service_name, client=client)
    def setUpClass(cls):
        """
        Sets up the unittest, mocking a certain set of 3rd party libraries and extensions.
        This makes sure the unittests can be executed without those libraries installed
        """
        cls.persistent = PersistentFactory.get_client()
        cls.persistent.clean()

        cls.volatile = VolatileFactory.get_client()
        cls.volatile.clean()
        Upstart.clean()
        ServiceManager.clean()
        StorageRouterClient.clean()
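
The setUpClass/setUp/tearDown hooks shown above plug into a standard unittest.TestCase; a minimal skeleton with invented class and test names:

    import unittest

    class ServiceManagerTestCase(unittest.TestCase):
        @classmethod
        def setUpClass(cls):
            pass  # clean persistent/volatile stores and mocked extensions, as above

        def setUp(self):
            pass  # reset the stores before every test, as above

        def tearDown(self):
            pass  # clean up after every test, as above

        def test_placeholder(self):
            self.assertTrue(True)

    if __name__ == '__main__':
        unittest.main()
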
 def _restart_openstack_services(self):
     """
     Restart services on openstack
     """
     services = OSManager.get_openstack_services()
     for service_name in services:
         if ServiceManager.has_service(service_name, self.client):
             try:
                 ServiceManager.restart_service(service_name, self.client)
             except SystemExit as sex:
                 logger.debug('Failed to restart service {0}. {1}'.format(service_name, sex))
     time.sleep(3)
     return self._is_cinder_running()
    def remove(cluster_name, client):
        """
        Removes an arakoon service
        :param cluster_name: The name of the cluster service to remove
        :type cluster_name: str

        :param client: Client on which to remove the service
        :type client: SSHClient

        :return: None
        """
        if ServiceManager.has_service('arakoon-{0}'.format(cluster_name), client=client) is True:
            ServiceManager.remove_service('arakoon-{0}'.format(cluster_name), client=client)
 def is_running(cluster_name, client):
     """
     Checks if arakoon service is running
     :param cluster_name: The name of the cluster service to check
     :type cluster_name: str
     :param client: Client on which to check the service
     :type client: SSHClient
     :return: None
     """
     service_name = ArakoonInstaller.get_service_name_for_cluster(cluster_name=cluster_name)
     if ServiceManager.has_service(name=service_name, client=client):
         return ServiceManager.get_service_status(name=service_name, client=client)[0]
     return False
    def stop(cluster_name, client):
        """
        Stops an arakoon service
        :param cluster_name: The name of the cluster service to stop
        :type cluster_name: str

        :param client: Client on which to stop the service
        :type client: SSHClient

        :return: None
        """
        if ServiceManager.has_service('arakoon-{0}'.format(cluster_name), client=client) is True:
            ServiceManager.stop_service('arakoon-{0}'.format(cluster_name), client=client)
Example #31
 def _is_openstack(self):
     cinder_service = OSManager.get_openstack_cinder_service_name()
     return ServiceManager.has_service(cinder_service, self.client)
Example #32
    def extend_cluster(master_ip, new_ip, cluster_name):
        """
        Extends a cluster to a given new node
        :param cluster_name: Name of the cluster to be extended
        :param new_ip: IP address of the node to be added
        :param master_ip: IP of one of the already existing nodes
        """
        logger.debug('Extending cluster "{0}" from {1} to {2}'.format(
            cluster_name, master_ip, new_ip))

        client = SSHClient(master_ip, username='******')
        if not EtcdInstaller._is_healty(cluster_name, client):
            raise RuntimeError(
                'Cluster "{0}" unhealthy, aborting extend'.format(
                    cluster_name))

        current_cluster = []
        for item in client.run('etcdctl member list').splitlines():
            info = re.search(EtcdInstaller.MEMBER_REGEX, item).groupdict()
            current_cluster.append('{0}={1}'.format(info['name'],
                                                    info['peer']))

        client = SSHClient(new_ip, username='******')
        node_name = System.get_my_machine_id(client)
        current_cluster.append('{0}={1}'.format(
            node_name, EtcdInstaller.SERVER_URL.format(new_ip)))

        data_dir = EtcdInstaller.DATA_DIR.format(EtcdInstaller.DB_DIR,
                                                 cluster_name)
        wal_dir = EtcdInstaller.WAL_DIR.format(EtcdInstaller.DB_DIR,
                                               cluster_name)
        abs_paths = [data_dir, wal_dir]
        client.dir_delete(abs_paths)
        client.dir_create(abs_paths)
        client.dir_chmod(abs_paths, 0755, recursive=True)
        client.dir_chown(abs_paths, 'ovs', 'ovs', recursive=True)

        base_name = 'ovs-etcd'
        target_name = 'ovs-etcd-{0}'.format(cluster_name)
        EtcdInstaller.stop(cluster_name,
                           client)  # Stop a possible proxy service
        ServiceManager.add_service(
            base_name,
            client,
            params={
                'CLUSTER': cluster_name,
                'NODE_ID': node_name,
                'DATA_DIR': data_dir,
                'WAL_DIR': wal_dir,
                'SERVER_URL': EtcdInstaller.SERVER_URL.format(new_ip),
                'CLIENT_URL': EtcdInstaller.CLIENT_URL.format(new_ip),
                'LOCAL_CLIENT_URL':
                EtcdInstaller.CLIENT_URL.format('127.0.0.1'),
                'INITIAL_CLUSTER': ','.join(current_cluster),
                'INITIAL_STATE': 'existing',
                'INITIAL_PEERS': ''
            },
            target_name=target_name)

        master_client = SSHClient(master_ip, username='******')
        master_client.run('etcdctl member add {0} {1}'.format(
            node_name, EtcdInstaller.SERVER_URL.format(new_ip)))
        EtcdInstaller.start(cluster_name, client)
        EtcdInstaller.wait_for_cluster(cluster_name, client)

        logger.debug(
            'Extending cluster "{0}" from {1} to {2} completed'.format(
                cluster_name, master_ip, new_ip))
Example #33
    def _restart_processes(self):
        """
        Restart the cinder process that uses the OVS volume driver
        - also restarts nova api and compute services
        """
        def stop_screen_process(process_name):
            out = self.client.run(
                '''su stack -c 'screen -S {0} -p {1} -Q select 1>/dev/null; echo $?' '''
                .format(screen_name, process_name))
            process_screen_exists = out == '0'
            if process_screen_exists:
                self.client.run(
                    '''su stack -c 'screen -S {0} -p {1} -X stuff \n' '''.
                    format(screen_name, process_name))
                self.client.run(
                    '''su stack -c 'screen -S {0} -p {1} -X kill' '''.format(
                        screen_name, process_name))
            return process_screen_exists

        def start_screen_process(process_name, commands):
            logfile = '{0}/{1}.log.{2}'.format(
                logdir, process_name,
                datetime.datetime.strftime(datetime.datetime.now(),
                                           '%Y-%m-%d-%H%M%S'))
            self._logger.debug(
                self.client.run(
                    '''su stack -c 'touch {0}' '''.format(logfile)))
            self._logger.debug(
                self.client.run(
                    '''su stack -c 'screen -S {0} -X screen -t {1}' '''.format(
                        screen_name, process_name)))
            self._logger.debug(
                self.client.run(
                    '''su stack -c 'screen -S {0} -p {1} -X logfile {2}' '''.
                    format(screen_name, process_name, logfile)))
            self._logger.debug(
                self.client.run(
                    '''su stack -c 'screen -S {0} -p {1} -X log on' '''.format(
                        screen_name, process_name)))
            time.sleep(1)
            self._logger.debug(
                self.client.run('rm {0}/{1}.log || true'.format(
                    logdir, process_name)))
            self._logger.debug(
                self.client.run('ln -sf {0} {1}/{2}.log'.format(
                    logfile, logdir, process_name)))
            for command in commands:
                cmd = '''su stack -c 'screen -S {0} -p {1} -X stuff "{2}\012"' '''.format(
                    screen_name, process_name, command)
                self._logger.debug(cmd)
                self._logger.debug(self.client.run(cmd))

        logdir = '/opt/stack/logs'
        screen_name = 'stack'
        if self._is_devstack is True:
            try:
                c_vol_screen_exists = stop_screen_process('c-vol')
                n_cpu_screen_exists = stop_screen_process('n-cpu')
                n_api_screen_exists = stop_screen_process('n-api')
                c_api_screen_exists = stop_screen_process('c-api')

                self.client.run('''su stack -c 'mkdir -p /opt/stack/logs' ''')

                if c_vol_screen_exists:
                    start_screen_process('c-vol', [
                        "export PYTHONPATH=\"${PYTHONPATH}:/opt/OpenvStorage\" ",
                        "newgrp ovs", "newgrp stack", "umask 0002",
                        "/usr/local/bin/cinder-volume --config-file /etc/cinder/cinder.conf & echo \$! >/opt/stack/status/stack/c-vol.pid; fg || echo  c-vol failed to start | tee \"/opt/stack/status/stack/c-vol.failure\" "
                    ])
                time.sleep(3)
                if n_cpu_screen_exists:
                    start_screen_process('n-cpu', [
                        "newgrp ovs", "newgrp stack",
                        "sg libvirtd /usr/local/bin/nova-compute --config-file /etc/nova/nova.conf & echo $! >/opt/stack/status/stack/n-cpu.pid; fg || echo n-cpu failed to start | tee \"/opt/stack/status/stack/n-cpu.failure\" "
                    ])
                time.sleep(3)
                if n_api_screen_exists:
                    start_screen_process('n-api', [
                        "export PYTHONPATH=\"${PYTHONPATH}:/opt/OpenvStorage\" ",
                        "/usr/local/bin/nova-api & echo $! >/opt/stack/status/stack/n-api.pid; fg || echo n-api failed to start | tee \"/opt/stack/status/stack/n-api.failure\" "
                    ])
                time.sleep(3)
                if c_api_screen_exists:
                    start_screen_process('c-api', [
                        "/usr/local/bin/cinder-api --config-file /etc/cinder/cinder.conf & echo $! >/opt/stack/status/stack/c-api.pid; fg || echo c-api failed to start | tee \"/opt/stack/status/stack/c-api.failure\" "
                    ])
                time.sleep(3)
            except SystemExit as se:  # failed command or non-zero exit codes raise SystemExit
                raise RuntimeError(str(se))

        else:
            for service_name in OSManager.get_openstack_services():
                if ServiceManager.has_service(service_name, self.client):
                    try:
                        ServiceManager.restart_service(service_name,
                                                       self.client)
                    except SystemExit as sex:
                        self._logger.debug(
                            'Failed to restart service {0}. {1}'.format(
                                service_name, sex))
            time.sleep(3)
Example #34
    def create_cluster(cluster_name,
                       ip,
                       server_port=DEFAULT_SERVER_PORT,
                       client_port=DEFAULT_CLIENT_PORT):
        """
        Creates a cluster
        :param cluster_name: Name of the cluster
        :type cluster_name: str

        :param ip: IP address of the first node of the new cluster
        :type ip: str

        :param server_port: Port to be used by server
        :type server_port: int

        :param client_port: Port to be used by client
        :type client_port: int

        :return: None
        """
        EtcdInstaller._logger.debug('Creating cluster "{0}" on {1}'.format(
            cluster_name, ip))

        client = SSHClient(ip, username='******')
        target_name = 'ovs-etcd-{0}'.format(cluster_name)
        if ServiceManager.has_service(
                target_name, client) and ServiceManager.get_service_status(
                    target_name, client)[0] is True:
            EtcdInstaller._logger.info(
                'Service {0} already configured and running'.format(
                    target_name))
            return

        node_name = System.get_my_machine_id(client)
        data_dir = EtcdInstaller.DATA_DIR.format(cluster_name)
        wal_dir = EtcdInstaller.WAL_DIR.format(cluster_name)
        abs_paths = [data_dir, wal_dir]
        client.dir_delete(abs_paths)
        client.dir_create(abs_paths)
        client.dir_chmod(abs_paths, 0755, recursive=True)
        client.dir_chown(abs_paths, 'ovs', 'ovs', recursive=True)

        base_name = 'ovs-etcd'
        ServiceManager.add_service(base_name, client,
                                   params={'CLUSTER': cluster_name,
                                           'NODE_ID': node_name,
                                           'DATA_DIR': data_dir,
                                           'WAL_DIR': wal_dir,
                                           'SERVER_URL': EtcdInstaller.SERVER_URL.format(ip, server_port),
                                           'CLIENT_URL': EtcdInstaller.CLIENT_URL.format(ip, client_port),
                                           'LOCAL_CLIENT_URL': EtcdInstaller.CLIENT_URL.format('127.0.0.1', client_port),
                                           'INITIAL_CLUSTER': '{0}={1}'.format(node_name, EtcdInstaller.SERVER_URL.format(ip, server_port)),
                                           'INITIAL_STATE': 'new',
                                           'INITIAL_PEERS': '-initial-advertise-peer-urls {0}'.format(EtcdInstaller.SERVER_URL.format(ip, server_port))},
                                   target_name=target_name)
        EtcdInstaller.start(cluster_name, client)
        EtcdInstaller.wait_for_cluster(cluster_name,
                                       client,
                                       client_port=client_port)

        EtcdInstaller._logger.debug(
            'Creating cluster "{0}" on {1} completed'.format(cluster_name, ip))
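
A minimal call sketch with a placeholder IP; server_port and client_port fall back to DEFAULT_SERVER_PORT and DEFAULT_CLIENT_PORT when omitted:

    EtcdInstaller.create_cluster(cluster_name='config', ip='10.100.1.10')
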
Example #35
    def gather_scrub_work():
        """
        Retrieve and execute scrub work
        :return: None
        """
        ScheduledTaskController._logger.info('Gather Scrub - Started')

        scrub_locations = {}
        for storage_driver in StorageDriverList.get_storagedrivers():
            for partition in storage_driver.partitions:
                if DiskPartition.ROLES.SCRUB == partition.role:
                    ScheduledTaskController._logger.info(
                        'Gather Scrub - Storage Router {0:<15} has SCRUB partition at {1}'
                        .format(storage_driver.storagerouter.ip,
                                partition.path))
                    if storage_driver.storagerouter not in scrub_locations:
                        try:
                            sshclient = SSHClient(storage_driver.storagerouter)
                            # Use ServiceManager(sshclient) to make sure ovs-workers are actually running
                            if ServiceManager.get_service_status(
                                    'workers', sshclient) is False:
                                ScheduledTaskController._logger.warning(
                                    'Gather Scrub - Storage Router {0:<15} - workers are not running'
                                    .format(storage_driver.storagerouter.ip))
                            else:
                                scrub_locations[
                                    storage_driver.storagerouter] = str(
                                        partition.path)
                        except UnableToConnectException:
                            ScheduledTaskController._logger.warning(
                                'Gather Scrub - Storage Router {0:<15} is not reachable'
                                .format(storage_driver.storagerouter.ip))

        if len(scrub_locations) == 0:
            raise RuntimeError('No scrub locations found')

        vdisk_guids = set()
        for vmachine in VMachineList.get_customer_vmachines():
            for vdisk in vmachine.vdisks:
                if vdisk.info['object_type'] == 'BASE':
                    vdisk_guids.add(vdisk.guid)
        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] == 'BASE':
                vdisk_guids.add(vdisk.guid)

        if len(vdisk_guids) == 0:
            ScheduledTaskController._logger.info('Gather Scrub - No scrub work needed')
            return

        ScheduledTaskController._logger.info(
            'Gather Scrub - Checking {0} volumes for scrub work'.format(
                len(vdisk_guids)))
        local_machineid = System.get_my_machine_id()
        local_storage_router = None
        local_scrub_location = None
        local_vdisks_to_scrub = []
        result_set = {}
        storage_router_list = []
        scrub_map = {}

        for index, scrub_info in enumerate(scrub_locations.items()):
            start_index = index * len(vdisk_guids) / len(scrub_locations)
            end_index = (index + 1) * len(vdisk_guids) / len(scrub_locations)
            storage_router = scrub_info[0]
            vdisk_guids_to_scrub = list(vdisk_guids)[start_index:end_index]
            local = storage_router.machine_id == local_machineid
            ScheduledTaskController._logger.info(
                'Gather Scrub - Storage Router {0:<15} ({1}) - Scrubbing {2} virtual disks'
                .format(storage_router.ip,
                        'local' if local is True else 'remote',
                        len(vdisk_guids_to_scrub)))

            if local is True:
                local_storage_router = storage_router
                local_scrub_location = scrub_info[1]
                local_vdisks_to_scrub = vdisk_guids_to_scrub
            else:
                result_set[storage_router.
                           ip] = ScheduledTaskController._execute_scrub_work.s(
                               scrub_location=scrub_info[1],
                               vdisk_guids=vdisk_guids_to_scrub).apply_async(
                                   routing_key='sr.{0}'.format(
                                       storage_router.machine_id))
                storage_router_list.append(storage_router)
                scrub_map[storage_router.ip] = vdisk_guids_to_scrub

        # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish
        processed_guids = []
        if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0:
            try:
                processed_guids = ScheduledTaskController._execute_scrub_work(
                    scrub_location=local_scrub_location,
                    vdisk_guids=local_vdisks_to_scrub)
            except Exception as ex:
                ScheduledTaskController._logger.error(
                    'Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'
                    .format(local_storage_router.ip, ex))

        all_results, failed_nodes = CeleryToolbox.manage_running_tasks(
            result_set,
            timesleep=60)  # Check every 60 seconds if tasks are still running

        for ip, result in all_results.iteritems():
            if isinstance(result, list):
                processed_guids.extend(result)
            else:
                ScheduledTaskController._logger.error(
                    'Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'
                    .format(ip, result))

        result_set = {}
        for failed_node in failed_nodes:
            ScheduledTaskController._logger.warning(
                'Scrubbing failed on node {0}. Will reschedule on another node.'
                .format(failed_node))
            vdisk_guids_to_scrub = scrub_map[failed_node]
            rescheduled_work = False
            for storage_router, scrub_location in scrub_locations.items():
                if storage_router.ip not in failed_nodes:
                    if storage_router.machine_id != local_machineid:
                        ScheduledTaskController._logger.info(
                            'Rescheduled scrub work from node {0} to node {1}.'
                            .format(failed_node, storage_router.ip))
                        result_set[
                            storage_router.
                            ip] = ScheduledTaskController._execute_scrub_work.s(
                                scrub_location=scrub_location,
                                vdisk_guids=vdisk_guids_to_scrub).apply_async(
                                    routing_key='sr.{0}'.format(
                                        storage_router.machine_id))
                        storage_router_list.append(storage_router)
                        rescheduled_work = True
                        break
            if rescheduled_work is False:
                if local_scrub_location is not None:
                    try:
                        processed_guids.extend(
                            ScheduledTaskController._execute_scrub_work(
                                scrub_location=local_scrub_location,
                                vdisk_guids=vdisk_guids_to_scrub))
                    except Exception as ex:
                        ScheduledTaskController._logger.error(
                            'Gather Scrub - Storage Router Local - Scrubbing failed with error:\n - {0}'
                            .format(ex))
                else:
                    ScheduledTaskController._logger.warning(
                        'No nodes left to reschedule work from node {0}'.
                        format(failed_node))

        if len(result_set) > 0:
            all_results2, failed_nodes = CeleryToolbox.manage_running_tasks(
                result_set, timesleep=60
            )  # Check every 60 seconds if tasks are still running

            for ip, result in all_results2.iteritems():
                if isinstance(result, list):
                    processed_guids.extend(result)
                else:
                    ScheduledTaskController._logger.error(
                        'Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'
                        .format(ip, result))

        if len(set(processed_guids)) != len(vdisk_guids) or set(
                processed_guids).difference(vdisk_guids):
            raise RuntimeError('Scrubbing failed for 1 or more storagerouters')
        ScheduledTaskController._logger.info('Gather Scrub - Finished')
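
The distribution of vdisk_guids over the scrub locations above relies on Python 2 integer division to produce contiguous, non-overlapping slices; a tiny standalone illustration with made-up counts:

    vdisk_count, location_count = 10, 3
    for index in range(location_count):
        start_index = index * vdisk_count / location_count      # 0, 3, 6
        end_index = (index + 1) * vdisk_count / location_count  # 3, 6, 10
        print '{0}:{1}'.format(start_index, end_index)           # slice bounds per scrub location
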
Example #36
 def post_upgrade(client):
     """
     Upgrade actions after the new packages have actually been installed
     :param client: SSHClient object
     :return: None
     """
     # If we can reach Etcd with a valid config, and there's still an old config file present, delete it
     from ovs.extensions.db.etcd.configuration import EtcdConfiguration
     path = '/opt/OpenvStorage/config/ovs.json'
     if EtcdConfiguration.exists(
             '/ovs/framework/cluster_id') and client.file_exists(path):
         client.file_delete(path)
     # Migrate volumedriver & albaproxy configuration files
     import uuid
     from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration
     from ovs.dal.lists.storagedriverlist import StorageDriverList
     from ovs.extensions.generic.system import System
     with remote(client.ip,
                 [StorageDriverConfiguration, os, open, json, System],
                 username='******') as rem:
         configuration_dir = '{0}/storagedriver/storagedriver'.format(
             EtcdConfiguration.get('/ovs/framework/paths|cfgdir'))
         host_id = rem.System.get_my_machine_id()
         if rem.os.path.exists(configuration_dir):
             for storagedriver in StorageDriverList.get_storagedrivers_by_storagerouter(
                     rem.System.get_my_storagerouter().guid):
                 vpool = storagedriver.vpool
                 if storagedriver.alba_proxy is not None:
                     config_tree = '/ovs/vpools/{0}/proxies/{1}/config/{{0}}'.format(
                         vpool.guid, storagedriver.alba_proxy.guid)
                     # ABM config
                     abm_config = '{0}/{1}_alba.cfg'.format(
                         configuration_dir, vpool.name)
                     if rem.os.path.exists(abm_config):
                         with rem.open(abm_config) as config_file:
                             EtcdConfiguration.set(
                                 config_tree.format('abm'),
                                 config_file.read(),
                                 raw=True)
                         rem.os.remove(abm_config)
                     # Albaproxy config
                     alba_config = '{0}/{1}_alba.json'.format(
                         configuration_dir, vpool.name)
                     if rem.os.path.exists(alba_config):
                         with rem.open(alba_config) as config_file:
                             config = rem.json.load(config_file)
                             del config['albamgr_cfg_file']
                             config[
                                 'albamgr_cfg_url'] = 'etcd://127.0.0.1:2379{0}'.format(
                                     config_tree.format('abm'))
                             EtcdConfiguration.set(
                                 config_tree.format('main'),
                                 json.dumps(config, indent=4),
                                 raw=True)
                         params = {
                             'VPOOL_NAME': vpool.name,
                             'VPOOL_GUID': vpool.guid,
                             'PROXY_ID': storagedriver.alba_proxy.guid
                         }
                         alba_proxy_service = 'ovs-albaproxy_{0}'.format(
                             vpool.name)
                         ServiceManager.add_service(
                             name='ovs-albaproxy',
                             params=params,
                             client=client,
                             target_name=alba_proxy_service)
                         rem.os.remove(alba_config)
                 # Volumedriver config
                 current_file = '{0}/{1}.json'.format(
                     configuration_dir, vpool.name)
                 if rem.os.path.exists(current_file):
                     readcache_size = 0
                     with rem.open(current_file) as config_file:
                         config = rem.json.load(config_file)
                     config['distributed_transaction_log'] = {}
                     config['distributed_transaction_log'][
                         'dtl_transport'] = config['failovercache'][
                             'failovercache_transport']
                     config['distributed_transaction_log'][
                         'dtl_path'] = config['failovercache'][
                             'failovercache_path']
                     config['volume_manager'][
                         'dtl_throttle_usecs'] = config['volume_manager'][
                             'foc_throttle_usecs']
                     del config['failovercache']
                     del config['volume_manager']['foc_throttle_usecs']
                     sdc = rem.StorageDriverConfiguration(
                         'storagedriver', vpool.guid,
                         storagedriver.storagedriver_id)
                     sdc.configuration = config
                     sdc.save(reload_config=False)
                     for mountpoint in config['content_addressed_cache'][
                             'clustercache_mount_points']:
                         readcache_size += int(mountpoint['size'].replace(
                             'KiB', ''))
                     params = {
                         'VPOOL_MOUNTPOINT':
                         storagedriver.mountpoint,
                         'HYPERVISOR_TYPE':
                         storagedriver.storagerouter.pmachine.hvtype,
                         'VPOOL_NAME':
                         vpool.name,
                         'CONFIG_PATH':
                         sdc.remote_path,
                         'UUID':
                         str(uuid.uuid4()),
                         'OVS_UID':
                         client.run('id -u ovs').strip(),
                         'OVS_GID':
                         client.run('id -g ovs').strip(),
                         'KILL_TIMEOUT':
                         str(
                             int(readcache_size / 1024.0 / 1024.0 / 6.0 +
                                 30))
                     }
                     vmware_mode = EtcdConfiguration.get(
                         '/ovs/framework/hosts/{0}/storagedriver|vmware_mode'
                         .format(host_id))
                     dtl_service = 'ovs-dtl_{0}'.format(vpool.name)
                     ServiceManager.add_service(name='ovs-dtl',
                                                params=params,
                                                client=client,
                                                target_name=dtl_service)
                     if vpool.backend_type.code == 'alba':
                         alba_proxy_service = 'ovs-albaproxy_{0}'.format(
                             vpool.name)
                         dependencies = [alba_proxy_service]
                     else:
                         dependencies = None
                     if vmware_mode == 'ganesha':
                         template_name = 'ovs-ganesha'
                     else:
                         template_name = 'ovs-volumedriver'
                     voldrv_service = 'ovs-volumedriver_{0}'.format(
                         vpool.name)
                     ServiceManager.add_service(
                         name=template_name,
                         params=params,
                         client=client,
                         target_name=voldrv_service,
                         additional_dependencies=dependencies)
                     rem.os.remove(current_file)
                 # Ganesha config, if available
                 current_file = '{0}/{1}_ganesha.conf'.format(
                     configuration_dir, vpool.name)
                 if rem.os.path.exists(current_file):
                     sdc = rem.StorageDriverConfiguration(
                         'storagedriver', vpool.guid,
                         storagedriver.storagedriver_id)
                     contents = ''
                     for template in ['ganesha-core', 'ganesha-export']:
                         contents += client.file_read(
                             '/opt/OpenvStorage/config/templates/{0}.conf'.
                             format(template))
                     params = {
                         'VPOOL_NAME':
                         vpool.name,
                         'VPOOL_MOUNTPOINT':
                         '/mnt/{0}'.format(vpool.name),
                         'CONFIG_PATH':
                         sdc.remote_path,
                         'NFS_FILESYSTEM_ID':
                         storagedriver.storagerouter.ip.split('.', 2)[-1]
                     }
                     for key, value in params.iteritems():
                         contents = contents.replace(
                             '<{0}>'.format(key), value)
                     client.file_write(current_file, contents)
Example #37
    def configure_host(self, ip):
        if self._is_devstack is False and self._is_openstack is False or self._cinder_installed is False or self._nova_installed is False:
            self._logger.warning(
                'Configure host: No OpenStack nor DevStack installation detected or Cinder and Nova plugins are not installed'
            )
            return

        # 1. Get Driver code
        self._logger.info('*** Configuring host with IP {0} ***'.format(ip))
        self._logger.info('  Copy driver code')
        remote_driver = "/opt/OpenvStorage/config/templates/cinder-volume-driver/{0}/openvstorage.py".format(
            self._stack_version)
        remote_version = '0.0.0'
        existing_version = '0.0.0'
        try:
            from cinder.volume.drivers import openvstorage
            if hasattr(openvstorage, 'OVSVolumeDriver'):
                existing_version = getattr(openvstorage.OVSVolumeDriver,
                                           'VERSION', '0.0.0')
        except ImportError:
            pass

        for line in self.client.file_read(remote_driver).splitlines():
            if 'VERSION = ' in line:
                remote_version = line.split('VERSION = ')[-1].strip().replace(
                    "'", "").replace('"', "")
                break

        nova_base_path = self._get_base_path('nova')
        cinder_base_path = self._get_base_path('cinder')

        if self._is_devstack is True:
            local_driver = '{0}/volume/drivers/openvstorage.py'.format(
                cinder_base_path)
        else:
            local_driver = '{0}/cinder/volume/drivers/openvstorage.py'.format(
                self._driver_location)

        if remote_version > existing_version:
            self._logger.debug(
                'Updating existing driver using {0} from version {1} to version {2}'
                .format(remote_driver, existing_version, remote_version))
            self.client.run('cp -f {0} {1}'.format(remote_driver,
                                                   local_driver))
        else:
            self._logger.debug('Using driver {0} version {1}'.format(
                local_driver, existing_version))

        # 2. Configure users and groups
        self._logger.info('  Add users to group ovs')
        users = ['libvirt-qemu', 'stack'] if self._is_devstack is True else self._openstack_users
        for user in users:
            self.client.run('usermod -a -G ovs {0}'.format(user))

        # 3. Apply patches
        self._logger.info('  Applying patches')
        if self._stack_version in ('liberty', 'mitaka', 'newton'):
            try:
                import os_brick
                cinder_brick_initiator_file = "{0}/initiator/connector.py".format(
                    os.path.dirname(os_brick.__file__))
            except ImportError:
                cinder_brick_initiator_file = ''
            if self._is_devstack is True:
                nova_volume_file = '{0}/virt/libvirt/volume/volume.py'.format(
                    nova_base_path)
            else:
                nova_volume_file = '{0}/nova/virt/libvirt/volume/volume.py'.format(
                    self._driver_location)
        else:
            cinder_brick_initiator_file = '{0}/cinder/brick/initiator/connector.py'.format(
                self._driver_location)
            if self._is_devstack is True:
                nova_volume_file = '{0}/virt/libvirt/volume.py'.format(
                    nova_base_path)
            else:
                nova_volume_file = '{0}/nova/virt/libvirt/volume.py'.format(
                    self._driver_location)
        if self._is_devstack is True:
            nova_driver_file = '{0}/virt/libvirt/driver.py'.format(
                nova_base_path)
        else:
            nova_driver_file = '{0}/nova/virt/libvirt/driver.py'.format(
                self._driver_location)

        self._logger.info('    Patching file {0}'.format(nova_volume_file))

        file_contents = self.client.file_read(nova_volume_file)
        if 'class LibvirtFileVolumeDriver(LibvirtBaseVolumeDriver):' not in file_contents:
            file_contents += '''
class LibvirtFileVolumeDriver(LibvirtBaseVolumeDriver):
    def __init__(self, connection):
        super(LibvirtFileVolumeDriver,
              self).__init__(connection, is_block_dev=False)

    def get_config(self, connection_info, disk_info):
        conf = super(LibvirtFileVolumeDriver,
                     self).get_config(connection_info, disk_info)
        conf.source_type = 'file'
        conf.source_path = connection_info['data']['device_path']
        return conf
'''
            self.client.file_write(nova_volume_file, file_contents)

        self._logger.info('    Patching file {0}'.format(nova_driver_file))

        file_contents = self.client.file_read(nova_driver_file)
        if self._stack_version in ('liberty', 'mitaka'):
            check_line = 'local=nova.virt.libvirt.volume.volume.LibvirtVolumeDriver'
            new_line = 'file=nova.virt.libvirt.volume.volume.LibvirtFileVolumeDriver'
        else:
            check_line = 'local=nova.virt.libvirt.volume.LibvirtVolumeDriver'
            new_line = 'file=nova.virt.libvirt.volume.LibvirtFileVolumeDriver'
        if new_line not in file_contents:
            for line in file_contents.splitlines():
                if check_line in line:
                    stripped_line = line.rstrip()
                    whitespaces = len(stripped_line) - len(
                        stripped_line.lstrip())
                    new_line = "{0}'{1}',\n".format(' ' * whitespaces,
                                                    new_line)
                    insert_at = file_contents.index(line)
                    fc = file_contents[:insert_at] + new_line + file_contents[insert_at:]
                    self.client.file_write(nova_driver_file, fc)
                    break

        if os.path.exists(cinder_brick_initiator_file):
            # fix brick/upload to glance
            self._logger.info(
                '    Patching file {0}'.format(cinder_brick_initiator_file))
            if self._stack_version in ('liberty', 'mitaka', 'newton'):
                self.client.run(
                    """sed -i 's/elif protocol == LOCAL:/elif protocol in [LOCAL, "FILE"]:/g' {0}"""
                    .format(cinder_brick_initiator_file))
            else:
                self.client.run(
                    """sed -i 's/elif protocol == "LOCAL":/elif protocol in ["LOCAL", "FILE"]:/g' {0}"""
                    .format(cinder_brick_initiator_file))

        # 4. Configure messaging driver
        self._logger.info('   - Configure messaging driver')
        nova_messaging_driver = 'nova.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging'
        cinder_messaging_driver = 'cinder.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging'

        with remote(ip, [RawConfigParser, open], 'root') as rem:
            for config_file, driver in {
                    self._NOVA_CONF: nova_messaging_driver,
                    self._CINDER_CONF: cinder_messaging_driver
            }.iteritems():
                changed = False
                cfg = rem.RawConfigParser()
                cfg.read([config_file])
                if cfg.has_option("DEFAULT", "notification_driver"):
                    if cfg.get("DEFAULT", "notification_driver") != driver:
                        changed = True
                        cfg.set("DEFAULT", "notification_driver", driver)
                else:
                    changed = True
                    cfg.set("DEFAULT", "notification_driver", driver)
                if cfg.has_option("DEFAULT", "notification_topics"):
                    notification_topics = cfg.get(
                        "DEFAULT", "notification_topics").split(",")
                    if "notifications" not in notification_topics:
                        notification_topics.append("notifications")
                        changed = True
                        cfg.set("DEFAULT", "notification_topics",
                                ",".join(notification_topics))
                else:
                    changed = True
                    cfg.set("DEFAULT", "notification_topics", "notifications")

                if config_file == self._NOVA_CONF:
                    for param, value in {
                            'notify_on_any_change': 'True',
                            'notify_on_state_change': 'vm_and_task_state'
                    }.iteritems():
                        if not cfg.has_option("DEFAULT", param):
                            changed = True
                            cfg.set("DEFAULT", param, value)

                if changed is True:
                    with rem.open(config_file, "w") as fp:
                        cfg.write(fp)

        # 5. Enable events consumer
        self._logger.info('   - Enabling events consumer service')
        service_name = 'openstack-events-consumer'
        if not ServiceManager.has_service(service_name, self.client):
            ServiceManager.add_service(service_name, self.client)
            ServiceManager.enable_service(service_name, self.client)
            ServiceManager.start_service(service_name, self.client)
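The messaging-driver step in configure_host reduces to an idempotent edit of an INI file with RawConfigParser: set notification_driver and only write the file back when something actually changed. A standalone sketch of that pattern, assuming a Python 2 environment like the code above (the file path in the commented usage is only an example):

    from ConfigParser import RawConfigParser

    def ensure_notification_driver(config_file, driver):
        # Returns True when the file was rewritten, False when it was already correct.
        cfg = RawConfigParser()
        cfg.read([config_file])
        if cfg.has_option('DEFAULT', 'notification_driver') and \
                cfg.get('DEFAULT', 'notification_driver') == driver:
            return False
        cfg.set('DEFAULT', 'notification_driver', driver)
        with open(config_file, 'w') as fp:
            cfg.write(fp)
        return True

    # Hypothetical usage:
    # ensure_notification_driver('/etc/nova/nova.conf', 'messaging')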
Exemple #38
0
    def unconfigure_host(self, ip):
        if (self._is_devstack is False and self._is_openstack is False) or self._cinder_installed is False or self._nova_installed is False:
            self._logger.warning(
                'Unconfigure host: No OpenStack nor DevStack installation detected or Cinder and Nova plugins are not installed'
            )
            return

        # 1. Remove driver code
        self._logger.info('*** Unconfiguring host with IP {0} ***'.format(ip))
        self._logger.info(' Removing driver code')
        if self._is_devstack is True:
            self.client.file_delete(self._devstack_driver)
        else:
            self.client.file_delete(
                '{0}/cinder/volume/drivers/openvstorage.py'.format(
                    self._driver_location))

        # 2. Removing users from group
        self._logger.info('  Removing users from group ovs')
        users = ['libvirt-qemu', 'stack'] if self._is_devstack is True else self._openstack_users
        for user in users:
            self.client.run('deluser {0} ovs'.format(user))

        # 3. Revert patches
        self._logger.info('  Reverting patches')
        nova_base_path = self._get_base_path('nova')
        cinder_base_path = self._get_base_path('cinder')
        if self._is_devstack is True:
            nova_volume_file = '{0}/virt/libvirt/volume.py'.format(
                nova_base_path)
            nova_driver_file = '{0}/virt/libvirt/driver.py'.format(
                nova_base_path)
            cinder_brick_initiator_file = '{0}/brick/initiator/connector.py'.format(
                cinder_base_path)
        else:
            nova_volume_file = '{0}/nova/virt/libvirt/volume.py'.format(
                self._driver_location)
            nova_driver_file = '{0}/nova/virt/libvirt/driver.py'.format(
                self._driver_location)
            cinder_brick_initiator_file = '{0}/cinder/brick/initiator/connector.py'.format(
                self._driver_location)

        self._logger.info(
            '    Reverting patched file: {0}'.format(nova_volume_file))
        new_contents = []

        skip_class = False
        for line in self.client.file_read(nova_volume_file).splitlines():
            if line.startswith(
                    'class LibvirtFileVolumeDriver(LibvirtBaseVolumeDriver):'):
                skip_class = True
                continue
            if line.startswith('class'):
                skip_class = False
            if skip_class is False:
                new_contents.append(line)
        self.client.file_write(nova_volume_file, '\n'.join(new_contents))

        self._logger.info(
            '    Reverting patched file: {0}'.format(nova_driver_file))
        new_contents = []

        for line in self.client.file_read(nova_driver_file).splitlines():
            stripped_line = line.strip()
            if stripped_line.startswith(
                    "'file=nova.virt.libvirt.volume.LibvirtFileVolumeDriver'"):
                continue
            new_contents.append(line)
        self.client.file_write(nova_driver_file, '\n'.join(new_contents))

        if os.path.exists(cinder_brick_initiator_file):
            self._logger.info('    Reverting patched file: {0}'.format(
                cinder_brick_initiator_file))
            self.client.run(
                """sed -i 's/elif protocol in ["LOCAL", "FILE"]:/elif protocol == "LOCAL":/g' {0}"""
                .format(cinder_brick_initiator_file))

        # 4. Unconfigure messaging driver
        self._logger.info('  Unconfiguring messaging driver')
        nova_messaging_driver = 'nova.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging'
        cinder_messaging_driver = 'cinder.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging'

        with remote(ip, [RawConfigParser, open], 'root') as rem:
            for config_file, driver in {
                    self._NOVA_CONF: nova_messaging_driver,
                    self._CINDER_CONF: cinder_messaging_driver
            }.iteritems():
                cfg = rem.RawConfigParser()
                cfg.read([config_file])
                if cfg.has_option("DEFAULT", "notification_driver"):
                    cfg.remove_option("DEFAULT", "notification_driver")
                if cfg.has_option("DEFAULT", "notification_topics"):
                    notification_topics = cfg.get(
                        "DEFAULT", "notification_topics").split(",")
                    if "notifications" in notification_topics:
                        notification_topics.remove("notifications")
                        cfg.set("DEFAULT", "notification_topics",
                                ",".join(notification_topics))

                if config_file == self._NOVA_CONF:
                    for param, value in {
                            'notify_on_any_change': 'True',
                            'notify_on_state_change': 'vm_and_task_state'
                    }.iteritems():
                        if cfg.has_option("DEFAULT", param):
                            cfg.remove_option("DEFAULT", param)

                with rem.open(config_file, "w") as fp:
                    cfg.write(fp)

        # 5. Disable events consumer
        self._logger.info('  Disabling events consumer')
        service_name = 'ovs-openstack-events-consumer'
        if ServiceManager.has_service(service_name, self.client):
            ServiceManager.stop_service(service_name, self.client)
            ServiceManager.disable_service(service_name, self.client)
            ServiceManager.remove_service(service_name, self.client)
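Reverting the Nova volume patch comes down to a small line filter: drop everything from the injected class header until the next top-level class statement. A self-contained sketch of that filter (the sample source below is made up):

    def strip_class(source, class_header):
        # Remove the block starting at class_header up to (not including) the next top-level class.
        kept, skipping = [], False
        for line in source.splitlines():
            if line.startswith(class_header):
                skipping = True
                continue
            if line.startswith('class '):
                skipping = False
            if not skipping:
                kept.append(line)
        return '\n'.join(kept)

    sample = ('class A(object):\n    pass\n'
              'class LibvirtFileVolumeDriver(LibvirtBaseVolumeDriver):\n    pass\n'
              'class B(object):\n    pass\n')
    print(strip_class(sample, 'class LibvirtFileVolumeDriver(LibvirtBaseVolumeDriver):'))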
Exemple #39
0
    def _deploy(config, filesystem, offline_nodes=None):
        """
        Deploys a complete cluster: distributes the configuration files, creates the directories and adds the services
        """
        if os.environ.get('RUNNING_UNITTESTS') == 'True':
            if filesystem is True:
                raise NotImplementedError(
                    'At this moment, there is no support for unittesting filesystem backend Arakoon clusters'
                )

        ArakoonInstaller._logger.debug('Deploying cluster {0}'.format(
            config.cluster_id))
        if offline_nodes is None:
            offline_nodes = []
        for node in config.nodes:
            if node.ip in offline_nodes:
                continue
            ArakoonInstaller._logger.debug(
                '  Deploying cluster {0} on {1}'.format(
                    config.cluster_id, node.ip))
            root_client = SSHClient(node.ip, username='******')

            # Distributes a configuration file to all its nodes
            config.write_config(node.ip)

            # Create dirs as root because mountpoint /mnt/cache1 is typically owned by root
            abs_paths = {node.tlog_dir, node.home}  # That's a set
            if node.log_sinks.startswith('/'):
                abs_paths.add(os.path.dirname(os.path.abspath(node.log_sinks)))
            if node.crash_log_sinks.startswith('/'):
                abs_paths.add(
                    os.path.dirname(os.path.abspath(node.crash_log_sinks)))
            abs_paths = list(abs_paths)
            root_client.dir_create(abs_paths)
            root_client.dir_chmod(abs_paths, 0755, recursive=True)
            root_client.dir_chown(abs_paths, 'ovs', 'ovs', recursive=True)

            # Creates services for/on all nodes in the config
            if config.filesystem is True:
                config_path = config.config_path
            else:
                config_path = Configuration.get_configuration_path(
                    config.config_path)
            base_name = 'ovs-arakoon'
            target_name = 'ovs-arakoon-{0}'.format(config.cluster_id)
            ServiceManager.add_service(
                base_name,
                root_client,
                params={'CLUSTER': config.cluster_id,
                        'NODE_ID': node.name,
                        'CONFIG_PATH': config_path,
                        'STARTUP_DEPENDENCY': ('started ovs-watcher-config' if filesystem is False
                                               else '(local-filesystems and started networking)')},
                target_name=target_name)
            ArakoonInstaller._logger.debug(
                '  Deploying cluster {0} on {1} completed'.format(
                    config.cluster_id, node.ip))
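The directory preparation in _deploy is plain set arithmetic over the node paths and can be exercised in isolation. A minimal sketch with hypothetical values (the real node object carries these as attributes):

    import os

    def collect_arakoon_dirs(tlog_dir, home, log_sinks, crash_log_sinks):
        # Only add sink directories when the sinks are real filesystem paths
        # (they may also be non-path sinks such as 'console:').
        paths = {tlog_dir, home}
        for sink in (log_sinks, crash_log_sinks):
            if sink.startswith('/'):
                paths.add(os.path.dirname(os.path.abspath(sink)))
        return sorted(paths)

    # Hypothetical usage
    print(collect_arakoon_dirs('/mnt/ssd1/arakoon/tlogs', '/mnt/ssd1/arakoon/db',
                               '/var/log/ovs/arakoon.log', 'console:'))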
Exemple #40
0
    def is_host_configured(self, ip):
        if (
                self._is_devstack is False and self._is_openstack is False
        ) or self._cinder_installed is False or self._nova_installed is False:
            self._logger.warning(
                'Host configured: No OpenStack nor DevStack installation detected or Cinder and Nova plugins are not installed'
            )
            return False

        # 1. Check driver code
        if self._is_devstack is True:
            if not self.client.file_exists(filename=self._devstack_driver):
                self._logger.info('  File "{0}" does not exist'.format(
                    self._devstack_driver))
                return False
        else:
            if not self.client.file_exists(
                    filename='{0}/cinder/volume/drivers/openvstorage.py'.format(self._driver_location)):
                self._logger.info(
                    '  File "{0}/cinder/volume/drivers/openvstorage.py" does not exist'
                    .format(self._driver_location))
                return False

        # 2. Check configured users
        ovs_id = self.client.run('id -u ovs')
        if not ovs_id:
            self._logger.info('Failed to determine the OVS user group ID')
            return False

        users = ['libvirt-qemu', 'stack'] if self._is_devstack is True else self._openstack_users
        for user in users:
            if '{0}(ovs)'.format(ovs_id) not in self.client.run(
                    'id -a {0}'.format(user)):
                self._logger.info(
                    'User "{0}" is not part of the OVS user group'.format(user))
                return False

        # 3. Check patches
        nova_base_path = self._get_base_path('nova')
        cinder_base_path = self._get_base_path('cinder')
        if self._stack_version in ('liberty', 'mitaka', 'newton'):
            try:
                import os_brick
                cinder_brick_initiator_file = "{0}/initiator/connector.py".format(
                    os.path.dirname(os_brick.__file__))
            except ImportError:
                cinder_brick_initiator_file = ''
            if self._is_devstack is True:
                nova_volume_file = '{0}/virt/libvirt/volume/volume.py'.format(
                    nova_base_path)
            else:
                nova_volume_file = '{0}/nova/virt/libvirt/volume/volume.py'.format(
                    self._driver_location)
        else:
            if self._is_devstack is True:
                nova_volume_file = '{0}/virt/libvirt/volume.py'.format(
                    nova_base_path)
            else:
                nova_volume_file = '{0}/nova/virt/libvirt/volume.py'.format(
                    self._driver_location)
            cinder_brick_initiator_file = '{0}/brick/initiator/connector.py'.format(
                cinder_base_path)

        if self._is_devstack is True:
            nova_driver_file = '{0}/virt/libvirt/driver.py'.format(
                nova_base_path)
        else:
            nova_driver_file = '{0}/nova/virt/libvirt/driver.py'.format(
                self._driver_location)

        file_contents = self.client.file_read(nova_volume_file)
        if 'class LibvirtFileVolumeDriver(LibvirtBaseVolumeDriver):' not in file_contents:
            self._logger.info('File "{0}" is not configured properly'.format(
                nova_volume_file))
            return False

        if self._stack_version in ('liberty', 'mitaka'):
            check_line = 'file=nova.virt.libvirt.volume.volume.LibvirtFileVolumeDriver'
        else:
            check_line = 'file=nova.virt.libvirt.volume.LibvirtFileVolumeDriver'

        file_contents = self.client.file_read(nova_driver_file)
        if check_line not in file_contents:
            self._logger.info('File "{0}" is not configured properly'.format(
                nova_driver_file))
            return False

        if os.path.exists(cinder_brick_initiator_file):
            file_contents = self.client.file_read(cinder_brick_initiator_file)
            if self._stack_version in ('liberty', 'mitaka', 'newton'):
                if 'elif protocol in [LOCAL, "FILE"]:' not in file_contents:
                    self._logger.info(
                        'File "{0}" is not configured properly'.format(
                            cinder_brick_initiator_file))
                    return False
            else:
                if 'elif protocol in ["LOCAL", "FILE"]:' not in file_contents:
                    self._logger.info(
                        'File "{0}" is not configured properly'.format(
                            cinder_brick_initiator_file))
                    return False

        # 4. Check messaging driver configuration
        nova_messaging_driver = 'nova.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging'
        cinder_messaging_driver = 'cinder.openstack.common.notifier.rpc_notifier' if self._stack_version == 'juno' else 'messaging'

        host_configured = True
        with remote(ip, [RawConfigParser], 'root') as rem:
            for config_file, driver in {
                    self._NOVA_CONF: nova_messaging_driver,
                    self._CINDER_CONF: cinder_messaging_driver
            }.iteritems():
                cfg = rem.RawConfigParser()
                cfg.read([config_file])
                host_configured &= cfg.get("DEFAULT",
                                           "notification_driver") == driver
                host_configured &= "notifications" in cfg.get(
                    "DEFAULT", "notification_topics")

                if config_file == self._NOVA_CONF:
                    host_configured &= cfg.get(
                        "DEFAULT", "notify_on_any_change") == "True"
                    host_configured &= cfg.get(
                        "DEFAULT",
                        "notify_on_state_change") == "vm_and_task_state"

        if host_configured is False:
            self._logger.info(
                'Nova and/or Cinder configuration files are not configured properly'
            )
            return host_configured

        # 5. Check events consumer service
        service_name = 'ovs-openstack-events-consumer'
        if not (ServiceManager.has_service(service_name, self.client)
                and ServiceManager.get_service_status(service_name,
                                                      self.client) is True):
            self._logger.info(
                'Service "{0}" is not configured properly'.format(
                    service_name))
            return False

        return True
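The configuration checks in is_host_configured all reduce to "does this DEFAULT option have the expected value", accumulated with &=, which hides which check failed. A small alternative helper that reports the first failing option (the option names in the commented usage mirror the ones checked above; the file path is an example):

    from ConfigParser import RawConfigParser, NoOptionError

    def find_failing_option(config_file, expected):
        # expected: {option_name: predicate(value) -> bool}, all options in the DEFAULT section.
        cfg = RawConfigParser()
        cfg.read([config_file])
        for option, predicate in expected.items():
            try:
                if not predicate(cfg.get('DEFAULT', option)):
                    return option
            except NoOptionError:
                return option
        return None

    # Hypothetical usage against a Nova configuration file
    # find_failing_option('/etc/nova/nova.conf',
    #                     {'notification_driver': lambda v: v == 'messaging',
    #                      'notification_topics': lambda v: 'notifications' in v.split(','),
    #                      'notify_on_any_change': lambda v: v == 'True'})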
Exemple #41
0
    def execute_scrub_work(queue, vpool, scrub_info, error_messages):
        """
        Executes scrub work for a given vDisk queue and vPool, based on scrub_info
        :param queue: a Queue with vDisk guids that need to be scrubbed (they should all belong to a single vPool)
        :type queue: Queue
        :param vpool: the vPool object of the vDisks
        :type vpool: VPool
        :param scrub_info: A dict containing scrub information: `scrub_path` with the path where to scrub and `storage_router` with the StorageRouter
                           that needs to do the work
        :type scrub_info: dict
        :param error_messages: A list of error messages to be filled
        :type error_messages: list
        :return: None (failures are appended to error_messages)
        :rtype: NoneType
        """
        def _verify_mds_config(current_vdisk):
            current_vdisk.invalidate_dynamics('info')
            vdisk_configs = current_vdisk.info['metadata_backend_config']
            if len(vdisk_configs) == 0:
                raise RuntimeError('Could not load MDS configuration')
            return vdisk_configs

        client = None
        lock_time = 5 * 60
        storagerouter = scrub_info['storage_router']
        scrub_directory = '{0}/scrub_work_{1}_{2}'.format(
            scrub_info['scrub_path'], vpool.name, storagerouter.name)
        scrub_config_key = 'ovs/vpools/{0}/proxies/scrub/scrub_config_{1}'.format(
            vpool.guid, storagerouter.guid)
        backend_config_key = 'ovs/vpools/{0}/proxies/scrub/backend_config_{1}'.format(
            vpool.guid, storagerouter.guid)
        alba_proxy_service = 'ovs-albaproxy_{0}_{1}_scrub'.format(
            vpool.name, storagerouter.name)

        # Deploy a proxy
        try:
            with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
                ScheduledTaskController._logger.info(
                    'Scrubber - vPool {0} - StorageRouter {1} - Deploying ALBA proxy {2}'
                    .format(vpool.name, storagerouter.name,
                            alba_proxy_service))
                client = SSHClient(storagerouter, 'root')
                client.dir_create(scrub_directory)
                client.dir_chmod(scrub_directory, 0777)  # Celery task executed by 'ovs' user and should be able to write in it
                if ServiceManager.has_service(
                        name=alba_proxy_service, client=client
                ) is True and ServiceManager.get_service_status(
                        name=alba_proxy_service, client=client) is True:
                    ScheduledTaskController._logger.info(
                        'Scrubber - vPool {0} - StorageRouter {1} - Re-using existing proxy service {2}'
                        .format(vpool.name, storagerouter.name,
                                alba_proxy_service))
                    scrub_config = Configuration.get(scrub_config_key)
                else:
                    machine_id = System.get_my_machine_id(client)
                    port_range = Configuration.get(
                        '/ovs/framework/hosts/{0}/ports|storagedriver'.format(
                            machine_id))
                    port = System.get_free_ports(selected_range=port_range,
                                                 nr=1,
                                                 client=client)[0]
                    # Scrub config
                    # {u'albamgr_cfg_url': u'arakoon://config/ovs/vpools/71e2f717-f270-4a41-bbb0-d4c8c084d43e/proxies/64759516-3471-4321-b912-fb424568fc5b/config/abm?ini=%2Fopt%2FOpenvStorage%2Fconfig%2Farakoon_cacc.ini',
                    #  u'fragment_cache': [u'none'],
                    #  u'ips': [u'127.0.0.1'],
                    #  u'log_level': u'info',
                    #  u'manifest_cache_size': 17179869184,
                    #  u'port': 0,
                    #  u'transport': u'tcp'}

                    # Backend config
                    # {u'alba_connection_host': u'10.100.193.155',
                    #  u'alba_connection_port': 26204,
                    #  u'alba_connection_preset': u'preset',
                    #  u'alba_connection_timeout': 15,
                    #  u'alba_connection_transport': u'TCP',
                    #  u'backend_interface_retries_on_error': 5,
                    #  u'backend_interface_retry_backoff_multiplier': 2.0,
                    #  u'backend_interface_retry_interval_secs': 1,
                    #  u'backend_type': u'ALBA'}
                    scrub_config = Configuration.get(
                        'ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(
                            vpool.guid))
                    scrub_config['port'] = port
                    scrub_config['transport'] = 'tcp'
                    Configuration.set(scrub_config_key,
                                      json.dumps(scrub_config, indent=4),
                                      raw=True)

                    params = {'VPOOL_NAME': vpool.name,
                              'LOG_SINK': LogHandler.get_sink_path('alba_proxy'),
                              'CONFIG_PATH': Configuration.get_configuration_path(scrub_config_key)}
                    ServiceManager.add_service(name='ovs-albaproxy',
                                               params=params,
                                               client=client,
                                               target_name=alba_proxy_service)
                    ServiceManager.start_service(name=alba_proxy_service,
                                                 client=client)
                    ScheduledTaskController._logger.info(
                        'Scrubber - vPool {0} - StorageRouter {1} - Deployed ALBA proxy {2}'
                        .format(vpool.name, storagerouter.name,
                                alba_proxy_service))

                backend_config = Configuration.get(
                    'ovs/vpools/{0}/hosts/{1}/config'.format(
                        vpool.guid, vpool.storagedrivers[0].storagedriver_id
                    ))['backend_connection_manager']
                backend_config['alba_connection_host'] = '127.0.0.1'
                backend_config['alba_connection_port'] = scrub_config['port']
                Configuration.set(
                    backend_config_key,
                    json.dumps({"backend_connection_manager": backend_config},
                               indent=4),
                    raw=True)
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - An error occurred deploying ALBA proxy {2}'.format(
                vpool.name, storagerouter.name, alba_proxy_service)
            error_messages.append(message)
            ScheduledTaskController._logger.exception(message)
            if client is not None and ServiceManager.has_service(
                    name=alba_proxy_service, client=client) is True:
                if ServiceManager.get_service_status(name=alba_proxy_service,
                                                     client=client) is True:
                    ServiceManager.stop_service(name=alba_proxy_service,
                                                client=client)
                ServiceManager.remove_service(name=alba_proxy_service,
                                              client=client)
            if Configuration.exists(scrub_config_key):
                Configuration.delete(scrub_config_key)

        try:
            # Empty the queue with vDisks to scrub
            with remote(storagerouter.ip, [VDisk]) as rem:
                while True:
                    vdisk = None
                    vdisk_guid = queue.get(False)
                    try:
                        # Check MDS master is local. Trigger MDS handover if necessary
                        vdisk = rem.VDisk(vdisk_guid)
                        ScheduledTaskController._logger.info(
                            'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Started scrubbing at location {3}'
                            .format(vpool.name, storagerouter.name, vdisk.name,
                                    scrub_directory))
                        configs = _verify_mds_config(current_vdisk=vdisk)
                        storagedriver = StorageDriverList.get_by_storagedriver_id(
                            vdisk.storagedriver_id)
                        if configs[0].get(
                                'ip') != storagedriver.storagerouter.ip:
                            ScheduledTaskController._logger.info(
                                'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - MDS master is not local, trigger handover'
                                .format(vpool.name, storagerouter.name,
                                        vdisk.name))
                            MDSServiceController.ensure_safety(
                                VDisk(vdisk_guid)
                            )  # Do not use a remote VDisk instance here
                            configs = _verify_mds_config(current_vdisk=vdisk)
                            if configs[0].get(
                                    'ip') != storagedriver.storagerouter.ip:
                                ScheduledTaskController._logger.warning(
                                    'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Skipping because master MDS still not local'
                                    .format(vpool.name, storagerouter.name,
                                            vdisk.name))
                                continue

                        # Do the actual scrubbing
                        with vdisk.storagedriver_client.make_locked_client(
                                str(vdisk.volume_id)) as locked_client:
                            ScheduledTaskController._logger.info(
                                'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Retrieve and apply scrub work'
                                .format(vpool.name, storagerouter.name,
                                        vdisk.name))
                            work_units = locked_client.get_scrubbing_workunits()
                            for work_unit in work_units:
                                res = locked_client.scrub(
                                    work_unit=work_unit,
                                    scratch_dir=scrub_directory,
                                    log_sinks=[
                                        LogHandler.get_sink_path(
                                            'scrubber', allow_override=True)
                                    ],
                                    backend_config=Configuration.get_configuration_path(backend_config_key))
                                locked_client.apply_scrubbing_result(
                                    scrubbing_work_result=res)
                            if work_units:
                                ScheduledTaskController._logger.info(
                                    'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - {3} work units successfully applied'
                                    .format(vpool.name, storagerouter.name,
                                            vdisk.name, len(work_units)))
                            else:
                                ScheduledTaskController._logger.info(
                                    'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - No scrubbing required'
                                    .format(vpool.name, storagerouter.name,
                                            vdisk.name))
                    except Exception:
                        if vdisk is None:
                            message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk with guid {2} could not be found'.format(
                                vpool.name, storagerouter.name, vdisk_guid)
                        else:
                            message = 'Scrubber - vPool {0} - StorageRouter {1} - vDisk {2} - Scrubbing failed'.format(
                                vpool.name, storagerouter.name, vdisk.name)
                        error_messages.append(message)
                        ScheduledTaskController._logger.exception(message)

        except Empty:  # Raised when all items have been fetched from the queue
            ScheduledTaskController._logger.info(
                'Scrubber - vPool {0} - StorageRouter {1} - Queue completely processed'
                .format(vpool.name, storagerouter.name))
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - Scrubbing failed'.format(
                vpool.name, storagerouter.name)
            error_messages.append(message)
            ScheduledTaskController._logger.exception(message)

        # Delete the proxy again
        try:
            with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
                ScheduledTaskController._logger.info(
                    'Scrubber - vPool {0} - StorageRouter {1} - Removing service {2}'
                    .format(vpool.name, storagerouter.name,
                            alba_proxy_service))
                client = SSHClient(storagerouter, 'root')
                client.dir_delete(scrub_directory)
                if ServiceManager.has_service(alba_proxy_service,
                                              client=client):
                    ServiceManager.stop_service(alba_proxy_service,
                                                client=client)
                    ServiceManager.remove_service(alba_proxy_service,
                                                  client=client)
                if Configuration.exists(scrub_config_key):
                    Configuration.delete(scrub_config_key)
                ScheduledTaskController._logger.info(
                    'Scrubber - vPool {0} - StorageRouter {1} - Removed service {2}'
                    .format(vpool.name, storagerouter.name,
                            alba_proxy_service))
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - Removing service {2} failed'.format(
                vpool.name, storagerouter.name, alba_proxy_service)
            error_messages.append(message)
            ScheduledTaskController._logger.exception(message)
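The scrub worker drains a shared Queue non-blockingly and treats the Empty exception as the normal termination signal, collecting per-item failures instead of aborting. A stripped-down sketch of that loop (the work callable is a stand-in for the actual scrubbing):

    from Queue import Queue, Empty

    def drain(queue, work, errors):
        # Pop guids until the queue is exhausted; collect failures instead of aborting.
        while True:
            try:
                guid = queue.get(False)
            except Empty:
                return
            try:
                work(guid)
            except Exception as ex:
                errors.append('Scrubbing {0} failed: {1}'.format(guid, ex))

    # Hypothetical usage
    q = Queue()
    for guid in ['a', 'b', 'c']:
        q.put(guid)
    errors = []
    drain(q, lambda guid: None, errors)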
Exemple #42
0
    def extend_cluster(master_ip, new_ip, cluster_name, server_port=DEFAULT_SERVER_PORT, client_port=DEFAULT_CLIENT_PORT):
        """
        Extends a cluster to a given new node
        :param master_ip: IP of one of the already existing nodes
        :type master_ip: str

        :param new_ip: IP address of the node to be added
        :type new_ip: str

        :param cluster_name: Name of the cluster to be extended
        :type cluster_name: str

        :param server_port: Port to be used by server
        :type server_port: int

        :param client_port: Port to be used by client
        :type client_port: int
        """
        EtcdInstaller._logger.debug('Extending cluster "{0}" from {1} to {2}'.format(cluster_name, master_ip, new_ip))

        master_client = SSHClient(master_ip, username='******')
        if not EtcdInstaller._is_healty(cluster_name, master_client, client_port=client_port):
            raise RuntimeError('Cluster "{0}" unhealthy, aborting extend'.format(cluster_name))

        command = 'etcdctl member list'
        new_server_url = EtcdInstaller.SERVER_URL.format(new_ip, server_port)
        if client_port != EtcdInstaller.DEFAULT_CLIENT_PORT:
            command = 'etcdctl --peers={0}:{1} member list'.format(master_ip, client_port)
        cluster_members = master_client.run(command).splitlines()
        for cluster_member in cluster_members:
            if new_server_url in cluster_member:
                EtcdInstaller._logger.info('Node {0} already member of etcd cluster'.format(new_ip))
                return

        current_cluster = []
        for item in cluster_members:
            info = re.search(EtcdInstaller.MEMBER_REGEX, item).groupdict()
            current_cluster.append('{0}={1}'.format(info['name'], info['peer']))

        new_client = SSHClient(new_ip, username='******')
        node_name = System.get_my_machine_id(new_client)
        current_cluster.append('{0}={1}'.format(node_name, new_server_url))

        data_dir = EtcdInstaller.DATA_DIR.format(cluster_name)
        wal_dir = EtcdInstaller.WAL_DIR.format(cluster_name)
        abs_paths = [data_dir, wal_dir]
        new_client.dir_delete(abs_paths)
        new_client.dir_create(abs_paths)
        new_client.dir_chmod(abs_paths, 0755, recursive=True)
        new_client.dir_chown(abs_paths, 'ovs', 'ovs', recursive=True)

        base_name = 'ovs-etcd'
        target_name = 'ovs-etcd-{0}'.format(cluster_name)
        EtcdInstaller.stop(cluster_name, new_client)  # Stop a possible proxy service
        ServiceManager.add_service(base_name, new_client,
                                   params={'CLUSTER': cluster_name,
                                           'NODE_ID': node_name,
                                           'DATA_DIR': data_dir,
                                           'WAL_DIR': wal_dir,
                                           'SERVER_URL': new_server_url,
                                           'CLIENT_URL': EtcdInstaller.CLIENT_URL.format(new_ip, client_port),
                                           'LOCAL_CLIENT_URL': EtcdInstaller.CLIENT_URL.format('127.0.0.1', client_port),
                                           'INITIAL_CLUSTER': ','.join(current_cluster),
                                           'INITIAL_STATE': 'existing',
                                           'INITIAL_PEERS': ''},
                                   target_name=target_name)

        add_command = 'etcdctl member add {0} {1}'.format(node_name, new_server_url)
        if client_port != EtcdInstaller.DEFAULT_CLIENT_PORT:
            add_command = 'etcdctl --peers={0}:{1} member add {2} {3}'.format(master_ip, client_port, node_name, new_server_url)
        master_client.run(add_command)
        EtcdInstaller.start(cluster_name, new_client)
        EtcdInstaller.wait_for_cluster(cluster_name, new_client, client_port=client_port)

        EtcdInstaller._logger.debug('Extending cluster "{0}" from {1} to {2} completed'.format(cluster_name, master_ip, new_ip))
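Building the INITIAL_CLUSTER parameter boils down to parsing 'etcdctl member list' output into name=peer_url pairs. A standalone sketch with sample output and an illustrative regular expression (EtcdInstaller.MEMBER_REGEX itself is not shown in this excerpt and may differ):

    import re

    # Illustrative pattern; the real EtcdInstaller.MEMBER_REGEX may differ.
    MEMBER_REGEX = re.compile(r'^(?P<id>[^:]+): name=(?P<name>\S+) peerURLs=(?P<peer>\S+) clientURLs=(?P<client>\S*)')

    sample = '8e9e05c52164694d: name=node1 peerURLs=http://10.0.0.1:2380 clientURLs=http://10.0.0.1:2379'
    info = MEMBER_REGEX.search(sample).groupdict()
    print('{0}={1}'.format(info['name'], info['peer']))  # node1=http://10.0.0.1:2380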