Beispiel #1
0
    def migrate(previous_version, master_ips=None, extra_ips=None):
        """
        Migrates from any version to any version, running all migrations required
        If previous_version is for example 0 and this script is at
        verison 3 it will execute two steps:
          - 1 > 2
          - 2 > 3
        :param previous_version: The previous version from which to start the migration.
        :param master_ips: IP addresses of the MASTER nodes
        :param extra_ips: IP addresses of the EXTRA nodes
        """

        logger = LogHandler.get('extensions', name='migration')
        working_version = previous_version

        # Version 1 introduced:
        # - Flexible SSD layout
        if working_version < 1:
            try:
                from ovs.extensions.generic.configuration import Configuration
                if Configuration.exists('ovs.arakoon'):
                    Configuration.delete('ovs.arakoon', remove_root=True)
                Configuration.set('ovs.core.ovsdb', '/opt/OpenvStorage/db')
            except:
                logger.exception('Error migrating to version 1')

            working_version = 1

        # Version 2 introduced:
        # - Registration
        if working_version < 2:
            try:
                import time
                from ovs.extensions.generic.configuration import Configuration
                if not Configuration.exists('ovs.core.registered'):
                    Configuration.set('ovs.core.registered', False)
                    Configuration.set('ovs.core.install_time', time.time())
            except:
                logger.exception('Error migrating to version 2')

        working_version = 2

        # Version 3 introduced:
        # - New arakoon clients
        if working_version < 3:
            try:
                from ovs.extensions.db.arakoon import ArakoonInstaller
                reload(ArakoonInstaller)
                from ovs.extensions.db.arakoon.ArakoonInstaller import ArakoonInstaller
                from ovs.extensions.generic.sshclient import SSHClient
                from ovs.extensions.generic.configuration import Configuration
                if master_ips is not None:
                    for ip in master_ips:
                        client = SSHClient(ip)
                        if client.dir_exists(
                                ArakoonInstaller.ARAKOON_CONFIG_DIR):
                            for cluster_name in client.dir_list(
                                    ArakoonInstaller.ARAKOON_CONFIG_DIR):
                                try:
                                    ArakoonInstaller.deploy_cluster(
                                        cluster_name, ip)
                                except:
                                    pass
                if Configuration.exists('ovs.core.storage.persistent'):
                    Configuration.set('ovs.core.storage.persistent',
                                      'pyrakoon')
            except:
                logger.exception('Error migrating to version 3')

            working_version = 3

        # Version 4 introduced:
        # - Etcd
        if working_version < 4:
            try:
                import os
                import json
                from ConfigParser import RawConfigParser
                from ovs.extensions.db.etcd import installer
                reload(installer)
                from ovs.extensions.db.etcd.installer import EtcdInstaller
                from ovs.extensions.db.etcd.configuration import EtcdConfiguration
                from ovs.extensions.generic.system import System
                host_id = System.get_my_machine_id()
                etcd_migrate = False
                if EtcdInstaller.has_cluster('127.0.0.1', 'config'):
                    etcd_migrate = True
                else:
                    if master_ips is not None and extra_ips is not None:
                        cluster_ip = None
                        for ip in master_ips + extra_ips:
                            if EtcdInstaller.has_cluster(ip, 'config'):
                                cluster_ip = ip
                                break
                        node_ip = None
                        path = '/opt/OpenvStorage/config/ovs.json'
                        if os.path.exists(path):
                            with open(path) as config_file:
                                config = json.load(config_file)
                                node_ip = config['grid']['ip']
                        if node_ip is not None:
                            if cluster_ip is None:
                                EtcdInstaller.create_cluster('config', node_ip)
                                EtcdConfiguration.initialize()
                                EtcdConfiguration.initialize_host(host_id)
                            else:
                                EtcdInstaller.extend_cluster(
                                    cluster_ip, node_ip, 'config')
                                EtcdConfiguration.initialize_host(host_id)
                            etcd_migrate = True
                if etcd_migrate is True:
                    # Migrating configuration files
                    path = '/opt/OpenvStorage/config/ovs.json'
                    if os.path.exists(path):
                        with open(path) as config_file:
                            config = json.load(config_file)
                            EtcdConfiguration.set('/ovs/framework/cluster_id',
                                                  config['support']['cid'])
                            if not EtcdConfiguration.exists(
                                    '/ovs/framework/install_time'):
                                EtcdConfiguration.set(
                                    '/ovs/framework/install_time',
                                    config['core']['install_time'])
                            else:
                                EtcdConfiguration.set(
                                    '/ovs/framework/install_time',
                                    min(
                                        EtcdConfiguration.get(
                                            '/ovs/framework/install_time'),
                                        config['core']['install_time']))
                            EtcdConfiguration.set('/ovs/framework/registered',
                                                  config['core']['registered'])
                            EtcdConfiguration.set(
                                '/ovs/framework/plugins/installed',
                                config['plugins'])
                            EtcdConfiguration.set('/ovs/framework/stores',
                                                  config['core']['storage'])
                            EtcdConfiguration.set(
                                '/ovs/framework/paths', {
                                    'cfgdir': config['core']['cfgdir'],
                                    'basedir': config['core']['basedir'],
                                    'ovsdb': config['core']['ovsdb']
                                })
                            EtcdConfiguration.set(
                                '/ovs/framework/support', {
                                    'enablesupport':
                                    config['support']['enablesupport'],
                                    'enabled':
                                    config['support']['enabled'],
                                    'interval':
                                    config['support']['interval']
                                })
                            EtcdConfiguration.set(
                                '/ovs/framework/storagedriver', {
                                    'mds_safety':
                                    config['storagedriver']['mds']['safety'],
                                    'mds_tlogs':
                                    config['storagedriver']['mds']['tlogs'],
                                    'mds_maxload':
                                    config['storagedriver']['mds']['maxload']
                                })
                            EtcdConfiguration.set(
                                '/ovs/framework/webapps', {
                                    'html_endpoint':
                                    config['webapps']['html_endpoint'],
                                    'oauth2':
                                    config['webapps']['oauth2']
                                })
                            EtcdConfiguration.set(
                                '/ovs/framework/messagequeue', {
                                    'endpoints': [],
                                    'protocol':
                                    config['core']['broker']['protocol'],
                                    'user':
                                    config['core']['broker']['login'],
                                    'port':
                                    config['core']['broker']['port'],
                                    'password':
                                    config['core']['broker']['password'],
                                    'queues':
                                    config['core']['broker']['queues']
                                })
                            host_key = '/ovs/framework/hosts/{0}{{0}}'.format(
                                host_id)
                            EtcdConfiguration.set(
                                host_key.format('/storagedriver'), {
                                    'rsp':
                                    config['storagedriver']['rsp'],
                                    'vmware_mode':
                                    config['storagedriver']['vmware_mode']
                                })
                            EtcdConfiguration.set(host_key.format('/ports'),
                                                  config['ports'])
                            EtcdConfiguration.set(
                                host_key.format('/setupcompleted'),
                                config['core']['setupcompleted'])
                            EtcdConfiguration.set(
                                host_key.format('/versions'),
                                config['core'].get('versions', {}))
                            EtcdConfiguration.set(host_key.format('/type'),
                                                  config['core']['nodetype'])
                            EtcdConfiguration.set(host_key.format('/ip'),
                                                  config['grid']['ip'])
                    path = '{0}/memcacheclient.cfg'.format(
                        EtcdConfiguration.get('/ovs/framework/paths|cfgdir'))
                    if os.path.exists(path):
                        config = RawConfigParser()
                        config.read(path)
                        nodes = [
                            config.get(node.strip(), 'location').strip()
                            for node in config.get('main', 'nodes').split(',')
                        ]
                        EtcdConfiguration.set(
                            '/ovs/framework/memcache|endpoints', nodes)
                        os.remove(path)
                    path = '{0}/rabbitmqclient.cfg'.format(
                        EtcdConfiguration.get('/ovs/framework/paths|cfgdir'))
                    if os.path.exists(path):
                        config = RawConfigParser()
                        config.read(path)
                        nodes = [
                            config.get(node.strip(), 'location').strip()
                            for node in config.get('main', 'nodes').split(',')
                        ]
                        EtcdConfiguration.set(
                            '/ovs/framework/messagequeue|endpoints', nodes)
                        os.remove(path)
                    # Migrate arakoon configuration files
                    from ovs.extensions.db.arakoon import ArakoonInstaller
                    reload(ArakoonInstaller)
                    from ovs.extensions.db.arakoon.ArakoonInstaller import ArakoonInstaller, ArakoonClusterConfig
                    from ovs.extensions.generic.sshclient import SSHClient
                    if master_ips is not None:
                        config_dir = '/opt/OpenvStorage/config/arakoon/'
                        for ip in master_ips:
                            client = SSHClient(ip)
                            if client.dir_exists(config_dir):
                                for cluster_name in client.dir_list(
                                        config_dir):
                                    try:
                                        with open('{0}/{1}/{1}.cfg'.format(
                                                config_dir,
                                                cluster_name)) as config_file:
                                            EtcdConfiguration.set(
                                                ArakoonClusterConfig.
                                                ETCD_CONFIG_KEY.format(
                                                    cluster_name),
                                                config_file.read(),
                                                raw=True)
                                            ArakoonInstaller.deploy_cluster(
                                                cluster_name, ip)
                                    except:
                                        logger.exception(
                                            'Error migrating {0} on {1}'.
                                            format(cluster_name, ip))
                                client.dir_delete(config_dir)
            except:
                logger.exception('Error migrating to version 4')

            working_version = 4

        return working_version
    def migrate(previous_version, master_ips=None, extra_ips=None):
        """
        Migrates from any version to any version, running all migrations required
        If previous_version is for example 0 and this script is at
        verison 3 it will execute two steps:
          - 1 > 2
          - 2 > 3
        :param previous_version: The previous version from which to start the migration.
        :param master_ips: IP addresses of the MASTER nodes
        :param extra_ips: IP addresses of the EXTRA nodes
        """
        logger = LogHandler.get('extensions', name='albamigration')
        working_version = previous_version

        # Version 1 introduced:
        # - Etcd
        if working_version < 1:
            try:
                import os
                import json
                from ovs.extensions.db.etcd import installer
                reload(installer)
                from ovs.extensions.db.etcd.installer import EtcdInstaller
                from ovs.extensions.db.etcd.configuration import EtcdConfiguration
                from ovs.extensions.generic.system import System
                host_id = System.get_my_machine_id()
                etcd_migrate = False
                if EtcdInstaller.has_cluster('127.0.0.1', 'config'):
                    etcd_migrate = True
                else:
                    if master_ips is not None and extra_ips is not None:
                        cluster_ip = None
                        for ip in master_ips + extra_ips:
                            if EtcdInstaller.has_cluster(ip, 'config'):
                                cluster_ip = ip
                                break
                        node_ip = None
                        path = '/opt/OpenvStorage/config/ovs.json'
                        if os.path.exists(path):
                            with open(path) as config_file:
                                config = json.load(config_file)
                                node_ip = config['grid']['ip']
                        if node_ip is not None:
                            if cluster_ip is None:
                                EtcdInstaller.create_cluster('config', node_ip)
                                EtcdConfiguration.initialize()
                                EtcdConfiguration.initialize_host(host_id)
                            else:
                                EtcdInstaller.extend_cluster(cluster_ip, node_ip, 'config')
                                EtcdConfiguration.initialize_host(host_id)
                            etcd_migrate = True
                if etcd_migrate is True:
                    # At this point, there is an etcd cluster. Migrating alba.json
                    path = '/opt/OpenvStorage/config/alba.json'
                    if os.path.exists(path):
                        with open(path) as config_file:
                            config = json.load(config_file)
                            EtcdConfiguration.set('/ovs/framework/plugins/alba/config', config)
                        os.remove(path)
                        EtcdConfiguration.set('/ovs/alba/backends/global_gui_error_interval', 300)
            except:
                logger.exception('Error migrating to version 1')

            working_version = 1

        return working_version
Beispiel #3
0
    def remove_node(node_ip, silent=None):
        """
        Remove the node with specified IP from the cluster
        :param node_ip: IP of the node to remove
        :type node_ip: str
        :param silent: If silent == '--force-yes' no question will be asked to confirm the removal
        :type silent: str
        :return: None
        """
        from ovs.lib.storagedriver import StorageDriverController
        from ovs.lib.storagerouter import StorageRouterController
        from ovs.dal.lists.storagerouterlist import StorageRouterList

        Toolbox.log(logger=NodeRemovalController._logger, messages="Remove node", boxed=True)
        Toolbox.log(
            logger=NodeRemovalController._logger,
            messages="WARNING: Some of these steps may take a very long time, please check the logs for more information\n\n",
        )

        ###############
        # VALIDATIONS #
        ###############
        try:
            node_ip = node_ip.strip()
            if not isinstance(node_ip, str):
                raise ValueError("Node IP must be a string")
            if not re.match(SSHClient.IP_REGEX, node_ip):
                raise ValueError("Invalid IP {0} specified".format(node_ip))

            storage_router_all = StorageRouterList.get_storagerouters()
            storage_router_masters = StorageRouterList.get_masters()
            storage_router_all_ips = set([storage_router.ip for storage_router in storage_router_all])
            storage_router_master_ips = set([storage_router.ip for storage_router in storage_router_masters])
            storage_router_to_remove = StorageRouterList.get_by_ip(node_ip)

            if node_ip not in storage_router_all_ips:
                raise ValueError(
                    "Unknown IP specified\nKnown in model:\n - {0}\nSpecified for removal:\n - {1}".format(
                        "\n - ".join(storage_router_all_ips), node_ip
                    )
                )

            if len(storage_router_all_ips) == 1:
                raise RuntimeError("Removing the only node is not possible")

            if node_ip in storage_router_master_ips and len(storage_router_master_ips) == 1:
                raise RuntimeError("Removing the only master node is not possible")

            if System.get_my_storagerouter() == storage_router_to_remove:
                raise RuntimeError(
                    "The node to be removed cannot be identical to the node on which the removal is initiated"
                )

            Toolbox.log(
                logger=NodeRemovalController._logger, messages="Creating SSH connections to remaining master nodes"
            )
            master_ip = None
            ip_client_map = {}
            storage_routers_offline = []
            storage_router_to_remove_online = True
            for storage_router in storage_router_all:
                try:
                    client = SSHClient(storage_router, username="******")
                    if client.run(["pwd"]):
                        Toolbox.log(
                            logger=NodeRemovalController._logger,
                            messages="  Node with IP {0:<15} successfully connected to".format(storage_router.ip),
                        )
                        ip_client_map[storage_router.ip] = client
                        if storage_router != storage_router_to_remove and storage_router.node_type == "MASTER":
                            master_ip = storage_router.ip
                except UnableToConnectException:
                    Toolbox.log(
                        logger=NodeRemovalController._logger,
                        messages="  Node with IP {0:<15} is unreachable".format(storage_router.ip),
                    )
                    storage_routers_offline.append(storage_router)
                    if storage_router == storage_router_to_remove:
                        storage_router_to_remove_online = False

            if len(ip_client_map) == 0 or master_ip is None:
                raise RuntimeError("Could not connect to any master node in the cluster")

            storage_router_to_remove.invalidate_dynamics("vdisks_guids")
            if (
                len(storage_router_to_remove.vdisks_guids) > 0
            ):  # vDisks are supposed to be moved away manually before removing a node
                raise RuntimeError("Still vDisks attached to Storage Router {0}".format(storage_router_to_remove.name))

            internal_memcached = Toolbox.is_service_internally_managed(service="memcached")
            internal_rabbit_mq = Toolbox.is_service_internally_managed(service="rabbitmq")
            memcached_endpoints = Configuration.get(key="/ovs/framework/memcache|endpoints")
            rabbit_mq_endpoints = Configuration.get(key="/ovs/framework/messagequeue|endpoints")
            copy_memcached_endpoints = list(memcached_endpoints)
            copy_rabbit_mq_endpoints = list(rabbit_mq_endpoints)
            for endpoint in memcached_endpoints:
                if endpoint.startswith(storage_router_to_remove.ip):
                    copy_memcached_endpoints.remove(endpoint)
            for endpoint in rabbit_mq_endpoints:
                if endpoint.startswith(storage_router_to_remove.ip):
                    copy_rabbit_mq_endpoints.remove(endpoint)
            if len(copy_memcached_endpoints) == 0 and internal_memcached is True:
                raise RuntimeError(
                    "Removal of provided nodes will result in a complete removal of the memcached service"
                )
            if len(copy_rabbit_mq_endpoints) == 0 and internal_rabbit_mq is True:
                raise RuntimeError(
                    "Removal of provided nodes will result in a complete removal of the messagequeue service"
                )
        except Exception as exception:
            Toolbox.log(
                logger=NodeRemovalController._logger, messages=[str(exception)], boxed=True, loglevel="exception"
            )
            sys.exit(1)

        #################
        # CONFIRMATIONS #
        #################
        interactive = silent != "--force-yes"
        remove_asd_manager = not interactive  # Remove ASD manager if non-interactive else ask
        if interactive is True:
            proceed = Interactive.ask_yesno(
                message="Are you sure you want to remove node {0}?".format(storage_router_to_remove.name),
                default_value=False,
            )
            if proceed is False:
                Toolbox.log(logger=NodeRemovalController._logger, messages="Abort removal", title=True)
                sys.exit(1)

            if storage_router_to_remove_online is True:
                client = SSHClient(endpoint=storage_router_to_remove, username="******")
                if ServiceManager.has_service(name="asd-manager", client=client):
                    remove_asd_manager = Interactive.ask_yesno(
                        message="Do you also want to remove the ASD manager and related ASDs?", default_value=False
                    )

            if remove_asd_manager is True or storage_router_to_remove_online is False:
                for function in Toolbox.fetch_hooks("setup", "validate_asd_removal"):
                    validation_output = function(storage_router_to_remove.ip)
                    if validation_output["confirm"] is True:
                        if Interactive.ask_yesno(message=validation_output["question"], default_value=False) is False:
                            remove_asd_manager = False
                            break

        ###########
        # REMOVAL #
        ###########
        try:
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages="Starting removal of node {0} - {1}".format(
                    storage_router_to_remove.name, storage_router_to_remove.ip
                ),
            )
            if storage_router_to_remove_online is False:
                Toolbox.log(
                    logger=NodeRemovalController._logger,
                    messages="  Marking all Storage Drivers served by Storage Router {0} as offline".format(
                        storage_router_to_remove.ip
                    ),
                )
                StorageDriverController.mark_offline(storagerouter_guid=storage_router_to_remove.guid)

            # Remove vPools
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages="  Removing vPools from node".format(storage_router_to_remove.ip),
            )
            storage_routers_offline_guids = [
                sr.guid for sr in storage_routers_offline if sr.guid != storage_router_to_remove.guid
            ]
            for storage_driver in storage_router_to_remove.storagedrivers:
                Toolbox.log(
                    logger=NodeRemovalController._logger,
                    messages="    Removing vPool {0} from node".format(storage_driver.vpool.name),
                )
                StorageRouterController.remove_storagedriver(
                    storagedriver_guid=storage_driver.guid, offline_storage_router_guids=storage_routers_offline_guids
                )

            # Demote if MASTER
            if storage_router_to_remove.node_type == "MASTER":
                NodeTypeController.demote_node(
                    cluster_ip=storage_router_to_remove.ip,
                    master_ip=master_ip,
                    ip_client_map=ip_client_map,
                    unique_id=storage_router_to_remove.machine_id,
                    unconfigure_memcached=internal_memcached,
                    unconfigure_rabbitmq=internal_rabbit_mq,
                    offline_nodes=storage_routers_offline,
                )

            # Stop / remove services
            Toolbox.log(logger=NodeRemovalController._logger, messages="Stopping and removing services")
            config_store = Configuration.get_store()
            if storage_router_to_remove_online is True:
                client = SSHClient(endpoint=storage_router_to_remove, username="******")
                NodeRemovalController.remove_services(
                    client=client,
                    node_type=storage_router_to_remove.node_type.lower(),
                    logger=NodeRemovalController._logger,
                )
                service = "watcher-config"
                if ServiceManager.has_service(service, client=client):
                    Toolbox.log(logger=NodeRemovalController._logger, messages="Removing service {0}".format(service))
                    ServiceManager.stop_service(service, client=client)
                    ServiceManager.remove_service(service, client=client)

                if config_store == "etcd":
                    from ovs.extensions.db.etcd.installer import EtcdInstaller

                    if Configuration.get(key="/ovs/framework/external_config") is None:
                        Toolbox.log(logger=NodeRemovalController._logger, messages="      Removing Etcd cluster")
                        try:
                            EtcdInstaller.stop("config", client)
                            EtcdInstaller.remove("config", client)
                        except Exception as ex:
                            Toolbox.log(
                                logger=NodeRemovalController._logger,
                                messages=["\nFailed to unconfigure Etcd", ex],
                                loglevel="exception",
                            )

                    Toolbox.log(logger=NodeRemovalController._logger, messages="Removing Etcd proxy")
                    EtcdInstaller.remove_proxy("config", client.ip)

            Toolbox.run_hooks(
                component="noderemoval",
                sub_component="remove",
                logger=NodeRemovalController._logger,
                cluster_ip=storage_router_to_remove.ip,
                complete_removal=remove_asd_manager,
            )

            # Clean up model
            Toolbox.log(logger=NodeRemovalController._logger, messages="Removing node from model")
            for service in storage_router_to_remove.services:
                service.delete()
            for disk in storage_router_to_remove.disks:
                for partition in disk.partitions:
                    partition.delete()
                disk.delete()
            for j_domain in storage_router_to_remove.domains:
                j_domain.delete()
            Configuration.delete("/ovs/framework/hosts/{0}".format(storage_router_to_remove.machine_id))

            NodeTypeController.restart_framework_and_memcache_services(
                clients=ip_client_map,
                offline_node_ips=[node.ip for node in storage_routers_offline],
                logger=NodeRemovalController._logger,
            )

            if storage_router_to_remove_online is True:
                client = SSHClient(endpoint=storage_router_to_remove, username="******")
                if config_store == "arakoon":
                    client.file_delete(filenames=[ArakoonConfiguration.CACC_LOCATION])
                client.file_delete(filenames=[Configuration.BOOTSTRAP_CONFIG_LOCATION])
            storage_router_to_remove.delete()
            Toolbox.log(logger=NodeRemovalController._logger, messages="Successfully removed node\n")
        except Exception as exception:
            Toolbox.log(logger=NodeRemovalController._logger, messages="\n")
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=["An unexpected error occurred:", str(exception)],
                boxed=True,
                loglevel="exception",
            )
            sys.exit(1)
        except KeyboardInterrupt:
            Toolbox.log(logger=NodeRemovalController._logger, messages="\n")
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages="This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.",
                boxed=True,
                loglevel="error",
            )
            sys.exit(1)

        if remove_asd_manager is True:
            Toolbox.log(logger=NodeRemovalController._logger, messages="\nRemoving ASD Manager")
            with remote(storage_router_to_remove.ip, [os]) as rem:
                rem.os.system("asd-manager remove --force-yes")
        Toolbox.log(logger=NodeRemovalController._logger, messages="Remove nodes finished", title=True)
Beispiel #4
0
    def migrate(previous_version, master_ips=None, extra_ips=None):
        """
        Migrates from any version to any version, running all migrations required
        If previous_version is for example 0 and this script is at
        verison 3 it will execute two steps:
          - 1 > 2
          - 2 > 3
        :param previous_version: The previous version from which to start the migration.
        :param master_ips: IP addresses of the MASTER nodes
        :param extra_ips: IP addresses of the EXTRA nodes
        """

        working_version = previous_version

        # Version 1 introduced:
        # - Flexible SSD layout
        if working_version < 1:
            try:
                from ovs.extensions.generic.configuration import Configuration
                if Configuration.exists('ovs.arakoon'):
                    Configuration.delete('ovs.arakoon', remove_root=True)
                Configuration.set('ovs.core.ovsdb', '/opt/OpenvStorage/db')
            except:
                logger.exception('Error migrating to version 1')

            working_version = 1

        # Version 2 introduced:
        # - Registration
        if working_version < 2:
            try:
                import time
                from ovs.extensions.generic.configuration import Configuration
                if not Configuration.exists('ovs.core.registered'):
                    Configuration.set('ovs.core.registered', False)
                    Configuration.set('ovs.core.install_time', time.time())
            except:
                logger.exception('Error migrating to version 2')

        working_version = 2

        # Version 3 introduced:
        # - New arakoon clients
        if working_version < 3:
            try:
                from ovs.extensions.db.arakoon import ArakoonInstaller
                reload(ArakoonInstaller)
                from ovs.extensions.db.arakoon.ArakoonInstaller import ArakoonInstaller
                from ovs.extensions.generic.sshclient import SSHClient
                from ovs.extensions.generic.configuration import Configuration
                if master_ips is not None:
                    for ip in master_ips:
                        client = SSHClient(ip)
                        if client.dir_exists(ArakoonInstaller.ARAKOON_CONFIG_DIR):
                            for cluster_name in client.dir_list(ArakoonInstaller.ARAKOON_CONFIG_DIR):
                                try:
                                    ArakoonInstaller.deploy_cluster(cluster_name, ip)
                                except:
                                    pass
                if Configuration.exists('ovs.core.storage.persistent'):
                    Configuration.set('ovs.core.storage.persistent', 'pyrakoon')
            except:
                logger.exception('Error migrating to version 3')

            working_version = 3

        # Version 4 introduced:
        # - Etcd
        if working_version < 4:
            try:
                import os
                import json
                from ConfigParser import RawConfigParser
                from ovs.extensions.db.etcd import installer
                reload(installer)
                from ovs.extensions.db.etcd.installer import EtcdInstaller
                from ovs.extensions.db.etcd.configuration import EtcdConfiguration
                from ovs.extensions.generic.system import System
                host_id = System.get_my_machine_id()
                etcd_migrate = False
                if EtcdInstaller.has_cluster('127.0.0.1', 'config'):
                    etcd_migrate = True
                else:
                    if master_ips is not None and extra_ips is not None:
                        cluster_ip = None
                        for ip in master_ips + extra_ips:
                            if EtcdInstaller.has_cluster(ip, 'config'):
                                cluster_ip = ip
                                break
                        node_ip = None
                        path = '/opt/OpenvStorage/config/ovs.json'
                        if os.path.exists(path):
                            with open(path) as config_file:
                                config = json.load(config_file)
                                node_ip = config['grid']['ip']
                        if node_ip is not None:
                            if cluster_ip is None:
                                EtcdInstaller.create_cluster('config', node_ip)
                                EtcdConfiguration.initialize()
                                EtcdConfiguration.initialize_host(host_id)
                            else:
                                EtcdInstaller.extend_cluster(cluster_ip, node_ip, 'config')
                                EtcdConfiguration.initialize_host(host_id)
                            etcd_migrate = True
                if etcd_migrate is True:
                    # Migrating configuration files
                    path = '/opt/OpenvStorage/config/ovs.json'
                    if os.path.exists(path):
                        with open(path) as config_file:
                            config = json.load(config_file)
                            EtcdConfiguration.set('/ovs/framework/cluster_id', config['support']['cid'])
                            if not EtcdConfiguration.exists('/ovs/framework/install_time'):
                                EtcdConfiguration.set('/ovs/framework/install_time', config['core']['install_time'])
                            else:
                                EtcdConfiguration.set('/ovs/framework/install_time', min(EtcdConfiguration.get('/ovs/framework/install_time'), config['core']['install_time']))
                            EtcdConfiguration.set('/ovs/framework/registered', config['core']['registered'])
                            EtcdConfiguration.set('/ovs/framework/plugins/installed', config['plugins'])
                            EtcdConfiguration.set('/ovs/framework/stores', config['core']['storage'])
                            EtcdConfiguration.set('/ovs/framework/paths', {'cfgdir': config['core']['cfgdir'],
                                                                           'basedir': config['core']['basedir'],
                                                                           'ovsdb': config['core']['ovsdb']})
                            EtcdConfiguration.set('/ovs/framework/support', {'enablesupport': config['support']['enablesupport'],
                                                                             'enabled': config['support']['enabled'],
                                                                             'interval': config['support']['interval']})
                            EtcdConfiguration.set('/ovs/framework/storagedriver', {'mds_safety': config['storagedriver']['mds']['safety'],
                                                                                   'mds_tlogs': config['storagedriver']['mds']['tlogs'],
                                                                                   'mds_maxload': config['storagedriver']['mds']['maxload']})
                            EtcdConfiguration.set('/ovs/framework/webapps', {'html_endpoint': config['webapps']['html_endpoint'],
                                                                             'oauth2': config['webapps']['oauth2']})
                            EtcdConfiguration.set('/ovs/framework/messagequeue', {'endpoints': [],
                                                                                  'protocol': config['core']['broker']['protocol'],
                                                                                  'user': config['core']['broker']['login'],
                                                                                  'port': config['core']['broker']['port'],
                                                                                  'password': config['core']['broker']['password'],
                                                                                  'queues': config['core']['broker']['queues']})
                            host_key = '/ovs/framework/hosts/{0}{{0}}'.format(host_id)
                            EtcdConfiguration.set(host_key.format('/storagedriver'), {'rsp': config['storagedriver']['rsp'],
                                                                                      'vmware_mode': config['storagedriver']['vmware_mode']})
                            EtcdConfiguration.set(host_key.format('/ports'), config['ports'])
                            EtcdConfiguration.set(host_key.format('/setupcompleted'), config['core']['setupcompleted'])
                            EtcdConfiguration.set(host_key.format('/versions'), config['core'].get('versions', {}))
                            EtcdConfiguration.set(host_key.format('/type'), config['core']['nodetype'])
                            EtcdConfiguration.set(host_key.format('/ip'), config['grid']['ip'])
                    path = '{0}/memcacheclient.cfg'.format(EtcdConfiguration.get('/ovs/framework/paths|cfgdir'))
                    if os.path.exists(path):
                        config = RawConfigParser()
                        config.read(path)
                        nodes = [config.get(node.strip(), 'location').strip()
                                 for node in config.get('main', 'nodes').split(',')]
                        EtcdConfiguration.set('/ovs/framework/memcache|endpoints', nodes)
                        os.remove(path)
                    path = '{0}/rabbitmqclient.cfg'.format(EtcdConfiguration.get('/ovs/framework/paths|cfgdir'))
                    if os.path.exists(path):
                        config = RawConfigParser()
                        config.read(path)
                        nodes = [config.get(node.strip(), 'location').strip()
                                 for node in config.get('main', 'nodes').split(',')]
                        EtcdConfiguration.set('/ovs/framework/messagequeue|endpoints', nodes)
                        os.remove(path)
                    # Migrate arakoon configuration files
                    from ovs.extensions.db.arakoon import ArakoonInstaller
                    reload(ArakoonInstaller)
                    from ovs.extensions.db.arakoon.ArakoonInstaller import ArakoonInstaller, ArakoonClusterConfig
                    from ovs.extensions.generic.sshclient import SSHClient
                    if master_ips is not None:
                        config_dir = '/opt/OpenvStorage/config/arakoon/'
                        for ip in master_ips:
                            client = SSHClient(ip)
                            if client.dir_exists(config_dir):
                                for cluster_name in client.dir_list(config_dir):
                                    try:
                                        with open('{0}/{1}/{1}.cfg'.format(config_dir, cluster_name)) as config_file:
                                            EtcdConfiguration.set(ArakoonClusterConfig.ETCD_CONFIG_KEY.format(cluster_name),
                                                                  config_file.read(),
                                                                  raw=True)
                                            ArakoonInstaller.deploy_cluster(cluster_name, ip)
                                    except:
                                        logger.exception('Error migrating {0} on {1}'.format(cluster_name, ip))
                                client.dir_delete(config_dir)
            except:
                logger.exception('Error migrating to version 4')

            working_version = 4

        return working_version
Beispiel #5
0
    def demote_node(cluster_ip, master_ip, ip_client_map, unique_id, unconfigure_memcached, unconfigure_rabbitmq, offline_nodes=None):
        """
        Demotes a given node
        """
        from ovs.dal.lists.storagerouterlist import StorageRouterList

        Toolbox.log(logger=NodeTypeController._logger, messages='Demoting node', title=True)
        if offline_nodes is None:
            offline_nodes = []

        if unconfigure_memcached is True and len(offline_nodes) == 0:
            if NodeTypeController._validate_local_memcache_servers(ip_client_map) is False:
                raise RuntimeError('Not all memcache nodes can be reached which is required for demoting a node.')

        # Find other (arakoon) master nodes
        arakoon_cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|ovsdb'))
        arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=arakoon_cluster_name)
        config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name, filesystem=False)
        config.load_config()
        master_node_ips = [node.ip for node in config.nodes]
        if cluster_ip in master_node_ips:
            master_node_ips.remove(cluster_ip)
        if len(master_node_ips) == 0:
            raise RuntimeError('There should be at least one other master node')

        storagerouter = StorageRouterList.get_by_machine_id(unique_id)
        storagerouter.node_type = 'EXTRA'
        storagerouter.save()

        offline_node_ips = [node.ip for node in offline_nodes]
        if arakoon_metadata['internal'] is True:
            Toolbox.log(logger=NodeTypeController._logger, messages='Leaving Arakoon {0} cluster'.format(arakoon_cluster_name))
            ArakoonInstaller.shrink_cluster(deleted_node_ip=cluster_ip,
                                            remaining_node_ips=master_node_ips,
                                            cluster_name=arakoon_cluster_name,
                                            offline_nodes=offline_node_ips)

        try:
            external_config = Configuration.get('/ovs/framework/external_config')
            if external_config is None:
                config_store = Configuration.get_store()
                if config_store == 'arakoon':
                    Toolbox.log(logger=NodeTypeController._logger, messages='Leaving Arakoon config cluster')
                    ArakoonInstaller.shrink_cluster(deleted_node_ip=cluster_ip,
                                                    remaining_node_ips=master_node_ips,
                                                    cluster_name='config',
                                                    offline_nodes=offline_node_ips,
                                                    filesystem=True)
                else:
                    from ovs.extensions.db.etcd.installer import EtcdInstaller
                    Toolbox.log(logger=NodeTypeController._logger, messages='Leaving Etcd cluster')
                    EtcdInstaller.shrink_cluster(master_ip, cluster_ip, 'config', offline_node_ips)
        except Exception as ex:
            Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to leave configuration cluster', ex], loglevel='exception')

        Toolbox.log(logger=NodeTypeController._logger, messages='Update configurations')
        try:
            if unconfigure_memcached is True:
                endpoints = Configuration.get('/ovs/framework/memcache|endpoints')
                endpoint = '{0}:{1}'.format(cluster_ip, 11211)
                if endpoint in endpoints:
                    endpoints.remove(endpoint)
                Configuration.set('/ovs/framework/memcache|endpoints', endpoints)
            if unconfigure_rabbitmq is True:
                endpoints = Configuration.get('/ovs/framework/messagequeue|endpoints')
                endpoint = '{0}:{1}'.format(cluster_ip, 5672)
                if endpoint in endpoints:
                    endpoints.remove(endpoint)
                Configuration.set('/ovs/framework/messagequeue|endpoints', endpoints)
        except Exception as ex:
            Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to update configurations', ex], loglevel='exception')

        if arakoon_metadata['internal'] is True:
            Toolbox.log(logger=NodeTypeController._logger, messages='Restarting master node services')
            remaining_nodes = ip_client_map.keys()[:]
            if cluster_ip in remaining_nodes:
                remaining_nodes.remove(cluster_ip)

            PersistentFactory.store = None
            VolatileFactory.store = None

            for service in storagerouter.services:
                if service.name == 'arakoon-ovsdb':
                    service.delete()

        target_client = None
        if storagerouter in offline_nodes:
            if unconfigure_rabbitmq is True:
                Toolbox.log(logger=NodeTypeController._logger, messages='Removing/unconfiguring offline RabbitMQ node')
                client = ip_client_map[master_ip]
                try:
                    client.run(['rabbitmqctl', 'forget_cluster_node', 'rabbit@{0}'.format(storagerouter.name)])
                except Exception as ex:
                    Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to forget RabbitMQ cluster node', ex], loglevel='exception')
        else:
            target_client = ip_client_map[cluster_ip]
            if unconfigure_rabbitmq is True:
                Toolbox.log(logger=NodeTypeController._logger, messages='Removing/unconfiguring RabbitMQ')
                try:
                    if ServiceManager.has_service('rabbitmq-server', client=target_client):
                        Toolbox.change_service_state(target_client, 'rabbitmq-server', 'stop', NodeTypeController._logger)
                        target_client.run(['rabbitmq-server', '-detached'])
                        time.sleep(5)
                        target_client.run(['rabbitmqctl', 'stop_app'])
                        time.sleep(5)
                        target_client.run(['rabbitmqctl', 'reset'])
                        time.sleep(5)
                        target_client.run(['rabbitmqctl', 'stop'])
                        time.sleep(5)
                        target_client.file_unlink("/var/lib/rabbitmq/.erlang.cookie")
                        Toolbox.change_service_state(target_client, 'rabbitmq-server', 'stop', NodeTypeController._logger)  # To be sure
                except Exception as ex:
                    Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to remove/unconfigure RabbitMQ', ex], loglevel='exception')

            Toolbox.log(logger=NodeTypeController._logger, messages='Stopping services')
            services = ['memcached', 'rabbitmq-server']
            if unconfigure_rabbitmq is False:
                services.remove('rabbitmq-server')
            if unconfigure_memcached is False:
                services.remove('memcached')
            for service in services:
                if ServiceManager.has_service(service, client=target_client):
                    Toolbox.log(logger=NodeTypeController._logger, messages='Stopping service {0}'.format(service))
                    try:
                        Toolbox.change_service_state(target_client, service, 'stop', NodeTypeController._logger)
                    except Exception as ex:
                        Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to stop service'.format(service), ex], loglevel='exception')

            Toolbox.log(logger=NodeTypeController._logger, messages='Removing services')
            services = ['scheduled-tasks', 'webapp-api', 'volumerouter-consumer']
            for service in services:
                if ServiceManager.has_service(service, client=target_client):
                    Toolbox.log(logger=NodeTypeController._logger, messages='Removing service {0}'.format(service))
                    try:
                        Toolbox.change_service_state(target_client, service, 'stop', NodeTypeController._logger)
                        ServiceManager.remove_service(service, client=target_client)
                    except Exception as ex:
                        Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to remove service'.format(service), ex], loglevel='exception')

            if ServiceManager.has_service('workers', client=target_client):
                ServiceManager.add_service(name='workers',
                                           client=target_client,
                                           params={'WORKER_QUEUE': '{0}'.format(unique_id)})
        try:
            NodeTypeController._configure_amqp_to_volumedriver()
        except Exception as ex:
            Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to configure AMQP to Storage Driver', ex], loglevel='exception')

        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
        NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger, offline_node_ips=offline_node_ips)

        if Toolbox.run_hooks(component='nodetype',
                             sub_component='demote',
                             logger=NodeTypeController._logger,
                             cluster_ip=cluster_ip,
                             master_ip=master_ip,
                             offline_node_ips=offline_node_ips):
            Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
            NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger, offline_node_ips=offline_node_ips)

        if storagerouter not in offline_nodes:
            target_client = ip_client_map[cluster_ip]
            node_name, _ = target_client.get_hostname()
            if NodeTypeController.avahi_installed(client=target_client, logger=NodeTypeController._logger) is True:
                NodeTypeController.configure_avahi(client=target_client, node_name=node_name, node_type='extra', logger=NodeTypeController._logger)
        Configuration.set('/ovs/framework/hosts/{0}/type'.format(storagerouter.machine_id), 'EXTRA')

        if target_client is not None and target_client.file_exists('/tmp/ovs_rollback'):
            target_client.file_write('/tmp/ovs_rollback', 'rollback')

        Toolbox.log(logger=NodeTypeController._logger, messages='Demote complete', title=True)
Beispiel #6
0
    def promote_node(cluster_ip, master_ip, ip_client_map, unique_id, configure_memcached, configure_rabbitmq):
        """
        Promotes a given node
        """
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.dal.lists.servicetypelist import ServiceTypeList
        from ovs.dal.lists.servicelist import ServiceList
        from ovs.dal.hybrids.service import Service

        Toolbox.log(logger=NodeTypeController._logger, messages='Promoting node', title=True)
        if configure_memcached is True:
            if NodeTypeController._validate_local_memcache_servers(ip_client_map) is False:
                raise RuntimeError('Not all memcache nodes can be reached which is required for promoting a node.')

        target_client = ip_client_map[cluster_ip]
        machine_id = System.get_my_machine_id(target_client)
        node_name, _ = target_client.get_hostname()
        master_client = ip_client_map[master_ip]

        storagerouter = StorageRouterList.get_by_machine_id(unique_id)
        storagerouter.node_type = 'MASTER'
        storagerouter.save()

        external_config = Configuration.get('/ovs/framework/external_config')
        if external_config is None:
            config_store = Configuration.get_store()
            if config_store == 'arakoon':
                Toolbox.log(logger=NodeTypeController._logger, messages='Joining Arakoon configuration cluster')
                metadata = ArakoonInstaller.extend_cluster(master_ip=master_ip,
                                                           new_ip=cluster_ip,
                                                           cluster_name='config',
                                                           base_dir=Configuration.get('/ovs/framework/paths|ovsdb'),
                                                           ports=[26400, 26401],
                                                           filesystem=True)
                ArakoonInstaller.restart_cluster_add(cluster_name='config',
                                                     current_ips=metadata['ips'],
                                                     new_ip=cluster_ip,
                                                     filesystem=True)
                ServiceManager.register_service(node_name=machine_id,
                                                service_metadata=metadata['service_metadata'])
            else:
                from ovs.extensions.db.etcd.installer import EtcdInstaller
                Toolbox.log(logger=NodeTypeController._logger, messages='Joining Etcd cluster')
                EtcdInstaller.extend_cluster(master_ip, cluster_ip, 'config')

        # Find other (arakoon) master nodes
        arakoon_cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|ovsdb'))
        arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=arakoon_cluster_name)
        config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name, filesystem=False)
        config.load_config()
        master_node_ips = [node.ip for node in config.nodes]
        if cluster_ip in master_node_ips:
            master_node_ips.remove(cluster_ip)
        if len(master_node_ips) == 0:
            raise RuntimeError('There should be at least one other master node')

        arakoon_ports = []
        if arakoon_metadata['internal'] is True:
            Toolbox.log(logger=NodeTypeController._logger, messages='Joining Arakoon OVS DB cluster')
            result = ArakoonInstaller.extend_cluster(master_ip=master_ip,
                                                     new_ip=cluster_ip,
                                                     cluster_name=arakoon_cluster_name,
                                                     base_dir=Configuration.get('/ovs/framework/paths|ovsdb'))
            ArakoonInstaller.restart_cluster_add(cluster_name=arakoon_cluster_name,
                                                 current_ips=result['ips'],
                                                 new_ip=cluster_ip, filesystem=False)
            arakoon_ports = [result['client_port'], result['messaging_port']]

        if configure_memcached is True:
            NodeTypeController.configure_memcached(client=target_client, logger=NodeTypeController._logger)
        NodeTypeController.add_services(client=target_client, node_type='master', logger=NodeTypeController._logger)

        Toolbox.log(logger=NodeTypeController._logger, messages='Update configurations')
        if configure_memcached is True:
            endpoints = Configuration.get('/ovs/framework/memcache|endpoints')
            endpoint = '{0}:11211'.format(cluster_ip)
            if endpoint not in endpoints:
                endpoints.append(endpoint)
                Configuration.set('/ovs/framework/memcache|endpoints', endpoints)
        if configure_rabbitmq is True:
            endpoints = Configuration.get('/ovs/framework/messagequeue|endpoints')
            endpoint = '{0}:5672'.format(cluster_ip)
            if endpoint not in endpoints:
                endpoints.append(endpoint)
                Configuration.set('/ovs/framework/messagequeue|endpoints', endpoints)

        if arakoon_metadata['internal'] is True:
            Toolbox.log(logger=NodeTypeController._logger, messages='Restarting master node services')
            ArakoonInstaller.restart_cluster_add(cluster_name=arakoon_cluster_name,
                                                 current_ips=master_node_ips,
                                                 new_ip=cluster_ip,
                                                 filesystem=False)
            PersistentFactory.store = None
            VolatileFactory.store = None

            if 'arakoon-ovsdb' not in [s.name for s in ServiceList.get_services() if s.is_internal is False or s.storagerouter.ip == cluster_ip]:
                service = Service()
                service.name = 'arakoon-ovsdb'
                service.type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON)
                service.ports = arakoon_ports
                service.storagerouter = storagerouter
                service.save()

        if configure_rabbitmq is True:
            NodeTypeController.configure_rabbitmq(client=target_client, logger=NodeTypeController._logger)
            # Copy rabbitmq cookie
            rabbitmq_cookie_file = '/var/lib/rabbitmq/.erlang.cookie'

            Toolbox.log(logger=NodeTypeController._logger, messages='Copying Rabbit MQ cookie')
            contents = master_client.file_read(rabbitmq_cookie_file)
            master_hostname, _ = master_client.get_hostname()
            target_client.dir_create(os.path.dirname(rabbitmq_cookie_file))
            target_client.file_write(rabbitmq_cookie_file, contents)
            target_client.file_chmod(rabbitmq_cookie_file, mode=400)
            target_client.run(['rabbitmq-server', '-detached'])
            time.sleep(5)
            target_client.run(['rabbitmqctl', 'stop_app'])
            time.sleep(5)
            target_client.run(['rabbitmqctl', 'join_cluster', 'rabbit@{0}'.format(master_hostname)])
            time.sleep(5)
            target_client.run(['rabbitmqctl', 'stop'])
            time.sleep(5)

            # Enable HA for the rabbitMQ queues
            Toolbox.change_service_state(target_client, 'rabbitmq-server', 'start', NodeTypeController._logger)
            NodeTypeController.check_rabbitmq_and_enable_ha_mode(client=target_client, logger=NodeTypeController._logger)

        NodeTypeController._configure_amqp_to_volumedriver()

        Toolbox.log(logger=NodeTypeController._logger, messages='Starting services')
        services = ['memcached', 'arakoon-ovsdb', 'rabbitmq-server', 'etcd-config']
        if arakoon_metadata['internal'] is True:
            services.remove('arakoon-ovsdb')
        for service in services:
            if ServiceManager.has_service(service, client=target_client):
                Toolbox.change_service_state(target_client, service, 'start', NodeTypeController._logger)

        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
        NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger)

        if Toolbox.run_hooks(component='nodetype',
                             sub_component='promote',
                             logger=NodeTypeController._logger,
                             cluster_ip=cluster_ip,
                             master_ip=master_ip):
            Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
            NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger)

        if NodeTypeController.avahi_installed(client=target_client, logger=NodeTypeController._logger) is True:
            NodeTypeController.configure_avahi(client=target_client, node_name=node_name, node_type='master', logger=NodeTypeController._logger)
        Configuration.set('/ovs/framework/hosts/{0}/type'.format(machine_id), 'MASTER')
        target_client.run(['chown', '-R', 'ovs:ovs', '/opt/OpenvStorage/config'])
        Configuration.set('/ovs/framework/hosts/{0}/promotecompleted'.format(machine_id), True)

        if target_client.file_exists('/tmp/ovs_rollback'):
            target_client.file_delete('/tmp/ovs_rollback')

        Toolbox.log(logger=NodeTypeController._logger, messages='Promote complete')