コード例 #1
0
ファイル: nodetype.py プロジェクト: openvstorage/framework
    def add_services(client, node_type, logger):
        """
        Register the services an OVS node of the given type requires
        Services which are already present on the client are skipped
        :param client: Client on which to add the services
        :type client: ovs.extensions.generic.sshclient.SSHClient
        :param node_type: Type of node ('master' or 'extra')
        :type node_type: str
        :param logger: Logger object used for logging
        :type logger: ovs.log.log_handler.LogHandler
        :return: None
        """
        Toolbox.log(logger=logger, messages='Adding services')
        required = {}
        queue = System.get_my_machine_id(client=client)
        if node_type == 'master':
            # Masters get ',ovs_masters' appended to their worker queue and run a set of extra services
            queue += ',ovs_masters'
            master_only = {'memcached': {'MEMCACHE_NODE_IP': client.ip, 'WORKER_QUEUE': queue},
                           'rabbitmq-server': {'MEMCACHE_NODE_IP': client.ip, 'WORKER_QUEUE': queue},
                           'scheduled-tasks': {},
                           'webapp-api': {},
                           'volumerouter-consumer': {}}
            required.update(master_only)
        # Services needed regardless of the node type
        common = {'workers': {'WORKER_QUEUE': queue},
                  'watcher-framework': {}}
        required.update(common)

        for name, parameters in required.iteritems():
            if ServiceManager.has_service(name, client):
                continue
            Toolbox.log(logger=logger, messages='Adding service {0}'.format(name))
            ServiceManager.add_service(name=name, params=parameters, client=client)
コード例 #2
0
ファイル: noderemoval.py プロジェクト: openvstorage/framework
    def remove_services(client, node_type, logger):
        """
        Stop and remove the services managed by OVS
        'rabbitmq-server' and 'memcached' are only stopped, never removed
        :param client: Client on which to remove the services
        :type client: ovs.extensions.generic.sshclient.SSHClient
        :param node_type: Type of node, can be 'master' or 'extra'
        :type node_type: str
        :param logger: Logger object used for logging
        :type logger: ovs.log.log_handler.LogHandler
        :return: None
        """
        Toolbox.log(logger=logger, messages='Removing services')
        stop_only = ['rabbitmq-server', 'memcached']
        services = ['workers', 'support-agent', 'watcher-framework']
        if node_type == 'master':
            services.extend(['scheduled-tasks', 'webapp-api', 'volumerouter-consumer'])
            # Only touch RabbitMQ / Memcached when they are reported as internally managed
            for managed_name, service_name in (('rabbitmq', 'rabbitmq-server'), ('memcached', 'memcached')):
                if Toolbox.is_service_internally_managed(service=managed_name) is True:
                    services.append(service_name)

        for service in services:
            if not ServiceManager.has_service(service, client=client):
                continue
            keep_installed = service in stop_only
            action = 'Stopping' if keep_installed else 'Removing'
            Toolbox.log(logger=logger, messages='{0} service {1}'.format(action, service))
            ServiceManager.stop_service(service, client=client)
            if not keep_installed:
                ServiceManager.remove_service(service, client=client)
コード例 #3
0
ファイル: nodetype.py プロジェクト: sun363587351/framework-1
    def configure_avahi(client, node_name, node_type, logger):
        """
        Configure Avahi
        :param client: Client on which to configure avahi
        :type client: ovs_extensions.generic.sshclient.SSHClient
        :param node_name: Name of the node to set in Avahi
        :type node_name: str
        :param node_type: Type of the node ('master' or 'extra')
        :type node_type: str
        :param logger: Logger object used for logging
        :type logger: ovs.extensions.generic.logger.Logger
        :return: None
        """
        valid_avahi = NodeTypeController.validate_avahi_cluster_name(
            ip=client.ip,
            cluster_name=Configuration.get('/ovs/framework/cluster_name'),
            node_name=node_name)
        if valid_avahi[0] is False:
            raise RuntimeError(valid_avahi[1])
        Toolbox.log(logger=logger, messages='Announcing service')
        client.file_write(
            NodeTypeController.avahi_filename,
            """<?xml version="1.0" standalone='no'?>
<!--*-nxml-*-->
<!DOCTYPE service-group SYSTEM "avahi-service.dtd">
<!-- $Id$ -->
<service-group>
    <name replace-wildcards="yes">{0}</name>
    <service>
        <type>_ovs_{1}_node._tcp</type>
        <port>443</port>
    </service>
</service-group>""".format(valid_avahi[1], node_type))
        ServiceFactory.change_service_state(client, 'avahi-daemon', 'restart',
                                            NodeTypeController._logger)
コード例 #4
0
ファイル: nodetype.py プロジェクト: openvstorage/framework
    def configure_avahi(client, node_name, node_type, logger):
        """
        Configure Avahi
        :param client: Client on which to configure avahi
        :type client: ovs.extensions.generic.sshclient.SSHClient
        :param node_name: Name of the node to set in Avahi
        :type node_name: str
        :param node_type: Type of the node ('master' or 'extra')
        :type node_type: str
        :param logger: Logger object used for logging
        :type logger: ovs.log.log_handler.LogHandler
        :return: None
        """
        cluster_name = Configuration.get('/ovs/framework/cluster_name')
        Toolbox.log(logger=logger, messages='Announcing service')
        client.file_write(NodeTypeController.avahi_filename, """<?xml version="1.0" standalone='no'?>
<!--*-nxml-*-->
<!DOCTYPE service-group SYSTEM "avahi-service.dtd">
<!-- $Id$ -->
<service-group>
    <name replace-wildcards="yes">ovs_cluster_{0}_{1}_{3}</name>
    <service>
        <type>_ovs_{2}_node._tcp</type>
        <port>443</port>
    </service>
</service-group>""".format(cluster_name, node_name, node_type, client.ip.replace('.', '_')))
        Toolbox.change_service_state(client, 'avahi-daemon', 'restart', NodeTypeController._logger)
コード例 #5
0
ファイル: nodetype.py プロジェクト: sun363587351/framework-1
    def _configure_amqp_to_volumedriver():
        """
        Configure the event publisher of every storagedriver listed under '/ovs/vpools' with the message queue URIs
        Nothing is changed when the '/ovs/vpools' directory does not exist
        :return: None
        """
        Toolbox.log(logger=NodeTypeController._logger, messages='Update existing vPools')
        login = Configuration.get('/ovs/framework/messagequeue|user')
        password = Configuration.get('/ovs/framework/messagequeue|password')
        protocol = Configuration.get('/ovs/framework/messagequeue|protocol')

        # One AMQP URI per configured endpoint
        uris = [{'amqp_uri': '{0}://{1}:{2}@{3}'.format(protocol, login, password, endpoint)}
                for endpoint in Configuration.get('/ovs/framework/messagequeue|endpoints')]

        if not Configuration.dir_exists('/ovs/vpools'):
            return

        for vpool_guid in Configuration.list('/ovs/vpools'):
            hosts_key = '/ovs/vpools/{0}/hosts'.format(vpool_guid)
            for storagedriver_id in Configuration.list(hosts_key):
                config = StorageDriverConfiguration(vpool_guid, storagedriver_id)
                routing_key = Configuration.get('/ovs/framework/messagequeue|queues.storagedriver')
                config.configure_event_publisher(events_amqp_routing_key=routing_key,
                                                 events_amqp_uris=uris)
                config.save()
コード例 #6
0
ファイル: nodetype.py プロジェクト: openvstorage/framework
 def retrieve_storagerouter_info_via_host(ip, password):
     """
     Retrieve the storagerouters from the model through a remote session on the given host
     Any error is logged and whatever was collected up to that point (possibly nothing) is returned
     :param ip: Passed as ip_info to the remote session
     :param password: Password used for the remote session
     :return: Dict keyed by storagerouter name, each value holding the 'ip' and the lower-cased 'type'
     :rtype: dict
     """
     collected = {}
     try:
         from ovs.dal.lists.storagerouterlist import StorageRouterList
         session = remote(ip_info=ip, modules=[StorageRouterList], username='******', password=password, strict_host_key_checking=False)
         with session as remote_env:
             for router in remote_env.StorageRouterList.get_storagerouters():
                 details = {'ip': router.ip,
                            'type': router.node_type.lower()}
                 collected[router.name] = details
     except Exception as error:
         Toolbox.log(logger=NodeTypeController._logger,
                     messages='Error loading storagerouters: {0}'.format(error),
                     loglevel='exception',
                     silent=True)
     return collected
コード例 #7
0
ファイル: nodetype.py プロジェクト: openvstorage/framework
 def configure_memcached(client, logger):
     """
     Configure Memcached by editing /etc/memcached.conf in place with sed
     :param client: Client on which to configure Memcached
     :type client: ovs.extensions.generic.sshclient.SSHClient
     :param logger: Logger object used for logging
     :type logger: ovs.log.log_handler.LogHandler
     :return: None
     """
     Toolbox.log(logger=logger, messages='Setting up Memcached')
     config_file = '/etc/memcached.conf'
     sed_arguments = [
         ['s/^-l.*/-l 0.0.0.0/g'],  # Replace the -l line by '-l 0.0.0.0'
         ['s/^-m.*/-m 1024/g'],  # Replace the -m line by '-m 1024'
         # Put all -v, -vv, ... back in comment
         # Raw string on purpose: in a regular literal '\1' is the octal escape for chr(1),
         # so sed would receive a control character instead of the back-reference
         ['-E', r's/^-v(.*)/# -v\1/g'],
         ['s/^# -v[^v]*$/-v/g'],  # Uncomment only -v
     ]
     for arguments in sed_arguments:
         client.run(['sed', '-i'] + arguments + [config_file])
コード例 #8
0
ファイル: nodetype.py プロジェクト: openvstorage/framework
 def avahi_installed(client, logger):
     """
     Check whether Avahi is installed by running 'which avahi-daemon' on the client
     An empty result is treated as 'not installed'
     :param client: Client on which to check for Avahi
     :type client: ovs.extensions.generic.sshclient.SSHClient
     :param logger: Logger object used for logging
     :type logger: ovs.log.log_handler.LogHandler
     :return: True if Avahi is installed, False otherwise
     :rtype: bool
     """
     which_output = client.run(['which', 'avahi-daemon'], allow_nonzero=True)
     found = which_output != ''
     Toolbox.log(logger=logger, messages='Avahi installed' if found else 'Avahi not installed')
     return found
コード例 #9
0
ファイル: nodetype.py プロジェクト: sun363587351/framework-1
 def avahi_installed(client, logger):
     """
     Check whether Avahi is installed by running 'which avahi-daemon' on the client
     An empty result is treated as 'not installed'
     :param client: Client on which to check for Avahi
     :type client: ovs_extensions.generic.sshclient.SSHClient
     :param logger: Logger object used for logging
     :type logger: ovs.extensions.generic.logger.Logger
     :return: True if Avahi is installed, False otherwise
     :rtype: bool
     """
     lookup_result = client.run(['which', 'avahi-daemon'], allow_nonzero=True)
     if lookup_result != '':
         Toolbox.log(logger=logger, messages='Avahi installed')
         return True
     Toolbox.log(logger=logger, messages='Avahi not installed')
     return False
コード例 #10
0
ファイル: nodetype.py プロジェクト: openvstorage/framework
    def _configure_amqp_to_volumedriver():
        """
        Load, update and save the event publisher settings of every storagedriver listed under '/ovs/vpools'
        Nothing is changed when the '/ovs/vpools' directory does not exist
        :return: None
        """
        Toolbox.log(logger=NodeTypeController._logger, messages='Update existing vPools')
        login = Configuration.get('/ovs/framework/messagequeue|user')
        password = Configuration.get('/ovs/framework/messagequeue|password')
        protocol = Configuration.get('/ovs/framework/messagequeue|protocol')

        # One AMQP URI per configured endpoint
        endpoints = Configuration.get('/ovs/framework/messagequeue|endpoints')
        uris = [{'amqp_uri': '{0}://{1}:{2}@{3}'.format(protocol, login, password, endpoint)} for endpoint in endpoints]

        if not Configuration.dir_exists('/ovs/vpools'):
            return

        for vpool_guid in Configuration.list('/ovs/vpools'):
            hosts_key = '/ovs/vpools/{0}/hosts'.format(vpool_guid)
            for storagedriver_id in Configuration.list(hosts_key):
                config = StorageDriverConfiguration('storagedriver', vpool_guid, storagedriver_id)
                config.load()
                routing_key = Configuration.get('/ovs/framework/messagequeue|queues.storagedriver')
                config.configure_event_publisher(events_amqp_routing_key=routing_key, events_amqp_uris=uris)
                config.save()
コード例 #11
0
ファイル: nodetype.py プロジェクト: sun363587351/framework-1
 def configure_memcached(client, logger):
     """
     Configure Memcached by editing /etc/memcached.conf in place with sed
     :param client: Client on which to configure Memcached
     :type client: ovs_extensions.generic.sshclient.SSHClient
     :param logger: Logger object used for logging
     :type logger: ovs.extensions.generic.logger.Logger
     :return: None
     """
     Toolbox.log(logger=logger, messages='Setting up Memcached')
     config_file = '/etc/memcached.conf'
     # Replace the -l line by '-l 0.0.0.0' and the -m line by '-m 1024'
     client.run(['sed', '-i', 's/^-l.*/-l 0.0.0.0/g', config_file])
     client.run(['sed', '-i', 's/^-m.*/-m 1024/g', config_file])
     # Put all -v, -vv, ... back in comment
     # Raw string on purpose: in a regular literal '\1' is the octal escape for chr(1),
     # so sed would receive a control character instead of the back-reference
     client.run(['sed', '-i', '-E', r's/^-v(.*)/# -v\1/g', config_file])
     # Uncomment only -v
     client.run(['sed', '-i', 's/^# -v[^v]*$/-v/g', config_file])
コード例 #12
0
ファイル: nodetype.py プロジェクト: sun363587351/framework-1
    def add_services(client, node_type, logger):
        """
        Register the services an OVS node of the given type requires
        Services which are already present on the client are skipped
        :param client: Client on which to add the services
        :type client: ovs_extensions.generic.sshclient.SSHClient
        :param node_type: Type of node ('master' or 'extra')
        :type node_type: str
        :param logger: Logger object used for logging
        :type logger: ovs.extensions.generic.logger.Logger
        :return: None
        """
        Toolbox.log(logger=logger, messages='Adding services')
        manager = ServiceFactory.get_manager()
        required = {}
        queue = System.get_my_machine_id(client=client)
        if node_type == 'master':
            # Masters get ',ovs_masters' appended to their worker queue and run a set of extra services
            queue += ',ovs_masters'
            master_only = {'memcached': {'MEMCACHE_NODE_IP': client.ip, 'WORKER_QUEUE': queue},
                           'rabbitmq-server': {'MEMCACHE_NODE_IP': client.ip, 'WORKER_QUEUE': queue},
                           'scheduled-tasks': {},
                           'webapp-api': {},
                           'volumerouter-consumer': {}}
            required.update(master_only)
        # Services needed regardless of the node type
        common = {'workers': {'WORKER_QUEUE': queue},
                  'watcher-framework': {}}
        required.update(common)

        for name, parameters in required.iteritems():
            if manager.has_service(name, client):
                continue
            Toolbox.log(logger=logger, messages='Adding service {0}'.format(name))
            manager.add_service(name=name, params=parameters, client=client)
コード例 #13
0
ファイル: nodetype.py プロジェクト: sun363587351/framework-1
 def retrieve_storagerouter_info_via_host(ip, password):
     """
     Retrieve the storagerouters from the model through a remote session on the given host
     Any error is logged and whatever was collected up to that point (possibly nothing) is returned
     :param ip: Passed as ip_info to the remote session
     :param password: Password used for the remote session
     :return: Dict keyed by storagerouter name, each value holding the 'ip' and the lower-cased 'type'
     :rtype: dict
     """
     found = {}
     try:
         from ovs.dal.lists.storagerouterlist import StorageRouterList
         with remote(ip_info=ip, modules=[StorageRouterList], username='******', password=password, strict_host_key_checking=False) as remote_env:
             for router in remote_env.StorageRouterList.get_storagerouters():
                 found[router.name] = {'ip': router.ip, 'type': router.node_type.lower()}
     except Exception as failure:
         Toolbox.log(logger=NodeTypeController._logger, messages='Error loading storagerouters: {0}'.format(failure), loglevel='exception', silent=True)
     return found
コード例 #14
0
    def remove_services(client, node_type, logger):
        """
        Stop and remove the services managed by OVS
        'rabbitmq-server' and 'memcached' are only stopped, never removed
        :param client: Client on which to remove the services
        :type client: ovs_extensions.generic.sshclient.SSHClient
        :param node_type: Type of node, can be 'master' or 'extra'
        :type node_type: str
        :param logger: Logger object used for logging
        :type logger: ovs.extensions.generic.logger.Logger
        :return: None
        """
        Toolbox.log(logger=logger, messages='Removing services')
        manager = ServiceFactory.get_manager()
        stop_only = ['rabbitmq-server', 'memcached']
        services = ['workers', 'support-agent', 'watcher-framework']
        if node_type == 'master':
            services.extend(['scheduled-tasks', 'webapp-api', 'volumerouter-consumer'])
            # Only touch RabbitMQ / Memcached when they are reported as internally managed
            for managed_name, service_name in (('rabbitmq', 'rabbitmq-server'), ('memcached', 'memcached')):
                if Toolbox.is_service_internally_managed(service=managed_name) is True:
                    services.append(service_name)

        for service in services:
            if not manager.has_service(service, client=client):
                continue
            keep_installed = service in stop_only
            action = 'Stopping' if keep_installed else 'Removing'
            Toolbox.log(logger=logger, messages='{0} service {1}'.format(action, service))
            manager.stop_service(service, client=client)
            if not keep_installed:
                manager.remove_service(service, client=client)
コード例 #15
0
ファイル: nodetype.py プロジェクト: openvstorage/framework
    def promote_or_demote_node(node_action, cluster_ip=None, execute_rollback=False):
        """
        Promotes or demotes the local node, or demotes an offline node when 'demote' is combined with a cluster_ip
        Progress and failures are reported through Toolbox.log. Any Exception or KeyboardInterrupt raised after the
        node_action validation is logged and ends the process with sys.exit(1)
        :param node_action: Demote or promote
        :type node_action: str
        :param cluster_ip: IP of node to promote or demote
        :type cluster_ip: str
        :param execute_rollback: In case of failure revert the changes made
        :type execute_rollback: bool
        :raises ValueError: When node_action is neither 'promote' nor 'demote'
        :return: None
        """

        # Validated outside of the try block, so this ValueError reaches the caller instead of ending in sys.exit
        if node_action not in ('promote', 'demote'):
            raise ValueError('Nodes can only be promoted or demoted')

        Toolbox.log(logger=NodeTypeController._logger, messages='Open vStorage Setup - {0}'.format(node_action.capitalize()), boxed=True)
        try:
            Toolbox.log(logger=NodeTypeController._logger, messages='Collecting information', title=True)

            # The local node must have completed its setup before its type can be changed
            machine_id = System.get_my_machine_id()
            if Configuration.get('/ovs/framework/hosts/{0}/setupcompleted'.format(machine_id)) is False:
                raise RuntimeError('No local OVS setup found.')

            if cluster_ip and not re.match(Toolbox.regex_ip, cluster_ip):
                raise RuntimeError('Incorrect IP provided ({0})'.format(cluster_ip))

            # When an IP is given, the node type is looked up for the machine behind that IP instead of the local one
            if cluster_ip:
                client = SSHClient(endpoint=cluster_ip)
                machine_id = System.get_my_machine_id(client)

            # Refuse actions which do not match the current node type
            node_type = Configuration.get('/ovs/framework/hosts/{0}/type'.format(machine_id))
            if node_action == 'promote' and node_type == 'MASTER':
                raise RuntimeError('This node is already master.')
            elif node_action == 'demote' and node_type == 'EXTRA':
                raise RuntimeError('This node should be a master.')
            elif node_type not in ['MASTER', 'EXTRA']:
                raise RuntimeError('This node is not correctly configured.')

            master_ip = None
            offline_nodes = []

            online = True
            target_client = None
            if node_action == 'demote' and cluster_ip:  # Demote an offline node
                from ovs.dal.lists.storagerouterlist import StorageRouterList
                from ovs.lib.storagedriver import StorageDriverController

                ip = cluster_ip
                unique_id = None
                ip_client_map = {}
                # Try to connect to every storagerouter: reachable ones end up in ip_client_map, unreachable ones in offline_nodes
                for storage_router in StorageRouterList.get_storagerouters():
                    try:
                        client = SSHClient(storage_router.ip, username='******')
                        if storage_router.node_type == 'MASTER':
                            master_ip = storage_router.ip
                        ip_client_map[storage_router.ip] = client
                    except UnableToConnectException:
                        # The node to demote is confirmed offline: remember its machine ID and mark it offline
                        if storage_router.ip == cluster_ip:
                            online = False
                            unique_id = storage_router.machine_id
                            StorageDriverController.mark_offline(storagerouter_guid=storage_router.guid)
                        offline_nodes.append(storage_router)
                if online is True:
                    raise RuntimeError("If the node is online, please use 'ovs setup demote' executed on the node you wish to demote")
                if master_ip is None:
                    raise RuntimeError('Failed to retrieve another responsive MASTER node')

            else:
                # Promote, or demote without cluster_ip: operate on the local node through a client on 127.0.0.1
                target_password = Toolbox.ask_validate_password(ip='127.0.0.1', logger=NodeTypeController._logger)
                target_client = SSHClient('127.0.0.1', username='******', password=target_password)

                unique_id = System.get_my_machine_id(target_client)
                ip = Configuration.get('/ovs/framework/hosts/{0}/ip'.format(unique_id))

                storagerouter_info = NodeTypeController.retrieve_storagerouter_info_via_host(ip=target_client.ip, password=target_password)
                node_ips = [sr_info['ip'] for sr_info in storagerouter_info.itervalues()]
                # Masters other than this node; at least one is required for both promote and demote
                master_node_ips = [sr_info['ip'] for sr_info in storagerouter_info.itervalues() if sr_info['type'] == 'master' and sr_info['ip'] != ip]
                if len(master_node_ips) == 0:
                    if node_action == 'promote':
                        raise RuntimeError('No master node could be found')
                    else:
                        raise RuntimeError('It is not possible to remove the only master')

                master_ip = master_node_ips[0]
                ip_client_map = dict((node_ip, SSHClient(node_ip, username='******')) for node_ip in node_ips)

            if node_action == 'demote':
                # Refuse to demote when an internal, single node Arakoon cluster lives on the node to be demoted
                for cluster_name in Configuration.list('/ovs/arakoon'):
                    config = ArakoonClusterConfig(cluster_name, False)
                    config.load_config()
                    arakoon_client = ArakoonInstaller.build_client(config)
                    metadata = json.loads(arakoon_client.get(ArakoonInstaller.METADATA_KEY))
                    if len(config.nodes) == 1 and config.nodes[0].ip == ip and metadata.get('internal') is True:
                        raise RuntimeError('Demote is not supported when single node Arakoon cluster(s) are present on the node to be demoted.')

            # RabbitMQ and Memcached are only (un)configured when they are reported as internally managed
            configure_rabbitmq = Toolbox.is_service_internally_managed(service='rabbitmq')
            configure_memcached = Toolbox.is_service_internally_managed(service='memcached')
            if node_action == 'promote':
                try:
                    NodeTypeController.promote_node(cluster_ip=ip,
                                                    master_ip=master_ip,
                                                    ip_client_map=ip_client_map,
                                                    unique_id=unique_id,
                                                    configure_memcached=configure_memcached,
                                                    configure_rabbitmq=configure_rabbitmq)
                except Exception:
                    # Failed promote: either undo it right away by demoting, or leave a 'demote' marker in
                    # /tmp/ovs_rollback. The original exception is re-raised in both cases
                    if execute_rollback is True:
                        NodeTypeController.demote_node(cluster_ip=ip,
                                                       master_ip=master_ip,
                                                       ip_client_map=ip_client_map,
                                                       unique_id=unique_id,
                                                       unconfigure_memcached=configure_memcached,
                                                       unconfigure_rabbitmq=configure_rabbitmq,
                                                       offline_nodes=offline_nodes)
                    elif target_client is not None:
                        target_client.file_write('/tmp/ovs_rollback', 'demote')
                    raise
            else:
                try:
                    NodeTypeController.demote_node(cluster_ip=ip,
                                                   master_ip=master_ip,
                                                   ip_client_map=ip_client_map,
                                                   unique_id=unique_id,
                                                   unconfigure_memcached=configure_memcached,
                                                   unconfigure_rabbitmq=configure_rabbitmq,
                                                   offline_nodes=offline_nodes)
                except Exception:
                    # Failed demote: either undo it right away by promoting, or leave a 'promote' marker in
                    # /tmp/ovs_rollback (only possible when a local target_client exists). The exception is re-raised
                    if execute_rollback is True:
                        NodeTypeController.promote_node(cluster_ip=ip,
                                                        master_ip=master_ip,
                                                        ip_client_map=ip_client_map,
                                                        unique_id=unique_id,
                                                        configure_memcached=configure_memcached,
                                                        configure_rabbitmq=configure_rabbitmq)
                    elif target_client is not None:
                        target_client.file_write('/tmp/ovs_rollback', 'promote')
                    raise

            Toolbox.log(logger=NodeTypeController._logger, messages='\n')
            Toolbox.log(logger=NodeTypeController._logger, messages='{0} complete.'.format(node_action.capitalize()), boxed=True)
        except Exception as exception:
            # Every failure above (including the re-raised promote/demote errors) is logged and ends the process
            Toolbox.log(logger=NodeTypeController._logger, messages='\n')
            Toolbox.log(logger=NodeTypeController._logger, messages=['An unexpected error occurred:', str(exception)], boxed=True, loglevel='exception')
            sys.exit(1)
        except KeyboardInterrupt:
            # KeyboardInterrupt is not matched by 'except Exception' and therefore handled separately
            Toolbox.log(logger=NodeTypeController._logger, messages='\n')
            Toolbox.log(logger=NodeTypeController._logger,
                        messages='This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.',
                        boxed=True,
                        loglevel='error')
            sys.exit(1)
コード例 #16
0
ファイル: nodetype.py プロジェクト: openvstorage/framework
    def configure_rabbitmq(client, logger):
        """
        Configure RabbitMQ
        :param client: Client on which to configure RabbitMQ
        :type client: ovs.extensions.generic.sshclient.SSHClient
        :param logger: Logger object used for logging
        :type logger: ovs.log.log_handler.LogHandler
        :return: None
        """
        Toolbox.log(logger=logger, messages='Setting up RabbitMQ')
        rabbitmq_port = Configuration.get('/ovs/framework/messagequeue|endpoints')[0].split(':')[1]
        rabbitmq_login = Configuration.get('/ovs/framework/messagequeue|user')
        rabbitmq_password = Configuration.get('/ovs/framework/messagequeue|password')
        client.file_write('/etc/rabbitmq/rabbitmq.config', """[
   {{rabbit, [{{tcp_listeners, [{0}]}},
              {{default_user, <<"{1}">>}},
              {{default_pass, <<"{2}">>}},
              {{log_levels, [{{connection, warning}}]}},
              {{vm_memory_high_watermark, 0.2}}]}}
].""".format(rabbitmq_port, rabbitmq_login, rabbitmq_password))

        rabbitmq_running, same_process = ServiceManager.is_rabbitmq_running(client=client)
        if rabbitmq_running is True:
            # Example output of 'list_users' command
            # Listing users ...
            # guest   [administrator]
            # ovs     []
            # ... done.
            users = [user.split('\t')[0] for user in client.run(['rabbitmqctl', 'list_users']).splitlines() if '\t' in user and '[' in user and ']' in user]
            if 'ovs' in users:
                Toolbox.log(logger=logger, messages='Already configured RabbitMQ')
                return
            Toolbox.change_service_state(client, 'rabbitmq-server', 'stop', logger)

        client.run(['rabbitmq-server', '-detached'])
        time.sleep(5)

        # Sometimes/At random the rabbitmq server takes longer than 5 seconds to start,
        #  and the next command fails so the best solution is to retry several times
        # Also retry the add_user/set_permissions, and validate the result
        retry = 0
        while retry < 10:
            users = Toolbox.retry_client_run(client=client,
                                             command=['rabbitmqctl', 'list_users'],
                                             logger=logger).splitlines()
            users = [usr.split('\t')[0] for usr in users if '\t' in usr and '[' in usr and ']' in usr]
            logger.debug('Rabbitmq users {0}'.format(users))
            if 'ovs' in users:
                logger.debug('User ovs configured in rabbitmq')
                break

            logger.debug(Toolbox.retry_client_run(client=client,
                                                  command=['rabbitmqctl', 'add_user', rabbitmq_login, rabbitmq_password],
                                                  logger=logger))
            logger.debug(Toolbox.retry_client_run(client=client,
                                                  command=['rabbitmqctl', 'set_permissions', rabbitmq_login, '.*', '.*', '.*'],
                                                  logger=logger))
            retry += 1
            time.sleep(1)
        client.run(['rabbitmqctl', 'stop'])
        time.sleep(5)
コード例 #17
0
ファイル: nodetype.py プロジェクト: sun363587351/framework-1
    def configure_rabbitmq(client, logger):
        """
        Configure RabbitMQ
        :param client: Client on which to configure RabbitMQ
        :type client: ovs_extensions.generic.sshclient.SSHClient
        :param logger: Logger object used for logging
        :type logger: ovs.extensions.generic.logger.Logger
        :return: None
        """
        Toolbox.log(logger=logger, messages='Setting up RabbitMQ')
        service_manager = ServiceFactory.get_manager()
        rabbitmq_port = Configuration.get(
            '/ovs/framework/messagequeue|endpoints')[0].split(':')[1]
        rabbitmq_login = Configuration.get('/ovs/framework/messagequeue|user')
        rabbitmq_password = Configuration.get(
            '/ovs/framework/messagequeue|password')
        client.file_write(
            '/etc/rabbitmq/rabbitmq.config', """[
   {{rabbit, [{{tcp_listeners, [{0}]}},
              {{default_user, <<"{1}">>}},
              {{default_pass, <<"{2}">>}},
              {{cluster_partition_handling, autoheal}},
              {{log_levels, [{{connection, warning}}]}},
              {{vm_memory_high_watermark, 0.2}}]}}
].""".format(rabbitmq_port, rabbitmq_login, rabbitmq_password))

        rabbitmq_running, same_process = service_manager.is_rabbitmq_running(
            client=client)
        if rabbitmq_running is True:
            # Example output of 'list_users' command
            # Listing users ...
            # guest   [administrator]
            # ovs     []
            # ... done.
            users = [
                user.split('\t')[0] for user in client.run(
                    ['rabbitmqctl', 'list_users']).splitlines()
                if '\t' in user and '[' in user and ']' in user
            ]
            if 'ovs' in users:
                Toolbox.log(logger=logger,
                            messages='Already configured RabbitMQ')
                return
            ServiceFactory.change_service_state(client, 'rabbitmq-server',
                                                'stop', logger)

        client.run(['rabbitmq-server', '-detached'])
        time.sleep(5)

        # Sometimes/At random the rabbitmq server takes longer than 5 seconds to start,
        #  and the next command fails so the best solution is to retry several times
        # Also retry the add_user/set_permissions, and validate the result
        retry = 0
        while retry < 10:
            users = Toolbox.retry_client_run(
                client=client,
                command=['rabbitmqctl', 'list_users'],
                logger=logger).splitlines()
            users = [
                usr.split('\t')[0] for usr in users
                if '\t' in usr and '[' in usr and ']' in usr
            ]
            logger.debug('Rabbitmq users {0}'.format(users))
            if 'ovs' in users:
                logger.debug('User ovs configured in rabbitmq')
                break

            logger.debug(
                Toolbox.retry_client_run(client=client,
                                         command=[
                                             'rabbitmqctl', 'add_user',
                                             rabbitmq_login, rabbitmq_password
                                         ],
                                         logger=logger))
            logger.debug(
                Toolbox.retry_client_run(client=client,
                                         command=[
                                             'rabbitmqctl', 'set_permissions',
                                             rabbitmq_login, '.*', '.*', '.*'
                                         ],
                                         logger=logger))
            retry += 1
            time.sleep(1)
        client.run(['rabbitmqctl', 'stop'])
        time.sleep(5)
コード例 #18
0
ファイル: nodetype.py プロジェクト: sun363587351/framework-1
    def promote_or_demote_node(node_action,
                               cluster_ip=None,
                               execute_rollback=False):
        """
        Switch a node between the MASTER and EXTRA roles

        When demoting with a ``cluster_ip``, the node behind that IP is treated as an unreachable (offline) node
        and the demote is driven through the other StorageRouters. In every other case the change is applied via
        an SSH client to 127.0.0.1, after asking for and validating its password.
        Any failure or keyboard interrupt during the procedure is logged and terminates the process with exit code 1.
        :param node_action: Action to perform, either 'promote' or 'demote'
        :type node_action: str
        :param cluster_ip: IP of the node whose configured type is checked; combined with 'demote' it is the offline node to demote
        :type cluster_ip: str
        :param execute_rollback: When True, run the opposite action if the requested one fails. When False, the
                                 opposite action is only written to /tmp/ovs_rollback (if a local client is available)
        :type execute_rollback: bool
        :raises ValueError: When node_action is neither 'promote' nor 'demote'
        :return: None
        """
        if node_action not in ('promote', 'demote'):
            raise ValueError('Nodes can only be promoted or demoted')

        setup_logger = NodeTypeController._logger
        Toolbox.log(logger=setup_logger,
                    messages='Open vStorage Setup - {0}'.format(node_action.capitalize()),
                    boxed=True)
        try:
            Toolbox.log(logger=setup_logger, messages='Collecting information', title=True)

            # The local node must have finished its initial setup
            machine_id = System.get_my_machine_id()
            setup_completed = Configuration.get('/ovs/framework/hosts/{0}/setupcompleted'.format(machine_id))
            if setup_completed is False:
                raise RuntimeError('No local OVS setup found.')

            # A specified IP must be well-formed; its machine ID then replaces the local one
            if cluster_ip:
                if not re.match(Toolbox.regex_ip, cluster_ip):
                    raise RuntimeError('Incorrect IP provided ({0})'.format(cluster_ip))
                machine_id = System.get_my_machine_id(SSHClient(endpoint=cluster_ip))

            # The requested action must make sense for the currently configured node type
            node_type = Configuration.get('/ovs/framework/hosts/{0}/type'.format(machine_id))
            if node_type not in ['MASTER', 'EXTRA']:
                raise RuntimeError('This node is not correctly configured.')
            if node_action == 'promote' and node_type == 'MASTER':
                raise RuntimeError('This node is already master.')
            if node_action == 'demote' and node_type == 'EXTRA':
                raise RuntimeError('This node should be a master.')

            master_ip = None
            offline_nodes = []
            target_client = None
            if node_action == 'demote' and cluster_ip:  # Demote an offline node
                from ovs.dal.lists.storagerouterlist import StorageRouterList
                from ovs.lib.storagedriver import StorageDriverController

                ip = cluster_ip
                unique_id = None
                target_is_online = True
                ip_client_map = {}
                for router in StorageRouterList.get_storagerouters():
                    try:
                        router_client = SSHClient(router.ip, username='******')
                        if router.node_type == 'MASTER':
                            master_ip = router.ip
                        ip_client_map[router.ip] = router_client
                    except UnableToConnectException:
                        # Unreachable router: remember it, and flag the node to demote as offline
                        if router.ip == cluster_ip:
                            target_is_online = False
                            unique_id = router.machine_id
                            StorageDriverController.mark_offline(storagerouter_guid=router.guid)
                        offline_nodes.append(router)
                if target_is_online:
                    raise RuntimeError(
                        "If the node is online, please use 'ovs setup demote' executed on the node you wish to demote"
                    )
                if master_ip is None:
                    raise RuntimeError('Failed to retrieve another responsive MASTER node')

            else:
                target_password = Toolbox.ask_validate_password(ip='127.0.0.1', logger=setup_logger)
                target_client = SSHClient('127.0.0.1', username='******', password=target_password)

                unique_id = System.get_my_machine_id(target_client)
                ip = Configuration.get('/ovs/framework/hosts/{0}/ip'.format(unique_id))

                storagerouter_info = NodeTypeController.retrieve_storagerouter_info_via_host(
                    ip=target_client.ip, password=target_password)
                # Collect every known node IP and, separately, the masters other than this node
                node_ips = []
                other_master_ips = []
                for info in storagerouter_info.itervalues():
                    node_ips.append(info['ip'])
                    if info['type'] == 'master' and info['ip'] != ip:
                        other_master_ips.append(info['ip'])
                if not other_master_ips:
                    if node_action == 'promote':
                        raise RuntimeError('No master node could be found')
                    raise RuntimeError('It is not possible to remove the only master')

                master_ip = other_master_ips[0]
                ip_client_map = {}
                for node_ip in node_ips:
                    ip_client_map[node_ip] = SSHClient(node_ip, username='******')

            if node_action == 'demote':
                # Refuse to demote when an internal, single node Arakoon cluster lives on the target
                for cluster_name in Configuration.list('/ovs/arakoon'):
                    config = ArakoonClusterConfig(cluster_id=cluster_name)
                    arakoon_client = ArakoonInstaller.build_client(config)
                    metadata = json.loads(arakoon_client.get(ArakoonInstaller.METADATA_KEY))
                    single_node = len(config.nodes) == 1
                    if single_node and config.nodes[0].ip == ip and metadata.get('internal') is True:
                        raise RuntimeError(
                            'Demote is not supported when single node Arakoon cluster(s) are present on the node to be demoted.'
                        )

            configure_rabbitmq = Toolbox.is_service_internally_managed(service='rabbitmq')
            configure_memcached = Toolbox.is_service_internally_managed(service='memcached')

            promote_kwargs = {'cluster_ip': ip,
                              'master_ip': master_ip,
                              'ip_client_map': ip_client_map,
                              'unique_id': unique_id,
                              'configure_memcached': configure_memcached,
                              'configure_rabbitmq': configure_rabbitmq}
            demote_kwargs = {'cluster_ip': ip,
                             'master_ip': master_ip,
                             'ip_client_map': ip_client_map,
                             'unique_id': unique_id,
                             'unconfigure_memcached': configure_memcached,
                             'unconfigure_rabbitmq': configure_rabbitmq,
                             'offline_nodes': offline_nodes}
            # The rollback is always the opposite action; its name doubles as the marker file content
            if node_action == 'promote':
                action, action_kwargs = NodeTypeController.promote_node, promote_kwargs
                undo, undo_kwargs = NodeTypeController.demote_node, demote_kwargs
                rollback_marker = 'demote'
            else:
                action, action_kwargs = NodeTypeController.demote_node, demote_kwargs
                undo, undo_kwargs = NodeTypeController.promote_node, promote_kwargs
                rollback_marker = 'promote'

            try:
                action(**action_kwargs)
            except Exception:
                if execute_rollback is True:
                    undo(**undo_kwargs)
                elif target_client is not None:
                    target_client.file_write('/tmp/ovs_rollback', rollback_marker)
                raise

            Toolbox.log(logger=setup_logger, messages='\n')
            Toolbox.log(logger=setup_logger,
                        messages='{0} complete.'.format(node_action.capitalize()),
                        boxed=True)
        except Exception as error:
            Toolbox.log(logger=setup_logger, messages='\n')
            Toolbox.log(logger=setup_logger,
                        messages=['An unexpected error occurred:', str(error)],
                        boxed=True,
                        loglevel='exception')
            sys.exit(1)
        except KeyboardInterrupt:
            Toolbox.log(logger=setup_logger, messages='\n')
            Toolbox.log(
                logger=setup_logger,
                messages=
                'This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.',
                boxed=True,
                loglevel='error')
            sys.exit(1)
コード例 #19
0
ファイル: nodetype.py プロジェクト: sun363587351/framework-1
    def demote_node(cluster_ip,
                    master_ip,
                    ip_client_map,
                    unique_id,
                    unconfigure_memcached,
                    unconfigure_rabbitmq,
                    offline_nodes=None):
        """
        Demotes a given node to an 'EXTRA' node

        The StorageRouter is flagged as 'EXTRA', the node leaves the internally managed Arakoon cluster(s),
        its memcached/RabbitMQ endpoints are dropped from the configuration, the master-only services are stopped
        or removed on the node (when it is reachable) and the framework services are restarted.
        Most steps are best-effort: their failures are logged and the demote continues.
        :param cluster_ip: IP of the node to demote
        :type cluster_ip: str
        :param master_ip: IP of another master node; its client is used to make RabbitMQ forget an offline node
        :type master_ip: str
        :param ip_client_map: Clients to the nodes, indexed by node IP
        :type ip_client_map: dict
        :param unique_id: Machine ID of the node to demote
        :type unique_id: str
        :param unconfigure_memcached: Remove the memcached endpoint and stop the memcached service of the node
        :type unconfigure_memcached: bool
        :param unconfigure_rabbitmq: Remove the RabbitMQ endpoint and unconfigure RabbitMQ for the node
        :type unconfigure_rabbitmq: bool
        :param offline_nodes: StorageRouters which could not be reached (defaults to no offline nodes)
        :type offline_nodes: list
        :raises RuntimeError: When memcached has to be unconfigured but not all memcache nodes are reachable, or
                              when no other node would remain in the ovsdb Arakoon cluster
        :return: None
        """
        from ovs.dal.lists.storagerouterlist import StorageRouterList

        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Demoting node',
                    title=True)
        service_manager = ServiceFactory.get_manager()
        if offline_nodes is None:
            offline_nodes = []

        if unconfigure_memcached is True and len(offline_nodes) == 0:
            if NodeTypeController._validate_local_memcache_servers(
                    ip_client_map) is False:
                raise RuntimeError(
                    'Not all memcache nodes can be reached which is required for demoting a node.'
                )

        # Find other (arakoon) master nodes
        arakoon_cluster_name = str(
            Configuration.get('/ovs/framework/arakoon_clusters|ovsdb'))
        arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(
            cluster_name=arakoon_cluster_name)
        config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name)
        master_node_ips = [node.ip for node in config.nodes]
        # Only shrink the Arakoon cluster(s) when the node is actually part of the ovsdb cluster
        shrink = False
        if cluster_ip in master_node_ips:
            shrink = True
            master_node_ips.remove(cluster_ip)
        if len(master_node_ips) == 0:
            raise RuntimeError(
                'There should be at least one other master node')

        storagerouter = StorageRouterList.get_by_machine_id(unique_id)
        storagerouter.node_type = 'EXTRA'
        storagerouter.save()

        offline_node_ips = [node.ip for node in offline_nodes]
        if arakoon_metadata['internal'] is True and shrink is True:
            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Leaving Arakoon {0} cluster'.format(
                            arakoon_cluster_name))
            arakoon_installer = ArakoonInstaller(
                cluster_name=arakoon_cluster_name)
            arakoon_installer.load()
            arakoon_installer.shrink_cluster(removal_ip=cluster_ip,
                                             offline_nodes=offline_node_ips)
            arakoon_installer.restart_cluster_after_shrinking()
        try:
            # Without an external configuration store, the node also leaves the 'config' Arakoon cluster
            external_config = Configuration.get(
                '/ovs/framework/external_config')
            if external_config is None and shrink is True:
                Toolbox.log(logger=NodeTypeController._logger,
                            messages='Leaving Arakoon config cluster')
                arakoon_installer = ArakoonInstaller(cluster_name='config')
                arakoon_installer.load(ip=master_node_ips[0])
                arakoon_installer.shrink_cluster(
                    removal_ip=cluster_ip, offline_nodes=offline_node_ips)
                arakoon_installer.restart_cluster_after_shrinking()
        except Exception as ex:
            Toolbox.log(
                logger=NodeTypeController._logger,
                messages=['\nFailed to leave configuration cluster', ex],
                loglevel='exception')

        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Update configurations')
        try:
            if unconfigure_memcached is True:
                endpoints = Configuration.get(
                    '/ovs/framework/memcache|endpoints')
                endpoint = '{0}:{1}'.format(cluster_ip, 11211)
                if endpoint in endpoints:
                    endpoints.remove(endpoint)
                Configuration.set('/ovs/framework/memcache|endpoints',
                                  endpoints)
            if unconfigure_rabbitmq is True:
                endpoints = Configuration.get(
                    '/ovs/framework/messagequeue|endpoints')
                endpoint = '{0}:{1}'.format(cluster_ip, 5672)
                if endpoint in endpoints:
                    endpoints.remove(endpoint)
                Configuration.set('/ovs/framework/messagequeue|endpoints',
                                  endpoints)
        except Exception as ex:
            Toolbox.log(logger=NodeTypeController._logger,
                        messages=['\nFailed to update configurations', ex],
                        loglevel='exception')

        if arakoon_metadata['internal'] is True:
            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Restarting master node services')

            # Drop the store references held by the factories
            # NOTE(review): presumably so fresh clients are built against the shrunken cluster - confirm
            PersistentFactory.store = None
            VolatileFactory.store = None

            for service in storagerouter.services:
                if service.name == 'arakoon-ovsdb':
                    service.delete()

        target_client = None
        if storagerouter in offline_nodes:
            # The node itself is unreachable, so only the remaining RabbitMQ cluster can be cleaned up
            if unconfigure_rabbitmq is True:
                Toolbox.log(
                    logger=NodeTypeController._logger,
                    messages='Removing/unconfiguring offline RabbitMQ node')
                client = ip_client_map[master_ip]
                try:
                    client.run([
                        'rabbitmqctl', 'forget_cluster_node',
                        'rabbit@{0}'.format(storagerouter.name)
                    ])
                except Exception as ex:
                    Toolbox.log(logger=NodeTypeController._logger,
                                messages=[
                                    '\nFailed to forget RabbitMQ cluster node',
                                    ex
                                ],
                                loglevel='exception')
        else:
            target_client = ip_client_map[cluster_ip]
            if unconfigure_rabbitmq is True:
                Toolbox.log(logger=NodeTypeController._logger,
                            messages='Removing/unconfiguring RabbitMQ')
                try:
                    if service_manager.has_service('rabbitmq-server',
                                                   client=target_client):
                        # Stop the managed service, then run RabbitMQ detached to reset it and remove its cookie
                        ServiceFactory.change_service_state(
                            target_client, 'rabbitmq-server', 'stop',
                            NodeTypeController._logger)
                        target_client.run(['rabbitmq-server', '-detached'])
                        time.sleep(5)
                        target_client.run(['rabbitmqctl', 'stop_app'])
                        time.sleep(5)
                        target_client.run(['rabbitmqctl', 'reset'])
                        time.sleep(5)
                        target_client.run(['rabbitmqctl', 'stop'])
                        time.sleep(5)
                        target_client.file_unlink(
                            "/var/lib/rabbitmq/.erlang.cookie")
                        ServiceFactory.change_service_state(
                            target_client, 'rabbitmq-server', 'stop',
                            NodeTypeController._logger)  # To be sure
                except Exception as ex:
                    Toolbox.log(logger=NodeTypeController._logger,
                                messages=[
                                    '\nFailed to remove/unconfigure RabbitMQ',
                                    ex
                                ],
                                loglevel='exception')

            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Stopping services')
            services = ['memcached', 'rabbitmq-server']
            if unconfigure_rabbitmq is False:
                services.remove('rabbitmq-server')
            if unconfigure_memcached is False:
                services.remove('memcached')
            for service in services:
                if service_manager.has_service(service, client=target_client):
                    Toolbox.log(
                        logger=NodeTypeController._logger,
                        messages='Stopping service {0}'.format(service))
                    try:
                        ServiceFactory.change_service_state(
                            target_client, service, 'stop',
                            NodeTypeController._logger)
                    except Exception as ex:
                        # The message now carries a placeholder so the failing service is actually reported
                        Toolbox.log(
                            logger=NodeTypeController._logger,
                            messages=[
                                '\nFailed to stop service {0}'.format(service),
                                ex
                            ],
                            loglevel='exception')

            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Removing services')
            services = [
                'scheduled-tasks', 'webapp-api', 'volumerouter-consumer'
            ]
            for service in services:
                if service_manager.has_service(service, client=target_client):
                    Toolbox.log(
                        logger=NodeTypeController._logger,
                        messages='Removing service {0}'.format(service))
                    try:
                        ServiceFactory.change_service_state(
                            target_client, service, 'stop',
                            NodeTypeController._logger)
                        service_manager.remove_service(service,
                                                       client=target_client)
                    except Exception as ex:
                        # The message now carries a placeholder so the failing service is actually reported
                        Toolbox.log(
                            logger=NodeTypeController._logger,
                            messages=[
                                '\nFailed to remove service {0}'.format(
                                    service), ex
                            ],
                            loglevel='exception')

            # Re-add the existing 'workers' service with only the node's own ID as WORKER_QUEUE
            # NOTE(review): presumably this regenerates the service without the masters queue - confirm
            if service_manager.has_service('workers', client=target_client):
                service_manager.add_service(
                    name='workers',
                    client=target_client,
                    params={'WORKER_QUEUE': '{0}'.format(unique_id)})
        try:
            NodeTypeController._configure_amqp_to_volumedriver()
        except Exception as ex:
            Toolbox.log(
                logger=NodeTypeController._logger,
                messages=['\nFailed to configure AMQP to Storage Driver', ex],
                loglevel='exception')

        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Restarting services')
        NodeTypeController.restart_framework_and_memcache_services(
            clients=ip_client_map,
            logger=NodeTypeController._logger,
            offline_node_ips=offline_node_ips)

        # Restart once more when the demote hooks report a truthy result
        if Toolbox.run_hooks(component='nodetype',
                             sub_component='demote',
                             logger=NodeTypeController._logger,
                             cluster_ip=cluster_ip,
                             master_ip=master_ip,
                             offline_node_ips=offline_node_ips):
            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Restarting services')
            NodeTypeController.restart_framework_and_memcache_services(
                clients=ip_client_map,
                logger=NodeTypeController._logger,
                offline_node_ips=offline_node_ips)

        if storagerouter not in offline_nodes:
            target_client = ip_client_map[cluster_ip]
            node_name, _ = target_client.get_hostname()
            if NodeTypeController.avahi_installed(
                    client=target_client,
                    logger=NodeTypeController._logger) is True:
                NodeTypeController.configure_avahi(
                    client=target_client,
                    node_name=node_name,
                    node_type='extra',
                    logger=NodeTypeController._logger)
        Configuration.set(
            '/ovs/framework/hosts/{0}/type'.format(storagerouter.machine_id),
            'EXTRA')

        # An existing rollback marker on the node is overwritten with 'rollback'
        if target_client is not None and target_client.file_exists(
                '/tmp/ovs_rollback'):
            target_client.file_write('/tmp/ovs_rollback', 'rollback')

        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Demote complete',
                    title=True)
コード例 #20
0
ファイル: nodetype.py プロジェクト: openvstorage/framework
    def demote_node(cluster_ip, master_ip, ip_client_map, unique_id, unconfigure_memcached, unconfigure_rabbitmq, offline_nodes=None):
        """
        Demotes a given node
        """
        from ovs.dal.lists.storagerouterlist import StorageRouterList

        Toolbox.log(logger=NodeTypeController._logger, messages='Demoting node', title=True)
        if offline_nodes is None:
            offline_nodes = []

        if unconfigure_memcached is True and len(offline_nodes) == 0:
            if NodeTypeController._validate_local_memcache_servers(ip_client_map) is False:
                raise RuntimeError('Not all memcache nodes can be reached which is required for demoting a node.')

        # Find other (arakoon) master nodes
        arakoon_cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|ovsdb'))
        arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=arakoon_cluster_name)
        config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name, filesystem=False)
        config.load_config()
        master_node_ips = [node.ip for node in config.nodes]
        if cluster_ip in master_node_ips:
            master_node_ips.remove(cluster_ip)
        if len(master_node_ips) == 0:
            raise RuntimeError('There should be at least one other master node')

        storagerouter = StorageRouterList.get_by_machine_id(unique_id)
        storagerouter.node_type = 'EXTRA'
        storagerouter.save()

        offline_node_ips = [node.ip for node in offline_nodes]
        if arakoon_metadata['internal'] is True:
            Toolbox.log(logger=NodeTypeController._logger, messages='Leaving Arakoon {0} cluster'.format(arakoon_cluster_name))
            ArakoonInstaller.shrink_cluster(deleted_node_ip=cluster_ip,
                                            remaining_node_ips=master_node_ips,
                                            cluster_name=arakoon_cluster_name,
                                            offline_nodes=offline_node_ips)

        try:
            external_config = Configuration.get('/ovs/framework/external_config')
            if external_config is None:
                config_store = Configuration.get_store()
                if config_store == 'arakoon':
                    Toolbox.log(logger=NodeTypeController._logger, messages='Leaving Arakoon config cluster')
                    ArakoonInstaller.shrink_cluster(deleted_node_ip=cluster_ip,
                                                    remaining_node_ips=master_node_ips,
                                                    cluster_name='config',
                                                    offline_nodes=offline_node_ips,
                                                    filesystem=True)
                else:
                    from ovs.extensions.db.etcd.installer import EtcdInstaller
                    Toolbox.log(logger=NodeTypeController._logger, messages='Leaving Etcd cluster')
                    EtcdInstaller.shrink_cluster(master_ip, cluster_ip, 'config', offline_node_ips)
        except Exception as ex:
            Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to leave configuration cluster', ex], loglevel='exception')

        Toolbox.log(logger=NodeTypeController._logger, messages='Update configurations')
        try:
            if unconfigure_memcached is True:
                endpoints = Configuration.get('/ovs/framework/memcache|endpoints')
                endpoint = '{0}:{1}'.format(cluster_ip, 11211)
                if endpoint in endpoints:
                    endpoints.remove(endpoint)
                Configuration.set('/ovs/framework/memcache|endpoints', endpoints)
            if unconfigure_rabbitmq is True:
                endpoints = Configuration.get('/ovs/framework/messagequeue|endpoints')
                endpoint = '{0}:{1}'.format(cluster_ip, 5672)
                if endpoint in endpoints:
                    endpoints.remove(endpoint)
                Configuration.set('/ovs/framework/messagequeue|endpoints', endpoints)
        except Exception as ex:
            Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to update configurations', ex], loglevel='exception')

        if arakoon_metadata['internal'] is True:
            Toolbox.log(logger=NodeTypeController._logger, messages='Restarting master node services')
            remaining_nodes = ip_client_map.keys()[:]
            if cluster_ip in remaining_nodes:
                remaining_nodes.remove(cluster_ip)

            PersistentFactory.store = None
            VolatileFactory.store = None

            for service in storagerouter.services:
                if service.name == 'arakoon-ovsdb':
                    service.delete()

        target_client = None
        if storagerouter in offline_nodes:
            if unconfigure_rabbitmq is True:
                Toolbox.log(logger=NodeTypeController._logger, messages='Removing/unconfiguring offline RabbitMQ node')
                client = ip_client_map[master_ip]
                try:
                    client.run(['rabbitmqctl', 'forget_cluster_node', 'rabbit@{0}'.format(storagerouter.name)])
                except Exception as ex:
                    Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to forget RabbitMQ cluster node', ex], loglevel='exception')
        else:
            target_client = ip_client_map[cluster_ip]
            if unconfigure_rabbitmq is True:
                Toolbox.log(logger=NodeTypeController._logger, messages='Removing/unconfiguring RabbitMQ')
                try:
                    if ServiceManager.has_service('rabbitmq-server', client=target_client):
                        Toolbox.change_service_state(target_client, 'rabbitmq-server', 'stop', NodeTypeController._logger)
                        target_client.run(['rabbitmq-server', '-detached'])
                        time.sleep(5)
                        target_client.run(['rabbitmqctl', 'stop_app'])
                        time.sleep(5)
                        target_client.run(['rabbitmqctl', 'reset'])
                        time.sleep(5)
                        target_client.run(['rabbitmqctl', 'stop'])
                        time.sleep(5)
                        target_client.file_unlink("/var/lib/rabbitmq/.erlang.cookie")
                        Toolbox.change_service_state(target_client, 'rabbitmq-server', 'stop', NodeTypeController._logger)  # To be sure
                except Exception as ex:
                    Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to remove/unconfigure RabbitMQ', ex], loglevel='exception')

            Toolbox.log(logger=NodeTypeController._logger, messages='Stopping services')
            services = ['memcached', 'rabbitmq-server']
            if unconfigure_rabbitmq is False:
                services.remove('rabbitmq-server')
            if unconfigure_memcached is False:
                services.remove('memcached')
            for service in services:
                if ServiceManager.has_service(service, client=target_client):
                    Toolbox.log(logger=NodeTypeController._logger, messages='Stopping service {0}'.format(service))
                    try:
                        Toolbox.change_service_state(target_client, service, 'stop', NodeTypeController._logger)
                    except Exception as ex:
                        Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to stop service'.format(service), ex], loglevel='exception')

            Toolbox.log(logger=NodeTypeController._logger, messages='Removing services')
            services = ['scheduled-tasks', 'webapp-api', 'volumerouter-consumer']
            for service in services:
                if ServiceManager.has_service(service, client=target_client):
                    Toolbox.log(logger=NodeTypeController._logger, messages='Removing service {0}'.format(service))
                    try:
                        Toolbox.change_service_state(target_client, service, 'stop', NodeTypeController._logger)
                        ServiceManager.remove_service(service, client=target_client)
                    except Exception as ex:
                        Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to remove service'.format(service), ex], loglevel='exception')

            if ServiceManager.has_service('workers', client=target_client):
                ServiceManager.add_service(name='workers',
                                           client=target_client,
                                           params={'WORKER_QUEUE': '{0}'.format(unique_id)})
        try:
            NodeTypeController._configure_amqp_to_volumedriver()
        except Exception as ex:
            Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to configure AMQP to Storage Driver', ex], loglevel='exception')

        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
        NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger, offline_node_ips=offline_node_ips)

        if Toolbox.run_hooks(component='nodetype',
                             sub_component='demote',
                             logger=NodeTypeController._logger,
                             cluster_ip=cluster_ip,
                             master_ip=master_ip,
                             offline_node_ips=offline_node_ips):
            Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
            NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger, offline_node_ips=offline_node_ips)

        if storagerouter not in offline_nodes:
            target_client = ip_client_map[cluster_ip]
            node_name, _ = target_client.get_hostname()
            if NodeTypeController.avahi_installed(client=target_client, logger=NodeTypeController._logger) is True:
                NodeTypeController.configure_avahi(client=target_client, node_name=node_name, node_type='extra', logger=NodeTypeController._logger)
        Configuration.set('/ovs/framework/hosts/{0}/type'.format(storagerouter.machine_id), 'EXTRA')

        if target_client is not None and target_client.file_exists('/tmp/ovs_rollback'):
            target_client.file_write('/tmp/ovs_rollback', 'rollback')

        Toolbox.log(logger=NodeTypeController._logger, messages='Demote complete', title=True)
コード例 #21
0
ファイル: noderemoval.py プロジェクト: openvstorage/framework
    def remove_node(node_ip, silent=None):
        """
        Remove the node with specified IP from the cluster

        The removal runs in 3 phases:
            * Validations: The IP must be a valid, known IP, the node may not be the only node, the only master
              or the node on which the removal is started, at least 1 remaining master must be reachable, no vDisks
              may be left on the node and the internally managed memcached / messagequeue services must keep
              at least 1 endpoint
            * Confirmations: Only when silent != '--force-yes'. The user confirms the removal and decides whether
              the ASD manager has to be removed too
            * Removal: vPools are removed, the node is demoted if it is a master, services and configuration are
              cleaned up and the node is removed from the model

        Validation errors, an aborted confirmation and errors during the removal are logged and end the process
        with sys.exit(1)
        :param node_ip: IP of the node to remove
        :type node_ip: str
        :param silent: If silent == '--force-yes' no question will be asked to confirm the removal
        :type silent: str
        :return: None
        """
        from ovs.lib.storagedriver import StorageDriverController
        from ovs.lib.storagerouter import StorageRouterController
        from ovs.dal.lists.storagerouterlist import StorageRouterList

        Toolbox.log(logger=NodeRemovalController._logger, messages="Remove node", boxed=True)
        Toolbox.log(
            logger=NodeRemovalController._logger,
            messages="WARNING: Some of these steps may take a very long time, please check the logs for more information\n\n",
        )

        ###############
        # VALIDATIONS #
        ###############
        try:
            # Check the type before calling strip(), otherwise a non-string fails with an unrelated AttributeError
            if not isinstance(node_ip, str):
                raise ValueError("Node IP must be a string")
            node_ip = node_ip.strip()
            if not re.match(SSHClient.IP_REGEX, node_ip):
                raise ValueError("Invalid IP {0} specified".format(node_ip))

            storage_router_all = StorageRouterList.get_storagerouters()
            storage_router_masters = StorageRouterList.get_masters()
            storage_router_all_ips = set([storage_router.ip for storage_router in storage_router_all])
            storage_router_master_ips = set([storage_router.ip for storage_router in storage_router_masters])
            storage_router_to_remove = StorageRouterList.get_by_ip(node_ip)

            if node_ip not in storage_router_all_ips:
                raise ValueError(
                    "Unknown IP specified\nKnown in model:\n - {0}\nSpecified for removal:\n - {1}".format(
                        "\n - ".join(storage_router_all_ips), node_ip
                    )
                )

            if len(storage_router_all_ips) == 1:
                raise RuntimeError("Removing the only node is not possible")

            if node_ip in storage_router_master_ips and len(storage_router_master_ips) == 1:
                raise RuntimeError("Removing the only master node is not possible")

            if System.get_my_storagerouter() == storage_router_to_remove:
                raise RuntimeError(
                    "The node to be removed cannot be identical to the node on which the removal is initiated"
                )

            Toolbox.log(
                logger=NodeRemovalController._logger, messages="Creating SSH connections to remaining master nodes"
            )
            # master_ip ends up being the IP of a reachable master which is not the node being removed
            master_ip = None
            ip_client_map = {}
            storage_routers_offline = []
            storage_router_to_remove_online = True
            for storage_router in storage_router_all:
                try:
                    client = SSHClient(storage_router, username="******")
                    if client.run(["pwd"]):
                        Toolbox.log(
                            logger=NodeRemovalController._logger,
                            messages="  Node with IP {0:<15} successfully connected to".format(storage_router.ip),
                        )
                        ip_client_map[storage_router.ip] = client
                        if storage_router != storage_router_to_remove and storage_router.node_type == "MASTER":
                            master_ip = storage_router.ip
                except UnableToConnectException:
                    Toolbox.log(
                        logger=NodeRemovalController._logger,
                        messages="  Node with IP {0:<15} is unreachable".format(storage_router.ip),
                    )
                    storage_routers_offline.append(storage_router)
                    if storage_router == storage_router_to_remove:
                        storage_router_to_remove_online = False

            if len(ip_client_map) == 0 or master_ip is None:
                raise RuntimeError("Could not connect to any master node in the cluster")

            storage_router_to_remove.invalidate_dynamics("vdisks_guids")
            if (
                len(storage_router_to_remove.vdisks_guids) > 0
            ):  # vDisks are supposed to be moved away manually before removing a node
                raise RuntimeError("Still vDisks attached to Storage Router {0}".format(storage_router_to_remove.name))

            internal_memcached = Toolbox.is_service_internally_managed(service="memcached")
            internal_rabbit_mq = Toolbox.is_service_internally_managed(service="rabbitmq")
            memcached_endpoints = Configuration.get(key="/ovs/framework/memcache|endpoints")
            rabbit_mq_endpoints = Configuration.get(key="/ovs/framework/messagequeue|endpoints")
            # Work on copies to compute which endpoints would remain once the endpoints of this node are gone
            copy_memcached_endpoints = list(memcached_endpoints)
            copy_rabbit_mq_endpoints = list(rabbit_mq_endpoints)
            for endpoint in memcached_endpoints:
                if endpoint.startswith(storage_router_to_remove.ip):
                    copy_memcached_endpoints.remove(endpoint)
            for endpoint in rabbit_mq_endpoints:
                if endpoint.startswith(storage_router_to_remove.ip):
                    copy_rabbit_mq_endpoints.remove(endpoint)
            if len(copy_memcached_endpoints) == 0 and internal_memcached is True:
                raise RuntimeError(
                    "Removal of provided nodes will result in a complete removal of the memcached service"
                )
            if len(copy_rabbit_mq_endpoints) == 0 and internal_rabbit_mq is True:
                raise RuntimeError(
                    "Removal of provided nodes will result in a complete removal of the messagequeue service"
                )
        except Exception as exception:
            Toolbox.log(
                logger=NodeRemovalController._logger, messages=[str(exception)], boxed=True, loglevel="exception"
            )
            sys.exit(1)

        #################
        # CONFIRMATIONS #
        #################
        interactive = silent != "--force-yes"
        remove_asd_manager = not interactive  # Remove ASD manager if non-interactive else ask
        if interactive is True:
            proceed = Interactive.ask_yesno(
                message="Are you sure you want to remove node {0}?".format(storage_router_to_remove.name),
                default_value=False,
            )
            if proceed is False:
                Toolbox.log(logger=NodeRemovalController._logger, messages="Abort removal", title=True)
                sys.exit(1)

            if storage_router_to_remove_online is True:
                client = SSHClient(endpoint=storage_router_to_remove, username="******")
                if ServiceManager.has_service(name="asd-manager", client=client):
                    remove_asd_manager = Interactive.ask_yesno(
                        message="Do you also want to remove the ASD manager and related ASDs?", default_value=False
                    )

            if remove_asd_manager is True or storage_router_to_remove_online is False:
                # Each hook returns a dict with a 'confirm' flag and the 'question' to ask when confirmation is needed
                for function in Toolbox.fetch_hooks("setup", "validate_asd_removal"):
                    validation_output = function(storage_router_to_remove.ip)
                    if validation_output["confirm"] is True:
                        if Interactive.ask_yesno(message=validation_output["question"], default_value=False) is False:
                            remove_asd_manager = False
                            break

        ###########
        # REMOVAL #
        ###########
        try:
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages="Starting removal of node {0} - {1}".format(
                    storage_router_to_remove.name, storage_router_to_remove.ip
                ),
            )
            if storage_router_to_remove_online is False:
                Toolbox.log(
                    logger=NodeRemovalController._logger,
                    messages="  Marking all Storage Drivers served by Storage Router {0} as offline".format(
                        storage_router_to_remove.ip
                    ),
                )
                StorageDriverController.mark_offline(storagerouter_guid=storage_router_to_remove.guid)

            # Remove vPools
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages="  Removing vPools from node {0}".format(storage_router_to_remove.ip),
            )
            # The node being removed is handled separately, so only pass the other unreachable Storage Routers
            storage_routers_offline_guids = [
                sr.guid for sr in storage_routers_offline if sr.guid != storage_router_to_remove.guid
            ]
            for storage_driver in storage_router_to_remove.storagedrivers:
                Toolbox.log(
                    logger=NodeRemovalController._logger,
                    messages="    Removing vPool {0} from node".format(storage_driver.vpool.name),
                )
                StorageRouterController.remove_storagedriver(
                    storagedriver_guid=storage_driver.guid, offline_storage_router_guids=storage_routers_offline_guids
                )

            # Demote if MASTER
            if storage_router_to_remove.node_type == "MASTER":
                NodeTypeController.demote_node(
                    cluster_ip=storage_router_to_remove.ip,
                    master_ip=master_ip,
                    ip_client_map=ip_client_map,
                    unique_id=storage_router_to_remove.machine_id,
                    unconfigure_memcached=internal_memcached,
                    unconfigure_rabbitmq=internal_rabbit_mq,
                    offline_nodes=storage_routers_offline,
                )

            # Stop / remove services
            Toolbox.log(logger=NodeRemovalController._logger, messages="Stopping and removing services")
            config_store = Configuration.get_store()
            if storage_router_to_remove_online is True:
                client = SSHClient(endpoint=storage_router_to_remove, username="******")
                NodeRemovalController.remove_services(
                    client=client,
                    node_type=storage_router_to_remove.node_type.lower(),
                    logger=NodeRemovalController._logger,
                )
                service = "watcher-config"
                if ServiceManager.has_service(service, client=client):
                    Toolbox.log(logger=NodeRemovalController._logger, messages="Removing service {0}".format(service))
                    ServiceManager.stop_service(service, client=client)
                    ServiceManager.remove_service(service, client=client)

                if config_store == "etcd":
                    from ovs.extensions.db.etcd.installer import EtcdInstaller

                    # The Etcd cluster member is only removed when no external configuration is set
                    if Configuration.get(key="/ovs/framework/external_config") is None:
                        Toolbox.log(logger=NodeRemovalController._logger, messages="      Removing Etcd cluster")
                        try:
                            EtcdInstaller.stop("config", client)
                            EtcdInstaller.remove("config", client)
                        except Exception as ex:
                            # Best effort: a failure is logged and the removal continues
                            Toolbox.log(
                                logger=NodeRemovalController._logger,
                                messages=["\nFailed to unconfigure Etcd", ex],
                                loglevel="exception",
                            )

                    Toolbox.log(logger=NodeRemovalController._logger, messages="Removing Etcd proxy")
                    EtcdInstaller.remove_proxy("config", client.ip)

            Toolbox.run_hooks(
                component="noderemoval",
                sub_component="remove",
                logger=NodeRemovalController._logger,
                cluster_ip=storage_router_to_remove.ip,
                complete_removal=remove_asd_manager,
            )

            # Clean up model
            Toolbox.log(logger=NodeRemovalController._logger, messages="Removing node from model")
            for service in storage_router_to_remove.services:
                service.delete()
            for disk in storage_router_to_remove.disks:
                for partition in disk.partitions:
                    partition.delete()
                disk.delete()
            for j_domain in storage_router_to_remove.domains:
                j_domain.delete()
            Configuration.delete("/ovs/framework/hosts/{0}".format(storage_router_to_remove.machine_id))

            NodeTypeController.restart_framework_and_memcache_services(
                clients=ip_client_map,
                offline_node_ips=[node.ip for node in storage_routers_offline],
                logger=NodeRemovalController._logger,
            )

            if storage_router_to_remove_online is True:
                client = SSHClient(endpoint=storage_router_to_remove, username="******")
                if config_store == "arakoon":
                    client.file_delete(filenames=[ArakoonConfiguration.CACC_LOCATION])
                client.file_delete(filenames=[Configuration.BOOTSTRAP_CONFIG_LOCATION])
            storage_router_to_remove.delete()
            Toolbox.log(logger=NodeRemovalController._logger, messages="Successfully removed node\n")
        except Exception as exception:
            Toolbox.log(logger=NodeRemovalController._logger, messages="\n")
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=["An unexpected error occurred:", str(exception)],
                boxed=True,
                loglevel="exception",
            )
            sys.exit(1)
        except KeyboardInterrupt:
            Toolbox.log(logger=NodeRemovalController._logger, messages="\n")
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages="This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.",
                boxed=True,
                loglevel="error",
            )
            sys.exit(1)

        if remove_asd_manager is True:
            # In '--force-yes' mode remove_asd_manager is True even for an unreachable node. Like every other remote
            # action in this method, the remote call is only attempted when the node was reachable during validation
            if storage_router_to_remove_online is True:
                Toolbox.log(logger=NodeRemovalController._logger, messages="\nRemoving ASD Manager")
                with remote(storage_router_to_remove.ip, [os]) as rem:
                    rem.os.system("asd-manager remove --force-yes")
            else:
                Toolbox.log(
                    logger=NodeRemovalController._logger,
                    messages="\nSkipping removal of the ASD Manager because the node is unreachable",
                )
        Toolbox.log(logger=NodeRemovalController._logger, messages="Remove nodes finished", title=True)
コード例 #22
0
ファイル: nodetype.py プロジェクト: sun363587351/framework-1
    def promote_node(cluster_ip, master_ip, ip_client_map, unique_id,
                     configure_memcached, configure_rabbitmq):
        """
        Promotes a given node to master

        The Storage Router is flagged as 'MASTER' in the model, the node joins the Arakoon cluster(s), the master
        services are added and started and, when requested, memcached and RabbitMQ are configured on the node
        :param cluster_ip: IP of the node to promote, must be a key of ip_client_map
        :type cluster_ip: str
        :param master_ip: IP of an existing master node, must be a key of ip_client_map
        :type master_ip: str
        :param ip_client_map: Clients to the nodes of the cluster, indexed by IP
        :type ip_client_map: dict
        :param unique_id: Machine ID used to look up the Storage Router to promote in the model
        :type unique_id: str
        :param configure_memcached: Configure memcached on the node and register its endpoint
        :type configure_memcached: bool
        :param configure_rabbitmq: Configure RabbitMQ on the node, join it to the RabbitMQ cluster of the master and
                                   register its endpoint
        :type configure_rabbitmq: bool
        :raises RuntimeError: When memcached must be configured and not all memcache nodes can be reached
                              When no other master node is found in the Arakoon OVS DB cluster configuration
        :return: None
        """
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.dal.lists.servicetypelist import ServiceTypeList
        from ovs.dal.lists.servicelist import ServiceList
        from ovs.dal.hybrids.service import Service

        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Promoting node',
                    title=True)
        service_manager = ServiceFactory.get_manager()
        if configure_memcached is True:
            if NodeTypeController._validate_local_memcache_servers(
                    ip_client_map) is False:
                raise RuntimeError(
                    'Not all memcache nodes can be reached which is required for promoting a node.'
                )

        target_client = ip_client_map[cluster_ip]
        machine_id = System.get_my_machine_id(target_client)
        node_name, _ = target_client.get_hostname()
        master_client = ip_client_map[master_ip]

        storagerouter = StorageRouterList.get_by_machine_id(unique_id)
        storagerouter.node_type = 'MASTER'
        storagerouter.save()

        # The configuration Arakoon cluster is only extended when no external configuration is set
        external_config = Configuration.get('/ovs/framework/external_config')
        if external_config is None:
            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Joining Arakoon configuration cluster')
            arakoon_installer = ArakoonInstaller(cluster_name='config')
            arakoon_installer.load(ip=master_ip)
            arakoon_installer.extend_cluster(
                new_ip=cluster_ip,
                base_dir=Configuration.get('/ovs/framework/paths|ovsdb'))
            arakoon_installer.restart_cluster_after_extending(
                new_ip=cluster_ip)
            service_manager.register_service(
                node_name=machine_id,
                service_metadata=arakoon_installer.service_metadata[cluster_ip]
            )

        # Find other (arakoon) master nodes
        arakoon_cluster_name = str(
            Configuration.get('/ovs/framework/arakoon_clusters|ovsdb'))
        arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(
            cluster_name=arakoon_cluster_name)
        config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name)
        master_node_ips = [node.ip for node in config.nodes]
        if cluster_ip in master_node_ips:
            master_node_ips.remove(cluster_ip)
        if len(master_node_ips) == 0:
            raise RuntimeError(
                'There should be at least one other master node')

        # Only filled in when this node joins an internally managed Arakoon OVS DB cluster
        arakoon_ports = []
        if arakoon_metadata['internal'] is True:
            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Joining Arakoon OVS DB cluster')
            arakoon_installer = ArakoonInstaller(
                cluster_name=arakoon_cluster_name)
            arakoon_installer.load()
            arakoon_installer.extend_cluster(
                new_ip=cluster_ip,
                base_dir=Configuration.get('/ovs/framework/paths|ovsdb'))
            arakoon_installer.restart_cluster_after_extending(
                new_ip=cluster_ip)
            arakoon_ports = arakoon_installer.ports[cluster_ip]

        if configure_memcached is True:
            NodeTypeController.configure_memcached(
                client=target_client, logger=NodeTypeController._logger)
        NodeTypeController.add_services(client=target_client,
                                        node_type='master',
                                        logger=NodeTypeController._logger)

        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Update configurations')
        if configure_memcached is True:
            endpoints = Configuration.get('/ovs/framework/memcache|endpoints')
            endpoint = '{0}:11211'.format(cluster_ip)
            if endpoint not in endpoints:
                endpoints.append(endpoint)
                Configuration.set('/ovs/framework/memcache|endpoints',
                                  endpoints)
        if configure_rabbitmq is True:
            endpoints = Configuration.get(
                '/ovs/framework/messagequeue|endpoints')
            endpoint = '{0}:5672'.format(cluster_ip)
            if endpoint not in endpoints:
                endpoints.append(endpoint)
                Configuration.set('/ovs/framework/messagequeue|endpoints',
                                  endpoints)

        if arakoon_metadata['internal'] is True:
            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Restarting master node services')
            # NOTE(review): presumably forces the factories to build new store clients now that the Arakoon
            # cluster has been extended - confirm against PersistentFactory / VolatileFactory
            PersistentFactory.store = None
            VolatileFactory.store = None

            # Model the 'arakoon-ovsdb' service for this node unless a matching service is already modeled
            if 'arakoon-ovsdb' not in [
                    s.name for s in ServiceList.get_services() if
                    s.is_internal is False or s.storagerouter.ip == cluster_ip
            ]:
                service = Service()
                service.name = 'arakoon-ovsdb'
                service.type = ServiceTypeList.get_by_name(
                    ServiceType.SERVICE_TYPES.ARAKOON)
                service.ports = arakoon_ports
                service.storagerouter = storagerouter
                service.save()

        if configure_rabbitmq is True:
            NodeTypeController.configure_rabbitmq(
                client=target_client, logger=NodeTypeController._logger)
            # Copy rabbitmq cookie
            rabbitmq_cookie_file = '/var/lib/rabbitmq/.erlang.cookie'

            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Copying RabbitMQ cookie')
            contents = master_client.file_read(rabbitmq_cookie_file)
            master_hostname, _ = master_client.get_hostname()
            target_client.dir_create(os.path.dirname(rabbitmq_cookie_file))
            target_client.file_write(rabbitmq_cookie_file, contents)
            # '0o400' is the octal notation accepted by both Python 2.6+ and Python 3 ('0400' is Python 2 only)
            target_client.file_chmod(rabbitmq_cookie_file, mode=0o400)
            # Join the RabbitMQ cluster of the master: start detached, stop the app, join and stop again
            target_client.run(['rabbitmq-server', '-detached'])
            time.sleep(5)
            target_client.run(['rabbitmqctl', 'stop_app'])
            time.sleep(5)
            target_client.run([
                'rabbitmqctl', 'join_cluster',
                'rabbit@{0}'.format(master_hostname)
            ])
            time.sleep(5)
            target_client.run(['rabbitmqctl', 'stop'])
            time.sleep(5)

            # Enable HA for the rabbitMQ queues
            ServiceFactory.change_service_state(target_client,
                                                'rabbitmq-server', 'start',
                                                NodeTypeController._logger)
            NodeTypeController.check_rabbitmq_and_enable_ha_mode(
                client=target_client, logger=NodeTypeController._logger)

        NodeTypeController._configure_amqp_to_volumedriver()

        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Starting services')
        services = ['memcached', 'arakoon-ovsdb', 'rabbitmq-server']
        # 'arakoon-ovsdb' is not started here when the Arakoon OVS DB cluster is internally managed
        if arakoon_metadata['internal'] is True:
            services.remove('arakoon-ovsdb')
        for service in services:
            if service_manager.has_service(service, client=target_client):
                ServiceFactory.change_service_state(target_client, service,
                                                    'start',
                                                    NodeTypeController._logger)

        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Restarting services')
        NodeTypeController.restart_framework_and_memcache_services(
            clients=ip_client_map, logger=NodeTypeController._logger)

        # Restart the services once more when run_hooks returns a truthy value
        if Toolbox.run_hooks(component='nodetype',
                             sub_component='promote',
                             logger=NodeTypeController._logger,
                             cluster_ip=cluster_ip,
                             master_ip=master_ip):
            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Restarting services')
            NodeTypeController.restart_framework_and_memcache_services(
                clients=ip_client_map, logger=NodeTypeController._logger)

        if NodeTypeController.avahi_installed(
                client=target_client,
                logger=NodeTypeController._logger) is True:
            NodeTypeController.configure_avahi(
                client=target_client,
                node_name=node_name,
                node_type='master',
                logger=NodeTypeController._logger)
        Configuration.set('/ovs/framework/hosts/{0}/type'.format(machine_id),
                          'MASTER')
        target_client.run(
            ['chown', '-R', 'ovs:ovs', '/opt/OpenvStorage/config'])
        Configuration.set(
            '/ovs/framework/hosts/{0}/promotecompleted'.format(machine_id),
            True)

        if target_client.file_exists('/tmp/ovs_rollback'):
            target_client.file_delete('/tmp/ovs_rollback')

        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Promote complete')
コード例 #23
0
    def remove_node(node_ip, silent=None):
        """
        Remove the node with specified IP from the cluster
        :param node_ip: IP of the node to remove
        :type node_ip: str
        :param silent: If silent == '--force-yes' no question will be asked to confirm the removal
        :type silent: str
        :return: None
        """
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.lib.storagedriver import StorageDriverController
        from ovs.lib.vpool import VPoolController

        Toolbox.log(logger=NodeRemovalController._logger,
                    messages='Remove node',
                    boxed=True)
        Toolbox.log(
            logger=NodeRemovalController._logger,
            messages=
            'WARNING: Some of these steps may take a very long time, please check the logs for more information\n\n'
        )
        service_manager = ServiceFactory.get_manager()

        ###############
        # VALIDATIONS #
        ###############
        try:
            node_ip = node_ip.strip()
            if not isinstance(node_ip, str):
                raise ValueError('Node IP must be a string')
            if not re.match(SSHClient.IP_REGEX, node_ip):
                raise ValueError('Invalid IP {0} specified'.format(node_ip))

            storage_router_all = sorted(StorageRouterList.get_storagerouters(),
                                        key=lambda k: k.name)
            storage_router_masters = StorageRouterList.get_masters()
            storage_router_all_ips = set(
                [storage_router.ip for storage_router in storage_router_all])
            storage_router_master_ips = set([
                storage_router.ip for storage_router in storage_router_masters
            ])
            storage_router_to_remove = StorageRouterList.get_by_ip(node_ip)
            offline_reasons = {}
            if node_ip not in storage_router_all_ips:
                raise ValueError(
                    'Unknown IP specified\nKnown in model:\n - {0}\nSpecified for removal:\n - {1}'
                    .format('\n - '.join(storage_router_all_ips), node_ip))

            if len(storage_router_all_ips) == 1:
                raise RuntimeError("Removing the only node is not possible")

            if node_ip in storage_router_master_ips and len(
                    storage_router_master_ips) == 1:
                raise RuntimeError(
                    "Removing the only master node is not possible")

            if System.get_my_storagerouter() == storage_router_to_remove:
                raise RuntimeError(
                    'The node to be removed cannot be identical to the node on which the removal is initiated'
                )

            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages='Creating SSH connections to remaining master nodes')
            master_ip = None
            ip_client_map = {}
            storage_routers_offline = []
            storage_router_to_remove_online = True
            for storage_router in storage_router_all:
                try:
                    client = SSHClient(storage_router,
                                       username='******',
                                       timeout=10)
                except (UnableToConnectException, NotAuthenticatedException,
                        TimeOutException) as ex:
                    if isinstance(ex, UnableToConnectException):
                        msg = 'Unable to connect'
                    elif isinstance(ex, NotAuthenticatedException):
                        msg = 'Could not authenticate'
                    elif isinstance(ex, TimeOutException):
                        msg = 'Connection timed out'
                    Toolbox.log(
                        logger=NodeRemovalController._logger,
                        messages='  * Node with IP {0:<15}- {1}'.format(
                            storage_router.ip, msg))
                    offline_reasons[storage_router.ip] = msg
                    storage_routers_offline.append(storage_router)
                    if storage_router == storage_router_to_remove:
                        storage_router_to_remove_online = False
                    continue

                Toolbox.log(
                    logger=NodeRemovalController._logger,
                    messages='  * Node with IP {0:<15}- Successfully connected'
                    .format(storage_router.ip))
                ip_client_map[storage_router.ip] = client
                if storage_router != storage_router_to_remove and storage_router.node_type == 'MASTER':
                    master_ip = storage_router.ip

            if len(ip_client_map) == 0 or master_ip is None:
                raise RuntimeError(
                    'Could not connect to any master node in the cluster')

            storage_router_to_remove.invalidate_dynamics('vdisks_guids')
            if len(
                    storage_router_to_remove.vdisks_guids
            ) > 0:  # vDisks are supposed to be moved away manually before removing a node
                raise RuntimeError(
                    "Still vDisks attached to Storage Router {0}".format(
                        storage_router_to_remove.name))

            internal_memcached = Toolbox.is_service_internally_managed(
                service='memcached')
            internal_rabbit_mq = Toolbox.is_service_internally_managed(
                service='rabbitmq')
            memcached_endpoints = Configuration.get(
                key='/ovs/framework/memcache|endpoints')
            rabbit_mq_endpoints = Configuration.get(
                key='/ovs/framework/messagequeue|endpoints')
            copy_memcached_endpoints = list(memcached_endpoints)
            copy_rabbit_mq_endpoints = list(rabbit_mq_endpoints)
            for endpoint in memcached_endpoints:
                if endpoint.startswith(storage_router_to_remove.ip):
                    copy_memcached_endpoints.remove(endpoint)
            for endpoint in rabbit_mq_endpoints:
                if endpoint.startswith(storage_router_to_remove.ip):
                    copy_rabbit_mq_endpoints.remove(endpoint)
            if len(copy_memcached_endpoints
                   ) == 0 and internal_memcached is True:
                raise RuntimeError(
                    'Removal of provided nodes will result in a complete removal of the memcached service'
                )
            if len(copy_rabbit_mq_endpoints
                   ) == 0 and internal_rabbit_mq is True:
                raise RuntimeError(
                    'Removal of provided nodes will result in a complete removal of the messagequeue service'
                )

            Toolbox.run_hooks(component='noderemoval',
                              sub_component='validate_removal',
                              logger=NodeRemovalController._logger,
                              cluster_ip=storage_router_to_remove.ip)
        except KeyboardInterrupt:
            Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=
                'Removal has been aborted during the validation step. No changes have been applied.',
                boxed=True,
                loglevel='warning')
            sys.exit(1)
        except Exception as exception:
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages=[str(exception)],
                        boxed=True,
                        loglevel='exception')
            sys.exit(1)

        #################
        # CONFIRMATIONS #
        #################
        try:
            interactive = silent != '--force-yes'
            remove_asd_manager = not interactive  # Remove ASD manager if non-interactive else ask
            if interactive is True:
                if len(storage_routers_offline) > 0:
                    Toolbox.log(
                        logger=NodeRemovalController._logger,
                        messages=
                        'Certain nodes appear to be offline. These will not fully removed and will cause issues if they are not really offline.'
                    )
                    Toolbox.log(
                        logger=NodeRemovalController._logger,
                        messages='Offline nodes: {0}'.format(''.join(
                            ('\n  * {0:<15}- {1}.'.format(ip, message)
                             for ip, message in offline_reasons.iteritems()))))
                    valid_node_info = Interactive.ask_yesno(
                        message=
                        'Continue the removal with these being presumably offline?',
                        default_value=False)
                    if valid_node_info is False:
                        Toolbox.log(
                            logger=NodeRemovalController._logger,
                            messages=
                            'Please validate the state of the nodes before removing.',
                            title=True)
                        sys.exit(1)
                proceed = Interactive.ask_yesno(
                    message='Are you sure you want to remove node {0}?'.format(
                        storage_router_to_remove.name),
                    default_value=False)
                if proceed is False:
                    Toolbox.log(logger=NodeRemovalController._logger,
                                messages='Abort removal',
                                title=True)
                    sys.exit(1)

                remove_asd_manager = True
                if storage_router_to_remove_online is True:
                    client = SSHClient(endpoint=storage_router_to_remove,
                                       username='******')
                    if service_manager.has_service(name='asd-manager',
                                                   client=client):
                        remove_asd_manager = Interactive.ask_yesno(
                            message=
                            'Do you also want to remove the ASD manager and related ASDs?',
                            default_value=False)

                if remove_asd_manager is True or storage_router_to_remove_online is False:
                    for fct in Toolbox.fetch_hooks('noderemoval',
                                                   'validate_asd_removal'):
                        validation_output = fct(storage_router_to_remove.ip)
                        if validation_output['confirm'] is True:
                            if Interactive.ask_yesno(
                                    message=validation_output['question'],
                                    default_value=False) is False:
                                remove_asd_manager = False
                                break
        except KeyboardInterrupt:
            Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=
                'Removal has been aborted during the confirmation step. No changes have been applied.',
                boxed=True,
                loglevel='warning')
            sys.exit(1)
        except Exception as exception:
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages=[str(exception)],
                        boxed=True,
                        loglevel='exception')
            sys.exit(1)
        ###########
        # REMOVAL #
        ###########
        try:
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages='Starting removal of node {0} - {1}'.format(
                            storage_router_to_remove.name,
                            storage_router_to_remove.ip))
            if storage_router_to_remove_online is False:
                Toolbox.log(
                    logger=NodeRemovalController._logger,
                    messages=
                    '  Marking all Storage Drivers served by Storage Router {0} as offline'
                    .format(storage_router_to_remove.ip))
                StorageDriverController.mark_offline(
                    storagerouter_guid=storage_router_to_remove.guid)

            # Remove vPools
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages='  Removing vPools from node'.format(
                            storage_router_to_remove.ip))
            storage_routers_offline_guids = [
                sr.guid for sr in storage_routers_offline
                if sr.guid != storage_router_to_remove.guid
            ]
            for storage_driver in storage_router_to_remove.storagedrivers:
                Toolbox.log(logger=NodeRemovalController._logger,
                            messages='    Removing vPool {0} from node'.format(
                                storage_driver.vpool.name))
                VPoolController.shrink_vpool(
                    storagedriver_guid=storage_driver.guid,
                    offline_storage_router_guids=storage_routers_offline_guids)

            # Demote if MASTER
            if storage_router_to_remove.node_type == 'MASTER':
                NodeTypeController.demote_node(
                    cluster_ip=storage_router_to_remove.ip,
                    master_ip=master_ip,
                    ip_client_map=ip_client_map,
                    unique_id=storage_router_to_remove.machine_id,
                    unconfigure_memcached=internal_memcached,
                    unconfigure_rabbitmq=internal_rabbit_mq,
                    offline_nodes=storage_routers_offline)

            # Stop / remove services
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages='Stopping and removing services')
            if storage_router_to_remove_online is True:
                client = SSHClient(endpoint=storage_router_to_remove,
                                   username='******')
                NodeRemovalController.remove_services(
                    client=client,
                    node_type=storage_router_to_remove.node_type.lower(),
                    logger=NodeRemovalController._logger)
                service = 'watcher-config'
                if service_manager.has_service(service, client=client):
                    Toolbox.log(
                        logger=NodeRemovalController._logger,
                        messages='Removing service {0}'.format(service))
                    service_manager.stop_service(service, client=client)
                    service_manager.remove_service(service, client=client)

            Toolbox.run_hooks(component='noderemoval',
                              sub_component='remove',
                              logger=NodeRemovalController._logger,
                              cluster_ip=storage_router_to_remove.ip,
                              complete_removal=remove_asd_manager)

            # Clean up model
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages='Removing node from model')
            for service in storage_router_to_remove.services:
                service.delete()
            for disk in storage_router_to_remove.disks:
                for partition in disk.partitions:
                    partition.delete()
                disk.delete()
            for j_domain in storage_router_to_remove.domains:
                j_domain.delete()
            Configuration.delete('/ovs/framework/hosts/{0}'.format(
                storage_router_to_remove.machine_id))

            NodeTypeController.restart_framework_and_memcache_services(
                clients=ip_client_map,
                offline_node_ips=[node.ip for node in storage_routers_offline],
                logger=NodeRemovalController._logger)

            if storage_router_to_remove_online is True:
                client = SSHClient(endpoint=storage_router_to_remove,
                                   username='******')
                client.file_delete(filenames=[CACC_LOCATION])
                client.file_delete(filenames=[CONFIG_STORE_LOCATION])
            storage_router_to_remove.delete()
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages='Successfully removed node\n')
        except Exception as exception:
            Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=['An unexpected error occurred:',
                          str(exception)],
                boxed=True,
                loglevel='exception')
            sys.exit(1)
        except KeyboardInterrupt:
            Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=
                'This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.',
                boxed=True,
                loglevel='error')
            sys.exit(1)

        if remove_asd_manager is True and storage_router_to_remove_online is True:
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages='\nRemoving ASD Manager')
            with remote(storage_router_to_remove.ip, [os]) as rem:
                rem.os.system('asd-manager remove --force-yes')
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages='Remove nodes finished',
                    title=True)
コード例 #24
0
ファイル: nodetype.py プロジェクト: openvstorage/framework
    def promote_node(cluster_ip, master_ip, ip_client_map, unique_id, configure_memcached, configure_rabbitmq):
        """
        Promote the node with IP `cluster_ip` to MASTER
        The node is flagged as MASTER in the model, joined to the configuration cluster (Arakoon or Etcd) when no
        external configuration is registered, joined to the OVS DB Arakoon cluster when that cluster is internal,
        optionally configured for memcached and RabbitMQ, and finally its services are (re)started
        :param cluster_ip: IP of the node to promote, must be a key of ip_client_map
        :type cluster_ip: str
        :param master_ip: IP of an existing master node, must be a key of ip_client_map
        :type master_ip: str
        :param ip_client_map: Clients indexed by node IP (presumably SSHClient instances - verify against caller)
        :type ip_client_map: dict
        :param unique_id: Machine ID used to look up the StorageRouter of the node to promote
        :type unique_id: str
        :param configure_memcached: When True, validate, configure and register memcached on the node
        :type configure_memcached: bool
        :param configure_rabbitmq: When True, configure RabbitMQ on the node and join it to the master's cluster
        :type configure_rabbitmq: bool
        :raises RuntimeError: When memcached must be configured but not all memcache nodes can be reached, or when
                              no other master node is found in the OVS DB Arakoon configuration
        :return: None
        """
        # Imported inside the function rather than at module level
        # NOTE(review): presumably to avoid import cycles with the DAL - confirm
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.dal.lists.servicetypelist import ServiceTypeList
        from ovs.dal.lists.servicelist import ServiceList
        from ovs.dal.hybrids.service import Service

        Toolbox.log(logger=NodeTypeController._logger, messages='Promoting node', title=True)
        # Fail early, before anything is modified, when the memcache servers cannot be validated
        if configure_memcached is True:
            if NodeTypeController._validate_local_memcache_servers(ip_client_map) is False:
                raise RuntimeError('Not all memcache nodes can be reached which is required for promoting a node.')

        target_client = ip_client_map[cluster_ip]
        machine_id = System.get_my_machine_id(target_client)
        node_name, _ = target_client.get_hostname()
        master_client = ip_client_map[master_ip]

        # Flag the node as MASTER in the model before touching any cluster
        storagerouter = StorageRouterList.get_by_machine_id(unique_id)
        storagerouter.node_type = 'MASTER'
        storagerouter.save()

        # Only extend the configuration cluster when '/ovs/framework/external_config' is not set
        external_config = Configuration.get('/ovs/framework/external_config')
        if external_config is None:
            config_store = Configuration.get_store()
            if config_store == 'arakoon':
                Toolbox.log(logger=NodeTypeController._logger, messages='Joining Arakoon configuration cluster')
                metadata = ArakoonInstaller.extend_cluster(master_ip=master_ip,
                                                           new_ip=cluster_ip,
                                                           cluster_name='config',
                                                           base_dir=Configuration.get('/ovs/framework/paths|ovsdb'),
                                                           ports=[26400, 26401],
                                                           filesystem=True)
                ArakoonInstaller.restart_cluster_add(cluster_name='config',
                                                     current_ips=metadata['ips'],
                                                     new_ip=cluster_ip,
                                                     filesystem=True)
                ServiceManager.register_service(node_name=machine_id,
                                                service_metadata=metadata['service_metadata'])
            else:
                # Any store other than 'arakoon' is handled as Etcd
                from ovs.extensions.db.etcd.installer import EtcdInstaller
                Toolbox.log(logger=NodeTypeController._logger, messages='Joining Etcd cluster')
                EtcdInstaller.extend_cluster(master_ip, cluster_ip, 'config')

        # Find other (arakoon) master nodes: every node in the OVS DB Arakoon config except the one being promoted
        arakoon_cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|ovsdb'))
        arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=arakoon_cluster_name)
        config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name, filesystem=False)
        config.load_config()
        master_node_ips = [node.ip for node in config.nodes]
        if cluster_ip in master_node_ips:
            master_node_ips.remove(cluster_ip)
        if len(master_node_ips) == 0:
            raise RuntimeError('There should be at least one other master node')

        # Stays empty unless the OVS DB Arakoon cluster is internal and gets extended below
        arakoon_ports = []
        if arakoon_metadata['internal'] is True:
            Toolbox.log(logger=NodeTypeController._logger, messages='Joining Arakoon OVS DB cluster')
            result = ArakoonInstaller.extend_cluster(master_ip=master_ip,
                                                     new_ip=cluster_ip,
                                                     cluster_name=arakoon_cluster_name,
                                                     base_dir=Configuration.get('/ovs/framework/paths|ovsdb'))
            ArakoonInstaller.restart_cluster_add(cluster_name=arakoon_cluster_name,
                                                 current_ips=result['ips'],
                                                 new_ip=cluster_ip, filesystem=False)
            arakoon_ports = [result['client_port'], result['messaging_port']]

        if configure_memcached is True:
            NodeTypeController.configure_memcached(client=target_client, logger=NodeTypeController._logger)
        NodeTypeController.add_services(client=target_client, node_type='master', logger=NodeTypeController._logger)

        # Register the new node's memcached / RabbitMQ endpoints in the configuration, skipping duplicates
        Toolbox.log(logger=NodeTypeController._logger, messages='Update configurations')
        if configure_memcached is True:
            endpoints = Configuration.get('/ovs/framework/memcache|endpoints')
            endpoint = '{0}:11211'.format(cluster_ip)
            if endpoint not in endpoints:
                endpoints.append(endpoint)
                Configuration.set('/ovs/framework/memcache|endpoints', endpoints)
        if configure_rabbitmq is True:
            endpoints = Configuration.get('/ovs/framework/messagequeue|endpoints')
            endpoint = '{0}:5672'.format(cluster_ip)
            if endpoint not in endpoints:
                endpoints.append(endpoint)
                Configuration.set('/ovs/framework/messagequeue|endpoints', endpoints)

        if arakoon_metadata['internal'] is True:
            Toolbox.log(logger=NodeTypeController._logger, messages='Restarting master node services')
            # Second restart_cluster_add, this time against the other master nodes only
            ArakoonInstaller.restart_cluster_add(cluster_name=arakoon_cluster_name,
                                                 current_ips=master_node_ips,
                                                 new_ip=cluster_ip,
                                                 filesystem=False)
            # Drop the cached store handles
            # NOTE(review): presumably forces the factories to reconnect to the extended cluster - confirm
            PersistentFactory.store = None
            VolatileFactory.store = None

            # Model an 'arakoon-ovsdb' service for this node, unless a service with that name already exists
            # that is either not internal or located on a StorageRouter with this node's IP
            if 'arakoon-ovsdb' not in [s.name for s in ServiceList.get_services() if s.is_internal is False or s.storagerouter.ip == cluster_ip]:
                service = Service()
                service.name = 'arakoon-ovsdb'
                service.type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON)
                service.ports = arakoon_ports
                service.storagerouter = storagerouter
                service.save()

        if configure_rabbitmq is True:
            NodeTypeController.configure_rabbitmq(client=target_client, logger=NodeTypeController._logger)
            # Copy rabbitmq cookie from the master node to the node being promoted
            rabbitmq_cookie_file = '/var/lib/rabbitmq/.erlang.cookie'

            Toolbox.log(logger=NodeTypeController._logger, messages='Copying Rabbit MQ cookie')
            contents = master_client.file_read(rabbitmq_cookie_file)
            master_hostname, _ = master_client.get_hostname()
            target_client.dir_create(os.path.dirname(rabbitmq_cookie_file))
            target_client.file_write(rabbitmq_cookie_file, contents)
            # NOTE(review): mode is the decimal integer 400, not octal 0400 - verify that file_chmod passes it
            # to chmod as the literal string '400'
            target_client.file_chmod(rabbitmq_cookie_file, mode=400)
            # Start detached, stop the app, join the master's RabbitMQ cluster and stop again
            # A fixed 5 second pause follows each command
            # NOTE(review): presumably to let RabbitMQ settle between steps - confirm
            target_client.run(['rabbitmq-server', '-detached'])
            time.sleep(5)
            target_client.run(['rabbitmqctl', 'stop_app'])
            time.sleep(5)
            target_client.run(['rabbitmqctl', 'join_cluster', 'rabbit@{0}'.format(master_hostname)])
            time.sleep(5)
            target_client.run(['rabbitmqctl', 'stop'])
            time.sleep(5)

            # Start RabbitMQ through the service manager and enable HA for the rabbitMQ queues
            Toolbox.change_service_state(target_client, 'rabbitmq-server', 'start', NodeTypeController._logger)
            NodeTypeController.check_rabbitmq_and_enable_ha_mode(client=target_client, logger=NodeTypeController._logger)

        NodeTypeController._configure_amqp_to_volumedriver()

        Toolbox.log(logger=NodeTypeController._logger, messages='Starting services')
        services = ['memcached', 'arakoon-ovsdb', 'rabbitmq-server', 'etcd-config']
        # An internal 'arakoon-ovsdb' is not started explicitly here
        # NOTE(review): presumably because restart_cluster_add above already (re)started it - confirm
        if arakoon_metadata['internal'] is True:
            services.remove('arakoon-ovsdb')
        # Only services which are actually present on the target node are started
        for service in services:
            if ServiceManager.has_service(service, client=target_client):
                Toolbox.change_service_state(target_client, service, 'start', NodeTypeController._logger)

        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
        NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger)

        # Restart once more when run_hooks returns a truthy value
        # NOTE(review): presumably truthy when at least one 'promote' hook was executed - confirm
        if Toolbox.run_hooks(component='nodetype',
                             sub_component='promote',
                             logger=NodeTypeController._logger,
                             cluster_ip=cluster_ip,
                             master_ip=master_ip):
            Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
            NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger)

        if NodeTypeController.avahi_installed(client=target_client, logger=NodeTypeController._logger) is True:
            NodeTypeController.configure_avahi(client=target_client, node_name=node_name, node_type='master', logger=NodeTypeController._logger)
        # Persist the new node type and mark the promotion as completed in the configuration
        Configuration.set('/ovs/framework/hosts/{0}/type'.format(machine_id), 'MASTER')
        target_client.run(['chown', '-R', 'ovs:ovs', '/opt/OpenvStorage/config'])
        Configuration.set('/ovs/framework/hosts/{0}/promotecompleted'.format(machine_id), True)

        # Remove the '/tmp/ovs_rollback' file when it is present on the promoted node
        if target_client.file_exists('/tmp/ovs_rollback'):
            target_client.file_delete('/tmp/ovs_rollback')

        Toolbox.log(logger=NodeTypeController._logger, messages='Promote complete')