コード例 #1
0
 def _on_demote(cluster_ip, master_ip, offline_node_ips=None):
     """
     Shrinks the internal 'arakoon-voldrv' cluster when a node is demoted
     If the node being demoted hosts an internally managed 'arakoon-voldrv' service, the node is removed
     from that Arakoon cluster, the cluster is restarted, the service is deleted from the model and the
     Arakoon configuration is pushed to the volumedriver again. Nothing happens when no such service
     is found on the node.
     :param cluster_ip: IP of the node to demote
     :type cluster_ip: str
     :param master_ip: IP of the master node (accepted but not used by this handler)
     :type master_ip: str
     :param offline_node_ips: IPs of nodes which are offline
     :type offline_node_ips: list
     :raises RuntimeError: When the node hosts the service but no other online node does
     :return: None
     """
     _ = master_ip
     offline_node_ips = [] if offline_node_ips is None else offline_node_ips

     arakoon_type = ServiceTypeList.get_by_name(
         ServiceType.SERVICE_TYPES.ARAKOON)
     demoted_service = None
     surviving_ips = []
     for candidate in arakoon_type.services:
         # Externally managed arakoon cluster services do not have StorageRouters, so skip them
         if candidate.name != 'arakoon-voldrv' or candidate.is_internal is not True:
             continue
         node_ip = candidate.storagerouter.ip
         if node_ip == cluster_ip:
             demoted_service = candidate
         elif node_ip not in offline_node_ips:
             surviving_ips.append(node_ip)

     # The node to demote is not part of the voldrv cluster: nothing to shrink
     if demoted_service is None:
         return

     if not surviving_ips:
         raise RuntimeError(
             'Could not find any remaining arakoon nodes for the voldrv cluster'
         )

     StorageDriverController._logger.debug('* Shrink StorageDriver cluster')
     voldrv_cluster_name = str(
         Configuration.get('/ovs/framework/arakoon_clusters|voldrv'))
     installer = ArakoonInstaller(cluster_name=voldrv_cluster_name)
     installer.load()
     installer.shrink_cluster(removal_ip=cluster_ip,
                              offline_nodes=offline_node_ips)
     installer.restart_cluster_after_shrinking()
     demoted_service.delete()
     StorageDriverController._configure_arakoon_to_volumedriver(
         cluster_name=voldrv_cluster_name)
コード例 #2
0
ファイル: nodetype.py プロジェクト: sun363587351/framework-1
    def demote_node(cluster_ip,
                    master_ip,
                    ip_client_map,
                    unique_id,
                    unconfigure_memcached,
                    unconfigure_rabbitmq,
                    offline_nodes=None):
        """
        Demotes a given node from master to extra
        Steps, in order:
            * Validate that the memcache servers are reachable (only when memcached must be unconfigured and no nodes are offline)
            * Mark the StorageRouter as 'EXTRA' in the model
            * Shrink the internal 'ovsdb' and 'config' Arakoon clusters if the node is part of them
            * Remove the node's memcache / messagequeue endpoints from the configuration
            * Unconfigure RabbitMQ and stop / remove the master-only services (online node), or make the
              master forget the RabbitMQ node (offline node)
            * Reconfigure AMQP for the Storage Driver, restart framework services and run the 'demote' hooks
            * Reconfigure avahi (online node only) and store the node type in the configuration
        Most steps after the Arakoon 'ovsdb' shrink are best-effort: failures are logged and the demote continues.
        :param cluster_ip: IP of the node to demote
        :type cluster_ip: str
        :param master_ip: IP of a master node, used to reach RabbitMQ when the demoted node is offline
        :type master_ip: str
        :param ip_client_map: Mapping of node IP to the client object used to run commands on that node
        :type ip_client_map: dict
        :param unique_id: Machine ID of the node to demote
        :type unique_id: str
        :param unconfigure_memcached: Whether memcached must be unconfigured and stopped on the node
        :type unconfigure_memcached: bool
        :param unconfigure_rabbitmq: Whether RabbitMQ must be unconfigured and stopped on the node
        :type unconfigure_rabbitmq: bool
        :param offline_nodes: Nodes which are offline (objects exposing an `ip`; compared against the StorageRouter being demoted)
        :type offline_nodes: list
        :raises RuntimeError: When not all memcache servers can be reached, or when no other master node remains
        :return: None
        """
        from ovs.dal.lists.storagerouterlist import StorageRouterList

        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Demoting node',
                    title=True)
        service_manager = ServiceFactory.get_manager()
        if offline_nodes is None:
            offline_nodes = []

        if unconfigure_memcached is True and len(offline_nodes) == 0:
            if NodeTypeController._validate_local_memcache_servers(
                    ip_client_map) is False:
                raise RuntimeError(
                    'Not all memcache nodes can be reached which is required for demoting a node.'
                )

        # Find other (arakoon) master nodes
        arakoon_cluster_name = str(
            Configuration.get('/ovs/framework/arakoon_clusters|ovsdb'))
        arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(
            cluster_name=arakoon_cluster_name)
        config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name)
        master_node_ips = [node.ip for node in config.nodes]
        # Only shrink the Arakoon clusters when the node is actually a member of the 'ovsdb' cluster
        shrink = False
        if cluster_ip in master_node_ips:
            shrink = True
            master_node_ips.remove(cluster_ip)
        if len(master_node_ips) == 0:
            raise RuntimeError(
                'There should be at least one other master node')

        storagerouter = StorageRouterList.get_by_machine_id(unique_id)
        storagerouter.node_type = 'EXTRA'
        storagerouter.save()

        offline_node_ips = [node.ip for node in offline_nodes]
        if arakoon_metadata['internal'] is True and shrink is True:
            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Leaving Arakoon {0} cluster'.format(
                            arakoon_cluster_name))
            arakoon_installer = ArakoonInstaller(
                cluster_name=arakoon_cluster_name)
            arakoon_installer.load()
            arakoon_installer.shrink_cluster(removal_ip=cluster_ip,
                                             offline_nodes=offline_node_ips)
            arakoon_installer.restart_cluster_after_shrinking()
        try:
            # The 'config' cluster is only shrunk when no external configuration store is registered
            external_config = Configuration.get(
                '/ovs/framework/external_config')
            if external_config is None and shrink is True:
                Toolbox.log(logger=NodeTypeController._logger,
                            messages='Leaving Arakoon config cluster')
                arakoon_installer = ArakoonInstaller(cluster_name='config')
                # Load the cluster through one of the remaining master nodes
                arakoon_installer.load(ip=master_node_ips[0])
                arakoon_installer.shrink_cluster(
                    removal_ip=cluster_ip, offline_nodes=offline_node_ips)
                arakoon_installer.restart_cluster_after_shrinking()
        except Exception as ex:
            Toolbox.log(
                logger=NodeTypeController._logger,
                messages=['\nFailed to leave configuration cluster', ex],
                loglevel='exception')

        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Update configurations')
        try:
            if unconfigure_memcached is True:
                endpoints = Configuration.get(
                    '/ovs/framework/memcache|endpoints')
                endpoint = '{0}:{1}'.format(cluster_ip, 11211)
                if endpoint in endpoints:
                    endpoints.remove(endpoint)
                Configuration.set('/ovs/framework/memcache|endpoints',
                                  endpoints)
            if unconfigure_rabbitmq is True:
                endpoints = Configuration.get(
                    '/ovs/framework/messagequeue|endpoints')
                endpoint = '{0}:{1}'.format(cluster_ip, 5672)
                if endpoint in endpoints:
                    endpoints.remove(endpoint)
                Configuration.set('/ovs/framework/messagequeue|endpoints',
                                  endpoints)
        except Exception as ex:
            Toolbox.log(logger=NodeTypeController._logger,
                        messages=['\nFailed to update configurations', ex],
                        loglevel='exception')

        if arakoon_metadata['internal'] is True:
            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Restarting master node services')

            # NOTE(review): presumably resets the cached store clients so they are rebuilt against the
            # shrunken cluster - confirm against PersistentFactory / VolatileFactory
            PersistentFactory.store = None
            VolatileFactory.store = None

            # The node no longer hosts an 'ovsdb' Arakoon member, so drop its modelled service(s)
            for service in storagerouter.services:
                if service.name == 'arakoon-ovsdb':
                    service.delete()

        target_client = None
        if storagerouter in offline_nodes:
            # The node itself cannot be reached: let the master forget it in the RabbitMQ cluster
            if unconfigure_rabbitmq is True:
                Toolbox.log(
                    logger=NodeTypeController._logger,
                    messages='Removing/unconfiguring offline RabbitMQ node')
                client = ip_client_map[master_ip]
                try:
                    client.run([
                        'rabbitmqctl', 'forget_cluster_node',
                        'rabbit@{0}'.format(storagerouter.name)
                    ])
                except Exception as ex:
                    Toolbox.log(logger=NodeTypeController._logger,
                                messages=[
                                    '\nFailed to forget RabbitMQ cluster node',
                                    ex
                                ],
                                loglevel='exception')
        else:
            target_client = ip_client_map[cluster_ip]
            if unconfigure_rabbitmq is True:
                Toolbox.log(logger=NodeTypeController._logger,
                            messages='Removing/unconfiguring RabbitMQ')
                try:
                    if service_manager.has_service('rabbitmq-server',
                                                   client=target_client):
                        # Stop the managed service, start RabbitMQ detached, reset it, stop it again
                        # and remove the Erlang cookie. The sleeps give each step time to settle.
                        ServiceFactory.change_service_state(
                            target_client, 'rabbitmq-server', 'stop',
                            NodeTypeController._logger)
                        target_client.run(['rabbitmq-server', '-detached'])
                        time.sleep(5)
                        target_client.run(['rabbitmqctl', 'stop_app'])
                        time.sleep(5)
                        target_client.run(['rabbitmqctl', 'reset'])
                        time.sleep(5)
                        target_client.run(['rabbitmqctl', 'stop'])
                        time.sleep(5)
                        target_client.file_unlink(
                            "/var/lib/rabbitmq/.erlang.cookie")
                        ServiceFactory.change_service_state(
                            target_client, 'rabbitmq-server', 'stop',
                            NodeTypeController._logger)  # To be sure
                except Exception as ex:
                    Toolbox.log(logger=NodeTypeController._logger,
                                messages=[
                                    '\nFailed to remove/unconfigure RabbitMQ',
                                    ex
                                ],
                                loglevel='exception')

            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Stopping services')
            # Only stop the services the caller asked to unconfigure
            services = ['memcached', 'rabbitmq-server']
            if unconfigure_rabbitmq is False:
                services.remove('rabbitmq-server')
            if unconfigure_memcached is False:
                services.remove('memcached')
            for service in services:
                if service_manager.has_service(service, client=target_client):
                    Toolbox.log(
                        logger=NodeTypeController._logger,
                        messages='Stopping service {0}'.format(service))
                    try:
                        ServiceFactory.change_service_state(
                            target_client, service, 'stop',
                            NodeTypeController._logger)
                    except Exception as ex:
                        Toolbox.log(
                            logger=NodeTypeController._logger,
                            messages=[
                                '\nFailed to stop service {0}'.format(service),
                                ex
                            ],
                            loglevel='exception')

            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Removing services')
            services = [
                'scheduled-tasks', 'webapp-api', 'volumerouter-consumer'
            ]
            for service in services:
                if service_manager.has_service(service, client=target_client):
                    Toolbox.log(
                        logger=NodeTypeController._logger,
                        messages='Removing service {0}'.format(service))
                    try:
                        ServiceFactory.change_service_state(
                            target_client, service, 'stop',
                            NodeTypeController._logger)
                        service_manager.remove_service(service,
                                                       client=target_client)
                    except Exception as ex:
                        Toolbox.log(
                            logger=NodeTypeController._logger,
                            messages=[
                                '\nFailed to remove service {0}'.format(
                                    service), ex
                            ],
                            loglevel='exception')

            # Re-add the existing 'workers' service with WORKER_QUEUE set to this node's unique ID only
            if service_manager.has_service('workers', client=target_client):
                service_manager.add_service(
                    name='workers',
                    client=target_client,
                    params={'WORKER_QUEUE': '{0}'.format(unique_id)})
        try:
            NodeTypeController._configure_amqp_to_volumedriver()
        except Exception as ex:
            Toolbox.log(
                logger=NodeTypeController._logger,
                messages=['\nFailed to configure AMQP to Storage Driver', ex],
                loglevel='exception')

        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Restarting services')
        NodeTypeController.restart_framework_and_memcache_services(
            clients=ip_client_map,
            logger=NodeTypeController._logger,
            offline_node_ips=offline_node_ips)

        # Restart once more when run_hooks returns a truthy value
        if Toolbox.run_hooks(component='nodetype',
                             sub_component='demote',
                             logger=NodeTypeController._logger,
                             cluster_ip=cluster_ip,
                             master_ip=master_ip,
                             offline_node_ips=offline_node_ips):
            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Restarting services')
            NodeTypeController.restart_framework_and_memcache_services(
                clients=ip_client_map,
                logger=NodeTypeController._logger,
                offline_node_ips=offline_node_ips)

        if storagerouter not in offline_nodes:
            target_client = ip_client_map[cluster_ip]
            node_name, _ = target_client.get_hostname()
            if NodeTypeController.avahi_installed(
                    client=target_client,
                    logger=NodeTypeController._logger) is True:
                NodeTypeController.configure_avahi(
                    client=target_client,
                    node_name=node_name,
                    node_type='extra',
                    logger=NodeTypeController._logger)
        Configuration.set(
            '/ovs/framework/hosts/{0}/type'.format(storagerouter.machine_id),
            'EXTRA')

        # Only touch the rollback marker when it already exists on the (online) demoted node
        if target_client is not None and target_client.file_exists(
                '/tmp/ovs_rollback'):
            target_client.file_write('/tmp/ovs_rollback', 'rollback')

        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Demote complete',
                    title=True)