コード例 #1
0
 def _configure_arakoon_to_volumedriver(offline_node_ips=None):
     print 'Update existing vPools'
     logger.info('Update existing vPools')
     if offline_node_ips is None:
         offline_node_ips = []
     for storagerouter in StorageRouterList.get_storagerouters():
         config = ArakoonClusterConfig('voldrv')
         config.load_config()
         arakoon_nodes = []
         for node in config.nodes:
             arakoon_nodes.append({'host': node.ip,
                                   'port': node.client_port,
                                   'node_id': node.name})
         with Remote(storagerouter.ip, [os, RawConfigParser, EtcdConfiguration, StorageDriverConfiguration], 'ovs') as remote:
             configuration_dir = '{0}/storagedriver/storagedriver'.format(EtcdConfiguration.get('/ovs/framework/paths|cfgdir'))
             if not remote.os.path.exists(configuration_dir):
                 remote.os.makedirs(configuration_dir)
             for json_file in remote.os.listdir(configuration_dir):
                 vpool_name = json_file.replace('.json', '')
                 if json_file.endswith('.json'):
                     if remote.os.path.exists('{0}/{1}.cfg'.format(configuration_dir, vpool_name)):
                         continue  # There's also a .cfg file, so this is an alba_proxy configuration file
                     storagedriver_config = remote.StorageDriverConfiguration('storagedriver', vpool_name)
                     storagedriver_config.load()
                     storagedriver_config.configure_volume_registry(vregistry_arakoon_cluster_id='voldrv',
                                                                    vregistry_arakoon_cluster_nodes=arakoon_nodes)
                     storagedriver_config.configure_distributed_lock_store(dls_type='Arakoon',
                                                                           dls_arakoon_cluster_id='voldrv',
                                                                           dls_arakoon_cluster_nodes=arakoon_nodes)
                     storagedriver_config.save(reload_config=True)
コード例 #2
0
 def _configure_arakoon_to_volumedriver():
     print "Update existing vPools"
     logger.info("Update existing vPools")
     for storagerouter in StorageRouterList.get_storagerouters():
         config = ArakoonClusterConfig("voldrv")
         config.load_config()
         arakoon_nodes = []
         for node in config.nodes:
             arakoon_nodes.append({"host": node.ip, "port": node.client_port, "node_id": node.name})
         with Remote(
             storagerouter.ip, [os, RawConfigParser, EtcdConfiguration, StorageDriverConfiguration], "ovs"
         ) as remote:
             configuration_dir = "{0}/storagedriver/storagedriver".format(
                 EtcdConfiguration.get("/ovs/framework/paths|cfgdir")
             )
             if not remote.os.path.exists(configuration_dir):
                 remote.os.makedirs(configuration_dir)
             for json_file in remote.os.listdir(configuration_dir):
                 vpool_name = json_file.replace(".json", "")
                 if json_file.endswith(".json"):
                     if remote.os.path.exists("{0}/{1}.cfg".format(configuration_dir, vpool_name)):
                         continue  # There's also a .cfg file, so this is an alba_proxy configuration file
                     storagedriver_config = remote.StorageDriverConfiguration("storagedriver", vpool_name)
                     storagedriver_config.load()
                     storagedriver_config.configure_volume_registry(
                         vregistry_arakoon_cluster_id="voldrv", vregistry_arakoon_cluster_nodes=arakoon_nodes
                     )
                     storagedriver_config.configure_distributed_lock_store(
                         dls_type="Arakoon", dls_arakoon_cluster_id="voldrv", dls_arakoon_cluster_nodes=arakoon_nodes
                     )
                     storagedriver_config.save(reload_config=True)
コード例 #3
0
 def _configure_arakoon_to_volumedriver():
     print 'Update existing vPools'
     logger.info('Update existing vPools')
     config = ArakoonClusterConfig('voldrv')
     config.load_config()
     arakoon_nodes = []
     for node in config.nodes:
         arakoon_nodes.append({
             'host': node.ip,
             'port': node.client_port,
             'node_id': node.name
         })
     if EtcdConfiguration.dir_exists('/ovs/vpools'):
         for vpool_guid in EtcdConfiguration.list('/ovs/vpools'):
             for storagedriver_id in EtcdConfiguration.list(
                     '/ovs/vpools/{0}/hosts'.format(vpool_guid)):
                 storagedriver_config = StorageDriverConfiguration(
                     'storagedriver', vpool_guid, storagedriver_id)
                 storagedriver_config.load()
                 storagedriver_config.configure_volume_registry(
                     vregistry_arakoon_cluster_id='voldrv',
                     vregistry_arakoon_cluster_nodes=arakoon_nodes)
                 storagedriver_config.configure_distributed_lock_store(
                     dls_type='Arakoon',
                     dls_arakoon_cluster_id='voldrv',
                     dls_arakoon_cluster_nodes=arakoon_nodes)
                 storagedriver_config.save(reload_config=True)
コード例 #4
0
 def _get_nsm_state(abm):
     state = {}
     data = VirtualAlbaBackend.data[abm]
     for nsm in data['nsms']:
         contents = Configuration.get(nsm['_config_key'], raw=True)
         config = ArakoonClusterConfig(nsm['id'], filesystem=False)
         config.read_config(contents)
         state[nsm['id']] = [node.name for node in config.nodes]
     return state
コード例 #5
0
ファイル: scheduledtask.py プロジェクト: grimpy/openvstorage
    def collapse_arakoon():
        """
        Collapse Arakoon's Tlogs
        :return: None
        """
        ScheduledTaskController._logger.info('Starting arakoon collapse')
        storagerouters = StorageRouterList.get_storagerouters()
        cluster_info = [('cacc', storagerouters[0], True)]
        cluster_names = []
        for service in ServiceList.get_services():
            if service.is_internal is True and service.type.name in (ServiceType.SERVICE_TYPES.ARAKOON,
                                                                     ServiceType.SERVICE_TYPES.NS_MGR,
                                                                     ServiceType.SERVICE_TYPES.ALBA_MGR):
                cluster = service.name.replace('arakoon-', '')
                if cluster in cluster_names:
                    continue
                cluster_names.append(cluster)
                cluster_info.append((cluster, service.storagerouter, False))
        workload = {}
        for cluster, storagerouter, filesystem in cluster_info:
            ScheduledTaskController._logger.debug('  Collecting info for cluster {0}'.format(cluster))
            config = ArakoonClusterConfig(cluster, filesystem=filesystem)
            config.load_config(storagerouter.ip)
            for node in config.nodes:
                if node.ip not in workload:
                    workload[node.ip] = {'node_id': node.name,
                                         'clusters': []}
                workload[node.ip]['clusters'].append((cluster, filesystem))
        for storagerouter in storagerouters:
            try:
                if storagerouter.ip not in workload:
                    continue
                node_workload = workload[storagerouter.ip]
                client = SSHClient(storagerouter)
                for cluster, filesystem in node_workload['clusters']:
                    try:
                        ScheduledTaskController._logger.debug('  Collapsing cluster {0} on {1}'.format(cluster, storagerouter.ip))
                        if filesystem is True:
                            config_path = ArakoonClusterConfig.CONFIG_FILE.format(cluster)
                        else:
                            config_path = Configuration.get_configuration_path(ArakoonClusterConfig.CONFIG_KEY.format(cluster))
                        client.run(['arakoon', '--collapse-local', node_workload['node_id'], '2', '-config', config_path])
                        ScheduledTaskController._logger.info('  Collapsing cluster {0} on {1} completed'.format(cluster, storagerouter.ip))
                    except:
                        ScheduledTaskController._logger.exception('  Collapsing cluster {0} on {1} failed'.format(cluster, storagerouter.ip))
            except UnableToConnectException:
                ScheduledTaskController._logger.error('  Could not collapse any cluster on {0} (not reachable)'.format(storagerouter.name))

        ScheduledTaskController._logger.info('Arakoon collapse finished')
コード例 #6
0
    def load(vpool):
        """
        Initializes the wrapper for a given vpool
        :param vpool: vPool for which the ClusterRegistryClient needs to be loaded
        """
        if os.environ.get('RUNNING_UNITTESTS') == 'True':
            return ClusterRegistry(str(vpool.guid), None, None)

        key = vpool.identifier
        if key not in crclient_vpool_cache:
            arakoon_cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|voldrv'))
            config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name, filesystem=False)
            config.load_config()
            arakoon_node_configs = []
            for node in config.nodes:
                arakoon_node_configs.append(ArakoonNodeConfig(str(node.name), str(node.ip), node.client_port))
            client = ClusterRegistry(str(vpool.guid), arakoon_cluster_name, arakoon_node_configs)
            crclient_vpool_cache[key] = client
        return crclient_vpool_cache[key]
コード例 #7
0
ファイル: storagedriver.py プロジェクト: grimpy/openvstorage
 def _configure_arakoon_to_volumedriver(cluster_name):
     StorageDriverController._logger.info('Update existing vPools')
     config = ArakoonClusterConfig(cluster_id=cluster_name, filesystem=False)
     config.load_config()
     arakoon_nodes = []
     for node in config.nodes:
         arakoon_nodes.append({'host': node.ip,
                               'port': node.client_port,
                               'node_id': node.name})
     if Configuration.dir_exists('/ovs/vpools'):
         for vpool_guid in Configuration.list('/ovs/vpools'):
             for storagedriver_id in Configuration.list('/ovs/vpools/{0}/hosts'.format(vpool_guid)):
                 storagedriver_config = StorageDriverConfiguration('storagedriver', vpool_guid, storagedriver_id)
                 storagedriver_config.load()
                 storagedriver_config.configure_volume_registry(vregistry_arakoon_cluster_id=cluster_name,
                                                                vregistry_arakoon_cluster_nodes=arakoon_nodes)
                 storagedriver_config.configure_distributed_lock_store(dls_type='Arakoon',
                                                                       dls_arakoon_cluster_id=cluster_name,
                                                                       dls_arakoon_cluster_nodes=arakoon_nodes)
                 storagedriver_config.save(reload_config=True)
コード例 #8
0
    def load(vpool):
        """
        Initializes the wrapper for a given vpool
        :param vpool: vPool for which the ObjectRegistryClient needs to be loaded
        """
        if os.environ.get('RUNNING_UNITTESTS') == 'True':
            return ORClient(str(vpool.guid), None, None)

        key = vpool.identifier
        if key not in oclient_vpool_cache:
            arakoon_node_configs = []
            arakoon_cluster_name = str(
                Configuration.get('/ovs/framework/arakoon_clusters|voldrv'))
            config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name,
                                          filesystem=False)
            config.load_config()
            for node in config.nodes:
                arakoon_node_configs.append(
                    ArakoonNodeConfig(str(node.name), str(node.ip),
                                      node.client_port))
            client = ORClient(str(vpool.guid), str(arakoon_cluster_name),
                              arakoon_node_configs)
            oclient_vpool_cache[key] = client
        return oclient_vpool_cache[key]
コード例 #9
0
ファイル: update.py プロジェクト: openvstorage/framework
    def post_update_core(client, components):
        """
        Execute functionality after the openvstorage core packages have been updated
        For framework:
            * Restart support-agent on every client
            * Restart arakoon-ovsdb on every client (if present and required)
        For storagedriver:
            * ALBA proxies on every client
            * Restart arakoon-voldrv on every client (if present and required)
        :param client: Client on which to execute this post update functionality
        :type client: SSHClient
        :param components: Update components which have been executed
        :type components: list
        :return: None
        """
        if 'framework' not in components and 'storagedriver' not in components:
            return

        update_information = UpdateController.get_update_information_core({})
        services_to_restart = set()
        if 'storagedriver' in components:
            services_to_restart.update(update_information.get('storagedriver', {}).get('services_post_update', set()))
        if 'framework' in components:
            services_to_restart.update(update_information.get('framework', {}).get('services_post_update', set()))
            services_to_restart.add('support-agent')

        if services_to_restart:
            UpdateController._logger.debug('{0}: Executing hook {1}'.format(client.ip, inspect.currentframe().f_code.co_name))
            for service_name in sorted(services_to_restart):
                if not service_name.startswith('ovs-arakoon-'):
                    UpdateController.change_services_state(services=[service_name], ssh_clients=[client], action='restart')
                else:
                    cluster_name = ArakoonClusterConfig.get_cluster_name(ExtensionToolbox.remove_prefix(service_name, 'ovs-arakoon-'))
                    if cluster_name == 'config':
                        arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name='cacc', filesystem=True, ip=System.get_my_storagerouter().ip)
                    else:
                        arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name)
                    if arakoon_metadata['internal'] is True:
                        UpdateController._logger.debug('{0}: Restarting arakoon node {1}'.format(client.ip, cluster_name))
                        ArakoonInstaller.restart_node(cluster_name=cluster_name,
                                                      client=client)
            UpdateController._logger.debug('{0}: Executed hook {1}'.format(client.ip, inspect.currentframe().f_code.co_name))
コード例 #10
0
ファイル: nodetype.py プロジェクト: openvstorage/framework
    def demote_node(cluster_ip, master_ip, ip_client_map, unique_id, unconfigure_memcached, unconfigure_rabbitmq, offline_nodes=None):
        """
        Demotes a given node
        """
        from ovs.dal.lists.storagerouterlist import StorageRouterList

        Toolbox.log(logger=NodeTypeController._logger, messages='Demoting node', title=True)
        if offline_nodes is None:
            offline_nodes = []

        if unconfigure_memcached is True and len(offline_nodes) == 0:
            if NodeTypeController._validate_local_memcache_servers(ip_client_map) is False:
                raise RuntimeError('Not all memcache nodes can be reached which is required for demoting a node.')

        # Find other (arakoon) master nodes
        arakoon_cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|ovsdb'))
        arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=arakoon_cluster_name)
        config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name, filesystem=False)
        config.load_config()
        master_node_ips = [node.ip for node in config.nodes]
        if cluster_ip in master_node_ips:
            master_node_ips.remove(cluster_ip)
        if len(master_node_ips) == 0:
            raise RuntimeError('There should be at least one other master node')

        storagerouter = StorageRouterList.get_by_machine_id(unique_id)
        storagerouter.node_type = 'EXTRA'
        storagerouter.save()

        offline_node_ips = [node.ip for node in offline_nodes]
        if arakoon_metadata['internal'] is True:
            Toolbox.log(logger=NodeTypeController._logger, messages='Leaving Arakoon {0} cluster'.format(arakoon_cluster_name))
            ArakoonInstaller.shrink_cluster(deleted_node_ip=cluster_ip,
                                            remaining_node_ips=master_node_ips,
                                            cluster_name=arakoon_cluster_name,
                                            offline_nodes=offline_node_ips)

        try:
            external_config = Configuration.get('/ovs/framework/external_config')
            if external_config is None:
                config_store = Configuration.get_store()
                if config_store == 'arakoon':
                    Toolbox.log(logger=NodeTypeController._logger, messages='Leaving Arakoon config cluster')
                    ArakoonInstaller.shrink_cluster(deleted_node_ip=cluster_ip,
                                                    remaining_node_ips=master_node_ips,
                                                    cluster_name='config',
                                                    offline_nodes=offline_node_ips,
                                                    filesystem=True)
                else:
                    from ovs.extensions.db.etcd.installer import EtcdInstaller
                    Toolbox.log(logger=NodeTypeController._logger, messages='Leaving Etcd cluster')
                    EtcdInstaller.shrink_cluster(master_ip, cluster_ip, 'config', offline_node_ips)
        except Exception as ex:
            Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to leave configuration cluster', ex], loglevel='exception')

        Toolbox.log(logger=NodeTypeController._logger, messages='Update configurations')
        try:
            if unconfigure_memcached is True:
                endpoints = Configuration.get('/ovs/framework/memcache|endpoints')
                endpoint = '{0}:{1}'.format(cluster_ip, 11211)
                if endpoint in endpoints:
                    endpoints.remove(endpoint)
                Configuration.set('/ovs/framework/memcache|endpoints', endpoints)
            if unconfigure_rabbitmq is True:
                endpoints = Configuration.get('/ovs/framework/messagequeue|endpoints')
                endpoint = '{0}:{1}'.format(cluster_ip, 5672)
                if endpoint in endpoints:
                    endpoints.remove(endpoint)
                Configuration.set('/ovs/framework/messagequeue|endpoints', endpoints)
        except Exception as ex:
            Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to update configurations', ex], loglevel='exception')

        if arakoon_metadata['internal'] is True:
            Toolbox.log(logger=NodeTypeController._logger, messages='Restarting master node services')
            remaining_nodes = ip_client_map.keys()[:]
            if cluster_ip in remaining_nodes:
                remaining_nodes.remove(cluster_ip)

            PersistentFactory.store = None
            VolatileFactory.store = None

            for service in storagerouter.services:
                if service.name == 'arakoon-ovsdb':
                    service.delete()

        target_client = None
        if storagerouter in offline_nodes:
            if unconfigure_rabbitmq is True:
                Toolbox.log(logger=NodeTypeController._logger, messages='Removing/unconfiguring offline RabbitMQ node')
                client = ip_client_map[master_ip]
                try:
                    client.run(['rabbitmqctl', 'forget_cluster_node', 'rabbit@{0}'.format(storagerouter.name)])
                except Exception as ex:
                    Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to forget RabbitMQ cluster node', ex], loglevel='exception')
        else:
            target_client = ip_client_map[cluster_ip]
            if unconfigure_rabbitmq is True:
                Toolbox.log(logger=NodeTypeController._logger, messages='Removing/unconfiguring RabbitMQ')
                try:
                    if ServiceManager.has_service('rabbitmq-server', client=target_client):
                        Toolbox.change_service_state(target_client, 'rabbitmq-server', 'stop', NodeTypeController._logger)
                        target_client.run(['rabbitmq-server', '-detached'])
                        time.sleep(5)
                        target_client.run(['rabbitmqctl', 'stop_app'])
                        time.sleep(5)
                        target_client.run(['rabbitmqctl', 'reset'])
                        time.sleep(5)
                        target_client.run(['rabbitmqctl', 'stop'])
                        time.sleep(5)
                        target_client.file_unlink("/var/lib/rabbitmq/.erlang.cookie")
                        Toolbox.change_service_state(target_client, 'rabbitmq-server', 'stop', NodeTypeController._logger)  # To be sure
                except Exception as ex:
                    Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to remove/unconfigure RabbitMQ', ex], loglevel='exception')

            Toolbox.log(logger=NodeTypeController._logger, messages='Stopping services')
            services = ['memcached', 'rabbitmq-server']
            if unconfigure_rabbitmq is False:
                services.remove('rabbitmq-server')
            if unconfigure_memcached is False:
                services.remove('memcached')
            for service in services:
                if ServiceManager.has_service(service, client=target_client):
                    Toolbox.log(logger=NodeTypeController._logger, messages='Stopping service {0}'.format(service))
                    try:
                        Toolbox.change_service_state(target_client, service, 'stop', NodeTypeController._logger)
                    except Exception as ex:
                        Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to stop service'.format(service), ex], loglevel='exception')

            Toolbox.log(logger=NodeTypeController._logger, messages='Removing services')
            services = ['scheduled-tasks', 'webapp-api', 'volumerouter-consumer']
            for service in services:
                if ServiceManager.has_service(service, client=target_client):
                    Toolbox.log(logger=NodeTypeController._logger, messages='Removing service {0}'.format(service))
                    try:
                        Toolbox.change_service_state(target_client, service, 'stop', NodeTypeController._logger)
                        ServiceManager.remove_service(service, client=target_client)
                    except Exception as ex:
                        Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to remove service'.format(service), ex], loglevel='exception')

            if ServiceManager.has_service('workers', client=target_client):
                ServiceManager.add_service(name='workers',
                                           client=target_client,
                                           params={'WORKER_QUEUE': '{0}'.format(unique_id)})
        try:
            NodeTypeController._configure_amqp_to_volumedriver()
        except Exception as ex:
            Toolbox.log(logger=NodeTypeController._logger, messages=['\nFailed to configure AMQP to Storage Driver', ex], loglevel='exception')

        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
        NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger, offline_node_ips=offline_node_ips)

        if Toolbox.run_hooks(component='nodetype',
                             sub_component='demote',
                             logger=NodeTypeController._logger,
                             cluster_ip=cluster_ip,
                             master_ip=master_ip,
                             offline_node_ips=offline_node_ips):
            Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
            NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger, offline_node_ips=offline_node_ips)

        if storagerouter not in offline_nodes:
            target_client = ip_client_map[cluster_ip]
            node_name, _ = target_client.get_hostname()
            if NodeTypeController.avahi_installed(client=target_client, logger=NodeTypeController._logger) is True:
                NodeTypeController.configure_avahi(client=target_client, node_name=node_name, node_type='extra', logger=NodeTypeController._logger)
        Configuration.set('/ovs/framework/hosts/{0}/type'.format(storagerouter.machine_id), 'EXTRA')

        if target_client is not None and target_client.file_exists('/tmp/ovs_rollback'):
            target_client.file_write('/tmp/ovs_rollback', 'rollback')

        Toolbox.log(logger=NodeTypeController._logger, messages='Demote complete', title=True)
コード例 #11
0
    def test_cluster_maintenance(self):
        """
        Validates whether a cluster can be correctly created
        """
        Configuration.set('/ovs/framework/hosts/1/ports', {'arakoon': [10000, 10100]})
        Configuration.set('/ovs/framework/hosts/2/ports', {'arakoon': [20000, 20100]})

        structure = Helper.build_service_structure(
            {'storagerouters': [1, 2]}
        )
        storagerouters = structure['storagerouters']
        System._machine_id = {storagerouters[1].ip: '1',
                              storagerouters[2].ip: '2'}

        # Create new cluster
        mountpoint = storagerouters[1].disks[0].partitions[0].mountpoint
        if os.path.exists(mountpoint) and mountpoint != '/':
            shutil.rmtree(mountpoint)
        base_dir = mountpoint + '/test_create_cluster'
        info = ArakoonInstaller.create_cluster('test', ServiceType.ARAKOON_CLUSTER_TYPES.FWK, storagerouters[1].ip, base_dir)

        reality = Helper.extract_dir_structure(base_dir)
        expected = {'dirs': {'arakoon': {'dirs': {'test': {'dirs': {'tlogs': {'dirs': {},
                                                                              'files': []},
                                                                    'db': {'dirs': {},
                                                                           'files': []}},
                                                           'files': []}},
                                         'files': []}},
                    'files': []}
        self.assertDictEqual(reality, expected)
        expected = '{0}\n\n{1}\n\n'.format(ArakoonInstallerTester.EXPECTED_CLUSTER_CONFIG.format('1', 'test', ''),
                                           ArakoonInstallerTester.EXPECTED_NODE_CONFIG.format(
                                               '1', storagerouters[1].ip, 10000, base_dir, '1', 10001
                                           ))
        self.assertEqual(Configuration.get(ArakoonInstaller.CONFIG_KEY.format('test'), raw=True), expected)
        # @TODO: assert service availability here. It should be stopped

        ArakoonInstaller.start_cluster('test', storagerouters[1].ip, filesystem=False)
        # @TODO: assert the service is running

        config = ArakoonClusterConfig('test', filesystem=False)
        config.load_config(storagerouters[1].ip)
        client = ArakoonInstaller.build_client(config)
        reality = client.get(ArakoonInstaller.INTERNAL_CONFIG_KEY)
        self.assertEqual(reality, expected)
        self.assertFalse(client.exists(ArakoonInstaller.METADATA_KEY))

        ArakoonInstaller.claim_cluster('test', storagerouters[1].ip, filesystem=False, metadata=info['metadata'])

        reality = json.loads(client.get(ArakoonInstaller.METADATA_KEY))
        expected = {'cluster_name': 'test',
                    'cluster_type': 'FWK',
                    'in_use': True,
                    'internal': True}
        self.assertDictEqual(reality, expected)

        # Extending cluster
        mountpoint = storagerouters[2].disks[0].partitions[0].mountpoint
        if os.path.exists(mountpoint) and mountpoint != '/':
            shutil.rmtree(mountpoint)
        base_dir2 = mountpoint + '/test_extend_cluster'
        ArakoonInstaller.extend_cluster(storagerouters[1].ip, storagerouters[2].ip, 'test', base_dir2)
        reality = Helper.extract_dir_structure(base_dir)
        expected = {'dirs': {'arakoon': {'dirs': {'test': {'dirs': {'tlogs': {'dirs': {},
                                                                              'files': []},
                                                                    'db': {'dirs': {},
                                                                           'files': []}},
                                                           'files': []}},
                                         'files': []}},
                    'files': []}
        self.assertDictEqual(reality, expected)
        expected = '{0}\n\n{1}\n\n{2}\n\n'.format(ArakoonInstallerTester.EXPECTED_CLUSTER_CONFIG.format('1,2', 'test', ''),
                                                  ArakoonInstallerTester.EXPECTED_NODE_CONFIG.format(
                                                      '1', storagerouters[1].ip, 10000, base_dir, '1', 10001
                                                  ),
                                                  ArakoonInstallerTester.EXPECTED_NODE_CONFIG.format(
                                                      '2', storagerouters[2].ip, 20000, base_dir2, '2', 20001
                                                  ))
        self.assertEqual(Configuration.get(ArakoonInstaller.CONFIG_KEY.format('test'), raw=True), expected)
        # @TODO: assert service availability here. It should be stopped

        catchup_command = 'arakoon --node 2 -config file://opt/OpenvStorage/config/framework.json?key=/ovs/arakoon/test/config -catchup-only'
        SSHClient._run_returns[catchup_command] = None
        SSHClient._run_recordings = []
        ArakoonInstaller.restart_cluster_add('test', [storagerouters[1].ip], storagerouters[2].ip, filesystem=False)
        self.assertIn(catchup_command, SSHClient._run_recordings)
        # @TODO: assert the service is running

        config = ArakoonClusterConfig('test', filesystem=False)
        config.load_config(storagerouters[2].ip)
        client = ArakoonInstaller.build_client(config)
        reality = client.get(ArakoonInstaller.INTERNAL_CONFIG_KEY)
        self.assertEqual(reality, expected)

        reality = json.loads(client.get(ArakoonInstaller.METADATA_KEY))
        expected = {'cluster_name': 'test',
                    'cluster_type': 'FWK',
                    'in_use': True,
                    'internal': True}
        self.assertDictEqual(reality, expected)

        # Shrinking cluster
        ArakoonInstaller.shrink_cluster(storagerouters[1].ip, storagerouters[2].ip, 'test')
        reality = Helper.extract_dir_structure(base_dir)
        expected = {'dirs': {'arakoon': {'dirs': {'test': {'dirs': {}, 'files': []}},
                                         'files': []}},
                    'files': []}
        self.assertDictEqual(reality, expected)
        expected = '{0}\n\n{1}\n\n'.format(ArakoonInstallerTester.EXPECTED_CLUSTER_CONFIG.format('2', 'test', ''),
                                           ArakoonInstallerTester.EXPECTED_NODE_CONFIG.format(
                                               '2', storagerouters[2].ip, 20000, base_dir2, '2', 20001
                                           ))
        self.assertEqual(Configuration.get(ArakoonInstaller.CONFIG_KEY.format('test'), raw=True), expected)
        # @TODO: assert service availability here. It should be stopped

        ArakoonInstaller.restart_cluster_remove('test', [storagerouters[2].ip], filesystem=False)
        # @TODO: assert the service is running

        config = ArakoonClusterConfig('test', filesystem=False)
        config.load_config(storagerouters[2].ip)
        client = ArakoonInstaller.build_client(config)
        reality = client.get(ArakoonInstaller.INTERNAL_CONFIG_KEY)
        self.assertEqual(reality, expected)

        reality = json.loads(client.get(ArakoonInstaller.METADATA_KEY))
        expected = {'cluster_name': 'test',
                    'cluster_type': 'FWK',
                    'in_use': True,
                    'internal': True}
        self.assertDictEqual(reality, expected)
コード例 #12
0
    def services_running(self, target):
        """
        Check all services are running
        :param target: Target to check
        :return: Boolean
        """
        try:
            key = 'ovs-watcher-{0}'.format(str(uuid.uuid4()))
            value = str(time.time())

            if target == 'config':
                self.log_message(target, 'Testing configuration store...', 0)
                from ovs.extensions.generic.configuration import Configuration
                try:
                    Configuration.list('/')
                except Exception as ex:
                    self.log_message(
                        target,
                        '  Error during configuration store test: {0}'.format(
                            ex), 2)
                    return False
                if Configuration.get_store() == 'arakoon':
                    from ovs.extensions.db.arakoon.configuration import ArakoonConfiguration
                    from ovs.extensions.db.arakoon.ArakoonInstaller import ArakoonInstaller, ArakoonClusterConfig
                    from ovs.extensions.db.arakoon.pyrakoon.pyrakoon.compat import NoGuarantee
                    with open(
                            ArakoonConfiguration.CACC_LOCATION) as config_file:
                        contents = config_file.read()
                    config = ArakoonClusterConfig(cluster_id='cacc',
                                                  filesystem=True)
                    config.read_config(contents)
                    client = ArakoonInstaller.build_client(config)
                    contents = client.get(ArakoonInstaller.INTERNAL_CONFIG_KEY,
                                          consistency=NoGuarantee())
                    if Watcher.LOG_CONTENTS != contents:
                        try:
                            config.read_config(
                                contents
                            )  # Validate whether the contents are not corrupt
                        except Exception as ex:
                            self.log_message(
                                target,
                                '  Configuration stored in configuration store seems to be corrupt: {0}'
                                .format(ex), 2)
                            return False
                        temp_filename = '{0}~'.format(
                            ArakoonConfiguration.CACC_LOCATION)
                        with open(temp_filename, 'w') as config_file:
                            config_file.write(contents)
                            config_file.flush()
                            os.fsync(config_file)
                        os.rename(temp_filename,
                                  ArakoonConfiguration.CACC_LOCATION)
                        Watcher.LOG_CONTENTS = contents
                self.log_message(target, '  Configuration store OK', 0)
                return True

            if target == 'framework':
                # Volatile
                self.log_message(target, 'Testing volatile store...', 0)
                max_tries = 5
                tries = 0
                while tries < max_tries:
                    try:
                        try:
                            logging.disable(logging.WARNING)
                            from ovs.extensions.storage.volatilefactory import VolatileFactory
                            VolatileFactory.store = None
                            volatile = VolatileFactory.get_client()
                            volatile.set(key, value)
                            if volatile.get(key) == value:
                                volatile.delete(key)
                                break
                            volatile.delete(key)
                        finally:
                            logging.disable(logging.NOTSET)
                    except Exception as message:
                        self.log_message(
                            target,
                            '  Error during volatile store test: {0}'.format(
                                message), 2)
                    key = 'ovs-watcher-{0}'.format(str(
                        uuid.uuid4()))  # Get another key
                    time.sleep(1)
                    tries += 1
                if tries == max_tries:
                    self.log_message(target,
                                     '  Volatile store not working correctly',
                                     2)
                    return False
                self.log_message(
                    target,
                    '  Volatile store OK after {0} tries'.format(tries), 0)

                # Persistent
                self.log_message(target, 'Testing persistent store...', 0)
                max_tries = 5
                tries = 0
                while tries < max_tries:
                    try:
                        try:
                            logging.disable(logging.WARNING)
                            persistent = PersistentFactory.get_client()
                            persistent.set(key, value)
                            if persistent.get(key) == value:
                                persistent.delete(key)
                                break
                            persistent.delete(key)
                        finally:
                            logging.disable(logging.NOTSET)
                    except Exception as message:
                        self.log_message(
                            target,
                            '  Error during persistent store test: {0}'.format(
                                message), 2)
                    key = 'ovs-watcher-{0}'.format(str(
                        uuid.uuid4()))  # Get another key
                    time.sleep(1)
                    tries += 1
                if tries == max_tries:
                    self.log_message(
                        target, '  Persistent store not working correctly', 2)
                    return False
                self.log_message(
                    target,
                    '  Persistent store OK after {0} tries'.format(tries), 0)

            if target == 'volumedriver':
                # Arakoon, voldrv cluster
                self.log_message(target, 'Testing arakoon (voldrv)...', 0)
                max_tries = 5
                tries = 0
                while tries < max_tries:
                    try:
                        from ovs.extensions.generic.configuration import Configuration
                        from ovs.extensions.storage.persistent.pyrakoonstore import PyrakoonStore
                        cluster_name = str(
                            Configuration.get(
                                '/ovs/framework/arakoon_clusters|voldrv'))
                        client = PyrakoonStore(cluster=cluster_name)
                        client.set(key, value)
                        if client.get(key) == value:
                            client.delete(key)
                            break
                        client.delete(key)
                    except Exception as message:
                        self.log_message(
                            target,
                            '  Error during arakoon (voldrv) test: {0}'.format(
                                message), 2)
                    key = 'ovs-watcher-{0}'.format(str(
                        uuid.uuid4()))  # Get another key
                    time.sleep(1)
                    tries += 1
                if tries == max_tries:
                    self.log_message(
                        target, '  Arakoon (voldrv) not working correctly', 2)
                    return False
                self.log_message(target, '  Arakoon (voldrv) OK', 0)

            if target in ['framework', 'volumedriver']:
                # RabbitMQ
                self.log_message(target, 'Test rabbitMQ...', 0)
                import pika
                from ovs.extensions.generic.configuration import Configuration
                messagequeue = Configuration.get('/ovs/framework/messagequeue')
                rmq_servers = messagequeue['endpoints']
                good_node = False
                for server in rmq_servers:
                    try:
                        connection_string = '{0}://{1}:{2}@{3}/%2F'.format(
                            messagequeue['protocol'], messagequeue['user'],
                            messagequeue['password'], server)
                        connection = pika.BlockingConnection(
                            pika.URLParameters(connection_string))
                        channel = connection.channel()
                        channel.basic_publish(
                            '', 'ovs-watcher', str(time.time()),
                            pika.BasicProperties(content_type='text/plain',
                                                 delivery_mode=1))
                        connection.close()
                        good_node = True
                    except Exception as message:
                        self.log_message(
                            target,
                            '  Error during rabbitMQ test on node {0}: {1}'.
                            format(server, message), 2)
                if good_node is False:
                    self.log_message(
                        target, '  No working rabbitMQ node could be found', 2)
                    return False
                self.log_message(target, '  RabbitMQ test OK', 0)
                self.log_message(target, 'All tests OK', 0)
                return True
        except Exception as ex:
            self.log_message(target, 'Unexpected exception: {0}'.format(ex), 2)
            return False
コード例 #13
0
ファイル: update.py プロジェクト: openvstorage/framework
    def get_package_information_core(client, package_info):
        """
        Called by GenericController.refresh_package_information() every hour

        Retrieve information about the currently installed versions of the core packages
        Retrieve information about the versions to which each package can potentially be updated
        If installed version is different from candidate version --> store this information in model

        Additionally check the services with a 'run' file
        Verify whether the running version is up-to-date with the candidate version
        If different --> store this information in the model

        Result: Every package with updates or which requires services to be restarted is stored in the model

        :param client: Client on which to collect the version information
        :type client: SSHClient
        :param package_info: Dictionary passed in by the thread calling this function
        :type package_info: dict
        :return: Package information
        :rtype: dict
        """
        try:
            if client.username != 'root':
                raise RuntimeError('Only the "root" user can retrieve the package information')

            binaries = PackageManager.get_binary_versions(client=client, package_names=UpdateController.core_packages_with_binaries)
            installed = PackageManager.get_installed_versions(client=client, package_names=UpdateController.all_core_packages)
            candidate = PackageManager.get_candidate_versions(client=client, package_names=UpdateController.all_core_packages)
            if set(installed.keys()) != set(UpdateController.all_core_packages) or set(candidate.keys()) != set(UpdateController.all_core_packages):
                raise RuntimeError('Failed to retrieve the installed and candidate versions for packages: {0}'.format(', '.join(UpdateController.all_core_packages)))

            # Retrieve Arakoon information
            framework_arakoons = []
            storagedriver_arakoons = []
            for cluster, arakoon_list in {'cacc': framework_arakoons,
                                          'ovsdb': framework_arakoons,
                                          'voldrv': storagedriver_arakoons}.iteritems():
                cluster_name = ArakoonClusterConfig.get_cluster_name(cluster)
                if cluster_name is None:
                    continue

                if cluster == 'cacc':
                    arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name, filesystem=True, ip=client.ip)
                else:
                    arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name)

                if arakoon_metadata['internal'] is True:
                    arakoon_list.append(ArakoonInstaller.get_service_name_for_cluster(cluster_name=arakoon_metadata['cluster_name']))

            storagerouter = StorageRouterList.get_by_ip(client.ip)
            alba_proxies = []
            for service in storagerouter.services:
                if service.type.name == ServiceType.SERVICE_TYPES.ALBA_PROXY:
                    alba_proxies.append(service.name)

            storagedriver_services = []
            for sd in storagerouter.storagedrivers:
                storagedriver_services.append('ovs-dtl_{0}'.format(sd.vpool.name))
                storagedriver_services.append('ovs-volumedriver_{0}'.format(sd.vpool.name))

            default_entry = {'candidate': None,
                             'installed': None,
                             'services_to_restart': []}

            #                       component:   package_name: services_with_run_file
            for component, info in {'framework': {'arakoon': framework_arakoons,
                                                  'openvstorage': []},
                                    'storagedriver': {'alba': alba_proxies,
                                                      'arakoon': storagedriver_arakoons,
                                                      'volumedriver-no-dedup-base': [],
                                                      'volumedriver-no-dedup-server': storagedriver_services}}.iteritems():
                component_info = {}
                for package, services in info.iteritems():
                    for service in services:
                        service = ExtensionToolbox.remove_prefix(service, 'ovs-')
                        version_file = '/opt/OpenvStorage/run/{0}.version'.format(service)
                        if not client.file_exists(version_file):
                            UpdateController._logger.warning('{0}: Failed to find a version file in /opt/OpenvStorage/run for service {1}'.format(client.ip, service))
                            continue
                        package_name = package
                        running_versions = client.file_read(version_file).strip()
                        for version in running_versions.split(';'):
                            version = version.strip()
                            running_version = None
                            if '=' in version:
                                package_name = version.split('=')[0]
                                running_version = version.split('=')[1]
                            elif version:
                                running_version = version

                            if package_name not in UpdateController.all_core_packages:
                                raise ValueError('Unknown package dependency found in {0}'.format(version_file))
                            if package_name not in binaries:
                                raise RuntimeError('Binary version for package {0} was not retrieved'.format(package_name))

                            if running_version is not None and running_version != binaries[package_name]:
                                if package_name not in component_info:
                                    component_info[package_name] = copy.deepcopy(default_entry)
                                component_info[package_name]['installed'] = running_version
                                component_info[package_name]['candidate'] = binaries[package_name]
                                component_info[package_name]['services_to_restart'].append('ovs-{0}'.format(service))

                    if installed[package] != candidate[package] and package not in component_info:
                        component_info[package] = copy.deepcopy(default_entry)
                        component_info[package]['installed'] = installed[package]
                        component_info[package]['candidate'] = candidate[package]
                if component_info:
                    if component not in package_info[client.ip]:
                        package_info[client.ip][component] = {}
                    package_info[client.ip][component].update(component_info)
        except Exception as ex:
            if 'errors' not in package_info[client.ip]:
                package_info[client.ip]['errors'] = []
            package_info[client.ip]['errors'].append(ex)
        return package_info
コード例 #14
0
ファイル: update.py プロジェクト: openvstorage/framework
    def get_update_information_core(information):
        """
        Called when the 'Update' button in the GUI is pressed
        This call collects additional information about the packages which can be updated
        Eg:
            * Downtime for Arakoons
            * Downtime for StorageDrivers
            * Prerequisites that haven't been met
            * Services which will be stopped during update
            * Services which will be restarted after update
        """
        # Verify arakoon info
        arakoon_ovs_info = {'down': False,
                            'name': None,
                            'internal': False}
        arakoon_cacc_info = {'down': False,
                             'name': None,
                             'internal': False}
        arakoon_voldrv_info = {'down': False,
                               'name': None,
                               'internal': False}
        for cluster in ['cacc', 'ovsdb', 'voldrv']:
            cluster_name = ArakoonClusterConfig.get_cluster_name(cluster)
            if cluster_name is None:
                continue

            if cluster == 'cacc':
                arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name, filesystem=True, ip=System.get_my_storagerouter().ip)
            else:
                arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name)

            if arakoon_metadata['internal'] is True:
                config = ArakoonClusterConfig(cluster_id=cluster_name, filesystem=(cluster == 'cacc'))
                config.load_config(System.get_my_storagerouter().ip if cluster == 'cacc' else None)
                if cluster == 'ovsdb':
                    arakoon_ovs_info['down'] = len(config.nodes) < 3
                    arakoon_ovs_info['name'] = arakoon_metadata['cluster_name']
                    arakoon_ovs_info['internal'] = True
                elif cluster == 'voldrv':
                    arakoon_voldrv_info['down'] = len(config.nodes) < 3
                    arakoon_voldrv_info['name'] = arakoon_metadata['cluster_name']
                    arakoon_voldrv_info['internal'] = True
                else:
                    arakoon_cacc_info['name'] = arakoon_metadata['cluster_name']
                    arakoon_cacc_info['internal'] = True

        # Verify StorageRouter downtime
        prerequisites = []
        all_storagerouters = StorageRouterList.get_storagerouters()
        for storagerouter in all_storagerouters:
            try:
                SSHClient(endpoint=storagerouter, username='******')
            except UnableToConnectException:
                prerequisites.append(['node_down', storagerouter.name])

        for key in ['framework', 'storagedriver']:
            if key not in information:
                information[key] = {'packages': {},
                                    'downtime': [],
                                    'prerequisites': prerequisites,
                                    'services_stop_start': set(),
                                    'services_post_update': set()}

            for storagerouter in all_storagerouters:
                if key not in storagerouter.package_information:
                    continue

                # Retrieve ALBA proxy issues
                alba_services = []
                alba_downtime = []
                for service in storagerouter.services:
                    if service.type.name != ServiceType.SERVICE_TYPES.ALBA_PROXY or service.alba_proxy is None:
                        continue
                    alba_services.append(service.name)
                    alba_downtime.append(['proxy', service.alba_proxy.storagedriver.vpool.name])

                # Retrieve StorageDriver issues
                storagedriver_downtime = []
                storagedriver_services = []
                for sd in storagerouter.storagedrivers:
                    # Order of services is important, first we want to stop all volume-drivers, then DTLs
                    storagedriver_services.append('ovs-volumedriver_{0}'.format(sd.vpool.name))
                for sd in storagerouter.storagedrivers:
                    storagedriver_services.append('ovs-dtl_{0}'.format(sd.vpool.name))
                    if len(sd.vdisks_guids) > 0:
                        storagedriver_downtime.append(['voldrv', sd.vpool.name])

                # Retrieve the actual update information
                for package_name, package_info in storagerouter.package_information[key].iteritems():
                    if package_name not in UpdateController.all_core_packages:
                        continue  # Only gather information for the core packages

                    information[key]['services_post_update'].update(package_info.pop('services_to_restart'))
                    if package_name not in information[key]['packages']:
                        information[key]['packages'][package_name] = {}
                    information[key]['packages'][package_name].update(package_info)

                    if package_name == 'openvstorage':
                        if ['gui', None] not in information[key]['downtime']:
                            information[key]['downtime'].append(['gui', None])
                        if ['api', None] not in information[key]['downtime']:
                            information[key]['downtime'].append(['api', None])
                        information[key]['services_stop_start'].update({'watcher-framework', 'memcached'})
                    elif package_name == 'alba':
                        for down in alba_downtime:
                            if down not in information[key]['downtime']:
                                information[key]['downtime'].append(down)
                        information[key]['services_post_update'].update(alba_services)
                    elif package_name == 'volumedriver-no-dedup-base':
                        for down in storagedriver_downtime:
                            if down not in information[key]['downtime']:
                                information[key]['downtime'].append(down)
                        information[key]['services_post_update'].update(storagedriver_services)
                    elif package_name == 'volumedriver-no-dedup-server':
                        for down in storagedriver_downtime:
                            if down not in information[key]['downtime']:
                                information[key]['downtime'].append(down)
                        information[key]['services_post_update'].update(storagedriver_services)
                    elif package_name == 'arakoon':
                        if key == 'framework':
                            framework_arakoons = set()
                            if arakoon_ovs_info['internal'] is True:
                                framework_arakoons.add('ovs-arakoon-{0}'.format(arakoon_ovs_info['name']))
                            if arakoon_cacc_info['internal'] is True:
                                framework_arakoons.add('ovs-arakoon-{0}'.format(arakoon_cacc_info['name']))

                            information[key]['services_post_update'].update(framework_arakoons)
                            if arakoon_ovs_info['down'] is True and ['ovsdb', None] not in information[key]['downtime']:
                                information[key]['downtime'].append(['ovsdb', None])
                        elif arakoon_voldrv_info['internal'] is True:
                            information[key]['services_post_update'].update({'ovs-arakoon-{0}'.format(arakoon_voldrv_info['name'])})
                            if arakoon_voldrv_info['down'] is True and ['voldrv', None] not in information[key]['downtime']:
                                information[key]['downtime'].append(['voldrv', None])
        return information
コード例 #15
0
    def get_update_information_alba_plugin(information):
        """
        Called when the 'Update' button in the GUI is pressed
        This call collects additional information about the packages which can be updated
        Eg:
            * Downtime for Arakoons
            * Downtime for StorageDrivers
            * Prerequisites that haven't been met
            * Services which will be stopped during update
            * Services which will be restarted after update
        """
        # Verify arakoon info
        arakoon_ovs_info = {'down': False,
                            'name': None,
                            'internal': False}
        arakoon_cacc_info = {'down': False,
                             'name': None,
                             'internal': False}
        for cluster in ['cacc', 'ovsdb']:
            cluster_name = ArakoonClusterConfig.get_cluster_name(cluster)
            if cluster_name is None:
                continue

            if cluster == 'cacc':
                arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name, filesystem=True, ip=System.get_my_storagerouter().ip)
            else:
                arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name)

            if arakoon_metadata['internal'] is True:
                config = ArakoonClusterConfig(cluster_id=cluster_name, filesystem=(cluster == 'cacc'))
                config.load_config(System.get_my_storagerouter().ip if cluster == 'cacc' else None)
                if cluster == 'ovsdb':
                    arakoon_ovs_info['down'] = len(config.nodes) < 3
                    arakoon_ovs_info['name'] = arakoon_metadata['cluster_name']
                    arakoon_ovs_info['internal'] = True
                else:
                    arakoon_cacc_info['name'] = arakoon_metadata['cluster_name']
                    arakoon_cacc_info['internal'] = True

        # Verify StorageRouter downtime
        fwk_prerequisites = []
        all_storagerouters = StorageRouterList.get_storagerouters()
        for storagerouter in all_storagerouters:
            try:
                SSHClient(endpoint=storagerouter, username='******')
            except UnableToConnectException:
                fwk_prerequisites.append(['node_down', storagerouter.name])

        # Verify ALBA node responsiveness
        alba_prerequisites = []
        for alba_node in AlbaNodeList.get_albanodes():
            try:
                alba_node.client.get_metadata()
            except Exception:
                alba_prerequisites.append(['alba_node_unresponsive', alba_node.ip])

        for key in ['framework', 'alba']:
            if key not in information:
                information[key] = {'packages': {},
                                    'downtime': [],
                                    'prerequisites': fwk_prerequisites if key == 'framework' else alba_prerequisites,
                                    'services_stop_start': set(),
                                    'services_post_update': set()}

            for storagerouter in StorageRouterList.get_storagerouters():
                if key not in storagerouter.package_information:
                    continue

                # Retrieve Arakoon issues
                arakoon_downtime = []
                arakoon_services = []
                for service in storagerouter.services:
                    if service.type.name not in [ServiceType.SERVICE_TYPES.ALBA_MGR, ServiceType.SERVICE_TYPES.NS_MGR]:
                        continue

                    if service.type.name == ServiceType.SERVICE_TYPES.ALBA_MGR:
                        cluster_name = AlbaController.get_abm_cluster_name(alba_backend=service.abm_service.alba_backend)
                    else:
                        cluster_name = AlbaController.get_nsm_cluster_name(alba_backend=service.nsm_service.alba_backend, number=service.nsm_service.number)
                    if Configuration.exists('/ovs/arakoon/{0}/config'.format(cluster_name), raw=True) is False:
                        continue
                    arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name)
                    if arakoon_metadata['internal'] is True:
                        arakoon_services.append('ovs-{0}'.format(service.name))
                        config = ArakoonClusterConfig(cluster_id=cluster_name, filesystem=False)
                        config.load_config()
                        if len(config.nodes) < 3:
                            if service.type.name == ServiceType.SERVICE_TYPES.NS_MGR:
                                arakoon_downtime.append(['backend', service.nsm_service.alba_backend.name])
                            else:
                                arakoon_downtime.append(['backend', service.abm_service.alba_backend.name])

                for package_name, package_info in storagerouter.package_information[key].iteritems():
                    if package_name not in AlbaUpdateController.alba_plugin_packages:
                        continue  # Only gather information for the core packages

                    information[key]['services_post_update'].update(package_info.pop('services_to_restart'))
                    if package_name not in information[key]['packages']:
                        information[key]['packages'][package_name] = {}
                    information[key]['packages'][package_name].update(package_info)

                    if package_name == 'openvstorage-backend':
                        if ['gui', None] not in information[key]['downtime']:
                            information[key]['downtime'].append(['gui', None])
                        if ['api', None] not in information[key]['downtime']:
                            information[key]['downtime'].append(['api', None])
                        information[key]['services_stop_start'].update({'watcher-framework', 'memcached'})
                    elif package_name == 'alba':
                        for down in arakoon_downtime:
                            if down not in information[key]['downtime']:
                                information[key]['downtime'].append(down)
                        information[key]['services_post_update'].update(arakoon_services)
                    elif package_name == 'arakoon':
                        if key == 'framework':
                            framework_arakoons = set()
                            if arakoon_ovs_info['internal'] is True:
                                framework_arakoons.add('ovs-arakoon-{0}'.format(arakoon_ovs_info['name']))
                            if arakoon_cacc_info['internal'] is True:
                                framework_arakoons.add('ovs-arakoon-{0}'.format(arakoon_cacc_info['name']))

                            information[key]['services_post_update'].update(framework_arakoons)
                            if arakoon_ovs_info['down'] is True and ['ovsdb', None] not in information[key]['downtime']:
                                information[key]['downtime'].append(['ovsdb', None])
                        else:
                            for down in arakoon_downtime:
                                if down not in information[key]['downtime']:
                                    information[key]['downtime'].append(down)
                            information[key]['services_post_update'].update(arakoon_services)

            for alba_node in AlbaNodeList.get_albanodes():
                for package_name, package_info in alba_node.package_information.get(key, {}).iteritems():
                    if package_name not in AlbaUpdateController.sdm_packages:
                        continue  # Only gather information for the SDM packages

                    information[key]['services_post_update'].update(package_info.pop('services_to_restart'))
                    if package_name not in information[key]['packages']:
                        information[key]['packages'][package_name] = {}
                    information[key]['packages'][package_name].update(package_info)
        return information
コード例 #16
0
ファイル: watcher.py プロジェクト: grimpy/openvstorage
    def services_running(self, target):
        """
        Check all services are running
        :param target: Target to check
        :return: Boolean
        """
        try:
            key = 'ovs-watcher-{0}'.format(str(uuid.uuid4()))
            value = str(time.time())

            if target == 'config':
                self.log_message(target, 'Testing configuration store...', 0)
                from ovs.extensions.generic.configuration import Configuration
                try:
                    Configuration.list('/')
                except Exception as ex:
                    self.log_message(target, '  Error during configuration store test: {0}'.format(ex), 2)
                    return False
                if Configuration.get_store() == 'arakoon':
                    from ovs.extensions.db.arakoon.configuration import ArakoonConfiguration
                    from ovs.extensions.db.arakoon.ArakoonInstaller import ArakoonInstaller, ArakoonClusterConfig
                    from ovs.extensions.db.arakoon.pyrakoon.pyrakoon.compat import NoGuarantee
                    with open(ArakoonConfiguration.CACC_LOCATION) as config_file:
                        contents = config_file.read()
                    config = ArakoonClusterConfig(cluster_id='cacc', filesystem=True)
                    config.read_config(contents)
                    client = ArakoonInstaller.build_client(config)
                    contents = client.get(ArakoonInstaller.INTERNAL_CONFIG_KEY, consistency=NoGuarantee())
                    if Watcher.LOG_CONTENTS != contents:
                        try:
                            config.read_config(contents)  # Validate whether the contents are not corrupt
                        except Exception as ex:
                            self.log_message(target, '  Configuration stored in configuration store seems to be corrupt: {0}'.format(ex), 2)
                            return False
                        temp_filename = '{0}~'.format(ArakoonConfiguration.CACC_LOCATION)
                        with open(temp_filename, 'w') as config_file:
                            config_file.write(contents)
                            config_file.flush()
                            os.fsync(config_file)
                        os.rename(temp_filename, ArakoonConfiguration.CACC_LOCATION)
                        Watcher.LOG_CONTENTS = contents
                self.log_message(target, '  Configuration store OK', 0)
                return True

            if target == 'framework':
                # Volatile
                self.log_message(target, 'Testing volatile store...', 0)
                max_tries = 5
                tries = 0
                while tries < max_tries:
                    try:
                        try:
                            logging.disable(logging.WARNING)
                            from ovs.extensions.storage.volatilefactory import VolatileFactory
                            VolatileFactory.store = None
                            volatile = VolatileFactory.get_client()
                            volatile.set(key, value)
                            if volatile.get(key) == value:
                                volatile.delete(key)
                                break
                            volatile.delete(key)
                        finally:
                            logging.disable(logging.NOTSET)
                    except Exception as message:
                        self.log_message(target, '  Error during volatile store test: {0}'.format(message), 2)
                    key = 'ovs-watcher-{0}'.format(str(uuid.uuid4()))  # Get another key
                    time.sleep(1)
                    tries += 1
                if tries == max_tries:
                    self.log_message(target, '  Volatile store not working correctly', 2)
                    return False
                self.log_message(target, '  Volatile store OK after {0} tries'.format(tries), 0)

                # Persistent
                self.log_message(target, 'Testing persistent store...', 0)
                max_tries = 5
                tries = 0
                while tries < max_tries:
                    try:
                        try:
                            logging.disable(logging.WARNING)
                            persistent = PersistentFactory.get_client()
                            persistent.set(key, value)
                            if persistent.get(key) == value:
                                persistent.delete(key)
                                break
                            persistent.delete(key)
                        finally:
                            logging.disable(logging.NOTSET)
                    except Exception as message:
                        self.log_message(target, '  Error during persistent store test: {0}'.format(message), 2)
                    key = 'ovs-watcher-{0}'.format(str(uuid.uuid4()))  # Get another key
                    time.sleep(1)
                    tries += 1
                if tries == max_tries:
                    self.log_message(target, '  Persistent store not working correctly', 2)
                    return False
                self.log_message(target, '  Persistent store OK after {0} tries'.format(tries), 0)

            if target == 'volumedriver':
                # Arakoon, voldrv cluster
                self.log_message(target, 'Testing arakoon (voldrv)...', 0)
                max_tries = 5
                tries = 0
                while tries < max_tries:
                    try:
                        from ovs.extensions.generic.configuration import Configuration
                        from ovs.extensions.storage.persistent.pyrakoonstore import PyrakoonStore
                        cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|voldrv'))
                        client = PyrakoonStore(cluster=cluster_name)
                        client.set(key, value)
                        if client.get(key) == value:
                            client.delete(key)
                            break
                        client.delete(key)
                    except Exception as message:
                        self.log_message(target, '  Error during arakoon (voldrv) test: {0}'.format(message), 2)
                    key = 'ovs-watcher-{0}'.format(str(uuid.uuid4()))  # Get another key
                    time.sleep(1)
                    tries += 1
                if tries == max_tries:
                    self.log_message(target, '  Arakoon (voldrv) not working correctly', 2)
                    return False
                self.log_message(target, '  Arakoon (voldrv) OK', 0)

            if target in ['framework', 'volumedriver']:
                # RabbitMQ
                self.log_message(target, 'Test rabbitMQ...', 0)
                import pika
                from ovs.extensions.generic.configuration import Configuration
                messagequeue = Configuration.get('/ovs/framework/messagequeue')
                rmq_servers = messagequeue['endpoints']
                good_node = False
                for server in rmq_servers:
                    try:
                        connection_string = '{0}://{1}:{2}@{3}/%2F'.format(messagequeue['protocol'],
                                                                           messagequeue['user'],
                                                                           messagequeue['password'],
                                                                           server)
                        connection = pika.BlockingConnection(pika.URLParameters(connection_string))
                        channel = connection.channel()
                        channel.basic_publish('', 'ovs-watcher', str(time.time()),
                                              pika.BasicProperties(content_type='text/plain', delivery_mode=1))
                        connection.close()
                        good_node = True
                    except Exception as message:
                        self.log_message(target, '  Error during rabbitMQ test on node {0}: {1}'.format(server, message), 2)
                if good_node is False:
                    self.log_message(target, '  No working rabbitMQ node could be found', 2)
                    return False
                self.log_message(target, '  RabbitMQ test OK', 0)
                self.log_message(target, 'All tests OK', 0)
                return True
        except Exception as ex:
            self.log_message(target, 'Unexpected exception: {0}'.format(ex), 2)
            return False
コード例 #17
0
    def test_cluster_maintenance(self):
        """
        Validates whether a cluster can be correctly created
        """
        Configuration.set('/ovs/framework/hosts/1/ports',
                          {'arakoon': [10000, 10100]})
        Configuration.set('/ovs/framework/hosts/2/ports',
                          {'arakoon': [20000, 20100]})

        structure = Helper.build_service_structure({'storagerouters': [1, 2]})
        storagerouters = structure['storagerouters']
        System._machine_id = {
            storagerouters[1].ip: '1',
            storagerouters[2].ip: '2'
        }

        # Create new cluster
        mountpoint = storagerouters[1].disks[0].partitions[0].mountpoint
        if os.path.exists(mountpoint) and mountpoint != '/':
            shutil.rmtree(mountpoint)
        base_dir = mountpoint + '/test_create_cluster'
        info = ArakoonInstaller.create_cluster(
            'test', ServiceType.ARAKOON_CLUSTER_TYPES.FWK,
            storagerouters[1].ip, base_dir)

        reality = Helper.extract_dir_structure(base_dir)
        expected = {
            'dirs': {
                'arakoon': {
                    'dirs': {
                        'test': {
                            'dirs': {
                                'tlogs': {
                                    'dirs': {},
                                    'files': []
                                },
                                'db': {
                                    'dirs': {},
                                    'files': []
                                }
                            },
                            'files': []
                        }
                    },
                    'files': []
                }
            },
            'files': []
        }
        self.assertDictEqual(reality, expected)
        expected = '{0}\n\n{1}\n\n'.format(
            ArakoonInstallerTester.EXPECTED_CLUSTER_CONFIG.format(
                '1', 'test', ''),
            ArakoonInstallerTester.EXPECTED_NODE_CONFIG.format(
                '1', storagerouters[1].ip, 10000, base_dir, '1', 10001))
        self.assertEqual(
            Configuration.get(ArakoonInstaller.CONFIG_KEY.format('test'),
                              raw=True), expected)
        # @TODO: assert service availability here. It should be stopped

        ArakoonInstaller.start_cluster('test',
                                       storagerouters[1].ip,
                                       filesystem=False)
        # @TODO: assert the service is running

        config = ArakoonClusterConfig('test', filesystem=False)
        config.load_config(storagerouters[1].ip)
        client = ArakoonInstaller.build_client(config)
        reality = client.get(ArakoonInstaller.INTERNAL_CONFIG_KEY)
        self.assertEqual(reality, expected)
        self.assertFalse(client.exists(ArakoonInstaller.METADATA_KEY))

        ArakoonInstaller.claim_cluster('test',
                                       storagerouters[1].ip,
                                       filesystem=False,
                                       metadata=info['metadata'])

        reality = json.loads(client.get(ArakoonInstaller.METADATA_KEY))
        expected = {
            'cluster_name': 'test',
            'cluster_type': 'FWK',
            'in_use': True,
            'internal': True
        }
        self.assertDictEqual(reality, expected)

        # Extending cluster
        mountpoint = storagerouters[2].disks[0].partitions[0].mountpoint
        if os.path.exists(mountpoint) and mountpoint != '/':
            shutil.rmtree(mountpoint)
        base_dir2 = mountpoint + '/test_extend_cluster'
        ArakoonInstaller.extend_cluster(storagerouters[1].ip,
                                        storagerouters[2].ip, 'test',
                                        base_dir2)
        reality = Helper.extract_dir_structure(base_dir)
        expected = {
            'dirs': {
                'arakoon': {
                    'dirs': {
                        'test': {
                            'dirs': {
                                'tlogs': {
                                    'dirs': {},
                                    'files': []
                                },
                                'db': {
                                    'dirs': {},
                                    'files': []
                                }
                            },
                            'files': []
                        }
                    },
                    'files': []
                }
            },
            'files': []
        }
        self.assertDictEqual(reality, expected)
        expected = '{0}\n\n{1}\n\n{2}\n\n'.format(
            ArakoonInstallerTester.EXPECTED_CLUSTER_CONFIG.format(
                '1,2', 'test', ''),
            ArakoonInstallerTester.EXPECTED_NODE_CONFIG.format(
                '1', storagerouters[1].ip, 10000, base_dir, '1', 10001),
            ArakoonInstallerTester.EXPECTED_NODE_CONFIG.format(
                '2', storagerouters[2].ip, 20000, base_dir2, '2', 20001))
        self.assertEqual(
            Configuration.get(ArakoonInstaller.CONFIG_KEY.format('test'),
                              raw=True), expected)
        # @TODO: assert service availability here. It should be stopped

        catchup_command = 'arakoon --node 2 -config file://opt/OpenvStorage/config/framework.json?key=/ovs/arakoon/test/config -catchup-only'
        SSHClient._run_returns[catchup_command] = None
        SSHClient._run_recordings = []
        ArakoonInstaller.restart_cluster_add('test', [storagerouters[1].ip],
                                             storagerouters[2].ip,
                                             filesystem=False)
        self.assertIn(catchup_command, SSHClient._run_recordings)
        # @TODO: assert the service is running

        config = ArakoonClusterConfig('test', filesystem=False)
        config.load_config(storagerouters[2].ip)
        client = ArakoonInstaller.build_client(config)
        reality = client.get(ArakoonInstaller.INTERNAL_CONFIG_KEY)
        self.assertEqual(reality, expected)

        reality = json.loads(client.get(ArakoonInstaller.METADATA_KEY))
        expected = {
            'cluster_name': 'test',
            'cluster_type': 'FWK',
            'in_use': True,
            'internal': True
        }
        self.assertDictEqual(reality, expected)

        # Shrinking cluster
        ArakoonInstaller.shrink_cluster(storagerouters[1].ip,
                                        storagerouters[2].ip, 'test')
        reality = Helper.extract_dir_structure(base_dir)
        expected = {
            'dirs': {
                'arakoon': {
                    'dirs': {
                        'test': {
                            'dirs': {},
                            'files': []
                        }
                    },
                    'files': []
                }
            },
            'files': []
        }
        self.assertDictEqual(reality, expected)
        expected = '{0}\n\n{1}\n\n'.format(
            ArakoonInstallerTester.EXPECTED_CLUSTER_CONFIG.format(
                '2', 'test', ''),
            ArakoonInstallerTester.EXPECTED_NODE_CONFIG.format(
                '2', storagerouters[2].ip, 20000, base_dir2, '2', 20001))
        self.assertEqual(
            Configuration.get(ArakoonInstaller.CONFIG_KEY.format('test'),
                              raw=True), expected)
        # @TODO: assert service availability here. It should be stopped

        ArakoonInstaller.restart_cluster_remove('test', [storagerouters[2].ip],
                                                filesystem=False)
        # @TODO: assert the service is running

        config = ArakoonClusterConfig('test', filesystem=False)
        config.load_config(storagerouters[2].ip)
        client = ArakoonInstaller.build_client(config)
        reality = client.get(ArakoonInstaller.INTERNAL_CONFIG_KEY)
        self.assertEqual(reality, expected)

        reality = json.loads(client.get(ArakoonInstaller.METADATA_KEY))
        expected = {
            'cluster_name': 'test',
            'cluster_type': 'FWK',
            'in_use': True,
            'internal': True
        }
        self.assertDictEqual(reality, expected)
コード例 #18
0
ファイル: nodetype.py プロジェクト: openvstorage/framework
    def promote_node(cluster_ip, master_ip, ip_client_map, unique_id, configure_memcached, configure_rabbitmq):
        """
        Promotes a given node
        """
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.dal.lists.servicetypelist import ServiceTypeList
        from ovs.dal.lists.servicelist import ServiceList
        from ovs.dal.hybrids.service import Service

        Toolbox.log(logger=NodeTypeController._logger, messages='Promoting node', title=True)
        if configure_memcached is True:
            if NodeTypeController._validate_local_memcache_servers(ip_client_map) is False:
                raise RuntimeError('Not all memcache nodes can be reached which is required for promoting a node.')

        target_client = ip_client_map[cluster_ip]
        machine_id = System.get_my_machine_id(target_client)
        node_name, _ = target_client.get_hostname()
        master_client = ip_client_map[master_ip]

        storagerouter = StorageRouterList.get_by_machine_id(unique_id)
        storagerouter.node_type = 'MASTER'
        storagerouter.save()

        external_config = Configuration.get('/ovs/framework/external_config')
        if external_config is None:
            config_store = Configuration.get_store()
            if config_store == 'arakoon':
                Toolbox.log(logger=NodeTypeController._logger, messages='Joining Arakoon configuration cluster')
                metadata = ArakoonInstaller.extend_cluster(master_ip=master_ip,
                                                           new_ip=cluster_ip,
                                                           cluster_name='config',
                                                           base_dir=Configuration.get('/ovs/framework/paths|ovsdb'),
                                                           ports=[26400, 26401],
                                                           filesystem=True)
                ArakoonInstaller.restart_cluster_add(cluster_name='config',
                                                     current_ips=metadata['ips'],
                                                     new_ip=cluster_ip,
                                                     filesystem=True)
                ServiceManager.register_service(node_name=machine_id,
                                                service_metadata=metadata['service_metadata'])
            else:
                from ovs.extensions.db.etcd.installer import EtcdInstaller
                Toolbox.log(logger=NodeTypeController._logger, messages='Joining Etcd cluster')
                EtcdInstaller.extend_cluster(master_ip, cluster_ip, 'config')

        # Find other (arakoon) master nodes
        arakoon_cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|ovsdb'))
        arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=arakoon_cluster_name)
        config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name, filesystem=False)
        config.load_config()
        master_node_ips = [node.ip for node in config.nodes]
        if cluster_ip in master_node_ips:
            master_node_ips.remove(cluster_ip)
        if len(master_node_ips) == 0:
            raise RuntimeError('There should be at least one other master node')

        arakoon_ports = []
        if arakoon_metadata['internal'] is True:
            Toolbox.log(logger=NodeTypeController._logger, messages='Joining Arakoon OVS DB cluster')
            result = ArakoonInstaller.extend_cluster(master_ip=master_ip,
                                                     new_ip=cluster_ip,
                                                     cluster_name=arakoon_cluster_name,
                                                     base_dir=Configuration.get('/ovs/framework/paths|ovsdb'))
            ArakoonInstaller.restart_cluster_add(cluster_name=arakoon_cluster_name,
                                                 current_ips=result['ips'],
                                                 new_ip=cluster_ip, filesystem=False)
            arakoon_ports = [result['client_port'], result['messaging_port']]

        if configure_memcached is True:
            NodeTypeController.configure_memcached(client=target_client, logger=NodeTypeController._logger)
        NodeTypeController.add_services(client=target_client, node_type='master', logger=NodeTypeController._logger)

        Toolbox.log(logger=NodeTypeController._logger, messages='Update configurations')
        if configure_memcached is True:
            endpoints = Configuration.get('/ovs/framework/memcache|endpoints')
            endpoint = '{0}:11211'.format(cluster_ip)
            if endpoint not in endpoints:
                endpoints.append(endpoint)
                Configuration.set('/ovs/framework/memcache|endpoints', endpoints)
        if configure_rabbitmq is True:
            endpoints = Configuration.get('/ovs/framework/messagequeue|endpoints')
            endpoint = '{0}:5672'.format(cluster_ip)
            if endpoint not in endpoints:
                endpoints.append(endpoint)
                Configuration.set('/ovs/framework/messagequeue|endpoints', endpoints)

        if arakoon_metadata['internal'] is True:
            Toolbox.log(logger=NodeTypeController._logger, messages='Restarting master node services')
            ArakoonInstaller.restart_cluster_add(cluster_name=arakoon_cluster_name,
                                                 current_ips=master_node_ips,
                                                 new_ip=cluster_ip,
                                                 filesystem=False)
            PersistentFactory.store = None
            VolatileFactory.store = None

            if 'arakoon-ovsdb' not in [s.name for s in ServiceList.get_services() if s.is_internal is False or s.storagerouter.ip == cluster_ip]:
                service = Service()
                service.name = 'arakoon-ovsdb'
                service.type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON)
                service.ports = arakoon_ports
                service.storagerouter = storagerouter
                service.save()

        if configure_rabbitmq is True:
            NodeTypeController.configure_rabbitmq(client=target_client, logger=NodeTypeController._logger)
            # Copy rabbitmq cookie
            rabbitmq_cookie_file = '/var/lib/rabbitmq/.erlang.cookie'

            Toolbox.log(logger=NodeTypeController._logger, messages='Copying Rabbit MQ cookie')
            contents = master_client.file_read(rabbitmq_cookie_file)
            master_hostname, _ = master_client.get_hostname()
            target_client.dir_create(os.path.dirname(rabbitmq_cookie_file))
            target_client.file_write(rabbitmq_cookie_file, contents)
            target_client.file_chmod(rabbitmq_cookie_file, mode=400)
            target_client.run(['rabbitmq-server', '-detached'])
            time.sleep(5)
            target_client.run(['rabbitmqctl', 'stop_app'])
            time.sleep(5)
            target_client.run(['rabbitmqctl', 'join_cluster', 'rabbit@{0}'.format(master_hostname)])
            time.sleep(5)
            target_client.run(['rabbitmqctl', 'stop'])
            time.sleep(5)

            # Enable HA for the rabbitMQ queues
            Toolbox.change_service_state(target_client, 'rabbitmq-server', 'start', NodeTypeController._logger)
            NodeTypeController.check_rabbitmq_and_enable_ha_mode(client=target_client, logger=NodeTypeController._logger)

        NodeTypeController._configure_amqp_to_volumedriver()

        Toolbox.log(logger=NodeTypeController._logger, messages='Starting services')
        services = ['memcached', 'arakoon-ovsdb', 'rabbitmq-server', 'etcd-config']
        if arakoon_metadata['internal'] is True:
            services.remove('arakoon-ovsdb')
        for service in services:
            if ServiceManager.has_service(service, client=target_client):
                Toolbox.change_service_state(target_client, service, 'start', NodeTypeController._logger)

        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
        NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger)

        if Toolbox.run_hooks(component='nodetype',
                             sub_component='promote',
                             logger=NodeTypeController._logger,
                             cluster_ip=cluster_ip,
                             master_ip=master_ip):
            Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
            NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger)

        if NodeTypeController.avahi_installed(client=target_client, logger=NodeTypeController._logger) is True:
            NodeTypeController.configure_avahi(client=target_client, node_name=node_name, node_type='master', logger=NodeTypeController._logger)
        Configuration.set('/ovs/framework/hosts/{0}/type'.format(machine_id), 'MASTER')
        target_client.run(['chown', '-R', 'ovs:ovs', '/opt/OpenvStorage/config'])
        Configuration.set('/ovs/framework/hosts/{0}/promotecompleted'.format(machine_id), True)

        if target_client.file_exists('/tmp/ovs_rollback'):
            target_client.file_delete('/tmp/ovs_rollback')

        Toolbox.log(logger=NodeTypeController._logger, messages='Promote complete')
コード例 #19
0
ファイル: nodetype.py プロジェクト: openvstorage/framework
    def promote_or_demote_node(node_action, cluster_ip=None, execute_rollback=False):
        """
        Promotes or demotes the local node
        :param node_action: Demote or promote
        :type node_action: str
        :param cluster_ip: IP of node to promote or demote
        :type cluster_ip: str
        :param execute_rollback: In case of failure revert the changes made
        :type execute_rollback: bool
        :return: None
        """

        if node_action not in ('promote', 'demote'):
            raise ValueError('Nodes can only be promoted or demoted')

        Toolbox.log(logger=NodeTypeController._logger, messages='Open vStorage Setup - {0}'.format(node_action.capitalize()), boxed=True)
        try:
            Toolbox.log(logger=NodeTypeController._logger, messages='Collecting information', title=True)

            machine_id = System.get_my_machine_id()
            if Configuration.get('/ovs/framework/hosts/{0}/setupcompleted'.format(machine_id)) is False:
                raise RuntimeError('No local OVS setup found.')

            if cluster_ip and not re.match(Toolbox.regex_ip, cluster_ip):
                raise RuntimeError('Incorrect IP provided ({0})'.format(cluster_ip))

            if cluster_ip:
                client = SSHClient(endpoint=cluster_ip)
                machine_id = System.get_my_machine_id(client)

            node_type = Configuration.get('/ovs/framework/hosts/{0}/type'.format(machine_id))
            if node_action == 'promote' and node_type == 'MASTER':
                raise RuntimeError('This node is already master.')
            elif node_action == 'demote' and node_type == 'EXTRA':
                raise RuntimeError('This node should be a master.')
            elif node_type not in ['MASTER', 'EXTRA']:
                raise RuntimeError('This node is not correctly configured.')

            master_ip = None
            offline_nodes = []

            online = True
            target_client = None
            if node_action == 'demote' and cluster_ip:  # Demote an offline node
                from ovs.dal.lists.storagerouterlist import StorageRouterList
                from ovs.lib.storagedriver import StorageDriverController

                ip = cluster_ip
                unique_id = None
                ip_client_map = {}
                for storage_router in StorageRouterList.get_storagerouters():
                    try:
                        client = SSHClient(storage_router.ip, username='******')
                        if storage_router.node_type == 'MASTER':
                            master_ip = storage_router.ip
                        ip_client_map[storage_router.ip] = client
                    except UnableToConnectException:
                        if storage_router.ip == cluster_ip:
                            online = False
                            unique_id = storage_router.machine_id
                            StorageDriverController.mark_offline(storagerouter_guid=storage_router.guid)
                        offline_nodes.append(storage_router)
                if online is True:
                    raise RuntimeError("If the node is online, please use 'ovs setup demote' executed on the node you wish to demote")
                if master_ip is None:
                    raise RuntimeError('Failed to retrieve another responsive MASTER node')

            else:
                target_password = Toolbox.ask_validate_password(ip='127.0.0.1', logger=NodeTypeController._logger)
                target_client = SSHClient('127.0.0.1', username='******', password=target_password)

                unique_id = System.get_my_machine_id(target_client)
                ip = Configuration.get('/ovs/framework/hosts/{0}/ip'.format(unique_id))

                storagerouter_info = NodeTypeController.retrieve_storagerouter_info_via_host(ip=target_client.ip, password=target_password)
                node_ips = [sr_info['ip'] for sr_info in storagerouter_info.itervalues()]
                master_node_ips = [sr_info['ip'] for sr_info in storagerouter_info.itervalues() if sr_info['type'] == 'master' and sr_info['ip'] != ip]
                if len(master_node_ips) == 0:
                    if node_action == 'promote':
                        raise RuntimeError('No master node could be found')
                    else:
                        raise RuntimeError('It is not possible to remove the only master')

                master_ip = master_node_ips[0]
                ip_client_map = dict((node_ip, SSHClient(node_ip, username='******')) for node_ip in node_ips)

            if node_action == 'demote':
                for cluster_name in Configuration.list('/ovs/arakoon'):
                    config = ArakoonClusterConfig(cluster_name, False)
                    config.load_config()
                    arakoon_client = ArakoonInstaller.build_client(config)
                    metadata = json.loads(arakoon_client.get(ArakoonInstaller.METADATA_KEY))
                    if len(config.nodes) == 1 and config.nodes[0].ip == ip and metadata.get('internal') is True:
                        raise RuntimeError('Demote is not supported when single node Arakoon cluster(s) are present on the node to be demoted.')

            configure_rabbitmq = Toolbox.is_service_internally_managed(service='rabbitmq')
            configure_memcached = Toolbox.is_service_internally_managed(service='memcached')
            if node_action == 'promote':
                try:
                    NodeTypeController.promote_node(cluster_ip=ip,
                                                    master_ip=master_ip,
                                                    ip_client_map=ip_client_map,
                                                    unique_id=unique_id,
                                                    configure_memcached=configure_memcached,
                                                    configure_rabbitmq=configure_rabbitmq)
                except Exception:
                    if execute_rollback is True:
                        NodeTypeController.demote_node(cluster_ip=ip,
                                                       master_ip=master_ip,
                                                       ip_client_map=ip_client_map,
                                                       unique_id=unique_id,
                                                       unconfigure_memcached=configure_memcached,
                                                       unconfigure_rabbitmq=configure_rabbitmq,
                                                       offline_nodes=offline_nodes)
                    elif target_client is not None:
                        target_client.file_write('/tmp/ovs_rollback', 'demote')
                    raise
            else:
                try:
                    NodeTypeController.demote_node(cluster_ip=ip,
                                                   master_ip=master_ip,
                                                   ip_client_map=ip_client_map,
                                                   unique_id=unique_id,
                                                   unconfigure_memcached=configure_memcached,
                                                   unconfigure_rabbitmq=configure_rabbitmq,
                                                   offline_nodes=offline_nodes)
                except Exception:
                    if execute_rollback is True:
                        NodeTypeController.promote_node(cluster_ip=ip,
                                                        master_ip=master_ip,
                                                        ip_client_map=ip_client_map,
                                                        unique_id=unique_id,
                                                        configure_memcached=configure_memcached,
                                                        configure_rabbitmq=configure_rabbitmq)
                    elif target_client is not None:
                        target_client.file_write('/tmp/ovs_rollback', 'promote')
                    raise

            Toolbox.log(logger=NodeTypeController._logger, messages='\n')
            Toolbox.log(logger=NodeTypeController._logger, messages='{0} complete.'.format(node_action.capitalize()), boxed=True)
        except Exception as exception:
            Toolbox.log(logger=NodeTypeController._logger, messages='\n')
            Toolbox.log(logger=NodeTypeController._logger, messages=['An unexpected error occurred:', str(exception)], boxed=True, loglevel='exception')
            sys.exit(1)
        except KeyboardInterrupt:
            Toolbox.log(logger=NodeTypeController._logger, messages='\n')
            Toolbox.log(logger=NodeTypeController._logger,
                        messages='This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.',
                        boxed=True,
                        loglevel='error')
            sys.exit(1)
コード例 #20
0
ファイル: scheduledtask.py プロジェクト: winglq/framework
    def collapse_arakoon():
        """
        Collapse Arakoon's Tlogs
        :return: None
        """
        ScheduledTaskController._logger.info('Starting arakoon collapse')
        storagerouters = StorageRouterList.get_storagerouters()
        cluster_info = [('cacc', storagerouters[0], True)]
        cluster_names = []
        for service in ServiceList.get_services():
            if service.is_internal is True and service.type.name in (
                    ServiceType.SERVICE_TYPES.ARAKOON,
                    ServiceType.SERVICE_TYPES.NS_MGR,
                    ServiceType.SERVICE_TYPES.ALBA_MGR):
                cluster = service.name.replace('arakoon-', '')
                if cluster in cluster_names:
                    continue
                cluster_names.append(cluster)
                cluster_info.append((cluster, service.storagerouter, False))
        workload = {}
        for cluster, storagerouter, filesystem in cluster_info:
            ScheduledTaskController._logger.debug(
                '  Collecting info for cluster {0}'.format(cluster))
            config = ArakoonClusterConfig(cluster, filesystem=filesystem)
            config.load_config(storagerouter.ip)
            for node in config.nodes:
                if node.ip not in workload:
                    workload[node.ip] = {'node_id': node.name, 'clusters': []}
                workload[node.ip]['clusters'].append((cluster, filesystem))
        for storagerouter in storagerouters:
            try:
                if storagerouter.ip not in workload:
                    continue
                node_workload = workload[storagerouter.ip]
                client = SSHClient(storagerouter)
                for cluster, filesystem in node_workload['clusters']:
                    try:
                        ScheduledTaskController._logger.debug(
                            '  Collapsing cluster {0} on {1}'.format(
                                cluster, storagerouter.ip))
                        if filesystem is True:
                            config_path = ArakoonClusterConfig.CONFIG_FILE.format(
                                cluster)
                        else:
                            config_path = Configuration.get_configuration_path(
                                ArakoonClusterConfig.CONFIG_KEY.format(
                                    cluster))
                        client.run([
                            'arakoon', '--collapse-local',
                            node_workload['node_id'], '2', '-config',
                            config_path
                        ])
                        ScheduledTaskController._logger.info(
                            '  Collapsing cluster {0} on {1} completed'.format(
                                cluster, storagerouter.ip))
                    except:
                        ScheduledTaskController._logger.exception(
                            '  Collapsing cluster {0} on {1} failed'.format(
                                cluster, storagerouter.ip))
            except UnableToConnectException:
                ScheduledTaskController._logger.error(
                    '  Could not collapse any cluster on {0} (not reachable)'.
                    format(storagerouter.name))

        ScheduledTaskController._logger.info('Arakoon collapse finished')