def _get_arakoon_clusters(cls, result_handler):
     """
     Retrieves all Arakoon clusters registered in this OVSCluster
     :param result_handler: Logging object
     :type result_handler: ovs.extensions.healthcheck.result.HCResults
     :return: Dict with the Arakoon cluster types as key and list with dicts which contain cluster names and pyrakoon clients
     :rtype: dict(str, list[dict])
     """
     result_handler.info('Fetching available arakoon clusters.', add_to_result=False)
     arakoon_clusters = {}
     for cluster_name in list(Configuration.list('/ovs/arakoon')) + ['cacc']:
         # Determine Arakoon type
         is_cacc = cluster_name == 'cacc'
         arakoon_config = ArakoonClusterConfig(cluster_id=cluster_name, load_config=not is_cacc)
         if is_cacc is True:
             with open(Configuration.CACC_LOCATION) as config_file:
                 contents = config_file.read()
             arakoon_config.read_config(contents=contents)
         try:
             arakoon_client = ArakoonInstaller.build_client(arakoon_config)
         except (ArakoonNoMaster, ArakoonNoMasterResult) as ex:
             result_handler.failure('Unable to find a master for Arakoon cluster {0}. (Message: {1})'.format(cluster_name, str(ex)),
                                    code=ErrorCodes.master_none)
         except Exception as ex:
             msg = 'Unable to connect to Arakoon cluster {0}. (Message: {1})'.format(cluster_name, str(ex))
             result_handler.exception(msg, code=ErrorCodes.unhandled_exception)
             cls.logger.exception(msg)
             continue
         metadata = json.loads(arakoon_client.get(ArakoonInstaller.METADATA_KEY))
         cluster_type = metadata['cluster_type']
         if cluster_type not in arakoon_clusters:
             arakoon_clusters[cluster_type] = []
         arakoon_clusters[cluster_type].append({'cluster_name': cluster_name, 'client': arakoon_client, 'config': arakoon_config})
     return arakoon_clusters
Ejemplo n.º 2
0
 def _get_arakoon_clusters(cls, result_handler):
     """
     Retrieves all Arakoon clusters registered in this OVSCluster
     :param result_handler: Logging object
     :type result_handler: ovs.extensions.healthcheck.result.HCResults
     :return: Dict with the Arakoon cluster types as key and list with dicts which contain cluster names and pyrakoon clients
     :rtype: dict(str, list[dict])
     """
     result_handler.info('Fetching available arakoon clusters.',
                         add_to_result=False)
     arakoon_clusters = {}
     for cluster_name in list(
             Configuration.list('/ovs/arakoon')) + ['cacc']:
         # Determine Arakoon type
         is_cacc = cluster_name == 'cacc'
         arakoon_config = ArakoonClusterConfig(cluster_id=cluster_name,
                                               load_config=not is_cacc)
         if is_cacc is True:
             with open(Configuration.CACC_LOCATION) as config_file:
                 contents = config_file.read()
             arakoon_config.read_config(contents=contents)
         try:
             arakoon_client = ArakoonInstaller.build_client(arakoon_config)
         except (ArakoonNoMaster, ArakoonNoMasterResult) as ex:
             result_handler.failure(
                 'Unable to find a master for Arakoon cluster {0}. (Message: {1})'
                 .format(cluster_name, str(ex)),
                 code=ErrorCodes.master_none)
         except Exception as ex:
             msg = 'Unable to connect to Arakoon cluster {0}. (Message: {1})'.format(
                 cluster_name, str(ex))
             result_handler.exception(msg,
                                      code=ErrorCodes.unhandled_exception)
             cls.logger.exception(msg)
             continue
         metadata = json.loads(
             arakoon_client.get(ArakoonInstaller.METADATA_KEY))
         cluster_type = metadata['cluster_type']
         if cluster_type not in arakoon_clusters:
             arakoon_clusters[cluster_type] = []
         arakoon_clusters[cluster_type].append({
             'cluster_name': cluster_name,
             'client': arakoon_client,
             'config': arakoon_config
         })
     return arakoon_clusters
Ejemplo n.º 3
0
    def services_running(self, target):
        """
        Check all services are running
        :param target: Target to check
        :return: Boolean
        """
        try:
            key = 'ovs-watcher-{0}'.format(str(uuid.uuid4()))
            value = str(time.time())

            if target in ['config', 'framework']:
                self.log_message(target, 'Testing configuration store...', 0)
                from ovs.extensions.generic.configuration import Configuration
                try:
                    Configuration.list('/')
                except Exception as ex:
                    self.log_message(
                        target,
                        '  Error during configuration store test: {0}'.format(
                            ex), 2)
                    return False

                from ovs.extensions.db.arakooninstaller import ArakoonInstaller, ArakoonClusterConfig
                from ovs_extensions.db.arakoon.pyrakoon.pyrakoon.compat import NoGuarantee
                from ovs.extensions.generic.configuration import Configuration
                with open(Configuration.CACC_LOCATION) as config_file:
                    contents = config_file.read()
                config = ArakoonClusterConfig(
                    cluster_id=Configuration.ARAKOON_NAME, load_config=False)
                config.read_config(contents=contents)
                client = ArakoonInstaller.build_client(config)
                contents = client.get(ArakoonInstaller.INTERNAL_CONFIG_KEY,
                                      consistency=NoGuarantee())
                if Watcher.LOG_CONTENTS != contents:
                    try:
                        config.read_config(
                            contents=contents
                        )  # Validate whether the contents are not corrupt
                    except Exception as ex:
                        self.log_message(
                            target,
                            '  Configuration stored in configuration store seems to be corrupt: {0}'
                            .format(ex), 2)
                        return False
                    temp_filename = '{0}~'.format(Configuration.CACC_LOCATION)
                    with open(temp_filename, 'w') as config_file:
                        config_file.write(contents)
                        config_file.flush()
                        os.fsync(config_file)
                    os.rename(temp_filename, Configuration.CACC_LOCATION)
                    Watcher.LOG_CONTENTS = contents
                self.log_message(target, '  Configuration store OK', 0)

            if target == 'framework':
                # Volatile
                self.log_message(target, 'Testing volatile store...', 0)
                max_tries = 5
                tries = 0
                while tries < max_tries:
                    try:
                        try:
                            logging.disable(logging.WARNING)
                            from ovs.extensions.storage.volatilefactory import VolatileFactory
                            VolatileFactory.store = None
                            volatile = VolatileFactory.get_client()
                            volatile.set(key, value)
                            if volatile.get(key) == value:
                                volatile.delete(key)
                                break
                            volatile.delete(key)
                        finally:
                            logging.disable(logging.NOTSET)
                    except Exception as message:
                        self.log_message(
                            target,
                            '  Error during volatile store test: {0}'.format(
                                message), 2)
                    key = 'ovs-watcher-{0}'.format(str(
                        uuid.uuid4()))  # Get another key
                    time.sleep(1)
                    tries += 1
                if tries == max_tries:
                    self.log_message(target,
                                     '  Volatile store not working correctly',
                                     2)
                    return False
                self.log_message(
                    target,
                    '  Volatile store OK after {0} tries'.format(tries), 0)

                # Persistent
                self.log_message(target, 'Testing persistent store...', 0)
                max_tries = 5
                tries = 0
                while tries < max_tries:
                    try:
                        try:
                            logging.disable(logging.WARNING)
                            persistent = PersistentFactory.get_client()
                            persistent.nop()
                            break
                        finally:
                            logging.disable(logging.NOTSET)
                    except Exception as message:
                        self.log_message(
                            target,
                            '  Error during persistent store test: {0}'.format(
                                message), 2)
                    time.sleep(1)
                    tries += 1
                if tries == max_tries:
                    self.log_message(
                        target, '  Persistent store not working correctly', 2)
                    return False
                self.log_message(
                    target,
                    '  Persistent store OK after {0} tries'.format(tries), 0)

            if target == 'volumedriver':
                # Arakoon, voldrv cluster
                self.log_message(target, 'Testing arakoon (voldrv)...', 0)
                max_tries = 5
                tries = 0
                while tries < max_tries:
                    try:
                        from ovs.extensions.generic.configuration import Configuration
                        from ovs_extensions.storage.persistent.pyrakoonstore import PyrakoonStore
                        cluster_name = str(
                            Configuration.get(
                                '/ovs/framework/arakoon_clusters|voldrv'))
                        configuration = Configuration.get(
                            '/ovs/arakoon/{0}/config'.format(cluster_name),
                            raw=True)
                        client = PyrakoonStore(cluster=cluster_name,
                                               configuration=configuration)
                        client.nop()
                        break
                    except Exception as message:
                        self.log_message(
                            target,
                            '  Error during arakoon (voldrv) test: {0}'.format(
                                message), 2)
                    time.sleep(1)
                    tries += 1
                if tries == max_tries:
                    self.log_message(
                        target, '  Arakoon (voldrv) not working correctly', 2)
                    return False
                self.log_message(target, '  Arakoon (voldrv) OK', 0)

            if target in ['framework', 'volumedriver']:
                # RabbitMQ
                self.log_message(target, 'Test rabbitMQ...', 0)
                import pika
                from ovs.extensions.generic.configuration import Configuration
                messagequeue = Configuration.get('/ovs/framework/messagequeue')
                rmq_servers = messagequeue['endpoints']
                good_node = False
                for server in rmq_servers:
                    try:
                        connection_string = '{0}://{1}:{2}@{3}/%2F'.format(
                            messagequeue['protocol'], messagequeue['user'],
                            messagequeue['password'], server)
                        connection = pika.BlockingConnection(
                            pika.URLParameters(connection_string))
                        channel = connection.channel()
                        channel.basic_publish(
                            '', 'ovs-watcher', str(time.time()),
                            pika.BasicProperties(content_type='text/plain',
                                                 delivery_mode=1))
                        connection.close()
                        good_node = True
                    except Exception as message:
                        self.log_message(
                            target,
                            '  Error during rabbitMQ test on node {0}: {1}'.
                            format(server, message), 2)
                if good_node is False:
                    self.log_message(
                        target, '  No working rabbitMQ node could be found', 2)
                    return False
                self.log_message(target, '  RabbitMQ test OK', 0)
                self.log_message(target, 'All tests OK', 0)

            return True
        except Exception as ex:
            self.log_message(target, 'Unexpected exception: {0}'.format(ex), 2)
            return False
Ejemplo n.º 4
0
    def promote_or_demote_node(node_action,
                               cluster_ip=None,
                               execute_rollback=False):
        """
        Promotes or demotes the local node
        :param node_action: Demote or promote
        :type node_action: str
        :param cluster_ip: IP of node to promote or demote
        :type cluster_ip: str
        :param execute_rollback: In case of failure revert the changes made
        :type execute_rollback: bool
        :return: None
        """

        if node_action not in ('promote', 'demote'):
            raise ValueError('Nodes can only be promoted or demoted')

        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Open vStorage Setup - {0}'.format(
                        node_action.capitalize()),
                    boxed=True)
        try:
            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Collecting information',
                        title=True)

            machine_id = System.get_my_machine_id()
            if Configuration.get('/ovs/framework/hosts/{0}/setupcompleted'.
                                 format(machine_id)) is False:
                raise RuntimeError('No local OVS setup found.')

            if cluster_ip and not re.match(Toolbox.regex_ip, cluster_ip):
                raise RuntimeError(
                    'Incorrect IP provided ({0})'.format(cluster_ip))

            if cluster_ip:
                client = SSHClient(endpoint=cluster_ip)
                machine_id = System.get_my_machine_id(client)

            node_type = Configuration.get(
                '/ovs/framework/hosts/{0}/type'.format(machine_id))
            if node_action == 'promote' and node_type == 'MASTER':
                raise RuntimeError('This node is already master.')
            elif node_action == 'demote' and node_type == 'EXTRA':
                raise RuntimeError('This node should be a master.')
            elif node_type not in ['MASTER', 'EXTRA']:
                raise RuntimeError('This node is not correctly configured.')

            master_ip = None
            offline_nodes = []

            online = True
            target_client = None
            if node_action == 'demote' and cluster_ip:  # Demote an offline node
                from ovs.dal.lists.storagerouterlist import StorageRouterList
                from ovs.lib.storagedriver import StorageDriverController

                ip = cluster_ip
                unique_id = None
                ip_client_map = {}
                for storage_router in StorageRouterList.get_storagerouters():
                    try:
                        client = SSHClient(storage_router.ip, username='******')
                        if storage_router.node_type == 'MASTER':
                            master_ip = storage_router.ip
                        ip_client_map[storage_router.ip] = client
                    except UnableToConnectException:
                        if storage_router.ip == cluster_ip:
                            online = False
                            unique_id = storage_router.machine_id
                            StorageDriverController.mark_offline(
                                storagerouter_guid=storage_router.guid)
                        offline_nodes.append(storage_router)
                if online is True:
                    raise RuntimeError(
                        "If the node is online, please use 'ovs setup demote' executed on the node you wish to demote"
                    )
                if master_ip is None:
                    raise RuntimeError(
                        'Failed to retrieve another responsive MASTER node')

            else:
                target_password = Toolbox.ask_validate_password(
                    ip='127.0.0.1', logger=NodeTypeController._logger)
                target_client = SSHClient('127.0.0.1',
                                          username='******',
                                          password=target_password)

                unique_id = System.get_my_machine_id(target_client)
                ip = Configuration.get(
                    '/ovs/framework/hosts/{0}/ip'.format(unique_id))

                storagerouter_info = NodeTypeController.retrieve_storagerouter_info_via_host(
                    ip=target_client.ip, password=target_password)
                node_ips = [
                    sr_info['ip']
                    for sr_info in storagerouter_info.itervalues()
                ]
                master_node_ips = [
                    sr_info['ip']
                    for sr_info in storagerouter_info.itervalues()
                    if sr_info['type'] == 'master' and sr_info['ip'] != ip
                ]
                if len(master_node_ips) == 0:
                    if node_action == 'promote':
                        raise RuntimeError('No master node could be found')
                    else:
                        raise RuntimeError(
                            'It is not possible to remove the only master')

                master_ip = master_node_ips[0]
                ip_client_map = dict(
                    (node_ip, SSHClient(node_ip, username='******'))
                    for node_ip in node_ips)

            if node_action == 'demote':
                for cluster_name in Configuration.list('/ovs/arakoon'):
                    config = ArakoonClusterConfig(cluster_id=cluster_name)
                    arakoon_client = ArakoonInstaller.build_client(config)
                    metadata = json.loads(
                        arakoon_client.get(ArakoonInstaller.METADATA_KEY))
                    if len(config.nodes) == 1 and config.nodes[
                            0].ip == ip and metadata.get('internal') is True:
                        raise RuntimeError(
                            'Demote is not supported when single node Arakoon cluster(s) are present on the node to be demoted.'
                        )

            configure_rabbitmq = Toolbox.is_service_internally_managed(
                service='rabbitmq')
            configure_memcached = Toolbox.is_service_internally_managed(
                service='memcached')
            if node_action == 'promote':
                try:
                    NodeTypeController.promote_node(
                        cluster_ip=ip,
                        master_ip=master_ip,
                        ip_client_map=ip_client_map,
                        unique_id=unique_id,
                        configure_memcached=configure_memcached,
                        configure_rabbitmq=configure_rabbitmq)
                except Exception:
                    if execute_rollback is True:
                        NodeTypeController.demote_node(
                            cluster_ip=ip,
                            master_ip=master_ip,
                            ip_client_map=ip_client_map,
                            unique_id=unique_id,
                            unconfigure_memcached=configure_memcached,
                            unconfigure_rabbitmq=configure_rabbitmq,
                            offline_nodes=offline_nodes)
                    elif target_client is not None:
                        target_client.file_write('/tmp/ovs_rollback', 'demote')
                    raise
            else:
                try:
                    NodeTypeController.demote_node(
                        cluster_ip=ip,
                        master_ip=master_ip,
                        ip_client_map=ip_client_map,
                        unique_id=unique_id,
                        unconfigure_memcached=configure_memcached,
                        unconfigure_rabbitmq=configure_rabbitmq,
                        offline_nodes=offline_nodes)
                except Exception:
                    if execute_rollback is True:
                        NodeTypeController.promote_node(
                            cluster_ip=ip,
                            master_ip=master_ip,
                            ip_client_map=ip_client_map,
                            unique_id=unique_id,
                            configure_memcached=configure_memcached,
                            configure_rabbitmq=configure_rabbitmq)
                    elif target_client is not None:
                        target_client.file_write('/tmp/ovs_rollback',
                                                 'promote')
                    raise

            Toolbox.log(logger=NodeTypeController._logger, messages='\n')
            Toolbox.log(logger=NodeTypeController._logger,
                        messages='{0} complete.'.format(
                            node_action.capitalize()),
                        boxed=True)
        except Exception as exception:
            Toolbox.log(logger=NodeTypeController._logger, messages='\n')
            Toolbox.log(
                logger=NodeTypeController._logger,
                messages=['An unexpected error occurred:',
                          str(exception)],
                boxed=True,
                loglevel='exception')
            sys.exit(1)
        except KeyboardInterrupt:
            Toolbox.log(logger=NodeTypeController._logger, messages='\n')
            Toolbox.log(
                logger=NodeTypeController._logger,
                messages=
                'This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.',
                boxed=True,
                loglevel='error')
            sys.exit(1)
Ejemplo n.º 5
0
    def services_running(self):
        # type: () -> bool
        """
        Check if all services are running
        :return: Boolean
        """
        try:
            key = 'ovs-watcher-{0}'.format(str(uuid.uuid4()))
            value = str(time.time())
            if self.target in [WatcherTypes.CONFIG, WatcherTypes.FWK]:
                self.log_message('Testing configuration store...')
                try:
                    Configuration.list('/')
                except Exception as ex:
                    self.log_message('  Error during configuration store test: {0}'.format(ex), 2)
                    return False

                with open(CACC_LOCATION) as config_file:
                    contents = config_file.read()
                config = ArakoonClusterConfig(cluster_id=ARAKOON_NAME, load_config=False)
                config.read_config(contents=contents)
                client = ArakoonInstaller.build_client(config)
                contents = client.get(ArakoonInstaller.INTERNAL_CONFIG_KEY, consistency=NoGuarantee())
                if Watcher.LOG_CONTENTS != contents:
                    try:
                        config.read_config(contents=contents)  # Validate whether the contents are not corrupt
                    except Exception as ex:
                        self.log_message('  Configuration stored in configuration store seems to be corrupt: {0}'.format(ex), 2)
                        return False
                    temp_filename = '{0}~'.format(CACC_LOCATION)
                    with open(temp_filename, 'w') as config_file:
                        config_file.write(contents)
                        config_file.flush()
                        os.fsync(config_file)
                    os.rename(temp_filename, CACC_LOCATION)
                    Watcher.LOG_CONTENTS = contents
                self.log_message('  Configuration store OK', 0)

            if self.target == WatcherTypes.FWK:
                self._test_store('volatile', key, value)
                self._test_store('persistent')

            if self.target == WatcherTypes.VOLDRV:
                # Arakoon, voldrv cluster
                self._test_store('arakoon_voldrv')

            if self.target in [WatcherTypes.FWK, WatcherTypes.VOLDRV]:
                # RabbitMQ
                self.log_message('Test rabbitMQ...', 0)
                messagequeue = Configuration.get('/ovs/framework/messagequeue')
                rmq_servers = messagequeue['endpoints']
                good_node = False
                for server in rmq_servers:
                    try:
                        connection_string = '{0}://{1}:{2}@{3}/%2F'.format(messagequeue['protocol'],
                                                                           messagequeue['user'],
                                                                           messagequeue['password'],
                                                                           server)
                        connection = pika.BlockingConnection(pika.URLParameters(connection_string))
                        channel = connection.channel()
                        channel.basic_publish('', 'ovs-watcher', str(time.time()),
                                              pika.BasicProperties(content_type='text/plain', delivery_mode=1))
                        connection.close()
                        good_node = True
                    except Exception as message:
                        self.log_message('  Error during rabbitMQ test on node {0}: {1}'.format(server, message), 2)
                if good_node is False:
                    self.log_message('  No working rabbitMQ node could be found', 2)
                    return False
                self.log_message('  RabbitMQ test OK')
                self.log_message('All tests OK')

            return True
        except Exception as ex:
            self.log_message('Unexpected exception: {0}'.format(ex), 2)
            return False