コード例 #1
0
 def get_storage_router_by_ip(ip):
     """
     Retrieve Storage Router based on IP
     :param ip: IP of Storage Router
     :return: Storage Router DAL object
     """
     return StorageRouterList.get_by_ip(ip)
コード例 #2
0
 def register(node_id):
     """
     Adds a Node with a given node_id to the model
     :param node_id: ID of the ALBA node
     :type node_id: str
     :return: None
     """
     node = AlbaNodeList.get_albanode_by_node_id(node_id)
     if node is None:
         main_config = Configuration.get('/ovs/alba/asdnodes/{0}/config/main'.format(node_id))
         node = AlbaNode()
         node.ip = main_config['ip']
         node.port = main_config['port']
         node.username = main_config['username']
         node.password = main_config['password']
         node.storagerouter = StorageRouterList.get_by_ip(main_config['ip'])
     data = node.client.get_metadata()
     if data['_success'] is False and data['_error'] == 'Invalid credentials':
         raise RuntimeError('Invalid credentials')
     if data['node_id'] != node_id:
         AlbaNodeController._logger.error('Unexpected node_id: {0} vs {1}'.format(data['node_id'], node_id))
         raise RuntimeError('Unexpected node identifier')
     node.node_id = node_id
     node.type = 'ASD'
     node.save()
     AlbaController.checkup_maintenance_agents.delay()
コード例 #3
0
    def register(node_id):
        """
        Adds a Node with a given node_id to the model
        :param node_id: ID of the ALBA node
        :type node_id: str

        :return: None
        """
        node = AlbaNodeList.get_albanode_by_node_id(node_id)
        if node is None:
            main_config = EtcdConfiguration.get('/ovs/alba/asdnodes/{0}/config/main'.format(node_id))
            node = AlbaNode()
            node.ip = main_config['ip']
            node.port = main_config['port']
            node.username = main_config['username']
            node.password = main_config['password']
            node.storagerouter = StorageRouterList.get_by_ip(main_config['ip'])
        data = node.client.get_metadata()
        if data['_success'] is False and data['_error'] == 'Invalid credentials':
            raise RuntimeError('Invalid credentials')
        if data['node_id'] != node_id:
            AlbaNodeController._logger.error('Unexpected node_id: {0} vs {1}'.format(data['node_id'], node_id))
            raise RuntimeError('Unexpected node identifier')
        node.node_id = node_id
        node.type = 'ASD'
        node.save()

        # increase maintenance agents count for all nodes by 1
        for backend in AlbaBackendList.get_albabackends():
            nr_of_agents_key = AlbaNodeController.NR_OF_AGENTS_ETCD_TEMPLATE.format(backend.guid)
            if EtcdConfiguration.exists(nr_of_agents_key):
                EtcdConfiguration.set(nr_of_agents_key, int(EtcdConfiguration.get(nr_of_agents_key) + 1))
            else:
                EtcdConfiguration.set(nr_of_agents_key, 1)
        AlbaNodeController.checkup_maintenance_agents()
コード例 #4
0
 def manage_running_tasks(tasklist, timesleep=10):
     """
     Manage a list of running celery task
     - discard PENDING tasks after a certain timeout
     - validate RUNNING tasks are actually running
     :param tasklist: Dictionary of tasks to wait {IP address: AsyncResult}
     :type tasklist: dict
     :param timesleep: leep between checks -
       -for long running tasks it's better to sleep for a longer period of time to reduce number of ssh calls
     :type timesleep: int
     :return: results
     :rtype: dict
     """
     logger = LogHandler.get('lib', name='celery toolbox')
     ssh_clients = {}
     tasks_pending = {}
     tasks_pending_timeout = 1800  # 30 minutes
     results = {}
     failed_nodes = []
     while len(tasklist.keys()) > 0:
         for ip, task in tasklist.items():
             if task.state in ('SUCCESS', 'FAILURE'):
                 logger.info('Task {0} finished: {1}'.format(task.id, task.state))
                 results[ip] = task.get(propagate=False)
                 del tasklist[ip]
             elif task.state == 'PENDING':
                 if task.id not in tasks_pending:
                     tasks_pending[task.id] = time.time()
                 else:
                     task_pending_since = tasks_pending[task.id]
                     if time.time() - task_pending_since > tasks_pending_timeout:
                         logger.warning('Task {0} is pending since {1} on node {2}. Task will be revoked'.format(task.id, datetime.datetime.fromtimestamp(task_pending_since), ip))
                         revoke(task.id)
                         del tasklist[ip]
                         del tasks_pending[task.id]
                         failed_nodes.append(ip)
             elif task.state == 'STARTED':
                 if ip not in ssh_clients:
                     ssh_clients[ip] = SSHClient(ip, username='******')
                 client = ssh_clients[ip]
                 if ServiceManager.get_service_status('workers', client) is False:
                     logger.error('Service ovs-workers on node {0} appears halted while there is a task PENDING for it {1}. Task will be revoked.'.format(ip, task.id))
                     revoke(task.id)
                     del tasklist[ip]
                     failed_nodes.append(ip)
                 else:
                     ping_result = task.app.control.inspect().ping()
                     storage_router = StorageRouterList.get_by_ip(ip)
                     if "celery@{0}".format(storage_router.name) not in ping_result:
                         logger.error('Service ovs-workers on node {0} is not reachable via rabbitmq while there is a task STARTED for it {1}. Task will be revoked.'.format(ip, task.id))
                         revoke(task.id)
                         del tasklist[ip]
                         failed_nodes.append(ip)
         if len(tasklist.keys()) > 0:
             time.sleep(timesleep)
     return results, failed_nodes
コード例 #5
0
ファイル: vdisks.py プロジェクト: dawnpower/framework
 def set_config_params(self, vdisk, new_config_params, version):
     """
     Sets configuration parameters to a given vdisk.
     :param vdisk: Guid of the virtual disk to configure
     :param new_config_params: Configuration settings for the virtual disk
     :param version: API version
     """
     if version == 1 and 'dtl_target' in new_config_params:
         storage_router = StorageRouterList.get_by_ip(new_config_params['dtl_target'])
         if storage_router is None:
             raise NotAcceptable('API version 1 requires a Storage Router IP')
         new_config_params['dtl_target'] = storage_router.primary_failure_domain.guid
     return VDiskController.set_config_params.delay(vdisk_guid=vdisk.guid, new_config_params=new_config_params)
コード例 #6
0
 def set_config_params(self, vdisk, new_config_params, version):
     """
     Sets configuration parameters to a given vdisk.
     :param vdisk: Guid of the virtual disk to configure
     :param new_config_params: Configuration settings for the virtual disk
     :param version: API version
     """
     if version == 1 and 'dtl_target' in new_config_params:
         storage_router = StorageRouterList.get_by_ip(
             new_config_params['dtl_target'])
         if storage_router is None:
             raise NotAcceptable(
                 'API version 1 requires a Storage Router IP')
         new_config_params[
             'dtl_target'] = storage_router.primary_failure_domain.guid
     return VDiskController.set_config_params.delay(
         vdisk_guid=vdisk.guid, new_config_params=new_config_params)
コード例 #7
0
    def __init__(self, ip):
        """
        Create RabbitMQ object

        :param ip: ip from the server
        :type ip: str
        """
        # check if rabbitmq is available on the ip
        if not RabbitMQ._check_rabbitmq_ip(ip):
            raise ValueError('RabbitMQ on {0} could not be found.'.format(ip))
        self._service_manager = ServiceFactory.get_manager()
        self.ip = ip
        if RabbitMQ.INTERNAL:
            self._storagerouter = StorageRouterList.get_by_ip(ip)
            self._client = SSHClient(ip, username='******')

        if not self.check_management_plugin():
            self.enable_management_plugin()
コード例 #8
0
    def __init__(self, ip):
        """
        Create RabbitMQ object

        :param ip: ip from the server
        :type ip: str
        """
        # check if rabbitmq is available on the ip
        if not RabbitMQ._check_rabbitmq_ip(ip):
            raise ValueError('RabbitMQ on {0} could not be found.'.format(ip))
        self._service_manager = ServiceFactory.get_manager()
        self.ip = ip
        if RabbitMQ.INTERNAL:
            self._storagerouter = StorageRouterList.get_by_ip(ip)
            self._client = SSHClient(ip, username='******')

        if not self.check_management_plugin():
            self.enable_management_plugin()
コード例 #9
0
 def model_albanodes(**kwargs):
     """
     Add all ALBA nodes known to the config platform to the model
     :param kwargs: Kwargs containing information regarding the node
     :type kwargs: dict
     :return: None
     """
     _ = kwargs
     if Configuration.dir_exists('/ovs/alba/asdnodes'):
         for node_id in Configuration.list('/ovs/alba/asdnodes'):
             node = AlbaNodeList.get_albanode_by_node_id(node_id)
             if node is None:
                 node = AlbaNode()
             main_config = Configuration.get('/ovs/alba/asdnodes/{0}/config/main'.format(node_id))
             node.type = 'ASD'
             node.node_id = node_id
             node.ip = main_config['ip']
             node.port = main_config['port']
             node.username = main_config['username']
             node.password = main_config['password']
             node.storagerouter = StorageRouterList.get_by_ip(main_config['ip'])
             node.save()
コード例 #10
0
ファイル: vdisks.py プロジェクト: grimpy/openvstorage
    def set_config_params(self, vdisk, new_config_params, version):
        """
        Sets configuration parameters to a given vdisk.
        :param vdisk: Guid of the virtual disk to configure
        :type vdisk: VDisk
        :param new_config_params: Configuration settings for the virtual disk
        :type new_config_params: dict
        :param version: Client version
        :type version: int
        """
        if version == 1 and "dtl_target" in new_config_params:
            storage_router = StorageRouterList.get_by_ip(new_config_params["dtl_target"])
            if storage_router is None:
                raise HttpNotAcceptableException(
                    error_description="API version 1 requires a Storage Router IP", error="invalid_version"
                )
            new_config_params["dtl_target"] = [junction.domain_guid for junction in storage_router.domains]

        new_config_params.pop("dedupe_mode", None)
        new_config_params.pop("cache_strategy", None)
        new_config_params.pop("readcache_limit", None)
        return VDiskController.set_config_params.delay(vdisk_guid=vdisk.guid, new_config_params=new_config_params)
コード例 #11
0
    def set_config_params(self, vdisk, new_config_params, version):
        """
        Sets configuration parameters to a given vdisk.
        :param vdisk: Guid of the virtual disk to configure
        :type vdisk: VDisk
        :param new_config_params: Configuration settings for the virtual disk
        :type new_config_params: dict
        :param version: Client version
        :type version: int
        :return: Asynchronous result of a CeleryTask
        :rtype: celery.result.AsyncResult
        """
        if version == 1 and 'dtl_target' in new_config_params:
            storage_router = StorageRouterList.get_by_ip(new_config_params['dtl_target'])
            if storage_router is None:
                raise HttpNotAcceptableException(error_description='API version 1 requires a Storage Router IP',
                                                 error='invalid_version')
            new_config_params['dtl_target'] = [junction.domain_guid for junction in storage_router.domains]

        new_config_params.pop('dedupe_mode', None)
        new_config_params.pop('cache_strategy', None)
        new_config_params.pop('readcache_limit', None)
        return VDiskController.set_config_params.delay(vdisk_guid=vdisk.guid, new_config_params=new_config_params)
コード例 #12
0
ファイル: openvstorage.py プロジェクト: DarumasLegs/dev_ops
def flush_node(storagerouter_ip):
    """
    Flush write buffer to backend

    :param storagerouter_ip: ip of a storage router were to perform the flush
    :type storagerouter_ip: str
    """

    sr_info = StorageRouterList.get_by_ip(storagerouter_ip)
    vdisks_by_guid = sr_info.vdisks_guids

    for vdisk_guid in vdisks_by_guid:
        disk = VDisk(vdisk_guid)
        snapshot_name = "flush_snapshot"

    # create snapshot
    disk.storagedriver_client.create_snapshot(str(disk.volume_id), snapshot_name)

    # check if snapshot is synced to the backend
    while not disk.storagedriver_client.info_snapshot(str(disk.volume_id), snapshot_name).in_backend:
        time.sleep(5)

    # delete snapshot if sync is completed
    disk.storagedriver_client.delete_snapshot(str(disk.volume_id), snapshot_name)
コード例 #13
0
    def remove_node(node_ip, silent=None):
        """
        Remove the node with specified IP from the cluster
        :param node_ip: IP of the node to remove
        :type node_ip: str
        :param silent: If silent == '--force-yes' no question will be asked to confirm the removal
        :type silent: str
        :return: None
        """
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.lib.storagedriver import StorageDriverController
        from ovs.lib.vpool import VPoolController

        Toolbox.log(logger=NodeRemovalController._logger,
                    messages='Remove node',
                    boxed=True)
        Toolbox.log(
            logger=NodeRemovalController._logger,
            messages=
            'WARNING: Some of these steps may take a very long time, please check the logs for more information\n\n'
        )
        service_manager = ServiceFactory.get_manager()

        ###############
        # VALIDATIONS #
        ###############
        try:
            node_ip = node_ip.strip()
            if not isinstance(node_ip, str):
                raise ValueError('Node IP must be a string')
            if not re.match(SSHClient.IP_REGEX, node_ip):
                raise ValueError('Invalid IP {0} specified'.format(node_ip))

            storage_router_all = sorted(StorageRouterList.get_storagerouters(),
                                        key=lambda k: k.name)
            storage_router_masters = StorageRouterList.get_masters()
            storage_router_all_ips = set(
                [storage_router.ip for storage_router in storage_router_all])
            storage_router_master_ips = set([
                storage_router.ip for storage_router in storage_router_masters
            ])
            storage_router_to_remove = StorageRouterList.get_by_ip(node_ip)
            offline_reasons = {}
            if node_ip not in storage_router_all_ips:
                raise ValueError(
                    'Unknown IP specified\nKnown in model:\n - {0}\nSpecified for removal:\n - {1}'
                    .format('\n - '.join(storage_router_all_ips), node_ip))

            if len(storage_router_all_ips) == 1:
                raise RuntimeError("Removing the only node is not possible")

            if node_ip in storage_router_master_ips and len(
                    storage_router_master_ips) == 1:
                raise RuntimeError(
                    "Removing the only master node is not possible")

            if System.get_my_storagerouter() == storage_router_to_remove:
                raise RuntimeError(
                    'The node to be removed cannot be identical to the node on which the removal is initiated'
                )

            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages='Creating SSH connections to remaining master nodes')
            master_ip = None
            ip_client_map = {}
            storage_routers_offline = []
            storage_router_to_remove_online = True
            for storage_router in storage_router_all:
                try:
                    client = SSHClient(storage_router,
                                       username='******',
                                       timeout=10)
                except (UnableToConnectException, NotAuthenticatedException,
                        TimeOutException) as ex:
                    if isinstance(ex, UnableToConnectException):
                        msg = 'Unable to connect'
                    elif isinstance(ex, NotAuthenticatedException):
                        msg = 'Could not authenticate'
                    elif isinstance(ex, TimeOutException):
                        msg = 'Connection timed out'
                    Toolbox.log(
                        logger=NodeRemovalController._logger,
                        messages='  * Node with IP {0:<15}- {1}'.format(
                            storage_router.ip, msg))
                    offline_reasons[storage_router.ip] = msg
                    storage_routers_offline.append(storage_router)
                    if storage_router == storage_router_to_remove:
                        storage_router_to_remove_online = False
                    continue

                Toolbox.log(
                    logger=NodeRemovalController._logger,
                    messages='  * Node with IP {0:<15}- Successfully connected'
                    .format(storage_router.ip))
                ip_client_map[storage_router.ip] = client
                if storage_router != storage_router_to_remove and storage_router.node_type == 'MASTER':
                    master_ip = storage_router.ip

            if len(ip_client_map) == 0 or master_ip is None:
                raise RuntimeError(
                    'Could not connect to any master node in the cluster')

            storage_router_to_remove.invalidate_dynamics('vdisks_guids')
            if len(
                    storage_router_to_remove.vdisks_guids
            ) > 0:  # vDisks are supposed to be moved away manually before removing a node
                raise RuntimeError(
                    "Still vDisks attached to Storage Router {0}".format(
                        storage_router_to_remove.name))

            internal_memcached = Toolbox.is_service_internally_managed(
                service='memcached')
            internal_rabbit_mq = Toolbox.is_service_internally_managed(
                service='rabbitmq')
            memcached_endpoints = Configuration.get(
                key='/ovs/framework/memcache|endpoints')
            rabbit_mq_endpoints = Configuration.get(
                key='/ovs/framework/messagequeue|endpoints')
            copy_memcached_endpoints = list(memcached_endpoints)
            copy_rabbit_mq_endpoints = list(rabbit_mq_endpoints)
            for endpoint in memcached_endpoints:
                if endpoint.startswith(storage_router_to_remove.ip):
                    copy_memcached_endpoints.remove(endpoint)
            for endpoint in rabbit_mq_endpoints:
                if endpoint.startswith(storage_router_to_remove.ip):
                    copy_rabbit_mq_endpoints.remove(endpoint)
            if len(copy_memcached_endpoints
                   ) == 0 and internal_memcached is True:
                raise RuntimeError(
                    'Removal of provided nodes will result in a complete removal of the memcached service'
                )
            if len(copy_rabbit_mq_endpoints
                   ) == 0 and internal_rabbit_mq is True:
                raise RuntimeError(
                    'Removal of provided nodes will result in a complete removal of the messagequeue service'
                )

            Toolbox.run_hooks(component='noderemoval',
                              sub_component='validate_removal',
                              logger=NodeRemovalController._logger,
                              cluster_ip=storage_router_to_remove.ip)
        except KeyboardInterrupt:
            Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=
                'Removal has been aborted during the validation step. No changes have been applied.',
                boxed=True,
                loglevel='warning')
            sys.exit(1)
        except Exception as exception:
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages=[str(exception)],
                        boxed=True,
                        loglevel='exception')
            sys.exit(1)

        #################
        # CONFIRMATIONS #
        #################
        try:
            interactive = silent != '--force-yes'
            remove_asd_manager = not interactive  # Remove ASD manager if non-interactive else ask
            if interactive is True:
                if len(storage_routers_offline) > 0:
                    Toolbox.log(
                        logger=NodeRemovalController._logger,
                        messages=
                        'Certain nodes appear to be offline. These will not fully removed and will cause issues if they are not really offline.'
                    )
                    Toolbox.log(
                        logger=NodeRemovalController._logger,
                        messages='Offline nodes: {0}'.format(''.join(
                            ('\n  * {0:<15}- {1}.'.format(ip, message)
                             for ip, message in offline_reasons.iteritems()))))
                    valid_node_info = Interactive.ask_yesno(
                        message=
                        'Continue the removal with these being presumably offline?',
                        default_value=False)
                    if valid_node_info is False:
                        Toolbox.log(
                            logger=NodeRemovalController._logger,
                            messages=
                            'Please validate the state of the nodes before removing.',
                            title=True)
                        sys.exit(1)
                proceed = Interactive.ask_yesno(
                    message='Are you sure you want to remove node {0}?'.format(
                        storage_router_to_remove.name),
                    default_value=False)
                if proceed is False:
                    Toolbox.log(logger=NodeRemovalController._logger,
                                messages='Abort removal',
                                title=True)
                    sys.exit(1)

                remove_asd_manager = True
                if storage_router_to_remove_online is True:
                    client = SSHClient(endpoint=storage_router_to_remove,
                                       username='******')
                    if service_manager.has_service(name='asd-manager',
                                                   client=client):
                        remove_asd_manager = Interactive.ask_yesno(
                            message=
                            'Do you also want to remove the ASD manager and related ASDs?',
                            default_value=False)

                if remove_asd_manager is True or storage_router_to_remove_online is False:
                    for fct in Toolbox.fetch_hooks('noderemoval',
                                                   'validate_asd_removal'):
                        validation_output = fct(storage_router_to_remove.ip)
                        if validation_output['confirm'] is True:
                            if Interactive.ask_yesno(
                                    message=validation_output['question'],
                                    default_value=False) is False:
                                remove_asd_manager = False
                                break
        except KeyboardInterrupt:
            Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=
                'Removal has been aborted during the confirmation step. No changes have been applied.',
                boxed=True,
                loglevel='warning')
            sys.exit(1)
        except Exception as exception:
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages=[str(exception)],
                        boxed=True,
                        loglevel='exception')
            sys.exit(1)
        ###########
        # REMOVAL #
        ###########
        try:
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages='Starting removal of node {0} - {1}'.format(
                            storage_router_to_remove.name,
                            storage_router_to_remove.ip))
            if storage_router_to_remove_online is False:
                Toolbox.log(
                    logger=NodeRemovalController._logger,
                    messages=
                    '  Marking all Storage Drivers served by Storage Router {0} as offline'
                    .format(storage_router_to_remove.ip))
                StorageDriverController.mark_offline(
                    storagerouter_guid=storage_router_to_remove.guid)

            # Remove vPools
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages='  Removing vPools from node'.format(
                            storage_router_to_remove.ip))
            storage_routers_offline_guids = [
                sr.guid for sr in storage_routers_offline
                if sr.guid != storage_router_to_remove.guid
            ]
            for storage_driver in storage_router_to_remove.storagedrivers:
                Toolbox.log(logger=NodeRemovalController._logger,
                            messages='    Removing vPool {0} from node'.format(
                                storage_driver.vpool.name))
                VPoolController.shrink_vpool(
                    storagedriver_guid=storage_driver.guid,
                    offline_storage_router_guids=storage_routers_offline_guids)

            # Demote if MASTER
            if storage_router_to_remove.node_type == 'MASTER':
                NodeTypeController.demote_node(
                    cluster_ip=storage_router_to_remove.ip,
                    master_ip=master_ip,
                    ip_client_map=ip_client_map,
                    unique_id=storage_router_to_remove.machine_id,
                    unconfigure_memcached=internal_memcached,
                    unconfigure_rabbitmq=internal_rabbit_mq,
                    offline_nodes=storage_routers_offline)

            # Stop / remove services
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages='Stopping and removing services')
            if storage_router_to_remove_online is True:
                client = SSHClient(endpoint=storage_router_to_remove,
                                   username='******')
                NodeRemovalController.remove_services(
                    client=client,
                    node_type=storage_router_to_remove.node_type.lower(),
                    logger=NodeRemovalController._logger)
                service = 'watcher-config'
                if service_manager.has_service(service, client=client):
                    Toolbox.log(
                        logger=NodeRemovalController._logger,
                        messages='Removing service {0}'.format(service))
                    service_manager.stop_service(service, client=client)
                    service_manager.remove_service(service, client=client)

            Toolbox.run_hooks(component='noderemoval',
                              sub_component='remove',
                              logger=NodeRemovalController._logger,
                              cluster_ip=storage_router_to_remove.ip,
                              complete_removal=remove_asd_manager)

            # Clean up model
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages='Removing node from model')
            for service in storage_router_to_remove.services:
                service.delete()
            for disk in storage_router_to_remove.disks:
                for partition in disk.partitions:
                    partition.delete()
                disk.delete()
            for j_domain in storage_router_to_remove.domains:
                j_domain.delete()
            Configuration.delete('/ovs/framework/hosts/{0}'.format(
                storage_router_to_remove.machine_id))

            NodeTypeController.restart_framework_and_memcache_services(
                clients=ip_client_map,
                offline_node_ips=[node.ip for node in storage_routers_offline],
                logger=NodeRemovalController._logger)

            if storage_router_to_remove_online is True:
                client = SSHClient(endpoint=storage_router_to_remove,
                                   username='******')
                client.file_delete(filenames=[CACC_LOCATION])
                client.file_delete(filenames=[CONFIG_STORE_LOCATION])
            storage_router_to_remove.delete()
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages='Successfully removed node\n')
        except Exception as exception:
            Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=['An unexpected error occurred:',
                          str(exception)],
                boxed=True,
                loglevel='exception')
            sys.exit(1)
        except KeyboardInterrupt:
            Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=
                'This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.',
                boxed=True,
                loglevel='error')
            sys.exit(1)

        if remove_asd_manager is True and storage_router_to_remove_online is True:
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages='\nRemoving ASD Manager')
            with remote(storage_router_to_remove.ip, [os]) as rem:
                rem.os.system('asd-manager remove --force-yes')
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages='Remove nodes finished',
                    title=True)
コード例 #14
0
ファイル: vdisk.py プロジェクト: jamie-liu/openvstorage
    def set_config_params(vdisk_guid, new_config_params):
        """
        Sets configuration parameters for a given vdisk.
        :param vdisk_guid: Guid of the virtual disk to set the configuration parameters for
        :param new_config_params: New configuration parameters
        """
        required_params = {'dtl_mode': (str, StorageDriverClient.VDISK_DTL_MODE_MAP.keys()),
                           'sco_size': (int, StorageDriverClient.TLOG_MULTIPLIER_MAP.keys()),
                           'dedupe_mode': (str, StorageDriverClient.VDISK_DEDUPE_MAP.keys()),
                           'write_buffer': (int, {'min': 128, 'max': 10 * 1024}),
                           'cache_strategy': (str, StorageDriverClient.VDISK_CACHE_MAP.keys()),
                           'readcache_limit': (int, {'min': 1, 'max': 10 * 1024}, False)}

        if new_config_params.get('dtl_target') is not None:
            required_params.update({'dtl_target': (str, Toolbox.regex_ip)})

        Toolbox.verify_required_params(required_params, new_config_params)

        if new_config_params['dtl_mode'] != 'no_sync' and new_config_params.get('dtl_target') is None:
            raise Exception('If DTL mode is Asynchronous or Synchronous, a target IP should always be specified')

        errors = False
        vdisk = VDisk(vdisk_guid)
        volume_id = str(vdisk.volume_id)
        old_config_params = VDiskController.get_config_params(vdisk.guid)

        # 1st update SCO size, because this impacts TLOG multiplier which on its turn impacts write buffer
        new_sco_size = new_config_params['sco_size']
        old_sco_size = old_config_params['sco_size']
        if new_sco_size != old_sco_size:
            write_buffer = float(new_config_params['write_buffer'])
            tlog_multiplier = StorageDriverClient.TLOG_MULTIPLIER_MAP[new_sco_size]
            sco_factor = write_buffer / tlog_multiplier / new_sco_size
            try:
                logger.info('Updating property sco_size on vDisk {0} to {1}'.format(vdisk_guid, new_sco_size))
                vdisk.storagedriver_client.set_sco_multiplier(volume_id, new_sco_size / 4 * 1024)
                vdisk.storagedriver_client.set_tlog_multiplier(volume_id, tlog_multiplier)
                vdisk.storagedriver_client.set_sco_cache_max_non_disposable_factor(volume_id, sco_factor)
                logger.info('Updated property sco_size')
            except Exception as ex:
                logger.error('Error updating "sco_size": {0}'.format(ex))
                errors = True

        # 2nd Check for DTL changes
        new_dtl_mode = new_config_params['dtl_mode']
        old_dtl_mode = old_config_params['dtl_mode']
        new_dtl_target = new_config_params.get('dtl_target')
        old_dtl_target = old_config_params['dtl_target']
        if old_dtl_mode != new_dtl_mode or new_dtl_target != old_dtl_target:
            if old_dtl_mode != new_dtl_mode and new_dtl_mode == 'no_sync':
                logger.info('Disabling DTL for vDisk {0}'.format(vdisk_guid))
                vdisk.storagedriver_client.set_manual_dtl_config(volume_id, None)
            elif (new_dtl_target is not None and new_dtl_target != old_dtl_target or old_dtl_mode != new_dtl_mode) and new_dtl_mode != 'no_sync':
                logger.info('Changing DTL to use global values for vDisk {0}'.format(vdisk_guid))
                sr_target = StorageRouterList.get_by_ip(new_dtl_target)
                if sr_target is None:
                    logger.error('Failed to retrieve Storage Router with IP {0}'.format(new_dtl_target))
                    errors = True
                for sd in sr_target.storagedrivers:
                    if sd.vpool == vdisk.vpool:
                        dtl_config = DTLConfig(str(new_dtl_target), sd.ports[2], StorageDriverClient.VDISK_DTL_MODE_MAP[new_dtl_mode])
                        vdisk.storagedriver_client.set_manual_dtl_config(volume_id, dtl_config)
                        break
                else:
                    logger.error('Failed to retrieve Storage Driver with IP {0}'.format(new_dtl_target))
                    errors = True

        # 2nd update rest
        for key in required_params:
            try:
                if key in ['sco_size', 'dtl_mode', 'dtl_target']:
                    continue

                new_value = new_config_params[key]
                old_value = old_config_params[key]
                if new_value != old_value:
                    logger.info('Updating property {0} on vDisk {1} from to {2}'.format(key, vdisk_guid, new_value))
                    if key == 'dedupe_mode':
                        vdisk.storagedriver_client.set_readcache_mode(volume_id, StorageDriverClient.VDISK_DEDUPE_MAP[new_value])
                    elif key == 'write_buffer':
                        tlog_multiplier = vdisk.storagedriver_client.get_tlog_multiplier(volume_id) or StorageDriverClient.TLOG_MULTIPLIER_MAP[new_sco_size]
                        sco_factor = float(new_value) / tlog_multiplier / new_sco_size
                        vdisk.storagedriver_client.set_sco_cache_max_non_disposable_factor(volume_id, sco_factor)
                    elif key == 'cache_strategy':
                        vdisk.storagedriver_client.set_readcache_behaviour(volume_id, StorageDriverClient.VDISK_CACHE_MAP[new_value])
                    elif key == 'readcache_limit':
                        vol_info = vdisk.storagedriver_client.info_volume(volume_id)
                        block_size = vol_info.lba_size * vol_info.cluster_multiplier or 4096
                        limit = new_value * 1024 * 1024 * 1024 / block_size if new_value else None
                        vdisk.storagedriver_client.set_readcache_limit(volume_id, limit)
                    else:
                        raise KeyError('Unsupported property provided: "{0}"'.format(key))
                    logger.info('Updated property {0}'.format(key))
            except Exception as ex:
                logger.error('Error updating "{0}": {1}'.format(key, ex))
                errors = True
        if errors is True:
            raise Exception('Failed to update the values for vDisk {0}'.format(vdisk.name))
コード例 #15
0
    def add_vpool(cls, parameters):
        """
        Add a vPool to the machine this task is running on
        :param parameters: Parameters for vPool creation
        :type parameters: dict
        :return: None
        :rtype: NoneType
        """
        # TODO: Add logging
        cls._logger.debug('Adding vpool. Parameters: {}'.format(parameters))
        # VALIDATIONS
        if not isinstance(parameters, dict):
            raise ValueError(
                'Parameters passed to create a vPool should be of type dict')

        # Check StorageRouter existence
        storagerouter = StorageRouterList.get_by_ip(
            ip=parameters.get('storagerouter_ip'))
        if storagerouter is None:
            raise RuntimeError('Could not find StorageRouter')

        # Validate requested vPool configurations
        vp_installer = VPoolInstaller(name=parameters.get('vpool_name'))
        vp_installer.validate(storagerouter=storagerouter)

        # Validate requested StorageDriver configurations
        cls._logger.info(
            'vPool {0}: Validating StorageDriver configurations'.format(
                vp_installer.name))
        sd_installer = StorageDriverInstaller(
            vp_installer=vp_installer,
            configurations={
                'storage_ip': parameters.get('storage_ip'),
                'caching_info': parameters.get('caching_info'),
                'backend_info': {
                    'main':
                    parameters.get('backend_info'),
                    StorageDriverConfiguration.CACHE_BLOCK:
                    parameters.get('backend_info_bc'),
                    StorageDriverConfiguration.CACHE_FRAGMENT:
                    parameters.get('backend_info_fc')
                },
                'connection_info': {
                    'main':
                    parameters.get('connection_info'),
                    StorageDriverConfiguration.CACHE_BLOCK:
                    parameters.get('connection_info_bc'),
                    StorageDriverConfiguration.CACHE_FRAGMENT:
                    parameters.get('connection_info_fc')
                },
                'sd_configuration': parameters.get('config_params')
            })

        partitions_mutex = volatile_mutex('add_vpool_partitions_{0}'.format(
            storagerouter.guid))
        try:
            # VPOOL CREATION
            # Create the vPool as soon as possible in the process to be displayed in the GUI (INSTALLING/EXTENDING state)
            if vp_installer.is_new is True:
                vp_installer.create(rdma_enabled=sd_installer.rdma_enabled)
                vp_installer.configure_mds(
                    config=parameters.get('mds_config_params', {}))
            else:
                vp_installer.update_status(status=VPool.STATUSES.EXTENDING)

            # ADDITIONAL VALIDATIONS
            # Check StorageRouter connectivity
            cls._logger.info(
                'vPool {0}: Validating StorageRouter connectivity'.format(
                    vp_installer.name))
            linked_storagerouters = [storagerouter]
            if vp_installer.is_new is False:
                linked_storagerouters += [
                    sd.storagerouter
                    for sd in vp_installer.vpool.storagedrivers
                ]

            sr_client_map = SSHClient.get_clients(
                endpoints=linked_storagerouters, user_names=['ovs', 'root'])
            offline_nodes = sr_client_map.pop('offline')
            if storagerouter in offline_nodes:
                raise RuntimeError(
                    'Node on which the vPool is being {0} is not reachable'.
                    format('created'
                           if vp_installer.is_new is True else 'extended'))

            sr_installer = StorageRouterInstaller(
                root_client=sr_client_map[storagerouter]['root'],
                sd_installer=sd_installer,
                vp_installer=vp_installer,
                storagerouter=storagerouter)

            # When 2 or more jobs simultaneously run on the same StorageRouter, we need to check and create the StorageDriver partitions in locked context
            partitions_mutex.acquire(wait=60)
            sr_installer.partition_info = StorageRouterController.get_partition_info(
                storagerouter_guid=storagerouter.guid)
            sr_installer.validate_vpool_extendable()
            sr_installer.validate_global_write_buffer(
                requested_size=parameters.get('writecache_size', 0))
            sr_installer.validate_local_cache_size(
                requested_proxies=parameters.get('parallelism', {}).get(
                    'proxies', 2))

            # MODEL STORAGEDRIVER AND PARTITION JUNCTIONS
            sd_installer.create()
            sd_installer.create_partitions()
            partitions_mutex.release()

            vp_installer.refresh_metadata()
        except Exception:
            cls._logger.exception(
                'Something went wrong during the validation or modeling of vPool {0} on StorageRouter {1}'
                .format(vp_installer.name, storagerouter.name))
            partitions_mutex.release()
            vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING)
            raise

        # Arakoon setup
        counter = 0
        while counter < 300:
            try:
                if StorageDriverController.manual_voldrv_arakoon_checkup(
                ) is True:
                    break
            except Exception:
                cls._logger.exception(
                    'Arakoon checkup for voldrv cluster failed')
                vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING)
                raise
            counter += 1
            time.sleep(1)
            if counter == 300:
                vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING)
                raise RuntimeError(
                    'Arakoon checkup for the StorageDriver cluster could not be started'
                )

        # Cluster registry
        try:
            vp_installer.configure_cluster_registry(allow_raise=True)
        except Exception:
            if vp_installer.is_new is True:
                vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING)
            else:
                vp_installer.revert_vpool(status=VPool.STATUSES.FAILURE)
            raise

        try:
            sd_installer.setup_proxy_configs()
            sd_installer.configure_storagedriver_service()
            DiskController.sync_with_reality(storagerouter.guid)
            MDSServiceController.prepare_mds_service(
                storagerouter=storagerouter, vpool=vp_installer.vpool)

            # Update the MDS safety if changed via API (vpool.configuration will be available at this point also for the newly added StorageDriver)
            vp_installer.vpool.invalidate_dynamics('configuration')
            if vp_installer.mds_safety is not None and vp_installer.vpool.configuration[
                    'mds_config']['mds_safety'] != vp_installer.mds_safety:
                Configuration.set(
                    key='/ovs/vpools/{0}/mds_config|mds_safety'.format(
                        vp_installer.vpool.guid),
                    value=vp_installer.mds_safety)

            sd_installer.start_services(
            )  # Create and start watcher volumedriver, DTL, proxies and StorageDriver services

            # Post creation/extension checkups
            mds_config_set = MDSServiceController.get_mds_storagedriver_config_set(
                vpool=vp_installer.vpool, offline_nodes=offline_nodes)
            for sr, clients in sr_client_map.iteritems():
                for current_storagedriver in [
                        sd for sd in sr.storagedrivers
                        if sd.vpool_guid == vp_installer.vpool.guid
                ]:
                    storagedriver_config = StorageDriverConfiguration(
                        vpool_guid=vp_installer.vpool.guid,
                        storagedriver_id=current_storagedriver.storagedriver_id
                    )
                    if storagedriver_config.config_missing is False:
                        # Filesystem section in StorageDriver configuration are all parameters used for vDisks created directly on the filesystem
                        # So when a vDisk gets created on the filesystem, these MDSes will be assigned to them
                        storagedriver_config.configure_filesystem(
                            fs_metadata_backend_mds_nodes=mds_config_set[
                                sr.guid])
                        storagedriver_config.save(client=clients['ovs'])

            # Everything's reconfigured, refresh new cluster configuration
            for current_storagedriver in vp_installer.vpool.storagedrivers:
                if current_storagedriver.storagerouter not in sr_client_map:
                    continue
                vp_installer.vpool.storagedriver_client.update_cluster_node_configs(
                    str(current_storagedriver.storagedriver_id),
                    req_timeout_secs=10)
        except Exception:
            cls._logger.exception('vPool {0}: Creation failed'.format(
                vp_installer.name))
            vp_installer.update_status(status=VPool.STATUSES.FAILURE)
            raise

        # When a node is offline, we can run into errors, but also when 1 or more volumes are not running
        # Scheduled tasks below, so don't really care whether they succeed or not
        try:
            VDiskController.dtl_checkup(vpool_guid=vp_installer.vpool.guid,
                                        ensure_single_timeout=600)
        except:
            pass
        for vdisk in vp_installer.vpool.vdisks:
            try:
                MDSServiceController.ensure_safety(vdisk_guid=vdisk.guid)
            except:
                pass
        vp_installer.update_status(status=VPool.STATUSES.RUNNING)
        cls._logger.info('Add vPool {0} ended successfully'.format(
            vp_installer.name))
コード例 #16
0
ファイル: noderemoval.py プロジェクト: openvstorage/framework
    def remove_node(node_ip, silent=None):
        """
        Remove the node with specified IP from the cluster
        :param node_ip: IP of the node to remove
        :type node_ip: str
        :param silent: If silent == '--force-yes' no question will be asked to confirm the removal
        :type silent: str
        :return: None
        """
        from ovs.lib.storagedriver import StorageDriverController
        from ovs.lib.storagerouter import StorageRouterController
        from ovs.dal.lists.storagerouterlist import StorageRouterList

        Toolbox.log(logger=NodeRemovalController._logger, messages="Remove node", boxed=True)
        Toolbox.log(
            logger=NodeRemovalController._logger,
            messages="WARNING: Some of these steps may take a very long time, please check the logs for more information\n\n",
        )

        ###############
        # VALIDATIONS #
        ###############
        try:
            node_ip = node_ip.strip()
            if not isinstance(node_ip, str):
                raise ValueError("Node IP must be a string")
            if not re.match(SSHClient.IP_REGEX, node_ip):
                raise ValueError("Invalid IP {0} specified".format(node_ip))

            storage_router_all = StorageRouterList.get_storagerouters()
            storage_router_masters = StorageRouterList.get_masters()
            storage_router_all_ips = set([storage_router.ip for storage_router in storage_router_all])
            storage_router_master_ips = set([storage_router.ip for storage_router in storage_router_masters])
            storage_router_to_remove = StorageRouterList.get_by_ip(node_ip)

            if node_ip not in storage_router_all_ips:
                raise ValueError(
                    "Unknown IP specified\nKnown in model:\n - {0}\nSpecified for removal:\n - {1}".format(
                        "\n - ".join(storage_router_all_ips), node_ip
                    )
                )

            if len(storage_router_all_ips) == 1:
                raise RuntimeError("Removing the only node is not possible")

            if node_ip in storage_router_master_ips and len(storage_router_master_ips) == 1:
                raise RuntimeError("Removing the only master node is not possible")

            if System.get_my_storagerouter() == storage_router_to_remove:
                raise RuntimeError(
                    "The node to be removed cannot be identical to the node on which the removal is initiated"
                )

            Toolbox.log(
                logger=NodeRemovalController._logger, messages="Creating SSH connections to remaining master nodes"
            )
            master_ip = None
            ip_client_map = {}
            storage_routers_offline = []
            storage_router_to_remove_online = True
            for storage_router in storage_router_all:
                try:
                    client = SSHClient(storage_router, username="******")
                    if client.run(["pwd"]):
                        Toolbox.log(
                            logger=NodeRemovalController._logger,
                            messages="  Node with IP {0:<15} successfully connected to".format(storage_router.ip),
                        )
                        ip_client_map[storage_router.ip] = client
                        if storage_router != storage_router_to_remove and storage_router.node_type == "MASTER":
                            master_ip = storage_router.ip
                except UnableToConnectException:
                    Toolbox.log(
                        logger=NodeRemovalController._logger,
                        messages="  Node with IP {0:<15} is unreachable".format(storage_router.ip),
                    )
                    storage_routers_offline.append(storage_router)
                    if storage_router == storage_router_to_remove:
                        storage_router_to_remove_online = False

            if len(ip_client_map) == 0 or master_ip is None:
                raise RuntimeError("Could not connect to any master node in the cluster")

            storage_router_to_remove.invalidate_dynamics("vdisks_guids")
            if (
                len(storage_router_to_remove.vdisks_guids) > 0
            ):  # vDisks are supposed to be moved away manually before removing a node
                raise RuntimeError("Still vDisks attached to Storage Router {0}".format(storage_router_to_remove.name))

            internal_memcached = Toolbox.is_service_internally_managed(service="memcached")
            internal_rabbit_mq = Toolbox.is_service_internally_managed(service="rabbitmq")
            memcached_endpoints = Configuration.get(key="/ovs/framework/memcache|endpoints")
            rabbit_mq_endpoints = Configuration.get(key="/ovs/framework/messagequeue|endpoints")
            copy_memcached_endpoints = list(memcached_endpoints)
            copy_rabbit_mq_endpoints = list(rabbit_mq_endpoints)
            for endpoint in memcached_endpoints:
                if endpoint.startswith(storage_router_to_remove.ip):
                    copy_memcached_endpoints.remove(endpoint)
            for endpoint in rabbit_mq_endpoints:
                if endpoint.startswith(storage_router_to_remove.ip):
                    copy_rabbit_mq_endpoints.remove(endpoint)
            if len(copy_memcached_endpoints) == 0 and internal_memcached is True:
                raise RuntimeError(
                    "Removal of provided nodes will result in a complete removal of the memcached service"
                )
            if len(copy_rabbit_mq_endpoints) == 0 and internal_rabbit_mq is True:
                raise RuntimeError(
                    "Removal of provided nodes will result in a complete removal of the messagequeue service"
                )
        except Exception as exception:
            Toolbox.log(
                logger=NodeRemovalController._logger, messages=[str(exception)], boxed=True, loglevel="exception"
            )
            sys.exit(1)

        #################
        # CONFIRMATIONS #
        #################
        interactive = silent != "--force-yes"
        remove_asd_manager = not interactive  # Remove ASD manager if non-interactive else ask
        if interactive is True:
            proceed = Interactive.ask_yesno(
                message="Are you sure you want to remove node {0}?".format(storage_router_to_remove.name),
                default_value=False,
            )
            if proceed is False:
                Toolbox.log(logger=NodeRemovalController._logger, messages="Abort removal", title=True)
                sys.exit(1)

            if storage_router_to_remove_online is True:
                client = SSHClient(endpoint=storage_router_to_remove, username="******")
                if ServiceManager.has_service(name="asd-manager", client=client):
                    remove_asd_manager = Interactive.ask_yesno(
                        message="Do you also want to remove the ASD manager and related ASDs?", default_value=False
                    )

            if remove_asd_manager is True or storage_router_to_remove_online is False:
                for function in Toolbox.fetch_hooks("setup", "validate_asd_removal"):
                    validation_output = function(storage_router_to_remove.ip)
                    if validation_output["confirm"] is True:
                        if Interactive.ask_yesno(message=validation_output["question"], default_value=False) is False:
                            remove_asd_manager = False
                            break

        ###########
        # REMOVAL #
        ###########
        try:
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages="Starting removal of node {0} - {1}".format(
                    storage_router_to_remove.name, storage_router_to_remove.ip
                ),
            )
            if storage_router_to_remove_online is False:
                Toolbox.log(
                    logger=NodeRemovalController._logger,
                    messages="  Marking all Storage Drivers served by Storage Router {0} as offline".format(
                        storage_router_to_remove.ip
                    ),
                )
                StorageDriverController.mark_offline(storagerouter_guid=storage_router_to_remove.guid)

            # Remove vPools
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages="  Removing vPools from node".format(storage_router_to_remove.ip),
            )
            storage_routers_offline_guids = [
                sr.guid for sr in storage_routers_offline if sr.guid != storage_router_to_remove.guid
            ]
            for storage_driver in storage_router_to_remove.storagedrivers:
                Toolbox.log(
                    logger=NodeRemovalController._logger,
                    messages="    Removing vPool {0} from node".format(storage_driver.vpool.name),
                )
                StorageRouterController.remove_storagedriver(
                    storagedriver_guid=storage_driver.guid, offline_storage_router_guids=storage_routers_offline_guids
                )

            # Demote if MASTER
            if storage_router_to_remove.node_type == "MASTER":
                NodeTypeController.demote_node(
                    cluster_ip=storage_router_to_remove.ip,
                    master_ip=master_ip,
                    ip_client_map=ip_client_map,
                    unique_id=storage_router_to_remove.machine_id,
                    unconfigure_memcached=internal_memcached,
                    unconfigure_rabbitmq=internal_rabbit_mq,
                    offline_nodes=storage_routers_offline,
                )

            # Stop / remove services
            Toolbox.log(logger=NodeRemovalController._logger, messages="Stopping and removing services")
            config_store = Configuration.get_store()
            if storage_router_to_remove_online is True:
                client = SSHClient(endpoint=storage_router_to_remove, username="******")
                NodeRemovalController.remove_services(
                    client=client,
                    node_type=storage_router_to_remove.node_type.lower(),
                    logger=NodeRemovalController._logger,
                )
                service = "watcher-config"
                if ServiceManager.has_service(service, client=client):
                    Toolbox.log(logger=NodeRemovalController._logger, messages="Removing service {0}".format(service))
                    ServiceManager.stop_service(service, client=client)
                    ServiceManager.remove_service(service, client=client)

                if config_store == "etcd":
                    from ovs.extensions.db.etcd.installer import EtcdInstaller

                    if Configuration.get(key="/ovs/framework/external_config") is None:
                        Toolbox.log(logger=NodeRemovalController._logger, messages="      Removing Etcd cluster")
                        try:
                            EtcdInstaller.stop("config", client)
                            EtcdInstaller.remove("config", client)
                        except Exception as ex:
                            Toolbox.log(
                                logger=NodeRemovalController._logger,
                                messages=["\nFailed to unconfigure Etcd", ex],
                                loglevel="exception",
                            )

                    Toolbox.log(logger=NodeRemovalController._logger, messages="Removing Etcd proxy")
                    EtcdInstaller.remove_proxy("config", client.ip)

            Toolbox.run_hooks(
                component="noderemoval",
                sub_component="remove",
                logger=NodeRemovalController._logger,
                cluster_ip=storage_router_to_remove.ip,
                complete_removal=remove_asd_manager,
            )

            # Clean up model
            Toolbox.log(logger=NodeRemovalController._logger, messages="Removing node from model")
            for service in storage_router_to_remove.services:
                service.delete()
            for disk in storage_router_to_remove.disks:
                for partition in disk.partitions:
                    partition.delete()
                disk.delete()
            for j_domain in storage_router_to_remove.domains:
                j_domain.delete()
            Configuration.delete("/ovs/framework/hosts/{0}".format(storage_router_to_remove.machine_id))

            NodeTypeController.restart_framework_and_memcache_services(
                clients=ip_client_map,
                offline_node_ips=[node.ip for node in storage_routers_offline],
                logger=NodeRemovalController._logger,
            )

            if storage_router_to_remove_online is True:
                client = SSHClient(endpoint=storage_router_to_remove, username="******")
                if config_store == "arakoon":
                    client.file_delete(filenames=[ArakoonConfiguration.CACC_LOCATION])
                client.file_delete(filenames=[Configuration.BOOTSTRAP_CONFIG_LOCATION])
            storage_router_to_remove.delete()
            Toolbox.log(logger=NodeRemovalController._logger, messages="Successfully removed node\n")
        except Exception as exception:
            Toolbox.log(logger=NodeRemovalController._logger, messages="\n")
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=["An unexpected error occurred:", str(exception)],
                boxed=True,
                loglevel="exception",
            )
            sys.exit(1)
        except KeyboardInterrupt:
            Toolbox.log(logger=NodeRemovalController._logger, messages="\n")
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages="This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.",
                boxed=True,
                loglevel="error",
            )
            sys.exit(1)

        if remove_asd_manager is True:
            Toolbox.log(logger=NodeRemovalController._logger, messages="\nRemoving ASD Manager")
            with remote(storage_router_to_remove.ip, [os]) as rem:
                rem.os.system("asd-manager remove --force-yes")
        Toolbox.log(logger=NodeRemovalController._logger, messages="Remove nodes finished", title=True)
コード例 #17
0
ファイル: update.py プロジェクト: openvstorage/framework
    def get_package_information_core(client, package_info):
        """
        Called by GenericController.refresh_package_information() every hour

        Retrieve information about the currently installed versions of the core packages
        Retrieve information about the versions to which each package can potentially be updated
        If installed version is different from candidate version --> store this information in model

        Additionally check the services with a 'run' file
        Verify whether the running version is up-to-date with the candidate version
        If different --> store this information in the model

        Result: Every package with updates or which requires services to be restarted is stored in the model

        :param client: Client on which to collect the version information
        :type client: SSHClient
        :param package_info: Dictionary passed in by the thread calling this function
        :type package_info: dict
        :return: Package information
        :rtype: dict
        """
        try:
            if client.username != 'root':
                raise RuntimeError('Only the "root" user can retrieve the package information')

            binaries = PackageManager.get_binary_versions(client=client, package_names=UpdateController.core_packages_with_binaries)
            installed = PackageManager.get_installed_versions(client=client, package_names=UpdateController.all_core_packages)
            candidate = PackageManager.get_candidate_versions(client=client, package_names=UpdateController.all_core_packages)
            if set(installed.keys()) != set(UpdateController.all_core_packages) or set(candidate.keys()) != set(UpdateController.all_core_packages):
                raise RuntimeError('Failed to retrieve the installed and candidate versions for packages: {0}'.format(', '.join(UpdateController.all_core_packages)))

            # Retrieve Arakoon information
            framework_arakoons = []
            storagedriver_arakoons = []
            for cluster, arakoon_list in {'cacc': framework_arakoons,
                                          'ovsdb': framework_arakoons,
                                          'voldrv': storagedriver_arakoons}.iteritems():
                cluster_name = ArakoonClusterConfig.get_cluster_name(cluster)
                if cluster_name is None:
                    continue

                if cluster == 'cacc':
                    arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name, filesystem=True, ip=client.ip)
                else:
                    arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name)

                if arakoon_metadata['internal'] is True:
                    arakoon_list.append(ArakoonInstaller.get_service_name_for_cluster(cluster_name=arakoon_metadata['cluster_name']))

            storagerouter = StorageRouterList.get_by_ip(client.ip)
            alba_proxies = []
            for service in storagerouter.services:
                if service.type.name == ServiceType.SERVICE_TYPES.ALBA_PROXY:
                    alba_proxies.append(service.name)

            storagedriver_services = []
            for sd in storagerouter.storagedrivers:
                storagedriver_services.append('ovs-dtl_{0}'.format(sd.vpool.name))
                storagedriver_services.append('ovs-volumedriver_{0}'.format(sd.vpool.name))

            default_entry = {'candidate': None,
                             'installed': None,
                             'services_to_restart': []}

            #                       component:   package_name: services_with_run_file
            for component, info in {'framework': {'arakoon': framework_arakoons,
                                                  'openvstorage': []},
                                    'storagedriver': {'alba': alba_proxies,
                                                      'arakoon': storagedriver_arakoons,
                                                      'volumedriver-no-dedup-base': [],
                                                      'volumedriver-no-dedup-server': storagedriver_services}}.iteritems():
                component_info = {}
                for package, services in info.iteritems():
                    for service in services:
                        service = ExtensionToolbox.remove_prefix(service, 'ovs-')
                        version_file = '/opt/OpenvStorage/run/{0}.version'.format(service)
                        if not client.file_exists(version_file):
                            UpdateController._logger.warning('{0}: Failed to find a version file in /opt/OpenvStorage/run for service {1}'.format(client.ip, service))
                            continue
                        package_name = package
                        running_versions = client.file_read(version_file).strip()
                        for version in running_versions.split(';'):
                            version = version.strip()
                            running_version = None
                            if '=' in version:
                                package_name = version.split('=')[0]
                                running_version = version.split('=')[1]
                            elif version:
                                running_version = version

                            if package_name not in UpdateController.all_core_packages:
                                raise ValueError('Unknown package dependency found in {0}'.format(version_file))
                            if package_name not in binaries:
                                raise RuntimeError('Binary version for package {0} was not retrieved'.format(package_name))

                            if running_version is not None and running_version != binaries[package_name]:
                                if package_name not in component_info:
                                    component_info[package_name] = copy.deepcopy(default_entry)
                                component_info[package_name]['installed'] = running_version
                                component_info[package_name]['candidate'] = binaries[package_name]
                                component_info[package_name]['services_to_restart'].append('ovs-{0}'.format(service))

                    if installed[package] != candidate[package] and package not in component_info:
                        component_info[package] = copy.deepcopy(default_entry)
                        component_info[package]['installed'] = installed[package]
                        component_info[package]['candidate'] = candidate[package]
                if component_info:
                    if component not in package_info[client.ip]:
                        package_info[client.ip][component] = {}
                    package_info[client.ip][component].update(component_info)
        except Exception as ex:
            if 'errors' not in package_info[client.ip]:
                package_info[client.ip]['errors'] = []
            package_info[client.ip]['errors'].append(ex)
        return package_info
コード例 #18
0
    def _get_update_information_cluster_alba(cls, client, update_info,
                                             package_info):
        """
        In this function the services for each component / package combination are defined
        This service information consists out of:
            * Services to stop (before update) and start (after update of packages) -> 'services_stop_start'
            * Services to restart after update (post-update logic)                  -> 'services_post_update'
            * Down-times which will be caused due to service restarts               -> 'downtime'
            * Prerequisites that have not been met                                  -> 'prerequisites'

        Verify whether all relevant services have the correct binary active
        Whether a service has the correct binary version in use, we use the ServiceFactory.get_service_update_versions functionality
        When a service has an older binary version running, we add this information to the 'update_info'

        This combined information is then stored in the 'package_information' of the StorageRouter DAL object

        :param client: SSHClient on which to retrieve the service information required for an update
        :type client: ovs.extensions.generic.sshclient.SSHClient
        :param update_info: Dictionary passed in by the thread calling this function used to store all update information
        :type update_info: dict
        :param package_info: Dictionary containing the components and packages which have an update available for current SSHClient
        :type package_info: dict
        :return: None
        :rtype: NoneType
        """
        cls._logger.info(
            'StorageRouter {0}: Refreshing ALBA update information'.format(
                client.ip))
        try:
            binaries = cls._package_manager.get_binary_versions(client=client)
            storagerouter = StorageRouterList.get_by_ip(ip=client.ip)
            cls._logger.debug('StorageRouter {0}: Binary versions: {1}'.format(
                client.ip, binaries))

            # Retrieve Arakoon information
            arakoon_info = {}
            for service in storagerouter.services:
                if service.type.name not in [
                        ServiceType.SERVICE_TYPES.ALBA_MGR,
                        ServiceType.SERVICE_TYPES.NS_MGR
                ]:
                    continue

                if service.type.name == ServiceType.SERVICE_TYPES.ALBA_MGR:
                    cluster_name = service.abm_service.abm_cluster.name
                    alba_backend_name = service.abm_service.abm_cluster.alba_backend.name
                else:
                    cluster_name = service.nsm_service.nsm_cluster.name
                    alba_backend_name = service.nsm_service.nsm_cluster.alba_backend.name

                cls._logger.debug(
                    'StorageRouter {0}: Retrieving update information for Arakoon cluster {1}'
                    .format(client.ip, cluster_name))
                arakoon_update_info = ArakoonInstaller.get_arakoon_update_info(
                    cluster_name=cluster_name)
                cls._logger.debug(
                    'StorageRouter {0}: Arakoon update information for cluster {1}: {2}'
                    .format(client.ip, cluster_name, arakoon_update_info))
                if arakoon_update_info['internal'] is True:
                    arakoon_info[arakoon_update_info['service_name']] = [
                        'backend', alba_backend_name
                    ] if arakoon_update_info['downtime'] is True else None

            for component, package_names in PackageFactory.get_package_info(
            )['names'].iteritems():
                package_names = sorted(package_names)
                cls._logger.debug(
                    'StorageRouter {0}: Validating component {1} and related packages: {2}'
                    .format(client.ip, component, package_names))

                if component not in update_info[client.ip]:
                    update_info[client.ip][component] = copy.deepcopy(
                        ServiceFactory.DEFAULT_UPDATE_ENTRY)
                svc_component_info = update_info[client.ip][component]
                pkg_component_info = package_info.get(component, {})

                for package_name in package_names:
                    cls._logger.debug(
                        'StorageRouter {0}: Validating ALBA plugin related package {1}'
                        .format(client.ip, package_name))
                    if package_name == PackageFactory.PKG_OVS_BACKEND and package_name in pkg_component_info:
                        if ['gui', None] not in svc_component_info['downtime']:
                            svc_component_info['downtime'].append(
                                ['gui', None])
                        if ['api', None] not in svc_component_info['downtime']:
                            svc_component_info['downtime'].append(
                                ['api', None])
                        svc_component_info['services_stop_start'][10].append(
                            'ovs-watcher-framework')
                        svc_component_info['services_stop_start'][20].append(
                            'memcached')
                        cls._logger.debug(
                            'StorageRouter {0}: Added services "ovs-watcher-framework" and "memcached" to stop-start services'
                            .format(client.ip))
                        cls._logger.debug(
                            'StorageRouter {0}: Added GUI and API to downtime'.
                            format(client.ip))

                    elif package_name in [
                            PackageFactory.PKG_ALBA, PackageFactory.PKG_ALBA_EE
                    ]:
                        # Retrieve proxy service information
                        for service in storagerouter.services:
                            if service.type.name != ServiceType.SERVICE_TYPES.ALBA_PROXY or service.alba_proxy is None:
                                continue

                            service_version = None
                            if package_name not in pkg_component_info:
                                service_version = ServiceFactory.get_service_update_versions(
                                    client=client,
                                    service_name=service.name,
                                    binary_versions=binaries)

                            cls._logger.debug(
                                'StorageRouter {0}: Service {1} is running version {2}'
                                .format(client.ip, service.name,
                                        service_version))
                            if package_name in pkg_component_info or service_version is not None:
                                if service_version is not None and package_name not in svc_component_info[
                                        'packages']:
                                    svc_component_info['packages'][
                                        package_name] = service_version
                                svc_component_info['services_post_update'][
                                    10].append('ovs-{0}'.format(service.name))
                                cls._logger.debug(
                                    'StorageRouter {0}: Added service {1} to post-update services'
                                    .format(client.ip,
                                            'ovs-{0}'.format(service.name)))

                                downtime = [
                                    'proxy',
                                    service.alba_proxy.storagedriver.vpool.name
                                ]
                                if downtime not in svc_component_info[
                                        'downtime']:
                                    svc_component_info['downtime'].append(
                                        downtime)
                                    cls._logger.debug(
                                        'StorageRouter {0}: Added ALBA proxy downtime for vPool {1} to downtime'
                                        .format(
                                            client.ip, service.alba_proxy.
                                            storagedriver.vpool.name))

                    if package_name in [
                            PackageFactory.PKG_ALBA,
                            PackageFactory.PKG_ALBA_EE,
                            PackageFactory.PKG_ARAKOON
                    ]:
                        for service_name, downtime in arakoon_info.iteritems():
                            service_version = ServiceFactory.get_service_update_versions(
                                client=client,
                                service_name=service_name,
                                binary_versions=binaries,
                                package_name=package_name)
                            cls._logger.debug(
                                'StorageRouter {0}: Arakoon service {1} information: {2}'
                                .format(client.ip, service_name,
                                        service_version))

                            if package_name in pkg_component_info or service_version is not None:
                                svc_component_info['services_post_update'][
                                    10].append('ovs-{0}'.format(service_name))
                                cls._logger.debug(
                                    'StorageRouter {0}: Added service {1} to post-update services'
                                    .format(client.ip,
                                            'ovs-{0}'.format(service_name)))
                                if service_version is not None and package_name not in svc_component_info[
                                        'packages']:
                                    svc_component_info['packages'][
                                        package_name] = service_version
                                if downtime is not None and downtime not in svc_component_info[
                                        'downtime']:
                                    svc_component_info['downtime'].append(
                                        downtime)
                                    cls._logger.debug(
                                        'StorageRouter {0}: Added Arakoon cluster for ALBA Backend {1} to downtime'
                                        .format(client.ip, downtime[1]))

                    # Extend the service information with the package information related to this repository for current StorageRouter
                    if package_name in pkg_component_info and package_name not in svc_component_info[
                            'packages']:
                        cls._logger.debug(
                            'StorageRouter {0}: Adding package {1} because it has an update available'
                            .format(client.ip, package_name))
                        svc_component_info['packages'][
                            package_name] = pkg_component_info[package_name]

                if component == PackageFactory.COMP_ALBA:
                    for alba_node in AlbaNodeList.get_albanodes():
                        try:
                            alba_node.client.get_metadata()
                        except:
                            svc_component_info['prerequisites'].append(
                                ['alba_node_unresponsive', alba_node.ip])
                            cls._logger.debug(
                                'StorageRouter {0}: Added unresponsive ALBA Node {1} to prerequisites'
                                .format(client.ip, alba_node.ip))

                # Verify whether migration (DAL and extension) code needs to be executed (only if no packages have an update available so far)
                elif component == PackageFactory.COMP_FWK and PackageFactory.PKG_OVS_BACKEND not in svc_component_info[
                        'packages']:
                    cls._logger.debug(
                        'StorageRouter {0}: No updates detected, checking for required migrations'
                        .format(client.ip))
                    # Extension migration check
                    key = '/ovs/framework/hosts/{0}/versions'.format(
                        System.get_my_machine_id(client=client))
                    old_version = Configuration.get(key, default={}).get(
                        PackageFactory.COMP_MIGRATION_ALBA)
                    installed_version = str(
                        cls._package_manager.get_installed_versions(
                            client=client,
                            package_names=[PackageFactory.PKG_OVS_BACKEND
                                           ])[PackageFactory.PKG_OVS_BACKEND])
                    migrations_detected = False
                    if old_version is not None:
                        cls._logger.debug(
                            'StorageRouter {0}: Current running version for {1} extension migrations: {2}'
                            .format(client.ip, PackageFactory.COMP_ALBA,
                                    old_version))
                        with remote(client.ip, [ExtensionMigrator]) as rem:
                            cls._logger.debug(
                                'StorageRouter {0}: Available version for {1} extension migrations: {2}'
                                .format(client.ip, PackageFactory.COMP_ALBA,
                                        rem.ExtensionMigrator.THIS_VERSION))
                            if rem.ExtensionMigrator.THIS_VERSION > old_version:
                                migrations_detected = True
                                svc_component_info['packages'][
                                    PackageFactory.PKG_OVS_BACKEND] = {
                                        'installed': 'migrations',
                                        'candidate': installed_version
                                    }

                    # DAL migration check
                    if migrations_detected is False:
                        persistent_client = PersistentFactory.get_client()
                        old_version = persistent_client.get(
                            'ovs_model_version').get(
                                PackageFactory.COMP_MIGRATION_ALBA
                            ) if persistent_client.exists(
                                'ovs_model_version') else None
                        if old_version is not None:
                            cls._logger.debug(
                                'StorageRouter {0}: Current running version for {1} DAL migrations: {2}'
                                .format(client.ip, PackageFactory.COMP_ALBA,
                                        old_version))
                            with remote(client.ip, [DALMigrator]) as rem:
                                cls._logger.debug(
                                    'StorageRouter {0}: Available version for {1} DAL migrations: {2}'
                                    .format(client.ip,
                                            PackageFactory.COMP_ALBA,
                                            rem.DALMigrator.THIS_VERSION))
                                if rem.DALMigrator.THIS_VERSION > old_version:
                                    svc_component_info['packages'][
                                        PackageFactory.PKG_OVS_BACKEND] = {
                                            'installed': 'migrations',
                                            'candidate': installed_version
                                        }

            cls._logger.info(
                'StorageRouter {0}: Refreshed ALBA update information'.format(
                    client.ip))
        except Exception as ex:
            cls._logger.exception(
                'StorageRouter {0}: Refreshing ALBA update information failed'.
                format(client.ip))
            if 'errors' not in update_info[client.ip]:
                update_info[client.ip]['errors'] = []
            update_info[client.ip]['errors'].append(ex)