Beispiel #1
0
def setup():
    """
    Interactive setup part for initial asd manager configuration
    """
    _print_and_log(message=Interactive.boxed_message(['ASD Manager setup']))

    # Gather information
    ipaddresses = OSFactory.get_manager().get_ip_addresses()
    if not ipaddresses:
        _print_and_log(
            level='error',
            message='\n' + Interactive.boxed_message(
                ['Could not retrieve IP information on local node']))
        sys.exit(1)
    validation_ip_addresses = copy.deepcopy(ipaddresses)

    local_client = SSHClient(endpoint='127.0.0.1', username='******')
    service_manager = ServiceFactory.get_manager()
    if service_manager.has_service(MANAGER_SERVICE, local_client):
        _print_and_log(level='error',
                       message='\n' + Interactive.boxed_message(
                           ['The ASD Manager is already installed.']))
        sys.exit(1)

    config = _validate_and_retrieve_pre_config()
    interactive = len(config) == 0
    ipmi_info = {'ip': None, 'username': None, 'pwd': None}

    if interactive is False:
        api_ip = config['api_ip']
        api_port = config.get('api_port', 8500)
        asd_ips = config.get('asd_ips', [])
        asd_start_port = config.get('asd_start_port', 8600)
        configuration_store = config.get('configuration_store', 'arakoon')
        ipmi_info = config.get('ipmi', ipmi_info)
    else:
        api_ip = Interactive.ask_choice(
            choice_options=ipaddresses,
            question='Select the public IP address to be used for the API',
            sort_choices=True)
        api_port = Interactive.ask_integer(
            question="Select the port to be used for the API",
            min_value=1025,
            max_value=65535,
            default_value=8500)
        asd_ips = []
        add_ips = True
        ipaddresses.append('All')
        while add_ips:
            current_ips = ' - Current selected IPs: {0}'.format(asd_ips)
            new_asd_ip = Interactive.ask_choice(
                choice_options=ipaddresses,
                question=
                "Select an IP address to be used for the ASDs or 'All' (All current and future interfaces: 0.0.0.0){0}"
                .format(current_ips if len(asd_ips) > 0 else ''),
                default_value='All')
            if new_asd_ip == 'All':
                ipaddresses.remove('All')
                asd_ips = [
                ]  # Empty list maps to all IPs - checked when configuring ASDs
                add_ips = False
            else:
                asd_ips.append(new_asd_ip)
                ipaddresses.remove(new_asd_ip)
                add_ips = Interactive.ask_yesno(
                    "Do you want to add another IP?")
        asd_start_port = Interactive.ask_integer(
            question="Select the port to be used for the ASDs",
            min_value=1025,
            max_value=65435,
            default_value=8600)
        configuration_store = 'arakoon'

        message = 'Do you want to set IPMI configuration keys?'
        proceed = Interactive.ask_yesno(message=message, default_value=False)
        if proceed is True:
            ipmi_info['ip'] = Interactive.ask_string(
                message='Enter the IPMI IP address',
                regex_info={'regex': ExtensionsToolbox.regex_ip})
            ipmi_info['username'] = Interactive.ask_string(
                message='Enter the IPMI username')
            ipmi_info['pwd'] = Interactive.ask_password(
                message='Enter the IPMI password')

    if api_ip not in validation_ip_addresses:
        _print_and_log(
            level='error',
            message='\n' + Interactive.boxed_message(lines=[
                'Invalid API IP {0} specified. Please choose from:'.format(
                    api_ip)
            ] + ['  * {0}'.format(ip) for ip in ipaddresses]))
        sys.exit(1)
    different_ips = set(asd_ips).difference(set(validation_ip_addresses))
    if different_ips:
        _print_and_log(
            level='error',
            message='\n' + Interactive.boxed_message(lines=[
                'Invalid ASD IPs {0} specified. Please choose from:'.format(
                    asd_ips)
            ] + ['  * {0}'.format(ip) for ip in ipaddresses]))
        sys.exit(1)

    if api_port in range(asd_start_port, asd_start_port + 100):
        _print_and_log(
            level='error',
            message='\n' + Interactive.boxed_message(
                ['API port cannot be in the range of the ASD port + 100']))
        sys.exit(1)

    if interactive is True:
        while not local_client.file_exists(CACC_LOCATION):
            _print_and_log(
                level='warning',
                message=
                ' - Please place a copy of the Arakoon\'s client configuration file at: {0}'
                .format(CACC_LOCATION))
            Interactive.ask_continue()

    local_client.file_write(filename=CONFIG_STORE_LOCATION,
                            contents=json.dumps(
                                {'configuration_store': configuration_store},
                                indent=4))

    node_id = Configuration.initialize(
        config={
            'api_ip': api_ip,
            'asd_ips': asd_ips,
            'api_port': api_port,
            'asd_start_port': asd_start_port,
            'ipmi': ipmi_info
        })

    # Model settings
    _print_and_log(message=' - Store settings in DB')
    for code, value in {
            'api_ip': api_ip,
            'api_port': api_port,
            'configuration_store': configuration_store,
            'node_id': node_id
    }.iteritems():
        setting = Setting()
        setting.code = code
        setting.value = value
        setting.save()

    # Deploy/start services
    _print_and_log(message=' - Deploying and starting services')
    service_manager.add_service(name=MANAGER_SERVICE, client=local_client)
    service_manager.add_service(name=WATCHER_SERVICE, client=local_client)
    _print_and_log(message=' - Starting watcher service')
    try:
        service_manager.start_service(name=WATCHER_SERVICE,
                                      client=local_client)
    except Exception:
        Configuration.uninitialize()
        _print_and_log(level='exception',
                       message='\n' +
                       Interactive.boxed_message(['Starting watcher failed']))
        sys.exit(1)

    _print_and_log(message='\n' +
                   Interactive.boxed_message(['ASD Manager setup completed']))
Beispiel #2
0
    def remove_node(node_ip, silent=None):
        """
        Remove the node with specified IP from the cluster
        :param node_ip: IP of the node to remove
        :type node_ip: str
        :param silent: If silent == '--force-yes' no question will be asked to confirm the removal
        :type silent: str
        :return: None
        """
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.lib.storagedriver import StorageDriverController
        from ovs.lib.vpool import VPoolController

        Toolbox.log(logger=NodeRemovalController._logger,
                    messages='Remove node',
                    boxed=True)
        Toolbox.log(
            logger=NodeRemovalController._logger,
            messages=
            'WARNING: Some of these steps may take a very long time, please check the logs for more information\n\n'
        )
        service_manager = ServiceFactory.get_manager()

        ###############
        # VALIDATIONS #
        ###############
        try:
            node_ip = node_ip.strip()
            if not isinstance(node_ip, str):
                raise ValueError('Node IP must be a string')
            if not re.match(SSHClient.IP_REGEX, node_ip):
                raise ValueError('Invalid IP {0} specified'.format(node_ip))

            storage_router_all = sorted(StorageRouterList.get_storagerouters(),
                                        key=lambda k: k.name)
            storage_router_masters = StorageRouterList.get_masters()
            storage_router_all_ips = set(
                [storage_router.ip for storage_router in storage_router_all])
            storage_router_master_ips = set([
                storage_router.ip for storage_router in storage_router_masters
            ])
            storage_router_to_remove = StorageRouterList.get_by_ip(node_ip)
            offline_reasons = {}
            if node_ip not in storage_router_all_ips:
                raise ValueError(
                    'Unknown IP specified\nKnown in model:\n - {0}\nSpecified for removal:\n - {1}'
                    .format('\n - '.join(storage_router_all_ips), node_ip))

            if len(storage_router_all_ips) == 1:
                raise RuntimeError("Removing the only node is not possible")

            if node_ip in storage_router_master_ips and len(
                    storage_router_master_ips) == 1:
                raise RuntimeError(
                    "Removing the only master node is not possible")

            if System.get_my_storagerouter() == storage_router_to_remove:
                raise RuntimeError(
                    'The node to be removed cannot be identical to the node on which the removal is initiated'
                )

            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages='Creating SSH connections to remaining master nodes')
            master_ip = None
            ip_client_map = {}
            storage_routers_offline = []
            storage_router_to_remove_online = True
            for storage_router in storage_router_all:
                try:
                    client = SSHClient(storage_router,
                                       username='******',
                                       timeout=10)
                except (UnableToConnectException, NotAuthenticatedException,
                        TimeOutException) as ex:
                    if isinstance(ex, UnableToConnectException):
                        msg = 'Unable to connect'
                    elif isinstance(ex, NotAuthenticatedException):
                        msg = 'Could not authenticate'
                    elif isinstance(ex, TimeOutException):
                        msg = 'Connection timed out'
                    Toolbox.log(
                        logger=NodeRemovalController._logger,
                        messages='  * Node with IP {0:<15}- {1}'.format(
                            storage_router.ip, msg))
                    offline_reasons[storage_router.ip] = msg
                    storage_routers_offline.append(storage_router)
                    if storage_router == storage_router_to_remove:
                        storage_router_to_remove_online = False
                    continue

                Toolbox.log(
                    logger=NodeRemovalController._logger,
                    messages='  * Node with IP {0:<15}- Successfully connected'
                    .format(storage_router.ip))
                ip_client_map[storage_router.ip] = client
                if storage_router != storage_router_to_remove and storage_router.node_type == 'MASTER':
                    master_ip = storage_router.ip

            if len(ip_client_map) == 0 or master_ip is None:
                raise RuntimeError(
                    'Could not connect to any master node in the cluster')

            storage_router_to_remove.invalidate_dynamics('vdisks_guids')
            if len(
                    storage_router_to_remove.vdisks_guids
            ) > 0:  # vDisks are supposed to be moved away manually before removing a node
                raise RuntimeError(
                    "Still vDisks attached to Storage Router {0}".format(
                        storage_router_to_remove.name))

            internal_memcached = Toolbox.is_service_internally_managed(
                service='memcached')
            internal_rabbit_mq = Toolbox.is_service_internally_managed(
                service='rabbitmq')
            memcached_endpoints = Configuration.get(
                key='/ovs/framework/memcache|endpoints')
            rabbit_mq_endpoints = Configuration.get(
                key='/ovs/framework/messagequeue|endpoints')
            copy_memcached_endpoints = list(memcached_endpoints)
            copy_rabbit_mq_endpoints = list(rabbit_mq_endpoints)
            for endpoint in memcached_endpoints:
                if endpoint.startswith(storage_router_to_remove.ip):
                    copy_memcached_endpoints.remove(endpoint)
            for endpoint in rabbit_mq_endpoints:
                if endpoint.startswith(storage_router_to_remove.ip):
                    copy_rabbit_mq_endpoints.remove(endpoint)
            if len(copy_memcached_endpoints
                   ) == 0 and internal_memcached is True:
                raise RuntimeError(
                    'Removal of provided nodes will result in a complete removal of the memcached service'
                )
            if len(copy_rabbit_mq_endpoints
                   ) == 0 and internal_rabbit_mq is True:
                raise RuntimeError(
                    'Removal of provided nodes will result in a complete removal of the messagequeue service'
                )

            Toolbox.run_hooks(component='noderemoval',
                              sub_component='validate_removal',
                              logger=NodeRemovalController._logger,
                              cluster_ip=storage_router_to_remove.ip)
        except KeyboardInterrupt:
            Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=
                'Removal has been aborted during the validation step. No changes have been applied.',
                boxed=True,
                loglevel='warning')
            sys.exit(1)
        except Exception as exception:
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages=[str(exception)],
                        boxed=True,
                        loglevel='exception')
            sys.exit(1)

        #################
        # CONFIRMATIONS #
        #################
        try:
            interactive = silent != '--force-yes'
            remove_asd_manager = not interactive  # Remove ASD manager if non-interactive else ask
            if interactive is True:
                if len(storage_routers_offline) > 0:
                    Toolbox.log(
                        logger=NodeRemovalController._logger,
                        messages=
                        'Certain nodes appear to be offline. These will not fully removed and will cause issues if they are not really offline.'
                    )
                    Toolbox.log(
                        logger=NodeRemovalController._logger,
                        messages='Offline nodes: {0}'.format(''.join(
                            ('\n  * {0:<15}- {1}.'.format(ip, message)
                             for ip, message in offline_reasons.iteritems()))))
                    valid_node_info = Interactive.ask_yesno(
                        message=
                        'Continue the removal with these being presumably offline?',
                        default_value=False)
                    if valid_node_info is False:
                        Toolbox.log(
                            logger=NodeRemovalController._logger,
                            messages=
                            'Please validate the state of the nodes before removing.',
                            title=True)
                        sys.exit(1)
                proceed = Interactive.ask_yesno(
                    message='Are you sure you want to remove node {0}?'.format(
                        storage_router_to_remove.name),
                    default_value=False)
                if proceed is False:
                    Toolbox.log(logger=NodeRemovalController._logger,
                                messages='Abort removal',
                                title=True)
                    sys.exit(1)

                remove_asd_manager = True
                if storage_router_to_remove_online is True:
                    client = SSHClient(endpoint=storage_router_to_remove,
                                       username='******')
                    if service_manager.has_service(name='asd-manager',
                                                   client=client):
                        remove_asd_manager = Interactive.ask_yesno(
                            message=
                            'Do you also want to remove the ASD manager and related ASDs?',
                            default_value=False)

                if remove_asd_manager is True or storage_router_to_remove_online is False:
                    for fct in Toolbox.fetch_hooks('noderemoval',
                                                   'validate_asd_removal'):
                        validation_output = fct(storage_router_to_remove.ip)
                        if validation_output['confirm'] is True:
                            if Interactive.ask_yesno(
                                    message=validation_output['question'],
                                    default_value=False) is False:
                                remove_asd_manager = False
                                break
        except KeyboardInterrupt:
            Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=
                'Removal has been aborted during the confirmation step. No changes have been applied.',
                boxed=True,
                loglevel='warning')
            sys.exit(1)
        except Exception as exception:
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages=[str(exception)],
                        boxed=True,
                        loglevel='exception')
            sys.exit(1)
        ###########
        # REMOVAL #
        ###########
        try:
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages='Starting removal of node {0} - {1}'.format(
                            storage_router_to_remove.name,
                            storage_router_to_remove.ip))
            if storage_router_to_remove_online is False:
                Toolbox.log(
                    logger=NodeRemovalController._logger,
                    messages=
                    '  Marking all Storage Drivers served by Storage Router {0} as offline'
                    .format(storage_router_to_remove.ip))
                StorageDriverController.mark_offline(
                    storagerouter_guid=storage_router_to_remove.guid)

            # Remove vPools
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages='  Removing vPools from node'.format(
                            storage_router_to_remove.ip))
            storage_routers_offline_guids = [
                sr.guid for sr in storage_routers_offline
                if sr.guid != storage_router_to_remove.guid
            ]
            for storage_driver in storage_router_to_remove.storagedrivers:
                Toolbox.log(logger=NodeRemovalController._logger,
                            messages='    Removing vPool {0} from node'.format(
                                storage_driver.vpool.name))
                VPoolController.shrink_vpool(
                    storagedriver_guid=storage_driver.guid,
                    offline_storage_router_guids=storage_routers_offline_guids)

            # Demote if MASTER
            if storage_router_to_remove.node_type == 'MASTER':
                NodeTypeController.demote_node(
                    cluster_ip=storage_router_to_remove.ip,
                    master_ip=master_ip,
                    ip_client_map=ip_client_map,
                    unique_id=storage_router_to_remove.machine_id,
                    unconfigure_memcached=internal_memcached,
                    unconfigure_rabbitmq=internal_rabbit_mq,
                    offline_nodes=storage_routers_offline)

            # Stop / remove services
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages='Stopping and removing services')
            if storage_router_to_remove_online is True:
                client = SSHClient(endpoint=storage_router_to_remove,
                                   username='******')
                NodeRemovalController.remove_services(
                    client=client,
                    node_type=storage_router_to_remove.node_type.lower(),
                    logger=NodeRemovalController._logger)
                service = 'watcher-config'
                if service_manager.has_service(service, client=client):
                    Toolbox.log(
                        logger=NodeRemovalController._logger,
                        messages='Removing service {0}'.format(service))
                    service_manager.stop_service(service, client=client)
                    service_manager.remove_service(service, client=client)

            Toolbox.run_hooks(component='noderemoval',
                              sub_component='remove',
                              logger=NodeRemovalController._logger,
                              cluster_ip=storage_router_to_remove.ip,
                              complete_removal=remove_asd_manager)

            # Clean up model
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages='Removing node from model')
            for service in storage_router_to_remove.services:
                service.delete()
            for disk in storage_router_to_remove.disks:
                for partition in disk.partitions:
                    partition.delete()
                disk.delete()
            for j_domain in storage_router_to_remove.domains:
                j_domain.delete()
            Configuration.delete('/ovs/framework/hosts/{0}'.format(
                storage_router_to_remove.machine_id))

            NodeTypeController.restart_framework_and_memcache_services(
                clients=ip_client_map,
                offline_node_ips=[node.ip for node in storage_routers_offline],
                logger=NodeRemovalController._logger)

            if storage_router_to_remove_online is True:
                client = SSHClient(endpoint=storage_router_to_remove,
                                   username='******')
                client.file_delete(filenames=[CACC_LOCATION])
                client.file_delete(filenames=[CONFIG_STORE_LOCATION])
            storage_router_to_remove.delete()
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages='Successfully removed node\n')
        except Exception as exception:
            Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=['An unexpected error occurred:',
                          str(exception)],
                boxed=True,
                loglevel='exception')
            sys.exit(1)
        except KeyboardInterrupt:
            Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=
                'This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.',
                boxed=True,
                loglevel='error')
            sys.exit(1)

        if remove_asd_manager is True and storage_router_to_remove_online is True:
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages='\nRemoving ASD Manager')
            with remote(storage_router_to_remove.ip, [os]) as rem:
                rem.os.system('asd-manager remove --force-yes')
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages='Remove nodes finished',
                    title=True)
Beispiel #3
0
def remove(silent=None):
    """
    Interactive removal part for the ASD manager
    :param silent: If silent == '--force-yes' no question will be asked to confirm the removal
    :type silent: str
    :return: None
    :rtype: NoneType
    """
    _print_and_log(message='\n' +
                   Interactive.boxed_message(['ASD Manager removal']))

    local_client = SSHClient(endpoint='127.0.0.1', username='******')
    if not local_client.file_exists(
            filename='{0}/main.db'.format(Setting.DATABASE_FOLDER)):
        _print_and_log(level='error',
                       message='\n' + Interactive.boxed_message(
                           ['The ASD Manager has already been removed']))
        sys.exit(1)

    _print_and_log(message=' - Validating configuration management')
    try:
        Configuration.list(key='ovs')
    except:
        _print_and_log(
            level='exception',
            message='\n' +
            Interactive.boxed_message(['Could not connect to Arakoon']))
        sys.exit(1)

    _print_and_log(message='  - Retrieving ASD information')
    all_asds = {}
    try:
        all_asds = ASDList.get_asds()
    except:
        _print_and_log(level='exception',
                       message='  - Failed to retrieve the ASD information')

    interactive = silent != '--force-yes'
    if interactive is True:
        message = 'Are you sure you want to continue?'
        if len(all_asds) > 0:
            _print_and_log(message='\n\n+++ ALERT +++\n', level='warning')
            message = 'DATA LOSS possible if proceeding! Continue?'

        proceed = Interactive.ask_yesno(message=message, default_value=False)
        if proceed is False:
            _print_and_log(level='error',
                           message='\n' +
                           Interactive.boxed_message(['Abort removal']))
            sys.exit(1)

    if len(all_asds) > 0:
        _print_and_log(message=' - Removing disks')
        for disk in DiskList.get_disks():
            if disk.available is True:
                continue
            try:
                _print_and_log(
                    message='    - Retrieving ASD information for disk {0}'.
                    format(disk.name))
                for asd in disk.asds:
                    _print_and_log(
                        message='      - Removing ASD {0}'.format(asd.name))
                    ASDController.remove_asd(asd)
                DiskController.clean_disk(disk)
            except Exception:
                _print_and_log(level='exception',
                               message='    - Deleting ASDs failed')

    _print_and_log(message=' - Removing services')
    service_manager = ServiceFactory.get_manager()
    for service in MaintenanceController.get_services():
        service_name = service
        _print_and_log(
            message='    - Removing service {0}'.format(service_name))
        guid = None
        for alba_backend_guid in Configuration.list(key='/ovs/alba/backends'):
            for maintenance_service_name in Configuration.list(
                    key='/ovs/alba/backends/{0}/maintenance/'.format(
                        alba_backend_guid)):
                if maintenance_service_name == service_name:
                    guid = alba_backend_guid
                    break
        MaintenanceController.remove_maintenance_service(
            name=service_name, alba_backend_guid=guid)

    for service_name in [WATCHER_SERVICE, MANAGER_SERVICE]:
        if service_manager.has_service(name=service_name, client=local_client):
            _print_and_log(
                message='   - Removing service {0}'.format(service_name))
            service_manager.stop_service(name=service_name,
                                         client=local_client)
            service_manager.remove_service(name=service_name,
                                           client=local_client)

    _print_and_log(message=' - Removing from configuration management')
    remaining_users = Configuration.uninitialize()
    if not remaining_users:
        local_client.file_delete(filenames=CACC_LOCATION)

    local_client.file_delete(
        filenames='{0}/main.db'.format(Setting.DATABASE_FOLDER))
    _print_and_log(
        message='\n' +
        Interactive.boxed_message(['ASD Manager removal completed']))