Example #1
 def __init__(self, ip, username, password, client):
     # type: (str, str, str, SSHClient) -> None
     """
     Initialize an IPMIController
     :param ip: IP of the host to control through IPMI
     :type ip: str
     :param username: IPMI username of the host to control through IPMI
     :type username: str
     :param password: IPMI password of the host to control through IPMI
     :type password: str
     :param client: SSHClient to perform all IPMI commands on
     :type client: SSHClient
     """
     actual_params = {'ip': ip,
                      'username': username,
                      'password': password,
                      'client': client}
     required_params = {'ip': (str, ExtensionsToolbox.regex_ip, True),
                        'username': (str, None, True),
                        'password': (str, None, True),
                        'client': (SSHClient, None, True)}
     ExtensionsToolbox.verify_required_params(actual_params=actual_params,
                                              required_params=required_params)
     self.ip = ip
     self.username = username
     self._client = client
     self._pwd = password
     self._basic_command = ['ipmi-power', '-h', self.ip, '-u', self.username, '-p', self._pwd]
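
All of these examples drive ExtensionsToolbox.verify_required_params with spec tuples of the form (type, constraint, mandatory). Judging from the calls shown here, the constraint slot accepts a compiled regex, a list of allowed values, a {'min': ..., 'max': ...} range, or None, and the optional third element marks the key as mandatory. A minimal sketch combining those forms (names and values are illustrative):

from ovs_extensions.generic.toolbox import ExtensionsToolbox

actual_params = {'ip': '10.100.1.1', 'port': 8443, 'protocol': 'tcp'}
required_params = {'ip': (str, ExtensionsToolbox.regex_ip, True),  # regex constraint
                   'port': (int, {'min': 1, 'max': 65535}),        # range constraint
                   'protocol': (str, ['tcp', 'udp', 'rdma']),      # choice constraint
                   'timeout': (int, None, False)}                  # optional, unconstrained
# Raises a RuntimeError describing the violations when validation fails
# (_validate_ip in Example #13 catches exactly that).
ExtensionsToolbox.verify_required_params(actual_params=actual_params,
                                         required_params=required_params)
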
Example #2
    def create_blktap_device(client, diskname, edge_info, logger=LOGGER):
        """
        Creates a blk tap device from a vdisk
        :return: blktap device location
        """
        required_edge_params = {
            'port': (int, {
                'min': 1,
                'max': 65535
            }),
            'protocol': (str, ['tcp', 'udp', 'rdma']),
            'ip': (str, Toolbox.regex_ip),
            'username': (str, None, False),
            'password': (str, None, False)
        }
        ExtensionsToolbox.verify_required_params(required_edge_params,
                                                 edge_info)
        if edge_info.get('username') and edge_info.get('password'):
            ovs_edge_connection = "openvstorage+{0}:{1}:{2}/{3}:username={4}:password={5}".format(
                edge_info['protocol'], edge_info['ip'], edge_info['port'],
                diskname, edge_info['username'], edge_info['password'])
        else:
            ovs_edge_connection = "openvstorage+{0}:{1}:{2}/{3}".format(
                edge_info['protocol'], edge_info['ip'], edge_info['port'],
                diskname)

        cmd = ["tap-ctl", "create", "-a", ovs_edge_connection]
        logger.debug('Creating blktap device: {}'.format(' '.join(cmd)))
        return client.run(cmd)
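
For reference, a sketch of the edge URI this helper (and convert_image in Example #5) assembles; host, port and disk name are illustrative:

edge_info = {'protocol': 'tcp', 'ip': '10.100.1.2', 'port': 26203}
# Without credentials the helpers produce:
#   openvstorage+tcp:10.100.1.2:26203/mydisk
# When edge_info also carries 'username' and 'password',
# ':username=<user>:password=<pwd>' is appended to the URI.
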
Example #3
 def create_hprm_config_files(self, local_storagerouter, storagerouter,
                              parameters):
     """
     DEPRECATED API CALL - USE /vpool/vpool_guid/create_hprm_config_files instead
     Create the required configuration files to be able to make use of HPRM (aka PRACC)
     These configuration files will be zipped and made available for download
     :param local_storagerouter: StorageRouter this call is executed on
     :type local_storagerouter: ovs.dal.hybrids.storagerouter.StorageRouter
     :param storagerouter: The StorageRouter for which a HPRM manager needs to be deployed
     :type storagerouter: ovs.dal.hybrids.storagerouter.StorageRouter
     :param parameters: Additional information required for the HPRM configuration files
     :type parameters: dict
     :return: Asynchronous result of a CeleryTask
     :rtype: celery.result.AsyncResult
     """
     _ = storagerouter
     ExtensionsToolbox.verify_required_params(
         actual_params=parameters,
         required_params={
             'vpool_guid': (str, ExtensionsToolbox.regex_guid)
         })
     return VPoolController.create_hprm_config_files.delay(
         parameters=parameters,
         vpool_guid=parameters['vpool_guid'],
         local_storagerouter_guid=local_storagerouter.guid)
Example #4
    def add_domain_to_sr(self, storagerouter_ip, name, recovery=False):
        """
        Add domains, present in the model, to a storage router.
        :param storagerouter_ip: ip of the storage router
        :type storagerouter_ip: str
        :param name: name of the domain to add to the storagerouter
        :type name: str
        :param recovery: whether the domain is a recovery domain
        :type recovery: bool
        """
        self._valid_storagerouter(storagerouter_ip)
        ExtensionsToolbox.verify_required_params(
            required_params={'name': (str, None, True)},
            actual_params={'name': name},
            verify_keys=True)

        if name not in self._domains:
            raise ValueError('Invalid domain passed: {0}'.format(name))

        path = self.config['setup']['storagerouters'][storagerouter_ip]
        if 'domains' not in path.keys():
            path['domains'] = {}
        path = path['domains']
        config_key = 'domain_guids' if recovery is False else 'recovery_domain_guids'
        if config_key not in path:
            path[config_key] = []
        path[config_key].append(name)
Example #5
 def convert_image(client,
                   image_location,
                   diskname,
                   edge_info,
                   logger=LOGGER):
     """
     Converts an image file with qemu over edge connection
     :return: None
     """
     required_edge_params = {
         'port': (int, {
             'min': 1,
             'max': 65535
         }),
         'protocol': (str, ['tcp', 'udp', 'rdma']),
         'ip': (str, Toolbox.regex_ip),
         'username': (str, None, False),
         'password': (str, None, False)
     }
     ExtensionsToolbox.verify_required_params(required_edge_params,
                                              edge_info)
     if edge_info.get('username') and edge_info.get('password'):
         ovs_edge_connection = "openvstorage+{0}:{1}:{2}/{3}:username={4}:password={5}".format(
             edge_info['protocol'], edge_info['ip'], edge_info['port'],
             diskname, edge_info['username'], edge_info['password'])
     else:
         ovs_edge_connection = "openvstorage+{0}:{1}:{2}/{3}".format(
             edge_info['protocol'], edge_info['ip'], edge_info['port'],
             diskname)
     cmd = ["qemu-img", "convert", image_location, ovs_edge_connection]
     logger.debug('Converting an image with qemu using: {}'.format(
         ' '.join(cmd)))
     client.run(cmd)
Example #6
 def add_disk_to_sr(self, storagerouter_ip, name, roles):
     """
     Add disk with given name and roles to a storagerouter in the model.
     :param storagerouter_ip: ip of the storagerouter to add the disk to
     :type storagerouter_ip: str
     :param name: name of the disk
     :type name: str
     :param roles: roles to assign to the disk
     :type roles: list
     """
     self._valid_storagerouter(storagerouter_ip)
     required_params = {
         'name': (str, None, True),
         'roles': (list, None, True)
     }
     ExtensionsToolbox.verify_required_params(
         required_params=required_params,
         actual_params={
             'name': name,
             'roles': roles
         },
         verify_keys=True)
     for role in roles:
         if role not in DiskPartition.ROLES:
             raise ValueError(
                 'Provided role {0} is not an allowed role for disk {1}.'.
                 format(role, name))
     disk_dict = {name: {'roles': roles}}
     sr_config = self.config['setup']['storagerouters'][storagerouter_ip]
     if 'disks' not in sr_config:
         sr_config['disks'] = {}
     sr_config['disks'].update(disk_dict)
Example #7
 def add_storagerouter(self, storagerouter_ip, hostname):
     """
     Add a storagerouter to the model given the provided ip and hostname.
     :param storagerouter_ip: ip address of the storage router
     :type storagerouter_ip: str
     :param hostname: hostname of the storagerouter
     :type hostname: str
     """
     self._validate_ip(storagerouter_ip)
     required_params = {'hostname': (str, None, True)}
     ExtensionsToolbox.verify_required_params(
         required_params=required_params,
         actual_params={'hostname': hostname},
         verify_keys=True)
     if 'setup' not in self.config:
         self.config['setup'] = {}
     if 'storagerouters' not in self.config['setup']:
         self.config['setup']['storagerouters'] = {}
     if storagerouter_ip in self.config['setup']['storagerouters']:
         raise ValueError(
             'Storagerouter with given ip {0} already defined.'.format(
                 storagerouter_ip))
     self.config['setup']['storagerouters'][storagerouter_ip] = {
         'hostname': hostname
     }
Example #8
    def change_config(vpool_name, vpool_details, storagerouter_ip, *args,
                      **kwargs):
        """
        Update the volumedriver configuration of a vPool on a given storagerouter
        :param vpool_name: name of the vPool to update
        :type vpool_name: str
        :param vpool_details: vPool details, optionally containing a 'storagedriver' configuration section
        :type vpool_details: dict
        :param storagerouter_ip: ip of the storagerouter on which the vPool is located
        :type storagerouter_ip: str
        """
        # Volumedriver settings
        storagedriver_config = vpool_details.get('storagedriver')
        if storagedriver_config is not None:
            ExtensionsToolbox.verify_required_params(
                StoragedriverSetup.STORAGEDRIVER_PARAMS, storagedriver_config)
            StoragedriverSetup.LOGGER.info(
                'Updating volumedriver configuration of vPool `{0}` on storagerouter `{1}`.'
                .format(vpool_name, storagerouter_ip))
            vpool = VPoolHelper.get_vpool_by_name(vpool_name)
            storagedrivers = [
                sd for sd in vpool.storagedrivers
                if sd.storagerouter.ip == storagerouter_ip
            ]
            if not storagedrivers:
                error_msg = 'Unable to find the storagedriver of vPool {0} on storagerouter {1}'.format(
                    vpool_name, storagerouter_ip)
                raise RuntimeError(error_msg)
            storagedriver = storagedrivers[0]
            StoragedriverHelper.change_config(storagedriver,
                                              storagedriver_config)
            vpool.invalidate_dynamics('configuration')
            StoragedriverSetup.LOGGER.info(
                'Updating volumedriver config of vPool `{0}` should have succeeded on storagerouter `{1}`'
                .format(vpool_name, storagerouter_ip))
Example #9
 def get_instance(cls, connection_info, cache_store=None, version=6):
     """
     Retrieve an OVSClient instance for the connection information passed
     :param connection_info: Connection information, includes: 'host', 'port', 'client_id', 'client_secret'
     :type connection_info: dict
     :param cache_store: Store in which to keep the generated token for the client
     :type cache_store: object
     :param version: Version for the API
     :type version: int
     :return: An instance of the OVSClient class
     :rtype: ovs_extensions.api.client.OVSClient
     """
     ExtensionsToolbox.verify_required_params(
         actual_params=connection_info,
         required_params={
             'host': (str, ExtensionsToolbox.regex_ip),
             'port': (int, {
                 'min': 1,
                 'max': 65535
             }),
             'client_id': (str, None),
             'client_secret': (str, None),
             'local': (bool, None, False)
         })
     return cls(ip=connection_info['host'],
                port=connection_info['port'],
                credentials=(connection_info['client_id'],
                             connection_info['client_secret']),
                version=version,
                cache_store=cache_store)
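
A usage sketch, assuming OVSClient lives at ovs_extensions.api.client as the :rtype: above states; host and credentials are placeholders:

from ovs_extensions.api.client import OVSClient

connection_info = {'host': '10.100.1.3',
                   'port': 443,
                   'client_id': 'my_client',        # placeholder credentials
                   'client_secret': 'my_secret'}
client = OVSClient.get_instance(connection_info=connection_info, version=6)
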
Example #10
    def validate(self, storagerouter=None, storagedriver=None):
        """
        Perform some validations before creating or extending a vPool
        :param storagerouter: StorageRouter on which the vPool will be created or extended
        :type storagerouter: ovs.dal.hybrids.storagerouter.StorageRouter
        :param storagedriver: When passing a StorageDriver, perform validations when shrinking a vPool
        :type storagedriver: ovs.dal.hybrids.storagedriver.StorageDriver
        :raises ValueError: If extending a vPool which status is not RUNNING
                RuntimeError: If this vPool's configuration does not meet the requirements
                              If the vPool has already been extended on the specified StorageRouter
        :return: None
        :rtype: NoneType
        """
        if self.vpool is not None:
            if self.vpool.status != VPool.STATUSES.RUNNING:
                raise ValueError('vPool should be in {0} status'.format(
                    VPool.STATUSES.RUNNING))

            ExtensionsToolbox.verify_required_params(
                actual_params=self.vpool.configuration,
                required_params={
                    'sco_size': (int, StorageDriverClient.TLOG_MULTIPLIER_MAP.keys()),
                    'dtl_mode': (str, StorageDriverClient.VPOOL_DTL_MODE_MAP.keys()),
                    'write_buffer': (float, None),
                    'dtl_transport': (str, StorageDriverClient.VPOOL_DTL_TRANSPORT_MAP.keys()),
                    'tlog_multiplier': (int, StorageDriverClient.TLOG_MULTIPLIER_MAP.values())
                })

            if storagerouter is not None:
                for vpool_storagedriver in self.vpool.storagedrivers:
                    if vpool_storagedriver.storagerouter_guid == storagerouter.guid:
                        raise RuntimeError(
                            'A StorageDriver is already linked to this StorageRouter for vPool {0}'
                            .format(self.vpool.name))
            if storagedriver is not None:
                VDiskController.sync_with_reality(vpool_guid=self.vpool.guid)
                storagedriver.invalidate_dynamics('vdisks_guids')
                if len(storagedriver.vdisks_guids) > 0:
                    raise RuntimeError(
                        'There are still vDisks served from the given StorageDriver'
                    )

                self.mds_services = [
                    mds_service for mds_service in self.vpool.mds_services
                    if mds_service.service.storagerouter_guid == storagedriver.storagerouter_guid
                ]
                for mds_service in self.mds_services:
                    partitions = mds_service.storagedriver_partitions
                    if len(partitions) == 0 or partitions[0].storagedriver is None:
                        raise RuntimeError(
                            'Failed to retrieve the linked StorageDriver to this MDS Service {0}'.format(
                                mds_service.service.name))
Example #11
    def __getattr__(self, item):
        from ovs_extensions.generic.toolbox import ExtensionsToolbox

        if item.startswith('configure_'):
            section = ExtensionsToolbox.remove_prefix(item, 'configure_')
            return lambda **kwargs: self._add(section, **kwargs)
        if item.startswith('clear_'):
            section = ExtensionsToolbox.remove_prefix(item, 'clear_')
            return lambda: self._delete(section)
        raise AttributeError(item)  # Do not silently return None for unknown attributes
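
This resolves any configure_*/clear_* attribute access dynamically; a sketch of what that enables, assuming raw_config is an instance of this class and a 'framework' section exists:

raw_config.configure_framework(cluster_name='mycluster')  # -> raw_config._add('framework', cluster_name='mycluster')
raw_config.clear_framework()                              # -> raw_config._delete('framework')
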
Example #12
    def update_storagedriver_of_vpool(self, sr_ip, vpool_name, sr_params=None):
        """
        Update all or some data of a storagedriver, assigned to a vpool on a specific storagerouter.
        :param sr_ip: ip of the storagerouter on which the vpool is located
        :type sr_ip: str
        :param vpool_name: name of the vpool of which to update the storagedriver data
        :type vpool_name: str
        :param sr_params: parameters to update of the referenced storagedriver
        :type sr_params: dict
        """
        required_params = {
            'sco_size': (int, StorageDriverClient.TLOG_MULTIPLIER_MAP.keys()),
            'cluster_size': (int, StorageDriverClient.CLUSTER_SIZES),
            'volume_write_buffer': (int, {
                'min': 128,
                'max': 10240
            }, False),
            'global_read_buffer': (int, {
                'min': 128,
                'max': 10240
            }, False),
            'strategy': (str, None, False),
            'deduplication': (str, None, False),
            'dtl_transport':
            (str, StorageDriverClient.VPOOL_DTL_TRANSPORT_MAP.keys()),
            'dtl_mode': (str, StorageDriverClient.VPOOL_DTL_MODE_MAP.keys())
        }

        default_params = {
            'sco_size': 4,
            'cluster_size': 4,
            'volume_write_buffer': 512,
            'strategy': 'none',
            'global_write_buffer': 128,
            'global_read_buffer': 128,
            'deduplication': 'non_dedupe',
            'dtl_transport': 'tcp',
            'dtl_mode': 'sync'
        }

        if sr_params is None:
            sr_params = {}
        if not isinstance(sr_params, dict):
            raise ValueError('Parameters should be of type "dict"')
        default_params.update(sr_params)
        ExtensionsToolbox.verify_required_params(required_params,
                                                 default_params)
        if sr_ip not in self.config['setup']['storagerouters'].keys():
            raise KeyError('Storagerouter with ip {0} is not defined'.format(sr_ip))
        if vpool_name not in self.config['setup']['storagerouters'][sr_ip][
                'vpools']:
            raise KeyError(
                'Vpool with name {0} is not defined on storagerouter with ip {1}'
                .format(vpool_name, sr_ip))
        self.config['setup']['storagerouters'][sr_ip]['vpools'][vpool_name][
            'storagedriver'] = default_params
Example #13
 def _validate_ip(self, ip):
     # Validate the format with verify_required_params, then check that the host answers a single ping
     required_params = {'storagerouter_ip': (str, Toolbox.regex_ip, True)}
     try:
         ExtensionsToolbox.verify_required_params(
             required_params=required_params,
             actual_params={'storagerouter_ip': ip},
             verify_keys=True)
     except RuntimeError as e:
         raise ValueError(e)
     if os.system('ping -c 1 {0}'.format(ip)) != 0:
         raise ValueError('No response from ip {0}'.format(ip))
Example #14
    def change_cache(self,
                     storagerouter_ip,
                     vpool,
                     block_cache=True,
                     fragment_cache=True,
                     on_read=True,
                     on_write=True):
        """
        Change the caching parameters of a given vpool on a given storagerouter. By default, change the parameters of both the block cache and the fragment cache.
        :param storagerouter_ip: search for vpool on this storagerouter
        :type storagerouter_ip: str
        :param vpool: change cache options of given vpool
        :type vpool: str
        :param block_cache: change block cache parameters, default True
        :type block_cache: bool
        :param fragment_cache: change fragment cache parameters, default True
        :type fragment_cache: bool
        :param on_read: change cache_on_read parameters, default True
        :type on_read: bool
        :param on_write: change cache_on_write parameters, default True
        :type on_write: bool
        """
        self._valid_storagerouter(storagerouter_ip=storagerouter_ip)

        required_params = {
            'vpool': (str, None, True),
            'block_cache': (bool, None, False),
            'fragment_cache': (bool, None, False),
            'on_read': (bool, None, False),
            'on_write': (bool, None, False)
        }
        actual_params = {
            'vpool': vpool,
            'block_cache': block_cache,
            'fragment_cache': fragment_cache,
            'on_read': on_read,
            'on_write': on_write
        }
        ExtensionsToolbox.verify_required_params(
            required_params=required_params,
            actual_params=actual_params,
            verify_keys=True)
        try:
            vpool = self.config['setup']['storagerouters'][storagerouter_ip][
                'vpools'][vpool]
        except KeyError:
            raise ValueError('Vpool {0} not found'.format(vpool))
        if block_cache is True:
            vpool['block_cache']['strategy']['cache_on_read'] = on_read
            vpool['block_cache']['strategy']['cache_on_write'] = on_write
        if fragment_cache is True:
            vpool['fragment_cache']['strategy']['cache_on_read'] = on_read
            vpool['fragment_cache']['strategy']['cache_on_write'] = on_write
Example #15
 def __init__(self, ip, user, password, type):
     required_params = {'ip': (str, Toolbox.regex_ip),
                        'user': (str, None),
                        'password': (str, None),
                        'type': (str, ['KVM', 'VMWARE'])}
     ExtensionsToolbox.verify_required_params(required_params=required_params,
                                              actual_params={'ip': ip,
                                                             'user': user,
                                                             'password': password,
                                                             'type': type})
     self.ip = ip
     self.user = user
     self.password = password
     self.type = type
Example #16
    def monitor_services(self):
        # type: () -> None
        """
        Monitor the local services
        :return: None
        :rtype: NoneType
        """
        try:
            grep = ['egrep "{0}"'.format(prefix) for prefix in self._monitor_prefixes]
            previous_output = None
            while True:
                # Gather service states
                running_services = {}
                non_running_services = {}
                longest_service_name = 0
                for service_name in check_output('systemctl list-unit-files --full --type=service --no-legend --no-pager | {0} | tr -s " " | cut -d " " -f 1'.format(' | '.join(grep)), shell=True).splitlines():
                    try:
                        service_state = check_output('systemctl is-active {0}'.format(service_name), shell=True).strip()
                    except CalledProcessError as cpe:
                        service_state = cpe.output.strip()

                    service_name = service_name.replace('.service', '')
                    if service_state == 'active':
                        service_pid = check_output('systemctl show {0} --property=MainPID'.format(service_name), shell=True).strip().split('=')[1]
                        running_services[service_name] = (service_state, service_pid)
                    else:
                        non_running_services[service_name] = service_state

                    if len(service_name) > longest_service_name:
                        longest_service_name = len(service_name)

                # Put service states in list
                output = ['Running processes',
                          '=================\n']
                for service_name in sorted(running_services, key=lambda service: ExtensionsToolbox.advanced_sort(service, '_')):
                    output.append('{0} {1} {2}  {3}'.format(service_name, ' ' * (longest_service_name - len(service_name)), running_services[service_name][0], running_services[service_name][1]))

                output.extend(['\n\nNon-running processes',
                               '=====================\n'])
                for service_name in sorted(non_running_services, key=lambda service: ExtensionsToolbox.advanced_sort(service, '_')):
                    output.append('{0} {1} {2}'.format(service_name, ' ' * (longest_service_name - len(service_name)), non_running_services[service_name]))

                # Print service states (only if changes)
                if previous_output != output:
                    print '\x1b[2J\x1b[H'
                    for line in output:
                        print line
                    previous_output = list(output)
                time.sleep(1)
        except KeyboardInterrupt:
            pass
Example #17
 def regenerate_service(self, name, client, target_name):
     # type: (str, SSHClient, str) -> None
     """
     Regenerates the service files of a service.
     :param name: Template name of the service to regenerate
     :type name: str
     :param client: Client on which to regenerate the service
     :type client: ovs_extensions.generic.sshclient.SSHClient
     :param target_name: The current service name, e.g. ovs-volumedriver_flash01.service
     :type target_name: str
     :return: None
     :rtype: NoneType
     :raises: RuntimeError if the regeneration failed
     """
     configuration_key = self.service_config_key.format(self._system.get_my_machine_id(client),
                                                        ExtensionsToolbox.remove_prefix(target_name, self.OVS_SERVICE_PREFIX))
     # If the entry is stored in arakoon, it means the service file was previously made
     if not self._configuration.exists(configuration_key):
         raise RuntimeError('Service {0} was not previously added and cannot be regenerated.'.format(target_name))
     # Rewrite the service file
     service_params = self._configuration.get(configuration_key)
     startup_dependency = service_params['STARTUP_DEPENDENCY']
     if startup_dependency == '':
         startup_dependency = None
     else:
         startup_dependency = '.'.join(startup_dependency.split('.')[:-1])  # Remove .service from startup dependency
     output = self.add_service(name=name,
                               client=client,
                               params=service_params,
                               target_name=target_name,
                               startup_dependency=startup_dependency,
                               delay_registration=True)
     if output is None:
         raise RuntimeError('Regenerating files for service {0} has failed'.format(target_name))
Example #18
 def list(self, key, recursive=False):
     # type: (str, bool) -> Generator[str]
     """
     List all keys starting with specified key
     :param key: Key to list
     :type key: str
     :param recursive: List keys recursively
     :type recursive: bool
     :return: Generator with all keys
     :rtype: generator
     """
     key = self._clean_key(key)
     entries = []
     for entry in self._client.prefix(key):
         if entry.startswith('_'):
             continue
         if recursive is True:
             parts = entry.split('/')
             for index, part in enumerate(parts):
                 if index == len(parts) - 1:  # Last part
                     yield entry  # Every entry is unique, so when having reached last part, we yield it
                 else:
                     dir_name = '{0}/'.format('/'.join(parts[:index + 1]))
                     if dir_name not in entries:
                         entries.append(dir_name)
                         yield dir_name
         else:
             if key == '' or entry.startswith(key.rstrip('/') + '/'):
                 cleaned = ExtensionsToolbox.remove_prefix(
                     entry, key).strip('/').split('/')[0]
                 if cleaned not in entries:
                     entries.append(cleaned)
                     yield cleaned
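
A sketch of the two modes, assuming the store holds the keys 'a/b' and 'a/c/d' and cfg is an instance of this class (values are illustrative):

list(cfg.list('a'))                  # -> ['b', 'c']                      (direct children only)
list(cfg.list('a', recursive=True))  # -> ['a/', 'a/b', 'a/c/', 'a/c/d']  (directories end in '/')
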
Example #19
 def check_arakoon_ports(cls, result_handler):
     """
     Verifies that the Arakoon clusters still respond to connections
     :param result_handler: logging object
     :type result_handler: ovs.extensions.healthcheck.result.HCResults
     :return: None
     :rtype: NoneType
     """
     arakoon_clusters = cls._get_arakoon_clusters(result_handler)
     result_handler.info('Starting Arakoon ports test.',
                         add_to_result=False)
     result_handler.info(
         'Retrieving all port connection statistics. This might take a while',
         add_to_result=False)
     start = time.time()
     arakoon_stats = cls._get_port_connections(result_handler,
                                               arakoon_clusters)
     result_handler.info(
         'Retrieving all port connection statistics succeeded (duration: {0})'.format(time.time() - start),
         add_to_result=False)
     for cluster_type, clusters in arakoon_stats.iteritems():
         result_handler.info(
             'Testing the ports of {0} Arakoons'.format(cluster_type),
             add_to_result=False)
         for cluster in clusters:
             cluster_name = cluster['cluster_name']
             connection_result = cluster['connection_result']
             connection_result = OrderedDict(
                 sorted(connection_result.items(),
                        key=lambda item: ExtensionsToolbox.advanced_sort(
                            item[0].ip, separator='.')))
             for node, stats in connection_result.iteritems():
                 identifier_log = 'Arakoon cluster {0} on node {1}'.format(
                     cluster_name, node.ip)
                 if len(stats['errors']) > 0:
                     # Determine where issues were found
                     for step, exception in stats['errors']:
                         if step == 'test_connection':
                             try:
                                 # Raise the thrown exception
                                 raise exception
                             except Exception:
                                 message = 'Connection to {0} could not be established due to an unhandled exception.'.format(
                                     identifier_log)
                                 cls.logger.exception(message)
                                 result_handler.exception(
                                     message,
                                     code=ErrorCodes.unhandled_exception)
                     continue
                 if stats['result'] is True:
                     result_handler.success(
                         'Connection established to {0}'.format(
                             identifier_log),
                         code=ErrorCodes.arakoon_connection_ok)
                 else:
                     result_handler.failure(
                         'Connection could not be established to {0}'.
                         format(identifier_log),
                         code=ErrorCodes.arakoon_connection_failure)
Example #20
    def add_service(self, name, client, params=None, target_name=None, startup_dependency=None, delay_registration=False, path=None):
        # type: (str, SSHClient, dict, str, str, bool, str) -> dict
        """
        Add a service
        :param name: Template name of the service to add
        :type name: str
        :param client: Client on which to add the service
        :type client: ovs_extensions.generic.sshclient.SSHClient
        :param params: Additional information about the service
        :type params: dict or None
        :param target_name: Overrule default name of the service with this name
        :type target_name: str or None
        :param startup_dependency: Additional startup dependency
        :type startup_dependency: str or None
        :param delay_registration: Register the service parameters in the config management right away or not
        :type delay_registration: bool
        :param path: path to add service to
        :type path: str
        :return: Parameters used by the service
        :rtype: dict
        """
        if params is None:
            params = {}
        if path is None:
            path = self._config_template_dir.format('systemd')
        else:
            path = path.format('systemd')
        service_name = self._get_name(name, client, path)

        template_file = '{0}/{1}.service'.format(path, service_name)

        if not client.file_exists(template_file):
            # Given template doesn't exist so we are probably using system init scripts
            return {}

        if target_name is not None:
            service_name = target_name

        params.update({'SERVICE_NAME': ExtensionsToolbox.remove_prefix(service_name, 'ovs-'),
                       'RUN_FILE_DIR': self._run_file_dir,
                       'STARTUP_DEPENDENCY': '' if startup_dependency is None else '{0}.service'.format(startup_dependency)})
        template_content = client.file_read(template_file)
        for key, value in params.iteritems():
            template_content = template_content.replace('<{0}>'.format(key), str(value))
        service_path = self.get_service_file_path(service_name)
        client.file_write(service_path, template_content)

        try:
            client.run(['systemctl', 'daemon-reload'])
            client.run(['systemctl', 'enable', '{0}.service'.format(service_name)])
        except CalledProcessError as cpe:
            self._logger.exception('Add {0}.service failed, {1}'.format(service_name, cpe.output))
            raise Exception('Add {0}.service failed, {1}'.format(service_name, cpe.output))

        if delay_registration is False:
            self.register_service(service_metadata=params, node_name=self._system.get_my_machine_id(client))
        return params
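
A hedged usage sketch; the template, target and dependency names are illustrative, not taken from a real deployment:

params = service_manager.add_service(name='ovs-albaproxy',  # illustrative template name
                                     client=ssh_client,
                                     target_name='ovs-albaproxy_myvpool',
                                     startup_dependency='ovs-volumedriver_myvpool')
# `params` is the dict that was substituted into the template, or {} when
# no systemd template file exists for the given name.
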
Example #21
    def add_backend(self, backend_name, domains=None, scaling='LOCAL'):
        """
        Add a backend with provided domains and scaling to the model.
        :param backend_name: name of the backend
        :type backend_name: str
        :param domains: names of the domains the backend is linked to
        :type domains: list
        :param scaling: scaling of the backend, one of AlbaBackend.SCALINGS
        :type scaling: str
        """
        if domains is None:
            domains = []
        else:
            for domain_name in domains:
                if domain_name not in self._domains:
                    raise ValueError(
                        'Invalid domain passed: {0}'.format(domain_name))

        ExtensionsToolbox.verify_required_params(
            required_params={'backend_name': (str, Toolbox.regex_backend, True),
                             'domains': (list, self._domains, True),
                             'scaling': (str, AlbaBackend.SCALINGS, True)},
            actual_params={'backend_name': backend_name,
                           'domains': domains,
                           'scaling': scaling},
            verify_keys=True)
        be_dict = {
            'name': backend_name,
            'domains': {
                'domain_guids': domains
            },
            'scaling': scaling
        }
        if 'setup' not in self.config.keys():
            self.config['setup'] = {}
        self._backends.append(be_dict['name'])
        if 'backends' not in self.config['setup']:
            self.config['setup']['backends'] = []
        self.config['setup']['backends'].append(be_dict)
Example #22
 def configure_proxy(backend_name, proxy_configuration):
     """
     Update the configuration of all ALBA proxies linked to the given backend and restart them
     :param backend_name: name of the backend whose proxies should be reconfigured
     :type backend_name: str
     :param proxy_configuration: proxy configuration keys to update (must be a subset of ProxySetup.PARAMS)
     :type proxy_configuration: dict
     """
     faulty_keys = [
         key for key in proxy_configuration.keys()
         if key not in ProxySetup.PARAMS
     ]
     if len(faulty_keys) > 0:
         raise ValueError(
             '{0} are unsupported keys for proxy configuration.'.format(
                 ', '.join(faulty_keys)))
     ExtensionsToolbox.verify_required_params(ProxySetup.PARAMS,
                                              proxy_configuration)
     vpools = VPoolList.get_vpools()
     service_manager = ServiceFactory.get_manager()
     with open('/root/old_proxies', 'w') as backup_file:
         for vpool in vpools:
             if vpool.metadata['backend']['backend_info'][
                     'name'] != backend_name:
                 continue
             for storagedriver in vpool.storagedrivers:
                 for proxy in storagedriver.alba_proxies:
                     config_loc = 'ovs/vpools/{0}/proxies/{1}/config/main'.format(
                         vpool.guid, proxy.guid)
                     proxy_service = Service(proxy.service_guid)
                     proxy_config = Configuration.get(config_loc)
                     old_proxy_config = dict(proxy_config)
                     backup_file.write('{} -- {}\n'.format(
                         config_loc, old_proxy_config))
                     proxy_config.update(proxy_configuration)
                     ProxySetup.LOGGER.info(
                         "Changed {0} to {1} for proxy {2}".format(
                             old_proxy_config, proxy_config, config_loc))
                     ProxySetup.LOGGER.info("Changed items {0}".format([
                         (key, value)
                         for key, value in proxy_config.iteritems()
                         if key not in old_proxy_config.keys()
                     ]))
                     Configuration.set(config_loc,
                                       json.dumps(proxy_config, indent=4),
                                       raw=True)
                     client = SSHClient(storagedriver.storage_ip,
                                        username='******')
                     service_manager.restart_service(proxy_service.name,
                                                     client=client)
Example #23
    def collapse_arakoon():
        """
        Collapse Arakoon's Tlogs
        :return: None
        """
        from ovs_extensions.generic.toolbox import ExtensionsToolbox

        GenericController._logger.info('Arakoon collapse started')
        cluster_info = []
        storagerouters = StorageRouterList.get_storagerouters()
        if os.environ.get('RUNNING_UNITTESTS') != 'True':
            cluster_info = [('cacc', storagerouters[0])]

        cluster_names = []
        for service in ServiceList.get_services():
            if service.is_internal is True and service.type.name in (ServiceType.SERVICE_TYPES.ARAKOON,
                                                                     ServiceType.SERVICE_TYPES.NS_MGR,
                                                                     ServiceType.SERVICE_TYPES.ALBA_MGR):
                cluster = ExtensionsToolbox.remove_prefix(service.name, 'arakoon-')
                if cluster in cluster_names and cluster not in [ARAKOON_NAME, ARAKOON_NAME_UNITTEST]:
                    continue
                cluster_names.append(cluster)
                cluster_info.append((cluster, service.storagerouter))
        workload = {}
        cluster_config_map = {}
        for cluster, storagerouter in cluster_info:
            GenericController._logger.debug('  Collecting info for cluster {0}'.format(cluster))
            ip = storagerouter.ip if cluster in [ARAKOON_NAME, ARAKOON_NAME_UNITTEST] else None
            try:
                config = ArakoonClusterConfig(cluster_id=cluster, source_ip=ip)
                cluster_config_map[cluster] = config
            except Exception:
                GenericController._logger.exception('  Retrieving cluster information on {0} for {1} failed'.format(storagerouter.ip, cluster))
                continue
            for node in config.nodes:
                if node.ip not in workload:
                    workload[node.ip] = {'node_id': node.name,
                                         'clusters': []}
                workload[node.ip]['clusters'].append((cluster, ip))
        for storagerouter in storagerouters:
            try:
                if storagerouter.ip not in workload:
                    continue
                node_workload = workload[storagerouter.ip]
                client = SSHClient(storagerouter)
                for cluster, ip in node_workload['clusters']:
                    try:
                        GenericController._logger.debug('  Collapsing cluster {0} on {1}'.format(cluster, storagerouter.ip))
                        client.run(['arakoon', '--collapse-local', node_workload['node_id'], '2', '-config', cluster_config_map[cluster].external_config_path])
                        GenericController._logger.debug('  Collapsing cluster {0} on {1} completed'.format(cluster, storagerouter.ip))
                    except Exception:
                        GenericController._logger.exception('  Collapsing cluster {0} on {1} failed'.format(cluster, storagerouter.ip))
            except UnableToConnectException:
                GenericController._logger.error('  Could not collapse any cluster on {0} (not reachable)'.format(storagerouter.name))
        GenericController._logger.info('Arakoon collapse finished')
Example #24
    def configure_mds(self, config):
        """
        Configure the global MDS settings for this vPool
        :param config: MDS configuration settings (Can contain amount of tlogs to wait for during MDS checkup, MDS safety and the maximum load for an MDS)
        :type config: dict
        :raises RuntimeError: If specified safety not between 1 and 5
                              If specified amount of tlogs is less than 1
                              If specified maximum load is less than 10%
        :return: None
        :rtype: NoneType
        """
        if self.vpool is None:
            raise RuntimeError(
                'Cannot configure MDS settings when no vPool has been created yet'
            )

        ExtensionsToolbox.verify_required_params(verify_keys=True,
                                                 actual_params=config,
                                                 required_params={'mds_tlogs': (int, {'min': 1}, False),
                                                                  'mds_safety': (int, {'min': 1, 'max': 5}, False),
                                                                  'mds_maxload': (int, {'min': 10}, False)})

        # Don't set a default value here, because we need to know whether these values have been specifically set or were set at None
        self.mds_tlogs = config.get('mds_tlogs')
        self.mds_safety = config.get('mds_safety')
        self.mds_maxload = config.get('mds_maxload')
        Configuration.set(key='/ovs/vpools/{0}/mds_config'.format(self.vpool.guid),
                          value={'mds_tlogs': self.mds_tlogs or 100,
                                 'mds_safety': self.mds_safety or 3,
                                 'mds_maxload': self.mds_maxload or 75})
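
A sketch of a valid call, assuming installer is an instance of this class; all three keys are optional:

installer.configure_mds({'mds_safety': 2, 'mds_maxload': 80})
# Omitted keys fall back to the defaults written above:
# 100 tlogs, safety 3 and a maximum load of 75%.
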
Example #25
 def get_run_file_path(self, name):
     # type: (str) -> str
     """
     Get the path to the run file for the given service
     This is tied to the template files as they specify something like `/opt/OpenvStorage/run/<SERVICE_NAME>.version`
     :param name: Name of the service
     :type name: str
     :return: Path to the file
     :rtype: str
     """
     non_ovs_name = ExtensionsToolbox.remove_prefix(name, self.OVS_SERVICE_PREFIX)
     return os.path.join(self._run_file_dir, '{0}.version'.format(non_ovs_name))
Example #26
 def unregister_service(self, node_name, service_name):
     # type: (str, str) -> None
     """
     Un-register the metadata of a service from the configuration management
     :param node_name: Name of the node on which to un-register the service
     :type node_name: str
     :param service_name: Name of the service to clean from the configuration management
     :type service_name: str
     :return: None
     :rtype: NoneType
     """
     self._configuration.delete(key=self.service_config_key.format(node_name, ExtensionsToolbox.remove_prefix(service_name, self.OVS_SERVICE_PREFIX)))
Example #27
    def __init__(self, ip=None, port=None, database=None):
        # type: (str, int, str) -> None
        """
        Create client instance for graphite and validate parameters
        :param ip: IP address of the Graphite server to send data towards
        :type ip: str
        :param port: port of the UDP listening socket
        :type port: int
        :param database: name of the database
        :type database: str
        """
        graphite_data = {}
        if all(p is None for p in [ip, port]):
            # Nothing specified
            graphite_data = self.get_graphite_config()
            if not graphite_data:
                raise RuntimeError(
                    'No graphite data found in config path `{0}`'.format(
                        self.CONFIG_PATH))

        ip = ip or graphite_data.get('ip')
        port = port or graphite_data.get('port', 2003)

        ExtensionsToolbox.verify_required_params(
            verify_keys=True,
            actual_params={
                'host': ip,
                'port': port
            },
            required_params={
                'host': (str, ExtensionsToolbox.regex_ip, True),
                'port': (int, {
                    'min': 1025,
                    'max': 65535
                }, True)
            })

        super(GraphiteClient, self).__init__(ip=ip,
                                             port=port,
                                             database=database)
Example #28
    def validate_and_retrieve_config(cls):
        """
        Retrieve and validate the configuration for StatsMonkey
        :return: The configuration set at /ovs/framework/monitoring/stats_monkey
        :rtype: dict
        """
        config_key = '/ovs/framework/monitoring/stats_monkey'
        config = cls._get_configuration()
        if not config.exists(config_key):
            raise ValueError(
                'StatsMonkey requires a configuration key at {0}'.format(
                    config_key))

        config = config.get(config_key)
        if not isinstance(config, dict):
            raise ValueError('StatsMonkey configuration must be of type dict')

        required_params = {
            'host': (str, ExtensionsToolbox.regex_ip),
            'port': (int, {
                'min': 1025,
                'max': 65535
            }),
            'interval': (int, {
                'min': 1
            }, False),
            'database': (str, None),
            'transport': (str, ['influxdb', 'redis', 'graphite']),
            'environment': (str, None)
        }
        if config.get('transport') == 'influxdb':
            required_params['username'] = (str, None)
        if config.get('transport') in ['influxdb', 'redis']:
            required_params['password'] = (str, None)

        ExtensionsToolbox.verify_required_params(
            actual_params=config, required_params=required_params)
        cls._config = config
        return cls._config
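
A sketch of a configuration dict that would pass this validation (all values are illustrative):

stats_monkey_config = {'host': '10.100.1.4',
                       'port': 2003,
                       'interval': 60,
                       'database': 'stats',
                       'transport': 'graphite',
                       'environment': 'prod'}
# With transport 'influxdb' a 'username' is mandatory too; a 'password'
# is mandatory for both 'influxdb' and 'redis'.
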
Example #29
    def test_filter_dict_for_none(self):
        d = {'a': 'a',
             'b': {'b1': 'b1',
                   'b2': None},
             'c': None,
             'd': {'d1': {'d11': {'d111': 'd111'}}},
             'e': {'e1': None}}

        result_dict = {'a': 'a',
                       'b': {'b1': 'b1'},
                       'd': {'d1': {'d11': {'d111': 'd111'}}}}
        filtered_dict = ExtensionsToolbox.filter_dict_for_none(d)
        self.assertEqual(filtered_dict, result_dict)
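
The test pins down the behaviour: None values are dropped recursively, and sub-dicts left empty by that filtering disappear as well. One possible implementation consistent with the test (not necessarily the library's own):

def filter_dict_for_none(d):
    # Recursively drop None values; sub-dicts that end up empty
    # (such as {'e1': None} above) are dropped too.
    result = {}
    for key, value in d.items():
        if isinstance(value, dict):
            filtered = filter_dict_for_none(value)
            if filtered:
                result[key] = filtered
        elif value is not None:
            result[key] = value
    return result
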
Example #30
 def register_service(self, node_name, service_metadata):
     # type: (str, dict) -> None
     """
     Register the metadata of the service to the configuration management
     :param node_name: Name of the node on which the service is running
     :type node_name: str
     :param service_metadata: Metadata of the service
     :type service_metadata: dict
     :return: None
     :rtype: NoneType
     """
     service_name = service_metadata['SERVICE_NAME']
     self._configuration.set(key=self.service_config_key.format(node_name, ExtensionsToolbox.remove_prefix(service_name, self.OVS_SERVICE_PREFIX)),
                             value=service_metadata)
Example #31
    def check_collapse(cls, result_handler, max_collapse_age=3, min_tlx_amount=10):
        """
        Verifies collapsing has occurred for all Arakoons
        :param result_handler: logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        :param max_collapse_age: tlx files may not be longer than x days
        :type max_collapse_age: int
        :param min_tlx_amount: Minimum amount of tlxes before making collapsing mandatory (defaults to 10)
        :type min_tlx_amount: int
        :return: None
        :rtype: NoneType
        """
        arakoon_clusters = cls._get_arakoon_clusters(result_handler)
        result_handler.info('Starting Arakoon collapse test', add_to_result=False)
        max_age_seconds = timedelta(days=max_collapse_age).total_seconds()
        result_handler.info('Retrieving all collapsing statistics. This might take a while', add_to_result=False)
        start = time.time()
        arakoon_stats = cls._retrieve_stats(result_handler, arakoon_clusters)
        result_handler.info('Retrieving all collapsing statistics succeeded (duration: {0})'.format(time.time() - start), add_to_result=False)
        for cluster_type, clusters in arakoon_stats.iteritems():
            result_handler.info('Testing the collapse of {0} Arakoons'.format(cluster_type), add_to_result=False)
            for cluster in clusters:
                cluster_name = cluster['cluster_name']
                collapse_result = cluster['collapse_result']
                collapse_result = OrderedDict(sorted(collapse_result.items(), key=lambda item: ExtensionsToolbox.advanced_sort(item[0].ip, separator='.')))
                for node, stats in collapse_result.iteritems():
                    identifier_log = 'Arakoon cluster {0} on node {1}'.format(cluster_name, node.ip)
                    if len(stats['errors']) > 0:
                        # Determine where issues were found
                        for step, exception in stats['errors']:
                            if step == 'build_client':
                                try:
                                    # Raise the thrown exception
                                    raise exception
                                except TimeOutException:
                                    result_handler.warning('Connection to {0} has timed out'.format(identifier_log), code=ErrorCodes.ssh_connection_time)
                                except (socket.error, UnableToConnectException):
                                    result_handler.failure(
                                        'Connection to {0} could not be established'.format(identifier_log), code=ErrorCodes.ssh_connection_fail)
                                except NotAuthenticatedException:
                                    result_handler.skip('Connection to {0} could not be authenticated. This node has no access to the Arakoon node.'.format(identifier_log),
                                                        code=ErrorCodes.ssh_connection_authentication)
                                except Exception:
                                    message = 'Connection to {0} could not be established due to an unhandled exception.'.format(identifier_log)
                                    cls.logger.exception(message)
                                    result_handler.exception(message, code=ErrorCodes.unhandled_exception)
                            elif step == 'stat_dir':
                                try:
                                    raise exception
                                except Exception:
                                    message = 'Unable to list the contents of the tlog directory ({0}) for {1}'.format(node.tlog_dir, identifier_log)
                                    cls.logger.exception(message)
                                    result_handler.exception(message, code=ErrorCodes.unhandled_exception)
                        continue
                    tlx_files = stats['result']['tlx']
                    tlog_files = stats['result']['tlog']
                    headdb_files = stats['result']['headDB']
                    avail_size = stats['result']['avail_size']

                    if any(item is None for item in [tlx_files, tlog_files, avail_size]):
                        # Exception occurred but no errors were logged
                        result_handler.exception('Either the tlx or tlog files or the available size of the tlog directory ({0}) could not be found for {1}'.format(node.tlog_dir, identifier_log),
                                                 code=ErrorCodes.tlx_tlog_not_found)
                        continue
                    if len(headdb_files) > 0:
                        headdb_size = sum([int(i[2]) for i in headdb_files])
                        collapse_size_msg = 'Spare space for local collapse is'
                        if avail_size >= headdb_size * 4:
                            result_handler.success('{0} sufficient (n > 4x head.db size)'.format(collapse_size_msg))
                        elif avail_size >= headdb_size * 3:
                            result_handler.warning('{0} running short (n > 3x head.db size)'.format(collapse_size_msg))
                        elif avail_size >= headdb_size * 2:
                            result_handler.failure('{0} just enough (n > 2x head.db size)'.format(collapse_size_msg))
                        else:
                            result_handler.failure('{0} insufficient (n < 2x head.db size)'.format(collapse_size_msg))

                    if len(tlog_files) == 0:
                        # A tlog should always be present
                        result_handler.failure('{0} has no open tlog'.format(identifier_log), code=ErrorCodes.tlog_not_found)
                        continue
                    if len(tlx_files) < min_tlx_amount:
                        result_handler.skip('{0} only has {1} tlx, not worth collapsing (required: {2})'.format(identifier_log, len(tlx_files), min_tlx_amount))
                        continue
                    # Compare youngest tlog and oldest tlx timestamp
                    seconds_difference = int(tlog_files[-1][0]) - int(tlx_files[0][0])
                    if max_age_seconds > seconds_difference:
                        result_handler.success('{0} should not be collapsed. The oldest tlx is less than {1} days older than the youngest tlog (actual age: {2})'.format(identifier_log, max_collapse_age, str(timedelta(seconds=seconds_difference))),
                                               code=ErrorCodes.collapse_ok)
                    else:
                        result_handler.failure('{0} should be collapsed. The oldest tlx is currently {1} old'.format(identifier_log, str(timedelta(seconds=seconds_difference))), code=ErrorCodes.collapse_not_ok)
Example #32
    def check_if_proxies_work(cls, result_handler):
        """
        Checks if all Alba Proxies work on a local machine; it creates a namespace and tries to put an object
        :param result_handler: logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        :return: None
        :rtype: NoneType
        """
        namespace_params = {'bucket_count': (list, None),
                            'logical': (int, None),
                            'storage': (int, None),
                            'storage_per_osd': (list, None)}

        result_handler.info('Checking the ALBA proxies.', add_to_result=False)

        amount_of_presets_not_working = []
        # ignore possible subprocess output
        fnull = open(os.devnull, 'w')
        # try put/get/verify on all available proxies on the local node
        local_proxies = ServiceHelper.get_local_proxy_services()
        if len(local_proxies) == 0:
            result_handler.info('Found no proxies.', add_to_result=False)
            return amount_of_presets_not_working
        api_cache = {}
        for service in local_proxies:
            try:
                result_handler.info('Checking ALBA proxy {0}.'.format(service.name), add_to_result=False)
                ip = service.alba_proxy.storagedriver.storage_ip
                # Encapsulating try to determine test output
                try:
                    # Determine to what backend the proxy is connected
                    proxy_client_cfg = AlbaCLI.run(command='proxy-client-cfg', named_params={'host': ip, 'port': service.ports[0]})
                except AlbaException:
                    result_handler.failure('Fetching proxy info has failed. Please verify if {0}:{1} is the correct address for proxy {2}.'.format(ip, service.ports[0], service.name),
                                           code=ErrorCodes.alba_cmd_fail)
                    continue
                # Fetch arakoon information
                abm_name = proxy_client_cfg.get('cluster_id')
                # Check if proxy config is correctly setup
                if abm_name is None:
                    raise ConfigNotMatchedException('Proxy config for proxy {0} does not have the correct format on node {1} with port {2}.'.format(service.name, ip, service.ports[0]))
                abm_config = Configuration.get_configuration_path('/ovs/vpools/{0}/proxies/{1}/config/abm'.format(service.alba_proxy.storagedriver.vpool.guid, service.alba_proxy.guid))

                # Determine presets / backend
                try:
                    presets = AlbaCLI.run(command='list-presets', config=abm_config)
                except AlbaException:
                    result_handler.failure('Listing the presets has failed. Please check the arakoon config path. We used {0}'.format(abm_config),
                                           code=ErrorCodes.alba_cmd_fail)
                    continue

                for preset in presets:
                    # If preset is not in use, test will fail so add a skip
                    if preset['in_use'] is False:
                        result_handler.skip('Preset {0} is not in use and will not be checked'.format(preset['name']))
                        continue
                    preset_name = preset['name']
                    # Encapsulation try for cleanup
                    try:
                        # Generate new namespace name using the preset
                        namespace_key_prefix = 'ovs-healthcheck-ns-{0}-{1}'.format(preset_name, AlbaHealthCheck.LOCAL_ID)
                        namespace_key = '{0}_{1}'.format(namespace_key_prefix, uuid.uuid4())
                        object_key = 'ovs-healthcheck-obj-{0}'.format(str(uuid.uuid4()))
                        # Create namespace
                        AlbaCLI.run(command='proxy-create-namespace',
                                    named_params={'host': ip, 'port': service.ports[0]},
                                    extra_params=[namespace_key, preset_name])
                        # Wait until fully created
                        namespace_start_time = time.time()
                        for index in xrange(2):
                            # Running twice because the first run could give a false positive: the OSDs alert the NSM,
                            # which then reports received messages that are not the ones we are waiting for
                            AlbaCLI.run(command='deliver-messages', config=abm_config)
                        while True:
                            if time.time() - namespace_start_time > AlbaHealthCheck.NAMESPACE_TIMEOUT:
                                raise AlbaTimeOutException('Creating namespace has timed out after {0}s'.format(time.time() - namespace_start_time), 'deliver-messages')
                            list_ns_osds_output = AlbaCLI.run(command='list-ns-osds', config=abm_config, extra_params=[namespace_key])
                            # Example output: [[0, [u'Active']], [3, [u'Active']]]
                            namespace_ready = True
                            for osd_info in list_ns_osds_output:
                                if osd_info[1][0] != 'Active':
                                    # If we found an OSD not Active, check if preset is satisfiable
                                    namespace_ready = False
                                    break
                            if namespace_ready is True:
                                break
                            else:
                                result_handler.info('Not all OSDs have responded to the creation message. Fetching the safety', add_to_result=False)
                                try:
                                    # Fetch the preset information on the Framework
                                    # This adds an extra delay for the messages to propagate too
                                    vpool = service.alba_proxy.storagedriver.vpool
                                    alba_backend_guid = vpool.metadata['backend']['backend_info']['alba_backend_guid']
                                    api_url = 'alba/backends/{0}'.format(alba_backend_guid)
                                    if api_url not in api_cache:
                                        connection_info = vpool.metadata['backend']['backend_info']['connection_info']
                                        api_client = OVSClient(connection_info['host'], connection_info['port'], (connection_info['client_id'], connection_info['client_secret']))
                                        start = time.time()
                                        _presets = api_client.get(api_url, params={'contents': 'presets'})['presets']
                                        api_cache[api_url] = _presets
                                        result_handler.info('Fetching the safety took {0} seconds'.format(time.time() - start))
                                    _presets = api_cache[api_url]
                                    _preset = filter(lambda p: p['name'] == preset_name, _presets)[0]
                                    if _preset['is_available'] is True:
                                        # Preset satisfiable, don't care about osds availability
                                        result_handler.info('Requested preset is available, no longer waiting on \'deliver_messages\'', add_to_result=False)
                                        break
                                    else:
                                        raise ValueError('Requested preset is marked as unavailable. Please check the disk safety')
                                except ValueError:
                                    raise
                                except Exception:
                                    msg = 'Could not query the preset data. Checking the preset might time out'
                                    result_handler.warning(msg)
                                    cls.logger.exception(msg)
                                    # Sleep for syncing purposes
                                    time.sleep(1)
                        result_handler.success('Namespace successfully created on proxy {0} with preset {1}!'.format(service.name, preset_name),
                                               code=ErrorCodes.proxy_namespace_create)
                        namespace_info = AlbaCLI.run(command='show-namespace', config=abm_config, extra_params=[namespace_key])
                        ExtensionsToolbox.verify_required_params(required_params=namespace_params, actual_params=namespace_info)
                        result_handler.success('Namespace successfully fetched on proxy {0} with preset {1}!'.format(service.name, preset_name),
                                               code=ErrorCodes.proxy_namespace_fetch)

                        # Put test object to given dir
                        with open(AlbaHealthCheck.TEMP_FILE_LOC, 'wb') as output_file:
                            output_file.write(os.urandom(AlbaHealthCheck.TEMP_FILE_SIZE))
                        AlbaCLI.run(command='proxy-upload-object',
                                    named_params={'host': ip, 'port': service.ports[0]},
                                    extra_params=[namespace_key, AlbaHealthCheck.TEMP_FILE_LOC, object_key])
                        result_handler.success('Successfully uploaded the object to namespace {0}'.format(namespace_key),
                                               code=ErrorCodes.proxy_upload_obj)
                        # download object
                        AlbaCLI.run(command='proxy-download-object',
                                    named_params={'host': ip, 'port': service.ports[0]},
                                    extra_params=[namespace_key, object_key, AlbaHealthCheck.TEMP_FILE_FETCHED_LOC])
                        result_handler.success('Successfully downloaded the object to namespace {0}'.format(namespace_key),
                                               code=ErrorCodes.proxy_download_obj)
                        # check if both files exist - issue #57
                        if not (os.path.isfile(AlbaHealthCheck.TEMP_FILE_FETCHED_LOC) and os.path.isfile(AlbaHealthCheck.TEMP_FILE_LOC)):
                            # creation of object failed
                            raise ObjectNotFoundException(ValueError('Creation of object has failed'))
                        hash_original = hashlib.md5(open(AlbaHealthCheck.TEMP_FILE_LOC, 'rb').read()).hexdigest()
                        hash_fetched = hashlib.md5(open(AlbaHealthCheck.TEMP_FILE_FETCHED_LOC, 'rb').read()).hexdigest()

                        if hash_original == hash_fetched:
                            result_handler.success('Fetched object {0} from namespace {1} on proxy {2} with preset {3} matches the created object!'.format(object_key, namespace_key, service.name, preset_name),
                                                   code=ErrorCodes.proxy_verify_obj)
                        else:
                            result_handler.failure('Fetched object {0} from namespace {1} on proxy {2} with preset {3} does not match the created object!'.format(object_key, namespace_key, service.name, preset_name),
                                                   code=ErrorCodes.proxy_verify_obj_fail)

                    except ValueError:
                        result_handler.failure('The preset is not available for use')
                    except ObjectNotFoundException as ex:
                        amount_of_presets_not_working.append(preset_name)
                        result_handler.failure('Failed to put object on namespace {0} on proxy {1} with preset {2}. Error: {3}'.format(namespace_key, service.name, preset_name, ex))
                    except AlbaTimeOutException as ex:
                        result_handler.failure(str(ex))
                    except AlbaException as ex:
                        code = ErrorCodes.alba_cmd_fail
                        if ex.alba_command == 'proxy-create-namespace':
                            result_handler.failure('Create namespace has failed with {0} on namespace {1} with proxy {2} with preset {3}'.format(str(ex), namespace_key, service.name, preset_name),
                                                   code=code)
                        elif ex.alba_command == 'show-namespace':
                            result_handler.failure('Show namespace has failed with {0} on namespace {1} with proxy {2} with preset {3}'.format(str(ex), namespace_key, service.name, preset_name),
                                                   code=code)
                        elif ex.alba_command == 'proxy-upload-object':
                            result_handler.failure('Uploading the object has failed with {0} on namespace {1} with proxy {2} with preset {3}'.format(str(ex), namespace_key, service.name, preset_name),
                                                   code=code)
                        elif ex.alba_command == 'proxy-download-object':
                            result_handler.failure('Downloading the object has failed with {0} on namespace {1} with proxy {2} with preset {3}'.format(str(ex), namespace_key, service.name, preset_name),
                                                   code=code)
                    finally:
                        # Delete the created namespace and preset
                        subprocess.call(['rm', str(AlbaHealthCheck.TEMP_FILE_LOC)], stdout=fnull, stderr=subprocess.STDOUT)
                        subprocess.call(['rm', str(AlbaHealthCheck.TEMP_FILE_FETCHED_LOC)], stdout=fnull, stderr=subprocess.STDOUT)
                        try:
                            namespaces = AlbaCLI.run(command='list-namespaces', config=abm_config)
                            namespaces_to_remove = []
                            proxy_named_params = {'host': ip, 'port': service.ports[0]}
                            for namespace in namespaces:
                                if namespace['name'].startswith(namespace_key_prefix):
                                    namespaces_to_remove.append(namespace['name'])
                            for namespace_name in namespaces_to_remove:
                                if namespace_name == namespace_key:
                                    result_handler.info('Deleting namespace {0}.'.format(namespace_name))
                                else:
                                    result_handler.warning('Deleting namespace {0} which was leftover from a previous run.'.format(namespace_name))

                                AlbaCLI.run(command='proxy-delete-namespace',
                                            named_params=proxy_named_params,
                                            extra_params=[namespace_name])

                                namespace_delete_start = time.time()
                                while True:
                                    try:
                                        AlbaCLI.run(command='show-namespace', config=abm_config, extra_params=[namespace_name])  # Will fail if the namespace does not exist
                                    except AlbaException:
                                        result_handler.success('Namespace {0} successfully removed.'.format(namespace_name))
                                        break
                                    if time.time() - namespace_delete_start > AlbaHealthCheck.NAMESPACE_TIMEOUT:
                                        raise AlbaTimeOutException('Delete namespace has timed out after {0}s'.format(time.time() - namespace_delete_start), 'show-namespace')

                                # be tidy, and make the proxy forget the namespace
                                try:
                                    AlbaCLI.run(command='proxy-statistics',
                                                named_params=proxy_named_params,
                                                extra_params=['--forget', namespace_name])
                                except Exception:
                                    result_handler.warning('Failed to make proxy forget namespace {0}.'.format(namespace_name))
                        except AlbaException as ex:
                            if ex.alba_command == 'list-namespaces':
                                result_handler.failure(
                                    'List namespaces has failed with {0} on namespace {1} with proxy {2} with preset {3}'.format(
                                        str(ex), namespace_key, service.name, preset_name))
                            elif ex.alba_command == 'proxy-delete-namespace':
                                result_handler.failure(
                                    'Delete namespace has failed with {0} on namespace {1} with proxy {2} with preset {3}'.format(
                                        str(ex), namespace_key, service.name, preset_name))

            except subprocess.CalledProcessError as ex:
                # this should stay for the deletion of the remaining files
                amount_of_presets_not_working.append(service.name)
                result_handler.failure('Proxy {0} has some problems. Got {1} as error'.format(service.name, ex),
                                       code=ErrorCodes.proxy_problems)

            except ConfigNotMatchedException as ex:
                amount_of_presets_not_working.append(service.name)
                result_handler.failure('Proxy {0} has some problems. Got {1} as error'.format(service.name, ex),
                                       code=ErrorCodes.proxy_problems)
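
The proxy check above boils down to an md5 round trip over two local files. A minimal standalone sketch of that verification step, assuming the object has already been uploaded and downloaded to the given paths (the helper name is hypothetical):

import hashlib
import os

def verify_round_trip(original_path, fetched_path):
    # Both files must exist before comparing (cfr. issue #57 above)
    if not (os.path.isfile(original_path) and os.path.isfile(fetched_path)):
        raise ValueError('Creation of object has failed')
    with open(original_path, 'rb') as f:
        hash_original = hashlib.md5(f.read()).hexdigest()
    with open(fetched_path, 'rb') as f:
        hash_fetched = hashlib.md5(f.read()).hexdigest()
    # Identical digests mean the proxy returned the object unchanged
    return hash_original == hash_fetched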
Ejemplo n.º 33
0
    def check_arakoon_ports(cls, result_handler):
        """
        Verifies that the Arakoon clusters still respond to connections
        :param result_handler: logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        :return: None
        :rtype: NoneType
        """
        arakoon_clusters = cls._get_arakoon_clusters(result_handler)
        result_handler.info('Starting Arakoon ports test.', add_to_result=False)
        result_handler.info('Retrieving all connection information. This might take a while', add_to_result=False)
        start = time.time()
        arakoon_stats = cls._get_port_connections(result_handler, arakoon_clusters)
        result_handler.info('Retrieving all connection information succeeded (duration: {0})'.format(time.time() - start), add_to_result=False)
        for cluster_type, clusters in arakoon_stats.iteritems():
            result_handler.info('Testing the connections of {0} Arakoons'.format(cluster_type), add_to_result=False)
            for cluster in clusters:
                cluster_name = cluster['cluster_name']
                connection_result = cluster['connection_result']
                connection_result = OrderedDict(sorted(connection_result.items(), key=lambda item: ExtensionsToolbox.advanced_sort(item[0].ip, separator='.')))
                for node, stats in connection_result.iteritems():
                    identifier_log = 'Arakoon cluster {0} on node {1}'.format(cluster_name, node.ip)
                    if len(stats['errors']) > 0:
                        # Determine where issues were found
                        for step, exception in stats['errors']:
                            if step == 'test_connection':
                                try:
                                    # Raise the thrown exception
                                    raise exception
                                except Exception:
                                    message = 'Connection to {0} could not be established due to an unhandled exception.'.format(identifier_log)
                                    cls.logger.exception(message)
                                    result_handler.exception(message, code=ErrorCodes.unhandled_exception)
                        continue
                    if stats['result'] is True:
                        result_handler.success('Connection established to {0}'.format(identifier_log),
                                               code=ErrorCodes.arakoon_connection_ok)
                    else:
                        result_handler.failure('Connection could not be established to {0}'.format(identifier_log),
                                               code=ErrorCodes.arakoon_connection_failure)
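
The per-node probe behind this check (_get_port_connections) is not part of the example; a minimal sketch of what such a probe could look like, assuming a plain TCP connect is enough to count as established (function name and timeout are assumptions):

import socket

def test_connection(ip, port, timeout=5):
    # Returns True when a TCP connection to the Arakoon node can be opened
    try:
        sock = socket.create_connection((ip, port), timeout)
        sock.close()
        return True
    except (socket.error, socket.timeout):
        return False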
Ejemplo n.º 34
0
    def check_arakoon_fd(cls, result_handler, fd_limit=30, passed_connections=None):
        """
        Checks all current open tcp file descriptors for all Arakoon clusters in the OVS cluster
        Will raise warnings when these reach a certain threshold
        :param result_handler: Logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        :param fd_limit: Threshold for the number of TCP connections at which to start logging warnings
        :type fd_limit: int
        :param passed_connections: checked TCP connections
        :type passed_connections: list
        :return: None
        :rtype: NoneType
        """
        if passed_connections is None:
            passed_connections = ['ESTABLISHED', 'TIME_WAIT']
        warning_threshold = fd_limit * 80 / 100
        error_threshold = fd_limit * 95 / 100
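        # With the default fd_limit of 30 this yields thresholds of 24 (80%) and 28 (95%) connections (integer division)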

        result_handler.info('Starting Arakoon integrity test', add_to_result=False)
        arakoon_clusters = cls._get_arakoon_clusters(result_handler)
        start = time.time()
        arakoon_fd_results = cls._get_filedescriptors(result_handler, arakoon_clusters)
        result_handler.info('Retrieving all file descriptor information succeeded (duration: {0})'.format(time.time() - start), add_to_result=False)
        for cluster_type, clusters in arakoon_fd_results.iteritems():
            result_handler.info('Checking the file descriptors of {0} Arakoons'.format(cluster_type), add_to_result=False)
            for cluster in clusters:
                cluster_name = cluster['cluster_name']
                fd_result = cluster['fd_result']
                fd_result = OrderedDict(sorted(fd_result.items(), key=lambda item: ExtensionsToolbox.advanced_sort(item[0].ip, separator='.')))
                for node, stats in fd_result.iteritems():
                    identifier_log = 'Arakoon cluster {0} on node {1}'.format(cluster_name, node.ip)
                    if len(stats['errors']) > 0:
                        # Determine where issues were found
                        for step, exception in stats['errors']:
                            if step == 'build_client':
                                try:
                                    # Raise the thrown exception
                                    raise exception
                                except TimeOutException:
                                    result_handler.warning('Connection to {0} has timed out'.format(identifier_log), code=ErrorCodes.ssh_connection_time)
                                except (socket.error, UnableToConnectException):
                                    result_handler.failure(
                                        'Connection to {0} could not be established'.format(identifier_log), code=ErrorCodes.ssh_connection_fail)
                                except NotAuthenticatedException:
                                    result_handler.skip('Connection to {0} could not be authenticated. This node has no access to the Arakoon node.'.format(identifier_log),
                                                        code=ErrorCodes.ssh_connection_authentication)
                                except Exception:
                                    message = 'Connection to {0} could not be established due to an unhandled exception.'.format(identifier_log)
                                    cls.logger.exception(message)
                                    result_handler.exception(message, code=ErrorCodes.unhandled_exception)
                            elif step == 'lsof':
                                try:
                                    raise exception
                                except Exception:
                                    message = 'Unable to list the file descriptors for {0}'.format(identifier_log)
                                    cls.logger.exception(message)
                                    result_handler.exception(message, ErrorCodes.unhandled_exception)
                        continue
                    fds = stats['result']['fds']
                    filtered_fds = [i for i in fds if i.split()[-1].strip('(').strip(')') in passed_connections]
                    if len(filtered_fds) >= warning_threshold:
                        if len(filtered_fds) >= error_threshold:
                            result_handler.warning('Number of TCP connections exceeded the 95% warning threshold for {0}, ({1}/{2})'.format(identifier_log, len(filtered_fds), fd_limit),
                                                   code=ErrorCodes.arakoon_fd_95)
                        else:
                            result_handler.warning('Number of TCP connections exceeded the 80% warning threshold for {0}, ({1}/{2})'.format(identifier_log, len(filtered_fds), fd_limit),
                                                   code=ErrorCodes.arakoon_fd_80)
                    else:
                        result_handler.success('Number of TCP connections for {0} is healthy ({1}/{2})'.format(identifier_log, len(filtered_fds), fd_limit),
                                               code=ErrorCodes.arakoon_fd_ok)
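
The state filter above relies on lsof printing the TCP state as a parenthesized last column, e.g. '(ESTABLISHED)'. A minimal sketch of the same filter and threshold logic, assuming fds is a list of raw lsof output lines (the 'error'/'warning' labels are illustrative; the check above reports both threshold breaches as warnings, just with different codes):

def classify_fd_usage(fds, fd_limit=30, passed_connections=('ESTABLISHED', 'TIME_WAIT')):
    # Keep only lines whose trailing '(STATE)' column is a counted connection state
    filtered = [line for line in fds if line.split()[-1].strip('(').strip(')') in passed_connections]
    if len(filtered) >= fd_limit * 95 / 100:  # integer division: 28 for the default limit of 30
        return 'error', len(filtered)
    if len(filtered) >= fd_limit * 80 / 100:  # 24 for the default limit of 30
        return 'warning', len(filtered)
    return 'ok', len(filtered)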