Example #1
class TestProfile(profile_base.Profile):
    CONTEXT = 'context'
    properties_schema = {
        'INT': schema.Integer('int property', default=0),
        'STR': schema.String('string property', default='a string'),
        'MAP': schema.Map(
            'map property',
            schema={
                'KEY1': schema.Integer('key1'),
                'KEY2': schema.String('key2')
            }
        ),
        'LIST': schema.List(
            'list property',
            schema=schema.String('list item'),
        ),
    }

    OPERATIONS = {}

    def __init__(self, name, spec, **kwargs):
        super(TestProfile, self).__init__(name, spec, **kwargs)

    def do_create(self):
        return {}

    def do_delete(self, id):
        return True

    def do_update(self):
        return {}

    def do_check(self, id):
        return True
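
The declarative part above can be poked at directly; a minimal sketch exercising the LIST property's schema in isolation, using only schema calls that the tests later on this page demonstrate:

lst = schema.List('list property', schema=schema.String('list item'))
print(lst.resolve(['a', 'b']))  # -> ['a', 'b'], each item passed through the String item schema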
Example #2
 def test_schema_list_schema(self):
     d = {
         'type': 'List',
         'description': 'A list',
         'schema': {
             '*': {
                 'type': 'String',
                 'description': 'A string',
                 'default': 'wibble',
                 'required': True,
                 'updatable': False,
                 'constraints': [{
                     'constraint': ['foo', 'bar'],
                     'type': 'AllowedValues'
                 }]
             }
         },
         'required': False,
         'updatable': False,
     }
     c = constraints.AllowedValues(['foo', 'bar'])
     s = schema.String('A string',
                       default='wibble',
                       required=True,
                       constraints=[c])
     l = schema.List('A list', schema=s)
     self.assertEqual(d, dict(l))
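
If the dict() serialization is symmetric across nesting levels (an assumption, not something this test verifies), the '*' entry produced by expanding the list should match serializing the item schema on its own:

# Assumption: dict() expansion of the item schema equals the nested '*' entry.
c = constraints.AllowedValues(['foo', 'bar'])
s = schema.String('A string', default='wibble', required=True, constraints=[c])
l = schema.List('A list', schema=s)
assert dict(l)['schema']['*'] == dict(s)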
Example #3
class CombinationAlarm(Alarm):

    rule_schema = {
        COMBINATION_OP:
        schema.String(
            _('Operator for combination. Must be one of %s') %
            list(COMBINATION_OPERATORS),
            default=C_AND,
        ),
        ALARM_IDS:
        schema.List(
            _('List of alarm IDs for combination.'),
            schema=schema.String(
                _('The ID of an alarm.'),
                required=True,
            ),
            required=True,
        )
    }

    def __init__(self, name, spec, **kwargs):
        super(CombinationAlarm, self).__init__(name, spec, **kwargs)

        rule_spec = spec.get('rule', {})
        self.rule = schema.Spec(self.rule_schema, rule_spec)
        self.namespace = 'combination'
Example #4
    def test_resolve(self):
        sot = schema.List(schema=schema.String())

        res = sot.resolve(['v1', 'v2'])

        self.assertEqual(['v1', 'v2'], res)

        self.assertRaises(TypeError, sot.resolve, 123)
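
The two entry points differ in what they return; a short contrast, based strictly on what the tests on this page assert:

sot = schema.List(schema=schema.String())
assert sot.resolve(['v1', 'v2']) == ['v1', 'v2']  # resolve returns the coerced list
assert sot.validate(['abc', 'def']) is None       # validate only checks (see Example #10)
# a non-list input such as 123 makes resolve raise TypeError, as asserted above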
Example #5
    def test__getitem__(self):
        sot = FakeSchema(description='desc', default='default', required=False,
                         constraints=[{'foo': 'bar'}])

        self.assertEqual('desc', sot['description'])
        self.assertEqual('default', sot['default'])
        self.assertEqual(False, sot['required'])
        self.assertEqual([{'foo': 'bar'}], sot['constraints'])
        self.assertRaises(KeyError, sot.__getitem__, 'bogus')

        sot = schema.List(schema=schema.String())
        self.assertEqual(
            {
                '*': {
                    'required': False,
                    'type': 'String',
                    'updatable': False
                }
            },
            sot['schema'])
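
As the test shows, the per-item schema is exposed under the wildcard key '*' and is reachable through plain mapping access; a minimal sketch:

sot = schema.List(schema=schema.String())
item = sot['schema']['*']
print(item['type'])      # 'String'
print(item['required'])  # False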
Example #6
 def test_schema_nested_schema(self):
     d = {
         'type': 'List',
         'description': 'A list',
         'schema': {
             '*': {
                 'type': 'Map',
                 'description': 'A map',
                 'schema': {
                     'Foo': {
                         'type': 'String',
                         'description': 'A string',
                         'default': 'wibble',
                         'readonly': False,
                         'required': True,
                         'constraints': [{
                             'type': 'AllowedValues',
                             'constraint': ['foo', 'bar']
                         }]
                     }
                 },
                 'readonly': False,
                 'required': False,
             }
         },
         'readonly': False,
         'required': False,
     }
     c = constraints.AllowedValues(['foo', 'bar'])
     s = schema.String('A string',
                       default='wibble',
                       required=True,
                       constraints=[c])
     m = schema.Map('A map', schema={'Foo': s})
     l = schema.List('A list', schema=m)
     self.assertEqual(d, dict(l))
Example #7
class ServerProfile(base.Profile):
    '''Profile for an OpenStack Nova server.'''

    KEYS = (
        CONTEXT,
        ADMIN_PASS,
        AUTO_DISK_CONFIG,
        AVAILABILITY_ZONE,
        BLOCK_DEVICE_MAPPING,
        BLOCK_DEVICE_MAPPING_V2,
        CONFIG_DRIVE,
        FLAVOR,
        IMAGE,
        KEY_NAME,
        METADATA,
        NAME,
        NETWORKS,
        PERSONALITY,
        SECURITY_GROUPS,
        USER_DATA,
        SCHEDULER_HINTS,
    ) = (
        'context',
        'adminPass',
        'auto_disk_config',
        'availability_zone',
        'block_device_mapping',
        'block_device_mapping_v2',
        'config_drive',
        'flavor',
        'image',
        'key_name',
        'metadata',
        'name',
        'networks',
        'personality',
        'security_groups',
        'user_data',
        'scheduler_hints',
    )

    BDM_KEYS = (
        BDM_DEVICE_NAME,
        BDM_VOLUME_SIZE,
    ) = (
        'device_name',
        'volume_size',
    )

    BDM2_KEYS = (
        BDM2_UUID,
        BDM2_SOURCE_TYPE,
        BDM2_DESTINATION_TYPE,
        BDM2_DISK_BUS,
        BDM2_DEVICE_NAME,
        BDM2_VOLUME_SIZE,
        BDM2_GUEST_FORMAT,
        BDM2_BOOT_INDEX,
        BDM2_DEVICE_TYPE,
        BDM2_DELETE_ON_TERMINATION,
    ) = (
        'uuid',
        'source_type',
        'destination_type',
        'disk_bus',
        'device_name',
        'volume_size',
        'guest_format',
        'boot_index',
        'device_type',
        'delete_on_termination',
    )

    NETWORK_KEYS = (
        PORT,
        FIXED_IP,
        NETWORK,
    ) = (
        'port',
        'fixed-ip',
        'network',
    )

    PERSONALITY_KEYS = (
        PATH,
        CONTENTS,
    ) = (
        'path',
        'contents',
    )

    SCHEDULER_HINTS_KEYS = (GROUP, ) = ('group', )

    properties_schema = {
        CONTEXT:
        schema.Map(_('Customized security context for operating servers.'), ),
        ADMIN_PASS:
        schema.String(_('Password for the administrator account.'), ),
        AUTO_DISK_CONFIG:
        schema.Boolean(
            _('Whether the disk partition is done automatically.'),
            default=True,
        ),
        AVAILABILITY_ZONE:
        schema.String(
            _('Name of availability zone for running the server.'), ),
        BLOCK_DEVICE_MAPPING:
        schema.List(
            _('A list specifying the properties of block devices to be used '
              'for this server.'),
            schema=schema.Map(
                _('A map specifying the properties of a block device to be '
                  'used by the server.'),
                schema={
                    BDM_DEVICE_NAME:
                    schema.String(
                        _('Block device name, should be <=255 chars.'), ),
                    BDM_VOLUME_SIZE:
                    schema.Integer(_('Block device size in GB.'), ),
                }),
        ),
        BLOCK_DEVICE_MAPPING_V2:
        schema.List(
            _('A list specifying the properties of block devices to be used '
              'for this server.'),
            schema=schema.Map(
                _('A map specifying the properties of a block device to be '
                  'used by the server.'),
                schema={
                    BDM2_UUID:
                    schema.String(
                        _('ID of the source image, snapshot or volume'), ),
                    BDM2_SOURCE_TYPE:
                    schema.String(
                        _('Volume source type, should be image, snapshot, '
                          'volume or blank'),
                        required=True,
                    ),
                    BDM2_DESTINATION_TYPE:
                    schema.String(
                        _('Volume destination type, should be volume or '
                          'local'),
                        required=True,
                    ),
                    BDM2_DISK_BUS:
                    schema.String(_('Bus of the device.'), ),
                    BDM2_DEVICE_NAME:
                    schema.String(
                        _('Name of the device(e.g. vda, xda, ....).'), ),
                    BDM2_VOLUME_SIZE:
                    schema.Integer(
                        _('Size of the block device in MB(for swap) and '
                          'in GB(for other formats)'),
                        required=True,
                    ),
                    BDM2_GUEST_FORMAT:
                    schema.String(
                        _('Specifies the disk file system format(e.g. swap, '
                          'ephemeral, ...).'), ),
                    BDM2_BOOT_INDEX:
                    schema.Integer(_('Define the boot order of the device'), ),
                    BDM2_DEVICE_TYPE:
                    schema.String(
                        _('Type of the device(e.g. disk, cdrom, ...).'), ),
                    BDM2_DELETE_ON_TERMINATION:
                    schema.Boolean(
                        _('Whether to delete the volume when the server '
                          'stops.'), ),
                }),
        ),
        CONFIG_DRIVE:
        schema.Boolean(
            _('Whether config drive should be enabled for the server.'), ),
        FLAVOR:
        schema.String(
            _('ID of flavor used for the server.'),
            required=True,
            updatable=True,
        ),
        IMAGE:
        schema.String(
            # IMAGE is not required, because there could be BDM or BDMv2
            # support and the corresponding settings effective
            _('ID of image to be used for the new server.'),
            updatable=True,
        ),
        KEY_NAME:
        schema.String(_('Name of Nova keypair to be injected to server.'), ),
        METADATA:
        schema.Map(
            _('A collection of key/value pairs to be associated with the '
              'server created. Both key and value should be <=255 chars.'),
            updatable=True,
        ),
        NAME:
        schema.String(
            _('Name of the server.'),
            updatable=True,
        ),
        NETWORKS:
        schema.List(
            _('List of networks for the server.'),
            schema=schema.Map(
                _('A map specifying the properties of a network for uses.'),
                schema={
                    NETWORK:
                    schema.String(
                        _('Name or ID of network to create a port on.'), ),
                    PORT:
                    schema.String(_('Port ID to be used by the network.'), ),
                    FIXED_IP:
                    schema.String(_('Fixed IP to be used by the network.'), ),
                },
            ),
            updatable=True,
        ),
        PERSONALITY:
        schema.List(
            _('List of files to be injected into the server.'),
            schema=schema.Map(
                _('A map specifying the path & contents for an injected '
                  'file.'),
                schema={
                    PATH:
                    schema.String(
                        _('In-instance path for the file to be injected.'),
                        required=True,
                    ),
                    CONTENTS:
                    schema.String(
                        _('Contents of the file to be injected.'),
                        required=True,
                    ),
                },
            ),
        ),
        SCHEDULER_HINTS:
        schema.Map(
            _('A collection of key/value pairs to be associated with the '
              'Scheduler hints. Both key and value should be <=255 chars.'), ),
        SECURITY_GROUPS:
        schema.List(
            _('List of security groups.'),
            schema=schema.String(
                _('Name of a security group'),
                required=True,
            ),
        ),
        USER_DATA:
        schema.String(_('User data to be exposed by the metadata server.'), ),
    }

    OP_NAMES = (OP_REBOOT, ) = ('reboot', )

    REBOOT_TYPE = 'type'
    REBOOT_TYPES = (REBOOT_SOFT, REBOOT_HARD) = ('SOFT', 'HARD')

    OPERATIONS = {
        OP_REBOOT:
        schema.Operation(
            _("Reboot the nova server."),
            schema={
                REBOOT_TYPE:
                schema.String(
                    _("Type of reboot which can be 'SOFT' or 'HARD'."),
                    default=REBOOT_SOFT,
                    constraints=[
                        constraints.AllowedValues(REBOOT_TYPES),
                    ])
            })
    }

    def __init__(self, type_name, name, **kwargs):
        super(ServerProfile, self).__init__(type_name, name, **kwargs)

        self._novaclient = None
        self._neutronclient = None
        self.server_id = None

    def nova(self, obj):
        '''Construct nova client based on object.

        :param obj: Object for which the client is created. It is expected to
                    be None when retrieving an existing client. When creating
                    a client, it contains the user and project to be used.
        '''

        if self._novaclient is not None:
            return self._novaclient
        params = self._build_conn_params(obj.user, obj.project)
        self._novaclient = driver_base.SenlinDriver().compute(params)
        return self._novaclient

    def neutron(self, obj):
        '''Construct neutron client based on object.

        :param obj: Object for which the client is created. It is expected to
                    be None when retrieving an existing client. When creating
                    a client, it contains the user and project to be used.
        '''

        if self._neutronclient is not None:
            return self._neutronclient
        params = self._build_conn_params(obj.user, obj.project)
        self._neutronclient = driver_base.SenlinDriver().network(params)
        return self._neutronclient

    def do_validate(self, obj):
        '''Validate if the spec has provided valid info for server creation.'''
        return True

    def _resolve_bdm(self, bdm):
        for bd in bdm:
            for key in self.BDM2_KEYS:
                if bd[key] is None:
                    del bd[key]
        return bdm

    def _resolve_network(self, networks, client):
        for network in networks:
            net_name_id = network.get(self.NETWORK)
            if net_name_id:
                res = client.network_get(net_name_id)
                network['uuid'] = res.id
                del network[self.NETWORK]
                if network['port'] is None:
                    del network['port']
                if network['fixed-ip'] is None:
                    del network['fixed-ip']
        return networks

    def do_create(self, obj):
        '''Create a server using the given profile.'''
        kwargs = {}
        for key in self.KEYS:
            # context is treated as connection parameters
            if key == self.CONTEXT:
                continue

            if self.properties[key] is not None:
                kwargs[key] = self.properties[key]

        name_or_id = self.properties[self.IMAGE]
        if name_or_id is not None:
            image = self.nova(obj).image_find(name_or_id)
            # wait for new version of openstacksdk to fix this
            kwargs.pop(self.IMAGE)
            kwargs['imageRef'] = image.id

        flavor_id = self.properties[self.FLAVOR]
        flavor = self.nova(obj).flavor_find(flavor_id, False)

        # wait for new version of openstacksdk to fix this
        kwargs.pop(self.FLAVOR)
        kwargs['flavorRef'] = flavor.id

        name = self.properties[self.NAME]
        if name:
            kwargs['name'] = name
        else:
            kwargs['name'] = obj.name

        metadata = self.properties[self.METADATA] or {}
        if obj.cluster_id:
            metadata['cluster'] = obj.cluster_id
        kwargs['metadata'] = metadata

        block_device_mapping_v2 = self.properties[self.BLOCK_DEVICE_MAPPING_V2]
        if block_device_mapping_v2 is not None:
            kwargs['block_device_mapping_v2'] = self._resolve_bdm(
                block_device_mapping_v2)

        user_data = self.properties[self.USER_DATA]
        if user_data is not None:
            ud = encodeutils.safe_encode(user_data)
            kwargs['user_data'] = encodeutils.safe_decode(base64.b64encode(ud))

        networks = self.properties[self.NETWORKS]
        if networks is not None:
            kwargs['networks'] = self._resolve_network(networks,
                                                       self.neutron(obj))

        secgroups = self.properties[self.SECURITY_GROUPS]
        if secgroups:
            kwargs['security_groups'] = [{'name': sg} for sg in secgroups]

        if 'placement' in obj.data:
            if 'zone' in obj.data['placement']:
                kwargs['availability_zone'] = obj.data['placement']['zone']
            if 'servergroup' in obj.data['placement']:
                group_id = obj.data['placement']['servergroup']
                hints = self.properties.get(self.SCHEDULER_HINTS, {})
                hints.update({'group': group_id})
                kwargs['scheduler_hints'] = hints

        LOG.info('Creating server: %s' % kwargs)
        server = self.nova(obj).server_create(**kwargs)
        self.nova(obj).wait_for_server(server.id)
        self.server_id = server.id

        return server.id

    def do_delete(self, obj):
        self.server_id = obj.physical_id

        if not obj.physical_id:
            return True

        try:
            self.nova(obj).server_delete(self.server_id)
            self.nova(obj).wait_for_server_delete(self.server_id)
        except Exception as ex:
            LOG.error('Error: %s' % six.text_type(ex))
            return False

        return True

    def do_update(self, obj, new_profile=None, **params):
        '''Perform update on the server.

        :param obj: the server to operate on
        :param new_profile: the new profile for the server.
        :param params: a dictionary of optional parameters.
        '''
        self.server_id = obj.physical_id
        if not self.server_id:
            return True

        if not new_profile:
            return True

        if not self.validate_for_update(new_profile):
            return False

        # TODO(Yanyan Hu): Update block_device properties

        # Update basic properties of server
        if not self._update_basic_properties(obj, new_profile):
            return False

        # Update server flavor
        flavor = self.properties[self.FLAVOR]
        new_flavor = new_profile.properties[self.FLAVOR]
        if new_flavor != flavor:
            try:
                self._update_flavor(obj, flavor, new_flavor)
            except Exception as ex:
                LOG.exception(_('Failed in updating server flavor: %s'),
                              six.text_type(ex))
                return False

        # Update server image
        old_passwd = self.properties.get(self.ADMIN_PASS)
        passwd = old_passwd
        if new_profile.properties[self.ADMIN_PASS] is not None:
            passwd = new_profile.properties[self.ADMIN_PASS]
        image = self.properties[self.IMAGE]
        new_image = new_profile.properties[self.IMAGE]
        if new_image != image:
            try:
                self._update_image(obj, image, new_image, passwd)
            except Exception as ex:
                LOG.exception(_('Failed in updating server image: %s'),
                              six.text_type(ex))
                return False
        elif old_passwd != passwd:
            # TODO(Jun Xu): update server admin password
            pass

        # Update server network
        networks_current = self.properties[self.NETWORKS]
        networks_create = new_profile.properties[self.NETWORKS]
        networks_delete = copy.deepcopy(networks_current)
        for network in networks_current:
            if network in networks_create:
                networks_create.remove(network)
                networks_delete.remove(network)
        if networks_create or networks_delete:
            # We have network interfaces to be deleted and/or created
            try:
                self._update_network(obj, networks_create, networks_delete)
            except Exception as ex:
                LOG.exception(_('Failed in updating server network: %s'),
                              six.text_type(ex))
                return False

        return True

    def _update_basic_properties(self, obj, new_profile):
        '''Updating basic server properties including name, metadata'''

        # Update server metadata
        metadata = self.properties[self.METADATA]
        new_metadata = new_profile.properties[self.METADATA]
        if new_metadata != metadata:
            if new_metadata is None:
                new_metadata = {}
            try:
                self.nova(obj).server_metadata_update(self.server_id,
                                                      new_metadata)
            except Exception as ex:
                LOG.exception(_('Failed in updating server metadata: %s'),
                              six.text_type(ex))
                return False

        # Update server name
        name = self.properties[self.NAME]
        new_name = new_profile.properties[self.NAME]
        if new_name != name:
            attrs = {'name': new_name if new_name else obj.name}
            try:
                self.nova(obj).server_update(self.server_id, **attrs)
            except Exception as ex:
                LOG.exception(_('Failed in updating server name: %s'),
                              six.text_type(ex))
                return False

        return True

    def _update_flavor(self, obj, old_flavor, new_flavor):
        '''Updating server flavor'''
        res = self.nova(obj).flavor_find(old_flavor)
        old_flavor_id = res.id
        res = self.nova(obj).flavor_find(new_flavor)
        new_flavor_id = res.id
        if new_flavor_id == old_flavor_id:
            return

        try:
            self.nova(obj).server_resize(obj.physical_id, new_flavor_id)
            self.nova(obj).wait_for_server(obj.physical_id, 'VERIFY_RESIZE')
        except Exception as ex:
            LOG.error(_("Server resizing failed, revert it: %s"),
                      six.text_type(ex))
            self.nova(obj).server_resize_revert(obj.physical_id)
            self.nova(obj).wait_for_server(obj.physical_id, 'ACTIVE')
            raise exception.ResourceUpdateFailure(resource=obj.physical_id)

        self.nova(obj).server_resize_confirm(obj.physical_id)
        self.nova(obj).wait_for_server(obj.physical_id, 'ACTIVE')

    def _update_image(self, obj, old_image, new_image, admin_password):
        '''Updating server image'''

        if old_image:
            res = self.nova(obj).image_find(old_image)
            image_id = res.id
        else:
            server = self.nova(obj).server_get(obj.physical_id)
            image_id = server.image['id']

        if new_image:
            res = self.nova(obj).image_find(new_image)
            new_image_id = res.id
            if new_image_id != image_id:
                # (Jun Xu): do not update the name here even if it changed;
                # it should be updated in do_update
                self.nova(obj).server_rebuild(obj.physical_id, new_image_id,
                                              self.properties.get(self.NAME),
                                              admin_password)
                self.nova(obj).wait_for_server(obj.physical_id, 'ACTIVE')
        else:
            # TODO(Yanyan Hu): Allow server update with new_image
            # set to None if Nova service supports it
            LOG.error(
                _("Updating Nova server with image set to None is "
                  "not supported by Nova."))
            raise exception.ResourceUpdateFailure(resource=obj.physical_id)

    def _update_network(self, obj, networks_create, networks_delete):
        '''Updating server network interfaces'''

        server = self.nova(obj).server_get(self.server_id)
        ports_existing = list(self.nova(obj).server_interface_list(server))
        ports = []
        for p in ports_existing:
            fixed_ips = []
            for addr in p['fixed_ips']:
                fixed_ips.append(addr['ip_address'])
            ports.append({
                'port_id': p['port_id'],
                'net_id': p['net_id'],
                'fixed_ips': fixed_ips
            })

        # Detach some existing ports
        # Step1. Accurately search port with port_id or fixed-ip/net_id
        for n in networks_delete:
            if n['port'] is not None:
                for p in ports:
                    if p['port_id'] == n['port']:
                        ports.remove(p)
                        break
                res = self.nova(obj).server_interface_delete(n['port'], server)
            elif n['fixed-ip'] is not None:
                res = self.neutron(obj).network_get(n['network'])
                net_id = res.id
                for p in ports:
                    if (n['fixed-ip'] in p['fixed_ips']) and (p['net_id']
                                                              == net_id):
                        res = self.nova(obj).server_interface_delete(
                            p['port_id'], server)
                        ports.remove(p)
                        break

        # Step2. Fuzzy search port with net_id
        for n in networks_delete:
            if n['port'] is None and n['fixed-ip'] is None:
                res = self.neutron(obj).network_get(n['network'])
                net_id = res.id
                for p in ports:
                    if p['net_id'] == net_id:
                        res = self.nova(obj).server_interface_delete(
                            p['port_id'], server)
                        ports.remove(p)
                        break

        # Attach new ports added in new network definition
        for n in networks_create:
            net_name_id = n.get(self.NETWORK, None)
            if net_name_id:
                res = self.neutron(obj).network_get(net_name_id)
                n['net_id'] = res.id
                if n['fixed-ip'] is not None:
                    n['fixed_ips'] = [{'ip_address': n['fixed-ip']}]
            if n['port'] is not None:
                n['port_id'] = n['port']
            del n['network']
            del n['port']
            del n['fixed-ip']
            self.nova(obj).server_interface_create(server, **n)

        return

    def do_check(self, obj):
        if not obj.physical_id:
            return False

        self.server_id = obj.physical_id

        try:
            server = self.nova(obj).server_get(self.server_id)
        except Exception as ex:
            LOG.error('Error: %s' % six.text_type(ex))
            return False

        if (server is None or server.status != 'ACTIVE'):
            return False

        return True

    def do_get_details(self, obj):
        known_keys = {
            'OS-DCF:diskConfig', 'OS-EXT-AZ:availability_zone',
            'OS-EXT-STS:power_state', 'OS-EXT-STS:vm_state', 'accessIPv4',
            'accessIPv6', 'config_drive', 'created', 'hostId', 'id',
            'key_name', 'locked', 'metadata', 'name',
            'os-extended-volumes:volumes_attached', 'progress', 'status',
            'updated'
        }
        if obj.physical_id is None or obj.physical_id == '':
            return {}

        try:
            server = self.nova(obj).server_get(obj.physical_id)
        except exception.InternalError as ex:
            return {'Error': {'code': ex.code, 'message': six.text_type(ex)}}

        if server is None:
            return {}
        server_data = server.to_dict()
        details = {
            'image': server_data['image']['id'],
            'flavor': server_data['flavor']['id'],
        }
        for key in known_keys:
            if key in server_data:
                details[key] = server_data[key]

        # process special keys like 'OS-EXT-STS:task_state': these keys have
        # a default value '-' when not existing
        special_keys = [
            'OS-EXT-STS:task_state',
            'OS-SRV-USG:launched_at',
            'OS-SRV-USG:terminated_at',
        ]
        for key in special_keys:
            if key in server_data:
                val = server_data[key]
                details[key] = val if val else '-'

        # process network addresses
        details['addresses'] = {}
        for net in server_data['addresses']:
            addresses = []
            for addr in server_data['addresses'][net]:
                # Ignore IPv6 address
                if addr['version'] == 4:
                    addresses.append(addr['addr'])
            details['addresses'][net] = addresses

        # process security groups
        sgroups = []
        if 'security_groups' in server_data:
            for sg in server_data['security_groups']:
                sgroups.append(sg['name'])
        if len(sgroups) == 0:
            details['security_groups'] = ''
        elif len(sgroups) == 1:
            details['security_groups'] = sgroups[0]
        else:
            details['security_groups'] = sgroups

        return dict((k, details[k]) for k in sorted(details))

    def do_join(self, obj, cluster_id):
        if not obj.physical_id:
            return False

        metadata = self.nova(obj).server_metadata_get(obj.physical_id) or {}
        metadata['cluster'] = cluster_id
        self.nova(obj).server_metadata_update(obj.physical_id, metadata)
        return super(ServerProfile, self).do_join(obj, cluster_id)

    def do_leave(self, obj):
        if not obj.physical_id:
            return False

        self.nova(obj).server_metadata_delete(obj.physical_id, ['cluster'])
        return super(ServerProfile, self).do_leave(obj)

    def do_rebuild(self, obj):
        if not obj.physical_id:
            return False

        self.server_id = obj.physical_id

        try:
            server = self.nova(obj).server_get(self.server_id)
        except Exception as ex:
            LOG.exception(_('Failed at getting server: %s'), six.text_type(ex))
            return False

        if server is None or server.image is None:
            return False

        image_id = server.image['id']
        admin_pass = self.properties.get(self.ADMIN_PASS)

        try:
            self.nova(obj).server_rebuild(self.server_id, image_id,
                                          self.properties.get(self.NAME),
                                          admin_pass)
            self.nova(obj).wait_for_server(self.server_id, 'ACTIVE')
        except Exception as ex:
            LOG.exception(_('Failed at rebuilding server: %s'),
                          six.text_type(ex))
            return False

        return True

    def do_recover(self, obj, **options):

        if 'operation' in options:
            if options['operation'] == 'REBUILD':
                return self.do_rebuild(obj)

        res = super(ServerProfile, self).do_recover(obj, **options)

        return res

    def handle_reboot(self, obj, **options):
        """Handler for the reboot operation."""
        pass
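
To make the _resolve_bdm pruning above concrete, here is a hypothetical walk-through (assuming, as the property resolution implies, that every BDM2 key is present in each mapping and defaults to None):

bdm = [{'uuid': 'vol-1', 'source_type': 'volume', 'destination_type': 'volume',
        'disk_bus': None, 'device_name': None, 'volume_size': 1,
        'guest_format': None, 'boot_index': 0, 'device_type': None,
        'delete_on_termination': None}]
# _resolve_bdm(bdm) deletes every key whose value is None, leaving:
# [{'uuid': 'vol-1', 'source_type': 'volume', 'destination_type': 'volume',
#   'volume_size': 1, 'boot_index': 0}]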
Example #8
class ZonePlacementPolicy(base.Policy):
    """Policy for placing members of a cluster across availability zones."""

    VERSION = '1.0'
    VERSIONS = {
        '1.0': [
            {'status': consts.EXPERIMENTAL, 'since': '2016.04'},
            {'status': consts.SUPPORTED, 'since': '2016.10'},
        ]
    }
    PRIORITY = 300

    TARGET = [
        ('BEFORE', consts.CLUSTER_SCALE_OUT),
        ('BEFORE', consts.CLUSTER_SCALE_IN),
        ('BEFORE', consts.CLUSTER_RESIZE),
        ('BEFORE', consts.NODE_CREATE),
    ]

    PROFILE_TYPE = [
        'os.nova.server-1.0',
    ]

    KEYS = (
        ZONES,
    ) = (
        'zones',
    )

    _AZ_KEYS = (
        ZONE_NAME, ZONE_WEIGHT,
    ) = (
        'name', 'weight',
    )

    properties_schema = {
        ZONES: schema.List(
            _('List of availability zones to choose from.'),
            schema=schema.Map(
                _('An availability zone as candidate.'),
                schema={
                    ZONE_NAME: schema.String(
                        _('Name of an availability zone.'),
                    ),
                    ZONE_WEIGHT: schema.Integer(
                        _('Weight of the availability zone (default is 100).'),
                        default=100,
                        required=False,
                    )
                },
            ),
        ),
    }

    def __init__(self, name, spec, **kwargs):
        super(ZonePlacementPolicy, self).__init__(name, spec, **kwargs)

        self.zones = dict((z[self.ZONE_NAME], z[self.ZONE_WEIGHT])
                          for z in self.properties.get(self.ZONES))

    def validate(self, context, validate_props=False):
        super(ZonePlacementPolicy, self).validate(context, validate_props)

        if not validate_props:
            return True

        nc = self.nova(context.user_id, context.project_id)
        input_azs = sorted(self.zones.keys())
        valid_azs = nc.validate_azs(input_azs)
        invalid_azs = sorted(set(input_azs) - set(valid_azs))
        if invalid_azs:
            msg = _("The specified %(key)s '%(value)s' could not be "
                    "found.") % {'key': self.ZONE_NAME,
                                 'value': list(invalid_azs)}
            raise exc.InvalidSpec(message=msg)

        return True

    def _create_plan(self, current, zones, count, expand):
        """Compute a placement plan based on the weights of AZs.

        :param current: Distribution of existing nodes.
        :returns: A dict that contains a placement plan.
        """
        # sort candidate zones by weight and convert into a list
        candidates = sorted(zones.items(), key=operator.itemgetter(1),
                            reverse=expand)

        sum_weight = sum(zones.values())
        if expand:
            total = count + sum(current.values())
        else:
            total = sum(current.values()) - count

        remain = count
        plan = dict.fromkeys(zones.keys(), 0)

        for i in range(len(zones)):
            zone = candidates[i][0]
            weight = candidates[i][1]
            q = total * weight / float(sum_weight)
            if expand:
                quota = int(math.ceil(q))
                headroom = quota - current[zone]
            else:
                quota = int(math.floor(q))
                headroom = current[zone] - quota

            if headroom <= 0:
                continue

            if headroom < remain:
                plan[zone] = headroom
                remain -= headroom
            else:
                plan[zone] = remain if remain > 0 else 0
                remain = 0
                break

        if remain > 0:
            return None

        # filter out zero values
        result = {}
        for z, c in plan.items():
            if c > 0:
                result[z] = c

        return result

    def _get_count(self, cluster_id, action):
        """Get number of nodes to create or delete.

        :param cluster_id: The ID of the target cluster.
        :param action: The action object which triggered this policy check.
        :return: An integer value which can be 1) positive - number of nodes
                 to create; 2) negative - number of nodes to delete; 3) 0 -
                 something wrong happened, and the policy check failed.
        """
        if action.action == consts.NODE_CREATE:
            # skip the policy if availability zone is specified in profile
            profile = action.entity.rt['profile']
            if profile.properties[profile.AVAILABILITY_ZONE]:
                return 0
            return 1

        if action.action == consts.CLUSTER_RESIZE:
            if action.data.get('deletion', None):
                return -action.data['deletion']['count']
            elif action.data.get('creation', None):
                return action.data['creation']['count']

            db_cluster = co.Cluster.get(action.context, cluster_id)
            current = no.Node.count_by_cluster(action.context, cluster_id)
            res = scaleutils.parse_resize_params(action, db_cluster, current)
            if res[0] == base.CHECK_ERROR:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = res[1]
                LOG.error(res[1])
                return 0

            if action.data.get('deletion', None):
                return -action.data['deletion']['count']
            else:
                return action.data['creation']['count']

        if action.action == consts.CLUSTER_SCALE_IN:
            pd = action.data.get('deletion', None)
            if pd is None:
                return -action.inputs.get('count', 1)
            else:
                return -pd.get('count', 1)

        # CLUSTER_SCALE_OUT: an action that inflates the cluster
        pd = action.data.get('creation', None)
        if pd is None:
            return action.inputs.get('count', 1)
        else:
            return pd.get('count', 1)

    def pre_op(self, cluster_id, action):
        """Callback function when cluster membership is about to change.

        :param cluster_id: ID of the target cluster.
        :param action: The action that triggers this policy check.
        """
        count = self._get_count(cluster_id, action)
        if count == 0:
            return

        expand = True
        if count < 0:
            expand = False
            count = -count

        cluster = cm.Cluster.load(action.context, cluster_id)

        nc = self.nova(cluster.user, cluster.project)
        zones_good = nc.validate_azs(self.zones.keys())
        if len(zones_good) == 0:
            action.data['status'] = base.CHECK_ERROR
            action.data['reason'] = _('No availability zone found available.')
            LOG.error('No availability zone found available.')
            return

        zones = {}
        for z, w in self.zones.items():
            if z in zones_good:
                zones[z] = w

        current = cluster.get_zone_distribution(action.context, zones.keys())
        result = self._create_plan(current, zones, count, expand)

        if not result:
            action.data['status'] = base.CHECK_ERROR
            action.data['reason'] = _('There is no feasible plan to '
                                      'handle all nodes.')
            LOG.error('There is no feasible plan to handle all nodes.')
            return

        if expand:
            if 'creation' not in action.data:
                action.data['creation'] = {}
            action.data['creation']['count'] = count
            action.data['creation']['zones'] = result
        else:
            if 'deletion' not in action.data:
                action.data['deletion'] = {}
            action.data['deletion']['count'] = count
            action.data['deletion']['zones'] = result
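
A hypothetical walk-through of _create_plan above, with made-up zones and weights:

# zones   = {'AZ1': 100, 'AZ2': 50}                        -> sum_weight = 150
# current = {'AZ1': 4, 'AZ2': 0}; count = 3; expand = True -> total = 7
# AZ1: quota = ceil(7 * 100 / 150) = 5, headroom = 5 - 4 = 1 -> place 1, remain = 2
# AZ2: quota = ceil(7 * 50 / 150)  = 3, headroom = 3 - 0 = 3 -> place 2, remain = 0
# result: {'AZ1': 1, 'AZ2': 2}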
Example #9
    def test_validate_failed(self):
        sot = schema.List(schema=schema.String())

        ex = self.assertRaises(exc.ESchema, sot.validate, None)
        self.assertEqual("'None' is not a List", six.text_type(ex))
Example #10
    def test_validate(self):
        sot = schema.List(schema=schema.String())

        res = sot.validate(['abc', 'def'])

        self.assertIsNone(res)
Example #11
class ServerProfile(base.KubeBaseProfile):
    """Profile for an kubernetes master server."""

    VERSIONS = {'1.0': [{'status': consts.EXPERIMENTAL, 'since': '2017.10'}]}

    KEYS = (
        CONTEXT,
        FLAVOR,
        IMAGE,
        KEY_NAME,
        PUBLIC_NETWORK,
        BLOCK_DEVICE_MAPPING_V2,
    ) = (
        'context',
        'flavor',
        'image',
        'key_name',
        'public_network',
        'block_device_mapping_v2',
    )

    INTERNAL_KEYS = (
        KUBEADM_TOKEN,
        KUBE_MASTER_IP,
        SECURITY_GROUP,
        PRIVATE_NETWORK,
        PRIVATE_SUBNET,
        PRIVATE_ROUTER,
        KUBE_MASTER_FLOATINGIP,
        KUBE_MASTER_FLOATINGIP_ID,
        SCALE_OUT_RECV_ID,
        SCALE_OUT_URL,
    ) = (
        'kubeadm_token',
        'kube_master_ip',
        'security_group',
        'private_network',
        'private_subnet',
        'private_router',
        'kube_master_floatingip',
        'kube_master_floatingip_id',
        'scale_out_recv_id',
        'scale_out_url',
    )

    NETWORK_KEYS = (
        PORT,
        FIXED_IP,
        NETWORK,
        PORT_SECURITY_GROUPS,
        FLOATING_NETWORK,
        FLOATING_IP,
    ) = (
        'port',
        'fixed_ip',
        'network',
        'security_groups',
        'floating_network',
        'floating_ip',
    )

    BDM2_KEYS = (
        BDM2_UUID,
        BDM2_SOURCE_TYPE,
        BDM2_DESTINATION_TYPE,
        BDM2_DISK_BUS,
        BDM2_DEVICE_NAME,
        BDM2_VOLUME_SIZE,
        BDM2_GUEST_FORMAT,
        BDM2_BOOT_INDEX,
        BDM2_DEVICE_TYPE,
        BDM2_DELETE_ON_TERMINATION,
    ) = (
        'uuid',
        'source_type',
        'destination_type',
        'disk_bus',
        'device_name',
        'volume_size',
        'guest_format',
        'boot_index',
        'device_type',
        'delete_on_termination',
    )

    properties_schema = {
        CONTEXT:
        schema.Map(_('Customized security context for operating servers.'), ),
        FLAVOR:
        schema.String(
            _('ID of flavor used for the server.'),
            required=True,
            updatable=True,
        ),
        IMAGE:
        schema.String(
            # IMAGE is not required, because there could be BDM or BDMv2
            # support and the corresponding settings effective
            _('ID of image to be used for the new server.'),
            updatable=True,
        ),
        KEY_NAME:
        schema.String(_('Name of Nova keypair to be injected to server.'), ),
        PUBLIC_NETWORK:
        schema.String(
            _('Public network for kubernetes.'),
            required=True,
        ),
        BLOCK_DEVICE_MAPPING_V2:
        schema.List(
            _('A list specifying the properties of block devices to be used '
              'for this server.'),
            schema=schema.Map(
                _('A map specifying the properties of a block device to be '
                  'used by the server.'),
                schema={
                    BDM2_UUID:
                    schema.String(
                        _('ID of the source image, snapshot or volume'), ),
                    BDM2_SOURCE_TYPE:
                    schema.String(
                        _("Volume source type, must be one of 'image', "
                          "'snapshot', 'volume' or 'blank'"),
                        required=True,
                    ),
                    BDM2_DESTINATION_TYPE:
                    schema.String(
                        _("Volume destination type, must be 'volume' or "
                          "'local'"),
                        required=True,
                    ),
                    BDM2_DISK_BUS:
                    schema.String(_('Bus of the device.'), ),
                    BDM2_DEVICE_NAME:
                    schema.String(
                        _('Name of the device(e.g. vda, xda, ....).'), ),
                    BDM2_VOLUME_SIZE:
                    schema.Integer(
                        _('Size of the block device in MB(for swap) and '
                          'in GB(for other formats)'),
                        required=True,
                    ),
                    BDM2_GUEST_FORMAT:
                    schema.String(
                        _('Specifies the disk file system format(e.g. swap, '
                          'ephemeral, ...).'), ),
                    BDM2_BOOT_INDEX:
                    schema.Integer(_('Define the boot order of the device'), ),
                    BDM2_DEVICE_TYPE:
                    schema.String(
                        _('Type of the device(e.g. disk, cdrom, ...).'), ),
                    BDM2_DELETE_ON_TERMINATION:
                    schema.Boolean(
                        _('Whether to delete the volume when the server '
                          'stops.'), ),
                }),
        ),
    }

    def __init__(self, type_name, name, **kwargs):
        super(ServerProfile, self).__init__(type_name, name, **kwargs)
        self.server_id = None

    def do_cluster_create(self, obj):
        self._generate_kubeadm_token(obj)
        self._create_security_group(obj)
        self._create_network(obj)

    def do_cluster_delete(self, obj):
        if obj.dependents and 'kube-node' in obj.dependents:
            msg = ("Cluster %s delete failed, "
                   "Node clusters %s must be deleted first." %
                   (obj.id, obj.dependents['kube-node']))
            raise exc.EResourceDeletion(type='kubernetes.master',
                                        id=obj.id,
                                        message=msg)
        self._delete_network(obj)
        self._delete_security_group(obj)

    def do_create(self, obj):
        """Create a server for the node object.

        :param obj: The node object for which a server will be created.
        """
        kwargs = {}
        for key in self.KEYS:
            if self.properties[key] is not None:
                kwargs[key] = self.properties[key]

        image_ident = self.properties[self.IMAGE]
        if image_ident is not None:
            image = self._validate_image(obj, image_ident, 'create')
            kwargs.pop(self.IMAGE)
            kwargs['imageRef'] = image.id

        flavor_ident = self.properties[self.FLAVOR]
        flavor = self._validate_flavor(obj, flavor_ident, 'create')
        kwargs.pop(self.FLAVOR)
        kwargs['flavorRef'] = flavor.id

        keypair_name = self.properties[self.KEY_NAME]
        if keypair_name:
            keypair = self._validate_keypair(obj, keypair_name, 'create')
            kwargs['key_name'] = keypair.name

        kwargs['name'] = obj.name

        metadata = self._build_metadata(obj, {})
        kwargs['metadata'] = metadata

        jj_vars = {}
        cluster_data = self._get_cluster_data(obj)
        kwargs['networks'] = [{'uuid': cluster_data[self.PRIVATE_NETWORK]}]

        # Get user_data parameters from metadata
        jj_vars['KUBETOKEN'] = cluster_data[self.KUBEADM_TOKEN]
        jj_vars['MASTER_FLOATINGIP'] = cluster_data[
            self.KUBE_MASTER_FLOATINGIP]

        block_device_mapping_v2 = self.properties[self.BLOCK_DEVICE_MAPPING_V2]
        if block_device_mapping_v2 is not None:
            kwargs['block_device_mapping_v2'] = self._resolve_bdm(
                obj, block_device_mapping_v2, 'create')

        # user_data = self.properties[self.USER_DATA]
        user_data = base.loadScript('./scripts/master.sh')
        if user_data is not None:
            # Use jinja2 to replace variables defined in user_data
            try:
                jj_t = jinja2.Template(user_data)
                user_data = jj_t.render(**jj_vars)
            except (jinja2.exceptions.UndefinedError, ValueError) as ex:
                # TODO(anyone) Handle jinja2 error
                pass
            ud = encodeutils.safe_encode(user_data)
            kwargs['user_data'] = encodeutils.safe_decode(base64.b64encode(ud))

        sgid = self._get_security_group(obj)
        kwargs['security_groups'] = [{'name': sgid}]

        server = None
        resource_id = None
        try:
            server = self.compute(obj).server_create(**kwargs)
            self.compute(obj).wait_for_server(server.id)
            server = self.compute(obj).server_get(server.id)
            self._update_master_ip(obj, server.addresses[''][0]['addr'])
            self._associate_floatingip(obj, server)
            LOG.info("Created master node: %s" % server.id)
            return server.id
        except exc.InternalError as ex:
            if server and server.id:
                resource_id = server.id
            raise exc.EResourceCreation(type='server',
                                        message=six.text_type(ex),
                                        resource_id=resource_id)

    def do_delete(self, obj, **params):
        """Delete the physical resource associated with the specified node.

        :param obj: The node object to operate on.
        :param kwargs params: Optional keyword arguments for the delete
                              operation.
        :returns: This operation always return True unless exception is
                  caught.
        :raises: `EResourceDeletion` if interaction with compute service fails.
        """
        if not obj.physical_id:
            return True

        server_id = obj.physical_id
        ignore_missing = params.get('ignore_missing', True)
        internal_ports = obj.data.get('internal_ports', [])
        force = params.get('force', False)

        try:
            self._disassociate_floatingip(obj, server_id)
            driver = self.compute(obj)
            if force:
                driver.server_force_delete(server_id, ignore_missing)
            else:
                driver.server_delete(server_id, ignore_missing)
            driver.wait_for_server_delete(server_id)
            if internal_ports:
                ex = self._delete_ports(obj, internal_ports)
                if ex:
                    raise ex
            return True
        except exc.InternalError as ex:
            raise exc.EResourceDeletion(type='server',
                                        id=server_id,
                                        message=six.text_type(ex))
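
The user_data rendering in do_create above relies on jinja2's standard Template API; a self-contained sketch with a hypothetical template string (the real script is loaded from ./scripts/master.sh):

import jinja2

tmpl = jinja2.Template('kubeadm join {{ MASTER_FLOATINGIP }} --token {{ KUBETOKEN }}')
print(tmpl.render(KUBETOKEN='abc.123', MASTER_FLOATINGIP='10.0.0.5'))
# -> kubeadm join 10.0.0.5 --token abc.123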
Example #12
class HealthPolicy(base.Policy):
    """Policy for health management of a cluster."""

    VERSION = '1.1'
    VERSIONS = {
        '1.0': [
            {
                'status': consts.EXPERIMENTAL,
                'since': '2017.02'
            },
            {
                'status': consts.SUPPORTED,
                'since': '2018.06'
            },
        ],
        '1.1': [{
            'status': consts.SUPPORTED,
            'since': '2018.09'
        }],
    }
    PRIORITY = 600

    TARGET = [
        ('BEFORE', consts.CLUSTER_RECOVER),
        ('BEFORE', consts.CLUSTER_DEL_NODES),
        ('BEFORE', consts.CLUSTER_SCALE_IN),
        ('BEFORE', consts.CLUSTER_RESIZE),
        ('BEFORE', consts.NODE_DELETE),
        ('AFTER', consts.CLUSTER_DEL_NODES),
        ('AFTER', consts.CLUSTER_SCALE_IN),
        ('AFTER', consts.CLUSTER_RESIZE),
        ('AFTER', consts.NODE_DELETE),
    ]

    # Should be ANY if profile provides health check support?
    PROFILE_TYPE = [
        'os.nova.server',
        'os.heat.stack',
    ]

    KEYS = (DETECTION, RECOVERY) = ('detection', 'recovery')

    _DETECTION_KEYS = (DETECTION_MODES, DETECTION_TYPE, DETECTION_OPTIONS,
                       DETECTION_INTERVAL, NODE_UPDATE_TIMEOUT,
                       RECOVERY_CONDITIONAL) = ('detection_modes', 'type',
                                                'options', 'interval',
                                                'node_update_timeout',
                                                'recovery_conditional')

    _DETECTION_OPTIONS = (
        POLL_URL,
        POLL_URL_SSL_VERIFY,
        POLL_URL_CONN_ERROR_AS_UNHEALTHY,
        POLL_URL_HEALTHY_RESPONSE,
        POLL_URL_RETRY_LIMIT,
        POLL_URL_RETRY_INTERVAL,
    ) = ('poll_url', 'poll_url_ssl_verify', 'poll_url_conn_error_as_unhealthy',
         'poll_url_healthy_response', 'poll_url_retry_limit',
         'poll_url_retry_interval')

    _RECOVERY_KEYS = (
        RECOVERY_ACTIONS,
        RECOVERY_FENCING,
        RECOVERY_DELETE_TIMEOUT,
        RECOVERY_FORCE_RECREATE,
    ) = (
        'actions',
        'fencing',
        'node_delete_timeout',
        'node_force_recreate',
    )

    FENCING_OPTION_VALUES = (
        COMPUTE,
        # STORAGE, NETWORK,
    ) = (
        'COMPUTE',
        # 'STORAGE', 'NETWORK'
    )

    ACTION_KEYS = (
        ACTION_NAME,
        ACTION_PARAMS,
    ) = (
        'name',
        'params',
    )

    properties_schema = {
        DETECTION:
        schema.Map(
            _('Policy aspect for node failure detection.'),
            schema={
                DETECTION_INTERVAL:
                schema.Integer(
                    _("Number of seconds between pollings. Only "
                      "required when type is 'NODE_STATUS_POLLING' or "
                      "'NODE_STATUS_POLL_URL'."),
                    default=60,
                ),
                NODE_UPDATE_TIMEOUT:
                schema.Integer(
                    _("Number of seconds since last node update to "
                      "wait before checking node health."),
                    default=300,
                ),
                RECOVERY_CONDITIONAL:
                schema.String(
                    _("The conditional that determines when recovery should be"
                      " performed in case multiple detection modes are "
                      "specified. 'ALL_FAILED' means that all "
                      "detection modes have to return failed health checks "
                      "before a node is recovered. 'ANY_FAILED'"
                      " means that a failed health check with a single "
                      "detection mode triggers a node recovery."),
                    constraints=[
                        constraints.AllowedValues(consts.RECOVERY_CONDITIONAL),
                    ],
                    default=consts.ANY_FAILED,
                    required=False,
                ),
                DETECTION_MODES:
                schema.List(
                    _('List of node failure detection modes.'),
                    schema=schema.Map(
                        _('Node failure detection mode to try'),
                        schema={
                            DETECTION_TYPE:
                            schema.String(
                                _('Type of node failure detection.'),
                                constraints=[
                                    constraints.AllowedValues(
                                        consts.DETECTION_TYPES),
                                ],
                                required=True,
                            ),
                            DETECTION_OPTIONS:
                            schema.Map(schema={
                                POLL_URL:
                                schema.String(
                                    _("URL to poll for node status. See "
                                      "documentation for valid expansion "
                                      "parameters. Only required "
                                      "when type is "
                                      "'NODE_STATUS_POLL_URL'."),
                                    default='',
                                ),
                                POLL_URL_SSL_VERIFY:
                                schema.Boolean(
                                    _("Whether to verify SSL when calling "
                                      "URL to poll for node status. Only "
                                      "required when type is "
                                      "'NODE_STATUS_POLL_URL'."),
                                    default=True,
                                ),
                                POLL_URL_CONN_ERROR_AS_UNHEALTHY:
                                schema.Boolean(
                                    _("Whether to treat URL connection "
                                      "errors as an indication of an "
                                      "unhealthy node. Only required "
                                      "when type is "
                                      "'NODE_STATUS_POLL_URL'."),
                                    default=True,
                                ),
                                POLL_URL_HEALTHY_RESPONSE:
                                schema.String(
                                    _("String pattern in the poll URL "
                                      "response body that indicates a "
                                      "healthy node. Required when type "
                                      "is 'NODE_STATUS_POLL_URL'."),
                                    default='',
                                ),
                                POLL_URL_RETRY_LIMIT:
                                schema.Integer(
                                    _("Number of times to retry URL "
                                      "polling when its return body is "
                                      "missing POLL_URL_HEALTHY_RESPONSE "
                                      "string before a node is considered "
                                      "down. Required when type is "
                                      "'NODE_STATUS_POLL_URL'."),
                                    default=3,
                                ),
                                POLL_URL_RETRY_INTERVAL:
                                schema.Integer(
                                    _("Number of seconds between URL "
                                      "polling retries before a node is "
                                      "considered down. Required when "
                                      "type is 'NODE_STATUS_POLL_URL'."),
                                    default=3,
                                ),
                            },
                                       default={}),
                        }))
            },
            required=True,
        ),
        RECOVERY:
        schema.Map(
            _('Policy aspect for node failure recovery.'),
            schema={
                RECOVERY_ACTIONS:
                schema.List(_('List of actions to try for node recovery.'),
                            schema=schema.Map(
                                _('Action to try for node recovery.'),
                                schema={
                                    ACTION_NAME:
                                    schema.String(
                                        _("Name of action to execute."),
                                        constraints=[
                                            constraints.AllowedValues(
                                                consts.RECOVERY_ACTIONS),
                                        ],
                                        required=True),
                                    ACTION_PARAMS:
                                    schema.Map(_("Parameters for the action")),
                                })),
                RECOVERY_FENCING:
                schema.List(
                    _('List of services to be fenced.'),
                    schema=schema.String(
                        _('Service to be fenced.'),
                        constraints=[
                            constraints.AllowedValues(FENCING_OPTION_VALUES),
                        ],
                        required=True,
                    ),
                ),
                RECOVERY_DELETE_TIMEOUT:
                schema.Integer(
                    _("Number of seconds to wait for node deletion to "
                      "finish and start node creation for recreate "
                      "recovery option. Required when type is "
                      "'NODE_STATUS_POLL_URL and recovery action "
                      "is RECREATE'."),
                    default=20,
                ),
                RECOVERY_FORCE_RECREATE:
                schema.Boolean(
                    _("Whether to create node even if node deletion "
                      "failed. Required when type is "
                      "'NODE_STATUS_POLL_URL' and action recovery "
                      "action is RECREATE."),
                    default=False,
                ),
            },
            required=True,
        ),
    }

    def __init__(self, name, spec, **kwargs):
        super(HealthPolicy, self).__init__(name, spec, **kwargs)

        self.interval = self.properties[self.DETECTION].get(
            self.DETECTION_INTERVAL, 60)

        self.node_update_timeout = self.properties[self.DETECTION].get(
            self.NODE_UPDATE_TIMEOUT, 300)

        self.recovery_conditional = self.properties[self.DETECTION].get(
            self.RECOVERY_CONDITIONAL, consts.ANY_FAILED)

        DetectionMode = namedtuple('DetectionMode', [self.DETECTION_TYPE] +
                                   list(self._DETECTION_OPTIONS))

        self.detection_modes = []

        raw_modes = self.properties[self.DETECTION][self.DETECTION_MODES]
        for mode in raw_modes:
            options = mode[self.DETECTION_OPTIONS]

            self.detection_modes.append(
                DetectionMode(
                    mode[self.DETECTION_TYPE], options.get(self.POLL_URL, ''),
                    options.get(self.POLL_URL_SSL_VERIFY, True),
                    options.get(self.POLL_URL_CONN_ERROR_AS_UNHEALTHY, True),
                    options.get(self.POLL_URL_HEALTHY_RESPONSE, ''),
                    options.get(self.POLL_URL_RETRY_LIMIT, 3),
                    options.get(self.POLL_URL_RETRY_INTERVAL, 3)))

        recover_settings = self.properties[self.RECOVERY]
        self.recover_actions = recover_settings[self.RECOVERY_ACTIONS]
        self.fencing_types = recover_settings[self.RECOVERY_FENCING]
        self.node_delete_timeout = recover_settings.get(
            self.RECOVERY_DELETE_TIMEOUT, None)
        self.node_force_recreate = recover_settings.get(
            self.RECOVERY_FORCE_RECREATE, False)

    def validate(self, context, validate_props=False):
        super(HealthPolicy, self).validate(context,
                                           validate_props=validate_props)

        if len(self.recover_actions) > 1:
            message = _(
                "Only one '%s' is supported for now.") % self.RECOVERY_ACTIONS
            raise exc.ESchema(message=message)

        if self.interval < cfg.CONF.health_check_interval_min:
            message = _("Specified interval of %(interval)d seconds has to be "
                        "larger than health_check_interval_min of "
                        "%(min_interval)d seconds set in configuration.") % {
                            "interval": self.interval,
                            "min_interval": cfg.CONF.health_check_interval_min
                        }
            raise exc.InvalidSpec(message=message)

        # check valid detection types
        polling_types = [
            consts.NODE_STATUS_POLLING, consts.NODE_STATUS_POLL_URL
        ]

        has_valid_polling_types = all(d.type in polling_types
                                      for d in self.detection_modes)
        has_valid_lifecycle_type = (len(self.detection_modes) == 1
                                    and self.detection_modes[0].type
                                    == consts.LIFECYCLE_EVENTS)

        if not has_valid_polling_types and not has_valid_lifecycle_type:
            message = ("Invalid detection modes in health policy: %s" %
                       ', '.join([d.type for d in self.detection_modes]))
            raise exc.InvalidSpec(message=message)

        if len(self.detection_modes) != len(set(self.detection_modes)):
            message = ("Duplicate detection modes are not allowed in "
                       "health policy: %s" %
                       ', '.join([d.type for d in self.detection_modes]))
            raise exc.InvalidSpec(message=message)

        # TODO(Qiming): Add detection of duplicated action names when
        # support to list of actions is implemented.

    def attach(self, cluster, enabled=True):
        """"Hook for policy attach.

        Register the cluster for health management.

        :param cluster: The cluster to which the policy is being attached.
        :param enabled: Whether the policy is enabled when attached.
        :return: A tuple comprising execution result and policy data.
        """
        p_type = cluster.rt['profile'].type_name
        action_names = [a['name'] for a in self.recover_actions]
        if p_type != 'os.nova.server':
            if consts.RECOVER_REBUILD in action_names:
                err_msg = _("Recovery action REBUILD is only applicable to "
                            "os.nova.server clusters.")
                return False, err_msg

            if consts.RECOVER_REBOOT in action_names:
                err_msg = _("Recovery action REBOOT is only applicable to "
                            "os.nova.server clusters.")
                return False, err_msg

        kwargs = {
            'interval': self.interval,
            'node_update_timeout': self.node_update_timeout,
            'params': {
                'recover_action': self.recover_actions,
                'node_delete_timeout': self.node_delete_timeout,
                'node_force_recreate': self.node_force_recreate,
                'recovery_conditional': self.recovery_conditional,
            },
            'enabled': enabled
        }

        converted_detection_modes = [d._asdict() for d in self.detection_modes]
        detection_mode = {'detection_modes': converted_detection_modes}
        kwargs['params'].update(detection_mode)

        health_manager.register(cluster.id, engine_id=None, **kwargs)

        data = {
            'interval': self.interval,
            'node_update_timeout': self.node_update_timeout,
            'recovery_conditional': self.recovery_conditional,
            'node_delete_timeout': self.node_delete_timeout,
            'node_force_recreate': self.node_force_recreate,
        }
        data.update(detection_mode)

        return True, self._build_policy_data(data)

    def detach(self, cluster):
        """Hook for policy detach.

        Unregister the cluster for health management.
        :param cluster: The target cluster.
        :returns: A tuple comprising the execution result and reason.
        """
        ret = health_manager.unregister(cluster.id)
        if not ret:
            LOG.warning(
                'Unregistering health manager for cluster %s '
                'timed out.', cluster.id)
        return True, ''

    def pre_op(self, cluster_id, action, **args):
        """Hook before action execution.

        One of the tasks of this routine is to disable the health policy if
        the action is a request that will shrink the cluster. The reason is
        that the policy may attempt to recover nodes that are to be deleted.

        :param cluster_id: The ID of the target cluster.
        :param action: The action to be examined.
        :param args: Other keyword arguments to be checked.
        :returns: Boolean indicating whether the checking passed.
        """
        if action.action in (consts.CLUSTER_SCALE_IN, consts.CLUSTER_DEL_NODES,
                             consts.NODE_DELETE):
            health_manager.disable(cluster_id)
            return True

        if action.action == consts.CLUSTER_RESIZE:
            deletion = action.data.get('deletion', None)
            if deletion:
                health_manager.disable(cluster_id)
                return True

            cluster = action.entity
            current = len(cluster.nodes)
            res, reason = scaleutils.parse_resize_params(
                action, cluster, current)
            if res == base.CHECK_ERROR:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = reason
                return False

            if action.data.get('deletion', None):
                health_manager.disable(cluster_id)
                return True

        pd = {
            'recover_action': self.recover_actions,
            'fencing': self.fencing_types,
        }
        action.data.update({'health': pd})
        action.store(action.context)

        return True

    def post_op(self, cluster_id, action, **args):
        """Hook before action execution.

        One of the task for this routine is to re-enable health policy if the
        action is a request that will shrink the cluster thus the policy has
        been temporarily disabled.

        :param cluster_id: The ID of the target cluster.
        :param action: The action to be examined.
        :param args: Other keyword arguments to be checked.
        :returns: Boolean indicating whether the checking passed.
        """
        if action.action in (consts.CLUSTER_SCALE_IN, consts.CLUSTER_DEL_NODES,
                             consts.NODE_DELETE):
            health_manager.enable(cluster_id)
            return True

        if action.action == consts.CLUSTER_RESIZE:
            deletion = action.data.get('deletion', None)
            if deletion:
                health_manager.enable(cluster_id)
                return True

            cluster = action.entity
            current = len(cluster.nodes)
            res, reason = scaleutils.parse_resize_params(
                action, cluster, current)
            if res == base.CHECK_ERROR:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = reason
                return False

            if action.data.get('deletion', None):
                health_manager.enable(cluster_id)
                return True

        return True
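
A hedged usage sketch for the schema above: the shape of the 'detection' and
'recovery' properties this policy accepts. Key names are given via the class
constants referenced in properties_schema (their string values are defined
earlier in the module); the URL, interval and action values are illustrative
only.

sample_properties = {
    HealthPolicy.DETECTION: {
        HealthPolicy.DETECTION_INTERVAL: 60,
        HealthPolicy.NODE_UPDATE_TIMEOUT: 300,
        HealthPolicy.RECOVERY_CONDITIONAL: consts.ANY_FAILED,
        HealthPolicy.DETECTION_MODES: [
            {
                HealthPolicy.DETECTION_TYPE: consts.NODE_STATUS_POLL_URL,
                HealthPolicy.DETECTION_OPTIONS: {
                    HealthPolicy.POLL_URL: 'http://10.0.0.1/health',
                    HealthPolicy.POLL_URL_HEALTHY_RESPONSE: 'OK',
                },
            },
        ],
    },
    HealthPolicy.RECOVERY: {
        HealthPolicy.RECOVERY_ACTIONS: [
            {HealthPolicy.ACTION_NAME: 'RECREATE'},
        ],
        HealthPolicy.RECOVERY_FENCING: ['COMPUTE'],
    },
}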
Exemple #13
0
class RegionPlacementPolicy(base.Policy):
    """Policy for placing members of a cluster across multiple regions."""

    VERSION = '1.0'

    PRIORITY = 200

    TARGET = [
        ('BEFORE', consts.CLUSTER_SCALE_OUT),
        ('BEFORE', consts.CLUSTER_SCALE_IN),
        ('BEFORE', consts.CLUSTER_RESIZE),
    ]

    PROFILE_TYPE = ['ANY']

    KEYS = (REGIONS, ) = ('regions', )

    _AZ_KEYS = (
        REGION_NAME,
        REGION_WEIGHT,
        REGION_CAP,
    ) = (
        'name',
        'weight',
        'cap',
    )

    properties_schema = {
        REGIONS:
        schema.List(
            _('List of regions to choose from.'),
            schema=schema.Map(
                _('A region as a candidate.'),
                schema={
                    REGION_NAME:
                    schema.String(_('Name of a region.'), ),
                    REGION_WEIGHT:
                    schema.Integer(
                        _('Weight of the region. The default is 100.'),
                        default=100,
                    ),
                    REGION_CAP:
                    schema.Integer(
                        _('Maximum number of nodes in this region. The '
                          'default is -1 which means no cap set.'),
                        default=-1,
                    ),
                },
            ),
        ),
    }

    def __init__(self, name, spec, **kwargs):
        super(RegionPlacementPolicy, self).__init__(name, spec, **kwargs)

        self._keystoneclient = None
        regions = {}
        for r in self.properties.get(self.REGIONS):
            regions[r[self.REGION_NAME]] = {
                'weight': r[self.REGION_WEIGHT],
                'cap': r[self.REGION_CAP],
            }
        self.regions = regions

    def _keystone(self, obj):
        """Construct keystone client based on object.

        :param obj: Object for which the client is created. It is expected to
                    be None when retrieving an existing client. When creating
                    a client, it contains the user and project to be used.
        """
        if self._keystoneclient is not None:
            return self._keystoneclient
        params = self._build_conn_params(obj)
        self._keystoneclient = driver_base.SenlinDriver().identity(params)
        return self._keystoneclient

    def _create_plan(self, current, regions, count, expand):
        """Compute a placement plan based on the weights of regions.

        :param current: Distribution of existing nodes.
        :param regions: Usable regions for node creation.
        :param count: Number of nodes to create/delete in this plan.
        :param expand: True if the plan is for inflating the cluster, False
                       otherwise.

        :returns: A dict mapping region names to node counts, or None if no
                  feasible plan can be found.
        """
        # sort candidate regions by weight and convert into a list
        candidates = sorted(regions.items(),
                            key=lambda x: x[1]['weight'],
                            reverse=expand)
        sum_weight = sum(r['weight'] for r in regions.values())
        if expand:
            total = count + sum(current.values())
        else:
            total = sum(current.values()) - count
        remain = count
        plan = dict.fromkeys(regions.keys(), 0)

        for i in range(len(candidates)):
            region = candidates[i]
            r_name = region[0]
            r_weight = region[1]['weight']
            r_cap = region[1]['cap']

            # maximum number of nodes on current region
            q = total * r_weight / float(sum_weight)
            if expand:
                quota = int(math.ceil(q))
                # respect the cap setting, if any
                if r_cap >= 0:
                    quota = min(quota, r_cap)
                headroom = quota - current[r_name]
            else:
                quota = int(math.floor(q))
                headroom = current[r_name] - quota

            if headroom <= 0:
                continue

            if headroom < remain:
                plan[r_name] = headroom
                remain -= headroom
            else:
                plan[r_name] = remain if remain > 0 else 0
                remain = 0
                break

        # we have leftovers
        if remain > 0:
            return None

        result = {}
        for reg, count in plan.items():
            if count > 0:
                result[reg] = count

        return result

    def _get_count(self, cluster_id, action):
        """Get number of nodes to create or delete.

        :param cluster_id: The ID of the target cluster.
        :param action: The action object which triggered this policy check.
        :return: An integer value which can be 1) positive - number of nodes
                 to create; 2) negative - number of nodes to delete; 3) 0 -
                 something went wrong and the policy check failed.
        """
        if action.action == consts.CLUSTER_RESIZE:
            if action.data.get('deletion', None):
                return -action.data['deletion']['count']
            elif action.data.get('creation', None):
                return action.data['creation']['count']

            db_cluster = co.Cluster.get(action.context, cluster_id)
            res = scaleutils.parse_resize_params(action, db_cluster)
            if res[0] == base.CHECK_ERROR:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = res[1]
                LOG.error(res[1])
                return 0

            if action.data.get('deletion', None):
                return -action.data['deletion']['count']
            else:
                return action.data['creation']['count']

        if action.action == consts.CLUSTER_SCALE_IN:
            pd = action.data.get('deletion', None)
            if pd is None:
                return -action.inputs.get('count', 1)
            else:
                return -pd.get('count', 1)

        # CLUSTER_SCALE_OUT: an action that inflates the cluster
        pd = action.data.get('creation', None)
        if pd is None:
            return action.inputs.get('count', 1)
        else:
            return pd.get('count', 1)

    def pre_op(self, cluster_id, action):
        """Callback function when cluster membership is about to change.

        :param cluster_id: ID of the target cluster.
        :param action: The action that triggers this policy check.
        :returns: ``None``.
        """
        count = self._get_count(cluster_id, action)
        if count == 0:
            return

        expand = True
        if count < 0:
            expand = False
            count = -count

        cluster = cm.Cluster.load(action.context, cluster_id)

        kc = self._keystone(cluster)

        regions_good = kc.validate_regions(self.regions.keys())
        if len(regions_good) == 0:
            action.data['status'] = base.CHECK_ERROR
            action.data['reason'] = _('No region is found usable.')
            LOG.error(_LE('No region is found usable.'))
            return

        regions = {}
        for r in self.regions.items():
            if r[0] in regions_good:
                regions[r[0]] = r[1]

        current_dist = cluster.get_region_distribution(regions_good)
        result = self._create_plan(current_dist, regions, count, expand)
        if not result:
            action.data['status'] = base.CHECK_ERROR
            action.data['reason'] = _('There is no feasible plan to '
                                      'handle all nodes.')
            LOG.error(_LE('There is no feasible plan to handle all nodes.'))
            return

        if expand:
            if 'creation' not in action.data:
                action.data['creation'] = {}
            action.data['creation']['count'] = count
            action.data['creation']['regions'] = result
        else:
            if 'deletion' not in action.data:
                action.data['deletion'] = {}
            action.data['deletion']['count'] = count
            action.data['deletion']['regions'] = result
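
A hedged usage sketch: the 'regions' property accepted by the schema above,
using only the key strings defined in this class ('regions', 'name',
'weight', 'cap'); the region names and numbers are illustrative.

sample_properties = {
    'regions': [
        {'name': 'RegionOne', 'weight': 100, 'cap': -1},
        {'name': 'RegionTwo', 'weight': 50, 'cap': 10},
    ],
}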
Exemple #14
0
class ServerProfile(base.Profile):
    '''Profile for an OpenStack Nova server.'''

    KEYS = (
        CONTEXT,
        ADMIN_PASS,
        AUTO_DISK_CONFIG,
        AVAILABILITY_ZONE,
        BLOCK_DEVICE_MAPPING,  # BLOCK_DEVICE_MAPPING_V2,
        CONFIG_DRIVE,
        FLAVOR,
        IMAGE,
        KEY_NAME,
        METADATA,
        NAME,
        NETWORKS,
        PERSONALITY,
        SECURITY_GROUPS,
        TIMEOUT,
        USER_DATA,
        SCHEDULER_HINTS,
    ) = (
        'context',
        'adminPass',
        'auto_disk_config',
        'availability_zone',
        'block_device_mapping',
        # 'block_device_mapping_v2',
        'config_drive',
        'flavor',
        'image',
        'key_name',
        'metadata',
        'name',
        'networks',
        'personality',
        'security_groups',
        'timeout',
        'user_data',
        'scheduler_hints',
    )

    BDM_KEYS = (
        BDM_DEVICE_NAME,
        BDM_VOLUME_SIZE,
    ) = (
        'device_name',
        'volume_size',
    )

    NETWORK_KEYS = (
        PORT,
        FIXED_IP,
        NETWORK,
    ) = (
        'port',
        'fixed-ip',
        'network',
    )

    PERSONALITY_KEYS = (
        PATH,
        CONTENTS,
    ) = (
        'path',
        'contents',
    )

    SCHEDULER_HINTS_KEYS = (GROUP, ) = ('group', )

    properties_schema = {
        CONTEXT:
        schema.Map(_('Customized security context for operating servers.'), ),
        ADMIN_PASS:
        schema.String(_('Password for the administrator account.'), ),
        AUTO_DISK_CONFIG:
        schema.Boolean(
            _('Whether the disk partition is done automatically.'),
            default=True,
        ),
        AVAILABILITY_ZONE:
        schema.String(
            _('Name of availability zone for running the server.'), ),
        BLOCK_DEVICE_MAPPING:
        schema.List(
            _('A list specifying the properties of block devices to be used '
              'for this server.'),
            schema=schema.Map(
                _('A map specifying the properties of a block device to be '
                  'used by the server.'),
                schema={
                    BDM_DEVICE_NAME:
                    schema.String(
                        _('Block device name, should be <=255 chars.'), ),
                    BDM_VOLUME_SIZE:
                    schema.Integer(_('Block device size in GB.'), ),
                }),
        ),
        CONFIG_DRIVE:
        schema.Boolean(
            _('Whether config drive should be enabled for the server.'), ),
        FLAVOR:
        schema.String(
            _('ID of flavor used for the server.'),
            required=True,
        ),
        IMAGE:
        schema.String(
            # IMAGE is not required, because there could be BDM or BDMv2
            # support and the corresponding settings effective
            _('ID of image to be used for the new server.'), ),
        KEY_NAME:
        schema.String(_('Name of Nova keypair to be injected to server.'), ),
        METADATA:
        schema.Map(
            _('A collection of key/value pairs to be associated with the '
              'server created. Both key and value should be <=255 chars.'), ),
        NAME:
        schema.String(
            _('Name of the server.'),
            required=True,
        ),
        NETWORKS:
        schema.List(
            _('List of networks for the server.'),
            schema=schema.Map(
                _('A map specifying the properties of a network for the '
                  'server to use.'),
                schema={
                    NETWORK:
                    schema.String(
                        _('Name or ID of network to create a port on.'), ),
                    PORT:
                    schema.String(_('Port ID to be used by the network.'), ),
                    FIXED_IP:
                    schema.String(_('Fixed IP to be used by the network.'), ),
                },
            ),
        ),
        PERSONALITY:
        schema.List(
            _('List of files to be injected into the server, where each '
              'entry is a map specifying the file path and contents.'),
            schema=schema.Map(
                _('A map specifying the path & contents for an injected '
                  'file.'),
                schema={
                    PATH:
                    schema.String(
                        _('In-instance path for the file to be injected.'),
                        required=True,
                    ),
                    CONTENTS:
                    schema.String(
                        _('Contents of the file to be injected.'),
                        required=True,
                    ),
                },
            ),
        ),
        SCHEDULER_HINTS:
        schema.Map(
            _('A collection of key/value pairs to be associated with the '
              'Scheduler hints. Both key and value should be <=255 chars.'), ),
        SECURITY_GROUPS:
        schema.List(
            _('List of security groups.'),
            schema=schema.String(
                _('Name of a security group'),
                required=True,
            ),
        ),
        TIMEOUT:
        schema.Integer(
            _('Time out threshold for server operations.'),
            default=120,
        ),
        USER_DATA:
        schema.String(_('User data to be exposed by the metadata server.'), ),
    }

    def __init__(self, type_name, name, **kwargs):
        super(ServerProfile, self).__init__(type_name, name, **kwargs)

        self._novaclient = None
        self._neutronclient = None
        self.server_id = None

    def validate(self):
        super(ServerProfile, self).validate()

        if self.properties[self.TIMEOUT] > cfg.CONF.default_action_timeout:
            suggest = cfg.CONF.default_action_timeout
            err = _("Value of the 'timeout' property must be lower than the "
                    "upper limit (%s).") % suggest
            raise exception.InvalidSpec(message=err)

    def nova(self, obj):
        '''Construct nova client based on object.

        :param obj: Object for which the client is created. It is expected to
                    be None when retrieving an existing client. When creating
                    a client, it contains the user and project to be used.
        '''

        if self._novaclient is not None:
            return self._novaclient
        params = self._build_conn_params(obj.user, obj.project)
        self._novaclient = driver_base.SenlinDriver().compute(params)
        return self._novaclient

    def neutron(self, obj):
        '''Construct neutron client based on object.

        :param obj: Object for which the client is created. It is expected to
                    be None when retrieving an existing client. When creating
                    a client, it contains the user and project to be used.
        '''

        if self._neutronclient is not None:
            return self._neutronclient
        params = self._build_conn_params(obj.user, obj.project)
        self._neutronclient = driver_base.SenlinDriver().network(params)
        return self._neutronclient

    def do_validate(self, obj):
        '''Validate if the spec has provided valid info for server creation.'''
        return True

    def do_create(self, obj):
        '''Create a server using the given profile.'''
        kwargs = {}
        for key in self.KEYS:
            # context is treated as connection parameters
            if key == self.CONTEXT:
                continue

            if self.properties[key] is not None:
                kwargs[key] = self.properties[key]

        name_or_id = self.properties[self.IMAGE]
        if name_or_id is not None:
            image = self.nova(obj).image_get_by_name(name_or_id)
            # wait for new version of openstacksdk to fix this
            kwargs.pop(self.IMAGE)
            kwargs['imageRef'] = image.id

        flavor_id = self.properties[self.FLAVOR]
        flavor = self.nova(obj).flavor_find(flavor_id, False)

        # wait for new version of openstacksdk to fix this
        kwargs.pop(self.FLAVOR)
        kwargs['flavorRef'] = flavor.id

        if obj.name is not None:
            kwargs[self.NAME] = obj.name + '-' + utils.random_name(8)

        metadata = self.properties[self.METADATA] or {}
        if obj.cluster_id is not None:
            metadata['cluster'] = obj.cluster_id
        kwargs['metadata'] = metadata

        scheduler_hint = self.properties[self.SCHEDULER_HINTS]
        if scheduler_hint is not None:
            kwargs['scheduler_hints'] = scheduler_hint

        user_data = self.properties[self.USER_DATA]
        if user_data is not None:
            # Nova expects user_data as base64-encoded text, hence the
            # encode/b64encode/decode round trip.
            ud = encodeutils.safe_encode(user_data)
            kwargs['user_data'] = encodeutils.safe_decode(base64.b64encode(ud))

        networks = self.properties[self.NETWORKS]
        if networks is not None:
            for network in networks:
                net_name_id = network.get(self.NETWORK)
                if net_name_id:
                    res = self.neutron(obj).network_get(net_name_id)
                    network['uuid'] = res.id
                    del network[self.NETWORK]
                    if network['port'] is None:
                        del network['port']
                    if network['fixed-ip'] is None:
                        del network['fixed-ip']
            kwargs['networks'] = networks

        LOG.info('Creating server: %s', kwargs)
        try:
            server = self.nova(obj).server_create(**kwargs)
            self.nova(obj).wait_for_server(server)
        except Exception as ex:
            LOG.exception(_('Failed in creating server: %s'),
                          six.text_type(ex))
            return False
        self.server_id = server.id

        return server.id

    def do_delete(self, obj):
        self.server_id = obj.physical_id

        if not obj.physical_id:
            return True

        try:
            self.nova(obj).server_delete(self.server_id)
            self.nova(obj).wait_for_server_delete(self.server_id)
        except Exception as ex:
            LOG.error('Error: %s', six.text_type(ex))
            return False

        return True

    def do_update(self, obj, new_profile, **params):
        '''Perform update on the server.

        :param obj: the server to operate on
        :param new_profile: the new profile for the server.
        :param params: a dictionary of optional parameters.
        '''
        self.server_id = obj.physical_id
        if not self.server_id:
            return True

        # TODO(anyone): Validate the new profile
        # TODO(anyone): Do update based on the fields provided.

        # self.nova(obj).server_update(**fields)
        return True

    def do_check(self, obj):
        # TODO(anyone): Check server status
        return True

    def do_get_details(self, obj):
        if obj.physical_id is None or obj.physical_id == '':
            return {}

        server = self.nova(obj).server_get(obj.physical_id)
        if server is None:
            return {}
        details = {
            'id': server.id,
            'name': server.name,
            'access_ipv4': server.access_ipv4,
            'access_ipv6': server.access_ipv6,
            'addresses': server.addresses,
            'created_at': server.created_at,
            'flavor': server.flavor,
            'host_id': server.host_id,
            'image': server.image,
            'links': server.links,
            'metadata': server.metadata,
            'progress': server.progress,
            'project_id': server.project_id,
            'status': server.status,
            'updated_at': server.updated_at,
            'user_id': server.user_id,
        }

        return details

    def do_join(self, obj, cluster_id):
        if not obj.physical_id:
            return {}

        metadata = self.nova(obj).server_metadata_get(
            server_id=obj.physical_id) or {}
        metadata['cluster'] = cluster_id
        return self.nova(obj).server_metadata_update(**metadata)

    def do_leave(self, obj):
        if not obj.physical_id:
            return

        metadata = self.nova(obj).server_metadata_get(
            server_id=obj.physical_id) or {}
        if 'cluster' in metadata:
            del metadata['cluster']
        return self.nova(obj).server_metadata_update(**metadata)
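
A hedged usage sketch: a minimal properties map for this profile. 'flavor'
and 'name' are required by the schema above; the flavor, image and network
names are illustrative only.

sample_properties = {
    'flavor': 'm1.small',
    'name': 'web-server',
    'image': 'cirros',
    'networks': [
        {'network': 'private'},
    ],
}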
Exemple #15
0
class HealthPolicy(base.Policy):
    '''Policy for health management of a cluster.'''

    VERSION = '1.0'

    PRIORITY = 600

    TARGET = [
        ('BEFORE', consts.CLUSTER_CHECK),
        ('BEFORE', consts.CLUSTER_RECOVER),
    ]

    # Should be ANY if profile provides health check support?
    PROFILE_TYPE = [
        'os.nova.server',
        'os.heat.stack',
    ]

    KEYS = (DETECTION, RECOVERY) = ('detection', 'recovery')

    _DETECTION_KEYS = (
        DETECTION_TYPE,
        DETECTION_OPTIONS,
    ) = ('type', 'options')

    DETECTION_TYPES = (
        VM_LIFECYCLE_EVENTS,
        NODE_STATUS_POLLING,
        LB_STATUS_POLLING,
    ) = (
        'VM_LIFECYCLE_EVENTS',
        'NODE_STATUS_POLLING',
        'LB_STATUS_POLLING',
    )

    _DETECTION_OPTIONS = (DETECTION_INTERVAL, ) = ('interval', )

    _RECOVERY_KEYS = (RECOVERY_ACTIONS, RECOVERY_FENCING) = ('actions',
                                                             'fencing')

    RECOVERY_ACTION_VALUES = (REBOOT, REBUILD, MIGRATE, EVACUATE, RECREATE,
                              NOP) = (
                                  'REBOOT',
                                  'REBUILD',
                                  'MIGRATE',
                                  'EVACUATE',
                                  'RECREATE',
                                  'NOP',
                              )

    FENCING_OPTION_VALUES = (
        COMPUTE,
        STORAGE,
        NETWORK,
    ) = ('COMPUTE', 'STORAGE', 'NETWORK')

    properties_schema = {
        DETECTION:
        schema.Map(
            _('Policy aspect for node failure detection.'),
            schema={
                DETECTION_TYPE:
                schema.String(
                    _('Type of node failure detection.'),
                    constraints=[
                        constraints.AllowedValues(DETECTION_TYPES),
                    ],
                    required=True,
                ),
                DETECTION_OPTIONS:
                schema.Map(
                    schema={
                        DETECTION_INTERVAL:
                        schema.Integer(
                            _("Number of seconds between pollings. Only "
                              "required when type is 'NODE_STATUS_POLLING'."),
                            default=60,
                        ),
                    }),
            },
            required=True,
        ),
        RECOVERY:
        schema.Map(
            _('Policy aspect for node failure recovery.'),
            schema={
                RECOVERY_ACTIONS:
                schema.List(
                    _('List of actions to try for node recovery.'),
                    schema=schema.String(
                        _('Action to try for node recovery.'),
                        constraints=[
                            constraints.AllowedValues(RECOVERY_ACTION_VALUES),
                        ]),
                ),
                RECOVERY_FENCING:
                schema.List(
                    _('List of services to be fenced.'),
                    schema=schema.String(
                        _('Service to be fenced.'),
                        constraints=[
                            constraints.AllowedValues(FENCING_OPTION_VALUES),
                        ],
                    ),
                ),
            }),
    }

    def __init__(self, name, spec, **kwargs):
        super(HealthPolicy, self).__init__(name, spec, **kwargs)

        self.check_type = self.properties[self.DETECTION][self.DETECTION_TYPE]
        options = self.properties[self.DETECTION][self.DETECTION_OPTIONS]
        self.interval = options[self.DETECTION_INTERVAL]
        recover_settings = self.properties[self.RECOVERY]
        self.recover_actions = recover_settings[self.RECOVERY_ACTIONS]

    def attach(self, cluster):
        """"Hook for policy attach.

        Register the cluster for health management.
        """

        kwargs = {
            'check_type': self.check_type,
            'interval': self.interval,
            'params': {},
        }

        health_manager.register(cluster.id, engine_id=None, **kwargs)

        data = {
            'check_type': self.check_type,
            'interval': self.interval,
        }

        return True, self._build_policy_data(data)

    def detach(self, cluster):
        '''Hook for policy detach.

        Unregister the cluster for health management.
        '''

        health_manager.unregister(cluster.id)
        return True, ''

    def pre_op(self, cluster_id, action, **args):
        # Ignore actions that are not required to be processed at this stage
        if action.action != consts.CLUSTER_RECOVER:
            return True

        pd = {
            'recover_action': self.recover_actions[0],
        }
        action.data.update({'health': pd})
        action.store(action.context)

        return True

    def post_op(self, cluster_id, action, **args):
        # Ignore irrelevant action here
        if action.action not in (consts.CLUSTER_CHECK, consts.CLUSTER_RECOVER):
            return True

        # TODO(anyone): subscribe to vm-lifecycle-events for the specified VM
        #               or add vm to the list of VM status polling
        return True
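
A hedged usage sketch for this version of the schema, using only the key
strings and allowed values visible above; the interval value is illustrative.

sample_properties = {
    'detection': {
        'type': 'NODE_STATUS_POLLING',
        'options': {'interval': 120},
    },
    'recovery': {
        'actions': ['REBUILD'],
        'fencing': ['COMPUTE'],
    },
}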
Exemple #16
0
class ThresholdAlarm(Alarm):

    rule_schema = {
        METER_NAME:
        schema.String(
            _('Name of a meter to evaluate against.'),
            required=True,
        ),
        OPERATOR:
        schema.String(
            _('Comparison operator for evaluation.'),
            constraints=[
                constraints.AllowedValues(OPERATOR_VALUES),
            ],
            default=OP_EQUAL,
        ),
        THRESHOLD:
        schema.Number(_('Threshold for evaluation.'), required=True),
        PERIOD:
        schema.Integer(
            _('Length of every evaluation period in seconds.'),
            default=60,
        ),
        EVALUATIONS:
        schema.Integer(
            _('Number of periods to evaluate over.'),
            default=1,
        ),
        STATISTIC:
        schema.String(
            _('Statistic to evaluate. Must be one of %s; defaults to "avg".') %
            list(STATISTIC_VALUES),
            constraints=[
                constraints.AllowedValues(STATISTIC_VALUES),
            ],
            default=SV_AVG,
        ),
        QUERY:
        schema.List(
            _('The query to find the data for computing statistics.'),
            schema=schema.Map(
                schema={
                    Q_FIELD:
                    schema.String(
                        _('A field of a meter to query.'),
                        required=True,
                    ),
                    Q_OP:
                    schema.String(
                        _('An operator for meter comparison.'),
                        default='==',
                    ),
                    Q_VALUE:
                    schema.String(
                        _('A value for comparison.'),
                        required=True,
                    )
                }),
        )
    }

    def __init__(self, name, spec, **kwargs):
        super(ThresholdAlarm, self).__init__(name, spec, **kwargs)
        rule_spec = spec.get('rule', {})
        self.rule = schema.Spec(self.rule_schema, rule_spec)
        self.namespace = 'threshold'
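
A hedged usage sketch: the shape of the 'rule' section this alarm consumes.
The dict keys are the module-level constants used in rule_schema above
(their string values are defined earlier in the module); the meter name and
numeric values are illustrative only.

sample_spec = {
    'rule': {
        METER_NAME: 'cpu_util',
        OPERATOR: OP_EQUAL,
        THRESHOLD: 80.0,
        PERIOD: 60,
        EVALUATIONS: 2,
        STATISTIC: SV_AVG,
        QUERY: [
            {Q_FIELD: 'resource_id', Q_OP: '==', Q_VALUE: 'a-server-id'},
        ],
    },
}
alarm = ThresholdAlarm('high-cpu', sample_spec)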
Exemple #17
0
class Alarm(base.Trigger):

    # time constraints
    alarm_schema = {
        REPEAT:
        schema.Boolean(
            _('Whether the actions should be re-triggered on each evaluation '
              'cycle. Default to False.'),
            default=False,
        ),
        TIME_CONSTRAINTS:
        schema.List(schema=schema.Map(
            _('A map of time constraint settings.'),
            schema={
                NAME:
                schema.String(_('Name of the time constraint.'), ),
                TC_DESCRIPTION:
                schema.String(_('A description of the time constraint.'), ),
                TC_START:
                schema.String(
                    _('Start point of the time constraint, expressed as a '
                      'string in cron expression format.'),
                    required=True,
                ),
                TC_DURATION:
                schema.Integer(
                    _('How long the constraint should last, in seconds.'),
                    required=True,
                ),
                TC_TIMEZONE:
                schema.String(
                    _('Time zone of the constraint.'),
                    default='',
                ),
            },
        ), )
    }

    def __init__(self, name, spec, **kwargs):
        super(Alarm, self).__init__(name, spec, **kwargs)

        self.alarm_properties = schema.Spec(self.alarm_schema, spec)
        self.namespace = 'default'
        self.rule = None

    def validate(self):
        # validate cron expression if specified
        if TIME_CONSTRAINTS in self.spec:
            tcs = self.alarm_properties[TIME_CONSTRAINTS]
            for tc in tcs:
                exp = tc.get(TC_START, '')
                try:
                    croniter.croniter(exp)
                except Exception as ex:
                    msg = _("Invalid cron expression specified for property "
                            "'%(property)s' (%(exp)s): %(ex)s") % {
                                'property': TC_START,
                                'exp': exp,
                                'ex': six.text_type(ex)
                            }
                    raise exc.InvalidSpec(message=msg)

                tz = tc.get(TC_TIMEZONE, '')
                try:
                    pytz.timezone(tz)
                except Exception as ex:
                    msg = _("Invalid timezone value specified for property "
                            "'%(property)s' (%(tz)s): %(ex)s") % {
                                'property': TC_TIMEZONE,
                                'tz': tz,
                                'ex': six.text_type(ex)
                            }
                    raise exc.InvalidSpec(message=msg)

    def create(self, ctx, **kwargs):
        """Create an alarm for a cluster.

        :param name: The name for the alarm.
        :param urls: A list of URLs for webhooks to be triggered.
        :returns: A dict containing properties of the alarm.
        """
        self.ok_actions = kwargs.get(OK_ACTIONS, [])
        self.alarm_actions = kwargs.get(ALARM_ACTIONS, [])
        self.insufficient_data_actions = kwargs.get(INSUFFICIENT_DATA_ACTIONS,
                                                    [])

        rule_name = self.namespace + '_rule'
        rule_data = dict((k, v) for k, v in self.rule.items())
        params = {
            NAME: self.name,
            DESCRIPTION: self.desc,
            TYPE: self.namespace,
            STATE: self.state,
            SEVERITY: self.severity,
            ENABLED: self.enabled,
            OK_ACTIONS: self.ok_actions,
            ALARM_ACTIONS: self.alarm_actions,
            INSUFFICIENT_DATA_ACTIONS: self.insufficient_data_actions,
            TIME_CONSTRAINTS: self.alarm_properties[TIME_CONSTRAINTS],
            REPEAT: self.alarm_properties[REPEAT],
            rule_name: rule_data,
        }

        try:
            cc = driver_base.SenlinDriver().telemetry(ctx.to_dict())
            alarm = cc.alarm_create(**params)
            self.physical_id = alarm.id
            self.store(ctx)
            return True, alarm.to_dict()
        except exc.SenlinException as ex:
            return False, six.text_type(ex)

    def delete(self, ctx, identifier):
        """Delete an alarm.

        :param identifier: This must be an alarm ID.
        """
        try:
            cc = driver_base.SenlinDriver().telemetry(ctx)
            res = cc.alarm_delete(identifier, True)
            return True, res
        except exc.InternalError as ex:
            return False, six.text_type(ex)

    def update(self, identifier, values):
        return NotImplemented
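
A hedged sketch of a time-constraint entry that validate() above would
accept: croniter must parse the start expression and pytz must recognize the
timezone. The dict keys are the module-level constants used in alarm_schema
(their string values are defined earlier in the module); the values are
illustrative.

sample_tc = {
    NAME: 'nightly',
    TC_START: '0 23 * * *',
    TC_DURATION: 3600,
    TC_TIMEZONE: 'Europe/Paris',
}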
Exemple #18
0
class HealthPolicy(base.Policy):
    """Policy for health management of a cluster."""

    VERSION = '1.0'

    PRIORITY = 600

    TARGET = [
        ('BEFORE', consts.CLUSTER_CHECK),
        ('BEFORE', consts.CLUSTER_DEL_NODES),
        ('BEFORE', consts.CLUSTER_RECOVER),
        ('BEFORE', consts.CLUSTER_RESIZE),
        ('BEFORE', consts.CLUSTER_SCALE_IN),
        ('BEFORE', consts.NODE_DELETE),
        ('AFTER', consts.CLUSTER_DEL_NODES),
        ('AFTER', consts.CLUSTER_SCALE_IN),
        ('AFTER', consts.CLUSTER_RESIZE),
        ('AFTER', consts.NODE_DELETE),
    ]

    # Should be ANY if profile provides health check support?
    PROFILE_TYPE = [
        'os.nova.server',
        'os.heat.stack',
    ]

    KEYS = (DETECTION, RECOVERY) = ('detection', 'recovery')

    _DETECTION_KEYS = (
        DETECTION_TYPE,
        DETECTION_OPTIONS,
    ) = ('type', 'options')

    _DETECTION_OPTIONS = (DETECTION_INTERVAL, ) = ('interval', )

    _RECOVERY_KEYS = (RECOVERY_ACTIONS, RECOVERY_FENCING) = ('actions',
                                                             'fencing')

    RECOVERY_ACTION_VALUES = (
        REBUILD,
        RECREATE,
        # REBOOT, MIGRATE, EVACUATE,
    ) = (
        "REBUILD",
        "RECREATE",
        # 'REBOOT', 'MIGRATE', 'EVACUATE',
    )

    FENCING_OPTION_VALUES = (
        COMPUTE,
        # STORAGE, NETWORK,
    ) = (
        'COMPUTE',
        # 'STORAGE', 'NETWORK'
    )

    properties_schema = {
        DETECTION:
        schema.Map(
            _('Policy aspect for node failure detection.'),
            schema={
                DETECTION_TYPE:
                schema.String(
                    _('Type of node failure detection.'),
                    constraints=[
                        constraints.AllowedValues(consts.DETECTION_TYPES),
                    ],
                    required=True,
                ),
                DETECTION_OPTIONS:
                schema.Map(
                    schema={
                        DETECTION_INTERVAL:
                        schema.Integer(
                            _("Number of seconds between pollings. Only "
                              "required when type is 'NODE_STATUS_POLLING'."),
                            default=60,
                        ),
                    }),
            },
            required=True,
        ),
        RECOVERY:
        schema.Map(
            _('Policy aspect for node failure recovery.'),
            schema={
                RECOVERY_ACTIONS:
                schema.List(
                    _('List of actions to try for node recovery.'),
                    schema=schema.String(
                        _('Action to try for node recovery.'),
                        constraints=[
                            constraints.AllowedValues(RECOVERY_ACTION_VALUES),
                        ]),
                ),
                RECOVERY_FENCING:
                schema.List(
                    _('List of services to be fenced.'),
                    schema=schema.String(
                        _('Service to be fenced.'),
                        constraints=[
                            constraints.AllowedValues(FENCING_OPTION_VALUES),
                        ],
                    ),
                ),
            }),
    }

    def __init__(self, name, spec, **kwargs):
        super(HealthPolicy, self).__init__(name, spec, **kwargs)

        self.check_type = self.properties[self.DETECTION][self.DETECTION_TYPE]
        options = self.properties[self.DETECTION][self.DETECTION_OPTIONS]
        self.interval = options[self.DETECTION_INTERVAL]
        recover_settings = self.properties[self.RECOVERY]
        self.recover_actions = recover_settings[self.RECOVERY_ACTIONS]
        self.fencing_types = recover_settings[self.RECOVERY_FENCING]

    def attach(self, cluster):
        """"Hook for policy attach.

        Register the cluster for health management.

        :param cluster: The target cluster.
        :return: A tuple comprising execution result and policy data.
        """
        kwargs = {
            'check_type': self.check_type,
            'interval': self.interval,
            'params': {},
        }

        health_manager.register(cluster.id, engine_id=None, **kwargs)

        data = {
            'check_type': self.check_type,
            'interval': self.interval,
        }

        return True, self._build_policy_data(data)

    def detach(self, cluster):
        """Hook for policy detach.

        Unregister the cluster for health management.
        :param cluster: The target cluster.
        :returns: A tuple comprising the execution result and reason.
        """
        health_manager.unregister(cluster.id)
        return True, ''

    def pre_op(self, cluster_id, action, **args):
        """Hook before action execution.

        One of the tasks of this routine is to disable the health policy if
        the action is a request that will shrink the cluster. The reason is
        that the policy may attempt to recover nodes that are to be deleted.

        :param cluster_id: The ID of the target cluster.
        :param action: The action to be examined.
        :param args: Other keyword arguments to be checked.
        :returns: Boolean indicating whether the checking passed.
        """
        if action.action in (consts.CLUSTER_SCALE_IN, consts.CLUSTER_DEL_NODES,
                             consts.NODE_DELETE):
            health_manager.disable(cluster_id)
            return True

        if action.action == consts.CLUSTER_RESIZE:
            deletion = action.data.get('deletion', None)
            if deletion:
                health_manager.disable(cluster_id)
                return True

            db_cluster = co.Cluster.get(action.context, cluster_id)
            current = no.Node.count_by_cluster(action.context, cluster_id)
            res, reason = scaleutils.parse_resize_params(
                action, db_cluster, current)
            if res == base.CHECK_ERROR:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = reason
                return False

            if action.data.get('deletion', None):
                health_manager.disable(cluster_id)
                return True

        pd = {
            'recover_action': self.recover_actions,
            'fencing': self.fencing_types,
        }
        action.data.update({'health': pd})
        action.store(action.context)

        return True

    def post_op(self, cluster_id, action, **args):
        """Hook before action execution.

        One of the task for this routine is to re-enable health policy if the
        action is a request that will shrink the cluster thus the policy has
        been temporarily disabled.

        :param cluster_id: The ID of the target cluster.
        :param action: The action to be examined.
        :param args: Other keyword arguments to be checked.
        :returns: Boolean indicating whether the checking passed.
        """
        if action.action in (consts.CLUSTER_SCALE_IN, consts.CLUSTER_DEL_NODES,
                             consts.NODE_DELETE):
            health_manager.enable(cluster_id)
            return True

        if action.action == consts.CLUSTER_RESIZE:
            deletion = action.data.get('deletion', None)
            if deletion:
                health_manager.enable(cluster_id)
                return True

            db_cluster = co.Cluster.get(action.context, cluster_id)
            current = no.Node.count_by_cluster(action.context, cluster_id)
            res, reason = scaleutils.parse_resize_params(
                action, db_cluster, current)
            if res == base.CHECK_ERROR:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = reason
                return False

            if action.data.get('deletion', None):
                health_manager.enable(cluster_id)
                return True

        return True
Exemple #19
0
class HealthPolicy(base.Policy):
    '''Policy for health checking for members of a cluster.'''

    VERSION = '1.0'

    TARGET = [
        ('AFTER', consts.CLUSTER_ADD_NODES),
        ('AFTER', consts.CLUSTER_SCALE_OUT),
        ('BEFORE', consts.CLUSTER_DEL_NODES),
        ('BEFORE', consts.CLUSTER_SCALE_IN),
    ]

    # Should be ANY if profile provides health check support?
    PROFILE_TYPE = [
        'os.nova.server',
        'os.heat.stack',
        'AWS.AutoScaling.LaunchConfiguration',
    ]

    KEYS = (DETECTION, RECOVERY) = ('detection', 'recovery')

    _DETECTION_KEYS = (
        DETECTION_TYPE,
        DETECTION_OPTIONS,
    ) = ('type', 'options')

    DETECTION_TYPES = (
        VM_LIFECYCLE_EVENTS,
        NODE_STATUS_POLLING,
        LB_STATUS_POLLING,
    ) = (
        'VM_LIFECYCLE_EVENTS',
        'NODE_STATUS_POLLING',
        'LB_STATUS_POLLING',
    )

    _DETECTION_OPTIONS = (DETECTION_INTERVAL, ) = ('interval', )

    _RECOVERY_KEYS = (RECOVERY_ACTIONS, RECOVERY_FENCING) = ('actions',
                                                             'fencing')

    RECOVERY_ACTION_VALUES = (REBOOT, REBUILD, MIGRATE, EVACUATE, RECREATE,
                              NOP) = (
                                  'REBOOT',
                                  'REBUILD',
                                  'MIGRATE',
                                  'EVACUATE',
                                  'RECREATE',
                                  'NOP',
                              )

    FENCING_OPTION_VALUES = (
        COMPUTE,
        STORAGE,
        NETWORK,
    ) = ('COMPUTE', 'STORAGE', 'NETWORK')

    properties_schema = {
        DETECTION:
        schema.Map(
            _('Policy aspect for node failure detection.'),
            schema={
                DETECTION_TYPE:
                schema.String(
                    _('Type of node failure detection.'),
                    constraints=[
                        constraints.AllowedValues(DETECTION_TYPES),
                    ],
                    required=True,
                ),
                DETECTION_OPTIONS:
                schema.Map(
                    schema={
                        DETECTION_INTERVAL:
                        schema.Integer(
                            _("Number of seconds between pollings. Only "
                              "required when type is 'NODE_STATUS_POLLING'."),
                            default=60,
                        ),
                    }),
            },
            required=True,
        ),
        RECOVERY:
        schema.Map(
            _('Policy aspect for node failure recovery.'),
            schema={
                RECOVERY_ACTIONS:
                schema.List(
                    _('List of actions to try for node recovery.'),
                    schema=schema.String(
                        _('Action to try for node recovery.'),
                        constraints=[
                            constraints.AllowedValues(RECOVERY_ACTION_VALUES),
                        ]),
                ),
                RECOVERY_FENCING:
                schema.List(
                    _('List of services to be fenced.'),
                    schema=schema.String(
                        _('Service to be fenced.'),
                        constraints=[
                            constraints.AllowedValues(FENCING_OPTION_VALUES),
                        ],
                    ),
                ),
            }),
    }
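    # A spec fragment matching this schema might look like (illustrative
    # values only):
    #   detection:
    #     type: NODE_STATUS_POLLING
    #     options:
    #       interval: 120
    #   recovery:
    #     actions: [REBUILD]
    #     fencing: [COMPUTE]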

    def __init__(self, name, spec, **kwargs):
        super(HealthPolicy, self).__init__(name, spec, **kwargs)

        self.check_type = self.properties[self.DETECTION][self.DETECTION_TYPE]
        options = self.properties[self.DETECTION][self.DETECTION_OPTIONS]
        self.interval = options[self.DETECTION_INTERVAL]

    def attach(self, cluster):
        '''Hook for policy attach.

        Initialize the health check mechanism for existing nodes in cluster.
        '''
        data = {
            'type': self.check_type,
            'interval': self.interval,
            'counter': 0,
        }

        # TODO(anyone): register cluster for periodic checking
        return True, self._build_policy_data(data)

    def detach(self, cluster):
        '''Hook for policy detach.

        Deinitialize the health check mechanism (for the cluster).
        '''
        # TODO(anyone): deregister cluster from periodic checking
        return True, ''

    def pre_op(self, cluster_id, action, **args):
        # Ignore actions that are not required to be processed at this stage
        if action not in (consts.CLUSTER_SCALE_IN, consts.CLUSTER_DEL_NODES):
            return True

        # TODO(anyone): Unsubscribe nodes from backend health monitoring
        #               infrastructure
        return True

    def post_op(self, cluster_id, action, **args):
        # Ignore irrelevant action here
        if action not in (consts.CLUSTER_SCALE_OUT, consts.CLUSTER_ADD_NODES):
            return True

        # TODO(anyone): subscribe to vm-lifecycle-events for the specified VM
        #               or add vm to the list of VM status polling
        return True

    @periodic_task.periodic_task
    def health_check(self):
        if not self.detect_enabled:
            return

        if (self.detect_counter < self.detect_interval):
            self.detect_counter += 1
            return
        self.detect_counter = 0

        failures = 0
        for n in self.rt['nodes']:
            if (n.rt.profile.do_check(n)):
                continue

            failures += 1

        # TODO(Anyone): How to enforce the HA policy?
        pass
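The periodic health_check above spaces its real polling work with a simple tick counter. A standalone sketch of that pattern (names are illustrative, not from the original class):

def should_poll(counter, interval):
    """Tick-based pacing: poll once every `interval` ticks."""
    if counter < interval:
        return False, counter + 1
    return True, 0

# With interval=3 the poll fires on every fourth tick:
c = 0
for tick in range(8):
    fire, c = should_poll(c, 3)
    print(tick, fire)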
Exemple #20
0
    def test_basic(self):
        sot = schema.List('desc')

        self.assertEqual('List', sot['type'])
        self.assertEqual('desc', sot['description'])
Exemple #21
0
class ZonePlacementPolicy(base.Policy):
    """Policy for placing members of a cluster across availability zones."""

    VERSION = '1.0'

    PRIORITY = 300

    TARGET = [
        ('BEFORE', consts.CLUSTER_SCALE_OUT),
        ('BEFORE', consts.CLUSTER_SCALE_IN),
        ('BEFORE', consts.CLUSTER_RESIZE),
    ]

    PROFILE_TYPE = [
        'os.nova.server-1.0',
    ]

    KEYS = (
        ZONES,
    ) = (
        'zones',
    )

    _AZ_KEYS = (
        ZONE_NAME, ZONE_WEIGHT,
    ) = (
        'name', 'weight',
    )

    properties_schema = {
        ZONES: schema.List(
            _('List of availability zones to choose from.'),
            schema=schema.Map(
                _('An availability zone as candidate.'),
                schema={
                    ZONE_NAME: schema.String(
                        _('Name of an availability zone.'),
                    ),
                    ZONE_WEIGHT: schema.Integer(
                        _('Weight of the availability zone (default is 100).'),
                        default=100,
                        required=False,
                    )
                },
            ),
        ),
    }

    def __init__(self, name, spec, **kwargs):
        super(ZonePlacementPolicy, self).__init__(name, spec, **kwargs)

        self._novaclient = None
        self.zones = dict((z[self.ZONE_NAME], z[self.ZONE_WEIGHT])
                          for z in self.properties.get(self.ZONES))

    def _nova(self, obj):
        """Construct nova client based on object.

        :param obj: Object for which the client is created. It is expected to
                    be None when retrieving an existing client. When creating
                    a client, it contains the user and project to be used.
        """
        if self._novaclient is not None:
            return self._novaclient

        params = self._build_conn_params(obj)
        self._novaclient = driver_base.SenlinDriver().compute(params)
        return self._novaclient

    def _create_plan(self, current, zones, count, expand):
        """Compute a placement plan based on the weights of AZs.

        :param current: Distribution of existing nodes.
        :returns: A dict that contains a placement plan.
        """
        # sort candidate zones by weight and convert into a list
        candidates = sorted(zones.items(), key=operator.itemgetter(1),
                            reverse=expand)

        sum_weight = sum(zones.values())
        if expand:
            total = count + sum(current.values())
        else:
            total = sum(current.values()) - count

        remain = count
        plan = dict.fromkeys(zones.keys(), 0)

        for i in range(len(zones)):
            zone = candidates[i][0]
            weight = candidates[i][1]
            q = total * weight / float(sum_weight)
            if expand:
                quota = int(math.ceil(q))
                headroom = quota - current[zone]
            else:
                quota = int(math.floor(q))
                headroom = current[zone] - quota

            if headroom <= 0:
                continue

            if headroom < remain:
                plan[zone] = headroom
                remain -= headroom
            else:
                plan[zone] = remain if remain > 0 else 0
                remain = 0
                break

        if remain > 0:
            return None

        # filter out zero values
        result = {}
        for z, c in plan.items():
            if c > 0:
                result[z] = c

        return result
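    # A worked example of the computation above (illustrative values, not
    # from the original code): with zones weighted {'az1': 100, 'az2': 50},
    # current distribution {'az1': 2, 'az2': 2} and an expansion of
    # count=3, total becomes 7; az1 is visited first (highest weight),
    # its quota is ceil(7 * 100 / 150) = 5, so its headroom of 3 absorbs
    # the whole request and the plan is {'az1': 3}.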

    def _get_count(self, cluster_id, action):
        """Get number of nodes to create or delete.

        :param cluster_id: The ID of the target cluster.
        :param action: The action object which triggered this policy check.
        :return: An integer value which can be 1) positive - number of nodes
                 to create; 2) negative - number of nodes to delete; 3) 0 -
                 something wrong happened, and the policy check failed.
        """
        if action.action == consts.CLUSTER_RESIZE:
            if action.data.get('deletion', None):
                return -action.data['deletion']['count']
            elif action.data.get('creation', None):
                return action.data['creation']['count']

            db_cluster = db_api.cluster_get(action.context, cluster_id)
            res = scaleutils.parse_resize_params(action, db_cluster)
            if res[0] == base.CHECK_ERROR:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = res[1]
                LOG.error(res[1])
                return 0

            if action.data.get('deletion', None):
                return -action.data['deletion']['count']
            else:
                return action.data['creation']['count']

        if action.action == consts.CLUSTER_SCALE_IN:
            pd = action.data.get('deletion', None)
            if pd is None:
                return -action.inputs.get('count', 1)
            else:
                return -pd.get('count', 1)

        # CLUSTER_SCALE_OUT: an action that inflates the cluster
        pd = action.data.get('creation', None)
        if pd is None:
            return action.inputs.get('count', 1)
        else:
            return pd.get('count', 1)
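    # For example (hypothetical action data): a CLUSTER_SCALE_IN action
    # with inputs {'count': 2} and no 'deletion' data yields -2, while a
    # CLUSTER_SCALE_OUT whose data already holds {'creation': {'count': 3}}
    # yields 3.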

    def pre_op(self, cluster_id, action):
        """Callback function when cluster membership is about to change.

        :param cluster_id: ID of the target cluster.
        :param action: The action that triggers this policy check.
        """
        count = self._get_count(cluster_id, action)
        if count == 0:
            return

        expand = True
        if count < 0:
            expand = False
            count = -count

        cluster = cluster_mod.Cluster.load(action.context, cluster_id)

        nc = self._nova(cluster)
        zones_good = nc.validate_azs(self.zones.keys())
        if len(zones_good) == 0:
            action.data['status'] = base.CHECK_ERROR
            action.data['reason'] = _('No availability zone found available.')
            LOG.error(_LE('No availability zone found available.'))
            return

        zones = {}
        for z, w in self.zones.items():
            if z in zones_good:
                zones[z] = w

        current = cluster.get_zone_distribution(action.context, zones.keys())
        result = self._create_plan(current, zones, count, expand)

        if not result:
            action.data['status'] = base.CHECK_ERROR
            action.data['reason'] = _('There is no feasible plan to '
                                      'handle all nodes.')
            LOG.error(_LE('There is no feasible plan to handle all nodes.'))
            return

        if expand:
            if 'creation' not in action.data:
                action.data['creation'] = {}
            action.data['creation']['count'] = count
            action.data['creation']['zones'] = result
        else:
            if 'deletion' not in action.data:
                action.data['deletion'] = {}
            action.data['deletion']['count'] = count
            action.data['deletion']['zones'] = result
Exemple #22
0
    def test__get_children(self):
        sot = schema.List('desc', schema=schema.String())

        res = sot._get_children(['v1', 'v2'], [0, 1])
        self.assertEqual(['v1', 'v2'], list(res))
Exemple #23
0
class ServerProfile(base.Profile):
    """Profile for an OpenStack Nova server."""

    KEYS = (
        CONTEXT,
        ADMIN_PASS,
        AUTO_DISK_CONFIG,
        AVAILABILITY_ZONE,
        BLOCK_DEVICE_MAPPING_V2,
        CONFIG_DRIVE,
        FLAVOR,
        IMAGE,
        KEY_NAME,
        METADATA,
        NAME,
        NETWORKS,
        PERSONALITY,
        SECURITY_GROUPS,
        USER_DATA,
        SCHEDULER_HINTS,
    ) = (
        'context',
        'admin_pass',
        'auto_disk_config',
        'availability_zone',
        'block_device_mapping_v2',
        'config_drive',
        'flavor',
        'image',
        'key_name',
        'metadata',
        'name',
        'networks',
        'personality',
        'security_groups',
        'user_data',
        'scheduler_hints',
    )

    BDM2_KEYS = (
        BDM2_UUID,
        BDM2_SOURCE_TYPE,
        BDM2_DESTINATION_TYPE,
        BDM2_DISK_BUS,
        BDM2_DEVICE_NAME,
        BDM2_VOLUME_SIZE,
        BDM2_GUEST_FORMAT,
        BDM2_BOOT_INDEX,
        BDM2_DEVICE_TYPE,
        BDM2_DELETE_ON_TERMINATION,
    ) = (
        'uuid',
        'source_type',
        'destination_type',
        'disk_bus',
        'device_name',
        'volume_size',
        'guest_format',
        'boot_index',
        'device_type',
        'delete_on_termination',
    )

    NETWORK_KEYS = (
        PORT,
        FIXED_IP,
        NETWORK,
    ) = (
        'port',
        'fixed_ip',
        'network',
    )

    PERSONALITY_KEYS = (
        PATH,
        CONTENTS,
    ) = (
        'path',
        'contents',
    )

    SCHEDULER_HINTS_KEYS = (GROUP, ) = ('group', )

    properties_schema = {
        CONTEXT:
        schema.Map(_('Customized security context for operating servers.'), ),
        ADMIN_PASS:
        schema.String(_('Password for the administrator account.'), ),
        AUTO_DISK_CONFIG:
        schema.Boolean(
            _('Whether the disk partition is done automatically.'),
            default=True,
        ),
        AVAILABILITY_ZONE:
        schema.String(
            _('Name of availability zone for running the server.'), ),
        BLOCK_DEVICE_MAPPING_V2:
        schema.List(
            _('A list specifying the properties of block devices to be used '
              'for this server.'),
            schema=schema.Map(
                _('A map specifying the properties of a block device to be '
                  'used by the server.'),
                schema={
                    BDM2_UUID:
                    schema.String(
                        _('ID of the source image, snapshot or volume'), ),
                    BDM2_SOURCE_TYPE:
                    schema.String(
                        _('Volume source type, should be image, snapshot, '
                          'volume or blank'),
                        required=True,
                    ),
                    BDM2_DESTINATION_TYPE:
                    schema.String(
                        _('Volume destination type, should be volume or '
                          'local'),
                        required=True,
                    ),
                    BDM2_DISK_BUS:
                    schema.String(_('Bus of the device.'), ),
                    BDM2_DEVICE_NAME:
                    schema.String(
                        _('Name of the device (e.g. vda, xda, ...).'), ),
                    BDM2_VOLUME_SIZE:
                    schema.Integer(
                        _('Size of the block device in MB (for swap) and '
                          'in GB (for other formats).'),
                        required=True,
                    ),
                    BDM2_GUEST_FORMAT:
                    schema.String(
                        _('Specifies the disk file system format (e.g. swap, '
                          'ephemeral, ...).'), ),
                    BDM2_BOOT_INDEX:
                    schema.Integer(_('Define the boot order of the device'), ),
                    BDM2_DEVICE_TYPE:
                    schema.String(
                        _('Type of the device (e.g. disk, cdrom, ...).'), ),
                    BDM2_DELETE_ON_TERMINATION:
                    schema.Boolean(
                        _('Whether to delete the volume when the server '
                          'stops.'), ),
                }),
        ),
        CONFIG_DRIVE:
        schema.Boolean(
            _('Whether config drive should be enabled for the server.'), ),
        FLAVOR:
        schema.String(
            _('ID of flavor used for the server.'),
            required=True,
            updatable=True,
        ),
        IMAGE:
        schema.String(
            # IMAGE is not required, because there could be BDM or BDMv2
            # support and the corresponding settings effective
            _('ID of image to be used for the new server.'),
            updatable=True,
        ),
        KEY_NAME:
        schema.String(_('Name of Nova keypair to be injected to server.'), ),
        METADATA:
        schema.Map(
            _('A collection of key/value pairs to be associated with the '
              'server created. Both key and value should be <=255 chars.'),
            updatable=True,
        ),
        NAME:
        schema.String(
            _('Name of the server. When omitted, the node name will be used.'),
            updatable=True,
        ),
        NETWORKS:
        schema.List(
            _('List of networks for the server.'),
            schema=schema.Map(
                _('A map specifying the properties of a network for uses.'),
                schema={
                    NETWORK:
                    schema.String(
                        _('Name or ID of network to create a port on.'), ),
                    PORT:
                    schema.String(_('Port ID to be used by the network.'), ),
                    FIXED_IP:
                    schema.String(_('Fixed IP to be used by the network.'), ),
                },
            ),
            updatable=True,
        ),
        PERSONALITY:
        schema.List(
            _('List of files to be injected into the server, where each '
              'file specifies an in-instance path and its contents.'),
            schema=schema.Map(
                _('A map specifying the path & contents for an injected '
                  'file.'),
                schema={
                    PATH:
                    schema.String(
                        _('In-instance path for the file to be injected.'),
                        required=True,
                    ),
                    CONTENTS:
                    schema.String(
                        _('Contents of the file to be injected.'),
                        required=True,
                    ),
                },
            ),
        ),
        SCHEDULER_HINTS:
        schema.Map(
            _('A collection of key/value pairs to be associated with the '
              'Scheduler hints. Both key and value should be <=255 chars.'), ),
        SECURITY_GROUPS:
        schema.List(
            _('List of security groups.'),
            schema=schema.String(
                _('Name of a security group'),
                required=True,
            ),
        ),
        USER_DATA:
        schema.String(_('User data to be exposed by the metadata server.'), ),
    }

    OP_NAMES = (
        OP_REBOOT,
        OP_CHANGE_PASSWORD,
    ) = (
        'reboot',
        'change_password',
    )

    REBOOT_TYPE = 'type'
    REBOOT_TYPES = (REBOOT_SOFT, REBOOT_HARD) = ('SOFT', 'HARD')
    ADMIN_PASSWORD = 'admin_pass'

    OPERATIONS = {
        OP_REBOOT:
        schema.Operation(
            _("Reboot the nova server."),
            schema={
                REBOOT_TYPE:
                schema.StringParam(
                    _("Type of reboot which can be 'SOFT' or 'HARD'."),
                    default=REBOOT_SOFT,
                    constraints=[
                        constraints.AllowedValues(REBOOT_TYPES),
                    ])
            }),
        OP_CHANGE_PASSWORD:
        schema.Operation(_("Change the administrator password."),
                         schema={
                             ADMIN_PASSWORD:
                             schema.StringParam(
                                 _("New password for the administrator."))
                         }),
    }

    def __init__(self, type_name, name, **kwargs):
        super(ServerProfile, self).__init__(type_name, name, **kwargs)
        self.server_id = None

    def _validate_az(self, obj, az_name, reason=None):
        try:
            res = self.compute(obj).validate_azs([az_name])
        except exc.InternalError as ex:
            if reason == 'create':
                raise exc.EResourceCreation(type='server',
                                            message=six.text_type(ex))
            else:
                raise

        if not res:
            msg = _("The specified %(key)s '%(value)s' could not be found") % {
                'key': self.AVAILABILITY_ZONE,
                'value': az_name
            }
            if reason == 'create':
                raise exc.EResourceCreation(type='server', message=msg)
            else:
                raise exc.InvalidSpec(message=msg)

        return az_name

    def _validate_flavor(self, obj, name_or_id, reason=None):
        flavor = None
        msg = ''
        try:
            flavor = self.compute(obj).flavor_find(name_or_id, False)
        except exc.InternalError as ex:
            msg = six.text_type(ex)
            if reason is None:  # reason is 'validate'
                if ex.code == 404:
                    msg = _(
                        "The specified %(k)s '%(v)s' could not be found.") % {
                            'k': self.FLAVOR,
                            'v': name_or_id
                        }
                    raise exc.InvalidSpec(message=msg)
                else:
                    raise

        if flavor is not None:
            if not flavor.is_disabled:
                return flavor
            msg = _("The specified %(k)s '%(v)s' is disabled") % {
                'k': self.FLAVOR,
                'v': name_or_id
            }

        if reason == 'create':
            raise exc.EResourceCreation(type='server', message=msg)
        elif reason == 'update':
            raise exc.EResourceUpdate(type='server',
                                      id=obj.physical_id,
                                      message=msg)
        else:
            raise exc.InvalidSpec(message=msg)

    def _validate_image(self, obj, name_or_id, reason=None):
        try:
            return self.compute(obj).image_find(name_or_id, False)
        except exc.InternalError as ex:
            if reason == 'create':
                raise exc.EResourceCreation(type='server',
                                            message=six.text_type(ex))
            elif reason == 'update':
                raise exc.EResourceUpdate(type='server',
                                          id=obj.physical_id,
                                          message=six.text_type(ex))
            elif ex.code == 404:
                msg = _("The specified %(k)s '%(v)s' could not be found.") % {
                    'k': self.IMAGE,
                    'v': name_or_id
                }
                raise exc.InvalidSpec(message=msg)
            else:
                raise

    def _validate_keypair(self, obj, name_or_id, reason=None):
        try:
            return self.compute(obj).keypair_find(name_or_id, False)
        except exc.InternalError as ex:
            if reason == 'create':
                raise exc.EResourceCreation(type='server',
                                            message=six.text_type(ex))
            elif reason == 'update':
                raise exc.EResourceUpdate(type='server',
                                          id=obj.physical_id,
                                          message=six.text_type(ex))
            elif ex.code == 404:
                msg = _("The specified %(k)s '%(v)s' could not be found.") % {
                    'k': self.KEY_NAME,
                    'v': name_or_id
                }
                raise exc.InvalidSpec(message=msg)
            else:
                raise

    def do_validate(self, obj):
        """Validate if the spec has provided valid info for server creation.

        :param obj: The node object.
        """
        # validate availability_zone
        az_name = self.properties[self.AVAILABILITY_ZONE]
        if az_name is not None:
            self._validate_az(obj, az_name)

        # validate flavor
        flavor = self.properties[self.FLAVOR]
        self._validate_flavor(obj, flavor)

        # validate image
        image = self.properties[self.IMAGE]
        if image is not None:
            self._validate_image(obj, image)

        # validate key_name
        keypair = self.properties[self.KEY_NAME]
        if keypair is not None:
            self._validate_keypair(obj, keypair)

        # validate networks
        networks = self.properties[self.NETWORKS]
        for net in networks:
            self._validate_network(obj, net)

        return True

    def _resolve_bdm(self, bdm):
        for bd in bdm:
            for key in self.BDM2_KEYS:
                if bd[key] is None:
                    del bd[key]
        return bdm

    def _validate_network(self, obj, network, reason=None):
        result = {}
        error = None
        # check network
        net_ident = network.get(self.NETWORK)
        if net_ident:
            try:
                net = self.network(obj).network_get(net_ident)
                if reason == 'update':
                    result['net_id'] = net.id
                else:
                    result['uuid'] = net.id
            except exc.InternalError as ex:
                error = six.text_type(ex)

        # check port
        port_ident = network.get(self.PORT)
        if not error and port_ident:
            try:
                port = self.network(obj).port_find(port_ident)
                if port.status != 'DOWN':
                    error = _(
                        "The status of the port %(port)s must be DOWN") % {
                            'port': port_ident
                        }

                if reason == 'update':
                    result['port_id'] = port.id
                else:
                    result['port'] = port.id
            except exc.InternalError as ex:
                error = six.text_type(ex)
        elif port_ident is None and net_ident is None:
            error = _("'%(port)s' is required if '%(net)s' is omitted") % {
                'port': self.PORT,
                'net': self.NETWORK
            }

        fixed_ip = network.get(self.FIXED_IP)
        if not error and fixed_ip:
            if port_ident is not None:
                error = _("The '%(port)s' property and the '%(fixed_ip)s' "
                          "property cannot be specified at the same time") % {
                              'port': self.PORT,
                              'fixed_ip': self.FIXED_IP
                          }
            else:
                if reason == 'update':
                    result['fixed_ips'] = [{'ip_address': fixed_ip}]
                else:
                    result['fixed_ip'] = fixed_ip

        if error:
            if reason == 'create':
                raise exc.EResourceCreation(type='server', message=error)
            elif reason == 'update':
                raise exc.EResourceUpdate(type='server',
                                          id=obj.physical_id,
                                          message=error)
            else:
                raise exc.InvalidSpec(message=error)

        return result

    def _build_metadata(self, obj, usermeta):
        """Build custom metadata for server.

        :param obj: The node object to operate on.
        :return: A dictionary containing the new metadata.
        """
        metadata = usermeta or {}
        metadata['cluster_node_id'] = obj.id
        if obj.cluster_id:
            metadata['cluster_id'] = obj.cluster_id
            metadata['cluster_node_index'] = six.text_type(obj.index)

        return metadata
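    # For a node with id 'n-1' and index 3 in cluster 'c-9', this yields
    # (illustrative IDs): {'cluster_node_id': 'n-1', 'cluster_id': 'c-9',
    # 'cluster_node_index': '3'}, merged over any user-provided metadata.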

    def do_create(self, obj):
        """Create a server for the node object.

        :param obj: The node object for which a server will be created.
        """
        kwargs = {}
        for key in self.KEYS:
            # context is treated as connection parameters
            if key == self.CONTEXT:
                continue

            if self.properties[key] is not None:
                kwargs[key] = self.properties[key]

        admin_pass = self.properties[self.ADMIN_PASS]
        if admin_pass:
            kwargs.pop(self.ADMIN_PASS)
            kwargs['adminPass'] = admin_pass

        auto_disk_config = self.properties[self.AUTO_DISK_CONFIG]
        kwargs.pop(self.AUTO_DISK_CONFIG)
        kwargs['OS-DCF:diskConfig'] = 'AUTO' if auto_disk_config else 'MANUAL'

        image_ident = self.properties[self.IMAGE]
        if image_ident is not None:
            image = self._validate_image(obj, image_ident, 'create')
            kwargs.pop(self.IMAGE)
            kwargs['imageRef'] = image.id

        flavor_ident = self.properties[self.FLAVOR]
        flavor = self._validate_flavor(obj, flavor_ident, 'create')
        kwargs.pop(self.FLAVOR)
        kwargs['flavorRef'] = flavor.id

        keypair_name = self.properties[self.KEY_NAME]
        if keypair_name:
            keypair = self._validate_keypair(obj, keypair_name, 'create')
            kwargs['key_name'] = keypair.name

        kwargs['name'] = self.properties[self.NAME] or obj.name

        metadata = self._build_metadata(obj, self.properties[self.METADATA])
        kwargs['metadata'] = metadata

        block_device_mapping_v2 = self.properties[self.BLOCK_DEVICE_MAPPING_V2]
        if block_device_mapping_v2 is not None:
            kwargs['block_device_mapping_v2'] = self._resolve_bdm(
                block_device_mapping_v2)

        user_data = self.properties[self.USER_DATA]
        if user_data is not None:
            ud = encodeutils.safe_encode(user_data)
            kwargs['user_data'] = encodeutils.safe_decode(base64.b64encode(ud))
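            # The round-trip above turns e.g. a '#cloud-config' script into
            # its base64 text form, which is what the Nova API expects in
            # the create request body.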

        networks = self.properties[self.NETWORKS]
        if networks is not None:
            kwargs['networks'] = []
            for net_spec in networks:
                net = self._validate_network(obj, net_spec, 'create')
                kwargs['networks'].append(net)

        secgroups = self.properties[self.SECURITY_GROUPS]
        if secgroups:
            kwargs['security_groups'] = [{'name': sg} for sg in secgroups]

        if 'placement' in obj.data:
            if 'zone' in obj.data['placement']:
                kwargs['availability_zone'] = obj.data['placement']['zone']

            if 'servergroup' in obj.data['placement']:
                group_id = obj.data['placement']['servergroup']
                hints = self.properties.get(self.SCHEDULER_HINTS, {})
                hints.update({'group': group_id})
                kwargs['scheduler_hints'] = hints

        server = None
        resource_id = 'UNKNOWN'
        try:
            server = self.compute(obj).server_create(**kwargs)
            self.compute(obj).wait_for_server(server.id)
            return server.id
        except exc.InternalError as ex:
            if server and server.id:
                resource_id = server.id
            raise exc.EResourceCreation(type='server',
                                        message=ex.message,
                                        resource_id=resource_id)

    def do_delete(self, obj, **params):
        """Delete the physical resource associated with the specified node.

        :param obj: The node object to operate on.
        :param kwargs params: Optional keyword arguments for the delete
                              operation.
        :returns: This operation always return True unless exception is
                  caught.
        :raises: `EResourceDeletion` if interaction with compute service fails.
        """
        if not obj.physical_id:
            return True

        server_id = obj.physical_id
        ignore_missing = params.get('ignore_missing', True)
        force = params.get('force', False)

        try:
            driver = self.compute(obj)
            if force:
                driver.server_force_delete(server_id, ignore_missing)
            else:
                driver.server_delete(server_id, ignore_missing)
            driver.wait_for_server_delete(server_id)
            return True
        except exc.InternalError as ex:
            raise exc.EResourceDeletion(type='server',
                                        id=server_id,
                                        message=six.text_type(ex))

    def _check_server_name(self, obj, profile):
        """Check if there is a new name to be assigned to the server.

        :param obj: The node object to operate on.
        :param profile: The new profile which may contain a name for
                        the server instance.
        :return: A tuple consisting of a boolean indicating whether the name
                 needs a change and the server name determined.
        """
        old_name = self.properties[self.NAME] or obj.name
        new_name = profile.properties[self.NAME] or obj.name
        if old_name == new_name:
            return False, new_name
        return True, new_name

    def _update_name(self, obj, new_name):
        """Update the name of the server.

        :param obj: The node object to operate.
        :param new_name: The new name for the server instance.
        :return: ``None``.
        :raises: ``EResourceUpdate``.
        """
        try:
            self.compute(obj).server_update(obj.physical_id, name=new_name)
        except exc.InternalError as ex:
            raise exc.EResourceUpdate(type='server',
                                      id=obj.physical_id,
                                      message=six.text_type(ex))

    def _check_password(self, obj, new_profile):
        """Check if the admin password has been changed in the new profile.

        :param obj: The server node to operate, not used currently.
        :param new_profile: The new profile which may contain a new password
                            for the server instance.
        :return: A tuple consisting of a boolean indicating whether the
                 password needs a change and the password determined, which
                 could be '' if the new password is not set.
        """
        old_passwd = self.properties.get(self.ADMIN_PASS) or ''
        new_passwd = new_profile.properties[self.ADMIN_PASS] or ''
        if old_passwd == new_passwd:
            return False, new_passwd
        return True, new_passwd

    def _update_password(self, obj, new_password):
        """Update the admin password for the server.

        :param obj: The node object to operate.
        :param new_password: The new password for the server instance.
        :return: ``None``.
        :raises: ``EResourceUpdate``.
        """
        try:
            self.compute(obj).server_change_password(obj.physical_id,
                                                     new_password)
        except exc.InternalError as ex:
            raise exc.EResourceUpdate(type='server',
                                      id=obj.physical_id,
                                      message=six.text_type(ex))

    def _update_metadata(self, obj, new_profile):
        """Update the server metadata.

        :param obj: The node object to operate on.
        :param new_profile: The new profile that may contain some changes to
                            the metadata.
        :returns: ``None``
        :raises: `EResourceUpdate`.
        """
        old_meta = self._build_metadata(obj, self.properties[self.METADATA])
        new_meta = self._build_metadata(obj,
                                        new_profile.properties[self.METADATA])
        if new_meta == old_meta:
            return

        try:
            self.compute(obj).server_metadata_update(obj.physical_id, new_meta)
        except exc.InternalError as ex:
            raise exc.EResourceUpdate(type='server',
                                      id=obj.physical_id,
                                      message=six.text_type(ex))

    def _update_flavor(self, obj, new_profile):
        """Update server flavor.

        :param obj: The node object to operate on.
        :param new_profile: The new profile which may contain a new flavor
                            for the server.
        :returns: ``None``.
        :raises: `EResourceUpdate` when operation was a failure.
        """
        old_flavor = self.properties[self.FLAVOR]
        new_flavor = new_profile.properties[self.FLAVOR]
        cc = self.compute(obj)
        oldflavor = self._validate_flavor(obj, old_flavor, 'update')
        newflavor = self._validate_flavor(obj, new_flavor, 'update')
        if oldflavor.id == newflavor.id:
            return

        try:
            cc.server_resize(obj.physical_id, newflavor.id)
            cc.wait_for_server(obj.physical_id, 'VERIFY_RESIZE')
        except exc.InternalError as ex:
            msg = six.text_type(ex)
            try:
                cc.server_resize_revert(obj.physical_id)
                cc.wait_for_server(obj.physical_id, 'ACTIVE')
            except exc.InternalError as ex1:
                msg = six.text_type(ex1)
            raise exc.EResourceUpdate(type='server',
                                      id=obj.physical_id,
                                      message=msg)

        try:
            cc.server_resize_confirm(obj.physical_id)
            cc.wait_for_server(obj.physical_id, 'ACTIVE')
        except exc.InternalError as ex:
            raise exc.EResourceUpdate(type='server',
                                      id=obj.physical_id,
                                      message=six.text_type(ex))

    def _update_image(self, obj, new_profile, new_name, new_password):
        """Update image used by server node.

        :param obj: The node object to operate on.
        :param new_profile: The profile which may contain a new image name or
                            ID to use.
        :param new_name: The name for the server node.
        :param new_password: The new password for the administrative account
                              if provided.
        :returns: A boolean indicating whether the image needs an update.
        :raises: ``InternalError`` if operation was a failure.
        """
        old_image = self.properties[self.IMAGE]
        new_image = new_profile.properties[self.IMAGE]
        if not new_image:
            msg = _("Updating Nova server with image set to None is not "
                    "supported by Nova")
            raise exc.EResourceUpdate(type='server',
                                      id=obj.physical_id,
                                      message=msg)
        # check the new image first
        img_new = self._validate_image(obj, new_image, reason='update')
        new_image_id = img_new.id

        driver = self.compute(obj)
        if old_image:
            img_old = self._validate_image(obj, old_image, reason='update')
            old_image_id = img_old.id
        else:
            try:
                server = driver.server_get(obj.physical_id)
            except exc.InternalError as ex:
                raise exc.EResourceUpdate(type='server',
                                          id=obj.physical_id,
                                          message=six.text_type(ex))
            # Still, this 'old_image_id' could be None, but that is fine:
            # the comparison below would then simply not match and the
            # rebuild would proceed
            old_image_id = server.image.get('id', None)

        if new_image_id == old_image_id:
            return False

        try:
            driver.server_rebuild(obj.physical_id, new_image_id, new_name,
                                  new_password)
            driver.wait_for_server(obj.physical_id, 'ACTIVE')
        except exc.InternalError as ex:
            raise exc.EResourceUpdate(type='server',
                                      id=obj.physical_id,
                                      message=six.text_type(ex))
        return True
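    # Note that images are compared by ID: referencing the current image
    # under a different name causes no rebuild, while a different image ID
    # triggers server_rebuild() followed by a wait for ACTIVE.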

    def _create_interfaces(self, obj, networks):
        """Create new interfaces for the server node.

        :param obj: The node object to operate.
        :param networks: A list containing information about new network
                         interfaces to be created.
        :returns: ``None``.
        :raises: ``EResourceUpdate`` if interaction with drivers failed.
        """
        cc = self.compute(obj)
        try:
            server = cc.server_get(obj.physical_id)
        except exc.InternalError as ex:
            raise exc.EResourceUpdate(type='server',
                                      id=obj.physical_id,
                                      message=six.text_type(ex))

        for net_spec in networks:
            net_attrs = self._validate_network(obj, net_spec, 'update')
            if net_attrs:
                try:
                    cc.server_interface_create(server, **net_attrs)
                except exc.InternalError as ex:
                    raise exc.EResourceUpdate(type='server',
                                              id=obj.physical_id,
                                              message=six.text_type(ex))

    def _delete_interfaces(self, obj, networks):
        """Delete existing interfaces from the node.

        :param obj: The node object to operate.
        :param networks: A list containing information about network
                         interfaces to be created.
        :returns: ``None``
        :raises: ``EResourceUpdate``
        """
        def _get_network(nc, net_id, server_id):
            try:
                net = nc.network_get(net_id)
                return net.id
            except exc.InternalError as ex:
                raise exc.EResourceUpdate(type='server',
                                          id=server_id,
                                          message=six.text_type(ex))

        def _do_delete(port_id, server_id):
            try:
                cc.server_interface_delete(port_id, server_id)
            except exc.InternalError as ex:
                raise exc.EResourceUpdate(type='server',
                                          id=server_id,
                                          message=six.text_type(ex))

        cc = self.compute(obj)
        nc = self.network(obj)
        try:
            existing = list(cc.server_interface_list(obj.physical_id))
        except exc.InternalError as ex:
            raise exc.EResourceUpdate(type='server',
                                      id=obj.physical_id,
                                      message=six.text_type(ex))

        ports = []
        for intf in existing:
            fixed_ips = [addr['ip_address'] for addr in intf.fixed_ips]
            ports.append({
                'id': intf.port_id,
                'net': intf.net_id,
                'ips': fixed_ips
            })

        for n in networks:
            network = n.get('network', None)
            port = n.get('port', None)
            fixed_ip = n.get('fixed_ip', None)
            if port:
                for p in ports:
                    if p['id'] == port:
                        ports.remove(p)
                        _do_delete(port, obj.physical_id)
            elif fixed_ip:
                net_id = _get_network(nc, network, obj.physical_id)
                for p in ports:
                    if (fixed_ip in p['ips'] and net_id == p['net']):
                        ports.remove(p)
                        _do_delete(p['id'], obj.physical_id)
            elif port is None and fixed_ip is None:
                net_id = _get_network(nc, network, obj.physical_id)
                for p in ports:
                    if p['net'] == net_id:
                        ports.remove(p)
                        _do_delete(p['id'], obj.physical_id)
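    # Matching above is attempted in order: by explicit port ID, then by
    # (fixed_ip, network) pair, then by network alone (which detaches every
    # remaining port on that network); matched entries are removed from the
    # working list so they are not deleted twice.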

    def _update_network(self, obj, new_profile):
        """Updating server network interfaces.

        :param obj: The node object to operate.
        :param new_profile: The new profile which may contain new network
                            settings.
        :return: ``None``
        :raises: ``EResourceUpdate`` if there are driver failures.
        """
        networks_current = self.properties[self.NETWORKS]
        networks_create = new_profile.properties[self.NETWORKS]
        networks_delete = copy.deepcopy(networks_current)
        for network in networks_current:
            if network in networks_create:
                networks_create.remove(network)
                networks_delete.remove(network)

        # Detach some existing interfaces
        if networks_delete:
            self._delete_interfaces(obj, networks_delete)

        # Attach new interfaces
        if networks_create:
            self._create_interfaces(obj, networks_create)
        return
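    # Example of the diffing above (illustrative specs): updating from
    # [{'network': 'net1'}, {'network': 'net2'}] to
    # [{'network': 'net1'}, {'network': 'net3'}] leaves net1 untouched,
    # detaches net2 and attaches net3.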

    def do_update(self, obj, new_profile=None, **params):
        """Perform update on the server.

        :param obj: the server to operate on
        :param new_profile: the new profile for the server.
        :param params: a dictionary of optional parameters.
        :returns: True if update was successful or False otherwise.
        :raises: `EResourceUpdate` if operation fails.
        """
        self.server_id = obj.physical_id
        if not self.server_id:
            return False

        if not new_profile:
            return False

        if not self.validate_for_update(new_profile):
            return False

        name_changed, new_name = self._check_server_name(obj, new_profile)
        passwd_changed, new_passwd = self._check_password(obj, new_profile)
        # Update server image: may have side effect of changing server name
        # and/or admin password
        image_changed = self._update_image(obj, new_profile, new_name,
                                           new_passwd)
        if not image_changed:
            # we do this separately only when rebuild wasn't performed
            if name_changed:
                self._update_name(obj, new_name)
            if passwd_changed:
                self._update_password(obj, new_passwd)

        # Update server flavor: note that flavor is a required property
        self._update_flavor(obj, new_profile)
        self._update_network(obj, new_profile)

        # TODO(Yanyan Hu): Update block_device properties
        # Update server metadata
        self._update_metadata(obj, new_profile)

        return True

    def do_get_details(self, obj):
        known_keys = {
            'OS-DCF:diskConfig', 'OS-EXT-AZ:availability_zone',
            'OS-EXT-STS:power_state', 'OS-EXT-STS:vm_state', 'accessIPv4',
            'accessIPv6', 'config_drive', 'created', 'hostId', 'id',
            'key_name', 'locked', 'metadata', 'name',
            'os-extended-volumes:volumes_attached', 'progress', 'status',
            'updated'
        }
        if obj.physical_id is None or obj.physical_id == '':
            return {}

        driver = self.compute(obj)
        try:
            server = driver.server_get(obj.physical_id)
        except exc.InternalError as ex:
            return {'Error': {'code': ex.code, 'message': six.text_type(ex)}}

        if server is None:
            return {}
        server_data = server.to_dict()
        details = {
            'image': server_data['image']['id'],
            'flavor': server_data['flavor']['id'],
        }
        for key in known_keys:
            if key in server_data:
                details[key] = server_data[key]

        # process special keys like 'OS-EXT-STS:task_state': these keys have
        # a default value '-' when not existing
        special_keys = [
            'OS-EXT-STS:task_state',
            'OS-SRV-USG:launched_at',
            'OS-SRV-USG:terminated_at',
        ]
        for key in special_keys:
            if key in server_data:
                val = server_data[key]
                details[key] = val if val else '-'

        # process network addresses
        details['addresses'] = copy.deepcopy(server_data['addresses'])

        # process security groups
        sgroups = []
        if 'security_groups' in server_data:
            for sg in server_data['security_groups']:
                sgroups.append(sg['name'])
        if len(sgroups) == 0:
            details['security_groups'] = ''
        elif len(sgroups) == 1:
            details['security_groups'] = sgroups[0]
        else:
            details['security_groups'] = sgroups

        return dict((k, details[k]) for k in sorted(details))

    def do_join(self, obj, cluster_id):
        if not obj.physical_id:
            return False

        driver = self.compute(obj)
        metadata = driver.server_metadata_get(obj.physical_id) or {}
        metadata['cluster_id'] = cluster_id
        metadata['cluster_node_index'] = six.text_type(obj.index)
        driver.server_metadata_update(obj.physical_id, metadata)
        return super(ServerProfile, self).do_join(obj, cluster_id)

    def do_leave(self, obj):
        if not obj.physical_id:
            return False

        keys = ['cluster_id', 'cluster_node_index']
        self.compute(obj).server_metadata_delete(obj.physical_id, keys)
        return super(ServerProfile, self).do_leave(obj)

    def do_rebuild(self, obj):
        if not obj.physical_id:
            return False

        self.server_id = obj.physical_id
        driver = self.compute(obj)
        try:
            server = driver.server_get(self.server_id)
        except exc.InternalError as ex:
            raise exc.EResourceOperation(op='rebuilding',
                                         type='server',
                                         id=self.server_id,
                                         message=six.text_type(ex))

        if server is None or server.image is None:
            return False

        image_id = server.image['id']
        admin_pass = self.properties.get(self.ADMIN_PASS)
        try:
            driver.server_rebuild(self.server_id, image_id,
                                  self.properties.get(self.NAME), admin_pass)
            driver.wait_for_server(self.server_id, 'ACTIVE')
        except exc.InternalError as ex:
            raise exc.EResourceOperation(op='rebuilding',
                                         type='server',
                                         id=self.server_id,
                                         message=six.text_type(ex))
        return True

    def do_check(self, obj):
        if not obj.physical_id:
            return False

        try:
            server = self.compute(obj).server_get(obj.physical_id)
        except exc.InternalError as ex:
            raise exc.EResourceOperation(op='checking',
                                         type='server',
                                         id=obj.physical_id,
                                         message=six.text_type(ex))

        if (server is None or server.status != 'ACTIVE'):
            return False

        return True

    def do_recover(self, obj, **options):
        # NOTE: We do a 'get' not a 'pop' here, because the operation may
        #       fall back to the base class for handling
        operation = options.get('operation', None)

        if operation and not isinstance(operation, six.string_types):
            operation = operation[0]
        # TODO(Qiming): Handle the case that the operation contains other
        #               alternative recover operation
        # Depends-On: https://review.openstack.org/#/c/359676/
        if operation == 'REBUILD':
            return self.do_rebuild(obj)

        return super(ServerProfile, self).do_recover(obj, **options)

    def handle_reboot(self, obj, **options):
        """Handler for the reboot operation."""
        if not obj.physical_id:
            return False

        reboot_type = options.get(self.REBOOT_TYPE, self.REBOOT_SOFT)
        if (not isinstance(reboot_type, six.string_types)
                or reboot_type not in self.REBOOT_TYPES):
            return False

        self.compute(obj).server_reboot(obj.physical_id, reboot_type)
        self.compute(obj).wait_for_server(obj.physical_id, 'ACTIVE')
        return True

    def handle_change_password(self, obj, **options):
        """Handler for the change_password operation."""
        if not obj.physical_id:
            return False

        password = options.get(self.ADMIN_PASSWORD, None)
        if (password is None or not isinstance(password, six.string_types)):
            return False

        self.compute(obj).server_change_password(obj.physical_id, password)
        return True
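A minimal sketch of how the operation handlers above might be looked up by name; the dispatcher below is a hypothetical illustration, not part of Senlin's API:

def dispatch_operation(profile, node, op_name, **options):
    """Hypothetical dispatcher routing an operation to its handler."""
    handler = getattr(profile, 'handle_%s' % op_name, None)
    if handler is None:
        return False
    return handler(node, **options)

# e.g. dispatch_operation(profile, node, 'reboot', type='SOFT') invokes
# handle_reboot(), which rejects any type outside REBOOT_TYPES.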
Exemple #24
0
class HealthPolicy(base.Policy):
    """Policy for health management of a cluster."""

    VERSION = '1.0'
    VERSIONS = {'1.0': [{'status': consts.EXPERIMENTAL, 'since': '2017.02'}]}
    PRIORITY = 600

    TARGET = [
        ('BEFORE', consts.CLUSTER_RECOVER),
        ('BEFORE', consts.CLUSTER_DEL_NODES),
        ('BEFORE', consts.CLUSTER_SCALE_IN),
        ('BEFORE', consts.CLUSTER_RESIZE),
        ('BEFORE', consts.NODE_DELETE),
        ('AFTER', consts.CLUSTER_DEL_NODES),
        ('AFTER', consts.CLUSTER_SCALE_IN),
        ('AFTER', consts.CLUSTER_RESIZE),
        ('AFTER', consts.NODE_DELETE),
    ]

    # Should be ANY if profile provides health check support?
    PROFILE_TYPE = [
        'os.nova.server',
        'os.heat.stack',
    ]

    KEYS = (DETECTION, RECOVERY) = ('detection', 'recovery')

    _DETECTION_KEYS = (
        DETECTION_TYPE,
        DETECTION_OPTIONS,
    ) = ('type', 'options')

    _DETECTION_OPTIONS = (DETECTION_INTERVAL, ) = ('interval', )

    _RECOVERY_KEYS = (RECOVERY_ACTIONS, RECOVERY_FENCING) = ('actions',
                                                             'fencing')

    FENCING_OPTION_VALUES = (
        COMPUTE,
        # STORAGE, NETWORK,
    ) = (
        'COMPUTE',
        # 'STORAGE', 'NETWORK'
    )

    ACTION_KEYS = (
        ACTION_NAME,
        ACTION_PARAMS,
    ) = (
        'name',
        'params',
    )

    properties_schema = {
        DETECTION:
        schema.Map(
            _('Policy aspect for node failure detection.'),
            schema={
                DETECTION_TYPE:
                schema.String(
                    _('Type of node failure detection.'),
                    constraints=[
                        constraints.AllowedValues(consts.DETECTION_TYPES),
                    ],
                    required=True,
                ),
                DETECTION_OPTIONS:
                schema.Map(
                    schema={
                        DETECTION_INTERVAL:
                        schema.Integer(
                            _("Number of seconds between pollings. Only "
                              "required when type is 'NODE_STATUS_POLLING'."),
                            default=60,
                        ),
                    }),
            },
            required=True,
        ),
        RECOVERY:
        schema.Map(
            _('Policy aspect for node failure recovery.'),
            schema={
                RECOVERY_ACTIONS:
                schema.List(_('List of actions to try for node recovery.'),
                            schema=schema.Map(
                                _('Action to try for node recovery.'),
                                schema={
                                    ACTION_NAME:
                                    schema.String(
                                        _("Name of action to execute."),
                                        constraints=[
                                            constraints.AllowedValues(
                                                consts.RECOVERY_ACTIONS),
                                        ],
                                        required=True),
                                    ACTION_PARAMS:
                                    schema.Map(_("Parameters for the action")),
                                })),
                RECOVERY_FENCING:
                schema.List(
                    _('List of services to be fenced.'),
                    schema=schema.String(
                        _('Service to be fenced.'),
                        constraints=[
                            constraints.AllowedValues(FENCING_OPTION_VALUES),
                        ],
                        required=True,
                    ),
                ),
            }),
    }
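    # A matching spec fragment might look like (illustrative values only);
    # note that, unlike the 1.0 schema above, each recovery action is a map
    # with a name and optional params:
    #   detection:
    #     type: NODE_STATUS_POLLING
    #     options:
    #       interval: 120
    #   recovery:
    #     actions:
    #       - name: REBUILD
    #     fencing:
    #       - COMPUTE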

    def __init__(self, name, spec, **kwargs):
        super(HealthPolicy, self).__init__(name, spec, **kwargs)

        self.check_type = self.properties[self.DETECTION][self.DETECTION_TYPE]
        options = self.properties[self.DETECTION][self.DETECTION_OPTIONS]
        self.interval = options[self.DETECTION_INTERVAL]
        recover_settings = self.properties[self.RECOVERY]
        self.recover_actions = recover_settings[self.RECOVERY_ACTIONS]
        self.fencing_types = recover_settings[self.RECOVERY_FENCING]

    def validate(self, context, validate_props=False):
        super(HealthPolicy, self).validate(context,
                                           validate_props=validate_props)

        if len(self.recover_actions) > 1:
            message = _(
                "Only one '%s' is supported for now.") % self.RECOVERY_ACTIONS
            raise exc.ESchema(message=message)

        # TODO(Qiming): Add detection of duplicated action names when
        # support to list of actions is implemented.

    def attach(self, cluster, enabled=True):
        """"Hook for policy attach.

        Register the cluster for health management.

        :param cluster: The cluster to which the policy is being attached.
        :param enabled: Whether the attached cluster policy is enabled or
                        disabled.
        :return: A tuple comprising execution result and policy data.
        """
        p_type = cluster.rt['profile'].type_name
        action_names = [a['name'] for a in self.recover_actions]
        if p_type != 'os.nova.server':
            if consts.RECOVER_REBUILD in action_names:
                err_msg = _("Recovery action REBUILD is only applicable to "
                            "os.nova.server clusters.")
                return False, err_msg

            if consts.RECOVER_REBOOT in action_names:
                err_msg = _("Recovery action REBOOT is only applicable to "
                            "os.nova.server clusters.")
                return False, err_msg

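        # Pack the detection settings parsed from the spec and register the
        # cluster with the health manager for periodic checking.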
        kwargs = {
            'check_type': self.check_type,
            'interval': self.interval,
            'params': {},
            'enabled': enabled
        }

        health_manager.register(cluster.id, engine_id=None, **kwargs)

        data = {
            'check_type': self.check_type,
            'interval': self.interval,
        }

        return True, self._build_policy_data(data)

    def detach(self, cluster):
        """Hook for policy detach.

        Unregister the cluster from health management.

        :param cluster: The target cluster.
        :returns: A tuple comprising the execution result and reason.
        """
        health_manager.unregister(cluster.id)
        return True, ''

    def pre_op(self, cluster_id, action, **args):
        """Hook before action execution.

        One of the tasks of this routine is to disable the health policy if
        the action is a request that will shrink the cluster, because the
        policy might otherwise attempt to recover nodes that are about to
        be deleted.

        :param cluster_id: The ID of the target cluster.
        :param action: The action to be examined.
        :param args: Other keyword arguments to be checked.
        :returns: Boolean indicating whether the check passed.
        """
        if action.action in (consts.CLUSTER_SCALE_IN, consts.CLUSTER_DEL_NODES,
                             consts.NODE_DELETE):
            health_manager.disable(cluster_id)
            return True

        if action.action == consts.CLUSTER_RESIZE:
            deletion = action.data.get('deletion', None)
            if deletion:
                health_manager.disable(cluster_id)
                return True

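            # A resize request may not carry an explicit deletion plan yet;
            # parse the resize parameters to determine whether nodes will
            # actually be removed.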
            db_cluster = co.Cluster.get(action.context, cluster_id)
            current = no.Node.count_by_cluster(action.context, cluster_id)
            res, reason = scaleutils.parse_resize_params(
                action, db_cluster, current)
            if res == base.CHECK_ERROR:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = reason
                return False

            if action.data.get('deletion', None):
                health_manager.disable(cluster_id)
                return True

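        # For actions that do not shrink the cluster, expose the configured
        # recovery actions and fencing types to the health workflow via the
        # action data.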
        pd = {
            'recover_action': self.recover_actions,
            'fencing': self.fencing_types,
        }
        action.data.update({'health': pd})
        action.store(action.context)

        return True

    def post_op(self, cluster_id, action, **args):
        """Hook before action execution.

        One of the task for this routine is to re-enable health policy if the
        action is a request that will shrink the cluster thus the policy has
        been temporarily disabled.

        :param cluster_id: The ID of the target cluster.
        :param action: The action to be examined.
        :param kwargs args: Other keyword arguments to be checked.
        :returns: Boolean indicating whether the checking passed.
        """
        if action.action in (consts.CLUSTER_SCALE_IN, consts.CLUSTER_DEL_NODES,
                             consts.NODE_DELETE):
            health_manager.enable(cluster_id)
            return True

        if action.action == consts.CLUSTER_RESIZE:
            deletion = action.data.get('deletion', None)
            if deletion:
                health_manager.enable(cluster_id)
                return True

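            # Mirror the logic in pre_op: parse the resize parameters to see
            # whether the resize removed nodes and the policy needs to be
            # re-enabled.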
            db_cluster = co.Cluster.get(action.context, cluster_id)
            current = no.Node.count_by_cluster(action.context, cluster_id)
            res, reason = scaleutils.parse_resize_params(
                action, db_cluster, current)
            if res == base.CHECK_ERROR:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = reason
                return False

            if action.data.get('deletion', None):
                health_manager.enable(cluster_id)
                return True

        return True
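

# A minimal usage sketch, not part of the original module. The spec layout
# and the 'senlin.policy.health' type string are assumptions inferred from
# the schema above rather than an authoritative reference.
example_spec = {
    'type': 'senlin.policy.health',
    'version': '1.0',
    'properties': {
        'detection': {
            'type': 'NODE_STATUS_POLLING',
            'options': {'interval': 60},
        },
        'recovery': {
            'actions': [{'name': 'RECREATE'}],
            'fencing': ['COMPUTE'],
        },
    },
}
# policy = HealthPolicy('health-policy-demo', example_spec)
# ok, data = policy.attach(cluster)  # 'cluster' is a hypothetical cluster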