def test_resolve(self):
    sot = schema.Boolean()

    res = sot.resolve(True)
    self.assertTrue(res)

    res = sot.resolve(False)
    self.assertFalse(res)

    res = sot.resolve('Yes')
    self.assertTrue(res)

def test_validate(self):
    sot = schema.Boolean()

    res = sot.validate(True)
    self.assertIsNone(res)

    res = sot.validate('No')
    self.assertIsNone(res)

    ex = self.assertRaises(exc.ESchema, sot.validate, 'bogus')
    self.assertEqual("The value 'bogus' is not a valid Boolean",
                     six.text_type(ex))

def test_to_schema_type(self):
    sot = schema.Boolean('desc')

    res = sot.to_schema_type(True)
    self.assertTrue(res)

    res = sot.to_schema_type('true')
    self.assertTrue(res)

    res = sot.to_schema_type('trUE')
    self.assertTrue(res)

    res = sot.to_schema_type('False')
    self.assertFalse(res)

    res = sot.to_schema_type('FALSE')
    self.assertFalse(res)

    ex = self.assertRaises(exc.ESchema, sot.to_schema_type, 'bogus')
    self.assertEqual("The value 'bogus' is not a valid Boolean",
                     six.text_type(ex))
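# A minimal usage sketch (not part of the test suite), assuming a senlin
# tree where the schema and exception modules are importable as below.
from senlin.common import exception as exc
from senlin.common import schema

sot = schema.Boolean('desc')
assert sot.to_schema_type('trUE') is True   # string coercion is case-insensitive
assert sot.to_schema_type('FALSE') is False
assert sot.resolve('Yes') is True           # resolve() applies the same coercion
try:
    sot.validate('bogus')                   # any other value raises ESchema
except exc.ESchema as ex:
    print(ex)  # "The value 'bogus' is not a valid Boolean"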
class ServerProfile(base.KubeBaseProfile):
    """Profile for a Kubernetes master server."""

    VERSIONS = {
        '1.0': [{'status': consts.EXPERIMENTAL, 'since': '2017.10'}]
    }

    KEYS = (
        CONTEXT, FLAVOR, IMAGE, KEY_NAME,
        PUBLIC_NETWORK, BLOCK_DEVICE_MAPPING_V2,
    ) = (
        'context', 'flavor', 'image', 'key_name',
        'public_network', 'block_device_mapping_v2',
    )

    INTERNAL_KEYS = (
        KUBEADM_TOKEN, KUBE_MASTER_IP, SECURITY_GROUP,
        PRIVATE_NETWORK, PRIVATE_SUBNET, PRIVATE_ROUTER,
        KUBE_MASTER_FLOATINGIP, KUBE_MASTER_FLOATINGIP_ID,
        SCALE_OUT_RECV_ID, SCALE_OUT_URL,
    ) = (
        'kubeadm_token', 'kube_master_ip', 'security_group',
        'private_network', 'private_subnet', 'private_router',
        'kube_master_floatingip', 'kube_master_floatingip_id',
        'scale_out_recv_id', 'scale_out_url',
    )

    NETWORK_KEYS = (
        PORT, FIXED_IP, NETWORK, PORT_SECURITY_GROUPS,
        FLOATING_NETWORK, FLOATING_IP,
    ) = (
        'port', 'fixed_ip', 'network', 'security_groups',
        'floating_network', 'floating_ip',
    )

    BDM2_KEYS = (
        BDM2_UUID, BDM2_SOURCE_TYPE, BDM2_DESTINATION_TYPE,
        BDM2_DISK_BUS, BDM2_DEVICE_NAME, BDM2_VOLUME_SIZE,
        BDM2_GUEST_FORMAT, BDM2_BOOT_INDEX, BDM2_DEVICE_TYPE,
        BDM2_DELETE_ON_TERMINATION,
    ) = (
        'uuid', 'source_type', 'destination_type', 'disk_bus',
        'device_name', 'volume_size', 'guest_format', 'boot_index',
        'device_type', 'delete_on_termination',
    )

    properties_schema = {
        CONTEXT: schema.Map(
            _('Customized security context for operating servers.'),
        ),
        FLAVOR: schema.String(
            _('ID of flavor used for the server.'),
            required=True,
            updatable=True,
        ),
        IMAGE: schema.String(
            # IMAGE is not required, because there could be BDM or BDMv2
            # support and the corresponding settings effective
            _('ID of image to be used for the new server.'),
            updatable=True,
        ),
        KEY_NAME: schema.String(
            _('Name of Nova keypair to be injected to server.'),
        ),
        PUBLIC_NETWORK: schema.String(
            _('Public network for kubernetes.'),
            required=True,
        ),
        BLOCK_DEVICE_MAPPING_V2: schema.List(
            _('A list specifying the properties of block devices to be used '
              'for this server.'),
            schema=schema.Map(
                _('A map specifying the properties of a block device to be '
                  'used by the server.'),
                schema={
                    BDM2_UUID: schema.String(
                        _('ID of the source image, snapshot or volume'),
                    ),
                    BDM2_SOURCE_TYPE: schema.String(
                        _("Volume source type, must be one of 'image', "
                          "'snapshot', 'volume' or 'blank'"),
                        required=True,
                    ),
                    BDM2_DESTINATION_TYPE: schema.String(
                        _("Volume destination type, must be 'volume' or "
                          "'local'"),
                        required=True,
                    ),
                    BDM2_DISK_BUS: schema.String(
                        _('Bus of the device.'),
                    ),
                    BDM2_DEVICE_NAME: schema.String(
                        _('Name of the device (e.g. vda, xda, ...).'),
                    ),
                    BDM2_VOLUME_SIZE: schema.Integer(
                        _('Size of the block device in MB (for swap) and '
                          'in GB (for other formats).'),
                        required=True,
                    ),
                    BDM2_GUEST_FORMAT: schema.String(
                        _('Specifies the disk file system format (e.g. '
                          'swap, ephemeral, ...).'),
                    ),
                    BDM2_BOOT_INDEX: schema.Integer(
                        _('Define the boot order of the device.'),
                    ),
                    BDM2_DEVICE_TYPE: schema.String(
                        _('Type of the device (e.g. disk, cdrom, ...).'),
                    ),
                    BDM2_DELETE_ON_TERMINATION: schema.Boolean(
                        _('Whether to delete the volume when the server '
                          'stops.'),
                    ),
                },
            ),
        ),
    }

    def __init__(self, type_name, name, **kwargs):
        super(ServerProfile, self).__init__(type_name, name, **kwargs)
        self.server_id = None

    def do_cluster_create(self, obj):
        self._generate_kubeadm_token(obj)
        self._create_security_group(obj)
        self._create_network(obj)

    def do_cluster_delete(self, obj):
        if obj.dependents and 'kube-node' in obj.dependents:
            msg = ("Cluster %s delete failed, node clusters %s must be "
                   "deleted first." % (obj.id,
                                       obj.dependents['kube-node']))
            raise exc.EResourceDeletion(type='kubernetes.master',
                                        id=obj.id,
                                        message=msg)
        self._delete_network(obj)
        self._delete_security_group(obj)

    def do_create(self, obj):
        """Create a server for the node object.

        :param obj: The node object for which a server will be created.
        """
        kwargs = {}
        for key in self.KEYS:
            if self.properties[key] is not None:
                kwargs[key] = self.properties[key]

        image_ident = self.properties[self.IMAGE]
        if image_ident is not None:
            image = self._validate_image(obj, image_ident, 'create')
            kwargs.pop(self.IMAGE)
            kwargs['imageRef'] = image.id

        flavor_ident = self.properties[self.FLAVOR]
        flavor = self._validate_flavor(obj, flavor_ident, 'create')
        kwargs.pop(self.FLAVOR)
        kwargs['flavorRef'] = flavor.id

        keypair_name = self.properties[self.KEY_NAME]
        if keypair_name:
            keypair = self._validate_keypair(obj, keypair_name, 'create')
            kwargs['key_name'] = keypair.name

        kwargs['name'] = obj.name

        metadata = self._build_metadata(obj, {})
        kwargs['metadata'] = metadata

        jj_vars = {}
        cluster_data = self._get_cluster_data(obj)
        kwargs['networks'] = [{'uuid': cluster_data[self.PRIVATE_NETWORK]}]

        # Get user_data parameters from metadata
        jj_vars['KUBETOKEN'] = cluster_data[self.KUBEADM_TOKEN]
        jj_vars['MASTER_FLOATINGIP'] = cluster_data[
            self.KUBE_MASTER_FLOATINGIP]

        block_device_mapping_v2 = self.properties[
            self.BLOCK_DEVICE_MAPPING_V2]
        if block_device_mapping_v2 is not None:
            kwargs['block_device_mapping_v2'] = self._resolve_bdm(
                obj, block_device_mapping_v2, 'create')

        # user_data = self.properties[self.USER_DATA]
        user_data = base.loadScript('./scripts/master.sh')
        if user_data is not None:
            # Use jinja2 to replace variables defined in user_data
            try:
                jj_t = jinja2.Template(user_data)
                user_data = jj_t.render(**jj_vars)
            except (jinja2.exceptions.UndefinedError, ValueError) as ex:
                # TODO(anyone) Handle jinja2 error
                pass
            ud = encodeutils.safe_encode(user_data)
            kwargs['user_data'] = encodeutils.safe_decode(
                base64.b64encode(ud))

        sgid = self._get_security_group(obj)
        kwargs['security_groups'] = [{'name': sgid}]

        server = None
        resource_id = None
        try:
            server = self.compute(obj).server_create(**kwargs)
            self.compute(obj).wait_for_server(server.id)
            server = self.compute(obj).server_get(server.id)
            self._update_master_ip(obj, server.addresses[''][0]['addr'])
            self._associate_floatingip(obj, server)
            LOG.info("Created master node: %s" % server.id)
            return server.id
        except exc.InternalError as ex:
            if server and server.id:
                resource_id = server.id
            raise exc.EResourceCreation(type='server',
                                        message=six.text_type(ex),
                                        resource_id=resource_id)

    def do_delete(self, obj, **params):
        """Delete the physical resource associated with the specified node.

        :param obj: The node object to operate on.
        :param kwargs params: Optional keyword arguments for the delete
                              operation.
        :returns: This operation always returns True unless an exception is
                  caught.
        :raises: `EResourceDeletion` if interaction with the compute service
                 fails.
        """
        if not obj.physical_id:
            return True

        server_id = obj.physical_id
        ignore_missing = params.get('ignore_missing', True)
        internal_ports = obj.data.get('internal_ports', [])
        force = params.get('force', False)

        try:
            self._disassociate_floatingip(obj, server_id)
            driver = self.compute(obj)
            if force:
                driver.server_force_delete(server_id, ignore_missing)
            else:
                driver.server_delete(server_id, ignore_missing)
            driver.wait_for_server_delete(server_id)
            if internal_ports:
                ex = self._delete_ports(obj, internal_ports)
                if ex:
                    raise ex
            return True
        except exc.InternalError as ex:
            raise exc.EResourceDeletion(type='server', id=server_id,
                                        message=six.text_type(ex))
class StackProfile(base.Profile):
    '''Profile for an OpenStack Heat stack.

    When this profile is used, the whole cluster is a collection of Heat
    stacks.
    '''

    KEYS = (
        CONTEXT, TEMPLATE, TEMPLATE_URL, PARAMETERS,
        FILES, TIMEOUT, DISABLE_ROLLBACK, ENVIRONMENT,
    ) = (
        'context', 'template', 'template_url', 'parameters',
        'files', 'timeout', 'disable_rollback', 'environment',
    )

    properties_schema = {
        CONTEXT: schema.Map(
            _('A dictionary for specifying the customized context for '
              'stack operations'),
            default={},
        ),
        TEMPLATE: schema.Map(
            _('Heat stack template.'),
            default={},
            updatable=True,
        ),
        TEMPLATE_URL: schema.String(
            _('Heat stack template url.'),
            default='',
            updatable=True,
        ),
        PARAMETERS: schema.Map(
            _('Parameters to be passed to Heat for stack operations.'),
            default={},
            updatable=True,
        ),
        FILES: schema.Map(
            _('Contents of files referenced by the template, if any.'),
            default={},
            updatable=True,
        ),
        TIMEOUT: schema.Integer(
            _('An integer that specifies the number of minutes that a '
              'stack operation times out.'),
            updatable=True,
        ),
        DISABLE_ROLLBACK: schema.Boolean(
            _('A boolean specifying whether a stack operation can be '
              'rolled back.'),
            default=True,
            updatable=True,
        ),
        ENVIRONMENT: schema.Map(
            _('A map that specifies the environment used for stack '
              'operations.'),
            default={},
            updatable=True,
        ),
    }

    OP_NAMES = (
        OP_ABANDON,
    ) = (
        'abandon',
    )

    OPERATIONS = {
        OP_ABANDON: schema.Map(
            _('Abandon a heat stack node.'),
        ),
    }

    def __init__(self, type_name, name, **kwargs):
        super(StackProfile, self).__init__(type_name, name, **kwargs)
        self.stack_id = None

    def validate(self, validate_props=False):
        '''Validate the schema and the data provided.'''
        # general validation
        self.spec_data.validate()
        self.properties.validate()

        # validate template
        template = self.properties[self.TEMPLATE]
        template_url = self.properties[self.TEMPLATE_URL]
        if not template and not template_url:
            msg = _("Both template and template_url are not specified "
                    "for profile '%s'.") % self.name
            raise exc.InvalidSpec(message=msg)

        if validate_props:
            self.do_validate(obj=self)

    def do_validate(self, obj):
        """Validate the stack template used by a node.

        :param obj: Node object to operate.
        :returns: True if validation succeeds.
        :raises: `InvalidSpec` exception is raised if template is invalid.
        """
        kwargs = {
            'stack_name': utils.random_name(),
            'template': self.properties[self.TEMPLATE],
            'template_url': self.properties[self.TEMPLATE_URL],
            'parameters': self.properties[self.PARAMETERS],
            'files': self.properties[self.FILES],
            'environment': self.properties[self.ENVIRONMENT],
            'preview': True,
        }
        try:
            self.orchestration(obj).stack_create(**kwargs)
        except exc.InternalError as ex:
            msg = _('Failed in validating template: %s') % six.text_type(ex)
            raise exc.InvalidSpec(message=msg)

        return True

    def do_create(self, obj):
        """Create a heat stack using the given node object.

        :param obj: The node object to operate on.
        :returns: The UUID of the heat stack created.
        """
        kwargs = {
            'stack_name': obj.name + '-' + utils.random_name(8),
            'template': self.properties[self.TEMPLATE],
            'template_url': self.properties[self.TEMPLATE_URL],
            'timeout_mins': self.properties[self.TIMEOUT],
            'disable_rollback': self.properties[self.DISABLE_ROLLBACK],
            'parameters': self.properties[self.PARAMETERS],
            'files': self.properties[self.FILES],
            'environment': self.properties[self.ENVIRONMENT],
        }

        try:
            stack = self.orchestration(obj).stack_create(**kwargs)

            # A timeout of None means the 'default_action_timeout' setting
            # is used; it can be overridden by the TIMEOUT profile property.
            timeout = None
            if self.properties[self.TIMEOUT]:
                timeout = self.properties[self.TIMEOUT] * 60

            self.orchestration(obj).wait_for_stack(stack.id,
                                                   'CREATE_COMPLETE',
                                                   timeout=timeout)
            return stack.id
        except exc.InternalError as ex:
            raise exc.EResourceCreation(type='stack',
                                        message=ex.message)

    def do_delete(self, obj, **params):
        """Delete the physical stack behind the node object.

        :param obj: The node object to operate on.
        :param kwargs params: Optional keyword arguments for the delete
                              operation.
        :returns: This operation always returns True unless an exception is
                  caught.
        :raises: `EResourceDeletion` if interaction with heat fails.
        """
        stack_id = obj.physical_id
        ignore_missing = params.get('ignore_missing', True)
        try:
            self.orchestration(obj).stack_delete(stack_id, ignore_missing)
            self.orchestration(obj).wait_for_stack_delete(stack_id)
        except exc.InternalError as ex:
            raise exc.EResourceDeletion(type='stack', id=stack_id,
                                        message=six.text_type(ex))
        return True

    def do_update(self, obj, new_profile, **params):
        """Perform update on object.

        :param obj: the node object to operate on
        :param new_profile: the new profile used for updating
        :param params: other parameters for the update request.
        :returns: A boolean indicating whether the operation is successful.
        """
        self.stack_id = obj.physical_id
        if not self.stack_id:
            return False

        if not self.validate_for_update(new_profile):
            return False

        fields = {}
        new_template = new_profile.properties[new_profile.TEMPLATE]
        if new_template != self.properties[self.TEMPLATE]:
            fields['template'] = new_template

        new_params = new_profile.properties[new_profile.PARAMETERS]
        if new_params != self.properties[self.PARAMETERS]:
            fields['parameters'] = new_params

        new_timeout = new_profile.properties[new_profile.TIMEOUT]
        if new_timeout != self.properties[self.TIMEOUT]:
            fields['timeout_mins'] = new_timeout

        new_dr = new_profile.properties[new_profile.DISABLE_ROLLBACK]
        if new_dr != self.properties[self.DISABLE_ROLLBACK]:
            fields['disable_rollback'] = new_dr

        new_files = new_profile.properties[new_profile.FILES]
        if new_files != self.properties[self.FILES]:
            fields['files'] = new_files

        new_environment = new_profile.properties[new_profile.ENVIRONMENT]
        if new_environment != self.properties[self.ENVIRONMENT]:
            fields['environment'] = new_environment

        if not fields:
            return True

        try:
            hc = self.orchestration(obj)

            # A timeout of None means the 'default_action_timeout' setting
            # is used; it can be overridden by the TIMEOUT profile property.
            timeout = None
            if self.properties[self.TIMEOUT]:
                timeout = self.properties[self.TIMEOUT] * 60

            hc.stack_update(self.stack_id, **fields)
            hc.wait_for_stack(self.stack_id, 'UPDATE_COMPLETE',
                              timeout=timeout)
        except exc.InternalError as ex:
            raise exc.EResourceUpdate(type='stack', id=self.stack_id,
                                      message=ex.message)

        return True

    def do_check(self, obj):
        """Check stack status.

        :param obj: Node object to operate.
        :returns: True if check succeeded, or False otherwise.
        """
        stack_id = obj.physical_id
        if stack_id is None:
            return False

        hc = self.orchestration(obj)
        try:
            # A timeout of None means the 'default_action_timeout' setting
            # is used; it can be overridden by the TIMEOUT profile property.
            timeout = None
            if self.properties[self.TIMEOUT]:
                timeout = self.properties[self.TIMEOUT] * 60

            hc.stack_check(stack_id)
            hc.wait_for_stack(stack_id, 'CHECK_COMPLETE', timeout=timeout)
        except exc.InternalError as ex:
            LOG.error(_LE('Failed in checking stack: %s.'), ex)
            return False

        return True

    def do_get_details(self, obj):
        if not obj.physical_id:
            return {}

        try:
            stack = self.orchestration(obj).stack_get(obj.physical_id)
            return stack.to_dict()
        except exc.InternalError as ex:
            return {
                'Error': {
                    'code': ex.code,
                    'message': six.text_type(ex),
                }
            }

    def handle_abandon(self, obj, **options):
        """Handler for abandoning a heat stack node."""
        pass
class HealthPolicy(base.Policy):
    """Policy for health management of a cluster."""

    VERSION = '1.1'
    VERSIONS = {
        '1.0': [
            {'status': consts.EXPERIMENTAL, 'since': '2017.02'},
            {'status': consts.SUPPORTED, 'since': '2018.06'},
        ],
        '1.1': [
            {'status': consts.SUPPORTED, 'since': '2018.09'},
        ],
    }
    PRIORITY = 600

    TARGET = [
        ('BEFORE', consts.CLUSTER_RECOVER),
        ('BEFORE', consts.CLUSTER_DEL_NODES),
        ('BEFORE', consts.CLUSTER_SCALE_IN),
        ('BEFORE', consts.CLUSTER_RESIZE),
        ('BEFORE', consts.NODE_DELETE),
        ('AFTER', consts.CLUSTER_DEL_NODES),
        ('AFTER', consts.CLUSTER_SCALE_IN),
        ('AFTER', consts.CLUSTER_RESIZE),
        ('AFTER', consts.NODE_DELETE),
    ]

    # Should be ANY if profile provides health check support?
    PROFILE_TYPE = [
        'os.nova.server',
        'os.heat.stack',
    ]

    KEYS = (DETECTION, RECOVERY) = ('detection', 'recovery')

    _DETECTION_KEYS = (
        DETECTION_MODES, DETECTION_TYPE, DETECTION_OPTIONS,
        DETECTION_INTERVAL, NODE_UPDATE_TIMEOUT, RECOVERY_CONDITIONAL,
    ) = (
        'detection_modes', 'type', 'options',
        'interval', 'node_update_timeout', 'recovery_conditional',
    )

    _DETECTION_OPTIONS = (
        POLL_URL, POLL_URL_SSL_VERIFY, POLL_URL_CONN_ERROR_AS_UNHEALTHY,
        POLL_URL_HEALTHY_RESPONSE, POLL_URL_RETRY_LIMIT,
        POLL_URL_RETRY_INTERVAL,
    ) = (
        'poll_url', 'poll_url_ssl_verify',
        'poll_url_conn_error_as_unhealthy', 'poll_url_healthy_response',
        'poll_url_retry_limit', 'poll_url_retry_interval',
    )

    _RECOVERY_KEYS = (
        RECOVERY_ACTIONS, RECOVERY_FENCING,
        RECOVERY_DELETE_TIMEOUT, RECOVERY_FORCE_RECREATE,
    ) = (
        'actions', 'fencing',
        'node_delete_timeout', 'node_force_recreate',
    )

    FENCING_OPTION_VALUES = (
        COMPUTE,
        # STORAGE, NETWORK,
    ) = (
        'COMPUTE',
        # 'STORAGE', 'NETWORK'
    )

    ACTION_KEYS = (
        ACTION_NAME, ACTION_PARAMS,
    ) = (
        'name', 'params',
    )

    properties_schema = {
        DETECTION: schema.Map(
            _('Policy aspect for node failure detection.'),
            schema={
                DETECTION_INTERVAL: schema.Integer(
                    _("Number of seconds between pollings. Only required "
                      "when type is 'NODE_STATUS_POLLING' or "
                      "'NODE_STATUS_POLL_URL'."),
                    default=60,
                ),
                NODE_UPDATE_TIMEOUT: schema.Integer(
                    _("Number of seconds since last node update to wait "
                      "before checking node health."),
                    default=300,
                ),
                RECOVERY_CONDITIONAL: schema.String(
                    _("The conditional that determines when recovery "
                      "should be performed in case multiple detection "
                      "modes are specified. 'ALL_FAILED' means that all "
                      "detection modes have to return failed health "
                      "checks before a node is recovered. 'ANY_FAILED' "
                      "means that a failed health check with a single "
                      "detection mode triggers a node recovery."),
                    constraints=[
                        constraints.AllowedValues(
                            consts.RECOVERY_CONDITIONAL),
                    ],
                    default=consts.ANY_FAILED,
                    required=False,
                ),
                DETECTION_MODES: schema.List(
                    _('List of node failure detection modes.'),
                    schema=schema.Map(
                        _('Node failure detection mode to try'),
                        schema={
                            DETECTION_TYPE: schema.String(
                                _('Type of node failure detection.'),
                                constraints=[
                                    constraints.AllowedValues(
                                        consts.DETECTION_TYPES),
                                ],
                                required=True,
                            ),
                            DETECTION_OPTIONS: schema.Map(
                                schema={
                                    POLL_URL: schema.String(
                                        _("URL to poll for node status. "
                                          "See documentation for valid "
                                          "expansion parameters. Only "
                                          "required when type is "
                                          "'NODE_STATUS_POLL_URL'."),
                                        default='',
                                    ),
                                    POLL_URL_SSL_VERIFY: schema.Boolean(
                                        _("Whether to verify SSL when "
                                          "calling URL to poll for node "
                                          "status. Only required when "
                                          "type is "
                                          "'NODE_STATUS_POLL_URL'."),
                                        default=True,
                                    ),
                                    POLL_URL_CONN_ERROR_AS_UNHEALTHY:
                                        schema.Boolean(
                                            _("Whether to treat URL "
                                              "connection errors as an "
                                              "indication of an unhealthy "
                                              "node. Only required when "
                                              "type is "
                                              "'NODE_STATUS_POLL_URL'."),
                                            default=True,
                                        ),
                                    POLL_URL_HEALTHY_RESPONSE:
                                        schema.String(
                                            _("String pattern in the poll "
                                              "URL response body that "
                                              "indicates a healthy node. "
                                              "Required when type is "
                                              "'NODE_STATUS_POLL_URL'."),
                                            default='',
                                        ),
                                    POLL_URL_RETRY_LIMIT: schema.Integer(
                                        _("Number of times to retry URL "
                                          "polling when its return body "
                                          "is missing "
                                          "POLL_URL_HEALTHY_RESPONSE "
                                          "string before a node is "
                                          "considered down. Required when "
                                          "type is "
                                          "'NODE_STATUS_POLL_URL'."),
                                        default=3,
                                    ),
                                    POLL_URL_RETRY_INTERVAL:
                                        schema.Integer(
                                            _("Number of seconds between "
                                              "URL polling retries before "
                                              "a node is considered down. "
                                              "Required when type is "
                                              "'NODE_STATUS_POLL_URL'."),
                                            default=3,
                                        ),
                                },
                                default={},
                            ),
                        },
                    ),
                ),
            },
            required=True,
        ),
        RECOVERY: schema.Map(
            _('Policy aspect for node failure recovery.'),
            schema={
                RECOVERY_ACTIONS: schema.List(
                    _('List of actions to try for node recovery.'),
                    schema=schema.Map(
                        _('Action to try for node recovery.'),
                        schema={
                            ACTION_NAME: schema.String(
                                _("Name of action to execute."),
                                constraints=[
                                    constraints.AllowedValues(
                                        consts.RECOVERY_ACTIONS),
                                ],
                                required=True,
                            ),
                            ACTION_PARAMS: schema.Map(
                                _("Parameters for the action"),
                            ),
                        },
                    ),
                ),
                RECOVERY_FENCING: schema.List(
                    _('List of services to be fenced.'),
                    schema=schema.String(
                        _('Service to be fenced.'),
                        constraints=[
                            constraints.AllowedValues(
                                FENCING_OPTION_VALUES),
                        ],
                        required=True,
                    ),
                ),
                RECOVERY_DELETE_TIMEOUT: schema.Integer(
                    _("Number of seconds to wait for node deletion to "
                      "finish and start node creation for recreate "
                      "recovery option. Required when type is "
                      "'NODE_STATUS_POLL_URL' and recovery action is "
                      "'RECREATE'."),
                    default=20,
                ),
                RECOVERY_FORCE_RECREATE: schema.Boolean(
                    _("Whether to create node even if node deletion "
                      "failed. Required when type is "
                      "'NODE_STATUS_POLL_URL' and recovery action is "
                      "'RECREATE'."),
                    default=False,
                ),
            },
            required=True,
        ),
    }

    def __init__(self, name, spec, **kwargs):
        super(HealthPolicy, self).__init__(name, spec, **kwargs)

        self.interval = self.properties[self.DETECTION].get(
            self.DETECTION_INTERVAL, 60)
        self.node_update_timeout = self.properties[self.DETECTION].get(
            self.NODE_UPDATE_TIMEOUT, 300)
        self.recovery_conditional = self.properties[self.DETECTION].get(
            self.RECOVERY_CONDITIONAL, consts.ANY_FAILED)

        DetectionMode = namedtuple(
            'DetectionMode',
            [self.DETECTION_TYPE] + list(self._DETECTION_OPTIONS))
        self.detection_modes = []

        raw_modes = self.properties[self.DETECTION][self.DETECTION_MODES]
        for mode in raw_modes:
            options = mode[self.DETECTION_OPTIONS]
            self.detection_modes.append(
                DetectionMode(
                    mode[self.DETECTION_TYPE],
                    options.get(self.POLL_URL, ''),
                    options.get(self.POLL_URL_SSL_VERIFY, True),
                    options.get(self.POLL_URL_CONN_ERROR_AS_UNHEALTHY,
                                True),
                    options.get(self.POLL_URL_HEALTHY_RESPONSE, ''),
                    options.get(self.POLL_URL_RETRY_LIMIT, ''),
                    options.get(self.POLL_URL_RETRY_INTERVAL, ''),
                )
            )

        recover_settings = self.properties[self.RECOVERY]
        self.recover_actions = recover_settings[self.RECOVERY_ACTIONS]
        self.fencing_types = recover_settings[self.RECOVERY_FENCING]
        self.node_delete_timeout = recover_settings.get(
            self.RECOVERY_DELETE_TIMEOUT, None)
        self.node_force_recreate = recover_settings.get(
            self.RECOVERY_FORCE_RECREATE, False)

    def validate(self, context, validate_props=False):
        super(HealthPolicy, self).validate(
            context, validate_props=validate_props)

        if len(self.recover_actions) > 1:
            message = _("Only one '%s' is supported for now."
                        ) % self.RECOVERY_ACTIONS
            raise exc.ESchema(message=message)

        if self.interval < cfg.CONF.health_check_interval_min:
            message = _("Specified interval of %(interval)d seconds has "
                        "to be larger than health_check_interval_min of "
                        "%(min_interval)d seconds set in configuration."
                        ) % {
                "interval": self.interval,
                "min_interval": cfg.CONF.health_check_interval_min,
            }
            raise exc.InvalidSpec(message=message)

        # check valid detection types
        polling_types = [consts.NODE_STATUS_POLLING,
                         consts.NODE_STATUS_POLL_URL]

        has_valid_polling_types = all(
            d.type in polling_types for d in self.detection_modes)
        has_valid_lifecycle_type = (
            len(self.detection_modes) == 1 and
            self.detection_modes[0].type == consts.LIFECYCLE_EVENTS)

        if not has_valid_polling_types and not has_valid_lifecycle_type:
            message = ("Invalid detection modes in health policy: %s" %
                       ', '.join([d.type for d in self.detection_modes]))
            raise exc.InvalidSpec(message=message)

        if len(self.detection_modes) != len(set(self.detection_modes)):
            message = ("Duplicate detection modes are not allowed in "
                       "health policy: %s" %
                       ', '.join([d.type for d in self.detection_modes]))
            raise exc.InvalidSpec(message=message)

        # TODO(Qiming): Add detection of duplicated action names when
        # support to list of actions is implemented.

    def attach(self, cluster, enabled=True):
        """Hook for policy attach.

        Register the cluster for health management.

        :param cluster: The cluster to which the policy is being attached.
        :param enabled: Whether the attached cluster policy is enabled or
                        disabled.
        :return: A tuple comprising execution result and policy data.
        """
        p_type = cluster.rt['profile'].type_name
        action_names = [a['name'] for a in self.recover_actions]
        if p_type != 'os.nova.server':
            if consts.RECOVER_REBUILD in action_names:
                err_msg = _("Recovery action REBUILD is only applicable "
                            "to os.nova.server clusters.")
                return False, err_msg
            if consts.RECOVER_REBOOT in action_names:
                err_msg = _("Recovery action REBOOT is only applicable "
                            "to os.nova.server clusters.")
                return False, err_msg

        kwargs = {
            'interval': self.interval,
            'node_update_timeout': self.node_update_timeout,
            'params': {
                'recover_action': self.recover_actions,
                'node_delete_timeout': self.node_delete_timeout,
                'node_force_recreate': self.node_force_recreate,
                'recovery_conditional': self.recovery_conditional,
            },
            'enabled': enabled,
        }

        converted_detection_modes = [
            d._asdict() for d in self.detection_modes
        ]
        detection_mode = {'detection_modes': converted_detection_modes}
        kwargs['params'].update(detection_mode)

        health_manager.register(cluster.id, engine_id=None, **kwargs)

        data = {
            'interval': self.interval,
            'node_update_timeout': self.node_update_timeout,
            'recovery_conditional': self.recovery_conditional,
            'node_delete_timeout': self.node_delete_timeout,
            'node_force_recreate': self.node_force_recreate,
        }
        data.update(detection_mode)
        return True, self._build_policy_data(data)

    def detach(self, cluster):
        """Hook for policy detach.

        Unregister the cluster for health management.

        :param cluster: The target cluster.
        :returns: A tuple comprising the execution result and reason.
        """
        ret = health_manager.unregister(cluster.id)
        if not ret:
            LOG.warning('Unregistering health manager for cluster %s '
                        'timed out.', cluster.id)
        return True, ''

    def pre_op(self, cluster_id, action, **args):
        """Hook before action execution.

        One of the tasks for this routine is to disable the health policy
        if the action is a request that will shrink the cluster. The
        reason is that the policy may attempt to recover nodes that are
        to be deleted.

        :param cluster_id: The ID of the target cluster.
        :param action: The action to be examined.
        :param kwargs args: Other keyword arguments to be checked.
        :returns: Boolean indicating whether the checking passed.
        """
        if action.action in (consts.CLUSTER_SCALE_IN,
                             consts.CLUSTER_DEL_NODES,
                             consts.NODE_DELETE):
            health_manager.disable(cluster_id)
            return True

        if action.action == consts.CLUSTER_RESIZE:
            deletion = action.data.get('deletion', None)
            if deletion:
                health_manager.disable(cluster_id)
                return True

            cluster = action.entity
            current = len(cluster.nodes)
            res, reason = scaleutils.parse_resize_params(action, cluster,
                                                         current)
            if res == base.CHECK_ERROR:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = reason
                return False

            if action.data.get('deletion', None):
                health_manager.disable(cluster_id)
                return True

        pd = {
            'recover_action': self.recover_actions,
            'fencing': self.fencing_types,
        }
        action.data.update({'health': pd})
        action.store(action.context)

        return True

    def post_op(self, cluster_id, action, **args):
        """Hook after action execution.

        One of the tasks for this routine is to re-enable the health
        policy if the action is a request that will shrink the cluster,
        since the policy was temporarily disabled for it.

        :param cluster_id: The ID of the target cluster.
        :param action: The action to be examined.
        :param kwargs args: Other keyword arguments to be checked.
        :returns: Boolean indicating whether the checking passed.
        """
        if action.action in (consts.CLUSTER_SCALE_IN,
                             consts.CLUSTER_DEL_NODES,
                             consts.NODE_DELETE):
            health_manager.enable(cluster_id)
            return True

        if action.action == consts.CLUSTER_RESIZE:
            deletion = action.data.get('deletion', None)
            if deletion:
                health_manager.enable(cluster_id)
                return True

            cluster = action.entity
            current = len(cluster.nodes)
            res, reason = scaleutils.parse_resize_params(action, cluster,
                                                         current)
            if res == base.CHECK_ERROR:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = reason
                return False

            if action.data.get('deletion', None):
                health_manager.enable(cluster_id)
                return True

        return True
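# A hedged sketch of a health policy spec accepted by the schema above:
# two detection modes combined with ANY_FAILED, recovering via RECREATE.
# The type string and the poll URL are assumptions for illustration.
example_health_spec = {
    'type': 'senlin.policy.health',
    'version': '1.1',
    'properties': {
        'detection': {
            'interval': 60,
            'node_update_timeout': 300,
            'recovery_conditional': 'ANY_FAILED',
            'detection_modes': [
                {'type': 'NODE_STATUS_POLLING', 'options': {}},
                {'type': 'NODE_STATUS_POLL_URL',
                 'options': {'poll_url': 'http://10.0.0.10/health',
                             'poll_url_healthy_response': 'OK',
                             'poll_url_retry_limit': 3}},
            ],
        },
        'recovery': {
            'actions': [{'name': 'RECREATE'}],  # validate() allows only one
            'fencing': ['COMPUTE'],
        },
    },
}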
class AffinityPolicy(base.Policy):
    """Policy for placing members of a cluster based on server groups.

    This policy is expected to be enforced before new member(s) are added
    to an existing cluster.
    """

    VERSION = '1.0'
    PRIORITY = 300

    TARGET = [
        ('BEFORE', consts.CLUSTER_SCALE_OUT),
        ('BEFORE', consts.CLUSTER_RESIZE),
    ]

    PROFILE_TYPE = [
        'os.nova.server-1.0',
    ]

    KEYS = (
        SERVER_GROUP, AVAILABILITY_ZONE, ENABLE_DRS_EXTENSION,
    ) = (
        'servergroup', 'availability_zone', 'enable_drs_extension',
    )

    _GROUP_KEYS = (
        GROUP_NAME, GROUP_POLICIES,
    ) = (
        'name', 'policies',
    )

    _POLICIES_VALUES = (
        AFFINITY, ANTI_AFFINITY,
    ) = (
        'affinity', 'anti-affinity',
    )

    properties_schema = {
        SERVER_GROUP: schema.Map(
            _('Properties of the VM server group'),
            schema={
                GROUP_NAME: schema.String(
                    _('The name of the server group'),
                ),
                GROUP_POLICIES: schema.String(
                    _('The server group policies.'),
                    default=ANTI_AFFINITY,
                    constraints=[
                        constraints.AllowedValues(_POLICIES_VALUES),
                    ],
                ),
            },
        ),
        AVAILABILITY_ZONE: schema.String(
            _('Name of the availability zone to place the nodes.'),
        ),
        ENABLE_DRS_EXTENSION: schema.Boolean(
            _('Enable vSphere DRS extension.'),
            default=False,
        ),
    }

    def __init__(self, name, spec, **kwargs):
        super(AffinityPolicy, self).__init__(name, spec, **kwargs)

        self.enable_drs = self.properties.get(self.ENABLE_DRS_EXTENSION)
        self._novaclient = None

    def nova(self, obj):
        """Construct nova client based on object.

        :param obj: Object for which the client is created. It is expected
                    to be None when retrieving an existing client. When
                    creating a client, it contains the user and project to
                    be used.
        """
        if self._novaclient is not None:
            return self._novaclient

        params = self._build_conn_params(obj)
        self._novaclient = driver.SenlinDriver().compute(params)
        return self._novaclient

    def attach(self, cluster):
        """Routine to be invoked when policy is to be attached to a cluster.

        :param cluster: The target cluster to attach to.
        :returns: When the operation was successful, returns a tuple
                  (True, message); otherwise, returns a tuple
                  (False, error).
        """
        res, data = super(AffinityPolicy, self).attach(cluster)
        if res is False:
            return False, data

        data = {'inherited_group': False}
        nc = self.nova(cluster)
        group = self.properties.get(self.SERVER_GROUP)

        # guess servergroup name
        group_name = group.get(self.GROUP_NAME, None)

        if group_name is None:
            profile = cluster.rt['profile']
            if 'scheduler_hints' in profile.spec:
                hints = profile.spec['scheduler_hints']
                group_name = hints.get('group', None)

        if group_name:
            try:
                server_group = nc.find_server_group(group_name, True)
            except exception.InternalError as ex:
                msg = _("Failed in retrieving servergroup '%s'."
                        ) % group_name
                LOG.exception(_LE('%(msg)s: %(ex)s') % {
                    'msg': msg, 'ex': six.text_type(ex)})
                return False, msg

            if server_group:
                # Check if the policies match
                policies = group.get(self.GROUP_POLICIES)
                if policies and policies != server_group.policies[0]:
                    msg = _("Policies specified (%(specified)s) do not "
                            "match those of the existing servergroup "
                            "(%(existing)s).") % {
                        'specified': policies,
                        'existing': server_group.policies[0],
                    }
                    return False, msg

                data['servergroup_id'] = server_group.id
                data['inherited_group'] = True

        if not data['inherited_group']:
            # create a random name if necessary
            if not group_name:
                group_name = 'server_group_%s' % utils.random_name()
            try:
                server_group = nc.create_server_group(
                    name=group_name,
                    policies=[group.get(self.GROUP_POLICIES)])
            except Exception as ex:
                msg = _('Failed in creating servergroup.')
                LOG.exception(_LE('%(msg)s: %(ex)s') % {
                    'msg': msg, 'ex': six.text_type(ex)})
                return False, msg

            data['servergroup_id'] = server_group.id

        policy_data = self._build_policy_data(data)
        return True, policy_data

    def detach(self, cluster):
        """Routine to be called when the policy is detached from a cluster.

        :param cluster: The cluster from which the policy is to be
                        detached.
        :returns: When the operation was successful, returns a tuple of
                  (True, data) where the data contains references to the
                  resources created; otherwise returns a tuple of
                  (False, error) where error contains an error message.
        """
        reason = _('Servergroup resource deletion succeeded.')

        ctx = context.get_admin_context()
        binding = cpo.ClusterPolicy.get(ctx, cluster.id, self.id)
        if not binding or not binding.data:
            return True, reason

        policy_data = self._extract_policy_data(binding.data)
        if not policy_data:
            return True, reason

        group_id = policy_data.get('servergroup_id', None)
        inherited_group = policy_data.get('inherited_group', False)

        if group_id and not inherited_group:
            try:
                self.nova(cluster).delete_server_group(group_id)
            except Exception as ex:
                msg = _('Failed in deleting servergroup.')
                LOG.exception(_LE('%(msg)s: %(ex)s') % {
                    'msg': msg, 'ex': six.text_type(ex)})
                return False, msg

        return True, reason

    def pre_op(self, cluster_id, action):
        """Routine to be called before a 'CLUSTER_SCALE_OUT' action.

        For this particular policy, we take this chance to intelligently
        select the most proper hypervisor/vsphere cluster to create nodes.
        To achieve that, we construct the scheduler hints that handle
        affinity/anti-affinity and update the action data with the
        specific placement parameters first.

        :param cluster_id: ID of the cluster on which the relevant action
                           is to be executed.
        :param action: The action object that triggered this operation.
        :returns: Nothing.
        """
        zone_name = self.properties.get(self.AVAILABILITY_ZONE)
        if not zone_name and self.enable_drs:
            # we make a reasonable guess of the zone name for vSphere
            # support because the zone name is required in that case.
            zone_name = 'nova'

        # we respect other policies' decisions (if any) and fall back to
        # the action inputs if no hints found.
        pd = action.data.get('creation', None)
        if pd is not None:
            count = pd.get('count', 1)
        elif action.action == consts.CLUSTER_SCALE_OUT:
            count = action.inputs.get('count', 1)
        else:  # CLUSTER_RESIZE
            db_cluster = co.Cluster.get(action.context, cluster_id)
            su.parse_resize_params(action, db_cluster)
            if 'creation' not in action.data:
                return
            count = action.data['creation']['count']

        cp = cpo.ClusterPolicy.get(action.context, cluster_id, self.id)
        policy_data = self._extract_policy_data(cp.data)
        pd_entry = {'servergroup': policy_data['servergroup_id']}

        # special handling for vSphere DRS case where we need to find out
        # the name of the vSphere host which has DRS enabled.
        if self.enable_drs:
            cluster_obj = co.Cluster.get(action.context, cluster_id)
            nc = self.nova(cluster_obj)

            hypervisors = nc.hypervisor_list()
            hv_id = ''
            pattern = re.compile(r'.*drs*', re.I)
            for hypervisor in hypervisors:
                match = pattern.match(hypervisor.hypervisor_hostname)
                if match:
                    hv_id = hypervisor.id
                    break

            if not hv_id:
                action.data['status'] = base.CHECK_ERROR
                action.data['status_reason'] = _('No suitable vSphere '
                                                 'host is available.')
                action.store(action.context)
                return

            hv_info = nc.hypervisor_get(hv_id)
            hostname = hv_info['service']['host']
            pd_entry['zone'] = ":".join([zone_name, hostname])
        elif zone_name:
            pd_entry['zone'] = zone_name

        pd = {
            'count': count,
            'placements': [pd_entry] * count,
        }
        action.data.update({'placement': pd})
        action.store(action.context)

        return
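# Hedged usage sketch for the policy above; the type string is an
# assumption. With these properties, new nodes are spread across
# hypervisors via an anti-affinity server group named 'web_group'.
example_affinity_spec = {
    'type': 'senlin.policy.affinity',
    'version': '1.0',
    'properties': {
        'servergroup': {'name': 'web_group', 'policies': 'anti-affinity'},
        'availability_zone': 'nova',
        'enable_drs_extension': False,
    },
}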
class DeletionPolicy(base.Policy):
    """Policy for choosing victim node(s) from a cluster for deletion.

    This policy is enforced when nodes are to be removed from a cluster.
    It will yield an ordered list of candidates for deletion based on
    user-specified criteria.
    """

    VERSION = '1.0'
    VERSIONS = {
        '1.0': [
            {'status': consts.SUPPORTED, 'since': '2016.04'},
        ],
    }
    PRIORITY = 400

    KEYS = (
        CRITERIA, DESTROY_AFTER_DELETION, GRACE_PERIOD,
        REDUCE_DESIRED_CAPACITY,
    ) = (
        'criteria', 'destroy_after_deletion', 'grace_period',
        'reduce_desired_capacity',
    )

    CRITERIA_VALUES = (
        OLDEST_FIRST, OLDEST_PROFILE_FIRST, YOUNGEST_FIRST, RANDOM,
    ) = (
        'OLDEST_FIRST', 'OLDEST_PROFILE_FIRST', 'YOUNGEST_FIRST', 'RANDOM',
    )

    TARGET = [
        ('BEFORE', consts.CLUSTER_SCALE_IN),
        ('BEFORE', consts.CLUSTER_DEL_NODES),
        ('BEFORE', consts.CLUSTER_RESIZE),
        ('BEFORE', consts.NODE_DELETE),
    ]

    PROFILE_TYPE = [
        'ANY',
    ]

    properties_schema = {
        CRITERIA: schema.String(
            _('Criteria used in selecting candidates for deletion'),
            default=RANDOM,
            constraints=[
                constraints.AllowedValues(CRITERIA_VALUES),
            ],
        ),
        DESTROY_AFTER_DELETION: schema.Boolean(
            _('Whether a node should be completely destroyed after '
              'deletion. Default to True.'),
            default=True,
        ),
        GRACE_PERIOD: schema.Integer(
            _('Number of seconds before real deletion happens.'),
            default=0,
        ),
        REDUCE_DESIRED_CAPACITY: schema.Boolean(
            _('Whether the desired capacity of the cluster should be '
              'reduced along the deletion. Default to True.'),
            default=True,
        ),
    }

    def __init__(self, name, spec, **kwargs):
        super(DeletionPolicy, self).__init__(name, spec, **kwargs)

        self.criteria = self.properties[self.CRITERIA]
        self.grace_period = self.properties[self.GRACE_PERIOD]
        self.destroy_after_deletion = self.properties[
            self.DESTROY_AFTER_DELETION]
        self.reduce_desired_capacity = self.properties[
            self.REDUCE_DESIRED_CAPACITY]

    def _victims_by_regions(self, cluster, regions):
        victims = []
        for region in sorted(regions.keys()):
            count = regions[region]
            nodes = cluster.nodes_by_region(region)
            if self.criteria == self.RANDOM:
                candidates = scaleutils.nodes_by_random(nodes, count)
            elif self.criteria == self.OLDEST_PROFILE_FIRST:
                candidates = scaleutils.nodes_by_profile_age(nodes, count)
            elif self.criteria == self.OLDEST_FIRST:
                candidates = scaleutils.nodes_by_age(nodes, count, True)
            else:
                candidates = scaleutils.nodes_by_age(nodes, count, False)

            victims.extend(candidates)

        return victims

    def _victims_by_zones(self, cluster, zones):
        victims = []
        for zone in sorted(zones.keys()):
            count = zones[zone]
            nodes = cluster.nodes_by_zone(zone)
            if self.criteria == self.RANDOM:
                candidates = scaleutils.nodes_by_random(nodes, count)
            elif self.criteria == self.OLDEST_PROFILE_FIRST:
                candidates = scaleutils.nodes_by_profile_age(nodes, count)
            elif self.criteria == self.OLDEST_FIRST:
                candidates = scaleutils.nodes_by_age(nodes, count, True)
            else:
                candidates = scaleutils.nodes_by_age(nodes, count, False)

            victims.extend(candidates)

        return victims

    def _update_action(self, action, victims):
        pd = action.data.get('deletion', {})
        pd['count'] = len(victims)
        pd['candidates'] = victims
        pd['destroy_after_deletion'] = self.destroy_after_deletion
        pd['grace_period'] = self.grace_period
        pd['reduce_desired_capacity'] = self.reduce_desired_capacity
        action.data.update({
            'status': base.CHECK_OK,
            'reason': _('Candidates generated'),
            'deletion': pd,
        })
        action.store(action.context)

    def pre_op(self, cluster_id, action):
        """Choose victims that can be deleted.

        :param cluster_id: ID of the cluster to be handled.
        :param action: The action object that triggered this policy.
        """
        victims = action.inputs.get('candidates', [])
        if len(victims) > 0:
            self._update_action(action, victims)
            return

        if action.action == consts.NODE_DELETE:
            self._update_action(action, [action.node.id])
            return

        db_cluster = None
        regions = None
        zones = None

        deletion = action.data.get('deletion', {})
        if deletion:
            # there are policy decisions
            count = deletion['count']
            regions = deletion.get('regions', None)
            zones = deletion.get('zones', None)
        elif action.action == consts.CLUSTER_SCALE_IN:
            # No policy decision, check action itself: SCALE_IN
            count = action.inputs.get('count', 1)
        else:
            # No policy decision, check action itself: RESIZE
            db_cluster = co.Cluster.get(action.context, cluster_id)
            current = no.Node.count_by_cluster(action.context, cluster_id)
            res, reason = scaleutils.parse_resize_params(action, db_cluster,
                                                         current)
            if res == base.CHECK_ERROR:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = reason
                LOG.error(reason)
                return

            if 'deletion' not in action.data:
                return
            count = action.data['deletion']['count']

        cluster = cm.Cluster.load(action.context, dbcluster=db_cluster,
                                  cluster_id=cluster_id)

        # Cross-region
        if regions:
            victims = self._victims_by_regions(cluster, regions)
            self._update_action(action, victims)
            return

        # Cross-AZ
        if zones:
            victims = self._victims_by_zones(cluster, zones)
            self._update_action(action, victims)
            return

        if count > len(cluster.nodes):
            count = len(cluster.nodes)

        if self.criteria == self.RANDOM:
            victims = scaleutils.nodes_by_random(cluster.nodes, count)
        elif self.criteria == self.OLDEST_PROFILE_FIRST:
            victims = scaleutils.nodes_by_profile_age(cluster.nodes, count)
        elif self.criteria == self.OLDEST_FIRST:
            victims = scaleutils.nodes_by_age(cluster.nodes, count, True)
        else:
            victims = scaleutils.nodes_by_age(cluster.nodes, count, False)

        self._update_action(action, victims)
        return
class LoadBalancingPolicy(base.Policy):
    '''Policy for load balancing among members of a cluster.

    This policy is expected to be enforced after the member list of a
    cluster is changed. We need to reload the load-balancer specified (or
    internally created) when these actions are performed.
    '''

    VERSION = '1.0'

    TARGET = [
        ('AFTER', consts.CLUSTER_ADD_NODES),
        ('AFTER', consts.CLUSTER_DEL_NODES),
        ('AFTER', consts.CLUSTER_SCALE_OUT),
        ('AFTER', consts.CLUSTER_SCALE_IN),
        ('AFTER', consts.CLUSTER_RESIZE),
    ]

    PROFILE_TYPE = [
        'os.nova.server-1.0',
    ]

    KEYS = (
        POOL, VIP,
    ) = (
        'pool', 'vip',
    )

    _POOL_KEYS = (
        POOL_PROTOCOL, POOL_PROTOCOL_PORT, POOL_SUBNET,
        POOL_LB_METHOD, POOL_ADMIN_STATE_UP, POOL_SESSION_PERSISTENCE,
    ) = (
        'protocol', 'protocol_port', 'subnet',
        'lb_method', 'admin_state_up', 'session_persistence',
    )

    PROTOCOLS = (
        HTTP, HTTPS, TCP,
    ) = (
        'HTTP', 'HTTPS', 'TCP',
    )

    LB_METHODS = (
        ROUND_ROBIN, LEAST_CONNECTIONS, SOURCE_IP,
    ) = (
        'ROUND_ROBIN', 'LEAST_CONNECTIONS', 'SOURCE_IP',
    )

    _VIP_KEYS = (
        VIP_SUBNET, VIP_ADDRESS, VIP_CONNECTION_LIMIT, VIP_PROTOCOL,
        VIP_PROTOCOL_PORT, VIP_ADMIN_STATE_UP,
    ) = (
        'subnet', 'address', 'connection_limit', 'protocol',
        'protocol_port', 'admin_state_up',
    )

    _SESSION_PERSISTENCE_KEYS = (
        PERSISTENCE_TYPE, COOKIE_NAME,
    ) = (
        'type', 'cookie_name',
    )

    PERSISTENCE_TYPES = (
        PERSIST_SOURCE_IP, PERSIST_HTTP_COOKIE, PERSIST_APP_COOKIE,
    ) = (
        'SOURCE_IP', 'HTTP_COOKIE', 'APP_COOKIE',
    )

    properties_schema = {
        POOL: schema.Map(
            _('LB pool properties.'),
            schema={
                POOL_PROTOCOL: schema.String(
                    _('Protocol used for load balancing.'),
                    constraints=[
                        constraints.AllowedValues(PROTOCOLS),
                    ],
                    default=HTTP,
                ),
                POOL_PROTOCOL_PORT: schema.Integer(
                    _('Port on which servers are running on the nodes.'),
                    default=80,
                ),
                POOL_SUBNET: schema.String(
                    _('Name or ID of subnet for the port on which nodes '
                      'can be connected.'),
                    required=True,
                ),
                POOL_LB_METHOD: schema.String(
                    _('Load balancing algorithm.'),
                    constraints=[
                        constraints.AllowedValues(LB_METHODS),
                    ],
                    default=ROUND_ROBIN,
                ),
                POOL_ADMIN_STATE_UP: schema.Boolean(
                    _('Administrative state of the pool.'),
                    default=True,
                ),
                POOL_SESSION_PERSISTENCE: schema.Map(
                    _('Session persistence configuration.'),
                    schema={
                        PERSISTENCE_TYPE: schema.String(
                            _('Type of session persistence '
                              'implementation.'),
                            constraints=[
                                constraints.AllowedValues(
                                    PERSISTENCE_TYPES),
                            ],
                        ),
                        COOKIE_NAME: schema.String(
                            _('Name of cookie if type set to APP_COOKIE.'),
                        ),
                    },
                    default={},
                ),
            },
        ),
        VIP: schema.Map(
            _('VIP address and port of the pool.'),
            schema={
                VIP_SUBNET: schema.String(
                    _('Name or ID of Subnet on which the VIP address will '
                      'be allocated.'),
                    required=True,
                ),
                VIP_ADDRESS: schema.String(
                    _('IP address of the VIP.'),
                    default=None,
                ),
                VIP_CONNECTION_LIMIT: schema.Integer(
                    _('Maximum number of connections per second allowed '
                      'for this VIP'),
                    default=-1,
                ),
                VIP_PROTOCOL: schema.String(
                    _('Protocol used for VIP.'),
                    constraints=[
                        constraints.AllowedValues(PROTOCOLS),
                    ],
                    default=HTTP,
                ),
                VIP_PROTOCOL_PORT: schema.Integer(
                    _('TCP port to listen on.'),
                    default=80,
                ),
                VIP_ADMIN_STATE_UP: schema.Boolean(
                    _('Administrative state of the VIP.'),
                    default=True,
                ),
            },
        ),
    }

    def __init__(self, name, spec, **kwargs):
        super(LoadBalancingPolicy, self).__init__(name, spec, **kwargs)

        self.pool_spec = self.properties.get(self.POOL, {})
        self.vip_spec = self.properties.get(self.VIP, {})
        self.validate()
        self.lb = None

    def validate(self):
        super(LoadBalancingPolicy, self).validate()
        # TODO: validate that the specified subnets exist, e.g.
        # subnet = self.nc.subnet_get(vip[self.VIP_SUBNET])

    def attach(self, cluster):
        """Routine to be invoked when policy is to be attached to a cluster.

        :param cluster: The target cluster to be attached to;
        :returns: When the operation was successful, returns a tuple
                  (True, message); otherwise, returns a tuple
                  (False, error).
        """
        res, data = super(LoadBalancingPolicy, self).attach(cluster)
        if res is False:
            return False, data

        nodes = node_mod.Node.load_all(oslo_context.get_current(),
                                       cluster_id=cluster.id)

        params = self._build_conn_params(cluster)
        lb_driver = driver_base.SenlinDriver().loadbalancing(params)

        res, data = lb_driver.lb_create(self.vip_spec, self.pool_spec)
        if res is False:
            return False, data

        port = self.pool_spec.get(self.POOL_PROTOCOL_PORT)
        subnet = self.pool_spec.get(self.POOL_SUBNET)

        for node in nodes:
            member_id = lb_driver.member_add(node, data['loadbalancer'],
                                             data['pool'], port, subnet)
            if member_id is None:
                # When failed in adding member, remove all lb resources
                # that were created and return the failure reason.
                # TODO(Yanyan Hu): Maybe we should tolerate member adding
                # failure and allow policy attaching to succeed without
                # all nodes being added into lb pool?
                lb_driver.lb_delete(**data)
                return False, 'Failed in adding node into lb pool'

            node.data.update({'lb_member': member_id})
            node.store(oslo_context.get_current())

        policy_data = self._build_policy_data(data)

        return True, policy_data

    def detach(self, cluster):
        """Routine to be called when the policy is detached from a cluster.

        :param cluster: The cluster from which the policy is to be
                        detached.
        :returns: When the operation was successful, returns a tuple of
                  (True, data) where the data contains references to the
                  resources created; otherwise returns a tuple of
                  (False, err) where err contains an error message.
        """
        reason = _('LB resources deletion succeeded.')
        params = self._build_conn_params(cluster)
        lb_driver = driver_base.SenlinDriver().loadbalancing(params)

        cp = cluster_policy.ClusterPolicy.load(oslo_context.get_current(),
                                               cluster.id, self.id)

        policy_data = self._extract_policy_data(cp.data)
        if policy_data is None:
            return True, reason

        res, reason = lb_driver.lb_delete(**policy_data)
        if res is False:
            return False, reason

        nodes = node_mod.Node.load_all(oslo_context.get_current(),
                                       cluster_id=cluster.id)
        for node in nodes:
            if 'lb_member' in node.data:
                node.data.pop('lb_member')
                node.store(oslo_context.get_current())

        return True, reason

    def post_op(self, cluster_id, action):
        """Routine to be called after an action has been executed.

        For this particular policy, we take this chance to update the pool
        maintained by the load-balancer.

        :param cluster_id: The ID of the cluster on which a relevant
                           action has been executed.
        :param action: The action object that triggered this operation.
        :returns: Nothing.
        """
        nodes_added = action.outputs.get('nodes_added', [])
        nodes_removed = action.outputs.get('nodes_removed', [])
        if ((len(nodes_added) == 0) and (len(nodes_removed) == 0)):
            return

        db_cluster = db_api.cluster_get(action.context, cluster_id)
        params = self._build_conn_params(db_cluster)
        lb_driver = driver_base.SenlinDriver().loadbalancing(params)
        cp = cluster_policy.ClusterPolicy.load(action.context, cluster_id,
                                               self.id)
        policy_data = self._extract_policy_data(cp.data)
        lb_id = policy_data['loadbalancer']
        pool_id = policy_data['pool']
        port = self.pool_spec.get(self.POOL_PROTOCOL_PORT)
        subnet = self.pool_spec.get(self.POOL_SUBNET)

        # Remove nodes that have been deleted from lb pool
        for node_id in nodes_removed:
            node = node_mod.Node.load(action.context, node_id=node_id,
                                      show_deleted=True)
            member_id = node.data.get('lb_member', None)
            if member_id is None:
                LOG.warning(_LW('Node %(n)s not found in lb pool %(p)s.'),
                            {'n': node_id, 'p': pool_id})
                continue

            res = lb_driver.member_remove(lb_id, pool_id, member_id)
            if res is not True:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = _('Failed in removing deleted '
                                          'node(s) from lb pool.')
                return

        # Add new nodes to lb pool
        for node_id in nodes_added:
            node = node_mod.Node.load(action.context, node_id=node_id,
                                      show_deleted=True)
            member_id = node.data.get('lb_member', None)
            if member_id:
                LOG.warning(_LW('Node %(n)s already in lb pool %(p)s.'),
                            {'n': node_id, 'p': pool_id})
                continue

            member_id = lb_driver.member_add(node, lb_id, pool_id, port,
                                             subnet)
            if member_id is None:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = _('Failed in adding new node(s) '
                                          'into lb pool.')
                return

            node.data.update({'lb_member': member_id})
            node.store(action.context)

        return
class StackProfile(base.Profile):
    '''Profile for an OpenStack Heat stack.

    When this profile is used, the whole cluster is a collection of Heat
    stacks.
    '''

    KEYS = (
        TEMPLATE, CONTEXT, PARAMETERS, FILES,
        TIMEOUT, DISABLE_ROLLBACK, ENVIRONMENT,
    ) = (
        'template', 'context', 'parameters', 'files',
        'timeout', 'disable_rollback', 'environment',
    )

    properties_schema = {
        CONTEXT: schema.Map(
            _('A dictionary for specifying the customized context for '
              'stack operations'),
            default={},
        ),
        TEMPLATE: schema.Map(
            _('Heat stack template.'),
            required=True,
            updatable=True,
        ),
        PARAMETERS: schema.Map(
            _('Parameters to be passed to Heat for stack operations.'),
            default={},
            updatable=True,
        ),
        FILES: schema.Map(
            _('Contents of files referenced by the template, if any.'),
            default={},
            updatable=True,
        ),
        TIMEOUT: schema.Integer(
            _('An integer that specifies the number of minutes that a '
              'stack operation times out.'),
            updatable=True,
        ),
        DISABLE_ROLLBACK: schema.Boolean(
            _('A boolean specifying whether a stack operation can be '
              'rolled back.'),
            default=True,
            updatable=True,
        ),
        ENVIRONMENT: schema.Map(
            _('A map that specifies the environment used for stack '
              'operations.'),
            default={},
            updatable=True,
        ),
    }

    OP_NAMES = (
        OP_ABANDON,
    ) = (
        'abandon',
    )

    OPERATIONS = {
        OP_ABANDON: schema.Map(
            _('Abandon a heat stack node.'),
        ),
    }

    def __init__(self, type_name, name, **kwargs):
        super(StackProfile, self).__init__(type_name, name, **kwargs)

        self.hc = None
        self.stack_id = None

    def heat(self, obj):
        '''Construct heat client using the combined context.'''
        if self.hc:
            return self.hc

        params = self._build_conn_params(obj.user, obj.project)
        self.hc = driver_base.SenlinDriver().orchestration(params)
        return self.hc

    def do_validate(self, obj):
        '''Validate if the spec has provided info for stack creation.'''
        kwargs = {
            'stack_name': obj.name,
            'template': self.properties[self.TEMPLATE],
            'timeout_mins': self.properties[self.TIMEOUT],
            'disable_rollback': self.properties[self.DISABLE_ROLLBACK],
            'parameters': self.properties[self.PARAMETERS],
            'files': self.properties[self.FILES],
            'environment': self.properties[self.ENVIRONMENT],
        }
        try:
            self.heat(obj).stacks.validate(**kwargs)
        except Exception as ex:
            msg = _('Failed in validating stack template due to '
                    '"%s"') % six.text_type(ex)
            raise exception.InvalidSpec(message=msg)

        return True

    def _check_action_complete(self, obj, action):
        stack = self.heat(obj).stack_get(self.stack_id)
        status = stack.status.split('_', 1)
        if status[0] == action:
            if status[1] == 'IN_PROGRESS':
                return False

            if status[1] == 'COMPLETE':
                return True

            raise exception.ResourceStatusError(
                resource_id=self.stack_id,
                status=stack.status,
                reason=stack.status_reason)
        else:
            return False

    def do_create(self, obj):
        '''Create a stack using the given profile.'''
        kwargs = {
            'stack_name': obj.name + '-' + utils.random_name(8),
            'template': self.properties[self.TEMPLATE],
            'timeout_mins': self.properties[self.TIMEOUT],
            'disable_rollback': self.properties[self.DISABLE_ROLLBACK],
            'parameters': self.properties[self.PARAMETERS],
            'files': self.properties[self.FILES],
            'environment': self.properties[self.ENVIRONMENT],
        }

        LOG.info('Creating stack: %s' % kwargs)
        stack = self.heat(obj).stack_create(**kwargs)
        self.stack_id = stack.id

        # Wait for action to complete/fail
        while not self._check_action_complete(obj, 'CREATE'):
            scheduler.sleep(1)

        return stack.id

    def do_delete(self, obj):
        self.stack_id = obj.physical_id

        try:
            self.heat(obj).stack_delete(self.stack_id, True)
            self.heat(obj).wait_for_stack_delete(self.stack_id)
        except Exception as ex:
            LOG.error('Error: %s' % six.text_type(ex))
            raise ex

        return True

    def do_update(self, obj, new_profile, **params):
        '''Perform update on object.

        :param obj: the node object to operate on
        :param new_profile: the new profile used for updating
        :param params: other parameters for the update request.
        '''
        self.stack_id = obj.physical_id
        if not self.stack_id:
            return True

        if not self.validate_for_update(new_profile):
            return False

        fields = {}
        new_template = new_profile.properties[new_profile.TEMPLATE]
        if new_template != self.properties[self.TEMPLATE]:
            fields['template'] = new_template

        new_params = new_profile.properties[new_profile.PARAMETERS]
        if new_params != self.properties[self.PARAMETERS]:
            fields['parameters'] = new_params

        new_timeout = new_profile.properties[new_profile.TIMEOUT]
        if new_timeout != self.properties[self.TIMEOUT]:
            fields['timeout_mins'] = new_timeout

        new_dr = new_profile.properties[new_profile.DISABLE_ROLLBACK]
        if new_dr != self.properties[self.DISABLE_ROLLBACK]:
            fields['disable_rollback'] = new_dr

        new_files = new_profile.properties[new_profile.FILES]
        if new_files != self.properties[self.FILES]:
            fields['files'] = new_files

        new_environment = new_profile.properties[new_profile.ENVIRONMENT]
        if new_environment != self.properties[self.ENVIRONMENT]:
            fields['environment'] = new_environment

        if fields:
            try:
                self.heat(obj).stack_update(self.stack_id, **fields)
            except Exception as ex:
                LOG.exception(_('Failed in updating stack: %s'),
                              six.text_type(ex))
                return False

            # Wait for action to complete/fail
            while not self._check_action_complete(obj, 'UPDATE'):
                scheduler.sleep(1)

        return True

    def do_check(self, obj):
        """Check stack status."""
        hc = self.heat(obj)
        try:
            stack = hc.stack_get(obj.physical_id)
        except Exception as ex:
            raise ex

        # When the stack is in a status which can't be checked
        # (CREATE_IN_PROGRESS, DELETE_IN_PROGRESS, etc.), return False.
        try:
            stack.check(hc.session)
        except Exception:
            return False

        status = stack.status
        while status == 'CHECK_IN_PROGRESS':
            status = hc.stack_get(obj.physical_id).status
        if status == 'CHECK_COMPLETE':
            return True
        else:
            return False

    def do_get_details(self, obj):
        if obj.physical_id is None or obj.physical_id == '':
            return {}

        return self.heat(obj).stack_get(obj.physical_id)

    def handle_abandon(self, obj, **options):
        """Handler for abandoning a heat stack node."""
        pass
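# A small, self-contained illustration of the status convention that
# _check_action_complete() relies on: Heat reports stack status strings of
# the form '<ACTION>_<STATE>', so splitting on the first underscore
# separates the action from its progress state.
action, state = 'CREATE_IN_PROGRESS'.split('_', 1)
assert action == 'CREATE' and state == 'IN_PROGRESS'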
class ServerProfile(base.Profile): """Profile for an OpenStack Nova server.""" KEYS = ( CONTEXT, ADMIN_PASS, AUTO_DISK_CONFIG, AVAILABILITY_ZONE, BLOCK_DEVICE_MAPPING_V2, CONFIG_DRIVE, FLAVOR, IMAGE, KEY_NAME, METADATA, NAME, NETWORKS, PERSONALITY, SECURITY_GROUPS, USER_DATA, SCHEDULER_HINTS, ) = ( 'context', 'admin_pass', 'auto_disk_config', 'availability_zone', 'block_device_mapping_v2', 'config_drive', 'flavor', 'image', 'key_name', 'metadata', 'name', 'networks', 'personality', 'security_groups', 'user_data', 'scheduler_hints', ) BDM2_KEYS = ( BDM2_UUID, BDM2_SOURCE_TYPE, BDM2_DESTINATION_TYPE, BDM2_DISK_BUS, BDM2_DEVICE_NAME, BDM2_VOLUME_SIZE, BDM2_GUEST_FORMAT, BDM2_BOOT_INDEX, BDM2_DEVICE_TYPE, BDM2_DELETE_ON_TERMINATION, ) = ( 'uuid', 'source_type', 'destination_type', 'disk_bus', 'device_name', 'volume_size', 'guest_format', 'boot_index', 'device_type', 'delete_on_termination', ) NETWORK_KEYS = ( PORT, FIXED_IP, NETWORK, ) = ( 'port', 'fixed_ip', 'network', ) PERSONALITY_KEYS = ( PATH, CONTENTS, ) = ( 'path', 'contents', ) SCHEDULER_HINTS_KEYS = (GROUP, ) = ('group', ) properties_schema = { CONTEXT: schema.Map(_('Customized security context for operating servers.'), ), ADMIN_PASS: schema.String(_('Password for the administrator account.'), ), AUTO_DISK_CONFIG: schema.Boolean( _('Whether the disk partition is done automatically.'), default=True, ), AVAILABILITY_ZONE: schema.String( _('Name of availability zone for running the server.'), ), BLOCK_DEVICE_MAPPING_V2: schema.List( _('A list specifying the properties of block devices to be used ' 'for this server.'), schema=schema.Map( _('A map specifying the properties of a block device to be ' 'used by the server.'), schema={ BDM2_UUID: schema.String( _('ID of the source image, snapshot or volume'), ), BDM2_SOURCE_TYPE: schema.String( _('Volume source type, should be image, snapshot, ' 'volume or blank'), required=True, ), BDM2_DESTINATION_TYPE: schema.String( _('Volume destination type, should be volume or ' 'local'), required=True, ), BDM2_DISK_BUS: schema.String(_('Bus of the device.'), ), BDM2_DEVICE_NAME: schema.String( _('Name of the device(e.g. vda, xda, ....).'), ), BDM2_VOLUME_SIZE: schema.Integer( _('Size of the block device in MB(for swap) and ' 'in GB(for other formats)'), required=True, ), BDM2_GUEST_FORMAT: schema.String( _('Specifies the disk file system format(e.g. swap, ' 'ephemeral, ...).'), ), BDM2_BOOT_INDEX: schema.Integer(_('Define the boot order of the device'), ), BDM2_DEVICE_TYPE: schema.String( _('Type of the device(e.g. disk, cdrom, ...).'), ), BDM2_DELETE_ON_TERMINATION: schema.Boolean( _('Whether to delete the volume when the server ' 'stops.'), ), }), ), CONFIG_DRIVE: schema.Boolean( _('Whether config drive should be enabled for the server.'), ), FLAVOR: schema.String( _('ID of flavor used for the server.'), required=True, updatable=True, ), IMAGE: schema.String( # IMAGE is not required, because there could be BDM or BDMv2 # support and the corresponding settings effective _('ID of image to be used for the new server.'), updatable=True, ), KEY_NAME: schema.String(_('Name of Nova keypair to be injected to server.'), ), METADATA: schema.Map( _('A collection of key/value pairs to be associated with the ' 'server created. Both key and value should be <=255 chars.'), updatable=True, ), NAME: schema.String( _('Name of the server. 
When omitted, the node name will be used.'), updatable=True, ), NETWORKS: schema.List( _('List of networks for the server.'), schema=schema.Map( _('A map specifying the properties of a network for use.'), schema={ NETWORK: schema.String( _('Name or ID of network to create a port on.'), ), PORT: schema.String(_('Port ID to be used by the network.'), ), FIXED_IP: schema.String(_('Fixed IP to be used by the network.'), ), }, ), updatable=True, ), PERSONALITY: schema.List( _('List of files to be injected into the server, where each entry specifies a path and contents.'), schema=schema.Map( _('A map specifying the path & contents for an injected ' 'file.'), schema={ PATH: schema.String( _('In-instance path for the file to be injected.'), required=True, ), CONTENTS: schema.String( _('Contents of the file to be injected.'), required=True, ), }, ), ), SCHEDULER_HINTS: schema.Map( _('A collection of key/value pairs to be associated with the ' 'Scheduler hints. Both key and value should be <=255 chars.'), ), SECURITY_GROUPS: schema.List( _('List of security groups.'), schema=schema.String( _('Name of a security group'), required=True, ), ), USER_DATA: schema.String(_('User data to be exposed by the metadata server.'), ), } OP_NAMES = ( OP_REBOOT, OP_CHANGE_PASSWORD, ) = ( 'reboot', 'change_password', ) REBOOT_TYPE = 'type' REBOOT_TYPES = (REBOOT_SOFT, REBOOT_HARD) = ('SOFT', 'HARD') ADMIN_PASSWORD = '******' OPERATIONS = { OP_REBOOT: schema.Operation( _("Reboot the nova server."), schema={ REBOOT_TYPE: schema.StringParam( _("Type of reboot which can be 'SOFT' or 'HARD'."), default=REBOOT_SOFT, constraints=[ constraints.AllowedValues(REBOOT_TYPES), ]) }), OP_CHANGE_PASSWORD: schema.Operation(_("Change the administrator password."), schema={ ADMIN_PASSWORD: schema.StringParam( _("New password for the administrator.")) }), } def __init__(self, type_name, name, **kwargs): super(ServerProfile, self).__init__(type_name, name, **kwargs) self.server_id = None def _validate_az(self, obj, az_name, reason=None): try: res = self.compute(obj).validate_azs([az_name]) except exc.InternalError as ex: if reason == 'create': raise exc.EResourceCreation(type='server', message=six.text_type(ex)) else: raise if not res: msg = _("The specified %(key)s '%(value)s' could not be found.") % { 'key': self.AVAILABILITY_ZONE, 'value': az_name } if reason == 'create': raise exc.EResourceCreation(type='server', message=msg) else: raise exc.InvalidSpec(message=msg) return az_name def _validate_flavor(self, obj, name_or_id, reason=None): flavor = None msg = '' try: flavor = self.compute(obj).flavor_find(name_or_id, False) except exc.InternalError as ex: msg = six.text_type(ex) if reason is None: # reason is 'validate' if ex.code == 404: msg = _( "The specified %(k)s '%(v)s' could not be found.") % { 'k': self.FLAVOR, 'v': name_or_id } raise exc.InvalidSpec(message=msg) else: raise if flavor is not None: if not flavor.is_disabled: return flavor msg = _("The specified %(k)s '%(v)s' is disabled.") % { 'k': self.FLAVOR, 'v': name_or_id } if reason == 'create': raise exc.EResourceCreation(type='server', message=msg) elif reason == 'update': raise exc.EResourceUpdate(type='server', id=obj.physical_id, message=msg) else: raise exc.InvalidSpec(message=msg) def _validate_image(self, obj, name_or_id, reason=None): try: return self.compute(obj).image_find(name_or_id, False) except exc.InternalError as ex: if reason == 'create': raise exc.EResourceCreation(type='server', message=six.text_type(ex)) elif reason == 'update': raise exc.EResourceUpdate(type='server',
id=obj.physical_id, message=six.text_type(ex)) elif ex.code == 404: msg = _("The specified %(k)s '%(v)s' could not be found.") % { 'k': self.IMAGE, 'v': name_or_id } raise exc.InvalidSpec(message=msg) else: raise def _validate_keypair(self, obj, name_or_id, reason=None): try: return self.compute(obj).keypair_find(name_or_id, False) except exc.InternalError as ex: if reason == 'create': raise exc.EResourceCreation(type='server', message=six.text_type(ex)) elif reason == 'update': raise exc.EResourceUpdate(type='server', id=obj.physical_id, message=six.text_type(ex)) elif ex.code == 404: msg = _("The specified %(k)s '%(v)s' could not be found.") % { 'k': self.KEY_NAME, 'v': name_or_id } raise exc.InvalidSpec(message=msg) else: raise def do_validate(self, obj): """Validate if the spec has provided valid info for server creation. :param obj: The node object. """ # validate availability_zone az_name = self.properties[self.AVAILABILITY_ZONE] if az_name is not None: self._validate_az(obj, az_name) # validate flavor flavor = self.properties[self.FLAVOR] self._validate_flavor(obj, flavor) # validate image image = self.properties[self.IMAGE] if image is not None: self._validate_image(obj, image) # validate key_name keypair = self.properties[self.KEY_NAME] if keypair is not None: self._validate_keypair(obj, keypair) # validate networks networks = self.properties[self.NETWORKS] for net in networks: self._validate_network(obj, net) return True def _resolve_bdm(self, bdm): for bd in bdm: for key in self.BDM2_KEYS: if bd[key] is None: del bd[key] return bdm def _validate_network(self, obj, network, reason=None): result = {} error = None # check network net_ident = network.get(self.NETWORK) if net_ident: try: net = self.network(obj).network_get(net_ident) if reason == 'update': result['net_id'] = net.id else: result['uuid'] = net.id except exc.InternalError as ex: error = six.text_type(ex) # check port port_ident = network.get(self.PORT) if not error and port_ident: try: port = self.network(obj).port_find(port_ident) if port.status != 'DOWN': error = _( "The status of the port %(port)s must be DOWN") % { 'port': port_ident } if reason == 'update': result['port_id'] = port.id else: result['port'] = port.id except exc.InternalError as ex: error = six.text_type(ex) elif port_ident is None and net_ident is None: error = _("'%(port)s' is required if '%(net)s' is omitted") % { 'port': self.PORT, 'net': self.NETWORK } fixed_ip = network.get(self.FIXED_IP) if not error and fixed_ip: if port_ident is not None: error = _("The '%(port)s' property and the '%(fixed_ip)s' " "property cannot be specified at the same time") % { 'port': self.PORT, 'fixed_ip': self.FIXED_IP } else: if reason == 'update': result['fixed_ips'] = [{'ip_address': fixed_ip}] else: result['fixed_ip'] = fixed_ip if error: if reason == 'create': raise exc.EResourceCreation(type='server', message=error) elif reason == 'update': raise exc.EResourceUpdate(type='server', id=obj.physical_id, message=error) else: raise exc.InvalidSpec(message=error) return result def _build_metadata(self, obj, usermeta): """Build custom metadata for server. :param obj: The node object to operate on. :return: A dictionary containing the new metadata. """ metadata = usermeta or {} metadata['cluster_node_id'] = obj.id if obj.cluster_id: metadata['cluster_id'] = obj.cluster_id metadata['cluster_node_index'] = six.text_type(obj.index) return metadata def do_create(self, obj): """Create a server for the node object. 
:param obj: The node object for which a server will be created. """ kwargs = {} for key in self.KEYS: # context is treated as connection parameters if key == self.CONTEXT: continue if self.properties[key] is not None: kwargs[key] = self.properties[key] admin_pass = self.properties[self.ADMIN_PASS] if admin_pass: kwargs.pop(self.ADMIN_PASS) kwargs['adminPass'] = admin_pass auto_disk_config = self.properties[self.AUTO_DISK_CONFIG] kwargs.pop(self.AUTO_DISK_CONFIG) kwargs['OS-DCF:diskConfig'] = 'AUTO' if auto_disk_config else 'MANUAL' image_ident = self.properties[self.IMAGE] if image_ident is not None: image = self._validate_image(obj, image_ident, 'create') kwargs.pop(self.IMAGE) kwargs['imageRef'] = image.id flavor_ident = self.properties[self.FLAVOR] flavor = self._validate_flavor(obj, flavor_ident, 'create') kwargs.pop(self.FLAVOR) kwargs['flavorRef'] = flavor.id keypair_name = self.properties[self.KEY_NAME] if keypair_name: keypair = self._validate_keypair(obj, keypair_name, 'create') kwargs['key_name'] = keypair.name kwargs['name'] = self.properties[self.NAME] or obj.name metadata = self._build_metadata(obj, self.properties[self.METADATA]) kwargs['metadata'] = metadata block_device_mapping_v2 = self.properties[self.BLOCK_DEVICE_MAPPING_V2] if block_device_mapping_v2 is not None: kwargs['block_device_mapping_v2'] = self._resolve_bdm( block_device_mapping_v2) user_data = self.properties[self.USER_DATA] if user_data is not None: ud = encodeutils.safe_encode(user_data) kwargs['user_data'] = encodeutils.safe_decode(base64.b64encode(ud)) networks = self.properties[self.NETWORKS] if networks is not None: kwargs['networks'] = [] for net_spec in networks: net = self._validate_network(obj, net_spec, 'create') kwargs['networks'].append(net) secgroups = self.properties[self.SECURITY_GROUPS] if secgroups: kwargs['security_groups'] = [{'name': sg} for sg in secgroups] if 'placement' in obj.data: if 'zone' in obj.data['placement']: kwargs['availability_zone'] = obj.data['placement']['zone'] if 'servergroup' in obj.data['placement']: group_id = obj.data['placement']['servergroup'] hints = self.properties.get(self.SCHEDULER_HINTS, {}) hints.update({'group': group_id}) kwargs['scheduler_hints'] = hints server = None resource_id = 'UNKNOWN' try: server = self.compute(obj).server_create(**kwargs) self.compute(obj).wait_for_server(server.id) return server.id except exc.InternalError as ex: if server and server.id: resource_id = server.id raise exc.EResourceCreation(type='server', message=ex.message, resource_id=resource_id) def do_delete(self, obj, **params): """Delete the physical resource associated with the specified node. :param obj: The node object to operate on. :param kwargs params: Optional keyword arguments for the delete operation. :returns: This operation always returns True unless exception is caught. :raises: `EResourceDeletion` if interaction with compute service fails. """ if not obj.physical_id: return True server_id = obj.physical_id ignore_missing = params.get('ignore_missing', True) force = params.get('force', False) try: driver = self.compute(obj) if force: driver.server_force_delete(server_id, ignore_missing) else: driver.server_delete(server_id, ignore_missing) driver.wait_for_server_delete(server_id) return True except exc.InternalError as ex: raise exc.EResourceDeletion(type='server', id=server_id, message=six.text_type(ex)) def _check_server_name(self, obj, profile): """Check if there is a new name to be assigned to the server. :param obj: The node object to operate on.
:param new_profile: The new profile which may contain a name for the server instance. :return: A tuple consisting of a boolean indicating whether the name needs a change and the server name determined. """ old_name = self.properties[self.NAME] or obj.name new_name = profile.properties[self.NAME] or obj.name if old_name == new_name: return False, new_name return True, new_name def _update_name(self, obj, new_name): """Update the name of the server. :param obj: The node object to operate. :param new_name: The new name for the server instance. :return: ``None``. :raises: ``EResourceUpdate``. """ try: self.compute(obj).server_update(obj.physical_id, name=new_name) except exc.InternalError as ex: raise exc.EResourceUpdate(type='server', id=obj.physical_id, message=six.text_type(ex)) def _check_password(self, obj, new_profile): """Check if the admin password has been changed in the new profile. :param obj: The server node to operate, not used currently. :param new_profile: The new profile which may contain a new password for the server instance. :return: A tuple consisting of a boolean indicating whether the password needs a change and the password determined, which could be '' if the new password is not set. """ old_passwd = self.properties.get(self.ADMIN_PASS) or '' new_passwd = new_profile.properties[self.ADMIN_PASS] or '' if old_passwd == new_passwd: return False, new_passwd return True, new_passwd def _update_password(self, obj, new_password): """Update the admin password for the server. :param obj: The node object to operate. :param new_password: The new password for the server instance. :return: ``None``. :raises: ``EResourceUpdate``. """ try: self.compute(obj).server_change_password(obj.physical_id, new_password) except exc.InternalError as ex: raise exc.EResourceUpdate(type='server', id=obj.physical_id, message=six.text_type(ex)) def _update_metadata(self, obj, new_profile): """Update the server metadata. :param obj: The node object to operate on. :param new_profile: The new profile that may contain some changes to the metadata. :returns: ``None`` :raises: `EResourceUpdate`. """ old_meta = self._build_metadata(obj, self.properties[self.METADATA]) new_meta = self._build_metadata(obj, new_profile.properties[self.METADATA]) if new_meta == old_meta: return try: self.compute(obj).server_metadata_update(obj.physical_id, new_meta) except exc.InternalError as ex: raise exc.EResourceUpdate(type='server', id=obj.physical_id, message=six.text_type(ex)) def _update_flavor(self, obj, new_profile): """Update server flavor. :param obj: The node object to operate on. :param new_profile: The new profile which may contain a new flavor for the server. :returns: ``None``. :raises: `EResourceUpdate` when operation was a failure.
""" old_flavor = self.properties[self.FLAVOR] new_flavor = new_profile.properties[self.FLAVOR] cc = self.compute(obj) oldflavor = self._validate_flavor(obj, old_flavor, 'update') newflavor = self._validate_flavor(obj, new_flavor, 'update') if oldflavor.id == newflavor.id: return try: cc.server_resize(obj.physical_id, newflavor.id) cc.wait_for_server(obj.physical_id, 'VERIFY_RESIZE') except exc.InternalError as ex: msg = six.text_type(ex) try: cc.server_resize_revert(obj.physical_id) cc.wait_for_server(obj.physical_id, 'ACTIVE') except exc.InternalError as ex1: msg = six.text_type(ex1) raise exc.EResourceUpdate(type='server', id=obj.physical_id, message=msg) try: cc.server_resize_confirm(obj.physical_id) cc.wait_for_server(obj.physical_id, 'ACTIVE') except exc.InternalError as ex: raise exc.EResourceUpdate(type='server', id=obj.physical_id, message=six.text_type(ex)) def _update_image(self, obj, new_profile, new_name, new_password): """Update image used by server node. :param obj: The node object to operate on. :param new_profile: The profile which may contain a new image name or ID to use. :param new_name: The name for the server node. :param newn_password: The new password for the administrative account if provided. :returns: A boolean indicating whether the image needs an update. :raises: ``InternalError`` if operation was a failure. """ old_image = self.properties[self.IMAGE] new_image = new_profile.properties[self.IMAGE] if not new_image: msg = _("Updating Nova server with image set to None is not " "supported by Nova") raise exc.EResourceUpdate(type='server', id=obj.physical_id, message=msg) # check the new image first img_new = self._validate_image(obj, new_image, reason='update') new_image_id = img_new.id driver = self.compute(obj) if old_image: img_old = self._validate_image(obj, old_image, reason='update') old_image_id = img_old.id else: try: server = driver.server_get(obj.physical_id) except exc.InternalError as ex: raise exc.EResourceUpdate(type='server', id=obj.physical_id, message=six.text_type(ex)) # Still, this 'old_image_id' could be empty, but it doesn't matter # because the comparison below would fail if that is the case old_image_id = server.image.get('id', None) if new_image_id == old_image_id: return False try: driver.server_rebuild(obj.physical_id, new_image_id, new_name, new_password) driver.wait_for_server(obj.physical_id, 'ACTIVE') except exc.InternalError as ex: raise exc.EResourceUpdate(type='server', id=obj.physical_id, message=six.text_type(ex)) return True def _create_interfaces(self, obj, networks): """Create new interfaces for the server node. :param obj: The node object to operate. :param networks: A list containing information about new network interfaces to be created. :returns: ``None``. :raises: ``EResourceUpdate`` if interaction with drivers failed. """ cc = self.compute(obj) try: server = cc.server_get(obj.physical_id) except exc.InternalError as ex: raise exc.EResourceUpdate(type='server', id=obj.physical_id, message=six.text_type(ex)) for net_spec in networks: net_attrs = self._validate_network(obj, net_spec, 'update') if net_attrs: try: cc.server_interface_create(server, **net_attrs) except exc.InternalError as ex: raise exc.EResourceUpdate(type='server', id=obj.physical_id, message=six.text_type(ex)) def _delete_interfaces(self, obj, networks): """Delete existing interfaces from the node. :param obj: The node object to operate. :param networks: A list containing information about network interfaces to be created. 
:returns: ``None`` :raises: ``EResourceUpdate`` """ def _get_network(nc, net_id, server_id): try: net = nc.network_get(net_id) return net.id except exc.InternalError as ex: raise exc.EResourceUpdate(type='server', id=server_id, message=six.text_type(ex)) def _do_delete(port_id, server_id): try: cc.server_interface_delete(port_id, server_id) except exc.InternalError as ex: raise exc.EResourceUpdate(type='server', id=server_id, message=six.text_type(ex)) cc = self.compute(obj) nc = self.network(obj) try: existing = list(cc.server_interface_list(obj.physical_id)) except exc.InternalError as ex: raise exc.EResourceUpdate(type='server', id=obj.physical_id, message=six.text_type(ex)) ports = [] for intf in existing: fixed_ips = [addr['ip_address'] for addr in intf.fixed_ips] ports.append({ 'id': intf.port_id, 'net': intf.net_id, 'ips': fixed_ips }) for n in networks: network = n.get('network', None) port = n.get('port', None) fixed_ip = n.get('fixed_ip', None) if port: for p in ports: if p['id'] == port: ports.remove(p) _do_delete(port, obj.physical_id) elif fixed_ip: net_id = _get_network(nc, network, obj.physical_id) for p in ports: if (fixed_ip in p['ips'] and net_id == p['net']): ports.remove(p) _do_delete(p['id'], obj.physical_id) elif port is None and fixed_ip is None: net_id = _get_network(nc, network, obj.physical_id) for p in ports: if p['net'] == net_id: ports.remove(p) _do_delete(p['id'], obj.physical_id) def _update_network(self, obj, new_profile): """Updating server network interfaces. :param obj: The node object to operate. :param new_profile: The new profile which may contain new network settings. :return: ``None`` :raises: ``EResourceUpdate`` if there are driver failures. """ networks_current = self.properties[self.NETWORKS] networks_create = new_profile.properties[self.NETWORKS] networks_delete = copy.deepcopy(networks_current) for network in networks_current: if network in networks_create: networks_create.remove(network) networks_delete.remove(network) # Detach some existing interfaces if networks_delete: self._delete_interfaces(obj, networks_delete) # Attach new interfaces if networks_create: self._create_interfaces(obj, networks_create) return def do_update(self, obj, new_profile=None, **params): """Perform update on the server. :param obj: the server to operate on :param new_profile: the new profile for the server. :param params: a dictionary of optional parameters. :returns: True if update was successful or False otherwise. :raises: `EResourceUpdate` if operation fails. 
""" self.server_id = obj.physical_id if not self.server_id: return False if not new_profile: return False if not self.validate_for_update(new_profile): return False name_changed, new_name = self._check_server_name(obj, new_profile) passwd_changed, new_passwd = self._check_password(obj, new_profile) # Update server image: may have side effect of changing server name # and/or admin password image_changed = self._update_image(obj, new_profile, new_name, new_passwd) if not image_changed: # we do this separately only when rebuild wasn't performed if name_changed: self._update_name(obj, new_name) if passwd_changed: self._update_password(obj, new_passwd) # Update server flavor: note that flavor is a required property self._update_flavor(obj, new_profile) self._update_network(obj, new_profile) # TODO(Yanyan Hu): Update block_device properties # Update server metadata self._update_metadata(obj, new_profile) return True def do_get_details(self, obj): known_keys = { 'OS-DCF:diskConfig', 'OS-EXT-AZ:availability_zone', 'OS-EXT-STS:power_state', 'OS-EXT-STS:vm_state', 'accessIPv4', 'accessIPv6', 'config_drive', 'created', 'hostId', 'id', 'key_name', 'locked', 'metadata', 'name', 'os-extended-volumes:volumes_attached', 'progress', 'status', 'updated' } if obj.physical_id is None or obj.physical_id == '': return {} driver = self.compute(obj) try: server = driver.server_get(obj.physical_id) except exc.InternalError as ex: return {'Error': {'code': ex.code, 'message': six.text_type(ex)}} if server is None: return {} server_data = server.to_dict() details = { 'image': server_data['image']['id'], 'flavor': server_data['flavor']['id'], } for key in known_keys: if key in server_data: details[key] = server_data[key] # process special keys like 'OS-EXT-STS:task_state': these keys have # a default value '-' when not existing special_keys = [ 'OS-EXT-STS:task_state', 'OS-SRV-USG:launched_at', 'OS-SRV-USG:terminated_at', ] for key in special_keys: if key in server_data: val = server_data[key] details[key] = val if val else '-' # process network addresses details['addresses'] = copy.deepcopy(server_data['addresses']) # process security groups sgroups = [] if 'security_groups' in server_data: for sg in server_data['security_groups']: sgroups.append(sg['name']) if len(sgroups) == 0: details['security_groups'] = '' elif len(sgroups) == 1: details['security_groups'] = sgroups[0] else: details['security_groups'] = sgroups return dict((k, details[k]) for k in sorted(details)) def do_join(self, obj, cluster_id): if not obj.physical_id: return False driver = self.compute(obj) metadata = driver.server_metadata_get(obj.physical_id) or {} metadata['cluster_id'] = cluster_id metadata['cluster_node_index'] = six.text_type(obj.index) driver.server_metadata_update(obj.physical_id, metadata) return super(ServerProfile, self).do_join(obj, cluster_id) def do_leave(self, obj): if not obj.physical_id: return False keys = ['cluster_id', 'cluster_node_index'] self.compute(obj).server_metadata_delete(obj.physical_id, keys) return super(ServerProfile, self).do_leave(obj) def do_rebuild(self, obj): if not obj.physical_id: return False self.server_id = obj.physical_id driver = self.compute(obj) try: server = driver.server_get(self.server_id) except exc.InternalError as ex: raise exc.EResourceOperation(op='rebuilding', type='server', id=self.server_id, message=six.text_type(ex)) if server is None or server.image is None: return False image_id = server.image['id'] admin_pass = self.properties.get(self.ADMIN_PASS) try: 
driver.server_rebuild(self.server_id, image_id, self.properties.get(self.NAME), admin_pass) driver.wait_for_server(self.server_id, 'ACTIVE') except exc.InternalError as ex: raise exc.EResourceOperation(op='rebuilding', type='server', id=self.server_id, message=six.text_type(ex)) return True def do_check(self, obj): if not obj.physical_id: return False try: server = self.compute(obj).server_get(obj.physical_id) except exc.InternalError as ex: raise exc.EResourceOperation(op='checking', type='server', id=obj.physical_id, message=six.text_type(ex)) if (server is None or server.status != 'ACTIVE'): return False return True def do_recover(self, obj, **options): # NOTE: We do a 'get' not a 'pop' here, because the operations may # fall back to the base class for handling operation = options.get('operation', None) if operation and not isinstance(operation, six.string_types): operation = operation[0] # TODO(Qiming): Handle the case that the operation contains other # alternative recover operation # Depends-On: https://review.openstack.org/#/c/359676/ if operation == 'REBUILD': return self.do_rebuild(obj) return super(ServerProfile, self).do_recover(obj, **options) def handle_reboot(self, obj, **options): """Handler for the reboot operation.""" if not obj.physical_id: return False reboot_type = options.get(self.REBOOT_TYPE, self.REBOOT_SOFT) if (not isinstance(reboot_type, six.string_types) or reboot_type not in self.REBOOT_TYPES): return False self.compute(obj).server_reboot(obj.physical_id, reboot_type) self.compute(obj).wait_for_server(obj.physical_id, 'ACTIVE') return True def handle_change_password(self, obj, **options): """Handler for the change_password operation.""" if not obj.physical_id: return False password = options.get(self.ADMIN_PASSWORD, None) if (password is None or not isinstance(password, six.string_types)): return False self.compute(obj).server_change_password(obj.physical_id, password) return True
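# Illustrative sketch (not part of the original module): a minimal spec that
# could be fed to this profile, assuming the usual Senlin spec layout of
# type/version/properties. The flavor, image, keypair and network names are
# hypothetical placeholders.
example_server_spec = {
    'type': 'os.nova.server',
    'version': '1.0',
    'properties': {
        'flavor': 'm1.small',        # required; checked by _validate_flavor()
        'image': 'cirros-0.4.0',     # optional if BDMv2 provides a boot volume
        'key_name': 'my-keypair',
        'networks': [
            {'network': 'private-net'},   # resolved by _validate_network()
        ],
        'metadata': {'app': 'web'},  # merged with cluster info in _build_metadata()
    },
}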
class StackProfile(base.Profile): """Profile for an OpenStack Heat stack.""" VERSIONS = { '1.0': [ {'status': consts.SUPPORTED, 'since': '2016.04'} ] } KEYS = ( CONTEXT, TEMPLATE, TEMPLATE_URL, PARAMETERS, FILES, TIMEOUT, DISABLE_ROLLBACK, ENVIRONMENT, ) = ( 'context', 'template', 'template_url', 'parameters', 'files', 'timeout', 'disable_rollback', 'environment', ) properties_schema = { CONTEXT: schema.Map( _('A dictionary for specifying the customized context for ' 'stack operations'), default={}, ), TEMPLATE: schema.Map( _('Heat stack template.'), default={}, updatable=True, ), TEMPLATE_URL: schema.String( _('Heat stack template url.'), default='', updatable=True, ), PARAMETERS: schema.Map( _('Parameters to be passed to Heat for stack operations.'), default={}, updatable=True, ), FILES: schema.Map( _('Contents of files referenced by the template, if any.'), default={}, updatable=True, ), TIMEOUT: schema.Integer( _('An integer that specifies the number of minutes before a ' 'stack operation times out.'), updatable=True, ), DISABLE_ROLLBACK: schema.Boolean( _('A boolean specifying whether a stack operation can be ' 'rolled back.'), default=True, updatable=True, ), ENVIRONMENT: schema.Map( _('A map that specifies the environment used for stack ' 'operations.'), default={}, updatable=True, ) } OP_NAMES = ( OP_ABANDON, ) = ( 'abandon', ) OPERATIONS = { OP_ABANDON: schema.Map( _('Abandon a heat stack node.'), ) } def __init__(self, type_name, name, **kwargs): super(StackProfile, self).__init__(type_name, name, **kwargs) self.stack_id = None def validate(self, validate_props=False): '''Validate the schema and the data provided.''' # general validation self.spec_data.validate() self.properties.validate() # validate template template = self.properties[self.TEMPLATE] template_url = self.properties[self.TEMPLATE_URL] if not template and not template_url: msg = _("Neither template nor template_url is specified " "for profile '%s'.") % self.name raise exc.InvalidSpec(message=msg) if validate_props: self.do_validate(obj=self) def do_validate(self, obj): """Validate the stack template used by a node. :param obj: Node object to operate. :returns: True if validation succeeds. :raises: `InvalidSpec` if the template is invalid. """ kwargs = { 'stack_name': utils.random_name(), 'template': self.properties[self.TEMPLATE], 'template_url': self.properties[self.TEMPLATE_URL], 'parameters': self.properties[self.PARAMETERS], 'files': self.properties[self.FILES], 'environment': self.properties[self.ENVIRONMENT], 'preview': True, } try: self.orchestration(obj).stack_create(**kwargs) except exc.InternalError as ex: msg = _('Failed in validating template: %s') % six.text_type(ex) raise exc.InvalidSpec(message=msg) return True def do_create(self, obj): """Create a heat stack using the given node object. :param obj: The node object to operate on. :returns: The UUID of the heat stack created.
""" tags = ["cluster_node_id=%s" % obj.id] if obj.cluster_id: tags.append('cluster_id=%s' % obj.cluster_id) tags.append('cluster_node_index=%s' % obj.index) kwargs = { 'stack_name': obj.name + '-' + utils.random_name(8), 'template': self.properties[self.TEMPLATE], 'template_url': self.properties[self.TEMPLATE_URL], 'timeout_mins': self.properties[self.TIMEOUT], 'disable_rollback': self.properties[self.DISABLE_ROLLBACK], 'parameters': self.properties[self.PARAMETERS], 'files': self.properties[self.FILES], 'environment': self.properties[self.ENVIRONMENT], 'tags': ",".join(tags) } try: stack = self.orchestration(obj).stack_create(**kwargs) # Timeout = None means we will use the 'default_action_timeout' # It can be overridden by the TIMEOUT profile properties timeout = None if self.properties[self.TIMEOUT]: timeout = self.properties[self.TIMEOUT] * 60 self.orchestration(obj).wait_for_stack(stack.id, 'CREATE_COMPLETE', timeout=timeout) return stack.id except exc.InternalError as ex: raise exc.EResourceCreation(type='stack', message=six.text_type(ex)) def do_delete(self, obj, **params): """Delete the physical stack behind the node object. :param obj: The node object to operate on. :param kwargs params: Optional keyword arguments for the delete operation. :returns: This operation always returns True unless exception is caught. :raises: `EResourceDeletion` if interaction with heat fails. """ stack_id = obj.physical_id if not stack_id: return True ignore_missing = params.get('ignore_missing', True) try: self.orchestration(obj).stack_delete(stack_id, ignore_missing) self.orchestration(obj).wait_for_stack_delete(stack_id) except exc.InternalError as ex: raise exc.EResourceDeletion(type='stack', id=stack_id, message=six.text_type(ex)) return True def do_update(self, obj, new_profile, **params): """Perform update on object. :param obj: the node object to operate on :param new_profile: the new profile used for updating :param params: other parameters for the update request. :returns: A boolean indicating whether the operation is successful. 
""" self.stack_id = obj.physical_id if not self.stack_id: return False if not self.validate_for_update(new_profile): return False fields = {} new_template = new_profile.properties[new_profile.TEMPLATE] if new_template != self.properties[self.TEMPLATE]: fields['template'] = new_template new_params = new_profile.properties[new_profile.PARAMETERS] if new_params != self.properties[self.PARAMETERS]: fields['parameters'] = new_params new_timeout = new_profile.properties[new_profile.TIMEOUT] if new_timeout != self.properties[self.TIMEOUT]: fields['timeout_mins'] = new_timeout new_dr = new_profile.properties[new_profile.DISABLE_ROLLBACK] if new_dr != self.properties[self.DISABLE_ROLLBACK]: fields['disable_rollback'] = new_dr new_files = new_profile.properties[new_profile.FILES] if new_files != self.properties[self.FILES]: fields['files'] = new_files new_environment = new_profile.properties[new_profile.ENVIRONMENT] if new_environment != self.properties[self.ENVIRONMENT]: fields['environment'] = new_environment if not fields: return True try: hc = self.orchestration(obj) # Timeout = None means we will use the 'default_action_timeout' # It can be overridden by the TIMEOUT profile properties timeout = None if self.properties[self.TIMEOUT]: timeout = self.properties[self.TIMEOUT] * 60 hc.stack_update(self.stack_id, **fields) hc.wait_for_stack(self.stack_id, 'UPDATE_COMPLETE', timeout=timeout) except exc.InternalError as ex: raise exc.EResourceUpdate(type='stack', id=self.stack_id, message=six.text_type(ex)) return True def do_check(self, obj): """Check stack status. :param obj: Node object to operate. :returns: True if check succeeded, or False otherwise. """ stack_id = obj.physical_id if stack_id is None: return False hc = self.orchestration(obj) try: # Timeout = None means we will use the 'default_action_timeout' # It can be overridden by the TIMEOUT profile properties timeout = None if self.properties[self.TIMEOUT]: timeout = self.properties[self.TIMEOUT] * 60 hc.stack_check(stack_id) hc.wait_for_stack(stack_id, 'CHECK_COMPLETE', timeout=timeout) except exc.InternalError as ex: raise exc.EResourceOperation(op='checking', type='stack', id=stack_id, message=six.text_type(ex)) return True def do_get_details(self, obj): if not obj.physical_id: return {} try: stack = self.orchestration(obj).stack_get(obj.physical_id) return stack.to_dict() except exc.InternalError as ex: return { 'Error': { 'code': ex.code, 'message': six.text_type(ex) } } def do_adopt(self, obj, overrides=None, snapshot=False): """Adopt an existing stack node for management. :param obj: A node object for this operation. It could be a puppet node that provides only 'user', 'project' and 'physical_id' properties when doing a preview. It can be a real Node object for node adoption. :param overrides: A dict containing the properties that will be overridden when generating a profile for the stack. :param snapshot: A boolean flag indicating whether the profile should attempt a snapshot operation before adopting the stack. If set to True, the ID of the snapshot will be used as the image ID. :returns: A dict containing the spec created from the stack object or a dict containing error information if failure occurred. """ driver = self.orchestration(obj) # TODO(Qiming): Add snapshot support # snapshot = driver.snapshot_create(...) 
try: stack = driver.stack_get(obj.physical_id) tmpl = driver.stack_get_template(obj.physical_id) env = driver.stack_get_environment(obj.physical_id) files = driver.stack_get_files(obj.physical_id) except exc.InternalError as ex: return {'Error': {'code': ex.code, 'message': six.text_type(ex)}} spec = { self.ENVIRONMENT: env.to_dict(), self.FILES: files, self.TEMPLATE: tmpl.to_dict(), self.PARAMETERS: dict((k, v) for k, v in stack.parameters.items() if k.find('OS::', 0) < 0), self.TIMEOUT: stack.timeout_mins, self.DISABLE_ROLLBACK: stack.is_rollback_disabled } if overrides: spec.update(overrides) return spec def _refresh_tags(self, current, node, add=False): """Refresh tag list. :param current: Current list of tags. :param node: The node object. :param add: Flag indicating whether new tags are added. :returns: (tags, updated) where tags contains a new list of tags and updated indicates whether new tag list differs from the old one. """ tags = [] for tag in current: if tag.find('cluster_id=') == 0: continue elif tag.find('cluster_node_id=') == 0: continue elif tag.find('cluster_node_index=') == 0: continue if tag.strip() != "": tags.append(tag.strip()) if add: tags.append('cluster_id=' + node.cluster_id) tags.append('cluster_node_id=' + node.id) tags.append('cluster_node_index=%s' % node.index) tag_str = ",".join(tags) return (tag_str, tags != current) def do_join(self, obj, cluster_id): if not obj.physical_id: return False hc = self.orchestration(obj) try: stack = hc.stack_get(obj.physical_id) tags, updated = self._refresh_tags(stack.tags, obj, True) field = {'tags': tags} if updated: hc.stack_update(obj.physical_id, **field) except exc.InternalError as ex: LOG.error('Failed in updating stack tags: %s.', ex) return False return True def do_leave(self, obj): if not obj.physical_id: return False hc = self.orchestration(obj) try: stack = hc.stack_get(obj.physical_id) tags, updated = self._refresh_tags(stack.tags, obj, False) field = {'tags': tags} if updated: hc.stack_update(obj.physical_id, **field) except exc.InternalError as ex: LOG.error('Failed in updating stack tags: %s.', ex) return False return True def handle_abandon(self, obj, **options): """Handler for abandoning a heat stack node.""" pass
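# Illustrative sketch (not part of the original module): a minimal spec for
# this stack profile, assuming the usual Senlin spec layout; the template
# body is a hypothetical placeholder.
example_stack_spec = {
    'type': 'os.heat.stack',
    'version': '1.0',
    'properties': {
        'template': {
            'heat_template_version': '2016-04-08',
            'resources': {},     # omitted for brevity
        },
        'parameters': {},
        'timeout': 60,           # minutes; wait_for_stack() waits timeout * 60 seconds
        'disable_rollback': True,
    },
}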
def test_basic(self): sot = schema.Boolean('desc') self.assertEqual('Boolean', sot['type']) self.assertEqual('desc', sot['description'])
class DeletionPolicy(base.Policy): '''Policy for choosing victim node(s) from a cluster for deletion. This policy is enforced when nodes are to be removed from a cluster. It will yield an ordered list of candidates for deletion based on user-specified criteria. ''' VERSION = '1.0' KEYS = ( CRITERIA, DESTROY_AFTER_DELETION, GRACE_PERIOD, REDUCE_DESIRED_CAPACITY, ) = ( 'criteria', 'destroy_after_deletion', 'grace_period', 'reduce_desired_capacity', ) CRITERIA_VALUES = ( OLDEST_FIRST, OLDEST_PROFILE_FIRST, YOUNGEST_FIRST, RANDOM, ) = ( 'OLDEST_FIRST', 'OLDEST_PROFILE_FIRST', 'YOUNGEST_FIRST', 'RANDOM', ) TARGET = [ ('BEFORE', consts.CLUSTER_SCALE_IN), ('BEFORE', consts.CLUSTER_DEL_NODES), ('BEFORE', consts.CLUSTER_RESIZE), ] PROFILE_TYPE = ['ANY'] properties_schema = { CRITERIA: schema.String(_('Criteria used in selecting candidates for deletion'), default=RANDOM, constraints=[ constraints.AllowedValues(CRITERIA_VALUES), ]), DESTROY_AFTER_DELETION: schema.Boolean( _('Whether a node should be completely destroyed after ' 'deletion. Defaults to True.'), default=True, ), GRACE_PERIOD: schema.Integer( _('Number of seconds before real deletion happens.'), default=0, ), REDUCE_DESIRED_CAPACITY: schema.Boolean( _('Whether the desired capacity of the cluster should be ' 'reduced along with the deletion. Defaults to False.'), default=False, ) } def __init__(self, name, spec, **kwargs): super(DeletionPolicy, self).__init__(name, spec, **kwargs) self.criteria = self.properties[self.CRITERIA] self.grace_period = self.properties[self.GRACE_PERIOD] self.destroy_after_deletion = self.properties[ self.DESTROY_AFTER_DELETION] self.reduce_desired_capacity = self.properties[ self.REDUCE_DESIRED_CAPACITY] random.seed() def _select_candidates(self, context, cluster_id, count): candidates = [] nodes = db_api.node_get_all_by_cluster(context, cluster_id) if count > len(nodes): count = len(nodes) err_nodes = [n for n in nodes if n.status == 'ERROR'] nodes = [n for n in nodes if n.status != 'ERROR'] if count <= len(err_nodes): return [n.id for n in err_nodes[:count]] candidates.extend([n.id for n in err_nodes]) count -= len(err_nodes) # Random selection if self.criteria == self.RANDOM: i = count while i > 0: rand = random.randrange(i) candidates.append(nodes[rand].id) nodes.remove(nodes[rand]) i = i - 1 return candidates # Node age based selection if self.criteria in [self.OLDEST_FIRST, self.YOUNGEST_FIRST]: sorted_list = sorted(nodes, key=lambda r: (r.created_time, r.name)) for i in range(count): if self.criteria == self.OLDEST_FIRST: candidates.append(sorted_list[i].id) else: # YOUNGEST_FIRST candidates.append(sorted_list[-1 - i].id) return candidates # Node profile based selection node_map = [] for node in nodes: profile = db_api.profile_get(context, node.profile_id) created_at = profile.created_time node_map.append({'id': node.id, 'created_at': created_at}) sorted_map = sorted(node_map, key=lambda m: m['created_at']) for i in range(count): candidates.append(sorted_map[i]['id']) return candidates def pre_op(self, cluster_id, action): '''Choose victims that can be deleted.''' if action.action == consts.CLUSTER_RESIZE: cluster = db_api.cluster_get(action.context, cluster_id) scaleutils.parse_resize_params(action, cluster) if 'deletion' not in action.data: return count = action.data['deletion']['count'] else: # CLUSTER_SCALE_IN or CLUSTER_DEL_NODES count = action.inputs.get('count', 1) pd = action.data.get('deletion', {}) candidates = pd.get('candidates', []) # For certain operations (e.g.
DEL_NODES), the candidates might # have been specified if len(candidates) == 0: candidates = self._select_candidates(action.context, cluster_id, count) pd['candidates'] = candidates pd['destroy_after_deletion'] = self.destroy_after_deletion pd['grace_period'] = self.grace_period action.data.update({ 'status': base.CHECK_OK, 'reason': _('Candidates generated'), 'deletion': pd }) action.store(action.context) return
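# Illustrative sketch (not part of the original module): how the age-based
# criteria in _select_candidates() order nodes. The node records below are
# hypothetical; OLDEST_FIRST picks from the front of the sorted list and
# YOUNGEST_FIRST from the back, mirroring the code above.
import collections

Node = collections.namedtuple('Node', 'id created_time name')
nodes = [
    Node('n1', '2016-01-01', 'a'),
    Node('n2', '2016-03-01', 'b'),
    Node('n3', '2016-02-01', 'c'),
]
sorted_list = sorted(nodes, key=lambda r: (r.created_time, r.name))
oldest_first = [n.id for n in sorted_list[:2]]               # ['n1', 'n3']
youngest_first = [sorted_list[-1 - i].id for i in range(2)]  # ['n2', 'n3']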
class ServerProfile(base.Profile): '''Profile for an OpenStack Nova server.''' KEYS = ( CONTEXT, ADMIN_PASS, AUTO_DISK_CONFIG, AVAILABILITY_ZONE, BLOCK_DEVICE_MAPPING, BLOCK_DEVICE_MAPPING_V2, CONFIG_DRIVE, FLAVOR, IMAGE, KEY_NAME, METADATA, NAME, NETWORKS, PERSONALITY, SECURITY_GROUPS, USER_DATA, SCHEDULER_HINTS, ) = ( 'context', 'adminPass', 'auto_disk_config', 'availability_zone', 'block_device_mapping', 'block_device_mapping_v2', 'config_drive', 'flavor', 'image', 'key_name', 'metadata', 'name', 'networks', 'personality', 'security_groups', 'user_data', 'scheduler_hints', ) BDM_KEYS = ( BDM_DEVICE_NAME, BDM_VOLUME_SIZE, ) = ( 'device_name', 'volume_size', ) BDM2_KEYS = ( BDM2_UUID, BDM2_SOURCE_TYPE, BDM2_DESTINATION_TYPE, BDM2_DISK_BUS, BDM2_DEVICE_NAME, BDM2_VOLUME_SIZE, BDM2_GUEST_FORMAT, BDM2_BOOT_INDEX, BDM2_DEVICE_TYPE, BDM2_DELETE_ON_TERMINATION, ) = ( 'uuid', 'source_type', 'destination_type', 'disk_bus', 'device_name', 'volume_size', 'guest_format', 'boot_index', 'device_type', 'delete_on_termination', ) NETWORK_KEYS = ( PORT, FIXED_IP, NETWORK, ) = ( 'port', 'fixed-ip', 'network', ) PERSONALITY_KEYS = ( PATH, CONTENTS, ) = ( 'path', 'contents', ) SCHEDULER_HINTS_KEYS = (GROUP, ) = ('group', ) properties_schema = { CONTEXT: schema.Map(_('Customized security context for operating servers.'), ), ADMIN_PASS: schema.String(_('Password for the administrator account.'), ), AUTO_DISK_CONFIG: schema.Boolean( _('Whether the disk partition is done automatically.'), default=True, ), AVAILABILITY_ZONE: schema.String( _('Name of availability zone for running the server.'), ), BLOCK_DEVICE_MAPPING: schema.List( _('A list specifying the properties of block devices to be used ' 'for this server.'), schema=schema.Map( _('A map specifying the properties of a block device to be ' 'used by the server.'), schema={ BDM_DEVICE_NAME: schema.String( _('Block device name, should be <=255 chars.'), ), BDM_VOLUME_SIZE: schema.Integer(_('Block device size in GB.'), ), }), ), BLOCK_DEVICE_MAPPING_V2: schema.List( _('A list specifying the properties of block devices to be used ' 'for this server.'), schema=schema.Map( _('A map specifying the properties of a block device to be ' 'used by the server.'), schema={ BDM2_UUID: schema.String( _('ID of the source image, snapshot or volume'), ), BDM2_SOURCE_TYPE: schema.String( _('Volume source type, should be image, snapshot, ' 'volume or blank'), required=True, ), BDM2_DESTINATION_TYPE: schema.String( _('Volume destination type, should be volume or ' 'local'), required=True, ), BDM2_DISK_BUS: schema.String(_('Bus of the device.'), ), BDM2_DEVICE_NAME: schema.String( _('Name of the device(e.g. vda, xda, ....).'), ), BDM2_VOLUME_SIZE: schema.Integer( _('Size of the block device in MB(for swap) and ' 'in GB(for other formats)'), required=True, ), BDM2_GUEST_FORMAT: schema.String( _('Specifies the disk file system format(e.g. swap, ' 'ephemeral, ...).'), ), BDM2_BOOT_INDEX: schema.Integer(_('Define the boot order of the device'), ), BDM2_DEVICE_TYPE: schema.String( _('Type of the device(e.g. 
disk, cdrom, ...).'), ), BDM2_DELETE_ON_TERMINATION: schema.Boolean( _('Whether to delete the volume when the server ' 'stops.'), ), }), ), CONFIG_DRIVE: schema.Boolean( _('Whether config drive should be enabled for the server.'), ), FLAVOR: schema.String( _('ID of flavor used for the server.'), required=True, updatable=True, ), IMAGE: schema.String( # IMAGE is not required, because there could be BDM or BDMv2 # support and the corresponding settings effective _('ID of image to be used for the new server.'), updatable=True, ), KEY_NAME: schema.String(_('Name of Nova keypair to be injected to server.'), ), METADATA: schema.Map( _('A collection of key/value pairs to be associated with the ' 'server created. Both key and value should be <=255 chars.'), updatable=True, ), NAME: schema.String( _('Name of the server.'), updatable=True, ), NETWORKS: schema.List( _('List of networks for the server.'), schema=schema.Map( _('A map specifying the properties of a network for use.'), schema={ NETWORK: schema.String( _('Name or ID of network to create a port on.'), ), PORT: schema.String(_('Port ID to be used by the network.'), ), FIXED_IP: schema.String(_('Fixed IP to be used by the network.'), ), }, ), updatable=True, ), PERSONALITY: schema.List( _('List of files to be injected into the server, where each entry specifies a path and contents.'), schema=schema.Map( _('A map specifying the path & contents for an injected ' 'file.'), schema={ PATH: schema.String( _('In-instance path for the file to be injected.'), required=True, ), CONTENTS: schema.String( _('Contents of the file to be injected.'), required=True, ), }, ), ), SCHEDULER_HINTS: schema.Map( _('A collection of key/value pairs to be associated with the ' 'Scheduler hints. Both key and value should be <=255 chars.'), ), SECURITY_GROUPS: schema.List( _('List of security groups.'), schema=schema.String( _('Name of a security group'), required=True, ), ), USER_DATA: schema.String(_('User data to be exposed by the metadata server.'), ), } OP_NAMES = (OP_REBOOT, ) = ('reboot', ) REBOOT_TYPE = 'type' REBOOT_TYPES = (REBOOT_SOFT, REBOOT_HARD) = ('SOFT', 'HARD') OPERATIONS = { OP_REBOOT: schema.Operation( _("Reboot the nova server."), schema={ REBOOT_TYPE: schema.String( _("Type of reboot which can be 'SOFT' or 'HARD'."), default=REBOOT_SOFT, constraints=[ constraints.AllowedValues(REBOOT_TYPES), ]) }) } def __init__(self, type_name, name, **kwargs): super(ServerProfile, self).__init__(type_name, name, **kwargs) self._novaclient = None self._neutronclient = None self.server_id = None def nova(self, obj): '''Construct nova client based on object. :param obj: Object for which the client is created. It is expected to be None when retrieving an existing client. When creating a client, it contains the user and project to be used. ''' if self._novaclient is not None: return self._novaclient params = self._build_conn_params(obj.user, obj.project) self._novaclient = driver_base.SenlinDriver().compute(params) return self._novaclient def neutron(self, obj): '''Construct neutron client based on object. :param obj: Object for which the client is created. It is expected to be None when retrieving an existing client. When creating a client, it contains the user and project to be used.
''' if self._neutronclient is not None: return self._neutronclient params = self._build_conn_params(obj.user, obj.project) self._neutronclient = driver_base.SenlinDriver().network(params) return self._neutronclient def do_validate(self, obj): '''Validate if the spec has provided valid info for server creation.''' return True def _resolve_bdm(self, bdm): for bd in bdm: for key in self.BDM2_KEYS: if bd[key] is None: del bd[key] return bdm def _resolve_network(self, networks, client): for network in networks: net_name_id = network.get(self.NETWORK) if net_name_id: res = client.network_get(net_name_id) network['uuid'] = res.id del network[self.NETWORK] if network['port'] is None: del network['port'] if network['fixed-ip'] is None: del network['fixed-ip'] return networks def do_create(self, obj): '''Create a server using the given profile.''' kwargs = {} for key in self.KEYS: # context is treated as connection parameters if key == self.CONTEXT: continue if self.properties[key] is not None: kwargs[key] = self.properties[key] name_or_id = self.properties[self.IMAGE] if name_or_id is not None: image = self.nova(obj).image_find(name_or_id) # wait for new version of openstacksdk to fix this kwargs.pop(self.IMAGE) kwargs['imageRef'] = image.id flavor_id = self.properties[self.FLAVOR] flavor = self.nova(obj).flavor_find(flavor_id, False) # wait for new version of openstacksdk to fix this kwargs.pop(self.FLAVOR) kwargs['flavorRef'] = flavor.id name = self.properties[self.NAME] if name: kwargs['name'] = name else: kwargs['name'] = obj.name metadata = self.properties[self.METADATA] or {} if obj.cluster_id: metadata['cluster'] = obj.cluster_id kwargs['metadata'] = metadata block_device_mapping_v2 = self.properties[self.BLOCK_DEVICE_MAPPING_V2] if block_device_mapping_v2 is not None: kwargs['block_device_mapping_v2'] = self._resolve_bdm( block_device_mapping_v2) user_data = self.properties[self.USER_DATA] if user_data is not None: ud = encodeutils.safe_encode(user_data) kwargs['user_data'] = encodeutils.safe_decode(base64.b64encode(ud)) networks = self.properties[self.NETWORKS] if networks is not None: kwargs['networks'] = self._resolve_network(networks, self.neutron(obj)) secgroups = self.properties[self.SECURITY_GROUPS] if secgroups: kwargs['security_groups'] = [{'name': sg} for sg in secgroups] if 'placement' in obj.data: if 'zone' in obj.data['placement']: kwargs['availability_zone'] = obj.data['placement']['zone'] if 'servergroup' in obj.data['placement']: group_id = obj.data['placement']['servergroup'] hints = self.properties.get(self.SCHEDULER_HINTS, {}) hints.update({'group': group_id}) kwargs['scheduler_hints'] = hints LOG.info('Creating server: %s' % kwargs) server = self.nova(obj).server_create(**kwargs) self.nova(obj).wait_for_server(server.id) self.server_id = server.id return server.id def do_delete(self, obj): self.server_id = obj.physical_id if not obj.physical_id: return True try: self.nova(obj).server_delete(self.server_id) self.nova(obj).wait_for_server_delete(self.server_id) except Exception as ex: LOG.error('Error: %s' % six.text_type(ex)) return False return True def do_update(self, obj, new_profile=None, **params): '''Perform update on the server. :param obj: the server to operate on :param new_profile: the new profile for the server. :param params: a dictionary of optional parameters.
''' self.server_id = obj.physical_id if not self.server_id: return True if not new_profile: return True if not self.validate_for_update(new_profile): return False # TODO(Yanyan Hu): Update block_device properties # Update basic properties of server if not self._update_basic_properties(obj, new_profile): return False # Update server flavor flavor = self.properties[self.FLAVOR] new_flavor = new_profile.properties[self.FLAVOR] if new_flavor != flavor: try: self._update_flavor(obj, flavor, new_flavor) except Exception as ex: LOG.exception(_('Failed in updating server flavor: %s'), six.text_type(ex)) return False # Update server image old_passwd = self.properties.get(self.ADMIN_PASS) passwd = old_passwd if new_profile.properties[self.ADMIN_PASS] is not None: passwd = new_profile.properties[self.ADMIN_PASS] image = self.properties[self.IMAGE] new_image = new_profile.properties[self.IMAGE] if new_image != image: try: self._update_image(obj, image, new_image, passwd) except Exception as ex: LOG.exception(_('Failed in updating server image: %s'), six.text_type(ex)) return False elif old_passwd != passwd: # TODO(Jun Xu): update server admin password pass # Update server network networks_current = self.properties[self.NETWORKS] networks_create = new_profile.properties[self.NETWORKS] networks_delete = copy.deepcopy(networks_current) for network in networks_current: if network in networks_create: networks_create.remove(network) networks_delete.remove(network) if networks_create or networks_delete: # We have network interfaces to be deleted and/or created try: self._update_network(obj, networks_create, networks_delete) except Exception as ex: LOG.exception(_('Failed in updating server network: %s'), six.text_type(ex)) return False return True def _update_basic_properties(self, obj, new_profile): '''Updating basic server properties including name, metadata''' # Update server metadata metadata = self.properties[self.METADATA] new_metadata = new_profile.properties[self.METADATA] if new_metadata != metadata: if new_metadata is None: new_metadata = {} try: self.nova(obj).server_metadata_update(self.server_id, new_metadata) except Exception as ex: LOG.exception(_('Failed in updating server metadata: %s'), six.text_type(ex)) return False # Update server name name = self.properties[self.NAME] new_name = new_profile.properties[self.NAME] if new_name != name: attrs = {'name': new_name if new_name else obj.name} try: self.nova(obj).server_update(self.server_id, **attrs) except Exception as ex: LOG.exception(_('Failed in updating server name: %s'), six.text_type(ex)) return False return True def _update_flavor(self, obj, old_flavor, new_flavor): '''Updating server flavor''' res = self.nova(obj).flavor_find(old_flavor) old_flavor_id = res.id res = self.nova(obj).flavor_find(new_flavor) new_flavor_id = res.id if new_flavor_id == old_flavor_id: return try: self.nova(obj).server_resize(obj.physical_id, new_flavor_id) self.nova(obj).wait_for_server(obj.physical_id, 'VERIFY_RESIZE') except Exception as ex: LOG.error(_("Server resizing failed, revert it: %s"), six.text_type(ex)) self.nova(obj).server_resize_revert(obj.physical_id) self.nova(obj).wait_for_server(obj.physical_id, 'ACTIVE') raise exception.ResourceUpdateFailure(resource=obj.physical_id) self.nova(obj).server_resize_confirm(obj.physical_id) self.nova(obj).wait_for_server(obj.physical_id, 'ACTIVE') def _update_image(self, obj, old_image, new_image, admin_password): '''Updating server image''' if old_image: res = self.nova(obj).image_find(old_image) image_id = 
res.id else: server = self.nova(obj).server_get(obj.physical_id) image_id = server.image['id'] if new_image: res = self.nova(obj).image_find(new_image) new_image_id = res.id if new_image_id != image_id: # (Jun Xu): Do not update the name here even if it has changed; # it should be updated in do_update self.nova(obj).server_rebuild(obj.physical_id, new_image_id, self.properties.get(self.NAME), admin_password) self.nova(obj).wait_for_server(obj.physical_id, 'ACTIVE') else: # TODO(Yanyan Hu): Allow server update with new_image # set to None if Nova service supports it LOG.error( _("Updating Nova server with image set to None is " "not supported by Nova.")) raise exception.ResourceUpdateFailure(resource=obj.physical_id) def _update_network(self, obj, networks_create, networks_delete): '''Updating server network interfaces''' server = self.nova(obj).server_get(self.server_id) ports_existing = list(self.nova(obj).server_interface_list(server)) ports = [] for p in ports_existing: fixed_ips = [] for addr in p['fixed_ips']: fixed_ips.append(addr['ip_address']) ports.append({ 'port_id': p['port_id'], 'net_id': p['net_id'], 'fixed_ips': fixed_ips }) # Detach some existing ports # Step 1. Exact match on port_id, or on fixed-ip plus net_id for n in networks_delete: if n['port'] is not None: for p in ports: if p['port_id'] == n['port']: ports.remove(p) break res = self.nova(obj).server_interface_delete(n['port'], server) elif n['fixed-ip'] is not None: res = self.neutron(obj).network_get(n['network']) net_id = res.id for p in ports: if (n['fixed-ip'] in p['fixed_ips']) and (p['net_id'] == net_id): res = self.nova(obj).server_interface_delete( p['port_id'], server) ports.remove(p) break # Step 2. Fuzzy match on net_id only for n in networks_delete: if n['port'] is None and n['fixed-ip'] is None: res = self.neutron(obj).network_get(n['network']) net_id = res.id for p in ports: if p['net_id'] == net_id: res = self.nova(obj).server_interface_delete( p['port_id'], server) ports.remove(p) break # Attach new ports added in new network definition for n in networks_create: net_name_id = n.get(self.NETWORK, None) if net_name_id: res = self.neutron(obj).network_get(net_name_id) n['net_id'] = res.id if n['fixed-ip'] is not None: n['fixed_ips'] = [{'ip_address': n['fixed-ip']}] if n['port'] is not None: n['port_id'] = n['port'] del n['network'] del n['port'] del n['fixed-ip'] self.nova(obj).server_interface_create(server, **n) return def do_check(self, obj): if not obj.physical_id: return False self.server_id = obj.physical_id try: server = self.nova(obj).server_get(self.server_id) except Exception as ex: LOG.error('Error: %s' % six.text_type(ex)) return False if (server is None or server.status != 'ACTIVE'): return False return True def do_get_details(self, obj): known_keys = { 'OS-DCF:diskConfig', 'OS-EXT-AZ:availability_zone', 'OS-EXT-STS:power_state', 'OS-EXT-STS:vm_state', 'accessIPv4', 'accessIPv6', 'config_drive', 'created', 'hostId', 'id', 'key_name', 'locked', 'metadata', 'name', 'os-extended-volumes:volumes_attached', 'progress', 'status', 'updated' } if obj.physical_id is None or obj.physical_id == '': return {} try: server = self.nova(obj).server_get(obj.physical_id) except exception.InternalError as ex: return {'Error': {'code': ex.code, 'message': six.text_type(ex)}} if server is None: return {} server_data = server.to_dict() details = { 'image': server_data['image']['id'], 'flavor': server_data['flavor']['id'], } for key in known_keys: if key in server_data: details[key] = server_data[key] # process special
keys like 'OS-EXT-STS:task_state': these keys have # a default value '-' when not existing special_keys = [ 'OS-EXT-STS:task_state', 'OS-SRV-USG:launched_at', 'OS-SRV-USG:terminated_at', ] for key in special_keys: if key in server_data: val = server_data[key] details[key] = val if val else '-' # process network addresses details['addresses'] = {} for net in server_data['addresses']: addresses = [] for addr in server_data['addresses'][net]: # Ignore IPv6 address if addr['version'] == 4: addresses.append(addr['addr']) details['addresses'][net] = addresses # process security groups sgroups = [] if 'security_groups' in server_data: for sg in server_data['security_groups']: sgroups.append(sg['name']) if len(sgroups) == 0: details['security_groups'] = '' elif len(sgroups) == 1: details['security_groups'] = sgroups[0] else: details['security_groups'] = sgroups return dict((k, details[k]) for k in sorted(details)) def do_join(self, obj, cluster_id): if not obj.physical_id: return False metadata = self.nova(obj).server_metadata_get(obj.physical_id) or {} metadata['cluster'] = cluster_id self.nova(obj).server_metadata_update(obj.physical_id, metadata) return super(ServerProfile, self).do_join(obj, cluster_id) def do_leave(self, obj): if not obj.physical_id: return False self.nova(obj).server_metadata_delete(obj.physical_id, ['cluster']) return super(ServerProfile, self).do_leave(obj) def do_rebuild(self, obj): if not obj.physical_id: return False self.server_id = obj.physical_id try: server = self.nova(obj).server_get(self.server_id) except Exception as ex: LOG.exception(_('Failed at getting server: %s'), six.text_type(ex)) return False if server is None or server.image is None: return False image_id = server.image['id'] admin_pass = self.properties.get(self.ADMIN_PASS) try: self.nova(obj).server_rebuild(self.server_id, image_id, self.properties.get(self.NAME), admin_pass) self.nova(obj).wait_for_server(self.server_id, 'ACTIVE') except Exception as ex: LOG.exception(_('Failed at rebuilding server: %s'), six.text_type(ex)) return False return True def do_recover(self, obj, **options): if 'operation' in options: if options['operation'] == 'REBUILD': return self.do_rebuild(obj) res = super(ServerProfile, self).do_recover(obj, **options) return res def handle_reboot(self, obj, **options): """Handler for the reboot operation.""" pass
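# Illustrative sketch (not part of the original module): the transformation
# _resolve_network() above applies to a network spec before server creation.
# The network name and UUID are hypothetical; network_get() is assumed to
# resolve 'private-net' to the UUID shown.
networks_in = [{'network': 'private-net', 'port': None, 'fixed-ip': '10.0.0.5'}]
# After resolution: 'network' is replaced by the resolved 'uuid', and keys
# whose values are None ('port' here) are dropped:
networks_out = [{'uuid': '6a2e1f9e-0a8f-4b4e-8f3d-6a0f2b1c9d10',
                 'fixed-ip': '10.0.0.5'}]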
class ScalingPolicy(base.Policy):
    '''Policy for changing the size of a cluster.

    This policy is expected to be enforced before the node count of a
    cluster is changed.
    '''

    VERSION = '1.0'

    TARGET = [
        ('BEFORE', consts.CLUSTER_SCALE_IN),
        ('BEFORE', consts.CLUSTER_SCALE_OUT),
    ]

    PROFILE_TYPE = [
        'ANY',
    ]

    KEYS = (
        EVENT, ADJUSTMENT,
    ) = (
        'event', 'adjustment',
    )

    _SUPPORTED_EVENTS = (
        CLUSTER_SCALE_IN, CLUSTER_SCALE_OUT,
    ) = (
        consts.CLUSTER_SCALE_IN, consts.CLUSTER_SCALE_OUT,
    )

    _ADJUSTMENT_KEYS = (
        ADJUSTMENT_TYPE, ADJUSTMENT_NUMBER, MIN_STEP, BEST_EFFORT,
    ) = (
        'type', 'number', 'min_step', 'best_effort',
    )

    properties_schema = {
        EVENT: schema.String(
            _('Event that will trigger this policy. Must be one of '
              'CLUSTER_SCALE_IN and CLUSTER_SCALE_OUT.'),
            constraints=[
                constraints.AllowedValues(_SUPPORTED_EVENTS),
            ],
            required=True,
        ),
        ADJUSTMENT: schema.Map(
            _('Detailed specification for scaling adjustments.'),
            schema={
                ADJUSTMENT_TYPE: schema.String(
                    _('Type of adjustment when scaling is triggered.'),
                    constraints=[
                        constraints.AllowedValues(consts.ADJUSTMENT_TYPES),
                    ],
                    default=consts.CHANGE_IN_CAPACITY,
                ),
                ADJUSTMENT_NUMBER: schema.Number(
                    _('A number specifying the amount of adjustment.'),
                    default=1,
                ),
                MIN_STEP: schema.Integer(
                    _('When adjustment type is set to "CHANGE_IN_PERCENTAGE",'
                      ' this specifies the cluster size will be changed by '
                      'at least this number of nodes.'),
                    default=1,
                ),
                BEST_EFFORT: schema.Boolean(
                    _('Whether to do best-effort scaling when the new size '
                      'of the cluster would break the size limitation.'),
                    default=False,
                ),
            }
        ),
    }

    def __init__(self, name, spec, **kwargs):
        super(ScalingPolicy, self).__init__(name, spec, **kwargs)

        self.event = self.properties[self.EVENT]
        self.singleton = False

        adjustment = self.properties[self.ADJUSTMENT]
        self.adjustment_type = adjustment[self.ADJUSTMENT_TYPE]
        self.adjustment_number = adjustment[self.ADJUSTMENT_NUMBER]
        self.adjustment_min_step = adjustment[self.MIN_STEP]
        self.best_effort = adjustment[self.BEST_EFFORT]

    def _calculate_adjustment_count(self, current_size):
        '''Calculate the adjustment count based on current_size.'''
        if self.adjustment_type == consts.EXACT_CAPACITY:
            if self.event == consts.CLUSTER_SCALE_IN:
                count = current_size - self.adjustment_number
            else:
                count = self.adjustment_number - current_size
        elif self.adjustment_type == consts.CHANGE_IN_CAPACITY:
            count = self.adjustment_number
        else:  # consts.CHANGE_IN_PERCENTAGE
            count = int((self.adjustment_number * current_size) / 100.0)
            if count < self.adjustment_min_step:
                count = self.adjustment_min_step

        return count

    def pre_op(self, cluster_id, action):
        status = base.CHECK_OK
        reason = _('Scaling request validated.')

        # Check if the action is expected by the policy
        if self.event != action.action:
            action.data.update({'status': status, 'reason': reason})
            action.store(action.context)
            return

        cluster = db_api.cluster_get(action.context, cluster_id)
        nodes = db_api.node_get_all_by_cluster(action.context, cluster_id)
        current_size = len(nodes)
        count = self._calculate_adjustment_count(current_size)

        # Use action input if count is provided
        count = action.inputs.get('count', count)
        if count <= 0:
            status = base.CHECK_ERROR
            reason = _("Count (%(count)s) invalid for action "
                       "%(action)s.") % {'count': count,
                                         'action': action.action}

        # Check size constraints
        if action.action == consts.CLUSTER_SCALE_IN:
            new_size = current_size - count
            if new_size < cluster.min_size:
                if self.best_effort:
                    count = current_size - cluster.min_size
                    reason = _('Do best-effort scaling.')
                else:
                    status = base.CHECK_ERROR
                    reason = _('Attempted scaling below minimum size.')
        else:
            new_size = current_size + count
            if new_size > cluster.max_size:
                if self.best_effort:
                    count = cluster.max_size - current_size
                    reason = _('Do best-effort scaling.')
                else:
                    status = base.CHECK_ERROR
                    reason = _('Attempted scaling above maximum size.')

        pd = {'status': status, 'reason': reason}
        if status == base.CHECK_OK:
            if action.action == consts.CLUSTER_SCALE_IN:
                pd['deletion'] = {'count': count}
            else:
                pd['creation'] = {'count': count}

        action.data.update(pd)
        action.store(action.context)

        return
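# --------------------------------------------------------------------
# Illustrative sketch: the arithmetic behind _calculate_adjustment_count()
# above, written as a pure function so it can be exercised without a
# policy object or a Senlin runtime. The constant values mirror the
# semantics of senlin.common.consts but are re-declared here as
# assumptions for the sake of a self-contained example.
EXACT_CAPACITY = 'EXACT_CAPACITY'
CHANGE_IN_CAPACITY = 'CHANGE_IN_CAPACITY'
CHANGE_IN_PERCENTAGE = 'CHANGE_IN_PERCENTAGE'


def _adjustment_count(current, adj_type, number, min_step=1, scale_in=False):
    if adj_type == EXACT_CAPACITY:
        # Distance between the current size and the desired size.
        return current - number if scale_in else number - current
    if adj_type == CHANGE_IN_CAPACITY:
        return number
    # CHANGE_IN_PERCENTAGE: truncate, then enforce the minimum step.
    count = int((number * current) / 100.0)
    return max(count, min_step)


# Scaling a 25-node cluster by 10% yields 2 nodes (truncated from 2.5);
# with min_step=3 the policy would insist on 3 nodes instead.
assert _adjustment_count(25, CHANGE_IN_PERCENTAGE, 10) == 2
assert _adjustment_count(25, CHANGE_IN_PERCENTAGE, 10, min_step=3) == 3
# --------------------------------------------------------------------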
class AffinityPolicy(base.Policy): """Policy for placing members of a cluster based on server groups. This policy is expected to be enforced before new member(s) added to an existing cluster. """ VERSION = '1.0' VERSIONS = {'1.0': [{'status': consts.SUPPORTED, 'since': '2016.10'}]} PRIORITY = 300 TARGET = [ ('BEFORE', consts.CLUSTER_SCALE_OUT), ('BEFORE', consts.CLUSTER_RESIZE), ('BEFORE', consts.NODE_CREATE), ] PROFILE_TYPE = [ 'os.nova.server-1.0', ] KEYS = ( SERVER_GROUP, AVAILABILITY_ZONE, ENABLE_DRS_EXTENSION, ) = ( 'servergroup', 'availability_zone', 'enable_drs_extension', ) _GROUP_KEYS = ( GROUP_NAME, GROUP_POLICIES, ) = ( 'name', 'policies', ) _POLICIES_VALUES = ( # NOTE: soft policies are supported from compute micro version 2.15 AFFINITY, SOFT_AFFINITY, ANTI_AFFINITY, SOFT_ANTI_AFFINITY, ) = ( 'affinity', 'soft-affinity', 'anti-affinity', 'soft-anti-affinity', ) properties_schema = { SERVER_GROUP: schema.Map( _('Properties of the VM server group'), schema={ GROUP_NAME: schema.String(_('The name of the server group'), ), GROUP_POLICIES: schema.String( _('The server group policies.'), default=ANTI_AFFINITY, constraints=[ constraints.AllowedValues(_POLICIES_VALUES), ], ), }, ), AVAILABILITY_ZONE: schema.String( _('Name of the availability zone to place the nodes.'), ), ENABLE_DRS_EXTENSION: schema.Boolean( _('Enable vSphere DRS extension.'), default=False, ), } def __init__(self, name, spec, **kwargs): super(AffinityPolicy, self).__init__(name, spec, **kwargs) self.enable_drs = self.properties.get(self.ENABLE_DRS_EXTENSION) def validate(self, context, validate_props=False): super(AffinityPolicy, self).validate(context, validate_props) if not validate_props: return True az_name = self.properties.get(self.AVAILABILITY_ZONE) if az_name: nc = self.nova(context.user_id, context.project_id) valid_azs = nc.validate_azs([az_name]) if not valid_azs: msg = _("The specified %(key)s '%(value)s' could not be " "found.") % { 'key': self.AVAILABILITY_ZONE, 'value': az_name } raise exc.InvalidSpec(message=msg) return True def attach(self, cluster, enabled=True): """Routine to be invoked when policy is to be attached to a cluster. :para cluster: The cluster to which the policy is being attached to. :param enabled: The attached cluster policy is enabled or disabled. :returns: When the operation was successful, returns a tuple (True, message); otherwise, return a tuple (False, error). 
""" res, data = super(AffinityPolicy, self).attach(cluster) if res is False: return False, data data = {'inherited_group': False} nc = self.nova(cluster.user, cluster.project) group = self.properties.get(self.SERVER_GROUP) # guess servergroup name group_name = group.get(self.GROUP_NAME, None) if group_name is None: profile = cluster.rt['profile'] if 'scheduler_hints' in profile.spec: hints = profile.spec['scheduler_hints'] group_name = hints.get('group', None) if group_name: try: server_group = nc.server_group_find(group_name, True) except exc.InternalError as ex: msg = _("Failed in retrieving servergroup '%s'.") % group_name LOG.exception('%(msg)s: %(ex)s', {'msg': msg, 'ex': ex}) return False, msg if server_group: # Check if the policies match policies = group.get(self.GROUP_POLICIES) if policies and policies != server_group.policies[0]: msg = _( "Policies specified (%(specified)s) doesn't match " "that of the existing servergroup (%(existing)s).") % { 'specified': policies, 'existing': server_group.policies[0] } return False, msg data['servergroup_id'] = server_group.id data['inherited_group'] = True if not data['inherited_group']: # create a random name if necessary if not group_name: group_name = 'server_group_%s' % utils.random_name() try: server_group = nc.server_group_create( name=group_name, policies=[group.get(self.GROUP_POLICIES)]) except Exception as ex: msg = _('Failed in creating servergroup.') LOG.exception('%(msg)s: %(ex)s', {'msg': msg, 'ex': ex}) return False, msg data['servergroup_id'] = server_group.id policy_data = self._build_policy_data(data) return True, policy_data def detach(self, cluster): """Routine to be called when the policy is detached from a cluster. :param cluster: The cluster from which the policy is to be detached. :returns: When the operation was successful, returns a tuple of (True, data) where the data contains references to the resources created; otherwise returns a tuple of (False, error) where the err contains an error message. """ reason = _('Servergroup resource deletion succeeded.') ctx = context.get_admin_context() binding = cpo.ClusterPolicy.get(ctx, cluster.id, self.id) if not binding or not binding.data: return True, reason policy_data = self._extract_policy_data(binding.data) if not policy_data: return True, reason group_id = policy_data.get('servergroup_id', None) inherited_group = policy_data.get('inherited_group', False) if group_id and not inherited_group: try: nc = self.nova(cluster.user, cluster.project) nc.server_group_delete(group_id) except Exception as ex: msg = _('Failed in deleting servergroup.') LOG.exception('%(msg)s: %(ex)s', {'msg': msg, 'ex': ex}) return False, msg return True, reason def pre_op(self, cluster_id, action): """Routine to be called before target action is executed. This policy annotates the node with a server group ID before the node is actually created. For vSphere DRS, it is equivalent to the selection of vSphere host (cluster). :param cluster_id: ID of the cluster on which the relevant action is to be executed. :param action: The action object that triggered this operation. :returns: Nothing. """ zone_name = self.properties.get(self.AVAILABILITY_ZONE) if not zone_name and self.enable_drs: # we make a reasonable guess of the zone name for vSphere # support because the zone name is required in that case. zone_name = 'nova' # we respect other policies decisions (if any) and fall back to the # action inputs if no hints found. 
pd = action.data.get('creation', None) if pd is not None: count = pd.get('count', 1) elif action.action == consts.CLUSTER_SCALE_OUT: count = action.inputs.get('count', 1) elif action.action == consts.NODE_CREATE: count = 1 else: # CLUSTER_RESIZE cluster = action.entity current = len(cluster.nodes) su.parse_resize_params(action, cluster, current) if 'creation' not in action.data: return count = action.data['creation']['count'] cp = cpo.ClusterPolicy.get(action.context, cluster_id, self.id) policy_data = self._extract_policy_data(cp.data) pd_entry = {'servergroup': policy_data['servergroup_id']} # special handling for vSphere DRS case where we need to find out # the name of the vSphere host which has DRS enabled. if self.enable_drs: obj = action.entity nc = self.nova(obj.user, obj.project) hypervisors = nc.hypervisor_list() hv_id = '' pattern = re.compile(r'.*drs*', re.I) for hypervisor in hypervisors: match = pattern.match(hypervisor.hypervisor_hostname) if match: hv_id = hypervisor.id break if not hv_id: action.data['status'] = base.CHECK_ERROR action.data['status_reason'] = _('No suitable vSphere host ' 'is available.') action.store(action.context) return hv_info = nc.hypervisor_get(hv_id) hostname = hv_info['service']['host'] pd_entry['zone'] = ":".join([zone_name, hostname]) elif zone_name: pd_entry['zone'] = zone_name pd = { 'count': count, 'placements': [pd_entry] * count, } action.data.update({'placement': pd}) action.store(action.context) return
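# --------------------------------------------------------------------
# Illustrative sketch of the vSphere host selection in pre_op() above:
# the first hypervisor whose hostname matches the pattern wins. Note
# that the original pattern r'.*drs*' makes the trailing 's' optional,
# so it only requires the substring 'dr'; r'.*drs.*' as used below is
# presumably the intended form. Hostnames here are hypothetical.
import re


def _find_drs_host(hostnames):
    pattern = re.compile(r'.*drs.*', re.I)  # case-insensitive match
    for name in hostnames:
        if pattern.match(name):
            return name
    return None


assert _find_drs_host(['kvm-1', 'esx-DRS-a']) == 'esx-DRS-a'
assert _find_drs_host(['kvm-1', 'kvm-2']) is None
# --------------------------------------------------------------------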
class ScalingPolicy(base.Policy): """Policy for changing the size of a cluster. This policy is expected to be enforced before the node count of a cluster is changed. """ VERSION = '1.0' VERSIONS = {'1.0': [{'status': consts.SUPPORTED, 'since': '2016.04'}]} PRIORITY = 100 TARGET = [ ('BEFORE', consts.CLUSTER_SCALE_IN), ('BEFORE', consts.CLUSTER_SCALE_OUT), ('AFTER', consts.CLUSTER_SCALE_IN), ('AFTER', consts.CLUSTER_SCALE_OUT), ] PROFILE_TYPE = [ 'ANY', ] KEYS = ( EVENT, ADJUSTMENT, ) = ( 'event', 'adjustment', ) _SUPPORTED_EVENTS = ( CLUSTER_SCALE_IN, CLUSTER_SCALE_OUT, ) = ( consts.CLUSTER_SCALE_IN, consts.CLUSTER_SCALE_OUT, ) _ADJUSTMENT_KEYS = ( ADJUSTMENT_TYPE, ADJUSTMENT_NUMBER, MIN_STEP, BEST_EFFORT, COOLDOWN, ) = ( 'type', 'number', 'min_step', 'best_effort', 'cooldown', ) properties_schema = { EVENT: schema.String( _('Event that will trigger this policy. Must be one of ' 'CLUSTER_SCALE_IN and CLUSTER_SCALE_OUT.'), constraints=[ constraints.AllowedValues(_SUPPORTED_EVENTS), ], required=True, ), ADJUSTMENT: schema.Map( _('Detailed specification for scaling adjustments.'), schema={ ADJUSTMENT_TYPE: schema.String( _('Type of adjustment when scaling is triggered.'), constraints=[ constraints.AllowedValues(consts.ADJUSTMENT_TYPES), ], default=consts.CHANGE_IN_CAPACITY, ), ADJUSTMENT_NUMBER: schema.Number( _('A number specifying the amount of adjustment.'), default=1, ), MIN_STEP: schema.Integer( _('When adjustment type is set to "CHANGE_IN_PERCENTAGE",' ' this specifies the cluster size will be decreased by ' 'at least this number of nodes.'), default=1, ), BEST_EFFORT: schema.Boolean( _('Whether do best effort scaling when new size of ' 'cluster will break the size limitation'), default=False, ), COOLDOWN: schema.Integer( _('Number of seconds to hold the cluster for cool-down ' 'before allowing cluster to be resized again.'), default=0, ), }), } def __init__(self, name, spec, **kwargs): """Initialize a scaling policy object. :param name: Name for the policy object. :param spec: A dictionary containing the detailed specification for the policy. :param dict kwargs: Other optional parameters for policy object creation. :return: An object of `ScalingPolicy`. """ super(ScalingPolicy, self).__init__(name, spec, **kwargs) self.singleton = False self.event = self.properties[self.EVENT] adjustment = self.properties[self.ADJUSTMENT] self.adjustment_type = adjustment[self.ADJUSTMENT_TYPE] self.adjustment_number = adjustment[self.ADJUSTMENT_NUMBER] self.adjustment_min_step = adjustment[self.MIN_STEP] self.best_effort = adjustment[self.BEST_EFFORT] self.cooldown = adjustment[self.COOLDOWN] def validate(self, context, validate_props=False): super(ScalingPolicy, self).validate(context, validate_props) if self.adjustment_number <= 0: msg = _("the 'number' for 'adjustment' must be > 0") raise exc.InvalidSpec(message=msg) if self.adjustment_min_step < 0: msg = _("the 'min_step' for 'adjustment' must be >= 0") raise exc.InvalidSpec(message=msg) if self.cooldown < 0: msg = _("the 'cooldown' for 'adjustment' must be >= 0") raise exc.InvalidSpec(message=msg) def _calculate_adjustment_count(self, current_size): """Calculate adjustment count based on current_size. :param current_size: The current size of the target cluster. :return: The number of nodes to add or to remove. 
""" if self.adjustment_type == consts.EXACT_CAPACITY: if self.event == consts.CLUSTER_SCALE_IN: count = current_size - self.adjustment_number else: count = self.adjustment_number - current_size elif self.adjustment_type == consts.CHANGE_IN_CAPACITY: count = self.adjustment_number else: # consts.CHANGE_IN_PERCENTAGE: count = int((self.adjustment_number * current_size) / 100.0) if count < self.adjustment_min_step: count = self.adjustment_min_step return count def pre_op(self, cluster_id, action): """The hook function that is executed before the action. The checking result is stored in the ``data`` property of the action object rather than returned directly from the function. :param cluster_id: The ID of the target cluster. :param action: Action instance against which the policy is being checked. :return: None. """ # check cooldown last_op = action.inputs.get('last_op', None) if last_op and not timeutils.is_older_than(last_op, self.cooldown): action.data.update({ 'status': base.CHECK_ERROR, 'reason': _('Policy %s cooldown is still ' 'in progress.') % self.id }) action.store(action.context) return # Use action input if count is provided count_value = action.inputs.get('count', None) cluster = action.entity current = len(cluster.nodes) if count_value is None: # count not specified, calculate it count_value = self._calculate_adjustment_count(current) # Count must be positive value success, count = utils.get_positive_int(count_value) if not success: action.data.update({ 'status': base.CHECK_ERROR, 'reason': _("Invalid count (%(c)s) for action '%(a)s'.") % { 'c': count_value, 'a': action.action } }) action.store(action.context) return # Check size constraints max_size = cluster.max_size if max_size == -1: max_size = cfg.CONF.max_nodes_per_cluster if action.action == consts.CLUSTER_SCALE_IN: if self.best_effort: count = min(count, current - cluster.min_size) result = su.check_size_params(cluster, current - count, strict=not self.best_effort) else: if self.best_effort: count = min(count, max_size - current) result = su.check_size_params(cluster, current + count, strict=not self.best_effort) if result: # failed validation pd = {'status': base.CHECK_ERROR, 'reason': result} else: # passed validation pd = { 'status': base.CHECK_OK, 'reason': _('Scaling request validated.'), } if action.action == consts.CLUSTER_SCALE_IN: pd['deletion'] = {'count': count} else: pd['creation'] = {'count': count} action.data.update(pd) action.store(action.context) return def post_op(self, cluster_id, action): # update last_op for next cooldown check ts = timeutils.utcnow(True) cpo.ClusterPolicy.update(action.context, cluster_id, self.id, {'last_op': ts}) def need_check(self, target, action): # check if target + action matches policy targets if not super(ScalingPolicy, self).need_check(target, action): return False if target == 'BEFORE': # Scaling policy BEFORE check should only be triggered if the # incoming action matches the specific policy event. # E.g. for scale-out policy the BEFORE check to select nodes for # termination should only run for scale-out actions. return self.event == action.action else: # Scaling policy AFTER check to reset cooldown timer should be # triggered for all supported policy events (both scale-in and # scale-out). E.g. a scale-out policy should reset cooldown timer # whenever scale-out or scale-in action completes. return action.action in list(self._SUPPORTED_EVENTS)
class LoadBalancingPolicy(base.Policy): """Policy for load balancing among members of a cluster. This policy is expected to be enforced before or after the membership of a cluster is changed. We need to refresh the load-balancer associated with the cluster (which could be created by the policy) when these actions are performed. """ VERSION = '1.1' VERSIONS = {'1.0': [{'status': consts.SUPPORTED, 'since': '2016.04'}]} PRIORITY = 500 TARGET = [ ('AFTER', consts.CLUSTER_ADD_NODES), ('AFTER', consts.CLUSTER_SCALE_OUT), ('AFTER', consts.CLUSTER_RESIZE), ('AFTER', consts.NODE_RECOVER), ('AFTER', consts.NODE_CREATE), ('BEFORE', consts.CLUSTER_DEL_NODES), ('BEFORE', consts.CLUSTER_SCALE_IN), ('BEFORE', consts.CLUSTER_RESIZE), ('BEFORE', consts.NODE_DELETE), ] PROFILE_TYPE = [ 'os.nova.server-1.0', ] KEYS = (POOL, VIP, HEALTH_MONITOR, LB_STATUS_TIMEOUT) = ('pool', 'vip', 'health_monitor', 'lb_status_timeout') _POOL_KEYS = ( POOL_PROTOCOL, POOL_PROTOCOL_PORT, POOL_SUBNET, POOL_LB_METHOD, POOL_ADMIN_STATE_UP, POOL_SESSION_PERSISTENCE, ) = ( 'protocol', 'protocol_port', 'subnet', 'lb_method', 'admin_state_up', 'session_persistence', ) PROTOCOLS = ( HTTP, HTTPS, TCP, ) = ( 'HTTP', 'HTTPS', 'TCP', ) LB_METHODS = ( ROUND_ROBIN, LEAST_CONNECTIONS, SOURCE_IP, ) = ( 'ROUND_ROBIN', 'LEAST_CONNECTIONS', 'SOURCE_IP', ) HEALTH_MONITOR_TYPES = ( PING, TCP, HTTP, HTTPS, ) = ( 'PING', 'TCP', 'HTTP', 'HTTPS', ) HTTP_METHODS = ( GET, POST, PUT, DELETE, ) = ( 'GET', 'POST', 'PUT', 'DELETE', ) _VIP_KEYS = ( VIP_SUBNET, VIP_ADDRESS, VIP_CONNECTION_LIMIT, VIP_PROTOCOL, VIP_PROTOCOL_PORT, VIP_ADMIN_STATE_UP, ) = ( 'subnet', 'address', 'connection_limit', 'protocol', 'protocol_port', 'admin_state_up', ) HEALTH_MONITOR_KEYS = ( HM_TYPE, HM_DELAY, HM_TIMEOUT, HM_MAX_RETRIES, HM_ADMIN_STATE_UP, HM_HTTP_METHOD, HM_URL_PATH, HM_EXPECTED_CODES, ) = ( 'type', 'delay', 'timeout', 'max_retries', 'admin_state_up', 'http_method', 'url_path', 'expected_codes', ) _SESSION_PERSISTENCE_KEYS = ( PERSISTENCE_TYPE, COOKIE_NAME, ) = ( 'type', 'cookie_name', ) PERSISTENCE_TYPES = ( PERSIST_SOURCE_IP, PERSIST_HTTP_COOKIE, PERSIST_APP_COOKIE, ) = ( 'SOURCE_IP', 'HTTP_COOKIE', 'APP_COOKIE', ) properties_schema = { POOL: schema.Map( _('LB pool properties.'), schema={ POOL_PROTOCOL: schema.String( _('Protocol used for load balancing.'), constraints=[ constraints.AllowedValues(PROTOCOLS), ], default=HTTP, ), POOL_PROTOCOL_PORT: schema.Integer( _('Port on which servers are running on the nodes.'), default=80, ), POOL_SUBNET: schema.String( _('Name or ID of subnet for the port on which nodes can ' 'be connected.'), required=True, ), POOL_LB_METHOD: schema.String( _('Load balancing algorithm.'), constraints=[ constraints.AllowedValues(LB_METHODS), ], default=ROUND_ROBIN, ), POOL_ADMIN_STATE_UP: schema.Boolean( _('Administrative state of the pool.'), default=True, ), POOL_SESSION_PERSISTENCE: schema.Map( _('Session persistence configuration.'), schema={ PERSISTENCE_TYPE: schema.String( _('Type of session persistence implementation.'), constraints=[ constraints.AllowedValues(PERSISTENCE_TYPES), ], ), COOKIE_NAME: schema.String( _('Name of cookie if type set to APP_COOKIE.'), ), }, default={}, ), }, ), VIP: schema.Map( _('VIP address and port of the pool.'), schema={ VIP_SUBNET: schema.String( _('Name or ID of Subnet on which the VIP address will be ' 'allocated.'), required=True, ), VIP_ADDRESS: schema.String( _('IP address of the VIP.'), default=None, ), VIP_CONNECTION_LIMIT: schema.Integer( _('Maximum number of connections per second allowed for ' 
'this VIP'), default=-1, ), VIP_PROTOCOL: schema.String( _('Protocol used for VIP.'), constraints=[ constraints.AllowedValues(PROTOCOLS), ], default=HTTP, ), VIP_PROTOCOL_PORT: schema.Integer( _('TCP port to listen on.'), default=80, ), VIP_ADMIN_STATE_UP: schema.Boolean( _('Administrative state of the VIP.'), default=True, ), }, ), HEALTH_MONITOR: schema.Map( _('Health monitor for loadbalancer.'), schema={ HM_TYPE: schema.String( _('The type of probe sent by the loadbalancer to verify ' 'the member state.'), constraints=[ constraints.AllowedValues(HEALTH_MONITOR_TYPES), ], default=PING, ), HM_DELAY: schema.Integer( _('The amount of time in milliseconds between sending ' 'probes to members.'), default=10, ), HM_TIMEOUT: schema.Integer( _('The maximum time in milliseconds that a monitor waits ' 'to connect before it times out.'), default=5, ), HM_MAX_RETRIES: schema.Integer( _('The number of allowed connection failures before ' 'changing the status of the member to INACTIVE.'), default=3, ), HM_ADMIN_STATE_UP: schema.Boolean( _('Administrative state of the health monitor.'), default=True, ), HM_HTTP_METHOD: schema.String( _('The HTTP method that the monitor uses for requests.'), constraints=[ constraints.AllowedValues(HTTP_METHODS), ], ), HM_URL_PATH: schema.String( _('The HTTP path of the request sent by the monitor to ' 'test the health of a member.'), ), HM_EXPECTED_CODES: schema.String( _('Expected HTTP codes for a passing HTTP(S) monitor.'), ), }, ), LB_STATUS_TIMEOUT: schema.Integer( _('Time in second to wait for loadbalancer to become ready ' 'after senlin requests LBaaS V2 service for operations.'), default=300, ) } def __init__(self, name, spec, **kwargs): super(LoadBalancingPolicy, self).__init__(name, spec, **kwargs) self.pool_spec = self.properties.get(self.POOL, {}) self.vip_spec = self.properties.get(self.VIP, {}) self.hm_spec = self.properties.get(self.HEALTH_MONITOR, None) self.lb_status_timeout = self.properties.get(self.LB_STATUS_TIMEOUT) self.lb = None def validate(self, context, validate_props=False): super(LoadBalancingPolicy, self).validate(context, validate_props) if not validate_props: return True nc = self.network(context.user, context.project) # validate pool subnet name_or_id = self.pool_spec.get(self.POOL_SUBNET) try: nc.subnet_get(name_or_id) except exc.InternalError: msg = _( "The specified %(key)s '%(value)s' could not be found.") % { 'key': self.POOL_SUBNET, 'value': name_or_id } raise exc.InvalidSpec(message=msg) # validate VIP subnet name_or_id = self.vip_spec.get(self.VIP_SUBNET) try: nc.subnet_get(name_or_id) except exc.InternalError: msg = _( "The specified %(key)s '%(value)s' could not be found.") % { 'key': self.VIP_SUBNET, 'value': name_or_id } raise exc.InvalidSpec(message=msg) def attach(self, cluster, enabled=True): """Routine to be invoked when policy is to be attached to a cluster. :param cluster: The cluster to which the policy is being attached to. :param enabled: The attached cluster policy is enabled or disabled. :returns: When the operation was successful, returns a tuple (True, message); otherwise, return a tuple (False, error). """ res, data = super(LoadBalancingPolicy, self).attach(cluster) if res is False: return False, data nodes = nm.Node.load_all(oslo_context.get_current(), cluster_id=cluster.id) lb_driver = self.lbaas(cluster.user, cluster.project) lb_driver.lb_status_timeout = self.lb_status_timeout # TODO(Anyone): Check if existing nodes has conflicts regarding the # subnets. 
Each VM addresses detail has a key named to the network # which can be used for validation. res, data = lb_driver.lb_create(self.vip_spec, self.pool_spec, self.hm_spec) if res is False: return False, data port = self.pool_spec.get(self.POOL_PROTOCOL_PORT) subnet = self.pool_spec.get(self.POOL_SUBNET) for node in nodes: member_id = lb_driver.member_add(node, data['loadbalancer'], data['pool'], port, subnet) if member_id is None: # When failed in adding member, remove all lb resources that # were created and return the failure reason. # TODO(anyone): May need to "roll-back" changes caused by any # successful member_add() calls. lb_driver.lb_delete(**data) return False, 'Failed in adding node into lb pool' node.data.update({'lb_member': member_id}) node.store(oslo_context.get_current()) cluster_data_lb = cluster.data.get('loadbalancers', {}) cluster_data_lb[self.id] = {'vip_address': data.pop('vip_address')} cluster.data['loadbalancers'] = cluster_data_lb policy_data = self._build_policy_data(data) return True, policy_data def detach(self, cluster): """Routine to be called when the policy is detached from a cluster. :param cluster: The cluster from which the policy is to be detached. :returns: When the operation was successful, returns a tuple of (True, data) where the data contains references to the resources created; otherwise returns a tuple of (False, err) where the err contains a error message. """ reason = _('LB resources deletion succeeded.') lb_driver = self.lbaas(cluster.user, cluster.project) lb_driver.lb_status_timeout = self.lb_status_timeout cp = cluster_policy.ClusterPolicy.load(oslo_context.get_current(), cluster.id, self.id) policy_data = self._extract_policy_data(cp.data) if policy_data is None: return True, reason res, reason = lb_driver.lb_delete(**policy_data) if res is False: return False, reason nodes = nm.Node.load_all(oslo_context.get_current(), cluster_id=cluster.id, project_safe=False) for node in nodes: if 'lb_member' in node.data: node.data.pop('lb_member') node.store(oslo_context.get_current()) lb_data = cluster.data.get('loadbalancers', {}) if lb_data and isinstance(lb_data, dict): lb_data.pop(self.id, None) if lb_data: cluster.data['loadbalancers'] = lb_data else: cluster.data.pop('loadbalancers') return True, reason def _get_delete_candidates(self, cluster_id, action): deletion = action.data.get('deletion', None) # No deletion field in action.data which means no scaling # policy or deletion policy is attached. 
candidates = None if deletion is None: if action.action == consts.NODE_DELETE: candidates = [action.node.id] count = 1 elif action.action == consts.CLUSTER_DEL_NODES: # Get candidates from action.input candidates = action.inputs.get('candidates', []) count = len(candidates) elif action.action == consts.CLUSTER_RESIZE: # Calculate deletion count based on action input db_cluster = co.Cluster.get(action.context, cluster_id) current = no.Node.count_by_cluster(action.context, cluster_id) scaleutils.parse_resize_params(action, db_cluster, current) if 'deletion' not in action.data: return [] else: count = action.data['deletion']['count'] else: # action.action == consts.CLUSTER_SCALE_IN count = 1 else: count = deletion.get('count', 0) candidates = deletion.get('candidates', None) # Still no candidates available, pick count of nodes randomly if candidates is None: if count == 0: return [] nodes = no.Node.get_all_by_cluster(action.context, cluster_id) if count > len(nodes): count = len(nodes) candidates = scaleutils.nodes_by_random(nodes, count) deletion_data = action.data.get('deletion', {}) deletion_data.update({ 'count': len(candidates), 'candidates': candidates }) action.data.update({'deletion': deletion_data}) return candidates def _remove_member(self, candidates, policy, action, driver, handle_err=True): # Load policy data policy_data = self._extract_policy_data(policy.data) lb_id = policy_data['loadbalancer'] pool_id = policy_data['pool'] failed_nodes = [] for node_id in candidates: node = no.Node.get(action.context, node_id=node_id) node_data = node.data or {} member_id = node_data.get('lb_member', None) if member_id is None: LOG.warning('Node %(n)s not found in lb pool %(p)s.', { 'n': node_id, 'p': pool_id }) continue res = driver.member_remove(lb_id, pool_id, member_id) values = {} if res is not True and handle_err is True: failed_nodes.append(node.id) values['status'] = consts.NS_WARNING values['status_reason'] = _( 'Failed in removing node from lb pool.') else: node.data.pop('lb_member', None) values['data'] = node.data no.Node.update(action.context, node_id, values) return failed_nodes def _add_member(self, candidates, policy, action, driver): # Load policy data policy_data = self._extract_policy_data(policy.data) lb_id = policy_data['loadbalancer'] pool_id = policy_data['pool'] port = self.pool_spec.get(self.POOL_PROTOCOL_PORT) subnet = self.pool_spec.get(self.POOL_SUBNET) failed_nodes = [] for node_id in candidates: node = no.Node.get(action.context, node_id=node_id) node_data = node.data or {} member_id = node_data.get('lb_member', None) if member_id: LOG.warning('Node %(n)s already in lb pool %(p)s.', { 'n': node_id, 'p': pool_id }) continue member_id = driver.member_add(node, lb_id, pool_id, port, subnet) values = {} if member_id is None: failed_nodes.append(node.id) values['status'] = consts.NS_WARNING values['status_reason'] = _( 'Failed in adding node into lb pool.') else: node.data.update({'lb_member': member_id}) values['data'] = node.data no.Node.update(action.context, node_id, values) return failed_nodes def _get_post_candidates(self, action): # This method will parse action data passed from action layer candidates = [] if action.action == consts.NODE_CREATE: candidates = [action.node.id] elif action.action == consts.NODE_RECOVER: recovery = action.outputs.get('recovery', None) if recovery is not None and 'action' in recovery: action_name = recovery['action'] if action_name.upper() == consts.RECOVER_RECREATE: candidates = recovery.get('node', []) else: creation = 
action.data.get('creation', None) candidates = creation.get('nodes', []) if creation else [] return candidates def pre_op(self, cluster_id, action): """Routine to be called before an action has been executed. For this particular policy, we take this chance to update the pool maintained by the load-balancer. :param cluster_id: The ID of the cluster on which a relevant action has been executed. :param action: The action object that triggered this operation. :returns: Nothing. """ candidates = self._get_delete_candidates(cluster_id, action) if len(candidates) == 0: return db_cluster = co.Cluster.get(action.context, cluster_id) lb_driver = self.lbaas(db_cluster.user, db_cluster.project) lb_driver.lb_status_timeout = self.lb_status_timeout cp = cluster_policy.ClusterPolicy.load(action.context, cluster_id, self.id) # Remove nodes that will be deleted from lb pool failed_nodes = self._remove_member(candidates, cp, action, lb_driver) if failed_nodes: error = _('Failed in removing deleted node(s) from lb pool: %s' ) % failed_nodes action.data['status'] = base.CHECK_ERROR action.data['reason'] = error return def post_op(self, cluster_id, action): """Routine to be called after an action has been executed. For this particular policy, we take this chance to update the pool maintained by the load-balancer. :param cluster_id: The ID of the cluster on which a relevant action has been executed. :param action: The action object that triggered this operation. :returns: Nothing. """ # TODO(Yanyanhu): Need special handling for cross-az scenario # which is supported by Neutron lbaas. candidates = self._get_post_candidates(action) if len(candidates) == 0: return db_cluster = co.Cluster.get(action.context, cluster_id) lb_driver = self.lbaas(db_cluster.user, db_cluster.project) lb_driver.lb_status_timeout = self.lb_status_timeout cp = cluster_policy.ClusterPolicy.load(action.context, cluster_id, self.id) if action.action == consts.NODE_RECOVER: self._remove_member(candidates, cp, action, lb_driver, handle_err=False) # Add new nodes to lb pool failed_nodes = self._add_member(candidates, cp, action, lb_driver) if failed_nodes: error = _('Failed in adding nodes into lb pool: %s') % failed_nodes action.data['status'] = base.CHECK_ERROR action.data['reason'] = error return
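# --------------------------------------------------------------------
# Illustrative sketch of the candidate parsing in _get_post_candidates()
# above, rewritten over a plain action-like object. The SimpleNamespace
# fixture and constant values are hypothetical stand-ins for Senlin's
# action objects and its consts module.
from types import SimpleNamespace

NODE_CREATE = 'NODE_CREATE'
NODE_RECOVER = 'NODE_RECOVER'
RECOVER_RECREATE = 'RECREATE'


def _post_candidates(action):
    if action.action == NODE_CREATE:
        return [action.node.id]
    if action.action == NODE_RECOVER:
        recovery = action.outputs.get('recovery') or {}
        # Only a re-created node needs to rejoin the LB pool.
        if str(recovery.get('action', '')).upper() == RECOVER_RECREATE:
            return recovery.get('node', [])
        return []
    creation = action.data.get('creation') or {}
    return creation.get('nodes', [])


_act = SimpleNamespace(action=NODE_RECOVER, data={},
                       outputs={'recovery': {'action': 'RECREATE',
                                             'node': ['node-1']}})
assert _post_candidates(_act) == ['node-1']
# --------------------------------------------------------------------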
class ServerProfile(base.Profile): '''Profile for an OpenStack Nova server.''' KEYS = ( CONTEXT, ADMIN_PASS, AUTO_DISK_CONFIG, AVAILABILITY_ZONE, BLOCK_DEVICE_MAPPING, # BLOCK_DEVICE_MAPPING_V2, CONFIG_DRIVE, FLAVOR, IMAGE, KEY_NAME, METADATA, NAME, NETWORKS, PERSONALITY, SECURITY_GROUPS, TIMEOUT, USER_DATA, SCHEDULER_HINTS, ) = ( 'context', 'adminPass', 'auto_disk_config', 'availability_zone', 'block_device_mapping', # 'block_device_mapping_v2', 'config_drive', 'flavor', 'image', 'key_name', 'metadata', 'name', 'networks', 'personality', 'security_groups', 'timeout', 'user_data', 'scheduler_hints', ) BDM_KEYS = ( BDM_DEVICE_NAME, BDM_VOLUME_SIZE, ) = ( 'device_name', 'volume_size', ) NETWORK_KEYS = ( PORT, FIXED_IP, NETWORK, ) = ( 'port', 'fixed-ip', 'network', ) PERSONALITY_KEYS = ( PATH, CONTENTS, ) = ( 'path', 'contents', ) SCHEDULER_HINTS_KEYS = (GROUP, ) = ('group', ) properties_schema = { CONTEXT: schema.Map(_('Customized security context for operating servers.'), ), ADMIN_PASS: schema.String(_('Password for the administrator account.'), ), AUTO_DISK_CONFIG: schema.Boolean( _('Whether the disk partition is done automatically.'), default=True, ), AVAILABILITY_ZONE: schema.String( _('Name of availability zone for running the server.'), ), BLOCK_DEVICE_MAPPING: schema.List( _('A list specifying the properties of block devices to be used ' 'for this server.'), schema=schema.Map( _('A map specifying the properties of a block device to be ' 'used by the server.'), schema={ BDM_DEVICE_NAME: schema.String( _('Block device name, should be <=255 chars.'), ), BDM_VOLUME_SIZE: schema.Integer(_('Block device size in GB.'), ), }), ), CONFIG_DRIVE: schema.Boolean( _('Whether config drive should be enabled for the server.'), ), FLAVOR: schema.String( _('ID of flavor used for the server.'), required=True, ), IMAGE: schema.String( # IMAGE is not required, because there could be BDM or BDMv2 # support and the corresponding settings effective _('ID of image to be used for the new server.'), ), KEY_NAME: schema.String(_('Name of Nova keypair to be injected to server.'), ), METADATA: schema.Map( _('A collection of key/value pairs to be associated with the ' 'server created. Both key and value should be <=255 chars.'), ), NAME: schema.String( _('Name of the server.'), required=True, ), NETWORKS: schema.List( _('List of networks for the server.'), schema=schema.Map( _('A map specifying the properties of a network for uses.'), schema={ NETWORK: schema.String( _('Name or ID of network to create a port on.'), ), PORT: schema.String(_('Port ID to be used by the network.'), ), FIXED_IP: schema.String(_('Fixed IP to be used by the network.'), ), }, ), ), PERSONALITY: schema.List( _('List of files to be injected into the server, where each.'), schema=schema.Map( _('A map specifying the path & contents for an injected ' 'file.'), schema={ PATH: schema.String( _('In-instance path for the file to be injected.'), required=True, ), CONTENTS: schema.String( _('Contents of the file to be injected.'), required=True, ), }, ), ), SCHEDULER_HINTS: schema.Map( _('A collection of key/value pairs to be associated with the ' 'Scheduler hints. 
Both key and value should be <=255 chars.'), ), SECURITY_GROUPS: schema.List( _('List of security groups.'), schema=schema.String( _('Name of a security group'), required=True, ), ), TIMEOUT: schema.Integer( _('Time out threshold for server operations.'), default=120, ), USER_DATA: schema.String(_('User data to be exposed by the metadata server.'), ), } def __init__(self, type_name, name, **kwargs): super(ServerProfile, self).__init__(type_name, name, **kwargs) self._novaclient = None self._neutronclient = None self.server_id = None def validate(self): super(ServerProfile, self).validate() if self.properties[self.TIMEOUT] > cfg.CONF.default_action_timeout: suggest = cfg.CONF.default_action_timeout err = _("Value of the 'timeout' property must be lower than the " "upper limit (%s).") % suggest raise exception.InvalidSpec(message=err) def nova(self, obj): '''Construct nova client based on object. :param obj: Object for which the client is created. It is expected to be None when retrieving an existing client. When creating a client, it contains the user and project to be used. ''' if self._novaclient is not None: return self._novaclient params = self._build_conn_params(obj.user, obj.project) self._novaclient = driver_base.SenlinDriver().compute(params) return self._novaclient def neutron(self, obj): '''Construct neutron client based on object. :param obj: Object for which the client is created. It is expected to be None when retrieving an existing client. When creating a client, it contains the user and project to be used. ''' if self._neutronclient is not None: return self._neutronclient params = self._build_conn_params(obj.user, obj.project) self._neutronclient = driver_base.SenlinDriver().network(params) return self._neutronclient def do_validate(self, obj): '''Validate if the spec has provided valid info for server creation.''' return True def do_create(self, obj): '''Create a server using the given profile.''' kwargs = {} for key in self.KEYS: # context is treated as connection parameters if key == self.CONTEXT: continue if self.properties[key] is not None: kwargs[key] = self.properties[key] name_or_id = self.properties[self.IMAGE] if name_or_id is not None: image = self.nova(obj).image_get_by_name(name_or_id) # wait for new version of openstacksdk to fix this kwargs.pop(self.IMAGE) kwargs['imageRef'] = image.id flavor_id = self.properties[self.FLAVOR] flavor = self.nova(obj).flavor_find(flavor_id, False) # wait for new verson of openstacksdk to fix this kwargs.pop(self.FLAVOR) kwargs['flavorRef'] = flavor.id if obj.name is not None: kwargs[self.NAME] = obj.name + '-' + utils.random_name(8) metadata = self.properties[self.METADATA] or {} if obj.cluster_id is not None: metadata['cluster'] = obj.cluster_id kwargs['metadata'] = metadata scheduler_hint = self.properties[self.SCHEDULER_HINTS] if scheduler_hint is not None: kwargs['scheduler_hints'] = scheduler_hint user_data = self.properties[self.USER_DATA] if user_data is not None: ud = encodeutils.safe_encode(user_data) kwargs['user_data'] = encodeutils.safe_decode(base64.b64encode(ud)) networks = self.properties[self.NETWORKS] if networks is not None: for network in networks: net_name_id = network.get(self.NETWORK) if net_name_id: res = self.neutron(obj).network_get(net_name_id) network['uuid'] = res.id del network[self.NETWORK] if network['port'] is None: del network['port'] if network['fixed-ip'] is None: del network['fixed-ip'] kwargs['networks'] = networks LOG.info('Creating server: %s' % kwargs) try: server = 
self.nova(obj).server_create(**kwargs) self.nova(obj).wait_for_server(server) except Exception as ex: LOG.exception(_('Failed in creating server: %s'), six.text_type(ex)) return False self.server_id = server.id return server.id def do_delete(self, obj): self.server_id = obj.physical_id if not obj.physical_id: return True try: self.nova(obj).server_delete(self.server_id) self.nova(obj).wait_for_server_delete(self.server_id) except Exception as ex: LOG.error('Error: %s' % six.text_type(ex)) return False return True def do_update(self, obj, new_profile, **params): '''Perform update on the server. :param obj: the server to operate on :param new_profile: the new profile for the server. :param params: a dictionary of optional parameters. ''' self.server_id = obj.physical_id if not self.server_id: return True # TODO(anyone): Validate the new profile # TODO(anyone): Do update based on the fields provided. # self.nova(obj).server_update(**fields) return True def do_check(self, obj): # TODO(anyone): Check server status return True def do_get_details(self, obj): if obj.physical_id is None or obj.physical_id == '': return {} server = self.nova(obj).server_get(obj.physical_id) if server is None: return {} details = { 'id': server.id, 'name': server.name, 'access_ipv4': server.access_ipv4, 'access_ipv6': server.access_ipv6, 'addresses': server.addresses, 'created_at': server.created_at, 'flavor': server.flavor, 'host_id': server.host_id, 'image': server.image, 'links': server.links, 'metadata': server.metadata, 'progress': server.progress, 'project_id': server.project_id, 'status': server.status, 'updated_at': server.updated_at, 'user_id': server.user_id, } return details def do_join(self, obj, cluster_id): if not obj.physical_id: return {} metadata = self.nova(obj).server_metadata_get( server_id=obj.physical_id) or {} metadata['cluster'] = cluster_id return self.nova(obj).server_metadata_update(**metadata) def do_leave(self, obj): if not obj.physical_id: return metadata = self.nova(obj).server_metadata_get( server_id=obj.physical_id) or {} if 'cluster' in metadata: del metadata['cluster'] return self.nova(obj).server_metadata_update(**metadata)
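# --------------------------------------------------------------------
# Illustrative sketch of the user_data handling in do_create() above:
# Nova expects base64-encoded user data, so the profile encodes the raw
# string before passing it to server_create(). The cloud-init payload
# below is hypothetical.
import base64

from oslo_utils import encodeutils

_raw = '#!/bin/sh\necho "hello from senlin"\n'
_encoded = encodeutils.safe_decode(
    base64.b64encode(encodeutils.safe_encode(_raw)))
# _encoded is a plain (unicode) string, safe to embed in the JSON body
# of the server-create request.
# --------------------------------------------------------------------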
class ScalingPolicy(base.Policy):
    """Policy for changing the size of a cluster.

    This policy is expected to be enforced before the node count of a
    cluster is changed.
    """

    VERSION = '1.0'

    PRIORITY = 100

    TARGET = [
        ('BEFORE', consts.CLUSTER_SCALE_IN),
        ('BEFORE', consts.CLUSTER_SCALE_OUT),
    ]

    PROFILE_TYPE = [
        'ANY',
    ]

    KEYS = (
        EVENT, ADJUSTMENT,
    ) = (
        'event', 'adjustment',
    )

    _SUPPORTED_EVENTS = (
        CLUSTER_SCALE_IN, CLUSTER_SCALE_OUT,
    ) = (
        consts.CLUSTER_SCALE_IN, consts.CLUSTER_SCALE_OUT,
    )

    _ADJUSTMENT_KEYS = (
        ADJUSTMENT_TYPE, ADJUSTMENT_NUMBER, MIN_STEP, BEST_EFFORT, COOLDOWN,
    ) = (
        'type', 'number', 'min_step', 'best_effort', 'cooldown',
    )

    properties_schema = {
        EVENT: schema.String(
            _('Event that will trigger this policy. Must be one of '
              'CLUSTER_SCALE_IN and CLUSTER_SCALE_OUT.'),
            constraints=[
                constraints.AllowedValues(_SUPPORTED_EVENTS),
            ],
            required=True,
        ),
        ADJUSTMENT: schema.Map(
            _('Detailed specification for scaling adjustments.'),
            schema={
                ADJUSTMENT_TYPE: schema.String(
                    _('Type of adjustment when scaling is triggered.'),
                    constraints=[
                        constraints.AllowedValues(consts.ADJUSTMENT_TYPES),
                    ],
                    default=consts.CHANGE_IN_CAPACITY,
                ),
                ADJUSTMENT_NUMBER: schema.Number(
                    _('A number specifying the amount of adjustment.'),
                    default=1,
                ),
                MIN_STEP: schema.Integer(
                    _('When adjustment type is set to "CHANGE_IN_PERCENTAGE",'
                      ' this specifies the cluster size will be changed by '
                      'at least this number of nodes.'),
                    default=1,
                ),
                BEST_EFFORT: schema.Boolean(
                    _('Whether to do best-effort scaling when the new size '
                      'of the cluster would break the size limitation.'),
                    default=False,
                ),
                COOLDOWN: schema.Integer(
                    _('Number of seconds to hold the cluster for cool-down '
                      'before allowing the cluster to be resized again.'),
                    default=0,
                ),
            }
        ),
    }

    def __init__(self, name, spec, **kwargs):
        """Initialize a scaling policy object.

        :param name: Name for the policy object.
        :param spec: A dictionary containing the detailed specification for
                     the policy.
        :param \*\*kwargs: Other optional parameters for policy object
                           creation.
        :return: An object of `ScalingPolicy`.
        """
        super(ScalingPolicy, self).__init__(name, spec, **kwargs)
        self.singleton = False

        self.event = self.properties[self.EVENT]

        adjustment = self.properties[self.ADJUSTMENT]
        self.adjustment_type = adjustment[self.ADJUSTMENT_TYPE]
        self.adjustment_number = adjustment[self.ADJUSTMENT_NUMBER]
        self.adjustment_min_step = adjustment[self.MIN_STEP]
        self.best_effort = adjustment[self.BEST_EFFORT]
        self.cooldown = adjustment[self.COOLDOWN]

    def _calculate_adjustment_count(self, current_size):
        """Calculate the adjustment count based on current_size.

        :param current_size: The current size of the target cluster.
        :return: The number of nodes to add or to remove.
        """
        if self.adjustment_type == consts.EXACT_CAPACITY:
            if self.event == consts.CLUSTER_SCALE_IN:
                count = current_size - self.adjustment_number
            else:
                count = self.adjustment_number - current_size
        elif self.adjustment_type == consts.CHANGE_IN_CAPACITY:
            count = self.adjustment_number
        else:  # consts.CHANGE_IN_PERCENTAGE
            count = int((self.adjustment_number * current_size) / 100.0)
            if count < self.adjustment_min_step:
                count = self.adjustment_min_step

        return count

    def pre_op(self, cluster_id, action):
        """The hook function that is executed before the action.

        The checking result is stored in the ``data`` property of the action
        object rather than returned directly from the function.

        :param cluster_id: The ID of the target cluster.
        :param action: Action instance against which the policy is being
                       checked.
        :return: None.
        """
        # Use action input if count is provided
        count = action.inputs.get('count', None)
        current = db_api.node_count_by_cluster(action.context, cluster_id)
        if count is None:
            # count not specified, calculate it
            count = self._calculate_adjustment_count(current)

        # Count must be a positive value
        try:
            count = utils.parse_int_param('count', count, allow_zero=False)
        except exception.InvalidParameter:
            action.data.update({
                'status': base.CHECK_ERROR,
                'reason': _("Invalid count (%(c)s) for action "
                            "'%(a)s'.") % {'c': count, 'a': action.action}
            })
            action.store(action.context)
            return

        # Check size constraints
        cluster = db_api.cluster_get(action.context, cluster_id)
        if action.action == consts.CLUSTER_SCALE_IN:
            if self.best_effort:
                count = min(count, current - cluster.min_size)
            result = su.check_size_params(cluster, current - count,
                                          strict=not self.best_effort)
        else:
            if self.best_effort:
                count = min(count, cluster.max_size - current)
            result = su.check_size_params(cluster, current + count,
                                          strict=not self.best_effort)

        if result:
            # failed validation
            pd = {'status': base.CHECK_ERROR, 'reason': result}
        else:
            # passed validation
            pd = {
                'status': base.CHECK_OK,
                'reason': _('Scaling request validated.'),
            }
            if action.action == consts.CLUSTER_SCALE_IN:
                pd['deletion'] = {'count': count}
            else:
                pd['creation'] = {'count': count}

        action.data.update(pd)
        action.store(action.context)

        return

    def need_check(self, target, action):
        res = super(ScalingPolicy, self).need_check(target, action)
        if res:
            # Check if the action is expected by the policy
            res = (self.event == action.action)

        return res
class Alarm(base.Trigger): # time constraints alarm_schema = { REPEAT: schema.Boolean( _('Whether the actions should be re-triggered on each evaluation ' 'cycle. Default to False.'), default=False, ), TIME_CONSTRAINTS: schema.List(schema=schema.Map( _('A map of time constraint settings.'), schema={ NAME: schema.String(_('Name of the time constraint.'), ), TC_DESCRIPTION: schema.String(_('A description of the time constraint.'), ), TC_START: schema.String( _('Start point of the time constraint, expressed as a ' 'string in cron expression format.'), required=True, ), TC_DURATION: schema.Integer( _('How long the constraint should last, in seconds.'), required=True, ), TC_TIMEZONE: schema.String( _('Time zone of the constraint.'), default='', ), }, ), ) } def __init__(self, name, spec, **kwargs): super(Alarm, self).__init__(name, spec, **kwargs) self.alarm_properties = schema.Spec(self.alarm_schema, spec) self.namespace = 'default' self.rule = None def validate(self): # validate cron expression if specified if TIME_CONSTRAINTS in self.spec: tcs = self.alarm_properties[TIME_CONSTRAINTS] for tc in tcs: exp = tc.get(TC_START, '') try: croniter.croniter(exp) except Exception as ex: msg = _("Invalid cron expression specified for property " "'%(property)s' (%(exp)s): %(ex)s") % { 'property': TC_START, 'exp': exp, 'ex': six.text_type(ex) } raise exc.InvalidSpec(message=msg) tz = tc.get(TC_TIMEZONE, '') try: pytz.timezone(tz) except Exception as ex: msg = _("Invalid timezone value specified for property " "'%(property)s' (%(tz)s): %(ex)s") % { 'property': TC_TIMEZONE, 'tz': tz, 'ex': six.text_type(ex) } raise exc.InvalidSpec(message=msg) def create(self, ctx, **kwargs): """Create an alarm for a cluster. :param name: The name for the alarm. :param urls: A list of URLs for webhooks to be triggered. :returns: A dict containing properties of the alarm. """ self.ok_actions = kwargs.get(OK_ACTIONS, []) self.alarm_actions = kwargs.get(ALARM_ACTIONS, []) self.insufficient_data_actions = kwargs.get(INSUFFICIENT_DATA_ACTIONS, []) rule_name = self.namespace + '_rule' rule_data = dict((k, v) for k, v in self.rule.items()) params = { NAME: self.name, DESCRIPTION: self.desc, TYPE: self.namespace, STATE: self.state, SEVERITY: self.severity, ENABLED: self.enabled, OK_ACTIONS: self.ok_actions, ALARM_ACTIONS: self.alarm_actions, INSUFFICIENT_DATA_ACTIONS: self.insufficient_data_actions, TIME_CONSTRAINTS: self.alarm_properties[TIME_CONSTRAINTS], REPEAT: self.alarm_properties[REPEAT], rule_name: rule_data, } try: cc = driver_base.SenlinDriver().telemetry(ctx.to_dict()) alarm = cc.alarm_create(**params) self.physical_id = alarm.id self.store(ctx) return True, alarm.to_dict() except exc.SenlinException as ex: return False, six.text_type(ex) def delete(self, ctx, identifier): """Delete an alarm. :param identifier: This must be an alarm ID. """ try: cc = driver_base.SenlinDriver().telemetry(ctx) res = cc.alarm_delete(identifier, True) return True, res except exc.InternalError as ex: return False, six.text_type(ex) def update(self, identifier, values): return NotImplemented
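# --------------------------------------------------------------------
# Illustrative sketch of the time-constraint validation performed in
# Alarm.validate() above, exercising the same croniter and pytz calls
# on a hypothetical constraint. Both libraries raise on invalid input,
# which validate() converts into an InvalidSpec exception.
import croniter
import pytz


def _check_time_constraint(start_cron, timezone_name):
    croniter.croniter(start_cron)    # raises on a malformed cron expression
    pytz.timezone(timezone_name)     # raises UnknownTimeZoneError if unknown


# '0 23 * * *' means "every day at 23:00" in the given timezone.
_check_time_constraint('0 23 * * *', 'UTC')
# --------------------------------------------------------------------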