class LoadBalancingPolicy(base.Policy):
    """Policy for load balancing among members of a cluster.

    This policy is expected to be enforced before or after the membership of
    a cluster is changed. We need to refresh the load-balancer associated
    with the cluster (which may have been created by this policy) when these
    actions are performed.
    """

    VERSION = '1.0'

    PRIORITY = 500

    TARGET = [
        ('AFTER', consts.CLUSTER_ADD_NODES),
        ('AFTER', consts.CLUSTER_SCALE_OUT),
        ('AFTER', consts.CLUSTER_RESIZE),
        ('BEFORE', consts.CLUSTER_DEL_NODES),
        ('BEFORE', consts.CLUSTER_SCALE_IN),
        ('BEFORE', consts.CLUSTER_RESIZE),
    ]

    PROFILE_TYPE = [
        'os.nova.server-1.0',
    ]

    KEYS = (
        POOL, VIP, HEALTH_MONITOR,
    ) = (
        'pool', 'vip', 'health_monitor',
    )

    _POOL_KEYS = (
        POOL_PROTOCOL, POOL_PROTOCOL_PORT, POOL_SUBNET,
        POOL_LB_METHOD, POOL_ADMIN_STATE_UP, POOL_SESSION_PERSISTENCE,
    ) = (
        'protocol', 'protocol_port', 'subnet',
        'lb_method', 'admin_state_up', 'session_persistence',
    )

    PROTOCOLS = (
        HTTP, HTTPS, TCP,
    ) = (
        'HTTP', 'HTTPS', 'TCP',
    )

    LB_METHODS = (
        ROUND_ROBIN, LEAST_CONNECTIONS, SOURCE_IP,
    ) = (
        'ROUND_ROBIN', 'LEAST_CONNECTIONS', 'SOURCE_IP',
    )

    HEALTH_MONITOR_TYPES = (
        PING, TCP, HTTP, HTTPS,
    ) = (
        'PING', 'TCP', 'HTTP', 'HTTPS',
    )

    HTTP_METHODS = (
        GET, POST, PUT, DELETE,
    ) = (
        'GET', 'POST', 'PUT', 'DELETE',
    )

    _VIP_KEYS = (
        VIP_SUBNET, VIP_ADDRESS, VIP_CONNECTION_LIMIT, VIP_PROTOCOL,
        VIP_PROTOCOL_PORT, VIP_ADMIN_STATE_UP,
    ) = (
        'subnet', 'address', 'connection_limit', 'protocol',
        'protocol_port', 'admin_state_up',
    )

    HEALTH_MONITOR_KEYS = (
        HM_TYPE, HM_DELAY, HM_TIMEOUT, HM_MAX_RETRIES, HM_ADMIN_STATE_UP,
        HM_HTTP_METHOD, HM_URL_PATH, HM_EXPECTED_CODES,
    ) = (
        'type', 'delay', 'timeout', 'max_retries', 'admin_state_up',
        'http_method', 'url_path', 'expected_codes',
    )

    _SESSION_PERSISTENCE_KEYS = (
        PERSISTENCE_TYPE, COOKIE_NAME,
    ) = (
        'type', 'cookie_name',
    )

    PERSISTENCE_TYPES = (
        PERSIST_SOURCE_IP, PERSIST_HTTP_COOKIE, PERSIST_APP_COOKIE,
    ) = (
        'SOURCE_IP', 'HTTP_COOKIE', 'APP_COOKIE',
    )

    properties_schema = {
        POOL: schema.Map(
            _('LB pool properties.'),
            schema={
                POOL_PROTOCOL: schema.String(
                    _('Protocol used for load balancing.'),
                    constraints=[
                        constraints.AllowedValues(PROTOCOLS),
                    ],
                    default=HTTP,
                ),
                POOL_PROTOCOL_PORT: schema.Integer(
                    _('Port on which servers are running on the nodes.'),
                    default=80,
                ),
                POOL_SUBNET: schema.String(
                    _('Name or ID of subnet for the port on which nodes can '
                      'be connected.'),
                    required=True,
                ),
                POOL_LB_METHOD: schema.String(
                    _('Load balancing algorithm.'),
                    constraints=[
                        constraints.AllowedValues(LB_METHODS),
                    ],
                    default=ROUND_ROBIN,
                ),
                POOL_ADMIN_STATE_UP: schema.Boolean(
                    _('Administrative state of the pool.'),
                    default=True,
                ),
                POOL_SESSION_PERSISTENCE: schema.Map(
                    _('Session persistence configuration.'),
                    schema={
                        PERSISTENCE_TYPE: schema.String(
                            _('Type of session persistence implementation.'),
                            constraints=[
                                constraints.AllowedValues(PERSISTENCE_TYPES),
                            ],
                        ),
                        COOKIE_NAME: schema.String(
                            _('Name of cookie if type set to APP_COOKIE.'),
                        ),
                    },
                    default={},
                ),
            },
        ),
        VIP: schema.Map(
            _('VIP address and port of the pool.'),
            schema={
                VIP_SUBNET: schema.String(
                    _('Name or ID of Subnet on which the VIP address will be '
                      'allocated.'),
                    required=True,
                ),
                VIP_ADDRESS: schema.String(
                    _('IP address of the VIP.'),
                    default=None,
                ),
                VIP_CONNECTION_LIMIT: schema.Integer(
                    _('Maximum number of connections per second allowed for '
                      'this VIP.'),
                    default=-1,
                ),
                VIP_PROTOCOL: schema.String(
                    _('Protocol used for VIP.'),
                    constraints=[
                        constraints.AllowedValues(PROTOCOLS),
                    ],
                    default=HTTP,
                ),
                VIP_PROTOCOL_PORT: schema.Integer(
                    _('TCP port to listen on.'),
                    default=80,
                ),
                VIP_ADMIN_STATE_UP: schema.Boolean(
                    _('Administrative state of the VIP.'),
                    default=True,
                ),
            },
        ),
        HEALTH_MONITOR: schema.Map(
            _('Health monitor for loadbalancer.'),
            schema={
                HM_TYPE: schema.String(
                    _('The type of probe sent by the load balancer to verify '
                      'the member state.'),
                    constraints=[
                        constraints.AllowedValues(HEALTH_MONITOR_TYPES),
                    ],
                    default=PING,
                ),
                HM_DELAY: schema.Integer(
                    _('The amount of time in seconds between sending '
                      'probes to members.'),
                    default=10,
                ),
                HM_TIMEOUT: schema.Integer(
                    _('The maximum time in seconds that a monitor waits to '
                      'connect before it times out.'),
                    default=5,
                ),
                HM_MAX_RETRIES: schema.Integer(
                    _('The number of allowed connection failures before '
                      'changing the status of the member to INACTIVE.'),
                    default=3,
                ),
                HM_ADMIN_STATE_UP: schema.Boolean(
                    _('Administrative state of the health monitor.'),
                    default=True,
                ),
                HM_HTTP_METHOD: schema.String(
                    _('The HTTP method that the monitor uses for requests.'),
                    constraints=[
                        constraints.AllowedValues(HTTP_METHODS),
                    ],
                ),
                HM_URL_PATH: schema.String(
                    _('The HTTP path of the request sent by the monitor to '
                      'test the health of a member.'),
                ),
                HM_EXPECTED_CODES: schema.String(
                    _('Expected HTTP codes for a passing HTTP(S) monitor.'),
                ),
            },
        ),
    }

    def __init__(self, name, spec, **kwargs):
        super(LoadBalancingPolicy, self).__init__(name, spec, **kwargs)

        self.pool_spec = self.properties.get(self.POOL, {})
        self.vip_spec = self.properties.get(self.VIP, {})
        self.hm_spec = self.properties.get(self.HEALTH_MONITOR, None)
        self.validate()
        self.lb = None

    def validate(self):
        super(LoadBalancingPolicy, self).validate()
        # validate that the subnet exists
        # subnet = self.nc.subnet_get(vip[self.VIP_SUBNET])

    def attach(self, cluster):
        """Routine to be invoked when policy is to be attached to a cluster.

        :param cluster: The target cluster to be attached to;
        :returns: When the operation was successful, returns a tuple
            (True, message); otherwise, return a tuple (False, error).
        """
        res, data = super(LoadBalancingPolicy, self).attach(cluster)
        if res is False:
            return False, data

        nodes = node_mod.Node.load_all(oslo_context.get_current(),
                                       cluster_id=cluster.id)

        params = self._build_conn_params(cluster)
        lb_driver = driver_base.SenlinDriver().loadbalancing(params)

        res, data = lb_driver.lb_create(self.vip_spec, self.pool_spec,
                                        self.hm_spec)
        if res is False:
            return False, data

        port = self.pool_spec.get(self.POOL_PROTOCOL_PORT)
        subnet = self.pool_spec.get(self.POOL_SUBNET)

        for node in nodes:
            member_id = lb_driver.member_add(node, data['loadbalancer'],
                                             data['pool'], port, subnet)
            if member_id is None:
                # When adding a member fails, remove all LB resources that
                # were created and return the failure reason.
                # TODO(anyone): May need to "roll back" changes caused by any
                # successful member_add() calls.
                lb_driver.lb_delete(**data)
                return False, 'Failed in adding node into lb pool'

            node.data.update({'lb_member': member_id})
            node.store(oslo_context.get_current())

        cluster_data_lb = cluster.data.get('loadbalancers', {})
        cluster_data_lb[self.id] = {'vip_address': data.pop('vip_address')}
        cluster.data['loadbalancers'] = cluster_data_lb

        policy_data = self._build_policy_data(data)

        return True, policy_data

    def detach(self, cluster):
        """Routine to be called when the policy is detached from a cluster.

        :param cluster: The cluster from which the policy is to be detached.
        :returns: When the operation was successful, returns a tuple of
            (True, data) where the data contains references to the resources
            created; otherwise returns a tuple of (False, err) where the err
            contains an error message.
        """
        reason = _('LB resources deletion succeeded.')
        params = self._build_conn_params(cluster)
        lb_driver = driver_base.SenlinDriver().loadbalancing(params)

        cp = cluster_policy.ClusterPolicy.load(oslo_context.get_current(),
                                               cluster.id, self.id)

        policy_data = self._extract_policy_data(cp.data)
        if policy_data is None:
            return True, reason

        res, reason = lb_driver.lb_delete(**policy_data)
        if res is False:
            return False, reason

        nodes = node_mod.Node.load_all(oslo_context.get_current(),
                                       cluster_id=cluster.id)
        for node in nodes:
            if 'lb_member' in node.data:
                node.data.pop('lb_member')
                node.store(oslo_context.get_current())

        lb_data = cluster.data.get('loadbalancers', {})
        if lb_data and isinstance(lb_data, dict):
            lb_data.pop(self.id, None)
            if lb_data:
                cluster.data['loadbalancers'] = lb_data
            else:
                cluster.data.pop('loadbalancers')

        return True, reason

    def _get_delete_candidates(self, cluster_id, action):
        deletion = action.data.get('deletion', None)
        # No deletion field in action.data, which means no scaling
        # policy or deletion policy is attached.
        candidates = None
        if deletion is None:
            if action.action == consts.CLUSTER_DEL_NODES:
                # Get candidates from action.input
                candidates = action.inputs.get('candidates', [])
                count = len(candidates)
            elif action.action == consts.CLUSTER_RESIZE:
                # Calculate deletion count based on action input
                db_cluster = db_api.cluster_get(action.context, cluster_id)
                scaleutils.parse_resize_params(action, db_cluster)
                if 'deletion' not in action.data:
                    return []
                else:
                    count = action.data['deletion']['count']
            else:  # action.action == consts.CLUSTER_SCALE_IN
                count = 1
        else:
            count = deletion.get('count', 0)
            candidates = deletion.get('candidates', None)

        # Still no candidates available, pick count of nodes randomly
        if candidates is None:
            if count == 0:
                return []
            nodes = db_api.node_get_all_by_cluster(action.context,
                                                   cluster_id=cluster_id)
            if count > len(nodes):
                count = len(nodes)
            candidates = scaleutils.nodes_by_random(nodes, count)
            deletion_data = action.data.get('deletion', {})
            deletion_data.update({
                'count': len(candidates),
                'candidates': candidates
            })
            action.data.update({'deletion': deletion_data})

        return candidates

    def pre_op(self, cluster_id, action):
        """Routine to be called before an action is executed.

        For this particular policy, we take this chance to update the pool
        maintained by the load-balancer.

        :param cluster_id: The ID of the cluster on which a relevant action
            has been executed.
        :param action: The action object that triggered this operation.
        :returns: Nothing.
        """
        candidates = self._get_delete_candidates(cluster_id, action)
        if len(candidates) == 0:
            return

        db_cluster = db_api.cluster_get(action.context, cluster_id)
        params = self._build_conn_params(db_cluster)
        lb_driver = driver_base.SenlinDriver().loadbalancing(params)
        cp = cluster_policy.ClusterPolicy.load(action.context, cluster_id,
                                               self.id)
        policy_data = self._extract_policy_data(cp.data)
        lb_id = policy_data['loadbalancer']
        pool_id = policy_data['pool']

        # Remove nodes that will be deleted from lb pool
        for node_id in candidates:
            node = node_mod.Node.load(action.context, node_id=node_id)
            member_id = node.data.get('lb_member', None)
            if member_id is None:
                LOG.warning(_LW('Node %(n)s not found in lb pool %(p)s.'),
                            {'n': node_id, 'p': pool_id})
                continue

            res = lb_driver.member_remove(lb_id, pool_id, member_id)
            if res is not True:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = _('Failed in removing deleted '
                                          'node(s) from lb pool.')
                return

        return

    def post_op(self, cluster_id, action):
        """Routine to be called after an action has been executed.

        For this particular policy, we take this chance to update the pool
        maintained by the load-balancer.

        :param cluster_id: The ID of the cluster on which a relevant action
            has been executed.
        :param action: The action object that triggered this operation.
        :returns: Nothing.
        """
        # TODO(Yanyanhu): Need special handling for cross-az scenario
        # which is supported by Neutron lbaas.
        creation = action.data.get('creation', None)
        nodes_added = creation.get('nodes', []) if creation else []
        if len(nodes_added) == 0:
            return

        db_cluster = db_api.cluster_get(action.context, cluster_id)
        params = self._build_conn_params(db_cluster)
        lb_driver = driver_base.SenlinDriver().loadbalancing(params)
        cp = cluster_policy.ClusterPolicy.load(action.context, cluster_id,
                                               self.id)
        policy_data = self._extract_policy_data(cp.data)
        lb_id = policy_data['loadbalancer']
        pool_id = policy_data['pool']
        port = self.pool_spec.get(self.POOL_PROTOCOL_PORT)
        subnet = self.pool_spec.get(self.POOL_SUBNET)

        # Add new nodes to lb pool
        for node_id in nodes_added:
            node = node_mod.Node.load(action.context, node_id=node_id)
            member_id = node.data.get('lb_member', None)
            if member_id:
                LOG.warning(_LW('Node %(n)s already in lb pool %(p)s.'),
                            {'n': node_id, 'p': pool_id})
                continue

            member_id = lb_driver.member_add(node, lb_id, pool_id, port,
                                             subnet)
            if member_id is None:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = _('Failed in adding new node(s) '
                                          'into lb pool.')
                return

            node.data.update({'lb_member': member_id})
            node.store(action.context)

        return
    def test_validate_failed(self):
        sot = schema.List(schema=schema.String())

        ex = self.assertRaises(exc.ESchema, sot.validate, None)
        self.assertEqual("'None' is not a List", six.text_type(ex))
class DeletionPolicy(base.Policy):
    """Policy for choosing victim node(s) from a cluster for deletion.

    This policy is enforced when nodes are to be removed from a cluster.
    It will yield an ordered list of candidates for deletion based on
    user-specified criteria.
    """

    VERSION = '1.0'

    PRIORITY = 400

    KEYS = (
        CRITERIA, DESTROY_AFTER_DELETION, GRACE_PERIOD,
        REDUCE_DESIRED_CAPACITY,
    ) = (
        'criteria', 'destroy_after_deletion', 'grace_period',
        'reduce_desired_capacity',
    )

    CRITERIA_VALUES = (
        OLDEST_FIRST, OLDEST_PROFILE_FIRST, YOUNGEST_FIRST, RANDOM,
    ) = (
        'OLDEST_FIRST', 'OLDEST_PROFILE_FIRST', 'YOUNGEST_FIRST', 'RANDOM',
    )

    TARGET = [
        ('BEFORE', consts.CLUSTER_SCALE_IN),
        ('BEFORE', consts.CLUSTER_DEL_NODES),
        ('BEFORE', consts.CLUSTER_RESIZE),
    ]

    PROFILE_TYPE = ['ANY']

    properties_schema = {
        CRITERIA: schema.String(
            _('Criteria used in selecting candidates for deletion.'),
            default=RANDOM,
            constraints=[
                constraints.AllowedValues(CRITERIA_VALUES),
            ]
        ),
        DESTROY_AFTER_DELETION: schema.Boolean(
            _('Whether a node should be completely destroyed after '
              'deletion. Defaults to True.'),
            default=True,
        ),
        GRACE_PERIOD: schema.Integer(
            _('Number of seconds before real deletion happens.'),
            default=0,
        ),
        REDUCE_DESIRED_CAPACITY: schema.Boolean(
            _('Whether the desired capacity of the cluster should be '
              'reduced along with the deletion. Defaults to False.'),
            default=False,
        )
    }

    def __init__(self, name, spec, **kwargs):
        super(DeletionPolicy, self).__init__(name, spec, **kwargs)

        self.criteria = self.properties[self.CRITERIA]
        self.grace_period = self.properties[self.GRACE_PERIOD]
        self.destroy_after_deletion = self.properties[
            self.DESTROY_AFTER_DELETION]
        self.reduce_desired_capacity = self.properties[
            self.REDUCE_DESIRED_CAPACITY]

    def _victims_by_regions(self, cluster, regions):
        victims = []
        for region in sorted(regions.keys()):
            count = regions[region]
            nodes = cluster.nodes_by_region(region)
            if self.criteria == self.RANDOM:
                candidates = scaleutils.nodes_by_random(nodes, count)
            elif self.criteria == self.OLDEST_PROFILE_FIRST:
                candidates = scaleutils.nodes_by_profile_age(nodes, count)
            elif self.criteria == self.OLDEST_FIRST:
                candidates = scaleutils.nodes_by_age(nodes, count, True)
            else:
                candidates = scaleutils.nodes_by_age(nodes, count, False)

            victims.extend(candidates)

        return victims

    def _victims_by_zones(self, cluster, zones):
        victims = []
        for zone in sorted(zones.keys()):
            count = zones[zone]
            nodes = cluster.nodes_by_zone(zone)
            if self.criteria == self.RANDOM:
                candidates = scaleutils.nodes_by_random(nodes, count)
            elif self.criteria == self.OLDEST_PROFILE_FIRST:
                candidates = scaleutils.nodes_by_profile_age(nodes, count)
            elif self.criteria == self.OLDEST_FIRST:
                candidates = scaleutils.nodes_by_age(nodes, count, True)
            else:
                candidates = scaleutils.nodes_by_age(nodes, count, False)

            victims.extend(candidates)

        return victims

    def _update_action(self, action, victims):
        pd = action.data.get('deletion', {})
        pd['count'] = len(victims)
        pd['candidates'] = victims
        pd['destroy_after_deletion'] = self.destroy_after_deletion
        pd['grace_period'] = self.grace_period
        action.data.update({
            'status': base.CHECK_OK,
            'reason': _('Candidates generated'),
            'deletion': pd
        })
        action.store(action.context)

    def pre_op(self, cluster_id, action):
        """Choose victims that can be deleted.

        :param cluster_id: ID of the cluster to be handled.
        :param action: The action object that triggered this policy.
        """
        victims = action.inputs.get('candidates', [])
        if len(victims) > 0:
            self._update_action(action, victims)
            return

        db_cluster = None
        regions = None
        zones = None

        deletion = action.data.get('deletion', {})
        if deletion:
            # there are policy decisions
            count = deletion['count']
            regions = deletion.get('regions', None)
            zones = deletion.get('zones', None)
        # No policy decision, check action itself: SCALE_IN
        elif action.action == consts.CLUSTER_SCALE_IN:
            count = action.inputs.get('count', 1)
        # No policy decision, check action itself: RESIZE
        else:
            db_cluster = co.Cluster.get(action.context, cluster_id,
                                        project_safe=True)
            res = scaleutils.parse_resize_params(action, db_cluster)
            if res[0] == base.CHECK_ERROR:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = res[1]
                LOG.error(res[1])
                return

            if 'deletion' not in action.data:
                return
            count = action.data['deletion']['count']

        cluster = cm.Cluster.load(action.context, dbcluster=db_cluster,
                                  cluster_id=cluster_id)

        # Cross-region
        if regions:
            victims = self._victims_by_regions(cluster, regions)
            self._update_action(action, victims)
            return

        # Cross-AZ
        if zones:
            victims = self._victims_by_zones(cluster, zones)
            self._update_action(action, victims)
            return

        if count > len(cluster.nodes):
            count = len(cluster.nodes)

        if self.criteria == self.RANDOM:
            victims = scaleutils.nodes_by_random(cluster.nodes, count)
        elif self.criteria == self.OLDEST_PROFILE_FIRST:
            victims = scaleutils.nodes_by_profile_age(cluster.nodes, count)
        elif self.criteria == self.OLDEST_FIRST:
            victims = scaleutils.nodes_by_age(cluster.nodes, count, True)
        else:
            victims = scaleutils.nodes_by_age(cluster.nodes, count, False)

        self._update_action(action, victims)
        return
    def test_basic(self):
        sot = schema.String('desc')

        self.assertEqual('String', sot['type'])
        self.assertEqual('desc', sot['description'])
    def test_get_children(self):
        sot = schema.List('desc', schema=schema.String())

        res = sot._get_children(['v1', 'v2'], [0, 1])
        self.assertEqual(['v1', 'v2'], list(res))
class Profile(object):
    '''Base class for profiles.'''

    KEYS = (
        TYPE, VERSION, PROPERTIES,
    ) = (
        'type', 'version', 'properties',
    )

    spec_schema = {
        TYPE: schema.String(
            _('Name of the profile type.'),
            required=True,
        ),
        VERSION: schema.String(
            _('Version number of the profile type.'),
            required=True,
        ),
        PROPERTIES: schema.Map(
            _('Properties for the profile.'),
            required=True,
        )
    }

    properties_schema = {}

    def __new__(cls, name, spec, **kwargs):
        """Create a new profile of the appropriate class.

        :param name: The name for the profile.
        :param spec: A dictionary containing the spec for the profile.
        :param kwargs: Keyword arguments for profile creation.
        :returns: An instance of a specific sub-class of Profile.
        """
        type_name, version = schema.get_spec_version(spec)
        type_str = "-".join([type_name, version])

        if cls != Profile:
            ProfileClass = cls
        else:
            ProfileClass = environment.global_env().get_profile(type_str)

        return super(Profile, cls).__new__(ProfileClass)

    def __init__(self, name, spec, **kwargs):
        """Initialize a profile instance.

        :param name: A string that specifies the name for the profile.
        :param spec: A dictionary containing the detailed profile spec.
        :param kwargs: Keyword arguments for initializing the profile.
        :returns: An instance of a specific sub-class of Profile.
        """
        type_name, version = schema.get_spec_version(spec)
        type_str = "-".join([type_name, version])

        self.name = name
        self.spec = spec

        self.id = kwargs.get('id', None)
        self.type = kwargs.get('type', type_str)
        self.user = kwargs.get('user')
        self.project = kwargs.get('project')
        self.domain = kwargs.get('domain')
        self.metadata = kwargs.get('metadata', {})
        self.created_at = kwargs.get('created_at', None)
        self.updated_at = kwargs.get('updated_at', None)

        self.spec_data = schema.Spec(self.spec_schema, self.spec)
        self.properties = schema.Spec(self.properties_schema,
                                      self.spec.get(self.PROPERTIES, {}))

        if not self.id:
            # new object needs a context dict
            self.context = self._init_context()
        else:
            self.context = kwargs.get('context')

    @classmethod
    def from_db_record(cls, record):
        '''Construct a profile object from database record.

        :param record: a DB Profile object that contains all required fields.
        '''
        kwargs = {
            'id': record.id,
            'type': record.type,
            'context': record.context,
            'user': record.user,
            'project': record.project,
            'domain': record.domain,
            'metadata': record.meta_data,
            'created_at': record.created_at,
            'updated_at': record.updated_at,
        }
        return cls(record.name, record.spec, **kwargs)

    @classmethod
    def load(cls, ctx, profile=None, profile_id=None, project_safe=True):
        '''Retrieve a profile object from database.'''
        if profile is None:
            profile = db_api.profile_get(ctx, profile_id,
                                         project_safe=project_safe)
            if profile is None:
                raise exception.ProfileNotFound(profile=profile_id)

        return cls.from_db_record(profile)

    @classmethod
    def load_all(cls, ctx, limit=None, marker=None, sort=None, filters=None,
                 project_safe=True):
        """Retrieve all profiles from database."""
        records = db_api.profile_get_all(ctx, limit=limit, marker=marker,
                                         sort=sort, filters=filters,
                                         project_safe=project_safe)

        for record in records:
            yield cls.from_db_record(record)

    @classmethod
    def delete(cls, ctx, profile_id):
        db_api.profile_delete(ctx, profile_id)

    def store(self, ctx):
        '''Store the profile into database and return its ID.'''
        timestamp = timeutils.utcnow()

        values = {
            'name': self.name,
            'type': self.type,
            'context': self.context,
            'spec': self.spec,
            'user': self.user,
            'project': self.project,
            'domain': self.domain,
            'meta_data': self.metadata,
        }

        if self.id:
            self.updated_at = timestamp
            values['updated_at'] = timestamp
            db_api.profile_update(ctx, self.id, values)
        else:
            self.created_at = timestamp
            values['created_at'] = timestamp
            profile = db_api.profile_create(ctx, values)
            self.id = profile.id

        return self.id

    @classmethod
    def create_object(cls, ctx, obj):
        profile = cls.load(ctx, profile_id=obj.profile_id)
        return profile.do_create(obj)

    @classmethod
    def check_object(cls, ctx, obj):
        profile = cls.load(ctx, profile_id=obj.profile_id)
        return profile.do_check(obj)

    @classmethod
    def delete_object(cls, ctx, obj):
        profile = cls.load(ctx, profile_id=obj.profile_id)
        return profile.do_delete(obj)

    @classmethod
    def update_object(cls, ctx, obj, new_profile_id=None, **params):
        profile = cls.load(ctx, profile_id=obj.profile_id)
        new_profile = None
        if new_profile_id:
            new_profile = cls.load(ctx, profile_id=new_profile_id)
        return profile.do_update(obj, new_profile, **params)

    @classmethod
    def recover_object(cls, ctx, obj, **options):
        profile = cls.load(ctx, profile_id=obj.profile_id)
        return profile.do_recover(obj, **options)

    @classmethod
    def get_details(cls, ctx, obj):
        profile = cls.load(ctx, profile_id=obj.profile_id)
        return profile.do_get_details(obj)

    @classmethod
    def join_cluster(cls, ctx, obj, cluster_id):
        profile = cls.load(ctx, profile_id=obj.profile_id)
        return profile.do_join(obj, cluster_id)

    @classmethod
    def leave_cluster(cls, ctx, obj):
        profile = cls.load(ctx, profile_id=obj.profile_id)
        return profile.do_leave(obj)

    def validate(self):
        '''Validate the schema and the data provided.'''
        # general validation
        self.spec_data.validate()
        self.properties.validate()
        # TODO(Anyone): need to check the contents in self.CONTEXT

    @classmethod
    def get_schema(cls):
        return dict((name, dict(schema))
                    for name, schema in cls.properties_schema.items())

    def _init_context(self):
        profile_context = {}
        if self.CONTEXT in self.properties:
            profile_context = self.properties[self.CONTEXT] or {}

        ctx_dict = context.get_service_context(**profile_context)

        ctx_dict.pop('project_name', None)
        ctx_dict.pop('project_domain_name', None)

        return ctx_dict

    def _build_conn_params(self, user, project):
        """Build connection params for specific user and project.

        :param user: The ID of the user for which a trust will be used.
        :param project: The ID of the project for which a trust will be used.
        :returns: A dict containing the required parameters for connection
            creation.
        """
        cred = db_api.cred_get(oslo_context.get_current(), user, project)
        if cred is None:
            raise exception.TrustNotFound(trustor=user)

        trust_id = cred.cred['openstack']['trust']

        # This is supposed to be trust-based authentication
        params = copy.deepcopy(self.context)
        params['trust_id'] = trust_id

        return params

    def do_create(self, obj):
        '''For subclass to override.'''
        return NotImplemented

    def do_delete(self, obj):
        '''For subclass to override.'''
        return NotImplemented

    def do_update(self, obj, new_profile, **params):
        '''For subclass to override.'''
        return NotImplemented

    def do_check(self, obj):
        '''For subclass to override.'''
        return NotImplemented

    def do_get_details(self, obj):
        '''For subclass to override.'''
        return NotImplemented

    def do_join(self, obj, cluster_id):
        '''For subclass to override to perform extra operations.'''
        return True

    def do_leave(self, obj):
        '''For subclass to override to perform extra operations.'''
        return True

    def do_rebuild(self, obj):
        '''For subclass to override.'''
        return NotImplemented

    def do_recover(self, obj, **options):
        '''For subclass to override.'''
        operation = options.get('operation', None)
        if operation and operation != 'RECREATE':
            return NotImplemented

        # NOTE: do_delete always returns a boolean
        res = self.do_delete(obj)

        if res:
            try:
                res = self.do_create(obj)
            except Exception as ex:
                LOG.exception(_('Failed at recovering obj: %s'),
                              six.text_type(ex))
                return False

        return res

    def to_dict(self):
        pb_dict = {
            'id': self.id,
            'name': self.name,
            'type': self.type,
            'user': self.user,
            'project': self.project,
            'domain': self.domain,
            'spec': self.spec,
            'metadata': self.metadata,
            'created_at': utils.format_time(self.created_at),
            'updated_at': utils.format_time(self.updated_at),
        }
        return pb_dict

    def validate_for_update(self, new_profile):
        non_updatables = []
        for (k, v) in new_profile.properties.items():
            if self.properties.get(k, None) != v:
                if not self.properties_schema[k].updatable:
                    non_updatables.append(k)

        if not non_updatables:
            return True

        msg = ", ".join(non_updatables)
        LOG.error(_LE("The following properties are not updatable: %s.")
                  % msg)
        return False
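
# Illustrative sketch, not part of the module: the minimum a concrete
# profile needs to provide is a properties_schema plus the do_*() hooks it
# cares about. The class name and return values below are hypothetical.
class ExampleProfile(Profile):

    properties_schema = {}

    def do_create(self, obj):
        # A real implementation would create a backend resource here and
        # return its physical ID.
        return 'fake-physical-id'

    def do_delete(self, obj):
        # A real implementation would delete the backend resource here.
        return True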
class ZonePlacementPolicy(base.Policy):
    """Policy for placing members of a cluster across availability zones."""

    VERSION = '1.0'

    PRIORITY = 300

    TARGET = [
        ('BEFORE', consts.CLUSTER_SCALE_OUT),
        ('BEFORE', consts.CLUSTER_SCALE_IN),
        ('BEFORE', consts.CLUSTER_RESIZE),
    ]

    PROFILE_TYPE = [
        'os.nova.server-1.0',
    ]

    KEYS = (
        ZONES,
    ) = (
        'zones',
    )

    _AZ_KEYS = (
        ZONE_NAME, ZONE_WEIGHT,
    ) = (
        'name', 'weight',
    )

    properties_schema = {
        ZONES: schema.List(
            _('List of availability zones to choose from.'),
            schema=schema.Map(
                _('An availability zone as candidate.'),
                schema={
                    ZONE_NAME: schema.String(
                        _('Name of an availability zone.'),
                    ),
                    ZONE_WEIGHT: schema.Integer(
                        _('Weight of the availability zone (default is '
                          '100).'),
                        default=100,
                        required=False,
                    )
                },
            ),
        ),
    }

    def __init__(self, name, spec, **kwargs):
        super(ZonePlacementPolicy, self).__init__(name, spec, **kwargs)

        self._novaclient = None
        self.zones = dict((z[self.ZONE_NAME], z[self.ZONE_WEIGHT])
                          for z in self.properties.get(self.ZONES))

    def _nova(self, obj):
        """Construct nova client based on object.

        :param obj: Object for which the client is created. It is expected
            to be None when retrieving an existing client. When creating a
            client, it contains the user and project to be used.
        """
        if self._novaclient is not None:
            return self._novaclient

        params = self._build_conn_params(obj)
        self._novaclient = driver.SenlinDriver().compute(params)
        return self._novaclient

    def _create_plan(self, current, zones, count, expand):
        """Compute a placement plan based on the weights of AZs.

        :param current: Distribution of existing nodes.
        :param zones: Usable availability zones and their weights.
        :param count: Number of nodes to create/delete in this plan.
        :param expand: True if the plan is for inflating the cluster,
            False otherwise.
        :returns: A dict that contains a placement plan, or None if no
            feasible plan can be found.
        """
        # sort candidate zones by distribution and convert it into a list
        candidates = sorted(zones.items(), key=operator.itemgetter(1),
                            reverse=expand)

        sum_weight = sum(zones.values())
        if expand:
            total = count + sum(current.values())
        else:
            total = sum(current.values()) - count
        remain = count
        plan = dict.fromkeys(zones.keys(), 0)

        for i in range(len(zones)):
            zone = candidates[i][0]
            weight = candidates[i][1]
            q = total * weight / float(sum_weight)
            if expand:
                quota = int(math.ceil(q))
                headroom = quota - current[zone]
            else:
                quota = int(math.floor(q))
                headroom = current[zone] - quota

            if headroom <= 0:
                continue

            if headroom < remain:
                plan[zone] = headroom
                remain -= headroom
            else:
                plan[zone] = remain if remain > 0 else 0
                remain = 0
                break

        if remain > 0:
            return None

        # filter out zero values
        result = {}
        for z, c in plan.items():
            if c > 0:
                result[z] = c

        return result

    def _get_count(self, cluster_id, action):
        """Get number of nodes to create or delete.

        :param cluster_id: The ID of the target cluster.
        :param action: The action object which triggered this policy check.
        :return: An integer value which can be 1) positive - number of nodes
            to create; 2) negative - number of nodes to delete; 3) 0 -
            something wrong happened, and the policy check failed.
        """
        if action.action == consts.CLUSTER_RESIZE:
            if action.data.get('deletion', None):
                return -action.data['deletion']['count']
            elif action.data.get('creation', None):
                return action.data['creation']['count']

            db_cluster = co.Cluster.get(action.context, cluster_id)
            res = scaleutils.parse_resize_params(action, db_cluster)
            if res[0] == base.CHECK_ERROR:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = res[1]
                LOG.error(res[1])
                return 0

            if action.data.get('deletion', None):
                return -action.data['deletion']['count']
            else:
                return action.data['creation']['count']

        if action.action == consts.CLUSTER_SCALE_IN:
            pd = action.data.get('deletion', None)
            if pd is None:
                return -action.inputs.get('count', 1)
            else:
                return -pd.get('count', 1)

        # CLUSTER_SCALE_OUT: an action that inflates the cluster
        pd = action.data.get('creation', None)
        if pd is None:
            return action.inputs.get('count', 1)
        else:
            return pd.get('count', 1)

    def pre_op(self, cluster_id, action):
        """Callback function when cluster membership is about to change.

        :param cluster_id: ID of the target cluster.
        :param action: The action that triggers this policy check.
        """
        count = self._get_count(cluster_id, action)
        if count == 0:
            return

        expand = True
        if count < 0:
            expand = False
            count = -count

        cluster = cm.Cluster.load(action.context, cluster_id)

        nc = self._nova(cluster)
        zones_good = nc.validate_azs(self.zones.keys())
        if len(zones_good) == 0:
            action.data['status'] = base.CHECK_ERROR
            action.data['reason'] = _('No availability zone found available.')
            LOG.error(_LE('No availability zone found available.'))
            return

        zones = {}
        for z, w in self.zones.items():
            if z in zones_good:
                zones[z] = w

        current = cluster.get_zone_distribution(action.context, zones.keys())
        result = self._create_plan(current, zones, count, expand)
        if not result:
            action.data['status'] = base.CHECK_ERROR
            action.data['reason'] = _('There is no feasible plan to '
                                      'handle all nodes.')
            LOG.error(_LE('There is no feasible plan to handle all nodes.'))
            return

        if expand:
            if 'creation' not in action.data:
                action.data['creation'] = {}
            action.data['creation']['count'] = count
            action.data['creation']['zones'] = result
        else:
            if 'deletion' not in action.data:
                action.data['deletion'] = {}
            action.data['deletion']['count'] = count
            action.data['deletion']['zones'] = result
class RegionPlacementPolicy(base.Policy):
    """Policy for placing members of a cluster across multiple regions."""

    VERSION = '1.0'

    PRIORITY = 200

    TARGET = [
        ('BEFORE', consts.CLUSTER_SCALE_OUT),
        ('BEFORE', consts.CLUSTER_SCALE_IN),
        ('BEFORE', consts.CLUSTER_RESIZE),
    ]

    PROFILE_TYPE = ['ANY']

    KEYS = (
        REGIONS,
    ) = (
        'regions',
    )

    _AZ_KEYS = (
        REGION_NAME, REGION_WEIGHT, REGION_CAP,
    ) = (
        'name', 'weight', 'cap',
    )

    properties_schema = {
        REGIONS: schema.List(
            _('List of regions to choose from.'),
            schema=schema.Map(
                _('A region as a candidate.'),
                schema={
                    REGION_NAME: schema.String(
                        _('Name of a region.'),
                    ),
                    REGION_WEIGHT: schema.Integer(
                        _('Weight of the region. The default is 100.'),
                        default=100,
                    ),
                    REGION_CAP: schema.Integer(
                        _('Maximum number of nodes in this region. The '
                          'default is -1 which means no cap set.'),
                        default=-1,
                    ),
                },
            ),
        ),
    }

    def __init__(self, name, spec, **kwargs):
        super(RegionPlacementPolicy, self).__init__(name, spec, **kwargs)

        self._keystoneclient = None
        regions = {}
        for r in self.properties.get(self.REGIONS):
            regions[r[self.REGION_NAME]] = {
                'weight': r[self.REGION_WEIGHT],
                'cap': r[self.REGION_CAP],
            }
        self.regions = regions

    def _keystone(self, obj):
        """Construct keystone client based on object.

        :param obj: Object for which the client is created. It is expected
            to be None when retrieving an existing client. When creating a
            client, it contains the user and project to be used.
        """
        if self._keystoneclient is not None:
            return self._keystoneclient
        params = self._build_conn_params(obj)
        self._keystoneclient = driver_base.SenlinDriver().identity(params)
        return self._keystoneclient

    def _create_plan(self, current, regions, count, expand):
        """Compute a placement plan based on the weights of regions.

        :param current: Distribution of existing nodes.
        :param regions: Usable regions for node creation.
        :param count: Number of nodes to create/delete in this plan.
        :param expand: True if the plan is for inflating the cluster,
            False otherwise.
        :returns: A dict mapping region names to the number of nodes to be
            created/deleted there, or None if no feasible plan can be found.
        """
        # sort candidate regions by distribution and convert it into a list
        candidates = sorted(regions.items(), key=lambda x: x[1]['weight'],
                            reverse=expand)

        sum_weight = sum(r['weight'] for r in regions.values())
        if expand:
            total = count + sum(current.values())
        else:
            total = sum(current.values()) - count

        remain = count
        plan = dict.fromkeys(regions.keys(), 0)

        for i in range(len(candidates)):
            region = candidates[i]
            r_name = region[0]
            r_weight = region[1]['weight']
            r_cap = region[1]['cap']

            # maximum number of nodes on current region
            q = total * r_weight / float(sum_weight)
            if expand:
                quota = int(math.ceil(q))
                # respect the cap setting, if any
                if r_cap >= 0:
                    quota = min(quota, r_cap)
                headroom = quota - current[r_name]
            else:
                quota = int(math.floor(q))
                headroom = current[r_name] - quota

            if headroom <= 0:
                continue

            if headroom < remain:
                plan[r_name] = headroom
                remain -= headroom
            else:
                plan[r_name] = remain if remain > 0 else 0
                remain = 0
                break

        # we have leftovers
        if remain > 0:
            return None

        result = {}
        for reg, count in plan.items():
            if count > 0:
                result[reg] = count

        return result

    def _get_count(self, cluster_id, action):
        """Get number of nodes to create or delete.

        :param cluster_id: The ID of the target cluster.
        :param action: The action object which triggered this policy check.
        :return: An integer value which can be 1) positive - number of nodes
            to create; 2) negative - number of nodes to delete; 3) 0 -
            something wrong happened, and the policy check failed.
        """
        if action.action == consts.CLUSTER_RESIZE:
            if action.data.get('deletion', None):
                return -action.data['deletion']['count']
            elif action.data.get('creation', None):
                return action.data['creation']['count']

            db_cluster = db_api.cluster_get(action.context, cluster_id)
            res = scaleutils.parse_resize_params(action, db_cluster)
            if res[0] == base.CHECK_ERROR:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = res[1]
                LOG.error(res[1])
                return 0

            if action.data.get('deletion', None):
                return -action.data['deletion']['count']
            else:
                return action.data['creation']['count']

        if action.action == consts.CLUSTER_SCALE_IN:
            pd = action.data.get('deletion', None)
            if pd is None:
                return -action.inputs.get('count', 1)
            else:
                return -pd.get('count', 1)

        # CLUSTER_SCALE_OUT: an action that inflates the cluster
        pd = action.data.get('creation', None)
        if pd is None:
            return action.inputs.get('count', 1)
        else:
            return pd.get('count', 1)

    def pre_op(self, cluster_id, action):
        """Callback function when cluster membership is about to change.

        :param cluster_id: ID of the target cluster.
        :param action: The action that triggers this policy check.
        :returns: ``None``.
        """
        count = self._get_count(cluster_id, action)
        if count == 0:
            return

        expand = True
        if count < 0:
            expand = False
            count = -count

        cluster = cluster_mod.Cluster.load(action.context, cluster_id)

        kc = self._keystone(cluster)
        regions_good = kc.validate_regions(self.regions.keys())
        if len(regions_good) == 0:
            action.data['status'] = base.CHECK_ERROR
            action.data['reason'] = _('No region is found usable.')
            LOG.error(_LE('No region is found usable.'))
            return

        regions = {}
        for r in self.regions.items():
            if r[0] in regions_good:
                regions[r[0]] = r[1]

        current_dist = cluster.get_region_distribution(regions_good)
        result = self._create_plan(current_dist, regions, count, expand)
        if not result:
            action.data['status'] = base.CHECK_ERROR
            action.data['reason'] = _('There is no feasible plan to '
                                      'handle all nodes.')
            LOG.error(_LE('There is no feasible plan to handle all nodes.'))
            return

        if expand:
            if 'creation' not in action.data:
                action.data['creation'] = {}
            action.data['creation']['count'] = count
            action.data['creation']['regions'] = result
        else:
            if 'deletion' not in action.data:
                action.data['deletion'] = {}
            action.data['deletion']['count'] = count
            action.data['deletion']['regions'] = result
class HealthPolicy(base.Policy):
    """Policy for health management of a cluster."""

    VERSION = '1.0'

    PRIORITY = 600

    TARGET = [
        ('BEFORE', consts.CLUSTER_CHECK),
        ('BEFORE', consts.CLUSTER_DEL_NODES),
        ('BEFORE', consts.CLUSTER_RECOVER),
        ('BEFORE', consts.CLUSTER_RESIZE),
        ('BEFORE', consts.CLUSTER_SCALE_IN),
        ('BEFORE', consts.NODE_DELETE),
        ('AFTER', consts.CLUSTER_DEL_NODES),
        ('AFTER', consts.CLUSTER_SCALE_IN),
        ('AFTER', consts.CLUSTER_RESIZE),
        ('AFTER', consts.NODE_DELETE),
    ]

    # Should be ANY if profile provides health check support?
    PROFILE_TYPE = [
        'os.nova.server',
        'os.heat.stack',
    ]

    KEYS = (DETECTION, RECOVERY) = ('detection', 'recovery')

    _DETECTION_KEYS = (
        DETECTION_TYPE, DETECTION_OPTIONS,
    ) = (
        'type', 'options'
    )

    _DETECTION_OPTIONS = (
        DETECTION_INTERVAL,
    ) = (
        'interval',
    )

    _RECOVERY_KEYS = (
        RECOVERY_ACTIONS, RECOVERY_FENCING
    ) = (
        'actions', 'fencing'
    )

    RECOVERY_ACTION_VALUES = (
        REBUILD, RECREATE,
        # REBOOT, MIGRATE, EVACUATE,
    ) = (
        "REBUILD", "RECREATE",
        # 'REBOOT', 'MIGRATE', 'EVACUATE',
    )

    FENCING_OPTION_VALUES = (
        COMPUTE,
        # STORAGE, NETWORK,
    ) = (
        'COMPUTE',
        # 'STORAGE', 'NETWORK'
    )

    properties_schema = {
        DETECTION: schema.Map(
            _('Policy aspect for node failure detection.'),
            schema={
                DETECTION_TYPE: schema.String(
                    _('Type of node failure detection.'),
                    constraints=[
                        constraints.AllowedValues(consts.DETECTION_TYPES),
                    ],
                    required=True,
                ),
                DETECTION_OPTIONS: schema.Map(
                    schema={
                        DETECTION_INTERVAL: schema.Integer(
                            _("Number of seconds between pollings. Only "
                              "required when type is 'NODE_STATUS_POLLING'."),
                            default=60,
                        ),
                    }
                ),
            },
            required=True,
        ),
        RECOVERY: schema.Map(
            _('Policy aspect for node failure recovery.'),
            schema={
                RECOVERY_ACTIONS: schema.List(
                    _('List of actions to try for node recovery.'),
                    schema=schema.String(
                        _('Action to try for node recovery.'),
                        constraints=[
                            constraints.AllowedValues(RECOVERY_ACTION_VALUES),
                        ]
                    ),
                ),
                RECOVERY_FENCING: schema.List(
                    _('List of services to be fenced.'),
                    schema=schema.String(
                        _('Service to be fenced.'),
                        constraints=[
                            constraints.AllowedValues(FENCING_OPTION_VALUES),
                        ],
                    ),
                ),
            }
        ),
    }

    def __init__(self, name, spec, **kwargs):
        super(HealthPolicy, self).__init__(name, spec, **kwargs)

        self.check_type = self.properties[self.DETECTION][self.DETECTION_TYPE]
        options = self.properties[self.DETECTION][self.DETECTION_OPTIONS]
        self.interval = options[self.DETECTION_INTERVAL]
        recover_settings = self.properties[self.RECOVERY]
        self.recover_actions = recover_settings[self.RECOVERY_ACTIONS]
        self.fencing_types = recover_settings[self.RECOVERY_FENCING]

    def attach(self, cluster):
        """Hook for policy attach.

        Register the cluster for health management.

        :param cluster: The target cluster.
        :return: A tuple comprising execution result and policy data.
        """
        kwargs = {
            'check_type': self.check_type,
            'interval': self.interval,
            'params': {},
        }

        health_manager.register(cluster.id, engine_id=None, **kwargs)

        data = {
            'check_type': self.check_type,
            'interval': self.interval,
        }

        return True, self._build_policy_data(data)

    def detach(self, cluster):
        """Hook for policy detach.

        Unregister the cluster for health management.

        :param cluster: The target cluster.
        :returns: A tuple comprising the execution result and reason.
        """
        health_manager.unregister(cluster.id)
        return True, ''

    def pre_op(self, cluster_id, action, **args):
        """Hook before action execution.

        One of the tasks for this routine is to disable the health policy if
        the action is a request that will shrink the cluster. The reason is
        that the policy may attempt to recover nodes that are to be deleted.

        :param cluster_id: The ID of the target cluster.
        :param action: The action to be examined.
        :param args: Other keyword arguments to be checked.
        :returns: Boolean indicating whether the checking passed.
        """
        if action.action in (consts.CLUSTER_SCALE_IN,
                             consts.CLUSTER_DEL_NODES,
                             consts.NODE_DELETE):
            health_manager.disable(cluster_id)
            return True

        if action.action == consts.CLUSTER_RESIZE:
            deletion = action.data.get('deletion', None)
            if deletion:
                health_manager.disable(cluster_id)
                return True

            db_cluster = co.Cluster.get(action.context, cluster_id)
            current = no.Node.count_by_cluster(action.context, cluster_id)
            res, reason = scaleutils.parse_resize_params(action, db_cluster,
                                                         current)
            if res == base.CHECK_ERROR:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = reason
                return False

            if action.data.get('deletion', None):
                health_manager.disable(cluster_id)
                return True

        pd = {
            'recover_action': self.recover_actions,
            'fencing': self.fencing_types,
        }
        action.data.update({'health': pd})
        action.store(action.context)

        return True

    def post_op(self, cluster_id, action, **args):
        """Hook after action execution.

        One of the tasks for this routine is to re-enable the health policy
        if the action is a request that will shrink the cluster and the
        policy has therefore been temporarily disabled.

        :param cluster_id: The ID of the target cluster.
        :param action: The action to be examined.
        :param args: Other keyword arguments to be checked.
        :returns: Boolean indicating whether the checking passed.
        """
        if action.action in (consts.CLUSTER_SCALE_IN,
                             consts.CLUSTER_DEL_NODES,
                             consts.NODE_DELETE):
            health_manager.enable(cluster_id)
            return True

        if action.action == consts.CLUSTER_RESIZE:
            deletion = action.data.get('deletion', None)
            if deletion:
                health_manager.enable(cluster_id)
                return True

            db_cluster = co.Cluster.get(action.context, cluster_id)
            current = no.Node.count_by_cluster(action.context, cluster_id)
            res, reason = scaleutils.parse_resize_params(action, db_cluster,
                                                         current)
            if res == base.CHECK_ERROR:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = reason
                return False

            if action.data.get('deletion', None):
                health_manager.enable(cluster_id)
                return True

        return True
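
# Illustrative usage sketch, not part of the module: a health policy spec
# that polls node status every 120 seconds and recreates failed nodes,
# matching the schema above. The 'senlin.policy.health' type string is
# assumed from Senlin's naming convention.
example_health_spec = {
    'type': 'senlin.policy.health',
    'version': '1.0',
    'properties': {
        'detection': {
            'type': 'NODE_STATUS_POLLING',
            'options': {'interval': 120},
        },
        'recovery': {
            'actions': ['RECREATE'],
            'fencing': ['COMPUTE'],
        },
    },
}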
class TestSpec(base.SenlinTestCase):

    spec_schema = {
        'key1': schema.String('first key', default='value1'),
        'key2': schema.Integer('second key', required=True),
    }

    def test_init(self):
        data = {'key1': 'value1', 'key2': 2}
        sot = schema.Spec(self.spec_schema, data)

        self.assertEqual(self.spec_schema, sot._schema)
        self.assertEqual(data, sot._data)
        self.assertIsNone(sot._version)

    def test_init_with_version(self):
        data = {'key1': 'value1', 'key2': 2}
        sot = schema.Spec(self.spec_schema, data, version='1.2')

        self.assertEqual(self.spec_schema, sot._schema)
        self.assertEqual(data, sot._data)
        self.assertEqual('1.2', sot._version)

    def test_validate(self):
        data = {'key1': 'value1', 'key2': 2}
        sot = schema.Spec(self.spec_schema, data)
        res = sot.validate()
        self.assertIsNone(res)

        data1 = {'key2': 2}
        sot = schema.Spec(self.spec_schema, data1)
        res = sot.validate()
        self.assertIsNone(res)

    def test_validate_fail_unrecognizable_key(self):
        spec_schema = {
            'key1': schema.String('first key', default='value1'),
        }
        data = {'key1': 'value1', 'key2': 2}
        sot = schema.Spec(spec_schema, data, version='1.0')

        ex = self.assertRaises(exc.ESchema, sot.validate)
        self.assertIn("Unrecognizable spec item 'key2'",
                      six.text_type(ex.message))

    def test_validate_fail_value_type_incorrect(self):
        spec_schema = {
            'key1': schema.String('first key', default='value1'),
            'key2': schema.Integer('second key', required=True),
        }
        data = {'key1': 'value1', 'key2': 'abc'}
        spec = schema.Spec(spec_schema, data, version='1.0')

        ex = self.assertRaises(exc.ESchema, spec.validate)
        self.assertIn("The value 'abc' is not a valid Integer",
                      six.text_type(ex.message))

    def test_validate_version_good(self):
        spec_schema = {
            'type': schema.String('Type name', required=True),
            'version': schema.String('Version number', required=True),
            'key1': schema.String('first key', default='value1'),
            'key2': schema.Integer('second key', required=True,
                                   min_version='1.0', max_version='1.2'),
        }

        data = {
            'key1': 'value1',
            'key2': 2,
            'type': 'test-type',
            'version': '1.0'
        }
        spec = schema.Spec(spec_schema, data)
        self.assertIsNone(spec.validate())

        data = {'key2': 2, 'type': 'test-type', 'version': '1.2'}
        spec = schema.Spec(spec_schema, data)
        self.assertIsNone(spec.validate())

    def test_validate_version_fail_unsupported_version(self):
        spec_schema = {
            'type': schema.String('Type name', required=True),
            'version': schema.String('Version number', required=True),
            'key1': schema.String('first key', default='value1',
                                  min_version='1.1'),
            'key2': schema.Integer('second key', required=True),
        }

        data = {
            'key1': 'value1',
            'key2': 2,
            'type': 'test-type',
            'version': '1.0'
        }
        spec = schema.Spec(spec_schema, data, version='1.0')

        ex = self.assertRaises(exc.ESchema, spec.validate)
        msg = 'key1 (min_version=1.1) is not supported by spec version 1.0.'
        self.assertIn(msg, six.text_type(ex.message))

    def test_validate_version_fail_version_over_max(self):
        spec_schema = {
            'type': schema.String('Type name', required=True),
            'version': schema.String('Version number', required=True),
            'key1': schema.String('first key', default='value1',
                                  max_version='2.0'),
            'key2': schema.Integer('second key', required=True),
        }

        data = {
            'key1': 'value1',
            'key2': 2,
            'type': 'test-type',
            'version': '3.0'
        }
        spec = schema.Spec(spec_schema, data, version='3.0')

        ex = self.assertRaises(exc.ESchema, spec.validate)
        msg = 'key1 (max_version=2.0) is not supported by spec version 3.0.'
        self.assertIn(msg, six.text_type(ex.message))

    def test_resolve_value(self):
        data = {'key2': 2}
        sot = schema.Spec(self.spec_schema, data, version='1.2')

        res = sot.resolve_value('key2')
        self.assertEqual(2, res)

        res = sot.resolve_value('key1')
        self.assertEqual('value1', res)

        ex = self.assertRaises(exc.ESchema, sot.resolve_value, 'key3')
        self.assertEqual("Invalid spec item: key3", six.text_type(ex))

    def test_resolve_value_required_key_missing(self):
        data = {'key1': 'value1'}
        sot = schema.Spec(self.spec_schema, data, version='1.0')

        ex = self.assertRaises(exc.ESchema, sot.resolve_value, 'key2')
        self.assertIn("Required spec item 'key2' not provided",
                      six.text_type(ex.message))

    def test___getitem__(self):
        data = {'key2': 2}
        sot = schema.Spec(self.spec_schema, data, version='1.2')

        res = sot['key1']
        self.assertEqual('value1', res)
        res = sot['key2']
        self.assertEqual(2, res)

    def test___len__(self):
        data = {'key2': 2}
        sot = schema.Spec(self.spec_schema, data, version='1.2')

        res = len(sot)
        self.assertEqual(2, res)

    def test___contains__(self):
        data = {'key2': 2}
        sot = schema.Spec(self.spec_schema, data, version='1.2')

        self.assertIn('key1', sot)
        self.assertIn('key2', sot)
        self.assertNotIn('key3', sot)

    def test__iter__(self):
        data = {'key2': 2}
        sot = schema.Spec(self.spec_schema, data, version='1.2')

        res = [k for k in iter(sot)]

        self.assertIn('key1', res)
        self.assertIn('key2', res)
class ZonePlacementPolicy(base.Policy):
    """Policy for placing members of a cluster across availability zones."""

    VERSION = '1.0'
    VERSIONS = {
        '1.0': [
            {'status': consts.EXPERIMENTAL, 'since': '2016.04'},
            {'status': consts.SUPPORTED, 'since': '2016.10'},
        ]
    }

    PRIORITY = 300

    TARGET = [
        ('BEFORE', consts.CLUSTER_SCALE_OUT),
        ('BEFORE', consts.CLUSTER_SCALE_IN),
        ('BEFORE', consts.CLUSTER_RESIZE),
        ('BEFORE', consts.NODE_CREATE),
    ]

    PROFILE_TYPE = [
        'os.nova.server-1.0',
    ]

    KEYS = (
        ZONES,
    ) = (
        'zones',
    )

    _AZ_KEYS = (
        ZONE_NAME, ZONE_WEIGHT,
    ) = (
        'name', 'weight',
    )

    properties_schema = {
        ZONES: schema.List(
            _('List of availability zones to choose from.'),
            schema=schema.Map(
                _('An availability zone as candidate.'),
                schema={
                    ZONE_NAME: schema.String(
                        _('Name of an availability zone.'),
                    ),
                    ZONE_WEIGHT: schema.Integer(
                        _('Weight of the availability zone (default is '
                          '100).'),
                        default=100,
                        required=False,
                    )
                },
            ),
        ),
    }

    def __init__(self, name, spec, **kwargs):
        super(ZonePlacementPolicy, self).__init__(name, spec, **kwargs)

        self.zones = dict((z[self.ZONE_NAME], z[self.ZONE_WEIGHT])
                          for z in self.properties.get(self.ZONES))

    def validate(self, context, validate_props=False):
        super(ZonePlacementPolicy, self).validate(context, validate_props)

        if not validate_props:
            return True

        nc = self.nova(context.user, context.project)
        input_azs = sorted(self.zones.keys())
        valid_azs = nc.validate_azs(input_azs)
        invalid_azs = sorted(set(input_azs) - set(valid_azs))
        if invalid_azs:
            msg = _("The specified %(key)s '%(value)s' could not be "
                    "found.") % {'key': self.ZONE_NAME,
                                 'value': list(invalid_azs)}
            raise exc.InvalidSpec(message=msg)

        return True

    def _create_plan(self, current, zones, count, expand):
        """Compute a placement plan based on the weights of AZs.

        :param current: Distribution of existing nodes.
        :param zones: Usable availability zones and their weights.
        :param count: Number of nodes to create/delete in this plan.
        :param expand: True if the plan is for inflating the cluster,
            False otherwise.
        :returns: A dict that contains a placement plan, or None if no
            feasible plan can be found.
        """
        # sort candidate zones by distribution and convert it into a list
        candidates = sorted(zones.items(), key=operator.itemgetter(1),
                            reverse=expand)

        sum_weight = sum(zones.values())
        if expand:
            total = count + sum(current.values())
        else:
            total = sum(current.values()) - count
        remain = count
        plan = dict.fromkeys(zones.keys(), 0)

        for i in range(len(zones)):
            zone = candidates[i][0]
            weight = candidates[i][1]
            q = total * weight / float(sum_weight)
            if expand:
                quota = int(math.ceil(q))
                headroom = quota - current[zone]
            else:
                quota = int(math.floor(q))
                headroom = current[zone] - quota

            if headroom <= 0:
                continue

            if headroom < remain:
                plan[zone] = headroom
                remain -= headroom
            else:
                plan[zone] = remain if remain > 0 else 0
                remain = 0
                break

        if remain > 0:
            return None

        # filter out zero values
        result = {}
        for z, c in plan.items():
            if c > 0:
                result[z] = c

        return result

    def _get_count(self, cluster_id, action):
        """Get number of nodes to create or delete.

        :param cluster_id: The ID of the target cluster.
        :param action: The action object which triggered this policy check.
        :return: An integer value which can be 1) positive - number of nodes
            to create; 2) negative - number of nodes to delete; 3) 0 -
            something wrong happened, and the policy check failed.
        """
        if action.action == consts.NODE_CREATE:
            # skip the policy if availability zone is specified in profile
            profile = action.node.rt['profile']
            if profile.properties[profile.AVAILABILITY_ZONE]:
                return 0
            return 1

        if action.action == consts.CLUSTER_RESIZE:
            if action.data.get('deletion', None):
                return -action.data['deletion']['count']
            elif action.data.get('creation', None):
                return action.data['creation']['count']

            db_cluster = co.Cluster.get(action.context, cluster_id)
            current = no.Node.count_by_cluster(action.context, cluster_id)
            res = scaleutils.parse_resize_params(action, db_cluster, current)
            if res[0] == base.CHECK_ERROR:
                action.data['status'] = base.CHECK_ERROR
                action.data['reason'] = res[1]
                LOG.error(res[1])
                return 0

            if action.data.get('deletion', None):
                return -action.data['deletion']['count']
            else:
                return action.data['creation']['count']

        if action.action == consts.CLUSTER_SCALE_IN:
            pd = action.data.get('deletion', None)
            if pd is None:
                return -action.inputs.get('count', 1)
            else:
                return -pd.get('count', 1)

        # CLUSTER_SCALE_OUT: an action that inflates the cluster
        pd = action.data.get('creation', None)
        if pd is None:
            return action.inputs.get('count', 1)
        else:
            return pd.get('count', 1)

    def pre_op(self, cluster_id, action):
        """Callback function when cluster membership is about to change.

        :param cluster_id: ID of the target cluster.
        :param action: The action that triggers this policy check.
        """
        count = self._get_count(cluster_id, action)
        if count == 0:
            return

        expand = True
        if count < 0:
            expand = False
            count = -count

        cluster = cm.Cluster.load(action.context, cluster_id)

        nc = self.nova(cluster.user, cluster.project)
        zones_good = nc.validate_azs(self.zones.keys())
        if len(zones_good) == 0:
            action.data['status'] = base.CHECK_ERROR
            action.data['reason'] = _('No availability zone found available.')
            LOG.error('No availability zone found available.')
            return

        zones = {}
        for z, w in self.zones.items():
            if z in zones_good:
                zones[z] = w

        current = cluster.get_zone_distribution(action.context, zones.keys())
        result = self._create_plan(current, zones, count, expand)
        if not result:
            action.data['status'] = base.CHECK_ERROR
            action.data['reason'] = _('There is no feasible plan to '
                                      'handle all nodes.')
            LOG.error('There is no feasible plan to handle all nodes.')
            return

        if expand:
            if 'creation' not in action.data:
                action.data['creation'] = {}
            action.data['creation']['count'] = count
            action.data['creation']['zones'] = result
        else:
            if 'deletion' not in action.data:
                action.data['deletion'] = {}
            action.data['deletion']['count'] = count
            action.data['deletion']['zones'] = result
    def test_validate(self):
        sot = schema.Map(schema={'foo': schema.String()})

        res = sot.validate({"foo": "bar"})
        self.assertIsNone(res)
class DockerProfile(base.Profile): """Profile for a docker container.""" _VALID_HOST_TYPES = [ HOST_NOVA_SERVER, HOST_HEAT_STACK, ] = [ "os.nova.server", "os.heat.stack", ] KEYS = ( CONTEXT, IMAGE, NAME, COMMAND, HOST_NODE, HOST_CLUSTER, PORT, ) = ( 'context', 'image', 'name', 'command', 'host_node', 'host_cluster', 'port', ) properties_schema = { CONTEXT: schema.Map(_('Customized security context for operating containers.')), IMAGE: schema.String( _('The image used to create a container'), required=True, ), NAME: schema.String(_('The name of the container.')), COMMAND: schema.String(_('The command to run when container is started.')), PORT: schema.Integer(_('The port number used to connect to docker daemon.'), default=2375), HOST_NODE: schema.String(_('The node on which container will be launched.')), HOST_CLUSTER: schema.String(_('The cluster on which container will be launched.')), } OPERATIONS = {} def __init__(self, type_name, name, **kwargs): super(DockerProfile, self).__init__(type_name, name, **kwargs) self._dockerclient = None self.container_id = None self.host = None self.cluster = None @classmethod def create(cls, ctx, name, spec, metadata=None): profile = super(DockerProfile, cls).create(ctx, name, spec, metadata) host_cluster = profile.properties.get(profile.HOST_CLUSTER, None) if host_cluster: db_api.cluster_add_dependents(ctx, host_cluster, profile.id) host_node = profile.properties.get(profile.HOST_NODE, None) if host_node: db_api.node_add_dependents(ctx, host_node, profile.id, 'profile') return profile @classmethod def delete(cls, ctx, profile_id): obj = cls.load(ctx, profile_id=profile_id) cluster_id = obj.properties.get(obj.HOST_CLUSTER, None) if cluster_id: db_api.cluster_remove_dependents(ctx, cluster_id, profile_id) node_id = obj.properties.get(obj.HOST_NODE, None) if node_id: db_api.node_remove_dependents(ctx, node_id, profile_id, 'profile') super(DockerProfile, cls).delete(ctx, profile_id) def docker(self, obj): """Construct docker client based on object. :param obj: Object for which the client is created. It is expected to be None when retrieving an existing client. When creating a client, it contains the user and project to be used. """ if self._dockerclient is not None: return self._dockerclient host_node = self.properties.get(self.HOST_NODE, None) host_cluster = self.properties.get(self.HOST_CLUSTER, None) ctx = context.get_admin_context() self.host = self._get_host(ctx, host_node, host_cluster) # TODO(Anyone): Check node.data for per-node host selection host_type = self.host.rt['profile'].type_name if host_type not in self._VALID_HOST_TYPES: msg = _("Type of host node (%s) is not supported") % host_type raise exc.InternalError(message=msg) host_ip = self._get_host_ip(obj, self.host.physical_id, host_type) if host_ip is None: msg = _("Unable to determine the IP address of host node") raise exc.InternalError(message=msg) url = 'tcp://%(ip)s:%(port)d' % { 'ip': host_ip, 'port': self.properties[self.PORT] } self._dockerclient = docker_driver.DockerClient(url) return self._dockerclient def _get_host(self, ctx, host_node, host_cluster): """Determine which node to launch container on. :param ctx: An instance of the request context. :param host_node: The uuid of the hosting node. :param host_cluster: The uuid of the hosting cluster. 
""" host = None if host_node is not None: try: host = node_mod.Node.load(ctx, node_id=host_node) except exc.ResourceNotFound as ex: msg = ex.enhance_msg('host', ex) raise exc.InternalError(message=msg) return host if host_cluster is not None: host = self._get_random_node(ctx, host_cluster) return host def _get_random_node(self, ctx, host_cluster): """Get a node randomly from the host cluster. :param ctx: An instance of the request context. :param host_cluster: The uuid of the hosting cluster. """ self.cluster = None try: self.cluster = cluster.Cluster.load(ctx, cluster_id=host_cluster) except exc.ResourceNotFound as ex: msg = ex.enhance_msg('host', ex) raise exc.InternalError(message=msg) nodes = self.cluster.rt['nodes'] if len(nodes) == 0: msg = _("The cluster (%s) contains no nodes") % host_cluster raise exc.InternalError(message=msg) else: good_nodes = [] for i in range(len(nodes)): if nodes[i].status == "ACTIVE": good_nodes.append(nodes[i]) if len(good_nodes) > 0: node = good_nodes[random.randrange(len(good_nodes))] else: msg = _("There is no active nodes running in the cluster (%s)" ) % host_cluster raise exc.InternalError(message=msg) return node def _get_host_ip(self, obj, host_node, host_type): """Fetch the ip address of physical node. :param obj: The node object representing the container instance. :param host_node: The name or ID of the hosting node object. :param host_type: The type of the hosting node, which can be either a nova server or a heat stack. :returns: The fixed IP address of the hosting node. """ host_ip = None if host_type == self.HOST_NOVA_SERVER: server = self.compute(obj).server_get(host_node) private_addrs = server.addresses['private'] for addr in private_addrs: if addr['version'] == 4 and addr['OS-EXT-IPS:type'] == 'fixed': host_ip = addr['addr'] elif host_type == self.HOST_HEAT_STACK: stack = self.orchestration(obj).stack_get(host_node) outputs = stack.outputs or {} if outputs: for output in outputs: if output['output_key'] == 'fixed_ip': host_ip = output['output_value'] break if not outputs or host_ip is None: msg = _("Output 'fixed_ip' is missing from the provided stack" " node") raise exc.InternalError(message=msg) return host_ip def do_validate(self, obj): """Validate if the spec has provided valid configuration. :param obj: The node object. """ cluster = self.properties[self.HOST_CLUSTER] node = self.properties[self.HOST_NODE] if all([cluster, node]): msg = _("Either '%(c)s' or '%(n)s' should be specified, but not " "both.") % { 'c': self.HOST_CLUSTER, 'n': self.HOST_NODE } raise exc.InvalidSpec(message=msg) if not any([cluster, node]): msg = _("Either '%(c)s' or '%(n)s' should be specified.") % { 'c': self.HOST_CLUSTER, 'n': self.HOST_NODE } raise exc.InvalidSpec(message=msg) if cluster: try: co.Cluster.find(self.context, cluster) except (exc.ResourceNotFound, exc.MultipleChoices): msg = _("The specified %(key)s '%(val)s' could not be found " "or is not unique.") % { 'key': self.HOST_CLUSTER, 'val': cluster } raise exc.InvalidSpec(message=msg) if node: try: no.Node.find(self.context, node) except (exc.ResourceNotFound, exc.MultipleChoices): msg = _("The specified %(key)s '%(val)s' could not be found " "or is not unique.") % { 'key': self.HOST_NODE, 'val': node } raise exc.InvalidSpec(message=msg) def do_create(self, obj): """Create a container instance using the given profile. :param obj: The node object for this container. :returns: ID of the container instance or ``None`` if driver fails. 
:raises: `EResourceCreation` """ name = self.properties[self.NAME] if name is None: name = '-'.join([obj.name, utils.random_name()]) params = { 'image': self.properties[self.IMAGE], 'name': name, 'command': self.properties[self.COMMAND], } try: ctx = context.get_admin_context() dockerclient = self.docker(obj) db_api.node_add_dependents(ctx, self.host.id, obj.id) container = dockerclient.container_create(**params) except exc.InternalError as ex: raise exc.EResourceCreation(type='container', message=six.text_type(ex)) self.container_id = container['Id'][:36] return self.container_id def do_delete(self, obj): """Delete a container node. :param obj: The node object representing the container. :returns: `None` """ if not obj.physical_id: return try: self.docker(obj).container_delete(obj.physical_id) except exc.InternalError as ex: raise exc.EResourceDeletion(type='container', id=obj.physical_id, message=six.text_type(ex)) ctx = context.get_admin_context() db_api.node_remove_dependents(ctx, self.host.id, obj.id) return
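# A minimal sketch (not part of the profile above) of how the docker
# endpoint URL used by docker() is derived from a profile spec. The
# spec layout mirrors properties_schema; the type string and all
# property values here are illustrative assumptions.
spec = {
    'type': 'container.dockerinc.docker',
    'version': '1.0',
    'properties': {
        'image': 'cirros',             # required
        'command': '/bin/sleep 3000',  # optional
        'host_node': 'fake-node-id',   # one of host_node/host_cluster
        'port': 2375,                  # docker daemon port, default 2375
    },
}

host_ip = '10.0.0.5'  # in practice resolved by _get_host_ip()
url = 'tcp://%(ip)s:%(port)d' % {'ip': host_ip,
                                 'port': spec['properties']['port']}
assert url == 'tcp://10.0.0.5:2375'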
class HealthPolicy(base.Policy): """Policy for health management of a cluster.""" VERSION = '1.0' VERSIONS = {'1.0': [{'status': consts.EXPERIMENTAL, 'since': '2017.02'}]} PRIORITY = 600 TARGET = [ ('BEFORE', consts.CLUSTER_RECOVER), ('BEFORE', consts.CLUSTER_DEL_NODES), ('BEFORE', consts.CLUSTER_SCALE_IN), ('BEFORE', consts.CLUSTER_RESIZE), ('BEFORE', consts.NODE_DELETE), ('AFTER', consts.CLUSTER_DEL_NODES), ('AFTER', consts.CLUSTER_SCALE_IN), ('AFTER', consts.CLUSTER_RESIZE), ('AFTER', consts.NODE_DELETE), ] # Should be ANY if profile provides health check support? PROFILE_TYPE = [ 'os.nova.server', 'os.heat.stack', ] KEYS = (DETECTION, RECOVERY) = ('detection', 'recovery') _DETECTION_KEYS = ( DETECTION_TYPE, DETECTION_OPTIONS, ) = ('type', 'options') _DETECTION_OPTIONS = (DETECTION_INTERVAL, ) = ('interval', ) _RECOVERY_KEYS = (RECOVERY_ACTIONS, RECOVERY_FENCING) = ('actions', 'fencing') FENCING_OPTION_VALUES = ( COMPUTE, # STORAGE, NETWORK, ) = ( 'COMPUTE', # 'STORAGE', 'NETWORK' ) ACTION_KEYS = ( ACTION_NAME, ACTION_PARAMS, ) = ( 'name', 'params', ) properties_schema = { DETECTION: schema.Map( _('Policy aspect for node failure detection.'), schema={ DETECTION_TYPE: schema.String( _('Type of node failure detection.'), constraints=[ constraints.AllowedValues(consts.DETECTION_TYPES), ], required=True, ), DETECTION_OPTIONS: schema.Map( schema={ DETECTION_INTERVAL: schema.Integer( _("Number of seconds between pollings. Only " "required when type is 'NODE_STATUS_POLLING'."), default=60, ), }), }, required=True, ), RECOVERY: schema.Map( _('Policy aspect for node failure recovery.'), schema={ RECOVERY_ACTIONS: schema.List(_('List of actions to try for node recovery.'), schema=schema.Map( _('Action to try for node recovery.'), schema={ ACTION_NAME: schema.String( _("Name of action to execute."), constraints=[ constraints.AllowedValues( consts.RECOVERY_ACTIONS), ], required=True), ACTION_PARAMS: schema.Map(_("Parameters for the action")), })), RECOVERY_FENCING: schema.List( _('List of services to be fenced.'), schema=schema.String( _('Service to be fenced.'), constraints=[ constraints.AllowedValues(FENCING_OPTION_VALUES), ], required=True, ), ), }), } def __init__(self, name, spec, **kwargs): super(HealthPolicy, self).__init__(name, spec, **kwargs) self.check_type = self.properties[self.DETECTION][self.DETECTION_TYPE] options = self.properties[self.DETECTION][self.DETECTION_OPTIONS] self.interval = options[self.DETECTION_INTERVAL] recover_settings = self.properties[self.RECOVERY] self.recover_actions = recover_settings[self.RECOVERY_ACTIONS] self.fencing_types = recover_settings[self.RECOVERY_FENCING] def validate(self, context, validate_props=False): super(HealthPolicy, self).validate(context, validate_props=validate_props) if len(self.recover_actions) > 1: message = _( "Only one '%s' is supported for now.") % self.RECOVERY_ACTIONS raise exc.ESchema(message=message) # TODO(Qiming): Add detection of duplicated action names when # support to list of actions is implemented. def attach(self, cluster, enabled=True): """"Hook for policy attach. Register the cluster for health management. :param cluster: The cluster to which the policy is being attached to. :param enabled: The attached cluster policy is enabled or disabled. :return: A tuple comprising execution result and policy data. 
""" p_type = cluster.rt['profile'].type_name action_names = [a['name'] for a in self.recover_actions] if p_type != 'os.nova.server': if consts.RECOVER_REBUILD in action_names: err_msg = _("Recovery action REBUILD is only applicable to " "os.nova.server clusters.") return False, err_msg if consts.RECOVER_REBOOT in action_names: err_msg = _("Recovery action REBOOT is only applicable to " "os.nova.server clusters.") return False, err_msg kwargs = { 'check_type': self.check_type, 'interval': self.interval, 'params': {}, 'enabled': enabled } health_manager.register(cluster.id, engine_id=None, **kwargs) data = { 'check_type': self.check_type, 'interval': self.interval, } return True, self._build_policy_data(data) def detach(self, cluster): """Hook for policy detach. Unregister the cluster for health management. :param cluster: The target cluster. :returns: A tuple comprising the execution result and reason. """ health_manager.unregister(cluster.id) return True, '' def pre_op(self, cluster_id, action, **args): """Hook before action execution. One of the task for this routine is to disable health policy if the action is a request that will shrink the cluster. The reason is that the policy may attempt to recover nodes that are to be deleted. :param cluster_id: The ID of the target cluster. :param action: The action to be examined. :param kwargs args: Other keyword arguments to be checked. :returns: Boolean indicating whether the checking passed. """ if action.action in (consts.CLUSTER_SCALE_IN, consts.CLUSTER_DEL_NODES, consts.NODE_DELETE): health_manager.disable(cluster_id) return True if action.action == consts.CLUSTER_RESIZE: deletion = action.data.get('deletion', None) if deletion: health_manager.disable(cluster_id) return True db_cluster = co.Cluster.get(action.context, cluster_id) current = no.Node.count_by_cluster(action.context, cluster_id) res, reason = scaleutils.parse_resize_params( action, db_cluster, current) if res == base.CHECK_ERROR: action.data['status'] = base.CHECK_ERROR action.data['reason'] = reason return False if action.data.get('deletion', None): health_manager.disable(cluster_id) return True pd = { 'recover_action': self.recover_actions, 'fencing': self.fencing_types, } action.data.update({'health': pd}) action.store(action.context) return True def post_op(self, cluster_id, action, **args): """Hook before action execution. One of the task for this routine is to re-enable health policy if the action is a request that will shrink the cluster thus the policy has been temporarily disabled. :param cluster_id: The ID of the target cluster. :param action: The action to be examined. :param kwargs args: Other keyword arguments to be checked. :returns: Boolean indicating whether the checking passed. """ if action.action in (consts.CLUSTER_SCALE_IN, consts.CLUSTER_DEL_NODES, consts.NODE_DELETE): health_manager.enable(cluster_id) return True if action.action == consts.CLUSTER_RESIZE: deletion = action.data.get('deletion', None) if deletion: health_manager.enable(cluster_id) return True db_cluster = co.Cluster.get(action.context, cluster_id) current = no.Node.count_by_cluster(action.context, cluster_id) res, reason = scaleutils.parse_resize_params( action, db_cluster, current) if res == base.CHECK_ERROR: action.data['status'] = base.CHECK_ERROR action.data['reason'] = reason return False if action.data.get('deletion', None): health_manager.enable(cluster_id) return True return True
class DockerProfile(base.Profile): """Profile for a docker container.""" VERSIONS = {'1.0': [{'status': consts.EXPERIMENTAL, 'since': '2017.02'}]} _VALID_HOST_TYPES = [ HOST_NOVA_SERVER, HOST_HEAT_STACK, ] = [ "os.nova.server", "os.heat.stack", ] KEYS = ( CONTEXT, IMAGE, NAME, COMMAND, HOST_NODE, HOST_CLUSTER, PORT, ) = ( 'context', 'image', 'name', 'command', 'host_node', 'host_cluster', 'port', ) properties_schema = { CONTEXT: schema.Map(_('Customized security context for operating containers.')), IMAGE: schema.String( _('The image used to create a container'), required=True, ), NAME: schema.String( _('The name of the container.'), updatable=True, ), COMMAND: schema.String(_('The command to run when container is started.')), PORT: schema.Integer(_('The port number used to connect to docker daemon.'), default=2375), HOST_NODE: schema.String(_('The node on which container will be launched.')), HOST_CLUSTER: schema.String(_('The cluster on which container will be launched.')), } OP_NAMES = ( OP_RESTART, OP_PAUSE, OP_UNPAUSE, ) = ( 'restart', 'pause', 'unpause', ) _RESTART_WAIT = (RESTART_WAIT) = ('wait_time') OPERATIONS = { OP_RESTART: schema.Operation( _("Restart a container."), schema={ RESTART_WAIT: schema.IntegerParam( _("Number of seconds to wait before killing the " "container.")) }), OP_PAUSE: schema.Operation(_("Pause a container.")), OP_UNPAUSE: schema.Operation(_("Unpause a container.")) } def __init__(self, type_name, name, **kwargs): super(DockerProfile, self).__init__(type_name, name, **kwargs) self._dockerclient = None self.container_id = None self.host = None self.cluster = None @classmethod def create(cls, ctx, name, spec, metadata=None): profile = super(DockerProfile, cls).create(ctx, name, spec, metadata) host_cluster = profile.properties.get(profile.HOST_CLUSTER, None) if host_cluster: db_api.cluster_add_dependents(ctx, host_cluster, profile.id) host_node = profile.properties.get(profile.HOST_NODE, None) if host_node: db_api.node_add_dependents(ctx, host_node, profile.id, 'profile') return profile @classmethod def delete(cls, ctx, profile_id): obj = cls.load(ctx, profile_id=profile_id) cluster_id = obj.properties.get(obj.HOST_CLUSTER, None) if cluster_id: db_api.cluster_remove_dependents(ctx, cluster_id, profile_id) node_id = obj.properties.get(obj.HOST_NODE, None) if node_id: db_api.node_remove_dependents(ctx, node_id, profile_id, 'profile') super(DockerProfile, cls).delete(ctx, profile_id) def docker(self, obj): """Construct docker client based on object. :param obj: Object for which the client is created. It is expected to be None when retrieving an existing client. When creating a client, it contains the user and project to be used. 
""" if self._dockerclient is not None: return self._dockerclient host_node = self.properties.get(self.HOST_NODE, None) host_cluster = self.properties.get(self.HOST_CLUSTER, None) ctx = context.get_admin_context() self.host = self._get_host(ctx, host_node, host_cluster) # TODO(Anyone): Check node.data for per-node host selection host_type = self.host.rt['profile'].type_name if host_type not in self._VALID_HOST_TYPES: msg = _("Type of host node (%s) is not supported") % host_type raise exc.InternalError(message=msg) host_ip = self._get_host_ip(obj, self.host.physical_id, host_type) if host_ip is None: msg = _("Unable to determine the IP address of host node") raise exc.InternalError(message=msg) url = 'tcp://%(ip)s:%(port)d' % { 'ip': host_ip, 'port': self.properties[self.PORT] } self._dockerclient = docker_driver.DockerClient(url) return self._dockerclient def _get_host(self, ctx, host_node, host_cluster): """Determine which node to launch container on. :param ctx: An instance of the request context. :param host_node: The uuid of the hosting node. :param host_cluster: The uuid of the hosting cluster. """ host = None if host_node is not None: try: host = node_mod.Node.load(ctx, node_id=host_node) except exc.ResourceNotFound as ex: msg = ex.enhance_msg('host', ex) raise exc.InternalError(message=msg) return host if host_cluster is not None: host = self._get_random_node(ctx, host_cluster) return host def _get_random_node(self, ctx, host_cluster): """Get a node randomly from the host cluster. :param ctx: An instance of the request context. :param host_cluster: The uuid of the hosting cluster. """ self.cluster = None try: self.cluster = cluster.Cluster.load(ctx, cluster_id=host_cluster) except exc.ResourceNotFound as ex: msg = ex.enhance_msg('host', ex) raise exc.InternalError(message=msg) filters = {consts.NODE_STATUS: consts.NS_ACTIVE} nodes = no.Node.get_all_by_cluster(ctx, cluster_id=host_cluster, filters=filters) if len(nodes) == 0: msg = _("The cluster (%s) contains no active nodes") % host_cluster raise exc.InternalError(message=msg) # TODO(anyone): Should pick a node by its load db_node = nodes[random.randrange(len(nodes))] return node_mod.Node.load(ctx, db_node=db_node) def _get_host_ip(self, obj, host_node, host_type): """Fetch the ip address of physical node. :param obj: The node object representing the container instance. :param host_node: The name or ID of the hosting node object. :param host_type: The type of the hosting node, which can be either a nova server or a heat stack. :returns: The fixed IP address of the hosting node. """ host_ip = None if host_type == self.HOST_NOVA_SERVER: server = self.compute(obj).server_get(host_node) private_addrs = server.addresses['private'] for addr in private_addrs: if addr['version'] == 4 and addr['OS-EXT-IPS:type'] == 'fixed': host_ip = addr['addr'] elif host_type == self.HOST_HEAT_STACK: stack = self.orchestration(obj).stack_get(host_node) outputs = stack.outputs or {} if outputs: for output in outputs: if output['output_key'] == 'fixed_ip': host_ip = output['output_value'] break if not outputs or host_ip is None: msg = _("Output 'fixed_ip' is missing from the provided stack" " node") raise exc.InternalError(message=msg) return host_ip def do_validate(self, obj): """Validate if the spec has provided valid configuration. :param obj: The node object. 
""" cluster = self.properties[self.HOST_CLUSTER] node = self.properties[self.HOST_NODE] if all([cluster, node]): msg = _("Either '%(c)s' or '%(n)s' must be specified, but not " "both.") % { 'c': self.HOST_CLUSTER, 'n': self.HOST_NODE } raise exc.InvalidSpec(message=msg) if not any([cluster, node]): msg = _("Either '%(c)s' or '%(n)s' must be specified.") % { 'c': self.HOST_CLUSTER, 'n': self.HOST_NODE } raise exc.InvalidSpec(message=msg) if cluster: try: co.Cluster.find(self.context, cluster) except (exc.ResourceNotFound, exc.MultipleChoices): msg = _("The specified %(key)s '%(val)s' could not be found " "or is not unique.") % { 'key': self.HOST_CLUSTER, 'val': cluster } raise exc.InvalidSpec(message=msg) if node: try: no.Node.find(self.context, node) except (exc.ResourceNotFound, exc.MultipleChoices): msg = _("The specified %(key)s '%(val)s' could not be found " "or is not unique.") % { 'key': self.HOST_NODE, 'val': node } raise exc.InvalidSpec(message=msg) def do_create(self, obj): """Create a container instance using the given profile. :param obj: The node object for this container. :returns: ID of the container instance or ``None`` if driver fails. :raises: `EResourceCreation` """ name = self.properties[self.NAME] if name is None: name = '-'.join([obj.name, utils.random_name()]) params = { 'image': self.properties[self.IMAGE], 'name': name, 'command': self.properties[self.COMMAND], } try: ctx = context.get_service_context(project=obj.project, user=obj.user) dockerclient = self.docker(obj) db_api.node_add_dependents(ctx, self.host.id, obj.id) container = dockerclient.container_create(**params) dockerclient.start(container['Id']) except exc.InternalError as ex: raise exc.EResourceCreation(type='container', message=six.text_type(ex)) self.container_id = container['Id'][:36] return self.container_id def do_delete(self, obj): """Delete a container node. :param obj: The node object representing the container. :returns: `None` """ if not obj.physical_id: return try: self.handle_stop(obj) self.docker(obj).container_delete(obj.physical_id) except exc.InternalError as ex: raise exc.EResourceDeletion(type='container', id=obj.physical_id, message=six.text_type(ex)) ctx = context.get_admin_context() db_api.node_remove_dependents(ctx, self.host.id, obj.id) return def do_update(self, obj, new_profile=None, **params): """Perform update on the container. :param obj: the container to operate on :param new_profile: the new profile for the container. :param params: a dictionary of optional parameters. :returns: True if update was successful or False otherwise. :raises: `EResourceUpdate` if operation fails. """ self.server_id = obj.physical_id if not self.server_id: return False if not new_profile: return False if not self.validate_for_update(new_profile): return False name_changed, new_name = self._check_container_name(obj, new_profile) if name_changed: self._update_name(obj, new_name) return True def _check_container_name(self, obj, profile): """Check if there is a new name to be assigned to the container. :param obj: The node object to operate on. :param new_profile: The new profile which may contain a name for the container. :return: A tuple consisting a boolean indicating whether the name needs change and the container name determined. 
""" old_name = self.properties[self.NAME] or obj.name new_name = profile.properties[self.NAME] or obj.name if old_name == new_name: return False, new_name return True, new_name def _update_name(self, obj, new_name): try: self.docker(obj).rename(obj.physical_id, new_name) except exc.InternalError as ex: raise exc.EResourceUpdate(type='container', id=obj.physical_id, message=six.text_type(ex)) def handle_reboot(self, obj, **options): """Handler for a reboot operation. :param obj: The node object representing the container. :returns: None """ if not obj.physical_id: return if 'timeout' in options: params = {'timeout': options['timeout']} else: params = {} try: self.docker(obj).restart(obj.physical_id, **params) except exc.InternalError as ex: raise exc.EResourceOperation(type='container', id=obj.physical_id[:8], op='rebooting', message=six.text_type(ex)) return def handle_pause(self, obj): """Handler for a pause operation. :param obj: The node object representing the container. :returns: None """ if not obj.physical_id: return try: self.docker(obj).pause(obj.physical_id) except exc.InternalError as ex: raise exc.EResourceOperation(type='container', id=obj.physical_id[:8], op='pausing', message=six.text_type(ex)) return def handle_unpause(self, obj): """Handler for an unpause operation. :param obj: The node object representing the container. :returns: None """ if not obj.physical_id: return try: self.docker(obj).unpause(obj.physical_id) except exc.InternalError as ex: raise exc.EResourceOperation(type='container', id=obj.physical_id[:8], op='unpausing', message=six.text_type(ex)) return def handle_stop(self, obj, **options): """Handler for the stop operation.""" if not obj.physical_id: return timeout = options.get('timeout', None) if timeout: timeout = int(timeout) try: self.docker(obj).stop(obj.physical_id, timeout=timeout) except exc.InternalError as ex: raise exc.EResourceOperation(type='container', id=obj.physical_id[:8], op='stop', message=six.text_type(ex))
class Alarm(base.Trigger): # time constraints alarm_schema = { REPEAT: schema.Boolean( _('Whether the actions should be re-triggered on each evaluation ' 'cycle. Defaults to False.'), default=False, ), TIME_CONSTRAINTS: schema.List(schema=schema.Map( _('A map of time constraint settings.'), schema={ NAME: schema.String(_('Name of the time constraint.'), ), TC_DESCRIPTION: schema.String(_('A description of the time constraint.'), ), TC_START: schema.String( _('Start point of the time constraint, expressed as a ' 'string in cron expression format.'), required=True, ), TC_DURATION: schema.Integer( _('How long the constraint should last, in seconds.'), required=True, ), TC_TIMEZONE: schema.String( _('Time zone of the constraint.'), default='', ), }, ), ) } def __init__(self, name, spec, **kwargs): super(Alarm, self).__init__(name, spec, **kwargs) self.alarm_properties = schema.Spec(self.alarm_schema, spec) self.namespace = 'default' self.rule = None def validate(self): # validate cron expression if specified if TIME_CONSTRAINTS in self.spec: tcs = self.alarm_properties[TIME_CONSTRAINTS] for tc in tcs: exp = tc.get(TC_START, '') try: croniter.croniter(exp) except Exception as ex: msg = _("Invalid cron expression specified for property " "'%(property)s' (%(exp)s): %(ex)s") % { 'property': TC_START, 'exp': exp, 'ex': six.text_type(ex) } raise exc.InvalidSpec(message=msg) tz = tc.get(TC_TIMEZONE, '') try: pytz.timezone(tz) except Exception as ex: msg = _("Invalid timezone value specified for property " "'%(property)s' (%(tz)s): %(ex)s") % { 'property': TC_TIMEZONE, 'tz': tz, 'ex': six.text_type(ex) } raise exc.InvalidSpec(message=msg) def create(self, ctx, **kwargs): """Create an alarm for a cluster. :param ctx: The request context. :param kwargs: Keyword arguments providing the action lists (OK_ACTIONS, ALARM_ACTIONS and INSUFFICIENT_DATA_ACTIONS) for the alarm. :returns: A tuple containing the operation result and either a dict of alarm properties or an error string. """ self.ok_actions = kwargs.get(OK_ACTIONS, []) self.alarm_actions = kwargs.get(ALARM_ACTIONS, []) self.insufficient_data_actions = kwargs.get(INSUFFICIENT_DATA_ACTIONS, []) rule_name = self.namespace + '_rule' rule_data = dict((k, v) for k, v in self.rule.items()) params = { NAME: self.name, DESCRIPTION: self.desc, TYPE: self.namespace, STATE: self.state, SEVERITY: self.severity, ENABLED: self.enabled, OK_ACTIONS: self.ok_actions, ALARM_ACTIONS: self.alarm_actions, INSUFFICIENT_DATA_ACTIONS: self.insufficient_data_actions, TIME_CONSTRAINTS: self.alarm_properties[TIME_CONSTRAINTS], REPEAT: self.alarm_properties[REPEAT], rule_name: rule_data, } try: cc = driver_base.SenlinDriver().telemetry(ctx.to_dict()) alarm = cc.alarm_create(**params) self.physical_id = alarm.id self.store(ctx) return True, alarm.to_dict() except exc.SenlinException as ex: return False, six.text_type(ex) def delete(self, ctx, identifier): """Delete an alarm. :param ctx: The request context. :param identifier: This must be an alarm ID. """ try: cc = driver_base.SenlinDriver().telemetry(ctx) res = cc.alarm_delete(identifier, True) return True, res except exc.InternalError as ex: return False, six.text_type(ex) def update(self, identifier, values): return NotImplemented
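# A minimal sketch of the checks performed in Alarm.validate() above,
# assuming the croniter and pytz packages are importable (this module
# already depends on both).
import croniter
import pytz

def check_time_constraint(start, timezone):
    croniter.croniter(start)  # raises ValueError on a bad cron expression
    pytz.timezone(timezone)   # raises UnknownTimeZoneError if unknown

check_time_constraint('0 23 * * *', 'UTC')  # passes silently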
class Profile(object): """Base class for profiles.""" VERSIONS = {} KEYS = ( TYPE, VERSION, PROPERTIES, ) = ( 'type', 'version', 'properties', ) spec_schema = { TYPE: schema.String( _('Name of the profile type.'), required=True, ), VERSION: schema.String( _('Version number of the profile type.'), required=True, ), PROPERTIES: schema.Map( _('Properties for the profile.'), required=True, ) } properties_schema = {} OPERATIONS = {} def __new__(cls, name, spec, **kwargs): """Create a new profile of the appropriate class. :param name: The name for the profile. :param spec: A dictionary containing the spec for the profile. :param kwargs: Keyword arguments for profile creation. :returns: An instance of a specific sub-class of Profile. """ type_name, version = schema.get_spec_version(spec) type_str = "-".join([type_name, version]) if cls != Profile: ProfileClass = cls else: ProfileClass = environment.global_env().get_profile(type_str) return super(Profile, cls).__new__(ProfileClass) def __init__(self, name, spec, **kwargs): """Initialize a profile instance. :param name: A string that specifies the name for the profile. :param spec: A dictionary containing the detailed profile spec. :param kwargs: Keyword arguments for initializing the profile. :returns: An instance of a specific sub-class of Profile. """ type_name, version = schema.get_spec_version(spec) self.type_name = type_name self.version = version type_str = "-".join([type_name, version]) self.name = name self.spec = spec self.id = kwargs.get('id', None) self.type = kwargs.get('type', type_str) self.user = kwargs.get('user') self.project = kwargs.get('project') self.domain = kwargs.get('domain') self.metadata = kwargs.get('metadata', {}) self.created_at = kwargs.get('created_at', None) self.updated_at = kwargs.get('updated_at', None) self.spec_data = schema.Spec(self.spec_schema, self.spec) self.properties = schema.Spec(self.properties_schema, self.spec.get(self.PROPERTIES, {}), version) if not self.id: # new object needs a context dict self.context = self._init_context() else: self.context = kwargs.get('context') # initialize clients self._computeclient = None self._networkclient = None self._orchestrationclient = None @classmethod def _from_object(cls, profile): '''Construct a profile from profile object. :param profile: a profile object that contains all required fields. ''' kwargs = { 'id': profile.id, 'type': profile.type, 'context': profile.context, 'user': profile.user, 'project': profile.project, 'domain': profile.domain, 'metadata': profile.metadata, 'created_at': profile.created_at, 'updated_at': profile.updated_at, } return cls(profile.name, profile.spec, **kwargs) @classmethod def load(cls, ctx, profile=None, profile_id=None, project_safe=True): '''Retrieve a profile object from database.''' if profile is None: profile = po.Profile.get(ctx, profile_id, project_safe=project_safe) if profile is None: raise exc.ResourceNotFound(type='profile', id=profile_id) return cls._from_object(profile) @classmethod def create(cls, ctx, name, spec, metadata=None): """Create a profile object and validate it. :param ctx: The requesting context. :param name: The name for the profile object. :param spec: A dict containing the detailed spec. :param metadata: An optional dictionary specifying key-value pairs to be associated with the profile. :returns: An instance of Profile. 
""" if metadata is None: metadata = {} profile = None try: profile = cls(name, spec, metadata=metadata, user=ctx.user, project=ctx.project) profile.validate(True) except (exc.ResourceNotFound, exc.ESchema) as ex: error = _("Failed in creating profile %(name)s: %(error)s") % { "name": name, "error": six.text_type(ex) } raise exc.InvalidSpec(message=error) profile.store(ctx) return profile @classmethod def delete(cls, ctx, profile_id): po.Profile.delete(ctx, profile_id) def store(self, ctx): '''Store the profile into database and return its ID.''' timestamp = timeutils.utcnow(True) values = { 'name': self.name, 'type': self.type, 'context': self.context, 'spec': self.spec, 'user': self.user, 'project': self.project, 'domain': self.domain, 'meta_data': self.metadata, } if self.id: self.updated_at = timestamp values['updated_at'] = timestamp po.Profile.update(ctx, self.id, values) else: self.created_at = timestamp values['created_at'] = timestamp profile = po.Profile.create(ctx, values) self.id = profile.id return self.id @classmethod @profiler.trace('Profile.create_object', hide_args=False) def create_object(cls, ctx, obj): profile = cls.load(ctx, profile_id=obj.profile_id) return profile.do_create(obj) @classmethod @profiler.trace('Profile.delete_object', hide_args=False) def delete_object(cls, ctx, obj, **params): profile = cls.load(ctx, profile_id=obj.profile_id) return profile.do_delete(obj, **params) @classmethod @profiler.trace('Profile.update_object', hide_args=False) def update_object(cls, ctx, obj, new_profile_id=None, **params): profile = cls.load(ctx, profile_id=obj.profile_id) new_profile = None if new_profile_id: new_profile = cls.load(ctx, profile_id=new_profile_id) return profile.do_update(obj, new_profile, **params) @classmethod @profiler.trace('Profile.get_details', hide_args=False) def get_details(cls, ctx, obj): profile = cls.load(ctx, profile_id=obj.profile_id) return profile.do_get_details(obj) @classmethod @profiler.trace('Profile.adopt_node', hide_args=False) def adopt_node(cls, ctx, obj, type_name, overrides=None, snapshot=False): """Adopt a node. :param ctx: Request context. :param obj: A temporary node object. :param overrides: An optional parameter that specifies the set of properties to be overridden. :param snapshot: A boolean flag indicating whether a snapshot should be created before adopting the node. :returns: A dictionary containing the profile spec created from the specific node, or a dictionary containing error message. 
""" parts = type_name.split("-") tmpspec = {"type": parts[0], "version": parts[1]} profile = cls("name", tmpspec) return profile.do_adopt(obj, overrides=overrides, snapshot=snapshot) @classmethod @profiler.trace('Profile.join_cluster', hide_args=False) def join_cluster(cls, ctx, obj, cluster_id): profile = cls.load(ctx, profile_id=obj.profile_id) return profile.do_join(obj, cluster_id) @classmethod @profiler.trace('Profile.leave_cluster', hide_args=False) def leave_cluster(cls, ctx, obj): profile = cls.load(ctx, profile_id=obj.profile_id) return profile.do_leave(obj) @classmethod @profiler.trace('Profile.check_object', hide_args=False) def check_object(cls, ctx, obj): profile = cls.load(ctx, profile_id=obj.profile_id) try: return profile.do_check(obj) except exc.InternalError as ex: LOG.error(ex) return False @classmethod @profiler.trace('Profile.recover_object', hide_args=False) def recover_object(cls, ctx, obj, **options): profile = cls.load(ctx, profile_id=obj.profile_id) return profile.do_recover(obj, **options) def validate(self, validate_props=False): """Validate the schema and the data provided.""" # general validation self.spec_data.validate() self.properties.validate() ctx_dict = self.properties.get('context', {}) if ctx_dict: argspec = inspect.getargspec(context.RequestContext.__init__) valid_keys = argspec.args bad_keys = [k for k in ctx_dict if k not in valid_keys] if bad_keys: msg = _("Some keys in 'context' are invalid: %s") % bad_keys raise exc.ESchema(message=msg) if validate_props: self.do_validate(obj=self) @classmethod def get_schema(cls): return dict((name, dict(schema)) for name, schema in cls.properties_schema.items()) @classmethod def get_ops(cls): return dict( (name, dict(schema)) for name, schema in cls.OPERATIONS.items()) def _init_context(self): profile_context = {} if self.CONTEXT in self.properties: profile_context = self.properties[self.CONTEXT] or {} ctx_dict = context.get_service_credentials(**profile_context) ctx_dict.pop('project_name', None) ctx_dict.pop('project_domain_name', None) return ctx_dict def _build_conn_params(self, user, project): """Build connection params for specific user and project. :param user: The ID of the user for which a trust will be used. :param project: The ID of the project for which a trust will be used. :returns: A dict containing the required parameters for connection creation. """ cred = co.Credential.get(oslo_context.get_current(), user, project) if cred is None: raise exc.TrustNotFound(trustor=user) trust_id = cred.cred['openstack']['trust'] # This is supposed to be trust-based authentication params = copy.deepcopy(self.context) params['trust_id'] = trust_id return params def compute(self, obj): '''Construct compute client based on object. :param obj: Object for which the client is created. It is expected to be None when retrieving an existing client. When creating a client, it contains the user and project to be used. ''' if self._computeclient is not None: return self._computeclient params = self._build_conn_params(obj.user, obj.project) self._computeclient = driver_base.SenlinDriver().compute(params) return self._computeclient def network(self, obj): """Construct network client based on object. :param obj: Object for which the client is created. It is expected to be None when retrieving an existing client. When creating a client, it contains the user and project to be used. 
""" if self._networkclient is not None: return self._networkclient params = self._build_conn_params(obj.user, obj.project) self._networkclient = driver_base.SenlinDriver().network(params) return self._networkclient def orchestration(self, obj): """Construct orchestration client based on object. :param obj: Object for which the client is created. It is expected to be None when retrieving an existing client. When creating a client, it contains the user and project to be used. """ if self._orchestrationclient is not None: return self._orchestrationclient params = self._build_conn_params(obj.user, obj.project) oc = driver_base.SenlinDriver().orchestration(params) self._orchestrationclient = oc return oc def do_create(self, obj): """For subclass to override.""" raise NotImplementedError def do_delete(self, obj, **params): """For subclass to override.""" raise NotImplementedError def do_update(self, obj, new_profile, **params): """For subclass to override.""" LOG.warning("Update operation not supported.") return True def do_check(self, obj): """For subclass to override.""" LOG.warning("Check operation not supported.") return True def do_get_details(self, obj): """For subclass to override.""" LOG.warning("Get_details operation not supported.") return {} def do_adopt(self, obj, overrides=None, snapshot=False): """For subclass to overrid.""" LOG.warning("Adopt operation not supported.") return {} def do_join(self, obj, cluster_id): """For subclass to override to perform extra operations.""" LOG.warning("Join operation not specialized.") return True def do_leave(self, obj): """For subclass to override to perform extra operations.""" LOG.warning("Leave operation not specialized.") return True def do_recover(self, obj, **options): """Default recover operation. This is provided as a fallback if a specific profile type does not override this method. :param obj: The node object to operate on. :param options: Keyword arguments for the recover operation. 
""" operation = options.pop('operation', None) # The operation is a list of action names with optional parameters if operation and not isinstance(operation, six.string_types): operation = operation[0] if operation and operation['name'] != consts.RECOVER_RECREATE: LOG.error("Recover operation not supported: %s", operation) return False extra_params = options.get('params', {}) fence_compute = extra_params.get('fence_compute', False) try: self.do_delete(obj, force=fence_compute) except exc.EResourceDeletion as ex: raise exc.EResourceOperation(op='recovering', type='node', id=obj.id, message=six.text_type(ex)) res = None try: res = self.do_create(obj) except exc.EResourceCreation as ex: raise exc.EResourceOperation(op='recovering', type='node', id=obj.id, message=six.text_type(ex)) return res def do_validate(self, obj): """For subclass to override.""" LOG.warning("Validate operation not supported.") return True def to_dict(self): pb_dict = { 'id': self.id, 'name': self.name, 'type': self.type, 'user': self.user, 'project': self.project, 'domain': self.domain, 'spec': self.spec, 'metadata': self.metadata, 'created_at': utils.isotime(self.created_at), 'updated_at': utils.isotime(self.updated_at), } return pb_dict def validate_for_update(self, new_profile): non_updatables = [] for (k, v) in new_profile.properties.items(): if self.properties.get(k, None) != v: if not self.properties_schema[k].updatable: non_updatables.append(k) if not non_updatables: return True msg = ", ".join(non_updatables) LOG.error("The following properties are not updatable: %s.", msg) return False
class ThresholdAlarm(Alarm): rule_schema = { METER_NAME: schema.String( _('Name of a meter to evaluate against.'), required=True, ), OPERATOR: schema.String( _('Comparison operator for evaluation.'), constraints=[ constraints.AllowedValues(OPERATOR_VALUES), ], default=OP_EQUAL, ), THRESHOLD: schema.Number(_('Threshold for evaluation.'), required=True), PERIOD: schema.Integer( _('Length of every evaluation period in seconds.'), default=60, ), EVALUATIONS: schema.Integer( _('Number of periods to evaluate over.'), default=1, ), STATISTIC: schema.String( _('Statistics to evaluate. Must be one of %s, defaults to "avg".') % list(STATISTIC_VALUES), constraints=[ constraints.AllowedValues(STATISTIC_VALUES), ], default=SV_AVG, ), QUERY: schema.List( _('The query to find the data for computing statistics.'), schema=schema.Map( schema={ Q_FIELD: schema.String( _('A field of a meter to query.'), required=True, ), Q_OP: schema.String( _('An operator for meter comparison.'), default='==', ), Q_VALUE: schema.String( _('A value for comparison.'), required=True, ) }), ) } def __init__(self, name, spec, **kwargs): super(ThresholdAlarm, self).__init__(name, spec, **kwargs) rule_spec = spec.get('rule', {}) self.rule = schema.Spec(self.rule_schema, rule_spec) self.namespace = 'threshold'
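# A toy sketch of the evaluation semantics encoded by rule_schema above:
# with period=60 and evaluations=3, the chosen statistic must violate
# the threshold for three consecutive 60-second windows before the alarm
# changes state. The operator names below are illustrative only.
import operator

_OPS = {'gt': operator.gt, 'lt': operator.lt, 'eq': operator.eq}

def alarm_fires(stats, threshold, op='gt', evaluations=3):
    recent = stats[-evaluations:]
    return len(recent) == evaluations and all(
        _OPS[op](s, threshold) for s in recent)

assert alarm_fires([10, 85, 90, 95], threshold=80) is True
assert alarm_fires([10, 85, 70, 95], threshold=80) is False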
class ScalingPolicy(base.Policy): """Policy for changing the size of a cluster. This policy is expected to be enforced before the node count of a cluster is changed. """ VERSION = '1.0' PRIORITY = 100 TARGET = [ ('BEFORE', consts.CLUSTER_SCALE_IN), ('BEFORE', consts.CLUSTER_SCALE_OUT), ] PROFILE_TYPE = [ 'ANY', ] KEYS = ( EVENT, ADJUSTMENT, ) = ( 'event', 'adjustment', ) _SUPPORTED_EVENTS = ( CLUSTER_SCALE_IN, CLUSTER_SCALE_OUT, ) = ( consts.CLUSTER_SCALE_IN, consts.CLUSTER_SCALE_OUT, ) _ADJUSTMENT_KEYS = ( ADJUSTMENT_TYPE, ADJUSTMENT_NUMBER, MIN_STEP, BEST_EFFORT, COOLDOWN, ) = ( 'type', 'number', 'min_step', 'best_effort', 'cooldown', ) properties_schema = { EVENT: schema.String( _('Event that will trigger this policy. Must be one of ' 'CLUSTER_SCALE_IN and CLUSTER_SCALE_OUT.'), constraints=[ constraints.AllowedValues(_SUPPORTED_EVENTS), ], required=True, ), ADJUSTMENT: schema.Map( _('Detailed specification for scaling adjustments.'), schema={ ADJUSTMENT_TYPE: schema.String( _('Type of adjustment when scaling is triggered.'), constraints=[ constraints.AllowedValues(consts.ADJUSTMENT_TYPES), ], default=consts.CHANGE_IN_CAPACITY, ), ADJUSTMENT_NUMBER: schema.Number( _('A number specifying the amount of adjustment.'), default=1, ), MIN_STEP: schema.Integer( _('When adjustment type is set to "CHANGE_IN_PERCENTAGE",' ' this specifies that the cluster size will be decreased ' 'by at least this number of nodes.'), default=1, ), BEST_EFFORT: schema.Boolean( _('Whether to do best-effort scaling when the new size of ' 'the cluster will break the size limitation.'), default=False, ), COOLDOWN: schema.Integer( _('Number of seconds to hold the cluster for cool-down ' 'before allowing cluster to be resized again.'), default=0, ), }), } def __init__(self, name, spec, **kwargs): """Initialize a scaling policy object. :param name: Name for the policy object. :param spec: A dictionary containing the detailed specification for the policy. :param \*\*kwargs: Other optional parameters for policy object creation. :return: An object of `ScalingPolicy`. """ super(ScalingPolicy, self).__init__(name, spec, **kwargs) self.singleton = False self.event = self.properties[self.EVENT] adjustment = self.properties[self.ADJUSTMENT] self.adjustment_type = adjustment[self.ADJUSTMENT_TYPE] self.adjustment_number = adjustment[self.ADJUSTMENT_NUMBER] self.adjustment_min_step = adjustment[self.MIN_STEP] self.best_effort = adjustment[self.BEST_EFFORT] self.cooldown = adjustment[self.COOLDOWN] def _calculate_adjustment_count(self, current_size): """Calculate adjustment count based on current_size. :param current_size: The current size of the target cluster. :return: The number of nodes to add or to remove. """ if self.adjustment_type == consts.EXACT_CAPACITY: if self.event == consts.CLUSTER_SCALE_IN: count = current_size - self.adjustment_number else: count = self.adjustment_number - current_size elif self.adjustment_type == consts.CHANGE_IN_CAPACITY: count = self.adjustment_number else: # consts.CHANGE_IN_PERCENTAGE: count = int((self.adjustment_number * current_size) / 100.0) if count < self.adjustment_min_step: count = self.adjustment_min_step return count def pre_op(self, cluster_id, action): """The hook function that is executed before the action. The checking result is stored in the ``data`` property of the action object rather than returned directly from the function. :param cluster_id: The ID of the target cluster. :param action: Action instance against which the policy is being checked. :return: None.
""" # Use action input if count is provided count = action.inputs.get('count', None) current = no.Node.count_by_cluster(action.context, cluster_id) if count is None: # count not specified, calculate it count = self._calculate_adjustment_count(current) # Count must be positive value try: count = utils.parse_int_param('count', count, allow_zero=False) except exception.InvalidParameter: action.data.update({ 'status': base.CHECK_ERROR, 'reason': _("Invalid count (%(c)s) for action '%(a)s'.") % { 'c': count, 'a': action.action } }) action.store(action.context) return # Check size constraints cluster = db_api.cluster_get(action.context, cluster_id) if action.action == consts.CLUSTER_SCALE_IN: if self.best_effort: count = min(count, current - cluster.min_size) result = su.check_size_params(cluster, current - count, strict=not self.best_effort) else: if self.best_effort: count = min(count, cluster.max_size - current) result = su.check_size_params(cluster, current + count, strict=not self.best_effort) if result: # failed validation pd = {'status': base.CHECK_ERROR, 'reason': result} else: # passed validation pd = { 'status': base.CHECK_OK, 'reason': _('Scaling request validated.'), } if action.action == consts.CLUSTER_SCALE_IN: pd['deletion'] = {'count': count} else: pd['creation'] = {'count': count} action.data.update(pd) action.store(action.context) return def need_check(self, target, action): res = super(ScalingPolicy, self).need_check(target, action) if res: # Check if the action is expected by the policy res = (self.event == action.action) return res
class HealthPolicy(base.Policy): """Policy for health management of a cluster.""" VERSION = '1.0' VERSIONS = { '1.0': [ { 'status': consts.EXPERIMENTAL, 'since': '2017.02' }, { 'status': consts.SUPPORTED, 'since': '2018.06' }, ] } PRIORITY = 600 TARGET = [ ('BEFORE', consts.CLUSTER_RECOVER), ('BEFORE', consts.CLUSTER_DEL_NODES), ('BEFORE', consts.CLUSTER_SCALE_IN), ('BEFORE', consts.CLUSTER_RESIZE), ('BEFORE', consts.NODE_DELETE), ('AFTER', consts.CLUSTER_DEL_NODES), ('AFTER', consts.CLUSTER_SCALE_IN), ('AFTER', consts.CLUSTER_RESIZE), ('AFTER', consts.NODE_DELETE), ] # Should be ANY if profile provides health check support? PROFILE_TYPE = [ 'os.nova.server', 'os.heat.stack', ] KEYS = (DETECTION, RECOVERY) = ('detection', 'recovery') _DETECTION_KEYS = ( DETECTION_TYPE, DETECTION_OPTIONS, ) = ('type', 'options') _DETECTION_OPTIONS = ( DETECTION_INTERVAL, POLL_URL, POLL_URL_SSL_VERIFY, POLL_URL_HEALTHY_RESPONSE, POLL_URL_RETRY_LIMIT, POLL_URL_RETRY_INTERVAL, NODE_UPDATE_TIMEOUT, ) = ( 'interval', 'poll_url', 'poll_url_ssl_verify', 'poll_url_healthy_response', 'poll_url_retry_limit', 'poll_url_retry_interval', 'node_update_timeout', ) _RECOVERY_KEYS = ( RECOVERY_ACTIONS, RECOVERY_FENCING, RECOVERY_DELETE_TIMEOUT, RECOVERY_FORCE_RECREATE, ) = ( 'actions', 'fencing', 'node_delete_timeout', 'node_force_recreate', ) FENCING_OPTION_VALUES = ( COMPUTE, # STORAGE, NETWORK, ) = ( 'COMPUTE', # 'STORAGE', 'NETWORK' ) ACTION_KEYS = ( ACTION_NAME, ACTION_PARAMS, ) = ( 'name', 'params', ) properties_schema = { DETECTION: schema.Map( _('Policy aspect for node failure detection.'), schema={ DETECTION_TYPE: schema.String( _('Type of node failure detection.'), constraints=[ constraints.AllowedValues(consts.DETECTION_TYPES), ], required=True, ), DETECTION_OPTIONS: schema.Map(schema={ DETECTION_INTERVAL: schema.Integer( _("Number of seconds between pollings. Only " "required when type is 'NODE_STATUS_POLLING' or " "'NODE_STATUS_POLL_URL'."), default=60, ), POLL_URL: schema.String( _("URL to poll for node status. See documentation " "for valid expansion parameters. Only required " "when type is 'NODE_STATUS_POLL_URL'."), default='', ), POLL_URL_SSL_VERIFY: schema.Boolean( _("Whether to verify SSL when calling URL to poll " "for node status. Only required when type is " "'NODE_STATUS_POLL_URL'."), default=True, ), POLL_URL_HEALTHY_RESPONSE: schema.String( _("String pattern in the poll URL response body " "that indicates a healthy node. " "Required when type is 'NODE_STATUS_POLL_URL'."), default='', ), POLL_URL_RETRY_LIMIT: schema.Integer( _("Number of times to retry URL polling when its " "return body is missing " "POLL_URL_HEALTHY_RESPONSE string before a node " "is considered down. Required when type is " "'NODE_STATUS_POLL_URL'."), default=3, ), POLL_URL_RETRY_INTERVAL: schema.Integer( _("Number of seconds between URL polling retries " "before a node is considered down. " "Required when type is 'NODE_STATUS_POLL_URL'."), default=3, ), NODE_UPDATE_TIMEOUT: schema.Integer( _("Number of seconds since last node update to " "wait before checking node health. 
" "Required when type is 'NODE_STATUS_POLL_URL'."), default=300, ), }, default={}), }, required=True, ), RECOVERY: schema.Map( _('Policy aspect for node failure recovery.'), schema={ RECOVERY_ACTIONS: schema.List(_('List of actions to try for node recovery.'), schema=schema.Map( _('Action to try for node recovery.'), schema={ ACTION_NAME: schema.String( _("Name of action to execute."), constraints=[ constraints.AllowedValues( consts.RECOVERY_ACTIONS), ], required=True), ACTION_PARAMS: schema.Map(_("Parameters for the action")), })), RECOVERY_FENCING: schema.List( _('List of services to be fenced.'), schema=schema.String( _('Service to be fenced.'), constraints=[ constraints.AllowedValues(FENCING_OPTION_VALUES), ], required=True, ), ), RECOVERY_DELETE_TIMEOUT: schema.Integer( _("Number of seconds to wait for node deletion to " "finish and start node creation for recreate " "recovery option. Required when type is " "'NODE_STATUS_POLL_URL and recovery action " "is RECREATE'."), default=20, ), RECOVERY_FORCE_RECREATE: schema.Boolean( _("Whether to create node even if node deletion " "failed. Required when type is " "'NODE_STATUS_POLL_URL' and action recovery " "action is RECREATE."), default=False, ), }), } def __init__(self, name, spec, **kwargs): super(HealthPolicy, self).__init__(name, spec, **kwargs) self.check_type = self.properties[self.DETECTION][self.DETECTION_TYPE] options = self.properties[self.DETECTION][self.DETECTION_OPTIONS] self.interval = options.get(self.DETECTION_INTERVAL, 60) self.poll_url = options.get(self.POLL_URL, '') self.poll_url_ssl_verify = options.get(self.POLL_URL_SSL_VERIFY, True) self.poll_url_healthy_response = options.get( self.POLL_URL_HEALTHY_RESPONSE, '') self.poll_url_retry_limit = options.get(self.POLL_URL_RETRY_LIMIT, '') self.poll_url_retry_interval = options.get( self.POLL_URL_RETRY_INTERVAL, '') self.node_update_timeout = options.get(self.NODE_UPDATE_TIMEOUT, 300) recover_settings = self.properties[self.RECOVERY] self.recover_actions = recover_settings[self.RECOVERY_ACTIONS] self.fencing_types = recover_settings[self.RECOVERY_FENCING] self.node_delete_timeout = recover_settings.get( self.RECOVERY_DELETE_TIMEOUT, None) self.node_force_recreate = recover_settings.get( self.RECOVERY_FORCE_RECREATE, False) def validate(self, context, validate_props=False): super(HealthPolicy, self).validate(context, validate_props=validate_props) if len(self.recover_actions) > 1: message = _( "Only one '%s' is supported for now.") % self.RECOVERY_ACTIONS raise exc.ESchema(message=message) if self.interval < cfg.CONF.health_check_interval_min: message = _("Specified interval of %(interval)d seconds has to be " "larger than health_check_interval_min of " "%(min_interval)d seconds set in configuration.") % { "interval": self.interval, "min_interval": cfg.CONF.health_check_interval_min } raise exc.InvalidSpec(message=message) # TODO(Qiming): Add detection of duplicated action names when # support to list of actions is implemented. def attach(self, cluster, enabled=True): """"Hook for policy attach. Register the cluster for health management. :param cluster: The cluster to which the policy is being attached to. :param enabled: The attached cluster policy is enabled or disabled. :return: A tuple comprising execution result and policy data. 
""" p_type = cluster.rt['profile'].type_name action_names = [a['name'] for a in self.recover_actions] if p_type != 'os.nova.server': if consts.RECOVER_REBUILD in action_names: err_msg = _("Recovery action REBUILD is only applicable to " "os.nova.server clusters.") return False, err_msg if consts.RECOVER_REBOOT in action_names: err_msg = _("Recovery action REBOOT is only applicable to " "os.nova.server clusters.") return False, err_msg kwargs = { 'check_type': self.check_type, 'interval': self.interval, 'params': { 'recover_action': self.recover_actions, 'poll_url': self.poll_url, 'poll_url_ssl_verify': self.poll_url_ssl_verify, 'poll_url_healthy_response': self.poll_url_healthy_response, 'poll_url_retry_limit': self.poll_url_retry_limit, 'poll_url_retry_interval': self.poll_url_retry_interval, 'node_update_timeout': self.node_update_timeout, 'node_delete_timeout': self.node_delete_timeout, 'node_force_recreate': self.node_force_recreate, }, 'enabled': enabled } health_manager.register(cluster.id, engine_id=None, **kwargs) data = { 'check_type': self.check_type, 'interval': self.interval, 'poll_url': self.poll_url, 'poll_url_ssl_verify': self.poll_url_ssl_verify, 'poll_url_healthy_response': self.poll_url_healthy_response, 'poll_url_retry_limit': self.poll_url_retry_limit, 'poll_url_retry_interval': self.poll_url_retry_interval, 'node_update_timeout': self.node_update_timeout, 'node_delete_timeout': self.node_delete_timeout, 'node_force_recreate': self.node_force_recreate, } return True, self._build_policy_data(data) def detach(self, cluster): """Hook for policy detach. Unregister the cluster for health management. :param cluster: The target cluster. :returns: A tuple comprising the execution result and reason. """ health_manager.unregister(cluster.id) return True, '' def pre_op(self, cluster_id, action, **args): """Hook before action execution. One of the task for this routine is to disable health policy if the action is a request that will shrink the cluster. The reason is that the policy may attempt to recover nodes that are to be deleted. :param cluster_id: The ID of the target cluster. :param action: The action to be examined. :param kwargs args: Other keyword arguments to be checked. :returns: Boolean indicating whether the checking passed. """ if action.action in (consts.CLUSTER_SCALE_IN, consts.CLUSTER_DEL_NODES, consts.NODE_DELETE): health_manager.disable(cluster_id) return True if action.action == consts.CLUSTER_RESIZE: deletion = action.data.get('deletion', None) if deletion: health_manager.disable(cluster_id) return True cluster = action.entity current = len(cluster.nodes) res, reason = scaleutils.parse_resize_params( action, cluster, current) if res == base.CHECK_ERROR: action.data['status'] = base.CHECK_ERROR action.data['reason'] = reason return False if action.data.get('deletion', None): health_manager.disable(cluster_id) return True pd = { 'recover_action': self.recover_actions, 'fencing': self.fencing_types, } action.data.update({'health': pd}) action.store(action.context) return True def post_op(self, cluster_id, action, **args): """Hook before action execution. One of the task for this routine is to re-enable health policy if the action is a request that will shrink the cluster thus the policy has been temporarily disabled. :param cluster_id: The ID of the target cluster. :param action: The action to be examined. :param kwargs args: Other keyword arguments to be checked. :returns: Boolean indicating whether the checking passed. 
""" if action.action in (consts.CLUSTER_SCALE_IN, consts.CLUSTER_DEL_NODES, consts.NODE_DELETE): health_manager.enable(cluster_id) return True if action.action == consts.CLUSTER_RESIZE: deletion = action.data.get('deletion', None) if deletion: health_manager.enable(cluster_id) return True cluster = action.entity current = len(cluster.nodes) res, reason = scaleutils.parse_resize_params( action, cluster, current) if res == base.CHECK_ERROR: action.data['status'] = base.CHECK_ERROR action.data['reason'] = reason return False if action.data.get('deletion', None): health_manager.enable(cluster_id) return True return True
class ScalingPolicy(base.Policy): """Policy for changing the size of a cluster. This policy is expected to be enforced before the node count of a cluster is changed. """ VERSION = '1.0' VERSIONS = {'1.0': [{'status': consts.SUPPORTED, 'since': '2016.04'}]} PRIORITY = 100 TARGET = [ ('BEFORE', consts.CLUSTER_SCALE_IN), ('BEFORE', consts.CLUSTER_SCALE_OUT), ('AFTER', consts.CLUSTER_SCALE_IN), ('AFTER', consts.CLUSTER_SCALE_OUT), ] PROFILE_TYPE = [ 'ANY', ] KEYS = ( EVENT, ADJUSTMENT, ) = ( 'event', 'adjustment', ) _SUPPORTED_EVENTS = ( CLUSTER_SCALE_IN, CLUSTER_SCALE_OUT, ) = ( consts.CLUSTER_SCALE_IN, consts.CLUSTER_SCALE_OUT, ) _ADJUSTMENT_KEYS = ( ADJUSTMENT_TYPE, ADJUSTMENT_NUMBER, MIN_STEP, BEST_EFFORT, COOLDOWN, ) = ( 'type', 'number', 'min_step', 'best_effort', 'cooldown', ) properties_schema = { EVENT: schema.String( _('Event that will trigger this policy. Must be one of ' 'CLUSTER_SCALE_IN and CLUSTER_SCALE_OUT.'), constraints=[ constraints.AllowedValues(_SUPPORTED_EVENTS), ], required=True, ), ADJUSTMENT: schema.Map( _('Detailed specification for scaling adjustments.'), schema={ ADJUSTMENT_TYPE: schema.String( _('Type of adjustment when scaling is triggered.'), constraints=[ constraints.AllowedValues(consts.ADJUSTMENT_TYPES), ], default=consts.CHANGE_IN_CAPACITY, ), ADJUSTMENT_NUMBER: schema.Number( _('A number specifying the amount of adjustment.'), default=1, ), MIN_STEP: schema.Integer( _('When adjustment type is set to "CHANGE_IN_PERCENTAGE",' ' this specifies that the cluster size will be decreased ' 'by at least this number of nodes.'), default=1, ), BEST_EFFORT: schema.Boolean( _('Whether to do best-effort scaling when the new size of ' 'the cluster will break the size limitation.'), default=False, ), COOLDOWN: schema.Integer( _('Number of seconds to hold the cluster for cool-down ' 'before allowing cluster to be resized again.'), default=0, ), }), } def __init__(self, name, spec, **kwargs): """Initialize a scaling policy object. :param name: Name for the policy object. :param spec: A dictionary containing the detailed specification for the policy. :param dict kwargs: Other optional parameters for policy object creation. :return: An object of `ScalingPolicy`. """ super(ScalingPolicy, self).__init__(name, spec, **kwargs) self.singleton = False self.event = self.properties[self.EVENT] adjustment = self.properties[self.ADJUSTMENT] self.adjustment_type = adjustment[self.ADJUSTMENT_TYPE] self.adjustment_number = adjustment[self.ADJUSTMENT_NUMBER] self.adjustment_min_step = adjustment[self.MIN_STEP] self.best_effort = adjustment[self.BEST_EFFORT] self.cooldown = adjustment[self.COOLDOWN] def validate(self, context, validate_props=False): super(ScalingPolicy, self).validate(context, validate_props) if self.adjustment_number <= 0: msg = _("the 'number' for 'adjustment' must be > 0") raise exc.InvalidSpec(message=msg) if self.adjustment_min_step < 0: msg = _("the 'min_step' for 'adjustment' must be >= 0") raise exc.InvalidSpec(message=msg) if self.cooldown < 0: msg = _("the 'cooldown' for 'adjustment' must be >= 0") raise exc.InvalidSpec(message=msg) def _calculate_adjustment_count(self, current_size): """Calculate adjustment count based on current_size. :param current_size: The current size of the target cluster. :return: The number of nodes to add or to remove.
""" if self.adjustment_type == consts.EXACT_CAPACITY: if self.event == consts.CLUSTER_SCALE_IN: count = current_size - self.adjustment_number else: count = self.adjustment_number - current_size elif self.adjustment_type == consts.CHANGE_IN_CAPACITY: count = self.adjustment_number else: # consts.CHANGE_IN_PERCENTAGE: count = int((self.adjustment_number * current_size) / 100.0) if count < self.adjustment_min_step: count = self.adjustment_min_step return count def pre_op(self, cluster_id, action): """The hook function that is executed before the action. The checking result is stored in the ``data`` property of the action object rather than returned directly from the function. :param cluster_id: The ID of the target cluster. :param action: Action instance against which the policy is being checked. :return: None. """ # check cooldown last_op = action.inputs.get('last_op', None) if last_op and not timeutils.is_older_than(last_op, self.cooldown): action.data.update({ 'status': base.CHECK_ERROR, 'reason': _('Policy %s cooldown is still ' 'in progress.') % self.id }) action.store(action.context) return # Use action input if count is provided count_value = action.inputs.get('count', None) cluster = action.entity current = len(cluster.nodes) if count_value is None: # count not specified, calculate it count_value = self._calculate_adjustment_count(current) # Count must be positive value success, count = utils.get_positive_int(count_value) if not success: action.data.update({ 'status': base.CHECK_ERROR, 'reason': _("Invalid count (%(c)s) for action '%(a)s'.") % { 'c': count_value, 'a': action.action } }) action.store(action.context) return # Check size constraints max_size = cluster.max_size if max_size == -1: max_size = cfg.CONF.max_nodes_per_cluster if action.action == consts.CLUSTER_SCALE_IN: if self.best_effort: count = min(count, current - cluster.min_size) result = su.check_size_params(cluster, current - count, strict=not self.best_effort) else: if self.best_effort: count = min(count, max_size - current) result = su.check_size_params(cluster, current + count, strict=not self.best_effort) if result: # failed validation pd = {'status': base.CHECK_ERROR, 'reason': result} else: # passed validation pd = { 'status': base.CHECK_OK, 'reason': _('Scaling request validated.'), } if action.action == consts.CLUSTER_SCALE_IN: pd['deletion'] = {'count': count} else: pd['creation'] = {'count': count} action.data.update(pd) action.store(action.context) return def post_op(self, cluster_id, action): # update last_op for next cooldown check ts = timeutils.utcnow(True) cpo.ClusterPolicy.update(action.context, cluster_id, self.id, {'last_op': ts}) def need_check(self, target, action): # check if target + action matches policy targets if not super(ScalingPolicy, self).need_check(target, action): return False if target == 'BEFORE': # Scaling policy BEFORE check should only be triggered if the # incoming action matches the specific policy event. # E.g. for scale-out policy the BEFORE check to select nodes for # termination should only run for scale-out actions. return self.event == action.action else: # Scaling policy AFTER check to reset cooldown timer should be # triggered for all supported policy events (both scale-in and # scale-out). E.g. a scale-out policy should reset cooldown timer # whenever scale-out or scale-in action completes. return action.action in list(self._SUPPORTED_EVENTS)
class StackProfile(base.Profile):
    """Profile for an OpenStack Heat stack."""

    VERSIONS = {'1.0': [{'status': consts.SUPPORTED, 'since': '2016.04'}]}

    KEYS = (
        CONTEXT, TEMPLATE, TEMPLATE_URL, PARAMETERS,
        FILES, TIMEOUT, DISABLE_ROLLBACK, ENVIRONMENT,
    ) = (
        'context', 'template', 'template_url', 'parameters',
        'files', 'timeout', 'disable_rollback', 'environment',
    )

    properties_schema = {
        CONTEXT: schema.Map(
            _('A dictionary for specifying the customized context for '
              'stack operations.'),
            default={},
        ),
        TEMPLATE: schema.Map(
            _('Heat stack template.'),
            default={},
            updatable=True,
        ),
        TEMPLATE_URL: schema.String(
            _('Heat stack template URL.'),
            default='',
            updatable=True,
        ),
        PARAMETERS: schema.Map(
            _('Parameters to be passed to Heat for stack operations.'),
            default={},
            updatable=True,
        ),
        FILES: schema.Map(
            _('Contents of files referenced by the template, if any.'),
            default={},
            updatable=True,
        ),
        TIMEOUT: schema.Integer(
            _('An integer specifying the number of minutes after which a '
              'stack operation times out.'),
            updatable=True,
        ),
        DISABLE_ROLLBACK: schema.Boolean(
            _('A boolean specifying whether a stack operation can be '
              'rolled back.'),
            default=True,
            updatable=True,
        ),
        ENVIRONMENT: schema.Map(
            _('A map that specifies the environment used for stack '
              'operations.'),
            default={},
            updatable=True,
        )
    }

    OP_NAMES = (
        OP_ABANDON,
    ) = (
        'abandon',
    )

    OPERATIONS = {
        OP_ABANDON: schema.Map(
            _('Abandon a heat stack node.'),
        )
    }

    def __init__(self, type_name, name, **kwargs):
        super(StackProfile, self).__init__(type_name, name, **kwargs)
        self.stack_id = None

    def validate(self, validate_props=False):
        """Validate the schema and the data provided."""
        # general validation
        self.spec_data.validate()
        self.properties.validate()

        # validate template
        template = self.properties[self.TEMPLATE]
        template_url = self.properties[self.TEMPLATE_URL]
        if not template and not template_url:
            msg = _("Neither template nor template_url is specified "
                    "for profile '%s'.") % self.name
            raise exc.InvalidSpec(message=msg)

        if validate_props:
            self.do_validate(obj=self)

    def do_validate(self, obj):
        """Validate the stack template used by a node.

        :param obj: Node object to operate on.
        :returns: True if validation succeeds.
        :raises: `InvalidSpec` if the template is invalid.
        """
        kwargs = {
            'stack_name': utils.random_name(),
            'template': self.properties[self.TEMPLATE],
            'template_url': self.properties[self.TEMPLATE_URL],
            'parameters': self.properties[self.PARAMETERS],
            'files': self.properties[self.FILES],
            'environment': self.properties[self.ENVIRONMENT],
            'preview': True,
        }
        try:
            self.orchestration(obj).stack_create(**kwargs)
        except exc.InternalError as ex:
            msg = _('Failed in validating template: %s') % six.text_type(ex)
            raise exc.InvalidSpec(message=msg)

        return True

    def do_create(self, obj):
        """Create a heat stack using the given node object.

        :param obj: The node object to operate on.
        :returns: The UUID of the heat stack created.
""" tags = ["cluster_node_id=%s" % obj.id] if obj.cluster_id: tags.append('cluster_id=%s' % obj.cluster_id) tags.append('cluster_node_index=%s' % obj.index) kwargs = { 'stack_name': obj.name + '-' + utils.random_name(8), 'template': self.properties[self.TEMPLATE], 'template_url': self.properties[self.TEMPLATE_URL], 'timeout_mins': self.properties[self.TIMEOUT], 'disable_rollback': self.properties[self.DISABLE_ROLLBACK], 'parameters': self.properties[self.PARAMETERS], 'files': self.properties[self.FILES], 'environment': self.properties[self.ENVIRONMENT], 'tags': ",".join(tags) } try: stack = self.orchestration(obj).stack_create(**kwargs) # Timeout = None means we will use the 'default_action_timeout' # It can be overridden by the TIMEOUT profile properties timeout = None if self.properties[self.TIMEOUT]: timeout = self.properties[self.TIMEOUT] * 60 self.orchestration(obj).wait_for_stack(stack.id, 'CREATE_COMPLETE', timeout=timeout) return stack.id except exc.InternalError as ex: raise exc.EResourceCreation(type='stack', message=six.text_type(ex)) def do_delete(self, obj, **params): """Delete the physical stack behind the node object. :param obj: The node object to operate on. :param kwargs params: Optional keyword arguments for the delete operation. :returns: This operation always returns True unless exception is caught. :raises: `EResourceDeletion` if interaction with heat fails. """ stack_id = obj.physical_id if not stack_id: return True ignore_missing = params.get('ignore_missing', True) try: self.orchestration(obj).stack_delete(stack_id, ignore_missing) self.orchestration(obj).wait_for_stack_delete(stack_id) except exc.InternalError as ex: raise exc.EResourceDeletion(type='stack', id=stack_id, message=six.text_type(ex)) return True def do_update(self, obj, new_profile, **params): """Perform update on object. :param obj: the node object to operate on :param new_profile: the new profile used for updating :param params: other parameters for the update request. :returns: A boolean indicating whether the operation is successful. 
""" self.stack_id = obj.physical_id if not self.stack_id: return False if not self.validate_for_update(new_profile): return False fields = {} new_template = new_profile.properties[new_profile.TEMPLATE] if new_template != self.properties[self.TEMPLATE]: fields['template'] = new_template new_params = new_profile.properties[new_profile.PARAMETERS] if new_params != self.properties[self.PARAMETERS]: fields['parameters'] = new_params new_timeout = new_profile.properties[new_profile.TIMEOUT] if new_timeout != self.properties[self.TIMEOUT]: fields['timeout_mins'] = new_timeout new_dr = new_profile.properties[new_profile.DISABLE_ROLLBACK] if new_dr != self.properties[self.DISABLE_ROLLBACK]: fields['disable_rollback'] = new_dr new_files = new_profile.properties[new_profile.FILES] if new_files != self.properties[self.FILES]: fields['files'] = new_files new_environment = new_profile.properties[new_profile.ENVIRONMENT] if new_environment != self.properties[self.ENVIRONMENT]: fields['environment'] = new_environment if not fields: return True try: hc = self.orchestration(obj) # Timeout = None means we will use the 'default_action_timeout' # It can be overridden by the TIMEOUT profile properties timeout = None if self.properties[self.TIMEOUT]: timeout = self.properties[self.TIMEOUT] * 60 hc.stack_update(self.stack_id, **fields) hc.wait_for_stack(self.stack_id, 'UPDATE_COMPLETE', timeout=timeout) except exc.InternalError as ex: raise exc.EResourceUpdate(type='stack', id=self.stack_id, message=six.text_type(ex)) return True def do_check(self, obj): """Check stack status. :param obj: Node object to operate. :returns: True if check succeeded, or False otherwise. """ stack_id = obj.physical_id if stack_id is None: return False hc = self.orchestration(obj) try: # Timeout = None means we will use the 'default_action_timeout' # It can be overridden by the TIMEOUT profile properties timeout = None if self.properties[self.TIMEOUT]: timeout = self.properties[self.TIMEOUT] * 60 hc.stack_check(stack_id) hc.wait_for_stack(stack_id, 'CHECK_COMPLETE', timeout=timeout) except exc.InternalError as ex: raise exc.EResourceOperation(op='checking', type='stack', id=stack_id, message=six.text_type(ex)) return True def do_get_details(self, obj): if not obj.physical_id: return {} try: stack = self.orchestration(obj).stack_get(obj.physical_id) return stack.to_dict() except exc.InternalError as ex: return {'Error': {'code': ex.code, 'message': six.text_type(ex)}} def do_adopt(self, obj, overrides=None, snapshot=False): """Adopt an existing stack node for management. :param obj: A node object for this operation. It could be a puppet node that provides only 'user', 'project' and 'physical_id' properties when doing a preview. It can be a real Node object for node adoption. :param overrides: A dict containing the properties that will be overridden when generating a profile for the stack. :param snapshot: A boolean flag indicating whether the profile should attempt a snapshot operation before adopting the stack. If set to True, the ID of the snapshot will be used as the image ID. :returns: A dict containing the spec created from the stack object or a dict containing error information if failure occurred. """ driver = self.orchestration(obj) # TODO(Qiming): Add snapshot support # snapshot = driver.snapshot_create(...) 
        try:
            stack = driver.stack_get(obj.physical_id)
            tmpl = driver.stack_get_template(obj.physical_id)
            env = driver.stack_get_environment(obj.physical_id)
            files = driver.stack_get_files(obj.physical_id)
        except exc.InternalError as ex:
            return {'Error': {'code': ex.code, 'message': six.text_type(ex)}}

        spec = {
            self.ENVIRONMENT: env.to_dict(),
            self.FILES: files,
            self.TEMPLATE: tmpl.to_dict(),
            self.PARAMETERS: dict((k, v) for k, v in stack.parameters.items()
                                  if k.find('OS::', 0) < 0),
            self.TIMEOUT: stack.timeout_mins,
            self.DISABLE_ROLLBACK: stack.is_rollback_disabled
        }
        if overrides:
            spec.update(overrides)

        return spec

    def _refresh_tags(self, current, node, add=False):
        """Refresh tag list.

        :param current: Current list of tags.
        :param node: The node object.
        :param add: Flag indicating whether new tags are added.
        :returns: (tags, updated) where tags is a comma-separated string of
            the resulting tags and updated indicates whether the new tag
            list differs from the old one.
        """
        tags = []
        for tag in current:
            if tag.find('cluster_id=') == 0:
                continue
            elif tag.find('cluster_node_id=') == 0:
                continue
            elif tag.find('cluster_node_index=') == 0:
                continue
            if tag.strip() != "":
                tags.append(tag.strip())

        if add:
            tags.append('cluster_id=' + node.cluster_id)
            tags.append('cluster_node_id=' + node.id)
            tags.append('cluster_node_index=%s' % node.index)

        tag_str = ",".join(tags)
        return (tag_str, tags != current)

    def do_join(self, obj, cluster_id):
        if not obj.physical_id:
            return False

        hc = self.orchestration(obj)
        try:
            stack = hc.stack_get(obj.physical_id)
            tags, updated = self._refresh_tags(stack.tags, obj, True)
            field = {'tags': tags}
            if updated:
                hc.stack_update(obj.physical_id, **field)
        except exc.InternalError as ex:
            LOG.error('Failed in updating stack tags: %s.', ex)
            return False

        return True

    def do_leave(self, obj):
        if not obj.physical_id:
            return False

        hc = self.orchestration(obj)
        try:
            stack = hc.stack_get(obj.physical_id)
            tags, updated = self._refresh_tags(stack.tags, obj, False)
            field = {'tags': tags}
            if updated:
                hc.stack_update(obj.physical_id, **field)
        except exc.InternalError as ex:
            LOG.error('Failed in updating stack tags: %s.', ex)
            return False

        return True

    def handle_abandon(self, obj, **options):
        """Handler for abandoning a heat stack node."""
        pass
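# An illustrative spec (an assumption, not taken from the module) that would
# pass the validate() method above: a template body is given, so template_url
# may stay empty. OS::Heat::RandomString is a standard Heat resource type.
example_stack_spec = {
    'type': 'os.heat.stack',
    'version': '1.0',
    'properties': {
        'template': {
            'heat_template_version': '2016-04-08',
            'resources': {
                'random': {'type': 'OS::Heat::RandomString'},
            },
        },
        'timeout': 5,            # minutes; multiplied by 60 for wait_for_stack()
        'disable_rollback': False,
    },
}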
def test_invalid_constructor(self):
    self.assertRaises(exc.ESchema,
                      schema.String,
                      schema=schema.String('String'))
class HealthPolicy(base.Policy):
    '''Policy for health management of a cluster.'''

    VERSION = '1.0'
    PRIORITY = 600

    TARGET = [
        ('BEFORE', consts.CLUSTER_CHECK),
        ('BEFORE', consts.CLUSTER_RECOVER),
    ]

    # Should be ANY if profile provides health check support?
    PROFILE_TYPE = [
        'os.nova.server',
        'os.heat.stack',
    ]

    KEYS = (DETECTION, RECOVERY) = ('detection', 'recovery')

    _DETECTION_KEYS = (
        DETECTION_TYPE, DETECTION_OPTIONS,
    ) = ('type', 'options')

    DETECTION_TYPES = (
        VM_LIFECYCLE_EVENTS, NODE_STATUS_POLLING, LB_STATUS_POLLING,
    ) = (
        'VM_LIFECYCLE_EVENTS', 'NODE_STATUS_POLLING', 'LB_STATUS_POLLING',
    )

    _DETECTION_OPTIONS = (
        DETECTION_INTERVAL,
    ) = ('interval', )

    _RECOVERY_KEYS = (
        RECOVERY_ACTIONS, RECOVERY_FENCING,
    ) = ('actions', 'fencing')

    RECOVERY_ACTION_VALUES = (
        REBOOT, REBUILD, MIGRATE, EVACUATE, RECREATE, NOP,
    ) = (
        'REBOOT', 'REBUILD', 'MIGRATE', 'EVACUATE', 'RECREATE', 'NOP',
    )

    FENCING_OPTION_VALUES = (
        COMPUTE, STORAGE, NETWORK,
    ) = ('COMPUTE', 'STORAGE', 'NETWORK')

    properties_schema = {
        DETECTION: schema.Map(
            _('Policy aspect for node failure detection.'),
            schema={
                DETECTION_TYPE: schema.String(
                    _('Type of node failure detection.'),
                    constraints=[
                        constraints.AllowedValues(DETECTION_TYPES),
                    ],
                    required=True,
                ),
                DETECTION_OPTIONS: schema.Map(
                    schema={
                        DETECTION_INTERVAL: schema.Integer(
                            _("Number of seconds between polls. Only "
                              "required when type is "
                              "'NODE_STATUS_POLLING'."),
                            default=60,
                        ),
                    }
                ),
            },
            required=True,
        ),
        RECOVERY: schema.Map(
            _('Policy aspect for node failure recovery.'),
            schema={
                RECOVERY_ACTIONS: schema.List(
                    _('List of actions to try for node recovery.'),
                    schema=schema.String(
                        _('Action to try for node recovery.'),
                        constraints=[
                            constraints.AllowedValues(
                                RECOVERY_ACTION_VALUES),
                        ]
                    ),
                ),
                RECOVERY_FENCING: schema.List(
                    _('List of services to be fenced.'),
                    schema=schema.String(
                        _('Service to be fenced.'),
                        constraints=[
                            constraints.AllowedValues(
                                FENCING_OPTION_VALUES),
                        ],
                    ),
                ),
            }
        ),
    }

    def __init__(self, name, spec, **kwargs):
        super(HealthPolicy, self).__init__(name, spec, **kwargs)

        self.check_type = self.properties[self.DETECTION][
            self.DETECTION_TYPE]
        options = self.properties[self.DETECTION][self.DETECTION_OPTIONS]
        self.interval = options[self.DETECTION_INTERVAL]
        recover_settings = self.properties[self.RECOVERY]
        self.recover_actions = recover_settings[self.RECOVERY_ACTIONS]

    def attach(self, cluster):
        """Hook for policy attach.

        Register the cluster for health management.
        """
        kwargs = {
            'check_type': self.check_type,
            'interval': self.interval,
            'params': {},
        }

        health_manager.register(cluster.id, engine_id=None, **kwargs)

        data = {
            'check_type': self.check_type,
            'interval': self.interval,
        }

        return True, self._build_policy_data(data)

    def detach(self, cluster):
        '''Hook for policy detach.

        Unregister the cluster for health management.
        '''
        health_manager.unregister(cluster.id)
        return True, ''

    def pre_op(self, cluster_id, action, **args):
        # Ignore actions that are not required to be processed at this stage
        if action.action != consts.CLUSTER_RECOVER:
            return True

        pd = {
            'recover_action': self.recover_actions[0],
        }
        action.data.update({'health': pd})
        action.store(action.context)

        return True

    def post_op(self, cluster_id, action, **args):
        # Ignore irrelevant action here
        if action.action not in (consts.CLUSTER_CHECK,
                                 consts.CLUSTER_RECOVER):
            return True

        # TODO(anyone): subscribe to vm-lifecycle-events for the specified VM
        #               or add vm to the list of VM status polling
        return True
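# A sketch of a spec matching the schema above (the type name
# 'senlin.policy.health' is an assumption): poll node status every 60
# seconds and rebuild failed nodes, fencing compute first.
example_health_spec = {
    'type': 'senlin.policy.health',
    'version': '1.0',
    'properties': {
        'detection': {
            'type': 'NODE_STATUS_POLLING',
            'options': {'interval': 60},
        },
        'recovery': {
            'actions': ['REBUILD'],
            'fencing': ['COMPUTE'],
        },
    },
}
# With this spec, pre_op() above stamps CLUSTER_RECOVER actions with
# {'health': {'recover_action': 'REBUILD'}}.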
def test_validate(self):
    sot = schema.List(schema=schema.String())

    res = sot.validate(['abc', 'def'])

    self.assertIsNone(res)
class Policy(object):
    '''Base class for policies.'''

    PROFILE_TYPE = 'ANY'

    KEYS = (
        TYPE, VERSION, DESCRIPTION, PROPERTIES,
    ) = (
        'type', 'version', 'description', 'properties',
    )

    spec_schema = {
        TYPE: schema.String(
            _('Name of the policy type.'),
            required=True,
        ),
        VERSION: schema.String(
            _('Version number of the policy type.'),
            required=True,
        ),
        DESCRIPTION: schema.String(
            _('A text description of the policy.'),
            default='',
        ),
        PROPERTIES: schema.Map(
            _('Properties for the policy.'),
            required=True,
        )
    }

    properties_schema = {}

    def __new__(cls, name, spec, **kwargs):
        """Create a new policy of the appropriate class.

        :param name: The name for the policy.
        :param spec: A dictionary containing the spec for the policy.
        :param kwargs: Keyword arguments for policy creation.
        :returns: An instance of a specific sub-class of Policy.
        """
        type_name, version = schema.get_spec_version(spec)

        if cls != Policy:
            PolicyClass = cls
        else:
            PolicyClass = environment.global_env().get_policy(type_name)

        return super(Policy, cls).__new__(PolicyClass)

    def __init__(self, name, spec, **kwargs):
        """Initialize a policy instance.

        :param name: The name for the policy.
        :param spec: A dictionary containing the detailed policy spec.
        :param kwargs: Keyword arguments for initializing the policy.
        """
        type_name, version = schema.get_spec_version(spec)

        self.name = name
        self.spec = spec

        self.id = kwargs.get('id', None)
        self.type = kwargs.get('type', "%s-%s" % (type_name, version))
        self.user = kwargs.get('user')
        self.project = kwargs.get('project')
        self.domain = kwargs.get('domain')
        self.level = kwargs.get('level', SHOULD)
        self.cooldown = kwargs.get('cooldown', 0)
        self.data = kwargs.get('data', {})

        self.created_time = kwargs.get('created_time', None)
        self.updated_time = kwargs.get('updated_time', None)
        self.deleted_time = kwargs.get('deleted_time', None)

        self.spec_data = schema.Spec(self.spec_schema, spec)
        self.properties = schema.Spec(self.properties_schema,
                                      self.spec.get(self.PROPERTIES, {}))
        self.singleton = True

    @classmethod
    def _from_db_record(cls, record):
        '''Construct a policy object from a database record.'''
        kwargs = {
            'id': record.id,
            'type': record.type,
            'user': record.user,
            'project': record.project,
            'domain': record.domain,
            'level': record.level,
            'cooldown': record.cooldown,
            'created_time': record.created_time,
            'updated_time': record.updated_time,
            'deleted_time': record.deleted_time,
            'data': record.data,
        }
        return cls(record.name, record.spec, **kwargs)

    @classmethod
    def load(cls, context, policy_id=None, db_policy=None):
        """Retrieve and reconstruct a policy object from DB.

        :param context: DB context for object retrieval.
        :param policy_id: Optional parameter specifying the ID of policy.
        :param db_policy: Optional parameter referencing a policy DB object.
        :returns: An object of the proper policy class.
""" if db_policy is None: db_policy = db_api.policy_get(context, policy_id) if db_policy is None: raise exception.PolicyNotFound(policy=policy_id) return cls._from_db_record(db_policy) @classmethod def load_all(cls, context, limit=None, sort_keys=None, marker=None, sort_dir=None, filters=None, show_deleted=False): '''Retrieve all policies from database.''' records = db_api.policy_get_all(context, limit=limit, marker=marker, sort_keys=sort_keys, sort_dir=sort_dir, filters=filters, show_deleted=show_deleted) for record in records: yield cls._from_db_record(record) @classmethod def delete(cls, context, policy_id): db_api.policy_delete(context, policy_id) def store(self, context): '''Store the policy object into database table.''' timestamp = timeutils.utcnow() values = { 'name': self.name, 'type': self.type, 'user': self.user, 'project': self.project, 'domain': self.domain, 'spec': self.spec, 'level': self.level, 'cooldown': self.cooldown, 'data': self.data, } if self.id is not None: self.updated_time = timestamp values['updated_time'] = timestamp db_api.policy_update(context, self.id, values) else: self.created_time = timestamp values['created_time'] = timestamp policy = db_api.policy_create(context, values) self.id = policy.id return self.id def validate(self): '''Validate the schema and the data provided.''' self.spec_data.validate() self.properties.validate() @classmethod def get_schema(cls): return dict((name, dict(schema)) for name, schema in cls.properties_schema.items()) def _build_policy_data(self, data): clsname = self.__class__.__name__ version = self.VERSION result = { clsname: { 'version': version, 'data': data, } } return result def _extract_policy_data(self, policy_data): clsname = self.__class__.__name__ if clsname not in policy_data: return None data = policy_data.get(clsname) if 'version' not in data or data['version'] != self.VERSION: return None return data.get('data', None) def attach(self, cluster): '''Method to be invoked before policy is attached to a cluster. :param cluster: the cluster to which the policy is being attached to. :returns: (True, message) if the operation is successful, or (False, error) otherwise. ''' if self.PROFILE_TYPE == ['ANY']: return True, None profile = cluster.rt['profile'] if profile.type not in self.PROFILE_TYPE: error = _('Policy not applicable on profile type: ' '%s') % profile.type return False, error return True, None def detach(self, cluster): '''Method to be invoked before policy is detached from a cluster.''' return True, None def pre_op(self, cluster_id, action): '''A method that will be invoked before an action execution.''' return def post_op(self, cluster_id, action): '''A method that will be invoked after an action execution.''' return def to_dict(self): def _fmt_time(value): return value and value.isoformat() pb_dict = { 'id': self.id, 'name': self.name, 'type': self.type, 'user': self.user, 'project': self.project, 'domain': self.domain, 'spec': self.spec, 'level': self.level, 'cooldown': self.cooldown, 'created_time': _fmt_time(self.created_time), 'updated_time': _fmt_time(self.updated_time), 'deleted_time': _fmt_time(self.deleted_time), 'data': self.data, } return pb_dict def _build_conn_params(self, cluster): """Build trust-based connection parameters. :param cluster: the cluste for which the trust will be checked. 
""" service_creds = senlin_context.get_service_context() params = { 'username': service_creds.get('username'), 'password': service_creds.get('password'), 'auth_url': service_creds.get('auth_url'), 'user_domain_name': service_creds.get('user_domain_name') } cred = db_api.cred_get(oslo_context.get_current(), cluster.user, cluster.project) if cred is None: raise exception.TrustNotFound(trustor=cluster.user) params['trust_id'] = [cred.cred['openstack']['trust']] return params
def test_get_children(self):
    sot = schema.Map('desc', schema={'foo': schema.String()})

    res = sot._get_children({'foo': 'bar'})

    self.assertEqual({'foo': 'bar'}, dict(res))
class Trigger(object):

    KEYS = (
        TYPE, VERSION, RULE,
    ) = (
        'type', 'version', 'rule',
    )

    spec_schema = {
        TYPE: schema.String(
            _('Type name of the trigger type.'),
            required=True,
        ),
        VERSION: schema.String(
            _('Version number string of the trigger type.'),
            required=True,
        ),
        RULE: schema.Map(
            _('Rule collection for the trigger.'),
            required=True,
        )
    }

    def __new__(cls, name, spec, **kwargs):
        """Create a trigger instance based on its type and version.

        :param name: The name for the trigger.
        :param spec: A dictionary containing the spec for the trigger.
        :param kwargs: Keyword arguments for trigger creation.
        :returns: An instance of a specific sub-class of BaseTrigger.
        """
        type_name, version = schema.get_spec_version(spec)

        if cls != Trigger:
            TriggerClass = cls
        else:
            TriggerClass = environment.global_env().get_trigger(type_name)

        return super(Trigger, cls).__new__(TriggerClass)

    def __init__(self, name, spec, **kwargs):
        """Initialize a trigger instance.

        :param name: The name for the trigger.
        :param spec: A dictionary containing the detailed trigger spec.
        :param kwargs: Keyword arguments for initializing the trigger.
        """
        type_name, version = schema.get_spec_version(spec)
        self.type_name = type_name
        self.name = name
        self.id = kwargs.get('id', None)
        self.physical_id = kwargs.get('physical_id', None)
        self.desc = kwargs.get('desc', '')
        self.state = kwargs.get('state', INSUFFICIENT_DATA)
        self.enabled = kwargs.get('enabled', True)
        self.severity = kwargs.get('severity', S_LOW)
        self.links = kwargs.get('links', {})
        self.user = kwargs.get('user')
        self.project = kwargs.get('project')
        self.domain = kwargs.get('domain')

        self.created_time = kwargs.get('created_time', None)
        self.updated_time = kwargs.get('updated_time', None)
        self.deleted_time = kwargs.get('deleted_time', None)

        self.spec = spec
        self.spec_data = schema.Spec(self.spec_schema, spec)

    @classmethod
    def _from_db_record(cls, record):
        """Construct a trigger object from a database record."""
        kwargs = {
            'id': record.id,
            'physical_id': record.physical_id,
            'desc': record.desc,
            'state': record.state,
            'enabled': record.enabled,
            'severity': record.severity,
            'links': record.links,
            'user': record.user,
            'project': record.project,
            'domain': record.domain,
            'created_time': record.created_time,
            'updated_time': record.updated_time,
            'deleted_time': record.deleted_time,
        }
        return cls(record.name, record.spec, **kwargs)

    @classmethod
    def load(cls, ctx, trigger_id=None, db_trigger=None):
        """Retrieve and reconstruct a trigger object from DB.

        :param ctx: A request context for DB operations.
        :param trigger_id: The ID of a trigger for retrieval.
        :param db_trigger: A DB record for a trigger.
        """
        if db_trigger is None:
            db_trigger = db_api.trigger_get(ctx, trigger_id)
            if db_trigger is None:
                raise exception.TriggerNotFound(trigger=trigger_id)

        return cls._from_db_record(db_trigger)

    @classmethod
    def load_all(cls, ctx, limit=None, marker=None, sort_keys=None,
                 sort_dir=None, filters=None, project_safe=True,
                 show_deleted=False):
        """Retrieve all trigger objects from database.

        Optionally, you can use some parameters to fine-tune the query.

        :param ctx: A request context for DB operations.
        :param limit: Maximum number of records to return.
        :param marker: The ID of a last-seen record. Only records after this
                       ID value will be returned.
        :param sort_keys: A list of trigger properties for sorting.
        :param sort_dir: A string indicating the sorting direction. It can
                         be either `desc` for descending sorting or `asc`
                         for ascending sorting.
        :param filters: A map consisting of key-value pairs to filter the
                        results.
        :param show_deleted: A boolean indicating whether soft-deleted
                             objects should be included in the results.
        """
        records = db_api.trigger_get_all(ctx, limit=limit, marker=marker,
                                         sort_keys=sort_keys,
                                         sort_dir=sort_dir,
                                         filters=filters,
                                         project_safe=project_safe,
                                         show_deleted=show_deleted)
        for record in records:
            yield cls._from_db_record(record)

    @classmethod
    def delete(cls, ctx, trigger_id):
        """Delete the specified trigger.

        :param ctx: The request context for DB operations.
        :param trigger_id: The unique ID of a trigger.
        """
        return db_api.trigger_delete(ctx, trigger_id)

    def store(self, ctx):
        """Store the trigger object into the database table.

        :param ctx: The request context for DB operations.
        """
        timestamp = timeutils.utcnow()

        values = {
            'name': self.name,
            'type': self.type_name,
            'desc': self.desc,
            'state': self.state,
            'enabled': self.enabled,
            'severity': self.severity,
            'links': self.links,
            'spec': self.spec,
        }

        if self.id is not None:
            self.updated_time = timestamp
            values['updated_time'] = timestamp
            db_api.trigger_update(ctx, self.id, values)
        else:
            self.created_time = timestamp
            values['created_time'] = timestamp
            values['user'] = ctx.user
            values['project'] = ctx.project
            values['domain'] = ctx.domain
            db_trigger = db_api.trigger_create(ctx, values)
            self.id = db_trigger.id

        return self.id

    def validate(self):
        """Validate the schema and the data provided."""
        self.spec_data.validate()
        # NOTE: the rule property is supposed to be assigned in subclasses.
        self.rule.validate()

    @classmethod
    def get_schema(cls):
        return dict((name, dict(schema))
                    for name, schema in cls.spec_schema.items())

    def to_dict(self):
        def _fmt_time(value):
            return value and value.isoformat()

        trigger_dict = {
            'id': self.id,
            'name': self.name,
            'type': self.type_name,
            'desc': self.desc,
            'state': self.state,
            'enabled': self.enabled,
            'severity': self.severity,
            'links': self.links,
            'spec': self.spec,
            'user': self.user,
            'project': self.project,
            'domain': self.domain,
            'created_time': _fmt_time(self.created_time),
            'updated_time': _fmt_time(self.updated_time),
            'deleted_time': _fmt_time(self.deleted_time),
        }
        return trigger_dict
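# A hypothetical minimal subclass sketching the contract implied by
# validate() above: concrete triggers are expected to assign a ``rule``
# spec, which gets validated together with spec_data. The rule schema here
# is an assumption for illustration only.
class ThresholdTrigger(Trigger):

    rule_schema = {
        'threshold': schema.Integer(
            _('Value beyond which the trigger fires.'),
            required=True,
        ),
    }

    def __init__(self, name, spec, **kwargs):
        super(ThresholdTrigger, self).__init__(name, spec, **kwargs)
        # ``rule`` is what Trigger.validate() expects subclasses to provide.
        self.rule = schema.Spec(self.rule_schema, spec.get(self.RULE, {}))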