def test_allowed_values_numeric_int(self):
    '''Test AllowedValues constraint for numeric integer values.

    Test if the AllowedValues constraint works for numeric values in any
    combination of numeric strings or numbers in the constraint and
    numeric strings or numbers as value.
    '''
    # Allowed values defined as integer numbers
    s = schema.Integer(
        constraints=[constraints.AllowedValues([1, 2, 4])])
    # ... and value as number or string
    self.assertIsNone(s.validate(1))
    err = self.assertRaises(exc.ESchema, s.validate, 3)
    self.assertEqual("'3' must be one of the allowed values: 1, 2, 4",
                     six.text_type(err))
    self.assertIsNone(s.validate('1'))
    err = self.assertRaises(exc.ESchema, s.validate, '3')
    self.assertEqual("'3' must be one of the allowed values: 1, 2, 4",
                     six.text_type(err))

    # Allowed values defined as integer strings
    s = schema.Integer(
        constraints=[constraints.AllowedValues(['1', '2', '4'])])
    # ... and value as number or string
    self.assertIsNone(s.validate(1))
    err = self.assertRaises(exc.ESchema, s.validate, 3)
    self.assertEqual("'3' must be one of the allowed values: 1, 2, 4",
                     six.text_type(err))
    self.assertIsNone(s.validate('1'))
    err = self.assertRaises(exc.ESchema, s.validate, '3')
    self.assertEqual("'3' must be one of the allowed values: 1, 2, 4",
                     six.text_type(err))
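# Illustrative sketch only (assumed behaviour, not the actual senlin
# implementation): the combinations exercised above pass because the
# AllowedValues check is expected to coerce both the candidate value and
# each allowed value to the schema type before comparing, so 1 and '1'
# are treated alike.
def _allowed(value, allowed_values, to_schema_type=int):
    return to_schema_type(value) in {to_schema_type(v) for v in allowed_values}

assert _allowed('1', [1, 2, 4])
assert _allowed(2, ['1', '2', '4'])
assert not _allowed('3', [1, 2, 4])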
class TestProfile(profile_base.Profile):
    CONTEXT = 'context'
    properties_schema = {
        'INT': schema.Integer('int property', default=0),
        'STR': schema.String('string property', default='a string'),
        'MAP': schema.Map(
            'map property',
            schema={
                'KEY1': schema.Integer('key1'),
                'KEY2': schema.String('key2')
            }
        ),
        'LIST': schema.List(
            'list property',
            schema=schema.String('list item'),
        ),
    }

    OPERATIONS = {}

    def __init__(self, name, spec, **kwargs):
        super(TestProfile, self).__init__(name, spec, **kwargs)

    def do_create(self):
        return {}

    def do_delete(self, id):
        return True

    def do_update(self):
        return {}

    def do_check(self, id):
        return True
class ResourceAlarm(Alarm):
    rule_schema = {
        METRIC: schema.String(
            _('Name of a metric to evaluate against.'),
            required=True,
        ),
        OPERATOR: schema.String(
            _('Comparison operator for evaluation.'),
            constraints=[
                constraints.AllowedValues(OPERATOR_VALUES),
            ],
            default=OP_EQUAL,
        ),
        THRESHOLD: schema.Number(
            _('Threshold for evaluation.'),
            required=True,
        ),
        GRANULARITY: schema.Integer(
            _('Length of each evaluation period in seconds.'),
            default=60,
        ),
        EVALUATIONS: schema.Integer(
            _('Number of periods to evaluate over.'),
            default=1,
        ),
        AGG_METHOD: schema.String(
            _('Statistics to evaluate. Must be one of %s, defaults to '
              '"avg".') % list(STATISTIC_VALUES),
            constraints=[
                constraints.AllowedValues(STATISTIC_VALUES),
            ],
            default=SV_AVG,
        ),
        RESOURCE_TYPE: schema.String(
            _('The resource type.'),
            required=True,
        ),
        RESOURCE_ID: schema.String(
            _('The ID of a resource.'),
            required=True,
        ),
    }

    def __init__(self, name, spec, **kwargs):
        super(ResourceAlarm, self).__init__(name, spec, **kwargs)

        rule_spec = spec.get('rule', {})
        self.rule = schema.Spec(self.rule_schema, rule_spec)
        self.namespace = 'gnocchi_resources_threshold'
class BatchPolicy(base.Policy):
    """Policy for batching the operations on a cluster's nodes."""

    VERSION = '1.0'

    TARGET = [
        ('BEFORE', consts.CLUSTER_UPDATE),
    ]

    PROFILE_TYPE = [
        'ANY'
    ]

    KEYS = (
        MIN_IN_SERVICE, MAX_BATCH_SIZE, PAUSE_TIME,
    ) = (
        'min_in_service', 'max_batch_size', 'pause_time',
    )

    properties_schema = {
        MIN_IN_SERVICE: schema.Integer(
            _('Minimum number of nodes in service when performing updates.'),
            default=1,
        ),
        MAX_BATCH_SIZE: schema.Integer(
            _('Maximum number of nodes that can be updated at the same '
              'time.'),
            default=-1,
        ),
        PAUSE_TIME: schema.Integer(
            _('Number of seconds between update batches if any.'),
            default=60,
        )
    }

    def __init__(self, name, spec, **kwargs):
        super(BatchPolicy, self).__init__(name, spec, **kwargs)

        self.min_in_service = self.properties[self.MIN_IN_SERVICE]
        self.max_batch_size = self.properties[self.MAX_BATCH_SIZE]
        self.pause_time = self.properties[self.PAUSE_TIME]

    def pre_op(self, cluster_id, action):
        # TODO(anyone): compute batches
        action.data['candidates'] = []
        action.store(action.context)

        return True

    def post_op(self, cluster_id, action):
        # TODO(anyone): handle pause_time here
        return True
def test_validate_version_good(self):
    spec_schema = {
        'type': schema.String('Type name', required=True),
        'version': schema.String('Version number', required=True),
        'key1': schema.String('first key', default='value1'),
        'key2': schema.Integer('second key', required=True,
                               min_version='1.0', max_version='1.2'),
    }

    data = {
        'key1': 'value1',
        'key2': 2,
        'type': 'test-type',
        'version': '1.0'
    }
    spec = schema.Spec(spec_schema, data)
    self.assertIsNone(spec.validate())

    data = {'key2': 2, 'type': 'test-type', 'version': '1.2'}
    spec = schema.Spec(spec_schema, data)
    self.assertIsNone(spec.validate())
class TestPolicy(policy_base.Policy):
    VERSION = 1.0
    properties_schema = {
        'KEY1': schema.String('key1', default='default1'),
        'KEY2': schema.Integer('key2', required=True),
    }
    TARGET = [
        ('BEFORE', 'CLUSTER_ADD_NODES')
    ]

    def __init__(self, name, spec, **kwargs):
        super(TestPolicy, self).__init__(name, spec, **kwargs)

    def attach(self, cluster):
        return True, {}

    def detach(self, cluster):
        return True, 'OK'

    def pre_op(self, cluster_id, action):
        return

    def post_op(self, cluster_id, action):
        return
class DummyProfile(pb.Profile):
    VERSION = '1.0'
    CONTEXT = 'context'
    properties_schema = {
        CONTEXT: schema.Map('context data'),
        'key1': schema.String(
            'first key',
            default='value1',
            updatable=True,
        ),
        'key2': schema.Integer(
            'second key',
            required=True,
            updatable=True,
        ),
        'key3': schema.String(
            'third key',
        ),
    }
    OPERATIONS = {
        'op1': schema.Operation(
            'Operation 1',
            schema={
                'param1': schema.StringParam('description of param1'),
            }
        )
    }

    def __init__(self, name, spec, **kwargs):
        super(DummyProfile, self).__init__(name, spec, **kwargs)
class DummyProfile(pb.Profile):
    VERSION = '1.0'
    CONTEXT = 'context'
    properties_schema = {
        CONTEXT: schema.Map(
            'context data'
        ),
        'key1': schema.String(
            'first key',
            default='value1',
            updatable=True,
        ),
        'key2': schema.Integer(
            'second key',
            required=True,
            updatable=True,
        ),
        'key3': schema.String(
            'third key',
        ),
    }

    def __init__(self, name, spec, **kwargs):
        super(DummyProfile, self).__init__(name, spec, **kwargs)
class AggregateByMetricsAlarm(Alarm):
    rule_schema = {
        METRICS: schema.String(
            _('Metrics to evaluate against.'),
            required=True,
        ),
        OPERATOR: schema.String(
            _('Comparison operator for evaluation.'),
            constraints=[
                constraints.AllowedValues(OPERATOR_VALUES),
            ],
            default=OP_EQUAL,
        ),
        THRESHOLD: schema.Number(
            _('Threshold for evaluation.'),
            required=True,
        ),
        GRANULARITY: schema.Integer(
            _('Length of every evaluation period in seconds.'),
            default=60,
        ),
        EVALUATIONS: schema.Integer(
            _('Number of periods to evaluate over.'),
            default=1,
        ),
        AGG_METHOD: schema.String(
            _('Statistics to evaluate. Must be one of %s.')
            % list(STATISTIC_VALUES),
            constraints=[
                constraints.AllowedValues(STATISTIC_VALUES),
            ],
            default=SV_AVG,
        ),
    }

    def __init__(self, name, spec, **kwargs):
        super(AggregateByMetricsAlarm, self).__init__(name, spec, **kwargs)

        rule_spec = spec.get('rule', {})
        self.rule = schema.Spec(self.rule_schema, rule_spec)
        self.namespace = 'gnocchi_aggregation_by_metrics_threshold'
def test_validate_fail_value_type_incorrect(self):
    spec_schema = {
        'key1': schema.String('first key', default='value1'),
        'key2': schema.Integer('second key', required=True),
    }

    data = {'key1': 'value1', 'key2': 'abc'}
    spec = schema.Spec(spec_schema, data, version='1.0')
    ex = self.assertRaises(exc.ESchema, spec.validate)
    self.assertIn("The value 'abc' is not a valid Integer",
                  six.text_type(ex.message))
class TestTrigger(trigger_base.Trigger):
    rule_schema = {
        'KEY1': schema.String('key1', default='default1'),
        'KEY2': schema.Integer('key2', default=1),
    }

    def __init__(self, name, spec, **kwargs):
        super(TestTrigger, self).__init__(name, spec, **kwargs)

        rule_spec = spec.get('rule', {})
        self.rule = schema.Spec(self.rule_schema, rule_spec)
        self.namespace = 'test'
def test_policy_validate_fail_required_key_missing(self):
    spec_schema = {
        'key1': schema.String('first key', default='value1'),
        'key2': schema.Integer('second key', required=True),
    }

    data = {'key1': 'value1'}
    spec = schema.Spec(spec_schema, data)
    ex = self.assertRaises(exception.SpecValidationFailed, spec.validate)
    msg = _('Required spec item "%s" not assigned') % 'key2'
    self.assertTrue(six.text_type(ex.message).find(msg) != -1)
def test_spec_validate_fail_value_type_incorrect(self):
    spec_schema = {
        'key1': schema.String('first key', default='value1'),
        'key2': schema.Integer('second key', required=True),
    }

    data = {'key1': 'value1', 'key2': 'abc'}
    spec = schema.Spec(spec_schema, data)
    ex = self.assertRaises(exception.SpecValidationFailed, spec.validate)
    msg = _('The value "%s" cannot be converted into an '
            'integer.') % data['key2']
    self.assertTrue(six.text_type(ex.message).find(msg) != -1)
class DummyPolicy(pb.Policy):
    VERSION = '1.0'
    properties_schema = {
        'key1': schema.String('first key', default='value1'),
        'key2': schema.Integer(
            'second key',
            required=True,
        ),
    }

    def __init__(self, name, spec, **kwargs):
        super(DummyPolicy, self).__init__(name, spec, **kwargs)
def test_spec_validate_good(self):
    spec_schema = {
        'key1': schema.String('first key', default='value1'),
        'key2': schema.Integer('second key', required=True),
    }

    data = {'key1': 'value1', 'key2': 2}
    spec = schema.Spec(spec_schema, data)
    self.assertIsNone(spec.validate())

    data = {'key2': 2}
    spec = schema.Spec(spec_schema, data)
    self.assertIsNone(spec.validate())
def test_to_schema_type(self):
    sot = schema.Integer('desc')

    res = sot.to_schema_type(123)
    self.assertEqual(123, res)

    res = sot.to_schema_type('123')
    self.assertEqual(123, res)

    res = sot.to_schema_type(False)
    self.assertEqual(0, res)

    ex = self.assertRaises(exc.ESchema, sot.to_schema_type, '456L')
    self.assertEqual("The value '456L' is not a valid Integer",
                     six.text_type(ex))
def test_resolve(self):
    sot = schema.Integer()

    res = sot.resolve(1)
    self.assertEqual(1, res)

    res = sot.resolve(True)
    self.assertEqual(1, res)

    res = sot.resolve(False)
    self.assertEqual(0, res)

    ex = self.assertRaises(exc.ESchema, sot.resolve, '456L')
    self.assertEqual("The value '456L' is not a valid Integer",
                     six.text_type(ex))
def test_validate_version_fail_version_over_max(self):
    spec_schema = {
        'type': schema.String('Type name', required=True),
        'version': schema.String('Version number', required=True),
        'key1': schema.String('first key', default='value1',
                              max_version='2.0'),
        'key2': schema.Integer('second key', required=True),
    }

    data = {
        'key1': 'value1',
        'key2': 2,
        'type': 'test-type',
        'version': '3.0'
    }
    spec = schema.Spec(spec_schema, data, version='3.0')
    ex = self.assertRaises(exc.ESchema, spec.validate)
    msg = 'key1 (max_version=2.0) is not supported by spec version 3.0.'
    self.assertIn(msg, str(ex.message))
def test_validate_version_fail_unsupported_version(self):
    spec_schema = {
        'type': schema.String('Type name', required=True),
        'version': schema.String('Version number', required=True),
        'key1': schema.String('first key', default='value1',
                              min_version='1.1'),
        'key2': schema.Integer('second key', required=True),
    }

    data = {
        'key1': 'value1',
        'key2': 2,
        'type': 'test-type',
        'version': '1.0'
    }
    spec = schema.Spec(spec_schema, data, version='1.0')
    ex = self.assertRaises(exc.ESchema, spec.validate)
    msg = 'key1 (min_version=1.1) is not supported by spec version 1.0.'
    self.assertIn(msg, six.text_type(ex.message))
def test_validate(self):
    sot = schema.Integer()

    res = sot.validate(1)
    self.assertIsNone(res)

    res = sot.validate('1')
    self.assertIsNone(res)

    res = sot.validate(True)
    self.assertIsNone(res)

    mock_constraints = self.patchobject(sot, 'validate_constraints',
                                        return_value=None)
    res = sot.validate(1)
    self.assertIsNone(res)
    mock_constraints.assert_called_once_with(1, schema=sot, context=None)

    ex = self.assertRaises(exc.ESchema, sot.validate, 'bogus')
    self.assertEqual("The value 'bogus' is not a valid Integer",
                     str(ex))
class LoadBalancingPolicy(base.Policy): """Policy for load balancing among members of a cluster. This policy is expected to be enforced before or after the membership of a cluster is changed. We need to refresh the load-balancer associated with the cluster (which could be created by the policy) when these actions are performed. """ VERSION = '1.1' VERSIONS = {'1.0': [{'status': consts.SUPPORTED, 'since': '2016.04'}]} PRIORITY = 500 TARGET = [ ('AFTER', consts.CLUSTER_ADD_NODES), ('AFTER', consts.CLUSTER_SCALE_OUT), ('AFTER', consts.CLUSTER_RESIZE), ('AFTER', consts.NODE_RECOVER), ('AFTER', consts.NODE_CREATE), ('BEFORE', consts.CLUSTER_DEL_NODES), ('BEFORE', consts.CLUSTER_SCALE_IN), ('BEFORE', consts.CLUSTER_RESIZE), ('BEFORE', consts.NODE_DELETE), ] PROFILE_TYPE = [ 'os.nova.server-1.0', ] KEYS = (POOL, VIP, HEALTH_MONITOR, LB_STATUS_TIMEOUT) = ('pool', 'vip', 'health_monitor', 'lb_status_timeout') _POOL_KEYS = ( POOL_PROTOCOL, POOL_PROTOCOL_PORT, POOL_SUBNET, POOL_LB_METHOD, POOL_ADMIN_STATE_UP, POOL_SESSION_PERSISTENCE, ) = ( 'protocol', 'protocol_port', 'subnet', 'lb_method', 'admin_state_up', 'session_persistence', ) PROTOCOLS = ( HTTP, HTTPS, TCP, ) = ( 'HTTP', 'HTTPS', 'TCP', ) LB_METHODS = ( ROUND_ROBIN, LEAST_CONNECTIONS, SOURCE_IP, ) = ( 'ROUND_ROBIN', 'LEAST_CONNECTIONS', 'SOURCE_IP', ) HEALTH_MONITOR_TYPES = ( PING, TCP, HTTP, HTTPS, ) = ( 'PING', 'TCP', 'HTTP', 'HTTPS', ) HTTP_METHODS = ( GET, POST, PUT, DELETE, ) = ( 'GET', 'POST', 'PUT', 'DELETE', ) _VIP_KEYS = ( VIP_SUBNET, VIP_ADDRESS, VIP_CONNECTION_LIMIT, VIP_PROTOCOL, VIP_PROTOCOL_PORT, VIP_ADMIN_STATE_UP, ) = ( 'subnet', 'address', 'connection_limit', 'protocol', 'protocol_port', 'admin_state_up', ) HEALTH_MONITOR_KEYS = ( HM_TYPE, HM_DELAY, HM_TIMEOUT, HM_MAX_RETRIES, HM_ADMIN_STATE_UP, HM_HTTP_METHOD, HM_URL_PATH, HM_EXPECTED_CODES, ) = ( 'type', 'delay', 'timeout', 'max_retries', 'admin_state_up', 'http_method', 'url_path', 'expected_codes', ) _SESSION_PERSISTENCE_KEYS = ( PERSISTENCE_TYPE, COOKIE_NAME, ) = ( 'type', 'cookie_name', ) PERSISTENCE_TYPES = ( PERSIST_SOURCE_IP, PERSIST_HTTP_COOKIE, PERSIST_APP_COOKIE, ) = ( 'SOURCE_IP', 'HTTP_COOKIE', 'APP_COOKIE', ) properties_schema = { POOL: schema.Map( _('LB pool properties.'), schema={ POOL_PROTOCOL: schema.String( _('Protocol used for load balancing.'), constraints=[ constraints.AllowedValues(PROTOCOLS), ], default=HTTP, ), POOL_PROTOCOL_PORT: schema.Integer( _('Port on which servers are running on the nodes.'), default=80, ), POOL_SUBNET: schema.String( _('Name or ID of subnet for the port on which nodes can ' 'be connected.'), required=True, ), POOL_LB_METHOD: schema.String( _('Load balancing algorithm.'), constraints=[ constraints.AllowedValues(LB_METHODS), ], default=ROUND_ROBIN, ), POOL_ADMIN_STATE_UP: schema.Boolean( _('Administrative state of the pool.'), default=True, ), POOL_SESSION_PERSISTENCE: schema.Map( _('Session persistence configuration.'), schema={ PERSISTENCE_TYPE: schema.String( _('Type of session persistence implementation.'), constraints=[ constraints.AllowedValues(PERSISTENCE_TYPES), ], ), COOKIE_NAME: schema.String( _('Name of cookie if type set to APP_COOKIE.'), ), }, default={}, ), }, ), VIP: schema.Map( _('VIP address and port of the pool.'), schema={ VIP_SUBNET: schema.String( _('Name or ID of Subnet on which the VIP address will be ' 'allocated.'), required=True, ), VIP_ADDRESS: schema.String( _('IP address of the VIP.'), default=None, ), VIP_CONNECTION_LIMIT: schema.Integer( _('Maximum number of connections per second allowed for ' 
'this VIP'), default=-1, ), VIP_PROTOCOL: schema.String( _('Protocol used for VIP.'), constraints=[ constraints.AllowedValues(PROTOCOLS), ], default=HTTP, ), VIP_PROTOCOL_PORT: schema.Integer( _('TCP port to listen on.'), default=80, ), VIP_ADMIN_STATE_UP: schema.Boolean( _('Administrative state of the VIP.'), default=True, ), }, ), HEALTH_MONITOR: schema.Map( _('Health monitor for loadbalancer.'), schema={ HM_TYPE: schema.String( _('The type of probe sent by the loadbalancer to verify ' 'the member state.'), constraints=[ constraints.AllowedValues(HEALTH_MONITOR_TYPES), ], default=PING, ), HM_DELAY: schema.Integer( _('The amount of time in milliseconds between sending ' 'probes to members.'), default=10, ), HM_TIMEOUT: schema.Integer( _('The maximum time in milliseconds that a monitor waits ' 'to connect before it times out.'), default=5, ), HM_MAX_RETRIES: schema.Integer( _('The number of allowed connection failures before ' 'changing the status of the member to INACTIVE.'), default=3, ), HM_ADMIN_STATE_UP: schema.Boolean( _('Administrative state of the health monitor.'), default=True, ), HM_HTTP_METHOD: schema.String( _('The HTTP method that the monitor uses for requests.'), constraints=[ constraints.AllowedValues(HTTP_METHODS), ], ), HM_URL_PATH: schema.String( _('The HTTP path of the request sent by the monitor to ' 'test the health of a member.'), ), HM_EXPECTED_CODES: schema.String( _('Expected HTTP codes for a passing HTTP(S) monitor.'), ), }, ), LB_STATUS_TIMEOUT: schema.Integer( _('Time in second to wait for loadbalancer to become ready ' 'after senlin requests LBaaS V2 service for operations.'), default=300, ) } def __init__(self, name, spec, **kwargs): super(LoadBalancingPolicy, self).__init__(name, spec, **kwargs) self.pool_spec = self.properties.get(self.POOL, {}) self.vip_spec = self.properties.get(self.VIP, {}) self.hm_spec = self.properties.get(self.HEALTH_MONITOR, None) self.lb_status_timeout = self.properties.get(self.LB_STATUS_TIMEOUT) self.lb = None def validate(self, context, validate_props=False): super(LoadBalancingPolicy, self).validate(context, validate_props) if not validate_props: return True nc = self.network(context.user, context.project) # validate pool subnet name_or_id = self.pool_spec.get(self.POOL_SUBNET) try: nc.subnet_get(name_or_id) except exc.InternalError: msg = _( "The specified %(key)s '%(value)s' could not be found.") % { 'key': self.POOL_SUBNET, 'value': name_or_id } raise exc.InvalidSpec(message=msg) # validate VIP subnet name_or_id = self.vip_spec.get(self.VIP_SUBNET) try: nc.subnet_get(name_or_id) except exc.InternalError: msg = _( "The specified %(key)s '%(value)s' could not be found.") % { 'key': self.VIP_SUBNET, 'value': name_or_id } raise exc.InvalidSpec(message=msg) def attach(self, cluster, enabled=True): """Routine to be invoked when policy is to be attached to a cluster. :param cluster: The cluster to which the policy is being attached to. :param enabled: The attached cluster policy is enabled or disabled. :returns: When the operation was successful, returns a tuple (True, message); otherwise, return a tuple (False, error). """ res, data = super(LoadBalancingPolicy, self).attach(cluster) if res is False: return False, data nodes = nm.Node.load_all(oslo_context.get_current(), cluster_id=cluster.id) lb_driver = self.lbaas(cluster.user, cluster.project) lb_driver.lb_status_timeout = self.lb_status_timeout # TODO(Anyone): Check if existing nodes has conflicts regarding the # subnets. 
Each VM addresses detail has a key named to the network # which can be used for validation. res, data = lb_driver.lb_create(self.vip_spec, self.pool_spec, self.hm_spec) if res is False: return False, data port = self.pool_spec.get(self.POOL_PROTOCOL_PORT) subnet = self.pool_spec.get(self.POOL_SUBNET) for node in nodes: member_id = lb_driver.member_add(node, data['loadbalancer'], data['pool'], port, subnet) if member_id is None: # When failed in adding member, remove all lb resources that # were created and return the failure reason. # TODO(anyone): May need to "roll-back" changes caused by any # successful member_add() calls. lb_driver.lb_delete(**data) return False, 'Failed in adding node into lb pool' node.data.update({'lb_member': member_id}) node.store(oslo_context.get_current()) cluster_data_lb = cluster.data.get('loadbalancers', {}) cluster_data_lb[self.id] = {'vip_address': data.pop('vip_address')} cluster.data['loadbalancers'] = cluster_data_lb policy_data = self._build_policy_data(data) return True, policy_data def detach(self, cluster): """Routine to be called when the policy is detached from a cluster. :param cluster: The cluster from which the policy is to be detached. :returns: When the operation was successful, returns a tuple of (True, data) where the data contains references to the resources created; otherwise returns a tuple of (False, err) where the err contains a error message. """ reason = _('LB resources deletion succeeded.') lb_driver = self.lbaas(cluster.user, cluster.project) lb_driver.lb_status_timeout = self.lb_status_timeout cp = cluster_policy.ClusterPolicy.load(oslo_context.get_current(), cluster.id, self.id) policy_data = self._extract_policy_data(cp.data) if policy_data is None: return True, reason res, reason = lb_driver.lb_delete(**policy_data) if res is False: return False, reason nodes = nm.Node.load_all(oslo_context.get_current(), cluster_id=cluster.id, project_safe=False) for node in nodes: if 'lb_member' in node.data: node.data.pop('lb_member') node.store(oslo_context.get_current()) lb_data = cluster.data.get('loadbalancers', {}) if lb_data and isinstance(lb_data, dict): lb_data.pop(self.id, None) if lb_data: cluster.data['loadbalancers'] = lb_data else: cluster.data.pop('loadbalancers') return True, reason def _get_delete_candidates(self, cluster_id, action): deletion = action.data.get('deletion', None) # No deletion field in action.data which means no scaling # policy or deletion policy is attached. 
candidates = None if deletion is None: if action.action == consts.NODE_DELETE: candidates = [action.node.id] count = 1 elif action.action == consts.CLUSTER_DEL_NODES: # Get candidates from action.input candidates = action.inputs.get('candidates', []) count = len(candidates) elif action.action == consts.CLUSTER_RESIZE: # Calculate deletion count based on action input db_cluster = co.Cluster.get(action.context, cluster_id) current = no.Node.count_by_cluster(action.context, cluster_id) scaleutils.parse_resize_params(action, db_cluster, current) if 'deletion' not in action.data: return [] else: count = action.data['deletion']['count'] else: # action.action == consts.CLUSTER_SCALE_IN count = 1 else: count = deletion.get('count', 0) candidates = deletion.get('candidates', None) # Still no candidates available, pick count of nodes randomly if candidates is None: if count == 0: return [] nodes = no.Node.get_all_by_cluster(action.context, cluster_id) if count > len(nodes): count = len(nodes) candidates = scaleutils.nodes_by_random(nodes, count) deletion_data = action.data.get('deletion', {}) deletion_data.update({ 'count': len(candidates), 'candidates': candidates }) action.data.update({'deletion': deletion_data}) return candidates def _remove_member(self, candidates, policy, action, driver, handle_err=True): # Load policy data policy_data = self._extract_policy_data(policy.data) lb_id = policy_data['loadbalancer'] pool_id = policy_data['pool'] failed_nodes = [] for node_id in candidates: node = no.Node.get(action.context, node_id=node_id) node_data = node.data or {} member_id = node_data.get('lb_member', None) if member_id is None: LOG.warning('Node %(n)s not found in lb pool %(p)s.', { 'n': node_id, 'p': pool_id }) continue res = driver.member_remove(lb_id, pool_id, member_id) values = {} if res is not True and handle_err is True: failed_nodes.append(node.id) values['status'] = consts.NS_WARNING values['status_reason'] = _( 'Failed in removing node from lb pool.') else: node.data.pop('lb_member', None) values['data'] = node.data no.Node.update(action.context, node_id, values) return failed_nodes def _add_member(self, candidates, policy, action, driver): # Load policy data policy_data = self._extract_policy_data(policy.data) lb_id = policy_data['loadbalancer'] pool_id = policy_data['pool'] port = self.pool_spec.get(self.POOL_PROTOCOL_PORT) subnet = self.pool_spec.get(self.POOL_SUBNET) failed_nodes = [] for node_id in candidates: node = no.Node.get(action.context, node_id=node_id) node_data = node.data or {} member_id = node_data.get('lb_member', None) if member_id: LOG.warning('Node %(n)s already in lb pool %(p)s.', { 'n': node_id, 'p': pool_id }) continue member_id = driver.member_add(node, lb_id, pool_id, port, subnet) values = {} if member_id is None: failed_nodes.append(node.id) values['status'] = consts.NS_WARNING values['status_reason'] = _( 'Failed in adding node into lb pool.') else: node.data.update({'lb_member': member_id}) values['data'] = node.data no.Node.update(action.context, node_id, values) return failed_nodes def _get_post_candidates(self, action): # This method will parse action data passed from action layer candidates = [] if action.action == consts.NODE_CREATE: candidates = [action.node.id] elif action.action == consts.NODE_RECOVER: recovery = action.outputs.get('recovery', None) if recovery is not None and 'action' in recovery: action_name = recovery['action'] if action_name.upper() == consts.RECOVER_RECREATE: candidates = recovery.get('node', []) else: creation = 
action.data.get('creation', None) candidates = creation.get('nodes', []) if creation else [] return candidates def pre_op(self, cluster_id, action): """Routine to be called before an action has been executed. For this particular policy, we take this chance to update the pool maintained by the load-balancer. :param cluster_id: The ID of the cluster on which a relevant action has been executed. :param action: The action object that triggered this operation. :returns: Nothing. """ candidates = self._get_delete_candidates(cluster_id, action) if len(candidates) == 0: return db_cluster = co.Cluster.get(action.context, cluster_id) lb_driver = self.lbaas(db_cluster.user, db_cluster.project) lb_driver.lb_status_timeout = self.lb_status_timeout cp = cluster_policy.ClusterPolicy.load(action.context, cluster_id, self.id) # Remove nodes that will be deleted from lb pool failed_nodes = self._remove_member(candidates, cp, action, lb_driver) if failed_nodes: error = _('Failed in removing deleted node(s) from lb pool: %s' ) % failed_nodes action.data['status'] = base.CHECK_ERROR action.data['reason'] = error return def post_op(self, cluster_id, action): """Routine to be called after an action has been executed. For this particular policy, we take this chance to update the pool maintained by the load-balancer. :param cluster_id: The ID of the cluster on which a relevant action has been executed. :param action: The action object that triggered this operation. :returns: Nothing. """ # TODO(Yanyanhu): Need special handling for cross-az scenario # which is supported by Neutron lbaas. candidates = self._get_post_candidates(action) if len(candidates) == 0: return db_cluster = co.Cluster.get(action.context, cluster_id) lb_driver = self.lbaas(db_cluster.user, db_cluster.project) lb_driver.lb_status_timeout = self.lb_status_timeout cp = cluster_policy.ClusterPolicy.load(action.context, cluster_id, self.id) if action.action == consts.NODE_RECOVER: self._remove_member(candidates, cp, action, lb_driver, handle_err=False) # Add new nodes to lb pool failed_nodes = self._add_member(candidates, cp, action, lb_driver) if failed_nodes: error = _('Failed in adding nodes into lb pool: %s') % failed_nodes action.data['status'] = base.CHECK_ERROR action.data['reason'] = error return
class ServerProfile(base.KubeBaseProfile): """Profile for an kubernetes master server.""" VERSIONS = {'1.0': [{'status': consts.EXPERIMENTAL, 'since': '2017.10'}]} KEYS = ( CONTEXT, FLAVOR, IMAGE, KEY_NAME, PUBLIC_NETWORK, BLOCK_DEVICE_MAPPING_V2, ) = ( 'context', 'flavor', 'image', 'key_name', 'public_network', 'block_device_mapping_v2', ) INTERNAL_KEYS = ( KUBEADM_TOKEN, KUBE_MASTER_IP, SECURITY_GROUP, PRIVATE_NETWORK, PRIVATE_SUBNET, PRIVATE_ROUTER, KUBE_MASTER_FLOATINGIP, KUBE_MASTER_FLOATINGIP_ID, SCALE_OUT_RECV_ID, SCALE_OUT_URL, ) = ( 'kubeadm_token', 'kube_master_ip', 'security_group', 'private_network', 'private_subnet', 'private_router', 'kube_master_floatingip', 'kube_master_floatingip_id', 'scale_out_recv_id', 'scale_out_url', ) NETWORK_KEYS = ( PORT, FIXED_IP, NETWORK, PORT_SECURITY_GROUPS, FLOATING_NETWORK, FLOATING_IP, ) = ( 'port', 'fixed_ip', 'network', 'security_groups', 'floating_network', 'floating_ip', ) BDM2_KEYS = ( BDM2_UUID, BDM2_SOURCE_TYPE, BDM2_DESTINATION_TYPE, BDM2_DISK_BUS, BDM2_DEVICE_NAME, BDM2_VOLUME_SIZE, BDM2_GUEST_FORMAT, BDM2_BOOT_INDEX, BDM2_DEVICE_TYPE, BDM2_DELETE_ON_TERMINATION, ) = ( 'uuid', 'source_type', 'destination_type', 'disk_bus', 'device_name', 'volume_size', 'guest_format', 'boot_index', 'device_type', 'delete_on_termination', ) properties_schema = { CONTEXT: schema.Map(_('Customized security context for operating servers.'), ), FLAVOR: schema.String( _('ID of flavor used for the server.'), required=True, updatable=True, ), IMAGE: schema.String( # IMAGE is not required, because there could be BDM or BDMv2 # support and the corresponding settings effective _('ID of image to be used for the new server.'), updatable=True, ), KEY_NAME: schema.String(_('Name of Nova keypair to be injected to server.'), ), PUBLIC_NETWORK: schema.String( _('Public network for kubernetes.'), required=True, ), BLOCK_DEVICE_MAPPING_V2: schema.List( _('A list specifying the properties of block devices to be used ' 'for this server.'), schema=schema.Map( _('A map specifying the properties of a block device to be ' 'used by the server.'), schema={ BDM2_UUID: schema.String( _('ID of the source image, snapshot or volume'), ), BDM2_SOURCE_TYPE: schema.String( _("Volume source type, must be one of 'image', " "'snapshot', 'volume' or 'blank'"), required=True, ), BDM2_DESTINATION_TYPE: schema.String( _("Volume destination type, must be 'volume' or " "'local'"), required=True, ), BDM2_DISK_BUS: schema.String(_('Bus of the device.'), ), BDM2_DEVICE_NAME: schema.String( _('Name of the device(e.g. vda, xda, ....).'), ), BDM2_VOLUME_SIZE: schema.Integer( _('Size of the block device in MB(for swap) and ' 'in GB(for other formats)'), required=True, ), BDM2_GUEST_FORMAT: schema.String( _('Specifies the disk file system format(e.g. swap, ' 'ephemeral, ...).'), ), BDM2_BOOT_INDEX: schema.Integer(_('Define the boot order of the device'), ), BDM2_DEVICE_TYPE: schema.String( _('Type of the device(e.g. disk, cdrom, ...).'), ), BDM2_DELETE_ON_TERMINATION: schema.Boolean( _('Whether to delete the volume when the server ' 'stops.'), ), }), ), } def __init__(self, type_name, name, **kwargs): super(ServerProfile, self).__init__(type_name, name, **kwargs) self.server_id = None def do_cluster_create(self, obj): self._generate_kubeadm_token(obj) self._create_security_group(obj) self._create_network(obj) def do_cluster_delete(self, obj): if obj.dependents and 'kube-node' in obj.dependents: msg = ("Cluster %s delete failed, " "Node clusters %s must be deleted first." 
% (obj.id, obj.dependents['kube-node'])) raise exc.EResourceDeletion(type='kubernetes.master', id=obj.id, message=msg) self._delete_network(obj) self._delete_security_group(obj) def do_create(self, obj): """Create a server for the node object. :param obj: The node object for which a server will be created. """ kwargs = {} for key in self.KEYS: if self.properties[key] is not None: kwargs[key] = self.properties[key] image_ident = self.properties[self.IMAGE] if image_ident is not None: image = self._validate_image(obj, image_ident, 'create') kwargs.pop(self.IMAGE) kwargs['imageRef'] = image.id flavor_ident = self.properties[self.FLAVOR] flavor = self._validate_flavor(obj, flavor_ident, 'create') kwargs.pop(self.FLAVOR) kwargs['flavorRef'] = flavor.id keypair_name = self.properties[self.KEY_NAME] if keypair_name: keypair = self._validate_keypair(obj, keypair_name, 'create') kwargs['key_name'] = keypair.name kwargs['name'] = obj.name metadata = self._build_metadata(obj, {}) kwargs['metadata'] = metadata jj_vars = {} cluster_data = self._get_cluster_data(obj) kwargs['networks'] = [{'uuid': cluster_data[self.PRIVATE_NETWORK]}] # Get user_data parameters from metadata jj_vars['KUBETOKEN'] = cluster_data[self.KUBEADM_TOKEN] jj_vars['MASTER_FLOATINGIP'] = cluster_data[ self.KUBE_MASTER_FLOATINGIP] block_device_mapping_v2 = self.properties[self.BLOCK_DEVICE_MAPPING_V2] if block_device_mapping_v2 is not None: kwargs['block_device_mapping_v2'] = self._resolve_bdm( obj, block_device_mapping_v2, 'create') # user_data = self.properties[self.USER_DATA] user_data = base.loadScript('./scripts/master.sh') if user_data is not None: # Use jinja2 to replace variables defined in user_data try: jj_t = jinja2.Template(user_data) user_data = jj_t.render(**jj_vars) except (jinja2.exceptions.UndefinedError, ValueError) as ex: # TODO(anyone) Handle jinja2 error pass ud = encodeutils.safe_encode(user_data) kwargs['user_data'] = encodeutils.safe_decode(base64.b64encode(ud)) sgid = self._get_security_group(obj) kwargs['security_groups'] = [{'name': sgid}] server = None resource_id = None try: server = self.compute(obj).server_create(**kwargs) self.compute(obj).wait_for_server(server.id) server = self.compute(obj).server_get(server.id) self._update_master_ip(obj, server.addresses[''][0]['addr']) self._associate_floatingip(obj, server) LOG.info("Created master node: %s" % server.id) return server.id except exc.InternalError as ex: if server and server.id: resource_id = server.id raise exc.EResourceCreation(type='server', message=six.text_type(ex), resource_id=resource_id) def do_delete(self, obj, **params): """Delete the physical resource associated with the specified node. :param obj: The node object to operate on. :param kwargs params: Optional keyword arguments for the delete operation. :returns: This operation always return True unless exception is caught. :raises: `EResourceDeletion` if interaction with compute service fails. 
""" if not obj.physical_id: return True server_id = obj.physical_id ignore_missing = params.get('ignore_missing', True) internal_ports = obj.data.get('internal_ports', []) force = params.get('force', False) try: self._disassociate_floatingip(obj, server_id) driver = self.compute(obj) if force: driver.server_force_delete(server_id, ignore_missing) else: driver.server_delete(server_id, ignore_missing) driver.wait_for_server_delete(server_id) if internal_ports: ex = self._delete_ports(obj, internal_ports) if ex: raise ex return True except exc.InternalError as ex: raise exc.EResourceDeletion(type='server', id=server_id, message=six.text_type(ex))
class ZonePlacementPolicy(base.Policy): """Policy for placing members of a cluster across availability zones.""" VERSION = '1.0' PRIORITY = 300 TARGET = [ ('BEFORE', consts.CLUSTER_SCALE_OUT), ('BEFORE', consts.CLUSTER_SCALE_IN), ('BEFORE', consts.CLUSTER_RESIZE), ] PROFILE_TYPE = [ 'os.nova.server-1.0', ] KEYS = ( ZONES, ) = ( 'zones', ) _AZ_KEYS = ( ZONE_NAME, ZONE_WEIGHT, ) = ( 'name', 'weight', ) properties_schema = { ZONES: schema.List( _('List of availability zones to choose from.'), schema=schema.Map( _('An availability zone as candidate.'), schema={ ZONE_NAME: schema.String( _('Name of an availability zone.'), ), ZONE_WEIGHT: schema.Integer( _('Weight of the availability zone (default is 100).'), default=100, required=False, ) }, ), ), } def __init__(self, name, spec, **kwargs): super(ZonePlacementPolicy, self).__init__(name, spec, **kwargs) self._novaclient = None self.zones = dict((z[self.ZONE_NAME], z[self.ZONE_WEIGHT]) for z in self.properties.get(self.ZONES)) def _nova(self, obj): """Construct nova client based on object. :param obj: Object for which the client is created. It is expected to be None when retrieving an existing client. When creating a client, it contains the user and project to be used. """ if self._novaclient is not None: return self._novaclient params = self._build_conn_params(obj) self._novaclient = driver_base.SenlinDriver().compute(params) return self._novaclient def _create_plan(self, current, zones, count, expand): """Compute a placement plan based on the weights of AZs. :param current: Distribution of existing nodes. :returns: A dict that contains a placement plan. """ # sort candidate zones by distribution and covert it into a list candidates = sorted(zones.items(), key=operator.itemgetter(1), reverse=expand) sum_weight = sum(zones.values()) if expand: total = count + sum(current.values()) else: total = sum(current.values()) - count remain = count plan = dict.fromkeys(zones.keys(), 0) for i in range(len(zones)): zone = candidates[i][0] weight = candidates[i][1] q = total * weight / float(sum_weight) if expand: quota = int(math.ceil(q)) headroom = quota - current[zone] else: quota = int(math.floor(q)) headroom = current[zone] - quota if headroom <= 0: continue if headroom < remain: plan[zone] = headroom remain -= headroom else: plan[zone] = remain if remain > 0 else 0 remain = 0 break if remain > 0: return None # filter out zero values result = {} for z, c in plan.items(): if c > 0: result[z] = c return result def _get_count(self, cluster_id, action): """Get number of nodes to create or delete. :param cluster_id: The ID of the target cluster. :param action: The action object which triggered this policy check. :return: An integer value which can be 1) positive - number of nodes to create; 2) negative - number of nodes to delete; 3) 0 - something wrong happened, and the policy check failed. 
""" if action.action == consts.CLUSTER_RESIZE: if action.data.get('deletion', None): return -action.data['deletion']['count'] elif action.data.get('creation', None): return action.data['creation']['count'] db_cluster = db_api.cluster_get(action.context, cluster_id) res = scaleutils.parse_resize_params(action, db_cluster) if res[0] == base.CHECK_ERROR: action.data['status'] = base.CHECK_ERROR action.data['reason'] = res[1] LOG.error(res[1]) return 0 if action.data.get('deletion', None): return -action.data['deletion']['count'] else: return action.data['creation']['count'] if action.action == consts.CLUSTER_SCALE_IN: pd = action.data.get('deletion', None) if pd is None: return -action.inputs.get('count', 1) else: return -pd.get('count', 1) # CLUSTER_SCALE_OUT: an action that inflates the cluster pd = action.data.get('creation', None) if pd is None: return action.inputs.get('count', 1) else: return pd.get('count', 1) def pre_op(self, cluster_id, action): """Callback function when cluster membership is about to change. :param cluster_id: ID of the target cluster. :param action: The action that triggers this policy check. """ count = self._get_count(cluster_id, action) if count == 0: return expand = True if count < 0: expand = False count = -count cluster = cluster_mod.Cluster.load(action.context, cluster_id) nc = self._nova(cluster) zones_good = nc.validate_azs(self.zones.keys()) if len(zones_good) == 0: action.data['status'] = base.CHECK_ERROR action.data['reason'] = _('No availability zone found available.') LOG.error(_LE('No availability zone found available.')) return zones = {} for z, w in self.zones.items(): if z in zones_good: zones[z] = w current = cluster.get_zone_distribution(action.context, zones.keys()) result = self._create_plan(current, zones, count, expand) if not result: action.data['status'] = base.CHECK_ERROR action.data['reason'] = _('There is no feasible plan to ' 'handle all nodes.') LOG.error(_LE('There is no feasible plan to handle all nodes.')) return if expand: if 'creation' not in action.data: action.data['creation'] = {} action.data['creation']['count'] = count action.data['creation']['zones'] = result else: if 'deletion' not in action.data: action.data['deletion'] = {} action.data['deletion']['count'] = count action.data['deletion']['zones'] = result
class StackProfile(base.Profile): '''Profile for an OpenStack Heat stack. When this profile is used, the whole cluster is a collection of Heat stacks. ''' KEYS = ( CONTEXT, TEMPLATE, TEMPLATE_URL, PARAMETERS, FILES, TIMEOUT, DISABLE_ROLLBACK, ENVIRONMENT, ) = ( 'context', 'template', 'template_url', 'parameters', 'files', 'timeout', 'disable_rollback', 'environment', ) properties_schema = { CONTEXT: schema.Map( _('A dictionary for specifying the customized context for ' 'stack operations'), default={}, ), TEMPLATE: schema.Map( _('Heat stack template.'), default={}, updatable=True, ), TEMPLATE_URL: schema.String( _('Heat stack template url.'), default='', updatable=True, ), PARAMETERS: schema.Map( _('Parameters to be passed to Heat for stack operations.'), default={}, updatable=True, ), FILES: schema.Map( _('Contents of files referenced by the template, if any.'), default={}, updatable=True, ), TIMEOUT: schema.Integer( _('A integer that specifies the number of minutes that a ' 'stack operation times out.'), updatable=True, ), DISABLE_ROLLBACK: schema.Boolean( _('A boolean specifying whether a stack operation can be ' 'rolled back.'), default=True, updatable=True, ), ENVIRONMENT: schema.Map( _('A map that specifies the environment used for stack ' 'operations.'), default={}, updatable=True, ) } OP_NAMES = ( OP_ABANDON, ) = ( 'abandon', ) OPERATIONS = { OP_ABANDON: schema.Map( _('Abandon a heat stack node.'), ) } def __init__(self, type_name, name, **kwargs): super(StackProfile, self).__init__(type_name, name, **kwargs) self.stack_id = None def validate(self, validate_props=False): '''Validate the schema and the data provided.''' # general validation self.spec_data.validate() self.properties.validate() # validate template template = self.properties[self.TEMPLATE] template_url = self.properties[self.TEMPLATE_URL] if not template and not template_url: msg = _("Both template and template_url are not specified " "for profile '%s'.") % self.name raise exc.InvalidSpec(message=msg) if validate_props: self.do_validate(obj=self) def do_validate(self, obj): """Validate the stack template used by a node. :param obj: Node object to operate. :returns: True if validation succeeds. :raises: `InvalidSpec` exception is raised if template is invalid. """ kwargs = { 'stack_name': utils.random_name(), 'template': self.properties[self.TEMPLATE], 'template_url': self.properties[self.TEMPLATE_URL], 'parameters': self.properties[self.PARAMETERS], 'files': self.properties[self.FILES], 'environment': self.properties[self.ENVIRONMENT], 'preview': True, } try: self.orchestration(obj).stack_create(**kwargs) except exc.InternalError as ex: msg = _('Failed in validating template: %s') % six.text_type(ex) raise exc.InvalidSpec(message=msg) return True def do_create(self, obj): """Create a heat stack using the given node object. :param obj: The node object to operate on. :returns: The UUID of the heat stack created. 
""" kwargs = { 'stack_name': obj.name + '-' + utils.random_name(8), 'template': self.properties[self.TEMPLATE], 'template_url': self.properties[self.TEMPLATE_URL], 'timeout_mins': self.properties[self.TIMEOUT], 'disable_rollback': self.properties[self.DISABLE_ROLLBACK], 'parameters': self.properties[self.PARAMETERS], 'files': self.properties[self.FILES], 'environment': self.properties[self.ENVIRONMENT], } try: stack = self.orchestration(obj).stack_create(**kwargs) # Timeout = None means we will use the 'default_action_timeout' # It can be overridden by the TIMEOUT profile propertie timeout = None if self.properties[self.TIMEOUT]: timeout = self.properties[self.TIMEOUT] * 60 self.orchestration(obj).wait_for_stack(stack.id, 'CREATE_COMPLETE', timeout=timeout) return stack.id except exc.InternalError as ex: raise exc.EResourceCreation(type='stack', message=ex.message) def do_delete(self, obj, **params): """Delete the physical stack behind the node object. :param obj: The node object to operate on. :param kwargs params: Optional keyword arguments for the delete operation. :returns: This operation always return True unless exception is caught. :raises: `EResourceDeletion` if interaction with heat fails. """ stack_id = obj.physical_id ignore_missing = params.get('ignore_missing', True) try: self.orchestration(obj).stack_delete(stack_id, ignore_missing) self.orchestration(obj).wait_for_stack_delete(stack_id) except exc.InternalError as ex: raise exc.EResourceDeletion(type='stack', id=stack_id, message=six.text_type(ex)) return True def do_update(self, obj, new_profile, **params): """Perform update on object. :param obj: the node object to operate on :param new_profile: the new profile used for updating :param params: other parameters for the update request. :returns: A boolean indicating whether the operation is successful. """ self.stack_id = obj.physical_id if not self.stack_id: return False if not self.validate_for_update(new_profile): return False fields = {} new_template = new_profile.properties[new_profile.TEMPLATE] if new_template != self.properties[self.TEMPLATE]: fields['template'] = new_template new_params = new_profile.properties[new_profile.PARAMETERS] if new_params != self.properties[self.PARAMETERS]: fields['parameters'] = new_params new_timeout = new_profile.properties[new_profile.TIMEOUT] if new_timeout != self.properties[self.TIMEOUT]: fields['timeout_mins'] = new_timeout new_dr = new_profile.properties[new_profile.DISABLE_ROLLBACK] if new_dr != self.properties[self.DISABLE_ROLLBACK]: fields['disable_rollback'] = new_dr new_files = new_profile.properties[new_profile.FILES] if new_files != self.properties[self.FILES]: fields['files'] = new_files new_environment = new_profile.properties[new_profile.ENVIRONMENT] if new_environment != self.properties[self.ENVIRONMENT]: fields['environment'] = new_environment if not fields: return True try: hc = self.orchestration(obj) # Timeout = None means we will use the 'default_action_timeout' # It can be overridden by the TIMEOUT profile propertie timeout = None if self.properties[self.TIMEOUT]: timeout = self.properties[self.TIMEOUT] * 60 hc.stack_update(self.stack_id, **fields) hc.wait_for_stack(self.stack_id, 'UPDATE_COMPLETE', timeout=timeout) except exc.InternalError as ex: raise exc.EResourceUpdate(type='stack', id=self.stack_id, message=ex.message) return True def do_check(self, obj): """Check stack status. :param obj: Node object to operate. :returns: True if check succeeded, or False otherwise. 
""" stack_id = obj.physical_id if stack_id is None: return False hc = self.orchestration(obj) try: # Timeout = None means we will use the 'default_action_timeout' # It can be overridden by the TIMEOUT profile propertie timeout = None if self.properties[self.TIMEOUT]: timeout = self.properties[self.TIMEOUT] * 60 hc.stack_check(stack_id) hc.wait_for_stack(stack_id, 'CHECK_COMPLETE', timeout=timeout) except exc.InternalError as ex: LOG.error(_LE('Failed in checking stack: %s.'), ex) return False return True def do_get_details(self, obj): if not obj.physical_id: return {} try: stack = self.orchestration(obj).stack_get(obj.physical_id) return stack.to_dict() except exc.InternalError as ex: return { 'Error': { 'code': ex.code, 'message': six.text_type(ex) } } def handle_abandon(self, obj, **options): """Handler for abandoning a heat stack node.""" pass
class BatchPolicy(base.Policy):
    """Policy for batching the operations on a cluster's nodes."""

    VERSION = '1.0'
    VERSIONS = {
        '1.0': [
            {'status': consts.EXPERIMENTAL, 'since': '2017.02'}
        ]
    }
    PRIORITY = 200

    TARGET = [
        ('BEFORE', consts.CLUSTER_UPDATE),
    ]

    PROFILE_TYPE = ['ANY']

    KEYS = (
        MIN_IN_SERVICE, MAX_BATCH_SIZE, PAUSE_TIME,
    ) = (
        'min_in_service', 'max_batch_size', 'pause_time',
    )

    properties_schema = {
        MIN_IN_SERVICE: schema.Integer(
            _('Minimum number of nodes in service when performing updates.'),
            default=1,
        ),
        MAX_BATCH_SIZE: schema.Integer(
            _('Maximum number of nodes that will be updated in parallel.'),
            default=-1,
        ),
        PAUSE_TIME: schema.Integer(
            _('Interval in seconds between update batches if any.'),
            default=60,
        )
    }

    def __init__(self, name, spec, **kwargs):
        super(BatchPolicy, self).__init__(name, spec, **kwargs)

        self.min_in_service = self.properties[self.MIN_IN_SERVICE]
        self.max_batch_size = self.properties[self.MAX_BATCH_SIZE]
        self.pause_time = self.properties[self.PAUSE_TIME]

    def _get_batch_size(self, total):
        """Get batch size for update operation.

        :param total: Total number of nodes.
        :returns: Size of each batch and number of batches.
        """
        # if the number of nodes is less than min_in_service,
        # we divide them into 2 batches
        diff = int(math.ceil(float(total) / 2))
        if total > self.min_in_service:
            diff = total - self.min_in_service

        # max_batch_size is -1 if not specified
        if self.max_batch_size == -1 or diff < self.max_batch_size:
            batch_size = diff
        else:
            batch_size = self.max_batch_size

        batch_num = int(math.ceil(float(total) / float(batch_size)))

        return batch_size, batch_num

    def _pick_nodes(self, nodes, batch_size, batch_num):
        """Select nodes based on size and number of batches.

        :param nodes: list of node objects.
        :param batch_size: the number of nodes of each batch.
        :param batch_num: the number of batches.
        :returns: a list of sets containing the nodes' IDs we selected
                  based on the input params.
        """
        candidates, good = su.filter_error_nodes(nodes)
        result = []

        # NOTE: we leave the nodes known to be good (ACTIVE) at the end of
        # the list so that we have a better chance to ensure the
        # 'min_in_service' constraint
        for n in good:
            candidates.append(n.id)

        for start in range(0, len(candidates), batch_size):
            end = start + batch_size
            result.append(set(candidates[start:end]))

        return result

    def _create_plan(self, action):
        nodes = action.entity.nodes
        plan = {'pause_time': self.pause_time}

        if len(nodes) == 0:
            plan['plan'] = []
            return True, plan

        batch_size, batch_num = self._get_batch_size(len(nodes))
        plan['plan'] = self._pick_nodes(nodes, batch_size, batch_num)

        return True, plan

    def pre_op(self, cluster_id, action):
        pd = {
            'status': base.CHECK_OK,
            'reason': _('Batching request validated.'),
        }

        # for updating
        result, value = self._create_plan(action)
        if result is False:
            pd = {
                'status': base.CHECK_ERROR,
                'reason': value,
            }
        else:
            pd['update'] = value

        action.data.update(pd)
        action.store(action.context)

        return
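# Standalone sketch (hypothetical numbers) of the batch-size arithmetic in
# _get_batch_size above: with 5 nodes, min_in_service=1 and no
# max_batch_size cap (-1), up to 4 nodes may be updated per batch, so the
# update proceeds in 2 batches.
import math

total, min_in_service, max_batch_size = 5, 1, -1
diff = total - min_in_service if total > min_in_service else int(
    math.ceil(float(total) / 2))
batch_size = diff if max_batch_size == -1 or diff < max_batch_size \
    else max_batch_size
batch_num = int(math.ceil(float(total) / float(batch_size)))
print(batch_size, batch_num)  # 4 2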
class HealthPolicy(base.Policy): """Policy for health management of a cluster.""" VERSION = '1.1' VERSIONS = { '1.0': [ { 'status': consts.EXPERIMENTAL, 'since': '2017.02' }, { 'status': consts.SUPPORTED, 'since': '2018.06' }, ], '1.1': [{ 'status': consts.SUPPORTED, 'since': '2018.09' }], } PRIORITY = 600 TARGET = [ ('BEFORE', consts.CLUSTER_RECOVER), ('BEFORE', consts.CLUSTER_DEL_NODES), ('BEFORE', consts.CLUSTER_SCALE_IN), ('BEFORE', consts.CLUSTER_RESIZE), ('BEFORE', consts.NODE_DELETE), ('AFTER', consts.CLUSTER_DEL_NODES), ('AFTER', consts.CLUSTER_SCALE_IN), ('AFTER', consts.CLUSTER_RESIZE), ('AFTER', consts.NODE_DELETE), ] # Should be ANY if profile provides health check support? PROFILE_TYPE = [ 'os.nova.server', 'os.heat.stack', ] KEYS = (DETECTION, RECOVERY) = ('detection', 'recovery') _DETECTION_KEYS = (DETECTION_MODES, DETECTION_TYPE, DETECTION_OPTIONS, DETECTION_INTERVAL, NODE_UPDATE_TIMEOUT, RECOVERY_CONDITIONAL) = ('detection_modes', 'type', 'options', 'interval', 'node_update_timeout', 'recovery_conditional') _DETECTION_OPTIONS = ( POLL_URL, POLL_URL_SSL_VERIFY, POLL_URL_CONN_ERROR_AS_UNHEALTHY, POLL_URL_HEALTHY_RESPONSE, POLL_URL_RETRY_LIMIT, POLL_URL_RETRY_INTERVAL, ) = ('poll_url', 'poll_url_ssl_verify', 'poll_url_conn_error_as_unhealthy', 'poll_url_healthy_response', 'poll_url_retry_limit', 'poll_url_retry_interval') _RECOVERY_KEYS = ( RECOVERY_ACTIONS, RECOVERY_FENCING, RECOVERY_DELETE_TIMEOUT, RECOVERY_FORCE_RECREATE, ) = ( 'actions', 'fencing', 'node_delete_timeout', 'node_force_recreate', ) FENCING_OPTION_VALUES = ( COMPUTE, # STORAGE, NETWORK, ) = ( 'COMPUTE', # 'STORAGE', 'NETWORK' ) ACTION_KEYS = ( ACTION_NAME, ACTION_PARAMS, ) = ( 'name', 'params', ) properties_schema = { DETECTION: schema.Map( _('Policy aspect for node failure detection.'), schema={ DETECTION_INTERVAL: schema.Integer( _("Number of seconds between pollings. Only " "required when type is 'NODE_STATUS_POLLING' or " "'NODE_STATUS_POLL_URL'."), default=60, ), NODE_UPDATE_TIMEOUT: schema.Integer( _("Number of seconds since last node update to " "wait before checking node health."), default=300, ), RECOVERY_CONDITIONAL: schema.String( _("The conditional that determines when recovery should be" " performed in case multiple detection modes are " "specified. 'ALL_FAILED' means that all " "detection modes have to return failed health checks " "before a node is recovered. 'ANY_FAILED'" " means that a failed health check with a single " "detection mode triggers a node recovery."), constraints=[ constraints.AllowedValues(consts.RECOVERY_CONDITIONAL), ], default=consts.ANY_FAILED, required=False, ), DETECTION_MODES: schema.List( _('List of node failure detection modes.'), schema=schema.Map( _('Node failure detection mode to try'), schema={ DETECTION_TYPE: schema.String( _('Type of node failure detection.'), constraints=[ constraints.AllowedValues( consts.DETECTION_TYPES), ], required=True, ), DETECTION_OPTIONS: schema.Map(schema={ POLL_URL: schema.String( _("URL to poll for node status. See " "documentation for valid expansion " "parameters. Only required " "when type is " "'NODE_STATUS_POLL_URL'."), default='', ), POLL_URL_SSL_VERIFY: schema.Boolean( _("Whether to verify SSL when calling " "URL to poll for node status. Only " "required when type is " "'NODE_STATUS_POLL_URL'."), default=True, ), POLL_URL_CONN_ERROR_AS_UNHEALTHY: schema.Boolean( _("Whether to treat URL connection " "errors as an indication of an " "unhealthy node. 
Only required " "when type is " "'NODE_STATUS_POLL_URL'."), default=True, ), POLL_URL_HEALTHY_RESPONSE: schema.String( _("String pattern in the poll URL " "response body that indicates a " "healthy node. Required when type " "is 'NODE_STATUS_POLL_URL'."), default='', ), POLL_URL_RETRY_LIMIT: schema.Integer( _("Number of times to retry URL " "polling when its return body is " "missing POLL_URL_HEALTHY_RESPONSE " "string before a node is considered " "down. Required when type is " "'NODE_STATUS_POLL_URL'."), default=3, ), POLL_URL_RETRY_INTERVAL: schema.Integer( _("Number of seconds between URL " "polling retries before a node is " "considered down. Required when " "type is 'NODE_STATUS_POLL_URL'."), default=3, ), }, default={}), })) }, required=True, ), RECOVERY: schema.Map( _('Policy aspect for node failure recovery.'), schema={ RECOVERY_ACTIONS: schema.List(_('List of actions to try for node recovery.'), schema=schema.Map( _('Action to try for node recovery.'), schema={ ACTION_NAME: schema.String( _("Name of action to execute."), constraints=[ constraints.AllowedValues( consts.RECOVERY_ACTIONS), ], required=True), ACTION_PARAMS: schema.Map(_("Parameters for the action")), })), RECOVERY_FENCING: schema.List( _('List of services to be fenced.'), schema=schema.String( _('Service to be fenced.'), constraints=[ constraints.AllowedValues(FENCING_OPTION_VALUES), ], required=True, ), ), RECOVERY_DELETE_TIMEOUT: schema.Integer( _("Number of seconds to wait for node deletion to " "finish and start node creation for recreate " "recovery option. Required when type is " "'NODE_STATUS_POLL_URL and recovery action " "is RECREATE'."), default=20, ), RECOVERY_FORCE_RECREATE: schema.Boolean( _("Whether to create node even if node deletion " "failed. Required when type is " "'NODE_STATUS_POLL_URL' and action recovery " "action is RECREATE."), default=False, ), }, required=True, ), } def __init__(self, name, spec, **kwargs): super(HealthPolicy, self).__init__(name, spec, **kwargs) self.interval = self.properties[self.DETECTION].get( self.DETECTION_INTERVAL, 60) self.node_update_timeout = self.properties[self.DETECTION].get( self.NODE_UPDATE_TIMEOUT, 300) self.recovery_conditional = self.properties[self.DETECTION].get( self.RECOVERY_CONDITIONAL, consts.ANY_FAILED) DetectionMode = namedtuple('DetectionMode', [self.DETECTION_TYPE] + list(self._DETECTION_OPTIONS)) self.detection_modes = [] raw_modes = self.properties[self.DETECTION][self.DETECTION_MODES] for mode in raw_modes: options = mode[self.DETECTION_OPTIONS] self.detection_modes.append( DetectionMode( mode[self.DETECTION_TYPE], options.get(self.POLL_URL, ''), options.get(self.POLL_URL_SSL_VERIFY, True), options.get(self.POLL_URL_CONN_ERROR_AS_UNHEALTHY, True), options.get(self.POLL_URL_HEALTHY_RESPONSE, ''), options.get(self.POLL_URL_RETRY_LIMIT, ''), options.get(self.POLL_URL_RETRY_INTERVAL, ''))) recover_settings = self.properties[self.RECOVERY] self.recover_actions = recover_settings[self.RECOVERY_ACTIONS] self.fencing_types = recover_settings[self.RECOVERY_FENCING] self.node_delete_timeout = recover_settings.get( self.RECOVERY_DELETE_TIMEOUT, None) self.node_force_recreate = recover_settings.get( self.RECOVERY_FORCE_RECREATE, False) def validate(self, context, validate_props=False): super(HealthPolicy, self).validate(context, validate_props=validate_props) if len(self.recover_actions) > 1: message = _( "Only one '%s' is supported for now.") % self.RECOVERY_ACTIONS raise exc.ESchema(message=message) if self.interval < 
cfg.CONF.health_check_interval_min: message = _("Specified interval of %(interval)d seconds has to be " "larger than health_check_interval_min of " "%(min_interval)d seconds set in configuration.") % { "interval": self.interval, "min_interval": cfg.CONF.health_check_interval_min } raise exc.InvalidSpec(message=message) # check valid detection types polling_types = [ consts.NODE_STATUS_POLLING, consts.NODE_STATUS_POLL_URL ] has_valid_polling_types = all(d.type in polling_types for d in self.detection_modes) has_valid_lifecycle_type = (len(self.detection_modes) == 1 and self.detection_modes[0].type == consts.LIFECYCLE_EVENTS) if not has_valid_polling_types and not has_valid_lifecycle_type: message = ("Invalid detection modes in health policy: %s" % ', '.join([d.type for d in self.detection_modes])) raise exc.InvalidSpec(message=message) if len(self.detection_modes) != len(set(self.detection_modes)): message = ("Duplicate detection modes are not allowed in " "health policy: %s" % ', '.join([d.type for d in self.detection_modes])) raise exc.InvalidSpec(message=message) # TODO(Qiming): Add detection of duplicated action names when # support to list of actions is implemented. def attach(self, cluster, enabled=True): """"Hook for policy attach. Register the cluster for health management. :param cluster: The cluster to which the policy is being attached to. :param enabled: The attached cluster policy is enabled or disabled. :return: A tuple comprising execution result and policy data. """ p_type = cluster.rt['profile'].type_name action_names = [a['name'] for a in self.recover_actions] if p_type != 'os.nova.server': if consts.RECOVER_REBUILD in action_names: err_msg = _("Recovery action REBUILD is only applicable to " "os.nova.server clusters.") return False, err_msg if consts.RECOVER_REBOOT in action_names: err_msg = _("Recovery action REBOOT is only applicable to " "os.nova.server clusters.") return False, err_msg kwargs = { 'interval': self.interval, 'node_update_timeout': self.node_update_timeout, 'params': { 'recover_action': self.recover_actions, 'node_delete_timeout': self.node_delete_timeout, 'node_force_recreate': self.node_force_recreate, 'recovery_conditional': self.recovery_conditional, }, 'enabled': enabled } converted_detection_modes = [d._asdict() for d in self.detection_modes] detection_mode = {'detection_modes': converted_detection_modes} kwargs['params'].update(detection_mode) health_manager.register(cluster.id, engine_id=None, **kwargs) data = { 'interval': self.interval, 'node_update_timeout': self.node_update_timeout, 'recovery_conditional': self.recovery_conditional, 'node_delete_timeout': self.node_delete_timeout, 'node_force_recreate': self.node_force_recreate, } data.update(detection_mode) return True, self._build_policy_data(data) def detach(self, cluster): """Hook for policy detach. Unregister the cluster for health management. :param cluster: The target cluster. :returns: A tuple comprising the execution result and reason. """ ret = health_manager.unregister(cluster.id) if not ret: LOG.warning( 'Unregistering health manager for cluster %s ' 'timed out.', cluster.id) return True, '' def pre_op(self, cluster_id, action, **args): """Hook before action execution. One of the task for this routine is to disable health policy if the action is a request that will shrink the cluster. The reason is that the policy may attempt to recover nodes that are to be deleted. :param cluster_id: The ID of the target cluster. :param action: The action to be examined. 
:param kwargs args: Other keyword arguments to be checked. :returns: Boolean indicating whether the checking passed. """ if action.action in (consts.CLUSTER_SCALE_IN, consts.CLUSTER_DEL_NODES, consts.NODE_DELETE): health_manager.disable(cluster_id) return True if action.action == consts.CLUSTER_RESIZE: deletion = action.data.get('deletion', None) if deletion: health_manager.disable(cluster_id) return True cluster = action.entity current = len(cluster.nodes) res, reason = scaleutils.parse_resize_params( action, cluster, current) if res == base.CHECK_ERROR: action.data['status'] = base.CHECK_ERROR action.data['reason'] = reason return False if action.data.get('deletion', None): health_manager.disable(cluster_id) return True pd = { 'recover_action': self.recover_actions, 'fencing': self.fencing_types, } action.data.update({'health': pd}) action.store(action.context) return True def post_op(self, cluster_id, action, **args): """Hook after action execution. One of the tasks of this routine is to re-enable the health policy if the action is a request that shrinks the cluster and the policy has therefore been temporarily disabled. :param cluster_id: The ID of the target cluster. :param action: The action to be examined. :param kwargs args: Other keyword arguments to be checked. :returns: Boolean indicating whether the checking passed. """ if action.action in (consts.CLUSTER_SCALE_IN, consts.CLUSTER_DEL_NODES, consts.NODE_DELETE): health_manager.enable(cluster_id) return True if action.action == consts.CLUSTER_RESIZE: deletion = action.data.get('deletion', None) if deletion: health_manager.enable(cluster_id) return True cluster = action.entity current = len(cluster.nodes) res, reason = scaleutils.parse_resize_params( action, cluster, current) if res == base.CHECK_ERROR: action.data['status'] = base.CHECK_ERROR action.data['reason'] = reason return False if action.data.get('deletion', None): health_manager.enable(cluster_id) return True return True
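# Illustrative, standalone sketch (not part of the policy module above): it
# mimics the detection-mode validation rule applied in HealthPolicy.validate(),
# i.e. either every mode is a polling type or there is exactly one
# LIFECYCLE_EVENTS mode, and duplicates are rejected. The constant names and
# the reduced DetectionMode fields here are assumptions made for the example.
from collections import namedtuple

NODE_STATUS_POLLING = 'NODE_STATUS_POLLING'
NODE_STATUS_POLL_URL = 'NODE_STATUS_POLL_URL'
LIFECYCLE_EVENTS = 'LIFECYCLE_EVENTS'

DetectionMode = namedtuple('DetectionMode', ['type', 'poll_url'])


def check_detection_modes(modes):
    """Return an error string for an invalid mode combination, or None."""
    polling_types = (NODE_STATUS_POLLING, NODE_STATUS_POLL_URL)
    all_polling = all(m.type in polling_types for m in modes)
    single_lifecycle = (len(modes) == 1 and modes[0].type == LIFECYCLE_EVENTS)
    if not all_polling and not single_lifecycle:
        return 'Invalid detection modes: %s' % ', '.join(m.type for m in modes)
    if len(modes) != len(set(modes)):
        return 'Duplicate detection modes: %s' % ', '.join(m.type for m in modes)
    return None


if __name__ == '__main__':
    ok = [DetectionMode(NODE_STATUS_POLLING, ''),
          DetectionMode(NODE_STATUS_POLL_URL, 'http://example/health')]
    bad = [DetectionMode(LIFECYCLE_EVENTS, ''),
           DetectionMode(NODE_STATUS_POLLING, '')]
    assert check_detection_modes(ok) is None
    assert check_detection_modes(bad) is not None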
class BatchPolicy(base.Policy): """Policy for batching the operations on a cluster's nodes.""" VERSION = '1.0' VERSIONS = { '1.0': [ {'status': consts.EXPERIMENTAL, 'since': '2017.02'} ] } PRIORITY = 200 TARGET = [ ('BEFORE', consts.CLUSTER_UPDATE), ('BEFORE', consts.CLUSTER_DELETE), ] PROFILE_TYPE = [ 'ANY' ] KEYS = ( MIN_IN_SERVICE, MAX_BATCH_SIZE, PAUSE_TIME, ) = ( 'min_in_service', 'max_batch_size', 'pause_time', ) properties_schema = { MIN_IN_SERVICE: schema.Integer( _('Minimum number of nodes in service when performing updates.'), default=1, ), MAX_BATCH_SIZE: schema.Integer( _('Maximum number of nodes that will be updated in parallel.'), default=-1, ), PAUSE_TIME: schema.Integer( _('Interval in seconds between update batches if any.'), default=60, ) } def __init__(self, name, spec, **kwargs): super(BatchPolicy, self).__init__(name, spec, **kwargs) self.min_in_service = self.properties[self.MIN_IN_SERVICE] self.max_batch_size = self.properties[self.MAX_BATCH_SIZE] self.pause_time = self.properties[self.PAUSE_TIME] def _cal_batch_size(self, total, action_name): batch_num = 0 batch_size = 0 diff = 0 # if the action is CLUSTER_DELETE or number of nodes less than # min_in_service, we divided it to 2 batches diff = int(math.ceil(float(total) / 2)) if (action_name == consts.CLUSTER_UPDATE and total > self.min_in_service): diff = total - self.min_in_service # max_batch_size is -1 if not specified if self.max_batch_size == -1 or diff < self.max_batch_size: batch_size = diff else: batch_size = self.max_batch_size batch_num = int(math.ceil(float(total) / float(batch_size))) return batch_size, batch_num def _pick_nodes(self, batch_size, batch_num, candidates, good): """Select nodes based on size and number of batches. :param batch_size: the number of nodes of each batch. :param batch_num: the number of batches. :param candidates: a list of IDs for 'ERROR' nodes. :param good: a list of active node objects. :returns: a list of sets containing the nodes' IDs we selected based on the input params. """ nodes_list = [] # NOTE: we leave the nodes known to be good (ACTIVE) # at the end of the list so that we have a better # chance to ensure 'min_in_service' constraint for node in good: candidates.append(node.id) for start in range(0, len(candidates), batch_size): end = start + batch_size nodes_list.append(set(candidates[start:end])) return nodes_list def _create_plan(self, cluster, action): current = no.Node.count_by_cluster(action.context, cluster.id) action_name = action.action plan_list = [{}] plan = { 'pause_time': self.pause_time, } if current == 0: if action_name == consts.CLUSTER_UPDATE: plan['plan'] = plan_list return True, plan else: plan['batch_size'] = 0 return True, plan batch_size, batch_num = self._cal_batch_size(current, action_name) if action_name == consts.CLUSTER_DELETE: plan['batch_size'] = batch_size return True, plan nodes_list = cluster.nodes bad_list, good_list = su.filter_error_nodes(nodes_list) plan_list = self._pick_nodes(batch_size, batch_num, bad_list, good_list) plan['plan'] = plan_list return True, plan def pre_op(self, cluster_id, action): cluster = cm.Cluster.load(action.context, cluster_id) pd = { 'status': base.CHECK_OK, 'reason': _('Batching request validated.'), } # for updating and deleting result, value = self._create_plan(cluster, action) if result is False: pd = { 'status': base.CHECK_ERROR, 'reason': value, } else: if action.action == consts.CLUSTER_UPDATE: pd['update'] = value else: pd['delete'] = value action.data.update(pd) action.store(action.context) return
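# Illustrative, standalone sketch of the batching arithmetic used by the
# BatchPolicy above: for an update, at most (total - min_in_service) nodes go
# into one batch, optionally capped by max_batch_size; nodes are then grouped
# into sets of that size, with known-bad nodes placed into the earliest
# batches. Function and variable names here are assumptions for the example.
import math


def cal_batch_size(total, min_in_service, max_batch_size, is_update=True):
    # default to two batches when the cluster cannot keep min_in_service
    diff = int(math.ceil(float(total) / 2))
    if is_update and total > min_in_service:
        diff = total - min_in_service
    if max_batch_size == -1 or diff < max_batch_size:
        batch_size = diff
    else:
        batch_size = max_batch_size
    return batch_size, int(math.ceil(float(total) / float(batch_size)))


def pick_nodes(batch_size, bad_ids, good_ids):
    # ERROR nodes first so healthy nodes stay in service as long as possible
    candidates = list(bad_ids) + list(good_ids)
    return [set(candidates[i:i + batch_size])
            for i in range(0, len(candidates), batch_size)]


if __name__ == '__main__':
    size, num = cal_batch_size(total=10, min_in_service=4, max_batch_size=-1)
    print(size, num)  # 6, 2
    print(pick_nodes(size, ['n1'],
                     ['n2', 'n3', 'n4', 'n5', 'n6', 'n7', 'n8', 'n9', 'n10']))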
class ServerProfile(base.Profile): """Profile for an OpenStack Nova server.""" KEYS = ( CONTEXT, ADMIN_PASS, AUTO_DISK_CONFIG, AVAILABILITY_ZONE, BLOCK_DEVICE_MAPPING_V2, CONFIG_DRIVE, FLAVOR, IMAGE, KEY_NAME, METADATA, NAME, NETWORKS, PERSONALITY, SECURITY_GROUPS, USER_DATA, SCHEDULER_HINTS, ) = ( 'context', 'admin_pass', 'auto_disk_config', 'availability_zone', 'block_device_mapping_v2', 'config_drive', 'flavor', 'image', 'key_name', 'metadata', 'name', 'networks', 'personality', 'security_groups', 'user_data', 'scheduler_hints', ) BDM2_KEYS = ( BDM2_UUID, BDM2_SOURCE_TYPE, BDM2_DESTINATION_TYPE, BDM2_DISK_BUS, BDM2_DEVICE_NAME, BDM2_VOLUME_SIZE, BDM2_GUEST_FORMAT, BDM2_BOOT_INDEX, BDM2_DEVICE_TYPE, BDM2_DELETE_ON_TERMINATION, ) = ( 'uuid', 'source_type', 'destination_type', 'disk_bus', 'device_name', 'volume_size', 'guest_format', 'boot_index', 'device_type', 'delete_on_termination', ) NETWORK_KEYS = ( PORT, FIXED_IP, NETWORK, ) = ( 'port', 'fixed_ip', 'network', ) PERSONALITY_KEYS = ( PATH, CONTENTS, ) = ( 'path', 'contents', ) SCHEDULER_HINTS_KEYS = (GROUP, ) = ('group', ) properties_schema = { CONTEXT: schema.Map(_('Customized security context for operating servers.'), ), ADMIN_PASS: schema.String(_('Password for the administrator account.'), ), AUTO_DISK_CONFIG: schema.Boolean( _('Whether the disk partition is done automatically.'), default=True, ), AVAILABILITY_ZONE: schema.String( _('Name of availability zone for running the server.'), ), BLOCK_DEVICE_MAPPING_V2: schema.List( _('A list specifying the properties of block devices to be used ' 'for this server.'), schema=schema.Map( _('A map specifying the properties of a block device to be ' 'used by the server.'), schema={ BDM2_UUID: schema.String( _('ID of the source image, snapshot or volume'), ), BDM2_SOURCE_TYPE: schema.String( _('Volume source type, should be image, snapshot, ' 'volume or blank'), required=True, ), BDM2_DESTINATION_TYPE: schema.String( _('Volume destination type, should be volume or ' 'local'), required=True, ), BDM2_DISK_BUS: schema.String(_('Bus of the device.'), ), BDM2_DEVICE_NAME: schema.String( _('Name of the device(e.g. vda, xda, ....).'), ), BDM2_VOLUME_SIZE: schema.Integer( _('Size of the block device in MB(for swap) and ' 'in GB(for other formats)'), required=True, ), BDM2_GUEST_FORMAT: schema.String( _('Specifies the disk file system format(e.g. swap, ' 'ephemeral, ...).'), ), BDM2_BOOT_INDEX: schema.Integer(_('Define the boot order of the device'), ), BDM2_DEVICE_TYPE: schema.String( _('Type of the device(e.g. disk, cdrom, ...).'), ), BDM2_DELETE_ON_TERMINATION: schema.Boolean( _('Whether to delete the volume when the server ' 'stops.'), ), }), ), CONFIG_DRIVE: schema.Boolean( _('Whether config drive should be enabled for the server.'), ), FLAVOR: schema.String( _('ID of flavor used for the server.'), required=True, updatable=True, ), IMAGE: schema.String( # IMAGE is not required, because there could be BDM or BDMv2 # support and the corresponding settings effective _('ID of image to be used for the new server.'), updatable=True, ), KEY_NAME: schema.String(_('Name of Nova keypair to be injected to server.'), ), METADATA: schema.Map( _('A collection of key/value pairs to be associated with the ' 'server created. Both key and value should be <=255 chars.'), updatable=True, ), NAME: schema.String( _('Name of the server. 
When omitted, the node name will be used.'), updatable=True, ), NETWORKS: schema.List( _('List of networks for the server.'), schema=schema.Map( _('A map specifying the properties of a network for uses.'), schema={ NETWORK: schema.String( _('Name or ID of network to create a port on.'), ), PORT: schema.String(_('Port ID to be used by the network.'), ), FIXED_IP: schema.String(_('Fixed IP to be used by the network.'), ), }, ), updatable=True, ), PERSONALITY: schema.List( _('List of files to be injected into the server, where each.'), schema=schema.Map( _('A map specifying the path & contents for an injected ' 'file.'), schema={ PATH: schema.String( _('In-instance path for the file to be injected.'), required=True, ), CONTENTS: schema.String( _('Contents of the file to be injected.'), required=True, ), }, ), ), SCHEDULER_HINTS: schema.Map( _('A collection of key/value pairs to be associated with the ' 'Scheduler hints. Both key and value should be <=255 chars.'), ), SECURITY_GROUPS: schema.List( _('List of security groups.'), schema=schema.String( _('Name of a security group'), required=True, ), ), USER_DATA: schema.String(_('User data to be exposed by the metadata server.'), ), } OP_NAMES = ( OP_REBOOT, OP_CHANGE_PASSWORD, ) = ( 'reboot', 'change_password', ) REBOOT_TYPE = 'type' REBOOT_TYPES = (REBOOT_SOFT, REBOOT_HARD) = ('SOFT', 'HARD') ADMIN_PASSWORD = '******' OPERATIONS = { OP_REBOOT: schema.Operation( _("Reboot the nova server."), schema={ REBOOT_TYPE: schema.StringParam( _("Type of reboot which can be 'SOFT' or 'HARD'."), default=REBOOT_SOFT, constraints=[ constraints.AllowedValues(REBOOT_TYPES), ]) }), OP_CHANGE_PASSWORD: schema.Operation(_("Change the administrator password."), schema={ ADMIN_PASSWORD: schema.StringParam( _("New password for the administrator.")) }), } def __init__(self, type_name, name, **kwargs): super(ServerProfile, self).__init__(type_name, name, **kwargs) self.server_id = None def _validate_az(self, obj, az_name, reason=None): try: res = self.compute(obj).validate_azs([az_name]) except exc.InternalError as ex: if reason == 'create': raise exc.EResourceCreation(type='server', message=six.text_type(ex)) else: raise if not res: msg = _("The specified %(key)s '%(value)s' could not be found") % { 'key': self.AVAILABILITY_ZONE, 'value': az_name } if reason == 'create': raise exc.EResourceCreation(type='server', message=msg) else: raise exc.InvalidSpec(message=msg) return az_name def _validate_flavor(self, obj, name_or_id, reason=None): flavor = None msg = '' try: flavor = self.compute(obj).flavor_find(name_or_id, False) except exc.InternalError as ex: msg = six.text_type(ex) if reason is None: # reaons is 'validate' if ex.code == 404: msg = _( "The specified %(k)s '%(v)s' could not be found.") % { 'k': self.FLAVOR, 'v': name_or_id } raise exc.InvalidSpec(message=msg) else: raise if flavor is not None: if not flavor.is_disabled: return flavor msg = _("The specified %(k)s '%(v)s' is disabled") % { 'k': self.FLAVOR, 'v': name_or_id } if reason == 'create': raise exc.EResourceCreation(type='server', message=msg) elif reason == 'update': raise exc.EResourceUpdate(type='server', id=obj.physical_id, message=msg) else: raise exc.InvalidSpec(message=msg) def _validate_image(self, obj, name_or_id, reason=None): try: return self.compute(obj).image_find(name_or_id, False) except exc.InternalError as ex: if reason == 'create': raise exc.EResourceCreation(type='server', message=six.text_type(ex)) elif reason == 'update': raise exc.EResourceUpdate(type='server', 
id=obj.physical_id, message=six.text_type(ex)) elif ex.code == 404: msg = _("The specified %(k)s '%(v)s' could not be found.") % { 'k': self.IMAGE, 'v': name_or_id } raise exc.InvalidSpec(message=msg) else: raise def _validate_keypair(self, obj, name_or_id, reason=None): try: return self.compute(obj).keypair_find(name_or_id, False) except exc.InternalError as ex: if reason == 'create': raise exc.EResourceCreation(type='server', message=six.text_type(ex)) elif reason == 'update': raise exc.EResourceUpdate(type='server', id=obj.physical_id, message=six.text_type(ex)) elif ex.code == 404: msg = _("The specified %(k)s '%(v)s' could not be found.") % { 'k': self.KEY_NAME, 'v': name_or_id } raise exc.InvalidSpec(message=msg) else: raise def do_validate(self, obj): """Validate if the spec has provided valid info for server creation. :param obj: The node object. """ # validate availability_zone az_name = self.properties[self.AVAILABILITY_ZONE] if az_name is not None: self._validate_az(obj, az_name) # validate flavor flavor = self.properties[self.FLAVOR] self._validate_flavor(obj, flavor) # validate image image = self.properties[self.IMAGE] if image is not None: self._validate_image(obj, image) # validate key_name keypair = self.properties[self.KEY_NAME] if keypair is not None: self._validate_keypair(obj, keypair) # validate networks networks = self.properties[self.NETWORKS] for net in networks: self._validate_network(obj, net) return True def _resolve_bdm(self, bdm): for bd in bdm: for key in self.BDM2_KEYS: if bd[key] is None: del bd[key] return bdm def _validate_network(self, obj, network, reason=None): result = {} error = None # check network net_ident = network.get(self.NETWORK) if net_ident: try: net = self.network(obj).network_get(net_ident) if reason == 'update': result['net_id'] = net.id else: result['uuid'] = net.id except exc.InternalError as ex: error = six.text_type(ex) # check port port_ident = network.get(self.PORT) if not error and port_ident: try: port = self.network(obj).port_find(port_ident) if port.status != 'DOWN': error = _( "The status of the port %(port)s must be DOWN") % { 'port': port_ident } if reason == 'update': result['port_id'] = port.id else: result['port'] = port.id except exc.InternalError as ex: error = six.text_type(ex) elif port_ident is None and net_ident is None: error = _("'%(port)s' is required if '%(net)s' is omitted") % { 'port': self.PORT, 'net': self.NETWORK } fixed_ip = network.get(self.FIXED_IP) if not error and fixed_ip: if port_ident is not None: error = _("The '%(port)s' property and the '%(fixed_ip)s' " "property cannot be specified at the same time") % { 'port': self.PORT, 'fixed_ip': self.FIXED_IP } else: if reason == 'update': result['fixed_ips'] = [{'ip_address': fixed_ip}] else: result['fixed_ip'] = fixed_ip if error: if reason == 'create': raise exc.EResourceCreation(type='server', message=error) elif reason == 'update': raise exc.EResourceUpdate(type='server', id=obj.physical_id, message=error) else: raise exc.InvalidSpec(message=error) return result def _build_metadata(self, obj, usermeta): """Build custom metadata for server. :param obj: The node object to operate on. :return: A dictionary containing the new metadata. """ metadata = usermeta or {} metadata['cluster_node_id'] = obj.id if obj.cluster_id: metadata['cluster_id'] = obj.cluster_id metadata['cluster_node_index'] = six.text_type(obj.index) return metadata def do_create(self, obj): """Create a server for the node object. 
:param obj: The node object for which a server will be created. """ kwargs = {} for key in self.KEYS: # context is treated as connection parameters if key == self.CONTEXT: continue if self.properties[key] is not None: kwargs[key] = self.properties[key] admin_pass = self.properties[self.ADMIN_PASS] if admin_pass: kwargs.pop(self.ADMIN_PASS) kwargs['adminPass'] = admin_pass auto_disk_config = self.properties[self.AUTO_DISK_CONFIG] kwargs.pop(self.AUTO_DISK_CONFIG) kwargs['OS-DCF:diskConfig'] = 'AUTO' if auto_disk_config else 'MANUAL' image_ident = self.properties[self.IMAGE] if image_ident is not None: image = self._validate_image(obj, image_ident, 'create') kwargs.pop(self.IMAGE) kwargs['imageRef'] = image.id flavor_ident = self.properties[self.FLAVOR] flavor = self._validate_flavor(obj, flavor_ident, 'create') kwargs.pop(self.FLAVOR) kwargs['flavorRef'] = flavor.id keypair_name = self.properties[self.KEY_NAME] if keypair_name: keypair = self._validate_keypair(obj, keypair_name, 'create') kwargs['key_name'] = keypair.name kwargs['name'] = self.properties[self.NAME] or obj.name metadata = self._build_metadata(obj, self.properties[self.METADATA]) kwargs['metadata'] = metadata block_device_mapping_v2 = self.properties[self.BLOCK_DEVICE_MAPPING_V2] if block_device_mapping_v2 is not None: kwargs['block_device_mapping_v2'] = self._resolve_bdm( block_device_mapping_v2) user_data = self.properties[self.USER_DATA] if user_data is not None: ud = encodeutils.safe_encode(user_data) kwargs['user_data'] = encodeutils.safe_decode(base64.b64encode(ud)) networks = self.properties[self.NETWORKS] if networks is not None: kwargs['networks'] = [] for net_spec in networks: net = self._validate_network(obj, net_spec, 'create') kwargs['networks'].append(net) secgroups = self.properties[self.SECURITY_GROUPS] if secgroups: kwargs['security_groups'] = [{'name': sg} for sg in secgroups] if 'placement' in obj.data: if 'zone' in obj.data['placement']: kwargs['availability_zone'] = obj.data['placement']['zone'] if 'servergroup' in obj.data['placement']: group_id = obj.data['placement']['servergroup'] hints = self.properties.get(self.SCHEDULER_HINTS, {}) hints.update({'group': group_id}) kwargs['scheduler_hints'] = hints server = None resource_id = 'UNKNOWN' try: server = self.compute(obj).server_create(**kwargs) self.compute(obj).wait_for_server(server.id) return server.id except exc.InternalError as ex: if server and server.id: resource_id = server.id raise exc.EResourceCreation(type='server', message=ex.message, resource_id=resource_id) def do_delete(self, obj, **params): """Delete the physical resource associated with the specified node. :param obj: The node object to operate on. :param kwargs params: Optional keyword arguments for the delete operation. :returns: This operation always return True unless exception is caught. :raises: `EResourceDeletion` if interaction with compute service fails. """ if not obj.physical_id: return True server_id = obj.physical_id ignore_missing = params.get('ignore_missing', True) force = params.get('force', False) try: driver = self.compute(obj) if force: driver.server_force_delete(server_id, ignore_missing) else: driver.server_delete(server_id, ignore_missing) driver.wait_for_server_delete(server_id) return True except exc.InternalError as ex: raise exc.EResourceDeletion(type='server', id=server_id, message=six.text_type(ex)) def _check_server_name(self, obj, profile): """Check if there is a new name to be assigned to the server. :param obj: The node object to operate on. 
:param profile: The new profile which may contain a name for the server instance. :return: A tuple consisting of a boolean indicating whether the name needs a change and the server name determined. """ old_name = self.properties[self.NAME] or obj.name new_name = profile.properties[self.NAME] or obj.name if old_name == new_name: return False, new_name return True, new_name def _update_name(self, obj, new_name): """Update the name of the server. :param obj: The node object to operate on. :param new_name: The new name for the server instance. :return: ``None``. :raises: ``EResourceUpdate``. """ try: self.compute(obj).server_update(obj.physical_id, name=new_name) except exc.InternalError as ex: raise exc.EResourceUpdate(type='server', id=obj.physical_id, message=six.text_type(ex)) def _check_password(self, obj, new_profile): """Check if the admin password has been changed in the new profile. :param obj: The server node to operate on; not used currently. :param new_profile: The new profile which may contain a new password for the server instance. :return: A tuple consisting of a boolean indicating whether the password needs a change and the password determined, which could be '' if a new password is not set. """ old_passwd = self.properties.get(self.ADMIN_PASS) or '' new_passwd = new_profile.properties[self.ADMIN_PASS] or '' if old_passwd == new_passwd: return False, new_passwd return True, new_passwd def _update_password(self, obj, new_password): """Update the admin password for the server. :param obj: The node object to operate on. :param new_password: The new password for the server instance. :return: ``None``. :raises: ``EResourceUpdate``. """ try: self.compute(obj).server_change_password(obj.physical_id, new_password) except exc.InternalError as ex: raise exc.EResourceUpdate(type='server', id=obj.physical_id, message=six.text_type(ex)) def _update_metadata(self, obj, new_profile): """Update the server metadata. :param obj: The node object to operate on. :param new_profile: The new profile that may contain some changes to the metadata. :returns: ``None`` :raises: `EResourceUpdate`. """ old_meta = self._build_metadata(obj, self.properties[self.METADATA]) new_meta = self._build_metadata(obj, new_profile.properties[self.METADATA]) if new_meta == old_meta: return try: self.compute(obj).server_metadata_update(obj.physical_id, new_meta) except exc.InternalError as ex: raise exc.EResourceUpdate(type='server', id=obj.physical_id, message=six.text_type(ex)) def _update_flavor(self, obj, new_profile): """Update server flavor. :param obj: The node object to operate on. :param new_profile: The new profile which may contain a new flavor for the server. :returns: ``None``. :raises: `EResourceUpdate` when the operation fails. 
""" old_flavor = self.properties[self.FLAVOR] new_flavor = new_profile.properties[self.FLAVOR] cc = self.compute(obj) oldflavor = self._validate_flavor(obj, old_flavor, 'update') newflavor = self._validate_flavor(obj, new_flavor, 'update') if oldflavor.id == newflavor.id: return try: cc.server_resize(obj.physical_id, newflavor.id) cc.wait_for_server(obj.physical_id, 'VERIFY_RESIZE') except exc.InternalError as ex: msg = six.text_type(ex) try: cc.server_resize_revert(obj.physical_id) cc.wait_for_server(obj.physical_id, 'ACTIVE') except exc.InternalError as ex1: msg = six.text_type(ex1) raise exc.EResourceUpdate(type='server', id=obj.physical_id, message=msg) try: cc.server_resize_confirm(obj.physical_id) cc.wait_for_server(obj.physical_id, 'ACTIVE') except exc.InternalError as ex: raise exc.EResourceUpdate(type='server', id=obj.physical_id, message=six.text_type(ex)) def _update_image(self, obj, new_profile, new_name, new_password): """Update image used by server node. :param obj: The node object to operate on. :param new_profile: The profile which may contain a new image name or ID to use. :param new_name: The name for the server node. :param newn_password: The new password for the administrative account if provided. :returns: A boolean indicating whether the image needs an update. :raises: ``InternalError`` if operation was a failure. """ old_image = self.properties[self.IMAGE] new_image = new_profile.properties[self.IMAGE] if not new_image: msg = _("Updating Nova server with image set to None is not " "supported by Nova") raise exc.EResourceUpdate(type='server', id=obj.physical_id, message=msg) # check the new image first img_new = self._validate_image(obj, new_image, reason='update') new_image_id = img_new.id driver = self.compute(obj) if old_image: img_old = self._validate_image(obj, old_image, reason='update') old_image_id = img_old.id else: try: server = driver.server_get(obj.physical_id) except exc.InternalError as ex: raise exc.EResourceUpdate(type='server', id=obj.physical_id, message=six.text_type(ex)) # Still, this 'old_image_id' could be empty, but it doesn't matter # because the comparison below would fail if that is the case old_image_id = server.image.get('id', None) if new_image_id == old_image_id: return False try: driver.server_rebuild(obj.physical_id, new_image_id, new_name, new_password) driver.wait_for_server(obj.physical_id, 'ACTIVE') except exc.InternalError as ex: raise exc.EResourceUpdate(type='server', id=obj.physical_id, message=six.text_type(ex)) return True def _create_interfaces(self, obj, networks): """Create new interfaces for the server node. :param obj: The node object to operate. :param networks: A list containing information about new network interfaces to be created. :returns: ``None``. :raises: ``EResourceUpdate`` if interaction with drivers failed. """ cc = self.compute(obj) try: server = cc.server_get(obj.physical_id) except exc.InternalError as ex: raise exc.EResourceUpdate(type='server', id=obj.physical_id, message=six.text_type(ex)) for net_spec in networks: net_attrs = self._validate_network(obj, net_spec, 'update') if net_attrs: try: cc.server_interface_create(server, **net_attrs) except exc.InternalError as ex: raise exc.EResourceUpdate(type='server', id=obj.physical_id, message=six.text_type(ex)) def _delete_interfaces(self, obj, networks): """Delete existing interfaces from the node. :param obj: The node object to operate. :param networks: A list containing information about network interfaces to be created. 
:returns: ``None`` :raises: ``EResourceUpdate`` """ def _get_network(nc, net_id, server_id): try: net = nc.network_get(net_id) return net.id except exc.InternalError as ex: raise exc.EResourceUpdate(type='server', id=server_id, message=six.text_type(ex)) def _do_delete(port_id, server_id): try: cc.server_interface_delete(port_id, server_id) except exc.InternalError as ex: raise exc.EResourceUpdate(type='server', id=server_id, message=six.text_type(ex)) cc = self.compute(obj) nc = self.network(obj) try: existing = list(cc.server_interface_list(obj.physical_id)) except exc.InternalError as ex: raise exc.EResourceUpdate(type='server', id=obj.physical_id, message=six.text_type(ex)) ports = [] for intf in existing: fixed_ips = [addr['ip_address'] for addr in intf.fixed_ips] ports.append({ 'id': intf.port_id, 'net': intf.net_id, 'ips': fixed_ips }) for n in networks: network = n.get('network', None) port = n.get('port', None) fixed_ip = n.get('fixed_ip', None) if port: for p in ports: if p['id'] == port: ports.remove(p) _do_delete(port, obj.physical_id) elif fixed_ip: net_id = _get_network(nc, network, obj.physical_id) for p in ports: if (fixed_ip in p['ips'] and net_id == p['net']): ports.remove(p) _do_delete(p['id'], obj.physical_id) elif port is None and fixed_ip is None: net_id = _get_network(nc, network, obj.physical_id) for p in ports: if p['net'] == net_id: ports.remove(p) _do_delete(p['id'], obj.physical_id) def _update_network(self, obj, new_profile): """Updating server network interfaces. :param obj: The node object to operate. :param new_profile: The new profile which may contain new network settings. :return: ``None`` :raises: ``EResourceUpdate`` if there are driver failures. """ networks_current = self.properties[self.NETWORKS] networks_create = new_profile.properties[self.NETWORKS] networks_delete = copy.deepcopy(networks_current) for network in networks_current: if network in networks_create: networks_create.remove(network) networks_delete.remove(network) # Detach some existing interfaces if networks_delete: self._delete_interfaces(obj, networks_delete) # Attach new interfaces if networks_create: self._create_interfaces(obj, networks_create) return def do_update(self, obj, new_profile=None, **params): """Perform update on the server. :param obj: the server to operate on :param new_profile: the new profile for the server. :param params: a dictionary of optional parameters. :returns: True if update was successful or False otherwise. :raises: `EResourceUpdate` if operation fails. 
""" self.server_id = obj.physical_id if not self.server_id: return False if not new_profile: return False if not self.validate_for_update(new_profile): return False name_changed, new_name = self._check_server_name(obj, new_profile) passwd_changed, new_passwd = self._check_password(obj, new_profile) # Update server image: may have side effect of changing server name # and/or admin password image_changed = self._update_image(obj, new_profile, new_name, new_passwd) if not image_changed: # we do this separately only when rebuild wasn't performed if name_changed: self._update_name(obj, new_name) if passwd_changed: self._update_password(obj, new_passwd) # Update server flavor: note that flavor is a required property self._update_flavor(obj, new_profile) self._update_network(obj, new_profile) # TODO(Yanyan Hu): Update block_device properties # Update server metadata self._update_metadata(obj, new_profile) return True def do_get_details(self, obj): known_keys = { 'OS-DCF:diskConfig', 'OS-EXT-AZ:availability_zone', 'OS-EXT-STS:power_state', 'OS-EXT-STS:vm_state', 'accessIPv4', 'accessIPv6', 'config_drive', 'created', 'hostId', 'id', 'key_name', 'locked', 'metadata', 'name', 'os-extended-volumes:volumes_attached', 'progress', 'status', 'updated' } if obj.physical_id is None or obj.physical_id == '': return {} driver = self.compute(obj) try: server = driver.server_get(obj.physical_id) except exc.InternalError as ex: return {'Error': {'code': ex.code, 'message': six.text_type(ex)}} if server is None: return {} server_data = server.to_dict() details = { 'image': server_data['image']['id'], 'flavor': server_data['flavor']['id'], } for key in known_keys: if key in server_data: details[key] = server_data[key] # process special keys like 'OS-EXT-STS:task_state': these keys have # a default value '-' when not existing special_keys = [ 'OS-EXT-STS:task_state', 'OS-SRV-USG:launched_at', 'OS-SRV-USG:terminated_at', ] for key in special_keys: if key in server_data: val = server_data[key] details[key] = val if val else '-' # process network addresses details['addresses'] = copy.deepcopy(server_data['addresses']) # process security groups sgroups = [] if 'security_groups' in server_data: for sg in server_data['security_groups']: sgroups.append(sg['name']) if len(sgroups) == 0: details['security_groups'] = '' elif len(sgroups) == 1: details['security_groups'] = sgroups[0] else: details['security_groups'] = sgroups return dict((k, details[k]) for k in sorted(details)) def do_join(self, obj, cluster_id): if not obj.physical_id: return False driver = self.compute(obj) metadata = driver.server_metadata_get(obj.physical_id) or {} metadata['cluster_id'] = cluster_id metadata['cluster_node_index'] = six.text_type(obj.index) driver.server_metadata_update(obj.physical_id, metadata) return super(ServerProfile, self).do_join(obj, cluster_id) def do_leave(self, obj): if not obj.physical_id: return False keys = ['cluster_id', 'cluster_node_index'] self.compute(obj).server_metadata_delete(obj.physical_id, keys) return super(ServerProfile, self).do_leave(obj) def do_rebuild(self, obj): if not obj.physical_id: return False self.server_id = obj.physical_id driver = self.compute(obj) try: server = driver.server_get(self.server_id) except exc.InternalError as ex: raise exc.EResourceOperation(op='rebuilding', type='server', id=self.server_id, message=six.text_type(ex)) if server is None or server.image is None: return False image_id = server.image['id'] admin_pass = self.properties.get(self.ADMIN_PASS) try: 
driver.server_rebuild(self.server_id, image_id, self.properties.get(self.NAME), admin_pass) driver.wait_for_server(self.server_id, 'ACTIVE') except exc.InternalError as ex: raise exc.EResourceOperation(op='rebuilding', type='server', id=self.server_id, message=six.text_type(ex)) return True def do_check(self, obj): if not obj.physical_id: return False try: server = self.compute(obj).server_get(obj.physical_id) except exc.InternalError as ex: raise exc.EResourceOperation(op='checking', type='server', id=obj.physical_id, message=six.text_type(ex)) if (server is None or server.status != 'ACTIVE'): return False return True def do_recover(self, obj, **options): # NOTE: We do a 'get' not a 'pop' here, because the operations may # get fall back to the base class for handling operation = options.get('operation', None) if operation and not isinstance(operation, six.string_types): operation = operation[0] # TODO(Qiming): Handle the case that the operation contains other # alternative recover operation # Depends-On: https://review.openstack.org/#/c/359676/ if operation == 'REBUILD': return self.do_rebuild(obj) return super(ServerProfile, self).do_recover(obj, **options) def handle_reboot(self, obj, **options): """Handler for the reboot operation.""" if not obj.physical_id: return False reboot_type = options.get(self.REBOOT_TYPE, self.REBOOT_SOFT) if (not isinstance(reboot_type, six.string_types) or reboot_type not in self.REBOOT_TYPES): return False self.compute(obj).server_reboot(obj.physical_id, reboot_type) self.compute(obj).wait_for_server(obj.physical_id, 'ACTIVE') return True def handle_change_password(self, obj, **options): """Handler for the change_password operation.""" if not obj.physical_id: return False password = options.get(self.ADMIN_PASSWORD, None) if (password is None or not isinstance(password, six.string_types)): return False self.compute(obj).server_change_password(obj.physical_id, password) return True
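# Illustrative, standalone sketch of two small transformations performed in
# ServerProfile.do_create() above: base64-encoding the user_data blob passed
# to Nova and injecting cluster bookkeeping keys into the server metadata.
# The helper names are assumptions; only the logic mirrors the profile code.
import base64


def encode_user_data(user_data):
    # Nova expects user_data as base64-encoded text
    encoded = base64.b64encode(user_data.encode('utf-8'))
    return encoded.decode('utf-8')


def build_metadata(node_id, node_index, cluster_id=None, usermeta=None):
    metadata = dict(usermeta or {})
    metadata['cluster_node_id'] = node_id
    if cluster_id:
        metadata['cluster_id'] = cluster_id
        metadata['cluster_node_index'] = str(node_index)
    return metadata


if __name__ == '__main__':
    print(encode_user_data('#!/bin/sh\necho hello'))
    print(build_metadata('node-1', 3, cluster_id='cluster-9',
                         usermeta={'role': 'web'}))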
class ScalingPolicy(base.Policy): """Policy for changing the size of a cluster. This policy is expected to be enforced before the node count of a cluster is changed. """ VERSION = '1.0' PRIORITY = 100 TARGET = [ ('BEFORE', consts.CLUSTER_SCALE_IN), ('BEFORE', consts.CLUSTER_SCALE_OUT), ] PROFILE_TYPE = [ 'ANY', ] KEYS = ( EVENT, ADJUSTMENT, ) = ( 'event', 'adjustment', ) _SUPPORTED_EVENTS = ( CLUSTER_SCALE_IN, CLUSTER_SCALE_OUT, ) = ( consts.CLUSTER_SCALE_IN, consts.CLUSTER_SCALE_OUT, ) _ADJUSTMENT_KEYS = ( ADJUSTMENT_TYPE, ADJUSTMENT_NUMBER, MIN_STEP, BEST_EFFORT, COOLDOWN, ) = ( 'type', 'number', 'min_step', 'best_effort', 'cooldown', ) properties_schema = { EVENT: schema.String( _('Event that will trigger this policy. Must be one of ' 'CLUSTER_SCALE_IN and CLUSTER_SCALE_OUT.'), constraints=[ constraints.AllowedValues(_SUPPORTED_EVENTS), ], required=True, ), ADJUSTMENT: schema.Map( _('Detailed specification for scaling adjustments.'), schema={ ADJUSTMENT_TYPE: schema.String( _('Type of adjustment when scaling is triggered.'), constraints=[ constraints.AllowedValues(consts.ADJUSTMENT_TYPES), ], default=consts.CHANGE_IN_CAPACITY, ), ADJUSTMENT_NUMBER: schema.Number( _('A number specifying the amount of adjustment.'), default=1, ), MIN_STEP: schema.Integer( _('When adjustment type is set to "CHANGE_IN_PERCENTAGE",' ' this specifies the cluster size will be changed by ' 'at least this number of nodes.'), default=1, ), BEST_EFFORT: schema.Boolean( _('Whether to do best-effort scaling when the new size of ' 'the cluster would break the size limitation.'), default=False, ), COOLDOWN: schema.Integer( _('Number of seconds to hold the cluster for cool-down ' 'before allowing the cluster to be resized again.'), default=0, ), }), } def __init__(self, name, spec, **kwargs): """Initialize a scaling policy object. :param name: Name for the policy object. :param spec: A dictionary containing the detailed specification for the policy. :param \*\*kwargs: Other optional parameters for policy object creation. :return: An object of `ScalingPolicy`. """ super(ScalingPolicy, self).__init__(name, spec, **kwargs) self.singleton = False self.event = self.properties[self.EVENT] adjustment = self.properties[self.ADJUSTMENT] self.adjustment_type = adjustment[self.ADJUSTMENT_TYPE] self.adjustment_number = adjustment[self.ADJUSTMENT_NUMBER] self.adjustment_min_step = adjustment[self.MIN_STEP] self.best_effort = adjustment[self.BEST_EFFORT] self.cooldown = adjustment[self.COOLDOWN] def _calculate_adjustment_count(self, current_size): """Calculate adjustment count based on current_size. :param current_size: The current size of the target cluster. :return: The number of nodes to add or to remove. """ if self.adjustment_type == consts.EXACT_CAPACITY: if self.event == consts.CLUSTER_SCALE_IN: count = current_size - self.adjustment_number else: count = self.adjustment_number - current_size elif self.adjustment_type == consts.CHANGE_IN_CAPACITY: count = self.adjustment_number else: # consts.CHANGE_IN_PERCENTAGE: count = int((self.adjustment_number * current_size) / 100.0) if count < self.adjustment_min_step: count = self.adjustment_min_step return count def pre_op(self, cluster_id, action): """The hook function that is executed before the action. The checking result is stored in the ``data`` property of the action object rather than returned directly from the function. :param cluster_id: The ID of the target cluster. :param action: Action instance against which the policy is being checked. :return: None. 
""" # Use action input if count is provided count = action.inputs.get('count', None) current = db_api.node_count_by_cluster(action.context, cluster_id) if count is None: # count not specified, calculate it count = self._calculate_adjustment_count(current) # Count must be positive value try: count = utils.parse_int_param('count', count, allow_zero=False) except exception.InvalidParameter: action.data.update({ 'status': base.CHECK_ERROR, 'reason': _("Invalid count (%(c)s) for action '%(a)s'.") % { 'c': count, 'a': action.action } }) action.store(action.context) return # Check size constraints cluster = db_api.cluster_get(action.context, cluster_id) if action.action == consts.CLUSTER_SCALE_IN: if self.best_effort: count = min(count, current - cluster.min_size) result = su.check_size_params(cluster, current - count, strict=not self.best_effort) else: if self.best_effort: count = min(count, cluster.max_size - current) result = su.check_size_params(cluster, current + count, strict=not self.best_effort) if result: # failed validation pd = {'status': base.CHECK_ERROR, 'reason': result} else: # passed validation pd = { 'status': base.CHECK_OK, 'reason': _('Scaling request validated.'), } if action.action == consts.CLUSTER_SCALE_IN: pd['deletion'] = {'count': count} else: pd['creation'] = {'count': count} action.data.update(pd) action.store(action.context) return def need_check(self, target, action): res = super(ScalingPolicy, self).need_check(target, action) if res: # Check if the action is expected by the policy res = (self.event == action.action) return res
class DeletionPolicy(base.Policy): """Policy for choosing victim node(s) from a cluster for deletion. This policy is enforced when nodes are to be removed from a cluster. It will yield an ordered list of candidates for deletion based on user specified criteria. """ VERSION = '1.0' VERSIONS = { '1.0': [ {'status': consts.SUPPORTED, 'since': '2016.04'} ] } PRIORITY = 400 KEYS = ( CRITERIA, DESTROY_AFTER_DELETION, GRACE_PERIOD, REDUCE_DESIRED_CAPACITY, ) = ( 'criteria', 'destroy_after_deletion', 'grace_period', 'reduce_desired_capacity', ) CRITERIA_VALUES = ( OLDEST_FIRST, OLDEST_PROFILE_FIRST, YOUNGEST_FIRST, RANDOM, ) = ( 'OLDEST_FIRST', 'OLDEST_PROFILE_FIRST', 'YOUNGEST_FIRST', 'RANDOM', ) TARGET = [ ('BEFORE', consts.CLUSTER_SCALE_IN), ('BEFORE', consts.CLUSTER_DEL_NODES), ('BEFORE', consts.CLUSTER_RESIZE), ('BEFORE', consts.NODE_DELETE), ] PROFILE_TYPE = [ 'ANY' ] properties_schema = { CRITERIA: schema.String( _('Criteria used in selecting candidates for deletion'), default=RANDOM, constraints=[ constraints.AllowedValues(CRITERIA_VALUES), ] ), DESTROY_AFTER_DELETION: schema.Boolean( _('Whether a node should be completely destroyed after ' 'deletion. Default to True'), default=True, ), GRACE_PERIOD: schema.Integer( _('Number of seconds before real deletion happens.'), default=0, ), REDUCE_DESIRED_CAPACITY: schema.Boolean( _('Whether the desired capacity of the cluster should be ' 'reduced along the deletion. Default to False.'), default=True, ) } def __init__(self, name, spec, **kwargs): super(DeletionPolicy, self).__init__(name, spec, **kwargs) self.criteria = self.properties[self.CRITERIA] self.grace_period = self.properties[self.GRACE_PERIOD] self.destroy_after_deletion = self.properties[ self.DESTROY_AFTER_DELETION] self.reduce_desired_capacity = self.properties[ self.REDUCE_DESIRED_CAPACITY] def _victims_by_regions(self, cluster, regions): victims = [] for region in sorted(regions.keys()): count = regions[region] nodes = cluster.nodes_by_region(region) if self.criteria == self.RANDOM: candidates = scaleutils.nodes_by_random(nodes, count) elif self.criteria == self.OLDEST_PROFILE_FIRST: candidates = scaleutils.nodes_by_profile_age(nodes, count) elif self.criteria == self.OLDEST_FIRST: candidates = scaleutils.nodes_by_age(nodes, count, True) else: candidates = scaleutils.nodes_by_age(nodes, count, False) victims.extend(candidates) return victims def _victims_by_zones(self, cluster, zones): victims = [] for zone in sorted(zones.keys()): count = zones[zone] nodes = cluster.nodes_by_zone(zone) if self.criteria == self.RANDOM: candidates = scaleutils.nodes_by_random(nodes, count) elif self.criteria == self.OLDEST_PROFILE_FIRST: candidates = scaleutils.nodes_by_profile_age(nodes, count) elif self.criteria == self.OLDEST_FIRST: candidates = scaleutils.nodes_by_age(nodes, count, True) else: candidates = scaleutils.nodes_by_age(nodes, count, False) victims.extend(candidates) return victims def _update_action(self, action, victims): pd = action.data.get('deletion', {}) pd['count'] = len(victims) pd['candidates'] = victims pd['destroy_after_deletion'] = self.destroy_after_deletion pd['grace_period'] = self.grace_period pd['reduce_desired_capacity'] = self.reduce_desired_capacity action.data.update({ 'status': base.CHECK_OK, 'reason': _('Candidates generated'), 'deletion': pd }) action.store(action.context) def pre_op(self, cluster_id, action): """Choose victims that can be deleted. :param cluster_id: ID of the cluster to be handled. :param action: The action object that triggered this policy. 
""" victims = action.inputs.get('candidates', []) if len(victims) > 0: self._update_action(action, victims) return if action.action == consts.NODE_DELETE: self._update_action(action, [action.node.id]) return db_cluster = None regions = None zones = None deletion = action.data.get('deletion', {}) if deletion: # there are policy decisions count = deletion['count'] regions = deletion.get('regions', None) zones = deletion.get('zones', None) # No policy decision, check action itself: SCALE_IN elif action.action == consts.CLUSTER_SCALE_IN: count = action.inputs.get('count', 1) # No policy decision, check action itself: RESIZE else: db_cluster = co.Cluster.get(action.context, cluster_id) current = no.Node.count_by_cluster(action.context, cluster_id) res, reason = scaleutils.parse_resize_params(action, db_cluster, current) if res == base.CHECK_ERROR: action.data['status'] = base.CHECK_ERROR action.data['reason'] = reason LOG.error(reason) return if 'deletion' not in action.data: return count = action.data['deletion']['count'] cluster = cm.Cluster.load(action.context, dbcluster=db_cluster, cluster_id=cluster_id) # Cross-region if regions: victims = self._victims_by_regions(cluster, regions) self._update_action(action, victims) return # Cross-AZ if zones: victims = self._victims_by_zones(cluster, zones) self._update_action(action, victims) return if count > len(cluster.nodes): count = len(cluster.nodes) if self.criteria == self.RANDOM: victims = scaleutils.nodes_by_random(cluster.nodes, count) elif self.criteria == self.OLDEST_PROFILE_FIRST: victims = scaleutils.nodes_by_profile_age(cluster.nodes, count) elif self.criteria == self.OLDEST_FIRST: victims = scaleutils.nodes_by_age(cluster.nodes, count, True) else: victims = scaleutils.nodes_by_age(cluster.nodes, count, False) self._update_action(action, victims) return