def member_remove(self, lb_id, pool_id, member_id):
    """Delete a member from Neutron lbaas pool.

    :param lb_id: The ID of the loadbalancer the operation is targeted at;
    :param pool_id: The ID of the pool from which the member is deleted;
    :param member_id: The ID of the LB member.
    :returns: True if the operation succeeded, or None if errors occurred.
    """
    try:
        # FIXME(Yanyan Hu): Currently, Neutron lbaasv2 service can not
        # handle concurrent lb member operations well: new member creation
        # deletion request will directly fail rather than being lined up
        # when another operation is still in progress. In this workaround,
        # loadbalancer status will be checked before deleting lb member
        # request is sent out. If loadbalancer keeps unready till waiting
        # timeout, exception will be raised to fail member_remove.
        res = self._wait_for_lb_ready(lb_id)
        if not res:
            msg = _LE('Loadbalancer %s is not ready.') % lb_id
            raise exception.Error(msg)
        self.nc().pool_member_delete(pool_id, member_id)
    except (exception.InternalError, exception.Error) as ex:
        msg = _LE('Failed in removing member %(m)s from pool %(p)s: '
                  '%(ex)s') % {'m': member_id, 'p': pool_id,
                               'ex': six.text_type(ex)}
        LOG.exception(msg)
        return None
    # Wait again so the LB settles after the deletion request as well.
    res = self._wait_for_lb_ready(lb_id)
    if res is False:
        LOG.error(_LE('Failed in deleting pool member (%s).') % member_id)
        return None

    return True
def pre_op(self, cluster_id, action):
    """Callback function when cluster membership is about to change.

    :param cluster_id: ID of the target cluster.
    :param action: The action that triggers this policy check.
    :returns: ``None``.
    """
    # A SCALE_IN action shrinks the cluster; any other triggering action
    # inflates it.
    expand = action.action != consts.CLUSTER_SCALE_IN

    # Prefer an explicit count from the action inputs; otherwise fall back
    # to a decision recorded by another policy, defaulting to 1.
    count = action.inputs.get('count', None)
    if not count:
        decision = action.data.get('creation' if expand else 'deletion',
                                   None)
        count = decision.get('count', 1) if decision else 1

    cluster = cluster_mod.Cluster.load(action.context, cluster_id)
    kc = self._keystone(cluster)
    usable = kc.validate_regions(self.regions.keys())
    if len(usable) == 0:
        action.data['status'] = base.CHECK_ERROR
        action.data['reason'] = _('No region is found usable.')
        LOG.error(_LE('No region is found usable.'))
        return

    # Keep only the configured regions that passed validation.
    regions = {name: weight for name, weight in self.regions.items()
               if name in usable}

    current_dist = cluster.get_region_distribution(usable)
    plan = self._create_plan(current_dist, regions, count, expand)
    if not plan:
        action.data['status'] = base.CHECK_ERROR
        action.data['reason'] = _('There is no feasible plan to '
                                  'handle all nodes.')
        LOG.error(_LE('There is no feasible plan to handle all nodes.'))
        return

    # Record the plan under 'creation' or 'deletion' for later steps.
    key = 'creation' if expand else 'deletion'
    action.data.setdefault(key, {})
    action.data[key]['count'] = count
    action.data[key]['regions'] = plan
def __call__(self, request):
    """WSGI method that controls (de)serialization and method dispatch."""
    action_args = self.get_action_args(request.environ)
    action = action_args.pop('action', None)

    try:
        # Deserialize the request body, then invoke the controller method
        # named by 'action' with the merged arguments.
        deserialized_request = self.dispatch(self.deserializer,
                                             action, request)
        action_args.update(deserialized_request)

        LOG.debug(('Calling %(controller)s : %(action)s'), {
            'controller': self.controller,
            'action': action
        })

        action_result = self.dispatch(self.controller, action,
                                      request, **action_args)
    except TypeError as err:
        # A TypeError from dispatch typically means the request arguments
        # did not match the controller method signature.
        LOG.error(_LE('Exception handling resource: %s') % err)
        msg = _('The server could not comply with the request since '
                'it is either malformed or otherwise incorrect.')
        err = webob.exc.HTTPBadRequest(msg)
        http_exc = translate_exception(err, request.best_match_language())
        # NOTE(luisg): We disguise HTTP exceptions, otherwise they will be
        # treated by wsgi as responses ready to be sent back and they
        # won't make it into the pipeline app that serializes errors
        raise exception.HTTPExceptionDisguise(http_exc)
    except webob.exc.HTTPException as err:
        if not isinstance(err, webob.exc.HTTPError):
            # Some HTTPException are actually not errors, they are
            # responses ready to be sent back to the users, so we don't
            # create error log, but disguise and translate them to meet
            # openstacksdk's need.
            http_exc = translate_exception(err,
                                           request.best_match_language())
            raise exception.HTTPExceptionDisguise(http_exc)
        if isinstance(err, webob.exc.HTTPServerError):
            # 5xx responses are server-side failures; record them.
            LOG.error(_LE("Returning %(code)s to user: %(explanation)s"), {
                'code': err.code,
                'explanation': err.explanation
            })
        http_exc = translate_exception(err, request.best_match_language())
        raise exception.HTTPExceptionDisguise(http_exc)
    except exception.SenlinException as err:
        raise translate_exception(err, request.best_match_language())
    except Exception as err:
        log_exception(err, sys.exc_info())
        raise translate_exception(err, request.best_match_language())

    serializer = self.serializer or serializers.JSONResponseSerializer()
    try:
        response = webob.Response(request=request)
        self.dispatch(serializer, action, response, action_result)
        return response
    # return unserializable result (typically an exception)
    except Exception:
        return action_result
def member_add(self, node, lb_id, pool_id, port, subnet):
    """Add a member to Neutron lbaas pool.

    :param node: A node object to be added to the specified pool.
    :param lb_id: The ID of the loadbalancer.
    :param pool_id: The ID of the pool for receiving the node.
    :param port: The port for the new LB member to be created.
    :param subnet: The subnet to be used by the new LB member.
    :returns: The ID of the new LB member or None if errors occurred.
    """
    try:
        # Resolve the subnet so we can find the network the node must be
        # attached to.
        subnet_obj = self.nc().subnet_get(subnet)
        net_id = subnet_obj.network_id
        net = self.nc().network_get(net_id)
    except exception.InternalError as ex:
        resource = 'subnet' if subnet in ex.message else 'network'
        msg = _LE('Failed in getting %(resource)s: %(msg)s.'
                  ) % {'resource': resource, 'msg': six.text_type(ex)}
        LOG.exception(msg)
        return None
    net_name = net.name

    node_detail = node.get_details(oslo_context.get_current())
    addresses = node_detail.get('addresses')
    if net_name not in addresses:
        msg = _LE('Node is not in subnet %(subnet)s')
        LOG.error(msg, {'subnet': subnet})
        return None

    # Use the first IP address if more than one are found in target network
    address = addresses[net_name][0]['addr']
    try:
        # FIXME(Yanyan Hu): Currently, Neutron lbaasv2 service can not
        # handle concurrent lb member operations well: new member creation
        # deletion request will directly fail rather than being lined up
        # when another operation is still in progress. In this workaround,
        # loadbalancer status will be checked before creating lb member
        # request is sent out. If loadbalancer keeps unready till waiting
        # timeout, exception will be raised to fail member_add.
        res = self._wait_for_lb_ready(lb_id)
        if not res:
            msg = _LE('Loadbalancer %s is not ready.') % lb_id
            raise exception.Error(msg)
        member = self.nc().pool_member_create(pool_id, address, port,
                                              subnet_obj.id)
    except (exception.InternalError, exception.Error) as ex:
        msg = _LE('Failed in creating lb pool member: %s.'
                  ) % six.text_type(ex)
        LOG.exception(msg)
        return None
    # Wait again so the LB settles after the creation request.
    res = self._wait_for_lb_ready(lb_id)
    if res is False:
        LOG.error(_LE('Failed in creating pool member (%s).') % member.id)
        return None

    return member.id
def __call__(self, request):
    """WSGI method that controls (de)serialization and method dispatch."""
    action_args = self.get_action_args(request.environ)
    action = action_args.pop('action', None)

    try:
        # Deserialize the request body, then invoke the controller method
        # named by 'action' with the merged arguments.
        deserialized_request = self.dispatch(self.deserializer,
                                             action, request)
        action_args.update(deserialized_request)

        LOG.debug(('Calling %(controller)s : %(action)s'),
                  {'controller': self.controller, 'action': action})

        action_result = self.dispatch(self.controller, action,
                                      request, **action_args)
    except TypeError as err:
        # A TypeError from dispatch typically means the request arguments
        # did not match the controller method signature.
        LOG.error(_LE('Exception handling resource: %s') % err)
        msg = _('The server could not comply with the request since '
                'it is either malformed or otherwise incorrect.')
        err = webob.exc.HTTPBadRequest(msg)
        http_exc = translate_exception(err, request.best_match_language())
        # NOTE(luisg): We disguise HTTP exceptions, otherwise they will be
        # treated by wsgi as responses ready to be sent back and they
        # won't make it into the pipeline app that serializes errors
        raise exception.HTTPExceptionDisguise(http_exc)
    except webob.exc.HTTPException as err:
        if not isinstance(err, webob.exc.HTTPError):
            # Some HTTPException are actually not errors, they are
            # responses ready to be sent back to the users, so we don't
            # create error log, but disguise and translate them to meet
            # openstacksdk's need.
            http_exc = translate_exception(err,
                                           request.best_match_language())
            raise exception.HTTPExceptionDisguise(http_exc)
        if isinstance(err, webob.exc.HTTPServerError):
            # 5xx responses are server-side failures; record them.
            LOG.error(
                _LE("Returning %(code)s to user: %(explanation)s"),
                {'code': err.code, 'explanation': err.explanation})
        http_exc = translate_exception(err, request.best_match_language())
        raise exception.HTTPExceptionDisguise(http_exc)
    except exception.SenlinException as err:
        raise translate_exception(err, request.best_match_language())
    except Exception as err:
        log_exception(err, sys.exc_info())
        raise translate_exception(err, request.best_match_language())

    serializer = self.serializer or serializers.JSONResponseSerializer()
    try:
        response = webob.Response(request=request)
        self.dispatch(serializer, action, response, action_result)
        return response
    # return unserializable result (typically an exception)
    except Exception:
        return action_result
def pre_op(self, cluster_id, action):
    """Callback function when cluster membership is about to change.

    :param cluster_id: ID of the target cluster.
    :param action: The action that triggers this policy check.
    """
    # A SCALE_IN action shrinks the cluster; any other triggering action
    # inflates it.
    expand = action.action != consts.CLUSTER_SCALE_IN

    # Prefer an explicit count from the action inputs; otherwise fall back
    # to a decision recorded by another policy, defaulting to 1.
    count = action.inputs.get('count', None)
    if not count:
        decision = action.data.get('creation' if expand else 'deletion',
                                   None)
        count = decision.get('count', 1) if decision else 1

    cluster = cluster_mod.Cluster.load(action.context, cluster_id)
    nc = self._nova(cluster)
    usable = nc.validate_azs(self.zones.keys())
    if len(usable) == 0:
        action.data['status'] = base.CHECK_ERROR
        action.data['reason'] = _('No availability zone found available.')
        LOG.error(_LE('No availability zone found available.'))
        return

    # Keep only the configured zones that passed validation.
    zones = {name: weight for name, weight in self.zones.items()
             if name in usable}

    current = cluster.get_zone_distribution(action.context, zones.keys())
    plan = self._create_plan(current, zones, count, expand)
    if not plan:
        action.data['status'] = base.CHECK_ERROR
        action.data['reason'] = _('There is no feasible plan to '
                                  'handle all nodes.')
        LOG.error(_LE('There is no feasible plan to handle all nodes.'))
        return

    # Record the plan under 'creation' or 'deletion' for later steps.
    key = 'creation' if expand else 'deletion'
    action.data.setdefault(key, {})
    action.data[key]['count'] = count
    action.data[key]['zones'] = plan
def member_add(self, node, lb_id, pool_id, port, subnet):
    """Add a member to Neutron lbaas pool.

    :param node: A node object to be added to the specified pool.
    :param lb_id: The ID of the loadbalancer.
    :param pool_id: The ID of the pool for receiving the node.
    :param port: The port for the new LB member to be created.
    :param subnet: The subnet to be used by the new LB member.
    :returns: The ID of the new LB member or None if errors occurred.
    """
    # Only IPv4 member addresses are supported here.
    addresses = self._get_node_address(node, version=4)
    if not addresses:
        LOG.error(_LE('Node (%(n)s) does not have valid IPv4 address.'),
                  {'n': node.id})
        return None

    try:
        # Resolve the subnet so we can find the network the node must be
        # attached to.
        subnet_obj = self.nc().subnet_get(subnet)
        net_id = subnet_obj.network_id
        net = self.nc().network_get(net_id)
    except exception.InternalError as ex:
        resource = 'subnet' if subnet in ex.message else 'network'
        msg = _LE('Failed in getting %(resource)s: %(msg)s.'
                  ) % {'resource': resource, 'msg': six.text_type(ex)}
        LOG.exception(msg)
        # Record the failure as a warning event as well.
        event.warning(oslo_context.get_current(), self,
                      resource.upper()+'_GET', 'ERROR', msg)
        return None
    net_name = net.name

    if net_name not in addresses:
        LOG.error(_LE('Node is not in subnet %(subnet)s'),
                  {'subnet': subnet})
        return None
    address = addresses[net_name]
    try:
        member = self.nc().pool_member_create(pool_id, address, port,
                                              subnet_obj.id)
    except exception.InternalError as ex:
        msg = _LE('Failed in creating lb pool member: %s.'
                  ) % six.text_type(ex)
        LOG.exception(msg)
        event.warning(oslo_context.get_current(), self,
                      'POOL_MEMBER_CREATE', 'ERROR', msg)
        return None
    # Wait for the LB to settle after the creation request.
    res = self._wait_for_lb_ready(lb_id)
    if res is False:
        LOG.error(_LE('Failed in creating pool member (%s).') % member.id)
        return None

    return member.id
def pre_op(self, cluster_id, action):
    """Callback function when cluster membership is about to change.

    :param cluster_id: ID of the target cluster.
    :param action: The action that triggers this policy check.
    :returns: ``None``.
    """
    delta = self._get_count(cluster_id, action)
    if delta == 0:
        return

    # A positive delta means the cluster is growing; negative, shrinking.
    expand = delta > 0
    count = abs(delta)

    cluster = cluster_mod.Cluster.load(action.context, cluster_id)
    kc = self._keystone(cluster)
    usable = kc.validate_regions(self.regions.keys())
    if len(usable) == 0:
        action.data['status'] = base.CHECK_ERROR
        action.data['reason'] = _('No region is found usable.')
        LOG.error(_LE('No region is found usable.'))
        return

    # Keep only the configured regions that passed validation.
    regions = {name: weight for name, weight in self.regions.items()
               if name in usable}

    current_dist = cluster.get_region_distribution(usable)
    plan = self._create_plan(current_dist, regions, count, expand)
    if not plan:
        action.data['status'] = base.CHECK_ERROR
        action.data['reason'] = _('There is no feasible plan to '
                                  'handle all nodes.')
        LOG.error(_LE('There is no feasible plan to handle all nodes.'))
        return

    # Record the plan under 'creation' or 'deletion' for later steps.
    key = 'creation' if expand else 'deletion'
    action.data.setdefault(key, {})
    action.data[key]['count'] = count
    action.data[key]['regions'] = plan
def pre_op(self, cluster_id, action):
    """Callback function when cluster membership is about to change.

    :param cluster_id: ID of the target cluster.
    :param action: The action that triggers this policy check.
    :returns: ``None``.
    """
    delta = self._get_count(cluster_id, action)
    if delta == 0:
        return

    # A positive delta means the cluster is growing; negative, shrinking.
    expand = delta > 0
    count = abs(delta)

    cluster = cm.Cluster.load(action.context, cluster_id)
    kc = self._keystone(cluster)
    usable = kc.validate_regions(self.regions.keys())
    if len(usable) == 0:
        action.data['status'] = base.CHECK_ERROR
        action.data['reason'] = _('No region is found usable.')
        LOG.error(_LE('No region is found usable.'))
        return

    # Keep only the configured regions that passed validation.
    regions = {name: weight for name, weight in self.regions.items()
               if name in usable}

    current_dist = cluster.get_region_distribution(usable)
    plan = self._create_plan(current_dist, regions, count, expand)
    if not plan:
        action.data['status'] = base.CHECK_ERROR
        action.data['reason'] = _('There is no feasible plan to '
                                  'handle all nodes.')
        LOG.error(_LE('There is no feasible plan to handle all nodes.'))
        return

    # Record the plan under 'creation' or 'deletion' for later steps.
    key = 'creation' if expand else 'deletion'
    action.data.setdefault(key, {})
    action.data[key]['count'] = count
    action.data[key]['regions'] = plan
def pre_op(self, cluster_id, action):
    """Callback function when cluster membership is about to change.

    :param cluster_id: ID of the target cluster.
    :param action: The action that triggers this policy check.
    """
    delta = self._get_count(cluster_id, action)
    if delta == 0:
        return

    # A positive delta means the cluster is growing; negative, shrinking.
    expand = delta > 0
    count = abs(delta)

    cluster = cluster_mod.Cluster.load(action.context, cluster_id)
    nc = self._nova(cluster)
    usable = nc.validate_azs(self.zones.keys())
    if len(usable) == 0:
        action.data['status'] = base.CHECK_ERROR
        action.data['reason'] = _('No availability zone found available.')
        LOG.error(_LE('No availability zone found available.'))
        return

    # Keep only the configured zones that passed validation.
    zones = {name: weight for name, weight in self.zones.items()
             if name in usable}

    current = cluster.get_zone_distribution(action.context, zones.keys())
    plan = self._create_plan(current, zones, count, expand)
    if not plan:
        action.data['status'] = base.CHECK_ERROR
        action.data['reason'] = _('There is no feasible plan to '
                                  'handle all nodes.')
        LOG.error(_LE('There is no feasible plan to handle all nodes.'))
        return

    # Record the plan under 'creation' or 'deletion' for later steps.
    key = 'creation' if expand else 'deletion'
    action.data.setdefault(key, {})
    action.data[key]['count'] = count
    action.data[key]['zones'] = plan
def attach(self, cluster):
    """Routine to be invoked when policy is to be attached to a cluster.

    :param cluster: The target cluster to be attached to.
    :returns: When the operation was successful, returns a tuple
              (True, message); otherwise, return a tuple (False, error).
    """
    data = {}
    nv_client = self.nova(cluster)

    placement_group = self.properties.get(self.PLACEMENT_GROUP)
    group_name = placement_group.get('group_name', None)

    if group_name is None:
        # Fall back to the 'group' scheduler hint in the profile spec.
        profile = profile_base.Profile.load(
            oslo_context.get_current(), cluster.profile_id)
        if 'scheduler_hints' in profile.spec:
            group_name = profile.spec['scheduler_hints'].get('group', None)

    if group_name is not None:
        # Reuse an existing server group with the given name.
        try:
            server_group = nv_client.get_server_group(group_name)
        except exception.InternalError as ex:
            msg = 'Failed in searching server_group'
            LOG.exception(_LE('%(msg)s: %(ex)s') % {
                'msg': msg, 'ex': six.text_type(ex)})
            return False, msg
        data['group_id'] = server_group.id
        data['inherited_group'] = True

    if data.get('group_id') is None:
        # No usable group was found; create one with the configured rule.
        rule = placement_group.get('placement_rule', 'anti-affinity')
        try:
            server_group = nv_client.create_server_group(rule)
        except exception.InternalError as ex:
            msg = 'Failed in creating server_group'
            LOG.exception(_LE('%(msg)s: %(ex)s') % {
                'msg': msg, 'ex': six.text_type(ex)})
            return False, msg
        data['group_id'] = server_group.id
        data['inherited_group'] = False

    return True, self._build_policy_data(data)
def ActionProc(context, action_id):
    """Action process.

    :param context: The request context used to load the action.
    :param action_id: ID of the action to be executed.
    :returns: True if the action executed successfully, False otherwise.
    """
    # Step 1: materialize the action object
    action = Action.load(context, action_id=action_id)
    if action is None:
        LOG.error(_LE('Action "%s" could not be found.'), action_id)
        return False

    # TODO(Anyone): Remove context usage in event module
    EVENT.info(action.context, action, action.action, 'START')

    reason = 'Action completed'
    success = True
    try:
        # Step 2: execute the action
        result, reason = action.execute()
    except Exception as ex:
        # We catch exception here to make sure the following logics are
        # executed.
        result = action.RES_ERROR
        reason = six.text_type(ex)
        # Use the _LE log marker (not _()) for log messages, consistent
        # with the rest of the codebase and oslo.i18n conventions.
        LOG.exception(_LE('Unexpected exception occurred during action '
                          '%(action)s (%(id)s) execution: %(reason)s'),
                      {'action': action.action, 'id': action.id,
                       'reason': reason})
        success = False
    finally:
        # NOTE: locks on action is eventually released here by status update
        action.set_status(result, reason)

    return success
def do_recover(self, obj, **options):
    """Default recover operation.

    :param obj: The node object to operate on.
    :param options: Keyword arguments for the recover operation.
    """
    op = options.pop('operation', None)

    # TODO(Qiming): The operation input could be a list of operations.
    if op and not isinstance(op, six.string_types):
        op = op[0]

    # Recreation is the only operation supported by this default recover.
    if op and op != consts.RECOVER_RECREATE:
        LOG.error(_LE("Recover operation not supported: %s"), op)
        return False

    # Recreate the node: delete it first, then create it again.
    try:
        self.do_delete(obj, **options)
    except exc.EResourceDeletion as ex:
        raise exc.EResourceOperation(op='recovering', type='node',
                                     id=obj.id,
                                     message=six.text_type(ex))

    res = None
    try:
        res = self.do_create(obj)
    except exc.EResourceCreation as ex:
        raise exc.EResourceOperation(op='recovering', type='node',
                                     id=obj.id,
                                     message=six.text_type(ex))

    return res
def hup(*args):
    # Shuts down the server(s), but allows running requests to complete
    self.LOG.error(_LE('SIGHUP received'))
    # Ignore further SIGHUPs while the group is being signalled, then
    # forward SIGHUP to the entire process group (pgid 0 = our group).
    signal.signal(signal.SIGHUP, signal.SIG_IGN)
    os.killpg(0, signal.SIGHUP)

# Register the closure above as the SIGHUP handler.
signal.signal(signal.SIGHUP, hup)
def node_lock_acquire(context, node_id, action_id, engine=None, forced=False): """Try to lock the specified node. :param context: the context used for DB operations; :param node_id: ID of the node to be locked. :param action_id: ID of the action that attempts to lock the node. :param engine: ID of the engine that attempts to lock the node. :param forced: set to True to cancel current action that owns the lock, if any. :returns: True if lock is acquired, or False otherwise. """ # Step 1: try lock the node - if the returned owner_id is the # action id, it was a success owner = db_api.node_lock_acquire(node_id, action_id) if action_id == owner: return True # Step 2: retry using global configuration options retries = cfg.CONF.lock_retry_times retry_interval = cfg.CONF.lock_retry_interval while retries > 0: scheduler.sleep(retry_interval) LOG.debug('Acquire lock for node %s again' % node_id) owner = db_api.node_lock_acquire(node_id, action_id) if action_id == owner: return True retries = retries - 1 # Step 3: Last resort is 'forced locking', only needed when retry failed if forced: owner = db_api.node_lock_steal(node_id, action_id) return action_id == owner # if this node lock by dead engine action = db_api.action_get(context, owner) if (action and action.owner and action.owner != engine and is_engine_dead(context, action.owner)): LOG.info( _LI('The node %(n)s is locked by dead action %(a)s, ' 'try to steal the lock.'), { 'n': node_id, 'a': owner }) reason = _('Engine died when executing this action.') db_api.action_mark_failed(context, action.id, time.time(), reason=reason) db_api.node_lock_steal(node_id, action_id) return True LOG.error( _LE('Node is already locked by action %(old)s, ' 'action %(new)s failed grabbing the lock'), { 'old': owner, 'new': action_id }) return False
def detach(self, cluster):
    """Routine to be called when the policy is detached from a cluster.

    :param cluster: The cluster from which the policy is to be detached.
    :returns: When the operation was successful, returns a tuple of
              (True, data) where the data contains references to the
              resources created; otherwise returns a tuple of
              (False, error) where the err contains a error message.
    """
    reason = _('Servergroup resource deletion succeeded.')
    ctx = context.get_admin_context()

    binding = db_api.cluster_policy_get(ctx, cluster.id, self.id)
    record = binding.data if binding else None
    if not record:
        return True, reason

    details = self._extract_policy_data(record)
    if not details:
        return True, reason

    group_id = details.get('servergroup_id', None)
    if not group_id or details.get('inherited_group', False):
        # Nothing to clean up: either no group was created, or the group
        # is not owned by this policy.
        return True, reason

    try:
        self.nova(cluster).delete_server_group(group_id)
    except Exception as ex:
        msg = _('Failed in deleting servergroup.')
        LOG.exception(_LE('%(msg)s: %(ex)s') % {
            'msg': msg, 'ex': six.text_type(ex)})
        return False, msg

    return True, reason
def execute(self, **kwargs):
    """Wrapper of action execution.

    This is mainly a wrapper that executes an action with cluster lock
    acquired.

    :return: A tuple (res, reason) that indicates whether the execution
             was a success and why if it wasn't a success.
    """
    try:
        cluster = cluster_mod.Cluster.load(self.context, self.target)
    except exception.NotFound:
        reason = _('Cluster %(id)s not found') % {'id': self.target}
        # NOTE: _LE() must wrap a literal string, never an already
        # formatted/translated variable; otherwise message extraction
        # breaks and the string gets double-marked for translation.
        LOG.error(_LE('Cluster %(id)s not found'), {'id': self.target})
        return self.RES_ERROR, reason

    # Try to lock cluster before do real operation
    forced = True if self.action == self.CLUSTER_DELETE else False
    res = senlin_lock.cluster_lock_acquire(cluster.id, self.id,
                                           senlin_lock.CLUSTER_SCOPE,
                                           forced)
    if not res:
        return self.RES_ERROR, _('Failed locking cluster')

    try:
        res, reason = self._execute(cluster)
    finally:
        # Always release the lock, even if _execute() raised.
        senlin_lock.cluster_lock_release(cluster.id, self.id,
                                         senlin_lock.CLUSTER_SCOPE)

    return res, reason
def detach(self, cluster):
    """Routine to be called when the policy is detached from a cluster.

    :param cluster: The cluster from which the policy is to be detached.
    :returns: When the operation was successful, returns a tuple of
              (True, data) where the data contains references to the
              resources created; otherwise returns a tuple of
              (False, error) where the err contains a error message.
    """
    reason = _('Servergroup resource deletion succeeded.')
    ctx = context.get_admin_context()

    binding = cpo.ClusterPolicy.get(ctx, cluster.id, self.id)
    record = binding.data if binding else None
    if not record:
        return True, reason

    details = self._extract_policy_data(record)
    if not details:
        return True, reason

    group_id = details.get('servergroup_id', None)
    if not group_id or details.get('inherited_group', False):
        # Nothing to clean up: either no group was created, or the group
        # is not owned by this policy.
        return True, reason

    try:
        self.nova(cluster).delete_server_group(group_id)
    except Exception as ex:
        msg = _('Failed in deleting servergroup.')
        LOG.exception(_LE('%(msg)s: %(ex)s') % {
            'msg': msg, 'ex': six.text_type(ex)})
        return False, msg

    return True, reason
def node_lock_acquire(node_id, action_id, forced=False): '''Try to lock the specified node. :param forced_locking: set to True to cancel current action that owns the lock, if any. ''' # Step 1: try lock the node - if the returned owner_id is the # action id, it was a success owner = db_api.node_lock_acquire(node_id, action_id) if action_id == owner: return True # Step 2: retry using global configuration options retries = cfg.CONF.lock_retry_times retry_interval = cfg.CONF.lock_retry_interval while retries > 0: scheduler.sleep(retry_interval) owner = db_api.node_lock_acquire(node_id, action_id) if action_id == owner: return True retries = retries - 1 # Step 3: Last resort is 'forced locking', only needed when retry failed if forced: owner = db_api.node_lock_steal(node_id, action_id) return action_id == owner LOG.error(_LE('Node is already locked by action %(old)s, ' 'action %(new)s failed grabbing the lock') % { 'old': owner, 'new': action_id}) return False
def detach(self, cluster):
    """Routine to be called when the policy is detached from a cluster.

    :param cluster: The cluster from which the policy is to be detached.
    :returns: When the operation was successful, returns a tuple of
              (True, data) where the data contains references to the
              resources created; otherwise returns a tuple of
              (False, error) where the err contains a error message.
    """
    reason = _('Server group resources deletion succeeded')

    binding = cluster_policy.ClusterPolicy.load(
        oslo_context.get_current(), cluster.id, self.id)
    if binding is None or binding.data is None:
        return True, reason

    details = self._extract_policy_data(binding.data)
    if details is None:
        return True, reason

    group_id = details.get('group_id', None)
    if not group_id or details.get('inherited_group', False):
        # Nothing to clean up: either no group was created, or the group
        # came from the profile and is not owned by this policy.
        return True, reason

    try:
        # to add into nova driver
        self.nova(cluster).delete_server_group(group_id)
    except exception.InternalError as ex:
        msg = 'Failed in deleting server_group'
        LOG.exception(_LE('%(msg)s: %(ex)s') % {
            'msg': msg, 'ex': six.text_type(ex)})
        return False, msg

    return True, reason
def kill_children(self, *args):
    """Kills the entire process group."""
    LOG.error(_LE('SIGTERM received'))
    # Ignore further termination signals while we take the group down.
    for signum in (signal.SIGTERM, signal.SIGINT):
        signal.signal(signum, signal.SIG_IGN)
    self.running = False
    # Forward SIGTERM to every process in our process group.
    os.killpg(0, signal.SIGTERM)
def member_add(self, node, lb_id, pool_id, port, subnet):
    """Add a member to Neutron lbaas pool.

    :param node: A node object to be added to the specified pool.
    :param lb_id: The ID of the loadbalancer.
    :param pool_id: The ID of the pool for receiving the node.
    :param port: The port for the new LB member to be created.
    :param subnet: The subnet to be used by the new LB member.
    :returns: The ID of the new LB member or None if errors occurred.
    """
    try:
        subnet_obj = self.nc().subnet_get(subnet)
        net_id = subnet_obj.network_id
        net = self.nc().network_get(net_id)
    except exception.InternalError as ex:
        resource = 'subnet' if subnet in ex.message else 'network'
        msg = _LE('Failed in getting %(resource)s: %(msg)s.'
                  ) % {'resource': resource, 'msg': six.text_type(ex)}
        LOG.exception(msg)
        return None
    net_name = net.name

    node_detail = node.get_details(oslo_context.get_current())
    addresses = node_detail.get('addresses')
    if net_name not in addresses:
        LOG.error(_LE('Node is not in subnet %(subnet)s'),
                  {'subnet': subnet})
        return None

    # Use the first IP address if more than one are found in target
    # network. Each entry in the address list is a dict; the IP string
    # itself is under the 'addr' key (previously the whole dict was
    # passed to pool_member_create, which is not a valid address).
    address = addresses[net_name][0]['addr']
    try:
        member = self.nc().pool_member_create(pool_id, address, port,
                                              subnet_obj.id)
    except exception.InternalError as ex:
        msg = _LE('Failed in creating lb pool member: %s.'
                  ) % six.text_type(ex)
        LOG.exception(msg)
        return None
    res = self._wait_for_lb_ready(lb_id)
    if res is False:
        LOG.error(_LE('Failed in creating pool member (%s).') % member.id)
        return None

    return member.id
def cluster_lock_acquire(context, cluster_id, action_id, engine=None, scope=CLUSTER_SCOPE, forced=False): """Try to lock the specified cluster. :param cluster_id: ID of the cluster to be locked. :param action_id: ID of the action which wants to lock the cluster. :param engine: ID of the engine which wants to lock the cluster. :param scope: scope of lock, could be cluster wide lock, or node-wide lock. :param forced: set to True to cancel current action that owns the lock, if any. :returns: True if lock is acquired, or False otherwise. """ # Step 1: try lock the cluster - if the returned owner_id is the # action id, it was a success owners = db_api.cluster_lock_acquire(cluster_id, action_id, scope) if action_id in owners: return True # Step 2: retry using global configuration options retries = cfg.CONF.lock_retry_times retry_interval = cfg.CONF.lock_retry_interval while retries > 0: scheduler.sleep(retry_interval) LOG.debug('Acquire lock for cluster %s again' % cluster_id) owners = db_api.cluster_lock_acquire(cluster_id, action_id, scope) if action_id in owners: return True retries = retries - 1 # Step 3: Last resort is 'forced locking', only needed when retry failed if forced: owners = db_api.cluster_lock_steal(cluster_id, action_id) return action_id in owners # Will reach here only because scope == CLUSTER_SCOPE action = db_api.action_get(context, owners[0]) if (action and action.owner and action.owner != engine and is_engine_dead(context, action.owner)): LOG.info(_LI('The cluster %(c)s is locked by dead action %(a)s, ' 'try to steal the lock.'), { 'c': cluster_id, 'a': owners[0] }) reason = _('Engine died when executing this action.') db_api.action_mark_failed(context, action.id, time.time(), reason=reason) owners = db_api.cluster_lock_steal(cluster_id, action_id) return action_id in owners LOG.error(_LE('Cluster is already locked by action %(old)s, ' 'action %(new)s failed grabbing the lock'), {'old': str(owners), 'new': action_id}) return False
def lb_delete(self, **kwargs):
    """Delete a Neutron lbaas instance

    The following Neutron lbaas resources will be deleted in order:
    1)healthmonitor; 2)pool; 3)listener; 4)loadbalancer.

    :returns: A tuple (True, message) on success, or (False, error
              message) when any deletion step fails.
    """
    lb_id = kwargs.pop('loadbalancer')

    healthmonitor_id = kwargs.pop('healthmonitor', None)
    if healthmonitor_id:
        try:
            self.nc().healthmonitor_delete(healthmonitor_id)
        except exception.InternalError as ex:
            msg = _LE('Failed in deleting healthmonitor: %s.'
                      ) % six.text_type(ex)
            LOG.exception(msg)
            return False, msg
        # Wait for the LB to settle before deleting the next resource.
        res = self._wait_for_lb_ready(lb_id)
        if res is False:
            msg = _LE('Failed in deleting healthmonitor '
                      '(%s).') % healthmonitor_id
            return False, msg

    pool_id = kwargs.pop('pool', None)
    if pool_id:
        try:
            self.nc().pool_delete(pool_id)
        except exception.InternalError as ex:
            msg = _LE('Failed in deleting lb pool: %s.'
                      ) % six.text_type(ex)
            LOG.exception(msg)
            return False, msg
        res = self._wait_for_lb_ready(lb_id)
        if res is False:
            msg = _LE('Failed in deleting pool (%s).') % pool_id
            return False, msg

    listener_id = kwargs.pop('listener', None)
    if listener_id:
        try:
            self.nc().listener_delete(listener_id)
        except exception.InternalError as ex:
            msg = _LE('Failed in deleting listener: %s.'
                      ) % six.text_type(ex)
            LOG.exception(msg)
            return False, msg
        res = self._wait_for_lb_ready(lb_id)
        if res is False:
            msg = _LE('Failed in deleting listener (%s).') % listener_id
            return False, msg

    self.nc().loadbalancer_delete(lb_id)
    # The loadbalancer itself disappears on success, so a NotFound while
    # polling is treated as completion.
    res = self._wait_for_lb_ready(lb_id, ignore_not_found=True)
    if res is False:
        msg = _LE('Failed in deleting loadbalancer (%s).') % lb_id
        return False, msg

    return True, _('LB deletion succeeded')
def node_lock_acquire(context, node_id, action_id, engine=None, forced=False): """Try to lock the specified node. :param context: the context used for DB operations; :param node_id: ID of the node to be locked. :param action_id: ID of the action that attempts to lock the node. :param engine: ID of the engine that attempts to lock the node. :param forced: set to True to cancel current action that owns the lock, if any. :returns: True if lock is acquired, or False otherwise. """ # Step 1: try lock the node - if the returned owner_id is the # action id, it was a success owner = db_api.node_lock_acquire(node_id, action_id) if action_id == owner: return True # Step 2: retry using global configuration options retries = cfg.CONF.lock_retry_times retry_interval = cfg.CONF.lock_retry_interval while retries > 0: scheduler.sleep(retry_interval) LOG.debug('Acquire lock for node %s again' % node_id) owner = db_api.node_lock_acquire(node_id, action_id) if action_id == owner: return True retries = retries - 1 # Step 3: Last resort is 'forced locking', only needed when retry failed if forced: owner = db_api.node_lock_steal(node_id, action_id) return action_id == owner # if this node lock by dead engine action = db_api.action_get(context, owner) if (action and action.owner and action.owner != engine and is_engine_dead(context, action.owner)): LOG.info(_LI('The node %(n)s is locked by dead action %(a)s, ' 'try to steal the lock.'), { 'n': node_id, 'a': owner }) reason = _('Engine died when executing this action.') db_api.action_mark_failed(context, action.id, time.time(), reason=reason) db_api.node_lock_steal(node_id, action_id) return True LOG.error(_LE('Node is already locked by action %(old)s, ' 'action %(new)s failed grabbing the lock'), {'old': owner, 'new': action_id}) return False
def do_create(self, context, **kwargs):
    """Additional logic at the beginning of cluster creation process.

    Set cluster status to CREATING when the cluster is still in INIT
    status; refuse the operation otherwise.

    :returns: True if the status transition happened, False otherwise.
    """
    if self.status == self.INIT:
        self.set_status(context, self.CREATING,
                        reason='Creation in progress')
        return True

    LOG.error(_LE('Cluster is in status "%s"'), self.status)
    return False
def cluster_lock_acquire(context, cluster_id, action_id, engine=None, scope=CLUSTER_SCOPE, forced=False): """Try to lock the specified cluster. :param cluster_id: ID of the cluster to be locked. :param action_id: ID of the action which wants to lock the cluster. :param engine: ID of the engine which wants to lock the cluster. :param scope: scope of lock, could be cluster wide lock, or node-wide lock. :param forced: set to True to cancel current action that owns the lock, if any. :returns: True if lock is acquired, or False otherwise. """ # Step 1: try lock the cluster - if the returned owner_id is the # action id, it was a success owners = cl_obj.ClusterLock.acquire(cluster_id, action_id, scope) if action_id in owners: return True # Step 2: Last resort is 'forced locking', only needed when retry failed if forced: owners = cl_obj.ClusterLock.steal(cluster_id, action_id) return action_id in owners # Step 3: check if the owner is a dead engine, if so, steal the lock. # Will reach here only because scope == CLUSTER_SCOPE action = ao.Action.get(context, owners[0]) if (action and action.owner and action.owner != engine and utils.is_engine_dead(context, action.owner)): LOG.info( _LI('The cluster %(c)s is locked by dead action %(a)s, ' 'try to steal the lock.'), { 'c': cluster_id, 'a': owners[0] }) reason = _('Engine died when executing this action.') owners = cl_obj.ClusterLock.steal(cluster_id, action_id) # Mark the old action to failed. ao.Action.mark_failed(context, action.id, time.time(), reason) return action_id in owners LOG.error( _LE('Cluster is already locked by action %(old)s, ' 'action %(new)s failed grabbing the lock'), { 'old': str(owners), 'new': action_id }) return False
def error(context, entity, action, status=None, status_reason=None,
          timestamp=None):
    """Generate and persist an ERROR-level event, then log it."""
    when = timestamp or timeutils.utcnow(True)
    event = Event(when, logging.ERROR, entity,
                  action=action,
                  status=status,
                  status_reason=status_reason,
                  user=context.user,
                  project=context.project)
    event.store(context)

    # Only the first 8 characters of the object ID are logged.
    short_id = event.oid and event.oid[:8]
    LOG.error(_LE('%(name)s [%(id)s] %(action)s - %(status)s: %(reason)s'),
              {'name': event.oname,
               'id': short_id,
               'action': action,
               'status': status,
               'reason': status_reason})
def member_remove(self, lb_id, pool_id, member_id):
    """Delete a member from Neutron lbaas pool.

    :param lb_id: The ID of the loadbalancer the operation is targeted at;
    :param pool_id: The ID of the pool from which the member is deleted;
    :param member_id: The ID of the LB member.
    :returns: True if the operation succeeded or None if errors occurred.
    """
    try:
        self.nc().pool_member_delete(pool_id, member_id)
    except exception.InternalError as ex:
        msg = _LE('Failed in removing member %(m)s from pool %(p)s: '
                  '%(ex)s') % {'m': member_id,
                               'p': pool_id,
                               'ex': six.text_type(ex)}
        LOG.exception(msg)
        return None
    # Member deletion is asynchronous; wait for the LB to become ready
    # again before declaring success.
    res = self._wait_for_lb_ready(lb_id)
    if res is False:
        LOG.error(_LE('Failed in deleting pool member (%s).') % member_id)
        return None

    return True
def _verify_and_respawn_children(self, pid, status):
    """Decide whether a reaped child worker should be respawned.

    :param pid: PID of the child process that was reaped.
    :param status: raw exit status as returned by os.wait().
    """
    if len(self.stale_children) == 0:
        LOG.debug('No stale children')
    if os.WIFEXITED(status) and os.WEXITSTATUS(status) != 0:
        # Child exited voluntarily with a non-zero code: respawning it
        # would most likely fail the same way again.
        LOG.error(_LE('Not respawning child %d, cannot '
                      'recover from termination'), pid)
        if not self.children and not self.stale_children:
            LOG.info(_LI('All workers have terminated. Exiting'))
            self.running = False
    else:
        if len(self.children) < self.conf.workers:
            self.run_child()
def validate_for_update(self, new_profile):
    """Check whether all changed properties are updatable.

    :param new_profile: The new profile whose properties are compared
                        against this profile's current properties.
    :returns: True if every changed property is marked updatable in the
              properties schema, False otherwise.
    """
    non_updatables = [
        k for (k, v) in new_profile.properties.items()
        if self.properties.get(k, None) != v
        and not self.properties_schema[k].updatable
    ]
    if not non_updatables:
        return True

    msg = ", ".join(non_updatables)
    # Use lazy interpolation so the message is only rendered when the
    # ERROR level is actually emitted (oslo.log guideline).
    LOG.error(_LE("The following properties are not updatable: %s."), msg)
    return False
def validate_for_update(self, new_profile):
    """Return True when every changed property is updatable."""
    blocked = [k for (k, v) in new_profile.properties.items()
               if self.properties.get(k, None) != v
               and not self.properties_schema[k].updatable]

    if blocked:
        msg = ", ".join(blocked)
        LOG.error(_LE("The following properties are not updatable: %s."
                      ) % msg)
        return False

    return True
def _verify_and_respawn_children(self, pid, status):
    """Handle a reaped child: respawn it or begin shutdown."""
    if not self.stale_children:
        LOG.debug('No stale children')

    exited_badly = os.WIFEXITED(status) and os.WEXITSTATUS(status) != 0
    if exited_badly:
        # A voluntary non-zero exit is treated as unrecoverable.
        LOG.error(
            _LE('Not respawning child %d, cannot '
                'recover from termination'), pid)
        if not (self.children or self.stale_children):
            LOG.info(_LI('All workers have terminated. Exiting'))
            self.running = False
    elif len(self.children) < self.conf.workers:
        self.run_child()
def read_global_environment(self):
    '''Read and parse global environment files.

    Scans every file under the configured ``environment_dir`` and loads
    it; parse or read failures of individual files are logged and
    skipped so that one bad file does not block the rest.
    '''
    cfg.CONF.import_opt('environment_dir', 'senlin.common.config')
    env_dir = cfg.CONF.environment_dir

    try:
        files = glob.glob(os.path.join(env_dir, '*'))
    except OSError as ex:
        # Directory not listable: the global environment is optional,
        # so just log and return.
        LOG.error(_LE('Failed to read %s'), env_dir)
        LOG.exception(ex)
        return

    for fname in files:
        try:
            with open(fname) as f:
                LOG.info(_LI('Loading environment from %s'), fname)
                self.load(self.parse(f.read()))
        except ValueError as vex:
            LOG.error(_LE('Failed to parse %s'), fname)
            LOG.exception(six.text_type(vex))
        except IOError as ioex:
            LOG.error(_LE('Failed to read %s'), fname)
            LOG.exception(six.text_type(ioex))
def member_remove(self, lb_id, pool_id, member_id):
    """Delete a member from Neutron lbaas pool.

    :param lb_id: The ID of the loadbalancer the operation is targeted at;
    :param pool_id: The ID of the pool from which the member is deleted;
    :param member_id: The ID of the LB member.
    :returns: True if the operation succeeded or None if errors occurred.
    """
    try:
        self.nc().pool_member_delete(pool_id, member_id)
    except exception.InternalError as ex:
        msg = _LE('Failed in removing member %(m)s from pool %(p)s: '
                  '%(ex)s') % {'m': member_id,
                               'p': pool_id,
                               'ex': six.text_type(ex)}
        LOG.exception(msg)
        # Record the failure as a warning event as well, for operators.
        EVENT.warning(oslo_context.get_current(), self,
                      'POOL_MEMBER_DELETE', 'ERROR', msg)
        return None
    # Deletion is asynchronous; wait for the LB to become ready again.
    res = self._wait_for_lb_ready(lb_id)
    if res is False:
        LOG.error(_LE('Failed in deleting pool member (%s).') % member_id)
        return None

    return True
def __init__(self, **kwargs):
    """Format the exception message from msg_fmt and the given kwargs.

    When kwargs does not match the placeholders in msg_fmt, the problem
    is logged (and optionally re-raised) instead of crashing blindly.
    """
    self.kwargs = kwargs

    try:
        self.message = self.msg_fmt % kwargs
    except KeyError:
        # if kwargs doesn't match a variable in the message
        # log the issue and the kwargs
        LOG.exception(_LE('Exception in string format operation'))
        # NOTE: use kwargs.items() for Python 3 compatibility and for
        # consistency with the sibling exception base in this tree.
        for name, value in kwargs.items():
            LOG.error("%s: %s" % (name, value))  # noqa

        if _FATAL_EXCEPTION_FORMAT_ERRORS:
            raise
def node_lock_acquire(context, node_id, action_id, engine=None, forced=False): """Try to lock the specified node. :param context: the context used for DB operations; :param node_id: ID of the node to be locked. :param action_id: ID of the action that attempts to lock the node. :param engine: ID of the engine that attempts to lock the node. :param forced: set to True to cancel current action that owns the lock, if any. :returns: True if lock is acquired, or False otherwise. """ # Step 1: try lock the node - if the returned owner_id is the # action id, it was a success owner = nl_obj.NodeLock.acquire(node_id, action_id) if action_id == owner: return True # Step 2: Last resort is 'forced locking', only needed when retry failed if forced: owner = nl_obj.NodeLock.steal(node_id, action_id) return action_id == owner # Step 3: Try to steal a lock if it's owner is a dead engine. # if this node lock by dead engine action = ao.Action.get(context, owner) if (action and action.owner and action.owner != engine and utils.is_engine_dead(context, action.owner)): LOG.info( _LI('The node %(n)s is locked by dead action %(a)s, ' 'try to steal the lock.'), { 'n': node_id, 'a': owner }) reason = _('Engine died when executing this action.') nl_obj.NodeLock.steal(node_id, action_id) ao.Action.mark_failed(context, action.id, time.time(), reason) return True LOG.error( _LE('Node is already locked by action %(old)s, ' 'action %(new)s failed grabbing the lock'), { 'old': owner, 'new': action_id }) return False
def wait_on_children(self):
    """Reap dead child workers and respawn them until shutdown.

    Loops on os.wait() while the server is running; a child that exited
    or was killed by a signal is removed and replaced.  Ctrl-C tears
    down the whole process group.
    """
    while self.running:
        try:
            pid, status = os.wait()
            if os.WIFEXITED(status) or os.WIFSIGNALED(status):
                self.LOG.error(_LE('Removing dead child %s') % pid)
                self.children.remove(pid)
                self.run_child()
        except OSError as err:
            # EINTR/ECHILD are expected wait() interruptions; anything
            # else is a real error.
            if err.errno not in (errno.EINTR, errno.ECHILD):
                raise
        except KeyboardInterrupt:
            self.LOG.info(_LI('Caught keyboard interrupt. Exiting.'))
            os.killpg(0, signal.SIGTERM)
            break
    # Drain and close the listening socket on the way out.
    eventlet.greenio.shutdown_safe(self.sock)
    self.sock.close()
    self.LOG.debug('Exited')
def __init__(self, **kwargs):
    """Format the exception message from msg_fmt and the given kwargs.

    When kwargs does not match the placeholders in msg_fmt, the problem
    is logged (and optionally re-raised) instead of crashing blindly.
    """
    self.kwargs = kwargs

    try:
        self.message = self.msg_fmt % kwargs
        # if last char is '.', wipe out redundant '.'
        # NOTE: endswith() is used instead of message[-1] so an empty
        # formatted message does not raise IndexError.
        if self.message.endswith('.'):
            self.message = self.message.rstrip('.') + '.'
    except KeyError:
        # if kwargs doesn't match a variable in the message
        # log the issue and the kwargs
        LOG.exception(_LE('Exception in string format operation'))
        for name, value in kwargs.items():
            LOG.error("%s: %s" % (name, value))  # noqa

        if _FATAL_EXCEPTION_FORMAT_ERRORS:
            raise
def do_create(self, context):
    """Create the physical object backing this node.

    :param context: request context for DB and profile operations.
    :returns: True if creation succeeded, False otherwise.
    """
    if self.status != self.INIT:
        LOG.error(_LE('Node is in status "%s"'), self.status)
        return False
    self.set_status(context, self.CREATING, reason='Creation in progress')
    event_mod.info(context, self, 'create')
    try:
        physical_id = profile_base.Profile.create_object(context, self)
    except exception.InternalError as ex:
        self._handle_exception(context, 'create', self.ERROR, ex)
        return False
    if not physical_id:
        return False

    status_reason = 'Creation succeeded'
    self.set_status(context, self.ACTIVE, status_reason)
    self.physical_id = physical_id
    self.store(context)
    return True
def _dump(level, action, phase, reason, timestamp):
    """Fan an event out to all configured dispatchers.

    :param level: numeric logging level of the event.
    :param action: the action the event is about.
    :param phase: phase of the action.
    :param reason: human readable reason string.
    :param timestamp: event time; defaults to now (UTC) when None.
    """
    global dispatchers

    if timestamp is None:
        timestamp = timeutils.utcnow(True)

    # We check the logging level threshold only when debug is False
    if cfg.CONF.debug is False:
        watermark = cfg.CONF.dispatchers.priority.upper()
        bound = consts.EVENT_LEVELS.get(watermark, logging.INFO)
        if level < bound:
            return

    try:
        # Best-effort delivery: a failing dispatcher must not break the
        # caller, so any exception is logged and swallowed.
        dispatchers.map_method("dump", level, action,
                               phase=phase, reason=reason,
                               timestamp=timestamp)
    except Exception as ex:
        LOG.exception(_LE("Dispatcher failed to handle the event: %s"),
                      six.text_type(ex))
def do_create(self, context):
    """Create the physical object backing this node.

    :param context: request context for DB and profile operations.
    :returns: True if creation succeeded, False otherwise.
    """
    if self.status != self.INIT:
        LOG.error(_LE('Node is in status "%s"'), self.status)
        return False
    self.set_status(context, self.CREATING, reason='Creation in progress')
    event_mod.info(context, self, 'create')
    physical_id = profile_base.Profile.create_object(context, self)
    if not physical_id:
        return False

    # Nodes that belong to a cluster get the next index in that cluster.
    # NOTE(review): this tests `is not None`; presumably an orphan node
    # carries cluster_id None rather than '' -- confirm against callers.
    if self.cluster_id is not None:
        self.index = db_api.cluster_get_next_index(context,
                                                   self.cluster_id)

    self.physical_id = physical_id
    self.created_time = datetime.datetime.utcnow()
    self.status = self.ACTIVE
    self.status_reason = 'Creation succeeded'
    self.store(context)
    return True
def do_create(self, context):
    """Create the physical object backing this node.

    :param context: request context for DB and profile operations.
    :returns: True if creation succeeded, False otherwise.
    """
    if self.status != consts.NS_INIT:
        LOG.error(_LE('Node is in status "%s"'), self.status)
        return False

    self.set_status(context, consts.NS_CREATING, _('Creation in progress'))
    try:
        physical_id = pb.Profile.create_object(context, self)
    except exc.EResourceCreation as ex:
        # The profile may have created a partial resource; record its id
        # on the node so it can be cleaned up later.
        physical_id = ex.resource_id
        self.set_status(context, consts.NS_ERROR, six.text_type(ex),
                        physical_id=physical_id)
        return False

    self.set_status(context, consts.NS_ACTIVE, _('Creation succeeded'),
                    physical_id=physical_id)
    return True
def do_recover(self, obj, **options):
    """Default recover operation.

    Recovery is implemented as delete-then-recreate; only the RECREATE
    operation (or no operation at all) is supported.

    :param obj: The node object to operate on.
    :param options: Keyword arguments for the recover operation.
    :returns: The result of the recreate, or False on failure.
    """
    operation = options.get('operation', None)
    if operation and operation != consts.RECOVER_RECREATE:
        LOG.error(_LE("Recover operation not supported: %s"), operation)
        return False

    res = self.do_delete(obj)
    if res:
        try:
            res = self.do_create(obj)
        except Exception as ex:
            LOG.exception(_('Failed at recovering obj: %s '),
                          six.text_type(ex))
            return False

    return res
def cluster_lock_acquire(cluster_id, action_id, scope=CLUSTER_SCOPE, forced=False): """Try to lock the specified cluster. :param cluster_id: ID of the cluster to be locked. :param action_id: ID of the action which wants to lock the cluster. :param scope: scope of lock, could be cluster wide lock, or node-wide lock. :param forced: set to True to cancel current action that owns the lock, if any. :returns: True if lock is acquired, or False otherwise. """ # Step 1: try lock the cluster - if the returned owner_id is the # action id, it was a success owners = db_api.cluster_lock_acquire(cluster_id, action_id, scope) if action_id in owners: return True # Step 2: retry using global configuration options retries = cfg.CONF.lock_retry_times retry_interval = cfg.CONF.lock_retry_interval while retries > 0: scheduler.sleep(retry_interval) owners = db_api.cluster_lock_acquire(cluster_id, action_id, scope) if action_id in owners: return True retries = retries - 1 # Step 3: Last resort is 'forced locking', only needed when retry failed if forced: owners = db_api.cluster_lock_steal(cluster_id, action_id) return action_id in owners LOG.error(_LE('Cluster is already locked by action %(old)s, ' 'action %(new)s failed grabbing the lock'), {'old': str(owners), 'new': action_id}) return False
def pre_op(self, cluster_id, action):
    """Callback function when new nodes are to be created for a cluster.

    Computes an availability-zone placement plan for the new nodes and
    records it into ``action.data['placement']``.

    :param cluster_id: ID of the target cluster.
    :param action: The action that triggers this policy check.
    """
    pd = action.data.get('creation', {})
    if pd:
        count = pd.get('count', 1)
    else:
        # If no scaling policy is attached, use the input count directly
        count = action.inputs.get('count', 1)

    cluster = cluster_mod.Cluster.load(action.context, cluster_id)

    zones = self._validate_zones(cluster)
    if len(zones) == 0:
        action.data['status'] = base.CHECK_ERROR
        action.data['reason'] = _('No availability zone found available.')
        LOG.error(_LE('No availability zone found available.'))
        return

    # Calculate AZ distribution for exiting nodes
    current_dist = self._get_current_dist(action.context, zones, cluster)
    # Calculate placement plan for new nodes
    plan = self._create_plan(current_dist, zones, count)

    placement = action.data.get('placement', {})
    placement['count'] = count
    placement['placements'] = []

    # Expand the per-zone counts into one placement entry per new node.
    for az, count in plan.items():
        if count > 0:
            entry = {'zone': az}
            placement['placements'].extend([entry] * count)

    action.data.update({'placement': placement})

    return
def _wait_for_lb_ready(self, lb_id, timeout=60, ignore_not_found=False):
    """Keep waiting until loadbalancer is ready

    This method will keep waiting until loadbalancer resource specified
    by lb_id becomes ready, i.e. its provisioning_status is ACTIVE and
    its operating_status is ONLINE.

    :param lb_id: ID of the load-balancer to check.
    :param timeout: timeout in seconds.
    :param ignore_not_found: if set to True, nonexistent loadbalancer
        resource is also an acceptable result.
    :returns: True when the LB became ready within the timeout, False
        on timeout or on failure to query the LB.
    """
    waited = 0
    while waited < timeout:
        try:
            lb = self.nc().loadbalancer_get(lb_id)
        except exception.InternalError as ex:
            msg = _LE('Failed in getting loadbalancer: %s.'
                      ) % six.text_type(ex)
            LOG.exception(msg)
            EVENT.warning(oslo_context.get_current(), self,
                          'LB_GET', 'ERROR', msg)
            return False
        if lb is None:
            lb_ready = ignore_not_found
        else:
            lb_ready = ((lb.provisioning_status == 'ACTIVE') and
                        (lb.operating_status == 'ONLINE'))

        if lb_ready is True:
            return True

        # Poll every 2 seconds until ready or timed out.
        LOG.debug(_('Waiting for loadbalancer %(lb)s to become ready'),
                  {'lb': lb_id})
        eventlet.sleep(2)
        waited += 2

    return False
def _wait_for_lb_ready(self, lb_id, timeout=60, ignore_not_found=False):
    """Keep waiting until loadbalancer is ready

    This method will keep waiting until loadbalancer resource specified
    by lb_id becomes ready, i.e. its provisioning_status is ACTIVE and
    its operating_status is ONLINE.

    :param lb_id: ID of the load-balancer to check.
    :param timeout: timeout in seconds.
    :param ignore_not_found: if set to True, nonexistent loadbalancer
        resource is also an acceptable result.
    """
    elapsed = 0
    while elapsed < timeout:
        try:
            lb = self.nc().loadbalancer_get(lb_id)
        except exception.InternalError as ex:
            msg = _LE('Failed in getting loadbalancer: %s.'
                      ) % six.text_type(ex)
            LOG.exception(msg)
            return False

        if lb is None:
            lb_ready = ignore_not_found
        else:
            active = (lb.provisioning_status == 'ACTIVE')
            online = (lb.operating_status == 'ONLINE')
            lb_ready = active and online

        if lb_ready is True:
            return True

        # Not ready yet; poll again after a short sleep.
        LOG.debug(_('Waiting for loadbalancer %(lb)s to become ready'),
                  {'lb': lb_id})
        eventlet.sleep(2)
        elapsed += 2

    return False
def do_check(self, obj):
    """Check stack status.

    :param obj: Node object to operate.
    :returns: True if check succeeded, or False otherwise.
    """
    stack_id = obj.physical_id
    if stack_id is None:
        return False

    hc = self.orchestration(obj)
    try:
        # Timeout = None means we will use the 'default_action_timeout'
        # It can be overridden by the TIMEOUT profile property
        timeout = None
        if self.properties[self.TIMEOUT]:
            # TIMEOUT is expressed in minutes; the client expects seconds.
            timeout = self.properties[self.TIMEOUT] * 60
        hc.stack_check(stack_id)
        hc.wait_for_stack(stack_id, 'CHECK_COMPLETE', timeout=timeout)
    except exc.InternalError as ex:
        LOG.error(_LE('Failed in checking stack: %s.'), ex)
        return False

    return True
def signal(self, cmd):
    """Send a signal to the action.

    :param cmd: One of the command words defined in self.COMMANDS.
    :returns: None
    """
    if cmd not in self.COMMANDS:
        return

    if cmd == self.SIG_CANCEL:
        expected = (self.INIT, self.WAITING, self.READY, self.RUNNING)
    elif cmd == self.SIG_SUSPEND:
        # NOTE: must be a 1-tuple -- a bare '(X)' is just X, which would
        # turn the membership test below into a substring match on the
        # status string.
        expected = (self.RUNNING,)
    else:     # SIG_RESUME
        expected = (self.SUSPENDED,)

    if self.status not in expected:
        LOG.error(
            _LE("Action (%(id)s) is in status (%(actual)s) while "
                "expected status should be one of (%(expected)s)."),
            dict(id=self.id[:8], expected=expected, actual=self.status))
        return

    ao.Action.signal(self.context, self.id, cmd)
def log_exception(err, exc_info):
    """Log an unexpected API-serving error, with traceback when verbose."""
    kwargs = {}
    # Attach the traceback only in verbose/debug mode.
    if cfg.CONF.verbose or cfg.CONF.debug:
        kwargs['exc_info'] = exc_info
    logging.error(_LE("Unexpected error occurred serving API: %s") % err,
                  **kwargs)
def attach(self, cluster):
    """Routine to be invoked when policy is to be attached to a cluster.

    :para cluster: The target cluster to attach to;
    :returns: When the operation was successful, returns a tuple (True,
        message); otherwise, return a tuple (False, error).
    """
    res, data = super(AffinityPolicy, self).attach(cluster)
    if res is False:
        return False, data

    data = {'inherited_group': False}
    nc = self.nova(cluster)
    group = self.properties.get(self.SERVER_GROUP)

    # guess servergroup name
    group_name = group.get(self.GROUP_NAME, None)

    if group_name is None:
        # Fall back to the 'group' scheduler hint from the profile spec,
        # if the profile declares one.
        profile = cluster.rt['profile']
        if 'scheduler_hints' in profile.spec:
            hints = profile.spec['scheduler_hints']
            group_name = hints.get('group', None)

    if group_name:
        try:
            server_group = nc.find_server_group(group_name, True)
        except exception.InternalError as ex:
            msg = _("Failed in retrieving servergroup '%s'.") % group_name
            LOG.exception(
                _LE('%(msg)s: %(ex)s') % {
                    'msg': msg,
                    'ex': six.text_type(ex)
                })
            return False, msg

        if server_group:
            # Check if the policies match
            policies = group.get(self.GROUP_POLICIES)
            if policies and policies != server_group.policies[0]:
                msg = _(
                    "Policies specified (%(specified)s) doesn't match "
                    "that of the existing servergroup (%(existing)s).") % {
                    'specified': policies,
                    'existing': server_group.policies[0]
                }
                return False, msg

            data['servergroup_id'] = server_group.id
            data['inherited_group'] = True

    if not data['inherited_group']:
        # create a random name if necessary
        if not group_name:
            group_name = 'server_group_%s' % utils.random_name()
        try:
            server_group = nc.create_server_group(
                name=group_name,
                policies=[group.get(self.GROUP_POLICIES)])
        except Exception as ex:
            msg = _('Failed in creating servergroup.')
            LOG.exception(
                _LE('%(msg)s: %(ex)s') % {
                    'msg': msg,
                    'ex': six.text_type(ex)
                })
            return False, msg

        data['servergroup_id'] = server_group.id

    policy_data = self._build_policy_data(data)

    return True, policy_data
def lb_create(self, vip, pool, hm=None):
    """Create a LBaaS instance

    :param vip: A dict containing the properties for the VIP;
    :param pool: A dict describing the pool of load-balancer members.
    :param hm: A dict describing the health monitor, or None to skip
        health monitor creation.
    :returns: A tuple (status, result): on success, result is a dict of
        created resource ids; on failure, result is an error message.
    """
    def _cleanup(msg, **kwargs):
        # Best-effort teardown of whatever was created so far.
        LOG.error(msg)
        self.lb_delete(**kwargs)
        return

    result = {}
    # Create loadbalancer
    try:
        subnet = self.nc().subnet_get(vip['subnet'])
    except exception.InternalError as ex:
        msg = _LE('Failed in getting subnet: %s.') % six.text_type(ex)
        LOG.exception(msg)
        return False, msg
    subnet_id = subnet.id
    try:
        lb = self.nc().loadbalancer_create(subnet_id,
                                           vip.get('address', None),
                                           vip['admin_state_up'])
    except exception.InternalError as ex:
        msg = _LE('Failed in creating loadbalancer: %s.'
                  ) % six.text_type(ex)
        LOG.exception(msg)
        return False, msg
    result['loadbalancer'] = lb.id
    result['vip_address'] = lb.vip_address
    res = self._wait_for_lb_ready(lb.id)
    if res is False:
        msg = _LE('Failed in creating load balancer (%s).') % lb.id
        # vip_address is not a deletable resource; drop it before cleanup.
        del result['vip_address']
        _cleanup(msg, **result)
        return False, msg

    # Create listener
    try:
        listener = self.nc().listener_create(lb.id, vip['protocol'],
                                             vip['protocol_port'],
                                             vip.get('connection_limit',
                                                     None),
                                             vip['admin_state_up'])
    except exception.InternalError as ex:
        msg = _LE('Failed in creating lb listener: %s.'
                  ) % six.text_type(ex)
        LOG.exception(msg)
        return False, msg
    result['listener'] = listener.id
    res = self._wait_for_lb_ready(lb.id)
    if res is False:
        msg = _LE('Failed in creating listener (%s).') % listener.id
        del result['vip_address']
        _cleanup(msg, **result)
        return res, msg

    # Create pool
    try:
        pool = self.nc().pool_create(pool['lb_method'], listener.id,
                                     pool['protocol'],
                                     pool['admin_state_up'])
    except exception.InternalError as ex:
        msg = _LE('Failed in creating lb pool: %s.'
                  ) % six.text_type(ex)
        LOG.exception(msg)
        return False, msg
    result['pool'] = pool.id
    res = self._wait_for_lb_ready(lb.id)
    if res is False:
        msg = _LE('Failed in creating pool (%s).') % pool.id
        del result['vip_address']
        _cleanup(msg, **result)
        return res, msg

    # Health monitor is optional; stop here when none was requested.
    # (A duplicated copy of this guard was removed.)
    if not hm:
        return True, result

    # Create health monitor
    try:
        health_monitor = self.nc().healthmonitor_create(
            hm['type'], hm['delay'], hm['timeout'], hm['max_retries'],
            pool.id, hm['admin_state_up'], hm['http_method'],
            hm['url_path'], hm['expected_codes'])
    except exception.InternalError as ex:
        msg = _LE('Failed in creating lb health monitor: %s.'
                  ) % six.text_type(ex)
        LOG.exception(msg)
        return False, msg
    result['healthmonitor'] = health_monitor.id
    res = self._wait_for_lb_ready(lb.id)
    if res is False:
        msg = _LE('Failed in creating health monitor (%s).'
                  ) % health_monitor.id
        del result['vip_address']
        _cleanup(msg, **result)
        return res, msg

    return True, result