def read_global_environment(env, env_dir=None):
    """Load every file from the global environment directory into ``env``.

    :param env: the Environment instance to load definitions into
    :param env_dir: directory to scan; defaults to the configured
        ``environment_dir`` option
    """
    if env_dir is None:
        # Lazily register the option so this works before full config setup.
        cfg.CONF.import_opt('environment_dir', 'heat.common.config')
        env_dir = cfg.CONF.environment_dir

    try:
        env_files = glob.glob(os.path.join(env_dir, '*'))
    except OSError as osex:
        # Unreadable directory: log and give up, leaving env unchanged.
        LOG.error(_LE('Failed to read %s'), env_dir)
        LOG.exception(osex)
        return

    for file_path in env_files:
        try:
            with open(file_path) as env_fd:
                LOG.info(_LI('Loading %s'), file_path)
                env_body = env_fmt.parse(env_fd.read())
                env_fmt.default_for_missing(env_body)
                env.load(env_body)
        except ValueError as vex:
            # Parse error: skip this file but keep loading the others.
            LOG.error(_LE('Failed to parse %(file_path)s'), {
                'file_path': file_path})
            LOG.exception(vex)
        except IOError as ioex:
            LOG.error(_LE('Failed to read %(file_path)s'), {
                'file_path': file_path})
            LOG.exception(ioex)
def restart_resource(self, resource_name):
    '''Restart ``resource_name`` and everything that depends on it.

    First destroy the resource and its dependants (in reverse dependency
    order), then re-create them.  If any delete fails, the creates are
    skipped and the remaining resources are marked CREATE/FAILED.
    '''
    deps = self.dependencies[self[resource_name]]
    failed = False

    # Tear down in reverse dependency order.
    for res in reversed(deps):
        try:
            scheduler.TaskRunner(res.destroy)()
        except exception.ResourceFailure as ex:
            failed = True
            LOG.error(_LE('Resource %(name)s delete failed: %(ex)s'),
                      {'name': res.name, 'ex': ex})

    for res in deps:
        if not failed:
            try:
                res.state_reset()
                scheduler.TaskRunner(res.create)()
            except exception.ResourceFailure as ex:
                LOG.exception(_LE('Resource %(name)s create failed: '
                                  '%(ex)s') % {'name': res.name, 'ex': ex})
                failed = True
        else:
            # A previous failure aborts the restart for all remaining
            # resources.
            res.state_set(res.CREATE, res.FAILED,
                          'Resource restart aborted')
def get_plug_point_class_instances():
    '''
    Get list of instances of classes that (may) implement pre and post
    stack operation methods.

    The list of class instances is sorted using get_ordinal methods
    on the plug point classes. If class1.ordinal() < class2.ordinal(),
    then class1 will be before class2 in the list.

    Instances are created once and cached in the module-level
    ``pp_class_instances``; subsequent calls return the cached list.
    '''
    global pp_class_instances
    if pp_class_instances is None:
        pp_class_instances = []
        pp_classes = []
        try:
            slps = resources.global_env().get_stack_lifecycle_plugins()
            pp_classes = [cls for name, cls in slps]
        except Exception:
            # Plugin discovery is best-effort; a failure leaves the list
            # empty rather than breaking the caller.
            LOG.exception(_LE("failed to get lifecycle plug point classes"))

        for ppc in pp_classes:
            try:
                pp_class_instances.append(ppc())
            except Exception:
                # One broken plugin must not prevent loading the others.
                LOG.exception(
                    _LE("failed to instantiate stack lifecycle class %s"),
                    ppc)
        try:
            pp_class_instances = sorted(pp_class_instances,
                                        key=lambda ppci: ppci.get_ordinal())
        except Exception:
            # Keep the unsorted list if any get_ordinal() misbehaves.
            LOG.exception(_LE("failed to sort lifecycle plug point classes"))
    return pp_class_instances
def _v2_client_init(self):
    """Create and authenticate a keystone v2 client for this context.

    Credential preference order: trust_id (re-scoped via the service
    admin user), auth_token, then username/password.

    :returns: an authenticated keystoneclient v2 Client
    :raises exception.AuthorizationFailure: if no usable credentials
        are present or trust re-scoping fails.
    """
    kwargs = {
        'auth_url': self.context.auth_url,
        'endpoint': self.context.auth_url,
        'region_name': cfg.CONF.region_name_for_services
    }
    if self.context.region_name is not None:
        kwargs['region_name'] = self.context.region_name

    auth_kwargs = {}
    # Note try trust_id first, as we can't reuse auth_token in that case
    if self.context.trust_id is not None:
        # We got a trust_id, so we use the admin credentials
        # to authenticate, then re-scope the token to the
        # trust impersonating the trustor user.
        # Note that this currently requires the trustor tenant_id
        # to be passed to the authenticate(), unlike the v3 call
        kwargs.update(self._service_admin_creds())
        auth_kwargs['trust_id'] = self.context.trust_id
        auth_kwargs['tenant_id'] = self.context.tenant_id
    elif self.context.auth_token is not None:
        kwargs['tenant_name'] = self.context.project_name
        kwargs['token'] = self.context.auth_token
    elif self.context.password is not None:
        kwargs['username'] = self.context.username
        kwargs['password'] = self.context.password
        kwargs['tenant_name'] = self.context.project_name
        kwargs['tenant_id'] = self.context.tenant_id
    else:
        LOG.error(_LE("Keystone v2 API connection failed, no password "
                      "or auth_token!"))
        raise exception.AuthorizationFailure()

    # TLS options are applied regardless of the auth method used.
    kwargs['cacert'] = self._get_client_option('ca_file')
    kwargs['insecure'] = self._get_client_option('insecure')
    kwargs['cert'] = self._get_client_option('cert_file')
    kwargs['key'] = self._get_client_option('key_file')

    client = kc.Client(**kwargs)
    client.authenticate(**auth_kwargs)

    # If we are authenticating with a trust auth_kwargs are set, so set
    # the context auth_token with the re-scoped trust token
    if auth_kwargs:
        # Sanity check
        if not client.auth_ref.trust_scoped:
            LOG.error(_LE("v2 trust token re-scoping failed!"))
            raise exception.AuthorizationFailure()
        # All OK so update the context with the token
        self.context.auth_token = client.auth_ref.auth_token
        self.context.auth_url = kwargs.get('auth_url')
        # Ensure the v2 API we're using is not impacted by keystone
        # bug #1239303, otherwise we can't trust the user_id
        if self.context.trustor_user_id != client.auth_ref.user_id:
            LOG.error(_LE("Trust impersonation failed, bug #1239303 "
                          "suspected, you may need a newer keystone"))
            raise exception.AuthorizationFailure()

    return client
def _get_domain_id_from_name(self, domain_name):
    """Return the keystone domain ID matching ``domain_name``.

    :param domain_name: name of the domain to look up
    :raises exception.Error: if zero or multiple domains match the name.
    """
    domains = self.domain_admin_client.domains.list(name=domain_name)
    if len(domains) == 1:
        return domains[0].id
    elif len(domains) == 0:
        # _LE() marks log messages only; the exception must carry a
        # plain _() translated message (see the sibling implementation
        # of this lookup, which follows that convention).
        LOG.error(_LE("Can't find domain id for %s!"), domain_name)
        raise exception.Error(_("Failed to find domain %s") % domain_name)
    else:
        LOG.error(_LE('Multiple domain ids were found for %s!'),
                  domain_name)
        raise exception.Error(_('Multiple domain ids were found for %s')
                              % domain_name)
def _get_domain_id_from_name(self, domain_name):
    """Return the keystone domain ID matching ``domain_name``.

    :param domain_name: name of the domain to look up
    :raises exception.Error: if zero or multiple domains match the name.
    """
    domains = self.domain_admin_client.domains.list(name=domain_name)
    if len(domains) == 1:
        return domains[0].id
    elif len(domains) == 0:
        LOG.error(_LE("Can't find domain id for %(domain)s!"), {
            'domain': domain_name})
        raise exception.Error(_("Failed to find domain %s") % domain_name)
    else:
        LOG.error(_LE("Unexpected response looking for %(domain)s!"), {
            'domain': domain_name})
        raise exception.Error(_("Unexpected response looking for "
                                "domain %s") % domain_name)
def check_stack_watches(self, sid):
    """Evaluate all watch rules for stack ``sid`` and fire their actions.

    Recurses into nested stacks.  Runs with the stack's stored (admin)
    credentials since this is a periodic task with no request context.

    :param sid: database id of the stack to check
    """
    # Retrieve the stored credentials & create context
    # Require tenant_safe=False to the stack_get to defeat tenant
    # scoping otherwise we fail to retrieve the stack
    # Pass sid lazily to the logger rather than eagerly interpolating.
    LOG.debug("Periodic watcher task for stack %s", sid)
    admin_context = context.get_admin_context()
    db_stack = stack_object.Stack.get_by_id(admin_context,
                                            sid,
                                            tenant_safe=False,
                                            eager_load=True)
    if not db_stack:
        LOG.error(_LE("Unable to retrieve stack %s for periodic task"),
                  sid)
        return
    stk = stack.Stack.load(admin_context, stack=db_stack,
                           use_stored_context=True)

    # recurse into any nested stacks.
    children = stack_object.Stack.get_all_by_owner_id(admin_context, sid)
    for child in children:
        self.check_stack_watches(child.id)

    # Get all watchrules for this stack and evaluate them
    try:
        wrs = watch_rule_object.WatchRule.get_all_by_stack(admin_context,
                                                           sid)
    except Exception as ex:
        # LOG.warn is deprecated in favour of LOG.warning.
        LOG.warning(_LW("periodic_task db error watch rule removed? "
                        "%(ex)s"), ex)
        return

    def run_alarm_action(stk, actions, details):
        # Executed on a worker thread via thread_group_mgr below.
        for action in actions:
            action(details=details)

        # Refresh metadata on all resources after the actions have run.
        for res in six.itervalues(stk):
            res.metadata_update()

    for wr in wrs:
        rule = watchrule.WatchRule.load(stk.context, watch=wr)
        actions = rule.evaluate()
        if actions:
            self.thread_group_mgr.start(sid, run_alarm_action, stk,
                                        actions, rule.get_details())
def kill_children(self, *args):
    """Kills the entire process group."""
    LOG.error(_LE('SIGTERM received'))
    # Ignore further termination signals while we take everything down.
    signal.signal(signal.SIGTERM, signal.SIG_IGN)
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    self.running = False
    # pgid 0 means the caller's own process group, i.e. all children too.
    os.killpg(0, signal.SIGTERM)
def _store_or_update(self, action, status, reason):
    """Persist an action/status transition for this resource.

    Updates the existing DB row when the resource already has an id;
    otherwise stores a new row only on the initial CREATE/ADOPT
    IN_PROGRESS transition.
    """
    self.action = action
    self.status = status
    self.status_reason = reason

    if self.id is not None:
        try:
            rs = db_api.resource_get(self.context, self.id)
            rs.update_and_save({
                'action': self.action,
                'status': self.status,
                'status_reason': reason,
                'stack_id': self.stack.id,
                'updated_at': self.updated_time,
                'properties_data': self._stored_properties_data,
                'nova_instance': self.resource_id})
        except Exception as ex:
            # Best-effort persistence: DB failures are logged, not raised.
            LOG.error(_LE('DB error %s'), ex)

    # store resource in DB on transition to CREATE_IN_PROGRESS
    # all other transitions (other than to DELETE_COMPLETE)
    # should be handled by the update_and_save above..
    elif (action, status) in [(self.CREATE, self.IN_PROGRESS),
                              (self.ADOPT, self.IN_PROGRESS)]:
        self._store()
def _create_auth_plugin(self):
    """Build a keystone v3 auth plugin for this context's credentials.

    Preference order: trust scoping (via the service admin user),
    pre-validated token info, bare token, then username/password.

    :raises exception.AuthorizationFailure: if no usable credentials
        are present.
    """
    if self.trust_id:
        # Importing the middleware registers the keystone_authtoken
        # options on cfg.CONF before we read them below.
        importutils.import_module('keystonemiddleware.auth_token')
        username = cfg.CONF.keystone_authtoken.admin_user
        password = cfg.CONF.keystone_authtoken.admin_password

        return v3.Password(username=username,
                           password=password,
                           user_domain_id='default',
                           auth_url=self._keystone_v3_endpoint,
                           trust_id=self.trust_id)

    if self.auth_token_info:
        auth_ref = access.AccessInfo.factory(body=self.auth_token_info,
                                             auth_token=self.auth_token)
        return _AccessInfoPlugin(self._keystone_v3_endpoint, auth_ref)

    if self.auth_token:
        # FIXME(jamielennox): This is broken but consistent. If you
        # only have a token but don't load a service catalog then
        # url_for wont work. Stub with the keystone endpoint so at
        # least it might be right.
        return token_endpoint.Token(endpoint=self._keystone_v3_endpoint,
                                    token=self.auth_token)

    if self.password:
        return v3.Password(username=self.username,
                           password=self.password,
                           project_id=self.tenant_id,
                           user_domain_id='default',
                           auth_url=self._keystone_v3_endpoint)

    LOG.error(_LE("Keystone v3 API connection failed, no password "
                  "trust or auth_token!"))
    raise exception.AuthorizationFailure()
def signal(self, details=None):
    '''signal the resource.

    Subclasses should provide a handle_signal() method to implement the
    signal; the base-class raises an exception if no handler is
    implemented.

    :param details: optional signal payload; recognised dict shapes
        (Ceilometer alarms, watchrule state changes and hook-clearing
        requests) get special handling.
    :raises exception.ResourceFailure: if signalled during SUSPEND or
        DELETE, or if the handler itself fails.
    :raises exception.ResourceActionNotSupported: if the resource has
        no handle_signal() implementation.
    '''
    # Signals are not allowed while suspending or deleting.
    if self.action in (self.SUSPEND, self.DELETE):
        self._add_event(self.action, self.status,
                        'Cannot signal resource during %s' % self.action)
        ex = Exception(_('Cannot signal resource during %s') % self.action)
        raise exception.ResourceFailure(ex, self)

    def get_string_details():
        # Derive a human-readable reason string from the signal payload.
        if details is None:
            return 'No signal details provided'
        if isinstance(details, six.string_types):
            return details
        if isinstance(details, dict):
            if all(k in details for k in ('previous', 'current', 'reason')):
                # this is from Ceilometer.
                auto = '%(previous)s to %(current)s (%(reason)s)' % details
                return 'alarm state changed from %s' % auto
            elif 'state' in details:
                # this is from watchrule
                return 'alarm state changed to %(state)s' % details

        return 'Unknown'

    # Clear the hook without interfering with resources'
    # `handle_signal` callbacks:
    if (details and 'unset_hook' in details and
            environment.valid_hook_type(details.get('unset_hook'))):
        hook = details['unset_hook']
        if self.has_hook(hook):
            self.clear_hook(hook)
            LOG.info(_LI('Clearing %(hook)s hook on %(resource)s'),
                     {'hook': hook, 'resource': six.text_type(self)})
            self._add_event(self.action, self.status,
                            "Hook %s is cleared" % hook)
        return

    if not callable(getattr(self, 'handle_signal', None)):
        raise exception.ResourceActionNotSupported(action='signal')

    try:
        signal_result = self.handle_signal(details)
        if signal_result:
            reason_string = "Signal: %s" % signal_result
        else:
            reason_string = get_string_details()
        self._add_event('SIGNAL', self.status, reason_string)
    except NoActionRequired:
        # Don't log an event as it just spams the user.
        pass
    except Exception as ex:
        # Pass format args to the logger lazily rather than eagerly
        # interpolating with '%', so formatting only happens when the
        # record is actually emitted.
        LOG.exception(_LE('signal %(name)s : %(msg)s'),
                      {'name': six.text_type(self), 'msg': ex})
        failure = exception.ResourceFailure(ex, self)
        raise failure
def _create_auth_plugin(self):
    """Build a keystoneauth plugin from this context's credentials.

    Preference order: pre-validated token info, bare token, then
    username/password.

    :raises exception.AuthorizationFailure: if no usable credentials
        are present.
    """
    if self.auth_token_info:
        access_info = access.create(body=self.auth_token_info,
                                    auth_token=self.auth_token)
        return access_plugin.AccessInfoPlugin(
            auth_ref=access_info,
            auth_url=self.keystone_v3_endpoint)

    if self.auth_token:
        # FIXME(jamielennox): This is broken but consistent. If you
        # only have a token but don't load a service catalog then
        # url_for wont work. Stub with the keystone endpoint so at
        # least it might be right.
        return token_endpoint.Token(endpoint=self.keystone_v3_endpoint,
                                    token=self.auth_token)

    if self.password:
        return generic.Password(username=self.username,
                                password=self.password,
                                project_id=self.tenant_id,
                                user_domain_id=self.user_domain,
                                auth_url=self.keystone_v3_endpoint)

    LOG.error(_LE("Keystone API connection failed, no password "
                  "trust or auth_token!"))
    raise exception.AuthorizationFailure()
def adjust(self, adjustment,
           adjustment_type=sc_util.CFN_CHANGE_IN_CAPACITY,
           min_adjustment_step=None, signal=False):
    """Adjust the size of the scaling group if the cooldown permits.

    :param adjustment: magnitude of the adjustment (interpreted per
        ``adjustment_type``)
    :param adjustment_type: how to apply the adjustment
    :param min_adjustment_step: minimum step for percentage adjustments
    :param signal: True when invoked via an alarm signal; in that case
        a blocked cooldown raises NoActionRequired instead of returning
    :returns: True if the group size actually changed
    """
    if not self._is_scaling_allowed():
        LOG.info(_LI("%(name)s NOT performing scaling adjustment, "
                     "cooldown %(cooldown)s"),
                 {'name': self.name,
                  'cooldown': self.properties[self.COOLDOWN]})
        if signal:
            raise exception.NoActionRequired()
        else:
            return

    capacity = grouputils.get_size(self)
    lower = self.properties[self.MIN_SIZE]
    upper = self.properties[self.MAX_SIZE]

    new_capacity = sc_util.calculate_new_capacity(capacity, adjustment,
                                                  adjustment_type,
                                                  min_adjustment_step,
                                                  lower, upper)

    changed_size = new_capacity != capacity
    # send a notification before, on-error and on-success.
    notif = {
        'stack': self.stack,
        'adjustment': adjustment,
        'adjustment_type': adjustment_type,
        'capacity': capacity,
        'groupname': self.FnGetRefId(),
        'message': _("Start resizing the group %(group)s") % {
            'group': self.FnGetRefId()},
        'suffix': 'start',
    }
    notification.send(**notif)
    try:
        self.resize(new_capacity)
    except Exception as resize_ex:
        with excutils.save_and_reraise_exception():
            try:
                # Report the size actually reached, not the target.
                notif.update({'suffix': 'error',
                              'message': six.text_type(resize_ex),
                              'capacity': grouputils.get_size(self),
                              })
                notification.send(**notif)
            except Exception:
                LOG.exception(_LE('Failed sending error notification'))
    else:
        notif.update({
            'suffix': 'end',
            'capacity': new_capacity,
            'message': _("End resizing the group %(group)s") % {
                'group': notif['groupname']},
        })
        notification.send(**notif)
    finally:
        # Always stamp the cooldown, success or failure.
        self._finished_scaling("%s : %s" % (adjustment_type, adjustment),
                               changed_size=changed_size)
    return changed_size
def _store(self, metadata=None):
    '''Create the resource in the database.

    :param metadata: initial resource metadata to persist alongside
        the state row
    '''
    try:
        rs = {'action': self.action,
              'status': self.status,
              'status_reason': self.status_reason,
              'stack_id': self.stack.id,
              'nova_instance': self.resource_id,
              'name': self.name,
              'rsrc_metadata': metadata,
              'properties_data': self._stored_properties_data,
              'needed_by': self.needed_by,
              'requires': self.requires,
              'replaces': self.replaces,
              'replaced_by': self.replaced_by,
              'current_template_id': self.current_template_id,
              'stack_name': self.stack.name}

        new_rs = resource_objects.Resource.create(self.context, rs)
        # Mirror the DB-assigned identity back onto the object.
        self.id = new_rs.id
        self.uuid = new_rs.uuid
        self.created_time = new_rs.created_at
        self._rsrc_metadata = metadata
    except Exception as ex:
        # Best-effort persistence: failures are logged, not raised.
        LOG.error(_LE('DB error %s'), ex)
def _do_ops(cinstances, opname, cnxt, stack, current_stack=None, action=None,
            is_stack_failure=None):
    """Call the ``opname`` method on each plug point class instance.

    Stops at the first failure.

    :param cinstances: iterable of plug point class instances
    :param opname: name of the method to invoke on each instance
    :returns: tuple of (failure, failure_exception_message,
        success_count)
    """
    success_count = 0
    failure = False
    failure_exception_message = None
    for ci in cinstances:
        op = getattr(ci, opname, None)
        if callable(op):
            try:
                if is_stack_failure is not None:
                    op(cnxt, stack, current_stack, action, is_stack_failure)
                else:
                    op(cnxt, stack, current_stack, action)
                success_count += 1
            except Exception as ex:
                # '%(opname)s': the original '%(opname)' lacked the 's'
                # conversion and would raise ValueError when formatted.
                # Args are passed lazily so the logger formats them.
                LOG.exception(_LE(
                    "%(opname)s %(ci)s failed for %(a)s on %(sid)s"),
                    {'opname': opname, 'ci': type(ci),
                     'a': action, 'sid': stack.id})
                failure = True
                failure_exception_message = (
                    ex.args[0] if ex.args else str(ex))
                break
            LOG.info(_LI("done with class=%(c)s, stackid=%(sid)s, "
                         "action=%(a)s"),
                     {'c': type(ci), 'sid': stack.id, 'a': action})
    return (failure, failure_exception_message, success_count)
def create_stack_user(self, username, password=''): """Create a user defined as part of a stack. The user is defined either via template or created internally by a resource. This user will be added to the heat_stack_user_role as defined in the config. Returns the keystone ID of the resulting user. """ # FIXME(shardy): There's duplicated logic between here and # create_stack_domain user, but this function is expected to # be removed after the transition of all resources to domain # users has been completed stack_user_role = self.client.roles.list( name=cfg.CONF.heat_stack_user_role) if len(stack_user_role) == 1: role_id = stack_user_role[0].id # Create the user user = self.client.users.create( name=self._get_username(username), password=password, default_project=self.context.tenant_id) # Add user to heat_stack_user_role LOG.debug("Adding user %(user)s to role %(role)s" % { 'user': user.id, 'role': role_id}) self.client.roles.grant(role=role_id, user=user.id, project=self.context.tenant_id) else: LOG.error(_LE("Failed to add user %(user)s to role %(role)s, " "check role exists!"), { 'user': username, 'role': cfg.CONF.heat_stack_user_role}) raise exception.Error(_("Can't find role %s") % cfg.CONF.heat_stack_user_role) return user.id
def _store(self):
    '''Create the resource in the database.

    Also computes and stores a hash of the frozen resource definition
    so later updates can cheaply detect definition changes.
    '''
    # NOTE(review): "====" looks like a leftover debug message; consider
    # demoting to LOG.debug or removing.
    LOG.info("==== storing the new resource")
    metadata = self.metadata_get()
    # NOTE(review): sha1().update(str(...)) requires bytes on Python 3 —
    # confirm this module targets Python 2 only, or encode first.
    sha = sha1()
    sha.update(str(self.frozen_definition()))
    self.rsrc_defn_hash = sha.hexdigest()
    rsrc_defn = json.dumps(dict(self.frozen_definition()))
    try:
        rs = {'action': self.action,
              'status': self.status,
              'status_reason': self.status_reason,
              'stack_id': self.stack.id,
              'nova_instance': self.resource_id,
              'name': self.name,
              'rsrc_metadata': metadata,
              'properties_data': self._stored_properties_data,
              'stack_name': self.stack.name,
              'rsrc_defn': rsrc_defn,
              'rsrc_defn_hash': self.rsrc_defn_hash,
              'version': self.version}

        new_rs = db_api.resource_create(self.context, rs)
        self.id = new_rs.id
        self.created_time = new_rs.created_at
        self._rsrc_metadata = metadata
    except Exception as ex:
        # Best-effort persistence: failures are logged, not raised.
        LOG.error(_LE('DB error %s'), ex)
def handle_create(self):
    """Allocate a floating IP for the current tenant.

    Uses Neutron when the DOMAIN property is set, otherwise falls back
    to the Nova floating IP API.  Optionally attaches the address to
    the configured instance.
    """
    ips = None
    if self.properties[self.DOMAIN]:
        # Neutron path: allocate from the external network.
        ext_net = internet_gateway.InternetGateway.get_external_network_id(
            self.neutron())
        props = {'floating_network_id': ext_net}
        ips = self.neutron().create_floatingip({
            'floatingip': props})['floatingip']
        self.ipaddress = ips['floating_ip_address']
        self.resource_id_set(ips['id'])
        LOG.info(_LI('ElasticIp create %s'), str(ips))
    else:
        # Nova path: allocate from the default floating IP pool.
        try:
            ips = self.nova().floating_ips.create()
        except Exception as e:
            with excutils.save_and_reraise_exception():
                if self.client_plugin('nova').is_not_found(e):
                    LOG.error(_LE("No default floating IP pool configured."
                                  " Set 'default_floating_pool' in "
                                  "nova.conf."))

        if ips:
            self.ipaddress = ips.ip
            self.resource_id_set(ips.id)
            LOG.info(_LI('ElasticIp create %s'), str(ips))

    instance_id = self.properties[self.INSTANCE_ID]
    if instance_id:
        server = self.nova().servers.get(instance_id)
        server.add_floating_ip(self._ipaddress())
def __init__(self, **kwargs):
    """Build the exception message from the class msg_fmt and kwargs.

    If the kwargs don't match the format string's placeholders the
    mismatch is logged; when _FATAL_EXCEPTION_FORMAT_ERRORS is set the
    original KeyError is re-raised with its traceback.
    """
    self.kwargs = kwargs

    try:
        self.message = self.msg_fmt % kwargs
    except KeyError:
        exc_info = sys.exc_info()
        # kwargs doesn't match a variable in the message
        # log the issue and the kwargs
        LOG.exception(_LE('Exception in string format operation'))
        for name, value in six.iteritems(kwargs):
            LOG.error(_LE("%(name)s: %(value)s"),
                      {'name': name, 'value': value})  # noqa

        if _FATAL_EXCEPTION_FORMAT_ERRORS:
            raise_(exc_info[0], exc_info[1], exc_info[2])
def validate(self):
    """Validate the remote stack's template against the remote endpoint.

    :raises exception.StackValidationFailed: if the remote Heat
        endpoint is unreachable or rejects the template.
    """
    super(RemoteStack, self).validate()

    # First confirm we can reach the remote region's Heat at all.
    try:
        self.heat()
    except Exception as ex:
        exc_info = dict(region=self._region_name, exc=six.text_type(ex))
        msg = _('Cannot establish connection to Heat endpoint at region '
                '"%(region)s" due to "%(exc)s"') % exc_info
        raise exception.StackValidationFailed(message=msg)

    try:
        params = self.properties[self.PARAMETERS]
        env = environment.get_child_environment(self.stack.env, params)
        tmpl = template_format.parse(self.properties[self.TEMPLATE])
        args = {
            'template': tmpl,
            'files': self.stack.t.files,
            'environment': env.user_env_as_dict(),
        }
        # Delegate actual validation to the remote service.
        self.heat().stacks.validate(**args)
    except Exception as ex:
        exc_info = dict(region=self._region_name, exc=six.text_type(ex))
        LOG.error(_LE('exception: %s'), type(ex))
        msg = _('Failed validating stack template using Heat endpoint at '
                'region "%(region)s" due to "%(exc)s"') % exc_info
        raise exception.StackValidationFailed(message=msg)
def domain_admin_auth(self):
    """Return a cached v3 password auth plugin scoped to the stack domain.

    :raises exception.AuthorizationFailure: if the domain admin
        credentials are rejected.
    """
    if not self._domain_admin_auth:
        # Note we must specify the domain when getting the token
        # as only a domain scoped token can create projects in the domain
        if self._stack_domain_is_id:
            auth_kwargs = {'domain_id': self.stack_domain,
                           'user_domain_id': self.stack_domain}
        else:
            auth_kwargs = {'domain_name': self.stack_domain,
                           'user_domain_name': self.stack_domain}
        auth = kc_auth_v3.Password(username=self.domain_admin_user,
                                   password=self.domain_admin_password,
                                   auth_url=self.v3_endpoint,
                                   **auth_kwargs)

        # NOTE(jamielennox): just do something to ensure a valid token
        try:
            auth.get_token(self.session)
        except kc_exception.Unauthorized:
            LOG.error(_LE("Domain admin client authentication failed"))
            raise exception.AuthorizationFailure()

        self._domain_admin_auth = auth

    return self._domain_admin_auth
def _action_recorder(self, action, expected_exceptions=tuple()):
    '''Return a context manager to record the progress of an action.

    Upon entering the context manager, the state is set to IN_PROGRESS.
    Upon exiting, the state will be set to COMPLETE if no exception was
    raised, or FAILED otherwise. Non-exit exceptions will be translated
    to ResourceFailure exceptions.

    Expected exceptions are re-raised, with the Resource left in the
    IN_PROGRESS state.
    '''
    try:
        self.state_set(action, self.IN_PROGRESS)
        yield
    except expected_exceptions as ex:
        # Expected: re-raise without changing state.
        with excutils.save_and_reraise_exception():
            LOG.debug('%s', six.text_type(ex))
    except Exception as ex:
        LOG.info('%(action)s: %(info)s',
                 {"action": action,
                  "info": six.text_type(self)},
                 exc_info=True)
        failure = exception.ResourceFailure(ex, self, action)
        self.state_set(action, self.FAILED, six.text_type(failure))
        raise failure
    except:  # noqa
        # Bare except: catches BaseException (e.g. cancellation) so the
        # resource is still marked aborted before re-raising.
        with excutils.save_and_reraise_exception():
            try:
                self.state_set(action, self.FAILED, '%s aborted' % action)
            except Exception:
                LOG.exception(_LE('Error marking resource as failed'))
    else:
        self.state_set(action, self.COMPLETE)
def hup(self, *args):
    """
    Reloads configuration files with zero down time.
    """
    LOG.error(_LE('SIGHUP received'))
    # Ignore further SIGHUPs while the reload is in flight.
    signal.signal(signal.SIGHUP, signal.SIG_IGN)
    raise exception.SIGHUPInterrupt
def _update_software_config_transport(self, prop_diff):
    """Refresh deployment metadata after a config-transport change.

    A dummy software deployment is created and immediately deleted
    purely to trigger pushing the new metadata to all sources.
    """
    if not self.user_data_software_config():
        return
    try:
        metadata = self.metadata_get(True) or {}
        self._create_transport_credentials(prop_diff)
        self._populate_deployments_metadata(metadata, prop_diff)
        # push new metadata to all sources by creating a dummy
        # deployment
        sc = self.rpc_client().create_software_config(
            self.context, 'ignored', 'ignored', '')
        sd = self.rpc_client().create_software_deployment(
            self.context, self.resource_id, sc['id'])
        self.rpc_client().delete_software_deployment(
            self.context, sd['id'])
        self.rpc_client().delete_software_config(
            self.context, sc['id'])
    except Exception:
        # Updating the software config transport is on a best-effort
        # basis as any raised exception here would result in the resource
        # going into an ERROR state, which will be replaced on the next
        # stack update. This is not desirable for a server. The old
        # transport will continue to work, and the new transport may work
        # despite exceptions in the above block.
        LOG.exception(
            _LE('Error while updating software config transport')
        )
def hup(*args):
    """
    Shuts down the server(s), but allows running requests to complete
    """
    LOG.error(_LE('SIGHUP received'))
    signal.signal(signal.SIGHUP, signal.SIG_IGN)
    # Forward SIGHUP to our whole process group (pgid 0 == caller's own),
    # then restore this handler for subsequent reloads.
    os.killpg(0, signal.SIGHUP)
    signal.signal(signal.SIGHUP, hup)
def extract_args(params):
    """Extract arguments passed as parameters and return them as a dictionary.

    Extract any arguments passed as parameters through the API and
    return them as a dictionary. This allows us to filter the passed
    args and do type conversion where appropriate

    :raises ValueError: for a non-positive timeout, malformed adopt
        data, or invalid tags.
    """
    kwargs = {}
    timeout_mins = params.get(rpc_api.PARAM_TIMEOUT)
    if timeout_mins not in ('0', 0, None):
        try:
            timeout = int(timeout_mins)
        except (ValueError, TypeError):
            # Unparseable timeout: log and fall back to no timeout arg.
            LOG.exception(_LE('Timeout conversion failed'))
        else:
            if timeout > 0:
                kwargs[rpc_api.PARAM_TIMEOUT] = timeout
            else:
                raise ValueError(_('Invalid timeout value %s') % timeout)

    name = rpc_api.PARAM_DISABLE_ROLLBACK
    if name in params:
        disable_rollback = param_utils.extract_bool(name, params[name])
        kwargs[name] = disable_rollback

    name = rpc_api.PARAM_SHOW_DELETED
    if name in params:
        # NOTE(review): this normalises the value in params in place
        # rather than copying it into kwargs — confirm callers rely on
        # that behaviour.
        params[name] = param_utils.extract_bool(name, params[name])

    adopt_data = params.get(rpc_api.PARAM_ADOPT_STACK_DATA)
    if adopt_data:
        try:
            adopt_data = template_format.simple_parse(adopt_data)
        except ValueError as exc:
            raise ValueError(_('Invalid adopt data: %s') % exc)
        kwargs[rpc_api.PARAM_ADOPT_STACK_DATA] = adopt_data

    tags = params.get(rpc_api.PARAM_TAGS)
    if tags:
        if not isinstance(tags, list):
            raise ValueError(_('Invalid tags, not a list: %s') % tags)

        for tag in tags:
            if not isinstance(tag, six.string_types):
                raise ValueError(_('Invalid tag, "%s" is not a string')
                                 % tag)

            if len(tag) > 80:
                raise ValueError(_('Invalid tag, "%s" is longer than 80 '
                                   'characters') % tag)

            # Comma is not allowed as per the API WG tagging guidelines
            if ',' in tag:
                raise ValueError(_('Invalid tag, "%s" contains a comma')
                                 % tag)

        kwargs[rpc_api.PARAM_TAGS] = tags

    return kwargs
def update_with_template(self, child_template, user_params=None,
                         timeout_mins=None):
    """Update the nested stack with the new template.

    :param child_template: the new template for the nested stack
    :param user_params: parameters for the child environment; defaults
        to child_params()
    :param timeout_mins: update timeout; defaults to the parent stack's
    :returns: a cookie dict recording the nested stack's pre-update
        state, for use by the completion check
    """
    if self.id is None:
        self._store()

    nested_stack = self.nested()
    if nested_stack is None:
        # if the create failed for some reason and the nested
        # stack was not created, we need to create an empty stack
        # here so that the update will work.
        def _check_for_completion(creator_fn):
            while not self.check_create_complete(creator_fn):
                yield

        empty_temp = template_format.parse(
            "heat_template_version: '2013-05-23'")
        stack_creator = self.create_with_template(empty_temp, {})
        checker = scheduler.TaskRunner(_check_for_completion,
                                       stack_creator)
        checker(timeout=self.stack.timeout_secs())

        if stack_creator is not None:
            stack_creator.run_to_completion()
        nested_stack = self.nested()

    if timeout_mins is None:
        timeout_mins = self.stack.timeout_mins

    if user_params is None:
        user_params = self.child_params()

    child_env = environment.get_child_environment(
        self.stack.env,
        user_params,
        child_resource_name=self.name,
        item_to_remove=self.resource_info)
    parsed_template = self._child_parsed_template(child_template,
                                                  child_env)

    # Snapshot pre-update state so the caller can detect completion.
    cookie = {'previous': {
        'updated_at': nested_stack.updated_time,
        'state': nested_stack.state}}

    args = {rpc_api.PARAM_TIMEOUT: timeout_mins}
    try:
        self.rpc_client().update_stack(
            self.context,
            nested_stack.identifier(),
            parsed_template.t,
            child_env.user_env_as_dict(),
            parsed_template.files,
            args)
    except Exception as ex:
        LOG.exception(_LE('update_stack'))
        self.raise_local_exception(ex)
    return cookie
def stop(self):
    """Shut down the worker service, RPC listener first."""
    # Stopping the RPC connection before anything else prevents new
    # requests from arriving while the service winds down.
    LOG.info(_LI("Stopping %(topic)s in engine %(engine)s."),
             {"topic": self.topic, "engine": self.engine_id})
    try:
        self._rpc_server.stop()
        self._rpc_server.wait()
    except Exception as err:
        LOG.error(_LE("%(topic)s is failed to stop, %(exc)s"),
                  {"topic": self.topic, "exc": err})

    super(WorkerService, self).stop()
def stop(self):
    """Shut down the worker service, RPC listener first."""
    # Stopping the RPC connection before anything else prevents new
    # requests from arriving while the service winds down.
    LOG.info(_LI("Stopping WorkerService ..."))
    try:
        self._rpc_server.stop()
        self._rpc_server.wait()
    except Exception as err:
        LOG.error(_LE("WorkerService is failed to stop, %s"), err)

    super(WorkerService, self).stop()
def adjust(self, adjustment, adjustment_type=CHANGE_IN_CAPACITY):
    """
    Adjust the size of the scaling group if the cooldown permits.
    """
    if self._cooldown_inprogress():
        LOG.info(_LI("%(name)s NOT performing scaling adjustment, "
                     "cooldown %(cooldown)s"),
                 {'name': self.name,
                  'cooldown': self.properties[self.COOLDOWN]})
        return

    capacity = grouputils.get_size(self)
    lower = self.properties[self.MIN_SIZE]
    upper = self.properties[self.MAX_SIZE]

    new_capacity = _calculate_new_capacity(capacity, adjustment,
                                           adjustment_type, lower, upper)
    if new_capacity == capacity:
        LOG.debug('no change in capacity %d' % capacity)
        return

    # send a notification before, on-error and on-success.
    notif = {
        'stack': self.stack,
        'adjustment': adjustment,
        'adjustment_type': adjustment_type,
        'capacity': capacity,
        'groupname': self.FnGetRefId(),
        'message': _("Start resizing the group %(group)s") % {
            'group': self.FnGetRefId()},
        'suffix': 'start',
    }
    notification.send(**notif)
    try:
        self.resize(new_capacity)
    except Exception as resize_ex:
        with excutils.save_and_reraise_exception():
            try:
                notif.update({'suffix': 'error',
                              'message': six.text_type(resize_ex),
                              })
                notification.send(**notif)
            except Exception:
                LOG.exception(_LE('Failed sending error notification'))
    else:
        notif.update({
            'suffix': 'end',
            'capacity': new_capacity,
            'message': _("End resizing the group %(group)s") % {
                'group': notif['groupname']},
        })
        notification.send(**notif)

    # Record when this adjustment happened so the cooldown can be
    # enforced on the next call.
    self._cooldown_timestamp("%s : %s" % (adjustment_type, adjustment))
def format_watch_data(wd, rule_names):
    """Demangle a WatchData DB row into the API watch-data dict.

    ``wd.data`` is expected to hold exactly two keys: 'Namespace' and a
    single metric name mapped to its datapoints; anything else is
    logged as an error and yields None.
    """
    namespace = wd.data['Namespace']
    remainder = [(key, value) for key, value in wd.data.items()
                 if key != 'Namespace']
    # Guard clause: bail out unless exactly one metric key remains.
    if len(remainder) != 1:
        LOG.error(_LE("Unexpected number of keys in watch_data.data!"))
        return

    metric_name, metric_data = remainder[0]
    return {
        rpc_api.WATCH_DATA_ALARM: rule_names.get(wd.watch_rule_id),
        rpc_api.WATCH_DATA_METRIC: metric_name,
        rpc_api.WATCH_DATA_TIME: heat_timeutils.isotime(wd.created_at),
        rpc_api.WATCH_DATA_NAMESPACE: namespace,
        rpc_api.WATCH_DATA: metric_data,
    }
def _handle_update_portInfo(self, prop_diff):
    """Re-associate the address after an instance/interface change.

    :raises exception.NotFound: if no port can be resolved for the new
        interface/instance.
    """
    instance_id_update = prop_diff.get(self.INSTANCE_ID)
    ni_id_update = prop_diff.get(self.NETWORK_INTERFACE_ID)
    eip = self.properties[self.EIP]
    allocation_id = self.properties[self.ALLOCATION_ID]
    # if update portInfo, no need to detach the port from
    # old instance/floatingip.
    if eip:
        # Nova-style EIP: simply attach it to the new instance.
        server = self.client().servers.get(instance_id_update)
        server.add_floating_ip(eip)
    else:
        # Neutron-style allocation: resolve the new port and repoint
        # the floating IP at it.
        port_id, port_rsrc = self._get_port_info(ni_id_update,
                                                 instance_id_update)
        if not port_id or not port_rsrc:
            LOG.error(_LE('Port not specified.'))
            raise exception.NotFound(
                _('Failed to update, can not found '
                  'port info.'))

        network_id = port_rsrc['network_id']
        self._neutron_add_gateway_router(allocation_id, network_id)
        self._neutron_update_floating_ip(allocation_id, port_id)
def create_stack_user(self, username, password=''):
    """Create a user.

    User can be defined as part of a stack, either via template
    or created internally by a resource.  This user will be added to
    the heat_stack_user_role as defined in the config
    Returns the keystone ID of the resulting user
    """
    if len(username) > 64:
        LOG.warning(_LW("Truncating the username %s to the last 64 "
                        "characters."), username)
        # get the last 64 characters of the username
        username = username[-64:]
    # NOTE(review): the email format string below appears redacted and
    # has no %s placeholder — '%' against it would fail; confirm the
    # original value.
    user = self.client.users.create(username,
                                    password,
                                    '*****@*****.**' % username,
                                    tenant_id=self.context.tenant_id,
                                    enabled=True)

    # We add the new user to a special keystone role
    # This role is designed to allow easier differentiation of the
    # heat-generated "stack users" which will generally have credentials
    # deployed on an instance (hence are implicitly untrusted)
    roles = self.client.roles.list()
    stack_user_role = [r.id for r in roles
                       if r.name == cfg.CONF.heat_stack_user_role]
    if len(stack_user_role) == 1:
        role_id = stack_user_role[0]
        LOG.debug("Adding user %(user)s to role %(role)s" % {
            'user': user.id, 'role': role_id})
        self.client.roles.add_user_role(user.id, role_id,
                                        self.context.tenant_id)
    else:
        # NOTE(review): unlike the v3 variant of this method, a missing
        # role is only logged and the user is still returned without the
        # role — confirm this is intentional.
        LOG.error(_LE("Failed to add user %(user)s to role %(role)s, "
                      "check role exists!"),
                  {'user': username,
                   'role': cfg.CONF.heat_stack_user_role})

    return user.id
def _store(self):
    '''Insert this resource's current state into the database.'''
    metadata = self.metadata_get()
    try:
        row = {
            'action': self.action,
            'status': self.status,
            'status_reason': self.status_reason,
            'stack_id': self.stack.id,
            'nova_instance': self.resource_id,
            'name': self.name,
            'rsrc_metadata': metadata,
            'properties_data': self._stored_properties_data,
            'stack_name': self.stack.name,
        }
        created = db_api.resource_create(self.context, row)
        # Mirror the DB-assigned identity back onto this object.
        self.id = created.id
        self.created_time = created.created_at
        self._rsrc_metadata = metadata
    except Exception as ex:
        # Persistence is best-effort: log the failure, don't raise.
        LOG.error(_LE('DB error %s'), ex)
def domain_admin_auth(self):
    """Return a cached password auth plugin scoped to the stack domain.

    :raises exception.AuthorizationFailure: if the domain admin
        credentials are rejected.
    """
    if not self._domain_admin_auth:
        # Note we must specify the domain when getting the token
        # as only a domain scoped token can create projects in the domain
        auth = ks_auth.Password(username=self.domain_admin_user,
                                password=self.domain_admin_password,
                                auth_url=self.v3_endpoint,
                                domain_id=self._stack_domain_id,
                                domain_name=self.stack_domain_name,
                                user_domain_id=self._stack_domain_id,
                                user_domain_name=self.stack_domain_name)

        # NOTE(jamielennox): just do something to ensure a valid token
        try:
            auth.get_token(self.session)
        except ks_exception.Unauthorized:
            LOG.error(_LE("Domain admin client authentication failed"))
            raise exception.AuthorizationFailure()

        self._domain_admin_auth = auth

    return self._domain_admin_auth
def _store_or_update(self, action, status, reason):
    """Persist the resource's action/status transition to the database."""
    prev_action = self.action
    self.action = action
    self.status = status
    self.status_reason = reason

    data = {
        'action': self.action,
        'status': self.status,
        'status_reason': reason,
        'stack_id': self.stack.id,
        'updated_at': self.updated_time,
        'properties_data': self._stored_properties_data,
        'needed_by': self.needed_by,
        'requires': self.requires,
        'replaces': self.replaces,
        'replaced_by': self.replaced_by,
        'current_template_id': self.current_template_id,
        'nova_instance': self.resource_id
    }
    # Metadata is only (re)read from the template on the first transition
    # out of INIT; afterwards the cached copy is reused.
    if prev_action == self.INIT:
        metadata = self.t.metadata()
        data['rsrc_metadata'] = metadata
    else:
        metadata = self._rsrc_metadata

    if self.id is not None:
        try:
            rs = resource_objects.Resource.get_obj(self.context, self.id)
            rs.update_and_save(data)
        except Exception as ex:
            LOG.error(_LE('DB error %s'), ex)
        else:
            # Cache the metadata only after a successful DB update.
            self._rsrc_metadata = metadata
    else:
        # This should only happen in unit tests
        LOG.warning(_LW('Resource "%s" not pre-stored in DB'), self)
        self._store(metadata)
def _handle_update_eipInfo(self, prop_diff):
    # Re-associate the instance/network interface with an updated EIP
    # address or allocation id taken from the property diff.
    eip_update = prop_diff.get(self.EIP)
    allocation_id_update = prop_diff.get(self.ALLOCATION_ID)
    instance_id = self.properties[self.INSTANCE_ID]
    ni_id = self.properties[self.NETWORK_INTERFACE_ID]
    if eip_update:
        server = self._floatingIp_detach(neutron_ignore_not_found=True)
        if server:
            # then to attach the eip_update to the instance
            server.add_floating_ip(eip_update)
            self.resource_id_set(eip_update)
    elif allocation_id_update:
        self._floatingIp_detach(nova_ignore_not_found=True)
        port_id, port_rsrc = self._get_port_info(ni_id, instance_id)
        if not port_id or not port_rsrc:
            LOG.error(_LE('Port not specified.'))
            raise exception.NotFound(_('Failed to update, can not found '
                                       'port info.'))

        network_id = port_rsrc['network_id']
        # Route the floating IP's network through the router, then point
        # the floating IP at the resolved port.
        self._neutron_add_gateway_router(allocation_id_update, network_id)
        self._neutron_update_floating_ip(allocation_id_update, port_id)
        self.resource_id_set(allocation_id_update)
def stop_all_workers(self, stack):
    """Stop the traversal and cancel every active worker for a stack.

    Returns True when all workers were stopped, False otherwise.
    """
    # An in-progress traversal must be halted before cancelling workers.
    if stack.status == stack.IN_PROGRESS:
        self.stop_traversal(stack)

    all_cancelled = _cancel_workers(stack, self.thread_group_mgr,
                                    self.engine_id, self._rpc_client)
    if all_cancelled:
        LOG.info(
            _LI('[%(name)s(%(id)s)] Stopped all active workers for stack '
                '%(action)s'), {
                'name': stack.name,
                'id': stack.id,
                'action': stack.action
            })
        return True

    LOG.error(
        _LE("Failed to stop all workers of stack %(name)s "
            ", stack cancel not complete"), {'name': stack.name})
    return False
def extract_args(params):
    """Filter API request parameters into engine keyword arguments.

    Pulls the recognised keys out of *params*, performing validation
    and type conversion where appropriate, and returns them as a dict.
    """
    kwargs = {}

    # Timeout: '0', 0 and None all mean "no explicit timeout".
    timeout_mins = params.get(rpc_api.PARAM_TIMEOUT)
    if timeout_mins not in ('0', 0, None):
        try:
            timeout = int(timeout_mins)
        except (ValueError, TypeError):
            LOG.exception(_LE('Timeout conversion failed'))
        else:
            if timeout <= 0:
                raise ValueError(_('Invalid timeout value %s') % timeout)
            kwargs[rpc_api.PARAM_TIMEOUT] = timeout

    if rpc_api.PARAM_DISABLE_ROLLBACK in params:
        kwargs[rpc_api.PARAM_DISABLE_ROLLBACK] = param_utils.extract_bool(
            params[rpc_api.PARAM_DISABLE_ROLLBACK])

    # Note: normalised in place in *params*, not copied into kwargs.
    if rpc_api.PARAM_SHOW_DELETED in params:
        params[rpc_api.PARAM_SHOW_DELETED] = param_utils.extract_bool(
            params[rpc_api.PARAM_SHOW_DELETED])

    adopt_data = params.get(rpc_api.PARAM_ADOPT_STACK_DATA)
    if adopt_data:
        try:
            parsed_adopt_data = template_format.simple_parse(adopt_data)
        except ValueError as exc:
            raise ValueError(_('Invalid adopt data: %s') % exc)
        kwargs[rpc_api.PARAM_ADOPT_STACK_DATA] = parsed_adopt_data

    return kwargs
def signal(self, details=None):
    """Signal the resource.

    Delegates to a handle_signal() method which subclasses must
    provide; raises ResourceActionNotSupported when none exists.
    """
    def _details_to_string():
        # Produce a human-readable reason from the signal payload.
        if details is None:
            return 'No signal details provided'
        if isinstance(details, six.string_types):
            return details
        if isinstance(details, dict):
            if all(k in details for k in ('previous', 'current', 'reason')):
                # this is from Ceilometer.
                auto = '%(previous)s to %(current)s (%(reason)s)' % details
                return 'alarm state changed from %s' % auto
            if 'state' in details:
                # this is from watchrule
                return 'alarm state changed to %(state)s' % details
        return 'Unknown'

    handler = getattr(self, 'handle_signal', None)
    if not callable(handler):
        raise exception.ResourceActionNotSupported(action='signal')

    try:
        signal_result = handler(details)
        if signal_result:
            reason_string = "Signal: %s" % signal_result
        else:
            reason_string = _details_to_string()
        self._add_event('signal', self.status, reason_string)
    except Exception as ex:
        LOG.exception(_LE('signal %(name)s : %(msg)s') % {
            'name': six.text_type(self), 'msg': ex})
        raise exception.ResourceFailure(ex, self)
def _do_ops(cinstances, opname, cnxt, stack, current_stack=None, action=None,
            is_stack_failure=None):
    """Call the named operation on each plug point instance.

    Stops at the first instance whose operation raises.

    :param cinstances: plug point class instances to invoke
    :param opname: name of the method to call on each instance
    :param cnxt: request context
    :param stack: the stack being operated on
    :param current_stack: the current stack (for update-type operations)
    :param action: the stack action in progress
    :param is_stack_failure: if not None, passed as an extra argument
    :returns: tuple (failure flag, failure message or None, success count)
    """
    success_count = 0
    failure = False
    failure_exception_message = None
    for ci in cinstances:
        op = getattr(ci, opname, None)
        if callable(op):
            try:
                if is_stack_failure is not None:
                    op(cnxt, stack, current_stack, action, is_stack_failure)
                else:
                    op(cnxt, stack, current_stack, action)
                success_count += 1
            except Exception as ex:
                # BUG FIX: '%(opname)' lacked the trailing 's' conversion,
                # which made the '%' formatting itself raise a ValueError
                # whenever a plug point operation failed.
                LOG.exception(
                    _LE("%(opname)s %(ci)s failed for %(a)s on %(sid)s") % {
                        'opname': opname,
                        'ci': type(ci),
                        'a': action,
                        'sid': stack.id
                    })
                failure = True
                failure_exception_message = ex.args[0] if ex.args else str(ex)
                break
            LOG.info(
                _LI("done with class=%(c)s, stackid=%(sid)s, action=%(a)s") % {
                    'c': type(ci),
                    'sid': stack.id,
                    'a': action
                })
    return (failure, failure_exception_message, success_count)
def _create_auth_plugin(self):
    # Select a keystoneauth plugin from whichever credentials are
    # available, in priority order: trust, token info, bare token,
    # password.  Raises AuthorizationFailure when none are present.
    if self.trust_id:
        # Import the module so keystone_authtoken config options are
        # registered before they are read below.
        importutils.import_module('keystonemiddleware.auth_token')
        username = cfg.CONF.keystone_authtoken.admin_user
        password = cfg.CONF.keystone_authtoken.admin_password
        return v3.Password(username=username,
                           password=password,
                           user_domain_id='default',
                           auth_url=self._keystone_v3_endpoint,
                           trust_id=self.trust_id)

    if self.auth_token_info:
        # Reuse the existing token/service-catalog rather than
        # re-authenticating.
        auth_ref = access.AccessInfo.factory(body=self.auth_token_info,
                                             auth_token=self.auth_token)
        return access_plugin.AccessInfoPlugin(
            auth_url=self._keystone_v3_endpoint,
            auth_ref=auth_ref)

    if self.auth_token:
        # FIXME(jamielennox): This is broken but consistent. If you
        # only have a token but don't load a service catalog then
        # url_for wont work. Stub with the keystone endpoint so at
        # least it might be right.
        return token_endpoint.Token(endpoint=self._keystone_v3_endpoint,
                                    token=self.auth_token)

    if self.password:
        return v3.Password(username=self.username,
                           password=self.password,
                           project_id=self.tenant_id,
                           user_domain_id='default',
                           auth_url=self._keystone_v3_endpoint)

    LOG.error(_LE("Keystone v3 API connection failed, no password "
                  "trust or auth_token!"))
    raise exception.AuthorizationFailure()
def kill_children(*args):
    """Kills the entire process group."""
    # NOTE(review): nested signal handler — `self` is taken from the
    # enclosing scope, not from the handler's own arguments.
    LOG.error(_LE('SIGTERM received'))
    # Ignore further SIGTERMs while tearing down, so the killpg below
    # does not re-enter this handler.
    signal.signal(signal.SIGTERM, signal.SIG_IGN)
    self.running = False
    # pgid 0 == our own process group: terminate every child at once.
    os.killpg(0, signal.SIGTERM)
def delete(self, action=DELETE, backup=False, abandon=False):
    '''
    Delete all of the resources, and then the stack itself.
    The action parameter is used to differentiate between a user
    initiated delete and an automatic stack rollback after a failed
    create, which amount to the same thing, but the states are recorded
    differently.

    Note abandon is a delete where all resources have been set to a
    RETAIN deletion policy, but we also don't want to delete anything
    required for those resources, e.g the stack_user_project.
    '''
    if action not in (self.DELETE, self.ROLLBACK):
        LOG.error(_LE("Unexpected action %s passed to delete!"), action)
        self.state_set(self.DELETE, self.FAILED,
                       "Invalid action %s" % action)
        return

    stack_status = self.COMPLETE
    reason = 'Stack %s completed successfully' % action
    self.state_set(action, self.IN_PROGRESS, 'Stack %s started' % action)

    # Any backup stack must be removed before the main stack; abort the
    # whole delete if that fails.
    backup_stack = self._backup_stack(False)
    if backup_stack:
        self._delete_backup_stack(backup_stack)
        if backup_stack.status != backup_stack.COMPLETE:
            errs = backup_stack.status_reason
            failure = 'Error deleting backup resources: %s' % errs
            self.state_set(action, self.FAILED,
                           'Failed to %s : %s' % (action, failure))
            return

    snapshots = db_api.snapshot_get_all(self.context, self.id)
    for snapshot in snapshots:
        self.delete_snapshot(snapshot)

    # Lifecycle pre-ops are skipped for backup-stack deletes.
    if not backup:
        try:
            lifecycle_plugin_utils.do_pre_ops(self.context, self,
                                              None, action)
        except Exception as e:
            self.state_set(action, self.FAILED,
                           e.args[0] if e.args
                           else 'Failed stack pre-ops: %s' %
                           six.text_type(e))
            return

    # Destroy resources in reverse dependency order.
    action_task = scheduler.DependencyTaskGroup(self.dependencies,
                                                resource.Resource.destroy,
                                                reverse=True)
    try:
        scheduler.TaskRunner(action_task)(timeout=self.timeout_secs())
    except exception.ResourceFailure as ex:
        stack_status = self.FAILED
        reason = 'Resource %s failed: %s' % (action, six.text_type(ex))
    except scheduler.Timeout:
        stack_status = self.FAILED
        reason = '%s timed out' % action.title()

    # If the stack delete succeeded, this is not a backup stack and it's
    # not a nested stack, we should delete the credentials
    if stack_status != self.FAILED and not backup and not self.owner_id:
        stack_status, reason = self._delete_credentials(stack_status,
                                                        reason,
                                                        abandon)

    try:
        self.state_set(action, stack_status, reason)
    except exception.NotFound:
        LOG.info(_LI("Tried to delete stack that does not exist "
                     "%s "), self.id)

    if not backup:
        lifecycle_plugin_utils.do_post_ops(self.context, self,
                                           None, action,
                                           (self.status == self.FAILED))

    if stack_status != self.FAILED:
        # delete the stack
        try:
            db_api.stack_delete(self.context, self.id)
        except exception.NotFound:
            LOG.info(_LI("Tried to delete stack that does not exist "
                         "%s "), self.id)
        self.id = None
def update_task(self, newstack, action=UPDATE, event=None):
    # Generator task performing a stack UPDATE/ROLLBACK/RESTORE.  It
    # yields between scheduler steps so the caller can cancel it via
    # `event`; on failure of an UPDATE it re-enters itself as a ROLLBACK.
    if action not in (self.UPDATE, self.ROLLBACK, self.RESTORE):
        LOG.error(_LE("Unexpected action %s passed to update!"), action)
        self.state_set(self.UPDATE, self.FAILED,
                       "Invalid action %s" % action)
        return

    try:
        lifecycle_plugin_utils.do_pre_ops(self.context, self,
                                          newstack, action)
    except Exception as e:
        self.state_set(action, self.FAILED,
                       e.args[0] if e.args
                       else 'Failed stack pre-ops: %s' % six.text_type(e))
        return
    # Only a rollback may start while the stack is still IN_PROGRESS.
    if self.status == self.IN_PROGRESS:
        if action == self.ROLLBACK:
            LOG.debug("Starting update rollback for %s" % self.name)
        else:
            self.state_set(action, self.FAILED,
                           'State invalid for %s' % action)
            return

    self.state_set(action, self.IN_PROGRESS,
                   'Stack %s started' % action)

    if action == self.UPDATE:
        # Oldstack is useless when the action is not UPDATE , so we don't
        # need to build it, this can avoid some unexpected errors.
        oldstack = Stack(self.context, self.name, copy.deepcopy(self.t),
                         self.env)

    backup_stack = self._backup_stack()
    try:
        update_task = update.StackUpdate(
            self, newstack, backup_stack,
            rollback=action == self.ROLLBACK,
            error_wait_time=cfg.CONF.error_wait_time)
        updater = scheduler.TaskRunner(update_task)

        # Adopt the new stack's inputs before stepping the update.
        self.env = newstack.env
        self.parameters = newstack.parameters
        self.t.files = newstack.t.files
        self.disable_rollback = newstack.disable_rollback
        self.timeout_mins = newstack.timeout_mins
        self._set_param_stackid()

        try:
            updater.start(timeout=self.timeout_secs())
            yield
            while not updater.step():
                if event is None or not event.ready():
                    yield
                else:
                    message = event.wait()
                    if message == rpc_api.THREAD_CANCEL:
                        raise ForcedCancel()
        finally:
            self.reset_dependencies()

        if action == self.UPDATE:
            reason = 'Stack successfully updated'
        elif action == self.RESTORE:
            reason = 'Stack successfully restored'
        else:
            reason = 'Stack rollback completed'
        stack_status = self.COMPLETE

    except scheduler.Timeout:
        stack_status = self.FAILED
        reason = 'Timed out'
    except ForcedCancel as e:
        reason = six.text_type(e)
        stack_status = self.FAILED
        if action == self.UPDATE:
            update_task.updater.cancel_all()
            yield self.update_task(oldstack, action=self.ROLLBACK)
            return
    except exception.ResourceFailure as e:
        reason = six.text_type(e)
        stack_status = self.FAILED
        if action == self.UPDATE:
            # If rollback is enabled, we do another update, with the
            # existing template, so we roll back to the original state
            if not self.disable_rollback:
                yield self.update_task(oldstack, action=self.ROLLBACK)
                return
    else:
        LOG.debug('Deleting backup stack')
        backup_stack.delete(backup=True)

        # flip the template to the newstack values
        self.t = newstack.t
        template_outputs = self.t[self.t.OUTPUTS]
        self.outputs = self.resolve_static_data(template_outputs)

    # Don't use state_set to do only one update query and avoid race
    # condition with the COMPLETE status
    self.action = action
    self.status = stack_status
    self.status_reason = reason
    self.store()
    lifecycle_plugin_utils.do_post_ops(self.context, self, newstack,
                                       action,
                                       (self.status == self.FAILED))

    notification.send(self)
def _v3_client_init(self):
    # Build a keystone v3 client from whichever credentials the request
    # context provides: trust, token info, bare token, or password.
    kwargs = {
        'auth_url': self.v3_endpoint,
        'endpoint': self.v3_endpoint
    }
    # Note try trust_id first, as we can't reuse auth_token in that case
    if self.context.trust_id is not None:
        # We got a trust_id, so we use the admin credentials
        # to authenticate with the trust_id so we can use the
        # trust impersonating the trustor user.
        kwargs.update(self._service_admin_creds())
        kwargs['trust_id'] = self.context.trust_id
        kwargs.pop('project_name')
    elif self.context.auth_token_info is not None:
        # The auth_ref version must be set according to the token version
        if 'access' in self.context.auth_token_info:
            kwargs['auth_ref'] = copy.deepcopy(
                self.context.auth_token_info['access'])
            kwargs['auth_ref']['version'] = 'v2.0'
            kwargs['auth_ref']['token']['id'] = self.context.auth_token
        elif 'token' in self.context.auth_token_info:
            kwargs['auth_ref'] = copy.deepcopy(
                self.context.auth_token_info['token'])
            kwargs['auth_ref']['version'] = 'v3'
            kwargs['auth_ref']['auth_token'] = self.context.auth_token
        else:
            LOG.error(_LE('Unknown version in auth_token_info'))
            raise exception.AuthorizationFailure(
                _('Unknown token version'))
    elif self.context.auth_token is not None:
        kwargs['token'] = self.context.auth_token
        kwargs['project_id'] = self.context.tenant_id
    elif self.context.password is not None:
        kwargs['username'] = self.context.username
        kwargs['password'] = self.context.password
        kwargs['project_id'] = self.context.tenant_id
    else:
        LOG.error(_LE("Keystone v3 API connection failed, no password "
                      "trust or auth_token!"))
        raise exception.AuthorizationFailure()
    kwargs.update(self._ssl_options())
    client = kc_v3.Client(**kwargs)

    # If auth_ref has already be specified via auth_token_info, don't
    # authenticate as we want to reuse, rather than request a new token
    if 'auth_ref' not in kwargs:
        try:
            client.authenticate()
        except kc_exception.Unauthorized:
            LOG.error(_LE("Keystone client authentication failed"))
            raise exception.AuthorizationFailure()

    # If we are authenticating with a trust set the context auth_token
    # with the trust scoped token
    if 'trust_id' in kwargs:
        # Sanity check
        if not client.auth_ref.trust_scoped:
            LOG.error(_LE("trust token re-scoping failed!"))
            raise exception.AuthorizationFailure()
        # Sanity check that impersonation is effective
        if self.context.trustor_user_id != client.auth_ref.user_id:
            LOG.error(_LE("Trust impersonation failed"))
            raise exception.AuthorizationFailure()

    return client
def adjust(self, adjustment,
           adjustment_type=sc_util.CFN_CHANGE_IN_CAPACITY,
           min_adjustment_step=None, signal=False):
    """
    Adjust the size of the scaling group if the cooldown permits.
    """
    if self._cooldown_inprogress():
        LOG.info(_LI("%(name)s NOT performing scaling adjustment, "
                     "cooldown %(cooldown)s"),
                 {'name': self.name,
                  'cooldown': self.properties[self.COOLDOWN]})
        # Signal-driven calls must surface "nothing to do" to the
        # caller; direct calls simply return.
        if signal:
            raise exception.NoActionRequired()
        else:
            return

    capacity = grouputils.get_size(self)
    lower = self.properties[self.MIN_SIZE]
    upper = self.properties[self.MAX_SIZE]

    new_capacity = sc_util.calculate_new_capacity(capacity, adjustment,
                                                  adjustment_type,
                                                  min_adjustment_step,
                                                  lower, upper)

    # send a notification before, on-error and on-success.
    notif = {
        'stack': self.stack,
        'adjustment': adjustment,
        'adjustment_type': adjustment_type,
        'capacity': capacity,
        'groupname': self.FnGetRefId(),
        'message': _("Start resizing the group %(group)s") % {
            'group': self.FnGetRefId()},
        'suffix': 'start',
    }
    notification.send(**notif)
    try:
        self.resize(new_capacity)
    except Exception as resize_ex:
        with excutils.save_and_reraise_exception():
            try:
                notif.update({'suffix': 'error',
                              'message': six.text_type(resize_ex),
                              'capacity': grouputils.get_size(self),
                              })
                notification.send(**notif)
            except Exception:
                LOG.exception(_LE('Failed sending error notification'))
    else:
        notif.update({
            'suffix': 'end',
            'capacity': new_capacity,
            'message': _("End resizing the group %(group)s") % {
                'group': notif['groupname']},
        })
        notification.send(**notif)
    finally:
        # Record the cooldown timestamp whether or not the resize worked.
        self._cooldown_timestamp("%s : %s" % (adjustment_type, adjustment))
def adjust(self, adjustment,
           adjustment_type=sc_util.CFN_CHANGE_IN_CAPACITY,
           min_adjustment_step=None):
    """Adjust the size of the scaling group if the cooldown permits.

    :raises exception.NoActionRequired: when the group is not COMPLETE,
        the capacity would not change, or the cooldown forbids scaling.
    """
    if self.status != self.COMPLETE:
        LOG.info(_LI("%s NOT performing scaling adjustment, "
                     "when status is not COMPLETE") % self.name)
        raise exception.NoActionRequired()

    capacity = grouputils.get_size(self)
    new_capacity = self._get_new_capacity(capacity, adjustment,
                                          adjustment_type,
                                          min_adjustment_step)
    if new_capacity == capacity:
        LOG.info(_LI("%s NOT performing scaling adjustment, "
                     "as there is no change in capacity.") % self.name)
        raise exception.NoActionRequired()

    if not self._is_scaling_allowed():
        LOG.info(_LI("%(name)s NOT performing scaling adjustment, "
                     "cooldown %(cooldown)s") %
                 {'name': self.name,
                  'cooldown': self.properties[self.COOLDOWN]})
        raise exception.NoActionRequired()

    # send a notification before, on-error and on-success.
    notif = {
        'stack': self.stack,
        'adjustment': adjustment,
        'adjustment_type': adjustment_type,
        'capacity': capacity,
        'groupname': self.FnGetRefId(),
        'message': _("Start resizing the group %(group)s") % {
            'group': self.FnGetRefId()},
        'suffix': 'start',
    }
    size_changed = False
    try:
        notification.send(**notif)
        try:
            self.resize(new_capacity)
        except Exception as resize_ex:
            with excutils.save_and_reraise_exception():
                try:
                    notif.update({'suffix': 'error',
                                  'message': six.text_type(resize_ex),
                                  'capacity': grouputils.get_size(self),
                                  })
                    notification.send(**notif)
                except Exception:
                    LOG.exception(_LE('Failed sending error notification'))
        else:
            size_changed = True
            notif.update({
                'suffix': 'end',
                'capacity': new_capacity,
                'message': _("End resizing the group %(group)s") % {
                    'group': notif['groupname']},
            })
            notification.send(**notif)
    except Exception:
        # BUG FIX: the adjacent string literals concatenated to
        # "...adjustment forgroup %s." — a space was missing after "for".
        LOG.error(_LE("Error in performing scaling adjustment for "
                      "group %s.") % self.name)
        raise
    finally:
        # Always record the outcome so the cooldown state stays accurate.
        self._finished_scaling("%s : %s" % (adjustment_type, adjustment),
                               size_changed=size_changed)
def handle_signal(self, details=None):
    # Template author can use scaling policy with any of the actions
    # of an alarm (i.e alarm_actions, insufficient_data_actions) and
    # it would be actioned irrespective of the alarm state. It's
    # fair to assume that the alarm state would be the appropriate one.
    # The responsibility of using a scaling policy with desired actions
    # lies with the template author, though this is normally expected to
    # be used with 'alarm_actions'.
    #
    # We also assume that the alarm state is 'alarm' when 'details' is None
    # or no 'current'/'state' key in 'details'. Watchrule has upper case
    # states, so we lower() them. This is only used for logging the alarm
    # state.
    if details is None:
        alarm_state = 'alarm'
    else:
        alarm_state = details.get('current',
                                  details.get('state', 'alarm')).lower()

    LOG.info(_LI('Alarm %(name)s, new state %(state)s'),
             {'name': self.name, 'state': alarm_state})

    asgn_id = self.properties[self.AUTO_SCALING_GROUP_NAME]
    group = self.stack.resource_by_refid(asgn_id)
    if group is None:
        raise exception.NotFound(_('Alarm %(alarm)s could not find '
                                   'scaling group named "%(group)s"') % {
                                       'alarm': self.name,
                                       'group': asgn_id})

    if not self._is_scaling_allowed():
        LOG.info(_LI("%(name)s NOT performing scaling action, "
                     "cooldown %(cooldown)s") %
                 {'name': self.name,
                  'cooldown': self.properties[self.COOLDOWN]})
        raise exception.NoActionRequired()

    LOG.info(_LI('%(name)s alarm, adjusting group %(group)s with id '
                 '%(asgn_id)s by %(filter)s') %
             {'name': self.name, 'group': group.name, 'asgn_id': asgn_id,
              'filter': self.properties[self.SCALING_ADJUSTMENT]})

    size_changed = False
    try:
        group.adjust(self.properties[self.SCALING_ADJUSTMENT],
                     self.properties[self.ADJUSTMENT_TYPE],
                     self.properties[self.MIN_ADJUSTMENT_STEP])
        size_changed = True
    except Exception as ex:
        # NoActionRequired is an expected outcome, not an error.
        if not isinstance(ex, exception.NoActionRequired):
            LOG.error(_LE("Error in performing scaling adjustment with "
                          "%(name)s alarm for group %(group)s.") % {
                              'name': self.name,
                              'group': group.name})
        raise
    finally:
        # Always record the outcome so the cooldown state stays accurate.
        self._finished_scaling("%s : %s" % (
            self.properties[self.ADJUSTMENT_TYPE],
            self.properties[self.SCALING_ADJUSTMENT]),
            size_changed=size_changed)
def _authorize(self, req, auth_uri):
    # Read request signature and access id.
    # If we find X-Auth-User in the headers we ignore a key error
    # here so that we can use both authentication methods.
    # Returning here just means the user didn't supply AWS
    # authentication and we'll let the app try native keystone next.
    LOG.info(_LI("Checking AWS credentials.."))

    signature = self._get_signature(req)
    if not signature:
        if 'X-Auth-User' in req.headers:
            return self.application
        else:
            LOG.info(_LI("No AWS Signature found."))
            raise exception.HeatIncompleteSignatureError()

    access = self._get_access(req)
    if not access:
        if 'X-Auth-User' in req.headers:
            return self.application
        else:
            LOG.info(_LI("No AWSAccessKeyId/Authorization Credential"))
            raise exception.HeatMissingAuthenticationTokenError()

    LOG.info(_LI("AWS credentials found, checking against keystone."))

    if not auth_uri:
        LOG.error(_LE("Ec2Token authorization failed, no auth_uri "
                      "specified in config file"))
        raise exception.HeatInternalFailureError(_('Service '
                                                   'misconfigured'))
    # Make a copy of args for authentication and signature verification.
    auth_params = dict(req.params)
    # 'Signature' param Not part of authentication args
    auth_params.pop('Signature', None)

    # Authenticate the request.
    # AWS v4 authentication requires a hash of the body
    body_hash = hashlib.sha256(req.body).hexdigest()
    creds = {'ec2Credentials': {'access': access,
                                'signature': signature,
                                'host': req.host,
                                'verb': req.method,
                                'path': req.path,
                                'params': auth_params,
                                'headers': req.headers,
                                'body_hash': body_hash
                                }}
    creds_json = json.dumps(creds)
    headers = {'Content-Type': 'application/json'}
    keystone_ec2_uri = self._conf_get_keystone_ec2_uri(auth_uri)
    LOG.info(_LI('Authenticating with %s'), keystone_ec2_uri)
    response = requests.post(keystone_ec2_uri, data=creds_json,
                             headers=headers,
                             verify=self.ssl_options['verify'],
                             cert=self.ssl_options['cert'])
    result = response.json()
    try:
        token_id = result['access']['token']['id']
        tenant = result['access']['token']['tenant']['name']
        tenant_id = result['access']['token']['tenant']['id']
        LOG.info(_LI("AWS authentication successful."))
    except (AttributeError, KeyError):
        LOG.info(_LI("AWS authentication failure."))
        # Try to extract the reason for failure so we can return the
        # appropriate AWS error via raising an exception
        try:
            reason = result['error']['message']
        except KeyError:
            reason = None

        if reason == "EC2 access key not found.":
            raise exception.HeatInvalidClientTokenIdError()
        elif reason == "EC2 signature not supplied.":
            raise exception.HeatSignatureError()
        else:
            raise exception.HeatAccessDeniedError()

    # Authenticated!  Pass the keystone token and tenant details on to
    # the rest of the pipeline via request headers.
    ec2_creds = {'ec2Credentials': {'access': access,
                                    'signature': signature}}
    req.headers['X-Auth-EC2-Creds'] = json.dumps(ec2_creds)
    req.headers['X-Auth-Token'] = token_id
    req.headers['X-Tenant-Name'] = tenant
    req.headers['X-Tenant-Id'] = tenant_id
    req.headers['X-Auth-URL'] = auth_uri

    metadata = result['access'].get('metadata', {})
    roles = metadata.get('roles', [])
    req.headers['X-Roles'] = ','.join(roles)

    return self.application
def hup(self, *args):
    """Reloads configuration files with zero down time."""
    # NOTE(review): logged at ERROR level, presumably so the reload is
    # always visible regardless of log verbosity — confirm intended.
    LOG.error(_LE('SIGHUP received'))
    # Ignore further SIGHUPs while the reload is being processed.
    signal.signal(signal.SIGHUP, signal.SIG_IGN)
    raise exception.SIGHUPInterrupt
def adjust(self, adjustment, adjustment_type=CHANGE_IN_CAPACITY):
    """
    Adjust the size of the scaling group if the cooldown permits.
    """
    if self._cooldown_inprogress():
        LOG.info(_LI("%(name)s NOT performing scaling adjustment, "
                     "cooldown %(cooldown)s"),
                 {'name': self.name,
                  'cooldown': self.properties[self.COOLDOWN]})
        return

    capacity = grouputils.get_size(self)
    lower = self.properties[self.MIN_SIZE]
    upper = self.properties[self.MAX_SIZE]

    new_capacity = _calculate_new_capacity(capacity, adjustment,
                                           adjustment_type, lower, upper)

    total = grouputils.get_size(self, include_failed=True)
    # if there are failed resources in nested_stack, has to change
    if new_capacity == total:
        LOG.debug('no change in capacity %d' % capacity)
        return

    # send a notification before, on-error and on-success.
    notif = {
        'stack': self.stack,
        'adjustment': adjustment,
        'adjustment_type': adjustment_type,
        'capacity': capacity,
        'groupname': self.FnGetRefId(),
        'message': _("Start resizing the group %(group)s") % {
            'group': self.FnGetRefId()},
        'suffix': 'start',
    }
    notification.send(**notif)
    try:
        self.resize(new_capacity)
    except Exception as resize_ex:
        with excutils.save_and_reraise_exception():
            try:
                notif.update({'suffix': 'error',
                              'message': six.text_type(resize_ex),
                              })
                notification.send(**notif)
            except Exception:
                LOG.exception(_LE('Failed sending error notification'))
    else:
        notif.update({
            'suffix': 'end',
            'capacity': new_capacity,
            'message': _("End resizing the group %(group)s") % {
                'group': notif['groupname']},
        })
        notification.send(**notif)

    # Cooldown only starts after a resize attempt completes.
    self._cooldown_timestamp("%s : %s" % (adjustment_type, adjustment))
def __call__(self, request):
    """WSGI method that controls (de)serialization and method dispatch."""
    action_args = self.get_action_args(request.environ)
    action = action_args.pop('action', None)

    # From reading the boto code, and observation of real AWS api responses
    # it seems that the AWS api ignores the content-type in the html header
    # Instead it looks at a "ContentType" GET query parameter
    # This doesn't seem to be documented in the AWS cfn API spec, but it
    # would appear that the default response serialization is XML, as
    # described in the API docs, but passing a query parameter of
    # ContentType=JSON results in a JSON serialized response...
    content_type = request.params.get("ContentType")

    try:
        deserialized_request = self.dispatch(self.deserializer,
                                             action, request)
        action_args.update(deserialized_request)

        LOG.debug(('Calling %(controller)s : %(action)s'),
                  {'controller': self.controller, 'action': action})

        action_result = self.dispatch(self.controller, action,
                                      request, **action_args)
    except TypeError as err:
        # TypeError from dispatch is treated as a malformed request.
        LOG.error(_LE('Exception handling resource: %s'), err)
        msg = _('The server could not comply with the request since '
                'it is either malformed or otherwise incorrect.')
        err = webob.exc.HTTPBadRequest(msg)
        http_exc = translate_exception(err, request.best_match_language())
        # NOTE(luisg): We disguise HTTP exceptions, otherwise they will be
        # treated by wsgi as responses ready to be sent back and they
        # won't make it into the pipeline app that serializes errors
        raise exception.HTTPExceptionDisguise(http_exc)
    except webob.exc.HTTPException as err:
        if isinstance(err, aws_exception.HeatAPIException):
            # The AWS compatible API's don't use faultwrap, so
            # we want to detect the HeatAPIException subclasses
            # and raise rather than wrapping in HTTPExceptionDisguise
            raise
        if not isinstance(err, webob.exc.HTTPError):
            # Some HTTPException are actually not errors, they are
            # responses ready to be sent back to the users, so we don't
            # error log, disguise or translate those
            raise
        if isinstance(err, webob.exc.HTTPServerError):
            LOG.error(
                _LE("Returning %(code)s to user: %(explanation)s"),
                {'code': err.code, 'explanation': err.explanation})
        http_exc = translate_exception(err, request.best_match_language())
        raise exception.HTTPExceptionDisguise(http_exc)
    except exception.HeatException as err:
        raise translate_exception(err, request.best_match_language())
    except Exception as err:
        log_exception(err, sys.exc_info())
        raise translate_exception(err, request.best_match_language())

    # Here we support either passing in a serializer or detecting it
    # based on the content type.
    try:
        serializer = self.serializer
        if serializer is None:
            if content_type == "JSON":
                serializer = serializers.JSONResponseSerializer()
            else:
                serializer = serializers.XMLResponseSerializer()

        response = webob.Response(request=request)
        self.dispatch(serializer, action, response, action_result)
        return response

    # return unserializable result (typically an exception)
    except Exception:
        # Here we should get API exceptions derived from HeatAPIException
        # these implement get_unserialized_body(), which allow us to get
        # a dict containing the unserialized error response.
        # We only need to serialize for JSON content_type, as the
        # exception body is pre-serialized to the default XML in the
        # HeatAPIException constructor
        # If we get something else here (e.g a webob.exc exception),
        # this will fail, and we just return it without serializing,
        # which will not conform to the expected AWS error response format
        if content_type == "JSON":
            try:
                err_body = action_result.get_unserialized_body()
                serializer.default(action_result, err_body)
            except Exception:
                LOG.warning(_LW("Unable to serialize exception "
                                "response"))

        return action_result
def handle_signal(self, details=None):
    # ceilometer sends details like this:
    # {u'alarm_id': ID, u'previous': u'ok', u'current': u'alarm',
    #  u'reason': u'...'})
    # in this policy we currently assume that this gets called
    # only when there is an alarm. But the template writer can
    # put the policy in all the alarm notifiers (nodata, and ok).
    #
    # our watchrule has upper case states so lower() them all.
    if details is None:
        alarm_state = 'alarm'
    else:
        alarm_state = details.get('current',
                                  details.get('state', 'alarm')).lower()

    LOG.info(_LI('Alarm %(name)s, new state %(state)s'),
             {'name': self.name, 'state': alarm_state})

    # Unlike the variant above, only the 'alarm' state triggers scaling.
    if alarm_state != 'alarm':
        raise exception.NoActionRequired()

    asgn_id = self.properties[self.AUTO_SCALING_GROUP_NAME]
    group = self.stack.resource_by_refid(asgn_id)
    if group is None:
        raise exception.NotFound(_('Alarm %(alarm)s could not find '
                                   'scaling group named "%(group)s"') % {
                                       'alarm': self.name,
                                       'group': asgn_id})

    if not self._is_scaling_allowed():
        LOG.info(_LI("%(name)s NOT performing scaling action, "
                     "cooldown %(cooldown)s") %
                 {'name': self.name,
                  'cooldown': self.properties[self.COOLDOWN]})
        raise exception.NoActionRequired()

    LOG.info(_LI('%(name)s alarm, adjusting group %(group)s with id '
                 '%(asgn_id)s by %(filter)s') %
             {'name': self.name, 'group': group.name, 'asgn_id': asgn_id,
              'filter': self.properties[self.SCALING_ADJUSTMENT]})

    size_changed = False
    try:
        group.adjust(self.properties[self.SCALING_ADJUSTMENT],
                     self.properties[self.ADJUSTMENT_TYPE],
                     self.properties[self.MIN_ADJUSTMENT_STEP])
        size_changed = True
    except Exception as ex:
        # NoActionRequired is an expected outcome, not an error.
        if not isinstance(ex, exception.NoActionRequired):
            LOG.error(_LE("Error in performing scaling adjustment with "
                          "%(name)s alarm for group %(group)s.") % {
                              'name': self.name,
                              'group': group.name})
        raise
    finally:
        # Always record the outcome so the cooldown state stays accurate.
        self._finished_scaling("%s : %s" % (
            self.properties[self.ADJUSTMENT_TYPE],
            self.properties[self.SCALING_ADJUSTMENT]),
            size_changed=size_changed)
def log_exception(err, exc_info):
    """Log an unexpected API error, attaching the traceback only when
    verbose or debug logging is enabled."""
    if cfg.CONF.verbose or cfg.CONF.debug:
        extra = {'exc_info': exc_info}
    else:
        extra = {}
    LOG.error(_LE("Unexpected error occurred serving API: %s"), err, **extra)
def signal(self, details=None):
    '''
    signal the resource. Subclasses should provide a handle_signal() method
    to implement the signal, the base-class raise an exception if no
    handler is implemented.
    '''
    # Signals are rejected outright while suspending or deleting.
    if self.action in (self.SUSPEND, self.DELETE):
        self._add_event(self.action, self.status,
                        'Cannot signal resource during %s' % self.action)
        ex = Exception(_('Cannot signal resource during %s') % self.action)
        raise exception.ResourceFailure(ex, self)

    def get_string_details():
        # Build a human-readable reason string from the signal payload.
        if details is None:
            return 'No signal details provided'
        if isinstance(details, six.string_types):
            return details
        if isinstance(details, dict):
            if all(k in details for k in ('previous', 'current', 'reason')):
                # this is from Ceilometer.
                auto = '%(previous)s to %(current)s (%(reason)s)' % details
                return 'alarm state changed from %s' % auto
            elif 'state' in details:
                # this is from watchrule
                return 'alarm state changed to %(state)s' % details

        return 'Unknown'

    # Clear the hook without interfering with resources'
    # `handle_signal` callbacks:
    if (details and 'unset_hook' in details and
            environment.valid_hook_type(details.get('unset_hook'))):
        hook = details['unset_hook']
        if self.has_hook(hook):
            self.clear_hook(hook)
            LOG.info(_LI('Clearing %(hook)s hook on %(resource)s'),
                     {'hook': hook, 'resource': six.text_type(self)})
            self._add_event(self.action, self.status,
                            "Hook %s is cleared" % hook)
        # A valid unset_hook request never falls through to handle_signal.
        return

    if not callable(getattr(self, 'handle_signal', None)):
        raise exception.ResourceActionNotSupported(action='signal')

    try:
        signal_result = self.handle_signal(details)
        if signal_result:
            reason_string = "Signal: %s" % signal_result
        else:
            reason_string = get_string_details()
        self._add_event('SIGNAL', self.status, reason_string)
    except Exception as ex:
        LOG.exception(_LE('signal %(name)s : %(msg)s') % {
            'name': six.text_type(self), 'msg': ex})
        failure = exception.ResourceFailure(ex, self)
        raise failure