def node_lock_acquire(node_id, action_id, forced=False):
    '''Try to lock the specified node.

    :param node_id: ID of the node to be locked.
    :param action_id: ID of the action that attempts to lock the node.
    :param forced: set to True to cancel current action that owns the
                   lock, if any.
    :returns: True if lock is acquired, or False otherwise.
    '''
    # Step 1: try lock the node - if the returned owner_id is the
    # action id, it was a success
    owner = db_api.node_lock_acquire(node_id, action_id)
    if action_id == owner:
        return True

    # Step 2: retry using global configuration options
    retries = cfg.CONF.lock_retry_times
    retry_interval = cfg.CONF.lock_retry_interval

    while retries > 0:
        scheduler.sleep(retry_interval)
        owner = db_api.node_lock_acquire(node_id, action_id)
        if action_id == owner:
            return True
        retries = retries - 1

    # Step 3: Last resort is 'forced locking', only needed when retry failed
    if forced:
        owner = db_api.node_lock_steal(node_id, action_id)
        return action_id == owner

    # Pass the substitution dict as a lazy argument instead of eagerly
    # formatting with '%', so no work is done when ERROR is filtered out.
    LOG.error(_LE('Node is already locked by action %(old)s, '
                  'action %(new)s failed grabbing the lock'),
              {'old': owner, 'new': action_id})
    return False
def do_update(self, obj, new_profile, **params):
    '''Perform update on object.

    :param obj: the node object to operate on
    :param new_profile: the new profile used for updating
    :param params: other parameters for the update request.
    '''
    self.stack_id = obj.physical_id
    if not self.stack_id:
        return True

    # TODO(anyone): Check if template differs
    # TODO(anyone): Check if params differs
    # Map each stack_update field to the profile property it comes from.
    field_keys = (
        ('parameters', new_profile.PARAMETERS),
        ('template', new_profile.TEMPLATE),
        ('timeout_mins', new_profile.TIMEOUT),
        ('disable_rollback', new_profile.DISABLE_ROLLBACK),
        ('files', new_profile.FILES),
        ('environment', new_profile.ENVIRONMENT),
    )
    fields = dict((name, new_profile.properties[key])
                  for name, key in field_keys)

    self.heat(obj).stack_update(self.stack_id, **fields)

    # Wait for action to complete/fail
    while not self._check_action_complete(obj, 'UPDATE'):
        scheduler.sleep(1)

    return True
def node_lock_acquire(context, node_id, action_id, engine=None, forced=False):
    """Try to lock the specified node.

    :param context: the context used for DB operations;
    :param node_id: ID of the node to be locked.
    :param action_id: ID of the action that attempts to lock the node.
    :param engine: ID of the engine that attempts to lock the node.
    :param forced: set to True to cancel current action that owns the lock,
                   if any.
    :returns: True if lock is acquired, or False otherwise.
    """
    # First attempt: a returned owner equal to our action id means success.
    owner = db_api.node_lock_acquire(node_id, action_id)
    if owner == action_id:
        return True

    # Retry as many times as the global configuration allows.
    max_retries = cfg.CONF.lock_retry_times
    pause = cfg.CONF.lock_retry_interval
    for _count in range(max_retries):
        scheduler.sleep(pause)
        LOG.debug('Acquire lock for node %s again' % node_id)
        owner = db_api.node_lock_acquire(node_id, action_id)
        if owner == action_id:
            return True

    # Forced locking is the last resort once all retries have failed.
    if forced:
        return db_api.node_lock_steal(node_id, action_id) == action_id

    # The lock may be held by an action whose engine has died; if so,
    # mark that action failed and take over the lock.
    action = db_api.action_get(context, owner)
    engine_is_dead = (action and action.owner and action.owner != engine and
                      is_engine_dead(context, action.owner))
    if engine_is_dead:
        LOG.info(
            _LI('The node %(n)s is locked by dead action %(a)s, '
                'try to steal the lock.'), {
                'n': node_id,
                'a': owner
            })
        reason = _('Engine died when executing this action.')
        db_api.action_mark_failed(context, action.id, time.time(),
                                  reason=reason)
        db_api.node_lock_steal(node_id, action_id)
        return True

    LOG.error(
        _LE('Node is already locked by action %(old)s, '
            'action %(new)s failed grabbing the lock'), {
            'old': owner,
            'new': action_id
        })
    return False
def do_update(self, obj, new_profile, **params):
    '''Perform update on object.

    :param obj: the node object to operate on
    :param new_profile: the new profile used for updating
    :param params: other parameters for the update request.
    '''
    # Remember the backing Heat stack; with no physical resource there is
    # nothing to update, so report success.
    self.stack_id = obj.physical_id
    if not self.stack_id:
        return True

    # TODO(anyone): Check if template differs
    # TODO(anyone): Check if params differs
    # NOTE: every property from the new profile is sent to Heat whether or
    # not it changed (see the TODOs above).
    fields = {
        'parameters': new_profile.properties[new_profile.PARAMETERS],
        'template': new_profile.properties[new_profile.TEMPLATE],
        'timeout_mins': new_profile.properties[new_profile.TIMEOUT],
        'disable_rollback': new_profile.properties[
            new_profile.DISABLE_ROLLBACK],
        'files': new_profile.properties[new_profile.FILES],
        'environment': new_profile.properties[new_profile.ENVIRONMENT],
    }

    self.heat(obj).stack_update(self.stack_id, **fields)

    # Wait for action to complete/fail
    # Busy-wait (1 second naps) until Heat reports the UPDATE action done.
    while not self._check_action_complete(obj, 'UPDATE'):
        scheduler.sleep(1)

    return True
def cluster_lock_acquire(context, cluster_id, action_id, engine=None, scope=CLUSTER_SCOPE, forced=False): """Try to lock the specified cluster. :param cluster_id: ID of the cluster to be locked. :param action_id: ID of the action which wants to lock the cluster. :param engine: ID of the engine which wants to lock the cluster. :param scope: scope of lock, could be cluster wide lock, or node-wide lock. :param forced: set to True to cancel current action that owns the lock, if any. :returns: True if lock is acquired, or False otherwise. """ # Step 1: try lock the cluster - if the returned owner_id is the # action id, it was a success owners = db_api.cluster_lock_acquire(cluster_id, action_id, scope) if action_id in owners: return True # Step 2: retry using global configuration options retries = cfg.CONF.lock_retry_times retry_interval = cfg.CONF.lock_retry_interval while retries > 0: scheduler.sleep(retry_interval) LOG.debug('Acquire lock for cluster %s again' % cluster_id) owners = db_api.cluster_lock_acquire(cluster_id, action_id, scope) if action_id in owners: return True retries = retries - 1 # Step 3: Last resort is 'forced locking', only needed when retry failed if forced: owners = db_api.cluster_lock_steal(cluster_id, action_id) return action_id in owners # Will reach here only because scope == CLUSTER_SCOPE action = db_api.action_get(context, owners[0]) if (action and action.owner and action.owner != engine and is_engine_dead(context, action.owner)): LOG.info(_LI('The cluster %(c)s is locked by dead action %(a)s, ' 'try to steal the lock.'), { 'c': cluster_id, 'a': owners[0] }) reason = _('Engine died when executing this action.') db_api.action_mark_failed(context, action.id, time.time(), reason=reason) owners = db_api.cluster_lock_steal(cluster_id, action_id) return action_id in owners LOG.error(_LE('Cluster is already locked by action %(old)s, ' 'action %(new)s failed grabbing the lock'), {'old': str(owners), 'new': action_id}) return False
def node_lock_acquire(context, node_id, action_id, engine=None, forced=False):
    """Try to lock the specified node.

    :param context: the context used for DB operations;
    :param node_id: ID of the node to be locked.
    :param action_id: ID of the action that attempts to lock the node.
    :param engine: ID of the engine that attempts to lock the node.
    :param forced: set to True to cancel current action that owns the lock,
                   if any.
    :returns: True if lock is acquired, or False otherwise.
    """
    # Step 1: try lock the node - if the returned owner_id is the
    # action id, it was a success
    owner = db_api.node_lock_acquire(node_id, action_id)
    if action_id == owner:
        return True

    # Step 2: retry using global configuration options
    retries = cfg.CONF.lock_retry_times
    retry_interval = cfg.CONF.lock_retry_interval

    while retries > 0:
        scheduler.sleep(retry_interval)
        LOG.debug('Acquire lock for node %s again' % node_id)
        owner = db_api.node_lock_acquire(node_id, action_id)
        if action_id == owner:
            return True
        retries = retries - 1

    # Step 3: Last resort is 'forced locking', only needed when retry failed
    if forced:
        owner = db_api.node_lock_steal(node_id, action_id)
        return action_id == owner

    # if this node lock by dead engine
    action = db_api.action_get(context, owner)
    if (action and action.owner and action.owner != engine and
            is_engine_dead(context, action.owner)):
        LOG.info(_LI('The node %(n)s is locked by dead action %(a)s, '
                     'try to steal the lock.'), {
            'n': node_id,
            'a': owner
        })
        reason = _('Engine died when executing this action.')
        db_api.action_mark_failed(context, action.id, time.time(),
                                  reason=reason)
        # Check the steal result instead of assuming success, consistent
        # with the forced-locking branch above and with
        # cluster_lock_acquire's dead-engine handling.
        owner = db_api.node_lock_steal(node_id, action_id)
        return action_id == owner

    LOG.error(_LE('Node is already locked by action %(old)s, '
                  'action %(new)s failed grabbing the lock'),
              {'old': owner, 'new': action_id})
    return False
def do_update(self, obj, new_profile, **params):
    '''Perform update on object.

    :param obj: the node object to operate on
    :param new_profile: the new profile used for updating
    :param params: other parameters for the update request.
    '''
    self.stack_id = obj.physical_id
    if not self.stack_id:
        return True

    if not self.validate_for_update(new_profile):
        return False

    # For each property, compare the value from the new profile against
    # the current one and collect only the ones that actually changed,
    # keyed by the corresponding stack_update field name.
    attr_to_field = (
        ('TEMPLATE', 'template'),
        ('PARAMETERS', 'parameters'),
        ('TIMEOUT', 'timeout_mins'),
        ('DISABLE_ROLLBACK', 'disable_rollback'),
        ('FILES', 'files'),
        ('ENVIRONMENT', 'environment'),
    )
    fields = {}
    for attr, field in attr_to_field:
        new_value = new_profile.properties[getattr(new_profile, attr)]
        if new_value != self.properties[getattr(self, attr)]:
            fields[field] = new_value

    if fields:
        try:
            self.heat(obj).stack_update(self.stack_id, **fields)
        except Exception as ex:
            LOG.exception(_('Failed in updating stack: %s'),
                          six.text_type(ex))
            return False

        # Wait for action to complete/fail
        while not self._check_action_complete(obj, 'UPDATE'):
            scheduler.sleep(1)

    return True
def do_update(self, obj, new_profile, **params):
    '''Perform update on object.

    :param obj: the node object to operate on
    :param new_profile: the new profile used for updating
    :param params: other parameters for the update request.
    '''
    # No physical stack yet: nothing to update, report success.
    self.stack_id = obj.physical_id
    if not self.stack_id:
        return True

    if not self.validate_for_update(new_profile):
        return False

    # Build a minimal update request: only include properties whose value
    # differs between the current profile and the new one.
    fields = {}
    new_template = new_profile.properties[new_profile.TEMPLATE]
    if new_template != self.properties[self.TEMPLATE]:
        fields['template'] = new_template

    new_params = new_profile.properties[new_profile.PARAMETERS]
    if new_params != self.properties[self.PARAMETERS]:
        fields['parameters'] = new_params

    new_timeout = new_profile.properties[new_profile.TIMEOUT]
    if new_timeout != self.properties[self.TIMEOUT]:
        fields['timeout_mins'] = new_timeout

    new_dr = new_profile.properties[new_profile.DISABLE_ROLLBACK]
    if new_dr != self.properties[self.DISABLE_ROLLBACK]:
        fields['disable_rollback'] = new_dr

    new_files = new_profile.properties[new_profile.FILES]
    if new_files != self.properties[self.FILES]:
        fields['files'] = new_files

    new_environment = new_profile.properties[new_profile.ENVIRONMENT]
    if new_environment != self.properties[self.ENVIRONMENT]:
        fields['environment'] = new_environment

    # Only touch Heat when something actually changed; a failed update is
    # logged and reported as False rather than raised.
    if fields:
        try:
            self.heat(obj).stack_update(self.stack_id, **fields)
        except Exception as ex:
            LOG.exception(_('Failed in updating stack: %s'),
                          six.text_type(ex))
            return False

        # Wait for action to complete/fail
        while not self._check_action_complete(obj, 'UPDATE'):
            scheduler.sleep(1)

    return True
def do_delete(self, obj):
    '''Delete the stack backing the given node.

    :param obj: the node object whose physical stack is to be deleted.
    :returns: True when the DELETE action has completed.
    '''
    self.stack_id = obj.physical_id
    # Nothing to delete when no physical stack exists; mirrors the guard
    # used by do_update.
    if not self.stack_id:
        return True

    # Let exceptions from the Heat client propagate naturally; wrapping
    # the call in "except Exception as ex: raise ex" was a no-op that only
    # disturbed the traceback.
    self.heat(obj).stack_delete(self.stack_id, True)

    # Wait for action to complete/fail
    while not self._check_action_complete(obj, 'DELETE'):
        scheduler.sleep(1)

    return True
def cluster_lock_acquire(cluster_id, action_id, scope=CLUSTER_SCOPE,
                         forced=False):
    """Try to lock the specified cluster.

    :param cluster_id: ID of the cluster to be locked.
    :param action_id: ID of the action which wants to lock the cluster.
    :param scope: scope of lock, could be cluster wide lock, or node-wide
                  lock.
    :param forced: set to True to cancel current action that owns the lock,
                   if any.
    :returns: True if lock is acquired, or False otherwise.
    """
    # First attempt: success means our action id shows up among the owners.
    owners = db_api.cluster_lock_acquire(cluster_id, action_id, scope)
    if action_id in owners:
        return True

    # Keep retrying as configured globally.
    attempts_left = cfg.CONF.lock_retry_times
    pause = cfg.CONF.lock_retry_interval
    while attempts_left > 0:
        attempts_left -= 1
        scheduler.sleep(pause)
        owners = db_api.cluster_lock_acquire(cluster_id, action_id, scope)
        if action_id in owners:
            return True

    # Forced locking is the last resort once all retries have failed.
    if forced:
        owners = db_api.cluster_lock_steal(cluster_id, action_id)
        return action_id in owners

    LOG.error(_LE('Cluster is already locked by action %(old)s, '
                  'action %(new)s failed grabbing the lock'),
              {'old': str(owners), 'new': action_id})
    return False
def do_create(self, obj):
    '''Create a stack using the given profile.

    :param obj: the node object for which a stack is created; its name is
                used (with a random suffix) as the stack name.
    :returns: ID of the newly created stack.
    '''
    kwargs = {
        'stack_name': obj.name + '-' + utils.random_name(8),
        'template': self.properties[self.TEMPLATE],
        'timeout_mins': self.properties[self.TIMEOUT],
        'disable_rollback': self.properties[self.DISABLE_ROLLBACK],
        'parameters': self.properties[self.PARAMETERS],
        'files': self.properties[self.FILES],
        'environment': self.properties[self.ENVIRONMENT],
    }

    # Pass kwargs lazily to the logger instead of eagerly formatting with
    # '%', so the (potentially large) dict is only rendered when INFO
    # logging is enabled.
    LOG.info('Creating stack: %s', kwargs)
    stack = self.heat(obj).stack_create(**kwargs)
    self.stack_id = stack.id

    # Wait for action to complete/fail
    while not self._check_action_complete(obj, 'CREATE'):
        scheduler.sleep(1)

    return stack.id
def test_sleep(self):
    '''scheduler.sleep() must delegate directly to eventlet.sleep().'''
    patched_sleep = self.patchobject(eventlet, 'sleep')

    scheduler.sleep(1)

    patched_sleep.assert_called_once_with(1)
def cluster_lock_acquire(context, cluster_id, action_id, engine=None, scope=CLUSTER_SCOPE, forced=False): """Try to lock the specified cluster. :param cluster_id: ID of the cluster to be locked. :param action_id: ID of the action which wants to lock the cluster. :param engine: ID of the engine which wants to lock the cluster. :param scope: scope of lock, could be cluster wide lock, or node-wide lock. :param forced: set to True to cancel current action that owns the lock, if any. :returns: True if lock is acquired, or False otherwise. """ # Step 1: try lock the cluster - if the returned owner_id is the # action id, it was a success owners = db_api.cluster_lock_acquire(cluster_id, action_id, scope) if action_id in owners: return True # Step 2: retry using global configuration options retries = cfg.CONF.lock_retry_times retry_interval = cfg.CONF.lock_retry_interval while retries > 0: scheduler.sleep(retry_interval) LOG.debug('Acquire lock for cluster %s again' % cluster_id) owners = db_api.cluster_lock_acquire(cluster_id, action_id, scope) if action_id in owners: return True retries = retries - 1 # Step 3: Last resort is 'forced locking', only needed when retry failed if forced: owners = db_api.cluster_lock_steal(cluster_id, action_id) return action_id in owners # Will reach here only because scope == CLUSTER_SCOPE action = db_api.action_get(context, owners[0]) if (action and action.owner and action.owner != engine and is_engine_dead(context, action.owner)): LOG.info( _LI('The cluster %(c)s is locked by dead action %(a)s, ' 'try to steal the lock.'), { 'c': cluster_id, 'a': owners[0] }) reason = _('Engine died when executing this action.') db_api.action_mark_failed(context, action.id, time.time(), reason=reason) owners = db_api.cluster_lock_steal(cluster_id, action_id) return action_id in owners LOG.error( _LE('Cluster is already locked by action %(old)s, ' 'action %(new)s failed grabbing the lock'), { 'old': str(owners), 'new': action_id }) return False