def get_state_copy(self):
    with utils.get_rlock(lcon.ACI_TREE_LOCK_NAME_PREFIX + self.tenant_name):
        return structured_tree.StructuredHashTree.from_string(
            str(self._state), root_key=self._state.root_key,
            has_populated=self._state.has_populated)

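# Illustrative sketch (not part of the source): the copy above relies on a
# serialize/deserialize round trip to produce a snapshot that shares no
# mutable state with the live tree. The stand-alone example below shows the
# same snapshot-under-lock pattern, with json as a hypothetical stand-in for
# StructuredHashTree's str()/from_string() serialization.
import json
import threading

_tree_lock = threading.RLock()
_tree = {'root': {'children': {}}}

def get_tree_snapshot():
    # Hold the lock only long enough to serialize; the returned copy can
    # then be read without further synchronization.
    with _tree_lock:
        return json.loads(json.dumps(_tree))
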
def establish_ws_session(self, max_retries=None):
    try:
        with utils.get_rlock(lcon.ACI_WS_CONNECTION_LOCK, blocking=False):
            retries = 0
            self._reload_websocket_config()
            max_retries = max_retries or 2 * len(self.ws_urls)
            while retries < max_retries:
                if self.session and self.session.session:
                    self.session.close()
                LOG.info('Establishing WS connection with url: %s',
                         self.ws_urls[0])
                self.session = acitoolkit.Session(
                    self.ws_urls[0], self.apic_username, self.apic_password,
                    verify_ssl=self.verify_ssl_certificate,
                    cert_name=self.cert_name, key=self.private_key_file)
                resp = self.session.login()
                if not resp.ok:
                    LOG.warning('Websocket connection failed: %s' % resp.text)
                    self.ws_urls.rotate(-1)
                    LOG.info('Rotating websocket URL, '
                             'using: %s' % self.ws_urls[0])
                    retries += 1
                    continue
                LOG.info('Websocket connection succeeded.')
                self._spawn_monitors()
                return self.session
            utils.perform_harakiri(
                LOG, "Cannot establish WS connection "
                "after %s retries." % retries)
    except utils.LockNotAcquired:
        # Some other thread is trying to reconnect
        return

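# Illustrative sketch (not part of the source): establish_ws_session relies
# on utils.get_rlock raising utils.LockNotAcquired when blocking=False and
# the lock is already held, so that only one thread reconnects at a time.
# A minimal stand-alone version of that helper could look like this (names
# are hypothetical stand-ins for the utils internals):
import contextlib
import threading

class LockNotAcquired(Exception):
    pass

_locks = {}
_registry_lock = threading.Lock()

@contextlib.contextmanager
def get_rlock(name, blocking=True):
    with _registry_lock:
        lock = _locks.setdefault(name, threading.RLock())
    if not lock.acquire(blocking=blocking):
        # Non-blocking caller lost the race: let it bail out immediately.
        raise LockNotAcquired(name)
    try:
        yield lock
    finally:
        lock.release()
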
def observe(self, context):
    # Copy state accumulated so far
    global serving_tenants
    new_state = {}
    for tenant in serving_tenants.keys():
        # Only copy state if the tenant is warm
        with utils.get_rlock(lcon.ACI_TREE_LOCK_NAME_PREFIX + tenant):
            if serving_tenants[tenant].is_warm():
                new_state[tenant] = self._get_state_copy(tenant)
    self._state = new_state

def _reconciliation_cycle(self, serve=True):
    # Regenerate context at each reconciliation cycle
    # TODO(ivar): set request-id so that oslo log can track it
    aim_ctx = context.AimContext(store=api.get_store())
    if serve:
        LOG.info("Start serving cycle.")
        tenants = self._calculate_tenants(aim_ctx)
        # Serve tenants
        for pair in self.multiverse:
            pair[DESIRED].serve(aim_ctx, tenants)
            pair[CURRENT].serve(aim_ctx, tenants)
        LOG.info("AID %s is currently serving: "
                 "%s" % (self.agent.id, tenants))

    LOG.info("Start reconciliation cycle.")
    # REVISIT(ivar) Might be wise to wait here upon tenant serving to allow
    # time for events to happen

    # Observe the two universes to fix their current state
    with utils.get_rlock(lcon.AID_OBSERVER_LOCK):
        for pair in self.multiverse:
            pair[DESIRED].observe(aim_ctx)
            pair[CURRENT].observe(aim_ctx)

    delete_candidates = set()
    vetoes = set()
    for pair in self.multiverse:
        pair[DESIRED].vote_deletion_candidates(
            aim_ctx, pair[CURRENT], delete_candidates, vetoes)
        pair[CURRENT].vote_deletion_candidates(
            aim_ctx, pair[DESIRED], delete_candidates, vetoes)
    # Reconcile everything
    changes = False
    for pair in self.multiverse:
        changes |= pair[CURRENT].reconcile(aim_ctx, pair[DESIRED],
                                           delete_candidates)
    if not changes:
        LOG.info("Congratulations! your multiverse is nice and synced :)")
    for pair in self.multiverse:
        pair[DESIRED].finalize_deletion_candidates(aim_ctx, pair[CURRENT],
                                                   delete_candidates)
        pair[CURRENT].finalize_deletion_candidates(aim_ctx, pair[DESIRED],
                                                   delete_candidates)

    # Delete tenants if there's consensus
    for tenant in delete_candidates:
        # All the universes agree on this tenant cleanup
        for pair in self.multiverse:
            for universe in pair.values():
                LOG.info("%s removing tenant from AID %s" %
                         (universe.name, tenant))
                universe.cleanup_state(aim_ctx, tenant)

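# Illustrative sketch (not part of the source): each pair in self.multiverse
# maps DESIRED and CURRENT to universe objects, and tenant cleanup requires
# consensus: any universe may nominate a deletion candidate, and any
# universe may veto it. Assuming vetoed roots are dropped before the cleanup
# phase, the consensus step reduces to a set difference:
def consensus_deletions(candidates, vetoes):
    # Only candidates nobody vetoed survive to cleanup.
    return candidates - vetoes

assert consensus_deletions({'tn-a', 'tn-b'}, {'tn-b'}) == {'tn-a'}
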
def _event_loop(self):
    start_time = time.time()
    # Push the backlog right before the event loop, so that all the
    # events we generate here are likely caught in this iteration.
    self._push_aim_resources()
    if self.ws_context.has_event(self.tenant.urls):
        with utils.get_rlock(lcon.ACI_TREE_LOCK_NAME_PREFIX +
                             self.tenant_name):
            events = self.ws_context.get_event_data(self.tenant.urls)
            for event in events:
                # REVISIT(ivar): remove vmmDomP once websocket ACI bug is
                # fixed
                if (list(event.keys())[0] in [self.tenant.type, 'vmmDomP']
                        and not event[list(event.keys())[0]][
                            'attributes'].get(STATUS_FIELD)):
                    LOG.info("Resetting Tree %s" % self.tenant_name)
                    # REVISIT(ivar): on subscription to VMMPolicy objects,
                    # aci doesn't return the root object itself because of
                    # a bug. Let's craft a fake root to work around this
                    # problem
                    if self.tenant_name.startswith('vmmp-'):
                        LOG.debug('Faking vmmProvP %s' % self.tenant_name)
                        events.append({'vmmProvP': {
                            'attributes': {'dn': self.tenant.dn}}})
                    # This is a full resync, trees need to be reset
                    self._state = structured_tree.StructuredHashTree()
                    self._operational_state = (
                        structured_tree.StructuredHashTree())
                    self._monitored_state = (
                        structured_tree.StructuredHashTree())
                    self.tag_set = set()
                    break
            # REVISIT(ivar): there's already a debug log in acitoolkit
            # listing all the events received one by one. The following
            # would be more compact, we need to choose which to keep.
            # LOG.debug("received events for root %s: %s" %
            #           (self.tenant_name, events))
            # Make events list flat
            self.flat_events(events)
            # Pull incomplete objects
            events = self._fill_events(events)
            # Manage Tags
            events = self.ownership_mgr.filter_ownership(events)
            self._event_to_tree(events)
    time.sleep(max(0, self.polling_yield - (time.time() - start_time)))

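# Illustrative sketch (not part of the source): the trailing sleep above
# paces the loop at roughly self.polling_yield seconds per iteration,
# sleeping only for whatever time the work left over and never a negative
# amount. Stand-alone:
import time

def paced_loop(work, period=1.0, iterations=3):
    for _ in range(iterations):
        start = time.time()
        work()
        # Sleep only for the remainder of the period, if any.
        time.sleep(max(0, period - (time.time() - start)))

paced_loop(lambda: time.sleep(0.2), period=0.5)  # ~0.5s per iteration
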
def push_aim_resources(self, resources):
    """Given a map of AIM resources for this tenant, push them into APIC

    Stash the objects to be eventually pushed. Given the nature of the
    system we don't really care if we lose one or two messages, or even
    all of them, or if we mess up the order, or get involved in a
    catastrophic meteor impact, we should always be able to get back in
    sync.

    :param resources: a dictionary with "create" and "delete" resources
    :return:
    """
    try:
        with utils.get_rlock(lcon.ACI_BACKLOG_LOCK_NAME_PREFIX +
                             self.tenant_name, blocking=False):
            backlog = Queue.Queue()
            while not self.object_backlog.empty():
                requests = self.object_backlog.get()
                # check if there's an event to squash
                for op in ['create', 'delete']:
                    for i, req in enumerate(requests.get(op, [])):
                        for j, new in enumerate(resources.get(op, [])):
                            if op == 'create':
                                req_dn = req.dn
                                new_dn = new.dn
                            else:
                                # Delete items are in ACI format
                                req_dn = list(
                                    req.values())[0]['attributes']['dn']
                                new_dn = list(
                                    new.values())[0]['attributes']['dn']
                            if req_dn == new_dn:
                                # Replace old with new
                                requests[op][i] = new
                                break
                        else:
                            # No colliding item found
                            continue
                        # new can be removed from resources
                        resources[op].pop(j)
                backlog.put(requests)
            if any(resources.values()):
                backlog.put(resources)
            self.object_backlog = backlog
    except utils.LockNotAcquired:
        # If changes need to be pushed, AID will do it on the next
        # iteration
        pass

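# Illustrative sketch (not part of the source): the nested loops above
# "squash" the backlog by replacing any queued request whose DN matches an
# incoming one, so only the newest request per DN survives. The same idea
# expressed on plain dicts keyed by DN:
def squash(queued, incoming):
    # queued/incoming: lists of {'dn': ..., 'body': ...} requests.
    by_dn = {req['dn']: req for req in queued}
    by_dn.update({req['dn']: req for req in incoming})  # newest wins
    return list(by_dn.values())

merged = squash([{'dn': 'uni/tn-a', 'body': 'old'}],
                [{'dn': 'uni/tn-a', 'body': 'new'}])
assert merged == [{'dn': 'uni/tn-a', 'body': 'new'}]
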
def push_aim_resources(self, resources):
    """Given a map of AIM resources for this tenant, push them into APIC

    Stash the objects to be eventually pushed. Given the nature of the
    system we don't really care if we lose one or two messages, or even
    all of them, or if we mess up the order, or get involved in a
    catastrophic meteor impact, we should always be able to get back in
    sync.

    :param resources: a dictionary with "create" and "delete" resources
    :return:
    """
    try:
        with utils.get_rlock(lcon.ACI_BACKLOG_LOCK_NAME_PREFIX +
                             self.tenant_name, blocking=False):
            backlog = Queue.Queue()
            while not self.object_backlog.empty():
                requests = self.object_backlog.get()
                # check if there's an event to squash
                for op in ['create', 'delete']:
                    for i, req in enumerate(requests.get(op, [])):
                        for j, new in enumerate(resources.get(op, [])):
                            if op == 'create':
                                req_dn = req.dn
                                new_dn = new.dn
                            else:
                                # Delete items are in ACI format
                                req_dn = req.values()[0][
                                    'attributes']['dn']
                                new_dn = new.values()[0][
                                    'attributes']['dn']
                            if req_dn == new_dn:
                                # Replace old with new
                                requests[op][i] = new
                                break
                        else:
                            # No colliding item found
                            continue
                        # new can be removed from resources
                        resources[op].pop(j)
                backlog.put(requests)
            if any(resources.values()):
                backlog.put(resources)
            self.object_backlog = backlog
    except utils.LockNotAcquired:
        # If changes need to be pushed, AID will do it on the next
        # iteration
        pass

def _init_aim_k8s(self, types_to_observe):
    if self._needs_init:
        # NOTE(ivar): we need to lock the observer here to prevent it
        # from reading empty or incomplete trees
        # REVISIT(ivar): this is NOT gonna work for multi AID. In general,
        # the whole K8S watcher cannot run as-is in a multi AID
        # environment
        with utils.get_rlock(lcon.AID_OBSERVER_LOCK):
            self._reset_trees()
            self._renew_klient_watch()
            self._version_by_type = {}
            for typ in self._k8s_types_to_observe:
                self._init_stream_for_type(typ)
            self._persistence_loop(save_on_empty=True)
            LOG.info("Trees initialized")
            self._needs_init = False

def _event_loop(self):
    start_time = time.time()
    # Push the backlog right before the event loop, so that all the
    # events we generate here are likely caught in this iteration.
    self._push_aim_resources()
    if self.ws_context.has_event(self.tenant.urls):
        with utils.get_rlock(lcon.ACI_TREE_LOCK_NAME_PREFIX +
                             self.tenant_name):
            events = self.ws_context.get_event_data(self.tenant.urls)
            for event in events:
                # REVISIT(ivar): remove vmmDomP once websocket ACI bug is
                # fixed
                if (event.keys()[0] in [self.tenant.type, 'vmmDomP'] and
                        not event[event.keys()[0]]['attributes'].get(
                            STATUS_FIELD)):
                    LOG.info("Resetting Tree %s" % self.tenant_name)
                    # REVISIT(ivar): on subscription to VMMPolicy objects,
                    # aci doesn't return the root object itself because of
                    # a bug. Let's craft a fake root to work around this
                    # problem
                    if self.tenant_name.startswith('vmmp-'):
                        LOG.debug('Faking vmmProvP %s' % self.tenant_name)
                        events.append({'vmmProvP': {
                            'attributes': {'dn': self.tenant.dn}}})
                    # This is a full resync, trees need to be reset
                    self._state = structured_tree.StructuredHashTree()
                    self._operational_state = (
                        structured_tree.StructuredHashTree())
                    self._monitored_state = (
                        structured_tree.StructuredHashTree())
                    self.tag_set = set()
                    break
            # REVISIT(ivar): there's already a debug log in acitoolkit
            # listing all the events received one by one. The following
            # would be more compact, we need to choose which to keep.
            # LOG.debug("received events for root %s: %s" %
            #           (self.tenant_name, events))
            # Make events list flat
            self.flat_events(events)
            # Pull incomplete objects
            events = self._fill_events(events)
            # Manage Tags
            events = self._filter_ownership(events)
            self._event_to_tree(events)
    time.sleep(max(0, self.polling_yield - (time.time() - start_time)))

def _daemon_loop(self, aim_ctx, serve=True):
    if serve:
        LOG.info("Start serving cycle.")
        tenants = self._calculate_tenants(aim_ctx)
        # Filter delete candidates with currently served tenants
        self.delete_candidates = {
            k: v for k, v in self.delete_candidates.iteritems()
            if k in tenants}
        # Serve tenants
        for pair in self.multiverse:
            pair[DESIRED].serve(tenants)
            pair[CURRENT].serve(tenants)
        LOG.info("AID %s is currently serving: "
                 "%s" % (self.agent.id, tenants))

    LOG.info("Start reconciliation cycle.")
    # REVISIT(ivar) Might be wise to wait here upon tenant serving to allow
    # time for events to happen

    # Observe the two universes to fix their current state
    with utils.get_rlock(lcon.AID_OBSERVER_LOCK):
        for pair in self.multiverse:
            pair[DESIRED].observe()
            pair[CURRENT].observe()

    # Reconcile everything
    changes = False
    for pair in self.multiverse:
        changes |= pair[CURRENT].reconcile(pair[DESIRED],
                                           self.delete_candidates)
    if not changes:
        LOG.info("Congratulations! your multiverse is nice and synced :)")

    # Delete tenants if there's consensus
    for tenant, votes in self.delete_candidates.iteritems():
        if len(votes) == self.consensus:
            # All the universes agree on this tenant cleanup
            for pair in self.multiverse:
                for universe in pair.values():
                    LOG.info("%s removing tenant from AIM %s" %
                             (universe.name, tenant))
                    universe.cleanup_state(tenant)

def establish_ws_session(self, max_retries=None, recovery_mode=False):
    try:
        with utils.get_rlock(lcon.ACI_WS_CONNECTION_LOCK, blocking=False):
            if not recovery_mode:
                purpose = NORMAL_PURPOSE
                self._reload_websocket_config()
                self.need_recovery = False
            else:
                purpose = RECOVERY_PURPOSE
            backup_urls = collections.deque()
            max_retries = max_retries or 2 * len(self.ws_urls)
            url_max_retries = max(1, max_retries // len(self.ws_urls))
            aim_context = aim_ctx.AimContext(store=api.get_store())
            for url in self.ws_urls:
                apic_assign = api_infra.ApicAssignment(apic_host=url)
                apic_assign_obj = self.manager.get(aim_context, apic_assign)
                if (apic_assign_obj and
                        apic_assign_obj.aim_aid_id != self.agent_id and
                        not apic_assign_obj.is_available(aim_context)):
                    backup_urls.append(url)
                    continue
                # This means the original aim-aid owner might have
                # crashed or something. We will just take it!
                if (recovery_mode and apic_assign_obj and
                        self.session.ipaddr in url):
                    obj = self._update_apic_assign_db(
                        aim_context, apic_assign, apic_assign_obj)
                    if obj is None:
                        continue
                    self.need_recovery = False
                    self.apic_assign_obj = obj
                    return
                is_conn_successful = self._ws_session_login(
                    url, url_max_retries, purpose,
                    aim_context, apic_assign, apic_assign_obj)
                if is_conn_successful:
                    return
                else:
                    backup_urls.append(url)

            if recovery_mode:
                return
            # Try the backup urls. Randomly rotate the list first so that
            # the extra aim-aids won't all go for the same backup url.
            backup_urls_len = len(backup_urls)
            if backup_urls_len > 1:
                backup_urls.rotate(random.randint(1, backup_urls_len))
            for url in backup_urls:
                is_conn_successful = self._ws_session_login(
                    url, url_max_retries, BACKUP_PURPOSE)
                if is_conn_successful:
                    return
            utils.perform_harakiri(
                LOG, "Cannot establish WS connection "
                "after %s retries." % max_retries)
    except utils.LockNotAcquired:
        # Some other thread is trying to reconnect
        return

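# Illustrative sketch (not part of the source): collections.deque.rotate
# shifts entries in place, so rotating by a random amount spreads different
# agents across the backup URL list without reallocating it:
import collections
import random

urls = collections.deque(['https://apic1', 'https://apic2', 'https://apic3'])
urls.rotate(random.randint(1, len(urls)))
print(list(urls))  # e.g. ['https://apic3', 'https://apic1', 'https://apic2']
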
def _push_aim_resources(self):
    dn_mgr = apic_client.DNManager()
    decompose = dn_mgr.aci_decompose_dn_guess
    with utils.get_rlock(lcon.ACI_BACKLOG_LOCK_NAME_PREFIX +
                         self.tenant_name):
        while not self.object_backlog.empty():
            request = self.object_backlog.get()
            for method, aim_objects in request.items():
                # Method will be either "create" or "delete"
                # sort the aim_objects based on DN first for DELETE method
                sorted_aim_objs = aim_objects
                if method == base_universe.DELETE:
                    sorted_aim_objs = sorted(
                        aim_objects,
                        key=lambda x: list(
                            x.values())[0]['attributes']['dn'])
                potential_parent_dn = ' '
                for aim_object in sorted_aim_objs:
                    # get MO from ACI client, identify it via its DN parts
                    # and push the new body
                    if method == base_universe.DELETE:
                        # If a parent is also being deleted then we don't
                        # have to send those children requests to APIC
                        dn = list(
                            aim_object.values())[0]['attributes']['dn']
                        res_type = list(aim_object.keys())[0]
                        decomposed = decompose(dn, res_type)
                        parent_dn = dn_mgr.build(decomposed[1][:-1])
                        if parent_dn.startswith(potential_parent_dn):
                            continue
                        else:
                            potential_parent_dn = dn
                        to_push = [copy.deepcopy(aim_object)]
                    else:
                        if getattr(aim_object, 'monitored', False):
                            # When pushing to APIC, treat monitored
                            # objects as pre-existing
                            aim_object.monitored = False
                            aim_object.pre_existing = True
                        to_push = self.to_aci_converter.convert(
                            [aim_object])
                    LOG.debug('%s AIM object %s in APIC' %
                              (method, repr(aim_object)))
                    try:
                        if method == base_universe.CREATE:
                            # Set ownership before pushing the request
                            to_push = self.ownership_mgr.set_ownership_key(
                                to_push)
                            LOG.debug("POSTING into APIC: %s" % to_push)
                            self._post_with_transaction(to_push)
                            self.creation_succeeded(aim_object)
                        else:
                            to_delete, to_update = (
                                self.ownership_mgr.set_ownership_change(
                                    to_push))
                            LOG.debug("DELETING from APIC: %s" % to_delete)
                            for obj in to_delete:
                                attr = list(obj.values())[0]['attributes']
                                self.aci_session.DELETE(
                                    '/mo/%s.json' % attr.pop('dn'))
                            LOG.debug("UPDATING in APIC: %s" % to_update)
                            # Update object ownership
                            self._post_with_transaction(to_update,
                                                        modified=True)
                            if to_update:
                                self.creation_succeeded(aim_object)
                    except Exception as e:
                        LOG.debug(traceback.format_exc())
                        LOG.error("An error has occurred during %s for "
                                  "object %s: %s" %
                                  (method, aim_object, str(e)))
                        if method == base_universe.CREATE:
                            err_type = (
                                self.error_handler.analyze_exception(e))
                            # REVISIT(ivar): for now, treat UNKNOWN errors
                            # the same way as OPERATION_TRANSIENT.
                            # Investigate a way to understand when such
                            # errors might require agent restart.
                            self.creation_failed(aim_object, str(e),
                                                 err_type)

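# Illustrative sketch (not part of the source): because ACI deletes a whole
# subtree when its root is deleted, the DELETE branch above sorts requests
# by DN and skips any DN nested under one already scheduled for deletion.
# The same pruning on plain DN strings:
def prune_children(dns):
    kept, last = [], None
    for dn in sorted(dns):
        if last and dn.startswith(last + '/'):
            continue  # an ancestor is already being deleted
        kept.append(dn)
        last = dn
    return kept

assert prune_children(['uni/tn-a/BD-b', 'uni/tn-a']) == ['uni/tn-a']
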
def locked_func(self):
    with internal_utils.get_rlock('test2'):
        pass

def finalize_deletion_candidates(self, context, other_universe,
                                 delete_candidates):
    for root in delete_candidates:
        with utils.get_rlock(lcon.SYNC_LOG_LOCK + root):
            self._sync_log.pop(root, None)

def _event_to_tree(self, events):
    """Parse the event and push it into the tree

    This method requires translation between ACI and AIM model in order
    to honor the Universe contract.
    :param events: an ACI event in the form of a list of objects
    :return:
    """
    with utils.get_rlock(lcon.ACI_TREE_LOCK_NAME_PREFIX +
                         self.tenant_name):
        removed, updated = [], []
        removing_dns = set()
        filtered_events = []
        # Set the owned events
        for event in events:
            # Exclude some events from monitored objects.
            # Some RS objects can be set from AIM even for monitored
            # objects, therefore we need to exclude events regarding
            # those RS objects when we don't own them. One example is
            # fvRsProv on external networks
            res_type = event.keys()[0]
            if res_type in ACI_TYPES_NOT_CONVERT_IF_MONITOR:
                # Check that the object is indeed correct looking at
                # the parent
                if self._check_parent_type(
                        event,
                        ACI_TYPES_NOT_CONVERT_IF_MONITOR[res_type]):
                    if not self._is_owned(event):
                        # For an RS object like fvRsProv we check the
                        # parent ownership as well.
                        continue
            # Exclude from conversion those RS objects that we want to
            # allow to be manually configured in ACI
            if res_type in ACI_TYPES_SKIP_ON_MANAGES:
                if self._check_parent_type(
                        event, ACI_TYPES_SKIP_ON_MANAGES[res_type]):
                    # Check whether the event is owned, and whether its
                    # parent is.
                    if (not self._is_owned(event, check_parent=False) and
                            self._is_owned(event)):
                        continue
            if self.is_child_object(res_type) and self._is_deleting(event):
                # Can be excluded, we expect parent objects
                continue
            if self._is_deleting(event):
                dn = event.values()[0]['attributes']['dn']
                removing_dns.add(dn)
            filtered_events.append(event)

        for event in self.to_aim_converter.convert(filtered_events):
            if event.dn not in self.tag_set:
                event.monitored = True
            if event.dn in removing_dns:
                LOG.info('ACI event: REMOVED %s' % event)
                removed.append(event)
            else:
                LOG.info('ACI event: ADDED %s' % event)
                updated.append(event)

        upd_trees, upd_op_trees, upd_mon_trees = self.tree_builder.build(
            [], updated, removed,
            {self.tree_builder.CONFIG: {self.tenant_name: self._state},
             self.tree_builder.MONITOR:
                 {self.tenant_name: self._monitored_state},
             self.tree_builder.OPER:
                 {self.tenant_name: self._operational_state}})

        # Send events on update
        modified = False
        for upd, tree, readable in [
                (upd_trees, self._state, "configuration"),
                (upd_op_trees, self._operational_state, "operational"),
                (upd_mon_trees, self._monitored_state, "monitored")]:
            if upd:
                modified = True
                LOG.debug("New %s tree for tenant %s: %s" %
                          (readable, self.tenant_name, tree))
        if modified:
            event_handler.EventHandler.reconcile()

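# Illustrative sketch (not part of the source): _is_deleting is assumed
# here to inspect the event's status attribute (STATUS_FIELD is likewise
# assumed to be 'status'), treating 'deleted' as a removal. A reduced
# version of that check under those assumptions:
def is_deleting(event):
    attrs = list(event.values())[0]['attributes']
    return attrs.get('status') == 'deleted'

assert is_deleting({'fvBD': {'attributes': {'dn': 'uni/tn-a/BD-b',
                                            'status': 'deleted'}}})
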
def _push_aim_resources(self):
    dn_mgr = apic_client.DNManager()
    decompose = dn_mgr.aci_decompose_dn_guess
    with utils.get_rlock(lcon.ACI_BACKLOG_LOCK_NAME_PREFIX +
                         self.tenant_name):
        while not self.object_backlog.empty():
            request = self.object_backlog.get()
            for method, aim_objects in request.iteritems():
                # Method will be either "create" or "delete"
                # sort the aim_objects based on DN first for DELETE method
                sorted_aim_objs = aim_objects
                if method == base_universe.DELETE:
                    sorted_aim_objs = sorted(
                        aim_objects,
                        key=lambda x: x.values()[0]['attributes']['dn'])
                potential_parent_dn = ' '
                for aim_object in sorted_aim_objs:
                    # get MO from ACI client, identify it via its DN parts
                    # and push the new body
                    if method == base_universe.DELETE:
                        # If a parent is also being deleted then we don't
                        # have to send those children requests to APIC
                        dn = aim_object.values()[0]['attributes']['dn']
                        res_type = aim_object.keys()[0]
                        decomposed = decompose(dn, res_type)
                        parent_dn = dn_mgr.build(decomposed[1][:-1])
                        if parent_dn.startswith(potential_parent_dn):
                            continue
                        else:
                            potential_parent_dn = dn
                        to_push = [copy.deepcopy(aim_object)]
                    else:
                        if getattr(aim_object, 'monitored', False):
                            # When pushing to APIC, treat monitored
                            # objects as pre-existing
                            aim_object.monitored = False
                            aim_object.pre_existing = True
                        to_push = self.to_aci_converter.convert(
                            [aim_object])
                    LOG.debug('%s AIM object %s in APIC' % (
                        method, repr(aim_object)))
                    # Set TAGs before pushing the request
                    tags = []
                    if method == base_universe.CREATE:
                        # No need to deal with tags on deletion
                        for obj in to_push:
                            if not obj.keys()[0].startswith(TAG_KEY):
                                dn = obj.values()[0]['attributes']['dn']
                                dn += '/tag-%s' % self.tag_name
                                tags.append(
                                    {"tagInst__%s" % obj.keys()[0]:
                                     {"attributes": {"dn": dn}}})
                    LOG.debug("Pushing %s into APIC: %s" %
                              (method, to_push + tags))
                    # Multiple objects could result from a conversion,
                    # push them in a single transaction
                    try:
                        if method == base_universe.DELETE:
                            for obj in to_push + tags:
                                attr = obj.values()[0]['attributes']
                                self.aci_session.DELETE(
                                    '/mo/%s.json' % attr.pop('dn'))
                        else:
                            with self.aci_session.transaction(
                                    top_send=True) as trs:
                                for obj in to_push + tags:
                                    attr = obj.values()[0]['attributes']
                                    mo, parents_rns = decompose(
                                        attr.pop('dn'), obj.keys()[0])
                                    rns = dn_mgr.filter_rns(parents_rns)
                                    getattr(getattr(self.aci_session, mo),
                                            method)(*rns, transaction=trs,
                                                    **attr)
                        # Object creation was successful, change object
                        # state
                        self.creation_succeeded(aim_object)
                    except Exception as e:
                        LOG.debug(traceback.format_exc())
                        LOG.error("An error has occurred during %s for "
                                  "object %s: %s" %
                                  (method, aim_object, e.message))
                        if method == base_universe.CREATE:
                            err_type = (
                                self.error_handler.analyze_exception(e))
                            # REVISIT(ivar): for now, treat UNKNOWN errors
                            # the same way as OPERATION_TRANSIENT.
                            # Investigate a way to understand when such
                            # errors might require agent restart.
                            self.creation_failed(aim_object, e.message,
                                                 err_type)

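# Illustrative sketch (not part of the source): ownership is marked by
# attaching a tagInst child under each pushed object's DN, exactly as the
# loop above builds it. Constructing that tag body for a given DN and tag
# name:
def make_tag(parent_type, parent_dn, tag_name):
    return {"tagInst__%s" % parent_type:
            {"attributes": {"dn": "%s/tag-%s" % (parent_dn, tag_name)}}}

tag = make_tag('fvTenant', 'uni/tn-a', 'openstack_aid')
assert tag == {'tagInst__fvTenant':
               {'attributes': {'dn': 'uni/tn-a/tag-openstack_aid'}}}
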
def _pop_up_sync_log(self, delete_candidates):
    for root in delete_candidates:
        with utils.get_rlock(lcon.SYNC_LOG_LOCK + root):
            self._sync_log.pop(root, None)

def _track_universe_actions(self, actions, root):
    """Track Universe Actions.

    Keep track of what the universe has been doing in the past few
    iterations. It keeps count of any operation repeated over time, and
    decreases the count of actions that are not happening in this
    iteration.
    :param actions: dictionary in the form
                    {'root': {'create': {'hash':}, 'delete': {}}}
    :param root: root under consideration
    :return:
    """
    # TODO(ivar): if tenant is unserved, its action track will leak until
    # AID is restarted. Be aware of this during tracking refactoring.
    curr_time = time.time()
    reset = False
    seen = set()
    fail = []
    skip = []
    # TODO(ivar): we might try to acquire lock in a non-blocking fashion,
    # and skip synchronization for this root if it fails.
    with utils.get_rlock(lcon.SYNC_LOG_LOCK + root):
        root_state = self._sync_log.setdefault(
            root, {'create': {}, 'delete': {}})
        new_state = {'create': {}, 'delete': {}}
        for action in [CREATE, DELETE]:
            for res in self._action_items_to_aim_resources(actions,
                                                           action):
                if res in seen:
                    continue
                seen.add(res)
                # Same resource created twice in the same iteration is
                # increased only once
                if root != res.root:
                    raise exceptions.BadTrackingArgument(
                        exp=root, act=res.root, res=actions)
                new = new_state[action].setdefault(
                    res, {'limit': self.reset_retry_limit, 'res': res,
                          'retries': -1, 'action': ACTION_RESET,
                          'last': curr_time, 'next': curr_time})
                curr = root_state[action].get(res, {})
                if curr:
                    new.update(curr)
                curr = new
                if curr_time < curr['next']:
                    # Let's not make any consideration about this object
                    LOG.debug("AIM object %s is being re-tried too soon "
                              "(delta: %s secs). Skipping for now." %
                              (str(res), curr['next'] - curr_time))
                    skip.append((action, res))
                    continue
                curr['next'] = curr_time + utils.get_backoff_time(
                    self.max_backoff_time, curr['retries'])
                curr['retries'] += 1
                if curr['retries'] > curr['limit']:
                    if curr['action'] == ACTION_RESET:
                        LOG.warning("AIM object %s failed %s more than %s "
                                    "times, resetting its root" %
                                    (str(res), action, curr['retries']))
                        reset = True
                        curr['limit'] = self.purge_retry_limit
                        curr['action'] = ACTION_PURGE
                    else:
                        LOG.warning("AIM object %s failed %s more than %s "
                                    "times, going to ERROR state" %
                                    (str(res), action, curr['retries']))
                        curr['limit'] += 5
                        fail.append((action, res))
        self._sync_log[root] = new_state
    return reset, fail, skip

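# Illustrative sketch (not part of the source): utils.get_backoff_time is
# assumed here to implement capped exponential backoff, i.e. the retry
# delay roughly doubles per attempt up to a maximum; the jitter below is
# also part of this assumption, not taken from the source.
import random

def get_backoff_time(max_backoff, retries, base=1.0):
    # 'next' above becomes now + this delay; randomizing it avoids a
    # thundering herd when many objects fail in the same iteration.
    delay = min(max_backoff, base * (2 ** max(0, retries)))
    return delay * random.uniform(0.5, 1.0)
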
def _push_aim_resources(self):
    with utils.get_rlock(lcon.ACI_BACKLOG_LOCK_NAME_PREFIX +
                         self.tenant_name):
        while not self.object_backlog.empty():
            request = self.object_backlog.get()
            for method, aim_objects in request.iteritems():
                # Method will be either "create" or "delete"
                for aim_object in aim_objects:
                    # get MO from ACI client, identify it via its DN parts
                    # and push the new body
                    LOG.debug('%s AIM object %s in APIC' %
                              (method, repr(aim_object)))
                    if method == base_universe.DELETE:
                        to_push = [copy.deepcopy(aim_object)]
                    else:
                        if getattr(aim_object, 'monitored', False):
                            # When pushing to APIC, treat monitored
                            # objects as pre-existing
                            aim_object.monitored = False
                            aim_object.pre_existing = True
                        to_push = self.to_aci_converter.convert(
                            [aim_object])
                    # Set TAGs before pushing the request
                    tags = []
                    if method == base_universe.CREATE:
                        # No need to deal with tags on deletion
                        for obj in to_push:
                            if not obj.keys()[0].startswith(TAG_KEY):
                                dn = obj.values()[0]['attributes']['dn']
                                dn += '/tag-%s' % self.tag_name
                                tags.append(
                                    {"tagInst__%s" % obj.keys()[0]:
                                     {"attributes": {"dn": dn}}})
                    LOG.debug("Pushing %s into APIC: %s" %
                              (method, to_push + tags))
                    # Multiple objects could result from a conversion,
                    # push them in a single transaction
                    dn_mgr = apic_client.DNManager()
                    decompose = dn_mgr.aci_decompose_dn_guess
                    try:
                        if method == base_universe.CREATE:
                            with self.aci_session.transaction(
                                    top_send=True) as trs:
                                for obj in to_push + tags:
                                    attr = obj.values()[0]['attributes']
                                    mo, parents_rns = decompose(
                                        attr.pop('dn'), obj.keys()[0])
                                    rns = dn_mgr.filter_rns(parents_rns)
                                    getattr(getattr(self.aci_session, mo),
                                            method)(*rns, transaction=trs,
                                                    **attr)
                        else:
                            for obj in to_push + tags:
                                attr = obj.values()[0]['attributes']
                                self.aci_session.DELETE(
                                    '/mo/%s.json' % attr.pop('dn'))
                        # Object creation was successful, change object
                        # state
                        if method == base_universe.CREATE:
                            self.creation_succeeded(aim_object)
                    except Exception as e:
                        LOG.debug(traceback.format_exc())
                        LOG.error("An error has occurred during %s for "
                                  "object %s: %s" %
                                  (method, aim_object, e.message))
                        if method == base_universe.CREATE:
                            err_type = (
                                self.error_handler.analyze_exception(e))
                            # REVISIT(ivar): for now, treat UNKNOWN errors
                            # the same way as OPERATION_TRANSIENT.
                            # Investigate a way to understand when such
                            # errors might require agent restart.
                            self.creation_failed(aim_object, e.message,
                                                 err_type)

def get_monitored_state_copy(self):
    with utils.get_rlock(lcon.ACI_TREE_LOCK_NAME_PREFIX + self.tenant_name):
        return structured_tree.StructuredHashTree.from_string(
            str(self._monitored_state),
            root_key=self._monitored_state.root_key)