Example #1
 def get_state_copy(self):
     with utils.get_rlock(lcon.ACI_TREE_LOCK_NAME_PREFIX +
                          self.tenant_name):
         return structured_tree.StructuredHashTree.from_string(
             str(self._state),
             root_key=self._state.root_key,
             has_populated=self._state.has_populated)
 def establish_ws_session(self, max_retries=None):
     try:
         with utils.get_rlock(lcon.ACI_WS_CONNECTION_LOCK, blocking=False):
             retries = 0
             self._reload_websocket_config()
             max_retries = max_retries or 2 * len(self.ws_urls)
             while retries < max_retries:
                 if self.session and self.session.session:
                     self.session.close()
                 LOG.info('Establishing WS connection with url: %s',
                          self.ws_urls[0])
                 self.session = acitoolkit.Session(
                     self.ws_urls[0],
                     self.apic_username,
                     self.apic_password,
                     verify_ssl=self.verify_ssl_certificate,
                     cert_name=self.cert_name,
                     key=self.private_key_file)
                 resp = self.session.login()
                 if not resp.ok:
                     LOG.warn('Websocket connection failed: %s' % resp.text)
                     self.ws_urls.rotate(-1)
                     LOG.info('Rotating websocket URL, '
                              'using: %s' % self.ws_urls[0])
                     retries += 1
                     continue
                 LOG.info('Websocket connection succeeded.')
                 self._spawn_monitors()
                 return self.session
             utils.perform_harakiri(
                 LOG, "Cannot establish WS connection "
                 "after %s retries." % retries)
     except utils.LockNotAcquired:
         # Some other thread is trying to reconnect
         return
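
All of these snippets serialize access through named re-entrant locks obtained from utils.get_rlock; with blocking=False, a failed acquisition raises utils.LockNotAcquired, so only one thread performs the reconnect while the others bail out. A minimal sketch of a compatible helper, assuming a process-local lock registry (the real aim.common.utils implementation may differ):

    import contextlib
    import threading

    class LockNotAcquired(Exception):
        """Raised when a non-blocking acquisition fails."""

    _locks = {}
    _registry_lock = threading.Lock()

    @contextlib.contextmanager
    def get_rlock(name, blocking=True):
        # One re-entrant lock per name, created on first use.
        with _registry_lock:
            lock = _locks.setdefault(name, threading.RLock())
        if not lock.acquire(blocking=blocking):
            raise LockNotAcquired(name)
        try:
            yield lock
        finally:
            lock.release()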
 def observe(self, context):
     # Copy state accumulated so far
     global serving_tenants
     new_state = {}
     for tenant in serving_tenants.keys():
         # Only copy state if the tenant is warm
         with utils.get_rlock(lcon.ACI_TREE_LOCK_NAME_PREFIX + tenant):
             if serving_tenants[tenant].is_warm():
                 new_state[tenant] = self._get_state_copy(tenant)
     self._state = new_state
Example #6
    def _reconciliation_cycle(self, serve=True):
        # Regenerate context at each reconciliation cycle
        # TODO(ivar): set request-id so that oslo log can track it
        aim_ctx = context.AimContext(store=api.get_store())
        if serve:
            LOG.info("Start serving cycle.")
            tenants = self._calculate_tenants(aim_ctx)
            # Serve tenants
            for pair in self.multiverse:
                pair[DESIRED].serve(aim_ctx, tenants)
                pair[CURRENT].serve(aim_ctx, tenants)
            LOG.info("AID %s is currently serving: "
                     "%s" % (self.agent.id, tenants))

        LOG.info("Start reconciliation cycle.")
        # REVISIT(ivar) Might be wise to wait here upon tenant serving to allow
        # time for events to happen

        # Observe the two universes to fix their current state
        with utils.get_rlock(lcon.AID_OBSERVER_LOCK):
            for pair in self.multiverse:
                pair[DESIRED].observe(aim_ctx)
                pair[CURRENT].observe(aim_ctx)

        delete_candidates = set()
        vetoes = set()
        for pair in self.multiverse:
            pair[DESIRED].vote_deletion_candidates(
                aim_ctx, pair[CURRENT], delete_candidates, vetoes)
            pair[CURRENT].vote_deletion_candidates(
                aim_ctx, pair[DESIRED], delete_candidates, vetoes)
        # Reconcile everything
        changes = False
        for pair in self.multiverse:
            changes |= pair[CURRENT].reconcile(aim_ctx, pair[DESIRED],
                                               delete_candidates)
        if not changes:
            LOG.info("Congratulations! your multiverse is nice and synced :)")

        for pair in self.multiverse:
            pair[DESIRED].finalize_deletion_candidates(aim_ctx, pair[CURRENT],
                                                       delete_candidates)
            pair[CURRENT].finalize_deletion_candidates(aim_ctx, pair[DESIRED],
                                                       delete_candidates)

        # Delete tenants if there's consensus
        for tenant in delete_candidates:
            # All the universes agree on this tenant cleanup
            for pair in self.multiverse:
                for universe in pair.values():
                    LOG.info("%s removing tenant from AID %s" %
                             (universe.name, tenant))
                    universe.cleanup_state(aim_ctx, tenant)
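
For orientation, the multiverse iterated above is a list of pairs, each mapping DESIRED and CURRENT to a universe object. The sketch below is illustrative only; the stub universe and its method bodies are assumptions, not the project's classes:

    DESIRED, CURRENT = 'desired', 'current'

    class StubUniverse:
        """Stand-in for the project's universe classes (assumption)."""
        def __init__(self, name):
            self.name = name
        def serve(self, ctx, tenants):
            pass
        def observe(self, ctx):
            pass
        def reconcile(self, ctx, other, delete_candidates):
            return False  # this stub reports "no changes"

    multiverse = [
        {DESIRED: StubUniverse('aim-config'),
         CURRENT: StubUniverse('aci-config')},
        {DESIRED: StubUniverse('aim-oper'),
         CURRENT: StubUniverse('aci-oper')},
    ]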
 def _event_loop(self):
     start_time = time.time()
     # Push the backlog right before the event loop, so that
     # all the events we generate here are likely caught in this
     # iteration.
     self._push_aim_resources()
     if self.ws_context.has_event(self.tenant.urls):
         with utils.get_rlock(lcon.ACI_TREE_LOCK_NAME_PREFIX +
                              self.tenant_name):
             events = self.ws_context.get_event_data(self.tenant.urls)
             for event in events:
                 # REVISIT(ivar): remove vmmDomP once websocket ACI bug is
                 # fixed
                 if (list(event.keys())[0] in [
                         self.tenant.type, 'vmmDomP'
                 ] and not event[list(
                         event.keys())[0]]['attributes'].get(STATUS_FIELD)):
                     LOG.info("Resetting Tree %s" % self.tenant_name)
                     # REVISIT(ivar): on subscription to VMMPolicy objects,
                     # aci doesn't return the root object itself because of
                     # a bug. Let's craft a fake root to work around this
                     # problem
                     if self.tenant_name.startswith('vmmp-'):
                         LOG.debug('Faking vmmProvP %s' % self.tenant_name)
                         events.append({
                             'vmmProvP': {
                                 'attributes': {
                                     'dn': self.tenant.dn
                                 }
                             }
                         })
                     # This is a full resync, trees need to be reset
                     self._state = structured_tree.StructuredHashTree()
                     self._operational_state = (
                         structured_tree.StructuredHashTree())
                     self._monitored_state = (
                         structured_tree.StructuredHashTree())
                     self.tag_set = set()
                     break
             # REVISIT(ivar): there's already a debug log in acitoolkit
             # listing all the events received one by one. The following
             # would be more compact, we need to choose which to keep.
             # LOG.debug("received events for root %s: %s" %
             #           (self.tenant_name, events))
             # Make events list flat
             self.flat_events(events)
             # Pull incomplete objects
             events = self._fill_events(events)
             # Manage Tags
             events = self.ownership_mgr.filter_ownership(events)
             self._event_to_tree(events)
     time.sleep(max(0, self.polling_yield - (time.time() - start_time)))
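
The closing time.sleep(...) line implements a fixed-period poll: after doing the work, sleep only for whatever remains of the polling_yield budget, and never a negative amount. A standalone sketch of the idiom (the function name is illustrative):

    import time

    def run_polling_loop(work, period, iterations):
        for _ in range(iterations):
            start = time.time()
            work()
            # Sleep out the rest of the period; skip the sleep entirely
            # if the work overran it.
            time.sleep(max(0, period - (time.time() - start)))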
    def push_aim_resources(self, resources):
        """Given a map of AIM resources for this tenant, push them into APIC

        Stash the objects to be eventually pushed. Given the nature of the
        system we don't really care if we lose one or two messages, or
        even all of them, or if we mess the order, or get involved in
        a catastrophic meteor impact, we should always be able to get
        back in sync.

        :param resources: a dictionary with "create" and "delete" resources
        :return:
        """
        try:
            with utils.get_rlock(lcon.ACI_BACKLOG_LOCK_NAME_PREFIX +
                                 self.tenant_name,
                                 blocking=False):
                backlock = Queue.Queue()
                while not self.object_backlog.empty():
                    requests = self.object_backlog.get()
                    # check if there's an event to squash
                    for op in ['create', 'delete']:
                        for i, req in enumerate(requests.get(op, [])):
                            for j, new in enumerate(resources.get(op, [])):
                                if op == 'create':
                                    req_dn = req.dn
                                    new_dn = new.dn
                                else:
                                    # Delete items are in ACI format
                                    req_dn = list(
                                        req.values())[0]['attributes']['dn']
                                    new_dn = list(
                                        new.values())[0]['attributes']['dn']
                                if req_dn == new_dn:
                                    # Replace old with new
                                    requests[op][i] = new
                                    break
                            else:
                                # No colliding item found
                                continue
                            # new can be removed from resources
                            resources[op].pop(j)
                    backlock.put(requests)
                if any(resources.values()):
                    backlock.put(resources)
                self.object_backlog = backlock
        except utils.LockNotAcquired:
            # If changes need to be pushed, AID will do it on the next
            # iteration
            pass
    def push_aim_resources(self, resources):
        """Given a map of AIM resources for this tenant, push them into APIC

        Stash the objects to be eventually pushed. Given the nature of the
        system we don't really care if we lose one or two messages, or
        even all of them, or if we mess the order, or get involved in
        a catastrophic meteor impact, we should always be able to get
        back in sync.

        :param resources: a dictionary with "create" and "delete" resources
        :return:
        """
        try:
            with utils.get_rlock(lcon.ACI_BACKLOG_LOCK_NAME_PREFIX +
                                 self.tenant_name, blocking=False):
                backlock = Queue.Queue()
                while not self.object_backlog.empty():
                    requests = self.object_backlog.get()
                    # check if there's an event to squash
                    for op in ['create', 'delete']:
                        for i, req in enumerate(requests.get(op, [])):
                            for j, new in enumerate(resources.get(op, [])):
                                if op == 'create':
                                    req_dn = req.dn
                                    new_dn = new.dn
                                else:
                                    # Delete items are in ACI format
                                    req_dn = req.values()[0][
                                        'attributes']['dn']
                                    new_dn = new.values()[0][
                                        'attributes']['dn']
                                if req_dn == new_dn:
                                    # Replace old with new
                                    requests[op][i] = new
                                    break
                            else:
                                # No colliding item found
                                continue
                            # new can be removed from resources
                            resources[op].pop(j)
                    backlock.put(requests)
                if any(resources.values()):
                    backlock.put(resources)
                self.object_backlog = backlock
        except utils.LockNotAcquired:
            # If changes need to be pushed, AID will do it on the next
            # iteration
            pass
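
The nested loop in both variants leans on Python's for ... else clause: the else branch runs only when the inner loop completes without break, so the trailing resources[op].pop(j) executes only after a colliding item was found and replaced. A self-contained toy of the same squash idiom (not project code):

    def squash(requests, new_items, key):
        for i, req in enumerate(requests):
            for j, new in enumerate(new_items):
                if key(req) == key(new):
                    requests[i] = new  # replace old with new
                    break
            else:
                # No collision for this queued request; keep it as-is.
                continue
            # Only reached via break: 'new' has been consumed.
            new_items.pop(j)
        return requests, new_items

    reqs, extra = squash([{'dn': 'a'}, {'dn': 'b'}],
                         [{'dn': 'b', 'v': 2}, {'dn': 'c'}],
                         key=lambda o: o['dn'])
    # reqs == [{'dn': 'a'}, {'dn': 'b', 'v': 2}]; extra == [{'dn': 'c'}]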
    def _init_aim_k8s(self, types_to_observe):
        if self._needs_init:
            # NOTE(ivar): we need to lock the observer here to prevent it
            # from reading empty or incomplete trees
            # REVISIT(ivar): this is NOT gonna work for multi AID. In general,
            # the whole K8S watcher cannot run as-is in a multi AID environment
            with utils.get_rlock(lcon.AID_OBSERVER_LOCK):
                self._reset_trees()
                self._renew_klient_watch()

                self._version_by_type = {}
                for typ in self._k8s_types_to_observe:
                    self._init_stream_for_type(typ)
                self._persistence_loop(save_on_empty=True)
                LOG.info("Trees initialized")
                self._needs_init = False
 def _event_loop(self):
     start_time = time.time()
     # Push the backlog right before the event loop, so that
     # all the events we generate here are likely caught in this
     # iteration.
     self._push_aim_resources()
     if self.ws_context.has_event(self.tenant.urls):
         with utils.get_rlock(lcon.ACI_TREE_LOCK_NAME_PREFIX +
                              self.tenant_name):
             events = self.ws_context.get_event_data(self.tenant.urls)
             for event in events:
                 # REVISIT(ivar): remove vmmDomP once websocket ACI bug is
                 # fixed
                 if (event.keys()[0] in [self.tenant.type,
                                         'vmmDomP'] and not
                         event[event.keys()[0]]['attributes'].get(
                             STATUS_FIELD)):
                     LOG.info("Resetting Tree %s" % self.tenant_name)
                     # REVISIT(ivar): on subscription to VMMPolicy objects,
                     # aci doesn't return the root object itself because of
                     # a bug. Let's craft a fake root to work around this
                     # problem
                     if self.tenant_name.startswith('vmmp-'):
                         LOG.debug('Faking vmmProvP %s' % self.tenant_name)
                         events.append({'vmmProvP': {
                             'attributes': {'dn': self.tenant.dn}}})
                     # This is a full resync, trees need to be reset
                     self._state = structured_tree.StructuredHashTree()
                     self._operational_state = (
                         structured_tree.StructuredHashTree())
                     self._monitored_state = (
                         structured_tree.StructuredHashTree())
                     self.tag_set = set()
                     break
             # REVISIT(ivar): there's already a debug log in acitoolkit
             # listing all the events received one by one. The following
             # would be more compact, we need to choose which to keep.
             # LOG.debug("received events for root %s: %s" %
             #           (self.tenant_name, events))
             # Make events list flat
             self.flat_events(events)
             # Pull incomplete objects
             events = self._fill_events(events)
             # Manage Tags
             events = self._filter_ownership(events)
             self._event_to_tree(events)
     time.sleep(max(0, self.polling_yield - (time.time() - start_time)))
    def _daemon_loop(self, aim_ctx, serve=True):
        if serve:
            LOG.info("Start serving cycle.")
            tenants = self._calculate_tenants(aim_ctx)
            # Filter delete candidates with currently served tenants
            self.delete_candidates = {
                k: v
                for k, v in self.delete_candidates.iteritems() if k in tenants
            }
            # Serve tenants
            for pair in self.multiverse:
                pair[DESIRED].serve(tenants)
                pair[CURRENT].serve(tenants)
            LOG.info("AID %s is currently serving: "
                     "%s" % (self.agent.id, tenants))

        LOG.info("Start reconciliation cycle.")
        # REVISIT(ivar) Might be wise to wait here upon tenant serving to allow
        # time for events to happen

        # Observe the two universes to fix their current state
        with utils.get_rlock(lcon.AID_OBSERVER_LOCK):
            for pair in self.multiverse:
                pair[DESIRED].observe()
                pair[CURRENT].observe()

        # Reconcile everything
        changes = False
        for pair in self.multiverse:
            changes |= pair[CURRENT].reconcile(pair[DESIRED],
                                               self.delete_candidates)
        if not changes:
            LOG.info("Congratulations! your multiverse is nice and synced :)")

        # Delete tenants if there's consensus
        for tenant, votes in self.delete_candidates.iteritems():
            if len(votes) == self.consensus:
                # All the universes agree on this tenant cleanup
                for pair in self.multiverse:
                    for universe in pair.values():
                        LOG.info("%s removing tenant from AIM %s" %
                                 (universe.name, tenant))
                        universe.cleanup_state(tenant)
    def establish_ws_session(self, max_retries=None, recovery_mode=False):
        try:
            with utils.get_rlock(lcon.ACI_WS_CONNECTION_LOCK, blocking=False):
                if not recovery_mode:
                    purpose = NORMAL_PURPOSE
                    self._reload_websocket_config()
                    self.need_recovery = False
                else:
                    purpose = RECOVERY_PURPOSE
                backup_urls = collections.deque()
                max_retries = max_retries or 2 * len(self.ws_urls)
                url_max_retries = max(1, max_retries / len(self.ws_urls))
                aim_context = aim_ctx.AimContext(store=api.get_store())
                for url in self.ws_urls:
                    apic_assign = api_infra.ApicAssignment(apic_host=url)
                    apic_assign_obj = self.manager.get(aim_context,
                                                       apic_assign)
                    if (apic_assign_obj
                            and apic_assign_obj.aim_aid_id != self.agent_id
                            and not apic_assign_obj.is_available(aim_context)):
                        backup_urls.append(url)
                        continue

                    # This means the original aim-aid owner might have
                    # crashed or something. We will just take it!
                    if (recovery_mode and apic_assign_obj
                            and self.session.ipaddr in url):
                        obj = self._update_apic_assign_db(
                            aim_context, apic_assign, apic_assign_obj)
                        if obj is None:
                            continue
                        self.need_recovery = False
                        self.apic_assign_obj = obj
                        return

                    is_conn_successful = self._ws_session_login(
                        url, url_max_retries, purpose, aim_context,
                        apic_assign, apic_assign_obj)
                    if is_conn_successful:
                        return
                    else:
                        backup_urls.append(url)

                if recovery_mode:
                    return
                # Try the backup urls. Randomly rotate the list first so that
                # the extra aim-aids won't all go for the same backup url.
                backup_urls_len = len(backup_urls)
                if backup_urls_len > 1:
                    backup_urls.rotate(random.randint(1, backup_urls_len))
                for url in backup_urls:
                    is_conn_successful = self._ws_session_login(
                        url, url_max_retries, BACKUP_PURPOSE)
                    if is_conn_successful:
                        return
                utils.perform_harakiri(
                    LOG, "Cannot establish WS connection "
                    "after %s retries." % max_retries)
        except utils.LockNotAcquired:
            # Some other thread is trying to reconnect
            return
 def _push_aim_resources(self):
     dn_mgr = apic_client.DNManager()
     decompose = dn_mgr.aci_decompose_dn_guess
     with utils.get_rlock(lcon.ACI_BACKLOG_LOCK_NAME_PREFIX +
                          self.tenant_name):
         while not self.object_backlog.empty():
             request = self.object_backlog.get()
             for method, aim_objects in request.items():
                 # Method will be either "create" or "delete"
                 # sort the aim_objects based on DN first for DELETE method
                 sorted_aim_objs = aim_objects
                 if method == base_universe.DELETE:
                     sorted_aim_objs = sorted(aim_objects,
                                              key=lambda x: list(x.values())
                                              [0]['attributes']['dn'])
                 potential_parent_dn = ' '
                 for aim_object in sorted_aim_objs:
                     # get MO from ACI client, identify it via its DN parts
                     # and push the new body
                     if method == base_universe.DELETE:
                         # If a parent is also being deleted then we don't
                         # have to send those children requests to APIC
                         dn = list(
                             aim_object.values())[0]['attributes']['dn']
                         res_type = list(aim_object.keys())[0]
                         decomposed = decompose(dn, res_type)
                         parent_dn = dn_mgr.build(decomposed[1][:-1])
                         if parent_dn.startswith(potential_parent_dn):
                             continue
                         else:
                             potential_parent_dn = dn
                         to_push = [copy.deepcopy(aim_object)]
                     else:
                         if getattr(aim_object, 'monitored', False):
                             # When pushing to APIC, treat monitored
                             # objects as pre-existing
                             aim_object.monitored = False
                             aim_object.pre_existing = True
                         to_push = self.to_aci_converter.convert(
                             [aim_object])
                     LOG.debug('%s AIM object %s in APIC' %
                               (method, repr(aim_object)))
                     try:
                         if method == base_universe.CREATE:
                             # Set ownership before pushing the request
                             to_push = self.ownership_mgr.set_ownership_key(
                                 to_push)
                             LOG.debug("POSTING into APIC: %s" % to_push)
                             self._post_with_transaction(to_push)
                             self.creation_succeeded(aim_object)
                         else:
                             to_delete, to_update = (
                                 self.ownership_mgr.set_ownership_change(
                                     to_push))
                             LOG.debug("DELETING from APIC: %s" % to_delete)
                             for obj in to_delete:
                                 attr = list(obj.values())[0]['attributes']
                                 self.aci_session.DELETE('/mo/%s.json' %
                                                         attr.pop('dn'))
                             LOG.debug("UPDATING in APIC: %s" % to_update)
                             # Update object ownership
                             self._post_with_transaction(to_update,
                                                         modified=True)
                             if to_update:
                                 self.creation_succeeded(aim_object)
                     except Exception as e:
                         LOG.debug(traceback.format_exc())
                         LOG.error("An error has occurred during %s for "
                                   "object %s: %s" %
                                   (method, aim_object, str(e)))
                         if method == base_universe.CREATE:
                             err_type = (
                                 self.error_handler.analyze_exception(e))
                             # REVISIT(ivar): for now, treat UNKNOWN errors
                             # the same way as OPERATION_TRANSIENT.
                             # Investigate a way to understand when such
                             # errors might require agent restart.
                             self.creation_failed(aim_object, str(e),
                                                  err_type)
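
The DELETE branch above sorts requests by DN so that any child whose parent is also queued for deletion can be skipped, since APIC removes children together with their parent. A toy sketch of that pruning on bare DN strings (illustrative, and simpler than the snippet's actual parent-DN comparison):

    def prune_children(dns):
        kept, last = [], None
        for dn in sorted(dns):
            if last and dn.startswith(last + '/'):
                continue  # an ancestor is already scheduled for deletion
            kept.append(dn)
            last = dn
        return kept

    assert prune_children(['uni/tn-a', 'uni/tn-a/BD-b', 'uni/tn-c']) == \
        ['uni/tn-a', 'uni/tn-c']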
 def locked_func(self):
     with internal_utils.get_rlock('test2'):
         pass
 def finalize_deletion_candidates(self, context, other_universe,
                                  delete_candidates):
     for root in delete_candidates:
         with utils.get_rlock(lcon.SYNC_LOG_LOCK + root):
             self._sync_log.pop(root, None)
    def _event_to_tree(self, events):
        """Parse the event and push it into the tree

        This method requires translation between ACI and AIM model in order
        to honor the Universe contract.
        :param events: an ACI event in the form of a list of objects
        :return:
        """
        with utils.get_rlock(lcon.ACI_TREE_LOCK_NAME_PREFIX +
                             self.tenant_name):
            removed, updated = [], []
            removing_dns = set()
            filtered_events = []
            # Set the owned events
            for event in events:
                # Exclude some events from monitored objects.
                # Some RS objects can be set from AIM even for monitored
                # objects, therefore we need to exclude events regarding those
                # RS objects when we don't own them. One example is fvRsProv on
                # external networks
                type = event.keys()[0]
                if type in ACI_TYPES_NOT_CONVERT_IF_MONITOR:
                    # Check that the object is indeed correct looking at the
                    # parent
                    if self._check_parent_type(
                            event,
                            ACI_TYPES_NOT_CONVERT_IF_MONITOR[type]):
                        if not self._is_owned(event):
                            # For an RS object like fvRsProv we check the
                            # parent ownership as well.
                            continue
                # Exclude from conversion those listed RS objects that we
                # want to allow to be manually configured in ACI
                if type in ACI_TYPES_SKIP_ON_MANAGES:
                    if self._check_parent_type(
                            event, ACI_TYPES_SKIP_ON_MANAGES[type]):
                        # Check whether the event is owned, and whether its
                        # parent is.
                        if (not self._is_owned(event, check_parent=False) and
                                self._is_owned(event)):
                            continue
                if self.is_child_object(type) and self._is_deleting(event):
                    # Can be excluded, we expect parent objects
                    continue

                if self._is_deleting(event):
                    dn = event.values()[0]['attributes']['dn']
                    removing_dns.add(dn)
                filtered_events.append(event)
            for event in self.to_aim_converter.convert(filtered_events):
                if event.dn not in self.tag_set:
                    event.monitored = True
                if event.dn in removing_dns:
                    LOG.info('ACI event: REMOVED %s' % event)
                    removed.append(event)
                else:
                    LOG.info('ACI event: ADDED %s' % event)
                    updated.append(event)
            upd_trees, upd_op_trees, upd_mon_trees = self.tree_builder.build(
                [], updated, removed,
                {self.tree_builder.CONFIG: {self.tenant_name: self._state},
                 self.tree_builder.MONITOR:
                     {self.tenant_name: self._monitored_state},
                 self.tree_builder.OPER:
                     {self.tenant_name: self._operational_state}})

            # Send events on update
            modified = False
            for upd, tree, readable in [
                    (upd_trees, self._state, "configuration"),
                    (upd_op_trees, self._operational_state, "operational"),
                    (upd_mon_trees, self._monitored_state, "monitored")]:
                if upd:
                    modified = True
                    LOG.debug("New %s tree for tenant %s: %s" %
                              (readable, self.tenant_name, tree))
            if modified:
                event_handler.EventHandler.reconcile()
 def _push_aim_resources(self):
     dn_mgr = apic_client.DNManager()
     decompose = dn_mgr.aci_decompose_dn_guess
     with utils.get_rlock(lcon.ACI_BACKLOG_LOCK_NAME_PREFIX +
                          self.tenant_name):
         while not self.object_backlog.empty():
             request = self.object_backlog.get()
             for method, aim_objects in request.iteritems():
                 # Method will be either "create" or "delete"
                 # sort the aim_objects based on DN first for DELETE method
                 sorted_aim_objs = aim_objects
                 if method == base_universe.DELETE:
                     sorted_aim_objs = sorted(
                         aim_objects,
                         key=lambda x: x.values()[0]['attributes']['dn'])
                 potential_parent_dn = ' '
                 for aim_object in sorted_aim_objs:
                     # get MO from ACI client, identify it via its DN parts
                     # and push the new body
                     if method == base_universe.DELETE:
                         # If a parent is also being deleted then we don't
                         # have to send those children requests to APIC
                         dn = aim_object.values()[0]['attributes']['dn']
                         res_type = aim_object.keys()[0]
                         decomposed = decompose(dn, res_type)
                         parent_dn = dn_mgr.build(decomposed[1][:-1])
                         if parent_dn.startswith(potential_parent_dn):
                             continue
                         else:
                             potential_parent_dn = dn
                         to_push = [copy.deepcopy(aim_object)]
                     else:
                         if getattr(aim_object, 'monitored', False):
                             # When pushing to APIC, treat monitored
                             # objects as pre-existing
                             aim_object.monitored = False
                             aim_object.pre_existing = True
                         to_push = self.to_aci_converter.convert(
                             [aim_object])
                     LOG.debug('%s AIM object %s in APIC' % (
                         method, repr(aim_object)))
                     # Set TAGs before pushing the request
                     tags = []
                     if method == base_universe.CREATE:
                         # No need to deal with tags on deletion
                         for obj in to_push:
                             if not obj.keys()[0].startswith(TAG_KEY):
                                 dn = obj.values()[0]['attributes']['dn']
                                 dn += '/tag-%s' % self.tag_name
                                 tags.append({"tagInst__%s" % obj.keys()[0]:
                                              {"attributes": {"dn": dn}}})
                     LOG.debug("Pushing %s into APIC: %s" %
                               (method, to_push + tags))
                     # Multiple objects could result from a conversion, push
                     # them in a single transaction
                     try:
                         if method == base_universe.DELETE:
                             for obj in to_push + tags:
                                 attr = obj.values()[0]['attributes']
                                 self.aci_session.DELETE(
                                     '/mo/%s.json' % attr.pop('dn'))
                         else:
                             with self.aci_session.transaction(
                                     top_send=True) as trs:
                                 for obj in to_push + tags:
                                     attr = obj.values()[0]['attributes']
                                     mo, parents_rns = decompose(
                                         attr.pop('dn'), obj.keys()[0])
                                     rns = dn_mgr.filter_rns(parents_rns)
                                     getattr(getattr(self.aci_session, mo),
                                             method)(*rns, transaction=trs,
                                                     **attr)
                             # Object creation was successful, change object
                             # state
                             self.creation_succeeded(aim_object)
                     except Exception as e:
                         LOG.debug(traceback.format_exc())
                         LOG.error("An error has occurred during %s for "
                                   "object %s: %s" % (method, aim_object,
                                                      e.message))
                         if method == base_universe.CREATE:
                             err_type = (
                                 self.error_handler.analyze_exception(e))
                             # REVISIT(ivar): for now, treat UNKNOWN errors
                             # the same way as OPERATION_TRANSIENT.
                             # Investigate a way to understand when such
                             # errors might require agent restart.
                             self.creation_failed(aim_object, e.message,
                                                  err_type)
Example #20
 def _pop_up_sync_log(self, delete_candidates):
     for root in delete_candidates:
         with utils.get_rlock(lcon.SYNC_LOG_LOCK + root):
             self._sync_log.pop(root, None)
Example #21
    def _track_universe_actions(self, actions, root):
        """Track Universe Actions.

        Keep track of what the universe has been doing in the past few
        iterations, increasing the count of any operation repeated over time
        and decreasing the count of actions that are not happening in this
        iteration.
        :param actions: dictionary in the form {'root': {'create': {'hash':},
                                                         'delete': {}}}
        :param root: root under consideration
        :return:
        """
        # TODO(ivar): if tenant is unserved, its action track will leak until
        # AID is restarted. Be aware of this during tracking refactoring.
        curr_time = time.time()
        reset = False
        seen = set()
        fail = []
        skip = []
        # TODO(ivar): we might try to acquire lock in a non-blocking fashion,
        # and skip synchronization for this root if it fails.
        with utils.get_rlock(lcon.SYNC_LOG_LOCK + root):
            root_state = self._sync_log.setdefault(
                root, {'create': {}, 'delete': {}})
            new_state = {'create': {}, 'delete': {}}
            for action in [CREATE, DELETE]:
                for res in self._action_items_to_aim_resources(actions,
                                                               action):
                    if res in seen:
                        continue
                    seen.add(res)
                    # Same resource created twice in the same iteration is
                    # increased only once
                    if root != res.root:
                        raise exceptions.BadTrackingArgument(
                            exp=root, act=res.root, res=actions)
                    new = (new_state[action].setdefault(
                        res, {'limit': self.reset_retry_limit, 'res': res,
                              'retries': -1, 'action': ACTION_RESET,
                              'last': curr_time, 'next': curr_time}))
                    curr = root_state[action].get(res, {})
                    if curr:
                        new.update(curr)
                    curr = new
                    if curr_time < curr['next']:
                        # Let's not make any consideration about this object
                        LOG.debug("AIM object %s is being re-tried too soon "
                                  "(delta: %s secs). Skipping for now." %
                                  (str(res), curr['next'] - curr_time))
                        skip.append((action, res))
                        continue

                    curr['next'] = curr_time + utils.get_backoff_time(
                        self.max_backoff_time, curr['retries'])
                    curr['retries'] += 1
                    if curr['retries'] > curr['limit']:
                        if curr['action'] == ACTION_RESET:
                            LOG.warn("AIM object %s failed %s more than %s "
                                     "times, resetting its root" %
                                     (str(res), action, curr['retries']))
                            reset = True
                            curr['limit'] = self.purge_retry_limit
                            curr['action'] = ACTION_PURGE
                        else:
                            LOG.warn("AIM object %s failed %s more than %s "
                                     "times, going to ERROR state" %
                                     (str(res), action, curr['retries']))
                            curr['limit'] += 5
                            fail.append((action, res))
            self._sync_log[root] = new_state
            return reset, fail, skip
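
Each failure pushes the next attempt further out through utils.get_backoff_time. A plausible sketch of such a helper, assuming capped exponential backoff with full jitter (the real implementation may differ; note the snippet starts 'retries' at -1, which the max() below absorbs):

    import random

    def get_backoff_time(max_backoff_time, retries):
        # Exponential term, capped, then jittered so that many tenants
        # don't retry in lockstep.
        base = min(max_backoff_time, 2 ** max(0, retries))
        return random.uniform(0, base)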
Example #22
    def _event_to_tree(self, events):
        """Parse the event and push it into the tree

        This method requires translation between ACI and AIM model in order
        to honor the Universe contract.
        :param events: an ACI event in the form of a list of objects
        :return:
        """
        with utils.get_rlock(lcon.ACI_TREE_LOCK_NAME_PREFIX +
                             self.tenant_name):
            removed, updated = [], []
            removing_dns = set()
            filtered_events = []
            # Set the owned events
            for event in events:
                # Exclude some events from monitored objects.
                # Some RS objects can be set from AIM even for monitored
                # objects, therefore we need to exclude events regarding those
                # RS objects when we don't own them. One example is fvRsProv on
                # external networks
                type = event.keys()[0]
                if type in ACI_TYPES_NOT_CONVERT_IF_MONITOR:
                    # Check that the object is indeed correct looking at the
                    # parent
                    if self._check_parent_type(
                            event, ACI_TYPES_NOT_CONVERT_IF_MONITOR[type]):
                        if not self._is_owned(event):
                            # For an RS object like fvRsProv we check the
                            # parent ownership as well.
                            continue
                # Exclude from conversion those listed RS objects that we
                # want to allow to be manually configured in ACI
                if type in ACI_TYPES_SKIP_ON_MANAGES:
                    if self._check_parent_type(
                            event, ACI_TYPES_SKIP_ON_MANAGES[type]):
                        # Check whether the event is owned, and whether its
                        # parent is.
                        if (not self._is_owned(event, check_parent=False)
                                and self._is_owned(event)):
                            continue
                if self.is_child_object(type) and self._is_deleting(event):
                    # Can be excluded, we expect parent objects
                    continue

                if self._is_deleting(event):
                    dn = event.values()[0]['attributes']['dn']
                    removing_dns.add(dn)
                filtered_events.append(event)
            for event in self.to_aim_converter.convert(filtered_events):
                if event.dn not in self.tag_set:
                    event.monitored = True
                if event.dn in removing_dns:
                    LOG.info('ACI event: REMOVED %s' % event)
                    removed.append(event)
                else:
                    LOG.info('ACI event: ADDED %s' % event)
                    updated.append(event)
            upd_trees, upd_op_trees, upd_mon_trees = self.tree_builder.build(
                [], updated, removed, {
                    self.tree_builder.CONFIG: {
                        self.tenant_name: self._state
                    },
                    self.tree_builder.MONITOR: {
                        self.tenant_name: self._monitored_state
                    },
                    self.tree_builder.OPER: {
                        self.tenant_name: self._operational_state
                    }
                })

            # Send events on update
            modified = False
            for upd, tree, readable in [
                (upd_trees, self._state, "configuration"),
                (upd_op_trees, self._operational_state, "operational"),
                (upd_mon_trees, self._monitored_state, "monitored")
            ]:
                if upd:
                    modified = True
                    LOG.debug("New %s tree for tenant %s: %s" %
                              (readable, self.tenant_name, tree))
            if modified:
                event_handler.EventHandler.reconcile()
Example #23
 def _push_aim_resources(self):
     with utils.get_rlock(lcon.ACI_BACKLOG_LOCK_NAME_PREFIX +
                          self.tenant_name):
         while not self.object_backlog.empty():
             request = self.object_backlog.get()
             for method, aim_objects in request.iteritems():
                 # Method will be either "create" or "delete"
                 for aim_object in aim_objects:
                     # get MO from ACI client, identify it via its DN parts
                     # and push the new body
                     LOG.debug('%s AIM object %s in APIC' %
                               (method, repr(aim_object)))
                     if method == base_universe.DELETE:
                         to_push = [copy.deepcopy(aim_object)]
                     else:
                         if getattr(aim_object, 'monitored', False):
                             # When pushing to APIC, treat monitored
                             # objects as pre-existing
                             aim_object.monitored = False
                             aim_object.pre_existing = True
                         to_push = self.to_aci_converter.convert(
                             [aim_object])
                     # Set TAGs before pushing the request
                     tags = []
                     if method == base_universe.CREATE:
                         # No need to deal with tags on deletion
                         for obj in to_push:
                             if not obj.keys()[0].startswith(TAG_KEY):
                                 dn = obj.values()[0]['attributes']['dn']
                                 dn += '/tag-%s' % self.tag_name
                                 tags.append({
                                     "tagInst__%s" % obj.keys()[0]: {
                                         "attributes": {
                                             "dn": dn
                                         }
                                     }
                                 })
                     LOG.debug("Pushing %s into APIC: %s" %
                               (method, to_push + tags))
                     # Multiple objects could result from a conversion, push
                     # them in a single transaction
                     dn_mgr = apic_client.DNManager()
                     decompose = dn_mgr.aci_decompose_dn_guess
                     try:
                         if method == base_universe.CREATE:
                             with self.aci_session.transaction(
                                     top_send=True) as trs:
                                 for obj in to_push + tags:
                                     attr = obj.values()[0]['attributes']
                                     mo, parents_rns = decompose(
                                         attr.pop('dn'),
                                         obj.keys()[0])
                                     rns = dn_mgr.filter_rns(parents_rns)
                                     getattr(getattr(self.aci_session, mo),
                                             method)(*rns,
                                                     transaction=trs,
                                                     **attr)
                         else:
                             for obj in to_push + tags:
                                 attr = obj.values()[0]['attributes']
                                 self.aci_session.DELETE('/mo/%s.json' %
                                                         attr.pop('dn'))
                         # Object creation was successful, change object
                         # state
                         if method == base_universe.CREATE:
                             self.creation_succeeded(aim_object)
                     except Exception as e:
                         LOG.debug(traceback.format_exc())
                         LOG.error("An error has occurred during %s for "
                                   "object %s: %s" %
                                   (method, aim_object, e.message))
                         if method == base_universe.CREATE:
                             err_type = (
                                 self.error_handler.analyze_exception(e))
                             # REVISIT(ivar): for now, treat UNKNOWN errors
                             # the same way as OPERATION_TRANSIENT.
                             # Investigate a way to understand when such
                             # errors might require agent restart.
                             self.creation_failed(aim_object, e.message,
                                                  err_type)
 def get_monitored_state_copy(self):
     with utils.get_rlock(lcon.ACI_TREE_LOCK_NAME_PREFIX +
                          self.tenant_name):
         return structured_tree.StructuredHashTree.from_string(
             str(self._monitored_state),
             root_key=self._monitored_state.root_key)
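
Like get_state_copy in Example #1, this copy-under-lock idiom round-trips the tree through its string form to get a deep, detached snapshot while the tenant lock is held. A generic sketch of the same idea (the class and its JSON encoding are illustrative, not the project's StructuredHashTree):

    import json

    class Snapshottable:
        def __init__(self, state=None):
            self.state = state if state is not None else {}

        def __str__(self):
            return json.dumps(self.state)

        @classmethod
        def from_string(cls, data):
            return cls(json.loads(data))

    original = Snapshottable({'root': {'child': 1}})
    snapshot = Snapshottable.from_string(str(original))
    original.state['root']['child'] = 2
    assert snapshot.state['root']['child'] == 1  # snapshot is detached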