def _thread_monitor(self, thread, name, flag):
     """Watch ``thread`` and reconnect the web socket when it dies.

     While ``flag['monitor_runs']`` is truthy, every cycle checks
     ``thread.isAlive()``. A dead thread triggers a call to
     ``utils.exponential_backoff`` followed by
     ``self.establish_ws_session()``; once the retry counter has reached
     ``len(self.ws_urls)``, ``utils.perform_harakiri`` is called instead.
     A live thread clears the retry counter.

     :param thread: object exposing ``isAlive()``.
     :param name: label for the thread, used only in log messages.
     :param flag: dict whose ``'monitor_runs'`` entry is decremented
         after every cycle that reaches the sleep (testing hook).
     """
     # TODO(ivar): I could have used thread.join instead of this
     # NOTE(review): threading.Thread.isAlive() was removed in Python 3.9
     # (is_alive() is the replacement). Left untouched because callers or
     # test doubles may only provide isAlive -- confirm before switching.
     retries = None
     max_retries = len(self.ws_urls)
     LOG.debug("Monitoring thread %s" % name)
     try:
         while flag['monitor_runs']:
             if not thread.isAlive():
                 if retries and retries.get() >= max_retries:
                     utils.perform_harakiri(
                         LOG, "Critical thread %s stopped working" % name)
                 else:
                     retries = utils.exponential_backoff(
                         self.monitor_max_backoff, tentative=retries)
                     try:
                         self.establish_ws_session()
                     except Exception as e:
                         # str(e) instead of e.message: exceptions have no
                         # ``message`` attribute on Python 3.
                         LOG.debug(
                             "Monitor for thread %s tried to reconnect web "
                             "socket, but something went wrong. Will retry "
                             "%s more times: %s" %
                             (name, max_retries - retries.get(), str(e)))
                         # Go straight to the next check: the sleep and the
                         # test counter decrement below are skipped.
                         continue
             else:
                 LOG.debug("Thread %s is in good shape" % name)
                 retries = None
             time.sleep(self.monitor_sleep_time)
             # for testing purposes
             flag['monitor_runs'] -= 1
     except Exception as e:
         # str(e) keeps this handler itself from failing with
         # AttributeError on Python 3, which would skip the harakiri call.
         msg = ("Unknown error in thread monitor "
                "for %s: %s" % (name, str(e)))
         LOG.error(msg)
         utils.perform_harakiri(LOG, msg)
 def test_exponential_backoff(self):
     """Check the counters and sleep delays of exponential_backoff.

     ``random.random`` is patched to always return 1 and ``time.sleep``
     is replaced by a mock, so the sleep arguments can be asserted
     without actually waiting.
     """
     fixed_random = mock.patch.object(internal_utils.random, 'random',
                                      return_value=1)
     fake_sleep = mock.patch.object(internal_utils.time, 'sleep')
     with fixed_random, fake_sleep as sleep:
         # First call starts from scratch (no previous counter).
         attempt = internal_utils.exponential_backoff(10, None)
         self.assertEqual(1, attempt.get())
         sleep.assert_called_with(1)
         # One manual increment plus another backoff round.
         attempt.increment()
         attempt = internal_utils.exponential_backoff(10, attempt)
         self.assertEqual(3, attempt.get())
         sleep.assert_called_with(4)
         # Two more increments: the sleep is expected to equal the
         # maximum (10) passed as first argument.
         for _ in range(2):
             attempt.increment()
         internal_utils.exponential_backoff(10, attempt)
         sleep.assert_called_with(10)
 def test_exponential_backoff(self):
     """Verify exponential_backoff's retry counter and requested sleeps.

     Runs with ``random.random`` pinned to 1 and ``time.sleep`` mocked
     out, so each expected delay can be checked on the mock.
     """
     random_patcher = mock.patch.object(
         internal_utils.random, 'random', return_value=1)
     sleep_patcher = mock.patch.object(internal_utils.time, 'sleep')
     with random_patcher, sleep_patcher as sleep:
         max_wait = 10
         # No previous state: counter becomes 1, sleep of 1.
         counter = internal_utils.exponential_backoff(max_wait, None)
         self.assertEqual(1, counter.get())
         sleep.assert_called_with(1)
         # After an extra increment the next round reports 3, sleep of 4.
         counter.increment()
         counter = internal_utils.exponential_backoff(max_wait, counter)
         self.assertEqual(3, counter.get())
         sleep.assert_called_with(4)
         # Two further increments: sleep is expected to be max_wait.
         counter.increment()
         counter.increment()
         internal_utils.exponential_backoff(max_wait, counter)
         sleep.assert_called_with(max_wait)
 def initialize(self, conf):
     """Store ``conf`` and connect a datagram UNIX socket.

     The socket path is read from the ``unix_socket_path`` option of
     the ``aim`` group.

     :param conf: configuration manager exposing ``get_option``.
     :returns: ``self`` on success; ``None`` when any exception occurs
         (the traceback is logged and ``utils.exponential_backoff`` is
         invoked to update ``self.recovery_retries``).
     """
     try:
         self.conf_manager = conf
         self.us_path = self.conf_manager.get_option(
             'unix_socket_path', group='aim')
         self.sock = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM)
         self.sock.connect(self.us_path)
         LOG.info("Connected to %s" % self.us_path)
         # Successful connection: forget any previous failures.
         self.recovery_retries = None
     except Exception:
         LOG.error(traceback.format_exc())
         self.recovery_retries = utils.exponential_backoff(
             SOCKET_RECONNECT_MAX_WAIT, tentative=self.recovery_retries)
     else:
         return self
 def initialize(self, conf):
     """Keep ``conf`` and open the datagram UNIX socket it points to.

     Reads option ``unix_socket_path`` (group ``aim``) from ``conf``,
     creates an ``AF_UNIX``/``SOCK_DGRAM`` socket and connects it.

     :param conf: configuration manager exposing ``get_option``.
     :returns: ``self`` if the connection succeeded, otherwise ``None``
         after logging the traceback and calling
         ``utils.exponential_backoff``.
     """
     try:
         self.conf_manager = conf
         socket_path = self.conf_manager.get_option('unix_socket_path',
                                                    group='aim')
         self.us_path = socket_path
         self.sock = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM)
         self.sock.connect(self.us_path)
         LOG.info("Connected to %s" % self.us_path)
         # Connected: clear the failure counter before handing back.
         self.recovery_retries = None
         return self
     except Exception:
         LOG.error(traceback.format_exc())
         self.recovery_retries = utils.exponential_backoff(
             SOCKET_RECONNECT_MAX_WAIT, tentative=self.recovery_retries)
     return None
 def _poll(self):
     """Run ``self._daemon_loop()`` repeatedly while ``loop_count`` > 0.

     After each successful pass ``utils.wait_for_next_cycle`` is called
     with the pass start time and ``self.polling_interval``, the loop
     counter is decremented and the retry state is cleared. Any
     exception is logged and answered with
     ``utils.exponential_backoff`` (maximum argument 10); the counter is
     not decremented in that case.
     """
     # Loop count is the equivalent of a True in normal usage, but it's
     # useful for testing.
     while self.loop_count > 0:
         try:
             cycle_started = time.time()
             self._daemon_loop()
             utils.wait_for_next_cycle(
                 cycle_started,
                 self.polling_interval,
                 LOG,
                 readable_caller='Event Service Poller',
                 notify_exceeding_timeout=False,
             )
             # Successful pass.
             self.loop_count -= 1
             self.recovery_retries = None
         except Exception:
             LOG.error('A error occurred in polling agent.')
             LOG.error(traceback.format_exc())
             self.recovery_retries = utils.exponential_backoff(
                 10, tentative=self.recovery_retries)
 def _thread_monitor(self, flag):
     """Watch the login and subscription threads; reconnect when one dies.

     While ``flag['monitor_runs']`` is truthy, each cycle inspects
     ``self.login_thread`` and ``self.subs_thread`` (falsy entries are
     skipped). A dead thread triggers ``utils.exponential_backoff``
     followed by ``self.establish_ws_session()``; once that thread's
     retry counter has reached ``len(self.ws_urls)``,
     ``utils.perform_harakiri`` is called instead. A live thread clears
     its retry counter.

     :param flag: dict whose ``'monitor_runs'`` entry is decremented
         after every cycle (testing hook).
     """
     login_thread_name = 'login_thread'
     subscription_thread_name = 'subscription_thread'
     # Per-thread retry state, as returned by utils.exponential_backoff.
     name_to_retry = {
         login_thread_name: None,
         subscription_thread_name: None
     }
     max_retries = len(self.ws_urls)
     LOG.debug("Monitoring threads login and subscription")
     try:
         while flag['monitor_runs']:
             # NOTE(review): isAlive() was removed from threading.Thread
             # in Python 3.9 -- confirm the thread objects used here.
             for thd, name in [(self.login_thread, login_thread_name),
                               (self.subs_thread,
                                subscription_thread_name)]:
                 if thd and not thd.isAlive():
                     if name_to_retry[name] and name_to_retry[name].get(
                     ) >= max_retries:
                         utils.perform_harakiri(
                             LOG, "Critical thread %s stopped "
                             "working" % name)
                     else:
                         name_to_retry[name] = utils.exponential_backoff(
                             self.monitor_max_backoff,
                             tentative=name_to_retry[name])
                         try:
                             self.establish_ws_session()
                         except Exception as e:
                             # str(e) instead of e.message: exceptions
                             # have no ``message`` attribute on Python 3.
                             LOG.debug(
                                 "Monitor for thread %s tried to reconnect "
                                 "web socket, but something went wrong. "
                                 "Will retry %s more times: %s" %
                                 (name, max_retries -
                                  name_to_retry[name].get(), str(e)))
                             continue
                 elif thd:
                     LOG.debug("Thread %s is in good shape" % name)
                     name_to_retry[name] = None
             time.sleep(self.monitor_sleep_time)
             # for testing purposes
             flag['monitor_runs'] -= 1
     except Exception as e:
         # str(e) keeps this handler from raising AttributeError on
         # Python 3, which would skip the harakiri call below.
         msg = ("Unknown error in thread monitor: %s" % str(e))
         LOG.error(msg)
         utils.perform_harakiri(LOG, msg)
 def _thread_monitor(self, flag):
     """Monitor the login/subscription threads and revive the session.

     Loops while ``flag['monitor_runs']`` is truthy. For each of
     ``self.login_thread`` and ``self.subs_thread`` that is set:

     * if ``isAlive()`` is false and the thread's retry counter is below
       ``len(self.ws_urls)``, ``utils.exponential_backoff`` is called
       and then ``self.establish_ws_session()``;
     * if the counter has reached that limit,
       ``utils.perform_harakiri`` is called;
     * if the thread is alive, its retry counter is reset.

     :param flag: dict whose ``'monitor_runs'`` entry is decremented
         after every cycle (testing hook).
     """
     login_thread_name = 'login_thread'
     subscription_thread_name = 'subscription_thread'
     # Retry state per monitored thread name.
     name_to_retry = {login_thread_name: None,
                      subscription_thread_name: None}
     max_retries = len(self.ws_urls)
     LOG.debug("Monitoring threads login and subscription")
     try:
         while flag['monitor_runs']:
             # The pairs are rebuilt every cycle so that replaced thread
             # attributes are picked up.
             # NOTE(review): isAlive() no longer exists on
             # threading.Thread since Python 3.9 -- confirm.
             monitored = [(self.login_thread, login_thread_name),
                          (self.subs_thread, subscription_thread_name)]
             for thd, name in monitored:
                 if thd and not thd.isAlive():
                     if name_to_retry[name] and name_to_retry[
                             name].get() >= max_retries:
                         utils.perform_harakiri(
                             LOG, "Critical thread %s stopped "
                                  "working" % name)
                     else:
                         name_to_retry[name] = utils.exponential_backoff(
                             self.monitor_max_backoff,
                             tentative=name_to_retry[name])
                         try:
                             self.establish_ws_session()
                         except Exception as e:
                             # e.message does not exist on Python 3;
                             # str(e) works on both major versions.
                             LOG.debug(
                                 "Monitor for thread %s tried to reconnect "
                                 "web socket, but something went wrong. "
                                 "Will retry %s more times: %s" %
                                 (name,
                                  max_retries - name_to_retry[name].get(),
                                  str(e)))
                             continue
                 elif thd:
                     LOG.debug("Thread %s is in good shape" % name)
                     name_to_retry[name] = None
             time.sleep(self.monitor_sleep_time)
             # for testing purposes
             flag['monitor_runs'] -= 1
     except Exception as e:
         # Using str(e) so the handler cannot itself fail with
         # AttributeError on Python 3 before reaching perform_harakiri.
         msg = ("Unknown error in thread monitor: %s" % str(e))
         LOG.error(msg)
         utils.perform_harakiri(LOG, msg)
 def _listener(self):
     """Connect and receive events forever, reconnecting on failure.

     Each outer iteration calls ``self._connect()`` and then
     ``self._recv_loop()`` in an endless inner loop. Any exception is
     logged and answered with ``utils.exponential_backoff``;
     ``self.sock`` is closed in all cases before the next connection
     attempt. This method never returns normally.
     """
     # Multiple event notifiers can connect to AID
     while True:
         try:
             self._connect()
             LOG.info("Listening for Events on %s", self.us_path)
             while True:
                 self._recv_loop()
                 # A receive pass completed without error: clear the
                 # backoff state. The reset used to sit after this
                 # infinite loop, where it could never execute.
                 self.recovery_retries = None
         except Exception as e:
             LOG.debug(traceback.format_exc())
             LOG.error("An error as occurred in the event listener "
                       "thread: %s" % e)
             self.recovery_retries = utils.exponential_backoff(
                 SOCKET_RECONNECT_MAX_WAIT, tentative=self.recovery_retries)
         finally:
             try:
                 self.sock.close()
             except AttributeError:
                 # self.sock does not exist (or is not a socket) yet.
                 LOG.debug("Socket wasn't initialized before failure")
Example #10
0
 def _poll(self):
     """Poll by calling ``self._daemon_loop()`` until ``loop_count`` hits 0.

     A successful pass waits for the next cycle through
     ``utils.wait_for_next_cycle``, decrements ``self.loop_count`` and
     resets ``self.recovery_retries``. A failing pass logs the error and
     traceback, then calls ``utils.exponential_backoff`` with a maximum
     argument of 10, leaving ``loop_count`` unchanged.
     """
     # Loop count is the equivalent of a True in normal usage, but it's
     # useful for testing.
     while self.loop_count > 0:
         try:
             began_at = time.time()
             self._daemon_loop()
             utils.wait_for_next_cycle(
                 began_at, self.polling_interval, LOG,
                 readable_caller='Event Service Poller',
                 notify_exceeding_timeout=False)
             # The pass went through: count it and clear retry state.
             self.loop_count -= 1
             self.recovery_retries = None
         except Exception:
             LOG.error('A error occurred in polling agent.')
             LOG.error(traceback.format_exc())
             self.recovery_retries = utils.exponential_backoff(
                 10, tentative=self.recovery_retries)
 def _listener(self):
     """Listen for events indefinitely, reconnecting after any error.

     The outer loop (re)connects through ``self._connect()``; the inner
     loop calls ``self._recv_loop()`` without end. When an exception
     escapes, it is logged, ``utils.exponential_backoff`` is called, and
     ``self.sock`` is closed before the outer loop tries again. The
     method has no normal exit.
     """
     # Multiple event notifiers can connect to AID
     while True:
         try:
             self._connect()
             LOG.info("Listening for Events on %s", self.us_path)
             while True:
                 self._recv_loop()
                 # Reset the backoff state after each successful receive
                 # pass. Previously this assignment followed the endless
                 # loop and was therefore unreachable.
                 self.recovery_retries = None
         except Exception as e:
             LOG.debug(traceback.format_exc())
             LOG.error("An error as occurred in the event listener "
                       "thread: %s" % e)
             self.recovery_retries = utils.exponential_backoff(
                 SOCKET_RECONNECT_MAX_WAIT,
                 tentative=self.recovery_retries)
         finally:
             try:
                 self.sock.close()
             except AttributeError:
                 # Raised when self.sock is missing or has no close().
                 LOG.debug("Socket wasn't initialized before failure")
 def _main_loop(self):
     """Subscribe the tenant and run the event loop until stopped.

     Calls ``self._event_loop()`` while ``self._stop`` is false and
     ``self.num_loop_runs`` is positive. ``ScheduledReset`` is raised
     once the current time exceeds ``self.scheduled_reset``; it is
     handled by unsubscribing the tenant. Any other exception is logged,
     the tenant is unsubscribed, ``utils.exponential_backoff`` is called
     and, once the retry counter reaches ``self.max_retries``,
     ``self.kill()`` is invoked.
     """
     try:
         # tenant subscription is redone upon exception
         self._subscribe_tenant()
         LOG.debug("Starting event loop for tenant %s" % self.tenant_name)
         last_time = 0
         epsilon = 0.5
         while not self._stop and self.num_loop_runs > 0:
             start = time.time()
             if start > self.scheduled_reset:
                 raise ScheduledReset()
             self._event_loop()
             curr_time = time.time() - start
             if abs(curr_time - last_time) > epsilon:
                 # Only log significant differences
                 LOG.debug("Event loop for tenant %s completed in %s "
                           "seconds" % (self.tenant_name,
                                        time.time() - start))
                 last_time = curr_time
             if not last_time:
                 # First measurement (or a zero duration so far).
                 last_time = curr_time
             # Successfull run
             self.num_loop_runs -= 1
             self.recovery_retries = None
     except ScheduledReset:
         LOG.info("Scheduled tree reset for root %s" % self.tenant_name)
         self._unsubscribe_tenant()
     except Exception as e:
         # str(e) instead of e.message: the latter does not exist on
         # Python 3 and would abort this handler with AttributeError
         # before the tenant is unsubscribed.
         LOG.error("An exception has occurred in thread serving tenant "
                   "%s, error: %s" % (self.tenant_name, str(e)))
         LOG.error(traceback.format_exc())
         self._unsubscribe_tenant()
         self.recovery_retries = utils.exponential_backoff(
             TENANT_FAILURE_MAX_WAIT, tentative=self.recovery_retries)
         if self.recovery_retries.get() >= self.max_retries:
             LOG.error("Exceeded max recovery retries for tenant %s. "
                       "Destroying the manager." %
                       self.tenant_name)
             self.kill()
 def _main_loop(self):
     """Serve one tenant: subscribe, then loop over ``_event_loop()``.

     The loop runs while ``self._stop`` is false and
     ``self.num_loop_runs`` is positive. When the clock passes
     ``self.scheduled_reset`` a ``ScheduledReset`` is raised and handled
     by unsubscribing the tenant. Other exceptions are logged, the
     tenant is unsubscribed, ``utils.exponential_backoff`` is called,
     and ``self.kill()`` is invoked once the retry counter reaches
     ``self.max_retries``.
     """
     try:
         # tenant subscription is redone upon exception
         self._subscribe_tenant()
         LOG.debug("Starting event loop for tenant %s" % self.tenant_name)
         previous_duration = 0
         threshold = 0.5
         while not self._stop and self.num_loop_runs > 0:
             began = time.time()
             if began > self.scheduled_reset:
                 raise ScheduledReset()
             self._event_loop()
             duration = time.time() - began
             if abs(duration - previous_duration) > threshold:
                 # Only log significant differences
                 LOG.debug("Event loop for tenant %s completed in %s "
                           "seconds" %
                           (self.tenant_name, time.time() - began))
                 previous_duration = duration
             elif not previous_duration:
                 # Nothing recorded yet: remember this duration.
                 previous_duration = duration
             # Successfull run
             self.num_loop_runs -= 1
             self.recovery_retries = None
     except ScheduledReset:
         LOG.info("Scheduled tree reset for root %s" % self.tenant_name)
         self._unsubscribe_tenant()
     except Exception as exc:
         LOG.error("An exception has occurred in thread serving tenant "
                   "%s, error: %s" % (self.tenant_name, str(exc)))
         LOG.error(traceback.format_exc())
         self._unsubscribe_tenant()
         self.recovery_retries = utils.exponential_backoff(
             TENANT_FAILURE_MAX_WAIT, tentative=self.recovery_retries)
         if self.recovery_retries.get() < self.max_retries:
             return
         LOG.error("Exceeded max recovery retries for tenant %s. "
                   "Destroying the manager." % self.tenant_name)
         self.kill()
    def _thread_monitor(self, flag):
        """Monitor the login/subscription threads and the recovery state.

        Loops while ``flag['monitor_runs']`` is truthy. Every cycle:

        1. For each of ``self.login_thread`` / ``self.subs_thread`` that
           is set: a dead thread triggers ``utils.exponential_backoff``
           and ``self.establish_ws_session()``, or
           ``utils.perform_harakiri`` once its retry counter has reached
           ``len(self.ws_urls)``; a live thread resets its counter.
        2. If ``self.need_recovery`` is set, more than one ws_url exists
           and the recovery timer has expired,
           ``self.establish_ws_session(recovery_mode=True)`` is tried and
           the timer is pushed back on failure. Otherwise, when no
           recovery is needed, ``self.apic_assign_obj`` is refreshed
           through ``self.manager.update``.
        3. Sleeps ``self.monitor_sleep_time`` and decrements the flag.

        :param flag: dict whose ``'monitor_runs'`` entry is decremented
            after every cycle (testing hook).
        """
        login_thread_name = 'login_thread'
        subscription_thread_name = 'subscription_thread'
        # Retry state per monitored thread name, initially None.
        name_to_retry = dict.fromkeys(
            (login_thread_name, subscription_thread_name))
        max_retries = len(self.ws_urls)
        recovery_timer = utils.get_time()
        recovery_retry = 0
        aim_context = aim_ctx.AimContext(store=api.get_store())
        LOG.debug("Monitoring threads login and subscription")
        try:
            while flag['monitor_runs']:
                # Rebuilt on every cycle so replaced threads are seen.
                monitored = ((self.login_thread, login_thread_name),
                             (self.subs_thread, subscription_thread_name))
                for thd, name in monitored:
                    if not thd:
                        continue
                    if thd.isAlive():
                        LOG.debug("Thread %s is in good shape" % name)
                        name_to_retry[name] = None
                        continue
                    # From here on the thread is known to be dead.
                    attempts = name_to_retry[name]
                    if attempts and attempts.get() >= max_retries:
                        utils.perform_harakiri(
                            LOG, "Critical thread %s stopped "
                            "working" % name)
                        continue
                    name_to_retry[name] = utils.exponential_backoff(
                        self.monitor_max_backoff, tentative=attempts)
                    try:
                        self.establish_ws_session()
                    except Exception as exc:
                        LOG.debug(
                            "Monitor for thread %s tried to reconnect "
                            "web socket, but something went wrong. "
                            "Will retry %s more times: %s" %
                            (name,
                             max_retries - name_to_retry[name].get(),
                             str(exc)))

                if not self.need_recovery:
                    # Update the last_update_timestamp
                    if self.apic_assign_obj:
                        self.apic_assign_obj = self.manager.update(
                            aim_context, self.apic_assign_obj)
                    else:
                        # This should never happen
                        LOG.error('There is no such apic_assign_obj exist '
                                  'for %s!' % self.session.ipaddr)
                elif (len(self.ws_urls) > 1
                        and utils.get_time() > recovery_timer):
                    # No point to do any recovery session if we
                    # only have 1 ws_url.
                    self.establish_ws_session(recovery_mode=True)
                    if self.need_recovery:
                        # Still fail to recover
                        recovery_retry += 1
                        recovery_timer = (
                            utils.get_time() + utils.get_backoff_time(
                                self.recovery_max_backoff, recovery_retry))
                    else:
                        recovery_retry = 0

                time.sleep(self.monitor_sleep_time)
                # for testing purposes
                flag['monitor_runs'] -= 1
        except Exception as exc:
            msg = ("Unknown error in thread monitor: %s" % str(exc))
            LOG.error(msg)
            utils.perform_harakiri(LOG, msg)