def _heartbeat_loop(self):
    """Send a single heartbeat, then wait for the next report cycle.

    A fresh ``AimContext`` backed by ``api.get_store()`` is created for
    every heartbeat. After sending, the remaining wait is delegated to
    ``utils.wait_for_next_cycle`` using ``self.report_interval``.
    """
    # Taken before any work so the heartbeat itself counts toward the
    # interval handed to wait_for_next_cycle.
    cycle_started = time.time()
    self._send_heartbeat(context.AimContext(store=api.get_store()))
    utils.wait_for_next_cycle(
        cycle_started,
        self.report_interval,
        LOG,
        readable_caller='AID-HB',
        notify_exceeding_timeout=False,
    )
def _daemon_loop(self):
    """Collect (and squash) events, then run one reconciliation cycle.

    The method blocks on ``self.events.get_event`` until a first event
    shows up. While nothing has arrived yet, every timeout is used to
    check ``self.run_daemon_loop``. Once a first event has been seen,
    further events are gathered until ``self.squash_time`` seconds have
    elapsed since that first event. Afterwards
    ``self._reconciliation_cycle`` is invoked and the remainder of
    ``self.polling_interval`` is waited out.

    Raises:
        utils.StopLoop: when a timeout occurs with no pending event and
            ``self.run_daemon_loop`` is false.
    """
    serve = False
    # Timestamp of the first event of this batch; None until one arrives.
    first_seen_at = None
    wait_left = AID_EXIT_CHECK_INTERVAL
    while wait_left > 0:
        event = self.events.get_event(wait_left)
        if not event and first_seen_at is None:
            # Lone timeout: nothing is queued, so only check for shutdown.
            if not self.run_daemon_loop:
                LOG.info("Stopping AID main loop.")
                raise utils.StopLoop()
            continue
        if not first_seen_at:
            first_seen_at = time.time()
        if event is None or event in event_handler.EVENTS:
            # Shrink the wait to whatever is left of the squash window,
            # measured from the first event of the batch.
            wait_left = first_seen_at + self.squash_time - time.time()
            if event == event_handler.EVENT_SERVE:
                # A serve request makes the upcoming cycle serve tenants.
                serve = True

    cycle_started = time.time()
    self._reconciliation_cycle(serve)
    utils.wait_for_next_cycle(
        cycle_started,
        self.polling_interval,
        LOG,
        readable_caller='AID',
        notify_exceeding_timeout=False,
    )
def _main_loop(self):
    """Run one poll-and-execute cycle of the config subscriber.

    Calls ``self._poll_and_execute()`` and then waits for the rest of
    ``self.polling_interval`` through ``utils.wait_for_next_cycle``.
    Any ``Exception`` raised along the way is logged (message first,
    full traceback second) and swallowed, so this method does not
    propagate ``Exception`` subclasses to its caller.
    """
    try:
        start = time.time()
        self._poll_and_execute()
        utils.wait_for_next_cycle(start, self.polling_interval, LOG,
                                  readable_caller='Config Subscriber')
    except Exception as e:
        # Log the exception object rather than ``e.message``: that
        # attribute is absent on Python 3 exceptions, so reading it here
        # would raise AttributeError inside the handler and hide the
        # original failure.
        LOG.error("An exception has occurred in config subscriber thread "
                  "%s", e)
        LOG.error(traceback.format_exc())
def _poll(self):
    """Drive ``self._daemon_loop`` until ``self.loop_count`` is exhausted.

    Each successful iteration runs the daemon loop, waits out the rest
    of ``self.polling_interval``, decrements ``self.loop_count`` and
    clears ``self.recovery_retries``. When an iteration raises an
    ``Exception`` it is logged with its traceback and
    ``self.recovery_retries`` is refreshed from
    ``utils.exponential_backoff``; the counter is not decremented in
    that case.
    """
    # In normal usage loop_count acts like an always-true flag; a finite
    # value is handy for tests that need the loop to terminate.
    while self.loop_count > 0:
        try:
            iteration_started = time.time()
            self._daemon_loop()
            utils.wait_for_next_cycle(
                iteration_started,
                self.polling_interval,
                LOG,
                readable_caller='Event Service Poller',
                notify_exceeding_timeout=False,
            )
            self.loop_count -= 1
            # A clean iteration resets the backoff state.
            self.recovery_retries = None
        except Exception:
            LOG.error('A error occurred in polling agent.')
            LOG.error(traceback.format_exc())
            self.recovery_retries = utils.exponential_backoff(
                10, tentative=self.recovery_retries)
def _heartbeat_loop(self):
    """Send a heartbeat, check for a stalled daemon loop, then wait.

    After sending the heartbeat with a fresh ``AimContext``, the time of
    this heartbeat is compared with ``self.daemon_loop_time``. If the
    heartbeat is later than that timestamp by more than
    ``DEADLOCK_TIME``, ``utils.perform_harakiri`` is called. Finally the
    rest of ``self.report_interval`` is waited out.
    """
    beat_started = time.time()
    self._send_heartbeat(context.AimContext(store=api.get_store()))

    # REVISIT: This check should be removed once all the locking in AID
    # has been removed.
    if beat_started > self.daemon_loop_time:
        stalled_for = beat_started - self.daemon_loop_time
        if stalled_for > DEADLOCK_TIME:
            utils.perform_harakiri(
                LOG,
                "Agent has been down for %s seconds." % stalled_for,
            )

    utils.wait_for_next_cycle(
        beat_started,
        self.report_interval,
        LOG,
        readable_caller='AID-HB',
        notify_exceeding_timeout=False,
    )
def daemon_loop(self):
    """Run the agent main loop until asked to stop.

    One ``AimContext`` is created up front and reused for every pass.
    The first pass always calls ``self._daemon_loop`` with serving
    enabled. After that, each iteration blocks on
    ``self.events.get_event``: while no event has arrived, timeouts are
    used to check ``self.run_daemon_loop`` (returning when it is false);
    once a first event is seen, further events are gathered until
    ``self.squash_time`` seconds have passed since that first event.
    ``self._daemon_loop`` is then run and the rest of
    ``self.polling_interval`` is waited out.

    Any ``Exception`` raised inside an iteration is logged with its
    traceback and the loop carries on with the next iteration.
    """
    aim_ctx = context.AimContext(store=api.get_store())
    # The very first pass serves tenants no matter which events arrived.
    self._daemon_loop(aim_ctx, True)
    while True:
        try:
            serve = False
            # Timestamp of the first event of this batch; None until
            # one arrives.
            first_seen_at = None
            wait_left = AID_EXIT_CHECK_INTERVAL
            while wait_left > 0:
                event = self.events.get_event(wait_left)
                if not event and first_seen_at is None:
                    # Lone timeout: nothing queued, only check for exit.
                    if not self.run_daemon_loop:
                        LOG.info("Stopping AID main loop.")
                        return
                    continue
                if not first_seen_at:
                    first_seen_at = time.time()
                if event is None or event in event_handler.EVENTS:
                    # Shrink the wait to what is left of the squash
                    # window, measured from the first event.
                    wait_left = (first_seen_at + self.squash_time -
                                 time.time())
                    if event == event_handler.EVENT_SERVE:
                        # A serve request makes this pass serve tenants.
                        serve = True

            pass_started = time.time()
            self._daemon_loop(aim_ctx, serve)
            utils.wait_for_next_cycle(
                pass_started,
                self.polling_interval,
                LOG,
                readable_caller='AID',
                notify_exceeding_timeout=False,
            )
        except Exception:
            LOG.error('A error occurred in agent')
            LOG.error(traceback.format_exc())