def run(self):
    """Worker loop: drain the bounce-now queue, process each service
    instance, and requeue instances that still need bouncing onto the
    bounce-later queue.

    Runs forever; sleeps briefly each iteration to avoid busy-spinning.
    """
    self.log.info(f"{self.name} starting up")
    while True:
        inst = self.instances_to_bounce_now.get()
        try:
            retry_in, exit_code, timers = self.process_service_instance(inst)
        except Exception as e:
            # Processing blew up: count it as a failed attempt and keep the
            # timers the instance already carried.
            self.log.error("Worker failed to process service instance and will retry. "
                           "Caused by exception: {}".format(e))
            exit_code = -2
            timers = inst.bounce_timers
        failure_count = inst.failures
        if exit_code != 0:
            # Any non-zero exit bumps the failure count and replaces the
            # retry delay with an exponentially backed-off one.
            failure_count = inst.failures + 1
            retry_in = exponential_back_off(
                failures=failure_count,
                factor=self.config.get_deployd_worker_failure_backoff_factor(),
                base=2,
                max_time=6000,
            )
        if retry_in:
            # Not yet in steady state: schedule another bounce in the future.
            requeued = ServiceInstance(
                service=inst.service,
                instance=inst.instance,
                cluster=self.config.get_cluster(),
                bounce_by=int(time.time()) + retry_in,
                watcher=self.name,
                bounce_timers=timers,
                priority=inst.priority,
                failures=failure_count,
            )
            self.instances_to_bounce_later.put(requeued)
        time.sleep(0.1)
def run(self):
    """Worker loop: take service instances off the bounce queue, deploy
    them via marathon, and requeue any instance that has not yet reached a
    steady state (with exponential backoff on failure)."""
    self.log.info("{} starting up".format(self.name))
    while True:
        inst = self.bounce_q.get()
        failure_count = inst.failures
        timers = self.setup_timers(inst)
        self.log.info("{} processing {}.{}".format(
            self.name, inst.service, inst.instance))
        # Fetch the full app list once so the deploy call can reuse it.
        apps = marathon_tools.get_all_marathon_apps(
            self.marathon_client, embed_failures=True)
        timers.setup_marathon.start()
        try:
            exit_code, retry_in = deploy_marathon_service(
                service=inst.service,
                instance=inst.instance,
                client=self.marathon_client,
                soa_dir=marathon_tools.DEFAULT_SOA_DIR,
                marathon_config=self.marathon_config,
                marathon_apps=apps)
        except Exception as e:
            self.log.warning(
                "deploy_marathon_service caused exception: {}".format(e))
            exit_code = -2
        if exit_code != 0:
            # Failed deploys retry later with exponential backoff.
            failure_count += 1
            retry_in = exponential_back_off(
                failures=failure_count,
                factor=self.config.get_deployd_worker_failure_backoff_factor(),
                base=2,
                max_time=6000)
        timers.setup_marathon.stop()
        self.log.info(
            "setup marathon completed with exit code {} for {}.{}".format(
                exit_code, inst.service, inst.instance))
        if retry_in:
            timers.processed_by_worker.start()
            self.log.info(
                "{}.{} not in steady state so bouncing again in {} "
                "seconds".format(inst.service, inst.instance, retry_in))
            self.inbox_q.put(ServiceInstance(
                service=inst.service,
                instance=inst.instance,
                bounce_by=int(time.time()) + retry_in,
                watcher=self.name,
                bounce_timers=timers,
                failures=failure_count))
        else:
            timers.bounce_length.stop()
            self.log.info("{}.{} in steady state".format(
                inst.service, inst.instance))
        time.sleep(0.1)
def run(self) -> None:
    """Takes things from the to_bounce_now queue, processes them, then
    might put them on the bounce_later queue for future processing"""
    self.log.info(f"{self.name} starting up")
    while True:
        with self.instances_to_bounce.get() as service_instance:
            self.busy = True
            try:
                retry_in, rc = self.process_service_instance(service_instance)
            except Exception:
                self.log.error(
                    f"{self.name} Worker failed to process service instance and will retry. "
                    f"Caused by exception: {traceback.format_exc()}")
                rc = -2
            failures = service_instance.failures
            if rc != 0:
                # Non-zero exit: bump the failure count and back off.
                failures = service_instance.failures + 1
                retry_in = exponential_back_off(
                    failures=failures,
                    factor=self.config.get_deployd_worker_failure_backoff_factor(),
                    base=2,
                    max_time=6000,
                )
            if retry_in and failures >= self.max_failures:
                # Exhausted its retries: drop it instead of requeueing.
                self.log.info(
                    f"{self.name} Worker removing "
                    f"{service_instance.service}.{service_instance.instance} "
                    f"from queue because it has failed {failures} times "
                    f"(max is {self.max_failures})")
            elif retry_in:
                # Not in steady state yet: requeue a fresh record for later.
                due = int(time.time()) + retry_in
                self.instances_to_bounce.put(ServiceInstance(
                    service=service_instance.service,
                    instance=service_instance.instance,
                    bounce_by=due,
                    wait_until=due,
                    watcher=self.name,
                    failures=failures,
                    processed_count=service_instance.processed_count + 1,
                    bounce_start_time=service_instance.bounce_start_time,
                    enqueue_time=time.time(),
                ))
        self.busy = False
        time.sleep(0.1)
def run(self):
    """Takes things from the to_bounce_now queue, processes them, then
    might put them on the bounce_later queue for future processing"""
    self.log.info(f"{self.name} starting up")
    while True:
        inst = self.instances_to_bounce.get()
        self.busy = True
        try:
            retry_in, rc, timers = self.process_service_instance(inst)
        except Exception as e:
            # Count the crash as a failed attempt; keep the existing timers.
            self.log.error(
                f"{self.name} Worker failed to process service instance and will retry. "
                f"Caused by exception: {format(e)}")
            rc = -2
            timers = inst.bounce_timers
        failure_count = inst.failures
        if rc != 0:
            # Non-zero exit: bump the failure count and back off.
            failure_count = inst.failures + 1
            retry_in = exponential_back_off(
                failures=failure_count,
                factor=self.config.get_deployd_worker_failure_backoff_factor(),
                base=2,
                max_time=6000,
            )
        if retry_in:
            # Not in steady state yet: requeue a fresh record for later.
            due = int(time.time()) + retry_in
            self.instances_to_bounce.put(ServiceInstance(
                service=inst.service,
                instance=inst.instance,
                cluster=self.config.get_cluster(),
                bounce_by=due,
                wait_until=due,
                watcher=self.name,
                bounce_timers=timers,
                failures=failure_count,
                processed_count=inst.processed_count + 1,
            ))
        self.busy = False
        time.sleep(0.1)
def test_exponential_back_off():
    """exponential_back_off(failures, factor, base, max_time) grows as
    factor * base ** failures, clamped at max_time."""
    assert exponential_back_off(0, 60, 2, 6000) == 60     # 60 * 2**0
    assert exponential_back_off(1, 60, 2, 6000) == 120    # 60 * 2**1 (first retry)
    assert exponential_back_off(2, 60, 2, 6000) == 240    # 60 * 2**2
    assert exponential_back_off(99, 60, 2, 6000) == 6000  # clamped at max_time