Example #1
0
 def run(self):
     """Process instances taken from ``instances_to_bounce_now`` in an endless loop.

     Each instance is handed to ``process_service_instance``. An exception
     there is logged and recorded as return code ``-2``, reusing the
     instance's existing ``bounce_timers``. Any non-zero return code bumps
     the failure count by one and replaces the retry delay with the value
     of ``exponential_back_off``. When the retry delay is truthy, a fresh
     ``ServiceInstance`` due at ``now + delay`` is put on
     ``instances_to_bounce_later``; otherwise nothing is re-queued.
     """
     self.log.info(f"{self.name} starting up")
     while True:
         current = self.instances_to_bounce_now.get()
         try:
             retry_delay, exit_code, timers = self.process_service_instance(current)
         except Exception as e:
             self.log.error("Worker failed to process service instance and will retry. "
                            "Caused by exception: {}".format(e))
             # -2 marks "processing raised"; it is handled like any other failure below.
             exit_code = -2
             timers = current.bounce_timers

         if exit_code == 0:
             failure_count = current.failures
         else:
             failure_count = current.failures + 1
             # A failure overrides whatever delay processing asked for.
             retry_delay = exponential_back_off(
                 failures=failure_count,
                 factor=self.config.get_deployd_worker_failure_backoff_factor(),
                 base=2,
                 max_time=6000,
             )

         if retry_delay:
             self.instances_to_bounce_later.put(
                 ServiceInstance(
                     service=current.service,
                     instance=current.instance,
                     cluster=self.config.get_cluster(),
                     bounce_by=int(time.time()) + retry_delay,
                     watcher=self.name,
                     bounce_timers=timers,
                     priority=current.priority,
                     failures=failure_count,
                 )
             )
         # Short pause between iterations.
         time.sleep(0.1)
Example #2
0
    def run(self):
        """Deploy instances taken from ``bounce_q`` in an endless loop.

        For each instance: set up its timers, fetch the current marathon
        apps, and call ``deploy_marathon_service`` with the
        ``setup_marathon`` timer running. An exception from the deploy call
        is logged and recorded as return code ``-2``. A non-zero return code
        increments the failure count and replaces the retry delay with the
        value of ``exponential_back_off``. When the retry delay is truthy, a
        new ``ServiceInstance`` due at ``now + delay`` is put on ``inbox_q``;
        otherwise the ``bounce_length`` timer is stopped and the instance is
        logged as being in steady state.
        """
        self.log.info("{} starting up".format(self.name))
        while True:
            current = self.bounce_q.get()
            failure_count = current.failures
            timers = self.setup_timers(current)
            service_name = current.service
            instance_name = current.instance
            self.log.info("{} processing {}.{}".format(
                self.name, service_name, instance_name))
            apps = marathon_tools.get_all_marathon_apps(
                self.marathon_client, embed_failures=True)
            timers.setup_marathon.start()
            try:
                exit_code, retry_delay = deploy_marathon_service(
                    service=service_name,
                    instance=instance_name,
                    client=self.marathon_client,
                    soa_dir=marathon_tools.DEFAULT_SOA_DIR,
                    marathon_config=self.marathon_config,
                    marathon_apps=apps)
            except Exception as e:
                self.log.warning(
                    "deploy_marathon_service caused exception: {}".format(e))
                # -2 marks "deploy raised"; handled as a failure just below.
                exit_code = -2
            if exit_code != 0:
                failure_count += 1
                # A failure overrides whatever delay the deploy asked for.
                retry_delay = exponential_back_off(
                    failures=failure_count,
                    factor=self.config.get_deployd_worker_failure_backoff_factor(),
                    base=2,
                    max_time=6000)

            timers.setup_marathon.stop()
            self.log.info(
                "setup marathon completed with exit code {} for {}.{}".format(
                    exit_code, service_name, instance_name))
            if not retry_delay:
                # Nothing left to do for this instance.
                timers.bounce_length.stop()
                self.log.info("{}.{} in steady state".format(
                    service_name, instance_name))
            else:
                timers.processed_by_worker.start()
                self.log.info(
                    "{}.{} not in steady state so bouncing again in {} "
                    "seconds".format(service_name,
                                     instance_name,
                                     retry_delay))
                self.inbox_q.put(
                    ServiceInstance(
                        service=service_name,
                        instance=instance_name,
                        bounce_by=int(time.time()) + retry_delay,
                        watcher=self.name,
                        bounce_timers=timers,
                        failures=failure_count))
            # Short pause between iterations.
            time.sleep(0.1)
Example #3
0
 def run(self) -> None:
     """Process instances taken from ``instances_to_bounce`` in an endless loop.

     Each item obtained from the queue is used as a context manager that
     yields the service instance. ``busy`` is set while the instance is
     being handled and cleared afterwards. An exception from
     ``process_service_instance`` is logged with its traceback and recorded
     as return code ``-2``. A non-zero return code bumps the failure count
     and replaces the retry delay with the value of ``exponential_back_off``.
     When a retry is wanted, the instance is either dropped (failure count
     has reached ``max_failures``) or put back on the same queue with an
     updated deadline, failure count and processed count.
     """
     self.log.info(f"{self.name} starting up")
     while True:
         with self.instances_to_bounce.get() as current:
             self.busy = True
             try:
                 retry_delay, exit_code = self.process_service_instance(current)
             except Exception:
                 self.log.error(
                     f"{self.name} Worker failed to process service instance and will retry. "
                     f"Caused by exception: {traceback.format_exc()}")
                 # -2 marks "processing raised"; handled as a failure below.
                 exit_code = -2

             if exit_code == 0:
                 failure_count = current.failures
             else:
                 failure_count = current.failures + 1
                 # A failure overrides whatever delay processing asked for.
                 retry_delay = exponential_back_off(
                     failures=failure_count,
                     factor=self.config.get_deployd_worker_failure_backoff_factor(),
                     base=2,
                     max_time=6000,
                 )

             if retry_delay and failure_count >= self.max_failures:
                 # Too many failures: log and do not re-queue.
                 self.log.info(
                     f"{self.name} Worker removing "
                     f"{service_instance.service}.{service_instance.instance} "
                     f"from queue because it has failed {failures} times "
                     f"(max is {self.max_failures})".replace(
                         "\0", "")
                     if False else
                     f"{self.name} Worker removing "
                     f"{current.service}.{current.instance} "
                     f"from queue because it has failed {failure_count} times "
                     f"(max is {self.max_failures})")
             elif retry_delay:
                 deadline = int(time.time()) + retry_delay
                 self.instances_to_bounce.put(
                     ServiceInstance(
                         service=current.service,
                         instance=current.instance,
                         bounce_by=deadline,
                         wait_until=deadline,
                         watcher=self.name,
                         failures=failure_count,
                         processed_count=current.processed_count + 1,
                         bounce_start_time=current.bounce_start_time,
                         enqueue_time=time.time(),
                     )
                 )
         self.busy = False
         # Short pause between iterations.
         time.sleep(0.1)
Example #4
0
 def run(self):
     """Process instances taken from ``instances_to_bounce`` in an endless loop.

     ``busy`` is set while an instance is being handled and cleared
     afterwards. An exception from ``process_service_instance`` is logged
     and recorded as return code ``-2``, reusing the instance's existing
     ``bounce_timers``. A non-zero return code bumps the failure count and
     replaces the retry delay with the value of ``exponential_back_off``.
     When the retry delay is truthy, a new ``ServiceInstance`` whose
     ``bounce_by`` and ``wait_until`` are both ``now + delay`` is put back
     on the same queue with an incremented processed count.
     """
     self.log.info(f"{self.name} starting up")
     while True:
         current = self.instances_to_bounce.get()
         self.busy = True
         try:
             retry_delay, exit_code, timers = self.process_service_instance(current)
         except Exception as e:
             self.log.error(
                 f"{self.name} Worker failed to process service instance and will retry. "
                 f"Caused by exception: {format(e)}")
             # -2 marks "processing raised"; handled as a failure below.
             exit_code = -2
             timers = current.bounce_timers

         if exit_code == 0:
             failure_count = current.failures
         else:
             failure_count = current.failures + 1
             # A failure overrides whatever delay processing asked for.
             retry_delay = exponential_back_off(
                 failures=failure_count,
                 factor=self.config.get_deployd_worker_failure_backoff_factor(),
                 base=2,
                 max_time=6000,
             )

         if retry_delay:
             deadline = int(time.time()) + retry_delay
             self.instances_to_bounce.put(
                 ServiceInstance(
                     service=current.service,
                     instance=current.instance,
                     cluster=self.config.get_cluster(),
                     bounce_by=deadline,
                     wait_until=deadline,
                     watcher=self.name,
                     bounce_timers=timers,
                     failures=failure_count,
                     processed_count=current.processed_count + 1,
                 )
             )
         self.busy = False
         # Short pause between iterations.
         time.sleep(0.1)
Example #5
0
def test_exponential_back_off():
    """Check ``exponential_back_off`` for zero, a few, and many failures.

    Every case uses the same remaining arguments ``60, 2, 6000``; only the
    first argument (the failure count) varies. The expected values look
    like ``60 * 2 ** failures`` capped at ``6000`` -- presumably
    factor/base/max_time; confirm against the function's definition.
    """
    expected_by_failures = (
        (0, 60),
        (2, 240),
        (99, 6000),
    )
    for failure_count, expected in expected_by_failures:
        assert exponential_back_off(failure_count, 60, 2, 6000) == expected