class Running(object):
    """Own a ``MesosSchedulerDriver`` for a scheduler and manage its lifecycle.

    The constructor wraps the scheduler in ``SchedulerProxy``, builds a
    ``FrameworkInfo`` and creates the driver.  Instances work as context
    managers: ``__enter__`` starts the driver, ``__exit__`` stops it and
    waits for it to finish.
    """

    def __init__(self, scheduler, name, user='',
                 master=os.getenv('MESOS_MASTER'), implicit_acknowledge=1,
                 *args, **kwargs):
        """Create the driver and install the shutdown hooks.

        Args:
            scheduler: scheduler object, wrapped in ``SchedulerProxy``.
            name: framework name passed to ``FrameworkInfo``.
            user: framework user passed to ``FrameworkInfo``.
            master: master address handed to the driver.  The default is the
                value of the ``MESOS_MASTER`` environment variable as read
                when this class is defined (not at call time).
            implicit_acknowledge: forwarded to the driver unchanged.
            *args, **kwargs: forwarded to ``FrameworkInfo``.

        Side effects: replaces the process-wide SIGINT and SIGTERM handlers
        and registers an ``atexit`` hook; all three stop the driver.
        """
        scheduler = SchedulerProxy(scheduler)
        framework = FrameworkInfo(name=name, user=user, *args, **kwargs)
        self.driver = MesosSchedulerDriver(scheduler, encode(framework),
                                           master, implicit_acknowledge)

        # Parameter is named ``signum`` so it does not shadow the ``signal``
        # module used just below.
        def shutdown(signum, frame):
            self.driver.stop()

        signal.signal(signal.SIGINT, shutdown)
        signal.signal(signal.SIGTERM, shutdown)
        atexit.register(self.driver.stop)

    def run(self):
        """Call ``driver.run()`` and return its result."""
        return self.driver.run()

    def start(self):
        """Start the driver and return the status it reports.

        Asserts that the status equals ``mesos_pb2.DRIVER_RUNNING``.
        """
        status = self.driver.start()
        assert status == mesos_pb2.DRIVER_RUNNING
        return status

    def stop(self):
        """Stop the driver, wait for it to finish and check the final status.

        Raises:
            RuntimeError: if ``driver.join()`` returns anything other than
                ``mesos_pb2.DRIVER_STOPPED``.
        """
        logging.info("Stopping Mesos driver")
        self.driver.stop()
        logging.info("Joining Mesos driver")
        result = self.driver.join()
        logging.info("Joined Mesos driver")
        if result != mesos_pb2.DRIVER_STOPPED:
            # Interpolate the status into the message; passing it as a second
            # argument would leave the format string unformatted.
            raise RuntimeError("Mesos driver failed with %s" % result)

    def join(self):
        """Call ``driver.join()`` and return its result."""
        return self.driver.join()

    def __enter__(self):
        """Start the driver and return this instance."""
        self.start()
        return self

    def __exit__(self, type, value, traceback):
        """Stop the driver on leaving the ``with`` block.

        Returns ``None``, so an exception raised inside the block propagates.
        """
        self.stop()
class Running(object):
    """Own a ``MesosSchedulerDriver`` for a scheduler and manage its lifecycle.

    The constructor wraps the scheduler in ``SchedulerProxy``, builds a
    ``FrameworkInfo`` and creates the driver.  Instances work as context
    managers: ``__enter__`` starts the driver, ``__exit__`` stops and joins
    it.
    """

    def __init__(self, scheduler, name, user='',
                 master=os.getenv('MESOS_MASTER'), implicit_acknowledge=1,
                 *args, **kwargs):
        """Create the driver and install the shutdown hooks.

        Args:
            scheduler: scheduler object, wrapped in ``SchedulerProxy``.
            name: framework name passed to ``FrameworkInfo``.
            user: framework user passed to ``FrameworkInfo``.
            master: master address handed to the driver.  The default is the
                value of the ``MESOS_MASTER`` environment variable as read
                when this class is defined (not at call time).
            implicit_acknowledge: forwarded to the driver unchanged.
            *args, **kwargs: forwarded to ``FrameworkInfo``.

        Side effects: replaces the process-wide SIGINT and SIGTERM handlers
        and registers an ``atexit`` hook; all three call ``self.stop``.
        """
        scheduler = SchedulerProxy(scheduler)
        framework = FrameworkInfo(name=name, user=user, *args, **kwargs)
        self.driver = MesosSchedulerDriver(scheduler, encode(framework),
                                           master, implicit_acknowledge)

        # Parameter is named ``signum`` so it does not shadow the ``signal``
        # module used just below.
        def shutdown(signum, frame):
            self.stop()

        signal.signal(signal.SIGINT, shutdown)
        signal.signal(signal.SIGTERM, shutdown)
        atexit.register(self.stop)

    def run(self):
        """Call ``driver.run()`` and return its result."""
        return self.driver.run()

    def start(self):
        """Start the driver and return the status it reports.

        Asserts that the status equals ``mesos_pb2.DRIVER_RUNNING``.
        """
        status = self.driver.start()
        assert status == mesos_pb2.DRIVER_RUNNING
        return status

    def stop(self):
        """Call ``driver.stop()`` and return its result."""
        return self.driver.stop()

    def join(self):
        """Call ``driver.join()`` and return its result."""
        return self.driver.join()

    def __enter__(self):
        """Start the driver and return this instance."""
        self.start()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Stop and join the driver on leaving the ``with`` block.

        Returns ``None`` (falsy), so any exception raised inside the block is
        re-raised by the ``with`` statement itself with its original
        traceback.  No manual re-raise is needed, which also avoids the
        Python-2-only three-argument ``raise`` form.
        """
        self.stop()
        self.join()
class Running(object):
    """Context manager that runs a scheduler on a ``MesosSchedulerDriver``.

    Construction builds the ``FrameworkInfo``, wraps the scheduler in
    ``SchedulerProxy`` and creates the driver.  Entering the context starts
    the driver; leaving it stops and joins the driver.
    """

    def __init__(self, scheduler, name, user='',
                 master=os.getenv('MESOS_MASTER'), implicit_acknowledge=1,
                 *args, **kwargs):
        """Create the driver and install the shutdown hooks.

        Args:
            scheduler: scheduler object, wrapped in ``SchedulerProxy``.
            name: framework name passed to ``FrameworkInfo``.
            user: framework user passed to ``FrameworkInfo``.
            master: master address handed to the driver.  The default is the
                value of the ``MESOS_MASTER`` environment variable as read
                when this class is defined (not at call time).
            implicit_acknowledge: forwarded to the driver unchanged.
            *args, **kwargs: forwarded to ``FrameworkInfo``.

        Side effects: replaces the process-wide SIGINT and SIGTERM handlers
        and registers an ``atexit`` hook; all three call ``self.stop``.
        """
        framework = FrameworkInfo(name=name, user=user, *args, **kwargs)
        scheduler = SchedulerProxy(scheduler)
        self.driver = MesosSchedulerDriver(scheduler, encode(framework),
                                           master, implicit_acknowledge)

        # Parameter is named ``signum`` so it does not shadow the ``signal``
        # module used just below.
        def shutdown(signum, frame):
            self.stop()

        signal.signal(signal.SIGINT, shutdown)
        signal.signal(signal.SIGTERM, shutdown)
        atexit.register(self.stop)

    def run(self):
        """Call ``driver.run()`` and return its result."""
        return self.driver.run()

    def start(self):
        """Start the driver and return the status it reports.

        Asserts that the status equals ``mesos_pb2.DRIVER_RUNNING``.
        """
        status = self.driver.start()
        assert status == mesos_pb2.DRIVER_RUNNING
        return status

    def stop(self):
        """Call ``driver.stop()`` and return its result."""
        return self.driver.stop()

    def join(self):
        """Call ``driver.join()`` and return its result."""
        return self.driver.join()

    def __enter__(self):
        """Start the driver and return this instance."""
        self.start()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Stop and join the driver on leaving the ``with`` block.

        Nothing is returned, so the ``with`` statement re-raises any
        exception from the block on its own, keeping the original traceback.
        An explicit three-argument ``raise`` (valid only on Python 2) is
        therefore unnecessary.
        """
        self.stop()
        self.join()
def run(api_url, mesos_master, user, config_dir, state_file, stats=None):
    """Run the Changes scheduler on a Mesos driver until shutdown, then exit.

    Builds a ``ChangesScheduler`` and a ``FrameworkInfo``, starts a
    ``MesosSchedulerDriver``, installs SIGINT/SIGTERM handlers, and blocks
    until one of those handlers has fired and the driver has been joined.

    Args:
        api_url: URL passed to ``ChangesAPI``.
        mesos_master: master address passed to the driver.
        user: value assigned to ``framework.user``.
        config_dir: passed to ``ChangesScheduler``.
        state_file: passed to ``ChangesScheduler``; also used in the
            "saved state" log message.
        stats: passed to ``ChangesScheduler``.

    Raises:
        SystemExit: always; status 0 if ``driver.join()`` returned
            ``mesos_pb2.DRIVER_STOPPED``, otherwise 1.
    """
    scheduler = ChangesScheduler(config_dir, state_file,
                                 api=ChangesAPI(api_url), stats=stats)

    executor = mesos_pb2.ExecutorInfo()
    executor.executor_id.value = "default"
    executor.command.value = os.path.abspath("./executor.py")
    executor.name = "Changes Executor"
    executor.source = "changes"

    framework = mesos_pb2.FrameworkInfo()
    framework.user = user
    framework.name = "Changes Scheduler"
    framework.principal = "changes"
    # Give the scheduler 30s to restart before mesos cancels the tasks.
    framework.failover_timeout = 30

    # Reuse a previously known framework id, if the scheduler has one.
    if scheduler.framework_id:
        framework.id.value = scheduler.framework_id
        executor.framework_id.value = scheduler.framework_id

    driver = MesosSchedulerDriver(scheduler, framework, mesos_master)

    stopped = threading.Event()

    # Handler parameters are named ``signum`` so they do not shadow the
    # ``signal`` module.
    def handle_interrupt(signum, frame):
        """SIGINT: wait for active tasks to drain, then stop the driver."""
        stopped.set()
        logging.info("Received interrupt, shutting down")
        logging.warning("Not saving state. Will wait for running tasks to finish.")
        scheduler.shuttingDown.set()
        while scheduler.activeTasks > 0:
            logging.info("Waiting for %d tasks to finish running",
                         scheduler.activeTasks)
            sleep(5)
        driver.stop()

    def handle_sigterm(signum, frame):
        """SIGTERM: save state if possible and stop the driver."""
        stopped.set()
        logging.info("Received sigterm, shutting down")
        scheduler.shuttingDown.set()
        if scheduler.state_file:
            try:
                scheduler.save_state()
                logging.info("Successfully saved state to %s.", state_file)
            except Exception:
                logging.exception("Failed to save state")
                driver.stop()
                return

            # With `failover` set to true, we do not tell Mesos to stop the
            # existing tasks started by this framework. Instead, the tasks
            # will run for `failover_timeout` more seconds set above or we
            # start a scheduler with the same framework id.
            driver.stop(True)
        else:
            # Single-line literal: the message must not contain a raw line
            # break inside the quotes.
            logging.warning("State file location not set. Not saving state. Existing builds will be cancelled.")
            driver.stop()

    signal.signal(signal.SIGINT, handle_interrupt)
    signal.signal(signal.SIGTERM, handle_sigterm)

    driver.start()
    logging.info("Driver started")
    # Wake up every 3 seconds until a signal handler sets the event.
    while not stopped.is_set():
        stopped.wait(3)

    status = 0 if driver.join() == mesos_pb2.DRIVER_STOPPED else 1

    # Ensure that the driver process terminates.
    if status == 1:
        driver.stop()

    sys.exit(status)
def run(api_url, mesos_master, user, config_dir, state_file,
        changes_request_limit, http_port, stats=None):
    """Run the Changes scheduler and its HTTP state endpoint, then exit.

    Builds a ``ChangesScheduler`` and a ``FrameworkInfo``, starts a
    ``MesosSchedulerDriver``, installs SIGINT/SIGTERM handlers, serves
    ``/api/state_json`` from a Flask app on a background thread, and polls
    via ``scheduler.poll_changes_until_shutdown`` before joining the driver.

    Args:
        api_url: URL passed to ``ChangesAPI``.
        mesos_master: master address passed to the driver.
        user: value assigned to ``framework.user``.
        config_dir: directory containing the ``blacklist`` file given to
            ``FileBlacklist``.
        state_file: passed to ``ChangesScheduler``; also used in the
            "saved state" log message.
        changes_request_limit: passed to ``ChangesScheduler``.
        http_port: port passed to ``app.run`` for the Flask app.
        stats: passed to ``ChangesScheduler``.

    Raises:
        SystemExit: always; status 0 if ``driver.join()`` returned
            ``mesos_pb2.DRIVER_STOPPED``, otherwise 1.
    """
    scheduler = ChangesScheduler(
        state_file,
        api=ChangesAPI(api_url),
        stats=stats,
        blacklist=FileBlacklist(os.path.join(config_dir, 'blacklist')),
        changes_request_limit=changes_request_limit)

    executor = mesos_pb2.ExecutorInfo()
    executor.executor_id.value = "default"
    executor.command.value = os.path.abspath("./executor.py")
    executor.name = "Changes Executor"
    executor.source = "changes"

    framework = mesos_pb2.FrameworkInfo()
    framework.user = user
    framework.name = "Changes Scheduler"
    framework.principal = "changes"
    # Give the scheduler 1 week to restart before mesos cancels the tasks.
    # this is the setting recommended by the docs.
    framework.failover_timeout = 3600 * 24 * 7

    # Reuse a previously known framework id, if the scheduler has one.
    if scheduler.framework_id:
        framework.id.value = scheduler.framework_id
        executor.framework_id.value = scheduler.framework_id

    driver = MesosSchedulerDriver(scheduler, framework, mesos_master)

    stopped = threading.Event()

    # Handler parameters are named ``signum`` so they do not shadow the
    # ``signal`` module.
    def handle_interrupt(signum, frame):
        """SIGINT: wait for active tasks to drain, then stop the driver."""
        stopped.set()
        logging.info("Received interrupt, shutting down")
        logging.warning(
            "Not saving state. Will wait for running tasks to finish.")
        scheduler.shuttingDown.set()
        while scheduler.activeTasks > 0:
            logging.info("Waiting for %d tasks to finish running",
                         scheduler.activeTasks)
            sleep(5)
        driver.stop()

    def handle_sigterm(signum, frame):
        """SIGTERM: save state if possible and stop the driver."""
        # TODO: Avoid save_state race conditions by having handle_sigterm()
        # only set shuttingDown, then do the actual save-state and driver.stop()
        # in the main thread after all other threads are join()ed.
        # Also, stopped doesn't appear to be used.
        stopped.set()
        logging.info("Received sigterm, shutting down")
        scheduler.shuttingDown.set()
        if scheduler.state_file:
            try:
                scheduler.save_state()
                logging.info("Successfully saved state to %s.", state_file)
            except Exception:
                logging.exception("Failed to save state")
                driver.stop()
                return

            # With `failover` set to true, we do not tell Mesos to stop the
            # existing tasks started by this framework. Instead, the tasks
            # will run for `failover_timeout` more seconds set above or we
            # start a scheduler with the same framework id.
            driver.stop(True)
        else:
            logging.warning(
                "State file location not set. Not saving state. Existing builds will be cancelled."
            )
            driver.stop()

    signal.signal(signal.SIGINT, handle_interrupt)
    signal.signal(signal.SIGTERM, handle_sigterm)

    driver.start()
    logging.info("Driver started")

    app = Flask("Changes Mesos Scheduler")
    app.add_url_rule('/api/state_json', 'state_json',
                     json_handler(scheduler.state_json))
    http_thread = threading.Thread(target=app.run, kwargs={'port': http_port})
    # Daemon thread: threading.Thread offers no terminate(), so the only way
    # to keep the serving thread from blocking interpreter exit is to let it
    # be abandoned when the main thread exits.
    http_thread.daemon = True
    http_thread.start()

    scheduler.poll_changes_until_shutdown(driver, 5)
    status = 0
    if driver.join() == mesos_pb2.DRIVER_STOPPED:
        logging.info("Driver stopped cleanly.")
    else:
        # Ensure that the driver process terminates.
        status = 1
        logging.info("Stopping driver forcibly.")
        driver.stop()

    # The HTTP thread is a daemon and goes away with the process; calling
    # terminate() would raise AttributeError and join() would block for as
    # long as app.run() keeps serving.
    logging.info("Stopping HTTP server.")

    logging.info("Clean shutdown complete. Exiting status %d.", status)
    sys.exit(status)