def main(_):
    """Run a ZknsServer on a daemon thread and wait until interrupted.

    A TwitterKazooClient is started for ``FLAGS.zk``, the SOA data is built
    from the ``FLAGS.soa_*`` flags, and the server is run on
    ``FLAGS.listen``/``FLAGS.port``.
    """
    zk_client = kazoo_client.TwitterKazooClient(FLAGS.zk)
    zk_client.start()

    # Unset (falsy) nameserver / e-mail flags fall back to this host's FQDN
    # and to 'root.<domain>' respectively.
    nameserver = FLAGS.soa_nameserver or socket.getfqdn()
    contact = FLAGS.soa_email or 'root.%s' % FLAGS.domain
    soa = SOAData(
        ttl=FLAGS.soa_ttl,
        ns1=nameserver,
        email=contact,
        refresh=FLAGS.soa_refresh,
        retry=FLAGS.soa_retry,
        expire=FLAGS.soa_expire,
        nxdomain_ttl=FLAGS.soa_nxdomain_ttl)

    zkns = ZknsServer(
        zk_handle=zk_client,
        domain=FLAGS.domain,
        ttl=FLAGS.ttl,
        soa_data=soa)

    def serve():
        zkns.run(FLAGS.listen, FLAGS.port, server='cherrypy')

    worker = ExceptionalThread(target=serve)
    worker.daemon = True
    worker.start()

    try:
        wait_forever()
    except KeyboardInterrupt:
        log.fatal('KeyboardInterrupt! Shutting down.')
def __init__(self, task_id, task_monitor, disk_collector=DiskCollector,
             process_collection_interval=PROCESS_COLLECTION_INTERVAL,
             disk_collection_interval=DISK_COLLECTION_INTERVAL,
             history_time=HISTORY_TIME,
             history_provider=HistoryProvider()):
    """Initialise resource-collection state and the underlying daemon thread.

    task_id: identifier of the monitored task; also embedded in the thread name
    task_monitor: TaskMonitor object specifying the task whose resources
        should be monitored
    disk_collector: stored as the disk collector class; no collector is
        created here
    process_collection_interval, disk_collection_interval: values exposing
        ``as_(Time.SECONDS)``; stored in seconds
    history_time, history_provider: the history is obtained from
        ``history_provider.provides(history_time, <smaller interval>)``
    """
    self._task_monitor = task_monitor  # exposes PIDs, sandbox
    self._task_id = task_id
    log.debug('Initialising resource collection for task %s' % self._task_id)

    self._process_collectors = {}  # ProcessStatus => ProcessTreeCollector
    self._disk_collector_class = disk_collector
    self._disk_collector = None

    process_secs = process_collection_interval.as_(Time.SECONDS)
    disk_secs = disk_collection_interval.as_(Time.SECONDS)
    self._process_collection_interval = process_secs
    self._disk_collection_interval = disk_secs

    # The history is sized against the more frequent of the two collections.
    shortest_interval = min(process_secs, disk_secs)
    self._history = history_provider.provides(history_time, shortest_interval)
    self._kill_signal = threading.Event()

    thread_name = '%s[%s]' % (self.__class__.__name__, task_id)
    ExceptionalThread.__init__(self, name=thread_name)
    self.daemon = True
def __init__(
        self,
        task_id,
        task_monitor,
        disk_collector_provider=DiskCollectorProvider(),
        process_collection_interval=PROCESS_COLLECTION_INTERVAL,
        disk_collection_interval=DiskCollectorSettings.DISK_COLLECTION_INTERVAL,
        history_time=HISTORY_TIME,
        history_provider=HistoryProvider()):
    """Initialise resource-collection state and the underlying daemon thread.

    task_id: identifier of the monitored task; also embedded in the thread name
    task_monitor: TaskMonitor object specifying the task whose resources
        should be monitored
    disk_collector_provider: stored as-is; no disk collector is created here
    process_collection_interval, disk_collection_interval: values exposing
        ``as_(Time.SECONDS)``; stored in seconds
    history_time, history_provider: the history is obtained from
        ``history_provider.provides(history_time, <smaller interval>)``
    """
    self._task_monitor = task_monitor  # exposes PIDs, sandbox
    self._task_id = task_id
    log.debug('Initialising resource collection for task %s', self._task_id)

    self._process_collectors = {}  # ProcessStatus => ProcessTreeCollector
    self._disk_collector_provider = disk_collector_provider
    self._disk_collector = None

    process_secs = process_collection_interval.as_(Time.SECONDS)
    disk_secs = disk_collection_interval.as_(Time.SECONDS)
    self._process_collection_interval = process_secs
    self._disk_collection_interval = disk_secs

    # The history is sized against the more frequent of the two collections.
    shortest_interval = min(process_secs, disk_secs)
    self._history = history_provider.provides(history_time, shortest_interval)
    self._kill_signal = threading.Event()

    thread_name = '%s[%s]' % (self.__class__.__name__, task_id)
    ExceptionalThread.__init__(self, name=thread_name)
    self.daemon = True
def __init__(self, task_id, task_monitor,
             process_collector=ProcessTreeCollector,
             disk_collector=DiskCollector,
             process_collection_interval=Amount(20, Time.SECONDS),
             disk_collection_interval=Amount(1, Time.MINUTES),
             history_time=Amount(1, Time.HOURS)):
    """Initialise resource-collection state and the underlying daemon thread.

    task_id: identifier of the monitored task
    task_monitor: TaskMonitor object specifying the task whose resources
        should be monitored
    process_collector: stored as the process collector factory
    disk_collector: stored as the disk collector class; no collector is
        created here
    process_collection_interval, disk_collection_interval: stored in seconds
    history_time: divided by the smaller interval to get the history length

    Raises ValueError when the computed history length exceeds MAX_HISTORY.
    """
    self._task_monitor = task_monitor  # exposes PIDs, sandbox
    self._task_id = task_id
    log.debug('Initialising resource collection for task %s' % self._task_id)

    self._process_collectors = {}  # ProcessStatus => ProcessTreeCollector
    self._process_collector_factory = process_collector
    self._disk_collector_class = disk_collector
    self._disk_collector = None

    process_secs = process_collection_interval.as_(Time.SECONDS)
    disk_secs = disk_collection_interval.as_(Time.SECONDS)
    self._process_collection_interval = process_secs
    self._disk_collection_interval = disk_secs

    # One history slot per tick of the more frequent collection.
    shortest_interval = min(process_secs, disk_secs)
    history_length = int(history_time.as_(Time.SECONDS) / shortest_interval)
    if history_length > self.MAX_HISTORY:
        raise ValueError("Requested history length too large")
    log.debug("Initialising ResourceHistory of length %s" % history_length)
    self._history = ResourceHistory(history_length)

    self._kill_signal = threading.Event()
    ExceptionalThread.__init__(self)
    self.daemon = True
def __init__(self, checkpoint_root, verbose=True, task_killer=TaskKiller,
             executor_detector=ExecutorDetector,
             task_garbage_collector=TaskGarbageCollector, clock=time):
    """Initialise the executor/thread bases and the garbage-collection state.

    checkpoint_root: stored, and passed as ``root`` to task_garbage_collector
    verbose: accepted but not used in this constructor
    task_killer: stored as-is
    executor_detector: called with no arguments to build the detector
    task_garbage_collector: called with ``root=checkpoint_root``
    clock: stored as the clock object (defaults to the ``time`` module)
    """
    ExecutorBase.__init__(self)
    ExceptionalThread.__init__(self)
    self.daemon = True
    self._stop_event = threading.Event()

    # mapping of task_id => (TaskInfo, AdjustRetainedTasks), in the order in
    # which they were received via a launchTask.
    self._gc_task_queue = OrderedDict()

    # cache the ExecutorDriver provided by the slave, so we can use it out
    # of band from slave-initiated callbacks. This should be supplied by
    # ExecutorBase.registered() when the executor first registers with the
    # slave.
    self._driver = None
    # cache the slave ID provided by the slave
    self._slave_id = None
    # the task_id currently being executed by the ThermosGCExecutor, if any
    self._task_id = None
    # the start time of a task currently being executed, if any
    self._start_time = None

    self._detector = executor_detector()
    self._collector = task_garbage_collector(root=checkpoint_root)
    self._clock = clock
    self._task_killer = task_killer
    self._checkpoint_root = checkpoint_root

    dropped_gauge = AtomicGauge('dropped_tasks')
    self._dropped_tasks = dropped_gauge
    self.metrics.register(dropped_gauge)
def __init__(self, task_id, task_monitor,
             process_collector=ProcessTreeCollector,
             disk_collector=DiskCollector,
             process_collection_interval=Amount(20, Time.SECONDS),
             disk_collection_interval=Amount(1, Time.MINUTES),
             history_time=Amount(1, Time.HOURS)):
    """Initialise resource-collection state and the underlying daemon thread.

    task_id: identifier of the monitored task; also embedded in the thread name
    task_monitor: TaskMonitor object specifying the task whose resources
        should be monitored
    process_collector: stored as the process collector factory
    disk_collector: stored as the disk collector class; no collector is
        created here
    process_collection_interval, disk_collection_interval: stored in seconds
    history_time: divided by the smaller interval to get the history length

    Raises ValueError when the computed history length exceeds MAX_HISTORY.
    """
    self._task_monitor = task_monitor  # exposes PIDs, sandbox
    self._task_id = task_id
    log.debug('Initialising resource collection for task %s' % self._task_id)

    self._process_collectors = {}  # ProcessStatus => ProcessTreeCollector
    self._process_collector_factory = process_collector
    self._disk_collector_class = disk_collector
    self._disk_collector = None

    process_secs = process_collection_interval.as_(Time.SECONDS)
    disk_secs = disk_collection_interval.as_(Time.SECONDS)
    self._process_collection_interval = process_secs
    self._disk_collection_interval = disk_secs

    # One history slot per tick of the more frequent collection.
    shortest_interval = min(process_secs, disk_secs)
    history_length = int(history_time.as_(Time.SECONDS) / shortest_interval)
    if history_length > self.MAX_HISTORY:
        raise ValueError("Requested history length too large")
    log.debug("Initialising ResourceHistory of length %s" % history_length)
    self._history = ResourceHistory(history_length)

    self._kill_signal = threading.Event()
    thread_name = '%s[%s]' % (self.__class__.__name__, task_id)
    ExceptionalThread.__init__(self, name=thread_name)
    self.daemon = True
def __init__(self, clock=time):
    """Register the 'orphan' and mutator gauges, then set up the daemon thread.

    clock: stored as the clock object (defaults to the ``time`` module)
    """
    self._clock = clock
    self._self = psutil.Process(os.getpid())

    self._orphan = False
    # The gauge reads the flag lazily, so later changes to _orphan are seen.
    self.metrics.register(LambdaGauge('orphan', lambda: int(self._orphan)))

    # One zero-initialised gauge per name in MUTATOR_METRICS.
    self._metrics = {
        name: MutatorGauge(name, 0) for name in self.MUTATOR_METRICS
    }
    for gauge in self._metrics.values():
        self.metrics.register(gauge)

    ExceptionalThread.__init__(self)
    self.daemon = True
def main(_, options):
    """Start the observer and run its server on options.ip / options.port."""
    observer = initialize(options)
    observer.start()
    http_server = configure_server(observer)

    def serve():
        http_server.run(options.ip, options.port, 'cherrypy')

    server_thread = ExceptionalThread(target=serve)
    server_thread.daemon = True
    server_thread.start()

    sleep_forever()
def main(_, options):
    """Start the observer and run its server on 0.0.0.0 at options.port."""
    observer = initialize(options)
    observer.start()
    http_server = configure_server(observer)

    def serve():
        http_server.run('0.0.0.0', options.port, 'cherrypy')

    server_thread = ExceptionalThread(target=serve)
    server_thread.daemon = True
    server_thread.start()

    sleep_forever()
def main(_, opts):
    """Start a TaskObserver for opts.root and run its server on opts.port."""
    observer = TaskObserver(FixedPathDetector(opts.root))
    observer.start()
    http_server = configure_server(observer)

    def serve():
        http_server.run('0.0.0.0', opts.port, 'cherrypy')

    server_thread = ExceptionalThread(target=serve)
    server_thread.daemon = True
    server_thread.start()

    sleep_forever()
def __init__(self, root, resource_monitor_class=TaskResourceMonitor):
    """Set up path/detector helpers, task tables and the daemon thread.

    root: passed to TaskPath and TaskDetector
    resource_monitor_class: must be a subclass of ResourceMonitorBase

    Raises ValueError when resource_monitor_class fails the subclass check.
    """
    self._pathspec = TaskPath(root=root)
    self._detector = TaskDetector(root)

    is_monitor = issubclass(resource_monitor_class, ResourceMonitorBase)
    if not is_monitor:
        raise ValueError("resource monitor class must implement ResourceMonitorBase!")
    self._resource_monitor = resource_monitor_class

    self._active_tasks = dict()  # task_id => ActiveObservedTask
    self._finished_tasks = dict()  # task_id => FinishedObservedTask
    self._stop_event = threading.Event()

    ExceptionalThread.__init__(self)
    Lockable.__init__(self)
    self.daemon = True
def main(_, opts):
    """Start a TaskObserver for opts.root and run its server on opts.port."""
    observer = TaskObserver(FixedPathDetector(opts.root))
    observer.start()
    http_server = configure_server(observer)

    def serve():
        http_server.run('0.0.0.0', opts.port, 'cherrypy')

    server_thread = ExceptionalThread(target=serve)
    server_thread.daemon = True
    server_thread.start()

    sleep_forever()
def main(args, opts):
    """Run a RedirServer on a daemon thread, then wait forever."""
    redirector = RedirServer(
        opts.zk_basepath,
        opts.subdomain,
        opts.base_domain)

    def serve():
        redirector.run(opts.listen, opts.port, server='cherrypy')

    server_thread = ExceptionalThread(target=serve)
    server_thread.daemon = True
    server_thread.start()

    wait_forever()
def main(_, options):
    """Observe tasks under options.root and options.mesos_root; serve on options.port."""
    fixed_detector = FixedPathDetector(options.root)
    mesos_detector = MesosPathDetector(options.mesos_root)
    detector = ChainedPathDetector(fixed_detector, mesos_detector)

    task_observer = TaskObserver(detector)
    task_observer.start()
    http_server = configure_server(task_observer)

    def serve():
        http_server.run('0.0.0.0', options.port, 'cherrypy')

    server_thread = ExceptionalThread(target=serve)
    server_thread.daemon = True
    server_thread.start()

    sleep_forever()
def main(_, options):
    """Observe tasks under options.root and options.mesos_root; serve on options.port."""
    fixed_detector = FixedPathDetector(options.root)
    mesos_detector = MesosPathDetector(options.mesos_root)
    detector = ChainedPathDetector(fixed_detector, mesos_detector)

    task_observer = TaskObserver(detector)
    task_observer.start()
    http_server = configure_server(task_observer)

    def serve():
        http_server.run('0.0.0.0', options.port, 'cherrypy')

    server_thread = ExceptionalThread(target=serve)
    server_thread.daemon = True
    server_thread.start()

    sleep_forever()
def test_quitquitquit():
    """A quitquitquit() issued from another thread ends app.main() with rc 0."""

    def main():
        app.wait_forever()

    def wait_and_quit():
        time.sleep(0.5)
        app.quitquitquit()

    # Bind `app` before the helper thread exists. Both closures resolve the
    # name lazily, so starting the thread first only worked because the sleep
    # outlasted the TestApplication constructor.
    app = TestApplication(main)

    stop_thread = ExceptionalThread(target=wait_and_quit)
    stop_thread.start()

    app.main()
    assert app.exited_rc == 0
def __init__(self, clock=time):
    """Register the version, 'orphan' and mutator gauges; set up the thread.

    clock: stored as the clock object (defaults to the ``time`` module)

    The version is taken from ``get_release_from_binary`` applied to the
    process cwd joined with its second command-line entry. It falls back to
    "UNKNOWN" when the process object has no ``getcwd`` or when the command
    line has fewer than two entries.
    """
    self._clock = clock
    self._self = psutil.Process(os.getpid())

    version = "UNKNOWN"
    if hasattr(self._self, "getcwd"):
        try:
            binary = os.path.join(self._self.getcwd(), self._self.cmdline[1])
        except IndexError:
            # Fewer than two argv entries: keep the "UNKNOWN" fallback
            # instead of failing construction.
            pass
        else:
            version = self.get_release_from_binary(binary)
    self._version = version
    self.metrics.register(NamedGauge("version", self._version))

    self._orphan = False
    # The gauge reads the flag lazily, so later changes to _orphan are seen.
    self.metrics.register(LambdaGauge("orphan", lambda: int(self._orphan)))

    self._metrics = dict((metric, MutatorGauge(metric, 0)) for metric in self.MUTATOR_METRICS)
    for metric in self._metrics.values():
        self.metrics.register(metric)

    ExceptionalThread.__init__(self)
    self.daemon = True
def __init__(self, clock=time):
    """Register the version, 'orphan' and mutator gauges; set up the thread.

    clock: stored as the clock object (defaults to the ``time`` module)

    The version falls back to 'UNKNOWN' when resolving the binary path or
    reading the release raises IndexError or psutil.Error.
    """
    self._clock = clock
    self._self = psutil.Process(os.getpid())

    try:
        binary = os.path.join(self._self.cwd(), self._self.cmdline()[1])
        self._version = self.get_release_from_binary(binary)
    except (IndexError, psutil.Error):
        self._version = 'UNKNOWN'
    self.metrics.register(NamedGauge('version', self._version))

    self._orphan = False
    # The gauge reads the flag lazily, so later changes to _orphan are seen.
    self.metrics.register(LambdaGauge('orphan', lambda: int(self._orphan)))

    # One zero-initialised gauge per name in MUTATOR_METRICS.
    self._metrics = {
        name: MutatorGauge(name, 0) for name in self.MUTATOR_METRICS
    }
    for gauge in self._metrics.values():
        self.metrics.register(gauge)

    ExceptionalThread.__init__(self)
    self.daemon = True
def __init__(self,
             path_detector,
             interval=POLLING_INTERVAL,
             task_process_collection_interval=TaskResourceMonitor.PROCESS_COLLECTION_INTERVAL,
             task_disk_collection_interval=TaskResourceMonitor.DISK_COLLECTION_INTERVAL):
    """Wire up the task detector callbacks, task tables and daemon thread.

    path_detector: passed to ObserverTaskDetector together with this
        object's active/finished/removed callbacks
    interval: stored as the polling interval
    task_process_collection_interval, task_disk_collection_interval: stored
        as-is
    """
    self._detector = ObserverTaskDetector(
        path_detector,
        self.__on_active,
        self.__on_finished,
        self.__on_removed)

    self._interval = interval
    self._task_process_collection_interval = task_process_collection_interval
    self._task_disk_collection_interval = task_disk_collection_interval

    self._active_tasks = dict()  # task_id => ActiveObservedTask
    self._finished_tasks = dict()  # task_id => FinishedObservedTask
    self._stop_event = threading.Event()

    ExceptionalThread.__init__(self)
    Lockable.__init__(self)
    self.daemon = True
def __init__(self,
             path_detector,
             resource_monitor_class=TaskResourceMonitor,
             interval=POLLING_INTERVAL):
    """Wire up the task detector callbacks, task tables and daemon thread.

    path_detector: passed to ObserverTaskDetector together with this
        object's active/finished/removed callbacks
    resource_monitor_class: must be a subclass of ResourceMonitorBase
    interval: stored as the polling interval

    Raises ValueError when resource_monitor_class fails the subclass check.
    """
    self._detector = ObserverTaskDetector(
        path_detector,
        self.__on_active,
        self.__on_finished,
        self.__on_removed)

    is_monitor = issubclass(resource_monitor_class, ResourceMonitorBase)
    if not is_monitor:
        raise ValueError("resource monitor class must implement ResourceMonitorBase!")
    self._resource_monitor_class = resource_monitor_class

    self._interval = interval
    self._active_tasks = dict()  # task_id => ActiveObservedTask
    self._finished_tasks = dict()  # task_id => FinishedObservedTask
    self._stop_event = threading.Event()

    ExceptionalThread.__init__(self)
    Lockable.__init__(self)
    self.daemon = True
def main(_, opts):
    """Run an S3Web server for opts.bucket on a daemon thread, then wait."""
    if not opts.bucket:
        log.error('--bucket is required.')
        app.help()

    s3_server = S3Web(
        bucket=opts.bucket,
        prefix=opts.prefix,
        access_key_id=opts.access_key_id,
        secret_key=opts.secret_key)

    def serve():
        s3_server.run(opts.listen, opts.port, server='cherrypy')

    server_thread = ExceptionalThread(target=serve)
    server_thread.daemon = True
    server_thread.start()

    log.info('Ready.')
    app.wait_forever()
def __init__(self,
             path_detector,
             interval=POLLING_INTERVAL,
             task_process_collection_interval=TaskResourceMonitor.PROCESS_COLLECTION_INTERVAL,
             enable_mesos_disk_collector=False,
             disk_collector_settings=DiskCollectorSettings()):
    """Wire up the task detector callbacks, task tables and daemon thread.

    path_detector: passed to ObserverTaskDetector together with this
        object's active/finished/removed callbacks
    interval: stored as the polling interval
    task_process_collection_interval: stored as-is
    enable_mesos_disk_collector, disk_collector_settings: stored as-is
    """
    self._detector = ObserverTaskDetector(
        path_detector,
        self.__on_active,
        self.__on_finished,
        self.__on_removed)

    self._interval = interval
    self._task_process_collection_interval = task_process_collection_interval
    self._enable_mesos_disk_collector = enable_mesos_disk_collector
    self._disk_collector_settings = disk_collector_settings

    self._active_tasks = dict()  # task_id => ActiveObservedTask
    self._finished_tasks = dict()  # task_id => FinishedObservedTask
    self._stop_event = threading.Event()

    ExceptionalThread.__init__(self)
    Lockable.__init__(self)
    self.daemon = True
def main(args, opts):
    """Run a ZooKeeper-backed RedirServer on a daemon thread and join it."""
    zk_client = kazoo_client.TwitterKazooClient(opts.zk)
    zk_client.start()

    redirector = RedirServer(
        zk_client,
        opts.zk_basepath,
        opts.scheduler_url,
        opts.subdomain,
        opts.base_domain)

    def serve():
        redirector.run(opts.listen, opts.port, server='cherrypy')

    server_thread = ExceptionalThread(target=serve)
    server_thread.daemon = True
    server_thread.start()

    # Wait forever, basically.
    server_thread.join()
def main(args, opts):
    """Serve diagnostics and task-observer routes on opts.port.

    Any positional argument is rejected: an error is printed to stderr, help
    is shown and the process exits with status 1.
    """
    if args:
        unrecognized = " ".join(args)
        print("ERROR: unrecognized arguments: %s\n" % (unrecognized), file=sys.stderr)
        app.help()
        sys.exit(1)

    http_server = HttpServer()
    http_server.mount_routes(DiagnosticsEndpoints())

    observer = TaskObserver(opts.root)
    observer.start()
    http_server.mount_routes(BottleObserver(observer))

    def serve():
        http_server.run('0.0.0.0', opts.port, 'cherrypy')

    server_thread = ExceptionalThread(target=serve)
    server_thread.daemon = True
    server_thread.start()
    server_thread.join()
def main(args, options):
    """Validate options, restore or create scheduler state, and run Mysos.

    Starts the Mesos scheduler driver and an HTTP server thread, then waits
    for the scheduler's 'stopped' event (or SIGINT) before shutting the app
    down with exit status 1.
    """
    log.info("Options in use: %s", options)

    # (option attribute, message reported when it is unset), checked in order.
    required_options = (
        ('api_port', 'Must specify --port'),
        ('mesos_master', 'Must specify --mesos_master'),
        ('framework_user', 'Must specify --framework_user'),
        ('executor_uri', 'Must specify --executor_uri'),
        ('executor_cmd', 'Must specify --executor_cmd'),
        ('zk_url', 'Must specify --zk_url'),
        ('admin_keypath', 'Must specify --admin_keypath'),
    )
    for attr_name, complaint in required_options:
        if not getattr(options, attr_name):
            app.error(complaint)

    try:
        election_timeout = parse_time(options.election_timeout)
        framework_failover_timeout = parse_time(options.framework_failover_timeout)
    except InvalidTime as e:
        app.error(e.message)

    try:
        _, zk_servers, zk_root = zookeeper.parse(options.zk_url)
    except Exception as e:
        app.error("Invalid --zk_url: %s" % e.message)

    web_assets_dir = os.path.join(options.work_dir, "web")
    pkgutil.unpack_assets(web_assets_dir, MYSOS_MODULE, ASSET_RELPATH)
    log.info("Extracted web assets into %s" % options.work_dir)

    fw_principal = None
    fw_secret = None
    auth_file = options.framework_authentication_file
    if auth_file:
        try:
            # NOTE(review): yaml.load can construct arbitrary objects;
            # consider yaml.safe_load unless this file is fully trusted.
            with open(auth_file, "r") as f:
                cred = yaml.load(f)
            fw_principal = cred["principal"]
            fw_secret = cred["secret"]
            log.info("Loaded credential (principal=%s) for framework authentication" % fw_principal)
        except IOError as e:
            app.error("Unable to read the framework authentication key file: %s" % e)
        except (KeyError, yaml.YAMLError) as e:
            app.error("Invalid framework authentication key file format %s" % e)

    log.info("Starting Mysos scheduler")

    kazoo = KazooClient(zk_servers)
    kazoo.start()

    if options.state_storage == 'zk':
        log.info("Using ZooKeeper (path: %s) for state storage" % zk_root)
        state_provider = ZooKeeperStateProvider(kazoo, zk_root)
    else:
        log.info("Using local disk for state storage")
        state_provider = LocalStateProvider(options.work_dir)

    try:
        state = state_provider.load_scheduler_state()
    except StateProvider.Error as e:
        app.error(e.message)

    if state:
        log.info("Successfully restored scheduler state")
        framework_info = state.framework_info
        if framework_info.HasField('id'):
            log.info("Recovered scheduler's FrameworkID is %s" % framework_info.id.value)
    else:
        log.info("No scheduler state to restore")
        failover_secs = framework_failover_timeout.as_(Time.SECONDS)
        framework_info = FrameworkInfo(
            user=options.framework_user,
            name=FRAMEWORK_NAME,
            checkpoint=True,
            failover_timeout=failover_secs,
            role=options.framework_role)
        if fw_principal:
            framework_info.principal = fw_principal
        state = Scheduler(framework_info)
        state_provider.dump_scheduler_state(state)

    scheduler = MysosScheduler(
        state,
        state_provider,
        options.framework_user,
        options.executor_uri,
        options.executor_cmd,
        kazoo,
        options.zk_url,
        election_timeout,
        options.admin_keypath,
        installer_args=options.installer_args,
        backup_store_args=options.backup_store_args,
        executor_environ=options.executor_environ,
        framework_role=options.framework_role)

    # The credential is passed as a fourth argument only when both parts of
    # it were loaded.
    driver_args = [scheduler, framework_info, options.mesos_master]
    if fw_principal and fw_secret:
        driver_args.append(Credential(principal=fw_principal, secret=fw_secret))
    scheduler_driver = mesos.native.MesosSchedulerDriver(*driver_args)
    scheduler_driver.start()

    server = HttpServer()
    server.mount_routes(MysosServer(scheduler, web_assets_dir))

    server_thread = ExceptionalThread(
        target=server.run,
        args=('0.0.0.0', options.api_port, 'cherrypy'))
    server_thread.daemon = True
    server_thread.start()

    try:
        # Wait for the scheduler to stop.
        # The use of 'stopped' event instead of scheduler_driver.join() is
        # necessary to stop the process with SIGINT.
        while not scheduler.stopped.wait(timeout=0.5):
            pass
    except KeyboardInterrupt:
        log.info('Interrupted, exiting.')
    else:
        log.info('Scheduler exited.')

    # Mysos scheduler is supposed to be long-running thus the use of exit
    # status 1.
    app.shutdown(1)
def __init__(self, period, clock):
    """Store the period and clock, create the stop event, set up the thread.

    period: stored as-is
    clock: stored as the clock object
    """
    # NOTE(review): on Python 3, threading.Thread defines its own `_stop`;
    # confirm this attribute name is safe on the interpreter in use.
    self._stop = threading.Event()
    self._period, self._clock = period, clock
    ExceptionalThread.__init__(self)
    self.daemon = True
def start(self):
    """Start the thread by delegating to ExceptionalThread.start."""
    ExceptionalThread.start(self)
def main(args, options):
    """Validate options, restore or create scheduler state, and run Mysos.

    Starts the Mesos scheduler driver and an HTTP server thread, then waits
    for the scheduler's 'stopped' event (or SIGINT) before shutting the app
    down with exit status 1.
    """
    log.info("Options in use: %s", options)

    # (option attribute, message reported when it is unset), checked in order.
    required_options = (
        ('api_port', 'Must specify --port'),
        ('mesos_master', 'Must specify --mesos_master'),
        ('framework_user', 'Must specify --framework_user'),
        ('executor_uri', 'Must specify --executor_uri'),
        ('executor_cmd', 'Must specify --executor_cmd'),
        ('zk_url', 'Must specify --zk_url'),
        ('admin_keypath', 'Must specify --admin_keypath'),
    )
    for attr_name, complaint in required_options:
        if not getattr(options, attr_name):
            app.error(complaint)

    try:
        election_timeout = parse_time(options.election_timeout)
        framework_failover_timeout = parse_time(options.framework_failover_timeout)
    except InvalidTime as e:
        app.error(e.message)

    try:
        _, zk_servers, zk_root = zookeeper.parse(options.zk_url)
    except Exception as e:
        app.error("Invalid --zk_url: %s" % e.message)

    web_assets_dir = os.path.join(options.work_dir, "web")
    pkgutil.unpack_assets(web_assets_dir, MYSOS_MODULE, ASSET_RELPATH)
    log.info("Extracted web assets into %s" % options.work_dir)

    fw_principal = None
    fw_secret = None
    auth_file = options.framework_authentication_file
    if auth_file:
        try:
            # NOTE(review): yaml.load can construct arbitrary objects;
            # consider yaml.safe_load unless this file is fully trusted.
            with open(auth_file, "r") as f:
                cred = yaml.load(f)
            fw_principal = cred["principal"]
            fw_secret = cred["secret"]
            log.info("Loaded credential (principal=%s) for framework authentication" % fw_principal)
        except IOError as e:
            app.error("Unable to read the framework authentication key file: %s" % e)
        except (KeyError, yaml.YAMLError) as e:
            app.error("Invalid framework authentication key file format %s" % e)

    log.info("Starting Mysos scheduler")

    kazoo = KazooClient(zk_servers)
    kazoo.start()

    if options.state_storage == 'zk':
        log.info("Using ZooKeeper (path: %s) for state storage" % zk_root)
        state_provider = ZooKeeperStateProvider(kazoo, zk_root)
    else:
        log.info("Using local disk for state storage")
        state_provider = LocalStateProvider(options.work_dir)

    try:
        state = state_provider.load_scheduler_state()
    except StateProvider.Error as e:
        app.error(e.message)

    if state:
        log.info("Successfully restored scheduler state")
        framework_info = state.framework_info
        if framework_info.HasField('id'):
            log.info("Recovered scheduler's FrameworkID is %s" % framework_info.id.value)
    else:
        log.info("No scheduler state to restore")
        failover_secs = framework_failover_timeout.as_(Time.SECONDS)
        framework_info = FrameworkInfo(
            user=options.framework_user,
            name=FRAMEWORK_NAME,
            checkpoint=True,
            failover_timeout=failover_secs,
            role=options.framework_role)
        if fw_principal:
            framework_info.principal = fw_principal
        state = Scheduler(framework_info)
        state_provider.dump_scheduler_state(state)

    scheduler = MysosScheduler(
        state,
        state_provider,
        options.framework_user,
        options.executor_uri,
        options.executor_cmd,
        kazoo,
        options.zk_url,
        election_timeout,
        options.admin_keypath,
        installer_args=options.installer_args,
        backup_store_args=options.backup_store_args,
        executor_environ=options.executor_environ,
        framework_role=options.framework_role)

    # The credential is passed as a fourth argument only when both parts of
    # it were loaded.
    driver_args = [scheduler, framework_info, options.mesos_master]
    if fw_principal and fw_secret:
        driver_args.append(Credential(principal=fw_principal, secret=fw_secret))
    scheduler_driver = mesos.native.MesosSchedulerDriver(*driver_args)
    scheduler_driver.start()

    server = HttpServer()
    server.mount_routes(MysosServer(scheduler, web_assets_dir))

    server_thread = ExceptionalThread(
        target=server.run,
        args=('0.0.0.0', options.api_port, 'cherrypy'))
    server_thread.daemon = True
    server_thread.start()

    try:
        # Wait for the scheduler to stop.
        # The use of 'stopped' event instead of scheduler_driver.join() is
        # necessary to stop the process with SIGINT.
        while not scheduler.stopped.wait(timeout=0.5):
            pass
    except KeyboardInterrupt:
        log.info('Interrupted, exiting.')
    else:
        log.info('Scheduler exited.')

    # Mysos scheduler is supposed to be long-running thus the use of exit
    # status 1.
    app.shutdown(1)
def start(self):
    """Run StatusChecker.start, then ExceptionalThread.start, on this object."""
    StatusChecker.start(self)
    ExceptionalThread.start(self)