def test_bad():
  """Verify parse_time rejects malformed duration strings and non-string input."""
  # Malformed duration strings must raise InvalidTime.
  for malformed in ['foo', 'dhms', '1s30d', 'a b c d', ' ', '1s2s3s']:
    with pytest.raises(InvalidTime):
      parse_time(malformed)
  # Non-string arguments must raise TypeError.
  for non_string in [123, type]:
    with pytest.raises(TypeError):
      parse_time(non_string)
def parse_and_validate_sla_drain_default(options):
  """Parses and validates host SLA default 3-tuple (percentage, duration, timeout).

  :param options: command line options
  :type options: list of app.option
  :rtype: a tuple of: default percentage (float), default duration (Amount) and timeout (Amount)
  """
  # Each parser raises on invalid input, so a successful return implies all
  # three values were validated.
  return (
      parse_sla_percentage(options.default_percentage),
      parse_time(options.default_duration).as_(Time.SECONDS),
      parse_time(options.timeout).as_(Time.SECONDS))
def parse_and_validate_sla_overrides(options, hostnames):
  """Parses and validates host SLA override 3-tuple (percentage, duration, reason).

  In addition, logs an admin message about overriding default SLA values.

  :param options: command line options
  :type options: list of app.option
  :param hostnames: host names override is issued to
  :type hostnames: list of string
  :rtype: a tuple of: override percentage (float) and override duration (Amount)
  """
  # The three override options are all-or-nothing: either every one is set or none is.
  has_override = bool(options.percentage) or bool(options.duration) or bool(options.reason)
  all_overrides = bool(options.percentage) and bool(options.duration) and bool(options.reason)
  if has_override != all_overrides:
    die('All --override_* options are required when attempting to override default SLA values.')

  # Fix: removed stray debug `print(options.percentage)` that leaked to stdout.
  percentage = parse_sla_percentage(options.percentage) if options.percentage else None
  duration = parse_time(options.duration) if options.duration else None
  if options.reason:
    # Overriding SLA defaults is audited via an admin log message.
    log_admin_message(
        logging.WARNING,
        'Default SLA values (percentage: %s, duration: %s) are overridden for the following '
        'hosts: %s. New percentage: %s, duration: %s, override reason: %s' % (
            SLA_UPTIME_PERCENTAGE_LIMIT,
            SLA_UPTIME_DURATION_LIMIT,
            hostnames,
            percentage,
            duration,
            options.reason))

  # Fall back to module-level defaults when no override was supplied.
  return percentage or SLA_UPTIME_PERCENTAGE_LIMIT, duration or SLA_UPTIME_DURATION_LIMIT
def sla_probe_hosts(cluster, percentage, duration):
  """usage: sla_probe_hosts [--filename=FILENAME]
                            [--grouping=GROUPING]
                            [--hosts=HOSTS]
                            [--min_job_instance_count=COUNT]
                            cluster percentage duration

  Probes individual hosts with respect to their job SLA.
  Specifically, given a host, outputs all affected jobs with their projected SLAs
  if the host goes down. In addition, if a job's projected SLA does not clear
  the specified limits suggests the approximate time when that job reaches its SLA.

  Output format:
  HOST  JOB  PREDICTED_SLA  SAFE?  PREDICTED_SAFE_IN

  where:
  HOST - host being probed.
  JOB - job that has tasks running on the host being probed.
  PREDICTED_SLA - predicted effective percentage of up tasks if the host is shut down.
  SAFE? - PREDICTED_SLA >= percentage
  PREDICTED_SAFE_IN - expected wait time in seconds for the job to reach requested SLA threshold.
  """
  options = app.get_options()

  # Validate the SLA arguments up front; the parsers raise on bad input.
  sla_percentage = parse_sla_percentage(percentage)
  sla_duration = parse_time(duration)
  hosts = parse_hostnames(options.filename, options.hosts)
  # Fail fast if an unknown grouping function was requested.
  get_grouping_or_die(options.grouping)

  # Query the scheduler for the SLA vector restricted to the requested hosts.
  vector = make_admin_client(cluster).sla_get_safe_domain_vector(options.min_instance_count, hosts)
  groups = vector.probe_hosts(sla_percentage, sla_duration.as_(Time.SECONDS), options.grouping)

  output, _ = format_sla_results(groups)
  print_results(output)
def parse_and_validate_sla_overrides(options, hostnames):
  """Parses and validates host SLA override 3-tuple (percentage, duration, reason).

  In addition, logs an admin message about overriding default SLA values.

  :param options: command line options
  :type options: list of app.option
  :param hostnames: host names override is issued to
  :type hostnames: list of string
  :rtype: a tuple of: override percentage (float) and override duration (Amount)
  """
  # The three override options are all-or-nothing: either every one is set or none is.
  has_override = bool(options.percentage) or bool(options.duration) or bool(options.reason)
  all_overrides = bool(options.percentage) and bool(options.duration) and bool(options.reason)
  if has_override != all_overrides:
    die('All --override_* options are required when attempting to override default SLA values.')

  # Fix: removed stray debug `print(options.percentage)` that leaked to stdout.
  percentage = parse_sla_percentage(options.percentage) if options.percentage else None
  duration = parse_time(options.duration) if options.duration else None
  if options.reason:
    # Overriding SLA defaults is audited via an admin log message.
    log_admin_message(
        logging.WARNING,
        'Default SLA values (percentage: %s, duration: %s) are overridden for the following '
        'hosts: %s. New percentage: %s, duration: %s, override reason: %s' % (
            SLA_UPTIME_PERCENTAGE_LIMIT,
            SLA_UPTIME_DURATION_LIMIT,
            hostnames,
            percentage,
            duration,
            options.reason))

  # Fall back to module-level defaults when no override was supplied.
  return percentage or SLA_UPTIME_PERCENTAGE_LIMIT, duration or SLA_UPTIME_DURATION_LIMIT
def gc(args, options):
  """Garbage collect task(s) and task metadata.

    Usage: thermos gc [options] [task_id1 task_id2 ...]

    If tasks specified, restrict garbage collection to only those tasks,
    otherwise all tasks are considered.  The optional constraints are still honored.
  """
  print('Analyzing root at %s' % options.root)
  gc_options = {}
  # Forward only the constraints the user actually supplied.
  if options.max_age is not None:
    gc_options['max_age'] = parse_time(options.max_age)
  if options.max_space is not None:
    gc_options['max_space'] = parse_data(options.max_space)
  if options.max_tasks is not None:
    gc_options['max_tasks'] = int(options.max_tasks)
  # --keep_* flags invert into include_* policy options.
  gc_options.update(include_metadata=not options.keep_metadata,
                    include_logs=not options.keep_logs,
                    verbose=True, logger=print)
  if args:
    # Explicit task ids: collect only finished tasks matching the given patterns.
    gc_tasks = list(tasks_from_re(args, state='finished'))
  else:
    print('No task ids specified, using default collector.')
    gc_tasks = [(task.checkpoint_root, task.task_id)
                for task in GarbageCollectionPolicy(get_path_detector(), **gc_options).run()]

  if not gc_tasks:
    print('No tasks to garbage collect. Exiting')
    return

  def maybe(function, *args):
    # Honor --dryrun by printing the would-be call instead of executing it.
    if options.dryrun:
      print(' would run %s%r' % (function.__name__, args))
    else:
      function(*args)

  value = 'y'
  if not options.force:
    # Interactive confirmation unless --force was given; default answer is No.
    # NOTE(review): raw_input is Python 2 only.
    value = raw_input("Continue [y/N]? ") or 'N'
  if value.lower() == 'y':
    print('Running gc...')
    for checkpoint_root, task_id in gc_tasks:
      tgc = TaskGarbageCollector(checkpoint_root, task_id)
      print(' Task %s ' % task_id, end='')
      print('data (%s) ' % ('keeping' if options.keep_data else 'deleting'), end='')
      print('logs (%s) ' % ('keeping' if options.keep_logs else 'deleting'), end='')
      print('metadata (%s) ' % ('keeping' if options.keep_metadata else 'deleting'))
      # Each erase step is individually gated by its --keep_* flag and --dryrun.
      if not options.keep_data:
        maybe(tgc.erase_data)
      if not options.keep_logs:
        maybe(tgc.erase_logs)
      if not options.keep_metadata:
        maybe(tgc.erase_metadata)
      print('done.')
  else:
    print('Cancelling gc.')
def test_basic():
  """Spot-check parse_time on well-formed duration strings."""
  # Empty string parses as zero seconds.
  assert parse_time('') == Amount(0, Time.SECONDS)
  assert parse_time('1s') == Amount(1, Time.SECONDS)
  # Mixed units are summed: 2m + 60s == 3 minutes.
  assert parse_time('2m60s') == Amount(3, Time.MINUTES)
  assert parse_time('1d') == Amount(1, Time.DAYS)
  # Units accumulate across magnitudes (upper-case H accepted): 1d + 1H + 3600s == 26 hours.
  assert parse_time('1d1H3600s') == Amount(26, Time.HOURS)
  # Negative components subtract: 1d - 1s == 86399 seconds.
  assert parse_time('1d-1s') == Amount(86399, Time.SECONDS)
def perform_maintenance_hosts(cluster):
  """usage: perform_maintenance_hosts {--filename=filename | --hosts=hosts}
                                      [--post_drain_script=path]
                                      [--grouping=function]
                                      [--override_percentage=percentage]
                                      [--override_duration=duration]
                                      [--override_reason=reason]
                                      [--unsafe_hosts_file=unsafe_hosts_filename]
                                      cluster

  Asks the scheduler to remove any running tasks from the machine and remove it
  from service temporarily, perform some action on them, then return the machines
  to service.
  """
  options = app.get_options()
  drainable_hosts = parse_hostnames(options.filename, options.hosts)
  # Fail fast if an unknown grouping function was requested.
  get_grouping_or_die(options.grouping)

  # SLA overrides are all-or-nothing: either every --override_* flag is given or none.
  has_override = bool(options.percentage) or bool(options.duration) or bool(options.reason)
  all_overrides = bool(options.percentage) and bool(options.duration) and bool(options.reason)
  if has_override != all_overrides:
    die("All --override_* options are required when attempting to override default SLA values.")

  percentage = parse_sla_percentage(options.percentage) if options.percentage else None
  duration = parse_time(options.duration) if options.duration else None
  if options.reason:
    # Overriding SLA defaults is audited via an admin log message.
    log_admin_message(
        logging.WARNING,
        "Default SLA values (percentage: %s, duration: %s) are overridden for the following "
        "hosts: %s. New percentage: %s, duration: %s, override reason: %s" % (
            HostMaintenance.SLA_UPTIME_PERCENTAGE_LIMIT,
            HostMaintenance.SLA_UPTIME_DURATION_LIMIT,
            drainable_hosts,
            percentage,
            duration,
            options.reason,
        ),
    )

  drained_callback = parse_script(options.post_drain_script)

  # Delegate the actual drain/act/restore cycle to HostMaintenance; hosts that
  # cannot be safely drained are written to output_file.
  HostMaintenance(CLUSTERS[cluster], options.verbosity).perform_maintenance(
      drainable_hosts,
      grouping_function=options.grouping,
      callback=drained_callback,
      percentage=percentage,
      duration=duration,
      output_file=options.unsafe_hosts_filename,
  )
def sla_probe_hosts(cluster, percentage, duration):
  """usage: sla_probe_hosts [--filename=filename]
                            [--hosts=hosts]
                            cluster percentage duration

  Probes individual hosts with respect to their job SLA.
  Specifically, given a host, outputs all affected jobs with their projected SLAs
  if the host goes down. In addition, if a job's projected SLA does not clear
  the specified limits suggests the approximate time when that job reaches its SLA.

  Output format:
  HOST  JOB  PREDICTED_SLA  SAFE?  PREDICTED_SAFE_IN

  where:
  HOST - host being probed.
  JOB - job that has tasks running on the host being probed.
  PREDICTED_SLA - predicted effective percentage of up tasks if the host is shut down.
  SAFE? - PREDICTED_SLA >= percentage
  PREDICTED_SAFE_IN - expected wait time in seconds for the job to reach requested SLA threshold.
  """
  options = app.get_options()

  # Validate the SLA arguments up front; the parsers raise on bad input.
  sla_percentage = parse_sla_percentage(percentage)
  sla_duration = parse_time(duration)
  hosts = parse_hosts(options.filename, options.hosts)

  vector = AuroraClientAPI(CLUSTERS[cluster], options.verbosity).sla_get_safe_domain_vector(hosts)
  probed_hosts = vector.probe_hosts(sla_percentage, sla_duration.as_(Time.SECONDS), hosts)

  results = []
  # One tab-separated line per job per host, sorted for deterministic output;
  # safe_in_secs of None renders as "n/a".
  for host, job_details in sorted(probed_hosts.items()):
    results.append(
        "\n".join(
            [
                "%s\t%s\t%.2f\t%s\t%s"
                % (
                    host,
                    d.job.to_path(),
                    d.predicted_percentage,
                    d.safe,
                    "n/a" if d.safe_in_secs is None else d.safe_in_secs,
                )
                for d in sorted(job_details)
            ]
        )
    )
  print_results(results)
def parse_jobs_file(filename):
  """Reads per-job SLA overrides from a file.

  Each non-blank line must contain three whitespace-separated fields:
  a job path, an SLA percentage and a duration.

  :param filename: path to the overrides file
  :rtype: dict mapping AuroraJobKey to JobUpTimeLimit
  """
  overrides_by_job = {}
  with open(filename, 'r') as job_overrides:
    for raw_line in job_overrides:
      # Skip blank lines entirely.
      if not raw_line.strip():
        continue
      fields = raw_line.split()
      if len(fields) != 3:
        die('Invalid line in %s:%s' % (filename, raw_line))
      key = AuroraJobKey.from_path(fields[0])
      overrides_by_job[key] = JobUpTimeLimit(
          job=key,
          percentage=parse_sla_percentage(fields[1]),
          duration_secs=parse_time(fields[2]).as_(Time.SECONDS))
  return overrides_by_job
def parse_jobs_file(filename):
  """Loads job-level SLA override limits from a file.

  Each non-blank line holds three whitespace-separated fields:
  job path, SLA percentage and duration.

  :param filename: path of the file to read
  :rtype: dict of AuroraJobKey -> JobUpTimeLimit
  """
  limits = {}
  with open(filename, 'r') as source:
    for entry in source:
      if not entry.strip():
        # Ignore empty lines.
        continue
      parts = entry.split()
      if len(parts) != 3:
        die('Invalid line in %s:%s' % (filename, entry))
      path, pct, dur = parts
      job = AuroraJobKey.from_path(path)
      limits[job] = JobUpTimeLimit(
          job=job,
          percentage=parse_sla_percentage(pct),
          duration_secs=parse_time(dur).as_(Time.SECONDS))
  return limits
def gc(args, options):
  """Garbage collect task(s) and task metadata.

    Usage: thermos gc [options] [task_id1 task_id2 ...]

    If tasks specified, restrict garbage collection to only those tasks,
    otherwise all tasks are considered.  The optional constraints are still honored.

    Options:
      --max_age=AGE             Max age in quasi-human readable form,
                                e.g. --max_age=2d5h, format *d*h*m*s [default: skip]
      --max_tasks=NUM           Max number of tasks to keep [default: skip]
      --max_space=SPACE         Max space to allow for tasks [default: skip]
      --[keep/delete-]metadata  Garbage collect metadata [default: keep]
      --[keep/delete-]logs      Garbage collect logs [default: keep]
      --[keep/delete-]data      Garbage collect data [default: keep]
                                WARNING: Do NOT do this if your sandbox is $HOME.
      --force                   Perform garbage collection without confirmation [default: false]
      --dryrun                  Don't actually run garbage collection [default: false]
  """
  print('Analyzing root at %s' % options.root)
  gc_options = {}
  # Forward only the constraints the user actually supplied.
  if options.max_age is not None:
    gc_options['max_age'] = parse_time(options.max_age)
  if options.max_space is not None:
    gc_options['max_space'] = parse_data(options.max_space)
  if options.max_tasks is not None:
    gc_options['max_tasks'] = int(options.max_tasks)
  # --keep_* flags invert into include_* policy options.
  gc_options.update(include_data=not options.keep_data,
                    include_metadata=not options.keep_metadata,
                    include_logs=not options.keep_logs,
                    verbose=True, logger=print)
  tgc = TaskGarbageCollector(root=options.root)

  if args:
    # Explicit task ids: collect only finished tasks matching the given patterns.
    gc_tasks = tasks_from_re(args, options.root, state='finished')
  else:
    print('No task ids specified, using default collector.')
    gc_tasks = [task.task_id for task in DefaultCollector(tgc, **gc_options).run()]

  if not gc_tasks:
    print('No tasks to garbage collect. Exiting')
    return

  def maybe(function, *args):
    # Honor --dryrun by printing the would-be call instead of executing it.
    if options.dryrun:
      print(' would run %s%r' % (function.__name__, args))
    else:
      function(*args)

  value = 'y'
  if not options.force:
    # Interactive confirmation unless --force was given; default answer is No.
    # NOTE(review): raw_input is Python 2 only.
    value = raw_input("Continue [y/N]? ") or 'N'
  if value.lower() == 'y':
    print('Running gc...')
    tgc = TaskGarbageCollector(root=options.root)
    for task in gc_tasks:
      print(' Task %s ' % task, end='')
      print('data (%s) ' % ('keeping' if options.keep_data else 'deleting'), end='')
      print('logs (%s) ' % ('keeping' if options.keep_logs else 'deleting'), end='')
      print('metadata (%s) ' % ('keeping' if options.keep_metadata else 'deleting'))
      # Each erase step is individually gated by its --keep_* flag and --dryrun.
      if not options.keep_data:
        maybe(tgc.erase_data, task)
      if not options.keep_logs:
        maybe(tgc.erase_logs, task)
      if not options.keep_metadata:
        maybe(tgc.erase_metadata, task)
      print('done.')
  else:
    print('Cancelling gc.')
def sla_list_safe_domain(cluster, percentage, duration):
  """usage: sla_list_safe_domain [--exclude_file=FILENAME]
                                 [--exclude_hosts=HOSTS]
                                 [--grouping=GROUPING]
                                 [--include_file=FILENAME]
                                 [--include_hosts=HOSTS]
                                 [--list_jobs]
                                 [--min_job_instance_count=COUNT]
                                 [--override_jobs=FILENAME]
                                 cluster percentage duration

  Returns a list of relevant hosts where it would be safe to kill
  tasks without violating their job SLA. The SLA is defined as a pair of
  percentage and duration, where:

  percentage - Percentage of tasks required to be up within the duration.
  Applied to all jobs except those listed in --override_jobs file;

  duration - Time interval (now - value) for the percentage of up tasks.
  Applied to all jobs except those listed in --override_jobs file.
  Format: XdYhZmWs (each field is optional but must be in that order.)
  Examples: 5m, 1d3h45m.

  NOTE: if --grouping option is specified and is set to anything other than
        default (by_host) the results will be processed and filtered based
        on the grouping function on a all-or-nothing basis. In other words,
        the group is 'safe' IFF it is safe to kill tasks on all hosts in the
        group at the same time.
  """
  def parse_jobs_file(filename):
    # Parses per-job SLA overrides: each non-blank line is
    # "<job path> <percentage> <duration>".
    result = {}
    with open(filename, 'r') as overrides:
      for line in overrides:
        if not line.strip():
          continue
        tokens = line.split()
        if len(tokens) != 3:
          die('Invalid line in %s:%s' % (filename, line))
        job_key = AuroraJobKey.from_path(tokens[0])
        result[job_key] = JobUpTimeLimit(
            job=job_key,
            percentage=parse_sla_percentage(tokens[1]),
            duration_secs=parse_time(tokens[2]).as_(Time.SECONDS)
        )
    return result

  options = app.get_options()

  # Validate the SLA arguments up front; the parsers raise on bad input.
  sla_percentage = parse_sla_percentage(percentage)
  sla_duration = parse_time(duration)
  exclude_hosts = parse_hostnames_optional(options.exclude_hosts, options.exclude_filename)
  include_hosts = parse_hostnames_optional(options.include_hosts, options.include_filename)
  override_jobs = parse_jobs_file(options.override_filename) if options.override_filename else {}
  # Fail fast if an unknown grouping function was requested.
  get_grouping_or_die(options.grouping)

  vector = make_admin_client(cluster).sla_get_safe_domain_vector(
      options.min_instance_count, include_hosts)
  groups = vector.get_safe_hosts(sla_percentage, sla_duration.as_(Time.SECONDS),
      override_jobs, options.grouping)

  results = []
  for group in groups:
    for host in sorted(group.keys()):
      # Excluded hosts are filtered from the output only, after the SLA computation.
      if exclude_hosts and host in exclude_hosts:
        continue
      if options.list_jobs:
        results.append('\n'.join(['%s\t%s\t%.2f\t%d' %
            (host, d.job.to_path(), d.percentage, d.duration_secs)
            for d in sorted(group[host])]))
      else:
        results.append('%s' % host)
  print_results(results)
def main(args, options):
  """Entry point for the Mysos scheduler.

  Validates options, restores or creates scheduler state, starts the Mesos
  scheduler driver and the HTTP API server, then blocks until the scheduler
  stops and shuts the app down with exit status 1.
  """
  log.info("Options in use: %s", options)

  # Fail fast on any missing required option.
  if not options.api_port:
    app.error('Must specify --port')
  if not options.mesos_master:
    app.error('Must specify --mesos_master')
  if not options.framework_user:
    app.error('Must specify --framework_user')
  if not options.executor_uri:
    app.error('Must specify --executor_uri')
  if not options.executor_cmd:
    app.error('Must specify --executor_cmd')
  if not options.zk_url:
    app.error('Must specify --zk_url')
  if not options.admin_keypath:
    app.error('Must specify --admin_keypath')

  try:
    election_timeout = parse_time(options.election_timeout)
    framework_failover_timeout = parse_time(options.framework_failover_timeout)
  except InvalidTime as e:
    app.error(e.message)

  try:
    _, zk_servers, zk_root = zookeeper.parse(options.zk_url)
  except Exception as e:
    app.error("Invalid --zk_url: %s" % e.message)

  # Extract bundled web assets onto disk so the HTTP server can serve them.
  web_assets_dir = os.path.join(options.work_dir, "web")
  pkgutil.unpack_assets(web_assets_dir, MYSOS_MODULE, ASSET_RELPATH)
  log.info("Extracted web assets into %s" % options.work_dir)

  # Optional framework authentication: principal/secret loaded from a YAML file.
  fw_principal = None
  fw_secret = None
  if options.framework_authentication_file:
    try:
      with open(options.framework_authentication_file, "r") as f:
        # NOTE(review): yaml.load on this file; yaml.safe_load would be safer
        # if the credential file is not fully trusted.
        cred = yaml.load(f)
      fw_principal = cred["principal"]
      fw_secret = cred["secret"]
      log.info("Loaded credential (principal=%s) for framework authentication" % fw_principal)
    except IOError as e:
      app.error("Unable to read the framework authentication key file: %s" % e)
    except (KeyError, yaml.YAMLError) as e:
      app.error("Invalid framework authentication key file format %s" % e)

  log.info("Starting Mysos scheduler")

  kazoo = KazooClient(zk_servers)
  kazoo.start()

  # State storage backend: ZooKeeper when requested, local disk otherwise.
  if options.state_storage == 'zk':
    log.info("Using ZooKeeper (path: %s) for state storage" % zk_root)
    state_provider = ZooKeeperStateProvider(kazoo, zk_root)
  else:
    log.info("Using local disk for state storage")
    state_provider = LocalStateProvider(options.work_dir)

  try:
    state = state_provider.load_scheduler_state()
  except StateProvider.Error as e:
    app.error(e.message)

  if state:
    log.info("Successfully restored scheduler state")
    framework_info = state.framework_info
    if framework_info.HasField('id'):
      log.info("Recovered scheduler's FrameworkID is %s" % framework_info.id.value)
  else:
    # First run: build fresh FrameworkInfo and persist the initial state.
    log.info("No scheduler state to restore")
    framework_info = FrameworkInfo(
        user=options.framework_user,
        name=FRAMEWORK_NAME,
        checkpoint=True,
        failover_timeout=framework_failover_timeout.as_(Time.SECONDS),
        role=options.framework_role)
    if fw_principal:
      framework_info.principal = fw_principal
    state = Scheduler(framework_info)
    state_provider.dump_scheduler_state(state)

  scheduler = MysosScheduler(
      state,
      state_provider,
      options.framework_user,
      options.executor_uri,
      options.executor_cmd,
      kazoo,
      options.zk_url,
      election_timeout,
      options.admin_keypath,
      installer_args=options.installer_args,
      backup_store_args=options.backup_store_args,
      executor_environ=options.executor_environ,
      framework_role=options.framework_role)

  # Register with authentication only when both principal and secret are present.
  if fw_principal and fw_secret:
    cred = Credential(principal=fw_principal, secret=fw_secret)
    scheduler_driver = mesos.native.MesosSchedulerDriver(
        scheduler, framework_info, options.mesos_master, cred)
  else:
    scheduler_driver = mesos.native.MesosSchedulerDriver(
        scheduler, framework_info, options.mesos_master)

  scheduler_driver.start()

  # Serve the HTTP API on a daemon thread so it dies with the main thread.
  server = HttpServer()
  server.mount_routes(MysosServer(scheduler, web_assets_dir))

  et = ExceptionalThread(
      target=server.run, args=('0.0.0.0', options.api_port, 'cherrypy'))
  et.daemon = True
  et.start()

  try:
    # Wait for the scheduler to stop.
    # The use of 'stopped' event instead of scheduler_driver.join() is necessary to stop the
    # process with SIGINT.
    while not scheduler.stopped.wait(timeout=0.5):
      pass
  except KeyboardInterrupt:
    log.info('Interrupted, exiting.')
  else:
    log.info('Scheduler exited.')

  app.shutdown(1)  # Mysos scheduler is supposed to be long-running thus the use of exit status 1.
def sla_list_safe_domain(cluster, percentage, duration):
  """usage: sla_list_safe_domain [--exclude_hosts=filename]
                                 [--include_hosts=filename]
                                 [--list_jobs]
                                 [--override_jobs=filename]
                                 cluster percentage duration

  Returns a list of relevant hosts where it would be safe to kill
  tasks without violating their job SLA. The SLA is defined as a pair of
  percentage and duration, where:

  percentage - Percentage of tasks required to be up within the duration.
  Applied to all jobs except those listed in --override_jobs file;

  duration - Time interval (now - value) for the percentage of up tasks.
  Applied to all jobs except those listed in --override_jobs file.
  Format: XdYhZmWs (each field is optional but must be in that order.)
  Examples: 5m, 1d3h45m.
  """
  def parse_jobs_file(filename):
    # Parses per-job SLA overrides: each non-blank line is
    # "<job path> <percentage> <duration>".
    result = {}
    with open(filename, "r") as overrides:
      for line in overrides:
        if not line.strip():
          continue
        tokens = line.split()
        if len(tokens) != 3:
          die("Invalid line in %s:%s" % (filename, line))
        job_key = AuroraJobKey.from_path(tokens[0])
        result[job_key] = DomainUpTimeSlaVector.JobUpTimeLimit(
            job=job_key,
            percentage=parse_sla_percentage(tokens[1]),
            duration_secs=parse_time(tokens[2]).as_(Time.SECONDS),
        )
    return result

  options = app.get_options()

  # Validate the SLA arguments up front; the parsers raise on bad input.
  sla_percentage = parse_sla_percentage(percentage)
  sla_duration = parse_time(duration)
  exclude_hosts = parse_hosts_optional(options.exclude_hosts, options.exclude_filename)
  include_hosts = parse_hosts_optional(options.include_hosts, options.include_filename)
  override_jobs = parse_jobs_file(options.override_filename) if options.override_filename else {}

  vector = AuroraClientAPI(CLUSTERS[cluster],
      options.verbosity).sla_get_safe_domain_vector(include_hosts)
  hosts = vector.get_safe_hosts(sla_percentage, sla_duration.as_(Time.SECONDS), override_jobs)

  results = []
  for host in sorted(hosts.keys()):
    # Excluded hosts are filtered from the output only, after the SLA computation.
    if exclude_hosts and host in exclude_hosts:
      continue
    if options.list_jobs:
      results.append(
          "\n".join(
              [
                  "%s\t%s\t%.2f\t%d" % (host, d.job.to_path(), d.percentage, d.duration_secs)
                  for d in sorted(hosts[host])
              ]
          )
      )
    else:
      results.append("%s" % host)
  print_results(results)
def __init__(self, user):
  """Initializes the instance for the given user.

  :param user: identity the instance operates on behalf of; stored verbatim.
  """
  self._user = user
  # Parse the module-level MAX_UPLOAD_DELAY duration string once at construction.
  # Presumably the longest time an upload may be deferred — TODO(review): confirm
  # against the code that reads _max_delay.
  self._max_delay = parse_time(MAX_UPLOAD_DELAY)
def main(args, options):
  """Entry point for the Mysos scheduler.

  Validates options, restores or creates scheduler state, starts the Mesos
  scheduler driver and the HTTP API server, then blocks until the scheduler
  stops and shuts the app down with exit status 1.
  """
  log.info("Options in use: %s", options)

  # Fail fast on any missing required option.
  if not options.api_port:
    app.error('Must specify --port')
  if not options.mesos_master:
    app.error('Must specify --mesos_master')
  if not options.framework_user:
    app.error('Must specify --framework_user')
  if not options.executor_uri:
    app.error('Must specify --executor_uri')
  if not options.executor_cmd:
    app.error('Must specify --executor_cmd')
  if not options.zk_url:
    app.error('Must specify --zk_url')
  if not options.admin_keypath:
    app.error('Must specify --admin_keypath')

  try:
    election_timeout = parse_time(options.election_timeout)
    framework_failover_timeout = parse_time(options.framework_failover_timeout)
  except InvalidTime as e:
    app.error(e.message)

  try:
    _, zk_servers, zk_root = zookeeper.parse(options.zk_url)
  except Exception as e:
    app.error("Invalid --zk_url: %s" % e.message)

  # Extract bundled web assets onto disk so the HTTP server can serve them.
  web_assets_dir = os.path.join(options.work_dir, "web")
  pkgutil.unpack_assets(web_assets_dir, MYSOS_MODULE, ASSET_RELPATH)
  log.info("Extracted web assets into %s" % options.work_dir)

  # Optional framework authentication: principal/secret loaded from a YAML file.
  fw_principal = None
  fw_secret = None
  if options.framework_authentication_file:
    try:
      with open(options.framework_authentication_file, "r") as f:
        # NOTE(review): yaml.load on this file; yaml.safe_load would be safer
        # if the credential file is not fully trusted.
        cred = yaml.load(f)
      fw_principal = cred["principal"]
      fw_secret = cred["secret"]
      log.info("Loaded credential (principal=%s) for framework authentication" % fw_principal)
    except IOError as e:
      app.error("Unable to read the framework authentication key file: %s" % e)
    except (KeyError, yaml.YAMLError) as e:
      app.error("Invalid framework authentication key file format %s" % e)

  log.info("Starting Mysos scheduler")

  kazoo = KazooClient(zk_servers)
  kazoo.start()

  # State storage backend: ZooKeeper when requested, local disk otherwise.
  if options.state_storage == 'zk':
    log.info("Using ZooKeeper (path: %s) for state storage" % zk_root)
    state_provider = ZooKeeperStateProvider(kazoo, zk_root)
  else:
    log.info("Using local disk for state storage")
    state_provider = LocalStateProvider(options.work_dir)

  try:
    state = state_provider.load_scheduler_state()
  except StateProvider.Error as e:
    app.error(e.message)

  if state:
    log.info("Successfully restored scheduler state")
    framework_info = state.framework_info
    if framework_info.HasField('id'):
      log.info("Recovered scheduler's FrameworkID is %s" % framework_info.id.value)
  else:
    # First run: build fresh FrameworkInfo and persist the initial state.
    log.info("No scheduler state to restore")
    framework_info = FrameworkInfo(
        user=options.framework_user,
        name=FRAMEWORK_NAME,
        checkpoint=True,
        failover_timeout=framework_failover_timeout.as_(Time.SECONDS),
        role=options.framework_role)
    if fw_principal:
      framework_info.principal = fw_principal
    state = Scheduler(framework_info)
    state_provider.dump_scheduler_state(state)

  scheduler = MysosScheduler(
      state,
      state_provider,
      options.framework_user,
      options.executor_uri,
      options.executor_cmd,
      kazoo,
      options.zk_url,
      election_timeout,
      options.admin_keypath,
      installer_args=options.installer_args,
      backup_store_args=options.backup_store_args,
      executor_environ=options.executor_environ,
      framework_role=options.framework_role)

  # Register with authentication only when both principal and secret are present.
  if fw_principal and fw_secret:
    cred = Credential(principal=fw_principal, secret=fw_secret)
    scheduler_driver = mesos.native.MesosSchedulerDriver(
        scheduler, framework_info, options.mesos_master, cred)
  else:
    scheduler_driver = mesos.native.MesosSchedulerDriver(
        scheduler, framework_info, options.mesos_master)

  scheduler_driver.start()

  # Serve the HTTP API on a daemon thread so it dies with the main thread.
  server = HttpServer()
  server.mount_routes(MysosServer(scheduler, web_assets_dir))

  et = ExceptionalThread(
      target=server.run, args=('0.0.0.0', options.api_port, 'cherrypy'))
  et.daemon = True
  et.start()

  try:
    # Wait for the scheduler to stop.
    # The use of 'stopped' event instead of scheduler_driver.join() is necessary to stop the
    # process with SIGINT.
    while not scheduler.stopped.wait(timeout=0.5):
      pass
  except KeyboardInterrupt:
    log.info('Interrupted, exiting.')
  else:
    log.info('Scheduler exited.')

  app.shutdown(1)  # Mysos scheduler is supposed to be long-running thus the use of exit status 1.