def main():
    """Read one Task from an SQS queue and run it in this process.

    The command line must supply --daemon_status_id and --queue_name;
    when either is missing the process exits with the usage text.

    Exit codes raised from here:
      133 -- __get_task__ returned None for the message read from the queue
      134 -- the task has no current run status after __run_task__ returned
      1   -- the task's current run status reports it was not successful
    When the task ran successfully the function simply returns.
    """
    global task

    parser = OptionParser(
        "%prog --daemon_status_id <id> --queue_name <queue_name> "
        "[--nice <0>] [--stdout <file_name|DEFAULT>] [--stderr <file_name>|STDOUT>] [--debug]"
    )
    parser.add_option(
        "--daemon_status_id",
        action="store",
        type="int",
        help="The id of the daemon status that launched this Task",
    )
    parser.add_option(
        "--queue_name",
        action="store",
        type="string",
        help="The name of the queue from which to read",
    )
    # The help text used to claim a default of 5, contradicting both the
    # actual default below and the usage string above.
    parser.add_option(
        "--nice",
        action="store",
        type="int",
        default=0,
        help="nice this process. defaults to 0.",
    )
    parser.add_option(
        "--stdout",
        action="store",
        type="string",
        help="Send stdout to this file, or special value 'DEFAULT' "
        "sends it a the stream unique to this Task request",
    )
    parser.add_option(
        "--stderr",
        action="store",
        type="string",
        help="Send stderr to this file, or special value 'STDOUT' sends it to stdout",
    )
    parser.add_option("--debug", action="store_true", help="more messages")
    (options, args) = parser.parse_args()

    # option parsing
    if not options.daemon_status_id or not options.queue_name:
        sys.exit(parser.get_usage())

    log.set_logging_debug(options.debug)

    if not options.nice == 0:
        os.nice(options.nice)

    console_stderr = None
    try:
        c = SQSConnection(settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY)
        q = c.get_queue(options.queue_name)
        boto_message = q.read()
        task = __get_task__(boto_message, options.queue_name)
        if task is None:
            log.debug("No task in queue '%s' pid:%s" % (options.queue_name, os.getpid()))
            sys.exit(133)

        log.debug("Starting SQS Queue '%s' Task:%s pid:%s" % (options.queue_name, task.get_id(), os.getpid()))
        # The message is removed from the queue before the task runs.
        q.delete_message(boto_message)
        console_stderr = __redirect_outputs__(task, options.stdout, options.stderr)
        daemon_status = __get_daemon_status__(options.daemon_status_id)
        __run_task__(task, daemon_status)
        ending_status = task.get_current_run_status()
        if ending_status is None:
            sys.exit(134)
        if not ending_status.was_successful():
            sys.exit(1)
    except SystemExit:
        # In Python 2.4 SystemExit extends Exception; from 2.5 on it extends
        # BaseException.  Re-raising keeps the original exit code and is
        # valid syntax on every Python version (the old "except X, e" form
        # is not).
        raise
def main():
    """Parse the command line and apply the requested logging verbosity.

    The only option accepted is --debug, whose value is handed to
    log.set_logging_debug().
    """
    cli = OptionParser("%prog [--debug]")
    # NOTE: a --no_log_redirect option (print daemon logging to sys.stdout &
    # sys.stderr instead of a TMS log file) is currently disabled here.
    cli.add_option(
        "--debug",
        action="store_true",
        help="more messages",
    )
    parsed = cli.parse_args()
    opts = parsed[0]

    log.set_logging_debug(opts.debug)
def main():
    """Run one Task of a task library inside a given iteration.

    Requires --daemon_status_id, --iteration_id, --task_library and
    --task_id; exits with the usage text if any is missing.  stdout and
    stderr are optionally re-pointed at files opened in append mode, and the
    process is niced (default 5) before the task is looked up and run.

    Returns False when the task has a run status for the iteration and that
    status was not successful; returns True otherwise.
    """
    global task, iteration, region

    cli = OptionParser(
        "%prog --daemon_status_id <id> --iteration_id <id> "
        "--task_library <lib> --task_id <id> [--nice 5] [--stdout <file_name>] [--stderr <file_name>|STDOUT>] [--debug]"
    )
    cli.add_option("--daemon_status_id", action="store", type="int",
                   help="The id of the daemon status that launched this Task")
    cli.add_option("--iteration_id", action="store", type="int",
                   help="The id of the iteration in which this Task runs")
    cli.add_option("--task_library", action="store", type="string",
                   help="The path Task (permalink.tms_impl.models.EnqueudArchiveRequest)")
    cli.add_option("--task_id", action="store", type="string",
                   help="The id of this Task in this library")
    cli.add_option("--nice", action="store", type="int", default=5,
                   help="nice this process. defaults to 5.")
    cli.add_option("--stdout", action="store", type="string",
                   help="Send stdout to this file")
    cli.add_option("--stderr", action="store", type="string",
                   help="Send stderr to this file, or special value 'STDOUT' sends it to stdout")
    cli.add_option("--debug", action="store_true", help="more messages")
    opts, _positional = cli.parse_args()

    # All four identifiers are mandatory.
    have_required = (opts.daemon_status_id and opts.iteration_id
                     and opts.task_library and opts.task_id)
    if not have_required:
        sys.exit(cli.get_usage())

    if opts.debug:
        log.set_logging_debug(opts.debug)

    # Handle on the stderr in effect before any redirection below.
    console_stderr = sys.stderr

    if opts.stdout:
        sys.stdout = open(opts.stdout, 'a')
    if opts.stderr:
        if opts.stderr != 'STDOUT':
            sys.stderr = open(opts.stderr, 'a')
        else:
            # special value: share whatever stdout currently is
            sys.stderr = sys.stdout

    if opts.nice != 0:
        os.nice(opts.nice)

    try:
        task_class = __get_task_class__(opts.task_library)
        task = __get_task__(opts.task_library, task_class, opts.task_id)
        daemon_status = __get_daemon_status__(opts.daemon_status_id)
        iteration = __get_iteration__(opts.iteration_id)
        region = daemon_status.get_region()

        __run_task__(task, iteration, daemon_status)

        final_status = task.get_current_run_status(iteration)
        if final_status is None:
            # if there's no run status, assume success; resource management
            # may have prevented it from working.
            return True
        if final_status.was_successful():
            return True
        return False
    except SystemExit:
        # In Python 2.4 SystemExit extends Exception; 2.5 moved it under
        # BaseException.  Propagate the exit unchanged, keeping its code.
        raise
def main():
    """Launch a Norc daemon for one region and exit according to how it ended.

    --region is mandatory (usage exit otherwise) and must resolve through
    tms_models.ResourceRegion.get(); --poll_frequency must be >= 1.
    SIGINT asks the daemon to stop, SIGTERM asks it to be killed.

    Exit behaviour once daemon.run() returns:
      * truthy result        -> exit code 0
      * falsy, --threads     -> the process sends itself SIGKILL
      * falsy, forking mode  -> exit code 137
    """
    cli = OptionParser("%prog --region <regionname> [--poll_frequency 3] [--threads] [--no_log_redirect] [--debug]")
    cli.add_option("--poll_frequency", action="store", default=3, type="int",
                   help="delay in seconds between looking for tasks to run")
    cli.add_option("--region", action="store", help="region this daemon runs in")
    cli.add_option("--threads", action="store_true",
                   help="use threading instead of subprocesses. "
                        "Note that threads in Python cannot be interrupted without killing the daemon!")
    cli.add_option("--no_log_redirect", action="store_true",
                   help="print daemon logging to sys.stdout & sys.stderr instead of redirecting them to a TMS log file.")
    cli.add_option("--debug", action="store_true", help="more messages")
    opts, _positional = cli.parse_args()

    if opts.debug:
        log.set_logging_debug(opts.debug)

    if opts.poll_frequency < 1:
        raise Exception("--poll_frequency must be >= 1")
    if not opts.region:
        sys.exit(cli.get_usage())

    # resolve the region
    region = tms_models.ResourceRegion.get(opts.region)
    if region is None:
        raise Exception("Don't know region '%s'" % (opts.region))

    # Signal handlers for interrupt (ctl-c) & terminate ($ kill <pid>).
    # Both refer to `daemon`, which is bound further down in this function.
    def on_sigint(signum, frame):
        assert signum == signal.SIGINT, "This signal handler only handles SIGINT, not '%s'. BUG!" % (signum)
        daemon.request_stop()

    def on_sigterm(signum, frame):
        assert signum == signal.SIGTERM, "This signal handler only handles SIGTERM, not '%s'. BUG!" % (signum)
        daemon.request_kill()

    signal.signal(signal.SIGINT, on_sigint)
    signal.signal(signal.SIGTERM, on_sigterm)

    if opts.threads:
        # multi-threaded; spawn new threads for new Tasks
        daemon_class = ThreadingNorcDaemon
    else:
        # single-threaded; fork new Tasks
        daemon_class = ForkingNorcDaemon
    redirect_logs = not opts.no_log_redirect
    daemon = daemon_class(region, opts.poll_frequency, settings.TMS_LOG_DIR, redirect_logs)

    if daemon.run():
        sys.exit(0)
    if opts.threads:
        # there's no way in python to interrupt threads; so gotta force 'em.
        # exit code is 137 on OS X
        os.kill(os.getpid(), signal.SIGKILL)
    else:
        sys.exit(137)
def main():
    """Launch a forking SQS daemon for one queue and exit with its outcome.

    --queue_name is mandatory (usage exit otherwise) and is also used as the
    name passed to tms_models.ResourceRegion.get(); --poll_frequency and
    --max_to_run must both be >= 1.  SIGINT asks the daemon to stop,
    SIGTERM asks it to be killed.  Exits 0 when daemon.run() returns a
    truthy value and 137 otherwise.
    """
    cli = OptionParser("%prog --queue_name <queue_name> --max_to_run <#> "
                       "[--poll_frequency <3>] [--no_log_redirect] [--debug]")
    cli.add_option("--poll_frequency", action="store", default=3, type="int",
                   help="delay in seconds between looking for tasks to run")
    cli.add_option("--queue_name", action="store", help="queue name this daemon monitors")
    cli.add_option("--max_to_run", action="store", type="int",
                   help="max Tasks that can be run at a time")
    cli.add_option("--no_log_redirect", action="store_true",
                   help="print daemon logging to sys.stdout & sys.stderr instead of redirecting them to a TMS log file.")
    cli.add_option("--debug", action="store_true", help="more messages")
    opts, _positional = cli.parse_args()

    log.set_logging_debug(opts.debug)

    # Validation order matters for which error the user sees first.
    if opts.poll_frequency < 1:
        raise Exception("--poll_frequency must be >= 1")
    if not opts.max_to_run or opts.max_to_run < 1:
        raise Exception("--max_to_run must be >= 1. found %s" % (opts.max_to_run))
    if not opts.queue_name:
        sys.exit(cli.get_usage())

    # resolve the region
    # currently an SQS Queue is mapped 1:1 to a ResourceRegion
    region = tms_models.ResourceRegion.get(opts.queue_name)
    if region is None:
        raise Exception("Don't know region '%s'" % (opts.queue_name))

    # Signal handlers for interrupt (ctl-c) & terminate ($ kill <pid>).
    # Both refer to `daemon`, which is bound further down in this function.
    def on_sigint(signum, frame):
        assert signum == signal.SIGINT, "This signal handler only handles SIGINT, not '%s'. BUG!" % (signum)
        daemon.request_stop()

    def on_sigterm(signum, frame):
        assert signum == signal.SIGTERM, "This signal handler only handles SIGTERM, not '%s'. BUG!" % (signum)
        daemon.request_kill()

    signal.signal(signal.SIGINT, on_sigint)
    signal.signal(signal.SIGTERM, on_sigterm)

    redirect_logs = not opts.no_log_redirect
    daemon = ForkingSQSDaemon(region, opts.poll_frequency, settings.TMS_LOG_DIR,
                              redirect_logs, max_to_run=opts.max_to_run)

    if daemon.run():
        exit_code = 0
    else:
        exit_code = 137
    sys.exit(exit_code)
def main():
    """Run a single named task in a given region.

    Both --task_name and --region are required.  When either is missing the
    parser reports the problem through OptionParser.error(), which prints
    the usage plus the message to stderr and exits with status 2.  (The
    previous code did `raise <usage string>`; raising a plain string is not
    a valid exception and ends in a TypeError on any Python >= 2.6.)

    Returns True after run_task() returns.
    """
    parser = OptionParser("%prog --task_name <name> --region <regionname> [--debug]")
    parser.add_option("--task_name", action="store", help="the task to run")
    parser.add_option("--region", action="store", help="run the task in this region")
    parser.add_option("--debug", action="store_true", help="more messages")
    (options, args) = parser.parse_args()

    if not options.task_name or not options.region:
        parser.error("--task_name and --region are both required")

    if options.debug:
        log.set_logging_debug(options.debug)

    task = get_task(options.task_name)
    run_task(task, region_name=options.region)
    return True
def main():
    """Inspect or change the state of a tms daemon (tmsd) from the command line.

    At least one of --status, --details, --pause, --stop, --kill, --salvage
    or --delete must be given; --stop, --kill, --details and --pause are
    mutually exclusive with each other and with --salvage.  Invalid
    combinations exit with the usage text.

    Flow:
      1. Load the daemon status named by the id-carrying option, if any.
      2. Refuse requests that are redundant with the daemon's current state.
      3. Apply the status change (delete / salvage / pause / stop / kill).
         After pause/stop/kill, optionally poll for up to --wait_seconds
         and exit with status 1 on timeout.
      4. Report status and/or details when asked.

    Raises:
        Exception: when the request conflicts with the daemon's state, or
            when the daemon type is unknown while printing details.
    """
    global WAIT_POLL_SECONDS

    parser = OptionParser(usage())
    parser.add_option("--status", action="store_true",
                      help="show status of all running norc daemons.")
    parser.add_option("--details", action="store", type="int",
                      help="show details for tmsd given by id.")
    parser.add_option("--filter_status", action="store", default="interesting",
                      help="if showing status, limit to this set. Defaults to 'interesting', which is active+errored.")
    parser.add_option("--salvage", action="store", type="int",
                      help="don't exit tms daemon as requested; leave it running.")
    parser.add_option("--pause", action="store", type="int",
                      help="pause the tms daemon of given ID so no more tasks are run")
    parser.add_option("--stop", action="store", type="int",
                      help="stop the tms daemon of given ID after all currently running tasks have finished")
    parser.add_option("--kill", action="store", type="int",
                      help="immediately kill the tms daemon of given ID")
    parser.add_option("--delete", action="store", type="int",
                      help="mark tms daemon of given ID as deleted for convenience. Only changes DB.")
    parser.add_option("--wait_seconds", action="store", default=0, type="int",
                      help="wait for N seconds for tmsd to stop after kill or stop is issued. Default is 0")
    parser.add_option("--force", action="store_true",
                      help="overrides some safety checks. Use carefully by trying not to use it first.")
    parser.add_option("--due_to_run", action="store", type="int",
                      help="show a max # of Tasks due to run (currently an expensive DB call)")
    parser.add_option("--debug", action="store_true", help="more messages")
    (options, args) = parser.parse_args()

    if options.debug:
        log.set_logging_debug(options.debug)

    # Raising a bare usage string is not a valid exception (TypeError on
    # Python >= 2.6); exit with the usage text instead.
    if not options.status and not options.details \
            and not options.pause and not options.stop and not options.kill \
            and not options.salvage and not options.delete:
        sys.exit(parser.get_usage())
    if options.stop and (options.kill or options.salvage or options.details or options.pause) \
            or options.kill and (options.stop or options.salvage or options.details or options.pause) \
            or options.details and (options.kill or options.stop or options.salvage or options.pause) \
            or options.pause and (options.kill or options.stop or options.salvage or options.details):
        sys.exit(parser.get_usage())

    #
    # edit a tmsd
    #

    tds_id = None
    tds = None
    if options.pause:
        tds_id = options.pause
    elif options.stop:
        tds_id = options.stop
    elif options.kill:
        tds_id = options.kill
    elif options.salvage:
        tds_id = options.salvage
    elif options.delete:
        tds_id = options.delete
    elif options.details:
        tds_id = options.details
    if tds_id is not None:
        tds = get_tds(tds_id)

    # Parenthesised on purpose: without the grouping, `and` binds tighter
    # than `or`, so a pause-requested daemon rejected *every* action (even
    # --salvage), and --status alone dereferenced tds while it was None.
    if options.pause and (tds.is_paused() or tds.is_pause_requested()):
        raise Exception("tmsd %s is already paused or pause has been requested." % (tds.id))
    if options.stop and tds.is_stop_requested():
        raise Exception("tmsd %s is already scheduled to stop. You can also try --kill <id>." % (tds.id))
    elif options.kill and tds.is_kill_requested():
        raise Exception("tmsd %s is already scheduled to be killed. The only thing more severe is $kill -9 %s." % (tds.id, tds.pid))
    elif options.salvage and (not tds.is_stop_requested() and not tds.is_kill_requested() and not tds.is_paused()):
        raise Exception("tmsd %s cannot be salvaged. Its status is not paused, stop- or kill- requested" % (tds.id))

    if options.delete:
        if not options.force and not tds.is_done_with_error():
            raise Exception("tmsd %s cannot be deleted because it has status %s. Use --force to override." % (tds.id, tds.get_status()))
        log.info("Deleting tmsd %s" % (tds))
        tds.set_status(tms_models.NorcDaemonStatus.STATUS_DELETED)
    elif options.salvage:
        log.info("Salvaging tmsd %s" % (tds))
        tds.set_status(tms_models.NorcDaemonStatus.STATUS_RUNNING)
    elif options.pause or options.stop or options.kill:
        if tds.is_done():
            raise Exception("tmsd %s is not running. It cannot be shutdown or paused." % (tds.id))
        if options.pause:
            log.info("Sending pause request to tmsd %s" % (tds))
            tds.set_status(tms_models.NorcDaemonStatus.STATUS_PAUSEREQUESTED)
        elif options.stop:
            log.info("Sending stop request to tmsd %s" % (tds))
            tds.set_status(tms_models.NorcDaemonStatus.STATUS_STOPREQUESTED)
        elif options.kill:
            log.info("Sending kill request to tmsd %s" % (tds))
            tds.set_status(tms_models.NorcDaemonStatus.STATUS_KILLREQUESTED)
        #
        if options.wait_seconds:
            seconds_waited = 0
            timeout = False
            while True:
                if seconds_waited >= options.wait_seconds:
                    timeout = True
                    break
                # re-read the status on every poll
                tds = get_tds(tds_id)
                if tds.is_shutting_down():
                    log.info("Waiting for shutdown of tmsd %s. It's been %s seconds." % (tds.id, seconds_waited), indent_chars=4)
                elif tds.is_done():
                    log.info("tmsd %s is done with status '%s'" % (tds.id, tds.get_status()))
                    break
                else:
                    # Was `tms.id`, an undefined name that turned this
                    # diagnostic into a NameError.
                    raise Exception("tmsd %s shutdown was requested but not honored or was overwritten in DB. This is bad, but try \"kill <pid>\" directly." % (tds.id))
                time.sleep(WAIT_POLL_SECONDS)
                seconds_waited += WAIT_POLL_SECONDS
            if timeout:
                log.info("Timeout reached waiting for tmsd %s to finish. Check process id %s on host '%s'" % (tds.id, tds.pid, tds.host))
                sys.exit(1)

    #
    # report on status
    #

    if options.status and tds is not None:
        report_tmsd_status(options.filter_status, [tds], max_tasks_due_to_run=options.due_to_run)
    elif options.status:
        report_tmsd_status(options.filter_status, max_tasks_due_to_run=options.due_to_run)

    if options.details:
        daemon_type = tds.get_daemon_type()
        if daemon_type == tms_models.NorcDaemonStatus.DAEMON_TYPE_TMS:
            report_tmsd_details(options.filter_status, tds)
        elif daemon_type == tms_models.NorcDaemonStatus.DAEMON_TYPE_SQS:
            report_sqsd_details(options.filter_status, tds)
        else:
            raise Exception("Unknown daemon_type '%s'" % (daemon_type))