Exemple #1
0
def main():
    global task
    parser = OptionParser(
        "%prog --daemon_status_id <id> --queue_name <queue_name> \
[--nice <0>] [--stdout <file_name|DEFAULT>] [--stderr <file_name>|STDOUT>] [--debug]"
    )
    parser.add_option(
        "--daemon_status_id", action="store", type="int", help="The id of the daemon status that launched this Task"
    )
    parser.add_option("--queue_name", action="store", type="string", help="The name of the queue from which to read")
    parser.add_option("--nice", action="store", type="int", default=0, help="nice this process. defaults to 5.")
    parser.add_option(
        "--stdout",
        action="store",
        type="string",
        help="Send stdout to this file, or special value 'DEFAULT' \
sends it a the stream unique to this Task request",
    )
    parser.add_option(
        "--stderr",
        action="store",
        type="string",
        help="Send stderr to this file, or special value 'STDOUT' sends it to stdout",
    )
    parser.add_option("--debug", action="store_true", help="more messages")
    (options, args) = parser.parse_args()

    # option parsing
    if not options.daemon_status_id or not options.queue_name:
        sys.exit(parser.get_usage())
    log.set_logging_debug(options.debug)

    if not options.nice == 0:
        os.nice(options.nice)

    console_stderr = None
    try:
        c = SQSConnection(settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY)
        q = c.get_queue(options.queue_name)
        boto_message = q.read()
        task = __get_task__(boto_message, options.queue_name)
        if task == None:
            log.debug("No task in queue '%s' pid:%s" % (options.queue_name, os.getpid()))
            sys.exit(133)
        else:
            log.debug("Starting SQS Queue '%s' Task:%s pid:%s" % (options.queue_name, task.get_id(), os.getpid()))
            q.delete_message(boto_message)
            console_stderr = __redirect_outputs__(task, options.stdout, options.stderr)
            daemon_status = __get_daemon_status__(options.daemon_status_id)
            __run_task__(task, daemon_status)
            ending_status = task.get_current_run_status()
            if ending_status == None:
                sys.exit(134)
            if not ending_status.was_successful():
                sys.exit(1)
    except SystemExit, se:
        # in python 2.4, SystemExit extends Exception, this is changed in 2.5 to
        # extend BaseException, specifically so this check isn't necessary. But
        # we're using 2.4; upon upgrade, this check will be unecessary but ignorable.
        sys.exit(se.code)
Exemple #2
0
def main():
    """Parse the command line and hand the --debug flag to the logger."""
    cli = OptionParser("%prog [--debug]")
    # A --no_log_redirect switch (print daemon logging to sys.stdout &
    # sys.stderr instead of redirecting them to a TMS log file) is
    # currently disabled.
    cli.add_option("--debug", help="more messages", action="store_true")
    opts, _positional = cli.parse_args()

    log.set_logging_debug(opts.debug)
Exemple #3
0
def main():
    global task, iteration, region
    parser = OptionParser("%prog --daemon_status_id <id> --iteration_id <id> \
--task_library <lib> --task_id <id> [--nice 5] [--stdout <file_name>] [--stderr <file_name>|STDOUT>] [--debug]")
    parser.add_option("--daemon_status_id", action="store", type="int"
        , help="The id of the daemon status that launched this Task")
    parser.add_option("--iteration_id", action="store", type="int"
        , help="The id of the iteration in which this Task runs")
    parser.add_option("--task_library", action="store", type="string"
        , help="The path Task (permalink.tms_impl.models.EnqueudArchiveRequest)")
    parser.add_option("--task_id", action="store", type="string"
        , help="The id of this Task in this library")
    parser.add_option("--nice", action="store", type="int", default=5
        , help="nice this process. defaults to 5.")
    parser.add_option("--stdout", action="store", type="string"
        , help="Send stdout to this file")
    parser.add_option("--stderr", action="store", type="string"
        , help="Send stderr to this file, or special value 'STDOUT' sends it to stdout")
    parser.add_option("--debug", action="store_true", help="more messages")
    (options, args) = parser.parse_args()
    
    # option parsing
    if not options.daemon_status_id or not options.iteration_id \
        or not options.task_library or not options.task_id:
        sys.exit(parser.get_usage())
    if options.debug:
        log.set_logging_debug(options.debug)
    console_stderr = sys.stderr
    
    if options.stdout:
        sys.stdout = open(options.stdout, 'a')
    if options.stderr:
        if options.stderr == 'STDOUT':
            sys.stderr = sys.stdout
        else:
            sys.stderr = open(options.stderr, 'a')
    if not options.nice == 0:
        os.nice(options.nice)
    
    try:
        task_class = __get_task_class__(options.task_library)
        task = __get_task__(options.task_library, task_class, options.task_id)
        daemon_status = __get_daemon_status__(options.daemon_status_id)
        iteration = __get_iteration__(options.iteration_id)
        region = daemon_status.get_region()
        __run_task__(task, iteration, daemon_status)
        ending_status = task.get_current_run_status(iteration)
        if not ending_status == None and not ending_status.was_successful():
            # if there's no run status, assume success; resource management may have prevented it from working.
            return False
        return True
    except SystemExit, se:
        # in python 2.4, SystemExit extends Exception, this is changed in 2.5 to 
        # extend BaseException, specifically so this check isn't necessary. But
        # we're using 2.4; upon upgrade, this check will be unecessary but ignorable.
        sys.exit(se.code)
Exemple #4
0
def main():
    """Start a norc daemon for one region and exit according to how it ended.

    Command-line options:
      --region           name of the region this daemon runs in (required)
      --poll_frequency   delay in seconds between polls, must be >= 1 (default 3)
      --threads          use ThreadingNorcDaemon instead of ForkingNorcDaemon
      --no_log_redirect  passed (negated) to the daemon constructor
      --debug            enables debug logging

    SIGINT asks the daemon to stop, SIGTERM asks it to be killed.

    Exits 0 when daemon.run() returns a true value. Otherwise exits 137, or,
    in --threads mode, sends SIGKILL to this process.

    Raises:
      Exception: if --poll_frequency is below 1 or the region is unknown.
    """
    parser = OptionParser("%prog --region <regionname> [--poll_frequency 3] [--threads] [--no_log_redirect] [--debug]")
    parser.add_option("--poll_frequency", action="store", default=3, type="int"
        , help="delay in seconds between looking for tasks to run")
    parser.add_option("--region", action="store", help="region this daemon runs in")
    parser.add_option("--threads", action="store_true"
        , help="use threading instead of subprocesses. \
        Note that threads in Python cannot be interrupted without killing the daemon!")
    parser.add_option("--no_log_redirect", action="store_true"
        , help="print daemon logging to sys.stdout & sys.stderr instead of redirecting them to a TMS log file.")
    parser.add_option("--debug", action="store_true", help="more messages")
    (options, args) = parser.parse_args()

    if options.debug:
        log.set_logging_debug(options.debug)

    if options.poll_frequency < 1:
        raise Exception("--poll_frequency must be >= 1")

    if not options.region:
        sys.exit(parser.get_usage())

    # resolve the region
    region = tms_models.ResourceRegion.get(options.region)
    if region is None:
        raise Exception("Don't know region '%s'" % (options.region))

    # Build the daemon BEFORE installing the signal handlers: the handlers
    # close over `daemon`, so a signal delivered while it is still unassigned
    # would raise NameError inside the handler.
    if options.threads:
        # multi-threaded; spawn new threads for new Tasks
        daemon = ThreadingNorcDaemon(region, options.poll_frequency, settings.TMS_LOG_DIR, not options.no_log_redirect)
    else:
        # single-threaded; fork new Tasks
        daemon = ForkingNorcDaemon(region, options.poll_frequency, settings.TMS_LOG_DIR, not options.no_log_redirect)

    # register signal handlers for interrupt (ctl-c) & terminate ($ kill <pid>).
    def __handle_SIGINT__(signum, frame):
        assert signum == signal.SIGINT, "This signal handler only handles SIGINT, not '%s'. BUG!" % (signum)
        daemon.request_stop()
    def __handle_SIGTERM__(signum, frame):
        assert signum == signal.SIGTERM, "This signal handler only handles SIGTERM, not '%s'. BUG!" % (signum)
        daemon.request_kill()
    signal.signal(signal.SIGINT, __handle_SIGINT__)
    signal.signal(signal.SIGTERM, __handle_SIGTERM__)

    ended_gracefully = daemon.run()
    if ended_gracefully:
        sys.exit(0)
    elif options.threads:
        # there's no way in python to interrupt threads; so gotta force 'em.
        # exit code is 137 on OS X
        os.kill(os.getpid(), signal.SIGKILL)
    else:
        sys.exit(137)
Exemple #5
0
def main():
    """Start a forking SQS daemon for one queue and exit according to how it ended.

    Command-line options:
      --queue_name       queue this daemon monitors (required)
      --max_to_run       max Tasks that can run at a time, must be >= 1 (required)
      --poll_frequency   delay in seconds between polls, must be >= 1 (default 3)
      --no_log_redirect  passed (negated) to the daemon constructor
      --debug            forwarded to log.set_logging_debug

    SIGINT asks the daemon to stop, SIGTERM asks it to be killed.

    Exits 0 when daemon.run() returns a true value, 137 otherwise.

    Raises:
      Exception: if --poll_frequency or --max_to_run is invalid, or no region
        matches the queue name.
    """
    parser = OptionParser(
        "%prog --queue_name <queue_name> --max_to_run <#> "
        "[--poll_frequency <3>] [--no_log_redirect] [--debug]")
    parser.add_option("--poll_frequency", action="store", default=3, type="int"
        , help="delay in seconds between looking for tasks to run")
    parser.add_option("--queue_name", action="store", help="queue name this daemon monitors")
    parser.add_option("--max_to_run", action="store", type="int"
        , help="max Tasks that can be run at a time")
    parser.add_option("--no_log_redirect", action="store_true"
        , help="print daemon logging to sys.stdout & sys.stderr instead of redirecting them to a TMS log file.")
    parser.add_option("--debug", action="store_true", help="more messages")
    (options, args) = parser.parse_args()

    log.set_logging_debug(options.debug)

    if options.poll_frequency < 1:
        raise Exception("--poll_frequency must be >= 1")
    if not options.max_to_run or options.max_to_run < 1:
        raise Exception("--max_to_run must be >= 1. found %s" % (options.max_to_run))

    if not options.queue_name:
        sys.exit(parser.get_usage())

    # resolve the region
    # currently an SQS Queue is mapped 1:1 to a ResourceRegion
    region = tms_models.ResourceRegion.get(options.queue_name)
    if region is None:
        raise Exception("Don't know region '%s'" % (options.queue_name))

    # Build the daemon BEFORE installing the signal handlers: the handlers
    # close over `daemon`, so a signal delivered while it is still unassigned
    # would raise NameError inside the handler.
    daemon = ForkingSQSDaemon(region, options.poll_frequency, settings.TMS_LOG_DIR
        , not options.no_log_redirect, max_to_run=options.max_to_run)

    # register signal handlers for interrupt (ctl-c) & terminate ($ kill <pid>).
    def __handle_SIGINT__(signum, frame):
        assert signum == signal.SIGINT, "This signal handler only handles SIGINT, not '%s'. BUG!" % (signum)
        daemon.request_stop()
    def __handle_SIGTERM__(signum, frame):
        assert signum == signal.SIGTERM, "This signal handler only handles SIGTERM, not '%s'. BUG!" % (signum)
        daemon.request_kill()
    signal.signal(signal.SIGINT, __handle_SIGINT__)
    signal.signal(signal.SIGTERM, __handle_SIGTERM__)

    ended_gracefully = daemon.run()
    if ended_gracefully:
        sys.exit(0)
    else:
        sys.exit(137)
Exemple #6
0
def main():
    """Run a single named task in a given region.

    Command-line options:
      --task_name  the task to run (required)
      --region     region to run the task in (required)
      --debug      enables debug logging

    Exits with the usage text when a required option is missing.

    Returns:
      True once run_task() has returned.
    """
    # Imported locally so the usage exit below works regardless of what the
    # enclosing module imports.
    import sys

    parser = OptionParser("%prog --task_name <name> --region <regionname> [--debug]")
    parser.add_option("--task_name", action="store", help="the task to run")
    parser.add_option("--region", action="store", help="run the task in this region")
    parser.add_option("--debug", action="store_true", help="more messages")
    (options, args) = parser.parse_args()

    if not options.task_name or not options.region:
        # get_usage() returns a string; raising a string is a TypeError on
        # Python 2.6+ and 3, so exit with the usage text instead.
        sys.exit(parser.get_usage())

    if options.debug:
        log.set_logging_debug(options.debug)

    task = get_task(options.task_name)
    run_task(task, region_name=options.region)

    return True
Exemple #7
0
def main():
    """Report on, or change the requested state of, norc/tms daemons.

    Exactly one daemon-targeting action may be chosen among --details,
    --pause, --stop, --kill and --salvage (each takes the daemon id);
    --delete also takes a daemon id, and --status reports status.
    With no action, or with conflicting actions, the process exits with the
    usage text.

    State changes are made by setting a status on the daemon-status record
    returned by get_tds(). After --pause/--stop/--kill, --wait_seconds makes
    this function poll that record every WAIT_POLL_SECONDS until the daemon
    is done; on timeout it logs and exits with code 1.

    Raises:
      Exception: when the requested action is not valid for the daemon's
        current status, when a requested shutdown is neither in progress nor
        done while waiting, or when the daemon type is unknown for --details.
    """
    global WAIT_POLL_SECONDS

    parser = OptionParser(usage())
    parser.add_option("--status", action="store_true"
        , help="show status of all running norc daemons.")
    parser.add_option("--details", action="store", type="int"
        , help="show details for tmsd given by id.")
    parser.add_option("--filter_status", action="store", default="interesting"
        , help="if showing status, limit to this set. Defaults to 'interesting', which is active+errored.")
    parser.add_option("--salvage", action="store"
        , type="int", help="don't exit tms daemon as requested; leave it running.")
    parser.add_option("--pause", action="store", type="int"
        , help="pause the tms daemon of given ID so no more tasks are run")
    parser.add_option("--stop", action="store", type="int"
        , help="stop the tms daemon of given ID after all currently running tasks have finished")
    parser.add_option("--kill", action="store"
        , type="int", help="immediately kill the tms daemon of given ID")
    parser.add_option("--delete", action="store"
        , type="int", help="mark tms daemon of given ID as deleted for convenience. Only changes DB.")
    parser.add_option("--wait_seconds", action="store", default=0
        , type="int", help="wait for N seconds for tmsd to stop after kill or stop is issued. Default is 0")
    parser.add_option("--force", action="store_true", help="overrides some safety checks. Use carefully by trying not to use it first.")
    parser.add_option("--due_to_run", action="store", type="int"
        , help="show a max # of Tasks due to run (currently an expensive DB call)")
    parser.add_option("--debug", action="store_true", help="more messages")
    (options, args) = parser.parse_args()

    if options.debug:
        log.set_logging_debug(options.debug)

    # At least one action is required. usage() is used as the parser's usage
    # string above, so it cannot be raised as an exception; exit with it.
    all_actions = [options.status, options.details, options.pause, options.stop
        , options.kill, options.salvage, options.delete]
    if not [action for action in all_actions if action]:
        sys.exit(usage())
    # These five actions are mutually exclusive: at most one may be given.
    exclusive_actions = [options.stop, options.kill, options.salvage
        , options.details, options.pause]
    if len([action for action in exclusive_actions if action]) > 1:
        sys.exit(usage())

    #
    # edit a tmsd
    #
    # The first id-carrying option that was supplied, in this priority order,
    # identifies the daemon to operate on.
    tds_id = None
    tds = None
    for candidate_id in (options.pause, options.stop, options.kill
            , options.salvage, options.delete, options.details):
        if candidate_id:
            tds_id = candidate_id
            break

    if tds_id is not None:
        tds = get_tds(tds_id)
        # Parenthesised so the pause-state check applies only to --pause;
        # without it, any action on a daemon with a pending pause was refused.
        if options.pause and (tds.is_paused() or tds.is_pause_requested()):
            raise Exception("tmsd %s is already paused or pause has been requested." % (tds.id))
        if options.stop and tds.is_stop_requested():
            raise Exception("tmsd %s is already scheduled to stop. You can also try --kill <id>." % (tds.id))
        elif options.kill and tds.is_kill_requested():
            raise Exception("tmsd %s is already scheduled to be killed. The only thing more severe is $kill -9 %s." % (tds.id, tds.pid))
        elif options.salvage and (not tds.is_stop_requested() and not tds.is_kill_requested() and not tds.is_paused()):
            raise Exception("tmsd %s cannot be salvaged.  Its status is not paused, stop- or kill- requested" % (tds.id))

    if options.delete:
        if not options.force and not tds.is_done_with_error():
            raise Exception("tmsd %s cannot be deleted because it has status %s. Use --force to override." % (tds.id, tds.get_status()))
        log.info("Deleting tmsd %s" % (tds))
        tds.set_status(tms_models.NorcDaemonStatus.STATUS_DELETED)
    elif options.salvage:
        log.info("Salvaging tmsd %s" % (tds))
        tds.set_status(tms_models.NorcDaemonStatus.STATUS_RUNNING)
    elif options.pause or options.stop or options.kill:
        if tds.is_done():
            raise Exception("tmsd %s is not running.  It cannot be shutdown or paused." % (tds.id))
        if options.pause:
            log.info("Sending pause request to tmsd %s" % (tds))
            tds.set_status(tms_models.NorcDaemonStatus.STATUS_PAUSEREQUESTED)
        elif options.stop:
            log.info("Sending stop request to tmsd %s" % (tds))
            tds.set_status(tms_models.NorcDaemonStatus.STATUS_STOPREQUESTED)
        elif options.kill:
            log.info("Sending kill request to tmsd %s" % (tds))
            tds.set_status(tms_models.NorcDaemonStatus.STATUS_KILLREQUESTED)

        if options.wait_seconds:
            # Poll the daemon status until it is done or the wait budget is spent.
            seconds_waited = 0
            timed_out = True
            while seconds_waited < options.wait_seconds:
                # Re-fetch on every poll to observe the latest status.
                tds = get_tds(tds_id)
                if tds.is_shutting_down():
                    log.info("Waiting for shutdown of tmsd %s.  It's been %s seconds." % (tds.id, seconds_waited), indent_chars=4)
                elif tds.is_done():
                    log.info("tmsd %s is done with status '%s'" % (tds.id, tds.get_status()))
                    timed_out = False
                    break
                else:
                    raise Exception("tmsd %s shutdown was requested but not honored or was overwritten in DB. This is bad, but try \"kill <pid>\" directly." % (tds.id))
                time.sleep(WAIT_POLL_SECONDS)
                seconds_waited += WAIT_POLL_SECONDS
            if timed_out:
                log.info("Timeout reached waiting for tmsd %s to finish.  Check process id %s on host '%s'" % (tds.id, tds.pid, tds.host))
                sys.exit(1)

    #
    # report on status
    #

    if options.status and tds is not None:
        report_tmsd_status(options.filter_status, [tds], max_tasks_due_to_run=options.due_to_run)
    elif options.status:
        report_tmsd_status(options.filter_status, max_tasks_due_to_run=options.due_to_run)
    if options.details:
        daemon_type = tds.get_daemon_type()
        if daemon_type == tms_models.NorcDaemonStatus.DAEMON_TYPE_TMS:
            report_tmsd_details(options.filter_status, tds)
        elif daemon_type == tms_models.NorcDaemonStatus.DAEMON_TYPE_SQS:
            report_sqsd_details(options.filter_status, tds)
        else:
            raise Exception("Unknown daemon_type '%s'" % (daemon_type))