コード例 #1
0
ファイル: node.py プロジェクト: bioid/brenda
    def task_loop():
        try:
            # reset tasks
            local.task_active = None
            local.task_push = None

            # get SQS work queue
            q = aws.get_sqs_queue(conf)

            # Loop over tasks.  There are up to two different tasks at any
            # given moment that we are processing concurrently:
            #
            # 1. Active task -- usually a blender render operation.
            # 2. S3 push task -- a task which pushes the products of the
            #                    previous active task (such as rendered
            #                    frames) to S3.
            while True:
                # reset active task
                local.task_active = None

                # initialize active task object
                task = State()
                task.msg = None
                task.proc = None
                task.retcode = None
                task.outdir = None
                task.id = 0

                # Get a task from the SQS work queue.  This is normally
                # a short script that runs blender to render one
                # or more frames.
                queuemsg = q.get_messages(message_attributes=['config'])

                # output some debug info
                print "queue read:", task.msg
                if local.task_push:
                    print "push task:", local.task_push.__dict__
                else:
                    print "no task push task"

                # process task
                if len(queuemsg) > 0:
                    task.msg = queuemsg[0]

                    # assign an ID to task
                    local.task_id_counter += 1
                    task.id = local.task_id_counter

                    # register active task
                    local.task_active = task

                    # create output directory
                    task.outdir = os.path.join(work_dir, "brenda-outdir%d.tmp" % (task.id,))
                    utils.rmtree(task.outdir)
                    utils.mkdir(task.outdir)

                    # Create a config dictionary using combination of global and task-specific config values
                    taskconfig = conf.copy()
                    if 'config' in task.msg.message_attributes:
                        taskconfig.update(json.loads(task.msg.message_attributes['config']['string_value']))

                    # Store outdir in task config for later use
                    taskconfig['OUTDIR'] = task.outdir
                    if not 'BLENDER_FILE' in taskconfig:
                        taskconfig['BLENDER_FILE'] = '*.blend'

                    print "task-specific config:", taskconfig

                    # get the task script
                    script = task.msg.get_body()
                    print "script len:", len(script)


                    # do macro substitution on the task script
                    for k in taskconfig:
                        script = script.replace('$' + k, taskconfig[k])

                    # add shebang if absent
                    if not script.startswith("#!"):
                        script = "#!/bin/bash\n" + script

                    # Make sure we're working with the correct project directory
                    # FIXME - this is likely not the most efficient way of doing it, and probably leads to unnecessary
                    #         downloads from s3.  Ideally we would keep all project directories and switch between them,
                    #         but currently brenda only supports one working project directory at a time
                    proj_dir = get_project(taskconfig, taskconfig['BLENDER_PROJECT'])

                    # mount additional EBS volumes
                    aws.mount_additional_ebs(taskconfig, proj_dir)

                    # cd to project directory, where we will run blender from
                    with utils.Cd(proj_dir) as cd:
                        # write script file and make it executable
                        script_fn = "./brenda-go"
                        with open(script_fn, 'w') as f:
                            f.write(script)
                        st = os.stat(script_fn)
                        os.chmod(script_fn, st.st_mode | (stat.S_IEXEC|stat.S_IXGRP|stat.S_IXOTH))

                        # run the script
                        print "------- Run script %s -------" % (os.path.realpath(script_fn),)
                        print script,
                        print "--------------------------"
                        task.proc = Subprocess([script_fn])

                    print "active task:", local.task_active.__dict__

                # Wait for active and S3-push tasks to complete,
                # while periodically reasserting with SQS to
                # acknowledge that tasks are still pending.
                # (If we don't reassert with SQS frequently enough,
                # it will assume we died, and put our tasks back
                # in the queue.  "frequently enough" means within
                # visibility_timeout.)
                count = 0
                while True:
                    reassert = (count >= visibility_timeout_reassert)
                    for i, task in enumerate((local.task_active, local.task_push)):
                        if task:
                            name = task_names[i]
                            if task.proc is not None:
                                # test if process has finished
                                task.retcode = task.proc.poll()
                                if task.retcode is not None:
                                    # process has finished
                                    task.proc = None

                                    # did process finish with errors?
                                    if task.retcode != 0:
                                        errtxt = "fatal error in %s task" % (name,)
                                        if name == 'active':
                                            raise error.ValueErrorRetry(errtxt)
                                        else:
                                            raise ValueError(errtxt)

                                    # Process finished successfully.  If S3-push process,
                                    # tell SQS that the task completed successfully.
                                    if name == 'push':
                                        print "******* TASK", task.id, "COMMITTED to S3"
                                        q.delete_message(task.msg)
                                        task.msg = None
                                        local.task_count += 1
                                        task_complete_accounting(local.task_count)

                                    # active task completed?
                                    if name == 'active':
                                        print "******* TASK", task.id, "READY-FOR-PUSH"

                            # tell SQS that we are still working on the task
                            if reassert and task.proc is not None:
                                print "******* REASSERT", name, task.id
                                task.msg.change_visibility(visibility_timeout)

                    # break out of loop only when no pending tasks remain
                    if ((not local.task_active or local.task_active.proc is None)
                        and (not local.task_push or local.task_push.proc is None)):
                        break

                    # setup for next process poll iteration
                    if reassert:
                        count = 0
                    time.sleep(1)
                    count += 1

                # clean up the S3-push task
                cleanup(local.task_push, 'push')
                local.task_push = None

                # start a concurrent push task to commit files generated by
                # just-completed active task (such as blender render frames) to S3
                if local.task_active:
                    local.task_active.proc = start_s3_push_process(opts, args, taskconfig, local.task_active.outdir)
                    local.task_push = local.task_active
                    local.task_active = None

                # if no active task and no S3-push task, we are done (unless DONE is set to "poll")
                if not local.task_active and not local.task_push:
                    action = read_done_file()
                    if action == "poll":
                        print "Polling for more work..."
                        time.sleep(15)
                    elif action == "smart":
                        now = time.time()
                        try:
                            instance_id = aws.get_instance_id_self()
                            spot_request_id = aws.get_spot_request_from_instance_id(conf, instance_id)
                            launch_time = aws.get_launch_time(conf, spot_request_id)
                            if launch_time:
                                spottime = aws.get_uptime(now, launch_time)
                                minutes_after_hour = (spottime / 60) % 60
                                print "Smart poll: ", minutes_after_hour
                                if minutes_after_hour >= smart_shutdown_threshold:
                                    print "Smart poll threshold passed, shutting down (%d minutes after the hour with no work in queue)" % (minutes_after_hour)
                                    # update the value of DONE config var for clean shutdown
                                    conf['DONE'] = 'shutdown'
                                    write_done_file()
                                    break;
                            else:
                                print "Smart poll: no launch_time for spot request %s" % (spot_request_id)
                        except Exception, e:
                            print "Smart poll failed!", e

                        time.sleep(15)
                    else:
                        break

        finally:
            cleanup_all()
コード例 #2
0
ファイル: node.py プロジェクト: bioid/brenda
    # get configuration parameters
    # (work_dir holds per-task output directories and Blender temp files)
    work_dir = aws.get_work_dir(conf)
    # seconds between SQS visibility reassertions while a task is running
    visibility_timeout_reassert = int(conf.get('VISIBILITY_TIMEOUT_REASSERT', '30'))
    # SQS visibility timeout (seconds) applied when reasserting a message
    visibility_timeout = int(conf.get('VISIBILITY_TIMEOUT', '120'))

    # validate RENDER_OUTPUT bucket
    # NOTE(review): validation is commented out here -- confirm whether
    # disabling it was intentional
    #aws.get_s3_output_bucket(conf)

    # file cleanup: remove stale task accounting files from a previous run
    utils.rm('task_count')
    utils.rm('task_last')

    # create Blender temporary directory
    tmp_dir = os.path.join(work_dir, 'tmp')
    if not os.path.isdir(tmp_dir):
        utils.mkdir(tmp_dir)
    os.environ['TMP'] = tmp_dir  # point child-process temp files at work_dir

    # save the value of DONE config var
    write_done_file()

    # Prepare for smart shutdown: minutes-after-the-hour threshold at which
    # an idle node shuts itself down
    smart_shutdown_threshold = int(conf.get('SMART_SHUTDOWN_THRESHOLD', 58))
    spot_request_create_time = datetime.datetime.now().isoformat()

    # Get our spot instance request, if it exists
    # (RUNNING_ON_EC2 defaults to true; the EC2 metadata queries below
    # would fail off-EC2)
    spot_request_id = None
    if int(conf.get('RUNNING_ON_EC2', '1')):
        try:
            instance_id = aws.get_instance_id_self()
            spot_request_id = aws.get_spot_request_from_instance_id(conf, instance_id)
コード例 #3
0
ファイル: node.py プロジェクト: jdavancens/better-brenda
    def task_loop():
        """Worker loop: read task scripts from the SQS work queue, run
        them, and push their output to S3.

        There are up to two tasks in flight at any moment: the active
        (render) task and the S3-push task for the previous render.
        cleanup_all() always runs on exit.
        """
        try:
            # reset tasks
            local.task_active = None
            local.task_push = None

            # get SQS work queue
            q = aws.get_sqs_queue(conf)

            # Loop over tasks.  There are up to two different tasks at any
            # given moment that we are processing concurrently:
            #
            # 1. Active task -- usually a blender render operation.
            # 2. S3 push task -- a task which pushes the products of the
            #                    previous active task (such as rendered
            #                    frames) to S3.
            while True:
                # reset active task
                local.task_active = None

                # initialize active task object
                task = State()
                task.msg = None       # SQS message the task came from
                task.proc = None      # Subprocess running the task script
                task.retcode = None   # exit status once the process finishes
                task.outdir = None    # per-task output directory
                task.id = 0           # sequential task id (0 = no task yet)

                # Get a task from the SQS work queue.  This is normally
                # a short script that runs blender to render one
                # or more frames.
                task.msg = q.read()

                # output some debug info
                print "queue read:", task.msg
                if local.task_push:
                    print "push task:", local.task_push.__dict__
                else:
                    print "no task push task"

                # process task
                if task.msg is not None:
                    # assign an ID to task
                    local.task_id_counter += 1
                    task.id = local.task_id_counter

                    # register active task
                    local.task_active = task

                    # create output directory (remove any stale leftover first)
                    task.outdir = os.path.join(
                        work_dir, "brenda-outdir%d.tmp" % (task.id, ))
                    utils.rmtree(task.outdir)
                    utils.mkdir(task.outdir)

                    # get the task script
                    script = task.msg.get_body()
                    print "script len:", len(script)

                    # do macro substitution on the task script
                    script = script.replace('$OUTDIR', task.outdir)

                    # add shebang if absent
                    if not script.startswith("#!"):
                        script = "#!/bin/bash\n" + script

                    # cd to project directory, where we will run blender from
                    # NOTE(review): proj_dir is not defined in this function --
                    # it must come from the enclosing scope; confirm it is set
                    # before the first task arrives
                    with utils.Cd(proj_dir) as cd:
                        # write script file and make it executable
                        script_fn = "./brenda-go"
                        with open(script_fn, 'w') as f:
                            f.write(script)
                        st = os.stat(script_fn)
                        os.chmod(
                            script_fn, st.st_mode |
                            (stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH))

                        # run the script
                        print "------- Run script %s -------" % (
                            os.path.realpath(script_fn), )
                        print script,
                        print "--------------------------"
                        task.proc = Subprocess([script_fn])

                    print "active task:", local.task_active.__dict__

                # Wait for active and S3-push tasks to complete,
                # while periodically reasserting with SQS to
                # acknowledge that tasks are still pending.
                # (If we don't reassert with SQS frequently enough,
                # it will assume we died, and put our tasks back
                # in the queue.  "frequently enough" means within
                # visibility_timeout.)
                count = 0
                while True:
                    reassert = (count >= visibility_timeout_reassert)
                    for i, task in enumerate(
                        (local.task_active, local.task_push)):
                        if task:
                            name = task_names[i]
                            if task.proc is not None:
                                # test if process has finished
                                task.retcode = task.proc.poll()
                                if task.retcode is not None:
                                    # process has finished
                                    task.proc = None

                                    # did process finish with errors?
                                    if task.retcode != 0:
                                        errtxt = "fatal error in %s task" % (
                                            name, )
                                        if name == 'active':
                                            # retryable: message returns to queue
                                            raise error.ValueErrorRetry(errtxt)
                                        else:
                                            raise ValueError(errtxt)

                                    # Process finished successfully.  If S3-push process,
                                    # tell SQS that the task completed successfully.
                                    if name == 'push':
                                        print "******* TASK", task.id, "COMMITTED to S3"
                                        q.delete_message(task.msg)
                                        task.msg = None
                                        local.task_count += 1
                                        task_complete_accounting(
                                            local.task_count)

                                    # active task completed?
                                    if name == 'active':
                                        print "******* TASK", task.id, "READY-FOR-PUSH"

                            # tell SQS that we are still working on the task
                            if reassert and task.proc is not None:
                                print "******* REASSERT", name, task.id
                                task.msg.change_visibility(visibility_timeout)

                    # break out of loop only when no pending tasks remain
                    if ((not local.task_active
                         or local.task_active.proc is None) and
                        (not local.task_push or local.task_push.proc is None)):
                        break

                    # setup for next process poll iteration
                    if reassert:
                        count = 0
                    time.sleep(1)
                    count += 1

                # clean up the S3-push task
                cleanup(local.task_push, 'push')
                local.task_push = None

                # start a concurrent push task to commit files generated by
                # just-completed active task (such as blender render frames) to S3
                if local.task_active:
                    local.task_active.proc = start_s3_push_process(
                        opts, args, conf, local.task_active.outdir)
                    local.task_push = local.task_active
                    local.task_active = None

                # if no active task and no S3-push task, we are done (unless DONE is set to "poll")
                if not local.task_active and not local.task_push:
                    if read_done_file() == "poll":
                        print "Polling for more work..."
                        time.sleep(15)
                    else:
                        break

        finally:
            # always release resources / clean up child processes on exit
            cleanup_all()
コード例 #4
0
ファイル: node.py プロジェクト: jdavancens/better-brenda
    # get configuration parameters
    # (work_dir holds per-task output directories and Blender temp files)
    work_dir = aws.get_work_dir(conf)
    # seconds between SQS visibility reassertions while a task is running
    visibility_timeout_reassert = int(
        conf.get('VISIBILITY_TIMEOUT_REASSERT', '30'))
    # SQS visibility timeout (seconds) applied when reasserting a message
    visibility_timeout = int(conf.get('VISIBILITY_TIMEOUT', '120'))

    # validate RENDER_OUTPUT bucket
    aws.get_s3_output_bucket(conf)

    # file cleanup: remove stale task accounting files from a previous run
    utils.rm('task_count')
    utils.rm('task_last')

    # create Blender temporary directory
    tmp_dir = os.path.join(work_dir, 'tmp')
    if not os.path.isdir(tmp_dir):
        utils.mkdir(tmp_dir)
    os.environ['TMP'] = tmp_dir  # point child-process temp files at work_dir

    # save the value of DONE config var
    write_done_file()

    # Get our spot instance request, if it exists
    # (RUNNING_ON_EC2 defaults to true; the EC2 metadata queries below
    # would fail off-EC2, hence the best-effort try/except)
    spot_request_id = None
    if int(conf.get('RUNNING_ON_EC2', '1')):
        try:
            instance_id = aws.get_instance_id_self()
            spot_request_id = aws.get_spot_request_from_instance_id(
                conf, instance_id)
            print "Spot request ID:", spot_request_id
        except Exception, e:
            print "Error determining spot instance request:", e
コード例 #5
0
ファイル: node.py プロジェクト: SenH/brenda
    def task_loop():
        """Worker loop: read render scripts from the SQS work queue, run
        them, and upload their output to S3.

        Up to two tasks run concurrently: the render task and the upload
        task for the previous render.  cleanup_all() always runs on exit.
        """
        try:
            # reset tasks
            local.task_render = None
            local.task_upload = None

            # get SQS work queue
            q = aws.get_sqs_conn_queue(conf)[0]

            # Loop over tasks.  There are up to two different tasks at any
            # given moment that we are processing concurrently:
            #
            # 1. Render task -- usually a render operation.
            # 2. Upload task -- a task which uploads results to S3.
            while True:
                # reset render task
                local.task_render = None

                # initialize render task object
                task = State()
                task.msg = None          # SQS message the task came from
                task.proc = None         # Subprocess running the task script
                task.retcode = None      # exit status once the process finishes
                task.outdir = None       # per-task output directory
                task.id = 0              # sequential task id (0 = no task yet)
                task.script_name = None  # taken from the SQS message attributes

                # Get a task from the SQS work queue.  This is normally
                # a short script that renders one or more frames.
                task.msg = q.read(message_attributes=['All'])

                # output some debug info
                logging.debug('Reading work queue')
                if local.task_upload:
                    logging.info("Running upload task #%d", local.task_upload.id)
                    logging.debug(local.task_upload.__dict__)
                else:
                    logging.info('No upload task available')

                # process task
                if task.msg is not None:
                    # assign an ID to task
                    local.task_id_counter += 1
                    task.id = local.task_id_counter
                    task.script_name = task.msg.message_attributes['script_name']['string_value']

                    # register render task
                    local.task_render = task

                    # create output directory (remove any stale leftover first)
                    task.outdir = os.path.join(work_dir, "{}_out_{}".format(task.script_name, task.id))
                    utils.rmtree(task.outdir)
                    utils.mkdir(task.outdir)

                    # get the task script
                    script = task.msg.get_body()

                    # cd to output directory, where we will run render task from
                    with utils.Cd(task.outdir):
                        # write script file and make it executable
                        script_fn = "./{}".format(task.script_name)
                        with open(script_fn, 'w') as f:
                            f.write(script)
                        st = os.stat(script_fn)
                        os.chmod(script_fn, st.st_mode | (stat.S_IEXEC|stat.S_IXGRP|stat.S_IXOTH))

                        # run the script
                        task.proc = Subprocess([script_fn])

                    logging.info('Running render task \"%s #%d\"', local.task_render.script_name, local.task_render.id)
                    logging.info(script.replace("\n"," "))
                    logging.debug(local.task_render.__dict__)

                # Wait for render & upload tasks to complete, while periodically reasserting with SQS to
                # acknowledge that tasks are still pending. (If we don't reassert with SQS frequently enough,
                # it will assume we died, and put our tasks back in the queue.  "frequently enough" means within
                # visibility_timeout.)
                count = 0
                while True:
                    reassert = (count >= visibility_timeout_reassert)
                    for i, task in enumerate((local.task_render, local.task_upload)):
                        if task:
                            name = task_names[i]
                            if task.proc is not None:
                                # test if process has finished
                                task.retcode = task.proc.poll()
                                if task.retcode is not None:
                                    # process has finished
                                    task.proc = None

                                    # did process finish with errors?
                                    if task.retcode != 0:
                                        if name == 'render':
                                            # retryable: message returns to queue
                                            errtxt = "Render task \"{} #{}\" exited with status code {}".format(
                                            task.script_name, task.id, task.retcode)
                                            raise error.ValueErrorRetry(errtxt)
                                        else:
                                            errtxt = "Upload task #{} exited with status code {}".format(
                                            task.id, task.retcode)
                                            raise ValueError(errtxt)

                                    # Process finished successfully.  If upload process,
                                    # tell SQS that the task completed successfully.
                                    if name == 'upload':
                                        logging.info('Finished upload task #%d', task.id)
                                        q.delete_message(task.msg)
                                        task.msg = None
                                        local.task_count += 1
                                        task_complete_accounting(local.task_count)

                                    # Render task completed?
                                    if name == 'render':
                                        logging.info('Finished render task \"%s #%d\"', task.script_name, task.id)

                            # tell SQS that we are still working on the task
                            if reassert and task.proc is not None:
                                logging.debug('Reasserting %s task %d with SQS', name, task.id)
                                task.msg.change_visibility(visibility_timeout)

                    # break out of loop only when no pending tasks remain
                    if ((not local.task_render or local.task_render.proc is None)
                        and (not local.task_upload or local.task_upload.proc is None)):
                        break

                    # setup for next process poll iteration
                    if reassert:
                        count = 0
                    time.sleep(1)
                    count += 1

                # clean up the upload task
                cleanup(local.task_upload, 'upload')
                local.task_upload = None

                # start a concurrent upload task to commit files generated by just-completed render task to S3
                if local.task_render:
                    local.task_render.proc = start_upload_process(opts, args, conf, local.task_render)
                    local.task_upload = local.task_render
                    local.task_render = None

                # if no render or upload task, we are done (unless DONE is set to "poll")
                if not local.task_render and not local.task_upload:
                    if read_done_file() == "poll":
                        logging.info('Waiting for tasks...')
                        time.sleep(15)
                    else:
                        logging.info('Exiting')
                        break

        finally:
            # always release resources / clean up child processes on exit
            cleanup_all()
コード例 #6
0
ファイル: node.py プロジェクト: Anuga/brenda
    def task_loop():
        """Worker loop: read task scripts from the SQS work queue, run
        them, and push their output to S3.

        There are up to two tasks in flight at any moment: the active
        (render) task and the S3-push task for the previous render.
        cleanup_all() always runs on exit.
        """
        try:
            # reset tasks
            local.task_active = None
            local.task_push = None

            # get SQS work queue
            q = aws.get_sqs_queue(conf)

            # Loop over tasks.  There are up to two different tasks at any
            # given moment that we are processing concurrently:
            #
            # 1. Active task -- usually a blender render operation.
            # 2. S3 push task -- a task which pushes the products of the
            #                    previous active task (such as rendered
            #                    frames) to S3.
            while True:
                # reset active task
                local.task_active = None

                # initialize active task object
                task = State()
                task.msg = None       # SQS message the task came from
                task.proc = None      # Subprocess running the task script
                task.retcode = None   # exit status once the process finishes
                task.outdir = None    # per-task output directory
                task.id = 0           # sequential task id (0 = no task yet)

                # Get a task from the SQS work queue.  This is normally
                # a short script that runs blender to render one
                # or more frames.
                task.msg = q.read()

                # output some debug info
                print "queue read:", task.msg
                if local.task_push:
                    print "push task:", local.task_push.__dict__
                else:
                    print "no task push task"

                # process task
                if task.msg is not None:
                    # assign an ID to task
                    local.task_id_counter += 1
                    task.id = local.task_id_counter

                    # register active task
                    local.task_active = task

                    # create output directory (remove any stale leftover first)
                    task.outdir = os.path.join(work_dir, "brenda-outdir%d.tmp" % (task.id,))
                    utils.rmtree(task.outdir)
                    utils.mkdir(task.outdir)

                    # get the task script
                    script = task.msg.get_body()
                    print "script len:", len(script)

                    # do macro substitution on the task script
                    script = script.replace('$OUTDIR', task.outdir)

                    # add shebang if absent
                    if not script.startswith("#!"):
                        script = "#!/bin/bash\n" + script

                    # cd to project directory, where we will run blender from
                    # NOTE(review): proj_dir is not defined in this function --
                    # it must come from the enclosing scope; confirm it is set
                    # before the first task arrives
                    with utils.Cd(proj_dir) as cd:
                        # write script file and make it executable
                        script_fn = "./brenda-go"
                        with open(script_fn, 'w') as f:
                            f.write(script)
                        st = os.stat(script_fn)
                        os.chmod(script_fn, st.st_mode | (stat.S_IEXEC|stat.S_IXGRP|stat.S_IXOTH))

                        # run the script
                        print "------- Run script %s -------" % (os.path.realpath(script_fn),)
                        print script,
                        print "--------------------------"
                        task.proc = Subprocess([script_fn])

                    print "active task:", local.task_active.__dict__

                # Wait for active and S3-push tasks to complete,
                # while periodically reasserting with SQS to
                # acknowledge that tasks are still pending.
                # (If we don't reassert with SQS frequently enough,
                # it will assume we died, and put our tasks back
                # in the queue.  "frequently enough" means within
                # visibility_timeout.)
                count = 0
                while True:
                    reassert = (count >= visibility_timeout_reassert)
                    for i, task in enumerate((local.task_active, local.task_push)):
                        if task:
                            name = task_names[i]
                            if task.proc is not None:
                                # test if process has finished
                                task.retcode = task.proc.poll()
                                if task.retcode is not None:
                                    # process has finished
                                    task.proc = None

                                    # did process finish with errors?
                                    if task.retcode != 0:
                                        errtxt = "fatal error in %s task" % (name,)
                                        if name == 'active':
                                            # retryable: message returns to queue
                                            raise error.ValueErrorRetry(errtxt)
                                        else:
                                            raise ValueError(errtxt)

                                    # Process finished successfully.  If S3-push process,
                                    # tell SQS that the task completed successfully.
                                    if name == 'push':
                                        print "******* TASK", task.id, "COMMITTED to S3"
                                        q.delete_message(task.msg)
                                        task.msg = None
                                        local.task_count += 1
                                        task_complete_accounting(local.task_count)

                                    # active task completed?
                                    if name == 'active':
                                        print "******* TASK", task.id, "READY-FOR-PUSH"

                            # tell SQS that we are still working on the task
                            if reassert and task.proc is not None:
                                print "******* REASSERT", name, task.id
                                task.msg.change_visibility(visibility_timeout)

                    # break out of loop only when no pending tasks remain
                    if ((not local.task_active or local.task_active.proc is None)
                        and (not local.task_push or local.task_push.proc is None)):
                        break

                    # setup for next process poll iteration
                    if reassert:
                        count = 0
                    time.sleep(1)
                    count += 1

                # clean up the S3-push task
                cleanup(local.task_push, 'push')
                local.task_push = None

                # start a concurrent push task to commit files generated by
                # just-completed active task (such as blender render frames) to S3
                if local.task_active:
                    local.task_active.proc = start_s3_push_process(opts, args, conf, local.task_active.outdir)
                    local.task_push = local.task_active
                    local.task_active = None

                # if no active task and no S3-push task, we are done (unless DONE is set to "poll")
                if not local.task_active and not local.task_push:
                    if read_done_file() == "poll":
                        print "Polling for more work..."
                        time.sleep(15)
                    else:
                        break

        finally:
            # always release resources / clean up child processes on exit
            cleanup_all()