예제 #1
0
def perf(opts, conf, args):
    def task_count_last(i):
        s = i[1].split()
        try:
            count = int(s[0])
            last = int(s[1])
        except:
            return None
        else:
            return count, last

    script = ['if', '[', '-f', 'task_count', ']', '&&', '[', '-f', 'task_last', '];', 'then', 'cat', 'task_count;', 'cat', 'task_last;', 'else', 'echo', '0;', 'fi']
    instances = aws.filter_instances(opts, conf)
    idict = dict([(i.dns_name, i) for i in instances])
    sdict = aws.get_spot_request_dict(conf)
    data = {}
    for i in run_cmd_list(opts, conf, ssh_cmd_list(opts, conf, script, instances), show_output=False, capture_stderr=False):
        host = i[0]
        inst = idict.get(host)
        if inst:
            sir = sdict.get(inst.spot_instance_request_id)
            price = None
            if sir:
                price = float(sir.price)
            tasks = task_count_last(i)
            if tasks:
                task_count, task_last = tasks
                uptime = aws.get_uptime(task_last, inst.launch_time) / 3600.0
                stat = data.setdefault(inst.instance_type, dict(n=0, uptime_sum=0.0, task_sum=0, price_sum=0.0))
                stat['n'] += 1
                stat['uptime_sum'] += uptime
                stat['task_sum'] += task_count
                if price is not None:
                    stat['price_sum'] += price
    tph= []
    tpd = []
    total_tasks = 0.0
    total_uptime = 0
    total_n = 0
    for itype, stat in data.items():
        total_tasks += stat['task_sum']
        total_uptime += stat['uptime_sum']
        total_n += stat['n']
        tasks_per_hour = stat['task_sum'] / stat['uptime_sum']
        tph.append((tasks_per_hour, itype))
        if 'price_sum' in stat:
            mprice = stat['price_sum'] / stat['n']
            tasks_per_dollar = tasks_per_hour / mprice
            tpd.append((tasks_per_dollar, itype))
    tph.sort(reverse=True)
    tpd.sort(reverse=True)
    if total_n:
        print "Tasks per hour (%.02f)" % (total_tasks / total_uptime * total_n,)
        for tasks_per_hour, itype in tph:
            print "  %s %.02f" % (itype, tasks_per_hour)
        print "Tasks per US$"
        for tasks_per_dollar, itype in tpd:
            print "  %s %.02f" % (itype, tasks_per_dollar)
예제 #2
0
파일: run.py 프로젝트: meigo/brenda
def status(opts, conf):
    ec2 = aws.get_ec2_conn(conf)
    instances = aws.filter_instances(opts, conf)
    if instances:
        print "Active Instances"
        now = time.time()
        for i in instances:
            uptime = aws.get_uptime(now, i.launch_time)
            print ' ', i.image_id, aws.format_uptime(uptime), i.public_dns_name
    requests = ec2.get_all_spot_instance_requests()
    if requests:
        print "Spot Requests"
        for r in requests:
            dns_name = ''
            print "  %s %s %s %s $%s %s %s" % (r.id, r.region, r.type, r.create_time, r.price, r.state, r.status)
예제 #3
0
def status(opts, conf):
    ec2 = aws.get_ec2_conn(conf)
    instances = aws.filter_instances(opts, conf)
    if instances:
        print "Active Instances"
        now = time.time()
        for i in instances:
            uptime = aws.get_uptime(now, i.launch_time)
            print ' ', i.image_id, aws.format_uptime(uptime), i.public_dns_name
    requests = ec2.get_all_spot_instance_requests()
    if requests:
        print "Spot Requests"
        for r in requests:
            dns_name = ''
            print "  %s %s %s %s $%s %s %s" % (r.id, r.region, r.type, r.create_time, r.price, r.state, r.status)
예제 #4
0
def status(opts, conf):
    now = time.time()
    instances = aws.filter_instances(opts, conf,
                                     {'instance-state-name': 'running'})
    if instances:
        print "Running Instances"
    for i in instances:
        uptime = aws.get_uptime(now, i.launch_time)
        print ' ', i.image_id, aws.format_uptime(
            uptime), i.public_dns_name, i.tags

    requests = aws.get_all_spot_instance_requests(
        opts, conf, {'state': ['active', 'open']})
    if requests:
        print "Active Spot Requests"
    for r in requests:
        print "  %s %s %s %s $%s %s %s %s" % (r.id, r.region, r.type,
                                              r.create_time, r.price, r.state,
                                              r.status, r.tags)
예제 #5
0
파일: node.py 프로젝트: bioid/brenda
    def task_loop():
        """Run the worker loop: fetch tasks from SQS, run them, push results.

        Each iteration reads at most one message from the SQS work queue,
        writes its body out as an executable script in the project
        directory and starts it as a subprocess.  It then waits for that
        process and for any S3-push process left over from the previous
        iteration, and finally starts an S3 push for the task that just
        finished.  The loop ends when there is neither an active nor a
        push task, unless read_done_file() returns "poll" or "smart".

        Relies on names from the enclosing scope that are not defined in
        this function (conf, opts, args, local, work_dir, task_names,
        visibility_timeout, visibility_timeout_reassert,
        smart_shutdown_threshold, cleanup, cleanup_all, ...).
        cleanup_all() is always called on exit, including on exceptions.
        """
        try:
            # reset tasks
            local.task_active = None
            local.task_push = None

            # get SQS work queue
            q = aws.get_sqs_queue(conf)

            # Loop over tasks.  There are up to two different tasks at any
            # given moment that we are processing concurrently:
            #
            # 1. Active task -- usually a blender render operation.
            # 2. S3 push task -- a task which pushes the products of the
            #                    previous active task (such as rendered
            #                    frames) to S3.
            while True:
                # reset active task
                local.task_active = None

                # initialize active task object
                task = State()
                task.msg = None
                task.proc = None
                task.retcode = None
                task.outdir = None
                task.id = 0

                # Get a task from the SQS work queue.  This is normally
                # a short script that runs blender to render one
                # or more frames.  The 'config' message attribute is
                # requested because it is read below for per-task settings.
                queuemsg = q.get_messages(message_attributes=['config'])

                # output some debug info
                # NOTE(review): task.msg is still None at this point; the
                # message is only assigned inside the branch below.
                print "queue read:", task.msg
                if local.task_push:
                    print "push task:", local.task_push.__dict__
                else:
                    print "no task push task"

                # process task
                if len(queuemsg) > 0:
                    task.msg = queuemsg[0]

                    # assign an ID to task
                    local.task_id_counter += 1
                    task.id = local.task_id_counter

                    # register active task
                    local.task_active = task

                    # create output directory
                    task.outdir = os.path.join(work_dir, "brenda-outdir%d.tmp" % (task.id,))
                    utils.rmtree(task.outdir)
                    utils.mkdir(task.outdir)

                    # Create a config dictionary using combination of global and task-specific config values
                    taskconfig = conf.copy()
                    if 'config' in task.msg.message_attributes:
                        taskconfig.update(json.loads(task.msg.message_attributes['config']['string_value']))

                    # Store outdir in task config for later use
                    taskconfig['OUTDIR'] = task.outdir
                    if not 'BLENDER_FILE' in taskconfig:
                        taskconfig['BLENDER_FILE'] = '*.blend'

                    print "task-specific config:", taskconfig

                    # get the task script
                    script = task.msg.get_body()
                    print "script len:", len(script)


                    # do macro substitution on the task script: every
                    # "$KEY" occurrence is replaced by taskconfig[KEY]
                    # (assumes every config value is a string -- TODO confirm)
                    for k in taskconfig:
                        script = script.replace('$' + k, taskconfig[k])

                    # add shebang if absent
                    if not script.startswith("#!"):
                        script = "#!/bin/bash\n" + script

                    # Make sure we're working with the correct project directory
                    # FIXME - this is likely not the most efficient way of doing it, and probably leads to unnecessary
                    #         downloads from s3.  Ideally we would keep all project directories and switch between them,
                    #         but currently brenda only supports one working project directory at a time
                    proj_dir = get_project(taskconfig, taskconfig['BLENDER_PROJECT'])

                    # mount additional EBS volumes
                    aws.mount_additional_ebs(taskconfig, proj_dir)

                    # cd to project directory, where we will run blender from
                    with utils.Cd(proj_dir) as cd:
                        # write script file and make it executable
                        script_fn = "./brenda-go"
                        with open(script_fn, 'w') as f:
                            f.write(script)
                        st = os.stat(script_fn)
                        os.chmod(script_fn, st.st_mode | (stat.S_IEXEC|stat.S_IXGRP|stat.S_IXOTH))

                        # run the script
                        print "------- Run script %s -------" % (os.path.realpath(script_fn),)
                        # trailing comma: print the script without an extra newline
                        print script,
                        print "--------------------------"
                        task.proc = Subprocess([script_fn])

                    print "active task:", local.task_active.__dict__

                # Wait for active and S3-push tasks to complete,
                # while periodically reasserting with SQS to
                # acknowledge that tasks are still pending.
                # (If we don't reassert with SQS frequently enough,
                # it will assume we died, and put our tasks back
                # in the queue.  "frequently enough" means within
                # visibility_timeout.)
                count = 0
                while True:
                    # count advances once per one-second sleep below
                    reassert = (count >= visibility_timeout_reassert)
                    # NOTE: this loop rebinds the name 'task'; index i selects
                    # the matching label from task_names
                    for i, task in enumerate((local.task_active, local.task_push)):
                        if task:
                            name = task_names[i]
                            if task.proc is not None:
                                # test if process has finished
                                task.retcode = task.proc.poll()
                                if task.retcode is not None:
                                    # process has finished
                                    task.proc = None

                                    # did process finish with errors?
                                    # (active-task failures raise the Retry
                                    # variant; presumably the caller retries
                                    # on it -- verify against caller)
                                    if task.retcode != 0:
                                        errtxt = "fatal error in %s task" % (name,)
                                        if name == 'active':
                                            raise error.ValueErrorRetry(errtxt)
                                        else:
                                            raise ValueError(errtxt)

                                    # Process finished successfully.  If S3-push process,
                                    # tell SQS that the task completed successfully.
                                    if name == 'push':
                                        print "******* TASK", task.id, "COMMITTED to S3"
                                        q.delete_message(task.msg)
                                        task.msg = None
                                        local.task_count += 1
                                        task_complete_accounting(local.task_count)

                                    # active task completed?
                                    if name == 'active':
                                        print "******* TASK", task.id, "READY-FOR-PUSH"

                            # tell SQS that we are still working on the task
                            if reassert and task.proc is not None:
                                print "******* REASSERT", name, task.id
                                task.msg.change_visibility(visibility_timeout)

                    # break out of loop only when no pending tasks remain
                    if ((not local.task_active or local.task_active.proc is None)
                        and (not local.task_push or local.task_push.proc is None)):
                        break

                    # setup for next process poll iteration
                    if reassert:
                        count = 0
                    time.sleep(1)
                    count += 1

                # clean up the S3-push task
                cleanup(local.task_push, 'push')
                local.task_push = None

                # start a concurrent push task to commit files generated by
                # just-completed active task (such as blender render frames) to S3
                # (taskconfig is the one built above in this same iteration,
                # since local.task_active is only set in that branch)
                if local.task_active:
                    local.task_active.proc = start_s3_push_process(opts, args, taskconfig, local.task_active.outdir)
                    local.task_push = local.task_active
                    local.task_active = None

                # if no active task and no S3-push task, we are done (unless DONE is set to "poll")
                if not local.task_active and not local.task_push:
                    action = read_done_file()
                    if action == "poll":
                        print "Polling for more work..."
                        time.sleep(15)
                    elif action == "smart":
                        # "smart" mode: keep polling until the uptime passes
                        # smart_shutdown_threshold minutes into the current hour
                        now = time.time()
                        try:
                            instance_id = aws.get_instance_id_self()
                            spot_request_id = aws.get_spot_request_from_instance_id(conf, instance_id)
                            launch_time = aws.get_launch_time(conf, spot_request_id)
                            if launch_time:
                                spottime = aws.get_uptime(now, launch_time)
                                # assumes spottime is in seconds -- TODO confirm
                                minutes_after_hour = (spottime / 60) % 60
                                print "Smart poll: ", minutes_after_hour
                                if minutes_after_hour >= smart_shutdown_threshold:
                                    print "Smart poll threshold passed, shutting down (%d minutes after the hour with no work in queue)" % (minutes_after_hour)
                                    # update the value of DONE config var for clean shutdown
                                    conf['DONE'] = 'shutdown'
                                    write_done_file()
                                    break;
                            else:
                                print "Smart poll: no launch_time for spot request %s" % (spot_request_id)
                        except Exception, e:
                            # best effort: a failed lookup only logs, then
                            # falls through to the sleep and polls again
                            print "Smart poll failed!", e

                        time.sleep(15)
                    else:
                        break

        finally:
            cleanup_all()
예제 #6
0
def instances(opts, conf):
    now = time.time()
    for i in aws.filter_instances(opts, conf):
        uptime = aws.get_uptime(now, i.launch_time)
        print i.state, i.image_id, aws.format_uptime(
            uptime), i.public_dns_name, i.tags
예제 #7
0
파일: tool.py 프로젝트: t-/brenda
def instances(opts, conf):
    now = time.time()
    for i in aws.filter_instances(opts, conf):
        uptime = aws.get_uptime(now, i.launch_time)
        print i.image_id, aws.format_uptime(uptime), i.public_dns_name
예제 #8
0
파일: tool.py 프로젝트: t-/brenda
def perf(opts, conf, args):
    def task_count_last(i):
        s = i[1].split()
        try:
            count = int(s[0])
            last = int(s[1])
        except:
            return None
        else:
            return count, last

    script = [
        "if",
        "[",
        "-f",
        "task_count",
        "]",
        "&&",
        "[",
        "-f",
        "task_last",
        "];",
        "then",
        "cat",
        "task_count;",
        "cat",
        "task_last;",
        "else",
        "echo",
        "0;",
        "fi",
    ]
    instances = aws.filter_instances(opts, conf)
    idict = dict([(i.dns_name, i) for i in instances])
    sdict = aws.get_spot_request_dict(conf)
    data = {}
    for i in run_cmd_list(
        opts, conf, ssh_cmd_list(opts, conf, script, instances), show_output=False, capture_stderr=False
    ):
        host = i[0]
        inst = idict.get(host)
        if inst:
            sir = sdict.get(inst.spot_instance_request_id)
            price = None
            if sir:
                price = float(sir.price)
            tasks = task_count_last(i)
            if tasks:
                task_count, task_last = tasks
                uptime = aws.get_uptime(task_last, inst.launch_time) / 3600.0
                stat = data.setdefault(inst.instance_type, dict(n=0, uptime_sum=0.0, task_sum=0, price_sum=0.0))
                stat["n"] += 1
                stat["uptime_sum"] += uptime
                stat["task_sum"] += task_count
                if price is not None:
                    stat["price_sum"] += price
    tph = []
    tpd = []
    total_tasks = 0.0
    total_uptime = 0
    total_n = 0
    for itype, stat in data.items():
        total_tasks += stat["task_sum"]
        total_uptime += stat["uptime_sum"]
        total_n += stat["n"]
        tasks_per_hour = stat["task_sum"] / stat["uptime_sum"]
        tph.append((tasks_per_hour, itype))
        if "price_sum" in stat:
            mprice = stat["price_sum"] / stat["n"]
            tasks_per_dollar = tasks_per_hour / mprice
            tpd.append((tasks_per_dollar, itype))
    tph.sort(reverse=True)
    tpd.sort(reverse=True)
    if total_n:
        print "Tasks per hour (%.02f)" % (total_tasks / total_uptime * total_n,)
        for tasks_per_hour, itype in tph:
            print "  %s %.02f" % (itype, tasks_per_hour)
        print "Tasks per US$"
        for tasks_per_dollar, itype in tpd:
            print "  %s %.02f" % (itype, tasks_per_dollar)