Example #1
    handler.workdir = workdir
    handler.master = master

    if master is not None:
        SYS_STATS = True
        # get configuration from master
        sconf = client.config(master)
    
        dconf = json.loads(sconf)
        handler.config = dconf
    
        logging.debug("Config size: %d" % (len(sconf)))
        logging.debug(str(dconf))
            
        # send done to master
        client.done(master, id)

        # NOTE: Timing conflict. The master might already send a 'step' request
        #       before server_forever() is started, so the 'step' might be lost.
        #       Delay 'done' until the server is up and running.
        # Check out Asynchronous Mixins example for SocketServer
        # Comment: the constructor might already activate the server,
        #       so there is no problem.
        # NOTE: Fixed using time.sleep(5) in master.py

        logging.debug("Supervisor sent /done to master")

    logging.info("Starting host server pid %d, id %s, port %d with master %s\n" % (pid, id, port, master))

    if SYS_STATS:
        sys_t = threading.Thread(target=timed_sys_stats_reporter, args=(None, ))
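
The timing note above points at the standard library's "Asynchronous Mixins" pattern for SocketServer: run serve_forever() in a background thread so the supervisor only reports 'done' once the server socket is already bound and listening. A minimal sketch, assuming Python 2's SocketServer module; EchoHandler, start_server_then_report and report_done are placeholder names, not part of the code above.

import threading
import SocketServer  # 'socketserver' in Python 3

class EchoHandler(SocketServer.BaseRequestHandler):
    def handle(self):
        # placeholder behaviour: echo back whatever the client sent
        data = self.request.recv(1024)
        self.request.sendall(data)

class ThreadedTCPServer(SocketServer.ThreadingMixIn, SocketServer.TCPServer):
    """Handle each request in its own thread."""
    pass

def start_server_then_report(host, port, report_done):
    # the TCPServer constructor binds and activates the socket, so requests
    # arriving before serve_forever() starts are queued, not lost
    server = ThreadedTCPServer((host, port), EchoHandler)
    t = threading.Thread(target=server.serve_forever)
    t.daemon = True
    t.start()
    # only now tell the master we are ready to receive 'step' requests
    report_done()
    return server, t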
Example #2
def Execute(args):
    logging.info("Execute " + str(args.active) + "")
    tnow = time.time()

    overall_timer = perf.Timer(logging)

    task_name = "overall: "
    if len(args.active) > 0:
        # get the task name
        task_name = "overall: %s" % get_task_name(args.active[0])

    overall_timer.start("superstep-%d-%s" % \
            (args.server.superstep_count, task_name))

    if len(args.active) > 0:
        execdir = os.path.join(args.workdir, "snapw.%d/exec" % (args.pid))
        config.mkdir_p(execdir)
     
    def execute_single_task(task):
        # get the executables
        bunch = get_task_name(task)
        execlist = []
        try:
            execlist = args.config["bunch"][bunch]["exec"].split(",")
        except:
            pass

        timer = perf.Timer(logging)
        timer.start('provision-execlist')
        # provision execlist on disk
        for item in execlist:
            execpath = os.path.join(execdir, item)
            # check if the program exists and its mtime
            mtime = None
            try:
                stat = os.stat(execpath)
                mtime = int(stat.st_mtime)
            except:
                pass
     
            if not mtime or mtime < tnow:
                # if the file does not exist or it is older than current time,
                # contact the head task
     
                content = client.getexec(args.master,item,mtime)
                swc = "None"
                if content:
                    swc = str(len(content))
     
                logging.debug("Host received %s" % (swc))
                if content:
                    if len(content) > 0:
                        logging.debug("Host saving to %s" % (execpath))
                        f = open(execpath,"w")
                        f.write(content)
                        f.close()
       
                    os.utime(execpath,(tnow, tnow))
        timer.stop('provision-execlist')
     
        prog = execlist[0]
        logging.debug("Task %s, exec %s" % (prog, execlist))
        progpath = os.path.join(execdir, prog)
     
        if not os.path.exists(progpath):
            logging.error("task %s not started, program %s not found" % (task, progpath))
            return
     
        taskdir = "snapw.%d/tasks/%s" % (args.pid, task)
        config.mkdir_p(taskdir)
     
        qdir = os.path.join(args.workdir, args.qactname, task)
        tdir = os.path.join(args.workdir, taskdir)
     
        logging.info("starting task %s, prog %s, workdir %s, qdir %s\n" % (task, prog, tdir, qdir))
             
        # get server information
        host = args.server.host
        port = args.server.port
     
        # construct a command line for worker
        cmd = python + " %s -t %s -h %s:%d -q %s" % (
            progpath, task, host, port, qdir)
        logging.info("starting cmd %s" % (cmd))
     
        # start the work process
        p = subprocess.Popen(cmd.split(), cwd=tdir, close_fds=True)
        return p, prog
     
    # Dynamically check the number of processors we have on each host.
    # On any error, default to 1.
    max_tasks = 1
    var_par_tasks = int(args.config['par_tasks'])
    if var_par_tasks <= 0:
        try:
            max_tasks = os.sysconf('SC_NPROCESSORS_ONLN')
        except:
            max_tasks = 1
    else:
        max_tasks = var_par_tasks

    # execute the tasks in a parallel fashion by running
    # at most max_tasks processes at any point.
    task_list = args.active[:]
    procs = []
    logging.info("Running %d tasks with %d-way parallelism: %s" % \
            (len(task_list), max_tasks, str(task_list)))

    timer = perf.Timer(logging)
    pcounter = 0
    counter_map = {}
    while True:
        while task_list and len(procs) < max_tasks:
            task = task_list.pop()
            timer.start("prog-%d" % pcounter)
            result = execute_single_task(task)
            if result is None:
                # the task could not be started (program not found); skip it
                timer.stop("prog-%d" % pcounter)
                pcounter += 1
                continue
            p, prog = result

            timer.update_extra("prog-%d" % pcounter, "step: %d, pid: %d, prog: %s" \
                    % (args.server.superstep_count, p.pid, prog))

            counter_map[p.pid] = pcounter
            pcounter += 1
            procs.append(p)

        # iterate over a copy so that removing finished processes below is safe
        for p in procs[:]:
            # wait for the process to complete
            
            pid = p.pid
            logging.debug("polling %d" % pid)
            status = p.poll()
            if status is not None:
                timer.stop("prog-%d" % counter_map[p.pid])
                del counter_map[p.pid]

                logging.debug("finished %d with status %s" % (pid, str(status)))
                # error reporting
                if status <> 0:
                    msg = "Pid %d terminated unexpectedly with status %d" % (pid, status)
                    logging.error(msg)
                    client.error(args.master, args.id, msg)

                procs.remove(p) 
 
        if not procs and not task_list:
            break
        else:
            time.sleep(0.1)

    overall_timer.stop("superstep-%d-%s" % \
            (args.server.superstep_count, task_name))

    # send done to master
    client.done(args.master, args.id)
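
The scheduling loop above keeps at most max_tasks worker processes alive at any point and polls them until both the task list and the process list are empty. Stripped of the framework objects (args, perf.Timer, client), the same pattern looks roughly like the sketch below; run_limited and its arguments are placeholder names, not part of the code above.

import subprocess
import time

def run_limited(commands, max_tasks):
    """Run each command with at most max_tasks processes alive at once.

    Returns a list of (command, returncode) pairs for commands that failed.
    """
    pending = list(commands)
    running = []            # (Popen, command) pairs currently alive
    failures = []
    while pending or running:
        # top up to max_tasks running processes
        while pending and len(running) < max_tasks:
            cmd = pending.pop()
            running.append((subprocess.Popen(cmd, close_fds=True), cmd))
        # reap finished processes; iterate over a copy so remove() is safe
        for entry in running[:]:
            proc, cmd = entry
            if proc.poll() is not None:
                if proc.returncode != 0:
                    failures.append((cmd, proc.returncode))
                running.remove(entry)
        time.sleep(0.1)
    return failures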
Example #4
    handler.workdir = workdir
    handler.master = master

    if master is not None:
        # SYS_STATS = True
        # get configuration from master
        sconf = client.config(master)
    
        dconf = json.loads(sconf)
        handler.config = dconf
    
        logging.debug("Config size: %d" % (len(sconf)))
        logging.debug(str(dconf))
            
        # send done to master
        client.done(master, id)

        # NOTE: Timing conflict. The master might already send a 'step' request
        #       before server_forever() is started, so the 'step' might be lost.
        #       Delay 'done' until the server is up and running.
        # Check out Asynchronous Mixins example for SocketServer
        # Comment: the constructor might already activate the server,
        #       so there is no problem.
        # NOTE: Fixed using time.sleep(5) in master.py

        logging.debug("Supervisor sent /done to master")

    logging.info("Starting host server pid %d, id %s, port %d with master %s\n" % (pid, id, port, master))

    if SYS_STATS:
        sys_t = threading.Thread(target=timed_sys_stats_reporter, args=(None, ))
Example #5
def Execute(args):

    args.flog.write("Execute " + str(args.active) + "\n")

    tnow = time.time()

    if len(args.active) > 0:
        execdir = os.path.join(args.workdir, "snapw.%d/exec" % (args.pid))
        config.mkdir_p(execdir)

    # execute the tasks sequentially
    for task in args.active:

        # get the executables
        bunch = "%s" % (task.split("-",1)[0])
        execlist = []
        try:
            execlist = args.config["bunch"][bunch]["exec"].split(",")
        except:
            pass

        for item in execlist:
            execpath = os.path.join(execdir, item)
            # check if the program exists and its mtime
            mtime = None
            try:
                stat = os.stat(execpath)
                mtime = int(stat.st_mtime)
            except:
                pass

            if not mtime  or  mtime < tnow:
                # the file does not exist or it is older than current time,
                #   contact the head task

                content = client.getexec(args.master,item,mtime)
                swc = "None"
                if content:
                    swc = str(len(content))

                print "Host received %s" % (swc)
                if content:
                    if len(content) > 0:
                        print "Host saving to %s" % (execpath)
                        f = open(execpath,"w")
                        f.write(content)
                        f.close()
    
                    os.utime(execpath,(tnow, tnow))

        prog = execlist[0]
        print "Task %s, exec %s" % (prog, execlist)
        progpath = os.path.join(execdir, prog)

        if not os.path.exists(progpath):
            line = "*** Error: task %s not started, program %s not found\n" % (
                    task, progpath)
            args.flog.write(line)
            args.flog.flush()
            continue

        taskdir = "snapw.%d/tasks/%s" % (args.pid, task)
        config.mkdir_p(taskdir)

        qdir = os.path.join(args.workdir, args.qactname, task)
        tdir = os.path.join(args.workdir, taskdir)

        line = "starting task %s, prog %s, workdir %s, qdir %s\n" % (
                    task, prog, tdir, qdir)
        args.flog.write(line)
        args.flog.flush()

        # get server information
        host = args.server.host
        port = args.server.port

        # construct a command line
        cmd = python + " %s -t %s -h %s:%d -q %s" % (
                    progpath, task, host, port, qdir)
        args.flog.write("starting cmd %s\n" % (cmd))
        args.flog.flush()

        # start the work process
        p = pexec.Exec(tdir,cmd)

        # wait for the process to complete
        while True:
            args.flog.write("polling\n")
            args.flog.flush()
            status = pexec.Poll(p)
            if status is not None:
                break

            time.sleep(0.1)

        args.flog.write("finished\n")
        args.flog.flush()

    # send done to master
    client.done(args.master, args.id)
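
All of the Execute variants provision executables the same way: if the local copy is missing or its mtime predates the start of the step, ask the master for a newer version, write it out, and stamp it with the step's start time so it is not fetched again during the same step. Below is a minimal sketch of that check in isolation; provision and fetch_from_master are placeholder names standing in for the inline code and client.getexec above.

import os

def provision(execpath, tnow, fetch_from_master):
    """Refresh execpath from the master if it is missing or stale."""
    mtime = None
    try:
        mtime = int(os.stat(execpath).st_mtime)
    except OSError:
        pass  # the file does not exist yet

    if not mtime or mtime < tnow:
        # send the local mtime so the master can answer with nothing
        # when the local copy is already current
        content = fetch_from_master(execpath, mtime)
        if content:
            f = open(execpath, "w")
            f.write(content)
            f.close()
            # stamp with tnow so the file is treated as fresh for this step
            os.utime(execpath, (tnow, tnow))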
Example #6
def Execute(args):
    args.flog.write("Execute " + str(args.active) + "\n")
    tnow = time.time()
     
    if len(args.active) > 0:
        execdir = os.path.join(args.workdir, "snapw.%d/exec" % (args.pid))
        config.mkdir_p(execdir)
     
    def execute_single_task(task):     
        # get the executables
        bunch = "%s" % (task.split("-",1)[0])
        execlist = []
        try:
            execlist = args.config["bunch"][bunch]["exec"].split(",")
        except:
            pass
     
        for item in execlist:
            execpath = os.path.join(execdir, item)
            # check if the program exists and its mtime
            mtime = None
            try:
                stat = os.stat(execpath)
                mtime = int(stat.st_mtime)
            except:
                pass
     
            if not mtime  or  mtime < tnow:
                # the file does not exist or it is older than current time,
                #   contact the head task
     
                content = client.getexec(args.master,item,mtime)
                swc = "None"
                if content:
                    swc = str(len(content))
     
                print "Host received %s" % (swc)
                if content:
                    if len(content) > 0:
                        print "Host saving to %s" % (execpath)
                        f = open(execpath,"w")
                        f.write(content)
                        f.close()
       
                    os.utime(execpath,(tnow, tnow))
     
        prog = execlist[0]
        print "Task %s, exec %s" % (prog, execlist)
        progpath = os.path.join(execdir, prog)
     
        if not os.path.exists(progpath):
            line = "*** Error: task %s not started, program %s not found\n" % (
                task, progpath)
            args.flog.write(line)
            args.flog.flush()
            return
     
        taskdir = "snapw.%d/tasks/%s" % (args.pid, task)
        config.mkdir_p(taskdir)
     
        qdir = os.path.join(args.workdir, args.qactname, task)
        tdir = os.path.join(args.workdir, taskdir)
     
        line = "starting task %s, prog %s, workdir %s, qdir %s\n" % (
            task, prog, tdir, qdir)
        args.flog.write(line)
        args.flog.flush()
     
        # get server information
        host = args.server.host
        port = args.server.port
     
        # construct a command line
        cmd = python + " %s -t %s -h %s:%d -q %s" % (
            progpath, task, host, port, qdir)
        args.flog.write("starting cmd %s\n" % (cmd))
        args.flog.flush()
     
        # start the work process
        p = pexec.Exec(tdir,cmd)
        return p
     
    # Dynamically check the number of processors we have on each host.
    # On any error, default to 1.
    try:
        max_tasks = os.sysconf('SC_NPROCESSORS_ONLN')
    except:
        max_tasks = 1
    args.flog.write("Running tasks with " + str(max_tasks) + "-way parallelism\n")
     
    # execute the tasks in a parallel fashion by running
    # at most max_tasks processes at any point.
    task_list = args.active[:]
    procs = []
    while True:
        while task_list and len(procs) < max_tasks:
            task = task_list.pop()
            p = execute_single_task(task)
            # execute_single_task returns None when the program is missing
            if p is not None:
                procs.append(p)
                
        # iterate over a copy so that removing finished processes below is safe
        for p in procs[:]:
            # wait for the process to complete
            pid = pexec.GetPid(p)
            args.flog.write("polling " + str(pid) + "\n")
            args.flog.flush()
            status = pexec.Poll(p)
            if status is not None:
                args.flog.write("finished " + str(pid) + "\n")
                args.flog.flush()
                procs.remove(p)
 
        if not procs and not task_list:
            break
        else:
            time.sleep(1.0)
 
    # send done to master
    client.done(args.master, args.id)
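
Examples #6 and #7 drive the workers through a small pexec helper (Exec, Poll, GetPid) rather than calling subprocess directly. Its implementation is not shown here; judging by Example #2, which uses subprocess.Popen with the same arguments, a plausible stand-in would be the following sketch (an assumption, not the project's actual module).

import subprocess

def Exec(workdir, cmd):
    """Start 'cmd' (a shell-style command string) with 'workdir' as its cwd."""
    return subprocess.Popen(cmd.split(), cwd=workdir, close_fds=True)

def Poll(p):
    """Return the exit status if the process has finished, otherwise None."""
    return p.poll()

def GetPid(p):
    """Return the OS pid of the process."""
    return p.pid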
Example #7
def Execute(args):
    args.flog.write("Execute " + str(args.active) + "\n")
    tnow = time.time()

    if len(args.active) > 0:
        execdir = os.path.join(args.workdir, "snapw.%d/exec" % (args.pid))
        config.mkdir_p(execdir)

    def execute_single_task(task):
        # get the executables
        bunch = "%s" % (task.split("-", 1)[0])
        execlist = []
        try:
            execlist = args.config["bunch"][bunch]["exec"].split(",")
        except:
            pass

        for item in execlist:
            execpath = os.path.join(execdir, item)
            # check if the program exists and its mtime
            mtime = None
            try:
                stat = os.stat(execpath)
                mtime = int(stat.st_mtime)
            except:
                pass

            if not mtime or mtime < tnow:
                # the file does not exist or it is older than current time,
                #   contact the head task

                content = client.getexec(args.master, item, mtime)
                swc = "None"
                if content:
                    swc = str(len(content))

                print "Host received %s" % (swc)
                if content:
                    if len(content) > 0:
                        print "Host saving to %s" % (execpath)
                        f = open(execpath, "w")
                        f.write(content)
                        f.close()

                    os.utime(execpath, (tnow, tnow))

        prog = execlist[0]
        print "Task %s, exec %s" % (prog, execlist)
        progpath = os.path.join(execdir, prog)

        if not os.path.exists(progpath):
            line = "*** Error: task %s not started, program %s not found\n" % (
                task, progpath)
            args.flog.write(line)
            args.flog.flush()
            return

        taskdir = "snapw.%d/tasks/%s" % (args.pid, task)
        config.mkdir_p(taskdir)

        qdir = os.path.join(args.workdir, args.qactname, task)
        tdir = os.path.join(args.workdir, taskdir)

        line = "starting task %s, prog %s, workdir %s, qdir %s\n" % (
            task, prog, tdir, qdir)
        args.flog.write(line)
        args.flog.flush()

        # get server information
        host = args.server.host
        port = args.server.port

        # construct a command line
        cmd = python + " %s -t %s -h %s:%d -q %s" % (progpath, task, host,
                                                     port, qdir)
        args.flog.write("starting cmd %s\n" % (cmd))
        args.flog.flush()

        # start the work process
        p = pexec.Exec(tdir, cmd)
        return p

    # Dynamically check the number of processors we have on each host.
    # On any error, default to 1.
    try:
        max_tasks = os.sysconf('SC_NPROCESSORS_ONLN')
    except:
        max_tasks = 1
    args.flog.write("Running tasks with " + str(max_tasks) +
                    "-way parallelism\n")

    # execute the tasks in a parallel fashion by running
    # at most max_tasks processes at any point.
    task_list = args.active[:]
    procs = []
    while True:
        while task_list and len(procs) < max_tasks:
            task = task_list.pop()
            p = execute_single_task(task)
            # execute_single_task returns None when the program is missing
            if p is not None:
                procs.append(p)

        # iterate over a copy so that removing finished processes below is safe
        for p in procs[:]:
            # wait for the process to complete
            pid = pexec.GetPid(p)
            args.flog.write("polling " + str(pid) + "\n")
            args.flog.flush()
            status = pexec.Poll(p)
            if status is not None:
                args.flog.write("finished " + str(pid) + "\n")
                args.flog.flush()
                procs.remove(p)

        if not procs and not task_list:
            break
        else:
            time.sleep(1.0)

    # send done to master
    client.done(args.master, args.id)