Esempio n. 1
0
    def Send(self, dstid, d, channel = "1", swsnap = False):
        """Deliver payload ``d`` to the task ``dstid`` on ``channel``.

        The destination task name is ``self.target[channel] + "-" + dstid``.
        Its host is looked up through ``self.tasks`` and then ``self.hosts``.

        dstid   -- identifier of the destination task (anything ``str()``
                   accepts; it is only used to build the task name).
        d       -- payload. With ``swsnap`` false it is serialised with
                   ``json.dumps``; with ``swsnap`` true it is handed to the
                   Snap code path as-is (its ``Save`` method is called in
                   local mode).
        channel -- key into ``self.target`` selecting the output channel.
        swsnap  -- true to send ``d`` as a Snap object instead of JSON.

        In local mode (``self.local`` true) the payload is written to the
        file named by ``self.GetOutName(dstname)``; otherwise it is sent
        through the ``client`` module. Returns None.

        Exits the process with status 2 when ``swsnap`` is requested but the
        module-level ``gotsnap`` flag is false.
        """
        dstname = self.target[channel] + "-" + str(dstid)
        dsthostid = self.tasks.get(dstname)
        dshost = self.hosts.get(dsthostid)
        # %s, not %d: the id is only ever used via str() above, so a
        # non-integer id must not make the log line raise TypeError.
        self.log.debug('sending to %s (had id %s)' % (dshost, dstid))

        # the output file name is only needed (and computed) in local mode
        fname = None
        if self.local:
            fname = self.GetOutName(dstname)

        if swsnap:
            if not gotsnap:
                self.log.error("Snap module is not available")
                sys.exit(2)

            # Snap vector
            if self.local:
                FOut = Snap.TFOut(Snap.TStr(fname))
                d.Save(FOut)
                FOut.Flush()
                return

            client.messagevec(dshost, self.taskname, dstname, d)
            return

        # json dict
        s = json.dumps(d)

        if self.local:
            # 'with' closes the file even if the write fails
            with open(fname, "w") as f:
                f.write(s)
            return

        client.message(dshost, self.taskname, dstname, s)
Esempio n. 2
0
    def Send(self, dstid, d, channel="1", swsnap=False):
        """Send payload ``d`` to destination task ``dstid`` over ``channel``.

        The destination task name is built as
        ``self.target[channel] + "-" + str(dstid)``; the host it runs on is
        resolved via ``self.tasks`` (task name -> host id) and ``self.hosts``
        (host id -> host).

        Args:
            dstid: destination task identifier; only used through ``str()``.
            d: the payload. Serialised with ``json.dumps`` unless ``swsnap``
                is true, in which case it is passed through the Snap path
                (``d.Save`` is called in local mode).
            channel: key into ``self.target`` naming the output channel.
            swsnap: send ``d`` as a Snap object rather than as JSON.

        In local mode (``self.local``) the payload goes to the file returned
        by ``self.GetOutName(dstname)``; otherwise it is passed to
        ``client.messagevec`` (Snap) or ``client.message`` (JSON).

        Calls ``sys.exit(2)`` if ``swsnap`` is true while the module-level
        ``gotsnap`` flag is false.
        """
        dstname = self.target[channel] + "-" + str(dstid)
        dsthostid = self.tasks.get(dstname)
        dshost = self.hosts.get(dsthostid)
        # Format the id with %s: a non-integer id is fine for the task name
        # above and must not break the debug message with a TypeError.
        self.log.debug('sending to %s (had id %s)' % (dshost, dstid))

        # only local delivery needs an output file name
        fname = self.GetOutName(dstname) if self.local else None

        if swsnap:
            if not gotsnap:
                self.log.error("Snap module is not available")
                sys.exit(2)

            # Snap vector
            if self.local:
                FOut = Snap.TFOut(Snap.TStr(fname))
                d.Save(FOut)
                FOut.Flush()
                return

            client.messagevec(dshost, self.taskname, dstname, d)
            return

        # json dict
        s = json.dumps(d)

        if self.local:
            # context manager guarantees the handle is closed on error too
            with open(fname, "w") as f:
                f.write(s)
            return

        client.message(dshost, self.taskname, dstname, s)
Esempio n. 3
0
    def do_GET(self):
        """Handle a GET request sent to the head server.

        Routes, selected from the request path:

          /start                    start a host server for every configured
                                    host, then answer with the diagnostic
                                    echo described below.
          /quit                     ask every host server to quit, reply 200
                                    and clear ``self.server.running``.
          /dummy                    empty 200 reply.
          /config                   reply with ``self.config`` as JSON.
          /exec?p=<file>&t=<mtime>  reply with the content of <file> if its
                                    modification time is newer than <mtime>,
                                    304 if not, 404 if it cannot be stat'ed.
          /done/<host>              record <host> in ``self.server.done``;
                                    once every configured host is recorded,
                                    send the initial ``__Start__`` message
                                    (first time only) and call ``Prepare``
                                    for each host.
          /ready/<host>/<numtasks>  record <host> in ``self.server.ready``;
                                    once every configured host is recorded,
                                    either stop (no host reported active
                                    tasks) or call ``StartStep`` for each
                                    host.

        Any other path is answered with a plain-text echo of the request
        and server details.

        The print calls use the single-argument parenthesised form so the
        output is the same as with the print statement.
        """
        parsed_path = urlparse.urlparse(self.path)
        message_parts = [
                'CLIENT VALUES:',
                'client_address=%s (%s)' % (self.client_address,
                                            self.address_string()),
                'command=%s' % self.command,
                'path=%s' % self.path,
                'real path=%s' % parsed_path.path,
                'query=%s' % parsed_path.query,
                'request_version=%s' % self.request_version,
                '',
                'SERVER VALUES:',
                'server_type=%s' % "head server",
                'server_version=%s' % self.server_version,
                'sys_version=%s' % self.sys_version,
                'protocol_version=%s' % self.protocol_version,
                '',
                'HEADERS RECEIVED:',
                ]

        for name, value in sorted(self.headers.items()):
            message_parts.append('%s=%s' % (name, value.rstrip()))
        message_parts.append('')
        message = '\r\n'.join(message_parts)

        subpath = self.path.split("/")
        # A path without any "/" splits into a single element; use None as
        # the route then, so such a request gets the echo reply instead of
        # an IndexError.
        route = subpath[1] if len(subpath) > 1 else None

        command = parsed_path.path
        dargs = dict(urlparse.parse_qsl(parsed_path.query))

        if self.path == "/start":
            print("starting host servers ")

            master = self.config["master"]
            hosts = self.config["hosts"]
            for h in hosts:
                self.StartHostServer(h, master)
            # no return: /start is answered with the echo reply below

        elif self.path == "/quit":
            print("terminating host servers")

            for h in self.config["hosts"]:
                self.QuitHostServer(h)

            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()

            # set the flag to terminate the server
            self.server.running = False
            # NOTE(review): presumably issues a dummy request so the serving
            # loop notices the flag -- confirm against the server class
            self.server.self_dummy()
            return

        elif self.path == "/dummy":
            print("dummy request")

            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()
            return

        elif self.path == "/config":
            print("get configuration")

            body = simplejson.dumps(self.config)
            self.send_response(200)
            self.send_header('Content-Length', len(body))
            self.end_headers()
            self.wfile.write(body)
            return

        elif command == "/exec":
            # NOTE(review): "p" is used as a filesystem path exactly as
            # supplied by the client, so any file readable by this process
            # can be fetched. Restrict it if clients are not trusted.
            pname = dargs.get("p")

            # a missing or non-numeric "t" counts as 0 (always send)
            ptime = 0
            try:
                ptime = int(dargs.get("t"))
            except (TypeError, ValueError):
                pass
            print("get executable %s %s" % (pname, ptime))

            try:
                mtime = int(os.stat(pname).st_mtime)
            except (TypeError, ValueError, OSError):
                # no "p" argument, or the file cannot be stat'ed: answer
                # with 404 instead of dying without any response
                self.send_response(404)
                self.send_header('Content-Length', 0)
                self.end_headers()
                return

            swnew = mtime > ptime

            print("stat %s %s %s %s" % (
                pname, ptime, mtime, "NEW" if swnew else "OLD"))
            if not swnew:
                # the file has not changed
                self.send_response(304)
                self.send_header('Content-Length', 0)
                self.end_headers()
                return

            with open(pname) as f:
                content = f.read()

            self.send_response(200)
            self.send_header('Content-Length', len(content))
            self.end_headers()
            self.wfile.write(content)
            return

        elif route == "done":
            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()

            if len(subpath) > 2:
                host = subpath[2]
                # TODO make this update thread safe, which it is not now
                self.server.done.add(host)
                print("host %s completed work" % (str(self.server.done)))
                if len(self.server.done) == len(self.config["hosts"]):

                    print("all hosts completed")

                    # initialize a set of ready servers,
                    # clear the continue indicator
                    self.server.ready = set()
                    self.server.iterate = False

                    # send a start message at the beginning
                    if not self.server.start:
                        self.server.start = True
                        self.server.executing = True
                        (starthost, starttask) = self.GetStartInfo(self.config)
                        s = "send __Start__ message for task %s to host %s" % (
                                starttask, starthost)
                        print(s)
                        client.message(starthost,"__Main__",starttask,"__Start__")

                    # send a prepare command to all the hosts
                    hosts = self.config["hosts"]

                    print("hosts %s" % (hosts,))
                    for h in hosts:
                        print("send prepare to %s" % (h,))
                        sys.stdout.flush()
                        self.Prepare(h)
                        print("done sending prepare to %s" % (h,))
            return

        elif route == "ready":
            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()

            if len(subpath) > 2:
                host = subpath[2]
                # TODO make this update thread safe, which it is not now
                self.server.ready.add(host)

                # get the number of active tasks on the host; a missing or
                # non-numeric value counts as 0
                numtasks = 0
                try:
                    numtasks = int(subpath[3])
                except (IndexError, ValueError):
                    pass

                # execute the next step, if there are active tasks
                if numtasks > 0:
                    self.server.iterate = True

                print("host %s ready" % (str(self.server.ready)))
                if len(self.server.ready) == len(self.config["hosts"]):

                    # stop the execution, if there are no more tasks to execute
                    if not self.server.iterate:
                        print("all tasks completed")
                        self.server.executing = False
                        self.server.iterate = False
                        return

                    print("all hosts ready")

                    # initialize a set of done servers
                    self.server.done = set()

                    # send a step start command to all the hosts
                    # TODO, create a thread for this step
                    for h in self.config["hosts"]:
                        print("send next step to %s" % (h,))
                        self.StartStep(h)
            return

        # default reply: echo the request and server details
        self.send_response(200)
        self.end_headers()
        self.wfile.write(message)
        return
Esempio n. 4
0
    def do_GET(self):
        """Handle a GET request sent to the head (master) server.

        Routes, selected from the request path:

          /start                    start the "master" timer, reset the
                                    superstep and snapshot counters, start a
                                    host server for every configured host,
                                    then answer with the diagnostic echo.
          /quit                     delegate to ``self._quit()``.
          /getkv                    reply with the JSON of
                                    ``get_kv_file("master")`` once more than
                                    one superstep has been started, prefixed
                                    with "DONE " when the computation is no
                                    longer executing; "None" before that.
          /dummy                    empty 200 reply.
          /config                   reply with ``self.config`` as JSON.
          /exec?p=<file>&t=<mtime>  reply with the content of <file> if its
                                    modification time is newer than <mtime>,
                                    304 if not, 404 if it cannot be stat'ed.
          /done/<host>              stop the host's superstep timer, record
                                    <host> as done; once every configured
                                    host is done, optionally run the snapshot
                                    script, send the initial ``__Start__``
                                    message (first time only) and call
                                    ``Prepare`` for each host.
          /ready/<host>/<numtasks>  record <host> as ready; once every
                                    configured host is ready, either shut
                                    down (no host reported active tasks) or
                                    start the next superstep on each host.
          /error/<host>/<msg>       log the supervisor's error and shut the
                                    master down.

        Any other path is answered with a plain-text echo of the request
        and server details.

        All server locks are taken with ``with`` so that an exception raised
        inside a critical section cannot leave a lock held.
        """
        parsed_path = urlparse.urlparse(self.path)
        message_parts = [
                'CLIENT VALUES:',
                'client_address=%s (%s)' % (self.client_address,
                                            self.address_string()),
                'command=%s' % self.command,
                'path=%s' % self.path,
                'real path=%s' % parsed_path.path,
                'query=%s' % parsed_path.query,
                'request_version=%s' % self.request_version,
                '',
                'SERVER VALUES:',
                'server_type=%s' % "head server",
                'server_version=%s' % self.server_version,
                'sys_version=%s' % self.sys_version,
                'protocol_version=%s' % self.protocol_version,
                '',
                'HEADERS RECEIVED:',
                ]

        for name, value in sorted(self.headers.items()):
            message_parts.append('%s=%s' % (name, value.rstrip()))
        message_parts.append('')
        message = '\r\n'.join(message_parts)

        subpath = self.path.split("/")
        # A path without any "/" splits into a single element; use None as
        # the route then, so such a request gets the echo reply instead of
        # an IndexError.
        route = subpath[1] if len(subpath) > 1 else None

        command = parsed_path.path
        dargs = dict(urlparse.parse_qsl(parsed_path.query))

        if self.path == "/start":
            logging.info("starting host servers")

            self.server.timer.start("master")
            self.server.superstep_count = 0
            self.server.snapshot_counter = 0

            master = self.config["master"]
            hosts = self.config["hosts"]
            for h in hosts:
                self.StartHostServer(h, master)
            # no return: /start is answered with the echo reply below

        elif self.path == "/quit":
            self._quit()
            return

        elif self.path == "/getkv":
            logging.debug("getting kv file")

            self.send_response(200)
            if self.server.superstep_count > 1:
                # Not executing and nothing left to iterate: we're done
                # computing everything, so let the requester know that this
                # is the final copy of the k-v file.
                finished = (not self.server.executing
                            and not self.server.iterate)
                body = json.dumps(get_kv_file("master"))
                if finished:
                    body = "DONE " + body
            else:
                body = "None"
            self.send_header('Content-Length', len(body))
            self.end_headers()
            self.wfile.write(body)
            return

        elif self.path == "/dummy":
            logging.debug("dummy request")

            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()
            return

        elif self.path == "/config":
            logging.debug("get configuration")

            body = json.dumps(self.config)
            self.send_response(200)
            self.send_header('Content-Length', len(body))
            self.end_headers()
            self.wfile.write(body)
            return

        elif command == "/exec":
            # NOTE(review): "p" is used as a filesystem path exactly as
            # supplied by the client, so any file readable by this process
            # can be fetched. Restrict it if clients are not trusted.
            pname = dargs.get("p")

            # a missing or non-numeric "t" counts as 0 (always send)
            ptime = 0
            try:
                ptime = int(dargs.get("t"))
            except (TypeError, ValueError):
                pass

            try:
                mtime = int(os.stat(pname).st_mtime)
            except (TypeError, ValueError, OSError):
                # no "p" argument, or the file cannot be stat'ed: answer
                # with 404 instead of dying without any response
                self.send_response(404)
                self.send_header('Content-Length', 0)
                self.end_headers()
                return

            if not mtime > ptime:
                # the file has not changed
                self.send_response(304)
                self.send_header('Content-Length', 0)
                self.end_headers()
                return

            with open(pname) as f:
                content = f.read()

            self.send_response(200)
            self.send_header('Content-Length', len(content))
            self.end_headers()
            self.wfile.write(content)
            return

        elif route == "done":
            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()

            if len(subpath) > 2:
                host = subpath[2]

                # int(host) raises ValueError for a non-numeric host; the
                # 'with' makes sure the lock is released in that case too
                with self.server.global_lock:
                    cur_superstep = self.server.superstep_count
                    if cur_superstep > 0:
                        self.server.timer.stop("superstep-%d-host-%d" % \
                                (cur_superstep, int(host)))

                with self.server.done_lock:
                    self.server.done.add(host)
                    str_log = "host %s completed work" % (str(self.server.done))
                    done_size = len(self.server.done)
                logging.info(str_log)

                if done_size == len(self.config["hosts"]):

                    logging.info("all hosts completed")
                    # Fix possible concurrency issue with supervisor.py
                    if cur_superstep == 0:
                        time.sleep(5)

                    if self.server.snapshot_enabled:
                        with self.server.global_lock:
                            self.server.snapshot_counter += 1
                            cmd = "./snapshot.sh %d" % (self.server.snapshot_counter - 1)
                        logging.info(cmd)
                        os.system(cmd)

                    # initialize a set of ready servers,
                    # clear the continue indicator
                    with self.server.ready_lock:
                        self.server.ready = set()

                    self.server.iterate = False

                    # send a start message at the beginning
                    if not self.server.start:
                        self.server.start = True
                        self.server.executing = True
                        (starthost, starttask) = self.GetStartInfo(self.config)
                        s = "send __Start__ message for task %s to host %s" % (
                                starttask, starthost)
                        logging.debug(s)
                        client.message(starthost,"__Main__",starttask,"__Start__")

                    # send a prepare command to all the hosts
                    hosts = self.config["hosts"]

                    logging.debug("hosts " + str(hosts))
                    for h in hosts:
                        logging.debug("send prepare to " + str(h))
                        self.Prepare(h)
                        logging.debug("done sending prepare to " + str(h))
            return

        elif route == "ready":
            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()

            if len(subpath) > 2:
                host = subpath[2]

                # get the number of active tasks on the host; a missing or
                # non-numeric value counts as 0
                numtasks = 0
                try:
                    numtasks = int(subpath[3])
                except (IndexError, ValueError):
                    pass

                # execute the next step, if there are active tasks
                if numtasks > 0:
                    self.server.iterate = True

                with self.server.ready_lock:
                    self.server.ready.add(host)
                    str_log = "host %s ready" % (str(self.server.ready))
                    ready_size = len(self.server.ready)
                logging.debug(str_log)

                if ready_size == len(self.config["hosts"]):

                    # stop the execution, if there are no more tasks to execute
                    if not self.server.iterate:
                        logging.info("all tasks completed")
                        self.server.executing = False
                        self.server.iterate = False
                        time.sleep(10)
                        self._quit(force=True)
                        return

                    logging.info("all hosts ready")

                    # initialize a set of done servers
                    with self.server.done_lock:
                        self.server.done = set()

                    hosts = self.config["hosts"]

                    # count the new superstep and start one timer per host
                    with self.server.global_lock:
                        self.server.superstep_count += 1
                        for h in hosts:
                            h_id = int(h['id'])
                            self.server.timer.start("superstep-%d-host-%d" % \
                                    (self.server.superstep_count, h_id))

                    # send a step start command to all the hosts
                    # TODO: create a thread for this step
                    for h in hosts:
                        logging.info("send next step to " + str(h))
                        self.StartStep(h)
            return

        elif route == "error":

            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()

            if len(subpath) > 3:
                src_host = subpath[2]
                encoded_msg = subpath[3]
                msg_dict = urlparse.parse_qs(encoded_msg)
                # .get(): a report without a "msg" field must still reach
                # the shutdown below instead of failing with KeyError
                logging.critical("Error msg from supervisor %s: %s" % \
                        (src_host, msg_dict.get('msg')))
                logging.critical("Terminating master now")
                self._quit(force=True)

            return

        # default reply: echo the request and server details
        self.send_response(200)
        self.end_headers()
        self.wfile.write(message)
        return
Esempio n. 5
0
    def do_GET(self):
        """Dispatch a GET request received by the head server.

        The request path selects the action:

          /start                    call ``StartHostServer`` for every
                                    configured host, then reply with the
                                    diagnostic echo described below.
          /quit                     call ``QuitHostServer`` for every
                                    configured host, reply 200 and clear
                                    ``self.server.running``.
          /dummy                    empty 200 reply.
          /config                   reply with ``self.config`` as JSON.
          /exec?p=<file>&t=<mtime>  reply with the content of <file> if its
                                    modification time is newer than <mtime>,
                                    304 if not, 404 if it cannot be stat'ed.
          /done/<host>              add <host> to ``self.server.done``; when
                                    all configured hosts are in the set,
                                    send the ``__Start__`` message (first
                                    time only) and call ``Prepare`` for each
                                    host.
          /ready/<host>/<numtasks>  add <host> to ``self.server.ready``;
                                    when all configured hosts are in the
                                    set, either stop (no host reported
                                    active tasks) or call ``StartStep`` for
                                    each host.

        Every other path gets a plain-text echo of the request and server
        details.

        The print calls use the single-argument parenthesised form so the
        output is the same as with the print statement.
        """
        parsed_path = urlparse.urlparse(self.path)
        message_parts = [
            'CLIENT VALUES:',
            'client_address=%s (%s)' %
            (self.client_address, self.address_string()),
            'command=%s' % self.command,
            'path=%s' % self.path,
            'real path=%s' % parsed_path.path,
            'query=%s' % parsed_path.query,
            'request_version=%s' % self.request_version,
            '',
            'SERVER VALUES:',
            'server_type=%s' % "head server",
            'server_version=%s' % self.server_version,
            'sys_version=%s' % self.sys_version,
            'protocol_version=%s' % self.protocol_version,
            '',
            'HEADERS RECEIVED:',
        ]

        for name, value in sorted(self.headers.items()):
            message_parts.append('%s=%s' % (name, value.rstrip()))
        message_parts.append('')
        message = '\r\n'.join(message_parts)

        subpath = self.path.split("/")
        # Without any "/" in the path the split has a single element; fall
        # back to None so the request gets the echo reply rather than
        # raising IndexError.
        route = subpath[1] if len(subpath) > 1 else None

        command = parsed_path.path
        dargs = dict(urlparse.parse_qsl(parsed_path.query))

        if self.path == "/start":
            print("starting host servers ")

            master = self.config["master"]
            hosts = self.config["hosts"]
            for h in hosts:
                self.StartHostServer(h, master)
            # no return: /start is answered with the echo reply below

        elif self.path == "/quit":
            print("terminating host servers")

            for h in self.config["hosts"]:
                self.QuitHostServer(h)

            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()

            # set the flag to terminate the server
            self.server.running = False
            # NOTE(review): presumably issues a dummy request so the serving
            # loop notices the flag -- confirm against the server class
            self.server.self_dummy()
            return

        elif self.path == "/dummy":
            print("dummy request")

            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()
            return

        elif self.path == "/config":
            print("get configuration")

            body = simplejson.dumps(self.config)
            self.send_response(200)
            self.send_header('Content-Length', len(body))
            self.end_headers()
            self.wfile.write(body)
            return

        elif command == "/exec":
            # NOTE(review): "p" is taken from the query string and used as a
            # filesystem path unchanged, so any file this process can read
            # is served. Restrict it if clients are not trusted.
            pname = dargs.get("p")

            # a missing or non-numeric "t" counts as 0 (always send)
            ptime = 0
            try:
                ptime = int(dargs.get("t"))
            except (TypeError, ValueError):
                pass
            print("get executable %s %s" % (pname, ptime))

            try:
                mtime = int(os.stat(pname).st_mtime)
            except (TypeError, ValueError, OSError):
                # no "p" argument, or the file cannot be stat'ed: reply 404
                # instead of failing without sending any response
                self.send_response(404)
                self.send_header('Content-Length', 0)
                self.end_headers()
                return

            swnew = mtime > ptime

            print("stat %s %s %s %s" %
                  (pname, ptime, mtime, "NEW" if swnew else "OLD"))
            if not swnew:
                # the file has not changed
                self.send_response(304)
                self.send_header('Content-Length', 0)
                self.end_headers()
                return

            with open(pname) as f:
                content = f.read()

            self.send_response(200)
            self.send_header('Content-Length', len(content))
            self.end_headers()
            self.wfile.write(content)
            return

        elif route == "done":
            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()

            if len(subpath) > 2:
                host = subpath[2]
                # TODO make this update thread safe, which it is not now
                self.server.done.add(host)
                print("host %s completed work" % (str(self.server.done)))
                if len(self.server.done) == len(self.config["hosts"]):

                    print("all hosts completed")

                    # initialize a set of ready servers,
                    # clear the continue indicator
                    self.server.ready = set()
                    self.server.iterate = False

                    # send a start message at the beginning
                    if not self.server.start:
                        self.server.start = True
                        self.server.executing = True
                        (starthost, starttask) = self.GetStartInfo(self.config)
                        s = "send __Start__ message for task %s to host %s" % (
                            starttask, starthost)
                        print(s)
                        client.message(starthost, "__Main__", starttask,
                                       "__Start__")

                    # send a prepare command to all the hosts
                    hosts = self.config["hosts"]

                    print("hosts %s" % (hosts,))
                    for h in hosts:
                        print("send prepare to %s" % (h,))
                        sys.stdout.flush()
                        self.Prepare(h)
                        print("done sending prepare to %s" % (h,))
            return

        elif route == "ready":
            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()

            if len(subpath) > 2:
                host = subpath[2]
                # TODO make this update thread safe, which it is not now
                self.server.ready.add(host)

                # get the number of active tasks on the host; a missing or
                # non-numeric value counts as 0
                numtasks = 0
                try:
                    numtasks = int(subpath[3])
                except (IndexError, ValueError):
                    pass

                # execute the next step, if there are active tasks
                if numtasks > 0:
                    self.server.iterate = True

                print("host %s ready" % (str(self.server.ready)))
                if len(self.server.ready) == len(self.config["hosts"]):

                    # stop the execution, if there are no more tasks to execute
                    if not self.server.iterate:
                        print("all tasks completed")
                        self.server.executing = False
                        self.server.iterate = False
                        return

                    print("all hosts ready")

                    # initialize a set of done servers
                    self.server.done = set()

                    # send a step start command to all the hosts
                    # TODO, create a thread for this step
                    for h in self.config["hosts"]:
                        print("send next step to %s" % (h,))
                        self.StartStep(h)
            return

        # default reply: echo the request and server details
        self.send_response(200)
        self.end_headers()
        self.wfile.write(message)
        return
Esempio n. 6
0
    def do_GET(self):

        parsed_path = urlparse.urlparse(self.path)
        message_parts = [
            'CLIENT VALUES:',
            'client_address=%s (%s)' %
            (self.client_address, self.address_string()),
            'command=%s' % self.command,
            'path=%s' % self.path,
            'real path=%s' % parsed_path.path,
            'query=%s' % parsed_path.query,
            'request_version=%s' % self.request_version,
            '',
            'SERVER VALUES:',
            'server_type=%s' % "head server",
            'server_version=%s' % self.server_version,
            'sys_version=%s' % self.sys_version,
            'protocol_version=%s' % self.protocol_version,
            '',
            'HEADERS RECEIVED:',
        ]

        for name, value in sorted(self.headers.items()):
            message_parts.append('%s=%s' % (name, value.rstrip()))
        message_parts.append('')
        message = '\r\n'.join(message_parts)

        subpath = self.path.split("/")

        command = parsed_path.path

        dargs = dict(urlparse.parse_qsl(parsed_path.query))

        if self.path == "/start":
            logging.info("starting host servers")

            self.server.timer.start("master")
            self.server.superstep_count = 0
            self.server.snapshot_counter = 0

            master = self.config["master"]
            hosts = self.config["hosts"]
            for h in hosts:
                self.StartHostServer(h, master)

        elif self.path == "/quit":
            self._quit()
            return

        elif self.path == "/getkv":
            logging.debug("getting kv file")

            self.send_response(200)
            if self.server.superstep_count > 1:
                if not self.server.executing and not self.server.iterate:
                    # We're done computing everything. So let LS know
                    # that this is the final copy of the k-v file.
                    body = json.dumps(get_kv_file("master"))
                    self.send_header('Content-Length', len(body) + 5)
                    self.end_headers()
                    self.wfile.write("DONE " + body)
                    return

                body = json.dumps(get_kv_file("master"))
                self.send_header('Content-Length', len(body))
                self.end_headers()
                self.wfile.write(body)
            else:
                self.send_header('Content-Length', len("None"))
                self.end_headers()
                self.wfile.write("None")
            return

        elif self.path == "/dummy":
            logging.debug("dummy request")

            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()
            return

        elif self.path == "/config":
            logging.debug("get configuration")

            body = json.dumps(self.config)
            self.send_response(200)
            self.send_header('Content-Length', len(body))
            self.end_headers()
            self.wfile.write(body)
            return

        elif command == "/exec":
            pname = dargs.get("p")

            ptime = 0
            try:
                ptime = int(dargs.get("t"))
            except:
                pass
            # logging.debug("get executable: " + str(pname) + " " + str(ptime))

            stat = os.stat(pname)
            mtime = int(stat.st_mtime)

            swnew = False
            if mtime > ptime:
                swnew = True

            # logging.debug("stat " + str(pname) + " " + str(ptime) + " " + str(mtime) + " " + str("NEW" if swnew else "OLD"))
            if not swnew:
                # the file has not changed
                self.send_response(304)
                self.send_header('Content-Length', 0)
                self.end_headers()
                return

            f = open(pname)
            content = f.read()
            f.close()

            self.send_response(200)
            self.send_header('Content-Length', len(content))
            self.end_headers()
            self.wfile.write(content)
            return

        elif subpath[1] == "done":
            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()

            if len(subpath) > 2:
                host = subpath[2]

                self.server.global_lock.acquire()
                cur_superstep = self.server.superstep_count
                if cur_superstep > 0:
                    self.server.timer.stop("superstep-%d-host-%d" % \
                            (self.server.superstep_count, int(host)))
                self.server.global_lock.release()

                self.server.done_lock.acquire()
                self.server.done.add(host)
                str_log = "host %s completed work" % (str(self.server.done))
                done_size = len(self.server.done)
                self.server.done_lock.release()
                logging.info(str_log)

                if done_size == len(self.config["hosts"]):

                    logging.info("all hosts completed")
                    # Fix possible concurrency issue with supervisor.py
                    if cur_superstep == 0:
                        time.sleep(5)

                    if self.server.snapshot_enabled:
                        self.server.global_lock.acquire()
                        self.server.snapshot_counter += 1
                        cmd = "./snapshot.sh %d" % (
                            self.server.snapshot_counter - 1)
                        self.server.global_lock.release()
                        logging.info(cmd)
                        os.system(cmd)

                    # initialize a set of ready servers,
                    # clear the continue indicator
                    self.server.ready_lock.acquire()
                    self.server.ready = set()
                    self.server.ready_lock.release()

                    self.server.iterate = False

                    # send a start message at the beginning
                    if not self.server.start:
                        self.server.start = True
                        self.server.executing = True
                        (starthost, starttask) = self.GetStartInfo(self.config)
                        s = "send __Start__ message for task %s to host %s" % (
                            starttask, starthost)
                        logging.debug(s)
                        client.message(starthost, "__Main__", starttask,
                                       "__Start__")

                    # send a step start command to all the hosts
                    hosts = self.config["hosts"]
                    master = "%s:%s" % (self.config["master"]["host"],
                                        self.config["master"]["port"])

                    logging.debug("hosts " + str(hosts))
                    for h in hosts:
                        logging.debug("send prepare to " + str(h))
                        self.Prepare(h)
                        logging.debug("done sending prepare to " + str(h))
            return

        elif subpath[1] == "ready":
            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()

            if len(subpath) > 2:
                host = subpath[2]

                # get the number of active tasks on the host
                numtasks = 0
                try:
                    numtasks = int(subpath[3])
                except:
                    pass

                # execute the next step, if there are active tasks
                if numtasks > 0:
                    self.server.iterate = True

                self.server.ready_lock.acquire()
                self.server.ready.add(host)
                str_log = "host %s ready" % (str(self.server.ready))
                ready_size = len(self.server.ready)
                self.server.ready_lock.release()
                logging.debug(str_log)

                if ready_size == len(self.config["hosts"]):

                    # stop the execution, if there are no more tasks to execute
                    if not self.server.iterate:
                        logging.info("all tasks completed")
                        self.server.executing = False
                        self.server.iterate = False
                        time.sleep(10)
                        self._quit(force=True)
                        return

                    logging.info("all hosts ready")

                    # initialize a set of done servers
                    self.server.done_lock.acquire()
                    self.server.done = set()
                    self.server.done_lock.release()

                    hosts = self.config["hosts"]
                    master = "%s:%s" % (self.config["master"]["host"],
                                        self.config["master"]["port"])

                    self.server.global_lock.acquire()
                    self.server.superstep_count += 1
                    for h in hosts:
                        h_id = int(h['id'])
                        self.server.timer.start("superstep-%d-host-%d" % \
                                (self.server.superstep_count, h_id))
                    self.server.global_lock.release()

                    # send a step start command to all the hosts
                    # TODO: create a thread for this step
                    for h in hosts:
                        logging.info("send next step to " + str(h))
                        self.StartStep(h)
            return

        elif subpath[1] == "error":

            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()

            if len(subpath) > 3:
                src_host = subpath[2]
                encoded_msg = subpath[3]
                msg_dict = urlparse.parse_qs(encoded_msg)
                logging.critical("Error msg from supervisor %s: %s" % \
                        (src_host, msg_dict['msg']))
                logging.critical("Terminating master now")
                self._quit(force=True)

            return

        self.send_response(200)
        self.end_headers()
        self.wfile.write(message)
        return