Exemple #1
0
    def do_GET(self):
        """Handle a GET request sent to the host (supervisor) server.

        Recognized paths:
          /prepare  rotate the "qin" task directory into "qact", create a
                    fresh "qin", and report the number of entries found to
                    the master through ``client.ready``.
          /quit     clear ``self.server.running`` and call
                    ``self.server.self_dummy()``.
          /getkv    reply with the JSON-encoded result of
                    ``get_kv_file("supervisor")``, or with the literal text
                    "None" while ``superstep_count`` is not above 1.
          /dummy    reply with an empty 200.
          /step     reply with an empty 200, then run ``Execute`` on a new
                    thread over the entries of the "qact" directory.
          /config   reply with the JSON-encoded ``self.config``.

        Any other path is answered with a plain-text echo of the request.
        """
        if self.path == "/prepare":

            prepare_timer = perf.Timer(logging)
            self.server.superstep_count += 1
            # Build the label once so start() and stop() always use the same
            # name, even if the counter changes before this request finishes.
            timer_label = "prepare-%d" % self.server.superstep_count
            prepare_timer.start(timer_label)

            # move qin to qact
            qinname = "snapw.%d/qin" % (self.pid)
            qactname = "snapw.%d/qact" % (self.pid)

            # get rid of a qact left over from the previous step
            if os.path.exists(qactname):

                removed = False
                if not self.config['debug']:
                    try:
                        shutil.rmtree(qactname)
                    except Exception:
                        # best effort: fall back to renaming the directory
                        logging.exception(
                            "error on removing dir %s" % qactname)
                    else:
                        logging.debug("removed dir %s" % qactname)
                        removed = True

                if not removed:
                    # debug mode, or the removal failed: keep the old
                    # directory under a timestamped name instead
                    t = time.time()
                    s = time.strftime("%Y%m%d-%H%M%S", time.localtime(t))
                    mus = "%06d" % (t*1000000 - int(t)*1000000)
                    qactnewname = "%s-%s-%s" % (qactname, s, mus)
                    os.rename(qactname, qactnewname)
                    logging.debug("renamed %s to %s" % (qactname, qactnewname))

            # get the number of active tasks, rename existing qin
            numtasks = 0
            if os.path.exists(qinname):
                os.rename(qinname, qactname)
                numtasks = len(os.listdir(qactname))

            # create new qin
            config.mkdir_p(qinname)

            logging.info("preparing next step: %s, %s" % (qinname, qactname))

            # send ready to master
            client.ready(self.master, self.id, numtasks)

            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()

            prepare_timer.stop(timer_label)
            return

        elif self.path == "/quit":
            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()
            # NOTE(review): this method has no `global SYS_STATS` statement,
            # so the assignment only binds a local name. Confirm whether a
            # module-level flag was meant to be cleared here.
            SYS_STATS = False

            # set the flag to terminate the server
            self.server.running = False
            self.server.self_dummy()
            return

        elif self.path == "/getkv":
            logging.debug("getting kv file")

            self.send_response(200)
            if self.server.superstep_count > 1:
                body = json.dumps(get_kv_file("supervisor"))
            else:
                body = "None"
            self.send_header('Content-Length', len(body))
            self.end_headers()
            self.wfile.write(body)
            return

        elif self.path == "/dummy":
            logging.debug("dummy request")

            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()
            return

        elif self.path == "/step":

            logging.info("execute next step")

            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()

            # TODO, implement null action,
            #   skip execution if there are no tasks to execute,
            #   qact does not exist

            # get the tasks to execute
            qactname = "snapw.%d/qact" % (self.pid)
            active = []
            if os.path.exists(qactname):
                active = os.listdir(qactname)

            logging.debug("active tasks %s" % (str(active)))

            # Execute receives this handler and reads these two attributes
            self.qactname = qactname
            self.active = active # the task list
            # start a thread to execute the work tasks
            t = threading.Thread(target=Execute, args=(self, ))
            t.start()
            return

        elif self.path == "/config":

            logging.debug("get configuration")

            body = json.dumps(self.config)
            self.send_response(200)
            self.send_header('Content-Length', len(body))
            self.end_headers()
            self.wfile.write(body)
            return

        # Unknown path: echo the request back. The text is only built here
        # because no other branch uses it.
        parsed_path = urlparse.urlparse(self.path)
        message_parts = [
                'CLIENT VALUES:',
                'client_address=%s (%s)' % (self.client_address,
                                            self.address_string()),
                'command=%s' % self.command,
                'path=%s' % self.path,
                'real path=%s' % parsed_path.path,
                'query=%s' % parsed_path.query,
                'request_version=%s' % self.request_version,
                '',
                'SERVER VALUES:',
                'server_type=%s' % "host server",
                'server_version=%s' % self.server_version,
                'sys_version=%s' % self.sys_version,
                'protocol_version=%s' % self.protocol_version,
                '',
                'HEADERS RECEIVED:',
                ]
        for name, value in sorted(self.headers.items()):
            message_parts.append('%s=%s' % (name, value.rstrip()))
        message_parts.append('')
        message = '\r\n'.join(message_parts)

        self.send_response(200)
        # every other reply announces its length; do the same here
        self.send_header('Content-Length', len(message))
        self.end_headers()
        self.wfile.write(message)
        return
Exemple #2
0
    def do_GET(self):
        """Handle a GET request sent to the head (master) server.

        Recognized requests:
          /start                call ``StartHostServer`` for every configured
                                host (the reply is the request echo below).
          /quit                 delegate to ``self._quit()``.
          /getkv                reply with the JSON-encoded result of
                                ``get_kv_file("master")``, prefixed with
                                "DONE " when neither ``executing`` nor
                                ``iterate`` is set, or with the literal text
                                "None" while ``superstep_count`` is not
                                above 1.
          /dummy                reply with an empty 200.
          /config               reply with the JSON-encoded ``self.config``.
          /exec?p=PATH&t=MTIME  reply with the contents of PATH, 304 when
                                PATH is not newer than MTIME, or 404 when
                                PATH is missing or unreadable.
          /done/HOST            record HOST as done; once every host is
                                done, call ``Prepare`` for each host.
          /ready/HOST/NTASKS    record HOST as ready; once every host is
                                ready, call ``StartStep`` for each host or,
                                when no host reported tasks, ``_quit``.
          /error/HOST/MSG       log the reported error and call ``_quit``.

        Any other path is answered with a plain-text echo of the request.
        """
        parsed_path = urlparse.urlparse(self.path)
        command = parsed_path.path
        dargs = dict(urlparse.parse_qsl(parsed_path.query))

        subpath = self.path.split("/")
        # A request target without any "/" splits into a single element;
        # use an empty action rather than indexing past the end.
        action = subpath[1] if len(subpath) > 1 else ""

        if self.path == "/start":
            logging.info("starting host servers")

            self.server.timer.start("master")
            self.server.superstep_count = 0
            self.server.snapshot_counter = 0

            master = self.config["master"]
            hosts = self.config["hosts"]
            for h in hosts:
                self.StartHostServer(h, master)
            # no return here: the request echo at the end is the reply

        elif self.path == "/quit":
            self._quit()
            return

        elif self.path == "/getkv":
            logging.debug("getting kv file")

            self.send_response(200)
            if self.server.superstep_count > 1:
                body = json.dumps(get_kv_file("master"))
                if not self.server.executing and not self.server.iterate:
                    # We're done computing everything. So let LS know
                    # that this is the final copy of the k-v file.
                    body = "DONE " + body
            else:
                body = "None"
            self.send_header('Content-Length', len(body))
            self.end_headers()
            self.wfile.write(body)
            return

        elif self.path == "/dummy":
            logging.debug("dummy request")

            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()
            return

        elif self.path == "/config":
            logging.debug("get configuration")

            body = json.dumps(self.config)
            self.send_response(200)
            self.send_header('Content-Length', len(body))
            self.end_headers()
            self.wfile.write(body)
            return

        elif command == "/exec":
            pname = dargs.get("p")

            ptime = 0
            try:
                ptime = int(dargs.get("t"))
            except (TypeError, ValueError):
                # missing or non-numeric "t": treat the file as newer
                pass

            # NOTE(review): "p" is taken verbatim from the query string, so
            # this serves any file the process can read. Confirm the server
            # is only reachable by trusted hosts, or restrict the path.
            try:
                mtime = int(os.stat(pname).st_mtime)
            except (TypeError, ValueError, OSError):
                # no "p" argument, or the file cannot be stat'ed
                logging.warning("exec: cannot stat %s" % (str(pname)))
                self.send_response(404)
                self.send_header('Content-Length', 0)
                self.end_headers()
                return

            if mtime <= ptime:
                # the file has not changed
                self.send_response(304)
                self.send_header('Content-Length', 0)
                self.end_headers()
                return

            # binary mode: the payload is sent as-is
            with open(pname, "rb") as f:
                content = f.read()

            self.send_response(200)
            self.send_header('Content-Length', len(content))
            self.end_headers()
            self.wfile.write(content)
            return

        elif action == "done":
            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()

            if len(subpath) > 2:
                host = subpath[2]

                # int(host) can raise on a malformed id; the finally keeps
                # the lock from staying held and blocking later requests
                self.server.global_lock.acquire()
                try:
                    cur_superstep = self.server.superstep_count
                    if cur_superstep > 0:
                        self.server.timer.stop("superstep-%d-host-%d" % \
                                (self.server.superstep_count, int(host)))
                finally:
                    self.server.global_lock.release()

                self.server.done_lock.acquire()
                try:
                    self.server.done.add(host)
                    str_log = "host %s completed work" % (str(self.server.done))
                    done_size = len(self.server.done)
                finally:
                    self.server.done_lock.release()
                logging.info(str_log)

                if done_size == len(self.config["hosts"]):

                    logging.info("all hosts completed")
                    # Fix possible concurrency issue with supervisor.py
                    if cur_superstep == 0:
                        time.sleep(5)

                    if self.server.snapshot_enabled:
                        self.server.global_lock.acquire()
                        try:
                            self.server.snapshot_counter += 1
                            cmd = "./snapshot.sh %d" % (
                                self.server.snapshot_counter - 1)
                        finally:
                            self.server.global_lock.release()
                        logging.info(cmd)
                        os.system(cmd)

                    # initialize a set of ready servers,
                    # clear the continue indicator
                    self.server.ready_lock.acquire()
                    try:
                        self.server.ready = set()
                    finally:
                        self.server.ready_lock.release()

                    self.server.iterate = False

                    # send a start message at the beginning
                    if not self.server.start:
                        self.server.start = True
                        self.server.executing = True
                        (starthost, starttask) = self.GetStartInfo(self.config)
                        s = "send __Start__ message for task %s to host %s" % (
                                starttask, starthost)
                        logging.debug(s)
                        client.message(starthost, "__Main__", starttask,
                                       "__Start__")

                    # send a prepare command to all the hosts
                    hosts = self.config["hosts"]

                    logging.debug("hosts " + str(hosts))
                    for h in hosts:
                        logging.debug("send prepare to " + str(h))
                        self.Prepare(h)
                        logging.debug("done sending prepare to " + str(h))
            return

        elif action == "ready":
            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()

            if len(subpath) > 2:
                host = subpath[2]

                # get the number of active tasks on the host
                numtasks = 0
                try:
                    numtasks = int(subpath[3])
                except (IndexError, ValueError):
                    # count missing or malformed: treat as no tasks
                    pass

                # execute the next step, if there are active tasks
                if numtasks > 0:
                    self.server.iterate = True

                self.server.ready_lock.acquire()
                try:
                    self.server.ready.add(host)
                    str_log = "host %s ready" % (str(self.server.ready))
                    ready_size = len(self.server.ready)
                finally:
                    self.server.ready_lock.release()
                logging.debug(str_log)

                if ready_size == len(self.config["hosts"]):

                    # stop the execution, if there are no more tasks to execute
                    if not self.server.iterate:
                        logging.info("all tasks completed")
                        self.server.executing = False
                        self.server.iterate = False
                        time.sleep(10)
                        self._quit(force=True)
                        return

                    logging.info("all hosts ready")

                    # initialize a set of done servers
                    self.server.done_lock.acquire()
                    try:
                        self.server.done = set()
                    finally:
                        self.server.done_lock.release()

                    hosts = self.config["hosts"]

                    # int(h['id']) can raise on a bad config entry; the
                    # finally keeps the lock from staying held
                    self.server.global_lock.acquire()
                    try:
                        self.server.superstep_count += 1
                        for h in hosts:
                            h_id = int(h['id'])
                            self.server.timer.start("superstep-%d-host-%d" % \
                                    (self.server.superstep_count, h_id))
                    finally:
                        self.server.global_lock.release()

                    # send a step start command to all the hosts
                    # TODO: create a thread for this step
                    for h in hosts:
                        logging.info("send next step to " + str(h))
                        self.StartStep(h)
            return

        elif action == "error":

            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()

            if len(subpath) > 3:
                src_host = subpath[2]
                encoded_msg = subpath[3]
                msg_dict = urlparse.parse_qs(encoded_msg)
                # .get: a report without a "msg" field must still reach
                # the shutdown below
                logging.critical("Error msg from supervisor %s: %s" % \
                        (src_host, msg_dict.get('msg')))
                logging.critical("Terminating master now")
                self._quit(force=True)

            return

        # "/start" and unknown paths: echo the request back
        message_parts = [
                'CLIENT VALUES:',
                'client_address=%s (%s)' % (self.client_address,
                                            self.address_string()),
                'command=%s' % self.command,
                'path=%s' % self.path,
                'real path=%s' % parsed_path.path,
                'query=%s' % parsed_path.query,
                'request_version=%s' % self.request_version,
                '',
                'SERVER VALUES:',
                'server_type=%s' % "head server",
                'server_version=%s' % self.server_version,
                'sys_version=%s' % self.sys_version,
                'protocol_version=%s' % self.protocol_version,
                '',
                'HEADERS RECEIVED:',
                ]
        for name, value in sorted(self.headers.items()):
            message_parts.append('%s=%s' % (name, value.rstrip()))
        message_parts.append('')
        message = '\r\n'.join(message_parts)

        self.send_response(200)
        # every other reply announces its length; do the same here
        self.send_header('Content-Length', len(message))
        self.end_headers()
        self.wfile.write(message)
        return
    def do_GET(self):
        """Handle a GET request sent to the host (supervisor) server.

        Recognized paths:
          /prepare  rotate the "qin" task directory into "qact", create a
                    fresh "qin", and report the number of entries found to
                    the master through ``client.ready``.
          /quit     clear ``self.server.running`` and call
                    ``self.server.self_dummy()``.
          /getkv    reply with the JSON-encoded result of
                    ``get_kv_file("supervisor")``, or with the literal text
                    "None" while ``superstep_count`` is not above 1.
          /dummy    reply with an empty 200.
          /step     reply with an empty 200, then run ``Execute`` on a new
                    thread over the entries of the "qact" directory.
          /config   reply with the JSON-encoded ``self.config``.

        Any other path is answered with a plain-text echo of the request.
        """
        if self.path == "/prepare":

            prepare_timer = perf.Timer(logging)
            self.server.superstep_count += 1
            # Build the label once so start() and stop() always use the same
            # name, even if the counter changes before this request finishes.
            timer_label = "prepare-%d" % self.server.superstep_count
            prepare_timer.start(timer_label)

            # move qin to qact
            qinname = "snapw.%d/qin" % (self.pid)
            qactname = "snapw.%d/qact" % (self.pid)

            # get rid of a qact left over from the previous step
            if os.path.exists(qactname):

                removed = False
                if not self.config['debug']:
                    try:
                        shutil.rmtree(qactname)
                    except Exception:
                        # best effort: fall back to renaming the directory
                        logging.exception(
                            "error on removing dir %s" % qactname)
                    else:
                        logging.debug("removed dir %s" % qactname)
                        removed = True

                if not removed:
                    # debug mode, or the removal failed: keep the old
                    # directory under a timestamped name instead
                    t = time.time()
                    s = time.strftime("%Y%m%d-%H%M%S", time.localtime(t))
                    mus = "%06d" % (t*1000000 - int(t)*1000000)
                    qactnewname = "%s-%s-%s" % (qactname, s, mus)
                    os.rename(qactname, qactnewname)
                    logging.debug("renamed %s to %s" % (qactname, qactnewname))

            # get the number of active tasks, rename existing qin
            numtasks = 0
            if os.path.exists(qinname):
                os.rename(qinname, qactname)
                numtasks = len(os.listdir(qactname))

            # create new qin
            config.mkdir_p(qinname)

            logging.info("preparing next step: %s, %s" % (qinname, qactname))

            # send ready to master
            client.ready(self.master, self.id, numtasks)

            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()

            prepare_timer.stop(timer_label)
            return

        elif self.path == "/quit":
            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()
            # NOTE(review): this method has no `global SYS_STATS` statement,
            # so the assignment only binds a local name. Confirm whether a
            # module-level flag was meant to be cleared here.
            SYS_STATS = False

            # set the flag to terminate the server
            self.server.running = False
            self.server.self_dummy()
            return

        elif self.path == "/getkv":
            logging.debug("getting kv file")

            self.send_response(200)
            if self.server.superstep_count > 1:
                body = json.dumps(get_kv_file("supervisor"))
            else:
                body = "None"
            self.send_header('Content-Length', len(body))
            self.end_headers()
            self.wfile.write(body)
            return

        elif self.path == "/dummy":
            logging.debug("dummy request")

            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()
            return

        elif self.path == "/step":

            logging.info("execute next step")

            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()

            # TODO, implement null action,
            #   skip execution if there are no tasks to execute,
            #   qact does not exist

            # get the tasks to execute
            qactname = "snapw.%d/qact" % (self.pid)
            active = []
            if os.path.exists(qactname):
                active = os.listdir(qactname)

            logging.debug("active tasks %s" % (str(active)))

            # Execute receives this handler and reads these two attributes
            self.qactname = qactname
            self.active = active # the task list
            # start a thread to execute the work tasks
            t = threading.Thread(target=Execute, args=(self, ))
            t.start()
            return

        elif self.path == "/config":

            logging.debug("get configuration")

            body = json.dumps(self.config)
            self.send_response(200)
            self.send_header('Content-Length', len(body))
            self.end_headers()
            self.wfile.write(body)
            return

        # Unknown path: echo the request back. The text is only built here
        # because no other branch uses it.
        parsed_path = urlparse.urlparse(self.path)
        message_parts = [
                'CLIENT VALUES:',
                'client_address=%s (%s)' % (self.client_address,
                                            self.address_string()),
                'command=%s' % self.command,
                'path=%s' % self.path,
                'real path=%s' % parsed_path.path,
                'query=%s' % parsed_path.query,
                'request_version=%s' % self.request_version,
                '',
                'SERVER VALUES:',
                'server_type=%s' % "host server",
                'server_version=%s' % self.server_version,
                'sys_version=%s' % self.sys_version,
                'protocol_version=%s' % self.protocol_version,
                '',
                'HEADERS RECEIVED:',
                ]
        for name, value in sorted(self.headers.items()):
            message_parts.append('%s=%s' % (name, value.rstrip()))
        message_parts.append('')
        message = '\r\n'.join(message_parts)

        self.send_response(200)
        # every other reply announces its length; do the same here
        self.send_header('Content-Length', len(message))
        self.end_headers()
        self.wfile.write(message)
        return
Exemple #4
0
    def do_GET(self):
        """Handle a GET request sent to the head (master) server.

        Recognized requests:
          /start                call ``StartHostServer`` for every configured
                                host (the reply is the request echo below).
          /quit                 delegate to ``self._quit()``.
          /getkv                reply with the JSON-encoded result of
                                ``get_kv_file("master")``, prefixed with
                                "DONE " when neither ``executing`` nor
                                ``iterate`` is set, or with the literal text
                                "None" while ``superstep_count`` is not
                                above 1.
          /dummy                reply with an empty 200.
          /config               reply with the JSON-encoded ``self.config``.
          /exec?p=PATH&t=MTIME  reply with the contents of PATH, 304 when
                                PATH is not newer than MTIME, or 404 when
                                PATH is missing or unreadable.
          /done/HOST            record HOST as done; once every host is
                                done, call ``Prepare`` for each host.
          /ready/HOST/NTASKS    record HOST as ready; once every host is
                                ready, call ``StartStep`` for each host or,
                                when no host reported tasks, ``_quit``.
          /error/HOST/MSG       log the reported error and call ``_quit``.

        Any other path is answered with a plain-text echo of the request.
        """
        parsed_path = urlparse.urlparse(self.path)
        command = parsed_path.path
        dargs = dict(urlparse.parse_qsl(parsed_path.query))

        subpath = self.path.split("/")
        # A request target without any "/" splits into a single element;
        # use an empty action rather than indexing past the end.
        action = subpath[1] if len(subpath) > 1 else ""

        if self.path == "/start":
            logging.info("starting host servers")

            self.server.timer.start("master")
            self.server.superstep_count = 0
            self.server.snapshot_counter = 0

            master = self.config["master"]
            hosts = self.config["hosts"]
            for h in hosts:
                self.StartHostServer(h, master)
            # no return here: the request echo at the end is the reply

        elif self.path == "/quit":
            self._quit()
            return

        elif self.path == "/getkv":
            logging.debug("getting kv file")

            self.send_response(200)
            if self.server.superstep_count > 1:
                body = json.dumps(get_kv_file("master"))
                if not self.server.executing and not self.server.iterate:
                    # We're done computing everything. So let LS know
                    # that this is the final copy of the k-v file.
                    body = "DONE " + body
            else:
                body = "None"
            self.send_header('Content-Length', len(body))
            self.end_headers()
            self.wfile.write(body)
            return

        elif self.path == "/dummy":
            logging.debug("dummy request")

            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()
            return

        elif self.path == "/config":
            logging.debug("get configuration")

            body = json.dumps(self.config)
            self.send_response(200)
            self.send_header('Content-Length', len(body))
            self.end_headers()
            self.wfile.write(body)
            return

        elif command == "/exec":
            pname = dargs.get("p")

            ptime = 0
            try:
                ptime = int(dargs.get("t"))
            except (TypeError, ValueError):
                # missing or non-numeric "t": treat the file as newer
                pass

            # NOTE(review): "p" is taken verbatim from the query string, so
            # this serves any file the process can read. Confirm the server
            # is only reachable by trusted hosts, or restrict the path.
            try:
                mtime = int(os.stat(pname).st_mtime)
            except (TypeError, ValueError, OSError):
                # no "p" argument, or the file cannot be stat'ed
                logging.warning("exec: cannot stat %s" % (str(pname)))
                self.send_response(404)
                self.send_header('Content-Length', 0)
                self.end_headers()
                return

            if mtime <= ptime:
                # the file has not changed
                self.send_response(304)
                self.send_header('Content-Length', 0)
                self.end_headers()
                return

            # binary mode: the payload is sent as-is
            with open(pname, "rb") as f:
                content = f.read()

            self.send_response(200)
            self.send_header('Content-Length', len(content))
            self.end_headers()
            self.wfile.write(content)
            return

        elif action == "done":
            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()

            if len(subpath) > 2:
                host = subpath[2]

                # int(host) can raise on a malformed id; the finally keeps
                # the lock from staying held and blocking later requests
                self.server.global_lock.acquire()
                try:
                    cur_superstep = self.server.superstep_count
                    if cur_superstep > 0:
                        self.server.timer.stop("superstep-%d-host-%d" % \
                                (self.server.superstep_count, int(host)))
                finally:
                    self.server.global_lock.release()

                self.server.done_lock.acquire()
                try:
                    self.server.done.add(host)
                    str_log = "host %s completed work" % (str(self.server.done))
                    done_size = len(self.server.done)
                finally:
                    self.server.done_lock.release()
                logging.info(str_log)

                if done_size == len(self.config["hosts"]):

                    logging.info("all hosts completed")
                    # Fix possible concurrency issue with supervisor.py
                    if cur_superstep == 0:
                        time.sleep(5)

                    if self.server.snapshot_enabled:
                        self.server.global_lock.acquire()
                        try:
                            self.server.snapshot_counter += 1
                            cmd = "./snapshot.sh %d" % (
                                self.server.snapshot_counter - 1)
                        finally:
                            self.server.global_lock.release()
                        logging.info(cmd)
                        os.system(cmd)

                    # initialize a set of ready servers,
                    # clear the continue indicator
                    self.server.ready_lock.acquire()
                    try:
                        self.server.ready = set()
                    finally:
                        self.server.ready_lock.release()

                    self.server.iterate = False

                    # send a start message at the beginning
                    if not self.server.start:
                        self.server.start = True
                        self.server.executing = True
                        (starthost, starttask) = self.GetStartInfo(self.config)
                        s = "send __Start__ message for task %s to host %s" % (
                            starttask, starthost)
                        logging.debug(s)
                        client.message(starthost, "__Main__", starttask,
                                       "__Start__")

                    # send a prepare command to all the hosts
                    hosts = self.config["hosts"]

                    logging.debug("hosts " + str(hosts))
                    for h in hosts:
                        logging.debug("send prepare to " + str(h))
                        self.Prepare(h)
                        logging.debug("done sending prepare to " + str(h))
            return

        elif action == "ready":
            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()

            if len(subpath) > 2:
                host = subpath[2]

                # get the number of active tasks on the host
                numtasks = 0
                try:
                    numtasks = int(subpath[3])
                except (IndexError, ValueError):
                    # count missing or malformed: treat as no tasks
                    pass

                # execute the next step, if there are active tasks
                if numtasks > 0:
                    self.server.iterate = True

                self.server.ready_lock.acquire()
                try:
                    self.server.ready.add(host)
                    str_log = "host %s ready" % (str(self.server.ready))
                    ready_size = len(self.server.ready)
                finally:
                    self.server.ready_lock.release()
                logging.debug(str_log)

                if ready_size == len(self.config["hosts"]):

                    # stop the execution, if there are no more tasks to execute
                    if not self.server.iterate:
                        logging.info("all tasks completed")
                        self.server.executing = False
                        self.server.iterate = False
                        time.sleep(10)
                        self._quit(force=True)
                        return

                    logging.info("all hosts ready")

                    # initialize a set of done servers
                    self.server.done_lock.acquire()
                    try:
                        self.server.done = set()
                    finally:
                        self.server.done_lock.release()

                    hosts = self.config["hosts"]

                    # int(h['id']) can raise on a bad config entry; the
                    # finally keeps the lock from staying held
                    self.server.global_lock.acquire()
                    try:
                        self.server.superstep_count += 1
                        for h in hosts:
                            h_id = int(h['id'])
                            self.server.timer.start("superstep-%d-host-%d" % \
                                    (self.server.superstep_count, h_id))
                    finally:
                        self.server.global_lock.release()

                    # send a step start command to all the hosts
                    # TODO: create a thread for this step
                    for h in hosts:
                        logging.info("send next step to " + str(h))
                        self.StartStep(h)
            return

        elif action == "error":

            self.send_response(200)
            self.send_header('Content-Length', 0)
            self.end_headers()

            if len(subpath) > 3:
                src_host = subpath[2]
                encoded_msg = subpath[3]
                msg_dict = urlparse.parse_qs(encoded_msg)
                # .get: a report without a "msg" field must still reach
                # the shutdown below
                logging.critical("Error msg from supervisor %s: %s" % \
                        (src_host, msg_dict.get('msg')))
                logging.critical("Terminating master now")
                self._quit(force=True)

            return

        # "/start" and unknown paths: echo the request back
        message_parts = [
            'CLIENT VALUES:',
            'client_address=%s (%s)' %
            (self.client_address, self.address_string()),
            'command=%s' % self.command,
            'path=%s' % self.path,
            'real path=%s' % parsed_path.path,
            'query=%s' % parsed_path.query,
            'request_version=%s' % self.request_version,
            '',
            'SERVER VALUES:',
            'server_type=%s' % "head server",
            'server_version=%s' % self.server_version,
            'sys_version=%s' % self.sys_version,
            'protocol_version=%s' % self.protocol_version,
            '',
            'HEADERS RECEIVED:',
        ]
        for name, value in sorted(self.headers.items()):
            message_parts.append('%s=%s' % (name, value.rstrip()))
        message_parts.append('')
        message = '\r\n'.join(message_parts)

        self.send_response(200)
        # every other reply announces its length; do the same here
        self.send_header('Content-Length', len(message))
        self.end_headers()
        self.wfile.write(message)
        return