def Send(self, dstid, d, channel="1", swsnap=False):
    """Deliver payload ``d`` to the task named ``<target[channel]>-<dstid>``.

    In local mode (``self.local`` is true) the payload is written to the
    file returned by ``self.GetOutName(dstname)``.  Otherwise it is handed
    to the ``client`` module, addressed to the host looked up through
    ``self.tasks`` and ``self.hosts``.

    Args:
        dstid: identifier appended to the channel's target name.
        d: the payload.  Serialized with ``json.dumps`` by default; when
            ``swsnap`` is true it must provide a ``Save(stream)`` method.
        channel: key into ``self.target`` selecting the destination name.
        swsnap: send ``d`` as a Snap object instead of a JSON document.

    Returns:
        None on every path.

    Exits the process with status 2 when ``swsnap`` is requested but the
    module-level ``gotsnap`` flag is false.
    """
    dstname = self.target[channel] + "-" + str(dstid)
    dsthostid = self.tasks.get(dstname)
    dshost = self.hosts.get(dsthostid)
    # %s rather than %d: the task name above is built with str(dstid), so a
    # non-integer id must not make the log call raise.
    self.log.debug('sending to %s (had id %s)' % (dshost, dstid))

    # The output file name is only needed (and only defined) in local mode.
    if self.local:
        fname = self.GetOutName(dstname)

    if swsnap:
        if not gotsnap:
            self.log.error("Snap module is not available")
            sys.exit(2)

        # Snap vector
        if self.local:
            FOut = Snap.TFOut(Snap.TStr(fname))
            d.Save(FOut)
            FOut.Flush()
            return

        client.messagevec(dshost, self.taskname, dstname, d)
        return

    # json dict
    s = json.dumps(d)
    if self.local:
        # The context manager closes the file even if the write fails.
        with open(fname, "w") as f:
            f.write(s)
        return

    client.message(dshost, self.taskname, dstname, s)
def Send(self, dstid, d, channel="1", swsnap=False):
    """Deliver payload ``d`` to the task named ``<target[channel]>-<dstid>``.

    In local mode (``self.local`` is true) the payload is written to the
    file returned by ``self.GetOutName(dstname)``.  Otherwise it is handed
    to the ``client`` module, addressed to the host looked up through
    ``self.tasks`` and ``self.hosts``.

    Args:
        dstid: identifier appended to the channel's target name.
        d: the payload.  Serialized with ``json.dumps`` by default; when
            ``swsnap`` is true it must provide a ``Save(stream)`` method.
        channel: key into ``self.target`` selecting the destination name.
        swsnap: send ``d`` as a Snap object instead of a JSON document.

    Returns:
        None on every path.

    Exits the process with status 2 when ``swsnap`` is requested but the
    module-level ``gotsnap`` flag is false.
    """
    dstname = self.target[channel] + "-" + str(dstid)
    dsthostid = self.tasks.get(dstname)
    dshost = self.hosts.get(dsthostid)
    # %s rather than %d: the task name above is built with str(dstid), so a
    # non-integer id must not make the log call raise.
    self.log.debug('sending to %s (had id %s)' % (dshost, dstid))

    # The output file name is only needed (and only defined) in local mode.
    if self.local:
        fname = self.GetOutName(dstname)

    if swsnap:
        if not gotsnap:
            self.log.error("Snap module is not available")
            sys.exit(2)

        # Snap vector
        if self.local:
            FOut = Snap.TFOut(Snap.TStr(fname))
            d.Save(FOut)
            FOut.Flush()
            return

        client.messagevec(dshost, self.taskname, dstname, d)
        return

    # json dict
    s = json.dumps(d)
    if self.local:
        # The context manager closes the file even if the write fails.
        with open(fname, "w") as f:
            f.write(s)
        return

    client.message(dshost, self.taskname, dstname, s)
def do_GET(self):
    """Handle a GET request sent to the head (master) server.

    Routes, matched in this order:
        /start                    call StartHostServer for every configured
                                  host, then fall through to the echo reply
        /quit                     call QuitHostServer for every host, reply
                                  with an empty 200 and stop this server
        /dummy                    empty 200
        /config                   the configuration serialized as JSON
        /exec?p=<path>&t=<mtime>  contents of <path> if it was modified
                                  after <mtime>, otherwise an empty 304
        /done/<host>              record that <host> finished; once every
                                  host has reported, send Prepare to all
        /ready/<host>/<numtasks>  record that <host> is ready; once every
                                  host has reported, start the next step or
                                  stop when no host has active tasks
        anything else             plain-text echo of the request details
    """
    def reply_empty(status):
        # Send a response that consists of headers only.
        self.send_response(status)
        self.send_header('Content-Length', 0)
        self.end_headers()

    parsed_path = urlparse.urlparse(self.path)
    message_parts = [
        'CLIENT VALUES:',
        'client_address=%s (%s)' % (self.client_address,
                                    self.address_string()),
        'command=%s' % self.command,
        'path=%s' % self.path,
        'real path=%s' % parsed_path.path,
        'query=%s' % parsed_path.query,
        'request_version=%s' % self.request_version,
        '',
        'SERVER VALUES:',
        'server_type=%s' % "head server",
        'server_version=%s' % self.server_version,
        'sys_version=%s' % self.sys_version,
        'protocol_version=%s' % self.protocol_version,
        '',
        'HEADERS RECEIVED:',
    ]
    for name, value in sorted(self.headers.items()):
        message_parts.append('%s=%s' % (name, value.rstrip()))
    message_parts.append('')
    message = '\r\n'.join(message_parts)

    subpath = self.path.split("/")
    command = parsed_path.path
    dargs = dict(urlparse.parse_qsl(parsed_path.query))

    if self.path == "/start":
        print("starting host servers ")
        master = self.config["master"]
        for h in self.config["hosts"]:
            self.StartHostServer(h, master)
        # No return here: the echo response at the bottom answers /start.

    elif self.path == "/quit":
        print("terminating host servers")
        for h in self.config["hosts"]:
            self.QuitHostServer(h)
        reply_empty(200)
        # set the flag to terminate the server
        self.server.running = False
        # NOTE(review): presumably issues one more request so the serving
        # loop notices the cleared flag -- confirm in the server class.
        self.server.self_dummy()
        return

    elif self.path == "/dummy":
        print("dummy request")
        reply_empty(200)
        return

    elif self.path == "/config":
        print("get configuration")
        body = simplejson.dumps(self.config)
        self.send_response(200)
        self.send_header('Content-Length', len(body))
        self.end_headers()
        self.wfile.write(body)
        return

    elif command == "/exec":
        # NOTE(review): "p" comes straight from the query string and is
        # opened as-is, so any file readable by this process can be
        # fetched.  Confirm the server is only reachable by trusted hosts.
        pname = dargs.get("p")
        ptime = 0
        try:
            ptime = int(dargs.get("t"))
        except (TypeError, ValueError):
            # "t" missing or not a number: treat the caller's copy as old.
            pass
        print("get executable %s %s" % (pname, ptime))

        mtime = int(os.stat(pname).st_mtime)
        swnew = mtime > ptime
        print("stat %s %s %s %s" % (pname, ptime, mtime,
                                    "NEW" if swnew else "OLD"))

        if not swnew:
            # the file has not changed
            reply_empty(304)
            return

        with open(pname) as f:
            content = f.read()

        self.send_response(200)
        self.send_header('Content-Length', len(content))
        self.end_headers()
        self.wfile.write(content)
        return

    elif subpath[1] == "done":
        reply_empty(200)

        if len(subpath) > 2:
            host = subpath[2]
            # TODO make this update thread safe, which it is not now
            self.server.done.add(host)
            print("host %s completed work" % (str(self.server.done)))

            if len(self.server.done) == len(self.config["hosts"]):
                print("all hosts completed")

                # initialize a set of ready servers,
                # clear the continue indicator
                self.server.ready = set()
                self.server.iterate = False

                # send a start message at the beginning
                if not self.server.start:
                    self.server.start = True
                    self.server.executing = True
                    (starthost, starttask) = self.GetStartInfo(self.config)
                    s = "send __Start__ message for task %s to host %s" % (
                        starttask, starthost)
                    print(s)
                    client.message(starthost, "__Main__", starttask,
                                   "__Start__")

                # send a step start command to all the hosts
                hosts = self.config["hosts"]
                print("hosts %s" % (hosts,))
                for h in hosts:
                    print("send prepare to %s" % (h,))
                    sys.stdout.flush()
                    self.Prepare(h)
                    print("done sending prepare to %s" % (h,))
        return

    elif subpath[1] == "ready":
        reply_empty(200)

        if len(subpath) > 2:
            host = subpath[2]
            # TODO make this update thread safe, which it is not now
            self.server.ready.add(host)

            # get the number of active tasks on the host
            numtasks = 0
            try:
                numtasks = int(subpath[3])
            except (IndexError, ValueError):
                # Count missing or malformed: treat as no active tasks.
                pass

            # execute the next step, if there are active tasks
            if numtasks > 0:
                self.server.iterate = True

            print("host %s ready" % (str(self.server.ready)))

            if len(self.server.ready) == len(self.config["hosts"]):
                # stop the execution, if there are no more tasks to execute
                if not self.server.iterate:
                    print("all tasks completed")
                    self.server.executing = False
                    self.server.iterate = False
                    return

                print("all hosts ready")

                # initialize a set of done servers
                self.server.done = set()

                # send a step start command to all the hosts
                # TODO, create a thread for this step
                for h in self.config["hosts"]:
                    print("send next step to %s" % (h,))
                    self.StartStep(h)
        return

    # Default (and /start): echo the request details as plain text.
    self.send_response(200)
    self.end_headers()
    self.wfile.write(message)
    return
def do_GET(self):
    """Handle a GET request sent to the head (master) server.

    Routes, matched in this order:
        /start                    start the "master" timer, reset the
                                  superstep and snapshot counters, call
                                  StartHostServer for every host, then fall
                                  through to the echo reply
        /quit                     delegate to self._quit()
        /getkv                    the "master" key-value file as JSON,
                                  prefixed with "DONE " once execution has
                                  finished; the text "None" until the
                                  superstep count exceeds 1
        /dummy                    empty 200
        /config                   the configuration serialized as JSON
        /exec?p=<path>&t=<mtime>  contents of <path> if it was modified
                                  after <mtime>, otherwise an empty 304
        /done/<host>              record that <host> finished; once every
                                  host has reported, send Prepare to all
        /ready/<host>/<numtasks>  record that <host> is ready; once every
                                  host has reported, start the next
                                  superstep or shut down when no host has
                                  active tasks
        /error/<host>/<msg>       log the supervisor's error and shut down
        anything else             plain-text echo of the request details

    Shared counters and sets on ``self.server`` are updated under the
    server's locks, taken with ``with`` so they are released even when the
    guarded code raises.
    """
    def reply_empty(status):
        # Send a response that consists of headers only.
        self.send_response(status)
        self.send_header('Content-Length', 0)
        self.end_headers()

    parsed_path = urlparse.urlparse(self.path)
    message_parts = [
        'CLIENT VALUES:',
        'client_address=%s (%s)' % (self.client_address,
                                    self.address_string()),
        'command=%s' % self.command,
        'path=%s' % self.path,
        'real path=%s' % parsed_path.path,
        'query=%s' % parsed_path.query,
        'request_version=%s' % self.request_version,
        '',
        'SERVER VALUES:',
        'server_type=%s' % "head server",
        'server_version=%s' % self.server_version,
        'sys_version=%s' % self.sys_version,
        'protocol_version=%s' % self.protocol_version,
        '',
        'HEADERS RECEIVED:',
    ]
    for name, value in sorted(self.headers.items()):
        message_parts.append('%s=%s' % (name, value.rstrip()))
    message_parts.append('')
    message = '\r\n'.join(message_parts)

    subpath = self.path.split("/")
    command = parsed_path.path
    dargs = dict(urlparse.parse_qsl(parsed_path.query))

    if self.path == "/start":
        logging.info("starting host servers")
        self.server.timer.start("master")
        self.server.superstep_count = 0
        self.server.snapshot_counter = 0
        master = self.config["master"]
        for h in self.config["hosts"]:
            self.StartHostServer(h, master)
        # No return here: the echo response at the bottom answers /start.

    elif self.path == "/quit":
        self._quit()
        return

    elif self.path == "/getkv":
        logging.debug("getting kv file")
        self.send_response(200)
        if self.server.superstep_count > 1:
            body = json.dumps(get_kv_file("master"))
            if not self.server.executing and not self.server.iterate:
                # We're done computing everything. So let LS know
                # that this is the final copy of the k-v file.
                body = "DONE " + body
            self.send_header('Content-Length', len(body))
            self.end_headers()
            self.wfile.write(body)
        else:
            self.send_header('Content-Length', len("None"))
            self.end_headers()
            self.wfile.write("None")
        return

    elif self.path == "/dummy":
        logging.debug("dummy request")
        reply_empty(200)
        return

    elif self.path == "/config":
        logging.debug("get configuration")
        body = json.dumps(self.config)
        self.send_response(200)
        self.send_header('Content-Length', len(body))
        self.end_headers()
        self.wfile.write(body)
        return

    elif command == "/exec":
        # NOTE(review): "p" comes straight from the query string and is
        # opened as-is, so any file readable by this process can be
        # fetched.  Confirm the server is only reachable by trusted hosts.
        pname = dargs.get("p")
        ptime = 0
        try:
            ptime = int(dargs.get("t"))
        except (TypeError, ValueError):
            # "t" missing or not a number: treat the caller's copy as old.
            pass

        mtime = int(os.stat(pname).st_mtime)
        if mtime <= ptime:
            # the file has not changed
            reply_empty(304)
            return

        with open(pname) as f:
            content = f.read()

        self.send_response(200)
        self.send_header('Content-Length', len(content))
        self.end_headers()
        self.wfile.write(content)
        return

    elif subpath[1] == "done":
        reply_empty(200)

        if len(subpath) > 2:
            host = subpath[2]

            with self.server.global_lock:
                cur_superstep = self.server.superstep_count
                if cur_superstep > 0:
                    # int(host) raises for a non-numeric id; the with
                    # block still releases the lock in that case.
                    self.server.timer.stop("superstep-%d-host-%d" % (
                        cur_superstep, int(host)))

            with self.server.done_lock:
                self.server.done.add(host)
                str_log = "host %s completed work" % (
                    str(self.server.done))
                done_size = len(self.server.done)

            logging.info(str_log)
            if done_size == len(self.config["hosts"]):
                logging.info("all hosts completed")

                # Fix possible concurrency issue with supervisor.py
                if cur_superstep == 0:
                    time.sleep(5)

                if self.server.snapshot_enabled:
                    with self.server.global_lock:
                        self.server.snapshot_counter += 1
                        cmd = "./snapshot.sh %d" % (
                            self.server.snapshot_counter - 1)
                    logging.info(cmd)
                    os.system(cmd)

                # initialize a set of ready servers,
                # clear the continue indicator
                with self.server.ready_lock:
                    self.server.ready = set()
                self.server.iterate = False

                # send a start message at the beginning
                if not self.server.start:
                    self.server.start = True
                    self.server.executing = True
                    (starthost, starttask) = self.GetStartInfo(self.config)
                    s = "send __Start__ message for task %s to host %s" % (
                        starttask, starthost)
                    logging.debug(s)
                    client.message(starthost, "__Main__", starttask,
                                   "__Start__")

                # send a step start command to all the hosts
                hosts = self.config["hosts"]
                logging.debug("hosts " + str(hosts))
                for h in hosts:
                    logging.debug("send prepare to " + str(h))
                    self.Prepare(h)
                    logging.debug("done sending prepare to " + str(h))
        return

    elif subpath[1] == "ready":
        reply_empty(200)

        if len(subpath) > 2:
            host = subpath[2]

            # get the number of active tasks on the host
            numtasks = 0
            try:
                numtasks = int(subpath[3])
            except (IndexError, ValueError):
                # Count missing or malformed: treat as no active tasks.
                pass

            # execute the next step, if there are active tasks
            if numtasks > 0:
                self.server.iterate = True

            with self.server.ready_lock:
                self.server.ready.add(host)
                str_log = "host %s ready" % (str(self.server.ready))
                ready_size = len(self.server.ready)

            logging.debug(str_log)
            if ready_size == len(self.config["hosts"]):
                # stop the execution, if there are no more tasks to execute
                if not self.server.iterate:
                    logging.info("all tasks completed")
                    self.server.executing = False
                    self.server.iterate = False
                    time.sleep(10)
                    self._quit(force=True)
                    return

                logging.info("all hosts ready")

                # initialize a set of done servers
                with self.server.done_lock:
                    self.server.done = set()

                hosts = self.config["hosts"]

                with self.server.global_lock:
                    self.server.superstep_count += 1
                    for h in hosts:
                        h_id = int(h['id'])
                        self.server.timer.start("superstep-%d-host-%d" % (
                            self.server.superstep_count, h_id))

                # send a step start command to all the hosts
                # TODO: create a thread for this step
                for h in hosts:
                    logging.info("send next step to " + str(h))
                    self.StartStep(h)
        return

    elif subpath[1] == "error":
        reply_empty(200)

        if len(subpath) > 3:
            src_host = subpath[2]
            encoded_msg = subpath[3]
            msg_dict = urlparse.parse_qs(encoded_msg)
            # .get: a report without a "msg" field must still shut the
            # master down instead of failing with KeyError first.
            logging.critical("Error msg from supervisor %s: %s" % (
                src_host, msg_dict.get('msg')))
            logging.critical("Terminating master now")
            self._quit(force=True)
        return

    # Default (and /start): echo the request details as plain text.
    self.send_response(200)
    self.end_headers()
    self.wfile.write(message)
    return
def do_GET(self):
    """Handle a GET request sent to the head (master) server.

    Routes, matched in this order:
        /start                    call StartHostServer for every configured
                                  host, then fall through to the echo reply
        /quit                     call QuitHostServer for every host, reply
                                  with an empty 200 and stop this server
        /dummy                    empty 200
        /config                   the configuration serialized as JSON
        /exec?p=<path>&t=<mtime>  contents of <path> if it was modified
                                  after <mtime>, otherwise an empty 304
        /done/<host>              record that <host> finished; once every
                                  host has reported, send Prepare to all
        /ready/<host>/<numtasks>  record that <host> is ready; once every
                                  host has reported, start the next step or
                                  stop when no host has active tasks
        anything else             plain-text echo of the request details
    """
    def reply_empty(status):
        # Send a response that consists of headers only.
        self.send_response(status)
        self.send_header('Content-Length', 0)
        self.end_headers()

    parsed_path = urlparse.urlparse(self.path)
    message_parts = [
        'CLIENT VALUES:',
        'client_address=%s (%s)' % (self.client_address,
                                    self.address_string()),
        'command=%s' % self.command,
        'path=%s' % self.path,
        'real path=%s' % parsed_path.path,
        'query=%s' % parsed_path.query,
        'request_version=%s' % self.request_version,
        '',
        'SERVER VALUES:',
        'server_type=%s' % "head server",
        'server_version=%s' % self.server_version,
        'sys_version=%s' % self.sys_version,
        'protocol_version=%s' % self.protocol_version,
        '',
        'HEADERS RECEIVED:',
    ]
    for name, value in sorted(self.headers.items()):
        message_parts.append('%s=%s' % (name, value.rstrip()))
    message_parts.append('')
    message = '\r\n'.join(message_parts)

    subpath = self.path.split("/")
    command = parsed_path.path
    dargs = dict(urlparse.parse_qsl(parsed_path.query))

    if self.path == "/start":
        print("starting host servers ")
        master = self.config["master"]
        for h in self.config["hosts"]:
            self.StartHostServer(h, master)
        # No return here: the echo response at the bottom answers /start.

    elif self.path == "/quit":
        print("terminating host servers")
        for h in self.config["hosts"]:
            self.QuitHostServer(h)
        reply_empty(200)
        # set the flag to terminate the server
        self.server.running = False
        # NOTE(review): presumably issues one more request so the serving
        # loop notices the cleared flag -- confirm in the server class.
        self.server.self_dummy()
        return

    elif self.path == "/dummy":
        print("dummy request")
        reply_empty(200)
        return

    elif self.path == "/config":
        print("get configuration")
        body = simplejson.dumps(self.config)
        self.send_response(200)
        self.send_header('Content-Length', len(body))
        self.end_headers()
        self.wfile.write(body)
        return

    elif command == "/exec":
        # NOTE(review): "p" comes straight from the query string and is
        # opened as-is, so any file readable by this process can be
        # fetched.  Confirm the server is only reachable by trusted hosts.
        pname = dargs.get("p")
        ptime = 0
        try:
            ptime = int(dargs.get("t"))
        except (TypeError, ValueError):
            # "t" missing or not a number: treat the caller's copy as old.
            pass
        print("get executable %s %s" % (pname, ptime))

        mtime = int(os.stat(pname).st_mtime)
        swnew = mtime > ptime
        print("stat %s %s %s %s" % (pname, ptime, mtime,
                                    "NEW" if swnew else "OLD"))

        if not swnew:
            # the file has not changed
            reply_empty(304)
            return

        with open(pname) as f:
            content = f.read()

        self.send_response(200)
        self.send_header('Content-Length', len(content))
        self.end_headers()
        self.wfile.write(content)
        return

    elif subpath[1] == "done":
        reply_empty(200)

        if len(subpath) > 2:
            host = subpath[2]
            # TODO make this update thread safe, which it is not now
            self.server.done.add(host)
            print("host %s completed work" % (str(self.server.done)))

            if len(self.server.done) == len(self.config["hosts"]):
                print("all hosts completed")

                # initialize a set of ready servers,
                # clear the continue indicator
                self.server.ready = set()
                self.server.iterate = False

                # send a start message at the beginning
                if not self.server.start:
                    self.server.start = True
                    self.server.executing = True
                    (starthost, starttask) = self.GetStartInfo(self.config)
                    s = "send __Start__ message for task %s to host %s" % (
                        starttask, starthost)
                    print(s)
                    client.message(starthost, "__Main__", starttask,
                                   "__Start__")

                # send a step start command to all the hosts
                hosts = self.config["hosts"]
                print("hosts %s" % (hosts,))
                for h in hosts:
                    print("send prepare to %s" % (h,))
                    sys.stdout.flush()
                    self.Prepare(h)
                    print("done sending prepare to %s" % (h,))
        return

    elif subpath[1] == "ready":
        reply_empty(200)

        if len(subpath) > 2:
            host = subpath[2]
            # TODO make this update thread safe, which it is not now
            self.server.ready.add(host)

            # get the number of active tasks on the host
            numtasks = 0
            try:
                numtasks = int(subpath[3])
            except (IndexError, ValueError):
                # Count missing or malformed: treat as no active tasks.
                pass

            # execute the next step, if there are active tasks
            if numtasks > 0:
                self.server.iterate = True

            print("host %s ready" % (str(self.server.ready)))

            if len(self.server.ready) == len(self.config["hosts"]):
                # stop the execution, if there are no more tasks to execute
                if not self.server.iterate:
                    print("all tasks completed")
                    self.server.executing = False
                    self.server.iterate = False
                    return

                print("all hosts ready")

                # initialize a set of done servers
                self.server.done = set()

                # send a step start command to all the hosts
                # TODO, create a thread for this step
                for h in self.config["hosts"]:
                    print("send next step to %s" % (h,))
                    self.StartStep(h)
        return

    # Default (and /start): echo the request details as plain text.
    self.send_response(200)
    self.end_headers()
    self.wfile.write(message)
    return
def do_GET(self):
    """Handle a GET request sent to the head (master) server.

    Routes, matched in this order:
        /start                    start the "master" timer, reset the
                                  superstep and snapshot counters, call
                                  StartHostServer for every host, then fall
                                  through to the echo reply
        /quit                     delegate to self._quit()
        /getkv                    the "master" key-value file as JSON,
                                  prefixed with "DONE " once execution has
                                  finished; the text "None" until the
                                  superstep count exceeds 1
        /dummy                    empty 200
        /config                   the configuration serialized as JSON
        /exec?p=<path>&t=<mtime>  contents of <path> if it was modified
                                  after <mtime>, otherwise an empty 304
        /done/<host>              record that <host> finished; once every
                                  host has reported, send Prepare to all
        /ready/<host>/<numtasks>  record that <host> is ready; once every
                                  host has reported, start the next
                                  superstep or shut down when no host has
                                  active tasks
        /error/<host>/<msg>       log the supervisor's error and shut down
        anything else             plain-text echo of the request details

    Shared counters and sets on ``self.server`` are updated under the
    server's locks, taken with ``with`` so they are released even when the
    guarded code raises.
    """
    def reply_empty(status):
        # Send a response that consists of headers only.
        self.send_response(status)
        self.send_header('Content-Length', 0)
        self.end_headers()

    parsed_path = urlparse.urlparse(self.path)
    message_parts = [
        'CLIENT VALUES:',
        'client_address=%s (%s)' % (self.client_address,
                                    self.address_string()),
        'command=%s' % self.command,
        'path=%s' % self.path,
        'real path=%s' % parsed_path.path,
        'query=%s' % parsed_path.query,
        'request_version=%s' % self.request_version,
        '',
        'SERVER VALUES:',
        'server_type=%s' % "head server",
        'server_version=%s' % self.server_version,
        'sys_version=%s' % self.sys_version,
        'protocol_version=%s' % self.protocol_version,
        '',
        'HEADERS RECEIVED:',
    ]
    for name, value in sorted(self.headers.items()):
        message_parts.append('%s=%s' % (name, value.rstrip()))
    message_parts.append('')
    message = '\r\n'.join(message_parts)

    subpath = self.path.split("/")
    command = parsed_path.path
    dargs = dict(urlparse.parse_qsl(parsed_path.query))

    if self.path == "/start":
        logging.info("starting host servers")
        self.server.timer.start("master")
        self.server.superstep_count = 0
        self.server.snapshot_counter = 0
        master = self.config["master"]
        for h in self.config["hosts"]:
            self.StartHostServer(h, master)
        # No return here: the echo response at the bottom answers /start.

    elif self.path == "/quit":
        self._quit()
        return

    elif self.path == "/getkv":
        logging.debug("getting kv file")
        self.send_response(200)
        if self.server.superstep_count > 1:
            body = json.dumps(get_kv_file("master"))
            if not self.server.executing and not self.server.iterate:
                # We're done computing everything. So let LS know
                # that this is the final copy of the k-v file.
                body = "DONE " + body
            self.send_header('Content-Length', len(body))
            self.end_headers()
            self.wfile.write(body)
        else:
            self.send_header('Content-Length', len("None"))
            self.end_headers()
            self.wfile.write("None")
        return

    elif self.path == "/dummy":
        logging.debug("dummy request")
        reply_empty(200)
        return

    elif self.path == "/config":
        logging.debug("get configuration")
        body = json.dumps(self.config)
        self.send_response(200)
        self.send_header('Content-Length', len(body))
        self.end_headers()
        self.wfile.write(body)
        return

    elif command == "/exec":
        # NOTE(review): "p" comes straight from the query string and is
        # opened as-is, so any file readable by this process can be
        # fetched.  Confirm the server is only reachable by trusted hosts.
        pname = dargs.get("p")
        ptime = 0
        try:
            ptime = int(dargs.get("t"))
        except (TypeError, ValueError):
            # "t" missing or not a number: treat the caller's copy as old.
            pass

        mtime = int(os.stat(pname).st_mtime)
        if mtime <= ptime:
            # the file has not changed
            reply_empty(304)
            return

        with open(pname) as f:
            content = f.read()

        self.send_response(200)
        self.send_header('Content-Length', len(content))
        self.end_headers()
        self.wfile.write(content)
        return

    elif subpath[1] == "done":
        reply_empty(200)

        if len(subpath) > 2:
            host = subpath[2]

            with self.server.global_lock:
                cur_superstep = self.server.superstep_count
                if cur_superstep > 0:
                    # int(host) raises for a non-numeric id; the with
                    # block still releases the lock in that case.
                    self.server.timer.stop("superstep-%d-host-%d" % (
                        cur_superstep, int(host)))

            with self.server.done_lock:
                self.server.done.add(host)
                str_log = "host %s completed work" % (
                    str(self.server.done))
                done_size = len(self.server.done)

            logging.info(str_log)
            if done_size == len(self.config["hosts"]):
                logging.info("all hosts completed")

                # Fix possible concurrency issue with supervisor.py
                if cur_superstep == 0:
                    time.sleep(5)

                if self.server.snapshot_enabled:
                    with self.server.global_lock:
                        self.server.snapshot_counter += 1
                        cmd = "./snapshot.sh %d" % (
                            self.server.snapshot_counter - 1)
                    logging.info(cmd)
                    os.system(cmd)

                # initialize a set of ready servers,
                # clear the continue indicator
                with self.server.ready_lock:
                    self.server.ready = set()
                self.server.iterate = False

                # send a start message at the beginning
                if not self.server.start:
                    self.server.start = True
                    self.server.executing = True
                    (starthost, starttask) = self.GetStartInfo(self.config)
                    s = "send __Start__ message for task %s to host %s" % (
                        starttask, starthost)
                    logging.debug(s)
                    client.message(starthost, "__Main__", starttask,
                                   "__Start__")

                # send a step start command to all the hosts
                hosts = self.config["hosts"]
                logging.debug("hosts " + str(hosts))
                for h in hosts:
                    logging.debug("send prepare to " + str(h))
                    self.Prepare(h)
                    logging.debug("done sending prepare to " + str(h))
        return

    elif subpath[1] == "ready":
        reply_empty(200)

        if len(subpath) > 2:
            host = subpath[2]

            # get the number of active tasks on the host
            numtasks = 0
            try:
                numtasks = int(subpath[3])
            except (IndexError, ValueError):
                # Count missing or malformed: treat as no active tasks.
                pass

            # execute the next step, if there are active tasks
            if numtasks > 0:
                self.server.iterate = True

            with self.server.ready_lock:
                self.server.ready.add(host)
                str_log = "host %s ready" % (str(self.server.ready))
                ready_size = len(self.server.ready)

            logging.debug(str_log)
            if ready_size == len(self.config["hosts"]):
                # stop the execution, if there are no more tasks to execute
                if not self.server.iterate:
                    logging.info("all tasks completed")
                    self.server.executing = False
                    self.server.iterate = False
                    time.sleep(10)
                    self._quit(force=True)
                    return

                logging.info("all hosts ready")

                # initialize a set of done servers
                with self.server.done_lock:
                    self.server.done = set()

                hosts = self.config["hosts"]

                with self.server.global_lock:
                    self.server.superstep_count += 1
                    for h in hosts:
                        h_id = int(h['id'])
                        self.server.timer.start("superstep-%d-host-%d" % (
                            self.server.superstep_count, h_id))

                # send a step start command to all the hosts
                # TODO: create a thread for this step
                for h in hosts:
                    logging.info("send next step to " + str(h))
                    self.StartStep(h)
        return

    elif subpath[1] == "error":
        reply_empty(200)

        if len(subpath) > 3:
            src_host = subpath[2]
            encoded_msg = subpath[3]
            msg_dict = urlparse.parse_qs(encoded_msg)
            # .get: a report without a "msg" field must still shut the
            # master down instead of failing with KeyError first.
            logging.critical("Error msg from supervisor %s: %s" % (
                src_host, msg_dict.get('msg')))
            logging.critical("Terminating master now")
            self._quit(force=True)
        return

    # Default (and /start): echo the request details as plain text.
    self.send_response(200)
    self.end_headers()
    self.wfile.write(message)
    return