default=myip) parser.add_option( "-e", "--eip", dest="eip", type="string", help= "external ip address to publish. if Not set will default to the same as -i option", default=None) parser.add_option( "-o", "--notifyport", dest="nport", type="int", help="port to bind to for notifications. default: random", default=None) parser.add_option("-f", "--infoport", dest="fport", type="int", help="port to bind to for information. default: random", default=None) options, args = nwperf.parseServerOptions() if not options.cluster: parser.error("No Cluster Specified") main(options.nameserver, options.cluster, options.ip, options.statefile, options.fport, options.nport, options.eip)
if request[0] == "jobinfo": info = status.jobInfo(request[1]) sock.send_json(info) if request[0] == "running": info = status.runningJobs() sock.send_json(info) if request[0] == "completed": info = status.completedJobs() sock.send_json(info) except: sock.send_json({}) print "done" else: ns.updateServices() status.checkUpdate() if __name__ == "__main__": myip = socket.gethostbyname(socket.gethostname()) parser = nwperf.defaultServerOptionParser() parser.add_option("-S","--name-server",dest="nameserver",type="string",help="The ZMQ URL of the nameserver to register with",default="tcp://nwperf-ns:6967") parser.add_option("-s","--state-file", help="File to store the current slurm state", dest="statefile", default="/tmp/slurmjobs.state") parser.add_option("-c","--cluster",dest="cluster",type="string",help="The cluster prefix to publish as",default=None) parser.add_option("-i", "--ip", dest="ip", type="string", help="ip address to bind to. default: %s" % myip, default=myip) options,args = nwperf.parseServerOptions() if not options.cluster: parser.error("No Cluster Specified") main(options.nameserver,options.cluster,options.ip,options.statefile)
def _start_archiver(pointStore, q, jobStore, cluster):
    """Create and start one JobArchiveGenerator worker tagged with *cluster*."""
    worker = JobArchiveGenerator(pointStore, q, jobStore,
                                 extraFields={"cluster": cluster})
    worker.start()
    return worker


def _open_job_socket(ctx):
    """Return a new, not yet connected SUB socket subscribed to "JobEnd"."""
    sock = ctx.socket(zmq.SUB)
    sock.setsockopt(zmq.SUBSCRIBE, "JobEnd")
    return sock


def main():
    """Run the job archiving daemon.

    Parses the command line (all of --name-server, --point-service,
    --job-service and --cluster are required), starts eight
    JobArchiveGenerator worker processes plus one PointStoreProcess, and
    subscribes to "JobEnd" messages from the job service.  Each received job
    gets its "NodeList" expanded into a "Nodes" list and is placed on the
    queue shared with the workers.

    The loop never returns.  On every pass (at most one second apart) it
    also restarts any worker or point-store process that has died and asks
    the name server to refresh the subscription socket, reconnecting with a
    fresh socket if that refresh fails.
    """
    parser = nwperf.defaultServerOptionParser()
    parser.add_option("-c", "--cluster", action="store", type="string",
                      dest="cluster",
                      help="name of cluster to generate graphs for")
    parser.add_option("-S", "--name-server", action="store", type="string",
                      dest="nameserver",
                      help="The ZMQ URL of the nameserver")
    parser.add_option("-j", "--job-service", action="store", type="string",
                      dest="jobservice",
                      help="The service name that provides job information")
    parser.add_option("-P", "--point-service", action="store", type="string",
                      dest="pointservice",
                      help="The service name that provides point data")
    (options, _) = nwperf.parseServerOptions()

    # parser.error() prints the message and exits, so the first missing
    # option (checked in this order) is the one reported.
    if not options.nameserver:
        parser.error("No name server specified")
    if not options.pointservice:
        parser.error("No point service specified")
    if not options.jobservice:
        parser.error("No job service specified")
    if not options.cluster:
        parser.error("No cluster specified")

    pointStore = MongoPointStore.MongoPointStore()
    jobStore = MongoJobStore.MongoJobStore()
    ns = nnslib.NameServer(options.nameserver)
    ctx = zmq.Context()
    q = multiprocessing.Queue()

    children = [_start_archiver(pointStore, q, jobStore, options.cluster)
                for _ in range(8)]

    psp = PointStoreProcess(pointStore, options.nameserver,
                            options.pointservice)
    psp.start()

    # The initial connection is attempted once; a failure here propagates
    # and aborts startup.
    sock = _open_job_socket(ctx)
    ns.connectService(sock, options.jobservice)

    # One poller for the lifetime of the loop; the socket registration is
    # swapped whenever the socket is replaced below.
    poller = zmq.Poller()
    poller.register(sock, zmq.POLLIN)

    while True:
        if poller.poll(1000):
            # Frame 0 is the topic, frame 1 carries the JSON job record.
            job = json.loads(sock.recv_multipart()[1])
            job["Nodes"] = hostlist.expand_hostlist(job["NodeList"])
            q.put(job)

        # Replace dead workers in place so every slot is examined on each
        # pass (deleting while indexing would skip the following element).
        for idx, child in enumerate(children):
            if not child.is_alive():
                child.join()
                children[idx] = _start_archiver(pointStore, q, jobStore,
                                                options.cluster)

        try:
            ns.updateSocket(sock)
        except Exception:
            # Refresh failed: discard the socket and build a new one.
            poller.unregister(sock)
            sock.close()
            sock = _open_job_socket(ctx)
            connected = False
            while not connected:
                try:
                    ns.connectService(sock, options.jobservice)
                except Exception:
                    print("Error connecting to %s. Sleeping"
                          % options.jobservice)
                    time.sleep(1)
                else:
                    # Only a successful connect ends the retry loop.
                    connected = True
            poller.register(sock, zmq.POLLIN)

        backlog = q.qsize()
        if backlog > 0:
            print("Queue size: %d" % backlog)

        if not psp.is_alive():
            psp.join()
            psp = PointStoreProcess(pointStore, options.nameserver,
                                    options.pointservice)
            # The replacement must be started; otherwise the next pass would
            # call join() on a process that was never started.
            psp.start()