def GetStalloEngineCount():
    """Query the default controller for the currently registered engines.

    NOTE(review): despite the name this returns the list of engine IDs,
    not a count — callers apparently take len() themselves; confirm.
    """
    host = DefaultControllerHost
    port = DefaultControllerPort
    # Create connection to stallo and ask which engines are registered.
    remote = kernel.RemoteController((host, port))
    return remote.getIDs()
def __init__(self, site):
    """Connect to the local controller pair and initialise crawl state for *site*.

    Hard-coded local ports: 10113 is the task controller, 10105 the
    multi-engine (remote) controller.
    """
    self.tc = kernel.TaskController(('127.0.0.1', 10113))
    self.rc = kernel.RemoteController(('127.0.0.1', 10105))
    # Push the fetchParse helper onto every engine before any work starts.
    self.rc.execute('all', fetchParse)
    # Bookkeeping: all links seen, links in flight, links finished.
    self.site = site
    self.allLinks = []
    self.linksWorking = {}
    self.linksDone = {}
def connect(self): try: self.rc = kernel.RemoteController((self.contHost, self.rc_port)) self.tc = kernel.TaskController((self.contHost, self.task_port)) # test if the cluster is really running self.rc.execute(0, 'pass') self.__running = True return 0 except: print "Your cluster is NOT running." return 1
def main(): parser = OptionParser() parser.set_defaults(n=100) parser.set_defaults(tmin=1) parser.set_defaults(tmax=60) parser.set_defaults(controller='localhost') parser.set_defaults(meport=10105) parser.set_defaults(tport=10113) parser.add_option("-n", type='int', dest='n', help='the number of tasks to run') parser.add_option("-t", type='float', dest='tmin', help='the minimum task length in seconds') parser.add_option("-T", type='float', dest='tmax', help='the maximum task length in seconds') parser.add_option("-c", type='string', dest='controller', help='the address of the controller') parser.add_option("-p", type='int', dest='meport', help="the port on which the controller listens for the MultiEngine/RemoteController client") parser.add_option("-P", type='int', dest='tport', help="the port on which the controller listens for the TaskController client") (opts, args) = parser.parse_args() assert opts.tmax >= opts.tmin, "tmax must not be smaller than tmin" rc = kernel.RemoteController((opts.controller, opts.meport)) tc = kernel.TaskController((opts.controller, opts.tport)) rc.block=True nengines = len(rc.getIDs()) rc.executeAll('from IPython.genutils import time') # the jobs should take a random time within a range times = [random.random()*(opts.tmax-opts.tmin)+opts.tmin for i in range(opts.n)] tasks = [kernel.Task("time.sleep(%f)"%t) for t in times] stime = sum(times) print "executing %i tasks, totalling %.1f secs on %i engines"%(opts.n, stime, nengines) time.sleep(1) start = time.time() taskIDs = [tc.run(t) for t in tasks] tc.barrier(taskIDs) stop = time.time() ptime = stop-start scale = stime/ptime print "executed %.1f secs in %.1f secs"%(stime, ptime) print "%.3fx parallel performance on %i engines"%(scale, nengines) print "%.1f%% of theoretical max"%(100*scale/nengines)
def __startEngines(self, dt=None, verbose=True):
    """Launch compute engines and wait until they register with the controller.

    Two launch modes, selected by self.ncluster:
      - falsy: ssh into each host in self.engines and start N ipengine
        processes per host;
      - truthy: launch engines via mpiexec, sizing the run from $PBS_NODEFILE
        (i.e. assumes a PBS batch environment — confirm).

    dt: extra settle time (seconds) after all engines are up; defaults to self.dt.
    verbose: print progress messages while starting.
    Raises Exception if fewer than self.nodecount engines register within
    self.max_wait_time seconds.
    """
    if dt is None:
        dt = self.dt
    if not self.ncluster:
        if verbose:
            print 'Starting engines: '
        self.nodecount = 0
        # self.engines maps hostname -> number of engines to start there.
        for engineHost, numEngines in self.engines.iteritems():
            if verbose:
                print ' Starting %d engine(s) on %s' % (numEngines, engineHost)
            engLog = '%s-eng-%s-' % (self.logfile, engineHost)
            self.nodecount += numEngines
            for i in range(numEngines):
                # -x disables X forwarding; self.sshx is presumably a remote
                # wrapper/env-setup script — confirm. Trailing '&' backgrounds
                # the ssh so all engines start concurrently.
                cmd = "ssh -x %s '%s' 'ipengine --controller-ip=%s --controller-port=%s --logfile=%s' &" % \
                    (engineHost,self.sshx,self.contHost,self.engine_port,engLog)
                os.system(cmd)
    else: #ncluster
        engLog = '%s-eng' % (self.logfile)
        # One engine per PBS node; [:-1] strips the trailing newline from wc.
        self.nodecount = int( os.popen('cat $PBS_NODEFILE|wc -l').read()[:-1])
        print 'nodecount:', self.nodecount
        if verbose:
            print 'Starting', self.nodecount, ' engines...'
        #cmd = "mpiexec ipengine --controller-ip=%s --controller-port=%s --mpi=mpi4py &" % \
        #    (self.contHost, self.engine_port)
        cmd = "mpiexec -nostdout ipengine --controller-ip=%s --controller-port=%s --mpi=mpi4py --logfile=%s &" % \
            (self.contHost, self.engine_port, engLog)
        print 'cmd:', cmd
        os.system(cmd)
    # Give the engines a moment to spawn before polling the controller.
    time.sleep(2.0)
    self.rc = kernel.RemoteController((self.contHost, self.rc_port))
    # Poll every 5 s until all engines have registered or we time out.
    wait_time = 0
    while (len(self.rc.getIDs()) < self.nodecount and wait_time <= self.max_wait_time):
        print 'waiting...'
        time.sleep(5.0)
        wait_time += 5
    print 'started', len(self.rc.getIDs()), 'engines'
    if wait_time > self.max_wait_time:
        raise Exception('TIMEOUT: not all started!!!!!')
    # Extra settle time requested by the caller.
    time.sleep(dt)
def SubmitDelayScanIPython1(**args): delayList = args["delayList"] outputfile = args["outputfile"] molecule = args["molecule"] controllerHost = DefaultControllerHost controllerPort = DefaultControllerPort if "controllerHost" in args: controllerHost = args["controllerHost"] if "controllerPort" in args: controllerPort = args["controllerPort"] #Create connection to stallo print "Connecting to ipython1 controller..." rc = kernel.RemoteController((controllerHost, controllerPort)) partitionCount = len(rc.getIDs()) if partitionCount == 0: raise Exception("No engines connected to controller @ stallo.") #Make sure pyprop is loaded print "Loading pyprop..." rc.executeAll('import os') rc.executeAll('os.environ["PYPROP_SINGLEPROC"] = "1"') rc.executeAll('execfile("example.py")') #scatter delay list print "Distributing jobs..." rc.scatterAll("delayList", delayList) rc.scatterAll("partitionId", r_[:partitionCount]) rc.pushAll(args=args) #run print "Running jobs..." rc.executeAll('args["delayList"] = delayList') rc.executeAll('args["outputfile"] = args["outputfile"] % partitionId[0]') rc.executeAll('RunDelayScan(**args)') #gather all files into one print "Gathering all outputfiles into one..." filenames = [outputfile % i for i in range(partitionCount)] combinedFile = outputfile.replace("%i", "all") rc.push(1, filenames=filenames, combinedFile=combinedFile) rc.execute(1, 'ConcatenateHDF5(filenames, combinedFile)') #remove all single proc files rc.executeAll('os.unlink(args["outputfile"])') print "Done."
def __init__(self, site):
    """Connect to the local task and multi-engine controllers, verify the
    client flavours, and initialise crawl state for *site*.

    NOTE(review): here self.rc is the *task* controller (elsewhere in this
    file 'rc' names the RemoteController) — confirm before refactoring.
    """
    self.rc = kernel.TaskController(('127.0.0.1', 10113))
    self.ipc = kernel.RemoteController(('127.0.0.1', 10105))
    # Sanity-check that the factory returned the XML-RPC client variants.
    assert isinstance(self.rc,
                      ipython1.kernel.taskxmlrpc.XMLRPCInteractiveTaskClient)
    assert isinstance(self.ipc,
                      ipython1.kernel.multienginexmlrpc.XMLRPCInteractiveMultiEngineClient)
    # Push the fetchParse helper onto every engine up front.
    self.ipc.execute('all', fetchParse)
    # Bookkeeping: all links seen, links in flight, links finished.
    self.site = site
    self.allLinks = []
    self.linksWorking = {}
    self.linksDone = {}
def test(): ipc = kernel.RemoteController(('127.0.0.1', 10105)) print ipc.getIDs() print ipc.addr ipc.executeAll('import time') foobar = ipc.executeAll('print time.ctime(time.time())') print foobar ipc.executeAll('from papers import lu06yng as lu') print ipc.executeAll('print lu.root') ipc.executeAll('import healpix') ipc.executeAll('from numarrray import *') nodeIDs = ipc.getIDs() ipc.executeAll('pixIdx = arange(12288, type=Int)') ipc.executeAll('(ipix, opix) = healpix.pix2ang_ring(32, pixIdx)') ipix = ipc.gatherAll('ipix') print ipix.shape
def remote_controller(self):
    """Build a RemoteController for the first instance's public DNS name,
    using the library's default remote-controller port."""
    head_host = self.instances[0].public_dns_name
    default_port = kernel.defaultRemoteController[1]
    return kernel.RemoteController((head_host, default_port))
""" A Distributed Hello world Ken Kinder <*****@*****.**> """ import ipython1.kernel.api as kernel import ipython1.kernel.multienginexmlrpc import ipython1.kernel.taskxmlrpc rc = kernel.TaskController(('127.0.0.1', 10113)) ipc = kernel.RemoteController(('127.0.0.1', 10105)) assert isinstance(rc, ipython1.kernel.taskxmlrpc.XMLRPCInteractiveTaskClient) assert isinstance( ipc, ipython1.kernel.multienginexmlrpc.XMLRPCInteractiveMultiEngineClient) ipc.execute('all', 'import time') helloTaskId = rc.run( kernel.Task('time.sleep(3) ; word = "Hello,"', resultNames=['word'])) worldTaskId = rc.run( kernel.Task('time.sleep(3) ; word = "World!"', resultNames=['word'])) print rc.getTaskResult(helloTaskId)[1]['word'], rc.getTaskResult( worldTaskId)[1]['word']