def __init__(self, site):
    """Connect to the local controller and reset the link bookkeeping.

    A task client is created for 127.0.0.1:10113 and a remote (multi-engine)
    client for 127.0.0.1:10105; ``fetchParse`` is then executed with the
    target ``'all'`` through the remote client.

    site -- stored unchanged as ``self.site``.
    """
    controller_host = '127.0.0.1'
    task_address = (controller_host, 10113)
    engine_address = (controller_host, 10105)

    self.tc = kernel.TaskController(task_address)
    self.rc = kernel.RemoteController(engine_address)
    # fetchParse comes from module scope; presumably it is the source code
    # the engines need before tasks are submitted -- confirm at its definition.
    self.rc.execute('all', fetchParse)

    # Link bookkeeping starts out empty.
    self.allLinks = []
    self.linksWorking = {}
    self.linksDone = {}
    self.site = site
def connect(self):
    """Attach to a cluster that is expected to be running already.

    Builds the RemoteController and TaskController clients from
    ``self.contHost``, ``self.rc_port`` and ``self.task_port`` and executes
    ``pass`` on target 0 to check that the cluster answers.

    Returns 0 on success (the cluster is then marked as running) and 1 if
    any of those steps raised an exception.
    """
    try:
        self.rc = kernel.RemoteController((self.contHost, self.rc_port))
        self.tc = kernel.TaskController((self.contHost, self.task_port))
        # test if the cluster is really running
        self.rc.execute(0, 'pass')
    except Exception:
        # Any client-side failure means "not running", but KeyboardInterrupt
        # and SystemExit must still propagate, hence no bare ``except:``.
        print("Your cluster is NOT running.")
        return 1
    self.__running = True
    return 0
def main():
    """Time a batch of random-length sleep tasks on the task controller.

    Command line options:
      -n  number of tasks to run (default 100)
      -t  minimum task length in seconds (default 1)
      -T  maximum task length in seconds (default 60)
      -c  address of the controller (default 'localhost')
      -p  controller port for the MultiEngine/RemoteController client
          (default 10105)
      -P  controller port for the TaskController client (default 10113)

    Each task is ``time.sleep(t)`` with ``t`` drawn uniformly from
    [tmin, tmax).  The total sleep time, the wall-clock time and the
    resulting speed-up relative to the number of engines are printed.
    Exits through ``parser.error`` (usage message, status 2) when tmax is
    smaller than tmin.
    """
    parser = OptionParser()
    parser.set_defaults(n=100, tmin=1, tmax=60, controller='localhost',
                        meport=10105, tport=10113)

    parser.add_option("-n", type='int', dest='n',
                      help='the number of tasks to run')
    parser.add_option("-t", type='float', dest='tmin',
                      help='the minimum task length in seconds')
    parser.add_option("-T", type='float', dest='tmax',
                      help='the maximum task length in seconds')
    parser.add_option("-c", type='string', dest='controller',
                      help='the address of the controller')
    parser.add_option("-p", type='int', dest='meport',
                      help="the port on which the controller listens for the MultiEngine/RemoteController client")
    parser.add_option("-P", type='int', dest='tport',
                      help="the port on which the controller listens for the TaskController client")

    opts, _ = parser.parse_args()
    # A real check instead of ``assert``: asserts vanish under ``python -O``.
    if not opts.tmax >= opts.tmin:
        parser.error("tmax must not be smaller than tmin")

    rc = kernel.RemoteController((opts.controller, opts.meport))
    tc = kernel.TaskController((opts.controller, opts.tport))

    rc.block = True
    nengines = len(rc.getIDs())
    # The task code below refers to ``time``, so every engine imports it first.
    rc.executeAll('from IPython.genutils import time')

    # the jobs should take a random time within a range
    span = opts.tmax - opts.tmin
    times = [random.random() * span + opts.tmin for _ in range(opts.n)]
    tasks = [kernel.Task("time.sleep(%f)" % t) for t in times]
    stime = sum(times)

    print("executing %i tasks, totalling %.1f secs on %i engines" % (opts.n, stime, nengines))
    time.sleep(1)
    start = time.time()
    taskIDs = [tc.run(t) for t in tasks]
    tc.barrier(taskIDs)
    stop = time.time()

    ptime = stop - start
    scale = stime / ptime

    print("executed %.1f secs in %.1f secs" % (stime, ptime))
    print("%.3fx parallel performance on %i engines" % (scale, nengines))
    print("%.1f%% of theoretical max" % (100 * scale / nengines))
def __init__(self, tcserver=None):
    """Create an IPython1Controller instance.

    tcserver is the server and port of the IPython1 TaskController, in the
    form <ip>:<port>.  When it is omitted (or empty) the value of the
    'ipython1.controller' configuration entry is used instead
    (default is "127.0.0.1:10113").

    If IPython1 could not be imported a message is printed and the instance
    is left without ``tcserver`` and ``tc`` attributes.

    Raises ValueError when the address is not of the form <ip>:<port> or the
    port is not an integer.
    """
    if not ipy1kernel:
        print("IPython1 not found.")
        return None

    self.tcserver = tcserver or config.get('ipython1.controller')

    # Split on the last colon and hand the port over as an int, the same
    # (host, int_port) shape the other TaskController callers use.
    host, sep, port = self.tcserver.rpartition(':')
    if not sep or not host:
        raise ValueError(
            "tcserver must be of the form <ip>:<port>, got %r" % (self.tcserver,))
    self.tc = ipy1kernel.TaskController((host, int(port)))
def __init__(self, site):
    """Connect to the local controller and reset the link bookkeeping.

    Note the attribute names: ``self.rc`` holds the *task* client
    (127.0.0.1:10113) and ``self.ipc`` the multi-engine client
    (127.0.0.1:10105).  Both are checked to be the XML-RPC interactive
    client classes, then ``fetchParse`` is executed with the target
    ``'all'`` through the multi-engine client.

    site -- stored unchanged as ``self.site``.
    """
    controller_host = '127.0.0.1'
    self.rc = kernel.TaskController((controller_host, 10113))
    self.ipc = kernel.RemoteController((controller_host, 10105))

    # Sanity checks on the concrete client classes returned above.
    assert isinstance(
        self.rc,
        ipython1.kernel.taskxmlrpc.XMLRPCInteractiveTaskClient)
    assert isinstance(
        self.ipc,
        ipython1.kernel.multienginexmlrpc.XMLRPCInteractiveMultiEngineClient)

    # fetchParse comes from module scope; presumably it is the source code
    # the engines need before tasks are submitted -- confirm at its definition.
    self.ipc.execute('all', fetchParse)

    # Link bookkeeping starts out empty.
    self.allLinks = []
    self.linksWorking = {}
    self.linksDone = {}
    self.site = site
def start(self, startmpds=False, dt=None, waitafter=0.0, verbose=True):
    """Start the controller, optionally the MPDs, and then the engines.

    startmpds -- also start the MPDs before the engines.
    dt        -- pause in seconds between the steps; defaults to ``self.dt``.
                 The controller itself is always started with ``dt=4.0``.
    waitafter -- accepted, but not used in the code shown here.
    verbose   -- forwarded to the individual start helpers.

    Raises Exception("cluster already running") if ``self.isRunning()`` is
    true.  If starting the engines fails, the task client is still created,
    an existing remote client is activated, the cluster is flagged as
    running, and the original exception is re-raised.
    """
    if self.isRunning():
        raise Exception("cluster already running")
    if dt is None:
        dt = self.dt

    self.__startController(verbose=verbose, dt=4.0)
    time.sleep(dt)

    if startmpds:
        self.__startMPDs(verbose=verbose)
        # NOTE(review): the source layout was lost; this pause is assumed to
        # belong to the MPD step only.
        time.sleep(dt)

    try:
        self.__startEngines(verbose=verbose)
        time.sleep(dt)
    except Exception:
        # Presumably the cluster is flagged as running so that whatever did
        # come up can still be shut down later -- confirm against the callers.
        self.tc = kernel.TaskController((self.contHost, self.task_port))
        # The remote client may not exist yet if the engines failed early;
        # do not let an AttributeError hide the real error.
        rc = getattr(self, 'rc', None)
        if rc is not None:
            rc.activate()
        self.__running = True
        # Bare raise keeps the original traceback (``raise inst`` would not).
        raise
def task_controller(self):
    """Return a new TaskController client for the first instance.

    The host is ``self.instances[0].public_dns_name``; the port is the
    second element of ``kernel.defaultTaskController``.
    """
    controller_host = self.instances[0].public_dns_name
    task_port = kernel.defaultTaskController[1]
    return kernel.TaskController((controller_host, task_port))
""" A Distributed Hello world Ken Kinder <*****@*****.**> """ import ipython1.kernel.api as kernel import ipython1.kernel.multienginexmlrpc import ipython1.kernel.taskxmlrpc rc = kernel.TaskController(('127.0.0.1', 10113)) ipc = kernel.RemoteController(('127.0.0.1', 10105)) assert isinstance(rc, ipython1.kernel.taskxmlrpc.XMLRPCInteractiveTaskClient) assert isinstance( ipc, ipython1.kernel.multienginexmlrpc.XMLRPCInteractiveMultiEngineClient) ipc.execute('all', 'import time') helloTaskId = rc.run( kernel.Task('time.sleep(3) ; word = "Hello,"', resultNames=['word'])) worldTaskId = rc.run( kernel.Task('time.sleep(3) ; word = "World!"', resultNames=['word'])) print rc.getTaskResult(helloTaskId)[1]['word'], rc.getTaskResult( worldTaskId)[1]['word']
class Cluster(object):
    """Launch an IPython1 controller and its engines from a cluster config.

    Two modes are supported, selected by ``clusterConfig.getNcluster()``:
    when it is false the controller and engines are started on remote hosts
    through ssh; otherwise ("ncluster") the controller is started locally
    and the engines through ``mpiexec``, with the engine count taken from
    ``$PBS_NODEFILE``.

    NOTE(review): ``start()`` calls ``self.isRunning()``, which is not
    defined in the part of the class shown here.
    """

    def __init__(self, clusterConfig, dt=0.5, use_mpd=False):
        """Read the cluster configuration and choose a log file prefix.

        clusterConfig -- object providing getSSHX, getControllerHost,
                         getEnginePort, getRemoteControllerPort,
                         getTaskControllerPort, getEngines and getNcluster.
        dt            -- default pause in seconds between start-up steps.
        use_mpd       -- stored as ``self.use_mpd``.
        """
        self.dt = dt
        self.use_mpd = use_mpd
        # Upper bound in seconds on the wait for engines to register.
        self.max_wait_time = 300

        # read configuration
        self.sshx = clusterConfig.getSSHX()
        self.contHost = clusterConfig.getControllerHost()
        self.engine_port = clusterConfig.getEnginePort()
        self.rc_port = clusterConfig.getRemoteControllerPort()
        self.task_port = clusterConfig.getTaskControllerPort()
        self.engines = clusterConfig.getEngines()
        self.ncluster = clusterConfig.getNcluster()

        # setup logfile: <ipython dir>/log/ipcluster-<pid>
        ipdir = cutils.getIpythonDir()
        logdir_base = os.path.join(ipdir, 'log')
        if not os.path.isdir(logdir_base):
            os.makedirs(logdir_base)
        logfile = os.path.join(logdir_base, 'ipcluster')
        self.logfile = '%s-%s' % (logfile, os.getpid())
        self.__running = False

    def __startController(self, dt=None, verbose=True):
        """Launch ``ipcontroller`` in the background, then sleep ``dt`` seconds.

        In ncluster mode ``self.contHost`` is replaced by the local
        ``$HOSTNAME`` and the controller is started on this machine;
        otherwise it is started on ``self.contHost`` through ssh.
        """
        if dt is None:
            dt = self.dt
        if verbose:
            print('Starting controller:')
            print(' Starting controller on %s' % (self.contHost,))
        # The log name is built from the configured host, before the
        # ncluster branch below may replace self.contHost.
        contLog = '%s-con-%s-' % (self.logfile, self.contHost)
        if not self.ncluster:
            cmd = ("ssh %s '%s' 'ipcontroller --engine-port=%s --remote-cont-port=%s "
                   "--task-port=%s --logfile=%s' &") % (
                       self.contHost, self.sshx, self.engine_port,
                       self.rc_port, self.task_port, contLog)
            os.system(cmd)
        else:
            # on the ncluster
            # The earlier socket.gethostbyaddr() result was overwritten by
            # this value right away, so only this lookup is kept.
            self.contHost = os.popen('echo $HOSTNAME').read()[:-1]
            cmd = ("ipcontroller --engine-port=%s --remote-cont-port=%s "
                   "--task-port=%s --logfile=%s &") % (
                       self.engine_port, self.rc_port, self.task_port, contLog)
            print('cmd: %s' % (cmd,))
            os.system(cmd)
        time.sleep(dt)

    def __startEngines(self, dt=None, verbose=True):
        """Launch the engines and wait until they have registered.

        Sets ``self.nodecount`` to the number of engines requested and
        ``self.rc`` to a RemoteController client for the controller.

        Raises Exception('TIMEOUT: not all started!!!!!') if fewer than
        ``self.nodecount`` engines are registered when the wait gives up.
        """
        if dt is None:
            dt = self.dt
        if not self.ncluster:
            if verbose:
                print('Starting engines: ')
            self.nodecount = 0
            for engineHost, numEngines in self.engines.iteritems():
                if verbose:
                    print(' Starting %d engine(s) on %s' % (numEngines, engineHost))
                engLog = '%s-eng-%s-' % (self.logfile, engineHost)
                self.nodecount += numEngines
                # Same command for every engine on this host.
                cmd = ("ssh -x %s '%s' 'ipengine --controller-ip=%s "
                       "--controller-port=%s --logfile=%s' &") % (
                           engineHost, self.sshx, self.contHost,
                           self.engine_port, engLog)
                for _ in range(numEngines):
                    os.system(cmd)
        else:
            # ncluster: one engine per line of the PBS node file
            engLog = '%s-eng' % (self.logfile,)
            self.nodecount = int(
                os.popen('cat $PBS_NODEFILE|wc -l').read()[:-1])
            print('nodecount: %s' % (self.nodecount,))
            if verbose:
                print('Starting %s  engines...' % (self.nodecount,))
            cmd = ("mpiexec -nostdout ipengine --controller-ip=%s "
                   "--controller-port=%s --mpi=mpi4py --logfile=%s &") % (
                       self.contHost, self.engine_port, engLog)
            print('cmd: %s' % (cmd,))
            os.system(cmd)

        # NOTE(review): the source layout was lost; connecting and waiting
        # are assumed to apply to both modes, since both set self.nodecount
        # and start() needs self.rc afterwards.
        time.sleep(2.0)
        self.rc = kernel.RemoteController((self.contHost, self.rc_port))
        self.__waitForEngines()
        time.sleep(dt)

    def __waitForEngines(self):
        """Poll the controller every 5 s until all engines have registered.

        Gives up once more than ``self.max_wait_time`` seconds have been
        waited and raises if engines are still missing at that point.
        """
        wait_time = 0
        started = len(self.rc.getIDs())
        while started < self.nodecount and wait_time <= self.max_wait_time:
            print('waiting...')
            time.sleep(5.0)
            wait_time += 5
            started = len(self.rc.getIDs())
        print('started %s engines' % (started,))
        # Decide on the engine count, not on the elapsed time: the last poll
        # may find every engine even though the time limit has just passed.
        if started < self.nodecount:
            raise Exception('TIMEOUT: not all started!!!!!')

    def __startMPDs(self, dt=None, verbose=True):
        """Start ``mpd`` through ssh on every engine host, then sleep ``dt``."""
        if dt is None:
            dt = self.dt
        if verbose:
            print('Starting mpds: ')
        for engineHost in self.engines.iterkeys():
            print(' Starting mpd on %s' % (engineHost,))
            cmd = "ssh %s 'mpd' &" % (engineHost,)
            os.system(cmd)
        time.sleep(dt)

    def start(self, startmpds=False, dt=None, waitafter=0.0, verbose=True):
        """Start the controller, optionally the MPDs, and then the engines.

        startmpds -- also start the MPDs before the engines.
        dt        -- pause in seconds between the steps; defaults to
                     ``self.dt``.  The controller is always started with
                     ``dt=4.0``.
        waitafter -- extra pause in seconds once everything is up.
        verbose   -- forwarded to the individual start helpers.

        Raises Exception("cluster already running") if ``self.isRunning()``
        is true.  If starting the engines fails, the task client is still
        created, an existing remote client is activated, the cluster is
        flagged as running, and the original exception is re-raised.
        """
        if self.isRunning():
            raise Exception("cluster already running")
        if dt is None:
            dt = self.dt

        self.__startController(verbose=verbose, dt=4.0)
        time.sleep(dt)

        if startmpds:
            self.__startMPDs(verbose=verbose)
            # NOTE(review): the source layout was lost; this pause is
            # assumed to belong to the MPD step only.
            time.sleep(dt)

        try:
            self.__startEngines(verbose=verbose)
            time.sleep(dt)
        except Exception:
            # Presumably flagged as running so that whatever did come up can
            # still be shut down later -- confirm against the callers.
            self.tc = kernel.TaskController((self.contHost, self.task_port))
            # The remote client may not exist yet if the engines failed
            # early; do not let an AttributeError hide the real error.
            rc = getattr(self, 'rc', None)
            if rc is not None:
                rc.activate()
            self.__running = True
            # Bare raise keeps the original traceback (``raise inst`` would not).
            raise

        self.tc = kernel.TaskController((self.contHost, self.task_port))
        self.rc.activate()
        if waitafter > 0.0:
            time.sleep(waitafter)
        print("Your cluster is up and running.")
        self.__running = True