예제 #1
0
def GetStalloEngineCount():
	"""
	Query the default ipython1 controller for its registered engines.

	Connects to (DefaultControllerHost, DefaultControllerPort) and returns
	the result of ``getIDs()`` on that connection.

	NOTE(review): despite the name, this returns the engine IDs themselves,
	not their number -- callers presumably apply len(); confirm before
	changing the return value.
	"""
	stalloAddress = (DefaultControllerHost, DefaultControllerPort)
	return kernel.RemoteController(stalloAddress).getIDs()
예제 #2
0
    def __init__(self, site):
        """
        Connect to the local ipython1 controller and reset link bookkeeping.

        site -- stored unchanged on ``self.site``.
        """
        localhost = '127.0.0.1'

        # Task client on port 10113, multi-engine client on port 10105.
        self.tc = kernel.TaskController((localhost, 10113))
        self.rc = kernel.RemoteController((localhost, 10105))

        # Hand ``fetchParse`` (defined outside this method) to every engine.
        self.rc.execute('all', fetchParse)

        # Bookkeeping containers start out empty.
        self.allLinks = list()
        self.linksWorking, self.linksDone = {}, {}

        self.site = site
예제 #3
0
    def connect(self):
        """
        Open the multi-engine and task clients for the cluster.

        Both clients are created against ``self.contHost`` (ports
        ``self.rc_port`` and ``self.task_port``) and a no-op statement is
        executed on engine 0 to prove the cluster actually answers.

        Returns 0 on success (and marks the cluster as running), or 1 after
        printing a message when any of those steps fails.
        """
        try:
            self.rc = kernel.RemoteController((self.contHost, self.rc_port))
            self.tc = kernel.TaskController((self.contHost, self.task_port))

            # test if the cluster is really running
            self.rc.execute(0, 'pass')
        except Exception:
            # Deliberately broad: any failure here means the cluster is not
            # usable.  ``except Exception`` (rather than a bare ``except:``)
            # still lets KeyboardInterrupt / SystemExit propagate.
            print("Your cluster is NOT running.")
            return 1
        else:
            self.__running = True
            return 0
예제 #4
0
def main():
    """
    Benchmark task-farming throughput of an ipython1 cluster.

    Command-line options (read from ``sys.argv``):
      -n  number of tasks to run (default 100)
      -t  minimum task length in seconds (default 1)
      -T  maximum task length in seconds (default 60)
      -c  controller address (default 'localhost')
      -p  controller port for the MultiEngine/RemoteController client
      -P  controller port for the TaskController client

    Each task is a ``time.sleep`` of a random duration in [tmin, tmax].
    After all tasks finish, the summed sleep time is compared with the
    wall-clock time and the resulting speed-up is printed.

    Exits with a usage error (status 2) when tmax < tmin, and with an error
    message when no engine is registered with the controller.
    """
    parser = OptionParser()
    parser.set_defaults(n=100, tmin=1, tmax=60, controller='localhost',
                        meport=10105, tport=10113)

    parser.add_option("-n", type='int', dest='n',
        help='the number of tasks to run')
    parser.add_option("-t", type='float', dest='tmin',
        help='the minimum task length in seconds')
    parser.add_option("-T", type='float', dest='tmax',
        help='the maximum task length in seconds')
    parser.add_option("-c", type='string', dest='controller',
        help='the address of the controller')
    parser.add_option("-p", type='int', dest='meport',
        help="the port on which the controller listens for the MultiEngine/RemoteController client")
    parser.add_option("-P", type='int', dest='tport',
        help="the port on which the controller listens for the TaskController client")

    (opts, args) = parser.parse_args()
    if opts.tmax < opts.tmin:
        # parser.error() prints the usage text and exits with status 2;
        # unlike ``assert`` the check is not stripped by ``python -O``.
        parser.error("tmax must not be smaller than tmin")

    rc = kernel.RemoteController((opts.controller, opts.meport))
    tc = kernel.TaskController((opts.controller, opts.tport))

    rc.block=True
    nengines = len(rc.getIDs())
    if nengines == 0:
        # Nothing could run the tasks, and the efficiency figure below
        # divides by the engine count.
        raise SystemExit("no engines are registered with the controller")
    rc.executeAll('from IPython.genutils import time')

    # the jobs should take a random time within a range
    times = [random.random()*(opts.tmax-opts.tmin)+opts.tmin for i in range(opts.n)]
    tasks = [kernel.Task("time.sleep(%f)"%t) for t in times]
    stime = sum(times)

    print("executing %i tasks, totalling %.1f secs on %i engines"%(opts.n, stime, nengines))

    time.sleep(1)
    start = time.time()
    taskIDs = [tc.run(t) for t in tasks]
    tc.barrier(taskIDs)
    stop = time.time()

    # speed-up = total sleep time requested / wall-clock time actually spent
    ptime = stop-start
    scale = stime/ptime

    print("executed %.1f secs in %.1f secs"%(stime, ptime))
    print("%.3fx parallel performance on %i engines"%(scale, nengines))
    print("%.1f%% of theoretical max"%(100*scale/nengines))
예제 #5
0
    def __startEngines(self, dt=None, verbose=True):
        if dt is None: dt = self.dt

        if not self.ncluster:
            if verbose:
                print 'Starting engines:   '
            self.nodecount = 0

            for engineHost, numEngines in self.engines.iteritems():
                if verbose:
                    print '  Starting %d engine(s) on %s' % (numEngines,
                                                             engineHost)
                engLog = '%s-eng-%s-' % (self.logfile, engineHost)

                self.nodecount += numEngines
                for i in range(numEngines):
                    cmd = "ssh -x %s '%s' 'ipengine --controller-ip=%s --controller-port=%s --logfile=%s' &" % \
                              (engineHost,self.sshx,self.contHost,self.engine_port,engLog)
                    os.system(cmd)
        else:
            #ncluster
            engLog = '%s-eng' % (self.logfile)

            self.nodecount = int(
                os.popen('cat $PBS_NODEFILE|wc -l').read()[:-1])
            print 'nodecount:', self.nodecount
            if verbose:
                print 'Starting', self.nodecount, ' engines...'
            #cmd = "mpiexec ipengine --controller-ip=%s --controller-port=%s --mpi=mpi4py &" % \
            #           (self.contHost, self.engine_port)
            cmd = "mpiexec -nostdout ipengine --controller-ip=%s --controller-port=%s --mpi=mpi4py --logfile=%s &" % \
                       (self.contHost, self.engine_port, engLog)
            print 'cmd:', cmd
            os.system(cmd)

        time.sleep(2.0)
        self.rc = kernel.RemoteController((self.contHost, self.rc_port))
        wait_time = 0
        while (len(self.rc.getIDs()) < self.nodecount
               and wait_time <= self.max_wait_time):
            print 'waiting...'
            time.sleep(5.0)
            wait_time += 5

        print 'started', len(self.rc.getIDs()), 'engines'

        if wait_time > self.max_wait_time:
            raise Exception('TIMEOUT: not all started!!!!!')

        time.sleep(dt)
예제 #6
0
def SubmitDelayScanIPython1(**args):
	"""
	Distribute a delay scan over the engines of an ipython1 controller.

	Required keyword arguments:
	  delayList  -- sequence of delays; scattered across the engines
	  outputfile -- file name pattern containing "%i"; each engine writes
	                to the pattern filled with its partition id, and the
	                combined file replaces "%i" with "all"
	  molecule   -- must be present (forwarded with the other arguments)

	Optional keyword arguments:
	  controllerHost / controllerPort -- controller address; default to
	                DefaultControllerHost / DefaultControllerPort

	All keyword arguments are pushed to the engines as ``args`` and passed
	to ``RunDelayScan(**args)`` there.  Afterwards one engine concatenates
	the per-partition files and every engine removes its own file.

	Raises Exception when no engine is connected to the controller.
	"""
	delayList = args["delayList"]
	outputfile = args["outputfile"]
	# Not used locally; the lookup makes a missing "molecule" fail right
	# here with a KeyError instead of later on the engines.
	molecule = args["molecule"]

	controllerHost = args.get("controllerHost", DefaultControllerHost)
	controllerPort = args.get("controllerPort", DefaultControllerPort)

	#Create connection to stallo
	print("Connecting to ipython1 controller...")
	rc = kernel.RemoteController((controllerHost, controllerPort))
	engineIds = rc.getIDs()
	partitionCount = len(engineIds)

	if partitionCount == 0:
		raise Exception("No engines connected to controller @ stallo.")

	#Make sure pyprop is loaded
	print("Loading pyprop...")
	rc.executeAll('import os')
	rc.executeAll('os.environ["PYPROP_SINGLEPROC"] = "1"')
	rc.executeAll('execfile("example.py")')

	#scatter delay list
	print("Distributing jobs...")
	rc.scatterAll("delayList", delayList)
	rc.scatterAll("partitionId", r_[:partitionCount])
	rc.pushAll(args=args)

	#run
	print("Running jobs...")
	rc.executeAll('args["delayList"] = delayList')
	rc.executeAll('args["outputfile"] = args["outputfile"] % partitionId[0]')
	rc.executeAll('RunDelayScan(**args)')

	#gather all files into one
	print("Gathering all outputfiles into one...")
	filenames = [outputfile % i for i in range(partitionCount)]
	combinedFile = outputfile.replace("%i", "all")
	# Engine 1 does the concatenation when it is registered; otherwise fall
	# back to the first registered engine so that a single-engine cluster
	# still works.
	if 1 in engineIds:
		gatherEngine = 1
	else:
		gatherEngine = engineIds[0]
	rc.push(gatherEngine, filenames=filenames, combinedFile=combinedFile)
	rc.execute(gatherEngine, 'ConcatenateHDF5(filenames, combinedFile)')
	#remove all single proc files
	rc.executeAll('os.unlink(args["outputfile"])')

	print("Done.")
예제 #7
0
    def __init__(self, site):
        """
        Connect to the local ipython1 controller and reset link bookkeeping.

        Note the attribute naming: ``self.rc`` holds the *task* client and
        ``self.ipc`` holds the multi-engine client.

        site -- stored unchanged on ``self.site``.
        """
        loopback = '127.0.0.1'

        self.rc = kernel.TaskController((loopback, 10113))
        self.ipc = kernel.RemoteController((loopback, 10105))

        # Check that both clients are the XML-RPC implementations.
        assert isinstance(
            self.rc,
            ipython1.kernel.taskxmlrpc.XMLRPCInteractiveTaskClient)
        assert isinstance(
            self.ipc,
            ipython1.kernel.multienginexmlrpc.XMLRPCInteractiveMultiEngineClient)

        # Hand ``fetchParse`` (defined outside this method) to every engine.
        self.ipc.execute('all', fetchParse)

        # Bookkeeping containers start out empty.
        self.allLinks = list()
        self.linksWorking, self.linksDone = {}, {}

        self.site = site
예제 #8
0
def test():
    """
    Smoke-test a local ipython1 controller with a small healpix computation.

    Prints the engine IDs and controller address, runs a few statements on
    every engine, computes ``healpix.pix2ang_ring`` for 12288 pixel indices
    on each engine, gathers ``ipix`` back and prints its shape.
    """
    ipc = kernel.RemoteController(('127.0.0.1', 10105))
    print(ipc.getIDs())
    print(ipc.addr)

    ipc.executeAll('import time')
    foobar = ipc.executeAll('print time.ctime(time.time())')
    print(foobar)
    ipc.executeAll('from papers import lu06yng as lu')
    print(ipc.executeAll('print lu.root'))
    ipc.executeAll('import healpix')

    # numarray supplies the arange() and Int names used on the engines below.
    ipc.executeAll('from numarray import *')

    # 12288 == 12 * 32**2 -- presumably the pixel count for nside=32; confirm.
    ipc.executeAll('pixIdx = arange(12288, type=Int)')
    ipc.executeAll('(ipix, opix) = healpix.pix2ang_ring(32, pixIdx)')
    ipix = ipc.gatherAll('ipix')

    print(ipix.shape)
예제 #9
0
 def remote_controller(self):
     """
     Return a RemoteController client for the first instance.

     The host is the public DNS name of ``self.instances[0]``; the port is
     the one from ``kernel.defaultRemoteController``.
     """
     host = self.instances[0].public_dns_name
     port = kernel.defaultRemoteController[1]
     return kernel.RemoteController((host, port))
예제 #10
0
"""
A Distributed Hello world
Ken Kinder <*****@*****.**>
"""
import ipython1.kernel.api as kernel
import ipython1.kernel.multienginexmlrpc
import ipython1.kernel.taskxmlrpc

# Task client (port 10113) and multi-engine client (port 10105) on localhost.
# Note the naming: ``rc`` is the task client, ``ipc`` the multi-engine client.
rc = kernel.TaskController(('127.0.0.1', 10113))
ipc = kernel.RemoteController(('127.0.0.1', 10105))
assert isinstance(
    rc,
    ipython1.kernel.taskxmlrpc.XMLRPCInteractiveTaskClient)
assert isinstance(
    ipc,
    ipython1.kernel.multienginexmlrpc.XMLRPCInteractiveMultiEngineClient)

# The tasks below call time.sleep(), so every engine imports ``time`` first.
ipc.execute('all', 'import time')

# Each task sleeps three seconds and then binds one half of the greeting to
# ``word``, which is requested back through resultNames.
helloTaskId = rc.run(kernel.Task(
    'time.sleep(3) ; word = "Hello,"', resultNames=['word']))
worldTaskId = rc.run(kernel.Task(
    'time.sleep(3) ; word = "World!"', resultNames=['word']))

# Element 1 of a task result is indexed by result name.
print('%s %s' % (rc.getTaskResult(helloTaskId)[1]['word'],
                 rc.getTaskResult(worldTaskId)[1]['word']))