Ejemplo n.º 1
0
    def __init__(self, site):
        """Connect to the local controller and set up link bookkeeping.

        site is stored unchanged on the instance as ``self.site``.
        """
        local_host = '127.0.0.1'
        task_client = kernel.TaskController((local_host, 10113))
        engine_client = kernel.RemoteController((local_host, 10105))
        self.tc, self.rc = task_client, engine_client

        # Execute fetchParse on the 'all' engine target before any work
        # is scheduled.
        engine_client.execute('all', fetchParse)

        # Link bookkeeping containers, all empty to begin with.
        self.allLinks = list()
        self.linksWorking, self.linksDone = dict(), dict()

        self.site = site
Ejemplo n.º 2
0
    def connect(self):
        """Attach client objects to the controller and probe the cluster.

        Creates ``self.rc`` and ``self.tc`` for the configured controller
        host and ports, then executes a no-op on engine 0 to check that
        the cluster really answers.

        Returns 0 on success (and marks the cluster as running), or 1
        after printing a message when any step fails.
        """
        try:
            self.rc = kernel.RemoteController((self.contHost, self.rc_port))
            self.tc = kernel.TaskController((self.contHost, self.task_port))

            # test if the cluster is really running
            self.rc.execute(0, 'pass')
        except Exception:
            # Only ordinary errors mean "not running"; KeyboardInterrupt
            # and SystemExit must propagate instead of being swallowed.
            print("Your cluster is NOT running.")
            return 1

        self.__running = True
        return 0
Ejemplo n.º 3
0
def main():
    """Run a batch of sleep tasks and report the parallel speed-up.

    Command-line options select the number of tasks, the range of task
    durations and the controller address/ports.  Each task sleeps for a
    random time drawn uniformly from [tmin, tmax]; the total sleep time
    is compared with the wall-clock time needed to run all tasks.

    Exits through ``parser.error`` when tmax is smaller than tmin, and
    raises SystemExit when the controller reports no engines.
    """
    parser = OptionParser()
    parser.set_defaults(
        n=100,
        tmin=1,
        tmax=60,
        controller='localhost',
        meport=10105,
        tport=10113,
    )

    parser.add_option("-n", type='int', dest='n',
        help='the number of tasks to run')
    parser.add_option("-t", type='float', dest='tmin',
        help='the minimum task length in seconds')
    parser.add_option("-T", type='float', dest='tmax',
        help='the maximum task length in seconds')
    parser.add_option("-c", type='string', dest='controller',
        help='the address of the controller')
    parser.add_option("-p", type='int', dest='meport',
        help="the port on which the controller listens for the MultiEngine/RemoteController client")
    parser.add_option("-P", type='int', dest='tport',
        help="the port on which the controller listens for the TaskController client")

    opts, _ = parser.parse_args()
    # Validate explicitly: an assert would be stripped under ``python -O``.
    if opts.tmax < opts.tmin:
        parser.error("tmax must not be smaller than tmin")

    rc = kernel.RemoteController((opts.controller, opts.meport))
    tc = kernel.TaskController((opts.controller, opts.tport))

    rc.block = True
    nengines = len(rc.getIDs())
    if nengines == 0:
        # The final report divides by the engine count.
        raise SystemExit("no engines are registered with the controller")
    rc.executeAll('from IPython.genutils import time')

    # the jobs should take a random time within a range
    times = [random.uniform(opts.tmin, opts.tmax) for _ in range(opts.n)]
    tasks = [kernel.Task("time.sleep(%f)" % t) for t in times]
    stime = sum(times)

    print("executing %i tasks, totalling %.1f secs on %i engines" % (opts.n, stime, nengines))

    time.sleep(1)
    start = time.time()
    taskIDs = [tc.run(t) for t in tasks]
    tc.barrier(taskIDs)
    stop = time.time()

    ptime = stop - start
    # Guard against a zero interval (e.g. no tasks on a coarse clock).
    if ptime > 0:
        scale = stime / ptime
    else:
        scale = 0.0

    print("executed %.1f secs in %.1f secs" % (stime, ptime))
    print("%.3fx parallel performance on %i engines" % (scale, nengines))
    print("%.1f%% of theoretical max" % (100 * scale / nengines))
Ejemplo n.º 4
0
    def __init__(self, tcserver=None):
        """Create a IPython1Controller instance.

        tcserver is the server and port of the IPython1 TaskController.
        It should be of the form <ip>:<port>.  When it is omitted, the
        'ipython1.controller' configuration entry is used instead
        (documented default: "127.0.0.1:10113").

        If the IPython1 kernel module is unavailable a message is printed
        and the instance is left without a task controller.

        Raises ValueError when the address has no ':' separator or the
        port is not an integer.
        """

        if not ipy1kernel:
            print("IPython1 not found.")
            return

        self.tcserver = tcserver or config.get('ipython1.controller')
        # Split on the last ':' only and hand the port over as an int,
        # which is how the controller address is passed elsewhere.
        host, port = self.tcserver.rsplit(':', 1)
        self.tc = ipy1kernel.TaskController((host, int(port)))
Ejemplo n.º 5
0
    def __init__(self, site):
        """Connect to the local controller and set up link bookkeeping.

        Note that ``self.rc`` holds the task client and ``self.ipc`` the
        multi-engine client.  site is stored as ``self.site``.
        """
        local_host = '127.0.0.1'
        task_client = kernel.TaskController((local_host, 10113))
        engine_client = kernel.RemoteController((local_host, 10105))
        self.rc, self.ipc = task_client, engine_client

        # Sanity-check that both factories returned the XML-RPC clients.
        expected_task_cls = (
            ipython1.kernel.taskxmlrpc.XMLRPCInteractiveTaskClient)
        assert isinstance(task_client, expected_task_cls)
        expected_engine_cls = (
            ipython1.kernel.multienginexmlrpc.
            XMLRPCInteractiveMultiEngineClient)
        assert isinstance(engine_client, expected_engine_cls)

        # Execute fetchParse on the 'all' engine target.
        engine_client.execute('all', fetchParse)

        # Link bookkeeping containers, all empty to begin with.
        self.allLinks = list()
        self.linksWorking, self.linksDone = dict(), dict()

        self.site = site
Ejemplo n.º 6
0
    def start(self, startmpds=False, dt=None, waitafter=0.0, verbose=True):
        """Start the controller, optionally the mpds, and then the engines.

        dt is the pause in seconds between start-up phases and defaults
        to ``self.dt``.  Raises Exception if the cluster is already
        running; any error from starting the engines is re-raised after
        the instance has been flagged as running.
        """

        if self.isRunning():
            raise Exception("cluster already running")

        if dt is None:
            dt = self.dt

        self.__startController(verbose=verbose, dt=4.0)
        time.sleep(dt)

        if startmpds:
            self.__startMPDs(verbose=verbose)
            time.sleep(dt)

        try:
            self.__startEngines(verbose=verbose)
            time.sleep(dt)
        except Exception:
            # The controller was already launched, so the instance is
            # still flagged as running (presumably so it can be shut
            # down later -- confirm against the stop logic).
            self.tc = kernel.TaskController((self.contHost, self.task_port))
            # self.rc may be missing if engine start-up failed early;
            # do not let that mask the original error.
            if getattr(self, 'rc', None) is not None:
                self.rc.activate()
            self.__running = True
            # Bare raise keeps the original traceback.
            raise
Ejemplo n.º 7
0
 def task_controller(self):
     """Return a TaskController client for the first instance.

     The address pairs the first instance's public DNS name with the
     port taken from ``kernel.defaultTaskController``.
     """
     first_instance = self.instances[0]
     port = kernel.defaultTaskController[1]
     address = (first_instance.public_dns_name, port)
     return kernel.TaskController(address)
Ejemplo n.º 8
0
"""
A Distributed Hello world
Ken Kinder <*****@*****.**>
"""
import ipython1.kernel.api as kernel
import ipython1.kernel.multienginexmlrpc
import ipython1.kernel.taskxmlrpc

# Task client (rc) and multi-engine client (ipc) for the local controller.
LOCAL_HOST = '127.0.0.1'
rc = kernel.TaskController((LOCAL_HOST, 10113))
ipc = kernel.RemoteController((LOCAL_HOST, 10105))

# Check that the factories returned the XML-RPC client implementations.
task_client_cls = ipython1.kernel.taskxmlrpc.XMLRPCInteractiveTaskClient
assert isinstance(rc, task_client_cls)
engine_client_cls = (
    ipython1.kernel.multienginexmlrpc.XMLRPCInteractiveMultiEngineClient)
assert isinstance(ipc, engine_client_cls)

# Every engine needs the time module before the tasks run.
ipc.execute('all', 'import time')

# Submit the two tasks; each sleeps and then binds ``word``.
hello_task = kernel.Task(
    'time.sleep(3) ; word = "Hello,"', resultNames=['word'])
helloTaskId = rc.run(hello_task)
world_task = kernel.Task(
    'time.sleep(3) ; word = "World!"', resultNames=['word'])
worldTaskId = rc.run(world_task)

# Fetch both results in submission order and print them on one line.
hello_word = rc.getTaskResult(helloTaskId)[1]['word']
world_word = rc.getTaskResult(worldTaskId)[1]['word']
print("%s %s" % (hello_word, world_word))
Ejemplo n.º 9
0
class Cluster(object):
    def __init__(self, clusterConfig, dt=0.5, use_mpd=False):
        """Read the cluster configuration and prepare the log file prefix.

        clusterConfig supplies the ssh prefix, controller host, ports,
        engine table and the ncluster flag through its getter methods.
        dt is the default pause between start-up steps.  Nothing is
        started here; the cluster is initially marked as not running.
        """
        self.__running = False
        self.dt, self.use_mpd = dt, use_mpd
        # Upper bound on the engine wait loop, which counts in 5 s steps.
        self.max_wait_time = 300

        # Copy the settings out of the configuration object.
        cfg = clusterConfig
        self.sshx = cfg.getSSHX()
        self.contHost = cfg.getControllerHost()
        self.engine_port = cfg.getEnginePort()
        self.rc_port = cfg.getRemoteControllerPort()
        self.task_port = cfg.getTaskControllerPort()
        self.engines = cfg.getEngines()
        self.ncluster = cfg.getNcluster()

        # Logs live under <ipython dir>/log; create it when missing.
        log_root = os.path.join(cutils.getIpythonDir(), 'log')
        if not os.path.isdir(log_root):
            os.makedirs(log_root)

        # The per-process prefix is "<log dir>/ipcluster-<pid>".
        log_prefix = os.path.join(log_root, 'ipcluster')
        self.logfile = '%s-%s' % (log_prefix, os.getpid())

    def __startController(self, dt=None, verbose=True):
        """Launch ``ipcontroller`` in the background and wait dt seconds.

        Without the ncluster flag the controller is started over ssh on
        ``self.contHost``.  With it, the controller is started locally
        and ``self.contHost`` is replaced by the shell's $HOSTNAME.

        dt defaults to ``self.dt``.
        """
        if dt is None:
            dt = self.dt
        if verbose:
            print('Starting controller:')
            print('  Starting controller on %s' % self.contHost)
        # NOTE(review): the log name uses the configured host even when
        # the ncluster branch below replaces contHost -- confirm intended.
        contLog = '%s-con-%s-' % (self.logfile, self.contHost)

        if not self.ncluster:
            cmd = "ssh %s '%s' 'ipcontroller --engine-port=%s --remote-cont-port=%s --task-port=%s --logfile=%s' &" % \
                  (self.contHost,self.sshx,self.engine_port,self.rc_port,self.task_port,contLog)
            os.system(cmd)

        else:
            # on the ncluster
            # The earlier socket.gethostbyaddr() lookup was dropped: its
            # result was overwritten at once, yet it could still raise.
            self.contHost = os.popen('echo $HOSTNAME').read()[:-1]

            cmd = "ipcontroller --engine-port=%s --remote-cont-port=%s --task-port=%s --logfile=%s &" % \
                  (self.engine_port, self.rc_port, self.task_port, contLog)

            print('cmd: %s' % cmd)
            os.system(cmd)

        time.sleep(dt)

    def __startEngines(self, dt=None, verbose=True):
        """Launch the engines and wait until they have registered.

        Without the ncluster flag, the configured number of ``ipengine``
        processes is started over ssh on each host in ``self.engines``.
        With it, the engines are started through ``mpiexec`` and the
        expected count is the line count of $PBS_NODEFILE.

        Afterwards ``self.rc`` is created and polled every 5 seconds
        until ``self.nodecount`` engines are visible or
        ``self.max_wait_time`` is exceeded.

        Raises Exception when not all engines showed up in time.
        """
        if dt is None:
            dt = self.dt

        if not self.ncluster:
            if verbose:
                print('Starting engines:   ')
            self.nodecount = 0

            for engineHost, numEngines in self.engines.iteritems():
                if verbose:
                    print('  Starting %d engine(s) on %s' % (numEngines,
                                                             engineHost))
                engLog = '%s-eng-%s-' % (self.logfile, engineHost)

                self.nodecount += numEngines
                # The command is the same for every engine on this host.
                cmd = "ssh -x %s '%s' 'ipengine --controller-ip=%s --controller-port=%s --logfile=%s' &" % \
                          (engineHost,self.sshx,self.contHost,self.engine_port,engLog)
                for _ in range(numEngines):
                    os.system(cmd)
        else:
            #ncluster
            engLog = '%s-eng' % (self.logfile)

            # strip() is safe whether or not the output ends in a newline.
            self.nodecount = int(
                os.popen('cat $PBS_NODEFILE|wc -l').read().strip())
            print('nodecount: %s' % self.nodecount)
            if verbose:
                print('Starting %s  engines...' % self.nodecount)
            cmd = "mpiexec -nostdout ipengine --controller-ip=%s --controller-port=%s --mpi=mpi4py --logfile=%s &" % \
                       (self.contHost, self.engine_port, engLog)
            print('cmd: %s' % cmd)
            os.system(cmd)

        time.sleep(2.0)
        self.rc = kernel.RemoteController((self.contHost, self.rc_port))
        wait_time = 0
        started = len(self.rc.getIDs())
        while started < self.nodecount and wait_time <= self.max_wait_time:
            print('waiting...')
            time.sleep(5.0)
            wait_time += 5
            started = len(self.rc.getIDs())

        print('started %s engines' % started)

        # Decide on the engine count, not the elapsed time: engines that
        # register during the last poll interval are not a timeout.
        if started < self.nodecount:
            raise Exception('TIMEOUT: not all started!!!!!')

        time.sleep(dt)

    def __startMPDs(self, dt=None, verbose=True):
        """Launch ``mpd`` over ssh on every host listed in self.engines.

        Pauses dt seconds (default ``self.dt``) after each launch.  The
        per-host message is printed regardless of verbose; only the
        heading is conditional.
        """
        pause = self.dt if dt is None else dt

        if verbose:
            print('Starting mpds:   ')

        for host in self.engines.iterkeys():
            print('  Starting mpd on %s' % host)
            os.system("ssh %s 'mpd' &" % (host))
            time.sleep(pause)

    def start(self, startmpds=False, dt=None, waitafter=0.0, verbose=True):
        """Bring up the whole cluster: controller, optional mpds, engines.

        startmpds -- also launch an mpd on every engine host.
        dt        -- pause in seconds between phases (default self.dt).
        waitafter -- extra seconds to sleep once everything is up.
        verbose   -- passed through to the individual start-up steps.

        Raises Exception if the cluster is already running.  An error
        from starting the engines is re-raised after the instance has
        been flagged as running.
        """

        if self.isRunning():
            raise Exception("cluster already running")

        if dt is None:
            dt = self.dt

        self.__startController(verbose=verbose, dt=4.0)
        time.sleep(dt)

        if startmpds:
            self.__startMPDs(verbose=verbose)
            time.sleep(dt)

        try:
            self.__startEngines(verbose=verbose)
            time.sleep(dt)
        except Exception:
            # The controller was already launched, so the instance is
            # still flagged as running (presumably so it can be shut
            # down later -- confirm against the stop logic).
            self.tc = kernel.TaskController((self.contHost, self.task_port))
            # self.rc is only assigned part-way through __startEngines;
            # do not let its absence mask the original error.
            if getattr(self, 'rc', None) is not None:
                self.rc.activate()
            self.__running = True
            # Bare raise keeps the original traceback.
            raise

        self.tc = kernel.TaskController((self.contHost, self.task_port))
        self.rc.activate()

        if waitafter > 0.0:
            time.sleep(waitafter)

        print("Your cluster is up and running.")
        self.__running = True