def initialize_ipengines(self):
    """ Connect to the IPython cluster and prepare every engine.

    Stores a MultiEngineClient on ``self.mec`` and a TaskClient on
    ``self.tc``, resets the engines, clears the task client, and finally
    executes the classification imports on all engines, binding
    ``Get_Classifications_For_Ptf_Srcid`` there for ``self.schema_str``.
    """
    from IPython.kernel import client  # 20091202 added

    # Source executed on the engines; %s is filled with self.schema_str.
    engine_setup_template = """import os,sys
import classification_interface
import plugin_classifier
import ptf_master
import MySQLdb
import get_classifications_for_ptf_srcid_and_class_schema
Get_Classifications_For_Ptf_Srcid = get_classifications_for_ptf_srcid_and_class_schema.GetClassificationsForPtfSrcid(schema_str="%s")
"""

    multiengine = client.MultiEngineClient()
    self.mec = multiengine

    # THE FOLLOWING RESET IS DANGEROUS WHEN OTHER TYPES OF TASKS MAY BE
    # OCCURRING: it resets the namespaces of all engines.
    all_engine_ids = multiengine.get_ids()
    multiengine.reset(targets=all_engine_ids)

    self.tc = client.TaskClient()
    # This supposedly clears the list of finished task objects in the
    # task-client.
    self.tc.clear()
    # This doesn't seem to do much in our system.
    multiengine.flush()

    multiengine.execute(engine_setup_template % (self.schema_str))
def engines_get_from_bucket(bucket, mec=None, filestring='*', dir_root='/mnt'):
    '''unfinished placeholder for fetching bucket contents onto the engines

    connects to the cluster when no (truthy) mec is given, imports boto on
    all engines, then always raises NotImplementedError
    '''
    multiengine = mec if mec else client.MultiEngineClient()
    engines_import('boto', multiengine)
    raise NotImplementedError('unfinished!')
def __init__(self, dict_iterable=None, func=None, task_furl=None,
             multiengine_furl=None, engine_furl=None):
    """
    Sets the function to be called and the list of parameter dictionaries
    and connects to the IPython controller. Requires that ipcontroller and
    ipengine(s) are set up. If no FURLs are given, the default location
    from the ipython setup is used.

    Parameters:
    dict_iterable - list of parameter dictionaries; when omitted, a fresh
                    empty dict is used (as before, but no longer one
                    object shared by every instance)
    func - function to call with parameter dictionaries
    task_furl - FURL for task clients to connect to.
    multiengine_furl - FURL for multiengine clients to connect to
    engine_furl - FURL for ipengines to connect to
    """
    # A `{}` default in the signature is created once and shared by all
    # instances; build a new one per call instead.
    if dict_iterable is None:
        dict_iterable = {}
    ParameterSearcher.__init__(self, dict_iterable, func)

    self.task_furl = task_furl
    self.multiengine_furl = multiengine_furl
    self.engine_furl = engine_furl

    from IPython.kernel import client
    self.mec = client.MultiEngineClient(furl_or_file=multiengine_furl)
    self.tc = client.TaskClient(furl_or_file=task_furl)

    # know which tasks we'll have to retrieve
    self.taskids = []
    # we keep track of failed tasks
    self.failed_tasks = []
def pinit(message="Hello CASA Cluster"):
    """Connect to the remote engines, activate the client and return it.

    Logs the connection attempt through ``casalog`` and prints the engine
    ids reported by the client. ``message`` is currently unused: the
    greeting it was meant for is disabled.
    """
    casalog.post("Setting up the connection to the remote nodes...",
                 origin="PDeconv::pinit")
    cluster = client.MultiEngineClient()
    engine_ids = cluster.get_ids()
    print("Connected to IDs  %s" % (engine_ids,))
    # Disabled greeting: cluster.execute('print ' + "'" + message + "'")
    cluster.activate()
    return cluster
def __init__(self, site):
    """Remember ``site``, set up empty link bookkeeping and connect.

    Creates a TaskClient (``self.tc``) and a MultiEngineClient
    (``self.rc``) and executes ``fetchParse`` on the engines.
    """
    self.site = site

    # Link bookkeeping starts out empty.
    self.allLinks = []
    self.linksWorking = {}
    self.linksDone = {}

    self.tc = client.TaskClient()
    self.rc = client.MultiEngineClient()
    self.rc.execute(fetchParse)
def fill_instance_engines(mec=None, eng_per_proc=2, **kwargs):
    '''launches engines until each host has eng_per_proc engines per processor

    mec          -- MultiEngineClient to use; one is created when omitted/falsy
    eng_per_proc -- desired number of engines per processor
    kwargs       -- passed through to launch_engine

    The processor count of a host is read from the NUMPROCS environment
    variable of the first engine already attached from that host.
    '''
    if not mec:
        mec = client.MultiEngineClient()
    engines_import('os', mec)
    ids_by_ip = engine_ids_by_ip(mec)
    if not ids_by_ip:
        # no engines attached: nothing to query and nothing to fill
        return

    # Evaluate NUMPROCS on the engines once, before anything is launched.
    # The statement does not depend on the host being processed, and running
    # it again after a launch could reach a new engine that has not
    # imported os.
    mec.execute("p = int(os.environ['NUMPROCS'])")

    for ip, ids in ids_by_ip.items():
        [procs] = mec.pull('p', ids[0])
        missing = (procs * eng_per_proc) - len(ids)
        # range() of a non-positive count is empty, so hosts that are
        # already full are left alone
        for _ in range(missing):
            launch_engine(ip, mec=mec, **kwargs)
def make_controller(controller_command='ipcontroller', furl_dir=None,
                    max_wait=10.):
    """
    Start an ipcontroller.

    Parameters:
    controller_command - path to the command to invoke the controller with.
                         Default requires the controller to be in the path.
    furl_dir - the directory to create furls in. Default is to create them
               in the system's default temp directory as returned by
               tempfile.gettempdir().
    max_wait - maximum number of seconds to wait for the controller to
               become accessible. Connection attempts are repeated with a
               0.33 second pause during that time.

    Returns:
    Dictionary with keys:
    contr_obj - the controller's Popen-object
    task_furl - path to the FURL for task clients
    multiengine_furl - path to the FURL for multiengine clients
    engine_furl - path to the FURL for engines

    Raises:
    Whatever the last failed connection attempt raised, once max_wait
    seconds have passed without a successful connection.
    """
    import os
    import subprocess
    import tempfile
    import time
    from IPython.kernel import client

    if furl_dir is None:
        furl_dir = tempfile.gettempdir()

    # mkstemp() hands back an open descriptor together with the path. Only
    # the path is needed here, so close the descriptor instead of leaking it.
    furl_paths = []
    for prefix in ('furl_engine_', 'furl_multiengine_', 'furl_task_'):
        (fd, path) = tempfile.mkstemp(dir=furl_dir, prefix=prefix)
        os.close(fd)
        furl_paths.append(path)
    engine_furl, multiengine_furl, task_furl = furl_paths

    contr = subprocess.Popen(args=[
        controller_command,
        '--engine-furl-file=%s' % engine_furl,
        '--multiengine-furl-file=%s' % multiengine_furl,
        '--task-furl-file=%s' % task_furl,
    ])

    # wait until controller is accessible
    t = time.time()
    while True:
        try:
            client.MultiEngineClient(furl_or_file=multiengine_furl)
        except Exception:
            if (time.time() - t) < max_wait:
                print("can't connect to controller yet. Retrying...")
                time.sleep(0.33)
            else:
                print("No connection after %f seconds. Giving up..."
                      % max_wait)
                # bare raise keeps the original traceback
                raise
        else:
            time.sleep(0.5)
            break

    return {
        'contr_obj': contr,
        'task_furl': task_furl,
        'multiengine_furl': multiengine_furl,
        'engine_furl': engine_furl,
    }
def __init__(self, pars=None):
    """Store the parameter dict, connect to the cluster and clear it.

    pars - parameter dictionary kept as ``self.pars``; when omitted, each
           instance gets its own new empty dict (a ``{}`` default in the
           signature would be one object shared by all instances).

    Creates ``self.mec`` (MultiEngineClient) and ``self.tc`` (TaskClient),
    then resets all engines and clears the queue, the pending results and
    the task controller.
    """
    self.pars = {} if pars is None else pars
    # TODO: - initialize ipython modules
    self.mec = client.MultiEngineClient()
    self.tc = client.TaskClient()
    self.task_id_list = []

    #### 2011-01-21 added:
    # Reset the namespaces of all engines
    self.mec.reset(targets=self.mec.get_ids())
    self.mec.clear_queue()
    self.mec.clear_pending_results()
    self.tc.task_controller.clear()
def launch_bootstrap_engine(target_ip, mec=None, **kwargs):
    '''launches an engine on target_ip, first killing any engines that are
    already attached from that address

    NOTE(review): this used to be documented as launching ONLY IF no engine
    is attached from target_ip; the code replaces existing engines instead.
    Confirm which behaviour is intended.

    mec    -- MultiEngineClient to use; one is created when omitted/falsy
    kwargs -- passed through to launch_engine
    returns whatever launch_engine returns
    '''
    mec = mec or client.MultiEngineClient()
    try:
        attached_ids = engine_ids_by_ip(mec)[target_ip]
    except KeyError:
        # nothing attached from this address yet
        attached_ids = None
    if attached_ids is not None:
        mec.kill(targets=attached_ids)
    return launch_engine(target_ip, mec=mec, **kwargs)
def engines_put_to_bucket_by_s3fs(filestring, mount_root='/mnt/s3fs', mec=None,
                                  s3_conn=None, bucket=None, gz=True):
    '''copies the files matching filestring (may be a glob) from every engine
    into an S3 bucket mounted through s3fs

    without a bucket, the name of the last directory in the path of
    filestring is used; the bucket is created when it does not exist yet
    with gz, the files are gzipped on the engines before they are copied
    '''
    import boto
    mec = mec or client.MultiEngineClient()
    if not bucket:
        bucket = filestring.split('/')[-2]
        print('bucket: %s' % (bucket,))
    if not s3_conn:
        s3_conn = boto.connect_s3(**get_keys_from_file())
    known_buckets = [b.name for b in s3_conn.get_all_buckets()]
    if bucket not in known_buckets:
        print('create bucket: %s' % (bucket,))
        s3_conn.create_bucket(bucket)

    engines_import('os', mec)
    if gz:
        gzip_cmd = "os.system('gzip %s')" % filestring
        print(gzip_cmd)
        filestring = filestring + '.gz'
        mec.execute(gzip_cmd)

    mount = os.path.join(mount_root, bucket)
    # statements run on the engines: make the mount point, mount, copy
    remote_steps = [
        "os.makedirs('%s')" % mount,
        "os.system('s3fs %s %s')" % (bucket, mount),
        "os.system('cp %s %s')" % (filestring, mount),
    ]
    # echo all of them first, then execute them in the same order
    for step in remote_steps:
        print(step)
    for step in remote_steps:
        mec.execute(step)
def launch_engine(target_ip, username=os.environ.get('USER'), mec=None,
                  key_file='~/.ssh/gsg-keypair'):
    '''launches an engine on target_ip by running ipengine there over ssh

    target_ip -- host to ssh into
    username  -- remote login name; defaults to $USER of the importing
                 process, or no "user@" prefix when USER is not set
                 (looking it up with os.environ[...] made the whole module
                 fail to import in that case)
    mec       -- MultiEngineClient; one is created when omitted/falsy
    key_file  -- ssh identity file; a falsy value drops the -i option

    returns the exit status reported by os.system for the ssh command
    '''
    # NOTE(review): the client is not used below; the connection is kept
    # because it fails early when no controller is reachable -- confirm
    # whether callers rely on that.
    if not mec:
        mec = client.MultiEngineClient()

    identity_option = ('-i ' + key_file) if key_file else ''
    login_prefix = (username + '@') if username else ''

    # the final '&' runs ssh itself in the background of the local shell;
    # the escaped '\&' is handed through to the remote command line
    ssh = r'ssh %s -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no %s%s ipengine \& &' % \
        (identity_option, login_prefix, target_ip)
    return os.system(ssh)
def __init__(self, input, db='ram', chains=2):
    """Set up one MCMC sampler per chain on the engines of an IPython cluster.

    input  - module defining the model; imported by name on the engines
    db     - database backend passed to Sampler.__init__
    chains - number of chains; engines 0..chains-1 are used

    If no controller can be reached, a local cluster with ``chains``
    engines is started through ``ipcluster`` and the connection is retried.
    Raises AssertionError when fewer than ``chains`` engines are available.
    """
    try:
        mec = client.MultiEngineClient()
    except Exception:
        # This used to format the command with `proc`, which is only
        # assigned further down, so the fallback itself always failed.
        p = subprocess.Popen('ipcluster -n %d' % chains, shell=True)
        p.wait()
        mec = client.MultiEngineClient()

    # Check everything is alright.
    nproc = len(mec.get_ids())
    assert chains <= nproc

    Sampler.__init__(self, input, db=db)

    # Import the individual models in each process
    proc = list(range(chains))
    import_stmt = 'import %s as input' % input.__name__
    try:
        mec.execute(import_stmt, proc)
    except Exception:
        # The module is not importable on the engines: put the current
        # working directory on their path and try again.
        mec.execute('import site', proc)
        mec.execute('site.addsitedir( ' + repr(os.getcwd()) + ' )', proc)
        mec.execute(import_stmt + '; reload(input)', proc)

    # Instantiate Sampler instances in each process
    mec.execute('from pymc import MCMC', proc)
    mec.execute("S = MCMC(input, db='txt')", proc)

    self.mec = mec
    self.proc = proc
def engine_ids_by_ip(mec=None, interface='eth0'):
    '''returns a dict mapping IP address -> list of ids of the engines
    reporting that address for the given network interface

    returns an empty dict when no engines are attached
    mec -- MultiEngineClient to use; one is created when omitted/falsy
    '''
    mec = mec or client.MultiEngineClient()
    if not mec.get_ids():
        return {}

    engines_import('net', mec)
    mec.execute("ip = net.get_ip_address('%s')" % interface)

    grouped = {}
    for engine_id in mec.get_ids():
        [address] = mec.pull('ip', [engine_id])
        grouped.setdefault(address, []).append(engine_id)
    return grouped
def _get_cluster(self):
    """
    Return task and multiengine clients connected to the running
    pipeline's IPython cluster.

    The FURLs are read from the ``cluster`` section of ``self.config``
    (options ``task_furl`` and ``multiengine_furl``).

    Raises ClusterError when the section is missing or the clients cannot
    be created.
    """
    self.logger.info("Connecting to IPython cluster")
    try:
        tc = IPclient.TaskClient(self.config.get('cluster', 'task_furl'))
        mec = IPclient.MultiEngineClient(
            self.config.get('cluster', 'multiengine_furl'))
    except NoSectionError:
        self.logger.error("Cluster not definied in configuration")
        raise ClusterError
    except Exception:
        # Not a bare except: KeyboardInterrupt and SystemExit must
        # propagate instead of being reported as a cluster failure.
        self.logger.error("Unable to initialise cluster")
        raise ClusterError
    return tc, mec
def initialize_clients(self):
    """ Create the multi-engine client and bootstrap the engines.

    Stores the client on ``self.mec``, then executes on the engines the
    imports and the ``aitc`` object creation (including its database
    connection) listed below, and prints what ``execute`` returned.
    """
    from IPython.kernel import client

    # Source executed on the engines.
    engine_bootstrap = """
import sys
import os
sys.path.append(os.path.abspath(os.environ.get("TCP_DIR") + 'Software/ingest_tools'))
import ptf_master
import analyze_iterative_tutor_classification
pars = analyze_iterative_tutor_classification.pars
aitc = analyze_iterative_tutor_classification.Analyze_Iterative_Tutor_Classification(pars)
aitc.connect_to_db()
"""

    self.mec = client.MultiEngineClient()
    outcome = self.mec.execute(engine_bootstrap)
    print(outcome)  # Do we get an echo during execution?
def engines_get_from_bucket_by_s3fs(bucket, mec=None, filestring='*',
                                    mount_root='/mnt/s3fs', target_root='/mnt',
                                    gz=False):
    '''copies the contents of an S3 bucket onto every engine through s3fs

    on the engines: creates target_root/<bucket> and mount_root/<bucket>,
    mounts the bucket at mount_root/<bucket> with s3fs and runs
    cp -R mount_root/<bucket>/filestring target_root/<bucket>
    with gz, target_root/<bucket>/*.gz is gunzipped afterwards

    NB: should be run with bootstrap engines only (i.e. 1 per host)
    '''
    mec = mec or client.MultiEngineClient()
    engines_import('os', mec)

    target = os.path.join(target_root, bucket)
    mount = os.path.join(mount_root, bucket)
    source_glob = os.path.join(mount, filestring)

    # statements run on the engines, in this order
    remote_steps = [
        "os.makedirs('%s')" % target,
        "os.makedirs('%s')" % mount,
        "os.system('s3fs %s %s')" % (bucket, mount),
        "os.system('cp -R %s %s')" % (source_glob, target),
    ]
    # echo all of them first, then execute them
    for step in remote_steps:
        print(step)
    for step in remote_steps:
        mec.execute(step)

    if gz:
        unzip = "os.system('gunzip %s')" % os.path.join(target, '*.gz')
        print(unzip)
        mec.execute(unzip)
def write_runclusterconfig(host_list, controller_host=None, mec=None,
                           eng_per_proc=1, **kwargs):
    '''builds a {hostname: number of engines} dict from a list of hostnames
    and the number of engines per processor, then calls write_clusterconfig

    the processor counts come from the NUMPROCS environment variable as
    gathered from the engines and are paired with host_list by position
    (assumes the gathered order matches host_list -- TODO confirm)
    '''
    mec = mec or client.MultiEngineClient()
    engines_import('os', mec)
    mec.execute("n = int(os.environ['NUMPROCS'])")

    procs_per_engine = mec.gather("n")
    engine_counts = [nprocs * eng_per_proc for nprocs in procs_per_engine]
    engines_by_host = dict(zip(host_list, engine_counts))

    write_clusterconfig(engines_by_host, controller_host, **kwargs)
def test_ipy_island(self):
    """Run the island tests with ipy_island on a parallel IPython cluster.

    Returns silently when IPython.kernel cannot be imported. Any other
    problem while connecting (including a cluster without engines) is
    printed and the tests are not run.
    """
    from PyGMO import ipy_island, algorithm, problem
    try:
        from IPython.kernel import client
        engines = client.MultiEngineClient()
        if not len(engines):
            raise RuntimeError()
    except ImportError:
        return
    except BaseException as err:
        print('\nThere is a problem with parallel IPython setup. The error message is:')
        print(err)
        print('Tests for ipy_island will not be run.')
        return

    algorithms = [algorithm.py_example(1), algorithm.de(5)]
    problems = [problem.py_example(), problem.dejong(1)]
    # every algorithm is tested against every problem
    for algo in algorithms:
        for prob in problems:
            self.__test_impl(ipy_island, algo, prob)
def main(self):
    """ Main function for Testing.

    Exercises the multi-engine interface by setting TCP related
    environment variables on the engines, then the task client interface
    by running a few tasks that report the engine's process id.
    """
    # Source executed on the engines.
    engine_env_setup = """import os
os.environ['TCP_SEX_BIN']=os.path.expandvars('$HOME/bin/sex')
os.environ['TCP_WCSTOOLS_DIR']=os.path.expandvars('$HOME/src/install/wcstools-3.6.4/bin/')
os.environ['TCP_DIR']=os.path.expandvars('$HOME/src/TCP/')
os.environ['TCP_DATA_DIR']=os.path.expandvars('$HOME/scratch/TCP_scratch/')
os.environ['CLASSPATH']=os.path.expandvars('$HOME/src/install/weka-3-5-7/weka.jar')
"""

    # This tests the Multi-engine interface:
    mec = client.MultiEngineClient()
    mec.execute(engine_env_setup)

    # This tests the task client interface:
    tc = client.TaskClient()
    n_iters_total = 8
    n_iters_per_clear = 10  # only relevant to the periodic tc.clear() below
    task_list = []
    for i in range(n_iters_total):
        pid_task = client.StringTask("""cat = os.getpid()""", pull="cat")
        task_list.append(tc.run(pid_task))
        ### NOTE: calling tc.clear() every n_iters_per_clear iterations
        ###       would thin down the ipcontroller memory storage of
        ###       finished tasks, but afterwards the values can no longer
        ###       be retrieved (below).

    print('!!! NUMBER OF TASKS STILL SCHEDULED:  %s'
          % (tc.queue_status()['scheduled'],))

    for taskid in task_list:
        ### NOTE: The following retrieval doesn't work if
        ###       tc.clear() was called earlier:
        task_result = tc.get_task_result(taskid, block=True)
        print(task_result['cat'])
    print('done')
    print(tc.queue_status())
def scatter_and_run(queue, sleeptime=60, mec=None, verbose=False):
    '''not tested, probably not finished :)

    scatters queue over the engines as 'q', starts
    'res = AWS.run_queue(q)' on them without blocking, polls until no engine
    reports a pending command and returns the gathered 'res'

    queue     -- sequence of work items to distribute
    sleeptime -- seconds to sleep after each polling round
    mec       -- MultiEngineClient to use; one is created when omitted/falsy
    verbose   -- when true, progress is written to stderr
    '''
    from time import sleep
    if not mec:
        mec = client.MultiEngineClient()
    mec.scatter('q', queue)
    # non-blocking, so that progress can be polled below
    mec.execute('res = AWS.run_queue(q)', block=False)
    # NOTE(review): compares with the string 'None' -- presumably how
    # queue_status() reports an idle engine; confirm.
    while any([i[1]['pending'] != 'None' for i in mec.queue_status()]):
        done = 0
        for i in mec.get_ids():
            # the trailing comma keeps the next print on the same line
            if verbose:
                print >> sys.stderr, 'Engine %s:' % i,
            # 'on' and 'tot' are read from the engine's namespace;
            # presumably maintained by AWS.run_queue -- verify
            [status] = mec.pull(['on', 'tot'], [i])
            if verbose:
                print >> sys.stderr, '[%s/%s]' % tuple(status)
            # NOTE(review): 'on' looks like a 1-based position, hence the -1
            # -- confirm
            done += status[0] - 1
        if verbose:
            print >> sys.stderr, 'total progress: %s of %s' % (done, len(queue))
        sleep(sleeptime)
    return mec.gather('res')
The dataset we have been using for this is the 200 million digit one here:
ftp://pi.super-computing.org/.2/pi200m/
"""

from IPython.kernel import client
from matplotlib import pyplot as plt
import numpy as np
from pidigits import *
from timeit import default_timer as clock

# Files with digits of pi (10m digits each)
# Only parts 01..15 of the 20 parts are listed here.
filestring = 'pi200m-ascii-%(i)02dof20.txt'
files = [filestring % {'i': i} for i in range(1, 16)]

# Connect to the IPython cluster
mec = client.MultiEngineClient(profile='mycluster')
# Run pidigits.py on the engines (presumably so that its functions are
# defined there as well -- confirm).
mec.run('pidigits.py')

# Run 10m digits on 1 engine
mapper = mec.mapper(targets=0)
t1 = clock()
freqs10m = mapper.map(compute_two_digit_freqs, files[:1])[0]
t2 = clock()
digits_per_second1 = 10.0e6 / (t2 - t1)
print "Digits per second (1 core, 10m digits): ", digits_per_second1

# Run 150m digits on 15 engines (8 cores)
# One file per engine: at most len(mec) files are mapped.
t1 = clock()
freqs_all = mec.map(compute_two_digit_freqs, files[:len(mec)])
freqs150m = reduce_freqs(freqs_all)
t2 = clock()
def main():
    """Benchmark the task client with sleep tasks of random length.

    Parses the command line options, submits ``-n`` tasks that each sleep
    for a random time between ``-t`` and ``-T`` seconds, waits for all of
    them and prints the achieved parallel speed-up.
    """
    parser = OptionParser()
    parser.set_defaults(n=100, tmin=1, tmax=60, controller='localhost',
                        meport=10105, tport=10113)

    # (flag, type, dest, help)
    option_table = [
        ("-n", 'int', 'n', 'the number of tasks to run'),
        ("-t", 'float', 'tmin', 'the minimum task length in seconds'),
        ("-T", 'float', 'tmax', 'the maximum task length in seconds'),
        ("-c", 'string', 'controller', 'the address of the controller'),
        ("-p", 'int', 'meport',
         "the port on which the controller listens for the MultiEngine/RemoteController client"),
        ("-P", 'int', 'tport',
         "the port on which the controller listens for the TaskClient client"),
    ]
    for flag, kind, dest, text in option_table:
        parser.add_option(flag, type=kind, dest=dest, help=text)

    (opts, args) = parser.parse_args()
    assert opts.tmax >= opts.tmin, "tmax must not be smaller than tmin"

    rc = client.MultiEngineClient()
    tc = client.TaskClient()
    print(tc.task_controller)
    rc.block = True
    nengines = len(rc.get_ids())
    rc.execute('from IPython.utils.timing import time')

    # the jobs should take a random time within a range
    span = opts.tmax - opts.tmin
    times = [random.random() * span + opts.tmin for i in range(opts.n)]
    tasks = [client.StringTask("time.sleep(%f)" % t) for t in times]
    stime = sum(times)

    print("executing %i tasks, totalling %.1f secs on %i engines"
          % (opts.n, stime, nengines))
    time.sleep(1)

    start = time.time()
    taskids = [tc.run(t) for t in tasks]
    tc.barrier(taskids)
    stop = time.time()

    ptime = stop - start
    scale = stime / ptime

    print("executed %.1f secs in %.1f secs" % (stime, ptime))
    print("%.3fx parallel performance on %i engines" % (scale, nengines))
    print("%.1f%% of theoretical max" % (100 * scale / nengines))
#from __future__ import with_statement # XXX This file is currently disabled to preserve 2.4 compatibility. #def test_simple(): if 0: # XXX - for now, we need a running cluster to be started separately. The # daemon work is almost finished, and will make much of this unnecessary. from IPython.kernel import client mec = client.MultiEngineClient(('127.0.0.1', 10105)) try: mec.get_ids() except ConnectionRefusedError: import os, time os.system('ipcluster -n 2 &') time.sleep(2) mec = client.MultiEngineClient(('127.0.0.1', 10105)) mec.block = False import itertools c = itertools.count() parallel = RemoteMultiEngine(mec) mec.pushAll() ## with parallel as pr: ## # A comment
def engines_import(module, mec=None):
    '''runs "import <module>" on all engines and returns what execute returns

    module -- module name, or several names separated by commas
    mec    -- MultiEngineClient to use; one is created when omitted/falsy
    '''
    multiengine = mec or client.MultiEngineClient()
    import_statement = 'import %s' % module
    return multiengine.execute(import_statement)
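# Parallel Python in IPython

# Shell: start a local cluster with 6 engines
ipcluster local -n 6

# Python: connect to the cluster and list the engine ids
from IPython.kernel import client
mec=client.MultiEngineClient()
mec.get_ids()

# Shell: start an additional engine
ipengine

# Python: kill the engines together with the controller
mec.kill(controller=True)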
# Ask for the parameter ranges; the last argument of each call is
# presumably the default offered to the user -- confirm in ask_question.
min_strike = ask_question("Min strike price", float, 90.0)
max_strike = ask_question("Max strike price", float, 110.0)
n_sigmas = ask_question("Number of volatility values", int, 5)
min_sigma = ask_question("Min volatility", float, 0.1)
max_sigma = ask_question("Max volatility", float, 0.4)

# Evenly spaced strike prices and volatilities to evaluate.
strike_vals = np.linspace(min_strike, max_strike, n_strikes)
sigma_vals = np.linspace(min_sigma, max_sigma, n_sigmas)

#-----------------------------------------------------------------------------
# Setup for parallel calculation
#-----------------------------------------------------------------------------

# The MultiEngineClient is used to set up the calculation and works with all
# engines.
mec = client.MultiEngineClient(profile=cluster_profile)

# The TaskClient is an interface to the engines that provides dynamic load
# balancing at the expense of not knowing which engine will execute the code.
tc = client.TaskClient(profile=cluster_profile)

# Initialize the common code on the engines. This Python module has the
# price_options function that prices the options.
mec.run('mcpricer.py')

#-----------------------------------------------------------------------------
# Perform parallel calculation
#-----------------------------------------------------------------------------

print "Running parallel calculation over strike prices and volatilities..."
print "Strike prices: ", strike_vals