def initialize_ipengines(self):
    """Initialize ipengines, load environ vars, etc."""
    from IPython.kernel import client  # 20091202 added
    self.mec = client.MultiEngineClient()
    # DANGEROUS when other types of tasks may be occurring: this wipes
    # the namespaces of *all* connected engines.
    self.mec.reset(targets=self.mec.get_ids())
    self.tc = client.TaskClient()
    # Supposedly clears the task-client's list of finished task objects.
    self.tc.clear()
    self.mec.flush()  # Doesn't seem to do much in our system.
    # Push the imports and the classifier object onto every engine so
    # subsequent tasks can use them.
    setup_template = """import os,sys
import classification_interface
import plugin_classifier
import ptf_master
import MySQLdb
import get_classifications_for_ptf_srcid_and_class_schema
Get_Classifications_For_Ptf_Srcid = get_classifications_for_ptf_srcid_and_class_schema.GetClassificationsForPtfSrcid(schema_str="%s")
"""
    self.mec.execute(setup_template % (self.schema_str))
def __init__(self, dict_iterable=None, func=None, task_furl=None,
             multiengine_furl=None, engine_furl=None):
    """Sets the function to be called and the list of parameter
    dictionaries, connects to the IPython controller, distributes the
    tasks to the engines and collects the results.

    Requires that ipcontroller and ipengine(s) are set up.
    If no FURLs are given, the default location from the ipython setup
    is used.

    Parameters:
        dict_iterable - list of parameter dictionaries (defaults to empty)
        func - function to call with parameter dictionaries
        task_furl - FURL for task clients to connect to
        multiengine_furl - FURL for multiengine clients to connect to
        engine_furl - FURL for ipengines to connect to
    """
    # A literal ``{}`` default would be a single dict shared across every
    # instance (mutable-default pitfall); create a fresh one per call.
    if dict_iterable is None:
        dict_iterable = {}
    ParameterSearcher.__init__(self, dict_iterable, func)
    self.task_furl = task_furl
    self.multiengine_furl = multiengine_furl
    self.engine_furl = engine_furl
    from IPython.kernel import client
    self.mec = client.MultiEngineClient(furl_or_file=multiengine_furl)
    self.tc = client.TaskClient(furl_or_file=task_furl)
    # know which tasks we'll have to retrieve
    self.taskids = []
    # we keep track of failed tasks
    self.failed_tasks = []
def pmap(func, seq):
    """Parallel map(func, seq) through an IPython TaskClient.

    Returns a 1-tuple holding the mapped results, or ``([],)`` when
    either ``func`` or ``seq`` is falsy.
    """
    task_client = client.TaskClient()
    # Guard clause: nothing to do when func or seq is empty/None.
    if not (func and seq):
        return ([],)
    return (task_client.map(func, seq),)
def __init__(self, site):
    """Set up task/multi-engine clients and link-tracking state for *site*."""
    self.tc = client.TaskClient()
    self.rc = client.MultiEngineClient()
    # Push the fetchParse code to every engine so tasks can call it.
    self.rc.execute(fetchParse)
    # Bookkeeping for the crawl: discovered, in-flight, and finished links.
    self.site = site
    self.allLinks = []
    self.linksWorking = {}
    self.linksDone = {}
def __init__(self, pars=None):
    """Store the parameter dict and connect to the IPython controller,
    resetting engine namespaces and clearing any queued/pending work.

    Parameters:
        pars - optional dict of parameters; defaults to an empty dict.
    """
    # A literal ``{}`` default would be one dict shared by every instance
    # (mutable-default pitfall); build a fresh dict instead.
    self.pars = {} if pars is None else pars
    # TODO: - initialize ipython modules
    self.mec = client.MultiEngineClient()
    #self.mec.reset(targets=self.mec.get_ids()) # Reset the namespaces of all engines
    self.tc = client.TaskClient()
    self.task_id_list = []
    #### 2011-01-21 added:
    self.mec.reset(targets=self.mec.get_ids())
    self.mec.clear_queue()
    self.mec.clear_pending_results()
    self.tc.task_controller.clear()
def _get_cluster(self):
    """
    Return task and multiengine clients connected to the running
    pipeline's IPython cluster.

    Raises:
        ClusterError - when the cluster section is missing from the
        configuration or the clients cannot be initialised.
    """
    self.logger.info("Connecting to IPython cluster")
    try:
        tc = IPclient.TaskClient(self.config.get('cluster', 'task_furl'))
        mec = IPclient.MultiEngineClient(
            self.config.get('cluster', 'multiengine_furl'))
    except NoSectionError:
        self.logger.error("Cluster not defined in configuration")
        raise ClusterError
    except Exception:
        # Narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt
        # are not swallowed; log the traceback for diagnosis.
        self.logger.exception("Unable to initialise cluster")
        raise ClusterError
    return tc, mec
def parallel_populate_mysql_with_initial_tutor_sources(self, fpath_list, \
                                                       test_aitc=None):
    """Take fpaths to TUTOR Vosource.xmls and add each source's srcid and
    science_class to the class table, in parallel using Ipython1.

    Parameters:
        fpath_list - list of vosource.xml file paths to ingest.
        test_aitc - when given, ingest linearly through this object
            (for testing without Ipython1 / parallelization).
    """
    # Use ``is not None`` (identity), not ``!= None``, for the sentinel test.
    if test_aitc is not None:
        # Linear TESTING path: no task farm involved.
        for fpath in fpath_list:
            test_aitc.insert_vosource_info_into_table(fpath)
        return
    from IPython.kernel import client
    tc = client.TaskClient()
    for fpath in fpath_list:
        # NOTE(review): fpath is interpolated into source code; a path
        # containing a quote would break the generated statement.
        exec_str = "aitc.insert_vosource_info_into_table('%s')" % (fpath)
        taskid = tc.run(client.StringTask(exec_str))
        self.running_ingest_tasks.append(taskid)
def main(self): """ Main function for Testing. """ # This tests the Multi-engine interface: mec = client.MultiEngineClient() exec_str = """import os os.environ['TCP_SEX_BIN']=os.path.expandvars('$HOME/bin/sex') os.environ['TCP_WCSTOOLS_DIR']=os.path.expandvars('$HOME/src/install/wcstools-3.6.4/bin/') os.environ['TCP_DIR']=os.path.expandvars('$HOME/src/TCP/') os.environ['TCP_DATA_DIR']=os.path.expandvars('$HOME/scratch/TCP_scratch/') os.environ['CLASSPATH']=os.path.expandvars('$HOME/src/install/weka-3-5-7/weka.jar') """ #if os.path.exists(os.path.expandvars("$HOME/.ipython/custom_configs")): execfile(os.path.expandvars("$HOME/.ipython/custom_configs")) mec.execute(exec_str) # This tests the task client interface: tc = client.TaskClient() task_list = [] n_iters_total = 8 n_iters_per_clear = 10 for i in xrange(n_iters_total): task_str = """cat = os.getpid()""" # os.getpid() # os.environ taskid = tc.run(client.StringTask(task_str, pull="cat")) task_list.append(taskid) ### NOTE: This can be used to thin down the ipcontroller memory storage of ### finished tasks, but afterwards you cannot retrieve values (below): #if (i % n_iters_per_clear == 0): # tc.clear() print '!!! NUMBER OF TASKS STILL SCHEDULED: ', tc.queue_status( )['scheduled'] for i, taskid in enumerate(task_list): ### NOTE: The following retrieval doesnt work if ### tc.clear() was called earlier: task_result = tc.get_task_result(taskid, block=True) print task_result['cat'] print 'done' print tc.queue_status()
def time_twisted(nmessages, t=0, f=wait):
    """Time submitting ``nmessages`` tasks through the Twisted-based
    IPython TaskClient.

    Parameters:
        nmessages - number of tasks to submit.
        t - sleep duration (for ``wait``) or payload size in bytes
            (for ``echo``).
        f - workload selector: the module-level ``wait`` or ``echo``.

    Returns:
        (submit_time, total_time) - seconds to submit all tasks, and
        seconds until all tasks completed.

    Raises:
        ValueError - when ``f`` is neither ``wait`` nor ``echo``.
    """
    from IPython.kernel import client as kc
    client = kc.TaskClient()
    if f is wait:
        s = "import time; time.sleep(%f)" % t
        task = kc.StringTask(s)
    elif f is echo:
        t = np.random.random(t / 8)
        s = "s=t"
        task = kc.StringTask(s, push=dict(t=t), pull=['s'])
    else:
        # A bare ``raise`` here would itself fail ("no active exception
        # to re-raise"); raise an explicit, descriptive error instead.
        raise ValueError("unsupported workload function: %r" % (f,))
    # One warm-up round trip so connection setup is not timed.
    client.barrier(client.run(task))
    tic = time.time()
    tids = []
    for i in xrange(nmessages):
        tids.append(client.run(task))
    lap = time.time()
    client.barrier(tids)
    toc = time.time()
    return lap - tic, toc - tic
#!/usr/bin/env python # encoding: utf-8 """Run a Monte-Carlo options pricer in parallel.""" from IPython.kernel import client import numpy as N from mcpricer import MCOptionPricer tc = client.TaskClient() rc = client.MultiEngineClient() # Initialize the common code on the engines rc.run('mcpricer.py') # Push the variables that won't change #(stock print, interest rate, days and MC paths) rc.push(dict(S=100.0, r=0.05, days=260, paths=10000)) task_string = """\ op = MCOptionPricer(S,K,sigma,r,days,paths) op.run() vp, ap, vc, ac = op.vanilla_put, op.asian_put, op.vanilla_call, op.asian_call """ # Create arrays of strike prices and volatilities K_vals = N.linspace(90.0, 100.0, 5) sigma_vals = N.linspace(0.0, 0.2, 5) # Submit tasks taskids = [] for K in K_vals:
def populate_mysql_with_iterative_classes_for_sources(self, aitc, \
                                                      vsrc_xml_fpath_list=[], \
                                                      do_nonparallel=False):
    """Iteratively add the individual epochs for each vosource, classify,
    and enter the results into the analysis MySQL table.

    Parameters:
        aitc - object exposing a MySQL ``cursor``; used to look up
            vosource xml fpaths when none are given explicitly.
        vsrc_xml_fpath_list - optional explicit list of vosource.xml
            fpaths.  NOTE(review): mutable default argument -- safe only
            as long as no caller mutates it.
        do_nonparallel - when True, process everything linearly in this
            process (no IPython task farm) and return (None, None).

    Returns:
        (tc, vosource_fpath_list) where ``tc`` is the ipython1
        TaskClient -- the caller can use it to poll which tasks are
        still queued and which are finished; or (None, None) in the
        non-parallel case.
    """
    # TODO: Here we retrieve all relevant vosource.xml fpaths from
    #       mysql table
    if len(vsrc_xml_fpath_list) > 0:
        vosource_fpath_list = vsrc_xml_fpath_list
    else:
        # No explicit list given: pull every fpath from the analysis table.
        select_str = "SELECT fpath FROM %s" % (self.pars['table_name'])
        aitc.cursor.execute(select_str)
        results = aitc.cursor.fetchall()
        vosource_fpath_list = []
        for result in results:
            vosource_fpath_list.append(result[0])
    if do_nonparallel:
        # Linear path (debugging / profiling): run each vosource through
        # ptf_master in this process.
        import ptf_master
        #special_vosource_fpath_list = []
        #for elem in special_vosource_fpath_list:
        #    try:
        #        vosource_fpath_list.pop(elem)
        #    except:
        #        pass
        #special_vosource_fpath_list.extend(vosource_fpath_list)
        #for i,fpath in enumerate(special_vosource_fpath_list):
        for i,fpath in enumerate(vosource_fpath_list):
            ptf_master.test_nonthread_nonipython1(use_postgre_ptf=False,
                          case_simulate_ptf_stream_using_vosource=True,
                          vosource_xml_fpath=fpath,
                          case_poll_for_recent_postgre_table_entries=False,
                          insert_row_into_iterative_class_probs=True)
            print "Done: VOSource %d of %d" % (i, len(vosource_fpath_list))
        return (None, None)
    ##### For debugging using cProfile, kcachegrind, etc:
    #p = cProfile.Profile()
    #p.run("""
    #import ptf_master
    #for i,fpath in enumerate(%s):
    #    ptf_master.test_nonthread_nonipython1(use_postgre_ptf=False,
    #                  case_simulate_ptf_stream_using_vosource=True,
    #                  vosource_xml_fpath=fpath,
    #                  case_poll_for_recent_postgre_table_entries=False,
    #                  insert_row_into_iterative_class_probs=True)""" % (str(vosource_fpath_list[:14])))
    #k = lsprofcalltree.KCacheGrind(p)
    #data = open('/tmp/prof_14.kgrind', 'w+')
    #k.output(data)
    #data.close()
    #sys.exit()
    # Parallel path: farm one task per vosource out to the ipython1 cluster.
    from ipython1.kernel import client
    tc = client.TaskClient((self.pars['ipython_host_ip'], \
                            self.pars['ipython_taskclient_port']))
    for fpath in vosource_fpath_list:
        # NOTE(review): fpath is interpolated into generated source; a
        # path containing a quote would break the statement.
        exec_str = \
"""ptf_master.test_nonthread_nonipython1(use_postgre_ptf=False,
                  case_simulate_ptf_stream_using_vosource=True,
                  vosource_xml_fpath='%s',
                  case_poll_for_recent_postgre_table_entries=False,
                  insert_row_into_iterative_class_probs=True)
""" % (fpath)
        taskid = tc.run(client.StringTask(exec_str))
        self.running_ingest_tasks.append(taskid)
    #print 'yo'
    # print tc.get_task_result(self.running_ingest_tasks[243], block=False)
    return (tc, vosource_fpath_list)
    # for polling which task threads are still queued, which are finished.
def main(): parser = OptionParser() parser.set_defaults(n=100) parser.set_defaults(tmin=1) parser.set_defaults(tmax=60) parser.set_defaults(controller='localhost') parser.set_defaults(meport=10105) parser.set_defaults(tport=10113) parser.add_option("-n", type='int', dest='n', help='the number of tasks to run') parser.add_option("-t", type='float', dest='tmin', help='the minimum task length in seconds') parser.add_option("-T", type='float', dest='tmax', help='the maximum task length in seconds') parser.add_option("-c", type='string', dest='controller', help='the address of the controller') parser.add_option( "-p", type='int', dest='meport', help= "the port on which the controller listens for the MultiEngine/RemoteController client" ) parser.add_option( "-P", type='int', dest='tport', help= "the port on which the controller listens for the TaskClient client") (opts, args) = parser.parse_args() assert opts.tmax >= opts.tmin, "tmax must not be smaller than tmin" rc = client.MultiEngineClient() tc = client.TaskClient() print tc.task_controller rc.block = True nengines = len(rc.get_ids()) rc.execute('from IPython.utils.timing import time') # the jobs should take a random time within a range times = [ random.random() * (opts.tmax - opts.tmin) + opts.tmin for i in range(opts.n) ] tasks = [client.StringTask("time.sleep(%f)" % t) for t in times] stime = sum(times) print "executing %i tasks, totalling %.1f secs on %i engines" % ( opts.n, stime, nengines) time.sleep(1) start = time.time() taskids = [tc.run(t) for t in tasks] tc.barrier(taskids) stop = time.time() ptime = stop - start scale = stime / ptime print "executed %.1f secs in %.1f secs" % (stime, ptime) print "%.3fx parallel performance on %i engines" % (scale, nengines) print "%.1f%% of theoretical max" % (100 * scale / nengines)
max_sigma = ask_question("Max volatility", float, 0.4)

# Grids of strike prices and volatilities spanning the requested ranges.
strike_vals = np.linspace(min_strike, max_strike, n_strikes)
sigma_vals = np.linspace(min_sigma, max_sigma, n_sigmas)

#-----------------------------------------------------------------------------
# Setup for parallel calculation
#-----------------------------------------------------------------------------

# The MultiEngineClient is used to setup the calculation and works with all
# engines.
mec = client.MultiEngineClient(profile=cluster_profile)

# The TaskClient is an interface to the engines that provides dynamic load
# balancing at the expense of not knowing which engine will execute the code.
tc = client.TaskClient(profile=cluster_profile)

# Initialize the common code on the engines. This Python module has the
# price_options function that prices the options.
mec.run('mcpricer.py')

#-----------------------------------------------------------------------------
# Perform parallel calculation
#-----------------------------------------------------------------------------

print "Running parallel calculation over strike prices and volatilities..."
print "Strike prices: ", strike_vals
print "Volatilities: ", sigma_vals
sys.stdout.flush()

# Submit tasks to the TaskClient for each (strike, sigma) pair as a MapTask.