def __init__(self, hosts=[], channel_type="mpi", preamble=None, retry_jobs=True, no_wait=True, verbose=True, max_retries=2, use_threading=False): self.hosts = [] self.job_list = deque() self.idle_codes = [] self.retry_jobs = retry_jobs self.max_retries = max_retries self._finished_jobs = deque() self.preamble = preamble self.pool = AsyncRequestsPool() self.number_available_codes = 0 self.number_starting_codes = 0 self.no_wait = no_wait self.last_finished_job = None self.use_threading = use_threading self.verbose = verbose if self.verbose: print "AMUSE JobServer launching" self.add_hosts(hosts=hosts, channel_type=channel_type)
def test22(self): pool = AsyncRequestsPool() x = ForTestingInterface() y = ForTestingInterface() request1 = x.sleep.async(0.5) request2 = y.sleep.async(1.5) finished_requests = [] def handle_result(request, index): self.assertTrue(request.is_result_available()) finished_requests.append(index) pool.add_request(request1, handle_result, [1]) pool.add_request(request2, handle_result, [2]) pool.wait() self.assertEquals(len(finished_requests), 1) self.assertEquals(len(pool), 1) pool.wait() self.assertEquals(len(finished_requests), 2) self.assertEquals(len(pool), 0) self.assertTrue(request1.is_result_available()) self.assertTrue(request2.is_result_available()) self.assertEquals(request1.result(), 0) self.assertEquals(request2.result(), 0) y.stop() x.stop()
def step_les_models(model_time, work_queue, offset=les_spinup): global errorFlag les_wall_times = [] if not any(les_models): return les_wall_times if les_queue_threads >= len( les_models): # Step all dales models in parallel if async_evolve: # evolve all dales models with asynchronous Amuse calls reqs = [] pool = AsyncRequestsPool() for les in les_models: req = les.evolve_model. async (model_time + (offset | units.s), exactEnd=True) reqs.append(req) pool.add_request(req) # now while the dales threads are working, sync the netcdf to disk spio.sync_root() # wait for all threads pool.waitall() try: les_wall_times = [r.result().value_in(units.s) for r in reqs] log.info("async step_les_models() done. Elapsed times:" + str(['%5.1f' % t for t in les_wall_times])) except Exception as e: log.error("Exception caught while gathering results: %s" % e.message) else: # evolve all dales models using python threads threads = [] for les in les_models: t = threading.Thread(target=step_les, args=(les, model_time, offset), name=str(les.grid_index)) # t.setDaemon(True) threads.append(t) t.start() # now while the dales threads are working, sync the netcdf to disk spio.sync_root() # wait for all threads for t in threads: # log.info("Waiting to join thread %s..." % t.name) t.join() # log.info("joined thread %s" % t.name) elif les_queue_threads > 1: for les in les_models: work_queue.put((les, model_time)) # enqueue all dales instances # now while the dales threads are working, sync the netcdf to disk spio.sync_root() work_queue.join() # wait for all dales work to be completed if errorFlag: log.info("One thread failed - exiting ...") # stop_worker_threads(work_queue) # signal worker threads to quit - now an atexit function, should not # need it here finalize() sys.exit(1) else: # sequential version for les in les_models: step_les(les, model_time, offset) return les_wall_times
def test29(self): pool = AsyncRequestsPool() x = ForTestingInterface() y = ForTestingInterface() sequenced_requests_indices = [] def next_request(index): if index < 4: sequenced_requests_indices.append(index) return x.sleep. async (0.5) else: return None request1 = ASyncRequestSequence(next_request) request2 = y.sleep. async (1.0) finished_requests = [] def handle_result(request, index): self.assertTrue(request.is_result_available()) self.assertTrue(request.is_finished) finished_requests.append(index) pool.add_request(request1, handle_result, [1]) pool.add_request(request2, handle_result, [2]) pool.wait() print finished_requests, sequenced_requests_indices self.assertEquals(len(finished_requests), 1) self.assertEquals(len(pool), 1) self.assertEquals(finished_requests, [2]) self.assertTrue(len(sequenced_requests_indices) > 0) pool.wait() self.assertEquals(len(finished_requests), 2) self.assertEquals(len(pool), 0) x.sleep(0.1) self.assertEquals(sequenced_requests_indices, [0, 1, 2, 3]) self.assertTrue(request1.is_result_available()) self.assertTrue(request2.is_result_available()) self.assertEquals(request1.result(), [0, 0, 0, 0]) self.assertEquals(request2.result(), 0) y.stop() x.stop()
def test29(self): pool = AsyncRequestsPool() x = ForTestingInterface() y = ForTestingInterface() sequenced_requests_indices = [] def next_request(index): if index < 4: sequenced_requests_indices.append(index) return x.sleep.async(0.5) else: return None request1 = ASyncRequestSequence(next_request) request2 = y.sleep.async(1.0) finished_requests = [] def handle_result(request, index): self.assertTrue(request.is_result_available()) self.assertTrue(request.is_finished) finished_requests.append(index) pool.add_request(request1, handle_result, [1]) pool.add_request(request2, handle_result, [2]) pool.wait() print finished_requests, sequenced_requests_indices self.assertEquals(len(finished_requests), 1) self.assertEquals(len(pool), 1) self.assertEquals(finished_requests, [2]) self.assertTrue(len(sequenced_requests_indices)> 0) pool.wait() self.assertEquals(len(finished_requests), 2) self.assertEquals(len(pool), 0) x.sleep(0.1) self.assertEquals(sequenced_requests_indices, [0,1,2,3]) self.assertTrue(request1.is_result_available()) self.assertTrue(request2.is_result_available()) self.assertEquals(request1.result(), [0,0,0,0]) self.assertEquals(request2.result(), 0) y.stop() x.stop()
def __init__(self,hosts=[],channel_type="mpi",preamble=None, retry_jobs=True, no_wait=True,verbose=True,max_retries=2, use_threading=False): self.hosts=[] self.job_list=deque() self.idle_codes=[] self.retry_jobs=retry_jobs self.max_retries=max_retries self._finished_jobs=deque() self.preamble=preamble self.pool=AsyncRequestsPool() self.number_available_codes=0 self.number_starting_codes=0 self.no_wait=no_wait self.last_finished_job=None self.use_threading=use_threading self.verbose=verbose if self.verbose: print "AMUSE JobServer launching" self.add_hosts(hosts=hosts,channel_type=channel_type)
class JobServer(object):
    """Farm jobs out to a pool of remote AMUSE worker codes.

    Jobs submitted with submit_job() are queued and dispatched to idle
    remote workers; results are collected asynchronously through an
    AsyncRequestsPool and consumed via wait()/waitall()/finished_jobs.
    """

    def __init__(self, hosts=[], channel_type="mpi", preamble=None, retry_jobs=True, no_wait=True, verbose=True, max_retries=2, use_threading=False):
        # NOTE(review): hosts=[] is a mutable default argument; it is not
        # mutated here, but a None sentinel would be the safer idiom.
        self.hosts = []
        self.job_list = deque()        # jobs waiting to be dispatched
        self.idle_codes = []           # connected workers without a job
        self.retry_jobs = retry_jobs   # re-queue failed jobs?
        self.max_retries = max_retries
        self._finished_jobs = deque()  # completed jobs awaiting pickup
        self.preamble = preamble       # optional statement run on each worker
        self.pool = AsyncRequestsPool()
        self.number_available_codes = 0   # workers that finished startup
        self.number_starting_codes = 0    # workers still starting up
        self.no_wait = no_wait
        self.last_finished_job = None
        self.use_threading = use_threading
        self.verbose = verbose
        if self.verbose:
            print "AMUSE JobServer launching"
        self.add_hosts(hosts=hosts, channel_type=channel_type)

    def no_hosts(self):
        # True when no worker is available nor still starting up.
        if self.number_available_codes == 0 and self.number_starting_codes == 0:
            return True
        return False

    def add_hosts(self, hosts=[], channel_type="mpi"):
        """Start a remote worker on each host, serially or via daemon threads."""
        self.hosts.append(hosts)
        if self.verbose:
            print "JobServer: connecting %i hosts" % len(hosts)
        if not self.use_threading:
            # Serial startup: connect the hosts one after another.
            for host in hosts:
                self.number_starting_codes += 1
                self._startup(channel_type=channel_type, hostname=host, label=host, copy_worker_code=True, redirection="none")
        else:
            # Threaded startup: one daemon thread per host.
            threads = []
            for host in hosts:
                kwargs = dict(channel_type=channel_type, hostname=host, label=host, copy_worker_code=True, redirection="none")
                threads.append(
                    threading.Thread(target=self._startup, kwargs=kwargs))
            for thread in threads:
                self.number_starting_codes += 1
                thread.daemon = True
                thread.start()
            if not self.no_wait:
                # Block until every startup thread has finished.
                if self.verbose:
                    print "... waiting"
                for thread in threads:
                    thread.join()
            else:
                # Only block until the first worker becomes available.
                if self.verbose:
                    print "... waiting for first available host"
                while self.number_available_codes == 0 and self.number_starting_codes > 0:
                    sleep(0.1)
        if self.no_wait:
            if self.verbose:
                print "JobServer: launched"
        else:
            if self.verbose:
                print "JobServer: launched with", len(self.idle_codes), "hosts"

    def _startup(self, *args, **kwargs):
        """Connect one remote worker; on success register it or hand it a job."""
        try:
            code = RemoteCodeInterface(*args, **kwargs)
        except Exception as ex:
            self.number_starting_codes -= 1
            print "JobServer: startup failed on", kwargs[
                'hostname'] or "default"
            print ex
        else:
            if self.preamble is not None:
                code.execute(self.preamble)
            self.number_available_codes += 1
            self.number_starting_codes -= 1
            if self.no_wait:
                # Report progress only at powers of two (n & (n-1) == 0).
                if self.number_available_codes & (self.number_available_codes - 1) == 0:
                    if self.verbose:
                        print "JobServer: hosts now available:", self.number_available_codes
                if self.number_starting_codes == 0:
                    if self.verbose:
                        print "JobServer: hosts in total:", self.number_available_codes
            if self.job_list:
                # Work is already queued: dispatch to this worker immediately.
                self._add_job(self.job_list.popleft(), code)
            else:
                self.idle_codes.append(code)

    def exec_(self, arg):
        """Run a statement on every idle worker (drains all jobs first)."""
        while self.number_starting_codes > 0:
            sleep(0.1)
        self.waitall()
        for code in self.idle_codes:
            code.execute(arg)

    def submit_job(self, f, args=(), kwargs={}):
        """Queue f(*args, **kwargs) for remote execution and return the Job."""
        if len(self.pool) == 0 and not self.job_list:
            if self.verbose:
                print "JobServer: submitting first job on queue"
        job = Job(f, args, kwargs)
        self.job_list.append(job)
        if self.idle_codes:
            self._add_job(self.job_list.popleft(), self.idle_codes.pop())
        return job

    def wait(self):
        """Wait for the next job to finish; return False when nothing is left."""
        if self._finished_jobs:
            self.last_finished_job = self._finished_jobs.popleft()
            return True
        elif len(self.pool) == 0 and not self.job_list:
            if self.verbose:
                print "JobServer: no more jobs on queue or running"
            return False
        else:
            # Jobs are queued but none dispatched: workers must still be
            # starting up.  NOTE(review): busy-waits without a sleep here.
            while len(self.pool) == 0 and self.job_list:
                if self.number_available_codes > 0:
                    raise Exception("JobServer: this should not happen")
                if self.number_starting_codes == 0:
                    raise Exception("JobServer: no codes available")
            self.pool.wait()
            self.last_finished_job = self._finished_jobs.popleft()
            return True

    def waitall(self):
        """Block until every queued and running job has finished."""
        while self.wait():
            pass

    @property
    def finished_jobs(self):
        # Generator draining the finished-job queue.
        while self._finished_jobs:
            yield self._finished_jobs.popleft()

    def _finalize_job(self, request, job, code):
        """Pool callback: record the result/error and recycle or drop the worker."""
        try:
            job.result = request.result()
            job.err = None
        except Exception as ex:
            job.result = None
            job.err = ex
        if job.err and not isinstance(job.err, RemoteCodeException):
            # Non-remote failure: drop this worker and maybe re-queue the job.
            del code
            self.number_available_codes -= 1
            if self.retry_jobs and job.retries < self.max_retries:
                retry = Job(job.f, job.args, job.kwargs, job.retries + 1)
                self.job_list.append(retry)
        else:
            self.idle_codes.append(code)
        if self.job_list and self.idle_codes:
            self._add_job(self.job_list.popleft(), self.idle_codes.pop())
        if not self.job_list:
            if self.verbose:
                print "JobServer: last job dispatched"
        self._finished_jobs.append(job)

    def _add_job(self, job, code):
        # Launch the job asynchronously on the given worker.
        job.request = code.async_func(job.f, *job.args, **job.kwargs)
        self.pool.add_request(job.request, self._finalize_job, [job, code])

    def __del__(self):
        # Best-effort shutdown: finish work, warn about leftovers, stop workers.
        if not self.no_hosts():
            self.waitall()
        if self.job_list:
            warnings.warn(
                "JobServer: Warning: shutting down with unfinished jobs")
        for code in self.idle_codes:
            code.stop()
        if self.number_starting_codes > 0:
            warnings.warn(
                "JobServer: Warning: some hosts startup threads possibly blocking"
            )
P_binary = 41.08 | units.day # binary orbit period a_binary = 0.2243 # units.AU amin = 2. amax = 10. results = [] N = 40 M = 40 aset = (a_binary * (amin + j * (amax - amin) / (M + 1)) for j in range(M + 1)) eset = [j * 0.5 / N for j in range(N + 1)] current_a = 0 pool = AsyncRequestsPool() def finalize_job(request, i_ecc, a, code, host): print "done with", eset[i_ecc], a result, err = request.result() print result results.append((eset[i_ecc], a, result)) if result[0] == "stable" or i_ecc == 0: # if i_ecc==0: try: a = aset.next() except: a = None if a is not None:
class JobServer(object):
    """Dispatch submitted jobs to remote AMUSE worker codes.

    Queued jobs are handed to idle workers; completed requests come back
    through an AsyncRequestsPool and are consumed by wait()/waitall() or
    the finished_jobs generator.
    """

    def __init__(self,hosts=[],channel_type="mpi",preamble=None, retry_jobs=True,
            no_wait=True,verbose=True,max_retries=2, use_threading=False):
        # NOTE(review): hosts=[] is a mutable default argument (not mutated
        # here, but a None sentinel would be safer).
        self.hosts=[]
        self.job_list=deque()            # jobs waiting to be dispatched
        self.idle_codes=[]               # connected workers without a job
        self.retry_jobs=retry_jobs       # re-queue failed jobs?
        self.max_retries=max_retries
        self._finished_jobs=deque()      # completed jobs awaiting pickup
        self.preamble=preamble           # optional startup statement per worker
        self.pool=AsyncRequestsPool()
        self.number_available_codes=0    # workers that finished startup
        self.number_starting_codes=0     # workers still starting up
        self.no_wait=no_wait
        self.last_finished_job=None
        self.use_threading=use_threading
        self.verbose=verbose
        if self.verbose: print "AMUSE JobServer launching"
        self.add_hosts(hosts=hosts,channel_type=channel_type)

    def no_hosts(self):
        # True when no worker is available nor still starting up.
        if self.number_available_codes==0 and self.number_starting_codes==0:
            return True
        return False

    def add_hosts(self,hosts=[],channel_type="mpi"):
        """Start a remote worker on each host, serially or via daemon threads."""
        self.hosts.append(hosts)
        if self.verbose: print "JobServer: connecting %i hosts"%len(hosts)
        if not self.use_threading:
            # Serial startup.
            for host in hosts:
                self.number_starting_codes+=1
                self._startup( channel_type=channel_type,hostname=host,label=host,
                    copy_worker_code=True,redirection="none" )
        else:
            # Threaded startup: one daemon thread per host.
            threads=[]
            for host in hosts:
                kwargs=dict( channel_type=channel_type,hostname=host,label=host,
                    copy_worker_code=True,redirection="none" )
                threads.append( threading.Thread(target=self._startup,kwargs=kwargs) )
            for thread in threads:
                self.number_starting_codes+=1
                thread.daemon=True
                thread.start()
            if not self.no_wait:
                # Block until every startup thread has finished.
                if self.verbose: print "... waiting"
                for thread in threads:
                    thread.join()
            else:
                # Only block until the first worker becomes available.
                if self.verbose: print "... waiting for first available host"
                while self.number_available_codes==0 and self.number_starting_codes>0:
                    sleep(0.1)
        if self.no_wait:
            if self.verbose: print "JobServer: launched"
        else:
            if self.verbose: print "JobServer: launched with", len(self.idle_codes),"hosts"

    def _startup(self, *args,**kwargs):
        """Connect one remote worker; on success register it or hand it a job."""
        try:
            code=RemoteCodeInterface(*args,**kwargs)
        except Exception as ex:
            self.number_starting_codes-=1
            print "JobServer: startup failed on", kwargs['hostname'] or "default"
            print ex
        else:
            if self.preamble is not None:
                code.execute(self.preamble)
            self.number_available_codes+=1
            self.number_starting_codes-=1
            if self.no_wait:
                # Report progress only at powers of two (n & (n-1) == 0).
                if self.number_available_codes & (self.number_available_codes-1) ==0:
                    if self.verbose: print "JobServer: hosts now available:",self.number_available_codes
                if self.number_starting_codes==0:
                    if self.verbose: print "JobServer: hosts in total:", self.number_available_codes
            if self.job_list:
                # Work already queued: dispatch to this worker immediately.
                self._add_job(self.job_list.popleft(), code)
            else:
                self.idle_codes.append(code)

    def exec_(self,arg):
        """Run a statement on every idle worker (drains all jobs first)."""
        while self.number_starting_codes>0:
            sleep(0.1)
        self.waitall()
        for code in self.idle_codes:
            code.execute(arg)

    def submit_job(self,f,args=(),kwargs={}):
        """Queue f(*args, **kwargs) for remote execution and return the Job."""
        if len(self.pool)==0 and not self.job_list:
            if self.verbose: print "JobServer: submitting first job on queue"
        job=Job(f,args,kwargs)
        self.job_list.append( job)
        if self.idle_codes:
            self._add_job(self.job_list.popleft(), self.idle_codes.pop())
        return job

    def wait(self):
        """Wait for the next job to finish; return False when nothing is left."""
        if self._finished_jobs:
            self.last_finished_job=self._finished_jobs.popleft()
            return True
        elif len(self.pool)==0 and not self.job_list:
            if self.verbose: print "JobServer: no more jobs on queue or running"
            return False
        else:
            # Jobs queued but none dispatched: workers must still be starting.
            # NOTE(review): this busy-waits without a sleep.
            while len(self.pool)==0 and self.job_list:
                if self.number_available_codes>0:
                    raise Exception("JobServer: this should not happen")
                if self.number_starting_codes==0:
                    raise Exception("JobServer: no codes available")
            self.pool.wait()
            self.last_finished_job=self._finished_jobs.popleft()
            return True

    def waitall(self):
        """Block until every queued and running job has finished."""
        while self.wait():
            pass

    @property
    def finished_jobs(self):
        # Generator draining the finished-job queue.
        while self._finished_jobs:
            yield self._finished_jobs.popleft()

    def _finalize_job(self,request,job,code):
        """Pool callback: record result/error and recycle or drop the worker."""
        try:
            job.result=request.result()
            job.err=None
        except Exception as ex:
            job.result=None
            job.err=ex
        if job.err and not isinstance(job.err,RemoteCodeException):
            # Non-remote failure: drop this worker and maybe re-queue the job.
            del code
            self.number_available_codes-=1
            if self.retry_jobs and job.retries<self.max_retries:
                retry=Job(job.f,job.args,job.kwargs,job.retries+1)
                self.job_list.append(retry)
        else:
            self.idle_codes.append(code)
        if self.job_list and self.idle_codes:
            self._add_job( self.job_list.popleft(), self.idle_codes.pop())
        if not self.job_list:
            if self.verbose: print "JobServer: last job dispatched"
        self._finished_jobs.append(job)

    def _add_job(self,job,code):
        # Launch the job asynchronously on the given worker.
        job.request=code.async_func(job.f,*job.args,**job.kwargs)
        self.pool.add_request(job.request,self._finalize_job, [job,code])

    def __del__(self):
        # Best-effort shutdown: finish work, warn about leftovers, stop workers.
        if not self.no_hosts():
            self.waitall()
        if self.job_list:
            warnings.warn("JobServer: Warning: shutting down with unfinished jobs")
        for code in self.idle_codes:
            code.stop()
        if self.number_starting_codes>0:
            warnings.warn("JobServer: Warning: some hosts startup threads possibly blocking")
P_binary=41.08| units.day # binary orbit period a_binary=0.2243 # units.AU amin=2. amax=10. results=[] N=40 M=40 aset= ( a_binary*(amin+j*(amax-amin)/(M+1)) for j in range(M+1) ) eset= [ j*0.5/N for j in range(N+1)] current_a=0 pool=AsyncRequestsPool() def finalize_job(request,i_ecc,a,code,host): print "done with", eset[i_ecc],a result,err=request.result() print result results.append((eset[i_ecc],a,result)) if result[0]=="stable" or i_ecc==0: # if i_ecc==0: try: a=aset.next() except: a=None if a is not None: i_ecc=N