Example 1: constructor of the AMUSE JobServer; it initializes the job queue, creates an AsyncRequestsPool and connects to the given hosts.
    def __init__(self,
                 hosts=[],
                 channel_type="mpi",
                 preamble=None,
                 retry_jobs=True,
                 no_wait=True,
                 verbose=True,
                 max_retries=2,
                 use_threading=False):
        self.hosts = []
        self.job_list = deque()
        self.idle_codes = []
        self.retry_jobs = retry_jobs
        self.max_retries = max_retries
        self._finished_jobs = deque()
        self.preamble = preamble
        self.pool = AsyncRequestsPool()
        self.number_available_codes = 0
        self.number_starting_codes = 0
        self.no_wait = no_wait
        self.last_finished_job = None
        self.use_threading = use_threading
        self.verbose = verbose
        if self.verbose:
            print "AMUSE JobServer launching"

        self.add_hosts(hosts=hosts, channel_type=channel_type)
Example 2: unit test in which each call to AsyncRequestsPool.wait() processes one finished request and invokes its callback.
 def test22(self):
     
     pool = AsyncRequestsPool()
     
     x = ForTestingInterface()
     y = ForTestingInterface()
     request1 = x.sleep.async(0.5)
     request2 = y.sleep.async(1.5)
     finished_requests = []
     
     def handle_result(request, index):
         self.assertTrue(request.is_result_available())
         finished_requests.append(index)
         
     pool.add_request(request1, handle_result, [1])
     pool.add_request(request2, handle_result, [2])
     
     pool.wait()
     self.assertEquals(len(finished_requests), 1)
     self.assertEquals(len(pool), 1)
     
     pool.wait()
     self.assertEquals(len(finished_requests), 2)
     self.assertEquals(len(pool), 0)
     
     self.assertTrue(request1.is_result_available())
     self.assertTrue(request2.is_result_available())
     
     self.assertEquals(request1.result(), 0)
     self.assertEquals(request2.result(), 0)
     
     y.stop()
     x.stop()
Example 3: driver routine that steps several DALES LES models in parallel, either through asynchronous AMUSE requests gathered in an AsyncRequestsPool, through Python threads, or via a work queue.
def step_les_models(model_time, work_queue, offset=les_spinup):
    global errorFlag
    les_wall_times = []
    if not any(les_models):
        return les_wall_times
    if les_queue_threads >= len(les_models):  # step all DALES models in parallel
        if async_evolve:  # evolve all dales models with asynchronous Amuse calls
            reqs = []
            pool = AsyncRequestsPool()
            for les in les_models:
                req = les.evolve_model.async(
                    model_time + (offset | units.s), exactEnd=True)
                reqs.append(req)
                pool.add_request(req)
            # now while the dales threads are working, sync the netcdf to disk
            spio.sync_root()
            # wait for all threads
            pool.waitall()
            try:
                les_wall_times = [r.result().value_in(units.s) for r in reqs]
                log.info("async step_les_models() done. Elapsed times:" +
                         str(['%5.1f' % t for t in les_wall_times]))
            except Exception as e:
                log.error("Exception caught while gathering results: %s" %
                          e.message)

        else:  # evolve all dales models using python threads
            threads = []
            for les in les_models:
                t = threading.Thread(target=step_les,
                                     args=(les, model_time, offset),
                                     name=str(les.grid_index))
                # t.setDaemon(True)
                threads.append(t)
                t.start()
            # now while the dales threads are working, sync the netcdf to disk
            spio.sync_root()
            # wait for all threads
            for t in threads:
                # log.info("Waiting to join thread %s..." % t.name)
                t.join()
            # log.info("joined thread %s" % t.name)
    elif les_queue_threads > 1:
        for les in les_models:
            work_queue.put((les, model_time))  # enqueue all dales instances
        # now while the dales threads are working, sync the netcdf to disk
        spio.sync_root()
        work_queue.join()  # wait for all dales work to be completed
        if errorFlag:
            log.info("One thread failed - exiting ...")
            # stop_worker_threads(work_queue)  #  signal worker threads to quit - now an atexit function, should not
            # need it here
            finalize()
            sys.exit(1)
    else:  # sequential version
        for les in les_models:
            step_les(les, model_time, offset)
    return les_wall_times
Example 4: unit test combining an ASyncRequestSequence with an ordinary asynchronous request in the same pool.
 def test29(self):
     
     pool = AsyncRequestsPool()
     
     x = ForTestingInterface()
     y = ForTestingInterface()
     sequenced_requests_indices = []
     def next_request(index):
         if index < 4:
             sequenced_requests_indices.append(index)
             return x.sleep.async(0.5)
         else:
             return None
             
     request1 = ASyncRequestSequence(next_request)
     request2 = y.sleep.async(1.0)
     finished_requests = []
     
     def handle_result(request, index):
         self.assertTrue(request.is_result_available())
         self.assertTrue(request.is_finished)
         finished_requests.append(index)
         
     pool.add_request(request1, handle_result, [1])
     pool.add_request(request2, handle_result, [2])
     
     pool.wait()
     print finished_requests, sequenced_requests_indices
     self.assertEquals(len(finished_requests), 1)
     self.assertEquals(len(pool), 1)
     self.assertEquals(finished_requests, [2])
     self.assertTrue(len(sequenced_requests_indices)> 0)
     
     pool.wait()
     self.assertEquals(len(finished_requests), 2)
     self.assertEquals(len(pool), 0)
     x.sleep(0.1)
     self.assertEquals(sequenced_requests_indices, [0,1,2,3])
     
     self.assertTrue(request1.is_result_available())
     self.assertTrue(request2.is_result_available())
     
     self.assertEquals(request1.result(), [0,0,0,0])
     self.assertEquals(request2.result(), 0)
     
     y.stop()
     x.stop()
Example 5: the complete JobServer class; submitted jobs are dispatched to remote codes and finalized through an AsyncRequestsPool.
class JobServer(object):
    def __init__(self,
                 hosts=[],
                 channel_type="mpi",
                 preamble=None,
                 retry_jobs=True,
                 no_wait=True,
                 verbose=True,
                 max_retries=2,
                 use_threading=False):
        self.hosts = []
        self.job_list = deque()
        self.idle_codes = []
        self.retry_jobs = retry_jobs
        self.max_retries = max_retries
        self._finished_jobs = deque()
        self.preamble = preamble
        self.pool = AsyncRequestsPool()
        self.number_available_codes = 0
        self.number_starting_codes = 0
        self.no_wait = no_wait
        self.last_finished_job = None
        self.use_threading = use_threading
        self.verbose = verbose
        if self.verbose:
            print "AMUSE JobServer launching"

        self.add_hosts(hosts=hosts, channel_type=channel_type)

    def no_hosts(self):
        if self.number_available_codes == 0 and self.number_starting_codes == 0:
            return True
        return False

    def add_hosts(self, hosts=[], channel_type="mpi"):
        self.hosts.append(hosts)
        if self.verbose:
            print "JobServer: connecting %i hosts" % len(hosts)
        if not self.use_threading:
            for host in hosts:
                self.number_starting_codes += 1
                self._startup(channel_type=channel_type,
                              hostname=host,
                              label=host,
                              copy_worker_code=True,
                              redirection="none")
        else:
            threads = []
            for host in hosts:
                kwargs = dict(channel_type=channel_type,
                              hostname=host,
                              label=host,
                              copy_worker_code=True,
                              redirection="none")
                threads.append(
                    threading.Thread(target=self._startup, kwargs=kwargs))
            for thread in threads:
                self.number_starting_codes += 1
                thread.daemon = True
                thread.start()
            if not self.no_wait:
                if self.verbose:
                    print "... waiting"
                for thread in threads:
                    thread.join()
            else:
                if self.verbose:
                    print "... waiting for first available host"
                while self.number_available_codes == 0 and self.number_starting_codes > 0:
                    sleep(0.1)
        if self.no_wait:
            if self.verbose:
                print "JobServer: launched"
        else:
            if self.verbose:
                print "JobServer: launched with", len(self.idle_codes), "hosts"

    def _startup(self, *args, **kwargs):
        try:
            code = RemoteCodeInterface(*args, **kwargs)
        except Exception as ex:
            self.number_starting_codes -= 1
            print "JobServer: startup failed on", kwargs[
                'hostname'] or "default"
            print ex
        else:
            if self.preamble is not None:
                code.execute(self.preamble)

            self.number_available_codes += 1
            self.number_starting_codes -= 1
            if self.no_wait:
                if self.number_available_codes & (self.number_available_codes -
                                                  1) == 0:
                    if self.verbose:
                        print "JobServer: hosts now available:", self.number_available_codes
                if self.number_starting_codes == 0:
                    if self.verbose:
                        print "JobServer: hosts in total:", self.number_available_codes
            if self.job_list:
                self._add_job(self.job_list.popleft(), code)
            else:
                self.idle_codes.append(code)

    def exec_(self, arg):
        while self.number_starting_codes > 0:
            sleep(0.1)
        self.waitall()
        for code in self.idle_codes:
            code.execute(arg)

    def submit_job(self, f, args=(), kwargs={}):
        if len(self.pool) == 0 and not self.job_list:
            if self.verbose:
                print "JobServer: submitting first job on queue"
        job = Job(f, args, kwargs)
        self.job_list.append(job)
        if self.idle_codes:
            self._add_job(self.job_list.popleft(), self.idle_codes.pop())
        return job

    def wait(self):
        if self._finished_jobs:
            self.last_finished_job = self._finished_jobs.popleft()
            return True
        elif len(self.pool) == 0 and not self.job_list:
            if self.verbose:
                print "JobServer: no more jobs on queue or running"
            return False
        else:
            while len(self.pool) == 0 and self.job_list:
                if self.number_available_codes > 0:
                    raise Exception("JobServer: this should not happen")
                if self.number_starting_codes == 0:
                    raise Exception("JobServer: no codes available")
            self.pool.wait()
            self.last_finished_job = self._finished_jobs.popleft()
            return True

    def waitall(self):
        while self.wait():
            pass

    @property
    def finished_jobs(self):
        while self._finished_jobs:
            yield self._finished_jobs.popleft()

    def _finalize_job(self, request, job, code):
        try:
            job.result = request.result()
            job.err = None
        except Exception as ex:
            job.result = None
            job.err = ex
        if job.err and not isinstance(job.err, RemoteCodeException):
            del code
            self.number_available_codes -= 1
            if self.retry_jobs and job.retries < self.max_retries:
                retry = Job(job.f, job.args, job.kwargs, job.retries + 1)
                self.job_list.append(retry)
        else:
            self.idle_codes.append(code)
        if self.job_list and self.idle_codes:
            self._add_job(self.job_list.popleft(), self.idle_codes.pop())
            if not self.job_list:
                if self.verbose:
                    print "JobServer: last job dispatched"
        self._finished_jobs.append(job)

    def _add_job(self, job, code):
        job.request = code.async_func(job.f, *job.args, **job.kwargs)
        self.pool.add_request(job.request, self._finalize_job, [job, code])

    def __del__(self):
        if not self.no_hosts():
            self.waitall()
        if self.job_list:
            warnings.warn(
                "JobServer: Warning: shutting down with unfinished jobs")
        for code in self.idle_codes:
            code.stop()
        if self.number_starting_codes > 0:
            warnings.warn(
                "JobServer: Warning: some hosts startup threads possibly blocking"
            )
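
A minimal usage sketch for the JobServer class above, relying only on the interface shown in Example 5 (the constructor, submit_job, waitall and the finished_jobs generator). The host names and the submitted function are placeholders, and the import path is an assumption to check against the installed AMUSE version.

# Sketch only: the host names, the task function and the import path are assumptions.
from amuse.ext.job_server import JobServer  # assumed location of the class above

def task(i):
    return i * i  # placeholder work executed on a remote code

server = JobServer(hosts=["node1", "node2"], channel_type="mpi")
jobs = [server.submit_job(task, args=(i,)) for i in range(8)]
server.waitall()  # block until every submitted job has been finalized
for job in server.finished_jobs:  # consumes the internal finished-job queue
    if job.err is None:
        print "job result:", job.result
    else:
        print "job failed:", job.err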
Example 6: fragment of a parameter scan over binary orbits; each finished request is handled by the finalize_job callback, which records the result and picks the next semi-major axis.
P_binary = 41.08 | units.day  # binary orbit period
a_binary = 0.2243  # units.AU

amin = 2.
amax = 10.

results = []

N = 40
M = 40

aset = (a_binary * (amin + j * (amax - amin) / (M + 1)) for j in range(M + 1))
eset = [j * 0.5 / N for j in range(N + 1)]
current_a = 0

pool = AsyncRequestsPool()


def finalize_job(request, i_ecc, a, code, host):
    print "done with", eset[i_ecc], a
    result, err = request.result()
    print result
    results.append((eset[i_ecc], a, result))

    if result[0] == "stable" or i_ecc == 0:
        #  if i_ecc==0:
        try:
            a = aset.next()
        except:
            a = None
        if a is not None:
            i_ecc = N