Code example #1
File: impetus.py Project: richardjmarini/Impetus
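
The excerpt omits the imports at the top of impetus.py. Judging from how the names are used below, they plausibly map to the standard library as follows; the aliases (encode, mdumps, jdumps) and the project-specific helpers (getipaddress, Job, JobEncoder) are guesses from usage, not confirmed against the source:

from uuid import uuid1
from os import path, makedirs
from sys import stdout
from time import sleep
from datetime import datetime
from types import FunctionType
from threading import Thread, Lock, currentThread
from multiprocessing.managers import SyncManager
from zlib import compress
from base64 import b64encode as encode    # assumed: encodes compressed payloads for transport
from marshal import dumps as mdumps       # assumed: serializes function byte-code
from json import dumps as jdumps          # assumed: serializes jobs via JobEncoder
#from impetus.objects import getipaddress, Job, JobEncoder   # project helpers; module path assumed

_declaration_order= 0   # module-level counter the decorators use to record declaration order
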
class Impetus(object):
   """
   Multi-threaded library for interfacing with the Impetus system. 
   Hides threading considerations from the client.  Determines callback
   methods through introspection if callbacks are not explicitly stated. 
   Decorators are provided for the client to indicate methods which run on 
   the remote nodes and local process methods which consume the results. 
   Creates a single stream per instance.  The client can create additional
   streams through the Queue's remote methods via the "impq" handler. 
   """

   statuses= ("forked", "processed")

   def __init__(self, address, authkey, taskdir= "tasks", id= None, **properties):
      """Creates a stream and retrieves the streams priority queue and data-store."""

      self.id= id if id else str(uuid1())
      self.ipaddress= getipaddress()

      self.address= address
      self.taskdir= path.join(taskdir, self.id)
      self.properties= properties

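      # register the queue server's stream-management methods and connect to it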
      self.impq= SyncManager(address= self.address, authkey= authkey)
      self.impq.register("get_streams")
      self.impq.register("create_stream")
      self.impq.register("delete_stream")
      self.impq.register("get_store")
      self.impq.register("get_queue")
      self.impq.connect()

      self.jobs= []
      self.impq.create_stream(id= self.id, ipaddress= self.ipaddress, **properties)
      self.store= self.impq.get_store(id= self.id)
      self.queue= self.impq.get_queue(id= self.id)
      self.alive= True
      self._current_thread= None
      self._lock= Lock()
      self.threads= []
      self.errors= {}
      self.ready= {}
      self._progress= {}


      try:
         makedirs(self.taskdir)
      except OSError:
         # the task directory may already exist
         pass

   def __del__(self):
      """Deletes the stream that was created during initialization."""

      self.impq.delete_stream(self.id)

   @staticmethod
   def node(target):
      """
      All methods that are to run on remote nodes must be staticmethods,
      as the context in which the method was defined cannot be serialized.
      """

      return target

   @staticmethod
   def startup(target):
      """
      Sets up the startup method for the object to run as a thread.
      """

      def _process(self):

         target(self)

      global _declaration_order
      _process.order= _declaration_order
      _declaration_order+= 1
      return _process

   @staticmethod
   def shutdown(target):
      """
      Sets up the shutdown method to be executed
      after all threads have terminated.  The
      ready and errors parameters will contain a dict
      of file-handles pointing to the result files
      (i.e., ../tasks/<task_id>/<method>.ok and .err)
      for each @process method.
      """
   
      def _shutdown(self):

         target(self, self.ready, self.errors, self._progress)

      global _declaration_order
      _shutdown.order= _declaration_order
      return _shutdown

   @staticmethod
   def process(target):
      """
      Sets up the method to run as a thread.  The method will be
      called with a list of currently available jobs that are
      either in a ready or error state.  The thread will die
      when the previous @process method has terminated and all
      the jobs it forked have been processed.  Each thread is
      regulated so that all threads have an eventual chance of
      executing.  Order of execution is not guaranteed and
      thread scheduling is handled by the operating system.
      """

      def _process(self):

         current_thread= currentThread()
         if current_thread.name == 'MainThread':
            return
         previous_thread= current_thread.previous_thread

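         # main loop: collect jobs addressed to this thread that have reached a
         # ready or error state, log them to the .ok/.err files, and hand them
         # to the client's @process method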
         while self.alive:

            self._thread_regulator(current_thread, previous_thread)

            with self._lock:
               jobs= filter(lambda job: job.get("callback") == current_thread.name, self.store.values())
               ready= filter(lambda job: job.get("status") == "ready", jobs)
               errors= filter(lambda job: job.get("status") == "error", jobs)

               for job in ready:
                  self.ready[current_thread.name].write(encode(compress(jdumps(job, cls= JobEncoder))) + "\n")
                  self.store.pop(job.get("id"))

               for job in errors:
                  self.errors[current_thread.name].write(encode(compress(jdumps(job, cls= JobEncoder))) + "\n")
                  self.store.pop(job.get("id"))
        
            if len(ready) or len(errors):
               target(self, ready, errors)

            self._thread_progress(current_thread.name, "processed", len(ready) + len(errors))
            self._show_progress(current_thread)

            if len(self.store) == 0 and previous_thread is not None and not previous_thread.is_alive():
               print "%s %s completed" % (datetime.utcnow(), current_thread.name)
               stdout.flush()
               self.alive= False

            sleep(0.01)
         
      global _declaration_order
      _process.order= _declaration_order
      _declaration_order+= 1
       
      return _process

   def fork(self, target, args, callback= None, priority= None, job_id= None, **properties):
      """
      Serializes the byte-code of the target method to be forked and creates a Job.
      The Job is initialized to the starting state and placed on the stream's
      priority queue for execution.
      """
 
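      # throttle forking while the store size exceeds the "mss" limit,
      # backing off with increasing sleeps before giving up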
      if self.properties.get('mss'):

         stall_time= 1
         while len(self.store) > int(self.properties.get('mss')):
            print "throttling size %s exceeds mss %s" % (len(self.store), self.properties.get('mss'))
            sleep(stall_time)  
            stall_time+= 1
            if stall_time >= 10:
               break

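      # marshal the target's byte-code into the Job; unless a callback is given,
      # results go to the next @process thread in declaration order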
      current_thread= currentThread()
      job= Job(
         client= {"id": self.id, "ipaddress": self.ipaddress},
         name= target.func_name,
         code= encode(compress(mdumps(target.func_code))),
         args= args,
         callback= callback if callback else current_thread.next_thread.name,
         result= None,
         transport= None,
         **properties
      )
      
      if priority:
         setattr(job, "priority", priority)

      self.store.update([(job.get("id"), job)])
      self.queue.put([(job.get("priority"), job.get("id"))])
  
      #print "forked", len(self.store)
      
      self.jobs.append(job.get("id"))
      
      self._thread_progress(current_thread.name, "forked", 1)
      
      return job.get("id")

   def _thread_progress(self, name, status, count):
      """
      Keeps track of how many jobs the current
      thread has forked/processed.
      """

      with self._lock:
 
         progress= self._progress.get(name, dict([(s, 0) for s in self.statuses]))
         progress.update([(status, progress.get(status, 0) + count)])
         self._progress.update([(name, progress)])

   def _show_progress(self, current_thread):
      """Displays the current threads progress to stdout."""
 
      msg= []
      with self._lock:
         for thread in self.threads:
            progress= self._progress.get(thread.name, dict([(s, 0) for s in self.statuses]))
            msg.append("%s %s/%s -> " % (thread.name, progress.get("forked"), progress.get("processed")))

      print "thread: %s via %s" % (''.join(msg)[:-4], current_thread.name)
         
   def _thread_regulator(self, current_thread, previous_thread):
      """
      Regulates the current thread so all threads have an eventual
      chance to run.  Thread scheduling is handled by the operating
      system.  If the operating system repeatedly schedules the same
      thread, that thread is put to sleep so the operating system
      can schedule a different thread.
      """

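      # if this thread was also the last one to run, stall with increasing
      # sleeps so the operating system can schedule a different thread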
      stall_time= 1
      while self._current_thread == current_thread:
         #print "stalling:", current_thread.name, stall_time
         sleep(stall_time)
         stall_time+= 1
         if stall_time >= 10:
            break

         if current_thread.name == self.threads[-1].name and previous_thread is not None and not previous_thread.is_alive():
            with self._lock:
               self._current_thread= self.threads[0]

      with self._lock:
         #print "setting current thread", current_thread.name
         self._current_thread= current_thread

   def _create_thread(self, name, method):
      """
      Creates a thread for the @process method as well
      as error/ready file handles to which all jobs
      in an error/ready state are written.  All threads
      are maintained in an internal thread list.
      """

      thread= Thread(target= method, name= name, args= (self, ))
      self.errors[name]= open(path.join(self.taskdir, '.'.join((name, "err"))), 'ab+')
      self.ready[name]= open(path.join(self.taskdir, '.'.join((name, "ok"))), 'ab+')

      return thread
 
   def _link_threads(self, threads):
      """
      Creates previous/next properties for each thread based
      on the threads' declaration order.
      """
 
      for i in range(len(threads)):
         setattr(threads[i], "previous_thread", threads[i-1] if i > 0 else None)
         setattr(threads[i], "next_thread", threads[i+1] if i < len(threads)-1 else None)
  
      return threads[0]

   def _start_threads(self, threads):
      """Starts all threads based on their delcaration order."""

      for thread in threads:
         thread.start()

      for thread in threads:
         thread.join()

   def run(self):
      """Creates a thread for each @process method, links them in declaration order and runs them to completion."""

      self.threads= [
         self._create_thread(name, method)
         for (name, method) in sorted(
            filter(lambda (name, method): type(method) == FunctionType and method.__name__ == "_process", self.__class__.__dict__.items()),
            key= lambda (name, method): method.order
         )
      ]

      # the threads must be linked before starting: _process and fork() rely on
      # the previous_thread/next_thread properties set by _link_threads()
      self._link_threads(self.threads)
      self._start_threads(self.threads)
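
To show how the pieces fit together, here is a minimal, hypothetical client sketch consistent with the decorator semantics documented above. The class, its methods, the address and the authkey are illustrative only, and it assumes the Impetus queue and node daemons are already running:

class Squares(Impetus):
   """Hypothetical client: forks one remote job per input and prints the results."""

   @Impetus.node
   def square(n):
      # runs on a remote node; only the function's byte-code is shipped
      return n * n

   @Impetus.startup
   def start(self):
      # fork one job per input; the default callback is the next
      # @process method in declaration order (here, "results")
      for n in range(10):
         self.fork(self.square, args= n)

   @Impetus.process
   def results(self, ready, errors):
      # consume jobs as they reach the ready or error state
      for job in ready:
         print job.get("name"), "->", job.get("result")
      for job in errors:
         print "error:", job.get("name")

   @Impetus.shutdown
   def done(self, ready, errors, progress):
      # ready/errors hold file-handles to the per-method .ok/.err files
      print "finished:", progress


if __name__ == "__main__":
   client= Squares(("localhost", 50000), "authkey")
   client.run()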