class Impetus(object): """ Multi-threaded library for interfacing with the Impetus system. Hides threading considerations from the client. Determines callback methods through introspection if callbacks are not explicitly stated. Decorators are provided for the client to indicate methods which run on the remote nodes and local process methods which consume the results. Creates a single stream per instance. The client can created additional streams through the Queue's remote methods via the "impq" handler. """ statuses= ("forked", "processed") def __init__(self, address, authkey, taskdir= "tasks", id= None, **properties): """Creates a stream and retrieves the streams priority queue and data-store.""" self.id= id if id else str(uuid1()) self.ipaddress= getipaddress() self.address= address self.taskdir= path.join(taskdir, self.id) self.properties= properties self.impq= SyncManager(address= self.address, authkey= authkey) self.impq.register("get_streams") self.impq.register("create_stream") self.impq.register("delete_stream") self.impq.register("get_store") self.impq.register("get_queue") self.impq.connect() self.jobs= [] self.impq.create_stream(id= self.id, ipaddress= self.ipaddress, **properties) self.store= self.impq.get_store(id= self.id) self.queue= self.impq.get_queue(id= self.id) self.alive= True self._current_thread= None self._lock= Lock() self.threads= [] self.errors= {} self.ready= {} self._progress= {} try: makedirs(self.taskdir) except: pass def __del__(self): """Deletes the stream that was created during initialization.""" self.impq.delete_stream(self.id) @staticmethod def node(target): """ All methods that are to run on remote nodes must be staticmethods as the context of which the methods was defined can not be serialized. """ return target @staticmethod def startup(target): """ Sets up the startup method for the object to run as a thread. """ def _process(self): target(self) global _declaration_order _process.order= _declaration_order _declaration_order+= 1 return _process @staticmethod def shutdown(target): """ Sets up the shutdown method to be excuted after all threads have been terminated. The ready and errors parameters will contain a dict of file-handles pointing to the results files (ie, ../tasks/<task_id>/<method>.ok, .err> for each @process method. """ def _shutdown(self): target(self, self.ready, self.errors, self._progress) global _declaration_order _shutdown.order= _declaration_order return _shutdown @staticmethod def process(target): """ Sets up method to run as a thread. The method will be called with a list of currently available jobs that are either in a ready or error state. The thread will die when it has finished processing all the jobs the previous @process method forked and when the previous @process method has terminatted. Each thread will be regulated so that all threads have an eventual chance of executing. Order of execution is not guarenteed and thread scheudling is handled by the operating system. """ def _process(self): current_thread= currentThread() if current_thread.name == 'MainThread': return previous_thread= current_thread.previous_thread while self.alive: self._thread_regulator(current_thread, previous_thread) with self._lock: jobs= filter(lambda job: job.get("callback") == current_thread.name, self.store.values()) ready= filter(lambda job: job.get("status") == "ready", jobs) errors= filter(lambda job: job.get("status") == "error", jobs) for job in ready: self.ready[current_thread.name].write(encode(compress(jdumps(job, cls= JobEncoder))) + "\n") self.store.pop(job.get("id")) for job in errors: self.errors[current_thread.name].write(encode(compress(jdumps(job, cls= JobEncoder))) + "\n") self.store.pop(job.get("id")) if len(ready) or len(errors): target(self, ready, errors) self._thread_progress(current_thread.name, "processed", len(ready) + len(errors)) self._show_progress(current_thread) if len(self.store) == 0 and previous_thread != None and previous_thread.is_alive() == False: print "%s %s completed" % (datetime.utcnow(), current_thread.name) stdout.flush() self.alive= False sleep(0.01) global _declaration_order _process.order= _declaration_order _declaration_order+= 1 return _process def fork(self, target, args, callback= None, priority= None, job_id= None, **properties): """ Turns the target method to be forked into byte-code and creates a Job. The Job is initialized to the starting state and placed the the streams priorty queue for execution. """ if self.properties.get('mss'): stall_time= 1 while len(self.store) > int(self.properties.get('mss')): print "throttling size %s exceeds mss %s" % (len(self.store), self.properties.get('mss')) sleep(stall_time) stall_time+= 1 if stall_time >= 10: break current_thread= currentThread() job= Job( client= {"id": self.id, "ipaddress": self.ipaddress}, name= target.func_name, code= encode(compress(mdumps(target.func_code))), args= args, callback= callback if callback else current_thread.next_thread.name, result= None, transport= None, **properties ) if priority: setattr(job, "priority", priority) self.store.update([(job.get("id"), job)]) self.queue.put([(job.get("priority"), job.get("id"))]) #print "forked", len(self.store) self.jobs.append(job.get("id")) self._thread_progress(current_thread.name, "forked", 1) return job.get("id") def _thread_progress(self, name, status, count): """ Keeps track of how many jobs the current thread has forked/processed. """ with self._lock: progress= self._progress.get(name, dict([(s, 0) for s in self.statuses])) progress.update([(status, progress.get(status, 0) + count)]) self._progress.update([(name, progress)]) def _show_progress(self, current_thread): """Displays the current threads progress to stdout.""" msg= [] with self._lock: for thread in self.threads: progress= self._progress.get(thread.name, dict([(s, 0) for s in self.statuses])) msg.append("%s %s/%s -> " % (thread.name, progress.get("forked"), progress.get("processed"))) print "thread: %s via %s" % (''.join(msg)[:-4], current_thread.name) def _thread_regulator(self, current_thread, previous_thread): """ Regulates the current thread so all threads have an eventual chance to run. Thread scheduling is handled by the operating-system. If the operating-system repeatively schedules the same thread than that thread is immediately put to sleep so the operating-system can schedule a new thread. """ stall_time= 1 while self._current_thread == current_thread: #print "stalling:", current_thread.name, stall_time sleep(stall_time) stall_time+= 1 if stall_time >= 10: break if current_thread.name == self.threads[-1].name and previous_thread != None and previous_thread.is_alive() == False: with self._lock: self._current_thread= self.threads[0] with self._lock: #print "setting current thread", current_thread.name self._current_thread= current_thread def _create_thread(self, name, method): """ Creates thread for the @process method as well as error/ready file handlers for which all jobs in an error/ready state are written to. All threads are maintained in an internal thread list. """ thread= Thread(target= method, name= name, args= (self, )) self.errors[name]= open(path.join(self.taskdir, '.'.join((name, "err"))), 'ab+') self.ready[name]= open(path.join(self.taskdir, '.'.join((name, "ok"))), 'ab+') return thread def _link_threads(self, threads): """ Creates previous/next properties for each thread based on the threads declaration order. """ for i in range(len(threads)): setattr(threads[i], "previous_thread", threads[i-1] if i > 0 else None) setattr(threads[i], "next_thread", threads[i+1] if i < len(threads)-1 else None) return threads[0] def _start_threads(self, threads): """Starts all threads based on their delcaration order.""" [thread.start() for thread in threads] [thread.join() for thread in threads] def run(self): self.threads= [self._create_thread(name, method) for (name, method) in sorted(filter(lambda (name, method): type(method) == FunctionType and method.__name__ == "_process", self.__class__.__dict__.items()), key= lambda (name, method): method.order)]