# Python 2 example: the bare print statements and the eager map() calls
# below rely on Python 2 semantics.
import time
import Queue  # Python 2 std-lib module; provides the Empty exception
from multiprocessing import Event, JoinableQueue, Process, Value


class TaskControl:

    def __init__(self, cls_worker, count, *args, **kwargs):
        self.queue = JoinableQueue()
        self.stopped = Event()
        self.count_processed = Value('i', 0)
        self.processes = [cls_worker(self, *args, **kwargs)
                          for _ in range(count)]
        map(Process.start, self.processes)  # eager in Python 2: starts every worker

    def is_active(self):
        return not self.stopped.is_set()

    def is_alive(self):
        alive = filter(bool, map(Process.is_alive, self.processes))
        print '---- %d child processes are still alive' % len(alive)
        return alive

    def stop(self):
        self.stopped.set()
        self.queue.close()
        print '-- waiting for processes to finish'
        map(Process.join, self.processes)
        self.queue.cancel_join_thread()

    def send_chunk(self, items):
        map(self.queue.put, items)
        print '--- waiting for queue to complete'
        while self.get_stats()[1] and self.is_alive():
            time.sleep(1)

    def get(self):
        # generator used by the workers; yields items until stop() is called
        while self.is_active():
            try:
                yield self.queue.get(timeout=1)
            except Queue.Empty:
                pass

    def tick(self):
        # called by a worker after each item; pairs with queue.join()
        self.queue.task_done()
        self.count_processed.value += 1
        if not self.count_processed.value % 20:  # throttle the progress output
            print '%d items processed' % self.count_processed.value
            time.sleep(0.5)

    def get_stats(self):
        stats = self.count_processed.value, self.queue.qsize()
        print '--- %d items processed, %d queued' % stats
        return stats
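TaskControl expects cls_worker to be a multiprocessing.Process subclass whose constructor receives the TaskControl instance, consumes items via get(), and acknowledges each one via tick(). A minimal sketch of such a worker (EchoWorker is a hypothetical name, not part of the original source):

class EchoWorker(Process):
    ''' Hypothetical worker: echoes each item pulled from the control. '''

    def __init__(self, ctrl):
        super(EchoWorker, self).__init__()
        self.ctrl = ctrl

    def run(self):
        # ctrl.get() keeps yielding items until ctrl.stop() sets the event
        for item in self.ctrl.get():
            print 'processing %r' % (item,)
            self.ctrl.tick()  # task_done() plus the shared progress counter

ctrl = TaskControl(EchoWorker, count=4)
ctrl.send_chunk(['a', 'b', 'c'])
ctrl.stop()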
import multiprocessing
from multiprocessing import JoinableQueue, Queue


class multiprocessing_mapping(object):

    def __init__(self, num_workers=4, transform=read_img) -> None:
        # read_img, the default transform, is defined elsewhere in the
        # project; a compatible worker loop is sketched below this class.
        super().__init__()
        self.num_workers = num_workers
        self.transform = transform
        self.data_queue = Queue()
        self.path_queue = JoinableQueue()
        self.path_queue.cancel_join_thread()
        # each worker runs the transform as a long-lived consumer loop
        self.workers = [
            multiprocessing.Process(target=self.transform,
                                    args=(self.path_queue, self.data_queue))
            for _ in range(self.num_workers)
        ]
        for w in self.workers:
            w.daemon = True  # ensure that the worker exits on process exit
            w.start()

    def __call__(self, img_path_list):
        for i in img_path_list:
            self.path_queue.put(i)
        self.path_queue.join()  # wait until every path has been task_done()'d
        # one result per input path; order depends on worker scheduling
        return [self.data_queue.get() for _ in range(len(img_path_list))]
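The transform passed to multiprocessing_mapping must be a long-running consumer loop: it takes paths from path_queue, puts one result per path on data_queue, and calls task_done() so that path_queue.join() can return. A minimal sketch, with example_transform as a hypothetical stand-in for the real read_img:

def example_transform(path_queue, data_queue):
    # Hypothetical worker loop; the real read_img would decode an image here.
    while True:
        path = path_queue.get()       # blocks until a path is queued
        data_queue.put(path.upper())  # stand-in for the actual image data
        path_queue.task_done()        # lets path_queue.join() return

mapper = multiprocessing_mapping(num_workers=2, transform=example_transform)
print(mapper(['a.png', 'b.png']))  # e.g. ['A.PNG', 'B.PNG'], in either order

Note that with more than one worker the result order is not guaranteed to match the input order, since each worker puts results on data_queue as soon as it finishes.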
import os
import signal
from multiprocessing import cpu_count
# work_queue.join() below requires a joinable queue, so the original
# presumably used multiprocessing.JoinableQueue under the name Queue:
from multiprocessing import JoinableQueue as Queue

# Worker, BaseAnalyzer, AnalysisStatsView, AnalyzeUtil, Util, STOP_SENTINEL
# and log come from the surrounding project and are not shown here.


class ParallelAnalyzer(BaseAnalyzer):
    ''' Parallel analyzer which uses the `multiprocessing` module. '''

    def __init__(self, storage, script_list, script_hashes, min_script_needs,
                 apks_or_paths, concurrency=None):
        '''
        See :py:meth:`.BaseAnalyzer.__init__` for details on the first attributes.

        Other Parameters
        ----------------
        concurrency : int, optional (default is number of cpu cores)
            Number of workers to spawn.
        '''
        super(ParallelAnalyzer, self).__init__(storage, script_list,
                                               script_hashes,
                                               min_script_needs,
                                               apks_or_paths)

        # parallelization parameters
        if concurrency is None:
            concurrency = cpu_count()
        self.__concurrency = concurrency

        log.info("concurrency: %s", self.concurrency)
        log.info("Using processes")

        # parallel stuff, concerning processes
        self.__work_queue = Queue()
        self.__work_queue.cancel_join_thread()
        self.__workers = []
        self.__analyzed_apks = Queue()

    def get_analyzed_apks(self):
        return self.__analyzed_apks

    def set_analyzed_apks(self, value):
        self.__analyzed_apks = value

    def del_analyzed_apks(self):
        del self.__analyzed_apks

    def get_work_queue(self):
        return self.__work_queue

    def get_concurrency(self):
        return self.__concurrency

    def get_workers(self):
        return self.__workers

    def set_workers(self, value):
        self.__workers = value

    def del_workers(self):
        del self.__workers

    analyzed_apks = property(get_analyzed_apks, set_analyzed_apks,
                             del_analyzed_apks,
                             "Queue<FastAPK> : APKs analyzed so far.")
    concurrency = property(get_concurrency, None, None,
                           "int : Number of workers to spawn.")
    workers = property(get_workers, set_workers, del_workers,
                       "list<Worker> : List of workers.")
    work_queue = property(get_work_queue, None, None,
                          "Queue<str> : Queue with paths to apks which shall be analyzed.")

    def _analyze(self):
        ''' See the doc of :py:meth:`BaseAnalyzer.analyze`. '''
        try:
            work_queue = self.work_queue

            # create the worker pool
            log.debug("starting %s workers ...", self.concurrency)
            for _ in range(self.concurrency):
                p = Worker(self.script_list, self.script_hashes,
                           self.min_script_needs, work_queue, self.storage,
                           self.cnt_analyzed_apks, self.analyzed_apks,
                           self.storage_results)
                self.workers.append(p)
                p.daemon = True

            # start the workers
            for p in self.workers:
                p.start()

            # the queue has a size limit -> start the workers first,
            # then enqueue the items
            log.info("Loading apk paths into work queue ...")
            for apk_stuff in AnalyzeUtil.apk_gen(self.apks_or_paths):
                # a task is an apk together with all scripts
                work_queue.put(apk_stuff)

            for _ in range(self.concurrency):
                # signal end-of-work
                work_queue.put(STOP_SENTINEL)

            # progress view for the cli
            av = AnalysisStatsView(self.cnt_analyzed_apks, self._cnt_apks,
                                   self.analyzed_apks)
            av.daemon = True
            av.start()

            # block until the workers have finished
            work_queue.join()
            av.terminate()
            log.debug("joined on work queue ...")

            return self.cnt_analyzed_apks.value

        # try a hot shutdown first
        except KeyboardInterrupt:
            log.warn("Hot shutdown ... ")
            try:
                log.warn("clearing work queue ... ")
                Util.clear_queue(work_queue)
                log.warn("cleared work queue ... ")

                for _ in range(self.concurrency):
                    # signal end-of-work
                    work_queue.put(STOP_SENTINEL)

                for worker in self.workers:
                    worker.join()
                log.warn("waited for all workers ... ")

                return self.cnt_analyzed_apks.value

            # if the user really wants a cold shutdown -> kill the processes
            except KeyboardInterrupt:
                log.warn("Cold shutdown ... ")
                log.warn("Hard shutdown wanted! Killing all workers!")

                # kill the processes via SIGINT -> send CTRL-C
                for w in self.workers:
                    try:
                        os.kill(w.pid, signal.SIGINT)
                    except OSError:
                        pass  # the worker may already be gone

                return self.cnt_analyzed_apks.value
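The end-of-work protocol in _analyze relies on each Worker calling task_done() for every dequeued item, including the STOP_SENTINEL, so that work_queue.join() returns exactly when all real tasks and all sentinels have been processed. A minimal sketch of that consumer loop (SketchWorker, analyze_one and the sentinel value are hypothetical; the project's actual Worker takes more arguments):

from multiprocessing import Process

STOP_SENTINEL = None  # stand-in; the project defines its own sentinel


class SketchWorker(Process):

    def __init__(self, work_queue):
        super(SketchWorker, self).__init__()
        self.work_queue = work_queue

    def run(self):
        while True:
            task = self.work_queue.get()
            try:
                if task == STOP_SENTINEL:
                    break  # end-of-work signal from _analyze
                analyze_one(task)  # hypothetical per-APK handler
            finally:
                # every get() is matched by a task_done(), sentinels included,
                # so work_queue.join() in _analyze eventually returns
                self.work_queue.task_done()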