def main():
    jobs = Queue()
    result = JoinableQueue()
    NUMBER_OF_PROCESSES = cpu_count()
    tasks = ["1", "2", "3", "4", "5"]

    for w in tasks:
        jobs.put(w)

    [Process(target=work, args=(i, jobs, result)).start()
     for i in range(NUMBER_OF_PROCESSES)]
    print('starting workers')

    for t in range(len(tasks)):
        r = result.get()
        time.sleep(0.5)
        print(r)
        result.task_done()

    for w in range(NUMBER_OF_PROCESSES):
        jobs.put(None)

    result.join()
    jobs.close()
    result.close()
def worker(node: str, worker_name: str, queue_calls: JoinableQueue,
           queue_error: Queue, queue_success: Queue):
    """Consumes some data from queue_calls and works on it."""
    connection = NodeCall(node)
    counter = 0
    start = time.perf_counter()
    print("Started worker {0}!".format(worker_name))
    for args in iter(queue_calls.get, sentinel):
        queue_calls.task_done()
        counter += 1
        if not counter % 100:
            print(f"Worker {worker_name} done {counter} jobs.")
        # print("Worker {0} got {1} args.".format(worker_name, args))
        result = connection.call_wrapper(*args)
        stop = time.perf_counter()
        if isinstance(result, dict) and result.get('error', ''):
            queue_error.put(result)
        else:
            queue_success.put({args[1]: stop - start})
        start = stop
    print(f"Worker {worker_name} done {counter} jobs.")
    queue_calls.task_done()
    print(f"{queue_calls.qsize()} is the approximate size of the queue.")
def consumer(in_queue: JoinableQueue, out_queue: JoinableQueue):
    while True:
        item = in_queue.get()
        sleep(0.5)
        s = str(item)
        out_queue.put(s)
        in_queue.task_done()
def producer(in_queue: JoinableQueue):
    while True:
        item = in_queue.get()
        sleep(0.5)
        n = int(item)
        print(n)
        in_queue.task_done()
class QueueTask:
    def __init__(self):
        self.queue = JoinableQueue()
        self.event = Event()
        atexit.register(self.queue.join)

        process = Process(target=self.work)
        process.daemon = True
        process.start()

    def work(self):
        while True:
            func, args, wait_for = self.queue.get()
            for evt in wait_for:
                evt.wait()
            func(*args)
            self.event.set()
            self.queue.task_done()

    def enqueue(self, func, args=[], wait_for=[]):
        self.event.clear()
        self.queue.put((func, args, wait_for))
        return self.event
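# A minimal usage sketch of QueueTask above (the save_report job and its
# arguments are placeholders, not part of the original snippet): the second
# job is held back until the event returned for the first one is set.
def save_report(path):
    print("saving", path)

tasks = QueueTask()
first_done = tasks.enqueue(save_report, args=["a.txt"])
tasks.enqueue(save_report, args=["b.txt"], wait_for=[first_done])
tasks.queue.join()  # block until both jobs have been marked task_done()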
def _drain_and_join_queue(q: mp.JoinableQueue, join: bool = True) -> None:
    """
    Drains a queue completely, such that it is joinable

    :param q: Queue to join
    :param join: Whether to join the queue or not
    """
    # Do nothing when it's not set
    if q is None:
        return

    # Call task_done up to the point where we get a ValueError. We need to do this when child processes already
    # started processing on some tasks and got terminated half-way.
    n = 0
    try:
        while True:
            q.task_done()
            n += 1
    except ValueError:
        pass

    try:
        while not q.empty() or n != 0:
            q.get(block=True, timeout=1.0)
            n -= 1
    except (queue.Empty, EOFError):
        pass

    # Join
    if join:
        q.join()
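# A minimal usage sketch of _drain_and_join_queue above; the queue and item
# values are illustrative assumptions, not part of the original snippet.
import multiprocessing as mp

work_queue = mp.JoinableQueue()
for item in range(10):
    work_queue.put(item)
# ... worker processes get terminated here, possibly mid-task ...
_drain_and_join_queue(work_queue)  # balance task_done() calls, drain leftovers, then join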
def worker(q: JoinableQueue, i: int, output, print_lock: Lock, FLAGS: Tuple[Any]) -> None:
    """Retrieves files from the queue and annotates them."""
    if FLAGS.in_memory:
        with open(FLAGS.alias_db, 'rb') as f:
            alias_db = pickle.load(f)
        with open(FLAGS.relation_db, 'rb') as f:
            relation_db = pickle.load(f)
        with open(FLAGS.wiki_db, 'rb') as f:
            wiki_db = pickle.load(f)
    else:
        alias_db = SqliteDict(FLAGS.alias_db, flag='r')
        relation_db = SqliteDict(FLAGS.relation_db, flag='r')
        wiki_db = SqliteDict(FLAGS.wiki_db, flag='r')

    annotator = Annotator(alias_db, relation_db, wiki_db,
                          distance_cutoff=FLAGS.cutoff,
                          match_aliases=FLAGS.match_aliases,
                          unmatch=FLAGS.unmatch,
                          prune_clusters=FLAGS.prune_clusters)

    while True:
        logger.debug('Worker %i taking a task from the queue', i)
        json_data = q.get()
        if json_data is None:
            break
        annotation = annotator.annotate(json_data)
        print_lock.acquire()
        output.write(json.dumps(annotation) + '\n')
        print_lock.release()
        q.task_done()
        logger.debug('Worker %i finished a task', i)
def calculate_set(num_processes):
    todo_queue = JoinableQueue()
    results_queue = JoinableQueue()

    # setup and launch workers
    # we'll make them daemon processes so they shut down automatically when this process exits,
    # but we'll also shut them down ourselves when we finish
    workers = [Process(target=worker, args=(todo_queue, results_queue))
               for i in xrange(num_processes)]
    for individual in workers:
        individual.daemon = True
        individual.start()

    result = numpy.zeros([ny, nx])
    for i in xrange(ny):
        y = i * dy + ylo
        for j in xrange(nx):
            x = j * dx + xlo
            todo_queue.put((x, y, i, j))

    todo_queue.join()

    while not results_queue.empty():
        i, j, val = results_queue.get()
        result[i, j] = val
        results_queue.task_done()

    # shutdown the compute processes
    for individual in workers:
        individual.terminate()

    return result
class AlarmExecutor:
    def __init__(self):
        self.queue = JoinableQueue(10)
        self.running = False
        self.t = Thread(target=self._run, name="AlarmExecutor")

    def _run(self):
        while self.running:
            try:
                alarm = self.queue.get(block=True, timeout=1)
                alarm.execute()
                logging.debug("Alarm executed")
                self.queue.task_done()
            except queue.Empty:
                continue

    def start(self):
        logging.debug("Starting alarm executor")
        self.running = True
        self.t.start()

    def stop(self):
        if self.running:
            logging.debug("Stopping alarm executor")
            self.running = False
            self.t.join()
        else:
            msg = "Attempted to stop alarm executor when it is not running"
            logging.debug(msg)
def threaded_db_input(pipe: mp.JoinableQueue, len_seeds: int) -> NoReturn:
    """Runs DB operations in a separate process.

    Args:
        :param pipe: connection with the parent.
        :param len_seeds: total number of seeds.

    Returns:
        Executes the queries from the queue.
    """
    con, dbname = get_db_con(len_seeds)
    stmt = pipe.get(timeout=3600)
    pid = None
    while stmt is not None:
        try:
            pid.join()
        except Exception as e:
            if pid:
                print(e)
        # try:
        #     con = lite.connect(dbname, timeout=3000, check_same_thread=False, isolation_level=None)
        #     con.commit()
        pid = mp.Process(target=stmt[0], args=(con,) + stmt[1])
        pid.start()
        # except Exception as e:
        #     print('Found exception in db input:')
        #     print(e)
        #     print('Arguments that caused exception: ')
        #     print(stmt)
        # finally:
        pipe.task_done()
        stmt = pipe.get()
    print('DB thread exiting...')
    con.close()
def search8(q, path):
    jobs = Queue()
    result = JoinableQueue()
    NUMBER_OF_PROCESSES = cpu_count()

    job_count = 0
    for f in os.scandir('data'):
        jobs.put(f.path)
        job_count = job_count + 1

    [Process(target=work, args=(i, q, jobs, result)).start()
     for i in range(NUMBER_OF_PROCESSES)]

    matches = []
    for t in range(job_count):
        r = result.get()
        result.task_done()
        if r:
            matches.append(r)
    matches.sort()

    for w in range(NUMBER_OF_PROCESSES):
        jobs.put(None)

    result.join()
    jobs.close()
    result.close()
    return matches
class Queue:
    def __init__(self):
        self._queue = JoinableQueue()

    def put(self, element):
        if self._queue is not None:
            self._queue.put(element)

    def get(self):
        if self._queue is not None:
            try:
                return self._queue.get()
            except:
                return None

    def join(self):
        if self._queue is not None:
            self._queue.join()

    def task_done(self):
        if self._queue is not None:
            self._queue.task_done()

    def unblock_gets(self):
        if self._queue is not None:
            self._queue.close()
            self._queue = JoinableQueue()
class Renderer:
    queue = None

    def __init__(self, nb_workers=2):
        self.queue = JoinableQueue()
        self.processes = [Process(target=self.upload) for i in range(nb_workers)]
        for p in self.processes:
            p.start()

    def render(self, item):
        self.queue.put(item)

    def upload(self):
        while True:
            item = self.queue.get()
            if item is None:
                break
            # process your item here
            self.queue.task_done()

    def terminate(self):
        """ wait until queue is empty and terminate processes """
        self.queue.join()
        for p in self.processes:
            p.terminate()
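# A minimal usage sketch of Renderer above, assuming the items are simple
# picklable values; the file names are placeholders, not from the original code.
renderer = Renderer(nb_workers=2)
for frame in ["a.png", "b.png", "c.png"]:
    renderer.render(frame)
renderer.terminate()  # joins the queue (every item acknowledged), then terminates the workers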
def main():
    from multiprocessing import JoinableQueue
    from genmod.vcf import vcf_header
    from genmod.utils import annotation_parser

    parser = argparse.ArgumentParser(description="Parse different kind of pedigree files.")
    parser.add_argument('variant_file', type=str, nargs=1,
                        help='A file with variant information.')
    parser.add_argument('annotation_file', type=str, nargs=1,
                        help='A file with feature annotations.')
    parser.add_argument('-phased', '--phased', action="store_true",
                        help='If variant file is phased.')
    parser.add_argument('-v', '--verbose', action="store_true",
                        help='Increase output verbosity.')
    args = parser.parse_args()

    infile = args.variant_file[0]

    if args.verbose:
        print('Parsing annotationfile...')
        start_time_annotation = datetime.now()

    my_anno_parser = annotation_parser.AnnotationParser(args.annotation_file[0], 'ref_gene')

    if args.verbose:
        print('annotation parsed. Time to parse annotation: %s\n' % str(datetime.now() - start_time_annotation))

    my_head_parser = vcf_header.VCFParser(infile)
    my_head_parser.parse()
    print(my_head_parser.__dict__)

    variant_queue = JoinableQueue()
    start_time = datetime.now()

    my_parser = VariantFileParser(infile, variant_queue, my_head_parser, my_anno_parser, args)
    nr_of_batches = my_parser.parse()
    print(nr_of_batches)

    for i in range(nr_of_batches):
        variant_queue.get()
        variant_queue.task_done()

    variant_queue.join()
    print('Time to parse variants: %s ' % str(datetime.now() - start_time))
def generator(test_q: JoinableQueue) -> Iterator[TestBatch]:
    test = test_q.get()
    while test:
        test_q.task_done()
        yield test
        test = test_q.get()
    test_q.task_done()
def InternalSet(Achild: Queue, Bchild: Queue, outqueue: Queue):
    """Take the output of two LeafSets and take the union."""
    logger = multiprocessing.log_to_stderr()
    logger.setLevel(logging.INFO)
    AminusB = set()
    BminusA = set()
    morestuff = True
    while morestuff:
        a = Achild.get()
        b = Bchild.get()
        logger.info("Internal:%s:%s" % (a, b))
        if a in BminusA:
            BminusA.remove(a)
        elif a not in AminusB:
            AminusB.add(a)
            outqueue.put(a)
        if b in AminusB:
            AminusB.remove(b)
        elif b not in BminusA:
            BminusA.add(b)
            outqueue.put(b)
        Achild.task_done()
        Bchild.task_done()
        if (a == SIGOBJ) or (b == SIGOBJ):
            outqueue.put(SIGOBJ)
            morestuff = False
    logger.info("internal done")
class ScheduleContainer(object):
    def __init__(self):
        self.schedule_container = JoinableQueue(maxsize=0)
        self.scheduler = schedule
        self._run = True
        worker = Thread(target=self.work)
        worker.start()

    def append(self, request_form):
        self.schedule_container.put(request_form)

    @staticmethod
    def task(temp):
        def inner():
            t.change_temp(temp)
        return inner

    def work(self):
        lock = Lock()
        while self._run:
            lock.acquire()
            if not self.schedule_container.empty():
                schedule_obj = self.schedule_container.get()
                job = self.scheduler.every()
                job.start_day = str(schedule_obj.day)
                job.unit = 'weeks'
                job.at(str(schedule_obj.time)).do(self.task(schedule_obj.temp))
                print('schedule made into job')
                schedule_obj.save()
                self.schedule_container.task_done()
            lock.release()
            schedule.run_pending()
            time.sleep(1)
class Requester(object):
    def __init__(self, num_workers=2):
        self.queue = JoinableQueue()
        self.processes = [Process(target=self.request) for _ in range(num_workers)]

    def add_url(self, url):
        self.queue.put(url)

    def request(self):
        url = self.queue.get()
        while url is not None:
            # TODO - actually send a request here
            self.queue.task_done()
            url = self.queue.get()

    def terminate(self):
        # send the terminate command
        for _ in self.processes:
            self.queue.put(None)
        # wait for processing to finish
        for p in self.processes:
            p.join()
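# A minimal usage sketch of Requester above; the URLs are placeholders. Note that
# the class as written never starts its worker processes, so the caller does it here.
req = Requester(num_workers=2)
for p in req.processes:
    p.start()
req.add_url("https://example.com/a")
req.add_url("https://example.com/b")
req.terminate()  # enqueues one None per worker, then joins the processes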
def main():
    jobs = JoinableQueue()
    result = JoinableQueue()

    numToProcess = -1
    scores = pd.DataFrame(columns=['query', 'fmeasure', 'precision', 'recall',
                                   'size', 'maxDistance', 'topHits', "contextSteps"])

    print len(datasets)

    for key in datasets:
        jobs.put(key)

    processed_count = Counter()

    for i in xrange(NUMBER_OF_PROCESSES):
        p = Process(target=work, args=(i, jobs, result, processed_count))
        p.daemon = True
        p.start()

    #work(1, jobs, result, processed_count)

    automated_annotations = {}
    distances = {}

    jobs.join()

    dataset_index = collections.defaultdict(set)
    annotated_datasets = set()
    while not result.empty():
        dataset, classes = result.get()
        if len(classes) == 0:
            annotated_datasets.add(dataset)
        for c in classes.keys():
            dataset_index[c].add(dataset)
            owl_class = Class(c, graph=graph)
            for parent in owl_class.parents:
                dataset_index[parent.identifier].add(dataset)
        result.task_done()

    print '\n'

    for query, c in queries.items():
        manual = ground_truth[query]
        automated = dataset_index[c]
        hits = manual & automated
        misses = manual - automated
        precision = np.nan if len(automated) == 0 else float(len(hits)) / len(automated)
        recall = np.nan if len(manual) == 0 else float(len(hits)) / len(manual)
        if precision != 0 or recall != 0:
            fmeasure = 0 if np.isnan(precision) or np.isnan(recall) else 2 * (precision * recall) / (precision + recall)
        else:
            fmeasure = 0
        scores = scores.append(dict(query=query, size=len(manual), precision=precision,
                                    recall=recall, fmeasure=fmeasure, topHits=topHits,
                                    maxDistance=maxDistance, contextSteps=context_steps),
                               ignore_index=True)
        print "Hits for", query, c
        print '\n'.join(sorted(hits))

    print scores
    print "Annotated", len(annotated_datasets), "datasets."
def Cvpointgray(imgs: multiprocessing.JoinableQueue, conn, l: multiprocessing.Lock):
    while True:
        try:
            img, d = imgs.get()
        except Empty:
            break
        x = []
        mean = np.mean(img)
        for j in range(img.shape[0]):
            x0 = 0
            y0 = 0
            ym = []
            for i in range(1, img.shape[1]):
                ym.append(img[j, i])
                x0 = x0 + int(img[j, i]) ** 4
                y0 = y0 + int(img[j, i]) ** 4 * i
            if x0 == 0 or np.mean(ym) < mean:
                y = 0
            else:
                y = y0 / x0
                y = round(y)
            x.append(y)
        print(d)
        l.acquire()
        conn.send([d, x])
        l.release()
        imgs.task_done()
class Thread_Pool_Manager(object):
    def __init__(self, thread_num=cpu_count()):
        self.thread_num = thread_num
        print(thread_num)
        self.work_queue = JoinableQueue()
        self.work_num = Semaphore(0)
        self.mutex = Lock()

    def start_threads(self):
        for i in range(self.thread_num):
            thread = Process(target=self.do_job)
            thread.daemon = True  # set thread as daemon
            thread.start()

    def do_job(self):
        global Numbers
        while True:
            # print(1)
            self.work_num.acquire()
            with self.mutex:
                print(1, self.work_queue.qsize())
                thread_job = self.work_queue.get()
                print(0, self.work_queue.qsize())
            thread_job.do_job(self.work_queue, self.work_num)
            print(self.work_queue.qsize())
            self.work_queue.task_done()

    def join(self):
        self.work_queue.join()

    def add_job(self, job):
        self.work_queue.put(job)
        self.work_num.release()
class StoreWorker(Process):
    def __init__(self, cache=10):
        super(StoreWorker, self).__init__()
        self.store_q = JoinableQueue()
        self.cache = cache

    def run(self):
        while True:
            template = self.store_q.get()
            if template is None:
                print('Store received None')
                self.store_q.task_done()
                break

            # Set up the environment before storing the objects.
            self.res = self._handle(template)
            '''
            if template.objects:
                # Call to the functions in this class
                func = getattr(self, template.func, None)
                if func:
                    result = func(template.objects, **template.kws)
                    if not result:
                        print('Failed with template', template.name)
                        print(template)
            '''
            self.store_q.task_done()
        print('stopping store')

    def create(self, objects, *args, **kwargs):
        '''
        Writes a list of objects to the database specified in the template.
        This wraps around functions of the database wrappers.
        '''
        raise NotImplementedError

    def read(self, *args, **kwargs):
        '''
        Read an entry from the database.
        '''
        raise NotImplementedError

    def update(self, *args, **kwargs):
        '''
        Performs an update to the database based on the key specified.
        If no key is specified, the url of object is used.
        By default creates an object in the database, if none exists.
        '''
        raise NotImplementedError

    def delete(self, *args, **kwargs):
        '''
        Deletes an entry from the database. Not implemented yet in any of the
        DatabaseAdapters.
        '''
        raise NotImplementedError
def queueManager(numProc, myList, function, *args):
    '''queueManager(numProc, myList, function, *args):
    generic function used to start worker processes via the multiprocessing Queue object

    numProc - number of processors to use
    myList - a list of objects to be iterated over
    function - target function
    *args - additional arguments to pass to function

    Return - an unordered list of the results from myList
    '''
    qIn = Queue()
    qOut = JoinableQueue()
    if args:
        arguments = (qIn, qOut,) + args
    else:
        arguments = (qIn, qOut,)
    results = []

    # reduce processer count if proc count > files
    i = 0
    for l in myList:
        qIn.put((i, l))
        i += 1

    for _ in range(numProc):
        p = Process(target=function, args=arguments).start()

    sys.stdout.write("Progress: {:>3}%".format(0))
    curProgress = 0
    lastProgress = 0
    while qOut.qsize() < len(myList):
        #sys.stdout.write("\b\b\b\b{:>3}%".format(int(ceil(100*qOut.qsize()/len(myList)))))
        curProgress = int(ceil(100 * qOut.qsize() / len(myList)))
        if curProgress - lastProgress > 10:
            lastProgress += 10
            sys.stdout.write("\nProgress: {:>3}%".format(lastProgress))
            sys.stdout.flush()
    sys.stdout.write("\nProgress: {:>3}%".format(100))
    #sys.stdout.write("\b\b\b\b{:>3}%".format(100))
    sys.stdout.write("\n")

    for _ in range(len(myList)):
        # indicate done results processing
        results.append(qOut.get())
        qOut.task_done()
    #tell child processes to stop
    for _ in range(numProc):
        qIn.put('STOP')

    orderedRes = [None] * len(results)
    for i, res in results:
        orderedRes[i] = res

    qOut.join()
    qIn.close()
    qOut.close()
    return orderedRes
def worker_func(in_queue: JoinableQueue, out_queue: Queue, hr_t, tr_h):
    while True:
        dat = in_queue.get()
        if dat is None:
            in_queue.task_done()
            continue
        testing_data, head_pred, tail_pred = dat
        out_queue.put(test_evaluation(testing_data, head_pred, tail_pred, hr_t, tr_h))
        in_queue.task_done()
def process_pcap(pcap, base):
    """
    Process a pcap file
    :param pcap: pcap file as read by pyshark module
    :param base: base name of the file being processed
    :return: None
    """
    # Establish communication queues
    tasks = {}
    packets_to_process = {}
    results = JoinableQueue()

    for pkt in pcap:
        try:
            # Obtain VLAN id
            vlan_id = pkt.layers[1].id
            # Create queues for the corresponding VLAN if they do not exist
            if vlan_id not in packets_to_process.keys():
                packets_to_process[vlan_id] = JoinableQueue()
            # Create and start tasks for the corresponding VLAN if they do not exist
            if vlan_id not in tasks.keys():
                print 'Creating Analyzer for VLAN id {0}'.format(vlan_id)
                # results[vlan_id] = m.dict()
                tasks[vlan_id] = Analyzer(vlan_id, packets_to_process[vlan_id], results)
                tasks[vlan_id].start()
            # # Add package to queue for the corresponding VLAN
            # if gtp.version == '1':
            #     packets_to_process[vlan_id].put(Packet(int(gtp.version), int(gtp.message)))
            # elif gtp.version == '2':
            #     packets_to_process[vlan_id].put(Packet(int(gtp.version), int(gtp.message_type)))
            packets_to_process[vlan_id].put(Packet(pkt))
        except AttributeError as e:
            print 'AttributeError: {0}'.format(e)
        except KeyError as e:
            print 'KeyError: {0}'.format(e)

    # Add a poison pill for each consumer
    for i in tasks.keys():
        packets_to_process[i].put(None)

    # Wait for all of the tasks to finish
    for i in tasks.keys():
        packets_to_process[i].join()

    results.task_done()

    # Print results
    print_results(results, base)
def main(model_to_load):
    df = pd.read_csv(f_path + 'test.csv')
    # shuffle to get faster processing time
    df = df.sample(frac=1).reset_index(drop=True)
    img_name_array, image_category = get_img_and_landmark(df)
    data_len = image_category.shape[0]
    batch_size_o = 20
    image_queue = JoinableQueue(3)
    predict_queue = JoinableQueue(16)
    landmark_queue = JoinableQueue(8)
    model = load_model(model_to_load, {'custom_loss': custom_loss})
    print("model read completed")

    p_i = Process(target=img_reader, args=(image_queue, img_name_array, data_len, batch_size_o))
    p_i.daemon = True
    p_i.start()

    c_pool = []
    for _i in range(8):
        c = Process(target=get_landmarker, args=(predict_queue, landmark_queue))
        c.daemon = True
        c.start()
        c_pool.append(c)

    p1 = Process(target=predictor, args=(landmark_queue, t0, image_category, img_name_array))
    p1.daemon = True
    p1.start()

    for i in range(math.ceil(data_len / batch_size_o)):
        img_array = image_queue.get(timeout=1800)
        batch_size = img_array["img_array"].shape[0]
        tmp_out_1 = model.predict(img_array["img_array"], batch_size=batch_size)
        tmp_out_flip = model.predict(img_array["img_flip_array"], batch_size=batch_size)
        image_queue.task_done()
        predict_queue.put(
            {
                'id': [i for i in range(i * batch_size_o, i * batch_size_o + batch_size)],
                'heatmap': tmp_out_1[0],
                'heatmap_flip': tmp_out_flip[0],
                'image_category': image_category[i * batch_size_o:i * batch_size_o + batch_size]
            },
            timeout=1800)

    p1.join()
class TaskManager: # noinspection PyPep8Naming def __init__(self, jobs_queue_capacity: int, workers_num: int, WorkerClass: Worker.__class__ = Worker): # empty job queue self._queue = JoinableQueue(maxsize=jobs_queue_capacity) logger.info( f'Queue size set to accept at most {jobs_queue_capacity} before pausing job assignment.' ) self.WorkerClass = WorkerClass self.workers_num = max_number_of_workers(workers_num) _workers = [] def wake_up_workers(self): self._workers: List[Worker] = [ self.WorkerClass(self._queue) for _ in range(self.workers_num) ] for worker in self._workers: worker.start() def assign_task(self, job: Task): self._queue.put(job) def stop_workers(self): logger.info('waiting all workers to finish') # usual termination condition is to put None on the queue. Queues are FIFO but from Python 3.8 docs: # https://docs.python.org/3.8/library/multiprocessing.html#pipes-and-queues # "If multiple processes are enqueuing objects, it is possible for the objects to be received at the other # end out-of-order. However, objects enqueued by the same process will always be in the expected order # with respect to each other.". So, when there's a single producer, that's not an issue; when there are many # producers it may happen that even if Nones are enqueued at the end of the queue, consumers pick 'em # before other items in the queue (breaking the FIFO assumption). In this case the workers would leave # before the queue is empty. To avid this, before sending Nones, it's better to wait for the queue to be # consumed. while not self._queue.empty( ): # not bullet-proof as empty() and qsize() return approx. values, but it helps print(f"jobs waiting to be assigned: {self._queue.qsize()}") sleep(1) for _ in self._workers: self._queue.put(None, block=True, timeout=None) self._queue.join() logger.info('all processes_finished') def discard_waiting_tasks(self): while not self._queue.empty(): try: self._queue.get(False) except Empty: continue self._queue.task_done() def number_of_waiting_tasks(self): return self._queue.qsize()
def worker_func(in_queue: JoinableQueue, out_queue: Queue, tr_h, hr_t, ht_r):
    while True:
        dat = in_queue.get()
        if dat is None:
            in_queue.task_done()
            continue
        testing_data, candidates, candidate_tail_pred = dat
        out_queue.put(candidate_evaluation(testing_data, candidates, candidate_tail_pred, tr_h, hr_t, ht_r))
        in_queue.task_done()
class WorkerQueue(object): def __init__(self, num_workers = 20): self.queue = Queue() self.pool = [] self._setup_workers(num_workers) def _setup_workers(self, num_workers): """ Sets up the worker threads NOTE: undefined behaviour if you call this again. """ self.pool = [] for _ in range(num_workers): self.pool.append(Thread(target=self.threadloop)) for a_thread in self.pool: a_thread.setDaemon(True) a_thread.start() def do(self, f, *args, **kwArgs): """ puts a function on a queue for running later. """ self.queue.put((f, args, kwArgs)) def stop(self): """ Stops the WorkerQueue, waits for all of the threads to finish up. """ self.queue.put(STOP) for thread in self.pool: thread.join() def threadloop(self): #, finish = False): """ Loops until all of the tasks are finished. """ while True: args = self.queue.get() if args is STOP: self.queue.put(STOP) self.queue.task_done() break else: try: args[0](*args[1], **args[2]) finally: # clean up the queue, raise the exception. self.queue.task_done() #raise def wait(self): """ waits until all tasks are complete. """ self.queue.join()
def apply_mt(self, xs, parallelism, **kwargs): """Run the UDF multi-threaded using python multiprocessing""" if snorkel_conn_string.startswith('sqlite'): raise ValueError('Multiprocessing with SQLite is not supported. Please use a different database backend,' ' such as PostgreSQL.') # Fill a JoinableQueue with input objects in_queue = JoinableQueue() for x in xs: in_queue.put(x) # If the UDF has a reduce step, we collect the output of apply in a # Queue. This is also used to track progress via the the UDF sentinel out_queue = JoinableQueue() # Keep track of progress counts total_count = in_queue.qsize() count = 0 # Start UDF Processes for i in range(parallelism): udf = self.udf_class(in_queue=in_queue, out_queue=out_queue, add_to_session=(self.reducer is None), **self.udf_init_kwargs) udf.apply_kwargs = kwargs self.udfs.append(udf) # Start the UDF processes, and then join on their completion for udf in self.udfs: udf.start() while any([udf.is_alive() for udf in self.udfs]) and count < total_count: y = out_queue.get() # Update progress whenever an item was processed if y == UDF.TASK_DONE_SENTINEL: count += 1 if self.pb is not None: self.pb.update(1) # If there is a reduce step, do now on this thread elif self.reducer is not None: self.reducer.reduce(y, **kwargs) out_queue.task_done() else: raise ValueError("Got non-sentinel output without reducer.") if self.reducer is None: for udf in self.udfs: udf.join() else: self.reducer.session.commit() self.reducer.session.close() # Flush the processes self.udfs = []
def assemble(inp: mp.JoinableQueue, outp: mp.JoinableQueue): conn = make_connection() print('assembler started, looping') journals = dict( conn.execute('select JournalId, DisplayName from Journals')) conference_series = dict( conn.execute( 'select ConferenceSeriesId, DisplayName from ConferenceSeries')) conference_instances = dict( conn.execute( 'select ConferenceInstanceId, DisplayName from ConferenceInstances' )) cited_by_gen = generate_citation_from_gzip() paper_citation_id, paper_citations = next(cited_by_gen) for paper in iter(inp.get, 'STOP'): paperid = paper['id'] # print(paperid, paper_citation_id) paper['author'], paper['affiliations'] = generate_author_affiliations( conn, paperid) paper['urls'] = list(generate_urls(conn, paperid)) paper['references'] = list(generate_references(conn, paperid)) paper['references_count'] = len(paper['references']) # paper['resources'] = list(generate_resources(paperid)) cii = paper.pop('ConferenceInstanceId') if type(cii) is int: paper['conferenceinstance'] = conference_instances[cii] csi = paper.pop('ConferenceSeriesId') if type(csi) is int: paper['conferenceseries'] = conference_series[csi] ji = paper.pop('JournalId') if type(ji) is int: paper['journal'] = journals[ji] if paper_citation_id == paperid: print('found citation information') paper['cited_by'] = list(paper_citations) paper['cited_by_count'] = len(paper_citations) try: paper_citation_id, paper_citations = next(cited_by_gen) except StopIteration: print('citations were finished') paper_citation_id = 0 paper_citations = [] # elif paper_citation_id < paperid: # print(f'onoz, we skipped something? pid {paperid} > {paper_citation_id}') # break # else: # print(f'seems to be in order: cited_id {paper_citation_id}, paper_id {paperid}') strip_empty_fields(paper) jsonl = json.dumps(paper, ensure_ascii=False) + '\n' outp.put(jsonl) inp.task_done() print('DONE, joining inp') inp.task_done() inp.join() outp.put('STOP') print('REALLY DONE')
def longest(queue: multiprocessing.JoinableQueue, nextqueue: multiprocessing.JoinableQueue):
    longest_word = ("", 0, False)
    while True:
        word_count = queue.get()
        if word_count[1] >= longest_word[1]:
            longest_word = (word_count[0], word_count[1], True)
            nextqueue.put(longest_word)
        else:
            nextqueue.put((word_count[0], word_count[1], False))
        queue.task_done()
class ImageCrawler:
    NUM_PER_FETCH = 100
    NUM_PROCESSES = 10

    def __init__(self, database_config_path):
        self.queue = JoinableQueue()
        self.logger = Logger("image_crawler")
        self.adapter = ImageStoreAdapter(database_config_path, self.logger)

    def produce(self):
        while True:
            if self.queue.empty():
                for image_id, link in self.adapter.load_undownloaded_images(self.NUM_PER_FETCH):
                    self.logger.log("Producer: add new image to crawl:" + image_id + " " + link)
                    self.queue.put((image_id, link))
            time.sleep(10)

    def consume(self, process_id):
        while True:
            self.logger.log("Consumer process:" + str(process_id) + " fetch new image from queue")
            if not self.queue.empty():
                image_id, link = self.queue.get()
                self.logger.log("Consumer process:" + str(process_id) + " start crawling " + str(link))
                image = common_utils.page_crawl(link)
                if image != None:
                    self.logger.log(link + "crawled successfully")
                    self.adapter.store_image(image_id, image)
                else:
                    self.logger.log(link + " failed at crawling")
                    self.adapter.update_image_status(image_id, ImageIndexStatus.DOWNLOAD_FAILED)
                self.queue.task_done()
                time.sleep(1)
            else:
                self.logger.log("Queue empty")
                time.sleep(10)

    def run(self):
        producer = Process(target=self.produce)
        producer.start()
        consumers = []
        for i in range(self.NUM_PROCESSES):
            consumer = Process(target=self.consume, args=(i,))
            consumers.append(consumer)
            consumer.start()
        for consumer in consumers:
            consumer.join()
        producer.join()
        self.queue.join()
def get_data_for_df_test( start_date, end_date, period, test, ): """ """ process_id = 0 process_cnt = 0 velo_instances = [] velo_instances_ret = [] queue = JoinableQueue() date_format = "%m/%d/%Y" for i in range(3): end_date_o = datetime.strptime( end_date[i], date_format, ).date() start_date_o = datetime.strptime( start_date[i], date_format, ).date() process_name = "process_{:03d}".format(process_id) velo_inst = Velo( process_id=process_id, process_name=process_name, queue=queue, date_id=date, ) process = Process(target=velo_inst.run) process_id += 1 process_cnt += 1 velo_instances.append(velo_inst) Multiprocess.processes.append(process) for i in range(process_cnt): Multiprocess.processes[i].start() for i in range(process_cnt): msg_process_id = None msg_from_queue = "" while True: msg_from_queue = queue.get() msg_process_id = msg_from_queue[0] velo_instances_ret.append(msg_from_queue[1]) queue.task_done() break Multiprocess.processes[msg_process_id].join() return velo_instances_ret
def main():
    jobs = JoinableQueue()
    result = JoinableQueue()

    print len(datasets)
    numToProcess = 10
    scores = pd.DataFrame(columns=['precision', 'recall', 'fmeasure',
                                   'numResult', 'minScore', 'topHits',
                                   'contentWeight', 'relationWeight'])

    manual_annotations = get_manual_annotations(numToProcess)
    manual_tuples = get_ir_tuples(manual_annotations)

    for key in manual_annotations.keys():
        jobs.put(key)

    processed_count = Counter()

    for i in xrange(NUMBER_OF_PROCESSES):
        p = Process(target=work, args=(i, jobs, result, processed_count))
        p.daemon = True
        p.start()

    #work(1, jobs, result, processed_count)

    automated_annotations = {}

    jobs.join()

    while not result.empty():
        dataset, classes = result.get()
        automated_annotations[dataset] = classes
        result.task_done()

    automated_tuples = get_ir_tuples(automated_annotations)
    hits = manual_tuples & automated_tuples
    misses = manual_tuples - automated_tuples
    precision = float(len(hits)) / len(automated_tuples)
    recall = float(len(hits)) / len(manual_tuples)
    fmeasure = 2 * (precision * recall) / (precision + recall)
    # print '\t'.join([str(x) for x in [precision, recall, fmeasure,
    #                                   numResult, minScore, topHits]])
    scores = scores.append(dict(precision=precision, recall=recall, fmeasure=fmeasure,
                                numResult=numResult, minScore=minScore, topHits=topHits,
                                contentWeight=contentWeight, relationWeight=relationWeight),
                           ignore_index=True)

    print scores
class Analyzer(object): def __init__(self, data_root, working_dir, tpr, index=True, index_output='index.h5'): # list of analysis objects self.__analyses = [] self.__working_dir = working_dir self.__fs = file_system.SH3FileSystem(data_root, index=True, index_output=index_output) self.__loader = loader.Loader(working_dir) self.__task_queue = JoinableQueue(8) self.__tpr = tpr def run(self): # start a queue of size max 8, block if no empty slots # populate the task queue with (analysis, xtc) items for i in range(0, 8): p = Process(target=self.__worker) p.start() for batch in self.__fs.xtc_files(): print "batch", batch for xtc in batch: for analysis in self.__analyses: print "queuing", analysis.name(), "and", xtc.name() self.__task_queue.put([analysis, xtc], True, None) print "waiting for these tasks to finish" self.__task_queue.join() print "tasks have finished" print "PID", os.getpid(), "loading analysis" for xtc in batch: for a in self.__analyses: self.__loader.load(a, xtc) def add(self, analysis): self.__analyses.append(analysis) def remove(self, analysis): self.__analyses.append(analysis) def __worker(self): # TODO: use pool because it looks like the processes sometimes don't die if it fails # get one item from queue # block if queue is empty while True: try: # timeout after 30 seconds analysis,xtc = self.__task_queue.get(True, 30) except Empty: break else: analysis.run(xtc, self.__tpr) self.__task_queue.task_done()
def parallel(self): from multiprocessing import Process, Queue, JoinableQueue self.ntrajs = [] for i in range(self.cpus): self.ntrajs.append(min(int(floor(float(self.ntraj) /self.cpus)), self.ntraj-sum(self.ntrajs))) cnt = sum(self.ntrajs) while cnt<self.ntraj: for i in range(self.cpus): self.ntrajs[i] += 1 cnt+=1 if (cnt>=self.ntraj): break self.ntrajs = np.array(self.ntrajs) self.ntrajs = self.ntrajs[np.where(self.ntrajs>0)] self.nprocs = len(self.ntrajs) sols = [] processes = [] resq = JoinableQueue() print "Number of cpus:", self.cpus print "Trying to start", self.nprocs, "process(es)." print "Number of trajectories for each process:" print self.ntrajs for i in range(self.nprocs): p = Process(target=self.evolve_serial, args=((resq,self.ntrajs[i],i,self.seed*(i+1)),)) p.start() processes.append(p) resq.join() cnt = 0 while True: try: sols.append(resq.get()) resq.task_done() cnt += 1 if (cnt >= self.nprocs): break except KeyboardInterrupt: break except: pass resq.join() for proc in processes: try: proc.join() except KeyboardInterrupt: print("Cancel thread on keyboard interrupt") proc.terminate() proc.join() resq.close() return sols
def __iter__(self):
    queue = JoinableQueue(maxsize=self.max_queue_size)
    n_batches, job_queue = self._start_producers(queue)

    # Run as consumer (read items from queue, in current thread)
    for x in xrange(n_batches):
        item = queue.get()
        #print queue.qsize(), "GET"
        yield item  # Yield the item to the consumer (user)
        queue.task_done()

    queue.close()
    job_queue.close()
def worker(input_q: JoinableQueue, output: Queue):
    from django import db
    db.connection.close()
    while True:
        task = input_q.get()
        if task == "end":
            break
        html = urlOpen.get_html(task.url)
        if html:
            text = textParser.get_text_from_html(html)
        input_q.task_done()
        # info()
        output.put(task.url)
    print("exit")
def __iter__(self):
    queue = JoinableQueue(maxsize=params.N_PRODUCERS * 2)
    n_batches, job_queue = self.start_producers(queue)

    # Run as consumer (read items from queue, in current thread)
    for x in xrange(n_batches):
        item = queue.get()
        #print len(item[0]), queue.qsize(), "GET"
        yield item
        queue.task_done()

    #queue.join()  #Lock until queue is fully done
    queue.close()
    job_queue.close()
def apply_mt(self, xs, parallelism, **kwargs): """Run the UDF multi-threaded using python multiprocessing""" if snorkel_conn_string.startswith('sqlite'): raise ValueError('Multiprocessing with SQLite is not supported. Please use a different database backend,' ' such as PostgreSQL.') # Fill a JoinableQueue with input objects in_queue = JoinableQueue() for x in xs: in_queue.put(x) # If the UDF has a reduce step, we collect the output of apply in a Queue out_queue = None if hasattr(self.udf_class, 'reduce'): out_queue = JoinableQueue() # Start UDF Processes for i in range(parallelism): udf = self.udf_class(in_queue=in_queue, out_queue=out_queue, **self.udf_init_kwargs) udf.apply_kwargs = kwargs self.udfs.append(udf) # Start the UDF processes, and then join on their completion for udf in self.udfs: udf.start() # If there is a reduce step, do now on this thread if hasattr(self.udf_class, 'reduce'): while any([udf.is_alive() for udf in self.udfs]): while True: try: y = out_queue.get(True, QUEUE_TIMEOUT) self.reducer.reduce(y, **kwargs) out_queue.task_done() except Empty: break self.reducer.session.commit() self.reducer.session.close() # Otherwise just join on the UDF.apply actions else: for i, udf in enumerate(self.udfs): udf.join() # Terminate and flush the processes for udf in self.udfs: udf.terminate() self.udfs = []
class Multiplexer(object): def __init__(self, worker, writer, threads=4): self.worker=worker self.writer=writer self.q=JoinableQueue() self.done = Value(c_bool,False) self.consumer=Process(target=self.consume) self.pool = Pool(threads, init_opener) def start(self): self.done.value=False self.consumer.start() def addjob(self, url, data=None): params=[url] if data: params.append(data) try: return self.pool.apply_async(self.worker,params,callback=self.q.put) except: logger.error('[!] failed to scrape '+ url) logger.error(traceback.format_exc()) raise def finish(self): self.pool.close() logger.info('closed pool') self.pool.join() logger.info('joined pool') self.done.value=True self.q.close() logger.info('closed q') self.consumer.join() logger.info('joined consumer') #self.q.join() #logger.info('joined q') def consume(self): param=[0,0] while True: job=None try: job=self.q.get(True, timeout=1) except Empty: if self.done.value==True: break if job: param = self.writer(job, param) self.q.task_done() logger.info('added/updated: %s' % param)
def LeafSet(inchan: Queue, outchan: Queue):
    """Report the distinct elements of inchan on outchan."""
    sf = set()
    logger = multiprocessing.log_to_stderr()
    logger.setLevel(logging.INFO)
    morestuff = True
    while morestuff:
        x = inchan.get()
        logger.info("Leaf:%s" % x)
        if x not in sf:
            sf.add(x)
            outchan.put(x)
        inchan.task_done()
        if x == SIGOBJ:
            morestuff = False
    logger.info("leafdone")
def printer(chan: Queue, fmt: str, destination: file):
    """A data sink that prints the contents of chan as strings to destination
    using a format string."""
    logger = multiprocessing.log_to_stderr()
    logger.setLevel(logging.INFO)
    logger.info("sink started")
    i = 0
    morestuff = True
    while morestuff:
        elt = chan.get()
        logger.info(fmt % (i, elt))
        i += 1
        chan.task_done()
        if elt == SIGOBJ:
            morestuff = False
    logger.info('printer done')
def clear_area_around_eye(size = 256, image_dir = 'I:/AI_for_an_eyes/test/test/', target_dir = 'I:/AI_for_an_eyes/test/test_zonder_meuk_256/'): if not os.path.exists(target_dir): os.makedirs(target_dir) util.update_progress(0) tasks = glob.glob(image_dir+'*.jpeg') job_total = len(tasks) print 'Processing images matching ' + image_dir+ '*.jpeg' jobs = Queue() result = JoinableQueue() NUMBER_OF_PROCESSES = cpu_count()*2 for im_name in tasks: jobs.put(im_name) for i in xrange(NUMBER_OF_PROCESSES): p = Thread(target=worker, args=(i, jobs, result, target_dir, size)) p.daemon = True p.start() print 'Starting workers (', NUMBER_OF_PROCESSES, ')!' n_complete = 0 for t in xrange(len(tasks)): r = result.get() n_complete += 1 util.update_progress(n_complete/job_total) result.task_done() #print t, 'done' for w in xrange(NUMBER_OF_PROCESSES): jobs.put(None) util.update_progress(1) print 'Done!' time.sleep(1) result.join() jobs.close() result.close()
def hist_eq(image_dir = 'test_hist/', target_dir = 'test_result_hist/', method = 'CLAHE'): if not os.path.exists(target_dir): os.makedirs(target_dir) tasks = glob.glob(image_dir+'*.jpeg') job_total = len(tasks) print 'Processing images matching ' + image_dir+ '*.jpeg' jobs = Queue() result = JoinableQueue() NUMBER_OF_PROCESSES = cpu_count()*2 for im_name in tasks: jobs.put(im_name) for i in xrange(NUMBER_OF_PROCESSES): p = Thread(target=worker, args=(i, jobs, result, target_dir, method)) p.daemon = True p.start() print 'Starting workers (', NUMBER_OF_PROCESSES, ')!' n_complete = 0 for t in xrange(len(tasks)): r = result.get() n_complete += 1 util.update_progress(n_complete/job_total) result.task_done() #print t, 'done' for w in xrange(NUMBER_OF_PROCESSES): jobs.put(None) print 'Done!' result.join() jobs.close() result.close()
class MMapPool(object):
    def __init__(self, n, mmap_size):
        self.n = n
        self.mmap_size = mmap_size
        self.pool = [mmap.mmap(-1, mmap_size) for _ in range(n)]
        self.free_mmaps = set(range(n))
        self.free_queue = JoinableQueue()

    def new(self):
        if not self.free_mmaps:
            self.free_mmaps.add(self.free_queue.get())
            self.free_queue.task_done()
        while True:
            try:
                self.free_mmaps.add(self.free_queue.get_nowait())
                self.free_queue.task_done()
            except Empty:
                break
        mmap_idx = self.free_mmaps.pop()
        return mmap_idx, self.pool[mmap_idx]

    def join(self):
        while len(self.free_mmaps) < self.n:
            self.free_mmaps.add(self.free_queue.get())
            self.free_queue.task_done()

    def get(self, idx):
        return self.pool[idx]

    def free(self, idx):
        self.free_queue.put(idx)
class FileReader(Process):
    def __init__(self, filename, buffer_size=1000):
        super(FileReader, self).__init__()
        self.filename = filename
        self.que = JoinableQueue(buffer_size)
        self.event = Event()
        self.event.set()
        self.started = Event()
        self.started.clear()

    # It's crucial to call task_done on the queue after the item was processed
    def get_queue(self):
        return self.que

    def get_event(self):
        return self.event

    def is_done(self):
        return not self.event.is_set() and self.que.empty()

    def run(self):
        self.started.set()
        self.proc()
        self.event.clear()

    def proc(self):
        with open_gz(self.filename, encoding='utf-8') as file:
            for line in file:
                self.que.put(line)

    def __iter__(self):
        self.start()
        self.started.wait()
        while not self.is_done():
            try:
                text = self.que.get(timeout=0.1)
                yield text
                self.que.task_done()
            except Empty:
                pass
def __iter__(self):
    queue = JoinableQueue(maxsize=params.N_PRODUCERS * 8)
    n_batches, job_queue = self.start_producers(queue)

    # Run as consumer (read items from queue, in current thread)
    for x in xrange(n_batches):
        item = queue.get()
        yield item
        queue.task_done()

    queue.close()
    job_queue.close()

    if self.shuffle:
        shuffled_idx = np.random.permutation(len(self.X))
        X_new = []
        y_new = []
        for i in range(len(self.X)):
            X_new += [self.X[shuffled_idx[i]]]
            y_new += [self.y[shuffled_idx[i]]]
        self.X = X_new
        self.y = y_new
def main(file_list, outputFile):
    jobs = JoinableQueue(10000)
    result = JoinableQueue()
    loader_processes = 3
    NUMBER_OF_PROCESSES = cpu_count() - loader_processes

    files = [[] for x in range(loader_processes)]
    for i, f in enumerate(open(file_list)):
        files[i % loader_processes].append(f)

    for i in range(loader_processes):
        Process(target=read, args=(jobs, files[i])).start()

    for i in xrange(NUMBER_OF_PROCESSES):
        p = Process(target=work, args=(i, jobs, result))
        p.daemon = True
        p.start()

    o = csv.writer(open(outputFile, 'w'), delimiter=',')
    o.writerow(stat_cols)

    while not submitted.is_set() and not processed.is_set():
        row = result.get()
        o.writerow(row)
        result.task_done()
class Processor: queue = None def __init__(self, shared_array, data_shape, number_of_workers=1): # create a joinable queue self.queue = JoinableQueue() # keep reference to shared memory array self.shared_array = shared_array self.data_shape = data_shape # create the processes self.processes = [Process(target=self.compute_indicator) for _ in range(number_of_workers)] for p in self.processes: p.start() def add_work_item(self, item): # add the parameters list to the parameters queue self.queue.put(item) def compute_indicator(self): while True: # get a list of arguments from the queue arguments = self.queue.get() # if we didn't get one we keep looping if arguments is None: break # process the arguments here index = arguments[0] month_scale = arguments[1] valid_min = arguments[2] valid_max = arguments[3] # turn the shared array into a numpy array data = np.ctypeslib.as_array(self.shared_array) data = data.reshape(self.data_shape) # only process non-empty grid cells, i.e. data array contains at least some non-NaN values if (isinstance(data[:, index], np.ma.MaskedArray) and data[:, index].mask.all()) \ or np.isnan(data[:, index]).all() or (data[:, index] < 0).all(): pass else: # we have some valid values to work with logger.info('Processing latitude: {}'.format(index)) # perform a fitting to gamma fitted_values = indices.spi_gamma(data[:, index], month_scale, valid_min, valid_max) # update the shared array data[:, index] = fitted_values # indicate that the task has completed self.queue.task_done() def terminate(self): # terminate all processes for p in self.processes: p.terminate() def wait_on_all(self): #wait until queue is empty self.queue.join()
def main(): #--PLUGINS INITIALIZATION-- start_time = time() print '\n\n\n' + _format_title('Registering available plugins') sslyze_plugins = PluginsFinder() available_plugins = sslyze_plugins.get_plugins() available_commands = sslyze_plugins.get_commands() print '' for plugin in available_plugins: print ' ' + plugin.__name__ print '\n\n' # Create the command line parser and the list of available options sslyze_parser = CommandLineParser(available_plugins, PROJECT_VERSION) try: # Parse the command line (command_list, target_list, shared_settings) = sslyze_parser.parse_command_line() except CommandLineParsingError as e: print e.get_error_msg() return print command_list #--PROCESSES INITIALIZATION-- nb_processes = command_list.nb_processes if command_list.https_tunnel: nb_processes = 1 # Let's not kill the proxy task_queue = JoinableQueue() # Processes get tasks from task_queue and result_queue = JoinableQueue() # put the result of each task in result_queue # Spawn a pool of processes, and pass them the queues process_list = [] for _ in xrange(nb_processes): p = WorkerProcess(task_queue, result_queue, available_commands, \ shared_settings) p.start() process_list.append(p) # Keep track of the processes that were started #--TESTING SECTION-- # Figure out which hosts are up and fill the task queue with work to do print _format_title('Checking host(s) availability') targets_OK = [] targets_ERR = [] target_results = ServersConnectivityTester.test_server_list(target_list, shared_settings) for target in target_results: if target is None: break # None is a sentinel here # Send tasks to worker processes targets_OK.append(target) for command in available_commands: if getattr(command_list, command): args = command_list.__dict__[command] task_queue.put( (target, command, args) ) for exception in target_results: targets_ERR.append(exception) print ServersConnectivityTester.get_printable_result(targets_OK, targets_ERR) print '\n\n' # Put a 'None' sentinel in the queue to let the each process know when every # task has been completed [task_queue.put(None) for _ in process_list] # Keep track of how many tasks have to be performed for each target task_num=0 for command in available_commands: if getattr(command_list, command): task_num+=1 # --REPORTING SECTION-- processes_running = nb_processes # XML output if shared_settings['xml_file']: xml_output_list = [] # Each host has a list of results result_dict = {} for target in targets_OK: result_dict[target] = [] # If all processes have stopped, all the work is done while processes_running: result = result_queue.get() if result == None: # Getting None means that one process was done processes_running -= 1 else: # Getting an actual result (target, command, plugin_result) = result result_dict[target].append((command, plugin_result)) if len(result_dict[target]) == task_num: # Done with this target # Print the results and update the xml doc print _format_txt_target_result(target, result_dict[target]) if shared_settings['xml_file']: xml_output_list.append(_format_xml_target_result(target, result_dict[target])) result_queue.task_done() # --TERMINATE-- # Make sure all the processes had time to terminate task_queue.join() result_queue.join() #[process.join() for process in process_list] # Causes interpreter shutdown errors exec_time = time()-start_time # Output XML doc to a file if needed if shared_settings['xml_file']: result_xml_attr = {'httpsTunnel':str(shared_settings['https_tunnel_host']), 'totalScanTime' : str(exec_time), 'defaultTimeout' : 
str(shared_settings['timeout']), 'startTLS' : str(shared_settings['starttls'])} result_xml = Element('results', attrib = result_xml_attr) # Sort results in alphabetical order to make the XML files (somewhat) diff-able xml_output_list.sort(key=lambda xml_elem: xml_elem.attrib['host']) for xml_element in xml_output_list: result_xml.append(xml_element) xml_final_doc = Element('document', title = "SSLyze Scan Results", SSLyzeVersion = PROJECT_VERSION, SSLyzeWeb = PROJECT_URL) # Add the list of invalid targets xml_final_doc.append(ServersConnectivityTester.get_xml_result(targets_ERR)) # Add the output of the plugins xml_final_doc.append(result_xml) # Hack: Prettify the XML file so it's (somewhat) diff-able xml_final_pretty = minidom.parseString(tostring(xml_final_doc, encoding='UTF-8')) with open(shared_settings['xml_file'],'w') as xml_file: xml_file.write(xml_final_pretty.toprettyxml(indent=" ", encoding="utf-8" )) print _format_title('Scan Completed in {0:.2f} s'.format(exec_time))
tasks = [Task(q, out_queue) for i in range(NUM_WORKERS)]
for w in tasks:
    w.start()

logging.info("Items left in queue: {0}".format(q.qsize()))
logging.debug("Joining q")
# q.join()
# qf.join()

if False:
    processes_active = True
    while processes_active:
        for w in tasks:
            processes_active = False or w.is_alive()
            logging.debug(w.is_alive())
        sleep(0.2)

for y in tasks:
    y.join()

logging.info("Elapsed time with {0} threads and {1} as maximum number: {2}".format(
    NUM_WORKERS, MAX_PRIME_NUMBER, datetime.now() - start_time))

count = 0
while not out_queue.empty():
    out_queue.get()
    out_queue.task_done()
    count += 1

logging.info("Total primes found: {0}".format(count))
def main(): # For py2exe builds freeze_support() # Handle SIGINT to terminate processes signal.signal(signal.SIGINT, sigint_handler) # --PLUGINS INITIALIZATION-- start_time = time() print "\n\n\n" + _format_title("Registering available plugins") sslyze_plugins = PluginsFinder() available_plugins = sslyze_plugins.get_plugins() available_commands = sslyze_plugins.get_commands() print "" for plugin in available_plugins: print " " + plugin.__name__ print "\n\n" # Create the command line parser and the list of available options sslyze_parser = CommandLineParser(available_plugins, PROJECT_VERSION) try: # Parse the command line (command_list, target_list, shared_settings) = sslyze_parser.parse_command_line() except CommandLineParsingError as e: print e.get_error_msg() return # --PROCESSES INITIALIZATION-- # Three processes per target from MIN_PROCESSES up to MAX_PROCESSES nb_processes = max(MIN_PROCESSES, min(MAX_PROCESSES, len(target_list) * 3)) if command_list.https_tunnel: nb_processes = 1 # Let's not kill the proxy task_queue = JoinableQueue() # Processes get tasks from task_queue and result_queue = JoinableQueue() # put the result of each task in result_queue # Spawn a pool of processes, and pass them the queues for _ in xrange(nb_processes): priority_queue = JoinableQueue() # Each process gets a priority queue p = WorkerProcess(priority_queue, task_queue, result_queue, available_commands, shared_settings) p.start() process_list.append((p, priority_queue)) # Keep track of each process and priority_queue # --TESTING SECTION-- # Figure out which hosts are up and fill the task queue with work to do print _format_title("Checking host(s) availability") targets_OK = [] targets_ERR = [] # Each server gets assigned a priority queue for aggressive commands # so that they're never run in parallel against this single server cycle_priority_queues = cycle(process_list) target_results = ServersConnectivityTester.test_server_list(target_list, shared_settings) for target in target_results: if target is None: break # None is a sentinel here # Send tasks to worker processes targets_OK.append(target) (_, current_priority_queue) = cycle_priority_queues.next() for command in available_commands: if getattr(command_list, command): args = command_list.__dict__[command] if command in sslyze_plugins.get_aggressive_commands(): # Aggressive commands should not be run in parallel against # a given server so we use the priority queues to prevent this current_priority_queue.put((target, command, args)) else: # Normal commands get put in the standard/shared queue task_queue.put((target, command, args)) for exception in target_results: targets_ERR.append(exception) print ServersConnectivityTester.get_printable_result(targets_OK, targets_ERR) print "\n\n" # Put a 'None' sentinel in the queue to let the each process know when every # task has been completed for (proc, priority_queue) in process_list: task_queue.put(None) # One sentinel in the task_queue per proc priority_queue.put(None) # One sentinel in each priority_queue # Keep track of how many tasks have to be performed for each target task_num = 0 for command in available_commands: if getattr(command_list, command): task_num += 1 # --REPORTING SECTION-- processes_running = nb_processes # XML output xml_output_list = [] # Each host has a list of results result_dict = {} for target in targets_OK: result_dict[target] = [] # If all processes have stopped, all the work is done while processes_running: result = result_queue.get() if result is None: # Getting None means that 
one process was done processes_running -= 1 else: # Getting an actual result (target, command, plugin_result) = result result_dict[target].append((command, plugin_result)) if len(result_dict[target]) == task_num: # Done with this target # Print the results and update the xml doc print _format_txt_target_result(target, result_dict[target]) if shared_settings["xml_file"]: xml_output_list.append(_format_xml_target_result(target, result_dict[target])) result_queue.task_done() # --TERMINATE-- # Make sure all the processes had time to terminate task_queue.join() result_queue.join() # [process.join() for process in process_list] # Causes interpreter shutdown errors exec_time = time() - start_time # Output XML doc to a file if needed if shared_settings["xml_file"]: result_xml_attr = { "httpsTunnel": str(shared_settings["https_tunnel_host"]), "totalScanTime": str(exec_time), "defaultTimeout": str(shared_settings["timeout"]), "startTLS": str(shared_settings["starttls"]), } result_xml = Element("results", attrib=result_xml_attr) # Sort results in alphabetical order to make the XML files (somewhat) diff-able xml_output_list.sort(key=lambda xml_elem: xml_elem.attrib["host"]) for xml_element in xml_output_list: result_xml.append(xml_element) xml_final_doc = Element( "document", title="SSLyze Scan Results", SSLyzeVersion=PROJECT_VERSION, SSLyzeWeb=PROJECT_URL ) # Add the list of invalid targets xml_final_doc.append(ServersConnectivityTester.get_xml_result(targets_ERR)) # Add the output of the plugins xml_final_doc.append(result_xml) # Hack: Prettify the XML file so it's (somewhat) diff-able xml_final_pretty = minidom.parseString(tostring(xml_final_doc, encoding="UTF-8")) with open(shared_settings["xml_file"], "w") as xml_file: xml_file.write(xml_final_pretty.toprettyxml(indent=" ", encoding="utf-8")) print _format_title("Scan Completed in {0:.2f} s".format(exec_time))