class emailSubsystem(object):
    def __init__(self):
        ### will move to Celery eventually; with Celery, the app would be able to periodically
        # wake up and check replyQueue to see which emails were sent, which were not,
        # and what to do ...
        self.emailQueue = JoinableQueue()
        self.replyQueue = JoinableQueue()

        self.worker = Process(target=sendEmailWorker,
                              args=(self.emailQueue, self.replyQueue))

    def start(self):
        # temporarily comment out starting a new process as it seems to leave zombies
        # and causes the app not to start as the max process limit is reached
        #self.worker.start()
        return

    def shutdown(self):
        # post the poison pill
        # wait on the queue to be done, i.e. join on emailQueue
        # wait on the worker process to die, i.e. join on worker
        self.emailQueue.put(None)
        self.emailQueue.join()
        self.worker.join()
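# A minimal sketch of the worker this class expects, assuming sendEmailWorker
# drains emailQueue until the None poison pill, acknowledges each item with
# task_done(), and reports outcomes on replyQueue; send_one_email is a
# hypothetical helper, not part of the original code.
def sendEmailWorker(emailQueue, replyQueue):
    for email in iter(emailQueue.get, None):
        replyQueue.put(send_one_email(email))   # hypothetical send helper
        emailQueue.task_done()
    emailQueue.task_done()   # acknowledge the poison pill so shutdown()'s join() returns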
def init(opts): tasks = JoinableQueue() manager = Manager() stats = manager.dict() stats['processed'] = 0 stats['missing'] = 0 # start up our workers threads log.info("Creating %d workers" % opts.threads) workers = [ Worker(tasks, opts, stats) for i in xrange(opts.threads)] for w in workers: w.start() # queue up the bibcodes for doc in get_docs(opts): tasks.put(doc) # add some poison pills to the end of the queue log.info("poisoning our task threads") for i in xrange(opts.threads): tasks.put(None) # join the results queue. this should # block until all tasks in the task queue are completed log.info("Joining the task queue") tasks.join() log.info("Joining the task threads") for w in workers: w.join() log.info("processed: %d" % stats['processed']) log.info("records with no existing extract dir: %d" % stats['missing'])
def main(multiplier): # Establish communication queues tasks = JoinableQueue() results = Queue() # Start consumers num_consumers = cpu_count() * multiplier print 'Creating %d consumers' % num_consumers consumers = [Consumer(tasks, results) for i in xrange(num_consumers)] for w in consumers: w.start() fout = open(os.path.join(settings.PERSIST_DIR, 'doc_matrix_comparison.csv'), 'w', 0) rw = ResultWriter(results, csv.writer(fout)) rw.start() #num_docs = 801781 num_docs = 25 for i in xrange(num_docs): tasks.put(Task(i)) # Add a poison pill for each consumer for i in xrange(num_consumers): tasks.put(None) # Wait for all of the tasks to finish tasks.join() results.put('STOP')
def upload(args=None, authdata=None): """ Initialize the containers and pseudo-directories for what is to be uploaded. Separates jobs into sub-jobs based on container. Up to 100 containers per second. """ #initalize the containers in parallel containers = [] for obj in os.listdir(args['dir']): if args['container']: containers.append(args['container']) break #if os.path.isdir(os.path.abspath(args['dir']+'/'+obj)): if os.path.isdir(os.path.join(args['dir'], obj)): containers.append(obj) if containers: #set container job count to the less of args['cc'] or container count if args['cc'] < len(containers): args['cc'] = len(containers) #create queue and jobs container_queue = JoinableQueue() for container_worker in range(args['cc']): job = Process(target=container_consumer, args=(args, authdata, container_queue,)) job.daemon=False job.start() for container in containers: container_queue.put(container) #tailing the queue with a Null marker so the works shut down nicely. for container in range(args['cc']): container_queue.put(None) container_queue.join()
def crunch(file_name, ext_type, handler, pool_size=4, queue_size=40, limit=None): print 'Crunching file: %s, limit: %s' % (file_name, limit) q = JoinableQueue(queue_size) q_feats = Queue() pool = Pool(pool_size, wrap_handler(handler), ((q, q_feats),)) with file_reader(file_name) as reader: idx = 0 for entry in reader: if (entry.pathname.find(ext_type) != -1): text = [b for b in entry.get_blocks()] key = entry.pathname.split('/')[-1].split('.')[0] q.put((key, text), True) idx += 1 print 'Processing:', entry.pathname, idx if limit and idx >= limit: print 'Reached the limit' break q.close() q.join() pool.close() result = [] for i in range(q_feats.qsize()): result.append(q_feats.get()) return result
def getdata_multiprocess(self, task_funcsiter=None, task_funcsiterparas={},
                         task_funcsconst=None, task_funcsconstparas={},
                         processnum=None, threadnum=2):
    def _start_processes(taskqueue, resultqueue, taskqueue_lk, task_funcsconst,
                         task_funcsconstparas, processnum, threadnum):
        for i in range(processnum):
            p = Process(target=self.multiprocess_task,
                        args=(taskqueue, resultqueue,
                              taskqueue_lk, threadnum,
                              task_funcsconst, task_funcsconstparas),
                        name='P' + str(i))
            p.daemon = True
            p.start()

    processnum = processnum if processnum else multiprocessing.cpu_count()
    # queue for handing out tasks
    taskqueue = JoinableQueue()
    # lock/condition used when writing tasks and waking up workers
    taskqueue_lk = multiprocessing.Condition(multiprocessing.Lock())
    # queue for returning results
    resultqueue = Queue()
    _start_processes(taskqueue, resultqueue, taskqueue_lk, task_funcsconst,
                     task_funcsconstparas, processnum, threadnum)

    # enqueue the tasks and wake up the worker processes
    if task_funcsconst is None:
        self._put_tasks(zip(task_funcsiter, task_funcsiterparas), taskqueue, taskqueue_lk)
    else:
        self._put_tasks(task_funcsiterparas, taskqueue, taskqueue_lk)
    logger.info('main join!')
    taskqueue.join()
    logger.info('main end!')
    return self._get_results(resultqueue)
def solve(iterations, proc_count):
    queue = JoinableQueue()
    partition = get_iterations_partition(iterations, proc_count)
    for iteration in partition:
        queue.put(iteration)
    # one poison pill per worker process
    for i in range(proc_count):
        queue.put(None)

    manager = Manager()
    result = manager.list()
    processes = []
    cur_time = time.time()
    for i in range(proc_count):
        proc = Process(target=worker, args=(queue, result,))
        proc.start()
        processes.append(proc)

    queue.join()
    for proc in processes:
        proc.join()
    cur_time = time.time() - cur_time
    print_results(cur_time, result, iterations)
def setup_queue(options): probe_servers = Queue() progress_queue = Queue() run = Probe.ProbeRun.objects.get(id = options.run_id) summary_top = Results.ResultSummaryList.objects.get(part_of_run=run) summary_top.setup() connection.close() threads = [] for i in range(options.threads): new_thread = Process(target=SetupQueueThread, args=(i,run, probe_servers, progress_queue)) new_thread.daemon = True new_thread.start() threads.append(new_thread) progress_thread = threading.Thread(target=__ProgressCounter, args=(run, progress_queue, threads,options)) progress_thread.daemon = True progress_thread.start() i = 0; if options.input_filename and (not options.count or i < options.count): for hostname_line in fileinput.input(options.input_filename, openhook=fileinput.hook_compressed): probe_servers.put(hostname_line) i+=1 if options.count and i >= options.count: break; probe_servers.join() progress_queue.join() return run
def aggress(map): global startMap startMap = map #print "Regressing..." state = State() jobs = [] longestSolution = Value('d', 20) highestScore = Value('d', 0) queue = JoinableQueue() manager = Manager() d = manager.dict() d.clear() l = RLock() if multiProc: queue.put((state, map, 1)) for i in range(numProcs): p = Process(target = multiMain, args=(startMap, l, d, queue,highestScore)) p.start() queue.join() else: a(l, highestScore, d, None, state, map, 1)
def run(self): # Changes the process name shown by ps for instance setProcTitle ("agentcluster master [version: %s] [monitoring: %d seconds]" % (__version__,self.monitoring_period) ); try: logger.info ( 'Agent cluster server starting' ); logger.info ( 'Configurations will be scanned in directories:' ); for directory in confdir.data: logger.info ( ' o %s', os.path.abspath(directory) ); self.watchdog = Watchdog(self.monitoring_period) self.watchdog.start() # Generates a deadlock to enter in sleep mode # Only an external signal can break this deadlock logger.info ( 'Agent cluster server started' ); queue = JoinableQueue() queue.put(object()); queue.join(); except KeyboardInterrupt: logger.info ( 'Agent cluster server interrupted' ); except Exception: logger.error ( 'Exception catched in main process: %s', sys.exc_info()[1] ); logger.debug ( "", exc_info=True ); finally: # First stop the monitoring to avoid restarting killed agents if self.watchdog is not None: self.watchdog.shutdown = True self.watchdog.join() logger.info ( 'Agent cluster server end' ); logging.shutdown()
def main(workers=10): """ Executes main function of mini-framework's Control thread. :param workers: Integer detailing number of worker FIFO threads to employ """ start_logging() log_info("New multiprocessing session with {} workers".format(workers)) # Input JoinableQueue and Output Queue inq = JoinableQueue(maxsize=int(workers*1.5)) outq = Queue(maxsize=int(workers*1.5)) ot = OutThread(workers, outq) ot.start() for _ in range(workers): w = WorkerThread(inq, outq) w.start() # Create a sequence of a 1000 random alphabetic characters random_chars = (ascii_letters[randint(0, 51)] for _ in range(1000)) # Keep input queue loaded for as long as possible # Feed the process pool with work units for work in enumerate(random_chars): inq.put(work) # Fill the input queue with Nones to shut the worker threads down # which terminates the process pool for _ in range(workers): inq.put(None) inq.join() print("Control process terminating")
class FlightProducer(Process): def __init__(self, options={}, date_group=[]): self.options = options self.date_group = date_group self.date_queue = JoinableQueue() def start(self): consumers_list = [] consumers_num = cpu_count() * 2 # Consumers for i in xrange(consumers_num): consumers_list.append(FlightConsumer(self.options, self.date_queue)) for consumer in consumers_list: consumer.start() # Put each date group to queue for date_item in self.date_group: self.date_queue.put(date_item) # Tell consumers can exit for i in xrange(consumers_num): self.date_queue.put(None) # Wait for all of the consumers to finish self.date_queue.join() print('Done')
class ProcessPool(object):
    def __init__(self, size=1):
        self.size = size
        # jobs must be a joinable queue (e.g. JoinableQueue) for join() below to work
        self.jobs = Queue()
        self.results = Queue()
        self.processes = []

    def start(self):
        '''start all processes'''
        for i in range(self.size):
            self.processes.append(ProcessWorker(self))
        for process in self.processes:
            process.start()

    def append_job(self, job, *args, **kwargs):
        self.jobs.put((job, args, kwargs))

    def join(self):
        '''wait until all jobs are done'''
        self.jobs.join()

    def stop(self):
        '''kill all processes'''
        for process in self.processes:
            process.stop()
        for process in self.processes:
            # wait for the processes to complete
            if process.is_alive():
                process.join()
        del self.processes[:]  # reset processes to empty
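# A minimal usage sketch for the pool above, assuming ProcessWorker pulls
# (job, args, kwargs) tuples from pool.jobs, calls task_done() on each, and
# exposes a stop() method; count_words and the file names are hypothetical.
def count_words(path):
    with open(path) as fh:
        return len(fh.read().split())

pool = ProcessPool(size=4)
pool.start()
for path in ["a.txt", "b.txt", "c.txt"]:
    pool.append_job(count_words, path)
pool.join()   # blocks until every queued job has been marked done
pool.stop()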
def main(opts, files): if opts.threads == 1: log.info("running synchronously") run_syncronous(opts, files) else: Q = JoinableQueue() workers = [Worker(Q, opts) for i in xrange(opts.threads)] log.info("initializing %d threads" % opts.threads) for w in workers: w.start() # push log events onto the queue events_iter = events(files, opts) if opts.limit: events_iter = itertools.islice(events_iter, opts.limit) for event in events_iter: Q.put(event) # add poison pills for i in xrange(opts.threads): Q.put(None) Q.join() log.info("work complete. shutting down threads.") for w in workers: w.join()
class JavaMultipleParserExecutor: def __init__(self, output_dir, repo_path, processes=None): self.target_blobs = JoinableQueue() self.num_consumers = processes if processes else cpu_count() self.consumers = [JavaConsumer(self.target_blobs, repo_path, output_dir) for i in range(self.num_consumers)] for consumer in self.consumers: consumer.start() self.closed = False def parse_blob(self, blob): if self.closed: return self.target_blobs.put(blob.hexsha) def join(self): if self.closed: return for i in range(self.num_consumers): self.target_blobs.put(None) self.target_blobs.join() self.closed = True
def launch_mesos_tf(marathon_url_str, tsknom_str, cpu_float, mem_float, ntasks_int, uri_str, marathon_usr, marathon_usrpwd, localhost_str, mxattempts=10): toret_nodes = dict() docker = False if uri_str.find('docker') > -1: uri_str = uri_str.replace('docker://', '') docker = True uri_str = uri_str.rstrip('/') marathon_url_str = marathon_url_str.rstrip('/') counter = 0 tq = JoinableQueue() q = Queue() plist = list() consumers = [ Consumer(tq, q) for i in xrange(ntasks_int) ] for c in consumers: c.start() for i in xrange(ntasks_int): tq.put(Task(post_marathon_tasks, (marathon_url_str, tsknom_str, cpu_float, mem_float, i+1, ntasks_int, uri_str, marathon_usr, marathon_usrpwd, localhost_str, mxattempts, docker))) for i in xrange(ntasks_int): tq.put(None) tq.join() for i in xrange(1, ntasks_int+1): toret_nodes[i] = q.get() return toret_nodes
def main(): jobs = JoinableQueue() result = JoinableQueue() numToProcess = -1 scores = pd.DataFrame(columns=['query','fmeasure','precision','recall', 'size','maxDistance','topHits',"contextSteps"]) print len(datasets) for key in datasets: jobs.put(key) processed_count = Counter() for i in xrange(NUMBER_OF_PROCESSES): p = Process(target=work, args=(i, jobs, result, processed_count)) p.daemon = True p.start() #work(1, jobs, result, processed_count) automated_annotations = {} distances = {} jobs.join() dataset_index = collections.defaultdict(set) annotated_datasets = set() while not result.empty(): dataset, classes = result.get() if len(classes) == 0: annotated_datasets.add(dataset) for c in classes.keys(): dataset_index[c].add(dataset) owl_class = Class(c, graph=graph) for parent in owl_class.parents: dataset_index[parent.identifier].add(dataset) result.task_done() print '\n' for query, c in queries.items(): manual = ground_truth[query] automated = dataset_index[c] hits = manual & automated misses = manual - automated precision = np.nan if len(automated) == 0 else float(len(hits)) / len(automated) recall = np.nan if len(manual) == 0 else float(len(hits)) / len(manual) if precision != 0 or recall != 0: fmeasure = 0 if np.isnan(precision) or np.isnan(recall) else 2 * (precision * recall) / (precision + recall) else: fmeasure = 0 scores = scores.append(dict(query=query, size=len(manual), precision=precision, recall=recall, fmeasure=fmeasure,topHits=topHits, maxDistance=maxDistance, contextSteps = context_steps), ignore_index=True) print "Hits for", query, c print '\n'.join(sorted(hits)) print scores print "Annotated", len(annotated_datasets), "datasets."
def convert(self): tmp_unpack_path = self._create_tmp_unpack_folder(self.output_filepath) header, files_attrs = self._get_archive_metadata() file_list = [attrs["Path"] for attrs in files_attrs if attrs["Attributes"] == "....A"] files_input_queue = JoinableQueue(200) files_processed_queue = JoinableQueue(200) extractor_processes = Pool(self._threads_num, self._extract_files, (files_input_queue, files_processed_queue, tmp_unpack_path)) archiver_process = Process(target=self._archive_files, args = (files_processed_queue, tmp_unpack_path)) archiver_process.start() for archived_file in file_list: files_input_queue.put(archived_file) files_input_queue.join() for i in xrange(self._threads_num): files_input_queue.put(None) extractor_processes.close() extractor_processes.join() files_processed_queue.join() files_processed_queue.put(None) archiver_process.join() shutil.rmtree(tmp_unpack_path, True)
def get_citations(**args): """ Method to prepare the actual citation dictionary creation """ # create the queues tasks = JoinableQueue() results = JoinableQueue() # how many threads are there to be used if 'threads' in args: threads = args['threads'] else: threads = cpu_count() # initialize the "harvesters" (each harvester get the citations for a bibcode) harvesters = [ CitationHarvester(tasks, results) for i in range(threads)] # start the harvesters for b in harvesters: b.start() # put the bibcodes in the tasks queue for bib in args['bibcodes']: tasks.put(bib) # add some 'None' values at the end of the tasks list, to faciliate proper closure for i in range(threads): tasks.put(None) tasks.join() for b in harvesters: b.join() return [item for sublist in cit_dict.values() for item in sublist]
def main(max): rand_word = '' filename_tmp = "file_tmp.txt" final_file = "dataset.txt" q = JoinableQueue() _worker = Process(target=tuan, args=(q, filename_tmp,)) _worker.start() alphanum = printable[0:62] alpha = printable[10:62] with open(final_file, "w") as f: print("Writing in {}".format(final_file)) for x in range(max): rand_word = random_word(64, alpha) if randint(0, 100) <= 50: q.put(rand_word) else: q.put(random_word(64, alphanum)) print(rand_word, random_word(64, alphanum), sep='\n', file=f) else: q.put(None) q.join() _worker.join() print(file=f) with open(filename_tmp, "r") as f2: copyfileobj(f2, f) print("Removing {}".format(filename_tmp)) remove(filename_tmp) print("Work is done.")
class Mothership(object):
    """ Monitor of producer and consumers """

    def __init__(self, producer, consumers):
        self._queue = JoinableQueue()
        self._producer_proxy = ProducerProxy(self._queue, producer)
        self._consumer_pool = list(ConsumerProxy(self._queue, consumer)
                                   for consumer in consumers)

    def start(self):
        """ Start working """
        logger.info('Starting Producers'.center(20, '='))
        self._producer_proxy.start()
        logger.info('Starting Consumers'.center(20, '='))
        for consumer in self._consumer_pool:
            consumer.start()
        self._producer_proxy.join()
        self._queue.join()

    def __enter__(self):
        return self

    def __exit__(self, types, value, tb):
        return
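# A hedged usage sketch for the monitor above; LineProducer and PrintConsumer are
# placeholders for whatever objects ProducerProxy and ConsumerProxy wrap in the
# real code.
producer = LineProducer("input.txt")
consumers = [PrintConsumer() for _ in range(4)]

with Mothership(producer, consumers) as ship:
    ship.start()   # returns once the producer is done and the queue has drained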
def scheduler(db, category):
    task = JoinableQueue()
    for i in range(cpu_count()):
        pid = os.fork()
        if pid == 0:
            consumer(category, task)
            os._exit(0)  # keep the child process from running the code below
            # print('this line is never executed')
        elif pid < 0:
            logging.error('failed to create child process')

    with ThreadPoolExecutor() as executor:
        cursor = db['image_match_result_{}'.format(category)].find(
            {'$or': [{'robot_processed': False}, {'robot_processed': {'$exists': False}}]},
            {'_id': 1, 'b_image_url': 1, 'c_image_url': 1}
        )
        for item in cursor:
            item['mark'] = True  # flag as wrong
            executor.submit(producer, item, task)

        cursor = db['item_match_result_{}'.format(category)].find(
            {'$or': [{'robot_processed': False}, {'robot_processed': {'$exists': False}}]},
            {'_id': 1, 'b_image_url': 1, 'c_image_url': 1}
        )
        for item in cursor:
            item['mark'] = False  # flag as correct
            executor.submit(producer, item, task)

    task.join()
    os.kill(0, signal.SIGKILL)
def evaluate(points,meshToBasis,kernel,quadRule,coeffs,nprocs=None): """Evaluate a kernel using the given coefficients""" if nprocs==None: nprocs=cpu_count() inputQueue=JoinableQueue() nelements=meshToBasis.nelements for elem in meshToBasis: inputQueue.put(elem) buf=sharedctypes.RawArray('b',len(points[0])*numpy.dtype(numpy.complex128).itemsize) result=numpy.frombuffer(buf,dtype=numpy.complex128) result[:]=numpy.zeros(1,dtype=numpy.complex128) time.sleep(.5) workers=[] for id in range(nprocs): worker=EvaluationWorker(points,kernel,quadRule,coeffs,inputQueue,result) worker.start() workers.append(worker) inputQueue.join() for worker in workers: worker.join() return result.copy()
class QuickReader(): def __init__(self, writer_name,handle_raw_assertion,add_lines_to_queue, isTest = False, num_threads = 5): self.writer_name = writer_name self.num_threads = num_threads self.handle_raw_assertion = handle_raw_assertion self.add_lines_to_queue = add_lines_to_queue self.queue = JoinableQueue() self.isTest = isTest def start(self): print "begin writing " + self.writer_name self.create_processes() self.add_lines_to_queue(self.queue) self.queue.join() print "finished writing " + self.writer_name def pull_lines(self,q,writer): while 1: raw_assertion = q.get() edges = self.handle_raw_assertion(raw_assertion) for edge in edges: writer.write(edge) q.task_done() def create_processes(self): for i in range(self.num_threads): writer = MultiWriter(self.writer_name + "_" + str(i),self.isTest) p = Process(target = self.pull_lines, args = (self.queue, writer)) #p.daemon=True p.start()
def main(): from multiprocessing import JoinableQueue from genmod.vcf import vcf_header from genmod.utils import annotation_parser parser = argparse.ArgumentParser(description="Parse different kind of pedigree files.") parser.add_argument('variant_file', type=str, nargs=1 , help='A file with variant information.') parser.add_argument('annotation_file', type=str, nargs=1 , help='A file with feature annotations.') parser.add_argument('-phased', '--phased', action="store_true", help='If variant file is phased.') parser.add_argument('-v', '--verbose', action="store_true", help='Increase output verbosity.') args = parser.parse_args() infile = args.variant_file[0] if args.verbose: print('Parsing annotationfile...') start_time_annotation = datetime.now() my_anno_parser = annotation_parser.AnnotationParser(args.annotation_file[0], 'ref_gene') if args.verbose: print('annotation parsed. Time to parse annotation: %s\n' % str(datetime.now() - start_time_annotation)) my_head_parser = vcf_header.VCFParser(infile) my_head_parser.parse() print(my_head_parser.__dict__) variant_queue = JoinableQueue() start_time = datetime.now() my_parser = VariantFileParser(infile, variant_queue, my_head_parser, my_anno_parser, args) nr_of_batches = my_parser.parse() print(nr_of_batches) for i in range(nr_of_batches): variant_queue.get() variant_queue.task_done() variant_queue.join() print('Time to parse variants: %s ' % str(datetime.now()-start_time))
def build(opts): tasks = JoinableQueue() results = JoinableQueue() if opts.remove: log.info("Removing existing docs collection") session = utils.get_session(config) session.docs.drop() # start up our builder threads log.info("Creating %d Builder processes" % opts.threads) builders = [ Builder(tasks, results) for i in xrange(opts.threads)] for b in builders: b.start() # queue up the bibcodes for bib in get_bibcodes(opts): tasks.put(bib) # add some poison pills to the end of the queue log.info("poisoning our task threads") for i in xrange(opts.threads): tasks.put(None) # join the results queue. this should # block until all tasks in the task queue are completed log.info("Joining the task queue") tasks.join() log.info("Joining the task threads") for b in builders: b.join() log.info("All work complete")
def main():
    fetch_queue = JoinableQueue()
    reject_queue = JoinableQueue(maxsize=1000)

    log_processor = Process(target=job_creator,
                            args=(fetch_queue, './search_log_valid_2010_06_17'),
                            name='log-processor')

    writers = []
    write_queues = []
    for num in DATA_SETS:
        queue, writer = create_writer(reject_queue, num)
        writers.append(writer)
        write_queues.append(queue)

    fetchers = [create_fetcher(fetch_queue, write_queues, reject_queue, num)
                for num in xrange(NUM_FETCHERS)]

    reject_writer = Process(target=reject,
                            args=(reject_queue, './rejected-lines'),
                            name='related-search-reject-writer')

    log_processor.start()
    reject_writer.start()
    start_processes(writers)
    start_processes(fetchers)

    log_processor.join()
    print 'DONE? '
    fetch_queue.join()
    # wait for each writer queue to drain
    for write_queue in write_queues:
        write_queue.join()
    reject_writer.join()
def generate_cache(path_mapping,rep_func, attributes,num_procs, call_back, stop_check): all_files = set() for pm in path_mapping: if stop_check is not None and stop_check(): return all_files.update(pm) all_files = sorted(all_files) stopped = Stopped() job_queue = JoinableQueue(100) file_ind = 0 while True: if file_ind == len(all_files): break try: job_queue.put(all_files[file_ind],False) except Full: break file_ind += 1 manager = Manager() return_dict = manager.dict() procs = [] counter = Counter() #if call_back is not None: # call_back('Generating representations...') # cb = Process(target = call_back_worker, # args = (call_back, counter, len(all_files), stop_check)) # procs.append(cb) for i in range(num_procs): p = RepWorker(job_queue, return_dict,rep_func,attributes, counter, stopped) procs.append(p) p.start() time.sleep(2) if call_back is not None: call_back('Generating representations...') prev = 0 val = 0 while True: if file_ind == len(all_files): break if stop_check is not None and stop_check(): stopped.stop() time.sleep(1) break job_queue.put(all_files[file_ind]) if call_back is not None: value = counter.value() call_back(value) file_ind += 1 job_queue.join() time.sleep(2) for p in procs: p.join() return return_dict
def setup_queue(options): probe_servers = Queue() progress_queue = Queue() if options.queue_id: queue_list = Probe.PreparedQueueList.objects.get(id=options.queue_id) else: queue_name=options.queue_name.strip('"').strip() queue_list,created = Probe.PreparedQueueList.objects.get_or_create( list_name=queue_name, defaults = dict(list_description=options.description.strip('"').strip()), ) if options.run_id: run = Probe.ProbeRun.objects.get(id=options.run_id) cursor = connection.cursor() cursor.execute("""INSERT INTO probedata2_preparedqueueitem (part_of_queue_id, server_id) SELECT %s AS part_of_queue_id, server_id FROM probedata2_probequeue WHERE part_of_run_id = %s""", [str(queue_list.id),str(run.id)] ) transaction.commit_unless_managed() return queue_list connection.close() threads = [] for i in range(options.threads): new_thread = Process(target=SetupQueueThread, args=(i,queue_list, probe_servers, progress_queue)) new_thread.daemon = True new_thread.start() threads.append(new_thread) progress_thread = threading.Thread(target=__ProgressCounter, args=(progress_queue, threads,options)) progress_thread.daemon = True progress_thread.start() i = 0; if not options.file_list_only: for host in Probe.Server.objects.filter(enabled = True).values_list("id",flat=True): probe_servers.put(host) i+=1 if options.count and i >= options.count: break; if options.input_filename and (not options.count or i < options.count): for hostname_line in fileinput.input(options.input_filename, openhook=fileinput.hook_compressed): probe_servers.put(hostname_line) i+=1 if options.count and i >= options.count: break; probe_servers.join() progress_queue.join() return queue_list
def queueManager(numProc, myList, function, *args):
    '''queueManager(numProc, myList, function, *args):
    Generic function used to start worker processes via the multiprocessing Queue object.
    numProc  - number of processors to use
    myList   - a list of objects to be iterated over
    function - target function
    *args    - additional arguments to pass to function

    Return - a list of the results, ordered to match myList
    '''
    qIn = Queue()
    qOut = JoinableQueue()
    if args:
        arguments = (qIn, qOut,) + args
    else:
        arguments = (qIn, qOut,)
    results = []

    # reduce processor count if proc count > files
    # load the input queue with (index, item) pairs
    i = 0
    for l in myList:
        qIn.put((i, l))
        i += 1

    # start the worker processes
    for _ in range(numProc):
        Process(target=function, args=arguments).start()

    sys.stdout.write("Progress: {:>3}%".format(0))
    curProgress = 0
    lastProgress = 0
    while qOut.qsize() < len(myList):
        #sys.stdout.write("\b\b\b\b{:>3}%".format(int(ceil(100*qOut.qsize()/len(myList)))))
        curProgress = int(ceil(100 * qOut.qsize() / len(myList)))
        if curProgress - lastProgress > 10:
            lastProgress += 10
            sys.stdout.write("\nProgress: {:>3}%".format(lastProgress))
            sys.stdout.flush()
    sys.stdout.write("\nProgress: {:>3}%".format(100))
    #sys.stdout.write("\b\b\b\b{:>3}%".format(100))
    sys.stdout.write("\n")

    # collect the results; task_done() marks each result as processed
    for _ in range(len(myList)):
        results.append(qOut.get())
        qOut.task_done()

    # tell child processes to stop
    for _ in range(numProc):
        qIn.put('STOP')

    # reorder the results by their original index
    orderedRes = [None] * len(results)
    for i, res in results:
        orderedRes[i] = res

    qOut.join()
    qIn.close()
    qOut.close()
    return orderedRes
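# A hedged usage sketch for queueManager; squareWorker is a hypothetical worker
# that follows the (index, item) input and 'STOP' sentinel protocol expected above.
def squareWorker(qIn, qOut):
    for i, item in iter(qIn.get, 'STOP'):
        qOut.put((i, item * item))

squares = queueManager(4, [1, 2, 3, 4, 5], squareWorker)
# squares == [1, 4, 9, 16, 25]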
return results def thread(): while True: worker = userQ.get() portscan(worker) userQ.task_done() # This sets how many threads you want to run and starts them for x in range(100): t = threading.Thread(target=thread) t.daemon = True t.start() smartList = [ 0, 21, 22, 23, 25, 53, 79, 80, 110, 113, 119, 135, 137, 138, 139, 143, 389, 443, 445, 555, 631, 666, 902, 912, 1001, 1002, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1243, 1433, 1434, 1720, 1900, 2000, 4380, 4381, 5000, 5040, 5088, 5354, 5432, 6463, 6667, 6670, 6711, 6776, 6969, 7000, 7680, 8080, 8733, 12345, 12346, 13148, 15292, 15393, 21554, 22222, 27015, 27017, 27275, 27374, 29559, 31337, 31338, 49664, 49665, 49666, 49668, 49684, 49731, 49765, 49774, 50698, 50760, 51229, 54860, 54870, 57621 ] for worker in smartList: userQ.put(worker) userQ.join()
evaluation_queue = JoinableQueue() result_queue = Queue() for i in range(3): worker = Process(target=worker_func, args=(evaluation_queue, result_queue)) worker.start() print("work %d start!" % i) num = 100 for i in range(num): evaluation_queue.put(i) for i in range(3): evaluation_queue.put(None) evaluation_queue.join() for i in range(num): data = result_queue.get() print(data) import tensorflow as tf from dkrl_model import DKRL from transe_model import TransE import pickle as pkl from dkrl_data_loader import DataLoader import numpy as np from multiprocessing import JoinableQueue, Queue, Process import os
if wt > 0.9: in_queue.put(None) print('stop producer') break def consumer(in_queue, out_queue): while 1: task = in_queue.get() if task is None: break func, arg = task result = func(arg) in_queue.task_done() out_queue.put(result) processes = [] p = Process(target=producer, args=(tasks_queue,)) p.start() processes.append(p) p = Process(target=consumer, args=(tasks_queue, results_queue)) p.start() processes.append(p) tasks_queue.join() for p in processes: p.join() while 1: if results_queue.empty(): break result = results_queue.get() print('Result:', result)
class WorkerMapper(Mapper): """Work mapper implementation using multiple worker processes and task queue. Uses the python multiprocessing module to spawn multiple worker processes which watch a task queue of walker segments. """ def __init__(self, num_workers=None, worker_type=None, worker_attributes=None, **kwargs): """Constructor for WorkerMapper. kwargs are ignored. Parameters ---------- num_workers : int The number of worker processes to spawn. worker_type : callable, optional Callable that generates an object implementing the Worker interface, typically a type from a Worker class. worker_attributes : dictionary A dictionary of values that are passed to the worker constructor as key-word arguments. """ if worker_attributes is not None: self._worker_attributes = worker_attributes else: self._worker_attributes = {} self._num_workers = num_workers self._worker_segment_times = {i: [] for i in range(self.num_workers)} # choose the type of the worker if worker_type is None: self._worker_type = Worker warn("worker_type not given using the default base class") logging.warn("worker_type not given using the default base class") else: self._worker_type = worker_type @property def num_workers(self): """The number of worker processes.""" return self._num_workers # TODO remove after testing # @num_workers.setter # def num_workers(self, num_workers): # """Setter for the number of workers # Parameters # ---------- # num_workers : int # """ # self._num_workers = num_workers @property def worker_type(self): """The callable that generates a worker object. Typically this is just the type from the class definition of the Worker where the constructor is called. """ return self._worker_type # TODO remove after testing # @worker_type.setter # def worker_type(self, worker_type): # """ # Parameters # ---------- # worker_type : # Returns # ------- # """ # self._worker_type = worker_type def init(self, num_workers=None, **kwargs): """Runtime initialization and setting of function to map over walkers. Parameters ---------- num_workers : int The number of worker processes to spawn segment_func : callable implementing the Runner.run_segment interface """ super().init(**kwargs) # the number of workers must be given here or set as an object attribute if num_workers is None and self.num_workers is None: raise ValueError( "The number of workers must be given, received {}".format( num_workers)) # if the number of walkers was given for this init() call use # that, otherwise we use the default that was specified when # the object was created elif num_workers is None and self.num_workers is not None: num_workers = self.num_workers # Establish communication queues self._task_queue = JoinableQueue() self._result_queue = Queue() # Start workers, giving them all the queues self._workers = [] for i in range(num_workers): worker = self.worker_type(i, self._task_queue, self._result_queue, **self._worker_attributes) self._workers.append(worker) # start the worker processes for worker in self._workers: worker.start() logging.info("Worker process started as name: {}; PID: {}".format( worker.name, worker.pid)) def cleanup(self, **kwargs): """Runtime post-simulation tasks. This is run either at the end of a successful simulation or upon an error in the main process of the simulation manager call to `run_cycle`. The Mapper class performs no actions here and all arguments are ignored. 
""" # send poison pills (Stop signals) to the queues to stop them in a nice way # and let them finish up for i in range(self.num_workers): self._task_queue.put((None, None)) # delete the queues and workers self._task_queue = None self._result_queue = None self._workers = None def _make_task(self, *args, **kwargs): """Generate a task from 'segment_func' attribute. Similar to partial evaluation (or currying). Args will be eventually used as the arguments to the call of 'segment_func' by the worker processes when they receive the task from the queue. Returns ------- task : Task object """ return Task(self._func, *args, **kwargs) def map(self, *args): # docstring in superclass map_process = mp.current_process() logging.info("Mapping from process {}; PID {}".format( map_process.name, map_process.pid)) # make tuples for the arguments to each function call task_args = zip(*args) num_tasks = len(args[0]) # Enqueue the jobs for task_idx, task_arg in enumerate(task_args): # a task will be the actual task and its task idx so we can # sort them later self._task_queue.put((task_idx, self._make_task(*task_arg))) logging.info("Waiting for tasks to be run") # Wait for all of the tasks to finish self._task_queue.join() # workers_done = [worker.done for worker in self._workers] # if all(workers_done): # get the results out in an unordered way. We rely on the # number of tasks we know we put out because if you just try # to get from the queue until it is empty it will just wait # forever, since nothing is there. ALternatively it is risky # to implement a wait timeout or no wait in case there is a # small wait time. logging.info("Retrieving results") n_results = num_tasks results = [] while n_results > 0: logging.info("trying to retrieve result: {}".format(n_results)) result = self._result_queue.get() results.append(result) logging.info("Retrieved result {}: {}".format(n_results, result)) n_results -= 1 logging.info("No more results") logging.info("Retrieved results") # sort the results according to their task_idx results.sort() # save the task run times, so they can be accessed if desired, # after clearing the task times from the last mapping self._worker_segment_times = {i: [] for i in range(self.num_workers)} for task_idx, worker_idx, task_time, result in results: self._worker_segment_times[worker_idx].append(task_time) # then just return the values of the function return [result for task_idx, worker_idx, task_time, result in results]
class TwitterTracker(object): def __init__(self, l_query, timeout, fileName_partialData): self.timeout = timeout self.l_query = l_query self.collection_name = os.environ[ "COLLECTION_NAME"] #str(uuid.uuid4()) self.file_data = fileName_partialData self.status = STOPPED self.q_tweet = JoinableQueue(maxsize=0) self.collection = self.__DBConnection() self.__startTracker() def __DBConnection(self): client = MongoClient() self.db = client[C.TWITTER_DB] return self.db[self.collection_name] def __startTracker(self): process_tweetProcessor = Process(target=self.__runTweetProcessor) process_listener = Process(target=self.__runListener) process_listener.start() process_tweetProcessor.start() self.q_tweet.join() process_tweetProcessor.join() process_listener.join() self.status = FINISHED def __runTweetProcessor(self): tweetProcessor = TweetProcessor(self.q_tweet, self.collection, self.l_query, self.file_data) tweetProcessor.run() def __runListener(self): auth = tweepy.OAuthHandler(C.CONSUMER_KEY, C.CONSUMER_SECRET) auth.set_access_token(C.ACCESS_TOKEN, C.ACCESS_TOKEN_SECRET) listener = Listener(self.q_tweet) stream = tweepy.Stream(auth, listener) self.status = INITIATED start_time = time.time() try: stream.filter(languages=['en'], track=self.l_query, async=True) pass_time = time.time() - start_time while (pass_time < self.timeout): time.sleep(self.timeout - pass_time) pass_time = time.time() - start_time #print "SLEEPING" + str(pass_time) self.q_tweet.put(C.TOKEN_LAST_TWEET) stream.disconnect() except Exception as e: s = str(e) self.status = ERROR stream.disconnect() def getStatus(self): return self.status def getTweets(self): if self.status == FINISHED: return [t for t in self.collection.find()] else: return None def getTweets_ByTrackKey(self, key): if self.status == FINISHED: result = self.db.command('text', self.collection_name, search=key) return [t['obj'] for t in result['results']] else: return None
class Servo: def __init__(self, channel, pulse_width_start, pulse_width_stop, init_angle, turnoff_timeout=0): """Define a new software controllable servo with adjustable speed control Keyword arguments: pulse_width_start -- The minimum pulse width defining the lowest angle pulse_width_stop -- The maximum pulse width defining the biggest angle init_angle -- Initial angle that the servo should take when it is powered on. Range is 0 to 180deg turnoff_timeout -- number of seconds after which the servo is turned off if no command is received. 0 = never turns off """ self.pulse_width_start = pulse_width_start self.pulse_width_stop = pulse_width_stop self.turnoff_timeout = turnoff_timeout self.current_pulse_width = init_angle * ( self.pulse_width_stop - self.pulse_width_start) / 180.0 + self.pulse_width_start self.last_pulse_width = self.current_pulse_width self.queue = JoinableQueue(1000) self.lastCommandTime = 0 self.t = Thread(target=self._wait_for_event) self.t.daemon = True self.running = True self.t.start() self.pwm = PWM_pin(channel, 100, self.current_pulse_width) # Set up the Shift register for enabling this servo if channel == "P9_14": shiftreg_nr = 3 elif channel == "P9_16": shiftreg_nr = 2 else: logging.warning( "Tried to assign servo to an unknown channel/pin: " + str(channel)) return ShiftRegister.make() self.shift_reg = ShiftRegister.registers[shiftreg_nr] self.set_enabled() def set_enabled(self, is_enabled=True): if is_enabled: self.shift_reg.add_state(0x01) else: self.shift_reg.remove_state(0x01) def set_angle(self, angle, speed=60, asynchronous=True): ''' Set the servo angle to the given value, in degree, with the given speed in deg / sec ''' pulse_width = angle * (self.pulse_width_stop - self.pulse_width_start ) / 180.0 + self.pulse_width_start last_angle = (self.last_pulse_width - self.pulse_width_start) / float( self.pulse_width_stop - self.pulse_width_start) * 180.0 t = (math.fabs(angle - last_angle) / speed) / math.fabs(angle - last_angle) for w in xrange(int(self.last_pulse_width * 1000), int(pulse_width * 1000), (1 if pulse_width >= self.last_pulse_width else -1)): self.queue.put((w / 1000.0, t)) self.last_pulse_width = pulse_width if not asynchronous: self.queue.join() def turn_off(self): self.pwm.set_enabled(False) def stop(self): self.running = False self.t.join() self.turn_off() def _wait_for_event(self): while self.running: try: ev = self.queue.get(block=True, timeout=1) except Queue.Empty: if self.turnoff_timeout > 0 and self.lastCommandTime > 0 and time.time( ) - self.lastCommandTime > self.turnoff_timeout: self.lastCommandTime = 0 self.turn_off() continue except Exception: # To avoid exception printed on output pass self.current_pulse_width = ev[0] self.pwm.set_value(self.current_pulse_width) self.lastCommandTime = time.time() time.sleep(ev[1]) self.queue.task_done()
def main(): parser = argparse.ArgumentParser(description='Computing TFLite accuracy') parser.add_argument('--model', required=True, help='Path to the model (protocol buffer binary file)') parser.add_argument( '--alphabet', required=True, help= 'Path to the configuration file specifying the alphabet used by the network' ) parser.add_argument('--lm', required=True, help='Path to the language model binary file') parser.add_argument( '--trie', required=True, help= 'Path to the language model trie file created with native_client/generate_trie' ) parser.add_argument('--csv', required=True, help='Path to the CSV source file') parser.add_argument( '--proc', required=False, default=cpu_count(), type=int, help='Number of processes to spawn, defaulting to number of CPUs') args = parser.parse_args() work_todo = JoinableQueue() work_done = Queue() processes = [] for i in range(args.proc): worker_process = Process(target=tflite_worker, args=(args.model, args.alphabet, args.lm, args.trie, work_todo, work_done, i), daemon=True, name='tflite_process_{}'.format(i)) worker_process.start() processes.append(worker_process) print([x.name for x in processes]) ground_truths = [] predictions = [] losses = [] with open(args.csv, 'r') as csvfile: csvreader = csv.DictReader(csvfile) for row in csvreader: work_todo.put({ 'filename': row['filename'], 'transcript': row['transcript'] }) work_todo.join() while (not work_done.empty()): msg = work_done.get() losses.append(0.0) ground_truths.append(msg['ground_truth']) predictions.append(msg['prediction']) distances = [levenshtein(a, b) for a, b in zip(ground_truths, predictions)] wer, cer, samples = calculate_report(ground_truths, predictions, distances, losses) mean_loss = np.mean(losses) print('Test - WER: %f, CER: %f, loss: %f' % (wer, cer, mean_loss))
def main(): # For py2exe builds freeze_support() # Handle SIGINT to terminate processes signal.signal(signal.SIGINT, sigint_handler) start_time = time() #--PLUGINS INITIALIZATION-- sslyze_plugins = PluginsFinder() available_plugins = sslyze_plugins.get_plugins() available_commands = sslyze_plugins.get_commands() # Create the command line parser and the list of available options sslyze_parser = CommandLineParser(available_plugins, PROJECT_VERSION) try: # Parse the command line (command_list, target_list, shared_settings) = sslyze_parser.parse_command_line() except CommandLineParsingError as e: print e.get_error_msg() return if not shared_settings['quiet'] and shared_settings['xml_file'] != '-': print '\n\n\n' + _format_title('Available plugins') print '' for plugin in available_plugins: print ' ' + plugin.__name__ print '\n\n' #--PROCESSES INITIALIZATION-- # Three processes per target from MIN_PROCESSES up to MAX_PROCESSES nb_processes = max(MIN_PROCESSES, min(MAX_PROCESSES, len(target_list) * 3)) if command_list.https_tunnel: nb_processes = 1 # Let's not kill the proxy task_queue = JoinableQueue() # Processes get tasks from task_queue and result_queue = JoinableQueue( ) # put the result of each task in result_queue # Spawn a pool of processes, and pass them the queues for _ in xrange(nb_processes): priority_queue = JoinableQueue() # Each process gets a priority queue p = WorkerProcess(priority_queue, task_queue, result_queue, available_commands, \ shared_settings) p.start() process_list.append( (p, priority_queue)) # Keep track of each process and priority_queue #--TESTING SECTION-- # Figure out which hosts are up and fill the task queue with work to do if not shared_settings['quiet'] and shared_settings['xml_file'] != '-': print _format_title('Checking host(s) availability') targets_OK = [] targets_ERR = [] # Each server gets assigned a priority queue for aggressive commands # so that they're never run in parallel against this single server cycle_priority_queues = cycle(process_list) target_results = ServersConnectivityTester.test_server_list( target_list, shared_settings) for target in target_results: if target is None: break # None is a sentinel here # Send tasks to worker processes targets_OK.append(target) (_, current_priority_queue) = cycle_priority_queues.next() for command in available_commands: if getattr(command_list, command): args = command_list.__dict__[command] if command in sslyze_plugins.get_aggressive_commands(): # Aggressive commands should not be run in parallel against # a given server so we use the priority queues to prevent this current_priority_queue.put((target, command, args)) else: # Normal commands get put in the standard/shared queue task_queue.put((target, command, args)) for exception in target_results: targets_ERR.append(exception) if not shared_settings['quiet'] and shared_settings['xml_file'] != '-': print ServersConnectivityTester.get_printable_result( targets_OK, targets_ERR) print '\n\n' # Put a 'None' sentinel in the queue to let the each process know when every # task has been completed for (proc, priority_queue) in process_list: task_queue.put(None) # One sentinel in the task_queue per proc priority_queue.put(None) # One sentinel in each priority_queue # Keep track of how many tasks have to be performed for each target task_num = 0 for command in available_commands: if getattr(command_list, command): task_num += 1 # --REPORTING SECTION-- processes_running = nb_processes # XML output xml_output_list = [] # Each host has a list of results result_dict = 
{} for target in targets_OK: result_dict[target] = [] # If all processes have stopped, all the work is done while processes_running: result = result_queue.get() if result is None: # Getting None means that one process was done processes_running -= 1 else: # Getting an actual result (target, command, plugin_result) = result result_dict[target].append((command, plugin_result)) if len(result_dict[target]) == task_num: # Done with this target # Print the results and update the xml doc if shared_settings['xml_file']: xml_output_list.append( _format_xml_target_result(target, result_dict[target])) if not shared_settings[ 'quiet'] and shared_settings['xml_file'] != '-': print _format_txt_target_result( target, result_dict[target]) else: print _format_txt_target_result(target, result_dict[target]) result_queue.task_done() # --TERMINATE-- # Make sure all the processes had time to terminate task_queue.join() result_queue.join() #[process.join() for process in process_list] # Causes interpreter shutdown errors exec_time = time() - start_time # Output XML doc to a file if needed if shared_settings['xml_file']: result_xml_attr = { 'httpsTunnel': str(shared_settings['https_tunnel_host']), 'totalScanTime': str(exec_time), 'defaultTimeout': str(shared_settings['timeout']), 'startTLS': str(shared_settings['starttls']) } result_xml = Element('results', attrib=result_xml_attr) # Sort results in alphabetical order to make the XML files (somewhat) diff-able xml_output_list.sort(key=lambda xml_elem: xml_elem.attrib['host']) for xml_element in xml_output_list: result_xml.append(xml_element) xml_final_doc = Element('document', title="SSLyze Scan Results", SSLyzeVersion=PROJECT_VERSION, SSLyzeWeb=PROJECT_URL) # Add the list of invalid targets xml_final_doc.append( ServersConnectivityTester.get_xml_result(targets_ERR)) # Add the output of the plugins xml_final_doc.append(result_xml) # Remove characters that are illegal for XML # https://lsimons.wordpress.com/2011/03/17/stripping-illegal-characters-out-of-xml-in-python/ xml_final_string = tostring(xml_final_doc, encoding='UTF-8') illegal_xml_chars_RE = re.compile( u'[\x00-\x08\x0b\x0c\x0e-\x1F\uD800-\uDFFF\uFFFE\uFFFF]') xml_sanitized_final_string = illegal_xml_chars_RE.sub( '', xml_final_string) # Hack: Prettify the XML file so it's (somewhat) diff-able xml_final_pretty = minidom.parseString( xml_sanitized_final_string).toprettyxml(indent=" ", encoding="utf-8") if shared_settings['xml_file'] == '-': # Print XML output to the console if needed print xml_final_pretty else: # Otherwise save the XML output to the console with open(shared_settings['xml_file'], 'w') as xml_file: xml_file.write(xml_final_pretty) if not shared_settings['quiet'] and shared_settings['xml_file'] != '-': print _format_title('Scan Completed in {0:.2f} s'.format(exec_time))
def main(_): print_flags() initialize_folders() env = gym.make(FLAGS.env_name) if FLAGS.is_train and FLAGS.is_monitor: def monitor_frequency_func(iteration): return (iteration + FLAGS.monitor_frequency) % FLAGS.monitor_frequency == 0 env = wrappers.Monitor(env, FLAGS.log_dir + "/" + FLAGS.scope, video_callable=monitor_frequency_func, resume=FLAGS.is_load) job_queue = JoinableQueue() result_queue = Queue() e = 0 if FLAGS.n_processes == 1 or not FLAGS.is_train: reacher = TRPOAgent(FLAGS.env_name, FLAGS.scope, FLAGS.max_kl, job_queue, result_queue) reacher.start() else: # PARALLEL TRAINING OFFERS ALMOST LINEAR IMPROVEMENT ON 2 processors proll = ParallelRollout(FLAGS.env_name, FLAGS.traj_len, FLAGS.n_processes, FLAGS.max_kl) parallel_reacher = ParallelTRPOAgent(FLAGS.env_name, FLAGS.scope, FLAGS.max_kl, job_queue, result_queue, proll) parallel_reacher.start() if FLAGS.is_load: job_queue.put(('load', (FLAGS.checkpoint_dir + '/' + FLAGS.scope, ))) job_queue.join() try: while True: e += 1 if FLAGS.is_train: print("EPISODE =", e) start = time.time() job_queue.put( ('learn', (FLAGS.gamma, FLAGS.n_trajs, FLAGS.traj_len))) job_queue.join() end = time.time() print("ROLLOUT TAKES", end - start) obs = env.reset() for i in range(FLAGS.traj_len): job_queue.put(('act', (obs, ))) job_queue.join() obs, _, done, _ = env.step(result_queue.get()) if not FLAGS.is_train: env.render() if done: break if e % FLAGS.checkpoint_freq == 0 and FLAGS.is_train: job_queue.put( ('save', (FLAGS.checkpoint_dir + '/' + FLAGS.scope, ))) job_queue.join() job_queue.put(('log', ( FLAGS.log_dir + '/' + FLAGS.scope, 'my_log.json', ))) job_queue.join() except KeyboardInterrupt: print('You pressed Ctrl+C!') if FLAGS.is_train and FLAGS.is_monitor: env.close() proll.end() parallel_reacher.join() sys.exit(0)
for x in outs: print(x.name) for x in blobs: print(x.name) blobs = list( filter( lambda x: os.path.basename(x.name).split(".")[0] not in list( map(lambda x: "_".join(x.name.split("_")[1:]).split(".")[0], outs)), blobs)) print(len(blobs)) list(map(lambda x: downloads.put(x), blobs)) downloaders = map( lambda x: Process(target=utils.consume, args=(downloads, download_event, ((IN_ACC, IN_KEY, IN_CONTAINER), files ), download_blob)), range(0, 2)) list(map(lambda proc: proc.start(), downloaders)) processors = map( lambda x: Process(target=utils.consume, args=(files, process_event, (RUNNER, (OUT_ACC, OUT_KEY, OUT_CONTAINER)), process)), range(0, 1)) list(map(lambda proc: proc.start(), processors)) downloads.join() files.join() download_event.set() process_event.set()
#upload to cloud
print('import to Cloud')
queue = JoinableQueue()
pr = 1
pro = []
for z in ZPGFile.objects.filter(is_movie=0, on_cloud=0).order_by("file_date"):
    queue.put(z)

for i in range(pr):
    p = Process(target=uploadF, args=(cli,))
    p.daemon = True
    p.start()
    pro.append(p)

queue.join()
for p in pro:
    p.join()

# back up the database file
dbfile_path = settings.DATABASES['default']['NAME']
dbfile_name = settings.DATABASES['default']['NAME'].split('/')[-1]
cli.move_folder_or_file(dbfile_name, dbfile_name + "_old")
#remove_folder_or_file(dbfile_name)
cli.upload_file(dbfile_path, dbfile_name)
print("DB file uploaded")
def files_to_map( files: List[str], out_dir: str = ".", min_zoom: int = 0, title: str = "FitsMap", task_procs: int = 0, procs_per_task: int = 0, catalog_delim: str = ",", cat_wcs_fits_file: str = None, max_catalog_zoom: int = -1, tile_size: Tuple[int, int] = [256, 256], image_engine: str = IMG_ENGINE_MPL, norm_kwargs: dict = {}, rows_per_column: int = np.inf, prefer_xy: bool = False, ) -> None: """Converts a list of files into a LeafletJS map. Args: files (List[str]): List of files to convert into a map, can include image files (.fits, .png, .jpg) and catalog files (.cat) out_dir (str): Directory to place the genreated web page and associated subdirectories min_zoom (int): The minimum zoom to create tiles for. The default value is 0, but if it can be helpful to set it to a value greater than zero if your running out of memory as the lowest zoom images can be the most memory intensive. title (str): The title to placed on the webpage task_procs (int): The number of tasks to run in parallel procs_per_task (int): The number of tiles to process in parallel catalog_delim (str): The delimited for catalog (.cat) files. Deault is whitespace. cat_wcs_fits_file (str): A fits file that has the WCS that will be used to map ra and dec coordinates from the catalog files to x and y coordinates in the map max_catalog_zoom (int): The zoom level to stop clustering on, the default is the max zoom level of the image. For images with a high source density, setting this higher than the max zoom will help with performance. tile_size (Tuple[int, int]): The tile size for the leaflet map. Currently only [256, 256] is supported. image_engine (str): The method that converts the image data into image tiles. The default is convert.IMG_ENGINE_MPL (matplotlib) the other option is convert.IMG_ENGINE_PIL (pillow). Pillow can render FITS files but doesn't do any scaling. Pillow may be more performant for only PNG images. norm_kwargs (dict): Optional normalization keyword arguments passed to `astropy.visualization.simple_norm`. The default is linear scaling using min/max values. See documentation for more information: https://docs.astropy.org/en/stable/api/astropy.visualization.mpl_normalize.simple_norm.html rows_per_column (int): If converting a catalog, the number of items in have in each column of the marker popup. By default produces all values in a single column. Setting this value can make it easier to work with catalogs that have a lot of values for each object. 
Returns: None """ if len(files) == 0: raise ValueError("No files provided `files` is an empty list") unlocatable_files = list(filterfalse(os.path.exists, files)) if len(unlocatable_files) > 0: raise FileNotFoundError(unlocatable_files) if not os.path.exists(out_dir): os.makedirs(out_dir) if "js" not in os.listdir(out_dir): os.mkdir(os.path.join(out_dir, "js")) if "css" not in os.listdir(out_dir): os.mkdir(os.path.join(out_dir, "css")) img_f_kwargs = dict( tile_size=tile_size, min_zoom=min_zoom, image_engine=image_engine, out_dir=out_dir, mp_procs=procs_per_task, norm_kwargs=norm_kwargs, ) img_files = filter_on_extension(files, IMG_FORMATS) img_layer_names = list(map(get_map_layer_name, img_files)) img_job_f = partial(tile_img, **img_f_kwargs) cat_files = filter_on_extension(files, CAT_FORMAT) cat_layer_names = list(map(get_map_layer_name, cat_files)) max_x, max_y = utils.peek_image_info(img_files) max_dim = max(max_x, max_y) if len(cat_files) > 0: # get highlevel image info for catalogging function max_zoom = int(np.log2(2**np.ceil(np.log2(max_dim)) / tile_size[0])) max_dim = 2**max_zoom * tile_size[0] if max_catalog_zoom == -1: max_zoom = int(np.log2(2**np.ceil(np.log2(max_dim)) / tile_size[0])) else: max_zoom = max_catalog_zoom cat_job_f = partial( tile_markers, cat_wcs_fits_file, out_dir, catalog_delim, procs_per_task, prefer_xy, min_zoom, max_zoom, tile_size[0], max_dim, max_dim, ) else: cat_job_f = None pbar_locations = count(0) img_tasks = zip(repeat(img_job_f), zip(img_files, pbar_locations)) cat_tasks = zip(repeat(cat_job_f), zip(cat_files, pbar_locations)) tasks = chain(img_tasks, cat_tasks) if task_procs: q = JoinableQueue() any(map(lambda t: q.put(t), tasks)) workers = [ Process(target=async_worker, args=[q]) for _ in range(task_procs) ] [w.start() for w in workers] # can use any-map if this returns None q.join() else: any(map(lambda func_args: func_args[0](*func_args[1]), tasks)) ns = "\n" * (next(pbar_locations) - 1) print(ns + "Building index.html") if cat_wcs_fits_file is not None: cat_wcs = WCS(cat_wcs_fits_file) else: cat_wcs = None cartographer.chart( out_dir, title, img_layer_names, cat_layer_names, cat_wcs, rows_per_column, (max_x, max_y), ) print("Done.")
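# A hedged usage sketch for files_to_map; the input file names are placeholders.
files_to_map(
    ["observation.fits", "sources.cat"],
    out_dir="web",
    title="Survey Map",
    task_procs=2,        # run the image and catalog tiling tasks in parallel
    procs_per_task=2,    # worker processes per tiling task
    cat_wcs_fits_file="observation.fits",
)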
class DeepZoomStaticTiler(object): """Handles generation of tiles and metadata for all images in a slide.""" def __init__(self, slidepath, basename, format, tile_size, overlap, limit_bounds, quality, workers, with_viewer): if with_viewer: # Check extra dependency before doing a bunch of work import jinja2 self._slide = open_slide(slidepath) self._basename = basename self._format = format self._tile_size = tile_size self._overlap = overlap self._limit_bounds = limit_bounds self._queue = JoinableQueue(2 * workers) self._workers = workers self._with_viewer = with_viewer self._dzi_data = {} for _i in range(workers): TileWorker(self._queue, slidepath, tile_size, overlap, limit_bounds, quality).start() def run(self): self._run_image() if self._with_viewer: for name in self._slide.associated_images: self._run_image(name) self._write_html() self._write_static() self._shutdown() def _run_image(self, associated=None): """Run a single image from self._slide.""" if associated is None: image = self._slide if self._with_viewer: basename = os.path.join(self._basename, VIEWER_SLIDE_NAME) else: basename = self._basename else: image = ImageSlide(self._slide.associated_images[associated]) basename = os.path.join(self._basename, self._slugify(associated)) dz = DeepZoomGenerator(image, self._tile_size, self._overlap, limit_bounds=self._limit_bounds) tiler = DeepZoomImageTiler(dz, basename, self._format, associated, self._queue) tiler.run() self._dzi_data[self._url_for(associated)] = tiler.get_dzi() def _url_for(self, associated): if associated is None: base = VIEWER_SLIDE_NAME else: base = self._slugify(associated) return '%s.dzi' % base def _write_html(self): import jinja2 env = jinja2.Environment(loader=jinja2.PackageLoader(__name__), autoescape=True) template = env.get_template('slide-multipane.html') associated_urls = dict((n, self._url_for(n)) for n in self._slide.associated_images) try: mpp_x = self._slide.properties[openslide.PROPERTY_NAME_MPP_X] mpp_y = self._slide.properties[openslide.PROPERTY_NAME_MPP_Y] mpp = (float(mpp_x) + float(mpp_y)) / 2 except (KeyError, ValueError): mpp = 0 # Embed the dzi metadata in the HTML to work around Chrome's # refusal to allow XmlHttpRequest from file:///, even when # the originating page is also a file:/// data = template.render(slide_url=self._url_for(None), slide_mpp=mpp, associated=associated_urls, properties=self._slide.properties, dzi_data=json.dumps(self._dzi_data)) with open(os.path.join(self._basename, 'index.html'), 'w') as fh: fh.write(data) def _write_static(self): basesrc = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'static') basedst = os.path.join(self._basename, 'static') self._copydir(basesrc, basedst) self._copydir(os.path.join(basesrc, 'images'), os.path.join(basedst, 'images')) def _copydir(self, src, dest): if not os.path.exists(dest): os.makedirs(dest) for name in os.listdir(src): srcpath = os.path.join(src, name) if os.path.isfile(srcpath): shutil.copy(srcpath, os.path.join(dest, name)) @classmethod def _slugify(cls, text): text = normalize('NFKD', text.lower()).encode('ascii', 'ignore').decode() return re.sub('[^a-z0-9]+', '_', text) def _shutdown(self): for _i in range(self._workers): self._queue.put(None) self._queue.join()
    p.daemon = True
    p.start()
    span_procs[span] = p

if CONFIG["report_csv"]:
    prepare_csv()

while len(spans_to_process) > 0 or len(span_procs) > 0:
    s = span_done.get()
    span = s["span"]
    span_procs[s["span"]].join()
    log_messages.put("%s done" % s["span"])
    del span_procs[s["span"]]

    # Create a new process if needed
    print("still %s spans to process" % len(spans_to_process))
    if len(spans_to_process) > 0:
        next_span = spans_to_process.pop()
        span_procs[next_span] = Process(target=process_span,
                                        args=(next_span, span_done, log_messages))
        span_procs[next_span].daemon = True
        span_procs[next_span].start()
        print("new process on %s" % next_span)

    if CONFIG["report_csv"]:
        csv_writing(s)
    span_done.task_done()

span_done.join()
log_messages.join()
logger.terminate()
writer.writerow([
    'hospitalName', 'importDate', 'device', 'project', 'markPurpose',
    'collectPath', 'source'
])
hospitalName = "CS898001"
importDate = str(datetime.datetime.now()).split(' ')[0]
device = "CT"
project = "CT_Chest"
source = "-1"
lists = [hospitalName, importDate, device, project, source]

q = JoinableQueue()
procs = []  # worker processes
for i in range(cpu_count() - 2):
    p = Process(target=Worker, args=(q, ))
    p.daemon = True
    p.start()
    procs.append(p)

for filename in os.listdir(src_path):
    if filename.endswith('.csv'):
        continue
    dcm_dir = os.path.join(src_path, filename)
    q.put([dcm_dir, csv_path, lists])

q.join()

# one poison pill per worker so they exit cleanly, then wait for them
for i in range(0, cpu_count() - 2):
    q.put(None)
for p in procs:
    p.join()

print('Elapsed time: ', float(time.time() - start))
def parallel(self):
    from multiprocessing import Process, Queue, JoinableQueue

    if debug:
        print(inspect.stack()[0][3])

    self.ntrajs = []
    for i in range(self.cpus):
        self.ntrajs.append(
            min(int(np.floor(float(self.ntraj) / self.cpus)),
                self.ntraj - sum(self.ntrajs)))
    cnt = sum(self.ntrajs)
    while cnt < self.ntraj:
        for i in range(self.cpus):
            self.ntrajs[i] += 1
            cnt += 1
            if (cnt >= self.ntraj):
                break
    self.ntrajs = np.array(self.ntrajs)
    self.ntrajs = self.ntrajs[np.where(self.ntrajs > 0)]
    self.nprocs = len(self.ntrajs)

    sols = []
    processes = []
    resq = JoinableQueue()
    resq.join()

    if debug:
        print("Number of cpus: " + str(self.cpus))
        print("Trying to start " + str(self.nprocs) + " process(es).")
        print("Number of trajectories for each process: " + str(self.ntrajs))

    for i in range(self.nprocs):
        p = Process(target=self.evolve_serial,
                    args=((resq, self.ntrajs[i], i, self.seed * (i + 1)), ))
        p.start()
        processes.append(p)

    cnt = 0
    while True:
        try:
            sols.append(resq.get())
            resq.task_done()
            cnt += 1
            if (cnt >= self.nprocs):
                break
        except KeyboardInterrupt:
            break
        except:
            pass

    resq.join()
    for proc in processes:
        try:
            proc.join()
        except KeyboardInterrupt:
            if debug:
                print("Cancel thread on keyboard interrupt")
            proc.terminate()
            proc.join()
    resq.close()
    return sols
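The trajectory-splitting loop at the top of parallel() distributes self.ntraj trajectories over self.cpus processes: each gets the floor share first, then the remainder is handed out one at a time. A standalone sketch of the same arithmetic with an illustrative input:

def split_trajectories(ntraj, cpus):
    """Same splitting logic as above, in plain Python: floor share per
    process, remainder distributed one trajectory at a time, empty
    slots dropped."""
    ntrajs = []
    for _ in range(cpus):
        ntrajs.append(min(ntraj // cpus, ntraj - sum(ntrajs)))
    cnt = sum(ntrajs)
    while cnt < ntraj:
        for i in range(cpus):
            ntrajs[i] += 1
            cnt += 1
            if cnt >= ntraj:
                break
    return [n for n in ntrajs if n > 0]

print(split_trajectories(10, 4))  # -> [3, 3, 2, 2]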
class DAQLogger(): def __init__(self, options): self.process = None self.listener = None #self.log_queue = BufferedReadQueue() self.log_queue = JoinableQueue() self.raw_queue = JoinableQueue() settings = options.settings self.settings = {} if os.name == 'nt': self.od = "%s\\" % settings.GetStrElemVal("LogDir", 'log') elif os.name == 'posix': self.od = "%s/" % settings.GetStrElemVal("LogDir", 'log') if not os.path.exists(self.od): os.makedirs(self.od) self.settings['od'] = self.od self.settings['name'] = settings.GetStrElemVal("station_name", 'station_name') print "Station Name: %s" % self.settings['name'] self.settings['email'] = settings.GetIntElemVal("ErrorEmail", 1) print "Error email: ON" if self.settings[ 'email'] == 1 else "Error email: OFF" if self.settings['email'] == 1: self.settings['ErrorEmailFrom'] = settings.GetStrElemVal( "ErrorEmailFrom", "ErrorEmailFrom") self.settings['ErrorEmailTo'] = settings.GetStrElemVal( "ErrorEmailTo", "ErrorEmailTo") self.settings['ErrorEmailPw'] = settings.GetStrElemVal( "ErrorEmailPw", "ErrorEmailPw") print "Sending log emails from: %s" % self.settings[ 'ErrorEmailFrom'] print "Sending log emails to: %s" % self.settings["ErrorEmailTo"] self.settings['post'] = settings.GetIntElemVal("ErrorPost", 1) print "Error POST: ON" if self.settings[ 'post'] == 1 else "Error POST: OFF" # this is main logger level self.settings['loggerlevel'] = self._process_log_level( "DEBUG") # Hard code to debug to allow max flexibility #self.settings['consolelevel'] = self._process_log_level(settings.GetStrElemVal("ConsoleLevel", "TIMESTAMP")) if options.debug_on: #print "Setting console display level to 'DEBUG'" #self.settings['consolelevel'] = self._process_log_level("DEBUG") self.settings['consolelevel'] = self._process_log_level("DEBUG") else: self.settings['consolelevel'] = self._process_log_level( settings.GetStrElemVal("ConsoleLevel", "TIMESTAMP")) #print "Setting console display level to 'TIMESTAMP'" #self.settings['consolelevel'] = self._process_log_level("TIMESTAMP") #self.settings['consolelevel'] = TIMESTAMP print "Setting console display level: %s" % self._process_log_level( self.settings['consolelevel']) self.settings['logfilelevel'] = self._process_log_level( settings.GetStrElemVal("LogFileLevel", "WARNING")) print "Setting logfile level: %s" % self._process_log_level( self.settings['logfilelevel']) self.settings['postfilelevel'] = self._process_log_level( settings.GetStrElemVal("PostLevel", "WARNING")) print "Setting POST level: %s" % self._process_log_level( self.settings['postfilelevel']) self.settings['posturl'] = settings.GetStrElemVal( "LogPostUrl", "/field_sites_logs/logging.php") self.settings['postserver'] = settings.GetStrElemVal( "LogPostServer", "vlf-engineering.stanford.edu:80") # Setup basic logger. Will overwrite self.logger = logging.getLogger() self.logger.setLevel(logging.DEBUG) #print "Logger level set." self.listener = Process(target=_log_listener, args=(self.log_queue, self.raw_queue)) self.listener.daemon = True self.listener.start() self.lpid = self.listener.pid self._log("Starting Logger Listener: %d" % self.lpid) def start(self): self.process = Process(target=_log_processing, args=(self.settings, self.raw_queue)) self.process.daemon = True self.process.start() self.pid = self.process.pid self._log("Starting Logging Thread: %d" % self.pid) def stop(self): self._log("Closing down logging queue.") self.log_queue.put(None) self.raw_queue.join() self.log_queue.join() self.process.terminate() logging.shutdown() #print "Logger finished." 
    def _process_log_level(self, level):
        """
        if level == "DEBUG":
            return logging.DEBUG
        elif level == "INFO":
            return logging.INFO
        elif level == "WARNING":
            return logging.WARNING
        elif level == "CRITICAL":
            return logging.CRITICAL
        else:
            return logging.INFO
        """
        return getLevelName(level)

    def _log(self, message):
        # Log messages within logger
        #print "got into log"
        # Add timestamp
        log_entry = LogRecord("LOG", logging.INFO, "", 0, message, (), None, None)
        self.log_queue.put(log_entry)
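The commented-out if/elif ladder is what logging.getLevelName already does for the standard level names: it maps in both directions, name to number and number to name. A small sketch of that behaviour (standard library only; the TIMESTAMP registration is an assumption about how a custom level like the one referenced in the settings would be set up, it is not shown in this snippet):

import logging

assert logging.getLevelName("DEBUG") == logging.DEBUG      # "DEBUG" -> 10
assert logging.getLevelName(logging.WARNING) == "WARNING"  # 30 -> "WARNING"

# Caveat: an unregistered name is not mapped to a default level; it comes back
# as the string "Level <name>". A custom name must be registered first:
logging.addLevelName(15, "TIMESTAMP")
assert logging.getLevelName("TIMESTAMP") == 15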
def main(_): parser = argparse.ArgumentParser(description='ProjE.') parser.add_argument('--data', dest='data_dir', type=str, help="Data folder", default='./data/FB15k/') parser.add_argument('--lr', dest='lr', type=float, help="Learning rate", default=0.01) parser.add_argument("--dim", dest='dim', type=int, help="Embedding dimension", default=200) parser.add_argument("--batch", dest='batch', type=int, help="Batch size", default=200) parser.add_argument("--comb", dest="combination_method", type=str, help="Combination method", default='simple') parser.add_argument("--worker", dest='n_worker', type=int, help="Evaluation worker", default=3) parser.add_argument("--generator", dest='n_generator', type=int, help="Data generator", default=10) parser.add_argument("--eval_batch", dest="eval_batch", type=int, help="Evaluation batch size", default=500) parser.add_argument("--save_dir", dest='save_dir', type=str, help="Model path", default='./ProjE_ckpt/') parser.add_argument("--load_model", dest='load_model', type=str, help="Model file", default="") parser.add_argument("--save_per", dest='save_per', type=int, help="Save per x iteration", default=10) parser.add_argument("--eval_per", dest='eval_per', type=int, help="Evaluate every x iteration", default=1) parser.add_argument("--max_iter", dest='max_iter', type=int, help="Max iteration", default=100) parser.add_argument("--summary_dir", dest='summary_dir', type=str, help="summary directory", default='./ProjE_summary/') parser.add_argument("--keep", dest='drop_out', type=float, help="Keep prob (1.0 keep all, 0. drop all)", default=0.5) parser.add_argument("--optimizer", dest='optimizer', type=str, help="Optimizer", default='adam') parser.add_argument("--prefix", dest='prefix', type=str, help="model_prefix", default='DEFAULT') parser.add_argument("--loss_weight", dest='loss_weight', type=float, help="Weight on parameter loss", default=1e-5) parser.add_argument("--neg_weight", dest='neg_weight', type=float, help="Sampling weight on negative examples", default=0.5) args = parser.parse_args() print(args) model = ProjE(args.data_dir, embed_dim=args.dim, combination_method=args.combination_method, dropout=args.drop_out, neg_weight=args.neg_weight) train_hrt_input, train_hrt_weight, train_trh_input, train_trh_weight, \ train_loss, train_op = train_ops(model, learning_rate=args.lr, optimizer_str=args.optimizer, regularizer_weight=args.loss_weight) test_input, test_head, test_tail = test_ops(model) with tf.Session() as session: # tf.initialize_all_variables().run() tf.global_variables_initializer().run() saver = tf.train.Saver() iter_offset = 0 if args.load_model is not None and os.path.exists(args.load_model): saver.restore(session, args.load_model) iter_offset = int( args.load_model.split('.')[-2].split('_')[-1]) + 1 print("Load model from %s, iteration %d restored." 
% (args.load_model, iter_offset)) total_inst = model.n_train # training data generator raw_training_data_queue = Queue() training_data_queue = Queue() data_generators = list() for i in range(args.n_generator): data_generators.append( Process(target=data_generator_func, args=(raw_training_data_queue, training_data_queue, model.tr_h, model.hr_t, model.n_entity, args.neg_weight))) data_generators[-1].start() evaluation_queue = JoinableQueue() result_queue = Queue() for i in range(args.n_worker): worker = Process(target=worker_func, args=(evaluation_queue, result_queue, model.hr_t, model.tr_h)) worker.start() for data_func, test_type in zip( [model.validation_data, model.testing_data], ['VALID', 'TEST']): accu_mean_rank_h = list() accu_mean_rank_t = list() accu_filtered_mean_rank_h = list() accu_filtered_mean_rank_t = list() evaluation_count = 0 for testing_data in data_func(batch_size=args.eval_batch): head_pred, tail_pred = session.run([test_head, test_tail], {test_input: testing_data}) evaluation_queue.put((testing_data, head_pred, tail_pred)) evaluation_count += 1 for i in range(args.n_worker): evaluation_queue.put(None) print("waiting for worker finishes their work") evaluation_queue.join() print("all worker stopped.") while evaluation_count > 0: evaluation_count -= 1 (mrh, fmrh), (mrt, fmrt) = result_queue.get() accu_mean_rank_h += mrh accu_mean_rank_t += mrt accu_filtered_mean_rank_h += fmrh accu_filtered_mean_rank_t += fmrt print( "[%s] INITIALIZATION [HEAD PREDICTION] MEAN RANK: %.1f FILTERED MEAN RANK %.1f HIT@10 %.3f FILTERED HIT@10 %.3f" % (test_type, np.mean(accu_mean_rank_h), np.mean(accu_filtered_mean_rank_h), np.mean(np.asarray(accu_mean_rank_h, dtype=np.int32) < 10), np.mean( np.asarray(accu_filtered_mean_rank_h, dtype=np.int32) < 10 ))) print( "[%s] INITIALIZATION [TAIL PREDICTION] MEAN RANK: %.1f FILTERED MEAN RANK %.1f HIT@10 %.3f FILTERED HIT@10 %.3f" % (test_type, np.mean(accu_mean_rank_t), np.mean(accu_filtered_mean_rank_t), np.mean(np.asarray(accu_mean_rank_t, dtype=np.int32) < 10), np.mean( np.asarray(accu_filtered_mean_rank_t, dtype=np.int32) < 10 ))) for n_iter in range(iter_offset, args.max_iter): start_time = timeit.default_timer() accu_loss = 0. accu_re_loss = 0. 
ninst = 0 print("initializing raw training data...") nbatches_count = 0 for dat in model.raw_training_data(batch_size=args.batch): raw_training_data_queue.put(dat) nbatches_count += 1 print("raw training data initialized.") while nbatches_count > 0: nbatches_count -= 1 hr_tlist, hr_tweight, tr_hlist, tr_hweight = training_data_queue.get( ) l, rl, _ = session.run( [train_loss, model.regularizer_loss, train_op], { train_hrt_input: hr_tlist, train_hrt_weight: hr_tweight, train_trh_input: tr_hlist, train_trh_weight: tr_hweight }) accu_loss += l accu_re_loss += rl ninst += len(hr_tlist) + len(tr_hlist) if ninst % (5000) is not None: print( '[%d sec](%d/%d) : %.2f -- loss : %.5f rloss: %.5f ' % (timeit.default_timer() - start_time, ninst, total_inst, float(ninst) / total_inst, l / (len(hr_tlist) + len(tr_hlist)), args.loss_weight * (rl / (len(hr_tlist) + len(tr_hlist)))), end='\r') print("") print("iter %d avg loss %.5f, time %.3f" % (n_iter, accu_loss / ninst, timeit.default_timer() - start_time)) if n_iter % args.save_per == 0 or n_iter == args.max_iter - 1: save_path = saver.save( session, os.path.join( args.save_dir, "ProjE_" + str(args.prefix) + "_" + str(n_iter) + ".ckpt")) print("Model saved at %s" % save_path) if n_iter % args.eval_per == 0 or n_iter == args.max_iter - 1: for data_func, test_type in zip( [model.validation_data, model.testing_data], ['VALID', 'TEST']): accu_mean_rank_h = list() accu_mean_rank_t = list() accu_filtered_mean_rank_h = list() accu_filtered_mean_rank_t = list() evaluation_count = 0 for testing_data in data_func(batch_size=args.eval_batch): head_pred, tail_pred = session.run( [test_head, test_tail], {test_input: testing_data}) evaluation_queue.put( (testing_data, head_pred, tail_pred)) evaluation_count += 1 for i in range(args.n_worker): evaluation_queue.put(None) print("waiting for worker finishes their work") evaluation_queue.join() print("all worker stopped.") while evaluation_count > 0: evaluation_count -= 1 (mrh, fmrh), (mrt, fmrt) = result_queue.get() accu_mean_rank_h += mrh accu_mean_rank_t += mrt accu_filtered_mean_rank_h += fmrh accu_filtered_mean_rank_t += fmrt print( "[%s] ITER %d [HEAD PREDICTION] MEAN RANK: %.1f FILTERED MEAN RANK %.1f HIT@10 %.3f FILTERED HIT@10 %.3f" % (test_type, n_iter, np.mean(accu_mean_rank_h), np.mean(accu_filtered_mean_rank_h), np.mean( np.asarray(accu_mean_rank_h, dtype=np.int32) < 10 ), np.mean( np.asarray(accu_filtered_mean_rank_h, dtype=np.int32) < 10))) print( "[%s] ITER %d [TAIL PREDICTION] MEAN RANK: %.1f FILTERED MEAN RANK %.1f HIT@10 %.3f FILTERED HIT@10 %.3f" % (test_type, n_iter, np.mean(accu_mean_rank_t), np.mean(accu_filtered_mean_rank_t), np.mean( np.asarray(accu_mean_rank_t, dtype=np.int32) < 10 ), np.mean( np.asarray(accu_filtered_mean_rank_t, dtype=np.int32) < 10)))
class Shredder(object): def __init__(self, work_generator, work_fn, aggregator, num_processes=0, log_level='info'): if log_level.lower() not in ['warn', 'info', 'debug']: raise Exception('invalid log level') logging.basicConfig(level=getattr(logging, log_level.upper())) self.logger = logging.getLogger('shredder') if not num_processes: self.num_processes = cpu_count() self.logger.info( "num_processes not set, " "defaulting to cpu_count %d", self.num_processes) else: self.num_processes = num_processes self.work_generator = work_generator self.work_fn = work_fn self.aggregator = aggregator self.queue = JoinableQueue() self.workers = Workers() def signal_handler(self, signum, stack_handler): self.logger.info("shutting down") signal.setitimer(signal.ITIMER_REAL, 0, 0) # clear self.workers.shutdown() sys.exit(0) def aggregate_results(self, signum, stack_handler): msgs = self.workers.read() for msg in msgs: self.aggregator(msg) def launch_workers(self): for i in range(0, self.num_processes): self.logger.info("launching worker-%d", i) worker = self.launch(i) self.workers.add(worker) def shred(self): count = 0 for chunk in self.work_generator(): if chunk is None: self.logger.warn("Got None from generator...ignoring") continue count += 1 self.queue.put(copy.deepcopy(chunk)) while self.queue.qsize() > self.num_processes: self.logger.debug( "Queue size exceeds process count, sleeping..") sleep(5) self.logger.debug("sent %d messages to the queue", count) def start(self): self.launch_workers() signal.signal(signal.SIGINT, self.signal_handler) signal.signal(signal.SIGALRM, self.aggregate_results) signal.setitimer(signal.ITIMER_REAL, 5, 5) self.shred() self.logger.info("Shredded; workers will shutdown when queue empties") self.workers.send_poison_pill(self.queue) self.queue.join() # make sure there aren't any results to aggregate before exiting self.aggregate_results(None, None) self.workers.cleanup() self.logger.info("Done") def launch(self, name): """ Start a new Worker process that will consume work from the queue. """ parent_pipe, child_pipe = Pipe() process = Process(target=Worker.start, args=(name, self.queue, child_pipe, self.work_fn)) process.start() worker = WorkerContext(name, process, parent_pipe) return worker
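To show how the three callables fit together, here is an illustrative, toy wiring of the Shredder API above. It assumes the unseen Worker sends each work_fn return value back over its pipe (which is how aggregate_results() appears to consume messages) and it is Unix-only because of the SIGALRM timer; the function names are placeholders, not part of the original code.

results = []

def gen_chunks():
    # yields units of work; shred() ignores None chunks
    for i in range(10):
        yield list(range(i, i + 5))

def sum_chunk(chunk):
    # runs inside a worker process; assumed to be returned to the parent via the pipe
    return sum(chunk)

def collect(msg):
    # called in the parent by the SIGALRM-driven aggregate_results()
    results.append(msg)

if __name__ == "__main__":
    Shredder(gen_chunks, sum_chunk, collect, num_processes=2).start()
    print(results)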
def cleanup(days, project, concurrency, silent, model, router, timed): """Delete a portion of trailing data based on creation date. All data that is older than `--days` will be deleted. The default for this is 30 days. In the default setting all projects will be truncated but if you have a specific project you want to limit this to this can be done with the `--project` flag which accepts a project ID or a string with the form `org/project` where both are slugs. """ if concurrency < 1: click.echo('Error: Minimum concurrency is 1', err=True) raise click.Abort() # Make sure we fork off multiprocessing pool # before we import or configure the app from multiprocessing import Process, JoinableQueue as Queue pool = [] task_queue = Queue(1000) for _ in xrange(concurrency): p = Process(target=multiprocess_worker, args=(task_queue,)) p.daemon = True p.start() pool.append(p) from sentry.runner import configure configure() from django.db import router as db_router from sentry.app import nodestore from sentry.db.deletion import BulkDeleteQuery from sentry import models if timed: import time from sentry.utils import metrics start_time = time.time() # list of models which this query is restricted to model_list = {m.lower() for m in model} def is_filtered(model): if router is not None and db_router.db_for_write(model) != router: return True if not model_list: return False return model.__name__.lower() not in model_list # Deletions that use `BulkDeleteQuery` (and don't need to worry about child relations) # (model, datetime_field, order_by) BULK_QUERY_DELETES = [ (models.EventMapping, 'date_added', '-date_added'), (models.EventAttachment, 'date_added', None), (models.UserReport, 'date_added', None), (models.GroupEmailThread, 'date', None), (models.GroupRuleStatus, 'date_added', None), ] + EXTRA_BULK_QUERY_DELETES # Deletions that use the `deletions` code path (which handles their child relations) # (model, datetime_field, order_by) DELETES = ( (models.Event, 'datetime', 'datetime'), (models.Group, 'last_seen', 'last_seen'), ) if not silent: click.echo('Removing expired values for LostPasswordHash') if is_filtered(models.LostPasswordHash): if not silent: click.echo('>> Skipping LostPasswordHash') else: models.LostPasswordHash.objects.filter( date_added__lte=timezone.now() - timedelta(hours=48) ).delete() for model in [models.ApiGrant, models.ApiToken]: if not silent: click.echo(u'Removing expired values for {}'.format(model.__name__)) if is_filtered(model): if not silent: click.echo(u'>> Skipping {}'.format(model.__name__)) else: model.objects.filter(expires_at__lt=timezone.now()).delete() project_id = None if project: click.echo( "Bulk NodeStore deletion not available for project selection", err=True) project_id = get_project(project) if project_id is None: click.echo('Error: Project not found', err=True) raise click.Abort() else: if not silent: click.echo("Removing old NodeStore values") cutoff = timezone.now() - timedelta(days=days) try: nodestore.cleanup(cutoff) except NotImplementedError: click.echo( "NodeStore backend does not support cleanup operation", err=True) for bqd in BULK_QUERY_DELETES: if len(bqd) == 4: model, dtfield, order_by, chunk_size = bqd else: chunk_size = 10000 model, dtfield, order_by = bqd if not silent: click.echo( u"Removing {model} for days={days} project={project}".format( model=model.__name__, days=days, project=project or '*', ) ) if is_filtered(model): if not silent: click.echo('>> Skipping %s' % model.__name__) else: BulkDeleteQuery( model=model, dtfield=dtfield, days=days, 
project_id=project_id, order_by=order_by, ).execute(chunk_size=chunk_size) for model, dtfield, order_by in DELETES: if not silent: click.echo( u"Removing {model} for days={days} project={project}".format( model=model.__name__, days=days, project=project or '*', ) ) if is_filtered(model): if not silent: click.echo('>> Skipping %s' % model.__name__) else: imp = '.'.join((model.__module__, model.__name__)) q = BulkDeleteQuery( model=model, dtfield=dtfield, days=days, project_id=project_id, order_by=order_by, ) for chunk in q.iterator(chunk_size=100): task_queue.put((imp, chunk)) task_queue.join() # Clean up FileBlob instances which are no longer used and aren't super # recent (as there could be a race between blob creation and reference) if not silent: click.echo("Cleaning up unused FileBlob references") if is_filtered(models.FileBlob): if not silent: click.echo('>> Skipping FileBlob') else: cleanup_unused_files(silent) # Shut down our pool for _ in pool: task_queue.put(_STOP_WORKER) # And wait for it to drain for p in pool: p.join() if timed: duration = int(time.time() - start_time) metrics.timing('cleanup.duration', duration, instance=router) click.echo("Clean up took %s second(s)." % duration)
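The docstring above describes the CLI contract for this command. For orientation, invocations might look like the following; the flag spellings come from the function signature here and are not verified against any particular release, so treat them as illustrative:

#   sentry cleanup --days 30 --concurrency 4           # default scope, 4 worker processes
#   sentry cleanup --days 7 --project my-org/my-proj   # limit to one project (org/project slugs)
#   sentry cleanup --days 90 --model Group --timed     # restrict to one model and report duration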
        print('%s ate %s' % (name, food))
        q.task_done()  # tell the queue that one item has been fully processed

if __name__ == '__main__':
    # q = Queue()
    q = JoinableQueue()
    p1 = Process(target=producer, args=("chef egon", "baozi", q))
    p2 = Process(target=producer, args=('tank', 'mantou', q))
    c1 = Process(target=consumer, args=("Chun", q))
    c2 = Process(target=consumer, args=("Niu", q))
    p1.start()
    p2.start()

    # daemon processes: they end once the queue is drained and the internal counter hits 0
    c1.daemon = True
    c2.daemon = True
    c1.start()
    c2.start()

    p1.join()
    p2.join()
    # once the producers finish, a special end marker could be put on the queue instead
    # q.put(None)
    q.join()  # wait until every item in the queue has been consumed before continuing

    """
    JoinableQueue:
    every put onto the queue increments an internal counter,
    every task_done() after a get decrements it,
    and q.join() only lets execution continue once the counter reaches 0.
    """
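The commented-out q.put(None) above alludes to the other common shutdown style: instead of daemonizing the consumers and relying on q.join(), put one explicit sentinel per consumer and let each exit on its own. A minimal sketch of that variant, standard library only:

from multiprocessing import JoinableQueue, Process

def consumer(q):
    while True:
        item = q.get()
        if item is None:          # explicit sentinel instead of a daemon consumer
            q.task_done()
            break
        print("consumed", item)
        q.task_done()

if __name__ == "__main__":
    q = JoinableQueue()
    c = Process(target=consumer, args=(q,))
    c.start()
    for i in range(3):
        q.put(i)                  # each put raises the unfinished-task count
    q.put(None)                   # one sentinel per consumer
    q.join()                      # unblocks once every item has been task_done()'d
    c.join()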
in_queue = JoinableQueue()
out_queue = Queue()

for _ in range(num_workers):
    p = Process(target=worker, args=(in_queue, out_queue,))
    p.daemon = True
    p.start()

p_consume = Process(target=consumer, args=(out_queue,))
p_consume.daemon = True
p_consume.start()

lags_to_try = [i for i in range(1, 9, 2)]
num_series = train_data.shape[0]

print("Setting input queue...")
print("data enqueued: 0.00%", flush=True)
for index in range(num_series):
    series = train_data.iloc[index, :]
    in_queue.put((lags_to_try, series))
    if (index + 1) % 10000 == 0:
        print("data enqueued: {:.2f}%".format(100.0 * (index + 1) / num_series), flush=True)
print("data enqueued: 100.00%", flush=True)

in_queue.close()
in_queue.join()
print("Input queue joined", flush=True)

out_queue.put(None)
p_consume.join()
print("consumer joined", flush=True)

print("All tasks done")
sys.exit(0)
def score_mp(iterable, function, num_procs, call_back, stop_check, debug=False, chunk_size=500): job_queue = JoinableQueue(100) scored_queue = Queue() stopped = Stopped() done = False while not done: chunk = [] while len(chunk) < chunk_size: try: item = next(iterable) except StopIteration: done = True break chunk.append(item) try: job_queue.put(chunk, False) except Full: break procs = [] counter = Counter() for i in range(num_procs): p = ScoreWorker(job_queue, scored_queue, function, counter, stopped) procs.append(p) p.start() val = 0 done = False while not done: if stop_check is None: break if stop_check is not None and stop_check(): stopped.stop() time.sleep(1) break chunk = [] while len(chunk) < chunk_size: try: item = next(iterable) except StopIteration: done = True break chunk.append(item) job_queue.put(chunk) if call_back is not None: value = counter.value() call_back(value) job_queue.join() time.sleep(2) if debug: print('queueadder joined!') return_list = list() error = None while True: try: l = scored_queue.get(timeout=2) except Empty: break if isinstance(l, Exception): error = l else: return_list.extend(l) if debug: print('emptied result queue') for p in procs: p.join() if error is not None: raise (error) if debug: print('joined') print(len(return_list)) return return_list
def IFN_2Dscan(modelfile, param1, param2, t_list, spec, custom_params=False, cpu=None, doseNorm=1, suppress=False, verbose=1): # initialization jobs = Queue() result = JoinableQueue() if cpu == None or cpu >= cpu_count(): NUMBER_OF_PROCESSES = cpu_count()-1 else: NUMBER_OF_PROCESSES = cpu if verbose != 0: print("Using {} threads".format(NUMBER_OF_PROCESSES)) # build task list params=[] if verbose != 0: print("Building tasks") if type(custom_params) == list: for val1 in param1[1]: for val2 in param2[1]: params.append([[param1[0],val1],[param2[0],val2]]+[c for c in custom_params]) else: for val1 in param1[1]: for val2 in param2[1]: params.append([[param1[0],val1],[param2[0],val2]]) # Write modelfile imported_model = __import__(modelfile,fromlist=['ifnmodels']) py_output = export(imported_model.model, 'python') with open('ODE_system.py','w') as f: f.write(py_output) tasks = [[modelfile, t_list, spec, p] for p in params] # put jobs on the queue if verbose != 0: print("There are {} tasks to compute".format(len(params))) if verbose != 0: print("Putting tasks on the queue") for w in tasks: jobs.put(w) if verbose != 0: print("Computing scan") # start up the workers [Process(target=IFN_2Dscan_helper, args=(i, jobs, result)).start() for i in range(NUMBER_OF_PROCESSES)] # pull in the results from each worker pool_results=[] for t in range(len(tasks)): r = result.get() pool_results.append(r) result.task_done() # tell the workers there are no more jobs for w in range(NUMBER_OF_PROCESSES): jobs.put(None) # close all extra threads result.join() jobs.close() result.close() if verbose != 0: print("Done scan") response_image = image_builder(pool_results, doseNorm, (len(param1[1]),len(param2[1]))) # plot heatmap if suppress==False if suppress==False: IFN_heatmap(response_image, "response image - {}".format(param1[0]), param2[0]) #return the scan return response_image
def test_model(): flags.entTotal = len(data_loader.ent_dict) flags.relTotal = len(data_loader.rel_dict) test = Test(test_data_size=len(data_loader.test_data_list), loader=data_loader) evaluation_queue = JoinableQueue() result_queue = Queue() for i in range(flags.num_worker): worker = Process(target=worker_func, args=(evaluation_queue, result_queue, test)) worker.start() print("work %d start!" % i) with tf.Graph().as_default(): with tf.Session() as sess: if flags.model == "dkrl": model = DKRL(flags, data_loader.lengths, data_loader.vocab2id, is_training=False, desciption_data=data_loader.get_all_description(), lengths=data_loader.get_all_content_len()) elif flags.model == "transE": model = TransE(flags) saver = tf.train.Saver(max_to_keep=flags.num_checkpoint) # saver.restore(sess,"./res-dkrl/400-model.tf") saver.restore(sess, "./res-transe/500-model.tf") if flags.model == "dkrl": sess.run([model.get_ent_cnn_embedding()]) l_raw_mean_rank = float(0.0) l_filter_mean_rank = float(0.0) l_raw_hit_10 = float(0.0) l_filter_hit_10 = float(0.0) r_raw_mean_rank = 0.0 r_filter_mean_rank = 0.0 r_raw_hit_10 = 0.0 r_filter_hit_10 = 0.0 def test_step(h, r, t, test_words, content_len): feedDict = { model.test_h: h, model.test_r: r, model.test_t: t # model.test_h_words:test_words[0], # model.test_t_words:test_words[1], # model.test_h_content_len: content_len[0], # model.test_t_content_len: content_len[1] } res = sess.run([model.predict], feed_dict=feedDict) return res evaluation_count = 0 for data in data_loader.get_predict_instance(): if test.index == len(data_loader.test_data_list): break temp_h, temp_t, item, head_words, tail_words, head_content_len, tail_content_len = data predict_h = test_step(temp_h[:, 0], temp_h[:, 2], temp_h[:, 1], head_words, head_content_len) predict_t = test_step(temp_t[:, 0], temp_t[:, 2], temp_t[:, 1], tail_words, tail_content_len) evaluation_queue.put( (predict_h[0], temp_h, predict_t[0], temp_t, item)) evaluation_count += 1 # test.test_head(predict_h[0], temp_h, item) # test.test_tail(predict_t[0], temp_t, item) # test.index += 1 # test.Print() for i in range(flags.num_worker): evaluation_queue.put(None) print("waiting for worker finishes their work") evaluation_queue.join() print("all worker stopped.") index = evaluation_count while evaluation_count > 0: evaluation_count = evaluation_count - 1 l, r = result_queue.get() l_r_rank, l_f_rank, l_r_hit_10, l_f_hit_10 = l r_r_rank, r_f_rank, r_r_hit_10, r_f_hit_10 = r l_raw_mean_rank += l_r_rank r_raw_mean_rank += r_r_rank l_filter_mean_rank += l_f_rank r_filter_mean_rank += r_f_rank l_raw_hit_10 += l_r_hit_10 r_raw_hit_10 += r_r_hit_10 l_filter_hit_10 += l_f_hit_10 r_filter_hit_10 += r_f_hit_10 print( "l_raw_mean_rank {:.3f} l_filter_mean_rank {:.3f} l_raw_hit_10 {:.5f} l_filter_hit_10 {:.5f}" .format(l_raw_mean_rank / index, l_filter_mean_rank / index, l_raw_hit_10 / index, l_filter_hit_10 / index)) print( "r_raw_mean_rank {:.3f} r_filter_mean_rank {:.3f} r_raw_hit_10 {:.5f} r_filter_hit_10 {:.5f}" .format(r_raw_mean_rank / index, r_filter_mean_rank / index, r_raw_hit_10 / index, r_filter_hit_10 / index))
class ParallelSampler(): """ Generate rollouts by N parallel Sampler Processes 1. Sampler process runs async, generates rollouts by exec policy on each own random seeded Env 2. Sampler writes rollouts to Queue to communicate with Agent process 3. Sampler reads policy weights from Queue updated by Agent Process """ def __init__(self, n_sampler, env_name, policy, max_step=1000, batch_size=10000, animate=False): """ n_sampler: num of sampler processes to generate rollouts env_name: ai gym environment, e.g. 'HalfCheetah-V2' policy: policy object with ops max_step: maximum sample size of a episode batch_size: number of steps in a training batch animate: boolean, True uses env.render() method to animate episode """ # Queue and Event self.tasks = JoinableQueue() self.results = Queue() self.weights_ready_event = Event() self.policy = policy self.n_sampler = n_sampler self.batch_size = batch_size self.clear_rollouts() self.samplers = [] #TODO, add monitor in one process for sid in range(self.n_sampler): self.samplers.append( Sampler(sid, self.tasks, self.results, self.weights_ready_event, env_name, policy, max_step, batch_size, animate)) for sampler in self.samplers: # each sampler start running async sampler.start() def set_policy_weights(self, weights): """ save policy weights to tasks Queue, signal each sampler process to assign the weights """ self.weights_ready_event.clear() for i in range(self.n_sampler): self.tasks.put(weights) self.tasks.join() self.weights_ready_event.set() def gen_rollouts(self): """ N Sampler Processes to generate rollouts in parallel, for efficiency, each Sampler collects one rollout (episode) """ start = time.time() for i in range(self.n_sampler): # task to collect experience for each sampler self.tasks.put(1) # wait for experience collection tasks finishing self.tasks.join() self.clear_rollouts() print('reading result') total_steps = 0 for i in range(self.n_sampler): res = self.results.get() total_steps += self.add_rollouts(res) #print('running {} min to collect total steps {}'.format( #(time.time() - start) / 60.0, total_steps)) return self.rollouts def clear_rollouts(self): self.rollouts = [] def add_rollouts(self, to_add): """ add rollouts: self.rollouts += to_add self.rollouts: list of rollout (map) rollout: map of {'observers' : NumPy array of states from episode 'actions' : NumPy array of actions from episode 'rewards' : NumPy array of (un-discounted) rewards from episode 'unscaled_obs' : NumPy array of (un-discounted) rewards from episode } input: to_add: list of rollout output: n_steps in to_add rollouts """ n_steps = 0 for rollout in to_add: self.rollouts.append(rollout) n_steps += rollout['observes'].shape[0] return n_steps def exit(self): """ task: sampler finish """ for i in range(self.n_sampler): self.tasks.put(2)
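The docstring above lays out the queue-and-event protocol between the agent and the samplers. An illustrative driver loop for the class, using only its public methods; make_policy, get_weights, and update are placeholders for whatever policy object the caller actually uses:

policy = make_policy()  # placeholder: any policy object with the expected ops
sampler = ParallelSampler(n_sampler=4, env_name='HalfCheetah-v2', policy=policy)

for iteration in range(100):
    sampler.set_policy_weights(policy.get_weights())  # broadcast weights, then set the ready event
    rollouts = sampler.gen_rollouts()                  # blocks until every sampler reports back
    policy.update(rollouts)                            # consume the collected episodes

sampler.exit()                                         # task code 2 tells the samplers to shut down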
from multiprocessing import Process, JoinableQueue
import time

queue = JoinableQueue()

def put():
    for i in range(10):
        print('putting item: {}'.format(i))
        queue.put(i)

def get():
    while True:
        time.sleep(0.1)
        print('got item: {}'.format(queue.get()))
        queue.task_done()

p1 = Process(target=put)
p1.daemon = True
p1.start()

p2 = Process(target=get)
p2.daemon = True
p2.start()

# Process startup is slow: without this pause the main process would reach this
# point before the child processes have done any work and the program would end
# early. Workaround: wait a little before joining the queue.
time.sleep(0.5)
queue.join()  # make the main process wait until all queue tasks are done
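The sleep above only papers over the startup race. A slightly more deterministic variant of the same demo, standard library only: join the producer so all puts are guaranteed to have happened before queue.join() starts waiting, while the consumer stays a daemon and is killed when the main process exits, exactly as in the original.

from multiprocessing import Process, JoinableQueue

def put(queue):
    for i in range(10):
        queue.put(i)

def get(queue):
    while True:
        print('got: {}'.format(queue.get()))
        queue.task_done()

if __name__ == '__main__':
    queue = JoinableQueue()
    producer = Process(target=put, args=(queue,))
    consumer = Process(target=get, args=(queue,))
    consumer.daemon = True   # killed automatically when the main process exits
    producer.start()
    consumer.start()
    producer.join()          # instead of sleeping: wait until all 10 puts are done
    queue.join()             # then wait until the consumer has task_done()'d them all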
    # Push output df to write queue
    write_queue.put((os.path.join(output_dir, level + "_quantiles.csv"), q_df))

pool = Pool(processes=args.nprocs)
for _ in tqdm.tqdm(
    pool.imap_unordered(_process_date, dates),
    total=len(dates),
    desc="Postprocessing dates",
    dynamic_ncols=True,
):
    pass

pool.close()
pool.join()  # wait until everything is done

to_write.join()      # wait until queue is empty
to_write.put(None)   # send signal to term loop
to_write.join()      # wait until write_thread handles it
write_thread.join()  # join the write_thread

# sort output csvs
if not args.no_sort:
    for a_level in args.levels:
        filename = os.path.join(output_dir, a_level + "_quantiles.csv")
        logging.info("Sorting output file " + filename + "...")
        out_df = pd.read_csv(filename)
        # TODO we can avoid having to set index here once readable_column names is complete
        # set index and sort them
        out_df = out_df.set_index([a_level, "date", "quantile"]).sort_index()
        # sort columns alphabetically
class MCTSParallelPlayer(Agent): def __init__(self, env, sim_count=16, trade_off=3, neighbor_range=1, process_num=0): self.sim_count = sim_count self.neighborhood = lambda loc: itertools.product( range(loc[0] - neighbor_range, loc[0] + neighbor_range + 1), range(loc[1] - neighbor_range, loc[1] + neighbor_range + 1)) self.env = env self.trade_off = trade_off self.process_num = process_num if process_num else cpu_count() self.parallel() self.reset() def parallel(self): self.task_queue = JoinableQueue(1) self.result_queue = JoinableQueue(self.process_num) self.workers = [ Process(target=self.rollout_worker, args=(self.env, self.task_queue, self.result_queue)) for _ in range(self.process_num) ] for worker in self.workers: worker.start() self.update_thread = Thread(target=self.update_statics) self.update_thread.start() def reset(self): self.count = KeyHashDefaultDict(int) self.uob = KeyHashDefaultDict(int) self.win = KeyHashDefaultDict(int) self.child_info = KeyHashDefaultDict(list) self.father_info = KeyHashDefaultDict() def get_winner(self, state): return self.env.get_winner(state) def hash_convert(self, state): board, player = state return (strfboard(board), player) def simulation(self, state): leaf_state = self.select(state) if self.get_winner(leaf_state) != None: self.back_propagate(leaf_state, self.get_winner(leaf_state)) else: if self.count[leaf_state] + self.uob[leaf_state] == 0: self.rollout(leaf_state) else: self.expand(leaf_state) next_state = random.choice(self.child_info[leaf_state]) if self.get_winner(next_state) != None: self.back_propagate(next_state, self.get_winner(next_state)) else: self.rollout(next_state) def select(self, state): cur_state = state while cur_state in self.child_info: self.uob[cur_state] += 1 childs = self.child_info[cur_state] unexplored_childs = [ child for child in childs if self.count[child] + self.uob[child] == 0 ] if unexplored_childs: cur_state = random.choice(unexplored_childs) else: ucbs = [ self.win[next_state] / (self.count[next_state] + epision) + np.sqrt(self.trade_off * np.log(self.count[cur_state] + self.uob[cur_state]) / (self.count[next_state] + self.uob[next_state])) for next_state in childs ] max_ucb = max(ucbs) best_childs = [ child for child, ucb in zip(childs, ucbs) if ucb == max_ucb ] cur_state = random.choice(best_childs) return cur_state def expand(self, state): board, player = state cur_state = state self.uob[cur_state] += 1 if np.any(board != EMPTY): neighbor_valid_actions = reduce(operator.concat, [[ act for act in self.neighborhood(action) if 0 <= act[0] < board.shape[0] and 0 <= act[1] < board.shape[1] and board[act] == EMPTY ] for action in np.argwhere(board != EMPTY)]) else: neighbor_valid_actions = [ (np.array(board.shape) / 2).astype(np.int8) ] for action in neighbor_valid_actions: next_state, winner, done, _ = self.env.next_step(cur_state, action) if done: self.child_info[cur_state] = [next_state] self.father_info[next_state] = cur_state break self.child_info[cur_state].append(next_state) self.father_info[next_state] = cur_state def expand_worker(self, state): pass def rollout(self, state): self.uob[state] += 1 pre_actions = [] cur_state = state while self.father_info[cur_state]: pre_actions.append( np.argwhere( self.father_info[cur_state][0] - cur_state[0] != EMPTY)[0]) cur_state = self.father_info[cur_state] self.task_queue.put( { "mode": "rollout", "node": state, "pre_actions": pre_actions }, block=True) @staticmethod def rollout_worker(env, task_q, result_q): trans = Transfer() while True: data = task_q.get(True) state = 
data["node"] pre_actions = data["pre_actions"] board, player = state while True: vaild_actions = list(zip(*np.where(board == 0))) if vaild_actions: # random_action = random.choice(vaild_actions) action = trans.decide(pre_actions) (board, player), winner, done, info = env.next_step( (board, player), action) else: result_q.put((state, random.choice([WHITE, BLACK])), block=True) task_q.task_done() break if done: result_q.put((state, winner), block=True) task_q.task_done() break def back_propagate(self, state, winner): cur_state = state while cur_state != None: self.uob[cur_state] -= 1 self.count[cur_state] += 1 if winner == cur_state[1]: self.win[cur_state] += 1 cur_state = self.father_info[cur_state] def update_statics(self): while True: data = self.result_queue.get(True) self.back_propagate(*data) self.result_queue.task_done() def closeout(self): self.task_queue.join() self.result_queue.join() def decide(self, state): board, player = state self.father_info[state] = None # while self.count[state] < self.sim_count: for i in range(self.sim_count): self.simulation(state) self.closeout() childs = self.child_info[state] win_rates = [ self.win[next_state] / (self.count[next_state] + epision) for next_state in childs ] max_win_rate = max(win_rates) best_childs = [ child for child, win_rate in zip(childs, win_rates) if win_rate == max_win_rate ] best_child = random.choice(best_childs) return np.argwhere(best_child[0] - board)[0]
def _parallelly_make_dataset(self): import multiprocessing from multiprocessing import Process from multiprocessing import JoinableQueue as Queue name_file = '{}/video_list.npy'.format(self.loc) len_file = '{}/video_lengths.npy'.format(self.loc) if isfile(name_file): video_list = np.load(name_file) video_lengths = np.load(len_file) return video_list, video_lengths q = Queue() qvideo_list = Queue() fnames_list = [] for root, _, fnames in tqdm(os.walk(self.root)): for fname in sorted(fnames): fnames_list.append(os.path.join(root, fname)) def parallel_worker(fnames_chunk): item = q.get() for fname in tqdm(fnames_chunk): if has_file_allowed_extension(fname, VIDEO_EXTENSION): video_path = fname vc = cv2.VideoCapture(video_path) length = int(vc.get(cv2.CAP_PROP_FRAME_COUNT)) if length > 0 and vc.isOpened(): qvideo_list.put((video_path, length)) qvideo_list.task_done() vc.release() q.task_done() processes = 32 n = len(fnames_list) chunk = int(n / processes) if chunk == 0: chunk = 1 fnames_chunks = [fnames_list[i*chunk:(i+1)*chunk] \ for i in range((n + chunk - 1) // chunk)] for i in range(processes): q.put(i) multiprocessing.Process(target=parallel_worker, args=(fnames_chunks[i], )).start() q.join() qvideo_list.join() video_list = [] video_lengths = [] while qvideo_list.qsize() != 0: video, length = qvideo_list.get() video_list.append(video) video_lengths.append(length) np.save(name_file, video_list) np.save(len_file, video_lengths) return video_list, video_lengths