def run_one_epoch(PBT_params, PBT_params_worker, num_workers, config_train,
                  config_test, worst_20perc_workers, best_20perc_workers,
                  epoch_loop_num, num_worker_loops):
    # initiate some fields in PBT_params dictionary
    for param, value in PBT_params.items():
        PBT_params_worker['mutation_{}'.format(param)] = (-1) * np.ones(num_workers)
    PBT_params_worker['copied_from_w'] = (-1) * np.ones(num_workers)

    num_workers_paral = int(config_train['num_workers_paral'])
    gpus_list = list(map(int, config_train['gpus'].split(',')))
    gpus_list_paral = gpus_list * int(
        math.ceil(num_workers_paral / float(len(gpus_list))))

    w = 0
    for worker_loop_num in range(num_worker_loops):
        for worker_paral in range(num_workers_paral):
            print('\nepoch_loop_num = {}, w = {}, worker_loop_num = {}, worker_paral = {}'
                  .format(epoch_loop_num, w, worker_loop_num, worker_paral))
            if epoch_loop_num == 0:
                # rand initial params
                for param, value in PBT_params.items():
                    assert len(value) > 1
                    if len(value) == 2:
                        val = np.random.uniform(value[0], value[1])
                    else:
                        idx = np.random.randint(0, len(value))
                        val = value[idx]
                    config_train[param] = val
                    PBT_params_worker[str(param)][w] = val
            else:
                # copy all checkpoints
                ckpt_dir = os.path.join(config_train['train_dir'], 'ckpts')
                for filename in os.listdir(ckpt_dir):
                    if filename.startswith('latest_'):
                        src = os.path.join(ckpt_dir, filename)
                        ww = (filename.split('.')[0]).split('_')[-1]
                        dest = os.path.join(ckpt_dir, 'latestcopy_{}.pth'.format(ww))
                        if os.path.isfile(dest):
                            os.remove(dest)
                        shutil.copy(src, dest)
                # truncation selection (copy weights and params of best 20% of
                # workers randomly to the worst 20% of workers)
                assert best_20perc_workers is not None and worst_20perc_workers is not None
                if w in worst_20perc_workers:
                    rand_idx = np.random.randint(0, len(best_20perc_workers))
                    worker_to_copy_from = best_20perc_workers[rand_idx]
                    PBT_params_worker['copied_from_w'][w] = worker_to_copy_from
                    print('w = {}, best_20perc_workers = {}, worker_to_copy_from = {}'
                          .format(w, best_20perc_workers, worker_to_copy_from))
                    mutation = [float(val) for val in config_train['mutation'].split(',')]
                    for param, value in PBT_params.items():
                        rand_idx = np.random.randint(0, len(mutation))
                        mutation_rand = mutation[rand_idx]
                        val = float(PBT_params_worker[str(param)][worker_to_copy_from]
                                    * mutation_rand)
                        if param == 'keep_prob':
                            val = min(val, 1.0)
                            val = max(val, 0.0)
                        config_train[param] = val
                        # print 'worst20%: w = {}, param = {}, value = {}'.format(w, param, val)
                        PBT_params_worker[str(param)][w] = val
                        PBT_params_worker['mutation_{}'.format(param)][w] = mutation_rand
                    # resume training from the ckpt of the best worker
                    config_train['load_ckpt'] = 'latest_{}'.format(worker_to_copy_from)
                else:
                    # resume training from previous ckpt
                    config_train['load_ckpt'] = 'latest_{}'.format(w)

            # add current epoch number to config
            config_train['n_epoch'] = epoch_loop_num * config_train['num_epochs']

            # start train process
            config_train['worker_num'] = w
            print('started training worker {}'.format(w))
            set_cuda_visible_gpus(config_train, 'train',
                                  one_gpu_str=str(gpus_list_paral[worker_paral]))
            train_process = multiprocessing.Process(
                target=Train.train_builder,
                args=(Namespace(**config_train),),
                name='train_{}'.format(w))
            train_process.start()
            w += 1
        train_process.join()
        print('worker loop {} for epoch_loop {} finished'.format(
            worker_loop_num, epoch_loop_num))
        sleep(5)

    # run parallel tests for all workers
    w = 0
    for worker_loop_num in range(num_worker_loops):
        for worker_paral in range(num_workers_paral):
            print('started test worker {}'.format(w))
            config_test['worker_num'] = w
            set_cuda_visible_gpus(config_test, 'test',
                                  one_gpu_str=str(gpus_list_paral[worker_paral]))
            test_process = multiprocessing.Process(
                target=Test.test_builder,
                args=(Namespace(**config_test),),
                name='test_{}'.format(w))
            test_process.start()
            w += 1
        test_process.join()
    print('test for epoch_loop {} finished'.format(epoch_loop_num))
    sleep(5)
def run(args, logger):
    init_time_start = time.time()
    # load dataset and samplers
    dataset = get_dataset(args.data_path, args.dataset, args.format, args.data_files)

    if args.neg_sample_size_eval < 0:
        args.neg_sample_size_eval = dataset.n_entities
    args.batch_size = get_compatible_batch_size(args.batch_size, args.neg_sample_size)
    args.batch_size_eval = get_compatible_batch_size(args.batch_size_eval, args.neg_sample_size_eval)

    args.eval_filter = not args.no_eval_filter
    if args.neg_deg_sample_eval:
        assert not args.eval_filter, "if negative sampling based on degree, we can't filter positive edges."

    train_data = TrainDataset(dataset, args, ranks=args.num_proc)
    # if there is no cross-partition relation, we fall back to strict_rel_part
    args.strict_rel_part = args.mix_cpu_gpu and (train_data.cross_part == False)
    args.soft_rel_part = args.mix_cpu_gpu and args.soft_rel_part and train_data.cross_part
    args.num_workers = 8  # fix num_workers to 8

    if args.num_proc > 1:
        train_samplers = []
        for i in range(args.num_proc):
            train_sampler_head = train_data.create_sampler(
                args.batch_size, args.neg_sample_size, args.neg_sample_size,
                mode='head', num_workers=args.num_workers, shuffle=True,
                exclude_positive=False, rank=i)
            train_sampler_tail = train_data.create_sampler(
                args.batch_size, args.neg_sample_size, args.neg_sample_size,
                mode='tail', num_workers=args.num_workers, shuffle=True,
                exclude_positive=False, rank=i)
            train_samplers.append(NewBidirectionalOneShotIterator(
                train_sampler_head, train_sampler_tail, args.neg_sample_size,
                args.neg_sample_size, True, dataset.n_entities))

        train_sampler = NewBidirectionalOneShotIterator(
            train_sampler_head, train_sampler_tail, args.neg_sample_size,
            args.neg_sample_size, True, dataset.n_entities)
    else:  # This is used for debug
        train_sampler_head = train_data.create_sampler(
            args.batch_size, args.neg_sample_size, args.neg_sample_size,
            mode='head', num_workers=args.num_workers, shuffle=True,
            exclude_positive=False)
        train_sampler_tail = train_data.create_sampler(
            args.batch_size, args.neg_sample_size, args.neg_sample_size,
            mode='tail', num_workers=args.num_workers, shuffle=True,
            exclude_positive=False)
        train_sampler = NewBidirectionalOneShotIterator(
            train_sampler_head, train_sampler_tail, args.neg_sample_size,
            args.neg_sample_size, True, dataset.n_entities)

    if args.valid or args.test:
        if len(args.gpu) > 1:
            args.num_test_proc = args.num_proc if args.num_proc < len(args.gpu) else len(args.gpu)
        else:
            args.num_test_proc = args.num_proc
        eval_dataset = EvalDataset(dataset, args)
    if args.valid:
        if args.num_proc > 1:
            valid_sampler_heads = []
            valid_sampler_tails = []
            for i in range(args.num_proc):
                valid_sampler_head = eval_dataset.create_sampler(
                    'valid', args.batch_size_eval, args.neg_sample_size_eval,
                    args.neg_sample_size_eval, args.eval_filter,
                    mode='chunk-head', num_workers=args.num_workers,
                    rank=i, ranks=args.num_proc)
                valid_sampler_tail = eval_dataset.create_sampler(
                    'valid', args.batch_size_eval, args.neg_sample_size_eval,
                    args.neg_sample_size_eval, args.eval_filter,
                    mode='chunk-tail', num_workers=args.num_workers,
                    rank=i, ranks=args.num_proc)
                valid_sampler_heads.append(valid_sampler_head)
                valid_sampler_tails.append(valid_sampler_tail)
        else:  # This is used for debug
            valid_sampler_head = eval_dataset.create_sampler(
                'valid', args.batch_size_eval, args.neg_sample_size_eval,
                args.neg_sample_size_eval, args.eval_filter,
                mode='chunk-head', num_workers=args.num_workers, rank=0, ranks=1)
            valid_sampler_tail = eval_dataset.create_sampler(
                'valid', args.batch_size_eval, args.neg_sample_size_eval,
                args.neg_sample_size_eval, args.eval_filter,
                mode='chunk-tail', num_workers=args.num_workers, rank=0, ranks=1)
    if args.test:
        if args.num_test_proc > 1:
            test_sampler_tails = []
            test_sampler_heads = []
            for i in range(args.num_test_proc):
                test_sampler_head = eval_dataset.create_sampler(
                    'test', args.batch_size_eval, args.neg_sample_size_eval,
                    args.neg_sample_size_eval, args.eval_filter,
                    mode='chunk-head', num_workers=args.num_workers,
                    rank=i, ranks=args.num_test_proc)
                test_sampler_tail = eval_dataset.create_sampler(
                    'test', args.batch_size_eval, args.neg_sample_size_eval,
                    args.neg_sample_size_eval, args.eval_filter,
                    mode='chunk-tail', num_workers=args.num_workers,
                    rank=i, ranks=args.num_test_proc)
                test_sampler_heads.append(test_sampler_head)
                test_sampler_tails.append(test_sampler_tail)
        else:
            test_sampler_head = eval_dataset.create_sampler(
                'test', args.batch_size_eval, args.neg_sample_size_eval,
                args.neg_sample_size_eval, args.eval_filter,
                mode='chunk-head', num_workers=args.num_workers, rank=0, ranks=1)
            test_sampler_tail = eval_dataset.create_sampler(
                'test', args.batch_size_eval, args.neg_sample_size_eval,
                args.neg_sample_size_eval, args.eval_filter,
                mode='chunk-tail', num_workers=args.num_workers, rank=0, ranks=1)

    # load model
    model = load_model(logger, args, dataset.n_entities, dataset.n_relations)
    if args.num_proc > 1 or args.async_update:
        model.share_memory()

    # We need to free all memory referenced by dataset.
    eval_dataset = None
    dataset = None

    print('Total initialize time {:.3f} seconds'.format(time.time() - init_time_start))

    # train
    start = time.time()
    rel_parts = train_data.rel_parts if args.strict_rel_part or args.soft_rel_part else None
    cross_rels = train_data.cross_rels if args.soft_rel_part else None
    if args.num_proc > 1:
        procs = []
        barrier = mp.Barrier(args.num_proc)
        for i in range(args.num_proc):
            valid_sampler = [valid_sampler_heads[i], valid_sampler_tails[i]] if args.valid else None
            proc = mp.Process(target=train_mp,
                              args=(args, model, train_samplers[i], valid_sampler,
                                    i, rel_parts, cross_rels, barrier))
            procs.append(proc)
            proc.start()
        for proc in procs:
            proc.join()
    else:
        valid_samplers = [valid_sampler_head, valid_sampler_tail] if args.valid else None
        train(args, model, train_sampler, valid_samplers, rel_parts=rel_parts)
    print('training takes {} seconds'.format(time.time() - start))

    if args.save_emb is not None:
        if not os.path.exists(args.save_emb):
            os.mkdir(args.save_emb)
        model.save_emb(args.save_emb, args.dataset)

        # We need to save the model configurations as well.
        conf_file = os.path.join(args.save_emb, 'config.json')
        with open(conf_file, 'w') as outfile:
            json.dump({'dataset': args.dataset,
                       'model': args.model_name,
                       'emb_size': args.hidden_dim,
                       'max_train_step': args.max_step,
                       'batch_size': args.batch_size,
                       'neg_sample_size': args.neg_sample_size,
                       'lr': args.lr,
                       'gamma': args.gamma,
                       'double_ent': args.double_ent,
                       'double_rel': args.double_rel,
                       'neg_adversarial_sampling': args.neg_adversarial_sampling,
                       'adversarial_temperature': args.adversarial_temperature,
                       'regularization_coef': args.regularization_coef,
                       'regularization_norm': args.regularization_norm},
                      outfile, indent=4)

    # test
    if args.test:
        start = time.time()
        if args.num_test_proc > 1:
            queue = mp.Queue(args.num_test_proc)
            procs = []
            for i in range(args.num_test_proc):
                proc = mp.Process(target=test_mp,
                                  args=(args, model,
                                        [test_sampler_heads[i], test_sampler_tails[i]],
                                        i, 'Test', queue))
                procs.append(proc)
                proc.start()

            total_metrics = {}
            metrics = {}
            logs = []
            for i in range(args.num_test_proc):
                log = queue.get()
                logs = logs + log

            for metric in logs[0].keys():
                metrics[metric] = sum([log[metric] for log in logs]) / len(logs)
            for k, v in metrics.items():
                print('Test average {} : {}'.format(k, v))

            for proc in procs:
                proc.join()
        else:
            test(args, model, [test_sampler_head, test_sampler_tail])
        print('testing takes {:.3f} seconds'.format(time.time() - start))
def run_test(self, test_factory, queue, summary, job_deadline=0):
    """
    Run a test instance inside a subprocess.

    :param test_factory: Test factory (test class and parameters).
    :type test_factory: tuple of :class:`avocado.core.test.Test` and dict.
    :param queue: Multiprocess queue.
    :type queue: :class:`multiprocessing.Queue` instance.
    :param summary: Contains types of test failures.
    :type summary: set.
    :param job_deadline: Maximum time to execute.
    :type job_deadline: int.
    """
    proc = None
    sigtstp = multiprocessing.Lock()

    def sigtstp_handler(signum, frame):     # pylint: disable=W0613
        """ SIGSTOP all test processes on SIGTSTP """
        if not proc:    # Ignore ctrl+z when proc not yet started
            return
        with sigtstp:
            msg = "ctrl+z pressed, %%s test (%s)" % proc.pid
            if self.sigstopped:
                APP_LOG.info("\n" + msg, "resuming")
                TEST_LOG.info(msg, "resuming")
                process.kill_process_tree(proc.pid, signal.SIGCONT, False)
                self.sigstopped = False
            else:
                APP_LOG.info("\n" + msg, "stopping")
                TEST_LOG.info(msg, "stopping")
                process.kill_process_tree(proc.pid, signal.SIGSTOP, False)
                self.sigstopped = True

    proc = multiprocessing.Process(target=self._run_test,
                                   args=(test_factory, queue,))
    test_status = TestStatus(self.job, queue)

    cycle_timeout = 1
    time_started = time.time()
    signal.signal(signal.SIGTSTP, signal.SIG_IGN)
    proc.start()
    signal.signal(signal.SIGTSTP, sigtstp_handler)
    test_status.wait_for_early_status(proc, 60)

    # At this point, the test is already initialized and we know
    # for sure if there's a timeout set.
    timeout = test_status.early_status.get('timeout')
    timeout = float(timeout or self.DEFAULT_TIMEOUT)

    test_deadline = time_started + timeout
    if job_deadline > 0:
        deadline = min(test_deadline, job_deadline)
    else:
        deadline = test_deadline

    ctrl_c_count = 0
    ignore_window = 2.0
    ignore_time_started = time.time()
    stage_1_msg_displayed = False
    stage_2_msg_displayed = False
    first = 0.01
    step = 0.01
    abort_reason = None
    result_dispatcher = self.job._result_events_dispatcher

    while True:
        try:
            if time.time() >= deadline:
                abort_reason = "Timeout reached"
                try:
                    os.kill(proc.pid, signal.SIGTERM)
                except OSError:
                    pass
                break
            wait.wait_for(lambda: not queue.empty() or not proc.is_alive(),
                          cycle_timeout, first, step)
            if test_status.interrupt:
                break
            if proc.is_alive():
                if ctrl_c_count == 0:
                    if (test_status.status.get('running') or
                            self.sigstopped):
                        result_dispatcher.map_method('test_progress', False)
                    else:
                        result_dispatcher.map_method('test_progress', True)
            else:
                break
        except KeyboardInterrupt:
            time_elapsed = time.time() - ignore_time_started
            ctrl_c_count += 1
            if ctrl_c_count == 1:
                if not stage_1_msg_displayed:
                    abort_reason = "Interrupted by ctrl+c"
                    self.job.log.debug("\nInterrupt requested. Waiting %d "
                                       "seconds for test to finish "
                                       "(ignoring new Ctrl+C until then)",
                                       ignore_window)
                    stage_1_msg_displayed = True
                ignore_time_started = time.time()
                process.kill_process_tree(proc.pid, signal.SIGINT)
            if (ctrl_c_count > 1) and (time_elapsed > ignore_window):
                if not stage_2_msg_displayed:
                    abort_reason = "Interrupted by ctrl+c (multiple-times)"
                    self.job.log.debug("Killing test subprocess %s",
                                       proc.pid)
                    stage_2_msg_displayed = True
                process.kill_process_tree(proc.pid, signal.SIGKILL)

    # Get/update the test status (decrease timeout on abort)
    if abort_reason:
        finish_deadline = TIMEOUT_TEST_INTERRUPTED
    else:
        finish_deadline = deadline
    test_state = test_status.finish(proc, time_started, step,
                                    finish_deadline, result_dispatcher)

    # Try to log the timeout reason to test's results and update test_state
    if abort_reason:
        test_state = add_runner_failure(test_state, "INTERRUPTED", abort_reason)

    # don't process other tests from the list
    if ctrl_c_count > 0:
        self.job.log.debug('')

    # Make sure the test status is correct
    if test_state.get('status') not in status.user_facing_status:
        test_state = add_runner_failure(test_state, "ERROR", "Test reports"
                                        " unsupported test status.")

    self.result.check_test(test_state)
    result_dispatcher.map_method('end_test', self.result, test_state)
    if test_state['status'] == "INTERRUPTED":
        summary.add("INTERRUPTED")
    elif not mapping[test_state['status']]:
        summary.add("FAIL")
        if getattr(self.job.args, 'failfast', 'off') == 'on':
            summary.add("INTERRUPTED")
            self.job.log.debug("Interrupting job (failfast).")
            return False
    if ctrl_c_count > 0:
        return False
    return True
def test_sync_with_handlers():
    proc = multiprocessing.Process(target=_sync_with_handlers_proc_target)
    proc.start()
    proc.join()
    assert proc.exitcode == 0
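# The target above (`_sync_with_handlers_proc_target`) is defined elsewhere;
# a minimal self-contained sketch of the same pattern, with a hypothetical
# no-op target: the test only asserts the child's exit code, so any uncaught
# exception in the child process (non-zero exitcode) fails the test.
import multiprocessing


def _noop_proc_target():
    pass  # hypothetical stand-in for the real target


def test_child_exitcode():
    proc = multiprocessing.Process(target=_noop_proc_target)
    proc.start()
    proc.join()
    assert proc.exitcode == 0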
        raise
threading.Thread(target=hashrateCalculator,
                 args=(hashcount, khashcount)).start()  # Start hashrate calculator thread

for x in range(int(threadcount)):  # Launch duco mining threads
    thread.append(x)
    thread[x] = multiprocessing.Process(
        target=Thread,
        args=(x, hashcount, accepted, rejected, requestedDiff, khashcount,
              username, efficiency, rigIdentifier, algorithm))
    thread[x].start()
    time.sleep(0.1)

try:
    initRichPresence()
    threading.Thread(target=updateRichPresence).start()
except:
    if debug == "y":
        raise
def producer(sequence, output_q):
    # JoinableQueue: workers do the work and report back when done
    print("Into producer:", ctime())
    for item in sequence:
        output_q.put(item)
        print("put", item, "into q")
    print("Out of producer:", ctime())


# create the processes
if __name__ == '__main__':
    q = multiprocessing.JoinableQueue()
    # run the consumer processes
    cons_p = multiprocessing.Process(target=consumer, args=(q, ))
    cons_p.daemon = True
    cons_p.start()

    cons_p2 = multiprocessing.Process(target=consumer, args=(q, ))
    cons_p2.daemon = True
    cons_p2.start()

    # produce several items; `sequence` is the sequence of items to send to
    # the consumers. In practice this could be the output of a generator or
    # produced some other way.
    sequence = [1, 2, 3, 4]
    producer(sequence, q)

    q.put(None)
    q.put(None)
    # wait for all items to be processed
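# `consumer` is referenced above but not shown; a minimal sketch of what it
# presumably looks like, assuming the None sentinel and the JoinableQueue
# task_done()/join() protocol implied by the two q.put(None) calls:
def consumer(input_q):
    while True:
        item = input_q.get()
        if item is None:          # sentinel: no more work
            input_q.task_done()
            break
        print("consumed", item)
        input_q.task_done()       # lets q.join() know this item is handled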
import multiprocessing, sys

def foo():
    print("123")

# Because "if __name__ == '__main__'" is missing this will not work
# correctly on Windows. However, we should get a RuntimeError rather
# than the Windows equivalent of a fork bomb.
p = multiprocessing.Process(target=foo)
p.start()
p.join()
sys.exit(p.exitcode)
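# For contrast, a sketch of the guarded variant that also works under the
# Windows/spawn start method (the point the comment above is making): the
# guard prevents the child interpreter from re-executing the spawn on import.
import multiprocessing
import sys


def foo():
    print("123")


if __name__ == '__main__':
    p = multiprocessing.Process(target=foo)
    p.start()
    p.join()
    sys.exit(p.exitcode)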
def list_append(count, id, out_list):
    """
    Creates an empty list and then appends a random number to the list
    'count' number of times. A CPU-heavy operation!
    """
    for i in range(count):
        out_list.append(random.random())

if __name__ == "__main__":
    size = 10000000   # Number of random numbers to add
    procs = 2   # Number of processes to create

    # Create a list of jobs and then iterate through
    # the number of processes appending each process to
    # the job list
    jobs = []
    for i in range(0, procs):
        out_list = list()
        process = multiprocessing.Process(target=list_append,
                                          args=(size, i, out_list))
        jobs.append(process)

    # Start the processes (i.e. calculate the random number lists)
    for j in jobs:
        j.start()

    # Ensure all of the processes have finished
    for j in jobs:
        j.join()

    print("List processing complete.")
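# Note that each `out_list` above lives only in its child process: a plain
# Python list is not shared memory, so the parent never sees the appended
# values (fine here, since only completion is reported). A minimal sketch of
# actually collecting results across processes, using a Manager list instead:
import multiprocessing
import random


def fill(count, shared_list):
    shared_list.extend(random.random() for _ in range(count))


if __name__ == "__main__":
    with multiprocessing.Manager() as mgr:
        shared = mgr.list()
        ps = [multiprocessing.Process(target=fill, args=(1000, shared))
              for _ in range(2)]
        for p in ps:
            p.start()
        for p in ps:
            p.join()
        print(len(shared))  # 2000: results are visible in the parent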
""" 管理器对象会控制一个服务端进程, 该进程持有python对象, 并可以让其他进程操作这些对象, 当有人修改这些对象时, 管理器会确保没有其他的进程在使用这些对象. """ import multiprocessing def worker(dictionary, key, item): dictionary[key] = item if __name__ == "__main__": mgr = multiprocessing.Manager() dictionary = mgr.dict() # manage中有管理者的数据结构对象 # 一个Process列表 jobs = [multiprocessing.Process(target=worker, args=(dictionary, i, i*2)) for i in range(10)] for j in jobs: j.start() for j in jobs: j.join() print("Result:", dictionary)
            smac_scenario_args=smac_scenario_args,
        )
        automl.fit(X_train, y_train, dataset_name=dataset_name)
    return spawn_classifier

if __name__ == '__main__':
    X, y = sklearn.datasets.load_digits(return_X_y=True)
    X_train, X_test, y_train, y_test = \
        sklearn.model_selection.train_test_split(X, y, random_state=1)

    processes = []
    spawn_classifier = get_spawn_classifier(X_train, y_train)
    for i in range(4):  # set this at roughly half of your cores
        p = multiprocessing.Process(target=spawn_classifier, args=(i, 'digits'))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()

    print('Starting to build an ensemble!')
    automl = AutoSklearnClassifier(
        time_left_for_this_task=15,
        per_run_time_limit=15,
        ml_memory_limit=1024,
        shared_mode=True,
        ensemble_size=50,
        ensemble_nbest=200,
        tmp_folder=tmp_folder,
        output_folder=output_folder,
def do_collection(args):
    params = {
        "moreSpecific": False,
        "type": "UPDATE",
        "socketOptions": {
            "includeRaw": False
        }
    }
    # ASN Graph
    ASNs = nx.DiGraph()
    last_time_saved = time.time()
    while True:
        ws = connect(params)
        try:
            for data in ws:
                parsed = json.loads(data)
                if parsed["type"] != "ris_message":
                    print(parsed)
                news = []  # Newly announced routes
                withdrawn = []
                if "withdrawals" in parsed["data"].keys():
                    withdrawn = parsed["data"]["withdrawals"]  # Withdrawn routes
                if "announcements" in parsed["data"].keys():
                    for announcement in parsed["data"]["announcements"]:
                        news.append(announcement["prefixes"])
                if "path" in parsed["data"].keys() and len(parsed["data"]["path"]) > 1:
                    # TODO: handle the length-1 (or even length-0) path case better
                    path_raw = parsed["data"]["path"]
                    path = []
                    for AS in path_raw:
                        if isinstance(AS, int):
                            path.append(AS)
                        else:
                            for real_AS in AS:
                                path.append(real_AS)
                    for i, AS in enumerate(path):
                        # Add new routes
                        AS = int(AS)
                        if i < (len(path) - 1):
                            neighbor = int(path[i + 1])
                            if neighbor == AS:
                                continue
                            subnets = dict()
                            update_graph(ASNs, AS, neighbor, news, withdrawn)
                # Save
                if time.time() - last_time_saved >= args.save_rate * 60:
                    save_process = multiprocessing.Process(
                        target=save_graph,
                        args=(ASNs.copy(), args.output_folder))
                    save_process.start()
                    last_time_saved = time.time()
        except websocket._exceptions.WebSocketConnectionClosedException:
            print("Socket closed, retrying")
            pass
        except websocket._exceptions.WebSocketBadStatusException as err:
            print("Bad status error :", err)
            pass
        except ConnectionResetError:
            print("Connection reset")
            pass
def S3_check(bucket):
    process = multiprocessing.Process(target=s3_job, args=(bucket, ))
    process.start()
    process.join()
    return jsonify(get_s3(bucket))
def Subdomain_enumeration(domain):
    multiprocessing.Process(target=sub_job, args=(domain, )).start()
    return jsonify({"status": 200})
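# The two handlers above illustrate two patterns: S3_check blocks until
# s3_job finishes (start + join, so the response can reflect its output),
# while Subdomain_enumeration fires sub_job off without keeping a handle and
# returns immediately. A minimal sketch of the same contrast outside the web
# framework, with a hypothetical job:
import multiprocessing
import time


def job(tag):
    time.sleep(0.5)
    print(tag, "done")


if __name__ == '__main__':
    # blocking variant (like S3_check): wait for the work before replying
    p = multiprocessing.Process(target=job, args=("blocking",))
    p.start()
    p.join()

    # fire-and-forget variant (like Subdomain_enumeration): reply immediately.
    # Without a later join(), the child is only reaped when multiprocessing
    # next checks its children, so long-lived servers should keep a handle
    # or use a worker pool.
    multiprocessing.Process(target=job, args=("background",)).start()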
def parse_bam():
    """
    Reads bam file and saves reads and their segments in objects of the
    Read and Segment classes.
    :param bamfile: bam file to be opened
    """
    global sample_name, header, bam
    sys.stderr.write(time.strftime("%c") + " Busy with parsing bam file...\n")
    bam = pysam.AlignmentFile(NanoSV.opts_bam, 'rb')
    header = bam.header
    if not bam.has_index():
        sys.exit('The bam has no index file')
    if 'HD' in header:
        if not header['HD']['SO'] == 'coordinate':
            sys.exit('The bam file is not coordinate sorted')
    if 'RG' in header:
        if type(header['RG']) is list:
            sample_name = header['RG'][0]['SM']
        else:
            sample_name = header['RG']['SM']
    else:
        sample_name = re.sub('(\.sorted)?\.bam$', '', str(NanoSV.opts_bam))
    contig_list = []
    for contig_dict in header['SQ']:
        contig_list.append(contig_dict['SN'])
    q = mp.Queue()
    q_out = mp.Queue()
    for contig in contig_list:
        q.put(contig)
    processes = [mp.Process(target=parse_chr_bam, args=(q, q_out, NanoSV.opts_bam))
                 for x in range(NanoSV.opts_threads)]
    for p in processes:
        p.start()
    liveprocs = list(processes)
    while liveprocs:
        time.sleep(5)
        try:
            while 1:
                contig_segments, contig_variants = q_out.get(block=False, timeout=1)
                segments.update(contig_segments)
                variants.update(contig_variants)
        except queue.Empty:
            pass
        time.sleep(5)  # Give tasks a chance to put more data in
        if not q.empty():
            continue
        liveprocs = [p for p in liveprocs if p.is_alive()]
    for p in processes:
        p.join()
    for contig in segments:
        for pos in segments[contig]:
            for id in segments[contig][pos]:
                segment = segments[contig][pos][id]
                if segment.qname in reads:
                    read = reads[segment.qname]
                else:
                    read = r.Read(segment.qname, segment.rlength)
                    reads[segment.qname] = read
                read.addSegment(segment)
    if NanoSV.opts_phasing_on:
        write_bed()
    bam.close()
tasks.extend([task for task in generate_future_tasks(image)])

while True:
    logging.debug('Active Workers: {0}, Pending Tasks: {1}'.format(len(workers), len(tasks)))
    if len(tasks) == 0 and len(workers) == 0:
        # If there are no more pending tasks and all
        # workers are done processing, then end the script.
        break
    try:
        if len(tasks) > 0 and len(workers) < 6:
            # If there are more pending tasks and
            # the max number of active workers has
            # not been reached, start the new task.
            command = tasks.pop()
            worker = multiprocessing.Process(target=execute_plugin, args=(command,))
            workers.append({
                'plugin_name': command['plugin_name'],
                'image_basename': command['image_basename'],
                'task': worker})
            worker.start()
        else:
            time.sleep(5)
            logging.debug('Polling workers....')
            # Iterate over a copy: removing entries from `workers` while
            # enumerating it would skip the element after each removal.
            for worker in workers[:]:
                logging.debug('[{0}] Worker for {1} is still alive?: {2}'.format(
                    worker['image_basename'], worker['plugin_name'],
                    worker['task'].is_alive()))
                if not worker['task'].is_alive():
                    logging.debug('[{0}] Terminating finished worker for {1}'.format(
                        worker['image_basename'], worker['plugin_name']))
                    workers.remove(worker)
def main(args=None):  # noqa
    args = args or make_argument_parser().parse_args()
    for path in args.path:
        sys.path.insert(0, path)

    if args.use_spawn:
        multiprocessing.set_start_method("spawn")

    try:
        if args.pid_file:
            setup_pidfile(args.pid_file)
    except RuntimeError as e:
        with file_or_stderr(args.log_file) as stream:
            logger = setup_parent_logging(args, stream=stream)
            logger.critical(e)
            return RET_PIDFILE

    canteen = multiprocessing.Value(Canteen)
    worker_pipes = []
    worker_processes = []
    worker_process_events = []
    for worker_id in range(args.processes):
        read_pipe, write_pipe = multiprocessing.Pipe(duplex=False)
        event = multiprocessing.Event()
        proc = multiprocessing.Process(
            target=worker_process,
            args=(args, worker_id, StreamablePipe(write_pipe), canteen, event),
            daemon=False,
        )
        proc.start()
        worker_pipes.append(read_pipe)
        worker_processes.append(proc)
        worker_process_events.append(event)

    # Wait for all worker processes to come online before starting the
    # fork processes. This is required to avoid race conditions like
    # in #297.
    for event in worker_process_events:
        if proc.is_alive():
            if not event.wait(timeout=30):
                break

    fork_pipes = []
    fork_processes = []
    for fork_id, fork_path in enumerate(chain(args.forks, canteen_get(canteen))):
        read_pipe, write_pipe = multiprocessing.Pipe(duplex=False)
        proc = multiprocessing.Process(
            target=fork_process,
            args=(args, fork_id, fork_path, StreamablePipe(write_pipe)),
            daemon=True,
        )
        proc.start()
        fork_pipes.append(read_pipe)
        fork_processes.append(proc)

    parent_read_pipe, parent_write_pipe = multiprocessing.Pipe(duplex=False)
    logger = setup_parent_logging(args, stream=StreamablePipe(parent_write_pipe))
    logger.info("Dramatiq %r is booting up." % __version__)
    if args.pid_file:
        atexit.register(remove_pidfile, args.pid_file, logger)

    running, reload_process = True, False

    # To avoid issues with signal delivery to user threads on
    # platforms such as FreeBSD 10.3, we make the main thread block
    # the signals it expects to handle before spawning the file
    # watcher and log watcher threads so that those threads can
    # inherit the blocking behaviour.
    if hasattr(signal, "pthread_sigmask"):
        signal.pthread_sigmask(
            signal.SIG_BLOCK,
            {signal.SIGINT, signal.SIGTERM, signal.SIGHUP},
        )

    if HAS_WATCHDOG and args.watch:
        if not hasattr(signal, "SIGHUP"):
            raise RuntimeError("Watching for source changes is not supported on %s." % sys.platform)
        file_watcher = setup_file_watcher(args.watch, args.watch_use_polling)

    log_watcher_stop_event = Event()
    log_watcher = Thread(
        target=watch_logs,
        args=(args.log_file, [parent_read_pipe, *worker_pipes, *fork_pipes], log_watcher_stop_event),
        daemon=False,
    )
    log_watcher.start()

    def stop_subprocesses(signum):
        nonlocal running
        running = False

        for proc in chain(worker_processes, fork_processes):
            try:
                os.kill(proc.pid, signum)
            except OSError:  # pragma: no cover
                if proc.exitcode is None:
                    logger.warning("Failed to send %r to PID %d.", signum.name, proc.pid)

    def sighandler(signum, frame):
        nonlocal reload_process
        reload_process = signum == getattr(signal, "SIGHUP", None)
        if signum == signal.SIGINT:
            signum = signal.SIGTERM

        logger.info("Sending signal %r to subprocesses...", getattr(signum, "name", signum))
        stop_subprocesses(signum)

    # Now that the watcher threads have been started, it should be
    # safe to unblock the signals that were previously blocked.
    if hasattr(signal, "pthread_sigmask"):
        signal.pthread_sigmask(
            signal.SIG_UNBLOCK,
            {signal.SIGINT, signal.SIGTERM, signal.SIGHUP},
        )

    retcode = RET_OK
    signal.signal(signal.SIGINT, sighandler)
    signal.signal(signal.SIGTERM, sighandler)
    if hasattr(signal, "SIGHUP"):
        signal.signal(signal.SIGHUP, sighandler)
    if hasattr(signal, "SIGBREAK"):
        signal.signal(signal.SIGBREAK, sighandler)

    # Wait for all workers to terminate. If any of the processes
    # terminates unexpectedly, then shut down the rest as well. The
    # use of `waited' here avoids a race condition where the processes
    # could potentially exit before we even get a chance to wait on
    # them.
    waited = False
    while not waited or any(p.exitcode is None for p in worker_processes):
        waited = True
        for proc in worker_processes:
            proc.join(timeout=1)
            if proc.exitcode is None:
                continue

            if running:  # pragma: no cover
                logger.critical("Worker with PID %r exited unexpectedly (code %r). Shutting down...",
                                proc.pid, proc.exitcode)
                stop_subprocesses(signal.SIGTERM)
                retcode = proc.exitcode
                break
            else:
                retcode = retcode or proc.exitcode

    # The log watcher can't be a daemon in case we log to a file so we
    # have to wait for it to complete on exit.
    log_watcher_stop_event.set()
    log_watcher.join()

    if HAS_WATCHDOG and args.watch:
        file_watcher.stop()
        file_watcher.join()

    if reload_process:
        if sys.argv[0].endswith("/dramatiq/__main__.py"):
            return os.execvp(sys.executable, ["python", "-m", "dramatiq", *sys.argv[1:]])
        return os.execvp(sys.argv[0], sys.argv)

    return RET_KILLED if retcode < 0 else retcode
def start(self):
    self.key_mouse_daemon = multiprocessing.Process(target=pythoncom.PumpMessages)
    self.key_mouse_daemon.start()
    self.mainloop()
    return
def Vcf(opts):
    base_dir = os.getcwd()
    config_file = opts.Config_file
    f = open(config_file)
    config_list = yaml.load(f)

    ####### read and parse parameters
    print "Start reading and parsing parameters..."
    time.sleep(5)
    output_fold = config_list["output_fold"]
    ptuneos_bin_path = "bin"
    vcf_file = config_list["vcf_file"]
    REFERENCE = base_dir + "/" + "database/Fasta/human.fasta"
    somatic_out_fold = output_fold + '/' + 'somatic_mutation'
    logfile_out_fold = output_fold + '/' + 'logfile'
    prefix = config_list["sample_name"]
    tumor_depth_cutoff = config_list["tumor_depth_cutoff"]
    tumor_vaf_cutoff = config_list["tumor_vaf_cutoff"]
    normal_vaf_cutoff = config_list["normal_vaf_cutoff"]
    vep_cache = config_list["vep_cache_path"]
    vep_path = config_list["vep_path"]
    netmhc_out_fold = output_fold + '/' + 'netmhc'
    indel_fasta_file = netmhc_out_fold + '/' + prefix + '_indel.fasta'
    hla_str = config_list["hla_str"]
    split_num = 200
    netchop_path = "software/netchop"
    human_peptide_path = "database/Protein/human.pep.all.fa"
    exp_file = config_list["expression_file"]
    binding_fc_aff_cutoff = int(config_list["binding_fc_aff_cutoff"])
    binding_aff_cutoff = int(config_list["binding_aff_cutoff"])
    fpkm_cutoff = int(config_list["fpkm_cutoff"])
    netctl_out_fold = output_fold + '/' + 'netctl'
    netMHCpan_path = config_list["netMHCpan_path"]
    snv_fasta_file = netmhc_out_fold + '/' + prefix + '_snv.fasta'
    snv_netmhc_out_file = netmhc_out_fold + '/' + prefix + '_snv_netmhc.tsv'
    indel_netmhc_out_file = netmhc_out_fold + '/' + prefix + '_indel_netmhc.tsv'
    vcftools_path = "software/vcftools"
    peptide_length = config_list["peptide_length"]
    pyclone_fold = output_fold + '/' + 'pyclone'
    pyclone_path = config_list["pyclone_path"]
    copynumber_profile = config_list["copynumber_profile"]
    tumor_cellularity = float(config_list["tumor_cellularity"])
    snv_final_neo_file = netctl_out_fold + '/' + prefix + '_pyclone_neo.tsv'
    indel_final_neo_file = netctl_out_fold + '/' + prefix + '_indel_netctl_concact.tsv'
    iedb_file = "train_model/iedb.fasta"
    cf_hy_model_9 = "train_model/cf_hy_9_model.m"
    cf_hy_model_10 = "train_model/cf_hy_10_model.m"
    cf_hy_model_11 = "train_model/cf_hy_11_model.m"
    RF_model = "train_model/RF_train_model.m"
    driver_gene_path = "software/DriveGene.tsv"
    snv_neo_model_file = netctl_out_fold + '/' + prefix + '_snv_neo_model.tsv'
    snv_blastp_tmp_file = netctl_out_fold + '/' + prefix + '_snv_blastp_tmp.tsv'
    snv_blastp_out_tmp_file = netctl_out_fold + '/' + prefix + '_snv_blastp_out_tmp.tsv'
    snv_netMHCpan_pep_tmp_file = netctl_out_fold + '/' + prefix + '_snv_netMHCpan_pep_tmp.tsv'
    snv_netMHCpan_ml_out_tmp_file = netctl_out_fold + '/' + prefix + '_snv_netMHCpan_ml_out_tmp.tsv'
    indel_neo_model_file = netctl_out_fold + '/' + prefix + '_indel_neo_model.tsv'
    indel_blastp_tmp_file = netctl_out_fold + '/' + prefix + '_indel_blastp_tmp.tsv'
    indel_blastp_out_tmp_file = netctl_out_fold + '/' + prefix + '_indel_blastp_out_tmp.tsv'
    indel_netMHCpan_pep_tmp_file = netctl_out_fold + '/' + prefix + '_indel_netMHCpan_pep_tmp.tsv'
    indel_netMHCpan_ml_out_tmp_file = netctl_out_fold + '/' + prefix + '_indel_netMHCpan_ml_out_tmp.tsv'
    blast_db_path = "database/Protein/peptide_database/peptide"

    ##### check input file, tool paths and reference files #####
    if os.path.exists(vcf_file):
        print "Check input mutation vcf file... OK"
    else:
        print "Please check your input vcf file!"
        os._exit(1)
    if os.path.exists(vep_path):
        print "Check vep path... OK"
    else:
        print "Please check your vep path!"
        os._exit(1)
    if os.path.exists(vep_cache):
        print "Check vep cache path... OK"
    else:
        print "Please check your vep cache path!"
        os._exit(1)
    if os.path.exists(exp_file):
        print "Check expression file... OK"
    else:
        print "Please check your expression file path!"
        os._exit(1)
    time.sleep(5)

    ##### check output directory #####
    print "Check output directory"
    if not os.path.exists(output_fold):
        os.mkdir(output_fold)
    if not os.path.exists(somatic_out_fold):
        os.mkdir(somatic_out_fold)
    if not os.path.exists(netmhc_out_fold):
        os.mkdir(netmhc_out_fold)
    if not os.path.exists(netctl_out_fold):
        os.mkdir(netctl_out_fold)
    if not os.path.exists(logfile_out_fold):
        os.mkdir(logfile_out_fold)
    if not os.path.exists(pyclone_fold):
        os.mkdir(pyclone_fold)
    if hla_str == "None":
        print "Please provide hla types, separated by commas, eg: HLA-A02:01,HLA-A01:01,HLA-B15:17,HLA-B13:02,HLA-C07:01,HLA-C06:02"
    else:
        print "Check hla alleles... OK"

    print "Start preprocessing VCF file..."
    processes_0 = []
    h1 = multiprocessing.Process(target=VCF_process,
                                 args=(prefix, vcf_file, somatic_out_fold, vcftools_path,
                                       vep_path, vep_cache, netmhc_out_fold,
                                       tumor_depth_cutoff, tumor_vaf_cutoff,
                                       normal_vaf_cutoff, ptuneos_bin_path,
                                       human_peptide_path, logfile_out_fold,))
    processes_0.append(h1)
    for p in processes_0:
        p.daemon = True
        p.start()
    for p in processes_0:
        p.join()
    print "Preprocessing VCF file done!"

    print "Start neoantigen prediction..."
    processes_1 = []
    d1 = multiprocessing.Process(target=snv_neo,
                                 args=(snv_fasta_file, hla_str, driver_gene_path,
                                       snv_netmhc_out_file, netmhc_out_fold, split_num,
                                       prefix, exp_file, binding_fc_aff_cutoff,
                                       binding_aff_cutoff, fpkm_cutoff, netctl_out_fold,
                                       netMHCpan_path, peptide_length, ptuneos_bin_path,
                                       netchop_path,))
    processes_1.append(d1)
    d2 = multiprocessing.Process(target=indel_neo,
                                 args=(indel_fasta_file, somatic_out_fold, hla_str,
                                       driver_gene_path, indel_netmhc_out_file, split_num,
                                       netMHCpan_path, prefix, exp_file,
                                       binding_fc_aff_cutoff, binding_aff_cutoff,
                                       fpkm_cutoff, netctl_out_fold, netmhc_out_fold,
                                       peptide_length, ptuneos_bin_path, netchop_path,
                                       REFERENCE, human_peptide_path,))
    processes_1.append(d2)
    for p in processes_1:
        p.daemon = True
        p.start()
    for p in processes_1:
        p.join()
    print "Neoantigen prediction done!"

    print "Neoantigen annotation..."
    processes_2 = []
    m1 = multiprocessing.Process(target=pyclone_annotation,
                                 args=(somatic_out_fold, copynumber_profile,
                                       tumor_cellularity, prefix, pyclone_fold,
                                       netctl_out_fold, pyclone_path, ptuneos_bin_path,
                                       logfile_out_fold,))
    processes_2.append(m1)
    for p in processes_2:
        p.daemon = True
        p.start()
    for p in processes_2:
        p.join()
    print "Neoantigen annotation done!"

    print "Neoantigen filtering using Pre&RecNeo model and refined immunogenicity score scheme."
    processes_3 = []
    r1 = multiprocessing.Process(target=InVivoModelAndScoreSNV,
                                 args=(snv_final_neo_file, cf_hy_model_9, cf_hy_model_10,
                                       cf_hy_model_11, RF_model, snv_neo_model_file,
                                       snv_blastp_tmp_file, snv_blastp_out_tmp_file,
                                       snv_netMHCpan_pep_tmp_file,
                                       snv_netMHCpan_ml_out_tmp_file, iedb_file,
                                       blast_db_path,))
    processes_3.append(r1)
    if os.path.exists(indel_final_neo_file):
        r2 = multiprocessing.Process(target=InVivoModelAndScoreINDEL,
                                     args=(indel_final_neo_file, cf_hy_model_9,
                                           cf_hy_model_10, cf_hy_model_11, RF_model,
                                           indel_neo_model_file, indel_blastp_tmp_file,
                                           indel_blastp_out_tmp_file,
                                           indel_netMHCpan_pep_tmp_file,
                                           indel_netMHCpan_ml_out_tmp_file, iedb_file,
                                           blast_db_path,))
        processes_3.append(r2)
    else:
        print "No neoantigen from Indels is identified!"
    for p in processes_3:
        p.daemon = True
        p.start()
    for p in processes_3:
        p.join()
    print "All finished! Please check the result files 'snv_neo_model.tsv' and 'indel_neo_model.tsv' in the netctl folder"
            time.sleep(0.01)
            channel.queue(chunk)
    except (TypeError, BrokenPipeError, KeyboardInterrupt, SystemExit):
        pygame.quit()

if __name__ == '__main__':
    print("Hondarribia by Peter Salomonsen - intro song for WebAssembly Summit 2020")
    print("Source: https://petersalomonsen.com/webassemblymusic/livecodev2/?gist=5b795090ead4f192e7f5ee5dcdd17392")
    print("Synthesized: https://soundcloud.com/psalomo/hondarribia")
    print()

    q = mp.Queue()
    p = mp.Process(target=player, args=(q,))
    p.start()

    scriptpath = os.path.dirname(os.path.realpath(__file__))
    wasm_fn = os.path.join(scriptpath, f"./wasm/hondarribia-{sample_rate}.wasm")

    # Prepare Wasm3 engine
    env = wasm3.Environment()
    rt = env.new_runtime(2048)
    with open(wasm_fn, "rb") as f:
        mod = env.parse_module(f.read())
    rt.load(mod)

    buff = b''
    buff_sz = prebuffer
    print e
    print 'Connect task queue service failed, exec run_queue first.'
    sys.exit(0)

"""
When the run_call process initiates a call, it sets the channel variables
task_id, call_id and host_id.
On hang-up, the uuid is dequeued via task_queue(task_id, uuid), freeing a
slot in the task queue; a task can be selected again once it is below its
robot quota.
On hang-up, the fs_host record with id == host_id has line_use decremented,
reducing the number of lines in use on that FreeSWITCH host; the host can be
selected again once line_use < line_max.
"""
task_queue = m.get_queue()

# event queue, shared by event_listener and event_processor
manager = multiprocessing.Manager()
event_queue = manager.Queue()

proc_event_listener = multiprocessing.Process(
    target=event_listener, name='event_listener', args=(event_queue,))
proc_event_listener.start()

proc_event_processor = multiprocessing.Process(
    target=event_processor, name='event_processor',
    args=(event_queue, task_queue))
proc_event_processor.start()

# proc_call_sender.join()
# proc_event_listener.join()
# proc_event_processor.join()

# flag = False
# count = 0
while True:
    time.sleep(3)
def start(self):
    self.q = multiprocessing.Queue()
    p = multiprocessing.Process(target=self.put, args=(self.q, ))
    p.start()
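# `self.put` is not shown above; a minimal sketch of the surrounding class,
# assuming `put` simply feeds items into the queue for the parent to read.
# Note that using a bound method as the Process target means `self` gets
# pickled under the spawn start method, so the instance must be picklable.
import multiprocessing


class Feeder:
    def put(self, q):
        for i in range(3):
            q.put(i)        # hypothetical payload

    def start(self):
        self.q = multiprocessing.Queue()
        p = multiprocessing.Process(target=self.put, args=(self.q,))
        p.start()
        return p


if __name__ == '__main__':
    f = Feeder()
    p = f.start()
    print([f.q.get() for _ in range(3)])
    p.join()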
parser.add_argument('--purge', action="store", dest="purge", type=int, default=60,
                    help='Purge statistics older than PURGE minutes. (default: 60min)')
parser.add_argument('--workers', action="store", dest="workers", type=int, default=64,
                    help='Number of worker processes to be used to fetch the stats (default: 64)')

global args
args = parser.parse_args(sys.argv[1:])

global http_headers
http_headers = {}
jwt_token = os.getenv("JWT_TOKEN", None)
if jwt_token is not None:
    http_headers = {"Authorization": "Bearer {}".format(jwt_token)}
if args.header:
    http_headers = dict(x.split(': ') for x in args.header)
logger.info(http_headers)

global http_proxyes
http_proxyes = {}
if args.proxy:
    http_proxyes['http'] = args.proxy
    http_proxyes['https'] = args.proxy

# Schedule a new collection every 1min
while True:
    p = multiprocessing.Process(target=collect_and_purge)
    p.start()
    time.sleep(int(os.getenv("COLLECTION_INTERVAL", 60)))
def test_deferred_write_on_flush():
    proc = multiprocessing.Process(target=_deferred_write_on_flush_proc_target)
    proc.start()
    proc.join()
    assert proc.exitcode == 0
def main(capture_build_dir, replay_build_dir, use_goma, gtest_filter, test_exec):
    start_time = time.time()
    # set the number of workers to be cpu_count - 1 (since the main process
    # already takes up a CPU core). Whenever a worker is available, it grabs
    # the next job from the job queue and runs it. The worker closes down when
    # there is no more job.
    worker_count = multiprocessing.cpu_count() - 1
    cwd = SetCWDToAngleFolder()
    trace_folder = "traces"
    if not os.path.isdir(capture_build_dir):
        os.makedirs(capture_build_dir)
    CreateReplayBuildFolders(worker_count, replay_build_dir)
    CreateTraceFolders(worker_count, trace_folder)
    replay_exec = "capture_replay_tests"
    if platform == "win32":
        test_exec += ".exe"
        replay_exec += ".exe"
    gn_path, autoninja_path = GetGnAndAutoninjaAbsolutePaths()
    if gn_path == "" or autoninja_path == "":
        print("No gn or autoninja found on system")
        return
    # generate gn files
    gn_proc = CreateGnGenSubProcess(gn_path, capture_build_dir,
                                    [("use_goma", use_goma),
                                     ("angle_with_capture_by_default", "true")], True)
    returncode, output = gn_proc.BlockingRun()
    if returncode != 0:
        return
    autoninja_proc = CreateAutoninjaSubProcess(autoninja_path, capture_build_dir,
                                               test_exec, True)
    returncode, output = autoninja_proc.BlockingRun()
    if returncode != 0:
        return
    # get a list of tests
    test_names_and_params = GetTestNamesAndParams(
        os.path.join(capture_build_dir, test_exec), gtest_filter)

    # objects created by the manager can be shared by multiple processes. We
    # use it to create collections that are shared by multiple processes, such
    # as the job queue or the result list.
    manager = multiprocessing.Manager()
    job_queue = manager.Queue()
    for test_name_and_params in test_names_and_params:
        job_queue.put(Test(test_name_and_params[0], test_name_and_params[1], use_goma))

    environment_vars = [("ANGLE_CAPTURE_FRAME_END", "100"),
                        ("ANGLE_CAPTURE_SERIALIZE_STATE", "1")]
    for environment_var in environment_vars:
        os.environ[environment_var[0]] = environment_var[1]

    passed_count = 0
    failed_count = 0
    skipped_count = 0
    failed_tests = []

    # the result list is created by the manager and can be shared by multiple
    # processes. Each subprocess populates it with the results of its test
    # runs. After all subprocesses finish, the main process processes the
    # results in the result list.
    # An item in the result list is a tuple with 3 values (testname, result,
    # output). The "result" can take 3 values: "Passed", "Failed", "Skipped".
    # The output is the stdout and the stderr of the test appended together.
    result_list = manager.list()
    workers = []
    for i in range(worker_count):
        proc = multiprocessing.Process(
            target=RunTest,
            args=(job_queue, gn_path, autoninja_path, capture_build_dir,
                  replay_build_dir + str(i), test_exec, replay_exec,
                  trace_folder + str(i), result_list))
        workers.append(proc)
        proc.start()
    for worker in workers:
        worker.join()

    for environment_var in environment_vars:
        del os.environ[environment_var[0]]

    end_time = time.time()
    print("\n\n\n")
    print("Results:")
    for result in result_list:
        output_string = result[1] + ": " + result[0] + ". "
        if result[1] == "Skipped":
            output_string += result[2]
            skipped_count += 1
        elif result[1] == "Failed":
            output_string += result[2]
            failed_tests.append(result[0])
            failed_count += 1
        else:
            passed_count += 1
        print(output_string)

    print("\n\n")
    print("Elapsed time: " + str(end_time - start_time) + " seconds")
    print("Passed: " + str(passed_count) + " Failed: " + str(failed_count) + \
          " Skipped: " + str(skipped_count))
    print("Failed tests:")
    for failed_test in failed_tests:
        print("\t" + failed_test)

    DeleteTraceFolders(worker_count, trace_folder)
    DeleteReplayBuildFolders(worker_count, replay_build_dir, trace_folder)
    if os.path.isdir(capture_build_dir):
        SafeDeleteFolder(capture_build_dir)
def _mapfn(iter):
    import tensorflow as tf
    from packaging import version

    # Note: consuming the input iterator helps Pyspark re-use this worker,
    for i in iter:
        executor_id = i

    # check that there are enough available GPUs (if using tensorflow-gpu)
    # before committing reservation on this node
    if compat.is_gpu_available():
        num_gpus = tf_args.num_gpus if 'num_gpus' in tf_args else 1  # GPUs per executor
        gpus_to_use = gpu_info.get_gpus(num_gpus)

    # assign job_name and task_index
    # assign TF job/task based on provided cluster_spec template (or use default/null values)
    job_name = 'default'
    task_index = -1
    cluster_id = cluster_meta['id']
    cluster_template = cluster_meta['cluster_template']
    for jobtype in cluster_template:
        nodes = cluster_template[jobtype]
        if executor_id in nodes:
            job_name = jobtype
            task_index = nodes.index(executor_id)
            break

    # get unique key (hostname, executor_id) for this executor
    host = util.get_ip_address()
    util.write_executor_id(executor_id)  # write the executor_id into the working directory
    port = 0

    # check for existing TFManagers: before starting a manager, check whether
    # one was already started earlier
    if TFSparkNode.mgr is not None and str(TFSparkNode.mgr.get('state')) != "'stopped'":
        if TFSparkNode.cluster_id == cluster_id:
            # raise an exception to force Spark to retry this "reservation" task on another executor
            raise Exception("TFManager already started on {0}, executor={1}, state={2}".format(
                host, executor_id, str(TFSparkNode.mgr.get("state"))))
        else:
            # old state, just continue with creating new manager
            logger.warn("Ignoring old TFManager with cluster_id {0}, requested cluster_id {1}".format(
                TFSparkNode.cluster_id, cluster_id))

    # start a TFManager and get a free port
    # use a random uuid as the authkey
    authkey = uuid.uuid4().bytes
    addr = None
    if job_name in ('ps', 'evaluator'):
        # PS nodes must be remotely accessible in order to shutdown from Spark driver.
        TFSparkNode.mgr = TFManager.start(authkey, ['control', 'error'], 'remote')
        addr = (host, TFSparkNode.mgr.address[1])
    else:
        # worker nodes only need a local manager, accessible within the
        # executor, for data feeding
        TFSparkNode.mgr = TFManager.start(authkey, queues)
        addr = TFSparkNode.mgr.address

    # initialize mgr state
    TFSparkNode.mgr.set('state', 'running')
    TFSparkNode.cluster_id = cluster_id

    # expand Hadoop classpath wildcards for JNI (Spark 2.x)
    if 'HADOOP_PREFIX' in os.environ:
        classpath = os.environ['CLASSPATH']
        hadoop_path = os.path.join(os.environ['HADOOP_PREFIX'], 'bin', 'hadoop')
        hadoop_classpath = subprocess.check_output([hadoop_path, 'classpath', '--glob']).decode()
        logger.debug("CLASSPATH: {0}".format(hadoop_classpath))
        os.environ['CLASSPATH'] = classpath + os.pathsep + hadoop_classpath

    # start TensorBoard if requested, on 'worker:0' if available (for
    # backwards-compatibility), otherwise on 'chief:0' or 'master:0'
    job_names = sorted([k for k in cluster_template.keys() if k in ['chief', 'master', 'worker']])
    tb_job_name = 'worker' if 'worker' in job_names else job_names[0]
    tb_pid = 0   # tensorboard process id
    tb_port = 0  # tensorboard port
    if tensorboard and job_name == tb_job_name and task_index == 0:
        # grab a free port
        tb_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        tb_sock.bind(('', 0))
        tb_port = tb_sock.getsockname()[1]
        tb_sock.close()
        logdir = log_dir if log_dir else "tensorboard_%d" % executor_id

        # search for tensorboard in python/bin, PATH, and PYTHONPATH
        pypath = sys.executable
        pydir = os.path.dirname(pypath)  # python bin directory
        sys_path = os.pathsep.join(sys.path)
        search_path = os.pathsep.join([pydir, sys_path, os.environ['PATH'], os.environ['PYTHONPATH']])

        tb_path = util.find_in_path(search_path, 'tensorboard')  # executable in PATH
        if not tb_path:
            tb_path = util.find_in_path(search_path, 'tensorboard/main.py')  # TF 1.3+
        if not tb_path:
            tb_path = util.find_in_path(search_path, 'tensorflow/tensorboard/__main__.py')  # TF 1.2-
        if not tb_path:
            raise Exception("Unable to find 'tensorboard' in: {}".format(search_path))

        # launch tensorboard
        if version.parse(tf.__version__) >= version.parse('2.0.0'):
            tb_proc = subprocess.Popen(
                [pypath, tb_path, "--reload_multifile=True",
                 "--logdir=%s" % logdir, "--port=%d" % tb_port],
                env=os.environ)
        else:
            tb_proc = subprocess.Popen(
                [pypath, tb_path, "--logdir=%s" % logdir, "--port=%d" % tb_port],
                env=os.environ)
        tb_pid = tb_proc.pid

    # check server to see if this task is being retried (i.e. already reserved)
    client = reservation.Client(cluster_meta['server_addr'])  # client connected to the reservation server
    cluster_info = client.get_reservations()
    tmp_sock = None
    node_meta = None
    for node in cluster_info:
        (nhost, nexec) = (node['host'], node['executor_id'])
        if nhost == host and nexec == executor_id:
            # this executor already appears in the cluster info
            node_meta = node
            port = node['port']

    # if not already done, register everything we need to set up the cluster
    if node_meta is None:
        # first, find a free port for TF
        tmp_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        tmp_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        tmp_sock.bind(('', port))
        port = tmp_sock.getsockname()[1]

        node_meta = {
            'executor_id': executor_id,
            'host': host,
            'job_name': job_name,
            'task_index': task_index,
            'port': port,
            'tb_pid': tb_pid,
            'tb_port': tb_port,
            'addr': addr,
            'authkey': authkey
        }
        # register node metadata with server
        logger.info("TFSparkNode.reserve: {0}".format(node_meta))
        client.register(node_meta)

    # wait for other nodes to finish reservations
    cluster_info = client.await_reservations()
    client.close()

    # construct a TensorFlow clusterspec from cluster_info
    sorted_cluster_info = sorted(cluster_info, key=lambda k: k['executor_id'])
    cluster_spec = {}
    last_executor_id = -1
    for node in sorted_cluster_info:
        if (node['executor_id'] == last_executor_id):
            raise Exception("Duplicate worker/task in cluster_info")
        last_executor_id = node['executor_id']
        logger.info("node: {0}".format(node))
        (njob, nhost, nport) = (node['job_name'], node['host'], node['port'])
        hosts = [] if njob not in cluster_spec else cluster_spec[njob]
        hosts.append("{0}:{1}".format(nhost, nport))
        cluster_spec[njob] = hosts

    # update TF_CONFIG if cluster spec has a 'master' node (i.e. tf.estimator)
    if 'master' in cluster_spec or 'chief' in cluster_spec:
        tf_config = json.dumps({
            'cluster': cluster_spec,
            'task': {'type': job_name, 'index': task_index},
            'environment': 'cloud'
        })
        logger.info("export TF_CONFIG: {}".format(tf_config))
        os.environ['TF_CONFIG'] = tf_config

    # reserve GPU(s) again, just before launching TF process (in case situation has changed)
    if compat.is_gpu_available():
        # compute my index relative to other nodes on the same host (for GPU allocation)
        my_addr = cluster_spec[job_name][task_index]
        my_host = my_addr.split(':')[0]
        flattened = [v for sublist in cluster_spec.values() for v in sublist]
        local_peers = [p for p in flattened if p.startswith(my_host)]
        my_index = local_peers.index(my_addr)

        num_gpus = tf_args.num_gpus if 'num_gpus' in tf_args else 1
        gpus_to_use = gpu_info.get_gpus(num_gpus, my_index)
        gpu_str = "GPUs" if num_gpus > 1 else "GPU"
        logger.debug("Requested {} {}, setting CUDA_VISIBLE_DEVICES={}".format(num_gpus, gpu_str, gpus_to_use))
        os.environ['CUDA_VISIBLE_DEVICES'] = gpus_to_use

    # create a context object to hold metadata for TF
    ctx = TFNodeContext(executor_id, job_name, task_index, cluster_spec,
                        cluster_meta['default_fs'], cluster_meta['working_dir'],
                        TFSparkNode.mgr)

    # release port reserved for TF as late as possible
    if tmp_sock is not None:
        tmp_sock.close()

    # Background mode relies on reuse of python workers in Spark.
    if background:
        # However, reuse of python workers doesn't work on Windows, so we need
        # to check whether the current script runs on Windows or not.
        if os.name == 'nt' or platform.system() == 'Windows':
            raise Exception("Background mode is not supported on Windows.")
        # Check if reuse of python workers is enabled on Spark.
        if not os.environ.get("SPARK_REUSE_WORKER"):
            raise Exception("Background mode relies on reuse of python workers on Spark. "
                            "The config 'spark.python.worker.reuse' is not enabled on Spark. "
                            "Please enable it before using background mode.")

    def wrapper_fn(args, context):
        """Wrapper function that sets the sys.argv of the executor."""
        if isinstance(args, list):
            sys.argv = args
        fn(args, context)

    def wrapper_fn_background(args, context):
        """Wrapper function that signals exceptions to foreground process."""
        errq = TFSparkNode.mgr.get_queue('error')
        try:
            wrapper_fn(args, context)
        except Exception:
            errq.put(traceback.format_exc())

    if job_name in ('ps', 'evaluator') or background:
        # invoke the TensorFlow main function in a background process
        logger.info("Starting TensorFlow {0}:{1} as {2} on cluster node {3} on background process".format(
            job_name, task_index, job_name, executor_id))

        p = multiprocessing.Process(target=wrapper_fn_background, args=(tf_args, ctx))
        if job_name in ('ps', 'evaluator'):
            p.daemon = True
        p.start()

        # for ps and evaluator nodes, wait indefinitely in foreground thread
        # for a "control" event (None == "stop")
        if job_name in ('ps', 'evaluator'):
            queue = TFSparkNode.mgr.get_queue('control')
            equeue = TFSparkNode.mgr.get_queue('error')
            done = False
            while not done:
                while (queue.empty() and equeue.empty()):
                    time.sleep(1)
                if (not equeue.empty()):
                    e_str = equeue.get()
                    raise Exception("Exception in " + job_name + ":\n" + e_str)
                msg = queue.get(block=True)
                logger.info("Got msg: {0}".format(msg))
                if msg is None:
                    logger.info("Terminating {}".format(job_name))
                    TFSparkNode.mgr.set('state', 'stopped')
                    done = True
                queue.task_done()
    else:
        # otherwise, just run TF function in the main executor/worker thread
        logger.info("Starting TensorFlow {0}:{1} on cluster node {2} on foreground thread".format(
            job_name, task_index, executor_id))
        wrapper_fn(tf_args, ctx)
        logger.info("Finished TensorFlow {0}:{1} on cluster node {2}".format(
            job_name, task_index, executor_id))
        if lung_mask is not None:
            predictions_scan *= lung_mask

        for nodule_n, zyxd in enumerate(annotations):
            plot_slice_3d_4(input=x[0, 0], mask=y[0, 0],
                            prediction=predictions_scan[0, 0],
                            lung_mask=lung_mask[0, 0] if lung_mask is not None else x[0, 0],
                            axis=0,
                            pid='-'.join([str(n), str(nodule_n), str(pid)]),
                            img_dir=outputs_path, idx=zyxd)
            print('saved plot')
        print('time since start:', (time.time() - start_time) / 60.)

        # is_alive must be called: without the parentheses the bound method is
        # always truthy and no finished job would ever be dropped from the list
        jobs = [job for job in jobs if job.is_alive()]
        if len(jobs) >= 3:
            jobs[0].join()
            del jobs[0]
        jobs.append(mp.Process(target=extract_candidates,
                               args=(predictions_scan, annotations, tf_matrix,
                                     pid, outputs_path)))
        jobs[-1].daemon = True
        jobs[-1].start()

    for job in jobs:
        job.join()
def run(self):
    mainProcess = multiprocessing.Process(target=self.main)
    mainProcess.start()
print("process "+multiprocessing.current_process().name+" consumed "+item) except Exception: pass # print("empty") if __name__ == "__main__": qu=multiprocessing.Queue(maxsize=5) #Queue P=Thread(name='p',target=read_thread,args=(qu,)) P.start() start=perf_counter() C1=multiprocessing.Process(name='c1',target=consume,args=(qu,)) C1.start() C2=multiprocessing.Process(name='c2',target=consume,args=(qu,)) C2.start() C3=multiprocessing.Process(name='c3',target=consume,args=(qu,)) C3.start() P.join() C1.join() C2.join() C3.join() end=perf_counter()
def __enter__(self):
    '''
    Start a master and minion
    '''
    running_tests_user = pwd.getpwuid(os.getuid()).pw_name
    self.master_opts = salt.config.master_config(
        os.path.join(INTEGRATION_TEST_DIR, 'files', 'conf', 'master')
    )
    self.master_opts['user'] = running_tests_user
    minion_config_path = os.path.join(
        INTEGRATION_TEST_DIR, 'files', 'conf', 'minion'
    )
    self.minion_opts = salt.config.minion_config(minion_config_path)
    self.minion_opts['user'] = running_tests_user
    self.syndic_opts = salt.config.syndic_config(
        os.path.join(INTEGRATION_TEST_DIR, 'files', 'conf', 'syndic'),
        minion_config_path
    )
    self.syndic_opts['user'] = running_tests_user
    #if sys.version_info < (2, 7):
    #    self.minion_opts['multiprocessing'] = False
    self.sub_minion_opts = salt.config.minion_config(
        os.path.join(INTEGRATION_TEST_DIR, 'files', 'conf', 'sub_minion')
    )
    self.sub_minion_opts['root_dir'] = os.path.join(TMP, 'subsalt')
    self.sub_minion_opts['user'] = running_tests_user
    #if sys.version_info < (2, 7):
    #    self.sub_minion_opts['multiprocessing'] = False
    self.smaster_opts = salt.config.master_config(
        os.path.join(
            INTEGRATION_TEST_DIR, 'files', 'conf', 'syndic_master'
        )
    )
    self.smaster_opts['user'] = running_tests_user

    # Set up config options that require internal data
    self.master_opts['pillar_roots'] = {
        'base': [os.path.join(FILES, 'pillar', 'base')]
    }
    self.master_opts['file_roots'] = {
        'base': [
            os.path.join(FILES, 'file', 'base'),
            # Let's support runtime created files that can be used like:
            #   salt://my-temp-file.txt
            TMP_STATE_TREE
        ],
        # Alternate root to test __env__ choices
        'prod': [
            os.path.join(FILES, 'file', 'prod'),
            TMP_PRODENV_STATE_TREE
        ]
    }
    self.master_opts['ext_pillar'].append(
        {'cmd_yaml': 'cat {0}'.format(
            os.path.join(
                FILES, 'ext.yaml'
            )
        )}
    )
    self.master_opts['extension_modules'] = os.path.join(
        INTEGRATION_TEST_DIR, 'files', 'extension_modules'
    )

    # clean up the old files
    self._clean()

    # Point the config values to the correct temporary paths
    for name in ('hosts', 'aliases'):
        optname = '{0}.file'.format(name)
        optname_path = os.path.join(TMP, name)
        self.master_opts[optname] = optname_path
        self.minion_opts[optname] = optname_path
        self.sub_minion_opts[optname] = optname_path

    verify_env([os.path.join(self.master_opts['pki_dir'], 'minions'),
                os.path.join(self.master_opts['pki_dir'], 'minions_pre'),
                os.path.join(self.master_opts['pki_dir'], 'minions_rejected'),
                os.path.join(self.master_opts['cachedir'], 'jobs'),
                os.path.join(self.smaster_opts['pki_dir'], 'minions'),
                os.path.join(self.smaster_opts['pki_dir'], 'minions_pre'),
                os.path.join(self.smaster_opts['pki_dir'], 'minions_rejected'),
                os.path.join(self.smaster_opts['cachedir'], 'jobs'),
                os.path.dirname(self.master_opts['log_file']),
                self.minion_opts['extension_modules'],
                self.sub_minion_opts['extension_modules'],
                self.sub_minion_opts['pki_dir'],
                self.master_opts['sock_dir'],
                self.smaster_opts['sock_dir'],
                self.sub_minion_opts['sock_dir'],
                self.minion_opts['sock_dir'],
                TMP_STATE_TREE,
                TMP_PRODENV_STATE_TREE,
                TMP,
                ],
               running_tests_user)

    # Set up PATH to mockbin
    self._enter_mockbin()

    master = salt.master.Master(self.master_opts)
    self.master_process = multiprocessing.Process(target=master.start)
    self.master_process.start()

    minion = salt.minion.Minion(self.minion_opts)
    self.minion_process = multiprocessing.Process(target=minion.tune_in)
    self.minion_process.start()

    sub_minion = salt.minion.Minion(self.sub_minion_opts)
    self.sub_minion_process = multiprocessing.Process(
        target=sub_minion.tune_in
    )
    self.sub_minion_process.start()

    smaster = salt.master.Master(self.smaster_opts)
    self.smaster_process = multiprocessing.Process(target=smaster.start)
    self.smaster_process.start()

    syndic = salt.minion.Syndic(self.syndic_opts)
    self.syndic_process = multiprocessing.Process(target=syndic.tune_in)
    self.syndic_process.start()

    if os.environ.get('DUMP_SALT_CONFIG', None) is not None:
        from copy import deepcopy
        try:
            os.makedirs('/tmp/salttest/conf')
        except OSError:
            pass
        master_opts = deepcopy(self.master_opts)
        minion_opts = deepcopy(self.minion_opts)
        master_opts.pop('conf_file', None)
        minion_opts.pop('conf_file', None)
        minion_opts.pop('grains', None)
        minion_opts.pop('pillar', None)
        open('/tmp/salttest/conf/master', 'w').write(
            yaml.dump(master_opts)
        )
        open('/tmp/salttest/conf/minion', 'w').write(
            yaml.dump(minion_opts)
        )

    self.minion_targets = set(['minion', 'sub_minion'])
    self.pre_setup_minions()
    self.setup_minions()

    if getattr(self.parser.options, 'ssh', False):
        self.prep_ssh()

    if self.parser.options.sysinfo:
        try:
            print_header(
                '~~~~~~~ Versions Report ', inline=True,
                width=getattr(self.parser.options, 'output_columns', PNUM)
            )
        except TypeError:
            print_header('~~~~~~~ Versions Report ', inline=True)

        print('\n'.join(salt.version.versions_report()))

        try:
            print_header(
                '~~~~~~~ Minion Grains Information ', inline=True,
                width=getattr(self.parser.options, 'output_columns', PNUM)
            )
        except TypeError:
            print_header('~~~~~~~ Minion Grains Information ', inline=True)

        grains = self.client.cmd('minion', 'grains.items')

        minion_opts = self.minion_opts.copy()
        minion_opts['color'] = self.parser.options.no_colors is False
        salt.output.display_output(grains, 'grains', minion_opts)

    try:
        print_header(
            '=', sep='=', inline=True,
            width=getattr(self.parser.options, 'output_columns', PNUM)
        )
    except TypeError:
        print_header('', sep='=', inline=True)

    try:
        return self
    finally:
        self.post_setup_minions()
def main():
    # num = 0
    # for dirpath, dirnames, filenames in os.walk(cam2_path):
    #     for f in filenames:
    #         # if num==1000:
    #         #     break
    #         print f
    #         if '.jpg' in f:
    #             print(join(dirpath, f))
    #             cam2_image_list.append(join(dirpath, f))
    #             # num+=1
    # print "cam2 images num:"
    # print len(cam2_image_list)
    # # save_obj(cam2_image_list, "gallery_list")
    # average_register = len(cam2_image_list) // 8
    #
    # for i in range(8):
    #     if i == 7:
    #         multiprocessing_register.append(cam2_image_list[i*average_register:])
    #     else:
    #         multiprocessing_register.append(cam2_image_list[i*average_register:(i+1)*average_register])

    # num = 0
    # for dirpath, dirnames, filenames in os.walk(cam1_path):
    #     for f in filenames:
    #         # if num==200:
    #         #     break
    #         print f
    #         if '.jpg' in f:
    #             cam1_image_list.append(join(dirpath, f))
    #             # num+=1
    # print "cam1 images num:"
    # print len(cam1_image_list)
    # save_obj(cam1_image_list, "query_list")

    cam1_image_list = load_obj("query_list")
    print("cam1_image_list is", type(cam1_image_list))
    for i in range(len(cam1_image_list)):
        # print(i)
        id = cam1_image_list[i].split('/')[-2]
        candidate_path_id = candidate_path + id
        # print("mkdir is ", candidate_path_id)
        if not os.path.exists(str(candidate_path_id)):
            os.system('mkdir ' + candidate_path_id)
        candidate_path_ids.append(candidate_path_id)

    num_match = len(cam1_image_list) // 8
    # num_match = len(cam1_image_list) // image_batch
    print("batch size is:", num_match)
    for i in range(8):
        if i == 7:
            multiprocessing_match.append(cam1_image_list[i * num_match:])
        else:
            multiprocessing_match.append(
                cam1_image_list[i * num_match:(i + 1) * num_match])

    # register_feature = multiprocessing.Manager().dict()
    # tic = timeit.default_timer()
    # plist_register = []
    # for count in range(8):
    #     # length = count*average_register
    #     p = multiprocessing.Process(target=extract_register,
    #                                 args=(multiprocessing_register[count], count,
    #                                       register_feature, count % 4))
    #     p.start()
    #     plist_register.append(p)
    #
    # for p_register in plist_register:
    #     p_register.join()
    # register_feature = load_obj("register111")
    # print("--------------------", len(register_feature))
    # print("register feature is %d" % len(register_feature))
    # dict_register = dict(register_feature)
    # del register_feature
    # save_obj(dict_register, "gallery2")
    # toc = timeit.default_timer()
    # print('register time: %.2f' % ((toc - tic) * 1000))

    tic = timeit.default_timer()
    match_feature = multiprocessing.Manager().dict()
    plist_match = []
    for j in range(8):
        p = multiprocessing.Process(
            target=extract_query,
            args=(multiprocessing_match[j], j, match_feature, j % 4))
        p.start()
        plist_match.append(p)

    for p_match in plist_match:
        p_match.join()
    del plist_match

    # for j in range(0, num_match, 4):
    #     print j
    #     plist_match = []
    #     process = num_match - j if (j + 4) > num_match else 4
    #     for i in range(process):
    #         p = multiprocessing.Process(target=extract_query,
    #                                     args=(multiprocessing_match[j+i], i, match_feature, i))
    #         p.start()
    #         plist_match.append(p)
    #
    #     for p_match in plist_match:
    #         p_match.join()
    #     del plist_match

    toc = timeit.default_timer()
    print(len(match_feature))
    dict_match = dict(match_feature)
    del match_feature
    save_obj(dict_match, "query2")

    dict_register = load_obj("gallery2")
    cam2_image_list = load_obj("gallery_list")
    print("cam2_image_list is {}, dict_register is {}".format(
        len(cam2_image_list), len(dict_register)))
    # exit(0)
    # match_feature = load_obj("query")
    # print(type(match_feature))
    # print(len(match_feature))
    # print(match_feature1)

    print('match time: %.2f' % ((toc - tic) * 1000))
    print("match feature is %d" % len(dict_match))
    dataset = np.array(dict_register.values())
    del dict_register
    print("dataset is {}, type is {}".format(len(dataset), type(dataset)))
    query = np.array(dict_match.values())
    del dict_match
    print("query is {}, type is {}".format(len(query), type(query)))

    index = pykgraph.KGraph(dataset, 'euclidean')  # another option is 'angular'
    del dataset
    index.build(reverse=-1)
    # index.save("index_file.txt")

    tic = timeit.default_timer()
    knn = index.search(query, K=1)
    del query
    toc = timeit.default_timer()
    print('match time: %.2f' % ((toc - tic) * 1000))
    # print('match time: %.2f' % ((toc - tic) * 1000 / len(query)))

    print(len(cam2_image_list))
    print(cam1_image_list[0])
    print(len(knn))
    for i, index in enumerate(knn):
        print(i, index[0])
        # print('cp ' + cam2_image_list[index[0]] + ' ' + candidate_path + cam1_image_list[i].split('/')[-2] + "/")
        os.system('cp ' + cam2_image_list[index[0]] + ' ' + candidate_path +
                  cam1_image_list[i].split('/')[-2] + "/")