def Parallel():
    """Run the three ``send_to_serv*`` workers in parallel and wait for them.

    One process is created per (function, argument) pair:
    ``send_to_serv(file)``, ``send_to_serv_2(file_2)`` and
    ``send_to_serv_3(file_3)``.  The functions and the ``file*`` arguments are
    resolved from the enclosing module.  All processes are started before any
    of them is joined, so they run concurrently.

    Returns:
        None.
    """
    from pathos.helpers import mp

    workers = [
        mp.Process(target=send_to_serv, args=(file, )),
        mp.Process(target=send_to_serv_2, args=(file_2, )),
        mp.Process(target=send_to_serv_3, args=(file_3, )),
    ]

    # Plain loops: ``map`` with a lambda is lazy and was only being forced for
    # its side effects, and joining every process a second time added nothing.
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
def watch(self, callback=lambda *args: None):
    """Spawn a process that consumes the result queue and reports each message.

    For every message delivered on ``self.queue_result_name`` the child
    process puts ``{'src': 'output', 'data': <unpickled body>}`` on
    ``self.display_queue`` and then calls ``callback`` with the raw body.

    Args:
        callback: callable invoked with the raw message body; defaults to a
            no-op.

    Raises:
        AssertionError: if ``self.connected`` is falsy.
    """
    assert self.connected, 'Not connected to server!'
    print('Starting to watch!')

    def consume_results(display_queue, server_ip, queue_result_name, on_body):
        # Everything below runs inside the child process.
        def handle_message(channel, method, properties, body):
            # NOTE(review): pickle.loads executes whatever the broker
            # delivers -- only safe if every publisher is trusted.
            payload = pickle.loads(body)
            display_queue.put({'src': 'output', 'data': payload})
            on_body(body)

        channel = get_channel(server_ip, queue_result_name)
        channel.basic_consume(handle_message,
                              queue=queue_result_name,
                              no_ack=True)
        channel.start_consuming()

    child_args = (
        self.display_queue,
        self.server_ip,
        self.queue_result_name,
        callback,
    )
    mp.Process(target=consume_results, args=child_args).start()
def queue(self, input_dir='.', tr=2000, loop=True, watch=False):
    """Spawn a process that publishes every input volume to the work queue.

    The child process loads each path returned by
    ``get_paths(input_dir, self.conf['extensions'])`` with ``nibabel``,
    publishes the pickled data on ``self.queue_work_name``, puts
    ``{'src': 'input', 'data': path}`` on ``self.display_queue`` and sleeps
    ``tr`` milliseconds before the next path.

    Args:
        input_dir: directory handed to ``get_paths``.
        tr: delay between two published items, in milliseconds.
        loop: when true, start over after the last path, indefinitely.
        watch: accepted for interface compatibility; not used here.

    Raises:
        AssertionError: if ``self.connected`` is falsy.
    """
    assert self.connected, 'Not connected to server!'

    def queue_helper(display_queue, server_ip, queue_work_name, input_dir, tr,
                     loop):
        # NOTE: Each process needs its own set of file descriptors
        channel = get_channel(server_ip, queue_work_name)
        paths = get_paths(input_dir, self.conf['extensions'])
        # Divide by a float so the delay keeps its fractional part even where
        # ``/`` on two ints truncates (tr=1500 must sleep 1.5 s, not 1 s).
        delay = tr / 1000.0
        while True:
            for path in paths:
                channel.basic_publish(
                    exchange='',
                    routing_key=queue_work_name,
                    body=pickle.dumps(nibabel.load(path).get_data()))
                display_queue.put({'src': 'input', 'data': path})
                time.sleep(delay)
            if not loop:
                break

    process = mp.Process(target=queue_helper,
                         args=(
                             self.display_queue,
                             self.server_ip,
                             self.queue_work_name,
                             input_dir,
                             tr,
                             loop,
                         ))
    process.start()
    return
def run(self):
    '''
    Run each test case in its own process, one after another, and return
    one queue entry per case.

    Result codes:
    ****************
    -2: Error
    -1: Timeout
    0 : Wrong Answer
    1 : Passed
    ****************

    When ``self.Error`` is set nothing is executed and a single
    ``[-1, -2, <error text>]`` entry is returned.
    '''
    if self.Error:
        return [[-1, -2, str(self.ee)]]

    results = thread.Queue()
    launched = 0
    for case in range(len(self.input)):
        worker = thread.Process(target=self.run_helper,
                                args=(self.input[case], self.expected[case],
                                      case, results))
        launched += 1
        worker.start()

        # Give the case at most ``timeLimit`` seconds to finish.
        worker.join(self.timeLimit)
        if not worker.is_alive():
            continue

        # Still running: record a timeout (-1) and kill the process.
        print('case%d: Timeout' % (case + 1))
        results.put([case, -1])
        worker.terminate()
        worker.join()

    # One entry is expected on the queue per launched case.
    return [results.get() for _ in range(launched)]
def _launch_processes(self): for i in range(len(self._processes)): self.open_ipc_connection(i) self._processes[i] = mp.Process( target=_shm_launch_domain_server_, args=[ self._domain_factory, self._lambdas, i, self._shm_proxy.copy(), dict(self._shm_registers), dict(self._shm_types), dict(self._shm_sizes), self._rsize, list(self._shm_arrays), list(self._shm_lambdas), list(self._shm_names), list(self._shm_params), self._initializations[i], self._activations[i], self._dones[i], self._conditions[i], self._ipc_connections[i] if self._ipc_notify else None, logger, ], ) self._processes[i].start() # Waits for all jobs to be launched and waiting each for requests for i in range(len(self._processes)): with self._conditions[i]: self._conditions[i].wait_for( lambda: bool(self._initializations[i].value) == True )
def _launch_processes(self): for i in range(len(self._job_results)): self.open_ipc_connection(i) pparent, pchild = mp.Pipe() self._waiting_jobs[i] = pparent self._processes[i] = mp.Process( target=_launch_domain_server_, args=[ self._domain_factory, self._lambdas, i, self._job_results, pchild, self._initializations[i], self._conditions[i], self._ipc_connections[i] if self._ipc_notify else None, logger, ], ) self._processes[i].start() # Waits for all jobs to be launched and waiting each for requests for i in range(len(self._job_results)): with self._conditions[i]: self._conditions[i].wait_for( lambda: bool(self._initializations[i].value) == True )
def _spawn_job_thread(self, job_id, prevhash, coinb1, coinb2, merkle_branches, version, nbits, ntime): '''Stops any previous job and begins a new job.''' # Stop the old job (if any) self._stop_job() # Create the new job self._job = self._subscription.create_job( job_id=job_id, prevhash=prevhash, coinb1=coinb1, coinb2=coinb2, merkle_branches=merkle_branches, version=version, nbits=nbits, ntime=ntime ) def run(s): try: for result in self._job.mine(s, self._thread_count): params = [self._subscription.worker_name] + [result[k] for k in ('job_id', 'extranounce2', 'ntime', 'nounce')] self.send(method='mining.submit', params=params) log("Found share: " + str(params), LEVEL_INFO) log("%d thread - Hashrate: %s" % (s, human_readable_hashrate(self._job.hashrate)), LEVEL_INFO) except Exception as e: log("ERROR: %s" % e, LEVEL_ERROR) for i in range(0, self._thread_count): processes[i] = multiprocess.Process(target=run, args=(i,), daemon = True) processes[i].start()
def start(self):
    """Create the result queue(s) and start the producer workers.

    Workers are processes when ``self.multiprocess`` is set and threads
    otherwise.  In ordered multiprocess mode an extra daemon thread moves
    items from the inter-process queue into ``self.cache_queue``.  Sets
    ``self.started`` once every worker has been started.
    """
    mp_queue_size = 1 if self.ordered else self.max_queue_size // 2
    if self.multiprocess:
        self.queue = multip.Queue(maxsize=mp_queue_size)
    else:
        self.queue = Queue.Queue(maxsize=self.max_queue_size)

    # Flag used for keeping values in completed queue in order
    self.last_completed_job = multip.Value('i', -1)
    self.exit = multip.Event()

    if not (self.multiprocess and self.ordered):
        self.cache_queue = self.queue
    else:
        self.cache_queue = Queue.Queue(maxsize=self.max_queue_size)

        def batcher(queue, cache_queue):
            while not self.exit.is_set():
                job_index, item = queue.get()
                cache_queue.put((job_index, item))
                time.sleep(0.0001)  # to be sure..

        # As Queues in Python are __!__NOT__!__ First in first out in a
        # multiprocessing setting, a separate thread puts them in order
        # synchronously.
        mover = Thread(target=batcher,
                       args=(self.queue, self.cache_queue),
                       name='Synchronous batcher worker')
        mover.daemon = True
        mover.start()

    # Start worker processes or threads
    for i in xrange(self.n_producers):
        name = "ContinuousParallelBatchIterator worker {0}".format(i)
        worker_args = (i, self.generator, self.job_queue, self.queue,
                       self.last_completed_job, self.ordered, self.exit)
        worker_cls = multip.Process if self.multiprocess else Thread
        worker = worker_cls(target=_produce_helper,
                            args=worker_args,
                            name=name)
        # Make the worker daemon, so the main process can die without these
        # finishing
        worker.daemon = True
        worker.start()

    self.started = True
def compute(self):
    """Group the context keys by their first element, queue each group and
    run ``self.worker1`` over the queue in a child process.

    The keys of ``self.context`` are sorted by element 0 and split into runs
    sharing that element.  Each run is put on the module-level ``sp_queue``
    as a list.  A single process running
    ``self.worker1(sp_queue, cmp_queue)`` is then started and joined.

    Returns:
        None.
    """
    first_item = itemgetter(0)
    keys = sorted(self.context.keys(), key=first_item)
    for _, group in groupby(keys, key=first_item):
        # A groupby group is a lazy view that is invalidated as soon as the
        # groupby object advances, so it must be materialized before being
        # queued; otherwise the consumer sees empty or unpicklable groups.
        sp_queue.put(list(group))

    # NOTE(review): joining before ``cmp_queue`` is drained can block if the
    # worker writes a lot to a multiprocessing queue -- confirm queue type.
    hop = mp.Process(target=self.worker1, args=(sp_queue, cmp_queue))
    hop.start()
    hop.join()
def processor(self):
    """Execute the load: one process is spawned per action.

    For every key of ``self.load['actions']`` a one-way pipe is created and
    ``self.worker(action, <send end>)`` is scheduled in its own process.  All
    processes are started first and joined afterwards.
    """
    # The receiving ends are never read here, but they are kept referenced
    # until the method returns so the pipes stay open while workers run.
    receivers = []
    workers = []
    for action in tuple(self.load['actions'].keys()):
        reader, writer = mp.Pipe(False)
        workers.append(mp.Process(target=self.worker, args=(action, writer)))
        receivers.append(reader)

    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
def test_solve_python(solver_python):
    """Run ``do_test_python`` for one solver in a child process and assert
    the truthiness of the value it sends back through a pipe.
    """
    # We launch each algorithm in a separate process in order to avoid the various
    # algorithms to initialize different versions of the OpenMP library in the same
    # process (since our C++ hub algorithms and other algorithms like PPO2 - via torch -
    # might link against different OpenMP libraries)
    pparent, pchild = mp.Pipe(duplex=False)
    p = mp.Process(target=do_test_python, args=(solver_python, pchild,))
    p.start()
    # Drop the parent's copy of the write end: if the child dies before
    # sending anything, recv() then raises EOFError instead of blocking
    # forever on a pipe that still has a writer open in this process.
    pchild.close()
    try:
        r = pparent.recv()
    finally:
        # Always reap the child and release the pipe, even on failure.
        p.join()
        p.close()
        pparent.close()
    assert r
def start(self):
    """Unpickle the experiment options from the current request and launch
    the experiment in a separate process.

    The options are stored on ``self.experimentOpts``.  ``Launcher`` is
    started with the work queue name, the result queue name and the optional
    ``'experiment_data'`` entry.

    Returns:
        The tuple ``('Successfully started!', 200)``.
    """
    # NOTE(review): the request body is unpickled as-is; pickle.loads runs
    # arbitrary code, so this endpoint must only be reachable by trusted
    # clients.
    opts = pickle.loads(request.get_data())
    self.experimentOpts = opts
    self.logger.info('Starting experiment %s' % opts['name'])

    launcher_args = (
        opts['queue_work_name'],
        opts['queue_result_name'],
        opts.get('experiment_data'),
    )
    # TODO: this is pretty terrible and I don't even bother joining
    mp.Process(target=Launcher, args=launcher_args).start()
    return 'Successfully started!', 200
def _launch_processes(self): for i in range(len(self._job_results)): self.open_ipc_connection(i) pparent, pchild = mp.Pipe() self._waiting_jobs[i] = pparent self._processes[i] = mp.Process( target=_launch_domain_server_, args=[ self._domain_factory, self._lambdas, i, self._job_results, pchild, self._ipc_connections[i] if self._ipc_notify else None, logger ]) self._processes[i].start() # Waits for all jobs to be launched and waiting each for requests while True in set(self._active_domains): continue
def start_session(self, ipc_notify=False):
    """Start the domain server processes unless a session is already running.

    Marks the session as ongoing, starts one process per entry of
    ``self._job_results`` (keeping the parent pipe end in
    ``self._waiting_jobs``) and waits until no entry of
    ``self._active_domains`` equals ``True``.

    Args:
        ipc_notify: when true, each server receives its entry of
            ``self._ipc_connections``; otherwise it receives ``None``.
    """
    import time

    if self._ongoing_session:
        return

    self._ongoing_session = True
    for i in range(len(self._job_results)):
        pparent, pchild = Pipe()
        self._waiting_jobs[i] = pparent
        self._processes[i] = mp.Process(
            target=_launch_domain_server_,
            args=[
                self._domain_factory,
                i,
                self._lock,
                self._active_domains,
                self._job_results,
                pchild,
                self._ipc_connections[i] if ipc_notify else None,
                logger,
            ])
        self._processes[i].start()

    # Waits for all jobs to be launched and waiting each for requests.
    # Sleep between polls: a bare ``continue`` loop spins a full core and
    # competes with the very processes it is waiting for.
    while True in set(self._active_domains):
        time.sleep(0.001)
def queue(self, input_dir='.', tr=2000, loop=True, watch=False):
    """Spawn a process that publishes every input volume to the work queue.

    The child process loads each path returned by
    ``get_paths(input_dir, self.conf['extensions'])`` with ``nibabel``,
    publishes the pickled data to the ``'message'`` exchange with routing key
    ``self.queue_work_name``, puts ``{'src': 'input', 'data': path}`` on
    ``self.display_queue`` and sleeps ``tr`` milliseconds before the next
    path.

    Args:
        input_dir: directory handed to ``get_paths``.
        tr: delay between two published items, in milliseconds.
        loop: when true, start over after the last path, indefinitely.
        watch: accepted for interface compatibility; not used here.

    Raises:
        AssertionError: if ``self.connected`` is falsy.
    """
    assert self.connected, 'Not connected to server!'

    # NOTE: Rely on closures to pass arguments, yes it's bad.
    def queue_helper():
        # NOTE: Each process needs its own set of file descriptors
        channel = get_channel(self.server_ip, self.queue_work_name)
        paths = get_paths(input_dir, self.conf['extensions'])
        # Divide by a float so the delay keeps its fractional part even where
        # ``/`` on two ints truncates (tr=1500 must sleep 1.5 s, not 1 s).
        delay = tr / 1000.0
        while True:
            for path in paths:
                channel.basic_publish(exchange='message',
                                      routing_key=self.queue_work_name,
                                      body=pickle.dumps(
                                          nibabel.load(path).get_data()))
                self.display_queue.put({'src': 'input', 'data': path})
                time.sleep(delay)
            if not loop:
                break

    process = mp.Process(target=queue_helper)
    process.start()
    return
def start_session(self, ipc_notify=False):
    """Start the shared-memory domain server processes unless a session is
    already running.

    Marks the session as ongoing, starts one process per slot of
    ``self._processes`` and waits until no entry of ``self._active_domains``
    equals ``True``.

    Args:
        ipc_notify: when true, each server receives its entry of
            ``self._ipc_connections``; otherwise it receives ``None``.
    """
    import time

    if self._ongoing_session:
        return

    self._ongoing_session = True
    for i in range(len(self._processes)):
        self._processes[i] = mp.Process(
            target=_shm_launch_domain_server_,
            args=[
                self._domain_factory,
                i,
                self._active_domains,
                self._shm_proxy.copy(),
                dict(self._shm_registers),
                dict(self._shm_types),
                dict(self._shm_sizes),
                self._rsize,
                list(self._shm_arrays),
                list(self._shm_names),
                list(self._shm_params),
                self._conditions[i],
                self._ipc_connections[i] if ipc_notify else None,
                logger,
            ])
        self._processes[i].start()

    # Waits for all jobs to be launched and waiting each for requests.
    # Sleep between polls: a bare ``continue`` loop spins a full core and
    # competes with the very processes it is waiting for.
    while True in set(self._active_domains):
        time.sleep(0.001)
def watch(self, callback=lambda *args: None):
    """Spawn a process that runs a ``Consumer`` on the result queue.

    For every delivered message the child process puts
    ``{'src': 'output', 'data': <unpickled body>}`` on ``self.display_queue``
    and then calls ``callback`` with the raw body.

    Args:
        callback: callable invoked with the raw message body; defaults to a
            no-op.

    Raises:
        AssertionError: if ``self.connected`` is falsy.
    """
    assert self.connected, 'Not connected to server!'
    print('Starting to watch!')

    def consume_results():
        # Everything below runs inside the child process.
        def handle_message(channel, method, properties, body):
            # NOTE(review): pickle.loads executes whatever the broker
            # delivers -- only safe if every publisher is trusted.
            payload = pickle.loads(body)
            self.display_queue.put({'src': 'output', 'data': payload})
            callback(body)

        result_queue = self.queue_result_name
        Consumer(self.amqp,
                 queue=result_queue,
                 routing_key=self.queue_result_name,
                 callback=handle_message).run()

    mp.Process(target=consume_results).start()
def run_full_exp_parallel_smp(self, save=True):
    """Run all remaining (config, repetition) pairs in rounds of parallel
    processes.

    Only runs when ``pathos`` has been imported; otherwise a message is
    printed and nothing else happens.  Each round starts up to
    ``os.cpu_count()`` processes running ``self.run_rep_parallel``, each with
    its own queue, a deep copy of the current optimiser config and a random
    seed.  The round then polls the processes, storing every message read
    from the queues into ``self.vals`` / ``self.best_vals``, until none is
    alive.  After each round ``self.print_status()`` is called and, when
    ``save`` is true, ``self.save_experiment()``.

    Args:
        save: persist the experiment after every round.
    """
    if 'pathos' in sys.modules:
        n_cpus = os.cpu_count()
        remaining_runs = deepcopy(self.n_runs)
        while self.finished is False:
            round_start_time = time.time()
            # Never start more processes than there are runs left.
            n_round_runs = np.min([n_cpus, remaining_runs])
            processes = [0] * n_round_runs
            # queue = pathos_multiprocess.Queue()
            # One result queue per process of this round.
            queues = [0] * n_round_runs
            for queue_no in range(len(queues)):
                queues[queue_no] = pathos_multiprocess.Queue()
            for proc_no in range(n_round_runs):
                bayes_optimiser = deepcopy(
                    self.bayes_opt_configs[self.current_config_no])
                seed = int(torch.randint(1, int(2**32 - 1), (1, )))
                processes[proc_no] = pathos_multiprocess.Process(
                    target=self.run_rep_parallel,
                    args=(bayes_optimiser, queues[proc_no],
                          self.current_config_no, self.current_rep, seed))
                processes[proc_no].start()
                # Advance to the next repetition, then to the next config;
                # flag completion once the last pair has been started.
                if self.current_rep < self.repetitions - 1:
                    self.current_rep += 1
                elif self.current_config_no < self.n_configs - 1:
                    self.current_rep = 0
                    self.current_config_no += 1
                else:
                    self.finished = True
            jobs_running = True
            # 1 = still alive, 0 = finished; 5 is a placeholder that is
            # overwritten on the first poll.
            procs_status = [5] * len(processes)
            # Sentinel that cannot equal a real count, so the first poll
            # always prints a status line.
            last_waiting_n = 10e10
            while jobs_running:
                for proc_no, process in enumerate(processes):
                    # Wait at most one second per process per poll.
                    process.join(timeout=1)
                    if process.is_alive():
                        procs_status[proc_no] = 1
                    else:
                        procs_status[proc_no] = 0
                # Drain whatever results are available so far.
                for queue in queues:
                    while not queue.empty():
                        message = queue.get()
                        config_ind, rep_ind, vals, best_vals = message
                        # config_ind, rep_ind, best_vals = message
                        # self.bayes_opts[config_ind][rep_ind] = b_opt
                        self.vals[config_ind][rep_ind] = deepcopy(vals)
                        self.best_vals[config_ind][rep_ind] = deepcopy(
                            best_vals)
                waiting_n = np.sum(np.count_nonzero(procs_status))
                # Only report when the number of live processes changed.
                if last_waiting_n != waiting_n:
                    current_time = time.time()
                    elapsed_time = (current_time - round_start_time) / 60.0
                    print(
                        f"Waited for {elapsed_time} minutes in this round, "
                        f"for {waiting_n} processes out of {len(processes)}",
                        flush=True)
                    last_waiting_n = deepcopy(waiting_n)
                # Every status is 0: the round is over.
                if np.sum(procs_status) < 1:
                    jobs_running = False
            remaining_runs -= n_round_runs
            self.print_status()
            if save:
                self.save_experiment()
    else:
        # NOTE(review): the two literals below are concatenated without a
        # separating space or full stop between the sentences.
        print(
            "Could not run experiment in parallel because pathos is not imported"
            "This is probably because it isn't installed.")
# multi-proc setup manager = mp.Manager() # shared memory queue = manager.Queue() for p in xrange(ncpu): if p == ncpu - 1: curr_mut_list = all_mutation_list[p*increment:] else: curr_mut_list = all_mutation_list[p*increment:(p*increment) + increment] if len(curr_mut_list) == 0: continue proc = mp.Process(target=perform_foldx, args=(curr_mut_list, p, queue)) proc.start() Processes.append(proc) time.sleep(5) # collect results to dictionary for p in xrange(len(Processes)): mut_to_foldxresults.update(queue.get()) for proc in Processes: proc.join() # count pt_to_stability_change_count = {} for pt, mutation_to_count in pt_to_mutation_count.items():
def node_indexing(self):
    '''
    1) Index tree nodes by level-order.
    2) Annotate node id to tree string.
    3) Get leaf to node distances.
    4) Calculate pairwise inter-node distances using leaf to node distances
    5) Calculate mean distance of child-nodes of each node to root

    Side effects: sets ``self.treeinfo_file_given``, fills
    ``self.leaf_dist_to_node`` and, when no treeinfo was loaded and
    ``self.no_treeinfo`` is False, writes the leaf-to-node distances as one
    JSON line to ``self.treeinfo_fname``.

    Returns the tuple (tree_string, node_to_leaves, nindex_to_node,
    node_to_nindex, self.leaf_dist_to_node, nodepair_to_dist,
    node_to_parent_node, node_to_mean_child_dist2root).
    '''
    tree_string = self.tree_object.write(format=5)  # append node id annotation

    node_to_leaves = {}
    nindex_to_node = {}
    node_to_nindex = {}
    node_to_parent_node = {}

    # binary indicating that treeinfo file was parsed
    self.treeinfo_file_given = 0
    if len(self.leaf_dist_to_node) > 0:
        self.treeinfo_file_given = 1
    elif self.no_treeinfo:
        print('\nWARNING: NO TREEINFO FILE WILL BE GENERATED.\n')

    # level-order traversal
    print('\nIndexing internal nodes...')
    for n, node in enumerate(self.tree_object.traverse()):
        if node.is_leaf():
            continue

        nindex_to_node[n] = node
        node_to_nindex[node] = n

        # get parent node (except for root, whose parent is not indexed)
        try:
            node_to_parent_node[n] = node_to_nindex[node.up]
        except KeyError:
            pass

        # node annotation for final tree output
        # (raw string: '\)' is not a valid escape in a normal literal)
        node_string = re.sub(r'[^\)]+$', '', node.write(format=5))
        tree_string = tree_string.replace(
            node_string, '{}[&NODE_ID={}]'.format(node_string, n))

    # multi-proc setup
    manager = mp.Manager()
    # shared memory
    leaf_dist_to_node_queue = manager.Queue()
    node_to_leaves_queue = manager.Queue()

    # generate processes
    processes = []
    # list(...) rather than .keys()[:], which fails on Python 3 dict views
    nindex_list = list(nindex_to_node)
    shuffle(nindex_list)  # shuffle to make multi-processes more equitable
    increment = len(nindex_list) // self.cores

    for p in range(self.cores):
        lo = p * increment
        # the last process takes whatever remains after the even split
        if p == self.cores - 1:
            curr_nindex_list = nindex_list[lo:]
        else:
            curr_nindex_list = nindex_list[lo:lo + increment]

        proc = mp.Process(target=self.get_leaf_distance_to_node,
                          args=(curr_nindex_list,
                                [nindex_to_node[idx]
                                 for idx in curr_nindex_list],
                                leaf_dist_to_node_queue,
                                node_to_leaves_queue))
        processes.append(proc)
        proc.start()

    # collect results to dictionary (one item per process from each queue)
    for _ in processes:
        node_to_leaves.update(node_to_leaves_queue.get())
        for leaf_key, list_value in leaf_dist_to_node_queue.get().items():
            for (n, distance) in list_value:
                self.leaf_dist_to_node.setdefault(leaf_key, {})[n] = distance

    # wait for all processes to end
    for proc in processes:
        proc.join()

    # write to treeinfo file
    # (explicit ``== False`` kept so a non-bool falsy value such as None
    # still suppresses the write, exactly as before)
    if self.treeinfo_file_given < 1 and self.no_treeinfo == False:
        print('Writing to treeinfo file...')
        with open(self.treeinfo_fname, 'w') as output:
            json.dump(self.leaf_dist_to_node, output)
            output.write('\n')

    # get ancestral nodepair to dist: two ancestors of the same leaf are
    # separated by the difference of their distances to that leaf
    ancestral_nodepair_to_dist = {}
    for node_to_dist in self.leaf_dist_to_node.values():
        ancestors_of_leaf = list(node_to_dist)
        for (i, j) in itertools.combinations(ancestors_of_leaf, 2):
            if (i in ancestral_nodepair_to_dist
                    and j in ancestral_nodepair_to_dist[i]) or (
                        j in ancestral_nodepair_to_dist
                        and i in ancestral_nodepair_to_dist[j]):
                continue
            ij_dist = abs(node_to_dist[i] - node_to_dist[j])
            ancestral_nodepair_to_dist.setdefault(i, {})[j] = ij_dist
            ancestral_nodepair_to_dist.setdefault(j, {})[i] = ij_dist

    # get sibling nodepair to dist: nodes not on one lineage are joined
    # through their highest-indexed common ancestor
    sibling_nodepair_to_dist = {}
    for (i, j) in itertools.combinations(list(ancestral_nodepair_to_dist), 2):
        if (j in ancestral_nodepair_to_dist[i]
                or j in sibling_nodepair_to_dist.get(i, {})
                or i in sibling_nodepair_to_dist.get(j, {})):
            continue

        ancestors_to_i = [
            node for node in ancestral_nodepair_to_dist[i] if node < i
        ]
        ancestors_to_j = [
            node for node in ancestral_nodepair_to_dist[j] if node < j
        ]
        common_ancestors = sorted(set(ancestors_to_i) & set(ancestors_to_j))
        common_ancestor = common_ancestors[-1]
        ij_dist = (ancestral_nodepair_to_dist[i][common_ancestor] +
                   ancestral_nodepair_to_dist[j][common_ancestor])

        sibling_nodepair_to_dist.setdefault(i, {})[j] = ij_dist
        sibling_nodepair_to_dist.setdefault(j, {})[i] = ij_dist

    # merge both tables; every node is at distance 0 from itself
    nodepair_to_dist = ancestral_nodepair_to_dist.copy()
    for i in nodepair_to_dist:
        nodepair_to_dist[i][i] = 0
        nodepair_to_dist[i].update(sibling_nodepair_to_dist.get(i, {}))

    # get mean distance of children nodes of each node to root (index 0)
    node_to_mean_child_dist2root = {
        n: np.mean([
            self.leaf_dist_to_node[child.name][0] if child.is_leaf() else
            nodepair_to_dist[node_to_nindex[child]][0]
            for child in nindex_to_node[n].get_children()
        ])
        for n in node_to_leaves
    }

    return (tree_string, node_to_leaves, nindex_to_node, node_to_nindex,
            self.leaf_dist_to_node, nodepair_to_dist, node_to_parent_node,
            node_to_mean_child_dist2root)
def pwdist_dist_and_ancestral_trace(self, node_to_leaves, nindex_to_node,
                                    node_to_nindex,
                                    node_to_mean_child_dist2root,
                                    nodepair_to_dist):
    '''
    1) Get pairwise distances of all leaves
    2) Get ancestral/descendant traces
    3) Get pairwise distance distributions of nodes
    4) Get leaf to ancestor trace
    5) Get mean child-nodes distance to ancestral trace

    Step 1 only runs when no treeinfo was loaded
    (``self.treeinfo_file_given < 1``); its result is stored in
    ``self.leafpair_to_distance`` and, when ``self.no_treeinfo`` is False,
    appended as one JSON line to ``self.treeinfo_fname``.

    Returns the tuple (self.leafpair_to_distance, node_to_pwdist,
    node_to_mean_pwdist, node_to_ancestral_nodes, node_to_descendant_nodes,
    leaf_to_ancestors, node_to_mean_child_dist2anc).
    '''

    # worker: distance of every leaf pair in lp_list, stored in both
    # key orders, then pushed onto the queue as a single dict
    def get_pw_leaf_dist(lp_list, queue):
        lp_to_dist = {}
        for (x, y) in lp_list:
            lp_to_dist[(x.name, y.name)] = lp_to_dist[(
                y.name, x.name)] = x.get_distance(y)
        queue.put(lp_to_dist)

    # get pairwise sequence patristic distance
    if self.treeinfo_file_given < 1:
        print('\nParsing all pairwise distances between leaves...')

        # multi-proc setup
        manager = mp.Manager()
        # shared memory
        leafpair_to_distance_queue = manager.Queue()

        # generate processes
        processes = []
        leafpair_list = list(
            itertools.combinations(self.tree_object.get_leaves(), 2))
        increment = len(leafpair_list) // self.cores

        for p in range(self.cores):
            lo = p * increment
            # the last process takes whatever remains after the even split
            if p == self.cores - 1:
                curr_leafpair_list = leafpair_list[lo:]
            else:
                curr_leafpair_list = leafpair_list[lo:lo + increment]

            proc = mp.Process(target=get_pw_leaf_dist,
                              args=(curr_leafpair_list,
                                    leafpair_to_distance_queue))
            processes.append(proc)
            proc.start()

        # collect results to dictionary (one dict per process)
        for _ in processes:
            self.leafpair_to_distance.update(leafpair_to_distance_queue.get())

        # wait for all processes to end
        for proc in processes:
            proc.join()

        # (explicit ``== False`` kept so a non-bool falsy value such as None
        # still suppresses the write, exactly as before)
        if self.no_treeinfo == False:
            print('Writing to treeinfo file...')
            with open(self.treeinfo_fname, 'a') as output:
                json.dump(self.remap_keys(self.leafpair_to_distance), output)
                output.write('\n')

    node_to_ancestral_nodes = {}
    node_to_descendant_nodes = {}
    node_to_pwdist = {}
    node_to_mean_pwdist = {}
    leaf_to_ancestors = {}
    node_to_mean_child_dist2anc = {}

    # get ancestry and pairwise sequence distance distribution (single thread code faster)
    print('\nSorting lineages and PWD distributions...')
    for n in sorted(node_to_mean_child_dist2root,
                    key=node_to_mean_child_dist2root.get):
        leaves = node_to_leaves[n]
        mean_dist2root = node_to_mean_child_dist2root[n]

        # get leaf to ancestor nodes subtending it
        for leaf in leaves:
            leaf_to_ancestors.setdefault(leaf, []).append(n)

        ancestors_to_n = [
            node_to_nindex[anc_node]
            for anc_node in nindex_to_node[n].iter_ancestors()
        ]
        node_to_ancestral_nodes[n] = ancestors_to_n

        for anc in ancestors_to_n:
            node_to_descendant_nodes.setdefault(anc, []).append(n)
            # nodepair_to_dist[anc][0] is the distance from anc to node 0
            dist2anc = mean_dist2root - nodepair_to_dist[anc][0]
            node_to_mean_child_dist2anc.setdefault(n, {})[anc] = dist2anc

        pwdist = sorted(self.leafpair_to_distance[(x, y)]
                        for (x, y) in itertools.combinations(leaves, 2))
        node_to_pwdist[n] = pwdist
        node_to_mean_pwdist[n] = np.mean(pwdist)

    return (self.leafpair_to_distance, node_to_pwdist, node_to_mean_pwdist,
            node_to_ancestral_nodes, node_to_descendant_nodes,
            leaf_to_ancestors, node_to_mean_child_dist2anc)
def get_global_pval(self, hytest_method, node_to_leaves, node_to_ancestral_nodes, node_to_pwdist):  #, leafpair_to_distance):
    '''
    Perform all inter-clusters' hypotheses tests

    When no treeinfo was loaded (``self.treeinfo_file_given < 1``) a p-value
    is computed for every unordered pair of keys of ``node_to_leaves``, the
    work being split over ``self.cores`` processes, and the results are
    merged into ``self.nodepair_to_pval`` (and appended as a JSON line to
    ``self.treeinfo_fname`` when ``self.no_treeinfo`` is False).  Otherwise
    nothing is computed.

    Returns ``self.nodepair_to_pval`` in both cases.
    '''
    if self.treeinfo_file_given < 1:
        from ctypes import c_char_p
        import os
        print('\nPerforming {} tests...'.format(hytest_method))
        # shared memory
        # Local alias so the nested worker reads the distances via closure.
        lpd = self.leafpair_to_distance
        max_node = max(node_to_leaves.keys())
        # The three lists below are indexed by node id; ids without an entry
        # hold False as a placeholder.
        if os.name == 'nt':  # windows
            # Plain Python lists are used here instead of mp.Array.
            node_to_leaves_shared = [
                node_to_leaves[n] if n in node_to_leaves.keys() else False
                for n in range(max_node + 1)
            ]
        else:
            # NOTE(review): c_char_p arrays hold pointers, and on Python 3
            # they need bytes rather than str -- confirm the leaf name type.
            node_to_leaves_shared = [
                mp.Array(c_char_p, node_to_leaves[n])
                if n in node_to_leaves.keys() else False
                for n in range(max_node + 1)
            ]
        node_to_ancestral_nodes_shared = [
            mp.Array('i', node_to_ancestral_nodes[n])
            if n in node_to_ancestral_nodes else False
            for n in range(max_node + 1)
        ]
        # Membership is tested against node_to_leaves, not node_to_pwdist.
        node_to_pwdist_shared = [
            mp.Array('d', node_to_pwdist[n])
            if n in node_to_leaves.keys() else False
            for n in range(max_node + 1)
        ]

        # worker
        # Computes the p-value of every (i, j) in np_list and puts the
        # resulting {(i, j): pval} dict on q in one go.
        def get_interclus_pval(np_list, ntl_dict, ntan_dict, ntpwd_dict, q):
            currp_np_to_pval = {}
            for (i, j) in np_list:
                # One node is listed among the other's ancestors: compare
                # their two pairwise-distance distributions directly.
                if (ntan_dict[j] != False and i in list(ntan_dict[j])) or (
                        ntan_dict[i] != False and j in list(ntan_dict[i])):
                    pval = inter_cluster_hytest(
                        list(ntpwd_dict[i]),
                        list(ntpwd_dict[j])).hytest(hytest_method)
                else:
                    # Otherwise build the distribution over the union of
                    # both nodes' leaves.
                    ij_pwdist = sorted([
                        lpd[(x, y)] for x, y in itertools.combinations(
                            list(set(ntl_dict[i]) | set(ntl_dict[j])), 2)
                    ])
                    # take the conservative (max) p-value comparing node i/j individually to i+j
                    pval = max([
                        inter_cluster_hytest(
                            list(ntpwd_dict[i]),
                            ij_pwdist).hytest(hytest_method),
                        inter_cluster_hytest(
                            list(ntpwd_dict[j]),
                            ij_pwdist).hytest(hytest_method)
                    ])
                currp_np_to_pval[(i, j)] = pval
            q.put(currp_np_to_pval)

        # multi-proc setup
        manager = mp.Manager()
        # shared memory
        queue = manager.Queue()
        # generate processes
        processes = []
        # split nodepair list into ncpu sets
        nodepair_list = list(
            itertools.combinations(node_to_leaves.keys(), 2))
        shuffle(nodepair_list)  # shuffle to make more equitable
        increment = int(len(nodepair_list) / self.cores)
        for p in range(self.cores):
            # The last process takes whatever remains after the even split.
            if p == self.cores - 1:
                curr_nodepair_list = nodepair_list[p * increment:]
            else:
                curr_nodepair_list = nodepair_list[p * increment:(p * increment) + increment]
            proc = mp.Process(target=get_interclus_pval,
                              args=(curr_nodepair_list,
                                    node_to_leaves_shared,
                                    node_to_ancestral_nodes_shared,
                                    node_to_pwdist_shared, queue))
            processes.append(proc)
            proc.start()
        # collect results to dictionary
        # (one dict is read from the queue per started process)
        for p in range(len(processes)):
            self.nodepair_to_pval.update(queue.get())
        # wait for all processes to end
        for proc in processes:
            proc.join()
        if self.no_treeinfo == False:
            print('Writing to treeinfo file...')
            # Appended, so earlier lines of the treeinfo file are kept.
            output = open(self.treeinfo_fname, 'a')
            json.dump(self.remap_keys(self.nodepair_to_pval), output)
            output.write('\n')
            output.close()
        # The string below is disabled legacy code kept as a no-op literal.
        """ # single thread legacy code nodepair_to_pval_single = {} for i,j in itertools.combinations(node_to_leaves.keys(), 2): if (j in node_to_ancestral_nodes and i in node_to_ancestral_nodes[j]) or (i in node_to_ancestral_nodes and j in node_to_ancestral_nodes[i]): pval = inter_cluster_hytest(node_to_pwdist[i], node_to_pwdist[j]).hytest(hytest_method) else: ij_pwdist = sorted([leafpair_to_distance[(x,y)] for x,y in itertools.combinations(list(set(node_to_leaves[i])|set(node_to_leaves[j])), 2)]) # take the conservative (max) p-value comparing node i/j individually to i+j pval = max([inter_cluster_hytest(node_to_pwdist[i], ij_pwdist).hytest(hytest_method), inter_cluster_hytest(node_to_pwdist[j], ij_pwdist).hytest(hytest_method)]) nodepair_to_pval_single[(i,j)] = pval # check print ('Sets of nodepair_to_pval: {}'.format(set(self.nodepair_to_pval.keys()) == set(nodepair_to_pval_single.keys()))) for (i, j), pval in self.nodepair_to_pval.items(): if pval != nodepair_to_pval_single[(i,j)]: print (i, j, pval, nodepair_to_pval_single[(i, j)]) """
    return self.nodepair_to_pval