Example #1
def Parallel():
    from pathos.helpers import mp
    # send_to_serv* and file* are assumed to be defined at module level
    processes = [
        mp.Process(target=send_to_serv, args=(file, )),
        mp.Process(target=send_to_serv_2, args=(file_2, )),
        mp.Process(target=send_to_serv_3, args=(file_3, ))
    ]
    # Start every process before joining so all three run concurrently.
    # NOTE: driving start()/join() through map() is fragile in Python 3,
    # where map() is lazy and runs nothing until consumed; plain loops
    # avoid that pitfall (and the duplicated join pass).
    for p in processes:
        p.start()
    for p in processes:
        p.join()
Example #2
    def watch(self, callback=lambda *args: None):
        assert self.connected, 'Not connected to server!'
        print('Starting to watch!')

        def watch_helper(display_queue, server_ip, queue_result_name, callback):
            channel = get_channel(server_ip, queue_result_name)

            def callback_rmq(channel, method, properties, body):
                display_queue.put({
                    'src': 'output',
                    'data': pickle.loads(body)
                })
                callback(body)

            # pika < 1.0 API; with pika >= 1.0 this call becomes
            # channel.basic_consume(queue=queue_result_name,
            #                       on_message_callback=callback_rmq,
            #                       auto_ack=True)
            channel.basic_consume(
                callback_rmq,
                queue=queue_result_name,
                no_ack=True)
            channel.start_consuming()

        process = mp.Process(target=watch_helper, args=(
            self.display_queue,
            self.server_ip,
            self.queue_result_name,
            callback,
        ))

        process.start()

        return
Example #3
    def queue(self, input_dir='.', tr=2000, loop=True, watch=False):
        assert self.connected, 'Not connected to server!'

        def queue_helper(display_queue, server_ip, queue_work_name, input_dir, tr, loop):
            # NOTE: Each process needs its own set of file descriptors
            channel = get_channel(server_ip, queue_work_name)
            paths = get_paths(input_dir, self.conf['extensions'])

            while True:
                for path in paths:
                    channel.basic_publish(
                        exchange='',
                        routing_key=queue_work_name,
                        body=pickle.dumps(nibabel.load(path).get_data())
                    )
                    display_queue.put({
                        'src': 'input',
                        'data': path
                    })
                    time.sleep(tr / 1000.0)  # tr is in milliseconds
                if not loop:
                    break

        process = mp.Process(target=queue_helper, args=(
            self.display_queue,
            self.server_ip,
            self.queue_work_name,
            input_dir,
            tr,
            loop,
        ))

        process.start()

        return
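The get_channel helper is not shown in these snippets. A minimal sketch of what it presumably does, assuming pika's blocking API (the function name and the idempotent queue declare are assumptions); each child process opens its own connection because AMQP connections must not be shared across fork():

import pika

def get_channel(server_ip, queue_name):
    # one connection per process: pika connections are not fork-safe
    connection = pika.BlockingConnection(
        pika.ConnectionParameters(host=server_ip))
    channel = connection.channel()
    channel.queue_declare(queue=queue_name)  # declare is idempotent
    return channel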
Example #4
 def run(self):
     '''
         ****************
         -2: Error
         -1: Timeout
         0 : Wrong Answer
         1 : Passed
         ****************
     '''
     if self.Error:
         return [[-1, -2, str(self.ee)]]
     ans = thread.Queue()
     job = []
     for case in range(len(self.input)):
         # sent[case] = False
         # print(self.input[case])
         # print(self.expected[case])
         T = thread.Process(target=self.run_helper,
                            args=(self.input[case], self.expected[case],
                                  case, ans))
         job.append(T)
         T.start()
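         # join(timeout) blocks here, so cases run one at a time; the time
         # limit applies per case rather than to the whole batch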
         T.join(self.timeLimit)
         if T.is_alive():
             print('case%d: Timeout' % (case + 1))
             # self.update_result(-1) # -1 for timeout
             ans.put([case, -1])
             T.terminate()
             T.join()
     return [ans.get() for j in job]
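The join(timeout)/is_alive()/terminate() pattern above, reduced to a minimal, self-contained sketch (the worker is hypothetical):

import multiprocessing as mp

def slow_square(x, out):
    out.put(x * x)  # pretend this might hang for some inputs

if __name__ == '__main__':
    results = mp.Queue()
    p = mp.Process(target=slow_square, args=(7, results))
    p.start()
    p.join(timeout=2.0)      # wait at most 2 seconds
    if p.is_alive():         # still running after the deadline -> timeout
        p.terminate()
        p.join()             # reap the terminated process
        print('timeout')
    else:
        print('result:', results.get())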
Example #5
 def _launch_processes(self):
     for i in range(len(self._processes)):
         self.open_ipc_connection(i)
         self._processes[i] = mp.Process(
             target=_shm_launch_domain_server_,
             args=[
                 self._domain_factory,
                 self._lambdas,
                 i,
                 self._shm_proxy.copy(),
                 dict(self._shm_registers),
                 dict(self._shm_types),
                 dict(self._shm_sizes),
                 self._rsize,
                 list(self._shm_arrays),
                 list(self._shm_lambdas),
                 list(self._shm_names),
                 list(self._shm_params),
                 self._initializations[i],
                 self._activations[i],
                 self._dones[i],
                 self._conditions[i],
                 self._ipc_connections[i] if self._ipc_notify else None,
                 logger,
             ],
         )
         self._processes[i].start()
     # Wait until every job has been launched and is waiting for requests
     for i in range(len(self._processes)):
         with self._conditions[i]:
             self._conditions[i].wait_for(
                 lambda: bool(self._initializations[i].value)
             )
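A minimal sketch of the initialization handshake that _shm_launch_domain_server_ is assumed to perform on its side (the server body is hypothetical): the child sets a shared flag and notifies the condition the parent is waiting on.

import multiprocessing as mp

def server(ready_flag, cond):
    # ... expensive setup would happen here ...
    with cond:
        ready_flag.value = 1
        cond.notify_all()  # wake the parent blocked in wait_for()

if __name__ == '__main__':
    ready = mp.Value('b', 0)
    cond = mp.Condition()
    p = mp.Process(target=server, args=(ready, cond))
    p.start()
    with cond:
        cond.wait_for(lambda: bool(ready.value))
    print('server is ready')
    p.join()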
Example #6
 def _launch_processes(self):
     for i in range(len(self._job_results)):
         self.open_ipc_connection(i)
         pparent, pchild = mp.Pipe()
         self._waiting_jobs[i] = pparent
         self._processes[i] = mp.Process(
             target=_launch_domain_server_,
             args=[
                 self._domain_factory,
                 self._lambdas,
                 i,
                 self._job_results,
                 pchild,
                 self._initializations[i],
                 self._conditions[i],
                 self._ipc_connections[i] if self._ipc_notify else None,
                 logger,
             ],
         )
         self._processes[i].start()
     # Wait until every job has been launched and is waiting for requests
     for i in range(len(self._job_results)):
         with self._conditions[i]:
             self._conditions[i].wait_for(
                 lambda: bool(self._initializations[i].value)
             )
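A minimal sketch of the Pipe-based job loop the domain server is assumed to run (the doubling "work" stands in for the real request handling):

import multiprocessing as mp

def server(conn):
    while True:
        job = conn.recv()   # block until the parent sends work
        if job is None:     # sentinel: shut down cleanly
            break
        conn.send(job * 2)  # send the result back

if __name__ == '__main__':
    parent_end, child_end = mp.Pipe()
    p = mp.Process(target=server, args=(child_end,))
    p.start()
    parent_end.send(21)
    print(parent_end.recv())  # -> 42
    parent_end.send(None)
    p.join()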
Example #7
    def _spawn_job_thread(self, job_id, prevhash, coinb1, coinb2, merkle_branches, version, nbits, ntime):
        '''Stops any previous job and begins a new job.'''

        # Stop the old job (if any)
        self._stop_job()
        # Create the new job
        self._job = self._subscription.create_job(
            job_id=job_id,
            prevhash=prevhash,
            coinb1=coinb1,
            coinb2=coinb2,
            merkle_branches=merkle_branches,
            version=version,
            nbits=nbits,
            ntime=ntime
        )

        def run(s):
            try:
                for result in self._job.mine(s, self._thread_count):
                    params = [self._subscription.worker_name] + [result[k] for k in
                                                                 ('job_id', 'extranounce2', 'ntime', 'nounce')]
                    self.send(method='mining.submit', params=params)
                    log("Found share: " + str(params), LEVEL_INFO)
                log("%d thread - Hashrate: %s" % (s, human_readable_hashrate(self._job.hashrate)), LEVEL_INFO)
            except Exception as e:
                log("ERROR: %s" % e, LEVEL_ERROR)

        for i in range(self._thread_count):
            # NOTE: assumes a module-level `processes` mapping; daemon
            # processes are killed when the main process exits
            processes[i] = multiprocess.Process(target=run, args=(i,), daemon=True)
            processes[i].start()
Example #8
    def start(self):

        max_queue_size = 1 if self.ordered else self.max_queue_size // 2

        self.queue = multip.Queue(
            maxsize=max_queue_size) if self.multiprocess else Queue.Queue(
                maxsize=self.max_queue_size)

        # Flag used for keeping values in completed queue in order
        self.last_completed_job = multip.Value('i', -1)
        self.exit = multip.Event()

        if self.multiprocess and self.ordered:
            self.cache_queue = Queue.Queue(maxsize=self.max_queue_size)

            def batcher(queue, cache_queue):
                while not self.exit.is_set():
                    job_index, item = queue.get()
                    cache_queue.put((job_index, item))

                    time.sleep(0.0001)  # brief pause so this thread does not spin

            # Multiprocessing queues are NOT guaranteed to be first-in,
            # first-out when multiple producers feed them, so a separate
            # thread synchronously puts the results back in order
            p = Thread(target=batcher,
                       args=(self.queue, self.cache_queue),
                       name='Synchronous batcher worker')
            p.daemon = True
            p.start()

        else:
            self.cache_queue = self.queue

        # Start worker processes or threads
        for i in xrange(self.n_producers):  # xrange: this snippet targets Python 2
            name = "ContinuousParallelBatchIterator worker {0}".format(i)

            if self.multiprocess:
                p = multip.Process(target=_produce_helper,
                                   args=(i, self.generator, self.job_queue,
                                         self.queue, self.last_completed_job,
                                         self.ordered, self.exit),
                                   name=name)
            else:
                p = Thread(target=_produce_helper,
                           args=(i, self.generator, self.job_queue, self.queue,
                                 self.last_completed_job, self.ordered,
                                 self.exit),
                           name=name)

            # Daemonize the worker so the main process can exit without waiting for it
            p.daemon = True
            p.start()

        self.started = True
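A sketch of the reordering idea: results tagged with a job index arrive in arbitrary order from multiple workers and are buffered until they can be released sequentially (the function name and input format are assumptions, not the actual _produce_helper protocol):

def in_order(tagged_results):
    """tagged_results yields (job_index, item) pairs in arbitrary order."""
    buffered = {}
    next_index = 0
    for job_index, item in tagged_results:
        buffered[job_index] = item
        while next_index in buffered:  # release every contiguous result
            yield buffered.pop(next_index)
            next_index += 1

print(list(in_order([(1, 'b'), (0, 'a'), (2, 'c')])))  # -> ['a', 'b', 'c']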
Example #9
    def compute(self):

        keys = list(self.context.keys())
        keys.sort(key=itemgetter(0))

        for k, gr in groupby(keys, key=itemgetter(0)):
            sp_queue.put(gr)

        # NOTE: sp_queue and cmp_queue are assumed to be module-level queues
        # for proc in range(2):
        hop = mp.Process(target=self.worker1, args=(sp_queue, cmp_queue))
        hop.start()

        hop.join()
Example #10
 def processor(self):
     """A method to execute load - a separate process is spawned for each action."""
     pipe_list = []
     actions_processes = []
     for action in list(self.load['actions'].keys()):
         recv_end, send_end = mp.Pipe(False)
         proc = mp.Process(target=self.worker, args=(action, send_end))
         actions_processes.append(proc)
         pipe_list.append(recv_end)
     for proc in actions_processes:
         proc.start()
     # Drain the pipes before joining: if a worker writes more than the
     # pipe buffer holds, join() would deadlock on a child blocked in send()
     results = [recv_end.recv() for recv_end in pipe_list]
     for proc in actions_processes:
         proc.join()
     return results
Example #11
def test_solve_python(solver_python):
    # Launch each algorithm in a separate process so that the various
    # algorithms do not initialize different versions of the OpenMP library
    # in the same process (our C++ hub algorithms and other algorithms like
    # PPO2 - via torch - might link against different OpenMP libraries)
    pparent, pchild = mp.Pipe(duplex=False)
    p = mp.Process(target=do_test_python, args=(solver_python, pchild,))
    p.start()
    r = pparent.recv()
    p.join()
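    # NOTE: Process.close() below requires Python 3.7+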
    p.close()
    pparent.close()
    assert r
Example #12
    def start(self):
        # print(request.form)
        self.experimentOpts = pickle.loads(request.get_data())
        self.logger.info('Starting experiment %s' %
                         self.experimentOpts['name'])

        # TODO: this is pretty terrible and I don't even bother joining
        process = mp.Process(target=Launcher, args=(
            self.experimentOpts['queue_work_name'],
            self.experimentOpts['queue_result_name'],
            self.experimentOpts.get('experiment_data'),
            ))
        process.start()
        return 'Successfully started!', 200
Example #13
 def _launch_processes(self):
     for i in range(len(self._job_results)):
         self.open_ipc_connection(i)
         pparent, pchild = mp.Pipe()
         self._waiting_jobs[i] = pparent
         self._processes[i] = mp.Process(
             target=_launch_domain_server_,
             args=[
                 self._domain_factory, self._lambdas, i, self._job_results,
                 pchild,
                 self._ipc_connections[i] if self._ipc_notify else None,
                 logger
             ])
         self._processes[i].start()
     # Busy-wait until every job has been launched and is waiting for
     # requests (the Condition-based variants above avoid this spin loop)
     while True in set(self._active_domains):
         continue
Example #14
 def start_session(self, ipc_notify=False):
     if not self._ongoing_session:
         self._ongoing_session = True
         for i in range(len(self._job_results)):
             pparent, pchild = Pipe()
             self._waiting_jobs[i] = pparent
             self._processes[i] = mp.Process(
                 target=_launch_domain_server_,
                 args=[
                     self._domain_factory, i, self._lock,
                     self._active_domains, self._job_results, pchild,
                     self._ipc_connections[i] if ipc_notify else None,
                     logger
                 ])
             self._processes[i].start()
         # Busy-wait until every job has been launched and is waiting for
         # requests (a Condition-based handshake would avoid this spin)
         while True in set(self._active_domains):
             continue
Example #15
    def queue(self, input_dir='.', tr=2000, loop=True, watch=False):
        assert self.connected, 'Not connected to server!'

        # NOTE: Rely on closures to pass arguments, yes it's bad.
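        # (publish_func below is only used by the commented-out Publisher
        # path inside queue_helper; the active code re-reads paths itself.)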
        def publish_func(publish_message):
            paths = get_paths(input_dir, self.conf['extensions'])
            while True:
                for path in paths:
                    publish_message(pickle.dumps(
                        nibabel.load(path).get_data()))
                    self.display_queue.put({'src': 'input', 'data': path})
                    time.sleep(tr / 1000.0)  # tr is in milliseconds
                if not loop:
                    break

        def queue_helper():
            # NOTE: Each process needs its own set of file descriptors
            #  publisher = Publisher(
            #  self.amqp,
            #  queue=self.queue_work_name,
            #  routing_key=self.queue_work_name,
            #  publish_func=publish_func
            #  )
            #  publisher.run()
            channel = get_channel(self.server_ip, self.queue_work_name)
            paths = get_paths(input_dir, self.conf['extensions'])

            while True:
                for path in paths:
                    #  publish_message(pickle.dumps(
                    #  nibabel.load(path).get_data()))
                    channel.basic_publish(exchange='message',
                                          routing_key=self.queue_work_name,
                                          body=pickle.dumps(
                                              nibabel.load(path).get_data()))
                    self.display_queue.put({'src': 'input', 'data': path})
                    time.sleep(tr / 1000.0)  # tr is in milliseconds
                if not loop:
                    break

        process = mp.Process(target=queue_helper)
        process.start()

        return
Example #16
 def start_session(self, ipc_notify=False):
     if not self._ongoing_session:
         self._ongoing_session = True
         for i in range(len(self._processes)):
             self._processes[i] = mp.Process(
                 target=_shm_launch_domain_server_,
                 args=[
                     self._domain_factory, i, self._active_domains,
                     self._shm_proxy.copy(),
                     dict(self._shm_registers),
                     dict(self._shm_types),
                     dict(self._shm_sizes), self._rsize,
                     list(self._shm_arrays),
                     list(self._shm_names),
                     list(self._shm_params), self._conditions[i],
                     self._ipc_connections[i] if ipc_notify else None,
                     logger
                 ])
             self._processes[i].start()
         # Busy-wait until every job has been launched and is waiting for
         # requests (a Condition-based handshake would avoid this spin)
         while True in set(self._active_domains):
             continue
Example #17
    def watch(self, callback=lambda *args: None):
        assert self.connected, 'Not connected to server!'
        print('Starting to watch!')

        def watch_helper():
            def callback_rmq(channel, method, properties, body):
                self.display_queue.put({
                    'src': 'output',
                    'data': pickle.loads(body)
                })
                callback(body)

            consumer = Consumer(self.amqp,
                                queue=self.queue_result_name,
                                routing_key=self.queue_result_name,
                                callback=callback_rmq)

            consumer.run()

        process = mp.Process(target=watch_helper)

        process.start()

        return
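Unlike Examples 2 and 3, this version passes nothing to mp.Process and relies on closures over self. A small sketch of why that works with the 'fork' start method but not with 'spawn', where the target must be picklable:

import multiprocessing as mp

def outer():
    captured = 42

    def helper():
        print(captured)  # closure over outer()'s local variable

    # Works under the default 'fork' start method on Linux; under 'spawn'
    # (the default on Windows and macOS) start() fails because locally
    # defined functions cannot be pickled.
    p = mp.Process(target=helper)
    p.start()
    p.join()

if __name__ == '__main__':
    outer()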
Example #18
    def run_full_exp_parallel_smp(self, save=True):

        if 'pathos' in sys.modules:

            n_cpus = os.cpu_count()

            remaining_runs = deepcopy(self.n_runs)

            while self.finished is False:

                round_start_time = time.time()
                n_round_runs = np.min([n_cpus, remaining_runs])
                processes = [0] * n_round_runs
                # queue = pathos_multiprocess.Queue()
                queues = [0] * n_round_runs
                for queue_no in range(len(queues)):
                    queues[queue_no] = pathos_multiprocess.Queue()

                for proc_no in range(n_round_runs):
                    bayes_optimiser = deepcopy(
                        self.bayes_opt_configs[self.current_config_no])
                    seed = int(torch.randint(1, int(2**32 - 1), (1, )))
                    processes[proc_no] = pathos_multiprocess.Process(
                        target=self.run_rep_parallel,
                        args=(bayes_optimiser, queues[proc_no],
                              self.current_config_no, self.current_rep, seed))
                    processes[proc_no].start()

                    if self.current_rep < self.repetitions - 1:
                        self.current_rep += 1

                    elif self.current_config_no < self.n_configs - 1:
                        self.current_rep = 0
                        self.current_config_no += 1

                    else:
                        self.finished = True

                jobs_running = True
                procs_status = [5] * len(processes)
                last_waiting_n = 10e10
                while jobs_running:
                    for proc_no, process in enumerate(processes):
                        process.join(timeout=1)
                        if process.is_alive():
                            procs_status[proc_no] = 1
                        else:
                            procs_status[proc_no] = 0

                    for queue in queues:
                        while not queue.empty():
                            message = queue.get()
                            config_ind, rep_ind, vals, best_vals = message
                            # config_ind, rep_ind, best_vals = message

                            # self.bayes_opts[config_ind][rep_ind] = b_opt
                            self.vals[config_ind][rep_ind] = deepcopy(vals)
                            self.best_vals[config_ind][rep_ind] = deepcopy(
                                best_vals)

                    waiting_n = np.count_nonzero(procs_status)  # processes still running
                    if last_waiting_n != waiting_n:
                        current_time = time.time()
                        elapsed_time = (current_time - round_start_time) / 60.0
                        print(
                            f"Waited for {elapsed_time} minutes in this round, "
                            f"for {waiting_n} processes out of {len(processes)}",
                            flush=True)
                        last_waiting_n = waiting_n

                    if np.sum(procs_status) < 1:
                        jobs_running = False

                remaining_runs -= n_round_runs

                self.print_status()

                if save:
                    self.save_experiment()

        else:
            print(
                "Could not run experiment in parallel because pathos is not "
                "imported. This is probably because it isn't installed.")
    # multi-proc setup
    manager = mp.Manager()

    # shared memory
    queue = manager.Queue()

    for p in xrange(ncpu):  # xrange: this fragment targets Python 2
        if p == ncpu - 1:
            curr_mut_list = all_mutation_list[p*increment:]
        else:
            curr_mut_list = all_mutation_list[p*increment:(p*increment) + increment]

        if len(curr_mut_list) == 0:
            continue

        proc = mp.Process(target=perform_foldx, args=(curr_mut_list, p, queue))
        proc.start()
        Processes.append(proc)
        time.sleep(5)

    # collect results to dictionary
    for p in xrange(len(Processes)):
        mut_to_foldxresults.update(queue.get())

    for proc in Processes:
        proc.join()

    # count
    pt_to_stability_change_count = {}

    for pt, mutation_to_count in pt_to_mutation_count.items():
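The preceding fragment (and the examples that follow) collects per-worker dicts through a Manager queue. A minimal, self-contained sketch of that pattern, with a hypothetical squaring worker:

import multiprocessing as mp

def worker(chunk, queue):
    queue.put({item: item ** 2 for item in chunk})  # partial result dict

if __name__ == '__main__':
    manager = mp.Manager()
    queue = manager.Queue()
    chunks = [[1, 2], [3, 4]]
    procs = [mp.Process(target=worker, args=(c, queue)) for c in chunks]
    for p in procs:
        p.start()
    merged = {}
    for _ in procs:
        merged.update(queue.get())  # one partial dict per worker
    for p in procs:
        p.join()
    print(merged)  # -> {1: 1, 2: 4, 3: 9, 4: 16}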
Example #20
    def node_indexing(self):
        '''
        1) Index tree nodes by level-order.
        2) Annotate node id to tree string.
        3) Get leaf to node distances.
        4) Calculate pairwise inter-node distances using leaf to node distances
        5) Calculate mean distance of child-nodes of each node to root
        '''

        tree_string = self.tree_object.write(
            format=5)  # append node id annotation

        node_to_leaves = {}
        nindex_to_node = {}
        node_to_nindex = {}
        node_to_parent_node = {}

        # binary indicating that treeinfo file was parsed
        self.treeinfo_file_given = 0
        if len(self.leaf_dist_to_node) > 0:
            self.treeinfo_file_given = 1
        else:
            if self.no_treeinfo:
                print('\nWARNING: NO TREEINFO FILE WILL BE GENERATED.\n')

        # level-order traversal
        print('\nIndexing internal nodes...')
        for n, node in enumerate(self.tree_object.traverse()):
            if node.is_leaf():
                continue

            nindex_to_node[n] = node
            node_to_nindex[node] = n

            # get parent node (except for root)
            try:
                node_to_parent_node[n] = node_to_nindex[node.up]
            except:
                pass

            # node annotation for final tree output
            node_string = re.sub(r'[^\)]+$', '', node.write(format=5))
            tree_string = tree_string.replace(
                node_string, '{}[&NODE_ID={}]'.format(node_string, n))

        # multi-proc setup
        manager = mp.Manager()
        # shared memory
        leaf_dist_to_node_queue = manager.Queue()
        node_to_leaves_queue = manager.Queue()
        # generate processes
        processes = []

        nindex_list = list(nindex_to_node.keys())  # list() needed on Python 3
        shuffle(nindex_list)  # shuffle so per-process workloads are more even
        increment = int(len(nindex_list) / self.cores)

        for p in range(self.cores):
            if p == self.cores - 1:
                curr_nindex_list = nindex_list[p * increment:]
            else:
                curr_nindex_list = nindex_list[p * increment:(p * increment) +
                                               increment]

            #for n, node in nindex_to_node.items():
            proc = mp.Process(target=self.get_leaf_distance_to_node,
                              args=(curr_nindex_list, [
                                  nindex_to_node[n] for n in curr_nindex_list
                              ], leaf_dist_to_node_queue,
                                    node_to_leaves_queue))
            processes.append(proc)
            proc.start()

        # collect results to dictionary
        for p in range(len(processes)):
            node_to_leaves.update(node_to_leaves_queue.get())

            for leaf_key, list_value in leaf_dist_to_node_queue.get().items():
                for (n, distance) in list_value:
                    try:
                        self.leaf_dist_to_node[leaf_key][n] = distance
                    except KeyError:
                        self.leaf_dist_to_node[leaf_key] = {n: distance}

        # wait for all processes to end
        for proc in processes:
            proc.join()

        # write to treeinfo file
        if self.treeinfo_file_given < 1 and not self.no_treeinfo:
            print('Writing to treeinfo file...')
            with open(self.treeinfo_fname, 'w') as output:
                json.dump(self.leaf_dist_to_node, output)
                output.write('\n')
        """
        # legacy single-thread code
        node_to_leaves_single = {}
        leaf_dist_to_node_single = {}

        # level-order traversal
        for n, node in enumerate(self.tree_object.traverse()):
            if node.is_leaf():
                continue

            # distance of leaf to each of its ancestral node
            for leaf_node in node.get_leaves():
                leaf = leaf_node.name
                dist = leaf_node.get_distance(node)

                try:
                    leaf_dist_to_node_single[leaf][n] = dist
                except:
                    leaf_dist_to_node_single[leaf] = {n: dist}

            # sort leaves by distance to node in reverse-order
            node_to_leaves_single[n] = sorted(node.get_leaf_names(), key=lambda leaf: self.leaf_dist_to_node[leaf][n], reverse=True)

        
        # check single vs multi-proc
        print ('Keys for leaf_dist_to_node: {}'.format(set(leaf_dist_to_node_single.keys()) == set(self.leaf_dist_to_node.keys())))
        for leaf, node_to_dist in self.leaf_dist_to_node.items():
            if set(node_to_dist.keys()) != set(leaf_dist_to_node_single[leaf].keys()):
                print (leaf, set(node_to_dist.keys())^set(leaf_dist_to_node_single[leaf].keys()))

            for node, dist in node_to_dist.items():
                if dist != leaf_dist_to_node_single[leaf][node]:
                    print (leaf, dist, leaf_dist_to_node_single[leaf][node])

        print ('Keys for node_to_leaves: {}'.format(set(node_to_leaves.keys()) == set(node_to_leaves_single.keys())))
        for node, leaves in node_to_leaves.items():
            if leaves != node_to_leaves_single[node]:
                print (node)
                print (leaves)
                print (node_to_leaves_single[node])
                print ('\n')
        """

        # get ancestral nodepair to dist
        # legacy single thread code (faster)
        ancestral_nodepair_to_dist = {}
        for leaf, node_to_dist in self.leaf_dist_to_node.items():
            ancestors_of_leaf = list(node_to_dist.keys())
            for (i, j) in itertools.combinations(ancestors_of_leaf, 2):

                if (i in ancestral_nodepair_to_dist
                        and j in ancestral_nodepair_to_dist[i]) or (
                            j in ancestral_nodepair_to_dist
                            and i in ancestral_nodepair_to_dist[j]):
                    continue
                else:
                    ij_dist = abs(node_to_dist[i] - node_to_dist[j])

                    try:
                        ancestral_nodepair_to_dist[i][j] = ij_dist
                    except KeyError:
                        ancestral_nodepair_to_dist[i] = {j: ij_dist}

                    try:
                        ancestral_nodepair_to_dist[j][i] = ij_dist
                    except KeyError:
                        ancestral_nodepair_to_dist[j] = {i: ij_dist}

        # get sibling nodepair to dist
        # legacy single thread code here (faster)
        sibling_nodepair_to_dist = {}
        for (i, j) in itertools.combinations(ancestral_nodepair_to_dist.keys(),
                                             2):
            if (i in ancestral_nodepair_to_dist
                    and j in ancestral_nodepair_to_dist[i]) or (
                        i in sibling_nodepair_to_dist
                        and j in sibling_nodepair_to_dist[i]) or (
                            j in sibling_nodepair_to_dist
                            and i in sibling_nodepair_to_dist[j]):
                continue
            else:
                ancestors_to_i = [
                    node for node in ancestral_nodepair_to_dist[i].keys()
                    if node < i
                ]
                ancestors_to_j = [
                    node for node in ancestral_nodepair_to_dist[j].keys()
                    if node < j
                ]
                common_ancestors = sorted(
                    set(ancestors_to_i) & set(ancestors_to_j))
                common_ancestor = common_ancestors[-1]
                ij_dist = ancestral_nodepair_to_dist[i][
                    common_ancestor] + ancestral_nodepair_to_dist[j][
                        common_ancestor]

                try:
                    sibling_nodepair_to_dist[i][j] = ij_dist
                except KeyError:
                    sibling_nodepair_to_dist[i] = {j: ij_dist}

                try:
                    sibling_nodepair_to_dist[j][i] = ij_dist
                except KeyError:
                    sibling_nodepair_to_dist[j] = {i: ij_dist}

        nodepair_to_dist = ancestral_nodepair_to_dist.copy()

        for i in nodepair_to_dist.keys():
            nodepair_to_dist[i][i] = 0
            try:
                nodepair_to_dist[i].update(sibling_nodepair_to_dist[i])
            except KeyError:
                continue

        # get mean distance of children nodes of each node to root
        node_to_mean_child_dist2root = {
            n: np.mean([
                self.leaf_dist_to_node[child.name][0] if child.is_leaf() else
                nodepair_to_dist[node_to_nindex[child]][0]
                for child in nindex_to_node[n].get_children()
            ])
            for n in node_to_leaves.keys()
        }

        return tree_string, node_to_leaves, nindex_to_node, node_to_nindex, self.leaf_dist_to_node, nodepair_to_dist, node_to_parent_node, node_to_mean_child_dist2root
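The slicing scheme used here (and again in Examples 21 and 22) splits a list into one slice per core, with the last slice absorbing the remainder; isolated for clarity:

def chunk(items, cores):
    increment = int(len(items) / cores)
    slices = []
    for p in range(cores):
        if p == cores - 1:
            slices.append(items[p * increment:])  # last slice takes the rest
        else:
            slices.append(items[p * increment:(p * increment) + increment])
    return slices

print(chunk(list(range(10)), 3))  # -> [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]]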
Example #21
    def pwdist_dist_and_ancestral_trace(self, node_to_leaves, nindex_to_node,
                                        node_to_nindex,
                                        node_to_mean_child_dist2root,
                                        nodepair_to_dist):
        '''
        1) Get pairwise distances of all leaves
        2) Get ancestral/descendant traces
        3) Get pairwise distance distributions of nodes
        4) Get leaf to ancestor trace
        5) Get mean child-nodes distance to ancestral trace
        '''

        #! -- multiprocessing: calculate pairwise leaf distance -- #
        def get_pw_leaf_dist(lp_list, queue):
            lp_to_dist = {}
            for (x, y) in lp_list:
                lp_to_dist[(x.name,
                            y.name)] = lp_to_dist[(y.name,
                                                   x.name)] = x.get_distance(y)

            queue.put(lp_to_dist)

        # ! -- multiprocessing: calculate pairwise leaf distance -- #

        # get pairwise sequence patristic distance
        if self.treeinfo_file_given < 1:
            print('\nParsing all pairwise distances between leaves...')
            """
            # multi-proc setup (pool)
            pool = mp.Pool(processes=self.cores)
            result = pool.map(get_pw_leaf_dist, list(itertools.combinations(self.tree_object.get_leaves(), 2)))

            for (leaf_x, leaf_y, dist) in result:
                self.leafpair_to_distance[(leaf_x, leaf_y)] = self.leafpair_to_distance[(leaf_y, leaf_x)] = dist
            """

            # multi-proc setup
            manager = mp.Manager()
            # shared memory
            leafpair_to_distance_queue = manager.Queue()
            # generate processes
            processes = []

            leafpair_list = list(
                itertools.combinations(self.tree_object.get_leaves(), 2))
            increment = int(len(leafpair_list) / self.cores)

            for p in range(self.cores):
                if p == self.cores - 1:
                    curr_leafpair_list = leafpair_list[p * increment:]
                else:
                    curr_leafpair_list = leafpair_list[
                        p * increment:(p * increment) + increment]

                #for n, node in nindex_to_node.items():
                proc = mp.Process(target=get_pw_leaf_dist,
                                  args=(curr_leafpair_list,
                                        leafpair_to_distance_queue))
                processes.append(proc)
                proc.start()

            # collect results to dictionary
            for p in range(len(processes)):
                self.leafpair_to_distance.update(
                    leafpair_to_distance_queue.get())

            # wait for all processes to end
            for proc in processes:
                proc.join()

            if not self.no_treeinfo:
                print('Writing to treeinfo file...')
                with open(self.treeinfo_fname, 'a') as output:
                    json.dump(self.remap_keys(self.leafpair_to_distance), output)
                    output.write('\n')
            """# single thread legacy code
            leafpair_to_distance_single = {}
            for x, y in itertools.combinations(self.tree_object.get_leaves(), 2):
                leaf_x = x.name
                leaf_y = y.name
                dist = x.get_distance(y)
                leafpair_to_distance_single[(leaf_x, leaf_y)] = leafpair_to_distance_single[(leaf_y, leaf_x)] = dist

            print ('Keys to leafpair_to_distance: {}'.format(set(self.leafpair_to_distance.keys()) == set(leafpair_to_distance_single.keys())))
            for (leaf_x, leaf_y), dist in self.leafpair_to_distance.items():
                if dist != leafpair_to_distance_single[(leaf_x, leaf_y)]:
                    print (leaf_x, leaf_y, dist, leafpair_to_distance_single[(leaf_x, leaf_y)])"""

        node_to_ancestral_nodes = {}
        node_to_descendant_nodes = {}
        node_to_pwdist = {}
        node_to_mean_pwdist = {}
        leaf_to_ancestors = {}
        node_to_mean_child_dist2anc = {}

        # get ancestry and pairwise sequence distance distribution (single thread code faster)
        print('\nSorting lineages and PWD distributions...')
        for n in sorted(node_to_mean_child_dist2root,
                        key=node_to_mean_child_dist2root.get):
            leaves = node_to_leaves[n]
            mean_dist2root = node_to_mean_child_dist2root[n]

            # get leaf to ancestor nodes subtending it
            for leaf in leaves:
                try:
                    leaf_to_ancestors[leaf].append(n)
                except KeyError:
                    leaf_to_ancestors[leaf] = [n]

            ancestors_to_n = [
                node_to_nindex[anc]
                for anc in nindex_to_node[n].iter_ancestors()
            ]

            node_to_ancestral_nodes[n] = ancestors_to_n
            for anc in ancestors_to_n:
                try:
                    node_to_descendant_nodes[anc].append(n)
                except KeyError:
                    node_to_descendant_nodes[anc] = [n]

                try:
                    node_to_mean_child_dist2anc[n][
                        anc] = mean_dist2root - nodepair_to_dist[anc][0]
                except KeyError:
                    node_to_mean_child_dist2anc[n] = {
                        anc: mean_dist2root - nodepair_to_dist[anc][0]
                    }

            pwdist = sorted([
                self.leafpair_to_distance[(x, y)]
                for (x, y) in itertools.combinations(leaves, 2)
            ])
            node_to_pwdist[n] = pwdist
            node_to_mean_pwdist[n] = np.mean(pwdist)

        return self.leafpair_to_distance, node_to_pwdist, node_to_mean_pwdist, node_to_ancestral_nodes, node_to_descendant_nodes, leaf_to_ancestors, node_to_mean_child_dist2anc
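The try/except KeyError insertions in this method can also be expressed with dict.setdefault; shown here for comparison:

d = {}
d.setdefault('leaf', []).append(3)     # try: append / except KeyError: [n]
d.setdefault('node', {})['anc'] = 1.5  # try: [k] = v / except KeyError: {k: v}
print(d)  # -> {'leaf': [3], 'node': {'anc': 1.5}}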
Example #22
    def get_global_pval(self, hytest_method, node_to_leaves,
                        node_to_ancestral_nodes,
                        node_to_pwdist):  #, leafpair_to_distance):
        '''
        Perform all inter-clusters' hypotheses tests
        '''
        if self.treeinfo_file_given < 1:
            from ctypes import c_char_p
            import os

            print('\nPerforming {} tests...'.format(hytest_method))

            # shared memory
            lpd = self.leafpair_to_distance
            max_node = max(node_to_leaves.keys())
            if os.name == 'nt':
                # windows
                node_to_leaves_shared = [
                    node_to_leaves[n] if n in node_to_leaves.keys() else False
                    for n in range(max_node + 1)
                ]
            else:
                node_to_leaves_shared = [
                    mp.Array(c_char_p, node_to_leaves[n])
                    if n in node_to_leaves.keys() else False
                    for n in range(max_node + 1)
                ]
            node_to_ancestral_nodes_shared = [
                mp.Array('i', node_to_ancestral_nodes[n])
                if n in node_to_ancestral_nodes else False
                for n in range(max_node + 1)
            ]
            node_to_pwdist_shared = [
                mp.Array('d', node_to_pwdist[n])
                if n in node_to_leaves.keys() else False
                for n in range(max_node + 1)
            ]

            # worker
            def get_interclus_pval(np_list, ntl_dict, ntan_dict, ntpwd_dict,
                                   q):
                currp_np_to_pval = {}
                for (i, j) in np_list:
                    if (ntan_dict[j] is not False and i in list(ntan_dict[j])) or (
                            ntan_dict[i] is not False and j in list(ntan_dict[i])):
                        pval = inter_cluster_hytest(
                            list(ntpwd_dict[i]),
                            list(ntpwd_dict[j])).hytest(hytest_method)
                    else:
                        ij_pwdist = sorted([
                            lpd[(x, y)] for x, y in itertools.combinations(
                                list(set(ntl_dict[i]) | set(ntl_dict[j])), 2)
                        ])
                        # take the conservative (max) p-value comparing node i/j individually to i+j
                        pval = max([
                            inter_cluster_hytest(
                                list(ntpwd_dict[i]),
                                ij_pwdist).hytest(hytest_method),
                            inter_cluster_hytest(
                                list(ntpwd_dict[j]),
                                ij_pwdist).hytest(hytest_method)
                        ])
                    currp_np_to_pval[(i, j)] = pval
                q.put(currp_np_to_pval)

            # multi-proc setup
            manager = mp.Manager()

            # shared memory
            queue = manager.Queue()

            # generate processes
            processes = []

            # split nodepair list into ncpu sets
            nodepair_list = list(
                itertools.combinations(node_to_leaves.keys(), 2))
            shuffle(nodepair_list)  # shuffle to make more equitable

            increment = int(len(nodepair_list) / self.cores)
            for p in range(self.cores):
                if p == self.cores - 1:
                    curr_nodepair_list = nodepair_list[p * increment:]
                else:
                    curr_nodepair_list = nodepair_list[
                        p * increment:(p * increment) + increment]

                proc = mp.Process(target=get_interclus_pval,
                                  args=(curr_nodepair_list,
                                        node_to_leaves_shared,
                                        node_to_ancestral_nodes_shared,
                                        node_to_pwdist_shared, queue))
                processes.append(proc)
                proc.start()

            # collect results to dictionary
            for p in range(len(processes)):
                self.nodepair_to_pval.update(queue.get())

            # wait for all processes to end
            for proc in processes:
                proc.join()

            if not self.no_treeinfo:
                print('Writing to treeinfo file...')
                with open(self.treeinfo_fname, 'a') as output:
                    json.dump(self.remap_keys(self.nodepair_to_pval), output)
                    output.write('\n')
            """
            # single thread legacy code
            nodepair_to_pval_single = {}
            for i,j in itertools.combinations(node_to_leaves.keys(), 2):
                if (j in node_to_ancestral_nodes and i in node_to_ancestral_nodes[j]) or (i in node_to_ancestral_nodes and j in node_to_ancestral_nodes[i]):
                    pval = inter_cluster_hytest(node_to_pwdist[i], node_to_pwdist[j]).hytest(hytest_method)
                else:
                    ij_pwdist = sorted([leafpair_to_distance[(x,y)] for x,y in itertools.combinations(list(set(node_to_leaves[i])|set(node_to_leaves[j])), 2)])
                    # take the conservative (max) p-value comparing node i/j individually to i+j
                    pval = max([inter_cluster_hytest(node_to_pwdist[i], ij_pwdist).hytest(hytest_method), inter_cluster_hytest(node_to_pwdist[j], ij_pwdist).hytest(hytest_method)])

                nodepair_to_pval_single[(i,j)] = pval

            # check
            print ('Sets of nodepair_to_pval: {}'.format(set(self.nodepair_to_pval.keys()) == set(nodepair_to_pval_single.keys())))
            for (i, j), pval in self.nodepair_to_pval.items():
                if pval != nodepair_to_pval_single[(i,j)]:
                    print (i, j, pval, nodepair_to_pval_single[(i, j)])
            """

        return self.nodepair_to_pval