Example #1
    def __init__(
        self, domain_factory, lambdas=None, nb_domains=os.cpu_count(), ipc_notify=False
    ):
        super().__init__(domain_factory, lambdas, nb_domains, ipc_notify)
        self._manager = mp.Manager()
        self._waiting_jobs = [None] * nb_domains
        self._job_results = self._manager.list([None for i in range(nb_domains)])
        logger.info(f"Using {nb_domains} parallel piped domains")
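A minimal, hypothetical sketch of the pattern Example #1 sets up: a Manager().list() proxy that child processes write results into. The worker and names below are illustrative, not from the original project.

import multiprocessing as mp

def square_into(results, idx):
    # each worker writes its result into the shared proxy list
    results[idx] = idx * idx

if __name__ == "__main__":
    manager = mp.Manager()
    results = manager.list([None] * 4)  # same role as _job_results above
    procs = [mp.Process(target=square_into, args=(results, i)) for i in range(4)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    print(list(results))  # [0, 1, 4, 9]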
Example #2
def run_concurrent(queries, f):
    pool = Pool(nodes=CLIENT_COUNT)
    manager = pathos_multiprocess.Manager()

    barrier = manager.Barrier(CLIENT_COUNT)
    barriers = [barrier] * CLIENT_COUNT

    # invoke queries
    return pool.map(f, queries, barriers)
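For context, a hedged sketch of the barrier pattern this helper relies on, using the stdlib multiprocessing module instead of pathos; all names are illustrative. Every client blocks on the shared Barrier, so all queries are released at the same instant.

import multiprocessing as mp

def run_query(query, barrier):
    barrier.wait()  # all clients released together
    return "ran: " + query

if __name__ == "__main__":
    n = 4
    manager = mp.Manager()
    barrier = manager.Barrier(n)  # the proxy is picklable, safe to pass to workers
    with mp.Pool(n) as pool:
        results = pool.starmap(run_query, [("q%d" % i, barrier) for i in range(n)])
    print(results)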
Example #3
def get_numa_queue(num_jobs_per_numa_node=1):
    m = mp.Manager()
    queue = m.Queue(NUM_NUMA_NODES * num_jobs_per_numa_node)
    for cpu_id_offsets in NUMA_CPU_ID_OFFSETS:
        for i in range(num_jobs_per_numa_node):
            queue.put(
                get_cpu_assignments(cpu_id_offsets,
                                    NUM_VIRTUAL_CORES_PER_POOL))
    return queue
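One plausible way to consume such a queue (illustrative, with hypothetical CPU groups): each job takes an assignment before starting and returns it when done, so at most NUM_NUMA_NODES * num_jobs_per_numa_node jobs run at once.

import multiprocessing as mp

def job(numa_queue):
    cpu_ids = numa_queue.get()  # claim a CPU assignment (blocks if none free)
    try:
        print("running on CPUs", cpu_ids)  # real code would set CPU affinity here
    finally:
        numa_queue.put(cpu_ids)  # hand the slot back for the next job

if __name__ == "__main__":
    m = mp.Manager()
    q = m.Queue(2)
    for cpus in ([0, 1], [2, 3]):  # two hypothetical CPU groups
        q.put(cpus)
    procs = [mp.Process(target=job, args=(q,)) for _ in range(4)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()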
Example #4
    def create_example_queue_for_collection(self,
                                            name: str) -> multiprocess.Queue:
        """
        Creates a queue to receive examples on.

        :param name: The name of the queue.
        :return: The queue.
        """
        assert name not in self.example_queues.keys()
        manager = multiprocess.Manager()
        queue_ = manager.Queue()
        self.example_queues[name] = queue_
        return queue_
Example #5
    def __init__(self, num_proc, sink=sys.stdout):
        """
        Initialiser

        :param num_proc: Number of processes to employ (defaults to the number of cores less 2). If 0
                         or less, then defaults to Multi-Threading instead of Multi-Processing: this
                         can be especially useful for debugging.
        :param sink:     Sink where to write progress to (may be None)
        """
        # Parameters
        self.NumProc = num_proc  # Number of Processes to employ

        # Management
        self._queue = mp.Manager().Queue() if num_proc > 0 else queue.Queue()  # Queue
        self.__timers = {}  # Timers
        self.__thread = None  # Progress Thread Handler
        self.__worker_set = None  # List of Workers and progress
        self.__done = 0  # How many are finished
        self.__tasks_done = 0  # How many (workers) are finished
        self.__progress = None  # Eventually will be the progress bar
        self.__sink = utils.NullableSink(sink)  # Sink to write to
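The Manager().Queue()-or-queue.Queue() switch above is the point of interest: a managed queue proxy can be handed to child processes, while a plain queue.Queue only works between threads of a single process. A minimal sketch of the same switch, with an assumed helper name:

import multiprocessing as mp
import queue

def make_progress_queue(num_proc):
    # num_proc > 0: managed queue, usable across processes
    # num_proc <= 0: plain thread-safe queue, handy for debugging
    return mp.Manager().Queue() if num_proc > 0 else queue.Queue()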
Example #6
    def scheduling_method(self, cur_time, es, es_dict):
        """
            This function must map the queued events to available nodes at the current time.

            :param cur_time: current time
            :param es: events to be scheduled
            :param es_dict: dictionary with full data of the events

            :return: a tuple of (time to schedule, event id, list of assigned nodes)
        """
        dispatching_plan = []

        resource_types = self.resource_manager.resource_types
        avl_resources = self.resource_manager.current_availability
        system_capacity = self.resource_manager.system_capacity('nodes')

        # =======================================================================
        # Considered queued jobs: jobs that fit the current system state, up to
        # q_length of them. A job that cannot be fitted, or that exceeds q_length,
        # goes directly into the dispatching decision via the no-solution tuple.
        # =======================================================================
        priorized_jobs = SortedListWithKey(key=lambda job_tuple: job_tuple[1])

        current_qjobs = SortedList()

        cons_qjobs = {}
        for node in self.resource_manager.node_names:
            avl_res = avl_resources[node]
            # avl_res = system_capacity[node]
            for idx, job_obj in enumerate(es):
                job_id = job_obj.id

                if not (job_id in cons_qjobs):
                    current_qjobs.add(job_id)
                    cons_qjobs[job_id] = [False, 0, {}, None]
                    priorized_jobs.add((job_id, self._job_priority_slowdown(job_obj, cur_time)))
                if self._reduced_model:
                    possibilities = self._joint_nodes(job_obj, avl_res)
                    if possibilities > 0:
                        cons_qjobs[job_id][2][node] = min(possibilities, job_obj.requested_nodes)
                        cons_qjobs[job_id][1] += possibilities
                        if cons_qjobs[job_id][1] >= job_obj.requested_nodes:
                            cons_qjobs[job_id][0] = True
                            if not cons_qjobs[job_id][3]:
                                cons_qjobs[job_id][3] = job_obj
                else:
                    cons_qjobs[job_id][0] = True
                    cons_qjobs[job_id][1] = None
                    cons_qjobs[job_id][2] = None
                    cons_qjobs[job_id][3] = job_obj

        qjobs = 0
        wc_makespan = 0
        makespans = []

        selected_priorized_jobs = []

        # Job of the dispatching decision
        decision_jobs = {}

        if self._reduced_model:
            for job_id, _ in priorized_jobs:
                t = cons_qjobs[job_id]
                if not t[0] or qjobs > self._cur_q_length - 1:
                    decision_jobs[job_id] = self.dispatching_tuple(job_id)
                    cons_qjobs.pop(job_id)
                else:
                    exp_duration = max(1, t[-1].expected_duration)
                    wc_makespan += exp_duration
                    makespans.append(exp_duration)
                    qjobs += 1
                    selected_priorized_jobs.append(job_id)
        else:
            cannot_start_selected = 0
            for job_id, _ in priorized_jobs:
                t = cons_qjobs[job_id]
                if (not t[0] and cannot_start_selected >= self._considered_cannot_start) or (
                        qjobs > self._cur_q_length - 1):
                    decision_jobs[job_id] = self.dispatching_tuple(job_id)
                    cons_qjobs.pop(job_id)
                else:
                    if not t[0]:
                        cons_qjobs[job_id][3] = es_dict[job_id]
                        cannot_start_selected += 1
                    exp_duration = max(1, t[-1].expected_duration)
                    wc_makespan += exp_duration  # , self.get_queue(t[-1].queue))  # exp_duration
                    makespans.append(exp_duration)
                    qjobs += 1
                    selected_priorized_jobs.append(job_id)
        # =======================================================================
        # There are no jobs to dispatch at the current system state.
        # Then a no solution list is returned.
        # =======================================================================
        if not cons_qjobs:
            # Job Dispatching skip
            return decision_jobs.values(), []

        solved = False
        self.priorized_jobs = None

        if self._safe:
            manager = mp_dill.Manager()
            schedule_plan = manager.dict()
            process_class = mp_dill.Process

            p = process_class(target=getattr(self, 'cp_model'),
                              args=(
                                  schedule_plan, cur_time, cons_qjobs, selected_priorized_jobs, es_dict, resource_types,
                                  avl_resources),
                              kwargs={'timelimit': timelimit}
                              )
            p.start()
            p.join()

            if p.exitcode != 0:
                schedule_plan.pop('solver_state', None)
                schedule_plan.pop('limit_reached', None)
                return list(decision_jobs.values()) \
                       + [self.dispatching_tuple(job_id, start_time, nodes) for (start_time, job_id, nodes) in
                          schedule_plan.values()] \
                       + [self.dispatching_tuple(job_id, None, []) for job_id in cons_qjobs if
                          not (job_id in schedule_plan)], []
        else:
            schedule_plan = {}
            args = (
                schedule_plan, cur_time, cons_qjobs, selected_priorized_jobs, es_dict, resource_types, avl_resources)
            kwargs = {'max_timelimit': self._max_timelimit}
            function = getattr(self, 'cp_model')
            function(*args, **kwargs)

        solved = schedule_plan.pop('solved')
        of_value = schedule_plan.pop('of_value')
        walltime = schedule_plan.pop('walltime')
        proc_time = schedule_plan.pop('proc_time')
        incurred_time = walltime + proc_time
        failures = schedule_plan.pop('failures')
        branches = schedule_plan.pop('branches')
        p = None

        self.priorized_jobs = None
        dispatching_plan = list(schedule_plan.values())
        self.__instance_data = (
            solved, of_value, walltime, incurred_time, failures, branches,
            dispatching_plan + list(decision_jobs.values()),)

        # This is useful for print and also to create the unsuccessful data
        dispatched_jobs = 0
        queued_job_ids = []
        for a in dispatching_plan:
            if a[2]:
                dispatched_jobs += 1
            if dispatched_jobs == 0:
                queued_job_ids.append(a[1])

        if self._reduce_job_length:
            # ===================================================================
            # The number of jobs considered in the next scheduling decision is
            # halved if the current problem instance was not solved. After a
            # successful dispatch this value is reset. The minimum is 1,
            # otherwise there would be nothing to dispatch.
            # ===================================================================
            if not solved:
                self._cur_q_length = max(1, min(self._cur_q_length,
                                                len(schedule_plan)) // 2)  # max(1, self._cur_q_length // 2)
            else:
                self._cur_q_length = self._q_length

        print('{} - {}: Queued {}, Dispatched {}, Running {}. {}'.format(self._counter, cur_time,
                                                                         len(es) - dispatched_jobs, dispatched_jobs,
                                                                         len(self.resource_manager.current_allocations),
                                                                         self.resource_manager.current_usage))
        return dispatching_plan + list(decision_jobs.values()), []
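The _safe branch above isolates the CP solver in a child process so a solver crash cannot take down the scheduler, with a managed dict carrying whatever results were produced. A reduced, hypothetical sketch of that pattern (solve stands in for cp_model):

import multiprocessing as mp

def solve(plan, jobs):
    # toy stand-in for the real solver: fill the managed dict
    for job_id in jobs:
        plan[job_id] = (0, job_id, ["node1"])

if __name__ == "__main__":
    manager = mp.Manager()
    plan = manager.dict()  # outlives the child even if it crashes
    p = mp.Process(target=solve, args=(plan, ["j1", "j2"]))
    p.start()
    p.join()
    if p.exitcode != 0:
        print("solver died; fall back to a no-solution dispatch")
    else:
        print(dict(plan))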
Example #7
    def __init__(self):
        manager = multiprocess.Manager()
        self.lock = manager.Lock()
        self.request_queues: Dict[str, multiprocess.Queue] = {}
        self.example_queues: Dict[str, multiprocess.Queue] = {}
Example #8
    def test_05_concurrent_read_delete(self):
        ##############################################################################################
        # Delete graph via Redis DEL key.
        ##############################################################################################
        self.populate_graph()
        pool = Pool(nodes=CLIENT_COUNT)
        manager = pathos_multiprocess.Manager()
        barrier = manager.Barrier(CLIENT_COUNT)
        barriers = [barrier] * CLIENT_COUNT

        q = """UNWIND (range(0, 10000)) AS x WITH x AS x WHERE (x / 900) = 1 RETURN x"""
        queries = [q] * CLIENT_COUNT
        # invoke queries
        m = pool.amap(thread_run_query, queries, barriers)

        self.conn.delete(GRAPH_ID)

        # wait for processes to return
        m.wait()

        # get the results
        results = m.get()

        # validate result.
        self.env.assertTrue(
            all([r["result_set"][0][0] == 900 for r in results]))

        # Make sure the graph is empty, i.e. the graph was deleted.
        resultset = self.graph.query("MATCH (n) RETURN count(n)").result_set
        self.env.assertEquals(resultset[0][0], 0)
        ##############################################################################################
        # Delete graph via Redis FLUSHALL.
        ##############################################################################################
        self.populate_graph()
        q = """UNWIND (range(0, 10000)) AS x WITH x AS x WHERE (x / 900) = 1 RETURN x"""
        queries = [q] * CLIENT_COUNT
        barrier = manager.Barrier(CLIENT_COUNT)
        barriers = [barrier] * CLIENT_COUNT
        # invoke queries
        m = pool.amap(thread_run_query, queries, barriers)

        self.conn.flushall()

        # wait for processes to return
        m.wait()

        # get the results
        results = m.get()

        # validate result.
        self.env.assertTrue(
            all([r["result_set"][0][0] == 900 for r in results]))

        # Make sure the graph is empty, i.e. the graph was deleted.
        resultset = self.graph.query("MATCH (n) RETURN count(n)").result_set
        self.env.assertEquals(resultset[0][0], 0)
        ##############################################################################################
        # Delete graph via GRAPH.DELETE.
        ##############################################################################################
        self.populate_graph()
        q = """UNWIND (range(0, 10000)) AS x WITH x AS x WHERE (x / 900) = 1 RETURN x"""
        queries = [q] * CLIENT_COUNT
        barrier = manager.Barrier(CLIENT_COUNT)
        barriers = [barrier] * CLIENT_COUNT
        # invoke queries
        m = pool.amap(thread_run_query, queries, barriers)

        self.graph.delete()

        # wait for processes to return
        m.wait()

        # get the results
        results = m.get()

        # validate result.
        self.env.assertTrue(
            all([r["result_set"][0][0] == 900 for r in results]))

        # Make sure the graph is empty, i.e. the graph was deleted.
        resultset = self.graph.query("MATCH (n) RETURN count(n)").result_set
        self.env.assertEquals(resultset[0][0], 0)
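The test interleaves each deletion with the in-flight queries via the async map: amap returns a handle immediately, the parent mutates the graph, then wait()/get() collect the results. The same shape with stdlib multiprocessing, with illustrative names:

import multiprocessing as mp

def run_query(query, barrier):
    barrier.wait()
    return query.upper()

if __name__ == "__main__":
    n = 3
    manager = mp.Manager()
    barrier = manager.Barrier(n)
    with mp.Pool(n) as pool:
        handle = pool.starmap_async(run_query, [("q", barrier)] * n)
        # ... destructive work happens here while the queries run ...
        results = handle.get()  # wait for the workers and fetch results
    print(results)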
Example #9
    def get_global_pval(self, hytest_method, node_to_leaves,
                        node_to_ancestral_nodes,
                        node_to_pwdist):  #, leafpair_to_distance):
        '''
        Perform all inter-clusters' hypotheses tests
        '''
        if self.treeinfo_file_given < 1:
            from ctypes import c_char_p
            import os

            print('\nPerforming {} tests...'.format(hytest_method))

            # shared memory
            lpd = self.leafpair_to_distance
            max_node = max(node_to_leaves.keys())
            if os.name == 'nt':
                # windows
                node_to_leaves_shared = [
                    node_to_leaves[n] if n in node_to_leaves.keys() else False
                    for n in range(max_node + 1)
                ]
            else:
                node_to_leaves_shared = [
                    mp.Array(c_char_p, node_to_leaves[n])
                    if n in node_to_leaves.keys() else False
                    for n in range(max_node + 1)
                ]
            node_to_ancestral_nodes_shared = [
                mp.Array('i', node_to_ancestral_nodes[n])
                if n in node_to_ancestral_nodes else False
                for n in range(max_node + 1)
            ]
            node_to_pwdist_shared = [
                mp.Array('d', node_to_pwdist[n])
                if n in node_to_leaves.keys() else False
                for n in range(max_node + 1)
            ]

            # worker
            def get_interclus_pval(np_list, ntl_dict, ntan_dict, ntpwd_dict,
                                   q):
                currp_np_to_pval = {}
                for (i, j) in np_list:
                    if (ntan_dict[j] != False and i in list(ntan_dict[j])) or (
                            ntan_dict[i] != False and j in list(ntan_dict[i])):
                        pval = inter_cluster_hytest(
                            list(ntpwd_dict[i]),
                            list(ntpwd_dict[j])).hytest(hytest_method)
                    else:
                        ij_pwdist = sorted([
                            lpd[(x, y)] for x, y in itertools.combinations(
                                list(set(ntl_dict[i]) | set(ntl_dict[j])), 2)
                        ])
                        # take the conservative (max) p-value comparing node i/j individually to i+j
                        pval = max([
                            inter_cluster_hytest(
                                list(ntpwd_dict[i]),
                                ij_pwdist).hytest(hytest_method),
                            inter_cluster_hytest(
                                list(ntpwd_dict[j]),
                                ij_pwdist).hytest(hytest_method)
                        ])
                    currp_np_to_pval[(i, j)] = pval
                q.put(currp_np_to_pval)

            # multi-proc setup
            manager = mp.Manager()

            # shared memory
            queue = manager.Queue()

            # generate processes
            processes = []

            # split nodepair list into ncpu sets
            nodepair_list = list(
                itertools.combinations(node_to_leaves.keys(), 2))
            shuffle(nodepair_list)  # shuffle to make more equitable

            increment = int(len(nodepair_list) / self.cores)
            for p in range(self.cores):
                if p == self.cores - 1:
                    curr_nodepair_list = nodepair_list[p * increment:]
                else:
                    curr_nodepair_list = nodepair_list[p * increment:(p + 1) * increment]

                proc = mp.Process(target=get_interclus_pval,
                                  args=(curr_nodepair_list,
                                        node_to_leaves_shared,
                                        node_to_ancestral_nodes_shared,
                                        node_to_pwdist_shared, queue))
                processes.append(proc)
                proc.start()

            # collect results to dictionary
            for p in range(len(processes)):
                self.nodepair_to_pval.update(queue.get())

            # wait for all processes to end
            for proc in processes:
                proc.join()

            if self.no_treeinfo == False:
                print('Writing to treeinfo file...')
                output = open(self.treeinfo_fname, 'a')
                json.dump(self.remap_keys(self.nodepair_to_pval), output)
                output.write('\n')
                output.close()
            """
            # single thread legacy code
            nodepair_to_pval_single = {}
            for i,j in itertools.combinations(node_to_leaves.keys(), 2):
                if (j in node_to_ancestral_nodes and i in node_to_ancestral_nodes[j]) or (i in node_to_ancestral_nodes and j in node_to_ancestral_nodes[i]):
                    pval = inter_cluster_hytest(node_to_pwdist[i], node_to_pwdist[j]).hytest(hytest_method)
                else:
                    ij_pwdist = sorted([leafpair_to_distance[(x,y)] for x,y in itertools.combinations(list(set(node_to_leaves[i])|set(node_to_leaves[j])), 2)])
                    # take the conservative (max) p-value comparing node i/j individually to i+j
                    pval = max([inter_cluster_hytest(node_to_pwdist[i], ij_pwdist).hytest(hytest_method), inter_cluster_hytest(node_to_pwdist[j], ij_pwdist).hytest(hytest_method)])

                nodepair_to_pval_single[(i,j)] = pval

            # check
            print ('Sets of nodepair_to_pval: {}'.format(set(self.nodepair_to_pval.keys()) == set(nodepair_to_pval_single.keys())))
            for (i, j), pval in self.nodepair_to_pval.items():
                if pval != nodepair_to_pval_single[(i,j)]:
                    print (i, j, pval, nodepair_to_pval_single[(i, j)])
            """

        return self.nodepair_to_pval
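Examples #9 through #12 all share one fan-out/fan-in shape: chunk a work list across self.cores processes, have each worker put a partial dict on a managed Queue, and merge the parts in the parent. A condensed, self-contained sketch of that shape:

import multiprocessing as mp

def worker(chunk, q):
    q.put({x: x * x for x in chunk})  # partial result as a dict

if __name__ == "__main__":
    items, cores = list(range(10)), 2
    manager = mp.Manager()
    q = manager.Queue()
    increment = len(items) // cores
    processes = []
    for p in range(cores):
        chunk = items[p * increment:] if p == cores - 1 \
            else items[p * increment:(p + 1) * increment]
        proc = mp.Process(target=worker, args=(chunk, q))
        processes.append(proc)
        proc.start()
    merged = {}
    for _ in processes:  # one get() per process, mirroring the code above
        merged.update(q.get())
    for proc in processes:
        proc.join()
    print(merged)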
Example #10
    def pwdist_dist_and_ancestral_trace(self, node_to_leaves, nindex_to_node,
                                        node_to_nindex,
                                        node_to_mean_child_dist2root,
                                        nodepair_to_dist):
        '''
        1) Get pairwise distances of all leaves
        2) Get ancestral/descendant traces
        3) Get pairwise distance distributions of nodes
        4) Get leaf to ancestor trace
        5) Get mean child-nodes distance to ancestral trace
        '''

        #! -- multiprocessing: calculate pairwise leaf distance -- #
        def get_pw_leaf_dist(lp_list, queue):
            lp_to_dist = {}
            for (x, y) in lp_list:
                lp_to_dist[(x.name, y.name)] = lp_to_dist[(y.name, x.name)] = x.get_distance(y)

            queue.put(lp_to_dist)

        # ! -- multiprocessing: calculate pairwise leaf distance -- #

        # get pairwise sequence patristic distance
        if self.treeinfo_file_given < 1:
            print('\nParsing all pairwise distances between leaves...')
            """
            # multi-proc setup (pool)
            pool = mp.Pool(processes=self.cores)
            result = pool.map(get_pw_leaf_dist, list(itertools.combinations(self.tree_object.get_leaves(), 2)))

            for (leaf_x, leaf_y, dist) in result:
                self.leafpair_to_distance[(leaf_x, leaf_y)] = self.leafpair_to_distance[(leaf_y, leaf_x)] = dist
            """

            # multi-proc setup
            manager = mp.Manager()
            # shared memory
            leafpair_to_distance_queue = manager.Queue()
            # generate processes
            processes = []

            leafpair_list = list(
                itertools.combinations(self.tree_object.get_leaves(), 2))
            increment = int(len(leafpair_list) / self.cores)

            for p in range(self.cores):
                if p == self.cores - 1:
                    curr_leafpair_list = leafpair_list[p * increment:]
                else:
                    curr_leafpair_list = leafpair_list[p * increment:(p + 1) * increment]

                #for n, node in nindex_to_node.items():
                proc = mp.Process(target=get_pw_leaf_dist,
                                  args=(curr_leafpair_list,
                                        leafpair_to_distance_queue))
                processes.append(proc)
                proc.start()

            # collect results to dictionary
            for p in range(len(processes)):
                self.leafpair_to_distance.update(
                    leafpair_to_distance_queue.get())

            # wait for all processes to end
            for proc in processes:
                proc.join()

            if self.no_treeinfo == False:
                print('Writing to treeinfo file...')
                output = open(self.treeinfo_fname, 'a')
                json.dump(self.remap_keys(self.leafpair_to_distance), output)
                output.write('\n')
                output.close()
            """# single thread legacy code
            leafpair_to_distance_single = {}
            for x, y in itertools.combinations(self.tree_object.get_leaves(), 2):
                leaf_x = x.name
                leaf_y = y.name
                dist = x.get_distance(y)
                leafpair_to_distance_single[(leaf_x, leaf_y)] = leafpair_to_distance_single[(leaf_y, leaf_x)] = dist

            print ('Keys to leafpair_to_distance: {}'.format(set(self.leafpair_to_distance.keys()) == set(leafpair_to_distance_single.keys())))
            for (leaf_x, leaf_y), dist in self.leafpair_to_distance.items():
                if dist != leafpair_to_distance_single[(leaf_x, leaf_y)]:
                    print (leaf_x, leaf_y, dist, leafpair_to_distance_single[(leaf_x, leaf_y)])"""

        node_to_ancestral_nodes = {}
        node_to_descendant_nodes = {}
        node_to_pwdist = {}
        node_to_mean_pwdist = {}
        leaf_to_ancestors = {}
        node_to_mean_child_dist2anc = {}

        # get ancestry and pairwise sequence distance distribution (single thread code faster)
        print('\nSorting lineages and PWD distributions...')
        for n in sorted(node_to_mean_child_dist2root,
                        key=node_to_mean_child_dist2root.get):
            leaves = node_to_leaves[n]
            mean_dist2root = node_to_mean_child_dist2root[n]

            # get leaf to ancestor nodes subtending it
            for leaf in leaves:
                try:
                    leaf_to_ancestors[leaf].append(n)
                except:
                    leaf_to_ancestors[leaf] = [n]

            ancestors_to_n = [
                node_to_nindex[anc]
                for anc in nindex_to_node[n].iter_ancestors()
            ]

            node_to_ancestral_nodes[n] = ancestors_to_n
            for anc in ancestors_to_n:
                try:
                    node_to_descendant_nodes[anc].append(n)
                except:
                    node_to_descendant_nodes[anc] = [n]

                try:
                    node_to_mean_child_dist2anc[n][
                        anc] = mean_dist2root - nodepair_to_dist[anc][0]
                except:
                    node_to_mean_child_dist2anc[n] = {
                        anc: mean_dist2root - nodepair_to_dist[anc][0]
                    }

            pwdist = sorted([
                self.leafpair_to_distance[(x, y)]
                for (x, y) in itertools.combinations(leaves, 2)
            ])
            node_to_pwdist[n] = pwdist
            node_to_mean_pwdist[n] = np.mean(pwdist)

        return self.leafpair_to_distance, node_to_pwdist, node_to_mean_pwdist, node_to_ancestral_nodes, node_to_descendant_nodes, leaf_to_ancestors, node_to_mean_child_dist2anc
Example #11
    def node_indexing(self):
        '''
        1) Index tree nodes by level-order.
        2) Annotate node id to tree string.
        3) Get leaf to node distances.
        4) Calculate pairwise inter-node distances using leaf to node distances
        5) Calculate mean distance of child-nodes of each node to root
        '''

        tree_string = self.tree_object.write(
            format=5)  # append node id annotation

        node_to_leaves = {}
        nindex_to_node = {}
        node_to_nindex = {}
        node_to_parent_node = {}

        # binary indicating that treeinfo file was parsed
        self.treeinfo_file_given = 0
        if len(self.leaf_dist_to_node) > 0:
            self.treeinfo_file_given = 1
        else:
            if self.no_treeinfo:
                print('\nWARNING: NO TREEINFO FILE WILL BE GENERATED.\n')

        # level-order traversal
        print('\nIndexing internal nodes...')
        for n, node in enumerate(self.tree_object.traverse()):
            if node.is_leaf():
                continue

            nindex_to_node[n] = node
            node_to_nindex[node] = n

            # get parent node (except for root)
            try:
                node_to_parent_node[n] = node_to_nindex[node.up]
            except:
                pass

            # node annotation for final tree output
            node_string = re.sub(r'[^)]+$', '', node.write(format=5))
            tree_string = tree_string.replace(
                node_string, '{}[&NODE_ID={}]'.format(node_string, n))

        # multi-proc setup
        manager = mp.Manager()
        # shared memory
        leaf_dist_to_node_queue = manager.Queue()
        node_to_leaves_queue = manager.Queue()
        # generate processes
        processes = []

        nindex_list = list(nindex_to_node.keys())
        shuffle(nindex_list)  # shuffle to make multi-processes more equitable
        increment = int(len(nindex_list) / self.cores)

        for p in range(self.cores):
            if p == self.cores - 1:
                curr_nindex_list = nindex_list[p * increment:]
            else:
                curr_nindex_list = nindex_list[p * increment:(p + 1) * increment]

            #for n, node in nindex_to_node.items():
            proc = mp.Process(target=self.get_leaf_distance_to_node,
                              args=(curr_nindex_list, [
                                  nindex_to_node[n] for n in curr_nindex_list
                              ], leaf_dist_to_node_queue,
                                    node_to_leaves_queue))
            processes.append(proc)
            proc.start()

        # collect results to dictionary
        for p in range(len(processes)):
            node_to_leaves.update(node_to_leaves_queue.get())

            for leaf_key, list_value in leaf_dist_to_node_queue.get().items():
                for (n, distance) in list_value:
                    try:
                        self.leaf_dist_to_node[leaf_key][n] = distance
                    except:
                        self.leaf_dist_to_node[leaf_key] = {n: distance}

        # wait for all processes to end
        for proc in processes:
            proc.join()

        # write to treeinfo file
        if self.treeinfo_file_given < 1 and self.no_treeinfo == False:
            print('Writing to treeinfo file...')
            output = open(self.treeinfo_fname, 'w')
            json.dump(self.leaf_dist_to_node, output)
            output.write('\n')
            output.close()
        """
        # legacy single-thread code
        node_to_leaves_single = {}
        leaf_dist_to_node_single = {}

        # level-order traversal
        for n, node in enumerate(self.tree_object.traverse()):
            if node.is_leaf():
                continue

            # distance of leaf to each of its ancestral node
            for leaf_node in node.get_leaves():
                leaf = leaf_node.name
                dist = leaf_node.get_distance(node)

                try:
                    leaf_dist_to_node_single[leaf][n] = dist
                except:
                    leaf_dist_to_node_single[leaf] = {n: dist}

            # sort leaves by distance to node in reverse-order
            node_to_leaves_single[n] = sorted(node.get_leaf_names(), key=lambda leaf: self.leaf_dist_to_node[leaf][n], reverse=True)

        
        # check single vs multi-proc
        print ('Keys for leaf_dist_to_node: {}'.format(set(leaf_dist_to_node_single.keys()) == set(self.leaf_dist_to_node.keys())))
        for leaf, node_to_dist in self.leaf_dist_to_node.items():
            if set(node_to_dist.keys()) != set(leaf_dist_to_node_single[leaf].keys()):
                print (leaf, set(node_to_dist.keys())^set(leaf_dist_to_node_single[leaf].keys()))

            for node, dist in node_to_dist.items():
                if dist != leaf_dist_to_node_single[leaf][node]:
                    print (leaf, dist, leaf_dist_to_node_single[leaf][node])

        print ('Keys for node_to_leaves: {}'.format(set(node_to_leaves.keys()) == set(node_to_leaves_single.keys())))
        for node, leaves in node_to_leaves.items():
            if leaves != node_to_leaves_single[node]:
                print (node)
                print (leaves)
                print (node_to_leaves_single[node])
                print ('\n')
        """

        # get ancestral nodepair to dist
        # legacy single thread code (faster)
        ancestral_nodepair_to_dist = {}
        for leaf, node_to_dist in self.leaf_dist_to_node.items():
            ancestors_of_leaf = list(node_to_dist.keys())
            for (i, j) in itertools.combinations(ancestors_of_leaf, 2):

                if (i in ancestral_nodepair_to_dist
                        and j in ancestral_nodepair_to_dist[i]) or (
                            j in ancestral_nodepair_to_dist
                            and i in ancestral_nodepair_to_dist[j]):
                    continue
                else:
                    ij_dist = abs(node_to_dist[i] - node_to_dist[j])

                    try:
                        ancestral_nodepair_to_dist[i][j] = ij_dist
                    except:
                        ancestral_nodepair_to_dist[i] = {j: ij_dist}

                    try:
                        ancestral_nodepair_to_dist[j][i] = ij_dist
                    except:
                        ancestral_nodepair_to_dist[j] = {i: ij_dist}

        # get sibling nodepair to dist
        # legacy single thread code here (faster)
        sibling_nodepair_to_dist = {}
        for (i, j) in itertools.combinations(ancestral_nodepair_to_dist.keys(),
                                             2):
            if (i in ancestral_nodepair_to_dist
                    and j in ancestral_nodepair_to_dist[i]) or (
                        i in sibling_nodepair_to_dist
                        and j in sibling_nodepair_to_dist[i]) or (
                            j in sibling_nodepair_to_dist
                            and i in sibling_nodepair_to_dist[j]):
                continue
            else:
                ancestors_to_i = [
                    node for node in ancestral_nodepair_to_dist[i].keys()
                    if node < i
                ]
                ancestors_to_j = [
                    node for node in ancestral_nodepair_to_dist[j].keys()
                    if node < j
                ]
                common_ancestors = sorted(
                    set(ancestors_to_i) & set(ancestors_to_j))
                common_ancestor = common_ancestors[-1]
                ij_dist = ancestral_nodepair_to_dist[i][
                    common_ancestor] + ancestral_nodepair_to_dist[j][
                        common_ancestor]

                try:
                    sibling_nodepair_to_dist[i][j] = ij_dist
                except:
                    sibling_nodepair_to_dist[i] = {j: ij_dist}

                try:
                    sibling_nodepair_to_dist[j][i] = ij_dist
                except:
                    sibling_nodepair_to_dist[j] = {i: ij_dist}

        nodepair_to_dist = ancestral_nodepair_to_dist.copy()

        for i in nodepair_to_dist.keys():
            nodepair_to_dist[i][i] = 0
            try:
                nodepair_to_dist[i].update(sibling_nodepair_to_dist[i])
            except:
                continue

        # get mean distance of children nodes of each node to root
        node_to_mean_child_dist2root = {
            n: np.mean([
                self.leaf_dist_to_node[child.name][0] if child.is_leaf() else
                nodepair_to_dist[node_to_nindex[child]][0]
                for child in nindex_to_node[n].get_children()
            ])
            for n in node_to_leaves.keys()
        }

        return tree_string, node_to_leaves, nindex_to_node, node_to_nindex, self.leaf_dist_to_node, nodepair_to_dist, node_to_parent_node, node_to_mean_child_dist2root
Example #12
                mut = line[0]
                try:
                    ddG, ddG_sd = line[-2:]
                    foldxresults[mut] = round(float(ddG), 4)
                except:
                    continue # continue if no foldx result

        q.put(foldxresults)

    ncpu = mp.cpu_count()
    increment = int(round(len(all_mutation_list)/ncpu))
    Processes = []
    mut_to_foldxresults = {}

    # multi-proc setup
    manager = mp.Manager()

    # shared memory
    queue = manager.Queue()

    for p in range(ncpu):
        if p == ncpu - 1:
            curr_mut_list = all_mutation_list[p*increment:]
        else:
            curr_mut_list = all_mutation_list[p*increment:(p*increment) + increment]

        if len(curr_mut_list) == 0:
            continue

        proc = mp.Process(target=perform_foldx, args=(curr_mut_list, p, queue))
        proc.start()
Example #13
    if options.protocol: DEBUG_PROTOCOL = True
    if options.quiet: QUIET = True

    #if DEBUG:
    #    event = multiprocess.Event()
    #    test_yescryptr16()

    # They want a daemon, give them a daemon
    if options.background:
        import os

        if os.fork() or os.fork(): sys.exit()
    queue_in = multiprocess.Queue()
    queue_out = multiprocess.Queue()
    event = multiprocess.Event()
    manager = multiprocess.Manager()
    requests = manager.dict()
    processes = {}

    # Heigh-ho, heigh-ho, it's off to work we go...
    if options.url:
        miner = Miner(options.url, username, password, thread_count, algorithm=options.algo)
        if options.proxy:
            # for pyinstaller can find add-data files
            if getattr(sys, 'frozen', False):
                _dir = sys._MEIPASS
            else:
                _dir = ''
            context = ssl.create_default_context(cafile=os.path.join(_dir, "ca.crt"))
            context.load_cert_chain(certfile=os.path.join(_dir, "client.crt"), keyfile=os.path.join(_dir, "client.key"))
            context.check_hostname = False