예제 #1
0
    def _convert_consensus_paths_to_affinity_tree_nodes():
        """Convert consensus groups into Affinity Tree nodes below node 0.

        Reads ``consensus_paths`` and ``p`` from the enclosing scope.
        For every entry of ``consensus_paths`` a node is created whose id
        is the consensus id plus one and whose parent is node 0. One extra
        node is appended at the end; it collects the sequences that were
        not assigned to any consensus.

        Returns:
            List of ``tree.AffinityNode``: one per consensus followed by
            the node for unassigned sequences.
        """
        # The set of sequence ids is loop-invariant, so fetch it once.
        all_sequences_ids = p.get_sequences_ids()
        at_nodes = []
        # A set keeps the "is this sequence assigned?" checks O(1).
        assigned_sequences = set()
        for c_id, c_info in consensus_paths.items():
            group_ids = c_info.assigned_sequences_ids
            assigned_sequences.update(group_ids)
            compatibilities = p.get_compatibilities(all_sequences_ids,
                                                    c_info.path)
            if group_ids:
                group_ids_lookup = set(group_ids)
                mincomp = min(comp
                              for seq_id, comp in compatibilities.items()
                              if seq_id in group_ids_lookup)
            else:
                # Same type as the mincomp of the unassigned-sequences
                # node below, instead of a bare int.
                mincomp = graph.Compatibility(0)
            at_nodes.append(tree.AffinityNode(id_=tree.AffinityNodeID(c_id + 1),
                                              parent=tree.AffinityNodeID(0),
                                              sequences=group_ids,
                                              mincomp=mincomp,
                                              compatibilities=compatibilities,
                                              consensus=c_info.path,
                                              children=[]))

        unassigned_sequences_ids = [seq_id
                                    for seq_id in all_sequences_ids
                                    if seq_id not in assigned_sequences]
        node_for_unassigned_sequences = tree.AffinityNode(
            parent=tree.AffinityNodeID(0),
            sequences=unassigned_sequences_ids,
            id_=tree.AffinityNodeID(len(at_nodes) + 1),
            mincomp=graph.Compatibility(0),
            children=[])
        at_nodes.append(node_for_unassigned_sequences)
        return at_nodes
예제 #2
0
def _get_children_nodes_looping(node: tree.AffinityNode,
                                poagraph: graph.Poagraph,
                                output_dir: Path,
                                blosum_path: Path,
                                p: parameters.P,
                                current_max_affinity_node_id: int) -> List[tree.AffinityNode]:
    """Generate children of the given Affinity Tree node.

    Children are produced one at a time until every sequence of ``node``
    is assigned to some child. For each child the consensus is refined
    iteratively: a consensus is computed for the current candidates, the
    compatibilities of all still-unassigned sequences to that consensus
    are calculated, and the qualified sequences are selected. The child
    is accepted when the qualified sequences equal the candidates the
    consensus was built from, or when the retry limit is reached;
    otherwise the qualified sequences become the next candidates.

    Args:
        node: Affinity Tree node to be split.
        poagraph: Poagraph the consensuses and compatibilities are
            computed on.
        output_dir: Directory passed to ``poa.get_consensuses``.
        blosum_path: Path of the BLOSUM file passed to
            ``poa.get_consensuses``.
        p: Parameter forwarded to ``poagraph.get_compatibilities``.
        current_max_affinity_node_id: Offset for new node ids; the k-th
            child (counting from 1) gets id
            ``current_max_affinity_node_id + k``.

    Returns:
        The list of created children nodes, in creation order.
    """
    # Number of retries after which a child is accepted unconditionally.
    max_attempts = 10

    children_nodes: List[tree.AffinityNode] = []
    not_assigned_sequences_ids: List[msa.SequenceID] = node.sequences
    detailed_logger.info(f"""Getting children nodes for
                             affinity node {node.id_}...""")

    affinity_node_id = 0
    so_far_cutoffs: List[graph.Compatibility] = []
    while not_assigned_sequences_ids:
        detailed_logger.info(f"### Getting child {len(so_far_cutoffs)}...")
        attempt = 0
        current_candidates = not_assigned_sequences_ids
        while True:
            consensus_candidate = poa.get_consensuses(poagraph,
                                                      current_candidates,
                                                      output_dir,
                                                      f"parent_{node.id_}_child_{len(so_far_cutoffs)}_attempt_{attempt}",
                                                      blosum_path,
                                                      parameters.Hbmin(0),
                                                      specific_consensuses_id=[0])[0].path
            # Compatibilities are computed for every unassigned sequence,
            # not only for the candidates the consensus was built from.
            compatibilities_to_consensus_candidate = poagraph.get_compatibilities(
                sequences_ids=not_assigned_sequences_ids,
                consensus_path=consensus_candidate,
                p=p)
            # The parent's mincomp takes part in the selection under an
            # artificial "parent" sequence id.
            compatibilities_to_consensus_candidate[msa.SequenceID("parent")] = node.mincomp
            qualified_sequences_ids_candidates, _ = _get_qualified_sequences_ids_and_cutoff(
                compatibilities_to_max_c=compatibilities_to_consensus_candidate,
                so_far_cutoffs=so_far_cutoffs,
                splitted_node_id=node.id_)

            attempts_exhausted = attempt == max_attempts
            if qualified_sequences_ids_candidates != current_candidates and not attempts_exhausted:
                # Not stable yet: retry with the narrowed candidate set.
                current_candidates = qualified_sequences_ids_candidates
                attempt += 1
                continue

            if attempts_exhausted:
                detailed_logger.info(f"Attempt treshold {max_attempts} exceeded!")
            affinity_node_id += 1

            affinity_node = tree.AffinityNode(
                id_=tree.AffinityNodeID(current_max_affinity_node_id + affinity_node_id),
                parent=node.id_,
                sequences=qualified_sequences_ids_candidates,
                mincomp=_get_min_comp(node_sequences_ids=qualified_sequences_ids_candidates,
                                      comps_to_consensus=compatibilities_to_consensus_candidate),
                consensus=graph.SeqPath(consensus_candidate))
            children_nodes.append(affinity_node)
            # NOTE: the set difference does not preserve the original
            # order of the remaining sequence ids.
            not_assigned_sequences_ids = list(set(not_assigned_sequences_ids) - set(qualified_sequences_ids_candidates))
            so_far_cutoffs.append(affinity_node.mincomp)
            break

    detailed_logger.info("Children nodes generated.")

    return children_nodes
예제 #3
0
def _get_root_node(poagraph: graph.Poagraph,
                   blosum_path: Path,
                   output_dir: Path,
                   p: parameters.P) -> tree.AffinityNode:
    """Create the root node of the Affinity Tree.

    The node is assigned a consensus path and all sequences present
    in the poagraph.

    Args:
        poagraph: Poagraph whose sequences form the root node.
        blosum_path: Path of the BLOSUM file passed to
            ``poa.get_consensuses``.
        output_dir: Directory passed to ``poa.get_consensuses``.
        p: Parameter forwarded to ``poagraph.get_compatibilities``.

    Returns:
        The root node (id 0) with its consensus, compatibilities of all
        sequences to that consensus and the resulting mincomp.

    Raises:
        AffinityTreeBuildException: If ``poa.get_consensuses`` raises
            ``poa.NoConsensusError``.
    """

    detailed_logger.info("Getting the root affinity node...")
    all_poagraph_sequences_ids = poagraph.get_sequences_ids()
    try:
        consensus_paths = poa.get_consensuses(poagraph,
                                              all_poagraph_sequences_ids,
                                              output_dir,
                                              "root",
                                              blosum_path,
                                              hbmin=parameters.Hbmin(0),
                                              specific_consensuses_id=[0])
    except poa.NoConsensusError as err:
        # Chain explicitly so the original poa failure stays the cause.
        raise AffinityTreeBuildException("Cannot find root consensus.") from err

    root_consensus = consensus_paths[0].path
    compatibilities = poagraph.get_compatibilities(all_poagraph_sequences_ids,
                                                   root_consensus,
                                                   p=p)
    affinity_node = tree.AffinityNode(id_=tree.AffinityNodeID(0),
                                      sequences=[*poagraph.sequences.keys()],
                                      mincomp=_get_min_comp(all_poagraph_sequences_ids, compatibilities),
                                      compatibilities=compatibilities,
                                      consensus=root_consensus)
    detailed_logger.info(f"New affinity node created: {affinity_node!s}")
    return affinity_node
예제 #4
0
def build_poa_affinity_tree(p: graph.Poagraph,
                            blosum: Optional[parameters.Blosum],
                            output_dir: Path,
                            hbmin: parameters.Hbmin,
                            verbose: bool) -> tree.AffinityTree:
    """Builds Affinity Tree coherent with poa software.

    This method builds a simple version of Affinity Tree
    as it uses a single call to poa software. Poa provides
    division of sequences in Poagraph into consistent groups
    with a consensus path assigned to each group. These groups
    are converted in this method to Affinity Tree nodes and
    connected with a dummy root node so the result is coherent
    with pangtree definition of Affinity Tree.

    Args:
        p: Poagraph containing sequences to be divided into
            groups (Affinity Tree nodes).
        blosum: BLOSUM matrix. If None, the value returned by
            ``get_default_blosum()`` is used.
        output_dir: Path to a directory that can be used by poa software.
        hbmin: Parameter required by poa software. The minimum value of
            sequence compatibility to generated consensus.
        verbose: Switch to control logging intensity. It is not read
            anywhere in this function body.

    Returns:
        Affinity Tree consisting of a dummy root node (id 0), one node
        per consensus found by poa and one node gathering the sequences
        that were not assigned to any consensus.

    Raises:
        AffinityTreeBuildException: if consensuses cannot be found.
    """
    def _convert_consensus_paths_to_affinity_tree_nodes():
        """Convert ``consensus_paths`` into nodes being children of node 0.

        Reads ``consensus_paths`` and ``p`` from the enclosing scope.
        Each consensus becomes a node with id equal to the consensus id
        plus one; a final node collects the unassigned sequences.
        """
        # The set of sequence ids is loop-invariant, so fetch it once.
        all_sequences_ids = p.get_sequences_ids()
        at_nodes = []
        # A set keeps the "is this sequence assigned?" checks O(1).
        assigned_sequences = set()
        for c_id, c_info in consensus_paths.items():
            group_ids = c_info.assigned_sequences_ids
            assigned_sequences.update(group_ids)
            compatibilities = p.get_compatibilities(all_sequences_ids,
                                                    c_info.path)
            if group_ids:
                group_ids_lookup = set(group_ids)
                mincomp = min(comp
                              for seq_id, comp in compatibilities.items()
                              if seq_id in group_ids_lookup)
            else:
                # Same type as the mincomp of the unassigned-sequences
                # node below, instead of a bare int.
                mincomp = graph.Compatibility(0)
            at_nodes.append(tree.AffinityNode(id_=tree.AffinityNodeID(c_id + 1),
                                              parent=tree.AffinityNodeID(0),
                                              sequences=group_ids,
                                              mincomp=mincomp,
                                              compatibilities=compatibilities,
                                              consensus=c_info.path,
                                              children=[]))

        unassigned_sequences_ids = [seq_id
                                    for seq_id in all_sequences_ids
                                    if seq_id not in assigned_sequences]
        node_for_unassigned_sequences = tree.AffinityNode(
            parent=tree.AffinityNodeID(0),
            sequences=unassigned_sequences_ids,
            id_=tree.AffinityNodeID(len(at_nodes) + 1),
            mincomp=graph.Compatibility(0),
            children=[])
        at_nodes.append(node_for_unassigned_sequences)
        return at_nodes

    global_logger.info("POA defined affinity tree generation started.")
    if blosum is None:
        blosum = get_default_blosum()
    _raise_error_if_invalid_poagraph(p)
    try:
        consensus_paths = poa.get_consensuses(p,
                                              p.get_sequences_ids(),
                                              output_dir,
                                              "poa_tree",
                                              blosum.filepath,
                                              hbmin)
    except poa.NoConsensusError as err:
        # Chain explicitly so the original poa failure stays the cause.
        raise AffinityTreeBuildException("No consensus in the Affinity Tree.") from err

    consensus_nodes = _convert_consensus_paths_to_affinity_tree_nodes()
    # The dummy root only links the consensus nodes together.
    root_node = tree.AffinityNode(id_=tree.AffinityNodeID(0),
                                  children=[c_node.id_
                                            for c_node in consensus_nodes])
    affinity_tree = tree.AffinityTree([root_node] + consensus_nodes)
    global_logger.info("POA defined affinity tree generation finished.")
    return affinity_tree