def _convert_consensus_paths_to_affinity_tree_nodes():
    """Convert poa consensus paths into Affinity Tree nodes below root 0.

    Takes no arguments: ``consensus_paths`` (mapping of consensus id to
    consensus info) and ``p`` (the poagraph) are read from the enclosing
    scope, as are the ``tree`` and ``graph`` modules.

    Returns:
        A list with one ``tree.AffinityNode`` per consensus path (node id is
        ``c_id + 1``, parent is node 0), followed by one extra node that
        collects every sequence not assigned to any consensus.

    Raises:
        ValueError: if a consensus has assigned sequences but none of them
            appears in the compatibilities returned by the poagraph
            (``min`` of an empty sequence).
    """
    # Loop-invariant: the list of all sequence ids is needed for every
    # consensus and again for the "unassigned" node.
    all_seq = p.get_sequences_ids()

    at_nodes = []
    # A set keeps the membership tests below O(1) instead of O(n).
    assigned_sequences = set()
    for c_id, c_info in consensus_paths.items():
        assigned_ids = set(c_info.assigned_sequences_ids)
        assigned_sequences |= assigned_ids

        compatibilities = p.get_compatibilities(all_seq, c_info.path)
        if assigned_ids:
            mincomp = min(c for seq_id, c in compatibilities.items()
                          if seq_id in assigned_ids)
        else:
            # Same "zero" object the unassigned node uses below, rather than
            # a bare int, so every node carries the same mincomp type.
            mincomp = graph.Compatibility(0)

        at_nodes.append(tree.AffinityNode(id_=tree.AffinityNodeID(c_id + 1),
                                          parent=tree.AffinityNodeID(0),
                                          sequences=c_info.assigned_sequences_ids,
                                          mincomp=mincomp,
                                          compatibilities=compatibilities,
                                          consensus=c_info.path,
                                          children=[]))

    node_for_unassigned_sequences = tree.AffinityNode(
        parent=tree.AffinityNodeID(0),
        sequences=[seq_id for seq_id in all_seq
                   if seq_id not in assigned_sequences],
        id_=tree.AffinityNodeID(len(at_nodes) + 1),
        mincomp=graph.Compatibility(0),
        children=[])
    at_nodes.append(node_for_unassigned_sequences)
    return at_nodes
def _get_children_nodes_looping(node: tree.AffinityNode,
                                poagraph: graph.Poagraph,
                                output_dir: Path,
                                blosum_path: Path,
                                p: parameters.P,
                                current_max_affinity_node_id: int) -> List[tree.AffinityNode]:
    """Split the sequences of an Affinity Tree node into child nodes.

    While unassigned sequences remain, a child is built iteratively: poa is
    asked for a consensus of the current candidates, compatibilities of all
    still-unassigned sequences to that consensus are computed (with the
    parent's ``mincomp`` added under the pseudo id ``"parent"``), and
    ``_get_qualified_sequences_ids_and_cutoff`` selects the qualified ids.
    The child is accepted once the qualified ids equal the candidates, or
    unconditionally on attempt number 10; otherwise the qualified ids become
    the next candidates.

    Args:
        node: Node whose sequences are distributed among the children.
        poagraph: Poagraph used for consensus and compatibility queries.
        output_dir: Directory handed to poa.
        blosum_path: Path of the BLOSUM file handed to poa.
        p: Parameter forwarded to ``poagraph.get_compatibilities``.
        current_max_affinity_node_id: Offset for new node ids; the k-th child
            (counting from 1) gets id ``current_max_affinity_node_id + k``.

    Returns:
        The created child nodes, in creation order.
    """
    remaining_ids: List[msa.SequenceID] = node.sequences
    children: List[tree.AffinityNode] = []
    detailed_logger.info(f"Getting children nodes for affinity node {node.id_}...")
    cutoffs: List[graph.Compatibility] = []
    created = 0

    while remaining_ids:
        detailed_logger.info(f"### Getting child {len(cutoffs)}...")
        candidates = remaining_ids
        attempt = 0
        while True:
            run_name = f"parent_{node.id_}_child_{len(cutoffs)}_attempt_{attempt}"
            consensuses = poa.get_consensuses(poagraph,
                                              candidates,
                                              output_dir,
                                              run_name,
                                              blosum_path,
                                              parameters.Hbmin(0),
                                              specific_consensuses_id=[0])
            consensus = consensuses[0].path

            # Compatibilities are measured for every unassigned sequence, not
            # only for the current candidates.
            comps = poagraph.get_compatibilities(sequences_ids=remaining_ids,
                                                 consensus_path=consensus,
                                                 p=p)
            comps[msa.SequenceID("parent")] = node.mincomp

            qualified_ids, _ = _get_qualified_sequences_ids_and_cutoff(
                compatibilities_to_max_c=comps,
                so_far_cutoffs=cutoffs,
                splitted_node_id=node.id_)

            converged = qualified_ids == candidates
            attempts_exhausted = attempt == 10
            if not (converged or attempts_exhausted):
                # Not stable yet: retry with the narrowed candidate set.
                candidates = qualified_ids
                attempt += 1
                continue

            if attempts_exhausted:
                detailed_logger.info("Attempt treshold 10 exceeded!")

            created += 1
            child = tree.AffinityNode(
                id_=tree.AffinityNodeID(current_max_affinity_node_id + created),
                parent=node.id_,
                sequences=qualified_ids,
                mincomp=_get_min_comp(node_sequences_ids=qualified_ids,
                                      comps_to_consensus=comps),
                consensus=graph.SeqPath(consensus))
            children.append(child)
            remaining_ids = list(set(remaining_ids) - set(qualified_ids))
            cutoffs.append(child.mincomp)
            break

    detailed_logger.info("Children nodes generated.")
    return children
def _get_root_node(poagraph: graph.Poagraph,
                   blosum_path: Path,
                   output_dir: Path,
                   p: parameters.P) -> tree.AffinityNode:
    """Creates root node of the Affinity Tree.

    The node has assigned a consensus path and all sequences present in
    poagraph.

    Args:
        poagraph: Poagraph whose sequences form the root node.
        blosum_path: Path of the BLOSUM file handed to poa.
        output_dir: Directory handed to poa.
        p: Parameter forwarded to ``poagraph.get_compatibilities``.

    Returns:
        Affinity node with id 0, the first consensus returned by poa, the
        compatibilities of all sequences to that consensus and their minimum
        as ``mincomp``.

    Raises:
        AffinityTreeBuildException: if poa reports no consensus
            (``poa.NoConsensusError``); the original error is chained as the
            cause.
    """
    detailed_logger.info("Getting the root affinity node...")
    all_poagraph_sequences_ids = poagraph.get_sequences_ids()
    try:
        consensus_paths = poa.get_consensuses(poagraph,
                                              all_poagraph_sequences_ids,
                                              output_dir,
                                              "root",
                                              blosum_path,
                                              hbmin=parameters.Hbmin(0),
                                              specific_consensuses_id=[0])
    except poa.NoConsensusError as err:
        # Chain explicitly so the poa failure stays visible in the traceback.
        raise AffinityTreeBuildException("Cannot find root consensus.") from err

    root_consensus = consensus_paths[0].path
    compatibilities = poagraph.get_compatibilities(all_poagraph_sequences_ids,
                                                   root_consensus,
                                                   p=p)
    affinity_node = tree.AffinityNode(id_=tree.AffinityNodeID(0),
                                      sequences=[*poagraph.sequences.keys()],
                                      mincomp=_get_min_comp(all_poagraph_sequences_ids,
                                                            compatibilities),
                                      compatibilities=compatibilities,
                                      consensus=root_consensus)
    detailed_logger.info(f"New affinity node created: {str(affinity_node)}")
    return affinity_node
def build_poa_affinity_tree(p: graph.Poagraph,
                            blosum: Optional[parameters.Blosum],
                            output_dir: Path,
                            hbmin: parameters.Hbmin,
                            verbose: bool) -> tree.AffinityTree:
    """Builds Affinity Tree coherent with poa software.

    This method builds a simple version of Affinity Tree as it uses a single
    call to poa software. Poa provides division of sequences in Poagraph into
    consistent groups with a consensus path assigned to each group. These
    groups are converted in this method to Affinity Tree nodes and connected
    with a dummy root node so the result is coherent with pangtree definition
    of Affinity Tree.

    Args:
        p: Poagraph containing sequences to be divided into groups
            (Affinity Tree nodes).
        blosum: BLOSUM matrix. If None, the matrix returned by
            ``get_default_blosum()`` is used.
        output_dir: Path to a directory that can be used by poa software.
        hbmin: Parameter required by poa software. The minimum value of
            sequence compatibility to generated consensus.
        verbose: Switch to control logging intensity. Not read by this
            function's body.

    Returns:
        Affinity Tree made of a dummy root (id 0), one node per poa consensus
        and one node collecting sequences not assigned to any consensus.

    Raises:
        AffinityTreeBuildException: if consensuses cannot be found; the
            underlying ``poa.NoConsensusError`` is chained as the cause.
    """

    def _convert_consensus_paths_to_affinity_tree_nodes():
        """Turn ``consensus_paths`` into child nodes of the dummy root."""
        all_seq = p.get_sequences_ids()
        at_nodes = []
        # A set keeps the membership tests below O(1) instead of O(n).
        assigned_sequences = set()
        for c_id, c_info in consensus_paths.items():
            assigned_ids = set(c_info.assigned_sequences_ids)
            assigned_sequences |= assigned_ids

            compatibilities = p.get_compatibilities(all_seq, c_info.path)
            if assigned_ids:
                mincomp = min(c for seq_id, c in compatibilities.items()
                              if seq_id in assigned_ids)
            else:
                # Same "zero" object the unassigned node uses below, rather
                # than a bare int, so every node carries the same type.
                mincomp = graph.Compatibility(0)

            at_nodes.append(tree.AffinityNode(id_=tree.AffinityNodeID(c_id + 1),
                                              parent=tree.AffinityNodeID(0),
                                              sequences=c_info.assigned_sequences_ids,
                                              mincomp=mincomp,
                                              compatibilities=compatibilities,
                                              consensus=c_info.path,
                                              children=[]))

        node_for_unassigned_sequences = tree.AffinityNode(
            parent=tree.AffinityNodeID(0),
            sequences=[seq_id for seq_id in all_seq
                       if seq_id not in assigned_sequences],
            id_=tree.AffinityNodeID(len(at_nodes) + 1),
            mincomp=graph.Compatibility(0),
            children=[])
        at_nodes.append(node_for_unassigned_sequences)
        return at_nodes

    global_logger.info("POA defined affinity tree generation started.")
    if blosum is None:
        blosum = get_default_blosum()
    _raise_error_if_invalid_poagraph(p)

    try:
        consensus_paths = poa.get_consensuses(p,
                                              p.get_sequences_ids(),
                                              output_dir,
                                              "poa_tree",
                                              blosum.filepath,
                                              hbmin)
    except poa.NoConsensusError as err:
        # Chain explicitly so the poa failure stays visible in the traceback.
        raise AffinityTreeBuildException("No consensus in the Affinity Tree.") from err

    consensus_nodes = _convert_consensus_paths_to_affinity_tree_nodes()
    root_node = tree.AffinityNode(id_=tree.AffinityNodeID(0),
                                  children=[c_node.id_ for c_node in consensus_nodes])
    affinity_tree = tree.AffinityTree([root_node] + consensus_nodes)
    global_logger.info("POA defined affinity tree generation finished.")
    return affinity_tree