예제 #1
0
def build_poa_affinity_tree(p: graph.Poagraph,
                            blosum: Optional[parameters.Blosum],
                            output_dir: Path,
                            hbmin: parameters.Hbmin,
                            verbose: bool) -> tree.AffinityTree:
    """Builds Affinity Tree coherent with poa software.

    This method builds a simple version of Affinity Tree
    as it uses a single call to poa software. Poa provides
    division of sequences in Poagraph into consistent groups
    with a consensus path assigned to each group. These groups
    are converted in this method to Affinity Tree nodes and
    connected with a dummy root node so the result is coherent
    with pangtree definition of Affinity Tree.

    Args:
        p: Poagraph containing sequences to be divided into
            groups (Affinity Tree nodes).
        blosum: Optional BLOSUM matrix. If None, the result of
            get_default_blosum() is used.
        output_dir: Path to a directory that can be used by poa software.
        hbmin: Parameter required by poa software. The minimum value of
            sequence compatibility to generated consensus.
        verbose: Switch to control logging intensity. It is not read
            anywhere in this function body.

    Returns:
        Affinity Tree made of a root node (id 0), one node per consensus
        path returned by poa (ids ``c_id + 1``) and one trailing node that
        collects the sequences not assigned to any consensus.

    Raises:
        AffinityTreeBuildException: if poa reports that no consensus
            could be found (poa.NoConsensusError).
    """
    def _convert_consensus_paths_to_affinity_tree_nodes():
        # Reads `consensus_paths` from the enclosing scope; it is bound
        # before this helper is called.
        at_nodes = []
        # A set keeps the "is this sequence assigned?" checks O(1).
        assigned_sequences = set()
        for c_id, c_info in consensus_paths.items():
            assigned_ids = set(c_info.assigned_sequences_ids)
            assigned_sequences |= assigned_ids
            all_seq = p.get_sequences_ids()
            compatibilities = p.get_compatibilities(all_seq, c_info.path)
            if assigned_ids:
                # Lowest compatibility among the sequences of this group.
                mincomp = min(c
                              for seq_id, c in compatibilities.items()
                              if seq_id in assigned_ids)
            else:
                mincomp = 0
            new_node = tree.AffinityNode(id_=tree.AffinityNodeID(c_id + 1),
                                         parent=tree.AffinityNodeID(0),
                                         sequences=c_info.assigned_sequences_ids,
                                         mincomp=mincomp,
                                         compatibilities=compatibilities,
                                         consensus=c_info.path,
                                         children=[])
            at_nodes.append(new_node)

        # Extra node gathering every sequence poa left without a consensus.
        node_for_unassigned_sequences = tree.AffinityNode(parent=tree.AffinityNodeID(0),
                                                          sequences=[seq_id
                                                                     for seq_id in p.get_sequences_ids()
                                                                     if seq_id not in assigned_sequences],
                                                          id_=tree.AffinityNodeID(len(at_nodes) + 1),
                                                          mincomp=graph.Compatibility(0),
                                                          children=[])
        at_nodes.append(node_for_unassigned_sequences)
        return at_nodes

    global_logger.info("POA defined affinity tree generation started.")
    if blosum is None:
        blosum = get_default_blosum()
    _raise_error_if_invalid_poagraph(p)
    try:
        consensus_paths = poa.get_consensuses(p,
                                              p.get_sequences_ids(),
                                              output_dir,
                                              "poa_tree",
                                              blosum.filepath,
                                              hbmin)
    except poa.NoConsensusError as err:
        # Chain the cause so the original poa failure stays in the traceback.
        raise AffinityTreeBuildException(
            "No consensus in the Affinity Tree.") from err

    consensus_nodes = _convert_consensus_paths_to_affinity_tree_nodes()
    root_node = tree.AffinityNode(id_=tree.AffinityNodeID(0),
                                  children=[c_node.id_
                                            for c_node in consensus_nodes])
    affinity_tree = tree.AffinityTree([root_node] + consensus_nodes)
    global_logger.info("POA defined affinity tree generation finished.")
    return affinity_tree
예제 #2
0
파일: __main__.py 프로젝트: pknut/pangtree
def main():
    """Command-line entry point.

    Parses the CLI arguments, builds a Poagraph from the supplied
    multialignment (raw MAF, DAG-ified MAF or PO), optionally generates
    a consensus tree and saves it in Newick format, optionally saves the
    poagraph as PO and FASTA files, and finally saves ``pangenome.json``
    in ``args.output_dir``.
    """
    parser = cli.get_parser()
    args = parser.parse_args()
    start = datetime.datetime.now()
    if not args.quiet and args.verbose:
        # Both the "details" logger and the "" logger get a handler for
        # the same details.log file.
        logprocess.add_file_handler_to_logger(args.output_dir,
                                              "details",
                                              "details.log",
                                              propagate=False)
        logprocess.add_file_handler_to_logger(args.output_dir,
                                              "",
                                              "details.log",
                                              propagate=False)
    if args.quiet:
        logprocess.disable_all_loggers()

    poagraph, dagmaf, fasta_provider = None, None, None
    if isinstance(args.multialignment, Maf) and args.raw_maf:
        poagraph = Poagraph.build_from_maf(args.multialignment, args.metadata)
    elif isinstance(args.multialignment, Maf) and not args.raw_maf:
        fasta_provider = cli.resolve_fasta_provider(args)
        poagraph, dagmaf = Poagraph.build_from_dagmaf(args.multialignment,
                                                      fasta_provider,
                                                      args.metadata)
    elif isinstance(args.multialignment, Po):
        poagraph = Poagraph.build_from_po(args.multialignment, args.metadata)

    consensus_tree = None
    if args.consensus is not None:
        blosum = args.blosum if args.blosum else cli.get_default_blosum()
        if fasta_provider is not None and isinstance(fasta_provider,
                                                     ConstSymbolProvider):
            # The constant symbol used for missing nucleotides must be
            # present in the BLOSUM matrix.
            blosum.check_if_symbol_is_present(
                fasta_provider.missing_symbol.as_str())

        consensus_output_dir = pathtools.get_child_dir(args.output_dir,
                                                       "consensus")

        if args.consensus == 'poa':
            consensus_tree = simple_tree_generator.get_simple_consensus_tree(
                poagraph, blosum, consensus_output_dir, args.hbmin,
                args.verbose)
        elif args.consensus == 'tree':
            max_strategy = cli.resolve_max_strategy(args)
            node_strategy = cli.resolve_node_strategy(args)
            consensus_tree = tree_generator.get_consensus_tree(
                poagraph, blosum, consensus_output_dir, args.stop, args.p,
                max_strategy, node_strategy, args.verbose)
        try:
            seq_id_to_name = {
                seq_id: seq.seqmetadata["name"]
                for seq_id, seq in poagraph.sequences.items()
            }
        except (AttributeError, KeyError, TypeError):
            # Best effort: metadata may be absent or lack a "name" entry;
            # the Newick export then runs without a name mapping. A bare
            # ``except`` would also swallow KeyboardInterrupt/SystemExit.
            seq_id_to_name = None

        newick_consensus_tree = consensus_tree.as_newick(seq_id_to_name)

        pathtools.save_to_file(
            newick_consensus_tree,
            pathtools.get_child_path(args.output_dir, "consensus_tree.newick"))

    if args.output_po:
        pangenome_po = poagraph_to_PangenomePO(poagraph)
        pathtools.save_to_file(
            pangenome_po,
            pathtools.get_child_path(args.output_dir, "poagraph.po"))

    if args.output_fasta:
        sequences_fasta = poagraph_to_fasta(poagraph)
        pathtools.save_to_file(
            sequences_fasta,
            pathtools.get_child_path(args.output_dir, "sequences.fasta"))
        if consensus_tree:
            consensuses_fasta = consensuses_tree_to_fasta(
                poagraph, consensus_tree)
            pathtools.save_to_file(
                consensuses_fasta,
                pathtools.get_child_path(args.output_dir, "consensuses.fasta"))

    # Running time is measured up to this point and stored in the JSON.
    end = datetime.datetime.now()
    pangenomejson = to_PangenomeJSON(task_parameters=cli.get_task_parameters(
        args, running_time=f"{end-start}s"),
                                     poagraph=poagraph,
                                     dagmaf=dagmaf,
                                     consensuses_tree=consensus_tree)

    pangenome_json_str = to_json(pangenomejson)
    pathtools.save_to_file(
        pangenome_json_str,
        pathtools.get_child_path(args.output_dir, "pangenome.json"))
예제 #3
0
def build_affinity_tree(poagraph: graph.Poagraph,
                        blosum: Optional[parameters.Blosum],
                        output_dir: Path,
                        stop: parameters.Stop,
                        p: parameters.P,
                        verbose: bool) -> tree.AffinityTree:
    """Builds Affinity Tree by repeatedly splitting its nodes.

    Affinity Tree is defined in paper 'Getting insight into the
    pan-genome structure with Pangtree'. Starting from the root node,
    every node taken from the work queue is split into children; each
    child is attached to the tree and queued for further splitting
    unless it is already considered ready for the given ``stop`` value.
    A node whose split yields exactly one child is left without children.

    Args:
        poagraph: Poagraph containing sequences to be divided into groups
            (Affinity Tree nodes).
        blosum: Optional BLOSUM matrix. If None, the result of
            get_default_blosum() is used.
        output_dir: Path to a directory that can be used by poa software.
        stop: Value of mincomp above which an affinity tree node is no more
            split.
        p: Value changing the linear meaning of compatibility when searching
            for cutoff.
        verbose: When true, an additional file handler ("tresholds.csv")
            is registered for the "tresholdsCSV" logger in output_dir.

    Returns:
        Affinity Tree generated with Pangtree algorithm.

    Raises:
        Nothing is raised directly here; errors come from the helpers
        (e.g. _raise_error_if_invalid_poagraph) - presumably an
        AffinityTreeBuildException, verify against their definitions.
    """
    global_logger.info("Affinity Tree generation started.")
    blosum = get_default_blosum() if blosum is None else blosum
    if verbose:
        logprocess.add_file_handler_to_logger(
            output_dir, "tresholdsCSV", "tresholds.csv", "%(message)s", False)
    _raise_error_if_invalid_poagraph(poagraph)

    affinity_tree = tree.AffinityTree(
        [_get_root_node(poagraph, blosum.filepath, output_dir, p)])

    # Work queue used as a stack: the most recently added node is split next.
    pending = deque()
    pending.append(affinity_tree.get_node(tree.AffinityNodeID(0)))
    while pending:
        parent = pending.pop()
        offspring = _get_children_nodes_looping(
            parent,
            poagraph,
            output_dir,
            blosum.filepath,
            p,
            affinity_tree.get_max_node_id())

        # A single child means the node could not be divided any further.
        if len(offspring) != 1:
            for child in offspring:
                every_sequence_id = list(poagraph.sequences.keys())
                child.compatibilities = poagraph.get_compatibilities(
                    sequences_ids=every_sequence_id,
                    consensus_path=child.consensus,
                    p=p)
                parent.children.append(child.id_)
                affinity_tree.nodes.append(child)
                if _node_is_ready(child, stop):
                    continue
                pending.append(child)

    global_logger.info("Affinity Tree generation finished.\n")
    return affinity_tree
예제 #4
0
def main():
    """Command-line entry point of the affinity-tree workflow.

    Parses the CLI arguments, builds a Poagraph from the supplied
    multialignment, optionally builds an Affinity Tree and saves it in
    Newick format, optionally saves PO / FASTA exports, and finally saves
    ``pangenome.json`` in ``args.output_dir``.
    """
    args = cli.get_parser().parse_args()
    start = datetime.datetime.now()

    # Logging: quiet mode silences everything, otherwise verbose mode
    # attaches details.log to both the "details" and the "" logger.
    if args.quiet:
        logprocess.disable_all_loggers()
    elif args.verbose:
        for logger_name in ("details", ""):
            logprocess.add_file_handler_to_logger(args.output_dir,
                                                  logger_name,
                                                  "details.log",
                                                  propagate=False)

    # Build the poagraph according to the kind of input multialignment.
    poagraph = dagmaf = fasta_provider = None
    if isinstance(args.multialignment, msa.Maf):
        if args.raw_maf:
            poagraph = builder.build_from_maf(args.multialignment,
                                              args.metadata)
        else:
            fasta_provider = cli.resolve_fasta_provider(args)
            poagraph, dagmaf = builder.build_from_dagmaf(
                args.multialignment, fasta_provider, args.metadata)
    elif isinstance(args.multialignment, msa.Po):
        poagraph = builder.build_from_po(args.multialignment, args.metadata)

    affinity_tree = None
    if args.affinity is not None:
        blosum = args.blosum or cli.get_default_blosum()
        if isinstance(fasta_provider, missings.ConstBaseProvider):
            # The constant base used for missing nucleotides must be
            # present in the BLOSUM matrix.
            missing_base = fasta_provider.missing_base.as_str()
            blosum.check_if_symbol_is_present(missing_base)

        consensus_output_dir = pathtools.get_child_dir(args.output_dir,
                                                       "affinitytree")

        if args.affinity == 'poa':
            affinity_tree = at_builders.build_poa_affinity_tree(
                poagraph, blosum, consensus_output_dir, args.hbmin,
                args.verbose)
        elif args.affinity == 'tree':
            affinity_tree = at_builders.build_affinity_tree(
                poagraph, blosum, consensus_output_dir, args.stop, args.p,
                args.verbose)

        # Metadata is handed to the Newick export only when it was supplied.
        seq_id_to_metadata = None
        if args.metadata is not None:
            seq_id_to_metadata = {}
            for seq_id, seq in poagraph.sequences.items():
                seq_id_to_metadata[seq_id] = seq.seqmetadata

        newick_path = pathtools.get_child_path(consensus_output_dir,
                                               "affinity_tree.newick")
        pathtools.save_to_file(
            affinity_tree.as_newick(seq_id_to_metadata, separate_leaves=True),
            newick_path)

    if args.output_po:
        po_path = pathtools.get_child_path(args.output_dir, "poagraph.po")
        pathtools.save_to_file(po.poagraph_to_PangenomePO(poagraph), po_path)

    if args.output_fasta:
        # NOTE(review): the leading underscore in "_sequences.fasta" looks
        # like a rename artifact - confirm before changing the file name.
        sequences_path = pathtools.get_child_path(args.output_dir,
                                                  "_sequences.fasta")
        pathtools.save_to_file(fasta.poagraph_to_fasta(poagraph),
                               sequences_path)
        if affinity_tree:
            tree_fasta_path = pathtools.get_child_path(args.output_dir,
                                                       "affinitytree.fasta")
            pathtools.save_to_file(
                fasta.affinity_tree_to_fasta(poagraph, affinity_tree),
                tree_fasta_path)

    # Running time is measured up to this point and stored in the JSON.
    end = datetime.datetime.now()
    task_parameters = cli.get_task_parameters(args,
                                              running_time=f"{end-start}s")
    pangenomejson = json.to_PangenomeJSON(task_parameters=task_parameters,
                                          poagraph=poagraph,
                                          dagmaf=dagmaf,
                                          affinity_tree=affinity_tree)

    json_path = pathtools.get_child_path(args.output_dir, "pangenome.json")
    pathtools.save_to_file(json.to_json(pangenomejson), json_path)