Ejemplo n.º 1
0
def get_consensus_tree(poagraph: Poagraph,
                       blosum: Blosum,
                       output_dir: Path,
                       stop: Stop,
                       p: P,
                       max_strategy: FindMaxCutoff,
                       node_strategy: FindNodeCutoff,
                       verbose: bool) -> ConsensusTree:
    """Build a consensus tree by repeatedly splitting its nodes.

    Starting from the node with id 0 of the freshly initialised tree, each
    pending node is handed to ``_get_children_nodes_looping``. When that
    call yields exactly one child, nothing is attached to the node.
    Otherwise every child gets its compatibilities to all poagraph
    sequences computed, is attached to the parent and to the tree, and is
    queued for further splitting unless ``_node_is_ready`` accepts it.

    Args:
        poagraph: Poagraph whose sequences are grouped into tree nodes.
        blosum: Only its ``filepath`` is used and passed to the helpers.
        output_dir: Directory passed to the helpers and, when ``verbose``
            is set, to the thresholds file logger.
        stop: Passed to ``_node_is_ready`` to decide whether a child needs
            further splitting.
        p: Passed to the helpers and to ``poagraph.get_compatibilities``.
        max_strategy: Forwarded to ``_get_children_nodes_looping``.
        node_strategy: Forwarded to ``_get_children_nodes_looping``.
        verbose: When true, a "tresholds.csv" file handler is registered.

    Returns:
        The populated consensus tree.
    """
    global_logger.info("Consensuses Tree generation started.")
    if verbose:
        logprocess.add_file_handler_to_logger(
            output_dir, "tresholdsCSV", "tresholds.csv", "%(message)s", False)
    _raise_error_if_invalid_poagraph(poagraph)
    consensus_tree = _init_consensus_tree(poagraph,
                                          blosum.filepath,
                                          output_dir,
                                          p)

    root = consensus_tree.get_node(ConsensusNodeID(0))
    pending = deque([root])
    while pending:
        # pop() takes from the right end, so nodes are handled last-in-first-out.
        parent = pending.pop()

        offspring = _get_children_nodes_looping(
            parent,
            poagraph,
            output_dir,
            blosum.filepath,
            p,
            max_strategy,
            node_strategy,
            consensus_tree.get_max_node_id())

        if len(offspring) != 1:
            for kid in offspring:
                # Changed 24.06: compatibilities use the caller's p
                # (previously a fixed P(1)).
                kid.compatibilities_to_all = poagraph.get_compatibilities(
                    sequences_ids=list(poagraph.sequences.keys()),
                    consensus_path=kid.consensus_path,
                    p=p)
                parent.children_nodes_ids.append(kid.consensus_id)
                consensus_tree.nodes.append(kid)
                if not _node_is_ready(kid, stop):
                    pending.append(kid)

    global_logger.info("Consensuses Tree generation finished.\n")
    return consensus_tree
Ejemplo n.º 2
0
def run_pangtreebuild(output_dir: Path,
                      datatype: DataType,
                      multialignment: Union[Maf, Po],
                      fasta_provider: Union[FromFile, FromNCBI,
                                            ConstBaseProvider],
                      blosum: Blosum,
                      consensus_choice: str,
                      output_po: bool,
                      output_fasta: bool,
                      output_newick: bool,
                      missing_symbol: MissingBase,
                      metadata: Optional[MetadataCSV] = None,
                      hbmin: Optional[Hbmin] = None,
                      stop: Optional[Stop] = None,
                      p: Optional[P] = None,
                      fasta_path: Optional[Path] = None,
                      include_nodes: Optional[bool] = None) -> PangenomeJSON:
    """Build a poagraph, optionally an affinity tree, and write the outputs.

    The poagraph is built from ``multialignment`` (MAF via the DAG-MAF
    builder, or PO). Depending on ``consensus_choice`` an affinity tree is
    built with the 'poa' or the 'tree' algorithm. The requested PO, FASTA
    and Newick files plus "pangenome.json" are saved in ``output_dir``.

    Args:
        output_dir: Directory receiving log and result files.
        datatype: Only its ``name`` is recorded in the task parameters.
        multialignment: Source alignment; ``Maf`` or ``Po``.
        fasta_provider: Passed to the DAG-MAF builder for ``Maf`` input.
        blosum: Passed to the affinity tree builders.
        consensus_choice: 'poa' or 'tree'; any other value builds no tree.
        output_po: Save "poagraph.po" when true.
        output_fasta: Save "sequences.fasta" and, when a tree exists,
            "consensuses.fasta".
        output_newick: Save "affinity_tree.newick" when true and a tree
            was built.
        missing_symbol: Only its ``value`` is recorded in the task
            parameters.
        metadata: Optional sequence metadata passed to the poagraph builder.
        hbmin: Used by the 'poa' tree builder.
        stop: Used by the 'tree' tree builder.
        p: Used by the 'tree' tree builder.
        fasta_path: Only recorded in the task parameters.
        include_nodes: Only recorded in the task parameters.

    Returns:
        The PangenomeJSON object that was also saved as "pangenome.json".
    """
    start = time.time()
    logprocess.add_file_handler_to_logger(output_dir,
                                          "details",
                                          "details.log",
                                          propagate=False)
    logprocess.add_file_handler_to_logger(output_dir,
                                          "",
                                          "details.log",
                                          propagate=False)
    logprocess.remove_console_handler_from_root_logger()
    poagraph, dagmaf = None, None
    if isinstance(multialignment, Maf):
        poagraph, dagmaf = builder.build_from_dagmaf(multialignment,
                                                     fasta_provider, metadata)
    elif isinstance(multialignment, Po):
        poagraph = builder.build_from_po(multialignment, metadata)

    consensus_output_dir = tools.get_child_dir(output_dir, "consensus")
    consensus_tree = None
    if consensus_choice == 'poa':
        consensus_tree = build_poa_affinity_tree(poagraph, blosum,
                                                 consensus_output_dir, hbmin,
                                                 True)
    elif consensus_choice == 'tree':
        consensus_tree = build_affinity_tree(poagraph, blosum,
                                             consensus_output_dir, stop, p,
                                             True)

    if output_po:
        pangenome_po = poagraph_to_PangenomePO(poagraph)
        tools.save_to_file(pangenome_po,
                           tools.get_child_path(output_dir, "poagraph.po"))

    if output_fasta:
        sequences_fasta = poagraph_to_fasta(poagraph)
        tools.save_to_file(sequences_fasta,
                           tools.get_child_path(output_dir, "sequences.fasta"))
        if consensus_tree:
            consensuses_fasta = affinity_tree_to_fasta(poagraph,
                                                       consensus_tree)
            tools.save_to_file(
                consensuses_fasta,
                tools.get_child_path(output_dir, "consensuses.fasta"))

    # A Newick file can only be produced when a tree was actually built;
    # without this guard an unrecognised consensus_choice crashed with
    # AttributeError on None.
    if output_newick and consensus_tree is not None:
        if metadata is not None:
            seq_id_to_metadata = {
                seq_id: seq.seqmetadata
                for seq_id, seq in poagraph.sequences.items()
            }
        else:
            seq_id_to_metadata = None

        affinity_tree_newick = consensus_tree.as_newick(seq_id_to_metadata,
                                                        separate_leaves=True)

        tools.save_to_file(
            affinity_tree_newick,
            tools.get_child_path(output_dir, "affinity_tree.newick"))

    end = time.time()

    task_parameters = TaskParameters(
        running_time=f"{end - start}s",
        multialignment_file_path=multialignment.filename,
        multialignment_format=str(type(multialignment).__name__),
        datatype=datatype.name,
        metadata_file_path=metadata.filename if metadata else None,
        blosum_file_path=blosum.filepath.name,
        output_path=None,
        output_po=output_po,
        output_fasta=output_fasta,
        output_with_nodes=include_nodes,
        verbose=True,
        raw_maf=False,
        fasta_provider=str(type(fasta_provider).__name__),
        missing_base_symbol=missing_symbol.value,
        fasta_source_file=fasta_path,
        consensus_type=consensus_choice,
        hbmin=hbmin.value if hbmin else None,
        stop=stop.value if stop else None,
        p=p.value if p else None)

    pangenomejson = to_PangenomeJSON(task_parameters=task_parameters,
                                     poagraph=poagraph,
                                     dagmaf=dagmaf,
                                     affinity_tree=consensus_tree)
    pangenome_json_str = to_json(pangenomejson)
    tools.save_to_file(pangenome_json_str,
                       tools.get_child_path(output_dir, "pangenome.json"))
    return pangenomejson
Ejemplo n.º 3
0
def run_poapangenome(output_dir: Path,
                     datatype: DataType,
                     multialignment: Union[Maf, Po],
                     fasta_provider: Union[FromFile, FromNCBI,
                                           ConstSymbolProvider],
                     blosum: Blosum,
                     consensus_choice: str,
                     output_po: bool,
                     output_fasta: bool,
                     missing_symbol: MissingSymbol,
                     metadata: Optional[MetadataCSV] = None,
                     hbmin: Optional[Hbmin] = None,
                     stop: Optional[Stop] = None,
                     p: Optional[P] = None,
                     fasta_path: Optional[Path] = None) -> PangenomeJSON:
    """Build a poagraph, optionally a consensus tree, and write the outputs.

    The poagraph comes from ``multialignment`` (MAF through the DAG-MAF
    builder, or PO). ``consensus_choice`` selects the 'poa' or 'tree'
    consensus algorithm; the latter always uses the MAX2 and NODE3 cutoff
    strategies. The requested PO/FASTA files and "pangenome.json" are
    saved in ``output_dir``.

    Returns:
        The PangenomeJSON object that was also saved as "pangenome.json".
    """
    started_at = time.time()

    # Same log file for the "details" logger and the logger named "".
    for logger_name in ("details", ""):
        logprocess.add_file_handler_to_logger(output_dir,
                                              logger_name,
                                              "details.log",
                                              propagate=False)
    logprocess.remove_console_handler_from_root_logger()

    poagraph = None
    dagmaf = None
    if isinstance(multialignment, Maf):
        poagraph, dagmaf = Poagraph.build_from_dagmaf(
            multialignment, fasta_provider, metadata)
    elif isinstance(multialignment, Po):
        poagraph = Poagraph.build_from_po(multialignment, metadata)

    consensus_dir = tools.get_child_dir(output_dir, "consensus")
    consensus_tree = None
    if consensus_choice == 'poa':
        consensus_tree = simple_tree_generator.get_simple_consensus_tree(
            poagraph, blosum, consensus_dir, hbmin, True)
    elif consensus_choice == 'tree':
        consensus_tree = tree_generator.get_consensus_tree(
            poagraph, blosum, consensus_dir, stop, p, MAX2(), NODE3(), True)

    if output_po:
        po_target = tools.get_child_path(output_dir, "poagraph.po")
        tools.save_to_file(poagraph_to_PangenomePO(poagraph), po_target)

    if output_fasta:
        sequences_target = tools.get_child_path(output_dir, "sequences.fasta")
        tools.save_to_file(poagraph_to_fasta(poagraph), sequences_target)
        if consensus_tree:
            consensuses_target = tools.get_child_path(output_dir,
                                                      "consensuses.fasta")
            tools.save_to_file(
                consensuses_tree_to_fasta(poagraph, consensus_tree),
                consensuses_target)

    finished_at = time.time()

    # Values are gathered in the same order the task parameters list them.
    elapsed = f"{finished_at - started_at}s"
    alignment_path = multialignment.filename
    alignment_format = str(type(multialignment).__name__)
    datatype_name = datatype.name
    metadata_path = metadata.filename if metadata else None
    blosum_name = blosum.filepath.name
    provider_name = str(type(fasta_provider).__name__)
    missing_value = missing_symbol.value
    hbmin_value = hbmin.value if hbmin else None
    stop_value = stop.value if stop else None
    p_value = p.value if p else None

    task_parameters = TaskParameters(
        running_time=elapsed,
        multialignment_file_path=alignment_path,
        multialignment_format=alignment_format,
        datatype=datatype_name,
        metadata_file_path=metadata_path,
        blosum_file_path=blosum_name,
        output_path=None,
        output_po=output_po,
        output_fasta=output_fasta,
        output_with_nodes=True,
        verbose=True,
        raw_maf=False,
        fasta_provider=provider_name,
        missing_base_symbol=missing_value,
        fasta_source_file=fasta_path,
        consensus_type=consensus_choice,
        hbmin=hbmin_value,
        max_cutoff_option="MAX2",
        search_range=None,
        node_cutoff_option="NODE3",
        multiplier=None,
        stop=stop_value,
        p=p_value)

    pangenomejson = to_PangenomeJSON(task_parameters=task_parameters,
                                     poagraph=poagraph,
                                     dagmaf=dagmaf,
                                     consensuses_tree=consensus_tree)
    tools.save_to_file(to_json(pangenomejson),
                       tools.get_child_path(output_dir, "pangenome.json"))
    return pangenomejson
Ejemplo n.º 4
0
def build_affinity_tree(poagraph: graph.Poagraph,
                        blosum: Optional[parameters.Blosum],
                        output_dir: Path,
                        stop: parameters.Stop,
                        p: parameters.P,
                        verbose: bool) -> tree.AffinityTree:
    """Build an Affinity Tree for the sequences of a poagraph.

    The Affinity Tree is defined in the paper 'Getting insight into the
    pan-genome structure with Pangtree', which also describes the full
    algorithm. The tree is grown by iterative calls to the poa software.

    Args:
        poagraph: Poagraph whose sequences are divided into groups
            (Affinity Tree nodes).
        blosum: Optional BLOSUM matrix. When None, the matrix returned by
            ``get_default_blosum()`` is used.
        output_dir: Path to a directory that can be used by the poa software.
        stop: Value of mincomp above which an affinity tree node is not
            split any further.
        p: Value changing the linear meaning of compatibility when
            searching for a cutoff.
        verbose: Switch controlling logging intensity; when true, a
            "tresholds.csv" file handler is registered.

    Raises:
        AffinityTreeGenerationException: if consensuses cannot be found.

    Returns:
        Affinity Tree generated with the Pangtree algorithm.
    """
    global_logger.info("Affinity Tree generation started.")
    if blosum is None:
        blosum = get_default_blosum()
    if verbose:
        logprocess.add_file_handler_to_logger(
            output_dir, "tresholdsCSV", "tresholds.csv", "%(message)s", False)
    _raise_error_if_invalid_poagraph(poagraph)

    affinity_tree = tree.AffinityTree(
        [_get_root_node(poagraph, blosum.filepath, output_dir, p)])

    pending = deque([affinity_tree.get_node(tree.AffinityNodeID(0))])
    while pending:
        # pop() takes from the right end, so nodes are handled last-in-first-out.
        parent = pending.pop()

        offspring = _get_children_nodes_looping(
            parent,
            poagraph,
            output_dir,
            blosum.filepath,
            p,
            affinity_tree.get_max_node_id())

        # A split that yields exactly one child attaches nothing to the parent.
        if len(offspring) != 1:
            for kid in offspring:
                every_sequence_id = list(poagraph.sequences.keys())
                kid.compatibilities = poagraph.get_compatibilities(
                    sequences_ids=every_sequence_id,
                    consensus_path=kid.consensus,
                    p=p)
                parent.children.append(kid.id_)
                affinity_tree.nodes.append(kid)
                if not _node_is_ready(kid, stop):
                    pending.append(kid)

    global_logger.info("Affinity Tree generation finished.\n")
    return affinity_tree
Ejemplo n.º 5
0
def main():
    """Command-line entry point: build a poagraph and write the outputs.

    Parses the command line, builds a poagraph from the given
    multialignment (raw MAF, DAG-MAF or PO) and, when a consensus
    algorithm was requested, builds a consensus tree and saves it as
    "consensus_tree.newick". Optional PO/FASTA files and "pangenome.json"
    are saved in ``args.output_dir``.
    """
    parser = cli.get_parser()
    args = parser.parse_args()
    start = datetime.datetime.now()
    if not args.quiet and args.verbose:
        logprocess.add_file_handler_to_logger(args.output_dir,
                                              "details",
                                              "details.log",
                                              propagate=False)
        logprocess.add_file_handler_to_logger(args.output_dir,
                                              "",
                                              "details.log",
                                              propagate=False)
    if args.quiet:
        logprocess.disable_all_loggers()

    poagraph, dagmaf, fasta_provider = None, None, None
    if isinstance(args.multialignment, Maf) and args.raw_maf:
        poagraph = Poagraph.build_from_maf(args.multialignment, args.metadata)
    elif isinstance(args.multialignment, Maf) and not args.raw_maf:
        fasta_provider = cli.resolve_fasta_provider(args)
        poagraph, dagmaf = Poagraph.build_from_dagmaf(args.multialignment,
                                                      fasta_provider,
                                                      args.metadata)
    elif isinstance(args.multialignment, Po):
        poagraph = Poagraph.build_from_po(args.multialignment, args.metadata)

    consensus_tree = None
    if args.consensus is not None:
        blosum = args.blosum if args.blosum else cli.get_default_blosum()
        if fasta_provider is not None and isinstance(fasta_provider,
                                                     ConstSymbolProvider):
            blosum.check_if_symbol_is_present(
                fasta_provider.missing_symbol.as_str())

        consensus_output_dir = pathtools.get_child_dir(args.output_dir,
                                                       "consensus")

        if args.consensus == 'poa':
            consensus_tree = simple_tree_generator.get_simple_consensus_tree(
                poagraph, blosum, consensus_output_dir, args.hbmin,
                args.verbose)
        elif args.consensus == 'tree':
            max_strategy = cli.resolve_max_strategy(args)
            node_strategy = cli.resolve_node_strategy(args)
            consensus_tree = tree_generator.get_consensus_tree(
                poagraph, blosum, consensus_output_dir, args.stop, args.p,
                max_strategy, node_strategy, args.verbose)

        # Sequence names are best-effort: fall back to None when metadata
        # is missing or has no "name" entry. Only lookup/type/attribute
        # errors are handled so that KeyboardInterrupt and SystemExit are
        # no longer swallowed, as they were by the former bare except.
        try:
            seq_id_to_name = {
                seq_id: seq.seqmetadata["name"]
                for seq_id, seq in poagraph.sequences.items()
            }
        except (LookupError, TypeError, AttributeError):
            seq_id_to_name = None

        newick_consensus_tree = consensus_tree.as_newick(seq_id_to_name)

        pathtools.save_to_file(
            newick_consensus_tree,
            pathtools.get_child_path(args.output_dir, "consensus_tree.newick"))

    if args.output_po:
        pangenome_po = poagraph_to_PangenomePO(poagraph)
        pathtools.save_to_file(
            pangenome_po,
            pathtools.get_child_path(args.output_dir, "poagraph.po"))

    if args.output_fasta:
        sequences_fasta = poagraph_to_fasta(poagraph)
        pathtools.save_to_file(
            sequences_fasta,
            pathtools.get_child_path(args.output_dir, "sequences.fasta"))
        if consensus_tree:
            consensuses_fasta = consensuses_tree_to_fasta(
                poagraph, consensus_tree)
            pathtools.save_to_file(
                consensuses_fasta,
                pathtools.get_child_path(args.output_dir, "consensuses.fasta"))

    end = datetime.datetime.now()
    pangenomejson = to_PangenomeJSON(task_parameters=cli.get_task_parameters(
        args, running_time=f"{end-start}s"),
                                     poagraph=poagraph,
                                     dagmaf=dagmaf,
                                     consensuses_tree=consensus_tree)

    pangenome_json_str = to_json(pangenomejson)
    pathtools.save_to_file(
        pangenome_json_str,
        pathtools.get_child_path(args.output_dir, "pangenome.json"))
Ejemplo n.º 6
0
def main():
    """Command-line entry point: build a poagraph and write the outputs.

    Parses the command line, builds a poagraph from the given
    multialignment (raw MAF, DAG-MAF or PO) and, when an affinity
    algorithm was requested, builds an affinity tree and saves it as
    "affinity_tree.newick" in the "affinitytree" child directory.
    Optional PO/FASTA files and "pangenome.json" are saved in
    ``args.output_dir``.
    """
    args = cli.get_parser().parse_args()
    started_at = datetime.datetime.now()

    if not args.quiet and args.verbose:
        # Same log file for the "details" logger and the logger named "".
        for logger_name in ("details", ""):
            logprocess.add_file_handler_to_logger(args.output_dir,
                                                  logger_name,
                                                  "details.log",
                                                  propagate=False)
    if args.quiet:
        logprocess.disable_all_loggers()

    poagraph = None
    dagmaf = None
    fasta_provider = None
    if isinstance(args.multialignment, msa.Maf):
        if args.raw_maf:
            poagraph = builder.build_from_maf(args.multialignment,
                                              args.metadata)
        else:
            fasta_provider = cli.resolve_fasta_provider(args)
            poagraph, dagmaf = builder.build_from_dagmaf(
                args.multialignment, fasta_provider, args.metadata)
    elif isinstance(args.multialignment, msa.Po):
        poagraph = builder.build_from_po(args.multialignment, args.metadata)

    affinity_tree = None
    if args.affinity is not None:
        blosum = args.blosum if args.blosum else cli.get_default_blosum()
        if fasta_provider is not None and isinstance(
                fasta_provider, missings.ConstBaseProvider):
            blosum.check_if_symbol_is_present(
                fasta_provider.missing_base.as_str())

        tree_dir = pathtools.get_child_dir(args.output_dir, "affinitytree")

        if args.affinity == 'poa':
            affinity_tree = at_builders.build_poa_affinity_tree(
                poagraph, blosum, tree_dir, args.hbmin, args.verbose)
        elif args.affinity == 'tree':
            affinity_tree = at_builders.build_affinity_tree(
                poagraph, blosum, tree_dir, args.stop, args.p, args.verbose)

        # Metadata is attached to the Newick output only when it was given.
        seq_id_to_metadata = None
        if args.metadata is not None:
            seq_id_to_metadata = {
                seq_id: seq.seqmetadata
                for seq_id, seq in poagraph.sequences.items()
            }

        newick_text = affinity_tree.as_newick(seq_id_to_metadata,
                                              separate_leaves=True)
        pathtools.save_to_file(
            newick_text,
            pathtools.get_child_path(tree_dir, "affinity_tree.newick"))

    if args.output_po:
        po_text = po.poagraph_to_PangenomePO(poagraph)
        pathtools.save_to_file(
            po_text,
            pathtools.get_child_path(args.output_dir, "poagraph.po"))

    if args.output_fasta:
        sequences_text = fasta.poagraph_to_fasta(poagraph)
        pathtools.save_to_file(
            sequences_text,
            pathtools.get_child_path(args.output_dir, "_sequences.fasta"))
        if affinity_tree:
            tree_fasta_text = fasta.affinity_tree_to_fasta(poagraph,
                                                           affinity_tree)
            pathtools.save_to_file(
                tree_fasta_text,
                pathtools.get_child_path(args.output_dir,
                                         "affinitytree.fasta"))

    finished_at = datetime.datetime.now()
    task_parameters = cli.get_task_parameters(
        args, running_time=f"{finished_at-started_at}s")
    pangenomejson = json.to_PangenomeJSON(task_parameters=task_parameters,
                                          poagraph=poagraph,
                                          dagmaf=dagmaf,
                                          affinity_tree=affinity_tree)

    pathtools.save_to_file(
        json.to_json(pangenomejson),
        pathtools.get_child_path(args.output_dir, "pangenome.json"))