def get_consensus_tree(poagraph: Poagraph,
                       blosum: Blosum,
                       output_dir: Path,
                       stop: Stop,
                       p: P,
                       max_strategy: FindMaxCutoff,
                       node_strategy: FindNodeCutoff,
                       verbose: bool) -> ConsensusTree:
    """Generate a consensus tree for the given poagraph.

    The tree is initialised by ``_init_consensus_tree`` and its node with
    ID 0 is put on a work stack. Each node taken from the stack is split
    with ``_get_children_nodes_looping``; every resulting child gets its
    compatibilities to all poagraph sequences computed, is attached to the
    parent and to the tree, and is scheduled for further splitting unless
    ``_node_is_ready`` accepts it.

    Args:
        poagraph: Poagraph whose sequences are distributed over tree nodes.
        blosum: Its ``filepath`` is forwarded to the tree helpers.
        output_dir: Directory forwarded to the tree helpers; with
            ``verbose`` it also receives the "tresholds.csv" log file.
        stop: Criterion passed to ``_node_is_ready``.
        p: Parameter forwarded to the tree helpers and to
            ``poagraph.get_compatibilities``.
        max_strategy: Forwarded to ``_get_children_nodes_looping``.
        node_strategy: Forwarded to ``_get_children_nodes_looping``.
        verbose: When true, an additional file handler is registered for
            the "tresholdsCSV" logger.

    Returns:
        The consensus tree with all generated nodes appended.
    """
    global_logger.info("Consensuses Tree generation started.")

    if verbose:
        logprocess.add_file_handler_to_logger(output_dir,
                                              "tresholdsCSV",
                                              "tresholds.csv",
                                              "%(message)s",
                                              False)
    _raise_error_if_invalid_poagraph(poagraph)

    consensus_tree = _init_consensus_tree(poagraph,
                                          blosum.filepath,
                                          output_dir,
                                          p)
    root = consensus_tree.get_node(ConsensusNodeID(0))

    # pop() and append() both work on the right end, so the deque is a stack.
    pending = deque([root])
    while pending:
        parent = pending.pop()
        offspring = _get_children_nodes_looping(
            parent,
            poagraph,
            output_dir,
            blosum.filepath,
            p,
            max_strategy,
            node_strategy,
            consensus_tree.get_max_node_id())

        # A split that yields exactly one child is discarded: nothing is
        # attached to the parent and the child is not processed further.
        if len(offspring) != 1:
            for child in offspring:
                # The caller-supplied p is used here (changed 24.06); the
                # alternative left commented out in the past was p=P(1).
                child.compatibilities_to_all = poagraph.get_compatibilities(
                    sequences_ids=list(poagraph.sequences.keys()),
                    consensus_path=child.consensus_path,
                    p=p)
                parent.children_nodes_ids.append(child.consensus_id)
                consensus_tree.nodes.append(child)
                if _node_is_ready(child, stop):
                    continue
                pending.append(child)

    global_logger.info("Consensuses Tree generation finished.\n")
    return consensus_tree
def run_pangtreebuild(output_dir: Path,
                      datatype: DataType,
                      multialignment: Union[Maf, Po],
                      fasta_provider: Union[FromFile, FromNCBI, ConstBaseProvider],
                      blosum: Blosum,
                      consensus_choice: str,
                      output_po: bool,
                      output_fasta: bool,
                      output_newick: bool,
                      missing_symbol: MissingBase,
                      metadata: Optional[MetadataCSV] = None,
                      hbmin: Optional[Hbmin] = None,
                      stop: Optional[Stop] = None,
                      p: Optional[P] = None,
                      fasta_path: Optional[Path] = None,
                      include_nodes: Optional[bool] = None) -> PangenomeJSON:
    """Run the whole pipeline: build the poagraph, build a tree, save outputs.

    Args:
        output_dir: Directory receiving "details.log", the optional exports
            and "pangenome.json"; a "consensus" child directory is used for
            tree generation.
        datatype: Its ``name`` is recorded in the task parameters.
        multialignment: ``Maf`` input is built via
            ``builder.build_from_dagmaf``, ``Po`` input via
            ``builder.build_from_po``.
        fasta_provider: Passed to ``builder.build_from_dagmaf``; its type
            name is recorded in the task parameters.
        blosum: Passed to the tree builders; the name of its ``filepath``
            is recorded in the task parameters.
        consensus_choice: 'poa' selects ``build_poa_affinity_tree``, 'tree'
            selects ``build_affinity_tree``; any other value builds no tree.
        output_po: Save the poagraph as "poagraph.po".
        output_fasta: Save "sequences.fasta" and, when a tree was built,
            "consensuses.fasta".
        output_newick: Save the tree as "affinity_tree.newick" (only when a
            tree was built).
        missing_symbol: Its ``value`` is recorded in the task parameters.
        metadata: Optional metadata forwarded to the poagraph builders; when
            given, sequence metadata is passed to the newick export.
        hbmin: Forwarded to ``build_poa_affinity_tree``.
        stop: Forwarded to ``build_affinity_tree``.
        p: Forwarded to ``build_affinity_tree``.
        fasta_path: Recorded in the task parameters as the fasta source file.
        include_nodes: Recorded in the task parameters as
            ``output_with_nodes``.

    Returns:
        The PangenomeJSON object that was also serialised to
        "pangenome.json".
    """
    start = time.time()
    logprocess.add_file_handler_to_logger(output_dir,
                                          "details",
                                          "details.log",
                                          propagate=False)
    logprocess.add_file_handler_to_logger(output_dir,
                                          "",
                                          "details.log",
                                          propagate=False)
    logprocess.remove_console_handler_from_root_logger()

    poagraph, dagmaf = None, None
    if isinstance(multialignment, Maf):
        poagraph, dagmaf = builder.build_from_dagmaf(multialignment,
                                                     fasta_provider,
                                                     metadata)
    elif isinstance(multialignment, Po):
        poagraph = builder.build_from_po(multialignment, metadata)

    consensus_output_dir = tools.get_child_dir(output_dir, "consensus")
    consensus_tree = None
    if consensus_choice == 'poa':
        consensus_tree = build_poa_affinity_tree(poagraph,
                                                 blosum,
                                                 consensus_output_dir,
                                                 hbmin,
                                                 True)
    elif consensus_choice == 'tree':
        consensus_tree = build_affinity_tree(poagraph,
                                             blosum,
                                             consensus_output_dir,
                                             stop,
                                             p,
                                             True)

    if output_po:
        pangenome_po = poagraph_to_PangenomePO(poagraph)
        tools.save_to_file(pangenome_po,
                           tools.get_child_path(output_dir, "poagraph.po"))

    if output_fasta:
        sequences_fasta = poagraph_to_fasta(poagraph)
        tools.save_to_file(sequences_fasta,
                           tools.get_child_path(output_dir,
                                                "sequences.fasta"))
        if consensus_tree:
            consensuses_fasta = affinity_tree_to_fasta(poagraph,
                                                       consensus_tree)
            tools.save_to_file(consensuses_fasta,
                               tools.get_child_path(output_dir,
                                                    "consensuses.fasta"))

    # No tree exists when consensus_choice is neither 'poa' nor 'tree';
    # without this guard the export failed with AttributeError on None.
    if output_newick and consensus_tree is not None:
        if metadata is not None:
            seq_id_to_metadata = {
                seq_id: seq.seqmetadata
                for seq_id, seq in poagraph.sequences.items()
            }
        else:
            seq_id_to_metadata = None

        affinity_tree_newick = consensus_tree.as_newick(seq_id_to_metadata,
                                                        separate_leaves=True)
        tools.save_to_file(affinity_tree_newick,
                           tools.get_child_path(output_dir,
                                                "affinity_tree.newick"))

    end = time.time()

    task_parameters = TaskParameters(
        running_time=f"{end - start}s",
        multialignment_file_path=multialignment.filename,
        multialignment_format=str(type(multialignment).__name__),
        datatype=datatype.name,
        metadata_file_path=metadata.filename if metadata else None,
        blosum_file_path=blosum.filepath.name,
        output_path=None,
        output_po=output_po,
        output_fasta=output_fasta,
        output_with_nodes=include_nodes,
        verbose=True,
        raw_maf=False,
        fasta_provider=str(type(fasta_provider).__name__),
        missing_base_symbol=missing_symbol.value,
        fasta_source_file=fasta_path,
        consensus_type=consensus_choice,
        hbmin=hbmin.value if hbmin else None,
        stop=stop.value if stop else None,
        p=p.value if p else None)

    pangenomejson = to_PangenomeJSON(task_parameters=task_parameters,
                                     poagraph=poagraph,
                                     dagmaf=dagmaf,
                                     affinity_tree=consensus_tree)
    pangenome_json_str = to_json(pangenomejson)
    tools.save_to_file(pangenome_json_str,
                       tools.get_child_path(output_dir, "pangenome.json"))
    return pangenomejson
def run_poapangenome(output_dir: Path,
                     datatype: DataType,
                     multialignment: Union[Maf, Po],
                     fasta_provider: Union[FromFile, FromNCBI, ConstSymbolProvider],
                     blosum: Blosum,
                     consensus_choice: str,
                     output_po: bool,
                     output_fasta: bool,
                     missing_symbol: MissingSymbol,
                     metadata: Optional[MetadataCSV] = None,
                     hbmin: Optional[Hbmin] = None,
                     stop: Optional[Stop] = None,
                     p: Optional[P] = None,
                     fasta_path: Optional[Path] = None) -> PangenomeJSON:
    """Build a poagraph, optionally a consensus tree, and save the results.

    Args:
        output_dir: Directory receiving "details.log", the optional exports
            and "pangenome.json"; its "consensus" child directory is handed
            to the tree generators.
        datatype: Its ``name`` is stored in the task parameters.
        multialignment: ``Maf`` goes through ``Poagraph.build_from_dagmaf``,
            ``Po`` through ``Poagraph.build_from_po``.
        fasta_provider: Used when building from ``Maf``; its type name is
            stored in the task parameters.
        blosum: Handed to the tree generators; ``filepath.name`` is stored
            in the task parameters.
        consensus_choice: 'poa' uses the simple tree generator, 'tree' uses
            the tree generator with ``MAX2()`` and ``NODE3()``; other values
            produce no tree.
        output_po: Write "poagraph.po".
        output_fasta: Write "sequences.fasta" and, if a tree exists,
            "consensuses.fasta".
        missing_symbol: Its ``value`` is stored in the task parameters.
        metadata: Optional metadata forwarded to the poagraph builders.
        hbmin: Forwarded to the simple tree generator.
        stop: Forwarded to the tree generator.
        p: Forwarded to the tree generator.
        fasta_path: Stored in the task parameters as the fasta source file.

    Returns:
        The PangenomeJSON object, which is also written to "pangenome.json".
    """
    started = time.time()

    # Same log file for the "details" logger and for the root logger ("").
    for logger_name in ("details", ""):
        logprocess.add_file_handler_to_logger(output_dir,
                                              logger_name,
                                              "details.log",
                                              propagate=False)
    logprocess.remove_console_handler_from_root_logger()

    poagraph = None
    dagmaf = None
    if isinstance(multialignment, Maf):
        poagraph, dagmaf = Poagraph.build_from_dagmaf(multialignment,
                                                      fasta_provider,
                                                      metadata)
    elif isinstance(multialignment, Po):
        poagraph = Poagraph.build_from_po(multialignment, metadata)

    consensus_output_dir = tools.get_child_dir(output_dir, "consensus")
    consensus_tree = None
    if consensus_choice == 'poa':
        consensus_tree = simple_tree_generator.get_simple_consensus_tree(
            poagraph, blosum, consensus_output_dir, hbmin, True)
    elif consensus_choice == 'tree':
        consensus_tree = tree_generator.get_consensus_tree(
            poagraph, blosum, consensus_output_dir, stop, p,
            MAX2(), NODE3(), True)

    if output_po:
        po_content = poagraph_to_PangenomePO(poagraph)
        po_target = tools.get_child_path(output_dir, "poagraph.po")
        tools.save_to_file(po_content, po_target)

    if output_fasta:
        fasta_content = poagraph_to_fasta(poagraph)
        fasta_target = tools.get_child_path(output_dir, "sequences.fasta")
        tools.save_to_file(fasta_content, fasta_target)
        if consensus_tree:
            consensuses_content = consensuses_tree_to_fasta(poagraph,
                                                            consensus_tree)
            consensuses_target = tools.get_child_path(output_dir,
                                                      "consensuses.fasta")
            tools.save_to_file(consensuses_content, consensuses_target)

    finished = time.time()

    # Optional inputs are reported as None when they were not supplied.
    metadata_file = None
    if metadata:
        metadata_file = metadata.filename
    hbmin_value = None
    if hbmin:
        hbmin_value = hbmin.value
    stop_value = None
    if stop:
        stop_value = stop.value
    p_value = None
    if p:
        p_value = p.value

    task_parameters = TaskParameters(
        running_time=f"{finished - started}s",
        multialignment_file_path=multialignment.filename,
        multialignment_format=str(type(multialignment).__name__),
        datatype=datatype.name,
        metadata_file_path=metadata_file,
        blosum_file_path=blosum.filepath.name,
        output_path=None,
        output_po=output_po,
        output_fasta=output_fasta,
        output_with_nodes=True,
        verbose=True,
        raw_maf=False,
        fasta_provider=str(type(fasta_provider).__name__),
        missing_base_symbol=missing_symbol.value,
        fasta_source_file=fasta_path,
        consensus_type=consensus_choice,
        hbmin=hbmin_value,
        max_cutoff_option="MAX2",
        search_range=None,
        node_cutoff_option="NODE3",
        multiplier=None,
        stop=stop_value,
        p=p_value)

    pangenomejson = to_PangenomeJSON(task_parameters=task_parameters,
                                     poagraph=poagraph,
                                     dagmaf=dagmaf,
                                     consensuses_tree=consensus_tree)
    json_target = tools.get_child_path(output_dir, "pangenome.json")
    tools.save_to_file(to_json(pangenomejson), json_target)
    return pangenomejson
def build_affinity_tree(poagraph: graph.Poagraph,
                        blosum: Optional[parameters.Blosum],
                        output_dir: Path,
                        stop: parameters.Stop,
                        p: parameters.P,
                        verbose: bool) -> tree.AffinityTree:
    """Build an Affinity Tree with the Pangtree algorithm.

    The Affinity Tree is defined in the paper 'Getting insight into the
    pan-genome structure with Pangtree', which also describes the full
    algorithm. The tree is grown by iterative calls to poa software:
    starting from the root, each node is split into children until the
    stop criterion is met.

    Args:
        poagraph: Poagraph containing the sequences to be divided into
            groups (Affinity Tree nodes).
        blosum: BLOSUM matrix. When None, the matrix returned by
            ``get_default_blosum()`` (Blosum80.mat) is used.
        output_dir: Path to a directory that can be used by poa software.
        stop: Value of mincomp above which an affinity tree node is not
            split any further.
        p: Value changing the linear meaning of compatibility when
            searching for the cutoff.
        verbose: Switch controlling logging intensity.

    Raises:
        AffinityTreeGenerationException: If consensuses cannot be found.

    Returns:
        Affinity Tree generated with the Pangtree algorithm.
    """
    global_logger.info("Affinity Tree generation started.")

    if blosum is None:
        blosum = get_default_blosum()

    if verbose:
        logprocess.add_file_handler_to_logger(output_dir,
                                              "tresholdsCSV",
                                              "tresholds.csv",
                                              "%(message)s",
                                              False)
    _raise_error_if_invalid_poagraph(poagraph)

    root_node = _get_root_node(poagraph, blosum.filepath, output_dir, p)
    affinity_tree = tree.AffinityTree([root_node])

    # pop() and append() both use the right end: the deque acts as a stack.
    pending = deque([affinity_tree.get_node(tree.AffinityNodeID(0))])
    while pending:
        parent = pending.pop()
        offspring = _get_children_nodes_looping(
            parent,
            poagraph,
            output_dir,
            blosum.filepath,
            p,
            affinity_tree.get_max_node_id())

        # A split producing a single child is dropped entirely.
        if len(offspring) != 1:
            for child in offspring:
                every_sequence_id = list(poagraph.sequences.keys())
                child.compatibilities = poagraph.get_compatibilities(
                    sequences_ids=every_sequence_id,
                    consensus_path=child.consensus,
                    p=p)
                parent.children.append(child.id_)
                affinity_tree.nodes.append(child)
                if _node_is_ready(child, stop):
                    continue
                pending.append(child)

    global_logger.info("Affinity Tree generation finished.\n")
    return affinity_tree
def main():
    """Command-line entry point: build a poagraph and export the results.

    Parses the arguments from ``cli.get_parser()``, builds the poagraph from
    the given multialignment (raw MAF, DAG-ified MAF or PO), optionally
    generates a consensus tree ('poa' or 'tree' variant) and saves it as
    "consensus_tree.newick", optionally exports "poagraph.po",
    "sequences.fasta" and "consensuses.fasta", and always writes
    "pangenome.json" into ``args.output_dir``.
    """
    parser = cli.get_parser()
    args = parser.parse_args()
    start = datetime.datetime.now()

    if not args.quiet and args.verbose:
        logprocess.add_file_handler_to_logger(args.output_dir,
                                              "details",
                                              "details.log",
                                              propagate=False)
        logprocess.add_file_handler_to_logger(args.output_dir,
                                              "",
                                              "details.log",
                                              propagate=False)
    if args.quiet:
        logprocess.disable_all_loggers()

    poagraph, dagmaf, fasta_provider = None, None, None
    if isinstance(args.multialignment, Maf) and args.raw_maf:
        poagraph = Poagraph.build_from_maf(args.multialignment, args.metadata)
    elif isinstance(args.multialignment, Maf) and not args.raw_maf:
        fasta_provider = cli.resolve_fasta_provider(args)
        poagraph, dagmaf = Poagraph.build_from_dagmaf(args.multialignment,
                                                      fasta_provider,
                                                      args.metadata)
    elif isinstance(args.multialignment, Po):
        poagraph = Poagraph.build_from_po(args.multialignment, args.metadata)

    consensus_tree = None
    if args.consensus is not None:
        blosum = args.blosum if args.blosum else cli.get_default_blosum()
        if fasta_provider is not None and isinstance(fasta_provider,
                                                     ConstSymbolProvider):
            blosum.check_if_symbol_is_present(
                fasta_provider.missing_symbol.as_str())

        consensus_output_dir = pathtools.get_child_dir(args.output_dir,
                                                       "consensus")

        if args.consensus == 'poa':
            consensus_tree = simple_tree_generator.get_simple_consensus_tree(
                poagraph, blosum, consensus_output_dir,
                args.hbmin, args.verbose)
        elif args.consensus == 'tree':
            max_strategy = cli.resolve_max_strategy(args)
            node_strategy = cli.resolve_node_strategy(args)
            consensus_tree = tree_generator.get_consensus_tree(
                poagraph, blosum, consensus_output_dir,
                args.stop, args.p,
                max_strategy, node_strategy, args.verbose)

        # Best effort: sequence names are optional for the newick export.
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        try:
            seq_id_to_name = {
                seq_id: seq.seqmetadata["name"]
                for seq_id, seq in poagraph.sequences.items()
            }
        except Exception:
            seq_id_to_name = None

        newick_consensus_tree = consensus_tree.as_newick(seq_id_to_name)
        pathtools.save_to_file(
            newick_consensus_tree,
            pathtools.get_child_path(args.output_dir,
                                     "consensus_tree.newick"))

    if args.output_po:
        pangenome_po = poagraph_to_PangenomePO(poagraph)
        pathtools.save_to_file(
            pangenome_po,
            pathtools.get_child_path(args.output_dir, "poagraph.po"))

    if args.output_fasta:
        sequences_fasta = poagraph_to_fasta(poagraph)
        pathtools.save_to_file(
            sequences_fasta,
            pathtools.get_child_path(args.output_dir, "sequences.fasta"))
        if consensus_tree:
            consensuses_fasta = consensuses_tree_to_fasta(poagraph,
                                                          consensus_tree)
            pathtools.save_to_file(
                consensuses_fasta,
                pathtools.get_child_path(args.output_dir,
                                         "consensuses.fasta"))

    end = datetime.datetime.now()
    pangenomejson = to_PangenomeJSON(
        task_parameters=cli.get_task_parameters(
            args, running_time=f"{end-start}s"),
        poagraph=poagraph,
        dagmaf=dagmaf,
        consensuses_tree=consensus_tree)
    pangenome_json_str = to_json(pangenomejson)
    pathtools.save_to_file(
        pangenome_json_str,
        pathtools.get_child_path(args.output_dir, "pangenome.json"))
def main():
    """Command-line entry point of the affinity-tree pipeline.

    Reads the arguments from ``cli.get_parser()``, builds the poagraph from
    the multialignment (raw MAF, DAG-ified MAF or PO) and, when
    ``args.affinity`` is given, builds an affinity tree ('poa' or 'tree')
    and stores it as "affinity_tree.newick" in the "affinitytree" child
    directory. Depending on the flags it also writes "poagraph.po",
    "_sequences.fasta" and "affinitytree.fasta"; "pangenome.json" is always
    written to ``args.output_dir``.
    """
    args = cli.get_parser().parse_args()
    start = datetime.datetime.now()

    if not args.quiet and args.verbose:
        # Same log file for the "details" logger and the root logger ("").
        for logger_name in ("details", ""):
            logprocess.add_file_handler_to_logger(args.output_dir,
                                                  logger_name,
                                                  "details.log",
                                                  propagate=False)
    if args.quiet:
        logprocess.disable_all_loggers()

    poagraph = None
    dagmaf = None
    fasta_provider = None
    if isinstance(args.multialignment, msa.Maf):
        if args.raw_maf:
            poagraph = builder.build_from_maf(args.multialignment,
                                              args.metadata)
        else:
            fasta_provider = cli.resolve_fasta_provider(args)
            poagraph, dagmaf = builder.build_from_dagmaf(args.multialignment,
                                                         fasta_provider,
                                                         args.metadata)
    elif isinstance(args.multialignment, msa.Po):
        poagraph = builder.build_from_po(args.multialignment, args.metadata)

    affinity_tree = None
    if args.affinity is not None:
        blosum = args.blosum or cli.get_default_blosum()
        if fasta_provider is not None:
            if isinstance(fasta_provider, missings.ConstBaseProvider):
                missing = fasta_provider.missing_base.as_str()
                blosum.check_if_symbol_is_present(missing)

        consensus_output_dir = pathtools.get_child_dir(args.output_dir,
                                                       "affinitytree")

        if args.affinity == 'poa':
            affinity_tree = at_builders.build_poa_affinity_tree(
                poagraph, blosum, consensus_output_dir,
                args.hbmin, args.verbose)
        elif args.affinity == 'tree':
            affinity_tree = at_builders.build_affinity_tree(
                poagraph, blosum, consensus_output_dir,
                args.stop, args.p, args.verbose)

        # Sequence metadata is handed to the export only if it was supplied.
        seq_id_to_metadata = None
        if args.metadata is not None:
            seq_id_to_metadata = {
                seq_id: seq.seqmetadata
                for seq_id, seq in poagraph.sequences.items()
            }

        affinity_tree_newick = affinity_tree.as_newick(seq_id_to_metadata,
                                                       separate_leaves=True)
        newick_target = pathtools.get_child_path(consensus_output_dir,
                                                 "affinity_tree.newick")
        pathtools.save_to_file(affinity_tree_newick, newick_target)

    if args.output_po:
        pangenome_po = po.poagraph_to_PangenomePO(poagraph)
        po_target = pathtools.get_child_path(args.output_dir, "poagraph.po")
        pathtools.save_to_file(pangenome_po, po_target)

    if args.output_fasta:
        sequences_fasta = fasta.poagraph_to_fasta(poagraph)
        sequences_target = pathtools.get_child_path(args.output_dir,
                                                    "_sequences.fasta")
        pathtools.save_to_file(sequences_fasta, sequences_target)
        if affinity_tree:
            consensuses_fasta = fasta.affinity_tree_to_fasta(poagraph,
                                                             affinity_tree)
            consensuses_target = pathtools.get_child_path(
                args.output_dir, "affinitytree.fasta")
            pathtools.save_to_file(consensuses_fasta, consensuses_target)

    end = datetime.datetime.now()
    task_parameters = cli.get_task_parameters(args,
                                              running_time=f"{end-start}s")
    pangenomejson = json.to_PangenomeJSON(task_parameters=task_parameters,
                                          poagraph=poagraph,
                                          dagmaf=dagmaf,
                                          affinity_tree=affinity_tree)
    pangenome_json_str = json.to_json(pangenomejson)
    json_target = pathtools.get_child_path(args.output_dir, "pangenome.json")
    pathtools.save_to_file(pangenome_json_str, json_target)