def validate_fasta_file(file_content, session_dir, file_name):
    """Decode an uploaded FASTA (plain or zipped), save it under the session
    directory, and validate it with pangtreebuild.

    Args:
        file_content: Raw uploaded content (encoded); None if nothing uploaded.
        session_dir: Directory of the current session; None if not established.
        file_name: Name of the uploaded file; ".zip" in the name selects the
            zip decoding/saving path.

    Returns:
        None when any argument is missing, otherwise a dict with keys
        "is_correct" (bool), "filename" and "error" (empty string when valid).
    """
    if file_content is None or file_name is None or session_dir is None:
        return None

    # Decide zip handling once instead of re-testing the name at each step.
    is_zip = ".zip" in file_name
    if is_zip:
        decoded_content = tools.decode_zip_content(file_content)
    else:
        decoded_content = tools.decode_content(file_content)

    fasta_path = tools.get_child_path(Path(session_dir), file_name)
    if is_zip:
        # Zip content is binary and must be written in 'wb' mode.
        tools.save_to_file(decoded_content, fasta_path, 'wb')
    else:
        tools.save_to_file(decoded_content, fasta_path)

    error_message = pangtreebuild.fasta_file_is_valid(fasta_path)
    # An empty error message means the file validated cleanly.
    return {
        "is_correct": not error_message,
        "filename": file_name,
        "error": error_message
    }
def get_elem_cache_info(pangenome_hash: int) -> Path:
    """Return the path of the pickle cache file for a given pangenome hash.

    The file name is the absolute value of the hash with a ".pickle"
    extension; the file lives in the ``cache`` directory that is a sibling
    of this module's directory.

    Args:
        pangenome_hash: Hash identifying the pangenome (sign is discarded).

    Returns:
        Absolute Path to the cache file (the file itself may not exist yet).
    """
    cache_file_name = f"{abs(pangenome_hash)}.pickle"
    # ../cache relative to this module's directory, resolved to an
    # absolute, symlink-free path. (The original wrapped the directory in a
    # pointless single-argument os.path.join.)
    parent_output_dir = Path(
        os.path.abspath(os.path.dirname(__file__))).joinpath(
            "../cache").resolve()
    return tools.get_child_path(parent_output_dir, cache_file_name)
def run_pangtreebuild(output_dir: Path,
                      datatype: DataType,
                      multialignment: Union[Maf, Po],
                      fasta_provider: Union[FromFile, FromNCBI,
                                            ConstBaseProvider],
                      blosum: Blosum,
                      consensus_choice: str,
                      output_po: bool,
                      output_fasta: bool,
                      output_newick: bool,
                      missing_symbol: MissingBase,
                      metadata: Optional[MetadataCSV] = None,
                      hbmin: Optional[Hbmin] = None,
                      stop: Optional[Stop] = None,
                      p: Optional[P] = None,
                      fasta_path: Optional[Path] = None,
                      include_nodes: Optional[bool] = None) -> PangenomeJSON:
    """Run the pangtreebuild pipeline and write requested artifacts.

    Builds a poagraph from the multialignment (MAF via DAG, or PO directly),
    optionally builds an affinity tree ('poa' or 'tree' strategy), writes the
    requested output files (PO, FASTA, newick) into ``output_dir``, and
    serializes everything to ``pangenome.json``.

    Args:
        output_dir: Directory where all result files are written.
        datatype: Kind of sequence data being processed.
        multialignment: Parsed MAF or PO multialignment.
        fasta_provider: Source of bases missing from the multialignment.
        blosum: Substitution matrix used by the consensus algorithms.
        consensus_choice: 'poa', 'tree', or anything else for no tree.
        output_po/output_fasta/output_newick: Which artifacts to write.
        missing_symbol: Symbol used for missing bases.
        metadata: Optional per-sequence metadata CSV.
        hbmin: Threshold for the 'poa' strategy.
        stop, p: Parameters for the 'tree' strategy.
        fasta_path: Recorded in task parameters only (provenance).
        include_nodes: Recorded in task parameters (output_with_nodes).

    Returns:
        The PangenomeJSON object (also saved as pangenome.json).
    """
    start = time.time()
    # Send detailed logs to files inside the output directory and silence
    # the console for the duration of the run.
    logprocess.add_file_handler_to_logger(output_dir, "details",
                                          "details.log", propagate=False)
    logprocess.add_file_handler_to_logger(output_dir, "", "details.log",
                                          propagate=False)
    logprocess.remove_console_handler_from_root_logger()

    poagraph, dagmaf = None, None
    if isinstance(multialignment, Maf):
        poagraph, dagmaf = builder.build_from_dagmaf(multialignment,
                                                     fasta_provider, metadata)
    elif isinstance(multialignment, Po):
        poagraph = builder.build_from_po(multialignment, metadata)

    consensus_output_dir = tools.get_child_dir(output_dir, "consensus")
    consensus_tree = None
    if consensus_choice == 'poa':
        consensus_tree = build_poa_affinity_tree(poagraph, blosum,
                                                 consensus_output_dir,
                                                 hbmin, True)
    elif consensus_choice == 'tree':
        consensus_tree = build_affinity_tree(poagraph, blosum,
                                             consensus_output_dir,
                                             stop, p, True)

    if output_po:
        pangenome_po = poagraph_to_PangenomePO(poagraph)
        tools.save_to_file(pangenome_po,
                           tools.get_child_path(output_dir, "poagraph.po"))

    if output_fasta:
        sequences_fasta = poagraph_to_fasta(poagraph)
        tools.save_to_file(sequences_fasta,
                           tools.get_child_path(output_dir, "sequences.fasta"))
        if consensus_tree:
            consensuses_fasta = affinity_tree_to_fasta(poagraph,
                                                       consensus_tree)
            tools.save_to_file(
                consensuses_fasta,
                tools.get_child_path(output_dir, "consensuses.fasta"))

    # BUGFIX: previously, requesting newick output without a consensus tree
    # (consensus_choice neither 'poa' nor 'tree') crashed with
    # AttributeError on None; the newick artifact is now simply skipped.
    if output_newick and consensus_tree is not None:
        if metadata is not None:
            seq_id_to_metadata = {
                seq_id: seq.seqmetadata
                for seq_id, seq in poagraph.sequences.items()
            }
        else:
            seq_id_to_metadata = None
        affinity_tree_newick = consensus_tree.as_newick(seq_id_to_metadata,
                                                        separate_leaves=True)
        tools.save_to_file(
            affinity_tree_newick,
            tools.get_child_path(output_dir, "affinity_tree.newick"))

    end = time.time()
    task_parameters = TaskParameters(
        running_time=f"{end - start}s",
        multialignment_file_path=multialignment.filename,
        multialignment_format=type(multialignment).__name__,
        datatype=datatype.name,
        metadata_file_path=metadata.filename if metadata else None,
        blosum_file_path=blosum.filepath.name,
        output_path=None,
        output_po=output_po,
        output_fasta=output_fasta,
        output_with_nodes=include_nodes,
        verbose=True,
        raw_maf=False,
        fasta_provider=type(fasta_provider).__name__,
        missing_base_symbol=missing_symbol.value,
        fasta_source_file=fasta_path,
        consensus_type=consensus_choice,
        hbmin=hbmin.value if hbmin else None,
        stop=stop.value if stop else None,
        p=p.value if p else None)
    pangenomejson = to_PangenomeJSON(task_parameters=task_parameters,
                                     poagraph=poagraph,
                                     dagmaf=dagmaf,
                                     affinity_tree=consensus_tree)
    pangenome_json_str = to_json(pangenomejson)
    tools.save_to_file(pangenome_json_str,
                       tools.get_child_path(output_dir, "pangenome.json"))
    return pangenomejson
def get_default_blosum_path():
    """Return the path to the bundled default Blosum80 matrix file."""
    # Matrix ships in the sibling "dependencies" directory of this module.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    return tools.get_child_path(Path(module_dir),
                                "../dependencies/blosum80.mat")
def run_poapangenome(output_dir: Path,
                     datatype: DataType,
                     multialignment: Union[Maf, Po],
                     fasta_provider: Union[FromFile, FromNCBI,
                                           ConstSymbolProvider],
                     blosum: Blosum,
                     consensus_choice: str,
                     output_po: bool,
                     output_fasta: bool,
                     missing_symbol: MissingSymbol,
                     metadata: Optional[MetadataCSV] = None,
                     hbmin: Optional[Hbmin] = None,
                     stop: Optional[Stop] = None,
                     p: Optional[P] = None,
                     fasta_path: Optional[Path] = None) -> PangenomeJSON:
    """Run the poapangenome pipeline and write requested artifacts.

    Legacy counterpart of run_pangtreebuild: builds a poagraph from the
    multialignment (MAF via DAG, or PO directly), optionally builds a
    consensus tree ('poa' or 'tree' strategy), writes the requested output
    files into ``output_dir`` and serializes everything to pangenome.json.

    NOTE(review): the 'tree' strategy hard-codes MAX2()/NODE3() cutoff
    strategies, mirrored as the "MAX2"/"NODE3" strings in TaskParameters
    below — presumably the only supported options here; confirm upstream.

    Returns:
        The PangenomeJSON object (also saved as pangenome.json).
    """
    start = time.time()
    # Send detailed logs to files inside the output directory and silence
    # the console for the duration of the run.
    logprocess.add_file_handler_to_logger(output_dir, "details",
                                          "details.log", propagate=False)
    logprocess.add_file_handler_to_logger(output_dir, "", "details.log",
                                          propagate=False)
    logprocess.remove_console_handler_from_root_logger()
    poagraph, dagmaf = None, None
    if isinstance(multialignment, Maf):
        # MAF input also yields the DAG form, recorded in the final JSON.
        poagraph, dagmaf = Poagraph.build_from_dagmaf(multialignment,
                                                      fasta_provider,
                                                      metadata)
    elif isinstance(multialignment, Po):
        poagraph = Poagraph.build_from_po(multialignment, metadata)
    consensus_output_dir = tools.get_child_dir(output_dir, "consensus")
    # consensus_tree stays None when consensus_choice is neither option.
    consensus_tree = None
    if consensus_choice == 'poa':
        consensus_tree = simple_tree_generator.get_simple_consensus_tree(
            poagraph, blosum, consensus_output_dir, hbmin, True)
    elif consensus_choice == 'tree':
        consensus_tree = tree_generator.get_consensus_tree(
            poagraph, blosum, consensus_output_dir, stop, p,
            MAX2(), NODE3(), True)
    if output_po:
        pangenome_po = poagraph_to_PangenomePO(poagraph)
        tools.save_to_file(pangenome_po,
                           tools.get_child_path(output_dir, "poagraph.po"))
    if output_fasta:
        sequences_fasta = poagraph_to_fasta(poagraph)
        tools.save_to_file(sequences_fasta,
                           tools.get_child_path(output_dir,
                                                "sequences.fasta"))
        # Consensus FASTA is only written when a tree was actually built.
        if consensus_tree:
            consensuses_fasta = consensuses_tree_to_fasta(
                poagraph, consensus_tree)
            tools.save_to_file(
                consensuses_fasta,
                tools.get_child_path(output_dir, "consensuses.fasta"))
    end = time.time()
    # Record run provenance alongside the results.
    task_parameters = TaskParameters(
        running_time=f"{end - start}s",
        multialignment_file_path=multialignment.filename,
        multialignment_format=str(type(multialignment).__name__),
        datatype=datatype.name,
        metadata_file_path=metadata.filename if metadata else None,
        blosum_file_path=blosum.filepath.name,
        output_path=None,
        output_po=output_po,
        output_fasta=output_fasta,
        output_with_nodes=True,
        verbose=True,
        raw_maf=False,
        fasta_provider=str(type(fasta_provider).__name__),
        missing_base_symbol=missing_symbol.value,
        fasta_source_file=fasta_path,
        consensus_type=consensus_choice,
        hbmin=hbmin.value if hbmin else None,
        max_cutoff_option="MAX2",
        search_range=None,
        node_cutoff_option="NODE3",
        multiplier=None,
        stop=stop.value if stop else None,
        p=p.value if p else None)
    pangenomejson = to_PangenomeJSON(task_parameters=task_parameters,
                                     poagraph=poagraph,
                                     dagmaf=dagmaf,
                                     consensuses_tree=consensus_tree)
    pangenome_json_str = to_json(pangenomejson)
    tools.save_to_file(pangenome_json_str,
                       tools.get_child_path(output_dir, "pangenome.json"))
    return pangenomejson
def run_pangenome(run_processing_btn_click,
                  session_state: Dict,
                  session_dir: str,
                  datatype: str,
                  multialignment_content: str,
                  multialignment_filename: str,
                  fasta_provider_choice: str,
                  fasta_content: str,
                  fasta_filename: str,
                  missing_symbol: str,
                  consensus_choice: str,
                  output_config: List[str],
                  metadata_content: str,
                  metadata_filename: str,
                  hbmin_value: float,
                  stop_value: float,
                  p_value: float):
    """Dash callback: decode the uploaded inputs, run pangtreebuild, and zip
    the results.

    NOTE(review): the error path returns a bare session_state dict while the
    success path returns a 2-tuple (state, flag) — the return shapes are
    inconsistent; confirm against the callback's declared outputs.

    Returns:
        On error: the session_state dict with an "error" message set.
        On success: a 2-tuple of (state dict with "last_output_zip",
        "jsonpangenome" and empty "error", True).
    """
    # Ignore the initial callback firing before any click happened.
    if run_processing_btn_click == 0:
        raise PreventUpdate()
    if session_state is None:
        session_state = {}
    if session_dir is None:
        session_dir = tools.create_output_dir()
    else:
        session_dir = Path(session_dir)
    # Each processing run gets its own timestamped subdirectory.
    current_processing_output_dir_name = tools.get_child_path(
        session_dir, tools.get_current_time())
    tools.create_dir(current_processing_output_dir_name)

    if multialignment_filename and "maf" in multialignment_filename:
        multialignment = Maf(StringIO(
            tools.decode_content(multialignment_content)),
            file_name=multialignment_filename)
    elif multialignment_filename and "po" in multialignment_filename:
        multialignment = Po(StringIO(
            tools.decode_content(multialignment_content)),
            file_name=multialignment_filename)
    else:
        session_state[
            "error"] = "Cannot create Poagraph. Only MAF and PO files are supported."
        return session_state

    # Empty string means "use the library default symbol".
    missing_symbol = MissingBase(
        missing_symbol) if missing_symbol != "" else MissingBase()

    fasta_path = None
    # NOTE(review): fasta_provider stays unbound for any other choice and the
    # run_pangtreebuild call below would raise NameError — presumably the UI
    # only ever offers "NCBI"/"File"; confirm.
    if fasta_provider_choice == "NCBI":
        fasta_provider = FromNCBI(use_cache=True)
    elif fasta_provider_choice == "File":
        fasta_path = tools.get_child_path(current_processing_output_dir_name,
                                          fasta_filename).resolve()
        # Decide zip handling once instead of re-testing the name.
        is_zip = "zip" in fasta_filename
        if is_zip:
            fasta_decoded_content = tools.decode_zip_content(fasta_content)
        else:
            fasta_decoded_content = tools.decode_content(fasta_content)
        # Zip content is binary and must be written in 'wb' mode.
        tools.save_to_file(fasta_decoded_content, fasta_path,
                           "wb" if is_zip else "w")
        fasta_provider = FromFile(fasta_path)

    blosum_path = pangtreebuild.get_default_blosum_path()
    blosum_contents = tools.read_file_to_stream(blosum_path)
    blosum = Blosum(blosum_contents, blosum_path)
    metadata = MetadataCSV(StringIO(tools.decode_content(metadata_content)),
                           metadata_filename) if metadata_content else None
    pangenomejson = pangtreebuild.run_pangtreebuild(
        output_dir=current_processing_output_dir_name,
        datatype=DataType[datatype],
        multialignment=multialignment,
        fasta_provider=fasta_provider,
        blosum=blosum,
        consensus_choice=consensus_choice,
        output_po="po" in output_config,
        output_fasta="fasta" in output_config,
        output_newick="newick" in output_config,
        missing_symbol=missing_symbol,
        metadata=metadata,
        hbmin=Hbmin(hbmin_value) if hbmin_value else None,
        stop=Stop(stop_value) if stop_value else None,
        p=P(p_value) if p_value else None,
        # NOTE(review): this passes the file *name*, not the resolved
        # fasta_path computed above — recorded for provenance only; confirm.
        fasta_path=fasta_filename if fasta_filename else None,
        include_nodes=True  # if "nodes" in output_config else False
    )
    pangenome_json_str = to_json(pangenomejson)
    current_processing_output_zip = tools.dir_to_zip(
        current_processing_output_dir_name)
    # Keep only the last two path components as the user-visible name.
    current_processing_short_name = "/".join(
        str(current_processing_output_zip).split("/")[-2:])
    return {
        "last_output_zip": current_processing_short_name,
        "jsonpangenome": pangenome_json_str,
        "error": ""
    }, True