def validate_fasta_file(file_content, session_dir, file_name):
    """Store an uploaded FASTA file and report whether it is valid.

    The upload is decoded with ``tools.decode_zip_content`` when ``".zip"``
    occurs in ``file_name`` and with ``tools.decode_content`` otherwise. The
    decoded data is saved to the path returned by
    ``tools.get_child_path(Path(session_dir), file_name)`` and that path is
    then checked with ``pangtreebuild.fasta_file_is_valid``.

    Args:
        file_content: Encoded upload content, or None.
        session_dir: Directory used as the parent of the saved file, or None.
        file_name: Name of the uploaded file, or None.

    Returns:
        None when any argument is None. Otherwise a dict with the keys
        ``"is_correct"`` (True when the validator returned an empty
        message), ``"filename"`` and ``"error"`` (the validator message).
    """
    if any(arg is None for arg in (file_content, file_name, session_dir)):
        return None

    # Zip uploads are decoded differently and written in binary mode.
    is_zip = ".zip" in file_name
    if is_zip:
        decoded = tools.decode_zip_content(file_content)
    else:
        decoded = tools.decode_content(file_content)

    fasta_path = tools.get_child_path(Path(session_dir), file_name)
    if is_zip:
        tools.save_to_file(decoded, fasta_path, 'wb')
    else:
        tools.save_to_file(decoded, fasta_path)

    error_message = pangtreebuild.fasta_file_is_valid(fasta_path)
    return {
        "is_correct": len(error_message) == 0,
        "filename": file_name,
        "error": error_message,
    }
def validate_blosum_file(file_content, missing_symbol, fasta_provider_choice, file_name):
    """Validate an uploaded BLOSUM file, optionally against a missing-base symbol.

    Args:
        file_content: Encoded upload content of the BLOSUM file, or None.
        missing_symbol: Symbol entered for missing nucleotides/proteins; an
            empty string means no symbol was given.
        fasta_provider_choice: Selected FASTA provider. The symbol is only
            passed to the validator when this equals ``"Symbol"``.
        file_name: Name of the uploaded BLOSUM file, or None.

    Returns:
        None when ``file_content`` or ``file_name`` is None. Otherwise a dict
        with the keys ``"is_correct"``, ``"filename"``, ``"symbol"`` and
        ``"validation_message"``.
    """
    # Nothing uploaded: there is nothing to validate. Because of this guard
    # no fallback to the default BLOSUM file can happen in this function.
    if file_content is None or file_name is None:
        return None

    # The symbol is only checked when the "Symbol" provider is selected and
    # a non-empty symbol was supplied.
    if fasta_provider_choice == "Symbol" and missing_symbol != "":
        symbol = missing_symbol
    else:
        symbol = None

    blosum_file_content = StringIO(tools.decode_content(file_content))
    file_source_info = f"provided BLOSUM file: {file_name}"

    error_message = pangtreebuild.blosum_file_is_valid(blosum_file_content, symbol)
    is_correct = len(error_message) == 0
    if is_correct:
        symbol_info = (
            f"It contains symbol for missing nucleotides/proteins: {symbol}."
            if symbol else ""
        )
        validation_message = f"The {file_source_info} is correct. " + symbol_info
    else:
        validation_message = (
            f"Error in {file_source_info} or symbol for missing nucleotides/proteins: {symbol}. "
            f"Reason: {error_message}"
        )

    return {
        "is_correct": is_correct,
        "filename": file_name,
        "symbol": symbol,
        "validation_message": validation_message,
    }
def read_pangenome_upload(upload_content) -> PangenomeJSON:
    """Turn uploaded pangenome JSON into a ``PangenomeJSON`` object.

    Content starting with the ``data:application/json;base64`` prefix is
    first passed through ``tools.decode_content``; any other non-empty
    content is used as-is. The result is handed to
    ``tools.unjsonify_jsonpangenome``.

    Args:
        upload_content: Uploaded content string; may be empty or None.

    Returns:
        The value returned by ``tools.unjsonify_jsonpangenome``.

    Raises:
        PreventUpdate: When ``upload_content`` is empty or None.
    """
    if not upload_content:
        raise PreventUpdate()

    is_encoded = upload_content.startswith("data:application/json;base64")
    jsonified_pangenome = (
        tools.decode_content(upload_content) if is_encoded else upload_content
    )
    return tools.unjsonify_jsonpangenome(jsonified_pangenome)
def validate_metadata_file(file_content, file_name):
    """Decode an upload and check it with the multialignment validator.

    NOTE(review): despite its name, this function calls
    ``pangtreebuild.multialignment_file_is_valid``. A later definition in
    this file reuses the name ``validate_metadata_file`` and rebinds it at
    module level, so unless this one is registered some other way (e.g. by
    a decorator not visible here) it is unreachable by name. Consider
    renaming it to reflect that it validates multialignment files.

    Args:
        file_content: Encoded upload content, or None.
        file_name: Name of the uploaded file, or None.

    Returns:
        None when either argument is None. Otherwise a dict with the keys
        ``"is_correct"`` (True when the validator returned an empty
        message), ``"filename"`` and ``"error"`` (the validator message).
    """
    if file_content is None or file_name is None:
        return None

    decoded = tools.decode_content(file_content)
    error_message = pangtreebuild.multialignment_file_is_valid(decoded, file_name)
    return {
        "is_correct": len(error_message) == 0,
        "filename": file_name,
        "error": error_message,
    }
def validate_metadata_file(file_content, file_name):
    """Decode an uploaded metadata file and report whether it is valid.

    The content is decoded with ``tools.decode_content`` and checked with
    ``pangtreebuild.metadata_file_is_valid``.

    Args:
        file_content: Encoded upload content, or None.
        file_name: Name of the uploaded file, or None.

    Returns:
        None when either argument is None. Otherwise a dict with the keys
        ``"is_correct"`` (True when the validator returned an empty
        message), ``"filename"`` and ``"error"`` (the validator message).
    """
    nothing_uploaded = file_content is None or file_name is None
    if nothing_uploaded:
        return None

    decoded = tools.decode_content(file_content)
    error_message = pangtreebuild.metadata_file_is_valid(decoded, file_name)
    return {
        "is_correct": len(error_message) == 0,
        "filename": file_name,
        "error": error_message,
    }
def run_pangenome(run_processing_btn_click,
                  session_state: Dict,
                  session_dir: str,
                  datatype: str,
                  multialignment_content: str,
                  multialignment_filename: str,
                  fasta_provider_choice: str,
                  fasta_content: str,
                  fasta_filename: str,
                  missing_symbol: str,
                  consensus_choice: str,
                  output_config: List[str],
                  metadata_content: str,
                  metadata_filename: str,
                  hbmin_value: float,
                  stop_value: float,
                  p_value: float):
    """Run ``pangtreebuild.run_pangtreebuild`` on the uploaded inputs.

    Args:
        run_processing_btn_click: Click counter of the run button; a value
            of 0 aborts the call.
        session_state: Current session state dict, or None.
        session_dir: Session directory path, or None to have
            ``tools.create_output_dir`` supply one.
        datatype: Name of a ``DataType`` member.
        multialignment_content: Encoded multialignment upload.
        multialignment_filename: Its file name; must contain ``"maf"`` or
            ``"po"`` (``"maf"`` is checked first).
        fasta_provider_choice: ``"NCBI"``, ``"File"``, or another value.
        fasta_content: Encoded FASTA upload (used for ``"File"``).
        fasta_filename: Name of the FASTA upload.
        missing_symbol: Symbol for missing bases; ``""`` selects the
            ``MissingBase`` default.
        consensus_choice: Passed through to ``run_pangtreebuild``.
        output_config: Requested outputs; ``"po"``, ``"fasta"`` and
            ``"newick"`` are recognised.
        metadata_content: Encoded metadata upload, or empty/None.
        metadata_filename: Name of the metadata upload.
        hbmin_value: Wrapped in ``Hbmin`` when truthy, else None is passed.
        stop_value: Wrapped in ``Stop`` when truthy, else None is passed.
        p_value: Wrapped in ``P`` when truthy, else None is passed.

    Returns:
        On success, the tuple ``(state, True)`` where ``state`` is a new
        dict with the keys ``"last_output_zip"``, ``"jsonpangenome"`` and
        ``"error"`` (empty string). When the multialignment file name
        matches neither format, ``session_state`` alone with its
        ``"error"`` key set.
        NOTE(review): the two return shapes differ (dict vs. tuple); this
        is kept unchanged for existing callers.

    Raises:
        PreventUpdate: When ``run_processing_btn_click`` equals 0.
    """
    if run_processing_btn_click == 0:
        raise PreventUpdate()
    if session_state is None:
        session_state = {}

    if session_dir is None:
        session_dir = tools.create_output_dir()
    else:
        session_dir = Path(session_dir)

    # Each run writes into its own child directory named after the current time.
    current_processing_output_dir_name = tools.get_child_path(
        session_dir, tools.get_current_time())
    tools.create_dir(current_processing_output_dir_name)

    if multialignment_filename and "maf" in multialignment_filename:
        multialignment = Maf(
            StringIO(tools.decode_content(multialignment_content)),
            file_name=multialignment_filename)
    elif multialignment_filename and "po" in multialignment_filename:
        multialignment = Po(
            StringIO(tools.decode_content(multialignment_content)),
            file_name=multialignment_filename)
    else:
        session_state["error"] = "Cannot create Poagraph. Only MAF and PO files are supported."
        return session_state

    missing_symbol = MissingBase(missing_symbol) if missing_symbol != "" else MissingBase()

    # Default to None so that a provider choice other than "NCBI"/"File"
    # does not fail with UnboundLocalError when the provider is passed on.
    # NOTE(review): confirm run_pangtreebuild accepts fasta_provider=None.
    fasta_provider = None
    if fasta_provider_choice == "NCBI":
        fasta_provider = FromNCBI(use_cache=True)
    elif fasta_provider_choice == "File":
        fasta_path = tools.get_child_path(current_processing_output_dir_name,
                                          fasta_filename).resolve()
        # Zip uploads are decoded differently and written in binary mode.
        if "zip" in fasta_filename:
            save_mode = "wb"
            fasta_decoded_content = tools.decode_zip_content(fasta_content)
        else:
            save_mode = "w"
            fasta_decoded_content = tools.decode_content(fasta_content)
        tools.save_to_file(fasta_decoded_content, fasta_path, save_mode)
        fasta_provider = FromFile(fasta_path)

    # The default BLOSUM file is always used here.
    blosum_path = pangtreebuild.get_default_blosum_path()
    blosum_contents = tools.read_file_to_stream(blosum_path)
    blosum = Blosum(blosum_contents, blosum_path)

    if metadata_content:
        metadata = MetadataCSV(StringIO(tools.decode_content(metadata_content)),
                               metadata_filename)
    else:
        metadata = None

    pangenomejson = pangtreebuild.run_pangtreebuild(
        output_dir=current_processing_output_dir_name,
        datatype=DataType[datatype],
        multialignment=multialignment,
        fasta_provider=fasta_provider,
        blosum=blosum,
        consensus_choice=consensus_choice,
        output_po="po" in output_config,
        output_fasta="fasta" in output_config,
        output_newick="newick" in output_config,
        missing_symbol=missing_symbol,
        metadata=metadata,
        hbmin=Hbmin(hbmin_value) if hbmin_value else None,
        stop=Stop(stop_value) if stop_value else None,
        p=P(p_value) if p_value else None,
        fasta_path=fasta_filename if fasta_filename else None,
        include_nodes=True,  # always on; not driven by output_config
    )
    pangenome_json_str = to_json(pangenomejson)

    current_processing_output_zip = tools.dir_to_zip(
        current_processing_output_dir_name)
    # Keep only the last two "/"-separated components of the zip path.
    current_processing_short_name = "/".join(
        str(current_processing_output_zip).split("/")[-2:])

    return {
        "last_output_zip": current_processing_short_name,
        "jsonpangenome": pangenome_json_str,
        "error": "",
    }, True