Ejemplo n.º 1
0
def validate_fasta_file(file_content, session_dir, file_name):
    """Decode an uploaded FASTA file, store it in the session directory and validate it.

    Args:
        file_content: Encoded upload payload; decoded with
            ``tools.decode_zip_content`` when ``".zip"`` occurs in
            ``file_name`` and with ``tools.decode_content`` otherwise.
        session_dir: Directory in which the decoded file is written.
        file_name: Name of the uploaded file; also the name of the stored copy.

    Returns:
        ``None`` when any argument is ``None``. Otherwise a dict with the keys
        ``"is_correct"`` (``True`` when the validator's message has length
        zero), ``"filename"`` and ``"error"`` (the validator's message).
    """
    if any(arg is None for arg in (file_content, file_name, session_dir)):
        return None

    is_zip = ".zip" in file_name
    if is_zip:
        decoded = tools.decode_zip_content(file_content)
    else:
        decoded = tools.decode_content(file_content)

    fasta_path = tools.get_child_path(Path(session_dir), file_name)

    # Zip payloads are written with an explicit binary mode; everything else
    # relies on the default mode of tools.save_to_file.
    if is_zip:
        tools.save_to_file(decoded, fasta_path, 'wb')
    else:
        tools.save_to_file(decoded, fasta_path)

    error_message = pangtreebuild.fasta_file_is_valid(fasta_path)
    return {
        "is_correct": len(error_message) == 0,
        "filename": file_name,
        "error": error_message,
    }
Ejemplo n.º 2
0
def validate_blosum_file(file_content, missing_symbol, fasta_provider_choice, file_name):
    """Validate an uploaded BLOSUM file, optionally together with a missing-base symbol.

    Args:
        file_content: Encoded upload payload, decoded with
            ``tools.decode_content``.
        missing_symbol: Symbol used for missing nucleotides/proteins. It is
            only passed to the validator when ``fasta_provider_choice`` is
            ``"Symbol"`` and the symbol is not the empty string.
        fasta_provider_choice: Selected FASTA provider option.
        file_name: Name of the uploaded file, used in the returned message.

    Returns:
        ``None`` when ``file_content`` or ``file_name`` is ``None``. Otherwise
        a dict with the keys ``"is_correct"``, ``"filename"``, ``"symbol"``
        and ``"validation_message"``.
    """
    if file_content is None or file_name is None:
        return None

    # There is deliberately no "default BLOSUM file" fallback here: the guard
    # above already returns for file_content is None, so such a branch could
    # never execute. NOTE(review): if validating the default BLOSUM file
    # without an upload is wanted, the guard has to be relaxed, which changes
    # what callers receive for a missing upload.
    use_symbol = fasta_provider_choice == "Symbol" and missing_symbol != ""
    symbol = missing_symbol if use_symbol else None

    blosum_file_content = StringIO(tools.decode_content(file_content))
    file_source_info = f"provided BLOSUM file: {file_name}"

    error_message = pangtreebuild.blosum_file_is_valid(blosum_file_content, symbol)
    is_correct = len(error_message) == 0
    if is_correct:
        symbol_info = f"It contains symbol for missing nucleotides/proteins: {symbol}." if symbol else ""
        validation_message = f"The {file_source_info} is correct. " + symbol_info
    else:
        validation_message = f"Error in {file_source_info} or symbol for missing nucleotides/proteins: {symbol}. " \
            f"Reason: {error_message}"

    return {"is_correct": is_correct,
            "filename": file_name,
            "symbol": symbol,
            "validation_message": validation_message}
Ejemplo n.º 3
0
def read_pangenome_upload(upload_content) -> PangenomeJSON:
    """Turn an uploaded pangenome JSON payload into a ``PangenomeJSON`` object.

    Args:
        upload_content: Either a string starting with
            ``data:application/json;base64`` (decoded with
            ``tools.decode_content``) or a string that is passed on unchanged.

    Returns:
        The result of ``tools.unjsonify_jsonpangenome`` for the payload.

    Raises:
        PreventUpdate: If ``upload_content`` is falsy (``None`` or empty).
    """
    if not upload_content:
        raise PreventUpdate()

    is_base64_upload = upload_content.startswith("data:application/json;base64")
    payload = tools.decode_content(upload_content) if is_base64_upload else upload_content
    return tools.unjsonify_jsonpangenome(payload)
Ejemplo n.º 4
0
def validate_metadata_file(file_content, file_name):
    """Decode an uploaded metadata file and validate it.

    Args:
        file_content: Encoded upload payload, decoded with
            ``tools.decode_content``.
        file_name: Name of the uploaded file; passed to the validator and
            echoed in the result.

    Returns:
        ``None`` when either argument is ``None``. Otherwise a dict with the
        keys ``"is_correct"`` (``True`` when the validator's message has
        length zero), ``"filename"`` and ``"error"`` (the validator's message).
    """
    if file_content is None or file_name is None:
        return None

    file_content = tools.decode_content(file_content)
    # Use the metadata validator: this function previously called the
    # multialignment validator, which checks a different kind of file.
    # NOTE(review): confirm pangtreebuild.metadata_file_is_valid is available
    # in the pangtreebuild module this file imports.
    error_message = pangtreebuild.metadata_file_is_valid(file_content, file_name)
    return {
        "is_correct": len(error_message) == 0,
        "filename": file_name,
        "error": error_message,
    }
Ejemplo n.º 5
0
def validate_metadata_file(file_content, file_name):
    """Decode an uploaded metadata file and report whether it is valid.

    Args:
        file_content: Encoded upload payload, decoded with
            ``tools.decode_content``.
        file_name: Name of the uploaded file; passed to the validator and
            echoed in the result.

    Returns:
        ``None`` when either argument is ``None``. Otherwise a dict with the
        keys ``"is_correct"``, ``"filename"`` and ``"error"``, where
        ``"error"`` is the message returned by
        ``pangtreebuild.metadata_file_is_valid`` and ``"is_correct"`` is
        ``True`` exactly when that message has length zero.
    """
    nothing_uploaded = file_content is None or file_name is None
    if nothing_uploaded:
        return None

    decoded = tools.decode_content(file_content)
    error_message = pangtreebuild.metadata_file_is_valid(decoded, file_name)

    result = {}
    result["is_correct"] = len(error_message) == 0
    result["filename"] = file_name
    result["error"] = error_message
    return result
Ejemplo n.º 6
0
def run_pangenome(run_processing_btn_click, session_state: Dict,
                  session_dir: str, datatype: str, multialignment_content: str,
                  multialignment_filename: str, fasta_provider_choice: str,
                  fasta_content: str, fasta_filename: str, missing_symbol: str,
                  consensus_choice: str, output_config: List[str],
                  metadata_content: str, metadata_filename: str,
                  hbmin_value: float, stop_value: float, p_value: float):
    """Run pangtreebuild on the uploaded inputs and package the results.

    A fresh output directory named after the current time is created below
    the session directory, the multialignment (MAF or PO) is parsed, an
    optional FASTA provider and metadata are prepared, and
    ``pangtreebuild.run_pangtreebuild`` is invoked. The output directory is
    then zipped.

    Args:
        run_processing_btn_click: Click counter of the "run" button.
        session_state: Current session state dict, or ``None``.
        session_dir: Session directory, or ``None`` to create a new one via
            ``tools.create_output_dir``.
        datatype: Name of a ``DataType`` member.
        multialignment_content: Encoded multialignment upload.
        multialignment_filename: Name of the multialignment upload; its
            format is detected from this name.
        fasta_provider_choice: ``"NCBI"`` or ``"File"``; any other value
            results in no FASTA provider.
        fasta_content: Encoded FASTA upload (used for ``"File"``).
        fasta_filename: Name of the FASTA upload (used for ``"File"``).
        missing_symbol: Symbol for missing bases; ``""`` selects the
            ``MissingBase`` default.
        consensus_choice: Passed through to ``run_pangtreebuild``.
        output_config: Collection of requested outputs; ``"po"``,
            ``"fasta"`` and ``"newick"`` are recognised.
        metadata_content: Encoded metadata upload, or a falsy value.
        metadata_filename: Name of the metadata upload.
        hbmin_value: Wrapped in ``Hbmin`` when truthy, otherwise ``None``.
        stop_value: Wrapped in ``Stop`` when truthy, otherwise ``None``.
        p_value: Wrapped in ``P`` when truthy, otherwise ``None``.

    Returns:
        On success, a tuple ``(state, True)`` where ``state`` is a dict with
        the keys ``"last_output_zip"``, ``"jsonpangenome"`` and ``"error"``.
        When the multialignment file name is missing or matches neither
        format, ``session_state`` alone (with ``"error"`` set) is returned.
        NOTE(review): the two return shapes differ (bare dict vs. tuple);
        confirm this is what the caller expects.

    Raises:
        PreventUpdate: If ``run_processing_btn_click`` equals ``0``.
    """
    if run_processing_btn_click == 0:
        raise PreventUpdate()
    if session_state is None:
        session_state = {}
    if session_dir is None:
        session_dir = tools.create_output_dir()
    else:
        session_dir = Path(session_dir)

    current_processing_output_dir_name = tools.get_child_path(
        session_dir, tools.get_current_time())
    tools.create_dir(current_processing_output_dir_name)

    # Format detection is a substring test on the file name, with "maf"
    # checked first; any other name containing "po" is treated as a PO file.
    if multialignment_filename and "maf" in multialignment_filename:
        multialignment = Maf(
            StringIO(tools.decode_content(multialignment_content)),
            file_name=multialignment_filename)
    elif multialignment_filename and "po" in multialignment_filename:
        multialignment = Po(
            StringIO(tools.decode_content(multialignment_content)),
            file_name=multialignment_filename)
    else:
        session_state[
            "error"] = "Cannot create Poagraph. Only MAF and PO files are supported."
        return session_state

    missing_symbol = MissingBase(
        missing_symbol) if missing_symbol != "" else MissingBase()

    # Bind the provider up front so that a choice other than "NCBI"/"File"
    # does not leave the name unassigned (previously an UnboundLocalError at
    # the run_pangtreebuild call).
    # NOTE(review): confirm run_pangtreebuild accepts fasta_provider=None.
    fasta_provider = None
    fasta_path = None
    if fasta_provider_choice == "NCBI":
        fasta_provider = FromNCBI(use_cache=True)
    elif fasta_provider_choice == "File":
        fasta_path = tools.get_child_path(current_processing_output_dir_name,
                                          fasta_filename).resolve()
        # Zipped uploads are decoded to bytes and written in binary mode.
        if "zip" in fasta_filename:
            save_mode = "wb"
            fasta_decoded_content = tools.decode_zip_content(fasta_content)
        else:
            save_mode = "w"
            fasta_decoded_content = tools.decode_content(fasta_content)
        tools.save_to_file(fasta_decoded_content, fasta_path, save_mode)
        fasta_provider = FromFile(fasta_path)

    blosum_path = pangtreebuild.get_default_blosum_path()
    blosum_contents = tools.read_file_to_stream(blosum_path)
    blosum = Blosum(blosum_contents, blosum_path)

    metadata = MetadataCSV(StringIO(tools.decode_content(metadata_content)),
                           metadata_filename) if metadata_content else None
    pangenomejson = pangtreebuild.run_pangtreebuild(
        output_dir=current_processing_output_dir_name,
        datatype=DataType[datatype],
        multialignment=multialignment,
        fasta_provider=fasta_provider,
        blosum=blosum,
        consensus_choice=consensus_choice,
        output_po="po" in output_config,
        output_fasta="fasta" in output_config,
        output_newick="newick" in output_config,
        missing_symbol=missing_symbol,
        metadata=metadata,
        hbmin=Hbmin(hbmin_value) if hbmin_value else None,
        stop=Stop(stop_value) if stop_value else None,
        p=P(p_value) if p_value else None,
        # The uploaded file name is passed here, not the resolved local path.
        fasta_path=fasta_filename if fasta_filename else None,
        include_nodes=True
    )
    pangenome_json_str = to_json(pangenomejson)

    current_processing_output_zip = tools.dir_to_zip(
        current_processing_output_dir_name)
    # Keep only the last two "/"-separated components of the zip path.
    current_processing_short_name = "/".join(
        str(current_processing_output_zip).split("/")[-2:])
    return {
        "last_output_zip": current_processing_short_name,
        "jsonpangenome": pangenome_json_str,
        "error": ""
    }, True