def validate_fasta_file(file_content, session_dir, file_name):
    """Store an uploaded FASTA file in the session directory and validate it.

    The upload is decoded (with the zip decoder when ``".zip"`` occurs in
    ``file_name``), written under ``session_dir`` as ``file_name`` and then
    checked with ``pangtreebuild.fasta_file_is_valid``.

    Returns:
        ``None`` when any of the three arguments is ``None``; otherwise a
        dict with the keys ``"is_correct"``, ``"filename"`` and ``"error"``.
        ``"is_correct"`` is ``True`` exactly when the validator's message
        has length zero, and ``"error"`` always carries that message.
    """
    if any(arg is None for arg in (file_content, file_name, session_dir)):
        return None

    is_zip = ".zip" in file_name
    decode = tools.decode_zip_content if is_zip else tools.decode_content
    decoded = decode(file_content)

    fasta_path = tools.get_child_path(Path(session_dir), file_name)
    if is_zip:
        # Zip uploads are written with an explicit binary mode.
        tools.save_to_file(decoded, fasta_path, 'wb')
    else:
        tools.save_to_file(decoded, fasta_path)

    error_message = pangtreebuild.fasta_file_is_valid(fasta_path)
    return {
        "is_correct": len(error_message) == 0,
        "filename": file_name,
        "error": error_message
    }
Exemple #2
0
def get_elem_cache_info(pangenome_hash: int) -> Path:
    """Return the path of the pickle cache file for a pangenome hash.

    The file name is the absolute value of ``pangenome_hash`` followed by
    ``.pickle``. It is resolved, via ``tools.get_child_path``, inside the
    ``../cache/`` directory taken relative to this module's directory.
    """
    cache_file_name = f"{abs(pangenome_hash)}.pickle"
    module_dir = Path(os.path.abspath(os.path.dirname(__file__)))
    cache_dir = module_dir.joinpath("../cache/").resolve()
    return tools.get_child_path(cache_dir, cache_file_name)
Exemple #3
0
def run_pangtreebuild(output_dir: Path,
                      datatype: DataType,
                      multialignment: Union[Maf, Po],
                      fasta_provider: Union[FromFile, FromNCBI,
                                            ConstBaseProvider],
                      blosum: Blosum,
                      consensus_choice: str,
                      output_po: bool,
                      output_fasta: bool,
                      output_newick: bool,
                      missing_symbol: MissingBase,
                      metadata: Optional[MetadataCSV] = None,
                      hbmin: Optional[Hbmin] = None,
                      stop: Optional[Stop] = None,
                      p: Optional[P] = None,
                      fasta_path: Optional[Path] = None,
                      include_nodes: Optional[bool] = None) -> PangenomeJSON:
    """Build a poagraph, optionally an affinity tree, and write the results.

    Args:
        output_dir: Directory that receives ``details.log``, the
            ``consensus`` sub-directory and every requested output file.
        datatype: Only its ``name`` is recorded in the task parameters.
        multialignment: A ``Maf`` is built with
            ``builder.build_from_dagmaf``; a ``Po`` with
            ``builder.build_from_po``.
        fasta_provider: Passed to ``builder.build_from_dagmaf`` (MAF input
            only); its class name is recorded in the task parameters.
        blosum: Passed to the affinity tree builders; ``blosum.filepath.name``
            is recorded in the task parameters.
        consensus_choice: ``'poa'`` selects ``build_poa_affinity_tree``,
            ``'tree'`` selects ``build_affinity_tree``; any other value
            means no affinity tree is built.
        output_po: Write ``poagraph.po``.
        output_fasta: Write ``sequences.fasta`` and, when a tree was built,
            ``consensuses.fasta``.
        output_newick: Write ``affinity_tree.newick`` when a tree was built.
        missing_symbol: Only its ``value`` is recorded in the task parameters.
        metadata: Passed to the poagraph builders. When given, the Newick
            export receives a mapping from sequence id to sequence metadata.
        hbmin: Passed to ``build_poa_affinity_tree``.
        stop: Passed to ``build_affinity_tree``.
        p: Passed to ``build_affinity_tree``.
        fasta_path: Recorded as ``fasta_source_file`` in the task parameters.
        include_nodes: Recorded as ``output_with_nodes`` in the task
            parameters.

    Returns:
        The ``PangenomeJSON`` object, which is also serialised to
        ``pangenome.json`` in ``output_dir``.
    """
    start = time.time()
    logprocess.add_file_handler_to_logger(output_dir,
                                          "details",
                                          "details.log",
                                          propagate=False)
    logprocess.add_file_handler_to_logger(output_dir,
                                          "",
                                          "details.log",
                                          propagate=False)
    logprocess.remove_console_handler_from_root_logger()
    poagraph, dagmaf = None, None
    if isinstance(multialignment, Maf):
        poagraph, dagmaf = builder.build_from_dagmaf(multialignment,
                                                     fasta_provider, metadata)
    elif isinstance(multialignment, Po):
        poagraph = builder.build_from_po(multialignment, metadata)

    consensus_output_dir = tools.get_child_dir(output_dir, "consensus")
    consensus_tree = None
    if consensus_choice == 'poa':
        consensus_tree = build_poa_affinity_tree(poagraph, blosum,
                                                 consensus_output_dir, hbmin,
                                                 True)
    elif consensus_choice == 'tree':
        consensus_tree = build_affinity_tree(poagraph, blosum,
                                             consensus_output_dir, stop, p,
                                             True)

    if output_po:
        pangenome_po = poagraph_to_PangenomePO(poagraph)
        tools.save_to_file(pangenome_po,
                           tools.get_child_path(output_dir, "poagraph.po"))

    if output_fasta:
        sequences_fasta = poagraph_to_fasta(poagraph)
        tools.save_to_file(sequences_fasta,
                           tools.get_child_path(output_dir, "sequences.fasta"))
        if consensus_tree:
            consensuses_fasta = affinity_tree_to_fasta(poagraph,
                                                       consensus_tree)
            tools.save_to_file(
                consensuses_fasta,
                tools.get_child_path(output_dir, "consensuses.fasta"))

    # A Newick export needs a tree. Without this guard a request for Newick
    # output with no affinity tree (consensus_choice neither 'poa' nor
    # 'tree') failed with AttributeError on ``None.as_newick``. The export
    # is skipped instead, mirroring how consensuses.fasta is handled above.
    if output_newick and consensus_tree is not None:
        if metadata is not None:
            seq_id_to_metadata = {
                seq_id: seq.seqmetadata
                for seq_id, seq in poagraph.sequences.items()
            }
        else:
            seq_id_to_metadata = None

        affinity_tree_newick = consensus_tree.as_newick(seq_id_to_metadata,
                                                        separate_leaves=True)

        tools.save_to_file(
            affinity_tree_newick,
            tools.get_child_path(output_dir, "affinity_tree.newick"))

    # The reported running time covers building and the optional exports,
    # but not the JSON serialisation below.
    end = time.time()

    task_parameters = TaskParameters(
        running_time=f"{end - start}s",
        multialignment_file_path=multialignment.filename,
        multialignment_format=str(type(multialignment).__name__),
        datatype=datatype.name,
        metadata_file_path=metadata.filename if metadata else None,
        blosum_file_path=blosum.filepath.name,
        output_path=None,
        output_po=output_po,
        output_fasta=output_fasta,
        output_with_nodes=include_nodes,
        verbose=True,
        raw_maf=False,
        fasta_provider=str(type(fasta_provider).__name__),
        missing_base_symbol=missing_symbol.value,
        fasta_source_file=fasta_path,
        consensus_type=consensus_choice,
        hbmin=hbmin.value if hbmin else None,
        stop=stop.value if stop else None,
        p=p.value if p else None)

    pangenomejson = to_PangenomeJSON(task_parameters=task_parameters,
                                     poagraph=poagraph,
                                     dagmaf=dagmaf,
                                     affinity_tree=consensus_tree)
    pangenome_json_str = to_json(pangenomejson)
    tools.save_to_file(pangenome_json_str,
                       tools.get_child_path(output_dir, "pangenome.json"))
    return pangenomejson
Exemple #4
0
def get_default_blosum_path():
    """Return the path of the default BLOSUM file (``blosum80.mat``).

    The path is built with ``tools.get_child_path`` from this module's
    directory and the relative location ``../dependencies/blosum80.mat``.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    return tools.get_child_path(Path(f"{module_dir}/"),
                                "../dependencies/blosum80.mat")
Exemple #5
0
def run_poapangenome(output_dir: Path,
                     datatype: DataType,
                     multialignment: Union[Maf, Po],
                     fasta_provider: Union[FromFile, FromNCBI,
                                           ConstSymbolProvider],
                     blosum: Blosum,
                     consensus_choice: str,
                     output_po: bool,
                     output_fasta: bool,
                     missing_symbol: MissingSymbol,
                     metadata: Optional[MetadataCSV] = None,
                     hbmin: Optional[Hbmin] = None,
                     stop: Optional[Stop] = None,
                     p: Optional[P] = None,
                     fasta_path: Optional[Path] = None) -> PangenomeJSON:
    """Build a poagraph, optionally a consensus tree, and write the results.

    A ``Maf`` multialignment is built with ``Poagraph.build_from_dagmaf``
    (using ``fasta_provider``), a ``Po`` with ``Poagraph.build_from_po``.
    ``consensus_choice`` ``'poa'`` runs the simple consensus tree generator
    with ``hbmin``; ``'tree'`` runs the tree generator with ``stop``, ``p``
    and the fixed ``MAX2()`` / ``NODE3()`` options; any other value builds no
    tree.

    Files written to ``output_dir``: ``poagraph.po`` when ``output_po``;
    ``sequences.fasta`` (plus ``consensuses.fasta`` when a tree exists) when
    ``output_fasta``; and always ``pangenome.json``.

    Returns:
        The ``PangenomeJSON`` object that was serialised to
        ``pangenome.json``.
    """

    def write_output(content, file_name):
        # Save ``content`` as ``file_name`` directly under ``output_dir``.
        tools.save_to_file(content,
                           tools.get_child_path(output_dir, file_name))

    def value_or_none(option):
        # Unwrap an optional parameter object; falsy options map to None.
        return option.value if option else None

    started_at = time.time()

    # Attach details.log to the "details" logger name and to the "" name.
    for logger_name in ("details", ""):
        logprocess.add_file_handler_to_logger(output_dir,
                                              logger_name,
                                              "details.log",
                                              propagate=False)
    logprocess.remove_console_handler_from_root_logger()

    poagraph = None
    dagmaf = None
    if isinstance(multialignment, Maf):
        poagraph, dagmaf = Poagraph.build_from_dagmaf(
            multialignment, fasta_provider, metadata)
    elif isinstance(multialignment, Po):
        poagraph = Poagraph.build_from_po(multialignment, metadata)

    tree_dir = tools.get_child_dir(output_dir, "consensus")
    tree = None
    if consensus_choice == 'poa':
        tree = simple_tree_generator.get_simple_consensus_tree(
            poagraph, blosum, tree_dir, hbmin, True)
    elif consensus_choice == 'tree':
        tree = tree_generator.get_consensus_tree(
            poagraph, blosum, tree_dir, stop, p, MAX2(), NODE3(), True)

    if output_po:
        write_output(poagraph_to_PangenomePO(poagraph), "poagraph.po")

    if output_fasta:
        write_output(poagraph_to_fasta(poagraph), "sequences.fasta")
        if tree:
            write_output(consensuses_tree_to_fasta(poagraph, tree),
                         "consensuses.fasta")

    finished_at = time.time()

    task_parameters = TaskParameters(
        running_time=f"{finished_at - started_at}s",
        multialignment_file_path=multialignment.filename,
        multialignment_format=str(type(multialignment).__name__),
        datatype=datatype.name,
        metadata_file_path=metadata.filename if metadata else None,
        blosum_file_path=blosum.filepath.name,
        output_path=None,
        output_po=output_po,
        output_fasta=output_fasta,
        output_with_nodes=True,
        verbose=True,
        raw_maf=False,
        fasta_provider=str(type(fasta_provider).__name__),
        missing_base_symbol=missing_symbol.value,
        fasta_source_file=fasta_path,
        consensus_type=consensus_choice,
        hbmin=value_or_none(hbmin),
        max_cutoff_option="MAX2",
        search_range=None,
        node_cutoff_option="NODE3",
        multiplier=None,
        stop=value_or_none(stop),
        p=value_or_none(p))

    pangenomejson = to_PangenomeJSON(task_parameters=task_parameters,
                                     poagraph=poagraph,
                                     dagmaf=dagmaf,
                                     consensuses_tree=tree)
    write_output(to_json(pangenomejson), "pangenome.json")
    return pangenomejson
def run_pangenome(run_processing_btn_click, session_state: Dict,
                  session_dir: str, datatype: str, multialignment_content: str,
                  multialignment_filename: str, fasta_provider_choice: str,
                  fasta_content: str, fasta_filename: str, missing_symbol: str,
                  consensus_choice: str, output_config: List[str],
                  metadata_content: str, metadata_filename: str,
                  hbmin_value: float, stop_value: float, p_value: float):
    """Run a pangenome build from uploaded inputs and package the results.

    A fresh, time-named sub-directory of the session directory receives all
    outputs. The decoded multialignment (MAF or PO, chosen by substring match
    on the file name), the optional FASTA upload and the optional metadata
    CSV are turned into the objects expected by
    ``pangtreebuild.run_pangtreebuild``; afterwards the output directory is
    zipped.

    Returns:
        On success, a tuple ``(state, True)`` where ``state`` holds
        ``"last_output_zip"`` (last two ``/``-separated components of the zip
        path), ``"jsonpangenome"`` (the serialised result) and an empty
        ``"error"``. When the multialignment is neither MAF nor PO, the
        incoming ``session_state`` with ``"error"`` set is returned on its
        own.

    Raises:
        PreventUpdate: When ``run_processing_btn_click`` equals ``0``.
    """
    if run_processing_btn_click == 0:
        raise PreventUpdate()
    if session_state is None:
        session_state = {}
    if session_dir is None:
        session_dir = tools.create_output_dir()
    else:
        session_dir = Path(session_dir)

    current_processing_output_dir_name = tools.get_child_path(
        session_dir, tools.get_current_time())
    tools.create_dir(current_processing_output_dir_name)

    if multialignment_filename and "maf" in multialignment_filename:
        multialignment = Maf(StringIO(
            tools.decode_content(multialignment_content)),
                             file_name=multialignment_filename)
    elif multialignment_filename and "po" in multialignment_filename:
        multialignment = Po(StringIO(
            tools.decode_content(multialignment_content)),
                            file_name=multialignment_filename)
    else:
        # NOTE(review): this path returns a bare dict while the success path
        # returns a (dict, bool) tuple - confirm against the callback outputs.
        session_state[
            "error"] = "Cannot create Poagraph. Only MAF and PO files are supported."
        return session_state

    missing_symbol = MissingBase(
        missing_symbol) if missing_symbol != "" else MissingBase()

    fasta_path = None
    # Default to no provider so that a choice other than "NCBI"/"File" no
    # longer raises UnboundLocalError below.
    # NOTE(review): confirm the MAF builder accepts ``None`` here.
    fasta_provider = None
    if fasta_provider_choice == "NCBI":
        fasta_provider = FromNCBI(use_cache=True)
    elif fasta_provider_choice == "File":
        fasta_path = tools.get_child_path(current_processing_output_dir_name,
                                          fasta_filename).resolve()
        is_zip = "zip" in fasta_filename
        if is_zip:
            fasta_decoded_content = tools.decode_zip_content(fasta_content)
        else:
            fasta_decoded_content = tools.decode_content(fasta_content)
        # Zip uploads are written in binary mode, everything else as text.
        tools.save_to_file(fasta_decoded_content, fasta_path,
                           "wb" if is_zip else "w")
        fasta_provider = FromFile(fasta_path)
    blosum_path = pangtreebuild.get_default_blosum_path()
    blosum_contents = tools.read_file_to_stream(blosum_path)
    blosum = Blosum(blosum_contents, blosum_path)

    metadata = MetadataCSV(StringIO(tools.decode_content(metadata_content)),
                           metadata_filename) if metadata_content else None
    pangenomejson = pangtreebuild.run_pangtreebuild(
        output_dir=current_processing_output_dir_name,
        datatype=DataType[datatype],
        multialignment=multialignment,
        fasta_provider=fasta_provider,
        blosum=blosum,
        consensus_choice=consensus_choice,
        output_po="po" in output_config,
        output_fasta="fasta" in output_config,
        output_newick="newick" in output_config,
        missing_symbol=missing_symbol,
        metadata=metadata,
        hbmin=Hbmin(hbmin_value) if hbmin_value else None,
        stop=Stop(stop_value) if stop_value else None,
        p=P(p_value) if p_value else None,
        fasta_path=fasta_filename if fasta_filename else None,
        # Nodes are always included; output_config is not consulted for them.
        include_nodes=True
    )
    pangenome_json_str = to_json(pangenomejson)

    current_processing_output_zip = tools.dir_to_zip(
        current_processing_output_dir_name)
    # Keep only the last two path components of the zip location.
    current_processing_short_name = "/".join(
        str(current_processing_output_zip).split("/")[-2:])
    return {
        "last_output_zip": current_processing_short_name,
        "jsonpangenome": pangenome_json_str,
        "error": ""
    }, True