Exemple #1
0
Search and Newick imports, that don't belong anywhere else. 
"""
from typing import List, Optional, Set
from intermake import pr
from mhelper import Logger, bio_helper, isFilename

import warnings
import re

from groot import constants
from groot.constants import STAGES, EChanges
from groot.application import app
from groot.data import IHasFasta, global_view, Gene, Model
from groot.utilities import cli_view_utils

LOG = Logger("import")


@app.command(folder=constants.F_IMPORT)
def import_genes(file_name: str) -> EChanges:
    """
    Imports sequences from a FASTA file into the current model.
    If the model already contains data, only sequence data matching sequences already in the model is loaded.
    
    :param file_name:   File to import
    """
    model = global_view.current_model()
    status = model.get_status(STAGES.SEQUENCES_2)
    status.assert_import()

    # Keep a record of this import in the model's user comments.
    model.user_comments.append('IMPORT_FASTA "{}"'.format(file_name))
Exemple #2
0
from mgraph import MGraph, MSplit, exporting
from mhelper import Logger
from typing import Dict, Optional

from groot import Component, constants
from groot.application import app
from groot.constants import STAGES, EChanges
from groot.data import INode, Model, Split, global_view
from groot.utilities import lego_graph

__LOG_SPLITS = Logger("nrfg.splits", False)


@app.command(folder=constants.F_CREATE)
def create_splits():
    """
    Creates the candidate splits.
    
    NRFG Stage I.
    
    Collects the splits present in the component trees.

    :remarks:
    --------------------------------------------------------------------------------------------------------------    
    | Some of our graphs may have contradictory information.                                                     |
    | To resolve this we perform a consensus.                                                                    |
    | We define all the graphs by their splits, then see whether the splits are supported by the majority.       |
    |                                                                                                            |
    | A couple of implementation notes:                                                                          |
    | 1. I've not used the most efficient algorithm, however this is fast enough for the purpose and it is much  |
    |    easier to explain what we're doing. For a fast algorithm see Jansson 2013, which runs in O(nk) time.    |
Exemple #3
0
from intermake import pr
from mgraph import analysing
from mhelper import ComponentFinder, Logger, LogicError, string_helper
from typing import List

from groot import constants
from groot.application import app
from groot.constants import STAGES, EChanges
from groot.data import INode, Pregraph, Subset, global_view
from groot.utilities import lego_graph

LOG = Logger("pregraphs", False)


@app.command(folder=constants.F_CREATE)
def create_pregraphs():
    """
    Creates the pregraphs for the current model.
    
    If the pregraph stage already reports itself complete and the model holds no subsets, a verbose
    message is printed and nothing else is done.
    
    Requisites: `create_subsets`
    """
    model = global_view.current_model()
    stage = STAGES.PREGRAPHS_13

    # Special case - no subsets, so there is nothing to do
    if model.get_status(stage).is_complete:
        if len(model.subsets) == 0:
            pr.printx("<verbose>No subsets - nothing to do.</verbose>")
            return

    model.get_status(stage).assert_create()
Exemple #4
0
from mhelper import Logger, LogicError, ansi_helper, string_helper
from typing import Set

from groot import constants
from groot.application import app
from groot.constants import STAGES, EChanges
from groot.data import Split, global_view

__LOG_EVIDENCE = Logger("nrfg.evidence", False)


@app.command(folder=constants.F_CREATE)
def create_consensus(cutoff: float = 0.5) -> EChanges:
    """
    Filters the candidate splits.
    
    NRFG PHASE II.
    
    Collects consensus evidence.
    
    :remarks:
    ----------------------------------------------------------------------------------------------------
    | The second stage of the consensus.                                                               |
    | We collect evidence from the graphs to support or reject our splits.                             |
    | Unlike a normal majority rule consensus, there's no guarantee that our splits are in the graphs, |
    | so, in addition to support/reject evidence, we have a third category, whereby the graph neither  |
    | supports nor rejects a split.                                                                    |
    ----------------------------------------------------------------------------------------------------
    
    :param cutoff:              Cutoff to be used in the consensus (defaults to 0.5)
    """
Exemple #5
0
from mgraph import MGraph
from mhelper import Logger, array_helper, string_helper

from groot import constants
from groot.application import app
from groot.constants import STAGES, EChanges
from groot.data import FusionGraph, Formation, global_view
from groot.utilities import lego_graph

__LOG = Logger("nrfg.sew", False)


@app.command(folder=constants.F_CREATE)
def create_fused():
    """
    Creates the uncleaned NRFG by sewing the subgraphs back together at the
    fusion points.
    
    When the model has no fusions and exactly one component, a copy of that
    component's tree is used as the uncleaned fusion graph.
    
    Requisites: `create_subgraphs`
    """
    __LOG.pause("▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒ SEW ▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒")

    model = global_view.current_model()
    model.get_status(STAGES.FUSE_15).assert_create()

    # Special case: no fusions at all and only a single component
    no_fusions = len(model.fusions) == 0
    if no_fusions and len(model.components) == 1:
        sole_tree = model.components[0].tree
        model.fusion_graph_unclean = FusionGraph(sole_tree.copy(), False)
        return
Exemple #6
0
from mgraph import MGraph, MNode, analysing
from mhelper import Logger, LoopDetector, SwitchError

from groot.constants import STAGES, EChanges
from groot.data import EPosition, FusionGraph, Formation, Point, Gene, global_view
from groot.utilities import lego_graph
from groot import constants
from groot.application import app

LOG = Logger("clean", False)


@app.command(folder=constants.F_CREATE)
def create_cleaned():
    """
    Produces the cleaned NRFG.
    
    The clean-up passes are applied, in a fixed order, to a copy of the uncleaned fusion graph.
    The result is then stored on the model as `fusion_graph_clean`.
    
    Requisites: `create_fused`
    """
    model = global_view.current_model()
    stage_status = model.get_status(STAGES.CLEAN_16)
    stage_status.assert_create()

    # The passes below operate on a copy of the uncleaned graph
    unclean = model.fusion_graph_unclean
    working_graph = unclean.graph.copy()

    # The order of the passes is significant - keep it as is
    __remove_redundant_fusions(working_graph)
    __remove_redundant_clades(working_graph)
    __make_fusions_rootlets(working_graph)
    __make_outgroup_parents_roots(working_graph)

    cleaned = FusionGraph(working_graph, True)
    model.fusion_graph_clean = cleaned
    return EChanges.MODEL_DATA
Exemple #7
0
from intermake import pr
from mgraph import MEdge, MGraph, MNode
from mhelper import Logger, array_helper, string_helper

import itertools

from groot import constants
from groot.constants import EChanges
from groot.data.model_collections import FusionCollection
from groot.application import app
from groot.data import INode, Component, Fusion, Model, Point, Gene, global_view
from groot.data.model_core import Formation
from groot.utilities import lego_graph
from groot.commands.workflow import s080_tree

__LOG = Logger("fusion", False)
__LOG_ISOLATION = Logger("isolation", False)


@app.command(folder=constants.F_CREATE)
def create_fusions() -> EChanges:
    """
    Finds the fusion points in the model.
    i.e. given the events (see `find_events`), locates the exact points at which the fusion(s) occur.
    Requisites: `create_trees`
    """
    model = global_view.current_model()
    fusion_stage = constants.STAGES.FUSIONS_9
    model.get_status(fusion_stage).assert_create()

    # NOTE(review): presumably accumulates the fusions found - the code that fills it is not visible here
    r: List[Fusion] = []
Exemple #8
0
from collections import defaultdict
from typing import Dict, FrozenSet, List, Set, cast, Any
from mhelper import Logger, string_helper

from groot import constants
from groot.application import app
from groot.constants import STAGES, EChanges
from groot.data import INode, Fusion, Point, Gene, Subset, global_view


__LOG = Logger( "nrfg.find", False )

@app.command(folder = constants.F_DROP)
def drop_subsets():
    """
    Drops the subsets from the current model.
    
    The model's `subsets` are replaced with an empty `frozenset`.
    """
    model = global_view.current_model()
    stage_status = model.get_status( STAGES.SUBSETS_12 )
    stage_status.assert_drop()
    
    # Assign a fresh empty collection rather than emptying the existing one
    no_subsets = frozenset()
    model.subsets = no_subsets
    
    return EChanges.COMP_DATA


@app.command(folder = constants.F_CREATE)
def create_subsets( no_super: bool = True ):
    """
    Creates leaf subsets.
    
    Requisites: `create_consensus`
Exemple #9
0
Components algorithms.

The only one publicly exposed is `detect`, so start there.
"""
from typing import List, Optional
from intermake import pr
from mhelper import ComponentFinder, Logger, string_helper

import warnings

from groot.application import app
from groot import constants
from groot.constants import EChanges, STAGES
from groot.data import Component, Edge, Gene, global_view

LOG_MAJOR = Logger( "comp.major", False )
LOG_MAJOR_V = Logger( "comp.major.v", False )
LOG_GRAPH = Logger( "comp.graph", False )


@app.command( folder = constants.F_CREATE )
def create_major( tol: int = 0, debug: bool = False ) -> EChanges:
    """
    Detects model components.
    
    First step of finding the components.
    
    We classify each component as a set of "major" genes.
    
    Components are defined as sets of genes that share a similarity path between them, where each edge between element 𝓧 and 𝓨 in that path:
        * Is sourced from no less than 𝓧's length, less the tolerance
Exemple #10
0
from mgraph import MGraph, MNode
from mhelper import Logger, SwitchError, string_helper, FunctionInspector
from typing import Callable, Iterable, Sequence, Union

from groot import constants
from groot.application import app
from groot.constants import STAGES, EChanges
from groot.data import INamedGraph, Formation, Point, Pregraph, Subset, Subgraph, global_view, Gene
from groot.utilities import AlgorithmCollection, lego_graph, external_runner

LOG = Logger("possible_graphs", False)

DAlgorithm = Callable[[Union[str, Subset]], Union[str, MGraph]]
"""
Task:
    A supertree consensus is required whereby the set of taxa on the inputs may not be the same.

Input (ONE OF):
    str (default): newick trees
    Subset: the gene subset in question
    
Output (ONE OF):
    str: A newick tree
    MGraph: The tree
    
Uses PEP 484 type annotations to indicate which input is required; otherwise the default will be assumed.
"""

supertree_algorithms = AlgorithmCollection(DAlgorithm, "Supertree")

Exemple #11
0
BLAST is the default algorithm and this invocation can be found in the `groot_ex` project. 
"""
from intermake import pr
from typing import Callable, List, Optional
from mhelper import EFileMode, isFilename, Logger

import re

from groot.commands.workflow.s020_sequences import _make_gene
from groot.application import app
from groot import Edge, constants
from groot.constants import EXT_BLAST, STAGES, EChanges
from groot.data import Model, Domain, global_view
from groot.utilities import AlgorithmCollection, external_runner

LOG = Logger("import/blast")

DAlgorithm = Callable[[str], str]
"""
Task:
    A similarity of FASTA sequences.

Input:
    str (default): FASTA sequences for two or more genes
    
Output:
    str: A similarity matrix in BLAST format 6 TSV.
"""

similarity_algorithms = AlgorithmCollection(DAlgorithm, "Similarity")
Exemple #12
0
from groot import supertree_algorithms, Subset, Gene
from mgraph import importing, MGraph
from mhelper import file_helper, Logger, LogicError, exception_helper
from intermake import subprocess_helper

__LOG_CREATE = Logger("supertree")


@supertree_algorithms.register("clann")
def supertree_clann(inputs: str) -> str:
    """
    Generates a supertree by running the external CLANN program.
    
    The input trees are written to `in_file.nwk`, CLANN is given a short script as its `stdin`, and the
    result is read back from `out_file.nwk`. Both files are addressed relative to the current working
    directory. Only the text preceding the first `;` of the output file is returned.
    
    :param inputs:      Input trees in Newick format.
    :return:            The consensus supertree in Newick format.
    """
    clann_script = """
    execute in_file.nwk;
    hs savetrees=out_file.nwk;
    quit
    """

    file_helper.write_all_text("in_file.nwk", inputs)
    subprocess_helper.run_subprocess(["clann"], stdin=clann_script)

    saved_trees = file_helper.read_all_text("out_file.nwk")

    # Keep only what precedes the first tree terminator
    first_tree, _, _ = saved_trees.partition(";")
    return first_tree

Exemple #13
0
The only one publicly exposed is `detect`, so start there.
"""
from collections import defaultdict
from typing import Dict, Optional, Set, Tuple, List
from intermake import pr
from mhelper import Logger, array_helper, string_helper

import warnings

from groot import constants
from groot.application import app
from groot.constants import STAGES, EChanges
from groot.data import Component, Edge, Model, Gene, Domain, global_view

LOG_MINOR = Logger("comp.minor", False)


@app.command(folder=constants.F_CREATE)
def create_minor(tol: int) -> EChanges:
    """
    Finds the subsequence components, here termed the "minor" elements.
    
    Clause 1:
        Subsequences belong to the component of the sequence in which they reside.
        
    Clause 2:
        When one sequence of a component possesses an edge to a sequence of another component (an "entry").
        Subsequences of all sequences in that second component receive the first component, at the position of the entry.
        
    Requisites: `create_major`