Ejemplo n.º 1
0
    def coaltree(ntips, ne=None, random_names=False, seed=None):
        """
        Return a random coalescent-style tree with `ntips` tips.

        Pairs of lineages are picked at random and joined under a new
        internal node until a single lineage remains. The waiting time
        added at each join is drawn from an exponential distribution with
        mean (4 * ne) / (ntips * (ntips - 1)).

        Parameters
        ----------
        ntips (int):
            Number of tips (samples) in the returned tree.

        ne (int or None):
            Population size. If a value is given, edge lengths are in
            generations. If falsy (None or 0), a placeholder of 10000 is
            used while building the tree and every edge length is then
            divided by 2 * ne, i.e., returned in coalescent time units.

        random_names (bool):
            If True, the r{idx} tip names are assigned in shuffled order.

        seed (int or None):
            Value passed to random.seed() before any random draw.

        Returns
        -------
        The tree object built by toytree.tree() from the joined lineages.
        """
        # seed generator
        random.seed(seed)

        # convert units: remember whether to rescale to coalescent units
        # at the end, and use a placeholder Ne while building the tree.
        coalunits = False
        if not ne:
            coalunits = True
            ne = 10000

        # build tree: generate N tips as separate Nodes then attach together 
        # at internal nodes drawn randomly from coalescent waiting times.
        tips = [
            toytree.tree().treenode.add_child(name=str(i)) 
            for i in range(ntips)
        ]
        while len(tips) > 1:
            rtree = toytree.tree()
            # remove two randomly chosen lineages from the pool
            tip1 = tips.pop(random.choice(range(len(tips))))
            tip2 = tips.pop(random.choice(range(len(tips))))
            # NOTE(review): the mean waiting time uses the fixed `ntips`
            # rather than the number of lineages still remaining at this
            # step; under the standard Kingman coalescent k shrinks after
            # every merge -- confirm this is intended.
            kingman = (4. * ne) / float(ntips * (ntips - 1))
            dist = random.expovariate(1. / kingman)
            # each child's edge length is the *other* child's current
            # height plus the newly drawn waiting time
            rtree.treenode.add_child(tip1, dist=tip2.height + dist)
            rtree.treenode.add_child(tip2, dist=tip1.height + dist)
            # the new internal node goes back into the pool as a lineage
            tips.append(rtree.treenode)

        # build new tree from the newick string
        self = toytree.tree(tips[0].write())    
        self.treenode.ladderize()

        # make tree edges in units of 2N (then N doesn't matter!)
        if coalunits:
            for node in self.treenode.traverse():
                node.dist /= (2. * ne)

        # ensure tips are at zero (they sometime vary just slightly):
        # each leaf edge is lengthened by the leaf's own height.
        for node in self.treenode.traverse():
            if node.is_leaf():
                node.dist += node.height

        # set tipnames to r{idx}, optionally in shuffled order
        nidx = list(range(self.ntips))
        if random_names:
            random.shuffle(nidx)
        for idx, node in self.idx_dict.items():
            if node.is_leaf():
                node.name = "r{}".format(nidx[idx])

        # decompose fills in internal node names and idx
        self._coords.update()
        return self
Ejemplo n.º 2
0
 def compare_quartets(self):
     """
     Compare the quartet sets of consecutive tree files.

     For every i in 1 .. nquartets - 1 the quartets of "tree{i}.tre" and
     "tree{i+1}.tre" (both read from `trefilespath`) are computed with
     get_quartets(), and the fraction of tree i's quartets that also occur
     in tree i+1 is recorded.

     The table is written to
     "<trefilespath>compare_quartets<nquartets>.csv" when at least one
     pair was compared.

     Returns
     -------
     pandas.DataFrame with columns 'trees' and 'Quartet intersection',
     one row per compared pair.

     Raises
     ------
     ZeroDivisionError if the first tree of a pair yields no quartets.
     """
     # NOTE(review): `nquartets`, `trefilespath` and `get_quartets` are
     # resolved from the enclosing scope rather than from `self` -- confirm
     # they are module-level names in the full file.
     columns = ['trees', 'Quartet intersection']
     rows = []
     for i in range(int(nquartets) - 1):
         first, second = i + 1, i + 2
         q0 = get_quartets(
             toytree.tree(
                 str(trefilespath) + "/tree" + str(first) + ".tre"))
         q1 = get_quartets(
             toytree.tree(
                 str(trefilespath) + "/tree" + str(second) + ".tre"))
         rows.append({
             'trees': str(first) + ", " + str(second),
             'Quartet intersection': len(q0.intersection(q1)) / len(q0),
         })

     # Build the frame once from the collected rows: DataFrame.append was
     # removed in pandas 2.0 and copied the whole frame on every call.
     compare_quartets_df = pd.DataFrame(rows, columns=columns)

     # As before, display options are only touched and the CSV only written
     # when at least one pair was compared; writing once after the loop
     # yields the same final file as rewriting it on every iteration.
     if rows:
         pd.set_option("display.max_rows", None, "display.max_columns",
                       None)
         compare_quartets_df.to_csv(str(trefilespath) + "compare_quartets" +
                                    str(nquartets) + ".csv",
                                    index=False)
     return compare_quartets_df
Ejemplo n.º 3
0
def remote_raxml(phyfile, inference_args):
    """
    Run raxml on a phylip file and return the resulting tree as newick.

    A `raxml` job named "temp_<pid>" is created in the system temp
    directory with `inference_args` forwarded as keyword arguments, and is
    run to completion. The newick string is read from the bipartitions
    tree file if it exists, otherwise from the bestTree file. All tree
    files listed in `rax.trees` that exist on disk are deleted afterwards.
    """
    job_name = "temp_" + str(os.getpid())
    rax = raxml(
        data=phyfile,
        name=job_name,
        workdir=tempfile.gettempdir(),
        **inference_args
    )
    rax.run(force=True, quiet=True, block=True)

    # fall back to the best tree when no bipartitions file was written
    if not os.path.exists(rax.trees.bipartitions):
        newick = toytree.tree(rax.trees.bestTree).newick
    else:
        newick = toytree.tree(rax.trees.bipartitions).newick

    # remove the tree files left behind by the run
    for attr in rax.trees:
        result_path = getattr(rax.trees, attr)
        if not os.path.exists(result_path):
            continue
        os.remove(result_path)

    return newick
Ejemplo n.º 4
0
    def _preview_topology(self):
        """
        Read the tree file, apply the requested node edits and rooting, and
        render a .png with rectangular and circular topology previews.

        Side effects: sets `self.tree`, rewrites `self.args.replace_nodes`
        to the list of '<idx>_replaced' names (empty list if none were
        requested), and writes the image to `self.args.topo`.

        :return: toyplot.Canvas object
        """
        self.log.debug('reading tree')
        # read in the tree; the context manager closes the file handle
        # instead of leaving it to the garbage collector
        with open(self.tree_file, 'r') as infile:
            self.tree = toytree.tree(infile.read(), tree_format=0)

        # drop / replace nodes
        if self.args.replace_nodes:
            for idx in self.args.replace_nodes:
                # add a placeholder sister, then detach the original node
                self.tree.idx_dict[idx].add_sister(name='%d_replaced' % idx,
                                                   dist=1)
                self.tree.idx_dict[idx].detach()
            # saving the node names now saves some work
            self.args.replace_nodes = [
                '%d_replaced' % idx for idx in self.args.replace_nodes
            ]
        else:
            # save empty list
            self.args.replace_nodes = []
        if self.args.drop_nodes:
            # plain loop: detach() is called for its side effect only
            for idx in self.args.drop_nodes:
                self.tree.idx_dict[idx].detach()

        # outgroup rooting on the name of the node with idx `args.root`
        if self.args.root:
            self.tree = self.tree.root(
                names=[self.tree.idx_dict[self.args.root].name])

        # rebuild the tree from its own newick output after the edits
        self.tree = toytree.tree(self.tree.write())

        # set dimensions of the canvas
        preview = toyplot.Canvas(width=800,
                                 height=400,
                                 style={'background-color': 'white'})
        # dissect canvas into two cartesian areas
        ax0 = preview.cartesian(bounds=('5%', '48%', '5%', '95%'))
        ax1 = preview.cartesian(bounds=('52%', '95%', '5%', '95%'))

        # call draw with the 'axes' argument to pass it to a specific cartesian area
        self.log.debug('drawing preview')
        self.tree.draw(axes=ax0, layout='r', tip_labels=False)
        self.tree.draw(axes=ax1, layout='c', tip_labels=False)

        # hide the axes coordinates
        ax0.show = False
        ax1.show = False

        # render to image
        png.render(preview, self.args.topo)
        self.log.info('rendered preview %s' % self.args.topo)
        return preview
Ejemplo n.º 5
0
    def _parse_results(self):
        """
        Parse the tree, admixture events and covariance matrices from the
        output files into `self.results`.

        Sets:
          - results.tree: the newick line of the gzipped treeout file (the
            second line when `params.noss` is set, otherwise the first).
          - results.admixture: one tuple per remaining line, of the form
            (source node idx, source length, sink node idx, sink length,
            weight).
          - results.cov / covse / modelcov: matrices parsed with
            _parse_matrix using the tree's tip labels.
        """
        ## get tree and admix from output files. Open in text mode:
        ## gzip.open defaults to binary and the lines are later combined
        ## with str values (e.g. clade + ";").
        with gzip.open(self.files.treeout, 'rt') as tmp:
            data = tmp.readlines()

        ## store the tree
        k = 1 if self.params.noss else 0
        self.results.tree = data[k].strip()

        ## get admix events; the clade columns are moved to the front
        self.results.admixture = []
        for adx in data[k + 1:]:
            weight, jweight, jse, pval, clade1, clade2 = adx.strip().split()
            self.results.admixture.append(
                (clade1, clade2, weight, jweight, jse, pval))

        ## get a toytree
        tre = toytree.tree(self.results.tree)

        def _clade_idx(clade):
            "Return the idx of the node in `tre` matching a clade string."
            sub = toytree.tree(clade + ";")
            # a single-node clade is looked up by name, a larger clade by
            # the common ancestor of its tips
            if len(sub.tree) == 1:
                return tre.tree.search_nodes(name=sub.tree.name)[0].idx
            return tre.tree.get_common_ancestor(sub.get_tip_labels()).idx

        ## order admixture: replace each raw record with node idxs, the
        ## value after the last ":" of each clade string, and the weight
        ## (jweight, jse and pval are not kept).
        for aidx, admix in enumerate(self.results.admixture):
            sodx = _clade_idx(admix[0])
            sidx = _clade_idx(admix[1])
            self.results.admixture[aidx] = (
                int(sodx),
                float(admix[0].rsplit(":", 1)[1]),
                int(sidx),
                float(admix[1].rsplit(":", 1)[1]),
                float(admix[2]),
            )

        ## parse the cov
        names = tre.get_tip_labels()
        self.results.cov = _parse_matrix(self.files.cov, names)
        self.results.covse = _parse_matrix(self.files.covse, names)
        self.results.modelcov = _parse_matrix(self.files.modelcov, names)
Ejemplo n.º 6
0
    def unittree(ntips, treeheight=1.0, random_names=False, seed=None):
        """
        Return a random ultrametric tree with `ntips` tips whose root is at
        height `treeheight`.

        Parameters
        -----------
        ntips (int):
            The number of tips in the randomly generated tree.

        treeheight (float):
            Height the root is scaled to (default 1.0).

        random_names (bool):
            If True the r{idx} tip names are assigned in shuffled order.

        seed (int):
            Random number generator seed.
        """
        random.seed(seed)

        # populate a bare root node with tips, then reload it from newick
        root = toytree.tree().treenode
        root.populate(ntips)
        tre = toytree.tree(newick=root.write())

        # ladderize, make ultrametric, then scale to the requested height
        tre = tre.ladderize()
        tre = tre.mod.make_ultrametric()
        tre = tre.mod.node_scale_root_height(treeheight)

        # name the tips r{N-1} .. r0 in leaf order, or shuffled on request
        labels = list(range(tre.ntips))
        labels.reverse()
        if random_names:
            random.shuffle(labels)
        for position, leaf in enumerate(tre.treenode.get_leaves()):
            leaf.name = "r{}".format(labels[position])

        # every node gets a default support of 100
        for each in tre.treenode.traverse():
            each.support = 100

        # ladderize the nodes again and refresh the cached coordinates
        tre.treenode.ladderize()
        tre._coords.update()
        return tre
Ejemplo n.º 7
0
    def __init__(
            self,
            tree,  # toytree object or newick string
            matrix=None,  # pandas DataFrame, or a source readable by pd.read_csv
            model=None,
            prior=0.5):
        """
        Store the tree, data matrix and model settings.

        Parameters
        ----------
        tree:
            A toytree object, or a newick string which is parsed with
            tree_format=0.
        matrix:
            A pandas DataFrame, or anything else, which is handed to
            pd.read_csv(matrix, index_col=0).
        model:
            Stored unchanged on `self.model`.
        prior:
            Stored unchanged on `self.prior` (default 0.5).

        Raises
        ------
        Exception if `tree` is neither a toytree object nor a string.
        """
        # NOTE(review): isinstance() requires `toytree.tree` to be a class;
        # confirm this holds for the toytree version in use.
        if isinstance(tree, toytree.tree):
            self.tree = tree
        elif isinstance(tree, str):
            self.tree = toytree.tree(tree, tree_format=0)
        else:
            raise Exception(
                'tree must be either a newick string or toytree object')

        if isinstance(matrix, pd.DataFrame):
            self.matrix = matrix
        else:
            self.matrix = pd.read_csv(matrix, index_col=0)

        self.model = model
        self.prior = prior

        # Use the parsed tree here: the raw `tree` argument may be a newick
        # string, which has no `treenode` attribute.
        root_height = self.tree.treenode.height
        self.alpha = 1 / root_height
        self.beta = 1 / root_height
Ejemplo n.º 8
0
    def run_qmc(self):
        """
        Run quartet max-cut (QMC) on the quartets dump file and save the
        resulting tree as newick.

        The tip names written by QMC are used as integer indices into
        `self.tet.samples`. The tree is appended to the boots file when
        `self.boot` is set, otherwise it overwrites the tree file.

        Raises TetradError if QMC exits with a non-zero return code.
        """
        # QMC writes its result to a scratch file in the working directory
        self._tmp = os.path.join(self.tet.dirs, ".tmptre")
        qmc_args = [
            "qrtt={}".format(self.tet.files.qdump),
            "otre={}".format(self._tmp),
        ]

        # run QMC and wait for it; its output is captured but not used
        proc = sps.Popen([QMC] + qmc_args, stderr=sps.STDOUT, stdout=sps.PIPE)
        proc.communicate()
        if proc.returncode:
            raise TetradError("error in QMC")

        # load the scratch file and swap index tip names for sample names
        ttre = toytree.tree(self._tmp)
        for leaf in ttre.treenode.get_leaves():
            leaf.name = self.tet.samples[int(leaf.name)]

        newick = ttre.write(tree_format=9)

        # bootstrap replicates accumulate one per line; the main tree is
        # written on its own
        if not self.boot:
            with open(self.tet.trees.tree, 'w') as outtree:
                outtree.write(newick)
        else:
            with open(self.tet.trees.boots, 'a') as outboot:
                outboot.write(newick + "\n")
Ejemplo n.º 9
0
    def relative_error(self):
        """
        Calculate the error between the true tree and the chronos / mrbayes
        trees after rescaling each inferred tree to the true tree's height.

        For every row of `self.data` and each of the three tree columns,
        the inferred tree is rescaled, its "height" edge values are
        subtracted from those of `self.sptree`, and the sum of squared
        differences is stored in the matching `*_relative_error` column.
        """
        # Get edge lengths of true tree.
        true_edge_lengths = self.sptree.get_edge_values(feature="height")

        # Height every inferred tree is scaled to; it does not change
        # between iterations so it is computed once.
        # NOTE(review): this is the first key of the "height" feature dict,
        # presumably the root height -- confirm.
        target_height = list(
            self.sptree.get_feature_dict("height").keys())[0]

        # input column holding the tree -> output column for its error
        columns = (
            ("chronos_correlated", "chc_relative_error"),
            ("chronos_relaxed", "chr_relative_error"),
            ("mrbayes_tree", "mb_relative_error"),
        )

        for idx in self.data.index:
            for model, column in columns:

                # node_scale_root_height returns the rescaled tree, so the
                # result must be kept; discarding it leaves dtree unscaled.
                dtree = toytree.tree(self.data.at[idx, model])
                dtree = dtree.mod.node_scale_root_height(
                    treeheight=target_height)
                dtree_edge_lengths = dtree.get_edge_values(feature="height")

                # Sum of squared differences from the true edge values.
                subtract_array = true_edge_lengths - dtree_edge_lengths
                sum_squares = np.sum(np.square(subtract_array))

                self.data.loc[idx, column] = sum_squares
Ejemplo n.º 10
0
    def simple_error(self):
        """
        Calculate the error between the true tree and the chronos / mrbayes
        trees without any rescaling.

        For every row of `self.data` and each of the three tree columns,
        the inferred tree's "height" edge values are subtracted from those
        of `self.sptree` and the sum of squared differences is stored in
        the matching `*_simple_error` column.
        """
        # Get edge lengths of true tree.
        true_edge_lengths = self.sptree.get_edge_values(feature="height")

        # input column holding the tree -> output column for its error.
        # The chronos_relaxed column was previously written as
        # "chr_simple error" (with a space), unlike its two siblings.
        columns = (
            ("chronos_correlated", "chc_simple_error"),
            ("chronos_relaxed", "chr_simple_error"),
            ("mrbayes_tree", "mb_simple_error"),
        )

        for idx in self.data.index:
            for model, column in columns:

                # Get edge lengths to subtract from true edge lengths.
                dtree = toytree.tree(self.data.at[idx, model])
                dtree_edge_lengths = dtree.get_edge_values(feature="height")

                # Sum of squared differences from the true edge values.
                subtract_array = true_edge_lengths - dtree_edge_lengths
                sum_squares = np.sum(np.square(subtract_array))

                self.data.loc[idx, column] = sum_squares
Ejemplo n.º 11
0
    def __init__(self, tree, df, seed=None):
        """
        Keep the species tree and a private copy of the input dataframe.

        `tree` is passed through toytree.tree(); `df` is copied so later
        writes to `self.data` do not alter the caller's frame. `seed` is
        accepted but not used in this constructor.
        """
        sptree = toytree.tree(tree)
        table = df.copy()

        self.sptree = sptree
        self.data = table
Ejemplo n.º 12
0
Archivo: util.py Proyecto: messDiv/MESS
def _update_newick_names(nw, names):
    """
    Rename the tips of newick `nw` with `names` and return the new newick.

    Tips are taken in the order given by get_tip_labels() and paired with
    `names` by position; pairing stops at the shorter of the two. The
    result is written with tree_format=5.
    """
    tre = toytree.tree(nw)

    # Resolve every tip node before renaming anything, so lookups by the
    # old label are not affected by names assigned below.
    targets = []
    for label in tre.get_tip_labels():
        targets.append(tre.treenode.search_nodes(name=label)[0])

    for target, new_name in zip(targets, names):
        target.name = new_name
    return tre.write(tree_format=5)
Ejemplo n.º 13
0
    def infer_mb(self, tmp):
        """
        Run mrbayes on the file `tmp` and return the consensus tree as a
        newick string.

        The job is named "temp_<pid>", runs in the system temp directory
        and receives `self.inference_args` as keyword arguments. Tree files
        listed in `mb.trees` are deleted afterwards; the input file `tmp`
        is not removed here.
        """
        job_name = "temp_" + str(os.getpid())
        mb = mrbayes(
            data=tmp,
            name=job_name,
            workdir=tempfile.gettempdir(),
            **self.inference_args
        )
        mb.run(force=True, quiet=True, block=True)

        # read the consensus tree file and keep only its newick string
        newick = toytree.tree(mb.trees.constre, tree_format=10).newick

        # each entry of mb.trees carries the file path at position 1
        for entry in mb.trees:
            result_path = entry[1]
            if not os.path.exists(result_path):
                continue
            os.remove(result_path)

        return newick
Ejemplo n.º 14
0
    def imbtree(ntips, treeheight=1.0, random_names=False):
        """
        Return an imbalanced (comb-like) tree topology.

        The tree starts as a two-tip cherry and every further tip is added
        as sister to everything built so far. It is then made ultrametric
        and scaled so the root is at `treeheight`. Tips are named r{idx},
        in shuffled order when `random_names` is True.
        """
        # initial cherry
        comb = toytree.TreeNode.TreeNode()
        for label in ("r0", "r1"):
            comb.add_child(name=label)

        # wrap the current tree and one new tip under a fresh parent
        for tip_number in range(2, ntips):
            parent = toytree.TreeNode.TreeNode()
            parent.add_child(name="r" + str(tip_number))
            parent.add_child(comb)
            comb = parent

        # build the tree object and rescale it in place
        tre = toytree.tree(comb)
        tre = tre.mod.make_ultrametric(nocopy=True)
        tre = tre.mod.node_scale_root_height(treeheight, nocopy=True)
        tre._coords.update()

        # rename tips from their idx, optionally through a shuffled mapping
        order = list(range(tre.ntips))
        if random_names:
            random.shuffle(order)
        for idx, member in tre.idx_dict.items():
            if not member.is_leaf():
                continue
            member.name = "r{}".format(order[idx])
        return tre
Ejemplo n.º 15
0
    def run_tree_inference(self, nexus, idx):
        """
        Write `nexus` to a temporary file, run raxml tree inference on it,
        and return the tip order of the resulting best tree as a string.

        Parameters
        ----------
        nexus (str or bytes):
            Alignment text written to the temporary input file.
        idx:
            Identifier used as the temp-file prefix and the raxml job name.

        Returns
        -------
        str: the items returned by get_order() joined together.
        """
        # gettempdir() always resolves a directory, whereas the module
        # attribute tempfile.tempdir is None until it has been initialised.
        tmpdir = tempfile.gettempdir()

        # Open in a mode matching the payload so write() accepts it.
        # delete=False keeps the file on disk after close() so that raxml
        # can open it by name.
        mode = "wb" if isinstance(nexus, bytes) else "w"
        tmpfile = tempfile.NamedTemporaryFile(
            mode=mode,
            delete=False,
            prefix=str(idx),
            dir=tmpdir,
        )
        try:
            # write and close first so the content is fully on disk
            with tmpfile:
                tmpfile.write(nexus)

            ## infer the tree
            rax = raxml(
                name=str(idx), data=tmpfile.name, workdir=tmpdir, N=1, T=2)
            rax.run(force=True, block=True, quiet=True)
        finally:
            ## clean up the temporary input file, even if the run failed
            if os.path.exists(tmpfile.name):
                os.remove(tmpfile.name)

        ## return tree order
        order = get_order(toytree.tree(rax.trees.bestTree))
        return "".join(order)
Ejemplo n.º 16
0
    def run(self):
        """
        Run the Astral command and load the inferred tree.

        The combined stdout/stderr of the process is written to
        `self.logfile`, the temporary trees file is removed if present, and
        the result in `self.treefile` is loaded into `self.tree`.

        Raises IPyradError (after printing the process output) if the
        command exits with a non-zero return code.
        """
        print("[astral.5.7.3.jar]")

        # launch the command with stderr merged into stdout, and wait
        proc = sps.Popen(
            self._get_command(), stderr=sps.STDOUT, stdout=sps.PIPE)
        output = proc.communicate()[0]

        if proc.returncode:
            print("Astral Error:\n", output.decode())
            command_string = " ".join(self._get_command())
            raise IPyradError(
                "Astral Error: your command string was:\n{}".format(
                    command_string))

        # keep the captured output as the log
        with open(self.logfile, 'w') as out:
            out.write(output.decode())

        # the temporary trees file is no longer needed
        if os.path.exists(self._tmptrees):
            os.remove(self._tmptrees)

        # load the result and tell the user where it is
        self.tree = toytree.tree(self.treefile)
        print("inferred tree written to ({})".format(self.treefile))
Ejemplo n.º 17
0
    def __init__(self, newick, constraint_dict, constraint_exact):
        """
        Set up the constraints and traverse the tree to build test sets.

        `newick` is parsed with toytree.tree() and must be rooted.
        `constraint_dict` may override the empty defaults for "p1".."p4".
        `constraint_exact` is a bool (expanded to four entries) or a list,
        which must have as many entries as there are constraints.
        """
        # containers filled while traversing
        self.testset = set()
        self.hold = [0, 0, 0, 0]

        # the tree to traverse has to be rooted
        self.tree = toytree.tree(newick)
        if not self.tree.is_rooted():
            raise IPyradError(
                "generate_tests_from_tree(): tree must be rooted and resolved")

        # empty constraints for every position, overridden by user input
        self.cdict = OrderedDict(
            [("p1", []), ("p2", []), ("p3", []), ("p4", [])])
        if constraint_dict:
            self.cdict.update(constraint_dict)

        # one exactness flag per position
        exact = constraint_exact
        if isinstance(exact, bool):
            exact = [exact] * 4
        self.xdict = exact
        if isinstance(exact, list) and len(exact) != len(self.cdict):
            raise Exception(
                "constraint_exact must be bool or list of bools length N")

        # get tests
        self.loop()
Ejemplo n.º 18
0
    def load_counts(self):
        """
        Load counts and labels Hdf5 databases generated by simcat.Database.

        Reads the "tree" attribute and the "counts" dataset from
        `self.db_counts` into `self.tree` and `self.counts`, then
        optionally rescales the counts:

        - scale == 1: every value is divided by the global maximum.
        - scale == 2: every counts[i, j] block is divided by that block's
          own maximum.

        Prints the shape of the counts array unless `self.quiet` is set.
        """
        # load the snp counts and stack data
        with h5py.File(self.db_counts, 'r') as io5:

            # load the tree that was used in the simulations and the data
            self.tree = toytree.tree(io5.attrs["tree"])
            self.counts = io5["counts"][:]

            # [1] rescale to make counts proportional across ALL sims.
            # this seems to work much better than [2].
            if self.scale == 1:
                self.counts = self.counts / self.counts.max()

            # [2] rescale by make counts proportional across sims on same tree.
            if self.scale == 2:
                # Normalise in floating point. Writing the ratios back into
                # the loaded array element by element would truncate them
                # whenever the dataset has an integer dtype.
                if self.counts.dtype.kind != "f":
                    self.counts = self.counts.astype(float)
                # per-block maximum over all axes after the first two,
                # kept broadcastable against the full array
                block_axes = tuple(range(2, self.counts.ndim))
                block_max = self.counts.max(axis=block_axes, keepdims=True)
                self.counts = self.counts / block_max

            if not self.quiet:
                print("[load] {}".format(self.counts.shape))
Ejemplo n.º 19
0
def remote_mrbayes(nexfile, inference_args, keepdir=None):
    """
    Run mrbayes on a nexus file and return the consensus tree as newick.

    The job runs in `keepdir` when one is given, otherwise in the directory
    containing `nexfile`. After the run, the tree files listed in
    `mb.trees` that exist on disk and the input `nexfile` itself are
    deleted.
    """
    # results go to keepdir if requested, else next to the input file
    workdir = keepdir if keepdir else os.path.dirname(nexfile)

    job_name = "temp_" + str(os.getpid())
    mb = mrbayes(
        data=nexfile,
        name=job_name,
        workdir=workdir,
        **inference_args
    )
    mb.run(force=True, quiet=True, block=True)

    # read the consensus tree file and keep only its newick string
    newick = toytree.tree(mb.trees.constre, tree_format=10).newick

    # each entry of mb.trees carries the file path at position 1
    for entry in mb.trees:
        result_path = entry[1]
        if not os.path.exists(result_path):
            continue
        os.remove(result_path)

    # the input sequence file is removed as well
    os.remove(nexfile)

    return newick
Ejemplo n.º 20
0
    def draw_cov(self, axes=None):
        """
        Draw the covariance matrix from the results as a 400x400 toyplot
        matrix, labelled on the right and bottom with the tree's tip labels
        in reversed order.

        `axes` is accepted but not used. Returns (canvas, table).
        """
        cov = self.results.cov
        labels = toytree.tree(self.results.tree).get_tip_labels()

        # one tick per tip, labelled with the reversed tip names
        lnames = toyplot.locator.Explicit(
            locations=range(len(labels)),
            labels=labels[::-1],
        )

        # diverging colormap spanning the observed covariance range
        cmap = toyplot.color.diverging.map("BlueRed", cov.min(), cov.max())

        matrix_options = dict(
            width=400,
            height=400,
            bshow=True,
            tshow=False,
            lshow=False,
            rlocator=lnames,
            blocator=lnames,
        )
        canvas, table = toyplot.matrix((cov, cmap), **matrix_options)
        return canvas, table
Ejemplo n.º 21
0
    def load_slice(self):
        """
        Load this object's slice of the database for use in ipcoal sims.

        Reads the rows selected by `self.idxs` from four datasets, the tree
        and simulation settings from the file attributes, and allocates an
        all-zero int64 `self.counts` array of shape
        (number of idxs, number of tips, nsnps).
        """
        with h5py.File(self.database, 'r') as io5:

            # datasets: keep only the rows selected by self.idxs
            for key in ("node_Nes", "admixture", "treeheight", "slide_seeds"):
                setattr(self, key, io5[key][self.idxs, ...])

            # the stored tree is made ultrametric again after loading
            stored = toytree.tree(io5.attrs["tree"])
            self.tree = stored.mod.make_ultrametric()  # imprecision
            self.ntips = len(self.tree)

            # remaining metadata is copied from the file attributes as-is
            for key in ("nsnps", "rate_vector", "pi_vector",
                        "node_slide_prop"):
                setattr(self, key, io5.attrs[key])

            # empty container for the aligned SNPs
            self.nvalues = len(self.idxs)
            shape = (self.nvalues, self.tree.ntips, self.nsnps)
            self.counts = np.zeros(shape, dtype=np.int64)
Ejemplo n.º 22
0
    def __init__(self, tree, Ne=None):
        """
        Store the tree with an "Ne" value set on its nodes.

        `tree` is passed through toytree.tree() and `Ne` is used as the
        default for the "Ne" node values. `self.demog` starts as None and
        is meant to hold the demography later.
        """
        parsed = toytree.tree(tree)
        self.tree = parsed.set_node_values("Ne", default=Ne)

        # placeholder for the demography in its final format
        self.demog = None
Ejemplo n.º 23
0
    def batch_raxml(self):
        """
        Run RAxML for every row of `self.data` and store the resulting tree.

        For each row the "spp_tree" newick is written to a temp file and
        passed with `-t`, together with the alignment in "phy_seqpath", to
        raxmlHPC-PTHREADS-AVX2 (`-f e`, GTRGAMMA, 8 threads). The result
        tree is rooted on `self.root` and saved as newick in the
        "raxml_tree" column.

        A failing RAxML call prints its output and command line, then
        re-raises subprocess.CalledProcessError.
        """
        # all runs share the same scratch files in the system temp dir
        tmpdir = tempfile.gettempdir()
        topology_file = os.path.join(tmpdir, "tmp.tre")
        info_file = os.path.join(tmpdir, "RAxML_info.tmp")
        result_file = os.path.join(tmpdir, "RAxML_result.tmp")

        for idx in self.data.index:

            # write this row's topology where RAxML will read it
            with open(topology_file, "w") as handle:
                handle.write(self.data.at[idx, "spp_tree"])

            cmd = [
                "raxmlHPC-PTHREADS-AVX2",
                "-f", "e",
                "-t", topology_file,
                "-T", "8",
                "-m", "GTRGAMMA",
                "-n", "tmp",
                "-w", tmpdir,
                "-s", self.data.at[idx, "phy_seqpath"],
            ]

            # RAxML has no force option, so a leftover info file from a
            # previous run is removed by hand
            if os.path.exists(info_file):
                os.remove(info_file)

            # run RAxML; on failure show what happened and re-raise
            try:
                subprocess.run(
                    cmd,
                    check=True,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT,
                )
            except subprocess.CalledProcessError as error:
                print(error.output)
                print(" ".join(cmd))
                raise

            # root the result and store its newick string in the table
            rooted = toytree.tree(result_file).root(self.root)
            self.data.loc[idx, "raxml_tree"] = rooted.write(tree_format=0)
Ejemplo n.º 24
0
    def __init__(self, tree, constraint_dict, constraint_exact):
        """
        Traverse the tree to build test sets given constraint options.

        Parameters
        ----------
        tree:
            Anything accepted by toytree.tree(); the parsed tree must be
            rooted.
        constraint_dict:
            Either a dict keyed by "p1".."p4", or a list/tuple/array with
            one entry per position, e.g. (0, 1, 10, 13). Integer entries
            are expanded with get_tip_labels(). In the sequence form,
            non-integer entries are ignored.
        constraint_exact:
            A bool (expanded to four entries) or a sequence of bools with
            one entry per position, e.g. [True, True, False, False].

        Raises
        ------
        IPyradError if the tree is not rooted.
        Exception if `constraint_exact` is a sequence of the wrong length.
        """
        # store sets of four-taxon splits
        self.testset = set()
        self.hold = [0, 0, 0, 0]

        # tree to traverse
        self.tree = toytree.tree(tree)
        if not self.tree.is_rooted():
            raise IPyradError(
                "generate_tests_from_tree(): tree must be rooted and resolved")

        # store constraints
        self.cdict = OrderedDict((i, []) for i in ["p1", "p2", "p3", "p4"])

        # Node lookups below go through self.tree (the parsed tree) rather
        # than the raw `tree` argument, which may be a newick string.
        # np.integer is accepted so indices taken from numpy arrays are
        # not silently skipped.
        int_types = (int, np.integer)

        # constraints entered as a dict or tuple: (0, 1, 10, 13)
        if isinstance(constraint_dict, dict):
            for key, val in constraint_dict.items():
                if isinstance(val, int_types):
                    val = self.tree.get_tip_labels(val)
                self.cdict[key] = val

        elif isinstance(constraint_dict, (list, tuple, np.ndarray)):
            for cidx, pop in enumerate(["p1", "p2", "p3", "p4"]):
                const = constraint_dict[cidx]
                if isinstance(const, int_types):
                    self.cdict[pop] = self.tree.get_tip_labels(const)

        # constraint setting [True, True, False, False]
        self.xdict = constraint_exact
        if isinstance(self.xdict, bool):
            self.xdict = [self.xdict] * 4
        if isinstance(self.xdict, (tuple, list, np.ndarray)):
            if len(self.xdict) != len(self.cdict):
                print(self.xdict, self.cdict)
                raise Exception(
                    "constraint_exact must be bool or list of bools length N")
        self.xdict = np.array(self.xdict).astype(bool)

        # get tests
        self.loop(self.tree.treenode)

        # order and check redundancy: the first two members of each test
        # are sorted by the first coordinate column, then duplicates are
        # dropped while keeping first-seen order.
        tests = []
        seen = set()
        coords = self.tree.get_node_coordinates(layout='d')
        for test in self.testset:
            stest = sorted(test[:2], key=lambda x: coords[x, 0])
            ntest = stest[0], stest[1], test[2], test[3]
            if ntest not in seen:
                seen.add(ntest)
                tests.append(ntest)
        self.tests = tests
Ejemplo n.º 25
0
    def treeplot(self, ts, node, width: int = 600, height: int = 700):
        """
        Draw the tree at index `node` of `ts` with visible axes and a blue
        vertical line at x = 1 - self.simlength.

        Nothing is returned.
        """
        newick = ts.at_index(node).newick()
        _canvas, axes, _mark = toytree.tree(newick).draw(
            width=width, height=height)

        # turn the axes and both sets of ticks on
        axes.show = True
        for axis in (axes.x, axes.y):
            axis.ticks.show = True

        axes.vlines(1 - self.simlength, style={"stroke": "blue"})
Ejemplo n.º 26
0
Archivo: util.py Proyecto: messDiv/MESS
def random_metatree(local_trees, ntips=20, treefunc=toytree.rtree.unittree):
    """
    Build a random tree of height 10 and replace randomly chosen tips with
    the distinct trees in `local_trees`.

    Each replaced tip is swapped for the root of one local tree, whose stem
    length is the tip's edge length minus the local tree's root-to-farthest
    leaf distance.

    Returns a tuple (newick written with tree_format=5, array with one
    random value in [0, 10) per tip of the final tree).
    """
    backbone = _random_newick(ntips=ntips, treeheight=10, treefunc=treefunc)
    tre = toytree.tree(backbone)
    tip_names = tre.get_tip_labels()

    # duplicates among the local trees are collapsed
    unique_trees = list(set(local_trees))

    # one distinct backbone tip is picked for every local tree
    chosen = np.random.choice(tip_names, len(unique_trees), replace=False)
    for tip_name, local in zip(chosen, unique_trees):
        old_tip = tre.treenode.get_leaves_by_name(name=tip_name)[0]
        neighbor = old_tip.get_sisters()[0]

        subtree = toytree.tree(local)
        sub_root = subtree.treenode.get_tree_root()
        sub_height = sub_root.get_distance(sub_root.get_farthest_leaf()[0])

        # shorten the stem by the subtree's own height
        subtree.treenode.dist = old_tip.dist - sub_height

        # attach the subtree next to the old tip, then drop the old tip
        old_tip.add_sister(sister=subtree.treenode)
        neighbor.remove_sister(sister=old_tip)

    trts = np.random.sample(len(tre.get_tip_labels())) * 10
    return tre.write(tree_format=5), trts
Ejemplo n.º 27
0
    def baltree(ntips, treeheight=1.0, random_names=False):
        """
        Return a balanced tree topology with `ntips` tips.

        The tree is made ultrametric and scaled so the root is at
        `treeheight`. Tips are named r{idx}, in shuffled order when
        `random_names` is True.

        Raises ToytreeError if `ntips` is odd.
        """
        if ntips % 2:
            raise ToytreeError("balanced trees must have even number of tips.")

        # initial cherry
        rtree = toytree.tree()
        for label in ("r0", "r1"):
            rtree.treenode.add_child(name=label)

        # split the node chosen by return_small_clade once per extra tip
        for tip_number in range(2, ntips):
            target = return_small_clade(rtree.treenode)

            # the node's own name moves down to a new child and the new
            # tip becomes the second child
            target.add_child(name=target.name)
            target.add_child(name="r" + str(tip_number))

            # the split node no longer carries a name
            target.name = None

        # build the tree object and rescale it
        tre = toytree.tree(rtree)
        tre = tre.mod.make_ultrametric()
        tre = tre.mod.node_scale_root_height(treeheight)
        tre._coords.update()

        # rename tips from their idx, optionally through a shuffled mapping
        order = list(range(tre.ntips))
        if random_names:
            random.shuffle(order)
        for idx, member in tre.idx_dict.items():
            if not member.is_leaf():
                continue
            member.name = "r{}".format(order[idx])
        return tre
Ejemplo n.º 28
0
    def baltree(ntips, treeheight=1.0):
        """
        Return a balanced tree topology with `ntips` tips.

        Tips are renamed in postorder from r{N-1} down to r0. The tree is
        then reloaded from newick, made ultrametric, and scaled so the root
        is at `treeheight`.

        Raises ToytreeError if `ntips` is odd.
        """
        if ntips % 2:
            raise ToytreeError("balanced trees must have even number of tips.")

        # initial cherry
        rtree = toytree.tree()
        for label in ("r0", "r1"):
            rtree.treenode.add_child(name=label)

        # split the node chosen by return_small_clade once per extra tip
        for tip_number in range(2, ntips):
            target = return_small_clade(rtree.treenode)

            # the node's own name moves down to a new child and the new
            # tip becomes the second child
            target.add_child(name=target.name)
            target.add_child(name="r" + str(tip_number))

            # the split node no longer carries a name
            target.name = None

        # number the tips downward in postorder
        remaining = len(rtree) - 1
        for member in rtree.treenode.traverse("postorder"):
            if not member.is_leaf():
                continue
            member.name = "r" + str(remaining)
            remaining -= 1

        # reload from newick, then rescale
        tre = toytree.tree(rtree.write(tree_format=9))
        tre = tre.mod.make_ultrametric()
        scaled = tre.mod.node_scale_root_height(treeheight)
        scaled._coords.update()
        return scaled
Ejemplo n.º 29
0
    def __init__(self, tree, matrix, model=None, prior=0.5):
        """
        Store the model settings, data matrix and tree.

        `tree` may be a toytree object, or a newick string which is parsed
        with tree_format=0; anything else raises Exception. The root
        node's height is stored as a float on `self.treeheight`.
        """
        self.model, self.prior, self.matrix = model, prior, matrix

        if isinstance(tree, toytree.tree):
            parsed = tree
        elif isinstance(tree, str):
            parsed = toytree.tree(tree, tree_format=0)
        else:
            raise Exception(
                'tree must be either a newick string or toytree object')

        self.tree = parsed
        self.treeheight = float(parsed.treenode.height)
Ejemplo n.º 30
0
    def simulate_geneal_and_seqs(self):
        """
        Simulate genealogies and sequence data on every species tree in
        `self.data`.

        For each row the "spp_tree" is loaded, Ne values from
        `self.samp_ns[idx]` are set on its nodes by node index, and an
        ipcoal model is simulated for `self.nloci` loci of `self.nsites`
        sites. Diploid phylip and nexus alignments are written to
        `self.outdir`, and the SNP count and both file paths are stored in
        the "nsnps", "phy_seqpath" and "nex_seqpath" columns.
        """
        for idx in self.data.index:

            # output files for this row share one base name
            basename = self.prefix + "_{}".format(idx)

            # load the species tree and attach one Ne value per node index
            sptree = toytree.tree(self.data.at[idx, "spp_tree"])
            ne_by_node = dict(zip(range(sptree.nnodes), self.samp_ns[idx]))
            sptree = sptree.set_node_values("Ne", ne_by_node)

            # simulate genealogies and sequences on this species tree
            model = ipcoal.Model(
                tree=sptree,
                nsamples=2,
                seed=self.rng.integers(0, 1e9),
                **self.ipcoal_kwargs
            )
            model.sim_loci(self.nloci, self.nsites)

            # write diploid alignments, phylip first and then nexus
            model.write_concat_to_phylip(
                name=basename, outdir=self.outdir, diploid=True)
            model.write_concat_to_nexus(
                name=basename, outdir=self.outdir, diploid=True)

            # record the number of snps and where the alignments are
            self.data.loc[idx, "nsnps"] = model.df.nsnps.sum()
            self.data.loc[idx, "phy_seqpath"] = os.path.join(
                self.outdir, basename + ".phy")
            self.data.loc[idx, "nex_seqpath"] = os.path.join(
                self.outdir, basename + ".nex")

        print("simulated sequences on {} species trees.".format(self.reps))