def coaltree(ntips, ne=None, random_names=False, seed=None):
    """
    Return a random tree built by joining randomly chosen lineage pairs.

    Starting from ntips single-tip lineages, two lineages are drawn at
    random and attached under a new internal node until one lineage is
    left. The increment used at every join is an exponential draw with
    mean (4 * ne) / (ntips * (ntips - 1)).

    Parameters
    ----------
    ntips (int):
        Number of tips in the tree.
    ne (int or None):
        Population size. When falsy, 10000 is used for the draws and all
        edge lengths are afterwards divided by 2 * ne (coalescent units).
    random_names (bool):
        If True the numeric part of the tip names is shuffled.
    seed:
        Value handed to random.seed().
    """
    # seed the stdlib generator used for every draw below
    random.seed(seed)

    # without an Ne the result is rescaled at the end
    use_coal_units = not ne
    if use_coal_units:
        ne = 10000

    # one detached node per tip
    lineages = [
        toytree.tree().treenode.add_child(name=str(i)) for i in range(ntips)
    ]

    # join two random lineages at a time until only the root remains
    while len(lineages) > 1:
        parent = toytree.tree()
        left = lineages.pop(random.choice(range(len(lineages))))
        right = lineages.pop(random.choice(range(len(lineages))))
        # NOTE(review): the mean is computed from the fixed ntips, not from
        # the number of lineages currently remaining -- confirm intended.
        mean_wait = (4. * ne) / float(ntips * (ntips - 1))
        wait = random.expovariate(1. / mean_wait)
        parent.treenode.add_child(left, dist=right.height + wait)
        parent.treenode.add_child(right, dist=left.height + wait)
        lineages.append(parent.treenode)

    # round-trip through newick to get a fresh tree object
    tre = toytree.tree(lineages[0].write())
    tre.treenode.ladderize()

    # express edges in units of 2N when no Ne was supplied
    if use_coal_units:
        scale = 2. * ne
        for node in tre.treenode.traverse():
            node.dist /= scale

    # extend tip edges by their remaining height so tips end at zero
    for node in tre.treenode.traverse():
        if node.is_leaf():
            node.dist += node.height

    # name tips r{n}, optionally in shuffled order
    labels = list(range(tre.ntips))
    if random_names:
        random.shuffle(labels)
    for idx, node in tre.idx_dict.items():
        if node.is_leaf():
            node.name = "r{}".format(labels[idx])

    # refresh cached node names / idx values
    tre._coords.update()
    return tre
def compare_quartets(self):
    """
    Compare the quartet sets of consecutively numbered tree files.

    For i = 1 .. nquartets - 1 the files "<trefilespath>/tree<i>.tre" and
    "<trefilespath>/tree<i+1>.tre" are loaded, and the number of quartets
    shared by both is divided by the number of quartets in the first.
    The resulting table is written to a CSV file and returned.

    Relies on the names ``nquartets``, ``trefilespath`` and
    ``get_quartets`` being available in the enclosing scope.

    Returns
    -------
    pandas.DataFrame with columns 'trees' and 'Quartet intersection'.

    Raises
    ------
    ZeroDivisionError if the first tree of a pair yields no quartets.
    """
    def load_quartets(number):
        # quartet set of the tree stored in tree<number>.tre
        path = str(trefilespath) + "/tree" + str(number) + ".tre"
        return get_quartets(toytree.tree(path))

    # Rows are collected in a list and turned into a frame once;
    # DataFrame.append() no longer exists in pandas >= 2.0.
    rows = []
    previous = None
    for i in range(int(nquartets) - 1):
        # the second tree of the previous pair is the first of this one
        q0 = load_quartets(i + 1) if previous is None else previous
        q1 = load_quartets(i + 2)
        rows.append({
            'trees': str(i + 1) + ", " + str(i + 2),
            'Quartet intersection': len(q0.intersection(q1)) / len(q0),
        })
        previous = q1

    compare_quartets_df = pd.DataFrame(
        rows, columns=['trees', 'Quartet intersection'])

    # show full tables when the frame is displayed (global pandas option)
    pd.set_option("display.max_rows", None, "display.max_columns", None)

    # NOTE(review): no path separator is inserted between trefilespath and
    # the file name here, unlike the tree paths above -- confirm intended.
    compare_quartets_df.to_csv(
        str(trefilespath) + "compare_quartets" + str(nquartets) + ".csv",
        index=False)
    return compare_quartets_df
def remote_raxml(phyfile, inference_args):
    """
    Run raxml on a phylip file and return the inferred tree as newick.

    Parameters
    ----------
    phyfile:
        Passed to raxml as its ``data`` argument.
    inference_args (dict):
        Extra keyword arguments forwarded to raxml.

    Returns
    -------
    The ``newick`` attribute of the parsed result tree. The bipartitions
    file is used when it exists, otherwise the bestTree file.
    """
    # the run name is derived from the process id; outputs go to the tmpdir
    job = raxml(
        data=phyfile,
        name="temp_" + str(os.getpid()),
        workdir=tempfile.gettempdir(),
        **inference_args
    )
    job.run(force=True, quiet=True, block=True)

    # pick the result file to parse
    source = (
        job.trees.bipartitions
        if os.path.exists(job.trees.bipartitions)
        else job.trees.bestTree
    )
    newick = toytree.tree(source).newick

    # remove every result file that was written
    for key in job.trees:
        path = getattr(job.trees, key)
        if os.path.exists(path):
            os.remove(path)

    return newick
def _preview_topology(self):
    """
    Reads in the tree file, applies the requested node replacements,
    drops and rooting, and saves a .png with rectangular and circular
    topology previews.

    Side effects: sets ``self.tree`` and rewrites
    ``self.args.replace_nodes`` to the list of placeholder node names
    (an empty list when no replacements were requested).

    :return: toyplot.Canvas object
    """
    self.log.debug('reading tree')

    # read in the tree; the context manager closes the file handle
    with open(self.tree_file, 'r') as handle:
        newick = handle.read()
    self.tree = toytree.tree(newick, tree_format=0)

    # replace nodes: add a placeholder sister, then detach the original
    if self.args.replace_nodes:
        for idx in self.args.replace_nodes:
            self.tree.idx_dict[idx].add_sister(
                name='%d_replaced' % idx, dist=1)
            self.tree.idx_dict[idx].detach()
        # saving the node names now saves some work
        self.args.replace_nodes = [
            '%d_replaced' % idx for idx in self.args.replace_nodes
        ]
    else:
        # save empty list
        self.args.replace_nodes = []

    # drop nodes
    if self.args.drop_nodes:
        for idx in self.args.drop_nodes:
            self.tree.idx_dict[idx].detach()

    # outgroup rooting
    if self.args.root:
        self.tree = self.tree.root(
            names=[self.tree.idx_dict[self.args.root].name])

    # rebuild the tree object from its newick string
    self.tree = toytree.tree(self.tree.write())

    # set dimensions of the canvas
    preview = toyplot.Canvas(
        width=800, height=400, style={'background-color': 'white'})

    # dissect canvas into two cartesian areas
    ax0 = preview.cartesian(bounds=('5%', '48%', '5%', '95%'))
    ax1 = preview.cartesian(bounds=('52%', '95%', '5%', '95%'))

    # draw each layout into its own cartesian area
    self.log.debug('drawing preview')
    self.tree.draw(axes=ax0, layout='r', tip_labels=False)
    self.tree.draw(axes=ax1, layout='c', tip_labels=False)

    # hide the axes coordinates
    ax0.show = False
    ax1.show = False

    # render to image
    png.render(preview, self.args.topo)
    self.log.info('rendered preview %s' % self.args.topo)
    return preview
def _parse_results(self):
    """
    Parse the gzipped tree output and the covariance matrices into
    ``self.results``.

    Sets:
      - results.tree: the newick line of the tree output (the second line
        when ``self.params.noss`` is set, otherwise the first).
      - results.admixture: one tuple per admixture line,
        (source node idx, source dist, sink node idx, sink dist, weight).
      - results.cov / results.covse / results.modelcov: matrices parsed
        with _parse_matrix using the tree's tip labels.
    """
    # Open in text mode: gzip.open defaults to binary, and the clade
    # strings are concatenated with str below.
    with gzip.open(self.files.treeout, "rt") as tmp:
        data = tmp.readlines()

    # the tree is on the first line unless noss is set
    k = 1 if self.params.noss else 0
    self.results.tree = data[k].strip()

    # every following non-blank line describes one admixture event
    events = []
    for line in data[k + 1:]:
        fields = line.strip().split()
        if not fields:
            continue
        weight, jweight, jse, pval, clade1, clade2 = fields
        events.append((clade1, clade2, weight, jweight, jse, pval))

    # tree used to look up node indices
    tre = toytree.tree(self.results.tree)

    def clade_idx(clade):
        # idx of the node in `tre` matching a newick clade string
        sub = toytree.tree(clade + ";")
        if len(sub.tree) == 1:
            return tre.tree.search_nodes(name=sub.tree.name)[0].idx
        return tre.tree.get_common_ancestor(sub.get_tip_labels()).idx

    # convert clade strings to node indices and numbers
    admixture = []
    for admix in events:
        sodx = clade_idx(admix[0])
        sidx = clade_idx(admix[1])
        admixture.append((
            int(sodx),
            # the value after the last ':' of the clade string
            float(admix[0].rsplit(":", 1)[1]),
            int(sidx),
            float(admix[1].rsplit(":", 1)[1]),
            float(admix[2]),
        ))
    self.results.admixture = admixture

    # parse the covariance matrices
    names = tre.get_tip_labels()
    self.results.cov = _parse_matrix(self.files.cov, names)
    self.results.covse = _parse_matrix(self.files.covse, names)
    self.results.modelcov = _parse_matrix(self.files.modelcov, names)
def unittree(ntips, treeheight=1.0, random_names=False, seed=None):
    """
    Return a random topology with ntips tips, made ultrametric and scaled
    so the root sits at `treeheight`.

    Parameters
    -----------
    ntips (int):
        The number of tips in the randomly generated tree.
    treeheight (float):
        Root height the tree is scaled to.
    random_names (bool):
        If True the numeric part of the tip names is shuffled.
    seed (int):
        Random number generator seed.
    """
    # seed the stdlib generator
    random.seed(seed)

    # random topology, round-tripped through newick
    root = toytree.tree().treenode
    root.populate(ntips)
    tre = toytree.tree(newick=root.write())

    # ladderize, make ultrametric, then scale to the requested height
    tre = tre.ladderize()
    tre = tre.mod.make_ultrametric()
    tre = tre.mod.node_scale_root_height(treeheight)

    # name tips r{n}: descending by default, shuffled on request
    labels = list(range(tre.ntips))[::-1]
    if random_names:
        random.shuffle(labels)
    for label, leaf in zip(labels, tre.treenode.get_leaves()):
        leaf.name = "r{}".format(label)

    # default support value on every node
    for node in tre.treenode.traverse():
        node.support = 100

    # refresh node ordering and cached names / idx values
    tre.treenode.ladderize()
    tre._coords.update()
    return tre
def __init__(self, tree, matrix=None, model=None, prior=0.5):
    """
    Store the tree, the data matrix and the model settings.

    Parameters
    ----------
    tree:
        A toytree object, or a newick string (parsed with tree_format=0).
    matrix:
        A pandas DataFrame, or anything accepted by pandas.read_csv (read
        with the first column as the index).
    model:
        Stored unchanged as ``self.model``.
    prior:
        Stored unchanged as ``self.prior``.

    Raises
    ------
    Exception if `tree` is neither a toytree object nor a string.
    """
    if isinstance(tree, toytree.tree):
        self.tree = tree
    elif isinstance(tree, str):
        self.tree = toytree.tree(tree, tree_format=0)
    else:
        raise Exception(
            'tree must be either a newick string or toytree object')

    if isinstance(matrix, pd.DataFrame):
        self.matrix = matrix
    else:
        self.matrix = pd.read_csv(matrix, index_col=0)

    self.model = model
    self.prior = prior

    # Use the parsed tree: the raw argument may be a newick string, which
    # has no .treenode attribute.
    rate = 1 / self.tree.treenode.height
    self.alpha = rate
    self.beta = rate
def run_qmc(self):
    """
    Run quartet max-cut (QMC) on the quartets dump file and save the
    resulting tree.

    The tree written by QMC is read back, its numeric tip names are
    replaced by the matching entries of ``self.tet.samples``, and the
    newick string is appended to the boots file (when ``self.boot`` is
    set) or written to the tree file.

    Raises
    ------
    TetradError if QMC exits with a non-zero return code.
    """
    # QMC writes its tree to this temporary path
    self._tmp = os.path.join(self.tet.dirs, ".tmptre")
    qrtt_arg = "qrtt={}".format(self.tet.files.qdump)
    otre_arg = "otre={}".format(self._tmp)

    # run QMC and wait for it; its output is captured but not used
    proc = sps.Popen(
        [QMC, qrtt_arg, otre_arg], stderr=sps.STDOUT, stdout=sps.PIPE)
    proc.communicate()
    if proc.returncode:
        raise TetradError("error in QMC")

    # map the integer tip names back to sample names
    ttre = toytree.tree(self._tmp)
    for tip in ttre.treenode.get_leaves():
        tip.name = self.tet.samples[int(tip.name)]
    newick = ttre.write(tree_format=9)

    # bootstrap trees are appended one per line; the main tree overwrites
    if self.boot:
        target, mode, text = self.tet.trees.boots, 'a', newick + "\n"
    else:
        target, mode, text = self.tet.trees.tree, 'w', newick
    with open(target, mode) as out:
        out.write(text)
def relative_error(self):
    """
    Calculate relative error between true tree and chronos or mrbayes output.

    For every row of ``self.data`` and each of the three tree columns, the
    tree is rescaled to the reference root height, and the sum of squared
    differences between the "height" values of ``self.sptree`` and of the
    rescaled tree is stored in the matching ``*_relative_error`` column.
    """
    # tree column -> column receiving the error
    columns = (
        ("chronos_correlated", "chc_relative_error"),
        ("chronos_relaxed", "chr_relative_error"),
        ("mrbayes_tree", "mb_relative_error"),
    )

    # "height" values of the true tree
    true_edge_lengths = self.sptree.get_edge_values(feature="height")

    # Reference height: first key of the true tree's height dict. It does
    # not depend on the row or model, so it is computed once.
    target_height = list(self.sptree.get_feature_dict("height").keys())[0]

    for idx in self.data.index:
        for model, column in columns:
            dtree = toytree.tree(self.data.at[idx, model])
            # Keep the returned tree: elsewhere in this code base the
            # result of node_scale_root_height is always used (or
            # nocopy=True is passed); discarding it left the tree unscaled.
            dtree = dtree.mod.node_scale_root_height(
                treeheight=target_height)
            dtree_edge_lengths = dtree.get_edge_values(feature="height")

            # sum of squared differences
            diffs = true_edge_lengths - dtree_edge_lengths
            self.data.loc[idx, column] = np.sum(np.square(diffs))
def simple_error(self):
    """
    Calculate simple error between true tree and chronos or mrbayes output.

    For every row of ``self.data`` and each of the three tree columns, the
    sum of squared differences between the "height" values of
    ``self.sptree`` and of the stored tree is written to the matching
    ``*_simple_error`` column. No rescaling is applied.
    """
    # tree column -> column receiving the error
    # ("chr_simple_error" was previously misspelled with a space, which
    # created a column inconsistent with its two siblings)
    columns = (
        ("chronos_correlated", "chc_simple_error"),
        ("chronos_relaxed", "chr_simple_error"),
        ("mrbayes_tree", "mb_simple_error"),
    )

    # "height" values of the true tree
    true_edge_lengths = self.sptree.get_edge_values(feature="height")

    for idx in self.data.index:
        for model, column in columns:
            dtree = toytree.tree(self.data.at[idx, model])
            dtree_edge_lengths = dtree.get_edge_values(feature="height")

            # sum of squared differences
            diffs = true_edge_lengths - dtree_edge_lengths
            self.data.loc[idx, column] = np.sum(np.square(diffs))
def __init__(self, tree, df, seed=None):
    """
    Store the species tree and a private copy of the data table.

    Parameters
    ----------
    tree:
        Anything accepted by toytree.tree(); kept as ``self.sptree``.
    df:
        Object with a ``copy()`` method (a pandas DataFrame in practice,
        presumably); the copy is kept as ``self.data``.
    seed:
        Accepted but not used in this method.
    """
    # parse the species tree
    sptree = toytree.tree(tree)

    # work on a copy so the caller's table is never modified
    data = df.copy()

    self.sptree = sptree
    self.data = data
def _update_newick_names(nw, names):
    """
    Return `nw` with its tips renamed to `names`.

    Tips are taken in the order given by get_tip_labels() and paired with
    `names` positionally; pairing stops at the shorter of the two. The
    result is written with tree_format=5.
    """
    tre = toytree.tree(nw)

    # Look up every tip node before renaming any of them, so that a newly
    # assigned name can never be matched by a later lookup.
    targets = []
    for label in tre.get_tip_labels():
        targets.append(tre.treenode.search_nodes(name=label)[0])

    for target, new_name in zip(targets, names):
        target.name = new_name

    return tre.write(tree_format=5)
def infer_mb(self, tmp):
    """
    Run mrbayes on a sequence file and return the consensus tree as newick.

    Parameters
    ----------
    tmp:
        Passed to mrbayes as its ``data`` argument. The file itself is
        left in place.

    Returns
    -------
    The ``newick`` attribute of the consensus tree (parsed with
    tree_format=10).
    """
    # the run name is derived from the process id; outputs go to the tmpdir
    job = mrbayes(
        data=tmp,
        name="temp_" + str(os.getpid()),
        workdir=tempfile.gettempdir(),
        **self.inference_args
    )
    job.run(force=True, quiet=True, block=True)

    # parse the consensus tree
    newick = toytree.tree(job.trees.constre, tree_format=10).newick

    # delete the result files; the path is the second item of each entry
    for entry in job.trees:
        path = entry[1]
        if os.path.exists(path):
            os.remove(path)

    return newick
def imbtree(ntips, treeheight=1.0, random_names=False):
    """
    Return an imbalanced (comb-like) tree topology.

    Parameters
    ----------
    ntips (int):
        Number of tips. The tree always starts from a two-tip cherry, and
        one tip is added for every value in range(2, ntips).
    treeheight (float):
        Root height the tree is scaled to.
    random_names (bool):
        If True the numeric part of the tip names is shuffled.
    """
    # initial cherry
    root = toytree.TreeNode.TreeNode()
    root.add_child(name="r0")
    root.add_child(name="r1")

    # each new root gets one new tip plus the tree built so far
    for i in range(2, ntips):
        parent = toytree.TreeNode.TreeNode()
        parent.add_child(name="r" + str(i))
        parent.add_child(root)
        root = parent

    # wrap as a toytree, then make ultrametric and scale in place
    tre = toytree.tree(root)
    tre = tre.mod.make_ultrametric(nocopy=True)
    tre = tre.mod.node_scale_root_height(treeheight, nocopy=True)
    tre._coords.update()

    # name tips r{n} by idx, or in shuffled order
    labels = list(range(tre.ntips))
    if random_names:
        random.shuffle(labels)
    for idx, tnode in tre.idx_dict.items():
        if tnode.is_leaf():
            tnode.name = "r{}".format(labels[idx])
    return tre
def run_tree_inference(self, nexus, idx):
    """
    Write nexus to a temporary file, run raxml tree inference on it, and
    return the tip order of the resulting best tree as one string.

    Parameters
    ----------
    nexus (str or bytes):
        Alignment text written to the temporary file.
    idx:
        Used (as a string) for the temp file prefix and the raxml run name.

    Returns
    -------
    str: the items returned by get_order() for the best tree, joined.

    Note: the temporary input file is created with delete=False and is
    left on disk, as before.
    """
    # gettempdir() always returns a directory; tempfile.tempdir is None
    # until something has initialised it.
    tmpdir = tempfile.gettempdir()

    # Open in a mode matching the payload, write, and close the file
    # before raxml reads it so everything is flushed to disk.
    mode = "wb" if isinstance(nexus, bytes) else "w"
    with tempfile.NamedTemporaryFile(
            mode=mode, delete=False, prefix=str(idx), dir=tmpdir) as tmpfile:
        tmpfile.write(nexus)
        datapath = tmpfile.name

    # infer the tree
    rax = raxml(name=str(idx), data=datapath, workdir=tmpdir, N=1, T=2)
    rax.run(force=True, block=True, quiet=True)

    # return tree order
    order = get_order(toytree.tree(rax.trees.bestTree))
    return "".join(order)
def run(self):
    """
    Run the Astral command and load the inferred tree.

    The combined stdout/stderr of the process is written to
    ``self.logfile``, the temporary trees file is removed if present, and
    the result file is parsed into ``self.tree``.

    Raises
    ------
    IPyradError if the process exits with a non-zero return code.
    """
    print("[astral.5.7.3.jar]")

    # launch the command, merging stderr into stdout
    proc = sps.Popen(
        self._get_command(),
        stderr=sps.STDOUT,
        stdout=sps.PIPE,
    )
    output = proc.communicate()[0].decode()

    # report failure together with the command that was run
    if proc.returncode:
        print("Astral Error:\n", output)
        command = " ".join(self._get_command())
        raise IPyradError(
            "Astral Error: your command string was:\n{}".format(command))

    # keep the process output as the log
    with open(self.logfile, 'w') as out:
        out.write(output)

    # remove the temporary trees file
    if os.path.exists(self._tmptrees):
        os.remove(self._tmptrees)

    # load the result and report where it was written
    self.tree = toytree.tree(self.treefile)
    print("inferred tree written to ({})".format(self.treefile))
def __init__(self, newick, constraint_dict, constraint_exact):
    """
    Traverse a rooted tree to build test sets under the given constraints.

    Parameters
    ----------
    newick:
        Anything accepted by toytree.tree().
    constraint_dict:
        Optional mapping merged into the default {"p1": [], "p2": [],
        "p3": [], "p4": []} constraints.
    constraint_exact:
        A bool (expanded to a list of four) or a list whose length must
        equal the number of constraint entries.

    Raises
    ------
    IPyradError if the tree is not rooted.
    Exception if a constraint_exact list has the wrong length.
    """
    # accumulators filled during traversal
    self.testset = set()
    self.hold = [0, 0, 0, 0]

    # tree to traverse
    self.tree = toytree.tree(newick)
    if not self.tree.is_rooted():
        raise IPyradError(
            "generate_tests_from_tree(): tree must be rooted and resolved")

    # default constraints, overridden by any user entries
    self.cdict = OrderedDict()
    for pop in ["p1", "p2", "p3", "p4"]:
        self.cdict[pop] = []
    if constraint_dict:
        self.cdict.update(constraint_dict)

    # a single bool applies to all four positions
    exact = constraint_exact
    if isinstance(exact, bool):
        exact = [exact] * 4
    self.xdict = exact
    if isinstance(exact, list) and len(exact) != len(self.cdict):
        raise Exception(
            "constraint_exact must be bool or list of bools length N")

    # get tests
    self.loop()
def load_counts(self):
    """
    Load the tree and the counts array from the HDF5 counts database and
    optionally rescale the counts.

    ``self.scale`` selects the rescaling:
      1 -- divide the whole array by its global maximum;
      2 -- divide every ``counts[i, j]`` block by that block's own maximum;
      anything else -- leave the counts as loaded.

    Sets ``self.tree`` and ``self.counts``; prints the counts shape unless
    ``self.quiet`` is set.
    """
    # load the tree used in the simulations and the full counts array
    with h5py.File(self.db_counts, 'r') as io5:
        self.tree = toytree.tree(io5.attrs["tree"])
        self.counts = io5["counts"][:]

    if self.scale == 1:
        # [1] make counts proportional across ALL sims
        self.counts = self.counts / self.counts.max()
    elif self.scale == 2:
        # [2] make counts proportional within each [i, j] block.
        # Done as a single division that builds a new float array: writing
        # the ratios back into the loaded array element-by-element would
        # truncate them if the dataset has an integer dtype.
        # (assumes counts has at least two dimensions -- TODO confirm)
        inner_axes = tuple(range(2, self.counts.ndim))
        peaks = self.counts.max(axis=inner_axes, keepdims=True)
        self.counts = self.counts / peaks

    if not self.quiet:
        print("[load] {}".format(self.counts.shape))
def remote_mrbayes(nexfile, inference_args, keepdir=None):
    """
    Run mrbayes on a nexus file and return the consensus tree as newick.

    Parameters
    ----------
    nexfile:
        Path of the input file; it is deleted before returning.
    inference_args (dict):
        Extra keyword arguments forwarded to mrbayes.
    keepdir:
        Working directory to use. When falsy, the directory containing
        `nexfile` is used.

    Returns
    -------
    The ``newick`` attribute of the consensus tree (parsed with
    tree_format=10).
    """
    # choose where mrbayes writes its files
    workdir = keepdir if keepdir else os.path.dirname(nexfile)

    # the run name is derived from the process id
    job = mrbayes(
        data=nexfile,
        name="temp_" + str(os.getpid()),
        workdir=workdir,
        **inference_args
    )
    job.run(force=True, quiet=True, block=True)

    # parse the consensus tree
    newick = toytree.tree(job.trees.constre, tree_format=10).newick

    # delete the result files; the path is the second item of each entry
    for entry in job.trees:
        path = entry[1]
        if os.path.exists(path):
            os.remove(path)

    # delete the input file as well
    os.remove(nexfile)

    return newick
def draw_cov(self, axes=None):
    """
    Draw the covariance matrix in ``self.results.cov`` as a 400x400 matrix
    plot labelled with the tip names of the result tree.

    Parameters
    ----------
    axes:
        Accepted but not used in this method.

    Returns
    -------
    (canvas, table) as returned by toyplot.matrix.
    """
    cov = self.results.cov
    tips = toytree.tree(self.results.tree).get_tip_labels()

    # tick locator: one position per tip, labels in reversed tip order
    locator = toyplot.locator.Explicit(
        locations=range(len(tips)),
        labels=tips[::-1],
    )

    # diverging colormap spanning the range of the matrix
    colormap = toyplot.color.diverging.map("BlueRed", cov.min(), cov.max())

    # only the bottom labels are shown
    canvas, table = toyplot.matrix(
        (cov, colormap),
        width=400,
        height=400,
        bshow=True,
        tshow=False,
        lshow=False,
        rlocator=locator,
        blocator=locator,
    )
    return canvas, table
def load_slice(self):
    """
    Load this object's slice of the HDF5 database into attributes.

    The datasets node_Nes, admixture, treeheight and slide_seeds are
    indexed with ``self.idxs``; the tree and the nsnps, rate_vector,
    pi_vector and node_slide_prop values come from the file attributes.
    Finally an all-zero int64 ``self.counts`` array of shape
    (len(idxs), ntips, nsnps) is allocated.
    """
    sliced_datasets = ("node_Nes", "admixture", "treeheight", "slide_seeds")

    with h5py.File(self.database, 'r') as io5:
        # rows of each dataset selected by self.idxs
        for key in sliced_datasets:
            setattr(self, key, io5[key][self.idxs, ...])

        # tree metadata; make_ultrametric() guards against imprecision
        tre = toytree.tree(io5.attrs["tree"])
        self.tree = tre.mod.make_ultrametric()

        # remaining metadata attributes
        attrs = io5.attrs
        self.nsnps = attrs["nsnps"]
        self.rate_vector = attrs["rate_vector"]
        self.pi_vector = attrs["pi_vector"]
        self.ntips = len(self.tree)
        self.node_slide_prop = attrs["node_slide_prop"]

    # zero-filled array that will hold the aligned SNPs
    self.nvalues = len(self.idxs)
    shape = (self.nvalues, self.tree.ntips, self.nsnps)
    self.counts = np.zeros(shape, dtype=np.int64)
def __init__(self, tree, Ne=None):
    """
    Store the tree with an "Ne" value on its nodes.

    Parameters
    ----------
    tree:
        Anything accepted by toytree.tree().
    Ne:
        Passed as the default to set_node_values("Ne", ...).
    """
    # parse the input, then attach Ne using the given default
    parsed = toytree.tree(tree)
    self.tree = parsed.set_node_values("Ne", default=Ne)

    # filled in later with the demography in its final format
    self.demog = None
def batch_raxml(self):
    """
    Run RAxML once per row of ``self.data`` and store the resulting tree.

    For each row the "spp_tree" newick is written to a temporary file and
    passed to RAxML as the starting tree (-f e) together with the row's
    "phy_seqpath" alignment. The result tree is rooted with ``self.root``
    and written into the "raxml_tree" column.

    Raises
    ------
    subprocess.CalledProcessError if RAxML exits with an error; the
    captured output and the command are printed first.
    """
    # every file lives in the system temp directory under run name "tmp"
    tmpdir = tempfile.gettempdir()
    tmp_tree = os.path.join(tmpdir, "tmp.tre")
    info_file = os.path.join(tmpdir, "RAxML_info.tmp")
    result_file = os.path.join(tmpdir, "RAxML_result.tmp")

    for idx in self.data.index:
        # write the topology RAxML should use
        with open(tmp_tree, "w") as out:
            out.write(self.data.at[idx, "spp_tree"])

        # build the RAxML command
        cmd = [
            "raxmlHPC-PTHREADS-AVX2",
            "-f", "e",
            "-t", tmp_tree,
            "-T", "8",
            "-m", "GTRGAMMA",
            "-n", "tmp",
            "-w", tmpdir,
            "-s", self.data.at[idx, "phy_seqpath"],
        ]

        # RAxML has no force option, so remove a leftover info file
        if os.path.exists(info_file):
            os.remove(info_file)

        # run RAxML; on failure show its output and the command, re-raise
        try:
            subprocess.run(
                cmd,
                check=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
            )
        except subprocess.CalledProcessError as error:
            print(error.output)
            print(" ".join(cmd))
            raise

        # root the result and store its newick string
        rooted = toytree.tree(result_file).root(self.root)
        self.data.loc[idx, "raxml_tree"] = rooted.write(tree_format=0)
def __init__(self, tree, constraint_dict, constraint_exact):
    """
    Traverse a rooted tree to build test sets under the given constraints.

    Parameters
    ----------
    tree:
        Anything accepted by toytree.tree().
    constraint_dict:
        Either a dict mapping "p1".."p4" to constraints, or a sequence of
        four entries in p1..p4 order. Integer entries are treated as node
        indices and replaced by get_tip_labels(idx). In a dict, other
        values are stored as given; in a sequence they are ignored.
    constraint_exact:
        A bool (expanded to four) or a sequence whose length must equal
        the number of constraint entries; stored as a boolean array.

    Sets ``self.tests`` to the de-duplicated tests, with the first two
    members of each test ordered by the first column of the node
    coordinates.

    Raises
    ------
    IPyradError if the tree is not rooted.
    Exception if a constraint_exact sequence has the wrong length.
    """
    # accumulators filled during traversal
    self.testset = set()
    self.hold = [0, 0, 0, 0]

    # tree to traverse
    self.tree = toytree.tree(tree)
    if not self.tree.is_rooted():
        raise IPyradError(
            "generate_tests_from_tree(): tree must be rooted and resolved")

    # default (empty) constraints
    pops = ["p1", "p2", "p3", "p4"]
    self.cdict = OrderedDict((pop, []) for pop in pops)

    # numpy integers are not instances of int, so they are listed
    # explicitly to make ndarray input behave like list/tuple input
    int_types = (int, np.integer)

    # All lookups use the parsed self.tree: the raw `tree` argument may be
    # a newick string, which has none of these methods.
    if isinstance(constraint_dict, dict):
        for key, val in constraint_dict.items():
            if isinstance(val, int_types):
                val = self.tree.get_tip_labels(val)
            self.cdict[key] = val
    elif isinstance(constraint_dict, (list, tuple, np.ndarray)):
        for cidx, pop in enumerate(pops):
            const = constraint_dict[cidx]
            if isinstance(const, int_types):
                self.cdict[pop] = self.tree.get_tip_labels(const)

    # constraint setting, e.g. [True, True, False, False]
    self.xdict = constraint_exact
    if isinstance(self.xdict, bool):
        self.xdict = [self.xdict] * 4
    if isinstance(self.xdict, (tuple, list, np.ndarray)):
        if len(self.xdict) != len(self.cdict):
            print(self.xdict, self.cdict)
            raise Exception(
                "constraint_exact must be bool or list of bools length N")
    self.xdict = np.array(self.xdict).astype(bool)

    # get tests
    self.loop(self.tree.treenode)

    # order the first two members of every test and drop duplicates;
    # the set gives constant-time membership while the list keeps order
    coords = self.tree.get_node_coordinates(layout='d')
    tests = []
    seen = set()
    for test in self.testset:
        stest = sorted(test[:2], key=lambda x: coords[x, 0])
        ntest = stest[0], stest[1], test[2], test[3]
        if ntest not in seen:
            seen.add(ntest)
            tests.append(ntest)
    self.tests = tests
def treeplot(self, ts, node, width: int = 600, height: int = 700):
    """
    Draw the tree at index `node` of `ts` with visible axes.

    The newick string of ``ts.at_index(node)`` is drawn with toytree at
    the requested size, both axis tick sets are switched on, and a blue
    vertical line is added at x = 1 - self.simlength. Nothing is returned.
    """
    # tree at the requested index, as newick
    newick = ts.at_index(node).newick()
    _canvas, axes, _mark = toytree.tree(newick).draw(
        width=width, height=height)

    # show the axes coordinates
    axes.show = True
    axes.x.ticks.show = True
    axes.y.ticks.show = True

    # vertical marker line
    line_style = {"stroke": "blue"}
    axes.vlines(1 - self.simlength, style=line_style)
def random_metatree(local_trees, ntips=20, treefunc=toytree.rtree.unittree):
    """
    Build a random tree and graft each distinct local tree onto it in
    place of a randomly chosen tip.

    Parameters
    ----------
    local_trees:
        Iterable of trees (anything accepted by toytree.tree());
        duplicates are removed first.
    ntips (int):
        Number of tips of the random backbone tree.
    treefunc:
        Tree generator forwarded to _random_newick.

    Returns
    -------
    (newick, trts): the combined tree written with tree_format=5, and an
    array with one random value in [0, 10) per tip of the combined tree.
    """
    # random backbone with height 10
    backbone = _random_newick(ntips=ntips, treeheight=10, treefunc=treefunc)
    tre = toytree.tree(backbone)

    # one distinct backbone tip per distinct local tree
    unique_trees = list(set(local_trees))
    picked = np.random.choice(
        tre.get_tip_labels(), len(unique_trees), replace=False)

    for tip, ltree in zip(picked, unique_trees):
        target = tre.treenode.get_leaves_by_name(name=tip)[0]
        sister = target.get_sisters()[0]

        # distance from the subtree root to its farthest leaf
        sub = toytree.tree(ltree)
        subroot = sub.treenode.get_tree_root()
        depth = subroot.get_distance(subroot.get_farthest_leaf()[0])

        # shorten the stem by that depth, attach next to the tip, then
        # remove the tip itself
        sub.treenode.dist = target.dist - depth
        target.add_sister(sister=sub.treenode)
        sister.remove_sister(sister=target)

    # one random value per tip of the combined tree
    ntotal = len(tre.get_tip_labels())
    trts = np.random.sample(ntotal) * 10
    return tre.write(tree_format=5), trts
def baltree(ntips, treeheight=1.0, random_names=False):
    """
    Return a balanced tree topology.

    Parameters
    ----------
    ntips (int):
        Number of tips; must be even.
    treeheight (float):
        Root height the tree is scaled to.
    random_names (bool):
        If True the numeric part of the tip names is shuffled.

    Raises
    ------
    ToytreeError if ntips is odd.
    """
    # require even number of tips
    if ntips % 2:
        raise ToytreeError("balanced trees must have even number of tips.")

    # start from a single cherry
    rtree = toytree.tree()
    root = rtree.treenode
    root.add_child(name="r0")
    root.add_child(name="r1")

    # split the node picked by return_small_clade once per remaining tip
    for i in range(2, ntips):
        target = return_small_clade(rtree.treenode)
        # the old name moves to one child, the new tip is the other
        target.add_child(name=target.name)
        target.add_child(name="r" + str(i))
        # the split node is now internal and loses its name
        target.name = None

    # wrap, make ultrametric and scale to the requested height
    tre = toytree.tree(rtree)
    tre = tre.mod.make_ultrametric()
    tre = tre.mod.node_scale_root_height(treeheight)
    tre._coords.update()

    # name tips r{n} by idx, or in shuffled order
    labels = list(range(tre.ntips))
    if random_names:
        random.shuffle(labels)
    for idx, tnode in tre.idx_dict.items():
        if tnode.is_leaf():
            tnode.name = "r{}".format(labels[idx])
    return tre
def baltree(ntips, treeheight=1.0):
    """
    Return a balanced tree topology.

    Parameters
    ----------
    ntips (int):
        Number of tips; must be even.
    treeheight (float):
        Root height the tree is scaled to.

    Raises
    ------
    ToytreeError if ntips is odd.
    """
    # require even number of tips
    if ntips % 2:
        raise ToytreeError("balanced trees must have even number of tips.")

    # start from a single cherry
    rtree = toytree.tree()
    root = rtree.treenode
    root.add_child(name="r0")
    root.add_child(name="r1")

    # split the node picked by return_small_clade once per remaining tip
    for i in range(2, ntips):
        target = return_small_clade(rtree.treenode)
        # the old name moves to one child, the new tip is the other
        target.add_child(name=target.name)
        target.add_child(name="r" + str(i))
        # the split node is now internal and loses its name
        target.name = None

    # rename tips in postorder, counting down from the highest number
    remaining = len(rtree) - 1
    for tnode in rtree.treenode.traverse("postorder"):
        if tnode.is_leaf():
            tnode.name = "r" + str(remaining)
            remaining -= 1

    # rebuild from newick, make ultrametric and scale
    newick = rtree.write(tree_format=9)
    tre = toytree.tree(newick).mod.make_ultrametric()
    scaled = tre.mod.node_scale_root_height(treeheight)
    scaled._coords.update()
    return scaled
def __init__(self, tree, matrix, model=None, prior=0.5):
    """
    Store the model settings, the data matrix and the tree.

    Parameters
    ----------
    tree:
        A toytree object, or a newick string (parsed with tree_format=0).
    matrix:
        Stored unchanged as ``self.matrix``.
    model:
        Stored unchanged as ``self.model``.
    prior:
        Stored unchanged as ``self.prior``.

    Raises
    ------
    Exception if `tree` is neither a toytree object nor a string.
    """
    self.model = model
    self.prior = prior
    self.matrix = matrix

    # reject unsupported tree inputs up front
    if not isinstance(tree, (toytree.tree, str)):
        raise Exception(
            'tree must be either a newick string or toytree object')

    # toytree objects are used as-is, strings are parsed
    if isinstance(tree, toytree.tree):
        self.tree = tree
    else:
        self.tree = toytree.tree(tree, tree_format=0)

    # height of the root node as a float
    root = self.tree.treenode
    self.treeheight = float(root.height)
def simulate_geneal_and_seqs(self):
    """
    Simulate loci with ipcoal on every species tree in ``self.data``.

    For each row: the "spp_tree" is loaded, Ne values from
    ``self.samp_ns[idx]`` are set on its nodes, ``self.nloci`` loci of
    ``self.nsites`` sites are simulated, and the concatenated alignment is
    written as diploid phylip and nexus files into ``self.outdir``. The
    SNP count and both file paths are stored in the "nsnps",
    "phy_seqpath" and "nex_seqpath" columns.
    """
    for idx in self.data.index:
        # load the species tree and attach one Ne value per node index
        tre = toytree.tree(self.data.at[idx, "spp_tree"])
        ne_by_node = dict(zip(range(tre.nnodes), self.samp_ns[idx]))
        tre = tre.set_node_values("Ne", ne_by_node)

        # simulate genealogies and sequences on this tree
        model = ipcoal.Model(
            tree=tre,
            nsamples=2,
            seed=self.rng.integers(0, 1e9),
            **self.ipcoal_kwargs,
        )
        model.sim_loci(self.nloci, self.nsites)

        # write the diploid alignment as phylip, then as nexus
        name = self.prefix + "_{}".format(idx)
        writers = (
            model.write_concat_to_phylip,
            model.write_concat_to_nexus,
        )
        for write in writers:
            write(name=name, outdir=self.outdir, diploid=True)

        # record the number of snps
        self.data.loc[idx, "nsnps"] = model.df.nsnps.sum()

        # record where both alignments were written
        phy_name = self.prefix + "_{}.phy".format(idx)
        nex_name = self.prefix + "_{}.nex".format(idx)
        self.data.loc[idx, "phy_seqpath"] = os.path.join(
            self.outdir, phy_name)
        self.data.loc[idx, "nex_seqpath"] = os.path.join(
            self.outdir, nex_name)

    print("simulated sequences on {} species trees.".format(self.reps))