def check_bootstrap(self, filename, format, align_type="d"):
    """Check we can use fseqboot to pseudosample an alignment.

    The align_type argument is passed to the command line object as its
    ``seqtype`` option (one of [D]na, [p]rotein and [r]na).

    The check asserts that ``filename`` exists, runs fseqboot with two
    replicates writing to ``test_file`` in the current directory, and then
    asserts that the output parses (as ``format``) into exactly two
    alignments whose record names match those of the input alignment.
    """
    self.assertTrue(os.path.isfile(filename), "Missing %s" % filename)
    cline = FSeqBootCommandline(
        exes["fseqboot"],
        sequence=filename,
        outfile="test_file",
        seqtype=align_type,
        reps=2,
        auto=True,
        filter=True,
    )
    # Only the output file is inspected; the captured streams are not needed.
    cline()

    # The resultant file should have 2 alignments...
    with open("test_file") as handle:
        bs = list(AlignIO.parse(handle, format))
    self.assertEqual(len(bs), 2)

    # ...and each name in the original alignment...
    # NOTE(review): the expression selecting the record attribute was lost in
    # the source as received; ``.name`` is reconstructed from the comments
    # here ("each name") -- confirm it should not be ``.id``.
    with open(filename) as handle:
        a_names = [
            s.name.replace(" ", "_") for s in AlignIO.read(handle, format)
        ]

    # ...should be in each alignment in the bootstrapped file. Spaces are
    # mapped to underscores on both sides before comparing.
    for a in bs:
        self.assertEqual(a_names, [s.name.replace(" ", "_") for s in a])
def runSeqBoot(phylipfile, outbase, reps=1000, seed=333):
    """Run seqboot on a PHYLIP file and return the output file name.

    Builds an ``FSeqBootCommandline`` for ``phylipfile`` and invokes it.

    Args:
        phylipfile: Path passed to the command line wrapper as ``sequence``.
        outbase: Prefix of the output file; ``".fseqboot"`` is appended.
        reps: Value passed as the wrapper's ``reps`` option (default 1000).
        seed: Value passed as the wrapper's ``seed`` option (default 333).

    Returns:
        The output path, ``outbase + ".fseqboot"``.
    """
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print("Running Seqboot")
    seqout = outbase + ".fseqboot"
    seqboot_cline = FSeqBootCommandline(
        sequence=phylipfile, reps=reps, seed=seed, outfile=seqout
    )
    # The captured stdout/stderr are not used; only the output file matters.
    seqboot_cline()
    return seqout
def build_nj_phylip(alignment, outfile, outgroup, work_dir="."):
    """Build a neighbor joining tree of DNA seqs with PHYLIP in EMBOSS.

    See the PHYLIP manual for details of the individual programs.

    Steps, each run through its command line wrapper:

    1. Write ``alignment`` to ``<work_dir>/work/aln.phy`` in PHYLIP format.
    2. fseqboot: resample the alignment (100 replicates, seed 12345).
    3. fdnadist (method "f") on the resampled alignments.
    4. fneighbor on those distances.
    5. fconsense on the fneighbor trees.
    6. fdnadist on the original alignment, then ``run_ffitch`` with the
       consensus tree as input tree to obtain branch lengths.
    7. Optionally reroot with ``smart_reroot``, copy node support values
       onto the branch-length tree, print it and write it to ``outfile``.

    Args:
        alignment: Alignment object handed to ``AlignIO.write``.
        outfile: Path the final tree is written to. If ``outgroup`` is given
            and ``smart_reroot`` returns the rooted path it was asked to
            write, ``".unrooted"`` is removed from this name.
        outgroup: Passed to ``smart_reroot``; a falsy value skips rerooting.
        work_dir: Directory containing the ``work`` sub-directory used for
            all intermediate files.

    Returns:
        ``(outfile, phy_file)`` when a non-empty tree file was written,
        otherwise ``None`` (also when writing the PHYLIP file raises
        ``ValueError``).
    """
    n_reps = 100  # bootstrap replicates; also used to scale node support

    phy_file = op.join(work_dir, "work", "aln.phy")
    try:
        # Close the handle explicitly so the alignment is fully flushed to
        # disk before the external programs read it.
        with open(phy_file, "w") as handle:
            AlignIO.write(alignment, handle, "phylip")
    except ValueError:
        sys.stderr.write(
            "Repeated seq name, possibly due to truncation. "
            "NJ tree not built.\n"
        )
        return None

    # Common prefix (path without the final extension) for every output.
    base = phy_file.rsplit(".", 1)[0]

    seqboot_out = base + ".fseqboot"
    seqboot_cl = FSeqBootCommandline(
        FPHYLIP_BIN("fseqboot"),
        sequence=phy_file,
        outfile=seqboot_out,
        seqtype="d",
        reps=n_reps,
        seed=12345,
    )
    seqboot_cl()
    logging.debug("Resampling alignment: %s", seqboot_cl)

    dnadist_out = base + ".fdnadist"
    dnadist_cl = FDNADistCommandline(
        FPHYLIP_BIN("fdnadist"),
        sequence=seqboot_out,
        outfile=dnadist_out,
        method="f",
    )
    dnadist_cl()
    logging.debug(
        "Calculating distance for bootstrapped alignments: %s", dnadist_cl
    )

    neighbor_out = base + ".njtree"
    neighbor_log = base + ".fneighbor"
    neighbor_cl = FNeighborCommandline(
        FPHYLIP_BIN("fneighbor"),
        datafile=dnadist_out,
        outfile=neighbor_log,
        outtreefile=neighbor_out,
    )
    neighbor_cl()
    logging.debug("Building Neighbor Joining tree: %s", neighbor_cl)

    consense_out = base + ".consensustree.nodesupport"
    consense_log = base + ".fconsense"
    consense_cl = FConsenseCommandline(
        FPHYLIP_BIN("fconsense"),
        intreefile=neighbor_out,
        outfile=consense_log,
        outtreefile=consense_out,
    )
    consense_cl()
    logging.debug("Building consensus tree: %s", consense_cl)

    # distance without bootstrapping
    dnadist_out0 = base + ".fdnadist0"
    dnadist_cl0 = FDNADistCommandline(
        FPHYLIP_BIN("fdnadist"),
        sequence=phy_file,
        outfile=dnadist_out0,
        method="f",
    )
    dnadist_cl0()
    logging.debug(
        "Calculating distance for original alignment: %s", dnadist_cl0
    )

    # infer branch length on consensus tree
    consensustree1 = base + ".consensustree.branchlength"
    run_ffitch(
        distfile=dnadist_out0,
        outtreefile=consensustree1,
        intreefile=consense_out,
    )

    # write final tree
    ct_s = Tree(consense_out)  # tree carrying the support information
    if outgroup:
        t1 = consensustree1 + ".rooted"
        t2 = smart_reroot(consensustree1, outgroup, t1)
        if t2 == t1:
            # smart_reroot handed back the rooted path it was given, so the
            # output name should no longer say ".unrooted".
            outfile = outfile.replace(".unrooted", "")
        ct_b = Tree(t2)
    else:
        ct_b = Tree(consensustree1)

    # Key each multi-member clade by its sorted member names.
    # NOTE(review): the element expression of this comprehension was lost in
    # the source as received; ``f.name`` is reconstructed -- confirm.
    # NOTE(review): presumably the consensus tree stores the number of
    # supporting replicates in ``dist``, so dividing by the replicate count
    # yields a fraction -- confirm against the fconsense output.
    nodesupport = {}
    for node in ct_s.traverse("postorder"):
        node_children = tuple(sorted(f.name for f in node))
        if len(node_children) > 1:
            nodesupport[node_children] = node.dist / float(n_reps)

    # Transfer each support value to the matching node of the
    # branch-length tree.
    for members, support in nodesupport.items():
        ct_b.get_common_ancestor(*members).support = support

    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print(ct_b)
    ct_b.write(format=0, outfile=outfile)

    # A missing or empty output file means the tree was not produced.
    try:
        size = op.getsize(outfile)
    except OSError:
        size = 0
    if not size:
        logging.debug("Something was wrong. NJ tree was not built.")
        return None

    logging.debug("NJ tree printed to %s", outfile)
    return outfile, phy_file