예제 #1
0
    def check_bootstrap(self, filename, format, align_type="d"):
        """Run fseqboot on an alignment file and verify its replicates.

        The input is resampled into two replicates written to "test_file".
        That output is parsed back using ``format``, and every replicate must
        list the same record names, in the same order, as the original
        alignment (names are compared with spaces turned into underscores).

        ``align_type`` is passed to the command line object as ``seqtype``
        to select the sequence type (from [D]na, [p]rotein and [r]na).
        """
        def underscored_names(alignment):
            # Names are compared with spaces normalised to underscores.
            return [record.name.replace(" ", "_") for record in alignment]

        self.assertTrue(os.path.isfile(filename), "Missing %s" % filename)

        bootstrap_cmd = FSeqBootCommandline(
            exes["fseqboot"],
            sequence=filename,
            outfile="test_file",
            seqtype=align_type,
            reps=2,
            auto=True,
            filter=True
        )
        stdout, stderr = bootstrap_cmd()

        # Two replicates were requested, so two alignments must come back.
        with open("test_file") as replicate_handle:
            replicates = list(AlignIO.parse(replicate_handle, format))
        self.assertEqual(len(replicates), 2)

        # Record names of the original alignment are the reference ...
        with open(filename) as source_handle:
            expected_names = underscored_names(
                AlignIO.read(source_handle, format)
            )

        # ... that every bootstrapped alignment has to reproduce exactly.
        for replicate in replicates:
            self.assertEqual(expected_names, underscored_names(replicate))
예제 #2
0
def runSeqBoot(phylipfile, outbase, reps=1000, seed=333):
    """
    Run fseqboot on a phylip file to produce bootstrap replicates.

    Args:
        phylipfile: path of the input alignment, passed as ``sequence``.
        outbase: prefix of the output path; ".fseqboot" is appended to it.
        reps: number of replicates requested from fseqboot (default 1000).
        seed: random seed handed to fseqboot (default 333).

    Returns:
        The path of the fseqboot output file, ``outbase + ".fseqboot"``.
    """
    # The parenthesised single-argument form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("Running Seqboot")
    seqout = outbase + ".fseqboot"
    seqboot_cline = FSeqBootCommandline(sequence=phylipfile,
                                        reps=reps,
                                        seed=seed,
                                        outfile=seqout)
    # Only the side effect (writing seqout) matters here; the captured
    # stdout/stderr of the tool are not used.
    seqboot_cline()
    return seqout
예제 #3
0
def build_nj_phylip(alignment, outfile, outgroup, work_dir="."):
    """
    Build a neighbor joining tree of DNA seqs with PHYLIP in EMBOSS.

    Steps, all operating on files next to ``<work_dir>/work/aln.phy``:

    1. write ``alignment`` in phylip format;
    2. resample it with fseqboot (100 replicates, fixed seed);
    3. compute distances for the replicates with fdnadist;
    4. build one tree per replicate with fneighbor;
    5. summarise them with fconsense (node support tree);
    6. compute distances for the original alignment and fit branch
       lengths onto the consensus topology with ``run_ffitch``;
    7. optionally reroot with ``smart_reroot`` when ``outgroup`` is given;
    8. copy node support onto the branch-length tree and write it out.

    Args:
        alignment: alignment object accepted by ``AlignIO.write``.
        outfile: path of the final tree; a ".unrooted" substring is dropped
            from it when rerooting returned the requested rooted path.
        outgroup: passed to ``smart_reroot``; falsy skips rerooting.
        work_dir: directory that contains the "work" sub-directory.

    Returns:
        ``(outfile, phy_file)`` when a non-empty tree file was written,
        otherwise ``None`` (also when the alignment could not be written
        because of a ``ValueError``).

    PHYLIP manual
    http://evolution.genetics.washington.edu/phylip/doc/
    """
    phy_file = op.join(work_dir, "work", "aln.phy")
    # Every intermediate file is named after the alignment path without its
    # extension, so compute that stem once.
    base = phy_file.rsplit(".", 1)[0]

    try:
        # The context manager guarantees the alignment is flushed and closed
        # before fseqboot reads it back from disk.
        with open(phy_file, "w") as handle:
            AlignIO.write(alignment, handle, "phylip")
    except ValueError:
        sys.stderr.write(
            "Repeated seq name, possibly due to truncation. "
            "NJ tree not built.\n")
        return None

    seqboot_out = base + ".fseqboot"
    seqboot_cl = FSeqBootCommandline(
        FPHYLIP_BIN("fseqboot"),
        sequence=phy_file, outfile=seqboot_out,
        seqtype="d", reps=100, seed=12345)
    seqboot_cl()
    logging.debug("Resampling alignment: %s", seqboot_cl)

    dnadist_out = base + ".fdnadist"
    dnadist_cl = FDNADistCommandline(
        FPHYLIP_BIN("fdnadist"),
        sequence=seqboot_out, outfile=dnadist_out, method="f")
    dnadist_cl()
    logging.debug(
        "Calculating distance for bootstrapped alignments: %s", dnadist_cl)

    neighbor_out = base + ".njtree"
    neighbor_cl = FNeighborCommandline(
        FPHYLIP_BIN("fneighbor"),
        datafile=dnadist_out, outfile=base + ".fneighbor",
        outtreefile=neighbor_out)
    neighbor_cl()
    logging.debug("Building Neighbor Joining tree: %s", neighbor_cl)

    consense_out = base + ".consensustree.nodesupport"
    consense_cl = FConsenseCommandline(
        FPHYLIP_BIN("fconsense"),
        intreefile=neighbor_out, outfile=base + ".fconsense",
        outtreefile=consense_out)
    consense_cl()
    logging.debug("Building consensus tree: %s", consense_cl)

    # distance without bootstrapping
    dnadist_out0 = base + ".fdnadist0"
    dnadist_cl0 = FDNADistCommandline(
        FPHYLIP_BIN("fdnadist"),
        sequence=phy_file, outfile=dnadist_out0, method="f")
    dnadist_cl0()
    logging.debug(
        "Calculating distance for original alignment: %s", dnadist_cl0)

    # infer branch length on consensus tree
    consensustree1 = base + ".consensustree.branchlength"
    run_ffitch(distfile=dnadist_out0, outtreefile=consensustree1,
               intreefile=consense_out)

    # write final tree
    ct_s = Tree(consense_out)

    if outgroup:
        t1 = consensustree1 + ".rooted"
        t2 = smart_reroot(consensustree1, outgroup, t1)
        # NOTE(review): presumably smart_reroot returns the rooted path only
        # when rerooting succeeded -- confirm against its definition.
        if t2 == t1:
            outfile = outfile.replace(".unrooted", "")
        ct_b = Tree(t2)
    else:
        ct_b = Tree(consensustree1)

    # Key each multi-member clade of the support tree by its sorted member
    # names so it can be located again in the branch-length tree.
    # NOTE(review): assumes iterating a node yields its leaves and that the
    # consensus tree stores replicate counts (out of 100) as branch
    # lengths -- confirm.
    nodesupport = {}
    for node in ct_s.traverse("postorder"):
        node_children = tuple(sorted([f.name for f in node]))
        if len(node_children) > 1:
            nodesupport[node_children] = node.dist / 100.0

    for k, v in nodesupport.items():
        ct_b.get_common_ancestor(*k).support = v
    print(ct_b)
    ct_b.write(format=0, outfile=outfile)

    # A missing or empty output file means the tree was not produced.
    try:
        s = op.getsize(outfile)
    except OSError:
        s = 0
    if s:
        logging.debug("NJ tree printed to %s", outfile)
        return outfile, phy_file
    else:
        logging.debug("Something was wrong. NJ tree was not built.")
        return None