def create_tree():
    """Build a neighbor-joining tree from ``data/final_matrix.phy`` with fneighbor.

    The distance matrix is read as a lower-triangular matrix.  The textual
    report is written to ``data/tree_output/pairwisetree_visual`` and the
    tree itself to ``data/tree_output/pairwisetree.nwk``.

    Returns:
        None.  The results are the files written by fneighbor.
    """
    neighbor_line = FNeighborCommandline()
    neighbor_line.datafile = "data/final_matrix.phy"
    neighbor_line.matrixtype = "l"  # lower triangular distance matrix
    neighbor_line.treetype = "n"  # neighbor joining algorithm
    neighbor_line.outfile = "data/tree_output/pairwisetree_visual"
    neighbor_line.outtreefile = "data/tree_output/pairwisetree.nwk"
    # Calling a Biopython command-line wrapper runs the tool and waits for it
    # to finish, returning (stdout, stderr).  The previous code stringified
    # that returned tuple and handed it to a shell via an un-awaited Popen,
    # which executed garbage; a single direct call is all that is needed.
    neighbor_line()
Beispiel #2
0
 def tree_from_distances(self, filename):
     """Estimate a tree from the distance matrix in ``filename`` and verify its taxa.

     Runs fneighbor on the matrix, writing the tree(s) to ``test_file``, then
     checks that every parsed tree carries exactly the taxa in
     ``self.test_taxa`` (spaces in taxon names are compared as underscores).
     """
     self.assertTrue(os.path.isfile(filename), "Missing %s" % filename)
     run_fneighbor = FNeighborCommandline(
         exes["fneighbor"],
         datafile=filename,
         outtreefile="test_file",
         auto=True,
         filter=True,
     )
     stdout, stderr = run_fneighbor()
     for parsed_tree in parse_trees("test_file"):
         # Normalise names first, then sort, so the comparison is order-free.
         normalised = sorted(
             name.replace(" ", "_") for name in parsed_tree.get_taxa()
         )
         self.assertEqual(self.test_taxa, normalised)
Beispiel #3
0
def runNeighbour(distout, outbase):
    """
    Run fneighbor (neighbor joining) on a distance matrix.

    Args:
        distout: path of the distance-matrix file passed to fneighbor as
            its ``datafile``.
        outbase: prefix used to derive the two output file names.

    Returns:
        A tuple ``(neighout, tree)`` where ``neighout`` is
        ``outbase + ".neighbour"`` (fneighbor's report file) and ``tree`` is
        ``outbase + ".neighbour_NJ"`` (the tree file).
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    print("running neighbour")
    neighout = outbase + ".neighbour"
    tree = outbase + ".neighbour_NJ"
    neigh_cline = FNeighborCommandline(datafile=distout,
                                       outfile=neighout,
                                       seed=333,  # fixed seed passed to fneighbor
                                       treetype="n",
                                       outtreefile=tree)
    # Calling the wrapper executes fneighbor; its captured output is not used.
    neigh_cline()
    return (neighout, tree)
Beispiel #4
0
def build_nj_phylip(alignment, outfile, outgroup, work_dir="."):
    """
    Build a neighbor joining tree of DNA seqs with PHYLIP in EMBOSS.

    Steps, in order:
      1. write ``alignment`` in PHYLIP format to ``<work_dir>/work/aln.phy``
      2. fseqboot: resample the alignment (100 replicates, fixed seed)
      3. fdnadist: distances for the resampled alignments
      4. fneighbor: one tree per resampled distance matrix
      5. fconsense: consensus tree of those trees (node support)
      6. fdnadist on the original alignment, then ``run_ffitch`` to put
         branch lengths on the consensus topology
      7. optionally reroot with ``smart_reroot`` and copy the node support
         values onto the branch-length tree, which is written to ``outfile``

    PHYLIP manual
    http://evolution.genetics.washington.edu/phylip/doc/

    Args:
        alignment: alignment object handed to ``AlignIO.write``.
        outfile: path the final tree is written to.  When rerooting
            succeeds, ``".unrooted"`` is removed from this name.
        outgroup: passed to ``smart_reroot``; a falsy value skips rerooting.
        work_dir: directory containing a ``work`` subdirectory that receives
            all intermediate files (this function does not create it).

    Returns:
        ``(outfile, phy_file)`` when a non-empty tree file was written,
        otherwise ``None`` (also when the alignment could not be written
        because of a ``ValueError``).
    """

    phy_file = op.join(work_dir, "work", "aln.phy")
    try:
        # The context manager guarantees the alignment is flushed and closed
        # before the external programs below read it.
        with open(phy_file, "w") as handle:
            AlignIO.write(alignment, handle, "phylip")
    except ValueError:
        sys.stderr.write(
            "Repeated seq name, possibly due to truncation. "
            "NJ tree not built.\n")
        return None

    # Every intermediate file shares the alignment path minus its extension.
    base = phy_file.rsplit(".", 1)[0]

    seqboot_out = base + ".fseqboot"
    seqboot_cl = FSeqBootCommandline(
        FPHYLIP_BIN("fseqboot"),
        sequence=phy_file, outfile=seqboot_out,
        seqtype="d", reps=100, seed=12345)
    seqboot_cl()
    logging.debug("Resampling alignment: %s", seqboot_cl)

    dnadist_out = base + ".fdnadist"
    dnadist_cl = FDNADistCommandline(
        FPHYLIP_BIN("fdnadist"),
        sequence=seqboot_out, outfile=dnadist_out, method="f")
    dnadist_cl()
    logging.debug(
        "Calculating distance for bootstrapped alignments: %s", dnadist_cl)

    neighbor_out = base + ".njtree"
    neighbor_report = base + ".fneighbor"
    neighbor_cl = FNeighborCommandline(
        FPHYLIP_BIN("fneighbor"),
        datafile=dnadist_out, outfile=neighbor_report,
        outtreefile=neighbor_out)
    neighbor_cl()
    logging.debug("Building Neighbor Joining tree: %s", neighbor_cl)

    consense_out = base + ".consensustree.nodesupport"
    consense_report = base + ".fconsense"
    consense_cl = FConsenseCommandline(
        FPHYLIP_BIN("fconsense"),
        intreefile=neighbor_out, outfile=consense_report,
        outtreefile=consense_out)
    consense_cl()
    logging.debug("Building consensus tree: %s", consense_cl)

    # distance without bootstrapping
    dnadist_out0 = base + ".fdnadist0"
    dnadist_cl0 = FDNADistCommandline(
        FPHYLIP_BIN("fdnadist"),
        sequence=phy_file, outfile=dnadist_out0, method="f")
    dnadist_cl0()
    logging.debug(
        "Calculating distance for original alignment: %s", dnadist_cl0)

    # infer branch length on consensus tree
    consensustree1 = base + ".consensustree.branchlength"
    run_ffitch(distfile=dnadist_out0, outtreefile=consensustree1,
               intreefile=consense_out)

    # write final tree
    ct_s = Tree(consense_out)

    if outgroup:
        t1 = consensustree1 + ".rooted"
        t2 = smart_reroot(consensustree1, outgroup, t1)
        # smart_reroot returning the requested path is taken to mean the
        # tree was rerooted, so the output name drops its ".unrooted" tag.
        if t2 == t1:
            outfile = outfile.replace(".unrooted", "")
        ct_b = Tree(t2)
    else:
        ct_b = Tree(consensustree1)

    # Map each internal node (keyed by its sorted names, as yielded by
    # iterating the node) to a support fraction.
    # NOTE(review): the consensus tree's dist field presumably holds the
    # replicate count out of the 100 bootstraps -- confirm.
    nodesupport = {}
    for node in ct_s.traverse("postorder"):
        node_children = tuple(sorted(f.name for f in node))
        if len(node_children) > 1:
            nodesupport[node_children] = node.dist / 100.

    for names, support in nodesupport.items():
        ct_b.get_common_ancestor(*names).support = support
    print(ct_b)
    ct_b.write(format=0, outfile=outfile)

    try:
        size = op.getsize(outfile)
    except OSError:
        size = 0
    if not size:
        logging.debug("Something was wrong. NJ tree was not built.")
        return None
    logging.debug("NJ tree printed to %s", outfile)
    return outfile, phy_file
            infile_Formatted.write("\n")


## Distance matrix: run FDNADistCommandline on the "FormattedInputFile.txt" file created above.
# Arguments:
#   "/usr/local/bin/fdnadist" - where Python can find the fdnadist application.
#   sequence - the file containing the sequences to compute a distance matrix for.
#   method   - the distance algorithm to use; "f" selects the F84 distance model.
#   outfile  - the file FDNADistCommandline writes its results to.
FDNADist_matrix = FDNADistCommandline(
    "/usr/local/bin/fdnadist",
    sequence="FormattedInputFile.txt",
    method="f",
    outfile="distanceMatrix.fdnadist",
)

# Calling the command object is required: nothing is written to the outfile until this call.
stdout, stderr = FDNADist_matrix()


## Phylogenetic tree: run FNeighborCommandline on the "distanceMatrix.fdnadist" file created above.
# Arguments:
#   "/usr/local/bin/fneighbor" - where Python can find the fneighbor application.
#   datafile - the file containing the distance matrix used to build the tree.
#   outfile  - the file FNeighborCommandline writes its results to.
FNeighbor_tree = FNeighborCommandline(
    "/usr/local/bin/fneighbor",
    datafile="distanceMatrix.fdnadist",
    outfile="treeFile.fneighbor",
)

# Calling the command object is required: nothing is written to the outfile until this call.
stdout, stderr = FNeighbor_tree()


## The next part of the code prepares the input for NeedleallCommandline, which aligns each and every
# one of the sequences in "query_searchSequences_dna.fasta" against the others.
# The FASTA file 'query_searchSequences_dna.fasta' cannot be used directly because it contains spaces
# between each record, and for some reason the spaces affect the Needleall alignment. So the next few
# lines write a FASTA file without spaces between records, called 'needlallAlignmentInput_Nucleotide.fasta'.
# Each sequence is also truncated to at most numberOfPositions characters using a slice -> [:numberOfPositions].
with open("needlallAlignmentInput_Nucleotide.fasta", 'w') as outfile_alignment:
    with open("query_searchSequences_dna.fasta", 'r') as infile_sequences:
        # Parse from the handle opened above rather than reopening the file by name,
        # so the open handle is actually used and the file is opened only once.
        for record in SeqIO.parse(infile_sequences, "fasta"):
            sequence = record.seq[:numberOfPositions]
            outfile_alignment.write('>' + str(record.id) + '\n' + str(sequence) + '\n')