Beispiel #1
0
def iterate(n_iters):
    """Run the sampler for ``n_iters`` steps, then plot and export the result.

    Uses names from the enclosing module (``sampler``, ``likelihoods``,
    ``X``, ``y``, ``pca``, ``fontsize``).  Writes
    ``unconstrained-likelihoods.png``, ``unconstrained-tree.png``,
    ``unconstrained-tree.nwk`` and ``unconstrained-tree.dot`` into the
    current directory and finally shows all figures.
    """
    for _ in tqdm(xrange(n_iters)):
        sampler.sample()
        likelihoods.append(sampler.tree.marg_log_likelihood())

    # Figure 1: likelihood trace over all recorded iterations.
    plt.figure()
    plt.xlabel("Iterations", fontsize=fontsize)
    plt.ylabel("Data Log Likelihood", fontsize=fontsize)
    plt.plot(likelihoods)
    plt.legend(loc='best', fontsize=12)
    plt.savefig('unconstrained-likelihoods.png', bbox_inches='tight')

    # Work on a copy so relabelling the leaves does not touch the sampler.
    snapshot = sampler.tree.copy()

    # Figure 2: 2-D view of the tree.
    plt.figure()
    plot_tree_2d(snapshot, X, pca)

    # Replace each leaf's point index by the matching entry of ``y``.
    for node in snapshot.dfs():
        if not node.is_leaf():
            continue
        node.point = y[node.point]

    # Figure 3: Graphviz rendering of the relabelled tree.
    plt.figure()
    newick_text = snapshot.to_newick()
    phylo_tree = Phylo.read(StringIO(newick_text), 'newick')

    Phylo.draw_graphviz(phylo_tree, prog='neato')
    plt.savefig('unconstrained-tree.png', bbox_inches='tight')
    nx_graph = Phylo.to_networkx(phylo_tree)
    with open('unconstrained-tree.nwk', 'w') as handle:
        # Newick string without a trailing newline.
        handle.write(str(newick_text))
    nx.write_dot(nx_graph, 'unconstrained-tree.dot')
    plt.show()
Beispiel #2
0
def iterate(n_iters):
    """Sample ``n_iters`` times, then plot the likelihoods and the final tree.

    Depends on module-level names (``sampler``, ``likelihoods``, ``X``,
    ``y``, ``pca``, ``fontsize``).  Output files are written to the current
    directory: ``unconstrained-likelihoods.png``, ``unconstrained-tree.png``,
    ``unconstrained-tree.nwk`` and ``unconstrained-tree.dot``.
    """
    step = 0
    for step in tqdm(xrange(n_iters)):
        sampler.sample()
        score = sampler.tree.marg_log_likelihood()
        likelihoods.append(score)

    # Likelihood curve.
    plt.figure()
    plt.xlabel("Iterations", fontsize=fontsize)
    plt.ylabel("Data Log Likelihood", fontsize=fontsize)
    plt.plot(likelihoods)
    plt.legend(loc='best', fontsize=12)
    plt.savefig('unconstrained-likelihoods.png', bbox_inches='tight')

    # Copy of the sampler's current tree; the copy is relabelled below.
    tree_copy = sampler.tree.copy()

    plt.figure()
    plot_tree_2d(tree_copy, X, pca)

    # Leaves store an index into ``y``; swap it for the value itself.
    for vertex in tree_copy.dfs():
        if vertex.is_leaf():
            index = vertex.point
            vertex.point = y[index]

    plt.figure()
    newick_repr = tree_copy.to_newick()
    parsed = Phylo.read(StringIO(newick_repr), 'newick')

    Phylo.draw_graphviz(parsed, prog='neato')
    plt.savefig('unconstrained-tree.png', bbox_inches='tight')
    as_graph = Phylo.to_networkx(parsed)
    with open('unconstrained-tree.nwk', 'w') as out_file:
        # No trailing newline is written after the Newick string.
        out_file.write(str(newick_repr))
    nx.write_dot(as_graph, 'unconstrained-tree.dot')
    plt.show()
Beispiel #3
0
    def DrawSimple(self):
        """Draw the loaded tree with Graphviz.

        Opens the file-selection window first when no file name has been
        chosen yet.  Nothing is drawn while ``self.tree`` still equals 0.
        """
        no_file_chosen = self.chosenFileName == ''
        if no_file_chosen:
            self.showOpenFileWindow()

        # Guard clause: bail out when there is still no tree to draw.
        if not (self.tree != 0):
            return

        self.tree.root.color = '#808080'
        Phylo.draw_graphviz(self.tree, node_size=2500)
        pylab.show()
Beispiel #4
0
def plot_tree(tree, y):
    """Draw ``tree`` with Graphviz (``neato``), labelling leaves from ``y``.

    The input tree is copied first; on the copy, every leaf's ``point``
    (used as an index into ``y``) is replaced by ``y[point]``.
    """
    relabelled = tree.copy()
    for vertex in relabelled.dfs():
        if not vertex.is_leaf():
            continue
        vertex.point = y[vertex.point]

    # Round-trip through Newick to obtain a Bio.Phylo tree.
    newick_text = relabelled.to_newick()
    phylo_tree = Phylo.read(StringIO(newick_text), 'newick')
    Phylo.draw_graphviz(phylo_tree, prog='neato')
Beispiel #5
0
def plot_tree(tree, y):
    """Render a copy of ``tree`` through Bio.Phylo / Graphviz ``neato``.

    Leaves of the copy get their ``point`` index replaced by the
    corresponding entry of ``y`` before the tree is serialised to Newick.
    """
    working_copy = tree.copy()
    for node in working_copy.dfs():
        if node.is_leaf():
            label = y[node.point]
            node.point = label

    handle = StringIO(working_copy.to_newick())
    Phylo.draw_graphviz(Phylo.read(handle, 'newick'), prog='neato')
Beispiel #6
0
 def showGraphvizRootedTreeWindow(self):
     """Show the loaded tree in a Graphviz-rendered window.

     Asks for a file via the open-file dialog when none has been chosen;
     draws nothing while ``self.tree`` still equals 0.
     """
     file_missing = self.chosenFileName == ''
     if file_missing:
         self.showOpenFileDialog()

     # Nothing to draw until a tree has been loaded.
     if not (self.tree != 0):
         return

     self.tree.root.color = '#808080'
     Phylo.draw_graphviz(self.tree, node_size=2500)
     pylab.show()
Beispiel #7
0
    def showPathWindow(self):
        """Show the tree with the path between the two selected clades marked.

        When both ``self.cpath`` and ``self.cpath2`` are non-empty, the first
        clades matching them become ``self.start`` / ``self.end``; clades on
        the traced path are coloured red, all others grey, and the start
        clade blue.  Otherwise a message is printed and the placeholder image
        ``img/wally.png`` is shown instead.

        Raises:
            StopIteration: if no clade matches one of the selections.
        """
        def hide_axis_decorations():
            # Gives a clean picture without axes: hide tick labels and tick
            # marks of the current axes.
            frame = pylab.gca()
            for label in frame.axes.get_xticklabels():
                label.set_visible(False)
                label.set_fontsize(0.0)
            for label in frame.axes.get_yticklabels():
                label.set_fontsize(0.0)
                label.set_visible(False)
            for tick in frame.axes.get_xticklines():
                tick.set_visible(False)
            for tick in frame.axes.get_yticklines():
                tick.set_visible(False)

        if self.cpath != '' and self.cpath2 != '':
            # next() works on both Python 2.6+ and Python 3 generators.
            self.start = next(self.tree.find_clades(self.cpath))
            self.end = next(self.tree.find_clades(name=self.cpath2))

            # Trace the path once instead of once per clade in the loop below;
            # colouring clades does not change the tree structure.
            path = self.tree.trace(self.start, self.end)

            for clade in path:
                clade.color = 'red'

            for clade in self.tree.find_clades():
                if clade not in path:
                    clade.color = 'grey'

            self.start.color = 'blue'

            # Drawing
            Phylo.draw_graphviz(self.tree, node_size=2500)
            pylab.plot(0, 0)
            hide_axis_decorations()
            pylab.show()

        else:
            print("Nie wybrano punktow")

            # Show the informational placeholder image.
            img = pylab.imread('img/wally.png', 'rb')
            pylab.imshow(img)
            pylab.plot(0, 0)
            hide_axis_decorations()

            # Showtime
            pylab.show()
Beispiel #8
0
    def showPathWindow(self):
        """Show the tree with the path between the two selected clades marked.

        When both ``self.cb1`` and ``self.cb2`` are non-empty, the first
        clades matching them become ``self.start`` / ``self.end``; clades on
        the traced path are coloured red, all others grey, and the start
        clade blue.  Otherwise a message is printed and the placeholder image
        ``wally.png`` is shown instead.

        Raises:
            StopIteration: if no clade matches one of the selections.
        """
        def hide_axis_decorations():
            # Gives a clean picture without axes: hide tick labels and tick
            # marks of the current axes.
            frame = pylab.gca()
            for label in frame.axes.get_xticklabels():
                label.set_visible(False)
                label.set_fontsize(0.0)
            for label in frame.axes.get_yticklabels():
                label.set_fontsize(0.0)
                label.set_visible(False)
            for tick in frame.axes.get_xticklines():
                tick.set_visible(False)
            for tick in frame.axes.get_yticklines():
                tick.set_visible(False)

        if self.cb1 != '' and self.cb2 != '':
            # next() works on both Python 2.6+ and Python 3 generators.
            self.start = next(self.tree.find_clades(self.cb1))
            self.end = next(self.tree.find_clades(name=self.cb2))

            # Trace the path once instead of once per clade in the loop below;
            # colouring clades does not change the tree structure.
            path = self.tree.trace(self.start, self.end)

            for clade in path:
                clade.color = 'red'

            for clade in self.tree.find_clades():
                if clade not in path:
                    clade.color = 'grey'

            self.start.color = 'blue'

            # Drawing
            Phylo.draw_graphviz(self.tree, node_size=2500)
            pylab.plot(0, 0)
            hide_axis_decorations()
            pylab.show()

        else:
            print("Nie wybrano punktow")

            # Show the informational placeholder image.
            img = pylab.imread('wally.png', 'rb')
            pylab.imshow(img)
            pylab.plot(0, 0)
            hide_axis_decorations()

            # Showtime
            pylab.show()
Beispiel #9
0
def save_utree(tree_path):
    """Draw the Newick tree at ``tree_path`` and save the figure.

    The output name is ``common.UTREE_MASK % prefix`` where ``prefix`` is
    ``tree_path`` with everything from its last ``.`` removed.

    Args:
        tree_path: path of a Newick tree file.

    Raises:
        Whatever ``plt.savefig`` raises; the failure is logged and re-raised.
    """
    logging.info("Saving unrooted tree.")
    # rfind() returns -1 when there is no dot; slicing with -1 would silently
    # drop the last character of the path, so keep the whole path in that case.
    dot_index = tree_path.rfind('.')
    prefix_name = tree_path if dot_index == -1 else tree_path[:dot_index]
    unrooted_tree = Phylo.read(tree_path, 'newick')
    unrooted_tree.ladderize()
    Phylo.draw_graphviz(unrooted_tree)
    try:
        plt.savefig(common.UTREE_MASK % prefix_name)
    except Exception:
        logging.error("Error while saving unrooted tree.")
        raise
def main():
    """Render the NEXUS tree named on the command line to a PNG file.

    Expects exactly one argument (the tree file).  With any other argument
    count a usage message is written to stderr and the process exits with
    status 0.  The image is written next to the input, with the input's
    extension replaced by ``.png``.
    """
    import os  # local import: only needed to derive the output file name

    sys.stderr.write("Print the result to file\n")
    if len(sys.argv) != 2:
        sys.stderr.write("Using python %s file.tre[nexus]\n" % sys.argv[0])
        sys.exit(0)
    # -------------------------------------------------
    tree_file = sys.argv[1]
    progN = "twopi"
    # progN = 'neato'
    tree = Phylo.read(tree_file, "nexus")
    Phylo.draw_graphviz(tree, prog=progN)
    # Swap only the extension.  A plain str.replace("tre", "png") would also
    # rewrite "tre" inside the name ("tree.tre" -> "pnge.png") and would
    # return the input path itself when it contains no "tre" at all.
    image_file = os.path.splitext(tree_file)[0] + ".png"
    pylab.savefig(image_file)
def main():
    """Draw the NEXUS tree given as the single CLI argument and save a PNG.

    When the argument count is wrong, a usage message goes to stderr and the
    process exits with status 0.  The PNG is named after the input file with
    its extension replaced by ``.png``.
    """
    import os  # local import: only needed to derive the output file name

    sys.stderr.write("Print the result to file\n")
    if len(sys.argv) != 2:
        sys.stderr.write('Using python %s file.tre[nexus]\n' % sys.argv[0])
        sys.exit(0)
    #-------------------------------------------------
    source_path = sys.argv[1]
    progN = 'twopi'
    #progN = 'neato'
    tree = Phylo.read(source_path, 'nexus')
    Phylo.draw_graphviz(tree, prog=progN)
    # Replace just the extension: str.replace('tre', 'png') also mangles
    # "tre" inside the file name (e.g. "tree.tre" -> "pnge.png") and leaves
    # the path unchanged - overwriting the input - when "tre" is absent.
    target_path = os.path.splitext(source_path)[0] + '.png'
    pylab.savefig(target_path)
Beispiel #12
0
def plot_tree(final_tree, name):
    """Draw the sub-tree induced by ``animal_list`` and save it as ``<name>.png``.

    Operates on a copy of ``final_tree``.  Leaf ``point`` indices are replaced
    by the matching entries of the module-level ``y`` before the sub-tree is
    taken.
    """
    relabelled = final_tree.copy()
    for vertex in relabelled.dfs():
        if not vertex.is_leaf():
            continue
        vertex.point = y[vertex.point]

    subtree = relabelled.induced_subtree(animal_list)

    plt.figure()
    newick_text = subtree.to_newick()
    phylo_tree = Phylo.read(StringIO(newick_text), 'newick')

    Phylo.draw_graphviz(phylo_tree, prog='neato')
    output_name = "%s.png" % name
    plt.savefig(output_name, dpi=200, bbox_inches='tight')
Beispiel #13
0
def plot_tree(final_tree, name):
    """Save a Graphviz drawing of the ``animal_list`` sub-tree to ``<name>.png``.

    The given tree is copied, its leaves are relabelled through the
    module-level ``y``, and only the sub-tree induced by ``animal_list`` is
    drawn (layout program ``neato``, 200 dpi).
    """
    tree_copy = final_tree.copy()
    for node in tree_copy.dfs():
        if node.is_leaf():
            label = y[node.point]
            node.point = label

    induced = tree_copy.induced_subtree(animal_list)

    plt.figure()
    handle = StringIO(induced.to_newick())
    Phylo.draw_graphviz(Phylo.read(handle, 'newick'), prog='neato')
    plt.savefig("%s.png" % name, dpi=200, bbox_inches='tight')
    def display(self, isascii=False):
        """Load the tree from ``<self.filename>.dnd`` and display it.

        The tree is stored on ``self.tree``.  It is drawn with Graphviz via
        pylab; when that fails, a warning is printed and the tree is drawn as
        ASCII art instead.

        Args:
            isascii: when true, also print the ASCII rendering up front.
        """
        self.tree = Phylo.read('{}.dnd'.format(self.filename), 'newick')

        if isascii:
            Phylo.draw_ascii(self.tree)

        try:
            import pylab
            Phylo.draw_graphviz(self.tree)
            pylab.show()
        except Exception:
            # Best-effort fallback.  ``Exception`` rather than a bare
            # ``except`` so Ctrl-C / SystemExit still propagate.
            print('Warning: failed to display using graphviz')
            Phylo.draw_ascii(self.tree)
Beispiel #15
0
def newick2img(newick, filepath, branch_length=True, radial=True, width=800):
    """Create an image file from the given Newick file.

    The tree is drawn into a figure of its own, which is closed again after
    saving, so successive calls cannot draw several trees into one image.

    Args:
        newick: path to the Newick tree file.
        filepath: path of the image file to create (passed to
            ``plt.savefig``).
        branch_length: accepted for interface compatibility; not used by
            this implementation.
        radial: if true the Graphviz program ``dot`` is used for the layout,
            otherwise ``neato``.
        width: accepted for interface compatibility; not used by this
            implementation.
    """
    prog = 'dot' if radial else 'neato'
    nwk = Phylo.read(newick, 'newick')
    # Fresh figure per call: drawing into the shared "current" figure would
    # overlay this tree on whatever was drawn before.
    fig = plt.figure()
    try:
        Phylo.draw_graphviz(nwk, prog=prog, node_size=500)
        plt.savefig(filepath)
    finally:
        plt.close(fig)
Beispiel #16
0
def newick2img(newick, filepath, branch_length = True, radial = True, width = 800):
    """Create an image file from the given Newick file.

    Each call draws into its own figure and closes it after saving, so
    several trees can never end up in the same image.

    Args:
        newick: path to the Newick tree file.
        filepath: path of the image file to create (passed to
            ``plt.savefig``).
        branch_length: accepted for interface compatibility; not used by
            this implementation.
        radial: if true the Graphviz program ``dot`` is used for the layout,
            otherwise ``neato``.
        width: accepted for interface compatibility; not used by this
            implementation.
    """
    prog = 'dot' if radial else 'neato'
    nwk = Phylo.read(newick, 'newick')
    # Use a dedicated figure; the implicit current figure would accumulate
    # the drawings of earlier calls.
    fig = plt.figure()
    try:
        Phylo.draw_graphviz(nwk, prog=prog, node_size=500)
        plt.savefig(filepath)
    finally:
        plt.close(fig)
def test(n = 10, theta = 1.0, L = 10):
    """Simulate a Kingman finite-sites tree and show it in several renderings.

    The simulated tree is converted to Newick, printed, plotted as text via
    dendropy, and drawn twice with Bio.Phylo (``Phylo.draw`` and
    ``Phylo.draw_graphviz`` with ``dot``).
    """
    simulation = fsmi.simulator_KingmanFiniteSites(n, theta, L)
    simulated_tree = sim_to_tree(simulation)

    newick_text = simulated_tree.str_newick(True)
    print(newick_text)

    # Text plot through dendropy.
    dendropy_tree = dp.Tree.get(data=newick_text, schema='newick')
    dendropy_tree.print_plot()

    # Same Newick string parsed by Bio.Phylo.
    phylo_tree = Phylo.read(StringIO(newick_text), 'newick')
    print(phylo_tree)

    plt.figure()
    Phylo.draw(phylo_tree)

    # Mark the tree as rooted before the Graphviz drawing.
    phylo_tree.rooted = True
    plt.figure()
    Phylo.draw_graphviz(phylo_tree, prog='dot')
    plt.draw()
def main(args):
    """Colour a tree from metadata and write the tree plus a colour legend.

    Reads ``args.tree`` in format ``args.tree_format`` and colours it using
    the mapping built from ``args.metadata`` (a CSV handle), ``args.color_by``
    and ``args.palette``.  With ``args.image_out`` set, the tree is drawn and
    saved as an image to ``args.out``; otherwise it is written to
    ``args.out`` as phyloXML.  The legend always goes to
    ``args.out + '.legend'``.  ``args.metadata`` is closed at the end.
    """
    # Will need to make format an option here if we ever need to
    tree = Phylo.read(args.tree, args.tree_format)
    meta = csv.DictReader(args.metadata)
    color_map = color_mapping(meta, args.color_by, args.palette)
    legend = color_map['by_group']
    sequence_mapping = color_map['by_sequence']

    # Write out the tree
    tree = apply_color_mapping(tree, sequence_mapping)
    if args.image_out:
        Phylo.draw_graphviz(tree)
        # Save before show(): once the interactive window has been closed the
        # figure is gone and savefig() would write an empty image.
        pylab.savefig(args.out)
        pylab.show()
    else:
        Phylo.write(tree, args.out, 'phyloxml')

    # Write out our legend; the handle is closed even if writing fails.
    with open(args.out + '.legend', 'w') as leg_handle:
        write_color_legend(legend, leg_handle, args.color_by)

    # close up shop
    args.metadata.close()
def main(args):
    """Apply a metadata-driven colour mapping to a tree and write the results.

    ``args.tree`` is parsed as ``args.tree_format``; colours come from
    ``color_mapping`` over the CSV handle ``args.metadata``.  Depending on
    ``args.image_out`` the coloured tree is saved to ``args.out`` either as a
    drawn image or as phyloXML.  A legend is written to
    ``args.out + '.legend'`` and ``args.metadata`` is closed afterwards.
    """
    # Will need to make format an option here if we ever need to
    tree = Phylo.read(args.tree, args.tree_format)
    meta = csv.DictReader(args.metadata)
    color_map = color_mapping(meta, args.color_by, args.palette)
    legend = color_map['by_group']
    sequence_mapping = color_map['by_sequence']

    # Write out the tree
    tree = apply_color_mapping(tree, sequence_mapping)
    if args.image_out:
        Phylo.draw_graphviz(tree)
        # savefig() must run before show(): after the window is closed the
        # figure no longer holds the drawing and the saved image is blank.
        pylab.savefig(args.out)
        pylab.show()
    else:
        Phylo.write(tree, args.out, 'phyloxml')

    # Write out our legend; ``with`` closes the handle on errors as well.
    with open(args.out + '.legend', 'w') as leg_handle:
        write_color_legend(legend, leg_handle, args.color_by)

    # close up shop
    args.metadata.close()
Beispiel #20
0
# Minimal script: draw the tree stored in apaf.xml with Graphviz.
import matplotlib

# Select the GTKAgg backend before pylab is imported.
matplotlib.use('GTKAgg')

from Bio import Phylo
import pylab

# Read the single tree contained in the phyloXML file.
tree = Phylo.read('apaf.xml', 'phyloxml')

# Draw it and open the figure window.
Phylo.draw_graphviz(tree)
pylab.show()
Beispiel #21
0
        def out():
            """Build a UPGMA tree from the FASTA file named in ``e1`` and show it.

            The records are written to ``phylo.phy`` in a PHYLIP-like layout
            (ids padded with ``-`` to a common width, sequences truncated to
            100 characters), read back as an alignment, turned into an
            identity distance matrix and clustered with UPGMA.  The tree is
            round-tripped through ``apaf.xml``, drawn with Graphviz, and the
            enclosing ``win`` window is destroyed afterwards.
            """
            records = SeqIO.parse("%s" % e1.get(), "fasta")

            lens = []   # record ids, in file order
            lens2 = []  # matching sequences, in file order
            for record in records:
                lens.append(record.id)
                lens2.append(record.seq)
                line = "%s   %s" % (record.id, record.seq[0:100])
                print(line)

            lengthmax = len(max(lens, key=len))

            # ``with`` guarantees the file is flushed and closed before it is
            # read back below, even if formatting a row fails.
            with open("phylo.phy", 'w') as phylip_file:
                phylip_file.write("   %s     100\n" % len(lens))

                for i, item in enumerate(lens):
                    # Pair every id with its own sequence.  The previous
                    # ``lens2[i - 1]`` shifted all sequences by one record
                    # (and gave the first id the last sequence).
                    seq = lens2[i]

                    if len(item) == lengthmax:
                        if i < 10:
                            # Extra dash compensates for the one-digit index.
                            ids = "%s%s%s" % (i, "-", item + "-")
                        else:
                            ids = "%s%s%s" % (i, "-", item)
                        print("1")
                    else:
                        # Pad shorter ids with dashes up to the longest id.
                        add = lengthmax - len(item)
                        ids = "%s%s%s" % (i, "-", item) + (add * "-") + "-"
                        print("2")

                    ids = ids.replace(".", "").replace("_", "")

                    line = "%s          %s\n" % (ids, seq[0:100])
                    print(line)
                    phylip_file.write(line)

            # Read the sequences and align
            aln = AlignIO.read('phylo.phy', 'phylip')

            # Print the alignment
            print(aln)

            # Calculate the distance matrix
            calculator = DistanceCalculator('identity')
            dm = calculator.get_distance(aln)

            # Print the distance Matrix
            print('\nDistance Matrix\n===================')
            print(dm)

            # Construct the phylogenetic tree using UPGMA algorithm
            constructor = DistanceTreeConstructor()
            tree = constructor.upgma(dm)

            Phylo.write(tree, 'apaf.xml', 'phyloxml')
            tree = Phylo.read('apaf.xml', 'phyloxml')
            Phylo.draw_graphviz(tree)
            pylab.show()

            win.destroy()
Beispiel #22
0
def proc():
    """Read ``agi-tree.xml`` (phyloXML), draw it with Graphviz and show it."""
    agi_tree = phylo.read('agi-tree.xml', 'phyloxml')
    phylo.draw_graphviz(agi_tree)
    pylab.show()
def Arvore2():
    """Draw the ClustalW guide tree and save it as ``<model>.sequences.png``.

    Reads ``backs\\clustalw.dnd`` (Newick), marks the tree as rooted, converts
    it to phyloXML form and saves the Graphviz drawing inside the
    module-level ``outpath`` directory.
    """
    # Raw string: same value as before, but "\c" is no longer parsed as an
    # (invalid) escape sequence, which newer Python versions warn about.
    tree = Phylo.read(r"backs\clustalw.dnd", "newick")
    tree.rooted = True
    tree = tree.as_phyloxml()
    Phylo.draw_graphviz(tree, fontsize='6')
    pylab.savefig(os.path.join(outpath, '%s.sequences.png' % model))
Beispiel #24
0
            Clade(branch_length=0.102, name='A')
            Clade(branch_length=0.23, name='B')
        Clade(branch_length=0.4, name='C')
...
<img src="Phylo-draw-apaf1.png" title="fig:Rooted phylogram, via Phylo.draw" alt="Rooted phylogram, via Phylo.draw" width="500" />

# Rooted phylogram: read the tree, reorder its branches, draw with Phylo.draw.
tree = Phylo.read('apaf.xml', 'phyloxml')
tree.ladderize()   # Flip branches so deeper clades are displayed at top
Phylo.draw(tree)

<img src="Phylo-apaf.png" title="Unrooted tree with colored nodes" alt="Unrooted tree with colored nodes" width="500" />

# Basic dendrogram drawn through Graphviz.
import pylab
tree = Phylo.read('apaf.xml', 'phyloxml')
Phylo.draw_graphviz(tree)
pylab.show()

# Simple tree with defined branch lengths: take the first tree of the file.
tree = Phylo.parse('phyloxml_examples.xml', 'phyloxml').next()
Phylo.draw_ascii(tree)

# ASCII rendering of the apaf tree.
apaf = Phylo.read('apaf.xml', 'phyloxml')
Phylo.draw_ascii(apaf)

#import networkx, pylab
#tree = Phylo.read('example.xml', 'phyloxml')
#net = Phylo.to_networkx(tree)
#networkx.draw(net)
#pylab.show()
Beispiel #25
0
    def out():
        """Build a UPGMA tree from the FASTA file named in ``e1`` and show it.

        Writes the records to ``phylo.phy`` (ids padded with ``-`` to a common
        width, sequences truncated to 100 characters), reads that file back
        as a PHYLIP alignment, computes an identity distance matrix, clusters
        it with UPGMA, round-trips the tree through ``apaf.xml``, draws it
        with Graphviz and finally destroys the enclosing ``win`` window.
        """
        records = list(SeqIO.parse(e1.get(), "fasta"))
        for record in records:
            print(record.seq)

        lens = [record.id for record in records]

        # Longest / shortest id length; both 0 when there are no records.
        if lens:
            lengthmax = len(max(lens, key=len))
            lengthmin = len(min(lens, key=len))
        else:
            lengthmax = lengthmin = 0

        header = "  %s             %s\n" % (len(lens), "125")

        # ``with`` closes (and therefore flushes) the file before it is read
        # back below; previously it was left open and could be read while
        # still incomplete.
        with open("phylo.phy", "w") as phylip_file:
            phylip_file.write(header)

            for i, record in enumerate(records):
                # Pad the id to the longest id, then strip "." and "_".
                add = lengthmax - len(record.id)
                name = record.id + (add * "-")
                name = name.replace(".", "").replace("_", "")

                # Each row carries its own record's sequence; before, every
                # row was written with the sequence of the last record.
                sequence = record.seq
                if lengthmin < lengthmax:
                    # Ids differ in length: prefix the row index.
                    to_be_write = "%s%s%s    %s" % (i, "-", name,
                                                    sequence[0:100])
                else:
                    to_be_write = "%s    %s" % (name, sequence[0:100])

                phylip_file.write("%s\n" % to_be_write)
                print(name)

        # Read the sequences and align
        aln = AlignIO.read('phylo.phy', 'phylip')

        # Print the alignment
        print(aln)

        # Calculate the distance matrix
        calculator = DistanceCalculator('identity')
        dm = calculator.get_distance(aln)

        # Print the distance Matrix
        print('\nDistance Matrix\n===================')
        print(dm)

        # Construct the phylogenetic tree using UPGMA algorithm
        constructor = DistanceTreeConstructor()
        tree = constructor.upgma(dm)

        Phylo.write(tree, 'apaf.xml', 'phyloxml')
        tree = Phylo.read('apaf.xml', 'phyloxml')
        Phylo.draw_graphviz(tree)
        pylab.show()

        win.destroy()
Beispiel #26
0
# Figure: likelihood trace, x-axis spanning n_iters + constraint_add.
plt.figure()
plt.xlim([0, n_iters + constraint_add])
plt.xlabel("Iterations", fontsize=fontsize)
plt.ylabel("Data Log Likelihood", fontsize=fontsize)
plt.plot(likelihoods)
plt.legend(loc="best", fontsize=12)

plt.savefig("online-likelihoods.png", bbox_inches="tight")


# Work on a copy of the sampler's tree; the copy is relabelled below.
final_tree = sampler.tree.copy()

# Figure: 2-D view of the tree.
plt.figure()
plot_tree_2d(final_tree, X, pca)

# Replace each leaf's point index with the corresponding entry of y.
for node in final_tree.dfs():
    if node.is_leaf():
        node.point = y[node.point]

# Round-trip through Newick to obtain a Bio.Phylo tree.
newick = final_tree.to_newick()
tree = Phylo.read(StringIO(newick), "newick")

# Figure: Graphviz (neato) drawing, saved as tree.png.
plt.figure()
Phylo.draw_graphviz(tree, prog="neato")
plt.savefig("tree.png", bbox_inches="tight")
graph = Phylo.to_networkx(tree)
# Python 2 print-to-file; the trailing comma suppresses the newline.
with open("tree.nwk", "w") as fp:
    print >> fp, newick,
nx.write_dot(graph, "tree.dot")
plt.show()
Beispiel #27
0
# Figure: likelihood trace, x-axis spanning n_iters + constraint_add.
plt.figure()
plt.xlim([0, n_iters + constraint_add])
plt.xlabel("Iterations", fontsize=fontsize)
plt.ylabel("Data Log Likelihood", fontsize=fontsize)
plt.plot(likelihoods)
plt.legend(loc='best', fontsize=12)

plt.savefig('online-likelihoods.png', bbox_inches='tight')

# Work on a copy of the sampler's tree; the copy is relabelled below.
final_tree = sampler.tree.copy()

# Figure: 2-D view of the tree.
plt.figure()
plot_tree_2d(final_tree, X, pca)

# Replace each leaf's point index with the corresponding entry of y.
for node in final_tree.dfs():
    if node.is_leaf():
        node.point = y[node.point]

# Round-trip through Newick to obtain a Bio.Phylo tree.
newick = final_tree.to_newick()
tree = Phylo.read(StringIO(newick), 'newick')

# Figure: Graphviz (neato) drawing, saved as tree.png.
plt.figure()
Phylo.draw_graphviz(tree, prog='neato')
plt.savefig('tree.png', bbox_inches='tight')
graph = Phylo.to_networkx(tree)
# Python 2 print-to-file; the trailing comma suppresses the newline.
with open('tree.nwk', 'w') as fp:
    print >> fp, newick,
nx.write_dot(graph, 'tree.dot')
plt.show()
Beispiel #28
0
def processing(raw_fasta_path, out_dir_path):
    """Deduplicate, align and cluster the sequences of a FASTA file.

    Everything is written below ``out_dir_path`` (created when missing):
    the deduplicated FASTA, the ClustalW outputs, ``alignment.txt``,
    ``figure_with_labels.pdf`` (guide tree), ``dendogram.png`` (dendrogram
    plus reordered distance matrix) and a ``clusters`` directory holding one
    FASTA file and one frequency bar chart per cluster as well as
    ``clusters_meta.txt``.

    Args:
        raw_fasta_path: input FASTA file.  The ``.aln`` / ``.dnd`` result
            paths are derived by replacing ``.fasta`` in the copied file's
            path.
        out_dir_path: output directory.
    """
    if not os.path.exists(out_dir_path):
        logging.info("Making directory {0}".format(out_dir_path))
        os.makedirs(out_dir_path)

    deduplicated_fasta = remove_duplicates(SeqIO.parse(raw_fasta_path,
                                                       "fasta"))
    base = os.path.basename(raw_fasta_path)
    fasta_path = os.path.join(out_dir_path, base)

    logging.info("Writing FASTA in {0}".format(fasta_path))
    SeqIO.write(deduplicated_fasta, fasta_path, "fasta")

    # Multiple sequence alignment
    cline = ClustalwCommandline("clustalw2", infile=fasta_path)
    stdout, stderr = cline()
    logging.info(cline)

    clustalw_result_path = fasta_path.replace(".fasta", ".aln")

    alignment_dict = SeqIO.to_dict(
        AlignIO.read(clustalw_result_path, "clustal"))

    # writing alignment table in .txt
    with open(os.path.join(out_dir_path, "alignment.txt"), "w") as fout:
        fout.write("\n".join(
            str(record.seq) for record in alignment_dict.itervalues()))

    # alignment tree drawing
    tree_path = fasta_path.replace(".fasta", ".dnd")
    tree = Phylo.read(tree_path, "newick")
    tree.ladderize()

    # with labels
    Phylo.draw_graphviz(tree, label_func=lambda x: x.name.replace("ID=", ""))
    plt.savefig(os.path.join(
        out_dir_path, "figure_with_labels.pdf"))  # need pygraphviz, pylab

    # Clustering: symmetric pairwise distance matrix over all sequences.
    ids = dict(enumerate(alignment_dict.keys()))
    distance_matrix = np.zeros([len(ids)] * 2)
    for i, j in itertools.combinations(xrange(len(ids)), r=2):
        distance_matrix[i][j] = distance_matrix[j][i] = \
            distance(alignment_dict[ids[i]], alignment_dict[ids[j]])

    # Compute and plot dendrogram
    fig = plt.figure()
    axdendro = fig.add_axes([0.09, 0.1, 0.2, 0.8])
    Y = linkage(distance_matrix, method="centroid")
    # Flat clusters are cut at half of the largest merge distance.
    cutoff = 0.5 * max(Y[:, 2])
    clusters = fcluster(Y, cutoff, "distance")
    Z = dendrogram(Y, orientation="right", color_threshold=cutoff)
    axdendro.set_yticks([])

    # Plot distance matrix, rows/columns reordered to the dendrogram leaves.
    axmatrix = fig.add_axes([0.3, 0.1, 0.6, 0.8])
    index = Z["leaves"]
    distance_matrix = distance_matrix[index, :]
    distance_matrix = distance_matrix[:, index]
    im = axmatrix.matshow(distance_matrix, aspect="auto", origin="lower")
    axmatrix.set_xticks([])
    axmatrix.set_yticks([])

    # Plot colorbar
    axcolor = fig.add_axes([0.91, 0.1, 0.02, 0.8])
    plt.colorbar(im, cax=axcolor)

    # Display and save figure
    dendogram_path = os.path.join(out_dir_path, "dendogram.png")
    fig.savefig(dendogram_path)

    # Group the aligned records by their flat-cluster id.
    fasta_clusters = defaultdict(list)
    for i, cluster in enumerate(clusters):
        fasta_id = ids[i]
        fasta_clusters[cluster].append(alignment_dict[fasta_id])

    # Saving information about clusters
    clusters_dir_path = os.path.join(out_dir_path, "clusters")
    if not os.path.exists(clusters_dir_path):
        os.makedirs(clusters_dir_path)
    clusters_meta_path = os.path.join(clusters_dir_path, "clusters_meta.txt")
    # ``with`` closes the meta file even when a cluster fails to process;
    # it used to be left open.
    with open(clusters_meta_path, "w") as meta_file:
        for cluster_id, cluster in fasta_clusters.iteritems():
            cluster_path = os.path.join(clusters_dir_path,
                                        "cluster_{0}.fasta".format(cluster_id))
            SeqIO.write(cluster, cluster_path, "fasta")
            summary_align = AlignInfo.SummaryInfo(
                MultipleSeqAlignment(cluster))
            consensus = summary_align.dumb_consensus()
            pssm = summary_align.pos_specific_score_matrix(
                consensus, chars_to_ignore=['-'])
            # Letter counts of the consensus, zero for letters not present.
            frequencies = dict.fromkeys(IUPAC.protein.letters, 0)
            frequencies.update(
                (key, len(list(group)))
                for key, group in itertools.groupby(sorted(consensus)))
            # Drop the ambiguity letter; the default avoids a KeyError when
            # the consensus contains no "X" at all.
            frequencies.pop("X", None)

            meta_file.write("""Cluster ID: {0}
Cluster size: {1}
Consensus:
{2}

PSSM:
{3}
Frequencies in consensus:
{4}


""".format(cluster_id, len(cluster), textwrap.fill(str(consensus)), pssm,
           pprint.pformat(frequencies)))

            fig = plt.figure()
            pos = np.arange(len(IUPAC.protein.letters))
            width = .5  # gives histogram aspect to the bar diagram

            ax = plt.axes()
            ax.set_xticks(pos + (width / 2))
            ax.set_xticklabels(IUPAC.protein.letters)

            plt.bar(pos,
                    [frequencies[letter] for letter in IUPAC.protein.letters],
                    width,
                    color='r')
            frequencies_path = os.path.join(
                clusters_dir_path, "frequencies_{0}.png".format(cluster_id))
            fig.savefig(frequencies_path)
            # One figure per cluster: close it so figures do not pile up.
            plt.close(fig)
Beispiel #29
0
def compute_tree(options, mat, names):
    """Make a UPGMA hierarchical clustering and write it as png and
    graphviz dot.

    Args:
        options: passed to ``tree_path`` to obtain the output location.
        mat: pairwise distances, looked up as ``mat[name_a][name_b]``.
        names: sequence of names; defines the order of the matrix rows.

    Writes the Newick tree to ``tree_path(options)`` and, next to it, a png
    and a dot file whose names are obtained by replacing "newick" in that
    path.
    """
    # oops, convert to biopython matrix
    # (lower-triangular layout: row i holds columns 0..i)
    matrix = []
    for i in xrange(len(names)):
        row = []
        for j in xrange(i + 1):
            # tree constructor writes 0-distances as 1s for some reason
            # so we hack around here
            val = float(mat[names[i]][names[j]])
            if val == 0.:
                val = 1e-10
            elif val == 1.:
                val = 1.1
            row.append(val)
        matrix.append(row)
    dm = _DistanceMatrix(names, matrix)

    # upgma tree
    constructor = DistanceTreeConstructor()
    tree = constructor.upgma(dm)
    robust_makedirs(os.path.dirname(tree_path(options)))
    Phylo.write(tree, tree_path(options), "newick")

    # png tree -- note : doesn't work in toil
    # Label function: blank out nodes whose string form contains "Inner".
    def f(x):
        if "Inner" in str(x):
            return ""
        else:
            return x
    Phylo.draw_graphviz(tree, label_func = f, node_size=1000, node_shape="s", font_size=10)
    pylab.savefig(tree_path(options).replace("newick", "png"))

    # graphviz
    # get networkx graph
    nxgraph = Phylo.to_networkx(tree)
    # make undirected
    nxgraph = nx.Graph(nxgraph)
    # push names to name labels
    nxgraph = nx.convert_node_labels_to_integers(nxgraph, label_attribute="label")
    for node_id in nxgraph.nodes():
        node = nxgraph.node[node_id]
        if "Inner" in str(node["label"]):
            # Inner nodes: empty quoted label and a near-zero size.
            node["label"] = "\"\""
            node["width"] = 0.001
            node["height"] = 0.001
        else:
            node["fontsize"] = 18
    for edge_id in nxgraph.edges():
        edge = nxgraph.edge[edge_id[0]][edge_id[1]]
        # in graphviz, weight means something else, so make it a label
        weight = float(edge["weight"])
        # undo hack from above
        # NOTE(review): the first test clamps weights above 1 to 1., and the
        # second test then maps exactly 1. to 0., so every weight > 1 ends up
        # as 0. -- confirm this is intended.
        if weight > 1:
            weight = 1.
        if weight <= 1e-10 or weight == 1.:
            weight = 0.
        edge["weight"] = None
        # Edge label is the weight scaled by 100.
        edge["label"] = "{0:.3g}".format(float(weight) * 100.)
        edge["fontsize"] = 14
        edge["len"] = draw_len(weight)
    nx.write_dot(nxgraph, tree_path(options).replace("newick", "dot"))
def save_tree(tree, filename):
    """Draw ``tree`` with Graphviz ``dot`` and save it under ./Output/images/.

    The figure is titled with ``filename``, saved as ``<filename>.png`` at
    100 dpi and then closed.
    """
    Phylo.draw_graphviz(tree, prog='dot')
    plt.title(filename)
    image_path = './Output/images/' + filename + '.png'
    plt.savefig(image_path, dpi=100)
    plt.close()
Beispiel #31
0
# Finish and save the scores figure (its plotting code precedes this excerpt).
plt.legend(loc='best', fontsize=12)
plt.savefig('offline-scores.png', bbox_inches='tight')

# Figure: one likelihood curve per entry of ``likelihoods``.
plt.figure()
plt.xlim([0, n_iters])
# plt.ylim(ymin=-400)
plt.xlabel("Iterations", fontsize=fontsize)
plt.ylabel("Data Log Likelihood", fontsize=fontsize)
for name, likelihood in likelihoods.items():
    plt.plot(likelihood, label=name)
plt.legend(loc='best', fontsize=12)
plt.savefig('offline-likelihoods.png', bbox_inches='tight')

# One tree drawing per model, saved as tree-<type>.png.
for type, model in models.items():
    # Copy so that relabelling the leaves does not modify the model.
    final_tree = model.copy()

    plt.figure()
    plot_tree_2d(final_tree, X, pca)

    # Replace each leaf's point index with the corresponding entry of y.
    for node in final_tree.dfs():
        if node.is_leaf():
            node.point = y[node.point]

    newick = final_tree.to_newick()
    tree = Phylo.read(StringIO(newick), 'newick')

    # NOTE(review): no new figure is opened here, so this appears to draw
    # into the figure used by plot_tree_2d above -- confirm this is intended.
    Phylo.draw_graphviz(tree, prog='neato')
    plt.savefig('tree-%s.png' % type, bbox_inches='tight')
plt.show()

def processing(raw_fasta_path, out_dir_path):
    """Run the dedup / align / cluster pipeline for one FASTA file.

    Outputs, all below ``out_dir_path`` (created when missing): the
    deduplicated FASTA, the ClustalW results, ``alignment.txt``,
    ``figure_with_labels.pdf``, ``dendogram.png`` and a ``clusters``
    directory with one FASTA file and one frequency chart per cluster plus
    ``clusters_meta.txt``.

    Args:
        raw_fasta_path: input FASTA file.  The ``.aln`` / ``.dnd`` result
            paths are derived by replacing ``.fasta`` in the copied file's
            path.
        out_dir_path: output directory.
    """
    if not os.path.exists(out_dir_path):
        logging.info("Making directory {0}".format(out_dir_path))
        os.makedirs(out_dir_path)

    deduplicated_fasta = remove_duplicates(
        SeqIO.parse(raw_fasta_path, "fasta"))
    base = os.path.basename(raw_fasta_path)
    fasta_path = os.path.join(out_dir_path, base)

    logging.info("Writing FASTA in {0}".format(fasta_path))
    SeqIO.write(deduplicated_fasta, fasta_path, "fasta")

    # Multiple sequence alignment
    cline = ClustalwCommandline("clustalw2", infile=fasta_path)
    stdout, stderr = cline()
    logging.info(cline)

    clustalw_result_path = fasta_path.replace(".fasta", ".aln")

    alignment_dict = SeqIO.to_dict(
        AlignIO.read(clustalw_result_path, "clustal"))

    # writing alignment table in .txt
    with open(os.path.join(out_dir_path, "alignment.txt"), "w") as fout:
        fout.write(
            "\n".join(
                str(record.seq) for record in alignment_dict.itervalues()))

    # alignment tree drawing
    tree_path = fasta_path.replace(".fasta", ".dnd")
    tree = Phylo.read(tree_path, "newick")
    tree.ladderize()

    # with labels
    Phylo.draw_graphviz(tree, label_func=lambda x: x.name.replace("ID=", ""))
    plt.savefig(os.path.join(out_dir_path,
                             "figure_with_labels.pdf"))  # need pygraphviz, pylab

    # Clustering: symmetric pairwise distance matrix over all sequences.
    ids = dict(enumerate(alignment_dict.keys()))
    distance_matrix = np.zeros([len(ids)] * 2)
    for i, j in itertools.combinations(xrange(len(ids)), r=2):
        distance_matrix[i][j] = distance_matrix[j][i] = \
            distance(alignment_dict[ids[i]], alignment_dict[ids[j]])

    # Compute and plot dendrogram
    fig = plt.figure()
    axdendro = fig.add_axes([0.09, 0.1, 0.2, 0.8])
    Y = linkage(distance_matrix, method="centroid")
    # Flat clusters are cut at half of the largest merge distance.
    cutoff = 0.5 * max(Y[:, 2])
    clusters = fcluster(Y, cutoff, "distance")
    Z = dendrogram(Y, orientation="right", color_threshold=cutoff)
    axdendro.set_yticks([])

    # Plot distance matrix, rows/columns reordered to the dendrogram leaves.
    axmatrix = fig.add_axes([0.3, 0.1, 0.6, 0.8])
    index = Z["leaves"]
    distance_matrix = distance_matrix[index, :]
    distance_matrix = distance_matrix[:, index]
    im = axmatrix.matshow(distance_matrix, aspect="auto", origin="lower")
    axmatrix.set_xticks([])
    axmatrix.set_yticks([])

    # Plot colorbar
    axcolor = fig.add_axes([0.91, 0.1, 0.02, 0.8])
    plt.colorbar(im, cax=axcolor)

    # Display and save figure
    dendogram_path = os.path.join(out_dir_path, "dendogram.png")
    fig.savefig(dendogram_path)

    # Group the aligned records by their flat-cluster id.
    fasta_clusters = defaultdict(list)
    for i, cluster in enumerate(clusters):
        fasta_id = ids[i]
        fasta_clusters[cluster].append(alignment_dict[fasta_id])

    # Saving information about clusters
    clusters_dir_path = os.path.join(out_dir_path, "clusters")
    if not os.path.exists(clusters_dir_path):
        os.makedirs(clusters_dir_path)
    clusters_meta_path = os.path.join(clusters_dir_path, "clusters_meta.txt")
    # The meta file used to stay open; ``with`` closes it on every exit path.
    with open(clusters_meta_path, "w") as meta_file:
        for cluster_id, cluster in fasta_clusters.iteritems():
            cluster_path = os.path.join(clusters_dir_path,
                                        "cluster_{0}.fasta".format(cluster_id))
            SeqIO.write(cluster, cluster_path, "fasta")
            summary_align = AlignInfo.SummaryInfo(
                MultipleSeqAlignment(cluster))
            consensus = summary_align.dumb_consensus()
            pssm = summary_align.pos_specific_score_matrix(
                consensus, chars_to_ignore=['-'])
            # Letter counts of the consensus, zero for letters not present.
            frequencies = dict.fromkeys(IUPAC.protein.letters, 0)
            frequencies.update(
                (key, len(list(group)))
                for key, group in itertools.groupby(sorted(consensus)))
            # Remove the ambiguity letter if present; a plain pop("X")
            # raised KeyError for a consensus without any "X".
            frequencies.pop("X", None)

            meta_file.write("""Cluster ID: {0}
Cluster size: {1}
Consensus:
{2}

PSSM:
{3}
Frequencies in consensus:
{4}


""".format(cluster_id, len(cluster), textwrap.fill(str(consensus)), pssm,
           pprint.pformat(frequencies)))

            fig = plt.figure()
            pos = np.arange(len(IUPAC.protein.letters))
            width = .5     # gives histogram aspect to the bar diagram

            ax = plt.axes()
            ax.set_xticks(pos + (width / 2))
            ax.set_xticklabels(IUPAC.protein.letters)

            plt.bar(pos,
                    [frequencies[letter] for letter in IUPAC.protein.letters],
                    width, color='r')
            frequencies_path = os.path.join(
                clusters_dir_path, "frequencies_{0}.png".format(cluster_id))
            fig.savefig(frequencies_path)
            # Close the per-cluster figure so open figures do not accumulate.
            plt.close(fig)
Beispiel #33
0
# Minimal script: draw a Newick tree with the Graphviz "dot" layout.
from Bio import Phylo
# from pygraphviz import *
# from networkx.drawing import nx_agraph

# Read the single tree stored in the Newick file.
tree = Phylo.read("Plotly/small.newick", 'newick')

# Draw the tree; no figure is shown or saved by this script itself.
Phylo.draw_graphviz(tree, prog='dot')