def iterate(n_iters):
    """Run the sampler for ``n_iters`` steps, then plot and export the result.

    Every step calls ``sampler.sample()`` and appends the tree's marginal log
    likelihood to the module-level ``likelihoods`` list.  Afterwards the
    likelihood trace is saved to ``unconstrained-likelihoods.png``, a copy of
    the sampler's tree is plotted with ``plot_tree_2d`` and with Graphviz
    (``unconstrained-tree.png``), and the tree is exported as Newick
    (``unconstrained-tree.nwk``) and DOT (``unconstrained-tree.dot``).

    Uses the module-level names ``sampler``, ``likelihoods``, ``fontsize``,
    ``X``, ``pca`` and ``y``.
    """
    for _ in tqdm(xrange(n_iters)):
        sampler.sample()
        step_likelihood = sampler.tree.marg_log_likelihood()
        likelihoods.append(step_likelihood)

    # Likelihood trace.
    plt.figure()
    plt.ylabel("Data Log Likelihood", fontsize=fontsize)
    plt.xlabel("Iterations", fontsize=fontsize)
    plt.plot(likelihoods)
    plt.legend(loc='best', fontsize=12)
    plt.savefig('unconstrained-likelihoods.png', bbox_inches='tight')

    # 2-D plot of a copy of the sampled tree.
    final_tree = sampler.tree.copy()
    plt.figure()
    plot_tree_2d(final_tree, X, pca)

    # On the copy, replace every leaf's point by the matching entry of y.
    for node in final_tree.dfs():
        if not node.is_leaf():
            continue
        node.point = y[node.point]

    # Graphviz rendering of the relabelled tree.
    newick = final_tree.to_newick()
    tree = Phylo.read(StringIO(newick), 'newick')
    plt.figure()
    Phylo.draw_graphviz(tree, prog='neato')
    plt.savefig('unconstrained-tree.png', bbox_inches='tight')

    # Newick text is written without a trailing newline.
    graph = Phylo.to_networkx(tree)
    with open('unconstrained-tree.nwk', 'w') as fp:
        fp.write(newick)
    nx.write_dot(graph, 'unconstrained-tree.dot')
    plt.show()
def iterate(n_iters):
    """Sample ``n_iters`` times and write likelihood and tree outputs.

    Each iteration calls ``sampler.sample()`` and records
    ``sampler.tree.marg_log_likelihood()`` in the module-level ``likelihoods``
    list.  The function then saves the likelihood curve
    (``unconstrained-likelihoods.png``), plots a copy of the tree with
    ``plot_tree_2d``, draws it with Graphviz (``unconstrained-tree.png``) and
    writes it as Newick (``unconstrained-tree.nwk``) and DOT
    (``unconstrained-tree.dot``) before showing the figures.

    Depends on the module-level names ``sampler``, ``likelihoods``,
    ``fontsize``, ``X``, ``pca`` and ``y``.
    """
    for _ in tqdm(xrange(n_iters)):
        sampler.sample()
        likelihoods.append(sampler.tree.marg_log_likelihood())

    # Figure 1: data log likelihood per iteration.
    plt.figure()
    plt.ylabel("Data Log Likelihood", fontsize=fontsize)
    plt.xlabel("Iterations", fontsize=fontsize)
    plt.plot(likelihoods)
    plt.legend(loc='best', fontsize=12)
    plt.savefig('unconstrained-likelihoods.png', bbox_inches='tight')

    # Figure 2: 2-D view of a copy of the sampled tree.
    final_tree = sampler.tree.copy()
    plt.figure()
    plot_tree_2d(final_tree, X, pca)

    # Leaves of the copy get y[point] in place of their point.
    leaf_nodes = (node for node in final_tree.dfs() if node.is_leaf())
    for leaf in leaf_nodes:
        leaf.point = y[leaf.point]

    # Figure 3: Graphviz drawing via Bio.Phylo.
    newick = final_tree.to_newick()
    tree = Phylo.read(StringIO(newick), 'newick')
    plt.figure()
    Phylo.draw_graphviz(tree, prog='neato')
    plt.savefig('unconstrained-tree.png', bbox_inches='tight')

    graph = Phylo.to_networkx(tree)
    with open('unconstrained-tree.nwk', 'w') as fp:
        # No trailing newline is written after the Newick text.
        fp.write(newick)
    nx.write_dot(graph, 'unconstrained-tree.dot')
    plt.show()
def DrawSimple(self):
    """Draw the loaded tree with Graphviz and show the plot window.

    The file-selection window is opened first when no file name has been
    chosen yet.  Nothing is drawn while ``self.tree`` still compares equal
    to 0.
    """
    no_file_chosen = self.chosenFileName == ''
    if no_file_chosen:
        self.showOpenFileWindow()

    tree_loaded = self.tree != 0
    if not tree_loaded:
        return

    # Grey root, large nodes.
    root = self.tree.root
    root.color = '#808080'
    Phylo.draw_graphviz(self.tree, node_size=2500)
    pylab.show()
def plot_tree(tree, y):
    """Draw a relabelled copy of ``tree`` with Graphviz ('neato' layout).

    The input tree is copied; on the copy every leaf's ``point`` is replaced
    by ``y[point]``.  The copy is then serialised to Newick, re-read with
    ``Phylo.read`` and passed to ``Phylo.draw_graphviz``.
    """
    relabelled = tree.copy()
    for node in relabelled.dfs():
        if not node.is_leaf():
            continue
        node.point = y[node.point]

    newick_text = relabelled.to_newick()
    phylo_tree = Phylo.read(StringIO(newick_text), 'newick')
    Phylo.draw_graphviz(phylo_tree, prog='neato')
def showGraphvizRootedTreeWindow(self):
    """Draw the loaded tree with Graphviz and show the plot window.

    The open-file dialog is shown first when no file name has been chosen.
    Nothing is drawn while ``self.tree`` still compares equal to 0.
    """
    no_file_chosen = self.chosenFileName == ''
    if no_file_chosen:
        self.showOpenFileDialog()

    tree_loaded = self.tree != 0
    if not tree_loaded:
        return

    # Forcing the tree to be rooted (self.tree.rooted = True) was left
    # disabled in the original code.
    root = self.tree.root
    root.color = '#808080'
    Phylo.draw_graphviz(self.tree, node_size=2500)
    pylab.show()
def showPathWindow(self):
    """Highlight the path between the two selected clades and show the tree.

    When both ``self.cpath`` and ``self.cpath2`` are non-empty, the first
    clade matching each selection is stored in ``self.start`` / ``self.end``.
    Clades on the traced path are coloured red, all other clades grey and the
    start clade blue; the tree is then drawn with Graphviz on axis-free axes.

    When either selection is empty, a message is printed and the image
    ``img/wally.png`` is shown instead.

    Raises:
        StopIteration: if a selection does not match any clade in the tree.
    """
    def hide_axes():
        # Plot a dummy point, then hide every tick label and tick line so
        # that only the drawing itself remains visible.
        pylab.plot(0, 0)
        axes = pylab.gca().axes
        tick_labels = list(axes.get_xticklabels()) + list(axes.get_yticklabels())
        for label in tick_labels:
            label.set_visible(False)
            label.set_fontsize(0.0)
        tick_lines = list(axes.get_xticklines()) + list(axes.get_yticklines())
        for tick in tick_lines:
            tick.set_visible(False)

    if self.cpath != '' and self.cpath2 != '':
        # next(...) works on Python 2.6+ and 3, unlike the .next() method.
        self.start = next(self.tree.find_clades(self.cpath))
        self.end = next(self.tree.find_clades(name=self.cpath2))

        # Trace the path once instead of once per clade in the tree.
        path = list(self.tree.trace(self.start, self.end))
        for clade in path:
            clade.color = 'red'
        for clade in self.tree.find_clades():
            if clade not in path:
                clade.color = 'grey'
        self.start.color = 'blue'

        # Drawing
        Phylo.draw_graphviz(self.tree, node_size=2500)
        hide_axes()
        pylab.show()
    else:
        print("Nie wybrano punktow")
        # Show the informational image instead of the tree.
        img = pylab.imread('img/wally.png', 'rb')
        pylab.imshow(img)
        hide_axes()
        pylab.show()
def showPathWindow(self):
    """Highlight the path between the two selected clades and show the tree.

    When both ``self.cb1`` and ``self.cb2`` are non-empty, the first clade
    matching each selection is stored in ``self.start`` / ``self.end``.
    Clades on the traced path are coloured red, all other clades grey and the
    start clade blue; the tree is then drawn with Graphviz on axis-free axes.

    When either selection is empty, a message is printed and the image
    ``wally.png`` is shown instead.

    Raises:
        StopIteration: if a selection does not match any clade in the tree.
    """
    def hide_axes():
        # Plot a dummy point, then hide every tick label and tick line so
        # that only the drawing itself remains visible.
        pylab.plot(0, 0)
        axes = pylab.gca().axes
        tick_labels = list(axes.get_xticklabels()) + list(axes.get_yticklabels())
        for label in tick_labels:
            label.set_visible(False)
            label.set_fontsize(0.0)
        tick_lines = list(axes.get_xticklines()) + list(axes.get_yticklines())
        for tick in tick_lines:
            tick.set_visible(False)

    if self.cb1 != '' and self.cb2 != '':
        # next(...) works on Python 2.6+ and 3, unlike the .next() method.
        self.start = next(self.tree.find_clades(self.cb1))
        self.end = next(self.tree.find_clades(name=self.cb2))

        # Trace the path once instead of once per clade in the tree.
        path = list(self.tree.trace(self.start, self.end))
        for clade in path:
            clade.color = 'red'
        for clade in self.tree.find_clades():
            if clade not in path:
                clade.color = 'grey'
        self.start.color = 'blue'

        # Drawing
        Phylo.draw_graphviz(self.tree, node_size=2500)
        hide_axes()
        pylab.show()
    else:
        print("Nie wybrano punktow")
        # Show the informational image instead of the tree.
        img = pylab.imread('wally.png', 'rb')
        pylab.imshow(img)
        hide_axes()
        pylab.show()
def save_utree(tree_path):
    """Draw the Newick tree at ``tree_path`` and save the picture.

    The tree is read with ``Phylo.read``, ladderized and drawn with
    ``Phylo.draw_graphviz``.  The output file name is
    ``common.UTREE_MASK % prefix``, where ``prefix`` is ``tree_path`` without
    its final ``.suffix``.

    Args:
        tree_path: path of the Newick file to draw.

    Raises:
        Exception: whatever ``plt.savefig`` raises is logged and re-raised.
    """
    logging.info("Saving unrooted tree.")

    # str.rfind returns -1 when there is no '.', and slicing with -1 would
    # silently drop the last character of the path.
    dot_index = tree_path.rfind('.')
    prefix_name = tree_path if dot_index == -1 else tree_path[:dot_index]

    unrooted_tree = Phylo.read(tree_path, 'newick')
    unrooted_tree.ladderize()
    Phylo.draw_graphviz(unrooted_tree)
    try:
        plt.savefig(common.UTREE_MASK % prefix_name)
    except Exception:
        logging.error("Error while saving unrooted tree.")
        raise
def main():
    """Render a NEXUS tree file to a PNG image using the 'twopi' layout.

    Expects exactly one command-line argument, the path of the tree file.
    The image is saved next to the input, with the input's extension replaced
    by ``.png``.  With any other number of arguments a usage message is
    written to stderr and the process exits.
    """
    import os

    sys.stderr.write("Print the result to file\n")
    if len(sys.argv) != 2:
        sys.stderr.write("Using python %s file.tre[nexus]\n" % sys.argv[0])
        # Exit status 0 is kept so existing callers see no change.
        sys.exit(0)

    tree_file = sys.argv[1]
    layout_prog = "twopi"  # alternative layout: 'neato'
    tree = Phylo.read(tree_file, "nexus")
    Phylo.draw_graphviz(tree, prog=layout_prog)

    # Swap only the extension; str.replace("tre", "png") would also rewrite
    # any other "tre" in the path (e.g. a "trees/" directory).
    png_path = os.path.splitext(tree_file)[0] + ".png"
    pylab.savefig(png_path)
def main():
    """Render a NEXUS tree file to a PNG image using the 'twopi' layout.

    Expects exactly one command-line argument, the path of the tree file.
    The image is saved next to the input, with the input's extension replaced
    by ``.png``.  With any other number of arguments a usage message is
    written to stderr and the process exits.
    """
    import os

    sys.stderr.write("Print the result to file\n")
    if len(sys.argv) != 2:
        sys.stderr.write('Using python %s file.tre[nexus]\n' % sys.argv[0])
        # Exit status 0 is kept so existing callers see no change.
        sys.exit(0)

    tree_file = sys.argv[1]
    layout_prog = 'twopi'  # alternative layout: 'neato'
    tree = Phylo.read(tree_file, 'nexus')
    Phylo.draw_graphviz(tree, prog=layout_prog)

    # Swap only the extension; str.replace('tre', 'png') would also rewrite
    # any other 'tre' in the path (e.g. a 'trees/' directory).
    png_path = os.path.splitext(tree_file)[0] + '.png'
    pylab.savefig(png_path)
def plot_tree(final_tree, name):
    """Draw the subtree induced by ``animal_list`` and save it as ``<name>.png``.

    Works on a copy of ``final_tree``: every leaf's ``point`` is replaced by
    ``y[point]`` and the copy is restricted with
    ``induced_subtree(animal_list)``.  The result is serialised to Newick,
    re-read with ``Phylo.read`` and drawn with Graphviz ('neato') in a new
    figure.

    Uses the module-level names ``y`` and ``animal_list``.
    """
    tree_copy = final_tree.copy()
    for node in tree_copy.dfs():
        if not node.is_leaf():
            continue
        node.point = y[node.point]
    subtree = tree_copy.induced_subtree(animal_list)

    plt.figure()
    newick_text = subtree.to_newick()
    phylo_tree = Phylo.read(StringIO(newick_text), 'newick')
    Phylo.draw_graphviz(phylo_tree, prog='neato')
    plt.savefig("%s.png" % name, dpi=200, bbox_inches='tight')
def display(self, isascii=False):
    """Load the tree from ``<self.filename>.dnd`` and display it.

    The tree is read as Newick and stored in ``self.tree``.  It is drawn with
    Graphviz; if that fails for any reason, a warning is printed and the tree
    is drawn as ASCII art instead.

    Args:
        isascii: when true, the tree is additionally drawn as ASCII art
            before the Graphviz attempt.
    """
    self.tree = Phylo.read('{}.dnd'.format(self.filename), 'newick')
    if isascii:
        # NOTE(review): the Graphviz drawing below is still attempted after
        # the ASCII drawing -- confirm whether that is intended.
        Phylo.draw_ascii(self.tree)
    try:
        import pylab
        Phylo.draw_graphviz(self.tree)
        pylab.show()
    except Exception:
        # A bare ``except:`` would also swallow KeyboardInterrupt and
        # SystemExit; only ordinary errors trigger the ASCII fallback.
        print('Warning: failed to display using graphviz')
        Phylo.draw_ascii(self.tree)
def newick2img(newick, filepath, branch_length=True, radial=True, width=800):
    '''
    Create an image file from the given newick file.

    The tree is drawn with Graphviz and saved with ``plt.savefig``.  The
    current figure is closed afterwards, so that a later call does not draw
    on top of this tree.  No lock is taken here; callers that draw from
    several threads have to serialise the calls themselves.

    newick          absolute path to newick tree file
    filepath        absolute path to the image file to be created
    branch_length   currently unused
    radial          boolean, selects the Graphviz layout program:
                    'dot' if true, 'neato' otherwise
    width           currently unused
    '''
    # NOTE(review): 'dot' is chosen for radial=True -- confirm that this
    # mapping is the intended one.
    prog = 'dot' if radial else 'neato'
    nwk = Phylo.read(newick, 'newick')
    try:
        Phylo.draw_graphviz(nwk, prog=prog, node_size=500)
        plt.savefig(filepath)
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close()
def newick2img(newick, filepath, branch_length = True, radial = True, width = 800):
    '''
    Create an image file from the given newick file.

    The tree is drawn with Graphviz and saved with ``plt.savefig``.  The
    current figure is closed afterwards, so that a later call does not draw
    on top of this tree.  No lock is taken here; callers that draw from
    several threads have to serialise the calls themselves.

    newick          absolute path to newick tree file
    filepath        absolute path to the image file to be created
    branch_length   currently unused
    radial          boolean, selects the Graphviz layout program:
                    'dot' if true, 'neato' otherwise
    width           currently unused
    '''
    # NOTE(review): 'dot' is chosen for radial=True -- confirm that this
    # mapping is the intended one.
    prog = 'dot' if radial else 'neato'
    nwk = Phylo.read(newick, 'newick')
    try:
        Phylo.draw_graphviz(nwk, prog = prog, node_size = 500)
        plt.savefig(filepath)
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close()
def test(n = 10, theta = 1.0, L = 10):
    """Simulate a tree and display it through several plotting back ends.

    Runs ``fsmi.simulator_KingmanFiniteSites(n, theta, L)``, converts the
    simulation with ``sim_to_tree`` and prints the tree's Newick string.  The
    same string is then shown with ``dp.Tree.print_plot``, printed as a
    ``Bio.Phylo`` tree, and drawn with ``Phylo.draw`` and (after marking the
    tree as rooted) with ``Phylo.draw_graphviz`` using the 'dot' layout.
    """
    simulation = fsmi.simulator_KingmanFiniteSites(n, theta, L)
    simulated_tree = sim_to_tree(simulation)

    newick_text = simulated_tree.str_newick(True)
    print(newick_text)

    # Text plot through the ``dp`` tree class.
    dp_tree = dp.Tree.get(data = newick_text, schema = 'newick')
    dp_tree.print_plot()

    # Bio.Phylo view of the same Newick string.
    phylo_tree = Phylo.read(StringIO(newick_text), 'newick')
    print(phylo_tree)

    plt.figure()
    Phylo.draw(phylo_tree)

    phylo_tree.rooted = True
    plt.figure()
    Phylo.draw_graphviz(phylo_tree, prog = 'dot')
    plt.draw()
def main(args):
    """Colour a tree according to metadata and write it out with a legend.

    Reads the tree from ``args.tree`` in format ``args.tree_format`` and
    builds a colour mapping from the CSV metadata (``args.metadata``,
    ``args.color_by``, ``args.palette``).  The coloured tree is either drawn
    with Graphviz and saved as an image (``args.image_out``) or written as
    phyloXML to ``args.out``.  The colour legend always goes to
    ``args.out + '.legend'``.  ``args.metadata`` is closed before returning,
    also when an error occurs.
    """
    try:
        tree = Phylo.read(args.tree, args.tree_format)
        meta = csv.DictReader(args.metadata)
        color_map = color_mapping(meta, args.color_by, args.palette)
        legend = color_map['by_group']
        sequence_mapping = color_map['by_sequence']

        # Write out the tree
        tree = apply_color_mapping(tree, sequence_mapping)
        if args.image_out:
            Phylo.draw_graphviz(tree)
            # Save before show(): once the blocking show() returns, the
            # figure has been torn down and savefig would write an empty
            # canvas.
            pylab.savefig(args.out)
            pylab.show()
        else:
            Phylo.write(tree, args.out, 'phyloxml')

        # Write out our legend; the handle is closed even on errors.
        with open(args.out + '.legend', 'w') as leg_handle:
            write_color_legend(legend, leg_handle, args.color_by)
    finally:
        # close up shop
        args.metadata.close()
# Select the GTKAgg backend before pylab is imported.
import matplotlib
matplotlib.use('GTKAgg')

import pylab
from Bio import Phylo

# Read the phyloXML tree, draw it with Graphviz and open the window.
tree = Phylo.read("apaf.xml", "phyloxml")
Phylo.draw_graphviz(tree)
pylab.show()
def out():
    """Build and show a UPGMA tree for the FASTA file named in ``e1``.

    The record ids and the first 100 residues of every sequence are written
    to ``phylo.phy``.  That file is read back as a PHYLIP alignment, an
    identity distance matrix is computed and a UPGMA tree is constructed.
    The tree is stored as ``apaf.xml`` (phyloXML), drawn with Graphviz and
    shown; finally the window ``win`` is destroyed.

    Raises:
        ValueError: if the FASTA file contains no records.
    """
    names = []
    sequences = []
    for record in SeqIO.parse("%s" % e1.get(), "fasta"):
        names.append(record.id)
        sequences.append(record.seq)
        print("%s %s" % (record.id, record.seq[0:100]))

    longest = max(len(name) for name in names)

    with open("phylo.phy", 'w') as handle:
        handle.write(" %s 100\n" % len(names))
        # Pair every id with its OWN sequence.  The previous ``lens2[i - 1]``
        # lookup attached the preceding record's sequence (and the last
        # record's sequence to the first id).
        for i, (name, seq) in enumerate(zip(names, sequences)):
            if len(name) == longest:
                # Single-digit indices get one extra '-' so that the labels
                # of the longest ids have the same width.
                if i < 10:
                    label = "%s-%s-" % (i, name)
                else:
                    label = "%s-%s" % (i, name)
                print("1")
            else:
                # Shorter ids are padded with '-' up to the longest id, plus
                # one trailing '-'.
                padding = "-" * (longest - len(name))
                label = "%s-%s" % (i, name) + padding + "-"
                print("2")
            label = label.replace(".", "").replace("_", "")
            line = "%s %s\n" % (label, seq[0:100])
            print(line)
            handle.write(line)

    # Read the sequences and align
    aln = AlignIO.read('phylo.phy', 'phylip')
    # Print the alignment
    print(aln)

    # Calculate the distance matrix
    calculator = DistanceCalculator('identity')
    dm = calculator.get_distance(aln)
    # Print the distance Matrix
    print('\nDistance Matrix\n===================')
    print(dm)

    # Construct the phylogenetic tree using UPGMA algorithm
    constructor = DistanceTreeConstructor()
    tree = constructor.upgma(dm)
    Phylo.write(tree, 'apaf.xml', 'phyloxml')

    tree = Phylo.read('apaf.xml', 'phyloxml')
    Phylo.draw_graphviz(tree)
    pylab.show()
    win.destroy()
def proc():
    """Read ``agi-tree.xml`` as phyloXML, draw it with Graphviz and show it."""
    agi_tree = phylo.read("agi-tree.xml", "phyloxml")
    phylo.draw_graphviz(agi_tree)
    pylab.show()
def Arvore2():
    """Draw the ClustalW guide tree and save it as a PNG image.

    Reads ``backs/clustalw.dnd`` as Newick, marks the tree as rooted,
    converts it to phyloXML and draws it with Graphviz.  The picture is saved
    as ``<model>.sequences.png`` inside ``outpath`` (both module-level
    names).
    """
    # os.path.join yields the same "backs\clustalw.dnd" on Windows, avoids
    # the invalid "\c" escape sequence and also works on other platforms.
    tree_file = os.path.join("backs", "clustalw.dnd")
    tree = Phylo.read(tree_file, "newick")
    tree.rooted = True
    tree = tree.as_phyloxml()
    Phylo.draw_graphviz(tree, fontsize='6')
    pylab.savefig(os.path.join(outpath, '%s.sequences.png' % model))
Clade(branch_length=0.102, name='A') Clade(branch_length=0.23, name='B') Clade(branch_length=0.4, name='C') ... <img src="Phylo-draw-apaf1.png" title="fig:Rooted phylogram, via Phylo.draw" alt="Rooted phylogram, via Phylo.draw" width="500" /> tree = Phylo.read('apaf.xml', 'phyloxml') tree.ladderize() # Flip branches so deeper clades are displayed at top Phylo.draw(tree) <img src="Phylo-apaf.png" title="Unrooted tree with colored nodes" alt="Unrooted tree with colored nodes" width="500" /> #for basic dendrogram import pylab tree = Phylo.read('apaf.xml', 'phyloxml') Phylo.draw_graphviz(tree) pylab.show() #simple tree with defined branch lengths tree = Phylo.parse('phyloxml_examples.xml', 'phyloxml').next() Phylo.draw_ascii(tree) apaf = Phylo.read('apaf.xml', 'phyloxml') Phylo.draw_ascii(apaf) #import networkx, pylab #tree = Phylo.read('example.xml', 'phyloxml') #net = Phylo.to_networkx(tree) #networkx.draw(net) #pylab.show()
def out():
    """Build and show a UPGMA tree for the FASTA file named in ``e1``.

    The record ids and the first 100 residues of every sequence are written
    to ``phylo.phy``.  That file is read back as a PHYLIP alignment, an
    identity distance matrix is computed and a UPGMA tree is constructed.
    The tree is stored as ``apaf.xml`` (phyloXML), drawn with Graphviz and
    shown; finally the window ``win`` is destroyed.
    """
    names = []
    sequences = []
    for record in SeqIO.parse(e1.get(), "fasta"):
        print(record.seq)
        names.append(record.id)
        sequences.append(record.seq)

    # Both lengths default to 0 for an empty input instead of relying on a
    # bare ``except`` around max()/min().
    name_lengths = [len(name) for name in names]
    lengthmax = max(name_lengths) if name_lengths else 0
    lengthmin = min(name_lengths) if name_lengths else 0

    # The ``with`` block guarantees the file is flushed and closed before it
    # is read back below.
    with open("phylo.phy", "w") as handle:
        # Rows are cut to 100 columns, so the header declares 100 as well.
        # NOTE(review): the header used to say 125 -- confirm that the
        # inputs hold at least 100 residues per record.
        handle.write(" %s %s\n" % (len(names), "100"))
        # Every id is written with its OWN sequence; previously all rows
        # reused the sequence of the last record parsed.
        for i, (name, seq) in enumerate(zip(names, sequences)):
            padded = name + "-" * (lengthmax - len(name))
            padded = padded.replace(".", "").replace("_", "")
            if lengthmin < lengthmax:
                # Ids of differing length get an index prefix.
                row = "%s-%s %s" % (i, padded, seq[0:100])
            else:
                row = "%s %s" % (padded, seq[0:100])
            handle.write("%s\n" % row)
            print(padded)

    # Read the sequences and align
    aln = AlignIO.read('phylo.phy', 'phylip')
    # Print the alignment
    print(aln)

    # Calculate the distance matrix
    calculator = DistanceCalculator('identity')
    dm = calculator.get_distance(aln)
    # Print the distance Matrix
    print('\nDistance Matrix\n===================')
    print(dm)

    # Construct the phylogenetic tree using UPGMA algorithm
    constructor = DistanceTreeConstructor()
    tree = constructor.upgma(dm)
    Phylo.write(tree, 'apaf.xml', 'phyloxml')

    tree = Phylo.read('apaf.xml', 'phyloxml')
    Phylo.draw_graphviz(tree)
    pylab.show()
    win.destroy()
# Likelihood trace over the whole run.
plt.figure()
plt.xlim([0, n_iters + constraint_add])
plt.ylabel("Data Log Likelihood", fontsize=fontsize)
plt.xlabel("Iterations", fontsize=fontsize)
plt.plot(likelihoods)
plt.legend(loc="best", fontsize=12)
plt.savefig("online-likelihoods.png", bbox_inches="tight")

# 2-D plot of a copy of the sampler's tree.
final_tree = sampler.tree.copy()
plt.figure()
plot_tree_2d(final_tree, X, pca)

# On the copy, replace every leaf's point by the matching entry of y.
for node in final_tree.dfs():
    if not node.is_leaf():
        continue
    node.point = y[node.point]

# Graphviz rendering of the relabelled tree.
plt.figure()
newick = final_tree.to_newick()
tree = Phylo.read(StringIO(newick), "newick")
Phylo.draw_graphviz(tree, prog="neato")
plt.savefig("tree.png", bbox_inches="tight")

# Export as Newick (no trailing newline) and as Graphviz DOT.
graph = Phylo.to_networkx(tree)
with open("tree.nwk", "w") as fp:
    fp.write(newick)
nx.write_dot(graph, "tree.dot")
plt.show()
# Likelihood trace over the whole run.
plt.figure()
plt.xlim([0, n_iters + constraint_add])
plt.ylabel("Data Log Likelihood", fontsize=fontsize)
plt.xlabel("Iterations", fontsize=fontsize)
plt.plot(likelihoods)
plt.legend(loc='best', fontsize=12)
plt.savefig('online-likelihoods.png', bbox_inches='tight')

# 2-D plot of a copy of the sampler's tree.
final_tree = sampler.tree.copy()
plt.figure()
plot_tree_2d(final_tree, X, pca)

# On the copy, replace every leaf's point by the matching entry of y.
for node in final_tree.dfs():
    if not node.is_leaf():
        continue
    node.point = y[node.point]

# Graphviz rendering of the relabelled tree.
plt.figure()
newick = final_tree.to_newick()
tree = Phylo.read(StringIO(newick), 'newick')
Phylo.draw_graphviz(tree, prog='neato')
plt.savefig('tree.png', bbox_inches='tight')

# Export as Newick (no trailing newline) and as Graphviz DOT.
graph = Phylo.to_networkx(tree)
with open('tree.nwk', 'w') as fp:
    fp.write(newick)
nx.write_dot(graph, 'tree.dot')
plt.show()
def processing(raw_fasta_path, out_dir_path):
    """Deduplicate, align, cluster and summarise the sequences of a FASTA file.

    Steps, all writing into ``out_dir_path`` (created when missing):

    1. Remove duplicate records and write the remaining ones as FASTA.
    2. Run ``clustalw2`` on that file and read the resulting alignment;
       the aligned sequences are dumped to ``alignment.txt``.
    3. Draw the ClustalW guide tree to ``figure_with_labels.pdf``.
    4. Compute pairwise distances with ``distance``, cluster them
       hierarchically (centroid linkage) and save the dendrogram plus the
       reordered distance matrix as ``dendogram.png``.
    5. For every cluster write its sequences, a textual summary (consensus,
       PSSM, letter frequencies) and a frequency bar chart into
       ``clusters/``.

    Args:
        raw_fasta_path: path of the input FASTA file.
        out_dir_path: directory receiving all outputs.
    """
    if not os.path.exists(out_dir_path):
        logging.info("Making directory {0}".format(out_dir_path))
        os.makedirs(out_dir_path)

    deduplicated_fasta = remove_duplicates(SeqIO.parse(raw_fasta_path, "fasta"))
    base = os.path.basename(raw_fasta_path)
    fasta_path = os.path.join(out_dir_path, base)
    logging.info("Writing FASTA in {0}".format(fasta_path))
    SeqIO.write(deduplicated_fasta, fasta_path, "fasta")

    # Multiple sequence alignment
    cline = ClustalwCommandline("clustalw2", infile=fasta_path)
    stdout, stderr = cline()
    logging.info(cline)

    # Swap only the trailing extension: str.replace(".fasta", ...) would also
    # rewrite a ".fasta" occurring elsewhere in the path and would leave
    # inputs with another extension unchanged.
    fasta_stem = os.path.splitext(fasta_path)[0]
    clustalw_result_path = fasta_stem + ".aln"
    alignment_dict = SeqIO.to_dict(
        AlignIO.read(clustalw_result_path, "clustal"))

    # writing alignment table in .txt
    with open(os.path.join(out_dir_path, "alignment.txt"), "w") as fout:
        fout.write("\n".join(
            str(record.seq) for record in alignment_dict.itervalues()))

    # alignment tree drawing (needs pygraphviz, pylab)
    tree_path = fasta_stem + ".dnd"
    tree = Phylo.read(tree_path, "newick")
    tree.ladderize()
    # with labels
    Phylo.draw_graphviz(tree, label_func=lambda x: x.name.replace("ID=", ""))
    plt.savefig(os.path.join(out_dir_path, "figure_with_labels.pdf"))

    # Clustering
    ids = dict(enumerate(alignment_dict.keys()))
    distance_matrix = np.zeros([len(ids)] * 2)
    for i, j in itertools.combinations(xrange(len(ids)), r=2):
        distance_matrix[i][j] = distance_matrix[j][i] = \
            distance(alignment_dict[ids[i]], alignment_dict[ids[j]])

    # Compute and plot dendrogram
    fig = plt.figure()
    axdendro = fig.add_axes([0.09, 0.1, 0.2, 0.8])
    # NOTE(review): linkage() receives the square matrix as-is -- confirm
    # whether a condensed distance matrix was intended here.
    Y = linkage(distance_matrix, method="centroid")
    cutoff = 0.5 * max(Y[:, 2])
    clusters = fcluster(Y, cutoff, "distance")
    Z = dendrogram(Y, orientation="right", color_threshold=cutoff)
    axdendro.set_yticks([])

    # Plot distance matrix, rows and columns ordered like the dendrogram
    axmatrix = fig.add_axes([0.3, 0.1, 0.6, 0.8])
    index = Z["leaves"]
    distance_matrix = distance_matrix[index, :]
    distance_matrix = distance_matrix[:, index]
    im = axmatrix.matshow(distance_matrix,
                          aspect="auto", origin="lower")
    axmatrix.set_xticks([])
    axmatrix.set_yticks([])

    # Plot colorbar
    axcolor = fig.add_axes([0.91, 0.1, 0.02, 0.8])
    plt.colorbar(im, cax=axcolor)

    # Display and save figure
    dendogram_path = os.path.join(out_dir_path, "dendogram.png")
    fig.savefig(dendogram_path)

    fasta_clusters = defaultdict(list)
    for i, cluster in enumerate(clusters):
        fasta_id = ids[i]
        fasta_clusters[cluster].append(alignment_dict[fasta_id])

    # Saving information about clusters
    clusters_dir_path = os.path.join(out_dir_path, "clusters")
    if not os.path.exists(clusters_dir_path):
        os.makedirs(clusters_dir_path)
    clusters_meta_path = os.path.join(clusters_dir_path, "clusters_meta.txt")

    # ``with`` closes the summary file, which was previously left open.
    with open(clusters_meta_path, "w") as meta_file:
        for cluster_id, cluster in fasta_clusters.iteritems():
            cluster_path = os.path.join(
                clusters_dir_path, "cluster_{0}.fasta".format(cluster_id))
            SeqIO.write(cluster, cluster_path, "fasta")

            summary_align = AlignInfo.SummaryInfo(MultipleSeqAlignment(cluster))
            consensus = summary_align.dumb_consensus()
            pssm = summary_align.pos_specific_score_matrix(
                consensus, chars_to_ignore=['-'])

            frequencies = dict.fromkeys(IUPAC.protein.letters, 0)
            frequencies.update(
                (key, len(list(group)))
                for key, group in itertools.groupby(sorted(consensus)))
            # A consensus without any "X" must not raise KeyError.
            frequencies.pop("X", None)

            # NOTE(review): the line layout of this template could not be
            # recovered from the collapsed source; confirm the separators.
            meta_file.write(
                "Cluster ID: {0} Cluster size: {1} Consensus: {2} PSSM: {3} Frequencies in consensus: {4} ".format(
                    cluster_id, len(cluster), textwrap.fill(str(consensus)),
                    pssm, pprint.pformat(frequencies)))

            fig = plt.figure()
            pos = np.arange(len(IUPAC.protein.letters))
            width = .5  # gives histogram aspect to the bar diagram
            ax = plt.axes()
            ax.set_xticks(pos + (width / 2))
            ax.set_xticklabels(IUPAC.protein.letters)
            plt.bar(pos,
                    [frequencies[letter] for letter in IUPAC.protein.letters],
                    width, color='r')
            frequencies_path = os.path.join(
                clusters_dir_path, "frequencies_{0}.png".format(cluster_id))
            fig.savefig(frequencies_path)
            # One figure per cluster: close it so figures do not pile up.
            plt.close(fig)
def compute_tree(options, mat, names):
    """Cluster with UPGMA and export the tree as newick, png and dot.

    ``mat[a][b]`` is read as the distance between the names ``a`` and ``b``.
    The tree is written to ``tree_path(options)``; the png and dot files use
    the same path with "newick" replaced by "png" / "dot".
    """
    def nudge(value):
        # The tree constructor writes 0-distances as 1s for some reason, so
        # exact 0 and 1 are moved to marker values that are undone below.
        if value == 0.:
            return 1e-10
        if value == 1.:
            return 1.1
        return value

    # Convert to a Biopython matrix: lower triangle including the diagonal.
    matrix = [
        [nudge(float(mat[row_name][col_name])) for col_name in names[:i + 1]]
        for i, row_name in enumerate(names)
    ]
    dm = _DistanceMatrix(names, matrix)

    # upgma tree
    tree = DistanceTreeConstructor().upgma(dm)
    robust_makedirs(os.path.dirname(tree_path(options)))
    Phylo.write(tree, tree_path(options), "newick")

    # png tree -- note : doesn't work in toil
    def leaf_label(clade):
        # Nodes whose text contains "Inner" get an empty label.
        return "" if "Inner" in str(clade) else clade

    Phylo.draw_graphviz(tree, label_func=leaf_label, node_size=1000,
                        node_shape="s", font_size=10)
    pylab.savefig(tree_path(options).replace("newick", "png"))

    # graphviz: undirected networkx graph with the names pushed into a
    # "label" attribute
    nxgraph = nx.Graph(Phylo.to_networkx(tree))
    nxgraph = nx.convert_node_labels_to_integers(nxgraph,
                                                 label_attribute="label")
    for node_id in nxgraph.nodes():
        attrs = nxgraph.node[node_id]
        if "Inner" not in str(attrs["label"]):
            attrs["fontsize"] = 18
            continue
        attrs["label"] = "\"\""
        attrs["width"] = 0.001
        attrs["height"] = 0.001

    for source, target in nxgraph.edges():
        attrs = nxgraph.edge[source][target]
        # in graphviz, weight means something else, so make it a label
        weight = float(attrs["weight"])
        # undo hack from above
        # NOTE(review): a weight clamped to 1 by the first test is zeroed by
        # the second one -- confirm that this is intended.
        if weight > 1:
            weight = 1.
        if weight <= 1e-10 or weight == 1.:
            weight = 0.
        attrs["weight"] = None
        attrs["label"] = "{0:.3g}".format(float(weight) * 100.)
        attrs["fontsize"] = 14
        attrs["len"] = draw_len(weight)

    nx.write_dot(nxgraph, tree_path(options).replace("newick", "dot"))
def save_tree(tree, filename):
    """Draw ``tree`` with Graphviz ('dot') and save it as a titled PNG.

    The image is written to ``./Output/images/<filename>.png`` at 100 dpi and
    the figure is closed afterwards.
    """
    image_path = './Output/images/' + filename + '.png'
    Phylo.draw_graphviz(tree, prog='dot')
    plt.title(filename)
    plt.savefig(image_path, dpi=100)
    plt.close()
# Finish and save the score figure.
plt.legend(loc='best', fontsize=12)
plt.savefig('offline-scores.png', bbox_inches='tight')

# One likelihood curve per entry of ``likelihoods``.
plt.figure()
plt.xlim([0, n_iters])
# plt.ylim(ymin=-400)
plt.ylabel("Data Log Likelihood", fontsize=fontsize)
plt.xlabel("Iterations", fontsize=fontsize)
for name, likelihood in likelihoods.items():
    plt.plot(likelihood, label=name)
plt.legend(loc='best', fontsize=12)
plt.savefig('offline-likelihoods.png', bbox_inches='tight')

# One tree figure per model, saved as tree-<type>.png.
# NOTE(review): the loop variable ``type`` shadows the builtin of that name.
for type, model in models.items():
    final_tree = model.copy()
    plt.figure()
    plot_tree_2d(final_tree, X, pca)
    # On the copy, replace every leaf's point by the matching entry of y.
    for node in final_tree.dfs():
        if not node.is_leaf():
            continue
        node.point = y[node.point]
    newick = final_tree.to_newick()
    tree = Phylo.read(StringIO(newick), 'newick')
    Phylo.draw_graphviz(tree, prog='neato')
    plt.savefig('tree-%s.png' % type, bbox_inches='tight')
plt.show()
def processing(raw_fasta_path, out_dir_path):
    """Deduplicate, align, cluster and summarise the sequences of a FASTA file.

    Steps, all writing into ``out_dir_path`` (created when missing):

    1. Remove duplicate records and write the remaining ones as FASTA.
    2. Run ``clustalw2`` on that file and read the resulting alignment;
       the aligned sequences are dumped to ``alignment.txt``.
    3. Draw the ClustalW guide tree to ``figure_with_labels.pdf``.
    4. Compute pairwise distances with ``distance``, cluster them
       hierarchically (centroid linkage) and save the dendrogram plus the
       reordered distance matrix as ``dendogram.png``.
    5. For every cluster write its sequences, a textual summary (consensus,
       PSSM, letter frequencies) and a frequency bar chart into
       ``clusters/``.

    Args:
        raw_fasta_path: path of the input FASTA file.
        out_dir_path: directory receiving all outputs.
    """
    if not os.path.exists(out_dir_path):
        logging.info("Making directory {0}".format(out_dir_path))
        os.makedirs(out_dir_path)

    deduplicated_fasta = remove_duplicates(
        SeqIO.parse(raw_fasta_path, "fasta"))
    base = os.path.basename(raw_fasta_path)
    fasta_path = os.path.join(out_dir_path, base)
    logging.info("Writing FASTA in {0}".format(fasta_path))
    SeqIO.write(deduplicated_fasta, fasta_path, "fasta")

    # Multiple sequence alignment
    cline = ClustalwCommandline("clustalw2", infile=fasta_path)
    stdout, stderr = cline()
    logging.info(cline)

    # Swap only the trailing extension: str.replace(".fasta", ...) would also
    # rewrite a ".fasta" occurring elsewhere in the path and would leave
    # inputs with another extension unchanged.
    fasta_stem = os.path.splitext(fasta_path)[0]
    clustalw_result_path = fasta_stem + ".aln"
    alignment_dict = SeqIO.to_dict(
        AlignIO.read(clustalw_result_path, "clustal"))

    # writing alignment table in .txt
    with open(os.path.join(out_dir_path, "alignment.txt"), "w") as fout:
        fout.write(
            "\n".join(
                str(record.seq) for record in alignment_dict.itervalues()))

    # alignment tree drawing (needs pygraphviz, pylab)
    tree_path = fasta_stem + ".dnd"
    tree = Phylo.read(tree_path, "newick")
    tree.ladderize()
    # with labels
    Phylo.draw_graphviz(tree, label_func=lambda x: x.name.replace("ID=", ""))
    plt.savefig(os.path.join(out_dir_path, "figure_with_labels.pdf"))

    # Clustering
    ids = dict(enumerate(alignment_dict.keys()))
    distance_matrix = np.zeros([len(ids)] * 2)
    for i, j in itertools.combinations(xrange(len(ids)), r=2):
        distance_matrix[i][j] = distance_matrix[j][i] = \
            distance(alignment_dict[ids[i]], alignment_dict[ids[j]])

    # Compute and plot dendrogram
    fig = plt.figure()
    axdendro = fig.add_axes([0.09, 0.1, 0.2, 0.8])
    # NOTE(review): linkage() receives the square matrix as-is -- confirm
    # whether a condensed distance matrix was intended here.
    Y = linkage(distance_matrix, method="centroid")
    cutoff = 0.5 * max(Y[:, 2])
    clusters = fcluster(Y, cutoff, "distance")
    Z = dendrogram(Y, orientation="right", color_threshold=cutoff)
    axdendro.set_yticks([])

    # Plot distance matrix, rows and columns ordered like the dendrogram
    axmatrix = fig.add_axes([0.3, 0.1, 0.6, 0.8])
    index = Z["leaves"]
    distance_matrix = distance_matrix[index, :]
    distance_matrix = distance_matrix[:, index]
    im = axmatrix.matshow(distance_matrix,
                          aspect="auto", origin="lower")
    axmatrix.set_xticks([])
    axmatrix.set_yticks([])

    # Plot colorbar
    axcolor = fig.add_axes([0.91, 0.1, 0.02, 0.8])
    plt.colorbar(im, cax=axcolor)

    # Display and save figure
    dendogram_path = os.path.join(out_dir_path, "dendogram.png")
    fig.savefig(dendogram_path)

    fasta_clusters = defaultdict(list)
    for i, cluster in enumerate(clusters):
        fasta_id = ids[i]
        fasta_clusters[cluster].append(alignment_dict[fasta_id])

    # Saving information about clusters
    clusters_dir_path = os.path.join(out_dir_path, "clusters")
    if not os.path.exists(clusters_dir_path):
        os.makedirs(clusters_dir_path)
    clusters_meta_path = os.path.join(clusters_dir_path, "clusters_meta.txt")

    # ``with`` closes the summary file, which was previously left open.
    with open(clusters_meta_path, "w") as meta_file:
        for cluster_id, cluster in fasta_clusters.iteritems():
            cluster_path = os.path.join(
                clusters_dir_path, "cluster_{0}.fasta".format(cluster_id))
            SeqIO.write(cluster, cluster_path, "fasta")

            summary_align = AlignInfo.SummaryInfo(MultipleSeqAlignment(cluster))
            consensus = summary_align.dumb_consensus()
            pssm = summary_align.pos_specific_score_matrix(
                consensus, chars_to_ignore=['-'])

            frequencies = dict.fromkeys(IUPAC.protein.letters, 0)
            frequencies.update(
                (key, len(list(group)))
                for key, group in itertools.groupby(sorted(consensus)))
            # A consensus without any "X" must not raise KeyError.
            frequencies.pop("X", None)

            # NOTE(review): the line layout of this template could not be
            # recovered from the collapsed source; confirm the separators.
            meta_file.write(
                "Cluster ID: {0} Cluster size: {1} Consensus: {2} PSSM: {3} Frequencies in consensus: {4} ".format(
                    cluster_id, len(cluster), textwrap.fill(str(consensus)),
                    pssm, pprint.pformat(frequencies)))

            fig = plt.figure()
            pos = np.arange(len(IUPAC.protein.letters))
            width = .5  # gives histogram aspect to the bar diagram
            ax = plt.axes()
            ax.set_xticks(pos + (width / 2))
            ax.set_xticklabels(IUPAC.protein.letters)
            plt.bar(pos,
                    [frequencies[letter] for letter in IUPAC.protein.letters],
                    width, color='r')
            frequencies_path = os.path.join(
                clusters_dir_path, "frequencies_{0}.png".format(cluster_id))
            fig.savefig(frequencies_path)
            # One figure per cluster: close it so figures do not pile up.
            plt.close(fig)
from Bio import Phylo

# Imports left disabled in the original script:
# from pygraphviz import *
# from networkx.drawing import nx_agraph

# Read the Newick tree and draw it with the Graphviz 'dot' layout.
tree = Phylo.read('Plotly/small.newick', "newick")
Phylo.draw_graphviz(tree, prog="dot")