def newickToTree(s):
    """Convert a Newick string into an adjacency map.

    The string is parsed with ``newick.parse_tree``. A ``NewickVisitor`` is
    then driven over the parsed tree with ``dfs_traverse``, and whatever its
    ``getAdjMap()`` returns is handed back to the caller.
    """
    # Parser home page: http://www.brics.dk/~mailund/newick.html
    from newick import parse_tree

    parsed = parse_tree(s)
    visitor = NewickVisitor()
    parsed.dfs_traverse(visitor)
    return visitor.getAdjMap()
def parse_tree(newick_str):
    """Parse ``newick_str`` with ``newick.parse_tree`` and return the result.

    Raises:
        PhyloException: with the message "Error parsing tree" when the
            parser raises ``newick.lexer.LexerError``. Other exceptions
            propagate unchanged.
    """
    try:
        return newick.parse_tree(newick_str)
    except newick.lexer.LexerError:
        raise PhyloException("Error parsing tree")
def parse_tree(treestring):
    """Parse a Newick string and add parent links to the resulting tree.

    Thin wrapper around ``newick.parse_tree`` that additionally calls
    ``newick.tree.add_parent_links`` on the parsed tree before returning it.

    NOTE(review): no separate leaf lookup is built here; if callers expect
    one, confirm that ``add_parent_links`` provides it.
    """
    parsed = newick.parse_tree(treestring)
    newick.tree.add_parent_links(parsed)
    return parsed
def newick_tree_to_pygraph(string):
    """Build a graph from a Newick string.

    Leaves become nodes named by their identifier. Every internal node gets
    a fresh integer label (1, 2, ... in the order the nodes are first
    visited), and an edge ``(parent, child)`` is added for each of its
    children.

    Returns the populated ``graph()`` instance.
    """
    result = graph()
    # One-element list so the nested function can bump the counter in place.
    counter = [0]

    def visit(node):
        """Add ``node`` and its subtree to the graph; return the node's label."""
        if isinstance(node, newick.tree.Leaf):
            # A leaf is identified by its own label.
            label = node.identifier
            result.add_node(label)
            return label

        # Internal node: number it before descending so that parents always
        # receive a smaller label than the internal nodes below them.
        counter[0] += 1
        label = counter[0]
        result.add_node(label)
        for edge in node.get_edges():
            # The first element of each edge is the child subtree.
            child_label = visit(edge[0])
            result.add_edge((label, child_label))
        return label

    visit(newick.parse_tree(string))
    return result
def main(organism, gene_id, write_fasta=False):
    """Collect per-ortholog protein data for a gene and analyze it.

    Fetches the orthologs and the Compara tree of ``gene_id`` in ``organism``
    through ``EnsemblComparaRest`` (with a cache directory named ``cache``
    under the current working directory). For every gene it picks the first
    transcript whose protein is a leaf of the tree, annotates it with its
    distance to the query protein, its domains and its statistics, and
    finally passes the collected items to ``analyze_comparative_set``.

    Args:
        organism: Organism of the query gene, passed to the REST wrapper.
        gene_id: Identifier of the query gene.
        write_fasta: When true, also write the protein records of all
            processed genes in FASTA format to
            ``"%s_%s_orthologs.txt" % (organism, gene_id)``. Defaults to
            False, which matches the previously hard-coded behavior.

    Raises:
        IndexError: If a gene has no transcript whose protein is in the tree.
    """
    cache_dir = os.path.join(os.getcwd(), "cache")
    ensembl_rest = EnsemblComparaRest(cache_dir)
    orthologs = ensembl_rest.orthologs(organism, gene_id)

    compara_tree = ensembl_rest.compara_tree(organism, gene_id)
    # Drop the last character and wrap the rest in "(...);" before parsing.
    # NOTE(review): presumably this turns the service's output into a form
    # the newick parser accepts -- confirm against the REST response.
    compara_tree = '(' + compara_tree[:-1] + ');'
    tree_rec = newick.parse_tree(compara_tree.strip())
    d_vis = DistanceVisitor()
    tree_rec.dfs_traverse(d_vis)

    # A set keeps the per-transcript membership test in the loop O(1).
    tree_proteins = set(leaf.identifier for leaf in tree_rec.leaves)

    # The query gene goes first so its protein becomes the distance reference.
    orthologs = [(organism, gene_id)] + orthologs
    out_recs = []
    root_id = None
    all_items = []
    for o_organism, o_id in orthologs:
        transcripts = ensembl_rest.transcripts(o_organism, o_id)
        # First (transcript, protein) pair whose protein is a tree leaf.
        match = next(
            ((tx, p) for (tx, p) in transcripts if p in tree_proteins),
            None)
        if match is None:
            # Same exception type as the former ``[...][0]`` lookup, but with
            # a message that names the offending gene.
            raise IndexError(
                "no transcript of %s %s has a protein in the Compara tree"
                % (o_organism, o_id))
        tx, p = match

        cur_item = EnsemblComparaTranscript(o_organism, o_id, tx, p)
        if root_id is None:
            root_id = p
        # Graph nodes are looked up with the protein id in single quotes.
        cur_item.distance = networkx.dijkstra_path_length(
            d_vis.graph, "'%s'" % root_id, "'%s'" % p)
        cur_item.domains = ensembl_rest.protein_domains(o_organism, o_id, tx)
        cur_item.statistics = ensembl_rest.protein_stats(o_organism, o_id, tx)
        all_items.append(cur_item)

        if write_fasta:
            out_rec = ensembl_rest.protein_fasta(o_organism, o_id, tx)
            out_rec.id = o_id
            out_rec.description = o_organism
            out_recs.append(out_rec)

    if out_recs:
        out_name = "%s_%s_orthologs.txt" % (organism, gene_id)
        with open(out_name, "w") as out_handle:
            SeqIO.write(out_recs, out_handle, "fasta")
    analyze_comparative_set(all_items)
def main(organism, gene_id, write_fasta=False):
    """Collect per-ortholog protein data for a gene and analyze it.

    Fetches the orthologs and the Compara tree of ``gene_id`` in ``organism``
    through ``EnsemblComparaRest`` (with a cache directory named ``cache``
    under the current working directory). For every gene it picks the first
    transcript whose protein is a leaf of the tree, annotates it with its
    distance to the query protein, its domains and its statistics, and
    finally passes the collected items to ``analyze_comparative_set``.

    Args:
        organism: Organism of the query gene, passed to the REST wrapper.
        gene_id: Identifier of the query gene.
        write_fasta: When true, also write the protein records of all
            processed genes in FASTA format to
            ``"%s_%s_orthologs.txt" % (organism, gene_id)``. Defaults to
            False, which matches the previously hard-coded behavior.

    Raises:
        IndexError: If a gene has no transcript whose protein is in the tree.
    """
    cache_dir = os.path.join(os.getcwd(), "cache")
    ensembl_rest = EnsemblComparaRest(cache_dir)
    orthologs = ensembl_rest.orthologs(organism, gene_id)

    compara_tree = ensembl_rest.compara_tree(organism, gene_id)
    # Drop the last character and wrap the rest in "(...);" before parsing.
    # NOTE(review): presumably this turns the service's output into a form
    # the newick parser accepts -- confirm against the REST response.
    compara_tree = '(' + compara_tree[:-1] + ');'
    tree_rec = newick.parse_tree(compara_tree.strip())
    d_vis = DistanceVisitor()
    tree_rec.dfs_traverse(d_vis)

    # A set keeps the per-transcript membership test in the loop O(1).
    tree_proteins = set(leaf.identifier for leaf in tree_rec.leaves)

    # The query gene goes first so its protein becomes the distance reference.
    orthologs = [(organism, gene_id)] + orthologs
    out_recs = []
    root_id = None
    all_items = []
    for o_organism, o_id in orthologs:
        transcripts = ensembl_rest.transcripts(o_organism, o_id)
        # First (transcript, protein) pair whose protein is a tree leaf.
        match = next(
            ((tx, p) for (tx, p) in transcripts if p in tree_proteins),
            None)
        if match is None:
            # Same exception type as the former ``[...][0]`` lookup, but with
            # a message that names the offending gene.
            raise IndexError(
                "no transcript of %s %s has a protein in the Compara tree"
                % (o_organism, o_id))
        tx, p = match

        cur_item = EnsemblComparaTranscript(o_organism, o_id, tx, p)
        if root_id is None:
            root_id = p
        # Graph nodes are looked up with the protein id in single quotes.
        cur_item.distance = networkx.dijkstra_path_length(
            d_vis.graph, "'%s'" % root_id, "'%s'" % p)
        cur_item.domains = ensembl_rest.protein_domains(o_organism, o_id, tx)
        cur_item.statistics = ensembl_rest.protein_stats(o_organism, o_id, tx)
        all_items.append(cur_item)

        if write_fasta:
            out_rec = ensembl_rest.protein_fasta(o_organism, o_id, tx)
            out_rec.id = o_id
            out_rec.description = o_organism
            out_recs.append(out_rec)

    if out_recs:
        out_name = "%s_%s_orthologs.txt" % (organism, gene_id)
        with open(out_name, "w") as out_handle:
            SeqIO.write(out_recs, out_handle, "fasta")
    analyze_comparative_set(all_items)
import sys
from newick import parse_tree

# Command-line script: the first argument is the path of a Newick tree file.
# The file is parsed and str() of the parsed tree is written to the path
# obtained by replacing every '.tre' in the input path with '.parsed.tre'.
# Both paths are echoed to stdout.
intree = sys.argv[1]
# Context managers make sure both file handles are closed (and the output
# flushed) even if parsing fails.
with open(intree) as infile:
    text = infile.read()
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print(intree)
# NOTE(review): if the input path contains no '.tre', outtree equals intree
# and the input file is overwritten -- confirm this is acceptable.
outtree = intree.replace('.tre', '.parsed.tre')
print(outtree)
with open(outtree, "w") as outfile:
    outfile.write(str(parse_tree(text)))