def nextFile(self):
    """Pop the first pending input file and return it as an open stream.

    The first entry of ``self.infiles`` is removed and opened with
    ``util.open_stream``.  Returns False when no input files remain.
    """
    # guard clause: nothing left to open
    if len(self.infiles) == 0:
        return False

    head, rest = self.infiles[0], self.infiles[1:]
    self.infiles = rest
    return util.open_stream(head)
def read_log(filename):
    """Reads a DLCoal log

    Yields one evaluated Python object per line of the log.  Lines that
    start with "seed:" are skipped.  While a line is evaluated the name
    ``inf`` is bound to ``util.INF``.

    filename -- a filename or stream (opened with util.open_stream)
    """
    stream = util.open_stream(filename)
    try:
        for line in stream:
            if line.startswith("seed:"):
                continue
            # SECURITY NOTE: eval() executes whatever expression the log
            # line contains.  Only read logs that come from a trusted source.
            yield eval(line, {"inf": util.INF})
    finally:
        # Release the handle when the log is exhausted or the generator is
        # closed early.  Never close an object the caller handed in.
        if stream is not filename:
            stream.close()
def optimize_model(self, gtree, aln):
    """Optimizes the IQTREE model

    Writes `gtree` and `aln` to temporary files, runs ``iqtree-omp`` on
    them through the shell and stores the first two lines of the resulting
    ``<self.pre>.treefix_tmp.sitelh`` file (with the first "1" replaced by
    "2" and "Site_Lh" replaced by "Tree1") in ``self.bsitelh``.

    The alignment file is kept; its path is stored in ``self.seqfile``.
    The temporary tree file is always removed.
    """
    fd, btreefile = tempfile.mkstemp('.btree')
    os.close(fd)
    try:
        gtree.write(btreefile)

        # alignment file outlives this call (path kept on self)
        fd, seqfile = tempfile.mkstemp('.align')
        os.close(fd)
        out = util.open_stream(seqfile, "w")
        phylip.write_phylip_align(out, aln, strip_names=False)
        out.close()
        self.seqfile = seqfile

        # NOTE(review): the command is built by string interpolation and run
        # through the shell; paths containing spaces or shell metacharacters
        # will break it.  The exit status is not checked.
        os.system('iqtree-omp -redo -nt %s -m %s -st %s -s %s -te %s -pre %s.treefix_tmp -wsl > /dev/null' % (self.cpu, self.model, self.type, self.seqfile, btreefile, self.pre))

        # keep only the two header lines of the site-likelihood file
        with open("%s.treefix_tmp.sitelh" % self.pre, 'r') as f:
            self.bsitelh = (f.readline().replace("1", "2", 1) +
                            f.readline().replace("Site_Lh", "Tree1", 1))

        os.system('rm %s.treefix_tmp.*' % self.pre)
    finally:
        # the tree file is only needed for the iqtree run
        os.remove(btreefile)
def write(self, filename=sys.stdout, delim="\t"):
    """Write a table to a file or stream.

    If 'filename' is a string it will be opened as a file.
    If 'filename' is a stream it will be written to directly.
    """
    # remember filename for later saving
    if isinstance(filename, str):
        self.filename = filename

    out = util.open_stream(filename, "w")
    self.write_header(out, delim=delim)

    # local alias for the column types
    types = self.types

    # one output line per row; missing columns become empty cells
    for row in self:
        cells = [types[header].__str__(row[header]) if header in row else ''
                 for header in self.headers]
        out.write(delim.join(cells) + "\n")
def drawDistRuler(names, dists, scale=500, padding=10, textsize=12, notchsize=2, labelpadding=5, distsize=9, filename=sys.stdout):
    """Produce a ruler of pairwise distances

    Draws a horizontal line of length scale*max(dists) as SVG.  For every
    (name, dist) pair a notch is drawn at x = scale*dist, with the name
    above the line and the distance ("%.3f") below it, both rotated -90.
    """
    nameswidth = textsize * max(map(len, names))

    out = svg.Svg(util.open_stream(filename, "w"))
    ruler_len = scale * max(dists)
    out.beginSvg(ruler_len + 2*padding,
                 2*padding + nameswidth + 5*distsize)
    out.beginTransform(("translate", padding, nameswidth + padding))

    # draw ruler
    out.line(0, 0, ruler_len, 0)

    half_text = textsize / 2.0
    for name, dist in zip(names, dists):
        x = scale * dist
        label_x = x + half_text
        out.text(name, label_x, - labelpadding, textsize, angle=-90)
        out.line(x, notchsize, x, - notchsize)
        out.text("%.3f" % dist, label_x, labelpadding + distsize*3.5,
                 distsize, angle=-90)

    out.endTransform()
    out.endSvg()
def iterPfam(filename):
    """Iterate over (query, domains) pairs of a Pfam search report.

    For every line starting with "Query sequence" the query name is taken
    from that line; the following "Parsed for domains:" section (after two
    header lines) is parsed into a list of Domain objects.

    Iteration ends at end of file.  A report that is cut off before the
    domain table of a query ends the iteration without yielding that query.
    """
    infile = util.open_stream(filename)

    def getQuery(infile):
        # name of the next query, or None at end of file
        for line in infile:
            if line.startswith("Query sequence"):
                return line.rstrip().replace("Query sequence: ", "")
        return None

    def getDomains(infile):
        domains = []
        for line in infile:
            if line.startswith("Parsed for domains:"):
                break
        next(infile)  # skip header 1
        next(infile)  # skip header 2
        for line in infile:
            # a blank or indented line ends the table
            if len(line) <= 1 or line[0] in "\t ":
                break
            domains.append(Domain(line))
        return domains

    while True:
        query = getQuery(infile)
        if query is None:
            break
        try:
            domains = getDomains(infile)
        except StopIteration:
            # File ended inside the header; do not let StopIteration leak
            # out of the generator body (PEP 479).
            return
        yield query, domains
def compute_cost(self, gtree):
    """Returns the deep coalescence cost

    Writes the species tree and the gene tree (leaves renamed through
    self.gene2species) to self.treefile, runs the external command `cmd`
    on that file and parses the cost from its output.

    Raises Exception if the command exits with a non-zero return code.
    """
    # write species tree and gene tree using species map
    treeout = util.open_stream(self.treefile, 'w')
    self.stree.write(treeout, oneline=True, writeData=lambda x: "")
    treeout.write('\n')
    gtree.write(treeout,
                namefunc=lambda name: self.gene2species(name),
                oneline=True, writeData=lambda x: "")
    treeout.write('\n')
    treeout.close()

    # execute command
    proc = subprocess.Popen([cmd, '-i', self.treefile],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            universal_newlines=True)
    # communicate() drains the pipe while waiting; wait() followed by a
    # read can deadlock once the child fills the OS pipe buffer
    output, _ = proc.communicate()
    ret = proc.returncode
    if ret != 0:
        raise Exception("genetreereport failed with returncode %d" % ret)

    # parse output
    cost = None
    for line in output.splitlines():
        toks = line.split(':')
        # spelling kept as is; presumably it matches the tool's output
        if toks[0] == "deep coalecense":
            cost = int(toks[1])
            break
    assert cost is not None

    return cost
def drawDistRuler(names, dists, scale=500, padding=10, textsize=12, notchsize=2, labelpadding=5, distsize=9, filename=sys.stdout):
    """Produce a ruler of pairwise distances

    The ruler is an SVG line of length scale * max(dists).  Each entry of
    `dists` gets a notch at scale * dist, labelled with the matching entry
    of `names` above and the distance formatted as "%.3f" below.
    """
    nameswidth = textsize * max(map(len, names))

    canvas = svg.Svg(util.open_stream(filename, "w"))
    total = scale * max(dists)
    canvas.beginSvg(total + 2 * padding,
                    2 * padding + nameswidth + 5 * distsize)
    canvas.beginTransform(("translate", padding, nameswidth + padding))

    # draw ruler
    canvas.line(0, 0, total, 0)

    for idx in range(min(len(names), len(dists))):
        name, dist = names[idx], dists[idx]
        x = scale * dist
        canvas.text(name, x + textsize / 2.0, -labelpadding, textsize,
                    angle=-90)
        canvas.line(x, notchsize, x, -notchsize)
        canvas.text("%.3f" % dist, x + textsize / 2.0,
                    labelpadding + distsize * 3.5, distsize, angle=-90)

    canvas.endTransform()
    canvas.endSvg()
def make_color_legend(filename, colormap, start, end, step, width=100, height=10, display=False):
    """Draw a colormap legend as an SVG strip of coloured rectangles.

    One rectangle is drawn per value of util.frange(start, end + step, step),
    coloured with colormap.get(value).  When `filename` is None a temporary
    file is used and removed at the end.  When `display` is true the file is
    opened with the external ``display`` program.
    """
    from rasmus import util

    temp = filename is None
    if temp:
        filename = util.tempfile(".", "colormap", ".svg")

    s = svg.Svg(util.open_stream(filename, "w"))
    s.beginSvg(width, height)

    xscale = float(width) / (end + step - start)
    box_width = step * xscale
    for i in util.frange(start, end + step, step):
        color = colormap.get(i)
        s.rect((i - start) * xscale, 0, box_width, height, color, color)

    s.endSvg()
    s.close()

    # display
    if display:
        os.system("display %s" % filename)

    # clean up temp files
    if temp:
        os.remove(filename)
def test_open_stream2(self):
    """open_stream should close file"""
    # a stream opened from a path must really close
    stream = util.open_stream(__file__)
    stream.close()
    assert stream.closed
def read_synteny_blocks(filename, feature="synteny", extra=lambda r, cols: None):
    """Read synteny blocks from a tab-delimited file.

    Each line holds at least nine columns:
        species1, chrom1, start1, end1,
        species2, chrom2, start2, end2, direction
    An optional tenth column is the block name (default "").  Any columns
    after the tenth are passed to `extra(block, columns)`.

    Returns a list of SyntenyBlock objects.
    """
    infile = util.open_stream(filename)
    blocks = []

    for line in infile:
        # drop the line terminator so the last column (name or an extra
        # column) does not carry a trailing newline
        tokens = line.rstrip("\n").split("\t")
        (species1, chrom1, start1, end1,
         species2, chrom2, start2, end2, direction) = tokens[:9]
        name = tokens[9] if len(tokens) > 9 else ""

        block = SyntenyBlock(
            regionlib.Region(species1, chrom1, feature,
                             int(start1), int(end1), 1),
            regionlib.Region(species2, chrom2, feature,
                             int(start2), int(end2), int(direction)),
            name=name)
        blocks.append(block)
        extra(block, tokens[10:])

    return blocks
def compute_cost(self, gtree):
    """Returns the DTL cost

    Writes the species tree and the gene tree (leaves renamed through
    self.gene2species) to self.treefile, runs the external command `cmd`
    with the duplication, transfer and loss costs, and parses the minimum
    reconciliation cost from its output.

    Raises Exception if the command exits with a non-zero return code.
    """
    # write species tree and gene tree using species map
    treeout = util.open_stream(self.treefile, 'w')
    self.stree.write(treeout, oneline=True)
    treeout.write('\n')
    gtree.write(treeout,
                namefunc=lambda name: self.gene2species(name),
                oneline=True)
    treeout.write('\n')
    treeout.close()

    # execute command
    proc = subprocess.Popen([cmd, '-i', self.treefile,
                             '-D', str(self.dupcost),
                             '-T', str(self.transfercost),
                             '-L', str(self.losscost)],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            universal_newlines=True)
    # communicate() drains the pipe while waiting; wait() followed by a
    # read can deadlock once the child fills the OS pipe buffer
    output, _ = proc.communicate()
    ret = proc.returncode
    if ret != 0:
        raise Exception("DTL failed with returncode %d" % ret)

    # parse output
    cost = None
    for line in output.splitlines():
        toks = line.split(':')
        if toks[0] == "The minimum reconciliation cost is":
            cost = int(toks[1])
            break
    assert cost is not None

    return cost
def write(self, filename=sys.stdout, delim="\t"):
    """Write a table to a file or stream.

    If 'filename' is a string it will be opened as a file.
    If 'filename' is a stream it will be written to directly.
    """
    # remember filename for later saving
    if isinstance(filename, str):
        self.filename = filename

    out = util.open_stream(filename, "w")
    self.write_header(out, delim=delim)

    # tmp variable
    types = self.types

    def cell(row, header):
        # formatted value of one column, '' when the row lacks it
        if header in row:
            return types[header].__str__(row[header])
        return ''

    # write data
    for row in self:
        line = delim.join([cell(row, header) for header in self.headers])
        out.write(line + "\n")
def iterPfam(filename):
    """Iterate over (query, domains) pairs of a Pfam search report.

    The query name comes from each line starting with "Query sequence".
    The domains are the rows of the following "Parsed for domains:" table
    (two header lines are skipped), each wrapped in a Domain object.

    Iteration stops at end of file.  If the file ends before the table of
    the current query begins, that query is not yielded.
    """
    infile = util.open_stream(filename)

    def getQuery(infile):
        # returns None when no further query line exists
        for line in infile:
            if line.startswith("Query sequence"):
                name = line.rstrip().replace("Query sequence: ", "")
                return name
        return None

    def getDomains(infile):
        domains = []
        for line in infile:
            if line.startswith("Parsed for domains:"):
                break
        next(infile)  # skip header 1
        next(infile)  # skip header 2
        for line in infile:
            # the table ends at a blank or indented line
            if len(line) <= 1 or line[0] in "\t ":
                break
            domains.append(Domain(line))
        return domains

    while True:
        query = getQuery(infile)
        if query is None:
            break
        try:
            domains = getDomains(infile)
        except StopIteration:
            # Truncated report: end the generator explicitly instead of
            # letting StopIteration escape the generator body (PEP 479).
            return
        yield query, domains
def write_fasta_ordered(filename, names, seqs, width=None):
    """Write a FASTA in array style to a file

    `names` and `seqs` are parallel sequences.  Each record is a ">name"
    header line followed by the sequence written by util.printwrap with
    the given `width`.
    """
    out = util.open_stream(filename, "w")

    for name, seq in izip(names, seqs):
        out.write(">%s" % name + "\n")
        util.printwrap(seq, width, out=out)
def write_fasta_ordered(filename, names, seqs, width=None):
    """Write a FASTA in array style to a file

    Records are written in the order of the parallel sequences `names`
    and `seqs`: a ">name" header line, then the sequence via
    util.printwrap(seq, width).
    """
    out = util.open_stream(filename, "w")

    for record in izip(names, seqs):
        name, seq = record
        header = ">%s" % name
        out.write(header + "\n")
        util.printwrap(seq, width, out=out)
def recon_root(self, gtree, newCopy=True, returnCost=False):
    """
    Returns the rerooted tree with min deep coalescence cost
    Generalizes compute_cost to multiple trees.

    Every rooting produced by self._reroot_helper is written to
    self.treefile after the species tree, the external command `cmd` is run
    once on that file, and the rooting with the smallest reported cost is
    selected.

    Returns the rerooted tree, or (tree, mincost) if `returnCost` is true.
    Raises Exception if the command exits with a non-zero return code.
    """
    # write species tree and gene tree using species map
    treeout = util.open_stream(self.treefile, 'w')
    self.stree.write(treeout, oneline=True, writeData=lambda x: "")
    treeout.write('\n')
    edges = []
    for gtree, edge in self._reroot_helper(gtree, newCopy=newCopy,
                                           returnEdge=True):
        gtree.write(treeout,
                    namefunc=lambda name: self.gene2species(name),
                    oneline=True, writeData=lambda x: "")
        treeout.write('\n')
        edges.append(edge)
    treeout.close()

    # execute command
    proc = subprocess.Popen([cmd, '-i', self.treefile],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            universal_newlines=True)
    # communicate() drains the pipe while waiting; wait() followed by a
    # read can deadlock once the child fills the OS pipe buffer
    output, _ = proc.communicate()
    ret = proc.returncode
    if ret != 0:
        raise Exception("genetreereport failed with returncode %d" % ret)

    # parse output: a "gene tree #k" line selects the slot that the next
    # cost line fills
    i = None
    n = len(edges)
    costs = [None] * n
    for line in output.splitlines():
        m = re.match(r"\[ gene tree #(\d+) \]", line)
        if m:
            i = int(m.groups()[0]) - 1

        if i is not None:
            m = re.match(r"\[ deep coalecense: (\d+) \]", line)
            if m:
                costs[i] = int(m.groups()[0])
    assert all(x is not None for x in costs)

    # find minimum cost tree
    ndx, mincost = min(enumerate(costs), key=lambda it: it[1])
    minroot = edges[ndx]

    # `gtree` and `edge` still hold the last values produced by the reroot
    # loop above; reroot only if that last rooting is not the best one
    if edge != minroot:
        node1, node2 = minroot
        if node1.parent != node2:
            node1, node2 = node2, node1
        assert node1.parent == node2
        treelib.reroot(gtree, node1.name, newCopy=False, keepName=True)

    if returnCost:
        return gtree, mincost
    else:
        return gtree
def write_boot_trees(filename, trees, counts=None):
    """Write trees as one-line newick strings, one per line.

    filename -- a filename or stream
    trees    -- trees providing get_one_line_newick()
    counts   -- how many times each tree is written (default: once each)
    """
    out = util.open_stream(filename, "w")

    if counts is None:
        counts = [1] * len(trees)

    for tree, count in zip(trees, counts):
        # the newick text does not change between repetitions
        line = tree.get_one_line_newick() + "\n"
        for _ in range(count):
            out.write(line)
def recon_root(self, gtree, newCopy=True, returnCost=False):
    """
    Returns the rerooted tree with min DTL cost
    Generalizes compute_cost to multiple trees.

    Every rooting produced by self._reroot_helper is written to
    self.treefile after the species tree, the external command `cmd` is run
    once with the duplication, transfer and loss costs, and the rooting
    with the smallest reported cost is selected.

    Returns the rerooted tree, or (tree, mincost) if `returnCost` is true.
    Raises Exception if the command exits with a non-zero return code.
    """
    # write species tree and gene tree using species map
    treeout = util.open_stream(self.treefile, 'w')
    self.stree.write(treeout, oneline=True)
    treeout.write('\n')
    edges = []
    for gtree, edge in self._reroot_helper(gtree, newCopy=newCopy,
                                           returnEdge=True):
        gtree.write(treeout,
                    namefunc=lambda name: self.gene2species(name),
                    oneline=True)
        treeout.write('\n')
        edges.append(edge)
    treeout.close()

    # execute command
    proc = subprocess.Popen([cmd, '-i', self.treefile,
                             '-D', str(self.dupcost),
                             '-T', str(self.transfercost),
                             '-L', str(self.losscost)],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            universal_newlines=True)
    # communicate() drains the pipe while waiting; wait() followed by a
    # read can deadlock once the child fills the OS pipe buffer
    output, _ = proc.communicate()
    ret = proc.returncode
    if ret != 0:
        raise Exception("DTL failed with returncode %d" % ret)

    # parse output: costs are reported in the order the trees were written
    i = 0
    n = len(edges)
    costs = [None] * n
    for line in output.splitlines():
        toks = line.split(':')
        if toks[0] == "The minimum reconciliation cost is":
            assert i < n
            costs[i] = int(toks[1])
            i += 1
    assert all(x is not None for x in costs)

    # find minimum cost tree
    ndx, mincost = min(enumerate(costs), key=lambda it: it[1])
    minroot = edges[ndx]

    # `gtree` and `edge` still hold the last values produced by the reroot
    # loop above; reroot only if that last rooting is not the best one
    if edge != minroot:
        node1, node2 = minroot
        if node1.parent != node2:
            node1, node2 = node2, node1
        assert node1.parent == node2
        treelib.reroot(gtree, node1.name, newCopy=False, keepName=True)

    if returnCost:
        return gtree, mincost
    else:
        return gtree
def test_open_stream1(self):
    """open_stream shouldn't close existing stream"""
    wrapped = util.open_stream(sys.stdin)

    # attribute access must work on the returned object
    wrapped.read

    # closing the returned object must leave sys.stdin open
    wrapped.close()
    assert not sys.stdin.closed
def write(self, filename=sys.stdout, names=None, width=80):
    """Write sequences in Fasta format

    filename -- a filename or stream
    names    -- keys to write, in order (default: self.names)
    width    -- line width handed to util.printwrap
    """
    out = util.open_stream(filename, "w")

    keys = self.names if names is None else names

    for key in keys:
        out.write(">" + key + "\n")
        util.printwrap(self[key], width, out=out)
def write(self, filename=sys.stdout, names=None, width=80):
    """Write sequences in Fasta format

    Each selected key is written as a ">key" header line followed by its
    sequence, wrapped by util.printwrap at `width`.  When `names` is None
    all of self.names are written.
    """
    out = util.open_stream(filename, "w")

    if names is None:
        names = self.names

    for key in names:
        header = ">" + key
        out.write(header + "\n")
        util.printwrap(self[key], width, out=out)
def make_fasta_index(filename):
    """I also have a faster C program called formatfa

    Returns a dict mapping each FASTA header (text after ">", right
    stripped) to infile.tell() taken right after the header line is read.
    """
    infile = util.open_stream(filename)

    index = {}
    for line in util.SafeReadIter(infile):
        if not line.startswith(">"):
            continue
        offset = infile.tell()
        index[line[1:].rstrip()] = offset

    return index
def write_gff(filename, regions, format=GFF3):
    """
    Write regions to a file stream

    filename - a filename or file stream
    regions  - a list of Region objects
    format   - object whose write_region(region, out=...) writes one region
    """
    stream = util.open_stream(filename, "w")

    for entry in regions:
        format.write_region(entry, out=stream)
def read_gene2species(* filenames):
    """
    Reads a gene2species file

    Any number of filenames may be given; the mappings of all files are
    concatenated in argument order.  Comment lines are skipped.

    Returns a function that will map gene names to species names.
    """
    # read every file exactly once; `maps` exists even with no filenames
    maps = []
    for filename in filenames:
        maps.extend(util.read_delim(util.skip_comments(
            util.open_stream(filename))))
    return make_gene2species(maps)
def optimize_model(self, gtree, aln):
    """Optimizes the IQTREE model

    Writes `gtree` to a temporary ".btree" file and `aln` to a temporary
    ".align" file in PHYLIP format, and records the two paths in
    self.btreefile and self.seqfile.
    """
    # tree file
    tree_fd, tree_path = tempfile.mkstemp('.btree')
    os.close(tree_fd)
    gtree.write(tree_path)
    self.btreefile = tree_path

    # alignment file
    aln_fd, aln_path = tempfile.mkstemp('.align')
    os.close(aln_fd)
    aln_out = util.open_stream(aln_path, "w")
    phylip.write_phylip_align(aln_out, aln, strip_names=False)
    aln_out.close()
    self.seqfile = aln_path
def write_dist_matrix(mat, labels=None, out=sys.stdout):
    """Write a distance matrix.

    The first line is the number of rows.  Every following line is a row
    label ("%8s ") followed by the row values ("%10f ").

    mat    -- matrix as a sequence of rows
    labels -- row labels; when None the row index, padded by
              phylip_padding, is used
    out    -- a filename or stream
    """
    out = util.open_stream(out, "w")

    out.write("%d\n" % len(mat))

    for i, row in enumerate(mat):
        if labels is None:
            out.write("%8s " % phylip_padding(str(i)))
        else:
            out.write("%8s " % labels[i])

        for val in row:
            out.write("%10f " % val)
        out.write("\n")
def read_tree_color_map(filename):
    """Build a tree color map from a tab-delimited file.

    Each line holds four columns: expr, red, green, blue.  The colour
    components are converted to floats and the (expr, colour) pairs are
    passed to phylo.make_gene2species; the resulting lookup is applied to
    node names through treelib.tree_color_map.
    """
    infile = util.open_stream(filename)

    maps = []
    for line in infile:
        expr, red, green, blue = line.rstrip().split("\t")
        rgb = [float(component) for component in (red, green, blue)]
        maps.append([expr, rgb])

    name2color = phylo.make_gene2species(maps)

    def leafmap(node):
        return name2color(node.name)

    return treelib.tree_color_map(leafmap)
def readTreeDistrib(filename):
    """Read branch length distributions from a tab-delimited file.

    The first column of each line is a name (converted to int when it is
    all digits); the remaining columns are floats.

    Returns a dict mapping name -> list of floats.
    """
    infile = util.open_stream(filename)
    lengths = {}

    for line in infile:
        tokens = line.split("\t")
        name, values = tokens[0], tokens[1:]

        if name.isdigit():
            name = int(name)

        lengths[name] = [float(value) for value in values]

    return lengths
def iter_fasta(filename, keyfunc=firstword, valuefunc = lambda x: x):
    """Iterate through the sequences of a FASTA file

    Yields (key, value) pairs where key is keyfunc(header text after ">")
    and value is valuefunc(concatenated, right-stripped sequence lines).
    Lines before the first header, and records whose key is "", are
    skipped.
    """
    key = ""
    chunks = []

    for line in util.open_stream(filename):
        if line.startswith(">"):
            # a new header closes the previous record
            if key != "":
                yield (key, valuefunc("".join(chunks)))
            key = keyfunc(line[1:].rstrip())
            chunks = []
        elif key != "":
            chunks.append(line.rstrip())

    # last record
    if key != "":
        yield (key, valuefunc("".join(chunks)))
def writeTreeDistrib(out, lengths):
    """Write branch length distributions as tab-delimited lines.

    Each line is the node name (or str(node) when the key is not a
    treelib.TreeNode) followed by its lengths formatted as "%f".  Entries
    that are empty or whose lengths are all equal are skipped.
    """
    out = util.open_stream(out, "w")

    for node, lens in lengths.items():
        # nothing informative to record
        if len(lens) == 0 or max(lens) == min(lens):
            continue

        label = node.name if isinstance(node, treelib.TreeNode) else node
        out.write(str(label))

        for length in lens:
            out.write("\t%f" % length)
        out.write("\n")
def iter_fasta(filename, keyfunc=firstword, valuefunc=lambda x: x):
    """Iterate through the sequences of a FASTA file

    For every ">" header the key is keyfunc(header without ">", right
    stripped); the value is valuefunc applied to the joined sequence
    lines.  Sequence lines are ignored while the current key is "".
    """
    key = ""
    pieces = []

    def record():
        # the pair for the record collected so far
        return (key, valuefunc("".join(pieces)))

    for line in util.open_stream(filename):
        is_header = len(line) > 0 and line[0] == ">"
        if is_header:
            if key != "":
                yield record()
            key = keyfunc(line[1:].rstrip())
            pieces = []
        elif key != "":
            pieces.append(line.rstrip())

    if key != "":
        yield record()
def make_color_legend(filename, colormap, start, end, step, width=100, height=10):
    """Draw a colormap legend as an SVG strip of coloured rectangles.

    One rectangle is drawn for every value of
    util.frange(start, end + step, step), filled and stroked with
    colormap.get(value).
    """
    from rasmus import util

    legend = svg.Svg(util.open_stream(filename, "w"))
    legend.beginSvg(width, height)

    xscale = float(width) / (end + step - start)
    for value in util.frange(start, end + step, step):
        color = colormap.get(value)
        left = (value - start) * xscale
        legend.rect(left, 0, step * xscale, height, color, color)

    legend.endSvg()
def writeMultiBlocks(filename, multiblocks):
    """Write multiblocks as tab-delimited lines, one line per multiblock.

    For every segment of a multiblock five fields are written: genome
    name, chromosome name, start, end and direction.
    """
    out = util.open_stream(filename, "w")

    for multiblock in multiblocks:
        fields = []
        for segment in multiblock.segments:
            fields.extend([segment.genome.name,
                           segment.chrom.name,
                           str(segment.start),
                           str(segment.end),
                           str(segment.direction)])
        out.write("\t".join(fields))
        out.write("\n")
def read(self, filename):
    """Register an indexed FASTA file.

    Opens `filename` in binary mode, estimates its line width with
    guess_fasta_width and reads `filename + ".index"` (tab-delimited rows
    of key, start, end).  Each key is stored in self.index as
    (int(start), int(end)) and mapped to the open file in self.filelookup.

    Returns the list of keys read.
    Raises Exception if the line width is not consistent.
    """
    # open fasta
    fasta_handle = util.open_stream(filename, "rb")

    # estimate column width
    self.width = guess_fasta_width(filename)
    if self.width == -1:
        raise Exception("lines do not have consistent width")

    # read index
    loaded = []
    index_rows = util.DelimReader(filename + ".index", delim="\t")
    for key, start, end in index_rows:
        loaded.append(key)
        self.index[key] = (int(start), int(end))
        self.filelookup[key] = fasta_handle

    # return keys read
    return loaded
def consense_from_file(intrees, verbose=True, args="y"):
    """Run PHYLIP consense on the trees stored in a file.

    intrees -- a filename or stream with the input trees, one per line
    verbose -- passed to exec_phylip
    args    -- passed to exec_phylip

    Returns (consensus tree, number of input trees).
    """
    # read all trees
    infile = util.open_stream(intrees)
    try:
        trees = infile.readlines()
    finally:
        # never close an object the caller handed in
        if infile is not intrees:
            infile.close()
    ntrees = len(trees)

    cwd = create_temp_dir()
    try:
        with open("intree", "w") as out:
            out.writelines(trees)

        exec_phylip("consense", args, verbose)

        tree = treelib.read_tree("outtree")
    finally:
        # clean up even when phylip or the tree reader fails
        cleanup_temp_dir(cwd)

    return tree, ntrees
def write_regions(filename, regions, duprange, lossrange):
    """Write regions as tab-delimited text.

    The first line holds the values of `duprange + lossrange`.  Every
    following line holds a count vector, the region serialised with
    dumps() and the region's area.

    regions -- dict mapping count vector -> geometry.Polygon,
               geometry.LineString or geometry.Point

    Raises Exception for a region of any other type.
    """
    out = util.open_stream(filename, 'w')
    try:
        out.write('\t'.join(map(str, duprange + lossrange)) + "\n")

        for cv, region in regions.iteritems():
            # Polygon is the non-degenerate case; LineString and Point are
            # degenerate regions
            if not isinstance(region, (geometry.Polygon,
                                       geometry.LineString,
                                       geometry.Point)):
                raise Exception("count vector (%s) has invalid region (%s)" % (cv, dumps(region)))

            toks = (cv, dumps(region), region.area)
            out.write('\t'.join(map(str, toks)) + "\n")
    finally:
        out.close()
def readClustalwAlign(filename):
    """Read a ClustalW alignment into a fasta.FastaDict.

    The first three lines are skipped.  Every later line that starts with
    a digit or a letter contributes its second whitespace-separated token
    to the sequence named by its first token.
    """
    infile = util.open_stream(filename)
    seqs = fasta.FastaDict()

    # skip first three lines
    for _ in range(3):
        next(infile)

    # parse remaining lines
    for line in infile:
        lead = line[0]
        if not (lead.isdigit() or lead.isalpha()):
            continue

        (name, seq) = line.split()[:2]
        if name in seqs:
            seqs[name] += seq
        else:
            seqs[name] = seq

    return seqs
def guess_fasta_width(fastaFile):
    """Guess the line width of the sequence lines of a FASTA file.

    The decision is made on the first pair of consecutive sequence lines
    (no header line in between):
      * second line longer than the first   -> -1 (inconsistent)
      * second line as long as or shorter than the first
                                            -> width of the first line
    If no such pair exists, the largest sequence line width seen is
    returned (0 when there are no sequence lines).
    """
    stream = util.open_stream(fastaFile, "rb")

    first_width = -1   # width of the first line of the current sequence
    short_width = -1   # width of a line shorter than first_width
    widest = 0

    for line in stream:
        if len(line) == 0 or line[0] == ">":
            # previous sequence had only one line
            # reset widths for next sequence
            if short_width != -1:
                short_width = -1
            else:
                first_width = -1
            continue

        current = len(line.rstrip())
        widest = max(widest, current)

        if first_width == -1:
            # first line
            first_width = current
            continue

        if current > first_width:
            # widths cannot get bigger
            return -1
        if current == first_width:
            return first_width
        if short_width == -1:
            # this should be last line in sequence
            short_width = current
            return first_width
        # width got smaller twice
        return -1

    return widest
def write(self, out, fullpage=False):
    """Write HTML table

    out      -- a filename or stream
    fullpage -- when true, wrap the output in <html> tags and, if a title
                is set, emit a <head> with that title
    """
    out = util.open_stream(out, "w")
    emit = out.write

    if fullpage:
        emit("<html>")
        if self.title:
            emit("<head><title>%s</title></head>\n" % self.title)

    emit("<style>.tab { border-right: 1px solid #777; border-bottom: 1px solid #777;}</style>")

    if self.title is not None:
        emit("<h1>%s</h1>" % self.title)

    # write headers
    emit("<table cellspacing=0 style='border: 1px solid black;'>\n")
    emit("<tr><td class='tab'><b>#</b></td>")
    for header in self.headers:
        emit("<td class='tab'><b>%s</b></td>" % header)
    emit("</tr>\n")

    # write rows, numbered from 1
    for rownum, row in enumerate(self.table, 1):
        emit("<tr><td class='tab'>%d.</td>" % rownum)
        for col, item in enumerate(util.mget(row, self.table.headers)):
            formatter = self.formats[col]
            if formatter is None:
                emit("<td class='tab'><nobr>%s </nobr></td>" % str(item))
            else:
                # write formatting
                emit("<td class='tab'>%s </td>" % formatter(item))
        emit("</tr>\n")

    emit("</table>")

    if fullpage:
        emit("</html>")
def read_phylip_align(filename):
    """
    Read a PHYLIP alignment.  Can be interleaved or not.

    In the first block the first 10 characters of each line are the
    sequence name.  After the first blank line, lines are assigned to the
    names in their original order, restarting at every blank line.
    Spaces inside sequences are removed.

    returns a FastaDict object.
    """
    infile = util.open_stream(filename)
    seqs = fasta.FastaDict()

    # read sequences and length (header must hold exactly two fields)
    nseq, seqlen = next(infile).split()
    nseq = int(nseq)

    names = []
    position = 0        # index into names within an interleaved block
    in_first_block = True

    # parse remaining lines
    for raw in infile:
        line = raw.rstrip()

        if len(line) == 0:
            # blank line: a new interleaved block starts
            position = 0
            in_first_block = False
            continue

        if in_first_block:
            name = line[:10].strip()
            seq = line[10:].strip().replace(" ", "")
            names.append(name)
        else:
            seq = line.strip().replace(" ", "")
            name = names[position]
            position += 1

        if name in seqs:
            seqs[name] += seq
        else:
            seqs[name] = seq

    return seqs
def write_regions(filename, regions, duprange, lossrange):
    """Write regions as tab-delimited text.

    Line 1: the values of `duprange + lossrange`.
    Other lines: count vector, dumps(region), region.area.

    regions -- dict mapping count vector -> geometry.Polygon
               (non-degenerate) or geometry.LineString / geometry.Point
               (degenerate)

    Raises Exception for a region of any other type.
    """
    valid_types = (geometry.Polygon, geometry.LineString, geometry.Point)

    out = util.open_stream(filename, 'w')
    try:
        out.write('\t'.join(map(str, duprange + lossrange)) + "\n")

        for cv, region in regions.iteritems():
            if not isinstance(region, valid_types):
                raise Exception("count vector (%s) has invalid region (%s)" % (cv, dumps(region)))

            # the serialised region is what gets written
            toks = (cv, dumps(region), region.area)
            out.write('\t'.join(map(str, toks)) + "\n")
    finally:
        # close the stream even when an invalid region aborts the write
        out.close()
def savetab(self, filename):
    """Save data in tab delimited format

    For every data set in self.data the "plab" option is written on its
    own line.  If the data set has y values, a header line with the axis
    labels, one tab-delimited line per point (x, y and, if present, z)
    and a blank line follow.

    filename -- a filename or stream
    """
    from rasmus import util

    out = util.open_stream(filename, "w")

    for data in self.data:
        out.write("%s\n" % (data.options["plab"],))

        if len(data.ylist) > 0:
            if len(data.zlist) > 0:
                rows = zip(data.xlist, data.ylist, data.zlist)
                labels = [data.options[i]
                          for i in ["xlab", "ylab", "zlab"]]
            else:
                rows = zip(data.xlist, data.ylist)
                labels = [data.options[i]
                          for i in ["xlab", "ylab"]]

            out.write("\t".join(labels) + "\n")
            for row in rows:
                out.write("\t".join(map(str, row)) + "\n")
            out.write("\n")
def iter_gff(filename, format=GFF3,
             line_filter=lambda x: True,
             region_filter=lambda x: True,
             # backcompat
             lineFilter=None,
             regionFilter=None):
    """
    Iterate over the regions in a GFF file

    filename      -- a filename or stream
    format        -- object whose read_region(line) parses one line
    line_filter   -- only lines for which this returns true are parsed
    region_filter -- only regions for which this returns true are yielded
    lineFilter, regionFilter -- old names of the two filters; when given
                     they take precedence

    Raises Exception (with line number and text) if a line fails to parse.
    """
    if lineFilter is not None:
        line_filter = lineFilter
    if regionFilter is not None:
        region_filter = regionFilter

    infile = util.open_stream(filename)

    for lineno, raw in enumerate(infile, 1):
        line = raw.rstrip("\n")

        # only continue processing if line is not comment and passes filter
        if len(line) == 0 or line[0] == "#" or not line_filter(line):
            continue

        # parse region
        try:
            region = format.read_region(line)
        except Exception as e:
            raise Exception("%s\nError on line %d: %s" % (e, lineno, line))

        # only return region if region passes filter
        if region_filter(region):
            yield region
def read_dist_matrix(filename):
    """Read a distance matrix.

    The first word of the file is the matrix size.  Every row starts with
    a name (any token that does not parse as a float) followed by values;
    a row may continue on following lines.  Both layouts are accepted:

    full square matrix
        3
        _______0  0.00000  0.60810  0.46709
        _______1  0.60810  0.00000  0.45522
        _______2  0.46709  0.45522  0.00000

    lower triangular matrix (values are mirrored)
        3
        _______0
        _______1  0.60810
        _______2  0.46709  0.45522

    Values "nan" and "inf" are replaced by 10 times the matrix maximum
    (util.max2).

    Returns (names, mat).
    """
    infile = util.open_stream(filename)

    size = int(util.read_word(infile))
    mat = util.make_matrix(size, size)
    names = []

    def isName(token):
        # a token is a row name unless it parses as a float
        try:
            float(token)
            return False
        except ValueError:
            return True

    i = -1
    j = 0
    for line in infile:
        row = line.split()

        if len(row) == 0:
            continue

        if isName(row[0]):
            # a name starts the next matrix row
            names.append(row[0])
            row = row[1:]
            i += 1
            j = 0

        # values must be preceded by a row name
        assert i != -1
        for val in row:
            if val == "nan" or val == "inf":
                val = None
            else:
                val = float(val)

            # mirror so lower triangular input fills the whole matrix
            mat[i][j] = val
            mat[j][i] = val
            j += 1

    # remove nasty infinities
    top = util.max2(mat)
    for i in range(size):
        for j in range(size):
            if mat[i][j] is None:
                mat[i][j] = 10 * top

    return names, mat
def draw_tree(tree, brecon, stree, xscale=100, yscale=100, leaf_padding=10, label_size=None, label_offset=None, font_size=12, stree_font_size=20, canvas=None, autoclose=True, rmargin=10, lmargin=100, tmargin=100, bmargin=100, tree_color=(0, 0, 0), tree_trans_color=(0, 0, 0), stree_color=(.3, .7, .3), snode_color=(.2, .2, .7), loss_color=(1, 1, 1), loss_color_border=(.5, .5, .5), dup_color=(0, 0, 1), dup_color_border=(0, 0, 1), trans_color=(1, 1, 0), trans_color_border=(.5, .5, 0), gtrans_color=(1, 0, 0), gtrans_color_border=(.5, 0, 0), event_size=10, snames=None, rootlen=None, stree_width=.8, filename="tree.svg"):
    '''Takes as input a parasite tree, tree, a reconciliation file, brecon,
    a host tree, stree, as well as sizes and colors of the trees components
    and returns a drawing of the reconciliation of the parasite tree on the
    host tree with event nodes of specified colors

    brecon maps every node of `tree` to a list of
    (snode, event, frequency) entries; the event names handled here are
    "spec", "gene", "loss", "dup", "trans" and "gtrans".

    If `canvas` is None a new SVG canvas is opened on `filename`.
    Entries of brecon are reordered in place (see orderLoss below).

    Returns the canvas.'''

    # set defaults
    font_ratio = 8. / 11.

    if label_size is None:
        label_size = .7 * font_size

    # NOTE(review): legend_scale and minlen are assigned but never read
    # in this function
    if sum(x.dist for x in tree.nodes.values()) == 0:
        legend_scale = False
        minlen = xscale

    # default: label every host leaf with its own name
    if snames is None:
        snames = dict((x, x) for x in stree.leaf_names())

    # layout stree
    slayout = treelib1.layout_tree(stree, xscale, yscale)
    if rootlen is None:
        rootlen = .1 * max(l[0] for l in slayout.values())

    # setup slayout
    # the None key stands for the parent of the host root
    x, y = slayout[stree.root]
    slayout[None] = (x - rootlen, y)
    for node, (x, y) in slayout.items():
        slayout[node] = (x + rootlen, y - .5 * yscale)

    # layout tree
    ylists = defaultdict(lambda: [])
    yorders = {}

    # layout speciations and genes (y)
    for node in tree.preorder():
        # remember the name of the first node in preorder
        if node == list(tree.preorder())[0]:
            rootNode = node.name
        yorders[node] = []
        for ev in brecon[node]:
            snode, event, frequency = ev
            if event == "spec" or event == "gene" or event == "loss":
                yorders[node].append(len(ylists[snode]))
                ylists[snode].append(node)

    # layout dups and transfers (y)
    for node in tree.postorder():
        for ev in brecon[node]:
            snode, event, frequency = ev
            if event != "spec" and event != "gene" and event != "loss":
                # Find number of nodes on a single branch for y-coord
                v = [
                    yorders[child] for child in node.children
                    if brecon[child][-1][0] == snode
                ]
                if len(v) == 0:
                    yorders[node].append(0)
                else:
                    yorders[node].append(stats.mean(flatten(v)))

    # layout node (x)
    xorders = {}  #Dictionary to record number of nodes on a single branch for x-coord
    branchFrac = {}  #Dictionary to record the placement of a node on a branch
    for node in tree.postorder():
        for n in range(len(brecon[node])):
            snode, event, frequency = brecon[node][n]
            if event == "spec" or event == "gene" or event == "loss":
                # Speciation, gene, and loss events happen at host vertices
                if not node in branchFrac:
                    branchFrac[node] = 0
            else:
                # Transfers and duplications occur on branches
                v = [branchFrac[child] for child in node.children]
                if len(v) == 0:
                    branchFrac[node] = 1
                else:
                    branchFrac[node] = max(v) + 1

    for node in tree.preorder():
        xorders[node] = []
        for n in range(len(brecon[node])):
            snode, event, frequency = brecon[node][n]
            if event == "spec" or event == "gene" or event == "loss":
                # Speciation, gene, and loss events happen on vertices, not branches
                xorders[node].append(0)
            else:
                if node.parent and containsTransOrDup(node.parent, brecon):
                    # set branchFrac to the branch Frac of the parent, they are
                    # on the same branch
                    branchFrac[node] = branchFrac[node.parent]
                if containsLoss(node, brecon):
                    # if following a loss, first transfer/duplication event on branch
                    xorders[node].append(1)
                elif not node.parent:
                    # Root of tree
                    xorders[node].append(0)
                else:
                    xorders[node].append(maxList(xorders[node.parent]) + 1)

    # setup layout
    # layout maps a node to a list of (x, y) points, one per brecon entry;
    # the None key holds the position used as parent of the root
    layout = {None: [slayout[brecon[tree.root][-1][0].parent]]}
    for node in tree.preorder():
        for n in range(len(brecon[node])):
            snode, event, frequency = brecon[node][n]
            nx, ny = slayout[snode]
            px, py = slayout[snode.parent]
            (npx, npy) = layout[node.parent][-1]

            # set spacing between nodes on the same branch
            # (shrink the step until the events fit between px and nx)
            frac = 50
            while branchFrac[node] * frac >= nx - px:
                frac = frac - 5

            # calc x
            if event == "trans" or event == "gtrans":
                if npx > px:
                    # transfer parent is farther forward in time than host parent
                    x = npx + frac
                else:
                    x = px + frac
            elif event == "dup":
                x = px + frac
            else:
                x = nx

            # calc y
            # interpolate along the host branch, then spread the events
            # that share a host node across the branch width
            deltay = ny - py
            slope = deltay / float(nx - px)

            deltax2 = x - px
            deltay2 = slope * deltax2
            offset = py + deltay2

            frac = (yorders[node][n] + 1) / float(max(len(ylists[snode]), 1) + 1)
            y = offset + (frac - .5) * stree_width * yscale

            if node in layout:
                layout[node].append((x, y))
            else:
                layout[node] = [(x, y)]

        # order brecon nodes temporally
        brecon[node] = orderLoss(node, brecon, layout)
        # order layout nodes temporally
        layout[node] = orderLayout(node, layout)

        # debug output for points far below the host tree
        if y > max(l[1] for l in slayout.values()) + 50:
            print nx, ny
            print px, py
            print offset, frac
            print ylists[snode], yorders[node]
            print brecon[node]
            print node, snode, layout[node]

    # layout label sizes
    max_label_size = max(len(x.name) for x in tree.leaves()) * font_ratio * font_size
    max_slabel_size = max(
        len(x.name) for x in stree.leaves()) * font_ratio * stree_font_size

    # NOTE(review): the string below is disabled code, kept as found
    '''
    if colormap == None:
        for node in tree:
            node.color = (0, 0, 0)
    else:
        colormap(tree)

    if stree and gene2species:
        recon = phylo.reconcile(tree, stree, gene2species)
        events = phylo.label_events(tree, recon)
        losses = phylo.find_loss(tree, stree, recon)
    else:
        events = None
        losses = None

    # layout tree
    if layout is None:
        coords = treelib.layout_tree(tree, xscale, yscale, minlen, maxlen)
    else:
        coords = layout
    '''

    xcoords, ycoords = zip(*slayout.values())
    maxwidth = max(xcoords) + max_label_size + max_slabel_size
    maxheight = max(ycoords) + yscale

    # initialize canvas
    if canvas is None:
        canvas = svg.Svg(util.open_stream(filename, "w"))
        width = int(rmargin + maxwidth + lmargin)
        height = int(tmargin + maxheight + bmargin)

        canvas.beginSvg(width, height)
        canvas.beginStyle("font-family: \"Sans\";")

        if autoclose == None:
            autoclose = True
    else:
        if autoclose == None:
            autoclose = False

    canvas.beginTransform(("translate", lmargin, tmargin))

    draw_stree(canvas, stree, slayout,
               yscale=yscale,
               stree_width=stree_width,
               stree_color=stree_color,
               snode_color=snode_color)

    # draw stree leaves
    for node in stree:
        x, y = slayout[node]
        if node.is_leaf():
            canvas.text(snames[node.name],
                        x + leaf_padding + max_label_size,
                        y + stree_font_size / 2., stree_font_size,
                        fillColor=snode_color)

    # draw tree
    for node in tree:
        containsL = containsLoss(node, brecon)
        for n in range(len(brecon[node])):
            x, y = layout[node][n]
            if containsL == False:
                # no loss event
                px, py = layout[node.parent][-1]
            else:
                # loss event present
                if n == 0:
                    # event is loss
                    px, py = layout[node.parent][-1]
                else:
                    # event stems from loss
                    px, py = layout[node][n - 1]

            # a branch is drawn as a transfer when the preceding event is a
            # (guilty) transfer on a different host node
            trans = False
            if node.parent:
                snode, event, frequency = brecon[node][n]
                if n == 0:
                    psnode, pevent, pfrequency = brecon[node.parent][-1]
                # Event stemming from a loss event
                else:
                    psnode, pevent, pfrequency = brecon[node][n - 1]
                if pevent == "trans" or pevent == "gtrans":
                    if psnode != snode:
                        trans = True
                    else:
                        trans = False

            if not trans:
                canvas.line(x, y, px, py, color=tree_color)
            # draw the transfer dashed line
            else:
                arch = 20
                x2 = (x * .5 + px * .5) - arch
                y2 = (y * .5 + py * .5)
                x3 = (x * .5 + px * .5) - arch
                y3 = (y * .5 + py * .5)
                # draw regular transfer dashed line
                if pevent == "trans":
                    canvas.write(
                        "<path d='M%f %f C%f %f %f %f %f %f' %s />\n " %
                        (x, y, x2, y2, x3, y3, px, py,
                         " style='stroke-dasharray: 4, 2' " +
                         svg.colorFields(tree_trans_color, (0, 0, 0, 0))))
                # draw guilty transfer dashed line
                else:
                    canvas.write(
                        "<path d='M%f %f C%f %f %f %f %f %f' %s />\n " %
                        (x, y, x2, y2, x3, y3, px, py,
                         " style='stroke-dasharray: 4, 2' " +
                         svg.colorFields(gtrans_color, (0, 0, 0, 0))))

    # draw events
    for node in tree:
        # mark the root with an arrow-shaped polygon and a text label
        if node.name == rootNode:
            x, y = layout[node][0]
            canvas.polygon((x-20, y, x-50, y+30,x-50, y+15, x-90, y+15, x-90,\
                            y-15, x-50, y-15, x-50, y-30), strokeColor = (1,.7,.3), \
                           fillColor = (1,.7,.3))
            canvas.text("Root Node", x-88, y+5, font_size+2,\
                        fillColor = (0,0,0))
        for n in range(len(brecon[node])):
            snode, event, frequency = brecon[node][n]
            frequency = float(frequency)
            x, y = layout[node][n]
            o = event_size / 2.0

            if event == "loss":
                # draw boxes, frequencies of loss events
                canvas.rect(x - o, y - o, event_size, event_size,
                            fillColor=loss_color,
                            strokeColor=loss_color_border)
                canvas.text("{:.3f}".format(frequency) + node.name,
                            x - o, y - o, font_size + 2,
                            fillColor=loss_color)

            if event == "spec":
                # draw boxes, frequencies of speciation events
                canvas.rect(x - o, y - o, event_size, event_size,
                            fillColor=(0, 0, 0),
                            strokeColor=(0, 0, 0))
                canvas.text("{:.3f}".format(frequency) + node.name,
                            x - o, y - o, font_size + 2,
                            fillColor=(0, 0, 0))

            if event == "dup":
                # draw boxes, frequencies of duplication events
                canvas.rect(x - o, y - o, event_size, event_size,
                            fillColor=dup_color,
                            strokeColor=dup_color_border)
                canvas.text("{:.3f}".format(frequency) + node.name,
                            x - o, y - o, font_size + 2,
                            fillColor=dup_color)

            elif event == "trans":
                # draw boxes, frequencies of transfer events
                canvas.rect(x - o, y - o, event_size, event_size,
                            fillColor=trans_color,
                            strokeColor=trans_color_border)
                canvas.text("{:.3f}".format(frequency) + node.name,
                            x - o, y - o, font_size + 2,
                            fillColor=trans_color)

            elif event == "gtrans":
                # draw boxes, frequencies of guilty transfer events
                canvas.rect(x - o, y - o, event_size, event_size,
                            fillColor=gtrans_color,
                            strokeColor=gtrans_color_border)
                canvas.text("{:.3f}".format(frequency) + node.name,
                            x - o, y - o, font_size + 2,
                            fillColor=gtrans_color)

    # draw tree leaves
    for node in tree:
        for n in range(len(brecon[node])):
            x, y = layout[node][n]
            if node.is_leaf() and brecon[node][n][1] == "gene":
                canvas.text(node.name,
                            x + leaf_padding, y + font_size / 2.,
                            font_size + 2, fillColor=(0, 0, 0))

    canvas.endTransform()

    # NOTE(review): beginStyle is only called when the canvas is created
    # here, but endStyle runs whenever autoclose is set - confirm
    if autoclose:
        canvas.endStyle()
        canvas.endSvg()

    return canvas
def draw_tree(tree, labels={}, xscale=100, yscale=20, canvas=None,
              leafPadding=10, leafFunc=lambda x: str(x.name),
              labelOffset=None, fontSize=10, labelSize=None,
              minlen=1, maxlen=util.INF, filename=sys.stdout,
              rmargin=150, lmargin=10, tmargin=0, bmargin=None,
              colormap=None,
              stree=None,
              layout=None,
              gene2species=None,
              lossColor=(0, 0, 1),
              dupColor=(1, 0, 0),
              eventSize=4,
              legendScale=False, autoclose=None,
              extendRoot=True, labelLeaves=True, drawHoriz=True, nodeSize=0):
    """Draw a tree onto an SVG canvas and return the canvas.

    Side effect: when 'colormap' is None every node's 'color' attribute is
    overwritten with black (0, 0, 0); otherwise 'colormap(tree)' is called
    and is expected to have set 'node.color' (it is read for every node).

    tree         -- tree to draw; must be iterable over nodes and provide
                    'nodes', 'root', and nodes with 'name', 'dist',
                    'parent', 'children' and 'is_leaf()'
    labels       -- mapping of node name -> branch label; labels are
                    converted with str() and may span several lines
                    separated by "\\n".  Only read, never modified.
    xscale       -- horizontal scale given to treelib.layout_tree; also
                    used to size the legend
    yscale       -- vertical scale given to treelib.layout_tree; also the
                    default bottom margin
    canvas       -- existing canvas to draw on.  If None, a new svg.Svg is
                    opened on 'filename' and sized from the layout and
                    margins
    leafPadding  -- horizontal gap between a leaf and its label
    leafFunc     -- function mapping a leaf node to its label text
    labelOffset  -- vertical offset of branch labels (default -1); it is
                    also added to the maximum y coordinate when computing
                    the drawing height
    fontSize     -- font size of leaf labels
    labelSize    -- font size of branch labels (default 0.7 * fontSize)
    minlen       -- minimum branch length given to treelib.layout_tree;
                    replaced by 'xscale' when all branch lengths sum to 0
    maxlen       -- maximum branch length given to treelib.layout_tree
    filename     -- file name or stream for the new canvas; only used when
                    'canvas' is None
    rmargin, lmargin, tmargin, bmargin
                 -- margins around the drawing (bmargin defaults to
                    'yscale')
    colormap     -- optional callable invoked as colormap(tree)
    stree        -- species tree; together with 'gene2species' enables
                    reconciliation and drawing of events
    layout       -- optional precomputed mapping node -> (x, y); if None
                    the layout comes from treelib.layout_tree
    gene2species -- gene-to-species mapping passed to phylo.reconcile
    lossColor    -- color forwarded to draw_events
    dupColor     -- color forwarded to draw_events
    eventSize    -- size forwarded to draw_events
    legendScale  -- falsy: no legend.  True (or any value comparing equal
                    to True, e.g. 1): legend length is chosen
                    automatically as the largest power of ten not
                    exceeding maxwidth / xscale.  Any other truthy value
                    is used directly as the legend length.  Forced to
                    False when all branch lengths sum to 0.
    autoclose    -- whether to call canvas.endSvg() before returning;
                    defaults to True when the canvas is created here and
                    to False when a canvas was supplied
    extendRoot   -- if true the root branch is drawn starting at x = 0;
                    otherwise the root gets a zero-length branch
    labelLeaves  -- if true leaf labels are drawn
    drawHoriz    -- if true branches are drawn as horizontal segments
                    joined by vertical bars, otherwise as straight lines
                    from parent to child.  Forced to True when 'labels' is
                    non-empty or reconciliation is enabled.
    nodeSize     -- radius of the circle drawn at each node (0 = none)
    """

    # set defaults
    fontRatio = 8. / 11.

    if labelSize is None:
        labelSize = .7 * fontSize

    if labelOffset is None:
        labelOffset = -1

    if bmargin is None:
        bmargin = yscale

    # a tree without branch lengths has no meaningful scale; give every
    # branch a fixed visible length instead
    if sum(x.dist for x in tree.nodes.values()) == 0:
        legendScale = False
        minlen = xscale

    if colormap is None:
        for node in tree:
            node.color = (0, 0, 0)
    else:
        colormap(tree)

    reconciled = stree and gene2species
    if reconciled:
        recon = phylo.reconcile(tree, stree, gene2species)
        events = phylo.label_events(tree, recon)
        losses = phylo.find_loss(tree, stree, recon)
    else:
        events = None
        losses = None

    # branch labels and events are positioned along horizontal branches
    if len(labels) > 0 or reconciled:
        drawHoriz = True

    # layout tree
    if layout is None:
        coords = treelib.layout_tree(tree, xscale, yscale, minlen, maxlen)
    else:
        coords = layout

    xcoords, ycoords = zip(*coords.values())
    maxwidth = max(xcoords)
    maxheight = max(ycoords) + labelOffset

    # initialize canvas
    created_canvas = canvas is None
    if created_canvas:
        canvas = svg.Svg(util.open_stream(filename, "w"))
        width = int(rmargin + maxwidth + lmargin)
        height = int(tmargin + maxheight + bmargin)
        canvas.beginSvg(width, height)

    # only close the SVG by default if it was opened here
    if autoclose is None:
        autoclose = created_canvas

    # draw tree
    def walk(node):
        """Draw 'node' (branch, label, marker, leaf text) and recurse."""
        x, y = coords[node]
        if node.parent:
            parentx, parenty = coords[node.parent]
        elif extendRoot:
            parentx, parenty = 0, y
        else:
            parentx, parenty = x, y     # i.e. no branch

        # draw branch
        if drawHoriz:
            canvas.line(parentx, y, x, y, color=node.color)
        else:
            canvas.line(parentx, parenty, x, y, color=node.color)

        # draw branch labels
        if node.name in labels:
            branchlen = x - parentx
            lines = str(labels[node.name]).split("\n")
            labelwidth = max(map(len, lines))
            # estimated label width, capped so it fits on the branch
            labellen = min(labelwidth * fontRatio * fontSize,
                           max(int(branchlen - 1), 0))
            for i, line in enumerate(lines):
                canvas.text(line,
                            parentx + (branchlen - labellen) / 2.,
                            y + labelOffset
                            + (-len(lines) + 1 + i) * (labelSize + 1),
                            labelSize)

        # draw nodes
        if nodeSize > 0:
            canvas.circle(x, y, nodeSize, strokeColor=svg.null,
                          fillColor=node.color)

        # draw leaf labels or recur
        if node.is_leaf():
            if labelLeaves:
                canvas.text(leafFunc(node),
                            x + leafPadding, y + fontSize / 2., fontSize,
                            fillColor=node.color)
        else:
            if drawHoriz:
                # draw vertical part of branch
                top = coords[node.children[0]][1]
                bot = coords[node.children[-1]][1]
                canvas.line(x, top, x, bot, color=node.color)

            # draw children
            for child in node.children:
                walk(child)

    canvas.beginTransform(("translate", lmargin, tmargin))
    walk(tree.root)

    if reconciled:
        draw_events(canvas, tree, coords, events, losses,
                    lossColor=lossColor,
                    dupColor=dupColor,
                    size=eventSize)
    canvas.endTransform()

    # draw legend
    if legendScale:
        # NOTE: '== True' (not 'is True') is deliberate so that 1 and 1.0
        # keep selecting the automatic scale, as they always have.
        if legendScale == True:
            # automatically choose a scale
            length = maxwidth / float(xscale)
            order = math.floor(math.log10(length))
            length = 10 ** order
        else:
            # explicit scale length supplied by the caller; previously
            # 'length' was left unassigned here and drawScale() failed
            # with an UnboundLocalError
            length = legendScale

        drawScale(lmargin, tmargin + maxheight + bmargin - fontSize,
                  length, xscale, fontSize, canvas=canvas)

    if autoclose:
        canvas.endSvg()

    return canvas