def drawDistRuler(names, dists, scale=500, padding=10, textsize=12,
                  notchsize=2, labelpadding=5, distsize=9,
                  filename=sys.stdout):
    """Draw an SVG ruler carrying one labelled notch per distance.

    names        -- labels, one per distance
    dists        -- distances; each notch is placed at x = scale * dist
    scale        -- factor converting a distance into drawing units
    padding      -- margin used for the canvas size and drawing offset
    textsize     -- text size passed for the name labels
    notchsize    -- half-height of each notch line
    labelpadding -- gap between the ruler line and the labels
    distsize     -- text size passed for the numeric distance labels
    filename     -- file name or stream receiving the SVG
    """
    # vertical room reserved for the names (longest name sets the size)
    names_extent = textsize * max(len(label) for label in names)

    out = svg.Svg(util.open_stream(filename, "w"))
    ruler_length = scale * max(dists)
    out.beginSvg(ruler_length + 2*padding,
                 2*padding + names_extent + 5*distsize)
    out.beginTransform(("translate", padding, names_extent + padding))

    # the ruler itself
    out.line(0, 0, ruler_length, 0)

    # one notch per entry: name on one side, "%.3f" distance on the other
    for label, distance in zip(names, dists):
        xpos = scale * distance
        text_x = xpos + textsize/2.0

        out.text(label, text_x, -labelpadding, textsize, angle=-90)
        out.line(xpos, notchsize, xpos, -notchsize)
        out.text("%.3f" % distance, text_x,
                 labelpadding + distsize*3.5, distsize, angle=-90)

    out.endTransform()
    out.endSvg()
def read_log(filename):
    """Iterate over the records of a DLCoal log.

    Lines beginning with "seed:" are skipped.  Every other line is
    evaluated as a Python expression, with the name ``inf`` bound to
    ``util.INF``, and the resulting value is yielded.

    filename -- log file name or stream
    """
    log = util.open_stream(filename)

    # NOTE(review): eval() runs whatever expression the log contains; only
    # feed this function logs from a trusted source.
    for entry in log:
        if not entry.startswith("seed:"):
            yield eval(entry, {"inf": util.INF})
def nextFile(self):
    """Open and return the next queued input file.

    The first entry of self.infiles is removed from the queue and opened
    with util.open_stream.  Returns False once the queue is empty.
    """
    # nothing left to open
    if len(self.infiles) == 0:
        return False

    # pop the head of the queue before opening it
    upcoming, self.infiles = self.infiles[0], self.infiles[1:]
    return util.open_stream(upcoming)
def test_open_stream2(self):
    """A stream that open_stream opened from a file name must really close."""
    # open this very source file by name, then close it again
    stream = util.open_stream(__file__)
    stream.close()

    assert stream.closed
def compute_cost(self, gtree):
    """Return the deep coalescence cost of a gene tree.

    The species tree and the gene tree (leaf names mapped through
    self.gene2species) are written, one per line, to self.treefile.  The
    external program ``cmd`` is then run on that file and the cost is taken
    from the first output line whose text before the ':' is
    "deep coalecense".

    gtree -- gene tree to score

    Raises Exception if the program exits with a non-zero return code, and
    AssertionError if no cost line is found in its output.
    """
    # write species tree and gene tree using species map
    treeout = util.open_stream(self.treefile, 'w')
    try:
        self.stree.write(treeout, oneline=True, writeData=lambda x: "")
        treeout.write('\n')
        gtree.write(treeout,
                    namefunc=lambda name: self.gene2species(name),
                    oneline=True, writeData=lambda x: "")
        treeout.write('\n')
    finally:
        # release the file even if writing a tree fails
        treeout.close()

    # execute command
    proc = subprocess.Popen([cmd, '-i', self.treefile],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            universal_newlines=True)
    # communicate() drains the pipe while waiting; a bare wait() can
    # deadlock once the child fills the OS pipe buffer
    output = proc.communicate()[0]
    ret = proc.returncode
    if ret != 0:
        raise Exception("genetreereport failed with returncode %d" % ret)

    # parse output
    # NOTE(review): the misspelled key presumably mirrors the program's own
    # output -- confirm before "correcting" it
    cost = None
    for line in output.splitlines(True):
        toks = line.split(':')
        if toks[0] == "deep coalecense":
            cost = int(toks[1])
            break
    assert cost is not None

    return cost
def optimize_model(self, gtree, aln):
    """Optimize the IQTREE model for a tree and alignment.

    The tree and the alignment are written to temporary files, then
    ``iqtree-omp`` is run through the shell with output prefix
    "<self.pre>.treefix_tmp".  The first two lines of the resulting
    ".sitelh" file are stored, lightly rewritten, in self.bsitelh.

    gtree -- tree written to a temporary '.btree' file (removed on exit)
    aln   -- alignment written in PHYLIP format to a temporary '.align'
             file; its path is kept in self.seqfile and is not removed here

    Reads self.cpu, self.model, self.type and self.pre to build the command.
    """
    # temporary tree file, only needed while iqtree runs
    fd, btreefile = tempfile.mkstemp('.btree')
    os.close(fd)
    try:
        gtree.write(btreefile)

        # alignment file; the path is kept on self for later use
        fd, seqfile = tempfile.mkstemp('.align')
        os.close(fd)
        out = util.open_stream(seqfile, "w")
        try:
            phylip.write_phylip_align(out, aln, strip_names=False)
        finally:
            out.close()
        self.seqfile = seqfile

        # NOTE(review): the command is assembled by string formatting and
        # run through the shell, so paths with spaces or shell
        # metacharacters will break it
        os.system('iqtree-omp -redo -nt %s -m %s -st %s -s %s -te %s '
                  '-pre %s.treefix_tmp -wsl > /dev/null'
                  % (self.cpu, self.model, self.type, self.seqfile,
                     btreefile, self.pre))

        # keep the first two lines of the site-likelihood file: the first
        # "1" on line one becomes "2" and the first "Site_Lh" on line two
        # becomes "Tree1"
        f = open("%s.treefix_tmp.sitelh" % self.pre, 'r')
        try:
            first = f.readline().replace("1", "2", 1)
            second = f.readline().replace("Site_Lh", "Tree1", 1)
        finally:
            f.close()
        self.bsitelh = first + second

        # drop every file iqtree produced under the temporary prefix
        os.system('rm %s.treefix_tmp.*' % self.pre)
    finally:
        # the tree file is removed whether or not the steps above succeeded
        os.remove(btreefile)
def compute_cost(self, gtree):
    """Return the DTL cost of a gene tree.

    The species tree and the gene tree (leaf names mapped through
    self.gene2species) are written, one per line, to self.treefile.  The
    external program ``cmd`` is then run on that file with the costs
    self.dupcost (-D), self.transfercost (-T) and self.losscost (-L), and
    the cost is taken from the first output line whose text before the ':'
    is "The minimum reconciliation cost is".

    gtree -- gene tree to score

    Raises Exception if the program exits with a non-zero return code, and
    AssertionError if no cost line is found in its output.
    """
    # write species tree and gene tree using species map
    treeout = util.open_stream(self.treefile, 'w')
    try:
        self.stree.write(treeout, oneline=True)
        treeout.write('\n')
        gtree.write(treeout,
                    namefunc=lambda name: self.gene2species(name),
                    oneline=True)
        treeout.write('\n')
    finally:
        # release the file even if writing a tree fails
        treeout.close()

    # execute command
    proc = subprocess.Popen([cmd,
                             '-i', self.treefile,
                             '-D', str(self.dupcost),
                             '-T', str(self.transfercost),
                             '-L', str(self.losscost)],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            universal_newlines=True)
    # communicate() drains the pipe while waiting; a bare wait() can
    # deadlock once the child fills the OS pipe buffer
    output = proc.communicate()[0]
    ret = proc.returncode
    if ret != 0:
        raise Exception("DTL failed with returncode %d" % ret)

    # parse output
    cost = None
    for line in output.splitlines(True):
        toks = line.split(':')
        if toks[0] == "The minimum reconciliation cost is":
            cost = int(toks[1])
            break
    assert cost is not None

    return cost
def iterPfam(filename):
    """Iterate over (query name, list of Domain) pairs in a Pfam report.

    filename -- report file name or stream

    Each record starts at a "Query sequence" line; its domains are the
    lines of the table that follows the "Parsed for domains:" line.
    """
    infile = util.open_stream(filename)

    def read_query(stream):
        # advance to the next query line; falls through to None at end of input
        for text in stream:
            if text.startswith("Query sequence"):
                return text.rstrip().replace("Query sequence: ", "")
        return None

    def read_domains(stream):
        # move forward to the start of the domain table
        for text in stream:
            if text.startswith("Parsed for domains:"):
                break
        stream.next()  # skip header 1
        stream.next()  # skip header 2

        found = []
        for text in stream:
            # an (almost) empty line or a line starting with blank/tab
            # ends the table
            if len(text) <= 1 or text[0] in "\t ":
                break
            found.append(Domain(text))
        return found

    query = read_query(infile)
    while query is not None:
        yield query, read_domains(infile)
        query = read_query(infile)
def make_color_legend(filename, colormap, start, end, step,
                      width=100, height=10, display=False):
    """Draw a colormap as a strip of colored rectangles in an SVG file.

    filename -- output file name or stream; None writes to a temporary
                file that is deleted again before returning
    colormap -- object whose get(value) gives the color for a value
    start, end, step -- values sampled via util.frange(start, end + step, step)
    width, height    -- size of the SVG canvas
    display  -- if true, run the external ``display`` command on the file
    """
    from rasmus import util

    # with no target given, work on a throw-away file
    temp = filename is None
    if temp:
        filename = util.tempfile(".", "colormap", ".svg")

    canvas = svg.Svg(util.open_stream(filename, "w"))
    canvas.beginSvg(width, height)

    # horizontal drawing units per unit of value
    xscale = float(width) / (end + step - start)

    for value in util.frange(start, end + step, step):
        color = colormap.get(value)
        canvas.rect((value - start) * xscale, 0,
                    step * xscale, height,
                    color, color)

    canvas.endSvg()
    canvas.close()

    # display
    if display:
        os.system("display %s" % filename)

    # clean up temp files
    if temp:
        os.remove(filename)
def write(self, filename=sys.stdout, delim="\t"):
    """Write the table, header first, to a file or stream.

    filename -- a file name (opened for writing and remembered in
                self.filename) or a stream (written to directly)
    delim    -- separator placed between the cells of a row
    """
    # a string target is remembered for later saving
    if isinstance(filename, str):
        self.filename = filename

    out = util.open_stream(filename, "w")
    self.write_header(out, delim=delim)

    # local alias: avoids an attribute lookup for every cell
    types = self.types

    for row in self:
        # a header missing from the row gives an empty cell; present values
        # are formatted by the column type's __str__
        cells = [types[header].__str__(row[header]) if header in row else ''
                 for header in self.headers]
        print >>out, delim.join(cells)
def write_fasta_ordered(filename, names, seqs, width=None):
    """Write parallel sequences of names and sequences as FASTA records.

    filename -- output file name or stream
    names    -- record names, in output order
    seqs     -- sequences, parallel to names
    width    -- wrap width handed to util.printwrap
    """
    out = util.open_stream(filename, "w")

    for record in izip(names, seqs):
        header, residues = record
        print >>out, ">%s" % header
        util.printwrap(residues, width, out=out)
def write_boot_trees(filename, trees, counts=None):
    """Write trees as one-line newick strings, one tree per line.

    filename -- output file name or stream
    trees    -- trees providing get_one_line_newick()
    counts   -- how many times to write each tree, parallel to trees;
                None writes every tree exactly once
    """
    out = util.open_stream(filename, "w")

    # identity test: "== None" would compare elementwise on array-like counts
    if counts is None:
        counts = [1] * len(trees)

    for tree, count in zip(trees, counts):
        # the line is the same for every repetition, so build it once
        line = tree.get_one_line_newick() + "\n"
        for _ in range(count):
            out.write(line)
def recon_root(self, gtree, newCopy=True, returnCost=False):
    """
    Returns the rerooted tree with min deep coalescence cost
    Generalizes compute_cost to multiple trees.

    gtree      -- gene tree whose rootings are evaluated
    newCopy    -- passed through to self._reroot_helper
    returnCost -- if true, return (tree, mincost) instead of just the tree

    The species tree and every rooting yielded by self._reroot_helper are
    written, one per line, to self.treefile; the external program ``cmd``
    is run once on that file and one cost per gene tree is parsed from
    its output.

    Raises Exception if the program exits with a non-zero return code, and
    AssertionError if a cost is missing for any rooting.
    """
    # write species tree and gene tree using species map
    treeout = util.open_stream(self.treefile, 'w')
    try:
        self.stree.write(treeout, oneline=True, writeData=lambda x: "")
        treeout.write('\n')
        edges = []
        # note: the loop rebinds gtree; after it, gtree and edge hold the
        # last rooting that was yielded
        for gtree, edge in self._reroot_helper(gtree, newCopy=newCopy,
                                               returnEdge=True):
            gtree.write(treeout,
                        namefunc=lambda name: self.gene2species(name),
                        oneline=True, writeData=lambda x: "")
            treeout.write('\n')
            edges.append(edge)
    finally:
        # release the file even if writing a tree fails
        treeout.close()

    # execute command
    proc = subprocess.Popen([cmd, '-i', self.treefile],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            universal_newlines=True)
    # communicate() drains the pipe while waiting; a bare wait() can
    # deadlock once the child fills the OS pipe buffer
    output = proc.communicate()[0]
    ret = proc.returncode
    if ret != 0:
        raise Exception("genetreereport failed with returncode %d" % ret)

    # parse output: a "[ gene tree #k ]" line selects the (1-based) tree
    # that following cost lines belong to
    tree_header = re.compile(r"\[ gene tree #(\d+) \]")
    cost_line = re.compile(r"\[ deep coalecense: (\d+) \]")
    i = None
    n = len(edges)
    costs = [None]*n
    for line in output.splitlines(True):
        m = tree_header.match(line)
        if m:
            i = int(m.groups()[0]) - 1

        if i is not None:
            m = cost_line.match(line)
            if m:
                costs[i] = int(m.groups()[0])
    assert all(map(lambda x: x is not None, costs))

    # find minimum cost tree
    ndx, mincost = min(enumerate(costs), key=lambda it: it[1])
    minroot = edges[ndx]

    # reroot only if the last rooting tried is not already the best one
    # NOTE(review): presumably gtree is still rooted on `edge` here --
    # confirm against _reroot_helper
    if edge != minroot:
        node1, node2 = minroot
        # orient the pair so that node1 is the child of node2
        if node1.parent != node2:
            node1, node2 = node2, node1
        assert node1.parent == node2
        treelib.reroot(gtree, node1.name, newCopy=False, keepName=True)

    if returnCost:
        return gtree, mincost
    else:
        return gtree
def recon_root(self, gtree, newCopy=True, returnCost=False):
    """
    Returns the rerooted tree with min DTL cost
    Generalizes compute_cost to multiple trees.

    gtree      -- gene tree whose rootings are evaluated
    newCopy    -- passed through to self._reroot_helper
    returnCost -- if true, return (tree, mincost) instead of just the tree

    The species tree and every rooting yielded by self._reroot_helper are
    written, one per line, to self.treefile; the external program ``cmd``
    is run once on that file with self.dupcost (-D), self.transfercost (-T)
    and self.losscost (-L).  Costs are read from its output in order, one
    per rooting.

    Raises Exception if the program exits with a non-zero return code, and
    AssertionError if the number of reported costs does not cover every
    rooting (or exceeds it).
    """
    # write species tree and gene tree using species map
    treeout = util.open_stream(self.treefile, 'w')
    try:
        self.stree.write(treeout, oneline=True)
        treeout.write('\n')
        edges = []
        # note: the loop rebinds gtree; after it, gtree and edge hold the
        # last rooting that was yielded
        for gtree, edge in self._reroot_helper(gtree, newCopy=newCopy,
                                               returnEdge=True):
            gtree.write(treeout,
                        namefunc=lambda name: self.gene2species(name),
                        oneline=True)
            treeout.write('\n')
            edges.append(edge)
    finally:
        # release the file even if writing a tree fails
        treeout.close()

    # execute command
    proc = subprocess.Popen([cmd,
                             '-i', self.treefile,
                             '-D', str(self.dupcost),
                             '-T', str(self.transfercost),
                             '-L', str(self.losscost)],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            universal_newlines=True)
    # communicate() drains the pipe while waiting; a bare wait() can
    # deadlock once the child fills the OS pipe buffer
    output = proc.communicate()[0]
    ret = proc.returncode
    if ret != 0:
        raise Exception("DTL failed with returncode %d" % ret)

    # parse output: the k-th cost line belongs to the k-th tree written
    i = 0
    n = len(edges)
    costs = [None]*n
    for line in output.splitlines(True):
        toks = line.split(':')
        if toks[0] == "The minimum reconciliation cost is":
            assert i < n
            costs[i] = int(toks[1])
            i += 1
    assert all(map(lambda x: x is not None, costs))

    # find minimum cost tree
    ndx, mincost = min(enumerate(costs), key=lambda it: it[1])
    minroot = edges[ndx]

    # reroot only if the last rooting tried is not already the best one
    # NOTE(review): presumably gtree is still rooted on `edge` here --
    # confirm against _reroot_helper
    if edge != minroot:
        node1, node2 = minroot
        # orient the pair so that node1 is the child of node2
        if node1.parent != node2:
            node1, node2 = node2, node1
        assert node1.parent == node2
        treelib.reroot(gtree, node1.name, newCopy=False, keepName=True)

    if returnCost:
        return gtree, mincost
    else:
        return gtree
def write(self, filename=sys.stdout, names=None, width=80):
    """Write the stored sequences as FASTA records.

    filename -- output file name or stream
    names    -- keys to write, in order; None means self.names
    width    -- wrap width handed to util.printwrap
    """
    out = util.open_stream(filename, "w")

    selected = self.names if names is None else names
    for name in selected:
        print >>out, ">" + name
        util.printwrap(self[name], width, out=out)
def test_open_stream1(self):
    """Closing the result of open_stream must leave the given stream open."""
    wrapped = util.open_stream(sys.stdin)

    # attribute access on the returned object has to work
    wrapped.read

    # closing the returned object must not close sys.stdin itself
    wrapped.close()
    assert not sys.stdin.closed
def make_fasta_index(filename):
    """Map every FASTA header to the stream position recorded after it.

    The key is the header line without the leading '>' and without
    trailing whitespace; the value is infile.tell() taken right after the
    header line was read.

    (The original author notes that a faster C program, formatfa, also
    exists.)
    """
    infile = util.open_stream(filename)

    offsets = {}
    for text in util.SafeReadIter(infile):
        if not text.startswith(">"):
            continue
        offsets[text[1:].rstrip()] = infile.tell()

    return offsets
def write_gff(filename, regions, format=GFF3):
    """
    Write regions to a file or stream

    filename - a filename or file stream
    regions  - an iterable of Region objects
    format   - object whose write_region(region, out=...) emits one region
    """
    out = util.open_stream(filename, "w")

    # the format object is responsible for the layout of each record
    for feature in regions:
        format.write_region(feature, out=out)
def read_gene2species(*filenames):
    """
    Reads one or more gene2species files

    filenames - file names or streams; the delimited, comment-stripped rows
                of all of them are concatenated in the order given

    Returns a function that will map gene names to species names
    (built by make_gene2species from the collected rows).
    """
    # each file is read exactly once; a nested loop would re-read every
    # file once per file and lose the rows of any one-shot stream
    maps = []
    for filename in filenames:
        maps.extend(util.read_delim(util.skip_comments(
            util.open_stream(filename))))
    return make_gene2species(maps)
def optimize_model(self, gtree, aln):
    """Write the input files used for optimizing the IQTREE model.

    gtree -- tree, written to a temporary '.btree' file whose path is
             stored in self.btreefile
    aln   -- alignment, written in PHYLIP format to a temporary '.align'
             file whose path is stored in self.seqfile

    Neither temporary file is removed here.
    """
    # tree file
    handle, tree_path = tempfile.mkstemp('.btree')
    os.close(handle)
    gtree.write(tree_path)
    self.btreefile = tree_path

    # alignment file
    handle, align_path = tempfile.mkstemp('.align')
    os.close(handle)
    align_out = util.open_stream(align_path, "w")
    phylip.write_phylip_align(align_out, aln, strip_names=False)
    align_out.close()
    self.seqfile = align_path
def write_dist_matrix(mat, labels=None, out=sys.stdout):
    """Write a distance matrix: a size line, then one labelled row per line.

    mat    -- matrix indexable as mat[i], each row iterable over numbers
    labels -- row labels, parallel to mat; None labels each row with
              phylip_padding(str(i))
    out    -- output file name or stream
    """
    out = util.open_stream(out, "w")

    out.write("%d\n" % len(mat))

    for i in range(len(mat)):
        # identity test: "== None" would compare elementwise on array-like
        # labels
        if labels is None:
            out.write("%8s " % phylip_padding(str(i)))
        else:
            out.write("%8s " % labels[i])

        for val in mat[i]:
            out.write("%10f " % val)
        out.write("\n")
def readTreeDistrib(filename):
    """Read a tab-delimited table of per-name lengths into a dict.

    The first field of every line is the name (converted to int when it
    consists only of digits); the remaining fields are converted with
    float() and stored as that name's lengths.
    """
    table = {}

    for record in util.open_stream(filename):
        fields = record.split("\t")
        label = fields[0]
        key = int(label) if label.isdigit() else label
        table[key] = map(float, fields[1:])

    return table
def read_tree_color_map(filename):
    """Build a tree color map from a tab-delimited color file.

    Every line holds four fields: expression, red, green, blue.  The
    (expression, [r, g, b]) pairs are turned into a name -> color function
    with phylo.make_gene2species, and the result of treelib.tree_color_map
    applied to a function looking up node.name is returned.
    """
    rules = []
    for record in util.open_stream(filename):
        fields = record.rstrip().split("\t")
        # exactly four fields are required; anything else raises here
        expr, red, green, blue = fields
        rules.append([expr, map(float, (red, green, blue))])

    name2color = phylo.make_gene2species(rules)

    def leafmap(node):
        # color a node by its name
        return name2color(node.name)

    return treelib.tree_color_map(leafmap)
def writeTreeDistrib(out, lengths):
    """Write a dict of per-node length lists as tab-delimited lines.

    out     -- output file name or stream
    lengths -- dict mapping a node (treelib.TreeNode or plain key) to a
               list of lengths

    Entries with no lengths, or whose lengths are all equal, are skipped.
    Each line is the node name (or str of the key) followed by the lengths
    formatted with "%f".
    """
    out = util.open_stream(out, "w")

    for key, values in lengths.items():
        # keep only entries that show some variation
        informative = len(values) != 0 and max(values) != min(values)
        if not informative:
            continue

        label = key.name if isinstance(key, treelib.TreeNode) else key
        out.write(str(label))

        for value in values:
            out.write("\t%f" % value)
        out.write("\n")
def iter_fasta(filename, keyfunc=firstword, valuefunc = lambda x: x):
    """Iterate over the (key, value) records of a FASTA file.

    filename  -- FASTA file name or stream
    keyfunc   -- turns a header line (without '>' and trailing whitespace)
                 into the record key
    valuefunc -- applied to the concatenated sequence before it is yielded

    Sequence lines are right-stripped and concatenated.  Lines seen while
    the current key is the empty string are ignored, and such records are
    not yielded.
    """
    key = ""
    chunks = []

    for line in util.open_stream(filename):
        if line.startswith(">"):
            # a new header closes the record collected so far
            if key != "":
                yield (key, valuefunc("".join(chunks)))
            key = keyfunc(line[1:].rstrip())
            chunks = []
        elif key != "":
            chunks.append(line.rstrip())

    # final record
    if key != "":
        yield (key, valuefunc("".join(chunks)))
def make_color_legend(filename, colormap, start, end, step,
                      width=100, height=10):
    """Draw a colormap as a strip of colored rectangles in an SVG file.

    filename -- output file name or stream
    colormap -- object whose get(value) gives the color for a value
    start, end, step -- values sampled via util.frange(start, end + step, step)
    width, height    -- size of the SVG canvas
    """
    from rasmus import util

    canvas = svg.Svg(util.open_stream(filename, "w"))
    canvas.beginSvg(width, height)

    # horizontal drawing units per unit of value
    xscale = float(width) / (end + step - start)

    for value in util.frange(start, end + step, step):
        color = colormap.get(value)
        canvas.rect((value-start) * xscale, 0,
                    step*xscale, height,
                    color, color)

    canvas.endSvg()
def writeMultiBlocks(filename, multiblocks):
    """Write multiblocks as tab-delimited lines, one multiblock per line.

    filename    -- output file name or stream
    multiblocks -- iterable of objects with a ``segments`` sequence

    Every segment contributes five fields: genome name, chromosome name,
    start, end and direction.  A newline is written for every multiblock,
    including one without segments.
    """
    out = util.open_stream(filename, "w")

    def fields(segment):
        # the five tab-joined columns describing one segment
        return "\t".join([segment.genome.name,
                          segment.chrom.name,
                          str(segment.start),
                          str(segment.end),
                          str(segment.direction)])

    for multiblock in multiblocks:
        # first segment without a leading separator ...
        if len(multiblock.segments) > 0:
            out.write(fields(multiblock.segments[0]))

        # ... every further segment preceded by a tab
        for segment in multiblock.segments[1:]:
            out.write("\t")
            out.write(fields(segment))
        out.write("\n")
def read(self, filename):
    """Register an indexed FASTA file and return the keys it provides.

    filename -- path of the FASTA file; its index is read from
                filename + ".index" (tab-delimited: key, start, end)

    Sets self.width from guess_fasta_width, and fills self.index and
    self.filelookup for every key found in the index.

    Raises Exception if guess_fasta_width reports -1.
    """
    # open fasta
    fasta = util.open_stream(filename, "rb")

    # estimate column width
    self.width = guess_fasta_width(filename)
    if self.width == -1:
        raise Exception("lines do not have consistent width")

    # read index
    found = []
    index_rows = util.DelimReader(filename + ".index", delim="\t")
    for record in index_rows:
        key, start, end = record
        found.append(key)
        self.index[key] = (int(start), int(end))
        self.filelookup[key] = fasta

    # keys that were read
    return found
def consense_from_file(intrees, verbose=True, args="y"):
    """Run PHYLIP ``consense`` on the trees stored in a file.

    intrees -- file name or stream holding the input trees, one per line
    verbose -- passed through to exec_phylip
    args    -- argument string passed through to exec_phylip

    Returns (tree read from "outtree", number of input lines).
    """
    # read all trees
    tree_lines = util.open_stream(intrees).readlines()
    ntrees = len(tree_lines)

    # NOTE(review): "intree"/"outtree" are relative paths; presumably
    # create_temp_dir switches into the temporary directory -- confirm
    cwd = create_temp_dir()

    out = open("intree", "w")
    out.writelines(tree_lines)
    out.close()

    exec_phylip("consense", args, verbose)
    consensus = treelib.read_tree("outtree")

    cleanup_temp_dir(cwd)
    return consensus, ntrees
def write_regions(filename, regions, duprange, lossrange):
    """Write count-vector regions as tab-delimited lines.

    filename  -- output file name or stream
    regions   -- dict mapping a count vector to its region, which must be
                 a geometry.Polygon, geometry.LineString or geometry.Point
    duprange  -- sequence written at the start of the header line
    lossrange -- sequence appended to duprange on the header line

    The first line holds duprange + lossrange; every following line holds
    the count vector, dumps(region) and region.area.

    Raises Exception for a region of any other type.
    """
    valid_types = (geometry.Polygon,      # non-degenerate
                   geometry.LineString,   # degenerate
                   geometry.Point)        # degenerate

    out = util.open_stream(filename, 'w')
    try:
        print >>out, '\t'.join(map(str, duprange + lossrange))

        for cv, region in regions.iteritems():
            if not isinstance(region, valid_types):
                raise Exception("count vector (%s) has invalid region (%s)"
                                % (cv, dumps(region)))

            # the region is stored in its dumps() form, not as a raw
            # coordinate list
            toks = (cv, dumps(region), region.area)
            print >>out, '\t'.join(map(str, toks))
    finally:
        # close the file even when an invalid region aborts the loop
        out.close()