def drawDistRuler(names, dists, scale=500,
                  padding=10, textsize=12, notchsize=2,
                  labelpadding=5, distsize=9,
                  filename=sys.stdout):
    """Draw an SVG ruler with one labelled notch per distance.

    names        -- labels, paired positionally with dists
    dists        -- distances; each one is drawn at x = scale * dist
    scale        -- multiplier converting a distance into drawing units
    padding      -- margin added around the drawing
    textsize     -- font size of the name labels
    notchsize    -- half-height of each notch on the ruler
    labelpadding -- gap between the ruler and its labels
    distsize     -- font size of the numeric distance labels
    filename     -- file name or stream that receives the SVG
    """

    # room above the ruler for the rotated names, sized by the longest name
    label_height = textsize * max(map(len, names))

    canvas = svg.Svg(util.open_stream(filename, "w"))
    canvas.beginSvg(scale * max(dists) + 2*padding,
                    2*padding + label_height + 5*distsize)

    # shift the origin so the ruler starts below the name labels
    canvas.beginTransform(("translate", padding, label_height + padding))

    # the ruler itself runs from the origin to the largest distance
    canvas.line(0, 0, scale * max(dists), 0)

    text_shift = textsize / 2.0
    dist_label_y = labelpadding + distsize * 3.5
    for label, dist in zip(names, dists):
        pos = scale * dist
        # name above the ruler, notch on it, numeric value below it
        canvas.text(label, pos + text_shift, -labelpadding, textsize,
                    angle=-90)
        canvas.line(pos, notchsize, pos, -notchsize)
        canvas.text("%.3f" % dist, pos + text_shift, dist_label_y,
                    distsize, angle=-90)

    canvas.endTransform()
    canvas.endSvg()
Example #2
0
def read_log(filename):
    """Iterate over the records of a DLCoal log.

    Lines that start with "seed:" are skipped.  Every other line is
    evaluated as a Python expression, with the name ``inf`` bound to
    ``util.INF``, and the resulting object is yielded.

    NOTE(review): each line is passed to eval(), so only read logs that
    come from a trusted source.
    """
    for record in util.open_stream(filename):
        if not record.startswith("seed:"):
            yield eval(record, {"inf": util.INF})
 def nextFile(self):
     """Open and return the next pending input file.

     The first entry of self.infiles is removed from the queue and
     returned opened through util.open_stream.  Returns False when no
     input files remain.
     """
     if len(self.infiles) == 0:
         return False

     # take the head of the queue and keep the remainder for later calls
     infile, self.infiles = self.infiles[0], self.infiles[1:]
     return util.open_stream(infile)
    def test_open_stream2(self):
        """open_stream should close file"""

        # a stream opened from a path must report itself closed after close()
        stream = util.open_stream(__file__)
        stream.close()
        assert stream.closed
Example #5
0
    def compute_cost(self, gtree):
        """Returns the deep coalescence cost

        The species tree and the gene tree (with leaves renamed through
        self.gene2species) are written to self.treefile, one per line, and
        the external program `cmd` is run on that file.  The cost is parsed
        from the first output line whose text before ':' is
        "deep coalecense".

        Raises Exception if the program exits with a non-zero return code,
        and AssertionError if no cost line is found in its output.
        """

        # write species tree and gene tree using species map
        treeout = util.open_stream(self.treefile, 'w')
        try:
            self.stree.write(treeout, oneline=True, writeData=lambda x: "")
            treeout.write('\n')
            gtree.write(treeout,
                        namefunc=lambda name: self.gene2species(name),
                        oneline=True, writeData=lambda x: "")
            treeout.write('\n')
        finally:
            treeout.close()

        # execute command
        proc = subprocess.Popen([cmd,
                                 '-i', self.treefile],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT,
                                universal_newlines=True)
        # communicate() drains the pipe while waiting; wait() alone can
        # deadlock once the child fills the OS pipe buffer
        output, _ = proc.communicate()
        ret = proc.returncode
        if ret != 0:
            raise Exception("genetreereport failed with returncode %d" % ret)

        # parse output
        # NOTE(review): "coalecense" is presumably spelled the way the
        # external tool prints it -- confirm before "correcting" it
        cost = None
        for line in output.splitlines():
            toks = line.split(':')
            if toks[0] == "deep coalecense":
                cost = int(toks[1])
                break
        assert cost is not None, "no deep coalescence cost found in output"

        return cost
    def optimize_model(self, gtree, aln):
        """Optimizes the IQTREE model

        Writes `gtree` and `aln` to temporary files, runs `iqtree-omp` on
        them with the configured cpu/model/type settings, and stores the
        first two lines of the resulting ``<pre>.treefix_tmp.sitelh`` file
        (slightly rewritten, see below) in self.bsitelh.

        Side effects: self.seqfile is set to the temporary alignment file,
        which is left on disk; the temporary tree file and all
        ``<pre>.treefix_tmp.*`` outputs are removed before returning.
        """

        fd, btreefile = tempfile.mkstemp('.btree')
        os.close(fd)
        gtree.write(btreefile)

        fd, seqfile = tempfile.mkstemp('.align')
        os.close(fd)
        out = util.open_stream(seqfile, "w")
        try:
            phylip.write_phylip_align(out, aln, strip_names=False)
        finally:
            out.close()
        self.seqfile = seqfile

        # NOTE(review): the command is assembled as a shell string, so the
        # substituted values must not contain shell metacharacters
        command = ('iqtree-omp -redo -nt %s -m %s -st %s -s %s -te %s'
                   ' -pre %s.treefix_tmp -wsl > /dev/null'
                   % (self.cpu, self.model, self.type, self.seqfile,
                      btreefile, self.pre))
        os.system(command)

        with open("%s.treefix_tmp.sitelh" % self.pre, 'r') as f:
            # first line: the first "1" becomes "2"; second line: the first
            # "Site_Lh" becomes "Tree1"
            # NOTE(review): presumably this turns a one-tree site-likelihood
            # header into a two-tree one -- confirm
            header = f.readline().replace("1", "2", 1)
            sitelh = f.readline().replace("Site_Lh", "Tree1", 1)
        self.bsitelh = header + sitelh

        os.system('rm %s.treefix_tmp.*' % self.pre)
        os.remove(btreefile)
Example #7
0
    def compute_cost(self, gtree):
        """Returns the DTL cost

        The species tree and the gene tree (with leaves renamed through
        self.gene2species) are written to self.treefile, one per line, and
        the external program `cmd` is run on that file with the configured
        duplication (-D), transfer (-T) and loss (-L) costs.  The cost is
        parsed from the first output line whose text before ':' is
        "The minimum reconciliation cost is".

        Raises Exception if the program exits with a non-zero return code,
        and AssertionError if no cost line is found in its output.
        """

        # write species tree and gene tree using species map
        treeout = util.open_stream(self.treefile, 'w')
        try:
            self.stree.write(treeout, oneline=True)
            treeout.write('\n')
            gtree.write(treeout,
                        namefunc=lambda name: self.gene2species(name),
                        oneline=True)
            treeout.write('\n')
        finally:
            treeout.close()

        # execute command
        proc = subprocess.Popen([cmd,
                                 '-i', self.treefile,
                                 '-D', str(self.dupcost),
                                 '-T', str(self.transfercost),
                                 '-L', str(self.losscost)],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT,
                                universal_newlines=True)
        # communicate() drains the pipe while waiting; wait() alone can
        # deadlock once the child fills the OS pipe buffer
        output, _ = proc.communicate()
        ret = proc.returncode
        if ret != 0:
            raise Exception("DTL failed with returncode %d" % ret)

        # parse output
        cost = None
        for line in output.splitlines():
            toks = line.split(':')
            if toks[0] == "The minimum reconciliation cost is":
                cost = int(toks[1])
                break
        assert cost is not None, "no reconciliation cost found in output"

        return cost
def iterPfam(filename):
    """Iterate over (query, domains) pairs found in a search report.

    filename -- file name or stream to read

    For every line starting with "Query sequence" the query name is taken
    from the rest of that line.  The reader then skips ahead to the
    "Parsed for domains:" line, drops the two header lines after it, and
    turns each following line into a Domain until a blank line or a line
    starting with whitespace is reached.

    Iteration ends at end of input.  If the input stops before the two
    header lines of a query have been read, that query is dropped.
    """
    infile = util.open_stream(filename)

    def getQuery(infile):
        """Return the next query name, or None at end of input."""
        for line in infile:
            if line.startswith("Query sequence"):
                return line.rstrip().replace("Query sequence: ", "")
        return None

    def getDomains(infile):
        """Return the Domain list of the current query, or None if the
        input ends inside the table header."""
        for line in infile:
            if line.startswith("Parsed for domains:"):
                break

        # skip the two header lines; next() with a default works on both
        # Python 2 and 3 and never leaks StopIteration into the generator
        for _ in range(2):
            if next(infile, None) is None:
                return None

        domains = []
        for line in infile:
            if len(line) <= 1 or line[0] in "\t ":
                break
            domains.append(Domain(line))

        return domains

    while True:
        query = getQuery(infile)
        if query is None:
            break

        domains = getDomains(infile)
        if domains is None:
            # truncated report: nothing more can be parsed
            break

        yield query, domains
def make_color_legend(filename, colormap, start, end, step, width=100, height=10, display=False):
    """Render a colormap as a horizontal strip of colored boxes in an SVG.

    filename -- output file name or stream; None writes to a temporary
                file that is removed again before returning
    colormap -- object whose get(value) returns the color for a value
    start, end, step -- values sampled with util.frange(start, end + step,
                step); one box is drawn per sampled value
    width, height -- size of the SVG
    display  -- when true, run the external `display` program on the file
    """
    from rasmus import util

    temp = filename is None
    if temp:
        filename = util.tempfile(".", "colormap", ".svg")

    legend = svg.Svg(util.open_stream(filename, "w"))
    legend.beginSvg(width, height)

    # horizontal drawing units per unit of value
    xscale = float(width) / (end + step - start)

    for value in util.frange(start, end + step, step):
        fill = colormap.get(value)
        legend.rect((value - start) * xscale, 0, step * xscale, height,
                    fill, fill)

    legend.endSvg()
    legend.close()

    # display
    if display:
        os.system("display %s" % filename)

    # clean up temp files
    if temp:
        os.remove(filename)
Example #10
0
 def write(self, filename=sys.stdout, delim="\t"):
     """Write the table, header first, to a file or stream.

     filename -- a string is opened as a file (and remembered in
                 self.filename for later saving); a stream is written
                 to directly
     delim    -- separator placed between the fields of a row

     A column that is missing from a row is written as an empty field.
     """

     # remember filename for later saving
     if isinstance(filename, str):
         self.filename = filename

     out = util.open_stream(filename, "w")
     self.write_header(out, delim=delim)

     # local alias, looked up once instead of once per cell
     types = self.types

     # write data
     for row in self:
         cells = [types[header].__str__(row[header]) if header in row else ''
                  for header in self.headers]
         out.write(delim.join(cells) + "\n")
Example #11
0
def write_fasta_ordered(filename, names, seqs, width=None):
    """Write parallel lists of names and sequences as FASTA records.

    filename -- file name or stream to write to
    names    -- record names, paired positionally with seqs
    seqs     -- sequences
    width    -- line width handed to util.printwrap for each sequence
    """

    stream = util.open_stream(filename, "w")

    for label, sequence in izip(names, seqs):
        stream.write(">%s\n" % label)
        util.printwrap(sequence, width, out=stream)
Example #12
0
def write_boot_trees(filename, trees, counts=None):
    """Write trees as one-line newick, repeating each by its count.

    filename -- file name or stream to write to
    trees    -- trees providing get_one_line_newick()
    counts   -- number of times each tree is written, paired positionally
                with trees; defaults to once per tree
    """
    out = util.open_stream(filename, "w")

    # identity test: "== None" would be evaluated element-wise (and fail)
    # for array-like counts
    if counts is None:
        counts = [1] * len(trees)

    for tree, count in zip(trees, counts):
        for _ in range(count):
            out.write(tree.get_one_line_newick() + "\n")
Example #13
0
    def recon_root(self, gtree, newCopy=True, returnCost=False):
        """
        Returns the rerooted tree with min deep coalescence cost
        Generalizes compute_cost to multiple trees.

        Every rerooting produced by self._reroot_helper is written to
        self.treefile after the species tree, the external program `cmd`
        is run once on that file, and the per-tree costs are read from its
        "[ gene tree #N ]" / "[ deep coalecense: C ]" output lines.

        Returns the tree rerooted on the minimum-cost edge, or the tuple
        (tree, mincost) when returnCost is true.

        Raises Exception if the program exits with a non-zero return code,
        and AssertionError if a cost is missing for any rerooting.
        """

        # write species tree and gene tree using species map
        treeout = util.open_stream(self.treefile, 'w')
        try:
            self.stree.write(treeout, oneline=True, writeData=lambda x: "")
            treeout.write('\n')
            edges = []
            # NOTE: the loop rebinds gtree and edge on purpose; after the
            # loop they refer to the last rerooting that was produced
            for gtree, edge in self._reroot_helper(gtree, newCopy=newCopy,
                                                   returnEdge=True):
                gtree.write(treeout,
                            namefunc=lambda name: self.gene2species(name),
                            oneline=True, writeData=lambda x: "")
                treeout.write('\n')
                edges.append(edge)
        finally:
            treeout.close()

        # execute command
        proc = subprocess.Popen([cmd,
                                 '-i', self.treefile],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT,
                                universal_newlines=True)
        # communicate() drains the pipe while waiting; wait() alone can
        # deadlock once the child fills the OS pipe buffer
        output, _ = proc.communicate()
        ret = proc.returncode
        if ret != 0:
            raise Exception("genetreereport failed with returncode %d" % ret)

        # parse output
        tree_header = re.compile(r"\[ gene tree #(\d+) \]")
        cost_line = re.compile(r"\[ deep coalecense: (\d+) \]")
        i = None
        n = len(edges)
        costs = [None]*n
        for line in output.splitlines():
            m = tree_header.match(line)
            if m:
                # the tool numbers trees from 1
                i = int(m.group(1)) - 1

            if i is not None:
                m = cost_line.match(line)
                if m:
                    costs[i] = int(m.group(1))
        assert all(c is not None for c in costs)

        # find minimum cost tree
        ndx, mincost = min(enumerate(costs), key=lambda it: it[1])
        minroot = edges[ndx]
        # reroot only when the last rerooting is not already the best one
        if edge != minroot:
            node1, node2 = minroot
            if node1.parent != node2:
                node1, node2 = node2, node1
            assert node1.parent == node2
            treelib.reroot(gtree, node1.name, newCopy=False, keepName=True)

        if returnCost:
            return gtree, mincost
        else:
            return gtree
Example #14
0
    def recon_root(self, gtree, newCopy=True, returnCost=False):
        """
        Returns the rerooted tree with min DTL cost
        Generalizes compute_cost to multiple trees.

        Every rerooting produced by self._reroot_helper is written to
        self.treefile after the species tree, and the external program
        `cmd` is run once on that file with the configured duplication
        (-D), transfer (-T) and loss (-L) costs.  Costs are read from the
        "The minimum reconciliation cost is:" lines and matched to the
        rerootings by order of appearance.

        Returns the tree rerooted on the minimum-cost edge, or the tuple
        (tree, mincost) when returnCost is true.

        Raises Exception if the program exits with a non-zero return code,
        and AssertionError if the number of reported costs does not match
        the number of rerootings.
        """

        # write species tree and gene tree using species map
        treeout = util.open_stream(self.treefile, 'w')
        try:
            self.stree.write(treeout, oneline=True)
            treeout.write('\n')
            edges = []
            # NOTE: the loop rebinds gtree and edge on purpose; after the
            # loop they refer to the last rerooting that was produced
            for gtree, edge in self._reroot_helper(gtree, newCopy=newCopy,
                                                   returnEdge=True):
                gtree.write(treeout,
                            namefunc=lambda name: self.gene2species(name),
                            oneline=True)
                treeout.write('\n')
                edges.append(edge)
        finally:
            treeout.close()

        # execute command
        proc = subprocess.Popen([cmd,
                                 '-i', self.treefile,
                                 '-D', str(self.dupcost),
                                 '-T', str(self.transfercost),
                                 '-L', str(self.losscost)],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT,
                                universal_newlines=True)
        # communicate() drains the pipe while waiting; wait() alone can
        # deadlock once the child fills the OS pipe buffer
        output, _ = proc.communicate()
        ret = proc.returncode
        if ret != 0:
            raise Exception("DTL failed with returncode %d" % ret)

        # parse output
        i = 0
        n = len(edges)
        costs = [None]*n
        for line in output.splitlines():
            toks = line.split(':')
            if toks[0] == "The minimum reconciliation cost is":
                assert i < n
                costs[i] = int(toks[1])
                i += 1
        assert all(c is not None for c in costs)

        # find minimum cost tree
        ndx, mincost = min(enumerate(costs), key=lambda it: it[1])
        minroot = edges[ndx]
        # reroot only when the last rerooting is not already the best one
        if edge != minroot:
            node1, node2 = minroot
            if node1.parent != node2:
                node1, node2 = node2, node1
            assert node1.parent == node2
            treelib.reroot(gtree, node1.name, newCopy=False, keepName=True)

        if returnCost:
            return gtree, mincost
        else:
            return gtree
Example #15
0
 def write(self, filename=sys.stdout, names=None, width=80):
     """Write sequences in Fasta format

     filename -- file name or stream to write to
     names    -- keys to write, in order; defaults to self.names
     width    -- line width handed to util.printwrap for each sequence
     """

     stream = util.open_stream(filename, "w")

     selected = self.names if names is None else names
     for key in selected:
         stream.write(">" + key + "\n")
         util.printwrap(self[key], width, out=stream)
    def test_open_stream1(self):
        """open_stream shouldn't close existing stream"""

        wrapped = util.open_stream(sys.stdin)

        # the returned object must still expose the stream's attributes
        getattr(wrapped, "read")

        # closing the returned object must leave the real stdin open
        wrapped.close()
        assert not sys.stdin.closed
Example #17
0
def make_fasta_index(filename):
    """Map each FASTA header to the file position recorded after it.

    For every line starting with ">", the rest of the line (trailing
    whitespace removed) becomes a key whose value is infile.tell() at the
    moment that line has been read.

    (Original author's note: a faster C program called formatfa exists.)
    """

    infile = util.open_stream(filename)
    offsets = {}

    for line in util.SafeReadIter(infile):
        if not line.startswith(">"):
            continue
        header = line[1:].rstrip()
        offsets[header] = infile.tell()

    return offsets
def write_gff(filename, regions, format=GFF3):
    """
    Write regions to a file or stream, one region at a time.

    filename - a filename or file stream
    regions  - a list of Region objects
    format   - object whose write_region(region, out=...) emits a single
               region (defaults to GFF3)
    """

    stream = util.open_stream(filename, "w")

    for item in regions:
        format.write_region(item, out=stream)
Example #19
0
def read_gene2species(*filenames):
    """
    Reads a gene2species file

    Any number of file names (or streams) may be given; their delimited,
    comment-stripped rows are concatenated in argument order.

    Returns a function that will map gene names to species names.
    """

    # read each file exactly once; with no arguments the mapping is built
    # from an empty rule list
    maps = []
    for filename in filenames:
        maps.extend(util.read_delim(util.skip_comments(
            util.open_stream(filename))))
    return make_gene2species(maps)
Example #20
0
 def optimize_model(self, gtree, aln):
     """Optimizes the IQTREE model

     Writes `gtree` and `aln` to fresh temporary files and records their
     paths in self.btreefile and self.seqfile.
     """

     # gene tree -> temporary .btree file
     tree_fd, tree_path = tempfile.mkstemp('.btree')
     os.close(tree_fd)
     gtree.write(tree_path)
     self.btreefile = tree_path

     # alignment -> temporary .align file in phylip format
     aln_fd, aln_path = tempfile.mkstemp('.align')
     os.close(aln_fd)
     aln_out = util.open_stream(aln_path, "w")
     phylip.write_phylip_align(aln_out, aln, strip_names=False)
     aln_out.close()
     self.seqfile = aln_path
Example #21
0
def write_dist_matrix(mat, labels=None, out=sys.stdout):
    """Write a distance matrix: a count line, then one line per row.

    mat    -- sequence of rows of numbers
    labels -- optional row labels, paired positionally with the rows;
              when omitted the row index (passed through phylip_padding)
              is used instead
    out    -- file name or stream to write to

    The first line holds the number of rows.  Each following line holds
    the row label formatted with "%8s", two spaces, and every value
    formatted with "%10f " (note the trailing space).
    """
    out = util.open_stream(out, "w")

    out.write("%d\n" % len(mat))

    for i, row in enumerate(mat):
        # identity test: "== None" would be evaluated element-wise (and
        # fail) for array-like labels
        if labels is None:
            out.write("%8s  " % phylip_padding(str(i)))
        else:
            out.write("%8s  " % labels[i])

        for val in row:
            out.write("%10f " % val)
        out.write("\n")
Example #22
0
def readTreeDistrib(filename):
    """Read a table of lengths keyed by name.

    filename -- file name or stream to read

    Each line holds a name followed by tab-separated numbers.  A name made
    only of digits is converted to int.  Returns a dict mapping each name
    to the list of its numbers as floats.
    """
    infile = util.open_stream(filename)
    lengths = {}

    for line in infile:
        tokens = line.split("\t")
        name = tokens[0]

        if name.isdigit():
            name = int(name)

        # build a real list: a lazy map() object (Python 3) supports
        # neither len() nor repeated iteration
        lengths[name] = [float(token) for token in tokens[1:]]

    return lengths
Example #23
0
def read_tree_color_map(filename):
    """Build a tree color map from a tab-delimited color file.

    Each line of the file holds four tab-separated fields: an expression
    followed by red, green and blue values, which are converted to float.
    The [expression, color] pairs are handed to phylo.make_gene2species,
    and the lookup it returns is applied to each node's name.
    """
    rules = []
    for line in util.open_stream(filename):
        expr, red, green, blue = line.rstrip().split("\t")
        rules.append([expr, map(float, (red, green, blue))])

    name2color = phylo.make_gene2species(rules)

    return treelib.tree_color_map(lambda node: name2color(node.name))
Example #24
0
def writeTreeDistrib(out, lengths):
    """Write a table of lengths keyed by node, one line per node.

    out     -- file name or stream to write to
    lengths -- dict mapping a node (a treelib.TreeNode or a plain name)
               to a list of numbers

    Entries whose list is empty or holds a single repeated value are
    skipped.  Each written line is the node name followed by the numbers,
    each formatted with "%f" and preceded by a tab.
    """
    out = util.open_stream(out, "w")

    for node, lens in lengths.items():
        # nothing informative to write for empty or constant lists
        if len(lens) == 0 or max(lens) == min(lens):
            continue

        is_node = isinstance(node, treelib.TreeNode)
        label = str(node.name) if is_node else str(node)
        values = "".join(["\t%f" % length for length in lens])
        out.write(label + values + "\n")
Example #25
0
def iter_fasta(filename, keyfunc=firstword, valuefunc=lambda x: x):
    """Iterate through the sequences of a FASTA file

    filename  -- file name or stream to read
    keyfunc   -- applied to each header line (without the leading ">" and
                 trailing whitespace) to produce the key
    valuefunc -- applied to each complete sequence before it is yielded

    Yields (key, value) tuples in file order.  Lines before the first
    header, and records whose key is the empty string, are skipped.
    """
    key = ""
    # pieces of the current sequence, joined once per record instead of
    # growing a string line by line
    chunks = []

    for line in util.open_stream(filename):
        if line.startswith(">"):
            if key != "":
                yield (key, valuefunc("".join(chunks)))
            key = keyfunc(line[1:].rstrip())
            chunks = []
        elif key != "":
            chunks.append(line.rstrip())
    if key != "":
        yield (key, valuefunc("".join(chunks)))
Example #26
0
def make_color_legend(filename, colormap, start, end, step, 
                    width=100, height=10):
    """Render a colormap as a horizontal strip of colored boxes in an SVG.

    filename -- output file name or stream
    colormap -- object whose get(value) returns the color for a value
    start, end, step -- values sampled with util.frange(start, end + step,
                step); one box is drawn per sampled value
    width, height -- size of the SVG
    """
    from rasmus import util

    legend = svg.Svg(util.open_stream(filename, "w"))
    legend.beginSvg(width, height)

    # horizontal drawing units per unit of value
    xscale = float(width) / (end + step - start)

    for value in util.frange(start, end + step, step):
        fill = colormap.get(value)
        legend.rect((value - start) * xscale, 0,
                    step * xscale, height,
                    fill, fill)

    legend.endSvg()
Example #27
0
def writeMultiBlocks(filename, multiblocks):
    """Write multiblocks as tab-delimited lines, one line per multiblock.

    filename    -- file name or stream to write to
    multiblocks -- objects with a `segments` list

    Each segment contributes five fields: genome name, chromosome name,
    start, end and direction.  A multiblock without segments produces an
    empty line.
    """
    out = util.open_stream(filename, "w")

    for multiblock in multiblocks:
        fields = []
        for segment in multiblock.segments:
            fields.extend([segment.genome.name,
                           segment.chrom.name,
                           str(segment.start),
                           str(segment.end),
                           str(segment.direction)])
        out.write("\t".join(fields) + "\n")
Example #28
0
 def read(self, filename):
     """Register the sequences of an indexed FASTA file.

     Opens `filename` in binary mode, stores the guessed line width in
     self.width, and loads `filename + ".index"` (tab-delimited rows of
     key, start, end) into self.index.  Every key is also mapped to the
     opened file in self.filelookup.

     Returns the list of keys read from the index, in file order.
     Raises Exception when guess_fasta_width reports -1.
     """
     # open fasta
     fasta = util.open_stream(filename, "rb")

     # estimate column width
     self.width = guess_fasta_width(filename)
     if self.width == -1:
         raise Exception("lines do not have consistent width")

     # read index
     keys = []
     index_rows = util.DelimReader(filename + ".index", delim="\t")
     for name, first, last in index_rows:
         keys.append(name)
         self.index[name] = (int(first), int(last))
         self.filelookup[name] = fasta

     # return keys read
     return keys
Example #29
0
def consense_from_file(intrees, verbose=True, args="y"):
    """Run the phylip `consense` program on a file of trees.

    intrees -- file name or stream holding the input trees, one per line
    verbose -- passed through to exec_phylip
    args    -- passed through to exec_phylip

    Returns the tuple (tree, ntrees): the tree read from the program's
    "outtree" file and the number of input lines.
    """

    # read all trees
    trees = util.open_stream(intrees).readlines()
    ntrees = len(trees)

    # NOTE(review): "intree"/"outtree" are relative paths, so this
    # presumably relies on create_temp_dir changing the working directory
    # and cleanup_temp_dir(cwd) restoring it -- confirm
    cwd = create_temp_dir()
    try:
        with open("intree", "w") as out:
            out.writelines(trees)

        exec_phylip("consense", args, verbose)

        tree = treelib.read_tree("outtree")
    finally:
        # always release the temp dir, even when a step above fails
        cleanup_temp_dir(cwd)
    return tree, ntrees
Example #30
0
def write_regions(filename, regions, duprange, lossrange):
    """Write regions to a tab-delimited file.

    filename  -- file name or stream to write to
    regions   -- dict mapping a count vector to its region, which must be
                 a geometry.Polygon, geometry.LineString or geometry.Point
    duprange, lossrange -- sequences written, concatenated, as the first
                 line of the file

    Each following line holds the count vector, the output of
    dumps(region) and the region's area.

    Raises Exception for a region of any other type.
    """
    # Polygon is the non-degenerate case; LineString and Point are the
    # degenerate ones
    valid_types = (geometry.Polygon, geometry.LineString, geometry.Point)

    out = util.open_stream(filename, 'w')
    try:
        out.write('\t'.join(map(str, duprange + lossrange)) + '\n')
        for cv, region in regions.items():
            if not isinstance(region, valid_types):
                raise Exception("count vector (%s) has invalid region (%s)" % (cv, dumps(region)))

            toks = (cv, dumps(region), region.area)
            out.write('\t'.join(map(str, toks)) + '\n')
    finally:
        out.close()