Example #1
0
def _build_trees_by_chrom(blocks, verbose=False):
  """
  Group genome alignment blocks by chromosome and index each group.

  :param blocks:  iterable of genome alignment blocks; each block is read for
                  its ``chrom`` attribute and then handed to IntervalTree.
  :param verbose: if True, write progress messages to sys.stderr.
  :return: a dictionary keyed by chromosome name in which each value is the
           interval tree (built with openEnded=True) for that chromosome.
  """
  def report(message):
    # progress chatter is only emitted when the caller asked for it
    if verbose:
      sys.stderr.write(message)

  report("separating blocks by chromosome... ")
  grouped = {}
  for block in blocks:
    grouped.setdefault(block.chrom, []).append(block)
  report("done\n")

  report("building interval trees by chromosome... ")
  trees = {chrom: IntervalTree(grouped[chrom], openEnded=True)
           for chrom in grouped}
  report("done\n")

  return trees
Example #2
0
    def populate(self, filehandle):
        """
        Load this block's wig elements from a stream and index them.

        Seeks ``filehandle`` to ``self.fileLocation`` and parses each
        subsequent non-blank line with parseWigString, appending the resulting
        element to ``self.data``. Reading stops at end of file, or at the
        first element that is on a different chromosome than ``self.chrom`` or
        that starts beyond ``self.end``. An interval tree over ``self.data``
        (openEnded=True) is then stored in ``self.iTree``.

        If no element was collected, a notice is printed to standard output.
        When ``self.debug`` is set, trace messages are written to sys.stderr.

        :param filehandle: seekable stream that yields wig-formatted lines
                           when iterated.
        """
        filehandle.seek(self.fileLocation)
        if self.debug:
            sys.stderr.write("populating " + str(self) + "\n")
        for line in filehandle:
            # get next element; blank lines carry no data and are skipped
            line = line.strip()
            if self.debug:
                sys.stderr.write("\t" + "current line is " + str(line) + "\n")
            if line == "":
                continue
            e = parseWigString(line)

            # we're done if we've left this block's chrom, or if we've moved
            # beyond the end of this block's boundary.
            if e.chrom != self.chrom or e.start > self.end:
                break
            self.data.append(e)
        if self.debug:
            sys.stderr.write("built tree for " + str(self) + "\n")
        if len(self.data) == 0:
            # single-argument call form prints identically on Python 2 and 3
            print("empty! --> " + str(self))
        self.iTree = IntervalTree(self.data, openEnded=True)
Example #3
0
 def __load(self, verbose=False):
     """
     @summary: read every element produced by wigIterator for self.filename
               and store one interval tree per chromosome in self.itrees
     @param verbose: passed through unchanged to wigIterator
     """
     grouped = {}
     for elem in wigIterator(self.filename, verbose=verbose):
         grouped.setdefault(elem.chrom, []).append(elem)
     for chrom, members in grouped.items():
         self.itrees[chrom] = IntervalTree(members, openEnded=True)
Example #4
0
 def _buildTree(self, weights, candidates):
     """
     @summary: build an interval tree from cumulative weights; candidates[i]
               is attached to the interval that runs from the sum of all
               preceding weights to that sum plus weights[i]
     @param weights: sequence of numeric weights
     @param candidates: sequence of objects, indexed in step with weights
     @return: an IntervalTree of WeightedRandom.Interval objects
     """
     spans = []
     lower = 0.0
     for idx, weight in enumerate(weights):
         # index candidates explicitly so a short list still raises IndexError
         payload = candidates[idx]
         upper = lower + weight
         spans.append(WeightedRandom.Interval(lower, upper, payload))
         lower = upper
     return IntervalTree(spans)
Example #5
0
def intervalTrees(reffh, scoreType=int, verbose=False):
    """
    Build a dictionary of interval trees indexed by chrom from a BED stream or
    file

    :param reffh: This can be either a string, or a stream-like object. In the
                  former case, it is treated as a filename. The format of the
                  file/stream must be BED.
    :param scoreType: The data type for scores (the fifth column) in the BED
                      file.
    :param verbose: output progress messages to sys.stderr if True
    :return: a dictionary keyed by chromosome name; each value is an
             IntervalTree (openEnded=True) of the elements on that chromosome.
    """
    opened_here = isinstance(reffh, str)
    fh = open(reffh) if opened_here else reffh

    # load all the regions and split them into lists for each chrom
    elements = {}
    try:
        # loading progress needs a file name to count lines, so it is skipped
        # for stdin and for streams that do not expose a name
        show_load_progress = (verbose and fh != sys.stdin and
                              hasattr(fh, "name"))
        if show_load_progress:
            totalLines = linesInFile(fh.name)
            pind = ProgressIndicator(totalToDo=totalLines,
                                     messagePrefix="completed",
                                     messageSuffix="of loading " + fh.name)
        for element in BEDIterator(fh, scoreType=scoreType, verbose=verbose):
            elements.setdefault(element.chrom, []).append(element)
            if show_load_progress:
                pind.done += 1
                pind.showProgress()
    finally:
        # only close what this function opened; caller-supplied streams
        # remain the caller's responsibility
        if opened_here:
            fh.close()

    # create an interval tree for each list
    trees = {}
    if verbose:
        totalLines = len(elements)
        pind = ProgressIndicator(totalToDo=totalLines,
                                 messagePrefix="completed",
                                 messageSuffix="of making interval trees")
    for chrom in elements:
        trees[chrom] = IntervalTree(elements[chrom], openEnded=True)
        if verbose:
            pind.done += 1
            pind.showProgress()

    return trees
Example #6
0
 def __init__(self, whole_chrom_files, partial_chrom_files, factory):
   """
   Constructor; see class docstring for param details.

   Builds one interval tree per chromosome from the (chrom, start, end) keys
   of partial_chrom_files and stores them in self.partial_trees.

   :raise GenomeAlignmentError: if a chromosome named in a partial_chrom_files
                                key is also present in whole_chrom_files, or
                                if any partial_chrom_files key does not
                                intersect exactly one interval in the tree
                                for its chromosome.
   """
   self.current = None
   self.current_key = None
   self.factory = factory
   self.whole_chrom_files = whole_chrom_files
   self.partial_trees = {}
   by_chrom = {}
   for chrom, start, end in partial_chrom_files:
     k = (chrom, start, end)
     v = partial_chrom_files[k]
     if chrom in whole_chrom_files:
       raise GenomeAlignmentError("chromosome " + str(chrom) + " has both " +
                                  "a whole-chromosome file and a " +
                                  "partial-chromosome file")
     if chrom not in by_chrom:
       by_chrom[chrom] = []
     interval = GenomicInterval(chrom, start, end)
     by_chrom[chrom].append(JITGenomeAlignmentKeyInterval(interval, v))
   for chrom in by_chrom:
     self.partial_trees[chrom] = IntervalTree(by_chrom[chrom])
   # every key must hit exactly one interval (itself) in its chromosome's tree
   for chrom, start, end in partial_chrom_files:
     hits = self.partial_trees[chrom].intersectingInterval(start, end)
     if len(hits) != 1:
       raise GenomeAlignmentError("expected exactly one partial-chromosome " +
                                  "interval intersecting " + str(chrom) +
                                  ":" + str(start) + "-" + str(end) +
                                  ", found " + str(len(hits)))
Example #7
0
def intervalTreesFromList(inElements, verbose=False, openEnded=False):
    """
    Build a dictionary, indexed by chrom name, of interval trees for each
    chrom.

    :param inElements: list of genomic intervals. Members of the list must
                       have chrom, start and end fields; no other
                       restrictions.
    :param verbose: output progress messages to sys.stderr if True
    :param openEnded: passed through as the second argument to IntervalTree
    :return: dictionary mapping each chrom name to the IntervalTree built from
             the elements on that chrom
    """
    # first pass: bucket the elements by chromosome
    by_chrom = {}
    if verbose:
        progress = ProgressIndicator(totalToDo=len(inElements),
                                     messagePrefix="completed",
                                     messageSuffix="of parsing")
    for item in inElements:
        by_chrom.setdefault(item.chrom, []).append(item)
        if verbose:
            progress.done += 1
            progress.showProgress()

    # second pass: one interval tree per bucket
    if verbose:
        progress = ProgressIndicator(totalToDo=len(by_chrom),
                                     messagePrefix="completed",
                                     messageSuffix="of making interval trees")
    trees = {}
    for chrom, members in by_chrom.items():
        trees[chrom] = IntervalTree(members, openEnded)
        if verbose:
            progress.done += 1
            progress.showProgress()

    return trees
Example #8
0
    def build(self):
        """
        Build the block index for the wig data read from self.handle.

        Reads self.handle line by line from its current position, parses each
        non-blank line with parseWigString and packs consecutive elements into
        WigBlock objects. A new block is started when there is no current
        block, when the current block reports isfull(), or when the chromosome
        changes. Finished blocks are collected in self.blocksByChrom, and one
        IntervalTree (openEnded=True) per chromosome is stored in self.itrees.

        If self.verbose is set but progress reporting cannot be initialised
        for the handle, a warning is written to sys.stderr and self.verbose is
        switched off.

        :raise IndexedWigError: if an entry's chromosome compares less than a
                                chromosome already seen, or if an entry starts
                                before the end of the previous entry on the
                                same chromosome.
        """
        currentBlock = None
        at = self.handle.tell()
        seenChroms = set()
        lastIndexSeen = -1

        def closeBlock(block):
            # file a finished block under its chromosome
            if self.debug:
                sys.stderr.write("closed block: " + str(block) + "\n")
            self.blocksByChrom.setdefault(block.chrom, []).append(block)

        if self.verbose:
            try:
                pind = ProgressIndicator(
                    totalToDo=os.path.getsize(self.handle.name),
                    messagePrefix="completed",
                    messageSuffix="of building index for " + self.handle.name)
            except Exception:
                # best effort only: fall back to silent operation, but do not
                # swallow KeyboardInterrupt/SystemExit as a bare except would
                sys.stderr.write("IndexedWig -- warning: " +
                                 "unable to show progress for stream\n")
                self.verbose = False

        ### note, for loop seems to buffer the file and so tell() gives a
        ### location that is not where the current line was read from, so
        ### we stick to readline instead.
        rline = None
        while rline != "":
            # get the next element; readline returns "" only at end of file,
            # which also strips to "" and so falls out through the loop test
            rline = self.handle.readline()
            line = rline.strip()
            if line == "":
                continue
            e = parseWigString(line)

            # keep track of what chroms we've seen for checking order
            if e.chrom not in seenChroms:
                seenChroms.add(e.chrom)
                lastIndexSeen = -1

            # check chrom order is ok
            for seenChrom in seenChroms:
                if seenChrom > e.chrom:
                    msg = "wig file is not sorted, entry for chrom " + str(seenChrom) +\
                          " appears after entry for " + str(e.chrom)
                    raise IndexedWigError(msg)
            # check position order is ok
            if e.start < lastIndexSeen:
                msg = "wig file is not sorted, entry for chrom " + str(e.chrom) +\
                      " at " + str(e.start) + " appears after " + str(lastIndexSeen)
                raise IndexedWigError(msg)

            # update the last index we've seen
            lastIndexSeen = e.end

            # debugging message if the current block is full
            if self.debug is True:
                sys.stderr.write("processing " + str(e))
                if currentBlock is not None:
                    sys.stderr.write("; is current block full?" +
                                     str(currentBlock.isfull()) + "\n")
                else:
                    sys.stderr.write("\n")

            # we might need to make a new block for this element
            if currentBlock is None or currentBlock.isfull() or \
               currentBlock.chrom != e.chrom:
                if self.debug:
                    sys.stderr.write("making new block with " + str(e) + "\n")
                if currentBlock is not None:
                    closeBlock(currentBlock)
                # 'at' is the handle offset recorded before this line was read
                currentBlock = WigBlock(at, e, self.blocksize)

            # add the element to the current block
            currentBlock.add(e)

            at = self.handle.tell()

            if self.verbose:
                pind.done = self.handle.tell()
                pind.showProgress()

        # don't forget to add the final block
        if currentBlock is not None:
            closeBlock(currentBlock)

        # build the interval trees
        for chrom in self.blocksByChrom:
            self.itrees[chrom] = IntervalTree(self.blocksByChrom[chrom],
                                              openEnded=True)