Exemple #1
0
    def populate(self, filehandle):
        """
        Read this block's elements from the file and index them.

        Seeks filehandle to self.fileLocation, then parses every non-blank
        line with parseWigString and appends the result to self.data. Reading
        stops at end of file, or at the first element that is on a different
        chrom or starts beyond self.end. Finally self.iTree is (re)built from
        self.data.

        @param filehandle: seekable stream that can be iterated line by line
        """
        filehandle.seek(self.fileLocation)
        if self.debug:
            sys.stderr.write("populating " + str(self) + "\n")

        for rawLine in filehandle:
            text = rawLine.strip()
            if self.debug:
                sys.stderr.write("\t" + "current line is " + str(text) + "\n")
            if not text:
                # blank lines carry no element
                continue

            element = parseWigString(text)

            # the block is finished once we hit another chrom, or an element
            # that starts past the end of this block's boundary
            leftBlock = element.chrom != self.chrom or element.start > self.end
            if leftBlock:
                break
            self.data.append(element)

        if self.debug:
            sys.stderr.write("built tree for " + str(self) + "\n")
        if not self.data:
            print("empty! --> " + str(self))
        self.iTree = IntervalTree(self.data, openEnded=True)
Exemple #2
0
    def populate(self, filehandle):
        """
        Fill self.data from the file and build self.iTree over it.

        The stream is positioned at self.fileLocation and consumed one line
        at a time; blank lines are skipped and every other line is parsed
        with parseWigString. The first parsed element whose chrom differs
        from self.chrom, or whose start is greater than self.end, ends the
        scan (as does end of file).

        @param filehandle: seekable stream that yields lines when iterated
        """
        debugging = self.debug
        filehandle.seek(self.fileLocation)
        if debugging:
            sys.stderr.write("populating " + str(self) + "\n")

        for raw in filehandle:
            stripped = raw.strip()
            if debugging:
                sys.stderr.write("\t" + "current line is " + str(stripped) +
                                 "\n")
            if stripped == "":
                continue

            item = parseWigString(stripped)
            # done when we leave this block's chrom ...
            if item.chrom != self.chrom:
                break
            # ... or move beyond the end of this block's boundary
            if item.start > self.end:
                break
            self.data.append(item)

        if debugging:
            sys.stderr.write("built tree for " + str(self) + "\n")
        if not self.data:
            print("empty! --> " + str(self))
        self.iTree = IntervalTree(self.data, openEnded=True)
Exemple #3
0
def regularWigIterator(fd, verbose=False, sortedby=None, scoreType=int):
    """
    Generator over the elements of a wig stream, one per non-blank line.

    @param fd:        the input; handed to openFD to obtain the handle that
                      is actually read
    @param verbose:   if True, show progress while reading. Progress display
                      is switched off (with a warning on stderr) when setting
                      it up raises AttributeError
    @param sortedby:  if not None, should be ITERATOR_SORTED_START,
                      indicating an order that the input stream must be
                      sorted in
    @param scoreType: forwarded to parseWigString as scoreType
    @return: yields each object produced by parseWigString, in file order
    @raise WigIteratorError: if sortedby is set and stream is not sorted
    """
    if verbose:
        try:
            totalLines = linesInFile(fd)
            pind = ProgressIndicator(totalToDo=totalLines,
                                     messagePrefix="completed",
                                     messageSuffix="of processing " +
                                     getFDName(fd))
        except AttributeError:
            sys.stderr.write("WigIterator -- warning: " +
                             "unable to show progress for stream\n")
            verbose = False

    # fd is not guaranteed to have a .name attribute; fall back to fd itself
    # so that building an error message can never mask the real error
    streamName = str(getattr(fd, "name", fd))

    chromsSeen = set()
    prev = None

    fh = openFD(fd)
    for line in fh:
        if verbose:
            pind.done += 1
            pind.showProgress()

        line = line.strip()
        if line == "":
            continue
        e = parseWigString(line, scoreType=scoreType)

        # on same chrom as the prev item, make sure order is right
        if prev is not None and sortedby is not None and e.chrom == prev.chrom:
            if sortedby == ITERATOR_SORTED_START and prev.start > e.start:
                raise WigIteratorError(
                    "bed file " + streamName +
                    " not sorted by start index - saw item " + str(prev) +
                    " before " + str(e))

        # starting a new chrom.. make sure we haven't already seen it
        if prev is None or prev.chrom != e.chrom:
            if prev is not None and (sortedby == ITERATOR_SORTED_START) and\
               (e.chrom in chromsSeen or prev.chrom > e.chrom):
                raise WigIteratorError("BED file " + streamName +
                                       " not sorted by chrom")
            # recorded for every chrom, including the very first one
            chromsSeen.add(e.chrom)

        # all good..
        yield e
        prev = e
Exemple #4
0
def regularWigIterator(fd, verbose=False, sortedby=None, scoreType=int):
  """
    Generator over the elements of a wig stream, one per non-blank line.

    @param fd:        the input; handed to openFD to obtain the handle that
                      is actually read
    @param verbose:   if True, show progress while reading. Progress display
                      is switched off (with a warning on stderr) when setting
                      it up raises AttributeError
    @param sortedby:  if not None, should be ITERATOR_SORTED_START,
                      indicating an order that the input stream must be
                      sorted in
    @param scoreType: forwarded to parseWigString as scoreType
    @return: yields each object produced by parseWigString, in file order
    @raise WigIteratorError: if sortedby is set and stream is not sorted
  """
  if verbose:
    try:
      totalLines = linesInFile(fd)
      pind = ProgressIndicator(totalToDo=totalLines,
                               messagePrefix="completed",
                               messageSuffix="of processing " + getFDName(fd))
    except AttributeError:
      sys.stderr.write("WigIterator -- warning: "
                       + "unable to show progress for stream\n")
      verbose = False

  # fd is not guaranteed to have a .name attribute; fall back to fd itself
  # so that building an error message can never mask the real error
  streamName = str(getattr(fd, "name", fd))

  chromsSeen = set()
  prev = None

  fh = openFD(fd)
  for line in fh:
    if verbose:
      pind.done += 1
      pind.showProgress()

    line = line.strip()
    if line == "":
      continue
    e = parseWigString(line, scoreType=scoreType)

    # on same chrom as the prev item, make sure order is right
    if prev is not None and sortedby is not None and e.chrom == prev.chrom:
      if sortedby == ITERATOR_SORTED_START and prev.start > e.start:
        raise WigIteratorError("bed file " + streamName
                               + " not sorted by start index - saw item "
                               + str(prev) + " before " + str(e))

    # starting a new chrom.. make sure we haven't already seen it
    if prev is None or prev.chrom != e.chrom:
      if prev is not None and (sortedby == ITERATOR_SORTED_START) and\
         (e.chrom in chromsSeen or prev.chrom > e.chrom):
        raise WigIteratorError("BED file " + streamName
                               + " not sorted by chrom")
      # recorded for every chrom, including the very first one
      chromsSeen.add(e.chrom)

    # all good..
    yield e
    prev = e
Exemple #5
0
    def testRecall(self):
        """
        @summary: round-trip check -- every element fed into the IndexedWig
                  object must be retrievable again, with the same score, when
                  looked up by (chrom, start) in a shuffled order
        """
        debug = False
        stream = DummyInputStream("\n".join(self.input))
        expected = [parseWigString(text) for text in self.input
                    if text.strip() != ""]
        shuffle(expected)

        index = IndexedWig(stream, 2, 2, debug, verbose=False)
        if debug:
            sys.stderr.write("iwig structure is: \n" + str(index) + "\n")
        print("done")

        for wanted in expected:
            found = index.lookup(wanted.chrom, wanted.start)
            if debug:
                report = "expect: " + str(wanted.score) + ", got: " + str(found) + "\n"
                sys.stderr.write(report)
            assert wanted.score == found.score
Exemple #6
0
    def testRecall(self):
        """
        @summary: checks that whatever was put into the IndexedWig object can
                  be fetched back out; lookups are made in a random order and
                  the returned score must match the parsed input score
        """
        debug = False
        joined = "\n".join(self.input)
        infh = DummyInputStream(joined)

        answers = []
        for raw in self.input:
            if raw.strip() != "":
                answers.append(parseWigString(raw))
        shuffle(answers)

        iwig = IndexedWig(infh, 2, 2, debug, verbose=False)
        if debug:
            sys.stderr.write("iwig structure is: \n" + str(iwig) + "\n")
        print("done")

        for expected in answers:
            got = iwig.lookup(expected.chrom, expected.start)
            if debug:
                sys.stderr.write("expect: " + str(expected.score) +
                                 ", got: " + str(got) + "\n")
            assert expected.score == got.score
Exemple #7
0
    def build(self):
        """
        Scan self.handle from its current position and build the block index.

        Every non-blank line is parsed with parseWigString. Consecutive
        elements are grouped into WigBlock objects: a new block is started
        for the first element, whenever the current block reports isfull(),
        and whenever the chrom changes. Each block is created with the file
        offset at which its first element was read. Finished blocks are
        collected per chrom in self.blocksByChrom, and once the whole stream
        has been read an IntervalTree over each chrom's blocks is stored in
        self.itrees.

        If self.verbose is set, progress is reported while reading; when the
        progress indicator cannot be set up a warning is written to stderr
        and self.verbose is switched off.

        @raise IndexedWigError: if an element's chrom compares lower than a
                                chrom already seen, or an element starts
                                before the end of the previous element on
                                the same chrom
        """
        currentBlock = None
        at = self.handle.tell()
        seenChroms = set()
        maxChromSeen = None
        lastIndexSeen = -1

        def closeBlock(block):
            # file a finished block under its chrom
            if self.debug:
                sys.stderr.write("closed block: " + str(block) + "\n")
            if block.chrom not in self.blocksByChrom:
                self.blocksByChrom[block.chrom] = []
            self.blocksByChrom[block.chrom].append(block)

        if self.verbose:
            try:
                pind = ProgressIndicator(
                    totalToDo=os.path.getsize(self.handle.name),
                    messagePrefix="completed",
                    messageSuffix="of building index for " + self.handle.name,
                )
            except Exception:
                # progress display is best-effort; unlike a bare except this
                # still lets KeyboardInterrupt / SystemExit through
                sys.stderr.write("IndexedWig -- warning: " + "unable to show progress for stream\n")
                self.verbose = False

        ### note, for loop seems to buffer the file and so tell() gives a
        ### location that is not where the current line was read from, so
        ### we stick to readline instead.
        rline = None
        while rline != "":
            # get the next element; readline returns "" only at end of file,
            # a blank line in the file still contains its newline
            rline = self.handle.readline()
            line = rline.strip()
            if line == "":
                continue
            e = parseWigString(line)

            # keep track of what chroms we've seen for checking order
            if e.chrom not in seenChroms:
                seenChroms.add(e.chrom)
                lastIndexSeen = -1
                if maxChromSeen is None or e.chrom > maxChromSeen:
                    maxChromSeen = e.chrom

            # check chrom order is ok; some seen chrom is greater than this
            # one exactly when the greatest seen chrom is
            if maxChromSeen > e.chrom:
                msg = (
                    "wig file is not sorted, entry for chrom "
                    + str(maxChromSeen)
                    + " appears after entry for "
                    + str(e.chrom)
                )
                raise IndexedWigError(msg)
            # check position order is ok
            if e.start < lastIndexSeen:
                msg = (
                    "wig file is not sorted, entry for chrom "
                    + str(e.chrom)
                    + " at "
                    + str(e.start)
                    + " appears after "
                    + str(lastIndexSeen)
                )
                raise IndexedWigError(msg)

            # update the last index we've seen
            lastIndexSeen = e.end

            # debugging message if the current block is full
            if self.debug:
                sys.stderr.write("processing " + str(e))
                if currentBlock is not None:
                    sys.stderr.write("; is current block full?" + str(currentBlock.isfull()) + "\n")
                else:
                    sys.stderr.write("\n")

            # we might need to make a new block for this element
            if currentBlock is None or currentBlock.isfull() or currentBlock.chrom != e.chrom:
                if self.debug:
                    sys.stderr.write("making new block with " + str(e) + "\n")
                if currentBlock is not None:
                    closeBlock(currentBlock)
                # 'at' is the offset the current line was read from
                currentBlock = WigBlock(at, e, self.blocksize)

            # add the element to the current block
            currentBlock.add(e)

            at = self.handle.tell()

            if self.verbose:
                pind.done = at
                pind.showProgress()

        # don't forget to add the final block
        if currentBlock is not None:
            closeBlock(currentBlock)

        # build the interval trees
        for chrom in self.blocksByChrom:
            self.itrees[chrom] = IntervalTree(self.blocksByChrom[chrom], openEnded=True)
Exemple #8
0
    def build(self):
        """
        Scan self.handle from its current position and build the block index.

        Every non-blank line is parsed with parseWigString. Consecutive
        elements are grouped into WigBlock objects: a new block is started
        for the first element, whenever the current block reports isfull(),
        and whenever the chrom changes. Each block is created with the file
        offset at which its first element was read. Finished blocks are
        collected per chrom in self.blocksByChrom, and once the whole stream
        has been read an IntervalTree over each chrom's blocks is stored in
        self.itrees.

        If self.verbose is set, progress is reported while reading; when the
        progress indicator cannot be set up a warning is written to stderr
        and self.verbose is switched off.

        @raise IndexedWigError: if an element's chrom compares lower than a
                                chrom already seen, or an element starts
                                before the end of the previous element on
                                the same chrom
        """
        currentBlock = None
        at = self.handle.tell()
        seenChroms = set()
        maxChromSeen = None
        lastIndexSeen = -1

        def closeBlock(block):
            # file a finished block under its chrom
            if self.debug:
                sys.stderr.write("closed block: " + str(block) + "\n")
            if block.chrom not in self.blocksByChrom:
                self.blocksByChrom[block.chrom] = []
            self.blocksByChrom[block.chrom].append(block)

        if self.verbose:
            try:
                pind = ProgressIndicator(
                    totalToDo=os.path.getsize(self.handle.name),
                    messagePrefix="completed",
                    messageSuffix="of building index for " + self.handle.name)
            except Exception:
                # progress display is best-effort; unlike a bare except this
                # still lets KeyboardInterrupt / SystemExit through
                sys.stderr.write("IndexedWig -- warning: " +
                                 "unable to show progress for stream\n")
                self.verbose = False

        ### note, for loop seems to buffer the file and so tell() gives a
        ### location that is not where the current line was read from, so
        ### we stick to readline instead.
        rline = None
        while rline != "":
            # get the next element; readline returns "" only at end of file,
            # a blank line in the file still contains its newline
            rline = self.handle.readline()
            line = rline.strip()
            if line == "":
                continue
            e = parseWigString(line)

            # keep track of what chroms we've seen for checking order
            if e.chrom not in seenChroms:
                seenChroms.add(e.chrom)
                lastIndexSeen = -1
                if maxChromSeen is None or e.chrom > maxChromSeen:
                    maxChromSeen = e.chrom

            # check chrom order is ok; some seen chrom is greater than this
            # one exactly when the greatest seen chrom is
            if maxChromSeen > e.chrom:
                msg = "wig file is not sorted, entry for chrom " + \
                      str(maxChromSeen) + \
                      " appears after entry for " + str(e.chrom)
                raise IndexedWigError(msg)
            # check position order is ok
            if e.start < lastIndexSeen:
                msg = "wig file is not sorted, entry for chrom " + str(e.chrom) +\
                      " at " + str(e.start) + " appears after " + str(lastIndexSeen)
                raise IndexedWigError(msg)

            # update the last index we've seen
            lastIndexSeen = e.end

            # debugging message if the current block is full
            if self.debug:
                sys.stderr.write("processing " + str(e))
                if currentBlock is not None:
                    sys.stderr.write("; is current block full?" +
                                     str(currentBlock.isfull()) + "\n")
                else:
                    sys.stderr.write("\n")

            # we might need to make a new block for this element
            if currentBlock is None or currentBlock.isfull() or \
               currentBlock.chrom != e.chrom:
                if self.debug:
                    sys.stderr.write("making new block with " + str(e) + "\n")
                if currentBlock is not None:
                    closeBlock(currentBlock)
                # 'at' is the offset the current line was read from
                currentBlock = WigBlock(at, e, self.blocksize)

            # add the element to the current block
            currentBlock.add(e)

            at = self.handle.tell()

            if self.verbose:
                pind.done = at
                pind.showProgress()

        # don't forget to add the final block
        if currentBlock is not None:
            closeBlock(currentBlock)

        # build the interval trees
        for chrom in self.blocksByChrom:
            self.itrees[chrom] = IntervalTree(self.blocksByChrom[chrom],
                                              openEnded=True)