def populate(self, filehandle):
    """
    Load the wig elements covered by this block and build its tree.

    Seeks filehandle to self.fileLocation, then parses each non-blank line
    with parseWigString and appends the resulting element to self.data.
    Reading stops at the first element that is on a different chromosome to
    this block or that starts after self.end. Afterwards self.iTree is set
    to an IntervalTree built from self.data.

    @param filehandle: stream to read from; it must support seek() and
                       iteration over its lines.
    """
    filehandle.seek(self.fileLocation)
    if self.debug:
        sys.stderr.write("populating " + str(self) + "\n")

    for line in filehandle:
        # get next element
        line = line.strip()
        if self.debug:
            sys.stderr.write("\t" + "current line is " + str(line) + "\n")
        if line == "":
            continue
        e = parseWigString(line)

        # we're done if we've left this block's chrom, or if we've moved
        # beyond the end of this block's boundary.
        if e.chrom != self.chrom or e.start > self.end:
            break
        self.data.append(e)

    if self.debug:
        sys.stderr.write("built tree for " + str(self) + "\n")
    if not self.data:
        # parenthesised single-argument form prints identically under
        # Python 2 and is also valid Python 3
        print("empty! --> " + str(self))
    self.iTree = IntervalTree(self.data, openEnded=True)
def populate(self, filehandle):
    """
    Load the wig elements covered by this block and build its tree.

    Seeks filehandle to self.fileLocation, then parses each non-blank line
    with parseWigString and appends the resulting element to self.data.
    Reading stops at the first element that is on a different chromosome to
    this block or that starts after self.end. Afterwards self.iTree is set
    to an IntervalTree built from self.data.

    @param filehandle: stream to read from; it must support seek() and
                       iteration over its lines.
    """
    filehandle.seek(self.fileLocation)
    if self.debug:
        sys.stderr.write("populating " + str(self) + "\n")

    for line in filehandle:
        # get next element
        line = line.strip()
        if self.debug:
            sys.stderr.write("\t" + "current line is " + str(line) + "\n")
        if line == "":
            continue
        e = parseWigString(line)

        # we're done if we've left this block's chrom, or if we've moved
        # beyond the end of this block's boundary.
        if e.chrom != self.chrom or e.start > self.end:
            break
        self.data.append(e)

    if self.debug:
        sys.stderr.write("built tree for " + str(self) + "\n")
    if not self.data:
        # parenthesised single-argument form prints identically under
        # Python 2 and is also valid Python 3
        print("empty! --> " + str(self))
    self.iTree = IntervalTree(self.data, openEnded=True)
def regularWigIterator(fd, verbose=False, sortedby=None, scoreType=int):
    """
    Iterate over the elements of a wig stream, one per non-blank line.

    Each non-blank line is stripped and parsed with parseWigString; the
    parsed element is yielded once the optional sort-order checks pass.

    @param fd:        the input; it is passed to openFD and the result is
                      iterated line by line.
    @param verbose:   if True, try to report progress after each line read.
                      If setting up progress reporting raises AttributeError
                      a warning is written to stderr and reporting is
                      switched off.
    @param sortedby:  if not None, the order the input stream must be in.
                      Only ITERATOR_SORTED_START is checked here: starts
                      must not decrease within a chromosome, and each change
                      of chromosome must be to one that compares greater
                      than the previous one and was not recorded at an
                      earlier change.
    @param scoreType: passed through to parseWigString as scoreType.
    @raise WigIteratorError: if sortedby is ITERATOR_SORTED_START and the
                             stream is not sorted.
    """
    def streamName():
        # Fall back to str(fd) when fd has no name attribute (e.g. it is a
        # filename string), so the sort error below is the one that is raised
        # rather than an AttributeError from building its message.
        name = getattr(fd, "name", None)
        return str(fd) if name is None else name

    if verbose:
        try:
            totalLines = linesInFile(fd)
            pind = ProgressIndicator(totalToDo=totalLines,
                                     messagePrefix="completed",
                                     messageSuffix="of processing " +
                                                   getFDName(fd))
        except AttributeError:
            sys.stderr.write("WigIterator -- warning: " +
                             "unable to show progress for stream\n")
            verbose = False

    chromsSeen = set()
    prev = None

    # NOTE(review): the error messages below say "bed file" although this
    # iterates wig input; the text is left unchanged in case callers match it.
    fh = openFD(fd)
    for line in fh:
        if verbose:
            pind.done += 1
            pind.showProgress()

        line = line.strip()
        if line == "":
            continue

        e = parseWigString(line, scoreType=scoreType)

        # on same chrom as the prev item, make sure order is right
        if prev is not None and sortedby is not None and \
                e.chrom == prev.chrom:
            if sortedby == ITERATOR_SORTED_START and prev.start > e.start:
                raise WigIteratorError(
                    "bed file " + streamName() +
                    " not sorted by start index - saw item " +
                    str(prev) + " before " + str(e))

        # starting a new chrom.. make sure we haven't already seen it
        if prev is not None and prev.chrom != e.chrom:
            if (sortedby == ITERATOR_SORTED_START) and \
                    (e.chrom in chromsSeen or prev.chrom > e.chrom):
                raise WigIteratorError("BED file " + streamName() +
                                       " not sorted by chrom")
            chromsSeen.add(e.chrom)

        # all good..
        yield e
        prev = e
def regularWigIterator(fd, verbose=False, sortedby=None, scoreType=int):
    """
    Iterate over the elements of a wig stream, one per non-blank line.

    Each non-blank line is stripped and parsed with parseWigString; the
    parsed element is yielded once the optional sort-order checks pass.

    @param fd:        the input; it is passed to openFD and the result is
                      iterated line by line.
    @param verbose:   if True, try to report progress after each line read.
                      If setting up progress reporting raises AttributeError
                      a warning is written to stderr and reporting is
                      switched off.
    @param sortedby:  if not None, the order the input stream must be in.
                      Only ITERATOR_SORTED_START is checked here: starts
                      must not decrease within a chromosome, and each change
                      of chromosome must be to one that compares greater
                      than the previous one and was not recorded at an
                      earlier change.
    @param scoreType: passed through to parseWigString as scoreType.
    @raise WigIteratorError: if sortedby is ITERATOR_SORTED_START and the
                             stream is not sorted.
    """
    def streamName():
        # Fall back to str(fd) when fd has no name attribute (e.g. it is a
        # filename string), so the sort error below is the one that is raised
        # rather than an AttributeError from building its message.
        name = getattr(fd, "name", None)
        return str(fd) if name is None else name

    if verbose:
        try:
            totalLines = linesInFile(fd)
            pind = ProgressIndicator(totalToDo=totalLines,
                                     messagePrefix="completed",
                                     messageSuffix="of processing " +
                                                   getFDName(fd))
        except AttributeError:
            sys.stderr.write("WigIterator -- warning: " +
                             "unable to show progress for stream\n")
            verbose = False

    chromsSeen = set()
    prev = None

    # NOTE(review): the error messages below say "bed file" although this
    # iterates wig input; the text is left unchanged in case callers match it.
    fh = openFD(fd)
    for line in fh:
        if verbose:
            pind.done += 1
            pind.showProgress()

        line = line.strip()
        if line == "":
            continue

        e = parseWigString(line, scoreType=scoreType)

        # on same chrom as the prev item, make sure order is right
        if prev is not None and sortedby is not None and \
                e.chrom == prev.chrom:
            if sortedby == ITERATOR_SORTED_START and prev.start > e.start:
                raise WigIteratorError(
                    "bed file " + streamName() +
                    " not sorted by start index - saw item " +
                    str(prev) + " before " + str(e))

        # starting a new chrom.. make sure we haven't already seen it
        if prev is not None and prev.chrom != e.chrom:
            if (sortedby == ITERATOR_SORTED_START) and \
                    (e.chrom in chromsSeen or prev.chrom > e.chrom):
                raise WigIteratorError("BED file " + streamName() +
                                       " not sorted by chrom")
            chromsSeen.add(e.chrom)

        # all good..
        yield e
        prev = e
def testRecall(self):
    """
    @summary: here we're just testing that we can get back what we put in
              to the IndexedWig object in a random order
    """
    debug = False
    infh = DummyInputStream("\n".join(self.input))
    # the expected elements: one per non-blank input line, queried in a
    # shuffled order
    answers = [parseWigString(l) for l in self.input if l.strip() != ""]
    shuffle(answers)

    iwig = IndexedWig(infh, 2, 2, debug, verbose=False)
    if debug:
        sys.stderr.write("iwig structure is: \n" + str(iwig) + "\n")
    # parenthesised single-argument form prints identically under Python 2
    # and is also valid Python 3
    print("done")

    for e in answers:
        ans = iwig.lookup(e.chrom, e.start)
        if debug:
            sys.stderr.write("expect: " + str(e.score) + ", got: " +
                             str(ans) + "\n")
        assert e.score == ans.score
def testRecall(self):
    """
    @summary: here we're just testing that we can get back what we put in
              to the IndexedWig object in a random order
    """
    debug = False
    infh = DummyInputStream("\n".join(self.input))
    # the expected elements: one per non-blank input line, queried in a
    # shuffled order
    answers = [parseWigString(l) for l in self.input if l.strip() != ""]
    shuffle(answers)

    iwig = IndexedWig(infh, 2, 2, debug, verbose=False)
    if debug:
        sys.stderr.write("iwig structure is: \n" + str(iwig) + "\n")
    # parenthesised single-argument form prints identically under Python 2
    # and is also valid Python 3
    print("done")

    for e in answers:
        ans = iwig.lookup(e.chrom, e.start)
        if debug:
            sys.stderr.write("expect: " + str(e.score) + ", got: " +
                             str(ans) + "\n")
        assert e.score == ans.score
def build(self):
    """
    Index self.handle by grouping its wig elements into blocks.

    Reads the stream with readline() from its current position and parses
    each non-blank line with parseWigString. Consecutive elements are added
    to a WigBlock; a new block is started when there is no current block,
    the current block reports isfull(), or the chromosome changes. Each
    block is created with the stream offset recorded after the previous
    element was read (the starting offset for the first block). Closed
    blocks are appended to self.blocksByChrom under their chromosome, and
    once the stream is exhausted an IntervalTree over each chromosome's
    blocks is stored in self.itrees.

    If self.verbose is set but progress reporting cannot be set up, a
    warning is written to stderr and self.verbose is set to False.

    @raise IndexedWigError: if an element's chromosome compares less than
                            one already seen, or if an element starts
                            before the end of the previous element on the
                            same chromosome.
    """
    def closeBlock(block):
        # file a finished block under its chromosome
        if self.debug:
            sys.stderr.write("closed block: " + str(block) + "\n")
        if block.chrom not in self.blocksByChrom:
            self.blocksByChrom[block.chrom] = []
        self.blocksByChrom[block.chrom].append(block)

    currentBlock = None
    at = self.handle.tell()
    seenChroms = set()
    lastChrom = None
    lastIndexSeen = -1

    if self.verbose:
        try:
            pind = ProgressIndicator(
                totalToDo=os.path.getsize(self.handle.name),
                messagePrefix="completed",
                messageSuffix="of building index for " + self.handle.name)
        except Exception:
            # best effort only; Exception (rather than a bare except) lets
            # KeyboardInterrupt and SystemExit propagate
            sys.stderr.write("IndexedWig -- warning: " +
                             "unable to show progress for stream\n")
            self.verbose = False

    ### note, for loop seems to buffer the file and so tell() gives a
    ### location that is not where the current line was read from, so
    ### we stick to readline instead.
    while True:
        # get the next element; readline() returns "" only at end of stream
        rline = self.handle.readline()
        if rline == "":
            break
        line = rline.strip()
        if line == "":
            continue
        e = parseWigString(line)

        # The chrom checks can only change outcome when the chrom differs
        # from the previous element's, so skip them otherwise.
        if lastChrom is None or e.chrom != lastChrom:
            # keep track of what chroms we've seen for checking order
            if e.chrom not in seenChroms:
                seenChroms.add(e.chrom)
                lastIndexSeen = -1

            # check chrom order is ok
            for seenChrom in seenChroms:
                if seenChrom > e.chrom:
                    msg = "wig file is not sorted, entry for chrom " + \
                          str(seenChrom) + " appears after entry for " + \
                          str(e.chrom)
                    raise IndexedWigError(msg)
            lastChrom = e.chrom

        # check position order is ok
        if e.start < lastIndexSeen:
            msg = "wig file is not sorted, entry for chrom " + \
                  str(e.chrom) + " at " + str(e.start) + \
                  " appears after " + str(lastIndexSeen)
            raise IndexedWigError(msg)

        # update the last index we've seen
        lastIndexSeen = e.end

        # debugging message if the current block is full
        if self.debug:
            sys.stderr.write("processing " + str(e))
            if currentBlock is not None:
                sys.stderr.write("; is current block full?" +
                                 str(currentBlock.isfull()) + "\n")
            else:
                sys.stderr.write("\n")

        # we might need to make a new block for this element
        if currentBlock is None or currentBlock.isfull() or \
                currentBlock.chrom != e.chrom:
            if self.debug:
                sys.stderr.write("making new block with " + str(e) + "\n")
            if currentBlock is not None:
                closeBlock(currentBlock)
            currentBlock = WigBlock(at, e, self.blocksize)

        # add the element to the current block
        currentBlock.add(e)

        # remember where the next element starts
        at = self.handle.tell()
        if self.verbose:
            pind.done = at
            pind.showProgress()

    # don't forget to add the final block
    if currentBlock is not None:
        closeBlock(currentBlock)

    # build the interval trees
    for chrom in self.blocksByChrom:
        self.itrees[chrom] = IntervalTree(self.blocksByChrom[chrom],
                                          openEnded=True)
def build(self):
    """
    Index self.handle by grouping its wig elements into blocks.

    Reads the stream with readline() from its current position and parses
    each non-blank line with parseWigString. Consecutive elements are added
    to a WigBlock; a new block is started when there is no current block,
    the current block reports isfull(), or the chromosome changes. Each
    block is created with the stream offset recorded after the previous
    element was read (the starting offset for the first block). Closed
    blocks are appended to self.blocksByChrom under their chromosome, and
    once the stream is exhausted an IntervalTree over each chromosome's
    blocks is stored in self.itrees.

    If self.verbose is set but progress reporting cannot be set up, a
    warning is written to stderr and self.verbose is set to False.

    @raise IndexedWigError: if an element's chromosome compares less than
                            one already seen, or if an element starts
                            before the end of the previous element on the
                            same chromosome.
    """
    def closeBlock(block):
        # file a finished block under its chromosome
        if self.debug:
            sys.stderr.write("closed block: " + str(block) + "\n")
        if block.chrom not in self.blocksByChrom:
            self.blocksByChrom[block.chrom] = []
        self.blocksByChrom[block.chrom].append(block)

    currentBlock = None
    at = self.handle.tell()
    seenChroms = set()
    lastChrom = None
    lastIndexSeen = -1

    if self.verbose:
        try:
            pind = ProgressIndicator(
                totalToDo=os.path.getsize(self.handle.name),
                messagePrefix="completed",
                messageSuffix="of building index for " + self.handle.name)
        except Exception:
            # best effort only; Exception (rather than a bare except) lets
            # KeyboardInterrupt and SystemExit propagate
            sys.stderr.write("IndexedWig -- warning: " +
                             "unable to show progress for stream\n")
            self.verbose = False

    ### note, for loop seems to buffer the file and so tell() gives a
    ### location that is not where the current line was read from, so
    ### we stick to readline instead.
    while True:
        # get the next element; readline() returns "" only at end of stream
        rline = self.handle.readline()
        if rline == "":
            break
        line = rline.strip()
        if line == "":
            continue
        e = parseWigString(line)

        # The chrom checks can only change outcome when the chrom differs
        # from the previous element's, so skip them otherwise.
        if lastChrom is None or e.chrom != lastChrom:
            # keep track of what chroms we've seen for checking order
            if e.chrom not in seenChroms:
                seenChroms.add(e.chrom)
                lastIndexSeen = -1

            # check chrom order is ok
            for seenChrom in seenChroms:
                if seenChrom > e.chrom:
                    msg = "wig file is not sorted, entry for chrom " + \
                          str(seenChrom) + " appears after entry for " + \
                          str(e.chrom)
                    raise IndexedWigError(msg)
            lastChrom = e.chrom

        # check position order is ok
        if e.start < lastIndexSeen:
            msg = "wig file is not sorted, entry for chrom " + \
                  str(e.chrom) + " at " + str(e.start) + \
                  " appears after " + str(lastIndexSeen)
            raise IndexedWigError(msg)

        # update the last index we've seen
        lastIndexSeen = e.end

        # debugging message if the current block is full
        if self.debug:
            sys.stderr.write("processing " + str(e))
            if currentBlock is not None:
                sys.stderr.write("; is current block full?" +
                                 str(currentBlock.isfull()) + "\n")
            else:
                sys.stderr.write("\n")

        # we might need to make a new block for this element
        if currentBlock is None or currentBlock.isfull() or \
                currentBlock.chrom != e.chrom:
            if self.debug:
                sys.stderr.write("making new block with " + str(e) + "\n")
            if currentBlock is not None:
                closeBlock(currentBlock)
            currentBlock = WigBlock(at, e, self.blocksize)

        # add the element to the current block
        currentBlock.add(e)

        # remember where the next element starts
        at = self.handle.tell()
        if self.verbose:
            pind.done = at
            pind.showProgress()

    # don't forget to add the final block
    if currentBlock is not None:
        closeBlock(currentBlock)

    # build the interval trees
    for chrom in self.blocksByChrom:
        self.itrees[chrom] = IntervalTree(self.blocksByChrom[chrom],
                                          openEnded=True)