Esempio n. 1
0
    def writeIndexes(self):
        """
        Create and fill the 'leftIndex' and 'rightIndex' files under self._path.

        Both files are created with one slot per index bin, i.e.
        ceil(self._chrSize / CompBinManager.getIndexBinSize()) slots.

        - leftIndex: for each bin, the index of the first element whose right
          end lies beyond the start position of the bin.
        - rightIndex: for each bin, the index of the first element whose left
          end lies at or after the end position of the bin.

        Bins that are not reached while looping over the elements are handed to
        self._fillRestOfIndexFile, together with the last enumerated element
        index plus one.
        """
        indexElementCount = int(math.ceil(1.0 * self._chrSize / CompBinManager.getIndexBinSize()))
        self._leftIndexFile = OutputFile(self._path, 'leftIndex', indexElementCount, allowAppend=False)
        self._rightIndexFile = OutputFile(self._path, 'rightIndex', indexElementCount, allowAppend=False)

        # Left ends: taken from the start file if there is one; otherwise derived
        # from the end file as a 0 followed by all ends except the last.
        if self._startFile:
            elementLefts = self._startFile.getContents()
        else:
            elementLefts = np.r_[0, self._endFile.getContents()[:-1]]

        # Right ends: start + 1 when there is no end file; otherwise the ends
        # themselves, with the first entry dropped when there is no start file.
        if not self._endFile:
            elementRights = self._startFile.getContents() + 1
        else:
            elementRights = self._endFile.getContents()
            if not self._startFile:
                elementRights = elementRights[1:]

        # NOTE(review): the element index is preset to 0 so that it is defined
        # even if the array is empty; in that case 1 (not 0) is passed on to
        # _fillRestOfIndexFile below -- confirm that this is intended.
        leftBin = 0
        rightElIdx = 0
        for rightElIdx, rightEnd in enumerate(elementRights):
            while rightEnd > leftBin * CompBinManager.getIndexBinSize():
                self._leftIndexFile.write(rightElIdx)
                leftBin += 1

        rightBin = 0
        leftElIdx = 0
        for leftElIdx, leftEnd in enumerate(elementLefts):
            while leftEnd >= (rightBin + 1) * CompBinManager.getIndexBinSize():
                self._rightIndexFile.write(leftElIdx)
                rightBin += 1

        self._fillRestOfIndexFile(leftBin, rightElIdx + 1, self._leftIndexFile)
        self._fillRestOfIndexFile(rightBin, leftElIdx + 1, self._rightIndexFile)
Esempio n. 2
0
 def testIsCompBin(self):
     # (start, end) pairs on chr21 of 'TestGenome' that must be recognised as
     # comp bins. NOTE(review): the values suggest a comp bin size of 100 and a
     # chr21 length of 46944323 in the test setup -- confirm against the fixture.
     compBinBounds = [(0, 100),
                      (200, 300),
                      (46944300, 46944323)]

     # (start, end) pairs that must NOT be recognised as comp bins.
     nonCompBinBounds = [(0, 40),
                         (10, 100),
                         (10, 200),
                         (100, 300),
                         (46944300, 46944322),
                         (46944300, 46944324)]

     for start, end in compBinBounds:
         candidate = GenomeRegion('TestGenome', 'chr21', start, end)
         self.assertTrue(CompBinManager.isCompBin(candidate))

     for start, end in nonCompBinBounds:
         candidate = GenomeRegion('TestGenome', 'chr21', start, end)
         self.assertFalse(CompBinManager.isCompBin(candidate))
Esempio n. 3
0
 def loadTrackView(trackData, region, borderHandling, allowOverlaps, trackName=[]):
     """
     Build a TrackView for `region` from the arrays in `trackData`.

     trackData : see TrackSource.getTrackData {'id' : smartmemmap}. Must also
         provide a `boundingRegionShelve` attribute (may be None).
     region : see GenomeRegion
     borderHandling, allowOverlaps : passed on unchanged to TrackView.
     trackName : not used by this function; kept so that existing callers
         that pass it keep working.

     Returns the TrackView. If the track representation is not dense, the
     view is additionally sliced via sliceElementsAccordingToGenomeAnchor().

     Raises IOError if the track representation is not dense and 'leftIndex'
     or 'rightIndex' is missing from trackData.
     """
     brShelve = trackData.boundingRegionShelve
     brInfo = brShelve.getBoundingRegionInfo(region) if brShelve is not None else None

     # Names that are not "extra" arrays. Built once, as a set, instead of
     # concatenating keys() with a list for every array name; the latter also
     # fails on Python 3, where dict views do not support '+'.
     nonExtraArrayNames = set(RESERVED_PREFIXES)
     nonExtraArrayNames.update(['leftIndex', 'rightIndex'])
     extraArrayNames = [arrayName for arrayName in trackData if arrayName not in nonExtraArrayNames]

     reservedArrays = [TrackViewLoader._getArray(trackData, arrayName, brInfo) for arrayName in RESERVED_PREFIXES]
     extraArrays = [TrackViewLoader._getArray(trackData, arrayName, brInfo) for arrayName in extraArrayNames]
     trackFormat = TrackFormat( *(reservedArrays + [OrderedDict(zip(extraArrayNames, extraArrays))]) )

     if trackFormat.reprIsDense():
         # Dense representation: genome positions are used directly as array
         # indexes, relative to the start of the bounding region if there is one.
         if brInfo is None:
             leftIndex = region.start
             rightIndex = region.end
         else:
             leftIndex = region.start - brInfo.start
             rightIndex = region.end - brInfo.start
     else:
         # Non-dense representation: look up the slice borders in the
         # precomputed index arrays, using the bins that hold the first and
         # the last position of the region.
         leftBin = CompBinManager.getBinNumber(region.start)
         rightBin = CompBinManager.getBinNumber(region.end-1)

         if trackData.get('leftIndex') is None or trackData.get('rightIndex') is None:
             raise IOError('Preprocessed track not found. TrackData: ' + ', '.join(trackData.keys()))

         leftIndex = TrackViewLoader._getArray(trackData, 'leftIndex', brInfo, leftBin)
         rightIndex = TrackViewLoader._getArray(trackData, 'rightIndex', brInfo, rightBin)

     # Arrays that are not present (None) are passed on as None.
     slicedReservedArrays = [(array[leftIndex:rightIndex] if array is not None else None) for array in reservedArrays]
     slicedExtraArrays = [(array[leftIndex:rightIndex] if array is not None else None) for array in extraArrays]

     argList = [region] + slicedReservedArrays + [borderHandling, allowOverlaps] + [OrderedDict(zip(extraArrayNames, slicedExtraArrays))]
     tv = TrackView( *(argList) )

     if not trackFormat.reprIsDense():
         tv.sliceElementsAccordingToGenomeAnchor()
     return tv
Esempio n. 4
0
def createDirPath(trackName, genome, chr=None, allowOverlaps=False, basePath=Config.PROCESSED_DATA_PATH):
    """
    Return the directory path of a track, joined with os.sep from:
    basePath, the index bin size, 'withOverlaps' or 'noOverlaps', the genome,
    the parts of trackName and, if given, the chromosome.

    If trackName starts with 'redirect', genome and chr are replaced by
    trackName[1] and trackName[2], and only trackName[4:] is used as the
    track name (trackName[3] is a description).

    NOTE(review): the example below presumably assumes an empty basePath and
    an index bin size of 100000 -- confirm against the test configuration.

    >>> createDirPath(['trackname'],'genome','chr1')
    '/100000/noOverlaps/genome/trackname/chr1'
    """
    from gtrackcore_memmap.util.CompBinManager import CompBinManager

    if len(trackName) > 0 and trackName[0] == 'redirect':
        # Layout: ['redirect', genome, chr, description, <track name parts...>]
        genome, chr, trackName = trackName[1], trackName[2], trackName[4:]

    overlapsDirName = 'withOverlaps' if allowOverlaps else 'noOverlaps'

    pathParts = [basePath, str(CompBinManager.getIndexBinSize()), overlapsDirName, genome]
    pathParts.extend(trackName)
    if chr is not None:
        pathParts.append(chr)

    return os.sep.join(pathParts)
Esempio n. 5
0
 def testGetOffset(self):
     # Each case is (expected offset, first argument, second argument).
     # NOTE(review): the arguments look like (position, bin number) -- confirm
     # against CompBinManager.getOffset. The last case expects a negative offset.
     offsetCases = [(0, 0, 0),
                    (0, 200, 2),
                    (14, 314, 3),
                    (-86, 314, 4)]

     for expectedOffset, firstArg, secondArg in offsetCases:
         self.assertEqual(expectedOffset, CompBinManager.getOffset(firstArg, secondArg))
Esempio n. 6
0
 def testGetNumOfBins(self):
     # Each case is (expected number of bins, region start, region end),
     # for regions on chr1 of hg18. The first case is an empty region.
     numOfBinsCases = [(0, 0, 0),
                       (1, 0, 100),
                       (2, 200, 400),
                       (4, 67, 314)]

     for expectedCount, start, end in numOfBinsCases:
         region = GenomeRegion('hg18', 'chr1', start, end)
         self.assertEqual(expectedCount, CompBinManager.getNumOfBins(region))
Esempio n. 7
0
 def testGetPosFromBinNumber(self):
     # Each case is (expected position, bin number).
     posCases = [(0, 0),
                 (200, 2),
                 (300, 3)]

     for expectedPos, binNumber in posCases:
         self.assertEqual(expectedPos, CompBinManager.getPosFromBinNumber(binNumber))
Esempio n. 8
0
 def testGetBinNumber(self):
     # Each case is (expected bin number, position).
     binNumberCases = [(0, 0),
                       (2, 200),
                       (3, 314)]

     for expectedBin, pos in binNumberCases:
         self.assertEqual(expectedBin, CompBinManager.getBinNumber(pos))
Esempio n. 9
0
 def _assertSplitUserBin(self, compBins, start, end):
     # Helper: assert that splitting the user bin chr1:[start, end) of hg18
     # yields exactly the regions described by the (start, end) pairs in
     # compBins, compared via AssertList with self.assertEqual.
     userBin = GenomeRegion('hg18', 'chr1', start, end)

     expectedRegions = []
     for binStart, binEnd in compBins:
         expectedRegions.append(GenomeRegion('hg18', 'chr1', binStart, binEnd))

     actualRegions = CompBinManager.splitUserBin(userBin)
     AssertList(expectedRegions, actualRegions, self.assertEqual)
Esempio n. 10
0
    def storeBoundingRegions(self, boundingRegionTuples, genomeElementChrList, sparse):
        """
        Validate the bounding regions of a track and store them in the shelve
        file at self._fn.

        boundingRegionTuples : iterable of objects with a `region`
            (GenomeRegion) and an `elCount` attribute. Regions must be grouped
            by chromosome and, within a chromosome, be sorted, non-overlapping
            and non-adjoining.
        genomeElementChrList : chromosomes that contain track elements.
        sparse : True if the track type representation is sparse, False if it
            is dense. For dense tracks, the length of each bounding region
            must equal its element count.

        One BoundingRegionInfo is stored per bounding region, keyed by
        chromosome and region start. NOTE(review): judging from the values
        passed here, its arguments are (start, end, startIdx, endIdx,
        startBinIdx, endBinIdx) -- confirm against BoundingRegionInfo.
          - dense: startIdx/endIdx are the running element counts before and
            after the region; the bin indexes are 0.
          - sparse: startIdx/endIdx span all elements of the chromosome, and
            the bin indexes span the bins of the whole chromosome, counted
            cumulatively over the chromosomes in genomeElementChrList.
            Chromosomes not in genomeElementChrList get all four set to 0.

        Raises InvalidFormatError if any of the constraints above is violated,
        or if a chromosome in genomeElementChrList has no bounding region.

        After writing, blocks until self.fileExists() returns True.
        """
        assert sparse in [False, True]

        tempContents = OrderedDict()

        genomeElementChrs = set(genomeElementChrList)    
        lastRegion = None
        chrStartIdxs = OrderedDict()
        chrEndIdxs = OrderedDict()
        totElCount = 0
        
        for br in boundingRegionTuples:
            if lastRegion is None or br.region.chr != lastRegion.chr:
                # First bounding region of a chromosome. If the chromosome has
                # been seen before, the input is not grouped by chromosome.
                if br.region.chr in tempContents:
                    raise InvalidFormatError("Error: bounding region (%s) is not grouped with previous bounding regions of the same chromosome (sequence)." % br.region)
                
                lastRegion = None
                tempContents[br.region.chr] = OrderedDict()
                if sparse:
                    chrStartIdxs[br.region.chr] = totElCount
            else:
                if br.region < lastRegion:
                    raise InvalidFormatError("Error: bounding regions in the same chromosome (sequence) are unsorted: %s > %s." % (lastRegion, br.region))
                if lastRegion.overlaps(br.region):
                    raise InvalidFormatError("Error: bounding regions '%s' and '%s' overlap." % (lastRegion, br.region))
                if lastRegion.end == br.region.start:
                    raise InvalidFormatError("Error: bounding regions '%s' and '%s' are adjoining (there is no gap between them)." % (lastRegion, br.region))
            
            if len(br.region) < 1:
                raise InvalidFormatError("Error: bounding region '%s' does not have positive length." % br.region)
                
            if not sparse and len(br.region) != br.elCount:
                raise InvalidFormatError("Error: track type representation is dense, but the length of bounding region '%s' is not equal to the element count: %s != %s" % (br.region, len(br.region), br.elCount))
            
            # For sparse tracks the element indexes are filled in per
            # chromosome in the second pass below.
            startIdx, endIdx = (totElCount, totElCount + br.elCount) if not sparse else (None, None)
            totElCount += br.elCount
            if sparse:
                # Updated for every region, so it ends up as the element count
                # after the last bounding region of the chromosome.
                chrEndIdxs[br.region.chr] = totElCount
            
            tempContents[br.region.chr][br.region.start] = BoundingRegionInfo(br.region.start, br.region.end, startIdx, endIdx, 0, 0)
            
            lastRegion = br.region
        
        if sparse:
            # Second pass: replace the placeholder infos with chromosome-wide
            # element and bin index ranges.
            totBinCount = 0
            for chr in tempContents:
                chrLen = GenomeInfo.getChrLen(self._genome, chr)
                numBinsInChr = CompBinManager.getNumOfBins(GenomeRegion(start=0, end=chrLen))
                for key in tempContents[chr].keys():
                    startBinIdx = totBinCount
                    endBinIdx = totBinCount + numBinsInChr
                    brInfo = tempContents[chr][key]
                    
                    if chr in genomeElementChrs:
                        tempContents[chr][key] = BoundingRegionInfo(brInfo.start, brInfo.end, \
                                                                    chrStartIdxs[chr], chrEndIdxs[chr], \
                                                                    startBinIdx, endBinIdx)
                    else:
                        # A chromosome without track elements must not report
                        # any elements in its bounding regions.
                        if chrEndIdxs[chr] - chrStartIdxs[chr] > 0:
                            raise InvalidFormatError("Error: bounding region '%s' has incorrect element count: %s > 0" % (GenomeRegion(chr=chr, start=brInfo.start, end=brInfo.end), chrEndIdxs[chr] - chrStartIdxs[chr]))
                        tempContents[chr][key] = BoundingRegionInfo(brInfo.start, brInfo.end, 0, 0, 0, 0)
                
                # Only chromosomes with track elements take up bins.
                if chr in genomeElementChrs:
                    totBinCount += numBinsInChr
        
        if len(genomeElementChrs - set(tempContents.keys())) > 0:
            raise InvalidFormatError('Error: some chromosomes (sequences) contains data, but has no bounding regions: %s' % ', '.join(genomeElementChrs - set(tempContents.keys())))
        
        ensurePathExists(self._fn)
        
        # Store keys and values as parallel tuples per chromosome.
        for chr in tempContents:
            brInfoDict = tempContents[chr]
            tempContents[chr] = BrInfoHolder(tuple(brInfoDict.keys()), tuple(brInfoDict.values()))
        
        brShelve = safeshelve.open(self._fn)
        try:
            brShelve.update(tempContents)
        finally:
            # Always release the shelve, also if the update fails.
            brShelve.close()
        
        # Wait until the shelve file is visible. The imports are kept inside
        # the loop so that they only happen if waiting is actually needed.
        while not self.fileExists():
            from gtrackcore_memmap.application.LogSetup import logMessage
            logMessage("Bounding region shelve file '%s' has yet to be created" % self._fn)
            import time
            time.sleep(0.2)