Beispiel #1
0
    def getTrackData(self, trackName, genome, chr, allowOverlaps, forceChrFolders=False):
        """
        Collect the files found in a track directory into a TrackData object.

        When a bounding region shelve file exists for the track and
        forceChrFolders is not set, chr is replaced by None before the
        directory path is created. Entries whose name starts with '.' and
        subdirectories are skipped. A bounding region file attaches a shelve
        (registered once per full path in self._fileDict) to the result; every
        other file name is parsed with parseMemmapFileFn and the object
        returned by self._getFile is stored under the parsed prefix.

        Returns the populated TrackData object.
        """
        trackData = TrackData()
        brShelve = BoundingRegionShelve(genome, trackName, allowOverlaps)

        useChrFolders = forceChrFolders or not brShelve.fileExists()
        if not useChrFolders:
            chr = None

        dirPath = createDirPath(trackName, genome, chr, allowOverlaps)

        for fileName in os.listdir(dirPath):
            # Plain concatenation is deliberate: the resulting string is used
            # as a key in self._fileDict.
            fullPath = dirPath + os.sep + fileName

            if fileName.startswith('.') or os.path.isdir(fullPath):
                continue

            if not isBoundingRegionFileName(fileName):
                prefix, elementDim, dtypeDim, dtype = parseMemmapFileFn(fileName)

                # Each prefix is expected to occur only once per directory.
                assert prefix not in trackData
                trackData[prefix] = self._getFile(chr, dirPath, fullPath, elementDim, dtype, dtypeDim)
                continue

            # Bounding region file: reuse the shelve already registered for
            # this path, or register the one created above.
            if fullPath not in self._fileDict:
                self._fileDict[fullPath] = brShelve
            trackData.boundingRegionShelve = self._fileDict[fullPath]

        return trackData
 def _getBoundingRegionShelve(self, trackName):
     """
     Return the BoundingRegionShelve of a track, or None if it has none.

     None is returned when trackName is None, an empty list, or a virtual
     track according to ExternalTrackManager.isVirtualTrack. Otherwise the
     shelve for self.genome and trackName (allowOverlaps=False) is returned.

     Raises BoundingRegionsNotAvailableError if the shelve file does not
     exist.
     """
     if trackName in [None, []] or ExternalTrackManager.isVirtualTrack(trackName):
         return None

     trackShelve = BoundingRegionShelve(self.genome, trackName, allowOverlaps=False)
     if trackShelve.fileExists():
         return trackShelve

     message = 'Bounding regions not available for track: ' + prettyPrintTrackName(trackName)
     raise BoundingRegionsNotAvailableError(message)
Beispiel #3
0
 def createBoundingRegionShelve(genome, trackName, allowOverlaps):
     """
     Store the bounding regions of a preprocessed track in its shelve.

     The bounding region tuples and the list of preprocessed chromosomes are
     read from a PreProcMetaDataCollector. For track formats whose
     representation is not dense, the tuples are sorted before being stored.

     Raises ShouldNotOccurError if the total element count reported by the
     shelve differs from the number of elements reported by the collector.
     """
     metaData = PreProcMetaDataCollector(genome, trackName)

     regionTuples = metaData.getBoundingRegionTuples(allowOverlaps)
     if not metaData.getTrackFormat().reprIsDense():
         regionTuples = list(regionTuples)
         regionTuples.sort()

     preProcessedChrs = metaData.getPreProcessedChrs(allowOverlaps)
     regionShelve = BoundingRegionShelve(genome, trackName, allowOverlaps)
     isSparse = not metaData.getTrackFormat().reprIsDense()
     regionShelve.storeBoundingRegions(regionTuples, preProcessedChrs, isSparse)

     # Sanity check: the stored regions must account for every element.
     if regionShelve.getTotalElementCount() != metaData.getNumElements(allowOverlaps):
         counts = (regionShelve.getTotalElementCount(), metaData.getNumElements(allowOverlaps))
         raise ShouldNotOccurError(
             "Error: The total element count for all bounding regions is not equal to the total number of genome elements. %s != %s" % counts)
    def __init__(self, genome, trackName, allowOverlaps=False, *args, **kwArgs):
        """
        Initialise the source with the bounding regions of the given track.

        If a bounding region shelve file exists for the track, all of its
        bounding regions are used; otherwise the standard chromosome region
        list of the genome is used. Extra positional and keyword arguments are
        accepted but not forwarded.
        """
        # Function-scope import; presumably avoids a circular import at module
        # load time -- confirm before moving it to the top of the file.
        from gtrackcore_memmap.track.memmap.BoundingRegionShelve import BoundingRegionShelve

        regionShelve = BoundingRegionShelve(genome, trackName, allowOverlaps)
        regions = (list(regionShelve.getAllBoundingRegions())
                   if regionShelve.fileExists()
                   else GenomeInfo.getStdChrRegionList(genome))

        baseKwArgs = dict(
            genome=genome,
            trackName=trackName,
            boundingRegions=regions,
            globalCoords=True,
            allowOverlaps=allowOverlaps,
            printWarnings=True,
        )
        TrackGenomeElementSource.__init__(self, **baseKwArgs)
 def _setUpShelve(self):
     """Create the shelve for the test track and keep it in self._brShelve."""
     testTrackName = ['testBoundingRegionShelve']
     self._brShelve = BoundingRegionShelve('TestGenome', testTrackName, allowOverlaps=False)
class TestBoundingRegionShelve(unittest.TestCase):
    """
    Unit tests for BoundingRegionShelve.

    Every test works on the shelve of the track ['testBoundingRegionShelve']
    in 'TestGenome'; the track directory is removed again in tearDown.

    The deprecated alias assertEquals (removed from unittest in Python 3.12)
    is not used; assertEqual is available in both Python 2.7 and 3.
    """

    def setUp(self):
        """Compute the track directory and the expected shelve file path."""
        self._path = createDirPath(['testBoundingRegionShelve'], 'TestGenome', allowOverlaps=False)
        self._fn = os.path.join(self._path, 'boundingRegions.shelve')

    def _setUpShelve(self):
        """Create the shelve under test and keep it in self._brShelve."""
        self._brShelve = BoundingRegionShelve('TestGenome', ['testBoundingRegionShelve'], allowOverlaps=False)

    def tearDown(self):
        """Remove the track directory, if a test created it."""
        if os.path.exists(self._path):
            shutil.rmtree(self._path)

    def testNoBoundingRegions(self):
        """With nothing stored, a queried region is returned with zeroed counts."""
        for sparse in [False, True]:
            self._setUpShelve()
            self._brShelve.storeBoundingRegions([], [], sparse)
            self.assertEqual(BoundingRegionInfo(50000, 52000, 0, 0, 0, 0),
                             self._brShelve.getBoundingRegionInfo(GenomeRegion('TestGenome', 'chr21', 50000, 52000)))

    def _commonStoreBoundingRegions(self, sparse):
        """
        Store two bounding regions on chr21 and one on chrM.

        In the dense case the element count of each region equals its length;
        in the sparse case small fixed counts (10, 20 and 5) are used.
        """
        brTuples = [BoundingRegionTuple(GenomeRegion('TestGenome', 'chr21', 0, 1000000), 10 if sparse else 1000000),
                    BoundingRegionTuple(GenomeRegion('TestGenome', 'chr21', 2000000, 2500000), 20 if sparse else 500000),
                    BoundingRegionTuple(GenomeRegion('TestGenome', 'chrM', 1000, 2000), 5 if sparse else 1000)]
        self._brShelve.storeBoundingRegions(brTuples, ['chr21', 'chrM'], sparse)

    def testShelveFileExists(self):
        """The shelve file only exists after bounding regions have been stored."""
        self._setUpShelve()

        self.assertFalse(self._brShelve.fileExists())
        self.assertFalse(os.path.exists(self._fn))

        self._commonStoreBoundingRegions(sparse=True)

        self.assertTrue(self._brShelve.fileExists())
        self.assertTrue(os.path.exists(self._fn))

    def testShelveLocking(self):
        """Further shelve objects can be created before and after storing."""
        self._setUpShelve()

        # No assertion: the test passes if constructing a second and third
        # shelve object for the same track does not raise.
        BoundingRegionShelve('TestGenome', ['testBoundingRegionShelve'], allowOverlaps=False)

        self._commonStoreBoundingRegions(sparse=True)

        BoundingRegionShelve('TestGenome', ['testBoundingRegionShelve'], allowOverlaps=False)

    def testBoundingRegionsOverlapping(self):
        """Overlapping bounding regions on one chromosome are rejected."""
        self._setUpShelve()

        brTuples = [BoundingRegionTuple(GenomeRegion('TestGenome', 'chr21', 0, 1000000), 10),
                    BoundingRegionTuple(GenomeRegion('TestGenome', 'chr21', 500000, 2500000), 20)]

        self.assertRaises(InvalidFormatError, self._brShelve.storeBoundingRegions, brTuples, ['chr21'], sparse=True)

    def testBoundingRegionsNoGaps(self):
        """Bounding regions that touch without a gap are rejected."""
        self._setUpShelve()

        brTuples = [BoundingRegionTuple(GenomeRegion('TestGenome', 'chr21', 0, 1000000), 10),
                    BoundingRegionTuple(GenomeRegion('TestGenome', 'chr21', 1000000, 2500000), 20)]

        self.assertRaises(InvalidFormatError, self._brShelve.storeBoundingRegions, brTuples, ['chr21'], sparse=True)

    def testBoundingRegionsNotPositive(self):
        """A bounding region of length zero is rejected."""
        self._setUpShelve()

        brTuples = [BoundingRegionTuple(GenomeRegion('TestGenome', 'chr21', 0, 0), 1)]

        self.assertRaises(InvalidFormatError, self._brShelve.storeBoundingRegions, brTuples, ['chr21'], sparse=True)

    def testBoundingRegionsUnsortedInChr(self):
        """Bounding regions out of order within a chromosome are rejected."""
        self._setUpShelve()

        brTuples = [BoundingRegionTuple(GenomeRegion('TestGenome', 'chr21', 2000000, 2500000), 20),
                    BoundingRegionTuple(GenomeRegion('TestGenome', 'chr21', 0, 1000000), 10)]

        self.assertRaises(InvalidFormatError, self._brShelve.storeBoundingRegions, brTuples, ['chr21'], sparse=True)

    def testBoundingRegionsChrNotGrouped(self):
        """Regions of one chromosome interleaved with another are rejected."""
        self._setUpShelve()

        brTuples = [BoundingRegionTuple(GenomeRegion('TestGenome', 'chr21', 0, 1000000), 10),
                    BoundingRegionTuple(GenomeRegion('TestGenome', 'chrM', 1000, 2000), 5),
                    BoundingRegionTuple(GenomeRegion('TestGenome', 'chr21', 2000000, 2500000), 20)]

        self.assertRaises(InvalidFormatError, self._brShelve.storeBoundingRegions, brTuples, ['chr21', 'chrM'], sparse=True)

    def testBoundingRegionsChrInUnsortedOrder(self):
        """Chromosomes may appear in any order as long as they are grouped."""
        self._setUpShelve()

        brTuples = [BoundingRegionTuple(GenomeRegion('TestGenome', 'chrM', 1000, 2000), 5),
                    BoundingRegionTuple(GenomeRegion('TestGenome', 'chr21', 0, 1000000), 10),
                    BoundingRegionTuple(GenomeRegion('TestGenome', 'chr21', 2000000, 2500000), 20)]

        # No assertion: the test passes if storing does not raise.
        self._brShelve.storeBoundingRegions(brTuples, ['chr21', 'chrM'], sparse=True)

    def testBoundingRegionsNotBoundingAllChrs(self):
        """A listed chromosome without any bounding region is rejected."""
        self._setUpShelve()

        brTuples = [BoundingRegionTuple(GenomeRegion('TestGenome', 'chr21', 0, 1000000), 10),
                    BoundingRegionTuple(GenomeRegion('TestGenome', 'chr21', 2000000, 2500000), 20)]

        self.assertRaises(InvalidFormatError, self._brShelve.storeBoundingRegions, brTuples, ['chr21', 'chrM'], sparse=True)

    def testBoundingRegionsIncorrectCountSparse(self):
        """Sparse: a non-empty region on an unlisted chromosome is rejected."""
        self._setUpShelve()

        brTuples = [BoundingRegionTuple(GenomeRegion('TestGenome', 'chr21', 0, 1000000), 10),
                    BoundingRegionTuple(GenomeRegion('TestGenome', 'chr21', 2000000, 2500000), 20),
                    BoundingRegionTuple(GenomeRegion('TestGenome', 'chrM', 1000, 2000), 5)]

        self.assertRaises(InvalidFormatError, self._brShelve.storeBoundingRegions, brTuples, ['chr21'], sparse=True)

    def testBoundingRegionIncorrectCountDense(self):
        """Dense: an element count differing from the region length is rejected."""
        self._setUpShelve()

        # The chrM region is 1000 long but claims only 500 elements.
        brTuples = [BoundingRegionTuple(GenomeRegion('TestGenome', 'chr21', 0, 1000000), 1000000),
                    BoundingRegionTuple(GenomeRegion('TestGenome', 'chr21', 2000000, 2500000), 500000),
                    BoundingRegionTuple(GenomeRegion('TestGenome', 'chrM', 1000, 2000), 500)]

        self.assertRaises(InvalidFormatError, self._brShelve.storeBoundingRegions, brTuples, ['chr21', 'chrM'], sparse=False)

    def testStdGetBoundingInfoSparse(self):
        """Sparse: regions inside a bounding region yield the expected info."""
        self._setUpShelve()
        self._commonStoreBoundingRegions(sparse=True)

        self.assertEqual(BoundingRegionInfo(0, 1000000, 0, 30, 0, 470),
                         self._brShelve.getBoundingRegionInfo(GenomeRegion('TestGenome', 'chr21', 50000, 52000)))
        self.assertEqual(BoundingRegionInfo(2000000, 2500000, 0, 30, 0, 470),
                         self._brShelve.getBoundingRegionInfo(GenomeRegion('TestGenome', 'chr21', 2050000, 2052000)))
        self.assertEqual(BoundingRegionInfo(1000, 2000, 30, 35, 470, 471),
                         self._brShelve.getBoundingRegionInfo(GenomeRegion('TestGenome', 'chrM', 1000, 2000)))

    def testStdGetBoundingInfoDense(self):
        """Dense: regions inside a bounding region yield the expected info."""
        self._setUpShelve()
        self._commonStoreBoundingRegions(sparse=False)

        self.assertEqual(BoundingRegionInfo(0, 1000000, 0, 1000000, 0, 0),
                         self._brShelve.getBoundingRegionInfo(GenomeRegion('TestGenome', 'chr21', 50000, 52000)))
        self.assertEqual(BoundingRegionInfo(2000000, 2500000, 1000000, 1500000, 0, 0),
                         self._brShelve.getBoundingRegionInfo(GenomeRegion('TestGenome', 'chr21', 2050000, 2052000)))
        self.assertEqual(BoundingRegionInfo(1000, 2000, 1500000, 1501000, 0, 0),
                         self._brShelve.getBoundingRegionInfo(GenomeRegion('TestGenome', 'chrM', 1000, 2000)))

    def testGetBoundingInfoEmptyBoundingRegionSparse(self):
        """Sparse: an empty bounding region on an unlisted chromosome is accepted."""
        self._setUpShelve()

        # chrM has a bounding region with zero elements and is not listed
        # among the chromosomes passed to storeBoundingRegions.
        brTuples = [BoundingRegionTuple(GenomeRegion('TestGenome', 'chrM', 1000, 2000), 0),
                    BoundingRegionTuple(GenomeRegion('TestGenome', 'chr21', 0, 1000000), 10),
                    BoundingRegionTuple(GenomeRegion('TestGenome', 'chr21', 2000000, 2500000), 20)]

        self._brShelve.storeBoundingRegions(brTuples, ['chr21'], sparse=True)

        self.assertEqual(BoundingRegionInfo(0, 1000000, 0, 30, 0, 470),
                         self._brShelve.getBoundingRegionInfo(GenomeRegion('TestGenome', 'chr21', 50000, 52000)))
        self.assertEqual(BoundingRegionInfo(2000000, 2500000, 0, 30, 0, 470),
                         self._brShelve.getBoundingRegionInfo(GenomeRegion('TestGenome', 'chr21', 2050000, 2052000)))
        self.assertEqual(BoundingRegionInfo(1000, 2000, 0, 0, 0, 0),
                         self._brShelve.getBoundingRegionInfo(GenomeRegion('TestGenome', 'chrM', 1000, 2000)))

    def _testGetBoundingInfoOutsideCommon(self, sparse):
        """
        Check queries that are not contained in a stored bounding region.

        Regions crossing or lying outside a bounding region of a stored
        chromosome raise OutsideBoundingRegionError; a region on a chromosome
        without stored bounding regions (chr2) is returned with zeroed counts.
        """
        self._setUpShelve()
        self._commonStoreBoundingRegions(sparse=sparse)
        self.assertRaises(OutsideBoundingRegionError,
                          self._brShelve.getBoundingRegionInfo,
                          GenomeRegion('TestGenome', 'chr21', 50000, 1052000))
        self.assertRaises(OutsideBoundingRegionError,
                          self._brShelve.getBoundingRegionInfo,
                          GenomeRegion('TestGenome', 'chr21', 1000000, 1052000))
        self.assertRaises(OutsideBoundingRegionError,
                          self._brShelve.getBoundingRegionInfo,
                          GenomeRegion('TestGenome', 'chrM', 1500, 3000))
        self.assertEqual(BoundingRegionInfo(100000, 110000, 0, 0, 0, 0),
                         self._brShelve.getBoundingRegionInfo(GenomeRegion('TestGenome', 'chr2', 100000, 110000)))

    def testGetBoundingInfoOutsideSparse(self):
        """Run the outside-bounding-region checks for the sparse case."""
        self._testGetBoundingInfoOutsideCommon(sparse=True)

    def testGetBoundingInfoOutsideDense(self):
        """Run the outside-bounding-region checks for the dense case."""
        self._testGetBoundingInfoOutsideCommon(sparse=False)

    def testStdGetTotalElementCountForChrSparse(self):
        """Sparse: per-chromosome totals sum the stored element counts."""
        self._setUpShelve()
        self._commonStoreBoundingRegions(sparse=True)
        self.assertEqual(30,
                         self._brShelve.getTotalElementCountForChr('chr21'))
        self.assertEqual(5,
                         self._brShelve.getTotalElementCountForChr('chrM'))
        self.assertEqual(0,
                         self._brShelve.getTotalElementCountForChr('chr1'))

    def testStdGetTotalElementCountForChrDense(self):
        """Dense: per-chromosome totals sum the stored element counts."""
        self._setUpShelve()
        self._commonStoreBoundingRegions(sparse=False)
        self.assertEqual(1500000,
                         self._brShelve.getTotalElementCountForChr('chr21'))
        self.assertEqual(1000,
                         self._brShelve.getTotalElementCountForChr('chrM'))
        self.assertEqual(0,
                         self._brShelve.getTotalElementCountForChr('chr1'))

    def runTest(self):
        """No-op; presumably lets the case be instantiated without naming a test method."""
        pass