Esempio n. 1
0
    def updateShelveItemsAndCopyToNewFile(cls):
        """Re-create every stored TrackInfo and write it to a new shelve file.

        Every key found in the shelve at ``cls.SHELVE_FN`` is passed to
        ``TrackInfo.createInstanceFromKey``. On success the resulting object is
        stored under the same key in the shelve at ``cls.SHELVE_COPY_FN``. If
        the re-creation raises, the raw value stored in the original shelve is
        copied to the shelve at ``cls.SHELVE_ERRORS_FN`` instead, and nothing
        is written to the copy for that key.

        Neither ``cls.SHELVE_COPY_FN`` nor ``cls.SHELVE_ERRORS_FN`` may exist
        beforehand (asserted). A progress dot is printed every 10000 keys.
        """
        assert not os.path.exists(cls.SHELVE_COPY_FN)
        assert not os.path.exists(cls.SHELVE_ERRORS_FN)

        trackInfoShelveCopy = safeshelve.open(cls.SHELVE_COPY_FN, 'c', protocol=cls.PROTOCOL)
        trackInfoShelveErrors = safeshelve.open(cls.SHELVE_ERRORS_FN, 'c', protocol=cls.PROTOCOL)

        try:
            # Snapshot the keys and release the source shelve right away, so
            # it is not held open during the (potentially long) loop below.
            trackInfoShelve = safeshelve.open(cls.SHELVE_FN, 'r', protocol=cls.PROTOCOL)
            try:
                keys = list(trackInfoShelve.keys())
            finally:
                trackInfoShelve.close()

            for i, key in enumerate(keys):
                try:
                    ti = TrackInfo.createInstanceFromKey(key)
                except Exception:
                    # Keep the raw stored value for later inspection.
                    trackInfoShelve = safeshelve.open(cls.SHELVE_FN, 'r', protocol=cls.PROTOCOL)
                    try:
                        trackInfoShelveErrors[key] = trackInfoShelve[key]
                    finally:
                        trackInfoShelve.close()
                else:
                    # Only write to the copy when re-creation succeeded;
                    # otherwise 'ti' would be undefined or belong to a
                    # previous key.
                    trackInfoShelveCopy[key] = ti

                if i % 10000 == 0:
                    # Under the Python 2 print statement the parentheses are
                    # just grouping and the trailing comma suppresses the
                    # newline, exactly like "print '.',".
                    print('.'),
        finally:
            # Always release both output shelves, even if the loop failed.
            try:
                trackInfoShelveCopy.close()
            finally:
                trackInfoShelveErrors.close()
 def _updateContentsIfNecessary(self, chr):
     """Load the stored entry for ``chr`` into ``self._contents``, once per chr.

     If ``chr`` has not been processed before and ``self.fileExists()`` is
     true, the value stored under ``chr`` in the shelve at ``self._fn`` is
     looked up and, when not None, placed in ``self._contents[chr]``. ``chr``
     is then recorded in ``self._updatedChrs`` whether or not anything was
     found, so later calls for the same chr return without touching the file.
     """
     if chr in self._updatedChrs:
         return

     if self.fileExists():
         brShelve = safeshelve.open(self._fn, 'r')
         try:
             brListForChr = brShelve.get(chr)
         finally:
             # Close explicitly instead of leaving the handle to the
             # garbage collector.
             brShelve.close()

         if brListForChr is not None:
             self._contents[chr] = brListForChr

     self._updatedChrs.add(chr)
Esempio n. 3
0
 def __new__(cls, genome, trackName):
     """Return the stored object for (genome, trackName), or a new instance.

     The shelve at ``cls.SHELVE_FN`` is opened (and created if missing) and
     looked up with the key built by ``constructKey``. A stored value is
     returned as-is; otherwise a fresh, uninitialised instance of ``cls`` is
     returned.
     """
     # Temporary hack: 'hg18' is treated as 'NCBI36'.
     if genome in ('hg18', 'NCBI36'):
         genome = 'NCBI36'

     ensurePathExists(cls.SHELVE_FN)

     shelf = safeshelve.open(cls.SHELVE_FN, 'c', protocol=cls.PROTOCOL)
     existing = shelf.get(constructKey(genome, trackName))
     shelf.close()

     if existing is None:
         return object.__new__(cls)
     return existing
Esempio n. 4
0
 def removeFilteredEntriesFromShelve(cls, genome, trackNameFilter):
     """Delete every shelve entry selected by (genome, trackNameFilter).

     The keys to delete are obtained from
     ``TrackInfo.getFilteredEntriesFromShelve``; each one is then removed
     from the shelve at ``cls.SHELVE_FN``, opened in 'w' mode.

     An exception raised while deleting (for instance a KeyError for a key
     that is no longer present) still propagates to the caller, but the
     shelve is closed first.
     """
     filteredKeys = TrackInfo.getFilteredEntriesFromShelve(genome, trackNameFilter)
     trackInfoShelve = safeshelve.open(cls.SHELVE_FN, 'w', protocol=cls.PROTOCOL)
     try:
         for key in filteredKeys:
             del trackInfoShelve[key]
     finally:
         # Never leave the writable shelve open if a deletion fails.
         trackInfoShelve.close()
Esempio n. 5
0
 def getFilteredEntriesFromShelve(cls, genome, trackNameFilter):
     """Return the shelve keys that start with the key for the given filter.

     The prefix is ``constructKey(genome, trackNameFilter)``; the keys are
     read from the shelve at ``cls.SHELVE_FN``, opened read-only, and
     returned as a list.
     """
     keyPrefix = constructKey(genome, trackNameFilter)

     shelf = safeshelve.open(cls.SHELVE_FN, 'r', protocol=cls.PROTOCOL)
     matchingKeys = []
     for storedKey in shelf.keys():
         if storedKey.startswith(keyPrefix):
             matchingKeys.append(storedKey)
     shelf.close()

     return matchingKeys
Esempio n. 6
0
 def removeEntryFromShelve(self):
     """Delete this object's entry from the shelve, if one is present.

     The entry is identified by ``constructKey(self.genome, self.trackName)``
     in the shelve at ``self.SHELVE_FN``. Nothing happens when the key is not
     in the shelve.
     """
     shelf = safeshelve.open(self.SHELVE_FN, protocol=self.PROTOCOL)
     ownKey = constructKey(self.genome, self.trackName)
     hasEntry = ownKey in shelf
     if hasEntry:
         del shelf[ownKey]
     shelf.close()
Esempio n. 7
0
 def store(self):
     """Write this object to the shelve at ``self.SHELVE_FN``.

     The object is stored under ``constructKey(self.genome, self.trackName)``,
     replacing any value already stored under that key.
     """
     shelf = safeshelve.open(self.SHELVE_FN, protocol=self.PROTOCOL)
     ownKey = constructKey(self.genome, self.trackName)
     shelf[ownKey] = self
     shelf.close()
    def storeBoundingRegions(self, boundingRegionTuples, genomeElementChrList, sparse):
        """Validate the given bounding regions and store them in the shelve at self._fn.

        Arguments:
        boundingRegionTuples -- iterable of objects exposing a ``region``
            attribute and an ``elCount`` attribute. Regions must be grouped by
            chromosome and, within a chromosome, be sorted, non-overlapping
            and separated by a gap.
        genomeElementChrList -- the chromosomes that contain elements; every
            one of them must have at least one bounding region.
        sparse -- must be True or False (asserted). When False, the length of
            each region must equal its element count and element indexes are
            assigned per region. When True, every region in a chromosome gets
            that chromosome's element index range and bin index range.

        Raises InvalidFormatError when any of the constraints above is
        violated, when a region has length < 1, or (sparse only) when a
        chromosome absent from genomeElementChrList has a positive element
        count.

        After writing, the method waits in a loop until self.fileExists()
        reports true.
        """
        assert sparse in [False, True]

        # chr -> OrderedDict(region start -> BoundingRegionInfo), in input order.
        tempContents = OrderedDict()

        genomeElementChrs = set(genomeElementChrList)    
        lastRegion = None
        # Only filled when sparse: running element count at the first region
        # of each chromosome and after its last region, respectively.
        chrStartIdxs = OrderedDict()
        chrEndIdxs = OrderedDict()
        totElCount = 0
        totBinCount = 0
        
        for br in boundingRegionTuples:
            if lastRegion is None or br.region.chr != lastRegion.chr:
                # First region of a chromosome. A chromosome that was already
                # seen means the input is not grouped by chromosome.
                if br.region.chr in tempContents:
                    raise InvalidFormatError("Error: bounding region (%s) is not grouped with previous bounding regions of the same chromosome (sequence)." % br.region)
                
                lastRegion = None
                tempContents[br.region.chr] = OrderedDict()
                if sparse:
                    chrStartIdxs[br.region.chr] = totElCount
            else:
                # Same chromosome as the previous region: check ordering,
                # overlap and adjacency against it.
                if br.region < lastRegion:
                    raise InvalidFormatError("Error: bounding regions in the same chromosome (sequence) are unsorted: %s > %s." % (lastRegion, br.region))
                if lastRegion.overlaps(br.region):
                    raise InvalidFormatError("Error: bounding regions '%s' and '%s' overlap." % (lastRegion, br.region))
                if lastRegion.end == br.region.start:
                    raise InvalidFormatError("Error: bounding regions '%s' and '%s' are adjoining (there is no gap between them)." % (lastRegion, br.region))
            
            if len(br.region) < 1:
                raise InvalidFormatError("Error: bounding region '%s' does not have positive length." % br.region)
                
            if not sparse and len(br.region) != br.elCount:
                raise InvalidFormatError("Error: track type representation is dense, but the length of bounding region '%s' is not equal to the element count: %s != %s" % (br.region, len(br.region), br.elCount))
            
            # Dense: the region covers the element range [totElCount, totElCount + elCount).
            # Sparse: indexes are left as None here and filled in by the second pass below.
            startIdx, endIdx = (totElCount, totElCount + br.elCount) if not sparse else (None, None)
            totElCount += br.elCount
            if sparse:
                # Overwritten for every region, so it ends up holding the
                # total after the chromosome's last region.
                chrEndIdxs[br.region.chr] = totElCount
            
            # The last two arguments (bin indexes) are 0 for now; only the
            # sparse pass below assigns real values.
            tempContents[br.region.chr][br.region.start] = BoundingRegionInfo(br.region.start, br.region.end, startIdx, endIdx, 0, 0)
            
            lastRegion = br.region
        
        if sparse:
            # Second pass: replace every entry with one carrying the
            # chromosome-wide element index range and bin index range.
            totBinCount = 0
            for chr in tempContents:
                chrLen = GenomeInfo.getChrLen(self._genome, chr)
                numBinsInChr = CompBinManager.getNumOfBins(GenomeRegion(start=0, end=chrLen))
                for key in tempContents[chr].keys():
                    # All regions of one chromosome share the same bin range.
                    startBinIdx = totBinCount
                    endBinIdx = totBinCount + numBinsInChr
                    brInfo = tempContents[chr][key]
                    
                    if chr in genomeElementChrs:
                        tempContents[chr][key] = BoundingRegionInfo(brInfo.start, brInfo.end, \
                                                                    chrStartIdxs[chr], chrEndIdxs[chr], \
                                                                    startBinIdx, endBinIdx)
                    else:
                        # A chromosome without elements must not report any
                        # element count; its entry gets all-zero indexes.
                        if chrEndIdxs[chr] - chrStartIdxs[chr] > 0:
                            raise InvalidFormatError("Error: bounding region '%s' has incorrect element count: %s > 0" % (GenomeRegion(chr=chr, start=brInfo.start, end=brInfo.end), chrEndIdxs[chr] - chrStartIdxs[chr]))
                        tempContents[chr][key] = BoundingRegionInfo(brInfo.start, brInfo.end, 0, 0, 0, 0)
                
                # Only chromosomes that contain elements advance the bin counter.
                if chr in genomeElementChrs:
                    totBinCount += numBinsInChr
        
        # Every chromosome that contains elements needs at least one bounding region.
        if len(genomeElementChrs - set(tempContents.keys())) > 0:
            raise InvalidFormatError('Error: some chromosomes (sequences) contains data, but has no bounding regions: %s' % ', '.join(genomeElementChrs - set(tempContents.keys())))
        
        ensurePathExists(self._fn)
        
        # Replace each per-chromosome dict by a BrInfoHolder built from
        # parallel tuples of its keys (region starts) and values.
        for chr in tempContents:
            brInfoDict = tempContents[chr]
            tempContents[chr] = BrInfoHolder(tuple(brInfoDict.keys()), tuple(brInfoDict.values()))
        
        brShelve = safeshelve.open(self._fn)
        brShelve.update(tempContents)
        brShelve.close()
        
        # Poll until self.fileExists() is true, logging a message and
        # sleeping 0.2 s per iteration. There is no upper bound on the wait.
        while not self.fileExists():
            from gtrackcore_memmap.application.LogSetup import logMessage
            logMessage("Bounding region shelve file '%s' has yet to be created" % self._fn)
            import time
            time.sleep(0.2)