コード例 #1
0
    def __iter__(self):
        """Yield regions for every chromosome in the genome's extended chromosome list.

        The regions are chosen from the bounding regions of the two tracks:

        * track 1 has no bounding region shelve: one region spanning the
          whole chromosome is yielded;
        * track 2 has no shelve, or only track 2 consists solely of
          whole-chromosome bounding regions: the bounding regions of track 1;
        * only track 1 consists solely of whole-chromosome bounding regions:
          the bounding regions of track 2;
        * otherwise: the result of ``getAllIntersectingRegions`` on both lists.
        """
        shelve1 = self._getBoundingRegionShelve(self._trackName1)
        shelve2 = self._getBoundingRegionShelve(self._trackName2)

        # A missing shelve counts as "not all whole chromosomes".
        onlyWholeChrs1 = shelve1 is not None and \
            self._commonAllBoundingRegionsAreWholeChr(shelve1)
        onlyWholeChrs2 = shelve2 is not None and \
            self._commonAllBoundingRegionsAreWholeChr(shelve2)

        for chrName in GenomeInfo.getExtendedChrList(self.genome):
            if shelve1 is None:
                yield GenomeRegion(self.genome, chrName, 0, GenomeInfo.getChrLen(self.genome, chrName))
                continue

            regions1 = shelve1.getAllBoundingRegionsForChr(chrName)

            if shelve2 is None or (onlyWholeChrs2 and not onlyWholeChrs1):
                selected = regions1
            else:
                regions2 = shelve2.getAllBoundingRegionsForChr(chrName)
                if onlyWholeChrs1 and not onlyWholeChrs2:
                    selected = regions2
                else:
                    selected = self.getAllIntersectingRegions(self.genome, chrName, regions1, regions2)

            for region in selected:
                yield region
コード例 #2
0
ファイル: GenomeRegion.py プロジェクト: henrikgs/gtrackcore
 def getTotalBpSpan(self):
     """Return the number of base pairs covered by this region.

     If no chromosome is set (``self.chr is None``) the result is the summed
     length of all chromosomes in the genome's extended chromosome list;
     otherwise it is ``len(self)``.
     """
     if self.chr is not None:
         return len(self)

     totalLen = 0
     for chrName in GenomeInfo.getExtendedChrList(self.genome):
         totalLen += GenomeInfo.getChrLen(self.genome, chrName)
     return totalLen
コード例 #3
0
 def _removeBoundingRegionTuplesIfFullChrsAndNotFixedGapSize(self):
     """Clear ``self._boundingRegionTuples`` when every tuple spans a whole chromosome.

     Nothing is done if the fixed gap size is non-zero or the representation
     is dense. An empty tuple list is (trivially) replaced by a new empty list.
     """
     if self.getFixedGapSize() != 0 or self._reprIsDense:
         return

     def spansWholeChr(region):
         # Known chromosome, starting at 0 and ending at the chromosome length.
         return region.chr in GenomeInfo.getExtendedChrList(self._genome) \
             and region.start == 0 \
             and region.end == GenomeInfo.getChrLen(self._genome, region.chr)

     if all(spansWholeChr(brt.region) for brt in self._boundingRegionTuples):
         self._boundingRegionTuples = []
コード例 #4
0
    def _checkValidStart(self, chr, start):
        """Validate a start position and return it unchanged.

        Raises InvalidFormatError if ``start`` is negative, or if a genome is
        set, ``chr`` is a valid chromosome of it, and ``start`` is larger than
        the chromosome length.
        """
        if start < 0:
            raise InvalidFormatError('Error: start position is negative: %s' % start)

        # The length check only applies to chromosomes known to the genome.
        if not self.genome or not GenomeInfo.isValidChr(self.genome, chr):
            return start

        chrLen = GenomeInfo.getChrLen(self.genome, chr)
        if start > chrLen:
            raise InvalidFormatError('Error: start position is larger than the size of chromosome "%s" (%s > %s)' %
                                     (chr, start, chrLen))
        return start
コード例 #5
0
    def _checkValidEnd(self, chr, end, start=None):
        """Validate an (end-exclusive) end position and return it unchanged.

        Raises InvalidFormatError if ``end`` is negative; if a genome is set,
        ``chr`` is a valid chromosome of it and ``end - 1`` is larger than the
        chromosome length; or if ``start`` is given and ``end <= start``
        (the combination ``start == end == 1`` is tolerated).
        """
        if end < 0:
            raise InvalidFormatError('Error: end position is negative: %s' % end)

        if self.genome and GenomeInfo.isValidChr(self.genome, chr):
            chrLen = GenomeInfo.getChrLen(self.genome, chr)
            lastPos = end - 1
            if lastPos > chrLen:
                raise InvalidFormatError('Error: end position is larger than the size of chromosome "%s" (%s > %s)' %
                                         (chr, lastPos, chrLen))

        if start is not None and end <= start and not (start == end == 1):
            raise InvalidFormatError('Error: end position (end-exclusive) is smaller than or equal to start position: %d <= %d' % (end, start))

        return end
コード例 #6
0
 def _getBoundingRegionTupleList(self, case, sortedAssertElList):
     """Build the list of BoundingRegionTuple objects for ``case``.

     If ``case.boundingRegionsAssertList`` holds entries with a chromosome,
     those are used (in sorted order); a missing start defaults to 0 and a
     missing end to the chromosome length. Otherwise one whole-chromosome
     tuple is produced per chromosome found in ``sortedAssertElList`` (in
     sorted chromosome order), with the number of elements on that
     chromosome as its element count.
     """
     boundingRegions = [br for br in sorted(case.boundingRegionsAssertList) if br.region.chr is not None]

     if boundingRegions:
         brTuples = []
         for br in boundingRegions:
             reg = br.region
             start = reg.start if reg.start is not None else 0
             end = reg.end if reg.end is not None else GenomeInfo.getChrLen(self.GENOME, reg.chr)
             brTuples.append(BoundingRegionTuple(GenomeRegion(self.GENOME, chr=reg.chr, start=start, end=end),
                                                 br.elCount))
         return brTuples

     # Count elements per chromosome in a single pass rather than calling
     # list.count() once per distinct chromosome.
     elCounts = {}
     for ge in sortedAssertElList:
         elCounts[ge.chr] = elCounts.get(ge.chr, 0) + 1

     return [BoundingRegionTuple(GenomeRegion(self.GENOME, chr=chr, start=0,
                                              end=GenomeInfo.getChrLen(self.GENOME, chr)), elCounts[chr])
             for chr in sorted(elCounts)]
コード例 #7
0
ファイル: CompBinManager.py プロジェクト: henrikgs/gtrackcore
 def isCompBin(region):
     """Tell whether ``region`` matches exactly one comp bin.

     Iterables (as judged by ``isIter``) never qualify. Otherwise the region
     must start at offset 0 of its bin, and its length must equal the comp
     bin size or, if smaller, the distance from its start to the end of
     the chromosome.
     """
     if isIter(region):
         return False

     binNumber = CompBinManager.getBinNumber(region.start)
     startsOnBinBoundary = CompBinManager.getOffset(region.start, binNumber) == 0

     regionLen = len(region)
     compBinSize = CompBinManager.getCompBinSize()
     basesLeftInChr = GenomeInfo.getChrLen(region.genome, region.chr) - region.start
     hasBinLength = regionLen == min(compBinSize, basesLeftInChr)

     return startsOnBinBoundary and hasBinLength
コード例 #8
0
 def isValidTrack(genome, trackName, fullAccess=False):
     """Return True if the track is valid and its directory holds track data.

     The track must pass ``TrackInfo(...).isValid(fullAccess)``, and at least
     one entry of its directory must be either a valid chromosome name or a
     bounding region file name.
     """
     if not TrackInfo(genome, trackName).isValid(fullAccess):
         return False

     return any(GenomeInfo.isValidChr(genome, entry) or isBoundingRegionFileName(entry)
                for entry in ProcTrackOptions._getDirContents(genome, trackName))
コード例 #9
0
 def assertChrElCounts(self, trackName, chrElCountDict, allowOverlaps, customBins):
     """Assert that each chromosome's track view holds the expected number of elements.

     For every chromosome in ``chrElCountDict`` the region is taken from
     ``customBins`` when present, else the whole chromosome is used. The
     number of elements iterated from the track view is compared with the
     expected count via ``self.assertEquals``.
     """
     for chrName, expectedCount in chrElCountDict.items():
         binRegion = customBins[chrName] if chrName in customBins else \
             GenomeRegion(self.GENOME, chrName, 0, GenomeInfo.getChrLen(self.GENOME, chrName))
         trackView = self._getTrackView(trackName, binRegion, allowOverlaps)
         actualCount = sum(1 for _ in trackView)
         self.assertEquals(expectedCount, actualCount)
コード例 #10
0
 def getAllBoundingRegions(self):
     """Yield the bounding regions of all chromosomes in extended chromosome list order.

     Raises BoundingRegionsNotAvailableError if ``self.fileExists()`` is
     false. As this is a generator, the check runs when iteration starts.
     """
     if not self.fileExists():
         from gtrackcore_memmap.util.CommonFunctions import prettyPrintTrackName
         message = 'Bounding regions not available for track: ' + prettyPrintTrackName(self._trackName)
         raise BoundingRegionsNotAvailableError(message)

     for chrName in GenomeInfo.getExtendedChrList(self._genome):
         for region in self.getAllBoundingRegionsForChr(chrName):
             yield region
コード例 #11
0
ファイル: GenomeRegion.py プロジェクト: henrikgs/gtrackcore
    def extend(self, extensionSize, ensureValidity=True):
        """Grow the region in place and return ``self``.

        A non-negative ``extensionSize`` is added to ``end``; a negative one
        is added to ``start`` (moving it towards lower positions). With
        ``ensureValidity`` set, ``start`` is then clipped to 0 and ``end`` to
        the chromosome length.
        """
        if extensionSize >= 0:
            self.end = self.end + extensionSize
        else:
            self.start = self.start + extensionSize

        if not ensureValidity:
            return self

        # Clip to the chromosome boundaries.
        self.start = max(0, self.start)
        self.end = min(self.end, GenomeInfo.getChrLen(self.genome, self.chr))
        return self
コード例 #12
0
    def getSubtypes(genome, trackName, fullAccess=False):
        """Return the subtype names found in a track directory, sorted case-insensitively.

        An entry of the track directory counts as a subtype unless its name
        starts with '.' or '_', it is a regular file, it is a valid
        chromosome name, or it is one of 'external' / 'ucsc'.
        ``fullAccess`` is currently not used by the active code.
        """
        dirPath = createDirPath(trackName, genome)

        subtypes = []
        for fn in ProcTrackOptions._getDirContents(genome, trackName):
            if fn[0] in ('.', '_'):
                continue
            if os.path.isfile(dirPath + os.sep + fn):
                continue
            if GenomeInfo.isValidChr(genome, fn):
                continue
            # FIXME: temporary exclusion; these directories should start with '_'.
            if fn in ('external', 'ucsc'):
                continue
            subtypes.append(fn)

        return sorted(subtypes, key=str.lower)
コード例 #13
0
ファイル: AutoBinner.py プロジェクト: henrikgs/gtrackcore
    def nextBin(self):
        """Yield GenomeRegion bins derived from the regions of ``self._userBinSource``.

        A missing start defaults to 0. The end is the smaller of the region's
        end and the chromosome length (the latter only when the region has a
        genome). If ``self._binLen`` is None each region yields one bin;
        otherwise it is cut into consecutive bins of ``self._binLen`` bases,
        the last one possibly shorter.
        """
        for userBin in self._userBinSource:
            binStart = 0 if userBin.start is None else userBin.start

            if userBin.genome is not None:
                chrLen = GenomeInfo.getChrLen(userBin.genome, userBin.chr)
            else:
                chrLen = None
            candidateEnds = [pos for pos in (userBin.end, chrLen) if pos is not None]
            regionEnd = min(candidateEnds)

            if self._binLen is None:
                yield GenomeRegion(userBin.genome, userBin.chr, binStart, regionEnd)
                continue

            while binStart < regionEnd:
                binEnd = min(binStart + self._binLen, regionEnd)
                yield GenomeRegion(userBin.genome, userBin.chr, binStart, binEnd)
                binStart += self._binLen
コード例 #14
0
ファイル: OutputManager.py プロジェクト: henrikgs/gtrackcore
 def _createOutputDirectory(self, genome, chr, trackName, allowOverlaps, geSourceManager):
     """Create the OutputDirectory object for one chromosome of a track.

     The directory path comes from ``createDirPath``; the remaining
     constructor arguments are the chromosome length and the metadata
     reported by ``geSourceManager``.
     """
     dirPath = createDirPath(trackName, genome, chr, allowOverlaps)

     from gtrackcore_memmap.metadata.GenomeInfo import GenomeInfo

     # Positional arguments, in the order OutputDirectory is called with.
     outputDirArgs = (
         dirPath,
         geSourceManager.getPrefixList(),
         geSourceManager.getNumElementsForChr(chr),
         GenomeInfo.getChrLen(genome, chr),
         geSourceManager.getValDataType(),
         geSourceManager.getValDim(),
         geSourceManager.getEdgeWeightDataType(),
         geSourceManager.getEdgeWeightDim(),
         geSourceManager.getMaxNumEdgesForChr(chr),
         geSourceManager.getMaxStrLensForChr(chr),
         geSourceManager.isSorted(),
     )
     return OutputDirectory(*outputDirArgs)
コード例 #15
0
    def __init__(self, genome, trackName, allowOverlaps=False, *args, **kwArgs):
        """Initialise the source over all bounding regions of the track.

        The bounding regions are read from the track's BoundingRegionShelve
        if its file exists; otherwise the genome's standard chromosome region
        list is used. Extra ``*args`` / ``**kwArgs`` are accepted but not
        forwarded.
        """
        from gtrackcore_memmap.track.memmap.BoundingRegionShelve import BoundingRegionShelve

        brStore = BoundingRegionShelve(genome, trackName, allowOverlaps)
        boundingRegions = list(brStore.getAllBoundingRegions()) if brStore.fileExists() \
            else GenomeInfo.getStdChrRegionList(genome)

        initKwArgs = dict(genome=genome,
                          trackName=trackName,
                          boundingRegions=boundingRegions,
                          globalCoords=True,
                          allowOverlaps=allowOverlaps,
                          printWarnings=True)
        TrackGenomeElementSource.__init__(self, **initKwArgs)
コード例 #16
0
    def getBoundingRegionTuples(self):
        """Return the bounding region tuples, falling back to whole chromosomes.

        Tuples from ``self._getBoundingRegionTuples()`` whose region has a
        chromosome are returned as-is and
        ``self._boundingRegionsAndGEsCorrespond`` is set to True. If there are
        none, one whole-chromosome tuple is created per chromosome from
        ``self.getAllChrs()`` and the flag is set to False.
        """
        explicitTuples = [brt for brt in self._getBoundingRegionTuples()
                          if brt.region.chr is not None]

        if explicitTuples:
            self._boundingRegionsAndGEsCorrespond = True
            return explicitTuples

        from gtrackcore_memmap.input.core.GenomeElementSource import BoundingRegionTuple
        from gtrackcore_memmap.track.core.GenomeRegion import GenomeRegion
        from gtrackcore_memmap.metadata.GenomeInfo import GenomeInfo

        wholeChrTuples = []
        for chrName in self.getAllChrs():
            wholeChr = GenomeRegion(chr=chrName, start=0,
                                    end=GenomeInfo.getChrLen(self._geSource.genome, chrName))
            wholeChrTuples.append(BoundingRegionTuple(wholeChr, self.getNumElementsForChr(chrName)))

        self._boundingRegionsAndGEsCorrespond = False
        return wholeChrTuples
コード例 #17
0
 def _isOldTypeChromDirectory(dirPath, genome):
     """Return True if ``dirPath`` is named after a chromosome and has no subdirectories.

     A single trailing path separator is ignored. The directory is listed
     only when its name is in the genome's extended chromosome list.
     """
     if dirPath[-1] == os.sep:
         dirPath = os.path.dirname(dirPath)

     if os.path.basename(dirPath) not in set(GenomeInfo.getExtendedChrList(genome)):
         return False

     for subFn in os.listdir(dirPath):
         if os.path.isdir(os.path.join(dirPath, subFn)):
             return False
     return True
コード例 #18
0
ファイル: GenomeRegion.py プロジェクト: henrikgs/gtrackcore
 def strWithCentromerInfo(self):
     """Return ``str(self)``, suffixed with a note if the region intersects a centromere."""
     text = str(self)
     if GenomeInfo.regIntersectsCentromer(self):
         text += " (intersects centromere)"
     return text
コード例 #19
0
ファイル: GenomeRegion.py プロジェクト: henrikgs/gtrackcore
    def isWholeChr(self):
        """Return True if the region spans its chromosome from 0 to the chromosome length.

        Regions without a genome or without a chromosome are never whole
        chromosomes.
        """
        if self.genome is not None and self.chr is not None:
            wholeSpan = (0, GenomeInfo.getChrLen(self.genome, self.chr))
            return (self.start, self.end) == wholeSpan
        return False
コード例 #20
0
 def _commonAllBoundingRegionsAreWholeChr(self, brShelve):
     """Return True if every bounding region in ``brShelve`` is a whole chromosome.

     All chromosomes of the genome's extended chromosome list are checked;
     the scan stops at the first region that is not a whole chromosome.
     A shelve without any bounding regions yields True.
     """
     return all(region.isWholeChr()
                for chrName in GenomeInfo.getExtendedChrList(self.genome)
                for region in brShelve.getAllBoundingRegionsForChr(chrName))
コード例 #21
0
 def getTotalElementCount(self):
     """Return the element count summed over all chromosomes in the extended chromosome list."""
     totalCount = 0
     for chrName in GenomeInfo.getExtendedChrList(self._genome):
         totalCount += self.getTotalElementCountForChr(chrName)
     return totalCount
コード例 #22
0
 def _checkValidChr(self, chr):
     """Return ``chr`` unchanged if it is acceptable.

     When a genome is set, raises InvalidFormatWarning if ``chr`` is not a
     valid chromosome of that genome. Without a genome no check is made.
     """
     if not self.genome:
         return chr
     if GenomeInfo.isValidChr(self.genome, chr):
         return chr
     raise InvalidFormatWarning('Chromosome incorrectly specified: ' + chr)
コード例 #23
0
    def storeBoundingRegions(self, boundingRegionTuples, genomeElementChrList, sparse):
        """Validate the bounding regions and store them in the shelve file ``self._fn``.

        Each item of ``boundingRegionTuples`` provides a ``region`` and an
        ``elCount``. ``genomeElementChrList`` lists the chromosomes that hold
        genome elements. ``sparse`` must be True or False; when False, each
        bounding region's length must equal its element count.

        Per chromosome, a BrInfoHolder with the bounding region start
        positions and the corresponding BoundingRegionInfo objects is stored.

        Raises InvalidFormatError if the bounding regions are not grouped by
        chromosome, are unsorted, overlap or adjoin within a chromosome, have
        non-positive length, have a length differing from the element count
        (dense only), have elements on a chromosome not in
        ``genomeElementChrList`` (sparse only), or if a chromosome in
        ``genomeElementChrList`` has no bounding region.
        """
        assert sparse in [False, True]

        # chr -> OrderedDict(start position -> BoundingRegionInfo), in input order
        tempContents = OrderedDict()

        genomeElementChrs = set(genomeElementChrList)
        lastRegion = None
        # Only filled when sparse: running element index at the first/last
        # bounding region of each chromosome.
        chrStartIdxs = OrderedDict()
        chrEndIdxs = OrderedDict()
        totElCount = 0
        totBinCount = 0

        for br in boundingRegionTuples:
            if lastRegion is None or br.region.chr != lastRegion.chr:
                # First bounding region of a chromosome; seeing the chromosome
                # again later means the input is not grouped.
                if br.region.chr in tempContents:
                    raise InvalidFormatError("Error: bounding region (%s) is not grouped with previous bounding regions of the same chromosome (sequence)." % br.region)

                lastRegion = None
                tempContents[br.region.chr] = OrderedDict()
                if sparse:
                    chrStartIdxs[br.region.chr] = totElCount
            else:
                # Same chromosome as the previous region: check order and separation.
                if br.region < lastRegion:
                    raise InvalidFormatError("Error: bounding regions in the same chromosome (sequence) are unsorted: %s > %s." % (lastRegion, br.region))
                if lastRegion.overlaps(br.region):
                    raise InvalidFormatError("Error: bounding regions '%s' and '%s' overlap." % (lastRegion, br.region))
                if lastRegion.end == br.region.start:
                    raise InvalidFormatError("Error: bounding regions '%s' and '%s' are adjoining (there is no gap between them)." % (lastRegion, br.region))

            if len(br.region) < 1:
                raise InvalidFormatError("Error: bounding region '%s' does not have positive length." % br.region)

            if not sparse and len(br.region) != br.elCount:
                raise InvalidFormatError("Error: track type representation is dense, but the length of bounding region '%s' is not equal to the element count: %s != %s" % (br.region, len(br.region), br.elCount))

            # Dense: each region gets its own element index range. Sparse: the
            # indices are filled in per chromosome in the second pass below.
            startIdx, endIdx = (totElCount, totElCount + br.elCount) if not sparse else (None, None)
            totElCount += br.elCount
            if sparse:
                chrEndIdxs[br.region.chr] = totElCount

            tempContents[br.region.chr][br.region.start] = BoundingRegionInfo(br.region.start, br.region.end, startIdx, endIdx, 0, 0)

            lastRegion = br.region

        if sparse:
            # Second pass: all bounding regions of a chromosome share that
            # chromosome's element index range and bin index range.
            totBinCount = 0
            for chr in tempContents:
                chrLen = GenomeInfo.getChrLen(self._genome, chr)
                numBinsInChr = CompBinManager.getNumOfBins(GenomeRegion(start=0, end=chrLen))
                for key in tempContents[chr].keys():
                    startBinIdx = totBinCount
                    endBinIdx = totBinCount + numBinsInChr
                    brInfo = tempContents[chr][key]

                    if chr in genomeElementChrs:
                        tempContents[chr][key] = BoundingRegionInfo(brInfo.start, brInfo.end, \
                                                                    chrStartIdxs[chr], chrEndIdxs[chr], \
                                                                    startBinIdx, endBinIdx)
                    else:
                        # Chromosome without genome elements: its bounding
                        # regions must not claim any elements.
                        if chrEndIdxs[chr] - chrStartIdxs[chr] > 0:
                            raise InvalidFormatError("Error: bounding region '%s' has incorrect element count: %s > 0" % (GenomeRegion(chr=chr, start=brInfo.start, end=brInfo.end), chrEndIdxs[chr] - chrStartIdxs[chr]))
                        tempContents[chr][key] = BoundingRegionInfo(brInfo.start, brInfo.end, 0, 0, 0, 0)

                # Only chromosomes with genome elements advance the bin counter.
                if chr in genomeElementChrs:
                    totBinCount += numBinsInChr

        if len(genomeElementChrs - set(tempContents.keys())) > 0:
            raise InvalidFormatError('Error: some chromosomes (sequences) contains data, but has no bounding regions: %s' % ', '.join(genomeElementChrs - set(tempContents.keys())))

        ensurePathExists(self._fn)

        # Replace each per-chromosome dict by a BrInfoHolder of (keys, values) tuples.
        for chr in tempContents:
            brInfoDict = tempContents[chr]
            tempContents[chr] = BrInfoHolder(tuple(brInfoDict.keys()), tuple(brInfoDict.values()))

        brShelve = safeshelve.open(self._fn)
        brShelve.update(tempContents)
        brShelve.close()

        # Poll until self.fileExists() reports the file, logging each wait.
        while not self.fileExists():
            from gtrackcore_memmap.application.LogSetup import logMessage
            logMessage("Bounding region shelve file '%s' has yet to be created" % self._fn)
            import time
            time.sleep(0.2)
コード例 #24
0
def parseRegSpec(regSpec, genome = None, includeExtraChrs = False):
    """Parse a comma-separated region specification into GenomeRegion objects.

    Each item has the form ``[genome:]chr[:start-end]`` or ``[genome:]*``:

    * ``*`` selects every chromosome of the genome (the extended chromosome
      list if ``includeExtraChrs`` is set) and must be the only item;
    * ``chr`` alone selects the whole chromosome;
    * ``start-end`` is 1-indexed with inclusive end and is converted to
      0-indexed, end-exclusive coordinates. An empty end means the end of
      the chromosome. In the start, 'k' and 'm' expand to '001' and
      '000001'; in the end, to '000' and '000000' (so '1k-2k' covers
      positions 1001..2000).

    If ``genome`` is None, the first part of the first item is taken as the
    genome name.

    Raises AssertionError for malformed specifications and
    InvalidFormatError if the chromosome length cannot be looked up.
    """
    from gtrackcore_memmap.track.core.GenomeRegion import GenomeRegion
    from gtrackcore_memmap.metadata.GenomeInfo import GenomeInfo

    class SimpleUserBinSource(list):
        pass

    regions = []
    allRegSpecs = regSpec.strip().split(',')
    for curRegSpec in allRegSpecs:
        regParts = curRegSpec.strip().split(':')
        if genome is None:
            genome = regParts[0]

        # A leading part that is neither '*' nor a chromosome must be the
        # genome name; strip it off.
        if not (regParts[0]=='*' or regParts[0] in GenomeInfo.getExtendedChrList(genome)):
            assert regParts[0] == genome, \
                "Region specification does not start with one of '*' or correct chromosome or genome name. Region specification: %s. Genome: %s" % (curRegSpec, genome)
            regParts = regParts[1:]

        if regParts[0] == '*':
            assert len(regParts) == 1, \
                "Region specification starts with '*' but continues with ':'. Region specification: %s" % curRegSpec
            assert len(allRegSpecs) == 1, \
                "Region specification is '*', but is in a list with other region specifications: %s" % regSpec
            chrList = GenomeInfo.getExtendedChrList(genome) if includeExtraChrs else GenomeInfo.getChrList(genome)
            for chr in chrList:
                regions.append(GenomeRegion(genome, chr, 0, GenomeInfo.getChrLen(genome, chr)))
        else:
            assert regParts[0] in GenomeInfo.getExtendedChrList(genome), \
                "Region specification does not start with chromosome specification. Region specification: %s " % curRegSpec
            chr = regParts[0]
            try:
                chrLen = GenomeInfo.getChrLen(genome, chr)
            except Exception:
                # The original exception is not used; report a format error instead.
                raise InvalidFormatError("Chromosome '%s' does not exist for genome '%s'" % (chr, genome))

            if len(regParts)>1:
                posParts = regParts[1]
                assert '-' in posParts, \
                    "Position specification does not include character '-'. Region specification: %s " % curRegSpec
                rawStart, rawEnd = posParts.split('-')

                # 'k'/'m' shorthand: the start is the first position after the
                # given multiple, the end is the multiple itself.
                start = int(rawStart.replace('k','001').replace('m','000001'))
                end = int(rawEnd.replace('k','000').replace('m','000000')) if rawEnd != '' else chrLen
                assert start >= 1, \
                    "Start position is not positive. Region specification: %s " % curRegSpec
                assert end >= start, \
                    "End position is not larger than start position. Region specification: %s " % curRegSpec
                assert end <= chrLen, \
                    "End position is larger than chromosome size. Genome: %s. Chromosome size: %s. Region specification: %s" % (genome, chrLen, curRegSpec)
                # -1 for conversion from 1-indexing to 0-indexing end-exclusive
                start-=1

            else:
                start,end = 0, chrLen
            regions.append( GenomeRegion(genome, chr, start, end) )
    # NOTE(review): the visible part of this function ends here without
    # returning ``regions`` or using SimpleUserBinSource; presumably the
    # remainder lies outside this excerpt. Confirm against the full file.
コード例 #25
0
ファイル: UserBinSource.py プロジェクト: henrikgs/gtrackcore
 def __new__(cls, genome):
     """Return a one-element list with a 1 bp region at the start of the first chromosome.

     The region is ``GenomeRegion(genome, <first chromosome>, 0, 1)``. In the
     code shown here nothing is returned explicitly when the genome has no
     chromosomes.
     """
     from gtrackcore_memmap.track.core.GenomeRegion import GenomeRegion
     from gtrackcore_memmap.metadata.GenomeInfo import GenomeInfo
     chrList = GenomeInfo.getChrList(genome)
     if len(chrList) > 0:
         # Reuse the list already fetched instead of looking it up again.
         return [GenomeRegion(genome, chrList[0], 0, 1)]