def __iter__(self):
    """Yield the bounding regions common to the two tracks, chromosome by chromosome.

    For each chromosome in the extended chromosome list of self.genome:

    - if no bounding region shelve is found for track 1, the whole
      chromosome is yielded as one GenomeRegion (track 2 is not consulted);
    - if no shelve is found for track 2, or only track 2 consists solely of
      whole-chromosome regions, the regions of track 1 are yielded;
    - if only track 1 consists solely of whole-chromosome regions, the
      regions of track 2 are yielded;
    - otherwise whatever getAllIntersectingRegions returns for the two
      region lists is yielded.
    """
    firstShelve = self._getBoundingRegionShelve(self._trackName1)
    secondShelve = self._getBoundingRegionShelve(self._trackName2)

    # The whole-chromosome flags are computed once, up front, and only for
    # shelves that actually exist.
    firstIsWholeChrs = False
    if firstShelve is not None:
        firstIsWholeChrs = self._commonAllBoundingRegionsAreWholeChr(firstShelve)

    secondIsWholeChrs = False
    if secondShelve is not None:
        secondIsWholeChrs = self._commonAllBoundingRegionsAreWholeChr(secondShelve)

    for chrName in GenomeInfo.getExtendedChrList(self.genome):
        if firstShelve is None:
            yield GenomeRegion(self.genome, chrName, 0,
                               GenomeInfo.getChrLen(self.genome, chrName))
            continue

        firstRegions = firstShelve.getAllBoundingRegionsForChr(chrName)

        if secondShelve is None or (secondIsWholeChrs and not firstIsWholeChrs):
            # Track 2 adds no restriction: track 1's regions are the answer.
            selectedRegions = firstRegions
        else:
            secondRegions = secondShelve.getAllBoundingRegionsForChr(chrName)
            if firstIsWholeChrs and not secondIsWholeChrs:
                # Track 1 adds no restriction: track 2's regions are the answer.
                selectedRegions = secondRegions
            else:
                selectedRegions = self.getAllIntersectingRegions(
                    self.genome, chrName, firstRegions, secondRegions)

        for region in selectedRegions:
            yield region
def getTotalBpSpan(self):
    """Return the total span of this region.

    When self.chr is None the result is the sum of GenomeInfo.getChrLen over
    every chromosome in the extended chromosome list of self.genome;
    otherwise it is len(self).
    """
    if self.chr is not None:
        return len(self)

    # No chromosome set: add up the lengths of all chromosomes of the genome.
    totalLen = 0
    for chrName in GenomeInfo.getExtendedChrList(self.genome):
        totalLen = totalLen + GenomeInfo.getChrLen(self.genome, chrName)
    return totalLen
def getAllBoundingRegions(self):
    """Generate every bounding region, chromosome by chromosome.

    Chromosomes are visited in the order given by
    GenomeInfo.getExtendedChrList(self._genome); for each one the regions
    from getAllBoundingRegionsForChr are yielded in turn.

    Raises BoundingRegionsNotAvailableError if self.fileExists() is false.
    Because this is a generator, the check runs when iteration starts, not
    when the method is called.
    """
    if not self.fileExists():
        from gtrackcore_memmap.util.CommonFunctions import prettyPrintTrackName
        trackLabel = prettyPrintTrackName(self._trackName)
        raise BoundingRegionsNotAvailableError(
            'Bounding regions not available for track: ' + trackLabel)

    for chrName in GenomeInfo.getExtendedChrList(self._genome):
        chrRegions = self.getAllBoundingRegionsForChr(chrName)
        for region in chrRegions:
            yield region
def _removeBoundingRegionTuplesIfFullChrsAndNotFixedGapSize(self):
    """Drop the bounding region tuples when they only describe whole chromosomes.

    Nothing is changed unless the fixed gap size is 0 and the representation
    is not dense. In that case, if every tuple's region names a chromosome
    from the extended chromosome list, starts at 0 and ends at that
    chromosome's length, self._boundingRegionTuples is replaced by an empty
    list.
    """
    if self.getFixedGapSize() != 0 or self._reprIsDense:
        return

    # Bail out as soon as one region is anything but a full chromosome.
    for brTuple in self._boundingRegionTuples:
        region = brTuple.region
        if region.chr not in GenomeInfo.getExtendedChrList(self._genome):
            return
        if region.start != 0:
            return
        if region.end != GenomeInfo.getChrLen(self._genome, region.chr):
            return

    self._boundingRegionTuples = []
def parseRegSpec(regSpec, genome = None, includeExtraChrs = False):
    """Parse a textual region specification into GenomeRegion objects.

    regSpec is a comma-separated list of specifications. Each specification
    is split on ':' and may take these forms:

    - an optional leading genome name, which must equal `genome`;
    - '*' for all chromosomes (only allowed as the sole specification);
    - a chromosome name, optionally followed by ':start-end'.

    Positions are 1-indexed on input and converted to 0-indexed,
    end-exclusive regions. An empty end means the chromosome length.

    Parameters:
        regSpec: the region specification string.
        genome: genome name; when None, the first part of the first
            specification is used as the genome.
        includeExtraChrs: for '*', use GenomeInfo.getExtendedChrList instead
            of GenomeInfo.getChrList.

    Raises:
        AssertionError: if a specification is malformed (see the individual
            assert messages).
        InvalidFormatError: if GenomeInfo.getChrLen fails for the chromosome.

    NOTE(review): no return statement is visible in this view, and neither
    `regions` nor SimpleUserBinSource is used after being built. The tail of
    this function may lie outside the visible source; confirm before relying
    on the return value.
    """
    from gtrackcore_memmap.track.core.GenomeRegion import GenomeRegion
    from gtrackcore_memmap.metadata.GenomeInfo import GenomeInfo

    # Plain list subclass; not referenced again in the visible code.
    class SimpleUserBinSource(list):
        pass

    regions = []
    allRegSpecs = regSpec.strip().split(',')
    for curRegSpec in allRegSpecs:
        regParts = curRegSpec.strip().split(':')
        # With no genome given, the first part is taken to be the genome.
        # Since `genome` is reassigned here, this only applies to the first
        # specification in the list.
        if genome == None:
            genome = regParts[0]
            #assert GenomeInfo(genome).isInstalled(), "Specified genome is not installed: %s" % genome

        # A first part that is neither '*' nor a known chromosome must be the
        # genome name; it is validated and then stripped off.
        if not (regParts[0]=='*' or regParts[0] in GenomeInfo.getExtendedChrList(genome)):
        #if (regParts[0]=='*' or regParts[0].startswith('chr')):
        #    if genome == None:
        #        genome = DEFAULT_GENOME
        #else:
        #    assert genome is None or genome == regParts[0], \
            assert regParts[0] == genome, \
                "Region specification does not start with one of '*' or correct chromosome or genome name. Region specification: %s. Genome: %s" % (curRegSpec, genome)
            #genome = regParts[0]
            regParts = regParts[1:]

        if regParts[0] == '*':
            # '*' expands to one full-length region per chromosome.
            assert len(regParts) == 1, \
                "Region specification starts with '*' but continues with ':'. Region specification: %s" % curRegSpec
            assert len(allRegSpecs) == 1, \
                "Region specification is '*', but is in a list with other region specifications: %s" % regSpec
            chrList = GenomeInfo.getExtendedChrList(genome) if includeExtraChrs else GenomeInfo.getChrList(genome)
            for chr in chrList:
                regions.append(GenomeRegion(genome, chr, 0, GenomeInfo.getChrLen(genome, chr)))
        else:
            #assert(regParts[0].startswith('chr')), \
            assert regParts[0] in GenomeInfo.getExtendedChrList(genome), \
                "Region specification does not start with chromosome specification. Region specification: %s " % curRegSpec
            chr = regParts[0]
            # Any failure to look up the chromosome length is reported as an
            # invalid chromosome for this genome.
            try:
                chrLen = GenomeInfo.getChrLen(genome, chr)
            except Exception, e:
                raise InvalidFormatError("Chromosome '%s' does not exist for genome '%s'" % (chr, genome))

            if len(regParts)>1:
                posParts = regParts[1]
                assert '-' in posParts, \
                    "Position specification does not include character '-'. Region specification: %s " % curRegSpec
                rawStart, rawEnd = posParts.split('-')

                # 'k' and 'm' are expanded by plain text replacement. For the
                # start they become '001' / '000001' (so '1k' -> 1001), for
                # the end '000' / '000000' (so '2k' -> 2000).
                start = int(rawStart.replace('k','001').replace('m','000001'))
                end = int(rawEnd.replace('k','000').replace('m','000000')) if rawEnd != '' else chrLen
                assert start >= 1, \
                    "Start position is not positive. Region specification: %s " % curRegSpec
                assert end >= start, \
                    "End position is not larger than start position. Region specification: %s " % curRegSpec
                assert end <= chrLen, \
                    "End position is larger than chromosome size. Genome: %s. Chromosome size: %s. Region specification: %s" % (genome, chrLen, curRegSpec)
                #-1 for conversion from 1-indexing to 0-indexing end-exclusive
                start-=1
            else:
                # No position given: the region covers the whole chromosome.
                start,end = 0, chrLen
            regions.append( GenomeRegion(genome, chr, start, end) )
def _isOldTypeChromDirectory(dirPath, genome):
    """Tell whether dirPath is a chromosome-named directory with no subdirectories.

    Parameters:
        dirPath: path of the directory to inspect; one trailing path
            separator is tolerated.
        genome: genome name passed to GenomeInfo.getExtendedChrList.

    Returns:
        True if the last component of dirPath is one of the genome's
        extended chromosome names and none of the directory's entries is
        itself a directory; False otherwise.

    The directory is only listed when its name matches a chromosome, so a
    non-matching (or empty) path never touches the file system.
    """
    # endswith() is safe on an empty path, where indexing dirPath[-1] would
    # raise IndexError.
    if dirPath.endswith(os.sep):
        dirPath = os.path.dirname(dirPath)

    dirName = os.path.basename(dirPath)
    if dirName not in GenomeInfo.getExtendedChrList(genome):
        return False

    return not any(os.path.isdir(os.path.join(dirPath, subFn))
                   for subFn in os.listdir(dirPath))
def _commonAllBoundingRegionsAreWholeChr(self, brShelve):
    """Return True if every bounding region in brShelve is a whole chromosome.

    All chromosomes in the extended chromosome list of self.genome are
    checked; evaluation stops at the first region whose isWholeChr() is
    false. A shelve without any regions yields True.
    """
    return all(
        region.isWholeChr()
        for chrName in GenomeInfo.getExtendedChrList(self.genome)
        for region in brShelve.getAllBoundingRegionsForChr(chrName)
    )
def getTotalElementCount(self):
    """Return the element count summed over all chromosomes.

    Adds up getTotalElementCountForChr for every chromosome in the extended
    chromosome list of self._genome; the result is 0 when that list is empty.
    """
    total = 0
    for chrName in GenomeInfo.getExtendedChrList(self._genome):
        total = total + self.getTotalElementCountForChr(chrName)
    return total