def loadTrackView(trackData, region, borderHandling, allowOverlaps, trackName=[]):
    """
    Build a TrackView for `region` from the arrays found in `trackData`.

    trackData : see TrackSource.getTrackData {'id' : smartmemmap}
    region : see GenomeRegion
    borderHandling, allowOverlaps : passed on unchanged to the TrackView
        constructor.
    trackName : not used by this function.

    Returns the TrackView. For tracks whose representation is not dense, the
    view is additionally sliced according to its genome anchor before it is
    returned.

    Raises IOError if the track representation is not dense and `trackData`
    has no 'leftIndex' or 'rightIndex' entry.
    """
    brShelve = trackData.boundingRegionShelve
    brInfo = brShelve.getBoundingRegionInfo(region) if brShelve is not None else None

    # Build the exclusion set once, instead of concatenating two lists for
    # every array name. list() keeps this working when keys() returns a view.
    nonExtraNames = set(list(RESERVED_PREFIXES.keys()) + ['leftIndex', 'rightIndex'])
    extraArrayNames = [arrayName for arrayName in trackData
                       if arrayName not in nonExtraNames]

    # The reserved arrays are passed positionally below, so they are collected
    # in the iteration order of RESERVED_PREFIXES.
    reservedArrays = [TrackViewLoader._getArray(trackData, arrayName, brInfo)
                      for arrayName in RESERVED_PREFIXES]
    extraArrays = [TrackViewLoader._getArray(trackData, arrayName, brInfo)
                   for arrayName in extraArrayNames]
    trackFormat = TrackFormat(
        *(reservedArrays + [OrderedDict(zip(extraArrayNames, extraArrays))]))

    if trackFormat.reprIsDense():
        # Dense arrays are indexed by position; with bounding region info the
        # offsets are taken relative to the bounding region start.
        if brInfo is None:
            leftIndex = region.start
            rightIndex = region.end
        else:
            leftIndex = region.start - brInfo.start
            rightIndex = region.end - brInfo.start
    else:
        # Otherwise the element range is looked up in the 'leftIndex' and
        # 'rightIndex' arrays, using the bin numbers of the region borders.
        leftBin = CompBinManager.getBinNumber(region.start)
        rightBin = CompBinManager.getBinNumber(region.end - 1)

        if trackData.get('leftIndex') is None or trackData.get('rightIndex') is None:
            raise IOError('Preprocessed track not found. TrackData: ' + ', '.join(trackData.keys()))

        leftIndex = TrackViewLoader._getArray(trackData, 'leftIndex', brInfo, leftBin)
        rightIndex = TrackViewLoader._getArray(trackData, 'rightIndex', brInfo, rightBin)

    slicedReservedArrays = [(array[leftIndex:rightIndex] if array is not None else None)
                            for array in reservedArrays]
    slicedExtraArrays = [(array[leftIndex:rightIndex] if array is not None else None)
                         for array in extraArrays]

    argList = [region] + slicedReservedArrays + [borderHandling, allowOverlaps] + \
              [OrderedDict(zip(extraArrayNames, slicedExtraArrays))]
    tv = TrackView(*argList)

    if not trackFormat.reprIsDense():
        tv.sliceElementsAccordingToGenomeAnchor()
    return tv
class TrackView(object):
    """
    A view of track data within one genome region (the genome anchor).

    The data is held in parallel lists/arrays: starts, ends, values, strands,
    ids, edges, weights, plus any number of named extra lists. Lists that do
    not apply are None. The view can be iterated, sliced (tv[i:j], through the
    Python 2 __getslice__ protocol) and exported as numpy arrays, where starts
    and ends are given relative to the start of the genome anchor.
    """

    def _handlePointsAndPartitions(self):
        """
        Adjust the lists for partition/step-function and point formats.

        For partitions and step functions the starts are taken from the end
        list (each element starts where the previous one ends), and the first
        entry of every other list is dropped. For points, the end list is
        replaced by a VirtualPointEnd built from the start list.
        """
        if self.trackFormat.isPartitionOrStepFunction():
            self._startList = self._endList[:-1]
            self._endList = self._endList[1:]
            if self._valList is not None:
                self._valList = self._valList[1:]
            if self._strandList is not None:
                self._strandList = self._strandList[1:]
            if self._idList is not None:
                self._idList = self._idList[1:]
            if self._edgesList is not None:
                self._edgesList = self._edgesList[1:]
            if self._weightsList is not None:
                self._weightsList = self._weightsList[1:]
            # list() gives a snapshot, so the mapping can be updated while looping.
            for key, extraList in list(self._extraLists.items()):
                if extraList is not None:
                    self._extraLists[key] = extraList[1:]
        if self.trackFormat.isPoints():
            self._endList = VirtualPointEnd(self._startList)

    def __init__(self, genomeAnchor, startList=None, endList=None, valList=None,
                 strandList=None, idList=None, edgesList=None, weightsList=None,
                 borderHandling='crop', allowOverlaps=False, extraLists=OrderedDict()):
        """
        genomeAnchor : the region of the view; a copy (getCopy()) is stored.
        startList, endList, valList, strandList, idList, edgesList,
        weightsList : parallel element lists, or None where not applicable.
            At least one of startList, endList, valList and edgesList must be
            given.
        borderHandling : only 'crop' is accepted.
        allowOverlaps : stored on the view; when set, elements that end at or
            before the anchor start are skipped by iteration and by the
            numpy exports.
        extraLists : mapping from name to extra list. A copy (made with
            copy()) is stored on the view.
        """
        assert (startList is not None) or (endList is not None) or (valList is not None) or (edgesList is not None)
        assert borderHandling in ['crop']

        self.genomeAnchor = genomeAnchor.getCopy()
        self.trackFormat = TrackFormat(startList, endList, valList, strandList, idList,
                                       edgesList, weightsList, extraLists)
        self.borderHandling = borderHandling
        self.allowOverlaps = allowOverlaps

        self._trackElement = TrackElement(self)

        self._startList = startList
        self._endList = endList
        self._valList = valList
        self._strandList = strandList
        self._idList = idList
        self._edgesList = edgesList
        self._weightsList = weightsList
        self._extraLists = copy(extraLists)

        self._handlePointsAndPartitions()

        # Accessors of the shared track element are replaced by noneFunc for
        # every list that is absent.
        if self._startList is None:
            self._trackElement.start = noneFunc
        if self._endList is None:
            self._trackElement.end = noneFunc
        if self._valList is None:
            self._trackElement.val = noneFunc
        if self._strandList is None:
            self._trackElement.strand = noneFunc
        if self._idList is None:
            self._trackElement.id = noneFunc
        if self._edgesList is None:
            self._trackElement.edges = noneFunc
        if self._weightsList is None:
            self._trackElement.weights = noneFunc

        self._updateNumListElements()

        # All lists that are present must have the same length.
        allLists = [self._startList, self._endList, self._valList, self._strandList,
                    self._idList, self._edgesList, self._weightsList] \
                   + list(self._extraLists.values())
        for i, elList in enumerate(allLists):
            assert elList is None or len(elList) == self._numListElements, \
                'List (%s): ' % i + str(elList) + ' (expected %s elements, found %s)' % (self._numListElements, len(elList))

    def __iter__(self):
        "Restart iteration. The view is its own iterator, so iterations cannot be nested."
        self._trackElement._index = -1
        return self

    def _updateNumListElements(self):
        """
        Recompute the element counts after the lists have changed.

        _numListElements is the raw list length. _numIterElements is the
        number of elements seen when iterating; it is computed separately only
        when allowOverlaps is set and the lists are non-empty.
        """
        self._numListElements = self._computeNumListElements()

        if self.allowOverlaps and self._numListElements > 0:
            self._numIterElements = self._computeNumIterElements()
        else:
            self._numIterElements = self._numListElements

    def _computeNumListElements(self):
        "Return the length of the first list present among starts, ends, vals and edges."
        for elList in [self._startList, self._endList, self._valList, self._edgesList]:
            if elList is not None:
                return len(elList)
        raise ShouldNotOccurError

    def _computeNumIterElements(self):
        """
        Return the number of elements left after stowaways are removed.

        Numpy arrays are filtered directly; for other list types the elements
        are counted by iterating over the view.
        """
        for elList in [self._startList, self._endList, self._valList, self._edgesList]:
            if elList is not None:
                if isinstance(elList, numpy.ndarray):
                    return len(self._removeStowawaysFromNumpyArray(elList))
                else:
                    return sum(1 for x in self)
        raise ShouldNotOccurError

    def __len__(self):
        "Return the length of the genome anchor (see _bpSize), not the number of elements."
        return self._bpSize()

    def getNumElements(self):
        "Return the number of elements as seen when iterating over the view."
        return self._numIterElements

    def _bpSize(self):
        "Return len(self.genomeAnchor)."
        return len(self.genomeAnchor)

    def next(self):
        """
        Advance to the next element and return the shared track element.

        The same TrackElement object is returned every time; only its index
        changes. Raises StopIteration when the lists are exhausted.
        """
        self._trackElement._index += 1

        # To remove any blind passengers - segments entirely in front of genomeanchor,
        # but sorted after a larger segment crossing the border
        if self.allowOverlaps and not self.trackFormat.reprIsDense():
            while self._trackElement._index < self._numListElements and \
                    self._endList[self._trackElement._index] <= self.genomeAnchor.start:
                self._trackElement._index += 1

        if self._trackElement._index < self._numListElements:
            return self._trackElement
        else:
            raise StopIteration

    # Python 3 spelling of the iterator protocol; next() is kept for Python 2.
    __next__ = next

    def _findLeftIndex(self):
        "Return the index of the first element whose end is after the anchor start."
        leftIndex = 0
        # remove track elements entirely to the left of the anchor
        while leftIndex < len(self._endList) and self._endList[leftIndex] <= self.genomeAnchor.start:
            leftIndex += 1
        return leftIndex

    def _findRightIndex(self):
        "Return one past the index of the last element whose start is before the anchor end."
        rightIndex = self._numListElements
        while rightIndex > 0 and self._startList[rightIndex - 1] >= self.genomeAnchor.end:
            rightIndex -= 1
        return rightIndex

    def sliceElementsAccordingToGenomeAnchor(self):
        "Drop elements outside the genome anchor. Only valid when the representation is not dense."
        assert not self.trackFormat.reprIsDense()
        self._doScatteredSlicing()

    def _sliceOptionalLists(self, i, j):
        "Slice every list other than starts and ends, including the extra lists, to [i:j]. None entries are left as None."
        if self._valList is not None:
            self._valList = self._valList[i:j]
        if self._strandList is not None:
            self._strandList = self._strandList[i:j]
        if self._idList is not None:
            self._idList = self._idList[i:j]
        if self._edgesList is not None:
            self._edgesList = self._edgesList[i:j]
        if self._weightsList is not None:
            self._weightsList = self._weightsList[i:j]
        # Extra lists may be None (the constructor accepts that), so they need
        # the same guard as the other lists.
        for key, extraList in list(self._extraLists.items()):
            if extraList is not None:
                self._extraLists[key] = extraList[i:j]

    def _doScatteredSlicing(self):
        "Cut all lists down to the elements found by _findLeftIndex and _findRightIndex, then refresh the counts."
        leftIndex = self._findLeftIndex()
        rightIndex = self._findRightIndex()

        # An anchor of size zero contains no elements.
        if self._bpSize() == 0:
            rightIndex = leftIndex

        self._startList = self._startList[leftIndex:rightIndex]
        self._endList = self._endList[leftIndex:rightIndex]
        self._sliceOptionalLists(leftIndex, rightIndex)
        self._updateNumListElements()

    def _doDenseSlicing(self, i, j):
        "Slice all lists except starts and ends to [i:j], then refresh the counts."
        self._sliceOptionalLists(i, j)
        self._updateNumListElements()

    def __getslice__(self, i, j):
        """
        Return a new TrackView for the part [i:j] of this view.

        i and j are offsets relative to the start of this view's genome
        anchor; a negative j is counted from the anchor end. The new view gets
        its own genome anchor and shares this view's trackFormat.
        """
        slicedTV = TrackView(self.genomeAnchor, self._startList, self._endList,
                             self._valList, self._strandList, self._idList,
                             self._edgesList, self._weightsList,
                             self.borderHandling, self.allowOverlaps,
                             extraLists=self._extraLists)
        slicedTV.trackFormat = self.trackFormat

        slicedTV.genomeAnchor.start += i
        if j >= 0:
            try:
                slicedTV.genomeAnchor.end = min(self.genomeAnchor.end, self.genomeAnchor.start + j)
            except FloatingPointError:  # Caused by trackView[:] with self.genomeAnchor.start > 0
                slicedTV.genomeAnchor.end = self.genomeAnchor.end
        if j < 0:
            slicedTV.genomeAnchor.end += j

        if self.trackFormat.reprIsDense():
            slicedTV._doDenseSlicing(i, j)
        else:
            slicedTV._doScatteredSlicing()
        return slicedTV

    def _getBpLevelModificationArray(self, indexes, vals):
        "Sum vals per index with numpy.bincount and return the result resized to _bpSize()+1 entries."
        bpLevelMod = numpy.bincount(indexes, vals)
        origLen = len(bpLevelMod)
        bpLevelMod.resize(self._bpSize() + 1)
        bpLevelMod[origLen:] = 0
        return bpLevelMod

    def _commonGetBpLevelArray(self, vals):
        """
        Spread per-element vals out to one entry per position of the anchor.

        For dense representations vals is returned as it is (overlaps are not
        supported there). Otherwise each value is added at the element start
        and subtracted at the element end, and the cumulative sum gives the
        level at each position.
        """
        if self.trackFormat.reprIsDense():
            if self.allowOverlaps:
                raise ShouldNotOccurError()
            return vals
        else:
            # One extra entry takes the subtraction for elements ending at the anchor end.
            bpLevelArray = numpy.zeros(self._bpSize() + 1)
            numElements = self.getNumElements()
            if numElements > 0:
                bpLevelArray += self._getBpLevelModificationArray(self.startsAsNumpyArray(), vals)
                bpLevelArray -= self._getBpLevelModificationArray(self.endsAsNumpyArray(), vals)
                bpLevelArray = bpLevelArray.cumsum(dtype='float64')
            return bpLevelArray[:-1]

    def getBinaryBpLevelArray(self):
        "Return a boolean array derived from _commonGetBpLevelArray with a value of 1 per element."
        vals = numpy.ones(self.getNumElements(), dtype='int32')
        # 'bool' is the same dtype as the older 'bool8' alias.
        return numpy.array(self._commonGetBpLevelArray(vals), dtype='bool')

    def getCoverageBpLevelArray(self):
        "Return an int32 array derived from _commonGetBpLevelArray with a value of 1 per element."
        vals = numpy.ones(self.getNumElements(), dtype='int32')
        return numpy.array(self._commonGetBpLevelArray(vals), dtype='int32')

    def getValueBpLevelArray(self, voidValue=0):
        '''
        Creates a bp-level function of any valued track. In case of scattered tracks,
        uncovered areas are filled with voidValue (which would typically be set to 0 or numpy.nan).
        In the case of overlapping regions, the values are added.'''

        assert self.trackFormat.isValued('number'), self.trackFormat
        vals = self.valsAsNumpyArray()
        bpLevelArray = numpy.array(self._commonGetBpLevelArray(vals), dtype=vals.dtype)
        if voidValue != 0:
            bpLevelArray[~self.getBinaryBpLevelArray()] = voidValue
        return bpLevelArray

    def _removeStowawaysFromNumpyArray(self, numpyArray):
        '''
        To remove any stowaways - segments entirely in front of genomeanchor,
        but sorted after a larger segment crossing the border.
        '''
        if self.allowOverlaps and len(numpyArray) > 0:
            numpyArray = numpyArray[numpy.where(self._endList > self.genomeAnchor.start)]
        return numpyArray

    def _commonAsNumpyArray(self, numpyArray, numpyArrayModMethod, name):
        """
        Return numpyArray without stowaways, passed through numpyArrayModMethod if one is given.

        Returns None if numpyArray is None. The name argument is not used.
        """
        assert self.borderHandling in ['crop']
        if numpyArray is None:
            return None

        numpyArray = self._removeStowawaysFromNumpyArray(numpyArray)
        if numpyArrayModMethod is not None:
            return numpyArrayModMethod(numpyArray)
        else:
            return numpyArray

    def startsAsNumpyArray(self):
        "Return the starts relative to the anchor start, with negative values set to 0."
        return self._commonAsNumpyArray(self._startList, self._startListModMethod, 'starts')

    def _startListModMethod(self, startList):
        "Shift starts to be relative to the anchor start and crop them at 0."
        return numpy.maximum(startList - self.genomeAnchor.start,
                             numpy.zeros(len(startList), dtype='int32'))

    def endsAsNumpyArray(self):
        "Return the ends relative to the anchor start, cropped at the anchor length."
        return self._commonAsNumpyArray(self._endList, self._endListModMethod, 'ends')

    def _endListModMethod(self, endList):
        "Shift ends to be relative to the anchor start and crop them at len(genomeAnchor)."
        return numpy.minimum(endList - self.genomeAnchor.start,
                             numpy.zeros(len(endList), dtype='int32') + len(self.genomeAnchor))

    def valsAsNumpyArray(self):
        "Return the values without stowaways, or None if there is no value list."
        return self._commonAsNumpyArray(self._valList, None, 'vals')

    def strandsAsNumpyArray(self):
        "Return the strands without stowaways, or None if there is no strand list."
        return self._commonAsNumpyArray(self._strandList, None, 'strands')

    def idsAsNumpyArray(self):
        "Return the ids without stowaways, or None if there is no id list."
        return self._commonAsNumpyArray(self._idList, None, 'ids')

    def edgesAsNumpyArray(self):
        "Return the edges without stowaways, or None if there is no edges list."
        return self._commonAsNumpyArray(self._edgesList, None, 'edges')

    def weightsAsNumpyArray(self):
        "Return the weights without stowaways, or None if there is no weights list."
        return self._commonAsNumpyArray(self._weightsList, None, 'weights')

    def extrasAsNumpyArray(self, key):
        "Return the extra list stored under key, without stowaways. The key must exist."
        assert self.hasExtra(key)
        return self._commonAsNumpyArray(self._extraLists[key], None, 'extras')

    def allExtrasAsDictOfNumpyArrays(self):
        "Return an OrderedDict from each extra key to the result of extrasAsNumpyArray(key)."
        return OrderedDict([(key, self.extrasAsNumpyArray(key)) for key in self._extraLists])

    def hasExtra(self, key):
        "Return True if an extra list is stored under key."
        return key in self._extraLists
def loadTrackView(trackData, region, borderHandling, allowOverlaps, trackName=[]):
    """
    Build a TrackView for `region` from the arrays found in `trackData`.

    trackData : see TrackSource.getTrackData {'id' : smartmemmap}
    region : see GenomeRegion
    borderHandling, allowOverlaps : passed on unchanged to the TrackView
        constructor.
    trackName : not used by this function.

    Returns the TrackView. For tracks whose representation is not dense, the
    view is additionally sliced according to its genome anchor before it is
    returned.

    Raises IOError if the track representation is not dense and `trackData`
    has no 'leftIndex' or 'rightIndex' entry.
    """
    brShelve = trackData.boundingRegionShelve
    brInfo = brShelve.getBoundingRegionInfo(region) if brShelve is not None else None

    # Build the exclusion set once, instead of concatenating two lists for
    # every array name. list() keeps this working when keys() returns a view.
    nonExtraNames = set(list(RESERVED_PREFIXES.keys()) + ['leftIndex', 'rightIndex'])
    extraArrayNames = [arrayName for arrayName in trackData
                       if arrayName not in nonExtraNames]

    # The reserved arrays are passed positionally below, so they are collected
    # in the iteration order of RESERVED_PREFIXES.
    reservedArrays = [TrackViewLoader._getArray(trackData, arrayName, brInfo)
                      for arrayName in RESERVED_PREFIXES]
    extraArrays = [TrackViewLoader._getArray(trackData, arrayName, brInfo)
                   for arrayName in extraArrayNames]
    trackFormat = TrackFormat(
        *(reservedArrays + [OrderedDict(zip(extraArrayNames, extraArrays))]))

    if trackFormat.reprIsDense():
        # Dense arrays are indexed by position; with bounding region info the
        # offsets are taken relative to the bounding region start.
        if brInfo is None:
            leftIndex = region.start
            rightIndex = region.end
        else:
            leftIndex = region.start - brInfo.start
            rightIndex = region.end - brInfo.start
    else:
        # Otherwise the element range is looked up in the 'leftIndex' and
        # 'rightIndex' arrays, using the bin numbers of the region borders.
        leftBin = CompBinManager.getBinNumber(region.start)
        rightBin = CompBinManager.getBinNumber(region.end - 1)

        if trackData.get('leftIndex') is None or trackData.get('rightIndex') is None:
            raise IOError('Preprocessed track not found. TrackData: ' + ', '.join(trackData.keys()))

        leftIndex = TrackViewLoader._getArray(trackData, 'leftIndex', brInfo, leftBin)
        rightIndex = TrackViewLoader._getArray(trackData, 'rightIndex', brInfo, rightBin)

    slicedReservedArrays = [(array[leftIndex:rightIndex] if array is not None else None)
                            for array in reservedArrays]
    slicedExtraArrays = [(array[leftIndex:rightIndex] if array is not None else None)
                         for array in extraArrays]

    argList = [region] + slicedReservedArrays + [borderHandling, allowOverlaps] + \
              [OrderedDict(zip(extraArrayNames, slicedExtraArrays))]
    tv = TrackView(*argList)

    if not trackFormat.reprIsDense():
        tv.sliceElementsAccordingToGenomeAnchor()
    return tv