def _assertIsCompatibleWith(self, tfReq, reqList):
    """Check tfReq.isCompatibleWith() against a brute-force expectation.

    A TrackFormat is built for every combination of present ([]) / absent
    (None) start, end, val and strand lists, the four id/edges/weights
    prefixes (none, id, id+edges, id+edges+weights) and present/absent
    extra lists. Combinations lacking all of start, end and val are skipped.

    reqList holds one required value per property, in this order: isDense,
    isValued, isInterval, isLinked, hasStrand, hasId, isWeighted, hasExtra,
    val type name, weight type name, extra names. A None entry places no
    constraint on that property; empty names ('' or []) are compared as
    False.
    """
    from itertools import product

    flags = (False, True)
    for hasStart, hasEnd, hasVal, hasStrand, numLinkLists, hasExtra in \
            product(flags, flags, flags, flags, range(4), flags):
        if not (hasStart or hasEnd or hasVal):
            continue

        # Fresh containers for every combination, exactly as the literal
        # nested loops would create them.
        start = [] if hasStart else None
        end = [] if hasEnd else None
        val = [] if hasVal else None
        strand = [] if hasStrand else None
        idList = [] if numLinkLists >= 1 else None
        edges = [] if numLinkLists >= 2 else None
        weights = [] if numLinkLists >= 3 else None
        extra = {'a': [], 'b': []} if hasExtra else None

        tf = TrackFormat(start, end, val, strand, idList, edges, weights, extra)

        propList = [tf.isDense(), tf.isValued(), tf.isInterval(),
                    tf.isLinked(), tf.hasStrand(), tf.hasId(),
                    tf.isWeighted(), tf.hasExtra()]
        valTypeName = tf.getValTypeName()
        propList.append(valTypeName if valTypeName != '' else False)
        weightTypeName = tf.getWeightTypeName()
        propList.append(weightTypeName if weightTypeName != '' else False)
        extraNames = tf.getExtraNames()
        propList.append(extraNames if extraNames != [] else False)

        isCompatible = all(req is None or req == prop
                           for req, prop in zip(reqList, propList))
        self.assertEqual(isCompatible, tfReq.isCompatibleWith(tf))
def _assertIsCompatibleWith(self, tfReq, reqList):
    """Verify tfReq.isCompatibleWith() for every constructible TrackFormat.

    Every combination of present ([]) or absent (None) start, end, val and
    strand lists is tried, together with the four id/edges/weights prefixes
    (none, id, id+edges, id+edges+weights) and with or without extra lists.
    Combinations that have none of start, end and val are skipped.

    reqList gives the required value of each property in the order:
    isDense, isValued, isInterval, isLinked, hasStrand, hasId, isWeighted,
    hasExtra, val type name, weight type name, extra names. None means
    "no requirement"; an empty name ('' or []) is compared as False.
    """
    from itertools import product

    absentPresent = (False, True)
    combinations = product(absentPresent, absentPresent, absentPresent,
                           absentPresent, range(4), absentPresent)
    for withStart, withEnd, withVal, withStrand, linkDepth, withExtra in combinations:
        if not (withStart or withEnd or withVal):
            continue

        # New list/dict objects are created for each combination so that no
        # argument is shared between TrackFormat instances.
        start = [] if withStart else None
        end = [] if withEnd else None
        val = [] if withVal else None
        strand = [] if withStrand else None
        idList = [] if linkDepth >= 1 else None
        edges = [] if linkDepth >= 2 else None
        weights = [] if linkDepth >= 3 else None
        extra = {'a': [], 'b': []} if withExtra else None

        tf = TrackFormat(start, end, val, strand, idList, edges, weights, extra)

        propList = [tf.isDense(), tf.isValued(), tf.isInterval(),
                    tf.isLinked(), tf.hasStrand(), tf.hasId(),
                    tf.isWeighted(), tf.hasExtra()]
        valTypeName = tf.getValTypeName()
        propList.append(valTypeName if valTypeName != '' else False)
        weightTypeName = tf.getWeightTypeName()
        propList.append(weightTypeName if weightTypeName != '' else False)
        extraNames = tf.getExtraNames()
        propList.append(extraNames if extraNames != [] else False)

        isCompatible = all(req is None or req == prop
                           for req, prop in zip(reqList, propList))
        self.assertEqual(isCompatible, tfReq.isCompatibleWith(tf))
class TrackView(object):
    """Track element lists paired with the genome anchor they are viewed through.

    The view holds parallel lists (starts, ends, vals, strands, ids, edges,
    weights and named extra lists); any of them may be None. It can be
    iterated element by element, sliced by position relative to the anchor,
    and exported as numpy arrays or bp-level arrays.
    """

    def _handlePointsAndPartitions(self):
        """Derive the stored start/end lists for partition-like and point formats.

        For formats where trackFormat.isPartitionOrStepFunction() holds, the
        start list is taken from the end list shifted by one, and the first
        entry of every other non-None list is dropped. For point formats the
        end list becomes a VirtualPointEnd wrapping the start list.
        """
        if self.trackFormat.isPartitionOrStepFunction():
            self._startList = self._endList[:-1]
            self._endList = self._endList[1:]
            if self._valList is not None:
                self._valList = self._valList[1:]
            if self._strandList is not None:
                self._strandList = self._strandList[1:]
            if self._idList is not None:
                self._idList = self._idList[1:]
            if self._edgesList is not None:
                self._edgesList = self._edgesList[1:]
            if self._weightsList is not None:
                self._weightsList = self._weightsList[1:]
            for key, extraList in self._extraLists.items():
                if extraList is not None:
                    self._extraLists[key] = extraList[1:]

        if self.trackFormat.isPoints():
            self._endList = VirtualPointEnd(self._startList)

    def __init__(self, genomeAnchor, startList=None, endList=None, valList=None,
                 strandList=None, idList=None, edgesList=None, weightsList=None,
                 borderHandling='crop', allowOverlaps=False, extraLists=None):
        """Create a view over the given lists, anchored at a copy of genomeAnchor.

        Args:
            genomeAnchor: region object; it is copied via getCopy(), and its
                start, end and len() are used by the view.
            startList, endList, valList, strandList, idList, edgesList,
                weightsList: parallel element lists, each optional. At least
                one of startList, endList, valList, edgesList must be given.
            borderHandling: only 'crop' is accepted.
            allowOverlaps: when true, iteration and the numpy accessors skip
                elements that end at or before the anchor start.
            extraLists: mapping from name to extra list; a shallow copy is
                stored. Omitted or None means an empty OrderedDict.

        Raises:
            AssertionError: if no main list is given, borderHandling is not
                'crop', or a non-None list differs in length from the rest.
        """
        # A None sentinel replaces the former shared mutable default; the
        # resulting value is the same empty OrderedDict.
        if extraLists is None:
            extraLists = OrderedDict()

        assert (startList is not None) or (endList is not None) or \
            (valList is not None) or (edgesList is not None)
        assert borderHandling in ['crop']

        self.genomeAnchor = genomeAnchor.getCopy()
        self.trackFormat = TrackFormat(startList, endList, valList, strandList,
                                       idList, edgesList, weightsList, extraLists)
        self.borderHandling = borderHandling
        self.allowOverlaps = allowOverlaps

        self._trackElement = TrackElement(self)

        self._startList = startList
        self._endList = endList
        self._valList = valList
        self._strandList = strandList
        self._idList = idList
        self._edgesList = edgesList
        self._weightsList = weightsList
        self._extraLists = copy(extraLists)

        self._handlePointsAndPartitions()

        # Accessors for absent lists are replaced by noneFunc on the shared
        # track element.
        if self._startList is None:
            self._trackElement.start = noneFunc
        if self._endList is None:
            self._trackElement.end = noneFunc
        if self._valList is None:
            self._trackElement.val = noneFunc
        if self._strandList is None:
            self._trackElement.strand = noneFunc
        if self._idList is None:
            self._trackElement.id = noneFunc
        if self._edgesList is None:
            self._trackElement.edges = noneFunc
        if self._weightsList is None:
            self._trackElement.weights = noneFunc

        self._updateNumListElements()

        allLists = [self._startList, self._endList, self._valList,
                    self._strandList, self._idList, self._edgesList,
                    self._weightsList] + \
                   [extraList for extraList in self._extraLists.values()]
        for i, curList in enumerate(allLists):
            assert curList is None or len(curList) == self._numListElements, \
                'List (%s): ' % i + str(curList) + \
                ' (expected %s elements, found %s)' % (self._numListElements, len(curList))

    def __iter__(self):
        """Restart iteration; the view itself is the iterator."""
        self._trackElement._index = -1
        return self

    def _updateNumListElements(self):
        """Recompute the stored list length and the number of iterable elements."""
        self._numListElements = self._computeNumListElements()
        if self.allowOverlaps and self._numListElements > 0:
            self._numIterElements = self._computeNumIterElements()
        else:
            self._numIterElements = self._numListElements

    def _computeNumListElements(self):
        """Return the length of the first non-None list among start/end/val/edges."""
        for curList in [self._startList, self._endList, self._valList, self._edgesList]:
            if curList is not None:
                return len(curList)
        raise ShouldNotOccurError

    def _computeNumIterElements(self):
        """Return how many elements iteration yields once stowaways are skipped.

        Numpy arrays are filtered directly; for any other list type the view
        is iterated and counted, which resets the iteration position.
        """
        for curList in [self._startList, self._endList, self._valList, self._edgesList]:
            if curList is not None:
                if isinstance(curList, numpy.ndarray):
                    return len(self._removeStowawaysFromNumpyArray(curList))
                else:
                    return sum(1 for x in self)
        raise ShouldNotOccurError

    def __len__(self):
        """Return the size of the genome anchor, not the number of elements."""
        return self._bpSize()

    def getNumElements(self):
        """Return the number of elements that iteration yields."""
        return self._numIterElements

    def _bpSize(self):
        """Return len() of the genome anchor."""
        return len(self.genomeAnchor)

    def next(self):
        """Advance to the next element and return the shared track element.

        Raises:
            StopIteration: when the lists are exhausted.
        """
        self._trackElement._index += 1

        # To remove any blind passengers - segments entirely in front of the
        # genome anchor, but sorted after a larger segment crossing the border.
        if self.allowOverlaps and not self.trackFormat.reprIsDense():
            while self._trackElement._index < self._numListElements and \
                    self._endList[self._trackElement._index] <= self.genomeAnchor.start:
                self._trackElement._index += 1

        if self._trackElement._index < self._numListElements:
            return self._trackElement
        else:
            raise StopIteration

    def _findLeftIndex(self):
        """Return the index of the first element ending after the anchor start."""
        leftIndex = 0
        # Remove track elements entirely to the left of the anchor.
        while leftIndex < len(self._endList) and \
                self._endList[leftIndex] <= self.genomeAnchor.start:
            leftIndex += 1
        return leftIndex

    def _findRightIndex(self):
        """Return one past the index of the last element starting before the anchor end."""
        rightIndex = self._numListElements
        while rightIndex > 0 and \
                self._startList[rightIndex - 1] >= self.genomeAnchor.end:
            rightIndex -= 1
        return rightIndex

    def sliceElementsAccordingToGenomeAnchor(self):
        """Trim the lists in place to the anchor; only valid for non-dense representations."""
        assert not self.trackFormat.reprIsDense()
        self._doScatteredSlicing()

    def _doScatteredSlicing(self):
        """Cut all lists in place to the index range found from the genome anchor.

        When the anchor has size zero, the result is empty.
        """
        leftIndex = self._findLeftIndex()
        rightIndex = self._findRightIndex()

        if self._bpSize() == 0:
            rightIndex = leftIndex

        self._startList = self._startList[leftIndex:rightIndex]
        self._endList = self._endList[leftIndex:rightIndex]

        if self._valList is not None:
            self._valList = self._valList[leftIndex:rightIndex]
        if self._strandList is not None:
            self._strandList = self._strandList[leftIndex:rightIndex]
        if self._idList is not None:
            self._idList = self._idList[leftIndex:rightIndex]
        if self._edgesList is not None:
            self._edgesList = self._edgesList[leftIndex:rightIndex]
        if self._weightsList is not None:
            self._weightsList = self._weightsList[leftIndex:rightIndex]
        for key, extraList in self._extraLists.items():
            # Extra lists may be None (the constructor's length check allows
            # it), so they are skipped like the other optional lists.
            if extraList is not None:
                self._extraLists[key] = extraList[leftIndex:rightIndex]
        self._updateNumListElements()

    def _doDenseSlicing(self, i, j):
        """Cut every non-None val/strand/id/edges/weights/extra list in place to [i:j]."""
        if self._valList is not None:
            self._valList = self._valList[i:j]
        if self._strandList is not None:
            self._strandList = self._strandList[i:j]
        if self._idList is not None:
            self._idList = self._idList[i:j]
        if self._edgesList is not None:
            self._edgesList = self._edgesList[i:j]
        if self._weightsList is not None:
            self._weightsList = self._weightsList[i:j]
        for key, extraList in self._extraLists.items():
            # Extra lists may be None; leave those untouched.
            if extraList is not None:
                self._extraLists[key] = extraList[i:j]
        self._updateNumListElements()

    def __getslice__(self, i, j):
        """Return a new TrackView covering positions i to j relative to the anchor.

        The new view's anchor start is moved forward by i. For j >= 0 the
        anchor end becomes start + j, capped at the current end; for j < 0
        the end is moved back by |j|. The new view reuses this view's
        trackFormat object.
        """
        slicedTV = TrackView(self.genomeAnchor, self._startList, self._endList,
                             self._valList, self._strandList, self._idList,
                             self._edgesList, self._weightsList,
                             self.borderHandling, self.allowOverlaps,
                             extraLists=self._extraLists)
        slicedTV.trackFormat = self.trackFormat

        slicedTV.genomeAnchor.start += i
        if j >= 0:
            try:
                slicedTV.genomeAnchor.end = min(self.genomeAnchor.end,
                                                self.genomeAnchor.start + j)
            except FloatingPointError:
                # Caused by trackView[:] with self.genomeAnchor.start > 0
                slicedTV.genomeAnchor.end = self.genomeAnchor.end
        if j < 0:
            slicedTV.genomeAnchor.end += j

        if self.trackFormat.reprIsDense():
            slicedTV._doDenseSlicing(i, j)
        else:
            slicedTV._doScatteredSlicing()
        return slicedTV

    def _getBpLevelModificationArray(self, indexes, vals):
        """Sum vals per index with bincount and zero-pad the result to _bpSize() + 1."""
        bpLevelMod = numpy.bincount(indexes, vals)
        origLen = len(bpLevelMod)
        bpLevelMod.resize(self._bpSize() + 1)
        bpLevelMod[origLen:] = 0
        return bpLevelMod

    def _commonGetBpLevelArray(self, vals):
        """Spread per-element vals over the base pairs of the anchor.

        For dense representations vals is returned unchanged. Otherwise each
        value is added at its element's start and subtracted at its end, and
        the cumulative sum gives the per-position total.

        Raises:
            ShouldNotOccurError: for a dense representation with allowOverlaps.
        """
        if self.trackFormat.reprIsDense():
            if self.allowOverlaps:
                raise ShouldNotOccurError()
            return vals
        else:
            bpLevelArray = numpy.zeros(self._bpSize() + 1)
            numElements = self.getNumElements()
            if numElements > 0:
                bpLevelArray += self._getBpLevelModificationArray(self.startsAsNumpyArray(), vals)
                bpLevelArray -= self._getBpLevelModificationArray(self.endsAsNumpyArray(), vals)
                bpLevelArray = bpLevelArray.cumsum(dtype='float64')
            return bpLevelArray[:-1]

    def getBinaryBpLevelArray(self):
        """Return a boolean bp-level array that is True where any element covers."""
        vals = numpy.ones(self.getNumElements(), dtype='int32')
        # 'bool' is the same dtype as the older 'bool8' alias, which NumPy 2.0
        # no longer provides.
        return numpy.array(self._commonGetBpLevelArray(vals), dtype='bool')

    def getCoverageBpLevelArray(self):
        """Return an int32 bp-level array counting the elements covering each position."""
        vals = numpy.ones(self.getNumElements(), dtype='int32')
        return numpy.array(self._commonGetBpLevelArray(vals), dtype='int32')

    def getValueBpLevelArray(self, voidValue=0):
        '''
        Creates a bp-level function of any valued track. In case of scattered tracks,
        uncovered areas are filled with voidValue (which would typically be set to 0 or numpy.nan).
        In the case of overlapping regions, the values are added.'''
        assert self.trackFormat.isValued('number'), self.trackFormat
        vals = self.valsAsNumpyArray()
        bpLevelArray = numpy.array(self._commonGetBpLevelArray(vals), dtype=vals.dtype)
        if voidValue != 0:
            bpLevelArray[~self.getBinaryBpLevelArray()] = voidValue
        return bpLevelArray

    def _removeStowawaysFromNumpyArray(self, numpyArray):
        '''
        To remove any stowaways - segments entirely in front of genomeanchor,
        but sorted after a larger segment crossing the border.
        '''
        if self.allowOverlaps and len(numpyArray) > 0:
            numpyArray = numpyArray[numpy.where(self._endList > self.genomeAnchor.start)]
        return numpyArray

    def _commonAsNumpyArray(self, numpyArray, numpyArrayModMethod, name):
        """Return numpyArray without stowaways, passed through numpyArrayModMethod if given.

        Returns None when numpyArray is None. The name argument is not used.
        """
        assert self.borderHandling in ['crop']
        if numpyArray is None:
            return None

        numpyArray = self._removeStowawaysFromNumpyArray(numpyArray)

        if numpyArrayModMethod is not None:
            return numpyArrayModMethod(numpyArray)
        else:
            return numpyArray

    def startsAsNumpyArray(self):
        """Return element starts relative to the anchor start, cropped at 0."""
        return self._commonAsNumpyArray(self._startList, self._startListModMethod, 'starts')

    def _startListModMethod(self, startList):
        """Shift starts by the anchor start and clamp negative values to 0."""
        return numpy.maximum(startList - self.genomeAnchor.start,
                             numpy.zeros(len(startList), dtype='int32'))

    def endsAsNumpyArray(self):
        """Return element ends relative to the anchor start, cropped at the anchor length."""
        return self._commonAsNumpyArray(self._endList, self._endListModMethod, 'ends')

    def _endListModMethod(self, endList):
        """Shift ends by the anchor start and clamp them to len(genomeAnchor)."""
        return numpy.minimum(endList - self.genomeAnchor.start,
                             numpy.zeros(len(endList), dtype='int32') + len(self.genomeAnchor))

    def valsAsNumpyArray(self):
        """Return the value list without stowaways, or None if absent."""
        return self._commonAsNumpyArray(self._valList, None, 'vals')

    def strandsAsNumpyArray(self):
        """Return the strand list without stowaways, or None if absent."""
        return self._commonAsNumpyArray(self._strandList, None, 'strands')

    def idsAsNumpyArray(self):
        """Return the id list without stowaways, or None if absent."""
        return self._commonAsNumpyArray(self._idList, None, 'ids')

    def edgesAsNumpyArray(self):
        """Return the edges list without stowaways, or None if absent."""
        return self._commonAsNumpyArray(self._edgesList, None, 'edges')

    def weightsAsNumpyArray(self):
        """Return the weights list without stowaways, or None if absent."""
        return self._commonAsNumpyArray(self._weightsList, None, 'weights')

    def extrasAsNumpyArray(self, key):
        """Return the extra list stored under key, without stowaways.

        Raises:
            AssertionError: if no extra list is stored under key.
        """
        assert self.hasExtra(key)
        return self._commonAsNumpyArray(self._extraLists[key], None, 'extras')

    def allExtrasAsDictOfNumpyArrays(self):
        """Return an OrderedDict mapping every extra name to extrasAsNumpyArray(name)."""
        return OrderedDict([(key, self.extrasAsNumpyArray(key)) for key in self._extraLists])

    def hasExtra(self, key):
        """Return True if an extra list is stored under key."""
        return key in self._extraLists