def getBoundingRegionInfo(self, region):
    """Return the bounding region info that encloses ``region``.

    ``self._contents`` is refreshed for ``region.chr`` first. If that
    chromosome has stored bounding regions, the one with the largest start
    not greater than ``region.start`` is looked up by bisection and
    returned when ``region`` lies inside it.

    If the chromosome has bounding regions but none encloses ``region``,
    OutsideBoundingRegionError is raised, unless ``region`` equals
    ``self._minimalRegion``. In every remaining case a fresh
    ``BoundingRegionInfo(region.start, region.end, 0, 0, 0, 0)`` is
    returned.
    """
    self._updateContentsIfNecessary(region.chr)

    if region.chr in self._contents:
        brInfoHolder = self._contents[region.chr]

        # Temporary, to support old preprocessed boundingRegion.shelve files,
        # which store a dict keyed by bounding region start.
        isDict = isinstance(brInfoHolder, dict)
        if isDict:
            # bisect_right needs an indexable sequence in ascending order.
            # Plain dict keys guarantee neither (and are a non-indexable
            # view on Python 3), so sort them into a list first.
            brStarts = sorted(brInfoHolder.keys())
        else:
            # NOTE(review): presumably brStarts is ascending and parallel
            # to brInfos -- confirm against the code that builds the holder.
            brStarts = brInfoHolder.brStarts

        idx = bisect_right(brStarts, region.start)

        if idx > 0:
            if isDict:
                brInfo = brInfoHolder[brStarts[idx - 1]]
            else:
                brInfo = brInfoHolder.brInfos[idx - 1]

            if region.start < brInfo.end and region.end <= brInfo.end:
                return brInfo

        if not self._minimalRegion == region:
            # There are bounding regions in the same chromosome, but not any
            # encompassing the user bin. Thus the bounding regions are
            # explicitly defined (not just the complete chromosome).
            from gtrackcore.util.CommonFunctions import prettyPrintTrackName
            raise OutsideBoundingRegionError(
                "The analysis region '%s' is outside the bounding regions of track: %s"
                % (region, prettyPrintTrackName(self._trackName)))

    return BoundingRegionInfo(region.start, region.end, 0, 0, 0, 0)
def getTrackView(self, region):
    """Return the track view for ``region`` in the requested track format.

    The raw view comes from ``self._getRawTrackView``, called with the
    border handling and overlap rule reported by ``self._trackFormatReq``.
    The converter list is fetched through ``getFormatConverters`` the first
    time it is needed and kept in ``self.formatConverters``; the first
    converter of that list performs the conversion.

    Raises:
        AssertionError: if the format requirement reports ``None`` for the
            overlap rule or the border handling.
        IncompatibleTracksError: if the converter list is empty, or if the
            first converter reports that it cannot handle the conversion.
    """
    allowOverlaps = self._trackFormatReq.allowOverlaps()
    borderHandling = self._trackFormatReq.borderHandling()
    assert allowOverlaps is not None
    assert borderHandling is not None

    rawView = self._getRawTrackView(region, borderHandling, allowOverlaps)
    rawFormat = rawView.trackFormat

    if self.formatConverters is None:
        self.formatConverters = getFormatConverters(rawFormat, self._trackFormatReq)

    if self.formatConverters == []:
        trackLabel = prettyPrintTrackName(self.trackName)
        formatText = str(rawFormat)
        valSuffix = '(' + rawFormat._val + ')' if rawFormat._val else ''
        raise IncompatibleTracksError(
            trackLabel + ' with format: ' + formatText + valSuffix
            + ' does not satisfy ' + str(self._trackFormatReq))

    converter = self.formatConverters[0]
    if not converter.canHandle(rawFormat, self._trackFormatReq):
        raise IncompatibleTracksError(
            getClassName(converter) + ' does not support conversion from '
            + str(rawFormat) + ' to ' + str(self._trackFormatReq))

    return converter.convert(rawView)
def getBoundingRegionInfo(self, region):
    """Look up the stored bounding region that contains ``region``.

    After ``self._updateContentsIfNecessary(region.chr)``, the bounding
    regions stored for ``region.chr`` (if any) are searched by bisection on
    their start positions. The candidate is the region with the largest
    start not greater than ``region.start``; it is returned when
    ``region.start < candidate.end`` and ``region.end <= candidate.end``.

    Raises:
        OutsideBoundingRegionError: the chromosome has bounding regions,
            none of them contains ``region``, and ``region`` differs from
            ``self._minimalRegion``.

    Returns:
        The matching stored info, or otherwise
        ``BoundingRegionInfo(region.start, region.end, 0, 0, 0, 0)``.
    """
    self._updateContentsIfNecessary(region.chr)

    if region.chr in self._contents:
        brInfoHolder = self._contents[region.chr]

        # Temporary: old preprocessed boundingRegion.shelve files store a
        # dict (start -> info) instead of a holder object.
        isDict = isinstance(brInfoHolder, dict)

        # bisect_right only gives a meaningful answer on an indexable,
        # ascending sequence. dict.keys() promises neither ordering by value
        # nor (on Python 3) indexing, so the legacy keys are sorted into a
        # list before searching.
        # NOTE(review): the holder's brStarts is presumably kept ascending
        # by its producer -- confirm.
        brStarts = sorted(brInfoHolder.keys()) if isDict else brInfoHolder.brStarts

        idx = bisect_right(brStarts, region.start)

        if idx > 0:
            if isDict:
                brInfo = brInfoHolder[brStarts[idx - 1]]
            else:
                brInfo = brInfoHolder.brInfos[idx - 1]

            if region.start < brInfo.end and region.end <= brInfo.end:
                return brInfo

        if not self._minimalRegion == region:
            # There are bounding regions in the same chromosome, but not any
            # encompassing the user bin. Thus the bounding regions are
            # explicitly defined (not just the complete chromosome).
            from gtrackcore.util.CommonFunctions import prettyPrintTrackName
            raise OutsideBoundingRegionError(
                "The analysis region '%s' is outside the bounding regions of track: %s"
                % (region, prettyPrintTrackName(self._trackName)))

    return BoundingRegionInfo(region.start, region.end, 0, 0, 0, 0)
def getAllBoundingRegions(self):
    """Yield every bounding region of the track, one chromosome at a time.

    Chromosomes are visited in the order given by
    ``GenomeInfo.getExtendedChrList(self._genome)``; for each one the
    regions from ``self.getAllBoundingRegionsForChr`` are yielded in turn.

    Because this is a generator, the availability check below only runs
    once iteration starts, not when the method is called.

    Raises:
        BoundingRegionsNotAvailableError: if ``self.fileExists()`` is false.
    """
    if not self.fileExists():
        from gtrackcore.util.CommonFunctions import prettyPrintTrackName
        message = 'Bounding regions not available for track: ' + \
            prettyPrintTrackName(self._trackName)
        raise BoundingRegionsNotAvailableError(message)

    chromosomes = GenomeInfo.getExtendedChrList(self._genome)
    for chrom in chromosomes:
        for boundingRegion in self.getAllBoundingRegionsForChr(chrom):
            yield boundingRegion
def getAllBoundingRegions(self):
    """Generate all bounding regions of the track across the whole genome.

    Walks the chromosomes returned by
    ``GenomeInfo.getExtendedChrList(self._genome)`` and yields each region
    produced by ``self.getAllBoundingRegionsForChr`` for that chromosome.

    The body is a generator, so nothing (including the error below) happens
    until the first item is requested.

    Raises:
        BoundingRegionsNotAvailableError: if ``self.fileExists()`` is false.
    """
    if self.fileExists():
        for chromName in GenomeInfo.getExtendedChrList(self._genome):
            for reg in self.getAllBoundingRegionsForChr(chromName):
                yield reg
        return

    from gtrackcore.util.CommonFunctions import prettyPrintTrackName
    raise BoundingRegionsNotAvailableError(
        'Bounding regions not available for track: '
        + prettyPrintTrackName(self._trackName))
def _getRawTrackView(self, region, borderHandling, allowOverlaps):
    """Build a TrackView for ``region`` filled with fixed placeholder columns.

    The genome element source for ``self.trackName`` is fetched through
    ``ExternalTrackManager``. Its prefix list decides which columns exist,
    and its value and edge-weight data types/dimensions decide their dtype
    and shape. Each column holds one element, or two when the track format
    is both dense and interval. The ``start`` column always holds the
    single value -1.

    Fillers: 0 (and 1) for ``end``, ``findEmptyVal(...)`` for ``val`` and
    ``weights``, 1 for ``strand``, and empty strings for ``id``, ``edges``
    and any other prefix (those go into the ordered ``extraLists`` mapping).

    Raises:
        AssertionError: if ``len(region) != 1``.
        IncompatibleTracksError: if ``allowOverlaps`` is set while the
            format is dense or the source reports no overlapping elements.
    """
    assert len(region) == 1

    from collections import OrderedDict
    from gtrackcore.track.memmap.CommonMemmapFunctions import findEmptyVal
    from gtrackcore.track.core.TrackView import TrackView
    import numpy as np

    geSource = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(self.trackName, region.genome)
    prefixList = geSource.getPrefixList()
    valDataType = geSource.getValDataType()
    valDim = geSource.getValDim()
    weightDataType = geSource.getEdgeWeightDataType()
    weightDim = geSource.getEdgeWeightDim()

    trackFormat = TrackFormat.createInstanceFromPrefixList(
        prefixList, valDataType, valDim, weightDataType, weightDim)

    if allowOverlaps and (trackFormat.isDense() or geSource.hasNoOverlappingElements()):
        raise IncompatibleTracksError(
            prettyPrintTrackName(self.trackName) + ' with format: '
            + str(trackFormat) + ' does not satisfy ' + str(self._trackFormatReq))

    denseAndInterval = trackFormat.isDense() and trackFormat.isInterval()
    numEls = 2 if denseAndInterval else 1

    # Bare string types are widened to two-character strings for the
    # placeholder arrays.
    if valDataType == 'S':
        valDataType = 'S2'
    if weightDataType == 'S':
        weightDataType = 'S2'

    # Core columns start out absent (None) and are filled per prefix.
    coreColumns = dict.fromkeys(
        ['start', 'end', 'val', 'strand', 'id', 'edges', 'weights'])
    extraLists = OrderedDict()

    for prefix in prefixList:
        if prefix == 'start':
            column = np.array([-1], dtype='int32')
        elif prefix == 'end':
            column = np.array([0, 1] if denseAndInterval else [0], dtype='int32')
        elif prefix == 'val':
            valShape = (numEls, valDim) if valDim > 1 else numEls
            column = np.array([findEmptyVal(valDataType)] * valDim * numEls,
                              dtype=valDataType).reshape(valShape)
        elif prefix == 'strand':
            column = np.array([1] * numEls, dtype='int8')
        elif prefix == 'id':
            column = np.array([''] * numEls, dtype='S1')
        elif prefix == 'edges':
            column = np.array([['']] * numEls, dtype='S1')
        elif prefix == 'weights':
            weightShape = (numEls, 1, weightDim) if weightDim > 1 else (numEls, 1)
            column = np.array([[[findEmptyVal(weightDataType)]]] * weightDim * numEls,
                              dtype=weightDataType).reshape(weightShape)
        else:
            # Any prefix outside the core set becomes an extra column.
            extraLists[prefix] = np.array([''] * numEls, dtype='S1')
            continue
        coreColumns[prefix] = column

    return TrackView(region, coreColumns['start'], coreColumns['end'],
                     coreColumns['val'], coreColumns['strand'],
                     coreColumns['id'], coreColumns['edges'],
                     coreColumns['weights'], borderHandling, allowOverlaps,
                     extraLists)
def _getBoundingRegionShelve(self, trackName): if trackName in [None, []] or ExternalTrackManager.isVirtualTrack(trackName): brShelve = None else: brShelve = BoundingRegionShelve(self.genome, trackName, allowOverlaps=False) if not brShelve.fileExists(): raise BoundingRegionsNotAvailableError('Bounding regions not available for track: ' + \ prettyPrintTrackName(trackName)) return brShelve
def getTrackView(self, region):
    """Fetch the raw track view for ``region`` and convert it as required.

    Border handling and the overlap rule are read from
    ``self._trackFormatReq`` and passed to ``self._getRawTrackView``. If
    ``self.formatConverters`` is still ``None`` it is filled in from
    ``getFormatConverters`` and reused on later calls. Conversion is done
    by the first converter in that list.

    Raises:
        AssertionError: if the overlap rule or border handling is ``None``.
        IncompatibleTracksError: if there is no converter at all, or the
            first converter's ``canHandle`` returns a false value.
    """
    allowOverlaps = self._trackFormatReq.allowOverlaps()
    borderHandling = self._trackFormatReq.borderHandling()
    assert allowOverlaps is not None
    assert borderHandling is not None

    origTrackView = self._getRawTrackView(region, borderHandling, allowOverlaps)
    sourceFormat = origTrackView.trackFormat

    if self.formatConverters is None:
        self.formatConverters = getFormatConverters(sourceFormat, self._trackFormatReq)

    if self.formatConverters == []:
        messageParts = [
            prettyPrintTrackName(self.trackName),
            ' with format: ',
            str(sourceFormat),
            ('(' + sourceFormat._val + ')' if sourceFormat._val else ''),
            ' does not satisfy ',
            str(self._trackFormatReq),
        ]
        message = messageParts[0]
        for part in messageParts[1:]:
            message = message + part
        raise IncompatibleTracksError(message)

    firstConverter = self.formatConverters[0]
    if firstConverter.canHandle(sourceFormat, self._trackFormatReq):
        return firstConverter.convert(origTrackView)

    raise IncompatibleTracksError(
        getClassName(firstConverter) + ' does not support conversion from '
        + str(sourceFormat) + ' to ' + str(self._trackFormatReq))
def _getRawTrackView(self, region, borderHandling, allowOverlaps):
    """Create a placeholder TrackView for ``region`` from source metadata.

    Metadata (prefix list, value type/dimension, edge-weight type/dimension)
    is read from the genome element source that ``ExternalTrackManager``
    returns for ``self.trackName`` and ``region.genome``. One filler column
    is produced per prefix. Columns other than ``start`` have ``numEls``
    entries: 2 when the format is both dense and interval, else 1.
    Prefixes outside the fixed set are collected, in order, in
    ``extraLists``.

    Raises:
        AssertionError: if ``len(region) != 1``.
        IncompatibleTracksError: if overlaps are requested but the format
            is dense or the source reports no overlapping elements.
    """
    assert len(region) == 1

    from collections import OrderedDict
    from gtrackcore.track.memmap.CommonMemmapFunctions import findEmptyVal
    from gtrackcore.track.core.TrackView import TrackView
    import numpy as np

    source = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(
        self.trackName, region.genome)

    prefixes = source.getPrefixList()
    valType = source.getValDataType()
    valDim = source.getValDim()
    weightType = source.getEdgeWeightDataType()
    weightDim = source.getEdgeWeightDim()

    starts = ends = vals = strands = ids = edges = weights = None
    extras = OrderedDict()

    tf = TrackFormat.createInstanceFromPrefixList(prefixes, valType, valDim,
                                                  weightType, weightDim)

    if allowOverlaps and (tf.isDense() or source.hasNoOverlappingElements()):
        raise IncompatibleTracksError(prettyPrintTrackName(self.trackName) + ' with format: '
                                      + str(tf) + ' does not satisfy ' + str(self._trackFormatReq))

    isDenseInterval = tf.isDense() and tf.isInterval()
    if isDenseInterval:
        numEls = 2
    else:
        numEls = 1

    # A bare 'S' dtype is replaced by a two-character string dtype.
    if valType == 'S':
        valType = 'S2'
    if weightType == 'S':
        weightType = 'S2'

    for name in prefixes:
        if name == 'start':
            starts = np.array([-1], dtype='int32')
        elif name == 'end':
            endValues = [0, 1] if isDenseInterval else [0]
            ends = np.array(endValues, dtype='int32')
        elif name == 'val':
            flatVals = [findEmptyVal(valType)] * valDim * numEls
            targetShape = (numEls, valDim) if valDim > 1 else numEls
            vals = np.array(flatVals, dtype=valType).reshape(targetShape)
        elif name == 'strand':
            strands = np.array([1] * numEls, dtype='int8')
        elif name == 'id':
            ids = np.array([''] * numEls, dtype='S1')
        elif name == 'edges':
            edges = np.array([['']] * numEls, dtype='S1')
        elif name == 'weights':
            nestedWeights = [[[findEmptyVal(weightType)]]] * weightDim * numEls
            targetShape = (numEls, 1, weightDim) if weightDim > 1 else (numEls, 1)
            weights = np.array(nestedWeights, dtype=weightType).reshape(targetShape)
        else:
            # Unknown prefixes are carried along as extra string columns.
            extras[name] = np.array([''] * numEls, dtype='S1')

    return TrackView(region, starts, ends, vals, strands, ids, edges,
                     weights, borderHandling, allowOverlaps, extras)
if self.PASS_ON_EXCEPTIONS: raise else: self._printExceptionMsg(e, trackName, Error=False) except Exception, e: collector.removeEntry() if self.PASS_ON_EXCEPTIONS: raise else: self._printExceptionMsg(e, trackName, Error=True) self._calcAndStoreSubTrackCount(trackName) if self._raiseIfAnyWarnings and len(self._warningTrackNames) > 0: raise Warning('Warnings occurred in the following tracks: ' + \ ', '.join(prettyPrintTrackName(tn) for tn in self._warningTrackNames)) return atLeastOneFinalized def _allTrackNames(self): raise AbstractClassError def _allGESourceManagers(self, trackName, allowOverlaps): collector = PreProcMetaDataCollector(self._genome, trackName) if allowOverlaps == False and collector.overlapRuleHasBeenFinalized(True): for i in range(1): self._status = 'Trying to prepare preprocessing for track "%s"' % ':'.join(trackName) + \ (' (allowOverlaps: %s)' % allowOverlaps) yield self._getGESourceManagerFromTrack(trackName) else: for geSource in self._allGESources(trackName): if allowOverlaps == True:
if self.PASS_ON_EXCEPTIONS: raise else: self._printExceptionMsg(e, trackName, Error=False) except Exception, e: collector.removeEntry() if self.PASS_ON_EXCEPTIONS: raise else: self._printExceptionMsg(e, trackName, Error=True) self._calcAndStoreSubTrackCount(trackName) if self._raiseIfAnyWarnings and len(self._warningTrackNames) > 0: raise Warning('Warnings occurred in the following tracks: ' + \ ', '.join(prettyPrintTrackName(tn) for tn in self._warningTrackNames)) return atLeastOneFinalized def _allTrackNames(self): raise AbstractClassError def _allGESourceManagers(self, trackName, allowOverlaps): collector = PreProcMetaDataCollector(self._genome, trackName) if allowOverlaps == False and collector.overlapRuleHasBeenFinalized( True): for i in range(1): self._status = 'Trying to prepare preprocessing for track "%s"' % ':'.join(trackName) + \ (' (allowOverlaps: %s)' % allowOverlaps) yield self._getGESourceManagerFromTrack(trackName) else: for geSource in self._allGESources(trackName):