def getBoundingRegionInfo(self, region):
        """Return the bounding region info that encloses ``region``.

        The contents for ``region.chr`` are refreshed first. If that
        chromosome has stored bounding regions, the last one starting at or
        before ``region.start`` is looked up and returned when ``region``
        ends no later than it does.

        Returns:
            The stored bounding region info enclosing ``region``. If nothing
            encloses it and no error is raised (see below), a new
            ``BoundingRegionInfo`` spanning ``region.start``..``region.end``
            with its four remaining arguments set to 0.

        Raises:
            OutsideBoundingRegionError: the chromosome has stored bounding
                regions, none of them encloses ``region``, and ``region`` is
                not equal to ``self._minimalRegion``.
        """
        self._updateContentsIfNecessary(region.chr)

        if region.chr in self._contents:
            brInfoHolder = self._contents[region.chr]

            # Temporary, to support old preprocessed boundingRegion.shelve
            # files, which store a plain dict keyed by start position.
            isDict = isinstance(brInfoHolder, dict)
            if isDict:
                # bisect needs a sorted, indexable sequence. Dict key order
                # is not guaranteed to be sorted, and on Python 3 keys() is
                # a view that cannot be indexed, so sort into a list.
                brStarts = sorted(brInfoHolder)
            else:
                brStarts = brInfoHolder.brStarts

            # Index just past the last bounding region starting at or
            # before region.start.
            idx = bisect_right(brStarts, region.start)

            if idx > 0:
                if isDict:
                    brInfo = brInfoHolder[brStarts[idx - 1]]
                else:
                    brInfo = brInfoHolder.brInfos[idx - 1]

                if region.start < brInfo.end and region.end <= brInfo.end:
                    return brInfo

            if not self._minimalRegion == region:
                #
                #There are bounding regions in the same chromosome, but not any encompassing the user bin
                #Thus the bounding regions are explicitly defined (not just the complete chromosome)
                #
                from gtrackcore.util.CommonFunctions import prettyPrintTrackName
                raise OutsideBoundingRegionError("The analysis region '%s' is outside the bounding regions of track: %s" \
                                                 % (region, prettyPrintTrackName(self._trackName)))

        return BoundingRegionInfo(region.start, region.end, 0, 0, 0, 0)
Example #2
0
    def getTrackView(self, region):
        """Fetch the raw track view for ``region`` and convert it to the required format.

        The list of format converters is looked up once (via
        ``getFormatConverters``) and cached on ``self.formatConverters``;
        only the first converter in that list is used.

        Raises:
            IncompatibleTracksError: no converter exists for the raw view's
                track format, or the first converter reports that it cannot
                handle the conversion.
        """
        allowOverlaps = self._trackFormatReq.allowOverlaps()
        borderHandling = self._trackFormatReq.borderHandling()
        assert allowOverlaps is not None
        assert borderHandling is not None

        rawView = self._getRawTrackView(region, borderHandling, allowOverlaps)
        sourceFormat = rawView.trackFormat

        # Resolve the converters lazily, on the first call only
        if self.formatConverters is None:
            self.formatConverters = getFormatConverters(sourceFormat,
                                                        self._trackFormatReq)

        if self.formatConverters == []:
            message = prettyPrintTrackName(self.trackName) + ' with format: ' + str(sourceFormat)
            if sourceFormat._val:
                message = message + '(' + sourceFormat._val + ')'
            message = message + ' does not satisfy ' + str(self._trackFormatReq)
            raise IncompatibleTracksError(message)

        converter = self.formatConverters[0]
        if converter.canHandle(sourceFormat, self._trackFormatReq):
            return converter.convert(rawView)

        raise IncompatibleTracksError(getClassName(converter) +
                                      ' does not support conversion from ' + str(sourceFormat) +
                                      ' to ' + str(self._trackFormatReq))
 def getBoundingRegionInfo(self, region):
     """Return the bounding region info that encloses ``region``.

     The contents for ``region.chr`` are refreshed first. If that
     chromosome has stored bounding regions, the last one starting at or
     before ``region.start`` is looked up and returned when ``region``
     ends no later than it does.

     Returns:
         The stored bounding region info enclosing ``region``. If nothing
         encloses it and no error is raised (see below), a new
         ``BoundingRegionInfo`` spanning ``region.start``..``region.end``
         with its four remaining arguments set to 0.

     Raises:
         OutsideBoundingRegionError: the chromosome has stored bounding
             regions, none of them encloses ``region``, and ``region`` is
             not equal to ``self._minimalRegion``.
     """
     self._updateContentsIfNecessary(region.chr)
     
     if region.chr in self._contents:
         brInfoHolder = self._contents[region.chr]
         
         # Temporary, to support old preprocessed boundingRegion.shelve
         # files, which store a plain dict keyed by start position.
         isDict = isinstance(brInfoHolder, dict)
         if isDict:
             # bisect needs a sorted, indexable sequence. Dict key order
             # is not guaranteed to be sorted, and on Python 3 keys() is
             # a view that cannot be indexed, so sort into a list.
             brStarts = sorted(brInfoHolder)
         else:
             brStarts = brInfoHolder.brStarts
             
         # Index just past the last bounding region starting at or
         # before region.start.
         idx = bisect_right(brStarts, region.start)
         
         if idx > 0:
             if isDict:
                 brInfo = brInfoHolder[brStarts[idx-1]]
             else:
                 brInfo = brInfoHolder.brInfos[idx-1]
             
             if region.start < brInfo.end and region.end <= brInfo.end:
                 return brInfo
                 
         if not self._minimalRegion == region:
             #
             #There are bounding regions in the same chromosome, but not any encompassing the user bin
             #Thus the bounding regions are explicitly defined (not just the complete chromosome)
             #
             from gtrackcore.util.CommonFunctions import prettyPrintTrackName
             raise OutsideBoundingRegionError("The analysis region '%s' is outside the bounding regions of track: %s" \
                                              % (region, prettyPrintTrackName(self._trackName)))
     
     return BoundingRegionInfo(region.start, region.end, 0, 0, 0, 0)
    def getAllBoundingRegions(self):
        """Yield every bounding region, chromosome by chromosome.

        Chromosomes are visited in the order returned by
        ``GenomeInfo.getExtendedChrList`` for ``self._genome``.

        Raises:
            BoundingRegionsNotAvailableError: ``self.fileExists()`` is false.
                As this is a generator, the error surfaces when iteration
                starts.
        """
        if self.fileExists():
            for chrom in GenomeInfo.getExtendedChrList(self._genome):
                for boundingRegion in self.getAllBoundingRegionsForChr(chrom):
                    yield boundingRegion
            return

        from gtrackcore.util.CommonFunctions import prettyPrintTrackName
        message = 'Bounding regions not available for track: ' + \
            prettyPrintTrackName(self._trackName)
        raise BoundingRegionsNotAvailableError(message)
 def getAllBoundingRegions(self):
     """Yield every bounding region, chromosome by chromosome.

     Chromosomes are visited in the order returned by
     ``GenomeInfo.getExtendedChrList`` for ``self._genome``.

     Raises:
         BoundingRegionsNotAvailableError: ``self.fileExists()`` is false.
             As this is a generator, the error surfaces when iteration
             starts.
     """
     if self.fileExists():
         for chrom in GenomeInfo.getExtendedChrList(self._genome):
             for boundingRegion in self.getAllBoundingRegionsForChr(chrom):
                 yield boundingRegion
         return

     from gtrackcore.util.CommonFunctions import prettyPrintTrackName
     message = 'Bounding regions not available for track: ' + \
         prettyPrintTrackName(self._trackName)
     raise BoundingRegionsNotAvailableError(message)
Example #6
0
    def _getRawTrackView(self, region, borderHandling, allowOverlaps):
        """Build a TrackView for ``region`` out of fixed filler arrays.

        The genome element source for ``self.trackName`` supplies only the
        column prefixes and the value/weight data types and dimensions; the
        arrays themselves are constants (start -1, end 0 or [0, 1], empty
        values from ``findEmptyVal``, strand 1, empty strings elsewhere).
        Dense interval formats get two elements per column, all others one.

        Raises:
            IncompatibleTracksError: ``allowOverlaps`` is set but the format
                is dense or the source reports no overlapping elements.
        """
        assert len(region) == 1

        from collections import OrderedDict
        from gtrackcore.track.memmap.CommonMemmapFunctions import findEmptyVal
        from gtrackcore.track.core.TrackView import TrackView
        import numpy as np

        source = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(self.trackName, region.genome)
        prefixes = source.getPrefixList()
        valType = source.getValDataType()
        valDim = source.getValDim()
        weightType = source.getEdgeWeightDataType()
        weightDim = source.getEdgeWeightDim()

        trackFormat = TrackFormat.createInstanceFromPrefixList(prefixes, valType, valDim,
                                                               weightType, weightDim)
        if allowOverlaps and (trackFormat.isDense() or source.hasNoOverlappingElements()):
            raise IncompatibleTracksError(prettyPrintTrackName(self.trackName) + ' with format: '
                                          + str(trackFormat) + ' does not satisfy ' + str(self._trackFormatReq))

        isDenseInterval = trackFormat.isDense() and trackFormat.isInterval()
        if isDenseInterval:
            elCount = 2
        else:
            elCount = 1

        # Bare string types are widened to two-character strings
        if valType == 'S':
            valType = 'S2'
        if weightType == 'S':
            weightType = 'S2'

        # Core columns stay None unless their prefix is present in the source
        core = dict.fromkeys(('start', 'end', 'val', 'strand', 'id', 'edges', 'weights'))
        extras = OrderedDict()

        for prefix in prefixes:
            if prefix == 'start':
                column = np.array([-1], dtype='int32')
            elif prefix == 'end':
                column = np.array([0, 1] if isDenseInterval else [0], dtype='int32')
            elif prefix == 'val':
                valShape = (elCount, valDim) if valDim > 1 else elCount
                column = np.array([findEmptyVal(valType)] * valDim * elCount,
                                  dtype=valType).reshape(valShape)
            elif prefix == 'strand':
                column = np.array([1] * elCount, dtype='int8')
            elif prefix == 'id':
                column = np.array([''] * elCount, dtype='S1')
            elif prefix == 'edges':
                column = np.array([['']] * elCount, dtype='S1')
            elif prefix == 'weights':
                weightShape = (elCount, 1, weightDim) if weightDim > 1 else (elCount, 1)
                column = np.array([[[findEmptyVal(weightType)]]] * weightDim * elCount,
                                  dtype=weightType).reshape(weightShape)
            else:
                # Any other prefix becomes an extra column
                extras[prefix] = np.array([''] * elCount, dtype='S1')
                continue
            core[prefix] = column

        return TrackView(region, core['start'], core['end'], core['val'], core['strand'],
                         core['id'], core['edges'], core['weights'],
                         borderHandling, allowOverlaps, extras)
 def _getBoundingRegionShelve(self, trackName):
     if trackName in [None, []] or ExternalTrackManager.isVirtualTrack(trackName):
         brShelve = None
     else:
         brShelve = BoundingRegionShelve(self.genome, trackName, allowOverlaps=False)
         if not brShelve.fileExists():
             raise BoundingRegionsNotAvailableError('Bounding regions not available for track: ' + \
                 prettyPrintTrackName(trackName))
     
     return brShelve
Example #8
0
 def getTrackView(self, region):
     """Fetch the raw track view for ``region`` and convert it to the required format.

     The list of format converters is looked up once (via
     ``getFormatConverters``) and cached on ``self.formatConverters``;
     only the first converter in that list is used.

     Raises:
         IncompatibleTracksError: no converter exists for the raw view's
             track format, or the first converter reports that it cannot
             handle the conversion.
     """
     allowOverlaps = self._trackFormatReq.allowOverlaps()
     borderHandling = self._trackFormatReq.borderHandling()
     assert allowOverlaps is not None
     assert borderHandling is not None

     rawView = self._getRawTrackView(region, borderHandling, allowOverlaps)
     sourceFormat = rawView.trackFormat

     # Resolve the converters lazily, on the first call only
     if self.formatConverters is None:
         self.formatConverters = getFormatConverters(sourceFormat, self._trackFormatReq)

     if self.formatConverters == []:
         message = prettyPrintTrackName(self.trackName) + ' with format: ' + str(sourceFormat)
         if sourceFormat._val:
             message = message + '(' + sourceFormat._val + ')'
         message = message + ' does not satisfy ' + str(self._trackFormatReq)
         raise IncompatibleTracksError(message)

     converter = self.formatConverters[0]
     if converter.canHandle(sourceFormat, self._trackFormatReq):
         return converter.convert(rawView)

     raise IncompatibleTracksError(getClassName(converter) +
                                   ' does not support conversion from ' + str(sourceFormat) +
                                   ' to ' + str(self._trackFormatReq))
Example #9
0
    def _getRawTrackView(self, region, borderHandling, allowOverlaps):
        """Build a TrackView for ``region`` out of fixed filler arrays.

        The genome element source for ``self.trackName`` supplies only the
        column prefixes and the value/weight data types and dimensions; the
        arrays themselves are constants (start -1, end 0 or [0, 1], empty
        values from ``findEmptyVal``, strand 1, empty strings elsewhere).
        Dense interval formats get two elements per column, all others one.

        Raises:
            IncompatibleTracksError: ``allowOverlaps`` is set but the format
                is dense or the source reports no overlapping elements.
        """
        assert len(region) == 1

        from collections import OrderedDict
        from gtrackcore.track.memmap.CommonMemmapFunctions import findEmptyVal
        from gtrackcore.track.core.TrackView import TrackView
        import numpy as np

        source = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(
            self.trackName, region.genome)
        prefixes = source.getPrefixList()
        valType = source.getValDataType()
        valDim = source.getValDim()
        weightType = source.getEdgeWeightDataType()
        weightDim = source.getEdgeWeightDim()

        trackFormat = TrackFormat.createInstanceFromPrefixList(prefixes, valType, valDim,
                                                               weightType, weightDim)
        if allowOverlaps and (trackFormat.isDense()
                              or source.hasNoOverlappingElements()):
            raise IncompatibleTracksError(prettyPrintTrackName(self.trackName) + ' with format: '
                                          + str(trackFormat) + ' does not satisfy ' + str(self._trackFormatReq))

        isDenseInterval = trackFormat.isDense() and trackFormat.isInterval()
        if isDenseInterval:
            elCount = 2
        else:
            elCount = 1

        # Bare string types are widened to two-character strings
        if valType == 'S':
            valType = 'S2'
        if weightType == 'S':
            weightType = 'S2'

        # Core columns stay None unless their prefix is present in the source
        core = dict.fromkeys(
            ('start', 'end', 'val', 'strand', 'id', 'edges', 'weights'))
        extras = OrderedDict()

        for prefix in prefixes:
            if prefix == 'start':
                column = np.array([-1], dtype='int32')
            elif prefix == 'end':
                column = np.array([0, 1] if isDenseInterval else [0],
                                  dtype='int32')
            elif prefix == 'val':
                valShape = (elCount, valDim) if valDim > 1 else elCount
                column = np.array([findEmptyVal(valType)] * valDim * elCount,
                                  dtype=valType).reshape(valShape)
            elif prefix == 'strand':
                column = np.array([1] * elCount, dtype='int8')
            elif prefix == 'id':
                column = np.array([''] * elCount, dtype='S1')
            elif prefix == 'edges':
                column = np.array([['']] * elCount, dtype='S1')
            elif prefix == 'weights':
                weightShape = (elCount, 1, weightDim) if weightDim > 1 else (elCount, 1)
                column = np.array([[[findEmptyVal(weightType)]]] * weightDim * elCount,
                                  dtype=weightType).reshape(weightShape)
            else:
                # Any other prefix becomes an extra column
                extras[prefix] = np.array([''] * elCount, dtype='S1')
                continue
            core[prefix] = column

        return TrackView(region, core['start'], core['end'], core['val'],
                         core['strand'], core['id'], core['edges'],
                         core['weights'], borderHandling, allowOverlaps,
                         extras)
                if self.PASS_ON_EXCEPTIONS:
                    raise
                else:
                    self._printExceptionMsg(e, trackName, Error=False)
            except Exception, e:
                collector.removeEntry()
                if self.PASS_ON_EXCEPTIONS:
                    raise
                else:
                    self._printExceptionMsg(e, trackName, Error=True)

            self._calcAndStoreSubTrackCount(trackName)

        if self._raiseIfAnyWarnings and len(self._warningTrackNames) > 0:
            raise Warning('Warnings occurred in the following tracks: ' + \
                          ', '.join(prettyPrintTrackName(tn) for tn in self._warningTrackNames))
        return atLeastOneFinalized

    def _allTrackNames(self):
        """Abstract hook: this implementation always raises AbstractClassError."""
        raise AbstractClassError()

    def _allGESourceManagers(self, trackName, allowOverlaps):
        collector = PreProcMetaDataCollector(self._genome, trackName)
        if allowOverlaps == False and collector.overlapRuleHasBeenFinalized(True):
            for i in range(1):
                self._status = 'Trying to prepare preprocessing for track "%s"' % ':'.join(trackName) + \
                                (' (allowOverlaps: %s)' % allowOverlaps)
                yield self._getGESourceManagerFromTrack(trackName)
        else:
            for geSource in self._allGESources(trackName):
                if allowOverlaps == True:
Example #11
0
                if self.PASS_ON_EXCEPTIONS:
                    raise
                else:
                    self._printExceptionMsg(e, trackName, Error=False)
            except Exception, e:
                collector.removeEntry()
                if self.PASS_ON_EXCEPTIONS:
                    raise
                else:
                    self._printExceptionMsg(e, trackName, Error=True)

            self._calcAndStoreSubTrackCount(trackName)

        if self._raiseIfAnyWarnings and len(self._warningTrackNames) > 0:
            raise Warning('Warnings occurred in the following tracks: ' + \
                          ', '.join(prettyPrintTrackName(tn) for tn in self._warningTrackNames))
        return atLeastOneFinalized

    def _allTrackNames(self):
        """Abstract hook: this implementation always raises AbstractClassError."""
        raise AbstractClassError()

    def _allGESourceManagers(self, trackName, allowOverlaps):
        collector = PreProcMetaDataCollector(self._genome, trackName)
        if allowOverlaps == False and collector.overlapRuleHasBeenFinalized(
                True):
            for i in range(1):
                self._status = 'Trying to prepare preprocessing for track "%s"' % ':'.join(trackName) + \
                                (' (allowOverlaps: %s)' % allowOverlaps)
                yield self._getGESourceManagerFromTrack(trackName)
        else:
            for geSource in self._allGESources(trackName):