Beispiel #1
0
    def getTrackView(self, region):
        """Return the track view of ``region`` converted to the requested format.

        The raw view is obtained from ``self._getRawTrackView`` using the
        overlap and border-handling settings of ``self._trackFormatReq``. The
        converter list is looked up on first use and cached in
        ``self.formatConverters``; only its first entry is applied.

        Raises:
            AssertionError: if the format request reports None for either the
                overlap or the border-handling setting.
            IncompatibleTracksError: if the converter list is empty, or if the
                first converter cannot handle the raw format.
        """
        overlapsAllowed = self._trackFormatReq.allowOverlaps()
        borderMode = self._trackFormatReq.borderHandling()
        assert overlapsAllowed is not None
        assert borderMode is not None

        rawView = self._getRawTrackView(region, borderMode, overlapsAllowed)
        rawFormat = rawView.trackFormat

        # Resolve the converters lazily; later calls reuse the cached list.
        if self.formatConverters is None:
            self.formatConverters = getFormatConverters(rawFormat,
                                                        self._trackFormatReq)

        if self.formatConverters == []:
            trackLabel = prettyPrintTrackName(self.trackName)
            formatText = str(rawFormat)
            # The value type is appended in parentheses only when it is set.
            valSuffix = '(' + rawFormat._val + ')' if rawFormat._val else ''
            raise IncompatibleTracksError(
                trackLabel + ' with format: ' + formatText + valSuffix +
                ' does not satisfy ' + str(self._trackFormatReq))

        converter = self.formatConverters[0]
        if converter.canHandle(rawFormat, self._trackFormatReq):
            return converter.convert(rawView)

        raise IncompatibleTracksError(
            getClassName(converter) + ' does not support conversion from ' +
            str(rawFormat) + ' to ' + str(self._trackFormatReq))
Beispiel #2
0
 def addFormatReq(self, requestedTrackFormat):
     """Merge ``requestedTrackFormat`` into this track's format requirement.

     The result of ``TrackFormatReq.merge`` replaces ``self._trackFormatReq``.

     Raises:
         IncompatibleTracksError: if the merge yields None. The attribute has
             already been overwritten with None at that point.
     """
     formerReq = self._trackFormatReq
     self._trackFormatReq = TrackFormatReq.merge(formerReq,
                                                 requestedTrackFormat)
     if self._trackFormatReq is not None:
         return

     # Report the requirement as it was before the failed merge.
     raise IncompatibleTracksError(
         str(formerReq) + ' is incompatible with additional ' +
         str(requestedTrackFormat))
Beispiel #3
0
    def _compute(self):
        """Return a dict mapping each category value to its coverage.

        The data comes from the result of the first child. For a track whose
        format reports a dense representation, the coverage of a category is
        the number of elements carrying it. Otherwise it is the summed length
        of the category's segments minus the part of each segment already
        covered by the furthest end among the segments preceding it.

        Raises:
            IncompatibleTracksError: if the track has no value array.
        """
        trackData = self._children[0].getResult()
        endArray = trackData.endsAsNumpyArray()
        startArray = trackData.startsAsNumpyArray()
        categories = trackData.valsAsNumpyArray()
        if categories is None:
            raise IncompatibleTracksError()

        coverageByCat = {}
        for category in numpy.unique(categories):
            inCategory = (categories == category)
            if trackData.trackFormat.reprIsDense():
                coverageByCat[category] = inCategory.sum()
                continue

            segStarts = startArray[inCategory]
            segEnds = endArray[inCategory]
            summedLengths = segEnds.sum() - segStarts.sum()

            # reach[i] is the largest end among segments 0..i, i.e. how far the
            # segments before segment i+1 extend.
            # NOTE(review): presumably segments are sorted by start -- confirm.
            reach = numpy.maximum.accumulate(segEnds)[:-1]
            pastStart = reach - segStarts[1:]
            pastEnd = reach - segEnds[1:]
            # Part of each segment lying before `reach`: distance from its
            # start to `reach`, less whatever of that extends beyond its end.
            overlap = pastStart[pastStart > 0].sum() - \
                pastEnd[pastEnd > 0].sum()

            coverageByCat[category] = summedLengths - overlap

        return coverageByCat
    def getTrackView(self, region):
        """Return the part of ``self._tv`` covering ``region``, converted.

        The converter list is looked up on first use and cached in
        ``self.formatConverters``; only its first entry is applied. The stored
        track view is sliced with offsets relative to the start of its genome
        anchor before conversion.

        Raises:
            IncompatibleTracksError: if the converter list is empty, or if the
                first converter cannot handle the stored track format.
        """
        storedFormat = self._tv.trackFormat

        if self.formatConverters is None:
            self.formatConverters = getFormatConverters(storedFormat,
                                                        self._trackFormatReq)

        if self.formatConverters == []:
            formatText = str(storedFormat)
            # The value type is appended in parentheses only when it is set.
            valSuffix = '(' + storedFormat._val + ')' if storedFormat._val else ''
            raise IncompatibleTracksError(
                'Track with format: ' + formatText + valSuffix +
                ' does not satisfy ' + str(self._trackFormatReq))

        converter = self.formatConverters[0]
        if not converter.canHandle(storedFormat, self._trackFormatReq):
            raise IncompatibleTracksError(
                getClassName(converter) + ' does not support conversion from ' +
                str(storedFormat) + ' to ' + str(self._trackFormatReq))

        # Translate genome coordinates into offsets within the stored view.
        sliceFrom = region.start - self._tv.genomeAnchor.start
        sliceTo = region.end - self._tv.genomeAnchor.start
        return converter.convert(self._tv[sliceFrom:sliceTo])
Beispiel #5
0
    def _getRawTrackView(self, region, borderHandling, allowOverlaps):
        """Build a one-base-pair TrackView filled with placeholder arrays.

        The set of arrays follows the prefix list reported by the genome
        element source for ``self.trackName``. Each known prefix gets a small
        array of fixed or "empty" values; any other prefix is stored as an
        extra column of empty strings.

        Raises:
            AssertionError: if ``region`` is not exactly one position long.
            IncompatibleTracksError: if overlaps are requested but the track is
                dense or its source reports no overlapping elements.
        """
        assert len(region) == 1

        from collections import OrderedDict
        from gold.track.CommonMemmapFunctions import findEmptyVal
        from gold.track.TrackView import TrackView
        import numpy as np

        geSource = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(self.trackName, region.genome)
        prefixes = geSource.getPrefixList()
        valType = geSource.getValDataType()
        valDim = geSource.getValDim()
        weightType = geSource.getEdgeWeightDataType()
        weightDim = geSource.getEdgeWeightDim()

        trackFormat = TrackFormat.createInstanceFromPrefixList(
            prefixes, valType, valDim, weightType, weightDim)
        if allowOverlaps and (trackFormat.isDense() or geSource.hasNoOverlappingElements()):
            raise IncompatibleTracksError(
                prettyPrintTrackName(self.trackName) + ' with format: ' +
                str(trackFormat) + ' does not satisfy ' + str(self._trackFormatReq))

        # Dense interval formats get two placeholder elements, all others one.
        isDenseInterval = trackFormat.isDense() and trackFormat.isInterval()
        elCount = 2 if isDenseInterval else 1

        # A bare string dtype is widened to two characters before use.
        if valType == 'S':
            valType = 'S2'
        if weightType == 'S':
            weightType = 'S2'

        def emptyStrings():
            return np.array([''] * elCount, dtype='S1')

        # One lazy builder per known prefix, so that findEmptyVal is only
        # invoked for columns the track actually has.
        builders = {
            'start': lambda: np.array([-1], dtype='int32'),
            'end': lambda: np.array([0, 1] if isDenseInterval else [0], dtype='int32'),
            'val': lambda: np.array(
                [findEmptyVal(valType)] * valDim * elCount,
                dtype=valType).reshape((elCount, valDim) if valDim > 1 else elCount),
            'strand': lambda: np.array([1] * elCount, dtype='int8'),
            'id': emptyStrings,
            'edges': lambda: np.array([['']] * elCount, dtype='S1'),
            'weights': lambda: np.array(
                [[[findEmptyVal(weightType)]]] * weightDim * elCount,
                dtype=weightType).reshape(
                    (elCount, 1, weightDim) if weightDim > 1 else (elCount, 1)),
        }

        # Columns absent from the prefix list stay None.
        core = dict.fromkeys(builders)
        extraLists = OrderedDict()
        for prefix in prefixes:
            build = builders.get(prefix)
            if build is None:
                extraLists[prefix] = emptyStrings()
            else:
                core[prefix] = build()

        return TrackView(region, core['start'], core['end'], core['val'],
                         core['strand'], core['id'], core['edges'],
                         core['weights'], borderHandling, allowOverlaps,
                         extraLists)
Beispiel #6
0
    def _checkTrackFormat(self, origTV):
        """Reject track views whose format is dense or has intervals.

        Raises:
            IncompatibleTracksError: if ``origTV.trackFormat`` reports being
                dense or being an interval format.
        """
        # Segments are not currently supported at all (the end list is never
        # returned). An earlier, now disabled, variant rejected dense tracks
        # unconditionally and interval tracks only when
        # IS_EXPERIMENTAL_INSTALLATION was false; both are rejected outright.
        trackFormat = origTV.trackFormat
        if trackFormat.isDense():
            raise IncompatibleTracksError()
        if trackFormat.isInterval():
            raise IncompatibleTracksError()
Beispiel #7
0
    def _compute(self):
        """Return an OrderedDict: category value -> number of distinct starts.

        The data comes from the result of the first child. Categories appear
        in the order produced by ``numpy.unique`` on the value array.

        Raises:
            IncompatibleTracksError: if the track has no value array.
        """
        trackData = self._children[0].getResult()
        startArray = trackData.startsAsNumpyArray()
        categories = trackData.valsAsNumpyArray()
        if categories is None:
            raise IncompatibleTracksError()

        # For each category, count the unique start positions among the
        # elements carrying that category.
        return OrderedDict(
            (category, len(numpy.unique(startArray[categories == category])))
            for category in numpy.unique(categories))
    def _validateAllTracksRead(self):
        """Check that every track of this statistic was flagged as read.

        A track that was not flagged as read is tolerated only when another
        track of the statistic has the same unique key.

        Raises:
            ShouldNotOccurError: if the statistic has no result yet.
            IncompatibleTracksError: if a track was not flagged as read and no
                other track shares its unique key.
        """
        if not self.hasResult():
            raise ShouldNotOccurError("At this stage, statistic should either have result, "
                                      "or exception should have been raised")

        tracks = self.getAllTracks()
        # Keep the key list index-aligned with `tracks`. The loop below allows
        # None entries, so they must not be dereferenced here either.
        trackUniqueKeys = [
            Track(tr.trackName).getUniqueKey(self.getGenome())
            if tr is not None else None
            for tr in tracks]

        for trackIndex, restTrackIndexes in allElementsVersusRest(xrange(len(trackUniqueKeys))):
            track = tracks[trackIndex]
            if track is None or track.hasBeenFlaggedAsRead():
                continue

            uniqueKeyForRestTracks = \
                set(trackUniqueKeys[i] for i in restTrackIndexes
                    if tracks[i] is not None)

            # If several tracks are the same, memory memoization will only result
            # in one RawDataStat being created, for one Track object. This is a
            # wanted optimization. In other cases, something is probably wrong if
            # a track has not been touched. However, this rule may be revisited
            # when track structure functionality is implemented.
            if trackUniqueKeys[trackIndex] not in uniqueKeyForRestTracks:
                raise IncompatibleTracksError(
                    'Track ' + prettyPrintTrackName(track.trackName) +
                    ' was created, but not touched by statistic')
Beispiel #9
0
    def _determineStatClass(self):
        """Trial-run every candidate statistic class on a minimal bin source.

        For each class in ``self._statClassList`` a minimal ``StatJob`` is
        built and run on the two tracks, after which every track is checked for
        having been touched (its ``formatConverters`` is no longer None). An
        untouched track is tolerated only when another track has the same
        unique key.

        Incompatibility and assertion errors raised during a trial are logged
        when ``DebugConfig.VERBOSE`` is set and re-raised only when
        ``DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS`` is set.

        NOTE(review): nothing in this method records which class passed or
        returns a value -- confirm how callers obtain the outcome.

        Raises:
            AssertionError: if ``_track`` or ``_track2`` has not been set.
            ShouldNotOccurError: if the class list is empty and exceptions are
                configured to be passed on.
        """
        assert hasattr(self, '_track')
        assert hasattr(self, '_track2')
        dummyGESource = MinimalBinSource(self._genome)

        if len(self._statClassList) == 0:
            if self._reversed:
                logMessage('Stat class list is empty, for analysisDef: ' + self._analysisLine, level=logging.WARNING)
            if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                raise ShouldNotOccurError('Stat class list is empty. Analysisdef: ' + self._analysisLine)

        for statClass in self._statClassList:
            if DebugConfig.VERBOSE:
                logMessage(statClass.__name__ + ': Trying (' + self.getDefAfterChoices() + ')')

            trackA, trackB = self._track, self._track2
            if trackA is None:
                continue

            try:
                # The hackiest of all hacks!
                # TODO: reimplement together with TrackStructure
                job = StatJob(dummyGESource, trackA, trackB, statClass, minimal=True,
                              **self.getChoices(filterByActivation=True))
                stat = job._getSingleResult(dummyGESource[0])[-1]
                tracks = stat._tracks if hasattr(stat, '_tracks') else [trackA, trackB]
                # Keep the keys index-aligned with `tracks`: dropping None
                # tracks here would make the index lookups below read the
                # wrong key or run past the end of the list.
                trackUniqueKeys = [
                    Track(tr.trackName).getUniqueKey(self._genome)
                    if tr is not None else None
                    for tr in tracks]

                StatJob(dummyGESource, trackA, trackB, statClass, minimal=True,
                        **self.getChoices(filterByActivation=True)).run(False)
                # In order not to mess up integration tests
                initSeed()

                for trackIndex, restTrackIndexes in allElementsVersusRest(xrange(len(tracks))):
                    track = tracks[trackIndex]
                    if track is None or track.formatConverters is not None:
                        continue

                    uniqueKeyForRestTracks = \
                        set(trackUniqueKeys[i] for i in restTrackIndexes
                            if tracks[i] is not None)

                    # If several tracks are the same, memory memoization will only result
                    # in one RawDataStat being created, for one Track object. This is a
                    # wanted optimization. In other cases, something is probably wrong if
                    # a track has not been touched. However, this rule may be revisited
                    # when track structure functionality is implemented.
                    if trackUniqueKeys[trackIndex] not in uniqueKeyForRestTracks:
                        raise IncompatibleTracksError(
                            'Track ' + prettyPrintTrackName(track.trackName) +
                            ' was created, but not touched by statistic')

            except (IncompatibleTracksError, AssertionError,
                    IncompatibleAssumptionsError, IdenticalTrackNamesError) as e:
                # All of these mean "this class does not fit the tracks"; they
                # are handled identically.
                if DebugConfig.VERBOSE:
                    logException(e, level=logging.DEBUG,
                                 message='(Warning: error in _determineStatClass for stat: %s)' % statClass.__name__)
                if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                    raise
Beispiel #10
0
 def addFormatReq(self, requestedTrackFormat):
     """Verify that ``requestedTrackFormat`` fits the stored track view.

     Nothing is checked when ``self._ignoreTrackFormat`` is true or when no
     format is requested (``requestedTrackFormat`` is None).

     Raises:
         IncompatibleTracksError: if the requested format reports itself as
             not compatible with ``self._tv.trackFormat``.
     """
     # Identity test rather than `!= None`, so that a comparison operator
     # defined by the format object is never invoked with None.
     if self._ignoreTrackFormat or requestedTrackFormat is None:
         return

     if not requestedTrackFormat.isCompatibleWith(self._tv.trackFormat):
         raise IncompatibleTracksError(
             str(requestedTrackFormat) + ' not compatible with ' +
             str(self._tv.trackFormat))
 def _checkTrackFormat(self, origTV):
     """Reject track views whose format is dense.

     Raises:
         IncompatibleTracksError: if ``origTV.trackFormat`` reports being
             dense.
     """
     trackFormat = origTV.trackFormat
     if not trackFormat.isDense():
         return
     raise IncompatibleTracksError()
 def supportsTrackFormat(cls, origTrackFormat):
     """Reject inputs whose track format is dense.

     Returns None when the format is accepted; no boolean is returned.

     NOTE(review): despite its name, ``origTrackFormat`` is used as an object
     carrying a ``trackFormat`` attribute -- confirm what callers pass in.

     Raises:
         IncompatibleTracksError: if ``origTrackFormat.trackFormat`` reports
             being dense.
     """
     trackFormat = origTrackFormat.trackFormat
     if not trackFormat.isDense():
         return
     raise IncompatibleTracksError()
Beispiel #13
0
 def _checkTrackFormat(self, origTV):
     """Require that the track view's format carries values.

     Raises:
         IncompatibleTracksError: with the format's string form as message,
             if ``origTV.trackFormat`` reports not being valued.
     """
     trackFormat = origTV.trackFormat
     if trackFormat.isValued():
         return
     raise IncompatibleTracksError(str(trackFormat))