Ejemplo n.º 1
0
    def __init__(self,
                 region,
                 track,
                 track2,
                 rawStatistic=None,
                 normalizationType='zeroToOne',
                 minimal=False,
                 **kwArgs):
        """
        Pick the global bin source, initialise the Statistic base class and
        remember the raw statistic and normalization type.

        region, track, track2 -- forwarded unchanged to Statistic.__init__.
        rawStatistic -- stored in self._rawStatistic; an exact str value is
            first replaced by the entry of that name in STAT_CLASS_DICT.
        normalizationType -- stored unchanged in self._normalizationType.
        minimal -- when equal to True, the global source is a
            MinimalBinSource built from region.genome; otherwise it is
            StatJob.USER_BIN_SOURCE, which is asserted to be set.
        kwArgs -- forwarded unchanged to Statistic.__init__.
        """
        if minimal == True:
            binSource = MinimalBinSource(region.genome)
        else:
            # Imported locally, as in the rest of this method's imports.
            from gold.application.StatRunner import StatJob
            assert StatJob.USER_BIN_SOURCE is not None
            binSource = StatJob.USER_BIN_SOURCE
        self._globalSource = binSource

        # The base class receives rawStatistic exactly as the caller gave it
        # (i.e. before any name lookup below).
        Statistic.__init__(self, region, track, track2,
                           rawStatistic=rawStatistic,
                           normalizationType=normalizationType,
                           minimal=minimal,
                           **kwArgs)

        resolvedStatistic = rawStatistic
        if type(rawStatistic) is str:
            from gold.statistic.AllStatistics import STAT_CLASS_DICT
            resolvedStatistic = STAT_CLASS_DICT[rawStatistic]

        self._normalizationType = normalizationType
        self._rawStatistic = resolvedStatistic
Ejemplo n.º 2
0
    def _determineStatClass(self, flushMemoized=True):
        """
        Trial-run every candidate statistic class on a minimal bin source.

        Each class in self._statClassList is run through a StatJob with
        minimal=True on self._track / self._track2 and the currently
        activated choices. Incompatibility errors (IncompatibleTracksError,
        AssertionError, IncompatibleAssumptionsError,
        IdenticalTrackNamesError) are logged when DebugConfig.VERBOSE is set
        and re-raised only when DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS is
        set; otherwise the loop moves on to the next class. Any other
        exception propagates to the caller.

        flushMemoized -- forwarded to StatJob.run().

        Raises ShouldNotOccurError when the class list is empty and
        DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS is set.
        """
        assert hasattr(self, '_track')
        assert hasattr(self, '_track2')
        dummyGESource = MinimalBinSource(self._genome)

        if len(self._statClassList) == 0:
            logMessage('Stat class list is empty, for analysisDef: ' + self._analysisLine, level=logging.WARNING)
            if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                raise ShouldNotOccurError('Stat class list is empty. Analysisdef: ' + self._analysisLine)

        for statClass in self._statClassList:
            if DebugConfig.VERBOSE:
                logMessage('Checking validity of stat class "{}" for analysisDef "{}".'.format(statClass.__name__, self.getDefAfterChoices()))

            trackA, trackB = self._track, self._track2
            if trackA is None:
                # Nothing to validate against without a first track.
                continue

            try:
                StatJob(dummyGESource, trackA, trackB, statClass, minimal=True,
                        **self.getAllChoices(filterByActivation=True)).run(False, flushMemoized=flushMemoized)

            # All four error types were handled identically in two separate
            # clauses; a single clause keeps the handling in one place.
            except (IncompatibleTracksError, AssertionError,
                    IncompatibleAssumptionsError, IdenticalTrackNamesError) as e:
                if DebugConfig.VERBOSE:
                    logException(e, level=logging.DEBUG,
                                 messagePrefix='Warning: error in _determineStatClass for stat: %s' % statClass.__name__)
                if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                    raise
Ejemplo n.º 3
0
    def _init(self, globalSource='', minimal=False):
        """
        Choose the global bin source for this statistic.

        globalSource -- the value 'test' selects a fixed UserBinSource on
            TestGenome chr21; any other value selects a GlobalBinSource for
            the genome of the region.
        minimal -- when truthy, takes precedence over globalSource and
            selects a MinimalBinSource for the genome of the region.

        Raises SplittableStatNotAvailableError when self._region is iterable
        according to isIter().
        """
        region = self._region
        if isIter(region):
            raise SplittableStatNotAvailableError()

        if minimal:
            binSource = MinimalBinSource(region.genome)
        elif globalSource == 'test':
            binSource = UserBinSource('TestGenome:chr21:10000000-15000000',
                                      '1000000')
        else:
            binSource = GlobalBinSource(region.genome)

        self._globalSource = binSource
Ejemplo n.º 4
0
    def __init__(self, region, track, track2, globalSource='', minimal=False, **kwArgs):
        """
        Choose the global bin source, then run the parent initialiser.

        region, track, track2 -- forwarded to the parent __init__.
        globalSource -- the value 'test' selects a fixed UserBinSource on
            TestGenome chr21; any other value selects a GlobalBinSource for
            region.genome. Also forwarded to the parent __init__.
        minimal -- when equal to True, takes precedence over globalSource and
            selects a MinimalBinSource for region.genome. Also forwarded.
        kwArgs -- forwarded to the parent __init__.

        Raises SplittableStatNotAvailableError when region is iterable
        according to isIter().
        """
        if isIter(region):
            raise SplittableStatNotAvailableError()

        if minimal == True:
            binSource = MinimalBinSource(region.genome)
        elif globalSource == 'test':
            binSource = UserBinSource('TestGenome:chr21:10000000-15000000', '1000000')
        else:
            binSource = GlobalBinSource(region.genome)
        # Set before the parent initialiser runs, as the parent may rely on it.
        self._globalSource = binSource

        # NOTE(review): super(self.__class__, self) resolves to the runtime
        # class, so this call recurses endlessly if this class is ever
        # subclassed. Naming the class explicitly would avoid that; the class
        # name is not visible from this method alone.
        super(self.__class__, self).__init__(
            region, track, track2,
            globalSource=globalSource, minimal=minimal, **kwArgs)
    def __init__(self, genome, trackName, allowOverlaps):
        """
        Set up the shelve file path and the in-memory state for one track.

        genome -- stored in self._genome and used to build the file path.
        trackName -- stored in self._trackName and used to build the file path.
        allowOverlaps -- must compare equal to False or True (asserted);
            passed on to createDirPath().

        self._minimalRegion becomes the first element of
        MinimalBinSource(genome), or None when that call yields None.
        """
        assert allowOverlaps in (False, True)

        self._genome = genome
        self._trackName = trackName

        shelveDir = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)
        self._fn = shelveDir + os.sep + BR_SHELVE_FILE_NAME
        self._contents = {}
        self._updatedChrs = set()

        from quick.application.UserBinSource import MinimalBinSource
        minimalBins = MinimalBinSource(genome)
        if minimalBins is None:
            self._minimalRegion = None
        else:
            self._minimalRegion = minimalBins[0]
 def getGlobalSource(globalSourceStr, genome, minimal):
     """
     Return the global bin source selected by globalSourceStr.

     globalSourceStr -- one of 'test', 'chrs', 'chrarms', 'ensembl' or
         'userbins'.
     genome -- passed to the bin-source / GenomeInfo call that is selected.
     minimal -- when equal to True, a MinimalBinSource for the genome is
         returned regardless of globalSourceStr.

     Raises ShouldNotOccurError for any other globalSourceStr.
     """
     if minimal == True:
         return MinimalBinSource(genome)

     if globalSourceStr == 'test':
         return UserBinSource('TestGenome:chr21:10000000-15000000',
                              '1000000')

     # Region sets supplied by GenomeInfo.
     if globalSourceStr == 'chrs':
         return GenomeInfo.getChrRegs(genome)
     if globalSourceStr == 'chrarms':
         return GenomeInfo.getChrArmRegs(genome)
     if globalSourceStr == 'ensembl':
         return GenomeInfo.getStdGeneRegs(genome)

     if globalSourceStr == 'userbins':
         from gold.application.StatRunner import StatJob
         userBinSource = StatJob.USER_BIN_SOURCE
         assert userBinSource is not None
         return userBinSource

     raise ShouldNotOccurError('globalSource not recognized')
Ejemplo n.º 7
0
    def _determineStatClass(self):
        """
        Trial-run every candidate statistic class on a minimal bin source.

        For each class in self._statClassList a StatJob is created with
        minimal=True on self._track / self._track2 and the currently
        activated choices. The job is used once to obtain the statistic
        object (and through it the tracks involved), and a second, fresh job
        is run in full. Afterwards every track whose formatConverters is
        still None is reported via IncompatibleTracksError, unless another
        track in the list has the same unique key.

        Incompatibility errors (IncompatibleTracksError, AssertionError,
        IncompatibleAssumptionsError, IdenticalTrackNamesError) are logged
        when DebugConfig.VERBOSE is set and re-raised only when
        DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS is set. Any other exception
        propagates to the caller.

        Raises ShouldNotOccurError when the class list is empty and
        DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS is set.
        """
        assert hasattr(self, '_track')
        assert hasattr(self, '_track2')
        dummyGESource = MinimalBinSource(self._genome)

        if len(self._statClassList) == 0:
            if self._reversed:
                logMessage('Stat class list is empty, for analysisDef: ' + self._analysisLine, level=logging.WARNING)
            if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                raise ShouldNotOccurError('Stat class list is empty. Analysisdef: ' + self._analysisLine)

        for statClass in self._statClassList:
            if DebugConfig.VERBOSE:
                logMessage(statClass.__name__ + ': Trying (' + self.getDefAfterChoices() + ')')

            trackA, trackB = self._track, self._track2
            if trackA is None:
                # Nothing to validate against without a first track.
                continue

            try:
                # The hackiest of all hacks!
                # TODO: reimplement together with TrackStructure
                job = StatJob(dummyGESource, trackA, trackB, statClass, minimal=True,
                              **self.getChoices(filterByActivation=True))
                stat = job._getSingleResult(dummyGESource[0])[-1]
                tracks = stat._tracks if hasattr(stat, '_tracks') else [trackA, trackB]

                # Keep one entry per position in `tracks` (None for a missing
                # track). The indexes used below refer to positions in
                # `tracks`, so dropping the None entries here would shift the
                # keys and cause IndexErrors or comparisons against the wrong
                # track.
                trackUniqueKeys = [Track(tr.trackName).getUniqueKey(self._genome)
                                   if tr is not None else None
                                   for tr in tracks]

                StatJob(dummyGESource, trackA, trackB, statClass, minimal=True,
                        **self.getChoices(filterByActivation=True)).run(False)
                # In order not to mess up integration tests
                initSeed()

                # NOTE(review): assumes allElementsVersusRest yields
                # (index, other indexes) pairs -- confirm against its definition.
                for trackIndex, restTrackIndexes in allElementsVersusRest(xrange(len(tracks))):
                    track = tracks[trackIndex]
                    if track is not None and track.formatConverters is None:
                        # Only real tracks contribute keys to compare against.
                        uniqueKeyForRestTracks = set(
                            trackUniqueKeys[i] for i in restTrackIndexes
                            if tracks[i] is not None)

                        # If several tracks are the same, memory memoization will only result
                        # in one RawDataStat being created, for one Track object. This is a
                        # wanted optimization. In other cases, something is probably wrong if
                        # a track has not been touched. However, this rule may be revisited
                        # when track structure functionality is implemented.
                        if trackUniqueKeys[trackIndex] not in uniqueKeyForRestTracks:
                            raise IncompatibleTracksError(
                                'Track ' + prettyPrintTrackName(track.trackName) +
                                ' was created, but not touched by statistic')

            # All four error types were handled identically in two separate
            # clauses; a single clause keeps the handling in one place.
            except (IncompatibleTracksError, AssertionError,
                    IncompatibleAssumptionsError, IdenticalTrackNamesError) as e:
                if DebugConfig.VERBOSE:
                    logException(e, level=logging.DEBUG,
                                 message='(Warning: error in _determineStatClass for stat: %s)' % statClass.__name__)
                if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                    raise
Ejemplo n.º 8
0
 def isMinimalBin(region):
     """
     Return True when region is the first bin of its genome's MinimalBinSource.

     Iterable regions (according to isIter()) never match. In the code
     visible here there is no explicit return for the non-matching case.
     """
     # Imported locally rather than at module level.
     from quick.application.UserBinSource import MinimalBinSource
     # isIter() is checked first, so region.genome is only read for
     # non-iterable regions.
     if not isIter(region) and region == MinimalBinSource(region.genome)[0]:
         return True