def __init__(self, region, track, track2, rawStatistic=None,
             normalizationType='zeroToOne', minimal=False, **kwArgs):
    """Pick the global bin source, then store the raw statistic and
    normalization type.

    region, track, track2 -- forwarded unchanged to Statistic.__init__.
    rawStatistic -- either an object used as-is, or a str key that is
        resolved through STAT_CLASS_DICT before being stored.
    normalizationType -- stored on the instance as given.
    minimal -- when equal to True, MinimalBinSource(region.genome) is used
        as global source; otherwise StatJob.USER_BIN_SOURCE is used, which
        must already be set (asserted).
    kwArgs -- forwarded unchanged to Statistic.__init__.
    """
    if minimal == True:
        chosenSource = MinimalBinSource(region.genome)
    else:
        # Function-level import, as in the original (presumably to avoid a
        # circular import -- TODO confirm).
        from gold.application.StatRunner import StatJob
        assert StatJob.USER_BIN_SOURCE is not None
        chosenSource = StatJob.USER_BIN_SOURCE
    # The global source is set before the base initializer runs.
    self._globalSource = chosenSource

    # The base class receives rawStatistic exactly as passed in (possibly
    # still a str key).
    Statistic.__init__(self, region, track, track2,
                       rawStatistic=rawStatistic,
                       normalizationType=normalizationType,
                       minimal=minimal, **kwArgs)

    resolvedStatistic = rawStatistic
    if type(resolvedStatistic) is str:
        from gold.statistic.AllStatistics import STAT_CLASS_DICT
        resolvedStatistic = STAT_CLASS_DICT[resolvedStatistic]

    self._rawStatistic = resolvedStatistic
    self._normalizationType = normalizationType
def _determineStatClass(self, flushMemoized=True):
    """Run every candidate statistic class once on a minimal bin source.

    For each class in self._statClassList a StatJob is built with
    minimal=True and the activated choices, and then run. Incompatibility
    and assertion errors are logged when DebugConfig.VERBOSE is set and
    re-raised only when DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS is set;
    otherwise the loop moves on to the next candidate.

    flushMemoized -- forwarded to StatJob.run.

    NOTE(review): the code visible here neither stores nor returns the
    class that passes -- confirm against the complete method.
    """
    assert hasattr(self, '_track')
    assert hasattr(self, '_track2')

    minimalBins = MinimalBinSource(self._genome)

    if len(self._statClassList) == 0:
        logMessage('Stat class list is empty, for analysisDef: ' + self._analysisLine,
                   level=logging.WARNING)
        if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
            raise ShouldNotOccurError('Stat class list is empty. Analysisdef: ' + self._analysisLine)

    for candidate in self._statClassList:
        if DebugConfig.VERBOSE:
            logMessage('Checking validity of stat class "{}" for analysisDef "{}".'.format(
                candidate.__name__, self.getDefAfterChoices()))

        firstTrack, secondTrack = self._track, self._track2
        if firstTrack is None:
            continue

        try:
            validationJob = StatJob(minimalBins, firstTrack, secondTrack, candidate,
                                    minimal=True,
                                    **self.getAllChoices(filterByActivation=True))
            validationJob.run(False, flushMemoized=flushMemoized)
        except (IncompatibleTracksError, AssertionError,
                IncompatibleAssumptionsError, IdenticalTrackNamesError) as err:
            # All of these exception types are handled identically.
            if DebugConfig.VERBOSE:
                logException(err, level=logging.DEBUG,
                             messagePrefix='Warning: error in _determineStatClass for stat: %s' % candidate.__name__)
            if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                raise
def _init(self, globalSource='', minimal=False):
    """Choose the global bin source for this statistic.

    globalSource -- 'test' selects a fixed UserBinSource on TestGenome;
        any other value selects GlobalBinSource for the region's genome.
    minimal -- when truthy, takes precedence and selects MinimalBinSource
        for the region's genome.

    Raises SplittableStatNotAvailableError when self._region is iterable
    (as judged by isIter).
    """
    if isIter(self._region):
        raise SplittableStatNotAvailableError()

    if minimal:
        chosenSource = MinimalBinSource(self._region.genome)
    elif globalSource == 'test':
        chosenSource = UserBinSource('TestGenome:chr21:10000000-15000000', '1000000')
    else:
        chosenSource = GlobalBinSource(self._region.genome)

    self._globalSource = chosenSource
def __init__(self, region, track, track2, globalSource='', minimal=False, **kwArgs):
    """Choose the global bin source, then delegate to the parent initializer.

    region, track, track2 -- forwarded unchanged to the parent __init__.
    globalSource -- 'test' selects a fixed UserBinSource on TestGenome;
        any other value selects GlobalBinSource for the region's genome.
    minimal -- when equal to True, takes precedence and selects
        MinimalBinSource for the region's genome.
    kwArgs -- forwarded unchanged to the parent __init__.

    Raises SplittableStatNotAvailableError when region is iterable
    (as judged by isIter).
    """
    if isIter(region):
        raise SplittableStatNotAvailableError()

    if minimal == True:
        chosenSource = MinimalBinSource(region.genome)
    elif globalSource == 'test':
        chosenSource = UserBinSource('TestGenome:chr21:10000000-15000000', '1000000')
    else:
        chosenSource = GlobalBinSource(region.genome)
    self._globalSource = chosenSource

    # NOTE(review): super(self.__class__, self) is resolved against the
    # runtime class, so a subclass that inherits this __init__ would call
    # itself recursively. Naming the defining class explicitly would avoid
    # that; left unchanged here because the class name is not visible.
    super(self.__class__, self).__init__(region, track, track2,
                                         globalSource=globalSource,
                                         minimal=minimal, **kwArgs)
def __init__(self, genome, trackName, allowOverlaps):
    """Record genome and track name, derive the backing file path and look
    up the minimal region for the genome.

    genome, trackName -- stored on the instance and used to build the path.
    allowOverlaps -- must compare equal to True or False (asserted); passed
        on to createDirPath.
    """
    assert allowOverlaps in (False, True)

    self._genome = genome
    self._trackName = trackName

    trackDir = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)
    self._fn = os.sep.join([trackDir, BR_SHELVE_FILE_NAME])

    self._contents = dict()
    self._updatedChrs = set()

    # Function-level import, as in the original (presumably to avoid a
    # circular import -- TODO confirm).
    from quick.application.UserBinSource import MinimalBinSource
    minimalBins = MinimalBinSource(genome)
    # MinimalBinSource may yield None; in that case no minimal region is set.
    if minimalBins is not None:
        self._minimalRegion = minimalBins[0]
    else:
        self._minimalRegion = None
def getGlobalSource(globalSourceStr, genome, minimal):
    """Return the global bin source selected by `globalSourceStr`.

    minimal == True takes precedence over every other setting and yields
    MinimalBinSource(genome). Otherwise the recognized values are:

        'test'     -- a fixed UserBinSource on TestGenome
        'chrs'     -- GenomeInfo.getChrRegs(genome)
        'chrarms'  -- GenomeInfo.getChrArmRegs(genome)
        'ensembl'  -- GenomeInfo.getStdGeneRegs(genome)
        'userbins' -- StatJob.USER_BIN_SOURCE (asserted to be set)

    Raises ShouldNotOccurError for any other value.
    """
    if minimal == True:
        return MinimalBinSource(genome)

    if globalSourceStr == 'test':
        return UserBinSource('TestGenome:chr21:10000000-15000000', '1000000')

    if globalSourceStr == 'chrs':
        return GenomeInfo.getChrRegs(genome)

    if globalSourceStr == 'chrarms':
        return GenomeInfo.getChrArmRegs(genome)

    if globalSourceStr == 'ensembl':
        return GenomeInfo.getStdGeneRegs(genome)

    if globalSourceStr == 'userbins':
        # Imported only on this branch, as in the original.
        from gold.application.StatRunner import StatJob
        assert StatJob.USER_BIN_SOURCE is not None
        return StatJob.USER_BIN_SOURCE

    raise ShouldNotOccurError('globalSource not recognized')
def _determineStatClass(self):
    """Run every candidate statistic class once on a minimal bin source.

    For each class in self._statClassList a StatJob is built with
    minimal=True and the activated choices. The statistic object of the
    first minimal bin is inspected for its tracks, the job is run, and
    every track that is present but still has formatConverters set to None
    is reported as "created, but not touched" through
    IncompatibleTracksError -- unless another track with the same unique
    key exists.

    Incompatibility and assertion errors are logged when
    DebugConfig.VERBOSE is set and re-raised only when
    DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS is set; otherwise the loop
    moves on to the next candidate.

    NOTE(review): the code visible here neither stores nor returns the
    class that passes -- confirm against the complete method.
    """
    assert hasattr(self, '_track')
    assert hasattr(self, '_track2')

    dummyGESource = MinimalBinSource(self._genome)

    if len(self._statClassList) == 0:
        if self._reversed:
            logMessage('Stat class list is empty, for analysisDef: ' + self._analysisLine,
                       level=logging.WARNING)
        if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
            raise ShouldNotOccurError('Stat class list is empty. Analysisdef: ' + self._analysisLine)

    for statClass in self._statClassList:
        if DebugConfig.VERBOSE:
            logMessage(statClass.__name__ + ': Trying (' + self.getDefAfterChoices() + ')')

        trackA, trackB = self._track, self._track2
        if trackA is None:
            continue

        try:
            # The hackiest of all hacks!
            # TODO: reimplement together with TrackStructure
            job = StatJob(dummyGESource, trackA, trackB, statClass, minimal=True,
                          **self.getChoices(filterByActivation=True))
            stat = job._getSingleResult(dummyGESource[0])[-1]
            tracks = stat._tracks if hasattr(stat, '_tracks') else [trackA, trackB]

            # Keep this list aligned with `tracks` (None for a missing
            # track). Filtering the None entries out, as was done before,
            # shifts the positions and makes the index lookups below raise
            # IndexError as soon as one track is None.
            trackUniqueKeys = [
                Track(tr.trackName).getUniqueKey(self._genome) if tr is not None else None
                for tr in tracks
            ]

            StatJob(dummyGESource, trackA, trackB, statClass, minimal=True,
                    **self.getChoices(filterByActivation=True)).run(False)
            # In order not to mess up integration tests
            initSeed()

            for trackIndex, restTrackIndexes in allElementsVersusRest(xrange(len(tracks))):
                track = tracks[trackIndex]
                if track is not None and track.formatConverters is None:
                    # Only tracks that are present contribute a key.
                    uniqueKeyForRestTracks = set(
                        trackUniqueKeys[i] for i in restTrackIndexes
                        if tracks[i] is not None)

                    # If several tracks are the same, memory memoization will only
                    # result in one RawDataStat being created, for one Track object.
                    # This is a wanted optimization. In other cases, something is
                    # probably wrong if a track has not been touched. However, this
                    # rule may be revisited when track structure functionality is
                    # implemented.
                    if trackUniqueKeys[trackIndex] not in uniqueKeyForRestTracks:
                        raise IncompatibleTracksError(
                            'Track ' + prettyPrintTrackName(track.trackName) +
                            ' was created, but not touched by statistic')

        except IncompatibleTracksError as e:
            if DebugConfig.VERBOSE:
                logException(e, level=logging.DEBUG,
                             message='(Warning: error in _determineStatClass for stat: %s)' % statClass.__name__)
            if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                raise
        except (AssertionError, IncompatibleAssumptionsError, IdenticalTrackNamesError) as e:
            if DebugConfig.VERBOSE:
                logException(e, level=logging.DEBUG,
                             message='(Warning: error in _determineStatClass for stat: %s)' % statClass.__name__)
            if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                raise
def isMinimalBin(region):
    """Return True if `region` equals the first bin of the minimal bin
    source of its genome, otherwise False.

    False is returned for iterable regions (as judged by isIter) and when
    MinimalBinSource yields None for the genome. Previously the function
    fell off the end and returned None in the negative case, and it
    subscripted the MinimalBinSource result without checking for None,
    although that result is treated as possibly None elsewhere in this
    code base.
    """
    from quick.application.UserBinSource import MinimalBinSource

    if isIter(region):
        return False

    minimalBinList = MinimalBinSource(region.genome)
    if minimalBinList is None:
        return False

    return bool(region == minimalBinList[0])