def run(self, printProgress=PRINT_PROGRESS):
        '''
        Runs the statistic specified in self._analysis (from analysisDef) and returns an object of class Result.
        '''
        # May happen in batch runs; should never happen from the GUI.
        if self._statClass is None:
            self._handleMissingStat()
            return None

        if USE_PROFILING:
            profiler = Profiler()
            resDict = {}
            profiler.run('resDict[0] = StatJob.run(self, printProgress=printProgress)', globals(), locals())
            res = resDict[0]
        else:
            res = StatJob.run(self, printProgress=printProgress)
        res.setAnalysis(self._analysis)
        res.setAnalysisText(str(self._analysis))
        
        ResultsMemoizer.flushStoredResults()
        if USE_PROFILING:
            profiler.printStats()
        
        return res
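The resDict indirection above is needed because a profiler executes a statement, not an expression, so the statement must store its own result somewhere retrievable. A minimal sketch of the same idiom using the standard library's cProfile (assuming the HyperBrowser Profiler wrapper behaves analogously; expensiveComputation is a hypothetical stand-in):

import cProfile

def expensiveComputation():
    # Stand-in for StatJob.run(); any costly call works the same way.
    return sum(i * i for i in range(10 ** 6))

resDict = {}
# runctx() executes the statement string in the given namespaces and prints
# profiling stats; the result is smuggled out through the mutable dict.
cProfile.runctx('resDict[0] = expensiveComputation()', globals(), locals())
res = resDict[0]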
Example #2
    @classmethod
    def getValidAnalysisDefFromTitle(cls, analysisTitle, genome, trackName1,
                                     trackName2):
        matchingAnalyses = [
            analysis for key, analysis in cls.getAllAnalysisTuples()
            if ':' in key and key[0:key.find(':')] == analysisTitle
        ]
        validAnalyses = []
        validReversedAnalyses = []

        for analysis in matchingAnalyses:
            analysis, isReversed = cls._tryAnalysisForValidity(
                analysis, genome, trackName1, trackName2)
            if analysis is not None:
                if isReversed:
                    validReversedAnalyses.append(analysis)
                else:
                    validAnalyses.append(analysis)
        ResultsMemoizer.flushStoredResults()

        if len(validAnalyses) == 1:
            return validAnalyses[0].getDef()
        elif len(validReversedAnalyses) == 1:
            return validReversedAnalyses[0].getDef()

#        logMessage('No analysisDef chosen. validAnalyses: %s, validReversedAnalyses: %s' \
#                   % ([x.getDef() for x in validAnalyses], [x.getDef() for x in validReversedAnalyses]))
        return ''
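The list comprehension above matches analyses whose key starts with the requested title before the first colon. A minimal sketch of that matching rule in isolation (titleOfKey is a hypothetical helper, not part of the API shown here):

def titleOfKey(key):
    # Keys are assumed to look like '<title>:<rest of definition>'.
    return key.split(':', 1)[0] if ':' in key else None

assert titleOfKey('Overlap: bp-level overlap') == 'Overlap'
assert titleOfKey('NoColonKey') is None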
    def _compute(self):
        kwArgs = copy(self._kwArgs)
        if 'rawStatistic' in kwArgs:
            del kwArgs['rawStatistic']

        matrixElRes = []
        tr1Subtypes = ProcTrackOptions.getSubtypes(self.getGenome(), self._track.trackName, True)
        assert len(tr1Subtypes) > 0
        for subtype1 in tr1Subtypes:  #['0','1']
            for subtype2 in ['0','1']:
                tn1 = self._track.trackName + [subtype1]
                tn2 = self._track2.trackName + [subtype2]
                if not os.path.exists(createDirPath(tn1, self.getGenome())) or \
                        not os.path.exists(createDirPath(tn2, self.getGenome())):
                    raise IncompatibleTracksError

                #print ','
                track1 = Track(tn1)
                track1.formatConverters = self._track.formatConverters
                track2 = Track(tn2)
                track2.formatConverters = self._track2.formatConverters
                #self._addChild(self._rawStatistic(self._region, track1, track2, **kwArgs))
                matrixElRes.append(self._rawStatistic(self._region, track1, track2, **kwArgs).getResult())
                ResultsMemoizer.flushStoredResults()

        #assert len(self._children) == 7
        #return dict(zip('00,01,10,11'.split(','), [x.getResult() for x in self._children[3:]]))

        allChildRes = array(matrixElRes)
        #allChildRes = array([x.getResult() for x in self._children[3:]])
        allChildRes = allChildRes.reshape((-1, 2))
        return OrderedDict([('Matrix', allChildRes.tolist()), ('Rows', tr1Subtypes), ('Cols', ['Case', 'Control'])])
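The method above gathers one result per (subtype1, subtype2) pair in row-major order, so the flat list can be reshaped into a labeled matrix. A minimal sketch of that collect-then-reshape pattern, assuming numpy and hypothetical placeholder values:

from collections import OrderedDict
from numpy import array

rows = ['promoters', 'exons', 'introns']  # hypothetical subtypes
cols = ['Case', 'Control']
# Inner loop varies fastest, matching the nested loops above.
flatResults = [0.1 * i for i in range(len(rows) * len(cols))]

matrix = array(flatResults).reshape((-1, 2))  # -1 infers the row count
result = OrderedDict([('Matrix', matrix.tolist()), ('Rows', rows), ('Cols', cols)])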
Example #4
    def run(self, printProgress=PRINT_PROGRESS):
        '''
        Runs the statistic specified in self._analysis (from analysisDef) and returns an object of class Result.
        '''
        # May happen in batch runs; should never happen from the GUI.
        if self._statClass is None:
            self._handleMissingStat()
            return None

        if DebugConfig.USE_PROFILING:
            from gold.util.Profiler import Profiler
            profiler = Profiler()
            resDict = {}
            profiler.run('resDict[0] = StatJob.run(self, printProgress=printProgress)', globals(), locals())
            res = resDict[0]
        else:
            res = StatJob.run(self, printProgress=printProgress)
        
        res.setAnalysis(self._analysis)
        res.setAnalysisText(str(self._analysis))
        
        ResultsMemoizer.flushStoredResults()
        
        if DebugConfig.USE_PROFILING:
            profiler.printStats()
            if DebugConfig.USE_CALLGRAPH and self._galaxyFn:
                profiler.printLinkToCallGraph(['profile_AnalysisDefJob'], self._galaxyFn)
        
        return res
Example #5
    def _loadMemoized(self):
        self.resultLoadedFromDisk = False
        try:
            ResultsMemoizer.loadResult(self)
        except IOError, e:
            logMessageOnce(
                'No memoization due to IOError (probably because another process is writing the same data): '
                + str(e))
    def __iter__(self):
        job = self.job
        for bin in job._userBinSource:
            stat = job._statClass(bin, job._track, job._track2, **job._kwArgs)
            ResultsMemoizer.loadResult(stat)
            if hasattr(stat, "resultLoadedFromDisk") and stat.resultLoadedFromDisk:
                continue

            yield StatisticArgumentPickleWrapper(job._statClass, bin, job._track, job._track2, **job._kwArgs)
    def __iter__(self):
        job = self.job
        for bin in job._userBinSource:
            stat = job._statClass(bin, job._track, job._track2, **job._kwArgs)
            ResultsMemoizer.loadResult(stat)
            if stat.resultLoadedFromDisk():
                continue

            yield StatisticArgumentPickleWrapper(job._statClass, bin, job._track, job._track2, **job._kwArgs)
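Both __iter__ variants implement the same idea: probe the memoization store for each bin and yield work only for bins whose results are not already on disk. A minimal sketch of that skip-if-cached generator, with a hypothetical in-memory dict standing in for ResultsMemoizer's disk store:

cache = {'chr1:0-1000': 0.42}  # hypothetical precomputed result

def uncachedBins(bins):
    # Yield only the bins that still need to be computed.
    for bin in bins:
        if bin in cache:
            continue
        yield bin

assert list(uncachedBins(['chr1:0-1000', 'chr1:1000-2000'])) == ['chr1:1000-2000']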
Example #8
    def _storeResult(self):
        try:
            ResultsMemoizer.storeResult(self)
        except IOError, e:
            #logging.getLogger(HB_LOGGER).debug('No memoization due to IOError (probably because another process is reading/writing the same data): ' + str(e))
            logExceptionOnce(
                e,
                message='No memoization due to IOError (probably because another process is reading/writing the same data)')
    def _compute(self):
        kwArgs = copy(self._kwArgs)
        if 'rawStatistic' in kwArgs:
            del kwArgs['rawStatistic']

        matrixElRes = []
        tr1Subtypes = ProcTrackOptions.getSubtypes(self.getGenome(),
                                                   self._track.trackName, True)
        tr2Subtypes = ProcTrackOptions.getSubtypes(self.getGenome(),
                                                   self._track2.trackName,
                                                   True)
        assert len(tr1Subtypes) > 0, str(self._track.trackName)
        assert len(tr2Subtypes) > 0, str(self._track2.trackName)
        if 'minimal' in self._kwArgs:
            tr1Subtypes = tr1Subtypes[:1]
            tr2Subtypes = tr2Subtypes[:1]
        for subtype1 in tr1Subtypes:  #['0','1']
            #for subtype2 in ['0','1']:
            for subtype2 in tr2Subtypes:
                tn1 = self._track.trackName + [subtype1]
                tn2 = self._track2.trackName + [subtype2]
                if not os.path.exists(createDirPath(tn1, self.getGenome())) or \
                        not os.path.exists(createDirPath(tn2, self.getGenome())):
                    raise IncompatibleTracksError

                #print ','
                track1 = Track(tn1)
                track1.formatConverters = self._track.formatConverters
                track2 = Track(tn2)
                track2.formatConverters = self._track2.formatConverters
                #self._addChild(self._rawStatistic(self._region, track1, track2, **kwArgs) )
                matrixElRes.append(
                    self._rawStatistic(self._region, track1, track2, **kwArgs).getResult())
            ResultsMemoizer.flushStoredResults()

        #assert len(self._children) == 7
        #return dict(zip( '00,01,10,11'.split(','), [x.getResult() for x in self._children[3:]]))

        allChildRes = array(matrixElRes)
        #allChildRes = array([x.getResult() for x in self._children[3:]])
        allChildRes = allChildRes.reshape((len(tr1Subtypes), len(tr2Subtypes)))
        return {
            'Result': OrderedDict([('Matrix', allChildRes.tolist()),
                                   ('Rows', tr1Subtypes),
                                   ('Cols', tr2Subtypes)])
        }
Example #10
    @staticmethod
    def getValidAnalysesInCategory(category, genome, trackName1,
                                   trackName2):  #, formatConverter1=None, formatConverter2=None:
        #print 'AnalysisManager: ', trackName1
        #print 'AnalysisManager: ',trackName1
        validAnalyses = []
        for analysis in AnalysisManager.getAnalysisDict()[category].values():
            #from time import time
            #t = time()
            analysis, isReversed = AnalysisManager._tryAnalysisForValidity(
                analysis, genome, trackName1, trackName2)
            #logMessage(analysisDef)
            #logMessage('%s' % (time() - t))
            if analysis is not None:
                validAnalyses.append(analysis)

        ResultsMemoizer.flushStoredResults()

        return validAnalyses
Example #11
    def _getSingleResult(self, region):
        #print 'Kw Here: ', self._kwArgs, 'args here: ', self._args
        
        stat = self._statClass(region, self._trackStructure, *self._args, **self._kwArgs)
        try:
            res = stat.getResult()
        except (CentromerError, NoneResultError):
            res = None
            if DebugConfig.PASS_ON_NONERESULT_EXCEPTIONS:  # @UndefinedVariable
                raise
            
        #if not isinstance(res, dict):
        if getClassName(res) not in ['dict', 'OrderedDict']:
            res = {} if res is None else {self.GENERAL_RESDICTKEY: res}
            #res = {self.GENERAL_RESDICTKEY : res}

        ResultsMemoizer.flushStoredResults()
        return res, stat
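The snippet above normalizes whatever a statistic returns into a dict: None becomes an empty dict, and any other non-dict value is wrapped under a general key. A minimal sketch of that normalization rule (normalizeResult is a hypothetical helper, not part of the API shown here):

GENERAL_RESDICTKEY = 'Result'

def normalizeResult(res):
    # Dicts (including OrderedDicts) pass through unchanged.
    if type(res).__name__ in ['dict', 'OrderedDict']:
        return res
    return {} if res is None else {GENERAL_RESDICTKEY: res}

assert normalizeResult(None) == {}
assert normalizeResult(3.14) == {'Result': 3.14}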
Example #12
class StatJobV2(StatJob):
    def __init__(self, userBinSource, trackStructure, statClass, *args,
                 **kwArgs):
        StatJob.USER_BIN_SOURCE = userBinSource
        #if 'userBins' in kwArgs:
        #    logMessage('key "userBins" already found in kwArgs in StatJob.__init__')
        #else:
        #    kwArgs['userBins'] = userBinSource
        self._userBinSource = userBinSource
        self._trackStructure = trackStructure
        self._statClass = statClass
        self._args = args
        self._kwArgs = kwArgs

        self._numUserBins = None

    @property
    def _track(self):
        if TrackStructure.QUERY_KEY not in self._trackStructure\
        or not self._trackStructure.getQueryTrackList():
            raise ShouldNotOccurError(
                'Track structure must contain a query list of at least one track'
            )
        return self._trackStructure.getQueryTrackList()[0]

    @property
    def _track2(self):
        if TrackStructure.REF_KEY in self._trackStructure\
        and self._trackStructure.getReferenceTrackList():
            return self._trackStructure.getReferenceTrackList()[0]

        return None

#     def _emptyResults(self):

    def _getSingleResult(self, region):
        stat = self._statClass(region, self._trackStructure, *self._args,
                               **self._kwArgs)
        try:
            res = stat.getResult()
        except (CentromerError, NoneResultError), e:
            res = None
            if DebugConfig.PASS_ON_NONERESULT_EXCEPTIONS:
                raise

        #if not isinstance(res, dict):
        if getClassName(res) not in ['dict', 'OrderedDict']:
            res = {} if res is None else {self.GENERAL_RESDICTKEY: res}
            #res = {self.GENERAL_RESDICTKEY : res}

        ResultsMemoizer.flushStoredResults()
        return res, stat
class StatisticTaskWrapper(TaskWrapper):
    def __init__(self):
        TaskWrapper.__init__(self)
        self.GENERAL_RESDICTKEY = "Result"
        self.referenceKeeper = None
        
    def handleTask(self, task):  
        stat = task.toStatistic()
        
        try:
            res = stat.getResult()
        except (CentromerError, NoneResultError), e:
            res = None
            
        if not isinstance(res, dict):
            res = {} if res is None else {self.GENERAL_RESDICTKEY : res}

        ResultsMemoizer.flushStoredResults()
        self.referenceKeeper = stat  # keeps statistic from being garbage collected
        stat.afterComputeCleanup()        
        
        return self._createResultsDictFromMemoDict()
    def _loadMemoizedResult(self):
        self.resetResultLoadedFromDiskFlag()
        ResultsMemoizer.loadResult(self)

    def _storeMemoizedResult(self):
        ResultsMemoizer.storeResult(self)

    def _loadMinimalMemoizedResult(self):
        self.resetResultLoadedFromDiskFlag()
        return ResultsMemoizer.loadMinimalResult(self)

    def _storeMinimalMemoizedResult(self):
        ResultsMemoizer.storeMinimalResult(self)

    def _storeMinimalMemoizedError(self):
        ResultsMemoizer.storeMinimalError(self, sys.exc_info())
    def _loadMemoized(self):
        self.resultLoadedFromDisk = False
        try:
            ResultsMemoizer.loadResult(self)
        except IOError, e:
            logMessageOnce('No memoization due to IOError (probably because another process is writing the same data): ' + str(e))
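The store/load wrappers above delegate to ResultsMemoizer and tolerate IOErrors raised when concurrent processes touch the same files. A hypothetical minimal sketch of what such a disk-backed memoizer could look like (the real class's internals are not shown in these examples; MiniMemoizer is an illustration only):

import os
import pickle

class MiniMemoizer(object):
    def __init__(self, cacheDir):
        self._cacheDir = cacheDir

    def _path(self, key):
        return os.path.join(self._cacheDir, key + '.pickle')

    def storeResult(self, key, result):
        # May raise IOError if another process holds the file; callers
        # above log the error and continue rather than failing the analysis.
        with open(self._path(key), 'wb') as f:
            pickle.dump(result, f)

    def loadResult(self, key):
        with open(self._path(key), 'rb') as f:  # IOError if not memoized yet
            return pickle.load(f)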
Example #20
    def run(self, printProgress=PRINT_PROGRESS):
        res = StatJob.run(self, printProgress=printProgress)
        ResultsMemoizer.flushStoredResults()
        return res
Example #21
    def _getSingleResult(self, region):
        stat = self._statClass(region, self._trackStructure, *self._args, **self._kwArgs)
        try:
            res = stat.getResult()
        except (CentromerError, NoneResultError), e:
            res = None
            if DebugConfig.VERBOSE:
                logException(e, level=logging.DEBUG)
            if DebugConfig.PASS_ON_NONERESULT_EXCEPTIONS:
                raise
            
        #if not isinstance(res, dict):
        if getClassName(res) not in ['dict', 'OrderedDict']:
            res = {} if res is None else {self.GENERAL_RESDICTKEY: res}
            #res = {self.GENERAL_RESDICTKEY : res}

        ResultsMemoizer.flushStoredResults()
        return res, stat
    
    def getNumUserBins(self):
        if self._numUserBins is None:
            self._numUserBins = sum(1 for el in self._userBinSource)
        return self._numUserBins

    def _checkNumUserBinsIsValid(self):
        numUserBins = self.getNumUserBins()
        if numUserBins < 1:
            raise InvalidFormatError('Zero analysis bins specified.')
            #return False
        elif numUserBins > MAX_NUM_USER_BINS and not self._avoidUbStatMemoization():
            raise InvalidFormatError('Maximum number of user bins exceeded - Maximum: ' +
                                     str(MAX_NUM_USER_BINS) + ', Requested: ' + str(numUserBins))
            #return False
    def _storeResult(self):
        try:
            ResultsMemoizer.storeResult(self)
        except IOError, e:
            #logging.getLogger(HB_LOGGER).debug('No memoization due to IOError (probably because another process is reading/writing the same data): ' + str(e))
            logExceptionOnce(e, message='No memoization due to IOError (probably because another process is reading/writing the same data)')
    def _getSingleResult(self, region):
        #print 'Kw Here: ', self._kwArgs
        stat = self._statClass(region, self._track, self._track2, *self._args, **self._kwArgs)
        try:
            res = stat.getResult()
        except (CentromerError, NoneResultError), e:
            res = None
            if DebugConfig.PASS_ON_NONERESULT_EXCEPTIONS:
                raise
            
        #if not isinstance(res, dict):
        if getClassName(res) not in ['dict', 'OrderedDict']:
            res = {} if res is None else {self.GENERAL_RESDICTKEY: res}
            #res = {self.GENERAL_RESDICTKEY : res}

        ResultsMemoizer.flushStoredResults()
        return res, stat
    
    def getNumUserBins(self):
        if self._numUserBins is None:
            self._numUserBins = sum(1 for el in self._userBinSource)
        return self._numUserBins

    def _checkNumUserBinsIsValid(self):
        numUserBins = self.getNumUserBins()
        if numUserBins < 1:
            raise InvalidFormatError('Zero analysis bins specified.')
            #return False
        elif numUserBins > MAX_NUM_USER_BINS and not self._avoidUbStatMemoization():
            raise InvalidFormatError('Maximum number of user bins exceeded - Maximum: ' +
                                     str(MAX_NUM_USER_BINS) + ', Requested: ' + str(numUserBins))
            #return False
    def run(self, printProgress=PRINT_PROGRESS):
        res = StatJob.run(self, printProgress=printProgress)
        ResultsMemoizer.flushStoredResults()
        return res
Example #25
class StatJob(object):
    GENERAL_RESDICTKEY = 'Result'
    USER_BIN_SOURCE = None

    #@takes(StatJob, UserBinSource, Track, Track, Statistic)
    #statClass will typically be a functools.partial object
    def __init__(self, userBinSource, track, track2, statClass, *args,
                 **kwArgs):
        #Not relevant, as minimal runs are anyway done:
        #if StatJob.USER_BIN_SOURCE != None:
        #    logMessage('USER_BIN_SOURCE already set in StatJob')

        StatJob.USER_BIN_SOURCE = userBinSource
        #if 'userBins' in kwArgs:
        #    logMessage('key "userBins" already found in kwArgs in StatJob.__init__')
        #else:
        #    kwArgs['userBins'] = userBinSource

        self._userBinSource = userBinSource
        self._track = track
        self._track2 = track2
        self._statClass = statClass
        self._args = args
        self._kwArgs = kwArgs
        self._numUserBins = None

    def _initProgress(self, printProgress):
        if hasattr(self._statClass, 'keywords'):
            #since kwArgs to Statistic usually has been wrapped in by functools.partial.
            statKwArgs = self._statClass.keywords
        else:
            statKwArgs = self._kwArgs

        from quick.statistic.McFdrSamplingStat import McFdrSamplingStat
        from quick.statistic.SequentialMcSamplingStat import SequentialMcSamplingStat
        #if self._kwArgs.get('minimal') == True or statKwArgs.get('silentProgress') == 'yes': #minimal is in kwArgs to StatJob
        if self._kwArgs.get('minimal') == True:  # minimal is in kwArgs to StatJob
            progressClass = SilentProgress
        #elif self._kwArgs.get('numResamplings') < self._kwArgs.get('maxSamples'):
        #elif self._statClass.keywords.get('numResamplings') < self._statClass.keywords.get('maxSamples'): #since kwArgs to Statistic has been wrapped in by functools.partial.
        elif statKwArgs.get('mcSamplerClass') in ['McFdrSamplingStat', McFdrSamplingStat]:
            progressClass = McFdrProgress
        elif statKwArgs.get('mcSamplerClass') in ['SequentialMcSamplingStat', SequentialMcSamplingStat]:
            progressClass = SequentialMcProgress
        elif RandomizationManagerStat.getMcSamplingScheme(statKwArgs) == 'Sequential MC':
            progressClass = SequentialMcProgress
        elif RandomizationManagerStat.getMcSamplingScheme(statKwArgs) == 'MCFDR':
            progressClass = McFdrProgress
        else:
            #print 'KWARGS: ',self._kwArgs, self._args
            progressClass = StandardProgress

        #self._progress = progressClass(self.getNumUserBins(), printProgress, description=\
        #                    '<p><b>Analyzing ' + str(self._track.trackName) + \
        #                    (' vs ' + str(self._track2.trackName) if self._track2 is not None else '') + ' using statistic: ' + \
        #                    self._statClass.__name__ + '</b><br><br> Performing local analysis: <br>')
        if hasattr(self, '_analysis'):
            nspi = self._analysis.getChoice('numSamplesPerChunk')
        else:
            nspi = self._kwArgs.get('numSamplesPerChunk')

        self._progress = progressClass(self.getNumUserBins(), printProgress, description=\
                            '<b>Analyzing ' + str(self._track.trackName) + \
                            (' vs ' + str(self._track2.trackName) if self._track2 is not None else '') + ' using statistic: ' + \
                            self._statClass.__name__ + '</b>\n\nPerforming local analysis:',\
                            numSamplesPerIteration=nspi)
        self._progress.addCount(0)

    def run(self,
            reset=True,
            printProgress=PRINT_PROGRESS,
            flushMemoized=True):
        if reset:
            MagicStatFactory.resetMemoDict()

        results = self._emptyResults()
        try:
            self._checkNumUserBinsIsValid()
        except Exception, e:
            results.addError(e)
            return results

        self._initProgress(printProgress)

        if USE_PARALLEL and not ('minimal' in self._kwArgs
                                 and self._kwArgs['minimal']):
            from quick.application.parallel.JobHandler import JobHandler
            from quick.application.parallel.JobWrapper import JobWrapperFactory
            jobWrapper = JobWrapperFactory.makeJobWrapper(self)
            #If we are using another statistic wrapper (i.e. monte carlo), the parallelization happens
            #"further down" due to how the HB works and we do not do anything here
            if jobWrapper.__class__.__name__ == "StatisticJobWrapper":
                if 'uniqueId' in self._kwArgs:
                    uniqueId = self._kwArgs["uniqueId"]
                else:
                    uniqueId = datetime.datetime.now().strftime('%Y%m%d-%H%M%S%f')
                jobHandler = JobHandler(uniqueId, True, restrictions=[])
                jobHandler.run(jobWrapper)

        startLocal = time.time()

        try:
            while True:
                stats = self._doLocalAnalysis(results, stats=[])
                #stats[0] is used to call class method
                if self._kwArgs.get('minimal') == True:
                    break

                numNotDetermined = stats[0].validateAndPossiblyResetLocalResults(stats) \
                    if len(stats) > 0 else 0
                self._progress.addFullLocalAnalysisIteration(numNotDetermined)
                if len(stats) == 0 or numNotDetermined == 0:
                    break
                #print 'Continuing McFdr'

            localAnalysisTime = time.time() - startLocal
            if USE_PARALLEL and not ('minimal' in self._kwArgs
                                     and self._kwArgs['minimal']):
                print "local analysis took %f seconds" % localAnalysisTime

            #startGlobal = time.time()
            #import pdb
            #pdb.set_trace()
            self._progress.globalAnalysisStarted()
            self._progress.printMessage('\nPerforming global analysis...')
            while True:
                stat = self._doGlobalAnalysis(results, stats)
                if stat is None:
                    break
                nonDetermined, mValue, mThreshold, pValue, pThreshold = \
                    stat.validateAndPossiblyResetGlobalResult(stat)
                self._progress.addGlobalAnalysisIteration(mValue, mThreshold, pValue, pThreshold)

                if nonDetermined == 0:
                    break
        finally:
            if flushMemoized:
                ResultsMemoizer.flushStoredResults()

        self._progress.globalAnalysisEnded()
        #print "<br>global analysis took %f seconds" % (time.time() - startGlobal)
        self._endProgress()
        return results
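Note how run() wraps the whole local/global analysis loop in try/finally so that memoized results accumulated so far are flushed to disk even when an analysis step raises. A minimal sketch of that pattern in isolation (doAnalysis is a hypothetical callable standing in for the loop body above):

def runWithFlush(doAnalysis, flushMemoized=True):
    try:
        return doAnalysis()
    finally:
        # Runs on success and on exception alike, before propagation.
        if flushMemoized:
            ResultsMemoizer.flushStoredResults()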