Esempio n. 1
0
class AnalysisDefJob(StatJob):
    '''
    StatJob that is set up from an analysis definition.

    An Analysis object is built from the analysis definition, the genome and
    the two track names; the tracks and the statistic obtained from it are
    handed on to StatJob.

    @takes(AnalysisDefJob, str, list, list, UserBinSource)
    @returns Results
    '''
    def __init__(self, analysisDef, trackName1, trackName2, userBinSource,
                 genome=None, galaxyFn=None, *args, **kwArgs):
        '''
        analysisDef   -- analysis definition, passed on to Analysis
        trackName1/2  -- track names, passed on to Analysis
        userBinSource -- passed on to StatJob; its genome attribute is used
                         when genome is None
        genome        -- genome; defaults to userBinSource.genome
        galaxyFn      -- stored on the job; when set, run() passes it to the
                         profiler's call graph link
        Any further positional and keyword arguments are forwarded to
        StatJob.__init__.
        '''
        from gold.description.Analysis import Analysis

        # To be removed later; kept for convenience during development.
        # Currently only read by _handleMissingStat for its error message.
        self._analysisDef = analysisDef

        if genome is None:
            genome = userBinSource.genome

        self._galaxyFn = galaxyFn

        self._analysis = Analysis(analysisDef, genome, trackName1, trackName2)
        self._setRandomSeedIfNeeded()

        track, track2 = self._analysis.getTracks()
        StatJob.__init__(self, userBinSource, track, track2,
                         self._analysis.getStat(), *args, **kwArgs)

    def _setRandomSeedIfNeeded(self):
        '''
        Replaces the 'randomSeed' choice of the analysis with a concrete seed
        from getRandomSeed() when the choice is currently 'Random'.
        '''
        from gold.util.RandomUtil import getRandomSeed
        if self._analysis.getChoice('randomSeed') == 'Random':
            seedText = str(getRandomSeed())
            self._analysis.changeChoices('randomSeed', [(seedText,) * 2])

    def run(self, printProgress=PRINT_PROGRESS):
        '''
        Runs the statistic specified in self._analysis (from analysisDef) via
        StatJob.run and returns its result, after attaching the analysis and
        its text representation to it.

        When DebugConfig.USE_PROFILING is set, the run is executed under the
        profiler and the profiling statistics are printed afterwards.

        Raises Exception (through _handleMissingStat) if no statistic class is
        set on the job.
        '''
        # Should be there for batch runs.. Should never happen from GUI..
        # NOTE(review): _statClass is presumably set by StatJob.__init__ - confirm.
        if self._statClass is None:
            self._handleMissingStat()
            # _handleMissingStat raises; this return is only reached if an
            # override of it does not.
            return None

        # The profiling flag is read once, so that the statistics are printed
        # exactly when a profiler was actually created.
        profiler = None
        if DebugConfig.USE_PROFILING:
            from gold.util.Profiler import Profiler
            profiler = Profiler()
            # The profiled statement is given as a string, so the result is
            # passed back through a dict that the statement can write to.
            resDict = {}
            profiler.run('resDict[0] = StatJob.run(self, printProgress=printProgress)', globals(), locals())
            res = resDict[0]
        else:
            res = StatJob.run(self, printProgress=printProgress)

        res.setAnalysis(self._analysis)
        res.setAnalysisText(str(self._analysis))

        ResultsMemoizer.flushStoredResults()

        if profiler is not None:
            profiler.printStats()
            if DebugConfig.USE_CALLGRAPH and self._galaxyFn:
                profiler.printLinkToCallGraph(['profile_AnalysisDefJob'], self._galaxyFn)

        return res

    def _handleMissingStat(self):
        '''
        Logs an error message about the invalid statistic, including the
        analysis definition, and raises Exception with the same message.
        '''
        from gold.application.LogSetup import logMessage, logging
        msg = 'Started run with invalid statistic... Please run with debug mode set to "Debug by raising hidden ' \
              'exceptions" to see underlying problem. ' \
              'Def: ' + self._analysisDef
        logMessage(msg, level=logging.ERROR)
        raise Exception(msg)
Esempio n. 2
0
    def getRevEngBatchLine(analysisDef, trackNames, cleanedTrackNames, regSpec,
                           binSpec, genome, **kwArgs):
        '''
        Builds the one-line batch specification corresponding to an analysis,
        followed by HTML links for executing it in the batch run tool.

        analysisDef       -- analysis definition, assumed to be unquoted
        trackNames        -- the two track names, each a sequence of name
                             parts; the second may be None
        cleanedTrackNames -- the two track names handed to Analysis
        regSpec           -- region specification, used as is
        binSpec           -- bin specification; must be unchanged by unquote
        genome            -- genome, placed first in the batch line and used
                             as dbkey in the links
        kwArgs            -- forwarded to Analysis

        Returns the batch line plus links as an HTML string. If no valid
        statistic is found, or if an exception occurs, a fixed message string
        is returned instead; the exception is logged rather than propagated.
        '''
        # NOTE: genome is not prefixed to regSpec. For that to work, regSpec
        # would first have to be checked explicitly against special keywords.

        # Bound before the try block so that the except handler can log it
        # even when the failure happened before Analysis was constructed.
        analysis = None
        try:
            if DebugConfig.VERBOSE:
                logMessage('getting RevEngBatchLine:')

            analysis = Analysis(analysisDef, genome, cleanedTrackNames[0],
                                cleanedTrackNames[1], **kwArgs)

            # To assure that unquote can be safely applied to binSpec without
            # any consequences (we don't want to always quote, but still want
            # the possibility to use quoted history track names)
            assert unquote(binSpec) == binSpec

            quotedTrackName1 = ':'.join(
                [quote(x, safe='') for x in trackNames[0]])
            if trackNames[1] is not None:
                quotedTrackName2 = ':'.join(
                    [quote(x, safe='') for x in trackNames[1]])
            else:
                quotedTrackName2 = 'None'

            intensityChoice = analysis.getChoice('trackNameIntensity')
            if intensityChoice:
                quotedIntensityTrackName = quote(intensityChoice, safe='^|')
                analysis.changeChoices('trackNameIntensity',
                                       [(quotedIntensityTrackName, ) * 2])

            stat = analysis.getStat()
            if stat is None:
                return 'No corr batch line, as no valid statistic was found.. '
            statClassName = stat.__name__

            # Hypothesis-related choices are left out of the parameter list.
            excludedChoices = ('H0', 'H1_more', 'H1_less', 'H1_different',
                               'H1_ha1', 'H1_ha2', 'H1_ha3', 'H1_ha4',
                               'H1_ha5')
            #fixme: Add space, but this is not checked in batchrunner...
            params = ','.join(
                ['='.join(choicePair) for choicePair
                 in analysis.getAllChoices(filterByActivation=True).items()
                 if choicePair[0] not in excludedChoices])
            statText = statClassName + '(' + params + ')'

            batchElements = [
                genome, regSpec, binSpec, quotedTrackName1, quotedTrackName2,
                statText
            ]
            oneLineBatch = BATCH_COL_SEPARATOR.join(batchElements)

            # Under construction: variable based version of the batch spec,
            # i.e. one assignment per variable followed by a batch line that
            # refers to the variables.
            from collections import OrderedDict
            batchVariables = OrderedDict([('@REGION', regSpec),
                                          ('@BINNING', binSpec),
                                          ('@TN1', quotedTrackName1),
                                          ('@TN2', quotedTrackName2),
                                          ('@ANALYSIS', statText)])
            # list() keeps the concatenation valid also where keys() returns
            # a view instead of a list.
            batchComposition = BATCH_COL_SEPARATOR.join(
                [genome] + list(batchVariables.keys()))
            fullBatchList = [
                '='.join(assignment) for assignment in batchVariables.items()
            ] + [batchComposition]

            batchLinkDef = '<a href="%s/hyper?mako=generictool&tool_id=hb_batch_run_tool&command=%s&dbkey=%s">%s</a>'
            oneLineBatchLink = batchLinkDef % (URL_PREFIX, quote(oneLineBatch),
                                               genome, 'single line version')
            fullBatchLink = batchLinkDef % (URL_PREFIX,
                                            quote('\n'.join(fullBatchList)),
                                            genome, 'variable based version')

            return oneLineBatch + '<br><br>Execute batchline in ' + oneLineBatchLink + ' / ' + fullBatchLink

        except Exception as e:
            logException(e, logging.WARNING,
                         'Could not generate corresponding batch line: ')
            # Slicing instead of indexing, so that fewer than two supplied
            # track names do not raise a new IndexError inside this handler.
            logMessage('analysis, genome, trackName1, trackName2: \n' +
                       str([analysis, genome] + list(trackNames[:2])))
            return 'Warning: Could not generate corresponding batch line.'
Esempio n. 3
0
class AnalysisDefJob(StatJob):
    '''
    StatJob that is set up from an analysis definition.

    An Analysis object is built from the analysis definition, the genome and
    the two track names; the tracks and the statistic obtained from it are
    handed on to StatJob.
    '''
    #@takes(AnalysisDefJob, str, list, list, UserBinSource)
    #@returns Results
    def __init__(self, analysisDef, trackName1, trackName2, userBinSource, genome=None, *args, **kwArgs):
        '''
        analysisDef   -- analysis definition, passed on to Analysis
        trackName1/2  -- track names, passed on to Analysis
        userBinSource -- passed on to StatJob; its genome attribute is used
                         when genome is None
        genome        -- genome; defaults to userBinSource.genome
        Any further positional and keyword arguments are forwarded to
        StatJob.__init__.
        '''
        from gold.description.Analysis import Analysis

        # To be removed later; kept for convenience during development.
        # Currently only read by _handleMissingStat for its error message.
        self._analysisDef = analysisDef

        if genome is None:
            genome = userBinSource.genome

        self._analysis = Analysis(analysisDef, genome, trackName1, trackName2)
        self._setRandomSeedIfNeeded()

        track, track2 = self._analysis.getTracks()
        StatJob.__init__(self, userBinSource, track, track2, self._analysis.getStat(), *args, **kwArgs)

    def _setRandomSeedIfNeeded(self):
        '''
        Replaces the 'randomSeed' choice of the analysis with a concrete seed
        from getRandomSeed() when the choice is currently 'Random'.
        '''
        from gold.util.RandomUtil import getRandomSeed
        if self._analysis.getChoice('randomSeed') == 'Random':
            seedText = str(getRandomSeed())
            self._analysis.changeChoices('randomSeed', [(seedText,) * 2])

    def run(self, printProgress=PRINT_PROGRESS):
        '''
        Runs the statistic specified in self._analysis (from analysisDef) via
        StatJob.run and returns its result, after attaching the analysis and
        its text representation to it.

        When USE_PROFILING is set, the run is executed under the profiler and
        the profiling statistics are printed afterwards.

        Raises Exception (through _handleMissingStat) if no statistic class is
        set on the job.
        '''
        # Should be there for batch runs.. Should never happen from GUI..
        # NOTE(review): _statClass is presumably set by StatJob.__init__ - confirm.
        if self._statClass is None:
            self._handleMissingStat()
            # _handleMissingStat raises; this return is only reached if an
            # override of it does not.
            return None

        # The profiling flag is read once, so that the statistics are printed
        # exactly when a profiler was actually created.
        profiler = None
        if USE_PROFILING:
            profiler = Profiler()
            # The profiled statement is given as a string, so the result is
            # passed back through a dict that the statement can write to.
            resDict = {}
            profiler.run('resDict[0] = StatJob.run(self, printProgress=printProgress)', globals(), locals())
            res = resDict[0]
        else:
            res = StatJob.run(self, printProgress=printProgress)

        res.setAnalysis(self._analysis)
        res.setAnalysisText(str(self._analysis))

        ResultsMemoizer.flushStoredResults()

        if profiler is not None:
            profiler.printStats()

        return res

    def _handleMissingStat(self):
        '''
        Logs an error message about the invalid statistic, including the
        analysis definition, and raises Exception with the same message.
        '''
        from gold.application.LogSetup import logMessage, logging
        msg = 'Started run with invalid statistic... Def: ' + self._analysisDef
        logMessage(msg, level=logging.ERROR)
        raise Exception(msg)