def __str__(self):
        """Render the results as an HTML string.

        Returns the cached ``self._str`` when it is set. For empty results,
        either the generated error text or a fixed "no results" line is
        returned. Otherwise adjusted p-values are inferred, the presenter list
        is rebuilt and the introductory text is generated.

        NOTE(review): this excerpt ends right after the introductory text has
        been computed; the code that consumes ``startText`` and ``hideTable``
        is not visible here.
        """
        if self._str is not None:
            return self._str

        if self._results.isEmpty():
            if len(self._results.getAllErrors() ) > 0:
                return str(self._generateErrorText(HtmlCore))
            else:
                return str( HtmlCore().line('This analysis gave no results (might be due to too limited data). '))
                
        self._results.inferAdjustedPvalues()
        self._presenters = []
        # Presenters are only added when there is at least one region key
        if len(self._results.getAllRegionKeys()) > 0:
            self._addAllPresenters()        
        #print self._generateHeader()
        #print self._generateErrorText()
        #print self._generateTable(presenters)
        
        hideTable = False
        coreCls = HtmlCore
        
        try:
            # Significance tests get an answer text (and hideTable is set);
            # everything else gets the plain header.
            if self._results.isSignificanceTesting():
                startText = self._generateAnswerText(coreCls)
                hideTable = True
            else:
                startText = self._generateHeader(coreCls)
        except Exception,e:
            # Fall back to the plain header when text generation fails
            startText = self._generateHeader(coreCls)
            # NOTE(review): the same exception is logged twice, with two
            # different messages - confirm whether one call can be dropped.
            logException(e, message='Error producing autogenerated result')
            logException(e,message='Error in auto-generated answer')
Example #2
0
 def __exit__(self, type, value, traceback):
     """Decide whether an exception leaving the ``with`` block is suppressed.

     Bin-related errors are logged. Computation errors are suppressed (True is
     returned) when they are logged, or when
     DebugConfig.PASS_ON_COMPUTE_EXCEPTIONS is off. In every other case None is
     returned, so the exception (if any) propagates.
     """
     if type in (TooLargeBinError, TooSmallBinError, CentromerError):
         logException(value)

     isComputeError = type in (ZeroDivisionError, FloatingPointError, TypeError,
                               ValueError, OutsideBoundingRegionError)
     if isComputeError:
         # TypeError/ValueError are always logged; the others only when verbose
         mustLog = DebugConfig.VERBOSE or type in (TypeError, ValueError)
         if mustLog:
             logException(value, message='kwArgs: ' + str(self._kwArgs))
             return True
         if not DebugConfig.PASS_ON_COMPUTE_EXCEPTIONS:
             return True
Example #3
0
 def createChildren(self):
     """Create the first child object from the next bin, unless already done.

     Nothing happens when a result exists or a current child is already set.
     Raises ShouldNotOccurError when the bin iterator is exhausted.
     """
     alreadyHandled = self.hasResult() or self._curChild is not None
     if alreadyHandled:
         return

     self._trace('_createChildren')
     try:
         firstBin = self._bins.next()
         self._curChild = self._getChildObject(firstBin)
     except StopIteration as e:
         logException(e)
         raise ShouldNotOccurError('Splittable statistic should not have zero bins!')
 def syncH1WithTail(self):
     """Copy the current tail choice onto the H1 option, when both options exist.

     Failures are logged as warnings and are not propagated.
     """
     optionKeys = self.getAllOptionsAsKeys()
     if self.H1_KEY in optionKeys and self.TAIL_KEY in optionKeys:
         # Bound before the try block so that the handler below can report the
         # value without calling getChoice() again; a second call could raise
         # from inside the handler and hide the original error.
         tailChoice = None
         try:
             tailChoice = self.getChoice(self.TAIL_KEY)
             self.setChoice(self.H1_KEY, tailChoice)
         except ShouldNotOccurError as e:
             logException(e, logging.WARNING,'Could not find H1, probably mismatch between tail and H1 in analysisDef (tail choice: %s)' % tailChoice )
         except Exception as e:
             logException(e, logging.WARNING,'Could not find H1')
Example #5
0
    def _determineStatClass(self):
        """Try each candidate statistic class with a minimal StatJob run.

        For every class in ``self._statClassList`` a StatJob is run on a
        MinimalBinSource for the genome. Known incompatibility errors are
        either re-raised (when DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS is on)
        or optionally logged.

        NOTE(review): this excerpt ends inside the OSError handler; what the
        method returns is not visible here.
        """
        assert( hasattr(self, '_track') )
        assert( hasattr(self, '_track2') )
        dummyGESource = MinimalBinSource(self._genome)

        if len(self._statClassList)==0:
            #logging.getLogger(HB_LOGGER).warning('Stat class list is empty, for analysisDef: ' + self._analysisLine)
            logMessage('Stat class list is empty, for analysisDef: ' + self._analysisLine, level = logging.WARNING)
            if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                raise ShouldNotOccurError('Stat class list is empty. Analysisdef: '+self._analysisLine)
        
        for statClass in self._statClassList:
            if DebugConfig.VERBOSE:
                logMessage(statClass.__name__ + ': Trying (' + self.getDefAfterChoices() + ')')
#                print statClass.__name__ + ': Trying (' + self.getDefAfterChoices() + ')'

            #for reversed, trackA, trackB in [(False, self._track, self._track2), (True, self._track2, self._track) ]:
            # Only the original track order is tried (single-element list)
            for trackA, trackB in [[self._track, self._track2]]:
                if trackA == None:
                    continue

                try:
                    StatJob(dummyGESource, trackA, trackB, statClass, minimal=True, **self.getChoices()).run(False)
                    #In order not to mess up integration tests
                    initSeed()
                    # A track whose formatConverters is still None after the
                    # run is reported as incompatible.
                    for track in [trackA, trackB]:
                        if track is not None and track.formatConverters is None:
                            raise IncompatibleTracksError('Track ' + prettyPrintTrackName(track.trackName) +\
                                                          'was created, but not touched by statistic')
                    
                except IncompatibleTracksError, e:
                    if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                        raise
                    if DebugConfig.VERBOSE:
                        logException(e, message='(Warning: error in _determineStatClass for stat: %s)' % statClass.__name__)
                    #if VERBOSE:
                    #    print 'Incompatible tracks: ', \
                    #          statClass.__name__ + ': ' + e.__class__.__name__ + ': ' + str(e)
                    #    print 'Incompatible: ', e
                except (AssertionError, IncompatibleAssumptionsError, IdenticalTrackNamesError), e:
                    if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                        raise
                    if DebugConfig.VERBOSE:
                        logException(e, message='(Warning: error in _determineStatClass for stat: %s)' % statClass.__name__)
                    #if VERBOSE:
                    #    print 'Warning: exception in getStat: ', \
                    #        statClass.__name__ + ': ' + e.__class__.__name__ + ': ' + str(e)
                    #    traceback.print_exc(file=sys.stdout)
                except OSError, e:
                    # An OSError is only tolerated when its text mentions
                    # 'withOverlaps'; every other OSError is re-raised.
                    if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                        raise
                    elif not 'withOverlaps' in str(e):
                        raise
 def _addPresenter(self, presenterClass, sendHeader=False, **kwArgs):
     #print 'Generating figure: ',presenterClass.__name__,'<br>'
     try:
         pres = presenterClass(self._results, self._baseDir, *([self._getHeader()] if sendHeader else []), **kwArgs ) 
         self._presenters.append( pres)
         return pres
     except SilentError:
         return None
     except Exception,e:
         logException(e, WARNING, 'Error generating figure with ' + str(presenterClass.__name__))
         print 'Error generating figure with ', presenterClass.__name__, '(',Exception,' - ',e,')'
         return None
 def storePickledResults(self):
     """Pickle shallow copies of the results to a run-specific 'results.pickle' file.

     Each copy has its _analysis attribute set to None before dumping. Any
     failure is logged and not propagated.

     NOTE(review): the file object returned by getFile() is not closed here -
     confirm whether GalaxyRunSpecificFile takes care of that.
     """
     try:
         from cPickle import dump
         pickleStaticFile = GalaxyRunSpecificFile(['results.pickle'], self._galaxyFn)
         from copy import copy

         strippedResults = []
         for res in self._resultsList:
             strippedResults.append(copy(res))
         for resCopy in strippedResults:
             resCopy._analysis = None

         dump(strippedResults, pickleStaticFile.getFile())
     except Exception as e:
         logException(e, message='Not able to pickle results object')
Example #8
0
    def __exit__(self, type, value, traceback):
        """Decide whether an exception leaving the ``with`` block is suppressed.

        Bin-related errors are logged. Computation errors are suppressed (True
        is returned) when they are logged at DEBUG level, or when
        DebugConfig.PASS_ON_COMPUTE_EXCEPTIONS is off. In every other case
        None is returned, so the exception (if any) propagates.
        """
        binErrors = (TooLargeBinError, TooSmallBinError, CentromerError)
        if type in binErrors:
            logException(value)

        computeErrors = (ZeroDivisionError, FloatingPointError, TypeError,
                         ValueError, OutsideBoundingRegionError)
        if type in computeErrors:
            # TypeError/ValueError are always logged; the others only when verbose
            if DebugConfig.VERBOSE or type in (TypeError, ValueError):
                kwArgsText = 'kwArgs: ' + str(self._kwArgs)
                logException(value, level=logging.DEBUG, message=kwArgsText)
                return True
            if not DebugConfig.PASS_ON_COMPUTE_EXCEPTIONS:
                return True
Example #9
0
 def _addPresenter(self, presenterClass, sendHeader=False, **kwArgs):
     #print 'Generating figure: ',presenterClass.__name__,'<br>'
     try:
         pres = presenterClass(self._results, self._baseDir,
                               *([self._getHeader()] if sendHeader else []),
                               **kwArgs)
         self._presenters.append(pres)
         return pres
     except SilentError:
         return None
     except Exception, e:
         logException(
             e, WARNING,
             'Error generating figure with ' + str(presenterClass.__name__))
         print 'Error generating figure with ', presenterClass.__name__, '(', Exception, ' - ', e, ')'
         return None
 def getRevEngBatchLine(trackName1, trackName2, cleanedTrackName1, cleanedTrackName2, analysisDef, \
                        regSpec, binSpec, genome, manualSeed, **kwArgs):
     """Build the batch command line that corresponds to an analysis.

     analysisDef is assumed to be unquoted. The result is the genome, region
     and bin specification, the quoted track names and the statistic text,
     joined by BATCH_COL_SEPARATOR. A fixed message is returned when no valid
     statistic is found, and a warning text when anything fails (the failure
     is logged).
     """
     try:
         if DebugConfig.VERBOSE:
             logMessage('getting RevEngBatchLine:')

         analysis = Analysis(analysisDef, genome, cleanedTrackName1, cleanedTrackName2, **kwArgs)
         stat = analysis.getStat()
         if stat is None:
             return 'No corr batch line, as no valid statistic was found.. '

         statClassName = stat.__name__

         # Hypothesis-related choices are left out of the parameter list
         skippedKeys = ['H0','H1_more','H1_less','H1_different','H1_ha1','H1_ha2','H1_ha3','H1_ha4','H1_ha5']
         paramTexts = []
         for choicePair in analysis.getChoices().items():
             if choicePair[0] in skippedKeys:
                 continue
             # A 'Random' seed choice is replaced by the manual seed, if given
             useManualSeed = (manualSeed is not None and choicePair[0] == 'randomSeed'
                              and choicePair[1] == 'Random')
             if useManualSeed:
                 paramTexts.append('='.join([choicePair[0], str(manualSeed)]))
             else:
                 paramTexts.append('='.join(choicePair))

         #fixme: Add space, but this is not checked in batchrunner...
         statText = statClassName + '(' + ','.join(paramTexts) + ')'

         # To assure that unquote can be safely applied to binSpec without any
         # consequences (we don't want to always quote, but still want the
         # possibility to use quoted history track names)
         assert unquote(binSpec) == binSpec

         quotedTrackName1 = ':'.join([quote(x, safe='') for x in trackName1])
         if trackName2 is not None:
             quotedTrackName2 = ':'.join([quote(x, safe='') for x in trackName2])
         else:
             quotedTrackName2 = 'None'

         batchElements = [genome, regSpec, binSpec, quotedTrackName1, quotedTrackName2, statText]
         return BATCH_COL_SEPARATOR.join(batchElements)

     except Exception as e:
         logException(e,logging.WARNING,'Could not generate corresponding batch line: ')
         logMessage('analysisDef, genome, trackName1, trackName2: \n' +
                    str([analysisDef, genome, trackName1, trackName2]) )
         return 'Warning: Could not generate corresponding batch line.' 
 def _getSubCls(origCls, region):
     """Look up the 'Splittable' variant of origCls for a region that can be split.

     The splittable class is returned for an iterable region, or for a single
     splittable region when the class is not an OnlyGloballySplittable
     subclass. A KeyError from the lookup is logged in verbose mode. In all
     other cases this excerpt falls through without an explicit return.
     """
     if isIter(region) or CompBinManager.canBeSplitted(region):
         try:
             splittableClass = MagicStatFactory._getClass(origCls.__name__, 'Splittable')
             # A global (iterable) region always uses the splittable class; a
             # single region only when the class also accepts splitting of
             # userbins.
             if isIter(region) or not issubclass(splittableClass, OnlyGloballySplittable):
                 return splittableClass
         except KeyError as e:
             if DebugConfig.VERBOSE:
                 logException(e, message="In MagicStatFactory._getSubCls: ")
Example #12
0
 def getDemoURL(self):
     """Assemble the demo URL query string from the prototype's demo selections.

     Each input id (replaced by 'dbkey' for '__genome__' inputs) is appended
     as '&id=value'. The value is read as an attribute of the demo selections
     and, failing that, by position. Any error is logged and leaves url as
     None.

     NOTE(review): this excerpt ends before url is returned or used.
     """
     try:
         demo = self.prototype.getDemoSelections()
         url = '?mako=generictool&tool_id=' + self.toolId
         for i, inputId in enumerate(self.inputIds):
             if self.inputTypes[i] == '__genome__':
                 inputId = 'dbkey'
             try:
                 val = getattr(demo, inputId)
             except Exception:
                 # Not a bare except: KeyboardInterrupt/SystemExit must not be
                 # swallowed by the positional fallback.
                 val = demo[i]
             url += '&' + inputId + '=' + val
     except Exception as e:
         from gold.application.LogSetup import logException
         logException(e)
         url = None
Example #13
0
    def _calcChrInfo(self):
        """Compute chromosome counts and genome lengths for self.genome.

        Sets numChrs, numExtChrs, numBps and numBpsWithExt (None when there
        are no extended chromosomes). Exceptions are swallowed; they are only
        logged, together with the current stack, on experimental
        installations.
        """
        try:
            self.numChrs = len(GenomeInfo.getChrList(self.genome))
            extendedChrList = GenomeInfo.getExtendedChrList(self.genome)
            self.numExtChrs = len(extendedChrList) - self.numChrs
            self.numBps = self.getGenomeLen(self.genome, standardChrs=False)
            self.numBpsWithExt = self.getGenomeLen(self.genome, standardChrs=True) \
                if self.numExtChrs > 0 else None

        except Exception as e:
            if IS_EXPERIMENTAL_INSTALLATION:
                from gold.application.LogSetup import logException, logMessage
                logMessage('Exception for genome: %s' % self.genome)
                logException(e)
                import traceback
                stackText = ''.join(traceback.format_stack())
                logMessage(stackText)
Example #14
0
    def getDemoURL(self):
        """Assemble the demo URL query string from the prototype's demo selections.

        Each input id (replaced by "dbkey" for "__genome__" inputs) is
        appended as "&id=value". The value is read as an attribute of the demo
        selections and, failing that, by position. Any error is logged and
        leaves url as None.

        NOTE(review): this excerpt ends before url is returned or used.
        """
        try:
            demo = self.prototype.getDemoSelections()
            url = "?mako=generictool&tool_id=" + self.toolId
            for i, inputId in enumerate(self.inputIds):
                if self.inputTypes[i] == "__genome__":
                    inputId = "dbkey"
                try:
                    val = getattr(demo, inputId)
                except Exception:
                    # Not a bare except: KeyboardInterrupt/SystemExit must not
                    # be swallowed by the positional fallback.
                    val = demo[i]
                url += "&" + inputId + "=" + val
        except Exception as e:
            from gold.application.LogSetup import logException

            logException(e)
            url = None
 def _getSubCls(origCls, region):
     """Look up the 'Splittable' variant of origCls for a region that can be split.

     The splittable class is returned for an iterable region, or for a single
     splittable region when the class is not an OnlyGloballySplittable
     subclass. A KeyError from the lookup is logged at DEBUG level in verbose
     mode. In all other cases this excerpt falls through without an explicit
     return.
     """
     if isIter(region) or CompBinManager.canBeSplitted(region):
         try:
             statName = origCls.__name__
             splittableClass = MagicStatFactory._getClass(statName, 'Splittable')
             # A global (iterable) region always uses the splittable class; a
             # single region only when the class also accepts splitting of
             # userbins.
             if isIter(region) or not issubclass(splittableClass, OnlyGloballySplittable):
                 return splittableClass
         except KeyError as e:
             if DebugConfig.VERBOSE:
                 logException(e, level=logging.DEBUG, message="In MagicStatFactory._getSubCls: ")
Example #16
0
    def validateRegAndBinSpec(self, regSpec, binSpec):
        """Validate a region/bin specification and return an error text.

        Returns a message prefixed with "Error in specification of analysis
        regions: " when the user bin source info rejects the specification.
        Otherwise the user bin source is fetched: when it yields no bins, the
        zero-bins validation message is returned; when fetching fails, the
        exception is logged and described in the returned text. When there is
        nothing to report, the (falsy) result of the initial validation is
        returned.
        """
        ubSourceInfo = self._getUserBinSourceInfo(regSpec)
        errorString = ubSourceInfo.validateRegAndBinSpec(regSpec, binSpec)

        if errorString:
            return "Error in specification of analysis regions: " + errorString

        try:
            ubSource = self.getUserBinSource(regSpec, binSpec)
            # Only the existence of a first bin matters, not its truth value;
            # a bin object that evaluates as false must still count as a bin.
            hasBins = any(True for _ in ubSource)

            if not hasBins:
                errorString = ubSourceInfo.getZeroBinsValidationMessage(
                    regSpec, binSpec)

        except Exception as e:
            from gold.application.LogSetup import logException
            logException(e)
            errorString = "Error fetching genome region using the specification of analysis regions: %s" % e

        return errorString
Example #17
0
    def _tryAnalysisDefForValidity(analysisDef,
                                   genome,
                                   trackName1,
                                   trackName2,
                                   tryReversed=True):
        """Return the first valid Analysis for the tracks, with its orientation.

        The original track order is tried first and, when tryReversed is true,
        the swapped order second. Returns a tuple (analysis, reversed) for the
        first analysis whose isValid() is true. Exceptions are logged in
        verbose mode and re-raised only when
        DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS is set.
        """
        if DebugConfig.VERBOSE:
            logMessage('Trying analysisDef: ' + str(analysisDef))
        try:
            trackOrders = [(trackName1, trackName2, False)]
            if tryReversed:
                trackOrders.append((trackName2, trackName1, True))

            for tnA, tnB, isReversed in trackOrders:
                analysis = Analysis(analysisDef, genome, tnA, tnB, isReversed)
                if analysis.isValid():
                    return analysis, isReversed
        except Exception as e:
            if DebugConfig.VERBOSE:
                logException(e)
            if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                raise
Example #18
0
 def _tryAnalysisForValidity(cls,
                             analysis,
                             genome,
                             trackName1,
                             trackName2,
                             tryReversed=True):
     """Return the first listable Analysis for the tracks, with its orientation.

     The swapped track order is only tried when tryReversed is true, the track
     formats are not equal and trackName2 is neither None nor an empty list.
     Returns a tuple (analysis, reversed) for the first analysis whose
     isValidForListing() is true. Exceptions are logged in verbose mode and
     re-raised only when DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS is set.
     """
     if DebugConfig.VERBOSE:
         logMessage('Trying analysisDef: ' + str(analysis.getDef()))
     try:
         if cls._trackFormatsAreEqual(genome, trackName1, trackName2):
             tryReversed = False

         trackOrders = [(trackName1, trackName2, False)]
         if tryReversed and trackName2 not in [None, []]:
             trackOrders.append((trackName2, trackName1, True))

         for tnA, tnB, isReversed in trackOrders:
             # 'analysis' is rebound here, so a second iteration is created
             # from the analysis produced by the first one.
             analysis = Analysis.createFromParsedAnalysis(analysis, genome, tnA, tnB, isReversed)
             if analysis.isValidForListing():
                 return analysis, isReversed
     except Exception as e:
         if DebugConfig.VERBOSE:
             logException(e)
         if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
             raise
Example #19
0
    def _calcAndStoreChrInfo(self):
        """Fill in missing chromosome counts and genome lengths, then store.

        Each of numChrs, numExtChrs, numBps and numBpsWithExt is recomputed
        when the attribute is missing or equal to 0. self.store() is called
        when at least one of them was recomputed. Exceptions are swallowed;
        they are only logged, together with the current stack, on
        experimental installations.
        """
        def _isUnset(attrName):
            # True when the attribute is absent or compares equal to 0
            return not hasattr(self, attrName) or getattr(self, attrName) == 0

        try:
            dirty = False

            if _isUnset('numChrs'):
                self.numChrs = len(GenomeInfo.getChrList(self.genome))
                dirty = True

            if _isUnset('numExtChrs'):
                extendedChrList = GenomeInfo.getExtendedChrList(self.genome)
                self.numExtChrs = len(extendedChrList) - self.numChrs
                dirty = True

            if _isUnset('numBps'):
                self.numBps = self.getGenomeLen(self.genome, standardChrs=False)
                dirty = True

            if _isUnset('numBpsWithExt'):
                self.numBpsWithExt = self.getGenomeLen(self.genome, standardChrs=True) \
                    if self.numExtChrs > 0 else None
                dirty = True

            if dirty:
                self.store()

        except Exception as e:
            if IS_EXPERIMENTAL_INSTALLATION:
                from gold.application.LogSetup import logException, logMessage
                logMessage('Exception for genome: %s' % self.genome)
                logException(e)
                import traceback
                stackText = ''.join(traceback.format_stack())
                logMessage(stackText)
    def getBoundingRegionTuples(self):
        """Return the bounding region tuples; always a new empty list here."""
        return list()
        
    def parseFirstDataLine(self):
        """Fetch the first element from a fresh iterator over this source.

        Returns a tuple (ge, geIter) with the first element and the iterator,
        whose warning printing has been switched off. Raises Warning when the
        iterator yields nothing (after writing the track name to stderr); any
        other exception is logged and re-raised.
        """
        try:
            geIter = self.__iter__()
            geIter._printWarnings = False
            ge = geIter.next()
        except StopIteration as e:
            if geIter.anyWarnings():
                lastWarningMsg = ' Last warning when parsing file: %s' % geIter.getLastWarning()
            else:
                lastWarningMsg = ''
            print >> sys.stderr, '%s' % self._trackName
            raise Warning('File has no valid data lines.%s' % lastWarningMsg)
        except Exception as e:
            logException(e)
            raise
        return ge, geIter
    
    def getPrefixList(self):
        """Return the prefixes present in the first data line, computed once.

        The list holds the standard prefixes whose value on the first element
        is not None, followed by the element's ordered extra keys when its
        'extra' attribute is set. The result is cached in self._prefixList.
        """
        if self._prefixList is not None:
            return self._prefixList

        ge, geIter = self.parseFirstDataLine()
        presentPrefixes = []
        for prefix in ['start', 'end', 'val', 'strand', 'id', 'edges', 'weights']:
            if ge.__dict__.get(prefix) is not None:
                presentPrefixes.append(prefix)
        self._prefixList = presentPrefixes

        if ge.extra is not None:
            self._prefixList += [x for x in ge.orderedExtraKeys]
        return self._prefixList
        
    def getValDataType(self):
        """Return the name of the value data type; always 'float64' here."""
        valDataType = 'float64'
        return valDataType

    def getValDim(self):
    def getBoundingRegionTuples(self):
        """Return the bounding region tuples; always a new empty list here."""
        noBoundingRegions = []
        return noBoundingRegions

    def parseFirstDataLine(self):
        """Fetch the first element from a fresh iterator over this source.

        Warning printing is switched off on the iterator. Returns the first
        element. Raises Warning when the iterator yields nothing; any other
        exception is logged and re-raised.
        """
        try:
            geIter = self.__iter__()
            geIter._printWarnings = False
            ge = geIter.next()
        except StopIteration as e:
            if geIter.anyWarnings():
                lastWarningMsg = ' Last warning when parsing file: %s' % geIter.getLastWarning()
            else:
                lastWarningMsg = ''
            raise Warning('File has no valid data lines.%s' % lastWarningMsg)
        except Exception as e:
            logException(e)
            raise
        return ge

    def getPrefixList(self):
        """Return the prefixes present in the first data line, computed once.

        The list holds the standard prefixes whose value on the first element
        is not None, followed by the element's ordered extra keys when its
        'extra' attribute is set. The result is cached in self._prefixList.
        """
        if self._prefixList is not None:
            return self._prefixList

        ge = self.parseFirstDataLine()
        presentPrefixes = []
        for prefix in ['start', 'end', 'val', 'strand', 'id', 'edges', 'weights']:
            if ge.__dict__.get(prefix) is not None:
                presentPrefixes.append(prefix)
        self._prefixList = presentPrefixes

        if ge.extra is not None:
            self._prefixList += [x for x in ge.orderedExtraKeys]
        return self._prefixList
Example #22
0
class Analysis(AnalysisDefHandler):
    """An analysis definition bound to a genome and two tracks.

    On top of AnalysisDefHandler it holds the two Track objects, applies the
    chosen format converters to them and determines (and caches) the first
    statistic class that runs on a minimal bin source.
    """

    def __init__(self, analysisLine, genome, trackName1, trackName2, reversed=False):
        """Set up genome and tracks; initialise from analysisLine when it is given."""
        #print 'IN ANALYSIS: ',analysisLine
        AnalysisDefHandler.__init__(self, analysisLine, reversed)
        self._genome = genome
        self._setTracks(trackName1, trackName2)
        self._validStatClass = None
        if analysisLine:
            self._initFromDef()

    def _initFromDef(self):
        """Apply the format converters selected in the definition to the tracks."""
        self._useConvertersFromId()

    @classmethod
    def createFromParsedAnalysis(cls, analysis, genome, trackName1, trackName2, reversed):
        """Create an Analysis from an already parsed AnalysisDefHandler.

        NOTE(review): the object is created as Analysis(...), not cls(...), so
        subclasses calling this get a plain Analysis - confirm this is intended.
        """
        assert isinstance(analysis, AnalysisDefHandler)
        obj = Analysis(None, genome, trackName1, trackName2, reversed)
        obj.integrateParsedAnalysis(analysis)
        return obj

    def integrateParsedAnalysis(self, other):
        """Integrate a parsed definition via the base class, then initialise from it."""
        assert isinstance(other, AnalysisDefHandler)
        super(Analysis, self).integrateParsedAnalysis(other)
        self._initFromDef()

    def getTracks(self):
        """Return the two track objects as a tuple (track, track2)."""
        return self._track, self._track2
    
    def _setTracks(self, trackName1, trackName2):
        """Create the Track objects for both track names."""
        self._track = Track(trackName1)
        self._track2 = Track(trackName2)
        #self.resetValidStat()
        #print 'setTracks: ',self._track.trackName

    def _useConvertersFromId(self):
        """Read the converter choices (TF1_KEY, TF2_KEY) and set them on the tracks."""
        formatConverter1 = self.getChoice(self.TF1_KEY)
        formatConverter2 = self.getChoice(self.TF2_KEY)
        #assert( not None in [formatConverter1, formatConverter2] )
        self.setConverters(formatConverter1, formatConverter2)

    def setConverters(self, formatConverter1, formatConverter2):
        """Set one format converter on each of the two tracks."""
        self._setConverter(self._track, formatConverter1, self.TF1_KEY)
        self._setConverter(self._track2, formatConverter2, self.TF2_KEY)
        
    def _setConverter(self, track, formatConverter, labelKey):
        """Set formatConverter on track (if any); append converter options when it is not None."""
        if track is not None:
            track.setFormatConverter(formatConverter)
            if formatConverter is not None:
                self._appendConverterOptions(track, labelKey)
    
    def resetTracks(self):
        """Reset the track source of every track that is not None."""
        for track in (self._track, self._track2):
            if track is not None:
                track.resetTrackSource()
        
    #def resetValidStat(self):
    #    if hasattr(self, '_validStatClass'):
    #        del self._validStatClass

    def initRandomUtilAndUpdateSeedIfNeeded(self):
        """Seed the random utilities according to the random seed choice.

        With the 'random' choice a seed is generated automatically and written
        back into the choices; any other non-None choice is used as a manual
        seed; with no choice at all a seed is generated automatically.
        """
        from gold.util.RandomUtil import autoSeed, getSeed, setManualSeed
        randSeedChoice = self.getChoice(self.RANDOM_SEED_KEY)
        if randSeedChoice == self.RANDOM_SEED_CHOICE_RANDOM:
            autoSeed()
            self.changeChoices(self.RANDOM_SEED_KEY, [(str(getSeed()),) * 2])
        elif randSeedChoice is not None:
            setManualSeed(int(randSeedChoice))
        else:
            autoSeed()
    
    def getAllStats(self):
        """Return the list of candidate statistic classes."""
        return self._statClassList
    
    def isValidForListing(self):
        """Return whether a valid statistic exists, for definitions that have text parts.

        Without any text parts nothing is returned explicitly (i.e. None); in
        verbose mode a message is logged in that case.
        """
        anyTextParts = len(self._analysisParts) > 0
        if not anyTextParts:
            if DebugConfig.VERBOSE:
                logMessage('Analysisdef "{}" does not have any text available for listing. '.format(self.getDef()) +
                           'Skipping...')
        else:
            return self.getStat(flushMemoized=False) is not None
    
    #def getStat(self):
    #    #assert( len(self._statClassList) >= 1 )
    #    #if not hasattr(self, '_validStatClass'):
    #    prevCfgPrintProgress = StatRunnerModule.PRINT_PROGRESS
    #    StatRunnerModule.PRINT_PROGRESS = False
    #    validStatClass = self._determineStatClass()
    #    StatRunnerModule.PRINT_PROGRESS = prevCfgPrintProgress
    #    return validStatClass
    
    def getGenome(self):
        """Return the genome this analysis was created for."""
        return self._genome
               
    def getStat(self, flushMemoized=True):
        """Return the valid statistic class, determining and caching it on first use.

        When the definition has assumption options, each assumption is tried
        first and the choices are reduced to the assumptions for which a
        statistic class was found; None is returned when none is valid.
        """
        if self._validStatClass is None:
            options = self.getAllOptionsAsKeys()
            if self.ASSUMP_LABEL_KEY in options:
                validAssumptions = []
                allAssumptions = options[self.ASSUMP_LABEL_KEY]
                for assumption in allAssumptions:
                    self.setChoice(self.ASSUMP_LABEL_KEY, assumption)
                    if self._determineStatClass(flushMemoized=False) is not None:
                        validAssumptions.append(assumption)
                if len(validAssumptions) == 0:
                    return None
                
                # Only log when some, but not all, assumptions were dropped
                if len(validAssumptions) not in [0, len(allAssumptions)]:
                    self._logAssumptionReduction(set(allAssumptions) - set(validAssumptions))
                self.reduceChoices(self.ASSUMP_LABEL_KEY, validAssumptions)
                self.setDefaultChoice(self.ASSUMP_LABEL_KEY)

            self._validStatClass = self._determineStatClass(flushMemoized=flushMemoized)
            if self._validStatClass is not None:
                self._appendConverterOptions(self._track, self.TF1_KEY)
                self._appendConverterOptions(self._track2, self.TF2_KEY)
        return self._validStatClass
        
    def _logAssumptionReduction(self, removedAssumptions):
        """Log each removed assumption and re-run the statistic check with it selected."""
        #global VERBOSE
        #prev = VERBOSE
        #VERBOSE = True
    
        for assumption in removedAssumptions:
            logMessage('Assumption "' + str(assumption) + '" was removed from analysisDef: ' + self.getDef())
            self.setChoice(self.ASSUMP_LABEL_KEY, assumption)
            self._determineStatClass(flushMemoized=False)
            
        #VERBOSE = prev
        
    #@noProgress
    def _determineStatClass(self, flushMemoized=True):
        """Try each candidate statistic class with a minimal StatJob run.

        Known incompatibility errors are optionally logged and are re-raised
        only when DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS is on.

        NOTE(review): this excerpt ends inside the OSError handler; the code
        that returns the statistic class is not visible here.
        """
        assert( hasattr(self, '_track') )
        assert( hasattr(self, '_track2') )
        dummyGESource = MinimalBinSource(self._genome)

        if len(self._statClassList) == 0:
            # if self._reversed:
            logMessage('Stat class list is empty, for analysisDef: ' + self._analysisLine, level = logging.WARNING)
            if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                raise ShouldNotOccurError('Stat class list is empty. Analysisdef: ' + self._analysisLine)
        
        for statClass in self._statClassList:
            if DebugConfig.VERBOSE:
                logMessage('Checking validity of stat class "{}" for analysisDef "{}".'.format(statClass.__name__, self.getDefAfterChoices()))

            trackA, trackB = self._track, self._track2
            if trackA is None:
                continue

            try:
                StatJob(dummyGESource, trackA, trackB, statClass, minimal=True,
                        **self.getAllChoices(filterByActivation=True)).run(False, flushMemoized=flushMemoized)

            except IncompatibleTracksError, e:
                if DebugConfig.VERBOSE:
                    logException(e, level=logging.DEBUG,
                                 messagePrefix='Warning: error in _determineStatClass for stat: %s' % statClass.__name__)
                if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                    raise
            except (AssertionError, IncompatibleAssumptionsError, IdenticalTrackNamesError), e:
                if DebugConfig.VERBOSE:
                    logException(e, level=logging.DEBUG,
                                 messagePrefix='Warning: error in _determineStatClass for stat: %s' % statClass.__name__)
                if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                    raise
            except OSError, e:
                # An OSError is only tolerated when its text mentions
                # 'withOverlaps'; every other OSError is re-raised.
                if DebugConfig.VERBOSE:
                    logException(e, messagePrefix='Error in _determineStatClass, with statClass %s' % statClass.__name__)
                if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                    raise
                elif not 'withOverlaps' in str(e):
                    raise
Example #23
0
    def runJob(batchLine,
               genome,
               fullAccess,
               galaxyFn=None,
               printProgress=True):
        """Run the analysis described by a single batch line and return its results.

        The line is parsed with BatchRunner.parseBatchLine. If parsing yields an
        error result, that result is returned directly. Otherwise an
        AnalysisDefJob is run; when the result has keys, its presentation
        collection type is 'standard' and GalaxyInterface.APPEND_ASSEMBLY_GAPS
        is set, the output of an AssemblyGapJob is merged into it.

        Parameters:
            batchLine, genome, fullAccess: forwarded to BatchRunner.parseBatchLine.
            galaxyFn: forwarded to AnalysisDefJob.
            printProgress: when true, the reverse-engineered batch line is
                printed; the flag is also passed on to the jobs' run() calls.

        Returns:
            The parse error result, the results of the run, or (if the run
            raised) a fresh Results object carrying the exception.

        Raises:
            Re-raises the exception from the run when
            DebugConfig.PASS_ON_BATCH_EXCEPTIONS is set.
        """
        bc = BatchRunner.parseBatchLine(batchLine, genome, fullAccess)
        if bc.errorResult is not None:
            return bc.errorResult

        # Try a full run, and return either results or a Results object that
        # wraps the exception.
        try:
            fullRunParams = {}

            if USE_PARALLEL:
                # The current time serves as the identifier shared by the jobs
                # of this run.
                uniqueId = time.time()
                fullRunParams["uniqueId"] = uniqueId

            if bc.cleanedTrackNameIntensity is not None:
                fullRunParams['trackNameIntensity'] = '|'.join(
                    tuple(bc.cleanedTrackNameIntensity))

            analysisDefParams = [
                '[' + key + '=' + value + ']'
                for key, value in bc.paramDict.items()
            ]
            analysisDef = ''.join(analysisDefParams) + '->' + bc.statClassName

            from quick.application.GalaxyInterface import GalaxyInterface

            GalaxyInterface._tempAnalysisDefHacks(analysisDef)

            if printProgress:
                print('Corresponding batch command line:<br>' +
                      GalaxyInterface._revEngBatchLine(
                          bc.trackName1, bc.trackName2, bc.trackNameIntensity,
                          analysisDef, bc.regSpec, bc.binSpec, genome) +
                      '<br><br>')

            results = AnalysisDefJob(analysisDef,
                                     bc.cleanedTrackName1,
                                     bc.cleanedTrackName2,
                                     bc.userBinSource,
                                     galaxyFn=galaxyFn,
                                     **fullRunParams).run(printProgress)
            presCollectionType = results.getPresCollectionType()

            if len(results.getResDictKeys()) > 0 \
                    and GalaxyInterface.APPEND_ASSEMBLY_GAPS \
                    and presCollectionType == 'standard':
                if USE_PARALLEL:
                    gapRes = AssemblyGapJob(
                        bc.userBinSource, genome,
                        uniqueId=uniqueId).run(printProgress)
                else:
                    gapRes = AssemblyGapJob(bc.userBinSource,
                                            genome).run(printProgress)
                results.includeAdditionalResults(
                    gapRes, ensureAnalysisConsistency=False)

        except Exception as e:
            results = Results(bc.cleanedTrackName1, bc.cleanedTrackName2,
                              bc.statClassName)
            results.addError(e)
            logException(e, message='Error in batch run')
            if DebugConfig.PASS_ON_BATCH_EXCEPTIONS:
                raise

        # Reached both after a successful run and after a handled exception;
        # without this the successful path would implicitly return None.
        return results
Example #24
0
    def runJob(batchLine, genome, fullAccess, galaxyFn=None, printProgress=True):
        """Run the analysis described by a single batch line and return its results.

        The line is parsed with BatchRunner.parseBatchLine. If parsing yields an
        error result, that result is returned directly. Otherwise an
        AnalysisDefJob is run on the first two cleaned track names; when the
        result has keys, its presentation collection type is 'standard' and
        GalaxyInterface.APPEND_ASSEMBLY_GAPS is set, the output of an
        AssemblyGapJob is merged into it.

        Parameters:
            batchLine, genome, fullAccess: forwarded to BatchRunner.parseBatchLine.
            galaxyFn: forwarded to AnalysisDefJob.
            printProgress: when true, the reverse-engineered batch line is
                printed; the flag is also passed on to the jobs' run() calls.

        Returns:
            The parse error result, the results of the run, or (if the run
            raised) a fresh Results object carrying the exception.

        Raises:
            Re-raises the exception from the run when
            DebugConfig.PASS_ON_BATCH_EXCEPTIONS is set.
        """
        bc = BatchRunner.parseBatchLine(batchLine, genome, fullAccess)
        if bc.errorResult is not None:
            return bc.errorResult

        # Try a full run, and return either results or a Results object that
        # wraps the exception.
        try:
            fullRunParams = {}

            if USE_PARALLEL:
                # TODO: Requirements for parallel runs should not be added in places like these. Parallelization
                #  should be a feature of the job runner somehow
                uniqueId = time.time()
                fullRunParams["uniqueId"] = uniqueId

            from quick.application.GalaxyInterface import GalaxyInterface

            analysisDefParams = ['[' + key + '=' + value + ']' for key, value in bc.paramDict.items()]
            analysisDef = ''.join(analysisDefParams) + '->' + bc.statClassName

            # TODO: Keeping the ugly accesses to private methods in GalaxyInterface for now. To be refactored.
            # NOTE(review): the track names returned here are never used below
            # (bc.cleanedTrackNames is passed on instead) -- confirm that this is intended.
            trackNames, analysisDef = GalaxyInterface._cleanUpAnalysisDef(bc.cleanedTrackNames, analysisDef)

            if printProgress:
                revEngBatchLine = RunDescription.getRevEngBatchLine(
                    analysisDef, bc.trackNames, bc.cleanedTrackNames, bc.regSpec, bc.binSpec, genome
                )

                print('Corresponding batch command line:<br>{}<br><br>'.format(revEngBatchLine))

            results = AnalysisDefJob(analysisDef, bc.cleanedTrackNames[0], bc.cleanedTrackNames[1],
                                     bc.userBinSource, galaxyFn=galaxyFn, **fullRunParams).run(printProgress)
            presCollectionType = results.getPresCollectionType()

            if len(results.getResDictKeys()) > 0 and GalaxyInterface.APPEND_ASSEMBLY_GAPS \
                    and presCollectionType == 'standard':
                if USE_PARALLEL:
                    gapRes = AssemblyGapJob(bc.userBinSource, genome, uniqueId=uniqueId).run(printProgress)
                else:
                    gapRes = AssemblyGapJob(bc.userBinSource, genome).run(printProgress)
                results.includeAdditionalResults(gapRes, ensureAnalysisConsistency=False)

        except Exception as e:
            results = Results(bc.cleanedTrackNames[0], bc.cleanedTrackNames[1], bc.statClassName)
            results.addError(e)
            logException(e, message='Error in batch run')
            if DebugConfig.PASS_ON_BATCH_EXCEPTIONS:
                raise

        # Reached both after a successful run and after a handled exception;
        # without this the successful path would implicitly return None.
        return results
Example #25
0
    def getRevEngBatchLine(analysisDef, trackNames, cleanedTrackNames, regSpec,
                           binSpec, genome, **kwArgs):
        """Build HTML text showing the batch line that corresponds to an analysis.

        Parameters:
            analysisDef: analysis definition, assumed to be unquoted.
            trackNames: sequence whose first two items are the track names used
                for display; each name is a sequence of parts that are
                URL-quoted and joined by ':'. The second item may be None.
            cleanedTrackNames: sequence whose first two items are handed to
                Analysis.
            regSpec, binSpec: region and binning specifications, inserted into
                the batch line as given. binSpec must be unchanged by unquote().
            genome: genome identifier.
            **kwArgs: forwarded to the Analysis constructor.

        Returns:
            The one-line batch command followed by two HTML links for executing
            it, a short message if no valid statistic was found, or a warning
            text if anything went wrong (the exception is logged, not raised).
        """
        # Bound up front so that the except handler can refer to it even when
        # the failure happens before (or inside) the Analysis constructor.
        analysis = None
        try:
            if DebugConfig.VERBOSE:
                logMessage('getting RevEngBatchLine:')

            analysis = Analysis(analysisDef, genome, cleanedTrackNames[0],
                                cleanedTrackNames[1], **kwArgs)

            # To assure that unquote can be safely applied to binSpec without
            # any consequences (we don't want to always quote, but still want
            # the possibility to use quoted history track names)
            assert unquote(binSpec) == binSpec

            quotedTrackName1 = ':'.join(
                [quote(x, safe='') for x in trackNames[0]])
            if trackNames[1] is not None:
                quotedTrackName2 = ':'.join(
                    [quote(x, safe='') for x in trackNames[1]])
            else:
                quotedTrackName2 = 'None'

            intensityChoice = analysis.getChoice('trackNameIntensity')
            if intensityChoice:
                quotedIntensityTrackName = quote(intensityChoice, safe='^|')
                analysis.changeChoices('trackNameIntensity',
                                       [(quotedIntensityTrackName, ) * 2])

            stat = analysis.getStat()
            if stat is None:
                return 'No corr batch line, as no valid statistic was found.. '

            statClassName = stat.__name__
            # Hypothesis choices are left out of the parameter list.
            #fixme: Add space, but this is not checked in batchrunner...
            hypothesisKeys = ['H0', 'H1_more', 'H1_less', 'H1_different',
                              'H1_ha1', 'H1_ha2', 'H1_ha3', 'H1_ha4', 'H1_ha5']
            params = ','.join([
                '='.join(choicePair)
                for choicePair in analysis.getAllChoices(filterByActivation=True).items()
                if choicePair[0] not in hypothesisKeys
            ])
            statText = statClassName + '(' + params + ')'

            batchElements = [
                genome, regSpec, binSpec, quotedTrackName1, quotedTrackName2,
                statText
            ]
            oneLineBatch = BATCH_COL_SEPARATOR.join(batchElements)

            # Variable-based version: one assignment per variable, followed by
            # a batch line that refers to the variables by name.
            from collections import OrderedDict
            batchVariables = OrderedDict([('@REGION', regSpec),
                                          ('@BINNING', binSpec),
                                          ('@TN1', quotedTrackName1),
                                          ('@TN2', quotedTrackName2),
                                          ('@ANALYSIS', statText)])
            batchComposition = BATCH_COL_SEPARATOR.join([genome] +
                                                        list(batchVariables))
            fullBatchList = [
                '='.join(assignment) for assignment in batchVariables.items()
            ] + [batchComposition]

            batchLinkDef = '<a href="%s/hyper?mako=generictool&tool_id=hb_batch_run_tool&command=%s&dbkey=%s">%s</a>'
            oneLineBatchLink = batchLinkDef % (URL_PREFIX, quote(oneLineBatch),
                                               genome, 'single line version')
            fullBatchLink = batchLinkDef % (URL_PREFIX,
                                            quote('\n'.join(fullBatchList)),
                                            genome, 'variable based version')

            return oneLineBatch + '<br><br>Execute batchline in ' + oneLineBatchLink + ' / ' + fullBatchLink

        except Exception as e:
            logException(e, logging.WARNING,
                         'Could not generate corresponding batch line: ')
            logMessage('analysis, genome, trackName1, trackName2: \n' +
                       str([analysis, genome, trackNames[0], trackNames[1]]))
            return 'Warning: Could not generate corresponding batch line.'
Example #26
0
                if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                    raise
            except OSError, e:
                if DebugConfig.VERBOSE:
                    logException(e, message='(Error in _determineStatClass, with statClass %s)' % statClass.__name__)
                if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                    raise
                elif not 'withOverlaps' in str(e):
                    raise


            except Exception, e:
                if getClassName(e) == 'AttributeError' and \
                        any(x in str(e) for x in ["has no attribute '_track2'", "'NoneType' object has no attribute"]):
                    if DebugConfig.VERBOSE:
                        logException(e, level=logging.DEBUG, message='(Warning: error in _determineStatClass for stat: %s)' % statClass.__name__)
                else:
                    logException(e, message='(Error in _determineStatClass, with statClass %s)' % statClass.__name__)
                if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                    raise

            else:
                #self._reversed = reversed
                #self._conversionsUsed = len(trackA.conversionsUsed) > 0 or \
                #    ((trackB is not None) and len(trackB.conversionsUsed) > 0)
                ##self._validStatClass = functools.partial(statClass, **self.getChoices())
                #functools.update_wrapper(self._validStatClass, statClass)
                validStatClass = wrapClass(statClass, keywords=self.getChoices(filterByActivation=True) ) #fixme: Perhaps return validStatClass, self.getChoices() instead?
                #self.setConverters( self._track.formatConverters, self._track2.formatConverters if self._track2 is not None else None)
                #self._updateOptions()
                if DebugConfig.VERBOSE:
Example #27
0
class Analysis(AnalysisDefHandler):
    def __init__(self, analysisLine, genome, trackName1, trackName2, reversed=False):
        """Initialise the handler from analysisLine and attach the two tracks.

        The analysis line and the reversed flag go to AnalysisDefHandler. The
        valid statistic class is not determined here: _validStatClass starts
        out as None and is filled in by getStat().
        """
        AnalysisDefHandler.__init__(self, analysisLine, reversed)

        self._genome = genome
        self._setTracks(trackName1, trackName2)
        self._useConvertersFromId()

        # Cache slot for the result of getStat()
        self._validStatClass = None

    def getTracks(self):
        return self._track, self._track2
    
    def _setTracks(self, trackName1, trackName2):
        """Create Track objects from the two track names and store them on self."""
        firstTrack = Track(trackName1)
        self._track = firstTrack

        secondTrack = Track(trackName2)
        self._track2 = secondTrack

    def _useConvertersFromId(self):
        """Apply the format converters currently chosen under TF1_KEY and TF2_KEY."""
        chosenConverters = [self.getChoice(choiceKey)
                            for choiceKey in (self.TF1_KEY, self.TF2_KEY)]
        self.setConverters(chosenConverters[0], chosenConverters[1])

    def setConverters(self, formatConverter1, formatConverter2):
        self._setConverter(self._track, formatConverter1, self.TF1_KEY)
        self._setConverter(self._track2, formatConverter2, self.TF2_KEY)
        
    def _setConverter(self, track, formatConverter, labelKey):
        if track is not None:
            track.setFormatConverter(formatConverter)
            if formatConverter is not None:
                self._appendConverterOptions(track, labelKey)
    
    def resetTracks(self):
        for track in (self._track, self._track2):
            if track is not None:
                track.resetTrackSource()
        
    #def resetValidStat(self):
    #    if hasattr(self, '_validStatClass'):
    #        del self._validStatClass
    
    def getAllStats(self):
        return self._statClassList
    
    def isValid(self):
        return len(self._analysisParts) > 0 and self.getStat() is not None
    
    #def getStat(self):
    #    #assert( len(self._statClassList) >= 1 )
    #    #if not hasattr(self, '_validStatClass'):
    #    prevCfgPrintProgress = StatRunnerModule.PRINT_PROGRESS
    #    StatRunnerModule.PRINT_PROGRESS = False
    #    validStatClass = self._determineStatClass()
    #    StatRunnerModule.PRINT_PROGRESS = prevCfgPrintProgress
    #    return validStatClass
    
    def getGenome(self):
        return self._genome
               
    def getStat(self):
        if self._validStatClass is None:
            options = self.getAllOptionsAsKeys()
            if self.ASSUMP_LABEL_KEY in options:
                validAssumptions = []
                allAssumptions = options[self.ASSUMP_LABEL_KEY]
                for assumption in allAssumptions:
                    self.setChoice(self.ASSUMP_LABEL_KEY, assumption)
                    if self._determineStatClass() is not None:
                        validAssumptions.append(assumption)
                if len(validAssumptions) == 0:
                    return None
                
                if len(validAssumptions) not in [0, len(allAssumptions)]:
                    self._logAssumptionReduction( set(allAssumptions) - set(validAssumptions) )
                self.reduceChoices(self.ASSUMP_LABEL_KEY, validAssumptions)
                self.setDefaultChoice(self.ASSUMP_LABEL_KEY)
            
            self._validStatClass = self._determineStatClass()
            if self._validStatClass is not None:
                self._appendConverterOptions(self._track, self.TF1_KEY)
                self._appendConverterOptions(self._track2, self.TF2_KEY)
        return self._validStatClass
        
    def _logAssumptionReduction(self, removedAssumptions):
        #global VERBOSE
        #prev = VERBOSE
        #VERBOSE = True
    
        for assumption in removedAssumptions:
            logMessage('Assumption "' + str(assumption) + '" was removed from analysisDef: ' + self.getDef())
            self.setChoice(self.ASSUMP_LABEL_KEY, assumption)
            self._determineStatClass()    
            
        #VERBOSE = prev
        
    #@noProgress
    def _determineStatClass(self):
        """Probe the candidate statistic classes against a minimal bin source.

        For each class in self._statClassList, a StatJob is created and run on
        MinimalBinSource(self._genome) with the currently active choices.
        Afterwards, a track whose formatConverters is still None raises
        IncompatibleTracksError ('created, but not touched by statistic')
        unless another track has the same unique key.

        Error handling per candidate class:
          - IncompatibleTracksError, AssertionError,
            IncompatibleAssumptionsError and IdenticalTrackNamesError are
            logged at DEBUG level when DebugConfig.VERBOSE is set, and are
            re-raised only when DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS is set.
          - OSError is logged when DebugConfig.VERBOSE is set, and is re-raised
            unless its text contains 'withOverlaps' (always re-raised when
            DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS is set).

        NOTE(review): no return statement is visible in this block, although
        getStat() compares the result of this method with None -- the block
        looks truncated after the OSError handler; confirm against the
        complete source.
        """
        # Both track attributes must have been set (see _setTracks)
        assert( hasattr(self, '_track') )
        assert( hasattr(self, '_track2') )
        # Minimal bin source used only for probing the statistic classes
        dummyGESource = MinimalBinSource(self._genome)

        # An empty candidate list is logged only for reversed analyses, and is
        # an error only in the debug mode that passes on validation exceptions.
        if len(self._statClassList)==0:
            #logging.getLogger(HB_LOGGER).warning('Stat class list is empty, for analysisDef: ' + self._analysisLine)
            if self._reversed:
                logMessage('Stat class list is empty, for analysisDef: ' + self._analysisLine, level = logging.WARNING)
            if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                raise ShouldNotOccurError('Stat class list is empty. Analysisdef: '+self._analysisLine)

        for statClass in self._statClassList:
            if DebugConfig.VERBOSE:
                logMessage(statClass.__name__ + ': Trying (' + self.getDefAfterChoices() + ')')
#                print statClass.__name__ + ': Trying (' + self.getDefAfterChoices() + ')'

            #for reversed, trackA, trackB in [(False, self._track, self._track2), (True, self._track2, self._track) ]:

            trackA, trackB = self._track, self._track2
            # Without a first track, no statistic class can be probed
            if trackA is None:
                continue

            try:
                # The hackiest of all hacks!
                # TODO: reimplement together with TrackStructure
                # The first job is only used to get hold of the statistic
                # object (last element of the single result), in order to read
                # its _tracks attribute if it has one.
                job = StatJob(dummyGESource, trackA, trackB, statClass, minimal=True,
                              **self.getChoices(filterByActivation=True))
                stat = job._getSingleResult(dummyGESource[0])[-1]
                tracks = stat._tracks if hasattr(stat, '_tracks') else [trackA, trackB]
                # NOTE(review): None tracks are filtered out here, but the list
                # is indexed below with positions taken from 'tracks'. If
                # 'tracks' contains None, the positions may not line up --
                # confirm.
                trackUniqueKeys = [Track(tr.trackName).getUniqueKey(self._genome) for tr in tracks
                                   if tr is not None]

                # A second, separate job performs the actual minimal run
                StatJob(dummyGESource, trackA, trackB, statClass, minimal=True,
                        **self.getChoices(filterByActivation=True)).run(False)
                #In order not to mess up integration tests
                initSeed()

                # Compare each track with the remaining ones
                for trackIndex, restTrackIndexes in allElementsVersusRest(xrange(len(tracks))):
                    track = tracks[trackIndex]
                    # formatConverters still being None is treated as the
                    # track not having been touched (see error message below)
                    if track is not None and track.formatConverters is None:
                        uniqueKeyForRestTracks = \
                            set(trackUniqueKeys[i] for i in restTrackIndexes)

                        # If several tracks are the same, memory memoization will only result
                        # in one RawDataStat being created, for one Track object. This is a
                        # wanted optimization. In other cases, something is probably wrong if
                        # a track has not been touched. However, this rule may be revisited
                        # when track structure functionality is implemented.
                        if trackUniqueKeys[trackIndex] not in uniqueKeyForRestTracks:
                            raise IncompatibleTracksError(
                                'Track ' + prettyPrintTrackName(track.trackName) +
                                ' was created, but not touched by statistic')

            except IncompatibleTracksError, e:
                # Logged at DEBUG level only; swallowed unless the debug
                # configuration asks for validation exceptions to be passed on
                if DebugConfig.VERBOSE:
                    logException(e, level=logging.DEBUG,
                                 message='(Warning: error in _determineStatClass for stat: %s)' % statClass.__name__)
                if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                    raise
            except (AssertionError, IncompatibleAssumptionsError, IdenticalTrackNamesError), e:
                # Handled in the same way as IncompatibleTracksError above
                if DebugConfig.VERBOSE:
                    logException(e, level=logging.DEBUG,
                                 message='(Warning: error in _determineStatClass for stat: %s)' % statClass.__name__)
                if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                    raise
            except OSError, e:
                if DebugConfig.VERBOSE:
                    logException(e, message='(Error in _determineStatClass, with statClass %s)' % statClass.__name__)
                if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                    raise
                # Only OSErrors that mention 'withOverlaps' are swallowed
                elif not 'withOverlaps' in str(e):
                    raise
Example #28
0
    def _determineStatClass(self):
        assert( hasattr(self, '_track') )
        assert( hasattr(self, '_track2') )
        dummyGESource = MinimalBinSource(self._genome)

        if len(self._statClassList)==0:
            #logging.getLogger(HB_LOGGER).warning('Stat class list is empty, for analysisDef: ' + self._analysisLine)
            if self._reversed:
                logMessage('Stat class list is empty, for analysisDef: ' + self._analysisLine, level = logging.WARNING)
            if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                raise ShouldNotOccurError('Stat class list is empty. Analysisdef: '+self._analysisLine)
        
        for statClass in self._statClassList:
            if DebugConfig.VERBOSE:
                logMessage(statClass.__name__ + ': Trying (' + self.getDefAfterChoices() + ')')
#                print statClass.__name__ + ': Trying (' + self.getDefAfterChoices() + ')'

            #for reversed, trackA, trackB in [(False, self._track, self._track2), (True, self._track2, self._track) ]:

            trackA, trackB = self._track, self._track2
            if trackA is None:
                continue

            try:
                # The hackiest of all hacks!
                # TODO: reimplement together with TrackStructure
                job = StatJob(dummyGESource, trackA, trackB, statClass, minimal=True,
                              **self.getChoices(filterByActivation=True))
                stat = job._getSingleResult(dummyGESource[0])[-1]
                tracks = stat._tracks if hasattr(stat, '_tracks') else [trackA, trackB]
                trackUniqueKeys = [Track(tr.trackName).getUniqueKey(self._genome) for tr in tracks
                                   if tr is not None]

                StatJob(dummyGESource, trackA, trackB, statClass, minimal=True,
                        **self.getChoices(filterByActivation=True)).run(False)
                #In order not to mess up integration tests
                initSeed()

                for trackIndex, restTrackIndexes in allElementsVersusRest(xrange(len(tracks))):
                    track = tracks[trackIndex]
                    if track is not None and track.formatConverters is None:
                        uniqueKeyForRestTracks = \
                            set(trackUniqueKeys[i] for i in restTrackIndexes)

                        # If several tracks are the same, memory memoization will only result
                        # in one RawDataStat being created, for one Track object. This is a
                        # wanted optimization. In other cases, something is probably wrong if
                        # a track has not been touched. However, this rule may be revisited
                        # when track structure functionality is implemented.
                        if trackUniqueKeys[trackIndex] not in uniqueKeyForRestTracks:
                            raise IncompatibleTracksError(
                                'Track ' + prettyPrintTrackName(track.trackName) +
                                ' was created, but not touched by statistic')

            except IncompatibleTracksError, e:
                if DebugConfig.VERBOSE:
                    logException(e, level=logging.DEBUG,
                                 message='(Warning: error in _determineStatClass for stat: %s)' % statClass.__name__)
                if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                    raise
            except (AssertionError, IncompatibleAssumptionsError, IdenticalTrackNamesError), e:
                if DebugConfig.VERBOSE:
                    logException(e, level=logging.DEBUG,
                                 message='(Warning: error in _determineStatClass for stat: %s)' % statClass.__name__)
                if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                    raise