def __str__(self):
    # Renders the analysis results as an HTML string. The rendering is cached
    # in self._str, so repeated str() calls are cheap.
    # NOTE(review): this excerpt appears truncated after the try/except below
    # (no final return is visible) -- confirm against the original file.
    if self._str is not None:
        return self._str
    if self._results.isEmpty():
        if len(self._results.getAllErrors()) > 0:
            # Errors were registered: render them instead of a result table.
            return str(self._generateErrorText(HtmlCore))
        else:
            return str(HtmlCore().line('This analysis gave no results (might be due to too limited data). '))
    # Multiple-testing adjustment must happen before presenters read p-values.
    self._results.inferAdjustedPvalues()
    self._presenters = []
    if len(self._results.getAllRegionKeys()) > 0:
        self._addAllPresenters()
    #print self._generateHeader()
    #print self._generateErrorText()
    #print self._generateTable(presenters)
    hideTable = False
    coreCls = HtmlCore
    try:
        if self._results.isSignificanceTesting():
            # An auto-generated textual answer replaces the raw result table.
            startText = self._generateAnswerText(coreCls)
            hideTable = True
        else:
            startText = self._generateHeader(coreCls)
    except Exception, e:
        # Fall back to the plain header if answer generation fails.
        startText = self._generateHeader(coreCls)
        logException(e, message='Error producing autogenerated result')
        logException(e, message='Error in auto-generated answer')
def __exit__(self, type, value, traceback):
    # Context-manager exit hook: log known bin-related errors, and suppress
    # (return True) the listed numeric/type errors as well as any exception
    # when compute exceptions are not configured to be passed on.
    binErrors = (TooLargeBinError, TooSmallBinError, CentromerError)
    numericErrors = (ZeroDivisionError, FloatingPointError, TypeError,
                     ValueError, OutsideBoundingRegionError)
    if type in binErrors:
        logException(value)
    if type in numericErrors:
        shouldLog = DebugConfig.VERBOSE or type in (TypeError, ValueError)
        if shouldLog:
            logException(value, message='kwArgs: ' + str(self._kwArgs))
        return True
    if not DebugConfig.PASS_ON_COMPUTE_EXCEPTIONS:
        return True
def createChildren(self): if self.hasResult() or self._curChild is not None: return self._trace('_createChildren') #logMessage(str(self._bins)) try: self._curChild = self._getChildObject(self._bins.next()) except StopIteration,e: logException(e) raise ShouldNotOccurError('Splittable statistic should not have zero bins!')
def syncH1WithTail(self): optionKeys = self.getAllOptionsAsKeys() if self.H1_KEY in optionKeys and self.TAIL_KEY in optionKeys: try: tailChoice = self.getChoice(self.TAIL_KEY) self.setChoice(self.H1_KEY, tailChoice) except (ShouldNotOccurError), e: logException(e, logging.WARNING,'Could not find H1, probably mismatch between tail and H1 in analysisDef (tail choice: %s)' % self.getChoice(self.TAIL_KEY) ) except Exception, e: logException(e, logging.WARNING,'Could not find H1')
def _determineStatClass(self):
    # Determines which statistic class in self._statClassList is applicable to
    # the current track/option combination, by executing a minimal dummy
    # StatJob per candidate and observing which exceptions occur.
    # NOTE(review): this excerpt appears truncated -- no success/return path is
    # visible after the except clauses; confirm against the original file.
    assert( hasattr(self, '_track') )
    assert( hasattr(self, '_track2') )
    dummyGESource = MinimalBinSource(self._genome)
    if len(self._statClassList)==0:
        #logging.getLogger(HB_LOGGER).warning('Stat class list is empty, for analysisDef: ' + self._analysisLine)
        logMessage('Stat class list is empty, for analysisDef: ' + self._analysisLine, level = logging.WARNING)
        if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
            raise ShouldNotOccurError('Stat class list is empty. Analysisdef: '+self._analysisLine)
    for statClass in self._statClassList:
        if DebugConfig.VERBOSE:
            logMessage(statClass.__name__ + ': Trying (' + self.getDefAfterChoices() + ')')
            # print statClass.__name__ + ': Trying (' + self.getDefAfterChoices() + ')'
        #for reversed, trackA, trackB in [(False, self._track, self._track2), (True, self._track2, self._track) ]:
        for trackA, trackB in [[self._track, self._track2]]:
            if trackA == None:
                continue
            try:
                # Minimal run on a one-bin dummy source: a cheap validity probe.
                StatJob(dummyGESource, trackA, trackB, statClass, minimal=True, **self.getChoices()).run(False)
                #In order not to mess up integration tests
                initSeed()
                # A track that the statistic never touched indicates the
                # statistic cannot actually consume it.
                for track in [trackA, trackB]:
                    if track is not None and track.formatConverters is None:
                        raise IncompatibleTracksError('Track ' + prettyPrintTrackName(track.trackName) +\
                                                      'was created, but not touched by statistic')
            except IncompatibleTracksError, e:
                if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                    raise
                if DebugConfig.VERBOSE:
                    logException(e, message='(Warning: error in _determineStatClass for stat: %s)' % statClass.__name__)
                #if VERBOSE:
                #    print 'Incompatible tracks: ', \
                #        statClass.__name__ + ': ' + e.__class__.__name__ + ': ' + str(e)
                #    print 'Incompatible: ', e
            except (AssertionError, IncompatibleAssumptionsError, IdenticalTrackNamesError), e:
                if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                    raise
                if DebugConfig.VERBOSE:
                    logException(e, message='(Warning: error in _determineStatClass for stat: %s)' % statClass.__name__)
                #if VERBOSE:
                #    print 'Warning: exception in getStat: ', \
                #        statClass.__name__ + ': ' + e.__class__.__name__ + ': ' + str(e)
                #    traceback.print_exc(file=sys.stdout)
            except OSError, e:
                # 'withOverlaps' OSErrors are expected for some candidates and
                # silently skipped; all other OSErrors are re-raised.
                if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                    raise
                elif not 'withOverlaps' in str(e):
                    raise
def _addPresenter(self, presenterClass, sendHeader=False, **kwArgs): #print 'Generating figure: ',presenterClass.__name__,'<br>' try: pres = presenterClass(self._results, self._baseDir, *([self._getHeader()] if sendHeader else []), **kwArgs ) self._presenters.append( pres) return pres except SilentError: return None except Exception,e: logException(e, WARNING, 'Error generating figure with ' + str(presenterClass.__name__)) print 'Error generating figure with ', presenterClass.__name__, '(',Exception,' - ',e,')' return None
def storePickledResults(self): try: from cPickle import dump pickleStaticFile = GalaxyRunSpecificFile(['results.pickle'],self._galaxyFn) #print 'TEMP1: PATH: ',pickleStaticFile.getDiskPath(True) from copy import copy pickleList = [copy(res) for res in self._resultsList] for res in pickleList: res._analysis=None dump(pickleList, pickleStaticFile.getFile()) #dump(self._resultsList, pickleStaticFile.getFile()) except Exception, e: logException(e, message='Not able to pickle results object')
def __exit__(self, type, value, traceback):
    # Context-manager exit hook (DEBUG-level kwArgs dump variant): log known
    # bin-related errors, suppress listed numeric/type errors, and suppress
    # everything unless compute exceptions are configured to be passed on.
    if type in (TooLargeBinError, TooSmallBinError, CentromerError):
        logException(value)
    isSuppressedNumericError = type in (ZeroDivisionError, FloatingPointError,
                                        TypeError, ValueError,
                                        OutsideBoundingRegionError)
    if isSuppressedNumericError:
        if DebugConfig.VERBOSE or type in (TypeError, ValueError):
            logException(value, level=logging.DEBUG, message='kwArgs: ' + str(self._kwArgs))
        return True
    if not DebugConfig.PASS_ON_COMPUTE_EXCEPTIONS:
        return True
def _addPresenter(self, presenterClass, sendHeader=False, **kwArgs): #print 'Generating figure: ',presenterClass.__name__,'<br>' try: pres = presenterClass(self._results, self._baseDir, *([self._getHeader()] if sendHeader else []), **kwArgs) self._presenters.append(pres) return pres except SilentError: return None except Exception, e: logException( e, WARNING, 'Error generating figure with ' + str(presenterClass.__name__)) print 'Error generating figure with ', presenterClass.__name__, '(', Exception, ' - ', e, ')' return None
def getRevEngBatchLine(trackName1, trackName2, cleanedTrackName1, cleanedTrackName2, analysisDef, \
                       regSpec, binSpec, genome, manualSeed, **kwArgs):
    # Reconstructs ("reverse engineers") the batch-runner command line
    # corresponding to a GUI run, so the analysis can be reproduced later.
    # Returns the batch line string, or a warning message on failure.
    #analysisDef is assumed to be unquoted
    #if this is to work, must check explicitly against special keywords in regSpec (or check that regSpec is a valid region that is to have region..)...
    #if not genome in regSpec:
    #    regSpec = genome+':'+regSpec
    try:
        if DebugConfig.VERBOSE:
            logMessage('getting RevEngBatchLine:')
        #analysisDef =analysisDef.replace('%20PointCountInSegsPvalStat%2C','') #REMOVE
        #print 'NOWAG: ',analysisDef
        analysis = Analysis(analysisDef, genome, cleanedTrackName1, cleanedTrackName2, **kwArgs)
        stat = analysis.getStat()
        if stat is None:
            return 'No corr batch line, as no valid statistic was found.. '
        #print 'CAME HERE'
        statClassName = stat.__name__
        #fixme: Add space, but this is not checked in batchrunner...
        # Serialize all choices as key=value pairs. A 'Random' random seed is
        # replaced by the concrete manualSeed so reruns are reproducible;
        # H0/H1 alternatives are presentation-only and excluded.
        params = ','.join(['='.join([choicePair[0], str(manualSeed)]) \
                           if (manualSeed is not None and choicePair[0] == 'randomSeed' and choicePair[1] == 'Random') else '='.join(choicePair) \
                           for choicePair in analysis.getChoices().items() \
                           if choicePair[0] not in ['H0','H1_more','H1_less','H1_different','H1_ha1','H1_ha2','H1_ha3','H1_ha4','H1_ha5'] ])
        statText = statClassName + '(' + params + ')'
        #return BATCH_COL_SEPARATOR.join([regSpec, binSpec, \
        #                                 (':'.join(trackName1)).replace(' ','_'),\
        #                                 (':'.join(trackName2)).replace(' ','_') if trackName2 is not None else 'None',\
        #                                 statText])
        #assert unquote(regSpec) == regSpec
        assert unquote(binSpec) == binSpec #To assure that unquote can be safely applied to binSpec without any consequences (we don't want to always quote, but still want the possibility to use quoted history track names)
        batchElements = [genome, regSpec, binSpec, \
                         (':'.join([quote(x, safe='') for x in trackName1])),\
                         (':'.join([quote(x, safe='') for x in trackName2])) if trackName2 is not None else 'None',\
                         statText]
        #batchElements = [el.replace(BATCH_COL_SEPARATOR, '\\' + BATCH_COL_SEPARATOR) for el in batchElements]
        #batchElements = [quote(el, safe='') for el in batchElements]
        return BATCH_COL_SEPARATOR.join(batchElements)
    except Exception, e:
        #raise
        logException(e, logging.WARNING, 'Could not generate corresponding batch line: ')
        #if DebugConfig.VERBOSE: logMessage('analysisDef, genome, trackName1, trackName2: \n' + str([analysisDef, genome, trackName1, trackName2]) )
        return 'Warning: Could not generate corresponding batch line.'
def _getSubCls(origCls, region): #print "with class: ",origCls.__name__,'and region: ',region,"<br>" if isIter(region) or CompBinManager.canBeSplitted(region): try: splittableClass = MagicStatFactory._getClass(origCls.__name__, 'Splittable') #print "FOUND SPLITTABLE: ", <splittableClass if isIter(region): #Always use splittableClass if a global region return splittableClass else: #Use only if splittableClass also accepts splitting of userbins if not issubclass(splittableClass, OnlyGloballySplittable): return splittableClass #except (KeyError, SplittableStatNotAvailableError), e: except KeyError, e: if DebugConfig.VERBOSE: logException(e, message="In MagicStatFactory._getSubCls: ")
def getDemoURL(self):
    # Builds the URL that opens this tool pre-filled with its demo selections.
    # Returns the URL string, or None if the demo values could not be resolved.
    # Bug fix: the computed url was never returned (the function previously
    # always returned None); the bare 'except:' is also narrowed to Exception.
    try:
        demo = self.prototype.getDemoSelections()
        url = '?mako=generictool&tool_id=' + self.toolId
        for i, id in enumerate(self.inputIds):
            if self.inputTypes[i] == '__genome__':
                # Genome inputs are passed to Galaxy via the 'dbkey' parameter.
                id = 'dbkey'
            try:
                val = getattr(demo, id)
            except Exception:
                # Fall back to positional lookup when demo is a sequence.
                val = demo[i]
            url += '&' + id + '=' + val
    except Exception as e:
        from gold.application.LogSetup import logException
        logException(e)
        url = None
    return url
def _calcChrInfo(self): try: self.numChrs = len(GenomeInfo.getChrList(self.genome)) self.numExtChrs = len(GenomeInfo.getExtendedChrList( self.genome)) - self.numChrs self.numBps = self.getGenomeLen(self.genome, standardChrs=False) if self.numExtChrs > 0: self.numBpsWithExt = self.getGenomeLen(self.genome, standardChrs=True) else: self.numBpsWithExt = None except Exception, e: if IS_EXPERIMENTAL_INSTALLATION: from gold.application.LogSetup import logException, logMessage logMessage('Exception for genome: %s' % self.genome) logException(e) import traceback logMessage(''.join(traceback.format_stack()))
def getDemoURL(self):
    # Builds the URL that opens this tool pre-filled with its demo selections.
    # Returns the URL string, or None if the demo values could not be resolved.
    # Bug fix: the computed url was never returned (the function previously
    # always returned None); the bare 'except:' is also narrowed to Exception.
    try:
        demo = self.prototype.getDemoSelections()
        url = "?mako=generictool&tool_id=" + self.toolId
        for i, id in enumerate(self.inputIds):
            if self.inputTypes[i] == "__genome__":
                # Genome inputs are passed to Galaxy via the 'dbkey' parameter.
                id = "dbkey"
            # else:
            #     id = self.inputIds[i]
            try:
                val = getattr(demo, id)
            except Exception:
                # Fall back to positional lookup when demo is a sequence.
                val = demo[i]
            url += "&" + id + "=" + val
    except Exception as e:
        from gold.application.LogSetup import logException
        logException(e)
        url = None
    return url
def _getSubCls(origCls, region): #print "with class: ",origCls.__name__,'and region: ',region,"<br>" if isIter(region) or CompBinManager.canBeSplitted(region): try: splittableClass = MagicStatFactory._getClass( origCls.__name__, 'Splittable') #print "FOUND SPLITTABLE: ", <splittableClass if isIter(region): #Always use splittableClass if a global region return splittableClass else: #Use only if splittableClass also accepts splitting of userbins if not issubclass(splittableClass, OnlyGloballySplittable): return splittableClass #except (KeyError, SplittableStatNotAvailableError), e: except KeyError, e: if DebugConfig.VERBOSE: logException(e, level=logging.DEBUG, message="In MagicStatFactory._getSubCls: ")
def validateRegAndBinSpec(self, regSpec, binSpec):
    # Validates the region/bin specifications. Returns an error message
    # string on failure, or a falsy value when the specs are valid.
    ubSourceInfo = self._getUserBinSourceInfo(regSpec)
    errorString = ubSourceInfo.validateRegAndBinSpec(regSpec, binSpec)
    if errorString:
        return "Error in specification of analysis regions: " + errorString
    try:
        ubSource = self.getUserBinSource(regSpec, binSpec)
        # Truthiness test on each bin, matching the original semantics
        # (renamed to avoid shadowing the builtin 'bin').
        if not any(curBin for curBin in ubSource):
            errorString = ubSourceInfo.getZeroBinsValidationMessage(regSpec, binSpec)
    except Exception as e:
        from gold.application.LogSetup import logException
        logException(e)
        errorString = "Error fetching genome region using the specification of analysis regions: %s" % e
    return errorString
def _tryAnalysisDefForValidity(analysisDef, genome, trackName1, trackName2, tryReversed=True): if DebugConfig.VERBOSE: logMessage('Trying analysisDef: ' + str(analysisDef)) try: for tnA, tnB, reversed in [ (trackName1, trackName2, False) ] + ([(trackName2, trackName1, True)] if tryReversed else []): #print "TEMP1: ", (analysisDef, genome, tnA, tnB) analysis = Analysis(analysisDef, genome, tnA, tnB, reversed) #analysis.setTracks(trackName1, trackName2) #analysis.setConverters(formatConverter1, formatConverter2) if analysis.isValid(): return analysis, reversed except Exception, e: if DebugConfig.VERBOSE: logException(e) if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS: raise
def _tryAnalysisForValidity(cls, analysis, genome, trackName1, trackName2, tryReversed=True): if DebugConfig.VERBOSE: logMessage('Trying analysisDef: ' + str(analysis.getDef())) try: if cls._trackFormatsAreEqual(genome, trackName1, trackName2): tryReversed = False for tnA, tnB, reversed in [(trackName1, trackName2, False)] + \ ([(trackName2, trackName1, True)] if tryReversed and trackName2 not in [None, []] else []): analysis = Analysis.createFromParsedAnalysis( analysis, genome, tnA, tnB, reversed) if analysis.isValidForListing(): return analysis, reversed except Exception, e: if DebugConfig.VERBOSE: logException(e) if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS: raise
def _calcAndStoreChrInfo(self):
    # Lazily (re)computes chromosome counts and genome lengths, persisting via
    # self.store() only when at least one field was actually (re)filled.
    try:
        dirty = False  # whether anything changed and thus needs storing
        if not hasattr(self, 'numChrs') or self.numChrs == 0:
            self.numChrs = len(GenomeInfo.getChrList(self.genome))
            dirty = True
        if not hasattr(self, 'numExtChrs') or self.numExtChrs == 0:
            self.numExtChrs = len(GenomeInfo.getExtendedChrList(self.genome)) - self.numChrs
            dirty = True
        if not hasattr(self, 'numBps') or self.numBps == 0:
            self.numBps = self.getGenomeLen(self.genome, standardChrs=False)
            dirty = True
        if not hasattr(self, 'numBpsWithExt') or self.numBpsWithExt == 0:
            # The extended length is only meaningful when extended
            # chromosomes exist.
            if self.numExtChrs > 0:
                self.numBpsWithExt = self.getGenomeLen(self.genome, standardChrs=True)
            else:
                self.numBpsWithExt = None
            dirty = True
        if dirty:
            self.store()
    except Exception, e:
        # Silent on production installations; detailed logging on
        # experimental installs.
        if IS_EXPERIMENTAL_INSTALLATION:
            from gold.application.LogSetup import logException, logMessage
            logMessage('Exception for genome: %s' % self.genome)
            logException(e)
            import traceback
            logMessage(''.join(traceback.format_stack()))
def getBoundingRegionTuples(self):
    # This source provides no bounding regions.
    return []

def parseFirstDataLine(self):
    # Parses only the first genome element of the file, returning both the
    # element and the (already advanced) iterator.
    # Raises Warning if the file contains no valid data lines.
    try:
        geIter = self.__iter__()
        geIter._printWarnings = False  # suppress warning output while probing
        ge = geIter.next()
    except StopIteration, e:
        #logException(e)
        lastWarningMsg = ' Last warning when parsing file: %s' % geIter.getLastWarning() \
                         if geIter.anyWarnings() else ''
        print >> sys.stderr, '%s' % self._trackName
        raise Warning('File has no valid data lines.%s' % lastWarningMsg)
    except Exception, e:
        logException(e)
        raise
    return ge, geIter

def getPrefixList(self):
    # Derives (and caches) the list of column prefixes present in the file,
    # based on which attributes the first genome element defines.
    if self._prefixList is None:
        ge, geIter = self.parseFirstDataLine()
        self._prefixList = [prefix for prefix in ['start', 'end', 'val', 'strand', 'id', 'edges', 'weights']
                            if ge.__dict__.get(prefix) is not None]
        if ge.extra is not None:
            self._prefixList += [x for x in ge.orderedExtraKeys]
    return self._prefixList

def getValDataType(self):
    # Default value data type; presumably overridden by subclasses -- TODO confirm.
    return 'float64'

def getValDim(self):
    # NOTE(review): the body of this method is missing in this excerpt --
    # the definition appears truncated; confirm against the original file.
def getBoundingRegionTuples(self):
    # This source provides no bounding regions.
    return []

def parseFirstDataLine(self):
    # Parses only the first genome element of the file and returns it.
    # Raises Warning if the file contains no valid data lines.
    try:
        geIter = self.__iter__()
        geIter._printWarnings = False  # suppress warning output while probing
        ge = geIter.next()
    except StopIteration, e:
        #logException(e)
        lastWarningMsg = ' Last warning when parsing file: %s' % geIter.getLastWarning() \
                         if geIter.anyWarnings() else ''
        raise Warning('File has no valid data lines.%s' % lastWarningMsg)
    except Exception, e:
        logException(e)
        raise
    return ge

def getPrefixList(self):
    # Derives (and caches) the list of column prefixes present in the file,
    # based on which attributes the first genome element defines.
    if self._prefixList is None:
        ge = self.parseFirstDataLine()
        self._prefixList = [prefix for prefix in ['start', 'end', 'val', 'strand', 'id', 'edges', 'weights']
                            if ge.__dict__.get(prefix) is not None]
        if ge.extra is not None:
            self._prefixList += [x for x in ge.orderedExtraKeys]
    return self._prefixList
class Analysis(AnalysisDefHandler):
    # An analysis definition bound to a genome and one or two concrete tracks.
    # Extends AnalysisDefHandler with track handling, format-converter setup,
    # random-seed management, and selection of a valid statistic class.
    # NOTE(review): _determineStatClass at the end of this excerpt appears
    # truncated (no success/return path visible); confirm against the
    # original file.

    def __init__(self, analysisLine, genome, trackName1, trackName2, reversed=False):
        #print 'IN ANALYSIS: ',analysisLine
        AnalysisDefHandler.__init__(self, analysisLine, reversed)
        self._genome = genome
        self._setTracks(trackName1, trackName2)
        self._validStatClass = None  # lazily determined by getStat()
        if analysisLine:
            self._initFromDef()

    def _initFromDef(self):
        self._useConvertersFromId()

    @classmethod
    def createFromParsedAnalysis(cls, analysis, genome, trackName1, trackName2, reversed):
        # Alternate constructor: wrap an already-parsed AnalysisDefHandler.
        assert isinstance(analysis, AnalysisDefHandler)
        obj = Analysis(None, genome, trackName1, trackName2, reversed)
        obj.integrateParsedAnalysis(analysis)
        return obj

    def integrateParsedAnalysis(self, other):
        assert isinstance(other, AnalysisDefHandler)
        super(Analysis, self).integrateParsedAnalysis(other)
        self._initFromDef()

    def getTracks(self):
        return self._track, self._track2

    def _setTracks(self, trackName1, trackName2):
        self._track = Track(trackName1)
        self._track2 = Track(trackName2)
        #self.resetValidStat()
        #print 'setTracks: ',self._track.trackName

    def _useConvertersFromId(self):
        # Reads the format-converter choices from the parsed definition.
        formatConverter1 = self.getChoice(self.TF1_KEY)
        formatConverter2 = self.getChoice(self.TF2_KEY)
        #assert( not None in [formatConverter1, formatConverter2] )
        self.setConverters(formatConverter1, formatConverter2)

    def setConverters(self, formatConverter1, formatConverter2):
        self._setConverter(self._track, formatConverter1, self.TF1_KEY)
        self._setConverter(self._track2, formatConverter2, self.TF2_KEY)

    def _setConverter(self, track, formatConverter, labelKey):
        if track is not None:
            track.setFormatConverter(formatConverter)
            if formatConverter is not None:
                self._appendConverterOptions(track, labelKey)

    def resetTracks(self):
        for track in (self._track, self._track2):
            if track is not None:
                track.resetTrackSource()

    #def resetValidStat(self):
    #    if hasattr(self, '_validStatClass'):
    #        del self._validStatClass

    def initRandomUtilAndUpdateSeedIfNeeded(self):
        # Seeds the random utility: a 'Random' choice is resolved to a
        # concrete seed and written back for reproducibility; an explicit
        # choice is applied as a manual seed; no choice falls back to auto.
        from gold.util.RandomUtil import autoSeed, getSeed, setManualSeed
        randSeedChoice = self.getChoice(self.RANDOM_SEED_KEY)
        if randSeedChoice == self.RANDOM_SEED_CHOICE_RANDOM:
            autoSeed()
            self.changeChoices(self.RANDOM_SEED_KEY, [(str(getSeed()),) * 2])
        elif randSeedChoice is not None:
            setManualSeed(int(randSeedChoice))
        else:
            autoSeed()

    def getAllStats(self):
        return self._statClassList

    def isValidForListing(self):
        # An analysis is listable if it has text parts and a valid statistic.
        anyTextParts = len(self._analysisParts) > 0
        if not anyTextParts:
            if DebugConfig.VERBOSE:
                logMessage('Analysisdef "{}" does not have any text available for listing. '.format(self.getDef()) + 'Skipping...')
        else:
            return self.getStat(flushMemoized=False) is not None

    #def getStat(self):
    #    #assert( len(self._statClassList) >= 1 )
    #    #if not hasattr(self, '_validStatClass'):
    #    prevCfgPrintProgress = StatRunnerModule.PRINT_PROGRESS
    #    StatRunnerModule.PRINT_PROGRESS = False
    #    validStatClass = self._determineStatClass()
    #    StatRunnerModule.PRINT_PROGRESS = prevCfgPrintProgress
    #    return validStatClass

    def getGenome(self):
        return self._genome

    def getStat(self, flushMemoized=True):
        # Lazily determines (and caches) the valid statistic class. When the
        # definition offers assumption alternatives, invalid assumptions are
        # first filtered out before the final determination.
        if self._validStatClass is None:
            options = self.getAllOptionsAsKeys()
            if self.ASSUMP_LABEL_KEY in options:
                validAssumptions = []
                allAssumptions = options[self.ASSUMP_LABEL_KEY]
                for assumption in allAssumptions:
                    self.setChoice(self.ASSUMP_LABEL_KEY, assumption)
                    if self._determineStatClass(flushMemoized=False) is not None:
                        validAssumptions.append(assumption)
                if len(validAssumptions) == 0:
                    return None
                if len(validAssumptions) not in [0, len(allAssumptions)]:
                    self._logAssumptionReduction(set(allAssumptions) - set(validAssumptions))
                self.reduceChoices(self.ASSUMP_LABEL_KEY, validAssumptions)
                self.setDefaultChoice(self.ASSUMP_LABEL_KEY)
            self._validStatClass = self._determineStatClass(flushMemoized=flushMemoized)
            if self._validStatClass is not None:
                self._appendConverterOptions(self._track, self.TF1_KEY)
                self._appendConverterOptions(self._track2, self.TF2_KEY)
        return self._validStatClass

    def _logAssumptionReduction(self, removedAssumptions):
        # Logs each removed assumption and re-runs determination for it
        # (so the failure reason is visible in the logs).
        #global VERBOSE
        #prev = VERBOSE
        #VERBOSE = True
        for assumption in removedAssumptions:
            logMessage('Assumption "' + str(assumption) + '" was removed from analysisDef: ' + self.getDef())
            self.setChoice(self.ASSUMP_LABEL_KEY, assumption)
            self._determineStatClass(flushMemoized=False)
        #VERBOSE = prev

    #@noProgress
    def _determineStatClass(self, flushMemoized=True):
        # Probes each candidate statistic class with a minimal dummy StatJob,
        # classifying candidates by the exceptions their runs raise.
        assert( hasattr(self, '_track') )
        assert( hasattr(self, '_track2') )
        dummyGESource = MinimalBinSource(self._genome)
        if len(self._statClassList) == 0:
            # if self._reversed:
            logMessage('Stat class list is empty, for analysisDef: ' + self._analysisLine, level = logging.WARNING)
            if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                raise ShouldNotOccurError('Stat class list is empty. Analysisdef: ' + self._analysisLine)
        for statClass in self._statClassList:
            if DebugConfig.VERBOSE:
                logMessage('Checking validity of stat class "{}" for analysisDef "{}".'.format(statClass.__name__, self.getDefAfterChoices()))
            trackA, trackB = self._track, self._track2
            if trackA is None:
                continue
            try:
                # Minimal run on a one-bin dummy source: cheap validity probe.
                StatJob(dummyGESource, trackA, trackB, statClass, minimal=True, **self.getAllChoices(filterByActivation=True)).run(False, flushMemoized=flushMemoized)
            except IncompatibleTracksError, e:
                if DebugConfig.VERBOSE:
                    logException(e, level=logging.DEBUG, messagePrefix='Warning: error in _determineStatClass for stat: %s' % statClass.__name__)
                if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                    raise
            except (AssertionError, IncompatibleAssumptionsError, IdenticalTrackNamesError), e:
                if DebugConfig.VERBOSE:
                    logException(e, level=logging.DEBUG, messagePrefix='Warning: error in _determineStatClass for stat: %s' % statClass.__name__)
                if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                    raise
            except OSError, e:
                # 'withOverlaps' OSErrors are expected for some candidates and
                # silently skipped; all other OSErrors are re-raised.
                if DebugConfig.VERBOSE:
                    logException(e, messagePrefix='Error in _determineStatClass, with statClass %s' % statClass.__name__)
                if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                    raise
                elif not 'withOverlaps' in str(e):
                    raise
def runJob(batchLine, genome, fullAccess, galaxyFn=None, printProgress=True):
    # Parses a single batch line and executes the corresponding analysis job.
    # Returns a Results object; on failure the exception is recorded in the
    # Results (and re-raised only if DebugConfig.PASS_ON_BATCH_EXCEPTIONS).
    bc = BatchRunner.parseBatchLine(batchLine, genome, fullAccess)
    if bc.errorResult is not None:
        return bc.errorResult
    #Try a full run, and return either results or an exception
    try:
        #track = Track(trackName1)
        #track2 = Track(trackName2)
        #if 'tf1' in paramDict:
        #    track.setFormatConverter(formatConverter)
        #results = StatRunner.run(userBinSource , Track(trackName1), Track(trackName2), \
        #                         wrapClass(STAT_CLASS_DICT[statClassName], keywords=paramDict) )
        #results = StatRunner.run(userBinSource , track, track2, \
        #                         wrapClass(STAT_CLASS_DICT[statClassName], keywords=paramDict) )
        fullRunParams = {}
        if USE_PARALLEL:
            #if galaxyFn == None: #then this is a test
            uniqueId = time.time()
            #else:
            #uniqueId = extractIdFromGalaxyFn(galaxyFn)[1]
            fullRunParams["uniqueId"] = uniqueId
        if bc.cleanedTrackNameIntensity is not None:
            fullRunParams['trackNameIntensity'] = '|'.join(tuple(bc.cleanedTrackNameIntensity))
        # Reassemble the analysisDef string from the parsed parameters.
        analysisDefParams = ['[' + key + '=' + value + ']' for key, value in bc.paramDict.items()]
        analysisDef = ''.join(analysisDefParams) + '->' + bc.statClassName
        from quick.application.GalaxyInterface import GalaxyInterface
        GalaxyInterface._tempAnalysisDefHacks(analysisDef)
        if printProgress:
            print 'Corresponding batch command line:<br>' + \
                  GalaxyInterface._revEngBatchLine(bc.trackName1, bc.trackName2, bc.trackNameIntensity, analysisDef, bc.regSpec, bc.binSpec, genome) + '<br><br>'
        results = AnalysisDefJob(analysisDef, bc.cleanedTrackName1, bc.cleanedTrackName2, bc.userBinSource, galaxyFn=galaxyFn, **fullRunParams).run(printProgress)
        presCollectionType = results.getPresCollectionType()
        # Optionally append assembly-gap information to standard result sets.
        if len(results.getResDictKeys()) > 0 and GalaxyInterface.APPEND_ASSEMBLY_GAPS and presCollectionType == 'standard':
            if USE_PARALLEL:
                gapRes = AssemblyGapJob(bc.userBinSource, genome, uniqueId=uniqueId).run(printProgress)
            else:
                gapRes = AssemblyGapJob(bc.userBinSource, genome).run(printProgress)
            results.includeAdditionalResults(gapRes, ensureAnalysisConsistency=False)
    except Exception, e:
        #print 'NOWAG BExc'
        results = Results(bc.cleanedTrackName1, bc.cleanedTrackName2, bc.statClassName)
        results.addError(e)
        logException(e, message='Error in batch run')
        if DebugConfig.PASS_ON_BATCH_EXCEPTIONS:
            raise
    return results
def runJob(batchLine, genome, fullAccess, galaxyFn=None, printProgress=True):
    # Parses a single batch line and executes the corresponding analysis job.
    # Returns a Results object; on failure the exception is recorded in the
    # Results (and re-raised only if DebugConfig.PASS_ON_BATCH_EXCEPTIONS).
    bc = BatchRunner.parseBatchLine(batchLine, genome, fullAccess)
    if bc.errorResult is not None:
        return bc.errorResult
    #Try a full run, and return either results or an exception
    try:
        #track = Track(trackName1)
        #track2 = Track(trackName2)
        #if 'tf1' in paramDict:
        #    track.setFormatConverter(formatConverter)
        #results = StatRunner.run(userBinSource , Track(trackName1), Track(trackName2), \
        #                         wrapClass(STAT_CLASS_DICT[statClassName], keywords=paramDict) )
        #results = StatRunner.run(userBinSource , track, track2, \
        #                         wrapClass(STAT_CLASS_DICT[statClassName], keywords=paramDict) )
        fullRunParams = {}
        if USE_PARALLEL:
            # TODO: Requirements for parallel runs should not be added in places like these. Parallelization
            # should be a feature of the job runner somehow
            #if galaxyFn == None: #then this is a test
            uniqueId = time.time()
            #else:
            #uniqueId = extractIdFromGalaxyFn(galaxyFn)[1]
            fullRunParams["uniqueId"] = uniqueId
        from quick.application.GalaxyInterface import GalaxyInterface
        # Reassemble the analysisDef string from the parsed parameters.
        analysisDefParams = ['[' + key + '=' + value + ']' for key, value in bc.paramDict.items()]
        analysisDef = ''.join(analysisDefParams) + '->' + bc.statClassName
        # TODO: Keeping the ugly accesses to private methods in GalaxyInterface for now. To be refactored.
        trackNames, analysisDef = GalaxyInterface._cleanUpAnalysisDef(bc.cleanedTrackNames, analysisDef)
        if printProgress:
            revEngBatchLine = RunDescription.getRevEngBatchLine(analysisDef, bc.trackNames, bc.cleanedTrackNames, bc.regSpec, bc.binSpec, genome)
            print 'Corresponding batch command line:<br>{}<br><br>'.format(revEngBatchLine)
        results = AnalysisDefJob(analysisDef, bc.cleanedTrackNames[0], bc.cleanedTrackNames[1], bc.userBinSource, galaxyFn=galaxyFn, **fullRunParams).run(printProgress)
        presCollectionType = results.getPresCollectionType()
        # Optionally append assembly-gap information to standard result sets.
        if len(results.getResDictKeys()) > 0 and GalaxyInterface.APPEND_ASSEMBLY_GAPS and presCollectionType=='standard':
            if USE_PARALLEL:
                gapRes = AssemblyGapJob(bc.userBinSource, genome, uniqueId=uniqueId).run(printProgress)
            else:
                gapRes = AssemblyGapJob(bc.userBinSource, genome).run(printProgress)
            results.includeAdditionalResults(gapRes, ensureAnalysisConsistency=False)
    except Exception, e:
        #print 'NOWAG BExc'
        results = Results(bc.cleanedTrackNames[0], bc.cleanedTrackNames[1], bc.statClassName)
        results.addError(e)
        logException(e, message='Error in batch run')
        if DebugConfig.PASS_ON_BATCH_EXCEPTIONS:
            raise
    return results
def getRevEngBatchLine(analysisDef, trackNames, cleanedTrackNames, regSpec, binSpec, genome, **kwArgs):
    # Reconstructs ("reverse engineers") the batch-runner command line
    # corresponding to a GUI run, plus HTML links for executing it in the
    # batch-run tool. Returns HTML, or a warning message on failure.
    #analysisDef is assumed to be unquoted
    #if this is to work, must check explicitly against special keywords in regSpec (or check that regSpec is a valid region that is to have region..)...
    #if not genome in regSpec:
    #    regSpec = genome+':'+regSpec
    try:
        if DebugConfig.VERBOSE:
            logMessage('getting RevEngBatchLine:')
        #analysisDef =analysisDef.replace('%20PointCountInSegsPvalStat%2C','') #REMOVE
        #print 'NOWAG: ',analysisDef
        analysis = Analysis(analysisDef, genome, cleanedTrackNames[0], cleanedTrackNames[1], **kwArgs)
        #assert unquote(regSpec) == regSpec
        assert unquote(binSpec) == binSpec #To assure that unquote can be safely applied to binSpec without any consequences (we don't want to always quote, but still want the possibility to use quoted history track names)
        # URL-quote the track names so they survive the batch line format.
        quotedTrackName1 = (':'.join([quote(x, safe='') for x in trackNames[0]]))
        quotedTrackName2 = (':'.join([quote(x, safe='') for x in trackNames[1]])) if trackNames[1] is not None else 'None'
        intensityChoice = analysis.getChoice('trackNameIntensity')
        if intensityChoice:
            quotedIntensityTrackName = quote(intensityChoice, safe='^|')
            analysis.changeChoices('trackNameIntensity', [(quotedIntensityTrackName,) * 2])
        stat = analysis.getStat()
        if stat is None:
            return 'No corr batch line, as no valid statistic was found.. '
        #print 'CAME HERE'
        statClassName = stat.__name__
        #fixme: Add space, but this is not checked in batchrunner...
        # Serialize all active choices as key=value pairs; H0/H1 alternatives
        # are presentation-only and excluded.
        params = ','.join(['='.join(choicePair) for choicePair in analysis.getAllChoices(filterByActivation=True).items() \
                           if choicePair[0] not in ['H0','H1_more','H1_less','H1_different','H1_ha1','H1_ha2','H1_ha3','H1_ha4','H1_ha5'] ])
        statText = statClassName + '(' + params + ')'
        batchElements = [genome, regSpec, binSpec, quotedTrackName1, quotedTrackName2, statText]
        #batchElements = [el.replace(BATCH_COL_SEPARATOR, '\\' + BATCH_COL_SEPARATOR) for el in batchElements]
        #batchElements = [quote(el, safe='') for el in batchElements]
        oneLineBatch = BATCH_COL_SEPARATOR.join(batchElements)
        #return oneLineBatch
        #Under construction...:
        # Variable-based form: each element bound to an @-variable, followed
        # by a composition line referencing those variables.
        from collections import OrderedDict
        #batchVariables = OrderedDict([('@GENOME',genome), ('@REGION',regSpec), ('@BINNING',binSpec), ('@TN1',tn1), ('@TN2',tn2), ('@ANALYSIS',statText)])
        batchVariables = OrderedDict([('@REGION', regSpec), ('@BINNING', binSpec), ('@TN1', quotedTrackName1), ('@TN2', quotedTrackName2), ('@ANALYSIS', statText)])
        batchComposition = BATCH_COL_SEPARATOR.join([genome] + batchVariables.keys())
        fullBatchList = ['='.join(assignment) for assignment in batchVariables.items()] + [batchComposition]
        fullBatch = '<br>'.join(fullBatchList)
        batchLinkDef = '<a href="%s/hyper?mako=generictool&tool_id=hb_batch_run_tool&command=%s&dbkey=%s">%s</a>'
        oneLineBatchLink = batchLinkDef % (URL_PREFIX, quote(oneLineBatch), genome, 'single line version')
        fullBatchLink = batchLinkDef % (URL_PREFIX, quote('\n'.join(fullBatchList)), genome, 'variable based version')
        #return oneLineBatch + '<br><br>or corresponding spec using variable assignment:<br><br>' + fullBatch + '<br><br>Execute batchline in ' \
        #+ oneLineBatchLink + ' / ' + fullBatchLink
        return oneLineBatch + '<br><br>Execute batchline in ' + oneLineBatchLink + ' / ' + fullBatchLink
    except Exception, e:
        #raise
        logException(e, logging.WARNING, 'Could not generate corresponding batch line: ')
        #if DebugConfig.VERBOSE: logMessage('analysis, genome, trackName1, trackName2: \n' + str([analysis, genome, trackNames[0], trackNames[1]]))
        return 'Warning: Could not generate corresponding batch line.'
if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS: raise except OSError, e: if DebugConfig.VERBOSE: logException(e, message='(Error in _determineStatClass, with statClass %s)' % statClass.__name__) if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS: raise elif not 'withOverlaps' in str(e): raise except Exception, e: if getClassName(e) == 'AttributeError' and \ any(x in str(e) for x in ["has no attribute '_track2'", "'NoneType' object has no attribute"]): if DebugConfig.VERBOSE: logException(e, level=logging.DEBUG, message='(Warning: error in _determineStatClass for stat: %s)' % statClass.__name__) else: logException(e, message='(Error in _determineStatClass, with statClass %s)' % statClass.__name__) if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS: raise else: #self._reversed = reversed #self._conversionsUsed = len(trackA.conversionsUsed) > 0 or \ # ((trackB is not None) and len(trackB.conversionsUsed) > 0) ##self._validStatClass = functools.partial(statClass, **self.getChoices()) #functools.update_wrapper(self._validStatClass, statClass) validStatClass = wrapClass(statClass, keywords=self.getChoices(filterByActivation=True) ) #fixme: Perhaps return validStatClass, self.getChoices() instead? #self.setConverters( self._track.formatConverters, self._track2.formatConverters if self._track2 is not None else None) #self._updateOptions() if DebugConfig.VERBOSE:
class Analysis(AnalysisDefHandler):
    """An analysis definition bound to a genome and one or two concrete tracks.

    Extends AnalysisDefHandler with Track objects, format-converter wiring,
    and lazy resolution of the first statistic class (from self._statClassList)
    that can actually run on the given tracks.
    """

    def __init__(self, analysisLine, genome, trackName1, trackName2, reversed=False):
        #print 'IN ANALYSIS: ',analysisLine
        AnalysisDefHandler.__init__(self, analysisLine, reversed)
        self._genome = genome
        self._setTracks(trackName1, trackName2)
        self._useConvertersFromId()
        # Cache for the resolved statistic class; filled lazily by getStat().
        self._validStatClass = None

    def getTracks(self):
        # Second element may be None for single-track analyses.
        return self._track, self._track2

    def _setTracks(self, trackName1, trackName2):
        self._track = Track(trackName1)
        self._track2 = Track(trackName2)
        #self.resetValidStat()
        #print 'setTracks: ',self._track.trackName

    def _useConvertersFromId(self):
        # Read the format-converter choices embedded in the analysis definition
        # (keys TF1_KEY/TF2_KEY) and apply them to the two tracks.
        formatConverter1 = self.getChoice(self.TF1_KEY)
        formatConverter2 = self.getChoice(self.TF2_KEY)
        #assert( not None in [formatConverter1, formatConverter2] )
        self.setConverters(formatConverter1, formatConverter2)

    def setConverters(self, formatConverter1, formatConverter2):
        """Apply the given format converters to track 1 and track 2, respectively."""
        self._setConverter(self._track, formatConverter1, self.TF1_KEY)
        self._setConverter(self._track2, formatConverter2, self.TF2_KEY)

    def _setConverter(self, track, formatConverter, labelKey):
        # NOTE(review): the source formatting was lost; the inner if is assumed
        # to be nested under the track-existence guard — confirm against the
        # original file before relying on the None-track behavior.
        if track is not None:
            track.setFormatConverter(formatConverter)
            if formatConverter is not None:
                self._appendConverterOptions(track, labelKey)

    def resetTracks(self):
        # Drop any cached track-source state on both tracks (track 2 may be None).
        for track in (self._track, self._track2):
            if track is not None:
                track.resetTrackSource()

    #def resetValidStat(self):
    #    if hasattr(self, '_validStatClass'):
    #        del self._validStatClass

    def getAllStats(self):
        # All candidate statistic classes parsed from the analysis definition.
        return self._statClassList

    def isValid(self):
        # Valid iff the definition parsed into at least one part and some
        # statistic class can run on the tracks (getStat() may be expensive).
        return len(self._analysisParts) > 0 and self.getStat() is not None

    #def getStat(self):
    #    #assert( len(self._statClassList) >= 1 )
    #    #if not hasattr(self, '_validStatClass'):
    #    prevCfgPrintProgress = StatRunnerModule.PRINT_PROGRESS
    #    StatRunnerModule.PRINT_PROGRESS = False
    #    validStatClass = self._determineStatClass()
    #    StatRunnerModule.PRINT_PROGRESS = prevCfgPrintProgress
    #    return validStatClass

    def getGenome(self):
        return self._genome

    def getStat(self):
        """Return the resolved statistic class, or None if no candidate works.

        The result is cached in self._validStatClass. If the definition has an
        assumptions option (ASSUMP_LABEL_KEY), each assumption is tried in turn
        and the option list is reduced to those for which a statistic class
        could be determined.
        """
        if self._validStatClass is None:
            options = self.getAllOptionsAsKeys()
            if self.ASSUMP_LABEL_KEY in options:
                validAssumptions = []
                allAssumptions = options[self.ASSUMP_LABEL_KEY]
                # Probe every assumption; keep those that yield a working stat.
                for assumption in allAssumptions:
                    self.setChoice(self.ASSUMP_LABEL_KEY, assumption)
                    if self._determineStatClass() is not None:
                        validAssumptions.append(assumption)
                if len(validAssumptions) == 0:
                    return None
                # NOTE(review): original indentation was lost; the reduction
                # calls are assumed to apply only when a proper subset of
                # assumptions survived (reduction is a no-op otherwise) —
                # confirm against the original file.
                if len(validAssumptions) not in [0, len(allAssumptions)]:
                    self._logAssumptionReduction( set(allAssumptions) - set(validAssumptions) )
                    self.reduceChoices(self.ASSUMP_LABEL_KEY, validAssumptions)
                    self.setDefaultChoice(self.ASSUMP_LABEL_KEY)
            self._validStatClass = self._determineStatClass()
            if self._validStatClass is not None:
                # Expose the converters chosen during statistic determination.
                self._appendConverterOptions(self._track, self.TF1_KEY)
                self._appendConverterOptions(self._track2, self.TF2_KEY)
        return self._validStatClass

    def _logAssumptionReduction(self, removedAssumptions):
        # Log each dropped assumption and re-run determination for it, so the
        # failure reason appears in the log next to the removal message.
        #global VERBOSE
        #prev = VERBOSE
        #VERBOSE = True
        for assumption in removedAssumptions:
            logMessage('Assumption "' + str(assumption) + '" was removed from analysisDef: ' + self.getDef())
            self.setChoice(self.ASSUMP_LABEL_KEY, assumption)
            self._determineStatClass()
        #VERBOSE = prev

    #@noProgress
    def _determineStatClass(self):
        """Try each candidate statistic class on a minimal bin of the genome.

        Runs each class in self._statClassList as a minimal StatJob against a
        MinimalBinSource; candidates that raise the expected incompatibility
        errors are skipped. (The tail of this method — the remaining exception
        handlers and the success branch — lies outside this chunk.)
        """
        assert( hasattr(self, '_track') )
        assert( hasattr(self, '_track2') )
        dummyGESource = MinimalBinSource(self._genome)

        if len(self._statClassList)==0:
            #logging.getLogger(HB_LOGGER).warning('Stat class list is empty, for analysisDef: ' + self._analysisLine)
            if self._reversed:
                logMessage('Stat class list is empty, for analysisDef: ' + self._analysisLine, level = logging.WARNING)
            if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                raise ShouldNotOccurError('Stat class list is empty. Analysisdef: '+self._analysisLine)

        for statClass in self._statClassList:
            if DebugConfig.VERBOSE:
                logMessage(statClass.__name__ + ': Trying (' + self.getDefAfterChoices() + ')')
                # print statClass.__name__ + ': Trying (' + self.getDefAfterChoices() + ')'

            #for reversed, trackA, trackB in [(False, self._track, self._track2), (True, self._track2, self._track) ]:
            trackA, trackB = self._track, self._track2
            if trackA is None:
                continue

            try:
                # The hackiest of all hacks!
                # TODO: reimplement together with TrackStructure
                job = StatJob(dummyGESource, trackA, trackB, statClass, minimal=True,
                              **self.getChoices(filterByActivation=True))
                stat = job._getSingleResult(dummyGESource[0])[-1]
                tracks = stat._tracks if hasattr(stat, '_tracks') else [trackA, trackB]
                trackUniqueKeys = [Track(tr.trackName).getUniqueKey(self._genome)
                                   for tr in tracks if tr is not None]
                # Run the job a second time, for real, to verify it completes.
                StatJob(dummyGESource, trackA, trackB, statClass, minimal=True,
                        **self.getChoices(filterByActivation=True)).run(False)
                #In order not to mess up integration tests
                initSeed()
                # Sanity check: every distinct track should have been touched
                # (formatConverters set) by the statistic run.
                for trackIndex, restTrackIndexes in allElementsVersusRest(xrange(len(tracks))):
                    track = tracks[trackIndex]
                    if track is not None and track.formatConverters is None:
                        uniqueKeyForRestTracks = \
                            set(trackUniqueKeys[i] for i in restTrackIndexes)
                        # If several tracks are the same, memory memoization will only result
                        # in one RawDataStat being created, for one Track object. This is a
                        # wanted optimization. In other cases, something is probably wrong if
                        # a track has not been touched. However, this rule may be revisited
                        # when track structure functionality is implemented.
                        if trackUniqueKeys[trackIndex] not in uniqueKeyForRestTracks:
                            raise IncompatibleTracksError(
                                'Track ' + prettyPrintTrackName(track.trackName) +
                                ' was created, but not touched by statistic')
            except IncompatibleTracksError, e:
                if DebugConfig.VERBOSE:
                    logException(e, level=logging.DEBUG,
                                 message='(Warning: error in _determineStatClass for stat: %s)' % statClass.__name__)
                if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                    raise
            except (AssertionError, IncompatibleAssumptionsError, IdenticalTrackNamesError), e:
                if DebugConfig.VERBOSE:
                    logException(e, level=logging.DEBUG,
                                 message='(Warning: error in _determineStatClass for stat: %s)' % statClass.__name__)
                if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                    raise
            except OSError, e:
                if DebugConfig.VERBOSE:
                    logException(e, message='(Error in _determineStatClass, with statClass %s)' % statClass.__name__)
                if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                    raise
                elif not 'withOverlaps' in str(e):
                    raise
def _determineStatClass(self): assert( hasattr(self, '_track') ) assert( hasattr(self, '_track2') ) dummyGESource = MinimalBinSource(self._genome) if len(self._statClassList)==0: #logging.getLogger(HB_LOGGER).warning('Stat class list is empty, for analysisDef: ' + self._analysisLine) if self._reversed: logMessage('Stat class list is empty, for analysisDef: ' + self._analysisLine, level = logging.WARNING) if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS: raise ShouldNotOccurError('Stat class list is empty. Analysisdef: '+self._analysisLine) for statClass in self._statClassList: if DebugConfig.VERBOSE: logMessage(statClass.__name__ + ': Trying (' + self.getDefAfterChoices() + ')') # print statClass.__name__ + ': Trying (' + self.getDefAfterChoices() + ')' #for reversed, trackA, trackB in [(False, self._track, self._track2), (True, self._track2, self._track) ]: trackA, trackB = self._track, self._track2 if trackA is None: continue try: # The hackiest of all hacks! # TODO: reimplement together with TrackStructure job = StatJob(dummyGESource, trackA, trackB, statClass, minimal=True, **self.getChoices(filterByActivation=True)) stat = job._getSingleResult(dummyGESource[0])[-1] tracks = stat._tracks if hasattr(stat, '_tracks') else [trackA, trackB] trackUniqueKeys = [Track(tr.trackName).getUniqueKey(self._genome) for tr in tracks if tr is not None] StatJob(dummyGESource, trackA, trackB, statClass, minimal=True, **self.getChoices(filterByActivation=True)).run(False) #In order not to mess up integration tests initSeed() for trackIndex, restTrackIndexes in allElementsVersusRest(xrange(len(tracks))): track = tracks[trackIndex] if track is not None and track.formatConverters is None: uniqueKeyForRestTracks = \ set(trackUniqueKeys[i] for i in restTrackIndexes) # If several tracks are the same, memory memoization will only result # in one RawDataStat being created, for one Track object. This is a # wanted optimization. 
In other cases, something is probably wrong if # a track has not been touched. However, this rule may be revisited # when track structure functionality is implemented. if trackUniqueKeys[trackIndex] not in uniqueKeyForRestTracks: raise IncompatibleTracksError( 'Track ' + prettyPrintTrackName(track.trackName) + ' was created, but not touched by statistic') except IncompatibleTracksError, e: if DebugConfig.VERBOSE: logException(e, level=logging.DEBUG, message='(Warning: error in _determineStatClass for stat: %s)' % statClass.__name__) if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS: raise except (AssertionError, IncompatibleAssumptionsError, IdenticalTrackNamesError), e: if DebugConfig.VERBOSE: logException(e, level=logging.DEBUG, message='(Warning: error in _determineStatClass for stat: %s)' % statClass.__name__) if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS: raise