def _parseDef(self, id):
    """Split an analysis definition string into its parts.

    The string is tokenized with a verbose regular expression into
    pure-text segments, bracketed option clauses ('[...]'), a trailing
    '-> StatA,StatB' statistic specification and leftover whitespace.

    Results are stored on the instance:
      * self._analysisParts: the text segments (str), an AnalysisOption
        built from each bracketed clause, and the whitespace runs, in the
        order they were matched.
      * self._statClassList: the values found in STAT_CLASS_DICT for the
        statistic names given after '->'. Unknown names are skipped.

    A message is logged through logMessage when a '->' clause does not
    yield any known statistic class.

    id -- the analysis definition string to parse. The name shadows the
          builtin, but is kept so that keyword callers keep working.
    """
    self._analysisParts = []
    self._statClassList = []

    # Raw string: the backslash escapes are meant for the regex engine,
    # not for Python's string-literal parser. The pattern is unchanged.
    parts = re.findall(r'''
        # Match pure text (part[0]):
        ( (?: [^-[]* (?:-(?!>))? )*  #1. pure text - not '[' or '-',
                                     #2. separated by a possible '-' that is not before a '>'
                                     #1 and 2 is repeated as long as necessary
          [^-[\s]+ )                 #should not end with whitespace,
                                     #as this may belong to the '->'-expression
        # Match option clause (part[1])
        |( \[ [^[\]]* \] )           #Matches an expression inside brackets '[]'
        # Match specification of statistic classes (part[2])
        |( \s? \-> \s? .* )
        # Match any additional whitespace (part[3])
        |(\s*)
        ''', id, flags=re.VERBOSE)

    # Kept as a function-level import, as in the original code
    # (presumably to avoid a circular import -- TODO confirm).
    from gold.statistic.AllStatistics import STAT_CLASS_DICT

    # The four groups are alternatives, so at most one of them is non-empty
    # in each match tuple.
    for text, optionClause, statSpec, whitespace in parts:
        if text != '':
            self._analysisParts.append(text)

        if optionClause != '':
            self._analysisParts.append(AnalysisOption(optionClause))

        if statSpec != '':
            rawNames = statSpec.replace('->', '').replace(' ', '').split(',')
            # str.split() always returns at least one item ([''] for an
            # empty specification), so empty names are dropped here to make
            # the "no statistic" case below detectable.
            statNames = [name for name in rawNames if name != '']
            self._statClassList = [STAT_CLASS_DICT[name] for name in statNames
                                   if STAT_CLASS_DICT.get(name) is not None]

            if not self._statClassList:
                if not statNames:
                    # _analysisLine is not assigned in this method; fall
                    # back to the string being parsed if it is missing.
                    logMessage('No statistic found when parsing analysisDef: '
                               + getattr(self, '_analysisLine', id))
                else:
                    # Log the keys only, as the message text states.
                    logMessage('Specified statistics not found in STAT_CLASS_DICT. '
                               'Statistics:%s, and keys in STAT_CLASS_DICT: %s'
                               % (str(statNames), str(sorted(STAT_CLASS_DICT.keys()))))

        if whitespace != '':
            self._analysisParts.append(whitespace)
def _parseDef(self, id):
    """Split an analysis definition string into its parts.

    The string is tokenized with a verbose regular expression into
    pure-text segments, bracketed option clauses ('[...]'), a trailing
    '-> StatA,StatB' statistic specification and leftover whitespace.

    Results are stored on the instance:
      * self._analysisParts: the text segments (str), an AnalysisOption
        built from each bracketed clause, and the whitespace runs, in the
        order they were matched.
      * self._statClassList: the values found in STAT_CLASS_DICT for the
        statistic names given after '->'. Unknown names are skipped.

    A message is logged through logMessage when a '->' clause does not
    yield any known statistic class.

    id -- the analysis definition string to parse. The name shadows the
          builtin, but is kept so that keyword callers keep working.
    """
    self._analysisParts = []
    self._statClassList = []

    # Raw string: the backslash escapes are meant for the regex engine,
    # not for Python's string-literal parser. The pattern is unchanged.
    parts = re.findall(r'''
        # Match pure text (part[0]):
        ( (?: [^-[]* (?:-(?!>))? )*  #1. pure text - not '[' or '-',
                                     #2. separated by a possible '-' that is not before a '>'
                                     #1 and 2 is repeated as long as necessary
          [^-[\s]+ )                 #should not end with whitespace,
                                     #as this may belong to the '->'-expression
        # Match option clause (part[1])
        |( \[ [^[\]]* \] )           #Matches an expression inside brackets '[]'
        # Match specification of statistic classes (part[2])
        |( \s? \-> \s? .* )
        # Match any additional whitespace (part[3])
        |(\s*)
        ''', id, flags=re.VERBOSE)

    # Kept as a function-level import, as in the original code
    # (presumably to avoid a circular import -- TODO confirm).
    from gold.statistic.AllStatistics import STAT_CLASS_DICT

    # The four groups are alternatives, so at most one of them is non-empty
    # in each match tuple.
    for text, optionClause, statSpec, whitespace in parts:
        if text != '':
            self._analysisParts.append(text)

        if optionClause != '':
            self._analysisParts.append(AnalysisOption(optionClause))

        if statSpec != '':
            rawNames = statSpec.replace('->', '').replace(' ', '').split(',')
            # str.split() always returns at least one item ([''] for an
            # empty specification), so empty names are dropped here to make
            # the "no statistic" case below detectable.
            statNames = [name for name in rawNames if name != '']
            self._statClassList = [STAT_CLASS_DICT[name] for name in statNames
                                   if STAT_CLASS_DICT.get(name) is not None]

            if not self._statClassList:
                if not statNames:
                    # _analysisLine is not assigned in this method; fall
                    # back to the string being parsed if it is missing.
                    logMessage('No statistic found when parsing analysisDef: '
                               + getattr(self, '_analysisLine', id))
                else:
                    # Log the keys only, as the message text states.
                    logMessage('Specified statistics not found in STAT_CLASS_DICT. '
                               'Statistics:%s, and keys in STAT_CLASS_DICT: %s'
                               % (str(statNames), str(sorted(STAT_CLASS_DICT.keys()))))

        if whitespace != '':
            self._analysisParts.append(whitespace)
def getOptionsBoxStatistic():
    """Return the keys of STAT_CLASS_DICT as a new, sorted list."""
    statNames = list(STAT_CLASS_DICT.keys())
    statNames.sort()
    return statNames
#    except Exception,e:
#        try:
#            _getStatResDict2(track1_Num, track2_Num,a)
#        except Exception,e:
#            try:
#                _getStatResDict2(track1, track2_Num,a)
#            except Exception,e:
#                print e.__class__, e, traceback.format_exc()
#                pass
#print [ k + ' = ' + resultInfoDict[k] + os.linesep \
#    for k in sorted(resultInfoDict)]
#import sys
#sys.exit()

# Run every registered statistic class, reporting those that fail for all
# of the track combinations below.
statClassList = STAT_CLASS_DICT.values()

# Track combinations, tried in this order until one of them succeeds.
# Each call is wrapped in a lambda so that its arguments are only looked up
# when the call is made, i.e. inside the try block below.
attempts = (
    lambda: _getStatResDict(track1, track2),
    lambda: _getStatResDict(track1_Num, track2_Num),
    lambda: _getStatResDict(track1, track2_Num),
)

for statClass in statClassList:
    print(statClass)
    lastError = None
    for attempt in attempts:
        try:
            attempt()
        except Exception as e:
            # Remember the failure and move on to the next combination.
            lastError = e
        else:
            break
    else:
        # Every combination raised: report the last failure only.
        print(' '.join([statClass.__name__ + ': ', str(lastError.__class__), str(lastError)]))
#from gold.statistic.AllStatistics import STAT_CLASS_DICT
#from gold.application.StatRunner import StatRunner
#from test.gold.track.common.SampleTrack import SampleTrack
#from test.gold.track.common.SampleTrackView import SampleTV, SampleTV_Num
#from gold.track.GenomeRegion import GenomeRegion
from gold.description.AnalysisManager import AnalysisManager
from gold.statistic.AllStatistics import STAT_CLASS_DICT

#class GenerateDefaultAnalysiss:
#def _getSampleTrack(numbers = False):
#    if numbers:
#        return SampleTrack( SampleTV_Num(numElements=50), True )
#    else:
#        return SampleTrack( SampleTV(starts=True, ends=True, vals=True, numElements=50), True )

# Print a 'Data inspection: <name> -> <name>' line for every statistic class
# in STAT_CLASS_DICT that no analysis from AnalysisManager lists among its
# statistics.

#stats = StatOptions().getStats(_getSampleTrack(), _getSampleTrack() )
stats = STAT_CLASS_DICT.values()

# The analyses and their statistics do not depend on the statistic being
# checked, so they are collected once instead of once per statistic.
# list() guards against getAllStats() returning a one-shot iterator.
# NOTE(review): assumes getAllAnalyses()/getAllStats() have no side effects
# that callers rely on being repeated -- confirm.
coveredStatLists = [list(a.getAllStats()) for a in AnalysisManager.getAllAnalyses()]

uncoveredStats = [stat.__name__ for stat in stats
                  if not any(stat in statList for statList in coveredStatLists)]

for stat in uncoveredStats:
    print('Data inspection: ' + stat + ' -> ' + stat)

#for statClass in STAT_CLASS_DICT.values():
#    try:
#        StatRunner.run([GenomeRegion('hg18', 'chr1', 0, 1)], track1, track2, statClass)
#    except:
#        pass