def extract_feature(self, genome, track, ref, option):
    '''
    Return the relation of the cluster track to the reference track.

    genome -- genome passed on to the feature catalog and to the bin source
    track  -- the cluster track
    ref    -- the reference track
    option -- name of the statistical function (feature) to use; it is looked
              up in FeatureCatalog.getFeaturesFromTracks(genome, track, ref)

    Returns the entry of the global analysis result stored under the result
    key that belongs to the chosen feature.
    '''
    # validFeature contains the analysisDef and the key needed to pick the
    # wanted number out of the global result.
    validFeature = FeatureCatalog.getFeaturesFromTracks(genome, track, ref)[option]

    # The track format is looked up through 'self': this is an instance
    # method and no name 'cls' is bound in it.
    if option == 'Prop. of tr1-points falling inside segments of tr2' \
            and self.getTrackFormat(genome, track) in ['Segments', 'Valued segments']:
        analysisDef = 'dummy [tf1=SegmentToMidPointFormatConverter] -> DerivedPointCountsVsSegsStat'
    else:
        analysisDef = validFeature[0]  # or any other statistic from the HB collection

    # "compare_in" selects one of the predefined region specifications; any
    # other value falls back to the free-form "region"/"binsize" parameters.
    compareIn = self.params.get("compare_in")
    if compareIn == "Chromosomes":
        regSpec = "__chrs__"
        binSpec = self.params.get("Chromosomes")
    elif compareIn == "Chromosome arms":
        regSpec = "__chrArms__"
        binSpec = self.params.get("Chromosome_arms")
    elif compareIn == "Cytobands":
        regSpec = "__chrBands__"
        binSpec = self.params.get("Cytobands")
    else:
        # regSpec could be e.g. 'chr1' for a whole chromosome or '*' for the
        # whole genome; binSpec could be e.g. '100', '1k', '10m' or '*' for
        # whole regions/chromosomes as bins.
        regSpec = self.params.get("region")
        binSpec = self.params.get("binsize")

    userBinSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)

    if option == 'Prop. of tr2 covered by tr1':
        # The roles of reference track and cluster track are swapped for
        # this statistic.
        result = AnalysisDefJob(analysisDef, ref, track, userBinSource).run()
    else:
        result = AnalysisDefJob(analysisDef, track, ref, userBinSource).run()

    mainResultDict = result.getGlobalResult()
    return mainResultDict[validFeature[1]]
def plainRun(analysisDef, genome, track1Fn, track2Fn):
    '''
    Currently under development, not yet functioning.

    Builds a track name from each of the two file names, runs analysisDef on
    them and returns whatever the job's run() returns.

    Note that the file names (track1Fn, track2Fn) must either be valid
    .dat-files from a Galaxy system, or be located in a path where the two
    deepest levels are numerical values above 1000, and unique between
    different runs by this tool.
    '''
    from quick.application.ExternalTrackManager import ExternalTrackManager

    galaxyTrackNames = []
    for trackFn in (track1Fn, track2Fn):
        # fixme (kept from the original author): is this check correct?
        assert all(idPart > 1000 for idPart in ExternalTrackManager.extractIdFromGalaxyFn(trackFn))
        galaxyTrackNames.append(ExternalTrackManager.constructGalaxyTnFromSuitedFn(trackFn))
    tn1, tn2 = galaxyTrackNames

    from quick.application.GalaxyInterface import GalaxyInterface
    binSource = GalaxyInterface._getUserBinSource('chrs', '*', genome, tn1, tn2)

    from gold.application.StatRunner import AnalysisDefJob
    return AnalysisDefJob(analysisDef, tn1, tn2, binSource).run(printProgress=False)
def extract_feature(self, genome, track, ref, option):
    '''
    Compute how the cluster track relates to the reference track.

    track and ref are the cluster track and the reference track; option is
    the name of the statistical function (feature) to apply, as listed by
    FeatureCatalog.getFeaturesFromTracks(genome, track, ref).

    The value returned is the entry of the global result found under the
    result key of the selected feature.
    '''
    # Pair of (analysisDef, key into the global result) for this feature.
    validFeature = FeatureCatalog.getFeaturesFromTracks(genome, track, ref)[option]

    if option == 'Prop. of tr1-points falling inside segments of tr2' \
            and self.getTrackFormat(genome, track) in ['Segments', 'Valued segments']:
        analysisDef = 'dummy [tf1=SegmentToMidPointFormatConverter] -> DerivedPointCountsVsSegsStat'
    else:
        analysisDef = validFeature[0]  # or any other statistic from the HB collection

    # (value of "compare_in", region specification, parameter with the bins)
    predefinedRegions = (
        ("Chromosomes", "__chrs__", "Chromosomes"),
        ("Chromosome arms", "__chrArms__", "Chromosome_arms"),
        ("Cytobands", "__chrBands__", "Cytobands"),
    )
    compareIn = self.params.get("compare_in")
    for label, predefinedSpec, binParam in predefinedRegions:
        if compareIn == label:
            regSpec = predefinedSpec
            binSpec = self.params.get(binParam)
            break
    else:
        # Free-form choice: regSpec e.g. 'chr1' or '*', binSpec e.g. '100',
        # '1k', '10m' or '*' for whole regions/chromosomes as bins.
        regSpec = self.params.get("region")
        binSpec = self.params.get("binsize")

    userBinSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)

    # For this one statistic the reference and cluster tracks change places.
    if option == 'Prop. of tr2 covered by tr1':
        firstTrack, secondTrack = ref, track
    else:
        firstTrack, secondTrack = track, ref
    result = AnalysisDefJob(analysisDef, firstTrack, secondTrack, userBinSource).run()

    return result.getGlobalResult()[validFeature[1]]
def handleRegionClustering(self, genome, tracks, clusterMethod, extra_option):
    '''
    Print the per-region feature values of the reference track 'reftrack1',
    using the regions of the history track 'track1' as bins.

    Only history tracks whose first element is 'galaxy' are handled; for
    anything else nothing more is done. The parameters tracks, clusterMethod
    and extra_option are not used. Nothing is returned.
    '''
    region_cluster_track = self.getHistoryTrackDef('track1')
    print(region_cluster_track)
    region_ref_track = self.params.get('reftrack1')

    # NOTE(review): the flattened source does not show where the 'if' body
    # ended; all remaining statements depend on track_path, so they are all
    # treated as conditional on a galaxy track - confirm.
    if region_cluster_track[0] != 'galaxy':
        return

    file_type = region_cluster_track[1]  # read but not used further
    track_path = region_cluster_track[2]
    userBinSource = GalaxyInterface._getUserBinSource('bed', track_path, genome)

    validFeature = SplittedRegionsAsFeaturesCatalog.getValidAnalyses(genome, region_ref_track, [])
    analysisDef = validFeature[0]
    result = AnalysisDefJob(analysisDef, region_ref_track, [], userBinSource).run()

    featureValues = []
    for localKey in sorted(result.keys()):
        featureValues.append(result[localKey][validFeature[1]])
    print(featureValues)
def runJob(batchLine, genome, fullAccess, galaxyFn=None, printProgress=True):
    '''
    Parse one batch line and run the analysis it describes.

    Returns the error result of the parser if parsing produced one.
    Otherwise a full run is attempted and its results are returned; if the
    run raises, a Results object carrying the error is returned instead
    (the exception is re-raised when DebugConfig.PASS_ON_BATCH_EXCEPTIONS
    is set).
    '''
    bc = BatchRunner.parseBatchLine(batchLine, genome, fullAccess)
    if bc.errorResult is not None:
        return bc.errorResult

    # Try a full run, and return either results or an exception
    try:
        fullRunParams = {}
        if USE_PARALLEL:
            # A time stamp serves as the unique id of the run. (An earlier,
            # disabled variant took it from galaxyFn unless this was a test.)
            uniqueId = time.time()
            fullRunParams["uniqueId"] = uniqueId

        if bc.cleanedTrackNameIntensity is not None:
            fullRunParams['trackNameIntensity'] = '|'.join(tuple(bc.cleanedTrackNameIntensity))

        # "[key=value]" for every parameter, followed by "->" and the
        # name of the statistic class.
        analysisDef = ''.join(['[' + key + '=' + value + ']'
                               for key, value in bc.paramDict.items()]) \
            + '->' + bc.statClassName

        from quick.application.GalaxyInterface import GalaxyInterface
        GalaxyInterface._tempAnalysisDefHacks(analysisDef)

        if printProgress:
            revEngBatchLine = GalaxyInterface._revEngBatchLine(
                bc.trackName1, bc.trackName2, bc.trackNameIntensity,
                analysisDef, bc.regSpec, bc.binSpec, genome)
            print('Corresponding batch command line:<br>' + revEngBatchLine + '<br><br>')

        job = AnalysisDefJob(analysisDef, bc.cleanedTrackName1, bc.cleanedTrackName2,
                             bc.userBinSource, galaxyFn=galaxyFn, **fullRunParams)
        results = job.run(printProgress)
        presCollectionType = results.getPresCollectionType()

        if len(results.getResDictKeys()) > 0 and GalaxyInterface.APPEND_ASSEMBLY_GAPS \
                and presCollectionType == 'standard':
            if USE_PARALLEL:
                gapJob = AssemblyGapJob(bc.userBinSource, genome, uniqueId=uniqueId)
            else:
                gapJob = AssemblyGapJob(bc.userBinSource, genome)
            gapRes = gapJob.run(printProgress)
            results.includeAdditionalResults(gapRes, ensureAnalysisConsistency=False)

    except Exception as e:
        results = Results(bc.cleanedTrackName1, bc.cleanedTrackName2, bc.statClassName)
        results.addError(e)
        logException(e, message='Error in batch run')
        if DebugConfig.PASS_ON_BATCH_EXCEPTIONS:
            raise

    return results
def doAnalysisFromDefTwo(cls, tracks, analysisDef, regSpec, binSpec, genome, flag):
    '''
    Run analysisDef on the first two tracks within the bins given by
    regSpec/binSpec and return the per-region results as a list.

    Each region key of the result is paired (as a string) with its result;
    the pairs are passed together with flag to cls.sortDictionaries, and
    its output is returned as a list.
    '''
    binSource = UserBinSource(regSpec, binSpec, genome)
    result = AnalysisDefJob(analysisDef.getDefAfterChoices(),
                            tracks[0].trackName,
                            tracks[1].trackName,
                            binSource,
                            galaxyFn=None).run()

    keyedResults = []
    for regionKey in result.getAllRegionKeys():
        keyedResults.append((str(regionKey), result[regionKey]))

    return list(cls.sortDictionaries(keyedResults, flag))
def computeDistance(cls, genome, track1, track2, feature, regSpec, binSpec):
    '''
    Direct distance between track1 and track2.

    track1 and track2 are two lists like:
        ['Sequence','Repeating elements','LINE']
    feature specifies how the distance between track1 and track2 is defined;
    it selects an entry of DirectDistanceCatalog.getValidAnalyses(genome,
    track1, track2). regSpec and binSpec describe the bins of the analysis.

    Returns the entry of the global result stored under the result key of
    the selected feature.
    '''
    # (analysisDef, key into the global result), e.g. an analysisDef such as
    # 'bla bla -> PropFreqOfTr1VsTr2Stat' or any other statistic from the HB
    # collection.
    validFeature = DirectDistanceCatalog.getValidAnalyses(genome, track1, track2)[feature]
    analysisDef = validFeature[0]

    binSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)
    globalResult = AnalysisDefJob(analysisDef, track1, track2, binSource).run().getGlobalResult()
    return globalResult[validFeature[1]]
def build_feature_vector(genome, ctrack, feature, regSpec, binSpec):
    '''
    Create a feature vector for ctrack.

    feature specifies how the vector is constructed; it selects an entry of
    LocalResultsAsFeaturesCatalog.getValidAnalyses(genome, ctrack, []).
    The vector holds one value per local result key, in sorted key order.
    '''
    validFeature = LocalResultsAsFeaturesCatalog.getValidAnalyses(genome, ctrack, [])[feature]
    analysisDef = validFeature[0]

    binSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)
    result = AnalysisDefJob(analysisDef, ctrack, [], binSource).run()

    vector = []
    for localKey in sorted(result.keys()):
        vector.append(result[localKey][validFeature[1]])
    return vector
def extract_feature(cls, genome, track, ref, option, regSpec, binSpec, trackFormat):
    '''
    Return the relation of the cluster track to the reference track.

    genome      -- genome passed on to the feature catalog and the bin source
    track       -- the cluster track
    ref         -- the reference track
    option      -- name of the statistical function (feature) to use; it is
                   looked up in FeatureCatalog.getFeaturesFromTracks(genome,
                   track, ref)
    regSpec     -- region specification for the bin source
    binSpec     -- bin specification for the bin source
    trackFormat -- format of the cluster track; 'Segments' and 'Valued
                   segments' get special treatment for one of the options

    Returns the entry of the global result stored under the result key of
    the chosen feature.

    Raises AssertionError if the analysis produced no global result.
    '''
    # validFeature contains the analysisDef and the key needed to pick the
    # wanted number out of the global result.
    validFeature = FeatureCatalog.getFeaturesFromTracks(genome, track, ref)[option]

    if option == 'Prop. of tr1-points falling inside segments of tr2' \
            and trackFormat in ['Segments', 'Valued segments']:
        analysisDef = 'dummy [tf1=SegmentToMidPointFormatConverter] -> DerivedPointCountsVsSegsStat'
    else:
        analysisDef = validFeature[0]  # or any other statistic from the HB collection

    userBinSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)

    if option == 'Prop. of tr2 covered by tr1':
        # The roles of reference track and cluster track are swapped for
        # this statistic.
        result = AnalysisDefJob(analysisDef, ref, track, userBinSource).run()
    else:
        result = AnalysisDefJob(analysisDef, track, ref, userBinSource).run()

    resultKey = validFeature[1]
    globalResult = result.getGlobalResult()
    if globalResult is None:
        # Raised explicitly so the check also runs under "python -O". The
        # message names the analysisDef that was actually run; previously
        # the result key was reported under that label.
        raise AssertionError('Did not get any global result for analysisDef: %s (result key: %s)'
                             % (analysisDef, resultKey))
    return globalResult[resultKey]
def testCreateIntensityTrack(self):
    '''
    Run a CreateFunctionTrackStat job over three regions and check the
    bounding regions and the data of the track it wrote.

    Positions inside one of the three regions must map to the expected
    BoundingRegionInfo; positions outside every region must raise
    OutsideBoundingRegionError. The track data is expected to have the
    single key 'val' holding 11000 values.
    '''
    regions = [GenomeRegion(self._genome, self._chr, 1000, 5000),\
               GenomeRegion(self._genome, self._chr, 6000, 7000),\
               GenomeRegion(self._genome, self._chr, 10000, 16000)]
    job = AnalysisDefJob('[dataStat=SimpleBpIntensityStat] [outTrackName=' + '^'.join(self._trackName) + '] [numDiscreteVals=10] -> CreateFunctionTrackStat', \
                         ['nums'], ['points'], regions, genome=self._genome)

    # The job is run twice.
    # NOTE(review): the flattened source does not show whether the checks
    # below ran after each run or only once after both runs; they are placed
    # inside the loop here - confirm against the original layout.
    for x in range(2):
        job.run()

        brShelve = BoundingRegionShelve(self._genome, self._trackName, allowOverlaps=False)

        # Before the first region.
        self.assertRaises(OutsideBoundingRegionError, \
                          brShelve.getBoundingRegionInfo, GenomeRegion(self._genome, self._chr, 0, 1))
        #self.assertEquals(BoundingRegionInfo(0, 1, 0, 0, 0, 0),
        #                  brShelve.getBoundingRegionInfo(GenomeRegion(self._genome, self._chr, 0, 1)))

        # Inside the first region.
        self.assertEquals(BoundingRegionInfo(1000, 5000, 0, 4000, 0, 0),
                          brShelve.getBoundingRegionInfo(GenomeRegion(self._genome, self._chr, 2000, 2001)))

        # Between the first and the second region.
        self.assertRaises(OutsideBoundingRegionError, \
                          brShelve.getBoundingRegionInfo, GenomeRegion(self._genome, self._chr, 5500, 5501))
        #self.assertEquals(BoundingRegionInfo(5500, 5501, 0, 0, 0, 0),
        #                  brShelve.getBoundingRegionInfo(GenomeRegion(self._genome, self._chr, 5500, 5501)))

        # Inside the second region.
        self.assertEquals(BoundingRegionInfo(6000, 7000, 4000, 5000, 0, 0),
                          brShelve.getBoundingRegionInfo(GenomeRegion(self._genome, self._chr, 6500, 6501)))

        # Between the second and the third region.
        self.assertRaises(OutsideBoundingRegionError, \
                          brShelve.getBoundingRegionInfo, GenomeRegion(self._genome, self._chr, 8000, 8001))
        #self.assertEquals(BoundingRegionInfo(8000, 8001, 0, 0, 0, 0),
        #                  brShelve.getBoundingRegionInfo(GenomeRegion(self._genome, self._chr, 8000, 8001)))

        # Inside the third region.
        self.assertEquals(BoundingRegionInfo(10000, 16000, 5000, 11000, 0, 0),
                          brShelve.getBoundingRegionInfo(GenomeRegion(self._genome, self._chr, 11000, 11001)))

        # After the third region.
        self.assertRaises(OutsideBoundingRegionError, \
                          brShelve.getBoundingRegionInfo, GenomeRegion(self._genome, self._chr, 16500, 16501))
        #self.assertEquals(BoundingRegionInfo(16500, 16501, 0, 0, 0, 0),
        #                  brShelve.getBoundingRegionInfo(GenomeRegion(self._genome, self._chr, 16500, 16501)))

        # 4000 + 1000 + 6000 = 11000 positions in the three regions together.
        trackData = TrackSource().getTrackData(self._trackName, self._genome, None, False)
        self.assertListsOrDicts(['val'], trackData.keys())
        self.assertListsOrDicts((11000, ), trackData['val'].shape)
def computeDistance(track1, track2, feature='direct distance'):
    '''
    Distance between track1 and track2.

    track1 and track2 are two lists like:
        ['Sequence','Repeating elements','LINE']
    feature specifies how the distance between track1 and track2 is defined.
    In this version it is not consulted: the statistic, region, bin size and
    genome are all fixed below.

    Returns the 'Variance' entry of the global result.
    '''
    genome = 'hg18'
    regSpec = 'chr1'  # e.g. 'chr1' for the whole chromosome or '*' for the whole genome
    binSpec = '10m'   # could also be e.g. '100', '1k' or '*' for whole regions/chromosomes as bins
    analysisDef = 'bla bla -> PropFreqOfTr1VsTr2Stat'  # or any other statistic from the HB collection

    binSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)

    # Original author's notes: the result is of the class Results
    # (gold.result.Results); PropFreqOfTr1VsTr2Stat reports the keys
    # 'Track1Prop', 'CountTrack1', 'CountTrack2' and 'Variance'.
    globalResult = AnalysisDefJob(analysisDef, track1, track2, binSource).run().getGlobalResult()
    return globalResult['Variance']
def doAnalysis(analysisSpec, analysisBins, tracks):
    '''
    Performs an analysis, as specified by the analysisSpec object, in each
    bin specified by analysisBins, on the data sets specified in tracks.

    The first track and, when present, the second one are handed to the job
    directly; any further tracks are encoded into the 'extraTracks'
    parameter of analysisSpec. Returns the result of the job run.

    Typical usage:
        analysisSpec = AnalysisSpec(AvgSegLenStat)
        analysisSpec.addParameter("withOverlaps","no")
        analysisBins = GlobalBinSource('hg18')
        tracks = [ Track(['Genes and gene subsets','Genes','Refseq']) ]
        results = doAnalysis(analysisSpec, analysisBins, tracks)
    '''
    # TODO: handle multiple tracks analysis
    # In an API setting exceptions should not generally be hidden, so debug
    # mode and logging are not set up here. Maybe this should be optional.
    silenceRWarnings()
    silenceNumpyWarnings()

    if len(tracks) > 2:
        from gold.util.CommonConstants import MULTIPLE_EXTRA_TRACKS_SEPARATOR
        encodedExtraTracks = []
        for extraTrack in tracks[2:]:
            encodedExtraTracks.append('^'.join([quote(part) for part in extraTrack.trackName]))
        analysisSpec.addParameter('extraTracks',
                                  MULTIPLE_EXTRA_TRACKS_SEPARATOR.join(encodedExtraTracks))

    resolvedDef = analysisSpec.getDefAfterChoices()
    firstTrackName = tracks[0].trackName
    if len(tracks) > 1:
        secondTrackName = tracks[1].trackName
    else:
        secondTrackName = None

    job = AnalysisDefJob(resolvedDef, firstTrackName, secondTrackName,
                         analysisBins, galaxyFn=None)
    return job.run(printProgress=False)  # printProgress should be optional?
def computeDistance(self, genome, track1, track2, feature):
    '''
    Direct distance between track1 and track2.

    track1 and track2 are two lists like:
        ['Sequence','Repeating elements','LINE']
    feature specifies how the distance between track1 and track2 is defined;
    it selects an entry of DirectDistanceCatalog.getValidAnalyses(genome,
    track1, track2). The bins of the analysis are taken from self.params.

    Returns the entry of the global result stored under the result key of
    the selected feature.
    '''
    # (analysisDef, key into the global result), e.g. an analysisDef such as
    # 'bla bla -> PropFreqOfTr1VsTr2Stat' or any other statistic from the HB
    # collection.
    validFeature = DirectDistanceCatalog.getValidAnalyses(genome, track1, track2)[feature]
    analysisDef = validFeature[0]

    # (value of "compare_in", region specification, parameter with the bins)
    predefinedRegions = (
        ("Chromosomes", "__chrs__", "Chromosomes"),
        ("Chromosome arms", "__chrArms__", "Chromosome_arms"),
        ("Cytobands", "__chrBands__", "Cytobands"),
    )
    compareIn = self.params.get("compare_in")
    for label, predefinedSpec, binParam in predefinedRegions:
        if compareIn == label:
            regSpec = predefinedSpec
            binSpec = self.params.get(binParam)
            break
    else:
        # Free-form choice: regSpec e.g. 'chr1' or '*', binSpec e.g. '100',
        # '1k', '10m' or '*' for whole regions/chromosomes as bins.
        regSpec = self.params.get("region")
        binSpec = self.params.get("binsize")

    binSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)

    # Original author's note: the result is of the class Results
    # (gold.result.Results).
    globalResult = AnalysisDefJob(analysisDef, track1, track2, binSource).run().getGlobalResult()
    return globalResult[validFeature[1]]
def execute(choices, galaxyFn=None, username=''):
    '''
    Is called when the execute-button is pushed by the web-user. Should print
    output as HTML to standard out, which will be directed to a results page
    in the Galaxy history. If needed, StaticFile can be used to get a path
    where additional files can be put (e.g. generated image files).

    choices is a list of selections made by the web-user in each options box.

    In this version the selections are not consulted: a fixed pair of
    tracks is analysed with DerivedOverlapStat on chr1 of hg18 in 10m bins,
    and every key/value pair of the global result is printed as an item of
    an HTML ordered list.
    '''
    # NOTE: earlier experiments (distance matrix, tree clustering and a
    # dendrogram image written via StaticFile) were disabled in the original
    # and are not reproduced here.
    genome = 'hg18'

    # All elements in the 'Repeating elements' directory. The list is only
    # referred to by disabled code, but the call itself is kept.
    allRepeats = GalaxyInterface.getSubTrackNames(genome, ['Sequence', 'Repeating elements'], False)

    track1 = ['Sequence', 'Repeating elements', 'DNA']
    track2 = ['Gene regulation', 'TFBS', 'High Throughput']
    analysisDef = 'bla bla -> DerivedOverlapStat'  # or any other statistic from the HB collection
    regSpec = 'chr1'  # e.g. 'chr1' for the whole chromosome or '*' for the whole genome
    binSpec = '10m'   # could also be e.g. '100', '1k' or '*' for whole regions/chromosomes as bins
    genome = 'hg18'

    userBinSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)

    # Original author's note: the result is of the class Results
    # (gold.result.Results).
    mainResultDict = AnalysisDefJob(analysisDef, track1, track2, userBinSource).run().getGlobalResult()

    print('<ol>')
    for key in mainResultDict.keys():
        print('<li>key:%s,value:%s </li>' % (key, mainResultDict[key]))
    print('</ol>')
from gold.application.GalaxyInterface import GalaxyInterface from gold.application.StatRunner import AnalysisDefJob trackName1 = ['Sequence', 'Repeating elements', 'LINE'] trackName2 = ['Sequence', 'Repeating elements', 'SINE'] #GalaxyInterface.getSubTrackNames(['Sequence','Repeating elements'],False) analysisDef = 'bla bla -> PropFreqOfTr1VsTr2Stat' #or any other statistic from the HB collection regSpec = 'chr1' #could also be e.g. 'chr1' for the whole chromosome or '*' for the whole genome binSpec = '10m' #could also be e.g.'100', '1k' or '*' for whole regions/chromosomes as bins genome = 'hg18' #GalaxyInterface.run(trackName1, trackName2, question, regSpec, binSpec, genome='hg18') userBinSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome) result = AnalysisDefJob(analysisDef, trackName1, trackName2, userBinSource).run() #result er av klassen Results.. #from gold.result.Results import Results mainResultDict = result.getGlobalResult() #from PropFreqOfTr1VsTr2Stat:... #self._result = {'Track1Prop':ratio,'CountTrack1':c1, 'CountTrack2':c2,'Variance':variance} mainValueOfInterest = mainResultDict['Variance'] print 'The ..variance..: ', mainValueOfInterest
def runJob(batchLine, genome, fullAccess, galaxyFn=None, printProgress=True):
    '''
    Parse one batch line and run the analysis it describes.

    Returns the error result of the parser if parsing produced one.
    Otherwise a full run is attempted and its results are returned; if the
    run raises, a Results object carrying the error is returned instead
    (the exception is re-raised when DebugConfig.PASS_ON_BATCH_EXCEPTIONS
    is set).
    '''
    bc = BatchRunner.parseBatchLine(batchLine, genome, fullAccess)
    if bc.errorResult is not None:
        return bc.errorResult

    # Try a full run, and return either results or an exception
    try:
        fullRunParams = {}
        if USE_PARALLEL:
            # TODO: Requirements for parallel runs should not be added in places like these.
            # Parallelization should be a feature of the job runner somehow.
            # A time stamp serves as the unique id of the run. (An earlier,
            # disabled variant took it from galaxyFn unless this was a test.)
            uniqueId = time.time()
            fullRunParams["uniqueId"] = uniqueId

        from quick.application.GalaxyInterface import GalaxyInterface

        # "[key=value]" for every parameter, followed by "->" and the
        # name of the statistic class.
        analysisDef = ''.join(['[' + key + '=' + value + ']'
                               for key, value in bc.paramDict.items()]) \
            + '->' + bc.statClassName

        # TODO: Keeping the ugly accesses to private methods in GalaxyInterface for now.
        # To be refactored.
        # NOTE(review): the returned trackNames is not used below; the job
        # is built from bc.cleanedTrackNames - confirm that this is intended.
        trackNames, analysisDef = GalaxyInterface._cleanUpAnalysisDef(bc.cleanedTrackNames, analysisDef)

        if printProgress:
            revEngBatchLine = RunDescription.getRevEngBatchLine(
                analysisDef, bc.trackNames, bc.cleanedTrackNames,
                bc.regSpec, bc.binSpec, genome)
            print('Corresponding batch command line:<br>{}<br><br>'.format(revEngBatchLine))

        job = AnalysisDefJob(analysisDef, bc.cleanedTrackNames[0], bc.cleanedTrackNames[1],
                             bc.userBinSource, galaxyFn=galaxyFn, **fullRunParams)
        results = job.run(printProgress)
        presCollectionType = results.getPresCollectionType()

        if len(results.getResDictKeys()) > 0 and GalaxyInterface.APPEND_ASSEMBLY_GAPS \
                and presCollectionType == 'standard':
            if USE_PARALLEL:
                gapJob = AssemblyGapJob(bc.userBinSource, genome, uniqueId=uniqueId)
            else:
                gapJob = AssemblyGapJob(bc.userBinSource, genome)
            gapRes = gapJob.run(printProgress)
            results.includeAdditionalResults(gapRes, ensureAnalysisConsistency=False)

    except Exception as e:
        results = Results(bc.cleanedTrackNames[0], bc.cleanedTrackNames[1], bc.statClassName)
        results.addError(e)
        logException(e, message='Error in batch run')
        if DebugConfig.PASS_ON_BATCH_EXCEPTIONS:
            raise

    return results
from gold.application.GalaxyInterface import GalaxyInterface from gold.application.StatRunner import AnalysisDefJob trackName1 = ['Sequence','Repeating elements','LINE'] trackName2 = ['Sequence','Repeating elements','SINE'] #GalaxyInterface.getSubTrackNames(['Sequence','Repeating elements'],False) analysisDef = 'bla bla -> PropFreqOfTr1VsTr2Stat' #or any other statistic from the HB collection regSpec = 'chr1' #could also be e.g. 'chr1' for the whole chromosome or '*' for the whole genome binSpec = '10m' #could also be e.g.'100', '1k' or '*' for whole regions/chromosomes as bins genome = 'hg18' #GalaxyInterface.run(trackName1, trackName2, question, regSpec, binSpec, genome='hg18') userBinSource = GalaxyInterface._getUserBinSource(regSpec,binSpec,genome) result = AnalysisDefJob(analysisDef, trackName1, trackName2, userBinSource).run() #result er av klassen Results.. #from gold.result.Results import Results mainResultDict = result.getGlobalResult() #from PropFreqOfTr1VsTr2Stat:... #self._result = {'Track1Prop':ratio,'CountTrack1':c1, 'CountTrack2':c2,'Variance':variance} mainValueOfInterest = mainResultDict['Variance'] print 'The ..variance..: ', mainValueOfInterest