def decodeChoice(self, opts, id, choice):
    """Decode a submitted choice value.

    When ``opts`` is the '__track__' marker, the choice is converted to a
    string, split on ':' into track name parts, handed to
    GalaxyInterface.cleanUpTrackName and joined with ':' again. For any
    other ``opts`` the parent class implementation is used.
    """
    if opts != '__track__':
        return super(HyperBrowserControllerMixin, self).decodeChoice(opts, id, choice)

    nameParts = str(choice).split(':')
    # The return value is ignored, so the list is expected to be cleaned in place.
    GalaxyInterface.cleanUpTrackName(nameParts)
    return ':'.join(nameParts)
def execute(self):
    """Start preprocessing of the selected track for the current genome.

    Redirects output through self.stdoutToHistory(), reads the optional
    'userEmail' and 'track1' entries from self.params (falling back to ''
    and [] respectively), prints the arguments and passes them to
    GalaxyInterface.startPreProcessing.
    """
    self.stdoutToHistory()

    # dict.has_key() is deprecated; .get() with a default performs the same
    # "value if present, else fallback" lookup.
    # NOTE(review): assumes self.params offers a dict-style .get() - confirm.
    username = self.params.get('userEmail', '')
    track = self.params.get('track1', [])

    print('GalaxyInterface.startPreProcessing %s' % ((self.genome, track, username),))
    GalaxyInterface.startPreProcessing(self.genome, track, username)
def execute(self):
    """Start preprocessing of the selected track for the current genome.

    Redirects output through self.stdoutToHistory(), looks up the optional
    'userEmail' (default '') and 'track1' (default []) parameters, prints
    the call arguments and forwards them to
    GalaxyInterface.startPreProcessing.
    """
    self.stdoutToHistory()

    # .get() replaces the deprecated has_key() check followed by indexing.
    # NOTE(review): assumes self.params offers a dict-style .get() - confirm.
    username = self.params.get('userEmail', '')
    track = self.params.get('track1', [])

    print('GalaxyInterface.startPreProcessing %s' % ((self.genome, track, username),))
    GalaxyInterface.startPreProcessing(self.genome, track, username)
def testGetStatOptions(self):
    """Check that getStatOptions yields at least one option for two track pairs.

    'segsMany' is combined first with 'segs' and then with 'nums', both under
    the 'Hypothesis testing' category on 'TestGenome'.
    DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS is switched off for the duration
    of the checks and restored afterwards, even when an assertion fails, so
    the setting cannot leak into other tests.
    """
    if self.VERBOSE:
        DebugConfig.VERBOSE = True
        DebugConfig.PASS_ON_COMPUTE_EXCEPTIONS = True

    prevVal = DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS
    DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS = False
    try:
        for trackName2 in (['segs'], ['nums']):
            statOptions = GalaxyInterface.getStatOptions('TestGenome', ['segsMany'], trackName2, 'Hypothesis testing')
            self.assertTrue(len(statOptions) > 0)
    finally:
        DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS = prevVal
def reinstallGenomes(genomeList, username, genomeListIsFn='True', verbose='False'):
    "genomeList username genomeListIsFn=True verbose=False"
    # NOTE(review): the docstring above looks like a command-line usage string
    # and is therefore kept verbatim - confirm before rewording it.
    #
    # Re-runs preprocessing of a fixed set of track categories for each genome
    # and then flags the genome as installed.
    #   genomeList     - file name with one genome per line, or a comma-separated
    #                    string of genomes (selected by genomeListIsFn)
    #   username       - passed on to startPreProcessing and flagAsInstalled
    #   genomeListIsFn - string holding a Python literal (e.g. 'True')
    #   verbose        - string holding a Python literal (e.g. 'False')
    from quick.application.GalaxyInterface import GalaxyInterface
    from gold.util.CustomExceptions import PreprocessError
    from quick.extra.StandardizeTrackFiles import PlainMover
    from config.Config import DebugConfig

    genomeListIsFn = ast.literal_eval(genomeListIsFn)
    verbose = ast.literal_eval(verbose)

    track_cats = ['Sequence:DNA', 'Genome build properties', 'Genes and gene subsets', 'Sample data']

    prevPassOnPreprocessExceptions = DebugConfig.PASS_ON_PREPROCESS_EXCEPTIONS
    prevVerbose = DebugConfig.VERBOSE
    DebugConfig.PASS_ON_PREPROCESS_EXCEPTIONS = True
    DebugConfig.VERBOSE = verbose

    try:
        if genomeListIsFn:
            # Close the list file deterministically instead of leaking the handle.
            with open(genomeList) as genomeFile:
                genomes = [line.strip() for line in genomeFile]
        else:
            genomes = genomeList.split(',')

        for genome in genomes:
            genome = genome.strip()
            print(genome)

            if GenomeInfo(genome).installed and GenomeInfo._queryShelveWithKey(genome, 'r', old=False) is not None:
                # Trigger installation double-check
                gi = GenomeInfo(genome)
                gi.installed = False
                gi.store()
                if gi.isInstalled():
                    print('\n**Genome "{}" was alread installed in the new shelve. Skipping!**\n'.format(genome))
                    continue

            for track_cat in track_cats:
                try:
                    GalaxyInterface.startPreProcessing(genome, track_cat.split(':'), username)
                except PreprocessError as e:
                    traceback.print_exc()
                    print('')
                    # The failing track is identified from the exception text.
                    match = re.search(r'trackName="([^"]+)"', str(e))
                    if match:
                        trackName = match.group(1).split(':')
                        PlainMover.parseFiles(genome, trackName, 'std_to_error')
                        print('\n\n**Moved track "{}" to "parsingErrorTracks" file structure**\n'.format(':'.join(trackName)))

            genome_info = GenomeInfo(genome)
            installed = genome_info.flagAsInstalled(username)
            if installed:
                print('\n**Genome "{}" was reinstalled successfully!**\n'.format(genome))
            else:
                print('\n**Genome "{}" was not reinstalled!**\n'.format(genome))
    finally:
        # The previous values were saved above but never written back; restore
        # them so the global debug configuration does not stay modified.
        DebugConfig.PASS_ON_PREPROCESS_EXCEPTIONS = prevPassOnPreprocessExceptions
        DebugConfig.VERBOSE = prevVerbose
def getTrackElement(self, id, label, history=False, ucsc=False, tracks=None):
    """Collect history datasets for a track element.

    When ``history`` is true, the datasets in the supported Galaxy file
    formats are requested from self.galaxy. Any exception raised while doing
    so is printed and an empty dataset list is kept instead.
    """
    datasets = []
    if history:
        try:
            fetchHistory = self.galaxy.getHistory
            datasets = fetchHistory(GalaxyInterface.getSupportedGalaxyFileFormats())
        except Exception as e:
            # Best effort: report the problem and carry on without datasets.
            print(e)
def _assertExpandBedSegments(self, inContents, outContents, upFlank, downFlank, treatTrackAs, removeChrBorderCrossing, suffix):
    """Run expandBedSegments on temporary files and compare the output.

    ``inContents`` is written to a temporary input file with the given
    ``suffix``. GalaxyInterface.expandBedSegments is then run from that file
    into a second temporary file for 'TestGenome', and what is read back from
    the output file must equal ``outContents``.
    """
    from tempfile import NamedTemporaryFile

    with NamedTemporaryFile(suffix=suffix) as inFile:
        inFile.write(inContents)
        # Make sure the contents are on disk before the file is re-opened by name.
        inFile.flush()

        with NamedTemporaryFile(suffix=suffix) as outFile:
            GalaxyInterface.expandBedSegments(inFile.name, outFile.name, 'TestGenome',
                                              upFlank, downFlank, treatTrackAs,
                                              removeChrBorderCrossing, suffix)
            expandedContents = outFile.read()

    # assertEquals is a deprecated alias of assertEqual.
    self.assertEqual(outContents, expandedContents)
def getInputValueForTrack(self, id, name):
    """Return the ':'-joined, cleaned-up track name for a track input.

    The track element is first built from the tracks cached under ``id``. If
    that fails for any reason, it is built without cached tracks and its
    tracks are stored in the cache. The element is remembered in
    self.trackElements[id].
    """
    try:
        cachedTracks = self.getCacheData(id)
        track = self.getTrackElement(id, name, tracks=cachedTracks)
    except Exception:
        # Deliberate best-effort fallback. `except Exception` (rather than a
        # bare except) keeps KeyboardInterrupt/SystemExit from being swallowed.
        print('track cache is empty')
        track = self.getTrackElement(id, name)
        self.putCacheData(id, track.tracks)

    self.trackElements[id] = track

    tn = track.definition(False)
    # The return value is ignored, so the list is expected to be cleaned in place.
    GalaxyInterface.cleanUpTrackName(tn)
    return ':'.join(tn)
def getTrackElement(self, id, label, history=False, ucsc=False):
    """Collect history datasets for a track element.

    When ``history`` is true, the datasets in the supported Galaxy file
    formats are requested from self.galaxy. Any exception raised while doing
    so is printed and the dataset list stays empty.
    """
    datasets = []
    if history:
        try:
            fetchHistory = self.galaxy.getHistory
            datasets = fetchHistory(GalaxyInterface.getSupportedGalaxyFileFormats())
        except Exception as e:
            # Best effort: report the problem and carry on without datasets.
            print(e)
def makeLowercaseName2NameShelfFromTnSubTypes(genome, trackName, shelfFn):
    'genome trackName shelfFn'
    # NOTE(review): the docstring above looks like a command-line usage string
    # and is therefore kept verbatim - confirm before rewording it.
    #
    # Fills the shelf at shelfFn with a mapping from the lower-cased category
    # name to the category name as reported by ListOfPresentCategoriesStat for
    # the given track. trackName is a string whose parts are separated by '/'
    # or ':'.
    trackName = re.split('/|:', trackName)

    from gold.application.GalaxyInterface import GalaxyInterface

    analysisDef = "-> ListOfPresentCategoriesStat"
    results = GalaxyInterface.runManual([trackName, None], analysisDef, '*', '*', genome,
                                        printResults=False, printHtmlWarningMsgs=False)
    categories = results.getGlobalResult()['Result']

    shelf = safeshelve.open(shelfFn)
    try:
        for cat in categories:
            shelf[cat.lower()] = cat
    finally:
        # Close the shelf even if one of the writes fails.
        shelf.close()
def updateAllTrackInfoToVersion15():
    """Run updateGenomeTrackInfoToVersion15 for every genome.

    The genome is taken from index 1 of each entry returned by
    GalaxyInterface.getAllGenomes().
    """
    from quick.application.GalaxyInterface import GalaxyInterface
    # Not referenced below; the import is kept exactly as in the original.
    from gold.origdata.PreProcessUtils import PreProcessUtils

    for genomeEntry in GalaxyInterface.getAllGenomes():
        updateGenomeTrackInfoToVersion15(genomeEntry[1])
def _assertRunEqual(self, target, *args, **kwArgs):
    """Run an analysis with and without disk memoization and compare results.

    ``args`` are the positional arguments for GalaxyInterface.run; args[2] is
    the analysis definition. A fixed random seed option is appended to the
    part before '->' (a definition without '->' is used on both sides). If
    the keyword 'globalTarget' is given and is not None, the global results
    are compared as well.
    """
    if self.VERBOSE:
        DebugConfig.PASS_ON_COMPUTE_EXCEPTIONS = True
        print('\n***\n' + str(self.id()) + '\n***')

    args = list(args)
    analysisDef = [x.strip() for x in args[2].split('->')]
    if len(analysisDef) == 1:
        analysisDef.append(analysisDef[0])
    analysisDef[0] += ' [randomSeed:=0]'
    args[2] = analysisDef[0] + " -> " + analysisDef[1]

    globalTarget = kwArgs.get('globalTarget')

    for diskMemo in [False, True]:
        gold.statistic.ResultsMemoizer.LOAD_DISK_MEMOIZATION = diskMemo
        if self._usesProfiling():
            gold.application.StatRunner.USE_PROFILING = True

        res = GalaxyInterface.run(*args, genome='TestGenome')
        self._assertEqualResults(target, res)

        # Identity check: `!= None` would invoke the target's own comparison.
        if globalTarget is not None:
            self._assertEqualGlobalResults(globalTarget, res)

        if self._usesProfiling():
            self._storeProfile(diskMemo)
def extract_feature(self, genome, track, ref, option) :
    '''
    Return one number describing the relation of a cluster track to a
    reference track.

    track  -- the cluster track
    ref    -- the reference track
    option -- name of the feature (statistic) to use; it selects the entry of
              FeatureCatalog.getFeaturesFromTracks(genome, track, ref)

    The selected entry holds the analysis definition (index 0) and the key
    (index 1) used to pick the value from the global result. The region and
    bin specification are taken from self.params.
    '''
    validFeature = FeatureCatalog.getFeaturesFromTracks(genome, track, ref)[option]

    # The original referenced `cls`, which is not defined in this method and
    # raised NameError whenever this option was selected; use `self` instead.
    if option == 'Prop. of tr1-points falling inside segments of tr2' \
            and self.getTrackFormat(genome, track) in ['Segments', 'Valued segments']:
        analysisDef = 'dummy [tf1=SegmentToMidPointFormatConverter] -> DerivedPointCountsVsSegsStat'
    else:
        analysisDef = validFeature[0]

    # compare_in value -> (region specification, name of the parameter holding the bins)
    namedRegions = {
        "Chromosomes": ("__chrs__", "Chromosomes"),
        "Chromosome arms": ("__chrArms__", "Chromosome_arms"),
        "Cytobands": ("__chrBands__", "Cytobands"),
    }
    compareIn = self.params.get("compare_in")
    if compareIn in namedRegions:
        regSpec, binParamName = namedRegions[compareIn]
        binSpec = self.params.get(binParamName)
    else:
        regSpec = self.params.get("region")
        binSpec = self.params.get("binsize")

    userBinSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)

    if option == 'Prop. of tr2 covered by tr1':
        # For this statistic the roles of reference and cluster track are swapped.
        result = AnalysisDefJob(analysisDef, ref, track, userBinSource).run()
    else:
        result = AnalysisDefJob(analysisDef, track, ref, userBinSource).run()

    mainResultDict = result.getGlobalResult()
    return mainResultDict[validFeature[1]]
def execute(cls, choices, galaxyFn=None, username=''):
    '''Run the tool for the selections made by the web user.

    choices[0] is the genome and choices[1] is a text with gene names
    separated by commas and/or newlines. Surrounding whitespace is stripped
    from each name and empty names are dropped before the list is passed to
    GalaxyInterface.getEnsemblGenes together with galaxyFn.
    '''
    genome, genes = choices[0], choices[1]

    strippedNames = (name.strip()
                     for geneLine in genes.split('\n')
                     for name in geneLine.split(','))
    genelist = [name for name in strippedNames if name != '']

    GalaxyInterface.getEnsemblGenes(genome, genelist, galaxyFn)
def createSplittedChrArms(genome, binSize, outFn):
    """genome binsize outFn.bed"""
    # NOTE(review): the docstring above looks like a command-line usage string
    # and is therefore kept verbatim - confirm before rewording it.
    #
    # Writes the chromosome arms of `genome`, binned by AutoBinner with
    # int(binSize), to outFn as tab-separated "chr, start, end" lines.
    with open(outFn, 'w') as outFile:
        # The context manager closes the file even if binning or writing fails.
        from quick.application.GalaxyInterface import GalaxyInterface

        chrArms = GalaxyInterface._getUserBinSource('__chrArms__', '*', genome)
        chrArmBins = AutoBinner(chrArms, int(binSize))
        for region in chrArmBins:
            outFile.write('\t'.join([region.chr, str(region.start), str(region.end)]) + os.linesep)
def getValidAnalyses(cls, genome, trackName1, trackName2):
    """Return the features whose analysis definition is valid for the two tracks.

    The result is the subset of cls.getAllFeatures() for which
    AnalysisManager._tryAnalysisDefForValidity (not tried reversed) gives a
    first element that is not None. An empty dict is returned when the
    cleaned-up track names are not valid for the genome.
    """
    allFeatures = cls.getAllFeatures()

    cleanedTrackNames = GalaxyInterface._cleanUpTracks([trackName1, trackName2], genome, realPreProc=False)
    if not GalaxyInterface.areTrackNamesValid(genome, cleanedTrackNames):
        return {}

    validFeatures = {}
    for key in allFeatures:
        analysisDef = allFeatures[key][0]
        # NOTE(review): validity is tried with the track names as passed in,
        # not the cleaned-up ones - confirm that this is intended.
        validity = AnalysisManager._tryAnalysisDefForValidity(analysisDef, genome, trackName1, trackName2, tryReversed=False)
        if validity[0] is None:
            continue
        validFeatures[key] = allFeatures[key]
    return validFeatures
def handleRegionClustering(self, genome, tracks, clusterMethod, extra_option):
    """Print per-region feature values of the reference track.

    The history track definition for 'track1' is printed first. Only when its
    first element is 'galaxy' is its third element used as the path of a bed
    file defining the regions; the analysis is then run for the reference
    track from the 'reftrack1' parameter and the selected value of every
    local result is printed as a list, ordered by sorted result key.
    """
    region_cluster_track = self.getHistoryTrackDef('track1')
    print(region_cluster_track)
    region_ref_track = self.params.get('reftrack1')

    if region_cluster_track[0] != 'galaxy':
        return

    track_path = region_cluster_track[2]
    userBinSource = GalaxyInterface._getUserBinSource('bed', track_path, genome)
    validFeature = SplittedRegionsAsFeaturesCatalog.getValidAnalyses(genome, region_ref_track, [])
    analysisDef = validFeature[0]
    result = AnalysisDefJob(analysisDef, region_ref_track, [], userBinSource).run()

    resultKey = validFeature[1]
    print([result[localKey][resultKey] for localKey in sorted(result.keys())])
def computeDistance(cls, genome, track1, track2, feature, regSpec, binSpec):
    '''
    Compute the direct distance between track1 and track2.

    track1 and track2 are lists like ['Sequence','Repeating elements','LINE'].
    feature selects, among DirectDistanceCatalog.getValidAnalyses, how the
    distance is defined: the selected entry holds the analysis definition
    (index 0) and the key of the wanted value in the global result (index 1).
    '''
    featureSpec = DirectDistanceCatalog.getValidAnalyses(genome, track1, track2)[feature]
    analysisDef = featureSpec[0]

    binSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)
    job = AnalysisDefJob(analysisDef, track1, track2, binSource)
    globalResult = job.run().getGlobalResult()
    return globalResult[featureSpec[1]]
def testRunValid(self):
    """runValid gives True for one well-formed run and something else for five others."""
    self.assertEqual(True, GalaxyInterface.runValid(["segsLen1"], ["segs"], 'RawOverlapStat', 'chr21:1-4001', '2000', 'TestGenome'))

    # Argument sets for which runValid must not return True.
    rejectedRuns = [
        (["segsLen1"], ["segs"], '', 'TestGenome:chr21:1-4001', '2000', 'TestGenome'),
        (["segsLen1"], ["segs"], 'RawOverlapStat', '1-4001', '2000', 'TestGenome'),
        (["segsLen1"], ["segs"], 'RawOverlapStat', 'TestGenome:chr21:1-4001', 'default', 'TestGenome'),
        (["segs"], ["segs"], '', 'TestGenome:chr21:1-4001', 'default', 'TestGenome'),
        (["segs"], ["segs"], '', 'TestGenome:chr21:1-4001', 'default', 'hg18'),
    ]
    for runArgs in rejectedRuns:
        self.assertNotEqual(True, GalaxyInterface.runValid(*runArgs))
def build_feature_vector(genome, ctrack, feature, regSpec, binSpec):
    '''
    Build the feature vector for ctrack.

    feature selects the entry of LocalResultsAsFeaturesCatalog.getValidAnalyses
    that defines the analysis (index 0) and the result key (index 1). The
    vector holds that key's value for every local result, ordered by sorted
    result key.
    '''
    featureSpec = LocalResultsAsFeaturesCatalog.getValidAnalyses(genome, ctrack, [])[feature]
    analysisDef = featureSpec[0]

    binSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)
    result = AnalysisDefJob(analysisDef, ctrack, [], binSource).run()

    vector = []
    for localKey in sorted(result.keys()):
        vector.append(result[localKey][featureSpec[1]])
    return vector
def extract_feature(cls, genome, track, ref, option, regSpec, binSpec, trackFormat) :
    """Return the value of one feature relating ``track`` to ``ref``.

    ``option`` selects the entry of FeatureCatalog.getFeaturesFromTracks,
    which holds the analysis definition (index 0) and the key (index 1) of
    the wanted value in the global result. An assertion fails when the run
    produces no global result.
    """
    featureSpec = FeatureCatalog.getFeaturesFromTracks(genome, track, ref)[option]

    usesMidPoints = (option == 'Prop. of tr1-points falling inside segments of tr2'
                     and trackFormat in ['Segments', 'Valued segments'])
    if usesMidPoints:
        analysisDef = 'dummy [tf1=SegmentToMidPointFormatConverter] -> DerivedPointCountsVsSegsStat'
    else:
        analysisDef = featureSpec[0]

    userBinSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)

    if option == 'Prop. of tr2 covered by tr1':
        # For this statistic the two tracks are passed in swapped order.
        job = AnalysisDefJob(analysisDef, ref, track, userBinSource)
    else:
        job = AnalysisDefJob(analysisDef, track, ref, userBinSource)
    result = job.run()

    resultKey = featureSpec[1]
    assert result.getGlobalResult() is not None, 'Did not get any global result for analysisDef: '+resultKey
    return result.getGlobalResult()[resultKey]
def _assertRunEqual(self, target, *args, **kwArgs):
    """Run an analysis once per run type and compare the results with ``target``.

    args[2] is the analysis definition. A fixed random seed option is
    appended to the part before '->', and a definition without '->' is used
    on both sides. When the keyword 'globalTarget' is given and not None, the
    global results are compared as well.
    """
    if DebugConfig.VERBOSE:
        print('\n***\n' + str(self.id()) + '\n***')

    runArgs = list(args)
    defParts = [part.strip() for part in runArgs[2].split('->')]
    seededHead = defParts[0] + ' [randomSeed:=0:]'
    tail = defParts[1] if len(defParts) > 1 else defParts[0]
    runArgs[2] = seededHead + " -> " + tail

    globalTarget = kwArgs.get('globalTarget')

    # The loop variable is not used; the generator is iterated as in the original.
    for runType in self._runTypeGenerator():
        res = GalaxyInterface.run(*runArgs, genome='TestGenome')
        self._assertEqualResults(target, res)
        if globalTarget is not None:
            self._assertEqualGlobalResults(globalTarget, res)
def computeDistance(track1, track2, feature='direct distance'):
    '''
    Return the 'Variance' entry of the PropFreqOfTr1VsTr2Stat global result
    for track1 versus track2.

    track1 and track2 are lists like ['Sequence','Repeating elements','LINE'].
    The analysis is hard-coded to genome 'hg18', region 'chr1' and bin size
    '10m'; feature is not used by this implementation.
    '''
    genome = 'hg18'
    regSpec = 'chr1'  # could also be '*' for the whole genome
    binSpec = '10m'   # could also be e.g. '100', '1k' or '*' for whole regions as bins
    analysisDef = 'bla bla -> PropFreqOfTr1VsTr2Stat'  # or any other statistic from the HB collection

    binSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)
    # result is of class Results
    result = AnalysisDefJob(analysisDef, track1, track2, binSource).run()
    return result.getGlobalResult()['Variance']
def computeDistance(self, genome, track1, track2, feature):
    '''
    Compute the direct distance between track1 and track2.

    track1 and track2 are lists like ['Sequence','Repeating elements','LINE'].
    feature selects, among DirectDistanceCatalog.getValidAnalyses, the entry
    holding the analysis definition (index 0) and the key of the wanted value
    in the global result (index 1). The region and bin specification come
    from self.params.
    '''
    featureSpec = DirectDistanceCatalog.getValidAnalyses(genome, track1, track2)[feature]
    analysisDef = featureSpec[0]

    # compare_in value -> (region specification, name of the parameter holding the bins)
    namedRegions = {
        "Chromosomes": ("__chrs__", "Chromosomes"),
        "Chromosome arms": ("__chrArms__", "Chromosome_arms"),
        "Cytobands": ("__chrBands__", "Cytobands"),
    }
    compareIn = self.params.get("compare_in")
    if compareIn in namedRegions:
        regSpec, binParamName = namedRegions[compareIn]
        binSpec = self.params.get(binParamName)
    else:
        regSpec = self.params.get("region")
        binSpec = self.params.get("binsize")

    binSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)
    # result is of class Results
    result = AnalysisDefJob(analysisDef, track1, track2, binSource).run()
    return result.getGlobalResult()[featureSpec[1]]
def _assertBatchEqual(self, target, *args):
    """Run the batch lines with disk memoization off and on, comparing each result.

    The i-th batch result must equal target[i].
    """
    for diskMemo in (False, True):
        gold.statistic.ResultsMemoizer.LOAD_DISK_MEMOIZATION = diskMemo
        batchRes = GalaxyInterface.runBatchLines(*args)
        for index, res in enumerate(batchRes):
            self._assertEqualResults(target[index], res)
def testCountStat(self):
    """Run CountStat for 'segsMany' against 'segs' on a 4 kb region of chr21.

    The result is not asserted; the original comparison against
    [[('Result', 119121)], [('Result', 0)]] is disabled.
    """
    trackName1 = ["segsMany"]
    trackName2 = ["segs"]
    GalaxyInterface.run(trackName1, trackName2, 'CountStat',
                        'TestGenome:chr21:10000000-10004000', '2000')
def _assertBatchEqual(self, target, *args):
    """Run the batch lines once per run type and compare each result.

    The i-th batch result must equal target[i].
    """
    # The loop variable is not used; the generator is iterated as in the original.
    for runType in self._runTypeGenerator():
        batchRes = GalaxyInterface.runBatchLines(*args)
        for index, res in enumerate(batchRes):
            self._assertEqualResults(target[index], res)
# Copyright (C) 2009, Geir Kjetil Sandve, Sveinung Gundersen and Morten Johansen
# This file is part of The Genomic HyperBrowser.
#
# The Genomic HyperBrowser is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# The Genomic HyperBrowser is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with The Genomic HyperBrowser. If not, see <http://www.gnu.org/licenses/>.

from gold.application.GalaxyInterface import GalaxyInterface

# Walkthrough: adding a new statistic and running it.
#
# First you need to create a statistic class. For this you will use a
# StatisticTemplate and just add a few custom lines of code.
# In this example we make a simple statistic that computes the number of bps
# covered by all segments of a single track (in a bin).
#
# 1: Open the file StatisticTemplate.py.
# 2: Save this file under your own name in the folder "quick/statistic".
#    Let's call it "BpCoverageStat.py". (By putting your new class here, it
#    will automatically be loaded into the Hyperbrowser system.)
# 3: Rename the classes according to the name of your statistic - the same
#    name as used for the file. The file should then have a class
#    "BpCoverageStat" and a class "BpCoverageStatUnsplittable". (The system
#    will automatically use the unsplittable class, and would in certain
#    cases also automatically have used a corresponding splittable class if
#    it had been defined.)
# 4: Define what input your statistic needs. In our case we simply need the
#    raw track data. Add the following line in the method "_createChildren":
#        self._addChild( RawDataStat(self._region, self._track, TrackFormatReq()) )
# 5: Define how to compute the result. Add the following line under "_compute":
#        return sum( el.end() - el.start() for el in self._children[0].getResult())
# 6: Now you can make a simple call that computes full genome-wide results
#    based on your statistics code:
GalaxyInterface.run(['genes','refseq'], ['repeats','LINE'], 'My new statistic -> BpCoverageStat', '*', '*', genome='hg18')
# 7: You can also very simply make your new statistic available on the web if
#    you have a web system running against the Hyperbrowser. Simply add the
#    following line somewhere inside the string variable "QUESTION_SPEC_STR"
#    in the file "gold/description/AnalysisList.py":
#        'My new statistic -> BpCoverageStat'
from gold.application.GalaxyInterface import GalaxyInterface

# Scratch script: change the 'description' and 'private' attributes of the
# track info record for one hg18 track, printing the record along the way,
# and finally reset both attributes.
GENOME = 'hg18'
TRACK_NAME_PARTS = ('Regulation', 'CpG islands')


def _printTrackInfoRecord():
    # A fresh list is passed on every call, as the original did.
    print(GalaxyInterface.getTrackInfoRecord(GENOME, list(TRACK_NAME_PARTS)))


def _setTrackInfoAttributes(attributes):
    GalaxyInterface.setTrackInfoRecord(GENOME, list(TRACK_NAME_PARTS), attributes, False)


_printTrackInfoRecord()
_setTrackInfoAttributes({'description': 'Test'})
_printTrackInfoRecord()
_setTrackInfoAttributes({'private': True})
_printTrackInfoRecord()
_setTrackInfoAttributes({'description': ''})
_setTrackInfoAttributes({'private': False})
_printTrackInfoRecord()
def execute(choices, galaxyFn=None, username=''):
    '''Run the tool when the web user pushes the execute button.

    Output is printed as HTML to standard out. This implementation ignores
    its arguments: it runs DerivedOverlapStat for two hard-coded hg18 tracks
    on 'chr1' with bins of '10m' and prints every key/value pair of the
    global result as an ordered HTML list.
    '''
    genome = 'hg18'

    # The list of repeat sub tracks is fetched but not used below; the call is
    # kept as in the original.
    allRepeats = GalaxyInterface.getSubTrackNames(genome, ['Sequence', 'Repeating elements'], False)

    track1 = ['Sequence', 'Repeating elements', 'DNA']
    track2 = ['Gene regulation', 'TFBS', 'High Throughput']
    analysisDef = 'bla bla -> DerivedOverlapStat'  # or any other statistic from the HB collection
    regSpec = 'chr1'  # could also be '*' for the whole genome
    binSpec = '10m'   # could also be e.g. '100', '1k' or '*' for whole regions as bins

    userBinSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)
    # result is of class Results
    result = AnalysisDefJob(analysisDef, track1, track2, userBinSource).run()
    mainResultDict = result.getGlobalResult()

    print('<ol>')
    for key in mainResultDict.keys():
        value = mainResultDict[key]
        print('<li>key:%s,value:%s </li>' % (key, value))
    print('</ol>')
from gold.application.GalaxyInterface import GalaxyInterface
from gold.application.StatRunner import AnalysisDefJob

# Scratch script: run PropFreqOfTr1VsTr2Stat for LINE versus SINE repeats on
# hg18 chr1 and print the 'Variance' entry of the global result.
genome = 'hg18'
regSpec = 'chr1'  # could also be '*' for the whole genome
binSpec = '10m'   # could also be e.g. '100', '1k' or '*' for whole regions as bins
analysisDef = 'bla bla -> PropFreqOfTr1VsTr2Stat'  # or any other statistic from the HB collection

trackName1 = ['Sequence', 'Repeating elements', 'LINE']
trackName2 = ['Sequence', 'Repeating elements', 'SINE']

userBinSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)
# result is of class Results
result = AnalysisDefJob(analysisDef, trackName1, trackName2, userBinSource).run()

mainResultDict = result.getGlobalResult()
mainValueOfInterest = mainResultDict['Variance']
print('The ..variance..:  %s' % (mainValueOfInterest,))
def __init__(self, trans, job):
    """Set up the controller and pick the genomes and the current genome.

    The genomes are fetched for the Galaxy user name when the instance has a
    'galaxy' attribute, otherwise for ''. The current genome is the 'dbkey'
    parameter, defaulting to element [0][1] of the fetched genomes.
    """
    BaseToolController.__init__(self, trans, job)

    if hasattr(self, 'galaxy'):
        userName = self.galaxy.getUserName()
    else:
        userName = ''
    self.genomes = GalaxyInterface.getAllGenomes(userName)

    self.genome = self.params.get('dbkey', self.genomes[0][1])
def testGetRunDescription(self):
    """Check that getRunDescription runs for a full hypothesis-test analysis definition.

    Nothing is asserted about the returned description; the test passes when
    the call does not raise. DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS is
    switched off for the call and restored afterwards, even when the call
    fails, so the setting cannot leak into other tests.
    """
    analysisDef = 'Different frequency inside segments?:Are track1-points occurring [tail:Alternative hypothesis=different:with different frequency/more:more frequently/less:less frequently] inside track2-segment than outside? [rawStatistic:=PointCountInsideSegsStat:] [assumptions:_Assumptions=poissonPoints:Poisson-distributed points/_PermutedSegsAndSampledIntersegsTrack:Permuted segments, sampled spaces (MC)/_PermutedSegsAndIntersegsTrack:Permuted segments, permuted spaces (MC)/_RandomGenomeLocationTrack:Segments fetched from random genome location (MC)] [numResamplings:_Resamplings=20/200/2000] -> PointCountInSegsPvalStat, RandomizationManagerStat'

    prevVal = DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS
    DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS = False
    try:
        GalaxyInterface.getRunDescription(['segsMany'], ['segs'], analysisDef, 'chr21:2-4001','2000', 'TestGenome')
    finally:
        DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS = prevVal
def executeSelfFeature(cls, genome, tracks, track_names, clusterMethod, extra_option, feature, distanceType, kmeans_alg, galaxyFn, regSpec, binSpec): from gold.application.RSetup import r #regSpec, binSpec = 'bed', '/usit/invitro/data/galaxy/galaxy-dist-hg-dev/./database/files/017/dataset_17084.dat' jobFile = open(galaxyFn, 'w') print>>jobFile, 'PARAMS: ', dict(zip('genome, tracks, track_names, clusterMethod, extra_option, feature, distanceType, kmeans_alg, regSpec, binSpec'.split(','), [repr(v)+'<br>'for v in [genome, tracks, track_names, clusterMethod, extra_option, feature, distanceType, kmeans_alg,regSpec, binSpec]])) print>>jobFile, '<br><br>To run:<br>$clusterBySelfFeature', (genome, '$'.join([':'.join(t) for t in tracks]), ':'.join(track_names) , clusterMethod, extra_option, feature, distanceType, kmeans_alg, regSpec, binSpec), '<br><br>' print>>jobFile, 'signature of method clusterBySelfFeature:<br>', 'clusterBySelfFeature(genome, tracksStr, track_namesStr, clusterMethod, extra_option, feature, distanceType, kmeans_alg, regSpec, binSpec):<br><br><br>' prettyTrackNames = [v[-1].replace('RoadMap_','').replace('.H3K4me1','') for v in tracks] #prettyTrackNames = [prettyPrintTrackName(v, shortVersion=True) for v in tracks] f_matrix = cls.construct_feature_matrix(genome, tracks, feature, regSpec, binSpec) print>>jobFile, 'dir f_matrix: ', dir(f_matrix), regSpec, binSpec userBinSource = GalaxyInterface._getUserBinSource(regSpec,binSpec,genome) r.assign('bin_names',[str(bin) for binIndex, bin in enumerate(sorted(list(userBinSource)))]) r.assign('track_names',prettyTrackNames) #use as track names, will be shown in clustering figure r.assign('f_matrix',f_matrix) r.assign('distanceType',distanceType) r('row.names(f_matrix) <- track_names') r('colnames(f_matrix) <- bin_names') if clusterMethod == 'Hierarchical clustering' and extra_option != "--select--" : #print 'galaxyFn: ', galaxyFn figure = GalaxyRunSpecificFile(['cluster_tracks_result_figure.pdf'], galaxyFn) 
figurepath = figure.getDiskPath(True) r.pdf(figurepath) r('d <- dist(f_matrix, method=distanceType)') r_f_matrixFile = GalaxyRunSpecificFile(['f-matrix.robj'], galaxyFn) #', '.join([str(v) for v in row]) r.assign('f_matrix_fn', r_f_matrixFile.getDiskPath(True)) r('dput(f_matrix, f_matrix_fn)') #r_f_matrixFile.writeTextToFile(', '.join(cls.getFlattenedMatrix(f_matrix)) + '\n\nTrack names: '+', '.join(prettyTrackNames)+'\n\nNumber of tracks: '+str(len(prettyTrackNames))+'\n\nbins: +) #r_f_matrixFile.writeTextToFile() #r_f_matrixFile.writeTextToFile(str(f_matrix)+'\n\n'+str(r.d)) print>>jobFile, r_f_matrixFile.getLink('feature_matrix') r.assign('extra_option',extra_option) r('hr <- hclust(d, method=extra_option, members=NULL)') r('plot(hr, ylab="Distance", hang=-1)') r('dev.off()') print>>jobFile, figure.getLink('clustering results figure<br>') heatmap = GalaxyRunSpecificFile(['heatmap_figure.pdf'], galaxyFn) heatmap_path = heatmap.getDiskPath(True) r.pdf(heatmap_path) r('heatmap(f_matrix, col=cm.colors(256), distfun=function(c) dist(c, method=distanceType), hclustfun=function(c) hclust(c, method=extra_option, members=NULL),Colv=NA, scale="none", xlab="", ylab="", cexRow=0.5, cexCol=0.5, margin=c(8,10))')#Features cluster tracks r('dev.off()') print>>jobFile, r('dimnames(f_matrix)') print>>jobFile, heatmap.getLink('heatmap figure <br>') elif clusterMethod == 'K-means clustering' and extra_option != "--select--" and kmeans_alg != "--select--": textFile = GalaxyRunSpecificFile(['result_of_kmeans_clustering.txt'], galaxyFn) textFilePath = textFile.getDiskPath(True) extra_option = int(extra_option) r.assign('kmeans_alg',kmeans_alg) r.assign('extra_option',extra_option) r('hr <- kmeans(f_matrix,extra_option,algorithm=kmeans_alg)') #the number of cluster is gotten from clusterMethod+ tag, instead of 3 used here kmeans_output = open(textFilePath,'w') clusterSizes = r('hr$size') #size of every cluster withinSS = r('hr$withinss') clusters = r('hr$cluster') for index1 in 
range(extra_option) : #extra_option actually the number of clusters #trackInCluster = [k for k,val in clusters.items() if val == index1] trackInCluster = [k+1 for k,val in enumerate(clusters) if val == index1+1] #IS THIS CORRECT, I.E. SAME AS ABOVE?? print>>kmeans_output, 'Cluster %i(%s objects) : ' % (index1+1, str(clusterSizes[index1])) for name in trackInCluster : print>>kmeans_output, name, '(This result may be a bit shaky afters some changes in rpy access)' print>>kmeans_output, 'Sum of square error for this cluster is : '+str(withinSS[index1])+'\n' kmeans_output.close() print>>jobFile, textFile.getLink('Detailed result of kmeans clustering <br>') cls.print_data(f_matrix, jobFile) '''
def executeReferenceTrack(cls, genome, tracks, track_names, clusterMethod, extra_option, distanceType, kmeans_alg, galaxyFn, regSpec, binSpec, numreferencetracks=None, refTracks=None, refFeatures=None, yesNo=None, howMany=None, upFlank=None, downFlank=None): from gold.application.RSetup import r jobFile = open(galaxyFn, 'w') print>>jobFile, 'PARAMS: ', dict(zip('genome, tracks, track_names, clusterMethod, extra_option, distanceType, kmeans_alg, regSpec, binSpec'.split(','), [repr(v)+'<br>'for v in [genome, tracks, track_names, clusterMethod, extra_option, distanceType, kmeans_alg, regSpec, binSpec]])) print>>jobFile, '<br><br>To run:<br>', '$clusterByReference', (genome, '$'.join([':'.join(t) for t in tracks]), ':'.join(track_names) , clusterMethod, extra_option, distanceType, kmeans_alg, regSpec, binSpec,numreferencetracks, refTracks, refFeatures, yesNo, howMany, upFlank, downFlank), '<br><br>' print>>jobFile, 'signature of method clusterByReference:<br>', 'clusterByReference(genome, tracksStr, track_namesStr, clusterMethod, extra_option, distanceType, kmeans_alg, regSpec, binSpec, numreferencetracks=None, refTracks=None, refFeatures=None, yesNo=None, howMany=None, upFlank=None, downFlank=None)<br><br><br>' prettyTrackNames = [v[-1].replace("RoadMap_","").replace('.H3K4me1','') for v in tracks] #prettyTrackNames = [prettyPrintTrackName(v) for v in tracks] #paramNames = ['numreferencetracks', 'refTracks', 'refFeatures', 'yesNo', 'howMany', 'upFlank', 'downFlank'] #for index, value in enumerate([numreferencetracks, refTracks, refFeatures, yesNo, howMany, upFlank, downFlank]): # if value != None: # print paramNames[index]+'='+ str(value), #print '' reftrack_names = [] #for use in creating the heatmap (as the column names) options = [] #for the case using refTracks, options contains feature for every refTrack, chosen by user. 
if numreferencetracks : for i in range(int(numreferencetracks)): ref_i = refTracks[i].split(":") #name of refTrack is being used to construct the name of expanded refTrack #refTracks.append(ref_i) #put the refTrack into refTracks list reftrack_names.append(ref_i[-1]) temp_opt1 = 'ref'+str(i)+'feature' options+= [] if refFeatures[i] == None else [refFeatures[i]] if yesNo[i] == "Yes" and howMany[i] != '--select--': for expan in range(int(howMany[i])) : reftrack_names.append(ref_i[-1]+'_'+ upFlank[i][expan]) upFlank = int(upFlank[i][expan]) downFlank = int(downFlank[i][expan]) withinRunId = str(i+1)+' expansion '+str(expan + 1) outTrackName = GalaxyInterface.expandBedSegmentsFromTrackNameUsingGalaxyFn(ref_i, genome, upFlank, downFlank, galaxyFn, withinRunId) #outTrackName is unique for run refTracks.append(outTrackName) #put the expanded track into refTracks list options.append(options[-1]) # use chosen feature for refTack as valid feature for the expanded for index, track in enumerate(refTracks) : #print track, '<br>' if type(track) == str : track = track.split(":") refTracks[index] = track[:-1] if track[-1] == "-- All subtypes --" else track if len(refTracks) > 0: trackFormats = [TrackInfo(genome,track).trackFormatName for track in tracks] trackLen = len(tracks) refLen = len(refTracks) f_matrix = zeros((trackLen, refLen)) for i in range(trackLen): for j in range(refLen): #print 'len(options), refLen, len(tracks), trackLen, len(trackFormats):', len(options), refLen, len(tracks), trackLen, len(trackFormats) f_matrix[i,j] = cls.extract_feature(genome,tracks[i],refTracks[j],options[j], regSpec, binSpec, trackFormats[i]) r.assign('track_names',prettyTrackNames) #use as track names, will be shown in clustering figure r.assign('reftrack_names',reftrack_names) r.assign('f_matrix',f_matrix) r.assign('distanceType',distanceType) r('row.names(f_matrix) <- track_names') r('colnames(f_matrix) <- reftrack_names') if clusterMethod == 'Hierarchical clustering' and extra_option != 
"--select--": figure = GalaxyRunSpecificFile(['cluster_tracks_result_figure.pdf'], galaxyFn) figurepath = figure.getDiskPath(True) r.pdf(figurepath, 8,8) r('d <- dist(f_matrix, method=distanceType)') #print r.f_matrix #print r.d r_f_matrixFile = GalaxyRunSpecificFile(['f-matrix.robj'], galaxyFn) r.assign('f_matrix_fn', r_f_matrixFile.getDiskPath(True)) r('dput(f_matrix, f_matrix_fn)') print>>jobFile, r_f_matrixFile.getLink('feature_matrix') r_f_matrixFile = GalaxyRunSpecificFile(['f-matrix.txt'], galaxyFn) r_f_matrixFile.writeTextToFile(str(f_matrix)+'\n\n'+str(r.d)) print>>jobFile, r_f_matrixFile.getLink('r.f_matrix & r.d') r.assign('extra_option',extra_option) r('hr <- hclust(d, method=extra_option, members=NULL)') r('plot(hr, ylab="Distance", hang=-1)') r('dev.off()') print>>jobFile, figure.getLink('clustering results figure<br>') elif clusterMethod == 'K-means clustering' and extra_option != "--select--" and kmeans_alg != "--select--": textFile = GalaxyRunSpecificFile(['result_of_kmeans_clustering.txt'], galaxyFn) textFilePath = textFile.getDiskPath(True) extra_option = int(extra_option) r.assign('extra_option',extra_option) r.assign('kmeans_alg',kmeans_alg) r('hr <- kmeans(f_matrix,extra_option,algorithm=kmeans_alg)') #the number of cluster is gotten from clusterMethod+ tag, instead of 3 used here kmeans_output = open(textFilePath,'w') clusterSizes = r('hr$size') #size of every cluster withinSS = r('hr$withinss') clusters = array(r('hr$cluster')) #convert to array in order to handle the index more easily track_names = array(track_names) for index1 in range(extra_option) : #extra_option actually the number of clusters trackInCluster = [k for k,val in clusters.items() if val == index1] print>>kmeans_output, 'Cluster %i(%s objects) : ' % (index1+1, str(clusterSizes[index1])) for name in trackInCluster : print>>kmeans_output, name print>>kmeans_output, 'Sum of square error for this cluster is : '+str(withinSS[index1])+'\n' kmeans_output.close() print>>jobFile, 
textFile.getLink('Detailed result of kmeans clustering <br>') heatmap = GalaxyRunSpecificFile(['heatmap_figure.png'], galaxyFn) heatmap_path = heatmap.getDiskPath(True) r.png(heatmap_path, width=800, height=700) r('heatmap(f_matrix, col=cm.colors(256), Colv=NA, scale="none", xlab="", ylab="", margins=c(10,10))')#Features cluster tracks r('dev.off()') print>>jobFile, heatmap.getLink('heatmap figure <br>') cls.print_data(f_matrix, jobFile) else : print 'Have to specify a set of refTracks'
def execute(cls, choices, galaxyFn=None, username=''): from quick.application.GalaxyInterface import GalaxyInterface fileformat = choices[9]; outputFile = open(galaxyFn, "w") if fileformat == "html": print GalaxyInterface.getHtmlBeginForRuns(galaxyFn) print GalaxyInterface.getHtmlForToggles(withRunDescription=False) t = calendar.timegm(time.gmtime()) htmlfile = GalaxyRunSpecificFile(["css", str(t)], galaxyFn); genome = choices[0] track1 = choices[1].split(":") track2 = choices[2].split(":") tn1 = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, track1) tn2 = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, track2) compare = choices[3] != "Count individual SNP-differences in window" if choices[4] == "Classical MDS": mds = 0; elif choices[4] == "SMACOF": mds = 1; else: mds = 2; windowSize = int(choices[5]) windowStep = int(choices[6]) mcTreshold = int(choices[7]) mcRuns = int(choices[8]) outputFile.write("#seqid\tstart\tscore\tp\n") if fileformat == "html": text = "#seqid\tstart\tscore\tp\n"; print "chrs:"+str(GenomeInfo.getChrList(genome)) reg = "*" bins = "*" analysisDef = "Dummy: dummy name ([wStep=%g] [wSize=%s] [func=%s] [mds=%s] [mcT=%s] [mcR=%s])-> CategoryClusterSeparationStat" % (windowStep, windowSize, compare, mds, mcTreshold, mcRuns) userBinSource = GalaxyInterface._getUserBinSource(reg, bins, genome) result = GalaxyInterface.runManual([tn1, tn2], analysisDef, reg, bins, genome, galaxyFn=galaxyFn) for key in result.getAllRegionKeys(): chrom = str(key).split(":")[0]; r = result[key]; if 'Result' not in r.keys(): print "skipping chr:", chrom, r; continue; r = r['Result']; scores = r[0]; stddev = r[1]; for i in range(len(scores)): if scores[i] != 0: pos = i*windowStep; if fileformat == "tabular": outputFile.write("%s\t%s\t%s\t%s\n" % (str(chrom), pos, str(scores[i]), str(stddev[i]))) else: text += "%s\t%s\t%s\t%s\n" % (str(chrom), pos, str(scores[i]), str(stddev[i])); if fileformat == "html": htmlfile.writeTextToFile(text); print 
htmlfile.getLink("Result file"); print GalaxyInterface.getHtmlEndForRuns() outputFile.close();
def _getAllGenomes(self):
    """Return the genome entries for the current Galaxy user, preceded by a
    '----- Select -----' placeholder entry."""
    placeholder = ('----- Select -----', '', False)
    userName = self.galaxy.getUserName()
    return [placeholder] + GalaxyInterface.getAllGenomes(userName)
def main():
    """Command-line entry point: run an extraction or an analysis.

    ``sys.argv[1]`` is the parameter file read with
    ``hg.load_input_parameters``; the optional ``sys.argv[2]`` names the
    tool. Non-empty parameters override the defaults below. For the tool
    'extract' the first track is extracted, otherwise ``GalaxyInterface.run``
    is called. stdout is redirected to the output file (by default the
    parameter file itself) before either is started.
    """
    filename = sys.argv[1]
    tool = sys.argv[2] if len(sys.argv) > 2 else None

    job_params, params = hg.load_input_parameters(filename)

    # Current value of every setting that is read further down.
    settings = {
        'tool': tool,
        'genome': 'hg18',
        'trackName1': "",
        'trackName2': "",
        'intensityTrackName': None,
        'statClassName': "",
        'binSize': "*",
        'region': "*",
        'userBins': None,
        'output': filename,
        'method': None,
        'overlaps': None,
        'username': None,
    }

    # Parameters whose (stringified) value is stored as it is.
    # The parameters grptrack1/2, subtrack1/2, extract, statsfile and
    # file_path are recognised by the tool form but not used here.
    storedUnchanged = {
        'dbkey': 'genome',
        'tool': 'tool',
        'track1': 'trackName1',
        'track2': 'trackName2',
        'trackIntensity': 'intensityTrackName',
        'stats': 'statClassName',
        'binsize': 'binSize',
        'region': 'region',
        'method': 'method',
        'output': 'output',
        'userEmail': 'username',
    }

    for option, rawValue in params.items():
        if rawValue == "":
            continue
        value = str(rawValue)
        if option in storedUnchanged:
            settings[storedUnchanged[option]] = value
        elif option == 'seglength':
            int(value)  # the number itself is unused, but a non-integer still raises
        elif option == 'custom':
            sys.stdout = open(value, "w", 0)
        elif option == 'binfile':
            settings['region'] = "bed"
            settings['userBins'] = value
        elif option == 'overlaps':
            settings['overlaps'] = unquote(value)

    tool = settings['tool']
    genome = settings['genome']
    region = settings['region']
    binSize = settings['binSize']
    method = settings['method']
    userBins = settings['userBins']
    output = settings['output']
    intensityTrackName = settings['intensityTrackName']

    # A region "method" replaces the region; its bins come from the parameter
    # that carries the method's own name.
    if method == '__brs__':
        region, binSize = method, '*'
    elif method in ['__chrs__', '__chrBands__', '__chrArms__', '__genes__']:
        region, binSize = method, params[method]

    if userBins:
        if userBins[0] == 'galaxy':
            # For backwards compatibility
            binSize, region = userBins[1], userBins[2]
        elif userBins.startswith('galaxy'):
            binSize, region = getSecureIdAndExtFromDatasetInfoAsStr(userBins)

    tracks1 = settings['trackName1'].split(':')
    tracks2 = settings['trackName2'].split(':')
    if intensityTrackName is None:
        intensityTracks = []
    else:
        intensityTracks = intensityTrackName.split(':')

    # Disabled handling of custom R scripts, kept for reference:
    # if statClassName.startswith('galaxy'):
    #     statsFileId = statClassName.split(',')[1]
    #     statsFile = getGalaxyFnFromDatasetId(statsFileId)
    #     statClassName = '[scriptFn:=' + statsFile.encode('hex_codec') + ':] -> CustomRStat'

    # Both tools print their result, so stdout goes to the output file.
    if output is not None:
        sys.stdout = open(output, "w", 0)

    if tool != 'extract':
        # run analysis
        demoID = params['demoID'] if params.has_key('demoID') else None
        GalaxyInterface.run(tracks1, tracks2, settings['statClassName'], region, binSize,
                            genome, output, intensityTracks, settings['username'], demoID)
    elif params.has_key('sepFilePrRegion'):
        GalaxyInterface.parseExtFormatAndExtractTrackManyBinsToRegionDirsInZipFile(
            genome, tracks1, region, binSize, True, settings['overlaps'], output)
    else:
        GalaxyInterface.parseExtFormatAndExtractTrackManyBins(
            genome, tracks1, region, binSize, True, settings['overlaps'], output)
def userIsOneOfUs(self):
    """Return what ``GalaxyInterface._userHasFullAccess`` reports for the
    current Galaxy user."""
    currentUser = self.galaxy.getUserName()
    return GalaxyInterface._userHasFullAccess(currentUser)
def _getAllGenomes(self):
    """Return the list of genome entries available to the current Galaxy
    user, with a "----- Select -----" placeholder entry in front."""
    selectEntry = ("----- Select -----", "", False)
    availableGenomes = GalaxyInterface.getAllGenomes(self.galaxy.getUserName())
    return [selectEntry] + availableGenomes
def getDictOfAllGenomes(self):
    """Return an OrderedDict mapping the first element of every genome entry
    available to the current Galaxy user to False, in the order given by
    ``GalaxyInterface.getAllGenomes``."""
    genomeDict = OrderedDict()
    for genomeEntry in GalaxyInterface.getAllGenomes(self.galaxy.getUserName()):
        genomeDict[genomeEntry[0]] = False
    return genomeDict
return 1 def meltSeg(val,diff): if diff < -0.13: return -2 elif diff > 0.13: return 2 elif -0.01 <= diff <= 0.01: return 0 else: return None meltSegLines = ''' if diff < -0.13: return -2 elif diff > 0.13: return 2 elif -0.01 <= diff <= 0.01: return 0 else: return None '''.split(os.linesep) #FunctionCategorizer(['melting'], meltSeg).createNewTrack(['melting','meltMapSeg']) GalaxyInterface.createSegmentation('hg18',['melting'], ['melting','meltMapSeg2'], meltSegLines) #exec( os.linesep.join( ['def categorizerMethod(val,diff):'] + meltSegLines) ) #a = categorizerMethod #print a(3,-1)
def __init__(self, trans, job):
    """Initialise the base controller, then store the genome list for the
    current user (empty user name when no ``galaxy`` attribute exists) and
    the genome selected through the 'dbkey' parameter ('' when absent)."""
    BaseToolController.__init__(self, trans, job)

    if hasattr(self, 'galaxy'):
        userName = self.galaxy.getUserName()
    else:
        userName = ''

    self.genomes = GalaxyInterface.getAllGenomes(userName)
    self.genome = self.params.get('dbkey', '')
# Copyright (C) 2009, Geir Kjetil Sandve, Sveinung Gundersen and Morten Johansen # This file is part of The Genomic HyperBrowser. # # The Genomic HyperBrowser is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # The Genomic HyperBrowser is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with The Genomic HyperBrowser. If not, see <http://www.gnu.org/licenses/>. from gold.application.GalaxyInterface import GalaxyInterface #GalaxyInterface.run(['HCNE','density_mm8_90pc_50col'], ['genes','refseq'], '[altHyp:=ha1:]a -> PointPositioningPValStat','chr1','10m') print GalaxyInterface.getTrackInfoRecord('hg18',['Regulation','CpG islands']) GalaxyInterface.setTrackInfoRecord('hg18',['Regulation','CpG islands'], {'description':'Test'}, False) print GalaxyInterface.getTrackInfoRecord('hg18',['Regulation','CpG islands']) GalaxyInterface.setTrackInfoRecord('hg18',['Regulation','CpG islands'], {'private':True}, False) print GalaxyInterface.getTrackInfoRecord('hg18',['Regulation','CpG islands']) GalaxyInterface.setTrackInfoRecord('hg18',['Regulation','CpG islands'], {'description':''}, False) GalaxyInterface.setTrackInfoRecord('hg18',['Regulation','CpG islands'], {'private':False}, False) print GalaxyInterface.getTrackInfoRecord('hg18',['Regulation','CpG islands'])
def test3():
    """Run the CustomRStat analysis, with the script taken from the
    module-level ``fn``, for SINE repeats against all repeats on
    chr1:1-100000000 in 10m bins."""
    analysisDef = '[scriptFn:=' + fn + ':] -> CustomRStat'
    firstTrack = ['repeats', 'SINE']
    secondTrack = ['repeats']
    GalaxyInterface.run(firstTrack, secondTrack, analysisDef, 'chr1:1-100000000', '10m')
def meltSeg(val, diff):
    """Categorize ``diff`` into a segment value.

    Returns -2 when ``diff`` is below -0.13, 2 when it is above 0.13, 0 when
    it lies within [-0.01, 0.01], and None for everything in between.
    ``val`` is not used.
    """
    if diff < -0.13:
        return -2
    elif diff > 0.13:
        return 2
    elif -0.01 <= diff <= 0.01:
        return 0
    else:
        return None

# The same categorization as source text, split into lines and handed to
# GalaxyInterface.createSegmentation below.
# NOTE(review): the literal is reproduced exactly as it appears in this copy
# of the file, where its line breaks and indentation look collapsed - restore
# them from the original before relying on the split. Splitting on os.linesep
# also only matches the "\n" of a source literal on platforms where
# os.linesep is "\n".
meltSegLines = ''' if diff < -0.13: return -2 elif diff > 0.13: return 2 elif -0.01 <= diff <= 0.01: return 0 else: return None '''.split(os.linesep)

# Earlier approach, using the meltSeg function directly:
#FunctionCategorizer(['melting'], meltSeg).createNewTrack(['melting','meltMapSeg'])

# Called for genome 'hg18' with ['melting'] as the first track name and
# ['melting', 'meltMapSeg2'] as the second.
GalaxyInterface.createSegmentation('hg18', ['melting'], ['melting', 'meltMapSeg2'], meltSegLines)

# Manual check that the lines form a valid function body:
#exec( os.linesep.join( ['def categorizerMethod(val,diff):'] + meltSegLines) )
#a = categorizerMethod
#print a(3,-1)
# Copyright (C) 2009, Geir Kjetil Sandve, Sveinung Gundersen and Morten Johansen # This file is part of The Genomic HyperBrowser. # # The Genomic HyperBrowser is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # The Genomic HyperBrowser is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with The Genomic HyperBrowser. If not, see <http://www.gnu.org/licenses/>. from gold.application.GalaxyInterface import GalaxyInterface trackName1 = ['Genes and Gene Prediction Tracks','Genes','Refseq'] #or any other track that are precomputed on the server trackName2 = ['Sequence','Repeating elements'] question = 'Are track1-points occurring [tail:=different:with different frequency] inside track2-segment than outside? -> PointCountInSegsPvalStat' #or any other statistic from the HB collection regSpec = 'chr1:1-10000000' #could also be e.g. 'chr1' for the whole chromosome or '*' for the whole genome binSpec = '1m' #could also be e.g.'100', '1k' or '*' for whole regions/chromosomes as bins GalaxyInterface.run(trackName1, trackName2, question, regSpec, binSpec, genome='hg18')
def getExpandedTrackNameFromInTrackName(self, inTrackName, outTrackName,uniqueStaticId, genome, upFlank, downFlank):
    """Call ``GalaxyInterface.expandBedSegmentsFromTrackName`` with all given
    arguments and return ``outTrackName`` unchanged."""
    expansionArgs = (inTrackName, outTrackName, uniqueStaticId,
                     genome, upFlank, downFlank)
    GalaxyInterface.expandBedSegmentsFromTrackName(*expansionArgs)
    return outTrackName
def userHasFullAccess(self):
    """Return the (truthy) result of ``self.isPublic()``; otherwise return
    what ``GalaxyInterface._userHasFullAccess`` reports for the current
    Galaxy user."""
    publicFlag = self.isPublic()
    if publicFlag:
        return publicFlag
    return GalaxyInterface._userHasFullAccess(self.galaxy.getUserName())
from gold.application.GalaxyInterface import GalaxyInterface #first you need to create a statistic class. For this you will use a StatisticTemplate, and just add a few custom lines of code.. #In our example we will make a simple statistic that computes the number of bps covered by all segments of a single track (in a bin). #1: open the file StatisticTemplate.py #2: save this file with your own name in the folder "quick/statistic". Lets call it "BpCoverageStat.py". (By putting your new class here, it will automatically be loaded into the Hyperbrowser system.) #3: rename the classes according to your name for the statistic - the same as used for the file. The file should then have a class "BpCoverageStat" and a class "BpCoverageStatUnsplittable". (The system will automatically use the unsplittable class, and would in certain cases also automatically have used a corresponding splittable class if it had been defined..) #4: define what input your statistic will need. In our case we will simply need the raw track data. Add the following line in the method "_createChildren": #self._addChild( RawDataStat(self._region, self._track, TrackFormatReq()) ) #5: define how to compute the result. Add the following line under "_compute": #return sum( el.end() - el.start() for el in self._children[0].getResult()) #6: Now you can make a simple call that computes full genome-wide results based on your statistics code: GalaxyInterface.run(['genes', 'refseq'], ['repeats', 'LINE'], 'My new statistic -> BpCoverageStat', '*', '*', genome='hg18') #7: You can also very simply make your new statistic available on the web if you have a web system running against the Hyperbrowser. Simply add the following line somewhere inside the string variable "QUESTION_SPEC_STR" in the file "gold/description/AnalysisList.py": #'My new statistic -> BpCoverageStat'
def testCountStat(self):
    """Run CountStat for the test tracks on a 4000 bp region of chr21 of
    TestGenome in bins of 2000; the result is not checked."""
    # The comparison with the expected result is currently disabled:
    #self._assertRunEqual([[('Result', 119121)], [('Result', 0)]], <run result>)
    firstTrack = ["segsMany"]
    secondTrack = ["segs"]
    region = 'TestGenome:chr21:10000000-10004000'
    GalaxyInterface.run(firstTrack, secondTrack, 'CountStat', region, '2000')
def execute(cls, choices, galaxyFn=None, username=""): from quick.application.GalaxyInterface import GalaxyInterface fileformat = choices[6] outputFile = open(galaxyFn, "w") if fileformat == "html": print GalaxyInterface.getHtmlBeginForRuns(galaxyFn) print GalaxyInterface.getHtmlForToggles(withRunDescription=False) t = calendar.timegm(time.gmtime()) htmlfile = GalaxyRunSpecificFile(["fet", str(t)], galaxyFn) genome = choices[0] track1 = choices[1].split(":") track2 = choices[2].split(":") tn1 = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, track1) tn2 = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, track2) windowSize = int(choices[3]) windowStep = int(choices[4]) percentile = float(choices[5]) # results = {} # TODO: why this? # tr = Track(tn1) # tr.addFormatReq(TrackFormatReq(dense=False, allowOverlaps=True)) outputFile.write("#seqid\tstart\tscore\tstddev\n") if fileformat == "html": text = "#seqid\tstart\tscore\tstddev\n" print "chrs:", str(GenomeInfo.getChrList(genome)) reg = "*" bins = "*" analysisDef = "Dummy: dummy name ([wStep=%g] [wSize=%g] [percentile=%g])-> FisherExactScoreStat" % ( windowStep, windowSize, percentile, ) userBinSource = GalaxyInterface._getUserBinSource(reg, bins, genome) result = GalaxyInterface.runManual([tn1, tn2], analysisDef, reg, bins, genome, galaxyFn=galaxyFn) for key in result.getAllRegionKeys(): chrom = str(key).split(":")[0] r = result[key] if "Result" not in r.keys(): print "skipping chr:", chrom, r continue r = r["Result"] scores = r[0] stddev = r[1] for i in range(len(scores)): if scores[i] != 0: pos = i * windowStep # if choices[5] == "html": # print "%s\t%s\t%s\t%s\n" % (str(chrom), pos, str(scores[i]), str(stddev[i])) if fileformat == "tabular": outputFile.write("%s\t%s\t%s\t%s\n" % (str(chrom), pos, str(scores[i]), str(stddev[i]))) else: text += "%s\t%s\t%s\t%s\n" % (str(chrom), pos, str(scores[i]), str(stddev[i])) if fileformat == "html": htmlfile.writeTextToFile(text) print 
htmlfile.getLink("Result file") print GalaxyInterface.getHtmlEndForRuns() outputFile.close()