Example #1
0
 def decodeChoice(self, opts, id, choice):
     """Decode a submitted choice value for the option set `opts`.

     For the special '__track__' option set the choice is stringified, split
     on ':' into track-name parts, handed to GalaxyInterface.cleanUpTrackName
     (its return value is ignored -- presumably it edits the list in place;
     TODO confirm) and joined back with ':'.  Any other option set is
     delegated to the parent class implementation.
     """
     if opts != '__track__':
         return super(HyperBrowserControllerMixin, self).decodeChoice(opts, id, choice)

     nameParts = str(choice).split(':')
     GalaxyInterface.cleanUpTrackName(nameParts)
     return ':'.join(nameParts)
 def execute(self):
     """Start preprocessing of the selected track for the current genome.

     Redirects stdout to the history (self.stdoutToHistory()), reads the
     optional 'userEmail' and 'track1' parameters and passes them, together
     with self.genome, to GalaxyInterface.startPreProcessing.
     """
     self.stdoutToHistory()
     # has_key() is the deprecated Python 2 mapping API; .get() with a default
     # gives the same fallbacks ('' for the user, [] for the track).
     username = self.params.get('userEmail', '')
     track = self.params.get('track1', [])
     # Same text as the former two-argument print statement.
     print('GalaxyInterface.startPreProcessing %s' % ((self.genome, track, username),))
     GalaxyInterface.startPreProcessing(self.genome, track, username)
 def execute(self):
     """Kick off track preprocessing via GalaxyInterface.startPreProcessing.

     stdout is first redirected to the history.  The user e-mail defaults to
     '' and the track to [] when the corresponding parameters are absent.
     """
     self.stdoutToHistory()
     # .get() replaces the deprecated has_key() test-then-index pattern and
     # keeps the same defaults.
     username = self.params.get('userEmail', '')
     track = self.params.get('track1', [])
     callArgs = (self.genome, track, username)
     # Emits exactly what the former two-argument print statement emitted.
     print('GalaxyInterface.startPreProcessing %s' % (callArgs,))
     GalaxyInterface.startPreProcessing(*callArgs)
    def testGetStatOptions(self):
        """Check that getStatOptions yields at least one option for two track pairs.

        DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS is switched off for the
        duration of the checks and restored afterwards.
        """
        if self.VERBOSE:
            DebugConfig.VERBOSE = True
            DebugConfig.PASS_ON_COMPUTE_EXCEPTIONS = True

        prevVal = DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS
        DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS = False
        try:
            self.assertTrue(len(GalaxyInterface.getStatOptions('TestGenome', ['segsMany'], ['segs'], 'Hypothesis testing')) > 0)
            self.assertTrue(len(GalaxyInterface.getStatOptions('TestGenome', ['segsMany'], ['nums'], 'Hypothesis testing')) > 0)
        finally:
            # Restore the global flag even when an assertion fails, so the
            # changed setting cannot leak into tests that run afterwards.
            DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS = prevVal
    def testGetStatOptions(self):
        """Assert that getStatOptions returns a non-empty result for two track pairs.

        Runs with DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS temporarily set to
        False; the previous value is put back when the test finishes.
        """
        if self.VERBOSE:
            DebugConfig.VERBOSE = True
            DebugConfig.PASS_ON_COMPUTE_EXCEPTIONS = True

        prevVal = DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS
        DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS = False
        try:
            for secondTrack in (['segs'], ['nums']):
                options = GalaxyInterface.getStatOptions('TestGenome', ['segsMany'], secondTrack, 'Hypothesis testing')
                self.assertTrue(len(options) > 0)
        finally:
            # A failing assertion must not leave the global flag modified.
            DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS = prevVal
Example #6
0
def reinstallGenomes(genomeList, username, genomeListIsFn='True', verbose='False'):
    "genomeList username genomeListIsFn=True verbose=False"
    # NOTE: the one-line docstring above is kept verbatim; it reads like a
    # command-line usage string and may be shown to the user by the caller.
    #
    # Re-runs preprocessing of a fixed set of track categories for each genome
    # and then flags the genome as installed.
    #   genomeList     -- file name with one genome per line, or (when
    #                     genomeListIsFn evaluates to False) a comma-separated
    #                     string of genome names
    #   username       -- passed on to startPreProcessing / flagAsInstalled
    #   genomeListIsFn -- Python literal as string ('True' / 'False')
    #   verbose        -- Python literal as string, copied to DebugConfig.VERBOSE
    from quick.application.GalaxyInterface import GalaxyInterface
    from gold.util.CustomExceptions import PreprocessError
    from quick.extra.StandardizeTrackFiles import PlainMover
    from config.Config import DebugConfig

    genomeListIsFn = ast.literal_eval(genomeListIsFn)
    verbose = ast.literal_eval(verbose)

    if genomeListIsFn:
        # Context manager so the list file is closed again.
        with open(genomeList) as genomeFile:
            genomes = [line.strip() for line in genomeFile]
    else:
        genomes = [name.strip() for name in genomeList.split(',')]
    # Blank lines / empty entries are not genomes; skip them.
    genomes = [genome for genome in genomes if genome]

    track_cats = ['Sequence:DNA', 'Genome build properties', 'Genes and gene subsets', 'Sample data']

    prevPassOnPreprocessExceptions = DebugConfig.PASS_ON_PREPROCESS_EXCEPTIONS
    prevVerbose = DebugConfig.VERBOSE
    DebugConfig.PASS_ON_PREPROCESS_EXCEPTIONS = True
    DebugConfig.VERBOSE = verbose
    try:
        for genome in genomes:
            print(genome)

            if GenomeInfo(genome).installed and GenomeInfo._queryShelveWithKey(genome, 'r', old=False) is not None:
                # Trigger installation double-check
                gi = GenomeInfo(genome)
                gi.installed = False
                gi.store()
                if gi.isInstalled():
                    print('\n**Genome "{}" was alread installed in the new shelve. Skipping!**\n'.format(genome))
                    continue

            for track_cat in track_cats:
                try:
                    GalaxyInterface.startPreProcessing(genome, track_cat.split(':'), username)
                except PreprocessError as e:
                    traceback.print_exc()
                    print('')

                    # The failing track is recovered from the exception text
                    # and moved out of the standardized file structure.
                    match = re.search(r'trackName="([^"]+)"', str(e))
                    if match:
                        trackName = match.group(1).split(':')
                        PlainMover.parseFiles(genome, trackName, 'std_to_error')
                        print('\n\n**Moved track "{}" to "parsingErrorTracks" file structure**\n'.format(':'.join(trackName)))

            genome_info = GenomeInfo(genome)
            installed = genome_info.flagAsInstalled(username)
            if installed:
                print('\n**Genome "{}" was reinstalled successfully!**\n'.format(genome))
            else:
                print('\n**Genome "{}" was not reinstalled!**\n'.format(genome))
    finally:
        # The previous values were saved but never put back; restore them so
        # the process-wide debug configuration is not left modified.
        DebugConfig.PASS_ON_PREPROCESS_EXCEPTIONS = prevPassOnPreprocessExceptions
        DebugConfig.VERBOSE = prevVerbose
Example #7
0
 def getTrackElement(self, id, label, history=False, ucsc=False, tracks=None):
     """Collect history datasets for a track element.

     When `history` is true, the datasets are fetched from
     self.galaxy.getHistory for the file formats reported by
     GalaxyInterface.getSupportedGalaxyFileFormats(); any exception is
     printed and the dataset list stays empty.

     NOTE(review): the visible body ends after this lookup and returns
     nothing -- the remainder of the method appears to be missing here.
     """
     datasets = []
     if history:
         try:
             datasets = self.galaxy.getHistory(GalaxyInterface.getSupportedGalaxyFileFormats())
         except Exception as err:
             print(err)
Example #8
0
    def _assertExpandBedSegments(self, inContents, outContents, upFlank, downFlank,
                                 treatTrackAs, removeChrBorderCrossing, suffix):
        """Run GalaxyInterface.expandBedSegments and compare its output file.

        `inContents` is written to a temporary input file, the expansion is
        run against 'TestGenome' into a second temporary file, and the
        contents of that file must equal `outContents`.  Both temporary files
        are created with the given `suffix`.
        """
        from tempfile import NamedTemporaryFile

        with NamedTemporaryFile(suffix=suffix) as inFile:
            inFile.write(inContents)
            # Flush so the data is on disk before the file is opened by name.
            inFile.flush()

            with NamedTemporaryFile(suffix=suffix) as outFile:
                GalaxyInterface.expandBedSegments(inFile.name, outFile.name, 'TestGenome', upFlank, downFlank,
                                                  treatTrackAs, removeChrBorderCrossing, suffix)

                expandedContents = outFile.read()

                # assertEquals is a deprecated alias of assertEqual.
                self.assertEqual(outContents, expandedContents)
Example #9
0
    def getInputValueForTrack(self, id, name):
        """Return the ':'-joined track name for the track element `id`.

        The track element is first built from cached track data; if that
        fails for any reason it is rebuilt without the cache and the cache is
        refreshed from track.tracks.  The element is stored in
        self.trackElements[id], its definition is passed through
        GalaxyInterface.cleanUpTrackName and joined with ':'.
        """
        try:
            cachedTracks = self.getCacheData(id)
            track = self.getTrackElement(id, name, tracks=cachedTracks)
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # are no longer swallowed by the cache fallback.
            print('track cache is empty')
            track = self.getTrackElement(id, name)
            self.putCacheData(id, track.tracks)

        self.trackElements[id] = track
        tn = track.definition(False)
        GalaxyInterface.cleanUpTrackName(tn)
        return ':'.join(tn)
 def getTrackElement(self, id, label, history=False, ucsc=False):
     """Look up the history datasets used for a track element.

     If `history` is set, self.galaxy.getHistory is queried with the formats
     from GalaxyInterface.getSupportedGalaxyFileFormats(); a failure is only
     printed, leaving the dataset list empty.

     NOTE(review): nothing is returned by the visible body -- the rest of
     the method seems to be cut off in this view.
     """
     datasets = []
     if history:
         try:
             datasets = self.galaxy.getHistory(GalaxyInterface.getSupportedGalaxyFileFormats())
         except Exception as error:
             print(error)
Example #11
0
def makeLowercaseName2NameShelfFromTnSubTypes(genome, trackName, shelfFn):
    'genome trackName shelfFn'
    # NOTE: the one-line docstring above is kept verbatim; it reads like a
    # command-line usage string and may be shown to the user by the caller.
    #
    # Builds a shelf that maps the lower-cased name of every category found
    # by ListOfPresentCategoriesStat for the track to its original spelling.
    #   genome    -- genome passed to GalaxyInterface.runManual
    #   trackName -- track name string, parts separated by '/' or ':'
    #   shelfFn   -- file name handed to safeshelve.open
    trackName = re.split('/|:', trackName)

    from gold.application.GalaxyInterface import GalaxyInterface
    analysisDef = "-> ListOfPresentCategoriesStat"
    results = GalaxyInterface.runManual([trackName, None],
                                        analysisDef,
                                        '*',
                                        '*',
                                        genome,
                                        printResults=False,
                                        printHtmlWarningMsgs=False)
    categories = results.getGlobalResult()['Result']

    shelf = safeshelve.open(shelfFn)
    try:
        for cat in categories:
            shelf[cat.lower()] = cat
    finally:
        # Close the shelf even if writing an entry fails.
        shelf.close()
Example #12
0
def updateAllTrackInfoToVersion15():
    """Run updateGenomeTrackInfoToVersion15 for every known genome.

    Each entry returned by GalaxyInterface.getAllGenomes() is indexed at
    position 1 to obtain the genome value that is passed on.
    """
    from quick.application.GalaxyInterface import GalaxyInterface
    from gold.origdata.PreProcessUtils import PreProcessUtils

    allGenomes = GalaxyInterface.getAllGenomes()
    for entry in allGenomes:
        updateGenomeTrackInfoToVersion15(entry[1])
    def _assertRunEqual(self, target, *args, **kwArgs):
        """Run GalaxyInterface.run with `args` and compare against `target`.

        args[2] is the analysis definition ("label -> statistic", or just one
        part, which is then used for both sides).  ' [randomSeed:=0]' is
        appended to the first part before running.  The run is repeated with
        disk memoization loading switched off and on.  If the keyword
        argument `globalTarget` is given, the global results are compared
        as well.
        """
        if self.VERBOSE:
            DebugConfig.PASS_ON_COMPUTE_EXCEPTIONS = True
            print('\n***\n' + str(self.id()) + '\n***')

        args = list(args)
        analysisDef = [x.strip() for x in args[2].split('->')]
        if len(analysisDef) == 1:
            analysisDef.append(analysisDef[0])
        analysisDef[0] += ' [randomSeed:=0]'

        args[2] = analysisDef[0] + " -> " + analysisDef[1]

        globalTarget = kwArgs.get('globalTarget')

        for diskMemo in [False, True]:
            gold.statistic.ResultsMemoizer.LOAD_DISK_MEMOIZATION = diskMemo

            if self._usesProfiling():
                gold.application.StatRunner.USE_PROFILING = True

            res = GalaxyInterface.run(*args, genome='TestGenome')

            self._assertEqualResults(target, res)
            # Identity test instead of `!= None`: comparison operators may be
            # overloaded by the target type (e.g. element-wise on arrays).
            if globalTarget is not None:
                self._assertEqualGlobalResults(globalTarget, res)

            if self._usesProfiling():
                self._storeProfile(diskMemo)
 def extract_feature(self, genome, track, ref, option):
     '''
     Return the relation of the cluster track to the reference track.

     track and ref are the cluster track and the reference track; option
     names the feature (statistic) to use.  The region/bin specification is
     taken from self.params: "compare_in" selects chromosomes, chromosome
     arms or cytobands, otherwise the "region" and "binsize" parameters are
     used.  Returns the entry of the global result selected by the feature.
     '''
     # validFeature holds the analysisDef ([0]) and the key used to pick the
     # needed number from the global result ([1]).
     validFeature = FeatureCatalog.getFeaturesFromTracks(genome, track, ref)[option]
     # The original referenced `cls` here, which is undefined in this instance
     # method and raised NameError whenever the option matched.
     # NOTE(review): assumes getTrackFormat is reachable through self -- confirm.
     if option == 'Prop. of tr1-points falling inside segments of tr2' and self.getTrackFormat(genome, track) in ['Segments', 'Valued segments']:
         analysisDef = 'dummy [tf1=SegmentToMidPointFormatConverter] -> DerivedPointCountsVsSegsStat'
     else:
         analysisDef = validFeature[0]  # or any other statistic from the HB collection

     compareIn = self.params.get("compare_in")
     if compareIn == "Chromosomes":
         regSpec = "__chrs__"
         binSpec = self.params.get("Chromosomes")
     elif compareIn == "Chromosome arms":
         regSpec = "__chrArms__"
         binSpec = self.params.get("Chromosome_arms")
     elif compareIn == "Cytobands":
         regSpec = "__chrBands__"
         binSpec = self.params.get("Cytobands")
     else:
         regSpec = self.params.get("region")
         binSpec = self.params.get("binsize")

     userBinSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)
     if option == 'Prop. of tr2 covered by tr1':
         # The roles of reference and cluster track are swapped for this statistic.
         result = AnalysisDefJob(analysisDef, ref, track, userBinSource).run()
     else:
         result = AnalysisDefJob(analysisDef, track, ref, userBinSource).run()
     mainResultDict = result.getGlobalResult()
     return mainResultDict[validFeature[1]]
Example #15
0
 def execute(cls, choices, galaxyFn=None, username=''):
     '''Called when the web user pushes the execute button.

     choices[0] is the genome and choices[1] a text with gene names separated
     by newlines and/or commas.  The names are stripped, empty ones are
     dropped, and the resulting list is passed with the genome and galaxyFn
     to GalaxyInterface.getEnsemblGenes.
     '''
     genome, genes = choices[0], choices[1]

     genelist = [gene.strip()
                 for geneLine in genes.split('\n')
                 for gene in geneLine.split(',')
                 if gene.strip() != '']

     GalaxyInterface.getEnsemblGenes(genome, genelist, galaxyFn)
Example #16
0
def createSplittedChrArms(genome, binSize, outFn):
    """genome binsize outFn.bed"""
    # NOTE: the one-line docstring above is kept verbatim; it reads like a
    # command-line usage string and may be shown to the user by the caller.
    #
    # Writes one tab-separated "chr, start, end" line to outFn for every bin
    # that AutoBinner produces from the '__chrArms__' bin source of the
    # genome, using int(binSize) as bin size.
    from quick.application.GalaxyInterface import GalaxyInterface

    # Context manager so the output file is closed even if binning fails.
    with open(outFn, 'w') as outFile:
        chrArms = GalaxyInterface._getUserBinSource('__chrArms__', '*', genome)
        # Loop variable renamed from `bin`, which shadowed the builtin.
        for region in AutoBinner(chrArms, int(binSize)):
            outFile.write('\t'.join([region.chr, str(region.start), str(region.end)]) + os.linesep)
    def getValidAnalyses(cls, genome, trackName1, trackName2):
        """Return the features from cls.getAllFeatures() that are valid for the tracks.

        The two track names are cleaned up with GalaxyInterface._cleanUpTracks
        and checked with areTrackNamesValid; an empty dict is returned when
        they are not valid.  Otherwise a feature is kept when
        AnalysisManager._tryAnalysisDefForValidity yields a first element
        that is not None for the feature's analysisDef (reversed track order
        is not tried).
        """
        allFeatures = cls.getAllFeatures()
        cleanedNames = GalaxyInterface._cleanUpTracks([trackName1, trackName2], genome, realPreProc=False)
        if not GalaxyInterface.areTrackNamesValid(genome, cleanedNames):
            return {}

        # NOTE(review): the validity probe receives the original
        # trackName1/trackName2, not the cleaned names -- confirm intended.
        return {
            key: allFeatures[key]
            for key in allFeatures
            if AnalysisManager._tryAnalysisDefForValidity(
                allFeatures[key][0], genome, trackName1, trackName2, tryReversed=False)[0] is not None
        }
 def handleRegionClustering(self, genome, tracks, clusterMethod, extra_option):
     """Print per-region feature values of the reference track.

     The history track definition of 'track1' is printed first.  Only when
     its first element is 'galaxy' is anything else done: its third element
     is used as the path of a 'bed' bin source, the analysis from
     SplittedRegionsAsFeaturesCatalog.getValidAnalyses is run on the
     'reftrack1' parameter, and the selected value of every local result is
     printed in sorted key order.  Nothing is returned.
     """
     region_cluster_track = self.getHistoryTrackDef('track1')
     print(region_cluster_track)
     region_ref_track = self.params.get('reftrack1')
     if region_cluster_track[0] != 'galaxy':
         return

     file_type, track_path = region_cluster_track[1], region_cluster_track[2]
     userBinSource = GalaxyInterface._getUserBinSource('bed', track_path, genome)
     validFeature = SplittedRegionsAsFeaturesCatalog.getValidAnalyses(genome, region_ref_track, [])
     analysisDef = validFeature[0]
     result = AnalysisDefJob(analysisDef, region_ref_track, [], userBinSource).run()

     values = []
     for localKey in sorted(result.keys()):
         values.append(result[localKey][validFeature[1]])
     print(values)
 def computeDistance(cls, genome, track1, track2, feature, regSpec, binSpec):
     '''
     Compute the direct distance between track1 and track2.

     track1 and track2 are lists such as ['Sequence','Repeating elements','LINE'];
     feature selects, via DirectDistanceCatalog.getValidAnalyses, the analysis
     definition to run and the key of the global result that is returned.
     regSpec and binSpec define the bins the analysis is run on.
     '''
     featureSpec = DirectDistanceCatalog.getValidAnalyses(genome, track1, track2)[feature]
     analysisDef = featureSpec[0]
     binSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)

     job = AnalysisDefJob(analysisDef, track1, track2, binSource)
     globalResult = job.run().getGlobalResult()
     return globalResult[featureSpec[1]]
 def testRunValid(self):
     """Check GalaxyInterface.runValid on one accepted and five rejected argument sets.

     The first call must return True; every other call must return
     something other than True.
     """
     validArgs = (["segsLen1"], ["segs"], 'RawOverlapStat', 'chr21:1-4001', '2000', 'TestGenome')
     self.assertEqual(True, GalaxyInterface.runValid(*validArgs))

     invalidArgSets = [
         (["segsLen1"], ["segs"], '', 'TestGenome:chr21:1-4001', '2000', 'TestGenome'),
         (["segsLen1"], ["segs"], 'RawOverlapStat', '1-4001', '2000', 'TestGenome'),
         (["segsLen1"], ["segs"], 'RawOverlapStat', 'TestGenome:chr21:1-4001', 'default', 'TestGenome'),
         (["segs"], ["segs"], '', 'TestGenome:chr21:1-4001', 'default', 'TestGenome'),
         (["segs"], ["segs"], '', 'TestGenome:chr21:1-4001', 'default', 'hg18'),
     ]
     for invalidArgs in invalidArgSets:
         self.assertNotEqual(True, GalaxyInterface.runValid(*invalidArgs))
Example #21
0
 def testRunValid(self):
     """Exercise GalaxyInterface.runValid with valid and invalid run specifications.

     Each case pairs the expectation (True = the call must return True,
     False = it must return anything but True) with the call arguments.
     """
     cases = [
         (True, (["segsLen1"], ["segs"], 'RawOverlapStat', 'chr21:1-4001', '2000', 'TestGenome')),
         (False, (["segsLen1"], ["segs"], '', 'TestGenome:chr21:1-4001', '2000', 'TestGenome')),
         (False, (["segsLen1"], ["segs"], 'RawOverlapStat', '1-4001', '2000', 'TestGenome')),
         (False, (["segsLen1"], ["segs"], 'RawOverlapStat', 'TestGenome:chr21:1-4001', 'default', 'TestGenome')),
         (False, (["segs"], ["segs"], '', 'TestGenome:chr21:1-4001', 'default', 'TestGenome')),
         (False, (["segs"], ["segs"], '', 'TestGenome:chr21:1-4001', 'default', 'hg18')),
     ]
     for shouldBeValid, runArgs in cases:
         outcome = GalaxyInterface.runValid(*runArgs)
         if shouldBeValid:
             self.assertEqual(True, outcome)
         else:
             self.assertNotEqual(True, outcome)
 def build_feature_vector(genome, ctrack, feature, regSpec, binSpec):
     '''
     Create a feature vector for ctrack.

     feature selects, via LocalResultsAsFeaturesCatalog.getValidAnalyses, the
     analysis definition to run and the key picked from each local result.
     The vector holds one value per local result, in sorted key order.
     '''
     featureSpec = LocalResultsAsFeaturesCatalog.getValidAnalyses(genome, ctrack, [])[feature]
     analysisDef = featureSpec[0]
     binSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)
     localResults = AnalysisDefJob(analysisDef, ctrack, [], binSource).run()

     vector = []
     for localKey in sorted(localResults.keys()):
         vector.append(localResults[localKey][featureSpec[1]])
     return vector
 def extract_feature(cls, genome, track, ref, option, regSpec, binSpec, trackFormat):
     """Return the global result value relating `track` to `ref` for feature `option`.

     The feature entry from FeatureCatalog.getFeaturesFromTracks supplies the
     analysis definition ([0]) and the key into the global result ([1]).
     For the point-in-segment option on segment-type tracks a mid-point
     converting analysis definition is used instead.  For
     'Prop. of tr2 covered by tr1' the two tracks are passed in swapped
     order.  Asserts that a global result was produced.
     """
     validFeature = FeatureCatalog.getFeaturesFromTracks(genome, track, ref)[option]

     if option == 'Prop. of tr1-points falling inside segments of tr2' and trackFormat in ['Segments', 'Valued segments']:
         analysisDef = 'dummy [tf1=SegmentToMidPointFormatConverter] -> DerivedPointCountsVsSegsStat'
     else:
         analysisDef = validFeature[0]

     userBinSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)
     if option == 'Prop. of tr2 covered by tr1':
         job = AnalysisDefJob(analysisDef, ref, track, userBinSource)
     else:
         job = AnalysisDefJob(analysisDef, track, ref, userBinSource)
     result = job.run()

     validAnalysisDef = validFeature[1]
     assert result.getGlobalResult() is not None, 'Did not get any global result for analysisDef: '+validAnalysisDef
     return result.getGlobalResult()[validAnalysisDef]
    def _assertRunEqual(self, target, *args, **kwArgs):
        """Run GalaxyInterface.run with `args` and compare the results to `target`.

        args[2] is the analysis definition; a definition without '->' is used
        for both sides.  ' [randomSeed:=0:]' is appended to its first part.
        The run is repeated once per item of self._runTypeGenerator().  When
        the keyword argument `globalTarget` is not None, the global results
        are compared too.
        """
        if DebugConfig.VERBOSE:
            print('\n***\n' + str(self.id()) + '\n***')

        runArgs = list(args)
        parts = [part.strip() for part in runArgs[2].split('->')]
        if len(parts) == 1:
            parts.append(parts[0])
        parts[0] += ' [randomSeed:=0:]'
        runArgs[2] = " -> ".join(parts[:2])

        globalTarget = kwArgs.get('globalTarget')
        for _runType in self._runTypeGenerator():
            res = GalaxyInterface.run(*runArgs, genome='TestGenome')

            self._assertEqualResults(target, res)
            if globalTarget is not None:
                self._assertEqualGlobalResults(globalTarget, res)
Example #25
0
    def computeDistance(track1, track2, feature='direct distance'):
        '''
        Return the 'Variance' entry of PropFreqOfTr1VsTr2Stat for two tracks.

        track1 and track2 are lists such as ['Sequence','Repeating elements','LINE'].
        The analysis is run on genome 'hg18', region 'chr1', with bin
        specification '10m'.  `feature` is accepted but not used here.
        '''
        genome = 'hg18'
        regSpec = 'chr1'   # '*' would select the whole genome
        binSpec = '10m'    # could also be e.g. '100', '1k', or '*' for whole regions as bins
        analysisDef = 'bla bla -> PropFreqOfTr1VsTr2Stat'  # or any other statistic from the HB collection

        userBinSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)
        # result is an instance of the Results class
        result = AnalysisDefJob(analysisDef, track1, track2, userBinSource).run()

        # According to the original notes, PropFreqOfTr1VsTr2Stat produces the
        # keys 'Track1Prop', 'CountTrack1', 'CountTrack2' and 'Variance'.
        return result.getGlobalResult()['Variance']
    def computeDistance(self, genome, track1, track2, feature):
        '''
        Compute the direct distance between track1 and track2.

        track1 and track2 are lists such as ['Sequence','Repeating elements','LINE'];
        feature selects, via DirectDistanceCatalog.getValidAnalyses, the
        analysis definition and the key of the global result to return.
        The bins come from self.params: "compare_in" picks chromosomes,
        chromosome arms or cytobands; otherwise "region" and "binsize" are used.
        '''
        validFeature = DirectDistanceCatalog.getValidAnalyses(genome, track1, track2)[feature]
        analysisDef = validFeature[0]

        # (compare_in value, region specification, parameter holding the bin specification)
        presets = (
            ("Chromosomes", "__chrs__", "Chromosomes"),
            ("Chromosome arms", "__chrArms__", "Chromosome_arms"),
            ("Cytobands", "__chrBands__", "Cytobands"),
        )
        compareIn = self.params.get("compare_in")
        for label, presetRegSpec, binParam in presets:
            if compareIn == label:
                regSpec = presetRegSpec
                binSpec = self.params.get(binParam)
                break
        else:
            regSpec = self.params.get("region")
            binSpec = self.params.get("binsize")

        userBinSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)

        # result is an instance of the Results class
        result = AnalysisDefJob(analysisDef, track1, track2, userBinSource).run()
        return result.getGlobalResult()[validFeature[1]]
 def _assertBatchEqual(self, target, *args):
     """Run GalaxyInterface.runBatchLines and compare each result with `target`.

     The batch is run twice, with disk memoization loading switched off and
     then on.  Result i is compared against target[i].
     """
     for diskMemo in (False, True):
         gold.statistic.ResultsMemoizer.LOAD_DISK_MEMOIZATION = diskMemo
         batchRes = GalaxyInterface.runBatchLines(*args)
         for i, res in enumerate(batchRes):
             self._assertEqualResults(target[i], res)
 def testCountStat(self):
     """Run CountStat on a TestGenome region without checking the result.

     The comparison against expected values ([('Result', 119121)] and
     [('Result', 0)]) is disabled in the original, so this only verifies
     that the run does not raise.
     """
     trackName1, trackName2 = ["segsMany"], ["segs"]
     GalaxyInterface.run(trackName1, trackName2, 'CountStat',
                         'TestGenome:chr21:10000000-10004000', '2000')
Example #29
0
 def _assertBatchEqual(self, target, *args):
     """Compare the results of GalaxyInterface.runBatchLines with `target`.

     The batch is executed once per item produced by
     self._runTypeGenerator(); result i is compared against target[i].
     """
     for _runType in self._runTypeGenerator():
         batchRes = GalaxyInterface.runBatchLines(*args)
         for index, res in enumerate(batchRes):
             self._assertEqualResults(target[index], res)
# Copyright (C) 2009, Geir Kjetil Sandve, Sveinung Gundersen and Morten Johansen
# This file is part of The Genomic HyperBrowser.
#
#    The Genomic HyperBrowser is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    The Genomic HyperBrowser is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with The Genomic HyperBrowser.  If not, see <http://www.gnu.org/licenses/>.

from gold.application.GalaxyInterface import GalaxyInterface

# Tutorial: adding a new statistic and running it.
# First you need to create a statistic class. For this you use a StatisticTemplate and add just a few custom lines of code.
# In this example we make a simple statistic that computes the number of bps covered by all segments of a single track (in a bin).
# 1: Open the file StatisticTemplate.py.
# 2: Save this file under your own name in the folder "quick/statistic". Let's call it "BpCoverageStat.py". (By putting your new class here, it will automatically be loaded into the HyperBrowser system.)
# 3: Rename the classes according to the name of your statistic - the same name as used for the file. The file should then have a class "BpCoverageStat" and a class "BpCoverageStatUnsplittable". (The system will automatically use the unsplittable class, and would in certain cases also automatically have used a corresponding splittable class if one had been defined.)
# 4: Define what input your statistic needs. In our case we simply need the raw track data. Add the following line in the method "_createChildren":
#     self._addChild( RawDataStat(self._region, self._track, TrackFormatReq()) )
# 5: Define how to compute the result. Add the following line under "_compute":
#     return sum( el.end() - el.start() for el in self._children[0].getResult())
# 6: Now you can make a simple call that computes full genome-wide results based on your statistic's code:
GalaxyInterface.run(['genes','refseq'], ['repeats','LINE'], 'My new statistic -> BpCoverageStat', '*', '*', genome='hg18')
# 7: You can also very simply make your new statistic available on the web if you have a web system running against the HyperBrowser. Simply add the following line somewhere inside the string variable "QUESTION_SPEC_STR" in the file "gold/description/AnalysisList.py":
#     'My new statistic -> BpCoverageStat'
from gold.application.GalaxyInterface import GalaxyInterface
# Manual check of the track-info record API for the hg18 track
# 'Regulation:CpG islands': the record is printed, modified and printed again,
# and finally 'description' and 'private' are set back to '' and False.
# NOTE(review): the meaning of the trailing False argument to
# setTrackInfoRecord is not visible here -- confirm against GalaxyInterface.
#
# Disabled example run:
#GalaxyInterface.run(['HCNE','density_mm8_90pc_50col'], ['genes','refseq'], '[altHyp:=ha1:]a -> PointPositioningPValStat','chr1','10m')

# Record before any change.
print GalaxyInterface.getTrackInfoRecord('hg18', ['Regulation', 'CpG islands'])
# Set a test description and show the record.
GalaxyInterface.setTrackInfoRecord('hg18', ['Regulation', 'CpG islands'],
                                   {'description': 'Test'}, False)
print GalaxyInterface.getTrackInfoRecord('hg18', ['Regulation', 'CpG islands'])
# Mark the track as private and show the record.
GalaxyInterface.setTrackInfoRecord('hg18', ['Regulation', 'CpG islands'],
                                   {'private': True}, False)
print GalaxyInterface.getTrackInfoRecord('hg18', ['Regulation', 'CpG islands'])
# Set both fields back to '' / False and show the final record.
GalaxyInterface.setTrackInfoRecord('hg18', ['Regulation', 'CpG islands'],
                                   {'description': ''}, False)
GalaxyInterface.setTrackInfoRecord('hg18', ['Regulation', 'CpG islands'],
                                   {'private': False}, False)
print GalaxyInterface.getTrackInfoRecord('hg18', ['Regulation', 'CpG islands'])
Example #32
0
    def execute(choices, galaxyFn=None, username=''):
        '''Called when the web user pushes the execute button.

        Prints, as an HTML ordered list on standard out, every key/value pair
        of the global result of DerivedOverlapStat for two fixed hg18 tracks
        on region 'chr1' with bin specification '10m'.  The `choices`,
        `galaxyFn` and `username` arguments are not used by this body.
        '''
        genome = 'hg18'
        # All elements in the 'Repeating elements' directory. The value is not
        # used below; the call is retained from the original.
        allRepeats = GalaxyInterface.getSubTrackNames(genome,['Sequence','Repeating elements'],False)

        # Earlier experiments (distance matrix, tree clustering, dendrogram
        # drawing) were present only as commented-out code and are omitted.
        track1 = ['Sequence','Repeating elements', 'DNA']
        track2 = ['Gene regulation', 'TFBS', 'High Throughput']
        analysisDef = 'bla bla -> DerivedOverlapStat'  # or any other statistic from the HB collection
        regSpec = 'chr1'   # '*' would select the whole genome
        binSpec = '10m'    # could also be e.g. '100', '1k', or '*' for whole regions as bins
        genome = 'hg18'
        userBinSource = GalaxyInterface._getUserBinSource(regSpec,binSpec,genome)

        # result is an instance of the Results class
        result = AnalysisDefJob(analysisDef, track1, track2, userBinSource).run()
        mainResultDict = result.getGlobalResult()

        print('<ol>')
        for key in mainResultDict.keys():
            print('<li>key:%s,value:%s </li>' % (key, mainResultDict[key]))
        print('</ol>')
from gold.application.GalaxyInterface import GalaxyInterface
from gold.application.StatRunner import AnalysisDefJob

# Demo script: run PropFreqOfTr1VsTr2Stat for two repeat tracks on hg18 chr1
# and print the 'Variance' entry of the global result.
trackName1 = ['Sequence', 'Repeating elements', 'LINE']
trackName2 = ['Sequence', 'Repeating elements', 'SINE']
#GalaxyInterface.getSubTrackNames(['Sequence','Repeating elements'],False)

analysisDef = 'bla bla -> PropFreqOfTr1VsTr2Stat'  # or any other statistic from the HB collection
regSpec = 'chr1'  # a whole chromosome; '*' would select the whole genome
binSpec = '10m'  # could also be e.g. '100', '1k', or '*' for whole regions/chromosomes as bins
genome = 'hg18'

#GalaxyInterface.run(trackName1, trackName2, question, regSpec, binSpec, genome='hg18')
userBinSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)

result = AnalysisDefJob(analysisDef, trackName1, trackName2,
                        userBinSource).run()
# result is an instance of the Results class:
#from gold.result.Results import Results

mainResultDict = result.getGlobalResult()
# From PropFreqOfTr1VsTr2Stat (per the original note), the result dict is:
#self._result = {'Track1Prop':ratio,'CountTrack1':c1, 'CountTrack2':c2,'Variance':variance}

mainValueOfInterest = mainResultDict['Variance']
print 'The ..variance..: ', mainValueOfInterest
 def __init__(self, trans, job):
     """
     Initialise the base controller, then load the genome list and pick the
     selected genome.

     The genome list is fetched for the current Galaxy user, or for the empty
     user name when no ``galaxy`` attribute is present. The selected genome
     is the 'dbkey' parameter, defaulting to the second field of the first
     genome entry.
     """
     BaseToolController.__init__(self, trans, job)
     if hasattr(self, 'galaxy'):
         userName = self.galaxy.getUserName()
     else:
         userName = ''
     self.genomes = GalaxyInterface.getAllGenomes(userName)
     self.genome = self.params.get('dbkey', self.genomes[0][1])
Example #35
0
 def testGetRunDescription(self):
     """
     Smoke-test GalaxyInterface.getRunDescription on a full analysis definition.

     DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS is switched off for the call and
     always restored afterwards, so a failure here cannot leak the modified
     global flag into other tests.
     """
     analysisDef = 'Different frequency inside segments?:Are track1-points occurring [tail:Alternative hypothesis=different:with different frequency/more:more frequently/less:less frequently] inside track2-segment than outside? [rawStatistic:=PointCountInsideSegsStat:] [assumptions:_Assumptions=poissonPoints:Poisson-distributed points/_PermutedSegsAndSampledIntersegsTrack:Permuted segments, sampled spaces (MC)/_PermutedSegsAndIntersegsTrack:Permuted segments, permuted spaces (MC)/_RandomGenomeLocationTrack:Segments fetched from random genome location (MC)] [numResamplings:_Resamplings=20/200/2000] -> PointCountInSegsPvalStat, RandomizationManagerStat'
     prevVal = DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS
     DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS = False
     try:
         GalaxyInterface.getRunDescription(['segsMany'], ['segs'], analysisDef, 'chr21:2-4001','2000', 'TestGenome')
     finally:
         # Restore the global flag even when the call above raises.
         DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS = prevVal
 def executeSelfFeature(cls, genome, tracks, track_names, clusterMethod, extra_option, feature, distanceType, kmeans_alg, galaxyFn, regSpec, binSpec):
     """
     Cluster tracks on a per-bin feature matrix and write an HTML report to galaxyFn.

     The feature matrix comes from cls.construct_feature_matrix; in R its rows
     are labelled with shortened track names and its columns with the sorted
     user bins. Depending on clusterMethod:

     * 'Hierarchical clustering' (extra_option is the hclust linkage method):
       writes a dendrogram PDF, a dput() dump of the matrix and a heatmap PDF.
     * 'K-means clustering' (extra_option is the number of clusters,
       kmeans_alg the R kmeans algorithm): writes a text summary per cluster.

     Nothing is clustered while extra_option (or kmeans_alg for k-means) still
     holds the '--select--' placeholder. The matrix itself is always printed
     through cls.print_data. The report file is closed before returning, also
     when an error occurs.
     """
     from gold.application.RSetup import r

     jobFile = open(galaxyFn, 'w')
     try:
         # Echo the call parameters so the run can be reproduced from the report.
         print>>jobFile, 'PARAMS: ', dict(zip('genome, tracks, track_names, clusterMethod, extra_option, feature, distanceType, kmeans_alg, regSpec, binSpec'.split(','), [repr(v)+'<br>'for v in [genome, tracks, track_names, clusterMethod, extra_option, feature, distanceType, kmeans_alg,regSpec, binSpec]]))
         print>>jobFile, '<br><br>To run:<br>$clusterBySelfFeature', (genome, '$'.join([':'.join(t) for t in tracks]), ':'.join(track_names)  , clusterMethod, extra_option, feature, distanceType, kmeans_alg, regSpec, binSpec), '<br><br>'
         print>>jobFile, 'signature of method clusterBySelfFeature:<br>', 'clusterBySelfFeature(genome, tracksStr, track_namesStr, clusterMethod, extra_option, feature, distanceType, kmeans_alg, regSpec, binSpec):<br><br><br>'

         # Shortened leaf names, used as row labels in the figures.
         prettyTrackNames = [v[-1].replace('RoadMap_','').replace('.H3K4me1','') for v in tracks]
         f_matrix = cls.construct_feature_matrix(genome, tracks, feature, regSpec, binSpec)
         print>>jobFile, 'dir f_matrix: ', dir(f_matrix), regSpec, binSpec

         userBinSource = GalaxyInterface._getUserBinSource(regSpec,binSpec,genome)
         r.assign('bin_names', [str(binRegion) for binRegion in sorted(userBinSource)])
         r.assign('track_names',prettyTrackNames) #use as track names, will be shown in clustering figure
         r.assign('f_matrix',f_matrix)
         r.assign('distanceType',distanceType)
         r('row.names(f_matrix) <- track_names')
         r('colnames(f_matrix) <- bin_names')

         if clusterMethod == 'Hierarchical clustering' and extra_option != "--select--" :
             figure = GalaxyRunSpecificFile(['cluster_tracks_result_figure.pdf'], galaxyFn)
             figurepath = figure.getDiskPath(True)
             r.pdf(figurepath)
             r('d <- dist(f_matrix, method=distanceType)')

             # Dump the matrix as an R object so it can be re-loaded with dget().
             r_f_matrixFile = GalaxyRunSpecificFile(['f-matrix.robj'], galaxyFn)
             r.assign('f_matrix_fn', r_f_matrixFile.getDiskPath(True))
             r('dput(f_matrix, f_matrix_fn)')
             print>>jobFile, r_f_matrixFile.getLink('feature_matrix')

             r.assign('extra_option',extra_option)
             r('hr <- hclust(d, method=extra_option, members=NULL)')
             r('plot(hr, ylab="Distance", hang=-1)')
             r('dev.off()')
             print>>jobFile, figure.getLink('clustering results figure<br>')

             heatmap = GalaxyRunSpecificFile(['heatmap_figure.pdf'], galaxyFn)
             heatmap_path = heatmap.getDiskPath(True)

             r.pdf(heatmap_path)
             r('heatmap(f_matrix, col=cm.colors(256), distfun=function(c) dist(c, method=distanceType), hclustfun=function(c) hclust(c, method=extra_option, members=NULL),Colv=NA, scale="none", xlab="", ylab="", cexRow=0.5, cexCol=0.5, margin=c(8,10))')#Features cluster tracks
             r('dev.off()')
             print>>jobFile, r('dimnames(f_matrix)')
             print>>jobFile, heatmap.getLink('heatmap figure <br>')
         elif clusterMethod == 'K-means clustering' and extra_option != "--select--" and kmeans_alg != "--select--":
             textFile = GalaxyRunSpecificFile(['result_of_kmeans_clustering.txt'], galaxyFn)
             textFilePath = textFile.getDiskPath(True)
             extra_option = int(extra_option) # here extra_option is the number of clusters
             r.assign('kmeans_alg',kmeans_alg)
             r.assign('extra_option',extra_option)
             r('hr <- kmeans(f_matrix,extra_option,algorithm=kmeans_alg)')
             clusterSizes = r('hr$size') #size of every cluster
             withinSS = r('hr$withinss')
             clusters = r('hr$cluster')

             kmeans_output = open(textFilePath,'w')
             try:
                 for index1 in range(extra_option) :
                     # Cluster ids are compared 1-based; members are reported as 1-based row indices.
                     # NOTE(review): the original author was unsure this matches the old
                     # name-based lookup (clusters.items()) - confirm against the rpy version in use.
                     trackInCluster = [k+1 for k,val in enumerate(clusters) if val == index1+1]

                     print>>kmeans_output, 'Cluster %i(%s objects) : ' % (index1+1, str(clusterSizes[index1]))
                     for name in trackInCluster :
                         print>>kmeans_output, name, '(This result may be a bit shaky afters some changes in rpy access)'

                     print>>kmeans_output, 'Sum of square error for this cluster is : '+str(withinSS[index1])+'\n'
             finally:
                 kmeans_output.close()
             print>>jobFile, textFile.getLink('Detailed result of kmeans clustering <br>')

         cls.print_data(f_matrix, jobFile)
     finally:
         jobFile.close()
     '''
 def executeReferenceTrack(cls, genome, tracks, track_names, clusterMethod, extra_option, distanceType, kmeans_alg, galaxyFn, regSpec, binSpec, numreferencetracks=None, refTracks=None, refFeatures=None, yesNo=None, howMany=None, upFlank=None, downFlank=None):
     from gold.application.RSetup import r
     jobFile = open(galaxyFn, 'w')
     print>>jobFile, 'PARAMS: ', dict(zip('genome, tracks, track_names, clusterMethod, extra_option, distanceType, kmeans_alg, regSpec, binSpec'.split(','), [repr(v)+'<br>'for v in [genome, tracks, track_names, clusterMethod, extra_option, distanceType, kmeans_alg, regSpec, binSpec]]))
     print>>jobFile, '<br><br>To run:<br>', '$clusterByReference', (genome, '$'.join([':'.join(t) for t in tracks]), ':'.join(track_names)  , clusterMethod, extra_option, distanceType, kmeans_alg, regSpec, binSpec,numreferencetracks, refTracks, refFeatures, yesNo, howMany, upFlank, downFlank), '<br><br>'
     print>>jobFile, 'signature of method clusterByReference:<br>', 'clusterByReference(genome, tracksStr, track_namesStr, clusterMethod, extra_option, distanceType, kmeans_alg, regSpec, binSpec, numreferencetracks=None, refTracks=None, refFeatures=None, yesNo=None, howMany=None, upFlank=None, downFlank=None)<br><br><br>'
     prettyTrackNames = [v[-1].replace("RoadMap_","").replace('.H3K4me1','') for v in tracks]
     
     #prettyTrackNames = [prettyPrintTrackName(v) for v in tracks]
     #paramNames = ['numreferencetracks', 'refTracks', 'refFeatures', 'yesNo', 'howMany', 'upFlank', 'downFlank']
     #for index, value in enumerate([numreferencetracks, refTracks, refFeatures, yesNo, howMany, upFlank, downFlank]):
     #    if value != None:
     #        print paramNames[index]+'='+ str(value),
     #print ''
     
     reftrack_names = [] #for use in creating the heatmap (as the column names)
     
     options = [] #for the case using refTracks, options contains feature for every refTrack, chosen by user.
     
     if numreferencetracks :
         for i in range(int(numreferencetracks)):
             ref_i = refTracks[i].split(":") #name of refTrack is being used to construct the name of expanded refTrack
             #refTracks.append(ref_i) #put the refTrack into refTracks list
             reftrack_names.append(ref_i[-1])
             temp_opt1 = 'ref'+str(i)+'feature'
             options+= [] if refFeatures[i] == None else [refFeatures[i]]
             if  yesNo[i] == "Yes" and howMany[i] != '--select--':
                 for expan in range(int(howMany[i])) :
                     reftrack_names.append(ref_i[-1]+'_'+ upFlank[i][expan])
                     upFlank = int(upFlank[i][expan])
                     downFlank = int(downFlank[i][expan])
                     withinRunId = str(i+1)+' expansion '+str(expan + 1)
                     outTrackName = GalaxyInterface.expandBedSegmentsFromTrackNameUsingGalaxyFn(ref_i, genome, upFlank, downFlank, galaxyFn, withinRunId) #outTrackName is unique for run
                     refTracks.append(outTrackName) #put the expanded track into refTracks list
                     options.append(options[-1]) # use chosen feature for refTack as valid feature for the expanded
         
         for index, track in enumerate(refTracks) :
             #print track, '<br>'
             if type(track) == str :
                 track = track.split(":")
             refTracks[index] = track[:-1] if track[-1] == "-- All subtypes --" else track
             
     if len(refTracks) > 0:
         
         trackFormats = [TrackInfo(genome,track).trackFormatName for track in tracks]
         
         trackLen = len(tracks)
         refLen = len(refTracks)
         f_matrix = zeros((trackLen, refLen))
         for i in range(trackLen):
             for j in range(refLen):
                 #print 'len(options), refLen, len(tracks), trackLen, len(trackFormats):', len(options), refLen, len(tracks), trackLen, len(trackFormats)
                 f_matrix[i,j] = cls.extract_feature(genome,tracks[i],refTracks[j],options[j], regSpec, binSpec, trackFormats[i])
         r.assign('track_names',prettyTrackNames) #use as track names, will be shown in clustering figure
         r.assign('reftrack_names',reftrack_names)
         r.assign('f_matrix',f_matrix)
         r.assign('distanceType',distanceType)
         r('row.names(f_matrix) <- track_names')
         r('colnames(f_matrix) <- reftrack_names')
        
         if clusterMethod == 'Hierarchical clustering' and extra_option != "--select--":
             figure = GalaxyRunSpecificFile(['cluster_tracks_result_figure.pdf'], galaxyFn)
             figurepath = figure.getDiskPath(True) 
             r.pdf(figurepath, 8,8)
             r('d <- dist(f_matrix, method=distanceType)')
             #print r.f_matrix
             #print r.d
             r_f_matrixFile = GalaxyRunSpecificFile(['f-matrix.robj'], galaxyFn)
             r.assign('f_matrix_fn', r_f_matrixFile.getDiskPath(True))
             r('dput(f_matrix, f_matrix_fn)')
             print>>jobFile, r_f_matrixFile.getLink('feature_matrix')
             
             r_f_matrixFile = GalaxyRunSpecificFile(['f-matrix.txt'], galaxyFn)
             r_f_matrixFile.writeTextToFile(str(f_matrix)+'\n\n'+str(r.d))
             print>>jobFile, r_f_matrixFile.getLink('r.f_matrix & r.d')
             r.assign('extra_option',extra_option)
             r('hr <- hclust(d, method=extra_option, members=NULL)')
             r('plot(hr, ylab="Distance", hang=-1)')
             
             r('dev.off()')
             print>>jobFile, figure.getLink('clustering results figure<br>')
         elif clusterMethod == 'K-means clustering' and extra_option != "--select--" and kmeans_alg != "--select--":
             textFile = GalaxyRunSpecificFile(['result_of_kmeans_clustering.txt'], galaxyFn)
             textFilePath = textFile.getDiskPath(True)
             extra_option = int(extra_option)
             r.assign('extra_option',extra_option)
             r.assign('kmeans_alg',kmeans_alg)
             r('hr <- kmeans(f_matrix,extra_option,algorithm=kmeans_alg)') #the number of cluster is gotten from clusterMethod+ tag, instead of 3 used here
            
             kmeans_output = open(textFilePath,'w')
             clusterSizes = r('hr$size') #size of every cluster
             
             withinSS = r('hr$withinss')
             clusters = array(r('hr$cluster')) #convert to array in order to handle the index more easily
             track_names = array(track_names) 
             for index1 in range(extra_option) : #extra_option actually the number of clusters
                 trackInCluster = [k for k,val in clusters.items() if val == index1]
                
                 print>>kmeans_output, 'Cluster %i(%s objects) : ' % (index1+1, str(clusterSizes[index1]))
                 for name in trackInCluster :
                     print>>kmeans_output, name
                    
                 print>>kmeans_output, 'Sum of square error for this cluster is : '+str(withinSS[index1])+'\n'
             kmeans_output.close()
             print>>jobFile, textFile.getLink('Detailed result of kmeans clustering <br>') 
        
         heatmap = GalaxyRunSpecificFile(['heatmap_figure.png'], galaxyFn)
         heatmap_path = heatmap.getDiskPath(True)
         r.png(heatmap_path, width=800, height=700)
         r('heatmap(f_matrix, col=cm.colors(256), Colv=NA, scale="none", xlab="", ylab="", margins=c(10,10))')#Features cluster tracks
         r('dev.off()')
        
         print>>jobFile, heatmap.getLink('heatmap figure <br>')
         cls.print_data(f_matrix, jobFile)
        
     else :
         print 'Have to specify a set of refTracks'
    def execute(cls, choices, galaxyFn=None, username=''):
        from quick.application.GalaxyInterface import GalaxyInterface

        fileformat = choices[9];
        outputFile = open(galaxyFn, "w")
        
        if fileformat == "html":
            print GalaxyInterface.getHtmlBeginForRuns(galaxyFn)
            print GalaxyInterface.getHtmlForToggles(withRunDescription=False)
            t = calendar.timegm(time.gmtime())
            htmlfile = GalaxyRunSpecificFile(["css", str(t)], galaxyFn);


        genome = choices[0]
        track1 = choices[1].split(":")
        track2 = choices[2].split(":")
        tn1 = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, track1)
        tn2 = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, track2)

        compare = choices[3] != "Count individual SNP-differences in window"
        if choices[4] == "Classical MDS":
            mds = 0;
        elif choices[4] == "SMACOF":
            mds = 1;
        else:
            mds = 2;
        windowSize = int(choices[5])
        windowStep = int(choices[6])
        
        mcTreshold = int(choices[7])
        mcRuns = int(choices[8])

        outputFile.write("#seqid\tstart\tscore\tp\n")
        if fileformat == "html":
            text = "#seqid\tstart\tscore\tp\n";

	print "chrs:"+str(GenomeInfo.getChrList(genome))
        reg = "*"
        bins = "*"
        analysisDef = "Dummy: dummy name ([wStep=%g] [wSize=%s] [func=%s] [mds=%s] [mcT=%s] [mcR=%s])-> CategoryClusterSeparationStat" % (windowStep, windowSize, compare, mds, mcTreshold, mcRuns)
        userBinSource = GalaxyInterface._getUserBinSource(reg, bins, genome)
        result = GalaxyInterface.runManual([tn1, tn2], analysisDef, reg, bins, genome, galaxyFn=galaxyFn)
        for key in result.getAllRegionKeys():
            chrom = str(key).split(":")[0];
            r = result[key];
            if 'Result' not in r.keys():
                print "skipping chr:", chrom, r;
                continue;
            r = r['Result'];
            scores = r[0];
            stddev = r[1];
            for i in range(len(scores)):
                if scores[i] != 0:
                    pos = i*windowStep;
                    if fileformat == "tabular":
                        outputFile.write("%s\t%s\t%s\t%s\n" % (str(chrom), pos, str(scores[i]), str(stddev[i])))
                    else:
                        text += "%s\t%s\t%s\t%s\n" % (str(chrom), pos, str(scores[i]), str(stddev[i]));
        if fileformat == "html":
            htmlfile.writeTextToFile(text);
            print htmlfile.getLink("Result file");
            print GalaxyInterface.getHtmlEndForRuns()
        
        outputFile.close();
Example #39
0
 def _getAllGenomes(self):
     """Return the genomes available to the current user, preceded by a 'Select' placeholder entry."""
     placeholder = [('----- Select -----', '', False)]
     userName = self.galaxy.getUserName()
     return placeholder + GalaxyInterface.getAllGenomes(userName)
Example #40
0
def main():
    """
    Command-line entry point: load tool parameters from a file and either
    extract track data or run an analysis through GalaxyInterface.

    sys.argv[1] is the parameter file (also the default output path);
    sys.argv[2], if given, is the tool name. A 'tool' entry in the parameters
    overrides it. Parameters with an empty value are ignored. stdout is
    redirected (unbuffered) to the output path before any work is done.
    """
    filename = sys.argv[1]
    tool = None
    if len(sys.argv) > 2:
        tool = sys.argv[2]

    job_params, params = hg.load_input_parameters(filename)

    trackName1 = ""
    trackName2 = ""
    intensityTrackName = None
    statClassName = ""
    binSize = "*"
    region = "*"
    userBins = None
    output = filename
    method = None
    overlaps = None
    genome = 'hg18'
    username = None

    # Recognised keys that are accepted but not used below: grptrack1,
    # grptrack2, subtrack1, subtrack2, extract, statsfile, file_path.
    for o, a in params.items():
        if a == "":
            continue
        a = str(a)
        if o == "dbkey":
            genome = a
        elif o == "tool":
            tool = a
        elif o == "track1":
            trackName1 = a
        elif o == "track2":
            trackName2 = a
        elif o == "trackIntensity":
            intensityTrackName = a
        elif o == "stats":
            statClassName = a
        elif o == "binsize":
            binSize = a
        elif o == "seglength":
            # Not used below; kept so a malformed value still raises ValueError.
            segLength = int(a)
        elif o == "region":
            region = a
        elif o == "method":
            method = a
        elif o == "output":
            output = a
        elif o == "custom":
            # Redirect stdout to the custom file, unbuffered.
            sys.stdout = open(a, "w", 0)
        elif o == "binfile":
            region = "bed"
            userBins = a
        elif o == "overlaps":
            overlaps = unquote(a)
        elif o == "userEmail":
            username = a

    # Special region methods: the method name doubles as the region
    # specification, and (except for '__brs__') the bin specification is
    # stored under a parameter named after the method.
    if method in ['__chrs__', '__chrBands__', '__chrArms__', '__genes__']:
        region = method
        binSize = params[method]
    elif method == '__brs__':
        region = method
        binSize = '*'

    # userBins is always a str here, so the old "userBins[0] == 'galaxy'"
    # backwards-compatibility branch could never match and has been removed.
    if userBins and userBins.startswith('galaxy'):
        binSize, region = getSecureIdAndExtFromDatasetInfoAsStr(userBins)

    tracks1 = trackName1.split(':')

    tracks2 = trackName2.split(':')

    if intensityTrackName is not None:
        intensityTracks = intensityTrackName.split(':')
    else:
        intensityTracks = []

    # if statClassName.startswith('galaxy'):
    #     statsFileId = statClassName.split(',')[1]
    #     statsFile = getGalaxyFnFromDatasetId(statsFileId)
    #     statClassName = '[scriptFn:=' + statsFile.encode('hex_codec') + ':] -> CustomRStat'

    # Both tool branches write their results to stdout, so redirect it once.
    if output is not None:
        sys.stdout = open(output, "w", 0)

    if tool == 'extract':
        if params.has_key('sepFilePrRegion'):
            GalaxyInterface.parseExtFormatAndExtractTrackManyBinsToRegionDirsInZipFile(
                genome, tracks1, region, binSize, True, overlaps, output)
        else:
            GalaxyInterface.parseExtFormatAndExtractTrackManyBins(
                genome, tracks1, region, binSize, True, overlaps, output)

    else:  #run analysis
        demoID = params['demoID'] if params.has_key('demoID') else None
        GalaxyInterface.run(tracks1, tracks2, statClassName, region, binSize,
                            genome, output, intensityTracks, username, demoID)
 def userIsOneOfUs(self):
     """Return whatever GalaxyInterface._userHasFullAccess reports for the current Galaxy user."""
     userName = self.galaxy.getUserName()
     return GalaxyInterface._userHasFullAccess(userName)
 def _getAllGenomes(self):
     """Return the current user's genome list with a leading 'Select' placeholder entry."""
     selectEntry = ("----- Select -----", "", False)
     userGenomes = GalaxyInterface.getAllGenomes(self.galaxy.getUserName())
     return [selectEntry] + userGenomes
 def getDictOfAllGenomes(self):
     """
     Return an OrderedDict keyed by the first field of every genome entry
     available to the current user, with every value set to False.
     """
     genomeDict = OrderedDict()
     for genomeInfo in GalaxyInterface.getAllGenomes(self.galaxy.getUserName()):
         genomeDict[genomeInfo[0]] = False
     return genomeDict
        return 1

def meltSeg(val,diff):
    """
    Categorise a difference value into -2, 0, 2 or None.

    Returns 0 when diff lies in [-0.01, 0.01], 2 when it exceeds 0.13,
    -2 when it is below -0.13, and None for everything in between.
    val is accepted but not used.
    """
    if -0.01 <= diff <= 0.01:
        return 0
    if diff > 0.13:
        return 2
    if diff < -0.13:
        return -2
    return None

# Source text of the meltSeg body as a list of lines (with an empty first and
# last element from the surrounding line breaks).
# Line breaks inside a string literal are always '\n', whatever the platform,
# so split on '\n' rather than os.linesep (which is '\r\n' on Windows and would
# leave the text as a single element there).
meltSegLines = '''
    if diff < -0.13:
        return -2
    elif diff > 0.13:
        return 2
    elif -0.01 <= diff <= 0.01:
        return 0
    else:
        return None
'''.split('\n')

#FunctionCategorizer(['melting'], meltSeg).createNewTrack(['melting','meltMapSeg'])
# Hand the categoriser body (as source lines) to GalaxyInterface together with
# genome 'hg18' and two track names.
# NOTE(review): presumably ['melting'] is the input track and
# ['melting','meltMapSeg2'] the new segmentation track - confirm against
# GalaxyInterface.createSegmentation.
GalaxyInterface.createSegmentation('hg18',['melting'], ['melting','meltMapSeg2'], meltSegLines)
# The lines below show how the body lines can be turned back into a function:
#exec( os.linesep.join( ['def categorizerMethod(val,diff):'] + meltSegLines) )
#a = categorizerMethod
#print a(3,-1)


 def __init__(self, trans, job):
     """
     Initialise the base controller, then load the genome list and the
     selected genome.

     The genome list is fetched for the current Galaxy user, or for the empty
     user name when no ``galaxy`` attribute is present. The selected genome
     is the 'dbkey' parameter, or '' when it is absent.
     """
     BaseToolController.__init__(self, trans, job)
     if hasattr(self, 'galaxy'):
         userName = self.galaxy.getUserName()
     else:
         userName = ''
     self.genomes = GalaxyInterface.getAllGenomes(userName)
     self.genome = self.params.get('dbkey', '')
# Copyright (C) 2009, Geir Kjetil Sandve, Sveinung Gundersen and Morten Johansen
# This file is part of The Genomic HyperBrowser.
#
#    The Genomic HyperBrowser is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    The Genomic HyperBrowser is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with The Genomic HyperBrowser.  If not, see <http://www.gnu.org/licenses/>.

from gold.application.GalaxyInterface import GalaxyInterface
#GalaxyInterface.run(['HCNE','density_mm8_90pc_50col'], ['genes','refseq'], '[altHyp:=ha1:]a -> PointPositioningPValStat','chr1','10m')

# Manual check of the track-info record for hg18 'Regulation:CpG islands':
# print it, set 'description', print, set 'private', print, then reset both
# fields ('' and False) and print the record one last time.
print GalaxyInterface.getTrackInfoRecord('hg18',['Regulation','CpG islands'])
GalaxyInterface.setTrackInfoRecord('hg18',['Regulation','CpG islands'], {'description':'Test'}, False)
print GalaxyInterface.getTrackInfoRecord('hg18',['Regulation','CpG islands'])
GalaxyInterface.setTrackInfoRecord('hg18',['Regulation','CpG islands'], {'private':True}, False)
print GalaxyInterface.getTrackInfoRecord('hg18',['Regulation','CpG islands'])
# Reset the two fields touched above.
GalaxyInterface.setTrackInfoRecord('hg18',['Regulation','CpG islands'], {'description':''}, False)
GalaxyInterface.setTrackInfoRecord('hg18',['Regulation','CpG islands'], {'private':False}, False)
print GalaxyInterface.getTrackInfoRecord('hg18',['Regulation','CpG islands'])
def test3():
    """Run CustomRStat with the script named by the global ``fn`` on repeats:SINE vs repeats, chr1:1-100000000 in 10m bins."""
    analysisDef = '[scriptFn:=' + fn + ':] -> CustomRStat'
    GalaxyInterface.run(['repeats', 'SINE'], ['repeats'], analysisDef,
                        'chr1:1-100000000', '10m')
Example #48
0

def meltSeg(val, diff):
    """
    Map diff to a category: -2 below -0.13, 2 above 0.13, 0 within
    [-0.01, 0.01], and None otherwise. val is accepted but not used.
    """
    category = None
    if diff < -0.13:
        category = -2
    elif diff > 0.13:
        category = 2
    elif -0.01 <= diff <= 0.01:
        category = 0
    return category


# Source text of the meltSeg body as a list of lines (with an empty first and
# last element from the surrounding line breaks).
# Line breaks inside a string literal are always '\n', whatever the platform,
# so split on '\n' rather than os.linesep (which is '\r\n' on Windows and would
# leave the text as a single element there).
meltSegLines = '''
    if diff < -0.13:
        return -2
    elif diff > 0.13:
        return 2
    elif -0.01 <= diff <= 0.01:
        return 0
    else:
        return None
'''.split('\n')

#FunctionCategorizer(['melting'], meltSeg).createNewTrack(['melting','meltMapSeg'])
# Hand the categoriser body (as source lines) to GalaxyInterface together with
# genome 'hg18' and two track names.
# NOTE(review): presumably ['melting'] is the input track and
# ['melting', 'meltMapSeg2'] the new segmentation track - confirm against
# GalaxyInterface.createSegmentation.
GalaxyInterface.createSegmentation('hg18', ['melting'],
                                   ['melting', 'meltMapSeg2'], meltSegLines)
# The lines below show how the body lines can be turned back into a function:
#exec( os.linesep.join( ['def categorizerMethod(val,diff):'] + meltSegLines) )
#a = categorizerMethod
#print a(3,-1)
# Copyright (C) 2009, Geir Kjetil Sandve, Sveinung Gundersen and Morten Johansen
# This file is part of The Genomic HyperBrowser.
#
#    The Genomic HyperBrowser is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    The Genomic HyperBrowser is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with The Genomic HyperBrowser.  If not, see <http://www.gnu.org/licenses/>.

from gold.application.GalaxyInterface import GalaxyInterface

# Example script: run one hypothesis test on two tracks via GalaxyInterface.run.
trackName1 = ['Genes and Gene Prediction Tracks','Genes','Refseq'] # or any other track that is precomputed on the server
trackName2 = ['Sequence','Repeating elements']
question = 'Are track1-points occurring [tail:=different:with different frequency] inside track2-segment than outside? -> PointCountInSegsPvalStat' # or any other statistic from the HB collection
regSpec = 'chr1:1-10000000' # could also be e.g. 'chr1' for the whole chromosome or '*' for the whole genome
binSpec = '1m' # could also be e.g. '100', '1k' or '*' for whole regions/chromosomes as bins

GalaxyInterface.run(trackName1, trackName2, question, regSpec, binSpec, genome='hg18')
 def testGetRunDescription(self):
     """
     Smoke-test GalaxyInterface.getRunDescription on a full analysis definition.

     DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS is switched off for the call and
     always restored afterwards, so a failure here cannot leak the modified
     global flag into other tests.
     """
     analysisDef = 'Different frequency inside segments?:Are track1-points occurring [tail:Alternative hypothesis=different:with different frequency/more:more frequently/less:less frequently] inside track2-segment than outside? [rawStatistic:=PointCountInsideSegsStat:] [assumptions:_Assumptions=poissonPoints:Poisson-distributed points/_PermutedSegsAndSampledIntersegsTrack:Permuted segments, sampled spaces (MC)/_PermutedSegsAndIntersegsTrack:Permuted segments, permuted spaces (MC)/_RandomGenomeLocationTrack:Segments fetched from random genome location (MC)] [numResamplings:_Resamplings=20/200/2000] -> PointCountInSegsPvalStat, RandomizationManagerStat'
     prevVal = DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS
     DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS = False
     try:
         GalaxyInterface.getRunDescription(['segsMany'], ['segs'], analysisDef, 'chr21:2-4001','2000', 'TestGenome')
     finally:
         # Restore the global flag even when the call above raises.
         DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS = prevVal
 def getExpandedTrackNameFromInTrackName(self, inTrackName, outTrackName,uniqueStaticId, genome, upFlank, downFlank):
     """
     Call GalaxyInterface.expandBedSegmentsFromTrackName with the given
     arguments and return outTrackName unchanged.
     """
     expandArgs = (inTrackName, outTrackName, uniqueStaticId,
                   genome, upFlank, downFlank)
     GalaxyInterface.expandBedSegmentsFromTrackName(*expandArgs)
     return outTrackName
 def userHasFullAccess(self):
     """
     Return the result of isPublic() when it is truthy; otherwise return what
     GalaxyInterface._userHasFullAccess reports for the current Galaxy user.
     """
     publicFlag = self.isPublic()
     if publicFlag:
         return publicFlag
     return GalaxyInterface._userHasFullAccess(self.galaxy.getUserName())
Example #53
0
from gold.application.GalaxyInterface import GalaxyInterface

# First you need to create a statistic class. For this you will use a StatisticTemplate and just add a few custom lines of code.
# In our example we will make a simple statistic that computes the number of base pairs covered by all segments of a single track (in a bin).
# 1: Open the file StatisticTemplate.py.
# 2: Save this file under your own name in the folder "quick/statistic". Let's call it "BpCoverageStat.py". (By putting your new class here, it will automatically be loaded into the HyperBrowser system.)
# 3: Rename the classes according to your name for the statistic - the same name as used for the file. The file should then have a class "BpCoverageStat" and a class "BpCoverageStatUnsplittable". (The system will automatically use the unsplittable class, and would in certain cases also automatically have used a corresponding splittable class if one had been defined.)
# 4: Define what input your statistic will need. In our case we simply need the raw track data. Add the following line in the method "_createChildren":
#self._addChild( RawDataStat(self._region, self._track, TrackFormatReq()) )
# 5: Define how to compute the result. Add the following line under "_compute":
#return sum( el.end() - el.start() for el in self._children[0].getResult())
# 6: Now you can make a simple call that computes full genome-wide results based on your statistic's code:
GalaxyInterface.run(['genes', 'refseq'], ['repeats', 'LINE'],
                    'My new statistic -> BpCoverageStat',
                    '*',
                    '*',
                    genome='hg18')
# 7: You can also very simply make your new statistic available on the web if you have a web system running against the HyperBrowser. Simply add the following line somewhere inside the string variable "QUESTION_SPEC_STR" in the file "gold/description/AnalysisList.py":
#'My new statistic -> BpCoverageStat'
 def testCountStat(self):
     """Run 'CountStat' on the test tracks; only checks that the run does not raise."""
     # The comparison against the expected result
     # [[('Result', 119121)], [('Result', 0)]] via self._assertRunEqual is
     # currently disabled, so the return value of the run is discarded.
     firstTrackName = ["segsMany"]
     secondTrackName = ["segs"]
     regionSpec = 'TestGenome:chr21:10000000-10004000'
     binSpec = '2000'  # presumably the bin size - confirm against GalaxyInterface.run
     GalaxyInterface.run(firstTrackName, secondTrackName, 'CountStat', regionSpec, binSpec)
    def execute(cls, choices, galaxyFn=None, username=""):

        from quick.application.GalaxyInterface import GalaxyInterface

        fileformat = choices[6]
        outputFile = open(galaxyFn, "w")

        if fileformat == "html":
            print GalaxyInterface.getHtmlBeginForRuns(galaxyFn)
            print GalaxyInterface.getHtmlForToggles(withRunDescription=False)
            t = calendar.timegm(time.gmtime())
            htmlfile = GalaxyRunSpecificFile(["fet", str(t)], galaxyFn)

        genome = choices[0]
        track1 = choices[1].split(":")
        track2 = choices[2].split(":")
        tn1 = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, track1)
        tn2 = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, track2)

        windowSize = int(choices[3])
        windowStep = int(choices[4])
        percentile = float(choices[5])

        # results = {}

        # TODO: why this?
        # tr = Track(tn1)
        # tr.addFormatReq(TrackFormatReq(dense=False, allowOverlaps=True))

        outputFile.write("#seqid\tstart\tscore\tstddev\n")

        if fileformat == "html":
            text = "#seqid\tstart\tscore\tstddev\n"
        print "chrs:", str(GenomeInfo.getChrList(genome))
        reg = "*"
        bins = "*"
        analysisDef = "Dummy: dummy name ([wStep=%g] [wSize=%g] [percentile=%g])-> FisherExactScoreStat" % (
            windowStep,
            windowSize,
            percentile,
        )
        userBinSource = GalaxyInterface._getUserBinSource(reg, bins, genome)
        result = GalaxyInterface.runManual([tn1, tn2], analysisDef, reg, bins, genome, galaxyFn=galaxyFn)
        for key in result.getAllRegionKeys():
            chrom = str(key).split(":")[0]
            r = result[key]
            if "Result" not in r.keys():
                print "skipping chr:", chrom, r
                continue
            r = r["Result"]
            scores = r[0]
            stddev = r[1]
            for i in range(len(scores)):
                if scores[i] != 0:
                    pos = i * windowStep
                    # if choices[5] == "html":
                    # print "%s\t%s\t%s\t%s\n" % (str(chrom), pos, str(scores[i]), str(stddev[i]))
                    if fileformat == "tabular":
                        outputFile.write("%s\t%s\t%s\t%s\n" % (str(chrom), pos, str(scores[i]), str(stddev[i])))
                    else:
                        text += "%s\t%s\t%s\t%s\n" % (str(chrom), pos, str(scores[i]), str(stddev[i]))

        if fileformat == "html":
            htmlfile.writeTextToFile(text)
            print htmlfile.getLink("Result file")
            print GalaxyInterface.getHtmlEndForRuns()

        outputFile.close()