def extract_feature(self, genome, track, ref, option) :
     '''
     Return the statistic that relates a cluster track to a reference track.

     genome -- genome passed to the feature catalog and to the bin source
     track  -- the cluster track
     ref    -- the reference track
     option -- name of the feature; selects the entry of
               FeatureCatalog.getFeaturesFromTracks(genome, track, ref)

     The analysis bins are taken from self.params: "compare_in" picks one of
     the predefined region sets (chromosomes, chromosome arms, cytobands);
     any other value falls back to the "region" and "binsize" parameters.

     Returns the value stored in the global result under the key given by
     the second element of the catalog entry.
     '''
     # validFeature holds the analysisDef and the key used to pick the wanted number from the global result
     validFeature = FeatureCatalog.getFeaturesFromTracks(genome,track,ref)[option]
     # This is an instance method, so the track format lookup must go through self
     # (the previous reference to the undefined name 'cls' raised NameError).
     if option == 'Prop. of tr1-points falling inside segments of tr2' and self.getTrackFormat(genome, track) in ['Segments', 'Valued segments'] :
         # Segment tracks are converted to their midpoints before counting points inside segments
         analysisDef = 'dummy [tf1=SegmentToMidPointFormatConverter] -> DerivedPointCountsVsSegsStat'
     else :
         analysisDef = validFeature[0] #or any other statistic from the HB collection

     compareIn = self.params.get("compare_in")
     if compareIn == "Chromosomes" :
         regSpec = "__chrs__"
         binSpec = self.params.get("Chromosomes")
     elif compareIn == "Chromosome arms" :
         regSpec = "__chrArms__"
         binSpec = self.params.get("Chromosome_arms")
     elif compareIn == "Cytobands" :
         regSpec = "__chrBands__"
         binSpec = self.params.get("Cytobands")
     else :
         # Free-form specification, e.g. regSpec 'chr1' or '*' and binSpec '10m', '1k' or '*'
         regSpec = self.params.get("region")
         binSpec = self.params.get("binsize")

     userBinSource = GalaxyInterface._getUserBinSource(regSpec,binSpec,genome)
     if option == 'Prop. of tr2 covered by tr1' :
         # For this statistic the roles of reference track and cluster track are swapped
         result = AnalysisDefJob(analysisDef, ref, track, userBinSource).run()
     else :
         result = AnalysisDefJob(analysisDef, track, ref, userBinSource).run()
     mainResultDict = result.getGlobalResult()
     return mainResultDict[validFeature[1]]
Esempio n. 2
0
def plainRun(analysisDef, genome, track1Fn, track2Fn):
    '''
    Run analysisDef on the two tracks stored in track1Fn and track2Fn.

    Currently under development, not yet functioning.
    Note that file names (track1Fn, track2Fn) must either be a valid .dat-file from a Galaxy system,
      or be located in a path where the two deepest levels are numerical values above 1000, and unique between different runs by this tool

    Returns whatever AnalysisDefJob.run() returns for the bin source built
    from 'chrs' / '*' on the given genome.
    '''
    from quick.application.ExternalTrackManager import ExternalTrackManager

    #fixme: is this correct?
    galaxyTrackNames = []
    for trackFn in (track1Fn, track2Fn):
        # Every id part extracted from the file name has to be above 1000
        assert all(
            idPart > 1000
            for idPart in ExternalTrackManager.extractIdFromGalaxyFn(trackFn))
        galaxyTrackNames.append(
            ExternalTrackManager.constructGalaxyTnFromSuitedFn(trackFn))
    firstTn, secondTn = galaxyTrackNames

    from quick.application.GalaxyInterface import GalaxyInterface
    bins = GalaxyInterface._getUserBinSource('chrs', '*', genome, firstTn,
                                             secondTn)

    from gold.application.StatRunner import AnalysisDefJob
    return AnalysisDefJob(analysisDef, firstTn, secondTn,
                          bins).run(printProgress=False)
 def extract_feature(self, genome, track, ref, option) :
     '''
     Compute how the cluster track (track) relates to the reference track (ref).

     option names the feature to compute and is used as key into
     FeatureCatalog.getFeaturesFromTracks(genome, track, ref).
     The bins are chosen from self.params: "compare_in" selects a predefined
     region set, otherwise "region" and "binsize" are used.
     Returns the entry of the global result selected by the catalog entry.
     '''
     # Catalog entry: element 0 is the analysisDef, element 1 the key into the global result
     catalogEntry = FeatureCatalog.getFeaturesFromTracks(genome, track, ref)[option]
     useMidPoints = (option == 'Prop. of tr1-points falling inside segments of tr2'
                     and self.getTrackFormat(genome, track) in ['Segments', 'Valued segments'])
     if useMidPoints:
         statDef = 'dummy [tf1=SegmentToMidPointFormatConverter] -> DerivedPointCountsVsSegsStat'
     else:
         statDef = catalogEntry[0]

     # (value of "compare_in", region specification, parameter holding the bin specification)
     presets = (("Chromosomes", "__chrs__", "Chromosomes"),
                ("Chromosome arms", "__chrArms__", "Chromosome_arms"),
                ("Cytobands", "__chrBands__", "Cytobands"))
     compareIn = self.params.get("compare_in")
     for label, presetRegSpec, binParam in presets:
         if compareIn == label:
             regSpec = presetRegSpec
             binSpec = self.params.get(binParam)
             break
     else:
         # No preset matched: use the free-form region / bin size,
         # e.g. 'chr1' or '*' as region and '10m', '1k' or '*' as bin size
         regSpec = self.params.get("region")
         binSpec = self.params.get("binsize")

     bins = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)
     if option == 'Prop. of tr2 covered by tr1':
         # reference and cluster track change places for this statistic
         firstTrack, secondTrack = ref, track
     else:
         firstTrack, secondTrack = track, ref
     outcome = AnalysisDefJob(statDef, firstTrack, secondTrack, bins).run()
     return outcome.getGlobalResult()[catalogEntry[1]]
 def handleRegionClustering(self, genome, tracks, clusterMethod, extra_option):
     '''
     Print per-region feature values of the reference track for the regions
     of the history track 'track1'.

     Only history tracks whose definition starts with 'galaxy' are handled;
     for anything else nothing more is done. The parameters tracks,
     clusterMethod and extra_option are not used here.
     '''
     clusterTrackDef = self.getHistoryTrackDef('track1')
     print(clusterTrackDef)
     refTrack = self.params.get('reftrack1')
     if clusterTrackDef[0] != 'galaxy':
         return

     # Element 2 of the track definition is used as the path of the bed file defining the bins
     bedPath = clusterTrackDef[2]
     bins = GalaxyInterface._getUserBinSource('bed', bedPath, genome)
     catalogEntry = SplittedRegionsAsFeaturesCatalog.getValidAnalyses(genome, refTrack, [])
     outcome = AnalysisDefJob(catalogEntry[0], refTrack, [], bins).run()
     perRegionValues = []
     for localKey in sorted(outcome.keys()):
         perRegionValues.append(outcome[localKey][catalogEntry[1]])
     print(perRegionValues)
Esempio n. 5
0
    def runJob(batchLine, genome, fullAccess, galaxyFn=None, printProgress=True):
        '''
        Parse one batch line, run the corresponding analysis and return its results.

        batchLine, genome, fullAccess -- handed to BatchRunner.parseBatchLine
        galaxyFn      -- forwarded to AnalysisDefJob
        printProgress -- if true, the reverse-engineered batch command line is
                         printed; the flag is also forwarded to the job runs

        Returns bc.errorResult if parsing produced one. Otherwise returns the
        results of the run, or, if the run raised, a Results object carrying
        the error. When DebugConfig.PASS_ON_BATCH_EXCEPTIONS is set the
        exception is re-raised instead.
        '''
        bc = BatchRunner.parseBatchLine(batchLine, genome, fullAccess)
        if bc.errorResult is not None:
            return bc.errorResult

        #Try a full run, and return either results or an exception
        try:
            fullRunParams = {}

            if USE_PARALLEL:
                # A timestamp serves as id of this run; it is reused for the gap job below
                uniqueId = time.time()
                fullRunParams["uniqueId"] = uniqueId

            if bc.cleanedTrackNameIntensity is not None:
                fullRunParams['trackNameIntensity'] = '|'.join(tuple(bc.cleanedTrackNameIntensity))

            analysisDefParams = [ '[' + key + '=' + value + ']' for key,value in bc.paramDict.items()]
            analysisDef = ''.join(analysisDefParams) + '->' + bc.statClassName

            from quick.application.GalaxyInterface import GalaxyInterface

            GalaxyInterface._tempAnalysisDefHacks(analysisDef)

            if printProgress:
                print('Corresponding batch command line:<br>' + \
                    GalaxyInterface._revEngBatchLine(bc.trackName1, bc.trackName2, bc.trackNameIntensity, analysisDef, bc.regSpec, bc.binSpec, genome) + '<br><br>')

            results = AnalysisDefJob(analysisDef, bc.cleanedTrackName1, bc.cleanedTrackName2, bc.userBinSource, galaxyFn=galaxyFn, **fullRunParams).run(printProgress)
            presCollectionType = results.getPresCollectionType()

            if len(results.getResDictKeys()) > 0 and GalaxyInterface.APPEND_ASSEMBLY_GAPS and presCollectionType=='standard':
                if USE_PARALLEL:
                    gapRes = AssemblyGapJob(bc.userBinSource, genome, uniqueId=uniqueId).run(printProgress)
                else:
                    gapRes = AssemblyGapJob(bc.userBinSource, genome).run(printProgress)
                results.includeAdditionalResults(gapRes, ensureAnalysisConsistency=False)

        except Exception as e:
            # Wrap the failure in a Results object so that the caller still gets a result back
            results = Results(bc.cleanedTrackName1, bc.cleanedTrackName2, bc.statClassName)
            results.addError(e)
            logException(e,message='Error in batch run')
            if DebugConfig.PASS_ON_BATCH_EXCEPTIONS:
                raise

        # Returned on both paths; previously a successful run fell off the end and returned None
        return results
Esempio n. 6
0
 def doAnalysisFromDefTwo(cls, tracks, analysisDef, regSpec, binSpec,
                          genome, flag):
     '''
     Run analysisDef on the first two tracks and return the per-region
     results as a list ordered by cls.sortDictionaries.

     tracks      -- sequence whose first two items provide .trackName
     analysisDef -- object providing getDefAfterChoices()
     regSpec, binSpec, genome -- used to build the UserBinSource
     flag        -- passed through to cls.sortDictionaries
     '''
     bins = UserBinSource(regSpec, binSpec, genome)
     outcome = AnalysisDefJob(analysisDef.getDefAfterChoices(),
                              tracks[0].trackName,
                              tracks[1].trackName,
                              bins,
                              galaxyFn=None).run()
     # Pair every region key (as string) with the result of that region
     keyedResults = []
     for regionKey in outcome.getAllRegionKeys():
         keyedResults.append((str(regionKey), outcome[regionKey]))
     return list(cls.sortDictionaries(keyedResults, flag))
 def computeDistance(cls, genome, track1, track2, feature, regSpec, binSpec): #direct distance between track1, track2
     '''
     Return the direct distance between track1 and track2.

     track1 and track2 are two lists like : ['Sequence','Repeating elements','LINE']
     feature selects, via DirectDistanceCatalog.getValidAnalyses, how the distance is defined
     regSpec and binSpec describe the regions and bins the analysis runs in
     '''
     # Catalog entry: element 0 is the analysisDef, element 1 the key into the global result
     catalogEntry = DirectDistanceCatalog.getValidAnalyses(genome, track1, track2)[feature]
     statDef = catalogEntry[0]
     bins = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)

     outcome = AnalysisDefJob(statDef, track1, track2, bins).run()
     return outcome.getGlobalResult()[catalogEntry[1]]
 def handleRegionClustering(self, genome, tracks, clusterMethod, extra_option):
     '''
     Print, for each region of the history track 'track1', the feature value
     computed on the reference track given by the 'reftrack1' parameter.

     Nothing beyond printing the track definition happens unless that
     definition starts with 'galaxy'. The parameters tracks, clusterMethod
     and extra_option are not used here.
     '''
     historyTrackDef = self.getHistoryTrackDef('track1')
     print(historyTrackDef)
     referenceTrack = self.params.get('reftrack1')
     if historyTrackDef[0] != 'galaxy':
         return

     # The bins are read from the bed file found at element 2 of the track definition
     regionBins = GalaxyInterface._getUserBinSource('bed', historyTrackDef[2], genome)
     featureEntry = SplittedRegionsAsFeaturesCatalog.getValidAnalyses(genome, referenceTrack, [])
     statDef = featureEntry[0]
     jobResult = AnalysisDefJob(statDef, referenceTrack, [], regionBins).run()
     valuesByRegion = []
     for regionKey in sorted(jobResult.keys()):
         valuesByRegion.append(jobResult[regionKey][featureEntry[1]])
     print(valuesByRegion)
 def build_feature_vector(genome, ctrack, feature, regSpec, binSpec):
     '''
     Build the feature vector of ctrack: one value per bin, ordered by the
     sorted keys of the analysis result.

     feature selects, via LocalResultsAsFeaturesCatalog.getValidAnalyses,
     which analysis is run and which result key is collected.
     regSpec and binSpec describe the regions and bins to analyse.
     '''
     # Catalog entry: element 0 is the analysisDef, element 1 the key into each local result
     catalogEntry = LocalResultsAsFeaturesCatalog.getValidAnalyses(genome, ctrack, [])[feature]
     statDef = catalogEntry[0]
     bins = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)
     outcome = AnalysisDefJob(statDef, ctrack, [], bins).run()
     vector = []
     for localKey in sorted(outcome.keys()):
         vector.append(outcome[localKey][catalogEntry[1]])
     return vector
 def extract_feature(cls, genome, track, ref, option, regSpec, binSpec, trackFormat) :
     '''
     Return the statistic that relates the cluster track to the reference track.

     genome      -- genome passed to the feature catalog and the bin source
     track, ref  -- cluster track and reference track
     option      -- name of the feature; key into
                    FeatureCatalog.getFeaturesFromTracks(genome, track, ref)
     regSpec, binSpec -- regions and bins the analysis runs in
     trackFormat -- format of the cluster track; segment formats are converted
                    to midpoints for the point-in-segment feature

     Raises AssertionError if the analysis yields no global result.
     '''
     # validFeature holds the analysisDef and the key used to pick the wanted number from the global result
     validFeature = FeatureCatalog.getFeaturesFromTracks(genome,track,ref)[option]
     if option == 'Prop. of tr1-points falling inside segments of tr2' and trackFormat in ['Segments', 'Valued segments'] :
         analysisDef = 'dummy [tf1=SegmentToMidPointFormatConverter] -> DerivedPointCountsVsSegsStat'
     else :
         analysisDef = validFeature[0] #or any other statistic from the HB collection
     resultKey = validFeature[1]

     userBinSource = GalaxyInterface._getUserBinSource(regSpec,binSpec,genome)
     if option == 'Prop. of tr2 covered by tr1' :
         # For this statistic the roles of reference track and cluster track are swapped
         job = AnalysisDefJob(analysisDef, ref, track, userBinSource)
     else :
         job = AnalysisDefJob(analysisDef, track, ref, userBinSource)

     globalResult = job.run().getGlobalResult()
     # Report the analysisDef that was actually run (the old message printed the result key
     # under that label); %-formatting also copes with keys that are not strings.
     assert globalResult is not None, \
         'Did not get any global result for analysisDef: %s (result key: %s)' % (analysisDef, resultKey)
     return globalResult[resultKey]
    def testCreateIntensityTrack(self):
        '''
        Create a function track over three regions (running the job twice) and
        check the stored bounding regions and the size of the resulting data.
        '''
        regions = [GenomeRegion(self._genome, self._chr, start, end)
                   for start, end in ((1000, 5000), (6000, 7000), (10000, 16000))]
        statDef = '[dataStat=SimpleBpIntensityStat] [outTrackName=' + '^'.join(self._trackName) + \
                  '] [numDiscreteVals=10] -> CreateFunctionTrackStat'
        job = AnalysisDefJob(statDef, ['nums'], ['points'], regions, genome=self._genome)
        # The job is executed two times in a row
        job.run()
        job.run()

        brShelve = BoundingRegionShelve(self._genome,
                                        self._trackName,
                                        allowOverlaps=False)

        # One-base-pair probes in their original order: (start position, expected info).
        # None marks a position for which the lookup must raise OutsideBoundingRegionError.
        probes = [
            (0, None),
            (2000, BoundingRegionInfo(1000, 5000, 0, 4000, 0, 0)),
            (5500, None),
            (6500, BoundingRegionInfo(6000, 7000, 4000, 5000, 0, 0)),
            (8000, None),
            (11000, BoundingRegionInfo(10000, 16000, 5000, 11000, 0, 0)),
            (16500, None),
        ]
        for probeStart, expectedInfo in probes:
            probe = GenomeRegion(self._genome, self._chr, probeStart, probeStart + 1)
            if expectedInfo is None:
                self.assertRaises(OutsideBoundingRegionError,
                                  brShelve.getBoundingRegionInfo, probe)
            else:
                self.assertEquals(expectedInfo,
                                  brShelve.getBoundingRegionInfo(probe))

        trackData = TrackSource().getTrackData(self._trackName, self._genome,
                                               None, False)
        self.assertListsOrDicts(['val'], trackData.keys())
        # 4000 + 1000 + 6000 positions in total
        self.assertListsOrDicts((11000, ), trackData['val'].shape)
Esempio n. 12
0
    def computeDistance(track1, track2, feature='direct distance'):
        '''
        Return the 'Variance' entry of PropFreqOfTr1VsTr2Stat for track1 vs track2.

        track1 and track2 are two lists like : ['Sequence','Repeating elements','LINE']
        feature is meant to specify how the distance is defined; it is not used here.
        Genome ('hg18'), region ('chr1') and bin size ('10m') are fixed.
        '''
        # Region could also be '*' for the whole genome; bin size could be e.g. '100', '1k',
        # or '*' for whole regions/chromosomes as bins
        bins = GalaxyInterface._getUserBinSource('chr1', '10m', 'hg18')

        # Any other statistic from the HB collection could be named after the arrow
        outcome = AnalysisDefJob('bla bla -> PropFreqOfTr1VsTr2Stat', track1, track2, bins).run()

        # outcome is of class Results. According to the original author's note, the statistic's
        # result dict has the keys 'Track1Prop', 'CountTrack1', 'CountTrack2' and 'Variance'.
        return outcome.getGlobalResult()['Variance']
Esempio n. 13
0
def doAnalysis(analysisSpec, analysisBins, tracks):
    '''Run the analysis described by analysisSpec,
    in every bin provided by analysisBins,
    on the data sets given in tracks.

    Tracks beyond the second are passed to the analysis through the
    'extraTracks' parameter of analysisSpec.

    Typical usage:
    analysisSpec = AnalysisSpec(AvgSegLenStat)
    analysisSpec.addParameter("withOverlaps","no")
    analysisBins = GlobalBinSource('hg18')
    tracks = [ Track(['Genes and gene subsets','Genes','Refseq']) ]
    results = doAnalysis(analysisSpec, analysisBins, tracks)
    '''

    # TODO: handle multiple tracks analysis
    # In an API setting, exceptions should not generally be hidden,
    # so debug mode and logging are not set up here. Maybe this should be optional.
    silenceRWarnings()
    silenceNumpyWarnings()

    if len(tracks) > 2:
        from gold.util.CommonConstants import MULTIPLE_EXTRA_TRACKS_SEPARATOR
        # Each extra track name is encoded as its quoted parts joined by '^'
        encodedNames = []
        for extraTrack in tracks[2:]:
            encodedNames.append(
                '^'.join(quote(part) for part in extraTrack.trackName))
        analysisSpec.addParameter(
            'extraTracks',
            MULTIPLE_EXTRA_TRACKS_SEPARATOR.join(encodedNames))

    if len(tracks) > 1:
        secondTrackName = tracks[1].trackName
    else:
        secondTrackName = None

    job = AnalysisDefJob(analysisSpec.getDefAfterChoices(),
                         tracks[0].trackName,
                         secondTrackName,
                         analysisBins,
                         galaxyFn=None)
    return job.run(printProgress=False)  # printProgress should be optional?
Esempio n. 14
0
    def computeDistance(self, genome, track1, track2, feature): #direct distance between track1, track2
        '''
        Return the direct distance between track1 and track2.

        track1 and track2 are two lists like : ['Sequence','Repeating elements','LINE']
        feature selects, via DirectDistanceCatalog.getValidAnalyses, how the distance is defined
        The bins come from self.params: "compare_in" selects a predefined region
        set, otherwise the "region" and "binsize" parameters are used.
        '''
        # Catalog entry: element 0 is the analysisDef, element 1 the key into the global result
        catalogEntry = DirectDistanceCatalog.getValidAnalyses(genome, track1, track2)[feature]
        statDef = catalogEntry[0]

        # (value of "compare_in", region specification, parameter holding the bin specification)
        presets = (("Chromosomes", "__chrs__", "Chromosomes"),
                   ("Chromosome arms", "__chrArms__", "Chromosome_arms"),
                   ("Cytobands", "__chrBands__", "Cytobands"))
        compareIn = self.params.get("compare_in")
        for label, presetRegSpec, binParam in presets:
            if compareIn == label:
                regSpec = presetRegSpec
                binSpec = self.params.get(binParam)
                break
        else:
            # Free-form specification, e.g. region 'chr1' or '*' and bin size '10m', '1k' or '*'
            regSpec = self.params.get("region")
            binSpec = self.params.get("binsize")

        bins = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)
        # The run yields an object of class Results
        outcome = AnalysisDefJob(statDef, track1, track2, bins).run()
        return outcome.getGlobalResult()[catalogEntry[1]]
    def computeDistance(self, genome, track1, track2, feature): #direct distance between track1, track2
        '''
        Compute the direct distance between two tracks.

        track1 and track2 are two lists like : ['Sequence','Repeating elements','LINE']
        feature names the entry of DirectDistanceCatalog.getValidAnalyses that defines the distance
        Regions and bins are read from self.params ("compare_in" and the
        parameter belonging to the chosen mode, or "region" / "binsize").
        '''
        # Entry layout: element 0 is the analysisDef, element 1 the key into the global result
        featureEntry = DirectDistanceCatalog.getValidAnalyses(genome, track1, track2)[feature]
        chosenDef = featureEntry[0]

        mode = self.params.get("compare_in")
        # (mode, region specification, name of the parameter with the bin specification)
        knownModes = (("Chromosomes", "__chrs__", "Chromosomes"),
                      ("Chromosome arms", "__chrArms__", "Chromosome_arms"),
                      ("Cytobands", "__chrBands__", "Cytobands"))
        for modeName, modeRegSpec, modeBinParam in knownModes:
            if mode == modeName:
                regSpec = modeRegSpec
                binSpec = self.params.get(modeBinParam)
                break
        else:
            # Any other mode: region such as 'chr1' or '*', bin size such as '10m', '1k' or '*'
            regSpec = self.params.get("region")
            binSpec = self.params.get("binsize")

        binSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)
        # jobResult is of class Results
        jobResult = AnalysisDefJob(chosenDef, track1, track2, binSource).run()
        return jobResult.getGlobalResult()[featureEntry[1]]
Esempio n. 16
0
    def execute(choices, galaxyFn=None, username=''):
        '''Called when the web-user pushes the execute-button.
        Prints its output as HTML to standard out, which will be directed to a results page in Galaxy history.
        If needed, StaticFile can be used to get a path where additional files can be put (e.g. generated image files).
        choices is a list of selections made by web-user in each options box; it is not used here.

        Runs DerivedOverlapStat for two fixed tracks on hg18 / chr1 in 10m bins
        and prints every key and value of the global result as an ordered list.
        '''
        genome = 'hg18' # path /../../..../genome
        # All elements in the 'Repeating elements' directory; the list itself is not used below
        allRepeats = GalaxyInterface.getSubTrackNames(genome,['Sequence','Repeating elements'],False)

        track1 = ['Sequence','Repeating elements', 'DNA']
        track2 = ['Gene regulation', 'TFBS', 'High Throughput']
        # Region could also be '*' for the whole genome; bin size could be e.g. '100', '1k',
        # or '*' for whole regions/chromosomes as bins
        bins = GalaxyInterface._getUserBinSource('chr1', '10m', genome)

        # Any other statistic from the HB collection could be named after the arrow;
        # the run yields an object of class Results
        outcome = AnalysisDefJob('bla bla -> DerivedOverlapStat', track1, track2, bins).run()
        globalResult = outcome.getGlobalResult()

        print('<ol>')
        for resultKey in globalResult.keys() :
            print('<li>key:%s,value:%s </li>'%(resultKey,globalResult[resultKey]))
        print('</ol>')
# Example script: run PropFreqOfTr1VsTr2Stat for the LINE and SINE repeat tracks
# on chr1 of hg18, in 10m bins, and print the 'Variance' entry of the global result.
from gold.application.GalaxyInterface import GalaxyInterface
from gold.application.StatRunner import AnalysisDefJob

# Track names are lists of path parts
trackName1 = ['Sequence', 'Repeating elements', 'LINE']
trackName2 = ['Sequence', 'Repeating elements', 'SINE']
#GalaxyInterface.getSubTrackNames(['Sequence','Repeating elements'],False)

analysisDef = 'bla bla -> PropFreqOfTr1VsTr2Stat'  # or any other statistic from the HB collection
regSpec = 'chr1'  # a whole chromosome such as 'chr1', or '*' for the whole genome
binSpec = '10m'  # could also be e.g. '100', '1k', or '*' for whole regions/chromosomes as bins
genome = 'hg18'

#GalaxyInterface.run(trackName1, trackName2, question, regSpec, binSpec, genome='hg18')
userBinSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)

result = AnalysisDefJob(analysisDef, trackName1, trackName2,
                        userBinSource).run()
# result is of the class Results
#from gold.result.Results import Results

mainResultDict = result.getGlobalResult()
# Author's note on the result of PropFreqOfTr1VsTr2Stat:
#self._result = {'Track1Prop':ratio,'CountTrack1':c1, 'CountTrack2':c2,'Variance':variance}

mainValueOfInterest = mainResultDict['Variance']
print 'The ..variance..: ', mainValueOfInterest
Esempio n. 18
0
    def runJob(batchLine, genome, fullAccess, galaxyFn=None, printProgress=True):
        '''
        Parse one batch line, run the corresponding analysis and return its results.

        batchLine, genome, fullAccess -- handed to BatchRunner.parseBatchLine
        galaxyFn      -- forwarded to AnalysisDefJob
        printProgress -- if true, the reverse-engineered batch command line is
                         printed; the flag is also forwarded to the job runs

        Returns bc.errorResult if parsing produced one. Otherwise returns the
        results of the run, or, if the run raised, a Results object carrying
        the error. When DebugConfig.PASS_ON_BATCH_EXCEPTIONS is set the
        exception is re-raised instead.
        '''
        bc = BatchRunner.parseBatchLine(batchLine, genome, fullAccess)
        if bc.errorResult is not None:
            return bc.errorResult

        #Try a full run, and return either results or an exception
        try:
            fullRunParams = {}

            if USE_PARALLEL:
                # TODO: Requirements for parallel runs should not be added in places like these. Parallelization
                #  should be a feature of the job runner somehow

                # A timestamp serves as id of this run; it is reused for the gap job below
                uniqueId = time.time()
                fullRunParams["uniqueId"] = uniqueId

            from quick.application.GalaxyInterface import GalaxyInterface

            analysisDefParams = [ '[' + key + '=' + value + ']' for key,value in bc.paramDict.items()]
            analysisDef = ''.join(analysisDefParams) + '->' + bc.statClassName

            # TODO: Keeping the ugly accesses to private methods in GalaxyInterface for now. To be refactored.
            # NOTE(review): the returned trackNames is not used below; the job is built from
            # bc.cleanedTrackNames - confirm that this is intended.
            trackNames, analysisDef = GalaxyInterface._cleanUpAnalysisDef(bc.cleanedTrackNames, analysisDef)

            if printProgress:
                revEngBatchLine = RunDescription.getRevEngBatchLine(
                    analysisDef, bc.trackNames, bc.cleanedTrackNames, bc.regSpec, bc.binSpec, genome
                )

                print('Corresponding batch command line:<br>{}<br><br>'.format(revEngBatchLine))

            results = AnalysisDefJob(analysisDef, bc.cleanedTrackNames[0], bc.cleanedTrackNames[1], bc.userBinSource, galaxyFn=galaxyFn, **fullRunParams).run(printProgress)
            presCollectionType = results.getPresCollectionType()

            if len(results.getResDictKeys()) > 0 and GalaxyInterface.APPEND_ASSEMBLY_GAPS and presCollectionType=='standard':
                if USE_PARALLEL:
                    gapRes = AssemblyGapJob(bc.userBinSource, genome, uniqueId=uniqueId).run(printProgress)
                else:
                    gapRes = AssemblyGapJob(bc.userBinSource, genome).run(printProgress)
                results.includeAdditionalResults(gapRes, ensureAnalysisConsistency=False)

        except Exception as e:
            # Wrap the failure in a Results object so that the caller still gets a result back
            results = Results(bc.cleanedTrackNames[0], bc.cleanedTrackNames[1], bc.statClassName)
            results.addError(e)
            logException(e,message='Error in batch run')
            if DebugConfig.PASS_ON_BATCH_EXCEPTIONS:
                raise

        # Returned on both paths; previously a successful run fell off the end and returned None
        return results
Esempio n. 19
0
# Example script: run PropFreqOfTr1VsTr2Stat for the LINE and SINE repeat tracks
# on chr1 of hg18, in 10m bins, and print the 'Variance' entry of the global result.
from gold.application.GalaxyInterface import GalaxyInterface
from gold.application.StatRunner import AnalysisDefJob

# Track names are lists of path parts
trackName1 = ['Sequence','Repeating elements','LINE'] 
trackName2 = ['Sequence','Repeating elements','SINE']
#GalaxyInterface.getSubTrackNames(['Sequence','Repeating elements'],False)

analysisDef = 'bla bla -> PropFreqOfTr1VsTr2Stat' # or any other statistic from the HB collection
regSpec = 'chr1' # a whole chromosome such as 'chr1', or '*' for the whole genome
binSpec = '10m' # could also be e.g. '100', '1k', or '*' for whole regions/chromosomes as bins
genome = 'hg18'

#GalaxyInterface.run(trackName1, trackName2, question, regSpec, binSpec, genome='hg18')
userBinSource = GalaxyInterface._getUserBinSource(regSpec,binSpec,genome)

result = AnalysisDefJob(analysisDef, trackName1, trackName2, userBinSource).run()
# result is of the class Results
#from gold.result.Results import Results

mainResultDict = result.getGlobalResult()
# Author's note on the result of PropFreqOfTr1VsTr2Stat:
#self._result = {'Track1Prop':ratio,'CountTrack1':c1, 'CountTrack2':c2,'Variance':variance}

mainValueOfInterest = mainResultDict['Variance']
print 'The ..variance..: ', mainValueOfInterest