예제 #1
0
 def extract_feature(self, genome, track, ref, option):
     '''
     Compute one statistic relating a cluster track to a reference track.

     genome -- genome passed on to FeatureCatalog and to the bin source
     track  -- the cluster track
     ref    -- the reference track
     option -- name of the feature (statistic); used as key into the dict
               returned by FeatureCatalog.getFeaturesFromTracks

     The region/bin specification is taken from self.params: the value of
     "compare_in" selects chromosomes, chromosome arms or cytobands; any
     other value falls back to the "region"/"binsize" parameters.

     Returns the entry of the global result stored under the feature's
     result key.
     '''
     # Each catalog entry holds the analysisDef and the key used to pick the
     # needed number out of the global result.
     validFeature = FeatureCatalog.getFeaturesFromTracks(genome, track, ref)[option]

     # Fixed: this condition used the undefined name `cls` inside an instance
     # method, which raised NameError whenever this option was selected.
     if (option == 'Prop. of tr1-points falling inside segments of tr2'
             and self.getTrackFormat(genome, track) in ['Segments', 'Valued segments']):
         analysisDef = 'dummy [tf1=SegmentToMidPointFormatConverter] -> DerivedPointCountsVsSegsStat'
     else:
         analysisDef = validFeature[0]  # or any other statistic from the HB collection

     compareIn = self.params.get("compare_in")
     if compareIn == "Chromosomes":
         regSpec = "__chrs__"
         binSpec = self.params.get("Chromosomes")
     elif compareIn == "Chromosome arms":
         regSpec = "__chrArms__"
         binSpec = self.params.get("Chromosome_arms")
     elif compareIn == "Cytobands":
         regSpec = "__chrBands__"
         binSpec = self.params.get("Cytobands")
     else:
         regSpec = self.params.get("region")
         binSpec = self.params.get("binsize")

     userBinSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)

     # For this statistic the roles of the two tracks are swapped, so the
     # reference track is passed first.
     if option == 'Prop. of tr2 covered by tr1':
         result = AnalysisDefJob(analysisDef, ref, track, userBinSource).run()
     else:
         result = AnalysisDefJob(analysisDef, track, ref, userBinSource).run()

     mainResultDict = result.getGlobalResult()
     return mainResultDict[validFeature[1]]
예제 #2
0
def createSplittedChrArms(genome, binSize, outFn):
    """Write the chromosome arms of a genome, split into bins, to a file.

    genome  -- genome whose chromosome arms ('__chrArms__') are fetched
    binSize -- bin size handed to AutoBinner after conversion with int()
    outFn   -- path of the output file; it is opened for writing, so an
               existing file is overwritten

    Every bin becomes one tab-separated line holding chr, start and end.
    """
    from quick.application.GalaxyInterface import GalaxyInterface

    outFile = open(outFn, 'w')
    # try/finally so the handle is released even if fetching or binning fails.
    try:
        chrArms = GalaxyInterface._getUserBinSource('__chrArms__', '*', genome)
        for chrArmBin in AutoBinner(chrArms, int(binSize)):
            fields = [chrArmBin.chr, str(chrArmBin.start), str(chrArmBin.end)]
            # '\n' rather than os.linesep: the file is in text mode, which
            # already translates newlines; os.linesep would yield '\r\r\n'
            # on Windows.
            outFile.write('\t'.join(fields) + '\n')
    finally:
        outFile.close()
예제 #3
0
 def handleRegionClustering(self, genome, tracks, clusterMethod, extra_option):
     region_cluster_track = self.getHistoryTrackDef('track1')
     print region_cluster_track
     region_ref_track = self.params.get('reftrack1')
     if region_cluster_track[0] == 'galaxy' :
         file_type = region_cluster_track[1]
         track_path = region_cluster_track[2]
         userBinSource = GalaxyInterface._getUserBinSource('bed', track_path, genome)
         validFeature = SplittedRegionsAsFeaturesCatalog.getValidAnalyses(genome,region_ref_track,[])
         analysisDef = validFeature[0]
         result = AnalysisDefJob(analysisDef, region_ref_track, [], userBinSource).run()
         print [result[localKey][validFeature[1]] for localKey in sorted(result.keys())]
 def computeDistance(cls, genome, track1, track2, feature, regSpec, binSpec):
     '''
     Return the direct distance between track1 and track2.

     track1, track2   -- track names as lists, e.g.
                         ['Sequence','Repeating elements','LINE']
     feature          -- selects how the distance is defined; used as key
                         into DirectDistanceCatalog.getValidAnalyses(...)
     regSpec, binSpec -- region and bin specification for the bin source

     The catalog entry provides the analysisDef (index 0) and the key of the
     value to return from the global result (index 1).
     '''
     featureSpec = DirectDistanceCatalog.getValidAnalyses(genome, track1, track2)[feature]
     statDef = featureSpec[0]
     binSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)

     job = AnalysisDefJob(statDef, track1, track2, binSource)
     globalResult = job.run().getGlobalResult()
     return globalResult[featureSpec[1]]
 def build_feature_vector(genome, ctrack, feature, regSpec, binSpec):
     '''
     Build the feature vector of ctrack.

     feature          -- selects how the vector is constructed; used as key
                         into LocalResultsAsFeaturesCatalog.getValidAnalyses(...)
     regSpec, binSpec -- region and bin specification for the bin source

     Returns a list with one value per local result, ordered by sorted
     result key.
     '''
     featureSpec = LocalResultsAsFeaturesCatalog.getValidAnalyses(genome, ctrack, [])[feature]
     statDef = featureSpec[0]
     binSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)
     result = AnalysisDefJob(statDef, ctrack, [], binSource).run()

     vector = []
     for localKey in sorted(result.keys()):
         vector.append(result[localKey][featureSpec[1]])
     return vector
 def extract_feature(cls, genome, track, ref, option, regSpec, binSpec, trackFormat):
     '''
     Compute one statistic relating a cluster track to a reference track.

     track, ref       -- the cluster track and the reference track
     option           -- name of the feature (statistic); used as key into
                         the dict returned by
                         FeatureCatalog.getFeaturesFromTracks
     regSpec, binSpec -- region and bin specification for the bin source
     trackFormat      -- format name of `track`; for 'Segments' and
                         'Valued segments' the point-in-segment statistic
                         is run on segment midpoints instead

     Returns the entry of the global result stored under the feature's
     result key. Fails with AssertionError when the run produced no global
     result.
     '''
     # Each catalog entry holds the analysisDef and the key used to pick the
     # needed number out of the global result.
     validFeature = FeatureCatalog.getFeaturesFromTracks(genome, track, ref)[option]
     if option == 'Prop. of tr1-points falling inside segments of tr2' and trackFormat in ['Segments', 'Valued segments']:
         analysisDef = 'dummy [tf1=SegmentToMidPointFormatConverter] -> DerivedPointCountsVsSegsStat'
     else:
         analysisDef = validFeature[0]  # or any other statistic from the HB collection

     userBinSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)

     # For this statistic the two tracks are passed in swapped order.
     if option == 'Prop. of tr2 covered by tr1':
         job = AnalysisDefJob(analysisDef, ref, track, userBinSource)
     else:
         job = AnalysisDefJob(analysisDef, track, ref, userBinSource)
     result = job.run()

     resultKey = validFeature[1]
     globalResult = result.getGlobalResult()
     # Fixed: the message used to print the result key under the label
     # "analysisDef" and broke on non-string keys; it now reports both.
     assert globalResult is not None, \
         'Did not get any global result for analysisDef: %s (result key: %s)' % (analysisDef, resultKey)
     return globalResult[resultKey]
예제 #7
0
    def computeDistance(track1, track2, feature='direct distance'):
        '''
        Return the 'Variance' entry of PropFreqOfTr1VsTr2Stat for two tracks.

        track1 and track2 are lists such as
        ['Sequence','Repeating elements','LINE'].
        feature is meant to specify how the distance is defined, but it is
        not used here: genome, region, bin size and statistic are hard-coded.
        '''
        genome = 'hg18'
        regSpec = 'chr1'  # a chromosome name, or '*' for the whole genome
        binSpec = '10m'  # e.g. '100', '1k', or '*' for whole regions/chromosomes as bins
        analysisDef = 'bla bla -> PropFreqOfTr1VsTr2Stat'  # or any other statistic from the HB collection

        binSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)
        job = AnalysisDefJob(analysisDef, track1, track2, binSource)

        # According to the original notes, the result is a Results object and
        # this statistic's global result has the keys 'Track1Prop',
        # 'CountTrack1', 'CountTrack2' and 'Variance'.
        globalResult = job.run().getGlobalResult()
        return globalResult['Variance']
예제 #8
0
    def computeDistance(self, genome, track1, track2, feature):
        '''
        Return the direct distance between track1 and track2.

        track1 and track2 are lists such as
        ['Sequence','Repeating elements','LINE'].
        feature selects how the distance is defined; it is the key into
        DirectDistanceCatalog.getValidAnalyses(genome, track1, track2).

        The region/bin specification comes from self.params: "compare_in"
        picks one of the predefined region sets, otherwise the "region" and
        "binsize" parameters are used.
        '''
        featureSpec = DirectDistanceCatalog.getValidAnalyses(genome, track1, track2)[feature]
        statDef = featureSpec[0]

        # (value of "compare_in", region specification, parameter holding the bin specification)
        presets = (
            ("Chromosomes", "__chrs__", "Chromosomes"),
            ("Chromosome arms", "__chrArms__", "Chromosome_arms"),
            ("Cytobands", "__chrBands__", "Cytobands"),
        )
        compareIn = self.params.get("compare_in")
        for label, presetRegSpec, binParam in presets:
            if compareIn == label:
                regSpec = presetRegSpec
                binSpec = self.params.get(binParam)
                break
        else:
            # No preset matched: use the free-form region and bin size.
            regSpec = self.params.get("region")
            binSpec = self.params.get("binsize")

        binSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)
        job = AnalysisDefJob(statDef, track1, track2, binSource)
        globalResult = job.run().getGlobalResult()
        return globalResult[featureSpec[1]]
    def execute(cls, choices, galaxyFn=None, username=''):
        from quick.application.GalaxyInterface import GalaxyInterface

        fileformat = choices[9];
        outputFile = open(galaxyFn, "w")
        
        if fileformat == "html":
            print GalaxyInterface.getHtmlBeginForRuns(galaxyFn)
            print GalaxyInterface.getHtmlForToggles(withRunDescription=False)
            t = calendar.timegm(time.gmtime())
            htmlfile = GalaxyRunSpecificFile(["css", str(t)], galaxyFn);


        genome = choices[0]
        track1 = choices[1].split(":")
        track2 = choices[2].split(":")
        tn1 = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, track1)
        tn2 = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, track2)

        compare = choices[3] != "Count individual SNP-differences in window"
        if choices[4] == "Classical MDS":
            mds = 0;
        elif choices[4] == "SMACOF":
            mds = 1;
        else:
            mds = 2;
        windowSize = int(choices[5])
        windowStep = int(choices[6])
        
        mcTreshold = int(choices[7])
        mcRuns = int(choices[8])

        outputFile.write("#seqid\tstart\tscore\tp\n")
        if fileformat == "html":
            text = "#seqid\tstart\tscore\tp\n";

	print "chrs:"+str(GenomeInfo.getChrList(genome))
        reg = "*"
        bins = "*"
        analysisDef = "Dummy: dummy name ([wStep=%g] [wSize=%s] [func=%s] [mds=%s] [mcT=%s] [mcR=%s])-> CategoryClusterSeparationStat" % (windowStep, windowSize, compare, mds, mcTreshold, mcRuns)
        userBinSource = GalaxyInterface._getUserBinSource(reg, bins, genome)
        result = GalaxyInterface.runManual([tn1, tn2], analysisDef, reg, bins, genome, galaxyFn=galaxyFn)
        for key in result.getAllRegionKeys():
            chrom = str(key).split(":")[0];
            r = result[key];
            if 'Result' not in r.keys():
                print "skipping chr:", chrom, r;
                continue;
            r = r['Result'];
            scores = r[0];
            stddev = r[1];
            for i in range(len(scores)):
                if scores[i] != 0:
                    pos = i*windowStep;
                    if fileformat == "tabular":
                        outputFile.write("%s\t%s\t%s\t%s\n" % (str(chrom), pos, str(scores[i]), str(stddev[i])))
                    else:
                        text += "%s\t%s\t%s\t%s\n" % (str(chrom), pos, str(scores[i]), str(stddev[i]));
        if fileformat == "html":
            htmlfile.writeTextToFile(text);
            print htmlfile.getLink("Result file");
            print GalaxyInterface.getHtmlEndForRuns()
        
        outputFile.close();
예제 #10
0
    def execute(choices, galaxyFn=None, username=''):
        '''Is called when execute-button is pushed by web-user.
        Should print output as HTML to standard out, which will be directed to a results page in Galaxy history.
        If needed, StaticFile can be used to get a path where additional files can be put (e.g. generated image files).
        choices is a list of selections made by web-user in each options box.
        '''
        
        #print 'Executing...'    
        #print choices   
        #trackName1 = ['Sequence','Repeating elements','LINE'] #a list of subdirectories from 'genome' to the repeat file
        #trackName2 = ['Sequence','Repeating elements','SINE']
        #allRepeats = GalaxyInterface.getSubTrackNames(genome,['Sequence','Repeating elements'],False) #all elements in 'Repeating elements' directory
        
        #analysisDef = 'bla bla -> PropFreqOfTr1VsTr2Stat' #or any other statistic from the HB collection
        #regSpec = 'chr1' #could also be e.g. 'chr1' for the whole chromosome or '*' for the whole genome
        #binSpec = '10m' #could also be e.g.'100', '1k' or '*' for whole regions/chromosomes as bins 
        genome = 'hg18' # path /../../..../genome
        allRepeats = GalaxyInterface.getSubTrackNames(genome,['Sequence','Repeating elements'],False) #all elements in 'Repeating elements' directory
        #GalaxyInterface.run(trackName1, trackName2, question, regSpec, binSpec, genome='hg18')
        #userBinSource = GalaxyInterface._getUserBinSource(regSpec,binSpec,genome)

        #result = AnalysisDefJob(analysisDef, trackName1, trackName2, userBinSource).run()
        #result er av klassen Results..
        #from gold.result.Results import Results

        #mainResultDict = result.getGlobalResult()
        #from PropFreqOfTr1VsTr2Stat:...
        #self._result = {'Track1Prop':ratio,'CountTrack1':c1, 'CountTrack2':c2,'Variance':variance}

        #mainValueOfInterest = mainResultDict['Variance']
        
        #print 'first repeat', allRepeats[0]
        #print '\n all repeats', allRepeats
        #minValue = HiepsTool.computeDistance(trackName1,trackName2)
        #print minValue
        #choicedTracks = [['Sequence','Repeating elements',name] for name in choices]
        #print '\n choiced tracks', choicedTracks
        #d_matrix = HiepsTool.constructDistMatrix(choicedTracks)
        #tree = treecluster(distancematrix=d_matrix, method='s')
        #print tree

        #figure = StaticFile(['hiepln','dendro'],'jpg')
        #filepath = figure.getDiskPath()
        #print filepath           
        #draw_dendrogram(tree,choices,filepath)
        #print figure.getLink('clustring result')
        
        track1 = ['Sequence','Repeating elements', 'DNA']
        track2 = ['Gene regulation', 'TFBS', 'High Throughput']
        analysisDef = 'bla bla -> DerivedOverlapStat' #or any other statistic from the HB collection
        regSpec = 'chr1' #could also be e.g. 'chr1' for the whole chromosome or '*' for the whole genome
        binSpec = '10m' #could also be e.g.'100', '1k' or '*' for whole regions/chromosomes as bins 
        genome = 'hg18' # path /../../..../genome
        #allRepeats = GalaxyInterface.getSubTrackNames(genome,['Sequence','Repeating elements'],False) #all elements in 'Repeating elements' directory
        #GalaxyInterface.run(trackName1, trackName2, question, regSpec, binSpec, genome='hg18')
        userBinSource = GalaxyInterface._getUserBinSource(regSpec,binSpec,genome)
        
        result = AnalysisDefJob(analysisDef, track1, track2, userBinSource).run()
        #result er av klassen Results..
        #from gold.result.Results import Results

        mainResultDict = result.getGlobalResult()
        #keys = result.getResDictKeys()
        #print keys
        #print mainResultDict['2in1']
        print '<ol>'
        for key in mainResultDict.keys() : 
            print '<li>key:%s,value:%s </li>'%(key,mainResultDict[key])
        print '</ol>'
예제 #11
0
from gold.application.GalaxyInterface import GalaxyInterface
from gold.application.StatRunner import AnalysisDefJob

trackName1 = ['Sequence', 'Repeating elements', 'LINE']
trackName2 = ['Sequence', 'Repeating elements', 'SINE']
#GalaxyInterface.getSubTrackNames(['Sequence','Repeating elements'],False)

analysisDef = 'bla bla -> PropFreqOfTr1VsTr2Stat'  #or any other statistic from the HB collection
regSpec = 'chr1'  #could also be e.g. 'chr1' for the whole chromosome or '*' for the whole genome
binSpec = '10m'  #could also be e.g.'100', '1k' or '*' for whole regions/chromosomes as bins
genome = 'hg18'

#GalaxyInterface.run(trackName1, trackName2, question, regSpec, binSpec, genome='hg18')
userBinSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)

result = AnalysisDefJob(analysisDef, trackName1, trackName2,
                        userBinSource).run()
#result er av klassen Results..
#from gold.result.Results import Results

mainResultDict = result.getGlobalResult()
#from PropFreqOfTr1VsTr2Stat:...
#self._result = {'Track1Prop':ratio,'CountTrack1':c1, 'CountTrack2':c2,'Variance':variance}

mainValueOfInterest = mainResultDict['Variance']
print 'The ..variance..: ', mainValueOfInterest
    def execute(cls, choices, galaxyFn=None, username=""):

        from quick.application.GalaxyInterface import GalaxyInterface

        fileformat = choices[6]
        outputFile = open(galaxyFn, "w")

        if fileformat == "html":
            print GalaxyInterface.getHtmlBeginForRuns(galaxyFn)
            print GalaxyInterface.getHtmlForToggles(withRunDescription=False)
            t = calendar.timegm(time.gmtime())
            htmlfile = GalaxyRunSpecificFile(["fet", str(t)], galaxyFn)

        genome = choices[0]
        track1 = choices[1].split(":")
        track2 = choices[2].split(":")
        tn1 = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, track1)
        tn2 = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, track2)

        windowSize = int(choices[3])
        windowStep = int(choices[4])
        percentile = float(choices[5])

        # results = {}

        # TODO: why this?
        # tr = Track(tn1)
        # tr.addFormatReq(TrackFormatReq(dense=False, allowOverlaps=True))

        outputFile.write("#seqid\tstart\tscore\tstddev\n")

        if fileformat == "html":
            text = "#seqid\tstart\tscore\tstddev\n"
        print "chrs:", str(GenomeInfo.getChrList(genome))
        reg = "*"
        bins = "*"
        analysisDef = "Dummy: dummy name ([wStep=%g] [wSize=%g] [percentile=%g])-> FisherExactScoreStat" % (
            windowStep,
            windowSize,
            percentile,
        )
        userBinSource = GalaxyInterface._getUserBinSource(reg, bins, genome)
        result = GalaxyInterface.runManual([tn1, tn2], analysisDef, reg, bins, genome, galaxyFn=galaxyFn)
        for key in result.getAllRegionKeys():
            chrom = str(key).split(":")[0]
            r = result[key]
            if "Result" not in r.keys():
                print "skipping chr:", chrom, r
                continue
            r = r["Result"]
            scores = r[0]
            stddev = r[1]
            for i in range(len(scores)):
                if scores[i] != 0:
                    pos = i * windowStep
                    # if choices[5] == "html":
                    # print "%s\t%s\t%s\t%s\n" % (str(chrom), pos, str(scores[i]), str(stddev[i]))
                    if fileformat == "tabular":
                        outputFile.write("%s\t%s\t%s\t%s\n" % (str(chrom), pos, str(scores[i]), str(stddev[i])))
                    else:
                        text += "%s\t%s\t%s\t%s\n" % (str(chrom), pos, str(scores[i]), str(stddev[i]))

        if fileformat == "html":
            htmlfile.writeTextToFile(text)
            print htmlfile.getLink("Result file")
            print GalaxyInterface.getHtmlEndForRuns()

        outputFile.close()
 def executeSelfFeature(cls, genome, tracks, track_names, clusterMethod, extra_option, feature, distanceType, kmeans_alg, galaxyFn, regSpec, binSpec):
     '''
     Cluster tracks on their own feature vectors and write a report to galaxyFn.

     tracks           -- list of track names (each a list of name parts);
                         the last part, stripped of 'RoadMap_' and
                         '.H3K4me1', is used as the row label
     track_names      -- only echoed in the "To run" line of the report
     clusterMethod    -- 'Hierarchical clustering' or 'K-means clustering'
     extra_option     -- hclust method name for hierarchical clustering, or
                         the number of clusters for k-means;
                         "--select--" disables the clustering step
     feature          -- passed to cls.construct_feature_matrix
     distanceType     -- method argument for R's dist()
     kmeans_alg       -- algorithm argument for R's kmeans();
                         "--select--" disables k-means
     galaxyFn         -- path of the report file, opened for writing
     regSpec, binSpec -- region and bin specification; the bins become the
                         column names of the feature matrix

     All numeric work is done in R through the `r` object from
     gold.application.RSetup. The feature matrix is handed to cls.print_data
     at the end regardless of the clustering method.
     '''
     from gold.application.RSetup import r

     jobFile = open(galaxyFn, 'w')
     # Fixed: the report file was never closed; try/finally closes it on
     # both the success and the failure path.
     try:
         print>>jobFile, 'PARAMS: ', dict(zip('genome, tracks, track_names, clusterMethod, extra_option, feature, distanceType, kmeans_alg, regSpec, binSpec'.split(','), [repr(v)+'<br>'for v in [genome, tracks, track_names, clusterMethod, extra_option, feature, distanceType, kmeans_alg,regSpec, binSpec]]))
         print>>jobFile, '<br><br>To run:<br>$clusterBySelfFeature', (genome, '$'.join([':'.join(t) for t in tracks]), ':'.join(track_names)  , clusterMethod, extra_option, feature, distanceType, kmeans_alg, regSpec, binSpec), '<br><br>'
         print>>jobFile, 'signature of method clusterBySelfFeature:<br>', 'clusterBySelfFeature(genome, tracksStr, track_namesStr, clusterMethod, extra_option, feature, distanceType, kmeans_alg, regSpec, binSpec):<br><br><br>'
         prettyTrackNames = [v[-1].replace('RoadMap_','').replace('.H3K4me1','') for v in tracks]
         f_matrix = cls.construct_feature_matrix(genome, tracks, feature, regSpec, binSpec)
         print>>jobFile, 'dir f_matrix: ', dir(f_matrix), regSpec, binSpec
         userBinSource = GalaxyInterface._getUserBinSource(regSpec,binSpec,genome)

         # Hand the matrix and its labels over to R: rows are tracks, columns are bins.
         r.assign('bin_names', [str(binObj) for binObj in sorted(userBinSource)])
         r.assign('track_names', prettyTrackNames)  # shown as labels in the clustering figure
         r.assign('f_matrix', f_matrix)
         r.assign('distanceType', distanceType)
         r('row.names(f_matrix) <- track_names')
         r('colnames(f_matrix) <- bin_names')

         if clusterMethod == 'Hierarchical clustering' and extra_option != "--select--":
             # Dendrogram figure.
             figure = GalaxyRunSpecificFile(['cluster_tracks_result_figure.pdf'], galaxyFn)
             figurepath = figure.getDiskPath(True)
             r.pdf(figurepath)
             r('d <- dist(f_matrix, method=distanceType)')

             # Dump the feature matrix as an R object and link it in the report.
             r_f_matrixFile = GalaxyRunSpecificFile(['f-matrix.robj'], galaxyFn)
             r.assign('f_matrix_fn', r_f_matrixFile.getDiskPath(True))
             r('dput(f_matrix, f_matrix_fn)')
             print>>jobFile, r_f_matrixFile.getLink('feature_matrix')

             r.assign('extra_option', extra_option)
             r('hr <- hclust(d, method=extra_option, members=NULL)')
             r('plot(hr, ylab="Distance", hang=-1)')
             r('dev.off()')
             print>>jobFile, figure.getLink('clustering results figure<br>')

             # Heatmap figure using the same distance and linkage settings.
             heatmap = GalaxyRunSpecificFile(['heatmap_figure.pdf'], galaxyFn)
             heatmap_path = heatmap.getDiskPath(True)

             r.pdf(heatmap_path)
             r('heatmap(f_matrix, col=cm.colors(256), distfun=function(c) dist(c, method=distanceType), hclustfun=function(c) hclust(c, method=extra_option, members=NULL),Colv=NA, scale="none", xlab="", ylab="", cexRow=0.5, cexCol=0.5, margin=c(8,10))')
             r('dev.off()')
             print>>jobFile, r('dimnames(f_matrix)')
             print>>jobFile, heatmap.getLink('heatmap figure <br>')
         elif clusterMethod == 'K-means clustering' and extra_option != "--select--" and kmeans_alg != "--select--":
             textFile = GalaxyRunSpecificFile(['result_of_kmeans_clustering.txt'], galaxyFn)
             textFilePath = textFile.getDiskPath(True)
             extra_option = int(extra_option)  # here: the number of clusters
             r.assign('kmeans_alg', kmeans_alg)
             r.assign('extra_option', extra_option)
             r('hr <- kmeans(f_matrix,extra_option,algorithm=kmeans_alg)')

             kmeans_output = open(textFilePath, 'w')
             # Fixed: close the details file even if reading results from R fails.
             try:
                 clusterSizes = r('hr$size')  # size of every cluster
                 withinSS = r('hr$withinss')
                 clusters = r('hr$cluster')
                 for clusterNo in range(1, extra_option + 1):
                     # NOTE(review): the original author was unsure whether
                     # this matches the earlier dict-based lookup
                     # ([k for k, val in clusters.items() if val == index]).
                     # It lists 1-based positions, not track names - confirm.
                     trackInCluster = [pos + 1 for pos, val in enumerate(clusters) if val == clusterNo]

                     print>>kmeans_output, 'Cluster %i(%s objects) : ' % (clusterNo, str(clusterSizes[clusterNo - 1]))
                     for name in trackInCluster:
                         print>>kmeans_output, name, '(This result may be a bit shaky afters some changes in rpy access)'

                     print>>kmeans_output, 'Sum of square error for this cluster is : '+str(withinSS[clusterNo - 1])+'\n'
             finally:
                 kmeans_output.close()
             print>>jobFile, textFile.getLink('Detailed result of kmeans clustering <br>')

         cls.print_data(f_matrix, jobFile)
     finally:
         jobFile.close()
     '''
예제 #14
0
from gold.application.GalaxyInterface import GalaxyInterface
from gold.application.StatRunner import AnalysisDefJob

trackName1 = ['Sequence','Repeating elements','LINE'] 
trackName2 = ['Sequence','Repeating elements','SINE']
#GalaxyInterface.getSubTrackNames(['Sequence','Repeating elements'],False)

analysisDef = 'bla bla -> PropFreqOfTr1VsTr2Stat' #or any other statistic from the HB collection
regSpec = 'chr1' #could also be e.g. 'chr1' for the whole chromosome or '*' for the whole genome
binSpec = '10m' #could also be e.g.'100', '1k' or '*' for whole regions/chromosomes as bins 
genome = 'hg18'

#GalaxyInterface.run(trackName1, trackName2, question, regSpec, binSpec, genome='hg18')
userBinSource = GalaxyInterface._getUserBinSource(regSpec,binSpec,genome)

result = AnalysisDefJob(analysisDef, trackName1, trackName2, userBinSource).run()
#result er av klassen Results..
#from gold.result.Results import Results

mainResultDict = result.getGlobalResult()
#from PropFreqOfTr1VsTr2Stat:...
#self._result = {'Track1Prop':ratio,'CountTrack1':c1, 'CountTrack2':c2,'Variance':variance}

mainValueOfInterest = mainResultDict['Variance']
print 'The ..variance..: ', mainValueOfInterest