def findTFsOccurringInRegions(cls, genome, tfSource, regionsBedFn, upFlankSize, downFlankSize, galaxyFn):
        uniqueWebPath = getUniqueWebPath(extractIdFromGalaxyFn(galaxyFn))
        #assert genome == 'hg18' #other genomes not supported. TF id links do not specify genome for pre-selection of analysis
        
        tfTrackNameMappings = TfInfo.getTfTrackNameMappings(genome)
        assert tfTrackNameMappings != {}, 'No TF info for genome: %s' % genome
        
        tfTrackName = tfTrackNameMappings[tfSource]
                
        if (upFlankSize == downFlankSize == 0):
            flankedRegionsFn = regionsBedFn
        else:
            flankedRegionsFn= uniqueWebPath + os.sep + 'flankedRegs.bed'
            GalaxyInterface.expandBedSegments(regionsBedFn, flankedRegionsFn, genome, upFlankSize, downFlankSize)

        regSpec, binSpec = 'bed', flankedRegionsFn
        res = cls._runCategoryPointCount(genome, regSpec, binSpec, tfTrackName)

        tfNames = res.getResDictKeys()
        #print 'RES: ', res.getGlobalResult()[tfNames[0]], type(res.getGlobalResult()[tfNames[0]])
        import third_party.safeshelve as safeshelve
        pwm2tfids = safeshelve.open(os.sep.join([HB_SOURCE_CODE_BASE_DIR,'data','pwm2TFids.shelf']), 'r')
        tf2class = safeshelve.open(os.sep.join([HB_SOURCE_CODE_BASE_DIR,'data','TfId2Class.shelf']), 'r')
        pwmName2id= safeshelve.open(os.sep.join([HB_SOURCE_CODE_BASE_DIR,'data','pwmName2id.shelf']), 'r')
        #print tfNames[0],tfNames[1], ' VS ', pwm2tfids.keys()[0], len(pwm2tfids)
        #tfs = list(reversed(sorted([(res.getGlobalResult()[tf], tf, '%s (%i hits (class %s))'%(tf, res.getGlobalResult()[tf]), '/'.join([tf2class[x] for x in pwm2tfids[tf]]) ) for tf in tfNames]))) #num hits, tfName, tfTextInclHits
        tfs = list(reversed(sorted([(res.getGlobalResult()[tf], tf, '%s (%i hits )'%(tf, res.getGlobalResult()[tf]) + \
                                     (' (class: %s)'%'/'.join(set([str(tf2class.get(x)) for x in pwm2tfids[pwmName2id[tf]] if x in tf2class]))\
                                      if (tf in pwmName2id and pwmName2id[tf] in pwm2tfids and any([x in tf2class for x in pwm2tfids[pwmName2id[tf]]]))\
                                    else '') ) \
                                    for tf in tfNames])) ) #num hits, tfName, tfTextInclHits
        
        tfsPlural = 's' if len(tfs)!=1 else ''
        print '<p>There are %i TF%s targeting your regions of interest, using "%s" as source of TF occurrences.</p>' % (len(tfs), tfsPlural, tfSource)
        
        expansionStr = ' flanked' if not (upFlankSize == downFlankSize == 0) else ''                

        idHtmlFileNamer = GalaxyRunSpecificFile(['allTfIds.html'],galaxyFn)
        idHtmlFileNamer.writeTextToFile('<br>'.join(['<a href=/hbdev/hyper?track1=%s&track2=>%s</a>'%( quote(':'.join(tfTrackName+[tf[1]])), tf[2]) for tf in tfs]))
        print '<p>', idHtmlFileNamer.getLink('Inspect html file'), ' of all TF IDs occurring 1 or more times within your%s regions of interest, with each TF ID linking to analysis with this TF pre-selected.</p>' % (expansionStr)

        idFileNamer = GalaxyRunSpecificFile(['allTfIds.txt'],galaxyFn)
        idFileNamer.writeTextToFile(os.linesep.join([tf[2] for tf in tfs]) + os.linesep)
        print '<p>', idFileNamer.getLink('Inspect text file'), ' listing all TF IDs occurring 1 or more times within your%s regions of interest.</p>' % (expansionStr)
    
        extractedTfbsFileNamer = GalaxyRunSpecificFile(['tfbsInGeneRegions.bed'],galaxyFn)
        GalaxyInterface.extractTrackManyBins(genome, tfTrackName, regSpec, binSpec, True, 'bed', False, False, extractedTfbsFileNamer.getDiskPath(), True)
        print '<p>', extractedTfbsFileNamer.getLoadToHistoryLink('Inspect bed-file'), 'of all TF binding sites occurring within your%s regions of interest.</p>' % (expansionStr)

        for dummy,tf,dummy2 in tfs:            
            extractedTfbsFileNamer = GalaxyRunSpecificFile([tf+'_tfbsInGeneRegions.bed'],galaxyFn)
            GalaxyInterface.extractTrackManyBins(genome, tfTrackName+[tf], regSpec, binSpec, True, 'bed', False, False, extractedTfbsFileNamer.getDiskPath())
            print '<p>', extractedTfbsFileNamer.getLoadToHistoryLink('Binding sites of the TF %s' %tf, 'bed'), 'occurring within your%s regions of interest (bed-file).</p>' % (expansionStr)
# Example #2
# 0
    def singleSimulation(self, numH0, numH1, replicateIndex, verbose=False):
        """Run one simulation replicate and return its sample/rejection statistics.

        Samples are added to a MultipleTestCollection (an initial batch, then
        fixed-size chunks) until all tests are determined. The post-sampling
        FDR threshold is then applied to all counters. When a Galaxy file name
        is set on the instance, the raw p/q-values and two figures are written
        as run-specific files and links to them are appended to a per-numH1
        catalog file ('cat.html').

        Returns a tuple:
            (total number of samples, total number rejected, classification summaries)
        """
        testCollection = MultipleTestCollection(numH0, numH1, self._maxNumSamples, self._h,
                                                self._fdrThreshold, self._a, self._b)
        testCollection.addSamples(self.NUM_SAMPLES_INITIALLY)
        while True:
            if testCollection.allTestsAreDetermined():
                break
            testCollection.addSamples(self.NUM_SAMPLES_PER_CHUNK)

        # Sampling is over, so switch to the threshold used after computations are
        # finished (affects final rejection/acceptance, not the stopping of samples).
        testCollection.setFdrThresholdAtAllCounters(self._postFdrThreshold)

        if self._galaxyFn is not None:
            scheme = 'Basic' if self._h is None else \
                     ('Sequential' if self._fdrThreshold is None else 'McFdr')
            fileIdPrefix = [scheme, str(numH1), str(replicateIndex)]

            rawValsFile = GalaxyRunSpecificFile(fileIdPrefix + ['PandQvals.txt'], self._galaxyFn)
            testCollection.writeAllPandQVals(rawValsFile.getFile())
            rawValsLink = rawValsFile.getLink('Raw p and q-vals') + ' under %s scheme with %i true H1, (replication %i)' % (scheme, numH1, replicateIndex)

            # Both figures follow the same open / draw / close / link sequence.
            figureSpecs = (('PandQvals.png', testCollection.makeAllPandQValsFigure, ' (p/q-figure) '),
                           ('NumSamples.png', testCollection.makeNumSamplesFigure, ' (numSamples-figure) '))
            figureLinks = []
            for figFileName, drawFigure, linkText in figureSpecs:
                figFile = GalaxyRunSpecificFile(fileIdPrefix + [figFileName], self._galaxyFn)
                figFile.openRFigure()
                drawFigure()
                figFile.closeRFigure()
                figureLinks.append(figFile.getLink(linkText) + '<br>')
            pqFigLink, numSamplesFigLink = figureLinks

            catalogFile = GalaxyRunSpecificFile([str(numH1), 'cat.html'], self._galaxyFn)
            catalogFile.writeTextToFile(rawValsLink + pqFigLink + numSamplesFigLink, mode='a')

        totalSamples = testCollection.getTotalNumSamples()
        totalRejected = testCollection.getTotalNumRejected()
        return totalSamples, totalRejected, testCollection.getClassificationSummaries()
    def execute(cls, choices, galaxyFn=None, username=''):
        from quick.application.GalaxyInterface import GalaxyInterface

        fileformat = choices[9];
        outputFile = open(galaxyFn, "w")
        
        if fileformat == "html":
            print GalaxyInterface.getHtmlBeginForRuns(galaxyFn)
            print GalaxyInterface.getHtmlForToggles(withRunDescription=False)
            t = calendar.timegm(time.gmtime())
            htmlfile = GalaxyRunSpecificFile(["css", str(t)], galaxyFn);


        genome = choices[0]
        track1 = choices[1].split(":")
        track2 = choices[2].split(":")
        tn1 = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, track1)
        tn2 = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, track2)

        compare = choices[3] != "Count individual SNP-differences in window"
        if choices[4] == "Classical MDS":
            mds = 0;
        elif choices[4] == "SMACOF":
            mds = 1;
        else:
            mds = 2;
        windowSize = int(choices[5])
        windowStep = int(choices[6])
        
        mcTreshold = int(choices[7])
        mcRuns = int(choices[8])

        outputFile.write("#seqid\tstart\tscore\tp\n")
        if fileformat == "html":
            text = "#seqid\tstart\tscore\tp\n";

	print "chrs:"+str(GenomeInfo.getChrList(genome))
        reg = "*"
        bins = "*"
        analysisDef = "Dummy: dummy name ([wStep=%g] [wSize=%s] [func=%s] [mds=%s] [mcT=%s] [mcR=%s])-> CategoryClusterSeparationStat" % (windowStep, windowSize, compare, mds, mcTreshold, mcRuns)
        userBinSource = GalaxyInterface._getUserBinSource(reg, bins, genome)
        result = GalaxyInterface.runManual([tn1, tn2], analysisDef, reg, bins, genome, galaxyFn=galaxyFn)
        for key in result.getAllRegionKeys():
            chrom = str(key).split(":")[0];
            r = result[key];
            if 'Result' not in r.keys():
                print "skipping chr:", chrom, r;
                continue;
            r = r['Result'];
            scores = r[0];
            stddev = r[1];
            for i in range(len(scores)):
                if scores[i] != 0:
                    pos = i*windowStep;
                    if fileformat == "tabular":
                        outputFile.write("%s\t%s\t%s\t%s\n" % (str(chrom), pos, str(scores[i]), str(stddev[i])))
                    else:
                        text += "%s\t%s\t%s\t%s\n" % (str(chrom), pos, str(scores[i]), str(stddev[i]));
        if fileformat == "html":
            htmlfile.writeTextToFile(text);
            print htmlfile.getLink("Result file");
            print GalaxyInterface.getHtmlEndForRuns()
        
        outputFile.close();
    def execute(choices, galaxyFn=None, username=''):
        '''Is called when execute-button is pushed by web-user.
        Should print output as HTML to standard out, which will be directed to a results page in Galaxy history.
        If getOutputFormat is anything else than HTML, the output should be written to the file with path galaxyFn.
        If needed, StaticFile can be used to get a path where additional files can be put (e.g. generated image files).
        choices is a list of selections made by web-user in each options box.
        '''
        import subprocess
        import os
        from quick.util.StaticFile import GalaxyRunSpecificFile
        from config.Config import HB_SOURCE_CODE_BASE_DIR
        from quick.application.ExternalTrackManager import ExternalTrackManager
        
        tempInStaticFile = GalaxyRunSpecificFile(['tempIn.txt'], galaxyFn)
        outStaticFile = GalaxyRunSpecificFile(['tempOut.fasta'], galaxyFn)
        #print os.getcwd()
        inFn = ExternalTrackManager.extractFnFromGalaxyTN( choices[0].split(':') )
        #print inFn
        tempOutFn = outStaticFile.getDiskPath(True)
        #print tempOutFn
        os.chdir(HB_SOURCE_CODE_BASE_DIR + '/third_party/nonpython')
        #print outStaticFile.getLink('output')
        markovOrder = int(choices[1])

        seqs = []     
        for line in open(inFn):
            if line.startswith('>'):
                seqs.append( [line[1:].strip(),[]] )
            else:
                seqs[-1][1].append(line.strip())
        for seq in seqs:
            seq[1] = ''.join(seq[1])
            
        pureSequence = ''.join( [seq[1] for seq in seqs])
        totalSeqLen = len(pureSequence)
        #pureSequence = ''.join([line.replace('\n','') for line in open(inFn) if not line.startswith('>')])
        tempInStaticFile.writeTextToFile(pureSequence)
        numSamples = int(choices[2])
        
        if numSamples>1:
            zipOutStatic = GalaxyRunSpecificFile(['randomFastas.zip'], galaxyFn)                
            zipOut = zipfile.ZipFile(zipOutStatic.getDiskPath(True),'w')
            
        for iteration in range(numSamples):
            if numSamples>1:
                fastaOutStatic = GalaxyRunSpecificFile(['random','s%s.fa'%iteration], galaxyFn)
                fastaOutFn = fastaOutStatic.getDiskPath(True)
            else:
                fastaOutFn = galaxyFn
            #fastaOutStatic = GalaxyRunSpecificFile(['random%s'%iteration], galaxyFn)
            #subprocess.call('javac',shell=True)
            #subprocess.call('javac',shell=False)
            #subprocess.call('javac MarkovModel.java',shell=True)
            subprocess.call('java MarkovModel %s %s %s >%s' % (tempInStaticFile.getDiskPath(), markovOrder, totalSeqLen, tempOutFn), shell=True )
            #subprocess.call('javac third_party/nonpython/MarkovModel.java')
            #subprocess.call('java third_party/nonpython/MarkovModel.java')
            pureMarkovSequence = open(tempOutFn).readline().strip()
            pmsIndex = 0
            fastaOutF = open(fastaOutFn,'w')
            for seq in seqs:
                fastaOutF.write('>'+seq[0]+os.linesep)
                nextPmsIndex = pmsIndex+len(seq[1])
                #seq.append(pureMarkovSequence[pmsIndex:nextPmsIndex])
                fastaOutF.write( pureMarkovSequence[pmsIndex:nextPmsIndex] + os.linesep)
                pmsIndex = nextPmsIndex
            fastaOutF.close()
            assert pmsIndex == totalSeqLen == len(pureMarkovSequence), (pmsIndex, totalSeqLen , len(pureMarkovSequence))
            if numSamples>1:
                #print 'Adding %s to archive' % fastaOutFn.split('/')[-1]
                zipOut.write(fastaOutFn, fastaOutFn.split('/')[-1])

        if numSamples>1:
            zipOut.close()
            print zipOutStatic.getLink('Zipped random sequences')
    def execute(cls, choices, galaxyFn=None, username=""):

        from quick.application.GalaxyInterface import GalaxyInterface

        fileformat = choices[6]
        outputFile = open(galaxyFn, "w")

        if fileformat == "html":
            print GalaxyInterface.getHtmlBeginForRuns(galaxyFn)
            print GalaxyInterface.getHtmlForToggles(withRunDescription=False)
            t = calendar.timegm(time.gmtime())
            htmlfile = GalaxyRunSpecificFile(["fet", str(t)], galaxyFn)

        genome = choices[0]
        track1 = choices[1].split(":")
        track2 = choices[2].split(":")
        tn1 = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, track1)
        tn2 = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, track2)

        windowSize = int(choices[3])
        windowStep = int(choices[4])
        percentile = float(choices[5])

        # results = {}

        # TODO: why this?
        # tr = Track(tn1)
        # tr.addFormatReq(TrackFormatReq(dense=False, allowOverlaps=True))

        outputFile.write("#seqid\tstart\tscore\tstddev\n")

        if fileformat == "html":
            text = "#seqid\tstart\tscore\tstddev\n"
        print "chrs:", str(GenomeInfo.getChrList(genome))
        reg = "*"
        bins = "*"
        analysisDef = "Dummy: dummy name ([wStep=%g] [wSize=%g] [percentile=%g])-> FisherExactScoreStat" % (
            windowStep,
            windowSize,
            percentile,
        )
        userBinSource = GalaxyInterface._getUserBinSource(reg, bins, genome)
        result = GalaxyInterface.runManual([tn1, tn2], analysisDef, reg, bins, genome, galaxyFn=galaxyFn)
        for key in result.getAllRegionKeys():
            chrom = str(key).split(":")[0]
            r = result[key]
            if "Result" not in r.keys():
                print "skipping chr:", chrom, r
                continue
            r = r["Result"]
            scores = r[0]
            stddev = r[1]
            for i in range(len(scores)):
                if scores[i] != 0:
                    pos = i * windowStep
                    # if choices[5] == "html":
                    # print "%s\t%s\t%s\t%s\n" % (str(chrom), pos, str(scores[i]), str(stddev[i]))
                    if fileformat == "tabular":
                        outputFile.write("%s\t%s\t%s\t%s\n" % (str(chrom), pos, str(scores[i]), str(stddev[i])))
                    else:
                        text += "%s\t%s\t%s\t%s\n" % (str(chrom), pos, str(scores[i]), str(stddev[i]))

        if fileformat == "html":
            htmlfile.writeTextToFile(text)
            print htmlfile.getLink("Result file")
            print GalaxyInterface.getHtmlEndForRuns()

        outputFile.close()
 def getGeneIdStaticFileWithContent(self):
     """Write one ID per intersected reference bin to 'allGeneIds.txt' and return that file.

     The ID of a bin is the part of str(bin.val) before the first '|'. The
     IDs are separated by os.linesep and the file ends with a line separator.
     """
     referenceBins = self.getIntersectedReferenceBins()
     staticFile = GalaxyRunSpecificFile(['allGeneIds.txt'], self._galaxyFn)

     geneIds = []
     for refBin in referenceBins:
         fullValue = str(refBin.val)
         geneIds.append(fullValue.split('|')[0])

     fileContent = os.linesep.join(geneIds) + os.linesep
     staticFile.writeTextToFile(fileContent)
     return staticFile
 def runBinaryClassificationSuiteEvaluation(self, algorithmNames, predictionTrackNames, 
                 answerTrackNames, regionTrackNames, overlapAnalysisDef, ROCanalysisDef):
     """Evaluate every algorithm on every test set and write per-set and global result pages.

     `predictionTrackNames` and `algorithmNames` are indexed algorithm-major:
     entry (j * nTestSets + i) belongs to algorithm j and test set i, where
     nTestSets is len(answerTrackNames). `answerTrackNames` and
     `regionTrackNames` hold one entry per test set.

     For each test set the overlap statistic is run for every algorithm, and
     the ROC statistic too when the prediction track is ROC-curve compatible.
     The results are written to 'testset<i>.html'. Across test sets, two
     aggregates are kept per algorithm: a plain sum of the overlap counts, and
     a sum where each test set's counts are first scaled to a common total so
     that every test set has the same weight. These are written, with a global
     ROC curve, to 'globalResults.html'.

     Returns a list of links: the global result page first, then one link
     per test set.

     Raises ZeroDivisionError if `answerTrackNames` is empty or if a test
     set's overlap counts sum to zero.
     """
     overlapKeys = ['Neither', 'Only1', 'Only2', 'Both']
     # Common total that each test set's counts are scaled to for the
     # equal-weight aggregate; large so integer division keeps precision.
     scaleFactor = 1000000000000

     # Number of test sets and number of algorithms to evaluate
     nTestSets = len(answerTrackNames)
     nAlgorithms = len(predictionTrackNames) // nTestSets

     resultFiles = []
     statPlot = StatisticPlot()
     globalResultFile = GalaxyRunSpecificFile(['globalResults.html'], self._galaxyFn)

     # One name and one set of global accumulators per algorithm
     algorithmNames = [algorithmNames[j*nTestSets] for j in range(nAlgorithms)]
     globalOverlapResults = [OrderedDict(zip(overlapKeys, (0,0,0,0))) for j in range(nAlgorithms)]
     globalEqOverlapResults = [OrderedDict(zip(overlapKeys, (0,0,0,0))) for j in range(nAlgorithms)]
     globalRocResults = [{'Result': []} for j in range(nAlgorithms)]

     # For all test sets...
     for i in range(nTestSets):
         # Create a result file for this test set
         resultFile = GalaxyRunSpecificFile(['testset%d.html' % i], self._galaxyFn)
         localOverlapResults = []
         localRocResults = []
         answerTrackName = answerTrackNames[i]
         regionTrackName = regionTrackNames[i]

         # Evaluate the predictions for every algorithm for this test set
         for j in range(nAlgorithms):
             predictionTrackName = predictionTrackNames[(j*nTestSets)+i]

             # Run statistics to compute overlap and ROC values
             localOverlapResult = self._runSingleStatistic(regionTrackName, overlapAnalysisDef,
                                 predictionTrackName, answerTrackName)

             if self._isRocCurveCompatible(predictionTrackName):
                 localRocResult = self._runSingleStatistic(regionTrackName, ROCanalysisDef,
                                                 predictionTrackName, answerTrackName)
             else:
                 localRocResult = None

             # Collect the local results and add them to the global results
             localOverlapResults.append(localOverlapResult)
             localRocResults.append(localRocResult)

             for key in overlapKeys:
                 globalOverlapResults[j][key] = globalOverlapResults[j][key] + localOverlapResult[key]

             testSetLength = sum([localOverlapResult[key] for key in overlapKeys])

             # NOTE(review): '/' is integer division here under Python 2 as long as
             # the overlap counts are integers -- confirm the statistic's value types.
             for key in overlapKeys:
                 globalEqOverlapResults[j][key] = globalEqOverlapResults[j][key] + long(localOverlapResult[key]*scaleFactor)/testSetLength

             if localRocResult is not None:
                 globalRocResults[j]['Result'] = globalRocResults[j]['Result'] + localRocResult['Result']

         # Create statistics for this test set
         localStatisticsLink = statPlot.createBinaryClassificationStatistics(i, 
                 algorithmNames, localOverlapResults, self._galaxyFn, 'Benchmark statistics')

         totalPositives, totalNegatives = self._getTotalNegativesAndPositivesFromOverlapResults(localOverlapResults)

         localRocCurveLink = statPlot.createROCCurve(i, algorithmNames, 
                         totalPositives, totalNegatives, localRocResults, self._galaxyFn)

         # Write statistical information for this test set to file
         resultFile.writeTextToFile('%s</br>%s' % (localStatisticsLink, localRocCurveLink), 'w')
         resultFiles.append(resultFile)

     # Create statistics for all test sets
     globalStatisticsLink = statPlot.createBinaryClassificationStatistics(nTestSets, 
                 algorithmNames, globalOverlapResults, self._galaxyFn, 
                 'Benchmark statistics (sum, longer test set has higher weight)')

     globalEqStatisticsLink = statPlot.createBinaryClassificationStatistics(nTestSets+1, 
                 algorithmNames, globalEqOverlapResults, self._galaxyFn, 
                 'Benchmark statistics (same weight for each test set)')

     totalPositives, totalNegatives = self._getTotalNegativesAndPositivesFromOverlapResults(globalOverlapResults)

     globalRocCurveLink = statPlot.createROCCurve(nTestSets, algorithmNames, 
                         totalPositives, totalNegatives, globalRocResults, self._galaxyFn)

     # Write statistical information for all test sets to file
     globalResultFile.writeTextToFile('%s</br>%s</br>%s' % (globalStatisticsLink, globalEqStatisticsLink, globalRocCurveLink), 'w')

     # Global link first, then one link per test set (numbered from 1)
     results = [globalResultFile.getLink('Global results\n\n')]
     for testSetIndex, testSetFile in enumerate(resultFiles):
         results.append(testSetFile.getLink('Test set %d' % (testSetIndex+1)))

     return results
    def findTFsTargetingGenes(cls, genome, tfSource, ensembleGeneIdList,upFlankSize, downFlankSize, geneSource, galaxyFn):
        #galaxyFn = '/usit/insilico/web/lookalike/galaxy_dist-20090924-dev/database/files/003/dataset_3347.dat'
        #print 'overriding galaxyFN!: ', galaxyFn
        uniqueWebPath = getUniqueWebPath(extractIdFromGalaxyFn(galaxyFn))

        assert genome in ['mm9','hg18'] #other genomes not supported. TF id links do not specify genome for pre-selection of analysis
        
        #if tfSource == 'UCSC tfbs conserved':
        #    tfTrackName = ['Gene regulation','TFBS','UCSC prediction track']
        #else:
        #    raise
        tfTrackNameMappings = TfInfo.getTfTrackNameMappings(genome)
        tfTrackName = tfTrackNameMappings[tfSource]
                
        #Get gene track
        #targetGeneRegsTempFn = uniqueWebPath + os.sep + 'geneRegs.bed'
        #geneRegsTrackName = GenomeInfo.getStdGeneRegsTn(genome)
        #geneRegsFn = getOrigFn(genome, geneRegsTrackName, '.category.bed')
        #GalaxyInterface.getGeneTrackFromGeneList(genome, geneRegsTrackName, ensembleGeneIdList, targetGeneRegsTempFn )
        
        if not (upFlankSize == downFlankSize == 0):            
            unflankedGeneRegsTempFn = uniqueWebPath + os.sep + '_geneRegs.bed'
            flankedGeneRegsTempFn  = uniqueWebPath + os.sep + 'flankedGeneRegs.bed'
            geneRegsTrackName = GenomeInfo.getStdGeneRegsTn(genome)
            #geneRegsFn = getOrigFn(genome, geneRegsTrackName, '.category.bed')
            GalaxyInterface.getGeneTrackFromGeneList(genome, geneRegsTrackName, ensembleGeneIdList, unflankedGeneRegsTempFn )
            GalaxyInterface.expandBedSegments(unflankedGeneRegsTempFn, flankedGeneRegsTempFn, genome, upFlankSize, downFlankSize)
            #flankedGeneRegsExternalTN = ['external'] +galaxyId +  [flankedGeneRegsTempFn]
            regSpec, binSpec = 'file', flankedGeneRegsTempFn
        else:
            regSpec, binSpec = '__genes__', ','.join(ensembleGeneIdList)

        res = cls._runCategoryPointCount(genome, regSpec, binSpec, tfTrackName)

        #trackName1 = tfTrackName
        #
        #analysisDef = 'Category point count: Number of elements each category of track1 (with overlaps)'+\
        #          '[tf1:=SegmentToStartPointFormatConverter:]'+\
        #          '-> FreqByCatStat'
        ##assert len(ensembleGeneIdList)==1
        ##geneId = ensembleGeneIdList[0]
        #
        #print '<div class="debug">'        
        #userBinSource, fullRunArgs = GalaxyInterface._prepareRun(trackName1, None, analysisDef, regSpec, binSpec, genome)
        #res = AnalysisDefJob(analysisDef, trackName1, None, userBinSource, **fullRunArgs).run()
        #
        #print res        
        ##GalaxyInterface._viewResults([res], galaxyFn)
        #print '</div>'
        tfs = res.getResDictKeys()
        
        genesPlural = 's' if len(ensembleGeneIdList)>1 else ''
        tfsPlural = 's' if len(tfs)!=1 else ''
        print '<p>There are %i TF%s targeting your gene%s of interest (%s), using "%s" as source of TF occurrences.</p>' % (len(tfs), tfsPlural, genesPlural, ','.join(ensembleGeneIdList), tfSource)
        
        expansionStr = ' flanked' if not (upFlankSize == downFlankSize == 0) else ''                

        idHtmlFileNamer = GalaxyRunSpecificFile(['allTfIds.html'],galaxyFn)
        idHtmlFileNamer.writeTextToFile('<br>'.join(['<a href=%s/hyper?dbkey=%s&track1=%s&track2=>%s</a>'%(URL_PREFIX, genome, quote(':'.join(tfTrackName+[tf])), tf) for tf in tfs]))
        #idHtmlFileNamer.writeTextToFile('<br>'.join(['<a href=/hbdev/hyper?track1=%s&track2=>%s</a>'%( ':'.join(tfTrackName+[tf]), tf) for tf in tfs]))
        print '<p>', idHtmlFileNamer.getLink('Inspect html file'), ' of all TF IDs occurring 1 or more times within your%s gene region%s of interest, with each TF ID linking to analysis with this TF pre-selected.</p>' % (expansionStr, genesPlural)

        idFileNamer = GalaxyRunSpecificFile(['allTfIds.txt'],galaxyFn)
        idFileNamer.writeTextToFile(os.linesep.join(tfs) + os.linesep)
        print '<p>', idFileNamer.getLink('Inspect text file'), ' listing all TF IDs occurring 1 or more times within your%s gene region%s of interest.</p>' % (expansionStr, genesPlural)
    
        extractedTfbsFileNamer = GalaxyRunSpecificFile(['tfbsInGeneRegions.bed'],galaxyFn)
        GalaxyInterface.extractTrackManyBins(genome, tfTrackName, regSpec, binSpec, True, 'bed', False, False, extractedTfbsFileNamer.getDiskPath())
        print '<p>', extractedTfbsFileNamer.getLink('Inspect bed-file'), 'of all TF binding sites occurring within your%s gene region%s of interest.</p>' % (expansionStr, genesPlural)
        
        #idFile = idFileNamer.getFile()
        #idFile.write(', '.join([str(bin.val) for bin in targetBins if res[bin][resDictKey]>0]) + os.sep)
        #idFile.close()
        
        #print idFileNamer.getLink('Text file'), ' of TF IDs'
        
        #GalaxyInterface.run(tfTrackName, tcGeneRegsExternalTN, analysisDef, regSpec, binSpec, genome, galaxyFn)
        #GalaxyInterface.run(':'.join(tfTrackName), ':'.join(tcGeneRegsExternalTN), analysisDef, regSpec, binSpec, genome, galaxyFn)
                
 def executeReferenceTrack(cls, genome, tracks, track_names, clusterMethod, extra_option, distanceType, kmeans_alg, galaxyFn, regSpec, binSpec, numreferencetracks=None, refTracks=None, refFeatures=None, yesNo=None, howMany=None, upFlank=None, downFlank=None):
     from gold.application.RSetup import r
     jobFile = open(galaxyFn, 'w')
     print>>jobFile, 'PARAMS: ', dict(zip('genome, tracks, track_names, clusterMethod, extra_option, distanceType, kmeans_alg, regSpec, binSpec'.split(','), [repr(v)+'<br>'for v in [genome, tracks, track_names, clusterMethod, extra_option, distanceType, kmeans_alg, regSpec, binSpec]]))
     print>>jobFile, '<br><br>To run:<br>', '$clusterByReference', (genome, '$'.join([':'.join(t) for t in tracks]), ':'.join(track_names)  , clusterMethod, extra_option, distanceType, kmeans_alg, regSpec, binSpec,numreferencetracks, refTracks, refFeatures, yesNo, howMany, upFlank, downFlank), '<br><br>'
     print>>jobFile, 'signature of method clusterByReference:<br>', 'clusterByReference(genome, tracksStr, track_namesStr, clusterMethod, extra_option, distanceType, kmeans_alg, regSpec, binSpec, numreferencetracks=None, refTracks=None, refFeatures=None, yesNo=None, howMany=None, upFlank=None, downFlank=None)<br><br><br>'
     prettyTrackNames = [v[-1].replace("RoadMap_","").replace('.H3K4me1','') for v in tracks]
     
     #prettyTrackNames = [prettyPrintTrackName(v) for v in tracks]
     #paramNames = ['numreferencetracks', 'refTracks', 'refFeatures', 'yesNo', 'howMany', 'upFlank', 'downFlank']
     #for index, value in enumerate([numreferencetracks, refTracks, refFeatures, yesNo, howMany, upFlank, downFlank]):
     #    if value != None:
     #        print paramNames[index]+'='+ str(value),
     #print ''
     
     reftrack_names = [] #for use in creating the heatmap (as the column names)
     
     options = [] #for the case using refTracks, options contains feature for every refTrack, chosen by user.
     
     if numreferencetracks :
         for i in range(int(numreferencetracks)):
             ref_i = refTracks[i].split(":") #name of refTrack is being used to construct the name of expanded refTrack
             #refTracks.append(ref_i) #put the refTrack into refTracks list
             reftrack_names.append(ref_i[-1])
             temp_opt1 = 'ref'+str(i)+'feature'
             options+= [] if refFeatures[i] == None else [refFeatures[i]]
             if  yesNo[i] == "Yes" and howMany[i] != '--select--':
                 for expan in range(int(howMany[i])) :
                     reftrack_names.append(ref_i[-1]+'_'+ upFlank[i][expan])
                     upFlank = int(upFlank[i][expan])
                     downFlank = int(downFlank[i][expan])
                     withinRunId = str(i+1)+' expansion '+str(expan + 1)
                     outTrackName = GalaxyInterface.expandBedSegmentsFromTrackNameUsingGalaxyFn(ref_i, genome, upFlank, downFlank, galaxyFn, withinRunId) #outTrackName is unique for run
                     refTracks.append(outTrackName) #put the expanded track into refTracks list
                     options.append(options[-1]) # use chosen feature for refTack as valid feature for the expanded
         
         for index, track in enumerate(refTracks) :
             #print track, '<br>'
             if type(track) == str :
                 track = track.split(":")
             refTracks[index] = track[:-1] if track[-1] == "-- All subtypes --" else track
             
     if len(refTracks) > 0:
         
         trackFormats = [TrackInfo(genome,track).trackFormatName for track in tracks]
         
         trackLen = len(tracks)
         refLen = len(refTracks)
         f_matrix = zeros((trackLen, refLen))
         for i in range(trackLen):
             for j in range(refLen):
                 #print 'len(options), refLen, len(tracks), trackLen, len(trackFormats):', len(options), refLen, len(tracks), trackLen, len(trackFormats)
                 f_matrix[i,j] = cls.extract_feature(genome,tracks[i],refTracks[j],options[j], regSpec, binSpec, trackFormats[i])
         r.assign('track_names',prettyTrackNames) #use as track names, will be shown in clustering figure
         r.assign('reftrack_names',reftrack_names)
         r.assign('f_matrix',f_matrix)
         r.assign('distanceType',distanceType)
         r('row.names(f_matrix) <- track_names')
         r('colnames(f_matrix) <- reftrack_names')
        
         if clusterMethod == 'Hierarchical clustering' and extra_option != "--select--":
             figure = GalaxyRunSpecificFile(['cluster_tracks_result_figure.pdf'], galaxyFn)
             figurepath = figure.getDiskPath(True) 
             r.pdf(figurepath, 8,8)
             r('d <- dist(f_matrix, method=distanceType)')
             #print r.f_matrix
             #print r.d
             r_f_matrixFile = GalaxyRunSpecificFile(['f-matrix.robj'], galaxyFn)
             r.assign('f_matrix_fn', r_f_matrixFile.getDiskPath(True))
             r('dput(f_matrix, f_matrix_fn)')
             print>>jobFile, r_f_matrixFile.getLink('feature_matrix')
             
             r_f_matrixFile = GalaxyRunSpecificFile(['f-matrix.txt'], galaxyFn)
             r_f_matrixFile.writeTextToFile(str(f_matrix)+'\n\n'+str(r.d))
             print>>jobFile, r_f_matrixFile.getLink('r.f_matrix & r.d')
             r.assign('extra_option',extra_option)
             r('hr <- hclust(d, method=extra_option, members=NULL)')
             r('plot(hr, ylab="Distance", hang=-1)')
             
             r('dev.off()')
             print>>jobFile, figure.getLink('clustering results figure<br>')
         elif clusterMethod == 'K-means clustering' and extra_option != "--select--" and kmeans_alg != "--select--":
             textFile = GalaxyRunSpecificFile(['result_of_kmeans_clustering.txt'], galaxyFn)
             textFilePath = textFile.getDiskPath(True)
             extra_option = int(extra_option)
             r.assign('extra_option',extra_option)
             r.assign('kmeans_alg',kmeans_alg)
             r('hr <- kmeans(f_matrix,extra_option,algorithm=kmeans_alg)') #the number of cluster is gotten from clusterMethod+ tag, instead of 3 used here
            
             kmeans_output = open(textFilePath,'w')
             clusterSizes = r('hr$size') #size of every cluster
             
             withinSS = r('hr$withinss')
             clusters = array(r('hr$cluster')) #convert to array in order to handle the index more easily
             track_names = array(track_names) 
             for index1 in range(extra_option) : #extra_option actually the number of clusters
                 trackInCluster = [k for k,val in clusters.items() if val == index1]
                
                 print>>kmeans_output, 'Cluster %i(%s objects) : ' % (index1+1, str(clusterSizes[index1]))
                 for name in trackInCluster :
                     print>>kmeans_output, name
                    
                 print>>kmeans_output, 'Sum of square error for this cluster is : '+str(withinSS[index1])+'\n'
             kmeans_output.close()
             print>>jobFile, textFile.getLink('Detailed result of kmeans clustering <br>') 
        
         heatmap = GalaxyRunSpecificFile(['heatmap_figure.png'], galaxyFn)
         heatmap_path = heatmap.getDiskPath(True)
         r.png(heatmap_path, width=800, height=700)
         r('heatmap(f_matrix, col=cm.colors(256), Colv=NA, scale="none", xlab="", ylab="", margins=c(10,10))')#Features cluster tracks
         r('dev.off()')
        
         print>>jobFile, heatmap.getLink('heatmap figure <br>')
         cls.print_data(f_matrix, jobFile)
        
     else :
         print 'Have to specify a set of refTracks'
Exemple #10
0
    def MakeHeatmapFromTracks(cls, galaxyFn, **trKwArgs):
        tr1 = trKwArgs.get('tr1')
        tr2 = trKwArgs.get('tr2')
        tr3 = trKwArgs.get('tr3')
        tableRowEntryTemplate = """<tr><td>%s</td><td><a href="%s"><img src="%s" /></a></td></tr>"""
        #htmlTemplate = '''<head><link rel="stylesheet" type="text/css" href="image_zoom/styles/stylesheet.css" /><script language="javascript" type="text/javascript" src="image_zoom/scripts/mootools-1.2.1-core.js"></script><script language="javascript" type="text/javascript" src="image_zoom/scripts/mootools-1.2-more.js"></script><script language="javascript" type="text/javascript" src="image_zoom/scripts/ImageZoom.js"></script>
        #                <script language="javascript" type="text/javascript" >
        #                liste = %s;
        #                function point_it(event){
        #                        pos_x = event.offsetX?(event.offsetX):event.pageX-document.getElementById("zoomer_image").offsetLeft;
        #                        pos_y = event.offsetY?(event.offsetY):event.pageY-document.getElementById("zoomer_image").offsetTop;
        #                        pos_x = Math.floor(pos_x/10);
        #                        pos_y = Math.floor(pos_y/10);
        #                        alert("Hello World!, you clicked: " +liste[pos_y][pos_x]);
        #                }</script>
        #                </head><body><div id="container"><!-- Image zoom start --><div id="zoomer_big_container"></div><div id="zoomer_thumb">		<a href="%s" target="_blank" ><img src="%s" /></a></div><!-- Image zoom end --></div></body></html>'''
        javaScriptCode = '''
liste = %s;
    function point_it(event){
            pos_x = event.offsetX?(event.offsetX):event.pageX-document.getElementById("zoomer_image").offsetLeft;
            pos_y = event.offsetY?(event.offsetY):event.pageY-document.getElementById("zoomer_image").offsetTop;
            pos_x = Math.floor(pos_x/10);
            pos_y = Math.floor(pos_y/10);
            alert("Hello World!, you clicked: " +liste[pos_y][pos_x]);
    }
'''

        
        
        ResultDicts = [cls.getValuesFromBedFile(tr1,colorPattern=(1,0,0))]
        ResultDicts += [cls.getValuesFromBedFile(tr2,colorPattern=(0,1,0))] if tr2 else []
        ResultDicts += [cls.getValuesFromBedFile(tr3,colorPattern=(0,0,1))] if tr3 else []
    
    
        htmlTableContent = []
        resultDict = cls.syncResultDict(ResultDicts)
        
        for chrom, valList in resultDict.items():
            areaList = []
            #For doing recursive pattern picture
            posMatrix = cls.getResult(len(valList), 2,2)
            javaScriptList = [[0 for v in xrange(len(posMatrix[0])) ] for t in xrange(len(posMatrix))]
            rowLen = len(posMatrix[0])
            im = Image.new("RGB", (rowLen, len(posMatrix)), "white")
            for yIndex, row in enumerate(posMatrix):
                for xIndex, elem in enumerate(row):
                    im.putpixel((xIndex, yIndex), valList[elem])
                    region = yIndex*rowLen + xIndex
                    javaScriptList[yIndex][xIndex] = chrom+':'+str(elem*10)+'-'+str((elem+1)*10)+': '+repr([ round((255-v)/255.0 ,2 ) for v in valList[elem]])
                    #areaList.append(areaTemplate % (xIndex*10, yIndex*10, xIndex*11, yIndex*11, repr(valList[elem])))
            im2 = im.resize((len(posMatrix[0])*10, len(posMatrix)*10))
            
            origSegsFile = GalaxyRunSpecificFile([chrom+'smallPic.png'], galaxyFn)
            origSegsFn = origSegsFile.getDiskPath(True)
            bigSegsFile = GalaxyRunSpecificFile([chrom+'BigPic.png'], galaxyFn)
            bigSegsFn = bigSegsFile.getDiskPath(True)
            
            im.save(origSegsFn)
            im2.save(bigSegsFn)
            
            
            #open('Recursive/'+chrom+'Zooming.html','w').write(htmlTemplate % (str(javaScriptList), chrom+'Big.png',chrom+'.png'))
            core = HtmlCore()
            core.begin( extraJavaScriptFns=['mootools-1.2.1-core.js', 'mootools-1.2-more.js', 'ImageZoom.js'], extraJavaScriptCode=javaScriptCode % str(javaScriptList), extraCssFns=['image_zoom.css'] )
            core.styleInfoBegin(styleId='container')
            core.styleInfoBegin(styleId='zoomer_big_container')
            core.styleInfoEnd()
            core.styleInfoBegin(styleId='zoomer_thumb')
            core.link(url=bigSegsFile.getURL(), text=str(HtmlCore().image(origSegsFile.getURL())), popup=True)
            core.styleInfoEnd()
            core.styleInfoEnd()
            core.end()
            htmlfile = GalaxyRunSpecificFile([chrom+'.html'], galaxyFn)
            htmlfile.writeTextToFile(str(core))
            htmlTableContent.append(tableRowEntryTemplate % (chrom, htmlfile.getURL(), origSegsFile.getURL()))
            
            #return str(core)  #htmlTemplate % (str(javaScriptList), bigSegsFn, origSegsFn)
        
            #######
            
            # FOr doing normal picture
            #columns = int(round((len(valList)/1000)+0.5))
            #im = Image.new("RGB", (1000, columns), "white")        
            #y=-1    
            #for index, valuTuple in enumerate(valList):
            #    x = index%1000
            #
            #    if x == 0:
            #        y+=1
            #    try:
            #        im.putpixel((x, y), valuTuple)
            #    except:
            #        pass
            #im.save(chrom+'.png')
            #htmlTableContent.append(tableRowEntryTemplate % (chrom, chrom+'.png'))
        htmlPageTemplate = """<html><body><table border="1">%s</table></body></html>"""
        return htmlPageTemplate % ('\n'.join(htmlTableContent))