def findOverrepresentedTFsFromGeneSet(genome, tfSource, ensembleGeneIdList,upFlankSize, downFlankSize, geneSource, galaxyFn):
        #galaxyFn = '/usit/insilico/web/lookalike/galaxy_dist-20090924-dev/database/files/003/dataset_3347.dat'
        #print 'overriding galaxyFN!: ', galaxyFn
        galaxyId = extractIdFromGalaxyFn(galaxyFn)
        uniqueWebPath = getUniqueWebPath(extractIdFromGalaxyFn(galaxyFn))

        assert genome == 'hg18'
        
        tfTrackNameMappings = TfInfo.getTfTrackNameMappings(genome)
        tfTrackName = tfTrackNameMappings[tfSource]
        
        
        #Get gene track
        assert geneSource == 'Ensembl'
        targetGeneRegsTempFn = uniqueWebPath + os.sep + 'geneRegs.bed'
        geneRegsTrackName = GenomeInfo.getStdGeneRegsTn(genome)
        geneRegsFn = getOrigFn(genome, geneRegsTrackName, '.category.bed')
        GalaxyInterface.getGeneTrackFromGeneList(genome, geneRegsTrackName, ensembleGeneIdList, targetGeneRegsTempFn )
        
        assert upFlankSize == downFlankSize == 0 #Should instead extend regions to include flanks
        
        tcGeneRegsTempFn = uniqueWebPath + os.sep + 'tcGeneRegs.targetcontrol.bedgraph'
        #Think this will be okay, subtraction not necessary as targets are put first:
        controlGeneRegsTempFn = geneRegsFn
        #print targetGeneRegsTempFn, controlGeneRegsTempFn, tcGeneRegsTempFn
        GalaxyInterface.combineToTargetControl(targetGeneRegsTempFn, controlGeneRegsTempFn, tcGeneRegsTempFn)
        
        #tcGeneRegsExternalTN = ['external'] +galaxyId +  [tcGeneRegsTempFn]
        tcGeneRegsExternalTN = ExternalTrackManager.createStdTrackName(galaxyId, 'tempTc')
        
        #tcGeneRegsExternalTN = ['external'] +targetGalaxyId +  [tcGeneRegsTempFn]
        #tcGeneRegsExternalTN = ['galaxy', externalId, tcGeneRegsTempFn]
        
        targetGeneRegsExternalTN = ExternalTrackManager.createStdTrackName(galaxyId, 'tempTc', '1')
        controlGeneRegsExternalTN = ExternalTrackManager.createStdTrackName(galaxyId, 'tempTc', '0')
        
        #pre-process
        print 'Pre-processing file: %s, with trackname: %s ' % (tcGeneRegsTempFn, tcGeneRegsExternalTN)
        ExternalTrackManager.preProcess(tcGeneRegsTempFn, tcGeneRegsExternalTN, 'targetcontrol.bedgraph',genome)
        print 'Pre-processing TN: ', targetGeneRegsExternalTN
        ExternalTrackManager.preProcess(targetGeneRegsTempFn, targetGeneRegsExternalTN, 'bed',genome)
        print 'Pre-processing TN: ', controlGeneRegsExternalTN
        ExternalTrackManager.preProcess(controlGeneRegsTempFn, controlGeneRegsExternalTN, 'bed',genome)
        
        #print tcGeneRegsExternalTN
        trackName1, trackName2 = tfTrackName, tcGeneRegsExternalTN
        
        analysisDef = 'Categories differentially located in targets?: Which categories of track1-points fall more inside case than control track2-segments? [rawStatistic:=PointCountInsideSegsStat:]' +\
                  '[tf1:=SegmentToStartPointFormatConverter:] [tf2:=TrivialFormatConverter:]' +\
                  '-> DivergentRowsInCategoryMatrixStat'
        regSpec, binSpec = '*','*'
        
        #print 'skipping preproc!!'
        #ExternalTrackManager.preProcess(tcGeneRegsExternalTN[-1], tcGeneRegsExternalTN, 'targetcontrol.bedgraph', genome)
        #ExternalTrackManager.preProcess(targetGeneRegsTempFn, targetGeneRegsExternalTN, 'bed', genome)
        
        GalaxyInterface.runManual([trackName1, trackName2], analysisDef, regSpec, binSpec, genome, printResults=True, printHtmlWarningMsgs=False)
Example #2
0
 def execute(self, imageFn, html, dpi = None):
     galaxyId = extractIdFromGalaxyFn(html)
     outDir = getUniqueWebPath(galaxyId)
     try:
         os.makedirs(outDir + '/tiles')
     except OSError, e:
         if e.errno == errno.EEXIST:
             print e
         else:
             raise e
 def __init__(self, name, id, mapId, batchid=0):
     self._name = name
     self._mapId = mapId
     
     colldir =  ('%.3f' % (int(id)/1000 *1.0 / 1000))[2:]
     
     self._resultsFilesDir = '/'.join([RESULTS_FILES_PATH, colldir])
     self._resultsStaticDir = getUniqueWebPath([colldir, id, batchid])
     self._googleMapsDir = '/'.join([STATIC_PATH, 'maps', name])
     self._googleMapsCommonDir = '/'.join([STATIC_PATH, 'maps', 'common'])
     self._googleMapsCommonMapIdDir = '/'.join([self._googleMapsCommonDir, mapId])
    def findTFsOccurringInRegions(cls, genome, tfSource, regionsBedFn, upFlankSize, downFlankSize, galaxyFn):
        uniqueWebPath = getUniqueWebPath(extractIdFromGalaxyFn(galaxyFn))
        #assert genome == 'hg18' #other genomes not supported. TF id links do not specify genome for pre-selection of analysis
        
        tfTrackNameMappings = TfInfo.getTfTrackNameMappings(genome)
        assert tfTrackNameMappings != {}, 'No TF info for genome: %s' % genome
        
        tfTrackName = tfTrackNameMappings[tfSource]
                
        if (upFlankSize == downFlankSize == 0):
            flankedRegionsFn = regionsBedFn
        else:
            flankedRegionsFn= uniqueWebPath + os.sep + 'flankedRegs.bed'
            GalaxyInterface.expandBedSegments(regionsBedFn, flankedRegionsFn, genome, upFlankSize, downFlankSize)

        regSpec, binSpec = 'bed', flankedRegionsFn
        res = cls._runCategoryPointCount(genome, regSpec, binSpec, tfTrackName)

        tfNames = res.getResDictKeys()
        #print 'RES: ', res.getGlobalResult()[tfNames[0]], type(res.getGlobalResult()[tfNames[0]])
        import third_party.safeshelve as safeshelve
        pwm2tfids = safeshelve.open(os.sep.join([HB_SOURCE_CODE_BASE_DIR,'data','pwm2TFids.shelf']), 'r')
        tf2class = safeshelve.open(os.sep.join([HB_SOURCE_CODE_BASE_DIR,'data','TfId2Class.shelf']), 'r')
        pwmName2id= safeshelve.open(os.sep.join([HB_SOURCE_CODE_BASE_DIR,'data','pwmName2id.shelf']), 'r')
        #print tfNames[0],tfNames[1], ' VS ', pwm2tfids.keys()[0], len(pwm2tfids)
        #tfs = list(reversed(sorted([(res.getGlobalResult()[tf], tf, '%s (%i hits (class %s))'%(tf, res.getGlobalResult()[tf]), '/'.join([tf2class[x] for x in pwm2tfids[tf]]) ) for tf in tfNames]))) #num hits, tfName, tfTextInclHits
        tfs = list(reversed(sorted([(res.getGlobalResult()[tf], tf, '%s (%i hits )'%(tf, res.getGlobalResult()[tf]) + \
                                     (' (class: %s)'%'/'.join(set([str(tf2class.get(x)) for x in pwm2tfids[pwmName2id[tf]] if x in tf2class]))\
                                      if (tf in pwmName2id and pwmName2id[tf] in pwm2tfids and any([x in tf2class for x in pwm2tfids[pwmName2id[tf]]]))\
                                    else '') ) \
                                    for tf in tfNames])) ) #num hits, tfName, tfTextInclHits
        
        tfsPlural = 's' if len(tfs)!=1 else ''
        print '<p>There are %i TF%s targeting your regions of interest, using "%s" as source of TF occurrences.</p>' % (len(tfs), tfsPlural, tfSource)
        
        expansionStr = ' flanked' if not (upFlankSize == downFlankSize == 0) else ''                

        idHtmlFileNamer = GalaxyRunSpecificFile(['allTfIds.html'],galaxyFn)
        idHtmlFileNamer.writeTextToFile('<br>'.join(['<a href=/hbdev/hyper?track1=%s&track2=>%s</a>'%( quote(':'.join(tfTrackName+[tf[1]])), tf[2]) for tf in tfs]))
        print '<p>', idHtmlFileNamer.getLink('Inspect html file'), ' of all TF IDs occurring 1 or more times within your%s regions of interest, with each TF ID linking to analysis with this TF pre-selected.</p>' % (expansionStr)

        idFileNamer = GalaxyRunSpecificFile(['allTfIds.txt'],galaxyFn)
        idFileNamer.writeTextToFile(os.linesep.join([tf[2] for tf in tfs]) + os.linesep)
        print '<p>', idFileNamer.getLink('Inspect text file'), ' listing all TF IDs occurring 1 or more times within your%s regions of interest.</p>' % (expansionStr)
    
        extractedTfbsFileNamer = GalaxyRunSpecificFile(['tfbsInGeneRegions.bed'],galaxyFn)
        GalaxyInterface.extractTrackManyBins(genome, tfTrackName, regSpec, binSpec, True, 'bed', False, False, extractedTfbsFileNamer.getDiskPath(), True)
        print '<p>', extractedTfbsFileNamer.getLoadToHistoryLink('Inspect bed-file'), 'of all TF binding sites occurring within your%s regions of interest.</p>' % (expansionStr)

        for dummy,tf,dummy2 in tfs:            
            extractedTfbsFileNamer = GalaxyRunSpecificFile([tf+'_tfbsInGeneRegions.bed'],galaxyFn)
            GalaxyInterface.extractTrackManyBins(genome, tfTrackName+[tf], regSpec, binSpec, True, 'bed', False, False, extractedTfbsFileNamer.getDiskPath())
            print '<p>', extractedTfbsFileNamer.getLoadToHistoryLink('Binding sites of the TF %s' %tf, 'bed'), 'occurring within your%s regions of interest (bed-file).</p>' % (expansionStr)
 def __init__(self, resultsList, galaxyFn):
     self._viewers = []
     self._resultsList = resultsList
     self._galaxyFn = galaxyFn
     galaxyId = extractIdFromGalaxyFn(galaxyFn)
     uniqueWebPath = getUniqueWebPath(galaxyId)
     baseDir = uniqueWebPath
     
     if len(resultsList) > 1:
         self._viewers.append(MultiBatchResultsViewer(resultsList, baseDir))
     for i,results in enumerate(resultsList):
         self._viewers.append(ResultsViewer(results, os.sep.join([baseDir,str(i)]) ))
Example #6
0
    def saveMap(self, args):
        if args['name'].isdigit():
            galaxyId = int(args['name'])
            outDir = getUniqueWebPath(['%03d' % (galaxyId / 1000), str(galaxyId)])
        else:
            outDir = '/'.join([GoogleMapsInterface.BASE_DIR, args['name']])

        try:
            os.makedirs(outDir + '/cookies')
        except OSError, e:
            if e.errno == errno.EEXIST:
                pass
            else:
                raise e
    def findTFsTargetingGenes(cls, genome, tfSource, ensembleGeneIdList,upFlankSize, downFlankSize, geneSource, galaxyFn):
        #galaxyFn = '/usit/insilico/web/lookalike/galaxy_dist-20090924-dev/database/files/003/dataset_3347.dat'
        #print 'overriding galaxyFN!: ', galaxyFn
        uniqueWebPath = getUniqueWebPath(extractIdFromGalaxyFn(galaxyFn))

        assert genome in ['mm9','hg18'] #other genomes not supported. TF id links do not specify genome for pre-selection of analysis
        
        #if tfSource == 'UCSC tfbs conserved':
        #    tfTrackName = ['Gene regulation','TFBS','UCSC prediction track']
        #else:
        #    raise
        tfTrackNameMappings = TfInfo.getTfTrackNameMappings(genome)
        tfTrackName = tfTrackNameMappings[tfSource]
                
        #Get gene track
        #targetGeneRegsTempFn = uniqueWebPath + os.sep + 'geneRegs.bed'
        #geneRegsTrackName = GenomeInfo.getStdGeneRegsTn(genome)
        #geneRegsFn = getOrigFn(genome, geneRegsTrackName, '.category.bed')
        #GalaxyInterface.getGeneTrackFromGeneList(genome, geneRegsTrackName, ensembleGeneIdList, targetGeneRegsTempFn )
        
        if not (upFlankSize == downFlankSize == 0):            
            unflankedGeneRegsTempFn = uniqueWebPath + os.sep + '_geneRegs.bed'
            flankedGeneRegsTempFn  = uniqueWebPath + os.sep + 'flankedGeneRegs.bed'
            geneRegsTrackName = GenomeInfo.getStdGeneRegsTn(genome)
            #geneRegsFn = getOrigFn(genome, geneRegsTrackName, '.category.bed')
            GalaxyInterface.getGeneTrackFromGeneList(genome, geneRegsTrackName, ensembleGeneIdList, unflankedGeneRegsTempFn )
            GalaxyInterface.expandBedSegments(unflankedGeneRegsTempFn, flankedGeneRegsTempFn, genome, upFlankSize, downFlankSize)
            #flankedGeneRegsExternalTN = ['external'] +galaxyId +  [flankedGeneRegsTempFn]
            regSpec, binSpec = 'file', flankedGeneRegsTempFn
        else:
            regSpec, binSpec = '__genes__', ','.join(ensembleGeneIdList)

        res = cls._runCategoryPointCount(genome, regSpec, binSpec, tfTrackName)

        #trackName1 = tfTrackName
        #
        #analysisDef = 'Category point count: Number of elements each category of track1 (with overlaps)'+\
        #          '[tf1:=SegmentToStartPointFormatConverter:]'+\
        #          '-> FreqByCatStat'
        ##assert len(ensembleGeneIdList)==1
        ##geneId = ensembleGeneIdList[0]
        #
        #print '<div class="debug">'        
        #userBinSource, fullRunArgs = GalaxyInterface._prepareRun(trackName1, None, analysisDef, regSpec, binSpec, genome)
        #res = AnalysisDefJob(analysisDef, trackName1, None, userBinSource, **fullRunArgs).run()
        #
        #print res        
        ##GalaxyInterface._viewResults([res], galaxyFn)
        #print '</div>'
        tfs = res.getResDictKeys()
        
        genesPlural = 's' if len(ensembleGeneIdList)>1 else ''
        tfsPlural = 's' if len(tfs)!=1 else ''
        print '<p>There are %i TF%s targeting your gene%s of interest (%s), using "%s" as source of TF occurrences.</p>' % (len(tfs), tfsPlural, genesPlural, ','.join(ensembleGeneIdList), tfSource)
        
        expansionStr = ' flanked' if not (upFlankSize == downFlankSize == 0) else ''                

        idHtmlFileNamer = GalaxyRunSpecificFile(['allTfIds.html'],galaxyFn)
        idHtmlFileNamer.writeTextToFile('<br>'.join(['<a href=%s/hyper?dbkey=%s&track1=%s&track2=>%s</a>'%(URL_PREFIX, genome, quote(':'.join(tfTrackName+[tf])), tf) for tf in tfs]))
        #idHtmlFileNamer.writeTextToFile('<br>'.join(['<a href=/hbdev/hyper?track1=%s&track2=>%s</a>'%( ':'.join(tfTrackName+[tf]), tf) for tf in tfs]))
        print '<p>', idHtmlFileNamer.getLink('Inspect html file'), ' of all TF IDs occurring 1 or more times within your%s gene region%s of interest, with each TF ID linking to analysis with this TF pre-selected.</p>' % (expansionStr, genesPlural)

        idFileNamer = GalaxyRunSpecificFile(['allTfIds.txt'],galaxyFn)
        idFileNamer.writeTextToFile(os.linesep.join(tfs) + os.linesep)
        print '<p>', idFileNamer.getLink('Inspect text file'), ' listing all TF IDs occurring 1 or more times within your%s gene region%s of interest.</p>' % (expansionStr, genesPlural)
    
        extractedTfbsFileNamer = GalaxyRunSpecificFile(['tfbsInGeneRegions.bed'],galaxyFn)
        GalaxyInterface.extractTrackManyBins(genome, tfTrackName, regSpec, binSpec, True, 'bed', False, False, extractedTfbsFileNamer.getDiskPath())
        print '<p>', extractedTfbsFileNamer.getLink('Inspect bed-file'), 'of all TF binding sites occurring within your%s gene region%s of interest.</p>' % (expansionStr, genesPlural)
        
        #idFile = idFileNamer.getFile()
        #idFile.write(', '.join([str(bin.val) for bin in targetBins if res[bin][resDictKey]>0]) + os.sep)
        #idFile.close()
        
        #print idFileNamer.getLink('Text file'), ' of TF IDs'
        
        #GalaxyInterface.run(tfTrackName, tcGeneRegsExternalTN, analysisDef, regSpec, binSpec, genome, galaxyFn)
        #GalaxyInterface.run(':'.join(tfTrackName), ':'.join(tcGeneRegsExternalTN), analysisDef, regSpec, binSpec, genome, galaxyFn)
                
 def _getDiskPathForFiles(galaxyFn):
     galaxyId = extractIdFromGalaxyFn(galaxyFn)
     return getUniqueWebPath(galaxyId)