Esempio n. 1
0
    def findTFsOccurringInRegions(cls, genome, tfSource, regionsBedFn,
                                  upFlankSize, downFlankSize, galaxyFn):
        uniqueWebPath = GalaxyRunSpecificFile([], galaxyFn).getDiskPath()
        #assert genome == 'hg18' #other genomes not supported. TF id links do not specify genome for pre-selection of analysis

        tfTrackNameMappings = TfInfo.getTfTrackNameMappings(genome)
        assert tfTrackNameMappings != {}, 'No TF info for genome: %s' % genome

        tfTrackName = tfTrackNameMappings[tfSource]

        if (upFlankSize == downFlankSize == 0):
            flankedRegionsFn = regionsBedFn
        else:
            flankedRegionsFn = uniqueWebPath + os.sep + 'flankedRegs.bed'
            GalaxyInterface.expandBedSegments(regionsBedFn, flankedRegionsFn,
                                              genome, upFlankSize,
                                              downFlankSize)

        regSpec, binSpec = 'bed', flankedRegionsFn
        res = cls._runCategoryPointCount(genome, regSpec, binSpec, tfTrackName)

        tfNames = res.getResDictKeys()
        #print 'RES: ', res.getGlobalResult()[tfNames[0]], type(res.getGlobalResult()[tfNames[0]])
        pwm2tfids = safeshelve.open(
            os.sep.join([HB_SOURCE_CODE_BASE_DIR, 'data', 'pwm2TFids.shelf']),
            'r')
        tf2class = safeshelve.open(
            os.sep.join([HB_SOURCE_CODE_BASE_DIR, 'data', 'TfId2Class.shelf']),
            'r')
        pwmName2id = safeshelve.open(
            os.sep.join([HB_SOURCE_CODE_BASE_DIR, 'data', 'pwmName2id.shelf']),
            'r')
        #print tfNames[0],tfNames[1], ' VS ', pwm2tfids.keys()[0], len(pwm2tfids)
        #tfs = list(reversed(sorted([(res.getGlobalResult()[tf], tf, '%s (%i hits (class %s))'%(tf, res.getGlobalResult()[tf]), '/'.join([tf2class[x] for x in pwm2tfids[tf]]) ) for tf in tfNames]))) #num hits, tfName, tfTextInclHits
        tfs = list(reversed(sorted([(res.getGlobalResult()[tf], tf, '%s (%i hits )'%(tf, res.getGlobalResult()[tf]) + \
                                     (' (class: %s)'%'/'.join(set([str(tf2class.get(x)) for x in pwm2tfids[pwmName2id[tf]] if x in tf2class]))\
                                      if (tf in pwmName2id and pwmName2id[tf] in pwm2tfids and any([x in tf2class for x in pwm2tfids[pwmName2id[tf]]]))\
                                    else '') ) \
                                    for tf in tfNames])) ) #num hits, tfName, tfTextInclHits

        tfsPlural = 's' if len(tfs) != 1 else ''
        print '<p>There are %i TF%s targeting your regions of interest, using "%s" as source of TF occurrences.</p>' % (
            len(tfs), tfsPlural, tfSource)

        expansionStr = ' flanked' if not (
            upFlankSize == downFlankSize == 0) else ''

        idHtmlFileNamer = GalaxyRunSpecificFile(['allTfIds.html'], galaxyFn)
        idHtmlFileNamer.writeTextToFile('<br>'.join([
            '<a href=/hbdev/hyper?track1=%s&track2=>%s</a>' %
            (quote(':'.join(tfTrackName + [tf[1]])), tf[2]) for tf in tfs
        ]))
        print '<p>', idHtmlFileNamer.getLink(
            'Inspect html file'
        ), ' of all TF IDs occurring 1 or more times within your%s regions of interest, with each TF ID linking to analysis with this TF pre-selected.</p>' % (
            expansionStr)

        idFileNamer = GalaxyRunSpecificFile(['allTfIds.txt'], galaxyFn)
        idFileNamer.writeTextToFile(
            os.linesep.join([tf[2] for tf in tfs]) + os.linesep)
        print '<p>', idFileNamer.getLink(
            'Inspect text file'
        ), ' listing all TF IDs occurring 1 or more times within your%s regions of interest.</p>' % (
            expansionStr)

        extractedTfbsFileNamer = GalaxyRunSpecificFile(
            ['tfbsInGeneRegions.bed'], galaxyFn)
        GalaxyInterface.extractTrackManyBins(
            genome, tfTrackName, regSpec, binSpec, True, 'bed', False, False,
            extractedTfbsFileNamer.getDiskPath(), True)
        print '<p>', extractedTfbsFileNamer.getLoadToHistoryLink(
            'Inspect bed-file'
        ), 'of all TF binding sites occurring within your%s regions of interest.</p>' % (
            expansionStr)

        for dummy, tf, dummy2 in tfs:
            extractedTfbsFileNamer = GalaxyRunSpecificFile(
                [tf + '_tfbsInGeneRegions.bed'], galaxyFn)
            GalaxyInterface.extractTrackManyBins(
                genome, tfTrackName + [tf], regSpec, binSpec, True, 'bed',
                False, False, extractedTfbsFileNamer.getDiskPath())
            print '<p>', extractedTfbsFileNamer.getLoadToHistoryLink(
                'Binding sites of the TF %s' % tf, 'bed'
            ), 'occurring within your%s regions of interest (bed-file).</p>' % (
                expansionStr)
Esempio n. 2
0
    def execute(cls, choices, galaxyFn=None, username=''):
        genome = choices.genome
         

        from quick.multitrack.MultiTrackCommon import getGSuiteDataFromGalaxyTN
        trackTitles, refTrackNameList, genome = getGSuiteDataFromGalaxyTN(choices.gsuite)
        
        queryTrackName = ExternalTrackManager.extractFnFromGalaxyTN(choices.targetTrack)
        if choices.isBasic:
            suffix = ExternalTrackManager.extractFileSuffixFromGalaxyTN(choices.targetTrack, False)
            regSpec = suffix
            binSpec = queryTrackName
        else:
            regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices)
        
        #targetTrack = choices.targetTrack.split(':')
        #targetTrackTitle = targetTrack[-1]
        #print targetTrackTitle
        #
        #binSpec = targetTrackTitle
        #Phenotype and disease associations:Assorted experiments:Virus integration, HPV specific, Kraus and Schmitz, including 50kb flanks

        from gold.gsuite.GSuiteConstants import TITLE_COL
        from gold.gsuite.GSuite import GSuite
        from proto.hyperbrowser.StaticFile import GalaxyRunSpecificFile
        from gold.gsuite.GSuiteEditor import selectColumnsFromGSuite
        staticFile=[]
        
        results = []
        for refTrack in refTrackNameList:
            analysisDef = '-> ProportionCountStat' #ProportionCountStat #CountStat
            res = GalaxyInterface.runManual([refTrack], analysisDef, regSpec, binSpec, genome, username=username, galaxyFn=galaxyFn, printRunDescription=False, printResults=False, printProgress=False)
            segCoverageProp = [res[seg]['Result'] for seg in res.getAllRegionKeys()]
            results.append(segCoverageProp)
            
            regFileNamer = GalaxyRunSpecificFile(refTrack, galaxyFn)
            staticFile.append([regFileNamer.getLink('Download bed-file'), regFileNamer.getLoadToHistoryLink('Download bed-file to History')])

        refGSuite = getGSuiteFromGalaxyTN(choices.gsuite)

        if TITLE_COL == choices.selectColumns:
            selected = trackTitles
        else:
            selected = refGSuite.getAttributeValueList(choices.selectColumns)

        yAxisNameOverMouse=[]
        metadataAll =[]

        for x in range(0, len(selected)):
            if selected[x] == None:
                yAxisNameOverMouse.append(str(trackTitles[x]) + ' --- ' + 'None')
            else:
                if TITLE_COL == choices.selectColumns:
                    yAxisNameOverMouse.append(selected[x].replace('\'', '').replace('"', ''))
                else:
                    metadata = str(selected[x].replace('\'', '').replace('"', ''))
                    yAxisNameOverMouse.append(str(trackTitles[x]) + ' --- ' + metadata)
                    metadataAll.append(metadata)

        colorListForYAxisNameOverMouse = []
        if len(metadataAll) > 0:
            import quick.webtools.restricted.visualization.visualizationGraphs as vg
            cList = vg.colorList().fullColorList()
            uniqueCList = list(set(metadataAll))

            for m in metadataAll:
                colorListForYAxisNameOverMouse.append(cList[uniqueCList.index(m)])

        #startEnd - order in res
        startEndInterval = []
        startEnd = []
        i=0
        

        extraX=[]
        rowLabel = []
        for ch in res.getAllRegionKeys():
            rowLabel.append(str(ch.chr) + ":" + str(ch.start) + "-" + str(ch.end) + str(' (Pos)' if ch.strand else ' (Neg)'))
            if not i==0 and not i==len(res.getAllRegionKeys())-1:
                start = ch.start
                if start-end > 0:
                    startEnd.append(start-end)
                else:
                    startEnd.append('null')
                    extraX.append("""{ color: 'orange', width: 5, value: '""" + str(i-0.5) + """' }""")
                startEndInterval.append(ch.end - ch.start)
            else:
                startEndInterval.append(ch.end - ch.start)
            end = ch.end
            i+=1

        extraXAxis='plotLines: [ '
        extraXAxis = extraXAxis + ",".join(extraX)
        extraXAxis = extraXAxis + """ ],  """

        #rowLabel = res.getAllRegionKeys()
        #rowLabel = [str(x) for x in rowLabel]
        

        import quick.webtools.restricted.visualization.visualizationPlots as vp

        htmlCore = HtmlCore()
        htmlCore.begin()
        htmlCore.divBegin(divId='results-page')
        htmlCore.divBegin(divClass='results-section')
        htmlCore.divBegin('plotDiv')
        htmlCore.line(vp.addJSlibs())
        htmlCore.line(vp.useThemePlot())
        htmlCore.line(vp.addJSlibsExport())
        htmlCore.line(vp.axaddJSlibsOverMouseAxisisPopup())
        #vp.addGuideline(htmlCore)
        htmlCore.line(vp._addGuidelineV1())

        htmlCore.line(vp.addJSlibsHeatmap())

        from config.Config import DATA_FILES_PATH

        from proto.StaticFile import StaticFile, GalaxyRunSpecificFile

        #sf = GalaxyRunSpecificFile(['result.txt'], galaxyFn)
        #outFile = sf.getDiskPath(ensurePath=True)

        htmlCore.divBegin()
        writeFile = open(
            cls.makeHistElement(galaxyExt='tabular',
                                title='result'), 'w')
        # htmlCore.link('Get all results', sf.getURL())
        htmlCore.divEnd()

        i = 0

        writeFile.write('Track' + '\t' + '\t'.join(rowLabel)+ '\n')
        for rList in results:
            writeFile.write(str(yAxisNameOverMouse[i]) + '\t' + '\t'.join([str(r) for r in rList]) + '\n')
            i+=1




        fileOutput = GalaxyRunSpecificFile(['heatmap.png'],
                                           galaxyFn)
        ensurePathExists(fileOutput.getDiskPath())

        fileOutputPdf = GalaxyRunSpecificFile(['heatmap.pdf'],
                                              galaxyFn)
        ensurePathExists(fileOutputPdf.getDiskPath())

        cls.generateStaticRPlot(results, colorListForYAxisNameOverMouse, rowLabel, yAxisNameOverMouse,
                                colorMaps[choices.colorMapSelectList],
                                fileOutput.getDiskPath(), fileOutputPdf.getDiskPath())


        htmlCore.divBegin(divId='heatmap', style="padding: 10px 0 px 10 px 0px;margin: 10px 0 px 10 px 0px")
        htmlCore.link('Download heatmap image', fileOutputPdf.getURL())
        htmlCore.divEnd()

        if len(results) * len(results[1]) >= 10000:
            htmlCore.image(fileOutput.getURL())


        else:

            min = 1000000000
            max = -1000000000
            for rList in results:
                for r in rList:
                    if min > r:
                        min = r
                    if max < r:
                        max = r




            if max-min != 0:
                resultNormalised = []
                for rList in results:
                    resultNormalisedPart = []
                    for r in rList:
                        resultNormalisedPart.append((r-min)/(max-min))
                    resultNormalised.append(resultNormalisedPart)

                addText = '(normalised to [0, 1])'
            else:
                resultNormalised = results
                addText = ''


            hm, heatmapPlotNumber, heatmapPlot = vp.drawHeatMap(
                                                    resultNormalised,
                                                    colorMaps[choices.colorMapSelectList],
                                                    label='this.series.xAxis.categories[this.point.x] + ' + "'<br >'" + ' + yAxisNameOverMouse[this.point.y] + ' + "'<br>Overlap proportion" + str(addText) + ": <b>'" + ' + this.point.value + ' + "'</b>'",
                                                    yAxisTitle= 'Reference tracks',
                                                    categories=rowLabel,
                                                    tickInterval=1,
                                                    plotNumber=3,
                                                    interaction=True,
                                                    otherPlotNumber=1,
                                                    titleText='Overlap with reference tracks for each local region',
                                                    otherPlotData=[startEnd, startEndInterval],
                                                    overMouseAxisX=True,
                                                    overMouseAxisY=True,
                                                    yAxisNameOverMouse=yAxisNameOverMouse,
                                                    overMouseLabelY=" + 'Track: '" + ' + this.value + ' + "' '" + ' + yAxisNameOverMouse[this.value] + ',
                                                    overMouseLabelX = ' + this.value.substring(0, 20) +',
                                                    extrOp = staticFile
                                                    )
            htmlCore.line(hm)
            htmlCore.line(vp.drawChartInteractionWithHeatmap(
                [startEndInterval, startEnd],
                tickInterval=1,
                type='line',
                categories=[rowLabel, rowLabel],
                seriesType=['line', 'column'],
                minWidth=300,
                height=500,
                lineWidth=3,
                titleText=['Lengths of segments (local regions)','Gaps between consecutive segments'],
                label=['<b>Length: </b>{point.y}<br/>', '<b>Gap length: </b>{point.y}<br/>'],
                subtitleText=['',''],
                yAxisTitle=['Lengths','Gap lengths'],
                seriesName=['Lengths','Gap lengths'],
                xAxisRotation=90,
                legend=False,
                extraXAxis=extraXAxis,
                heatmapPlot=heatmapPlot,
                heatmapPlotNumber=heatmapPlotNumber,
                overMouseAxisX=True,
                overMouseLabelX = ' + this.value.substring(0, 20) +'
                ))


        htmlCore.divEnd()
        htmlCore.divEnd()
        htmlCore.divEnd()
        htmlCore.end()

        htmlCore.hideToggle(styleClass='debug')

        print htmlCore