예제 #1
0
    def generateQ1output(cls, additionalResultsDict, analysisQuestion, choices, galaxyFn,
                         gsPerTrackResults, queryTrackTitle, gsuite, results,
                         similarityStatClassName):
        """
        Build the HTML results page for analysis question Q1.

        The page holds a header (analysisQuestion), a sentence naming the first
        track in `results` with its similarity score, a sortable table with
        tabular/GSuite export buttons, and the plot returned by
        GSuiteTracksCoincidingWithQueryTrackTool.drawPlot.

        `results` maps track title to similarity score. Its first key is
        reported as the most similar track, so it is presumably ordered
        best-first (TODO confirm against the caller). `gsPerTrackResults` is
        indexed as (column names, table dict).

        Returns the HtmlCore object holding the page.
        """
        core = HtmlCore()
        core.begin()
        core.divBegin(divId='results-page')
        core.divBegin(divClass='results-section')
        core.header(analysisQuestion)
        # list(...) rather than .keys()[0]: works whether keys() returns a
        # list or a view, and still raises IndexError on empty results.
        topTrackTitle = list(results)[0]
        # The template previously carried a stray run of several hundred
        # spaces before its second line, which was emitted into every page.
        core.paragraph('''
                The track "%s" in the GSuite is the one most similar to the query track %s, with a similarity score of %s
                as measured by the "%s" track similarity measure.
            ''' % (
            topTrackTitle, queryTrackTitle, strWithNatLangFormatting(results[topTrackTitle]),
            similarityStatClassName))
        core.divBegin()

        addTableWithTabularAndGsuiteImportButtons(
            core, choices, galaxyFn, cls.Q1_SHORT, tableDict=gsPerTrackResults[1],
            columnNames=gsPerTrackResults[0], gsuite=gsuite, results=results,
            gsuiteAppendAttrs=['similarity_score'], sortable=True)

        core.divEnd()
        # When a lead attribute other than the title is selected, column
        # index 1 is passed to the plot instead of 0.
        columnInd = 0
        if choices.leadAttribute and choices.leadAttribute != GSuiteConstants.TITLE_COL:
            columnInd = 1

        res = GSuiteTracksCoincidingWithQueryTrackTool.drawPlot(
            results, additionalResultsDict,
            'Similarity to query track', columnInd=columnInd)
        core.line(res)
        core.divEnd()
        core.divEnd()
        core.end()
        return core
    def execute(cls, choices, galaxyFn=None, username=''):

        cls._setDebugModeIfSelected(choices)

        gSuite = getGSuiteFromGalaxyTN(choices.gsuite)
        if gSuite.genome != choices.genome:
            gSuite.setGenomeOfAllTracks(choices.genome)
        regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices)
        paragraphs = []
        paragraphs += generatePilotPageTwoParagraphs(gSuite,
                                                     galaxyFn,
                                                     regSpec=regSpec,
                                                     binSpec=binSpec)
        paragraphs += generatePilotPageThreeParagraphs(gSuite,
                                                       galaxyFn,
                                                       regSpec=regSpec,
                                                       binSpec=binSpec)

        core = HtmlCore()
        core.begin()
        core.divBegin(divId='results-page')
        core.divBegin(divClass='results-section')
        core.header('Similarity and uniqueness of tracks')
        for prg in paragraphs:
            core.paragraph(prg)
        core.divEnd()
        core.divEnd()
        core.end()

        print core
 def createDescription(toolDescription=None, stepsToRunTool=None, toolResult=None, limitation=None):
     """
     Build the HTML "Description" section of a tool page.

     toolDescription -- short text on why the tool exists; rendered as a paragraph.
     stepsToRunTool  -- steps for running the tool; rendered as an ordered list.
     toolResult      -- text describing the tool output; rendered as a paragraph.
     limitation      -- accepted, but not rendered yet (it only makes the
                        surrounding section appear).

     Returns the HTML as a string. When every argument is None the string is
     that of an untouched HtmlCore.
     """
     core = HtmlCore()

     sections = (toolDescription, stepsToRunTool, toolResult, limitation)
     if any(section is not None for section in sections):
         core.divBegin(divId='decription-page')
         core.divBegin(divClass='decription-section')
         core.header('Description')

         # Small description of the tool (the reason for creating it)
         if toolDescription is not None:
             core.divBegin(divClass='decription-section-main')
             core.paragraph(toolDescription)
             core.divEnd()

         # How to use the tool
         if stepsToRunTool is not None:
             core.paragraph('To run the tool, follow these steps:')
             core.orderedList(stepsToRunTool)

         # What the result of the tool is. This used to be guarded by
         # toolDescription, which dropped the result when no description was
         # given and rendered a None paragraph when only a description was.
         if toolResult is not None:
             core.divBegin(divClass='decription-section-main')
             core.paragraph(toolResult)
             core.divEnd()

         # TODO: render the limitations of the tool once a layout is decided.

         core.divEnd()
         core.divEnd()

     return str(core)
    def execute(cls, choices, galaxyFn=None, username=''):

        #rsids = choices.rsid.split()
        if choices.run == 'Batch':
            #print rsids
            return cls.execute_batch(choices, galaxyFn, username)
        elif choices.batch != '__batch__':
            print '<div class="debug">'

        results = GalaxyRunSpecificFile(['html'], galaxyFn)

        dir = os.path.dirname(results.getDiskPath(ensurePath=True))
        os.mkdir(dir + '/html')

        #print '<div class="debug">'

        cls.choices = choices
        cls.run_varmelt(dir, choices)

        url = results.getURL()

        if choices.run == 'Single' and choices.batch != '__batch__':
            print '</div></pre>'

        core = HtmlCore()
        core.header('Primer3 candidates')
        VariantMeltingProfile.primer3_table_header(core)
        VariantMeltingProfile.primer3_resultsfile_header(dir)

        for r in range(0, int(choices.numReturn)):
            datafile = dir + '/tempdata.' + str(r) + '.results.txt'
            if os.path.exists(datafile):
                variant_pos = VariantMeltingProfile.proc_temp_data(dir, str(r))

                chart = open(dir + '/html/chart-' + str(r) + '.html', 'w')
                chart.write(VariantMeltingProfile.make_chart(variant_pos, r))
                chart.write(cls.primer3_results_table(dir, r))
                chart.write('</body></html>')
                chart.close()

                cls.primer3_results(dir, r)
                cls.primer3_results_table(dir, r, core, url)
                print '<a href="%s/chart-%d.html">Results/graph num %d</a><br>' % (
                    url, r, r + 1)
            else:
                cls.primer3_results(dir, r)
                cls.primer3_results_table(dir, r, core, None)
                break

        core.tableFooter()

        if choices.run == 'Single' and choices.batch != '__batch__':
            print str(core)
            print '<pre>'

        xcore = HtmlCore()
        xcore.begin()
        xcore.append(str(core))
        xcore.end()
        open(dir + '/results.html', 'w').write(str(xcore))
예제 #5
0
    def execute(choices, galaxyFn=None, username=''):
        '''
        Is called when execute-button is pushed by web-user. Should print
        output as HTML to standard out, which will be directed to a results page
        in Galaxy history. If getOutputFormat is anything else than HTML, the
        output should be written to the file with path galaxyFn. If needed,
        StaticFile can be used to get a path where additional files can be put
        (e.g. generated image files). choices is a list of selections made by
        web-user in each options box.
        '''

        resultsFN = ExternalTrackManager.extractFnFromGalaxyTN(
            choices.resultsFile)
        examResults = TaskScoreOverview(resultsFN, galaxyFn)
        examResults.run()
        core = HtmlCore()
        core.begin()
        core.header('Overview of exam scores')
        for table in examResults.getTables():
            core.divBegin(divClass='resultsTable')
            core.tableHeader([])
            for key, val in table.iteritems():
                core.tableLine([key, val])
            core.tableFooter()
            core.divEnd()

        for plotUrl in examResults.getPlotUrls():
            core.divBegin(divClass='plot')
            core.image(plotUrl)
            core.divEnd()
        core.end()
        print core
예제 #6
0
    def generateQ2Output(cls, additionalAttributesDict, additionalResultsDict, analysisQuestion, choices,
                         galaxyFn, queryTrackTitle, gsuite, results, similarityStatClassName):
        """
        Build the HTML results page for analysis question Q2.

        `results` maps track title to a sequence whose element 0 is the
        similarity to the query track and element 1 is the P-value. Its first
        key is reported as the track with the lowest P-value, so it is
        presumably ordered best-first (TODO confirm against the caller).
        `additionalResultsDict` must hold an entry for every key of `results`.

        The page holds the header, a summary sentence, a sortable table with
        tabular/GSuite export buttons and the plot from
        GSuiteTracksCoincidingWithQueryTrackTool.drawPlot, where the P-value
        is passed along as an additional per-track result.

        Returns the HtmlCore object holding the page.
        """
        # Evaluated once; it selects both the table layout and the plot column.
        useLeadAttribute = bool(choices.leadAttribute and
                                choices.leadAttribute != GSuiteConstants.TITLE_COL)

        gsPerTrackResultsModel = GSuitePerTrackResultModel(results, ['Similarity to query track', 'P-value'],
                                                           additionalResultsDict=additionalResultsDict,
                                                           additionalAttributesDict=additionalAttributesDict)
        if useLeadAttribute:
            gsPerTrackResults = gsPerTrackResultsModel.generateColumnTitlesAndResultsDict(choices.leadAttribute)
        else:
            gsPerTrackResults = gsPerTrackResultsModel.generateColumnTitlesAndResultsDict()
        core = HtmlCore()
        core.begin()
        core.divBegin(divId='results-page')
        core.divBegin(divClass='results-section')
        core.header(analysisQuestion)
        # list(...) rather than .keys()[0]: works whether keys() returns a
        # list or a view, and still raises IndexError on empty results.
        topTrackTitle = list(results)[0]
        core.paragraph('''
                The track "%s" has the lowest P-value of %s corresponding to %s  similarity to the query track "%s"
                as measured by "%s" track similarity measure.
            ''' % (topTrackTitle, strWithNatLangFormatting(results[topTrackTitle][1]),
                   strWithNatLangFormatting(results[topTrackTitle][0]), queryTrackTitle, similarityStatClassName))

        addTableWithTabularAndGsuiteImportButtons(
            core, choices, galaxyFn, cls.Q2_SHORT, tableDict=gsPerTrackResults[1],
            columnNames=gsPerTrackResults[0], gsuite=gsuite, results=results,
            gsuiteAppendAttrs=['similarity_score', 'p_value'], sortable=True)

        columnInd = 1 if useLeadAttribute else 0

        # Split each (similarity, p-value) result: the similarity is plotted,
        # the P-value joins the other additional per-track results. Keys of
        # `results` are unique, so no membership checks are needed (the old
        # `k not in d.keys()` tests built a key list on every iteration).
        similarityPerTrack = OrderedDict()
        extrasPerTrack = OrderedDict()
        for trackTitle, trackResult in results.iteritems():
            similarityPerTrack[trackTitle] = trackResult[0]
            trackExtras = OrderedDict()
            trackExtras['P-Value'] = trackResult[1]
            for extraKey, extraVal in additionalResultsDict[trackTitle].iteritems():
                trackExtras[extraKey] = extraVal
            extrasPerTrack[trackTitle] = trackExtras

        res = GSuiteTracksCoincidingWithQueryTrackTool.drawPlot(
            similarityPerTrack, extrasPerTrack,
            'Similarity to query track', columnInd=columnInd)
        core.line(res)
        core.divEnd()
        core.divEnd()
        core.end()
        return core
    def primer3_results_table(cls, dir, rnr, core=None, url='.'):
        """
        Add the primer3 result row for candidate `rnr` to an HTML table.

        dir  -- directory holding 'primer3.<rnr>.results.txt'.
        rnr  -- zero-based candidate number; shown one-based in the table.
        core -- HtmlCore to append the row to. When omitted (or falsy) a new
                table with header and footer is created instead.
        url  -- base URL for the chart link in the first column; None gives a
                plain number without a link.

        Only the first line of the results file is used; it is split on tabs
        and values longer than 40 characters are wrapped with <br>. If the
        file cannot be opened a 'No primers found' row is added. If the file
        is empty, a row of '?' placeholders is added.

        Returns the HTML of `core` as a string.
        """
        append = True
        if not core:
            append = False
            core = HtmlCore()
            core.header('Primer3 results')
            VariantMeltingProfile.primer3_table_header(core)

        # First-column label. Built before looking at the file so that every
        # branch below can use it (the empty-file branch used to read it
        # before assignment and crashed).
        if url is not None:
            link = '<a href="%s/chart-%d.html">%d (view)</a>' % (
                url, rnr, rnr + 1)
        else:
            link = str(rnr + 1)

        try:
            # The context manager closes the file on every path.
            with open(dir + '/primer3.' + str(rnr) + '.results.txt', 'r') as p3:
                line = p3.readline()
        except IOError:
            row = ([str(rnr + 1), cls.choices.chr[3:], 'No primers found'] +
                   ['?'] * 8)
        else:
            if line:
                row = [link]
                cols = line.strip().split('\t')
                for col in cols:
                    if len(col) > 40:
                        # Break long values into chunks of 40 characters.
                        row.append('<br>'.join(
                            [col[c:c + 40] for c in range(0, len(col), 40)]))
                    else:
                        row.append(col)
                # Pad with empty cells up to the width of the table header.
                row += [''] * (len(cls.primer3_headers) - len(cols) - 1)
            else:
                # Empty results file: there is nothing to show, so fill the
                # row with placeholders up to the width of the table header.
                row = [link] + ['?'] * (len(cls.primer3_headers) - 1)

        core.tableLine(row)
        if not append:
            core.tableFooter()
        return str(core)
예제 #8
0
    def execute(choices, galaxyFn=None, username=''):

        gSuite = getGSuiteFromGalaxyTN(choices.gsuite)
        if gSuite.genome != choices.genome:
            gSuite.setGenomeOfAllTracks(choices.genome)
        regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices)
        paragraphs = generatePilotPageTwoParagraphs(gSuite,
                                                    galaxyFn,
                                                    regSpec=regSpec,
                                                    binSpec=binSpec)

        core = HtmlCore()
        core.begin()
        core.header('Overlap between tracks')
        for prg in paragraphs:
            core.paragraph(prg)
        core.end()

        print core
    def execute(choices, galaxyFn=None, username=''):

        gSuite = getGSuiteFromGalaxyTN(choices.gsuite)
        if gSuite.genome != choices.genome:
            gSuite.setGenomeOfAllTracks(choices.genome)
#         regSpec, binSpec = UserBinSelector.getRegsAndBinsSpec(choices)
        paragraphs = generatePilotPageFiveParagraphs(gSuite, galaxyFn)

        core = HtmlCore()
        core.begin()
        core.divBegin(divId='results-page')
        core.divBegin(divClass='results-section')
        core.header('Clustering of track elements')
        for prg in paragraphs:
            core.paragraph(prg)
        core.divEnd()
        core.divEnd()
        core.end()

        print core
예제 #10
0
 def generateQ3output(cls, analysisQuestion, queryTrackTitle, results, similarityStatClassName):
     """
     Build the HTML results page for analysis question Q3.

     Reads the P-value and the observed
     'TSMC_SummarizedInteractionWithOtherTracksV2Stat' value from `results`
     and writes one sentence that grades the P-value as 'strong' (< 0.01),
     'weak' (< 0.05) or 'no' significance.

     Returns the HtmlCore object holding the page.
     """
     pValue = results['P-value']
     observedValue = results['TSMC_SummarizedInteractionWithOtherTracksV2Stat']

     if pValue < 0.01:
         strength = 'strong'
     elif pValue < 0.05:
         strength = 'weak'
     else:
         strength = 'no'

     summaryTemplate = '''
                 The query track %s shows %s significance in similarity to the suite of %s
                 and corresponding p-value of %s,
                 as measured by "%s" track similarity measure.
             '''

     page = HtmlCore()
     page.begin()
     page.divBegin(divId='results-page')
     page.divBegin(divClass='results-section')
     page.header(analysisQuestion)
     page.paragraph(summaryTemplate % (
         queryTrackTitle,
         strength,
         strWithNatLangFormatting(observedValue),
         strWithNatLangFormatting(pValue),
         similarityStatClassName))
     page.divEnd()
     page.divEnd()
     page.end()
     return page
    def execute(cls, choices, galaxyFn=None, username=''):
        '''Is called when execute-button is pushed by web-user.
        Should print output as HTML to standard out, which will be directed to a results page in Galaxy history. If getOutputFormat is anything else than HTML, the output should be written to the file with path galaxyFn.gtr
        If needed, StaticFile can be used to get a path where additional files can be put (e.g. generated image files).
        choices is a list of selections made by web-user in each options box.
        '''

        # choices[2] is split on ':' and resolved to the file name to validate.
        fnSource = ExternalTrackManager.extractFnFromGalaxyTN(
            choices[2].split(':'))

        core = HtmlCore()
        core.begin()

        # NOTE(review): `valid` is not read in the visible part of this
        # method; the definition is cut off below.
        valid = False
        try:
            core.header('Validating GTrack headers')
            core.styleInfoBegin(styleClass='debug')

            # Print the HTML built so far and start a fresh buffer, so that
            # this output precedes anything printed by the step that follows
            # (the source is created with printWarnings=True).
            print str(core)
            core = HtmlCore()

            # The second argument is choices[1] only when choices[0] == 'Yes',
            # otherwise None.
            gtrackSource = GtrackGenomeElementSource(
                fnSource,
                choices[1] if choices[0] == 'Yes' else None,
                printWarnings=True)

            core.append('Done')
            core.styleInfoEnd()
            core.header('Validating complete GTrack file')
            core.styleInfoBegin(styleClass='debug')

            # Same flush-and-restart pattern as above.
            print str(core)
            core = HtmlCore()

            try:
                # Iterate over every element and discard it; presumably the
                # iteration itself performs the validation (TODO confirm).
                for ge in gtrackSource:
                    pass
            except Exception, e:
                # Re-raised unchanged to the enclosing try.
                raise
            else:
예제 #12
0
 def _buildContent(self):
     """
     Render the tool guide as HTML and store it in self._guideContent.

     A help box (header text plus explanatory text) comes first. Then every
     key of self._guideDataDict becomes one 'toolGuide' section, with a header
     if the key is known to TOOL_INPUT_TYPE_TO_TOOL_GUIDE_HELP_HEADER_DICT,
     and every guide data object in its value list becomes one entry with
     optional image, tool link, description and optional help page link.
     """
     page = HtmlCore()

     # Help box at the top of the guide.
     page.divBegin('toolGuideInfo')
     page.divBegin(divClass='toolGuideInfoText')
     page.divBegin(divClass='toolGuideInfoTextHeader')
     page.line(TOOL_GUIDE_HELP_HEADER_TEXT)
     page.divEnd()
     page.divBegin(divClass='toolGuideInfoText')
     page.line(TOOL_GUIDE_HELP_HEADER_TEXT_TEXT)
     page.divEnd()
     page.divEnd()

     for sectionKey, sectionEntries in self._guideDataDict.iteritems():
         page.divBegin('toolGuide')
         if sectionKey in TOOL_INPUT_TYPE_TO_TOOL_GUIDE_HELP_HEADER_DICT:
             sectionTitle = TOOL_INPUT_TYPE_TO_TOOL_GUIDE_HELP_HEADER_DICT[sectionKey]
             page.header(sectionTitle)

         for entry in sectionEntries:
             page.divBegin(divClass='toolGuideData')

             # Image (if any) and link to the tool.
             page.divBegin(divClass='toolGuideImgTitle')
             if entry.imgUrl:
                 page.image(entry.imgUrl)
             if entry.onclick:
                 linkArgs = ' onclick="%s"' % entry.onclick
             else:
                 linkArgs = ''
             page.link(text=entry.toolDisplayName,
                       url=str(entry.toolUrl),
                       args=linkArgs)
             page.divEnd()

             # Description and optional link to the help page.
             page.divBegin(divClass='toolGuideDesc')
             page.append(entry.description)
             if entry.helpPageUrl:
                 page.link(text='...read more', url=str(entry.helpPageUrl))
             page.divEnd()

             page.divEnd()
         page.divEnd()

     page.divEnd()
     self._guideContent = str(page)
예제 #13
0
    def execute(choices, galaxyFn=None, username=''):

        gSuite = getGSuiteFromGalaxyTN(choices.gsuite)
        if gSuite.genome != choices.genome:
            gSuite.setGenomeOfAllTracks(choices.genome)
        regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices)
        paragraphs = OrderedDict()
        paragraphs[
            'Basic overview of tracks in collection'] = generatePilotPageOneParagraphs(
                gSuite,
                galaxyFn,
                regSpec=regSpec,
                binSpec=binSpec,
                username=username)
        paragraphs['Overlap between tracks'] = generatePilotPageTwoParagraphs(
            gSuite, galaxyFn, regSpec=regSpec, binSpec=binSpec)
        paragraphs[
            'Similarity and uniqueness of tracks'] = generatePilotPageThreeParagraphs(
                gSuite, galaxyFn, regSpec=regSpec, binSpec=binSpec)
        paragraphs['Clustering of tracks'] = generatePilotPageFiveParagraphs(
            gSuite, galaxyFn)

        core = HtmlCore()
        core.begin()
        core.divBegin(divId='results-page', divClass='trackbook_main')
        for hdr, prgList in paragraphs.iteritems():
            core.divBegin(divClass='trackbook_section')
            core.divBegin(divClass='results-section')
            core.header(hdr)
            for prg in prgList:
                core.paragraph(prg)
            core.divEnd()
            core.divEnd()
        core.divEnd()
        core.end()

        print core
    def execute(cls, choices, galaxyFn=None, username=''):

        gSuite = getGSuiteFromGalaxyTN(choices.gsuite)
        if gSuite.genome != choices.genome:
            gSuite.setGenomeOfAllTracks(choices.genome)
        regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices)
        paragraphs = generatePilotPageOneParagraphs(gSuite,
                                                    galaxyFn,
                                                    regSpec=regSpec,
                                                    binSpec=binSpec,
                                                    username=username)

        core = HtmlCore()
        core.begin()
        core.divBegin(divId='results-page')
        core.divBegin(divClass='results-section')
        core.header('Basic overview of tracks in collection')
        for prg in paragraphs:
            core.paragraph(prg)
        core.divEnd()
        core.divEnd()
        core.end()

        print core
예제 #15
0
    def execute(choices, galaxyFn=None, username=''):
        '''
        Is called when execute-button is pushed by web-user. Should print
        output as HTML to standard out, which will be directed to a results page
        in Galaxy history. If getOutputFormat is anything else than HTML, the
        output should be written to the file with path galaxyFn. If needed,
        StaticFile can be used to get a path where additional files can be put
        (e.g. generated image files). choices is a list of selections made by
        web-user in each options box.
        '''
        from gold.application.LogSetup import setupDebugModeAndLogging
        setupDebugModeAndLogging()

        targetTrackNames, targetTrackCollection, targetTrackGenome = getGSuiteDataFromGalaxyTN(
            choices.gSuiteFirst)
        targetTracksDict = OrderedDict(
            zip(targetTrackNames, targetTrackCollection))
        refTrackNames, refTrackCollection, refTrackCollectionGenome = getGSuiteDataFromGalaxyTN(
            choices.gSuiteSecond)
        assert targetTrackGenome == refTrackCollectionGenome, 'Reference genome must be the same one in both GSuite files.'
        refTracksDict = OrderedDict(zip(refTrackNames, refTrackCollection))

        regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices)

        analysisDef = 'dummy -> RawOverlapStat'
        results = OrderedDict()
        for targetTrackName, targetTrack in targetTracksDict.iteritems():
            for refTrackName, refTrack in refTracksDict.iteritems():
                result = GalaxyInterface.runManual([targetTrack, refTrack],
                                                   analysisDef,
                                                   regSpec,
                                                   binSpec,
                                                   targetTrackGenome,
                                                   galaxyFn,
                                                   printRunDescription=False,
                                                   printResults=False)
                if targetTrackName not in results:
                    results[targetTrackName] = OrderedDict()
                results[targetTrackName][
                    refTrackName] = result.getGlobalResult()

        targetTrackTitles = results.keys()

        stat = choices.statistic
        statIndex = STAT_LIST_INDEX[stat]
        title = stat + ' analysis of track collections'

        processedResults = []
        headerColumn = []
        for targetTrackName in targetTrackTitles:
            resultRowDict = processRawResults(results[targetTrackName])
            resultColumn = []
            headerColumn = []
            for refTrackName, statList in resultRowDict.iteritems():
                resultColumn.append(statList[statIndex])
                headerColumn.append(refTrackName)
            processedResults.append(resultColumn)

        transposedProcessedResults = [list(x) for x in zip(*processedResults)]

        tableHeader = ['Track names'] + targetTrackTitles
        htmlCore = HtmlCore()
        htmlCore.begin()
        htmlCore.header(title)
        htmlCore.divBegin('resultsDiv')
        htmlCore.tableHeader(tableHeader,
                             sortable=True,
                             tableId='resultsTable')
        for i, row in enumerate(transposedProcessedResults):
            line = [headerColumn[i]] + row
            htmlCore.tableLine(line)
        htmlCore.tableFooter()
        htmlCore.divEnd()

        addColumnPlotToHtmlCore(htmlCore,
                                targetTrackNames,
                                refTrackNames,
                                stat,
                                title + ' plot',
                                processedResults,
                                xAxisRotation=315)

        htmlCore.hideToggle(styleClass='debug')
        htmlCore.end()

        print htmlCore
예제 #16
0
    def execute(cls, choices, galaxyFn=None, username=''):
        cls._setDebugModeIfSelected(choices)

        genome = choices.genome
        genomicRegions = choices.genomicRegions
        genomicRegionsTracks = choices.genomicRegionsTracks
        sourceTfs = choices.sourceTfs
        sourceTfsDetails = choices.sourceTfsDetails
        tfTracks = choices.tfTracks

        # Get Genomic Region track name:
        if genomicRegions == cls.REGIONS_FROM_HISTORY:
            galaxyTN = genomicRegionsTracks.split(':')
            genElementTrackName = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(
                genome, galaxyTN)

            #queryGSuite = getGSuiteFromGalaxyTN(genomicRegionsTracks)
            #queryTrackList = [Track(x.trackName, x.title) for x in queryGSuite.allTracks()]

        elif genomicRegions == 'Hyperbrowser repository':
            selectedGenRegTrack = TfbsTrackNameMappings.getTfbsTrackNameMappings(
                genome)[genomicRegionsTracks]
            if isinstance(selectedGenRegTrack, dict):
                genElementTrackName = selectedGenRegTrack.values()
            else:
                genElementTrackName = selectedGenRegTrack
        elif genomicRegions == 'Hyperbrowser repository (cell-type-specific)':
            genElementTrackName = ['Private', 'Antonio'
                                   ] + genomicRegionsTracks.split(':')
        else:
            return

        # Get TF track names:
        if isinstance(tfTracks, dict):
            selectedTfTracks = [
                key for key, val in tfTracks.iteritems() if val == 'True'
            ]
        else:
            selectedTfTracks = [tfTracks]

        queryTrackTitle = '--'.join(genElementTrackName)

        trackTitles = [queryTrackTitle]
        tracks = [Track(genElementTrackName, trackTitle=queryTrackTitle)]

        for i in selectedTfTracks:
            if sourceTfs == 'Hyperbrowser repository':
                tfTrackName = TfTrackNameMappings.getTfTrackNameMappings(
                    genome)[sourceTfsDetails] + [i]
                tracks.append(
                    Track(tfTrackName,
                          trackTitle=tfTrackName[len(tfTrackName) - 1]))
                trackTitles.append(tfTrackName[len(tfTrackName) - 1])

            else:
                tfTrackName = i.split(':')

                queryGSuite = getGSuiteFromGalaxyTN(sourceTfsDetails)

                for x in queryGSuite.allTracks():
                    selectedTrackNames = (':'.join(x.trackName))
                    if i == selectedTrackNames:
                        tracks.append(Track(x.trackName, x.title))
                        trackTitles.append(x.trackName[-1])

                # queryGSuite = getGSuiteFromGalaxyTN(sourceTfsDetails)
                # tfTrackName = [x.trackName for x in queryGSuite.allTracks()] + [i]
                # tracks += [Track(x.trackName, x.title) for x in queryGSuite.allTracks()]
                # trackTitles += tfTrackName

        # print tfTrackName
        # print tracks
        # print trackTitles

        trackTitlesForStat = trackTitles

        trackTitles = CommonConstants.TRACK_TITLES_SEPARATOR.join(trackTitles)

        ##first statistic for Q2
        resultsForStatistics = OrderedDict()

        similarityFunc = [  #GSuiteStatUtils.T7_RATIO_OF_OBSERVED_TO_EXPECTED_OVERLAP,
            GSuiteStatUtils.T5_RATIO_OF_OBSERVED_TO_EXPECTED_OVERLAP
        ]

        for similarityStatClassName in similarityFunc:
            regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices)
            analysisBins = GalaxyInterface._getUserBinSource(regSpec,
                                                             binSpec,
                                                             genome=genome)

            mcfdrDepth = AnalysisDefHandler(
                REPLACE_TEMPLATES['$MCFDR$']).getOptionsAsText().values()[0][0]
            analysisDefString = REPLACE_TEMPLATES[
                '$MCFDR$'] + ' -> GSuiteSimilarityToQueryTrackRankingsAndPValuesWrapperStat'
            analysisSpec = AnalysisDefHandler(analysisDefString)
            analysisSpec.setChoice('MCFDR sampling depth', mcfdrDepth)
            analysisSpec.addParameter('assumptions',
                                      'PermutedSegsAndIntersegsTrack_')
            analysisSpec.addParameter(
                'rawStatistic', GSuiteStatUtils.
                PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[similarityStatClassName])
            analysisSpec.addParameter(
                'pairwiseStatistic', GSuiteStatUtils.
                PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[similarityStatClassName]
            )  #needed for call of non randomized stat for assertion
            analysisSpec.addParameter('tail', 'more')
            analysisSpec.addParameter('trackTitles',
                                      trackTitles)  #that need to be string
            analysisSpec.addParameter('queryTracksNum', str(len(tracks)))

            results = doAnalysis(analysisSpec, analysisBins,
                                 tracks).getGlobalResult()

            if not similarityStatClassName in resultsForStatistics:
                resultsForStatistics[similarityStatClassName] = {}

            resultsForStatistics[similarityStatClassName] = results

        keyTitle = [
            #'Normalized ratio of observed to expected overlap (normalized Forbes similarity measure)',
            'Ratio of observed to expected overlap (Forbes similarity measure)'
        ]

        # 'Normalized Forbes coefficient: ratio of observed to expected overlap normalized in relation to the reference GSuite',
        # 'Forbes coefficient: ratio of observed to expected overlap'

        keyTitle = [
            #GSuiteStatUtils.T7_RATIO_OF_OBSERVED_TO_EXPECTED_OVERLAP,
            GSuiteStatUtils.T5_RATIO_OF_OBSERVED_TO_EXPECTED_OVERLAP
        ]

        resultDict = AllTfsOfRegions.countStatistics(similarityFunc, choices,
                                                     genome, tracks,
                                                     trackTitlesForStat)

        resultDictShow = AllTfsOfRegions.countStatisticResults(
            resultDict, keyTitle, trackTitlesForStat)

        #         print resultsForStatistics
        '''selectedTrackNames = []
        if sourceTfs == 'History (user-defined)':
            if selectedTfTracks.split(":")[1] == "gsuite":
                gSuite = getGSuiteFromGalaxyTN(selectedTfTracks)
                for track in gSuite.allTracks():
                    selectedTrackNames.append(track.trackName)
            else:
                galaxyTN = selectedTfTracks.split(':')
                gRegTrackName = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, galaxyTN)
                selectedTrackNames.append(gRegTrackName)
        else:'''

        tfNameList = []

        #Intersection between TF Tracks and selected region (Table 1):
        n = 0
        allTargetBins = []
        alltfNames = []
        table1 = []
        for i in selectedTfTracks:
            n = n + 1
            #newGalaxyFn = galaxyFn.split(".")[0] + str(n) + "." + "dat"

            if sourceTfs == 'Hyperbrowser repository':
                tfTrackName = TfTrackNameMappings.getTfTrackNameMappings(
                    genome)[sourceTfsDetails] + [i]
            else:
                tfTrackName = i.split(':')
                tfTrackName.pop(0)
            #tfIntersection.expandReferenceTrack(upFlankSize, downFlankSize)
            tfIntersection = TrackIntersection(genome, genElementTrackName,
                                               tfTrackName, galaxyFn, str(n))

            regFileNamer = tfIntersection.getIntersectedRegionsStaticFileWithContent(
            )
            targetBins = tfIntersection.getIntersectedReferenceBins()

            #regSpec, targetBins = UserBinSelector.getRegsAndBinsSpec(choices)

            tfHits = [i] * len(targetBins)
            fixedTargetBins = [str(a).split(" ")[0] for a in targetBins]
            extendedTargetBins = [
                list(a) for a in zip(fixedTargetBins, tfHits)
            ]
            allTargetBins = allTargetBins + extendedTargetBins
            tfName = i
            alltfNames = alltfNames + [tfName]

            # Save output table:
            tfNameList.append(tfName)
            line = [tfName] + [len(targetBins)] + [
                regFileNamer.getLink('Download bed-file')
            ] + [
                regFileNamer.getLoadToHistoryLink('Send bed-file to History')
            ]
            table1 = table1 + [line]

        # Computing totals:
        fullCase = ','.join(alltfNames)
        firstColumn = [item[0] for item in allTargetBins]
        uniqueAllTargetBins = list(set(firstColumn))

        # Group TFs by bound region:
        d1 = defaultdict(list)
        for k, v in allTargetBins:
            d1[k].append(v)
        allTFTargetBins = dict((k, ','.join(v)) for k, v in d1.iteritems())

        allTFTargetList = []
        fullCaseTFTargetList = []
        for key, value in allTFTargetBins.iteritems():
            allTFTargetList = allTFTargetList + [[key, value]]
            if value == fullCase:
                fullCaseTFTargetList = fullCaseTFTargetList + [[key, value]]

        analysis3 = TrackIntersection.getFileFromTargetBins(
            allTFTargetList, galaxyFn, str(3))
        analysis4 = TrackIntersection.getFileFromTargetBins(
            fullCaseTFTargetList, galaxyFn, str(4))

        # Print output to table:
        title = 'TF targets and co-occupancy of ' + genElementTrackName[
            -1] + ' genomic regions'
        htmlCore = HtmlCore()

        pf = plotFunction(tableId='resultsTable')

        htmlCore.begin()
        htmlCore.header(title)
        htmlCore.divBegin('resultsDiv')

        htmlCore.line(pf.createButton(bText='Show/Hide more results'))

        # htmlCore.tableHeader(['Transcription Factor', 'Normalized ratio of observed to expected overlap (normalized Forbes similarity measure) -- Similarity to genomic regions track', 'Normalized ratio of observed to expected overlap (normalized Forbes similarity measure) -- p-value','Ratio of observed to expected overlap (Forbes similarity measure) -- Similarity to genomic regions track', 'Ratio of observed to expected overlap (Forbes similarity measure) -- p-value', 'Number of TF-Target Track Regions', 'File of TF Target Regions', 'File of TF Target Regions', 'Number of TF-co-occupied Regions', 'File of TF co-occupied Regions', 'File of TF co-occupied Regions', 'Rank of TF co-occupancy motifs', 'Rank of TF co-occupancy motifs'], sortable=True, tableId='resultsTable')

        #previous ordering
        # htmlCore.tableHeader(['Transcription Factor', 'Normalized Forbes index --overlap score',
        #                       'Normalized Forbes index --p-value',
        #                       'Forbes index --overlap score', 'Forbes index --p-value',
        #                       'Number of TF-Target Track Regions', 'File of TF Target Regions',
        #                       'File of TF Target Regions', 'Number of target track regions occupied by this TF',
        #                       'File of TF co-occupied Regions', 'File of TF co-occupied Regions',
        #                       'Rank of TF co-occupancy motifs', 'Rank of TF co-occupancy motifs'],
        #                      sortable=True, tableId='resultsTable')

        htmlCore.tableHeader(
            [
                'Transcription Factor',
                'Number of TF-Target Track Regions',
                'File of TF Track Regions',
                'Number of target track regions occupied by this TF',
                'File of TF Target Regions',
                'Forbes index --overlap score',
                'Forbes index --p-value',
                #'Normalized Forbes index --overlap score', 'Normalized Forbes index --p-value',
                'File of TF co-occupied Regions',
                'Rank of TF co-occupancy motifs'
            ],
            sortable=True,
            tableId='resultsTable')

        # Adding co-occupancy results to table:
        n = 1000
        genRegionNumElements = [
            int(x) for x in getTrackRelevantInfo.getNumberElements(
                genome, genElementTrackName)
        ]

        for key0, it0 in resultsForStatistics.iteritems():
            for el in tfNameList:
                if el not in it0:
                    resultsForStatistics[key0][el] = [None, None]

        resultsPlotDict = {}
        resultPlotCat = []
        resultsPlot = []

        resultsForStatisticsProper = {}
        for key0, it0 in resultsForStatistics.iteritems():
            if not key0 in resultsPlotDict:
                resultsPlotDict[key0] = {}
            resultsPlotPart = []
            for key1, it1 in it0.iteritems():
                resultsPlotPart.append(it1[0])
                if not key1 in resultsForStatisticsProper:
                    resultsForStatisticsProper[key1] = []
                if not key1 in resultsPlotDict[key0]:
                    resultsPlotDict[key0][key1] = None
                for el in it1:
                    resultsForStatisticsProper[key1].append(el)
                resultsPlotDict[key0][key1] = it1[0]

        resultPlotCat.append(tfNameList)
        resultPlotCat.append(tfNameList)

        #resultPlotCatPart = tfNameList

        #         print resultPlotCatPart

        for key0, it0 in resultsPlotDict.iteritems():
            resultsPlotPart = []
            for el in tfNameList:
                if el in it0:
                    resultsPlotPart.append(it0[el])
                else:
                    resultsPlotPart.append(None)
            resultsPlot.append(resultsPlotPart)

        for i in table1:
            thisCaseTFTargetList = []
            for key, value in allTFTargetList:
                if i[0] in value and ',' in value:
                    thisCaseTFTargetList = thisCaseTFTargetList + [[
                        key, value
                    ]]
            n = n + 1

            thisAnalysis = TrackIntersection.getFileFromTargetBins(
                thisCaseTFTargetList, galaxyFn, str(n))

            thisCaseCoCountsList = []
            thing = [x[1] for x in thisCaseTFTargetList]
            for k in list(set(thing)):
                thisCount = thing.count(k)
                thisCaseCoCountsList = thisCaseCoCountsList +  \
                                       [[k, thisCount, 100*float(thisCount)/float(sum(genRegionNumElements)), 100*float(thisCount)/float(len(thisCaseTFTargetList))]]
            thisCaseCoCountsList.sort(key=lambda x: x[2], reverse=True)
            n = n + 1

            thisCoCountsAnalysis = TrackIntersection.getOccupancySummaryFile(
                thisCaseCoCountsList, galaxyFn, str(n))

            thisLine = [len(thisCaseTFTargetList)] + \
            [thisAnalysis.getLink('Download file')] + [thisAnalysis.getLoadToHistoryLink('Send file to History')] + \
            [thisCoCountsAnalysis.getLink('Download file')] + [thisCoCountsAnalysis.getLoadToHistoryLink('Send file to History')]

            newLineI = []
            tfName = i[0]
            newLineI.append(tfName)

            for el in resultsForStatisticsProper[tfName]:
                newLineI.append(el)

            for elN in range(1, len(i)):
                newLineI.append(i[elN])

#             htmlCore.tableLine(i + thisLine)

# htmlCore.tableHeader(['Transcription Factor', 'Normalized Forbes index --overlap score',
#                       'Normalized Forbes index --p-value',
#                       'Forbes index --overlap score', 'Forbes index --p-value',
#                       'Number of TF-Target Track Regions', 'File of TF Target Regions',
#                       'File of TF Target Regions', 'Number of target track regions occupied by this TF',
#                       'File of TF co-occupied Regions', 'File of TF co-occupied Regions',
#                       'Rank of TF co-occupancy motifs', 'Rank of TF co-occupancy motifs'],
#                      sortable=True, tableId='resultsTable')

# htmlCore.tableHeader(['Transcription Factor', 'Number of TF-Target Track Regions', 'File of TF Track Regions',
#                      'Number of target track regions occupied by this TF', 'File of TF Target Regions',
#                      'Forbes index --overlap score', 'Forbes index --p-value',
#                      'Normalized Forbes index --overlap score', 'Normalized Forbes index --p-value',
#                      'File of TF co-occupied Regions', 'Rank of TF co-occupancy motifs'],
#                     sortable=True, tableId='resultsTable')

            tl = newLineI + thisLine
            # previous ordering tl - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
            # actual ordering - 0, 5, 7, 8, 7, 3, 4, 1, 2, 9, 11

            #ordering  = [0, 5, 7, 8, 10, 3, 4, 1, 2, 10, 12]
            ordering = [0, 3, 5, 6, 8, 1, 2, 8, 10]

            #1, 2, => delete

            eoList = []
            for eo in ordering:
                eoList.append(tl[eo])

            htmlCore.tableLine(eoList)

        totalCoOccupancyTargetList = []
        n = 2000
        for key, value in allTFTargetList:
            n = n + 1
            if ',' in value:
                totalCoOccupancyTargetList = totalCoOccupancyTargetList + [[
                    key, value
                ]]
        #newGalaxyFn = galaxyFn.split(".")[0] + str(n) + "." + "dat"
        totalCoOccupancyAnalysis = TrackIntersection.getFileFromTargetBins(
            totalCoOccupancyTargetList, galaxyFn, str(n))
        #line = ['Total reported regions'] + [len(allTargetBins)] + [''] + [''] + [''] + [''] + ['']

        #line = ['Full co-occupancy of ' + fullCase] + ['-'] + ['-'] + ['-'] + ['-'] + ['-'] + ['-'] + ['-'] + [len(fullCaseTFTargetList)] + [analysis4.getLink('Download file')] + [analysis4.getLoadToHistoryLink('Send file to History')] + ['-'] + ['-']

        line = ['Full co-occupancy of ' + fullCase] + \
               ['-'] + \
               ['-'] + \
               [len(fullCaseTFTargetList)] + \
               ['-'] + \
               ['-'] + \
               ['-'] + \
               [analysis4.getLoadToHistoryLink('Send file to History')] + \
               ['-']

        htmlCore.tableLine(line)
        #line = ['Total unique regions'] + ['-'] + ['-'] + ['-'] + ['-']  + [len(allTFTargetList)] + [analysis3.getLink('Download bed-file')] + [analysis3.getLoadToHistoryLink('Send bed-file to History')] + [len(totalCoOccupancyTargetList)] + [totalCoOccupancyAnalysis.getLink('Download file')] + [totalCoOccupancyAnalysis.getLoadToHistoryLink('Send file to History')] + ['-'] + ['-']

        line = ['Total unique regions'] + \
               [len(allTFTargetList)] + \
               ['-'] + \
               [len(totalCoOccupancyTargetList)] + \
               [analysis3.getLoadToHistoryLink('Send bed-file to History')] + \
               ['-'] +\
               ['-'] + \
               [totalCoOccupancyAnalysis.getLoadToHistoryLink('Send file to History')] + \
               ['-']

        htmlCore.tableLine(line)

        htmlCore.tableFooter()
        htmlCore.divEnd()

        # htmlCore.line(pf.hideColumns(indexList=[2, 4]))
        #

        sumRes = 0
        for r in resultsPlot[0]:
            if r != None:
                sumRes += r

        if sumRes != 0:
            vg = visualizationGraphs()
            result = vg.drawColumnCharts(
                [resultsPlot[0]],
                height=300,
                categories=resultPlotCat,
                legend=False,
                addOptions='width: 90%; float:left; margin: 0 4%;',
                #titleText=['Overlap between TFs and genomic region using normalized Forbes', 'Overlap between TFs and genomic region using Forbes'],
                titleText=[
                    'Overlap between TFs and genomic region using Forbes'
                ],
                xAxisRotation=90,
                xAxisTitle='TF',
                yAxisTitle='value')

            htmlCore.line(result)

        for key0, it0 in resultDictShow.iteritems():
            htmlCore.divBegin('resultsDiv' + str(key0))
            htmlCore.header(key0)
            htmlCore.tableHeader(it0[0],
                                 sortable=True,
                                 tableId='resultsTable' + str(key0))

            for elN in range(1, len(it0)):
                htmlCore.tableLine(it0[elN])

            htmlCore.tableFooter()
            htmlCore.divEnd()

        htmlCore.hideToggle(styleClass='debug')

        htmlCore.end()
        print htmlCore
    def execute(choices, galaxyFn=None, username=''):
        '''
        Is called when execute-button is pushed by web-user. Should print
        output as HTML to standard out, which will be directed to a results page
        in Galaxy history. If getOutputFormat is anything else than HTML, the
        output should be written to the file with path galaxyFn. If needed,
        StaticFile can be used to get a path where additional files can be put
        (e.g. generated image files). choices is a list of selections made by
        web-user in each options box.
        '''
        gSuite = getGSuiteFromGalaxyTN(choices.gsuite)
        attributeNames = gSuite.attributes
        attributeValCountDict = dict()
        for attrName in attributeNames:
            attributeValCountDict[attrName] = defaultdict(int)
        for gsTrack in gSuite.allTracks():
            for attrName in gsTrack.attributes:
                attributeValCountDict[attrName][gsTrack.getAttribute(
                    attrName)] = attributeValCountDict[attrName][
                        gsTrack.getAttribute(attrName)] + 1

        htmlCore = HtmlCore()

        htmlCore.begin()
        htmlCore.divBegin(divId='results-page')
        htmlCore.divBegin(divClass='results-section')
        htmlCore.header('Meta-data summary:')

        summaryList = []
        locationLine = '''Location:<b> %s</b>. %s'''
        if gSuite.location == GSuiteConstants.UNKNOWN:
            summaryList.append(locationLine % (
                gSuite.location,
                "The location of the tracks in the GSuite is not specified as remote or local."
            ))
        elif gSuite.location == GSuiteConstants.REMOTE:
            summaryList.append(
                locationLine %
                (gSuite.location,
                 "The tracks in the GSuite are located in a remote server."))
        elif gSuite.location == GSuiteConstants.LOCAL:
            summaryList.append(
                locationLine %
                (gSuite.location,
                 "The tracks in the GSuite are located on your server."))
        elif gSuite.location == GSuiteConstants.MULTIPLE:
            summaryList.append(locationLine % (
                gSuite.location,
                "The tracks in the GSuite are located both on your local server and at a remote location."
            ))

        fileFormatLine = '''File format:<b> %s</b>. %s'''
        if gSuite.fileFormat == GSuiteConstants.UNKNOWN:
            summaryList.append(fileFormatLine % (
                gSuite.fileFormat,
                "The file format of the tracks in the GSuite is not specified."
            ))
        elif gSuite.fileFormat == GSuiteConstants.PREPROCESSED:
            summaryList.append(fileFormatLine % (
                gSuite.fileFormat,
                "The tracks in the GSuite are preprocessed and ready for analysis."
            ))
        elif gSuite.fileFormat == GSuiteConstants.PRIMARY:
            summaryList.append(fileFormatLine % (
                gSuite.fileFormat,
                "The tracks in the GSuite can be manipulated, but must be preprocessed first for analysis."
            ))
        elif gSuite.fileFormat == GSuiteConstants.MULTIPLE:
            summaryList.append(fileFormatLine % (
                gSuite.fileFormat,
                "The tracks in the GSuite are both in preprocessed and primary formats. Only preprocessed tracks can be analyzed by HyperBrowser tools."
            ))

        trackTypeLine = '''Track type:<b> %s</b>. %s'''
        if gSuite.trackType == GSuiteConstants.UNKNOWN:
            summaryList.append(
                trackTypeLine %
                (gSuite.trackType,
                 "The track type of the tracks in the GSuite is not specified."
                 ))
        elif gSuite.trackType == GSuiteConstants.MULTIPLE:
            summaryList.append(
                trackTypeLine %
                (gSuite.trackType,
                 "The tracks in the GSuite are of different track types."))
        else:
            summaryList.append(trackTypeLine % (
                gSuite.trackType,
                "The tracks in the GSuite are all (subtypes) of the same type."
            ))

        genomeLine = '''Genome:<b> %s</b>. %s'''
        if gSuite.genome == GSuiteConstants.UNKNOWN:
            summaryList.append(
                genomeLine %
                (gSuite.genome,
                 "The genome of the tracks in the GSuite is not specified."))
        elif gSuite.fileFormat == GSuiteConstants.MULTIPLE:
            summaryList.append(
                genomeLine %
                (gSuite.genome,
                 "The tracks in the GSuite are of different genomes."))
        else:
            summaryList.append(
                genomeLine %
                (gSuite.genome,
                 "The tracks in the GSuite come from the same genome."))

        htmlCore.unorderedList(summaryList)

        if len(attributeNames) > 0:

            paragraph2 = '''
                There are<b> %s </b>attributes in the GSuite. 
                For each of the attributes the most frequent value is given in the table below.
            ''' % str(len(attributeNames))

            htmlCore.paragraph(paragraph2)

            tableDataDict = OrderedDict()
            for attrName in attributeNames:
                maxVal, maxCount = max(
                    attributeValCountDict[attrName].iteritems(),
                    key=operator.itemgetter(1))

                if maxCount == 1:
                    mostFreqVal = '[All values are unique]'
                    nrOfOccurrences = 1
                elif maxCount == gSuite.numTracks():
                    mostFreqVal = maxVal
                    nrOfOccurrences = str(gSuite.numTracks()) + ' [all tracks]'
                else:
                    mostFreqValList = [
                        x for x, y in
                        attributeValCountDict[attrName].iteritems()
                        if y == maxCount
                    ]
                    mostFreqVal = ' | '.join(mostFreqValList)
                    nrOfOccurrences = str(maxCount) + ' [tie]' if len(
                        mostFreqValList) > 1 else maxCount

                tableDataDict[attrName] = [mostFreqVal, nrOfOccurrences]

            htmlCore.tableFromDictionary(tableDataDict, [
                'Attribute name', 'Most frequent value',
                'Number of occurrences'
            ],
                                         sortable=False,
                                         expandable=False)

        htmlCore.divEnd()
        htmlCore.divEnd()
        htmlCore.end()

        print htmlCore
예제 #18
0
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Is called when execute-button is pushed by web-user. Should print
        output as HTML to standard out, which will be directed to a results page
        in Galaxy history. If getOutputFormat is anything else than HTML, the
        output should be written to the file with path galaxyFn. If needed,
        StaticFile can be used to get a path where additional files can be put
        (e.g. generated image files). choices is a list of selections made by
        web-user in each options box.
        '''

        cls._setDebugModeIfSelected(choices)
        genome = choices.genome
        queryGSuite = getGSuiteFromGalaxyTN(choices.queryGSuite)
        refGSuite = getGSuiteFromGalaxyTN(choices.refGSuite)
        if choices.similarityFunc:
            similarityStatClassNameKey = choices.similarityFunc
        else:
            similarityStatClassNameKey = GSuiteStatUtils.T5_RATIO_OF_OBSERVED_TO_EXPECTED_OVERLAP

        isPointsVsSegments, pointsGSuite, segGSuite = cls.isPointsVsSegmentsAnalysis(queryGSuite, refGSuite)

        regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices)
        analysisBins = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome=genome)

        queryTrackList = [Track(x.trackName, x.title) for x in queryGSuite.allTracks()]
        refTrackList = [Track(x.trackName, x.title) for x in refGSuite.allTracks()]

        queryTrackTitles = CommonConstants.TRACK_TITLES_SEPARATOR.join(
            [quote(x.title, safe='') for x in queryGSuite.allTracks()])
        refTrackTitles = CommonConstants.TRACK_TITLES_SEPARATOR.join(
            [quote(x.title, safe='') for x in refGSuite.allTracks()])

        analysisSpec = AnalysisSpec(GSuiteVsGSuiteWrapperStat)
        analysisSpec.addParameter('queryTracksNum', str(len(queryTrackList)))
        analysisSpec.addParameter('refTracksNum', str(len(refTrackList)))
        analysisSpec.addParameter('queryTrackTitleList', queryTrackTitles)
        analysisSpec.addParameter('refTrackTitleList', refTrackTitles)
        analysisSpec.addParameter('similarityStatClassName',
                                  GSuiteStatUtils.PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[similarityStatClassNameKey])
        if choices.removeZeroRow:
            analysisSpec.addParameter('removeZeroRow', choices.removeZeroRow)
        if choices.removeZeroCol:
            analysisSpec.addParameter('removeZeroColumn', choices.removeZeroCol)
        resultsObj = doAnalysis(analysisSpec, analysisBins, queryTrackList + refTrackList)
        results = resultsObj.getGlobalResult()

        # baseDir = GalaxyRunSpecificFile([RAW_OVERLAP_TABLE_RESULT_KEY], galaxyFn).getDiskPath()
        # rawOverlapHeatmapPresenter = HeatmapFromDictOfDictsPresenter(resultsObj, baseDir,
        #                                                              'Overlapping base-pair of tracks from the two suites',
        #                                                              printDimensions=False)

        rawOverlapTableData = results[RAW_OVERLAP_TABLE_RESULT_KEY]
        maxRawOverlap, maxROt1, maxROt2 = rawOverlapTableData.getMaxElement()
        similarityScoreTableData = results[SIMILARITY_SCORE_TABLE_RESULT_KEY]
        maxSimScore, maxSSt1, maxSSt2 = similarityScoreTableData.getMaxElement()

        baseDir = GalaxyRunSpecificFile([], galaxyFn=galaxyFn).getDiskPath()
        heatmapPresenter = HeatmapFromTableDataPresenter(resultsObj, baseDir=baseDir,
                                            header='Overlapping base-pairs between the tracks of the two suites',
                                            printDimensions=False)
        tablePresenter = MatrixGlobalValueFromTableDataPresenter(resultsObj, baseDir=baseDir,
                                            header='Table of overlapping base-pairs between the tracks of the two suites')

        core = HtmlCore()
        core.begin()
        core.divBegin(divId='results-page')
        core.divBegin(divId='svs-res-main-div', divClass='svs-res-main')    
        core.divBegin(divId='raw-overlap-div', divClass='results-section')
        core.divBegin(divId='raw-overlap-table', divClass='svs-table-div')
        core.header('Base-pair overlaps between the tracks of the two GSuites')
        core.paragraph("""From the tracks in the two GSuites the highest base-pair overlap <b>(%s bps)</b>
        is observed for the pair of <b>'%s'</b> and <b>'%s'</b>.""" % (maxRawOverlap, maxROt1, maxROt2))
        
        core.divBegin(divId='raw-table-result', divClass='result-div')
        core.divBegin(divId='raw-table-result', divClass='result-div-left')
        core.line('''Follow the links to view the results in an HTML table
        or raw tabular form:''')
        core.divEnd()
        core.divBegin(divId='raw-table-result', divClass='result-div-right')
        core.line(tablePresenter.getReference(RAW_OVERLAP_TABLE_RESULT_KEY))
        core.divEnd()#rawoverlap table
        core.divEnd()
        core.divEnd()
        core.divBegin(divId='raw-overlap-heatmap', divClass='svs-heatmap-div')
        try:
            core.header('Heatmap of base-pair overlaps')
            core.divBegin(divId='raw-table-result', divClass='result-div-heatmap')
            core.divBegin(divId='raw-table-result', divClass='result-div-left')
            core.line('''Follow the links to view the heatmap in the desired format:''')
            core.divEnd()
            core.divBegin(divId='raw-table-result', divClass='result-div-right')
            core.line(heatmapPresenter.getReference(RAW_OVERLAP_TABLE_RESULT_KEY))
            core.divEnd()
            core.divEnd()
        except:
            core.line('Heatmap for the base-pair overlaps could not be created.')
            core.divEnd()
            core.divEnd()
        core.divEnd()#rawoverlap heatmap
        core.divEnd()#rawoverlap
        

        core.divBegin(divId='sim-score-div', divClass='results-section')
        core.divBegin(divId='sim-score-table', divClass='svs-table-div')
        core.header('Similarity score between the tracks of the two GSuites measured by %s' % choices.similarityFunc)
        core.paragraph("""From the tracks in the two GSuites the highest similarity score <b>(%s)</b>
        is observed for the pair of <b>'%s'</b> and <b>'%s'</b>.""" % (maxSimScore, maxSSt1, maxSSt2))
        core.divBegin(divId='raw-table-result', divClass='result-div')
        core.divBegin(divId='raw-table-result', divClass='result-div-left')
        core.line("""Follow the links to view the results in an HTML table or raw tabular form:""")
        core.divEnd()
        core.divBegin(divId='raw-table-result', divClass='result-div-right')
        core.line(tablePresenter.getReference(SIMILARITY_SCORE_TABLE_RESULT_KEY))
        core.divEnd()
        core.divEnd()
        core.divEnd()#simscore table
        core.divBegin(divId='sim-score-heatmap', divClass='svs-heatmap-div')
        try:
            core.header('Heatmap of similarity scores')
            core.divBegin(divId='raw-table-result', divClass='result-div-heatmap')
            core.divBegin(divId='raw-table-result', divClass='result-div-left')
            core.line('''Follow the links to view the heatmap in the desired format:''')
            core.divEnd()
            core.divBegin(divId='raw-table-result', divClass='result-div-right')
            core.line(heatmapPresenter.getReference(SIMILARITY_SCORE_TABLE_RESULT_KEY))
            core.divEnd()
            core.divEnd()
        except:
            core.line('Heatmap for the similarity score could not be created.')
            core.divEnd()
            core.divEnd()
        core.divEnd()#simscore heatmap
        core.divEnd()#simscore
        core.divEnd()#results
        # core.paragraph(
        #     '''Table displaying the number of base-pairs overlapping between the tracks in the two suites:''')
        # core.tableFromDictOfDicts(rawOverlapTableData, firstColName='Track title')
        # # core.paragraph(rawOverlapHeatmapPresenter.getReference(resDictKey=RAW_OVERLAP_TABLE_RESULT_KEY))
        # core.paragraph(
        #     '''Table displaying the similarity score for the tracks in the two suites as measured by %s:''' % similarityStatClassNameKey)
        # core.tableFromDictOfDicts(similarityScoreTableData, firstColName='Track title')
        #
        core.divEnd()
        core.end()

        print str(core)
예제 #19
0
    def execute(cls, choices, galaxyFn=None, username=''):
        cls._setDebugModeIfSelected(choices)

        targetGSuite = getGSuiteFromGalaxyTN(choices.gSuiteFirst)
        refGSuite = getGSuiteFromGalaxyTN(choices.gSuiteSecond)

        regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices)

        analysisDef = 'dummy -> RawOverlapStat'
        # analysisDef = 'dummy [withOverlaps=yes] -> RawOverlapAllowSingleTrackOverlapsStat'
        results = OrderedDict()

        for targetTrack in targetGSuite.allTracks():
            targetTrackName = targetTrack.title
            for refTrack in refGSuite.allTracks():
                refTrackName = refTrack.title
                if targetTrack.trackName == refTrack.trackName:
                    # print targetTrack.title
                    # print targetTrack.trackName
                    result = DetermineSuiteTracksCoincidingWithAnotherSuite.handleSameTrack(
                        targetTrack.trackName, regSpec, binSpec,
                        targetGSuite.genome, galaxyFn)
                else:
                    result = GalaxyInterface.runManual(
                        [targetTrack.trackName, refTrack.trackName],
                        analysisDef,
                        regSpec,
                        binSpec,
                        targetGSuite.genome,
                        galaxyFn,
                        printRunDescription=False,
                        printResults=False,
                        printProgress=False).getGlobalResult()
                if targetTrackName not in results:
                    results[targetTrackName] = OrderedDict()
                results[targetTrackName][refTrackName] = result

        stat = STAT_OVERLAP_COUNT_BPS
        statIndex = STAT_LIST_INDEX[stat]
        title = ''

        processedResults = []
        headerColumn = []
        for targetTrackName in targetGSuite.allTrackTitles():
            resultRowDict = processRawResults(results[targetTrackName])
            resultColumn = []
            headerColumn = []
            for refTrackName, statList in resultRowDict.iteritems():
                resultColumn.append(statList[statIndex])
                headerColumn.append(refTrackName)
            processedResults.append(resultColumn)

        outputTable = {}
        for elN in range(0, len(headerColumn)):
            outputTable[elN] = {}
            outputTable[elN]['id'] = headerColumn[elN]

        transposedProcessedResults = [list(x) for x in zip(*processedResults)]

        # second question sumSecondgSuite
        # first question numSecondgSuite
        # fifth question numSecondgSuitePercentage
        for i in range(0, len(transposedProcessedResults)):
            outputTable[i]['sumSecondgSuite'] = sum(
                transposedProcessedResults[i])
            if not 'numSecondgSuite' in outputTable[i]:
                outputTable[i]['numSecondgSuite'] = 0
            for j in range(0, len(transposedProcessedResults[i])):
                if transposedProcessedResults[i][j] >= 1:
                    outputTable[i]['numSecondgSuite'] += 1
                else:
                    outputTable[i]['numSecondgSuite'] += 0
            outputTable[i]['numSecondgSuitePercentage'] = float(
                outputTable[i]['numSecondgSuite']) / float(
                    targetGSuite.numTracks()) * 100

        from gold.statistic.CountSegmentStat import CountSegmentStat
        from gold.statistic.CountPointStat import CountPointStat
        from gold.description.TrackInfo import TrackInfo
        from gold.statistic.CountStat import CountStat

        # third question numPairBpSecondgSuite
        # fourth question numFreqBpSecondgSuite
        i = 0
        for refTrack in refGSuite.allTracks():
            formatName = TrackInfo(refTrack.genome,
                                   refTrack.trackName).trackFormatName
            analysisDef = CountStat
            analysisBins = GalaxyInterface._getUserBinSource(
                regSpec, binSpec, refTrack.genome)
            results = doAnalysis(AnalysisSpec(analysisDef), analysisBins,
                                 [PlainTrack(refTrack.trackName)])
            resultDict = results.getGlobalResult()
            if len(resultDict) == 0:
                outputTable[i]['numPairBpSecondgSuite'] = None
                outputTable[i]['numFreqBpSecondgSuite'] = None
                outputTable[i]['numFreqUniqueBpSecondgSuite'] = None
            else:
                outputTable[i]['numPairBpSecondgSuite'] = resultDict['Result']

                if outputTable[i]['numPairBpSecondgSuite'] != 0:
                    outputTable[i]['numFreqBpSecondgSuite'] = float(
                        outputTable[i]['sumSecondgSuite']) / float(
                            outputTable[i]['numPairBpSecondgSuite'])
                else:
                    outputTable[i]['numFreqBpSecondgSuite'] = None

                if outputTable[i]['sumSecondgSuite'] != 0:
                    outputTable[i]['numFreqUniqueBpSecondgSuite'] = float(
                        outputTable[i]['numPairBpSecondgSuite']) / float(
                            outputTable[i]['sumSecondgSuite'])
                else:
                    outputTable[i]['numFreqUniqueBpSecondgSuite'] = None

            i += 1

        # sortTable
        outputTableLine = []
        for key, item in outputTable.iteritems():
            line = [
                item['id'], item['numSecondgSuite'], item['sumSecondgSuite'],
                item['numPairBpSecondgSuite'], item['numFreqBpSecondgSuite'],
                item['numFreqUniqueBpSecondgSuite'],
                item['numSecondgSuitePercentage']
            ]
            outputTableLine.append(line)

        import operator
        outputTableLineSort = sorted(outputTableLine,
                                     key=operator.itemgetter(1),
                                     reverse=True)

        tableHeader = [
            'Region ID ', 'Number of cases with at least one event ',
            'Total number of events', 'Genome coverage (unique bp)',
            'Number of events per unique bp', 'Number of unique bp per event',
            'Percentage of cases with at least one event'
        ]
        htmlCore = HtmlCore()

        htmlCore.begin()

        htmlCore.line(
            "<b>Identification of genomic elements with high event recurrence</b> "
        )

        htmlCore.header(title)
        htmlCore.divBegin('resultsDiv')
        htmlCore.tableHeader(tableHeader,
                             sortable=True,
                             tableId='resultsTable')

        for line in outputTableLineSort:
            htmlCore.tableLine(line)

        plotRes = []
        plotXAxis = []
        for lineInx in range(1, len(outputTableLineSort[0])):
            plotResPart = []
            plotXAxisPart = []
            for lineInxO in range(0, len(outputTableLineSort)):
                # if outputTableLineSort[lineInxO][lineInx]!=0 and
                # if outputTableLineSort[lineInxO][lineInx]!=None:
                plotResPart.append(outputTableLineSort[lineInxO][lineInx])
                plotXAxisPart.append(outputTableLineSort[lineInxO][0])
            plotRes.append(plotResPart)
            plotXAxis.append(plotXAxisPart)

        htmlCore.tableFooter()
        htmlCore.divEnd()

        htmlCore.divBegin('plot', style='padding-top:20px;margin-top:20px;')

        vg = visualizationGraphs()
        res = vg.drawColumnCharts(
            plotRes,
            titleText=tableHeader[1:],
            categories=plotXAxis,
            height=500,
            xAxisRotation=270,
            xAxisTitle='Ragion ID',
            yAxisTitle='Number of cases with at least one event',
            marginTop=30,
            addTable=True,
            sortableAccordingToTable=True,
            legend=False)
        htmlCore.line(res)
        htmlCore.divEnd()

        htmlCore.hideToggle(styleClass='debug')
        htmlCore.end()

        print htmlCore
예제 #20
0
    def getRunDescription(genome, trackNames, analysisDef, ubSource,
                          revEngBatchLine, urlForTrackAutoSelection, **kwArgs):
        """
        Build the HTML run description for an analysis.

        Sections are appended to an HtmlCore in a fixed order: genome, one
        section per non-empty track, analysis text, options, null and
        alternative hypothesis (when defined), analysis regions, solution,
        time of analysis, optional track-autoselection URL, optional batch
        command line, and references.

        :param genome: genome passed to Analysis, GenomeInfo and TrackInfo.
        :param trackNames: sequence of exactly three track names, labelled
            'TRACK 1', 'TRACK 2' and 'INTENSITY TRACK'. Entries equal to None
            or [] get no section. Only the first two are passed to Analysis.
        :param analysisDef: analysis definition passed to Analysis.
        :param ubSource: iterable of analysis regions. Its first element is
            used to instantiate the statistic; its 'description' attribute,
            if present, is printed under 'ANALYSIS REGIONS'.
        :param revEngBatchLine: batch command line to print; the section is
            skipped when this is None or ''.
        :param urlForTrackAutoSelection: URL to print; the section is skipped
            when this is None or ''.
        :param kwArgs: extra keyword arguments forwarded to Analysis.
        :return: the accumulated HTML, i.e. str() of the HtmlCore.
        """
        assert len(trackNames) == 3
        core = HtmlCore()

        analysis = Analysis(analysisDef, genome, trackNames[0], trackNames[1],
                            **kwArgs)

        core.header('GENOME')
        core.append(GenomeInfo(genome).mainInfo(printEmpty=False))
        core.divider()

        # Format-converter choices are only shown when there is exactly one
        # per regular track; otherwise no 'Treated as' line is printed.
        formatChoices = analysis.getFormatConverterChoicesAsText().items()
        tr1FormatChoice, tr2FormatChoice = formatChoices if len(
            formatChoices) == 2 else (None, None)

        first = True
        for tn, label, formatChoice in zip(
                trackNames,
                ['TRACK 1', 'TRACK 2', 'INTENSITY TRACK'],
                [tr1FormatChoice, tr2FormatChoice, None]):
            if tn in [None, []]:
                continue

            # Dividers go between track sections, not before the first one.
            if not first:
                core.divider()

            core.header(label)
            trackInfo = TrackInfo(genome, tn)
            if ExternalTrackManager.isHistoryTrack(tn):
                assert len(
                    tn) >= 4, 'Length of external track name < 4: %s' % str(tn)
                core.descriptionLine(
                    'Name',
                    ExternalTrackManager.extractNameFromHistoryTN(tn) +
                    ' (from history)' + os.linesep)
            else:
                core.descriptionLine('Name', ':'.join(tn) + os.linesep)
            core.append(trackInfo.mainInfo(printEmpty=False))

            if formatChoice is not None:
                core.descriptionLine('Treated as', formatChoice[1])

            first = False

        core.divider()
        core.header('ANALYSIS')
        core.paragraph(''.join(str(analysis).split(':')[1:]))

        # The OPTIONS header is only emitted when there is at least one option.
        first = True
        for label, choice in analysis.getInterfaceChoicesAsText().items():
            if first:
                core.divider()
                core.header('OPTIONS')

            core.descriptionLine(label, choice)
            first = False

        h0 = analysis.getH0()
        if h0 is not None:
            core.divider()
            core.header('NULL HYPOTHESIS')
            core.paragraph(h0)

        h1 = analysis.getH1()
        if h1 is not None:
            core.divider()
            core.header('ALTERNATIVE HYPOTHESIS')
            core.paragraph(h1)

        core.divider()
        core.header('ANALYSIS REGIONS')
        if hasattr(ubSource, 'description'):
            core.paragraph(ubSource.description)

        core.divider()
        core.header('SOLUTION')

        statClass = analysis.getStat()
        # The statistic is instantiated (rather than calling getDescription on
        # the class) because instantiation yields an object of the real class
        # in the Statistic hierarchy instead of the MagicStatFactory hierarchy.
        # An alternative would be to make getDescription a class method in the
        # MagicStatFactory hierarchy and unwrap functools.partial objects.
        try:
            reg = next(iter(ubSource))
        except Exception:
            # Best effort: any failure to obtain a first region (including an
            # empty source) is reported in the description instead of raised.
            # KeyboardInterrupt/SystemExit are deliberately not swallowed.
            core.paragraph(
                'Solution not relevant, as there are no specified analysis regions..'
            )
        else:
            track1, track2 = analysis.getTracks()
            if statClass is None:
                core.paragraph(
                    'Solution not available, due to currently invalid analysis'
                )
                logMessage('Solution not available, with params: ' +
                           str([trackNames[0], trackNames[1], analysisDef]),
                           level=logging.WARN)
            else:
                statObj = statClass(reg, track1, track2)
                statDescr = statObj.getDescription()
                # This is a URL, so it is joined with '/' rather than os.sep.
                replPat = '<a href=' + '/'.join(
                    [STATIC_REL_PATH, 'notes', 'stats', '']) + r'\1>note</a>'
                # Non-greedy, so that several <note> tags in one description
                # each become their own link instead of being merged into one.
                statDescr = re.sub(r'<note>(.*?)</note>', replPat, statDescr)

                core.paragraph(statDescr)

        core.divider()
        core.header('TIME OF ANALYSIS')
        core.paragraph('Analysis initiated at time: ' +
                       str(datetime.datetime.now()))

        if urlForTrackAutoSelection not in [None, '']:
            core.divider()
            core.header('URL FOR TRACK AUTOSELECTION')
            core.styleInfoBegin(styleClass='break-word')
            core.paragraph(urlForTrackAutoSelection)
            core.styleInfoEnd()

        if revEngBatchLine not in [None, '']:
            core.divider()
            core.header('CORRESPONDING BATCH COMMAND LINE')
            core.styleInfoBegin(styleClass='break-word')
            core.paragraph(revEngBatchLine)
            core.styleInfoEnd()

        core.divider()
        core.header('REFERENCES')
        core.paragraph(
            'The HyperBrowser system is described in:<br>"Sandve et al., <a href="http://genomebiology.com/2010/11/12/R121/">The Genomic HyperBrowser: inferential genomics at the sequence level</a>, Genome Biol. 2010;11(12):R121'
        )
        from gold.statistic.RandomizationManagerStat import RandomizationManagerStat
        # The MCFDR reference is only cited when the statistic uses that
        # Monte Carlo sampling scheme.
        if statClass is not None and RandomizationManagerStat.getMcSamplingScheme(
                statClass.keywords) == 'MCFDR':
            core.paragraph(
                'The p-values of this analysis were computed using the MCFDR scheme for Monte Carlo based p-value computation' +
                ', described in:<br>Sandve et al., <a href="http://bioinformatics.oxfordjournals.org/content/early/2011/10/13/bioinformatics.btr568.long">Sequential Monte Carlo multiple testing</a>, Bioinformatics 2011')

        return str(core)
    def presentResults(self):
        """
        Render the transcription-factor results as an HTML table.

        For each transcription factor a clickable summary row is written
        (name, motif name, number of peaks, number of peaks with SNPs, number
        of changed bindings), followed by a hidden detail row. The detail row
        holds a sub-table listing only the peaks that have a SNP between
        their start and end. A small jQuery snippet toggles the detail rows.

        NOTE(review): `transcription_factors` is not defined in this method;
        presumably it is a module-level name - confirm against the rest of
        the file.

        :return: the HtmlCore object holding the generated HTML
        """
        core = HtmlCore()
        core.begin()
        core.header("Results")
        core.divBegin(divClass='resultsExplanation')
        core.paragraph('''
            The table summarizes the results for each transcription factor and PWM that was analysed. Click on a row for details.
            ''')
        core.divEnd()

        # Detail rows (class 'content') start hidden; clicking a summary row
        # (class 'heading') toggles the detail row that follows it.
        core._str += """
         <script type="text/javascript" src="https://code.jquery.com/jquery-2.1.4.min.js"></script>
         <script>

        jQuery(document).ready(function() {
          jQuery(".content").hide();
          //toggle the componenet with class msg_body
          jQuery(".heading").click(function()
          {
            jQuery(this).next(".content").slideToggle(1);
          });
        });
        </script>
        """

        core._str += "<table class='colored bordered'>"
        core._str += """
            <tr>
                <th class='header'>Transcription factor</th>
                <!--<th>Peak data</th>-->
                <th class='header'>Motif</th>
                <th class='header'>Number of peaks</th>
                <th class='header'>Number of peaks with SNP(s)</th>
                <th class='header'>Number of changed bindings</th>
                <!--<th class='header'>Binding after mutation</th>-->
            </tr>
        """

        for tf in transcription_factors:
            # Summary row for this TF. The last cell (number of changed
            # bindings) is appended after the peaks have been processed.
            core._str += "<tr class='heading' style='cursor: pointer;'>"
            core._str += "<td>" + tf.name + "</td>"
            core._str += "<td>" + tf.motif.name + "</td>"
            core._str += "<td>%d</td>" % (len(tf.peaks))
            core._str += "<td>%d</td>" % sum(1 for p in tf.peaks if p.hasSnps)

            subtable = ""
            subtable += "<tr class='content'><td colspan='7'>"
            subtable += "<br><h4 style='margin-left: 20px;'>Peaks that intersect with one or more SNPs</h4>"

            subtable += "<table border='1' cellpadding='5' style='margin-left: 20px;'>"
            subtable += """
                <tr>
                    <th>Position</th>
                    <th>Best binding before mutation</th>
                    <th>Best binding after mutation</th>
                </tr>
            """

            # Only rows for peaks with at least one SNP are rendered.
            rows_important = []

            # Number of SNP peaks whose best binding score changed.
            n_gain_loss = 0
            for peak in tf.peaks:

                if len(peak.tf.motif) > len(peak.sequence):
                    # Ignore motifs longer than the peak sequence (expected
                    # to occur only on test sets).
                    continue

                row = "<tr>"
                row += "<td>%s %d:%d</td>" % (peak.chr, peak.start, peak.end)
                p = peak.bestBindingPositionBeforeMutation
                # p is used as an absolute position; subtracting peak.start
                # gives the offset into peak.sequence.
                row += "<td>On pos %d with score %.10f<br>%s</td>" % (
                    p, peak.bestBindingScoreBeforeMutation,
                    prettySequence(
                        peak, peak.sequence[p - peak.start:p - peak.start +
                                            len(peak.tf.motif)], p))

                hasSnp = peak.hasSnpBetween(peak.start, peak.end)
                if hasSnp:
                    if peak.bestBindingScoreAfterMutation != peak.bestBindingScoreBeforeMutation:
                        n_gain_loss += 1
                        row += "<td><font color='darkgreen'>"
                    else:
                        row += "<td><font>"

                    row += "On position %d with score %.10f<br>%s<br>Binding sequence: %s</font></td>" % (
                        peak.bestBindingPositionAfterMutation,
                        peak.bestBindingScoreAfterMutation,
                        prettySequence(peak,
                                       peak.bindingSequenceAfterMutations,
                                       peak.bestBindingPositionAfterMutation),
                        ''.join(peak.bindingSequenceAfterMutations))
                else:
                    row += "<td><font color='#666666'>No change (no point mutations)</font></td>"

                row += "</tr>"

                # Rows for peaks without SNPs are built but not shown.
                if hasSnp:
                    rows_important.append(row)

            subtable += ''.join(rows_important)

            subtable += "</table><br><br>"
            subtable += "</td></tr>"

            # A non-zero count of changed bindings is shown in bold.
            if n_gain_loss > 0:
                core._str += "<td><b>%d</b></td>" % n_gain_loss
            else:
                core._str += "<td>%d</td>" % n_gain_loss
            core._str += "</tr>"
            core._str += subtable

        core._str += "</table>"

        return core
예제 #22
0
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Is called when execute-button is pushed by web-user. Should print
        output as HTML to standard out, which will be directed to a results
        page in Galaxy history. If getOutputFormat is anything else than HTML,
        the output should be written to the file with path galaxyFn. If needed,
        StaticFile can be used to get a path where additional files can be put
        (e.g. generated image files). choices is a list of selections made by
        web-user in each options box.

        This implementation runs SummarizedWrapperStat over the tracks of the
        selected GSuite for the user-selected bins and prints an HTML table
        with one row per bin. For analysis cls.Q3, a per-bin p-value from
        GSuiteBinEnrichmentPValWrapperStat is added as an extra column.

        :param choices: tool selections; reads summaryFunc, analysisName,
            gsuite and genome.
        :param galaxyFn: passed on to the table helper.
        :param username: not used here.
        '''

        cls._setDebugModeIfSelected(choices)

        # Stat question 4
        summaryFunc = choices.summaryFunc if choices.summaryFunc else cls.SUMMARY_FUNC_DEFAULT
        statTxt = "Average"
        if summaryFunc == "max":
            statTxt = "Maximum"

        statDesc = 'number of <b>segments</b> per base'
        if choices.analysisName == cls.Q2:
            statDesc = 'number of <b>base pairs covered by segments</b>'

        core = HtmlCore()
        core.begin()
        core.header("Enrichment of GSuite tracks across regions")
        core.divBegin(divClass='resultsExplanation')
        core.paragraph(
            'The following is a list of all regions (bins) and the <b>' +
            statTxt.lower() + '</b> ' + statDesc +
            ' across the tracks within each region.')
        core.divEnd()

        if choices.analysisName == cls.Q3:

            # Compute p-value per bin
            analysisSpec = AnalysisSpec(GSuiteBinEnrichmentPValWrapperStat)
            analysisSpec.addParameter('rawStatistic', 'BinSizeStat')
            gsuite = getGSuiteFromGalaxyTN(choices.gsuite)
            tracks = [Track(x.trackName) for x in gsuite.allTracks()]
            regSpec, binSpec = cls.getRegsAndBinsSpec(choices)
            # NOTE(review): this name is not used below; the import is kept
            # in case loading the module matters - confirm and remove.
            from quick.statistic.GenericRelativeToGlobalStat import GenericRelativeToGlobalStatUnsplittable
            analysisSpec.addParameter("globalSource", 'userbins')
            analysisBins = GalaxyInterface._getUserBinSource(
                regSpec, binSpec, choices.genome)
            results_pval = doAnalysis(analysisSpec, analysisBins, tracks)

        analysisSpec = AnalysisSpec(SummarizedWrapperStat)
        analysisSpec.addParameter('rawStatistic', 'SummarizedWrapperStat')

        # Q2 measures base-pair coverage; the other questions count elements.
        countStat = 'ProportionElementCountStat'
        if choices.analysisName == cls.Q2:
            countStat = 'ProportionCountStat'

        analysisSpec.addParameter('pairwiseStatistic', countStat)
        analysisSpec.addParameter('summaryFunc', summaryFunc)
        # Tracks and bins are created afresh for this run rather than reused
        # from the p-value run above.
        gsuite = getGSuiteFromGalaxyTN(choices.gsuite)
        tracks = [Track(x.trackName) for x in gsuite.allTracks()]

        regSpec, binSpec = cls.getRegsAndBinsSpec(choices)
        analysisBins = GalaxyInterface._getUserBinSource(
            regSpec, binSpec, choices.genome)
        results = doAnalysis(analysisSpec, analysisBins, tracks)

        # Maps each bin to its table cell(s): the value alone, or for Q3 a
        # (value, p-value) tuple; "No result" when the bin has no 'Result'.
        prettyResults = {}

        for key, val in results.iteritems():
            if "Result" in val.keys():

                if choices.analysisName == cls.Q3:
                    prettyResults[key] = (val["Result"],
                                          results_pval[key]["Result"])
                else:
                    prettyResults[key] = (val["Result"])
            else:
                prettyResults[key] = "No result"

        # No lookup of the first result key is done here: the value was
        # never used, and the lookup raised IndexError on an empty result.

        columnNames = ['Bin', 'Representation within the bin']
        if choices.analysisName == cls.Q3:
            columnNames.append('p-value')

        core.divBegin()
        if choices.analysisName == cls.Q1:
            shortQuestion = cls.Q1_SHORT
        elif choices.analysisName == cls.Q2:
            shortQuestion = cls.Q2_SHORT
        else:  # Q3
            shortQuestion = cls.Q3_SHORT

        # Tables longer than visibleRows are made expandable.
        visibleRows = 20
        makeTableExpandable = len(prettyResults) > visibleRows

        addTableWithTabularAndGsuiteImportButtons(
            core,
            choices,
            galaxyFn,
            shortQuestion,
            tableDict=prettyResults,
            columnNames=columnNames,
            sortable=True,
            presorted=0,
            expandable=makeTableExpandable)

        core.divEnd()
        core.end()

        print(str(core))
                )['no overlapping elements'] and sortedGeSourceHasOverlappingRegions(
                        gtrackSource):
                    raise InvalidFormatError(
                        "Error: genome elements are overlapping while header variable 'no overlapping elements' is True."
                    )

                core.append('Done')
                valid = True
        except Exception, e:
            core.append(str(e))
            valid = False

        core.styleInfoEnd()

        core.divider()
        core.header('Conclusion:')
        core.styleInfoBegin(
            styleClass='donemessage' if valid else 'errormessage')
        core.highlight('The GTrack file has %s syntax' %
                       ('valid' if valid else 'invalid'))
        core.styleInfoEnd()

        core.end()
        print str(core)

    @staticmethod
    def validateAndReturnErrors(choices):
        '''
        Should validate the selected input parameters. If the parameters are not valid,
        an error text explaining the problem should be returned. The GUI then shows this text
        to the user (if not empty) and greys out the execute button (even if the text is empty).
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Is called when execute-button is pushed by web-user. Should print
        output as HTML to standard out, which will be directed to a results
        page in Galaxy history. If getOutputFormat is anything else than HTML,
        the output should be written to the file with path galaxyFn. If needed,
        StaticFile can be used to get a path where additional files can be put
        (e.g. generated image files). choices is a list of selections made by
        web-user in each options box.

        This implementation runs MultitrackSummarizedInteractionWrapperStat
        over the tracks of the selected GSuite for the user-selected bins and
        prints an HTML table with one row per bin. For analysis cls.Q2 an
        MCFDR-based p-value for the highest-ranking bin is computed first and
        printed above the table.

        :param choices: tool selections; reads similarityFunc, summaryFunc,
            gsuite, analysisName, mcfdrDepth and genome.
        :param galaxyFn: passed on to the table helper.
        :param username: not used here.
        '''

        cls._setDebugModeIfSelected(choices)

        similarityStatClassName = choices.similarityFunc if choices.similarityFunc else GSuiteStatUtils.T5_RATIO_OF_OBSERVED_TO_EXPECTED_OVERLAP

        summaryFunc = choices.summaryFunc if choices.summaryFunc else cls.SUMMARY_FUNC_DEFAULT

        pairwiseStatName = GSuiteStatUtils.PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[similarityStatClassName]

        gsuite = getGSuiteFromGalaxyTN(choices.gsuite)
        tracks = [Track(x.trackName) for x in gsuite.allTracks()]

        statTxt = "Average"
        if summaryFunc == "max":
            statTxt = "Maximum"

        if choices.analysisName == cls.Q2:

            mcfdrDepth = choices.mcfdrDepth if choices.mcfdrDepth else AnalysisDefHandler(REPLACE_TEMPLATES['$MCFDR$']).getOptionsAsText().values()[0][0]

            # First compute pvalue by running the statistic through a wrapper
            # stat that computes the max per bin.
            # NOTE(review): the pairwise statistic is fixed to
            # ObservedVsExpectedStat here, independent of the selected
            # similarity measure - confirm this is intended.
            analysisDefString = REPLACE_TEMPLATES['$MCFDRv3$'] + ' -> CollectionBinnedHypothesisWrapperStat'
            analysisSpec = AnalysisDefHandler(analysisDefString)
            analysisSpec.setChoice('MCFDR sampling depth', mcfdrDepth)

            analysisSpec.addParameter("rawStatistic", "GenericMaxBinValueStat")
            analysisSpec.addParameter('perBinStatistic', 'MultitrackSummarizedInteractionV2Stat')
            analysisSpec.addParameter('pairwiseStatistic', 'ObservedVsExpectedStat')
            analysisSpec.addParameter('summaryFunc', summaryFunc)
            analysisSpec.addParameter('tail', 'right-tail')
            analysisSpec.addParameter('assumptions', 'RandomGenomeLocationTrack')
            analysisSpec.addParameter('multitrackSummaryFunc', summaryFunc)

            regSpec, binSpec = cls.getRegsAndBinsSpec(choices)

            analysisBins = GalaxyInterface._getUserBinSource(regSpec,
                                                             binSpec,
                                                             choices.genome)

            pvalResults = doAnalysis(analysisSpec, analysisBins, tracks)
            pvalResults = pvalResults.getGlobalResult()
            # '%' binds tighter than '+', so the values are formatted into
            # the last literal only (the one holding both placeholders).
            resultsTxt = "The highest ranking bin based on the " + statTxt.lower() + " of the Forbes similarity measure for pairs of tracks within each bin had a score of <b>%.3f</b> with p-value <b>%.6f</b>" % (pvalResults["TSMC_GenericMaxBinValueStat"], pvalResults['P-value'])

        # Stat question 7
        core = HtmlCore()
        core.begin()
        analysisSpec = AnalysisSpec(MultitrackSummarizedInteractionWrapperStat)
        analysisSpec.addParameter('pairwiseStatistic', pairwiseStatName)
        analysisSpec.addParameter('summaryFunc', summaryFunc)
        analysisSpec.addParameter('multitrackSummaryFunc', summaryFunc)
        # Tracks and bins are created afresh for this run rather than reused
        # from the p-value run above.
        gsuite = getGSuiteFromGalaxyTN(choices.gsuite)
        tracks = [Track(x.trackName) for x in gsuite.allTracks()]

        regSpec, binSpec = cls.getRegsAndBinsSpec(choices)

        analysisBins = GalaxyInterface._getUserBinSource(regSpec,
                                                         binSpec,
                                                         choices.genome)
        results = doAnalysis(analysisSpec, analysisBins, tracks)

        # Maps each bin to its 'Result' value, or "No result" when absent,
        # keeping the iteration order of the analysis results.
        prettyResults = OrderedDict()
        for key, val in results.iteritems():
            if "Result" in val.keys():
                prettyResults[key] = val["Result"]
            else:
                prettyResults[key] = "No result"

        core.header(statTxt + " co-occurence between pairs of tracks within each bin")

        if choices.analysisName == cls.Q2:
            core.paragraph(resultsTxt)

        core.divBegin(divClass='resultsExplanation')
        core.paragraph('The following is a list of all bins and the <b>' + statTxt.lower() + '</b> co-occurrence of tracks within each bin.')
        core.divEnd()

        # Tables longer than visibleRows are made expandable.
        visibleRows = 20
        makeTableExpandable = len(prettyResults) > visibleRows
        columnNames = ['Bin', 'Co-occurrence within the bin']
        if choices.analysisName == cls.Q1:
            shortQuestion = cls.Q1_SHORT
        else:
            shortQuestion = cls.Q2_SHORT

        # Open the div that the divEnd() after the table closes; without it
        # the output contains an unmatched closing div.
        core.divBegin()
        addTableWithTabularAndGsuiteImportButtons(
            core, choices, galaxyFn, shortQuestion, tableDict=prettyResults,
            columnNames=columnNames, sortable=True, presorted=0,
            expandable=makeTableExpandable, visibleRows=visibleRows)

        core.divEnd()
        core.end()

        print(str(core))
예제 #25
0
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Run the selected analysis on each track of the GSuite separately and
        print the outcome as an HTML results page.

        For every track the global result of the analysis is stored both as
        attribute(s) on the track and as cell(s) in the results table. If the
        result contains a 'Result' key, that single value is used; otherwise
        every key of the result becomes its own column, prefixed with the
        analysis name. The GSuite is then composed to a run-specific file and
        the table is written with import buttons for the tabular file and the
        GSuite file.

        choices holds the selections made by the web user; gsuite, genome,
        analysis and the region/bin specification are read from it.
        galaxyFn is passed on to GalaxyRunSpecificFile for the output files.
        username is not used by this method.
        '''
        DebugMixin._setDebugModeIfSelected(choices)
        genome = choices.genome
        gSuite = getGSuiteFromGalaxyTN(choices.gsuite)
        # The combined category is computed here but not used further down.
        fullCategory = AnalysisManager.combineMainAndSubCategories(
            'Descriptive statistics', 'Basic')
        tracks = list(gSuite.allTracks())
        analysisName = choices.analysis

        selectedAnalysis = \
            cls.ANALYSIS_PRETTY_NAME_TO_ANALYSIS_SPEC_MAPPING[choices.analysis]

        regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices)
        analysisBins = GalaxyInterface._getUserBinSource(
            regSpec, binSpec, genome=genome)

        tableDict = OrderedDict()

        for track in tracks:
            rowDict = OrderedDict()
            tableDict[track.title] = rowDict
            globalRes = doAnalysis(
                selectedAnalysis, analysisBins, [track]).getGlobalResult()

            if 'Result' not in globalRes:
                # Several values: one attribute/column per key of the result.
                for attrName, attrVal in globalRes.iteritems():
                    colName = analysisName + ':' + attrName
                    track.setAttribute(colName.lower(), str(attrVal))
                    rowDict[colName] = strWithNatLangFormatting(attrVal)
                continue

            # Single value: the attribute/column is named after the analysis.
            singleVal = globalRes['Result']
            track.setAttribute(analysisName.lower(), str(singleVal))
            rowDict[analysisName] = strWithNatLangFormatting(singleVal)

        core = HtmlCore()
        core.begin()
        core.header('Results: ' + analysisName)

        def _produceTable(core, tableDict=None, tableId=None):
            # Callback handed to tableWithImportButtons to render the table.
            tableKwArgs = dict(firstColName='Track title',
                               tableId=tableId,
                               expandable=True,
                               visibleRows=20,
                               presorted=0)
            return core.tableFromDictOfDicts(tableDict, **tableKwArgs)

        tableId = 'results_table'
        tableFile = GalaxyRunSpecificFile([tableId, 'table.tsv'], galaxyFn)
        tabularHistElementName = 'Raw results: ' + analysisName

        # The tracks now carry the result attributes set above, so the GSuite
        # is composed only after the analysis loop.
        gsuiteFile = GalaxyRunSpecificFile(
            [tableId, 'input_with_results.gsuite'], galaxyFn)
        GSuiteComposer.composeToFile(gSuite, gsuiteFile.getDiskPath())
        gsuiteHistElementName = getGSuiteHistoryOutputName(
            'result', ', ' + analysisName, choices.gsuite)

        core.tableWithImportButtons(
            tabularFile=True,
            tabularFn=tableFile.getDiskPath(),
            tabularHistElementName=tabularHistElementName,
            gsuiteFile=True,
            gsuiteFn=gsuiteFile.getDiskPath(),
            gsuiteHistElementName=gsuiteHistElementName,
            produceTableCallbackFunc=_produceTable,
            tableDict=tableDict,
            tableId=tableId)
        core.end()
        print(core)
예제 #26
0
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Run the analysis selected in choices.analysisName on the tracks of the
        GSuite and print the resulting HTML page to standard out.

        Three branches are implemented:
          * cls.Q1: ranks the tracks with
            GSuiteRepresentativenessOfTracksRankingsWrapperStat and prints a
            table plus a plot.
          * cls.Q3: same kind of ranking, but with P-values
            (GSuiteRepresentativenessOfTracksRankingsAndPValuesWrapperStat on
            top of the '$MCFDRv3$' analysis template); prints a table.
          * anything else (marked "Q4" in the code): a single hypothesis test
            for the whole collection (CollectionSimilarityHypothesisWrapperStat);
            prints one summary paragraph.

        choices holds the selections made by the web user in each options box.
        galaxyFn is passed on to addTableWithTabularAndGsuiteImportButtons.
        username is not used by this method.
        '''
        import numpy
        # Make numpy raise exceptions on floating-point errors instead of
        # only issuing warnings.
        numpy.seterr(all='raise')
        cls._setDebugModeIfSelected(choices)
        genome = choices.genome
        analysisQuestion = choices.analysisName
        # Fall back to defaults when the corresponding option is empty.
        similaryStatClassName = choices.similarityFunc if choices.similarityFunc else GSuiteStatUtils.T5_RATIO_OF_OBSERVED_TO_EXPECTED_OVERLAP
        summaryFunc = choices.summaryFunc if choices.summaryFunc else 'average'
        # The statistic takes the flag as a 'Yes'/'No' string.
        reverse = 'Yes' if choices.reversed else 'No'

        gsuite = getGSuiteFromGalaxyTN(choices.gsuite)
        regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices)
        analysisBins = GalaxyInterface._getUserBinSource(regSpec,
                                                         binSpec,
                                                         genome=genome)
        tracks = [
            Track(x.trackName, trackTitle=x.title) for x in gsuite.allTracks()
        ]
        # All track titles packed into one string parameter: each title is
        # quoted (with no characters treated as safe) and the quoted titles
        # are joined with the shared separator constant.
        trackTitles = CommonConstants.TRACK_TITLES_SEPARATOR.join(
            [quote(x.title, safe='') for x in gsuite.allTracks()])

        # Extra table columns; only filled for the per-track questions.
        additionalResultsDict = OrderedDict()
        additionalAttributesDict = OrderedDict()
        if analysisQuestion in [cls.Q1, cls.Q2, cls.Q3]:
            additionalAttributesDict = cls.getSelectedAttributesForEachTrackDict(
                choices.additionalAttributes, gsuite)
            # Additional single-value statistics computed for each track.
            stats = [CountStat, CountElementStat]
            additionalResultsDict = runMultipleSingleValStatsOnTracks(
                gsuite, stats, analysisBins, queryTrack=None)

        if analysisQuestion == cls.Q1:
            analysisSpec = AnalysisSpec(
                GSuiteRepresentativenessOfTracksRankingsWrapperStat)
            analysisSpec.addParameter(
                'pairwiseStatistic', GSuiteStatUtils.
                PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[similaryStatClassName])
            analysisSpec.addParameter(
                'summaryFunc',
                GSuiteStatUtils.SUMMARY_FUNCTIONS_MAPPER[summaryFunc])
            analysisSpec.addParameter('reverse', reverse)
            analysisSpec.addParameter('ascending', 'No')
            analysisSpec.addParameter('trackTitles', trackTitles)
            analysisSpec.addParameter('queryTracksNum', len(tracks))
            results = doAnalysis(analysisSpec, analysisBins,
                                 tracks).getGlobalResult()

            gsPerTrackResultsModel = GSuitePerTrackResultModel(
                results,
                ['Similarity to rest of tracks in suite (%s)' % summaryFunc],
                additionalResultsDict=additionalResultsDict,
                additionalAttributesDict=additionalAttributesDict)
            # A lead attribute other than the title column is passed on to
            # the result model; otherwise the model's default is used.
            if choices.leadAttribute and choices.leadAttribute != GSuiteConstants.TITLE_COL:
                columnTitles, decoratedResultsDict = \
                    gsPerTrackResultsModel.generateColumnTitlesAndResultsDict(choices.leadAttribute)
            else:
                columnTitles, decoratedResultsDict = \
                    gsPerTrackResultsModel.generateColumnTitlesAndResultsDict()

            core = HtmlCore()
            core.begin()
            core.divBegin(divId='results-page')
            core.divBegin(divClass='results-section')
            core.header(analysisQuestion)
            # The first key of the results is reported as the top track
            # (presumably the results are ordered by rank -- TODO confirm).
            topTrackTitle = results.keys()[0]
            # NOTE(review): unlike the Q3 branch, the similarity value is
            # inserted here without strWithNatLangFormatting.
            core.paragraph('''
                The track "%s" is the most representative track of the GSuite with %s %s similarity to the rest of the tracks
                as measured by "%s" track similarity measure.
            ''' % (topTrackTitle, results[topTrackTitle], summaryFunc,
                   similaryStatClassName))

            addTableWithTabularAndGsuiteImportButtons(
                core,
                choices,
                galaxyFn,
                cls.Q1_SHORT,
                decoratedResultsDict,
                columnTitles,
                gsuite=gsuite,
                results=results,
                gsuiteAppendAttrs=['similarity_score'],
                sortable=True)

            # Plot; the column index is 1 instead of 0 when a lead attribute
            # other than the title column is selected.
            columnInd = 0
            if choices.leadAttribute and choices.leadAttribute != GSuiteConstants.TITLE_COL:
                columnInd = 1
            res = GSuiteTracksCoincidingWithQueryTrackTool.drawPlot(
                results,
                additionalResultsDict,
                'Similarity to rest of tracks in suite (%s)' % summaryFunc,
                columnInd=columnInd)
            core.line(res)
            core.divEnd()
            core.divEnd()
            core.end()

        # NOTE(review): a dedicated cls.Q2 branch ("most atypical track": the
        # same analysis as Q1 but with 'ascending' set to 'Yes') used to be
        # here and is disabled. As the code stands, a cls.Q2 selection that
        # differs from Q1 and Q3 ends up in the final else branch below --
        # confirm that Q2 can no longer be selected.
        elif analysisQuestion == cls.Q3:

            # Use the selected sampling depth, or else the first option of
            # the '$MCFDR$' template.
            mcfdrDepth = choices.mcfdrDepth if choices.mcfdrDepth else \
            AnalysisDefHandler(REPLACE_TEMPLATES['$MCFDR$']).getOptionsAsText().values()[0][0]

            analysisDefString = REPLACE_TEMPLATES[
                '$MCFDRv3$'] + ' -> GSuiteRepresentativenessOfTracksRankingsAndPValuesWrapperStat'
            analysisSpec = AnalysisDefHandler(analysisDefString)
            analysisSpec.setChoice('MCFDR sampling depth', mcfdrDepth)
            analysisSpec.addParameter('assumptions',
                                      'PermutedSegsAndIntersegsTrack')
            analysisSpec.addParameter(
                'rawStatistic',
                SummarizedInteractionWithOtherTracksV2Stat.__name__)
            analysisSpec.addParameter(
                'pairwiseStatistic', GSuiteStatUtils.
                PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[similaryStatClassName])
            analysisSpec.addParameter(
                'summaryFunc',
                GSuiteStatUtils.SUMMARY_FUNCTIONS_MAPPER[summaryFunc])
            analysisSpec.addParameter('tail', 'right-tail')
            analysisSpec.addParameter('trackTitles', trackTitles)
            results = doAnalysis(analysisSpec, analysisBins,
                                 tracks).getGlobalResult()
            core = HtmlCore()

            gsPerTrackResultsModel = GSuitePerTrackResultModel(
                results, [
                    'Similarity to rest of tracks in suite (%s)' % summaryFunc,
                    'P-value'
                ],
                additionalResultsDict=additionalResultsDict,
                additionalAttributesDict=additionalAttributesDict)
            if choices.leadAttribute and choices.leadAttribute != GSuiteConstants.TITLE_COL:
                columnTitles, decoratedResultsDict = \
                    gsPerTrackResultsModel.generateColumnTitlesAndResultsDict(choices.leadAttribute)
            else:
                columnTitles, decoratedResultsDict = \
                    gsPerTrackResultsModel.generateColumnTitlesAndResultsDict()

            core.begin()
            core.divBegin(divId='results-page')
            core.divBegin(divClass='results-section')
            core.header(analysisQuestion)
            topTrackTitle = results.keys()[0]
            # Each result value is indexed here as [0] = similarity and
            # [1] = P-value, matching the order of the column titles above.
            core.paragraph('''
                The track "%s" has the lowest P-value of %s corresponding to %s %s similarity to the rest of the tracks
                as measured by "%s" track similarity measure.
            ''' % (topTrackTitle,
                   strWithNatLangFormatting(results[topTrackTitle][1]),
                   strWithNatLangFormatting(results[topTrackTitle][0]),
                   summaryFunc, similaryStatClassName))

            addTableWithTabularAndGsuiteImportButtons(
                core,
                choices,
                galaxyFn,
                cls.Q3_SHORT,
                decoratedResultsDict,
                columnTitles,
                gsuite=gsuite,
                results=results,
                gsuiteAppendAttrs=['similarity_score', 'p_value'],
                sortable=True)

            core.divEnd()
            core.divEnd()
            core.end()
        else:  # Q4
            # Use the selected sampling depth, or else the first option of
            # the '$MCFDR$' template.
            mcfdrDepth = choices.mcfdrDepth if choices.mcfdrDepth else \
                AnalysisDefHandler(REPLACE_TEMPLATES['$MCFDR$']).getOptionsAsText().values()[0][0]
            analysisDefString = REPLACE_TEMPLATES[
                '$MCFDRv3$'] + ' -> CollectionSimilarityHypothesisWrapperStat'
            analysisSpec = AnalysisDefHandler(analysisDefString)
            analysisSpec.setChoice('MCFDR sampling depth', mcfdrDepth)
            analysisSpec.addParameter('assumptions',
                                      'PermutedSegsAndIntersegsTrack')
            analysisSpec.addParameter('rawStatistic',
                                      'MultitrackSummarizedInteractionV2Stat')
            analysisSpec.addParameter(
                'pairwiseStatistic', GSuiteStatUtils.
                PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[similaryStatClassName])
            analysisSpec.addParameter(
                'summaryFunc',
                GSuiteStatUtils.SUMMARY_FUNCTIONS_MAPPER[summaryFunc])
            analysisSpec.addParameter('multitrackSummaryFunc',
                                      'avg')  # should it be a choice?
            analysisSpec.addParameter('tail', 'right-tail')
            results = doAnalysis(analysisSpec, analysisBins,
                                 tracks).getGlobalResult()
            pval = results['P-value']
            observed = results['TSMC_MultitrackSummarizedInteractionV2Stat']
            # Wording of the summary: P-value below 0.01 -> 'strong',
            # below 0.05 -> 'weak', otherwise 'no' significance.
            significanceLevel = 'strong' if pval < 0.01 else (
                'weak' if pval < 0.05 else 'no')
            core = HtmlCore()
            core.begin()
            core.divBegin(divId='results-page')
            core.divBegin(divClass='results-section')
            core.header(analysisQuestion)
            core.paragraph('''
                The tracks in the suite show %s significance in their collective similarity
                (average similarity of a track to the rest) of %s
                and corresponding p-value of %s,
                as measured by "%s" track similarity measure.
            ''' % (significanceLevel, strWithNatLangFormatting(observed),
                   strWithNatLangFormatting(pval), similaryStatClassName))
            core.divEnd()
            core.divEnd()
            core.end()

        # Every branch above binds core to a finished page.
        print str(core)
예제 #27
0
    def _buildHtml(self, done):
        '''
        Build the progress page and return it as an HtmlCore object.

        The page contains the running time, the estimated total remaining
        time, and for each object in self._progressObjList a one-row table
        followed by that object's own remaining-time estimate. The row has
        one name cell and one cell per element; cells with index below the
        object's status get the 'progressCellDone' class, the rest get
        'progressCell'. The percentage is written into the middle cell and a
        '%' sign into the cell after it.

        done is passed to HtmlCore.end as stopReload.
        '''
        page = HtmlCore()
        page.begin(reloadTime=RELOAD_TIME)
        page.divBegin(divId='progress')

        page.header(self._getRunningTimeStr())

        remainingTime, unknown = self._estimateRemainingTime()
        totalRemainingStr = self._getEstimatedTimeRemainingStr(
            remainingTime, unknown)
        # A trailing '+' is added when the estimate is flagged as unknown
        # but still positive.
        if unknown and remainingTime > 0:
            totalRemainingStr += '+'
        page.header(totalRemainingStr)

        # colspan for the first cell, which displays the process name
        nameCellColSpan = 4

        for progressObj in self._progressObjList:
            page.tableHeader([], tableClass='progress')
            page.tableRowBegin(rowClass='progressRow')
            page.tableCell(progressObj.name, colSpan=nameCellColSpan)

            for cellIdx in range(progressObj.elementCount):
                if cellIdx == int(progressObj.elementCount / 2):
                    # Middle cell: percentage done, with two decimals.
                    cellText = "%0.2f" % (float(progressObj.status) /
                                          progressObj.elementCount * 100)
                else:
                    cellText = ''
                if cellIdx == int(progressObj.elementCount / 2 + 1):
                    cellText = '%'

                if cellIdx < progressObj.status:
                    cellCls = 'progressCellDone'
                else:
                    cellCls = 'progressCell'
                page.tableCell(cellText, cellClass=cellCls)

            page.tableRowEnd()
            page.tableFooter()

            objRemainingTime = progressObj.estimateRemainingTime()
            objUnknown = objRemainingTime == UNKNOWN_TIME_REMAINING
            page.paragraph(self._getEstimatedTimeRemainingStr(
                objRemainingTime, objUnknown))

        page.divEnd()
        page.end(stopReload=done)
        return page
    def execute_batch(cls, choices, galaxyFn=None, username=''):
        '''
        Run the tool once per reference SNP as a batch and print a combined
        HTML results page.

        One batch command line for the 'hb_variant_melting_profiles' tool is
        built for each entry returned by cls.get_ref_snp(choices) and the
        lines are run through GalaxyInterface.runBatchLines. Afterwards the
        'results.tsv' file of each run (in the subdirectory named after the
        run index) is merged into one run-specific 'results.tsv', with the
        run index prepended as the first column, and the same rows are shown
        in an HTML table.

        choices holds the selections made by the web user; it is indexed and
        sliced like a tuple here.
        galaxyFn is the Galaxy output file name used for run-specific files.
        username is passed on to the batch runner.
        '''
        print(GalaxyInterface.getHtmlBeginForRuns(galaxyFn))
        html = HtmlCore()
        html.header('Batch run results')

        refSnps = cls.get_ref_snp(choices)

        # Template with one quoted placeholder per option box.
        batchMal = "$Tool[hb_variant_melting_profiles](" + '|'.join(
            ["'%s'"] * len(choices)) + ")"
        cmdList = []
        for rs in refSnps:
            fakeChoices = (choices.genome, 'Single',
                           '__batch__') + rs + choices[8:]
            cmdList.append(batchMal % fakeChoices)

        GalaxyInterface.runBatchLines(cmdList,
                                      galaxyFn,
                                      username=username,
                                      printResults=False,
                                      printProgress=True)

        results_tsv = GalaxyRunSpecificFile(['results.tsv'], galaxyFn)
        results = results_tsv.getFile()
        resultsDir = os.path.dirname(results_tsv.getDiskPath())
        try:
            for runIdx in range(len(cmdList)):
                runResultsFn = os.path.join(resultsDir, str(runIdx),
                                            'results.tsv')
                # The with-block closes each per-run file; previously these
                # handles were never closed.
                with open(runResultsFn) as runResults:
                    for lineIdx, resultline in enumerate(runResults):
                        if lineIdx == 0:
                            # Header line: only the header of the first run
                            # is copied to the combined output.
                            if runIdx == 0:
                                headertxt = '#run\t' + resultline
                                results.write(headertxt)
                                html.tableHeader(headertxt.split('\t'))
                            continue

                        rowIdx = lineIdx - 1  # index among the data lines
                        results.write(str(runIdx) + '\t' + resultline)
                        # Only lines without any '?' get a link to a chart.
                        if resultline.count('?') == 0:
                            link = '<a href="%d/html/chart-%d.html">%d (graph)</a>' % (
                                runIdx, rowIdx, runIdx)
                        else:
                            link = str(runIdx)
                        html.tableLine([link] + resultline.split('\t'))
        finally:
            # Close the combined file even if reading a run's output fails.
            results.close()
        html.tableFooter()

        # XXX: temp fix for HB/stable bug
        if URL_PREFIX == '/hb':
            print('</div>')

        print('<p><b>' + results_tsv.getLink('Download results') + '</b></p>')
        print(html)
        print(GalaxyInterface.getHtmlEndForRuns())
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Intersect a TF track with the selected genomic region tracks and
        print the outcome as HTML.

        Two HTML documents are printed to standard out:
          1. a table with one row per genomic region track (number of target
             regions plus download / send-to-history links for the bed file)
             and a final 'Total' row;
          2. a column chart of the targets per chromosome, for the genomes
             'hg19' and 'mm9' only.

        Nothing is printed when choices.genomicRegionsSource is not one of
        the three sources handled below.

        choices holds the selections made by the web user; genome,
        genomicRegionsSource, genomicRegions, sourceTfs and tfTracks are read.
        galaxyFn is passed on to TrackIntersection.
        username is not used by this method.
        '''
        cls._setDebugModeIfSelected(choices)

        genome = choices.genome
        genomicRegionsSource = choices.genomicRegionsSource
        genomicRegions = choices.genomicRegions
        sourceTfs = choices.sourceTfs
        tfTracks = choices.tfTracks

        # Get TF track name:
        if sourceTfs == cls.REGIONS_FROM_HISTORY:
            galaxyTN = tfTracks.split(':')
            tfTrackName = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(
                genome, galaxyTN)
        else:
            tfTrackName = TfTrackNameMappings.getTfTrackNameMappings(
                genome)[sourceTfs] + [tfTracks]

        # Get Genomic Regions track names:
        selectedTrackNames = []

        if isinstance(genomicRegions, dict):
            # Multi-selection: keep the keys whose value is the string 'True'.
            selectedGenRegions = [
                key for key, val in genomicRegions.iteritems() if val == 'True'
            ]
        else:
            selectedGenRegions = genomicRegions

        if genomicRegionsSource == 'Hyperbrowser repository (single tracks)':
            for regionKey in selectedGenRegions:
                selectedTrackNames.append(
                    TfbsTrackNameMappings.getTfbsTrackNameMappings(genome)[regionKey])
        elif genomicRegionsSource == 'Hyperbrowser repository (cell-specific multi-tracks)':
            for regionKey in selectedGenRegions:
                genElementGSuiteName = TfbsGSuiteNameMappings.getTfbsGSuiteNameMappings(
                    genome)[regionKey]
                gSuite = getGSuiteFromGSuiteFile(genElementGSuiteName)
                for track in gSuite.allTracks():
                    selectedTrackNames.append(track.trackName)
        elif genomicRegionsSource == 'History (user-defined)':
            # The second ':'-separated field tells a GSuite from a single track.
            if genomicRegions.split(":")[1] == "gsuite":
                gSuite = getGSuiteFromGalaxyTN(selectedGenRegions)
                for track in gSuite.allTracks():
                    selectedTrackNames.append(track.trackName)
            else:
                galaxyTN = selectedGenRegions.split(':')
                gRegTrackName = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(
                    genome, galaxyTN)
                selectedTrackNames.append(gRegTrackName)
        else:
            return

        # Intersection:
        title = 'Targets of ' + tfTrackName[-1] + ' TF track'
        htmlCore = HtmlCore()
        htmlCore.begin()
        htmlCore.header(title)
        htmlCore.divBegin('resultsDiv')
        htmlCore.tableHeader([
            'Genomic Region', 'Number of Target Regions',
            'Download bed file of Target Regions', 'Send bed file to history'
        ],
                             sortable=True,
                             tableId='resultsTable')

        allTargetBins = []
        dataY = []
        allRefSetNames = []
        # The 1-based position of each track is passed to TrackIntersection
        # as a string.
        for n, trackName in enumerate(selectedTrackNames, 1):
            tfIntersection = TrackIntersection(genome, trackName, tfTrackName,
                                               galaxyFn, str(n))
            regFileNamer = tfIntersection.getIntersectedRegionsStaticFileWithContent()
            targetBins = tfIntersection.getIntersectedReferenceBins()

            # Collect all target bins and data to plot:
            allTargetBins.extend(targetBins)
            dataY.append(
                TrackIntersection.prepareDataForPlot(genome, targetBins))
            # The last element of the track name is used as the row label.
            refSetName = trackName[-1]
            allRefSetNames.append(refSetName)

            # Print output to table:
            htmlCore.tableLine([
                refSetName,
                len(targetBins),
                regFileNamer.getLink('Download bed-file'),
                regFileNamer.getLoadToHistoryLink(
                    'Download bed-file to History'),
            ])

        line = ['Total', len(allTargetBins), '', '']
        dataY.append(
            TrackIntersection.prepareDataForPlot(genome, allTargetBins))
        allRefSetNames.append('Total')

        htmlCore.tableLine(line)
        htmlCore.tableFooter()
        htmlCore.divEnd()
        htmlCore.hideToggle(styleClass='debug')
        htmlCore.end()
        print(htmlCore)

        # Plot:
        # Chromosome labels are only known for these genomes: autosomes
        # chr1..chrN followed by chrX and chrY.
        autosomeCounts = {'hg19': 22, 'mm9': 19}
        if genome not in autosomeCounts:
            # Previously chrNames stayed unassigned for any other genome and
            # the method failed at this point, after the table had already
            # been printed. Without labels the plot is skipped instead.
            return
        chrNames = ['chr%d' % chrNum
                    for chrNum in range(1, autosomeCounts[genome] + 1)]
        chrNames += ['chrX', 'chrY']

        titleText = 'Targets per Chromosome'
        # Transpose dataY (one entry per region track) into one series per
        # position of the plot data.
        dataX = [[trackData[j] for trackData in dataY]
                 for j in range(len(dataY[0]))]
        seriesType = ['column'] * len(dataX)
        categories = allRefSetNames
        # NOTE(review): a y-axis title ('Number of Targets') used to be
        # defined here but was never passed to drawChart.
        seriesName = chrNames
        shared = False
        legend = True
        xAxisRotation = 0

        htmlCore = HtmlCore()
        htmlCore.begin()
        title = 'Targets of ' + tfTrackName[-1] + ' TF track per chromosome'
        htmlCore.header(title)
        htmlCore.line('<a href="#" id="linkContainer1">Click to see plot</a>')
        htmlCore.divBegin(divId='plotDiv', style=' margin: 0 auto')
        htmlCore.line(vp.addJSlibs())
        htmlCore.line(vp.useThemePlot())
        htmlCore.line(vp.addJSlibsExport())
        plot = vp.drawChart(dataX,
                            type='column',
                            legend=legend,
                            height=600,
                            xAxisRotation=xAxisRotation,
                            seriesType=seriesType,
                            seriesName=seriesName,
                            shared=shared,
                            titleText=titleText,
                            overMouseAxisX=True,
                            categories=categories,
                            showChartClickOnLink=True)
        htmlCore.line(plot)
        htmlCore.divEnd()
        htmlCore.end()
        print(htmlCore)
    def execute(choices, galaxyFn=None, username=''):
        '''
        Screen every track of one GSuite against every track of another and
        print the outcome to standard out as an HTML page.

        The target collection is read from choices.gSuiteFirst and the
        reference collection from choices.gSuiteSecond. For each
        (target, reference) pair an overlap analysis is run (or
        TrackCollectionsAnalysis.handleSameTrack when both entries have the
        same trackName). The statistic named by choices.statistic is then
        picked out of the processed results and shown as a sortable table
        (one row per reference track, one column per target track),
        followed by a column chart.

        choices  -- selections made by the web user in the option boxes
        galaxyFn -- forwarded unchanged to the analysis calls
        username -- not used by this method
        '''
        targetGSuite = getGSuiteFromGalaxyTN(choices.gSuiteFirst)
        refGSuite = getGSuiteFromGalaxyTN(choices.gSuiteSecond)

        regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices)

        mergeOverlaps = (choices.intraOverlap ==
                         TrackCollectionsAnalysis.MERGE_INTRA_OVERLAPS)
        analysisDef = ('dummy -> RawOverlapStat' if mergeOverlaps else
                       'dummy [withOverlaps=yes] -> RawOverlapAllowSingleTrackOverlapsStat')

        # Raw results, keyed first by target track title and then by
        # reference track title, in GSuite order.
        results = OrderedDict()
        for targetTrack in targetGSuite.allTracks():
            targetTitle = targetTrack.title
            for refTrack in refGSuite.allTracks():
                refTitle = refTrack.title
                sameTrack = targetTrack.trackName == refTrack.trackName
                if sameTrack:
                    pairResult = TrackCollectionsAnalysis.handleSameTrack(
                        targetTrack.trackName, regSpec, binSpec,
                        choices.genome, galaxyFn)
                else:
                    pairResult = GalaxyInterface.runManual(
                        [targetTrack.trackName, refTrack.trackName],
                        analysisDef, regSpec, binSpec,
                        choices.genome, galaxyFn,
                        printRunDescription=False,
                        printResults=False).getGlobalResult()
                # The inner dict is created on first use only, i.e. only
                # for targets that have at least one reference track.
                results.setdefault(targetTitle, OrderedDict())[refTitle] = pairResult

        stat = choices.statistic
        statIndex = STAT_LIST_INDEX[stat]
        title = 'Screening track collections  (' + stat + ')'

        # One list of selected statistic values per target track; the
        # reference titles seen for the last target become the row labels.
        processedResults = []
        rowLabels = []
        for targetTitle in targetGSuite.allTrackTitles():
            statsPerRef = processRawResults(results[targetTitle])
            labelValuePairs = [(refTitle, statList[statIndex])
                               for refTitle, statList in statsPerRef.iteritems()]
            rowLabels = [refTitle for refTitle, _ in labelValuePairs]
            processedResults.append([value for _, value in labelValuePairs])

        htmlCore = HtmlCore()
        htmlCore.begin()
        htmlCore.header(title)
        htmlCore.divBegin('resultsDiv')
        htmlCore.tableHeader(['Track names'] + targetGSuite.allTrackTitles(),
                             sortable=True, tableId='resultsTable')
        # zip(*...) transposes the per-target lists so that each table row
        # corresponds to one reference track.
        for rowLabel, rowValues in zip(rowLabels, zip(*processedResults)):
            cells = [strWithStdFormatting(value) for value in rowValues]
            htmlCore.tableLine([rowLabel] + cells)
        htmlCore.tableFooter()
        htmlCore.divEnd()

        # The chart library cannot handle series names containing ' or ",
        # so both kinds of quote are removed from the titles.
        def stripQuotes(text):
            return text.replace('\'', '').replace('"', '')

        targetTrackNames = [stripQuotes(t) for t in targetGSuite.allTrackTitles()]
        refTrackNames = [stripQuotes(t) for t in refGSuite.allTrackTitles()]

        from quick.webtools.restricted.visualization.visualizationGraphs import visualizationGraphs
        chartHtml = visualizationGraphs().drawColumnChart(
            processedResults,
            height=600,
            yAxisTitle=stat,
            categories=refTrackNames,
            xAxisRotation=90,
            seriesName=targetTrackNames,
            shared=False,
            titleText=title + ' plot',
            overMouseAxisX=True,
            overMouseLabelX=' + this.value.substring(0, 10) +')

        htmlCore.line(chartHtml)
        htmlCore.hideToggle(styleClass='debug')
        htmlCore.end()

        print(htmlCore)