Esempio n. 1
0
    def execute(cls, choices, galaxyFn=None, username=''):
        """
        Run the tool on the selections made by the web user.

        Is called when the execute button is pushed. Output should be printed
        as HTML to standard out, which will be directed to a results page in
        Galaxy history. If getOutputFormat is anything else than 'html', the
        output should be written to the file with path galaxyFn. If needed,
        StaticFile can be used to get a path where additional files can be
        put (e.g. generated image files).

        This implementation checks that a GSuite and a genome were selected
        and that every track of the GSuite has a track name, builds the flat
        track structure for the GSuite and hands everything over to
        cls.run_on_extracted_variables.

        choices   -- selections made by the web user in the option boxes
                     (read here: chooseTrackFiles, randType, randAlg,
                     numberOfTimesToRandomize, genome).
        galaxyFn  -- passed on unchanged to cls.run_on_extracted_variables.
        username  -- not used.

        Raises AssertionError when the GSuite or genome selection is None, or
        when a track of the GSuite has no track name.

        Mandatory unless isRedirectTool() returns True.
        """
        selectedGSuite = choices.chooseTrackFiles
        randType = choices.randType
        randAlg = choices.randAlg
        numRandomizations = choices.numberOfTimesToRandomize
        assert selectedGSuite is not None

        genome = choices.genome
        assert genome is not None
        analysisBins = UserBinMixin.getUserBinSource(choices)

        # Every track must carry a track name before the track structure is built.
        for gsTrack in getGSuiteFromGalaxyTN(selectedGSuite).allTracks():
            assert gsTrack.trackName is not None, "gsuite track %s has track name None" % gsTrack

        trackStructure = getFlatTracksTS(genome, selectedGSuite)
        cls.run_on_extracted_variables(
            trackStructure, analysisBins, numRandomizations, randAlg,
            randType, galaxyFn, genome)
Esempio n. 2
0
    def execute(cls, choices, galaxyFn=None, username=''):
        """
        Test the query track against every track of the reference GSuite.

        For each reference track a hypothesis structure is built that pairs
        the real (query, reference) tracks with a (query, randomized
        reference) counterpart. The analysis returned by
        cls._prepareAnalysisWithHypothesisTests is run on all of them and one
        HTML line per track title is printed to standard out.

        choices   -- selections made by the web user; queryTrack, gsuite and
                     genome are read here and the object is also passed on to
                     the helper methods.
        galaxyFn  -- only used for the call graph link when profiling is on
                     and DebugConfig.USE_CALLGRAPH is set.
        username  -- not used.
        """
        cls._setDebugModeIfSelected(choices)

        genome = choices.genome
        analysisBins = UserBinMixin.getUserBinSource(choices)

        import quick.gsuite.GuiBasedTsFactory as factory
        queryTS = factory.getSingleTrackTS(genome, choices.queryTrack)
        refTS = factory.getFlatTracksTS(genome, choices.gsuite)

        randTvProvider = cls.createTrackViewProvider(choices, refTS, analysisBins, genome)
        localAnalysis = randTvProvider.supportsLocalAnalysis()
        randRefTS = getRandomizedVersionOfTs(refTS, randTvProvider)

        # One "real" vs "rand" structure per reference track. Only the
        # reference side is randomized; the query track is shared by both.
        ts = TrackStructureV2()
        hypothesisKeyList = [sts.metadata["title"] for sts in randRefTS.values()]
        for hypothesisKey in hypothesisKeyList:
            realTS = TrackStructureV2()
            realTS["query"] = queryTS
            realTS["reference"] = refTS[hypothesisKey]

            randTS = TrackStructureV2()
            randTS["query"] = queryTS
            randTS["reference"] = randRefTS[hypothesisKey]

            hypothesisTS = TrackStructureV2()
            hypothesisTS["real"] = realTS
            hypothesisTS["rand"] = randTS
            ts[hypothesisKey] = hypothesisTS

        analysisSpec = cls._prepareAnalysisWithHypothesisTests(choices, localAnalysis)
        if DebugConfig.USE_PROFILING:
            from gold.util.Profiler import Profiler
            profiler = Profiler()
            # The profiled statement is a string evaluated against locals(),
            # so the local names analysisSpec, analysisBins, ts and resDict
            # must not be renamed. The result is handed back through resDict.
            resDict = {}
            profiler.run('resDict[0] = doAnalysis(analysisSpec, analysisBins, ts)', globals(), locals())
            result = resDict[0].getGlobalResult()['Result']
            profiler.printStats()
            if DebugConfig.USE_CALLGRAPH and galaxyFn:
                profiler.printLinkToCallGraph(['profile_AnalysisDefJob'], galaxyFn)
        else:
            result = doAnalysis(analysisSpec, analysisBins, ts).getGlobalResult()["Result"]

        for trackTitle, trackResult in result.iteritems():
            # Single-argument parenthesised form prints the same text with or
            # without print_function.
            print('{}: {}<br>'.format(trackTitle, repr(trackResult.getResult())))
    def execute(cls, choices, galaxyFn=None, username=''):
        """
        Answer the selected question about the tracks of a GSuite and print
        the report as HTML.

        Is called when the execute button is pushed by the web user. Output
        is printed as HTML to standard out, which will be directed to a
        results page in Galaxy history. If getOutputFormat is anything else
        than HTML, the output should be written to the file with path
        galaxyFn. If needed, StaticFile can be used to get a path where
        additional files can be put (e.g. generated image files).

        The analysis depends on choices.analysisName:

        * cls.Q1 -- similarity of every track to the rest of the suite;
          reported as a paragraph, a table and a plot.
        * cls.Q3 -- the same similarity together with a P-value per track,
          obtained by comparing each track against randomized versions of
          the suite; reported as a paragraph and a table.
        * anything else -- a single test of the collective similarity of the
          suite; reported as a paragraph.

        choices   -- selections made by the web user in the option boxes
                     (read here: genome, analysisName, similarityFunc,
                     summaryFunc, reversed, gsuite, additionalAttributes,
                     leadAttribute, mcfdrDepth, randType, randAlg).
        galaxyFn  -- passed on to addTableWithTabularAndGsuiteImportButtons.
        username  -- not used.

        Side effect: numpy.seterr(all='raise') is applied and not restored.
        """
        import numpy
        numpy.seterr(all='raise')
        cls._setDebugModeIfSelected(choices)
        genome = choices.genome
        analysisQuestion = choices.analysisName
        similarityStatLabel = choices.similarityFunc or \
            GSuiteStatUtils.T5_RATIO_OF_OBSERVED_TO_EXPECTED_OVERLAP
        summaryFunc = choices.summaryFunc or 'average'
        reverse = 'Yes' if choices.reversed else 'No'
        similarityColumnTitle = 'Similarity to rest of tracks in suite (%s)' % summaryFunc

        gsuite = getGSuiteFromGalaxyTN(choices.gsuite)
        analysisBins = UserBinMixin.getUserBinSource(choices)
        trackTitles = CommonConstants.TRACK_TITLES_SEPARATOR.join(
            [quote(x.title, safe='') for x in gsuite.allTracks()])

        import quick.gsuite.GuiBasedTsFactory as factory
        ts = factory.getFlatTracksTS(genome=genome,
                                     guiSelectedGSuite=choices.gsuite)

        def _leadAttribute():
            # Attribute selected as leading table column, or None when the
            # title column leads.
            if choices.leadAttribute and choices.leadAttribute != GSuiteConstants.TITLE_COL:
                return choices.leadAttribute
            return None

        def _columnTitlesAndResults(resultsModel, leadAttribute):
            # Column titles and decorated results, led by leadAttribute if given.
            if leadAttribute:
                return resultsModel.generateColumnTitlesAndResultsDict(leadAttribute)
            return resultsModel.generateColumnTitlesAndResultsDict()

        def _openResultsPage():
            # New HTML page positioned inside the results section, header written.
            page = HtmlCore()
            page.begin()
            page.divBegin(divId='results-page')
            page.divBegin(divClass='results-section')
            page.header(analysisQuestion)
            return page

        def _closeResultsPage(page):
            # Close the two divs opened by _openResultsPage and end the page.
            page.divEnd()
            page.divEnd()
            page.end()

        def _allTracksExcept(trackStructure, excludedKey):
            # Flat structure holding every sub-structure but the excluded one,
            # in the iteration order of trackStructure.
            return FlatTracksTS([
                (refKey, refSTS)
                for refKey, refSTS in trackStructure.iteritems()
                if refKey != excludedKey
            ])

        def _mcfdrDepth(templateKey):
            # User-selected sampling depth; otherwise element [0][0] of the
            # template's option values (presumably the default depth --
            # TODO confirm).
            if choices.mcfdrDepth:
                return choices.mcfdrDepth
            return AnalysisDefHandler(
                REPLACE_TEMPLATES[templateKey]).getOptionsAsText().values()[0][0]

        additionalResultsDict = OrderedDict()
        additionalAttributesDict = OrderedDict()
        if analysisQuestion in [cls.Q1, cls.Q2, cls.Q3]:
            additionalAttributesDict = cls.getSelectedAttributesForEachTrackDict(
                choices.additionalAttributes, gsuite)
            # Additional per-track columns shown next to the main result.
            additionalResultsDict = runMultipleSingleValStatsOnTracks(
                ts, [CountStat, CountElementStat], analysisBins)

        # Needed by every branch below; looked up first so that an unknown
        # label fails before any expensive work is started.
        pairwiseStatistic = \
            GSuiteStatUtils.PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[similarityStatLabel]
        summaryFuncParam = GSuiteStatUtils.SUMMARY_FUNCTIONS_MAPPER[summaryFunc]

        # NOTE(review): cls.Q2 has no branch of its own, so a Q2 selection is
        # handled by the final else branch -- confirm Q2 cannot be selected.
        if analysisQuestion == cls.Q1:
            analysisSpec = AnalysisSpec(MultitrackSummarizedInteractionV2Stat)
            analysisSpec.addParameter('multitrackSummaryFunc', 'raw')
            analysisSpec.addParameter('pairwiseStatistic', pairwiseStatistic)
            analysisSpec.addParameter('summaryFunc', summaryFuncParam)
            analysisSpec.addParameter('reverse', reverse)
            analysisSpec.addParameter('ascending', 'No')
            analysisSpec.addParameter('trackTitles', trackTitles)
            results = dictifyTSResult(
                doAnalysis(analysisSpec, analysisBins,
                           ts).getGlobalResult()['Result'])

            leadAttribute = _leadAttribute()
            gsPerTrackResultsModel = GSuitePerTrackResultModel(
                results,
                [similarityColumnTitle],
                additionalResultsDict=additionalResultsDict,
                additionalAttributesDict=additionalAttributesDict)
            columnTitles, decoratedResultsDict = _columnTitlesAndResults(
                gsPerTrackResultsModel, leadAttribute)

            core = _openResultsPage()
            topTrackTitle = results.keys()[0]
            core.paragraph('''
                The track "%s" is the most representative track of the GSuite with %s %s similarity to the rest of the tracks
                as measured by "%s" track similarity measure.
            ''' % (topTrackTitle, results[topTrackTitle], summaryFunc,
                   similarityStatLabel))

            addTableWithTabularAndGsuiteImportButtons(
                core,
                choices,
                galaxyFn,
                cls.Q1_SHORT,
                decoratedResultsDict,
                columnTitles,
                gsuite=gsuite,
                results=results,
                gsuiteAppendAttrs=['similarity_score'],
                sortable=True)

            # Plot; the column index is 1 when a lead attribute was selected.
            plot = GSuiteTracksCoincidingWithQueryTrackTool.drawPlot(
                results,
                additionalResultsDict,
                similarityColumnTitle,
                columnInd=1 if leadAttribute else 0)
            core.line(plot)
            _closeResultsPage(core)

        elif analysisQuestion == cls.Q3:
            randTvProvider = cls.createTrackViewProvider(
                choices, ts, analysisBins, genome)
            localAnalysis = randTvProvider.supportsLocalAnalysis()
            tsRand = getRandomizedVersionOfTs(ts, randTvProvider)

            # One hypothesis per track: the track as query against all other
            # tracks as reference, for the real and the randomized suite.
            # Both reference structures are built the same way so that they
            # list the tracks in the same order.
            q2TS = TrackStructureV2()
            for key in ts.keys():
                realTS = TrackStructureV2()
                realTS['query'] = ts[key]
                realTS['reference'] = _allTracksExcept(ts, key)
                randTS = TrackStructureV2()
                randTS['query'] = tsRand[key]
                randTS['reference'] = _allTracksExcept(tsRand, key)
                hypothesisTS = TrackStructureV2()
                hypothesisTS['real'] = realTS
                hypothesisTS['rand'] = randTS
                q2TS[key] = hypothesisTS

            mcfdrDepth = _mcfdrDepth('$MCFDRv5$')
            # NOTE(review): the doubled ' -> ' differs from the single arrow
            # used in the final else branch; kept unchanged -- confirm intent.
            analysisDefString = REPLACE_TEMPLATES[
                '$MCFDRv5$'] + ' -> ' + ' -> MultipleRandomizationManagerStat'
            analysisSpec = AnalysisDefHandler(analysisDefString)
            analysisSpec.setChoice('MCFDR sampling depth', mcfdrDepth)
            analysisSpec.addParameter(
                'rawStatistic',
                SummarizedInteractionWithOtherTracksV2Stat.__name__)
            analysisSpec.addParameter('pairwiseStatistic', pairwiseStatistic)
            analysisSpec.addParameter('summaryFunc', summaryFuncParam)
            analysisSpec.addParameter('multitrackSummaryFunc', 'raw')
            analysisSpec.addParameter('tail', 'right-tail')
            analysisSpec.addParameter('evaluatorFunc',
                                      'evaluatePvalueAndNullDistribution')
            analysisSpec.addParameter('runLocalAnalysis',
                                      'Yes' if localAnalysis else 'No')

            results = doAnalysis(analysisSpec, analysisBins,
                                 q2TS).getGlobalResult()
            testStatKey = 'TSMC_' + SummarizedInteractionWithOtherTracksV2Stat.__name__
            resultsTuples = []
            for key, res in results['Result'].iteritems():
                curRes = res.getResult()
                resultsTuples.append(
                    (key, [curRes[testStatKey], curRes['P-value']]))
            # Ascending P-value; ties are ordered by descending test statistic.
            resultsDict = OrderedDict(
                sorted(resultsTuples,
                       key=lambda t: (-t[1][1], t[1][0]),
                       reverse=True))

            gsPerTrackResultsModel = GSuitePerTrackResultModel(
                resultsDict,
                [similarityColumnTitle, 'P-value'],
                additionalResultsDict=additionalResultsDict,
                additionalAttributesDict=additionalAttributesDict)
            columnTitles, decoratedResultsDict = _columnTitlesAndResults(
                gsPerTrackResultsModel, _leadAttribute())

            core = _openResultsPage()
            topTrackTitle = resultsDict.keys()[0]
            core.paragraph('''
                The track "%s" has the lowest P-value of %s corresponding to %s %s similarity to the rest of the tracks
                as measured by "%s" track similarity measure.
            ''' % (topTrackTitle,
                   strWithNatLangFormatting(resultsDict[topTrackTitle][1]),
                   strWithNatLangFormatting(resultsDict[topTrackTitle][0]),
                   summaryFunc, similarityStatLabel))

            addTableWithTabularAndGsuiteImportButtons(
                core,
                choices,
                galaxyFn,
                cls.Q3_SHORT,
                decoratedResultsDict,
                columnTitles,
                gsuite=gsuite,
                results=resultsDict,
                gsuiteAppendAttrs=['similarity_score', 'p_value'],
                sortable=True)

            _closeResultsPage(core)

        else:  # Q4
            mcfdrDepth = _mcfdrDepth('$MCFDRv4$')
            analysisDefString = REPLACE_TEMPLATES[
                '$MCFDRv4$'] + ' -> RandomizationManagerV3Stat'
            analysisSpec = AnalysisDefHandler(analysisDefString)
            analysisSpec.setChoice('MCFDR sampling depth', mcfdrDepth)
            analysisSpec.addParameter('rawStatistic',
                                      'MultitrackSummarizedInteractionV2Stat')
            # Needed for call of non randomized stat for assertion.
            analysisSpec.addParameter('pairwiseStatistic', pairwiseStatistic)
            analysisSpec.addParameter('summaryFunc', summaryFuncParam)
            analysisSpec.addParameter('multitrackSummaryFunc',
                                      'avg')  # should it be a choice?
            analysisSpec.addParameter('tail', 'right-tail')
            # NOTE(review): this is the module-level createTrackViewProvider,
            # not cls.createTrackViewProvider used for Q3 -- confirm it is in
            # scope and takes (randType, randAlg).
            analysisSpec.addParameter(
                'tvProviderClass',
                getClassName(
                    createTrackViewProvider(choices.randType,
                                            choices.randAlg)))
            results = doAnalysis(analysisSpec, analysisBins,
                                 ts).getGlobalResult()

            pval = results['P-value']
            observed = results['TSMC_MultitrackSummarizedInteractionV2Stat']
            if pval < 0.01:
                significanceLevel = 'strong'
            elif pval < 0.05:
                significanceLevel = 'weak'
            else:
                significanceLevel = 'no'

            core = _openResultsPage()
            core.paragraph('''
                The tracks in the suite show %s significance in their collective similarity
                (average similarity of a track to the rest) of %s
                and corresponding p-value of %s,
                as measured by "%s" track similarity measure.
            ''' % (significanceLevel, strWithNatLangFormatting(observed),
                   strWithNatLangFormatting(pval), similarityStatLabel))
            _closeResultsPage(core)

        # Single-argument parenthesised form prints the same text with or
        # without print_function.
        print(str(core))