def execute(cls, choices, galaxyFn=None, username=''):
    """
    Runs the tool: computes the overlap matrix for the tracks of the selected
    GSuite over global bins, derives the correlation matrix from it and prints
    the resulting plots as HTML to standard out.

    choices holds the selections made by the web-user; galaxyFn is forwarded
    to the plotting step. username is not used here.
    """
    import time
    startTime = time.clock()

    # HTML settings
    from gold.result.HtmlCore import HtmlCore
    page = HtmlCore()
    page.divBegin(style=cls.HTML_STYLE)

    # Set debug environment
    cls._setDebugModeIfSelected(choices)

    # Analysis environment
    gSuite = getGSuiteFromGalaxyTN(choices.gSuite)
    bins = GlobalBinSource(gSuite.genome)
    spec = AnalysisSpec(GeneticLociOverlapStat)
    filterThreshold = int(choices.geneticLocus)
    spec.addParameter('filterThreshold', filterThreshold)

    # Print tool information
    cls.htmlClusterTitle(cls.getToolName(), page)
    corrStatOptions = [cls.CORR_PEARSON, cls.CORR_SPEARMAN]
    cls.htmlClusterSubtext(choices.corrStat, corrStatOptions,
                           choices.linkageCriterion, page)
    cls.htmlVectorHandling(page)

    # Get correlations
    overlapMatrix, labels = cls.getOverlapMatrix(bins, spec, gSuite)
    corrDict = cls.getTriangularCorrMatrix(overlapMatrix)
    cls.printCorrPlots(corrDict, labels, choices.corrStat,
                       choices.linkageCriterion, galaxyFn, page)

    # Report the time spent since the start of the method
    elapsed = str(time.clock() - startTime)
    cls.htmlClusterTime(elapsed, page)

    page.divEnd()
    print(page)
def prepareQ1(cls, reverse, similarityStatClassName, trackTitles):
    """
    Builds and returns the AnalysisSpec for
    GSuiteSimilarityToQueryTrackRankingsWrapperStat.

    similarityStatClassName is a key of
    GSuiteStatUtils.PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING; the mapped value is
    passed on as the pairwise statistic. reverse and trackTitles are passed
    through unchanged, and the number of query tracks is fixed to '1'.
    """
    spec = AnalysisSpec(GSuiteSimilarityToQueryTrackRankingsWrapperStat)
    pairwiseStat = GSuiteStatUtils.PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[
        similarityStatClassName]

    parameters = (
        ('pairwiseStatistic', pairwiseStat),
        ('reverse', reverse),
        ('trackTitles', trackTitles),
        ('queryTracksNum', '1'),
    )
    for paramName, paramValue in parameters:
        spec.addParameter(paramName, paramValue)
    return spec
def getGSuiteRipleysKData(self, bpWindow=1000, analysisBins=None):
    """
    Runs RipleysKStat separately on every track of self._gSuite.

    bpWindow is passed to the statistic as a string parameter and
    analysisBins is forwarded to doAnalysis as given.

    Returns an OrderedDict: Track title -> the 'Result' entry of the global
    result for that track.
    """
    spec = AnalysisSpec(RipleysKStat)
    spec.addParameter('bpWindow', str(bpWindow))

    resultPerTitle = OrderedDict()
    for gsTrack in self._gSuite.allTracks():
        analysisResult = doAnalysis(spec, analysisBins,
                                    [Track(gsTrack.trackName)])
        globalResult = analysisResult.getGlobalResult()
        resultPerTitle[gsTrack.title] = globalResult['Result']
    return resultPerTitle
def _runMultipleSingleValStatsCommon(trackStructure, stats, analysisBins, stat):
    """
    Runs the wrapper statistic `stat` with a set of raw statistics and returns
    the 'Result' entry of the global result.

    trackStructure: passed unchanged to doAnalysis
    stats: either a list of statistic classes (their __name__ is used) or a
           ^-separated string of statistic names
    analysisBins: passed unchanged to doAnalysis
    stat: the statistic class used to build the AnalysisSpec
    """
    assert stats is not None, 'stats argument not defined'
    # isinstance instead of an exact type match, so that unicode strings (and
    # subclasses) pass the check; they are handled by the basestring test below.
    assert isinstance(stats, (basestring, list)), \
        'stats argument must be a list of statistics ' \
        'or ^-separated string of statistic names'

    if isinstance(stats, basestring):
        statsParam = stats
    else:
        statsParam = "^".join([x.__name__ for x in stats])

    additionalAnalysisSpec = AnalysisSpec(stat)
    # use ^ separator to add additional stat classes.
    additionalAnalysisSpec.addParameter('rawStatistics', statsParam)

    analysisResult = doAnalysis(additionalAnalysisSpec, analysisBins,
                                trackStructure)
    return analysisResult.getGlobalResult()["Result"]
def run_on_extracted_variables(cls, ts, analysisBins,
                               choices_numberOfTimesToRandomize,
                               choices_randAlg, choices_randType, galaxyFn,
                               genome):
    """
    Randomizes the track structure `ts`, registers one output track per leaf
    node in a new GSuite, writes the randomized tracks through TsWriterStat
    and finally composes the GSuite to the file `galaxyFn`.

    Only a single randomization run is supported (asserted below). Progress
    information is printed to standard out.
    """
    # For now, since ts probably needs to be unique each time..
    assert choices_numberOfTimesToRandomize == 1

    outputGSuite = GSuite()
    for i in range(0, int(choices_numberOfTimesToRandomize)):
        print("TF leaf nodes")
        print(ts.getLeafNodes())

        # The trailing False and None arguments are temporary.
        randTvProvider = cls._createTrackViewProvider(
            ts, analysisBins, genome, choices_randAlg, choices_randType,
            False, None)
        randomizedTs = getRandomizedVersionOfTs(ts, randTvProvider)

        # output files
        print("Leaf nodes")
        print(randomizedTs.getLeafNodes())
        for j, singleTrackTs in enumerate(randomizedTs.getLeafNodes()):
            # NOTE(review): the output path is hard-coded to a developer's
            # home directory - confirm the intended output location.
            outputPath = '/home/ivargry/outfile_' + \
                str(randint(0, 999999999)) + '_' + str(j) + ".bed"
            uri = FileGSuiteTrack.generateURI(path=outputPath, suffix='bed',
                                              doQuote=False)
            print("URI: " + uri)

            # The title is removed from the metadata before the remaining
            # metadata is attached to the GSuite track as attributes.
            title = singleTrackTs.metadata.pop('title')
            gSuiteTrack = FileGSuiteTrack(uri,
                                          title=title + '.randomized',
                                          fileFormat='primary',
                                          trackType='segments',
                                          genome=genome,
                                          attributes=singleTrackTs.metadata)
            outputGSuite.addTrack(gSuiteTrack)
            singleTrackTs.metadata['trackFilePath'] = gSuiteTrack.path
            singleTrackTs.metadata['randomization_run'] = i

        # Run for its effect only; the returned result is not needed.
        spec = AnalysisSpec(TsWriterStat)
        doAnalysis(spec, analysisBins, randomizedTs)

    assert galaxyFn != ""
    GSuiteComposer.composeToFile(outputGSuite, galaxyFn)
def handleSameTrack(cls, trackName, regSpec, binSpec, genome, galaxyFn):
    """
    Runs RawOverlapToSelfStat on the single track `trackName` over the user
    bins given by regSpec/binSpec/genome and returns the global result.

    galaxyFn is accepted but not used here.
    """
    spec = AnalysisSpec(RawOverlapToSelfStat)
    bins = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)
    analysisResult = doAnalysis(spec, bins, [Track(trackName)])
    return analysisResult.getGlobalResult()
def execute(cls, choices, galaxyFn=None, username=''):
    """
    Is called when execute-button is pushed by web-user.

    Randomizes the tracks of the selected GSuite, writes the randomized
    tracks through TsWriterStat and composes a GSuite referring to them to
    the file with path galaxyFn.

    If the randomization type is TsRandAlgReg.BETWEEN_TRACKS_CATEGORY and a
    category is selected, the tracks are split by that category and each
    sub-structure is randomized on its own before the result is flattened.

    choices is a list of selections made by web-user in each options box.
    """
    # TODO: add functionality for single gtrack within-track randomization

    print('Executing...')

    sourceGSuite = getGSuiteFromGalaxyTN(choices.gs)
    resultGSuite = GSuite()
    genome = sourceGSuite.genome
    ts = factory.getFlatTracksTS(genome, choices.gs)
    randIndex = 0
    bins = GlobalBinSource(genome)

    randomizePerCategory = (
        choices.randType == TsRandAlgReg.BETWEEN_TRACKS_CATEGORY
        and choices.category not in [None, 'None'])

    if not randomizePerCategory:
        provider = cls.createTrackViewProvider(choices, ts, bins, genome)
        randomizedTs = getRandomizedVersionOfTs(ts, provider, randIndex)
    else:
        ts = ts.getSplittedByCategoryTS(choices.category)
        perCategoryTs = TrackStructureV2()
        for categoryKey, categoryTs in ts.items():
            provider = cls.createTrackViewProvider(choices, categoryTs, bins,
                                                   genome)
            perCategoryTs[categoryKey] = getRandomizedVersionOfTs(
                categoryTs, provider, randIndex)
        randomizedTs = perCategoryTs.getFlattenedTS()

    for leafTs in randomizedTs.getLeafNodes():
        extraFileName = os.path.sep.join(leafTs.track.trackName) + \
            '.randomized'
        uri = GalaxyGSuiteTrack.generateURI(galaxyFn=galaxyFn,
                                            extraFileName=extraFileName,
                                            suffix='bed')

        # The title is taken out of the metadata; what remains becomes the
        # attributes of the output track.
        title = leafTs.metadata.pop('title')
        outputTrack = GSuiteTrack(uri,
                                  title=title + '.randomized',
                                  fileFormat='primary',
                                  trackType='segments',
                                  genome=genome,
                                  attributes=leafTs.metadata)
        resultGSuite.addTrack(outputTrack)
        leafTs.metadata['trackFilePath'] = outputTrack.path

    writerSpec = AnalysisSpec(TsWriterStat)
    res = doAnalysis(writerSpec, bins, randomizedTs)

    GSuiteComposer.composeToFile(resultGSuite, galaxyFn)
def runMultipleSingleValStatsOnTracks(gsuite, stats, analysisBins, queryTrack=None):
    '''
    Runs a set of single-value statistics on every track of a GSuite, one
    doAnalysis call per track, through GenericResultsCombinerStat.

    gsuite: The gsuite of tracks
    stats: List of statistics (classes), or a ^-separated string of statistic
           names
    analysisBins: BinSource object
    queryTrack: should be defined if there are stats that need to run on two
                tracks (e.g. overlap)

    Returns an OrderedDict: Track title -> OrderedDict: Stat name -> single value.
    Stat names are replaced by their natural name when one is registered in
    CommonConstants.STATISTIC_CLASS_NAME_TO_NATURAL_NAME_DICT.
    '''
    assert stats is not None, 'stats argument not defined'
    # isinstance instead of an exact type match, so that unicode strings (and
    # subclasses) pass the check; they are handled by the basestring test below.
    assert isinstance(stats, (basestring, list)), \
        'stats argument must be a list of statistics ' \
        'or ^-separated string of statistic names'

    from quick.statistic.GenericResultsCombinerStat import GenericResultsCombinerStat

    if isinstance(stats, basestring):
        statsParam = stats
    else:
        statsParam = "^".join([x.__name__ for x in stats])

    additionalAnalysisSpec = AnalysisSpec(GenericResultsCombinerStat)
    # use ^ separator to add additional stat classes.
    additionalAnalysisSpec.addParameter('rawStatistics', statsParam)

    naturalNames = CommonConstants.STATISTIC_CLASS_NAME_TO_NATURAL_NAME_DICT
    resultsDict = OrderedDict()
    for refTrack in gsuite.allTracks():
        # Tracks sharing a title accumulate into the same inner dict.
        if refTrack.title not in resultsDict:
            resultsDict[refTrack.title] = OrderedDict()
        trackResults = resultsDict[refTrack.title]

        tracks = [Track(refTrack.trackName)]
        if queryTrack:
            tracks.append(queryTrack)

        additionalResult = doAnalysis(additionalAnalysisSpec, analysisBins,
                                      tracks).getGlobalResult()
        for statClassName, res in additionalResult.iteritems():
            # Fall back to the class name when no natural name is registered.
            statPrettyName = naturalNames.get(statClassName, statClassName)
            trackResults[statPrettyName] = res

    return resultsDict
def run(self):
    """
    Runs SummarizedInteractionWithOtherTracksStat for every ordered pair of
    tracks in self._gsuite whose titles differ, using self._rawStatistic,
    self._summaryFunction and self._reversed as parameters and
    self._analysisBins as bins.

    Returns an OrderedDict: (title of track 1, title of track 2) -> the
    'Result' entry of the global result for that pair.
    """
    tracks = [t.trackName for t in self._gsuite.allTracks()]
    trackTitles = self._gsuite.allTrackTitles()
    titledTracks = list(zip(trackTitles, tracks))

    analysisSpec = AnalysisSpec(SummarizedInteractionWithOtherTracksStat)
    analysisSpec.addParameter('rawStatistic', self._rawStatistic)
    analysisSpec.addParameter('summaryFunc', self._summaryFunction)
    analysisSpec.addParameter('reverse', self._reversed)

    results = OrderedDict()
    for t1Title, t1 in titledTracks:
        for t2Title, t2 in titledTracks:
            # Pairs are skipped by title, so tracks sharing a title are
            # never compared with each other.
            if t1Title == t2Title:
                continue
            result = doAnalysis(analysisSpec, self._analysisBins,
                                [Track(t1), Track(t2)])
            resultDict = result.getGlobalResult()
            # NOTE(review): raises KeyError if the global result has no
            # 'Result' entry - confirm that this is intended.
            results[(t1Title, t2Title)] = resultDict['Result']

    # The collected results were previously discarded; return them so that
    # callers can use them.
    return results
def getAnalysisDefFromStat(stat):
    """
    Maps the name of a statistic to a tuple (AnalysisSpec for the statistic
    class, integer flag of 0 or 1).

    For a name that is not recognized, the string "Something went wrong" is
    returned instead of a tuple.
    """
    if stat == 'CountStat':
        statClass, flag = CountStat, 1
    elif stat == 'StartEndStat':
        statClass, flag = StartEndStat, 1
    elif stat == 'SegmentDistancesStat':
        statClass, flag = SegmentDistancesStat, 0
    elif stat == 'SegmentLengthsStat':
        statClass, flag = SegmentLengthsStat, 0
    elif stat == 'AvgSegLenStat':
        statClass, flag = AvgSegLenStat, 0
    elif stat == 'CountSegmentStat':
        statClass, flag = CountSegmentStat, 1
    elif stat == 'ProportionCountStat':
        statClass, flag = ProportionCountStat, 1
    elif stat == 'RawOverlapStat':
        statClass, flag = RawOverlapStat, 1
    else:
        return "Something went wrong"

    return (AnalysisSpec(statClass), flag)
def execute(cls, choices, galaxyFn=None, username=''):
    '''
    Is called when execute-button is pushed by web-user. Should print
    output as HTML to standard out, which will be directed to a results page
    in Galaxy history. If getOutputFormat is anything else than HTML, the
    output should be written to the file with path galaxyFn. If needed,
    StaticFile can be used to get a path where additional files can be put
    (e.g. generated image files). choices is a list of selections made by
    web-user in each options box.

    For every user bin, summarizes (by choices.summaryFunc, or the class
    default) a per-track count statistic across the tracks of the selected
    GSuite and prints the values as an HTML table. For question cls.Q3 a
    p-value per bin is computed as well and shown in an extra column.
    '''
    cls._setDebugModeIfSelected(choices)

    # Stat question 4
    summaryFunc = choices.summaryFunc if choices.summaryFunc else cls.SUMMARY_FUNC_DEFAULT
    isQ2 = choices.analysisName == cls.Q2
    isQ3 = choices.analysisName == cls.Q3

    statTxt = "Maximum" if summaryFunc == "max" else "Average"
    if isQ2:
        statDesc = 'number of <b>base pairs covered by segments</b>'
    else:
        statDesc = 'number of <b>segments</b> per base'

    core = HtmlCore()
    core.begin()
    core.header("Enrichment of GSuite tracks across regions")
    core.divBegin(divClass='resultsExplanation')
    core.paragraph(
        'The following is a list of all regions (bins) and the <b>' +
        statTxt.lower() + '</b> ' + statDesc +
        ' across the tracks within each region.')
    core.divEnd()

    # Tracks and bins are the same for both analyses below, so they are
    # built only once.
    gsuite = getGSuiteFromGalaxyTN(choices.gsuite)
    tracks = [Track(x.trackName) for x in gsuite.allTracks()]
    regSpec, binSpec = cls.getRegsAndBinsSpec(choices)
    analysisBins = GalaxyInterface._getUserBinSource(regSpec, binSpec,
                                                     choices.genome)

    if isQ3:
        # Compute p-value per bin
        pvalSpec = AnalysisSpec(GSuiteBinEnrichmentPValWrapperStat)
        pvalSpec.addParameter('rawStatistic', 'BinSizeStat')
        pvalSpec.addParameter("globalSource", 'userbins')
        results_pval = doAnalysis(pvalSpec, analysisBins, tracks)

    analysisSpec = AnalysisSpec(SummarizedWrapperStat)
    analysisSpec.addParameter('rawStatistic', 'SummarizedWrapperStat')
    countStat = 'ProportionCountStat' if isQ2 else 'ProportionElementCountStat'
    analysisSpec.addParameter('pairwiseStatistic', countStat)
    analysisSpec.addParameter('summaryFunc', summaryFunc)
    results = doAnalysis(analysisSpec, analysisBins, tracks)

    # Bin -> value to show in the table; for Q3 a (value, p-value) tuple.
    prettyResults = {}
    for key, val in results.iteritems():
        if "Result" not in val.keys():
            prettyResults[key] = "No result"
        elif isQ3:
            prettyResults[key] = (val["Result"], results_pval[key]["Result"])
        else:
            prettyResults[key] = val["Result"]

    columnNames = ['Bin', 'Representation within the bin']
    if isQ3:
        columnNames.append('p-value')

    core.divBegin()
    if choices.analysisName == cls.Q1:
        shortQuestion = cls.Q1_SHORT
    elif isQ2:
        shortQuestion = cls.Q2_SHORT
    else:  # Q3
        shortQuestion = cls.Q3_SHORT

    visibleRows = 20
    makeTableExpandable = len(prettyResults) > visibleRows

    addTableWithTabularAndGsuiteImportButtons(
        core, choices, galaxyFn, shortQuestion, tableDict=prettyResults,
        columnNames=columnNames, sortable=True, presorted=0,
        expandable=makeTableExpandable)

    core.divEnd()
    core.end()

    print(str(core))
def execute(cls, choices, galaxyFn=None, username=''):
    '''
    Is called when execute-button is pushed by web-user. Should print
    output as HTML to standard out, which will be directed to a results page
    in Galaxy history. If getOutputFormat is anything else than HTML, the
    output should be written to the file with path galaxyFn. If needed,
    StaticFile can be used to get a path where additional files can be put
    (e.g. generated image files). choices is a list of selections made by
    web-user in each options box.

    For every user bin, summarizes the pairwise similarity between the tracks
    of the selected GSuite and prints the values as an HTML table. For
    question cls.Q2, a p-value for the highest ranking bin is computed first
    and reported above the table.
    '''
    cls._setDebugModeIfSelected(choices)

    similarityStatClassName = choices.similarityFunc if choices.similarityFunc \
        else GSuiteStatUtils.T5_RATIO_OF_OBSERVED_TO_EXPECTED_OVERLAP
    summaryFunc = choices.summaryFunc if choices.summaryFunc else cls.SUMMARY_FUNC_DEFAULT
    pairwiseStatName = GSuiteStatUtils.PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[similarityStatClassName]
    isQ2 = choices.analysisName == cls.Q2

    # Tracks and bins are the same for both analyses below, so they are
    # built only once.
    gsuite = getGSuiteFromGalaxyTN(choices.gsuite)
    tracks = [Track(x.trackName) for x in gsuite.allTracks()]
    regSpec, binSpec = cls.getRegsAndBinsSpec(choices)
    analysisBins = GalaxyInterface._getUserBinSource(regSpec, binSpec, choices.genome)

    statTxt = "Maximum" if summaryFunc == "max" else "Average"

    if isQ2:
        mcfdrDepth = choices.mcfdrDepth if choices.mcfdrDepth \
            else AnalysisDefHandler(REPLACE_TEMPLATES['$MCFDR$']).getOptionsAsText().values()[0][0]

        # First compute pvalue by running the statistic through a wrapper
        # stat that computes the max per bin
        analysisDefString = REPLACE_TEMPLATES['$MCFDRv3$'] + ' -> CollectionBinnedHypothesisWrapperStat'
        pvalSpec = AnalysisDefHandler(analysisDefString)
        pvalSpec.setChoice('MCFDR sampling depth', mcfdrDepth)
        pvalSpec.addParameter("rawStatistic", "GenericMaxBinValueStat")
        pvalSpec.addParameter('perBinStatistic', 'MultitrackSummarizedInteractionV2Stat')
        pvalSpec.addParameter('pairwiseStatistic', 'ObservedVsExpectedStat')
        pvalSpec.addParameter('summaryFunc', summaryFunc)
        pvalSpec.addParameter('tail', 'right-tail')
        pvalSpec.addParameter('assumptions', 'RandomGenomeLocationTrack')
        pvalSpec.addParameter('multitrackSummaryFunc', summaryFunc)

        pvalResults = doAnalysis(pvalSpec, analysisBins, tracks).getGlobalResult()
        resultsTxt = "The highest ranking bin based on the " + statTxt.lower() + " of the Forbes similarity measure for pairs of tracks within each bin had a score of <b>%.3f</b> with p-value <b>%.6f</b>" % (pvalResults["TSMC_GenericMaxBinValueStat"], pvalResults['P-value'])

    # Stat question 7
    core = HtmlCore()
    core.begin()

    analysisSpec = AnalysisSpec(MultitrackSummarizedInteractionWrapperStat)
    analysisSpec.addParameter('pairwiseStatistic', pairwiseStatName)
    analysisSpec.addParameter('summaryFunc', summaryFunc)
    analysisSpec.addParameter('multitrackSummaryFunc', summaryFunc)
    results = doAnalysis(analysisSpec, analysisBins, tracks)

    # Bin -> value to show in the table
    prettyResults = OrderedDict()
    for key, val in results.iteritems():
        if "Result" in val.keys():
            prettyResults[key] = val["Result"]
        else:
            prettyResults[key] = "No result"

    core.header(statTxt + " co-occurence between pairs of tracks within each bin")

    if isQ2:
        core.paragraph(resultsTxt)

    core.divBegin(divClass='resultsExplanation')
    core.paragraph('The following is a list of all bins and the <b>' + statTxt.lower() + '</b> co-occurrence of tracks within each bin.')
    core.divEnd()

    visibleRows = 20
    makeTableExpandable = len(prettyResults) > visibleRows

    columnNames = ['Bin', 'Co-occurrence within the bin']
    if choices.analysisName == cls.Q1:
        shortQuestion = cls.Q1_SHORT
    else:
        shortQuestion = cls.Q2_SHORT

    # Open the div that is closed after the table, so the emitted HTML is
    # balanced.
    core.divBegin()
    addTableWithTabularAndGsuiteImportButtons(
        core, choices, galaxyFn, shortQuestion, tableDict=prettyResults,
        columnNames=columnNames, sortable=True, presorted=0,
        expandable=makeTableExpandable, visibleRows=visibleRows)
    core.divEnd()
    core.end()

    print(str(core))
def execute(cls, choices, galaxyFn=None, username=''):
    """
    Runs SummarizedInteractionPerTsCatV2Stat between the selected query track
    and the tracks of the selected GSuite, split by the selected metadata
    category, and prints the per-category result as HTML.

    The summary function is fixed to 'minLqMedUqMax' (shown as a box plot)
    and the analysis is run on the user bin source for 'chr1' only.
    """
    categoryName = choices.cat
    genome = choices.genome

    queryTS = factory.getSingleTrackTS(genome, choices.query)
    refTS = factory.getFlatTracksTS(genome, choices.gsuite)
    categoricalTS = refTS.getSplittedByCategoryTS(categoryName)

    fullTS = TrackStructureV2()
    fullTS['query'] = queryTS
    fullTS['reference'] = categoricalTS

    summaryFunc = 'minLqMedUqMax'
    spec = AnalysisSpec(SummarizedInteractionPerTsCatV2Stat)
    spec.addParameter('pairwiseStatistic', ObservedVsExpectedStat.__name__)
    spec.addParameter('summaryFunc', summaryFunc)

    bins = UserBinSource('chr1', '*', genome=genome)
    tsRes = doAnalysis(spec, bins, fullTS).getGlobalResult()['Result']

    htmlCore = HtmlCore()
    htmlCore.begin()

    if summaryFunc == 'minAndMax':
        # One table row per category: "<min>-<max>"
        htmlCore.tableHeader(['Track', 'min-max'], sortable=False,
                             tableId='tab1')
        for categoryKey, categoryRes in tsRes.iteritems():
            lowest = "%.2f" % categoryRes.getResult()[0]
            highest = "%.2f" % categoryRes.getResult()[1]
            htmlCore.tableLine([categoryKey, lowest + '-' + highest])
        htmlCore.tableFooter()

    if summaryFunc == 'minLqMedUqMax':
        # One box per category
        categories = []
        dataList = []
        for categoryKey, categoryRes in tsRes.iteritems():
            categories.append(categoryKey)
            dataList.append(list(categoryRes.getResult()))

        from quick.webtools.restricted.visualization.visualizationGraphs import \
            visualizationGraphs
        graphs = visualizationGraphs()
        chartHtml = graphs.drawBoxPlotChart(dataList,
                                            categories=categories,
                                            seriesName=categoryName)
        htmlCore.line(chartHtml)

    htmlCore.end()
    print(htmlCore)
class GSuiteSingleValueAnalysisPerTrackTool(GeneralGuiTool, GenomeMixin, UserBinMixin, DebugMixin):
    '''
    GUI tool that runs one selected descriptive statistic on each track of a
    GSuite and presents the per-track values as an HTML table, together with
    a copy of the input GSuite having the values set as track attributes.
    '''
    ALLOW_UNKNOWN_GENOME = False
    ALLOW_GENOME_OVERRIDE = False

    # Requirements on the input GSuite, checked in validateAndReturnErrors
    GSUITE_ALLOWED_FILE_FORMATS = [GSuiteConstants.PREPROCESSED]
    GSUITE_ALLOWED_LOCATIONS = [GSuiteConstants.LOCAL]
    GSUITE_ALLOWED_TRACK_TYPES = [
        GSuiteConstants.POINTS, GSuiteConstants.VALUED_POINTS,
        GSuiteConstants.SEGMENTS, GSuiteConstants.VALUED_SEGMENTS
    ]
    GSUITE_DISALLOWED_GENOMES = [
        GSuiteConstants.UNKNOWN, GSuiteConstants.MULTIPLE
    ]

    # Name shown in the 'analysis' selection box -> analysis spec to run.
    # The AnalysisSpec objects are created once, when the class is defined.
    ANALYSIS_PRETTY_NAME_TO_ANALYSIS_SPEC_MAPPING = {
        'Base-pair coverage': AnalysisSpec(CountSegmentStat),
        'Average length of segments': AnalysisSpec(AvgElementLengthStat),
        'Number of elements': AnalysisSpec(CountElementStat)
    }

    @staticmethod
    def getToolName():
        '''
        Specifies a header of the tool, which is displayed at the top of the
        page.
        '''
        return "Compute a basic measure for each track in a GSuite"

    @classmethod
    def getInputBoxNames(cls):
        '''
        Specifies a list of headers for the input boxes, and implicitly also the
        number of input boxes to display on the page. The returned list can have
        two syntaxes:

            1) A list of strings denoting the headers for the input boxes in
               numerical order.
            2) A list of tuples of strings, where each tuple has
               two items: a header and a key.

        The contents of each input box must be defined by the function
        getOptionsBoxK, where K is either a number in the range of 1 to the
        number of boxes (case 1), or the specified key (case 2).

        Note: the key has to be camelCase (e.g. "firstKey")

        This tool uses syntax 2. The boxes declared here are combined with
        the boxes for genome selection, user bin selection and debugging that
        are returned by the corresponding getInputBoxNamesFor... methods.
        '''
        return [('Basic user mode', 'isBasic'),
                ('Select a GSuite:', 'gsuite'),
                ] + cls.getInputBoxNamesForGenomeSelection() + [
            ('Select a measure (descriptive statistic)', 'analysis'),
            # ('Select parameter', 'paramOne'),
            ('', 'explainOutput'),
        ] + cls.getInputBoxNamesForUserBinSelection() + \
            cls.getInputBoxNamesForDebug()

    # @staticmethod
    # def getInputBoxOrder():
    #     '''
    #     Specifies the order in which the input boxes should be displayed, as a
    #     list. The input boxes are specified by index (starting with 1) or by
    #     key. If None, the order of the input boxes is in the order specified by
    #     getInputBoxNames.
    #     '''
    #     return None

    @staticmethod
    def getOptionsBoxIsBasic():  # Alternatively: getOptionsBox1()
        '''
        Defines the contents of the 'isBasic' input box; always False.
        '''
        return False

    @staticmethod
    def getOptionsBoxGsuite(prevChoices):  # Alternatively: getOptionsBox1()
        '''
        Defines the type and contents of the input box. User selections are
        returned to the tools in the prevChoices and choices attributes to other
        methods. These are lists of results, one for each input box (in the
        order specified by getInputBoxOrder()).

        The input box is defined according to the following syntax:

        Selection box:          ['choice1','choice2']
        - Returns: string

        Text area:              'textbox' | ('textbox',1) | ('textbox',1,False)
        - Tuple syntax: (contents, height (#lines) = 1, read only flag = False)
        - The contents is the default value shown inside the text area
        - Returns: string

        Password field:         '__password__'
        - Returns: string

        Genome selection box:   '__genome__'
        - Returns: string

        Track selection box:    '__track__'
        - Requires genome selection box.
        - Returns: colon-separated string denoting track name

        History selection box:  ('__history__',) | ('__history__', 'bed', 'wig')
        - Only history items of specified types are shown.
        - Returns: colon-separated string denoting galaxy track name, as
                   specified in ExternalTrackManager.py.

        History check box list: ('__multihistory__', ) | ('__multihistory__', 'bed', 'wig')
        - Only history items of specified types are shown.
        - Returns: OrderedDict with galaxy id as key and galaxy track name
                   as value if checked, else None.

        Hidden field:           ('__hidden__', 'Hidden value')
        - Returns: string

        Table:                  [['header1','header2'], ['cell1_1','cell1_2'], ['cell2_1','cell2_2']]
        - Returns: None

        Check box list:         OrderedDict([('key1', True), ('key2', False), ('key3', False)])
        - Returns: OrderedDict from key to selection status (bool).

        Here: a history selection box restricted to items of type 'gsuite'.
        '''
        return '__history__', 'gsuite'

    # @staticmethod
    # def getOptionsBoxAnalysisCategory(prevChoices): # Alternatively: getOptionsBox2()
    #     '''
    #     See getOptionsBoxFirstKey().
    #
    #     prevChoices is a namedtuple of selections made by the user in the
    #     previous input boxes (that is, a namedtuple containing only one element
    #     in this case). The elements can accessed either by index, e.g.
    #     prevChoices[0] for the result of input box 1, or by key, e.g.
    #     prevChoices.key (case 2).
    #     '''
    #     if prevChoices.history:
    #         # return AnalysisManager.getMainCategoryNames()
    #         from quick.multitrack.MultiTrackCommon import getGSuiteFromGalaxyTN
    #         gSuite = getGSuiteFromGalaxyTN(prevChoices.history)
    #         tracks = list(gSuite.allTracks())
    #         if len(tracks) > 0:
    #             firstTrack = tracks[0]
    #             return firstTrack.path, 1, True
    #         # from quick.application.GalaxyInterface import GalaxyInterface
    #         # return getAnalysisCategories
    #
    # @staticmethod
    # def getOptionsBoxAnalysisSubcategory(prevChoices):
    #     if prevChoices.analysisCategory:
    #         return AnalysisManager.getSubCategoryNames(prevChoices.analysisCategory)

    @classmethod
    def getOptionsBoxAnalysis(cls, prevChoices):
        '''
        Defines the contents of the 'analysis' selection box: the pretty names
        of the available analyses, once a GSuite has been selected. Nothing
        is returned (None) as long as no GSuite is selected.
        '''
        # if prevChoices.analysisCategory:
        if prevChoices.gsuite:
            # TODO: fix implementation, dont delete commented out code until than
            # gSuite = getGSuiteFromGalaxyTN(prevChoices.gsuite)
            # tracks = list(gSuite.allTracks())
            # # fullCategory = AnalysisManager.combineMainAndSubCategories(prevChoices.analysisCategory, 'Basic')
            # fullCategory = AnalysisManager.combineMainAndSubCategories('Descriptive statistics', 'Basic')
            # return sorted([AnalysisDefHandler.splitAnalysisText(str(x))[0] for x in
            #                AnalysisManager.getValidAnalysesInCategory(fullCategory, gSuite.genome, tracks[0].trackName,
            #                                                           None)])
            return cls.ANALYSIS_PRETTY_NAME_TO_ANALYSIS_SPEC_MAPPING.keys()
            # AnalysisManager.getAnalysisDict()[cls.DESCRIPTIVE_BASIC_CAT].keys()

    # @classmethod
    # def getOptionsBoxParamOne(cls, prevChoices):
    #     if prevChoices.analysis:
    #         gSuite = getGSuiteFromGalaxyTN(prevChoices.gsuite)
    #         tracks = list(gSuite.allTracks())
    #         # fullCategory = AnalysisManager.combineMainAndSubCategories('Descriptive statistics', 'Basic')
    #         # analysis = GSuiteSingleValueAnalysisPerTrackTool._resolveAnalysisFromName(gSuite.genome, fullCategory,
    #         #                                                                           tracks[0].trackName,
    #         #                                                                           prevChoices.analysis)
    #         analysis = cls.ANALYSIS_PRETTY_NAME_TO_ANALYSIS_SPEC_MAPPING[prevChoices.analysis]
    #         paramOneName, paramOneValues = analysis.getFirstOptionKeyAndValues()
    #         if paramOneName and paramOneValues and len(paramOneValues) > 1:
    #             return paramOneValues

    # @staticmethod
    # def _resolveAnalysisFromName(genome, fullCategory, trackName, analysisName):
    #     selectedAnalysis = None
    #     for analysis in AnalysisManager.getValidAnalysesInCategory(fullCategory, genome, trackName, None):
    #         if analysisName == AnalysisDefHandler.splitAnalysisText(str(analysis))[0]:
    #             selectedAnalysis = analysis
    #
    #     return selectedAnalysis

    @staticmethod
    def getOptionsBoxExplainOutput(prevChoices):
        '''
        Defines the contents of the 'explainOutput' box: a raw HTML string
        with an explanation of the two output choices.
        '''
        core = HtmlCore()
        core.divBegin(divClass='input-explanation')
        core.paragraph(
            """Select 'gsuite' for output to get a new GSuite with the results as a metadata column <br> or select 'html' to view a simple table of the results.""")
        core.divEnd()
        return '__rawstr__', str(core)

    @classmethod
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Is called when execute-button is pushed by web-user. Should print
        output as HTML to standard out, which will be directed to a results page
        in Galaxy history. If getOutputFormat is anything else than HTML, the
        output should be written to the file with path galaxyFn. If needed,
        StaticFile can be used to get a path where additional files can be put
        (e.g. generated image files). choices is a list of selections made by
        web-user in each options box.

        Runs the selected analysis on each track of the selected GSuite over
        the user bins, sets the outcome as attribute(s) on the track, writes
        the GSuite with these attributes to a run-specific file and prints a
        results table with import buttons for the tabular and GSuite files.
        '''
        DebugMixin._setDebugModeIfSelected(choices)

        genome = choices.genome
        gSuite = getGSuiteFromGalaxyTN(choices.gsuite)
        # fullCategory = AnalysisManager.combineMainAndSubCategories(choices.analysisCategory, 'Basic')
        # NOTE(review): fullCategory is only referred to by commented-out
        # code below.
        fullCategory = AnalysisManager.combineMainAndSubCategories(
            'Descriptive statistics', 'Basic')
        tracks = list(gSuite.allTracks())
        analysisName = choices.analysis
        # selectedAnalysis = GSuiteSingleValueAnalysisPerTrackTool \
        #     ._resolveAnalysisFromName(gSuite.genome, fullCategory, tracks[0].trackName, analysisName)

        selectedAnalysis = cls.ANALYSIS_PRETTY_NAME_TO_ANALYSIS_SPEC_MAPPING[
            choices.analysis]

        regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices)
        analysisBins = GalaxyInterface._getUserBinSource(regSpec,
                                                         binSpec,
                                                         genome=genome)
        # paramName, paramValues = selectedAnalysis.getFirstOptionKeyAndValues()
        # if paramName and paramValues:
        #     if len(paramValues) == 1:
        #         selectedAnalysis.addParameter(paramName, paramValues[0])
        #     else:
        #         selectedAnalysis.addParameter(paramName, choices.paramOne)

        # Track title -> OrderedDict: column name -> formatted value
        tableDict = OrderedDict()

        for track in tracks:
            tableDict[track.title] = OrderedDict()
            result = doAnalysis(selectedAnalysis, analysisBins, [track])
            resultDict = result.getGlobalResult()
            if 'Result' in resultDict:
                # Single value: attribute and column are named by the analysis
                track.setAttribute(analysisName.lower(),
                                   str(resultDict['Result']))
                tableDict[
                    track.title][analysisName] = strWithNatLangFormatting(
                        resultDict['Result'])
            else:
                # Several values: one attribute and one column per result
                # key, named '<analysis name>:<result key>'
                for attrName, attrVal in resultDict.iteritems():
                    attrNameExtended = analysisName + ':' + attrName
                    track.setAttribute(attrNameExtended.lower(), str(attrVal))
                    tableDict[track.title][
                        attrNameExtended] = strWithNatLangFormatting(attrVal)
            # assert isinstance(resultDict['Result'], (int, basestring, float)), type(resultDict['Result'])

        core = HtmlCore()
        core.begin()
        core.header('Results: ' + analysisName)

        # Callback handed to tableWithImportButtons for producing the table
        def _produceTable(core, tableDict=None, tableId=None):
            return core.tableFromDictOfDicts(tableDict,
                                             firstColName='Track title',
                                             tableId=tableId,
                                             expandable=True,
                                             visibleRows=20,
                                             presorted=0)

        tableId = 'results_table'
        tableFile = GalaxyRunSpecificFile([tableId, 'table.tsv'], galaxyFn)
        tabularHistElementName = 'Raw results: ' + analysisName

        # The GSuite is written after the loop above, so that it includes
        # the attributes set on the tracks.
        gsuiteFile = GalaxyRunSpecificFile(
            [tableId, 'input_with_results.gsuite'], galaxyFn)
        GSuiteComposer.composeToFile(gSuite, gsuiteFile.getDiskPath())
        gsuiteHistElementName = \
            getGSuiteHistoryOutputName('result', ', ' + analysisName, choices.gsuite)

        core.tableWithImportButtons(
            tabularFile=True,
            tabularFn=tableFile.getDiskPath(),
            tabularHistElementName=tabularHistElementName,
            gsuiteFile=True,
            gsuiteFn=gsuiteFile.getDiskPath(),
            gsuiteHistElementName=gsuiteHistElementName,
            produceTableCallbackFunc=_produceTable,
            tableDict=tableDict,
            tableId=tableId)
        core.end()
        print core

    @classmethod
    def validateAndReturnErrors(cls, choices):
        '''
        Should validate the selected input parameters. If the parameters are not
        valid, an error text explaining the problem should be returned. The GUI
        then shows this text to the user (if not empty) and greys out the
        execute button (even if the text is empty). If all parameters are valid,
        the method should return None, which enables the execute button.

        The checks are done in this order, returning at the first failure:
        a GSuite is selected (the tool guide HTML is returned if not), the
        GSuite file, the genome, the GSuite requirements declared as class
        constants, the size of the track list and the user bins.
        '''

        from quick.toolguide.controller.ToolGuide import ToolGuideController
        from quick.toolguide import ToolGuideConfig

        if not choices.gsuite:
            return ToolGuideController.getHtml(cls.toolId,
                                               [ToolGuideConfig.GSUITE_INPUT],
                                               choices.isBasic)

        errorStr = GeneralGuiTool._checkGSuiteFile(choices.gsuite)
        if errorStr:
            return errorStr

        errorString = cls._validateGenome(choices)
        if errorString:
            return errorString

        gSuite = getGSuiteFromGalaxyTN(choices.gsuite)

        errorString = GeneralGuiTool._checkGSuiteRequirements \
            (gSuite,
             cls.GSUITE_ALLOWED_FILE_FORMATS,
             cls.GSUITE_ALLOWED_LOCATIONS,
             cls.GSUITE_ALLOWED_TRACK_TYPES,
             cls.GSUITE_DISALLOWED_GENOMES)
        if errorString:
            return errorString

        errorString = GeneralGuiTool._checkGSuiteTrackListSize(gSuite)
        if errorString:
            return errorString

        errorString = cls.validateUserBins(choices)
        if errorString:
            return errorString

    @staticmethod
    def isPublic():
        '''
        Specifies whether the tool is accessible to all users. If False, the
        tool is only accessible to a restricted set of users as defined in
        LocalOSConfig.py.
        '''
        return True

    @staticmethod
    def getOutputFormat(choices=None):
        '''
        Specifies the output format of the tool; always 'customhtml'.
        '''
        return 'customhtml'

    @staticmethod
    def isDebugMode():
        '''
        Specifies whether the tool is in debug mode; always False.
        '''
        return False
def execute(cls, choices, galaxyFn=None, username=''): ''' Is called when execute-button is pushed by web-user. Should print output as HTML to standard out, which will be directed to a results page in Galaxy history. If getOutputFormat is anything else than HTML, the output should be written to the file with path galaxyFn. If needed, StaticFile can be used to get a path where additional files can be put (e.g. generated image files). choices is a list of selections made by web-user in each options box. ''' import numpy numpy.seterr(all='raise') cls._setDebugModeIfSelected(choices) # DebugUtil.insertBreakPoint(username=username, currentUser='******') genome = choices.genome analysisQuestion = choices.analysisName similaryStatClassName = choices.similarityFunc if choices.similarityFunc else GSuiteStatUtils.T5_RATIO_OF_OBSERVED_TO_EXPECTED_OVERLAP summaryFunc = choices.summaryFunc if choices.summaryFunc else 'average' reverse = 'Yes' if choices.reversed else 'No' gsuite = getGSuiteFromGalaxyTN(choices.gsuite) regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices) analysisBins = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome=genome) tracks = [ Track(x.trackName, trackTitle=x.title) for x in gsuite.allTracks() ] trackTitles = CommonConstants.TRACK_TITLES_SEPARATOR.join( [quote(x.title, safe='') for x in gsuite.allTracks()]) additionalResultsDict = OrderedDict() additionalAttributesDict = OrderedDict() if analysisQuestion in [cls.Q1, cls.Q2, cls.Q3]: additionalAttributesDict = cls.getSelectedAttributesForEachTrackDict( choices.additionalAttributes, gsuite) #additional analysis stats = [CountStat, CountElementStat] additionalResultsDict = runMultipleSingleValStatsOnTracks( gsuite, stats, analysisBins, queryTrack=None) if analysisQuestion == cls.Q1: analysisSpec = AnalysisSpec( GSuiteRepresentativenessOfTracksRankingsWrapperStat) analysisSpec.addParameter( 'pairwiseStatistic', GSuiteStatUtils. 
PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[similaryStatClassName]) analysisSpec.addParameter( 'summaryFunc', GSuiteStatUtils.SUMMARY_FUNCTIONS_MAPPER[summaryFunc]) analysisSpec.addParameter('reverse', reverse) analysisSpec.addParameter('ascending', 'No') analysisSpec.addParameter('trackTitles', trackTitles) analysisSpec.addParameter('queryTracksNum', len(tracks)) results = doAnalysis(analysisSpec, analysisBins, tracks).getGlobalResult() gsPerTrackResultsModel = GSuitePerTrackResultModel( results, ['Similarity to rest of tracks in suite (%s)' % summaryFunc], additionalResultsDict=additionalResultsDict, additionalAttributesDict=additionalAttributesDict) if choices.leadAttribute and choices.leadAttribute != GSuiteConstants.TITLE_COL: columnTitles, decoratedResultsDict = \ gsPerTrackResultsModel.generateColumnTitlesAndResultsDict(choices.leadAttribute) else: columnTitles, decoratedResultsDict = \ gsPerTrackResultsModel.generateColumnTitlesAndResultsDict() core = HtmlCore() core.begin() core.divBegin(divId='results-page') core.divBegin(divClass='results-section') core.header(analysisQuestion) topTrackTitle = results.keys()[0] core.paragraph(''' The track "%s" is the most representative track of the GSuite with %s %s similarity to the rest of the tracks as measured by "%s" track similarity measure. 
''' % (topTrackTitle, results[topTrackTitle], summaryFunc, similaryStatClassName)) addTableWithTabularAndGsuiteImportButtons( core, choices, galaxyFn, cls.Q1_SHORT, decoratedResultsDict, columnTitles, gsuite=gsuite, results=results, gsuiteAppendAttrs=['similarity_score'], sortable=True) # plot columnInd = 0 if choices.leadAttribute and choices.leadAttribute != GSuiteConstants.TITLE_COL: columnInd = 1 res = GSuiteTracksCoincidingWithQueryTrackTool.drawPlot( results, additionalResultsDict, 'Similarity to rest of tracks in suite (%s)' % summaryFunc, columnInd=columnInd) core.line(res) core.divEnd() core.divEnd() core.end() # elif analysisQuestion == cls.Q2: # analysisSpec = AnalysisSpec(GSuiteRepresentativenessOfTracksRankingsWrapperStat) # analysisSpec.addParameter('pairwiseStatistic', GSuiteStatUtils.PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[similaryStatClassName]) # analysisSpec.addParameter('summaryFunc', GSuiteStatUtils.SUMMARY_FUNCTIONS_MAPPER[summaryFunc]) # analysisSpec.addParameter('reverse', reverse) # analysisSpec.addParameter('ascending', 'Yes') # analysisSpec.addParameter('trackTitles', trackTitles) # results = doAnalysis(analysisSpec, analysisBins, tracks).getGlobalResult() # # gsPerTrackResultsModel = GSuitePerTrackResultModel( # results, ['Similarity to rest of tracks in suite (%s)' % summaryFunc], # additionalResultsDict=additionalResultsDict, # additionalAttributesDict=additionalAttributesDict) # if choices.leadAttribute and choices.leadAttribute != GSuiteConstants.TITLE_COL: # columnTitles, decoratedResultsDict = \ # gsPerTrackResultsModel.generateColumnTitlesAndResultsDict(choices.leadAttribute) # else: # columnTitles, decoratedResultsDict = \ # gsPerTrackResultsModel.generateColumnTitlesAndResultsDict() # # core = HtmlCore() # core.begin() # core.divBegin(divId='results-page') # core.divBegin(divClass='results-section') # core.header(analysisQuestion) # topTrackTitle = results.keys()[0] # core.paragraph(''' # The track "%s" is the most atypical track 
of the GSuite with %s %s similarity to the rest of the tracks # as measured by the "%s" track similarity measure. # ''' % (topTrackTitle, strWithNatLangFormatting(results[topTrackTitle]), summaryFunc, similaryStatClassName)) # # core.tableFromDictionary(results, columnNames=['Track title', 'Similarity to rest of tracks in suite (' + summaryFunc+')'], sortable=False) # # from quick.util import CommonFunctions # rawDataURIList = CommonFunctions.getHyperlinksForRawTableData( # dataDict=decoratedResultsDict, colNames=columnTitles, # tableId="resultsTable", galaxyFn=galaxyFn) # core.tableFromDictionary(decoratedResultsDict, columnNames=columnTitles, sortable=True, # tableId='resultsTable', addInstruction=True, # addRawDataSelectBox=True, rawDataURIList=rawDataURIList) # # core.tableFromDictionary(decoratedResultsDict, columnNames=columnTitles, sortable=True, tableId='resultsTable') # # columnInd = 0 # if choices.leadAttribute and choices.leadAttribute != GSuiteConstants.TITLE_COL: # columnInd = 1 # res = GSuiteTracksCoincidingWithQueryTrackTool.drawPlot( # results, additionalResultsDict, # 'Similarity to rest of tracks in suite (%s)' % summaryFunc, # columnInd=columnInd) # core.line(res) # core.divEnd() # core.divEnd() # core.end() # # if choices.addResults == 'Yes': # GSuiteStatUtils.addResultsToInputGSuite( # gsuite, results, ['Similarity_score'], # cls.extraGalaxyFn[GSUITE_EXPANDED_WITH_RESULT_COLUMNS_FILENAME]) elif analysisQuestion == cls.Q3: mcfdrDepth = choices.mcfdrDepth if choices.mcfdrDepth else \ AnalysisDefHandler(REPLACE_TEMPLATES['$MCFDR$']).getOptionsAsText().values()[0][0] analysisDefString = REPLACE_TEMPLATES[ '$MCFDRv3$'] + ' -> GSuiteRepresentativenessOfTracksRankingsAndPValuesWrapperStat' analysisSpec = AnalysisDefHandler(analysisDefString) analysisSpec.setChoice('MCFDR sampling depth', mcfdrDepth) analysisSpec.addParameter('assumptions', 'PermutedSegsAndIntersegsTrack') analysisSpec.addParameter( 'rawStatistic', 
SummarizedInteractionWithOtherTracksV2Stat.__name__) analysisSpec.addParameter( 'pairwiseStatistic', GSuiteStatUtils. PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[similaryStatClassName]) analysisSpec.addParameter( 'summaryFunc', GSuiteStatUtils.SUMMARY_FUNCTIONS_MAPPER[summaryFunc]) analysisSpec.addParameter('tail', 'right-tail') analysisSpec.addParameter('trackTitles', trackTitles) results = doAnalysis(analysisSpec, analysisBins, tracks).getGlobalResult() core = HtmlCore() gsPerTrackResultsModel = GSuitePerTrackResultModel( results, [ 'Similarity to rest of tracks in suite (%s)' % summaryFunc, 'P-value' ], additionalResultsDict=additionalResultsDict, additionalAttributesDict=additionalAttributesDict) if choices.leadAttribute and choices.leadAttribute != GSuiteConstants.TITLE_COL: columnTitles, decoratedResultsDict = \ gsPerTrackResultsModel.generateColumnTitlesAndResultsDict(choices.leadAttribute) else: columnTitles, decoratedResultsDict = \ gsPerTrackResultsModel.generateColumnTitlesAndResultsDict() core.begin() core.divBegin(divId='results-page') core.divBegin(divClass='results-section') core.header(analysisQuestion) topTrackTitle = results.keys()[0] core.paragraph(''' The track "%s" has the lowest P-value of %s corresponding to %s %s similarity to the rest of the tracks as measured by "%s" track similarity measure. 
''' % (topTrackTitle, strWithNatLangFormatting(results[topTrackTitle][1]), strWithNatLangFormatting(results[topTrackTitle][0]), summaryFunc, similaryStatClassName)) # core.tableFromDictionary(results, columnNames=['Track title', 'Similarity to rest of tracks in suite (' + summaryFunc+')', 'P-value'], sortable=False) addTableWithTabularAndGsuiteImportButtons( core, choices, galaxyFn, cls.Q3_SHORT, decoratedResultsDict, columnTitles, gsuite=gsuite, results=results, gsuiteAppendAttrs=['similarity_score', 'p_value'], sortable=True) core.divEnd() core.divEnd() core.end() else: # Q4 mcfdrDepth = choices.mcfdrDepth if choices.mcfdrDepth else \ AnalysisDefHandler(REPLACE_TEMPLATES['$MCFDR$']).getOptionsAsText().values()[0][0] analysisDefString = REPLACE_TEMPLATES[ '$MCFDRv3$'] + ' -> CollectionSimilarityHypothesisWrapperStat' analysisSpec = AnalysisDefHandler(analysisDefString) analysisSpec.setChoice('MCFDR sampling depth', mcfdrDepth) analysisSpec.addParameter('assumptions', 'PermutedSegsAndIntersegsTrack') analysisSpec.addParameter('rawStatistic', 'MultitrackSummarizedInteractionV2Stat') analysisSpec.addParameter( 'pairwiseStatistic', GSuiteStatUtils. PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[similaryStatClassName]) analysisSpec.addParameter( 'summaryFunc', GSuiteStatUtils.SUMMARY_FUNCTIONS_MAPPER[summaryFunc]) analysisSpec.addParameter('multitrackSummaryFunc', 'avg') # should it be a choice? 
analysisSpec.addParameter('tail', 'right-tail') results = doAnalysis(analysisSpec, analysisBins, tracks).getGlobalResult() pval = results['P-value'] observed = results['TSMC_MultitrackSummarizedInteractionV2Stat'] significanceLevel = 'strong' if pval < 0.01 else ( 'weak' if pval < 0.05 else 'no') core = HtmlCore() core.begin() core.divBegin(divId='results-page') core.divBegin(divClass='results-section') core.header(analysisQuestion) core.paragraph(''' The tracks in the suite show %s significance in their collective similarity (average similarity of a track to the rest) of %s and corresponding p-value of %s, as measured by "%s" track similarity measure. ''' % (significanceLevel, strWithNatLangFormatting(observed), strWithNatLangFormatting(pval), similaryStatClassName)) core.divEnd() core.divEnd() core.end() print str(core)
def execute(cls, choices, galaxyFn=None, username=''): ''' Is called when execute-button is pushed by web-user. Should print output as HTML to standard out, which will be directed to a results page in Galaxy history. If getOutputFormat is anything else than HTML, the output should be written to the file with path galaxyFn. If needed, StaticFile can be used to get a path where additional files can be put (e.g. generated image files). choices is a list of selections made by web-user in each options box. ''' cls._setDebugModeIfSelected(choices) genome = choices.genome queryGSuite = getGSuiteFromGalaxyTN(choices.queryGSuite) refGSuite = getGSuiteFromGalaxyTN(choices.refGSuite) if choices.similarityFunc: similarityStatClassNameKey = choices.similarityFunc else: similarityStatClassNameKey = GSuiteStatUtils.T5_RATIO_OF_OBSERVED_TO_EXPECTED_OVERLAP isPointsVsSegments, pointsGSuite, segGSuite = cls.isPointsVsSegmentsAnalysis(queryGSuite, refGSuite) regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices) analysisBins = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome=genome) queryTrackList = [Track(x.trackName, x.title) for x in queryGSuite.allTracks()] refTrackList = [Track(x.trackName, x.title) for x in refGSuite.allTracks()] queryTrackTitles = CommonConstants.TRACK_TITLES_SEPARATOR.join( [quote(x.title, safe='') for x in queryGSuite.allTracks()]) refTrackTitles = CommonConstants.TRACK_TITLES_SEPARATOR.join( [quote(x.title, safe='') for x in refGSuite.allTracks()]) analysisSpec = AnalysisSpec(GSuiteVsGSuiteWrapperStat) analysisSpec.addParameter('queryTracksNum', str(len(queryTrackList))) analysisSpec.addParameter('refTracksNum', str(len(refTrackList))) analysisSpec.addParameter('queryTrackTitleList', queryTrackTitles) analysisSpec.addParameter('refTrackTitleList', refTrackTitles) analysisSpec.addParameter('similarityStatClassName', GSuiteStatUtils.PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[similarityStatClassNameKey]) if choices.removeZeroRow: 
analysisSpec.addParameter('removeZeroRow', choices.removeZeroRow) if choices.removeZeroCol: analysisSpec.addParameter('removeZeroColumn', choices.removeZeroCol) resultsObj = doAnalysis(analysisSpec, analysisBins, queryTrackList + refTrackList) results = resultsObj.getGlobalResult() # baseDir = GalaxyRunSpecificFile([RAW_OVERLAP_TABLE_RESULT_KEY], galaxyFn).getDiskPath() # rawOverlapHeatmapPresenter = HeatmapFromDictOfDictsPresenter(resultsObj, baseDir, # 'Overlapping base-pair of tracks from the two suites', # printDimensions=False) rawOverlapTableData = results[RAW_OVERLAP_TABLE_RESULT_KEY] maxRawOverlap, maxROt1, maxROt2 = rawOverlapTableData.getMaxElement() similarityScoreTableData = results[SIMILARITY_SCORE_TABLE_RESULT_KEY] maxSimScore, maxSSt1, maxSSt2 = similarityScoreTableData.getMaxElement() baseDir = GalaxyRunSpecificFile([], galaxyFn=galaxyFn).getDiskPath() heatmapPresenter = HeatmapFromTableDataPresenter(resultsObj, baseDir=baseDir, header='Overlapping base-pairs between the tracks of the two suites', printDimensions=False) tablePresenter = MatrixGlobalValueFromTableDataPresenter(resultsObj, baseDir=baseDir, header='Table of overlapping base-pairs between the tracks of the two suites') core = HtmlCore() core.begin() core.divBegin(divId='results-page') core.divBegin(divId='svs-res-main-div', divClass='svs-res-main') core.divBegin(divId='raw-overlap-div', divClass='results-section') core.divBegin(divId='raw-overlap-table', divClass='svs-table-div') core.header('Base-pair overlaps between the tracks of the two GSuites') core.paragraph("""From the tracks in the two GSuites the highest base-pair overlap <b>(%s bps)</b> is observed for the pair of <b>'%s'</b> and <b>'%s'</b>.""" % (maxRawOverlap, maxROt1, maxROt2)) core.divBegin(divId='raw-table-result', divClass='result-div') core.divBegin(divId='raw-table-result', divClass='result-div-left') core.line('''Follow the links to view the results in an HTML table or raw tabular form:''') core.divEnd() 
core.divBegin(divId='raw-table-result', divClass='result-div-right') core.line(tablePresenter.getReference(RAW_OVERLAP_TABLE_RESULT_KEY)) core.divEnd()#rawoverlap table core.divEnd() core.divEnd() core.divBegin(divId='raw-overlap-heatmap', divClass='svs-heatmap-div') try: core.header('Heatmap of base-pair overlaps') core.divBegin(divId='raw-table-result', divClass='result-div-heatmap') core.divBegin(divId='raw-table-result', divClass='result-div-left') core.line('''Follow the links to view the heatmap in the desired format:''') core.divEnd() core.divBegin(divId='raw-table-result', divClass='result-div-right') core.line(heatmapPresenter.getReference(RAW_OVERLAP_TABLE_RESULT_KEY)) core.divEnd() core.divEnd() except: core.line('Heatmap for the base-pair overlaps could not be created.') core.divEnd() core.divEnd() core.divEnd()#rawoverlap heatmap core.divEnd()#rawoverlap core.divBegin(divId='sim-score-div', divClass='results-section') core.divBegin(divId='sim-score-table', divClass='svs-table-div') core.header('Similarity score between the tracks of the two GSuites measured by %s' % choices.similarityFunc) core.paragraph("""From the tracks in the two GSuites the highest similarity score <b>(%s)</b> is observed for the pair of <b>'%s'</b> and <b>'%s'</b>.""" % (maxSimScore, maxSSt1, maxSSt2)) core.divBegin(divId='raw-table-result', divClass='result-div') core.divBegin(divId='raw-table-result', divClass='result-div-left') core.line("""Follow the links to view the results in an HTML table or raw tabular form:""") core.divEnd() core.divBegin(divId='raw-table-result', divClass='result-div-right') core.line(tablePresenter.getReference(SIMILARITY_SCORE_TABLE_RESULT_KEY)) core.divEnd() core.divEnd() core.divEnd()#simscore table core.divBegin(divId='sim-score-heatmap', divClass='svs-heatmap-div') try: core.header('Heatmap of similarity scores') core.divBegin(divId='raw-table-result', divClass='result-div-heatmap') core.divBegin(divId='raw-table-result', 
divClass='result-div-left') core.line('''Follow the links to view the heatmap in the desired format:''') core.divEnd() core.divBegin(divId='raw-table-result', divClass='result-div-right') core.line(heatmapPresenter.getReference(SIMILARITY_SCORE_TABLE_RESULT_KEY)) core.divEnd() core.divEnd() except: core.line('Heatmap for the similarity score could not be created.') core.divEnd() core.divEnd() core.divEnd()#simscore heatmap core.divEnd()#simscore core.divEnd()#results # core.paragraph( # '''Table displaying the number of base-pairs overlapping between the tracks in the two suites:''') # core.tableFromDictOfDicts(rawOverlapTableData, firstColName='Track title') # # core.paragraph(rawOverlapHeatmapPresenter.getReference(resDictKey=RAW_OVERLAP_TABLE_RESULT_KEY)) # core.paragraph( # '''Table displaying the similarity score for the tracks in the two suites as measured by %s:''' % similarityStatClassNameKey) # core.tableFromDictOfDicts(similarityScoreTableData, firstColName='Track title') # core.divEnd() core.end() print str(core)
def run(self):
    '''
    Count the query track within the bins defined by the reference track.

    The reference (self._referenceTrackFn) is either a file name (string),
    used as a 'file' region specification, or a track name given as a list
    of strings, used as a 'track' region specification. CountStat is run on
    the query track (self._queryTrackName) over these bins for genome
    self._genome.

    If the result has exactly one result key, the result object is stored
    in self._result and the bins with a value greater than zero for that
    key are stored in self._intersectedReferenceBins. Otherwise both
    attributes are left untouched.

    Raises TypeError if the reference is neither a string nor a list.
    '''
    assert self._referenceTrackFn is not None

    reference = self._referenceTrackFn
    if isinstance(reference, basestring):
        regSpec, binSpec = 'file', reference
    elif isinstance(reference, list):
        regSpec, binSpec = 'track', ':'.join(reference)
    else:
        # Previously this fell through and failed later with an unbound
        # local variable; fail early with a clear message instead.
        raise TypeError('Reference track must be a file name (string) or a '
                        'track name (list), got %s' % type(reference).__name__)

    from gold.statistic.CountStat import CountStat
    from gold.application.HBAPI import PlainTrack
    from gold.application.HBAPI import doAnalysis
    from gold.description.AnalysisDefHandler import AnalysisSpec

    analysisBins = GalaxyInterface._getUserBinSource(
        regSpec, binSpec, self._genome)
    res = doAnalysis(AnalysisSpec(CountStat), analysisBins,
                     [PlainTrack(self._queryTrackName)])

    resDictKeys = res.getResDictKeys()
    if len(resDictKeys) == 1:
        resDictKey = resDictKeys[0]
        # Keep only the reference bins in which the query track has a
        # positive value.
        targetBins = [
            refBin for refBin in res.keys() if res[refBin][resDictKey] > 0
        ]
        self._result = res
        self._intersectedReferenceBins = targetBins
def execute(cls, choices, galaxyFn=None, username=''): cls._setDebugModeIfSelected(choices) targetGSuite = getGSuiteFromGalaxyTN(choices.gSuiteFirst) refGSuite = getGSuiteFromGalaxyTN(choices.gSuiteSecond) regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices) analysisDef = 'dummy -> RawOverlapStat' # analysisDef = 'dummy [withOverlaps=yes] -> RawOverlapAllowSingleTrackOverlapsStat' results = OrderedDict() for targetTrack in targetGSuite.allTracks(): targetTrackName = targetTrack.title for refTrack in refGSuite.allTracks(): refTrackName = refTrack.title if targetTrack.trackName == refTrack.trackName: # print targetTrack.title # print targetTrack.trackName result = DetermineSuiteTracksCoincidingWithAnotherSuite.handleSameTrack( targetTrack.trackName, regSpec, binSpec, targetGSuite.genome, galaxyFn) else: result = GalaxyInterface.runManual( [targetTrack.trackName, refTrack.trackName], analysisDef, regSpec, binSpec, targetGSuite.genome, galaxyFn, printRunDescription=False, printResults=False, printProgress=False).getGlobalResult() if targetTrackName not in results: results[targetTrackName] = OrderedDict() results[targetTrackName][refTrackName] = result stat = STAT_OVERLAP_COUNT_BPS statIndex = STAT_LIST_INDEX[stat] title = '' processedResults = [] headerColumn = [] for targetTrackName in targetGSuite.allTrackTitles(): resultRowDict = processRawResults(results[targetTrackName]) resultColumn = [] headerColumn = [] for refTrackName, statList in resultRowDict.iteritems(): resultColumn.append(statList[statIndex]) headerColumn.append(refTrackName) processedResults.append(resultColumn) outputTable = {} for elN in range(0, len(headerColumn)): outputTable[elN] = {} outputTable[elN]['id'] = headerColumn[elN] transposedProcessedResults = [list(x) for x in zip(*processedResults)] # second question sumSecondgSuite # first question numSecondgSuite # fifth question numSecondgSuitePercentage for i in range(0, len(transposedProcessedResults)): outputTable[i]['sumSecondgSuite'] = 
sum( transposedProcessedResults[i]) if not 'numSecondgSuite' in outputTable[i]: outputTable[i]['numSecondgSuite'] = 0 for j in range(0, len(transposedProcessedResults[i])): if transposedProcessedResults[i][j] >= 1: outputTable[i]['numSecondgSuite'] += 1 else: outputTable[i]['numSecondgSuite'] += 0 outputTable[i]['numSecondgSuitePercentage'] = float( outputTable[i]['numSecondgSuite']) / float( targetGSuite.numTracks()) * 100 from gold.statistic.CountSegmentStat import CountSegmentStat from gold.statistic.CountPointStat import CountPointStat from gold.description.TrackInfo import TrackInfo from gold.statistic.CountStat import CountStat # third question numPairBpSecondgSuite # fourth question numFreqBpSecondgSuite i = 0 for refTrack in refGSuite.allTracks(): formatName = TrackInfo(refTrack.genome, refTrack.trackName).trackFormatName analysisDef = CountStat analysisBins = GalaxyInterface._getUserBinSource( regSpec, binSpec, refTrack.genome) results = doAnalysis(AnalysisSpec(analysisDef), analysisBins, [PlainTrack(refTrack.trackName)]) resultDict = results.getGlobalResult() if len(resultDict) == 0: outputTable[i]['numPairBpSecondgSuite'] = None outputTable[i]['numFreqBpSecondgSuite'] = None outputTable[i]['numFreqUniqueBpSecondgSuite'] = None else: outputTable[i]['numPairBpSecondgSuite'] = resultDict['Result'] if outputTable[i]['numPairBpSecondgSuite'] != 0: outputTable[i]['numFreqBpSecondgSuite'] = float( outputTable[i]['sumSecondgSuite']) / float( outputTable[i]['numPairBpSecondgSuite']) else: outputTable[i]['numFreqBpSecondgSuite'] = None if outputTable[i]['sumSecondgSuite'] != 0: outputTable[i]['numFreqUniqueBpSecondgSuite'] = float( outputTable[i]['numPairBpSecondgSuite']) / float( outputTable[i]['sumSecondgSuite']) else: outputTable[i]['numFreqUniqueBpSecondgSuite'] = None i += 1 # sortTable outputTableLine = [] for key, item in outputTable.iteritems(): line = [ item['id'], item['numSecondgSuite'], item['sumSecondgSuite'], item['numPairBpSecondgSuite'], 
item['numFreqBpSecondgSuite'], item['numFreqUniqueBpSecondgSuite'], item['numSecondgSuitePercentage'] ] outputTableLine.append(line) import operator outputTableLineSort = sorted(outputTableLine, key=operator.itemgetter(1), reverse=True) tableHeader = [ 'Region ID ', 'Number of cases with at least one event ', 'Total number of events', 'Genome coverage (unique bp)', 'Number of events per unique bp', 'Number of unique bp per event', 'Percentage of cases with at least one event' ] htmlCore = HtmlCore() htmlCore.begin() htmlCore.line( "<b>Identification of genomic elements with high event recurrence</b> " ) htmlCore.header(title) htmlCore.divBegin('resultsDiv') htmlCore.tableHeader(tableHeader, sortable=True, tableId='resultsTable') for line in outputTableLineSort: htmlCore.tableLine(line) plotRes = [] plotXAxis = [] for lineInx in range(1, len(outputTableLineSort[0])): plotResPart = [] plotXAxisPart = [] for lineInxO in range(0, len(outputTableLineSort)): # if outputTableLineSort[lineInxO][lineInx]!=0 and # if outputTableLineSort[lineInxO][lineInx]!=None: plotResPart.append(outputTableLineSort[lineInxO][lineInx]) plotXAxisPart.append(outputTableLineSort[lineInxO][0]) plotRes.append(plotResPart) plotXAxis.append(plotXAxisPart) htmlCore.tableFooter() htmlCore.divEnd() htmlCore.divBegin('plot', style='padding-top:20px;margin-top:20px;') vg = visualizationGraphs() res = vg.drawColumnCharts( plotRes, titleText=tableHeader[1:], categories=plotXAxis, height=500, xAxisRotation=270, xAxisTitle='Ragion ID', yAxisTitle='Number of cases with at least one event', marginTop=30, addTable=True, sortableAccordingToTable=True, legend=False) htmlCore.line(res) htmlCore.divEnd() htmlCore.hideToggle(styleClass='debug') htmlCore.end() print htmlCore
def _getTrackStats(self, trackName, analysisBins):
    '''
    Run SegmentTrackOverviewStat on the track with the given name over
    analysisBins and return the result of doAnalysis.
    '''
    overviewSpec = AnalysisSpec(SegmentTrackOverviewStat)
    return doAnalysis(overviewSpec, analysisBins, [Track(trackName)])