def getOptionsBoxResultsTable(cls, prevChoices):
    """
    Build the HTML table that previews the tracks matched by the current search.

    prevChoices is read for: source, transfer, dataType, outputType, search,
    subCategory and (for manual selection) results.

    Returns None when there is nothing to show: no source chosen, transfer
    set to 'Yes', no data type available, an output type that does not
    list tracks, or no matching rows. Otherwise returns the tuple
    ('__rawstr__', html), where html is the rendered table as unicode.
    """
    if not prevChoices.source or prevChoices.transfer == 'Yes':
        return

    dataType = prevChoices.dataType
    # A data type of None cannot be split below; previously this raised
    # AttributeError when the source was a HyperBrowser one, since the
    # original guard only checked the data type for other sources.
    if dataType is None or ('HyperBrowser' not in prevChoices.source and not dataType):
        return

    gsm = TrackGlobalSearchModule(cls.useSqlite)
    # Choice labels look like 'name [something]'; only the part before '[' is used.
    source = prevChoices.source.split('[')[0].strip()
    dataTypes = [dataType.split('[')[0].strip()]

    outputType = prevChoices.outputType
    if outputType in [None, 'select 10 random tracks', 'select 50 random tracks']:
        return

    rowDicts = None
    if 'all tracks' in outputType:
        rowDicts = gsm.getRowsDicts(prevChoices.search, prevChoices.subCategory,
                                    source, dataTypes, filterFileSuffix=True)
    elif outputType == 'Select tracks manually':
        rowDicts = gsm.getRowsDicts(prevChoices.search, prevChoices.subCategory,
                                    source, dataTypes,
                                    selectedFileIDs=prevChoices.results,
                                    filterFileSuffix=True)
    if not rowDicts:
        return

    # NOTE(review): the table is keyed by file name, so rows sharing a file
    # name overwrite each other - confirm that this is intended.
    htmlTableDict = {}
    for row in rowDicts:
        for urlKey in ('url', 'uri', '_url'):
            if urlKey in row:
                filename = row[urlKey].split('/')[-1]
                break
        else:
            filename = '<No filename>'

        # Emit an empty cell for a missing attribute so that the remaining
        # values stay under their own column headers.
        htmlTableDict[filename] = [unicode(row[attr]) if attr in row else ''
                                   for attr in cls.RESULT_COLS]

    if not htmlTableDict:
        return

    html = HtmlCore()
    html.tableFromDictionary(htmlTableDict,
                             columnNames=['File name'] + cls.RESULT_COLS_HEADER,
                             tableId='t1', expandable=True)
    return '__rawstr__', unicode(html)
def execute(self, printHtmlBeginEnd=True, printTrackNamesTable=True):
    """
    Run the configured analysis on the first two tracks and print the
    resulting page as HTML to standard out.

    printHtmlBeginEnd is forwarded unchanged to GalaxyInterface.run.
    printTrackNamesTable controls whether a 'Tracks under analysis' table
    listing self._trackNames is printed ahead of the results.

    The closing toggle and the HTML end are printed even if the run raises;
    the exception itself is not caught here.
    """
    # Parenthesised single-argument print behaves exactly like the print statement.
    print(GalaxyInterface.getHtmlBeginForRuns(self._galaxyFn))

    trackTableCore = HtmlCore()
    if printTrackNamesTable:
        trackTableCore.divBegin('trackNames')
        trackRows = OrderedDict([(name, []) for name in self._trackNames])
        # The table is only made expandable from 11 tracks upwards.
        trackTableCore.tableFromDictionary(trackRows,
                                           ['Tracks under analysis'],
                                           tableId="resTable",
                                           expandable=len(self._trackNames) >= 11)
        trackTableCore.divEnd()
    print(trackTableCore)

    try:
        results = GalaxyInterface.run(self._tracks[0],
                                      self._tracks[1],
                                      self.getAnalysisDefinitionString(),
                                      self._regSpec,
                                      self._binSpec,
                                      self._genome,
                                      self._galaxyFn,
                                      printRunDescription=False,
                                      printHtmlBeginEnd=printHtmlBeginEnd,
                                      fromMainTool=False)
        if self.hasVisualization():
            print(self.visualizeResults(results))
    finally:
        toggleCore = HtmlCore()
        toggleCore.hideToggle(styleClass='infomessagesmall')
        print(toggleCore)
        print(GalaxyInterface.getHtmlEndForRuns())
def execute(choices, galaxyFn=None, username=''):
    '''
    Print an HTML meta-data summary of the GSuite selected in choices.gsuite.

    Is called when the execute-button is pushed by the web-user; the HTML is
    printed to standard out. galaxyFn and username are accepted for
    interface compatibility but are not used here.

    The page contains:
      * a bullet list describing the location, file format, track type and
        genome of the GSuite;
      * if the GSuite has attributes, a table giving the most frequent value
        of each attribute and how often it occurs.
    '''
    gSuite = getGSuiteFromGalaxyTN(choices.gsuite)
    attributeNames = gSuite.attributes

    # attribute name -> {attribute value -> number of tracks carrying it}
    attributeValCountDict = dict(
        (attrName, defaultdict(int)) for attrName in attributeNames)
    for gsTrack in gSuite.allTracks():
        for attrName in gsTrack.attributes:
            attributeValCountDict[attrName][gsTrack.getAttribute(attrName)] += 1

    def _summaryLine(template, value, descriptions, fallback=None):
        # Format the description of the first constant equal to value; use
        # fallback (or give no line at all) when none of them matches.
        for constant, description in descriptions:
            if value == constant:
                return template % (value, description)
        if fallback is None:
            return None
        return template % (value, fallback)

    htmlCore = HtmlCore()
    htmlCore.begin()
    htmlCore.divBegin(divId='results-page')
    htmlCore.divBegin(divClass='results-section')
    htmlCore.header('Meta-data summary:')

    candidateLines = [
        _summaryLine(
            'Location:<b> %s</b>. %s', gSuite.location,
            [(GSuiteConstants.UNKNOWN,
              "The location of the tracks in the GSuite is not specified as remote or local."),
             (GSuiteConstants.REMOTE,
              "The tracks in the GSuite are located in a remote server."),
             (GSuiteConstants.LOCAL,
              "The tracks in the GSuite are located on your server."),
             (GSuiteConstants.MULTIPLE,
              "The tracks in the GSuite are located both on your local server and at a remote location.")]),
        _summaryLine(
            'File format:<b> %s</b>. \n%s', gSuite.fileFormat,
            [(GSuiteConstants.UNKNOWN,
              "The file format of the tracks in the GSuite is not specified."),
             (GSuiteConstants.PREPROCESSED,
              "The tracks in the GSuite are preprocessed and ready for analysis."),
             (GSuiteConstants.PRIMARY,
              "The tracks in the GSuite can be manipulated, but must be preprocessed first for analysis."),
             (GSuiteConstants.MULTIPLE,
              "The tracks in the GSuite are both in preprocessed and primary formats. Only preprocessed tracks can be analyzed by HyperBrowser tools.")]),
        _summaryLine(
            'Track type:<b> %s</b>. %s', gSuite.trackType,
            [(GSuiteConstants.UNKNOWN,
              "The track type of the tracks in the GSuite is not specified."),
             (GSuiteConstants.MULTIPLE,
              "The tracks in the GSuite are of different track types.")],
            fallback="The tracks in the GSuite are all (subtypes) of the same type."),
        # The 'different genomes' case is decided by the genome itself; the
        # previous code tested gSuite.fileFormat here by mistake.
        _summaryLine(
            'Genome:<b> %s</b>. %s', gSuite.genome,
            [(GSuiteConstants.UNKNOWN,
              "The genome of the tracks in the GSuite is not specified."),
             (GSuiteConstants.MULTIPLE,
              "The tracks in the GSuite are of different genomes.")],
            fallback="The tracks in the GSuite come from the same genome."),
    ]
    htmlCore.unorderedList([line for line in candidateLines if line is not None])

    if len(attributeNames) > 0:
        htmlCore.paragraph(
            ' There are<b> %s </b>attributes in the GSuite. For each of the attributes the most frequent value is given in the table below. \n'
            % str(len(attributeNames)))

        numTracks = gSuite.numTracks()
        tableDataDict = OrderedDict()
        for attrName in attributeNames:
            valueCounts = attributeValCountDict[attrName]
            maxVal, maxCount = max(valueCounts.iteritems(),
                                   key=operator.itemgetter(1))
            if maxCount == 1:
                mostFreqVal = '[All values are unique]'
                nrOfOccurrences = 1
            elif maxCount == numTracks:
                mostFreqVal = maxVal
                nrOfOccurrences = str(numTracks) + ' [all tracks]'
            else:
                # Several values may share the top count; list all of them.
                mostFreqValList = [val for val, count in valueCounts.iteritems()
                                   if count == maxCount]
                mostFreqVal = ' | '.join(mostFreqValList)
                if len(mostFreqValList) > 1:
                    nrOfOccurrences = str(maxCount) + ' [tie]'
                else:
                    nrOfOccurrences = maxCount
            tableDataDict[attrName] = [mostFreqVal, nrOfOccurrences]

        htmlCore.tableFromDictionary(
            tableDataDict,
            ['Attribute name', 'Most frequent value', 'Number of occurrences'],
            sortable=False, expandable=False)

    htmlCore.divEnd()
    htmlCore.divEnd()
    htmlCore.end()

    # Parenthesised single-argument print behaves exactly like the print statement.
    print(htmlCore)
def execute(cls, choices, galaxyFn=None, username=''):
    """
    Run the program at cls.TRF_PATH on every track of the selected GSuite and
    print an HTML report to standard out.

    For each track the program is started in a run-specific directory with
    the seven numeric parameters from choices (match, mismatch, delta, pm,
    pi, minscore, maxperiod) followed by "-d" and "-h". Its
    '<track file>.<parameters>.dat' file in that directory is then parsed
    with cls.parseTRFResultFile. Tracks without results are left out.

    Depending on choices.regionsGSuite and choices.monomersGSuite, BED
    tracks of the repeat regions and/or the repeat monomers are written and
    composed into the GSuites registered in cls.extraGalaxyFn.

    The report holds a table of the selected parameters and a table of
    per-track repeat statistics. username is accepted for interface
    compatibility but is not used here.
    """
    import os

    def _toIntOrNone(text):
        # Only a string of digits is used as a limit; anything else gives None.
        return int(text) if text.isdigit() else None

    gsuite = getGSuiteFromGalaxyTN(choices.gsuite)

    minConsensusLength = _toIntOrNone(choices.minconsensus)
    maxConsensusLength = _toIntOrNone(choices.maxconsensus)
    minCopyNumber = _toIntOrNone(choices.mincopynumber)

    parameters = [choices.match, choices.mismatch, choices.delta, choices.pm,
                  choices.pi, choices.minscore, choices.maxperiod]

    resultsDict = OrderedDict()
    for gsTrack in gsuite.allTracks():
        resFile = GalaxyRunSpecificFile(
            ['trf', gsTrack.title, gsTrack.title + '.tmp'], galaxyFn)
        ensurePathExists(resFile.getDiskPath())
        trackDirName = os.path.dirname(os.path.realpath(resFile.getDiskPath()))

        instruction = [cls.TRF_PATH, gsTrack.path] + parameters + ["-d", "-h"]
        pipe = subprocess.Popen(instruction, cwd=trackDirName,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        # The captured output is not used; communicate() waits for the
        # process to finish while draining both pipes.
        pipe.communicate()

        outFileName = ".".join(
            [os.path.basename(gsTrack.path)] + parameters + ["dat"])
        outFilePath = os.path.join(trackDirName, outFileName)
        resultList = cls.parseTRFResultFile(outFilePath, minConsensusLength,
                                            maxConsensusLength, minCopyNumber)
        if resultList:
            resultsDict[gsTrack.title] = resultList

    # Selects full-copy end positions and copy numbers over the plain ones.
    cutAtFullCopies = choices.rrCutoff == cls.CUTOFF_REPEAT_REGION

    def _regionBedLines(repeatRegion):
        # One BED line per repeat region.
        if cutAtFullCopies:
            endPosition = repeatRegion.endPositionFullCopies
        else:
            endPosition = repeatRegion.endPosition
        return [[repeatRegion.chromosome,
                 str(repeatRegion.startPosition),
                 str(endPosition),
                 repeatRegion.bedName,
                 '0',
                 str(repeatRegion.strand)]]

    def _monomerBedLines(repeatRegion):
        # One BED line per monomer; chromosome and strand come from the region.
        return [[repeatRegion.chromosome,
                 str(repeatMonomer.startPosition),
                 str(repeatMonomer.endPosition),
                 repeatMonomer.bedName,
                 '0',
                 str(repeatRegion.strand)]
                for repeatMonomer in repeatRegion._monomers]

    def _composeBedGSuite(fileNamePrefix, titlePrefix, extraFnKey, bedLinesOf):
        # Write one BED track per analysed track and compose them to a GSuite.
        bedGSuite = GSuite()
        for trackName, trfResultList in resultsDict.iteritems():
            trackUri = GalaxyGSuiteTrack.generateURI(
                galaxyFn=galaxyFn,
                extraFileName=(fileNamePrefix + trackName),
                suffix='bed')
            bedTrack = GSuiteTrack(trackUri, title=(titlePrefix + trackName),
                                   genome=gsuite.genome)
            ensurePathExists(bedTrack.path)
            with open(bedTrack.path, 'w') as bedFile:
                # The file is opened in text mode, so '\n' is the portable
                # terminator; os.linesep would give '\r\r\n' on Windows.
                bedFile.write('track name="' + trackName + '" description="'
                              + trackName + '" priority=1' + '\n')
                for trfResult in trfResultList:
                    for repeatRegion in trfResult._repeatRegionList:
                        for bedFields in bedLinesOf(repeatRegion):
                            bedFile.write('\t'.join(bedFields) + '\n')
            bedGSuite.addTrack(bedTrack)
        GSuiteComposer.composeToFile(bedGSuite, cls.extraGalaxyFn[extraFnKey])

    if choices.regionsGSuite:
        _composeBedGSuite("Repeat_regions_", "Repeat regions ",
                          'Repeat regions (bed) GSuite', _regionBedLines)

    if choices.monomersGSuite:
        _composeBedGSuite("Repeat_monomers_", "Repeat monomers ",
                          'Repeat monomers (bed) GSuite', _monomerBedLines)

    analysisParamsTableColumnTitles = ['Parameter', 'Selected value']
    analysisParamsDict = OrderedDict([
        ('Tandem Repeat Finder tool version', cls.TRF_VERSION),
        ('Match', choices.match),
        ('Mismatch', choices.mismatch),
        ('Delta', choices.delta),
        ('Matching probability (Pm)', choices.pm),
        ('Indel probability (Pi)', choices.pi),
        ('Min score', choices.minscore),
        ('Max period', choices.maxperiod),
        ('Min consensus length', choices.minconsensus),
        ('Max consensus length', choices.maxconsensus),
        ('Min copy number', choices.mincopynumber),
    ])

    countTableColumnTitles = ['Name', 'Nr of repeat regions',
                              'Avg copy number', 'Min copy number',
                              'Max copy number', 'Avg consensus length',
                              'Min consensus length', 'Max consensus length']
    countTableDict = OrderedDict()
    from numpy import mean
    for trackName, trfResultList in resultsDict.iteritems():
        copyNumberList = []
        consensusLengthList = []
        for trfRes in trfResultList:
            if cutAtFullCopies:
                copyNumberList += trfRes.copyNumberList
            else:
                copyNumberList += trfRes.realCopyNumberList
            consensusLengthList += trfRes.consensusLengths

        # NOTE(review): min()/max() raise ValueError on an empty list -
        # confirm that every parsed result has at least one repeat region.
        countTableDict[trackName] = [
            sum([x.repeatRegionsCount for x in trfResultList]),
            mean(copyNumberList),
            min(copyNumberList),
            max(copyNumberList),
            mean(consensusLengthList),
            min(consensusLengthList),
            max(consensusLengthList),
        ]

    core = HtmlCore()
    core.begin()
    core.divBegin()
    core.tableFromDictionary(analysisParamsDict,
                             columnNames=analysisParamsTableColumnTitles,
                             sortable=False)
    core.divEnd()
    core.divBegin()
    core.tableFromDictionary(countTableDict,
                             columnNames=countTableColumnTitles,
                             tableId='repeatCounts', sortable=True,
                             presorted=0)
    core.divEnd()
    core.end()

    # Parenthesised single-argument print behaves exactly like the print statement.
    print(core)