def printClusterPlots(cls, correlationMatrix, linkageMatrix, galaxyFn, distanceMeasure, labels, htmlCore):
    """Plot the similarity heatmap and the clustering dendrogram and embed both in the report.

    The heatmap colour scale always reaches at least 1 at the top. When the
    smallest matrix value is negative or NaN the scale reaches down to at
    least -1 and the diverging "RdBu_r" colour map is requested; otherwise
    the scale bottoms out at 0 and the plot helper's default colour map is
    used.

    Both plots are written to run-specific PDF files (prefixed with
    ``distanceMeasure``); an embedded image and a PDF link are appended to
    ``htmlCore`` for each of them.
    """
    from numpy import amax, amin, isnan

    maxVal = amax(correlationMatrix)
    minVal = amin(correlationMatrix)

    heatmapFile = GalaxyRunSpecificFile(['Image', distanceMeasure + 'seabornHeatmap.pdf'], galaxyFn)
    treeFile = GalaxyRunSpecificFile(['Image', distanceMeasure + 'dendrogram.pdf'], galaxyFn)

    heatmapKwArgs = {'max': maxVal if maxVal >= 1 else 1, 'fileName': heatmapFile}
    if minVal < 0 or isnan(minVal):
        # Signed (or undefined) data: symmetric scale with a diverging colour map.
        scaleFloor = -1
        heatmapKwArgs['cmap'] = "RdBu_r"
    else:
        # Non-negative data: no colour map override.
        scaleFloor = 0
    heatmapKwArgs['min'] = minVal if minVal <= scaleFloor else scaleFloor

    MatplotlibPlots.seabornHeatmapPlot(correlationMatrix, labels, **heatmapKwArgs)
    MatplotlibPlots.dendrogramClusteringPlot(linkageMatrix, labels, treeFile)

    for plotFile, linkText in (
        (heatmapFile, 'PDF of similarity matrix'),
        (treeFile, 'PDF of dendrogram'),
    ):
        htmlCore.line(plotFile.getEmbeddedImage())
        htmlCore.link(linkText, plotFile.getURL())
def plotDistances(cls, distances, galaxyFn, bins, r2, htmlCore):
    """Plot the distribution of distances between LD pairs as a line graph and a histogram.

    The line graph uses the data returned by ``cls.standardizeLineGraph``
    (the first x value is dropped before plotting); the histogram uses the
    sorted distances with the requested ``bins``. Each plot is written to a
    run-specific PDF and added to ``htmlCore`` under its own header, which
    also states the ``r2`` threshold.
    """
    axisLabel = 'Distances between SNP pairs in LD'

    # Line graph of pair counts.
    xValues, pairCounts = cls.standardizeLineGraph(distances)
    lineGraphFile = GalaxyRunSpecificFile(['distancegraph.pdf'], galaxyFn)
    MatplotlibPlots.pointGraph(
        x=xValues[1:],
        y=pairCounts,
        fileLocation=lineGraphFile,
        xlabel=axisLabel,
        ylabel='LD-pair count'
    )

    # Histogram of the same distances.
    orderedDistances = sorted(distances)
    histogramFile = GalaxyRunSpecificFile(['histogram.pdf'], galaxyFn)
    MatplotlibPlots.histogramPlot(orderedDistances, bins, histogramFile, axisLabel)

    reportSections = (
        ('Line plot with distribution of distances between LD pairs, r<sup>2</sup> >= ',
         lineGraphFile,
         'PDF of distances between tracks here'),
        ('Histogram with distribution of distances between LD pairs, r<sup>2</sup> >= ',
         histogramFile,
         'PDF histogram of distances here'),
    )
    for headerPrefix, plotFile, linkText in reportSections:
        htmlCore.divider(True)
        htmlCore.header(headerPrefix + str(r2))
        htmlCore.line(plotFile.getEmbeddedImage())
        htmlCore.link(linkText, plotFile.getURL())
def printTextMatrixes(cls, correlationMatrix, linkageMatrix, distanceMatrix, galaxyFn, filename, htmlCore):
    """Dump the three analysis matrices to text files and link them from the report.

    For the correlation, distance and linkage matrix (in that order) the
    ``str()`` representation of the matrix is written to a run-specific
    ``<prefix><filename>.txt`` file and a link to that file is appended to
    ``htmlCore``.

    Each file is written inside a ``with`` block so the handle is closed
    (and the content flushed) before the link is emitted, instead of being
    left to the garbage collector.
    """
    matrixOutputs = (
        ('corr_matrix_result_', correlationMatrix,
         '<br><br>View the raw text similarity/correlation matrix for this analysis'),
        ('dist_matrix_result_', distanceMatrix,
         '<br><br>View the raw text triangular distance matrix for this analysis'),
        ('linkage_matrix_result_', linkageMatrix,
         '<br><br>View the raw text linkage matrix for this analysis'),
    )

    for filePrefix, matrix, linkText in matrixOutputs:
        matrixFile = GalaxyRunSpecificFile([filePrefix + filename + '.txt'], galaxyFn)
        # NOTE(review): the True argument presumably makes sure the target
        # directory exists - confirm against GalaxyRunSpecificFile.
        matrixPath = matrixFile.getDiskPath(True)
        with open(matrixPath, 'w') as matrixHandle:
            matrixHandle.write(str(matrix))
        htmlCore.link(linkText, matrixFile.getURL())
def getSNPFrequencyStats(cls, bins, gSuite, galaxyFn, htmlCore):
    """Count SNPs per GSuite track and report the counts as statistics and plots.

    Runs ``UniquePointTrackStat`` on every track in ``gSuite``. For each
    track whose global result contains a ``'Result'`` entry, the number of
    observations is recorded and the observations are merged into the set of
    all rsIDs. The report then gets summary statistics, the total / unique /
    overlapping counts, a point graph, a histogram with rug plot and an
    interactive column chart.

    A track label is recorded only together with a count, so the labels
    handed to the plots always line up one-to-one with ``snpCount``. Tracks
    without a usable result are left out of both lists.
    """
    rsIDs = set()
    snpCount = []
    trackLabels = []
    analysisSpec = AnalysisSpec(UniquePointTrackStat)

    for gSuiteTrack in gSuite.allTracks():
        track = Track(gSuiteTrack.trackName)
        globalResult = doAnalysis(analysisSpec, bins, [track]).getGlobalResult()
        # Skip tracks without a result instead of recording a label with no
        # matching count; a missing (None) global result is treated the same
        # way as a missing 'Result' key.
        if globalResult is None or 'Result' not in globalResult:
            continue
        observations = globalResult['Result']
        snpCount.append(len(observations))
        trackLabels.append(gSuiteTrack.title)
        rsIDs.update(observations)

    snpcountFile = GalaxyRunSpecificFile(['snpfrequencies.pdf'], galaxyFn)
    MatplotlibPlots.pointGraphY(snpCount, snpcountFile, ylabel='SNP counts', xticks=trackLabels)

    snpdistributionFile = GalaxyRunSpecificFile(['snpfreqhistogram.pdf'], galaxyFn)
    MatplotlibPlots.histogramRugPlot(snpCount, 10, snpdistributionFile, 'SNP counts')

    totalSNPCount = sum(snpCount)
    uniqueSNPCount = len(rsIDs)

    cls.printStats(snpCount, 'track', htmlCore)
    htmlCore.line('Total number of SNPs: ' + str(totalSNPCount))
    htmlCore.line('Unique SNPs: ' + str(uniqueSNPCount))
    # The surplus of the per-track sum over the unique count is reported as overlap.
    htmlCore.line('Overlapping rsIDs: ' + str(totalSNPCount - uniqueSNPCount))

    htmlCore.divider(True)
    htmlCore.header('Graph of SNP frequencies in GSuite tracks')
    htmlCore.line(snpcountFile.getEmbeddedImage())
    htmlCore.link('PDF of SNP frequency graph', snpcountFile.getURL())

    htmlCore.divider(True)
    htmlCore.header('Histogram of SNP frequencies in GSuite tracks')
    htmlCore.line(snpdistributionFile.getEmbeddedImage())
    htmlCore.link('PDF of SNP frequency histogram', snpdistributionFile.getURL())

    cls.getInteractiveColumnChartWithLabels(snpCount, trackLabels, htmlCore)
def plotDistances(cls, distances, galaxyFn, distCase, htmlCore):
    """Plot the smallest point distances as a line graph, two histograms and an interactive chart.

    Produces, in this order: a point graph of the data returned by
    ``cls.standardizeLineGraph`` (first x value dropped), a rug histogram of
    the sorted distances, a rug histogram of their logarithm, and finally
    the interactive column chart. ``distCase`` is inserted into every
    section header and passed on to the interactive chart.
    """
    headerSuffix = distCase + ' tracks in GSuite'

    # --- Line graph -------------------------------------------------------
    xValues, yValues = cls.standardizeLineGraph(distances)
    graphFile = GalaxyRunSpecificFile(['distancegraph.pdf'], galaxyFn)
    MatplotlibPlots.pointGraph(
        xValues[1:], yValues, graphFile,
        'Smallest distance for each point', 'Distance point count'
    )

    htmlCore.divider(True)
    htmlCore.header('Graph of smallest distances for all points ' + headerSuffix)
    htmlCore.line(graphFile.getEmbeddedImage())
    htmlCore.link('PDF of distance graph', graphFile.getURL())

    # --- Histograms (plain and log-scaled) --------------------------------
    orderedDistances = sorted(distances)
    binCount = 20
    plainHistFile = GalaxyRunSpecificFile(['histogram.pdf'], galaxyFn)
    logHistFile = GalaxyRunSpecificFile(['loghistogram.pdf'], galaxyFn)
    MatplotlibPlots.histogramRugPlot(orderedDistances, binCount, plainHistFile, 'Distances')
    # NOTE(review): `log` comes from the enclosing module; since it is given
    # a list it is presumably numpy's element-wise log, in which case zero
    # distances would map to -inf - confirm.
    MatplotlibPlots.histogramRugPlot(log(orderedDistances), binCount, logHistFile, 'Log of distances')

    rugExplanation = 'The rugs/vertical lines at the bottom show the distribution of point distances.'
    histogramSections = (
        ('Histogram of smallest distances for all points ',
         plainHistFile,
         'PDF of distance histogram'),
        ('Histogram of log of smallest distances for all points ',
         logHistFile,
         'PDF of log distance histogram'),
    )

    # A single divider precedes both histogram sections.
    htmlCore.divider(True)
    for headerPrefix, histogramFile, linkText in histogramSections:
        htmlCore.header(headerPrefix + headerSuffix)
        htmlCore.line(rugExplanation)
        htmlCore.line(histogramFile.getEmbeddedImage())
        htmlCore.link(linkText, histogramFile.getURL())

    # --- Interactive bar chart --------------------------------------------
    cls.getInteractiveColumnChart(orderedDistances, distCase, htmlCore)
def getLDDistancesOfMultipleRsquares(cls, ldGraphTrack, rSquareThresholds, galaxyFn, htmlCore):
    """Plot LD-pair distance distributions for every selected r-square threshold in one graph.

    ``rSquareThresholds`` maps a threshold (convertible with ``float``) to a
    flag; only thresholds whose flag is truthy are used. For each of them
    the LD distances are collected with ``cls.findAllDistancesInLD`` and
    standardized with ``cls.standardizeLineGraph``; the resulting counts
    become one line in the graph, labelled with the threshold.

    Returns ``None``. When no x data was produced (no threshold selected, or
    the standardized graph is empty) the method returns before any plot file
    is created or anything is written to ``htmlCore``.
    """
    graph = LDExpansions.createRSquareGraph(ldGraphTrack, 0)
    positions = LDExpansions.createPositionDict(ldGraphTrack)

    ldDistances = []
    rSquareLabels = []
    bins = []

    for rSquare, isSet in rSquareThresholds.items():
        if not isSet:
            continue
        rSquareLabels.append(rSquare)
        distances = cls.findAllDistancesInLD(graph, positions, float(rSquare), htmlCore)
        # The x data of the last processed threshold is the one used for the plot.
        bins, ldPairCount = cls.standardizeLineGraph(distances)
        ldDistances.append(ldPairCount)

    # Bail out before plotting: there is nothing meaningful to draw. An
    # explicit length check is kept because `bins` may be an array-like
    # whose truth value is not defined.
    if len(bins) == 0:
        return

    graphFile = GalaxyRunSpecificFile(['multipleLines.pdf'], galaxyFn)
    MatplotlibPlots.multipleLineGraph(
        bins[1:], ldDistances, rSquareLabels, graphFile, 'Distance', 'LD-pair count'
    )

    htmlCore.divider(True)
    htmlCore.header('Distribution of distances between LD pairs with different thresholds of r<sup>2</sup>')
    htmlCore.line(graphFile.getEmbeddedImage())
    htmlCore.link('PDF of distances between tracks here', graphFile.getURL())
def MakeHeatmapFromTracks(cls, galaxyFn, **trKwArgs):
    """Build one heatmap image and one zoomable HTML page per chromosome from up to three tracks.

    Keyword arguments ``tr1``, ``tr2`` and ``tr3`` name the tracks; ``tr1``
    is always read, ``tr2`` and ``tr3`` only when truthy. Each track is read
    with its own colour pattern ((1,0,0), (0,1,0) and (0,0,1) respectively)
    and the per-track results are merged with ``cls.syncResultDict``.

    For every chromosome in the merged result a small PNG (one pixel per
    cell of the position matrix), a 10x enlarged PNG and an HTML page that
    links the two are written as run-specific files.

    Returns an HTML page (string) holding a table with one row per
    chromosome: the chromosome name and the small image linking to that
    chromosome's HTML page.
    """
    tr1 = trKwArgs.get('tr1')
    tr2 = trKwArgs.get('tr2')
    tr3 = trKwArgs.get('tr3')
    # Row template: chromosome name, link target, thumbnail source.
    tableRowEntryTemplate = """<tr><td>%s</td><td><a href="%s"><img src="%s" /></a></td></tr>"""
    #htmlTemplate = '''<head><link rel="stylesheet" type="text/css" href="image_zoom/styles/stylesheet.css" /><script language="javascript" type="text/javascript" src="image_zoom/scripts/mootools-1.2.1-core.js"></script><script language="javascript" type="text/javascript" src="image_zoom/scripts/mootools-1.2-more.js"></script><script language="javascript" type="text/javascript" src="image_zoom/scripts/ImageZoom.js"></script>
    #           <script language="javascript" type="text/javascript" >
    #           liste = %s;
    #           function point_it(event){
    #           pos_x = event.offsetX?(event.offsetX):event.pageX-document.getElementById("zoomer_image").offsetLeft;
    #           pos_y = event.offsetY?(event.offsetY):event.pageY-document.getElementById("zoomer_image").offsetTop;
    #           pos_x = Math.floor(pos_x/10);
    #           pos_y = Math.floor(pos_y/10);
    #           alert("Hello World!, you clicked: " +liste[pos_y][pos_x]);
    #           }</script>
    #           </head><body><div id="container"><!-- Image zoom start --><div id="zoomer_big_container"></div><div id="zoomer_thumb">		<a href="%s" target="_blank" ><img src="%s" /></a></div><!-- Image zoom end --></div></body></html>'''
    # JavaScript injected into each per-chromosome page. The %s placeholder
    # receives the stringified label matrix; the click handler divides the
    # click position by 10 (the enlargement factor used for the big image
    # below) to look up the label of the clicked cell.
    javaScriptCode = ''' liste = %s; function point_it(event){ pos_x = event.offsetX?(event.offsetX):event.pageX-document.getElementById("zoomer_image").offsetLeft; pos_y = event.offsetY?(event.offsetY):event.pageY-document.getElementById("zoomer_image").offsetTop; pos_x = Math.floor(pos_x/10); pos_y = Math.floor(pos_y/10); alert("Hello World!, you clicked: " +liste[pos_y][pos_x]); } '''
    # One result dict per supplied track, each with its own colour pattern.
    ResultDicts = [cls.getValuesFromBedFile(tr1,colorPattern=(1,0,0))]
    ResultDicts += [cls.getValuesFromBedFile(tr2,colorPattern=(0,1,0))] if tr2 else []
    ResultDicts += [cls.getValuesFromBedFile(tr3,colorPattern=(0,0,1))] if tr3 else []
    htmlTableContent = []
    resultDict = cls.syncResultDict(ResultDicts)
    for chrom, valList in resultDict.items():
        # NOTE(review): areaList is only referenced by commented-out code below.
        areaList = []
        #For doing recursive pattern picture
        # posMatrix is used as a 2-D grid whose cells are indexes into valList.
        posMatrix = cls.getResult(len(valList), 2,2)
        # Label matrix with the same dimensions as posMatrix, filled in below.
        javaScriptList = [[0 for v in xrange(len(posMatrix[0])) ] for t in xrange(len(posMatrix))]
        rowLen = len(posMatrix[0])
        im = Image.new("RGB", (rowLen, len(posMatrix)), "white")
        for yIndex, row in enumerate(posMatrix):
            for xIndex, elem in enumerate(row):
                # Colour the pixel with the value stored for this cell.
                im.putpixel((xIndex, yIndex), valList[elem])
                # NOTE(review): region is computed but never read.
                region = yIndex*rowLen + xIndex
                # Label format: "<chrom>:<elem*10>-<(elem+1)*10>: [<values>]",
                # where each value is (255-v)/255.0 rounded to two decimals.
                # NOTE(review): the factor 10 presumably reflects a bin size
                # of 10 positions per element - confirm.
                javaScriptList[yIndex][xIndex] = chrom+':'+str(elem*10)+'-'+str((elem+1)*10)+': '+repr([ round((255-v)/255.0 ,2 ) for v in valList[elem]])
                #areaList.append(areaTemplate % (xIndex*10, yIndex*10, xIndex*11, yIndex*11, repr(valList[elem])))
        # Enlarged copy: every matrix cell becomes a 10x10 pixel square.
        im2 = im.resize((len(posMatrix[0])*10, len(posMatrix)*10))

        origSegsFile = GalaxyRunSpecificFile([chrom+'smallPic.png'], galaxyFn)
        origSegsFn = origSegsFile.getDiskPath(True)
        bigSegsFile = GalaxyRunSpecificFile([chrom+'BigPic.png'], galaxyFn)
        bigSegsFn = bigSegsFile.getDiskPath(True)

        im.save(origSegsFn)
        im2.save(bigSegsFn)

        #open('Recursive/'+chrom+'Zooming.html','w').write(htmlTemplate % (str(javaScriptList), chrom+'Big.png',chrom+'.png'))
        # Per-chromosome page: the small image, linking to the big image,
        # wrapped in the container / zoomer style blocks.
        core = HtmlCore()
        core.begin(
            extraJavaScriptFns=['mootools-1.2.1-core.js', 'mootools-1.2-more.js', 'ImageZoom.js'],
            extraJavaScriptCode=javaScriptCode % str(javaScriptList),
            extraCssFns=['image_zoom.css']
        )
        core.styleInfoBegin(styleId='container')
        core.styleInfoBegin(styleId='zoomer_big_container')
        core.styleInfoEnd()
        core.styleInfoBegin(styleId='zoomer_thumb')
        core.link(url=bigSegsFile.getURL(), text=str(HtmlCore().image(origSegsFile.getURL())), popup=True)
        core.styleInfoEnd()
        core.styleInfoEnd()
        core.end()
        htmlfile = GalaxyRunSpecificFile([chrom+'.html'], galaxyFn)
        htmlfile.writeTextToFile(str(core))
        # Overview row: chromosome name plus thumbnail linking to its page.
        htmlTableContent.append(tableRowEntryTemplate % (chrom, htmlfile.getURL(), origSegsFile.getURL()))
        #return str(core)
        #htmlTemplate % (str(javaScriptList), bigSegsFn, origSegsFn)

        #######
        # For doing normal picture
        #columns = int(round((len(valList)/1000)+0.5))
        #im = Image.new("RGB", (1000, columns), "white")
        #y=-1
        #for index, valuTuple in enumerate(valList):
        #    x = index%1000
        #
        #    if x == 0:
        #        y+=1
        #    try:
        #        im.putpixel((x, y), valuTuple)
        #    except:
        #        pass
        #im.save(chrom+'.png')
        #htmlTableContent.append(tableRowEntryTemplate % (chrom, chrom+'.png'))
    # Wrap all rows in a minimal HTML page.
    htmlPageTemplate = """<html><body><table border="1">%s</table></body></html>"""
    return htmlPageTemplate % ('\n'.join(htmlTableContent))