def printClusterPlots(cls, correlationMatrix, linkageMatrix, galaxyFn, distanceMeasure, labels, htmlCore):
    """Render a heatmap of the matrix and a dendrogram, then embed both in the HTML output.

    Two PDF files are created under the 'Image' folder of the Galaxy run, with
    ``distanceMeasure`` as file name prefix. The colour scale of the heatmap
    always spans at least [0, 1]; when the matrix holds negative values (or its
    minimum is NaN) the scale spans at least [-1, 1] and the "RdBu_r" colour
    map is requested.

    :param correlationMatrix: matrix passed to the heatmap plot; its max/min set the colour limits
    :param linkageMatrix: passed unchanged to the dendrogram plot
    :param galaxyFn: Galaxy file name used to build the run-specific output files
    :param distanceMeasure: string prepended to both PDF file names
    :param labels: labels handed to both plots
    :param htmlCore: HTML builder receiving the embedded images and PDF links
    """
    from numpy import amax, amin, isnan

    highest = amax(correlationMatrix)
    lowest = amin(correlationMatrix)

    heatmapPdf = GalaxyRunSpecificFile(['Image', distanceMeasure + 'seabornHeatmap.pdf'], galaxyFn)
    dendrogramPdf = GalaxyRunSpecificFile(['Image', distanceMeasure + 'dendrogram.pdf'], galaxyFn)

    # Keyword arguments shared by both heatmap variants; 'min' (and possibly
    # 'cmap') are filled in below depending on the sign of the smallest value.
    heatmapOptions = {
        'max': highest if highest >= 1 else 1,
        'fileName': heatmapPdf,
    }
    if lowest < 0 or isnan(lowest):
        # Signed (or NaN-containing) data: symmetric-looking scale with a
        # diverging colour map.
        heatmapOptions['min'] = lowest if lowest <= -1 else -1
        heatmapOptions['cmap'] = "RdBu_r"
    else:
        heatmapOptions['min'] = lowest if lowest <= 0 else 0

    MatplotlibPlots.seabornHeatmapPlot(correlationMatrix, labels, **heatmapOptions)
    MatplotlibPlots.dendrogramClusteringPlot(linkageMatrix, labels, dendrogramPdf)

    htmlCore.line(heatmapPdf.getEmbeddedImage())
    htmlCore.link('PDF of similarity matrix', heatmapPdf.getURL())
    htmlCore.line(dendrogramPdf.getEmbeddedImage())
    htmlCore.link('PDF of dendrogram', dendrogramPdf.getURL())
def plotDistances(cls, distances, galaxyFn, bins, r2, htmlCore):
    """Plot the distances between LD pairs as a line graph and a histogram, and report both.

    The line graph uses the output of ``cls.standardizeLineGraph(distances)``
    (first returned bin value skipped on the x axis); the histogram uses the
    sorted distances with the caller-supplied ``bins``. Each plot is written to
    a run-specific PDF and added to ``htmlCore`` under its own header.

    :param distances: iterable of distances between SNP pairs in LD
    :param galaxyFn: Galaxy file name used to build the run-specific output files
    :param bins: bin specification forwarded to the histogram plot
    :param r2: r-square threshold, only used (via ``str``) in the section headers
    :param htmlCore: HTML builder receiving headers, images and links
    """
    binValues, pairCounts = cls.standardizeLineGraph(distances)
    linePdf = GalaxyRunSpecificFile(['distancegraph.pdf'], galaxyFn)
    MatplotlibPlots.pointGraph(
        x=binValues[1:],
        y=pairCounts,
        fileLocation=linePdf,
        xlabel='Distances between SNP pairs in LD',
        ylabel='LD-pair count'
    )

    orderedDistances = sorted(distances)
    histogramPdf = GalaxyRunSpecificFile(['histogram.pdf'], galaxyFn)
    MatplotlibPlots.histogramPlot(orderedDistances, bins, histogramPdf, 'Distances between SNP pairs in LD')

    # (header text, plot file, link text) for each report section, in output order.
    sections = (
        (
            'Line plot with distribution of distances between LD pairs, r<sup>2</sup> >= ' + str(r2),
            linePdf,
            'PDF of distances between tracks here',
        ),
        (
            'Histogram with distribution of distances between LD pairs, r<sup>2</sup> >= ' + str(r2),
            histogramPdf,
            'PDF histogram of distances here',
        ),
    )
    for headerText, plotFile, linkText in sections:
        htmlCore.divider(True)
        htmlCore.header(headerText)
        htmlCore.line(plotFile.getEmbeddedImage())
        htmlCore.link(linkText, plotFile.getURL())
def getSNPFrequencyStats(cls, bins, gSuite, galaxyFn, htmlCore):
    """Count SNPs per GSuite track, plot the counts and write summary statistics.

    Each track in ``gSuite`` is analysed with ``UniquePointTrackStat`` over
    ``bins``. For every track whose global result contains a 'Result' entry,
    the number of observations is recorded and the observations are added to a
    set of rsIDs. The counts are then plotted (point graph, rug histogram and
    interactive column chart) and summarised in ``htmlCore``.

    Tracks whose global result has no 'Result' entry are left out of both the
    counts and the labels, so that ``snpCount[i]`` always belongs to
    ``trackLabels[i]`` in the plots.

    :param bins: analysis bins forwarded to ``doAnalysis``
    :param gSuite: GSuite whose tracks are analysed (via ``allTracks()``)
    :param galaxyFn: Galaxy file name used to build the run-specific output files
    :param htmlCore: HTML builder receiving statistics, headers, images and links
    """
    rsIDs = set()
    snpCount = []
    trackLabels = []
    analysisSpec = AnalysisSpec(UniquePointTrackStat)

    for gSuiteTrack in gSuite.allTracks():
        track = Track(gSuiteTrack.trackName)
        # Fetch the global result once and reuse it for the membership test
        # and the lookup.
        globalResult = doAnalysis(analysisSpec, bins, [track]).getGlobalResult()
        if 'Result' not in globalResult:
            continue

        observations = globalResult['Result']
        # The label is recorded together with the count: appending it
        # unconditionally would shift every later tick label whenever a track
        # produced no 'Result'.
        trackLabels.append(gSuiteTrack.title)
        snpCount.append(len(observations))
        rsIDs.update(observations)

    snpcountFile = GalaxyRunSpecificFile(['snpfrequencies.pdf'], galaxyFn)
    MatplotlibPlots.pointGraphY(snpCount, snpcountFile, ylabel='SNP counts', xticks=trackLabels)

    snpdistributionFile = GalaxyRunSpecificFile(['snpfreqhistogram.pdf'], galaxyFn)
    MatplotlibPlots.histogramRugPlot(snpCount, 10, snpdistributionFile, 'SNP counts')

    totalSNPCount = sum(snpCount)
    uniqueSNPCount = len(rsIDs)

    cls.printStats(snpCount, 'track', htmlCore)
    htmlCore.line('Total number of SNPs: ' + str(totalSNPCount))
    htmlCore.line('Unique SNPs: ' + str(uniqueSNPCount))
    # Observations counted in more than one track (total minus distinct).
    htmlCore.line('Overlapping rsIDs: ' + str(totalSNPCount - uniqueSNPCount))

    htmlCore.divider(True)
    htmlCore.header('Graph of SNP frequencies in GSuite tracks')
    htmlCore.line(snpcountFile.getEmbeddedImage())
    htmlCore.link('PDF of SNP frequency graph', snpcountFile.getURL())

    htmlCore.divider(True)
    htmlCore.header('Histogram of SNP frequencies in GSuite tracks')
    htmlCore.line(snpdistributionFile.getEmbeddedImage())
    htmlCore.link('PDF of SNP frequency histogram', snpdistributionFile.getURL())

    cls.getInteractiveColumnChartWithLabels(snpCount, trackLabels, htmlCore)
def plotDistances(cls, distances, galaxyFn, distCase, htmlCore):
    """Plot the smallest point distances as a graph, two rug histograms and an interactive chart.

    The graph is built from ``cls.standardizeLineGraph(distances)`` (first
    returned x value skipped). The histograms use the sorted distances and
    their logarithm, both with 20 bins. Every plot is written to a
    run-specific PDF and embedded in ``htmlCore``; the interactive column
    chart is produced last by ``cls.getInteractiveColumnChart``.

    :param distances: iterable of smallest distances, one per point
    :param galaxyFn: Galaxy file name used to build the run-specific output files
    :param distCase: text inserted into the headers between 'points ' and ' tracks in GSuite'
    :param htmlCore: HTML builder receiving headers, images and links
    """
    # --- Distance graph: plot, then report ---
    graphX, graphY = cls.standardizeLineGraph(distances)
    graphPdf = GalaxyRunSpecificFile(['distancegraph.pdf'], galaxyFn)
    MatplotlibPlots.pointGraph(
        graphX[1:], graphY, graphPdf,
        'Smallest distance for each point', 'Distance point count'
    )

    htmlCore.divider(True)
    htmlCore.header('Graph of smallest distances for all points ' + distCase + ' tracks in GSuite')
    htmlCore.line(graphPdf.getEmbeddedImage())
    htmlCore.link('PDF of distance graph', graphPdf.getURL())

    # --- Distance histograms: plain and log-transformed ---
    orderedDistances = sorted(distances)
    binCount = 20
    plainHistPdf = GalaxyRunSpecificFile(['histogram.pdf'], galaxyFn)
    logHistPdf = GalaxyRunSpecificFile(['loghistogram.pdf'], galaxyFn)
    MatplotlibPlots.histogramRugPlot(orderedDistances, binCount, plainHistPdf, 'Distances')
    # NOTE(review): `log` comes from the enclosing module and is applied to a
    # list, so it is presumably NumPy's element-wise log - confirm. Zero
    # distances would then yield -inf values here.
    MatplotlibPlots.histogramRugPlot(log(orderedDistances), binCount, logHistPdf, 'Log of distances')

    rugExplanation = 'The rugs/vertical lines at the bottom show the distribution of point distances.'

    # Both histogram sections share one divider and the same layout.
    htmlCore.divider(True)
    histogramSections = (
        (
            'Histogram of smallest distances for all points ' + distCase + ' tracks in GSuite',
            plainHistPdf,
            'PDF of distance histogram',
        ),
        (
            'Histogram of log of smallest distances for all points ' + distCase + ' tracks in GSuite',
            logHistPdf,
            'PDF of log distance histogram',
        ),
    )
    for headerText, plotFile, linkText in histogramSections:
        htmlCore.header(headerText)
        htmlCore.line(rugExplanation)
        htmlCore.line(plotFile.getEmbeddedImage())
        htmlCore.link(linkText, plotFile.getURL())

    # --- Interactive bar chart ---
    cls.getInteractiveColumnChart(orderedDistances, distCase, htmlCore)
def getLDDistancesOfMultipleRsquares(cls, ldGraphTrack, rSquareThresholds, galaxyFn, htmlCore):
    """Plot the LD-pair distance distribution for every selected r-square threshold in one graph.

    An r-square graph and a position lookup are built once from
    ``ldGraphTrack``. For each threshold whose value in ``rSquareThresholds``
    is truthy, the distances between pairs in LD are computed and standardised
    into a line. All lines are drawn into a single run-specific PDF which is
    then embedded in ``htmlCore``.

    Nothing is plotted or written when there are no bins to draw, i.e. when no
    threshold is selected or the last standardisation returned no bins.

    :param ldGraphTrack: LD graph track handed to the ``LDExpansions`` helpers
    :param rSquareThresholds: mapping of threshold (convertible with ``float``) to a selected flag
    :param galaxyFn: Galaxy file name used to build the run-specific output file
    :param htmlCore: HTML builder; also forwarded to ``cls.findAllDistancesInLD``
    :return: None
    """
    graph = LDExpansions.createRSquareGraph(ldGraphTrack, 0)
    positions = LDExpansions.createPositionDict(ldGraphTrack)

    ldDistances = []
    rSquareLabels = []
    bins = []

    for rSquare, isSet in rSquareThresholds.items():
        if not isSet:
            continue

        # The label keeps the threshold exactly as given; only the
        # computation uses the float value.
        rSquareLabels.append(rSquare)
        distances = cls.findAllDistancesInLD(graph, positions, float(rSquare), htmlCore)
        # NOTE(review): only the bins of the last selected threshold are used
        # as x values for all lines - presumably standardizeLineGraph returns
        # the same bins on every call; confirm.
        bins, ldPairCount = cls.standardizeLineGraph(distances)
        ldDistances.append(ldPairCount)

    # Bail out before creating the file and plotting: previously this check ran
    # only after multipleLineGraph had already been called with empty data.
    # len() is used rather than truthiness because bins may be an array.
    if len(bins) == 0:
        return

    graphFile = GalaxyRunSpecificFile(['multipleLines.pdf'], galaxyFn)
    MatplotlibPlots.multipleLineGraph(
        bins[1:], ldDistances, rSquareLabels, graphFile, 'Distance', 'LD-pair count'
    )

    htmlCore.divider(True)
    htmlCore.header('Distribution of distances between LD pairs with different thresholds of r<sup>2</sup>')
    htmlCore.line(graphFile.getEmbeddedImage())
    htmlCore.link('PDF of distances between tracks here', graphFile.getURL())