def testSetupSlicerFreedman(self):
    """Check that a slicer created with bins=None ends up with the optimal bin count."""
    lower, upper = 0, 1
    values = makeDataValues(1000, lower, upper, random=2234)
    slicer = OneDSlicer(sliceColName='testdata', bins=None)
    self.testslicer = slicer
    slicer.setupSlicer(values)
    # The number of slices should equal what optimalBins suggests for the same data.
    from lsst.sims.maf.utils import optimalBins
    expected = optimalBins(values['testdata'])
    np.testing.assert_equal(slicer.nslice, expected)
def testSetupSlicerFreedman(self):
    """Check that a slicer created with bins=None ends up with the optimal bin count."""
    lower, upper = 0, 1
    values = makeDataValues(1000, lower, upper, random=True)
    slicer = OneDSlicer(sliceColName='testdata', bins=None)
    self.testslicer = slicer
    slicer.setupSlicer(values)
    # The number of slices should equal what optimalBins suggests for the same data.
    from lsst.sims.maf.utils import optimalBins
    expected = optimalBins(values['testdata'])
    np.testing.assert_equal(slicer.nslice, expected)
def __call__(self, metricValueIn, slicer, userPlotDict, fignum=None):
    """
    Plot a histogram of metricValues (such as would come from a spatial slicer).

    Parameters
    ----------
    metricValueIn : presumably a numpy masked array
        Metric values. They are passed through applyZPNorm, compressed, and
        stripped of non-finite entries before being histogrammed.
    slicer : object
        Not used by this method.
    userPlotDict : dict
        Plot options; these are layered on top of self.defaultPlotDict.
    fignum : int, optional
        Figure number handed to plt.figure.

    Returns
    -------
    int
        Number of the figure that was drawn on.
    """
    # User-supplied options win over the plotter defaults.
    plotDict = dict(self.defaultPlotDict)
    plotDict.update(userPlotDict)

    # Apply zeropoint / normalization, drop masked entries, then drop NaNs and infs.
    metricValue = applyZPNorm(metricValueIn, plotDict).compressed()
    metricValue = metricValue[np.isfinite(metricValue)]

    # Percentile clipping only supplies the x range when the user set neither end.
    if (plotDict['xMin'] is None and plotDict['xMax'] is None
            and plotDict['percentileClip']):
        plotDict['xMin'], plotDict['xMax'] = percentileClipping(
            metricValue, percentile=plotDict['percentileClip'])

    # Histogram range: user limits where given, otherwise the data extremes.
    # (This range is only meaningful for non-cumulative histograms.)
    rangeLow = plotDict['xMin']
    if rangeLow is None:
        rangeLow = metricValue.min()
    rangeHigh = plotDict['xMax']
    if rangeHigh is None:
        rangeHigh = metricValue.max()
    if rangeLow == rangeHigh:
        # A zero-width range cannot be histogrammed, so widen it.
        warnings.warn(
            'Histogram range was single-valued; expanding default range.')
        rangeHigh = rangeLow + 1.0
    histRange = [rangeLow, rangeHigh]

    # Bin selection: explicit 'bins' beats 'binsize'; with neither, guess a bin count.
    # 'bins' may be a single number or an array and is handed to plt.hist as-is.
    if plotDict['bins'] is not None:
        bins = plotDict['bins']
    elif plotDict['binsize'] is None:
        bins = optimalBins(metricValue)
    else:
        step = plotDict['binsize']
        if plotDict['cumulative']:
            # Cumulative histograms cover the full data range, widened to the
            # user limits if those reach further out.
            start = metricValue.min()
            if plotDict['xMin'] is not None:
                start = np.min([start, plotDict['xMin']])
            stop = metricValue.max()
            if plotDict['xMax'] is not None:
                stop = np.max([stop, plotDict['xMax']])
        else:
            start, stop = histRange
        bins = np.arange(start, stop + step / 2.0, step)
        # A single bin edge is not enough; pad two bin widths on either side.
        if bins.size < 2:
            bins = np.arange(bins.min() - step * 2.0,
                             bins.max() + step * 2.0, step)

    # Generate plots.
    fig = plt.figure(fignum, figsize=plotDict['figsize'])
    ax = fig.add_subplot(plotDict['subplot'])

    # Work out whether any data lies inside the span that will be histogrammed.
    if isinstance(bins, np.ndarray):
        spanLow, spanHigh = bins.min(), bins.max()
    else:
        spanLow, spanHigh = histRange
    inSpan = (metricValue >= spanLow) & (metricValue <= spanHigh)

    drawOptions = dict(histtype='step',
                       log=plotDict['logScale'],
                       cumulative=plotDict['cumulative'],
                       label=plotDict['label'],
                       color=plotDict['color'])
    if not np.any(inSpan):
        # Nothing falls inside histRange/bins, so just make a simple histogram anyway.
        n, _, _ = plt.hist(metricValue, bins=50, **drawOptions)
    else:
        # histRange/bins are already guaranteed to span more than a single value.
        n, _, _ = plt.hist(metricValue, bins=bins, range=histRange,
                           **drawOptions)

    # Make sure the y limits enclose the histogram counts.
    yLow, yHigh = plt.ylim()
    if n.max() > yHigh:
        plt.ylim(top=n.max())
    if n.min() < yLow and not plotDict['logScale']:
        plt.ylim(bottom=n.min())

    # 'scale' turns the y axis into an area or other derived value.
    def mjrFormatter(y, pos):
        if not isinstance(plotDict['scale'], numbers.Number):
            raise ValueError(
                'plotDict["scale"] must be a number to scale the y axis.')
        return plotDict['yaxisformat'] % (y * plotDict['scale'])

    ax.yaxis.set_major_formatter(FuncFormatter(mjrFormatter))

    # Optional x, y limits.
    limitSetters = (('xMin', plt.xlim, 'left'),
                    ('xMax', plt.xlim, 'right'),
                    ('yMin', plt.ylim, 'bottom'),
                    ('yMax', plt.ylim, 'top'))
    for key, setLimit, side in limitSetters:
        if key in plotDict:
            setLimit(**{side: plotDict[key]})

    # Labels and title.
    plt.xlabel(plotDict['xlabel'], fontsize=plotDict['fontsize'])
    plt.ylabel(plotDict['ylabel'], fontsize=plotDict['fontsize'])
    plt.title(plotDict['title'])
    if plotDict['labelsize'] is not None:
        for axisName in ('x', 'y'):
            plt.tick_params(axis=axisName, labelsize=plotDict['labelsize'])
    # Return figure number
    return fig.number
def setupSlicer(self, simData, maps=None):
    """
    Set up bins in slicer.

    Works out the bin edges from ``binsize`` or ``bins`` (falling back to
    ``optimalBins`` when neither is set), stores them on the slicer, and
    installs a ``_sliceSimData`` function that returns the data indexes
    belonging to each slice.

    Parameters
    ----------
    simData : indexable by column name
        ``simData[self.sliceColName]`` provides the values to be binned.
    maps : optional
        Handed on unchanged to ``self._runMaps``.

    Raises
    ------
    ValueError
        If ``sliceColName`` is None, if ``binsize`` is not a positive number,
        or if fewer than one bin was requested through ``bins``.
    """
    if self.sliceColName is None:
        raise ValueError('sliceColName was not defined when slicer instantiated.')
    # A zero or negative binsize would make np.arange fail or silently return no
    # bin edges at all (leaving nslice at -1), so reject it before touching state.
    if self.binsize is not None and not self.binsize > 0:
        raise ValueError('binsize must be a positive number (got %r).' % (self.binsize,))
    sliceCol = simData[self.sliceColName]
    # Set bin min/max values.
    if self.binMin is None:
        self.binMin = sliceCol.min()
    if self.binMax is None:
        self.binMax = sliceCol.max()
    # Give warning if binMin = binMax, and do something at least slightly reasonable.
    if self.binMin == self.binMax:
        warnings.warn('binMin = binMax (maybe your data is single-valued?). '
                      'Increasing binMax by 1 (or 2*binsize, if binsize set).')
        if self.binsize is not None:
            self.binMax = self.binMax + 2 * self.binsize
        else:
            self.binMax = self.binMax + 1
    # Set bins.
    if self.binsize is not None:
        # Using binsize.
        if self.bins is not None:
            warnings.warn('Both binsize and bins have been set; Using binsize %f only.'
                          % (self.binsize))
        # The half-binsize padding keeps the right-hand edge in the arange output.
        self.bins = np.arange(self.binMin, self.binMax + self.binsize / 2.0,
                              float(self.binsize), 'float')
    elif hasattr(self.bins, '__iter__'):
        # Bins was a sequence (np array or list).
        self.bins = np.sort(self.bins)
    else:
        # Bins was a single value (a bin count), or not set at all.
        if self.bins is None:
            self.bins = optimalBins(sliceCol, self.binMin, self.binMax)
        nbins = int(self.bins)
        if nbins < 1:
            raise ValueError('bins must request at least one bin (got %r).' % (self.bins,))
        self.binsize = (self.binMax - self.binMin) / float(nbins)
        self.bins = np.arange(self.binMin, self.binMax + self.binsize / 2.0,
                              self.binsize, 'float')
    # nslice is one less than the number of bin values: the last one is a RH edge only.
    self.nslice = len(self.bins) - 1
    # Set slicePoint metadata.
    self.slicePoints['sid'] = np.arange(self.nslice)
    self.slicePoints['bins'] = self.bins
    # Add metadata from maps.
    self._runMaps(maps)
    # Set up data slicing.
    self.simIdxs = np.argsort(simData[self.sliceColName])
    simFieldsSorted = np.sort(simData[self.sliceColName])
    # "left" values are the positions in the sorted data where each bin starts.
    self.left = np.searchsorted(simFieldsSorted, self.bins[:-1], 'left')
    # The final entry closes the last slice at the end of the data.
    self.left = np.concatenate((self.left, np.array([len(self.simIdxs), ])))
    # Set up _sliceSimData method for this class.
    if self.cumulative:
        @wraps(self._sliceSimData)
        def _sliceSimData(islice):
            """
            Slice simData on oneD sliceCol, to return relevant indexes for slicepoint.
            """
            # Cumulative version of 1D slicing: everything from the start of
            # the sorted data up to the right edge of this slice.
            idxs = self.simIdxs[0:self.left[islice + 1]]
            return {'idxs': idxs,
                    'slicePoint': {'sid': islice,
                                   'binLeft': self.bins[0],
                                   'binRight': self.bins[islice + 1]}}
    else:
        @wraps(self._sliceSimData)
        def _sliceSimData(islice):
            """
            Slice simData on oneD sliceCol, to return relevant indexes for slicepoint.
            """
            idxs = self.simIdxs[self.left[islice]:self.left[islice + 1]]
            return {'idxs': idxs,
                    'slicePoint': {'sid': islice,
                                   'binLeft': self.bins[islice],
                                   'binRight': self.bins[islice + 1]}}
    setattr(self, '_sliceSimData', _sliceSimData)
def setupSlicer(self, simData, maps=None):
    """
    Set up bins in slicer.

    Works out the bin edges from ``binsize`` or ``bins`` (falling back to
    ``optimalBins`` when neither is set), stores them on the slicer, and
    installs a ``_sliceSimData`` function that returns the data indexes
    belonging to each slice.

    Parameters
    ----------
    simData : indexable by column name
        ``simData[self.sliceColName]`` provides the values to be binned.
    maps : optional
        Handed on unchanged to ``self._runMaps``.

    Raises
    ------
    ValueError
        If ``sliceColName`` is None, if ``binsize`` is not a positive number,
        or if fewer than one bin was requested through ``bins``.
    """
    if self.sliceColName is None:
        raise ValueError(
            'sliceColName was not defined when slicer instantiated.')
    # A zero or negative binsize would make np.arange fail or silently return no
    # bin edges at all (leaving nslice at -1), so reject it before touching state.
    if self.binsize is not None and not self.binsize > 0:
        raise ValueError(
            'binsize must be a positive number (got %r).' % (self.binsize,))
    sliceCol = simData[self.sliceColName]
    # Set bin min/max values.
    if self.binMin is None:
        self.binMin = sliceCol.min()
    if self.binMax is None:
        self.binMax = sliceCol.max()
    # Give warning if binMin = binMax, and do something at least slightly reasonable.
    if self.binMin == self.binMax:
        warnings.warn(
            'binMin = binMax (maybe your data is single-valued?). '
            'Increasing binMax by 1 (or 2*binsize, if binsize set).')
        if self.binsize is not None:
            self.binMax = self.binMax + 2 * self.binsize
        else:
            self.binMax = self.binMax + 1
    # Set bins.
    if self.binsize is not None:
        # Using binsize.
        if self.bins is not None:
            warnings.warn(
                'Both binsize and bins have been set; Using binsize %f only.'
                % (self.binsize))
        # The half-binsize padding keeps the right-hand edge in the arange output.
        self.bins = np.arange(self.binMin,
                              self.binMax + self.binsize / 2.0,
                              float(self.binsize), 'float')
    elif hasattr(self.bins, '__iter__'):
        # Bins was a sequence (np array or list).
        self.bins = np.sort(self.bins)
    else:
        # Bins was a single value (a bin count), or not set at all.
        if self.bins is None:
            self.bins = optimalBins(sliceCol, self.binMin, self.binMax)
        nbins = int(self.bins)
        if nbins < 1:
            raise ValueError(
                'bins must request at least one bin (got %r).' % (self.bins,))
        self.binsize = (self.binMax - self.binMin) / float(nbins)
        self.bins = np.arange(self.binMin,
                              self.binMax + self.binsize / 2.0,
                              self.binsize, 'float')
    # nslice is one less than the number of bin values: the last one is a RH edge only.
    self.nslice = len(self.bins) - 1
    # Set slicePoint metadata.
    self.slicePoints['sid'] = np.arange(self.nslice)
    self.slicePoints['bins'] = self.bins
    # Add metadata from maps.
    self._runMaps(maps)
    # Set up data slicing.
    self.simIdxs = np.argsort(simData[self.sliceColName])
    simFieldsSorted = np.sort(simData[self.sliceColName])
    # "left" values are the positions in the sorted data where each bin starts.
    self.left = np.searchsorted(simFieldsSorted, self.bins[:-1], 'left')
    # The final entry closes the last slice at the end of the data.
    self.left = np.concatenate((self.left, np.array([
        len(self.simIdxs),
    ])))
    # Set up _sliceSimData method for this class.
    if self.cumulative:

        @wraps(self._sliceSimData)
        def _sliceSimData(islice):
            """
            Slice simData on oneD sliceCol, to return relevant indexes for slicepoint.
            """
            # Cumulative version of 1D slicing: everything from the start of
            # the sorted data up to the right edge of this slice.
            idxs = self.simIdxs[0:self.left[islice + 1]]
            return {
                'idxs': idxs,
                'slicePoint': {
                    'sid': islice,
                    'binLeft': self.bins[0],
                    'binRight': self.bins[islice + 1]
                }
            }
    else:

        @wraps(self._sliceSimData)
        def _sliceSimData(islice):
            """
            Slice simData on oneD sliceCol, to return relevant indexes for slicepoint.
            """
            idxs = self.simIdxs[self.left[islice]:self.left[islice + 1]]
            return {
                'idxs': idxs,
                'slicePoint': {
                    'sid': islice,
                    'binLeft': self.bins[islice],
                    'binRight': self.bins[islice + 1]
                }
            }

    setattr(self, '_sliceSimData', _sliceSimData)
def __call__(self, metricValueIn, slicer, userPlotDict, fignum=None):
    """
    Plot a histogram of metricValues (such as would come from a spatial slicer).

    Parameters
    ----------
    metricValueIn : object with a ``compressed()`` method
        Metric values (presumably a numpy masked array); only the compressed
        values are plotted.
    slicer : object
        Not used by this method.
    userPlotDict : dict
        Plot options; these are layered on top of self.defaultPlotDict.
    fignum : int, optional
        Figure number handed to plt.figure.

    Returns
    -------
    int
        Number of the figure that was drawn on.

    Notes
    -----
    No histogram is drawn (the figure is left empty) when a non-cumulative
    plot is requested and no data falls inside the xMin/xMax limits.
    """
    plotDict = {}
    plotDict.update(self.defaultPlotDict)
    plotDict.update(userPlotDict)

    # Adjust metric values by zeropoint or normVal (zp takes precedence), using
    # the 'compressed' version of the masked array.
    metricValue = metricValueIn.compressed()
    if plotDict['zp'] is not None:
        metricValue = metricValue - plotDict['zp']
    elif plotDict['normVal'] is not None:
        metricValue = metricValue / plotDict['normVal']

    # Determine percentile clipped X range, if set (and xMin/xMax not set).
    if plotDict['xMin'] is None and plotDict['xMax'] is None:
        if plotDict['percentileClip']:
            plotDict['xMin'], plotDict['xMax'] = percentileClipping(
                metricValue, percentile=plotDict['percentileClip'])

    xMin, xMax = plotDict['xMin'], plotDict['xMax']
    # The limits are tested for None explicitly: reducing a list that contains
    # None with np.min/np.max is not possible on Python 3.
    haveBothLimits = (xMin is not None) and (xMax is not None)
    # Note that if xMin/xMax are None, this will just be [None, None].
    histRange = [xMin, xMax]

    # Should we use log scale on y axis? (if 'auto')
    if plotDict['logScale'] == 'auto':
        plotDict['logScale'] = False
        if haveBothLimits and min(xMin, xMax) > 0:
            # Switch to log scale when the range spans more than three decades.
            if np.log10(max(xMin, xMax)) - np.log10(min(xMin, xMax)) > 3:
                plotDict['logScale'] = True

    binsize = plotDict['binsize']
    cumulative = plotDict['cumulative']
    if binsize is not None:
        if cumulative is not False:
            # Cumulative histograms use the full range of data (with the given
            # binsize), extended to the user limits if those are wider.
            bmin = metricValue.min()
            if xMin is not None:
                bmin = np.min([bmin, xMin])
            bmax = metricValue.max()
            if xMax is not None:
                bmax = np.max([bmax, xMax])
            bins = np.arange(bmin, bmax + binsize / 2.0, binsize)
            # Catch edge-case where there is only 1 bin value.
            if bins.size < 2:
                bins = np.arange(bmin, bmax + binsize, binsize)
        else:
            # Use min/max values if specified, or else the full data range.
            bmin = xMin if xMin is not None else metricValue.min()
            bmax = xMax if xMax is not None else metricValue.max()
            bins = np.arange(bmin, bmax + binsize, binsize)
    elif plotDict['bins'] is None:
        # Neither 'bins' nor 'binsize' were specified.
        bins = optimalBins(metricValue)
    else:
        bins = plotDict['bins']

    # Generate plots.
    fig = plt.figure(fignum)
    histOptions = dict(histtype='step', log=plotDict['logScale'],
                       cumulative=cumulative, label=plotDict['label'],
                       color=plotDict['color'])
    # Histogram counts; stays None if no histogram gets drawn.
    n = None
    if cumulative is not False:
        # Cumulative: histogram without histRange, to use the full range of data.
        n, b, p = plt.hist(metricValue, bins=bins, **histOptions)
    else:
        # Keep only the data inside whichever limits were given, because
        # histogram generation fails if no data falls within the range.
        plotValue = metricValue
        if xMin is not None:
            plotValue = plotValue[plotValue >= xMin]
        if xMax is not None:
            plotValue = plotValue[plotValue <= xMax]
        # If there is only one value to histogram, a range has to be made up,
        # otherwise the histogram will fail.
        rangePad = 20.
        if np.unique(plotValue).size == 1 and not haveBothLimits:
            warnings.warn('Only one metric value, making a guess at a good histogram range.')
            histRange = [plotValue.min() - rangePad, plotValue.max() + rangePad]
            if plotValue.min() >= 0 and histRange[0] < 0:
                # Reset histogram range if it went below 0.
                histRange[0] = 0.
            if binsize is not None:
                bins = np.arange(histRange[0], histRange[1], binsize)
            else:
                bins = np.arange(histRange[0], histRange[1],
                                 (histRange[1] - histRange[0]) / 50.)
        # If there is no data within the histogram range, the plot stays empty.
        if plotValue.size > 0:
            # plt.hist needs either a complete (low, high) range or none at all.
            if histRange[0] is None or histRange[1] is None:
                histRange = None
            n, b, p = plt.hist(plotValue, bins=bins, range=histRange, **histOptions)

    # Option to use 'scale' to turn y axis into area or other value.
    def mjrFormatter(y, pos):
        return plotDict['yaxisformat'] % (y * plotDict['scale'])

    ax = plt.gca()
    ax.yaxis.set_major_formatter(FuncFormatter(mjrFormatter))

    # Set y limits (using the canonical bottom/top keywords of plt.ylim).
    if 'yMin' in plotDict:
        if plotDict['yMin'] is not None:
            plt.ylim(bottom=plotDict['yMin'])
        elif n is not None:
            # There is a bug in histtype='step' that can screw up the ylim.
            # Comes up when running allSlicer.Cfg.py
            if plt.axis()[2] == n.max():
                plt.ylim([n.min(), n.max()])
    if 'yMax' in plotDict:
        plt.ylim(top=plotDict['yMax'])
    # Set x limits (using the canonical left/right keywords of plt.xlim).
    if xMin is not None:
        plt.xlim(left=xMin)
    if xMax is not None:
        plt.xlim(right=xMax)

    # Set/Add various labels.
    plt.xlabel(plotDict['xlabel'], fontsize=plotDict['fontsize'])
    plt.ylabel(plotDict['ylabel'], fontsize=plotDict['fontsize'])
    plt.title(plotDict['title'])
    if plotDict['labelsize'] is not None:
        plt.tick_params(axis='x', labelsize=plotDict['labelsize'])
        plt.tick_params(axis='y', labelsize=plotDict['labelsize'])
    # Return figure number
    return fig.number
def plotHistogram(self, metricValueIn, title=None, xlabel=None, units=None, ylabel=None,
                  fignum=None, label=None, addLegend=False, legendloc='upper left',
                  bins=None, binsize=None, cumulative=False,
                  xMin=None, xMax=None, yMin=None, yMax=None,
                  logScale='auto',
                  scale=1.0, yaxisformat='%.3f', color='b',
                  zp=None, normVal=None, percentileClip=None, **kwargs):
    """Plot a histogram of metricValue, labelled by metricLabel.

    metricValueIn = metric values; only metricValueIn.compressed() is plotted
    title = the title for the plot (default None)
    xlabel = x axis label (default None; falls back to 'units')
    units = used as the x axis label when xlabel is not given
    ylabel = y axis label (default None)
    fignum = the figure number to use (default None - will generate new figure)
    label = the label to use in the figure legend (default None)
    addLegend = flag for whether or not to add a legend (default False)
    legendloc = location for legend (default 'upper left')
    bins = bins for histogram (numpy array or # of bins)
    binsize = size of bins to use. Will override "bins" if both are set.
      (if neither is set, optimalBins chooses the number of bins)
    cumulative = make histogram cumulative (default False)
      (<0 value makes cumulative the 'less than' way).
    xMin/Max = histogram range (default None, set by matplotlib hist)
    yMin/Max = histogram y range
    logScale = use a log y axis; 'auto' (default) turns it on only when both
      xMin and xMax are positive and span more than three decades
    scale = scale y axis by 'scale' (i.e. to translate to area)
    yaxisformat = format string applied to the scaled y tick values
    color = color passed to plt.hist
    zp = zeropoint to subtract off metricVals
    normVal = normalization value to divide metric values by
      (only used when zp is not set).
    percentileClip = if set (and xMin/xMax are both None), xMin/xMax come from
      percentileClipping of the metric values
    kwargs = accepted but not used

    Returns the figure number (so the figure can be reused if desired).
    """
    # Adjust metric values by zeropoint or normVal, using the 'compressed'
    # version of the masked array. A falsy zp or normVal counts as not set.
    metricValue = metricValueIn.compressed()
    if zp:
        metricValue = metricValue - zp
    elif normVal:
        metricValue = metricValue / normVal

    # Determine percentile clipped X range, if set (and xMin/xMax not set).
    if xMin is None and xMax is None:
        if percentileClip:
            xMin, xMax = percentileClipping(metricValue, percentile=percentileClip)

    # The limits are tested for None explicitly: reducing a list that contains
    # None with np.min/np.max is not possible on Python 3.
    haveBothLimits = (xMin is not None) and (xMax is not None)
    # Note that if xMin/xMax are None, this will just be [None, None].
    histRange = [xMin, xMax]

    # Should we use log scale on y axis? (if 'auto')
    if logScale == 'auto':
        logScale = False
        if haveBothLimits and min(xMin, xMax) > 0:
            # Switch to log scale when the range spans more than three decades.
            if np.log10(max(xMin, xMax)) - np.log10(min(xMin, xMax)) > 3:
                logScale = True

    if binsize is not None:
        if cumulative is not False:
            # Cumulative histograms use the full range of data (with the given
            # binsize), extended to the user limits if those are wider.
            bmin = metricValue.min()
            if xMin is not None:
                bmin = np.min([bmin, xMin])
            bmax = metricValue.max()
            if xMax is not None:
                bmax = np.max([bmax, xMax])
        else:
            # Use min/max values if specified, or else the full data range.
            bmin = xMin if xMin is not None else metricValue.min()
            bmax = xMax if xMax is not None else metricValue.max()
        bins = np.arange(bmin, bmax + binsize / 2.0, binsize)
    elif bins is None:
        # Neither 'bins' nor 'binsize' were specified.
        bins = optimalBins(metricValue)

    # Generate plots.
    fig = plt.figure(fignum)
    histOptions = dict(histtype='step', log=logScale, cumulative=cumulative,
                       label=label, color=color)
    # Histogram counts; stays None if no histogram gets drawn.
    n = None
    if cumulative is not False:
        # Cumulative: histogram without histRange, to use the full range of data.
        n, b, p = plt.hist(metricValue, bins=bins, **histOptions)
    else:
        # Keep only the data inside whichever limits were given, because
        # histogram generation fails if no data falls within the range.
        plotValue = metricValue
        if xMin is not None:
            plotValue = plotValue[plotValue >= xMin]
        if xMax is not None:
            plotValue = plotValue[plotValue <= xMax]
        # If there is only one value to histogram, a range has to be made up,
        # otherwise the histogram will fail.
        rangePad = 20.
        if np.unique(plotValue).size == 1 and not haveBothLimits:
            warnings.warn('Only one metric value, making a guess at a good histogram range.')
            histRange = [plotValue.min() - rangePad, plotValue.max() + rangePad]
            if plotValue.min() >= 0 and histRange[0] < 0:
                # Reset histogram range if it went below 0.
                histRange[0] = 0.
            # Without a binsize, fall back to unit-width bins.
            step = binsize if binsize is not None else 1.0
            bins = np.arange(histRange[0], histRange[1], step)
        # If there is no data within the histogram range, the plot stays empty.
        if plotValue.size > 0:
            # plt.hist needs either a complete (low, high) range or none at all.
            if histRange[0] is None or histRange[1] is None:
                histRange = None
            n, b, p = plt.hist(plotValue, bins=bins, range=histRange, **histOptions)

    # Option to use 'scale' to turn y axis into area or other value.
    def mjrFormatter(y, pos):
        return yaxisformat % (y * scale)

    ax = plt.gca()
    ax.yaxis.set_major_formatter(FuncFormatter(mjrFormatter))

    # Set y limits (using the canonical bottom/top keywords of plt.ylim).
    if yMin is not None:
        plt.ylim(bottom=yMin)
    elif n is not None:
        # There is a bug in histtype='step' that can screw up the ylim.
        # Comes up when running allSlicer.Cfg.py
        if plt.axis()[2] == n.max():
            plt.ylim([n.min(), n.max()])
    if yMax is not None:
        plt.ylim(top=yMax)
    # Set x limits (using the canonical left/right keywords of plt.xlim).
    if xMin is not None:
        plt.xlim(left=xMin)
    if xMax is not None:
        plt.xlim(right=xMax)

    # Set/Add various labels.
    if not xlabel:
        xlabel = units
    if xlabel is not None:
        plt.xlabel(xlabel)
    if ylabel is not None:
        plt.ylabel(ylabel)
    if addLegend:
        plt.legend(fancybox=True, prop={'size': 'smaller'}, loc=legendloc)
    if title is not None:
        plt.title(title)
    # Return figure number (so we can reuse this if desired).
    return fig.number
def __call__(self, metricValueIn, slicer, userPlotDict, fignum=None):
    """
    Plot a histogram of metricValues (such as would come from a spatial slicer).

    Parameters
    ----------
    metricValueIn : presumably a numpy masked array
        Metric values. They are passed through applyZPNorm, compressed, and
        stripped of non-finite entries before being histogrammed.
    slicer : object
        Not used by this method.
    userPlotDict : dict
        Plot options; these are layered on top of self.defaultPlotDict.
    fignum : int, optional
        Figure number handed to plt.figure.

    Returns
    -------
    int
        Number of the figure that was drawn on.
    """
    # User-supplied options win over the plotter defaults.
    plotDict = {}
    plotDict.update(self.defaultPlotDict)
    plotDict.update(userPlotDict)
    # Adjust metric values by zeropoint or normVal, and use 'compressed' version of masked array.
    metricValue = applyZPNorm(metricValueIn, plotDict)
    metricValue = metricValue.compressed()
    # Toss any NaNs or infs
    metricValue = metricValue[np.isfinite(metricValue)]
    # Determine percentile clipped X range, if set. (and xmin/max not set).
    if plotDict['xMin'] is None and plotDict['xMax'] is None:
        if plotDict['percentileClip']:
            plotDict['xMin'], plotDict['xMax'] = percentileClipping(
                metricValue, percentile=plotDict['percentileClip'])
    # Set the histogram range values, to avoid cases of trying to histogram single-valued data.
    # First we try to use the range specified by a user, if there is one. Then use the data if not.
    # all of this only works if plotDict is not cumulative.
    histRange = [plotDict['xMin'], plotDict['xMax']]
    if histRange[0] is None:
        histRange[0] = metricValue.min()
    if histRange[1] is None:
        histRange[1] = metricValue.max()
    # Need to have some range of values on the histogram, or it will fail.
    if histRange[0] == histRange[1]:
        warnings.warn('Histogram range was single-valued; expanding default range.')
        histRange[1] = histRange[0] + 1.0
    # Set up the bins for the histogram. User specified 'bins' overrides 'binsize'.
    # Note that 'bins' could be a single number or an array, simply passed to plt.hist.
    if plotDict['bins'] is not None:
        bins = plotDict['bins']
    elif plotDict['binsize'] is not None:
        # If generating a cumulative histogram, want to use full range of data (but with given binsize).
        # .. but if user set histRange to be wider than full range of data, then
        # extend bins to cover this range, so we can make prettier plots.
        if plotDict['cumulative']:
            if plotDict['xMin'] is not None:
                # Potentially, expand the range for the cumulative histogram.
                bmin = np.min([metricValue.min(), plotDict['xMin']])
            else:
                bmin = metricValue.min()
            if plotDict['xMax'] is not None:
                bmax = np.max([metricValue.max(), plotDict['xMax']])
            else:
                bmax = metricValue.max()
            bins = np.arange(bmin, bmax + plotDict['binsize'] / 2.0, plotDict['binsize'])
        # Otherwise, not cumulative so just use metric values, without potential expansion.
        else:
            bins = np.arange(histRange[0], histRange[1] + plotDict['binsize'] / 2.0,
                             plotDict['binsize'])
        # Catch edge-case where there is only 1 bin value
        if bins.size < 2:
            bins = np.arange(bins.min() - plotDict['binsize'] * 2.0,
                             bins.max() + plotDict['binsize'] * 2.0, plotDict['binsize'])
    else:
        # If user did not specify bins or binsize, then we try to figure out a good number of bins.
        bins = optimalBins(metricValue)
    # Generate plots.
    fig = plt.figure(fignum, figsize=plotDict['figsize'])
    ax = fig.add_subplot(plotDict['subplot'])
    # Check if any data falls within histRange, because otherwise histogram generation will fail.
    if isinstance(bins, np.ndarray):
        condition = ((metricValue >= bins.min()) & (metricValue <= bins.max()))
    else:
        condition = ((metricValue >= histRange[0]) & (metricValue <= histRange[1]))
    plotValue = metricValue[condition]
    if len(plotValue) == 0:
        # No data is within histRange/bins. So let's just make a simple histogram anyway.
        n, b, p = plt.hist(metricValue, bins=50, histtype='step',
                           cumulative=plotDict['cumulative'],
                           log=plotDict['logScale'], label=plotDict['label'],
                           color=plotDict['color'])
    else:
        # There is data to plot, and we've already ensured histRange/bins are more than single value.
        n, b, p = plt.hist(metricValue, bins=bins, range=histRange,
                           histtype='step', log=plotDict['logScale'],
                           cumulative=plotDict['cumulative'],
                           label=plotDict['label'], color=plotDict['color'])
    # Make sure the y limits enclose the histogram counts. The limits are set
    # with the canonical bottom/top (and, below, left/right) keywords rather
    # than the ymin/ymax/xmin/xmax aliases.
    hist_ylims = plt.ylim()
    if n.max() > hist_ylims[1]:
        plt.ylim(top=n.max())
    if n.min() < hist_ylims[0] and not plotDict['logScale']:
        plt.ylim(bottom=n.min())

    # Fill in axes labels and limits.
    # Option to use 'scale' to turn y axis into area or other value.
    def mjrFormatter(y, pos):
        if not isinstance(plotDict['scale'], numbers.Number):
            raise ValueError('plotDict["scale"] must be a number to scale the y axis.')
        return plotDict['yaxisformat'] % (y * plotDict['scale'])

    ax.yaxis.set_major_formatter(FuncFormatter(mjrFormatter))
    # Set optional x, y limits.
    if 'xMin' in plotDict:
        plt.xlim(left=plotDict['xMin'])
    if 'xMax' in plotDict:
        plt.xlim(right=plotDict['xMax'])
    if 'yMin' in plotDict:
        plt.ylim(bottom=plotDict['yMin'])
    if 'yMax' in plotDict:
        plt.ylim(top=plotDict['yMax'])
    # Set/Add various labels.
    plt.xlabel(plotDict['xlabel'], fontsize=plotDict['fontsize'])
    plt.ylabel(plotDict['ylabel'], fontsize=plotDict['fontsize'])
    plt.title(plotDict['title'])
    if plotDict['labelsize'] is not None:
        plt.tick_params(axis='x', labelsize=plotDict['labelsize'])
        plt.tick_params(axis='y', labelsize=plotDict['labelsize'])
    # Return figure number
    return fig.number