def plothist(x, bin=None, nbins=None, xrange=None, yrange=None, min=None,
             max=None, overplot=False, color='black', xlog=False, ylog=False,
             nan=False, weights=None, norm=False, kernel=None,
             retpoints=False, adaptive=False, adaptive_thresh=30,
             adaptive_depth=[2, 10], weight_norm=False, apply_func=None,
             **kw):
    """
    Plot the 1D histogram

    Example:
    >> plothist(dat, bin=0.1, min=0, max=3)

    Keyword parameters:
    ------------------
    bin
        the binsize (float)
    nbins
        number of bins (integer). If neither bin= nor nbins= is given,
        100 bins are used. If both are given a RuntimeWarning is issued
        and both values are used as they are.
    xrange, yrange
        accepted for interface compatibility; not used by this function.
        The plotted ranges are controlled by the xr=/yr= keywords, which
        default to [min, max] and to the range of the plotted values.
    min, max
        range of data for which the histogram is constructed (defaults:
        minimum and maximum of the data). If bin= is given without max=,
        max is extended to min + nbins * bin.
    overplot
        boolean parameter for overplotting (oplot() is called instead
        of plot())
    color, xlog, ylog
        passed through to the plotting function
    nan
        boolean flag to ignore non-finite values of x (and of weights,
        if given)
    weights
        the 1-D array of weights used in the histogram creation, or a
        scalar by which every bin count is multiplied
    norm
        boolean flag to normalize the plotted values by the peak value
    kernel
        if not None, a kernel density estimate is plotted instead of the
        histogram. It is evaluated by statistics.pdf() with h=bin/2 on a
        grid of 5*nbins points and scaled by bin*len(data). When kernel
        is given the adaptive and weight_norm options are not used.
    retpoints
        boolean parameter controlling whether to return or not the
        computed histogram. If yes the tuple with two arrays
        (bin centers, values in bins) is returned. In the kernel mode
        the tuple is (evaluation grid, density values). The returned
        values are the ones before norm= and apply_func= are applied.
    adaptive
        boolean for turning on/off the adaptive regime of
        histogramming (adaptive bin size, computed by adabinner.hist).
        If True the weights, nbins, bin parameters do not affect
        the binning.
    adaptive_thresh
        the limiting number of points in the bin for the adaptive
        histogramming (default 30)
    adaptive_depth
        the list of two integers for the detalisation levels of
        adaptive histogramming (default [2,10])
    weight_norm
        if True the value in each bin is mean weight of points within
        the bin (empty bins become NaN)
    apply_func
        optional function f(x_values, y_values) returning the y values
        that are actually plotted
    **kw
        any other keywords are passed to the plotting function
        (ps= defaults to 0)
    """
    # Work on an array so that boolean masking also works for lists/tuples.
    dat = np.asanyarray(x)
    if nan:
        keep = np.isfinite(dat)
        if weights is not None:
            keep = keep & np.isfinite(weights)
            # A scalar weight applies to every point, so there is nothing
            # to mask (indexing it would raise).
            if not np.isscalar(weights):
                weights = np.asanyarray(weights)[keep]
        dat = dat[keep]

    maxNone = False
    if min is None:
        min = np.min(dat)
    if max is None:
        maxNone = True
        max = np.max(dat)

    if bin is None and nbins is None:
        nbins = 100
        bin = (max - min) * 1. / nbins
    elif nbins is None:
        nbins = int(math.ceil((max - min) * 1. / bin))
        if maxNone:
            # keep the bin size exact by moving the automatic upper edge
            max = min + nbins * bin
    elif bin is None:
        bin = (max - min) * 1. / nbins
    else:
        warnings.warn(
            "both bin= and nbins= keywords were specified in the plothist call",
            RuntimeWarning)
        # if both nbins and bin are defined I don't do anything
        # it may be non-intuitive if kernel option is used, because
        # it uses both nbins and bin options

    if kernel is None:
        if not adaptive:
            if not np.isscalar(weights):
                # weights is None or an array of per-point weights
                hh, loc = np.histogram(dat, range=(min, max), bins=nbins,
                                       weights=weights)
            else:
                hh, loc = np.histogram(dat, range=(min, max), bins=nbins)
                hh = hh * weights
            if weight_norm:
                counts, loc = np.histogram(dat, range=(min, max),
                                           bins=nbins, weights=None)
                # empty bins give 0/0 = NaN here
                hh = hh * 1. / counts
        else:
            import adabinner
            hh, loc = adabinner.hist(dat, xmin=min, xmax=max,
                                     hi=adaptive_depth,
                                     thresh=adaptive_thresh)
        # Duplicate every value and every inner edge to draw a step curve.
        hh1 = np.repeat(hh, 2)
        loc1 = np.concatenate(([loc[0]], np.repeat(loc[1:-1], 2),
                               [loc[-1]]))
    else:
        loc1 = np.linspace(min, max, nbins * 5)
        # NOTE(review): this must resolve to a module providing pdf();
        # the standard library module of the same name has no such
        # function - confirm the import path.
        import statistics
        if weights is not None:
            hh1 = statistics.pdf(dat, loc1, h=bin / 2., kernel=kernel,
                                 weight=weights) * bin * len(dat)
        else:
            hh1 = statistics.pdf(dat, loc1, h=bin / 2.,
                                 kernel=kernel) * bin * len(dat)
        # remember the raw estimate so that retpoints works in this mode
        kde_values = hh1

    if norm:
        # nanmax: a NaN bin (weight_norm with empty bins) must not turn
        # the whole curve into NaN
        hh1 = hh1 * 1. / np.nanmax(hh1)
    kw['ps'] = kw.get('ps') or 0
    if 'yr' not in kw:
        # NOTE(review): the default y-range is taken before apply_func
        # is applied - confirm this is intended.
        kw['yr'] = [np.nanmin(hh1), np.nanmax(hh1)]
    if 'xr' not in kw:
        kw['xr'] = [min, max]
    if apply_func is not None:
        hh1 = apply_func(loc1, hh1)

    func = oplot if overplot else plot
    func(loc1, hh1, color=color, xlog=xlog, ylog=ylog, **kw)

    if retpoints:
        if kernel is None:
            return 0.5 * (loc[1:] + loc[:-1]), hh
        return loc1, kde_values
def plothist(x, bin=None, nbins=None, xrange=None, yrange=None, min=None,
             max=None, overplot=False, color='black', xlog=False, ylog=False,
             nan=False, weights=None, norm=False, kernel=None,
             retpoints=False, adaptive=False, adaptive_thresh=30,
             adaptive_depth=[2, 10], weight_norm=False, apply_func=None,
             statistic=None, knuth=False, cumulative=False, **kw):
    """
    Plot the 1D histogram

    Example:
    >> plothist(dat, bin=0.1, min=0, max=3)

    Keyword parameters:
    ------------------
    bin
        the binsize (float)
    nbins
        number of bins (integer). If neither bin= nor nbins= is given,
        100 bins are used (or the value chosen by knuth=True). If both
        are given a RuntimeWarning is issued and both values are used
        as they are.
    xrange, yrange
        accepted for interface compatibility; not used by this function.
        The plotted ranges are controlled by the xr=/yr= keywords, which
        default to [min, max] and to the range of the plotted values.
    min, max
        range of data for which the histogram is constructed (defaults:
        NaN-ignoring minimum and maximum of the data). If bin= is given
        without max=, max is extended to min + nbins * bin.
    overplot
        boolean parameter for overplotting (oplot() is called instead
        of plot())
    color, xlog, ylog
        passed through to the plotting function
    nan
        boolean flag to ignore non-finite values of x (and of weights,
        if given)
    weights
        the 1-D array of weights used in the histogram creation, or a
        scalar weight applied to every point
    norm
        boolean flag to normalize the plotted values by the peak value
    kernel
        if not None, a kernel density estimate is plotted instead of the
        histogram. It is computed with sklearn.neighbors.KernelDensity
        (bandwidth=bin) on a grid of 10*nbins points, using the finite
        data values only. Weights are ignored in this mode (a warning
        is issued), and so are adaptive, statistic and weight_norm.
    retpoints
        boolean parameter controlling whether to return or not the
        computed histogram. If yes the tuple with two arrays
        (bin centers, values in bins) is returned. In the kernel mode
        the tuple is (evaluation grid, density values). The returned
        values are the ones before norm= and apply_func= are applied.
    adaptive
        boolean for turning on/off the adaptive regime of
        histogramming (adaptive bin size, computed by adabinner.hist).
        If True the weights, nbins, bin parameters do not affect
        the binning.
    adaptive_thresh
        the limiting number of points in the bin for the adaptive
        histogramming (default 30)
    adaptive_depth
        the list of two integers for the detalisation levels of
        adaptive histogramming (default [2,10])
    weight_norm
        if True the value in each bin is mean weight of points within
        the bin (empty bins become NaN). Not used when statistic= is
        given.
    apply_func
        optional function f(x_values, y_values) returning the y values
        that are actually plotted
    statistic
        if not None, the bin values are computed by
        scipy.stats.binned_statistic(data, weights, statistic, ...),
        i.e. weights play the role of the binned values
    knuth
        if True and neither bin= nor nbins= is given, the number of bins
        is chosen by the module's __findKnuth helper
    cumulative
        if True the cumulative sum of the histogram is plotted and
        returned

    Raises RuntimeError if cumulative is combined with weights, kernel
    or adaptive.
    """
    # Work on an array so that boolean masking also works for lists/tuples.
    dat = np.asanyarray(x)
    if nan:
        keep = np.isfinite(dat)
        if weights is not None:
            keep = keep & np.isfinite(weights)
            # A scalar weight applies to every point, so there is nothing
            # to mask (indexing it would raise).
            if not np.isscalar(weights):
                weights = np.asanyarray(weights)[keep]
        dat = dat[keep]

    maxNone = False
    if min is None:
        min = np.nanmin(dat)
    if max is None:
        maxNone = True
        max = np.nanmax(dat)

    if bin is None and nbins is None:
        if not knuth:
            nbins = 100
        else:
            nbins = __findKnuth(dat, min, max)
        bin = (max - min) * 1. / nbins
    elif nbins is None:
        nbins = int(math.ceil((max - min) * 1. / bin))
        if maxNone:
            # keep the bin size exact by moving the automatic upper edge
            max = min + nbins * bin
    elif bin is None:
        bin = (max - min) * 1. / nbins
    else:
        warnings.warn(
            'both bin= and nbins= keywords were specified in ' +
            'the plothist call', RuntimeWarning)
        # if both nbins and bin are defined I don't do anything
        # it may be non-intuitive if kernel option is used, because
        # it uses both nbins and bin options

    if cumulative:
        if (kernel is not None or adaptive or weights is not None):
            raise RuntimeError(
                'cumulative is incompatible with weights, kernel ' +
                'or adaptive options')

    if kernel is None:
        if not adaptive:
            if np.isscalar(weights):
                # expand a scalar weight to one weight per data point
                weights = np.zeros_like(dat) + weights
            if statistic is None:
                hh, loc = np.histogram(dat, range=(min, max), bins=nbins,
                                       weights=weights)
                if weight_norm:
                    counts, loc = np.histogram(dat, range=(min, max),
                                               bins=nbins, weights=None)
                    # empty bins give 0/0 = NaN here
                    hh = hh * 1. / counts
            else:
                S = scipy.stats.binned_statistic(dat, weights, statistic,
                                                 range=[min, max],
                                                 bins=nbins)
                hh = S.statistic
                loc = S.bin_edges
        else:
            import adabinner
            hh, loc = adabinner.hist(dat, xmin=min, xmax=max,
                                     hi=adaptive_depth,
                                     thresh=adaptive_thresh)
        if cumulative:
            hh = np.cumsum(hh)
        # Duplicate every value and every inner edge to draw a step curve.
        hh1 = np.repeat(hh, 2)
        loc1 = np.concatenate(([loc[0]], np.repeat(loc[1:-1], 2),
                               [loc[-1]]))
    else:
        loc1 = np.linspace(min, max, nbins * 10)
        import sklearn.neighbors
        xind = np.isfinite(dat)
        kde = sklearn.neighbors.KernelDensity(bandwidth=bin, kernel=kernel)
        kde.fit(np.asarray(dat[xind]).flatten().reshape(-1, 1))
        # score_samples() returns log-densities
        hh1 = np.exp(kde.score_samples(loc1.reshape(-1, 1)))
        if weights is not None:
            warnings.warn('WARNING weights ignored for KDE !',
                          RuntimeWarning, stacklevel=2)
        # remember the raw estimate so that retpoints works in this mode
        kde_values = hh1

    if norm:
        # nanmax: a NaN bin (e.g. weight_norm with empty bins) must not
        # turn the whole curve into NaN
        hh1 = hh1 * 1. / np.nanmax(hh1)
    kw['ps'] = kw.get('ps') or 0
    if 'yr' not in kw:
        # NOTE(review): the default y-range is taken before apply_func
        # is applied - confirm this is intended.
        kw['yr'] = [np.nanmin(hh1), np.nanmax(hh1)]
    if 'xr' not in kw:
        kw['xr'] = [min, max]
    if apply_func is not None:
        hh1 = apply_func(loc1, hh1)

    func = oplot if overplot else plot
    func(loc1, hh1, color=color, xlog=xlog, ylog=ylog, **kw)

    if retpoints:
        if kernel is None:
            return 0.5 * (loc[1:] + loc[:-1]), hh
        return loc1, kde_values