def test_histogram_output():
    """Pin ``histogram`` counts and bin edges for a seeded ``default_rng`` sample.

    One hundred standard-normal draws (seed 0) are binned with a fixed bin
    count and with the 'scott', 'freedman' and 'blocks' rules; the results
    are compared against stored reference values.
    """
    sample = np.random.default_rng(0).standard_normal(100)

    # (bins argument, expected counts, expected edges, extra tolerance kwargs
    # used when comparing the edges)
    expectations = [
        (
            10,
            [2, 0, 12, 14, 14, 17, 16, 8, 9, 8],
            [
                -2.32503077, -1.89228844, -1.4595461, -1.02680377,
                -0.59406143, -0.1613191, 0.27142324, 0.70416558,
                1.13690791, 1.56965025, 2.00239258,
            ],
            {},
        ),
        (
            'scott',
            [2, 14, 27, 25, 16, 16],
            [
                -2.32503077, -1.59953424, -0.87403771, -0.14854117,
                0.57695536, 1.3024519, 2.02794843,
            ],
            {},
        ),
        (
            'freedman',
            [2, 11, 16, 18, 22, 14, 13, 4],
            [
                -2.32503077, -1.74087192, -1.15671306, -0.5725542,
                0.01160465, 0.59576351, 1.17992237, 1.76408122,
                2.34824008,
            ],
            {'rtol': 2e-7},
        ),
        (
            'blocks',
            [3, 97],
            [-2.32503077, -1.37136996, 2.00239258],
            {},
        ),
    ]

    for bin_spec, want_counts, want_edges, edge_tolerance in expectations:
        got_counts, got_edges = histogram(sample, bins=bin_spec)
        assert_allclose(got_counts, want_counts)
        assert_allclose(got_edges, want_edges, **edge_tolerance)
def test_histogram_output():
    """Pin ``histogram`` counts and bin edges for a seeded ``RandomState`` sample.

    The 'blocks' rule is additionally required to emit the
    ``AstropyUserWarning`` about ``p0``.
    """
    sample = np.random.RandomState(0).randn(100)

    # Fixed number of equal-width bins.
    n_fixed, edges_fixed = histogram(sample, bins=10)
    assert_allclose(n_fixed, [1, 5, 7, 13, 17, 18, 16, 11, 7, 5])
    assert_allclose(
        edges_fixed,
        [
            -2.55298982, -2.07071537, -1.58844093, -1.10616648,
            -0.62389204, -0.1416176, 0.34065685, 0.82293129,
            1.30520574, 1.78748018, 2.26975462,
        ],
    )

    # Scott's rule.
    n_scott, edges_scott = histogram(sample, bins='scott')
    assert_allclose(n_scott, [2, 13, 23, 34, 16, 10, 2])
    assert_allclose(
        edges_scott,
        [
            -2.55298982, -1.79299405, -1.03299829, -0.27300252,
            0.48699324, 1.24698901, 2.00698477, 2.76698054,
        ],
    )

    # Freedman-Diaconis rule.
    n_fd, edges_fd = histogram(sample, bins='freedman')
    assert_allclose(n_fd, [2, 7, 13, 20, 26, 14, 11, 5, 2])
    assert_allclose(
        edges_fd,
        [
            -2.55298982, -1.95796338, -1.36293694, -0.7679105,
            -0.17288406, 0.42214237, 1.01716881, 1.61219525,
            2.20722169, 2.80224813,
        ],
    )

    # Bayesian blocks: only the histogram call itself must raise the warning.
    with pytest.warns(AstropyUserWarning,
                      match=r'p0 does not seem to accurate'):
        n_blocks, edges_blocks = histogram(sample, bins='blocks')
    assert_allclose(n_blocks, [10, 61, 29])
    assert_allclose(
        edges_blocks,
        [-2.55298982, -1.24381059, 0.46422235, 2.26975462],
    )
def fig5(fig): yLabel5=plt.ylabel('Filtered Data') #xLabel5=plt.xlabel('Azimut') ax5=fig.add_subplot(111) dum5=histogram(filtered_data,bins='blocks') dum55=histogram(filtered_data,bins='knuth') dum555=histogram(filtered_data,bins='scott') dum5555=histogram(filtered_data,bins='freedman')
def pdf(self, bins=10, range=None, weights=None, normed=False, **kwargs):
    """
    Histogram the signal as an estimate of its probability density.

    Thin wrapper around ``Astats.histogram``.

    Parameters
    ----------
    bins : int, list or str, optional
        Bin specification. If a string, it must be one of:

        - 'blocks' : use bayesian blocks for dynamic bin widths
        - 'knuth' : use Knuth's rule to determine bins
        - 'scott' : use Scott's rule to determine bins
        - 'freedman' : use the Freedman-Diaconis rule to determine bins
    range : tuple or None, optional
        Minimum and maximum of the histogram range, forwarded unchanged.
    weights : array_like, optional
        Forwarded unchanged to the histogram function.
    normed : bool, optional
        If True the histogram is computed on ``self.signorm`` instead of
        ``self.sig``. Default is False.
    **kwargs
        Extra keyword arguments forwarded to the histogram function.

    Returns
    -------
    hist : array
        The values of the histogram.
    bin_edges : array
        The bin edges, as returned by the histogram function.

    See Also
    --------
    numpy.histogram
    astropy.stats.histogram
    """
    # Only the input series depends on ``normed``; the call is otherwise
    # identical for both cases.
    series = self.signorm if normed else self.sig
    hist, bins_e = Astats.histogram(
        series, bins=bins, range=range, weights=weights, **kwargs
    )
    return hist, bins_e
def test_histogram_badargs(N=1000, rseed=0):
    """Check that unsupported ``histogram`` arguments raise the right errors."""
    sample = np.random.RandomState(rseed).randn(N)

    # The rule-based binnings do not support weights.
    for rule in ('scott', 'freedman', 'blocks'):
        with pytest.raises(NotImplementedError):
            histogram(sample, rule, weights=sample)

    # An unknown rule name is rejected with ValueError.
    with pytest.raises(ValueError):
        histogram(sample, bins='bad_argument')
def pdf_histogram(self, **kwargs):
    """
    Histogram the samples of every element of the distribution.

    Parameters
    ----------
    **kwargs
        Passed unchanged to ``stats.histogram``. Some options may not be
        valid for multidimensional distributions.

    Returns
    -------
    hist : array
        Histogram values; the trailing axis runs over the histogram bins.
    bin_edges : array
        Bin edges; the trailing axis runs over the edges.
    """
    samples = self.distribution
    n_per_element = samples.shape[-1]
    # Collapse all leading axes so every row holds the samples of one element.
    rows = samples.reshape(samples.size // n_per_element, n_per_element)

    per_row = [stats.histogram(row, **kwargs) for row in rows]

    nhists = np.array([counts for counts, _ in per_row])
    hist_shape = self.shape + (nhists.size // self.size,)
    bin_edges = np.array([edges for _, edges in per_row])
    edge_shape = self.shape + (bin_edges.size // self.size,)

    # Restore the distribution's own shape in front of the histogram axis.
    return nhists.reshape(hist_shape), bin_edges.reshape(edge_shape)
def pdf_histogram(self, **kwargs):
    """
    Compute one histogram per distribution element over its samples.

    Parameters
    ----------
    **kwargs
        Forwarded as-is to ``stats.histogram``. Not every option is
        necessarily valid for multidimensional distributions.

    Returns
    -------
    hist : array
        Histogram values, shaped ``self.shape`` plus a trailing bin axis.
    bin_edges : array
        Bin edges, shaped ``self.shape`` plus a trailing edge axis.
    """
    distr = self.distribution
    sample_axis_len = distr.shape[-1]
    flattened = distr.reshape(distr.size // sample_axis_len, sample_axis_len)

    all_counts, all_edges = [], []
    for element_samples in flattened:
        counts, edges = stats.histogram(element_samples, **kwargs)
        all_counts.append(counts)
        all_edges.append(edges)

    counts_arr = np.array(all_counts)
    counts_shape = self.shape + (counts_arr.size // self.size,)
    edges_arr = np.array(all_edges)
    edges_shape = self.shape + (edges_arr.size // self.size,)
    return counts_arr.reshape(counts_shape), edges_arr.reshape(edges_shape)
def test_histogram(bin_type, N=1000, rseed=0):
    """Check basic invariants of ``histogram`` for a given ``bin_type``.

    ``bin_type`` is presumably supplied by a parametrize decorator that is
    not visible in this snippet.
    """
    sample = np.random.RandomState(rseed).randn(N)
    counts, edges = histogram(sample, bin_type)

    # Every sample lands in exactly one bin ...
    assert counts.sum() == len(sample)
    # ... and there is one more edge than there are bins.
    assert len(counts) == len(edges) - 1
def coverage_correction(full_fluxes, selected_fluxes):
    """Compute the correction to the coverage at each flux.

    Both flux samples are binned on 20 logarithmically spaced edges that
    span ``full_fluxes``; the per-bin ratio ``selected / full`` is returned
    as a nearest-neighbour interpolator over the bin centres.

    Parameters
    ----------
    full_fluxes : numpy.ndarray
        Fluxes of the full sample. Must be strictly positive because the
        bin edges are built in log10 space.
    selected_fluxes : array_like
        Fluxes of the selected sub-sample.

    Returns
    -------
    scipy.interpolate.interp1d
        Interpolator (``kind='nearest'``) of the correction versus flux.
        Fluxes outside the binned range evaluate to 1.
    """
    from astropy.stats import histogram
    from scipy.interpolate import interp1d

    log_lo = np.log10(full_fluxes.min())
    log_hi = np.log10(full_fluxes.max())
    # Pad the range outward by a relative 1e-8 so the extreme fluxes fall
    # inside the outer bins. The padding must be applied to |log|: scaling
    # a negative log10 by 0.99999999 / 1.00000001 moves the edge *inward*
    # and drops the smallest / largest flux from the histogram.
    pad = 1e-8
    bins = np.logspace(log_lo - abs(log_lo) * pad,
                       log_hi + abs(log_hi) * pad,
                       num=20)

    full_binned, bin_vals = np.histogram(full_fluxes, bins=bins)
    selected_binned, _ = histogram(selected_fluxes, bins=bins)

    # Report how many bins hold no selected source. The comparison has to be
    # done element-wise *before* reducing; ``np.any(x) == 0`` is only true
    # when every bin is empty.
    n_empty = np.sum(selected_binned == 0)
    if n_empty:
        print(n_empty, 'zeros in coverage correction')

    # NOTE: bins with no source in the full sample yield nan/inf here.
    corrections = selected_binned / full_binned
    bin_centers = (bin_vals[1:] + bin_vals[:-1]) / 2
    return interp1d(bin_centers, corrections, fill_value=1,
                    bounds_error=False, kind='nearest')
def test_histogram_output_knuth():
    """Pin the 'knuth' binning of a seeded ``RandomState`` sample."""
    sample = np.random.RandomState(0).randn(100)

    expected_counts = [1, 6, 9, 14, 21, 22, 12, 8, 7]
    expected_edges = [
        -2.55298982, -2.01712932, -1.48126883, -0.94540834,
        -0.40954784, 0.12631265, 0.66217314, 1.19803364,
        1.73389413, 2.26975462,
    ]

    got_counts, got_edges = histogram(sample, bins='knuth')
    assert_allclose(got_counts, expected_counts)
    assert_allclose(got_edges, expected_edges)
def compute_hist(self, inp_col='parallax', hist_blocks='knuth'):
    """
    Compute histogram bin centres and heights for one catalogue column.

    Masked entries of ``self.cat[inp_col]`` are dropped before binning.
    The results are stored on the instance as ``self.hist_x`` (bin
    centres) and ``self.hist_y`` (bin heights); nothing is returned.

    Parameters
    ----------
    inp_col : str, optional
        Name of the column in ``self.cat`` to histogram.
    hist_blocks : int, sequence or str, optional
        Passed as ``bins`` to ``histogram`` (e.g. 'knuth', 'blocks').
    """
    column = self.cat[inp_col]
    inp_dat = column[np.logical_not(column.mask)]
    bin_heights, bin_borders = histogram(inp_dat, bins=hist_blocks)

    # Use each bin's own width: subtracting only the *first* width puts the
    # centres in the wrong place whenever the bins are not uniform, as with
    # hist_blocks='blocks'.
    bin_center = bin_borders[1:] - np.diff(bin_borders) / 2.

    self.hist_x = bin_center
    self.hist_y = bin_heights
def test_histogram_output_knuth():
    """Pin the 'knuth' binning of a seeded ``default_rng`` sample."""
    sample = np.random.default_rng(0).standard_normal(100)

    expected_counts = [2, 1, 13, 19, 15, 18, 14, 10, 8]
    expected_edges = [
        -2.32503077, -1.84420596, -1.36338114, -0.88255632,
        -0.4017315, 0.07909331, 0.55991813, 1.04074295,
        1.52156777, 2.00239258,
    ]

    got_counts, got_edges = histogram(sample, bins='knuth')
    assert_allclose(got_counts, expected_counts)
    assert_allclose(got_edges, expected_edges)
def getHistogramVals(data, meta_type, domain, source="numpy"):
    """Compute histogram breaks, densities and representative points.

    Parameters
    ----------
    data : array_like
        Samples to histogram.
    meta_type : MetaType
        If ``MetaType.DISCRETE`` the values are simply counted per domain
        value and ``source`` is ignored.
    domain : sequence
        For discrete data, the ordered values of the domain. For
        ``source="kde"``, ``domain[0]`` and ``domain[1]`` are used as the
        lower and upper bound of the evaluation grid.
    source : {"numpy", "R", "kde", "astropy"}, optional
        Backend used for non-discrete data.

    Returns
    -------
    breaks : numpy.ndarray
        Bin edges.
    densities : numpy.ndarray
        Density value per bin.
    repr_points : sequence
        ``domain`` for discrete data, otherwise the bin mid-points.

    Raises
    ------
    AssertionError
        If ``source`` is not one of the supported backends.
    """
    # check this: https://github.com/theodoregoetz/histogram

    if meta_type == MetaType.DISCRETE:
        # for discrete, we just have to count: one unit-wide bin per domain
        # value, closed off by an extra edge after the last value
        breaks = np.array(list(domain) + [domain[-1] + 1])
        densities, breaks = np.histogram(data, bins=breaks, density=True)
        repr_points = domain
        return breaks, densities, repr_points

    if source == "R":
        from rpy2 import robjects

        init_rpy()

        result = robjects.r["getHistogram"](data)
        breaks = np.asarray(result[0])
        densities = np.asarray(result[2])
        mids = np.asarray(result[3])
        return breaks, densities, mids

    if source == "kde":
        import statsmodels.api as sm

        kde = sm.nonparametric.KDEMultivariate(data, var_type="c", bw="cv_ls")
        # bin count from the bandwidth, capped at 30
        bins = int((domain[1] - domain[0]) / kde.bw)
        bins = min(30, bins)
        cdf_x = np.linspace(domain[0], domain[1], 2 * bins)
        cdf_y = kde.cdf(cdf_x)
        breaks = np.interp(np.linspace(0, 1, bins), cdf_y, cdf_x)  # inverse cdf
        mids = (breaks[:-1] + breaks[1:]) / 2.0
        densities = kde.pdf(mids)
        # NOTE(review): the original evaluated ``densities / np.sum(densities)``
        # here and discarded the result. The dead expression is dropped and the
        # returned values stay un-normalised, exactly as before -- confirm
        # whether a normalisation was intended.
        if len(densities.shape) == 0:
            densities = np.array([densities])
        return breaks, densities, mids

    if source == "numpy":
        densities, breaks = np.histogram(data, bins="auto", density=True)
        mids = (breaks[:-1] + breaks[1:]) / 2.0
        return breaks, densities, mids

    if source == "astropy":
        from astropy.stats import histogram

        densities, breaks = histogram(data, bins="blocks", density=True)
        mids = (breaks[:-1] + breaks[1:]) / 2.0
        return breaks, densities, mids

    # Raise explicitly: a bare ``assert False`` is removed under ``python -O``
    # and the function would then silently return None.
    raise AssertionError("unkown histogram method " + source)
def test_histogram(bin_type, N=1000, rseed=0):
    """Check basic invariants of ``histogram`` for a given ``bin_type``.

    ``bin_type`` is presumably supplied by a parametrize decorator that is
    not visible in this snippet.
    """
    sample = np.random.RandomState(rseed).randn(N)

    # The 'blocks' rule emits a warning about p0; silence just that one.
    with warnings.catch_warnings():
        warnings.filterwarnings(
            'ignore',
            message=r'.*p0 does not seem to accurate.*',
            category=AstropyUserWarning,
        )
        counts, edges = histogram(sample, bin_type)

    # Every sample lands in exactly one bin ...
    assert counts.sum() == len(sample)
    # ... and there is one more edge than there are bins.
    assert len(counts) == len(edges) - 1
def test_hist_autobin(rseed=0):
    """Check that ``hist`` and ``histogram`` agree for every bin specification."""
    sample = np.random.RandomState(rseed).randn(100)

    # 'knuth' bintype depends on scipy that is optional dependency, so it is
    # only inserted (right after the explicit specifications) when available.
    explicit_specs = [10, np.arange(-3, 3, 10)]
    rule_specs = ['scott', 'freedman', 'blocks']
    if HAS_SCIPY:
        rule_specs = ['knuth'] + rule_specs
    bintypes = explicit_specs + rule_specs

    for bintype in bintypes:
        for value_range in (None, (-3, 3)):
            ref_counts, ref_edges = histogram(sample, bintype,
                                              range=value_range)
            plot_counts, plot_edges, _patches = hist(sample, bintype,
                                                     range=value_range)
            assert_allclose(ref_counts, plot_counts)
            assert_allclose(ref_edges, plot_edges)
def hist_normalised(y, ax=None, **kwargs):
    """Draw a peak-normalised histogram of ``y`` using Scott's rule.

    The density histogram is rescaled so that its tallest bin equals 1 and
    is drawn twice on ``ax``: as a step outline (closed with zero-height
    points on either side) and as semi-transparent bars.

    Parameters
    ----------
    y : array_like
        Values to histogram.
    ax : matplotlib axes, optional
        Axes to draw on. If None, the current pyplot axes are used.
    **kwargs
        Only ``color`` is read; it is used for both the outline and the
        bars (default ``0``).

    Returns
    -------
    hist1b : numpy.ndarray
        Histogram heights divided by their maximum.
    bin_centers : numpy.ndarray
        Centres of the bins.
    """
    if ax is None:
        # ``ax`` defaults to None but is required below; fall back to the
        # current axes instead of failing with AttributeError.
        import matplotlib.pyplot as plt
        ax = plt.gca()

    colour = kwargs.get("color", 0)

    hh, bi = histogram(y, bins='scott', density=True)
    bin_widths = bi[1:] - bi[:-1]
    bin_centers = 0.5 * (bi[:-1] + bi[1:])
    hist1b = hh / np.max(hh)

    # Add one extra point on each side, one bin width beyond the outermost
    # centres and at height zero, so the step outline drops to the baseline.
    bin_edge = np.append(bin_centers[0] - bin_widths[0], bin_centers)
    bin_edge = np.append(bin_edge, bin_centers[-1] + bin_widths[-1])
    h1_edge = np.append([0], hist1b)
    h1_edge = np.append(h1_edge, [0])

    ax.step(bin_edge, h1_edge, where='mid', color=colour)
    ax.bar(bin_centers, hist1b, width=bin_widths, align='center',
           color=colour, alpha=0.5)
    return hist1b, bin_centers
def __init__(self, group, cont, cond, method='blocks'):
    """
    Average the spectra of a frame group in bins of a condition.

    If ``cond`` has boolean dtype two bins are made (rows where it is True,
    then rows where it is False); otherwise ``cond`` is histogrammed with
    ``ast.histogram`` and the average spectrum per bin is computed.

    Parameters
    ----------
    group : object
        Frame group; the ``data`` arrays of ``group.frames`` are stacked
        row-wise into one block.
    cont : numpy.ndarray
        Continuum level for each row of the stacked block.
    cond : numpy.ndarray
        Boolean mask, or quantity to histogram, with one entry per row.
    method : str, optional
        ``bins`` argument handed to ``ast.histogram`` for non-boolean
        ``cond`` (default 'blocks', i.e. bayesian blocks).

    Raises
    ------
    IndexError
        If a non-boolean ``cond`` does not have one entry per row.

    Notes
    -----
    Sets ``group``, ``block``, ``binned``, ``con``, ``bins`` and
    ``counts``; ``sorting`` is set only for non-boolean ``cond``.
    """
    self.group = group

    block = group.frames[0].data
    for frm in group.frames[1:]:
        block = np.vstack((block, frm.data))
    self.block = block

    if cond.dtype == np.dtype('bool'):
        idx, = np.where(cond)
        nidx, = np.where(~cond)
        self.binned = np.vstack((np.mean(block[idx, :], axis=0),
                                 np.mean(block[nidx, :], axis=0)))
        self.con = np.vstack((np.mean(cont[idx]), np.mean(cont[nidx])))
        self.bins = np.array([0, 0, 1])
        # Invert the mask *before* summing: ``~cond.sum()`` is the bitwise
        # NOT of the integer count, i.e. -(n + 1), not the number of False.
        self.counts = np.array([cond.sum(), (~cond).sum()])
    else:
        if len(cond) != block.shape[0]:
            raise IndexError("Length of cond does not match number of rows in framegroup")
        counts, bins = ast.histogram(cond, bins=method)
        # :-1 to get the correct number of buckets from digitize
        sorting = np.digitize(cond, bins[:-1])
        binned = np.mean(block[sorting == 1, :], axis=0)
        con = np.mean(cont[sorting == 1])
        for i in np.unique(sorting)[1:]:
            binned = np.vstack((binned, np.mean(block[sorting == i, :], axis=0)))
            con = np.vstack((con, np.mean(cont[sorting == i])))
        self.binned = binned
        self.con = con
        self.counts = counts
        self.bins = bins
        self.sorting = sorting
def __bin_by_quant(self, quant, cuts):
    """
    Average the selected rows in bayesian-block bins of ``quant``.

    Parameters
    ----------
    quant : numpy.ndarray
        Quantity to bin by, one entry per stacked row.
    cuts : numpy.ndarray or None
        Row selection; flattened and converted to row indices. ``None``
        keeps every row.

    Returns
    -------
    binned : numpy.ndarray
        Mean of the data rows per populated bin.
    con : numpy.ndarray
        Mean continuum per bin (one slot per histogram bin).
    bins : numpy.ndarray
        Bin edges from ``ast.histogram``.
    counts : numpy.ndarray
        Number of entries per bin.
    """
    if cuts is None:
        cuts = np.ones(len(quant)).astype(bool)
    else:
        cuts, = np.where(cuts.reshape(-1))

    # Stack the selected columns and the continuum of every frame row-wise.
    lead_frame = self.group.frames[0]
    data_rows = lead_frame.data[:, self.idx]
    cont_rows = lead_frame.cont.val(self.cent)
    for frame in self.group.frames[1:]:
        data_rows = np.concatenate((data_rows, frame.data[:, self.idx]), axis=0)
        cont_rows = np.concatenate((cont_rows, frame.cont.val(self.cent)), axis=0)
    data_rows = data_rows[cuts, :]
    cont_rows = cont_rows[cuts]

    counts, bins = ast.histogram(quant[cuts], bins='blocks')
    # :-1 to get the correct number of buckets from digitize
    labels = np.digitize(quant[cuts], bins[:-1])

    # The first bucket is handled up front, the remaining ones in the loop.
    bucket_means = [np.mean(data_rows[labels == 1, :], axis=0)]
    con = np.zeros(len(counts))
    con[0] = np.mean(cont_rows[labels == 1])
    for label in np.unique(labels)[1:]:
        bucket_means.append(np.mean(data_rows[labels == label, :], axis=0))
        con[label - 1] = np.mean(cont_rows[labels == label])

    # With a single bucket the mean stays as it is (no stacking).
    if len(bucket_means) == 1:
        binned = bucket_means[0]
    else:
        binned = np.vstack(bucket_means)
    return binned, con, bins, counts
def pdf(self, bins=10, xrange=None, weights=None, normed=False, **kwargs):
    """
    Computation of the Probability Density function of the normalized
    increments ``self.wtN``.

    Wrapper around the histogram function from the astropy.stats package,
    always called with ``density=True``.

    Parameters
    ----------
    bins : int, list or str, optional
        If bins is a string, then it must be one of:

        - 'blocks' : use bayesian blocks for dynamic bin widths
        - 'knuth' : use Knuth's rule to determine bins
        - 'scott' : use Scott's rule to determine bins
        - 'freedman' : use the Freedman-Diaconis rule to determine bins
    xrange : tuple or None, optional
        The minimum and maximum range for the histogram. If not specified,
        ``(self.wtN.min(), self.wtN.max())`` is used for the error estimate.
    weights : array_like, optional
        Forwarded to the histogram function.
    normed : bool, optional
        Not used by this method; kept so existing calls keep working.
    **kwargs
        Extra keyword arguments forwarded to the histogram function.

    Returns
    -------
    hist : array
        The density values of the histogram.
    xpdf : array
        The bin centres.
    err : array
        Error per bin assuming Poissonian counts, propagated to the
        normalized pdf: ``sqrt(hist / (N * width))`` with ``N`` the number
        of samples inside ``xrange`` and ``width`` the width of that bin.

    See Also
    --------
    numpy.histogram
    astropy.stats.histogram
    """
    hist, bins_e = Astats.histogram(self.wtN, bins=bins, range=xrange,
                                    weights=weights, density=True, **kwargs)
    xpdf = (bins_e[1:] + bins_e[:-1]) / 2.

    if xrange is None:
        xrange = [self.wtN.min(), self.wtN.max()]
    n_in_range = np.count_nonzero(
        (self.wtN >= xrange[0]) & (self.wtN <= xrange[1]))

    # Use every bin's own width. Taking only the first width is wrong for
    # non-uniform bins (explicit edges or bins='blocks'); for uniform bins
    # the result is unchanged.
    err = np.sqrt(hist / (n_in_range * np.diff(bins_e)))
    return hist, xpdf, err
def pdf(self, x, bins=10, range=None, weights=None, log=False, **kwargs):
    """
    Histogram ``x`` as an estimate of its probability density.

    Thin wrapper around ``Astats.histogram`` with optional logarithmically
    spaced bin edges.

    Parameters
    ----------
    x : ndarray
        Variable for the computation of the probability density function.
    bins : int, list or str, optional
        Bin specification. If a string, it must be one of:

        - 'blocks' : use bayesian blocks for dynamic bin widths
        - 'knuth' : use Knuth's rule to determine bins
        - 'scott' : use Scott's rule to determine bins
        - 'freedman' : use the Freedman-Diaconis rule to determine bins
    range : tuple or None, optional
        Minimum and maximum of the histogram range. With ``log=True`` and
        no range given, ``[x.min(), x.max()]`` is used.
    weights : array_like, optional
        Forwarded unchanged to the histogram function.
    log : bool, optional
        Default is False. If True, ``bins`` is replaced by
        logarithmically spaced edges between ``range[0]`` and
        ``range[1]``. The number of *edges* is ``bins`` when it is an
        ``int``; otherwise a message is printed and 20 is used.
    **kwargs
        Extra keyword arguments forwarded to the histogram function.

    Returns
    -------
    hist : array
        The values of the histogram.
    bin_edges : array
        The bin edges, as returned by the histogram function.

    See Also
    --------
    numpy.histogram
    astropy.stats.histogram
    """
    if log:
        if range is None:
            range = [x.min(), x.max()]

        if isinstance(bins, int):
            n_edges = bins
        else:
            print('Warning log bins only with nbins as int')
            print('Assuming 20 bins')
            n_edges = 20

        log_start = np.log10(range[0])
        log_stop = np.log10(range[1])
        bins = np.logspace(log_start, log_stop, n_edges)

    hist, bins_e = Astats.histogram(
        x, bins=bins, range=range, weights=weights, **kwargs
    )
    return hist, bins_e
def test_histogram(bin_type, N=1000, rseed=0):
    """Check basic invariants of ``histogram`` for a given ``bin_type``.

    ``bin_type`` is presumably supplied by a parametrize decorator that is
    not visible in this snippet.
    """
    sample = np.random.default_rng(rseed).standard_normal(N)
    counts, edges = histogram(sample, bin_type)

    # Every sample lands in exactly one bin ...
    assert counts.sum() == len(sample)
    # ... and there is one more edge than there are bins.
    assert len(counts) == len(edges) - 1
def match(dataCm):
    """Performs the Match calculation in Eq. 1 of Breivik & Larson (2018)

    Parameters
    ----------
    dataCm : list
        List of two cumulative data sets for a single parameter

    Returns
    -------
    match : float
        ``log10(1 - M)`` where ``M`` is the normalised overlap of the two
        density histograms; ``1e-9`` if the bin width is below ``1e-7``

    binwidth : float
        Binwidth of histograms used for match computation
    """
    # COMPUTE THE BINWIDTH FOR THE MOST COMPLETE DATA SET:
    # NOTE: THIS WILL BE THE BINWIDTH FOR BOTH HISTOGRAMS
    reference = np.array(dataCm[0])
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore", message="divide by zero encountered in double_scalars")
        try:
            bw, binEdges = astroStats.knuth_bin_width(reference,
                                                      return_bins=True)
        except Exception:
            # Deliberate best-effort fallback: any failure of Knuth's rule
            # is answered with Scott's rule.
            bw, binEdges = astroStats.scott_bin_width(reference,
                                                      return_bins=True)

    # Clamp the bin width from below and rebuild the edges accordingly.
    if bw < 1e-4:
        bw = 1e-4
        binEdges = np.arange(binEdges[0], binEdges[-1], bw)

    # BIN THE DATA:
    histo1, histo2 = (
        astroStats.histogram(dataCm[i], bins=binEdges, density=True)[0]
        for i in range(2)
    )

    binwidth = binEdges[1] - binEdges[0]
    if binwidth < 1e-7:
        # Return the sentinel directly; the original stored a plain float
        # and then indexed it (``match[0]``), which raises TypeError.
        return 1e-9, binwidth

    # COMPUTE THE MATCH:
    nominator = np.sum(histo1 * histo2)
    denominator1 = np.sum(histo1 * histo1)
    denominator2 = np.sum(histo2 * histo2)
    match = np.log10(1 - nominator / np.sqrt(denominator1 * denominator2))

    return match, binwidth
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
Histogram a normal sample with bayesian-blocks bins and plot the counts.

5000 values are drawn from a normal distribution (mean 100, standard
deviation 15), binned with ``astropy.stats.histogram(bins='blocks')``, the
bin widths are printed and the counts are plotted at the bin centres.
'''
import numpy as np
import matplotlib.pyplot as plt
from astropy.stats import histogram

mu, sigma = 100, 15
data = mu + sigma * np.random.randn(5000)

hist, bin_edges = histogram(data, bins='blocks')

# Single-argument call form: prints the same text on Python 2 and Python 3
# (the former ``print 'Bin widths:', ...`` statement is a SyntaxError on 3).
print('Bin widths: %s' % (bin_edges[1:] - bin_edges[:-1],))

bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
plt.plot(bin_centers, hist, '.')
plt.show()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
Plot a bayesian-blocks histogram of a normally distributed sample.

The sample holds 5000 draws with mean 100 and standard deviation 15. The
widths of the resulting bins are printed and the per-bin counts are drawn
as points at the bin centres.
'''
import numpy as np
import matplotlib.pyplot as plt
from astropy.stats import histogram

mu, sigma = 100, 15
data = mu + sigma * np.random.randn(5000)

hist, bin_edges = histogram(data, bins='blocks')

# A ``print`` statement is a SyntaxError on Python 3; this single-argument
# call produces the same output on both Python 2 and Python 3.
bin_widths = bin_edges[1:] - bin_edges[:-1]
print('Bin widths: %s' % (bin_widths,))

bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
plt.plot(bin_centers, hist, '.')
plt.show()
def match(dataCm, nRuns):
    """Performs the Match calculation in Eq. 1 of Breivik & Larson (2018)

    Parameters
    ----------
    dataCm : list
        List of cumulative data for a single parameter

    nRuns : int
        Length of the list

    Returns
    -------
    match : numpy.ndarray
        ``nRuns - 1`` matches, entry ``i`` comparing data sets ``i`` and
        ``i + 1``; every entry is 1.0 if the bin width is below ``1e-7``

    binwidth : float
        Binwidth of histograms used for match computation
    """
    # COMPUTE THE BINWIDTH FOR THE MOST COMPLETE DATA SET:
    # NOTE: THIS WILL BE THE BINWIDTH FOR ALL THE HISTOGRAMS IN THE HISTO LIST
    _, binEdges = astroStats.histogram(np.array(dataCm[len(dataCm) - 1]),
                                       bins='scott')

    # BIN THE DATA:
    histo = [
        astroStats.histogram(dataCm[i], bins=binEdges, density=True)[0]
        for i in range(nRuns)
    ]

    binwidth = binEdges[1] - binEdges[0]

    # COMPUTE THE MATCH:
    match = np.zeros(nRuns - 1)
    for i, (histo1, histo2) in enumerate(zip(histo, histo[1:])):
        if binwidth < 1e-7:
            match[i] = 1.0
            continue
        # The sums are taken per pair of runs. The original appended the
        # products to lists that were never reset, so every match after the
        # first also contained the terms of all earlier pairs.
        # ``np.longdouble`` is the portable spelling of the extended
        # precision type; ``np.float128`` does not exist on every platform.
        nominator = np.longdouble(np.sum(histo1 * histo2))
        denominator1 = np.longdouble(np.sum(histo1 * histo1))
        denominator2 = np.longdouble(np.sum(histo2 * histo2))
        match[i] = nominator / np.sqrt(denominator1 * denominator2)

    return match, binwidth