def test_dd_zero_dedges(self):
    x = np.random.random((10000, 3))
    v = np.random.random((10000))
    bins = np.linspace(0, 1, 10)
    bins = np.append(bins, 1)
    bins = (bins, bins, bins)
    with assert_raises(ValueError, match='difference is numerically 0'):
        binned_statistic_dd(x, v, 'mean', bins=bins)
def test_dd_median(self):
    X = self.X
    v = self.v

    stat1, edges1, bc = binned_statistic_dd(X, v, "median", bins=3)
    stat2, edges2, bc = binned_statistic_dd(X, v, np.median, bins=3)

    assert_allclose(stat1, stat2)
    assert_allclose(edges1, edges2)
def test_dd_max(self):
    X = self.X
    v = self.v

    stat1, edges1, bc = binned_statistic_dd(X, v, 'max', bins=3)
    stat2, edges2, bc = binned_statistic_dd(X, v, np.max, bins=3)

    assert_allclose(stat1, stat2)
    assert_allclose(edges1, edges2)
def test_dd_median(self):
    X = self.X
    v = self.v

    stat1, edges1, bc = binned_statistic_dd(X, v, 'median', bins=3)
    stat2, edges2, bc = binned_statistic_dd(X, v, np.median, bins=3)

    assert_array_almost_equal(stat1, stat2)
    assert_array_almost_equal(edges1, edges2)
def test_dd_std(self):
    X = self.X
    v = self.v

    stat1, edges1, bc = binned_statistic_dd(X, v, "std", bins=3)
    stat2, edges2, bc = binned_statistic_dd(X, v, np.std, bins=3)

    assert_allclose(stat1, stat2)
    assert_allclose(edges1, edges2)
def test_dd_multi_values(self):
    X = self.X
    v = self.v
    w = self.w

    stat1v, edges1v, bc1v = binned_statistic_dd(X, v, np.std, bins=8)
    stat1w, edges1w, bc1w = binned_statistic_dd(X, w, np.std, bins=8)
    stat2, edges2, bc2 = binned_statistic_dd(X, [v, w], np.std, bins=8)

    assert_allclose(stat2[0], stat1v)
    assert_allclose(stat2[1], stat1w)
    assert_allclose(edges1v, edges2)
    assert_allclose(edges1w, edges2)
    assert_allclose(bc1v, bc2)
def test_dd_binned_statistic_result(self):
    # NOTE: tests the reuse of bin_edges from a previous call
    x = np.random.random((10000, 3))
    v = np.random.random((10000))
    bins = np.linspace(0, 1, 10)
    bins = (bins, bins, bins)

    result = binned_statistic_dd(x, v, 'mean', bins=bins)
    stat = result.statistic

    result = binned_statistic_dd(x, v, 'mean',
                                 binned_statistic_result=result)
    stat2 = result.statistic

    assert_allclose(stat, stat2)
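# The `binned_statistic_result` keyword reuses the bin edges computed by a
# previous call, which is what the test above exercises. A minimal sketch
# (seed and shapes are illustrative) of the common use case: computing a
# second statistic on exactly the same binning.
import numpy as np
from scipy.stats import binned_statistic_dd

rng = np.random.default_rng(0)
sample = rng.random((1000, 2))
values = rng.random(1000)

# First call derives the bin edges from `bins`.
means = binned_statistic_dd(sample, values, statistic='mean', bins=10)

# Second call reuses those edges, so both statistics refer to identical bins.
stds = binned_statistic_dd(sample, values, statistic='std',
                           binned_statistic_result=means)

assert all(np.array_equal(a, b)
           for a, b in zip(means.bin_edges, stds.bin_edges))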
def test_dd_multi_values(self):
    X = self.X
    v = self.v
    w = self.w

    for stat in ["count", "sum", "mean", "std", "min", "max", "median",
                 np.std]:
        stat1v, edges1v, bc1v = binned_statistic_dd(X, v, stat, bins=8)
        stat1w, edges1w, bc1w = binned_statistic_dd(X, w, stat, bins=8)
        stat2, edges2, bc2 = binned_statistic_dd(X, [v, w], stat, bins=8)

        assert_allclose(stat2[0], stat1v)
        assert_allclose(stat2[1], stat1w)
        assert_allclose(edges1v, edges2)
        assert_allclose(edges1w, edges2)
        assert_allclose(bc1v, bc2)
def __init__(self, ufed, dataframe, bins, min_count=1, adjust_centers=False):
    self._ufed = ufed
    try:
        self._bins = list(bins)
    except TypeError:
        self._bins = [bins] * len(ufed.variables)
    sample = [dataframe[v.id] for v in ufed.variables]
    forces = self._compute_forces(ufed, dataframe)
    ranges = [(v.min_value, v.max_value) for v in ufed.variables]
    counts = stats.binned_statistic_dd(
        sample, [], statistic='count', bins=self._bins, range=ranges)
    index = np.where(counts.statistic.flatten() >= min_count)
    n = len(ufed.variables)
    if adjust_centers:
        means = stats.binned_statistic_dd(
            sample, sample + forces, bins=self._bins, range=ranges)
        self.centers = [
            means.statistic[i].flatten()[index] for i in range(n)]
        self.mean_forces = [
            means.statistic[n + i].flatten()[index] for i in range(n)]
    else:
        means = stats.binned_statistic_dd(
            sample, forces, bins=self._bins, range=ranges)
        bin_centers = [
            0.5 * (edges[1:] + edges[:-1]) for edges in counts.bin_edges]
        center_points = np.stack(
            [np.array(point) for point in itertools.product(*bin_centers)])
        self.centers = [center_points[:, i][index] for i in range(n)]
        self.mean_forces = [
            statistic.flatten()[index] for statistic in means.statistic]
def test_dd_result_attributes(self):
    X = self.X
    v = self.v

    res = binned_statistic_dd(X, v, 'count', bins=3)
    attributes = ('statistic', 'bin_edges', 'binnumber')
    check_named_results(res, attributes)
def _hist_with_flim(
    self, data: List[np.ndarray], edges: List[np.ndarray], chan: int
) -> np.ndarray:
    """Run a slightly more complex processing pipeline when we need to
    calculate the lifetime of each pixel in the image.

    We use the scipy.stats.binned_statistic_dd function to histogram the
    photons again, but we use their lifetime as an input for the
    histogram, and calculate it for each bin.

    Parameters
    ----------
    data : list of np.ndarray
        Photon arrival times in each of the dimensions
    edges : list of np.ndarray
        Histogram edges for each dimension.
    chan : int
        Channel number

    Returns
    -------
    hist_with_flim : np.ndarray
        N-dimensional histogram, where N = len(data)
    """
    resulting_tau, edges, binnumber = binned_statistic_dd(
        sample=data,
        values=self.df_dict[chan]["time_rel_pulse"].to_numpy(),
        statistic=calc_lifetime,
        bins=edges,
    )
    # hist = HistWithIndex(data, edges)
    # hist.run()
    # valid_photons = hist.discard_out_of_bounds_photons()
    bloater = np.ones((self.flim_downsampling, self.flim_downsampling),
                      dtype=np.uint8)
    return np.kron(resulting_tau, bloater)
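# `calc_lifetime` above is project-specific; the key point is that
# `statistic` accepts any callable that reduces the values falling in one
# bin to a scalar. A self-contained sketch with a stand-in reducer
# (`mean_arrival_time` is hypothetical, not part of the original code):
import numpy as np
from scipy.stats import binned_statistic_dd

def mean_arrival_time(times):
    # Called once per bin; must also tolerate an empty input.
    times = np.asarray(times)
    return times.mean() if times.size else np.nan

rng = np.random.default_rng(1)
coords = [rng.integers(0, 256, 10_000),   # photon x coordinates
          rng.integers(0, 256, 10_000)]   # photon y coordinates
arrival = rng.exponential(2.5, 10_000)    # arrival times, arbitrary units

tau_map, _, _ = binned_statistic_dd(
    sample=coords, values=arrival, statistic=mean_arrival_time,
    bins=[np.arange(0, 257, 16)] * 2)     # 16-pixel-wide bins per axis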
def cond_mean(Xt, data, nbins):
    cond_mean, _, _ = binned_statistic_dd(Xt, data, bins=nbins,
                                          expand_binnumbers=True)
    cond_mean[np.isnan(cond_mean)] = 0
    cond_mean = np.transpose(cond_mean)
    return cond_mean
def test_dd_result_attributes(self):
    X = self.X
    v = self.v

    res = binned_statistic_dd(X, v, "count", bins=3)
    attributes = ("statistic", "bin_edges", "binnumber")
    check_named_results(res, attributes)
def multi_dim_hist(data, nBins):
    '''
    INPUTS:
    -------
    - data: An [M x D] or [N x D] array of Numpy data.
    - nBins: (Integer), size of bins in Each dimension of the vector/data.

    OUTPUT:
    - 1D array: Normalized Vector, of the flattened histogram.

    STEPS:
    ------
    - Compute a multi-dimensional Histogram, using
      'scipy.stats.binned_statistic_dd()' in Python.
    - Flatten the Histogram to a 1D vector, of size/length: N^d,
      i.e. Number-of-bins raised to power Number-of-dimensions.
    - Replace all zero-values of the bins with the lowest non-zero value.
    - Finally, Normalize the histogram to the range [0, 1].

    AUTHOR:
    -------
    Ekpo Otu ([email protected])
    '''
    hist = stats.binned_statistic_dd(data, values=False, statistic='count',
                                     bins=nBins)[0]
    hf = hist.flatten()
    hn = norm1D(hf)
    lowestNoneZero_min = min(i for i in hn if i > 0)
    lower = 0.98 * lowestNoneZero_min
    finalVector = np.where(hn < lowestNoneZero_min, lower, hn)
    return finalVector
# ----------------------------------------------------------------------- #
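# A usage sketch for the function above. `norm1D` is the author's helper
# and is not shown here; the stand-in below assumes it rescales a vector
# to the range [0, 1].
import numpy as np
from scipy import stats

def norm1D(v):
    # hypothetical stand-in for the author's normalization helper
    return (v - v.min()) / (v.max() - v.min())

rng = np.random.default_rng(7)
points = rng.random((500, 3))           # 500 points in 3-D
vec = multi_dim_hist(points, nBins=4)   # flattened 4**3 = 64-bin descriptor
print(vec.shape)                        # (64,)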
def bin_flags(x_idx, y_idx, flags, x_label, y_label):
    bins = tuple(
        N_grid.get(label, N_grid_default) for label in (x_label, y_label))
    return binned_statistic_dd(grid[:, [x_idx, y_idx]], flags,
                               statistic="sum", bins=bins)
def binning_positions_only(Rg_vec, phig_vec, Zg_vec,
                           R_edges, phi_edges, Z_edges):
    counts_grid = stats.binned_statistic_dd(
        [Rg_vec, phig_vec, Zg_vec],
        Rg_vec,  # dummy values array; ignored by the 'count' statistic
        statistic='count',
        bins=[R_edges, phi_edges, Z_edges])[0]
    counts_pois_grid = np.sqrt(counts_grid)  # Poisson (sqrt-N) uncertainty
    return np.array([counts_grid]), np.array([counts_pois_grid])
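# A minimal call of the function above with synthetic cylindrical
# coordinates; the edge arrays here are illustrative assumptions.
import numpy as np
from scipy import stats

rng = np.random.default_rng(3)
R = rng.random(1000) * 10
phi = rng.random(1000) * 2 * np.pi
Z = rng.random(1000) * 2 - 1

counts, errors = binning_positions_only(
    R, phi, Z,
    np.linspace(0, 10, 11),         # R_edges
    np.linspace(0, 2 * np.pi, 13),  # phi_edges
    np.linspace(-1, 1, 5))          # Z_edges
print(counts.shape)  # (1, 10, 12, 4): one map of 10 x 12 x 4 bin counts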
def setup(self, statistic):
    rng = np.random.default_rng(12345678)
    self.inp = rng.random(9999).reshape(3, 3333) * 200
    self.subbin_x_edges = np.arange(0, 200, dtype=np.float32)
    self.subbin_y_edges = np.arange(0, 200, dtype=np.float64)
    self.ret = stats.binned_statistic_dd(
        [self.inp[0], self.inp[1]], self.inp[2], statistic=statistic,
        bins=[self.subbin_x_edges, self.subbin_y_edges])
def _bin_and_transform(self, x, y, bins, centers):
    # `.as_matrix()` was removed in pandas 1.0; `.to_numpy()` is the
    # modern equivalent.
    bin_means, _, _ = binned_statistic_dd(x.to_numpy(), y.to_numpy(),
                                          bins=bins)
    shape = bin_means.shape
    x_new = []
    for i in range(len(shape)):
        x_new.append(self._transform_x(i, centers, shape))
    x = pd.DataFrame({c: col for c, col in zip(x.columns, x_new)},
                     columns=x.columns)
    return pd.DataFrame({'overlap': bin_means.reshape(-1)}), x
def test_dd_count(self):
    X = self.X
    v = self.v

    count1, edges1, bc = binned_statistic_dd(X, v, 'count', bins=3)
    count2, edges2 = np.histogramdd(X, bins=3)

    assert_array_almost_equal(count1, count2)
    assert_array_almost_equal(edges1, edges2)
def opt_est(Xt, data, nbins):
    cond_mean, _, bins = binned_statistic_dd(Xt, data, bins=nbins,
                                             expand_binnumbers=True)
    cond_mean[np.isnan(cond_mean)] = 0
    # binnumbers are 1-based; shift to 0-based and fold any above-range
    # index into the last bin
    bins = bins - 1
    bins[bins == nbins] = nbins - 1
    pred = cond_mean[bins[0, :], bins[1, :], bins[2, :], bins[3, :],
                     bins[4, :]]
    return pred
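# The shift-and-clamp above relies on SciPy's binnumber convention: with
# expand_binnumbers=True the indices are 1-based per dimension, 0 and
# nbins + 1 mark out-of-range samples, and a sample exactly on the
# rightmost edge is counted in the last bin. A small sketch of that
# convention (data and edges are illustrative):
import numpy as np
from scipy.stats import binned_statistic_dd

x = np.array([0.05, 0.55, 1.0])  # the last sample sits on the right edge
_, edges, binnum = binned_statistic_dd(
    [x], x, bins=[np.linspace(0, 1, 3)],  # 2 bins: [0, 0.5) and [0.5, 1]
    expand_binnumbers=True)
print(binnum)  # [[1 2 2]] -- 1-based; the edge sample lands in bin 2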
def test_dd_count(self):
    X = self.X
    v = self.v

    count1, edges1, bc = binned_statistic_dd(X, v, "count", bins=3)
    count2, edges2 = np.histogramdd(X, bins=3)

    assert_allclose(count1, count2)
    assert_allclose(edges1, edges2)
def test_dd_sum(self):
    X = self.X
    v = self.v

    sum1, edges1, bc = binned_statistic_dd(X, v, "sum", bins=3)
    sum2, edges2 = np.histogramdd(X, bins=3, weights=v)

    assert_allclose(sum1, sum2)
    assert_allclose(edges1, edges2)
def test_dd_sum(self):
    X = self.X
    v = self.v

    sum1, edges1, bc = binned_statistic_dd(X, v, 'sum', bins=3)
    sum2, edges2 = np.histogramdd(X, bins=3, weights=v)

    assert_array_almost_equal(sum1, sum2)
    assert_array_almost_equal(edges1, edges2)
def get_density(self, boundary, divs):
    divs2 = divs + 1 - divs % 2  # round up to an odd number of divisions
    lim = np.linspace(-boundary, boundary, divs2 + 1)
    lims = []  # boundaries for the bins
    for i in range((self.x).shape[1]):
        # in each direction, the boundaries are the same
        lims.append(lim)
    # This gives the mass present in each box
    H, edges, numb = stats.binned_statistic_dd(self.x, self.m,
                                               statistic='sum', bins=lims)
    # mass divided by the area of a cell
    density = H / ((2 * boundary / divs2) ** 2)
    return density, edges
def setup(self):
    np.random.seed(12345678)
    self.inp = np.random.rand(9999).reshape(3, 3333) * 200
    self.subbin_x_edges = np.arange(0, 200, dtype=np.float32)
    self.subbin_y_edges = np.arange(0, 200, dtype=np.float64)
    self.ret = stats.binned_statistic_dd(
        [self.inp[0], self.inp[1]], self.inp[2], statistic="std",
        bins=[self.subbin_x_edges, self.subbin_y_edges])
def predict(self, X, stochastic=False):
    if not isinstance(X, list):
        X = [X]

    # append the current state X to the feature history
    self.feat_eng.append_feat(X)

    # get the feature history defined by the specified number of time lags.
    # Here, feat is an array with the same size as the neural network
    # input layer
    feat = self.feat_eng.get_feat_history()
    feat = feat.reshape([1, self.n_feats])

    # find in which bins the c_i samples fall
    _, _, binnumbers_i = stats.binned_statistic_dd(
        feat, np.zeros(self.N), bins=self.bins)

    # static correction for outliers, using precomputed mapping array
    binnumbers_i = self.mapping[binnumbers_i]

    # the neighbors in the selected bin, on the 'C manifold'
    neighbors = self.feats[self.sample_idx_per_bin[binnumbers_i[0]]]

    # the distance from the current point to all neighbors
    dists = np.linalg.norm(neighbors - feat, axis=1)

    # if the current bin does not contain enough points to form a simplex,
    # adjust K
    if dists.size < self.n_feats + 1:
        K = dists.size  # adjusted K
    else:
        K = self.n_feats + 1  # simplex K (manifold dimension + 1)

    # sort the distances + select the K nearest neighbors
    idx = np.argsort(dists)[0:K]
    simplex_idx = self.sample_idx_per_bin[binnumbers_i[0]][idx]

    if not stochastic:
        # compute the simplex weights w_i
        d_i = dists[idx]
        if d_i[0] == 0:
            u_i = np.zeros(K)
            u_i[0] = 1.0
        else:
            u_i = np.exp(-d_i / d_i[0])
        w_i = u_i / np.sum(u_i)

        # the prediction is the weighted sum of the corresponding samples
        # on the shadow manifold
        shadow_sample = np.sum(self.target[simplex_idx, -1] * w_i)
        return shadow_sample
    else:
        shadow_sample = self.sample_simplex(self.target[simplex_idx])[0]
        return shadow_sample[-1]
def test_dd_bincode(self):
    X = self.X[:20]
    v = self.v[:20]

    count1, edges1, bc = binned_statistic_dd(X, v, "count", bins=3)
    bc2 = np.array([63, 33, 86, 83, 88, 67, 57, 33, 42, 41, 82, 83, 92,
                    32, 36, 91, 43, 87, 81, 81])

    bcount = [(bc == i).sum() for i in np.unique(bc)]

    assert_allclose(bc, bc2)
    count1adj = count1[count1.nonzero()]
    assert_allclose(bcount, count1adj)
def __init__(self, ufed, dataframe, bins):
    self._ufed = ufed
    try:
        self._bins = list(bins)
    except TypeError:
        self._bins = [bins] * len(ufed.variables)
    sample = []
    forces = []
    ranges = []
    for cv in ufed.variables:
        def function(dx):
            if cv.periodic:
                return cv.force_constant * (
                    dx - cv._range * np.rint(dx / cv._range))
            else:
                return cv.force_constant * dx
        sample.append(dataframe[f's_{cv.id}'])
        forces.append(function(dataframe[cv.id] - dataframe[f's_{cv.id}']))
        ranges.append((cv.min_value, cv.max_value))
    counts = stats.binned_statistic_dd(
        sample, [], statistic='count', bins=self._bins, range=ranges)
    means = stats.binned_statistic_dd(
        sample, sample + forces, bins=self._bins, range=ranges)
    histogram = counts.statistic.flatten()
    index = np.where(histogram > 0)
    self.histogram = histogram[index]
    n = len(ufed.variables)
    self.centers = [means.statistic[i].flatten()[index] for i in range(n)]
    self.mean_forces = [
        means.statistic[n + i].flatten()[index] for i in range(n)]
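# A self-contained sketch of the count/mask/mean pattern used above, with
# synthetic 1-D data (names and shapes are illustrative). Passing an empty
# `values` list is fine for the 'count' statistic, which ignores it.
import numpy as np
from scipy import stats

rng = np.random.default_rng(9)
s = [rng.random(400)]        # samples of one collective variable
f = [rng.normal(size=400)]   # the corresponding instantaneous forces

counts = stats.binned_statistic_dd(s, [], statistic='count',
                                   bins=[20], range=[(0, 1)])
means = stats.binned_statistic_dd(s, s + f, bins=[20], range=[(0, 1)])

keep = counts.statistic.flatten() > 0             # drop empty bins
centers = means.statistic[0].flatten()[keep]      # mean CV value per bin
mean_forces = means.statistic[1].flatten()[keep]  # mean force per bin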
def test_dd_binnumbers_unraveled(self):
    X = self.X
    v = self.v

    stat, edgesx, bcx = binned_statistic(X[:, 0], v, "mean", bins=10)
    stat, edgesy, bcy = binned_statistic(X[:, 1], v, "mean", bins=10)
    stat, edgesz, bcz = binned_statistic(X[:, 2], v, "mean", bins=10)

    stat2, edges2, bc2 = binned_statistic_dd(X, v, "mean", bins=10,
                                             expand_binnumbers=True)

    assert_allclose(bcx, bc2[0])
    assert_allclose(bcy, bc2[1])
    assert_allclose(bcz, bc2[2])
def test_dd_bincode(self):
    X = self.X[:20]
    v = self.v[:20]

    count1, edges1, bc = binned_statistic_dd(X, v, 'count', bins=3)
    bc2 = np.array([63, 33, 86, 83, 88, 67, 57, 33, 42, 41, 82, 83, 92,
                    32, 36, 91, 43, 87, 81, 81])

    bcount = [(bc == i).sum() for i in np.unique(bc)]

    assert_array_almost_equal(bc, bc2)
    count1adj = count1[count1.nonzero()]
    assert_array_almost_equal(bcount, count1adj)
def test_dd_binnumbers_unraveled(self):
    X = self.X
    v = self.v

    stat, edgesx, bcx = binned_statistic(X[:, 0], v, 'mean', bins=15)
    stat, edgesy, bcy = binned_statistic(X[:, 1], v, 'mean', bins=20)
    stat, edgesz, bcz = binned_statistic(X[:, 2], v, 'mean', bins=10)

    stat2, edges2, bc2 = binned_statistic_dd(
        X, v, 'mean', bins=(15, 20, 10), expand_binnumbers=True)

    assert_allclose(bcx, bc2[0])
    assert_allclose(bcy, bc2[1])
    assert_allclose(bcz, bc2[2])
def test_gh14332(self):
    # Regression test for gh-14332: incorrect output when `sample` is
    # close to a bin edge
    x = []
    size = 20
    for i in range(size):
        x += [1 - 0.1**i]

    bins = np.linspace(0, 1, 11)
    sum1, edges1, bc = binned_statistic_dd(
        x, np.ones(len(x)), bins=[bins], statistic='sum')
    sum2, edges2 = np.histogram(x, bins=bins)

    assert_allclose(sum1, sum2)
    assert_allclose(edges1[0], edges2)
def digitize_tree(self, tree, leaf, statistic):
    # ATTENTION: NU IS NEGATIVE IN ANTOINE's TREES, hence the minus sign
    # on nu below.
    x = [[getattr(el, beam_charge) for el in tree],
         [getattr(el, t) for el in tree],
         [getattr(el, Q2) for el in tree],
         [-getattr(el, nu) for el in tree]]
    val = [getattr(el, leaf) for el in tree]
    arr, _, _ = binned_statistic_dd(
        x, val, statistic=statistic,
        bins=[self.charge, self.t, self.qsq, self.nu])
    chargeZip = zip((-1, 1), arr)
    charge_dict = dict(chargeZip)
    return charge_dict
def episode(self, param):
    # Ensure param values fall in bounds
    for v in param:
        if (v < 0.0) or (v > 1.0):
            print('param is out of bounds')
            exit(1)
    p = param[0:self.nb_dims]  # discard potential useless dimensions
    self.params.append(p)

    # 1 - Find in which hypercube the parameter vector falls
    arr_p = np.array([p])
    cubes = sp.binned_statistic_dd(arr_p, np.ones(arr_p.shape), 'count',
                                   bins=self.bnds).statistic
    cube_idx = tuple([v[0] for v in cubes[0].nonzero()])

    # 2 - Check if the hypercube is "unlocked" by checking if a previous
    # adjacent neighbor is unlocked
    if all(v == 0 for v in cube_idx):
        # If initial cube, no need to have unlocked neighbors to learn
        self.cube_competence[cube_idx] = min(
            self.cube_competence[cube_idx] + 1, self.max_per_cube)
    else:
        # Find indices of previous adjacent neighboring hypercubes
        prev_cube_idx = [[idx, max(0, idx - 1)] for idx in cube_idx]
        previous_neighbors_idx = np.array(
            np.meshgrid(*prev_cube_idx)).T.reshape(-1, len(prev_cube_idx))
        for pn_idx in previous_neighbors_idx:
            prev_idx = tuple(pn_idx)
            if all(v == cube_idx[i] for i, v in enumerate(prev_idx)):
                # Original hypercube, not a previous neighbor
                continue
            if self.cube_competence[prev_idx] >= (
                    3 * (self.max_per_cube / 4)):
                # Previous neighbor with high competence
                self.cube_competence[cube_idx] = min(
                    self.cube_competence[cube_idx] + 1, self.max_per_cube)
                break

    normalized_competence = np.interp(self.cube_competence[cube_idx],
                                      (0, self.max_per_cube), (0, 1))
    # if self.noise >= 0.0:
    #     normalized_competence = np.clip(
    #         normalized_competence + np.random.normal(0, self.noise), 0, 1)
    return normalized_competence
def test_dd_range_errors(self):
    # Test that descriptive exceptions are raised as appropriate for bad
    # values of the `range` argument. (See gh-12996)
    with assert_raises(ValueError,
                       match='In range, start must be <= stop'):
        binned_statistic_dd([self.y], self.v, range=[[1, 0]])
    with assert_raises(
            ValueError,
            match='In dimension 1 of range, start must be <= stop'):
        binned_statistic_dd([self.x, self.y], self.v,
                            range=[[1, 0], [0, 1]])
    with assert_raises(
            ValueError,
            match='In dimension 2 of range, start must be <= stop'):
        binned_statistic_dd([self.x, self.y], self.v,
                            range=[[0, 1], [1, 0]])
    with assert_raises(
            ValueError,
            match='range given for 1 dimensions; 2 required'):
        binned_statistic_dd([self.x, self.y], self.v, range=[[0, 1]])
def bin_point_data_3d(points, grid, cellsize, stat='count', bins=None,
                      geoIm=True, mask=None):
    """Bin three-dimensional point data with coordinate information to
    grid cells.

    points: an array of coordinates of points
    grid: a tuple or a list of coordinates of grid cell centers
    bins: a tuple or a list of the bin edges in each dimension. Either
        grid or bins must be specified.
    geoIm: If True, the returned array will be arranged by geo-image
        index for the first 2D space
    """
    grd_x, grd_y, grd_t = grid
    if bins is None:
        # Since grd_x and grd_y correspond to the center of each grid
        # cell, we add half a cell size to each side of the grid
        # coordinates to get the edge values.
        xedges = np.r_[grd_x - cellsize[0] / 2, grd_x[-1] + cellsize[0] / 2]
        yedges = np.r_[grd_y - cellsize[1] / 2, grd_y[-1] + cellsize[1] / 2]
        tedges = np.r_[grd_t, grd_t[-1] + cellsize[2]]
        bins = (xedges, yedges, tedges)
    Bin3d_stat, _, _ = binned_statistic_dd(sample=points, values=None,
                                           statistic=stat, bins=bins)
    if geoIm:
        # iterate over the time axis, not over the bare integer shape[2]
        for i in range(Bin3d_stat.shape[2]):
            Bin3d_stat[:, :, i] = array_to_geoIm(Bin3d_stat[:, :, i])
    if mask is not None:
        for i in range(Bin3d_stat.shape[2]):
            Bin3d_stat[:, :, i][mask == False] = np.nan
    return Bin3d_stat
def neighbour_func(self, S, w_i, nb_func=np.mean, nbin=5, how='evenly'):
    '''
    Compute a function of w_i for each point in S within a binned
    neighbourhood. Wrapper for the 'scipy.stats.binned_statistic_dd'
    function.

    Args:
        S (N, dim_s): the spatial variable
        w_i (N,): the soft cluster assignment of each neuron
        nb_func (callable; 1d --> scalars), default is np.mean:
            the function to compute
        nbin (int), default 5: number of spatial bins in each dimension
        how ({'evenly', 'quantiles'}), default 'evenly': make the bin
            edges evenly spaced or spaced as quantiles
    '''
    dim_s = S.shape[1]
    if how == 'evenly':
        bins = tuple([np.linspace(S[:, d].min(), S[:, d].max(), nbin + 1)
                      for d in range(dim_s)])
    elif how == 'quantiles':
        bins = tuple([np.quantile(S[:, d], np.arange(nbin + 1) / nbin)
                      for d in range(dim_s)])

    stat, _, which_nb = sts.binned_statistic_dd(
        S, w_i, statistic=nb_func, bins=bins)
    if dim_s != 1:
        which_nb -= (nbin + 3)  # need to correct for silly indexing
        for b in range(nbin):
            deez = np.isin(which_nb,
                           [range(b * (nbin + 2), (b + 1) * (nbin + 2))])
            which_nb[deez] -= b * 2
    else:
        which_nb -= 1

    pi_i = stat.flatten()[which_nb]
    return pi_i, which_nb
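# The "silly indexing" correction above undoes SciPy's raveled binnumber
# layout, which reserves two outlier bins per dimension. A sketch of an
# alternative that sidesteps the arithmetic by requesting per-dimension
# indices and re-raveling them; it assumes every sample falls inside the
# bin range (true when the edges are built from the data, as above):
import numpy as np
from scipy import stats as sts

rng = np.random.default_rng(5)
S = rng.random((200, 2))   # 200 points in 2-D space
w = rng.random(200)        # one value per point
nbin = 5

stat, edges, binnum = sts.binned_statistic_dd(
    S, w, statistic=np.mean, bins=nbin, expand_binnumbers=True)

# per-dimension binnumbers are 1-based; shift, then ravel into `stat`
flat_idx = np.ravel_multi_index(binnum - 1, (nbin, nbin))
pi = stat.flatten()[flat_idx]  # bin mean assigned back to each point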
def main(cl_max_mag, lkl_method, bin_method, lkl_weight):
    '''
    Prepare observed cluster array here to save time before the algorithm
    to find the best synthetic cluster fit is used.
    '''
    mags_cols_cl, memb_probs = dataProcess(cl_max_mag)

    if lkl_method == 'tolstoy':
        # Square errors here to not repeat the same calculations each time
        # a new synthetic cluster is matched. (list() is needed on
        # Python 3, where zip returns an iterator.)
        e_mags_cols = []
        for e_m in zip(*list(zip(*cl_max_mag))[1:][3]):
            e_mags_cols.append(np.square(e_m))
        for e_c in zip(*list(zip(*cl_max_mag))[1:][5]):
            e_mags_cols.append(np.square(e_c))

        # Store and pass to use in likelihood function. The 'obs_st' list
        # is made up of:
        #   obs_st = [star_1, star_2, ...]
        #   star_i = [phot_1, phot_2, phot_3, ...]
        #   phot_j = [phot_val, error]
        # Where 'phot_j' is a photometric dimension (magnitude or color),
        # and 'phot_val', 'error' the associated value and error for
        # 'star_i'.
        obs_st = []
        mags_cols = mags_cols_cl[0] + mags_cols_cl[1]
        for st_phot, st_e_phot in zip(zip(*mags_cols), zip(*e_mags_cols)):
            obs_st.append(list(zip(*[st_phot, st_e_phot])))
        obs_clust = [obs_st, memb_probs]

    elif lkl_method == 'duong':
        # Define variables to communicate with package 'R'.
        import rpy2.robjects as robjects
        from rpy2.robjects.packages import importr
        ks = importr('ks')
        kde_test = ks.kde_test
        hpi_kfe = ks.Hpi_kfe

        # CMD for cluster region.
        mags_cols = mags_cols_cl[0] + mags_cols_cl[1]
        matrix_cl = np.ravel(np.column_stack(mags_cols))
        # matrix_cl = []
        # for st in obs_st:
        #     matrix_cl.append(st[0][0])
        #     matrix_cl.append(st[1][0])
        rows_cl = int(len(matrix_cl) / 2)

        # Create matrices for these CMDs.
        m_cl = robjects.r.matrix(robjects.FloatVector(matrix_cl),
                                 nrow=rows_cl, byrow=True)
        # Bandwidth matrices.
        hpic = hpi_kfe(x=m_cl, binned=True)

        obs_clust = [kde_test, hpi_kfe, m_cl, hpic]

    elif lkl_method in ['dolphin', 'mighell']:
        # Obtain bin edges for each dimension, defining a grid.
        bin_edges = bin_edges_f(bin_method, mags_cols_cl)

        # Put all magnitudes and colors into a single list.
        obs_mags_cols = mags_cols_cl[0] + mags_cols_cl[1]

        # Obtain histogram for observed cluster.
        cl_histo = np.histogramdd(obs_mags_cols, bins=bin_edges)[0]

        w_stat = {'mean': np.mean, 'max': np.max, 'median': np.median}
        # Weights that will be applied to each bin.
        bin_w = np.nan_to_num(binned_statistic_dd(
            obs_mags_cols, memb_probs, statistic=w_stat[lkl_weight],
            bins=bin_edges)[0])

        # Flatten N-dimensional histograms.
        cl_histo_f = np.array(cl_histo).ravel()
        bin_weight_f = np.array(bin_w).ravel()

        # Index of bins where n_i = 0 (no observed stars). Used by the
        # 'Dolphin' and 'Mighell' likelihoods.
        cl_z_idx = [cl_histo_f != 0]

        # Remove all bins where n_i = 0 (no observed stars). Used by the
        # 'Dolphin' likelihood.
        cl_histo_f_z = cl_histo_f[cl_z_idx]
        bin_weight_f_z = bin_weight_f[cl_z_idx]

        # (Weighted) Dolphin n_i dependent constant:
        #   2 * [sum(n_i * ln(n_i)) - N]
        #     = 2 * [sum(n_i * ln(n_i)) - sum(n_i)]
        #     = 2 * sum(n_i * ln(n_i) - n_i)
        #     = 2 * sum(n_i * (ln(n_i) - 1))
        # Weighted: 2 * sum(w_i * n_i * (ln(n_i) - 1))
        dolphin_cst = 2. * np.sum(
            bin_weight_f_z * cl_histo_f_z * (np.log(cl_histo_f_z) - 1.))

        obs_clust = [bin_edges, cl_histo, cl_histo_f, cl_z_idx,
                     cl_histo_f_z, dolphin_cst, bin_weight_f_z]

    return obs_clust
b = np.linspace(-2, 2, 11)
binstats(x, y, 10, np.mean)
binstats(x, y, b, np.mean)
binstats(x, y, b, np.mean, nmin=100)
binstats(x, [y, z], 10, lambda x, y: np.mean(x + y))
binstats(x, [y, z], 10, lambda x, y: [np.mean(x), np.std(y)])
binstats([x, y], z, (10, 10), np.mean)
binstats([x, y], z, [b, b], np.mean)
binstats([x, y], [z, z], 10, lambda x, y: [np.mean(x), np.std(y)])

b1 = np.linspace(-2, 2, 11)
b2 = np.linspace(-2, 2, 21)
binstats([x, y], [z, z], [b1, b2], lambda x, y: [np.mean(x), np.std(y)])

# Cross-check binstats against scipy's binned_statistic_dd.
from scipy.stats import binned_statistic_dd
s1 = binned_statistic_dd(x, x, 'std', bins=[b])[0]
s2 = binstats(x, x, bins=b, func=np.std)[0]
# print(s1, s2)
assert np.allclose(s1, s2)

s1 = binned_statistic_dd([x, y], z, 'sum', bins=[b, b])[0]
s2 = binstats([x, y], z, bins=[b, b], func=np.sum)[0]
# print(s1, s2)
assert np.allclose(s1, s2)

a = quantile(np.arange(10), q=[0.1, 0.5, 0.85])
assert np.allclose(a, [0.5, 4.5, 8.])

a = np.arange(12).reshape(3, 4)
b = quantile(a, q=0.5, axis=0)
c = quantile(a, q=0.5, axis=1)
assert np.allclose(b, [4., 5., 6., 7.])
# row medians of [[0..3], [4..7], [8..11]]
assert np.allclose(c, [1.5, 5.5, 9.5])