Example no. 1
    def test_dd_zero_dedges(self):
        x = np.random.random((10000, 3))
        v = np.random.random((10000))
        bins = np.linspace(0, 1, 10)
        bins = np.append(bins, 1)
        bins = (bins, bins, bins)
        with assert_raises(ValueError, match='difference is numerically 0'):
            binned_statistic_dd(x, v, 'mean', bins=bins)
Example no. 2
    def test_dd_median(self):
        X = self.X
        v = self.v

        stat1, edges1, bc = binned_statistic_dd(X, v, "median", bins=3)
        stat2, edges2, bc = binned_statistic_dd(X, v, np.median, bins=3)

        assert_allclose(stat1, stat2)
        assert_allclose(edges1, edges2)
Example no. 3
    def test_dd_max(self):
        X = self.X
        v = self.v

        stat1, edges1, bc = binned_statistic_dd(X, v, 'max', bins=3)
        stat2, edges2, bc = binned_statistic_dd(X, v, np.max, bins=3)

        assert_allclose(stat1, stat2)
        assert_allclose(edges1, edges2)
Example no. 5
    def test_dd_median(self):
        X = self.X
        v = self.v

        stat1, edges1, bc = binned_statistic_dd(X, v, 'median', bins=3)
        stat2, edges2, bc = binned_statistic_dd(X, v, np.median, bins=3)

        assert_array_almost_equal(stat1, stat2)
        assert_array_almost_equal(edges1, edges2)
Example no. 7
    def test_dd_std(self):
        X = self.X
        v = self.v

        stat1, edges1, bc = binned_statistic_dd(X, v, "std", bins=3)
        stat2, edges2, bc = binned_statistic_dd(X, v, np.std, bins=3)

        assert_allclose(stat1, stat2)
        assert_allclose(edges1, edges2)
Example no. 8
    def test_dd_multi_values(self):
        X = self.X
        v = self.v
        w = self.w

        stat1v, edges1v, bc1v = binned_statistic_dd(X, v, np.std, bins=8)
        stat1w, edges1w, bc1w = binned_statistic_dd(X, w, np.std, bins=8)
        stat2, edges2, bc2 = binned_statistic_dd(X, [v, w], np.std, bins=8)

        assert_allclose(stat2[0], stat1v)
        assert_allclose(stat2[1], stat1w)
        assert_allclose(edges1v, edges2)
        assert_allclose(edges1w, edges2)
        assert_allclose(bc1v, bc2)
Example no. 10
    def test_dd_binned_statistic_result(self):
        # NOTE: tests the reuse of bin_edges from previous call
        x = np.random.random((10000, 3))
        v = np.random.random((10000))
        bins = np.linspace(0, 1, 10)
        bins = (bins, bins, bins)

        result = binned_statistic_dd(x, v, 'mean', bins=bins)
        stat = result.statistic

        result = binned_statistic_dd(x, v, 'mean',
                                     binned_statistic_result=result)
        stat2 = result.statistic

        assert_allclose(stat, stat2)
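The `binned_statistic_result` keyword (SciPy >= 1.4) is what lets the second call above skip re-deriving the bin edges. A minimal sketch of the same reuse pattern, applied to a different statistic over an identical binning:

import numpy as np
from scipy.stats import binned_statistic_dd

x = np.random.random((1000, 3))
v = np.random.random(1000)

first = binned_statistic_dd(x, v, 'mean', bins=10)  # derives the edges
second = binned_statistic_dd(x, v, 'std',
                             binned_statistic_result=first)  # reuses them
assert all(np.array_equal(a, b)
           for a, b in zip(first.bin_edges, second.bin_edges))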
Example no. 11
    def test_dd_multi_values(self):
        X = self.X
        v = self.v
        w = self.w

        for stat in ["count", "sum", "mean", "std", "min", "max", "median",
                     np.std]:
            stat1v, edges1v, bc1v = binned_statistic_dd(X, v, stat, bins=8)
            stat1w, edges1w, bc1w = binned_statistic_dd(X, w, stat, bins=8)
            stat2, edges2, bc2 = binned_statistic_dd(X, [v, w], stat, bins=8)
            assert_allclose(stat2[0], stat1v)
            assert_allclose(stat2[1], stat1w)
            assert_allclose(edges1v, edges2)
            assert_allclose(edges1w, edges2)
            assert_allclose(bc1v, bc2)
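Passing a list of value arrays, as these tests do, evaluates the statistic for every array over one shared binning; the result carries a leading axis over the value arrays. A minimal sketch:

import numpy as np
from scipy.stats import binned_statistic_dd

X = np.random.random((300, 3))
v = np.random.random(300)
w = np.random.random(300)

res = binned_statistic_dd(X, [v, w], 'mean', bins=4)
print(res.statistic.shape)  # (2, 4, 4, 4): one 3-D grid per value array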
Example no. 12
    def __init__(self,
                 ufed,
                 dataframe,
                 bins,
                 min_count=1,
                 adjust_centers=False):
        self._ufed = ufed
        try:
            self._bins = [bin for bin in bins]
        except TypeError:
            self._bins = [bins] * len(ufed.variables)

        sample = [dataframe[v.id] for v in ufed.variables]
        forces = self._compute_forces(ufed, dataframe)
        ranges = [(v.min_value, v.max_value) for v in ufed.variables]

        counts = stats.binned_statistic_dd(sample, [],
                                           statistic='count',
                                           bins=self._bins,
                                           range=ranges)
        index = np.where(counts.statistic.flatten() >= min_count)

        n = len(ufed.variables)
        if adjust_centers:
            means = stats.binned_statistic_dd(sample,
                                              sample + forces,
                                              bins=self._bins,
                                              range=ranges)
            self.centers = [
                means.statistic[i].flatten()[index] for i in range(n)
            ]
            self.mean_forces = [
                means.statistic[n + i].flatten()[index] for i in range(n)
            ]
        else:
            means = stats.binned_statistic_dd(sample,
                                              forces,
                                              bins=self._bins,
                                              range=ranges)
            bin_centers = [
                0.5 * (edges[1:] + edges[:-1]) for edges in counts.bin_edges
            ]
            center_points = np.stack(
                [np.array(point) for point in itertools.product(*bin_centers)])
            self.centers = [center_points[:, i][index] for i in range(n)]
            self.mean_forces = [
                statistic.flatten()[index] for statistic in means.statistic
            ]
Example no. 13
    def test_dd_result_attributes(self):
        X = self.X
        v = self.v

        res = binned_statistic_dd(X, v, 'count', bins=3)
        attributes = ('statistic', 'bin_edges', 'binnumber')
        check_named_results(res, attributes)
Example no. 14
    def _hist_with_flim(
        self, data: List[np.ndarray], edges: List[np.ndarray], chan: int
    ) -> np.ndarray:
        """Run a slightly more complex processing pipeline when we need to calculate
        the lifetime of each pixel in the image.
        We use the scipy.binned_statistic function to histogram the photons again,
        but we use their lifetime as an input for the histogram, and calculate it
        for each bin.

        Parameters
        ----------
        data : list of np.ndarray
            Photon arrival times in each of the dimensions
        edges : list of np.ndarray
            Histogram edges for each dimension.
        chan : int
            Channel number

        Returns
        -------
        hist_with_flim : np.ndarray
            N-dimensional histogram, where N = len(data)
        """
        resulting_tau, edges, binnumber = binned_statistic_dd(
            sample=data,
            values=self.df_dict[chan]["time_rel_pulse"].to_numpy(),
            statistic=calc_lifetime,
            bins=edges,
        )
        # hist = HistWithIndex(data, edges)
        # hist.run()
        # valid_photons = hist.discard_out_of_bounds_photons()
        bloater = np.ones((self.flim_downsampling, self.flim_downsampling), dtype=np.uint8)
        return np.kron(resulting_tau, bloater)
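`calc_lifetime` above is a user-supplied callable; `statistic` accepts any function that reduces the 1-D array of values falling in a bin to a scalar. A minimal sketch with a hypothetical reducer (`peak_to_peak` is illustrative, not part of the project above):

import numpy as np
from scipy.stats import binned_statistic_dd

def peak_to_peak(vals):
    # reduces the values that landed in one bin to a single number
    return vals.max() - vals.min()

sample = np.random.random((500, 2))
values = np.random.random(500)
stat, edges, binnumber = binned_statistic_dd(
    sample, values, statistic=peak_to_peak, bins=5)
# bins that received no samples come back as NaN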
Example no. 15
def cond_mean(Xt, data, nbins):
    cond_mean, _, _ = binned_statistic_dd(Xt, data, bins=nbins, expand_binnumbers=True)
    cond_mean[np.isnan(cond_mean)] = 0
    cond_mean = np.transpose(cond_mean)

    return cond_mean
Example no. 16
    def test_dd_result_attributes(self):
        X = self.X
        v = self.v

        res = binned_statistic_dd(X, v, "count", bins=3)
        attributes = ("statistic", "bin_edges", "binnumber")
        check_named_results(res, attributes)
Example no. 17
def multi_dim_hist(data, nBins):
    '''
    INPUTS:
    -------
        - data: An [M x D] or [N x D] array of NumPy data.
        - nBins: (Integer) number of bins in each dimension of the data.
    OUTPUT:
        - 1D array: normalized vector of the flattened histogram.
    STEPS:
    ------
        - Compute a multi-dimensional histogram using 'scipy.stats.binned_statistic_dd()'.
        - Flatten the histogram to a 1D vector of length N^d, i.e. the number of bins raised to the power of the number of dimensions.
        - Replace all zero-valued bins with a value just below the lowest non-zero value.
        - Finally, normalize the histogram to the range [0, 1].
    AUTHOR:
    -------
    Ekpo Otu([email protected])
    '''
    hist = stats.binned_statistic_dd(data,
                                     values=False,
                                     statistic='count',
                                     bins=nBins)[0]
    hf = hist.flatten()
    hn = norm1D(hf)
    lowestNoneZero_min = min(i for i in hn if i > 0)
    lower = 0.98 * lowestNoneZero_min
    finalVector = np.where(hn < lowestNoneZero_min, lower, hn)
    return finalVector
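norm1D is a project-local helper that is not shown on this page; a plausible stand-in (an assumption, not the author's code), supposing it min-max rescales the flattened histogram to [0, 1]:

import numpy as np

def norm1D(vec):
    # hypothetical stand-in: min-max rescale a 1-D array to [0, 1]
    vmin, vmax = vec.min(), vec.max()
    if vmax == vmin:
        return np.zeros_like(vec)
    return (vec - vmin) / (vmax - vmin)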


# ------------------------------------------------------------------------------------------------- #
Example no. 19
def bin_flags(x_idx, y_idx, flags, x_label, y_label):
    bins = tuple([
        N_grid.get(label, N_grid_default) for label in (x_label, y_label)
    ])
    return binned_statistic_dd(grid[:, [x_idx, y_idx]],
                               flags,
                               statistic="sum",
                               bins=bins)
Example no. 20
def binning_positions_only(Rg_vec, phig_vec, Zg_vec, R_edges, phi_edges, Z_edges):
    counts_grid = stats.binned_statistic_dd([Rg_vec, phig_vec, Zg_vec],
                                            Rg_vec,  # dummy values for 'count'
                                            statistic='count',
                                            bins=[R_edges, phi_edges, Z_edges])[0]
    counts_pois_grid = np.sqrt(counts_grid)  # Poisson (sqrt-N) uncertainties

    return np.array([counts_grid]), np.array([counts_pois_grid])
Example no. 21
    def setup(self, statistic):
        rng = np.random.default_rng(12345678)
        self.inp = rng.random(9999).reshape(3, 3333) * 200
        self.subbin_x_edges = np.arange(0, 200, dtype=np.float32)
        self.subbin_y_edges = np.arange(0, 200, dtype=np.float64)
        self.ret = stats.binned_statistic_dd(
            [self.inp[0], self.inp[1]], self.inp[2], statistic=statistic,
            bins=[self.subbin_x_edges, self.subbin_y_edges])
Example no. 22
    def _bin_and_transform(self, x, y, bins, centers):
        bin_means, _, _ = binned_statistic_dd(x.to_numpy(), y.to_numpy(), bins=bins)
        shape = bin_means.shape
        x_new = []
        for i in range(len(shape)):
            x_new.append(self._transform_x(i, centers, shape))
        x = pd.DataFrame({c: col for c, col in zip(x.columns, x_new)}, columns=x.columns)
        return pd.DataFrame({'overlap': bin_means.reshape(-1)}), x
Example no. 23
    def test_dd_count(self):
        X = self.X
        v = self.v

        count1, edges1, bc = binned_statistic_dd(X, v, 'count', bins=3)
        count2, edges2 = np.histogramdd(X, bins=3)

        assert_array_almost_equal(count1, count2)
        assert_array_almost_equal(edges1, edges2)
Example no. 24
def opt_est(Xt, data, nbins):
    cond_mean, _, bins = binned_statistic_dd(Xt, data, bins=nbins, expand_binnumbers=True)
    cond_mean[np.isnan(cond_mean)] = 0
    # binnumbers are 1-based; shift to 0-based and clip samples on the upper edge
    bins = bins - 1
    bins[bins == nbins] = nbins - 1
    pred = cond_mean[bins[0, :], bins[1, :], bins[2, :], bins[3, :], bins[4, :]]
    return pred
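The index arithmetic above works because expanded binnumbers are 1-based, with 0 and nbins + 1 reserved for out-of-range samples; subtracting 1 and clipping maps every sample onto a valid cell of the statistic grid. A 2-D sketch of the same per-sample lookup:

import numpy as np
from scipy.stats import binned_statistic_dd

nbins = 4
Xt = np.random.random((200, 2))
data = np.random.random(200)

stat, _, binnum = binned_statistic_dd(Xt, data, bins=nbins,
                                      expand_binnumbers=True)
idx = np.clip(binnum - 1, 0, nbins - 1)  # 1-based indices -> 0-based cells
pred = stat[idx[0], idx[1]]              # binned mean assigned to each sample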
Example no. 25
    def test_dd_count(self):
        X = self.X
        v = self.v

        count1, edges1, bc = binned_statistic_dd(X, v, "count", bins=3)
        count2, edges2 = np.histogramdd(X, bins=3)

        assert_allclose(count1, count2)
        assert_allclose(edges1, edges2)
Example no. 26
    def test_dd_sum(self):
        X = self.X
        v = self.v

        sum1, edges1, bc = binned_statistic_dd(X, v, "sum", bins=3)
        sum2, edges2 = np.histogramdd(X, bins=3, weights=v)

        assert_allclose(sum1, sum2)
        assert_allclose(edges1, edges2)
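These two equivalences hold in general: 'count' reproduces np.histogramdd without weights, and 'sum' reproduces it with the values as weights. A quick standalone check:

import numpy as np
from scipy.stats import binned_statistic_dd

X = np.random.random((200, 2))
v = np.random.random(200)

count = binned_statistic_dd(X, v, 'count', bins=4).statistic
hist, _ = np.histogramdd(X, bins=4)
assert np.allclose(count, hist)

total = binned_statistic_dd(X, v, 'sum', bins=4).statistic
whist, _ = np.histogramdd(X, bins=4, weights=v)
assert np.allclose(total, whist)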
Example no. 27
    def test_dd_sum(self):
        X = self.X
        v = self.v

        sum1, edges1, bc = binned_statistic_dd(X, v, 'sum', bins=3)
        sum2, edges2 = np.histogramdd(X, bins=3, weights=v)

        assert_array_almost_equal(sum1, sum2)
        assert_array_almost_equal(edges1, edges2)
Example no. 30
    def get_density(self, boundary, divs):
        divs2 = divs + 1 - divs % 2
        lim = np.linspace(-boundary, boundary, divs2 + 1)
        lims = []  # boundaries for the bins
        for i in range((self.x).shape[1]):  # in each direction, the boundaries are the same
            lims.append(lim)
        # This gives the mass present in each box
        H, edges, numb = stats.binned_statistic_dd(self.x, self.m, statistic='sum', bins=lims)
        density = H / ((2 * boundary / divs2) ** 2)  # mass divided by the area of a cell
        return density, edges
Example no. 31
    def setup(self):
        np.random.seed(12345678)
        self.inp = np.random.rand(9999).reshape(3, 3333) * 200
        self.subbin_x_edges = np.arange(0, 200, dtype=np.float32)
        self.subbin_y_edges = np.arange(0, 200, dtype=np.float64)
        self.ret = stats.binned_statistic_dd(
            [self.inp[0], self.inp[1]],
            self.inp[2],
            statistic="std",
            bins=[self.subbin_x_edges, self.subbin_y_edges])
Example no. 32
    def predict(self, X, stochastic=False):

        if not isinstance(X, list):
            X = [X]
        # append the current state X to the feature history
        self.feat_eng.append_feat(X)
        # get the feature history defined by the specified number of time lags.
        # Here, feat is an array with the same size as the neural network input layer
        feat = self.feat_eng.get_feat_history()
        feat = feat.reshape([1, self.n_feats])

        # find in which bins the c_i samples fall
        _, _, binnumbers_i = stats.binned_statistic_dd(feat,
                                                       np.zeros(self.N),
                                                       bins=self.bins)

        # static correction for outliers, using precomputed mapping array
        binnumbers_i = self.mapping[binnumbers_i]

        # the neighbors in the selected bin, on the 'C manifold'
        neighbors = self.feats[self.sample_idx_per_bin[binnumbers_i[0]]]

        # the distance from the current point to all neighbors
        dists = np.linalg.norm(neighbors - feat, axis=1)

        # if the current bin does not contain enough points to form a simplex,
        # adjust K.
        if dists.size < self.n_feats + 1:
            K = dists.size  # adjusted K
        else:
            K = self.n_feats + 1  # simplex K (manifold dimension + 1)

        # sort the distances + select the K nearest neighbors
        idx = np.argsort(dists)[0:K]
        simplex_idx = self.sample_idx_per_bin[binnumbers_i[0]][idx]

        if not stochastic:
            # compute the simplex weights w_i
            d_i = dists[idx]
            if d_i[0] == 0:
                u_i = np.zeros(K)
                u_i[0] = 1.0
            else:
                u_i = np.exp(-d_i / d_i[0])
            w_i = u_i / np.sum(u_i)

            # prediction is the weighted sum of corresponding samples
            # on the shadow manifold
            shadow_sample = np.sum(self.target[simplex_idx, -1] * w_i)

            return shadow_sample
        else:
            shadow_sample = self.sample_simplex(self.target[simplex_idx])[0]
            return shadow_sample[-1]
Example no. 33
    def test_dd_bincode(self):
        X = self.X[:20]
        v = self.v[:20]

        count1, edges1, bc = binned_statistic_dd(X, v, "count", bins=3)
        bc2 = np.array([63, 33, 86, 83, 88, 67, 57, 33, 42, 41, 82, 83, 92,
                        32, 36, 91, 43, 87, 81, 81])

        bcount = [(bc == i).sum() for i in np.unique(bc)]

        assert_allclose(bc, bc2)
        count1adj = count1[count1.nonzero()]
        assert_allclose(bcount, count1adj)
Example no. 34
    def __init__(self, ufed, dataframe, bins):
        self._ufed = ufed
        try:
            self._bins = [bin for bin in bins]
        except TypeError:
            self._bins = [bins] * len(ufed.variables)

        sample = []
        forces = []
        ranges = []
        for cv in ufed.variables:

            def function(dx):
                if cv.periodic:
                    return cv.force_constant * (
                        dx - cv._range * np.rint(dx / cv._range))
                else:
                    return cv.force_constant * dx

            sample.append(dataframe[f's_{cv.id}'])
            forces.append(function(dataframe[cv.id] - dataframe[f's_{cv.id}']))
            ranges.append((cv.min_value, cv.max_value))

        counts = stats.binned_statistic_dd(sample, [],
                                           statistic='count',
                                           bins=self._bins,
                                           range=ranges)
        means = stats.binned_statistic_dd(sample,
                                          sample + forces,
                                          bins=self._bins,
                                          range=ranges)
        histogram = counts.statistic.flatten()
        index = np.where(histogram > 0)

        self.histogram = histogram[index]
        n = len(ufed.variables)
        self.centers = [means.statistic[i].flatten()[index] for i in range(n)]
        self.mean_forces = [
            means.statistic[n + i].flatten()[index] for i in range(n)
        ]
Example no. 35
    def test_dd_binnumbers_unraveled(self):
        X = self.X
        v = self.v

        stat, edgesx, bcx = binned_statistic(X[:, 0], v, "mean", bins=10)
        stat, edgesy, bcy = binned_statistic(X[:, 1], v, "mean", bins=10)
        stat, edgesz, bcz = binned_statistic(X[:, 2], v, "mean", bins=10)

        stat2, edges2, bc2 = binned_statistic_dd(X, v, "mean", bins=10,
                                                 expand_binnumbers=True)

        assert_allclose(bcx, bc2[0])
        assert_allclose(bcy, bc2[1])
        assert_allclose(bcz, bc2[2])
Example no. 36
    def test_dd_bincode(self):
        X = self.X[:20]
        v = self.v[:20]

        count1, edges1, bc = binned_statistic_dd(X, v, 'count', bins=3)
        bc2 = np.array([63, 33, 86, 83, 88, 67, 57, 33, 42, 41, 82, 83, 92,
                        32, 36, 91, 43, 87, 81, 81])

        bcount = [(bc == i).sum() for i in np.unique(bc)]

        assert_array_almost_equal(bc, bc2)
        count1adj = count1[count1.nonzero()]
        assert_array_almost_equal(bcount, count1adj)
Example no. 37
    def test_dd_binnumbers_unraveled(self):
        X = self.X
        v = self.v

        stat, edgesx, bcx = binned_statistic(X[:, 0], v, 'mean', bins=15)
        stat, edgesy, bcy = binned_statistic(X[:, 1], v, 'mean', bins=20)
        stat, edgesz, bcz = binned_statistic(X[:, 2], v, 'mean', bins=10)

        stat2, edges2, bc2 = binned_statistic_dd(
            X, v, 'mean', bins=(15, 20, 10), expand_binnumbers=True)

        assert_allclose(bcx, bc2[0])
        assert_allclose(bcy, bc2[1])
        assert_allclose(bcz, bc2[2])
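The unravelling these tests rely on: by default the third return value is a single array of flattened bin indices; with expand_binnumbers=True it becomes a (D, N) array holding one row of per-dimension indices per input dimension. A minimal sketch:

import numpy as np
from scipy.stats import binned_statistic_dd

sample = np.random.random((100, 3))
values = np.random.random(100)

_, _, raveled = binned_statistic_dd(sample, values, 'mean', bins=5)
_, _, per_dim = binned_statistic_dd(sample, values, 'mean', bins=5,
                                    expand_binnumbers=True)
print(raveled.shape)  # (100,)   -- one flattened index per sample
print(per_dim.shape)  # (3, 100) -- one index per dimension per sample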
Example no. 38
    def test_gh14332(self):
        # Regression test for gh-14332: wrong output when `sample` values
        # fall close to a bin edge
        x = []
        size = 20
        for i in range(size):
            x += [1 - 0.1**i]

        bins = np.linspace(0, 1, 11)
        sum1, edges1, bc = binned_statistic_dd(x,
                                               np.ones(len(x)),
                                               bins=[bins],
                                               statistic='sum')
        sum2, edges2 = np.histogram(x, bins=bins)

        assert_allclose(sum1, sum2)
        assert_allclose(edges1[0], edges2)
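For context on the regression case: the samples 1 - 0.1**i converge on the last edge so fast that, for i >= 17, they are indistinguishable from 1.0 in double precision, so they must land in the final (closed) bin exactly as np.histogram places them:

x = 1 - 0.1**19
print(x == 1.0)  # True: 1e-19 is far below double-precision resolution near 1.0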
Example no. 39
    def digitize_tree(self, tree, leaf, statistic):
        # ATTENTION : NU IS NEGATIVE IN ANTOINE's TREES -----------------------------------------------------------------|
        #                                                                                                                v
        x = [[getattr(el, beam_charge) for el in tree],
             [getattr(el, t) for el in tree], [getattr(el, Q2) for el in tree],
             [-getattr(el, nu) for el in tree]]
        val = [getattr(el, leaf) for el in tree]
        arr, _, _ = binned_statistic_dd(
            x,
            val,
            statistic=statistic,
            bins=[self.charge, self.t, self.qsq, self.nu])

        chargeZip = zip((-1, 1), arr)
        charge_dict = dict(chargeZip)
        return charge_dict
Example no. 40
    def episode(self, param):
        # Ensure param values fall in bounds
        for v in param:
            if (v < 0.0) or (v > 1.0):
                print('param is out of bounds')
                exit(1)
        p = param[0:self.nb_dims]  # discard potential useless dimensions
        self.params.append(p)

        # 1 - Find in which hypercube the parameter vector falls
        arr_p = np.array([p])
        cubes = sp.binned_statistic_dd(arr_p,
                                       np.ones(arr_p.shape),
                                       'count',
                                       bins=self.bnds).statistic
        cube_idx = tuple([v[0] for v in cubes[0].nonzero()])

        # 2 - Check if hypercube is "unlocked" by checking if a previous adjacent neighbor is unlocked
        if all(
                v == 0 for v in cube_idx
        ):  # If initial cube, no need to have unlocked neighbors to learn
            self.cube_competence[cube_idx] = min(
                self.cube_competence[cube_idx] + 1, self.max_per_cube)
        else:  # Find index of previous adjacent neighboring hypercubes
            prev_cube_idx = [[idx, max(0, idx - 1)] for idx in cube_idx]
            previous_neighbors_idx = np.array(
                np.meshgrid(*prev_cube_idx)).T.reshape(-1, len(prev_cube_idx))
            for pn_idx in previous_neighbors_idx:
                prev_idx = tuple(pn_idx)
                if all(v == cube_idx[i] for i, v in enumerate(prev_idx)
                       ):  # Original hypercube, not previous neighbor
                    continue
                else:
                    if self.cube_competence[prev_idx] >= (
                            3 * (self.max_per_cube /
                                 4)):  # Previous neighbor with high comp
                        self.cube_competence[cube_idx] = min(
                            self.cube_competence[cube_idx] + 1,
                            self.max_per_cube)
                        break
        normalized_competence = np.interp(self.cube_competence[cube_idx],
                                          (0, self.max_per_cube), (0, 1))
        # if self.noise >= 0.0:
        #     normalized_competence = np.clip(normalized_competence + np.random.normal(0,self.noise), 0, 1)
        return normalized_competence
Example no. 41
    def test_dd_range_errors(self):
        # Test that descriptive exceptions are raised as appropriate for bad
        # values of the `range` argument. (See gh-12996)
        with assert_raises(ValueError,
                           match='In range, start must be <= stop'):
            binned_statistic_dd([self.y], self.v, range=[[1, 0]])
        with assert_raises(
                ValueError,
                match='In dimension 1 of range, start must be <= stop'):
            binned_statistic_dd([self.x, self.y],
                                self.v,
                                range=[[1, 0], [0, 1]])
        with assert_raises(
                ValueError,
                match='In dimension 2 of range, start must be <= stop'):
            binned_statistic_dd([self.x, self.y],
                                self.v,
                                range=[[0, 1], [1, 0]])
        with assert_raises(ValueError,
                           match='range given for 1 dimensions; 2 required'):
            binned_statistic_dd([self.x, self.y], self.v, range=[[0, 1]])
Example no. 42
def bin_point_data_3d(points,
                      grid,
                      cellsize,
                      stat='count',
                      bins=None,
                      geoIm=True,
                      mask=None):
    """
    Bin three-dimensional point data with coordinate information to grid cells
    points: an array of coordinates of points
    grid: a tuple or a list of coordinates of grid cell center
    bins: a tuple or a list of the bin edges in each dimension.
          Either grid or bins must be specified.
    geoIm: If true, the returned array will be arranged by geo-image index for the first 2D space
    """

    grd_x, grd_y, grd_t = grid
    if bins is None:
        # Since grd_x grd_y correspond to the center of each grid cell, we add half cell size to each side of the grid coordinate to
        # get the edge values
        xedges = np.r_[grd_x - cellsize[0] / 2, grd_x[-1] + cellsize[0] / 2]
        yedges = np.r_[grd_y - cellsize[1] / 2, grd_y[-1] + cellsize[1] / 2]
        tedges = np.r_[grd_t, grd_t[-1] + cellsize[2]]
        bins = (xedges, yedges, tedges)

    Bin3d_stat, _, _ = binned_statistic_dd(sample=points,
                                           values=None,
                                           statistic=stat,
                                           bins=bins)

    if geoIm:
        for i in range(Bin3d_stat.shape[2]):
            Bin3d_stat[:, :, i] = array_to_geoIm(Bin3d_stat[:, :, i])

    if mask is not None:
        for i in range(Bin3d_stat.shape[2]):
            Bin3d_stat[:, :, i][~mask] = np.nan

    return Bin3d_stat
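The edge construction above (half a cell on either side of each center, plus one closing edge) is worth seeing in isolation; a quick check with evenly spaced centers:

import numpy as np

cellsize = 1.0
centers = np.arange(0.5, 5.0, 1.0)  # cell centers 0.5 .. 4.5
edges = np.r_[centers - cellsize / 2, centers[-1] + cellsize / 2]
assert np.allclose(edges, np.arange(0.0, 5.5, 1.0))  # edges 0.0 .. 5.0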
Example no. 43
    def neighbour_func(self, S, w_i, nb_func=np.mean, nbin=5, how='evenly'):
        '''
        compute a function of w_i for each point in S within a binned neighbourhood

        wrapper for the 'scipy.stats.binned_statistic_dd' function

        Args:
            S (N, dim_s): the spatial variable
            w_i (N,): the soft cluster assignment of each neuron
            nb_func (callable; 1d --> scalars), default is np.mean: the function
                to compute
            nbin (int), default 5: number of spatial bins in each dimension
            how ({'evenly', 'quantiles'}), default 'evenly': make the bin edges
                evenly spaced or spaced as quantiles
        '''

        dim_s = S.shape[1]
        if how == 'evenly':
            bins = tuple([np.linspace(S[:, d].min(), S[:, d].max(), nbin + 1)
                          for d in range(dim_s)])
        elif how == 'quantiles':
            bins = tuple([np.quantile(S[:, d], np.arange(nbin + 1) / nbin)
                          for d in range(dim_s)])

        stat, _, which_nb = sts.binned_statistic_dd(
                S, w_i, statistic=nb_func, bins=bins)

        if dim_s != 1:
            which_nb -= (nbin + 3)  # correct for the padded (outlier) bin indexing
            for b in range(nbin):
                deez = np.isin(which_nb, [range(b * (nbin + 2), (b + 1) * (nbin + 2))])
                which_nb[deez] -= b * 2
        else:
            which_nb -= 1

        pi_i = stat.flatten()[which_nb]

        return pi_i, which_nb
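The indexing correction exists because, without expand_binnumbers, binned_statistic_dd ravels bin indices over a grid padded with outlier bins, i.e. (nbin + 2) cells per dimension; the arithmetic above strips that padding for the 2-D case. A sketch showing the padded index range:

import numpy as np
from scipy.stats import binned_statistic_dd

nbin = 5
S = np.random.random((400, 2))
w = np.random.random(400)

_, _, which_nb = binned_statistic_dd(
    S, w, statistic='mean', bins=[np.linspace(0, 1, nbin + 1)] * 2)
# raveled indices address an (nbin + 2) x (nbin + 2) grid that includes
# outlier bins on every side
print(which_nb.max() < (nbin + 2) ** 2)  # True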
Example no. 44
def main(cl_max_mag, lkl_method, bin_method, lkl_weight):
    '''
    Prepare observed cluster array here to save time before the algorithm to
    find the best synthetic cluster fit is used.
    '''

    mags_cols_cl, memb_probs = dataProcess(cl_max_mag)

    if lkl_method == 'tolstoy':

        # Square errors here to not repeat the same calculations each time a
        # new synthetic cluster is matched.
        e_mags_cols = []
        for e_m in zip(*list(zip(*cl_max_mag))[1:][3]):
            e_mags_cols.append(np.square(e_m))
        for e_c in zip(*list(zip(*cl_max_mag))[1:][5]):
            e_mags_cols.append(np.square(e_c))

        # Store and pass to use in likelihood function. The 'obs_st' list is
        # made up of:
        # obs_st = [star_1, star_2, ...]
        # star_i = [phot_1, phot_2, phot_3, ...]
        # phot_j = [phot_val, error]
        # Where 'phot_j' is a photometric dimension (magnitude or color), and
        # 'phot_val', 'error' the associated value and error for 'star_i'.
        obs_st = []
        mags_cols = mags_cols_cl[0] + mags_cols_cl[1]
        for st_phot, st_e_phot in zip(zip(*mags_cols), zip(*e_mags_cols)):
            obs_st.append(list(zip(*[st_phot, st_e_phot])))
        obs_clust = [obs_st, memb_probs]

    elif lkl_method == 'duong':
        # Define variables to communicate with package 'R'.
        import rpy2.robjects as robjects
        from rpy2.robjects.packages import importr
        ks = importr('ks')
        kde_test = ks.kde_test
        hpi_kfe = ks.Hpi_kfe

        # CMD for cluster region.
        mags_cols = mags_cols_cl[0] + mags_cols_cl[1]
        matrix_cl = np.ravel(np.column_stack((mags_cols)))
        # matrix_cl = []
        # for st in obs_st:
        #     matrix_cl.append(st[0][0])
        #     matrix_cl.append(st[1][0])
        rows_cl = int(len(matrix_cl) / 2)

        # Create matrices for these CMDs.
        m_cl = robjects.r.matrix(robjects.FloatVector(matrix_cl),
                                 nrow=rows_cl, byrow=True)

        # Bandwidth matrices.
        hpic = hpi_kfe(x=m_cl, binned=True)

        obs_clust = [kde_test, hpi_kfe, m_cl, hpic]

    elif lkl_method in ['dolphin', 'mighell']:
        # Obtain bin edges for each dimension, defining a grid.
        bin_edges = bin_edges_f(bin_method, mags_cols_cl)

        # Put all magnitudes and colors into a single list.
        obs_mags_cols = mags_cols_cl[0] + mags_cols_cl[1]
        # Obtain histogram for observed cluster.
        cl_histo = np.histogramdd(obs_mags_cols, bins=bin_edges)[0]

        w_stat = {'mean': np.mean, 'max': np.max, 'median': np.median}
        # Weights that will be applied to each bin.
        bin_w = np.nan_to_num(binned_statistic_dd(
            obs_mags_cols, memb_probs, statistic=w_stat[lkl_weight],
            bins=bin_edges)[0])

        # Flatten N-dimensional histograms.
        cl_histo_f = np.array(cl_histo).ravel()
        bin_weight_f = np.array(bin_w).ravel()

        # Boolean mask of the bins where n_i != 0 (i.e., with observed
        # stars). Used by the 'Dolphin' and 'Mighell' likelihoods.
        cl_z_idx = cl_histo_f != 0

        # Remove all bins where n_i = 0 (no observed stars). Used by the
        # 'Dolphin' likelihood.
        cl_histo_f_z = cl_histo_f[cl_z_idx]
        bin_weight_f_z = bin_weight_f[cl_z_idx]

        # (Weighted) Dolphin n_i dependent constant.
        # n_i constant: 2 * [sum(n_i * ln(n_i)) - N] =
        # 2 * [sum(n_i * ln(n_i)) - sum(n_i)] =
        # 2 * sum(n_i * ln(n_i) - n_i) =
        # 2 * sum(n_i * (ln(n_i) - 1)) =
        # Weighted: 2 * sum(w_i * n_i * (ln(n_i) - 1))
        dolphin_cst = 2. * np.sum(
            bin_weight_f_z * cl_histo_f_z * (np.log(cl_histo_f_z) - 1.))

        obs_clust = [bin_edges, cl_histo, cl_histo_f, cl_z_idx, cl_histo_f_z,
                     dolphin_cst, bin_weight_f_z]

    return obs_clust
Example no. 45
File: stats.py Project: syrte/handy
    b = np.linspace(-2, 2, 11)
    binstats(x, y, 10, np.mean)
    binstats(x, y, b, np.mean)
    binstats(x, y, b, np.mean, nmin=100)
    binstats(x, [y, z], 10, lambda x, y: np.mean(x + y))
    binstats(x, [y, z], 10, lambda x, y: [np.mean(x), np.std(y)])
    binstats([x, y], z, (10, 10), np.mean)
    binstats([x, y], z, [b, b], np.mean)
    binstats([x, y], [z, z], 10, lambda x, y: [np.mean(x), np.std(y)])

    b1 = np.linspace(-2, 2, 11)
    b2 = np.linspace(-2, 2, 21)
    binstats([x, y], [z, z], [b1, b2], lambda x, y: [np.mean(x), np.std(y)])

    from scipy.stats import binned_statistic_dd
    s1 = binned_statistic_dd(x, x, 'std', bins=[b])[0]
    s2 = binstats(x, x, bins=b, func=np.std)[0]
    # print(s1, s2)
    assert np.allclose(s1, s2)

    s1 = binned_statistic_dd([x, y], z, 'sum', bins=[b, b])[0]
    s2 = binstats([x, y], z, bins=[b, b], func=np.sum)[0]
    # print(s1, s2)
    assert np.allclose(s1, s2)

    a = quantile(np.arange(10), q=[0.1, 0.5, 0.85])
    assert np.allclose(a, [0.5, 4.5, 8.])
    a = np.arange(12).reshape(3, 4)
    b = quantile(a, q=0.5, axis=0)
    c = quantile(a, q=0.5, axis=1)
    assert np.allclose(b, [4., 5., 6., 7.])