def _statistic(self, x, y):
    r"""
    Compute the MGC test statistic for a pair of inputs.

    Parameters
    ----------
    x, y : ndarray
        Input data matrices with the same number of samples, i.e. shapes
        `(n, p)` and `(n, q)` where `n` is the number of samples and `p`
        and `q` are the number of dimensions. Alternatively, both may be
        distance matrices of shape `(n, n)`.

    Returns
    -------
    stat : float
        The computed MGC statistic. It is also stored on the instance
        as ``self.stat``.
    """
    if self.is_distance:
        # Inputs are already distance matrices: pass them through untouched.
        dist_pair = (x, y)
    else:
        dist_pair = (self.compute_distance(x), self.compute_distance(y))

    # Only the statistic is wanted here, so no permutation replications are
    # requested (reps=0); every warning raised by the call is silenced.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        result = multiscale_graphcorr(*dist_pair, compute_distance=None, reps=0)
        value = result.stat

    self.stat = value
    return value
def test(self, x, y, reps=1000, workers=1):
    r"""
    Calculate the MGC test statistic and p-value.

    Parameters
    ----------
    x, y : ndarray
        Input data matrices with the same number of samples, i.e. shapes
        `(n, p)` and `(n, q)` where `n` is the number of samples and `p`
        and `q` are the number of dimensions. Alternatively, both may be
        distance matrices of shape `(n, n)`.
    reps : int, optional (default: 1000)
        The number of replications used to estimate the null distribution
        in the permutation test that produces the p-value.
    workers : int, optional (default: 1)
        The number of cores to parallelize the p-value computation over.
        Supply -1 to use all cores available to the Process.

    Returns
    -------
    stat : float
        The computed MGC statistic.
    pvalue : float
        The computed MGC p-value.
    mgc_dict : dict
        Additional useful returns, with the following keys:

            - mgc_map : ndarray
                A 2D representation of the latent geometry of the
                relationship.
            - opt_scale : (int, int)
                The estimated optimal scale as a `(x, y)` pair.

    Examples
    --------
    >>> import numpy as np
    >>> from hyppo.independence import MGC
    >>> x = np.arange(100)
    >>> y = x
    >>> stat, pvalue, _ = MGC().test(x, y)
    >>> '%.1f, %.3f' % (stat, pvalue)
    '1.0, 0.001'

    The inputs can also be distance matrices. Usage is the same as
    before, except that the ``compute_distance`` parameter must be set
    to ``None``.

    >>> import numpy as np
    >>> from hyppo.independence import MGC
    >>> x = np.ones((10, 10)) - np.identity(10)
    >>> y = 2 * x
    >>> mgc = MGC(compute_distance=None)
    >>> stat, pvalue, _ = mgc.test(x, y)
    >>> '%.1f, %.2f' % (stat, pvalue)
    '0.0, 1.00'
    """
    validated = _CheckInputs(x, y, reps=reps)
    x, y = validated()

    # Convert to distance matrices once, then record that on the instance.
    x, y = compute_dist(x, y, metric=self.compute_distance, **self.kwargs)
    self.is_distance = True

    # This call is made only for the auxiliary dictionary: no permutation
    # replications are requested and all warnings it raises are silenced.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        mgc_dict = multiscale_graphcorr(
            x, y, compute_distance=None, reps=0
        ).mgc_dict
    del mgc_dict["null_dist"]

    # using our joblib implementation instead of multiprocessing backend in
    # scipy gives significantly faster results
    stat, pvalue = super(MGC, self).test(x, y, reps, workers)

    self.mgc_dict = mgc_dict
    return stat, pvalue, mgc_dict
# colorbar cbar = ax.figure.colorbar(im, ax=ax) cbar.ax.set_ylabel("", rotation=-90, va="bottom") ax.invert_yaxis() # Turn spines off and create white grid. for _, spine in ax.spines.items(): spine.set_visible(False) # optimal scale opt_scale = mgc_dict["opt_scale"] ax.scatter(opt_scale[0], opt_scale[1], marker='X', s=200, color='red') # other formatting ax.tick_params(bottom="off", left="off") ax.set_xlabel('#Neighbors for X', fontsize=15) ax.set_ylabel('#Neighbors for Y', fontsize=15) ax.tick_params(axis="x", labelsize=15) ax.tick_params(axis="y", labelsize=15) ax.set_xlim(0, 100) ax.set_ylim(0, 100) np.random.seed(12345678) x = np.linspace(-1, 1, num=100) y = x + 0.3 * np.random.random(x.size) _, _, mgc_dict = multiscale_graphcorr(x, y) mgc_plot(x, y, mgc_dict)
dcov2_xx = (A * A).sum()/float(n * n) dcov2_yy = (B * B).sum()/float(n * n) dcor = np.sqrt(dcov2_xy)/np.sqrt(np.sqrt(dcov2_xx) * np.sqrt(dcov2_yy)) return dcor #################i X, Y = [], [] x, y = stats.uniform.rvs(-1,2,100), stats.uniform.rvs(-1,2,100) for i in range(np.size(x)): if ( np.sqrt(x[i]**2 + y[i]**2) <= 1 ) and ( np.sqrt(x[i]**2 + y[i]**2) >= 1/2 ): X.append(x[i]) Y.append(y[i]) X, Y = np.array(X), np.array(Y) print( stats.pearsonr(X,Y) ) print( stats.multiscale_graphcorr(X,Y).stat,stats.multiscale_graphcorr(X,Y).pvalue ) print( distcorr(X,Y) ) ################ii x, y = stats.uniform.rvs(-1,2,10), stats.uniform.rvs(-1,2,10) print( stats.pearsonr(x,y) ) print( stats.multiscale_graphcorr(x,y).stat,stats.multiscale_graphcorr(x,y).pvalue ) print( distcorr(x,y) ) #################iii X, Y = [], [] x, y = stats.uniform.rvs(-3/2,3,100), stats.uniform.rvs(-3/2,3,100) for i in range(np.size(x)): if ( (x[i]+1)**2+(y[i]-1)**2 <= 1/4 ) or ( (x[i]-1)**2+(y[i]-1)**2 <= 1/4 ) or ( (x[i]+1)**2+(y[i]+1)**2 <= 1/4 ) or ( (x[i]-1)**2+(y[i]+1)**2 <= 1/4 ): X.append(x[i]) Y.append(y[i])
def test(self, x, y, reps=1000, workers=1, random_state=None):
    r"""
    Calculate the MGC test statistic and p-value.

    Parameters
    ----------
    x, y : ndarray
        Input data matrices with the same number of samples, i.e. shapes
        `(n, p)` and `(n, q)` where `n` is the number of samples and `p`
        and `q` are the number of dimensions. Alternatively, both may be
        distance matrices of shape `(n, n)`.
    reps : int, optional (default: 1000)
        The number of replications used to estimate the null distribution
        in the permutation test that produces the p-value.
    workers : int, optional (default: 1)
        The number of cores to parallelize the p-value computation over.
        Supply -1 to use all cores available to the Process.
    random_state : int or np.random.RandomState instance, (default: None)
        If already a RandomState instance, use it.
        If seed is an int, return a new RandomState instance seeded with
        seed.
        If None, use np.random.RandomState.

    Returns
    -------
    stat : float
        The computed MGC statistic.
    pvalue : float
        The computed MGC p-value.
    mgc_dict : dict
        Additional useful returns, with the following keys:

            - mgc_map : ndarray
                A 2D representation of the latent geometry of the
                relationship.
            - opt_scale : (int, int)
                The estimated optimal scale as a `(x, y)` pair.
            - null_dist : list
                The null distribution derived from the permuted matrices

    Examples
    --------
    >>> import numpy as np
    >>> from mgc.independence import MGC
    >>> x = np.arange(100)
    >>> y = x
    >>> stat, pvalue, _ = MGC().test(x, y)
    >>> '%.1f, %.3f' % (stat, pvalue)
    '1.0, 0.001'

    A larger number of replications can be requested through ``reps``.

    >>> import numpy as np
    >>> from mgc.independence import MGC
    >>> x = np.arange(100)
    >>> y = x
    >>> stat, pvalue, _ = MGC().test(x, y, reps=10000)
    >>> '%.1f, %.3f' % (stat, pvalue)
    '1.0, 0.000'

    The inputs can also be distance matrices. Usage is the same as
    before, except that the ``compute_distance`` parameter must be set
    to ``None``.

    >>> import numpy as np
    >>> from mgc.independence import MGC
    >>> x = np.ones((10, 10)) - np.identity(10)
    >>> y = 2 * x
    >>> mgc = MGC(compute_distance=None)
    >>> stat, pvalue, _ = mgc.test(x, y)
    >>> '%.1f, %.2f' % (stat, pvalue)
    '0.0, 0.93'
    """
    validate = _CheckInputs(
        x, y, dim=2, reps=reps, compute_distance=self.compute_distance
    )
    x, y = validate()

    # Extra validation applies only when the inputs are distance matrices.
    if self.is_distance:
        check_xy_distmat(x, y)

    mgc_options = {
        "compute_distance": self.compute_distance,
        "reps": reps,
        "workers": workers,
        "random_state": random_state,
    }
    return multiscale_graphcorr(x, y, **mgc_options)
# colorbar cbar = ax.figure.colorbar(im, ax=ax) cbar.ax.set_ylabel("", rotation=-90, va="bottom") ax.invert_yaxis() # Turn spines off and create white grid. for _, spine in ax.spines.items(): spine.set_visible(False) # optimal scale opt_scale = mgc_dict["opt_scale"] ax.scatter(opt_scale[0], opt_scale[1], marker='X', s=200, color='red') # other formatting ax.tick_params(bottom="off", left="off") ax.set_xlabel('#Neighbors for X', fontsize=15) ax.set_ylabel('#Neighbors for Y', fontsize=15) ax.tick_params(axis="x", labelsize=15) ax.tick_params(axis="y", labelsize=15) ax.set_xlim(0, 100) ax.set_ylim(0, 100) rng = np.random.default_rng() unif = np.array(rng.uniform(0, 5, size=100)) x = unif * np.cos(np.pi * unif) y = unif * np.sin(np.pi * unif) + 0.4 * rng.random(x.size) _, _, mgc_dict = multiscale_graphcorr(x, y, random_state=rng) mgc_plot(x, y, mgc_dict)