Esempio n. 1
0
    def _statistic(self, x, y):
        r"""
        Compute the MGC test statistic for a pair of inputs.

        Parameters
        ----------
        x, y : ndarray
            Input data matrices. `x` and `y` must have the same number of
            samples. That is, the shapes must be `(n, p)` and `(n, q)` where
            `n` is the number of samples and `p` and `q` are the number of
            dimensions. Alternatively, `x` and `y` can be distance matrices,
            where the shapes must both be `(n, n)`.

        Returns
        -------
        stat : float
            The computed MGC statistic. The same value is also stored on the
            instance as ``self.stat``.
        """
        if self.is_distance:
            # The inputs are flagged as distance matrices: use them as given.
            dist_x, dist_y = x, y
        else:
            dist_x = self.compute_distance(x)
            dist_y = self.compute_distance(y)

        # Only the statistic is wanted here, so no permutations are requested
        # (reps=0). Every warning raised during the call is suppressed.
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            result = multiscale_graphcorr(
                dist_x, dist_y, compute_distance=None, reps=0
            )

        self.stat = result.stat
        return self.stat
Esempio n. 2
0
    def test(self, x, y, reps=1000, workers=1):
        r"""
        Compute the MGC test statistic and its permutation p-value.

        Parameters
        ----------
        x, y : ndarray
            Input data matrices. `x` and `y` must have the same number of
            samples. That is, the shapes must be `(n, p)` and `(n, q)` where
            `n` is the number of samples and `p` and `q` are the number of
            dimensions. Alternatively, `x` and `y` can be distance matrices,
            where the shapes must both be `(n, n)`.
        reps : int, optional (default: 1000)
            The number of replications used to estimate the null distribution
            when using the permutation test used to calculate the p-value.
        workers : int, optional (default: 1)
            The number of cores to parallelize the p-value computation over.
            Supply -1 to use all cores available to the Process.

        Returns
        -------
        stat : float
            The computed MGC statistic.
        pvalue : float
            The computed MGC p-value.
        mgc_dict : dict
            Additional outputs, stored under the following keys (the
            ``null_dist`` entry is removed before the dictionary is returned):

                - mgc_map : ndarray
                    A 2D representation of the latent geometry of the relationship.
                - opt_scale : (int, int)
                    The estimated optimal scale as a `(x, y)` pair.

        Examples
        --------
        >>> import numpy as np
        >>> from hyppo.independence import MGC
        >>> x = np.arange(100)
        >>> y = x
        >>> stat, pvalue, _ = MGC().test(x, y)
        >>> '%.1f, %.3f' % (stat, pvalue)
        '1.0, 0.001'

        In addition, the inputs can be distance matrices. Usage is the same
        as before, except that the ``compute_distance`` parameter must be set
        to ``None``.

        >>> import numpy as np
        >>> from hyppo.independence import MGC
        >>> x = np.ones((10, 10)) - np.identity(10)
        >>> y = 2 * x
        >>> mgc = MGC(compute_distance=None)
        >>> stat, pvalue, _ = mgc.test(x, y)
        >>> '%.1f, %.2f' % (stat, pvalue)
        '0.0, 1.00'
        """
        validator = _CheckInputs(x, y, reps=reps)
        x, y = validator()

        # From here on x and y are the outputs of compute_dist, and the
        # instance is flagged accordingly.
        x, y = compute_dist(x, y, metric=self.compute_distance, **self.kwargs)
        self.is_distance = True

        # scipy is asked for zero replications: only its dictionary output is
        # kept, while the p-value comes from the parent class, whose joblib
        # based implementation is significantly faster than scipy's
        # multiprocessing backend.
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            _, _, mgc_dict = multiscale_graphcorr(
                x, y, compute_distance=None, reps=0
            )
        del mgc_dict["null_dist"]

        stat, pvalue = super(MGC, self).test(x, y, reps, workers)
        self.mgc_dict = mgc_dict

        return stat, pvalue, mgc_dict
Esempio n. 3
0
    # colorbar
    cbar = ax.figure.colorbar(im, ax=ax)
    cbar.ax.set_ylabel("", rotation=-90, va="bottom")
    ax.invert_yaxis()

    # Turn spines off and create white grid.
    for _, spine in ax.spines.items():
        spine.set_visible(False)

    # optimal scale
    opt_scale = mgc_dict["opt_scale"]
    ax.scatter(opt_scale[0], opt_scale[1], marker='X', s=200, color='red')

    # other formatting
    ax.tick_params(bottom="off", left="off")
    ax.set_xlabel('#Neighbors for X', fontsize=15)
    ax.set_ylabel('#Neighbors for Y', fontsize=15)
    ax.tick_params(axis="x", labelsize=15)
    ax.tick_params(axis="y", labelsize=15)
    ax.set_xlim(0, 100)
    ax.set_ylim(0, 100)


# Noisy linear relationship on [-1, 1]; the global seed is fixed so the
# example is reproducible.
np.random.seed(12345678)
x = np.linspace(-1, 1, 100)
noise = 0.3 * np.random.random(x.size)
y = x + noise

# Only the third output (the dictionary) is needed for the plot.
_, _, mgc_dict = multiscale_graphcorr(x, y)
mgc_plot(x, y, mgc_dict)
Esempio n. 4
0
    dcov2_xx = (A * A).sum()/float(n * n)
    dcov2_yy = (B * B).sum()/float(n * n)
    dcor = np.sqrt(dcov2_xy)/np.sqrt(np.sqrt(dcov2_xx) * np.sqrt(dcov2_yy))
    return dcor

#################i
# Draw 100 points with each coordinate uniform on [-1, 1] and keep only those
# whose distance from the origin lies in [1/2, 1] (an annulus).
x, y = stats.uniform.rvs(-1, 2, 100), stats.uniform.rvs(-1, 2, 100)
radius = np.sqrt(x**2 + y**2)  # computed once instead of twice per point
in_annulus = (radius <= 1) & (radius >= 0.5)
X, Y = x[in_annulus], y[in_annulus]

print( stats.pearsonr(X,Y) )
# Run the MGC permutation test a single time: calling it once per printed
# field doubles the cost and pairs a statistic with a p-value that came from
# a different run.
mgc_result = stats.multiscale_graphcorr(X, Y)
print( mgc_result.stat, mgc_result.pvalue )
print( distcorr(X,Y) )

################ii
# Ten independent points, each coordinate uniform on [-1, 1].
x, y = stats.uniform.rvs(-1, 2, 10), stats.uniform.rvs(-1, 2, 10)
print( stats.pearsonr(x,y) )
mgc_result = stats.multiscale_graphcorr(x, y)
print( mgc_result.stat, mgc_result.pvalue )
print( distcorr(x,y) )

#################iii
# Draw 100 points with each coordinate uniform on [-3/2, 3/2] and keep those
# lying inside any of four discs of radius 1/2 centred at (+-1, +-1).
x, y = stats.uniform.rvs(-3/2,3,100), stats.uniform.rvs(-3/2,3,100)
disc_centres = ((-1, 1), (1, 1), (-1, -1), (1, -1))
kept_points = [
    (px, py)
    for px, py in zip(x, y)
    if any((px - cx)**2 + (py - cy)**2 <= 1/4 for cx, cy in disc_centres)
]
X = [px for px, _ in kept_points]
Y = [py for _, py in kept_points]
Esempio n. 5
0
    def test(self, x, y, reps=1000, workers=1, random_state=None):
        r"""
        Compute the MGC test statistic and p-value.

        The inputs are validated and then handed to ``multiscale_graphcorr``,
        whose result is returned unchanged.

        Parameters
        ----------
        x, y : ndarray
            Input data matrices. `x` and `y` must have the same number of
            samples. That is, the shapes must be `(n, p)` and `(n, q)` where
            `n` is the number of samples and `p` and `q` are the number of
            dimensions. Alternatively, `x` and `y` can be distance matrices,
            where the shapes must both be `(n, n)`.
        reps : int, optional (default: 1000)
            The number of replications used to estimate the null distribution
            when using the permutation test used to calculate the p-value.
        workers : int, optional (default: 1)
            The number of cores to parallelize the p-value computation over.
            Supply -1 to use all cores available to the Process.
        random_state : int or np.random.RandomState instance, (default: None)
            If already a RandomState instance, use it.
            If seed is an int, return a new RandomState instance seeded with seed.
            If None, use np.random.RandomState.

        Returns
        -------
        stat : float
            The computed MGC statistic.
        pvalue : float
            The computed MGC p-value.
        mgc_dict : dict
            Contains additional useful returns containing the following keys:

                - mgc_map : ndarray
                    A 2D representation of the latent geometry of the relationship.
                - opt_scale : (int, int)
                    The estimated optimal scale as a `(x, y)` pair.
                - null_dist : list
                    The null distribution derived from the permuted matrices

        Examples
        --------
        >>> import numpy as np
        >>> from mgc.independence import MGC
        >>> x = np.arange(100)
        >>> y = x
        >>> stat, pvalue, _ = MGC().test(x, y)
        >>> '%.1f, %.3f' % (stat, pvalue)
        '1.0, 0.001'

        The number of replications can give p-values with higher confidence
        (greater alpha levels).

        >>> import numpy as np
        >>> from mgc.independence import MGC
        >>> x = np.arange(100)
        >>> y = x
        >>> stat, pvalue, _ = MGC().test(x, y, reps=10000)
        >>> '%.1f, %.3f' % (stat, pvalue)
        '1.0, 0.000'

        In addition, the inputs can be distance matrices. Usage is the same
        as before, except that the ``compute_distance`` parameter must be set
        to ``None``.

        >>> import numpy as np
        >>> from mgc.independence import MGC
        >>> x = np.ones((10, 10)) - np.identity(10)
        >>> y = 2 * x
        >>> mgc = MGC(compute_distance=None)
        >>> stat, pvalue, _ = mgc.test(x, y)
        >>> '%.1f, %.2f' % (stat, pvalue)
        '0.0, 0.93'
        """
        validator = _CheckInputs(
            x, y, dim=2, reps=reps, compute_distance=self.compute_distance
        )
        x, y = validator()

        # Inputs flagged as distance matrices get an additional check.
        if self.is_distance:
            check_xy_distmat(x, y)

        mgc_options = {
            "compute_distance": self.compute_distance,
            "reps": reps,
            "workers": workers,
            "random_state": random_state,
        }
        return multiscale_graphcorr(x, y, **mgc_options)
Esempio n. 6
0
    # colorbar
    cbar = ax.figure.colorbar(im, ax=ax)
    cbar.ax.set_ylabel("", rotation=-90, va="bottom")
    ax.invert_yaxis()

    # Turn spines off and create white grid.
    for _, spine in ax.spines.items():
        spine.set_visible(False)

    # optimal scale
    opt_scale = mgc_dict["opt_scale"]
    ax.scatter(opt_scale[0], opt_scale[1], marker='X', s=200, color='red')

    # other formatting
    ax.tick_params(bottom="off", left="off")
    ax.set_xlabel('#Neighbors for X', fontsize=15)
    ax.set_ylabel('#Neighbors for Y', fontsize=15)
    ax.tick_params(axis="x", labelsize=15)
    ax.tick_params(axis="y", labelsize=15)
    ax.set_xlim(0, 100)
    ax.set_ylim(0, 100)


# Spiral-shaped relationship: one uniform draw on [0, 5) serves as both the
# radius and (times pi) the angle; uniform noise is added to y only.
rng = np.random.default_rng()
unif = rng.uniform(0, 5, size=100)
angle = np.pi * unif
x = unif * np.cos(angle)
y = unif * np.sin(angle) + 0.4 * rng.random(x.size)

# The same generator is passed on to the test; only the dictionary output is
# needed for the plot.
_, _, mgc_dict = multiscale_graphcorr(x, y, random_state=rng)
mgc_plot(x, y, mgc_dict)