예제 #1
0
    def sample(self, n, seed=872):
        """
        Draw n points by rejection sampling.

        Proposals are drawn from q = N(0, sigma2*I) in blocks; a proposal x
        is accepted with probability p_un(x) / (c * q_un(x)), where
        p_un(x) = q_un(x) * (1 + prod_j cos(freqs_j * x_j)) is the
        unnormalized target density and c = 2.

        - n: number of points to draw
        - seed: seed for the numpy random number generator

        Return a Data object wrapping an n x d array.
        """
        freqs = self.freqs
        sigma2 = self.sigma2
        d = len(freqs)
        # number of proposals generated per round
        block_size = 500
        # envelope constant: 1 + prod(cos(.)) never exceeds 2
        c = 2.0
        with util.NumpySeedContext(seed=seed):
            sam = np.zeros((n, d))
            n_filled = 0
            while n_filled < n:
                # proposals from N(0, sigma2*I)
                X = np.random.randn(block_size, d) * np.sqrt(sigma2)
                # unnormalized proposal density
                q_un = np.exp(old_div(-np.sum(X**2, 1), (2.0 * sigma2)))
                # unnormalized target density
                p_un = q_un * (1 + np.prod(np.cos(X * freqs), 1))
                U = stats.uniform.rvs(size=block_size)
                accept = U < old_div(p_un, (c * q_un))

                # keep only as many accepted proposals as are still needed
                accepted = X[accept, :]
                n_take = min(n - n_filled, accepted.shape[0])
                n_next = n_filled + n_take
                sam[n_filled:n_next, :] = accepted[:n_take, :]
                n_filled = n_next
        return Data(sam)
예제 #2
0
    def simulate_null_dist(eigs, J, n_simulate=2000, seed=7):
        """
        Simulate the null distribution using the spectrum of the covariance
        matrix of the U-statistic. The simulated statistic is n*FSSD^2 where
        FSSD is an unbiased estimator.

        Each simulated value is sum_i eigs[i] * (Z_i^2 - 1) with Z_i
        independent standard normal draws.

        - eigs: a numpy array of estimated eigenvalues of the covariance
          matrix. eigs is of length d*J, where d is the input dimension.
        - J: the number of test locations.
        - n_simulate: the number of statistics to simulate.
        - seed: seed for the numpy random number generator.

        Return a numpy array of n_simulate simulated statistics.
        """
        d = old_div(len(eigs), J)
        assert d > 0
        n_weights = d * J
        # Draw at most d x J x block_size values at a time.
        block_size = max(20, int(old_div(1000.0, n_weights)))
        fssds = np.zeros(n_simulate)
        with util.NumpySeedContext(seed=seed):
            for start in range(0, n_simulate, block_size):
                stop = min(start + block_size, n_simulate)
                # chi^2 (1 degree of freedom) variables, one column per
                # simulated statistic
                chi2 = np.random.randn(n_weights, stop - start) ** 2
                # eigenvalue-weighted sum of centered chi^2 variables
                fssds[start:stop] = eigs.dot(chi2 - 1.0)
        return fssds
예제 #3
0
 def sample(self, n, seed=3):
     """
     Draw a sample as the elementwise log of self.nonhom_linear(size=n).

     - n: passed to nonhom_linear as its size argument
     - seed: seed for the numpy random number generator

     Return a Data object wrapping a 2d array.
     """
     with util.NumpySeedContext(seed=seed):
         raw = self.nonhom_linear(size=n)
         X = np.log(raw)
         # A 1d result (can happen if d=1) is promoted to a single column.
         if X.ndim == 1:
             X = X[:, np.newaxis]
         return Data(X)
예제 #4
0
 def sample(self, n, seed=2):
     """
     Draw n points from a normal distribution with mean self.mean and
     variance self.variance.

     - n: number of points to draw
     - seed: seed for the numpy random number generator

     Return a Data object wrapping an n x d array, d = len(self.mean).
     """
     with util.NumpySeedContext(seed=seed):
         dim = len(self.mean)
         loc = self.mean
         std = np.sqrt(self.variance)
         # scale and shift standard normal noise
         noise = np.random.randn(n, dim)
         return Data(noise * std + loc)
예제 #5
0
 def sample(self, n, seed=3):
     """
     Draw a sample as log(1/v - 1), where v = self.inh2d(lamb_bar=n).

     - n: passed to inh2d as lamb_bar
       (NOTE(review): presumably controls the number of points; confirm
       against inh2d)
     - seed: seed for the numpy random number generator

     Return a Data object wrapping a 2d array.
     """
     with util.NumpySeedContext(seed=seed):
         v = self.inh2d(lamb_bar=n)
         X = np.log(old_div(1, v) - 1)
         # A 1d result (can happen if d=1) is promoted to a single column.
         if X.ndim == 1:
             X = X[:, np.newaxis]
         return Data(X)
예제 #6
0
    def sample(self, n, seed=29):
        """
        Draw n points from the Gaussian mixture.

        The number of points per component is drawn from a multinomial
        distribution with probabilities self.pmix. Each component i is then
        sampled from N(self.means[i], self.variances[i]), and the rows of
        the stacked sample are shuffled.

        - n: total number of points to draw
        - seed: seed for the numpy random number generator

        Return a Data object wrapping an n x d array.
        """
        pmix = self.pmix
        means = self.means
        variances = self.variances
        _, d = self.means.shape
        sam_list = []
        with util.NumpySeedContext(seed=seed):
            # counts for each mixture component; multinomial with size=1
            # returns a 2d array, so take its only row
            counts = np.random.multinomial(n, pmix, size=1)[0]

            # For each component, draw from its corresponding Gaussian.
            for i, nc in enumerate(counts):
                # https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.multivariate_normal.html
                mnorm = stats.multivariate_normal(means[i], variances[i])
                # rvs() squeezes singleton axes (nc == 1 or d == 1 gives a
                # 1d or 0d array). Restore the (nc, d) shape so that vstack
                # always stacks samples as rows.
                sam_i = np.reshape(mnorm.rvs(size=nc), (nc, d))
                sam_list.append(sam_i)
            sample = np.vstack(sam_list)
            assert sample.shape[0] == n
            # mix the components so rows are not grouped by component
            np.random.shuffle(sample)
        return Data(sample)
예제 #7
0
def gaussbern_rbm_probs(stds_perturb_B, dx=50, dh=10, n=sample_size):
    """
    Get a sequence of Gaussian-Bernoulli RBM problems.
    We follow the parameter settings as described in section 6 of Liu et al.,
    2016.

    - stds_perturb_B: a list of Gaussian noise standard deviations for
      perturbing B. A value <= 1e-8 means B is left unperturbed.
    - dx: observed dimension
    - dh: latent dimension
    - n: not used in this function

    Return a list of tuples (std, p, data source), one per entry of
    stds_perturb_B. Problem i is generated with seed i + 1000.
    """
    probs = []
    for i, std in enumerate(stds_perturb_B):
        with util.NumpySeedContext(seed=i + 1000):
            # B has entries in {-1, +1}
            B = np.random.randint(0, 2, (dx, dh)) * 2 - 1.0
            b = np.random.randn(dx)
            c = np.random.randn(dh)
            p = density.GaussBernRBM(B, b, c)

            # the data source uses a (possibly) noise-perturbed copy of B
            B_perturb = B if std <= 1e-8 else B + np.random.randn(dx, dh) * std
            gb_rbm = data.DSGaussBernRBM(B_perturb, b, c, burnin=2000)

            probs.append((std, p, gb_rbm))
    return probs
예제 #8
0
 def sample(self, n, seed=3):
     """
     Draw a sample via self.gmm_sample(N=n).

     Only the first value returned by gmm_sample is used; the second is
     discarded.

     - n: passed to gmm_sample as N
     - seed: seed for the numpy random number generator

     Return a Data object wrapping a 2d array.
     """
     with util.NumpySeedContext(seed=seed):
         X, _ = self.gmm_sample(N=n)
         # A 1d result (can happen if d=1) is promoted to a single column.
         if X.ndim == 1:
             X = X[:, np.newaxis]
         return Data(X)
예제 #9
0
 def sample(self, n, seed=3):
     """
     Draw n points from the multivariate normal N(self.mean, self.cov).

     - n: number of points to draw
     - seed: seed for the numpy random number generator

     Return a Data object wrapping a 2d array.
     """
     with util.NumpySeedContext(seed=seed):
         X = stats.multivariate_normal(self.mean, self.cov).rvs(size=n)
         # A 1d result (can happen if d=1) is promoted to a single column.
         if X.ndim == 1:
             X = X[:, np.newaxis]
         return Data(X)
예제 #10
0
 def sample(self, n, seed=3):
     """
     Draw n points from a gamma distribution with shape self.alpha and
     scale 1/self.beta.

     - n: number of points to draw
     - seed: seed for the numpy random number generator

     Return a Data object wrapping a 2d array.
     """
     with util.NumpySeedContext(seed=seed):
         # scipy parameterizes the gamma by scale
         scale = old_div(1.0, self.beta)
         X = stats.gamma.rvs(self.alpha, size=n, scale=scale)
         # A 1d result (can happen if d=1) is promoted to a single column.
         if X.ndim == 1:
             X = X[:, np.newaxis]
         return Data(X)
예제 #11
0
    def test_log_den(self):
        """
        IsotropicNormal.log_den should equal the unnormalized Gaussian log
        density -||x - mean||^2 / (2*variance), for d = 3 and d = 1.
        """
        n = 7
        variance = 1.1
        with util.NumpySeedContext(seed=16):
            for d in (3, 1):
                mean = np.random.randn(d)
                X = np.random.rand(n, d) + 1

                isonorm = density.IsotropicNormal(mean, variance)
                actual = isonorm.log_den(X)
                # reference value computed directly from the formula
                sq_dist = np.sum((X - mean)**2, 1)
                expected = -sq_dist / (2.0 * variance)

                np.testing.assert_almost_equal(actual, expected)
예제 #12
0
    def test_grad_log(self):
        """
        IsotropicNormal.grad_log should equal -(x - mean) / variance,
        for d = 4 and d = 1.
        """
        n = 8
        variance = 1.2
        with util.NumpySeedContext(seed=17):
            for d in (4, 1):
                mean = np.random.randn(d) + 1
                X = np.random.rand(n, d) - 2

                isonorm = density.IsotropicNormal(mean, variance)
                actual = isonorm.grad_log(X)
                # reference gradient of the Gaussian log density
                expected = -(X - mean) / variance

                np.testing.assert_almost_equal(actual, expected)
예제 #13
0
    def test_multivariate_normal_density(self):
        """
        GaussianMixture.multivariate_normal_density should agree with
        scipy's multivariate normal pdf, for dimensions 2 to 5 with a
        random Wishart-distributed covariance.
        """
        n_points = 11
        for i in range(4):
            d = i + 2
            with util.NumpySeedContext(seed=i + 8):
                # random covariance matrix
                wishart = stats.wishart(df=10 + d, scale=np.eye(d))
                cov = wishart.rvs(size=1)
                mean = np.random.randn(d)
                X = np.random.randn(n_points, d)

                den_estimate = density.GaussianMixture.multivariate_normal_density(
                    mean, cov, X)
                den_truth = stats.multivariate_normal(mean=mean, cov=cov).pdf(X)

                np.testing.assert_almost_equal(den_estimate, den_truth)
예제 #14
0
    def test_basic(self):
        """
        Sanity checks on the Gaussian kernel matrix: it has shape n x n and
        its entries lie in [0, 1] (up to a 1e-6 tolerance).
        """
        n, d = 10, 3
        tol = 1e-6
        with util.NumpySeedContext(seed=29):
            X = np.random.randn(n, d) * 3
            k = kernel.KGauss(sigma2=1)
            K = k.eval(X, X)

            self.assertEqual(K.shape, (n, n))
            self.assertTrue(np.all(K >= 0 - tol))
            self.assertTrue(np.all(K <= 1 + tol), "K not bounded by 1")
예제 #15
0
    def test_pair_gradX_Y(self):
        """
        KGauss.pair_gradX_Y(X, Y) should match, entry by entry, the result
        of calling gradX_Y on each pair of rows (X[i], Y[i]) for each
        dimension j.
        """
        n, d = 11, 3
        with util.NumpySeedContext(seed=20):
            X = np.random.randn(n, d) * 4
            Y = np.random.randn(n, d) * 2
            k = kernel.KGauss(sigma2=2.1)
            # n x d
            pair_grad = k.pair_gradX_Y(X, Y)

            # reference: one gradX_Y call per (row, dimension)
            loop_grad = np.zeros((n, d))
            for i, j in np.ndindex(n, d):
                xi = X[[i], :]
                yi = Y[[i], :]
                loop_grad[i, j] = k.gradX_Y(xi, yi, j)

            testing.assert_almost_equal(pair_grad, loop_grad)
예제 #16
0
    def test_gradX_y(self):
        """
        KGauss.gradX_y(X, y) should equal -k(X, y) / sigma2 * (X - y),
        for d = 1 and d = 3.
        """
        n = 10
        sigma2 = 1.3
        with util.NumpySeedContext(seed=10):
            for d in (1, 3):
                y = np.random.randn(d) * 2
                X = np.random.rand(n, d) * 3

                k = kernel.KGauss(sigma2=sigma2)
                # n x d
                G = k.gradX_y(X, y)
                # reference gradient built from the kernel values
                K = k.eval(X, y[np.newaxis, :])
                expected = -K / sigma2 * (X - y)

                self.assertEqual(G.shape, expected.shape)
                testing.assert_almost_equal(G, expected)
예제 #17
0
def gaussbern_rbm_tuple(var, dx=50, dh=10, n=sample_size):
    """
    Get a tuple of Gaussian-Bernoulli RBM problems.
    We follow the parameter settings as described in section 6 of Liu et al.,
    2016.

    - var: Gaussian noise variance for perturbing B.
    - dx: observed dimension
    - dh: latent dimension
    - n: not used in this function

    Return (p, data source): the density built from the unperturbed B, and a
    DSGaussBernRBM built from the perturbed B. All parameters are generated
    with the fixed seed 1000.
    """
    with util.NumpySeedContext(seed=1000):
        # B has entries in {-1, +1}
        signs = np.random.randint(0, 2, (dx, dh))
        B = signs * 2 - 1.0
        b = np.random.randn(dx)
        c = np.random.randn(dh)
        p = density.GaussBernRBM(B, b, c)

        # perturb every entry of B with N(0, var) noise
        noise = np.random.randn(dx, dh) * np.sqrt(var)
        gb_rbm = data.DSGaussBernRBM(B + noise, b, c, burnin=50)

    return p, gb_rbm
예제 #18
0
    def perform_test(self, dat, return_simulated_stats=False, return_ustat_gram=False):
        """
        Perform the test on the data and return the results.

        The test statistic is n * mean(H), where H is the second value
        returned by self.compute_stat(dat, return_ustat_gram=True). The
        p-value is the fraction of self.n_simulate bootstrap statistics
        W' H W / n that exceed the test statistic, with W drawn from
        self.bootstrapper(n).

        - dat: an instance of Data
        - return_simulated_stats: if True, include the bootstrap statistics
          under the key "sim_stats".
        - return_ustat_gram: if True, include H under the key "H".

        Return a dictionary with keys "alpha", "pvalue", "test_stat",
        "h0_rejected", "n_simulate" and "time_secs".
        """
        with util.ContextTimer() as t:
            alpha = self.alpha
            n_simulate = self.n_simulate
            n = dat.data().shape[0]

            _, H = self.compute_stat(dat, return_ustat_gram=True)
            test_stat = n * np.mean(H)

            # bootstrapping
            sim_stats = np.zeros(n_simulate)
            with util.NumpySeedContext(seed=self.seed):
                for i in range(n_simulate):
                    W = self.bootstrapper(n)
                    # Bootstrap version of n*V_n:
                    # n * [ (1/n^2) * \sum_i \sum_j h(x_i, x_j) w_i w_j ]
                    sim_stats[i] = W.dot(H.dot(old_div(W, float(n))))

            # approximate the p-value with the bootstrap statistics
            pvalue = np.mean(sim_stats > test_stat)

        results = {
            "alpha": alpha,
            "pvalue": pvalue,
            "test_stat": test_stat,
            "h0_rejected": pvalue < alpha,
            "n_simulate": n_simulate,
            "time_secs": t.secs,
        }
        # optional, potentially large, extras
        if return_simulated_stats:
            results["sim_stats"] = sim_stats
        if return_ustat_gram:
            results["H"] = H
        return results
예제 #19
0
    def test_gradXY_sum(self):
        """
        KGauss.gradXY_sum(X, X) should equal, at entry (i, j),
        K[i, j] / sigma2 * (d - ||x_i - x_j||^2 / sigma2),
        for d = 3 and d = 1.
        """
        n = 11
        sigma2 = 1.4
        with util.NumpySeedContext(seed=12):
            for d in (3, 1):
                X = np.random.randn(n, d)
                k = kernel.KGauss(sigma2=sigma2)

                # n x n reference matrix, filled entry by entry
                expected = np.zeros((n, n))
                K = k.eval(X, X)
                for i, j in np.ndindex(n, n):
                    sq_dist = np.sum((X[i, :] - X[j, :])**2)
                    # equivalently: -sq_dist*K[i, j]/(sigma2**2) + d*K[i, j]/sigma2
                    expected[i, j] = K[i, j] / sigma2 * (d - sq_dist / sigma2)

                G = k.gradXY_sum(X, X)

                self.assertEqual(G.shape, expected.shape)
                testing.assert_almost_equal(G, expected)
예제 #20
0
    def sample(self, n, seed=29):
        """
        Draw n points from the mixture of isotropic Gaussians.

        The number of points per component is drawn from a multinomial
        distribution with probabilities self.pmix. Component i is sampled as
        standard normal noise scaled by sqrt(self.variances[i]) and shifted
        by self.means[i]. The rows of the stacked sample are then shuffled.

        - n: total number of points to draw
        - seed: seed for the numpy random number generator

        Return a Data object wrapping an n x d array.
        """
        pmix = self.pmix
        means = self.means
        variances = self.variances
        _, d = self.means.shape
        with util.NumpySeedContext(seed=seed):
            # multinomial with size=1 returns a 2d array; take its only row
            counts = np.random.multinomial(n, pmix, size=1)[0]

            # one block of rows per mixture component
            sam_list = [
                np.random.randn(nc, d) * np.sqrt(variances[i]) + means[i]
                for i, nc in enumerate(counts)
            ]
            sample = np.vstack(sam_list)
            assert sample.shape[0] == n
            # mix the components so rows are not grouped by component
            np.random.shuffle(sample)
        return Data(sample)
예제 #21
0
    def sample(self, n, seed=3, return_latent=False):
        """
        Sample by blocked Gibbs sampling.

        n chains are initialized, advanced self.burnin times with
        self._blocked_gibbs_next, and then advanced once more to produce the
        returned sample.

        - n: number of points (chains)
        - seed: seed for the numpy random number generator
        - return_latent: if True, also return the latent state H

        Return Data(X), or the pair (Data(X), H) if return_latent is True.
        """
        dh = len(self.c)
        dx = len(self.b)

        with util.NumpySeedContext(seed=seed):
            # Initialize the state of the Markov chain
            X = np.random.randn(n, dx)
            # NOTE(review): randint(1, 2) has an exclusive upper bound, so it
            # always returns 1 and H starts as all +1. randint(0, 2) (random
            # -1/+1) may have been intended; confirm before changing, since
            # it would alter the seeded random stream.
            H = np.random.randint(1, 2, (n, dh)) * 2 - 1.0

            # burn-in steps, followed by the one step that is kept
            for _ in range(self.burnin):
                X, H = self._blocked_gibbs_next(X, H)
            X, H = self._blocked_gibbs_next(X, H)

        sample = Data(X)
        if return_latent:
            return sample, H
        return sample
예제 #22
0
def gbrbm_perturb(var_perturb_B, dx=50, dh=10):
    """
    Get a Gaussian-Bernoulli RBM problem where the first entry of the B matrix
    (the matrix linking the latent and the observation) is perturbed.

    - var_perturb_B: Gaussian noise variance for perturbing B. If it is
      not greater than 1e-7, B is left unperturbed.
    - dx: observed dimension
    - dh: latent dimension

    Return p (density), data source. All parameters are generated with the
    fixed seed 10.
    """
    with util.NumpySeedContext(seed=10):
        # B has entries in {-1, +1}
        B = np.random.randint(0, 2, (dx, dh)) * 2 - 1.0
        b = np.random.randn(dx)
        c = np.random.randn(dh)
        p = density.GaussBernRBM(B, b, c)

        B_perturb = np.copy(B)
        if var_perturb_B > 1e-7:
            # Draw a scalar (randn() with no arguments) rather than a
            # length-1 array: assigning a 1d array to a single element relies
            # on array-to-scalar conversion, deprecated since NumPy 1.25.
            # The same single value is drawn from the random stream.
            B_perturb[0, 0] += np.random.randn() * np.sqrt(var_perturb_B)
        ds = data.DSGaussBernRBM(B_perturb, b, c, burnin=2000)

    return p, ds
예제 #23
0
 def sample(self, n, seed=4):
     """
     Draw n points from a Laplace distribution with location self.loc and
     scale self.scale.

     - n: number of points to draw
     - seed: seed for the numpy random number generator

     Return a Data object wrapping an n x self.d array.
     """
     with util.NumpySeedContext(seed=seed):
         shape = (n, self.d)
         X = np.random.laplace(loc=self.loc, scale=self.scale, size=shape)
         return Data(X)
예제 #24
0
 def sample(self, n, seed=5):
     """
     Draw n points from a Student's t distribution with self.df degrees of
     freedom.

     - n: number of points to draw
     - seed: seed for the numpy random number generator

     Return a Data object wrapping an n x 1 array.
     """
     with util.NumpySeedContext(seed=seed):
         draws = stats.t.rvs(df=self.df, size=n)
         # one-dimensional distribution: store the draws as a single column
         return Data(draws[:, np.newaxis])