Esempio n. 1
0
 def sample(self, n, seed=3):
     """
     Draw a sample and return it wrapped in a Data object.

     The raw draws come from self.inh2d(lamb_bar=n) and are transformed
     elementwise with log(1/u - 1). All of this runs inside
     util.NumpySeedContext(seed=seed).

     - n: forwarded to self.inh2d as lamb_bar.
     - seed: seed handed to util.NumpySeedContext.
     """
     with util.NumpySeedContext(seed=seed):
         raw = self.inh2d(lamb_bar=n)
         X = np.log(old_div(1, raw) - 1)
         # A 1-D result (e.g. when d=1) is promoted to a single column.
         if X.ndim == 1:
             X = X[:, None]
         return Data(X)
    def sample_d_variates(w, n, D, seed=81):
        """
        Build an n x D sample matrix by rejection sampling.

        Proposals are drawn uniformly from [-pi, pi) in each of the D
        coordinates, and a proposal x is kept with probability
        (1 + prod_j sin(w * x_j)) / 2.

        - w: multiplier applied to the proposals inside the sine.
        - n: number of rows to return.
        - D: number of columns.
        - seed: seed handed to util.NumpySeedContext.

        Return a numpy array of shape (n, D).
        """
        with util.NumpySeedContext(seed=seed):
            out = np.zeros((n, D))
            # Proposals are generated in batches of this many rows.
            batch = 500
            filled = 0
            while filled < n:
                flat = stats.uniform.rvs(
                    loc=-math.pi, scale=2 * math.pi, size=D * batch)
                proposals = flat.reshape(batch, D)
                accept_prob = (1.0 + np.prod(np.sin(w * proposals), 1)) / 2.0
                keep = stats.uniform.rvs(size=batch) < accept_prob

                # Keep only as many accepted rows as are still needed.
                accepted = proposals[keep, :][:n - filled, :]
                stop = filled + accepted.shape[0]
                out[filled:stop, :] = accepted
                filled = stop
        return out
Esempio n. 3
0
    def sample(self, n, seed=29):
        """
        Draw n points from the Gaussian mixture and return them as Data.

        The number of points per component is drawn from a multinomial with
        probabilities self.pmix. Component i is a multivariate normal with
        mean self.means[i] and covariance self.variances[i]. The stacked
        sample is shuffled along its first axis before being returned.

        - n: total number of points to draw.
        - seed: seed handed to util.NumpySeedContext.
        """
        pmix = self.pmix
        means = self.means
        variances = self.variances
        _, d = means.shape
        sam_list = []
        with util.NumpySeedContext(seed=seed):
            # multinomial(..., size=1) returns a 2d array; take its only row.
            counts = np.random.multinomial(n, pmix, size=1)[0]

            for i, nc in enumerate(counts):
                # https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.multivariate_normal.html
                mnorm = stats.multivariate_normal(means[i], variances[i])
                # rvs() squeezes singleton axes, so with d=1 (or nc=1) the
                # draw is not 2-D. Restore the (nc, d) layout so that vstack
                # always stacks rows of length d.
                sam_i = np.reshape(mnorm.rvs(size=nc), (nc, d))
                sam_list.append(sam_i)
            sample = np.vstack(sam_list)
            assert sample.shape[0] == n
            np.random.shuffle(sample)
        return Data(sample)
Esempio n. 4
0
 def sample(self, n, seed=2):
     """
     Draw n points as standard normal noise scaled by sqrt(self.variance)
     and shifted by self.mean, and return them as Data.

     - n: number of rows to draw; the number of columns is len(self.mean).
     - seed: seed handed to util.NumpySeedContext.
     """
     with util.NumpySeedContext(seed=seed):
         mu = self.mean
         std = np.sqrt(self.variance)
         noise = np.random.randn(n, len(mu))
         return Data(noise * std + mu)
Esempio n. 5
0
    def sample(self, n, seed=872):
        """
        Draw n points by rejection sampling and return them as Data.

        The proposal q is N(0, sigma2 * I). With q_un the unnormalized
        proposal density and
        p_un(x) = q_un(x) * (1 + prod_j cos(x_j * freqs_j)),
        a proposal is kept with probability p_un(x) / (c * q_un(x)), c = 2.

        - n: number of points to return.
        - seed: seed handed to util.NumpySeedContext.
        """
        freqs = self.freqs
        sigma2 = self.sigma2
        d = len(freqs)
        with util.NumpySeedContext(seed=seed):
            out = np.zeros((n, d))
            # Proposals are generated in batches of this many rows.
            batch = 500
            bound = 2.0
            filled = 0
            while filled < n:
                proposals = np.random.randn(batch, d) * np.sqrt(sigma2)
                q_un = np.exp(-np.sum(proposals**2, 1) / (2.0 * sigma2))
                p_un = q_un * (1 + np.prod(np.cos(proposals * freqs), 1))
                keep = stats.uniform.rvs(size=batch) < p_un / (bound * q_un)

                # Keep only as many accepted rows as are still needed.
                accepted = proposals[keep, :][:n - filled, :]
                stop = filled + accepted.shape[0]
                out[filled:stop, :] = accepted
                filled = stop
        return Data(out)
Esempio n. 6
0
 def sample(self, n, seed=3):
     """
     Return Data holding the elementwise log of self.nonhom_linear(size=n).

     - n: forwarded to self.nonhom_linear as size.
     - seed: seed handed to util.NumpySeedContext.
     """
     with util.NumpySeedContext(seed=seed):
         raw = self.nonhom_linear(size=n)
         X = np.log(raw)
         # A 1-D result (e.g. when d=1) is promoted to a single column.
         if X.ndim == 1:
             X = X[:, None]
         return Data(X)
Esempio n. 7
0
    def simulate_null_dist(eigs, J, n_simulate=2000, seed=7):
        """
        Simulate the null distribution from the spectrum of the covariance
        matrix of the U-statistic. The simulated statistic is n*FSSD^2 where
        FSSD is an unbiased estimator.

        Each simulated value is sum_i eigs[i] * (z_i^2 - 1) with z_i
        independent standard normal draws.

        - eigs: a numpy array of estimated eigenvalues of the covariance
          matrix. eigs is of length d*J, where d is the input dimension.
        - J: the number of test locations.
        - n_simulate: number of statistics to simulate.
        - seed: seed handed to util.NumpySeedContext.

        Return a numpy array of n_simulate simulated statistics.
        """
        d = old_div(len(eigs), J)
        assert d > 0
        n_eigs = d * J
        # Draw at most n_eigs x block_size normal values per iteration.
        block_size = max(20, int(1000.0 / n_eigs))
        fssds = np.zeros(n_simulate)
        with util.NumpySeedContext(seed=seed):
            for start in range(0, n_simulate, block_size):
                stop = min(start + block_size, n_simulate)
                # Squared standard normals, i.e. chi^2 variables with 1 dof.
                chi2 = np.random.randn(n_eigs, stop - start)**2
                fssds[start:stop] = eigs.dot(chi2 - 1.0)
        return fssds
Esempio n. 8
0
 def sample(self, n, seed=3):
     """
     Return Data holding the first output of self.gmm_sample(N=n).

     The second output of gmm_sample is discarded.

     - n: forwarded to self.gmm_sample as N.
     - seed: seed handed to util.NumpySeedContext.
     """
     with util.NumpySeedContext(seed=seed):
         X, _ = self.gmm_sample(N=n)
         is_flat = len(X.shape) == 1
         if is_flat:
             # A 1-D result (e.g. when d=1) is promoted to a single column.
             X = X[:, np.newaxis]
         return Data(X)
Esempio n. 9
0
 def sample(self, n, seed=3):
     """
     Draw n points from a multivariate normal with mean self.mean and
     covariance self.cov, and return them as Data.

     - n: number of draws requested from rvs().
     - seed: seed handed to util.NumpySeedContext.
     """
     with util.NumpySeedContext(seed=seed):
         X = stats.multivariate_normal(self.mean, self.cov).rvs(size=n)
         # A 1-D result (e.g. when d=1) is promoted to a single column.
         # NOTE(review): rvs() also squeezes a single draw (n=1) of a d>1
         # variable to 1-D, which would become a (d, 1) column here --
         # confirm callers never request n=1.
         if X.ndim == 1:
             X = X[:, None]
         return Data(X)
 def sample(self, n, seed):
     """
     Draw a paired sample (X, Y) and return it as PairedData.

     X is an n x self.dimx standard normal matrix. Y is an n x 1 column
     equal to the product of the signs of each row of X times |Z|, where Z
     is an independent n x 1 standard normal column.

     - n: number of rows.
     - seed: seed handed to util.NumpySeedContext.
     """
     dx = self.dimx
     with util.NumpySeedContext(seed=seed):
         # Z is drawn before X; the order fixes the random stream.
         magnitude = np.abs(np.random.randn(n, 1))
         X = np.random.randn(n, dx)
         row_sign = np.prod(np.sign(X), 1)[:, None]
         Y = row_sign * magnitude
     return PairedData(X, Y, label='gauss_sign_dx%d' % dx)
Esempio n. 11
0
 def sample(self, n, seed=3):
     """
     Draw n gamma variates with shape self.alpha and scale 1/self.beta,
     and return them as Data.

     - n: number of draws (passed to rvs as size).
     - seed: seed handed to util.NumpySeedContext.
     """
     with util.NumpySeedContext(seed=seed):
         scale = 1.0 / self.beta
         X = stats.gamma.rvs(self.alpha, size=n, scale=scale)
         # A 1-D result (e.g. when d=1) is promoted to a single column.
         if X.ndim == 1:
             X = X[:, None]
         return Data(X)
 def sample(self, n, seed):
     """
     Draw a paired sample (X, Y) and return it as PairedData.

     X is an n x d standard normal matrix with d = self.dimx. Y is an
     n x 1 column built from the d // 2 consecutive column pairs of X:

         Y = sqrt(2/d) * sum_j sign(X[:, 2j] * X[:, 2j+1]) * |Z[:, j]|
             + Z[:, d // 2]

     where Z is an independent n x (d // 2 + 1) standard normal matrix.

     - n: number of rows.
     - seed: seed handed to util.NumpySeedContext.
     """
     d = self.dimx
     # Integer division: the pair count is used as an array dimension, a
     # range() bound and a column index, none of which accept a float.
     n_pairs = d // 2
     with util.NumpySeedContext(seed=seed):
         Z = np.random.randn(n, n_pairs + 1)
         X = np.random.randn(n, d)
         Y = np.zeros((n, 1))
         for j in range(n_pairs):
             Y = Y + np.sign(X[:, [2 * j]] * X[:, [2 * j + 1]]) * np.abs(
                 Z[:, [j]])
         # The last column of Z is the additive noise term.
         Y = np.sqrt(2.0 / d) * Y + Z[:, [n_pairs]]
     return PairedData(X, Y, label='pairwise_sign_dx%d' % self.dimx)
    def sample(self, n, seed=44):
        """
        Sample from the wrapped paired source self.ps and append standard
        normal noise columns to both sides.

        self.ndx noise columns are appended to X and self.ndy to Y. The
        noise is drawn under seed + 100, while self.ps.sample is called
        with the original seed. A non-None label from the wrapped sample is
        extended with the '_ndx%d' and '_ndy%d' suffixes.

        - n: number of rows.
        - seed: base seed.

        Return a PairedData.
        """
        with util.NumpySeedContext(seed=seed + 100):
            # The noise is drawn before the wrapped sampler is invoked.
            noise_x = np.random.randn(n, self.ndx)
            noise_y = np.random.randn(n, self.ndy)

            inner = self.ps.sample(n, seed=seed)
            X, Y = inner.xy()
            if inner.label is None:
                label = None
            else:
                label = inner.label + '_ndx%d' % self.ndx + '_ndy%d' % self.ndy
            return PairedData(
                np.hstack((X, noise_x)), np.hstack((Y, noise_y)), label=label)
Esempio n. 14
0
    def fcompute_pvalues_for_processes(self, U_matrix, chane_prob, num_bootstrapped_stats=300):
        """
        Estimate a p-value by bootstrap.

        Each bootstrap statistic is N * mean(U_matrix * outer(W, W)) with
        W = simulatepm(N, chane_prob). The observed statistic is
        N * mean(U_matrix). The bootstrap loop runs under a fixed seed of 10.

        - U_matrix: square-indexed array; N is its first dimension.
        - chane_prob: forwarded to simulatepm.
        - num_bootstrapped_stats: number of bootstrap replicates.

        Return the fraction of bootstrap statistics strictly greater than
        the observed statistic.
        """
        N = U_matrix.shape[0]
        observed = N * np.mean(U_matrix)
        replicates = np.zeros(num_bootstrapped_stats)

        with util.NumpySeedContext(seed=10):
            for b in range(num_bootstrapped_stats):
                W = simulatepm(N, chane_prob)
                replicates[b] = N * np.mean(U_matrix * np.outer(W, W))

        n_exceeding = np.sum(replicates > observed)
        return float(n_exceeding) / num_bootstrapped_stats
Esempio n. 15
0
    def gen_features(self, X):
        """
        Map X to random cosine/sine features.

        A (self.n_features // 2) x d matrix W of standard normal draws,
        divided by sqrt(self.sigma2), is generated under self.seed, so the
        output is deterministic given the seed. The result stacks
        cos(X W^T) and sin(X W^T) side by side and scales them by
        sqrt(2 / self.n_features).

        Fourier transform formula from
        http://mathworld.wolfram.com/FourierTransformGaussian.html

        - X: 2-d numpy array with d columns.

        Return an array with one row per row of X and
        2 * (self.n_features // 2) columns.
        """
        with util.NumpySeedContext(seed=self.seed):
            _, d = X.shape
            half = self.n_features // 2
            W = np.random.randn(half, d) / np.sqrt(self.sigma2)
            # One row per input point, one column per drawn frequency.
            projections = X.dot(W.T)
            scale = np.sqrt(2.0 / self.n_features)
            Z = np.hstack((np.cos(projections), np.sin(projections))) * scale
        return Z
Esempio n. 16
0
    def perform_test(self,
                     dat,
                     return_simulated_stats=False,
                     return_ustat_gram=False):
        """
        Run the bootstrap test on dat and return a dictionary of results.

        The test statistic is n * mean(H), where H is the second output of
        self.compute_stat(dat, return_ustat_gram=True). The null
        distribution is simulated self.n_simulate times under self.seed
        using weights W = self.bootstrapper(n), each replicate being
        W^T H W / n. The p-value is the fraction of replicates strictly
        greater than the test statistic.

        - dat: an instance of Data.
        - return_simulated_stats: if True, add the replicates under
          'sim_stats'.
        - return_ustat_gram: if True, add H under 'H'.

        Return a dict with keys 'alpha', 'pvalue', 'test_stat',
        'h0_rejected', 'n_simulate', 'time_secs', plus the optional keys
        above.
        """
        with util.ContextTimer() as t:
            alpha = self.alpha
            n_simulate = self.n_simulate
            n = dat.data().shape[0]

            _, H = self.compute_stat(dat, return_ustat_gram=True)
            test_stat = n * np.mean(H)

            # Bootstrap replicates of the statistic.
            sim_stats = np.zeros(n_simulate)
            with util.NumpySeedContext(seed=self.seed):
                for i in range(n_simulate):
                    W = self.bootstrapper(n)
                    # n * [ (1/n^2) * \sum_i \sum_j h(x_i, x_j) w_i w_j ]
                    sim_stats[i] = W.dot(H.dot(W / float(n)))

            # Fraction of replicates exceeding the observed statistic.
            pvalue = np.mean(sim_stats > test_stat)

        results = {
            'alpha': alpha,
            'pvalue': pvalue,
            'test_stat': test_stat,
            'h0_rejected': pvalue < alpha,
            'n_simulate': n_simulate,
            'time_secs': t.secs,
        }
        optional = (
            (return_simulated_stats, 'sim_stats', sim_stats),
            (return_ustat_gram, 'H', H),
        )
        for wanted, key, value in optional:
            if wanted:
                results[key] = value

        return results
Esempio n. 17
0
    def sample(self, n, seed=29):
        """
        Draw n points from the mixture and return them as Data.

        The number of points per component is drawn from a multinomial with
        probabilities self.pmix. Component i contributes standard normal
        noise scaled by sqrt(self.variances[i]) and shifted by
        self.means[i]. The stacked sample is shuffled along its first axis.

        - n: total number of points to draw.
        - seed: seed handed to util.NumpySeedContext.
        """
        means = self.means
        variances = self.variances
        _, d = means.shape
        with util.NumpySeedContext(seed=seed):
            # multinomial(..., size=1) returns a 2d array; take its only row.
            counts = np.random.multinomial(n, self.pmix, size=1)[0]

            # One block of draws per component, in component order.
            parts = [
                np.random.randn(nc, d) * np.sqrt(variances[i]) + means[i]
                for i, nc in enumerate(counts)
            ]
            sample = np.vstack(parts)
            assert sample.shape[0] == n
            np.random.shuffle(sample)
        return Data(sample)
Esempio n. 18
0
    def sample(self, n, seed=3, return_latent=False):
        """
        Sample by blocked Gibbs sampling.

        The chain state is n rows of (X, H): X starts as standard normal
        draws with len(self.b) columns, and H starts as independent uniform
        draws from {-1, +1} with len(self.c) columns. The chain is advanced
        self.burnin times with self._blocked_gibbs_next, then once more to
        produce the returned state.

        - n: number of rows (parallel chains) to sample.
        - seed: seed handed to util.NumpySeedContext.
        - return_latent: if True, also return the final H.

        Return Data(X), or the tuple (Data(X), H) when return_latent is True.
        """
        dh = len(self.c)
        dx = len(self.b)

        with util.NumpySeedContext(seed=seed):
            # Initialize the state of the Markov chain.
            X = np.random.randn(n, dx)
            # randint's upper bound is exclusive, so draw from {0, 1} and
            # map to {-1, +1}. (randint(1, 2) would always give +1.)
            H = np.random.randint(0, 2, (n, dh)) * 2 - 1.0

            # burn-in
            for _ in range(self.burnin):
                X, H = self._blocked_gibbs_next(X, H)
            # sampling
            X, H = self._blocked_gibbs_next(X, H)
        if return_latent:
            return Data(X), H
        return Data(X)
Esempio n. 19
0
 def sample(self, n, seed=4):
     """
     Draw an n x self.d matrix of Laplace variates with location self.loc
     and scale self.scale, and return it as Data.

     - n: number of rows.
     - seed: seed handed to util.NumpySeedContext.
     """
     with util.NumpySeedContext(seed=seed):
         shape = (n, self.d)
         draws = np.random.laplace(loc=self.loc, scale=self.scale, size=shape)
         return Data(draws)
Esempio n. 20
0
 def sample(self, n, seed=5):
     """
     Draw n Student-t variates with self.df degrees of freedom and return
     them as an n x 1 Data object.

     - n: number of draws (passed to rvs as size).
     - seed: seed handed to util.NumpySeedContext.
     """
     with util.NumpySeedContext(seed=seed):
         draws = stats.t.rvs(df=self.df, size=n)
         # rvs gives a flat array; present it as a single column.
         return Data(draws[:, np.newaxis])