Example #1
    def sample(self, n, seed=3):
        with util.NumpySeedContext(seed=seed):
            X = np.log(self.nonhom_linear(size=n))
            if len(X.shape) == 1:
                # This can happen if d=1
                X = X[:, np.newaxis]
            return Data(X)
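
Every snippet on this page wraps its random draws in util.NumpySeedContext. As a rough sketch (an assumption about its behavior, not the package's exact code), such a context manager seeds NumPy's global RNG on entry and restores the previous RNG state on exit, so the seeding stays local to the with-block:

import numpy as np

class NumpySeedContext(object):
    """Seed NumPy's global RNG inside a with-block, then restore the
    previous state on exit so surrounding code is unaffected."""

    def __init__(self, seed):
        self.seed = seed

    def __enter__(self):
        self.saved_state = np.random.get_state()
        np.random.seed(self.seed)
        return self

    def __exit__(self, *args):
        np.random.set_state(self.saved_state)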
Example #2
def gaussbern_rbm_probs(stds_perturb_B, dx=50, dh=10, n=sample_size):
    """
    Get a sequence of Gaussian-Bernoulli RBM problems.
    We follow the parameter settings as described in section 6 of Liu et al.,
    2016.

    - stds_perturb_B: a list of Gaussian noise standard deviations for perturbing B.
    - dx: observed dimension
    - dh: latent dimension
    """
    probs = []
    for i, std in enumerate(stds_perturb_B):
        with util.NumpySeedContext(seed=i + 1000):
            B = np.random.randint(0, 2, (dx, dh)) * 2 - 1.0
            b = np.random.randn(dx)
            c = np.random.randn(dh)
            p = density.GaussBernRBM(B, b, c)

            if std <= 1e-8:
                B_perturb = B
            else:
                B_perturb = B + np.random.randn(dx, dh) * std
            gb_rbm = data.DSGaussBernRBM(B_perturb, b, c, burnin=2000)

            probs.append((std, p, gb_rbm))
    return probs
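
A hypothetical way to consume the returned list; the DataSource API with .sample(n, seed).data() follows the other examples on this page, and the perturbation levels here are illustrative:

probs = gaussbern_rbm_probs([0.0, 0.02, 0.06], dx=50, dh=10, n=500)
for std, p, ds in probs:
    X = ds.sample(500, seed=5).data()  # (500, 50) array from the perturbed RBM
    print(std, X.shape)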
Example #3
    def sample(self, n, seed=29):
        pmix = self.pmix
        means = self.means
        variances = self.variances
        k, d = self.means.shape
        sam_list = []
        with util.NumpySeedContext(seed=seed):
            # counts for each mixture component
            counts = np.random.multinomial(n, pmix, size=1)

            # counts is a 2d array
            counts = counts[0]

            # For each component, draw from its corresponding mixture component.
            for i, nc in enumerate(counts):
                # construct the component
                # https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.multivariate_normal.html
                cov = variances[i]
                mnorm = stats.multivariate_normal(means[i], cov)
                # Sample from ith component
                sam_i = mnorm.rvs(size=nc)
                sam_list.append(sam_i)
            sample = np.vstack(sam_list)
            assert sample.shape[0] == n
            np.random.shuffle(sample)
        return Data(sample)
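
The np.random.multinomial call above is what splits the n draws among the k mixture components; a standalone check of that step in plain NumPy:

import numpy as np

np.random.seed(29)
counts = np.random.multinomial(100, [0.3, 0.7], size=1)[0]
print(counts, counts.sum())  # two non-negative counts summing to 100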
Example #4
    def sample(self, n, seed=2):
        with util.NumpySeedContext(seed=seed):
            d = len(self.mean)
            mean = self.mean
            variance = self.variance
            X = np.random.randn(n, d) * np.sqrt(variance) + mean
            return Data(X)
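
The randn * sqrt(variance) + mean line is the usual affine reparameterization of an isotropic Gaussian; a quick plain-NumPy sanity check of the resulting moments (with made-up values, not the class's attributes):

import numpy as np

np.random.seed(2)
X = np.random.randn(100000, 3) * np.sqrt(2.0) + np.array([1.0, 0.0, -1.0])
print(X.mean(axis=0))  # close to [1, 0, -1]
print(X.var(axis=0))   # close to [2, 2, 2]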
Example #5
    def sample(self, n, seed=872):
        """
        Rejection sampling.
        """
        d = len(self.freqs)
        sigma2 = self.sigma2
        freqs = self.freqs
        with util.NumpySeedContext(seed=seed):
            # rejection sampling
            sam = np.zeros((n, d))
            # sample block_size*d values at a time.
            block_size = 500
            from_ind = 0
            while from_ind < n:
                # The proposal q is N(0, sigma2*I)
                X = np.random.randn(block_size, d) * np.sqrt(sigma2)
                q_un = np.exp(-np.sum(X**2, 1) / (2.0 * sigma2))
                # unnormalized density p
                p_un = q_un * (1 + np.prod(np.cos(X * freqs), 1))
                c = 2.0
                I = stats.uniform.rvs(size=block_size) < p_un / (c * q_un)

                # accept
                accepted_count = np.sum(I)
                to_take = min(n - from_ind, accepted_count)
                end_ind = from_ind + to_take

                AX = X[I, :]
                X_take = AX[:to_take, :]
                sam[from_ind:end_ind, :] = X_take
                from_ind = end_ind
        return Data(sam)
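
The envelope constant c = 2 works because the cosine product lies in [-1, 1], so p_un/q_un = 1 + prod(cos(X*freqs)) stays in [0, 2] and the acceptance probability p_un/(c*q_un) never exceeds 1. A standalone numeric check with made-up frequencies:

import numpy as np

np.random.seed(0)
X = np.random.randn(1000, 3)
ratio = 1 + np.prod(np.cos(X * np.array([1.0, 2.0, 3.0])), axis=1)
assert 0.0 <= ratio.min() and ratio.max() <= 2.0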
Example #6
    def sample(self, n, seed=3):
        with util.NumpySeedContext(seed=seed):
            X = np.log(1 / self.inh2d(lamb_bar=n) - 1)
            if len(X.shape) == 1:
                # This can happen if d=1
                X = X[:, np.newaxis]
            return Data(X)
Example #7
    def sample(self, n, seed=3):
        with util.NumpySeedContext(seed=seed):
            X = stats.gamma.rvs(self.alpha, size=n, scale=1.0 / self.beta)
            if len(X.shape) == 1:
                # This can happen if d=1
                X = X[:, np.newaxis]
            return Data(X)
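
scipy.stats.gamma is parameterized by shape and scale, so scale=1.0/beta converts from the shape-rate convention used by self.alpha and self.beta. A quick check that the empirical mean matches alpha/beta (illustrative values):

import numpy as np
from scipy import stats

alpha, beta = 3.0, 2.0
X = stats.gamma.rvs(alpha, scale=1.0 / beta, size=200000, random_state=0)
print(X.mean())  # close to alpha/beta = 1.5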
Example #8
    def simulate_null_dist(eigs, J, n_simulate=2000, seed=7):
        """
        Simulate the null distribution using the spectrum of the covariance
        matrix of the U-statistic. The simulated statistic is n*FSSD^2 where
        FSSD is an unbiased estimator.

        - eigs: a numpy array of estimated eigenvalues of the covariance
          matrix. eigs is of length d*J, where d is the input dimension.
        - J: the number of test locations.

        Return a numpy array of simulated statistics.
        """
        # integer division: len(eigs) = d*J, so d must come out as an int
        d = len(eigs) // J
        assert d > 0
        # draw at most d x J x block_size values at a time
        block_size = max(20, int(1000.0 / (d * J)))
        fssds = np.zeros(n_simulate)
        from_ind = 0
        with util.NumpySeedContext(seed=seed):
            while from_ind < n_simulate:
                to_draw = min(block_size, n_simulate - from_ind)
                # draw chi^2 random variables.
                chi2 = np.random.randn(d * J, to_draw)**2

                # an array of length to_draw
                sim_fssds = eigs.dot(chi2 - 1.0)
                # store
                end_ind = from_ind + to_draw
                fssds[from_ind:end_ind] = sim_fssds
                from_ind = end_ind
        return fssds
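
Hypothetical usage (the eigenvalues and the observed statistic below are stand-ins, not real estimates, and in the library this function may live on a class rather than at module level): the simulated draws approximate the weighted chi-square null of n*FSSD^2, and a p-value is the fraction of draws exceeding the observed statistic.

import numpy as np

eigs = np.abs(np.random.randn(3 * 5))  # stand-in spectrum for d=3, J=5
sims = simulate_null_dist(eigs, J=5, n_simulate=2000, seed=7)
observed = 1.7                         # stand-in observed n*FSSD^2
pvalue = np.mean(sims > observed)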
Example #9
    def sample(self, n, seed=3):
        with util.NumpySeedContext(seed=seed):
            X_gmm, llh = self.gmm_sample(N=n)
            X = X_gmm
            if len(X.shape) == 1:
                # This can happen if d=1
                X = X[:, np.newaxis]
            return Data(X)
Example #10
    def sample(self, n, seed=3):
        with util.NumpySeedContext(seed=seed):
            mvn = stats.multivariate_normal(self.mean, self.cov)
            X = mvn.rvs(size=n)
            if len(X.shape) == 1:
                # This can happen if d=1
                X = X[:, np.newaxis]
            return Data(X)
Example #11
    def test_grad_log(self):
        n = 8
        with util.NumpySeedContext(seed=17):
            for d in [4, 1]:
                variance = 1.2
                mean = np.random.randn(d) + 1
                X = np.random.rand(n, d) - 2 

                isonorm = density.IsotropicNormal(mean, variance)
                grad_log = isonorm.grad_log(X)
                my_grad_log = -(X-mean)/variance

                # check correctness 
                np.testing.assert_almost_equal(grad_log, my_grad_log)
Example #12
    def test_log_den(self):
        n = 7
        with util.NumpySeedContext(seed=16):
            for d in [3, 1]:
                variance = 1.1
                mean = np.random.randn(d)
                X = np.random.rand(n, d) + 1

                isonorm = density.IsotropicNormal(mean, variance)
                log_dens = isonorm.log_den(X)
                my_log_dens = -np.sum((X-mean)**2, 1)/(2.0*variance)

                # check correctness 
                np.testing.assert_almost_equal(log_dens, my_log_dens)
Example #13
    def test_multivariate_normal_density(self):
        for i in range(4):
            with util.NumpySeedContext(seed=i + 8):
                d = i + 2
                cov = stats.wishart(df=10 + d, scale=np.eye(d)).rvs(size=1)
                mean = np.random.randn(d)
                X = np.random.randn(11, d)
                den_estimate = density.GaussianMixture.multivariate_normal_density(
                    mean, cov, X)

                mnorm = stats.multivariate_normal(mean=mean, cov=cov)
                den_truth = mnorm.pdf(X)

                np.testing.assert_almost_equal(den_estimate, den_truth)
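
For reference, the closed form that multivariate_normal_density is being tested against fits in a few lines of plain NumPy (a sketch of the textbook formula, not the library's implementation):

import numpy as np

def mvn_pdf(mean, cov, X):
    """Gaussian density N(mean, cov) evaluated at each row of X."""
    d = len(mean)
    diff = X - mean
    quad = np.sum(diff.dot(np.linalg.inv(cov)) * diff, axis=1)
    norm_const = np.sqrt((2.0 * np.pi)**d * np.linalg.det(cov))
    return np.exp(-0.5 * quad) / norm_const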
Example #14
    def test_basic(self):
        """
        Nothing special. Just test basic things.
        """
        # sample
        n = 10
        d = 3
        with util.NumpySeedContext(seed=29):
            X = np.random.randn(n, d) * 3
            k = kernel.KGauss(sigma2=1)
            K = k.eval(X, X)

            self.assertEqual(K.shape, (n, n))
            self.assertTrue(np.all(K >= 0 - 1e-6))
            self.assertTrue(np.all(K <= 1 + 1e-6), 'K not bounded by 1')
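
The bounds asserted above hold because the Gaussian kernel exp(-||x - y||^2 / (2*sigma2)) always lies in (0, 1]; a one-point check:

import numpy as np

x, y = np.array([0.5, -1.0]), np.array([2.0, 0.3])
val = np.exp(-np.sum((x - y)**2) / (2.0 * 1.0))  # sigma2 = 1
assert 0.0 < val <= 1.0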
Example #15
    def test_pair_gradX_Y(self):
        # sample
        n = 11
        d = 3
        with util.NumpySeedContext(seed=20):
            X = np.random.randn(n, d) * 4
            Y = np.random.randn(n, d) * 2
            k = kernel.KGauss(sigma2=2.1)
            # n x d
            pair_grad = k.pair_gradX_Y(X, Y)
            loop_grad = np.zeros((n, d))
            for i in range(n):
                for j in range(d):
                    loop_grad[i, j] = k.gradX_Y(X[[i], :], Y[[i], :], j)

            testing.assert_almost_equal(pair_grad, loop_grad)
Example #16
    def test_gradX_y(self):
        n = 10
        with util.NumpySeedContext(seed=10):
            for d in [1, 3]:
                y = np.random.randn(d) * 2
                X = np.random.rand(n, d) * 3

                sigma2 = 1.3
                k = kernel.KGauss(sigma2=sigma2)
                # n x d
                G = k.gradX_y(X, y)
                # check correctness
                K = k.eval(X, y[np.newaxis, :])
                myG = -K / sigma2 * (X - y)

                self.assertEqual(G.shape, myG.shape)
                testing.assert_almost_equal(G, myG)
Example #17
def med_heuristic(models, ref, subsample=1000, seed=100):
    # subsample first
    n = ref.shape[0]
    assert subsample > 0
    sub_models = []
    with util.NumpySeedContext(seed=seed):
        ind = np.random.choice(n, min(subsample, n), replace=False)
        for i in range(len(models)):
            sub_models.append(models[i][ind, :])
        sub_ref = ref[ind, :]

    med_mz = np.zeros(len(sub_models))
    for i, model in enumerate(sub_models):
        sq_pdist_mz = util.dist_matrix(model, sub_ref)**2
        med_mz[i] = np.median(sq_pdist_mz)**0.5

    sigma2 = 0.5 * np.mean(med_mz)**2
    return sigma2
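
util.dist_matrix is assumed here to return the pairwise Euclidean distance matrix between the rows of its two arguments; a minimal stand-in consistent with that reading:

import numpy as np

def dist_matrix(X, Y):
    """Pairwise Euclidean distances between rows of X and rows of Y."""
    sx = np.sum(X**2, axis=1)
    sy = np.sum(Y**2, axis=1)
    D2 = sx[:, np.newaxis] - 2.0 * X.dot(Y.T) + sy[np.newaxis, :]
    np.maximum(D2, 0, out=D2)  # clip tiny negatives from round-off
    return np.sqrt(D2)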
Example #18
    def perform_test(self,
                     dat,
                     return_simulated_stats=False,
                     return_ustat_gram=False):
        """
        dat: an instance of Data
        """
        with util.ContextTimer() as t:
            alpha = self.alpha
            n_simulate = self.n_simulate
            X = dat.data()
            n = X.shape[0]

            _, H = self.compute_stat(dat, return_ustat_gram=True)
            test_stat = n * np.mean(H)
            # bootstrapping
            sim_stats = np.zeros(n_simulate)
            with util.NumpySeedContext(seed=self.seed):
                for i in range(n_simulate):
                    W = self.bootstrapper(n)
                    # n * [ (1/n^2) * \sum_i \sum_j h(x_i, x_j) w_i w_j ]
                    boot_stat = W.dot(H.dot(old_div(W, float(n))))
                    # This is a bootstrap version of n*V_n
                    sim_stats[i] = boot_stat

            # approximate p-value with the permutations
            pvalue = np.mean(sim_stats > test_stat)

        results = {
            'alpha': self.alpha,
            'pvalue': pvalue,
            'test_stat': test_stat,
            'h0_rejected': pvalue < alpha,
            'n_simulate': n_simulate,
            'time_secs': t.secs,
            "H_mu": H.mean(),
            "H_sigma": H.std()
        }
        if return_simulated_stats:
            results['sim_stats'] = sim_stats
        if return_ustat_gram:
            results['H'] = H

        return results
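
self.bootstrapper is configured elsewhere; one common choice for this kind of weighted (multiplier) bootstrap is Rademacher signs, sketched below as an assumption rather than this class's actual default:

import numpy as np

def bootstrapper_rademacher(n):
    """Zero-mean +/-1 multipliers for the wild bootstrap of a V-statistic."""
    return 2.0 * np.random.randint(0, 2, n) - 1.0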
Example #19
def training_model(model, data, SAVE_DIR):
    held_out, train_set = data[:HELD_OUT], data[HELD_OUT:]
    x = tf.placeholder(tf.float32, [None, DIM], name="subsample")
    loss = model.loss(x)
    ploss = model.ploss(x, BETA)
    opt = tf.train.AdamOptimizer(LEARNING_RATE).minimize(ploss)
    saver = tf.train.Saver(tf.trainable_variables(model.name))
    minLoss = 1e10
    noChange = 0
    with tf.Session() as sess:
        print(" Training ")
        sess.run([
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        ])
        tf.get_default_graph().finalize()
        for i in range(N_EPOCHS):
            with util.NumpySeedContext(i):
                train_set = np.random.permutation(train_set)
            for j in range(int(TRAIN_SIZE / BATCH_SIZE)):
                subsample = train_set[j * BATCH_SIZE:(j + 1) * BATCH_SIZE, :]
                _, = sess.run([opt], feed_dict={"subsample:0": subsample})
            ## Early stopping
            val, pval = sess.run([loss, ploss],
                                 feed_dict={"subsample:0": held_out})
            print(val, pval)
            minLoss = np.min([minLoss, val])
            if np.allclose(minLoss, val):
                print("{0} loss at epoch: {1}".format(minLoss, i))
                noChange = noChange + 1
                if not os.path.isdir(SAVE_DIR + "/{0}".format(i)):
                    os.mkdir(SAVE_DIR + "/{0}".format(i))
                saver.save(sess,
                           SAVE_DIR + "/{0}/model".format(i),
                           write_meta_graph=False)
            else:
                noChange = 0
            if noChange > GIVE_UP:
                break
        print(" END and SAVE {0}".format(i))
        if not os.path.isdir(SAVE_DIR + "/end"):
            os.mkdir(SAVE_DIR + "/end")
        saver.save(sess, SAVE_DIR + "/end/model", write_meta_graph=False)
Example #20
    def test_gradXY_sum(self):
        n = 11
        with util.NumpySeedContext(seed=12):
            for d in [3, 1]:
                X = np.random.randn(n, d)
                sigma2 = 1.4
                k = kernel.KGauss(sigma2=sigma2)

                # n x n
                myG = np.zeros((n, n))
                K = k.eval(X, X)
                for i in range(n):
                    for j in range(n):
                        diffi2 = np.sum((X[i, :] - X[j, :])**2)
                        #myG[i, j] = -diffi2*K[i, j]/(sigma2**2)+ d*K[i, j]/sigma2
                        myG[i, j] = K[i, j] / sigma2 * (d - diffi2 / sigma2)

                # check correctness
                G = k.gradXY_sum(X, X)

                self.assertEqual(G.shape, myG.shape)
                testing.assert_almost_equal(G, myG)
Example #21
    def sample(self, n, seed=29):
        pmix = self.pmix
        means = self.means
        variances = self.variances
        k, d = self.means.shape
        sam_list = []
        with util.NumpySeedContext(seed=seed):
            # counts for each mixture component 
            counts = np.random.multinomial(n, pmix, size=1)

            # counts is a 2d array
            counts = counts[0]

            # For each component, draw from its corresponding mixture component.            
            for i, nc in enumerate(counts):
                # Sample from ith component
                sam_i = np.random.randn(nc, d)*np.sqrt(variances[i]) + means[i]
                sam_list.append(sam_i)
            sample = np.vstack(sam_list)
            assert sample.shape[0] == n
            np.random.shuffle(sample)
        return Data(sample)
Example #22
def gaussbern_rbm_tuple(var, dx=50, dh=10, n=sample_size):
    """
    Get a tuple of Gaussian-Bernoulli RBM problems.
    We follow the parameter settings as described in section 6 of Liu et al.,
    2016.

    - var: Gaussian noise variance for perturbing B.
    - dx: observed dimension
    - dh: latent dimension

    Return p (a density) and a DataSource
    """
    with util.NumpySeedContext(seed=1000):
        B = np.random.randint(0, 2, (dx, dh)) * 2 - 1.0
        b = np.random.randn(dx)
        c = np.random.randn(dh)
        p = density.GaussBernRBM(B, b, c)

        B_perturb = B + np.random.randn(dx, dh) * np.sqrt(var)
        gb_rbm = data.DSGaussBernRBM(B_perturb, b, c, burnin=50)

    return p, gb_rbm
Example #23
def gbrbm_perturb(var_perturb_B, dx=50, dh=10):
    """
    Get a Gaussian-Bernoulli RBM problem where the first entry of the B matrix
    (the matrix linking the latent and the observation) is perturbed.

    - var_perturb_B: Gaussian noise variance for perturbing B.
    - dx: observed dimension
    - dh: latent dimension

    Return p (density), data source
    """
    with util.NumpySeedContext(seed=10):
        B = np.random.randint(0, 2, (dx, dh))*2 - 1.0
        b = np.random.randn(dx)
        c = np.random.randn(dh)
        p = density.GaussBernRBM(B, b, c)

        B_perturb = np.copy(B)
        if var_perturb_B > 1e-7:
            B_perturb[0, 0] = B_perturb[0, 0] + \
                np.random.randn(1)*np.sqrt(var_perturb_B)
        ds = data.DSGaussBernRBM(B_perturb, b, c, burnin=2000)

    return p, ds
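
Hypothetical usage, following the DataSource API seen elsewhere on this page: var_perturb_B = 0 gives a problem where p and the data source agree, while a positive value perturbs a single entry of B.

p_same, ds_same = gbrbm_perturb(0.0, dx=50, dh=10)
p_diff, ds_diff = gbrbm_perturb(0.1, dx=50, dh=10)
X = ds_diff.sample(300, seed=8).data()  # (300, 50) array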
Example #24
    def sample(self, n, seed=3, return_latent=False):
        """
        Sample by blocked Gibbs sampling
        """
        B = self.B
        b = self.b
        c = self.c
        dh = len(c)
        dx = len(b)

        # Initialize the state of the Markov chain
        with util.NumpySeedContext(seed=seed):
            X = np.random.randn(n, dx)
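            # Note: randint(1, 2, ...) always returns 1, so H starts at all +1;
            # the burn-in Gibbs sweeps below then randomize it.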
            H = np.random.randint(1, 2, (n, dh)) * 2 - 1.0

            # burn-in
            for t in range(self.burnin):
                X, H = self._blocked_gibbs_next(X, H)
            # sampling
            X, H = self._blocked_gibbs_next(X, H)
        if return_latent:
            return Data(X), H
        else:
            return Data(X)
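
For orientation, one blocked Gibbs sweep for a Gaussian-Bernoulli RBM with joint density proportional to exp(x'Bh + b'x + c'h - ||x||^2/2) and h in {-1, 1} could look like the sketch below. This is a derivation-based guess at what _blocked_gibbs_next does, not the class's actual code:

import numpy as np

def blocked_gibbs_next(X, H, B, b, c):
    n = X.shape[0]
    # x | h is Gaussian with mean B h + b and identity covariance
    X = H.dot(B.T) + b + np.random.randn(n, len(b))
    # h_j | x are independent signs with P(h_j = 1 | x) = sigmoid(2(x'B_j + c_j))
    P = 1.0 / (1.0 + np.exp(-2.0 * (X.dot(B) + c)))
    H = np.where(np.random.rand(n, len(c)) < P, 1.0, -1.0)
    return X, H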
Example #25
    def sample(self, n, seed=4):
        with util.NumpySeedContext(seed=seed):
            X = np.random.laplace(loc=self.loc,
                                  scale=self.scale,
                                  size=(n, self.d))
            return Data(X)
Example #26
    def sample(self, n, seed=5):
        with util.NumpySeedContext(seed=seed):
            X = stats.t.rvs(df=self.df, size=n)
            X = X[:, np.newaxis]
            return Data(X)