Exemple #1
0
    def test_ustat_h1_mean_variance(self):
        """
        Sanity-check gof.FSSD.ustat_h1_mean_variance on data that does not
        match the model.

        The model is an isotropic normal with zero mean and variance 1. The
        sample is util.randn output scaled by sqrt(2) and shifted by 2. For
        d in {1, 4} and J in {1, 3} test locations, the returned mean and
        variance estimates are both asserted to be nonnegative.
        """
        seed = 20
        n = 200
        alpha = 0.01
        for d in (1, 4):
            model_mean = np.zeros(d)
            model_var = 1
            isonorm = density.IsotropicNormal(model_mean, model_var)

            # Sampling distribution: mean shifted by 2, variance raised by 1.
            sample_mean = model_mean + 2
            sample_var = model_var + 1
            noise = util.randn(n, d, seed=seed)
            X = noise * np.sqrt(sample_var) + sample_mean
            # Wrapped for parity with the original test; not used below.
            dat = data.Data(X)

            for J in (1, 3):
                # Gaussian kernel whose width parameter is the squared
                # median distance of the sample.
                med = util.meddistance(X, subsample=1000)
                k = kernel.KGauss(med**2)

                # J test locations drawn via util.fit_gaussian_draw on X.
                V = util.fit_gaussian_draw(X, J, seed=seed + 1)

                null_sim = gof.FSSDH0SimCovObs(n_simulate=200, seed=3)
                fssd = gof.FSSD(isonorm, k, V, null_sim=null_sim, alpha=alpha)

                u_mean, u_variance = gof.FSSD.ustat_h1_mean_variance(
                    fssd.feature_tensor(X))

                # A variance estimate must be nonnegative.
                self.assertGreaterEqual(u_variance, 0)
                # The sample is far from the model, so the mean estimate is
                # expected to be nonnegative as well.
                self.assertGreaterEqual(u_mean, 0)
Exemple #2
0
    def test_basic(self):
        """
        Smoke test for gof.SteinWitness.

        Builds a witness from a model p, a Gaussian kernel and a sample drawn
        from q (same variance as p, mean shifted by 2), evaluates it at J
        points in d dimensions, and checks that the result has shape (J, d).
        """
        d = 3
        p = density.IsotropicNormal(mean=np.zeros(d), variance=3.0)
        q = density.IsotropicNormal(mean=np.zeros(d) + 2, variance=3.0)
        k = kernel.KGauss(2.0)

        ds = q.get_datasource()
        n = 97
        dat = ds.sample(n, seed=3)

        witness = gof.SteinWitness(p, k, dat)

        # Points at which to evaluate the witness. A private, seeded generator
        # keeps the test reproducible and leaves the global NumPy random
        # state untouched.
        J = 4
        rng = np.random.RandomState(11)
        V = rng.randn(J, d) * 2
        evals = witness(V)

        testing.assert_equal(evals.shape, (J, d))
Exemple #3
0
    def test_grad_log(self):
        """
        Compare IsotropicNormal.grad_log with the closed form
        -(x - mean) / variance, for d = 4 and d = 1.
        """
        n = 8
        variance = 1.2
        with util.NumpySeedContext(seed=17):
            for d in (4, 1):
                # Draw the mean first and the points second; the order
                # determines which random numbers each of them receives.
                mean = 1 + np.random.randn(d)
                X = np.random.rand(n, d) - 2

                isonorm = density.IsotropicNormal(mean, variance)

                # Reference gradient, written as (mean - x) / variance.
                expected = (mean - X) / variance
                np.testing.assert_almost_equal(isonorm.grad_log(X), expected)
Exemple #4
0
    def test_log_den(self):
        """
        Compare IsotropicNormal.log_den with -||x - mean||^2 / (2 * variance),
        for d = 3 and d = 1.

        The reference value carries no normalizing constant, so the test
        expects log_den to return the unnormalized log density.
        """
        n = 7
        variance = 1.1
        with util.NumpySeedContext(seed=16):
            for d in (3, 1):
                # Mean is drawn before the points; keep this order so each
                # receives the same random numbers as before.
                mean = np.random.randn(d)
                X = 1 + np.random.rand(n, d)

                isonorm = density.IsotropicNormal(mean, variance)

                # Squared distance of every row of X to the mean.
                sq_dist = ((X - mean)**2).sum(axis=1)
                expected = -sq_dist / (2.0 * variance)
                np.testing.assert_almost_equal(isonorm.log_den(X), expected)
Exemple #5
0
    def test_optimized_fssd(self):
        """
        FSSD test whose test locations and Gaussian width are optimized on a
        training split, starting from an explicit initialization.

        The model is N(0, I) and the data come from a normal with the mean
        shifted by 4, so the test on the held-out split must reject H0.
        """
        seed = 4
        n = 179
        alpha = 0.01
        # Optimizer settings; identical for every (d, J) combination.
        opt_kwargs = dict(reg=1e-2, max_iter=10, tol_fun=1e-3, disp=False)

        for d in (1, 3):
            zero_mean = np.zeros(d)
            unit_var = 1.0
            p = density.IsotropicNormal(zero_mean, unit_var)
            # Large mean difference: an obvious rejection case.
            ds = data.DSIsotropicNormal(zero_mean + 4, unit_var + 0)
            dat = ds.sample(n, seed=seed)

            for J in (1, 4):
                tr, te = dat.split_tr_te(tr_proportion=0.3, seed=seed + 1)

                # Initial width (squared median distance) and initial test
                # locations, both computed from the training split only.
                Xtr = tr.X
                gwidth0 = util.meddistance(Xtr, subsample=1000)**2
                V0 = util.fit_gaussian_draw(Xtr, J, seed=seed + 1)

                optimized = gof.GaussFSSD.optimize_locs_widths(
                    p, tr, gwidth0, V0, **opt_kwargs)
                V_opt, gw_opt, opt_result = optimized

                # Build the test from the optimized parameters and run it on
                # the held-out split.
                fssd_opt = gof.FSSD(
                    p,
                    kernel.KGauss(gw_opt),
                    V_opt,
                    null_sim=gof.FSSDH0SimCovObs(n_simulate=2000, seed=10),
                    alpha=alpha)
                fssd_opt_result = fssd_opt.perform_test(
                    te, return_simulated_stats=True)
                assert fssd_opt_result['h0_rejected']
Exemple #6
0
    def test_auto_init_opt_fssd(self):
        """
        FSSD test whose parameters are optimized on a training split via
        gof.GaussFSSD.optimize_auto_init, which is given only the number of
        test locations J rather than explicit initial values.

        The model is N(0, I) and the data come from a normal with the mean
        shifted by 4, so the test on the held-out split must reject H0.
        """
        seed = 5
        n = 191
        alpha = 0.01
        # Optimizer settings; identical for every (d, J) combination.
        opt_kwargs = dict(reg=1e-2, max_iter=10, tol_fun=1e-3, disp=False)

        for d in (1, 4):
            zero_mean = np.zeros(d)
            unit_var = 1.0
            p = density.IsotropicNormal(zero_mean, unit_var)
            # Large mean difference: an obvious rejection case.
            ds = data.DSIsotropicNormal(zero_mean + 4, unit_var + 0)
            dat = ds.sample(n, seed=seed)

            for J in (1, 3):
                tr, te = dat.split_tr_te(tr_proportion=0.3, seed=seed + 1)

                optimized = gof.GaussFSSD.optimize_auto_init(
                    p, tr, J, **opt_kwargs)
                V_opt, gw_opt, opt_result = optimized

                # Build the test from the optimized parameters and run it on
                # the held-out split.
                fssd_opt = gof.FSSD(
                    p,
                    kernel.KGauss(gw_opt),
                    V_opt,
                    null_sim=gof.FSSDH0SimCovObs(n_simulate=2000, seed=10),
                    alpha=alpha)
                fssd_opt_result = fssd_opt.perform_test(
                    te, return_simulated_stats=True)
                assert fssd_opt_result['h0_rejected']
Exemple #7
0
    def test_basic(self):
        """
        Basic sanity check of gof.FSSD.perform_test.

        The model is an isotropic normal with zero mean and variance 1. The
        sample keeps the model mean but is scaled by sqrt(2). Only the range
        of the reported p-value is checked: it must lie in [0, 1].
        """
        seed = 12
        n = 100
        alpha = 0.01
        for d in (1, 4):
            model_mean = np.zeros(d)
            model_var = 1
            isonorm = density.IsotropicNormal(model_mean, model_var)

            # The sampling mean is left unshifted; only the variance differs.
            sample_mean = model_mean + 0
            sample_var = model_var + 1
            noise = util.randn(n, d, seed=seed)
            X = noise * np.sqrt(sample_var) + sample_mean
            dat = data.Data(X)

            for J in (1, 3):
                # Gaussian kernel whose width parameter is the squared
                # median distance of the sample.
                med = util.meddistance(X, subsample=1000)
                k = kernel.KGauss(med**2)

                # J test locations drawn via util.fit_gaussian_draw on X.
                V = util.fit_gaussian_draw(X, J, seed=seed + 1)
                null_sim = gof.FSSDH0SimCovObs(n_simulate=200, seed=3)
                fssd = gof.FSSD(isonorm, k, V, null_sim=null_sim, alpha=alpha)

                tresult = fssd.perform_test(dat, return_simulated_stats=True)

                # A p-value is a probability.
                pvalue = tresult['pvalue']
                self.assertGreaterEqual(pvalue, 0)
                self.assertLessEqual(pvalue, 1)
Exemple #8
0
def get_pqsource_list(prob_label):
    """
    Return [(prob_param, p, ds) for ... ], a list of tuples for the problem
    named by prob_label, where
    - prob_param: a problem parameter. Each parameter has to be a
      scalar (so that we can plot them later). Parameters are preferably
      positive integers.
    - p: a Density representing the distribution p
    - ds: a DataSource, each corresponding to one parameter setting.
        The DataSource generates sample from q.

    Only the requested problem is constructed: each entry of the table below
    is a zero-argument builder that is called after the label has been
    validated. An unknown label therefore fails immediately, and looking up
    a Gaussian problem no longer builds the RBM problems as a side effect.

    The RBM builders read `sample_size`, a name that is not defined in this
    function and must be available at module level when they run.

    Raises ValueError if prob_label is not a known problem label.
    """
    sg_ds = [1, 5, 10, 15]
    gmd_ds = [5, 20, 40, 60]
    # vary the mean
    gmd_d10_ms = [0, 0.02, 0.04, 0.06]
    gvinc_d1_vs = [1, 1.5, 2, 2.5]
    gvinc_d5_vs = [1, 1.5, 2, 2.5]
    gvsub1_d1_vs = [0.1, 0.3, 0.5, 0.7]
    gvd_ds = [1, 5, 10, 15]

    gb_rbm_dx50_dh10_stds = [0, 0.02, 0.04, 0.06]
    gb_rbm_dx50_dh40_stds = [0, 0.01, 0.02, 0.04, 0.06]
    glaplace_ds = [1, 5, 10, 15]

    builders = {
        # H0 is true. vary d. P = Q = N(0, I)
        'sg': lambda: [
            (d, density.IsotropicNormal(np.zeros(d), 1),
             data.DSIsotropicNormal(np.zeros(d), 1))
            for d in sg_ds
        ],

        # vary d. P = N(0, I), Q = N( (1, 0, ..., 0), I)
        'gmd': lambda: [
            (d, density.IsotropicNormal(np.zeros(d), 1),
             data.DSIsotropicNormal(np.hstack((1, np.zeros(d - 1))), 1))
            for d in gmd_ds
        ],

        # d=10. P = N(0, I), Q = N( (m, 0, ..., 0), I). Vary m
        'gmd_d10_ms': lambda: [
            (m, density.IsotropicNormal(np.zeros(10), 1),
             data.DSIsotropicNormal(np.hstack((m, np.zeros(9))), 1))
            for m in gmd_d10_ms
        ],

        # d=1. Increase the variance. P = N(0, I). Q = N(0, v*I)
        'gvinc_d1': lambda: [
            (var, density.IsotropicNormal(np.zeros(1), 1),
             data.DSIsotropicNormal(np.zeros(1), var))
            for var in gvinc_d1_vs
        ],

        # d=5. Increase the variance. P = N(0, I). Q = N(0, v*I)
        'gvinc_d5': lambda: [
            (var, density.IsotropicNormal(np.zeros(5), 1),
             data.DSIsotropicNormal(np.zeros(5), var))
            for var in gvinc_d5_vs
        ],

        # d=1. P = N(0, 1), Q = N(0, v). Consider the variance below 1.
        'gvsub1_d1': lambda: [
            (var, density.IsotropicNormal(np.zeros(1), 1),
             data.DSIsotropicNormal(np.zeros(1), var))
            for var in gvsub1_d1_vs
        ],

        # Gaussian variance difference problem. Only the variance
        # of the first dimension differs. d varies.
        'gvd': lambda: [
            (d, density.Normal(np.zeros(d), np.eye(d)),
             data.DSNormal(np.zeros(d),
                           np.diag(np.hstack((2, np.ones(d - 1))))))
            for d in gvd_ds
        ],

        # Gaussian Bernoulli RBM. dx=50, dh=10
        'gbrbm_dx50_dh10': lambda: gaussbern_rbm_probs(
            gb_rbm_dx50_dh10_stds, dx=50, dh=10, n=sample_size),

        # Gaussian Bernoulli RBM. dx=50, dh=40
        'gbrbm_dx50_dh40': lambda: gaussbern_rbm_probs(
            gb_rbm_dx50_dh40_stds, dx=50, dh=40, n=sample_size),

        # p: N(0, I), q: standard Laplace. Vary d
        'glaplace': lambda: [
            (
                d,
                density.IsotropicNormal(np.zeros(d), 1),
                # Scaling of 1/sqrt(2) will make the variance 1.
                data.DSLaplace(d=d, loc=0, scale=1.0 / np.sqrt(2)))
            for d in glaplace_ds
        ],
    }
    if prob_label not in builders:
        raise ValueError('Unknown problem label. Need to be one of %s' %
                         str(builders.keys()))
    return builders[prob_label]()
Exemple #9
0
def get_ns_pqsource(prob_label):
    """
    Return (ns, p, ds), a tuple for the problem named by prob_label, where
    - ns: a list of sample sizes
    - p: a Density representing the distribution p
    - ds: a DataSource which generates sample from q.

    Only the requested problem is constructed: each entry of the table below
    is a zero-argument builder that is called after the label has been
    validated. An unknown label therefore fails immediately, and no
    gbrbm_perturb call is made for problems that were not asked for.

    For the RBM problems the result is (ns,) + gbrbm_perturb(...), so
    gbrbm_perturb has to return a tuple (presumably (p, ds) - confirm against
    its definition).

    Raises ValueError if prob_label is not a known problem label.
    """
    gmd_p03_d10_ns = [1000, 3000, 5000]

    def thousands(first, last):
        # Sample sizes first*1000, ..., last*1000 (both ends included).
        return [i*1000 for i in range(first, last+1)]

    builders = {

            # d=10. P = N(0, I), Q = N( (0.03, 0, ..., 0), I). The sample
            # size varies.
            'gmd_p03_d10_ns': lambda: (
                gmd_p03_d10_ns,
                density.IsotropicNormal(np.zeros(10), 1),
                data.DSIsotropicNormal(np.hstack((0.03, np.zeros(10-1))), 1)
                ),

            # Gaussian Bernoulli RBM. dx=50, dh=10
            # var_perturb_B = 0.1
            'gbrbm_dx50_dh10_vp1': lambda: (
                (thousands(1, 4), ) +
                gbrbm_perturb(var_perturb_B=0.1, dx=50, dh=10)),

            # Gaussian Bernoulli RBM. dx=50, dh=40
            # var_perturb_B = 0.1
            'gbrbm_dx50_dh40_vp1': lambda: (
                (thousands(1, 4), ) +
                gbrbm_perturb(var_perturb_B=0.1, dx=50, dh=40)),

            # Gaussian Bernoulli RBM. dx=50, dh=10
            # No perturbation
            'gbrbm_dx50_dh10_h0': lambda: (
                (thousands(1, 4), ) +
                gbrbm_perturb(var_perturb_B=0, dx=50, dh=10)),

            # Gaussian Bernoulli RBM. dx=50, dh=40
            # No perturbation
            'gbrbm_dx50_dh40_h0': lambda: (
                (thousands(1, 4), ) +
                gbrbm_perturb(var_perturb_B=0, dx=50, dh=40)),

            # Gaussian Bernoulli RBM. dx=20, dh=10
            # var_perturb_B = 0.1
            'gbrbm_dx20_dh10_vp1': lambda: (
                (thousands(2, 5), ) +
                gbrbm_perturb(var_perturb_B=0.1, dx=20, dh=10)),

            # Gaussian Bernoulli RBM. dx=20, dh=10
            # No perturbation
            'gbrbm_dx20_dh10_h0': lambda: (
                (thousands(2, 5), ) +
                gbrbm_perturb(var_perturb_B=0, dx=20, dh=10)),

            }
    if prob_label not in builders:
        raise ValueError('Unknown problem label. Need to be one of %s'%str(builders.keys()) )
    return builders[prob_label]()
def get_pqsource(prob_label):
    """
    Return (p, ds), a tuple for the problem named by prob_label, where
    - p: a Density representing the distribution p
    - ds: a DataSource which generates sample from q.

    Only the requested problem is constructed: each entry of the table below
    is a zero-argument builder that is called after the label has been
    validated. An unknown label therefore fails immediately, and looking up
    a Gaussian problem no longer builds the RBM problems as a side effect.

    The RBM builders read `sample_size`, a name that is not defined in this
    function and must be available at module level when they run.

    Raises ValueError if prob_label is not a known problem label.
    """
    builders = {
        # H0 is true. d=5. P = Q = N(0, I)
        'sg5': lambda: (
            density.IsotropicNormal(np.zeros(5), 1),
            data.DSIsotropicNormal(np.zeros(5), 1)),

        # P = N(0, I), Q = N( (0.2, 0, ..., 0), I)
        'gmd5': lambda: (
            density.IsotropicNormal(np.zeros(5), 1),
            data.DSIsotropicNormal(np.hstack((0.2, np.zeros(4))), 1)),
        'gmd1': lambda: (
            density.IsotropicNormal(np.zeros(1), 1),
            data.DSIsotropicNormal(np.ones(1) * 0.2, 1)),

        # P = N(0, I), Q = N( (1, 0, ..., 0), I)
        'gmd100': lambda: (
            density.IsotropicNormal(np.zeros(100), 1),
            data.DSIsotropicNormal(np.hstack((1, np.zeros(99))), 1)),

        # Gaussian variance difference problem. Only the variance
        # of the first dimension differs (2 instead of 1).
        'gvd5': lambda: (
            density.Normal(np.zeros(5), np.eye(5)),
            data.DSNormal(np.zeros(5),
                          np.diag(np.hstack((2, np.ones(4)))))),
        'gvd10': lambda: (
            density.Normal(np.zeros(10), np.eye(10)),
            data.DSNormal(np.zeros(10),
                          np.diag(np.hstack((2, np.ones(9)))))),

        # Gaussian Bernoulli RBM. dx=50, dh=10. H0 is true
        'gbrbm_dx50_dh10_v0': lambda: gaussbern_rbm_tuple(
            0, dx=50, dh=10, n=sample_size),

        # Gaussian Bernoulli RBM. dx=5, dh=3. H0 is true
        'gbrbm_dx5_dh3_v0': lambda: gaussbern_rbm_tuple(
            0, dx=5, dh=3, n=sample_size),

        # Gaussian Bernoulli RBM. dx=50, dh=10. First argument is 1e-3.
        'gbrbm_dx50_dh10_v1em3': lambda: gaussbern_rbm_tuple(
            1e-3, dx=50, dh=10, n=sample_size),

        # Gaussian Bernoulli RBM. dx=5, dh=3. First argument is 5e-3.
        'gbrbm_dx5_dh3_v5em3': lambda: gaussbern_rbm_tuple(
            5e-3, dx=5, dh=3, n=sample_size),

        # Gaussian mixture of two components. Uniform mixture weights.
        # p = 0.5*N(0, 1) + 0.5*N(3, 0.01)
        # q = 0.5*N(-3, 0.01) + 0.5*N(0, 1)
        # NOTE(review): assumes the second positional argument holds the
        # per-component variances and the default weights are uniform -
        # confirm against IsoGaussianMixture.
        'gmm_d1': lambda: (
            density.IsoGaussianMixture(np.array([[0], [3.0]]),
                                       np.array([1, 0.01])),
            data.DSIsoGaussianMixture(np.array([[-3.0], [0]]),
                                      np.array([0.01, 1]))),

        # p = N(0, I)
        # q = 0.1*N(0, 0.0001*I) + 0.9*N(0, I)
        # Both component means are the zero vector; the leading 0.0 in the
        # first mean is written separately, presumably so that it can be
        # changed to move only the first coordinate.
        'g_vs_gmm_d5': lambda: (
            density.IsotropicNormal(np.zeros(5), 1),
            data.DSIsoGaussianMixture(
                np.vstack((np.hstack((0.0, np.zeros(4))), np.zeros(5))),
                np.array([0.0001, 1]),
                pmix=[0.1, 0.9])),

        # Same construction with d=2 and 0.01 for the first component.
        'g_vs_gmm_d2': lambda: (
            density.IsotropicNormal(np.zeros(2), 1),
            data.DSIsoGaussianMixture(
                np.vstack((np.hstack((0.0, np.zeros(1))), np.zeros(2))),
                np.array([0.01, 1]),
                pmix=[0.1, 0.9])),

        # Same construction with d=1 and 0.01 for the first component.
        'g_vs_gmm_d1': lambda: (
            density.IsotropicNormal(np.zeros(1), 1),
            data.DSIsoGaussianMixture(np.array([[0.0], [0]]),
                                      np.array([0.01, 1]),
                                      pmix=[0.1, 0.9])),
    }
    if prob_label not in builders:
        raise ValueError('Unknown problem label. Need to be one of %s' %
                         str(builders.keys()))
    return builders[prob_label]()