Ejemplo n.º 1
0
def perform_me_test(train_miss_impute,
                    test_miss_impute,
                    train_full,
                    test_full,
                    alpha,
                    test_locs_miss=None,
                    gwidth_miss=None,
                    test_locs_full=None,
                    gwidth_full=None):
    """Run the mean embedding (ME) two-sample test on two pairs of samples.

    The same procedure is applied first to the pair
    (train_miss_impute, test_miss_impute) and then to the pair
    (train_full, test_full):

    1. Wrap the pair in a TSTData object.
    2. Take the first output of ``split_tr_te(tr_proportion=1, seed=1)`` as
       the data used for optimization and the second output of
       ``split_tr_te(tr_proportion=0, seed=1)`` as the data that is tested.
    3. If no test locations were supplied for the pair, optimize the test
       locations and the Gaussian width with
       ``MeanEmbeddingTest.optimize_locs_width``; otherwise use the supplied
       locations and width as they are.
    4. Run ``perform_test`` and record whether H0 was rejected.

    Args:
        train_miss_impute, test_miss_impute: the two samples of the first
            pair.
        train_full, test_full: the two samples of the second pair.
        alpha: significance level passed to the optimizer and to the test.
        test_locs_miss, gwidth_miss: optional precomputed test locations and
            Gaussian width for the first pair.
        test_locs_full, gwidth_full: optional precomputed test locations and
            Gaussian width for the second pair.

    Returns:
        A tuple ``(me_result, test_locs_miss, gwidth_miss, test_locs_full,
        gwidth_full)``. ``me_result`` is a length-2 NumPy array whose entry
        0 (first pair) / entry 1 (second pair) is 1 when H0 was rejected and
        0 otherwise. The locations and widths are the ones actually used, so
        they can be passed back in on a later call.
    """
    optimizer_options = {
        # number of test locations to optimize
        'n_test_locs': 10,
        # maximum number of gradient ascent iterations
        'max_iter': 200,
        # step size for the test locations (features)
        'locs_step_size': 1.0,
        # step size for the Gaussian width
        'gwidth_step_size': 0.1,
        # stop if the objective does not increase more than this
        'tol_fun': 1e-4,
        # random seed
        'seed': 0,
    }

    def _test_pair(sample_a, sample_b, locs, width):
        # Apply steps 1-4 of the docstring to a single pair of samples.
        paired = TSTData(sample_a, sample_b)
        fit_part, _ = paired.split_tr_te(tr_proportion=1, seed=1)
        _, eval_part = paired.split_tr_te(tr_proportion=0, seed=1)

        if locs is None:
            locs, width, _ = tst.MeanEmbeddingTest.optimize_locs_width(
                fit_part, alpha, **optimizer_options)

        me_test = tst.MeanEmbeddingTest(locs, width, alpha)
        outcome = me_test.perform_test(eval_part)
        return outcome['h0_rejected'], locs, width

    me_result = np.zeros(2)

    rejected_miss, test_locs_miss, gwidth_miss = _test_pair(
        train_miss_impute, test_miss_impute, test_locs_miss, gwidth_miss)
    if rejected_miss:
        me_result[0] = 1

    rejected_full, test_locs_full, gwidth_full = _test_pair(
        train_full, test_full, test_locs_full, gwidth_full)
    if rejected_full:
        me_result[1] = 1

    return me_result, test_locs_miss, gwidth_miss, test_locs_full, gwidth_full
Ejemplo n.º 2
0
def TST_ME(Fea, N1, alpha, is_train, test_locs, gwidth, J=1, seed=15):
    """Train or apply a mean embedding (ME) two-sample test on features.

    ``Fea`` is first passed through ``get_item(Fea, is_cuda)`` (``is_cuda``
    is a module-level name). Rows ``[0:N1]`` of the result form the first
    sample and rows ``[N1:]`` the second.

    Args:
        Fea: feature matrix holding both samples stacked row-wise.
        N1: number of rows belonging to the first sample.
        alpha: significance level.
        is_train: when truthy, optimize test locations and Gaussian width;
            otherwise run the test with the supplied ones.
        test_locs: test locations (used only when ``is_train`` is falsy).
        gwidth: Gaussian width (used only when ``is_train`` is falsy).
        J: number of test locations to optimize.
        seed: base random seed; the optimizer receives ``seed + 5``.

    Returns:
        ``(test_locs, gwidth)`` in training mode; otherwise ``1`` if H0 was
        rejected and ``0`` if not.
    """
    features = get_item(Fea, is_cuda)
    two_sample = data.TSTData(features[0:N1, :], features[N1:, :])

    if not is_train:
        me_test = tst.MeanEmbeddingTest(test_locs, gwidth, alpha)
        outcome = me_test.perform_test(two_sample)
        return 1 if outcome['h0_rejected'] else 0

    optimizer_options = {
        # number of test locations to optimize
        'n_test_locs': J,
        # maximum number of gradient ascent iterations
        'max_iter': 300,
        # step size for the test locations (features)
        'locs_step_size': 1.0,
        # step size for the Gaussian width
        'gwidth_step_size': 0.1,
        # stop if the objective does not increase more than this
        'tol_fun': 1e-4,
        # random seed
        'seed': seed + 5,
    }
    learned_locs, learned_width, _ = (
        tst.MeanEmbeddingTest.optimize_locs_width(
            two_sample, alpha, **optimizer_options))
    return learned_locs, learned_width
Ejemplo n.º 3
0
    def _get_metest_opt(self, dat, op=None):
        """Build an optimized ME test for the model ``self.p`` against ``dat``.

        A sample with as many points as ``dat`` is drawn from the data source
        of ``self.p``. Both that sample and ``dat`` are split into training
        and test parts using ``self.tr_proportion``. Test locations and the
        Gaussian width are optimized on the training parts.

        Args:
            dat: observed data; must provide ``sample_size()`` and
                ``split_tr_te()``.
            op: optional dict of keyword arguments for
                ``MeanEmbeddingTest.optimize_locs_width``. When None, a
                default set of options is used.

        Returns:
            ``(metest, tr_tst_data, te_tst_data)``: the constructed
            MeanEmbeddingTest plus the training and test TSTData objects.
        """
        base_seed = self.seed
        if op is None:
            op = {
                'n_test_locs': self.n_locs,
                'seed': base_seed + 5,
                'max_iter': 100,
                'batch_proportion': 1.0,
                'locs_step_size': 1.0,
                'gwidth_step_size': 0.1,
                'tol_fun': 1e-4,
                'reg': 1e-6,
            }
        level = self.alpha

        # Draw from p as many points as there are in dat.
        model_source = self.p.get_datasource()
        model_sample = model_source.sample(dat.sample_size(), seed=base_seed)
        model_tr, model_te = model_sample.split_tr_te(
            tr_proportion=self.tr_proportion, seed=base_seed + 18)
        # The observed data is split with a different seed offset.
        obs_tr, obs_te = dat.split_tr_te(
            tr_proportion=self.tr_proportion, seed=base_seed + 12)

        # Pair model draws with observations for training and for testing.
        train_pair = fdata.TSTData(model_tr.data(), obs_tr.data())
        test_pair = fdata.TSTData(model_te.data(), obs_te.data())

        # Train the ME test on the training pair.
        locs, width2, _ = tst.MeanEmbeddingTest.optimize_locs_width(
            train_pair, level, **op)
        return tst.MeanEmbeddingTest(locs, width2, level), train_pair, test_pair
Ejemplo n.º 4
0
def job_met_gwopt(sample_source, tr, te, r):
    """Disabled job: ME test with only the Gaussian width optimized.

    This job used to fix randomly drawn test locations, tune the Gaussian
    width by gradient ascent on the training set, and then unconditionally
    raise before returning anything. The optimization result was therefore
    always discarded, so the function now fails immediately instead of
    doing that work first.

    Args:
        sample_source, tr, te, r: accepted for signature compatibility;
            unused.

    Raises:
        ValueError: always. The message points to job_met_gwgrid as the
            replacement.
    """
    raise ValueError('Use job_met_gwgrid instead')
Ejemplo n.º 5
0
def job_met_opt(sample_source, tr, te, r):
    """Run the ME test with optimized test locations and Gaussian width.

    Test locations and width are optimized on ``tr`` and the resulting test
    is performed on ``te``. ``J`` (number of test locations) and ``alpha``
    (significance level) are read from module scope. The optimizer seed is
    ``r + 92856``. ``sample_source`` is not used.

    Returns:
        Whatever ``MeanEmbeddingTest.perform_test(te)`` returns.
    """
    locs, width, _ = tst.MeanEmbeddingTest.optimize_locs_width(
        tr,
        alpha,
        n_test_locs=J,
        max_iter=200,
        locs_step_size=0.1,
        gwidth_step_size=0.1,
        seed=r + 92856,
        tol_fun=1e-3,
    )
    return tst.MeanEmbeddingTest(locs, width, alpha).perform_test(te)
Ejemplo n.º 6
0
    def test(self, X, Y):
        """Return the p-value of an ME two-sample test between X and Y.

        The samples are combined with ``self.preprocess``. ``self.J`` test
        locations are chosen with ``MeanEmbeddingTest.init_locs_subset`` and
        the kernel width is taken from ``meddistance`` on the stacked data
        (second argument 1000). Nothing is optimized, and the same data is
        used both to pick the locations/width and to run the test.

        Args:
            X: first sample.
            Y: second sample.

        Returns:
            The ``'pvalue'`` entry of the ``perform_test`` result.
        """
        XY = self.preprocess(X, Y)

        locations = fot_tst.MeanEmbeddingTest.init_locs_subset(XY, self.J)
        med = fot_util.meddistance(XY.stack_xy(), 1000)
        # NOTE(review): `med` is passed unsquared. Confirm whether
        # MeanEmbeddingTest expects the squared Gaussian width (med**2).
        ME = fot_tst.MeanEmbeddingTest(locations, med, alpha=self.alpha)

        return ME.perform_test(XY)['pvalue']
Ejemplo n.º 7
0
def job_met_opt(sample_source, tr, te, r):
    """Timed ME test with optimized test locations and Gaussian width.

    Optimization runs on ``tr`` and the test on ``te``, both inside a
    ``util.ContextTimer``. ``J`` and ``alpha`` are read from module scope.
    The optimizer seed is ``r + 92856``. ``sample_source`` is not used.

    Returns:
        A dict with keys ``'test_method'`` (the MeanEmbeddingTest object),
        ``'test_result'`` (output of ``perform_test``) and ``'time_secs'``
        (the timer's ``secs``).
    """
    with util.ContextTimer() as timer:
        optimizer_kwargs = dict(
            n_test_locs=J,
            max_iter=200,
            locs_step_size=500.0,
            gwidth_step_size=0.2,
            seed=r + 92856,
            tol_fun=1e-4,
        )
        locs, width, _ = tst.MeanEmbeddingTest.optimize_locs_width(
            tr, alpha, **optimizer_kwargs)
        me_test = tst.MeanEmbeddingTest(locs, width, alpha)
        outcome = me_test.perform_test(te)

    return {
        'test_method': me_test,
        'test_result': outcome,
        'time_secs': timer.secs,
    }
Ejemplo n.º 8
0
def job_met_gwgrid(sample_source, tr, te, r, J):
    """ME test with fixed test locations and a grid-searched Gaussian width.

    ``J`` test locations are drawn with ``init_locs_2randn`` (seed
    ``r + 92856``) and kept fixed. The width is selected on ``tr`` by
    ``grid_search_gwidth`` from 40 candidates of the form
    ``med**2 * 2**e`` with ``e`` evenly spaced in [-5, 5], where ``med`` is
    the value of ``util.meddistance`` on the stacked training data. The test
    is then run on ``te``. ``alpha`` is read from module scope.
    ``sample_source`` is not used.

    Returns:
        Whatever ``MeanEmbeddingTest.perform_test(te)`` returns.
    """
    fixed_locs = tst.MeanEmbeddingTest.init_locs_2randn(
        tr, J, seed=r + 92856)

    median_dist = util.meddistance(tr.stack_xy(), 1000)
    scale_factors = 2.0**np.linspace(-5, 5, 40)
    candidate_widths = np.sort(np.hstack((median_dist**2) * scale_factors))

    best_index, _ = tst.MeanEmbeddingTest.grid_search_gwidth(
        tr, fixed_locs, candidate_widths, alpha)

    chosen_width = candidate_widths[best_index]
    return tst.MeanEmbeddingTest(fixed_locs, chosen_width,
                                 alpha).perform_test(te)
Ejemplo n.º 9
0
    def test(self, X, Y):
        """Return the p-value of an ME test with an optimized Gaussian width.

        The preprocessed data is split by ``self.split_ratio``. On the
        training part, ``self.J`` test locations are picked with
        ``init_locs_subset`` and the width is optimized with
        ``optimize_gwidth``, starting from the square of the ``meddistance``
        value. The test is then run on the held-out part.

        Args:
            X: first sample.
            Y: second sample.

        Returns:
            The ``'pvalue'`` entry of the ``perform_test`` result.
        """
        paired = self.preprocess(X, Y)
        fit_part, eval_part = paired.split_tr_te(
            tr_proportion=self.split_ratio)

        locs = fot_tst.MeanEmbeddingTest.init_locs_subset(fit_part, self.J)
        median_dist = fot_util.meddistance(fit_part.stack_xy(), 1000)
        # The squared median distance is the optimizer's starting width.
        width, _ = fot_tst.MeanEmbeddingTest.optimize_gwidth(
            fit_part, locs, median_dist**2)

        me_test = fot_tst.MeanEmbeddingTest(locs, width, alpha=self.alpha)
        return me_test.perform_test(eval_part)['pvalue']
Ejemplo n.º 10
0
def job_met_opt5(sample_source, tr, te, r):
    """Timed ME test with optimized locations, location step size 0.5.

    Test locations and Gaussian width are optimized on ``tr`` and the test
    is run on ``te``, both inside a ``util.ContextTimer``. ``J`` and
    ``alpha`` are read from module scope. The optimizer seed is
    ``r + 92856``. ``sample_source`` is not used.

    Returns:
        A dict with keys ``'test_result'`` (output of ``perform_test``) and
        ``'time_secs'`` (the timer's ``secs``). The test object itself is
        not included.
    """
    with util.ContextTimer() as timer:
        locs, width, _ = tst.MeanEmbeddingTest.optimize_locs_width(
            tr,
            alpha,
            n_test_locs=J,
            max_iter=200,
            locs_step_size=0.5,
            gwidth_step_size=0.1,
            seed=r + 92856,
            tol_fun=1e-3,
        )
        me_test = tst.MeanEmbeddingTest(locs, width, alpha)
        outcome = me_test.perform_test(te)

    return {'test_result': outcome, 'time_secs': timer.secs}
Ejemplo n.º 11
0
    def test(self, X, Y):
        """Return the p-value of an ME test with optimized locations and width.

        The preprocessed data is split by ``self.split_ratio``.
        ``optimize_locs_width`` is run on the training part with
        ``self.J`` test locations, while stdout is redirected to None to
        suppress whatever the optimizer prints. The test is then run on the
        held-out part.

        Args:
            X: first sample.
            Y: second sample.

        Returns:
            The ``'pvalue'`` entry of the ``perform_test`` result.
        """
        paired = self.preprocess(X, Y)
        fit_part, eval_part = paired.split_tr_te(
            tr_proportion=self.split_ratio)

        # Silence the optimizer's console output.
        with contextlib.redirect_stdout(None):
            locs, width, _ = fot_tst.MeanEmbeddingTest.optimize_locs_width(
                fit_part, self.alpha, n_test_locs=self.J)

        me_test = fot_tst.MeanEmbeddingTest(locs, width, alpha=self.alpha)
        return me_test.perform_test(eval_part)['pvalue']
Ejemplo n.º 12
0
    def __init__(self, p, gwidth2, test_locs, alpha=0.01, seed=28):
        """
        Set up a mean embedding (ME) test with a Gaussian kernel.

        p: an instance of UnnormalizedDensity
        gwidth2: Gaussian width squared for the Gaussian kernel
        test_locs: J x d numpy array of J locations to test the difference
        alpha: significance level
        seed: random seed stored on the instance as self.seed

        Raises ValueError if p.get_datasource() returns None.
        """
        super(GaussMETest, self).__init__(p, alpha)
        self.gwidth2 = gwidth2
        self.test_locs = test_locs
        self.seed = seed
        ds = p.get_datasource()
        if ds is None:
            # Use `%` so the class name is actually interpolated. Passing it
            # as a second argument left a raw '%s' in the exception args.
            raise ValueError(
                '%s test requires a density p which implements '
                'get_datasource()' % str(GaussMETest))

        # Construct the ME test
        self.metest = tst.MeanEmbeddingTest(test_locs, gwidth2, alpha=alpha)
Ejemplo n.º 13
0
def job_met_gwopt(prob_label, tr, te, r, ni, n):
    """Disabled job: ME test with only the Gaussian width optimized.

    The function raises on its first statement, so the width-optimization
    code that used to follow could never run. That unreachable code has
    been removed.

    Args:
        prob_label, tr, te, r, ni, n: accepted for signature compatibility;
            unused.

    Raises:
        ValueError: always. The message points to job_met_gwgrid as the
            replacement.
    """
    raise ValueError('Use job_met_gwgrid instead')
Ejemplo n.º 14
0
def job_met_gwgrid(prob_label, tr, te, r, ni, n):
    """Timed ME test with fixed locations and a grid-searched Gaussian width.

    Inside a ``util.ContextTimer``: ``J`` test locations are drawn with
    ``init_locs_2randn`` (seed ``r + 92856``) and kept fixed. The width is
    selected on ``tr`` by ``grid_search_gwidth`` from 40 candidates of the
    form ``med**2 * 2**e`` with ``e`` evenly spaced in [-5, 5], where
    ``med`` is the value of ``util.meddistance`` on the stacked training
    data. The test is then run on ``te``. ``J`` and ``alpha`` are read from
    module scope. ``prob_label``, ``ni`` and ``n`` are not used.

    Returns:
        A dict with keys ``'test_result'`` (output of ``perform_test``) and
        ``'time_secs'`` (the timer's ``secs``). The test object itself is
        not included.
    """
    with util.ContextTimer() as timer:
        fixed_locs = tst.MeanEmbeddingTest.init_locs_2randn(
            tr, J, seed=r + 92856)

        median_dist = util.meddistance(tr.stack_xy(), 1000)
        scale_factors = 2.0**np.linspace(-5, 5, 40)
        candidate_widths = np.sort(
            np.hstack((median_dist**2) * scale_factors))

        best_index, _ = tst.MeanEmbeddingTest.grid_search_gwidth(
            tr, fixed_locs, candidate_widths, alpha)

        chosen_width = candidate_widths[best_index]
        grid_test = tst.MeanEmbeddingTest(fixed_locs, chosen_width, alpha)
        outcome = grid_test.perform_test(te)

    return {'test_result': outcome, 'time_secs': timer.secs}
Ejemplo n.º 15
0
def job_met_opt10(prob_label, tr, te, r, ni, n):
    """Timed ME test with optimized locations, location step size 5.0.

    Test locations and Gaussian width are optimized on ``tr`` and the test
    is run on ``te``, both inside a ``util.ContextTimer``. ``J`` and
    ``alpha`` are read from module scope. The optimizer seed is
    ``r + 92856``. ``prob_label``, ``ni`` and ``n`` are not used.

    Returns:
        A dict with keys ``'test_result'`` (output of ``perform_test``) and
        ``'time_secs'`` (the timer's ``secs``). The test object itself is
        not included.
    """
    with util.ContextTimer() as timer:
        optimizer_kwargs = dict(
            n_test_locs=J,
            max_iter=200,
            locs_step_size=5.0,
            gwidth_step_size=0.2,
            seed=r + 92856,
            tol_fun=1e-3,
        )
        locs, width, _ = tst.MeanEmbeddingTest.optimize_locs_width(
            tr, alpha, **optimizer_kwargs)
        outcome = tst.MeanEmbeddingTest(locs, width, alpha).perform_test(te)

    return {'test_result': outcome, 'time_secs': timer.secs}
Ejemplo n.º 16
0
def wtest(p, q, alpha=0.05):
    """Run an optimized ME two-sample test on samples ``p`` and ``q``.

    One-dimensional inputs are reshaped into single-column 2-D arrays. The
    pair is split in half with ``split_tr_te(tr_proportion=0.5)``. Two test
    locations and the Gaussian width are optimized on the first half, and
    the test is performed on the second half.

    Args:
        p: first sample (NumPy array, 1-D or 2-D).
        q: second sample (NumPy array, 1-D or 2-D).
        alpha: significance level.

    Returns:
        ``(test_stat, pvalue)``. If the reported test statistic equals -1,
        both values are returned as NaN.
    """
    if p.ndim == 1:
        p = p[:, np.newaxis]
    if q.ndim == 1:
        q = q[:, np.newaxis]

    optimizer_kwargs = dict(
        n_test_locs=2,
        seed=0,
        max_iter=200,
        batch_proportion=1.0,
        locs_step_size=1.0,
        gwidth_step_size=0.1,
        tol_fun=1e-4,
    )

    fit_part, eval_part = data.TSTData(p, q).split_tr_te(tr_proportion=0.5)
    locs, width, _ = tst.MeanEmbeddingTest.optimize_locs_width(
        fit_part, alpha, **optimizer_kwargs)
    outcome = tst.MeanEmbeddingTest(locs, width, alpha).perform_test(eval_part)

    # A statistic of exactly -1 is reported as NaN for both outputs.
    if outcome['test_stat'] == -1:
        return np.nan, np.nan
    return outcome['test_stat'], outcome['pvalue']