Example #1
    def optimize_auto_init(p, dat, J, **ops):
        """
        Optimize parameters by calling optimize_locs_widths(). Automatically 
        initialize the test locations and the Gaussian width.

        Return optimized locations, Gaussian width, optimization info
        """
        assert J > 0
        # Use grid search to initialize the gwidth
        X = dat.data()
        n_gwidth_cand = 5
        gwidth_factors = 2.0**np.linspace(-3, 3, n_gwidth_cand)
        med2 = util.meddistance(X, 1000)**2

        k = kernel.KGauss(med2 * 2)
        # fit a Gaussian to the data and draw J points to initialize V0
        V0 = util.fit_gaussian_draw(X, J, seed=829, reg=1e-6)
        list_gwidth = np.hstack(((med2) * gwidth_factors))
        besti, objs = GaussFSSD.grid_search_gwidth(p, dat, V0, list_gwidth)
        gwidth = list_gwidth[besti]
        assert util.is_real_num(
            gwidth), 'gwidth not real. Was %s' % str(gwidth)
        assert gwidth > 0, 'gwidth not positive. Was %.3g' % gwidth
        logging.info('After grid search, gwidth=%.3g' % gwidth)

        V_opt, gwidth_opt, info = GaussFSSD.optimize_locs_widths(
            p, dat, gwidth, V0, **ops)

        # set the width bounds
        #fac_min = 5e-2
        #fac_max = 5e3
        #gwidth_lb = fac_min*med2
        #gwidth_ub = fac_max*med2
        #gwidth_opt = max(gwidth_lb, min(gwidth_opt, gwidth_ub))
        return V_opt, gwidth_opt, info
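A minimal usage sketch (not part of the source snippet), assuming the `kgof` package layout suggested by the names above (`kgof.density`, `kgof.data`, `kgof.goftest.GaussFSSD`); the `GaussFSSD` constructor arguments are an assumption:

import numpy as np
import kgof.data as data
import kgof.density as density
import kgof.goftest as gof

# Model p = N(0, I) in 2 dimensions and a sample drawn from the same model.
p = density.IsotropicNormal(np.zeros(2), 1.0)
dat = data.DSIsotropicNormal(np.zeros(2), 1.0).sample(800, seed=7)
# Hold out 80% for the actual test; optimize on the remaining 20%.
tr, te = dat.split_tr_te(tr_proportion=0.2, seed=8)

# Optimize J=5 test locations and the Gaussian width on the training split.
V_opt, gw_opt, info = gof.GaussFSSD.optimize_auto_init(p, tr, J=5, max_iter=50)

# Run the FSSD test on the held-out split (GaussFSSD signature assumed).
fssd = gof.GaussFSSD(p, gw_opt, V_opt, alpha=0.01)
print(fssd.perform_test(te))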
Example #2
    def perform_test(self,
                     dat,
                     candidate_kernels=None,
                     return_mmdtest=False,
                     tr_proportion=0.2,
                     reg=1e-3):
        """
        dat: an instance of Data
        candidate_kernels: a list of Kernel's to choose from
        tr_proportion: proportion of the sample to be used for choosing the
            best kernel
        reg: regularization parameter for the test power criterion 
        """
        with util.ContextTimer() as t:
            seed = self.seed
            p = self.p
            ds = p.get_datasource()
            p_sample = ds.sample(dat.sample_size(), seed=seed + 77)
            xtr, xte = p_sample.split_tr_te(tr_proportion=tr_proportion,
                                            seed=seed + 18)
            # ytr, yte are of type data.Data
            ytr, yte = dat.split_tr_te(tr_proportion=tr_proportion,
                                       seed=seed + 12)

            # training and test data
            tr_tst_data = fdata.TSTData(xtr.data(), ytr.data())
            te_tst_data = fdata.TSTData(xte.data(), yte.data())

            if candidate_kernels is None:
                # Assume a Gaussian kernel. Construct a list of
                # kernels to try based on multiples of the median heuristic
                med = util.meddistance(tr_tst_data.stack_xy(), 1000)
                list_gwidth = np.hstack(
                    ((med**2) * (2.0**np.linspace(-4, 4, 10))))
                list_gwidth.sort()
                candidate_kernels = [kernel.KGauss(gw2) for gw2 in list_gwidth]

            alpha = self.alpha

            # grid search to choose the best Gaussian width
            besti, powers = tst.QuadMMDTest.grid_search_kernel(
                tr_tst_data, candidate_kernels, alpha, reg=reg)
            # perform test
            best_ker = candidate_kernels[besti]
            mmdtest = tst.QuadMMDTest(best_ker, self.n_permute, alpha=alpha)
            results = mmdtest.perform_test(te_tst_data)
            if return_mmdtest:
                results['mmdtest'] = mmdtest

        results['time_secs'] = t.secs
        return results
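A hypothetical usage sketch: the attributes referenced (`self.p`, `self.alpha`, `self.n_permute`, `self.seed`) suggest this method belongs to an MMD-based model-criticism test wrapper. The class name `MMDGofTest` below is a placeholder, not the source's, and the result keys follow the `freqopttest` convention (an assumption):

# Placeholder class name; construct with a density p, significance level,
# number of permutations, and a seed (argument names assumed).
mmd_test = MMDGofTest(p, n_permute=400, alpha=0.01, seed=3)

# Choose a Gaussian kernel by grid search on 20% of the sample, then run the
# quadratic-time MMD permutation test on the remaining 80%.
results = mmd_test.perform_test(dat, tr_proportion=0.2, reg=1e-3)
print(results['h0_rejected'], results['time_secs'])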
Example #3
    def optimize_locs_widths(
        p,
        dat,
        gwidth0,
        test_locs0,
        reg=1e-2,
        max_iter=100,
        tol_fun=1e-5,
        disp=False,
        locs_bounds_frac=100,
        gwidth_lb=None,
        gwidth_ub=None,
        use_2terms=False,
    ):
        """
        Optimize the test locations and the Gaussian kernel width by
        maximizing a test power criterion. dat should not be the same data
        used in the actual test (i.e., it should be a held-out set).
        This function is deterministic.

        - dat: a Data object
        - test_locs0: Jxd numpy array. Initial V.
        - reg: reg to add to the mean/sqrt(variance) criterion to become
            mean/sqrt(variance + reg)
        - gwidth0: initial value of the Gaussian width^2
        - max_iter: #gradient descent iterations
        - tol_fun: termination tolerance of the objective value
        - disp: True to print convergence messages
        - locs_bounds_frac: When making box bounds for the test_locs, extend
            the box defined by coordinate-wise min-max by std of each coordinate
            multiplied by this number.
        - gwidth_lb: absolute lower bound on the Gaussian width^2
        - gwidth_ub: absolute upper bound on the Gaussian width^2
        - use_2terms: If True, then besides the signal-to-noise ratio
          criterion, the objective function will also include the first term
          that is otherwise dropped.

        #- If the lb, ub bounds are None, use fraction of the median heuristics 
        #    to automatically set the bounds.
        
        Return (test locations V, Gaussian width, optimization info log)
        """
        J = test_locs0.shape[0]
        X = dat.data()
        n, d = X.shape

        # Parameterize the Gaussian width with its square root (then square later)
        # to automatically enforce the positivity.
        def obj(sqrt_gwidth, V):
            return -GaussFSSD.power_criterion(
                p, dat, sqrt_gwidth**2, V, reg=reg, use_2terms=use_2terms)

        flatten = lambda gwidth, V: np.hstack((gwidth, V.reshape(-1)))

        def unflatten(x):
            sqrt_gwidth = x[0]
            V = np.reshape(x[1:], (J, d))
            return sqrt_gwidth, V

        def flat_obj(x):
            sqrt_gwidth, V = unflatten(x)
            return obj(sqrt_gwidth, V)

        # gradient
        #grad_obj = autograd.elementwise_grad(flat_obj)
        # Initial point
        x0 = flatten(np.sqrt(gwidth0), test_locs0)

        # Make sure that the optimized gwidth is not too small or too large.
        fac_min = 1e-2
        fac_max = 1e2
        med2 = util.meddistance(X, subsample=1000)**2
        if gwidth_lb is None:
            gwidth_lb = max(fac_min * med2, 1e-3)
        if gwidth_ub is None:
            gwidth_ub = min(fac_max * med2, 1e5)

        # Make a box to bound test locations
        X_std = np.std(X, axis=0)
        # X_min: length-d array
        X_min = np.min(X, axis=0)
        X_max = np.max(X, axis=0)
        # V_lb: J x d
        V_lb = np.tile(X_min - locs_bounds_frac * X_std, (J, 1))
        V_ub = np.tile(X_max + locs_bounds_frac * X_std, (J, 1))
        # (J*d+1) x 2. Take square root because we parameterize with the square
        # root
        x0_lb = np.hstack((np.sqrt(gwidth_lb), np.reshape(V_lb, -1)))
        x0_ub = np.hstack((np.sqrt(gwidth_ub), np.reshape(V_ub, -1)))
        x0_bounds = list(zip(x0_lb, x0_ub))

        # optimize. Time the optimization as well.
        # https://docs.scipy.org/doc/scipy/reference/optimize.minimize-lbfgsb.html
        grad_obj = autograd.elementwise_grad(flat_obj)
        with util.ContextTimer() as timer:
            opt_result = scipy.optimize.minimize(
                flat_obj,
                x0,
                method='L-BFGS-B',
                bounds=x0_bounds,
                tol=tol_fun,
                options={
                    'maxiter': max_iter,
                    'ftol': tol_fun,
                    'disp': disp,
                    'gtol': 1.0e-07,
                },
                jac=grad_obj,
            )

        opt_result = dict(opt_result)
        opt_result['time_secs'] = timer.secs
        x_opt = opt_result['x']
        sq_gw_opt, V_opt = unflatten(x_opt)
        gw_opt = sq_gw_opt**2

        assert util.is_real_num(
            gw_opt), 'gw_opt is not real. Was %s' % str(gw_opt)

        return V_opt, gw_opt, opt_result
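For comparison with Example #1, a hedged sketch of calling `optimize_locs_widths` directly, initializing the width with the median heuristic and the locations with `util.fit_gaussian_draw` (reusing `p` and the training split `tr` from the sketch after Example #1; `kgof.util` is assumed):

import kgof.util as util

X = tr.data()
gwidth0 = util.meddistance(X, subsample=1000)**2
V0 = util.fit_gaussian_draw(X, J=5, seed=11, reg=1e-6)

V_opt, gw_opt, info = GaussFSSD.optimize_locs_widths(
    p, tr, gwidth0, V0, reg=1e-2, max_iter=100, tol_fun=1e-5)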
Example #4
d = 2
# sample size
n = 800

mean = np.zeros(d)
variance = 1.0
qmean = mean.copy()
qmean[0] = 0
qvariance = variance

p = density.IsotropicNormal(mean, variance)
ds = data.DSIsotropicNormal(qmean, qvariance)
# ds = data.DSLaplace(d=d, loc=0, scale=1.0/np.sqrt(2))
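# NOTE: `seed` is not defined in this fragment; it is assumed to be set earlier
# in the script this snippet was extracted from.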
dat = ds.sample(n, seed=seed+1)
X = dat.data()
sig2 = util.meddistance(X, subsample=1000) ** 2
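
# A hedged sketch (assuming the kgof API used in the earlier examples): plug the
# median-heuristic width sig2 and a few fitted test locations into a GaussFSSD
# test. Left commented out because `gof` is not imported in this fragment.
# V0 = util.fit_gaussian_draw(X, J=5, seed=seed + 2, reg=1e-6)
# fssd = gof.GaussFSSD(p, sig2, V0, alpha=0.01)
# print(fssd.perform_test(dat))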


def simulatepm(N, p_change):
    '''
    Simulate a sequence of +1/-1 values whose sign flips with probability
    p_change at each step.

    :param N: length of the sequence
    :param p_change: probability of flipping the sign at each step
    :return: a length-N numpy array of +1/-1 values
    '''
    X = np.zeros(N) - 1
    change_sign = np.random.rand(N) < p_change
    for i in range(N):
        if change_sign[i]:
            X[i] = -X[i - 1]
        else:
            X[i] = X[i - 1]
    return X
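
# Example with hypothetical values: a length-1000 sequence of +/-1 values whose
# sign flips with probability 0.3 at each step:
#   signs = simulatepm(1000, 0.3)
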
def main():

    random.seed(0)

    n = 6000  # number of data points, divisible by num_Gaussians
    num_Gaussians = 3
    input_dim = 2
    mean_param = np.zeros((input_dim, num_Gaussians))
    cov_param = np.zeros((input_dim, input_dim, num_Gaussians))

    mean_param[:, 0] = [2, 8]
    mean_param[:, 1] = [-10, -4]
    mean_param[:, 2] = [-1, -7]

    cov_mat = np.empty((2, 2))
    cov_mat[0, 0] = 1
    cov_mat[1, 1] = 4
    cov_mat[0, 1] = -0.25
    cov_mat[1, 0] = -0.25
    cov_param[:, :, 0] = cov_mat

    cov_mat[0, 1] = 0.4
    cov_mat[1, 0] = 0.4
    cov_param[:, :, 1] = cov_mat

    cov_param[:, :, 2] = 2 * np.eye(input_dim)

    data_samps, true_labels = generate_data(mean_param, cov_param, n)

    # test how to use RFF for computing the kernel matrix
    med = util.meddistance(data_samps)
    # sigma2 = med**2
    sigma2 = med  # a smaller length scale than the median heuristic seems to work better here
    print('length scale from median heuristic is', sigma2)

    # random Fourier features
    n_features = 100
    n_classes = num_Gaussians
    """ training a Generator via minimizing MMD """
    mini_batch_size = 1000

    input_size = 10
    hidden_size_1 = 100
    hidden_size_2 = 50
    output_size = input_dim + n_classes

    # model = Generative_Model(input_dim=input_dim, how_many_Gaussians=num_Gaussians)
    model = Generative_Model(input_size=input_size,
                             hidden_size_1=hidden_size_1,
                             hidden_size_2=hidden_size_2,
                             output_size=output_size,
                             n_classes=n_classes)

    # optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    optimizer = optim.Adam(model.parameters(), lr=1e-2)
    # optimizer = optim.SGD(model.parameters(), lr=0.001)
    how_many_epochs = 1000
    how_many_iter = n // mini_batch_size

    training_loss_per_epoch = np.zeros(how_many_epochs)

    draws = n_features // 2
    W_freq = np.random.randn(draws, input_dim) / np.sqrt(sigma2)
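    # Frequencies w ~ N(0, I/sigma2); assuming RFF_Gauss builds cos/sin features
    # from them, the features approximate the Gaussian kernel
    # k(x, y) = exp(-||x - y||^2 / (2 * sigma2)).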
    """ computing mean embedding of true data """
    emb1_input_features = RFF_Gauss(n_features, torch.Tensor(data_samps),
                                    W_freq)
    emb1_labels = torch.Tensor(true_labels)
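    # Outer product of per-sample RFF features and (assumed one-hot) labels,
    # averaged over samples below: an empirical mean embedding of the joint
    # (feature, label) distribution of the true data.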
    outer_emb1 = torch.einsum('ki,kj->kij', [emb1_input_features, emb1_labels])
    mean_emb1 = torch.mean(outer_emb1, 0)

    print('Starting Training')

    for epoch in range(
            how_many_epochs):  # loop over the dataset multiple times

        running_loss = 0.0

        for i in range(how_many_iter):

            # zero the parameter gradients
            optimizer.zero_grad()
            outputs = model(torch.randn((mini_batch_size, input_size)))

            samp_input_features = outputs[:, 0:input_dim]
            samp_labels = outputs[:, -n_classes:]
            """ computing mean embedding of generated samples """
            emb2_input_features = RFF_Gauss(n_features, samp_input_features,
                                            W_freq)
            emb2_labels = samp_labels
            outer_emb2 = torch.einsum('ki,kj->kij',
                                      [emb2_input_features, emb2_labels])
            mean_emb2 = torch.mean(outer_emb2, 0)

            loss = torch.norm(mean_emb1 - mean_emb2, p=2)**2
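
            # The squared distance just computed between the two mean embeddings
            # is a plug-in estimate of MMD^2 in the (approximate) RFF kernel
            # feature space.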

            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()

        if running_loss <= 1e-4:
            break
        print('epoch # and running loss are ', [epoch, running_loss])
        training_loss_per_epoch[epoch] = running_loss

    plt.figure(1)
    plt.subplot(121)
    true_labl = np.argmax(true_labels, axis=1)
    plt.scatter(data_samps[:, 0],
                data_samps[:, 1],
                c=true_labl,
                label=true_labl)
    plt.title('true data')

    plt.subplot(122)
    model.eval()
    generated_samples = samp_input_features.detach().numpy()

    generated_labels = samp_labels.detach().numpy()
    labl = np.argmax(generated_labels, axis=1)
    plt.scatter(generated_samples[:, 0],
                generated_samples[:, 1],
                c=labl,
                label=labl)
    plt.title('simulated data')

    plt.figure(2)
    plt.plot(training_loss_per_epoch)
    plt.title('MMD as a function of epoch')

    plt.show()