Beispiel #1
0
    colorImg = True
    binarize = False
    
    if True:
        n_hidden = (500,500)
        n_z = 300
        dir = 'models/svhn_yz_x_300-500-500/'
        from anglepy.models import GPUVAE_YZ_X
        model = GPUVAE_YZ_X(None, n_x, n_y, n_hidden, n_z, n_hidden[::-1], nonlinear, nonlinear, type_px, type_qz=type_qz, type_pz=type_pz, prior_sd=100, init_sd=1e-2)
        w = ndict.loadz(dir+'w_best.ndict.tar.gz')
        v = ndict.loadz(dir+'v_best.ndict.tar.gz')
        ndict.set_value(model.w, w)
        ndict.set_value(model.v, v)

        # PCA
        f_enc, f_dec = pp.PCA_fromfile(dir+'pca_params.ndict.tar.gz')
        
# Configuration branch for the plain MNIST dataset: loads the data and sets
# the dimensions / distribution types that later code reads.
# NOTE(review): `dataset` is defined outside this excerpt.
if dataset == 'mnist':
    # Load the train / validation / test splits through anglepy's MNIST loader.
    # presumably binarize_y=True yields one-hot label matrices -- TODO confirm
    import anglepy.data.mnist as mnist
    train_x, train_y, valid_x, valid_y, test_x, test_y = mnist.load_numpy(size=28, binarize_y=True)
    # No preprocessing for this dataset: the encode/decode hooks are identity
    # functions.
    f_enc, f_dec = lambda x:x, lambda x:x
    
    # Input dimensionality (28*28 = 784) and the matching 2-D shape.
    n_x = 28*28
    dim_input = (28,28)
    # Distribution-type and nonlinearity identifiers; these are plain strings
    # here and are interpreted by code outside this excerpt.
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    type_px = 'bernoulli'
    nonlinear = 'softplus'
    
    # presumably the number of label classes (ten digits) -- TODO confirm
    n_y = 10
Beispiel #2
0
def _encode_splits(transform, l1_v, x_l, y_l, x_u, y_u, valid_x, test_x):
    '''
    Push every data split through the first-layer encoder.

    `transform(l1_v, x)` must return a `(mean, logvar)` pair. The labeled and
    unlabeled training splits keep both statistics (plus their labels); for the
    validation and test splits only the mean is kept.

    Returns the tuple `(x_labeled, x_unlabeled, valid_mean, test_mean)`.
    '''
    mean_u, logvar_u = transform(l1_v, x_u)
    mean_l, logvar_l = transform(l1_v, x_l)
    x_unlabeled = {'mean': mean_u, 'logvar': logvar_u, 'y': y_u}
    x_labeled = {'mean': mean_l, 'logvar': logvar_l, 'y': y_l}

    valid_mean, _ = transform(l1_v, valid_x)
    test_mean, _ = transform(l1_v, test_x)
    return x_labeled, x_unlabeled, valid_mean, test_mean


def main(n_passes, n_labeled, n_z, n_hidden, dataset, seed, alpha,
         n_minibatches, comment):
    '''
    Learn a variational auto-encoder with generative model p(x,y,z)=p(y)p(z)p(x|y,z)
    And where 'x' is always observed and 'y' is _sometimes_ observed (hence semi-supervised).
    We're going to use q(y|x) as a classification model.

    The inputs 'x' of this model are not raw pixels but the posterior
    statistics produced by a previously trained first-layer VAE whose
    parameters are loaded from disk.

    Parameters:
        n_passes:      forwarded to `optim_vae_ss_adam` as `n_passes`.
        n_labeled:     forwarded to `create_semisupervised`; presumably the
                       number of training points that keep their label.
        n_z:           latent dimensionality of the p(x,y,z) model.
        n_hidden:      sequence of hidden-layer sizes, used for the encoder,
                       the decoder and the q(y|x) classifier.
        dataset:       'mnist_2layer' or 'svhn_2layer'.
        seed:          seed passed to `np.random.seed`.
        alpha:         first argument of `optim_vae_ss_adam`.
        n_minibatches: forwarded to `optim_vae_ss_adam` as `n_minibatches`.
        comment:       free-form tag embedded in the log directory name.

    Returns:
        Whatever `optim_vae_ss_adam` returns.

    Raises:
        ValueError: if `dataset` is not one of the supported names.
    '''

    # Fail fast: previously an unknown dataset name created a results
    # directory and then crashed much later with an unbound-variable error.
    supported = ('mnist_2layer', 'svhn_2layer')
    if dataset not in supported:
        raise ValueError('Unsupported dataset {!r}; expected one of {}'.format(
            dataset, ', '.join(supported)))

    import time
    logdir = 'results/learn_yz_x_ss_{}_{}-{}_nlabeled{}_alpha{}_seed{}_{}-{}/'.format(
        dataset, n_z, n_hidden, n_labeled, alpha, seed, comment,
        int(time.time()))
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(logdir, exist_ok=True)
    print('logdir:', logdir)

    print(sys.argv[0], n_labeled, n_z, n_hidden, dataset, seed, comment)

    np.random.seed(seed)

    # Init data
    from anglepy.models.VAE_Z_X import VAE_Z_X
    # The MNIST module also provides the labeled/unlabeled splitter that the
    # SVHN branch uses.
    import anglepy.data.mnist as mnist

    if dataset == 'mnist_2layer':

        size = 28

        # Load model for feature extraction
        path = 'models/mnist_z_x_50-500-500_longrun/'
        l1_v = ndict.loadz(path + 'v.ndict.tar.gz')
        n_h = (500, 500)
        l1_model = VAE_Z_X(n_x=size * size,
                           n_hidden_q=n_h,
                           n_z=50,
                           n_hidden_p=n_h,
                           nonlinear_q='softplus',
                           nonlinear_p='softplus',
                           type_px='bernoulli',
                           type_qz='gaussianmarg',
                           type_pz='gaussianmarg',
                           prior_sd=1)

        # Load train, validation and test sets
        train_x, train_y, valid_x, valid_y, test_x, test_y = mnist.load_numpy_split(
            size, binarize_y=True)

        # create labeled/unlabeled split in training set
        x_l, y_l, x_u, y_u = mnist.create_semisupervised(
            train_x, train_y, n_labeled)

        # Extract features

        def transform(v, _x):
            # The trailing row of ones is an extra input expected by
            # dist_qz['z'] -- presumably a bias/constant term; TODO confirm.
            return l1_model.dist_qz['z'](*([_x] + list(v.values()) +
                                           [np.ones((1, _x.shape[1]))]))

        # 1. Determine which dimensions to keep: latent units whose posterior
        #    mean barely varies over the first 1000 unlabeled points are dropped.
        q_mean, _ = transform(l1_v, x_u[0:1000])
        idx_keep = np.std(q_mean, axis=1) > 0.1

        # 2. Select dimensions
        for key in ['mean_b', 'mean_w', 'logvar_b', 'logvar_w']:
            l1_v[key] = l1_v[key][idx_keep, :]

        # 3. Extract features
        x_labeled, x_unlabeled, valid_x, test_x = _encode_splits(
            transform, l1_v, x_l, y_l, x_u, y_u, valid_x, test_x)

        n_x = np.sum(idx_keep)

    else:  # dataset == 'svhn_2layer' (validated above)

        # Load model for feature extraction
        path = 'models/tmp/svhn_z_x_300-500-500/'
        l1_v = ndict.loadz(path + 'v.ndict.tar.gz')
        l1_w = ndict.loadz(path + 'w.ndict.tar.gz')
        # Only the PCA encoder is needed here; the decoder is discarded.
        f_enc, _ = pp.PCA_fromfile(path + 'pca_params.ndict.tar.gz', True)
        # Input size of the first-layer model, read from its own weights.
        l1_n_x = l1_v['w0'].shape[1]
        l1_model = VAE_Z_X(n_x=l1_n_x,
                           n_hidden_q=(600, 600),
                           n_z=300,
                           n_hidden_p=(600, 600),
                           nonlinear_q='softplus',
                           nonlinear_p='softplus',
                           type_px='gaussian',
                           type_qz='gaussianmarg',
                           type_pz='gaussianmarg',
                           prior_sd=1)

        # SVHN dataset
        import anglepy.data.svhn as svhn
        train_x, train_y, valid_x, valid_y, test_x, test_y = svhn.load_numpy_split(
            False, binarize_y=True, extra=False)

        # create labeled/unlabeled split in training set
        x_l, y_l, x_u, y_u = mnist.create_semisupervised(
            train_x, train_y, n_labeled)

        # Extract features (all latent dimensions are kept for SVHN)

        def transform(v, _x):
            # Inputs are PCA-encoded first; the ones row is sized from the
            # un-encoded input's second axis, exactly as before.
            return l1_model.dist_qz['z'](*([f_enc(_x)] + list(v.values()) +
                                           [np.ones((1, _x.shape[1]))]))

        x_labeled, x_unlabeled, valid_x, test_x = _encode_splits(
            transform, l1_v, x_l, y_l, x_u, y_u, valid_x, test_x)

        # Input size of the second-layer model, read from the first-layer
        # generative weights.
        n_x = l1_w['w0'].shape[1]

    # Settings shared by both datasets
    n_y = 10
    type_pz = 'gaussianmarg'
    type_px = 'gaussian'
    nonlinear = 'softplus'

    # Init VAE model p(x,y,z)
    from anglepy.models.VAE_YZ_X import VAE_YZ_X
    uniform_y = True
    model = VAE_YZ_X(n_x,
                     n_y,
                     n_hidden,
                     n_z,
                     n_hidden,
                     nonlinear,
                     nonlinear,
                     type_px,
                     type_qz="gaussianmarg",
                     type_pz=type_pz,
                     prior_sd=1,
                     uniform_y=uniform_y)
    v, w = model.init_w(1e-3)

    # Init q(y|x) model
    from anglepy.models.MLP_Categorical import MLP_Categorical
    n_units = [n_x] + list(n_hidden) + [n_y]
    model_qy = MLP_Categorical(n_units=n_units,
                               prior_sd=1,
                               nonlinearity=nonlinear)
    u = model_qy.init_w(1e-3)

    # Progress hook
    t0 = time.time()

    def hook(t, u, v, w, ll):
        '''
        Report progress: evaluate q(y|x) on the validation and test sets,
        save the current parameters, log one line, and return the validation
        error.
        '''

        # Fraction of columns whose predicted class differs from the label.
        def error(dataset_x, dataset_y):
            _, _, _z = model_qy.gen_xz(u, {'x': dataset_x}, {})
            predicted = np.argmax(_z['py'], axis=0)
            actual = np.argmax(dataset_y, axis=0)
            return np.mean(predicted != actual)

        valid_error = error(valid_x, valid_y)
        test_error = error(test_x, test_y)

        # Log: parameters are overwritten on every call.
        ndict.savez(u, logdir + 'u')
        ndict.savez(v, logdir + 'v')
        ndict.savez(w, logdir + 'w')

        dt = time.time() - t0

        print(dt, t, ll, valid_error, test_error)
        with open(logdir + 'hook.txt', 'a') as f:
            print(dt, t, ll, valid_error, test_error, file=f)

        return valid_error

    # Optimize
    result = optim_vae_ss_adam(alpha,
                               model_qy,
                               model,
                               x_labeled,
                               x_unlabeled,
                               n_y,
                               u,
                               v,
                               w,
                               n_minibatches=n_minibatches,
                               n_passes=n_passes,
                               hook=hook)

    return result